diff --git a/Package.swift b/Package.swift index 9bb02fcbb..79a72515f 100644 --- a/Package.swift +++ b/Package.swift @@ -7,7 +7,8 @@ let package = Package( name: "GutenbergKit", platforms: [.iOS(.v17), .macOS(.v14)], products: [ - .library(name: "GutenbergKit", targets: ["GutenbergKit"]) + .library(name: "GutenbergKit", targets: ["GutenbergKit"]), + .library(name: "GutenbergKitHTTP", targets: ["GutenbergKitHTTP"]), ], dependencies: [ .package(url: "https://github.com/scinfu/SwiftSoup.git", from: "2.7.5"), @@ -21,6 +22,17 @@ let package = Package( exclude: [], resources: [.copy("Gutenberg")] ), + .target( + name: "GutenbergKitHTTP", + path: "ios/Sources/GutenbergKitHTTP", + exclude: ["README.md"] + ), + .executableTarget( + name: "GutenbergKitDebugServer", + dependencies: ["GutenbergKitHTTP"], + path: "ios/Sources/GutenbergKitDebugServer", + exclude: ["README.md"] + ), .testTarget( name: "GutenbergKitTests", dependencies: ["GutenbergKit"], @@ -29,6 +41,14 @@ let package = Package( resources: [ .process("Resources") ] - ) + ), + .testTarget( + name: "GutenbergKitHTTPTests", + dependencies: ["GutenbergKitHTTP"], + path: "ios/Tests/GutenbergKitHTTPTests", + resources: [ + .copy("../../../test-fixtures/http") + ] + ), ] ) diff --git a/android/Gutenberg/build.gradle.kts b/android/Gutenberg/build.gradle.kts index 18ab79287..f3ebb2219 100644 --- a/android/Gutenberg/build.gradle.kts +++ b/android/Gutenberg/build.gradle.kts @@ -44,9 +44,23 @@ android { jvmTarget = "1.8" } + sourceSets { + getByName("androidTest") { + // Make shared test fixtures available as assets for instrumented tests. + assets.srcDir(rootProject.file("../test-fixtures")) + } + } + testOptions { unitTests { isReturnDefaultValues = true + all { + // Make the shared test fixtures available to fixture-driven tests. + val fixturesDir = rootProject.file("../test-fixtures/http") + it.systemProperty("test.fixtures.dir", fixturesDir.absolutePath) + // Track fixture files as task inputs so changes trigger re-runs. 
+ it.inputs.dir(fixturesDir) + } } } } @@ -64,6 +78,7 @@ dependencies { implementation(libs.okhttp) testImplementation(libs.junit) + testImplementation(kotlin("test")) testImplementation(libs.kotlinx.coroutines.test) testImplementation(libs.mockito.core) testImplementation(libs.mockito.kotlin) diff --git a/android/Gutenberg/src/androidTest/java/org/wordpress/gutenberg/http/InstrumentedFixtureTests.kt b/android/Gutenberg/src/androidTest/java/org/wordpress/gutenberg/http/InstrumentedFixtureTests.kt new file mode 100644 index 000000000..86f4b2a36 --- /dev/null +++ b/android/Gutenberg/src/androidTest/java/org/wordpress/gutenberg/http/InstrumentedFixtureTests.kt @@ -0,0 +1,396 @@ +package org.wordpress.gutenberg.http + +import androidx.test.ext.junit.runners.AndroidJUnit4 +import androidx.test.platform.app.InstrumentationRegistry +import com.google.gson.Gson +import com.google.gson.JsonObject +import org.junit.Assert.assertEquals +import org.junit.Assert.assertNotNull +import org.junit.Assert.assertNull +import org.junit.Assert.assertTrue +import org.junit.Assert.fail +import org.junit.Test +import org.junit.runner.RunWith +import java.util.Base64 + +/** + * Instrumented fixture tests for the pure-Kotlin HTTP parser. + * + * Runs the same shared JSON test fixtures as the JVM unit tests, + * but executes on an actual Android device/emulator to validate + * the parser under the Android runtime (ART). 
/**
 * Instrumented fixture tests for the pure-Kotlin HTTP parser.
 *
 * Runs the same shared JSON test fixtures as the JVM unit tests,
 * but executes on an actual Android device/emulator to validate
 * the parser under the Android runtime (ART).
 *
 * All assertions use `org.junit.Assert`, whose overloads take the failure
 * message as the FIRST argument: `assertEquals(message, expected, actual)`.
 */
@RunWith(AndroidJUnit4::class)
class InstrumentedFixtureTests {

    // MARK: - Header Value Fixtures

    /** Checks `HeaderValue.extractParameter` against every case in `header-value-parsing.json`. */
    @Test
    fun headerValueExtraction() {
        val fixtures = loadFixture("header-value-parsing")

        for (element in fixtures.getAsJsonArray("tests")) {
            val test = element.asJsonObject
            val description = test.get("description").asString
            val parameter = test.get("parameter").asString
            val headerValue = test.get("headerValue").asString
            val expected = test.get("expected").takeUnless { it.isJsonNull }?.asString

            val result = HeaderValue.extractParameter(parameter, headerValue)
            assertEquals("$description: result mismatch", expected, result)
        }
    }

    // MARK: - Request Parsing Fixtures

    /** Runs the `tests` section of `request-parsing.json` (whole-input parsing). */
    @Test
    fun requestParsingBasicCases() {
        val fixtures = loadFixture("request-parsing")

        for (element in fixtures.getAsJsonArray("tests")) {
            val test = element.asJsonObject
            val description = test.get("description").asString
            val expected = test.getAsJsonObject("expected")

            val parser = parserFor(test, test.get("input").asString)

            if (test.has("appendAfterComplete")) {
                val extra = test.get("appendAfterComplete").asString
                parser.append(extra.toByteArray(Charsets.UTF_8))
            }

            assertParsedRequest(description, expected, parser, checkHeaders = true)
        }
    }

    /** Runs the `errorTests` section of `request-parsing.json`; each case must throw the expected error id. */
    @Test
    fun requestParsingErrorCases() {
        val fixtures = loadFixture("request-parsing")

        for (element in fixtures.getAsJsonArray("errorTests")) {
            val test = element.asJsonObject
            val description = test.get("description").asString
            val expectedError = test.getAsJsonObject("expected").get("error").asString

            val parser = if (test.has("inputBase64")) {
                // android.util.Base64 is available on every API level, whereas
                // java.util.Base64 only exists from API 26 onwards.
                val data = android.util.Base64.decode(
                    test.get("inputBase64").asString,
                    android.util.Base64.DEFAULT
                )
                val rawParser = if (test.has("maxBodySize")) {
                    HTTPRequestParser(test.get("maxBodySize").asLong)
                } else {
                    HTTPRequestParser()
                }
                rawParser.also { it.append(data) }
            } else {
                parserFor(test, test.get("input").asString)
            }

            try {
                parser.parseRequest()
                fail("$description: expected error $expectedError but parsing succeeded")
            } catch (e: HTTPRequestParseException) {
                // JUnit 4 argument order: (message, expected, actual).
                assertEquals(
                    "$description: expected $expectedError but got ${e.error.errorId}",
                    expectedError,
                    e.error.errorId
                )
            }
        }
    }

    /** Runs the `incrementalTests` section of `request-parsing.json` (data fed to the parser in pieces). */
    @Test
    fun requestParsingIncrementalCases() {
        val fixtures = loadFixture("request-parsing")

        for (element in fixtures.getAsJsonArray("incrementalTests")) {
            val test = element.asJsonObject
            val description = test.get("description").asString
            val expected = test.getAsJsonObject("expected")

            val parser = HTTPRequestParser()

            when {
                test.has("input") && test.has("chunkSize") -> {
                    val data = test.get("input").asString.toByteArray(Charsets.UTF_8)
                    val chunkSize = test.get("chunkSize").asInt
                    // A non-positive chunk size would never advance the offset below.
                    assertTrue("$description: chunkSize must be positive", chunkSize > 0)
                    var offset = 0
                    while (offset < data.size) {
                        val end = minOf(offset + chunkSize, data.size)
                        parser.append(data.copyOfRange(offset, end))
                        offset = end
                    }
                }
                test.has("headers") -> {
                    parser.append(test.get("headers").asString.toByteArray(Charsets.UTF_8))

                    if (expected.has("afterHeaders")) {
                        assertAfterHeaders(description, expected.getAsJsonObject("afterHeaders"), parser)
                    }

                    if (test.has("bodyChunks")) {
                        for (chunkElement in test.getAsJsonArray("bodyChunks")) {
                            parser.append(chunkElement.asString.toByteArray(Charsets.UTF_8))
                        }
                    }
                }
                test.has("input") -> {
                    parser.append(test.get("input").asString.toByteArray(Charsets.UTF_8))
                }
            }

            // Incremental fixtures have never been checked for headers; keep that contract.
            assertParsedRequest(description, expected, parser, checkHeaders = false)
        }
    }

    // MARK: - Multipart Parsing Fixtures

    /** Runs the `tests` section of `multipart-parsing.json`. */
    @Test
    fun multipartParsingCases() {
        val fixtures = loadFixture("multipart-parsing")

        for (element in fixtures.getAsJsonArray("tests")) {
            val test = element.asJsonObject
            val description = test.get("description").asString
            val boundary = test.get("boundary").asString
            val quotedBoundary = test.has("quotedBoundary") && test.get("quotedBoundary").asBoolean
            val rawBody = test.get("rawBody").asString

            val request = buildRawMultipartRequest(rawBody, boundary, quotedBoundary)

            val expected = test.getAsJsonObject("expected")
            if (expected.has("contentType")) {
                assertEquals(
                    "$description: Content-Type",
                    expected.get("contentType").asString,
                    request.header("Content-Type")
                )
            }

            val parts = request.multipartParts()
            val expectedParts = expected.getAsJsonArray("parts")
            assertEquals("$description: part count", expectedParts.size(), parts.size)

            for (i in 0 until minOf(expectedParts.size(), parts.size)) {
                assertPart(description, i, expectedParts[i].asJsonObject, parts[i])
            }
        }
    }

    /** Runs the `errorTests` section of `multipart-parsing.json`; each case must throw the expected error id. */
    @Test
    fun multipartParsingErrorCases() {
        val fixtures = loadFixture("multipart-parsing")

        for (element in fixtures.getAsJsonArray("errorTests")) {
            val test = element.asJsonObject
            val description = test.get("description").asString
            val expected = test.getAsJsonObject("expected")
            val expectedError = expected?.get("error")?.asString ?: test.get("expectedError").asString
            val contentType = test.get("contentType")?.asString ?: expected?.get("contentType")?.asString

            val request: ParsedHTTPRequest = when {
                test.has("rawBody") && test.has("boundary") -> buildRawMultipartRequest(
                    test.get("rawBody").asString,
                    test.get("boundary").asString
                )
                contentType != null && test.has("body") -> {
                    val body = test.get("body").asString
                    val raw = "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n" +
                        "Content-Type: $contentType\r\n" +
                        "Content-Length: ${body.toByteArray(Charsets.UTF_8).size}\r\n\r\n$body"
                    HTTPRequestParser(raw).parseRequest() ?: run {
                        fail("$description: parsing request failed")
                        return
                    }
                }
                contentType != null -> {
                    val raw = "GET /upload HTTP/1.1\r\nHost: localhost\r\n" +
                        "Content-Type: $contentType\r\n\r\n"
                    HTTPRequestParser(raw).parseRequest() ?: run {
                        fail("$description: parsing request failed")
                        return
                    }
                }
                else -> {
                    fail("$description: invalid error test case")
                    return
                }
            }

            try {
                request.multipartParts()
                fail("$description: expected error $expectedError but succeeded")
            } catch (e: MultipartParseException) {
                // JUnit 4 argument order: (message, expected, actual).
                assertEquals(
                    "$description: expected $expectedError but got ${e.error.errorId}",
                    expectedError,
                    e.error.errorId
                )
            }
        }
    }

    // MARK: - Helpers

    /** Reads `http/<name>.json` from the instrumentation context's assets and parses it as a JSON object. */
    private fun loadFixture(name: String): JsonObject {
        val context = InstrumentationRegistry.getInstrumentation().context
        // `use` closes the asset stream even if reading throws.
        val json = context.assets.open("http/$name.json").bufferedReader().use { it.readText() }
        return Gson().fromJson(json, JsonObject::class.java)
    }

    /**
     * Builds a parser for [input]. When the fixture declares `maxBodySize`, the
     * parser is created with that limit and the input is appended as UTF-8
     * bytes; otherwise the string constructor is used.
     */
    private fun parserFor(test: JsonObject, input: String): HTTPRequestParser {
        if (!test.has("maxBodySize")) return HTTPRequestParser(input)
        return HTTPRequestParser(test.get("maxBodySize").asLong).also {
            it.append(input.toByteArray(Charsets.UTF_8))
        }
    }

    /** Asserts the parser state expectations listed under a fixture's `afterHeaders` object. */
    private fun assertAfterHeaders(description: String, afterHeaders: JsonObject, parser: HTTPRequestParser) {
        if (afterHeaders.has("hasHeaders")) {
            assertEquals(
                "$description: hasHeaders after headers",
                afterHeaders.get("hasHeaders").asBoolean,
                parser.state.hasHeaders
            )
        }
        if (afterHeaders.has("isComplete")) {
            assertEquals(
                "$description: isComplete after headers",
                afterHeaders.get("isComplete").asBoolean,
                parser.state.isComplete
            )
        }
        if (afterHeaders.has("method") || afterHeaders.has("target")) {
            val partialRequest = parser.parseRequest() ?: run {
                fail("$description: partial request should not be null")
                return
            }
            if (afterHeaders.has("method")) {
                assertEquals(
                    "$description: method after headers",
                    afterHeaders.get("method").asString,
                    partialRequest.method
                )
            }
            if (afterHeaders.has("target")) {
                assertEquals(
                    "$description: target after headers",
                    afterHeaders.get("target").asString,
                    partialRequest.target
                )
            }
        }
    }

    /**
     * Parses the request held by [parser] and checks it against [expected].
     *
     * When the fixture expects neither headers nor completion, only asserts
     * that the parser has no headers and that `parseRequest()` returns null.
     *
     * @param checkHeaders whether the `headers` object of [expected] is compared.
     */
    private fun assertParsedRequest(
        description: String,
        expected: JsonObject,
        parser: HTTPRequestParser,
        checkHeaders: Boolean
    ) {
        val expectsNothingParsed =
            expected.has("isComplete") && !expected.get("isComplete").asBoolean &&
                expected.has("hasHeaders") && !expected.get("hasHeaders").asBoolean
        if (expectsNothingParsed) {
            assertTrue("$description: should not have headers", !parser.state.hasHeaders)
            assertNull("$description: parseRequest should return null", parser.parseRequest())
            return
        }

        val request = parser.parseRequest() ?: run {
            fail("$description: parseRequest returned null")
            return
        }

        if (expected.has("method")) {
            assertEquals("$description: method", expected.get("method").asString, request.method)
        }
        if (expected.has("target")) {
            assertEquals("$description: target", expected.get("target").asString, request.target)
        }
        if (expected.has("isComplete") && expected.get("isComplete").asBoolean) {
            assertTrue("$description: isComplete", parser.state.isComplete)
        }
        if (checkHeaders && expected.has("headers")) {
            for (entry in expected.getAsJsonObject("headers").entrySet()) {
                assertEquals(
                    "$description: header ${entry.key}",
                    entry.value.asString,
                    request.header(entry.key)
                )
            }
        }
        if (expected.has("body")) {
            val actualBody = request.body
            if (expected.get("body").isJsonNull) {
                assertNull("$description: body should be null", actualBody)
            } else if (actualBody == null) {
                fail("$description: body should not be null")
            } else {
                assertEquals(
                    "$description: body content",
                    expected.get("body").asString,
                    String(actualBody.readBytes(), Charsets.UTF_8)
                )
            }
        }
    }

    /** Compares one parsed multipart part with its expected JSON description. */
    private fun assertPart(description: String, i: Int, exp: JsonObject, part: MultipartPart) {
        assertEquals("$description: part[$i].name", exp.get("name").asString, part.name)
        if (exp.has("filename")) {
            if (exp.get("filename").isJsonNull) {
                assertNull("$description: part[$i].filename should be null", part.filename)
            } else {
                assertEquals("$description: part[$i].filename", exp.get("filename").asString, part.filename)
            }
        }
        if (exp.has("contentType")) {
            assertEquals("$description: part[$i].contentType", exp.get("contentType").asString, part.contentType)
        }
        if (exp.has("body")) {
            assertEquals(
                "$description: part[$i].body",
                exp.get("body").asString,
                String(part.body.readBytes(), Charsets.UTF_8)
            )
        }
    }

    /**
     * Wraps [body] in a `POST /wp/v2/media` request with a
     * `multipart/form-data` Content-Type and a matching Content-Length, then
     * parses it.
     *
     * @param quotedBoundary when true the boundary parameter is written in double quotes.
     */
    private fun buildRawMultipartRequest(
        body: String,
        boundary: String,
        quotedBoundary: Boolean = false
    ): ParsedHTTPRequest {
        val boundaryParam = if (quotedBoundary) "\"$boundary\"" else boundary
        val raw = "POST /wp/v2/media HTTP/1.1\r\nHost: localhost\r\n" +
            "Content-Type: multipart/form-data; boundary=$boundaryParam\r\n" +
            "Content-Length: ${body.toByteArray(Charsets.UTF_8).size}\r\n\r\n$body"
        return checkNotNull(HTTPRequestParser(raw).parseRequest()) {
            "multipart fixture request could not be parsed"
        }
    }
}
/**
 * An HTTP response to send back to a client.
 *
 * Equality and hashing compare [body] by content. A plain data class would
 * compare the array by reference, making two responses with identical bytes
 * unequal.
 *
 * @property status The HTTP status code (e.g., 200, 404).
 * @property headers Additional response headers.
 * @property body The response body. The entire body is held in memory. This is
 *   fine for the current use case (Gutenberg REST API payloads — JSON, HTML,
 *   CSS, JS) which are small. If large responses (e.g., media downloads) need
 *   to be proxied in the future, this could be replaced with a streaming
 *   abstraction similar to [RequestBody][org.wordpress.gutenberg.http.RequestBody].
 */
data class HttpResponse(
    val status: Int = 200,
    val headers: Map<String, String> = mapOf("Content-Type" to "text/plain"),
    val body: ByteArray = ByteArray(0)
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is HttpResponse) return false
        return status == other.status &&
            headers == other.headers &&
            body.contentEquals(other.body)
    }

    override fun hashCode(): Int {
        var result = status
        result = 31 * result + headers.hashCode()
        result = 31 * result + body.contentHashCode()
        return result
    }
}
+ * Uses `Proxy-Authorization` per RFC 9110 §11.7.1 so that the client's + * `Authorization` header can carry upstream credentials (e.g. HTTP Basic) + * independently of the proxy token. + * + * ## Connection Model + * + * Each connection handles exactly one request (`Connection: close`). HTTP + * keep-alive / pipelining is intentionally unsupported. This simplifies body + * framing — in particular, GET/DELETE requests with unexpected body data are + * safe because leftover bytes are discarded when the connection closes. If + * keep-alive were ever added, body framing for all methods would need to be + * enforced to prevent request smuggling. + * + * @property name A stable identifier for this server instance. Must be consistent across + * runs of the same logical server. Used to namespace temporary files so that multiple + * server instances don't interfere with each other's orphan cleanup. Each distinct server + * should have a unique name. It is the caller's responsibility to choose a descriptive, + * collision-free identifier (e.g. `"media-proxy"`, `"editor-assets"`). + * + * ```kotlin + * val server = HttpServer( + * name = "media-proxy", + * externallyAccessible = true, + * handler = { request -> + * println("${request.method} ${request.target} (${"%.2f".format(request.parseDurationMs)}ms)") + * HttpResponse(body = "OK\n".toByteArray()) + * } + * ) + * server.start() + * println("Listening on port ${server.port}, token: ${server.token}") + * // ... + * server.stop() + * ``` + */ +class HttpServer( + val name: String, + private val requestedPort: Int = 0, + private val externallyAccessible: Boolean, + private val requiresAuthentication: Boolean = true, + private val maxConnections: Int = DEFAULT_MAX_CONNECTIONS, + private val maxBodySize: Long = DEFAULT_MAX_BODY_SIZE, + private val readTimeoutMs: Int = DEFAULT_READ_TIMEOUT_MS, + private val idleTimeoutMs: Int = DEFAULT_IDLE_TIMEOUT_MS, + private val cacheDir: File? 
= null, + private val handler: suspend (HttpRequest) -> HttpResponse +) { + @Volatile + private var serverSocket: ServerSocket? = null + private var scope: CoroutineScope? = null + private val connectionSemaphore = Semaphore(maxConnections) + private val stateLock = Any() + private val TAG = "GutenbergKit.HTTP" + private val tempSubdir = "gutenberg-http-${sanitizeName(name)}" + + @Volatile + private var running = false + + /** + * A bearer token required in the `Proxy-Authorization` header of every + * request. Uses `Proxy-Authorization` (RFC 9110 §11.7.1) rather than + * `Authorization` so that the client's own `Authorization` header + * (e.g. HTTP Basic credentials for the upstream server) passes through + * to the handler untouched. Generated randomly on each server instance creation. + */ + val token: String = generateToken() + + /** The port the server is listening on, or 0 if not started. */ + val port: Int get() = serverSocket?.localPort ?: 0 + + /** Starts the server. If already running, this is a no-op. */ + fun start() { + synchronized(stateLock) { + if (running) return + + // Clean up temp files left behind by previous runs (e.g., crash or process kill). + cacheDir?.let { TempFileOwner.cleanOrphans(it, tempSubdir) } + + val bindAddress = if (externallyAccessible) { + InetAddress.getByName("0.0.0.0") + } else { + InetAddress.getLoopbackAddress() + } + serverSocket = ServerSocket(requestedPort, maxConnections, bindAddress) + scope = CoroutineScope(SupervisorJob() + Dispatchers.IO) + running = true + Log.i(TAG, "HTTP server started on port ${serverSocket!!.localPort}") + + scope!!.launch(Dispatchers.IO) { + while (running) { + try { + val socket = serverSocket?.accept() ?: break + if (!connectionSemaphore.tryAcquire()) { + socket.close() + continue + } + launch { + try { + handleConnection(socket) + } finally { + connectionSemaphore.release() + } + } + } catch (_: SocketException) { + // Expected when stop() closes the server socket. 
+ break + } catch (e: Exception) { + Log.e(TAG, "Accept loop terminated unexpectedly", e) + running = false + break + } + } + } + } + } + + /** Stops the server and releases resources. */ + fun stop() { + synchronized(stateLock) { + running = false + try { + serverSocket?.close() + } catch (_: Exception) { + // ignore + } + serverSocket = null + // Cancelling the scope cancels both the accept loop and all + // in-flight connection handlers. The blocking accept() call + // is unblocked by closing the server socket above. + scope?.cancel() + scope = null + Log.i(TAG, "HTTP server stopped") + } + } + + private suspend fun handleConnection(socket: Socket) { + socket.use { sock -> + try { + sock.soTimeout = idleTimeoutMs + handleRequest(sock) + } catch (_: SocketTimeoutException) { + // RFC 9110 §15.5.9: send 408 before closing on read timeout. + try { + sendResponse(sock, HttpResponse( + status = 408, + body = "Request Timeout".toByteArray() + )) + } catch (_: Exception) { + // Best-effort — socket may already be broken. + } + } catch (e: Exception) { + Log.w(TAG, "Connection error", e) + } + } + } + + private suspend fun handleRequest(socket: Socket) { + val input = BufferedInputStream(socket.getInputStream()) + + val parser = HTTPRequestParser(maxBodySize = maxBodySize, cacheDir = cacheDir, tempSubdir = tempSubdir) + parser.use { + val parseStart = System.nanoTime() + // Note: the deadline is checked between reads, not during a blocking + // read. Since each read can block for up to idleTimeoutMs (soTimeout), + // the effective maximum time is readTimeoutMs + idleTimeoutMs. This is + // a bounded imprecision — slow-loris protection is still effective + // because the attacker must send data to keep the connection alive, + // and each time data arrives the loop iterates and checks the deadline. + val deadlineNanos = parseStart + readTimeoutMs * 1_000_000L + val buffer = ByteArray(READ_CHUNK_SIZE) + + // Phase 1: receive headers only. 
+ while (!parser.state.hasHeaders) { + if (System.nanoTime() > deadlineNanos) { + throw SocketTimeoutException("Read deadline exceeded") + } + val bytesRead = input.read(buffer) + if (bytesRead == -1) break + parser.append(buffer.copyOfRange(0, bytesRead)) + } + + // Validate headers (triggers full RFC validation). + val partial = try { + parser.parseRequest() + } catch (e: HTTPRequestParseException) { + val statusText = STATUS_TEXT[e.error.httpStatus] ?: "Bad Request" + sendResponse(socket, HttpResponse( + status = e.error.httpStatus, + body = statusText.toByteArray() + )) + return + } catch (_: java.io.IOException) { + sendResponse(socket, HttpResponse( + status = 500, + body = "Internal Server Error".toByteArray() + )) + return + } + + if (partial == null) { + sendResponse(socket, HttpResponse( + status = 400, + body = "Bad Request".toByteArray() + )) + return + } + + // Check auth before consuming body to avoid buffering up to + // maxBodySize for unauthenticated clients. + if (requiresAuthentication) { + val proxyAuth = partial.header("Proxy-Authorization") + if (!authenticate(proxyAuth, token)) { + sendResponse(socket, HttpResponse( + status = 407, + headers = mapOf("Content-Type" to "text/plain", "Proxy-Authenticate" to "Bearer") + )) + return + } + } + + // Reject body-bearing methods without Content-Length. + // We don't support Transfer-Encoding: chunked, so + // Content-Length is the only way to determine body size. + val upperMethod = partial.method.uppercase() + if (upperMethod in listOf("POST", "PUT", "PATCH") && partial.header("Content-Length") == null) { + sendResponse(socket, HttpResponse( + status = 411, + body = "Length Required".toByteArray() + )) + return + } + + // Phase 2: receive body (skipped if already complete). 
+ while (!parser.state.isComplete) { + if (System.nanoTime() > deadlineNanos) { + throw SocketTimeoutException("Read deadline exceeded") + } + val bytesRead = input.read(buffer) + if (bytesRead == -1) break + parser.append(buffer.copyOfRange(0, bytesRead)) + } + + // Final parse with body. + val parsed = try { + parser.parseRequest() + } catch (e: HTTPRequestParseException) { + val statusText = STATUS_TEXT[e.error.httpStatus] ?: "Bad Request" + sendResponse(socket, HttpResponse( + status = e.error.httpStatus, + body = statusText.toByteArray() + )) + return + } catch (_: java.io.IOException) { + sendResponse(socket, HttpResponse( + status = 500, + body = "Internal Server Error".toByteArray() + )) + return + } + val parseDurationMs = (System.nanoTime() - parseStart) / 1_000_000.0 + + // Connection closed before request was complete — send 400. + if (parsed == null || !parsed.isComplete) { + parsed?.body?.fileOwner?.close() + sendResponse(socket, HttpResponse( + status = 400, + body = "Bad Request".toByteArray() + )) + return + } + + // Clean up the temp file backing the request body on all exit + // paths. The body (and any multipart parts derived from it) share + // a single TempFileOwner — .use{} guarantees it is closed whether + // we return normally, return early, or throw. 
+ parsed.body?.fileOwner.use { + val request = HttpRequest( + method = parsed.method, + target = parsed.target, + headers = parsed.headers, + body = parsed.body, + parseDurationMs = parseDurationMs + ) + val response = try { + handler(request) + } catch (e: Exception) { + Log.e(TAG, "Handler threw", e) + HttpResponse( + status = 500, + body = "Internal Server Error".toByteArray() + ) + } + sendResponse(socket, response) + Log.d(TAG, "${parsed.method} ${parsed.target} → ${response.status} (${"%.1f".format(parseDurationMs)}ms)") + } + } + } + + private fun sendResponse(socket: Socket, response: HttpResponse) { + val output = socket.getOutputStream() + output.write(serializeResponse(response)) + output.flush() + } + + companion object { + /** Default maximum number of concurrent connections. */ + const val DEFAULT_MAX_CONNECTIONS: Int = 5 + + /** Default maximum request body size (4 GB). */ + const val DEFAULT_MAX_BODY_SIZE: Long = 4L * 1024 * 1024 * 1024 + + /** Default read timeout in milliseconds (30 seconds). */ + const val DEFAULT_READ_TIMEOUT_MS: Int = 30_000 + + /** + * Default idle timeout in milliseconds (5 seconds). + * If no data arrives within this interval on a single read, + * the connection is closed with a 408 response. Prevents slow-loris + * attacks where an attacker drip-feeds bytes to hold a connection slot. + */ + const val DEFAULT_IDLE_TIMEOUT_MS: Int = 5_000 + + /** Chunk size for reading request data. */ + private const val READ_CHUNK_SIZE: Int = 65536 + + /** Standard English reason phrases per RFC 9110 / RFC 9112 §4. 
*/ + private val STATUS_TEXT = mapOf( + // 1xx Informational + 100 to "Continue", + 101 to "Switching Protocols", + 102 to "Processing", + 103 to "Early Hints", + // 2xx Success + 200 to "OK", + 201 to "Created", + 202 to "Accepted", + 203 to "Non-Authoritative Information", + 204 to "No Content", + 205 to "Reset Content", + 206 to "Partial Content", + 207 to "Multi-Status", + 208 to "Already Reported", + 226 to "IM Used", + // 3xx Redirection + 300 to "Multiple Choices", + 301 to "Moved Permanently", + 302 to "Found", + 303 to "See Other", + 304 to "Not Modified", + 307 to "Temporary Redirect", + 308 to "Permanent Redirect", + // 4xx Client Error + 400 to "Bad Request", + 401 to "Unauthorized", + 402 to "Payment Required", + 403 to "Forbidden", + 404 to "Not Found", + 405 to "Method Not Allowed", + 406 to "Not Acceptable", + 407 to "Proxy Authentication Required", + 408 to "Request Timeout", + 409 to "Conflict", + 410 to "Gone", + 411 to "Length Required", + 412 to "Precondition Failed", + 413 to "Content Too Large", + 414 to "URI Too Long", + 415 to "Unsupported Media Type", + 416 to "Range Not Satisfiable", + 417 to "Expectation Failed", + 421 to "Misdirected Request", + 422 to "Unprocessable Content", + 423 to "Locked", + 424 to "Failed Dependency", + 425 to "Too Early", + 426 to "Upgrade Required", + 428 to "Precondition Required", + 429 to "Too Many Requests", + 431 to "Request Header Fields Too Large", + 451 to "Unavailable For Legal Reasons", + // 5xx Server Error + 500 to "Internal Server Error", + 501 to "Not Implemented", + 502 to "Bad Gateway", + 503 to "Service Unavailable", + 504 to "Gateway Timeout", + 505 to "HTTP Version Not Supported", + 506 to "Variant Also Negotiates", + 507 to "Insufficient Storage", + 508 to "Loop Detected", + 510 to "Not Extended", + 511 to "Network Authentication Required" + ) + + /** + * Serializes an HTTP response to raw bytes per RFC 9112 §4. 
+ */ + // Headers excluded during serialization: hop-by-hop headers (RFC 9110 §7.6.1) + // plus headers that are always recalculated (Content-Length, Date, Server). + private val RESPONSE_HOP_BY_HOP = setOf( + "connection", "transfer-encoding", "keep-alive", + "proxy-connection", "te", "upgrade", "trailer", + "date", "server" + ) + + internal fun serializeResponse(response: HttpResponse): ByteArray { + val statusText = STATUS_TEXT[response.status] ?: "Unknown" + val sb = StringBuilder() + val clampedStatus = response.status.coerceIn(0, 999) + sb.append("HTTP/1.1 %03d %s\r\n".format(clampedStatus, statusText)) + + for ((key, value) in response.headers) { + val lower = key.lowercase() + // Skip Content-Length (always recalculated) and hop-by-hop headers. + if (lower == "content-length") continue + if (lower in RESPONSE_HOP_BY_HOP) continue + val cleanKey = sanitizeHeaderString(key) + val cleanValue = sanitizeHeaderString(value) + if (cleanKey.isNotEmpty()) { + sb.append("$cleanKey: $cleanValue\r\n") + } + } + + sb.append("Content-Length: ${response.body.size}\r\n") + sb.append("Connection: close\r\n") + sb.append("Date: ${httpDate()}\r\n") + sb.append("Server: GutenbergKit\r\n") + + sb.append("\r\n") + return sb.toString().toByteArray(Charsets.UTF_8) + response.body + } + + /** + * Strips control characters per RFC 9110 §5.5. Preserves HTAB (0x09) + * and obs-text (0x80+), which the RFC explicitly allows in field values. + */ + private fun sanitizeHeaderString(value: String): String { + return buildString(value.length) { + for (c in value) { + val code = c.code + if (code == 0x09 || code in 0x20..0x7E || code >= 0x80) { + append(c) + } + } + } + } + + /** + * Validates the proxy bearer token from the `Proxy-Authorization` + * header (RFC 9110 §11.7.1). Using `Proxy-Authorization` keeps the + * client's `Authorization` header available for upstream credentials. + * + * Uses constant-time comparison to prevent timing attacks. 
*/
        private fun authenticate(proxyAuth: String?, expectedToken: String): Boolean {
            val scheme = "Bearer "
            // A missing header or a non-Bearer scheme can never authenticate.
            val header = proxyAuth ?: return false
            if (!header.startsWith(scheme, ignoreCase = true)) return false
            // Everything after the scheme prefix is the candidate token.
            return constantTimeEqual(header.substring(scheme.length), expectedToken)
        }

        /**
         * Constant-time string equality, used to defeat timing attacks.
         *
         * The loop always walks the expected token ([b]), whatever the length of
         * the input, so timing reveals neither whether the lengths match nor how
         * many bytes are correct. On a length mismatch [b] is compared against
         * itself and the result is pre-seeded as "different", which keeps the
         * amount of work constant.
         *
         * **Do not replace this with `MessageDigest.isEqual()`.**
         * On Android API 24–32 (our minSdk through Android 12),
         * `isEqual()` returns early when array lengths differ — leaking
         * the expected token length via timing. The fully constant-time
         * fix (JDK-8295919) only shipped in Android 13 (API 33). The
         * constant-time property is also only an `@implNote`, not a spec
         * guarantee, so other runtimes are not obligated to honour it.
         *
         * **Do not "simplify" this to an early-return on length mismatch.**
         * An early return would let an attacker measure response time to discover
         * the expected token length, even though the token length is currently
         * fixed at 64 hex characters. This implementation is intentionally
         * branch-free in the hot path to avoid leaking any information.
         */
        private fun constantTimeEqual(a: String, b: String): Boolean {
            val candidate = a.toByteArray(Charsets.UTF_8)
            val expected = b.toByteArray(Charsets.UTF_8)
            val sameLength = candidate.size == expected.size

            // Length mismatch: compare the expected bytes with themselves so the loop
            // below does the same work, and seed the accumulator with a non-zero bit.
            val left = if (sameLength) candidate else expected
            var diff: Int = if (sameLength) 0 else 1

            // OR-accumulate every byte difference; no early exit.
            for (index in expected.indices) {
                diff = diff or (left[index].toInt() xor expected[index].toInt())
            }
            return diff == 0
        }

        /** Formats the current time as an HTTP-date per RFC 9110 §5.6.7.
*/
        private fun httpDate(): String =
            // Locale.US keeps day/month names in English; the zone is pinned to GMT
            // because the pattern hard-codes the literal "GMT" suffix.
            SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss 'GMT'", Locale.US)
                .apply { timeZone = TimeZone.getTimeZone("GMT") }
                .format(Date())

        /**
         * Strips characters from [name] that are not letters, digits, `.`, `-`, or `_`.
         *
         * The server name is embedded in filesystem paths (temp directory).
         * Allowing arbitrary characters (e.g. `../`) would enable path traversal.
         * For the same reason a result made up only of dots (`.`, `..`, …) is
         * rejected: filtering alone would let `".."` through unchanged, and that
         * path component refers to the parent directory.
         *
         * @return the filtered name.
         * @throws IllegalArgumentException if nothing remains after filtering, or
         *   if the remaining name consists solely of dots.
         */
        private fun sanitizeName(name: String): String {
            val sanitized = name.filter { it.isLetterOrDigit() || it in ".-_" }
            require(sanitized.isNotEmpty()) {
                "Server name must contain at least one alphanumeric character, dot, hyphen, or underscore"
            }
            require(sanitized.any { it != '.' }) {
                "Server name must not consist solely of dots"
            }
            return sanitized
        }

        /** Generates a cryptographically random 64-character hex token. */
        private fun generateToken(): String {
            val bytes = ByteArray(32)
            SecureRandom().nextBytes(bytes)
            // Mask to 0..255 and encode each byte as two lowercase hex digits without
            // going through the locale-aware Formatter.
            return bytes.joinToString("") { (it.toInt() and 0xFF).toString(16).padStart(2, '0') }
        }

        /**
         * Returns the device's local IPv4 address on the network, or null.
         *
         * Picks the first non-loopback IPv4 address found across all network
         * interfaces. Any failure while enumerating interfaces is treated as
         * "no address" (best effort).
         */
        @JvmStatic
        fun getLocalIpAddress(): String? {
            return try {
                NetworkInterface.getNetworkInterfaces()?.toList()
                    ?.flatMap { it.inetAddresses.toList() }
                    ?.firstOrNull { !it.isLoopbackAddress && it is Inet4Address }
                    ?.hostAddress
            } catch (_: Exception) {
                null
            }
        }
    }
}

/** A logged HTTP request with timestamp.
*/
data class RequestLogEntry(
    val timestamp: Date,
    val method: String,
    val target: String,
    val requestBodySize: Int = 0,
    val parseDurationMs: Double = 0.0
)
diff --git a/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/HTTPRequestParser.kt b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/HTTPRequestParser.kt
new file mode 100644
index 000000000..20e46a758
--- /dev/null
+++ b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/HTTPRequestParser.kt
@@ -0,0 +1,304 @@
package org.wordpress.gutenberg.http

import java.io.Closeable
import java.io.File
import java.io.IOException

/**
 * Parses raw HTTP/1.1 request data into a structured [ParsedHTTPRequest].
 *
 * This parser handles incremental data — call [append] as bytes arrive,
 * then check [state] to determine whether buffering is complete.
 *
 * The parser buffers incoming data to a temporary file on disk rather than
 * accumulating it in memory, making it suitable for large request bodies.
 * If the temp file cannot be created (e.g. disk full), the parser falls back
 * to in-memory buffering automatically.
 *
 * State tracking is lightweight — [append] scans for the header separator
 * (`\r\n\r\n`) and extracts `Content-Length`. Full parsing and RFC validation
 * are deferred until [parseRequest] is called.
 *
 * All access to the parser's mutable state and buffer goes through a single
 * internal lock.
 *
 * ```kotlin
 * val parser = HTTPRequestParser("GET /api HTTP/1.1\r\nHost: localhost\r\n\r\n")
 * val request = parser.parseRequest()
 * println("${request?.method} ${request?.target}")
 * ```
 */
class HTTPRequestParser(
    private val maxBodySize: Long = DEFAULT_MAX_BODY_SIZE,
    private val inMemoryBodyThreshold: Int = DEFAULT_IN_MEMORY_BODY_THRESHOLD,
    cacheDir: File? = null,
    tempSubdir: String = TempFileOwner.DEFAULT_TEMP_SUBDIR
) : Closeable {
    /** The current buffering state of the parser. */
    enum class State {
        /** More data is needed before headers are complete. */
        NEEDS_MORE_DATA,
        /** Headers have been fully received but the body is still incomplete. */
        HEADERS_COMPLETE,
        /** All data has been received (headers and body). */
        COMPLETE;

        /** Whether headers have been fully received. */
        val hasHeaders: Boolean
            get() = this == HEADERS_COMPLETE || this == COMPLETE

        /** Whether all data has been received. */
        val isComplete: Boolean
            get() = this == COMPLETE
    }

    companion object {
        /** Default maximum request body size (4 GB). */
        const val DEFAULT_MAX_BODY_SIZE: Long = 4L * 1024 * 1024 * 1024

        /** Default threshold below which bodies are kept in memory (512 KB). */
        const val DEFAULT_IN_MEMORY_BODY_THRESHOLD: Int = 512 * 1024

        /** Maximum number of bytes to buffer before the header terminator is found (64 KB). */
        const val MAX_HEADER_SIZE: Int = 65536
    }

    private val lock = Any()
    private val buffer = Buffer(maxSize = MAX_HEADER_SIZE + inMemoryBodyThreshold, cacheDir = cacheDir, tempSubdir = tempSubdir)
    private var _state: State = State.NEEDS_MORE_DATA
    private var bytesWritten: Long = 0
    private var headerEndOffset: Long? = null
    private var expectedContentLength: Long = 0

    // Lazy parsing cache
    private var parsedHeaders: ParsedHeaders? = null
    private var parseError: HTTPRequestParseError? = null
    private var cachedBody: RequestBody? = null
    private var bodyExtracted = false

    /** The current buffering state. */
    val state: State get() = synchronized(lock) { _state }

    /** Creates a parser and immediately parses the given raw HTTP string. */
    constructor(
        input: String,
        maxBodySize: Long = DEFAULT_MAX_BODY_SIZE,
        inMemoryBodyThreshold: Int = DEFAULT_IN_MEMORY_BODY_THRESHOLD,
        cacheDir: File? = null,
        tempSubdir: String = TempFileOwner.DEFAULT_TEMP_SUBDIR
    ) : this(maxBodySize, inMemoryBodyThreshold, cacheDir, tempSubdir) {
        append(input.toByteArray(Charsets.UTF_8))
    }

    /**
     * Creates a parser and immediately parses the given raw HTTP data.
     *
     * Accepts the same optional [tempSubdir] as the `String` constructor so both
     * convenience constructors expose identical configuration.
     */
    constructor(
        data: ByteArray,
        maxBodySize: Long = DEFAULT_MAX_BODY_SIZE,
        inMemoryBodyThreshold: Int = DEFAULT_IN_MEMORY_BODY_THRESHOLD,
        cacheDir: File? = null,
        tempSubdir: String = TempFileOwner.DEFAULT_TEMP_SUBDIR
    ) : this(maxBodySize, inMemoryBodyThreshold, cacheDir, tempSubdir) {
        append(data)
    }

    /**
     * Appends received data to the buffer and updates the buffering state.
     *
     * This method performs lightweight scanning — it looks for the `\r\n\r\n`
     * header separator and extracts the `Content-Length` value. Full parsing
     * and RFC validation are deferred until [parseRequest] is called.
     *
     * Failures never throw from here: they are recorded, the state becomes
     * [State.COMPLETE], and the recorded error is thrown by [parseRequest].
     * Data appended after the state is [State.COMPLETE] is ignored.
     */
    fun append(data: ByteArray): Unit = synchronized(lock) {
        if (_state == State.COMPLETE) return

        val accepted = try {
            buffer.append(data)
        } catch (_: IOException) {
            fail(HTTPRequestParseError.BUFFER_IO_ERROR)
            return
        }
        if (!accepted) {
            // The buffer refused the data.
            fail(HTTPRequestParseError.PAYLOAD_TOO_LARGE)
            return
        }
        bytesWritten += data.size.toLong()

        if (headerEndOffset == null) {
            // Headers not located yet: re-scan at most MAX_HEADER_SIZE bytes from the start.
            val readLength = minOf(bytesWritten, MAX_HEADER_SIZE.toLong()).toInt()
            val buffered = try {
                buffer.read(0, readLength)
            } catch (_: Exception) {
                fail(HTTPRequestParseError.BUFFER_IO_ERROR)
                return
            }
            val separator = "\r\n\r\n".toByteArray(Charsets.UTF_8)

            // RFC 7230 §3.5: Skip leading CRLFs for robustness.
            var scanStart = 0
            while (scanStart + 1 < buffered.size &&
                buffered[scanStart] == 0x0D.toByte() &&
                buffered[scanStart + 1] == 0x0A.toByte()
            ) {
                scanStart += 2
            }

            val sepIndex = ReadOnlyBytes(buffered).indexOf(separator, scanStart)
            if (sepIndex == -1) {
                if (bytesWritten > MAX_HEADER_SIZE) {
                    // No terminator within the header size limit.
                    fail(HTTPRequestParseError.HEADERS_TOO_LARGE)
                } else {
                    _state = State.NEEDS_MORE_DATA
                }
                return
            }

            headerEndOffset = (sepIndex + separator.size).toLong()
            val headerBytes = buffered.copyOfRange(scanStart, sepIndex)
            try {
                expectedContentLength = scanContentLength(headerBytes)
            } catch (e: HTTPRequestParseException) {
                fail(e.error)
                return
            }

            // Reject oversized bodies as soon as the declared length is known.
            if (expectedContentLength > maxBodySize) {
                fail(HTTPRequestParseError.PAYLOAD_TOO_LARGE)
                return
            }
        }

        val offset = headerEndOffset ?: return
        val bodyBytesAvailable = bytesWritten - offset

        _state = if (bodyBytesAvailable >= expectedContentLength) {
            State.COMPLETE
        } else {
            State.HEADERS_COMPLETE
        }
    }

    /** Records [error] for [parseRequest] to throw and stops accepting further data. */
    private fun fail(error: HTTPRequestParseError) {
        parseError = error
        _state = State.COMPLETE
    }

    /**
     * Parses the buffered data into a structured HTTP request.
     *
     * This triggers full parsing via [HTTPRequestSerializer] on the first call.
     * The parsed headers are cached for subsequent calls. When the state is
     * [State.COMPLETE] and a body is present, the body is extracted on the first access.
     *
     * An [IOException] raised while reading the buffer is reported as
     * [HTTPRequestParseError.BUFFER_IO_ERROR], matching how [append] reports
     * the same failure.
     *
     * @return The parsed request, or `null` if the state is [State.NEEDS_MORE_DATA].
     * @throws HTTPRequestParseException if the request is malformed or the buffer
     *   could not be read.
     */
    fun parseRequest(): ParsedHTTPRequest? = synchronized(lock) {
        if (!_state.hasHeaders) return null

        // Errors recorded earlier (by append or a previous call) are sticky.
        parseError?.let { throw HTTPRequestParseException(it) }

        if (parsedHeaders == null) {
            val headerData = try {
                buffer.read(0, minOf(bytesWritten, MAX_HEADER_SIZE.toLong()).toInt())
            } catch (_: IOException) {
                parseError = HTTPRequestParseError.BUFFER_IO_ERROR
                throw HTTPRequestParseException(HTTPRequestParseError.BUFFER_IO_ERROR)
            }
            when (val result = HTTPRequestSerializer.parseHeaders(headerData)) {
                is HeaderParseResult.Parsed -> parsedHeaders = result.headers
                is HeaderParseResult.Invalid -> {
                    parseError = result.error
                    throw HTTPRequestParseException(result.error)
                }
                is HeaderParseResult.NeedsMoreData -> return null
            }
        }

        val headers = parsedHeaders ?: return null

        if (_state != State.COMPLETE) {
            // Headers are available but the body is still arriving.
            return ParsedHTTPRequest(
                method = headers.method,
                target = headers.target,
                httpVersion = headers.httpVersion,
                headers = headers.headers,
                body = null,
                isComplete = false
            )
        }

        if (headers.contentLength > 0L && !bodyExtracted) {
            cachedBody = try {
                extractBody(headers.bodyOffset, headers.contentLength)
            } catch (_: IOException) {
                parseError = HTTPRequestParseError.BUFFER_IO_ERROR
                throw HTTPRequestParseException(HTTPRequestParseError.BUFFER_IO_ERROR)
            }
            bodyExtracted = true
        }

        return ParsedHTTPRequest(
            method = headers.method,
            target = headers.target,
            httpVersion = headers.httpVersion,
            headers = headers.headers,
            body = cachedBody,
            isComplete = true
        )
    }

    /**
     * Extracts the request body from the buffer.
     *
     * Bodies no larger than [inMemoryBodyThreshold] are read into memory.
     * Larger bodies reference the buffer's backing file directly as a slice,
     * avoiding a redundant copy. If the buffer has no backing file to hand
     * over, the body is read into memory instead.
     */
    private fun extractBody(offset: Int, length: Long): RequestBody? {
        if (length <= inMemoryBodyThreshold) {
            return RequestBody.InMemory(buffer.read(offset, length.toInt()))
        }

        // Reference the body range directly in the buffer's file.
        val ownership = buffer.transferFileOwnership()
        if (ownership != null) {
            val (file, owner) = ownership
            return RequestBody.FileBacked(
                file = file,
                fileOffset = offset.toLong(),
                size = length,
                fileOwner = owner
            )
        }

        // Memory-backed buffer — read into a byte array.
        require(length <= Int.MAX_VALUE) { "Body too large for in-memory buffer: $length bytes" }
        return RequestBody.InMemory(buffer.read(offset, length.toInt()))
    }

    /** Closes the underlying buffer, under the same lock that guards [append] and [parseRequest]. */
    override fun close() {
        synchronized(lock) {
            buffer.close()
        }
    }

    /**
     * Extracts and validates the `Content-Length` value from header bytes without full parsing.
     *
     * Reuses [HTTPRequestSerializer.validateContentLength] so that the scan and the later
     * full parse apply identical validation rules. Conflicting or malformed values are
     * rejected immediately — before any body bytes are buffered.
     *
     * Lines without a colon are skipped here; rejecting them is left to the full parse.
     *
     * @return 0 if no `Content-Length` header is present.
     * @throws HTTPRequestParseException if the value is invalid or conflicting.
     */
    private fun scanContentLength(headerBytes: ByteArray): Long {
        // Decoding never throws: malformed UTF-8 is replaced, and encoding
        // validation is performed by the full parse.
        val lines = headerBytes.toString(Charsets.UTF_8).split("\r\n")

        var contentLength: Long? = null
        // The first line is the request line, not a header field.
        for (line in lines.drop(1)) {
            if (line.isEmpty()) continue
            val colonIndex = line.indexOf(':')
            if (colonIndex == -1) continue
            val rawKey = line.substring(0, colonIndex)
            if (rawKey.lowercase() == "content-length") {
                val value = line.substring(colonIndex + 1).trimOWS()
                contentLength = HTTPRequestSerializer.validateContentLength(value, contentLength)
            }
        }

        return contentLength ?: 0
    }
}
diff --git a/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/HTTPRequestSerializer.kt b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/HTTPRequestSerializer.kt
new file mode 100644
index 000000000..5a56493ea
--- /dev/null
+++ b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/HTTPRequestSerializer.kt
@@ -0,0 +1,348 @@
package org.wordpress.gutenberg.http

/**
 * Errors thrown when parsing an HTTP/1.1 request fails due to RFC 7230/9112 violations.
 */
enum class HTTPRequestParseError(
    /** The HTTP status code that should be sent for this error.
*/
    val httpStatus: Int,
    /** A camelCase identifier matching the Swift error case names and JSON fixture keys. */
    val errorId: String
) {
    EMPTY_HEADER_SECTION(400, "emptyHeaderSection"),
    MALFORMED_REQUEST_LINE(400, "malformedRequestLine"),
    OBS_FOLD_DETECTED(400, "obsFoldDetected"),
    WHITESPACE_BEFORE_COLON(400, "whitespaceBeforeColon"),
    INVALID_CONTENT_LENGTH(400, "invalidContentLength"),
    CONFLICTING_CONTENT_LENGTH(400, "conflictingContentLength"),
    UNSUPPORTED_TRANSFER_ENCODING(400, "unsupportedTransferEncoding"),
    INVALID_HTTP_VERSION(400, "invalidHTTPVersion"),
    INVALID_FIELD_NAME(400, "invalidFieldName"),
    INVALID_FIELD_VALUE(400, "invalidFieldValue"),
    MISSING_HOST_HEADER(400, "missingHostHeader"),
    MULTIPLE_HOST_HEADERS(400, "multipleHostHeaders"),
    PAYLOAD_TOO_LARGE(413, "payloadTooLarge"),
    HEADERS_TOO_LARGE(431, "headersTooLarge"),
    TOO_MANY_HEADERS(431, "tooManyHeaders"),
    INVALID_ENCODING(400, "invalidEncoding"),
    BUFFER_IO_ERROR(500, "bufferIOError");
}

/**
 * Exception thrown when HTTP request parsing fails.
 */
class HTTPRequestParseException(val error: HTTPRequestParseError) : Exception(error.errorId)

/**
 * Parsed header information from an HTTP request.
 */
data class ParsedHeaders(
    /** The HTTP method (e.g., "GET", "POST"). */
    val method: String,
    /** The request-target from the HTTP request line, per RFC 9112 Section 3. */
    val target: String,
    /** The HTTP-version from the request line (e.g., "HTTP/1.1"), per RFC 9112 §2.3. */
    val httpVersion: String,
    /** The HTTP headers as key-value pairs, preserving original casing. */
    val headers: LinkedHashMap<String, String>,
    /** The value of the `Content-Length` header, or 0 if absent. */
    val contentLength: Long,
    /** The byte offset where the body begins (after the `\r\n\r\n` separator). */
    val bodyOffset: Int
)

/**
 * The result of attempting to parse HTTP request headers.
 */
sealed class HeaderParseResult {
    /** The data does not yet contain the complete header section (`\r\n\r\n`). */
    data object NeedsMoreData : HeaderParseResult()
    /** The header data is malformed and cannot be parsed. */
    data class Invalid(val error: HTTPRequestParseError) : HeaderParseResult()
    /** Headers were successfully parsed. */
    data class Parsed(val headers: ParsedHeaders) : HeaderParseResult()
}

/**
 * Parses raw HTTP/1.1 request bytes into structured components.
 *
 * This is the stateless parser that converts raw HTTP request data into
 * a [ParsedHeaders] structure. For incremental parsing with buffering,
 * use [HTTPRequestParser] instead.
 */
internal object HTTPRequestSerializer {

    /** Upper bound on the number of header field lines accepted in one request. */
    private const val MAX_HEADER_COUNT = 100

    /**
     * Attempts to parse the HTTP request line and headers from raw data.
     *
     * Looks for the `\r\n\r\n` header terminator, then parses the request line
     * and individual headers. Returns [HeaderParseResult.NeedsMoreData] if the
     * terminator hasn't been received yet, or [HeaderParseResult.Invalid] with
     * a specific error if the request is malformed.
     */
    fun parseHeaders(data: ByteArray): HeaderParseResult {
        // RFC 7230 §3.5: Skip leading CRLFs for robustness.
        var scanOffset = 0
        while (scanOffset + 1 < data.size &&
            data[scanOffset] == 0x0D.toByte() &&
            data[scanOffset + 1] == 0x0A.toByte()
        ) {
            scanOffset += 2
        }
        if (scanOffset >= data.size) return HeaderParseResult.NeedsMoreData

        val separator = "\r\n\r\n".toByteArray(Charsets.UTF_8)
        val separatorIndex = ReadOnlyBytes(data).indexOf(separator, scanOffset)
        if (separatorIndex == -1) return HeaderParseResult.NeedsMoreData

        val headerBytes = data.copyOfRange(scanOffset, separatorIndex)

        // Validate UTF-8 encoding by round-tripping through String and back.
        // Decoding replaces malformed sequences instead of throwing, so a
        // mismatch after re-encoding is the signal for invalid input.
        val headerString = headerBytes.toString(Charsets.UTF_8)
        if (!headerString.toByteArray(Charsets.UTF_8).contentEquals(headerBytes)) {
            return HeaderParseResult.Invalid(HTTPRequestParseError.INVALID_ENCODING)
        }

        val lines = headerString.split("\r\n")
        val requestLine = lines.firstOrNull()
        if (requestLine.isNullOrEmpty()) {
            return HeaderParseResult.Invalid(HTTPRequestParseError.EMPTY_HEADER_SECTION)
        }

        val parts = requestLine.split(" ", limit = 3)
        if (parts.size < 2) {
            return HeaderParseResult.Invalid(HTTPRequestParseError.MALFORMED_REQUEST_LINE)
        }

        val method = parts[0]
        val target = parts[1]

        // RFC 9110 §9.1: method = token = 1*tchar. The explicit emptiness check is
        // required because `all` is vacuously true for an empty string (a request
        // line that starts with a space yields an empty method).
        if (method.isEmpty() || !method.all { isTokenChar(it) }) {
            return HeaderParseResult.Invalid(HTTPRequestParseError.MALFORMED_REQUEST_LINE)
        }

        // RFC 9112 §2.3: HTTP-version = "HTTP/" DIGIT "." DIGIT
        if (parts.size < 3) {
            return HeaderParseResult.Invalid(HTTPRequestParseError.INVALID_HTTP_VERSION)
        }
        val httpVersion = parts[2]
        if (!isValidHTTPVersion(httpVersion)) {
            return HeaderParseResult.Invalid(HTTPRequestParseError.INVALID_HTTP_VERSION)
        }

        // RFC 9112 §3.2: Validate request-target form.
        val validTarget = when {
            // authority-form: not a path, and contains a colon
            method == "CONNECT" -> !target.startsWith("/") && target.contains(":")
            // asterisk-form is valid for OPTIONS
            method == "OPTIONS" && target == "*" -> true
            // origin-form — valid for all methods
            target.startsWith("/") -> true
            // absolute-form — valid for all methods
            else -> target.lowercase().let { it.startsWith("http://") || it.startsWith("https://") }
        }
        if (!validTarget) {
            return HeaderParseResult.Invalid(HTTPRequestParseError.MALFORMED_REQUEST_LINE)
        }

        val headers = LinkedHashMap<String, String>()
        val keyIndex = HashMap<String, String>() // lowercased -> original casing
        var contentLengthValue: Long? = null
        var hostHeaderCount = 0
        var headerCount = 0

        for (line in lines.drop(1)) {
            if (line.isEmpty()) continue

            headerCount++
            if (headerCount > MAX_HEADER_COUNT) {
                return HeaderParseResult.Invalid(HTTPRequestParseError.TOO_MANY_HEADERS)
            }

            // RFC 7230 §3.2.4: Reject obs-fold
            if (line[0] == ' ' || line[0] == '\t') {
                return HeaderParseResult.Invalid(HTTPRequestParseError.OBS_FOLD_DETECTED)
            }

            val colonIndex = line.indexOf(':')
            if (colonIndex == -1) {
                return HeaderParseResult.Invalid(HTTPRequestParseError.INVALID_FIELD_NAME)
            }

            val rawKey = line.substring(0, colonIndex)

            // RFC 7230 §3.2.4: No whitespace is allowed between the field-name and colon.
            // Check this before the general token validation so we return the more
            // specific error (WHITESPACE_BEFORE_COLON) instead of INVALID_FIELD_NAME.
            if (rawKey.any { it == ' ' || it == '\t' }) {
                return HeaderParseResult.Invalid(HTTPRequestParseError.WHITESPACE_BEFORE_COLON)
            }

            // RFC 9110 §5.1: field-name = token
            if (rawKey.isEmpty() || !rawKey.all { isTokenChar(it) }) {
                return HeaderParseResult.Invalid(HTTPRequestParseError.INVALID_FIELD_NAME)
            }

            val key = rawKey
            val lowerKey = key.lowercase()
            // RFC 9110 §5.5: OWS (optional whitespace) is SP / HTAB only.
            // Kotlin's String.trim() strips every Unicode whitespace character
            // (including CR and LF), which would silently remove bare CRs before
            // field value validation.
            val value = line.substring(colonIndex + 1).trimOWS()

            // RFC 9110 §5.5: Validate field value characters.
            for (c in value) {
                val v = c.code
                if (v <= 0x08 || (v in 0x0A..0x1F) || v == 0x7F) {
                    return HeaderParseResult.Invalid(HTTPRequestParseError.INVALID_FIELD_VALUE)
                }
            }

            // RFC 7230 §3.3.3: Reject Transfer-Encoding
            if (lowerKey == "transfer-encoding") {
                return HeaderParseResult.Invalid(HTTPRequestParseError.UNSUPPORTED_TRANSFER_ENCODING)
            }

            // Content-Length: validate and normalize
            if (lowerKey == "content-length") {
                contentLengthValue = try {
                    validateContentLength(value, contentLengthValue)
                } catch (e: HTTPRequestParseException) {
                    return HeaderParseResult.Invalid(e.error)
                }
                // Store the single resolved value under the first-seen spelling of the name.
                val resolved = contentLengthValue.toString()
                val existingKey = keyIndex["content-length"]
                if (existingKey != null) {
                    headers[existingKey] = resolved
                } else {
                    headers[key] = resolved
                    keyIndex["content-length"] = key
                }
                continue
            }

            // Track Host header occurrences
            if (lowerKey == "host") {
                hostHeaderCount++
            }

            // RFC 9110 §5.3: Combine duplicate field lines with comma-separated values.
            val existingKey = keyIndex[lowerKey]
            if (existingKey != null) {
                headers[existingKey] = "${headers[existingKey]}, $value"
            } else {
                headers[key] = value
                keyIndex[lowerKey] = key
            }
        }

        // RFC 9110 §7.2: Host header validation
        if (hostHeaderCount > 1) {
            return HeaderParseResult.Invalid(HTTPRequestParseError.MULTIPLE_HOST_HEADERS)
        }
        if (httpVersion == "HTTP/1.1" && hostHeaderCount == 0) {
            return HeaderParseResult.Invalid(HTTPRequestParseError.MISSING_HOST_HEADER)
        }

        val contentLength = contentLengthValue ?: 0L
        val bodyOffset = separatorIndex + separator.size

        return HeaderParseResult.Parsed(
            ParsedHeaders(
                method = method,
                target = target,
                httpVersion = httpVersion,
                headers = headers,
                contentLength = contentLength,
                bodyOffset = bodyOffset
            )
        )
    }

    /**
     * Validates a Content-Length header value per RFC 9110 §8.6 / RFC 7230 §3.3.3.
     *
     * A comma-separated list is accepted only when every member is the same
     * decimal number. List members are trimmed with [trimOWS] (SP / HTAB only),
     * not `String.trim()`, so that other whitespace characters around the digits
     * cause rejection instead of being silently dropped.
     *
     * @param value the field value, already stripped of surrounding OWS.
     * @param existing the value from an earlier `Content-Length` line, if any.
     * @return the validated length.
     * @throws HTTPRequestParseException with `INVALID_CONTENT_LENGTH` if the first
     *   member is not a non-negative decimal that fits in a `Long`, or with
     *   `CONFLICTING_CONTENT_LENGTH` if any other member or [existing] differs.
     */
    internal fun validateContentLength(value: String, existing: Long?): Long {
        val parts = value.split(",").map { it.trimOWS() }
        val first = parts.firstOrNull()
        if (first.isNullOrEmpty() || !first.all { isAsciiDigit(it) }) {
            throw HTTPRequestParseException(HTTPRequestParseError.INVALID_CONTENT_LENGTH)
        }
        // toLongOrNull() yields null on overflow.
        val cl = first.toLongOrNull()
            ?: throw HTTPRequestParseException(HTTPRequestParseError.INVALID_CONTENT_LENGTH)
        if (cl < 0) throw HTTPRequestParseException(HTTPRequestParseError.INVALID_CONTENT_LENGTH)

        for (part in parts.drop(1)) {
            if (part.isEmpty() || !part.all { isAsciiDigit(it) }) {
                throw HTTPRequestParseException(HTTPRequestParseError.CONFLICTING_CONTENT_LENGTH)
            }
            val partValue = part.toLongOrNull()
            if (partValue != cl) {
                throw HTTPRequestParseException(HTTPRequestParseError.CONFLICTING_CONTENT_LENGTH)
            }
        }

        if (existing != null && existing != cl) {
            throw HTTPRequestParseException(HTTPRequestParseError.CONFLICTING_CONTENT_LENGTH)
        }
        return cl
    }

    /**
     * Validates that a string matches the HTTP-version format: `HTTP/DIGIT.DIGIT`.
     *
     * Only ASCII digits are accepted; [Char.isDigit] would also accept digits
     * from other scripts, which are not part of the HTTP grammar.
     */
    private fun isValidHTTPVersion(version: String): Boolean {
        val prefix = "HTTP/"
        // Exactly "HTTP/" followed by three characters: digit, dot, digit.
        return version.length == prefix.length + 3 &&
            version.startsWith(prefix) &&
            isAsciiDigit(version[prefix.length]) &&
            version[prefix.length + 1] == '.' &&
            isAsciiDigit(version[prefix.length + 2])
    }

    /**
     * Returns whether a character is a valid HTTP token character (RFC 9110 §5.6.2).
     */
    private fun isTokenChar(c: Char): Boolean {
        val ascii = c.code
        return when {
            ascii in 0x41..0x5A -> true // A-Z
            ascii in 0x61..0x7A -> true // a-z
            ascii in 0x30..0x39 -> true // 0-9
            c in "!#\$%&'*+-.^_`|~" -> true
            else -> false
        }
    }

    /**
     * Returns whether a character is an ASCII digit (0-9).
     * Unlike [Char.isDigit], this rejects non-ASCII Unicode digits.
     */
    private fun isAsciiDigit(c: Char): Boolean = c.code in 0x30..0x39

}

/**
 * Trims only SP (0x20) and HTAB (0x09) from both ends of a string.
 *
 * This matches RFC 9110's OWS (optional whitespace) definition:
 *     OWS = *( SP / HTAB )
 *
 * Unlike Kotlin's [String.trim], this does NOT strip CR, LF, or other
 * control characters, ensuring they are preserved for field value
 * validation (which must reject them per RFC 9110 §5.5).
*/
internal fun String.trimOWS(): String {
    val isOWS = { c: Char -> c == ' ' || c == '\t' }

    // No non-OWS character at all: the result is empty.
    val first = indexOfFirst { !isOWS(it) }
    if (first == -1) return if (isEmpty()) this else ""

    val last = indexOfLast { !isOWS(it) }
    // Nothing to strip: hand back the receiver itself.
    return if (first == 0 && last == lastIndex) this else substring(first, last + 1)
}
diff --git a/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/HeaderValue.kt b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/HeaderValue.kt
new file mode 100644
index 000000000..234543ebd
--- /dev/null
+++ b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/HeaderValue.kt
@@ -0,0 +1,122 @@
package org.wordpress.gutenberg.http

/**
 * Utilities for parsing structured HTTP header values (RFC 9110 §5.6).
 *
 * HTTP headers like `Content-Type` and `Content-Disposition` carry parameters
 * in `key=value` or `key="value"` form. This object provides a shared
 * implementation for extracting those parameters while correctly handling
 * quoted strings and backslash escapes per RFC 2045 §5.1.
 */
internal object HeaderValue {

    /**
     * Looks up the value of parameter [name] inside [headerValue].
     *
     * Every case-insensitive occurrence of `name=` is considered in turn. An
     * occurrence is skipped when it lies inside a quoted string, or when it is
     * not at a parameter boundary (i.e. it is preceded by something other than
     * `;`, SP or HTAB — for example `name=` inside `filename=`). The first
     * remaining occurrence supplies the value: a quoted value is unescaped per
     * RFC 2045 §5.1; an unquoted value runs to the next `;` (or the end of
     * the string) and is trimmed.
     *
     * @param name The parameter name to search for (case-insensitive).
     * @param headerValue The full header value string to search.
     * @return The extracted parameter value, or `null` if not found.
     */
    fun extractParameter(name: String, headerValue: String): String? {
        val needle = "$name="
        var from = 0

        while (from < headerValue.length) {
            val hit = headerValue.indexOf(needle, from, ignoreCase = true)
            if (hit == -1) return null

            val valueStart = hit + needle.length
            val atBoundary = hit == 0 || headerValue[hit - 1].let { it == ';' || it == ' ' || it == '\t' }

            // Not a real parameter: resume the search just past this occurrence.
            if (isInsideQuotedString(headerValue, hit) || !atBoundary) {
                from = valueStart
                continue
            }

            if (headerValue.getOrNull(valueStart) == '"') {
                return extractQuotedValue(headerValue, valueStart)
            }

            // Unquoted token: ends at the next ';' or at the end of the header value.
            val end = headerValue.indexOf(';', valueStart)
            val raw = if (end == -1) headerValue.substring(valueStart) else headerValue.substring(valueStart, end)
            return raw.trim()
        }

        return null
    }

    /**
     * Reads the quoted value whose opening `"` is at [quoteStart], resolving
     * backslash escapes (`\"`, `\\`) per RFC 2045 §5.1.
     *
     * Reading stops at the closing quote, at a trailing lone backslash, or at
     * the end of [text], whichever comes first.
     */
    private fun extractQuotedValue(text: String, quoteStart: Int): String = buildString {
        var i = quoteStart + 1
        while (i < text.length) {
            when (val c = text[i]) {
                '\\' -> {
                    // A backslash with nothing after it ends the value.
                    if (i + 1 >= text.length) return@buildString
                    append(text[i + 1])
                    i += 2
                }
                '"' -> return@buildString
                else -> {
                    append(c)
                    i++
                }
            }
        }
    }

    /**
     * Reports whether [position] in [string] lies inside a quoted string.
     *
     * Walks the characters before [position], flipping the quote state at each
     * unescaped `"`; inside a quote, a backslash causes the following character
     * to be skipped.
     */
    private fun isInsideQuotedString(string: String, position: Int): Boolean {
        var quoted = false
        var i = 0
        while (i < position) {
            val c = string[i]
            if (quoted && c == '\\') {
                // Step over the backslash and the character it escapes.
                i += 2
            } else {
                if (c == '"') quoted = !quoted
                i++
            }
        }
        return quoted
    }
}
diff --git a/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/MultipartPart.kt b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/MultipartPart.kt
new file mode 100644
index 000000000..2c13068ac
--- /dev/null
+++ b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/MultipartPart.kt
@@ -0,0 +1,426 @@
package org.wordpress.gutenberg.http

import java.io.RandomAccessFile

/**
 * Errors thrown when parsing a multipart/form-data body fails.
 */
enum class MultipartParseError(
    /** A camelCase identifier matching the Swift error case names and JSON fixture keys. */
    val errorId: String
) {
    NOT_MULTIPART_FORM_DATA("notMultipartFormData"),
    MISSING_BODY("missingBody"),
    MISSING_CONTENT_DISPOSITION("missingContentDisposition"),
    MISSING_NAME_PARAMETER("missingNameParameter"),
    MALFORMED_BODY("malformedBody"),
    TOO_MANY_PARTS("tooManyParts");
}

/**
 * Exception thrown when multipart parsing fails.
 */
class MultipartParseException(val error: MultipartParseError) : Exception(error.errorId)

/**
 * A single part from a `multipart/form-data` body, per RFC 7578.
 *
 * Each part represents one form field or file upload, with its own
 * Content-Disposition parameters and optional Content-Type.
 *
 * Part bodies are represented as lightweight references (byte ranges)
 * back to the original request body. No part data is copied during parsing
 * for file-backed bodies; bytes are only read when [body] is accessed
 * via [RequestBody.inputStream] or [RequestBody.readBytes].
+ */ +data class MultipartPart( + /** The field name from `Content-Disposition: form-data; name="..."`. */ + val name: String, + /** The filename, if present, from `Content-Disposition: form-data; filename="..."`. */ + val filename: String?, + /** The `Content-Type` of this part, or `"text/plain"` if not specified (RFC 7578 §4.4). */ + val contentType: String, + /** The part's body content, backed by a reference to the original request body. */ + val body: RequestBody +) { + override fun equals(other: Any?): Boolean { + if (this === other) return true + if (other !is MultipartPart) return false + return name == other.name && + filename == other.filename && + contentType == other.contentType && + body == other.body + } + + override fun hashCode(): Int { + var result = name.hashCode() + result = 31 * result + (filename?.hashCode() ?: 0) + result = 31 * result + contentType.hashCode() + result = 31 * result + body.hashCode() + return result + } + + companion object { + private const val SCAN_CHUNK_SIZE = 65_536 + + /** + * Parses an in-memory `multipart/form-data` body into its constituent parts. + * + * Scans the body data to locate part boundaries and extract headers, but does + * not copy part body bytes for file-backed sources. Each part's [body] is a + * lightweight reference (offset + length) back to the source [RequestBody]. + * + * @param source The original request body to reference for part content. + * @param bodyData The raw body bytes (read once for scanning, then released by the caller). + * @param bodyFileOffset The byte offset of [bodyData] within [source]'s backing file + * (0 for data-backed bodies). + * @param boundary The boundary string from the Content-Type header. + * @return A list of parsed parts with lazy body references. + * @throws MultipartParseException if the body is malformed. 
+ */ + fun parse( + source: RequestBody, + bodyData: ReadOnlyBytes, + bodyFileOffset: Long, + boundary: String + ): List { + val delimiter = "--$boundary".toByteArray(Charsets.UTF_8) + val closeDelimiter = "--$boundary--".toByteArray(Charsets.UTF_8) + val crlf = "\r\n".toByteArray(Charsets.UTF_8) + val crlfcrlf = "\r\n\r\n".toByteArray(Charsets.UTF_8) + + val firstDelimiterIndex = bodyData.indexOf(delimiter, 0) + if (firstDelimiterIndex == -1) { + throw MultipartParseException(MultipartParseError.MALFORMED_BODY) + } + + val parts = mutableListOf() + var searchStart = firstDelimiterIndex + delimiter.size + + while (searchStart < bodyData.size) { + // RFC 2046 §5.1.1: skip optional transport padding (LWSP) after boundary. + while (searchStart < bodyData.size && + (bodyData[searchStart] == ' '.code.toByte() || + bodyData[searchStart] == '\t'.code.toByte()) + ) { + searchStart++ + } + + // Skip the CRLF after the delimiter line + if (searchStart + 1 < bodyData.size && + bodyData[searchStart] == crlf[0] && + bodyData[searchStart + 1] == crlf[1] + ) { + searchStart += crlf.size + } + + if (searchStart >= bodyData.size) break + + // Find the header/body separator within this part + val headerEnd = bodyData.indexOf(crlfcrlf, searchStart) + if (headerEnd == -1) { + throw MultipartParseException(MultipartParseError.MALFORMED_BODY) + } + + val headerData = bodyData.copyOfRange(searchStart, headerEnd) + val partBodyStart = headerEnd + crlfcrlf.size + + // Find the next delimiter to determine where this part's body ends + val nextDelimiterIndex = bodyData.indexOf(delimiter, partBodyStart) + if (nextDelimiterIndex == -1) { + throw MultipartParseException(MultipartParseError.MALFORMED_BODY) + } + + // The body ends at the CRLF before the next delimiter + var partBodyEnd = nextDelimiterIndex + if (partBodyEnd >= partBodyStart + crlf.size) { + if (bodyData[partBodyEnd - 2] == crlf[0] && + bodyData[partBodyEnd - 1] == crlf[1] + ) { + partBodyEnd -= crlf.size + } + } + + // Build a 
lightweight body reference instead of copying bytes + val partBodyLength = partBodyEnd - partBodyStart + val partBody = makePartBody( + source = source, + bodyData = bodyData, + partOffset = partBodyStart, + partLength = partBodyLength, + bodyFileOffset = bodyFileOffset + ) + + val part = parsePartHeaders(headerData, partBody) + parts.add(part) + + if (parts.size > 100) { + throw MultipartParseException(MultipartParseError.TOO_MANY_PARTS) + } + + // Check if the next delimiter is the closing one + if (bodyData.startsWith(closeDelimiter, nextDelimiterIndex)) { + break + } + + searchStart = nextDelimiterIndex + delimiter.size + } + + return parts + } + + /** + * Parses a file-backed `multipart/form-data` body using chunked scanning. + * + * Reads the file in fixed-size chunks to find boundary offsets, keeping memory + * usage at O(chunk_size) regardless of body size. Part bodies are file-slice + * references, not copies. + * + * @param source The file-backed request body. + * @param boundary The boundary string from the Content-Type header. + * @return A list of parsed parts with lazy body references. + * @throws MultipartParseException if the body is malformed. + */ + fun parseChunked( + source: RequestBody.FileBacked, + boundary: String + ): List { + val delimiter = "--$boundary".toByteArray(Charsets.UTF_8) + val crlfcrlf = "\r\n\r\n".toByteArray(Charsets.UTF_8) + + val file = source.file!! + val bodyStart = source.fileOffset + val bodyLength = source.size + val bodyEnd = bodyStart + bodyLength + + RandomAccessFile(file, "r").use { raf -> + // Phase 1: Scan for all boundary delimiter offsets using chunked reads. + // An overlap region (delimiter.size - 1 bytes) is carried between chunks + // so boundaries split across chunk boundaries are still found. 
+ val overlapSize = delimiter.size - 1 + val delimiterOffsets = mutableListOf() + var position = bodyStart + var carryOver = ByteArray(0) + + while (position < bodyEnd) { + val readSize = minOf(SCAN_CHUNK_SIZE.toLong(), bodyEnd - position).toInt() + raf.seek(position) + val chunk = ByteArray(readSize) + val bytesRead = raf.read(chunk) + if (bytesRead <= 0) break + val actualChunk = if (bytesRead < readSize) chunk.copyOf(bytesRead) else chunk + + val searchBuffer = if (carryOver.isEmpty()) { + actualChunk + } else { + ByteArray(carryOver.size + actualChunk.size).also { + carryOver.copyInto(it) + actualChunk.copyInto(it, carryOver.size) + } + } + val searchBytes = ReadOnlyBytes(searchBuffer) + + var searchOffset = 0 + while (true) { + val idx = searchBytes.indexOf(delimiter, searchOffset) + if (idx == -1) break + val absoluteOffset = position - carryOver.size + idx + if (absoluteOffset >= bodyStart && absoluteOffset + delimiter.size <= bodyEnd) { + delimiterOffsets.add(absoluteOffset) + } + searchOffset = idx + 1 + } + + carryOver = if (actualChunk.size > overlapSize) { + actualChunk.copyOfRange(actualChunk.size - overlapSize, actualChunk.size) + } else { + actualChunk.copyOf() + } + position += actualChunk.size + } + + if (delimiterOffsets.isEmpty()) { + throw MultipartParseException(MultipartParseError.MALFORMED_BODY) + } + + // Phase 2: Extract parts from consecutive delimiter pairs. + val parts = mutableListOf() + val maxPartHeaderSize = 8192 + + for (i in delimiterOffsets.indices) { + val delimStart = delimiterOffsets[i] + val afterDelim = delimStart + delimiter.size + + // Check if this is the close delimiter ("--boundary--"). 
+ if (afterDelim + 2 <= bodyEnd) { + raf.seek(afterDelim) + val b1 = raf.read() + val b2 = raf.read() + if (b1 == '-'.code && b2 == '-'.code) { + break + } + } else { + break + } + + if (i + 1 >= delimiterOffsets.size) { + throw MultipartParseException(MultipartParseError.MALFORMED_BODY) + } + val nextDelimStart = delimiterOffsets[i + 1] + + // Read the region between this delimiter and the next to extract headers. + val regionLength = minOf(maxPartHeaderSize.toLong(), nextDelimStart - afterDelim).toInt() + raf.seek(afterDelim) + val headerRegion = ByteArray(regionLength) + val headerBytesRead = raf.read(headerRegion, 0, regionLength) + if (headerBytesRead <= 0) { + throw MultipartParseException(MultipartParseError.MALFORMED_BODY) + } + val actualHeaderRegion = if (headerBytesRead < regionLength) { + headerRegion.copyOf(headerBytesRead) + } else { + headerRegion + } + + // Skip optional transport padding (LWSP) after boundary. + var scanPos = 0 + while (scanPos < actualHeaderRegion.size && + (actualHeaderRegion[scanPos] == ' '.code.toByte() || + actualHeaderRegion[scanPos] == '\t'.code.toByte()) + ) { + scanPos++ + } + + // Skip CRLF after the delimiter line. + if (scanPos + 1 < actualHeaderRegion.size && + actualHeaderRegion[scanPos] == 0x0D.toByte() && + actualHeaderRegion[scanPos + 1] == 0x0A.toByte() + ) { + scanPos += 2 + } + + // Find the \r\n\r\n header/body separator. + val headerSearch = ReadOnlyBytes(actualHeaderRegion) + val headerEndIdx = headerSearch.indexOf(crlfcrlf, scanPos) + if (headerEndIdx == -1) { + throw MultipartParseException(MultipartParseError.MALFORMED_BODY) + } + + val headerData = actualHeaderRegion.copyOfRange(scanPos, headerEndIdx) + val partBodyStart = afterDelim + (headerEndIdx + crlfcrlf.size) + + // Body ends at the CRLF before the next delimiter. 
+ var partBodyEnd = nextDelimStart + if (partBodyEnd >= partBodyStart + 2) { + raf.seek(nextDelimStart - 2) + val cr = raf.read() + val lf = raf.read() + if (cr == 0x0D && lf == 0x0A) { + partBodyEnd = nextDelimStart - 2 + } + } + + val partBodyLength = maxOf(0L, partBodyEnd - partBodyStart) + val partBody = RequestBody.FileBacked( + file = file, + fileOffset = partBodyStart, + size = partBodyLength, + fileOwner = source.fileOwner + ) + + val part = parsePartHeaders(headerData, partBody) + parts.add(part) + + if (parts.size > 100) { + throw MultipartParseException(MultipartParseError.TOO_MANY_PARTS) + } + } + + if (parts.isEmpty()) { + throw MultipartParseException(MultipartParseError.MALFORMED_BODY) + } + + return parts + } + } + + /** + * Creates a [RequestBody] for a part. + * + * For file-backed sources, returns a file-slice reference (no copy). + * For in-memory sources, creates a copy via [ByteArray.copyOfRange]. + * Unlike Swift's copy-on-write Data slicing, Kotlin's ByteArray has no + * COW semantics — but this path only triggers for bodies below the + * [HTTPRequestParser.DEFAULT_IN_MEMORY_BODY_THRESHOLD] (512 KB), so + * the copy is negligible. Large bodies are file-backed and use slices. + */ + private fun makePartBody( + source: RequestBody, + bodyData: ReadOnlyBytes, + partOffset: Int, + partLength: Int, + bodyFileOffset: Long + ): RequestBody { + return when (source) { + is RequestBody.FileBacked -> RequestBody.FileBacked( + file = source.file, + fileOffset = bodyFileOffset + partOffset, + size = partLength.toLong(), + fileOwner = source.fileOwner + ) + is RequestBody.InMemory -> { + val end = partOffset + partLength + RequestBody.InMemory(bodyData.copyOfRange(partOffset, end)) + } + } + } + + /** + * Parses a single part's headers into a [MultipartPart]. 
+ */ + private fun parsePartHeaders(headerData: ByteArray, body: RequestBody): MultipartPart { + val headerString = try { + headerData.toString(Charsets.UTF_8) + } catch (_: Exception) { + throw MultipartParseException(MultipartParseError.MISSING_CONTENT_DISPOSITION) + } + + val lines = headerString.split("\r\n") + + var contentDisposition: String? = null + var contentType: String? = null + + for (line in lines) { + if (line.isEmpty()) continue + val colonIndex = line.indexOf(':') + if (colonIndex == -1) continue + val key = line.substring(0, colonIndex).trim() + val value = line.substring(colonIndex + 1).trim() + + when (key.lowercase()) { + "content-disposition" -> contentDisposition = value + "content-type" -> contentType = value + } + } + + val disposition = contentDisposition + ?: throw MultipartParseException(MultipartParseError.MISSING_CONTENT_DISPOSITION) + if (!disposition.lowercase().startsWith("form-data")) { + throw MultipartParseException(MultipartParseError.MISSING_CONTENT_DISPOSITION) + } + + val name = HeaderValue.extractParameter("name", disposition) + ?: throw MultipartParseException(MultipartParseError.MISSING_NAME_PARAMETER) + + val filename = HeaderValue.extractParameter("filename", disposition) + + return MultipartPart( + name = name, + filename = filename, + contentType = contentType ?: "text/plain", + body = body + ) + } + + } +} diff --git a/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/ParsedHTTPRequest.kt b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/ParsedHTTPRequest.kt new file mode 100644 index 000000000..2231fc129 --- /dev/null +++ b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/ParsedHTTPRequest.kt @@ -0,0 +1,155 @@ +package org.wordpress.gutenberg.http + +/** + * A parsed HTTP/1.1 request, either partial (headers only) or complete (headers and body). + * + * Contains the method, target, HTTP version, headers, and optional body. 
+ * When [isComplete] is false, the body is still pending and will be null. + * + * The body, if present, is accessible via [RequestBody] which provides + * stream-based access regardless of backing storage (in-memory or file). + */ +class ParsedHTTPRequest( + /** The HTTP method (e.g., "GET", "POST"). */ + val method: String, + /** The request-target from the HTTP request line (e.g., "/wp/v2/posts?per_page=10"). */ + val target: String, + /** The HTTP-version from the request line (e.g., "HTTP/1.1"). */ + val httpVersion: String, + /** + * The raw HTTP headers map. Header names preserve their original casing, + * which makes map lookups case-sensitive. Use [header] for safe + * case-insensitive lookup, or [allHeaders] for iteration. + */ + internal val headers: Map, + /** The request body, or null if there is no body or the request is partial. */ + val body: RequestBody?, + /** Whether all data has been received. */ + val isComplete: Boolean +) { + /** The number of headers in the request. */ + val headerCount: Int get() = headers.size + + /** All headers as a list of name-value pairs, suitable for iteration. */ + val allHeaders: List> get() = headers.map { it.key to it.value } + + /** + * Returns the value of the first header matching the given name (case-insensitive). + */ + fun header(name: String): String? { + val lowered = name.lowercase() + return headers.entries.firstOrNull { it.key.lowercase() == lowered }?.value + } + + /** + * Returns the headers suitable for forwarding to an upstream server. + * + * Strips RFC 9110 §7.6.1 hop-by-hop headers, any headers listed in the + * `Connection` header, and the `Proxy-Authorization` header (which carries + * the proxy's own bearer token per RFC 9110 §11.7.1 and must not be + * forwarded upstream). The `Authorization` header is intentionally kept + * so that the client's own credentials (e.g. HTTP Basic) pass through. 
+ */ + fun forwardingHeaders(): Map { + val hopByHop = mutableSetOf( + "host", "connection", "transfer-encoding", "keep-alive", + "proxy-connection", "te", "upgrade", "trailer", + "proxy-authorization", + ) + + // Headers listed in Connection are also hop-by-hop (RFC 9110 §7.6.1). + header("Connection")?.split(",")?.forEach { name -> + hopByHop.add(name.trim().lowercase()) + } + + return headers.filterKeys { it.lowercase() !in hopByHop } + } + + /** + * Parses the body as `multipart/form-data` and returns the individual parts. + * + * Extracts the boundary from the `Content-Type` header automatically. + * Part bodies are lazy references back to the original request body — no + * part data is copied during parsing for file-backed bodies. + * + * @throws MultipartParseException if the Content-Type is not `multipart/form-data`, + * the body is missing, or the multipart structure is malformed. + */ + fun multipartParts(): List { + val contentType = header("Content-Type") + ?: throw MultipartParseException(MultipartParseError.NOT_MULTIPART_FORM_DATA) + val boundary = extractBoundary(contentType) + ?: throw MultipartParseException(MultipartParseError.NOT_MULTIPART_FORM_DATA) + val bodyData = body + ?: throw MultipartParseException(MultipartParseError.MISSING_BODY) + + val inMemory = bodyData.inMemoryData + return if (inMemory != null) { + // In-memory: scan the data directly (already in memory, no extra allocation). + MultipartPart.parse( + source = bodyData, + bodyData = inMemory, + bodyFileOffset = 0L, + boundary = boundary + ) + } else { + // File-backed: scan in fixed-size chunks to avoid loading the entire + // body into memory. Memory usage is O(chunk_size) regardless of body size. 
+ MultipartPart.parseChunked( + source = bodyData as RequestBody.FileBacked, + boundary = boundary + ) + } + } + + override fun equals(other: Any?): Boolean { + if (this === other) return true + if (other !is ParsedHTTPRequest) return false + return method == other.method && + target == other.target && + httpVersion == other.httpVersion && + headers == other.headers && + body == other.body && + isComplete == other.isComplete + } + + override fun hashCode(): Int { + var result = method.hashCode() + result = 31 * result + target.hashCode() + result = 31 * result + httpVersion.hashCode() + result = 31 * result + headers.hashCode() + result = 31 * result + (body?.hashCode() ?: 0) + result = 31 * result + isComplete.hashCode() + return result + } + + companion object { + /** + * Extracts the boundary parameter from a `multipart/form-data` Content-Type value. + */ + private fun extractBoundary(contentType: String): String? { + if (!contentType.lowercase().startsWith("multipart/form-data")) return null + val boundary = HeaderValue.extractParameter("boundary", contentType) ?: return null + if (boundary.isEmpty() || boundary.length > 70) return null + // RFC 2046 §5.1.1: boundary characters must be from the bchars set. + if (!boundary.all { isBoundaryChar(it) }) return null + // RFC 2046 §5.1.1: space cannot be the last character. + if (boundary.endsWith(" ")) return null + return boundary + } + + /** + * Returns whether a character is valid in a MIME boundary (RFC 2046 §5.1.1 bchars). + */ + private fun isBoundaryChar(c: Char): Boolean { + val ascii = c.code + return when { + ascii in 0x41..0x5A -> true // A-Z + ascii in 0x61..0x7A -> true // a-z + ascii in 0x30..0x39 -> true // 0-9 + c in "'()+_,-./:=? 
" -> true + else -> false + } + } + } +} diff --git a/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/README.md b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/README.md new file mode 100644 index 000000000..f18886d8f --- /dev/null +++ b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/README.md @@ -0,0 +1,160 @@ +# Kotlin HTTP Parser + +A zero-dependency pure-Kotlin module providing an HTTP/1.1 request parser. Designed for use as the front-end of an in-process HTTP proxy server, where raw bytes arrive over a socket and need to be converted into structured request objects suitable for forwarding via `HttpURLConnection`. + +## Why + +GutenbergKit's Android integration uses an in-process HTTP server to bridge requests between the embedded web editor and native networking. This module handles the parsing side of that bridge — turning raw TCP bytes into structured request objects — without pulling in a full HTTP server framework. + +Key design goals: + +- **Incremental parsing** — data can arrive in arbitrary chunks (byte-by-byte if needed); the parser buffers to disk so memory usage stays flat regardless of body size. +- **Lazy validation** — `append()` does only lightweight scanning (finding `\r\n\r\n` and extracting `Content-Length`). Full RFC validation is deferred to `parseRequest()`, keeping the hot path fast. +- **Strict conformance** — rejects request smuggling vectors (obs-fold, whitespace before colon), validates `Content-Length` per RFC 9110 §8.6, and combines duplicate headers per RFC 9110 §5.3. +- **No dependencies** — uses only the Kotlin and Java standard libraries. + +## Types + +| Type | Role | +|------|------| +| `HTTPRequestParser` | Incremental, stateful parser. Feed it bytes with `append()`, check `state`, then call `parseRequest()`. | +| `HTTPRequestSerializer` | Stateless header parser. Call `parseHeaders()` with a complete `ByteArray` buffer. 
| +| `ParsedHTTPRequest` | The result — a class with value-based equality and an `isComplete` flag indicating whether the body has fully arrived. | +| `RequestBody` | Abstracts body storage (in-memory or file-backed). Provides `size` (O(1) byte count), `readBytes()`, and `inputStream()`. | +| `MultipartPart` | A parsed multipart/form-data part with `name`, `filename`, `contentType`, and `body`. | +| `HTTPRequestParseError` | Error enum covering all rejection reasons, with `errorId` strings matching the Swift implementation. | + +The `HttpServer` class in the parent package (`org.wordpress.gutenberg`) provides a complete local HTTP/1.1 server built on top of this parser. + +## Usage + +### One-shot parsing + +Use this when the full HTTP request is already in memory (e.g., from a test fixture or a buffered read). Pass the raw string to the parser's convenience constructor, then call `parseRequest()` to get a `ParsedHTTPRequest`. + +```kotlin +import org.wordpress.gutenberg.http.HTTPRequestParser + +val raw = "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Length: 14\r\n\r\n{\"title\":\"Hi\"}" +val parser = HTTPRequestParser(raw) + +val request = parser.parseRequest()!! +println(request.method) // "POST" +println(request.target) // "/wp/v2/posts" +println(request.header("Host")) // "localhost" +``` + +The `header()` method performs case-insensitive lookup per RFC 9110. To iterate over every header, use `allHeaders`; the underlying `headers` map is `internal` to the module and keeps the original header-name casing. + +### Incremental parsing + +Use this when data arrives in chunks from a socket. Call `append()` as bytes arrive — the parser buffers body data to a temporary file so memory stays flat even for large uploads. Check `state` to decide when to parse.
+ +```kotlin +val parser = HTTPRequestParser() + +// Feed data as it arrives +parser.append(firstChunk) +parser.append(secondChunk) + +when { + parser.state == State.NEEDS_MORE_DATA -> { + // Keep reading from the socket + } + parser.state == State.HEADERS_COMPLETE -> { + // Headers are available but body is still arriving + val partial = parser.parseRequest()!! + println("${partial.method} ${partial.target}") + } + parser.state == State.COMPLETE -> { + // Everything received — parse and forward + val request = parser.parseRequest()!! + // ... + } +} +``` + +You can call `parseRequest()` in either `HEADERS_COMPLETE` or `COMPLETE` state. In `HEADERS_COMPLETE`, the returned `ParsedHTTPRequest` has `isComplete = false` (headers only, body still arriving). In `COMPLETE`, `isComplete = true` with the full body available via `request.body`. + +### Running a local server + +`HttpServer` uses a `ServerSocket` on a background thread. Set `externallyAccessible = true` to bind to `0.0.0.0` (for device-to-device testing), or `false` to bind to `127.0.0.1` only. Each request is parsed automatically and delivered to your handler as an `HttpRequest`, which includes `parseDurationMs` for diagnostics. The server generates a random `token` that clients must include as `Proxy-Authorization: Bearer <token>` (per RFC 9110 §11.7.1). Using `Proxy-Authorization` instead of `Authorization` keeps the client's `Authorization` header available for upstream credentials (e.g. HTTP Basic auth to the remote server). + +```kotlin +import org.wordpress.gutenberg.HttpServer +import org.wordpress.gutenberg.HttpResponse + +val server = HttpServer( + name = "my-server", + externallyAccessible = true, + handler = { request -> + println("${request.method} ${request.target} (${"%.2f".format(request.parseDurationMs)}ms)") + HttpResponse(body = "OK\n".toByteArray()) + } +) +server.start() +println("Listening on port ${server.port}, token: ${server.token}") +// ... later ...
+server.stop() +``` + +### Multipart parsing + +For `multipart/form-data` requests (e.g., media uploads), call `multipartParts()` on a parsed request. The boundary is extracted automatically from the `Content-Type` header. Each `MultipartPart` gives you the field `name`, optional `filename` and `contentType`, and a `body` backed by the same `RequestBody` abstraction (in-memory or file-backed). + +```kotlin +val request = parser.parseRequest()!! +val parts = request.multipartParts() + +for (part in parts) { + println("${part.name}: ${String(part.body.readBytes())}") + if (part.filename != null) { + println(" filename: ${part.filename}, contentType: ${part.contentType}") + } +} +``` + +### Error handling + +`parseRequest()` throws `HTTPRequestParseException` for malformed input. Each error case maps to a specific RFC violation or safety check. The `errorId` strings match the Swift implementation for cross-platform consistency, and each error carries an `httpStatus` code suitable for responding to the client. 
+ +```kotlin +try { + val request = parser.parseRequest() +} catch (e: HTTPRequestParseException) { + when (e.error) { + HTTPRequestParseError.EMPTY_HEADER_SECTION -> // No request line before \r\n\r\n + HTTPRequestParseError.MALFORMED_REQUEST_LINE -> // Missing method or target + HTTPRequestParseError.OBS_FOLD_DETECTED -> // Continuation line (rejected per RFC 7230 §3.2.4) + HTTPRequestParseError.WHITESPACE_BEFORE_COLON -> // Space or tab between field-name and colon (RFC 7230 §3.2.4) + HTTPRequestParseError.INVALID_CONTENT_LENGTH -> // Non-numeric or negative Content-Length + HTTPRequestParseError.CONFLICTING_CONTENT_LENGTH -> // Multiple Content-Length headers disagree + HTTPRequestParseError.UNSUPPORTED_TRANSFER_ENCODING -> // Transfer-Encoding not supported + HTTPRequestParseError.INVALID_HTTP_VERSION -> // Unrecognized HTTP version + HTTPRequestParseError.INVALID_FIELD_NAME -> // Invalid characters in header field name + HTTPRequestParseError.INVALID_FIELD_VALUE -> // Invalid characters in header field value + HTTPRequestParseError.MISSING_HOST_HEADER -> // HTTP/1.1 requires Host + HTTPRequestParseError.MULTIPLE_HOST_HEADERS -> // Duplicate Host headers + HTTPRequestParseError.PAYLOAD_TOO_LARGE -> // Body exceeds maxBodySize (HTTP 413) + HTTPRequestParseError.HEADERS_TOO_LARGE -> // Headers exceed limit (HTTP 431) + HTTPRequestParseError.INVALID_ENCODING -> // Headers aren't valid UTF-8 + } + + // All cases provide an HTTP status code: + println("HTTP ${e.error.httpStatus}") +} +``` + +You can also limit the maximum body size by passing `maxBodySize` to the parser constructor — requests exceeding the limit throw `PAYLOAD_TOO_LARGE`. + +## RFC Conformance + +The parser enforces or documents behavior for the following: + +- **RFC 7230 §3.2.4** — Rejects obs-fold (continuation lines) and whitespace before colon in field names. +- **RFC 7230 §3.3.3** — Rejects conflicting `Content-Length` values across multiple headers. 
+- **RFC 9110 §5.3** — Combines duplicate header field lines with comma-separated values. +- **RFC 9110 §8.6** — Validates `Content-Length` values including comma-separated lists of identical values (e.g., `5, 5`). +- **RFC 9112 §3** — Parses the request line into method, target, and optional HTTP version. + +Conformance is verified by shared cross-platform JSON test fixtures (also used by the Swift test suite) plus Kotlin-specific unit tests. diff --git a/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/RequestBody.kt b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/RequestBody.kt new file mode 100644 index 000000000..10f9daa50 --- /dev/null +++ b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/http/RequestBody.kt @@ -0,0 +1,397 @@
package org.wordpress.gutenberg.http

import java.io.ByteArrayInputStream
import java.io.Closeable
import java.io.File
import java.io.InputStream
import java.io.RandomAccessFile

/**
 * An immutable, read-only view over a [ByteArray].
 *
 * This inline value class prevents callers from mutating the backing array
 * through the type system, without the overhead of a defensive copy.
 * At runtime the wrapper is erased — method calls compile to static
 * functions that operate directly on the underlying `ByteArray`.
 */
@JvmInline
value class ReadOnlyBytes(private val backing: ByteArray) {
    /** The number of bytes. */
    val size: Int get() = backing.size

    /** Returns the byte at the given [index]. */
    operator fun get(index: Int): Byte = backing[index]

    /** Returns a mutable copy of the underlying data. */
    fun copyOf(): ByteArray = backing.copyOf()

    /** Returns a mutable copy of the specified range. */
    fun copyOfRange(fromIndex: Int, toIndex: Int): ByteArray =
        backing.copyOfRange(fromIndex, toIndex)

    /** Creates an [InputStream] that reads from the backing data. */
    fun inputStream(): InputStream = ByteArrayInputStream(backing)

    /**
     * Finds the first occurrence of [pattern] at or after [fromIndex].
     * Returns -1 if not found. A negative [fromIndex] is treated as 0.
     *
     * Uses a naive O(n·m) scan. Within this package it is used to locate the
     * `\r\n\r\n` header terminator and, during multipart parsing, the
     * `--boundary` delimiter. Both patterns are short, so KMP/Boyer-Moore
     * overhead is not justified.
     */
    fun indexOf(pattern: ByteArray, fromIndex: Int = 0): Int {
        if (pattern.isEmpty()) return fromIndex
        val limit = size - pattern.size
        // Clamp instead of indexing the array with a negative position.
        val start = maxOf(fromIndex, 0)
        if (start > limit) return -1
        outer@ for (i in start..limit) {
            for (j in pattern.indices) {
                if (backing[i + j] != pattern[j]) continue@outer
            }
            return i
        }
        return -1
    }

    /**
     * Returns whether the bytes at [offset] match [pattern]. An [offset] that is
     * negative, or that leaves fewer than `pattern.size` bytes, yields false.
     */
    fun startsWith(pattern: ByteArray, offset: Int): Boolean {
        if (offset < 0 || offset + pattern.size > size) return false
        for (i in pattern.indices) {
            if (backing[offset + i] != pattern[i]) return false
        }
        return true
    }
}

/**
 * Owner for a temporary file, responsible for deleting it when no longer needed.
 *
 * Multiple [RequestBody] instances may share the same owner — for example, when
 * multipart parsing creates part bodies that reference byte ranges within the
 * same backing file. Because of this shared ownership, individual RequestBody
 * consumers should NOT close the owner; instead, the server's connection handler
 * calls [close] once the entire request (including all derived parts) is done.
 *
 * Active files are tracked in a companion registry so that orphaned temp files —
 * left behind by a crash or process kill — can be cleaned up on the next server
 * start via [cleanOrphans].
 *
 * ### Why not finalize() or deleteOnExit()?
 *
 * Both are unreliable on Android:
 * - `finalize()` is deprecated and ART may skip finalizer execution entirely
 *   under memory pressure (Android 12+).
+ * - `deleteOnExit()` only fires on clean JVM shutdown. Android apps are killed + * by the OS, not shut down gracefully, so the hook effectively never runs. + * + * Instead, cleanup relies on two mechanisms: + * 1. **Explicit [close]** at the connection boundary (primary path). + * 2. **[cleanOrphans]** at server start (safety net for crashes). + */ +internal class TempFileOwner(val file: File) : Closeable { + init { + activeFiles.add(file.absolutePath) + } + + override fun close() { + activeFiles.remove(file.absolutePath) + file.delete() + } + + companion object { + /** The default subdirectory name used for temp files under the injected cache dir. */ + const val DEFAULT_TEMP_SUBDIR = "gutenberg-http" + + /** Paths of temp files currently owned by a live [TempFileOwner]. */ + private val activeFiles = java.util.concurrent.ConcurrentHashMap.newKeySet() + + /** + * Deletes orphaned temp files in the [cacheDir]/[tempSubdir] directory. + * + * A file is considered orphaned if it is not tracked by any active + * [TempFileOwner]. Call this at a safe point when no requests are + * in-flight (e.g., server start). + * + * **Important:** Two server instances with the same `name` must not run + * concurrently. On startup, this method deletes all files in the server's + * temp subdirectory that are not tracked by an active [TempFileOwner]. If + * another instance with the same name is still handling requests, its + * in-flight temp files may be removed, causing I/O failures. Callers must + * ensure each running server uses a unique name, or that the previous + * instance is fully stopped before starting a new one. + * + * @param cacheDir The base cache directory. + * @param tempSubdir The subdirectory name for temp files. When used via + * [HttpServer][org.wordpress.gutenberg.HttpServer], this is scoped by + * the server's `name` to prevent interference between concurrent instances. 
+ */ + fun cleanOrphans(cacheDir: File, tempSubdir: String = DEFAULT_TEMP_SUBDIR) { + val dir = File(cacheDir, tempSubdir) + if (!dir.isDirectory) return + dir.listFiles()?.forEach { file -> + if (!activeFiles.contains(file.absolutePath)) { + file.delete() + } + } + } + } +} + +/** + * An HTTP request body with stream semantics. + * + * `RequestBody` abstracts over the underlying storage (in-memory data or a file on disk) + * and provides uniform access regardless of backing: + * - **Stream access**: Use [inputStream] to read without loading everything into memory. + * - **Materialized access**: Use [readBytes] to get the full contents. + */ +sealed class RequestBody { + + /** The number of bytes in the body. */ + abstract val size: Long + + /** + * Creates an [InputStream] for reading the body contents. + * + * The caller is responsible for closing the returned stream. + */ + abstract fun inputStream(): InputStream + + /** Reads the full body contents into a byte array. */ + abstract fun readBytes(): ByteArray + + /** + * Reads the full body contents and returns a read-only view along with the + * file offset at which the data begins (0 for in-memory bodies). + * + * This is used internally for multipart boundary scanning. + */ + internal abstract fun readAllData(): Pair + + /** A read-only view of the in-memory data backing this body, or `null` for file-backed bodies. */ + open val inMemoryData: ReadOnlyBytes? get() = null + + /** The file backing this body, or `null` for in-memory bodies. */ + open val file: File? get() = null + + /** The byte offset within the backing file where this body begins (0 for in-memory). */ + open val fileOffset: Long get() = 0 + + /** The temp file owner, if any. Used to propagate ownership to derived bodies. */ + internal open val fileOwner: TempFileOwner? get() = null + + /** + * A body backed by in-memory data. 
+ */ + class InMemory(val data: ByteArray) : RequestBody() { + override val size: Long get() = data.size.toLong() + override val inMemoryData: ReadOnlyBytes get() = ReadOnlyBytes(data) + + override fun inputStream(): InputStream = + java.io.ByteArrayInputStream(data) + + override fun readBytes(): ByteArray = data.copyOf() + + override fun readAllData(): Pair = ReadOnlyBytes(data) to 0L + + override fun equals(other: Any?): Boolean { + if (this === other) return true + return other is InMemory && data.contentEquals(other.data) + } + + override fun hashCode(): Int = data.contentHashCode() + } + + /** + * A body backed by a byte range within a file on disk. + * + * Bytes are not read until [inputStream] or [readBytes] is called, + * keeping the representation lightweight for uses like multipart part bodies. + */ + class FileBacked internal constructor( + override val file: File, + override val fileOffset: Long = 0, + override val size: Long, + override val fileOwner: TempFileOwner? = null + ) : RequestBody() { + + override fun inputStream(): InputStream { + return object : InputStream() { + private val raf = RandomAccessFile(file, "r").also { it.seek(fileOffset) } + private var remaining = size + private var closed = false + + override fun read(): Int { + if (remaining <= 0) return -1 + remaining-- + return raf.read() + } + + override fun read(b: ByteArray, off: Int, len: Int): Int { + if (remaining <= 0) return -1 + val toRead = minOf(len.toLong(), remaining).toInt() + val n = raf.read(b, off, toRead) + if (n > 0) remaining -= n + return n + } + + override fun close() { + if (!closed) { + closed = true + raf.close() + } + } + } + } + + override fun readBytes(): ByteArray { + require(size <= Int.MAX_VALUE) { "Body too large to read into memory: $size bytes" } + val intSize = size.toInt() + RandomAccessFile(file, "r").use { raf -> + raf.seek(fileOffset) + val buf = ByteArray(intSize) + var pos = 0 + while (pos < intSize) { + val n = raf.read(buf, pos, intSize - pos) + 
if (n == -1) break + pos += n + } + return if (pos == intSize) buf else buf.copyOf(pos) + } + } + + override fun readAllData(): Pair = ReadOnlyBytes(readBytes()) to fileOffset + + override fun equals(other: Any?): Boolean { + if (this === other) return true + return other is FileBacked && + file == other.file && + fileOffset == other.fileOffset && + size == other.size + } + + override fun hashCode(): Int { + var result = file.hashCode() + result = 31 * result + fileOffset.hashCode() + result = 31 * result + size.hashCode() + return result + } + } +} + +/** + * Abstraction over the parser's backing store. + * + * Tries to use a temp file on disk (suitable for large bodies). If the file + * cannot be created, falls back to an in-memory buffer automatically. + * + * When memory-backed, the buffer is capped at [maxSize] to prevent unbounded + * growth. The file-backed path has no cap — body size enforcement is handled + * by the parser via `Content-Length` and `maxBodySize`. + * + * @param maxSize Maximum bytes allowed in the in-memory fallback buffer. + * Ignored when the buffer is file-backed. + * @param cacheDir Optional directory for temp files (e.g., from `Context.getCacheDir()`). + * Files are created in a [tempSubdir] subdirectory. When `null`, + * falls back to the system temp directory. + * @param tempSubdir The subdirectory name for temp files under [cacheDir]. + * Defaults to [TempFileOwner.DEFAULT_TEMP_SUBDIR]. + */ +internal class Buffer(maxSize: Int = Int.MAX_VALUE, cacheDir: File? = null, tempSubdir: String = TempFileOwner.DEFAULT_TEMP_SUBDIR) : Closeable { + private val maxSize = maxSize + private var file: File? = null + private var raf: RandomAccessFile? = null + private var memoryBuffer: java.io.ByteArrayOutputStream? 
= null + private var fileOwnershipTransferred = false + + init { + try { + val tempDir = if (cacheDir != null) { + File(cacheDir, tempSubdir).also { it.mkdirs() } + } else { + null + } + val tempFile = File.createTempFile("GutenbergKitHTTP-", null, tempDir) + // No deleteOnExit() — on Android, apps are killed by the OS rather + // than shut down gracefully, so the JVM shutdown hook never fires. + // Cleanup is handled by TempFileOwner.close() and cleanOrphans(). + val handle = RandomAccessFile(tempFile, "rw") + file = tempFile + raf = handle + } catch (_: Exception) { + // Temp file unavailable — buffer in memory instead. + memoryBuffer = java.io.ByteArrayOutputStream() + } + } + + /** + * Appends data to the buffer. + * + * @return `true` if the data was accepted, `false` if the in-memory + * buffer would exceed its size limit. + */ + fun append(data: ByteArray): Boolean { + val r = raf + if (r != null) { + r.seek(r.length()) + r.write(data) + return true + } else { + val buf = memoryBuffer!! + if (buf.size() + data.size > maxSize) { + return false + } + buf.write(data) + return true + } + } + + fun read(offset: Int, maxLength: Int): ByteArray { + require(offset >= 0) { "offset must be non-negative, was $offset" } + require(maxLength >= 0) { "maxLength must be non-negative, was $maxLength" } + if (maxLength == 0) return ByteArray(0) + val r = raf + if (r != null) { + r.seek(offset.toLong()) + val buf = ByteArray(maxLength) + var pos = 0 + while (pos < maxLength) { + val n = r.read(buf, pos, maxLength - pos) + if (n <= 0) break + pos += n + } + return if (pos == maxLength) buf else buf.copyOf(pos) + } else { + val bytes = memoryBuffer!!.toByteArray() + val start = offset.coerceAtMost(bytes.size) + // Use Long arithmetic to avoid Int overflow when offset + maxLength > Int.MAX_VALUE + // (possible with large media files like videos). 
+ val end = (offset.toLong() + maxLength.toLong()).coerceAtMost(bytes.size.toLong()).toInt() + return bytes.copyOfRange(start, end) + } + } + + /** + * Transfers ownership of the backing file to a [TempFileOwner]. + * + * After this call, the buffer will no longer delete the file on [close]. + * Returns `null` if the buffer is memory-backed or ownership was already transferred. + */ + fun transferFileOwnership(): Pair? { + val f = file ?: return null + if (fileOwnershipTransferred) return null + fileOwnershipTransferred = true + return f to TempFileOwner(f) + } + + /** Whether this buffer is backed by a file (vs. in-memory). */ + val isFileBacked: Boolean get() = raf != null + + override fun close() { + try { raf?.close() } catch (_: Exception) {} + if (!fileOwnershipTransferred) { + file?.delete() + } + } +} diff --git a/android/Gutenberg/src/test/java/org/wordpress/gutenberg/HttpResponseSerializationTest.kt b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/HttpResponseSerializationTest.kt new file mode 100644 index 000000000..c7b7bca82 --- /dev/null +++ b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/HttpResponseSerializationTest.kt @@ -0,0 +1,162 @@ +package org.wordpress.gutenberg + +import org.junit.Assert.assertEquals +import org.junit.Assert.assertFalse +import org.junit.Assert.assertTrue +import org.junit.Test + +class HttpResponseSerializationTest { + + @Test + fun `Content-Length always matches actual body size`() { + val response = HttpResponse( + headers = mapOf("Content-Type" to "text/plain"), + body = "hello".toByteArray() + ) + val serialized = String(HttpServer.serializeResponse(response)) + + assertTrue("Should contain Content-Length: 5", serialized.contains("Content-Length: 5\r\n")) + } + + @Test + fun `caller-provided Content-Length is replaced with actual body size`() { + val response = HttpResponse( + headers = mapOf("Content-Length" to "999", "Content-Type" to "text/plain"), + body = "hello".toByteArray() + ) + val serialized 
= String(HttpServer.serializeResponse(response)) + + assertFalse("Wrong Content-Length must not appear", serialized.contains("Content-Length: 999")) + assertTrue("Correct Content-Length must be present", serialized.contains("Content-Length: 5\r\n")) + } + + @Test + fun `case-insensitive Content-Length replacement`() { + val response = HttpResponse( + headers = mapOf("content-length" to "0", "Content-Type" to "text/plain"), + body = "test body".toByteArray() + ) + val serialized = String(HttpServer.serializeResponse(response)) + + assertFalse("Wrong content-length must not appear", serialized.contains("content-length: 0")) + assertTrue("Correct Content-Length must be present", serialized.contains("Content-Length: 9\r\n")) + } + + @Test + fun `Connection close is added when not present`() { + val response = HttpResponse(body = "ok".toByteArray()) + val serialized = String(HttpServer.serializeResponse(response)) + + assertTrue("Should contain Connection: close", serialized.contains("Connection: close\r\n")) + } + + @Test + fun `hop-by-hop Connection header is stripped and replaced with close`() { + val response = HttpResponse( + headers = mapOf("Connection" to "keep-alive"), + body = "ok".toByteArray() + ) + val serialized = String(HttpServer.serializeResponse(response)) + + assertFalse("Hop-by-hop Connection must be stripped", serialized.contains("Connection: keep-alive")) + assertTrue("Connection: close must be present", serialized.contains("Connection: close")) + } + + @Test + fun `hop-by-hop Transfer-Encoding header is stripped`() { + val response = HttpResponse( + headers = mapOf("Transfer-Encoding" to "chunked", "Content-Type" to "text/plain"), + body = "ok".toByteArray() + ) + val serialized = String(HttpServer.serializeResponse(response)) + + assertFalse("Transfer-Encoding must be stripped", serialized.contains("Transfer-Encoding")) + } + + @Test + fun `401 response with WWW-Authenticate header serializes correctly`() { + val response = HttpResponse( + status 
= 401, + headers = mapOf("WWW-Authenticate" to "Bearer") + ) + val serialized = String(HttpServer.serializeResponse(response)) + + assertTrue("Should start with 401 status line", serialized.startsWith("HTTP/1.1 401 Unauthorized\r\n")) + assertTrue("Should contain WWW-Authenticate header", serialized.contains("WWW-Authenticate: Bearer\r\n")) + } + + @Test + fun `header values are sanitized`() { + val response = HttpResponse( + headers = mapOf("X-Test" to "value\u0007bell"), + body = ByteArray(0) + ) + val serialized = String(HttpServer.serializeResponse(response)) + + assertFalse("BEL should be stripped", serialized.contains("\u0007")) + assertTrue("Cleaned value present", serialized.contains("X-Test: valuebell")) + } + + @Test + fun `sanitize preserves obs-text (0x80+) per RFC 9110`() { + val response = HttpResponse( + headers = mapOf("X-Test" to "caf\u00e9"), + body = ByteArray(0) + ) + val serialized = String(HttpServer.serializeResponse(response)) + + assertTrue("obs-text characters must be preserved", serialized.contains("X-Test: caf\u00e9")) + } + + @Test + fun `sanitize preserves HTAB in header values`() { + val response = HttpResponse( + headers = mapOf("X-Test" to "a\tb"), + body = ByteArray(0) + ) + val serialized = String(HttpServer.serializeResponse(response)) + + assertTrue("HTAB must be preserved", serialized.contains("X-Test: a\tb")) + } + + @Test + fun `Date header is present in RFC 9110 HTTP-date format`() { + val serialized = String(HttpServer.serializeResponse(HttpResponse())) + + val datePattern = Regex("""Date: \w{3}, \d{2} \w{3} \d{4} \d{2}:\d{2}:\d{2} GMT\r\n""") + assertTrue("Date header must be present in HTTP-date format", datePattern.containsMatchIn(serialized)) + } + + @Test + fun `Server header is present`() { + val serialized = String(HttpServer.serializeResponse(HttpResponse())) + + assertTrue("Server header must be present", serialized.contains("Server: GutenbergKit\r\n")) + } + + @Test + fun `caller-provided Date header is 
replaced`() { + val response = HttpResponse( + headers = mapOf("Date" to "Thu, 01 Jan 1970 00:00:00 GMT"), + body = ByteArray(0) + ) + val serialized = String(HttpServer.serializeResponse(response)) + + // The old date must not appear; a fresh one is generated + val dateCount = Regex("""Date:""").findAll(serialized).count() + assertEquals("Exactly one Date header", 1, dateCount) + assertFalse("Caller-provided Date must be replaced", serialized.contains("Date: Thu, 01 Jan 1970")) + } + + @Test + fun `caller-provided Server header is replaced`() { + val response = HttpResponse( + headers = mapOf("Server" to "Apache"), + body = ByteArray(0) + ) + val serialized = String(HttpServer.serializeResponse(response)) + + assertFalse("Caller-provided Server must be stripped", serialized.contains("Server: Apache")) + assertTrue("Server: GutenbergKit must be present", serialized.contains("Server: GutenbergKit")) + } +} diff --git a/android/Gutenberg/src/test/java/org/wordpress/gutenberg/HttpServerAuthenticationTests.kt b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/HttpServerAuthenticationTests.kt new file mode 100644 index 000000000..6fe93fa5d --- /dev/null +++ b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/HttpServerAuthenticationTests.kt @@ -0,0 +1,181 @@ +package org.wordpress.gutenberg + +import org.junit.After +import org.junit.Assert.assertEquals +import org.junit.Before +import org.junit.Test +import java.net.HttpURLConnection +import java.net.URL + +class HttpServerAuthenticationTests { + + private lateinit var server: HttpServer + + @Before + fun setUp() { + server = HttpServer( + name = "auth-test", + externallyAccessible = false, + requiresAuthentication = true, + handler = { HttpResponse(body = "OK\n".toByteArray()) } + ) + server.start() + } + + @After + fun tearDown() { + server.stop() + } + + @Test + fun `request without token returns 407 with Content-Type and Proxy-Authenticate`() { + val conn = 
URL("http://127.0.0.1:${server.port}/test").openConnection() as HttpURLConnection + try { + assertEquals(407, conn.responseCode) + assertEquals("text/plain", conn.getHeaderField("Content-Type")) + assertEquals("Bearer", conn.getHeaderField("Proxy-Authenticate")) + } finally { + conn.disconnect() + } + } + + @Test + fun `request with wrong token returns 407`() { + val conn = URL("http://127.0.0.1:${server.port}/test").openConnection() as HttpURLConnection + conn.setRequestProperty("Proxy-Authorization", "Bearer wrong-token") + try { + assertEquals(407, conn.responseCode) + assertEquals("Bearer", conn.getHeaderField("Proxy-Authenticate")) + } finally { + conn.disconnect() + } + } + + @Test + fun `request with valid token returns 200`() { + val conn = URL("http://127.0.0.1:${server.port}/test").openConnection() as HttpURLConnection + conn.setRequestProperty("Proxy-Authorization", "Bearer ${server.token}") + try { + assertEquals(200, conn.responseCode) + } finally { + conn.disconnect() + } + } + + @Test + fun `request with lowercase 'bearer' scheme returns 200`() { + val conn = URL("http://127.0.0.1:${server.port}/test").openConnection() as HttpURLConnection + conn.setRequestProperty("Proxy-Authorization", "bearer ${server.token}") + try { + assertEquals(200, conn.responseCode) + } finally { + conn.disconnect() + } + } + + @Test + fun `request with uppercase 'BEARER' scheme returns 200`() { + val conn = URL("http://127.0.0.1:${server.port}/test").openConnection() as HttpURLConnection + conn.setRequestProperty("Proxy-Authorization", "BEARER ${server.token}") + try { + assertEquals(200, conn.responseCode) + } finally { + conn.disconnect() + } + } + + @Test + fun `Authorization header passes through to handler alongside Proxy-Authorization`() { + var receivedAuth: String? 
= null + + server.stop() + val authServer = HttpServer( + name = "auth-test-passthrough", + externallyAccessible = false, + requiresAuthentication = true, + handler = { request -> + receivedAuth = request.header("Authorization") + HttpResponse(body = "OK\n".toByteArray()) + } + ) + authServer.start() + try { + val conn = URL("http://127.0.0.1:${authServer.port}/test").openConnection() as HttpURLConnection + conn.setRequestProperty("Proxy-Authorization", "Bearer ${authServer.token}") + conn.setRequestProperty("Authorization", "Basic dXNlcjpwYXNz") + try { + assertEquals(200, conn.responseCode) + assertEquals("Basic dXNlcjpwYXNz", receivedAuth) + } finally { + conn.disconnect() + } + } finally { + authServer.stop() + } + } + + // Content-Length Requirement + + @Test + fun `POST without Content-Length returns 411`() { + // HttpURLConnection always adds Content-Length, so use a raw socket. + java.net.Socket("127.0.0.1", server.port).use { sock -> + val raw = "POST /test HTTP/1.1\r\nHost: 127.0.0.1\r\nProxy-Authorization: Bearer ${server.token}\r\n\r\n" + sock.getOutputStream().write(raw.toByteArray()) + sock.getOutputStream().flush() + val statusLine = sock.getInputStream().bufferedReader().readLine() + assertEquals("HTTP/1.1 411 Length Required", statusLine) + } + } + + @Test + fun `GET without Content-Length returns 200`() { + val conn = URL("http://127.0.0.1:${server.port}/test").openConnection() as HttpURLConnection + conn.setRequestProperty("Proxy-Authorization", "Bearer ${server.token}") + try { + assertEquals(200, conn.responseCode) + } finally { + conn.disconnect() + } + } + + @Test + fun `POST with Content-Length returns 200`() { + val conn = URL("http://127.0.0.1:${server.port}/test").openConnection() as HttpURLConnection + conn.setRequestProperty("Proxy-Authorization", "Bearer ${server.token}") + conn.requestMethod = "POST" + conn.doOutput = true + conn.outputStream.write("hello".toByteArray()) + conn.outputStream.flush() + try { + assertEquals(200, 
conn.responseCode) + } finally { + conn.disconnect() + } + } + + // Auth Disabled + + @Test + fun `authentication disabled passes through without token`() { + server.stop() + + val noAuthServer = HttpServer( + name = "auth-test-no-auth", + externallyAccessible = false, + requiresAuthentication = false, + handler = { HttpResponse(body = "OK\n".toByteArray()) } + ) + noAuthServer.start() + try { + val conn = URL("http://127.0.0.1:${noAuthServer.port}/test").openConnection() as HttpURLConnection + try { + assertEquals(200, conn.responseCode) + } finally { + conn.disconnect() + } + } finally { + noAuthServer.stop() + } + } +} diff --git a/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/BufferTests.kt b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/BufferTests.kt new file mode 100644 index 000000000..dd8dfd679 --- /dev/null +++ b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/BufferTests.kt @@ -0,0 +1,114 @@ +package org.wordpress.gutenberg.http + +import org.junit.Assert.assertArrayEquals +import org.junit.Assert.assertEquals +import org.junit.Assert.assertFalse +import org.junit.Assert.assertTrue +import org.junit.Test + +class BufferTests { + + @Test + fun `memory-backed append rejects data that would exceed maxSize`() { + // Use a nonexistent cacheDir to force the memory fallback path. + val bogusDir = java.io.File("/nonexistent-dir-that-will-never-exist") + val buffer = Buffer(maxSize = 10, cacheDir = bogusDir) + + assertTrue(buffer.append("hello".toByteArray())) // 5 bytes, under limit + assertTrue(buffer.append("world".toByteArray())) // 10 bytes, at limit + assertFalse(buffer.append("!".toByteArray())) // 11 bytes, rejected + + // Buffer still contains the data that was accepted. 
+ assertArrayEquals("helloworld".toByteArray(), buffer.read(0, 10)) + buffer.close() + } + + @Test + fun `transferFileOwnership creates TempFileOwner and close deletes the file`() { + val buffer = Buffer() + buffer.append("hello".toByteArray()) + val (file, owner) = buffer.transferFileOwnership()!! + buffer.close() + + assertTrue("File should exist after buffer close when ownership was transferred", file.exists()) + owner.close() + assertFalse("File should be deleted after TempFileOwner.close()", file.exists()) + } + + @Test + fun `cleanOrphans deletes files not tracked by a live TempFileOwner`() { + val cacheDir = kotlin.io.path.createTempDirectory("gutenberg-test-").toFile() + val subDir = java.io.File(cacheDir, TempFileOwner.DEFAULT_TEMP_SUBDIR).also { it.mkdirs() } + val orphan = java.io.File.createTempFile("GutenbergKitHTTP-", null, subDir) + assertTrue(orphan.exists()) + + TempFileOwner.cleanOrphans(cacheDir) + assertFalse("Orphaned file should be deleted", orphan.exists()) + + cacheDir.deleteRecursively() + } + + @Test + fun `cleanOrphans preserves files tracked by a live TempFileOwner`() { + val cacheDir = kotlin.io.path.createTempDirectory("gutenberg-test-").toFile() + val buffer = Buffer(cacheDir = cacheDir) + buffer.append("data".toByteArray()) + val (file, owner) = buffer.transferFileOwnership()!! 
+ buffer.close() + + TempFileOwner.cleanOrphans(cacheDir) + assertTrue("File should survive cleanOrphans while owner is live", file.exists()) + + owner.close() + cacheDir.deleteRecursively() + } + + @Test + fun `read with zero maxLength returns empty array`() { + val buffer = Buffer() + buffer.append("hello".toByteArray()) + val result = buffer.read(0, 0) + assertEquals(0, result.size) + buffer.close() + } + + @Test + fun `read with valid offset and length returns correct data`() { + val buffer = Buffer() + buffer.append("hello world".toByteArray()) + val result = buffer.read(6, 5) + assertArrayEquals("world".toByteArray(), result) + buffer.close() + } + + @Test(expected = IllegalArgumentException::class) + fun `read with negative offset throws`() { + val buffer = Buffer() + buffer.append("hello".toByteArray()) + try { + buffer.read(-1, 5) + } finally { + buffer.close() + } + } + + @Test(expected = IllegalArgumentException::class) + fun `read with negative maxLength throws`() { + val buffer = Buffer() + buffer.append("hello".toByteArray()) + try { + buffer.read(0, -1) + } finally { + buffer.close() + } + } + + @Test + fun `read beyond written data returns partial result`() { + val buffer = Buffer() + buffer.append("hi".toByteArray()) + val result = buffer.read(0, 100) + assertArrayEquals("hi".toByteArray(), result) + buffer.close() + } +} diff --git a/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/ChunkedMultipartTests.kt b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/ChunkedMultipartTests.kt new file mode 100644 index 000000000..6a9c08408 --- /dev/null +++ b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/ChunkedMultipartTests.kt @@ -0,0 +1,377 @@ +package org.wordpress.gutenberg.http + +import org.junit.Assert.assertArrayEquals +import org.junit.Assert.assertEquals +import org.junit.Test +import java.io.File + +/** + * Tests for the chunked (file-backed) multipart parsing path. 
+ * + * The in-memory path is tested extensively in [FixtureTests] and the shared + * JSON fixture files. These tests verify the chunked scanner that runs when + * the body is backed by a file on disk. + */ +class ChunkedMultipartTests { + + // MARK: - Basic Parsing + + @Test + fun `single text field parsed from file-backed body`() { + val (file, request) = makeFileBackedRequest( + fields = listOf(Field("title", value = "My Blog Post".toByteArray())), + boundary = "AaB03x" + ) + file.deleteOnExit() + + val parts = request.multipartParts() + + assertEquals(1, parts.size) + assertEquals("title", parts[0].name) + assertEquals(null, parts[0].filename) + assertEquals("text/plain", parts[0].contentType) + assertArrayEquals("My Blog Post".toByteArray(), parts[0].body.readBytes()) + } + + @Test + fun `multiple parts parsed from file-backed body`() { + val (file, request) = makeFileBackedRequest( + fields = listOf( + Field("title", value = "Hello".toByteArray()), + Field("file", filename = "photo.jpg", contentType = "image/jpeg", value = "jpeg-data".toByteArray()), + Field("caption", value = "A photo".toByteArray()), + ), + boundary = "WebKitBoundary123" + ) + file.deleteOnExit() + + val parts = request.multipartParts() + + assertEquals(3, parts.size) + assertEquals("title", parts[0].name) + assertArrayEquals("Hello".toByteArray(), parts[0].body.readBytes()) + assertEquals("file", parts[1].name) + assertEquals("photo.jpg", parts[1].filename) + assertEquals("image/jpeg", parts[1].contentType) + assertArrayEquals("jpeg-data".toByteArray(), parts[1].body.readBytes()) + assertEquals("caption", parts[2].name) + assertArrayEquals("A photo".toByteArray(), parts[2].body.readBytes()) + } + + @Test + fun `empty part body parsed correctly`() { + val (file, request) = makeFileBackedRequest( + fields = listOf(Field("empty", value = ByteArray(0))), + boundary = "AaB03x" + ) + file.deleteOnExit() + + val parts = request.multipartParts() + + assertEquals(1, parts.size) + 
assertEquals("empty", parts[0].name) + assertArrayEquals(ByteArray(0), parts[0].body.readBytes()) + } + + @Test + fun `binary data preserved through file-backed parsing`() { + val binaryContent = ByteArray(128) { 0x00 } + + ByteArray(256) { it.toByte() } + + ByteArray(128) { 0xFF.toByte() } + + val (file, request) = makeFileBackedRequest( + fields = listOf(Field("file", filename = "binary.bin", contentType = "application/octet-stream", value = binaryContent)), + boundary = "BinaryBoundary99" + ) + file.deleteOnExit() + + val parts = request.multipartParts() + + assertEquals(1, parts.size) + assertEquals("binary.bin", parts[0].filename) + assertArrayEquals(binaryContent, parts[0].body.readBytes()) + } + + @Test + fun `preamble before first boundary is ignored`() { + val boundary = "AaB03x" + val body = "This is the preamble. It should be ignored.\r\n" + + "--$boundary\r\nContent-Disposition: form-data; name=\"field\"\r\n\r\nvalue\r\n--$boundary--\r\n" + + val (file, request) = makeFileBackedRequestFromRawBody(body, boundary) + file.deleteOnExit() + + val parts = request.multipartParts() + + assertEquals(1, parts.size) + assertEquals("field", parts[0].name) + assertArrayEquals("value".toByteArray(), parts[0].body.readBytes()) + } + + @Test + fun `transport padding after boundary is skipped`() { + val boundary = "AaB03x" + val body = "--$boundary \t \r\nContent-Disposition: form-data; name=\"field\"\r\n\r\nvalue\r\n--$boundary--\r\n" + + val (file, request) = makeFileBackedRequestFromRawBody(body, boundary) + file.deleteOnExit() + + val parts = request.multipartParts() + + assertEquals(1, parts.size) + assertEquals("field", parts[0].name) + assertArrayEquals("value".toByteArray(), parts[0].body.readBytes()) + } + + // MARK: - Error Cases + + @Test(expected = MultipartParseException::class) + fun `close-delimiter-only body throws malformedBody`() { + val boundary = "AaB03x" + val body = "--$boundary--\r\n" + + val (file, request) = 
makeFileBackedRequestFromRawBody(body, boundary) + file.deleteOnExit() + + request.multipartParts() + } + + @Test(expected = MultipartParseException::class) + fun `missing close delimiter throws malformedBody`() { + val boundary = "AaB03x" + val body = "--$boundary\r\nContent-Disposition: form-data; name=\"field\"\r\n\r\nvalue" + + val (file, request) = makeFileBackedRequestFromRawBody(body, boundary) + file.deleteOnExit() + + request.multipartParts() + } + + // MARK: - Chunk Boundary Edge Cases + + @Test + fun `boundary split across chunk boundary is found correctly`() { + val boundary = "AaB03x" + val delimiter = "--$boundary" // 10 bytes + + // We want the second delimiter to start 5 bytes before the 65536 chunk boundary. + val splitPoint = 65_536 - 5 + + val headerBytes = "--$boundary\r\nContent-Disposition: form-data; name=\"pad\"\r\n\r\n".toByteArray() + val headerOverhead = headerBytes.size + val crlfBeforeDelimiter = 2 + val paddingLength = splitPoint - headerOverhead - crlfBeforeDelimiter + + val padding = ByteArray(paddingLength) { 'A'.code.toByte() } + + val (file, request) = makeFileBackedRequest( + fields = listOf( + Field("pad", value = padding), + Field("after", value = "found-it".toByteArray()), + ), + boundary = boundary + ) + file.deleteOnExit() + + // Verify the delimiter actually straddles the chunk boundary. 
+ val fileData = file.readBytes() + val delimBytes = delimiter.toByteArray() + val delimStart = ReadOnlyBytes(fileData).indexOf(delimBytes, headerOverhead) + assertEquals("Delimiter should start at $splitPoint", splitPoint, delimStart) + + val parts = request.multipartParts() + + assertEquals(2, parts.size) + assertEquals("pad", parts[0].name) + assertEquals(paddingLength.toLong(), parts[0].body.size) + assertEquals("after", parts[1].name) + assertArrayEquals("found-it".toByteArray(), parts[1].body.readBytes()) + } + + @Test + fun `large body spanning multiple chunks parses correctly`() { + val largeContent = ByteArray(200_000) { 'X'.code.toByte() } + + val (file, request) = makeFileBackedRequest( + fields = listOf( + Field("large", filename = "big.bin", contentType = "application/octet-stream", value = largeContent), + Field("meta", value = "description".toByteArray()), + ), + boundary = "LargeBoundary42" + ) + file.deleteOnExit() + + val parts = request.multipartParts() + + assertEquals(2, parts.size) + assertEquals("large", parts[0].name) + assertEquals(largeContent.size.toLong(), parts[0].body.size) + assertArrayEquals(largeContent, parts[0].body.readBytes()) + assertEquals("meta", parts[1].name) + assertArrayEquals("description".toByteArray(), parts[1].body.readBytes()) + } + + // MARK: - fileSlice Source + + @Test + fun `file-backed body with non-zero offset (fileSlice) parses correctly`() { + val boundary = "AaB03x" + val multipartBody = "--$boundary\r\nContent-Disposition: form-data; name=\"field\"\r\n\r\nvalue\r\n--$boundary--\r\n" + val multipartData = multipartBody.toByteArray() + + val garbagePrefix = ByteArray(500) { 'Z'.code.toByte() } + val file = File.createTempFile("slice-test-", null) + file.deleteOnExit() + file.writeBytes(garbagePrefix + multipartData) + + val body = RequestBody.FileBacked( + file = file, + fileOffset = garbagePrefix.size.toLong(), + size = multipartData.size.toLong() + ) + val request = ParsedHTTPRequest( + method = "POST", + 
target = "/upload", + httpVersion = "HTTP/1.1", + headers = mapOf("Content-Type" to "multipart/form-data; boundary=$boundary", "Host" to "localhost"), + body = body, + isComplete = true + ) + + val parts = request.multipartParts() + + assertEquals(1, parts.size) + assertEquals("field", parts[0].name) + assertArrayEquals("value".toByteArray(), parts[0].body.readBytes()) + } + + // MARK: - Part Count Limit + + @Test(expected = MultipartParseException::class) + fun `rejects multipart body with more than 100 parts (in-memory)`() { + val fields = (0 until 101).map { Field("field$it", value = "val$it".toByteArray()) } + val bodyData = buildMultipartBody(fields, "AaB03x") + val body = RequestBody.InMemory(bodyData) + val request = ParsedHTTPRequest( + method = "POST", + target = "/upload", + httpVersion = "HTTP/1.1", + headers = mapOf("Content-Type" to "multipart/form-data; boundary=AaB03x", "Host" to "localhost"), + body = body, + isComplete = true + ) + request.multipartParts() + } + + @Test + fun `accepts multipart body with exactly 100 parts (in-memory)`() { + val fields = (0 until 100).map { Field("field$it", value = "val$it".toByteArray()) } + val bodyData = buildMultipartBody(fields, "AaB03x") + val body = RequestBody.InMemory(bodyData) + val request = ParsedHTTPRequest( + method = "POST", + target = "/upload", + httpVersion = "HTTP/1.1", + headers = mapOf("Content-Type" to "multipart/form-data; boundary=AaB03x", "Host" to "localhost"), + body = body, + isComplete = true + ) + val parts = request.multipartParts() + assertEquals(100, parts.size) + } + + @Test(expected = MultipartParseException::class) + fun `rejects multipart body with more than 100 parts (file-backed)`() { + val fields = (0 until 101).map { Field("field$it", value = "val$it".toByteArray()) } + val (file, request) = makeFileBackedRequest(fields, "AaB03x") + file.deleteOnExit() + request.multipartParts() + } + + @Test + fun `accepts multipart body with exactly 100 parts (file-backed)`() { + val 
fields = (0 until 100).map { Field("field$it", value = "val$it".toByteArray()) } + val (file, request) = makeFileBackedRequest(fields, "AaB03x") + file.deleteOnExit() + val parts = request.multipartParts() + assertEquals(100, parts.size) + } + + // MARK: - Helpers + + private data class Field( + val name: String, + val filename: String? = null, + val contentType: String? = null, + val value: ByteArray + ) + + private fun makeFileBackedRequest( + fields: List, + boundary: String + ): Pair { + val body = buildMultipartBody(fields, boundary) + + val file = File.createTempFile("multipart-test-", null) + file.writeBytes(body) + + val requestBody = RequestBody.FileBacked( + file = file, + fileOffset = 0, + size = body.size.toLong() + ) + val request = ParsedHTTPRequest( + method = "POST", + target = "/upload", + httpVersion = "HTTP/1.1", + headers = mapOf("Content-Type" to "multipart/form-data; boundary=$boundary", "Host" to "localhost"), + body = requestBody, + isComplete = true + ) + return file to request + } + + private fun makeFileBackedRequestFromRawBody( + body: String, + boundary: String + ): Pair { + val bodyData = body.toByteArray() + val file = File.createTempFile("multipart-test-", null) + file.writeBytes(bodyData) + + val requestBody = RequestBody.FileBacked( + file = file, + fileOffset = 0, + size = bodyData.size.toLong() + ) + val request = ParsedHTTPRequest( + method = "POST", + target = "/upload", + httpVersion = "HTTP/1.1", + headers = mapOf("Content-Type" to "multipart/form-data; boundary=$boundary", "Host" to "localhost"), + body = requestBody, + isComplete = true + ) + return file to request + } + + private fun buildMultipartBody(fields: List, boundary: String): ByteArray { + val out = java.io.ByteArrayOutputStream() + for (field in fields) { + out.write("--$boundary\r\n".toByteArray()) + var disposition = "Content-Disposition: form-data; name=\"${field.name}\"" + if (field.filename != null) { + disposition += "; filename=\"${field.filename}\"" + } + 
out.write("$disposition\r\n".toByteArray()) + if (field.contentType != null) { + out.write("Content-Type: ${field.contentType}\r\n".toByteArray()) + } + out.write("\r\n".toByteArray()) + out.write(field.value) + out.write("\r\n".toByteArray()) + } + out.write("--$boundary--\r\n".toByteArray()) + return out.toByteArray() + } +} diff --git a/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/FixtureTests.kt b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/FixtureTests.kt new file mode 100644 index 000000000..a08e4d373 --- /dev/null +++ b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/FixtureTests.kt @@ -0,0 +1,417 @@ +package org.wordpress.gutenberg.http + +import com.google.gson.Gson +import com.google.gson.JsonObject +import kotlin.test.assertEquals +import kotlin.test.assertNotNull +import kotlin.test.assertNull +import kotlin.test.assertTrue +import kotlin.test.fail +import org.junit.Test +import java.io.File +import java.util.Base64 + +/** + * Fixture-driven tests for the pure-Kotlin HTTP parser. + * + * Loads the shared JSON test fixtures (also used by the Swift test suite) + * and validates the Kotlin implementation against them. 
+ */ +class FixtureTests { + + // MARK: - Header Value Fixtures + + @Test + fun `header value extraction - all fixture cases pass`() { + val fixtures = loadFixture("header-value-parsing") + val tests = fixtures.getAsJsonArray("tests") + + for (element in tests) { + val test = element.asJsonObject + val description = test.get("description").asString + val parameter = test.get("parameter").asString + val headerValue = test.get("headerValue").asString + val expected = if (test.get("expected").isJsonNull) null else test.get("expected").asString + + val result = HeaderValue.extractParameter(parameter, headerValue) + assertEquals(expected, result, "$description: result mismatch") + } + } + + // MARK: - Request Parsing Fixtures + + @Test + fun `request parsing - all basic cases pass`() { + val fixtures = loadFixture("request-parsing") + val tests = fixtures.getAsJsonArray("tests") + + for (element in tests) { + val test = element.asJsonObject + val description = test.get("description").asString + val input = test.get("input").asString + val expected = test.getAsJsonObject("expected") + + val parser: HTTPRequestParser + if (test.has("maxBodySize")) { + val maxBodySize = test.get("maxBodySize").asLong + parser = HTTPRequestParser(maxBodySize) + parser.append(input.toByteArray(Charsets.UTF_8)) + } else { + parser = HTTPRequestParser(input) + } + + if (test.has("appendAfterComplete")) { + val extra = test.get("appendAfterComplete").asString + parser.append(extra.toByteArray(Charsets.UTF_8)) + } + + // Handle needsMoreData case + if (expected.has("isComplete") && !expected.get("isComplete").asBoolean && + expected.has("hasHeaders") && !expected.get("hasHeaders").asBoolean + ) { + assertTrue(!parser.state.hasHeaders, "$description: should not have headers") + assertNull(parser.parseRequest(), "$description: parseRequest should return null") + continue + } + + val request = parser.parseRequest() + assertNotNull(request, "$description: parseRequest returned null") + request!! 
+ + if (expected.has("method")) { + assertEquals(expected.get("method").asString, request.method, "$description: method") + } + if (expected.has("target")) { + assertEquals(expected.get("target").asString, request.target, "$description: target") + } + if (expected.has("isComplete") && expected.get("isComplete").asBoolean) { + assertTrue(parser.state.isComplete, "$description: isComplete") + } + if (expected.has("headers")) { + val expectedHeaders = expected.getAsJsonObject("headers") + for (entry in expectedHeaders.entrySet()) { + assertEquals( + entry.value.asString, + request.header(entry.key), + "$description: header ${entry.key}" + ) + } + } + + // Body: check only if key is present in expected + if (expected.has("body")) { + if (expected.get("body").isJsonNull) { + assertNull(request.body, "$description: body should be null") + } else { + val expectedBody = expected.get("body").asString + assertNotNull(request.body, "$description: body should not be null") + assertEquals( + expectedBody, + String(request.body!!.readBytes(), Charsets.UTF_8), + "$description: body content" + ) + } + } + } + } + + @Test + fun `request parsing - all error cases pass`() { + val fixtures = loadFixture("request-parsing") + val errorTests = fixtures.getAsJsonArray("errorTests") + + for (element in errorTests) { + val test = element.asJsonObject + val description = test.get("description").asString + val expected = test.getAsJsonObject("expected") + val expectedError = expected.get("error").asString + + val parser: HTTPRequestParser + + if (test.has("inputBase64")) { + val base64 = test.get("inputBase64").asString + val data = Base64.getDecoder().decode(base64) + parser = if (test.has("maxBodySize")) { + HTTPRequestParser(test.get("maxBodySize").asLong) + } else { + HTTPRequestParser() + } + parser.append(data) + } else { + val input = test.get("input").asString + if (test.has("maxBodySize")) { + parser = HTTPRequestParser(test.get("maxBodySize").asLong) + 
parser.append(input.toByteArray(Charsets.UTF_8)) + } else { + parser = HTTPRequestParser(input) + } + } + + try { + parser.parseRequest() + fail("$description: expected error $expectedError but parsing succeeded") + } catch (e: HTTPRequestParseException) { + assertEquals( + expectedError, + e.error.errorId, + "$description: expected $expectedError but got ${e.error.errorId}" + ) + } + } + } + + @Test + fun `request parsing - all incremental cases pass`() { + val fixtures = loadFixture("request-parsing") + val incrementalTests = fixtures.getAsJsonArray("incrementalTests") + + for (element in incrementalTests) { + val test = element.asJsonObject + val description = test.get("description").asString + val expected = test.getAsJsonObject("expected") + + val parser = HTTPRequestParser() + + if (test.has("input") && test.has("chunkSize")) { + val input = test.get("input").asString + val chunkSize = test.get("chunkSize").asInt + val data = input.toByteArray(Charsets.UTF_8) + var i = 0 + while (i < data.size) { + val end = minOf(i + chunkSize, data.size) + parser.append(data.copyOfRange(i, end)) + i = end + } + } else if (test.has("headers")) { + val headers = test.get("headers").asString + parser.append(headers.toByteArray(Charsets.UTF_8)) + + // Check state after headers + if (expected.has("afterHeaders")) { + val afterHeaders = expected.getAsJsonObject("afterHeaders") + if (afterHeaders.has("hasHeaders")) { + assertEquals( + afterHeaders.get("hasHeaders").asBoolean, + parser.state.hasHeaders, + "$description: hasHeaders after headers" + ) + } + if (afterHeaders.has("isComplete")) { + assertEquals( + afterHeaders.get("isComplete").asBoolean, + parser.state.isComplete, + "$description: isComplete after headers" + ) + } + if (afterHeaders.has("method") || afterHeaders.has("target")) { + val partialRequest = parser.parseRequest() + assertNotNull(partialRequest, "$description: partial request should not be null") + partialRequest!! 
+ if (afterHeaders.has("method")) { + assertEquals( + afterHeaders.get("method").asString, + partialRequest.method, + "$description: partial method" + ) + } + if (afterHeaders.has("target")) { + assertEquals( + afterHeaders.get("target").asString, + partialRequest.target, + "$description: partial target" + ) + } + } + } + + // Append body chunks + if (test.has("bodyChunks")) { + for (chunkElement in test.getAsJsonArray("bodyChunks")) { + parser.append(chunkElement.asString.toByteArray(Charsets.UTF_8)) + } + } + } else if (test.has("input")) { + val input = test.get("input").asString + parser.append(input.toByteArray(Charsets.UTF_8)) + } + + // Verify final expectations + if (expected.has("isComplete") && !expected.get("isComplete").asBoolean && + expected.has("hasHeaders") && !expected.get("hasHeaders").asBoolean + ) { + assertTrue(!parser.state.hasHeaders, "$description: should not have headers") + assertNull(parser.parseRequest(), "$description: parseRequest should return null") + continue + } + + val request = parser.parseRequest() + assertNotNull(request, "$description: parseRequest returned null") + request!! 
+ + if (expected.has("method")) { + assertEquals(expected.get("method").asString, request.method, "$description: method") + } + if (expected.has("target")) { + assertEquals(expected.get("target").asString, request.target, "$description: target") + } + if (expected.has("isComplete") && expected.get("isComplete").asBoolean) { + assertTrue(parser.state.isComplete, "$description: isComplete") + } + if (expected.has("body")) { + if (expected.get("body").isJsonNull) { + assertNull(request.body, "$description: body should be null") + } else { + val expectedBody = expected.get("body").asString + assertNotNull(request.body, "$description: body should not be null") + assertEquals( + expectedBody, + String(request.body!!.readBytes(), Charsets.UTF_8), + "$description: body content" + ) + } + } + } + } + + // MARK: - Multipart Parsing Fixtures + + @Test + fun `multipart parsing - all cases pass`() { + val fixtures = loadFixture("multipart-parsing") + val tests = fixtures.getAsJsonArray("tests") + + for (element in tests) { + val test = element.asJsonObject + val description = test.get("description").asString + val boundary = test.get("boundary").asString + val quotedBoundary = test.has("quotedBoundary") && test.get("quotedBoundary").asBoolean + val rawBody = test.get("rawBody").asString + + val request = buildRawMultipartRequest(rawBody, boundary, quotedBoundary) + + val expected = test.getAsJsonObject("expected") + if (expected.has("contentType")) { + assertEquals( + expected.get("contentType").asString, + request.header("Content-Type"), + "$description: Content-Type" + ) + } + + val parts = request.multipartParts() + val expectedParts = expected.getAsJsonArray("parts") + assertEquals(expectedParts.size(), parts.size, "$description: part count") + + for (i in 0 until minOf(expectedParts.size(), parts.size)) { + val exp = expectedParts[i].asJsonObject + val part = parts[i] + + assertEquals(exp.get("name").asString, part.name, "$description: part[$i].name") + + if 
(exp.has("filename")) { + if (exp.get("filename").isJsonNull) { + assertNull(part.filename, "$description: part[$i].filename should be null") + } else { + assertEquals( + exp.get("filename").asString, + part.filename, + "$description: part[$i].filename" + ) + } + } + if (exp.has("contentType")) { + assertEquals( + exp.get("contentType").asString, + part.contentType, + "$description: part[$i].contentType" + ) + } + if (exp.has("body")) { + val expectedBody = exp.get("body").asString + assertEquals( + expectedBody, + String(part.body.readBytes(), Charsets.UTF_8), + "$description: part[$i].body" + ) + } + } + } + } + + @Test + fun `multipart parsing - all error cases pass`() { + val fixtures = loadFixture("multipart-parsing") + val errorTests = fixtures.getAsJsonArray("errorTests") + + for (element in errorTests) { + val test = element.asJsonObject + val description = test.get("description").asString + val expected = test.getAsJsonObject("expected") + val expectedError = expected.get("error").asString + val contentType = test.get("contentType")?.asString ?: expected.get("contentType")?.asString + + val request: ParsedHTTPRequest + + if (test.has("rawBody") && test.has("boundary")) { + val rawBody = test.get("rawBody").asString + val boundary = test.get("boundary").asString + request = buildRawMultipartRequest(rawBody, boundary) + } else if (contentType != null && test.has("body")) { + val body = test.get("body").asString + val raw = "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n" + + "Content-Type: $contentType\r\n" + + "Content-Length: ${body.toByteArray(Charsets.UTF_8).size}\r\n\r\n$body" + val parser = HTTPRequestParser(raw) + val parsed = parser.parseRequest() + assertNotNull(parsed, "$description: parsing request failed") + request = parsed!! 
+ } else if (contentType != null) { + val raw = "GET /upload HTTP/1.1\r\nHost: localhost\r\n" + + "Content-Type: $contentType\r\n\r\n" + val parser = HTTPRequestParser(raw) + val parsed = parser.parseRequest() + assertNotNull(parsed, "$description: parsing request failed") + request = parsed!! + } else { + fail("$description: invalid error test case") + return + } + + try { + request.multipartParts() + fail("$description: expected error $expectedError but succeeded") + } catch (e: MultipartParseException) { + assertEquals( + expectedError, + e.error.errorId, + "$description: expected $expectedError but got ${e.error.errorId}" + ) + } + } + } + + // MARK: - Helpers + + private fun loadFixture(name: String): JsonObject { + val fixturesDir = System.getProperty("test.fixtures.dir") + ?: error("test.fixtures.dir system property not set") + val file = File(fixturesDir, "$name.json") + return Gson().fromJson(file.reader(), JsonObject::class.java) + } + + private fun buildRawMultipartRequest( + body: String, + boundary: String, + quotedBoundary: Boolean = false + ): ParsedHTTPRequest { + val boundaryParam = if (quotedBoundary) "\"$boundary\"" else boundary + val raw = "POST /wp/v2/media HTTP/1.1\r\nHost: localhost\r\n" + + "Content-Type: multipart/form-data; boundary=$boundaryParam\r\n" + + "Content-Length: ${body.toByteArray(Charsets.UTF_8).size}\r\n\r\n$body" + + val parser = HTTPRequestParser(raw) + val request = parser.parseRequest() + assertNotNull(request, "Failed to parse multipart request") + return request!! 
+ } +} diff --git a/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/HTTPRequestParserTests.kt b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/HTTPRequestParserTests.kt new file mode 100644 index 000000000..93bdefa35 --- /dev/null +++ b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/HTTPRequestParserTests.kt @@ -0,0 +1,157 @@ +package org.wordpress.gutenberg.http + +import org.junit.Assert.assertArrayEquals +import org.junit.Assert.assertEquals +import org.junit.Assert.assertFalse +import org.junit.Assert.assertNull +import org.junit.Assert.assertTrue +import org.junit.Test + +/** + * Tests that require platform-specific assertions (internal dict representation, + * state enum transitions, error HTTP status mapping) or configurations not + * expressible in the shared JSON fixture format. All pure parse-input → + * expected-output tests have been migrated to test-fixtures/http/request-parsing.json. + */ +class HTTPRequestParserTests { + + // MARK: - Duplicate Header Key Casing (Internal Dict Representation) + + @Test + fun `duplicate headers preserve first occurrence key casing`() { + val request = HTTPRequestParser( + "GET / HTTP/1.1\r\nHost: localhost\r\nX-Custom: one\r\nx-custom: two\r\n\r\n" + ).parseRequest()!! + + // The combined value should be stored under the first key's casing. + assertTrue(request.headers.containsKey("X-Custom")) + assertFalse(request.headers.containsKey("x-custom")) + assertEquals("one, two", request.headers["X-Custom"]) + } + + // MARK: - Max Header Size + + @Test(expected = HTTPRequestParseException::class) + fun `rejects headers that exceed maxHeaderSize`() { + // MAX_HEADER_SIZE is 65536. Build headers larger than that without a terminator. + val longValue = "X".repeat(65_500) + val request = "GET / HTTP/1.1\r\nHost: localhost\r\nX-Long: $longValue\r\n" + // No \r\n\r\n terminator, so the parser will see 65K+ bytes with no end. 
+ val parser = HTTPRequestParser(request) + parser.parseRequest() + } + + @Test + fun `accepts headers just under maxHeaderSize`() { + val request = HTTPRequestParser( + "GET / HTTP/1.1\r\nHost: localhost\r\n\r\n" + ).parseRequest()!! + + assertEquals("GET", request.method) + } + + // MARK: - Incremental Parsing (State Transitions) + + @Test + fun `handles data arriving in chunks`() { + val parser = HTTPRequestParser() + parser.append("GET /wp/v2/posts ".toByteArray()) + assertEquals(HTTPRequestParser.State.NEEDS_MORE_DATA, parser.state) + assertNull(parser.parseRequest()) + + parser.append("HTTP/1.1\r\nHost: localhost\r\n\r\n".toByteArray()) + assertEquals(HTTPRequestParser.State.COMPLETE, parser.state) + + val request = parser.parseRequest()!! + assertEquals("GET", request.method) + assertEquals("/wp/v2/posts", request.target) + } + + @Test + fun `body arriving in multiple chunks`() { + val parser = HTTPRequestParser() + parser.append("POST / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 10\r\n\r\n".toByteArray()) + assertEquals(HTTPRequestParser.State.HEADERS_COMPLETE, parser.state) + + parser.append("hello".toByteArray()) + assertEquals(HTTPRequestParser.State.HEADERS_COMPLETE, parser.state) + + parser.append("world".toByteArray()) + assertEquals(HTTPRequestParser.State.COMPLETE, parser.state) + + val request = parser.parseRequest()!! + assertArrayEquals("helloworld".toByteArray(), request.body?.readBytes()) + } + + // MARK: - Content-Length at maxBodySize Boundary + + @Test + fun `accepts Content-Length at maxBodySize limit`() { + val body = "X".repeat(100) + val request = HTTPRequestParser( + input = "POST / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 100\r\n\r\n$body", + maxBodySize = 100 + ).parseRequest()!! 
+ + assertTrue(request.isComplete) + assertArrayEquals(body.toByteArray(), request.body?.readBytes()) + } + + // MARK: - Error HTTP Status Mapping + + @Test + fun `headersTooLarge maps to HTTP 431`() { + assertEquals(431, HTTPRequestParseError.HEADERS_TOO_LARGE.httpStatus) + } + + @Test + fun `payloadTooLarge maps to HTTP 413`() { + assertEquals(413, HTTPRequestParseError.PAYLOAD_TOO_LARGE.httpStatus) + } + + @Test + fun `invalidFieldValue maps to HTTP 400`() { + assertEquals(400, HTTPRequestParseError.INVALID_FIELD_VALUE.httpStatus) + } + + // MARK: - Bare CR at Field Value Edges + + @Test(expected = HTTPRequestParseException::class) + fun `rejects bare CR at start of field value`() { + HTTPRequestParser( + "GET / HTTP/1.1\r\nHost: localhost\r\nX-Bad: \rhello\r\n\r\n" + ).parseRequest() + } + + @Test(expected = HTTPRequestParseException::class) + fun `rejects bare CR at end of field value`() { + HTTPRequestParser( + "GET / HTTP/1.1\r\nHost: localhost\r\nX-Bad: hello\r\r\n\r\n" + ).parseRequest() + } + + // MARK: - Header Count Limit + + @Test + fun `tooManyHeaders maps to HTTP 431`() { + assertEquals(431, HTTPRequestParseError.TOO_MANY_HEADERS.httpStatus) + } + + @Test(expected = HTTPRequestParseException::class) + fun `rejects requests with more than 100 header field lines`() { + // 1 Host + 100 X-Headers = 101 total header lines → rejected + val headers = (0 until 100).joinToString("") { "X-Header-$it: value\r\n" } + val raw = "GET / HTTP/1.1\r\nHost: localhost\r\n$headers\r\n" + HTTPRequestParser(raw).parseRequest() + } + + @Test + fun `accepts requests with exactly 100 header field lines`() { + // 1 Host + 99 X-Headers = 100 total header lines → accepted + val headers = (0 until 99).joinToString("") { "X-Header-$it: value\r\n" } + val raw = "GET / HTTP/1.1\r\nHost: localhost\r\n$headers\r\n" + val request = HTTPRequestParser(raw).parseRequest()!! 
+ + assertEquals("GET", request.method) + } +} diff --git a/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/ParsedHTTPRequestTests.kt b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/ParsedHTTPRequestTests.kt new file mode 100644 index 000000000..696c9021c --- /dev/null +++ b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/ParsedHTTPRequestTests.kt @@ -0,0 +1,173 @@ +package org.wordpress.gutenberg.http + +import org.junit.Assert.assertEquals +import org.junit.Assert.assertFalse +import org.junit.Assert.assertNull +import org.junit.Assert.assertTrue +import org.junit.Test + +class ParsedHTTPRequestTests { + + // MARK: - Case-insensitive Authorization header lookup (fix #1 regression test) + + @Test + fun `header returns Authorization value with lowercase header name`() { + // HTTP header names are case-insensitive per RFC 9110 §5.1. + // The server uses header("Authorization") to authenticate — a client + // sending "authorization" (lowercase) must be matched. 
+ val request = ParsedHTTPRequest( + method = "GET", + target = "/wp/v2/posts", + httpVersion = "HTTP/1.1", + headers = mapOf("authorization" to "Bearer tok123"), + body = null, + isComplete = true + ) + + assertEquals("Bearer tok123", request.header("Authorization")) + assertEquals("Bearer tok123", request.header("authorization")) + assertEquals("Bearer tok123", request.header("AUTHORIZATION")) + } + + @Test + fun `header returns Authorization value with mixed-case header name`() { + val request = ParsedHTTPRequest( + method = "GET", + target = "/wp/v2/posts", + httpVersion = "HTTP/1.1", + headers = mapOf("AUTHORIZATION" to "Bearer secret"), + body = null, + isComplete = true + ) + + assertEquals("Bearer secret", request.header("Authorization")) + assertEquals("Bearer secret", request.header("authorization")) + } + + @Test + fun `header returns null for missing Authorization`() { + val request = ParsedHTTPRequest( + method = "GET", + target = "/wp/v2/posts", + httpVersion = "HTTP/1.1", + headers = mapOf("Accept" to "application/json"), + body = null, + isComplete = true + ) + + assertNull(request.header("Authorization")) + } + + // MARK: - Case-insensitive Connection header lookup (fix #2 regression test) + + @Test + fun `header returns Connection value with lowercase header name`() { + val request = ParsedHTTPRequest( + method = "GET", + target = "/wp/v2/posts", + httpVersion = "HTTP/1.1", + headers = mapOf("connection" to "keep-alive"), + body = null, + isComplete = true + ) + + assertEquals("keep-alive", request.header("Connection")) + assertEquals("keep-alive", request.header("connection")) + assertEquals("keep-alive", request.header("CONNECTION")) + } + + // MARK: - forwardingHeaders + + @Test + fun `forwardingHeaders strips standard hop-by-hop headers`() { + val request = ParsedHTTPRequest( + method = "GET", + target = "/wp/v2/posts", + httpVersion = "HTTP/1.1", + headers = mapOf( + "Host" to "localhost", + "Connection" to "keep-alive", + "Transfer-Encoding" 
to "chunked", + "Keep-Alive" to "timeout=5", + "Accept" to "application/json", + "Content-Type" to "text/plain" + ), + body = null, + isComplete = true + ) + + val forwarded = request.forwardingHeaders() + assertFalse(forwarded.keys.any { it.equals("Host", ignoreCase = true) }) + assertFalse(forwarded.keys.any { it.equals("Connection", ignoreCase = true) }) + assertFalse(forwarded.keys.any { it.equals("Transfer-Encoding", ignoreCase = true) }) + assertFalse(forwarded.keys.any { it.equals("Keep-Alive", ignoreCase = true) }) + assertEquals("application/json", forwarded["Accept"]) + assertEquals("text/plain", forwarded["Content-Type"]) + } + + @Test + fun `forwardingHeaders strips Proxy-Authorization but keeps Authorization`() { + val request = ParsedHTTPRequest( + method = "GET", + target = "/wp/v2/posts", + httpVersion = "HTTP/1.1", + headers = mapOf( + "Proxy-Authorization" to "Bearer proxy-token", + "Authorization" to "Basic dXNlcjpwYXNz", + "Accept" to "application/json" + ), + body = null, + isComplete = true + ) + + val forwarded = request.forwardingHeaders() + assertFalse(forwarded.keys.any { it.equals("Proxy-Authorization", ignoreCase = true) }) + assertEquals("Basic dXNlcjpwYXNz", forwarded["Authorization"]) + assertEquals("application/json", forwarded["Accept"]) + } + + @Test + fun `forwardingHeaders strips headers listed in Connection header`() { + val request = ParsedHTTPRequest( + method = "GET", + target = "/wp/v2/posts", + httpVersion = "HTTP/1.1", + headers = mapOf( + "Connection" to "X-Custom, X-Other", + "X-Custom" to "value1", + "X-Other" to "value2", + "Accept" to "application/json" + ), + body = null, + isComplete = true + ) + + val forwarded = request.forwardingHeaders() + assertFalse(forwarded.keys.any { it.equals("Connection", ignoreCase = true) }) + assertFalse(forwarded.keys.any { it.equals("X-Custom", ignoreCase = true) }) + assertFalse(forwarded.keys.any { it.equals("X-Other", ignoreCase = true) }) + assertEquals("application/json", 
forwarded["Accept"]) + } + + @Test + fun `forwardingHeaders preserves non-hop-by-hop headers`() { + val request = ParsedHTTPRequest( + method = "POST", + target = "/wp/v2/posts", + httpVersion = "HTTP/1.1", + headers = mapOf( + "Content-Type" to "application/json", + "Accept" to "application/json", + "X-WP-Nonce" to "abc123" + ), + body = null, + isComplete = true + ) + + val forwarded = request.forwardingHeaders() + assertEquals(3, forwarded.size) + assertEquals("application/json", forwarded["Content-Type"]) + assertEquals("application/json", forwarded["Accept"]) + assertEquals("abc123", forwarded["X-WP-Nonce"]) + } +} diff --git a/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/RequestBodyTests.kt b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/RequestBodyTests.kt new file mode 100644 index 000000000..ed252f63f --- /dev/null +++ b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/RequestBodyTests.kt @@ -0,0 +1,243 @@ +package org.wordpress.gutenberg.http + +import org.junit.Assert.assertArrayEquals +import org.junit.Assert.assertEquals +import org.junit.Assert.assertFalse +import org.junit.Assert.assertNotEquals +import org.junit.Assert.assertNotNull +import org.junit.Assert.assertNull +import org.junit.Assert.assertTrue +import org.junit.Test +import java.io.File + +class RequestBodyTests { + + // MARK: - InMemory + + @Test + fun `InMemory inputStream returns correct data`() { + val data = "hello world".toByteArray() + val body = RequestBody.InMemory(data) + + val result = body.inputStream().use { it.readBytes() } + assertArrayEquals(data, result) + } + + @Test + fun `InMemory readBytes returns copy of data`() { + val data = "hello".toByteArray() + val body = RequestBody.InMemory(data) + val copy = body.readBytes() + + assertArrayEquals(data, copy) + // Mutating the copy should not affect the original. 
+ copy[0] = 'X'.code.toByte() + assertEquals('h'.code.toByte(), data[0]) + } + + @Test + fun `InMemory size returns correct value`() { + val body = RequestBody.InMemory("hello".toByteArray()) + assertEquals(5L, body.size) + } + + @Test + fun `InMemory inMemoryData returns non-null`() { + val body = RequestBody.InMemory("test".toByteArray()) + assertNotNull(body.inMemoryData) + assertEquals(4, body.inMemoryData!!.size) + } + + @Test + fun `InMemory file returns null`() { + val body = RequestBody.InMemory("test".toByteArray()) + assertNull(body.file) + } + + @Test + fun `InMemory equality with same data`() { + val a = RequestBody.InMemory("hello".toByteArray()) + val b = RequestBody.InMemory("hello".toByteArray()) + assertEquals(a, b) + assertEquals(a.hashCode(), b.hashCode()) + } + + @Test + fun `InMemory inequality with different data`() { + val a = RequestBody.InMemory("hello".toByteArray()) + val b = RequestBody.InMemory("world".toByteArray()) + assertNotEquals(a, b) + } + + @Test + fun `InMemory empty body`() { + val body = RequestBody.InMemory(ByteArray(0)) + assertEquals(0L, body.size) + assertArrayEquals(ByteArray(0), body.readBytes()) + assertArrayEquals(ByteArray(0), body.inputStream().use { it.readBytes() }) + } + + // MARK: - FileBacked + + @Test + fun `FileBacked inputStream returns correct data`() { + val data = "file-backed content".toByteArray() + val file = File.createTempFile("rb-test-", null) + file.deleteOnExit() + file.writeBytes(data) + + val body = RequestBody.FileBacked(file = file, fileOffset = 0, size = data.size.toLong()) + val result = body.inputStream().use { it.readBytes() } + assertArrayEquals(data, result) + } + + @Test + fun `FileBacked readBytes returns correct data`() { + val data = "readBytes test".toByteArray() + val file = File.createTempFile("rb-test-", null) + file.deleteOnExit() + file.writeBytes(data) + + val body = RequestBody.FileBacked(file = file, fileOffset = 0, size = data.size.toLong()) + assertArrayEquals(data, 
body.readBytes()) + } + + @Test + fun `FileBacked with offset reads correct slice`() { + val file = File.createTempFile("rb-test-", null) + file.deleteOnExit() + file.writeBytes("GARBAGE_hello world_TRAILING".toByteArray()) + + // Read "hello world" starting at offset 8, length 11. + val body = RequestBody.FileBacked(file = file, fileOffset = 8, size = 11) + + assertArrayEquals("hello world".toByteArray(), body.readBytes()) + assertArrayEquals("hello world".toByteArray(), body.inputStream().use { it.readBytes() }) + } + + @Test + fun `FileBacked inputStream respects offset and size boundary`() { + val file = File.createTempFile("rb-test-", null) + file.deleteOnExit() + file.writeBytes("AAAbbbCCC".toByteArray()) + + val body = RequestBody.FileBacked(file = file, fileOffset = 3, size = 3) + + val stream = body.inputStream() + val result = stream.use { it.readBytes() } + assertArrayEquals("bbb".toByteArray(), result) + } + + @Test + fun `FileBacked single byte read`() { + val file = File.createTempFile("rb-test-", null) + file.deleteOnExit() + file.writeBytes("ABCDE".toByteArray()) + + val body = RequestBody.FileBacked(file = file, fileOffset = 2, size = 1) + val stream = body.inputStream() + assertEquals('C'.code, stream.read()) + assertEquals(-1, stream.read()) // Past the size boundary. 
+ stream.close() + } + + @Test + fun `FileBacked binary data preserved`() { + val binaryData = ByteArray(512) { it.toByte() } + val file = File.createTempFile("rb-test-", null) + file.deleteOnExit() + file.writeBytes(binaryData) + + val body = RequestBody.FileBacked(file = file, fileOffset = 0, size = binaryData.size.toLong()) + assertArrayEquals(binaryData, body.readBytes()) + assertArrayEquals(binaryData, body.inputStream().use { it.readBytes() }) + } + + @Test + fun `FileBacked zero-length body`() { + val file = File.createTempFile("rb-test-", null) + file.deleteOnExit() + file.writeBytes("content".toByteArray()) + + val body = RequestBody.FileBacked(file = file, fileOffset = 3, size = 0) + assertEquals(0L, body.size) + assertArrayEquals(ByteArray(0), body.readBytes()) + } + + @Test + fun `FileBacked multiple streams read independently`() { + val data = "independent reads".toByteArray() + val file = File.createTempFile("rb-test-", null) + file.deleteOnExit() + file.writeBytes(data) + + val body = RequestBody.FileBacked(file = file, fileOffset = 0, size = data.size.toLong()) + + val stream1 = body.inputStream() + val stream2 = body.inputStream() + + val result1 = stream1.use { it.readBytes() } + val result2 = stream2.use { it.readBytes() } + + assertArrayEquals(data, result1) + assertArrayEquals(data, result2) + } + + @Test + fun `FileBacked inMemoryData returns null`() { + val file = File.createTempFile("rb-test-", null) + file.deleteOnExit() + file.writeBytes("test".toByteArray()) + + val body = RequestBody.FileBacked(file = file, fileOffset = 0, size = 4) + assertNull(body.inMemoryData) + } + + @Test + fun `FileBacked equality with same file and range`() { + val file = File.createTempFile("rb-test-", null) + file.deleteOnExit() + file.writeBytes("content".toByteArray()) + + val a = RequestBody.FileBacked(file = file, fileOffset = 1, size = 3) + val b = RequestBody.FileBacked(file = file, fileOffset = 1, size = 3) + assertEquals(a, b) + 
assertEquals(a.hashCode(), b.hashCode()) + } + + @Test + fun `FileBacked inequality with different offset`() { + val file = File.createTempFile("rb-test-", null) + file.deleteOnExit() + file.writeBytes("content".toByteArray()) + + val a = RequestBody.FileBacked(file = file, fileOffset = 0, size = 3) + val b = RequestBody.FileBacked(file = file, fileOffset = 1, size = 3) + assertNotEquals(a, b) + } + + @Test + fun `FileBacked inequality with different size`() { + val file = File.createTempFile("rb-test-", null) + file.deleteOnExit() + file.writeBytes("content".toByteArray()) + + val a = RequestBody.FileBacked(file = file, fileOffset = 0, size = 3) + val b = RequestBody.FileBacked(file = file, fileOffset = 0, size = 5) + assertNotEquals(a, b) + } + + // MARK: - Cross-type inequality + + @Test + fun `InMemory and FileBacked are not equal even with same content`() { + val data = "same content".toByteArray() + val file = File.createTempFile("rb-test-", null) + file.deleteOnExit() + file.writeBytes(data) + + val inMemory = RequestBody.InMemory(data) + val fileBacked = RequestBody.FileBacked(file = file, fileOffset = 0, size = data.size.toLong()) + assertNotEquals(inMemory as RequestBody, fileBacked as RequestBody) + } +} diff --git a/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/TrimOWSTests.kt b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/TrimOWSTests.kt new file mode 100644 index 000000000..79d776479 --- /dev/null +++ b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/http/TrimOWSTests.kt @@ -0,0 +1,129 @@ +package org.wordpress.gutenberg.http + +import org.junit.Assert.assertEquals +import org.junit.Assert.assertSame +import org.junit.Test + +class TrimOWSTests { + + // MARK: - No-op cases + + @Test + fun `returns same instance when no OWS present`() { + val input = "hello" + assertSame(input, input.trimOWS()) + } + + @Test + fun `returns same instance for empty string`() { + val input = "" + assertSame(input, input.trimOWS()) + 
} + + // MARK: - Leading whitespace + + @Test + fun `trims leading spaces`() { + assertEquals("hello", " hello".trimOWS()) + } + + @Test + fun `trims leading tabs`() { + assertEquals("hello", "\t\thello".trimOWS()) + } + + @Test + fun `trims mixed leading SP and HTAB`() { + assertEquals("hello", " \t \thello".trimOWS()) + } + + // MARK: - Trailing whitespace + + @Test + fun `trims trailing spaces`() { + assertEquals("hello", "hello ".trimOWS()) + } + + @Test + fun `trims trailing tabs`() { + assertEquals("hello", "hello\t\t".trimOWS()) + } + + @Test + fun `trims mixed trailing SP and HTAB`() { + assertEquals("hello", "hello \t \t".trimOWS()) + } + + // MARK: - Both ends + + @Test + fun `trims both leading and trailing OWS`() { + assertEquals("hello", " \t hello \t ".trimOWS()) + } + + @Test + fun `all OWS returns empty string`() { + assertEquals("", " \t \t ".trimOWS()) + } + + // MARK: - Preserves interior whitespace + + @Test + fun `preserves interior spaces`() { + assertEquals("hello world", " hello world ".trimOWS()) + } + + @Test + fun `preserves interior tabs`() { + assertEquals("hello\tworld", "\thello\tworld\t".trimOWS()) + } + + // MARK: - Does NOT strip non-OWS characters + + @Test + fun `preserves leading CR`() { + assertEquals("\rhello", "\rhello".trimOWS()) + } + + @Test + fun `preserves trailing CR`() { + assertEquals("hello\r", "hello\r".trimOWS()) + } + + @Test + fun `preserves leading LF`() { + assertEquals("\nhello", "\nhello".trimOWS()) + } + + @Test + fun `preserves trailing LF`() { + assertEquals("hello\n", "hello\n".trimOWS()) + } + + @Test + fun `preserves vertical tab`() { + assertEquals("\u000Bhello", "\u000Bhello".trimOWS()) + } + + @Test + fun `preserves form feed`() { + assertEquals("\u000Chello", "\u000Chello".trimOWS()) + } + + @Test + fun `preserves null byte`() { + assertEquals("\u0000hello", "\u0000hello".trimOWS()) + } + + // MARK: - OWS around non-OWS control chars + + @Test + fun `trims OWS but preserves CR between`() { + 
assertEquals("\rhello\r", " \rhello\r ".trimOWS()) + } + + @Test + fun `trims OWS but preserves LF between`() { + assertEquals("\nhello\n", "\t\nhello\n\t".trimOWS()) + } +} diff --git a/android/app/src/main/AndroidManifest.xml b/android/app/src/main/AndroidManifest.xml index 0a22ac632..354a162b5 100644 --- a/android/app/src/main/AndroidManifest.xml +++ b/android/app/src/main/AndroidManifest.xml @@ -4,7 +4,7 @@ + + \ No newline at end of file diff --git a/android/app/src/main/java/com/example/gutenbergkit/MainActivity.kt b/android/app/src/main/java/com/example/gutenbergkit/MainActivity.kt index 3725cbfd6..57f69f5ed 100644 --- a/android/app/src/main/java/com/example/gutenbergkit/MainActivity.kt +++ b/android/app/src/main/java/com/example/gutenbergkit/MainActivity.kt @@ -20,12 +20,16 @@ import androidx.compose.foundation.lazy.items import androidx.compose.material.icons.Icons import androidx.compose.material.icons.filled.Add import androidx.compose.material.icons.filled.Language +import androidx.compose.material.icons.filled.MoreVert import androidx.compose.material.icons.outlined.Computer import androidx.compose.material.icons.outlined.Article import androidx.compose.material3.Card +import androidx.compose.material3.DropdownMenu +import androidx.compose.material3.DropdownMenuItem import androidx.compose.material3.ExperimentalMaterial3Api import androidx.compose.material3.FloatingActionButton import androidx.compose.material3.Icon +import androidx.compose.material3.IconButton import androidx.compose.material3.ListItem import androidx.compose.material3.MaterialTheme import androidx.compose.material3.Scaffold @@ -99,6 +103,9 @@ class MainActivity : ComponentActivity(), AuthenticationManager.AuthenticationCa configurations.remove(config) configurationStorage.saveConfigurations(configurations) }, + onMediaProxyServer = { + startActivity(Intent(this, MediaProxyServerActivity::class.java)) + }, isDiscoveringSite = isDiscoveringSite.value, onDismissDiscovering = { 
isDiscoveringSite.value = false }, isLoadingCapabilities = isLoadingCapabilities.value @@ -185,6 +192,7 @@ fun MainScreen( onConfigurationLongClick: (ConfigurationItem) -> Boolean, onAddConfiguration: (String) -> Unit, onDeleteConfiguration: (ConfigurationItem) -> Unit, + onMediaProxyServer: () -> Unit = {}, isDiscoveringSite: Boolean = false, onDismissDiscovering: () -> Unit = {}, isLoadingCapabilities: Boolean = false @@ -192,12 +200,33 @@ fun MainScreen( var showAddDialog = remember { mutableStateOf(false) } var showDeleteDialog = remember { mutableStateOf(null) } var siteUrlInput = remember { mutableStateOf("") } + var showOverflowMenu = remember { mutableStateOf(false) } Scaffold( modifier = Modifier.fillMaxSize(), topBar = { TopAppBar( - title = { Text(stringResource(R.string.demo_title)) } + title = { Text(stringResource(R.string.demo_title)) }, + actions = { + IconButton(onClick = { showOverflowMenu.value = true }) { + Icon( + imageVector = Icons.Default.MoreVert, + contentDescription = stringResource(R.string.more_options) + ) + } + DropdownMenu( + expanded = showOverflowMenu.value, + onDismissRequest = { showOverflowMenu.value = false } + ) { + DropdownMenuItem( + text = { Text("Media Proxy Server") }, + onClick = { + showOverflowMenu.value = false + onMediaProxyServer() + } + ) + } + } ) }, floatingActionButton = { diff --git a/android/app/src/main/java/com/example/gutenbergkit/MediaProxyServerActivity.kt b/android/app/src/main/java/com/example/gutenbergkit/MediaProxyServerActivity.kt new file mode 100644 index 000000000..7ff069ebb --- /dev/null +++ b/android/app/src/main/java/com/example/gutenbergkit/MediaProxyServerActivity.kt @@ -0,0 +1,362 @@ +package com.example.gutenbergkit + +import android.os.Bundle +import androidx.activity.ComponentActivity +import androidx.activity.compose.setContent +import androidx.activity.enableEdgeToEdge +import androidx.compose.animation.AnimatedContent +import androidx.compose.foundation.layout.Arrangement +import 
androidx.compose.foundation.layout.Column +import androidx.compose.foundation.layout.PaddingValues +import androidx.compose.foundation.layout.fillMaxSize +import androidx.compose.foundation.layout.fillMaxWidth +import androidx.compose.foundation.layout.padding +import androidx.compose.foundation.lazy.LazyColumn +import androidx.compose.foundation.lazy.items +import androidx.compose.material.icons.Icons +import androidx.compose.material.icons.automirrored.filled.ArrowBack +import androidx.compose.material3.Button +import androidx.compose.material3.ButtonDefaults +import androidx.compose.material3.ExperimentalMaterial3Api +import androidx.compose.material3.Icon +import androidx.compose.material3.IconButton +import androidx.compose.material3.ListItem +import androidx.compose.material3.MaterialTheme +import androidx.compose.material3.Scaffold +import androidx.compose.material3.Switch +import androidx.compose.material3.Text +import androidx.compose.material3.TopAppBar +import androidx.compose.runtime.Composable +import androidx.compose.runtime.DisposableEffect +import androidx.compose.runtime.LaunchedEffect +import androidx.compose.runtime.mutableStateListOf +import androidx.compose.runtime.mutableStateOf +import androidx.compose.runtime.remember +import androidx.compose.ui.Modifier +import androidx.compose.ui.text.font.FontFamily +import androidx.compose.ui.unit.dp +import com.example.gutenbergkit.ui.theme.AppTheme +import org.wordpress.gutenberg.HttpResponse +import org.wordpress.gutenberg.HttpServer +import org.wordpress.gutenberg.RequestLogEntry +import android.text.format.Formatter +import androidx.compose.foundation.layout.Row +import androidx.compose.foundation.layout.Spacer +import androidx.compose.runtime.rememberCoroutineScope +import java.net.HttpURLConnection +import java.net.URL +import java.text.SimpleDateFormat +import java.util.Locale +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.launch +import kotlinx.coroutines.withContext + +data 
class SpeedTestResult( + val size: Int, + val durationMs: Long +) { + val throughputBytesPerSec: Double get() = size.toDouble() / (durationMs.toDouble() / 1000.0) +} + +class MediaProxyServerActivity : ComponentActivity() { + override fun onCreate(savedInstanceState: Bundle?) { + super.onCreate(savedInstanceState) + enableEdgeToEdge() + + setContent { + AppTheme { + MediaProxyServerScreen(onBack = { finish() }) + } + } + } +} + +@OptIn(ExperimentalMaterial3Api::class) +@Composable +fun MediaProxyServerScreen(onBack: () -> Unit) { + val server = remember { mutableStateOf(null) } + val logs = remember { mutableStateListOf() } + val localAddress = remember { mutableStateOf("") } + val externallyAccessible = remember { mutableStateOf(true) } + val isRunning = server.value != null + val timeFormat = remember { SimpleDateFormat("HH:mm:ss", Locale.getDefault()) } + val speedTestResults = remember { mutableStateListOf() } + val isRunningSpeedTest = remember { mutableStateOf(false) } + val coroutineScope = rememberCoroutineScope() + + fun startServer() { + // Authentication is disabled for this demo app — it is never shipped to + // end users. Production code should always set requiresAuthentication = true. + val s = HttpServer( + name = "media-proxy-demo", + requestedPort = 8080, + externallyAccessible = externallyAccessible.value, + requiresAuthentication = false, + handler = { request -> + // Note: logs grows without bound. This is acceptable for a demo app; + // a production UI should cap the list or use a ring buffer. 
+ withContext(Dispatchers.Main) { + logs.add(0, RequestLogEntry( + timestamp = java.util.Date(), + method = request.method, + target = request.target, + requestBodySize = request.body?.size?.toInt() ?: 0, + parseDurationMs = request.parseDurationMs + )) + } + HttpResponse(body = "OK\n".toByteArray()) + } + ) + s.start() + localAddress.value = if (externallyAccessible.value) { + HttpServer.getLocalIpAddress() ?: "unknown" + } else { + "127.0.0.1" + } + server.value = s + } + + fun stopServer() { + server.value?.stop() + server.value = null + } + + LaunchedEffect(Unit) { + startServer() + } + + DisposableEffect(Unit) { + onDispose { stopServer() } + } + + Scaffold( + modifier = Modifier.fillMaxSize(), + topBar = { + TopAppBar( + title = { Text("Media Proxy Server") }, + navigationIcon = { + IconButton(onClick = onBack) { + Icon( + imageVector = Icons.AutoMirrored.Filled.ArrowBack, + contentDescription = "Back" + ) + } + } + ) + } + ) { innerPadding -> + LazyColumn( + modifier = Modifier + .fillMaxSize() + .padding(innerPadding), + contentPadding = PaddingValues(16.dp), + verticalArrangement = Arrangement.spacedBy(4.dp) + ) { + item { + ListItem( + headlineContent = { Text("Address") }, + trailingContent = { + AnimatedContent( + targetState = isRunning, + label = "address" + ) { running -> + if (running) { + Text( + text = "${localAddress.value}:${server.value?.port}", + fontFamily = FontFamily.Monospace + ) + } else { + Text( + text = "Loading...", + color = MaterialTheme.colorScheme.onSurfaceVariant + ) + } + } + } + ) + } + + item { + ListItem( + headlineContent = { Text("Externally Accessible") }, + trailingContent = { + Switch( + checked = externallyAccessible.value, + enabled = isRunning, + onCheckedChange = { + externallyAccessible.value = it + stopServer() + startServer() + } + ) + } + ) + } + + item { + if (isRunning) { + Button( + onClick = { stopServer() }, + colors = ButtonDefaults.buttonColors( + containerColor = MaterialTheme.colorScheme.error + ), + 
modifier = Modifier + .fillMaxWidth() + .padding(horizontal = 16.dp, vertical = 8.dp) + ) { + Text("Stop Server") + } + } else { + Button( + onClick = { startServer() }, + modifier = Modifier + .fillMaxWidth() + .padding(horizontal = 16.dp, vertical = 8.dp) + ) { + Text("Start Server") + } + } + } + + item { + Column( + modifier = Modifier.padding(horizontal = 16.dp, vertical = 8.dp) + ) { + Text( + text = "SPEED TEST", + style = MaterialTheme.typography.labelLarge, + color = MaterialTheme.colorScheme.primary + ) + } + } + + item { + if (isRunning) { + Button( + onClick = { + coroutineScope.launch { + isRunningSpeedTest.value = true + speedTestResults.clear() + val sizes = listOf(128 * 1024, 512 * 1024, 1024 * 1024, 5 * 1024 * 1024, 10 * 1024 * 1024) + val port = server.value?.port ?: return@launch + for (size in sizes) { + val result = withContext(Dispatchers.IO) { + val payload = ByteArray(size) { 0x42 } + // 127.0.0.1 is intentional — the speed test is a local + // self-benchmark, not a device-to-device test. The server's + // externallyAccessible toggle controls whether remote clients + // can connect. + val conn = URL("http://127.0.0.1:$port/speed-test").openConnection() as HttpURLConnection + conn.requestMethod = "POST" + conn.doOutput = true + conn.setFixedLengthStreamingMode(size) + val start = System.nanoTime() + conn.outputStream.use { it.write(payload) } + conn.inputStream.use { it.readBytes() } + conn.disconnect() + val elapsed = System.nanoTime() - start + SpeedTestResult(size, elapsed / 1_000_000) + } + speedTestResults.add(result) + } + isRunningSpeedTest.value = false + } + }, + enabled = !isRunningSpeedTest.value, + modifier = Modifier + .fillMaxWidth() + .padding(horizontal = 16.dp, vertical = 4.dp) + ) { + Text(if (isRunningSpeedTest.value) "Running..." 
else "Run Speed Test") + } + } + } + + if (speedTestResults.isNotEmpty()) { + item { + Row( + modifier = Modifier + .fillMaxWidth() + .padding(horizontal = 16.dp, vertical = 2.dp), + horizontalArrangement = Arrangement.SpaceBetween + ) { + Text("Size", fontFamily = FontFamily.Monospace, style = MaterialTheme.typography.labelSmall, color = MaterialTheme.colorScheme.onSurfaceVariant, modifier = Modifier.weight(1f)) + Text("Time", fontFamily = FontFamily.Monospace, style = MaterialTheme.typography.labelSmall, color = MaterialTheme.colorScheme.onSurfaceVariant, modifier = Modifier.weight(1f)) + Text("Throughput", fontFamily = FontFamily.Monospace, style = MaterialTheme.typography.labelSmall, color = MaterialTheme.colorScheme.onSurfaceVariant, modifier = Modifier.weight(1f)) + } + } + } + + items(speedTestResults) { result -> + Row( + modifier = Modifier + .fillMaxWidth() + .padding(horizontal = 16.dp, vertical = 4.dp), + horizontalArrangement = Arrangement.SpaceBetween + ) { + Text( + text = Formatter.formatShortFileSize(androidx.compose.ui.platform.LocalContext.current, result.size.toLong()), + fontFamily = FontFamily.Monospace, + style = MaterialTheme.typography.bodySmall, + modifier = Modifier.weight(1f) + ) + Text( + text = "${result.durationMs} ms", + fontFamily = FontFamily.Monospace, + style = MaterialTheme.typography.bodySmall, + modifier = Modifier.weight(1f) + ) + Text( + text = "${Formatter.formatShortFileSize(androidx.compose.ui.platform.LocalContext.current, result.throughputBytesPerSec.toLong())}/s", + fontFamily = FontFamily.Monospace, + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + modifier = Modifier.weight(1f) + ) + } + } + + item { + Column( + modifier = Modifier.padding(horizontal = 16.dp, vertical = 8.dp) + ) { + Text( + text = "REQUEST LOG", + style = MaterialTheme.typography.labelLarge, + color = MaterialTheme.colorScheme.primary + ) + } + } + + if (logs.isEmpty()) { + item { + Text( + text = 
"No requests yet", + style = MaterialTheme.typography.bodyMedium, + color = MaterialTheme.colorScheme.onSurfaceVariant, + modifier = Modifier.padding(horizontal = 16.dp) + ) + } + } else { + items(logs) { entry -> + ListItem( + headlineContent = { + Text( + text = "${entry.method} ${entry.target}", + fontFamily = FontFamily.Monospace, + style = MaterialTheme.typography.bodySmall + ) + }, + supportingContent = { + Text( + text = "${timeFormat.format(entry.timestamp)} · ${Formatter.formatShortFileSize(androidx.compose.ui.platform.LocalContext.current, entry.requestBodySize.toLong())} · ${"%.2f".format(entry.parseDurationMs)}ms", + style = MaterialTheme.typography.labelSmall, + color = MaterialTheme.colorScheme.onSurfaceVariant + ) + } + ) + } + } + } + } +} diff --git a/android/app/src/main/res/xml/network_security_config.xml b/android/app/src/main/res/xml/network_security_config.xml index 398176839..fbc0db5d8 100644 --- a/android/app/src/main/res/xml/network_security_config.xml +++ b/android/app/src/main/res/xml/network_security_config.xml @@ -2,6 +2,7 @@ localhost + 127.0.0.1 10.0.2.2 diff --git a/ios/Demo-iOS/Gutenberg.xcodeproj/project.pbxproj b/ios/Demo-iOS/Gutenberg.xcodeproj/project.pbxproj index ebe8102d3..6e095b78b 100644 --- a/ios/Demo-iOS/Gutenberg.xcodeproj/project.pbxproj +++ b/ios/Demo-iOS/Gutenberg.xcodeproj/project.pbxproj @@ -15,6 +15,7 @@ 246852562EAABB7800ED1F09 /* WordPressAPI in Frameworks */ = {isa = PBXBuildFile; productRef = 0C4F59A12BEFF4980028BD96 /* WordPressAPI */; }; 2468526B2EAACCA100ED1F09 /* AuthenticationManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 246852682EAACCA100ED1F09 /* AuthenticationManager.swift */; }; 2468526C2EAACCA100ED1F09 /* ConfigurationStorage.swift in Sources */ = {isa = PBXBuildFile; fileRef = 246852692EAACCA100ED1F09 /* ConfigurationStorage.swift */; }; + BB0000012F11000000000001 /* GutenbergKitHTTP in Frameworks */ = {isa = PBXBuildFile; productRef = BB0000012F11000000000002 /* GutenbergKitHTTP */; 
}; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -51,6 +52,7 @@ files = ( 246852562EAABB7800ED1F09 /* WordPressAPI in Frameworks */, 0CF6E04C2BEFF60E00EDEE8A /* GutenbergKit in Frameworks */, + BB0000012F11000000000001 /* GutenbergKitHTTP in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -152,6 +154,7 @@ packageProductDependencies = ( 0CF6E04B2BEFF60E00EDEE8A /* GutenbergKit */, 0C4F59A12BEFF4980028BD96 /* WordPressAPI */, + BB0000012F11000000000002 /* GutenbergKitHTTP */, ); productName = Gutenberg; productReference = 0C4F598B2BEFF4970028BD96 /* Gutenberg.app */; @@ -554,6 +557,10 @@ isa = XCSwiftPackageProductDependency; productName = GutenbergKit; }; + BB0000012F11000000000002 /* GutenbergKitHTTP */ = { + isa = XCSwiftPackageProductDependency; + productName = GutenbergKitHTTP; + }; /* End XCSwiftPackageProductDependency section */ }; rootObject = 0C4F59832BEFF4970028BD96 /* Project object */; diff --git a/ios/Demo-iOS/Gutenberg.xcodeproj/xcshareddata/xcschemes/Gutenberg.xcscheme b/ios/Demo-iOS/Gutenberg.xcodeproj/xcshareddata/xcschemes/Gutenberg.xcscheme index 073ac419f..f11eb35e2 100644 --- a/ios/Demo-iOS/Gutenberg.xcodeproj/xcshareddata/xcschemes/Gutenberg.xcscheme +++ b/ios/Demo-iOS/Gutenberg.xcodeproj/xcshareddata/xcschemes/Gutenberg.xcscheme @@ -29,6 +29,18 @@ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" shouldUseLaunchSchemeArgsEnv = "YES" shouldAutocreateTestPlan = "YES"> + + + + + + .Continuation? 
+ @State private var externallyAccessible = true + @State private var speedTestResults: [SpeedTestResult] = [] + @State private var isRunningSpeedTest = false + + struct LogEntry: Identifiable, Sendable { + let id = UUID() + let timestamp: Date + let method: String + let target: String + let requestBodySize: Int + } + + struct SpeedTestResult: Identifiable { + let id = UUID() + let size: Int + let duration: TimeInterval + var throughput: Double { Double(size) / duration } + } + + private var isRunning: Bool { server != nil } + + var body: some View { + List { + Section { + LabeledContent("Address") { + if let server { + Text(verbatim: "\(localAddress):\(server.port)") + .monospaced() + .textSelection(.enabled) + } else { + Text("Loading...") + .foregroundStyle(.secondary) + } + } + + Toggle("Externally Accessible", isOn: $externallyAccessible) + .disabled(!isRunning) + .onChange(of: externallyAccessible) { + Task { + stopServer() + await startServer() + } + } + + if isRunning { + Button("Stop Server", role: .destructive) { + stopServer() + } + } else if !isStarting { + Button("Start Server") { + Task { await startServer() } + } + } + } footer: { + if let errorMessage { + Text(errorMessage).foregroundStyle(.red) + } + } + + Section("Speed Test") { + if isRunning { + Button(isRunningSpeedTest ? "Running..." 
: "Run Speed Test") { + Task { await runSpeedTest() } + } + .disabled(isRunningSpeedTest) + } + if !speedTestResults.isEmpty { + HStack { + Text("Size") + .frame(width: 80, alignment: .leading) + Text("Time") + .frame(width: 70, alignment: .trailing) + Spacer() + Text("Throughput") + .foregroundStyle(.secondary) + } + .font(.system(.caption2, design: .monospaced)) + .foregroundStyle(.secondary) + } + ForEach(speedTestResults) { result in + HStack { + Text(ByteCountFormatter.string(fromByteCount: Int64(result.size), countStyle: .binary)) + .frame(width: 80, alignment: .leading) + Text(String(format: "%.0f ms", result.duration * 1000)) + .frame(width: 70, alignment: .trailing) + Spacer() + Text(ByteCountFormatter.string(fromByteCount: Int64(result.throughput), countStyle: .binary) + "/s") + .foregroundStyle(.secondary) + } + .font(.system(.caption, design: .monospaced)) + } + } + + Section("Request Log") { + if logs.isEmpty { + ContentUnavailableView("No Requests", systemImage: "network") + } else { + ForEach(logs) { entry in + VStack(alignment: .leading, spacing: 2) { + Text("\(entry.method) \(entry.target)") + .font(.system(.caption, design: .monospaced)) + HStack { + Text(entry.timestamp, style: .time) + Text(verbatim: "·") + Text(verbatim: ByteCountFormatter.string(fromByteCount: Int64(entry.requestBodySize), countStyle: .binary)) + } + .font(.caption2) + .foregroundStyle(.secondary) + } + } + } + } + } + .navigationTitle("Media Proxy Server") + .task { + await startServer() + } + .onDisappear { + stopServer() + } + } + + private func startServer() async { + guard server == nil else { return } + isStarting = true + errorMessage = nil + + let (stream, continuation) = AsyncStream.makeStream(of: LogEntry.self) + self.logContinuation = continuation + + do { + // Authentication is disabled for this demo app — it is never shipped to + // end users. Production code should always set requiresAuthentication: true. 
+ let s = try await HTTPServer.start( + name: "media-proxy-demo", + port: 8080, + listenOnAllInterfaces: externallyAccessible, + requiresAuthentication: false + ) { request in + let entry = LogEntry( + timestamp: Date(), + method: request.parsed.method, + target: request.parsed.target, + requestBodySize: request.parsed.body?.count ?? 0 + ) + continuation.yield(entry) + return HTTPResponse(status: 200, body: Data("OK\n".utf8)) + } + localAddress = externallyAccessible + ? (Self.getLocalIPAddress() ?? "unknown") + : "127.0.0.1" + server = s + isStarting = false + + // Note: logs grows without bound. This is acceptable for a demo app; + // a production UI should cap the list or use a ring buffer. + for await entry in stream { + logs.insert(entry, at: 0) + } + } catch { + errorMessage = error.localizedDescription + isStarting = false + } + } + + private func runSpeedTest() async { + guard let server else { return } + isRunningSpeedTest = true + speedTestResults = [] + + let sizes = [128 * 1024, 512 * 1024, 1024 * 1024, 5 * 1024 * 1024, 10 * 1024 * 1024] + // 127.0.0.1 is intentional — the speed test is a local self-benchmark, + // not a device-to-device test. The server's externallyAccessible toggle + // controls whether remote clients can connect. + let url = URL(string: "http://127.0.0.1:\(server.port)/speed-test")! + + for size in sizes { + let payload = Data(repeating: 0x42, count: size) + var request = URLRequest(url: url) + request.httpMethod = "POST" + request.httpBody = payload + + let start = ContinuousClock.now + _ = try? 
await URLSession.shared.data(for: request) + let elapsed = start.duration(to: .now) + let seconds = Double(elapsed.components.seconds) + Double(elapsed.components.attoseconds) / 1e18 + + speedTestResults.append(SpeedTestResult(size: size, duration: seconds)) + } + + isRunningSpeedTest = false + } + + private func stopServer() { + logContinuation?.finish() + logContinuation = nil + server?.stop() + server = nil + } + + static func getLocalIPAddress() -> String? { + var ifaddr: UnsafeMutablePointer? + guard getifaddrs(&ifaddr) == 0, let firstAddr = ifaddr else { return nil } + defer { freeifaddrs(ifaddr) } + + for ptr in sequence(first: firstAddr, next: { $0.pointee.ifa_next }) { + let flags = Int32(ptr.pointee.ifa_flags) + let addr = ptr.pointee.ifa_addr.pointee + + guard (flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING) else { continue } + guard addr.sa_family == UInt8(AF_INET) else { continue } + + let name = String(cString: ptr.pointee.ifa_name) + guard name == "en0" || name == "en1" else { continue } + + var hostname = [CChar](repeating: 0, count: Int(NI_MAXHOST)) + if getnameinfo( + ptr.pointee.ifa_addr, + socklen_t(addr.sa_len), + &hostname, + socklen_t(hostname.count), + nil, 0, NI_NUMERICHOST + ) == 0 { + return String(cString: hostname) + } + } + return nil + } +} diff --git a/ios/Sources/GutenbergKitDebugServer/README.md b/ios/Sources/GutenbergKitDebugServer/README.md new file mode 100644 index 000000000..afaa31fb7 --- /dev/null +++ b/ios/Sources/GutenbergKitDebugServer/README.md @@ -0,0 +1,36 @@ +# GutenbergKitDebugServer + +A command-line HTTP server for testing and debugging the `GutenbergKitHTTP` module. It logs incoming requests in detail and can optionally proxy them to an upstream URL. + +## Running + +```bash +swift run GutenbergKitDebugServer # auto-assign a port +swift run GutenbergKitDebugServer 8080 # listen on port 8080 +``` + +## What it does + +For every incoming request, the server: + +1. 
**Logs** the method, target, headers, body size, and parse duration. +2. **Inspects multipart bodies** — if the request has a `multipart/form-data` content type, each part's name, filename, content type, and size are printed. Small parts (≤ 200 bytes) have their text content printed inline. +3. **Proxies** the request if an `X-URL-to-fetch` header is present. The header value is used as the upstream URL; the original method and headers (minus `Host` and `X-URL-to-fetch`) are forwarded via `URLSession`. The upstream response is returned to the client. +4. **Echoes** a JSON summary if no proxy header is set. + +## Example output + +``` +GutenbergKitDebugServer listening on http://localhost:49312 +[2026-03-10T12:00:00Z] POST /wp/v2/media (0.42ms) + Host: localhost:49312 + Content-Type: multipart/form-data; boundary=----FormBoundary + Content-Length: 1234 + Body: 1234 bytes + Multipart: 2 part(s) + [0] name="title" (text/plain) + 5 bytes + Hello + [1] name="file" filename="photo.jpg" (image/jpeg) + 1100 bytes +``` diff --git a/ios/Sources/GutenbergKitDebugServer/main.swift b/ios/Sources/GutenbergKitDebugServer/main.swift new file mode 100644 index 000000000..9cb26d4fb --- /dev/null +++ b/ios/Sources/GutenbergKitDebugServer/main.swift @@ -0,0 +1,96 @@ +import Foundation +import GutenbergKitHTTP + +let port: UInt16? = CommandLine.arguments.dropFirst().first.flatMap(UInt16.init) + +let server = try await HTTPServer.start(name: "debug-server", port: port) { req in + await logRequest(req) + + if let response = await fetchUrl(req) { + return response + } + + let request = req.parsed + + let json: [String: Any] = [ + "method": request.method, + "target": request.target, + "headers": request.headerCount, + "status": "ok" + ] + let body = try! 
JSONSerialization.data(withJSONObject: json) + + return HTTPResponse( + status: 200, + headers: [("Content-Type", "application/json")], + body: body + ) +} + +print("GutenbergKitDebugServer listening on http://localhost:\(server.port)") +print("Proxy-Authorization: Bearer \(server.token)") +try await Task.sleep(for: .seconds(86400)) // Run the server for 24 hours + +func fetchUrl(_ req: HTTPServer.Request) async -> HTTPServer.Response? { + guard let urlString = req.parsed.header("X-URL-to-fetch"), let url = URL(string: urlString) else { + return nil + } + + guard let scheme = url.scheme?.lowercased(), scheme == "http" || scheme == "https" else { + return HTTPResponse(status: 400, body: Data("Only http/https URLs are supported".utf8)) + } + + do { + let filteredHeaders: Set = [ + "host", "x-url-to-fetch", "proxy-authorization" + ] + + var request = URLRequest(url: url) + request.httpMethod = req.parsed.method + for (name, value) in req.parsed.allHeaders where !filteredHeaders.contains(name.lowercased()) { + request.setValue(value, forHTTPHeaderField: name) + } + + print(" Requesting \(url)") + print(" Method: \(request.httpMethod)") + print(" Headers: \(String(describing: request.allHTTPHeaderFields))") + + return try await HTTPResponse(URLSession.shared.data(for: request)) + } catch { + print(" Request Failed: \(error.localizedDescription)") + return HTTPResponse(status: 500, body: Data(error.localizedDescription.utf8)) + } +} + +// MARK: - Logging + +func logRequest(_ req: HTTPServer.Request) async { + let request = req.parsed + let timestamp = ISO8601DateFormatter().string(from: Date()) + let ms = String(format: "%.2f", Double(req.parseDuration.components.attoseconds) / 1e15) + print("[\(timestamp)] \(request.method) \(request.target) (\(ms)ms)") + + for (name, value) in request.allHeaders { + print(" \(name): \(value)") + } + + if let body = request.body { + print(" Body: \(body.count) bytes") + + if let parts = try? 
request.multipartParts() { + print(" Multipart: \(parts.count) part(s)") + for (i, part) in parts.enumerated() { + let filename = part.filename.map { " filename=\"\($0)\"" } ?? "" + print(" [\(i)] name=\"\(part.name)\"\(filename) (\(part.contentType))") + print(" \(part.body.count) bytes") + if part.body.count <= 200, let text = try? await String(data: part.body.data, encoding: .utf8) { + print(" \(text)") + } + } + } else if body.count <= 500, let text = try? await String(data: body.data, encoding: .utf8) { + print(" \(text)") + } + } + print() + fflush(stdout) +} diff --git a/ios/Sources/GutenbergKitHTTP/Extensions.swift b/ios/Sources/GutenbergKitHTTP/Extensions.swift new file mode 100644 index 000000000..4a7ee8f66 --- /dev/null +++ b/ios/Sources/GutenbergKitHTTP/Extensions.swift @@ -0,0 +1,26 @@ +import Foundation + +extension FileHandle { + + /// Opens a file for reading, passes the handle to `body`, and guarantees the handle + /// is closed when `body` returns — whether normally or by throwing. + /// + /// ```swift + /// let data = try FileHandle.withReadHandle(forUrl: fileURL) { handle in + /// try handle.seek(toOffset: 100) + /// return try handle.read(upToCount: 50) ?? Data() + /// } + /// ``` + /// + /// - Parameters: + /// - url: The file URL to open for reading. + /// - body: A closure that receives the open `FileHandle`. + /// - Returns: The value returned by `body`. + /// - Throws: Rethrows any error from opening the file or from `body`. + static func withReadHandle(forUrl url: URL, _ body: (FileHandle) throws -> T) throws -> T { + let handle = try FileHandle(forReadingFrom: url) + // Read-only handle — close errors (EBADF, EINTR) are harmless; no buffered writes to lose. + defer { try? 
handle.close() } + return try body(handle) + } +} diff --git a/ios/Sources/GutenbergKitHTTP/HTTPRequestParser.swift b/ios/Sources/GutenbergKitHTTP/HTTPRequestParser.swift new file mode 100644 index 000000000..293c92773 --- /dev/null +++ b/ios/Sources/GutenbergKitHTTP/HTTPRequestParser.swift @@ -0,0 +1,417 @@ +import Foundation + +/// Parses raw HTTP/1.1 request data into a structured `ParsedHTTPRequest`. +/// +/// This parser handles incremental data — call `append(_:)` as bytes arrive, +/// then check `state` to determine whether buffering is complete. +/// +/// The parser buffers incoming data to a temporary file on disk rather than +/// accumulating it in memory, making it suitable for large request bodies. +/// If the temp file cannot be created (e.g. disk full), the parser falls back +/// to in-memory buffering automatically. +/// +/// State tracking is lightweight — `append(_:)` scans for the header separator +/// (`\r\n\r\n`) and extracts `Content-Length`. Full parsing and RFC validation +/// are deferred until ``parseRequest()`` is called. +/// +/// ```swift +/// let parser = HTTPRequestParser("GET /api HTTP/1.1\r\nHost: localhost\r\n\r\n") +/// let request = try parser.parseRequest() +/// print(request?.method, request?.target) +/// ``` +public final class HTTPRequestParser: @unchecked Sendable { + + /// The current buffering state of the parser. + public enum State: Sendable { + /// More data is needed before headers are complete. + case needsMoreData + /// Headers have been fully received but the body is still incomplete. + case headersComplete + /// All data has been received (headers and body). + case complete + } + + /// The default maximum request body size (4 GB). + public static let defaultMaxBodySize: Int64 = Int64(4) * 1024 * 1024 * 1024 + + /// The default threshold below which bodies are kept in memory (512 KB). 
+ public static let defaultInMemoryBodyThreshold: Int = 512 * 1024 + + /// The maximum number of bytes to buffer before the header terminator is found (64 KB). + /// This matches the `readFromBuffer` scan cap and prevents unbounded disk writes + /// from clients that never send `\r\n\r\n`. + static let maxHeaderSize: Int = 65536 + + private let lock = NSLock() + private var buffer: Buffer + private let maxBodySize: Int64 + private let inMemoryBodyThreshold: Int + private var bytesWritten: Int = 0 + private var _state: State = .needsMoreData + + // Lightweight scan results (populated by append) + private var headerEndOffset: Int? + private var expectedContentLength: Int64 = 0 + + // Lazy parsing cache (populated by parseRequest) + private var _parsedHeaders: HTTPRequestSerializer.ParsedHeaders? + private var _parseError: HTTPRequestParseError? + private var _cachedBody: RequestBody? + private var _bodyExtracted: Bool = false + + /// Creates a new parser. + /// + /// - Parameters: + /// - maxBodySize: The maximum allowed request body size in bytes. + /// Requests with a `Content-Length` exceeding this will be rejected. + /// Defaults to ``defaultMaxBodySize`` (4 GB). + /// - inMemoryBodyThreshold: Bodies smaller than this are kept in memory; + /// larger bodies are streamed to a temporary file. Defaults to + /// ``defaultInMemoryBodyThreshold`` (512 KB). + /// - tempDirectory: Directory for temporary files. Defaults to the system + /// temp directory. When used via ``HTTPServer``, this is a server-specific + /// subdirectory scoped by the server's `name`. + public init( + maxBodySize: Int64 = HTTPRequestParser.defaultMaxBodySize, + inMemoryBodyThreshold: Int = HTTPRequestParser.defaultInMemoryBodyThreshold, + tempDirectory: URL? = nil + ) { + // Cap in-memory buffers at headers + inMemoryBodyThreshold to prevent + // unbounded memory growth when temp file creation fails. 
+ self.buffer = Buffer(maxSize: Self.maxHeaderSize + inMemoryBodyThreshold, directory: tempDirectory) + self.maxBodySize = maxBodySize + self.inMemoryBodyThreshold = inMemoryBodyThreshold + } + + /// Creates a parser and immediately parses the given raw HTTP string. + /// + /// This is a convenience for one-shot parsing when all data is available upfront. + public convenience init(_ string: String) { + self.init(Data(string.utf8)) + } + + /// Creates a parser and immediately parses the given raw HTTP data. + /// + /// This is a convenience for one-shot parsing when all data is available upfront. + public convenience init(_ data: Data) { + self.init() + append(data) + } + + /// The current buffering state. + public var state: State { + lock.withLock { _state } + } + + /// The expected body length from `Content-Length`, available once headers have been received. + public var expectedBodyLength: Int64? { + lock.withLock { + guard _state.hasHeaders else { return nil } + return expectedContentLength + } + } + + /// Parses the buffered data into a structured HTTP request. + /// + /// This triggers full parsing via ``HTTPRequestSerializer`` on the first call. + /// The parsed headers are cached for subsequent calls. When the state is + /// `.complete` and a body is present, the body is extracted to a temporary + /// file on the first access. + /// + /// - Returns: The parsed request, or `nil` if the state is `.needsMoreData`. + /// - Throws: ``HTTPRequestParseError`` if the request is malformed. + public func parseRequest() throws -> ParsedHTTPRequest? 
{ + try lock.withLock { + guard _state.hasHeaders else { return nil } + + if let error = _parseError { + throw error + } + + if _parsedHeaders == nil { + let headerData = try buffer.read(from: 0, maxLength: min(bytesWritten, Self.maxHeaderSize)) + switch HTTPRequestSerializer.parseHeaders(from: headerData) { + case .parsed(let headers): + _parsedHeaders = headers + case .invalid(let error): + _parseError = error + throw error + case .needsMoreData: + return nil + } + } + + guard let headers = _parsedHeaders else { return nil } + + guard _state.isComplete else { + return .partial( + method: headers.method, + target: headers.target, + httpVersion: headers.httpVersion, + headers: headers.headers + ) + } + + if headers.contentLength > 0 && !_bodyExtracted { + _cachedBody = try extractBody( + offset: headers.bodyOffset, + length: headers.contentLength + ) + _bodyExtracted = true + } + + return .complete( + method: headers.method, + target: headers.target, + httpVersion: headers.httpVersion, + headers: headers.headers, + body: _cachedBody + ) + } + } + + /// Appends received data to the buffer and updates the buffering state. + /// + /// This method performs lightweight scanning — it looks for the `\r\n\r\n` + /// header separator and extracts the `Content-Length` value. Full parsing + /// and RFC validation are deferred until ``parseRequest()`` is called. 
+ public func append(_ data: Data) { + lock.withLock { + guard !_state.isComplete else { return } + + let accepted: Bool + do { + accepted = try buffer.append(data) + } catch { + _parseError = .bufferIOError + _state = .complete + return + } + guard accepted else { + _parseError = .payloadTooLarge + _state = .complete + return + } + bytesWritten += data.count + + if headerEndOffset == nil { + let buffered: Data + do { + buffered = try buffer.read(from: 0, maxLength: min(bytesWritten, Self.maxHeaderSize)) + } catch { + _parseError = .bufferIOError + _state = .complete + return + } + let separator = Data("\r\n\r\n".utf8) + + // RFC 7230 §3.5: Skip leading CRLFs for robustness. + var scanStart = 0 + while scanStart + 1 < buffered.count, + buffered[scanStart] == 0x0D, + buffered[scanStart + 1] == 0x0A { + scanStart += 2 + } + let effectiveData = buffered[scanStart...] + + guard let separatorRange = effectiveData.range(of: separator) else { + if bytesWritten > Self.maxHeaderSize { + _parseError = .headersTooLarge + _state = .complete + } else { + _state = .needsMoreData + } + return + } + + headerEndOffset = buffered.distance(from: buffered.startIndex, to: separatorRange.upperBound) + let headerBytes = effectiveData[effectiveData.startIndex.. maxBodySize { + _parseError = .payloadTooLarge + _state = .complete + return + } + } + + guard let offset = headerEndOffset else { return } + let bodyBytesAvailable = bytesWritten - offset + + if Int64(bodyBytesAvailable) >= expectedContentLength { + _state = .complete + } else { + _state = .headersComplete + } + } + } + + // MARK: - Content-Length Scanning + + /// Extracts and validates the `Content-Length` value from header bytes without full parsing. + /// + /// This reuses ``HTTPRequestSerializer/validateContentLength(_:existing:)`` so that + /// the scan and the later full parse apply identical validation rules. Conflicting + /// or malformed values are rejected immediately — before any body bytes are buffered. 
+ /// + /// Returns 0 if no `Content-Length` header is present. + private static func scanContentLength(in headerBytes: Data) throws(HTTPRequestParseError) -> Int64 { + guard let string = String(data: headerBytes, encoding: .utf8) else { return 0 } + let lines = string.components(separatedBy: "\r\n") + + var contentLength: Int64? + for line in lines.dropFirst() where !line.isEmpty { + guard let colonIndex = line.firstIndex(of: ":") else { continue } + let rawKey = line[line.startIndex.. RequestBody? { + if length <= inMemoryBodyThreshold { + return RequestBody(data: try buffer.read(from: offset, maxLength: Int(length))) + } + + // Reference the body range directly in the buffer's file. + if let (fileURL, owner) = buffer.transferFileOwnership() { + return RequestBody( + fileURL: fileURL, + offset: UInt64(offset), + length: Int(length), + owner: owner + ) + } + + // Memory-backed buffer — read into a Data. + return RequestBody(data: try buffer.read(from: offset, maxLength: Int(length))) + } +} + +// MARK: - Buffer + +/// Abstraction over the parser's backing store. +/// +/// Tries to use a temp file on disk (suitable for large bodies). If the file +/// cannot be created, falls back to an in-memory `Data` buffer automatically. +/// When memory-backed, the buffer is capped at `maxSize` to prevent unbounded growth. +private final class Buffer { + private let fileURL: URL? + private let fileHandle: FileHandle? + private var memoryBuffer: Data? + private var fileOwnershipTransferred = false + private let maxSize: Int + + /// Whether the buffer is backed by memory rather than a file. + var isMemoryBacked: Bool { fileHandle == nil } + + init(maxSize: Int, directory: URL? = nil) { + self.maxSize = maxSize + + let dir = directory ?? 
FileManager.default.temporaryDirectory + let url = dir.appendingPathComponent("GutenbergKitHTTP-\(UUID().uuidString)") + + if FileManager.default.createFile(atPath: url.path, contents: nil), + let handle = FileHandle(forUpdatingAtPath: url.path) { + self.fileURL = url + self.fileHandle = handle + self.memoryBuffer = nil + } else { + // Temp file unavailable — buffer in memory instead. + self.fileURL = nil + self.fileHandle = nil + self.memoryBuffer = Data() + } + } + + deinit { + if let fileHandle { + // Writable handle, but the file is a temp buffer deleted immediately below — + // an EIO on close cannot cause data loss here. + try? fileHandle.close() + } + if let fileURL, !fileOwnershipTransferred { + try? FileManager.default.removeItem(at: fileURL) + } + } + + /// Transfers ownership of the backing file to a `TempFileOwner`. + /// + /// After this call, the buffer will no longer delete the file on deinit. + /// Returns `nil` if the buffer is memory-backed or ownership was already transferred. + func transferFileOwnership() -> (URL, TempFileOwner)? { + guard let fileURL, !fileOwnershipTransferred else { return nil } + fileOwnershipTransferred = true + return (fileURL, TempFileOwner(url: fileURL)) + } + + /// Appends data to the buffer. + /// + /// - Returns: `true` if the data was accepted, `false` if the in-memory + /// buffer would exceed its size limit. + /// - Throws: If the file-backed write fails (e.g. disk full). 
+ @discardableResult + func append(_ data: Data) throws -> Bool { + if let fileHandle { + try fileHandle.seekToEnd() + try fileHandle.write(contentsOf: data) + return true + } else { + if memoryBuffer!.count + data.count > maxSize { + return false + } + memoryBuffer!.append(data) + return true + } + } + + func read(from offset: Int, maxLength: Int) throws -> Data { + precondition(offset >= 0, "offset must be non-negative, was \(offset)") + precondition(maxLength >= 0, "maxLength must be non-negative, was \(maxLength)") + if maxLength == 0 { return Data() } + if let fileHandle { + try fileHandle.seek(toOffset: UInt64(offset)) + return try fileHandle.read(upToCount: maxLength) ?? Data() + } else { + let start = memoryBuffer!.startIndex + offset + let end = min(start + maxLength, memoryBuffer!.endIndex) + return Data(memoryBuffer![start.. HeaderParseResult { + // Ensure zero-based indexing — Data slices retain their original indices, + // so a caller passing e.g. `fullData[500...]` would crash on `data[0]`. + let data = Data(data) + + // RFC 7230 §3.5: Skip leading CRLFs for robustness. + // A server SHOULD ignore at least one empty line received prior to the request-line. + var scanOffset = 0 + while scanOffset + 1 < data.count, + data[scanOffset] == 0x0D, + data[scanOffset + 1] == 0x0A { + scanOffset += 2 + } + guard scanOffset < data.count else { + return .needsMoreData + } + let effectiveData = data[scanOffset...] + + let separator = Data("\r\n\r\n".utf8) + guard let separatorRange = effectiveData.range(of: separator) else { + return .needsMoreData + } + + let headerData = effectiveData[effectiveData.startIndex..= 2 else { + return .invalid(.malformedRequestLine) + } + + let method = String(parts[0]) + let target = String(parts[1]) + + // RFC 9110 §9.1: method = token (tchar characters only). + guard method.allSatisfy({ isTokenChar($0) }) else { + return .invalid(.malformedRequestLine) + } + + // RFC 9112 §2.3: HTTP-version = "HTTP/" DIGIT "." 
DIGIT + guard parts.count >= 3 else { + return .invalid(.invalidHTTPVersion) + } + let httpVersion = String(parts[2]) + guard isValidHTTPVersion(httpVersion) else { + return .invalid(.invalidHTTPVersion) + } + + // RFC 9112 §3.2: Validate request-target form. + // origin-form: starts with "/" + // absolute-form: starts with a scheme (e.g. "http://", "https://") + // asterisk-form: "*" (only valid for OPTIONS) + // authority-form: only valid for CONNECT + if method == "CONNECT" { + // authority-form: host:port — must contain a colon and not start with "/" + if target.hasPrefix("/") || !target.contains(":") { + return .invalid(.malformedRequestLine) + } + } else if method == "OPTIONS" && target == "*" { + // asterisk-form is valid for OPTIONS + } else if target.hasPrefix("/") { + // origin-form — valid for all methods + } else if target.lowercased().hasPrefix("http://") || target.lowercased().hasPrefix("https://") { + // absolute-form — valid for all methods + } else { + return .invalid(.malformedRequestLine) + } + + var headers: [String: String] = [:] + var keyIndex: [String: String] = [:] // lowercased -> original casing + var contentLengthValue: Int64? + var hostHeaderCount = 0 + var headerCount = 0 + for line in lines.dropFirst() where !line.isEmpty { + headerCount += 1 + if headerCount > 100 { + return .invalid(.tooManyHeaders) + } + // RFC 7230 §3.2.4: Reject obs-fold (continuation line starting with SP or HTAB) + if line.first == " " || line.first == "\t" { + return .invalid(.obsFoldDetected) + } + + guard let colonIndex = line.firstIndex(of: ":") else { + // RFC 9112 §5: A line with content but no colon is not a valid field line. + return .invalid(.invalidFieldName) + } + + let rawKey = line[line.startIndex..= 0x0A && v <= 0x1F) || v == 0x7F { + return .invalid(.invalidFieldValue) + } + } + + // RFC 7230 §3.3.3: Reject requests with Transfer-Encoding since this + // server does not support chunked decoding. 
Silently ignoring it would + // cause body framing mismatches (request smuggling). + if lowerKey == "transfer-encoding" { + return .invalid(.unsupportedTransferEncoding) + } + + // Content-Length: validate and normalize to a single integer value. + if lowerKey == "content-length" { + do { + contentLengthValue = try validateContentLength(value, existing: contentLengthValue) + } catch { + return .invalid(error) + } + // Store the resolved integer as the canonical header value, + // not the raw (possibly comma-separated) form. + let resolved = String(contentLengthValue!) + if let existingKey = keyIndex["content-length"] { + headers[existingKey] = resolved + } else { + headers[key] = resolved + keyIndex["content-length"] = key + } + continue + } + + // Track Host header occurrences for RFC 9110 §7.2 validation. + if lowerKey == "host" { + hostHeaderCount += 1 + } + + // RFC 9110 §5.3: Combine duplicate field lines with comma-separated values. + if let existingKey = keyIndex[lowerKey] { + headers[existingKey] = "\(headers[existingKey]!), \(value)" + } else { + headers[key] = value + keyIndex[lowerKey] = key + } + } + + // RFC 9110 §7.2: Reject requests with multiple Host headers (any version) + // or missing Host header (HTTP/1.1 only). + if hostHeaderCount > 1 { + return .invalid(.multipleHostHeaders) + } + if httpVersion == "HTTP/1.1" && hostHeaderCount == 0 { + return .invalid(.missingHostHeader) + } + + let contentLength = contentLengthValue ?? 0 + + let bodyOffset = data.distance(from: data.startIndex, to: separatorRange.upperBound) + + return .parsed(ParsedHeaders( + method: method, + target: target, + httpVersion: httpVersion, + headers: headers, + contentLength: contentLength, + bodyOffset: bodyOffset + )) + } + + /// Validates a Content-Length header value per RFC 9110 §8.6 / RFC 7230 §3.3.3. + /// + /// A Content-Length value may be a single number or a comma-separated list of + /// identical values (e.g. "5, 5"). 
Each element must be a non-negative decimal + /// integer (ASCII digits only — no +, ., 0x, etc.). + /// + /// - Parameters: + /// - value: The raw header value string. + /// - existing: A previously parsed Content-Length value, if any. + /// - Returns: The validated content length as an integer. + /// - Throws: ``HTTPRequestParseError/invalidContentLength`` or ``HTTPRequestParseError/conflictingContentLength``. + static func validateContentLength(_ value: String, existing: Int64?) throws(HTTPRequestParseError) -> Int64 { + let parts = value.split(separator: ",", omittingEmptySubsequences: false).map { + $0.trimmingCharacters(in: .whitespaces) + } + guard let first = parts.first, + !first.isEmpty, + first.allSatisfy({ $0.isASCII && $0.isNumber }), + let cl = Int64(first), + cl >= 0 + else { + throw HTTPRequestParseError.invalidContentLength + } + // All parts in a comma-separated list must represent the same integer value + for part in parts.dropFirst() { + guard !part.isEmpty, + part.allSatisfy({ $0.isASCII && $0.isNumber }), + let partValue = Int64(part), + partValue == cl + else { + throw HTTPRequestParseError.conflictingContentLength + } + } + if let existing = existing, existing != cl { + throw HTTPRequestParseError.conflictingContentLength + } + return cl + } + + /// Validates that a string matches the HTTP-version format: `HTTP/DIGIT.DIGIT`. + private static func isValidHTTPVersion(_ version: String) -> Bool { + let prefix = "HTTP/" + guard version.hasPrefix(prefix) else { return false } + let rest = version.dropFirst(prefix.count) + let parts = rest.split(separator: ".", maxSplits: 1) + guard parts.count == 2, + parts[0].count == 1, parts[0].first?.isASCII == true, parts[0].first?.isNumber == true, + parts[1].count == 1, parts[1].first?.isASCII == true, parts[1].first?.isNumber == true + else { return false } + return true + } + + /// Returns whether a character is a valid HTTP token character (RFC 9110 §5.6.2). + /// + /// `tchar = "!" 
/ "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / + /// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA` + private static func isTokenChar(_ c: Character) -> Bool { + guard let ascii = c.asciiValue else { return false } + switch ascii { + case UInt8(ascii: "A")...UInt8(ascii: "Z"), + UInt8(ascii: "a")...UInt8(ascii: "z"), + UInt8(ascii: "0")...UInt8(ascii: "9"): + return true + case UInt8(ascii: "!"), UInt8(ascii: "#"), UInt8(ascii: "$"), UInt8(ascii: "%"), + UInt8(ascii: "&"), UInt8(ascii: "'"), UInt8(ascii: "*"), UInt8(ascii: "+"), + UInt8(ascii: "-"), UInt8(ascii: "."), UInt8(ascii: "^"), UInt8(ascii: "_"), + UInt8(ascii: "`"), UInt8(ascii: "|"), UInt8(ascii: "~"): + return true + default: + return false + } + } +} diff --git a/ios/Sources/GutenbergKitHTTP/HTTPResponse.swift b/ios/Sources/GutenbergKitHTTP/HTTPResponse.swift new file mode 100644 index 000000000..39e88e146 --- /dev/null +++ b/ios/Sources/GutenbergKitHTTP/HTTPResponse.swift @@ -0,0 +1,234 @@ +import Foundation + +/// Converts a `StaticString` to a `String`. +/// +/// The Swift standard library does not provide a direct `String.init(_ : StaticString)` initializer. +/// This extension bridges that gap so that APIs returning `StaticString` for pointer-stability +/// reasons (e.g., `HTTPResponse.defaultStatusText`, `HTTPRequestParseError.httpStatusText`) can +/// be used naturally where a `String` is expected. +extension String { + init(_ staticString: StaticString) { + self = staticString.withUTF8Buffer { String(decoding: $0, as: UTF8.self) } + } +} + +/// An HTTP response to send back to a client. +public struct HTTPResponse: Sendable { + + /// The HTTP status code (e.g., 200, 404, 500). + public let status: Int + + /// The HTTP reason phrase (e.g., "OK", "Not Found"). + /// + /// When no explicit override was provided at init, this returns the standard + /// phrase for `status` from `defaultStatusText(for:)`. + public var statusText: String { + statusTextOverride ?? 
String(Self.defaultStatusText(for: status)) + } + + /// Additional response headers. `Content-Length` is always set to the actual body size + /// during serialization (any caller-provided value is replaced). `Connection` is added + /// automatically if not already present. + public let headers: [(String, String)] + + /// The response body. + /// + /// The entire body is held in memory. This is fine for the current use case + /// (Gutenberg REST API payloads — JSON, HTML, CSS, JS) which are small. If + /// large responses (e.g., media downloads) need to be proxied in the future, + /// this could be replaced with a streaming abstraction similar to `RequestBody`. + public let body: Data + + /// Caller-provided reason phrase override, or `nil` to use the default. + private let statusTextOverride: String? + + /// Creates an HTTP response. + /// + /// - Parameters: + /// - status: The HTTP status code. + /// - statusText: The reason phrase. Defaults to a standard phrase for common status codes. + /// - headers: Additional headers to include. Defaults to `Content-Type: text/plain`. + /// - body: The response body. Defaults to empty. + public init( + status: Int, + statusText: String? = nil, + headers: [(String, String)] = [("Content-Type", "text/plain")], + body: Data = Data() + ) { + self.status = status + self.statusTextOverride = statusText + self.headers = headers + self.body = body + } + + #if canImport(Network) + /// RFC 9110 §7.6.1: hop-by-hop headers that must not be forwarded by proxies. + private static let responseHopByHop: Set = [ + "connection", "transfer-encoding", "keep-alive", + "proxy-connection", "te", "upgrade", "trailer", + ] + + public init(_ response: (Data, URLResponse)) { + guard let httpResponse = response.1 as? 
HTTPURLResponse else { + self.status = 502 + self.statusTextOverride = "Bad Gateway" + self.headers = [("Content-Type", "text/plain")] + self.body = Data("Upstream returned a non-HTTP response".utf8) + return + } + self.status = httpResponse.statusCode + self.statusTextOverride = nil + + // Strip hop-by-hop headers (RFC 9110 §7.6.1) and the upstream Content-Length, + // then set Content-Length from the actual body size. This ensures `headers` is + // always truthful — consumers reading it directly (without going through + // `serialized()`) won't see a stale upstream value. + let upstream = (httpResponse.allHeaderFields as? [String: String] ?? [:]) + self.headers = upstream.compactMap { key, value in + let lower = key.lowercased() + guard !Self.responseHopByHop.contains(lower), + lower != "content-length" else { return nil } + return (key, value) + } + [("Content-Length", "\(response.0.count)")] + self.body = response.0 + } + #endif + + /// Headers excluded during serialization: hop-by-hop headers (RFC 9110 §7.6.1) + /// plus headers that are always recalculated (Content-Length, Date, Server). + private static let serializationExcluded: Set = [ + "content-length", "connection", "transfer-encoding", "keep-alive", + "proxy-connection", "te", "upgrade", "trailer", + "date", "server", + ] + + /// Serializes the response into raw HTTP/1.1 bytes ready to send on the wire. + public func serialized() -> Data { + var allHeaders = headers.filter { !Self.serializationExcluded.contains($0.0.lowercased()) } + allHeaders.append(("Content-Length", "\(body.count)")) + allHeaders.append(("Connection", "close")) + allHeaders.append(("Date", Self.httpDate())) + allHeaders.append(("Server", "GutenbergKit")) + + // Strip CR/LF from header names and values to prevent header injection. + let headerString = allHeaders.map { "\(Self.sanitize($0.0)): \(Self.sanitize($0.1))" }.joined(separator: "\r\n") + // RFC 9112 §4: status-code = 3DIGIT — always zero-pad to 3 digits. 
+ let statusCode = String(format: "%03d", min(max(status, 0), 999)) + let head = "HTTP/1.1 \(statusCode) \(Self.sanitize(statusText))\r\n\(headerString)\r\n\r\n" + + var data = Data(head.utf8) + data.append(body) + return data + } + + /// Removes control characters (including CR, LF, NUL, BEL, etc.) to prevent + /// HTTP response header injection and malformed output per RFC 9112 §4. + /// HTAB (0x09) and non-ASCII characters (obs-text, 0x80+) are preserved, + /// as RFC 9110 §5.5 explicitly allows them in header field values. + private static func sanitize(_ value: String) -> String { + value.filter { char in + guard let ascii = char.asciiValue else { return true } // Keep non-ASCII + if ascii == 0x09 { return true } // Keep HTAB + return ascii >= 0x20 && ascii != 0x7F // Strip CTLs and DEL + } + } + + private static let httpDateLock = NSLock() + private static let httpDateFormatter: DateFormatter = { + let formatter = DateFormatter() + formatter.locale = Locale(identifier: "en_US_POSIX") + formatter.timeZone = TimeZone(identifier: "GMT") + formatter.dateFormat = "EEE, dd MMM yyyy HH:mm:ss 'GMT'" + return formatter + }() + + /// Formats the current time as an HTTP-date per RFC 9110 §5.6.7. + private static func httpDate() -> String { + httpDateLock.lock() + defer { httpDateLock.unlock() } + return httpDateFormatter.string(from: Date()) + } + + /// Returns this response's reason phrase as a `StaticString`. + /// + /// This is the same table as `defaultStatusText(for:)`, exposed for callers + /// (like the JNI bridge) that need a stable pointer without allocation. + var staticStatusText: StaticString { + Self.defaultStatusText(for: status) + } + + /// Standard English reason phrases per RFC 9110 / RFC 9112 §4. + /// + /// This avoids `HTTPURLResponse.localizedString(forStatusCode:)` which may + /// return locale-dependent translations. 
+ static func defaultStatusText(for status: Int) -> StaticString { + switch status { + // 1xx Informational + case 100: "Continue" + case 101: "Switching Protocols" + case 102: "Processing" + case 103: "Early Hints" + // 2xx Success + case 200: "OK" + case 201: "Created" + case 202: "Accepted" + case 203: "Non-Authoritative Information" + case 204: "No Content" + case 205: "Reset Content" + case 206: "Partial Content" + case 207: "Multi-Status" + case 208: "Already Reported" + case 226: "IM Used" + // 3xx Redirection + case 300: "Multiple Choices" + case 301: "Moved Permanently" + case 302: "Found" + case 303: "See Other" + case 304: "Not Modified" + case 307: "Temporary Redirect" + case 308: "Permanent Redirect" + // 4xx Client Error + case 400: "Bad Request" + case 401: "Unauthorized" + case 402: "Payment Required" + case 403: "Forbidden" + case 404: "Not Found" + case 405: "Method Not Allowed" + case 406: "Not Acceptable" + case 407: "Proxy Authentication Required" + case 408: "Request Timeout" + case 409: "Conflict" + case 410: "Gone" + case 411: "Length Required" + case 412: "Precondition Failed" + case 413: "Content Too Large" + case 414: "URI Too Long" + case 415: "Unsupported Media Type" + case 416: "Range Not Satisfiable" + case 417: "Expectation Failed" + case 421: "Misdirected Request" + case 422: "Unprocessable Content" + case 423: "Locked" + case 424: "Failed Dependency" + case 425: "Too Early" + case 426: "Upgrade Required" + case 428: "Precondition Required" + case 429: "Too Many Requests" + case 431: "Request Header Fields Too Large" + case 451: "Unavailable For Legal Reasons" + // 5xx Server Error + case 500: "Internal Server Error" + case 501: "Not Implemented" + case 502: "Bad Gateway" + case 503: "Service Unavailable" + case 504: "Gateway Timeout" + case 505: "HTTP Version Not Supported" + case 506: "Variant Also Negotiates" + case 507: "Insufficient Storage" + case 508: "Loop Detected" + case 510: "Not Extended" + case 511: "Network 
Authentication Required" + default: "Unknown" + } + } +} diff --git a/ios/Sources/GutenbergKitHTTP/HTTPServer.swift b/ios/Sources/GutenbergKitHTTP/HTTPServer.swift new file mode 100644 index 000000000..6d5526dd5 --- /dev/null +++ b/ios/Sources/GutenbergKitHTTP/HTTPServer.swift @@ -0,0 +1,599 @@ +#if canImport(Network) + +import Foundation +import Network +import OSLog + +/// A lightweight local HTTP/1.1 server built on Network.framework. +/// +/// The server binds to `127.0.0.1` on a specified or system-assigned port and +/// dispatches each incoming request to a caller-provided handler. Requests are +/// parsed incrementally using ``HTTPRequestParser``, so large bodies are buffered +/// to disk rather than held in memory. +/// +/// ```swift +/// let server = try await HTTPServer.start(name: "media-proxy", port: 0) { req in +/// print("\(req.parsed.method) \(req.parsed.target) (\(req.parseDuration))") +/// return HTTPResponse(status: 200, body: Data("OK".utf8)) +/// } +/// print("Listening on port \(server.port)") +/// // ... +/// server.stop() +/// ``` +/// +/// ## Security +/// +/// The server itself is a generic request dispatcher — it does not forward +/// requests or act as a proxy. SSRF protection is intentionally left to the +/// `handler` implementation, since the server cannot know which upstream hosts +/// are legitimate. The server provides two layers of defence by default: +/// +/// 1. Binds to `127.0.0.1` (localhost only) unless `listenOnAllInterfaces` is set. +/// 2. Requires a randomly-generated bearer token in the `Proxy-Authorization` +/// header on every request (when `requiresAuthentication` is enabled). +/// Uses `Proxy-Authorization` per RFC 9110 §11.7.1 so that the client's +/// `Authorization` header can carry upstream credentials (e.g. HTTP Basic) +/// independently of the proxy token. +/// +/// ## Connection Model +/// +/// Each connection handles exactly one request (`Connection: close`). 
HTTP +/// keep-alive / pipelining is intentionally unsupported. This simplifies body +/// framing — in particular, GET/DELETE requests with unexpected body data are +/// safe because leftover bytes are discarded when the connection closes. If +/// keep-alive were ever added, body framing for all methods would need to be +/// enforced to prevent request smuggling. +/// +/// Lifecycle is managed explicitly: call ``stop()`` when the server is no longer +/// needed, or let `deinit` cancel the listener. +public final class HTTPServer: Sendable { + + /// A received HTTP request with server-side metadata. + public struct Request: Sendable { + /// The parsed HTTP request. + public let parsed: ParsedHTTPRequest + /// Time spent receiving and parsing the request. + public let parseDuration: Duration + } + + public typealias Response = HTTPResponse + + public typealias Error = HTTPServerError + + /// The port the server is listening on. + public let port: UInt16 + + /// A bearer token required in the `Proxy-Authorization` header of every + /// request. Uses `Proxy-Authorization` (RFC 9110 §11.7.1) rather than + /// `Authorization` so that the client's own `Authorization` header + /// (e.g. HTTP Basic credentials for the upstream server) passes through + /// to the handler untouched. Generated randomly on each server start. + public let token: String + + private let listener: NWListener + private let queue: DispatchQueue + private let connectionTasks: ConnectionTasks + + private init(listener: NWListener, port: UInt16, queue: DispatchQueue, token: String, connectionTasks: ConnectionTasks) { + self.listener = listener + self.port = port + self.queue = queue + self.token = token + self.connectionTasks = connectionTasks + } + + /// The default maximum number of concurrent connections. + public static let defaultMaxConnections: Int = 5 + + /// The default read timeout for receiving a complete request (30 seconds). 
+ public static let defaultReadTimeout: Duration = .seconds(30) + + /// The default idle timeout between consecutive reads (5 seconds). + /// If no data arrives within this interval, the connection is closed with a 408 response. + public static let defaultIdleTimeout: Duration = .seconds(5) + + /// The maximum number of bytes to read from the network in a single receive call. + private static let readChunkSize: Int = 65536 + + /// Creates and starts a new HTTP server. + /// + /// - Parameters: + /// - name: A stable identifier for this server instance. Must be consistent across + /// runs of the same logical server. The name is sanitized before use. Used to: + /// - Namespace temporary files so that multiple server instances don't interfere + /// with each other's orphan cleanup. + /// - Label the server's dispatch queue (`com.gutenbergkit.http-server.` followed by + /// the sanitized name). + /// + /// Each distinct server should have a unique name. It is the caller's responsibility + /// to choose a descriptive, collision-free identifier (e.g. `"media-proxy"`, + /// `"editor-assets"`). + /// - port: The port to listen on. Pass `nil` or omit to let the system assign an available port. + /// - listenOnAllInterfaces: When `true`, the listener binds to all IPv4 interfaces + /// instead of the loopback address only. Defaults to `false`. + /// - requiresAuthentication: Forwarded to connection handling. Per the type-level + /// Security notes, when enabled every request must carry the server's bearer + /// ``token`` in the `Proxy-Authorization` header. Defaults to `true`. + /// - maxRequestBodySize: The maximum allowed request body size in bytes. + /// Requests exceeding this limit receive a 413 response. Defaults to 4 GB. + /// - maxConnections: The maximum number of concurrent connections. New connections + /// beyond this limit are immediately closed. Defaults to 5. + /// - readTimeout: The maximum time to wait for a complete request before closing + /// the connection. Defaults to 30 seconds. + /// - idleTimeout: The maximum time to wait between consecutive reads before closing + /// the connection. Prevents slow-loris attacks. Defaults to 5 seconds. + /// - handler: A closure invoked for each fully-parsed request. Return an ``HTTPResponse`` + /// to send back to the client. + /// - Returns: A running ``HTTPServer`` instance. + /// - Throws: ``HTTPServerError/failedToStart`` if the listener cannot bind to the port. + /// Errors thrown while creating the underlying `NWListener` are not remapped and + /// propagate unchanged.
+    ///
+    /// - Note: If start-up fails after the underlying listener has been created, the
+    ///   listener is cancelled before the error is thrown, so a failed call leaves
+    ///   nothing listening on the port.
+    public static func start(
+        name: String,
+        port: UInt16? = nil,
+        listenOnAllInterfaces: Bool = false,
+        requiresAuthentication: Bool = true,
+        maxRequestBodySize: Int64 = HTTPRequestParser.defaultMaxBodySize,
+        maxConnections: Int = HTTPServer.defaultMaxConnections,
+        readTimeout: Duration = HTTPServer.defaultReadTimeout,
+        idleTimeout: Duration = HTTPServer.defaultIdleTimeout,
+        handler: @escaping @Sendable (HTTPServer.Request) async -> HTTPResponse
+    ) async throws -> HTTPServer {
+        // Sanitize to prevent path traversal — only allow safe filename characters.
+        let safeName = sanitizeName(name)
+
+        // Temp files are namespaced into a server-specific subdirectory so that
+        // multiple server instances (with different names) don't interfere with
+        // each other's orphan cleanup.
+        let tempDirectory = FileManager.default.temporaryDirectory
+            .appendingPathComponent("GutenbergKitHTTP-\(safeName)")
+
+        // Clean up temp files left behind by previous runs (e.g., crash or process kill).
+        // Swift's ARC guarantees deterministic cleanup during normal operation, but a
+        // crash can leave orphaned files in the system temp directory.
+        cleanOrphanedTempFiles(in: tempDirectory)
+        // Best effort: a failure here is deliberately not fatal for start-up.
+        try? FileManager.default.createDirectory(at: tempDirectory, withIntermediateDirectories: true)
+
+        // Bind to loopback unless the caller explicitly opts in to all interfaces.
+        // A `nil` port becomes 0, which maps to `.any` (system-assigned port).
+        let parameters = NWParameters.tcp
+        let requestedPort = NWEndpoint.Port(rawValue: port ?? 0) ?? .any
+        let host: NWEndpoint.Host = listenOnAllInterfaces ? .ipv4(.any) : .ipv4(.loopback)
+        parameters.requiredLocalEndpoint = NWEndpoint.hostPort(host: host, port: requestedPort)
+
+        let token = generateToken()
+        let connectionCounter = ConnectionCounter(limit: maxConnections)
+        let connectionTasks = ConnectionTasks()
+        let listener = try NWListener(using: parameters)
+        let queue = DispatchQueue(label: "com.gutenbergkit.http-server.\(safeName)")
+
+        // From here on, every exit that does not hand the listener to a returned
+        // `HTTPServer` must tear it down. Otherwise a failed, port-less, or
+        // cancelled start-up would leave the listener (and possibly a bound
+        // socket) alive with no owner able to call `stop()`.
+        var didStart = false
+        defer {
+            if !didStart {
+                listener.stateUpdateHandler = nil
+                listener.cancel()
+            }
+        }
+
+        let requiresAuth = requiresAuthentication
+        listener.newConnectionHandler = { connection in
+            guard connectionCounter.tryIncrement() else {
+                Logger.httpServer.warning("Connection limit reached, rejecting connection")
+                connection.cancel()
+                return
+            }
+            handleConnection(
+                connection, queue: queue, token: token,
+                requiresAuthentication: requiresAuth,
+                maxRequestBodySize: maxRequestBodySize, readTimeout: readTimeout,
+                idleTimeout: idleTimeout, tempDirectory: tempDirectory,
+                connectionCounter: connectionCounter, connectionTasks: connectionTasks, handler: handler
+            )
+        }
+
+        // Bridge listener state callbacks to an AsyncStream so we can await readiness.
+        // The listener is started synchronously — only the wait is async.
+        let states = AsyncStream<NWListener.State> { continuation in
+            listener.stateUpdateHandler = { state in
+                continuation.yield(state)
+            }
+        }
+        listener.start(queue: queue)
+
+        for await state in states {
+            switch state {
+            case .ready:
+                listener.stateUpdateHandler = nil
+                guard let boundPort = listener.port else {
+                    throw HTTPServerError.failedToStart
+                }
+                let server = HTTPServer(listener: listener, port: boundPort.rawValue, queue: queue, token: token, connectionTasks: connectionTasks)
+                Logger.httpServer.info("HTTP server started on port \(boundPort.rawValue)")
+                // Ownership of the listener passes to `server`; skip the cleanup above.
+                didStart = true
+                return server
+            case .failed(let error):
+                Logger.httpServer.error("Listener failed: \(error)")
+                throw HTTPServerError.failedToStart
+            case .cancelled:
+                throw HTTPServerError.failedToStart
+            default:
+                continue
+            }
+        }
+
+        // Reached when the stream ends without a terminal state, e.g. because the
+        // task awaiting start-up was cancelled.
+        throw HTTPServerError.failedToStart
+    }
+
+    /// Stops the server and releases resources.
+ /// + /// Cancels the listener and all in-flight connection tasks. Handlers that + /// are currently executing will receive a `CancellationError`. + public func stop() { + listener.cancel() + connectionTasks.cancelAll() + Logger.httpServer.info("HTTP server stopped") + } + + deinit { + listener.cancel() + connectionTasks.cancelAll() + } + + // MARK: - Connection Handling + + private static func handleConnection( + _ connection: NWConnection, + queue: DispatchQueue, + token: String, + requiresAuthentication: Bool, + maxRequestBodySize: Int64, + readTimeout: Duration, + idleTimeout: Duration, + tempDirectory: URL, + connectionCounter: ConnectionCounter, + connectionTasks: ConnectionTasks, + handler: @escaping @Sendable (HTTPServer.Request) async -> HTTPResponse + ) { + connection.start(queue: queue) + + let taskID = UUID() + let task = Task { + defer { + connectionCounter.decrement() + } + + do { + let parser = HTTPRequestParser(maxBodySize: maxRequestBodySize, tempDirectory: tempDirectory) + var request: ParsedHTTPRequest! + let duration = try await ContinuousClock().measure { + request = try await withThrowingTaskGroup(of: ParsedHTTPRequest.self) { group in + group.addTask { + // Phase 1: receive headers only. + try await Self.receiveUntil(\.hasHeaders, parser: parser, on: connection, idleTimeout: idleTimeout) + + // Validate headers (triggers full RFC validation). + guard let partial = try parser.parseRequest() else { + throw HTTPServerError.connectionClosed + } + + // Check auth before consuming body to avoid buffering + // up to maxRequestBodySize for unauthenticated clients. + if requiresAuthentication { + guard authenticate(partial, token: token) else { + throw HTTPServerError.authenticationFailed + } + } + + // Reject body-bearing methods without Content-Length. + // We don't support Transfer-Encoding: chunked, so + // Content-Length is the only way to determine body size. 
+ let upperMethod = partial.method.uppercased() + if ["POST", "PUT", "PATCH"].contains(upperMethod) && partial.header("Content-Length") == nil { + throw HTTPServerError.lengthRequired + } + + // Phase 2: receive body (skipped if already complete). + if !parser.state.isComplete { + try await Self.receiveUntil(\.isComplete, parser: parser, on: connection, idleTimeout: idleTimeout) + } + + guard let complete = try parser.parseRequest(), complete.isComplete else { + throw HTTPServerError.connectionClosed + } + return complete + } + group.addTask { + try await Task.sleep(for: readTimeout) + throw HTTPServerError.readTimeout + } + let result = try await group.next()! + group.cancelAll() + return result + } + } + + let response = await handler(Request(parsed: request, parseDuration: duration)) + await send(response, on: connection) + let (sec, atto) = duration.components + let ms = Double(sec) * 1000.0 + Double(atto) / 1_000_000_000_000_000.0 + Logger.httpServer.debug("\(request.method) \(request.target) → \(response.status) (\(String(format: "%.1f", ms))ms)") + } catch HTTPServerError.authenticationFailed { + await send(HTTPResponse(status: 407, headers: [("Content-Type", "text/plain"), ("Proxy-Authenticate", "Bearer")]), on: connection) + } catch HTTPServerError.lengthRequired { + await send(HTTPResponse(status: 411, statusText: "Length Required", body: Data("Length Required".utf8)), on: connection) + } catch is CancellationError { + Logger.httpServer.debug("Connection cancelled during shutdown") + connection.cancel() + } catch HTTPServerError.readTimeout { + Logger.httpServer.warning("Read timeout, closing connection") + await send(HTTPResponse(status: 408, statusText: "Request Timeout", body: Data("Request Timeout".utf8)), on: connection) + } catch let error as HTTPRequestParseError { + Logger.httpServer.error("Parse error: \(error)") + let statusText = String(error.httpStatusText) + let response = HTTPResponse( + status: error.httpStatus, + statusText: statusText, + 
body: Data(statusText.utf8) + ) + await send(response, on: connection) + } catch { + Logger.httpServer.error("Unexpected error: \(error)") + await send(HTTPResponse(status: 400, statusText: "Bad Request", body: Data("Malformed HTTP request".utf8)), on: connection) + } + } + connectionTasks.track(taskID, task) + } + + /// Feeds data from the connection into the parser until the given state + /// predicate is satisfied or the connection closes. + /// + /// Each individual read is guarded by `idleTimeout` to prevent slow-loris + /// attacks where an attacker drip-feeds one byte at a time to hold a + /// connection slot open. + private static func receiveUntil( + _ condition: KeyPath, + parser: HTTPRequestParser, + on connection: NWConnection, + idleTimeout: Duration + ) async throws { + while !parser.state[keyPath: condition] { + let data = try await receiveWithIdleTimeout(on: connection, timeout: idleTimeout) + + guard let data else { + throw HTTPServerError.connectionClosed + } + + parser.append(data) + } + } + + /// Reads a chunk of data from the connection, enforcing an idle timeout. + /// + /// - Returns: The received data, or `nil` if the connection completed with no more data. + /// - Throws: ``HTTPServerError/readTimeout`` if no data arrives within the timeout. + private static func receiveWithIdleTimeout(on connection: NWConnection, timeout: Duration) async throws -> Data? { + try await withThrowingTaskGroup(of: Data?.self) { group in + group.addTask { + try await receive(on: connection) + } + group.addTask { + try await Task.sleep(for: timeout) + throw HTTPServerError.readTimeout + } + let result = try await group.next()! + group.cancelAll() + return result + } + } + + /// Reads a chunk of data from the connection. + /// + /// - Returns: The received data, or `nil` if the connection completed with no more data. + /// + /// On a spurious wake (no content, no error, not complete), re-issues the + /// receive once. 
Uses a class-based flag to guarantee the continuation is + /// resumed exactly once even if `onCancel` fires concurrently with the + /// receive callback. + private static func receive(on connection: NWConnection) async throws -> Data? { + try await withTaskCancellationHandler { + try await withCheckedThrowingContinuation { (continuation: CheckedContinuation) in + receiveOnce(on: connection, retryOnSpuriousWake: true, continuation: continuation) + } + } onCancel: { + connection.cancel() + } + } + + /// Issues a single `connection.receive` and resumes the continuation. + /// + /// If the callback delivers a spurious wake (no data, no error, not + /// complete) and `retryOnSpuriousWake` is true, re-issues the receive once. + /// On the second spurious wake, treats it as connection closed. + /// + /// The `OnceGuard` ensures the continuation is resumed at most once. If + /// `onCancel` fires and cancels the connection, NWConnection delivers an + /// error callback. Without the guard, that error callback could race with a + /// legitimate resume from the data path. The guard makes the first resume + /// win and silently drops any subsequent attempts. + private static func receiveOnce( + on connection: NWConnection, + retryOnSpuriousWake: Bool, + continuation: CheckedContinuation + ) { + let guard_ = OnceGuard() + connection.receive(minimumIncompleteLength: 1, maximumLength: readChunkSize) { content, _, isComplete, error in + if let error { + if guard_.claim() { continuation.resume(throwing: HTTPServerError.networkError(error)) } + } else if let content, !content.isEmpty { + if guard_.claim() { continuation.resume(returning: content) } + } else if isComplete { + if guard_.claim() { continuation.resume(returning: nil) } + } else if retryOnSpuriousWake { + // Spurious wake — re-issue once. The recursive call creates a + // fresh OnceGuard, which is correct: the old callback from this + // receive won't fire again, so the old guard is inert. 
+ receiveOnce(on: connection, retryOnSpuriousWake: false, continuation: continuation) + } else { + if guard_.claim() { continuation.resume(returning: nil) } + } + } + } + + /// Thread-safe flag ensuring a continuation is resumed exactly once. + private final class OnceGuard: @unchecked Sendable { + private let _claimed = NSLock() + private var _value = false + func claim() -> Bool { + _claimed.lock() + defer { _claimed.unlock() } + if _value { return false } + _value = true + return true + } + } + + /// Sends a response on the connection and then closes it. + private static func send(_ response: HTTPResponse, on connection: NWConnection) async { + await withCheckedContinuation { (continuation: CheckedContinuation) in + connection.send(content: response.serialized(), completion: .contentProcessed { _ in + connection.cancel() + continuation.resume() + }) + } + } + + // MARK: - Authentication + + /// Validates the proxy bearer token from the `Proxy-Authorization` header + /// (RFC 9110 §11.7.1). Using `Proxy-Authorization` keeps the client's + /// `Authorization` header available for upstream credentials. + private static func authenticate(_ request: ParsedHTTPRequest, token: String) -> Bool { + guard let proxyAuth = request.header("Proxy-Authorization") else { + return false + } + + let prefix = "Bearer " + guard proxyAuth.prefix(prefix.count).caseInsensitiveCompare(prefix) == .orderedSame else { + return false + } + + let provided = String(proxyAuth.dropFirst(prefix.count)) + return constantTimeEqual(provided, token) + } + + /// Compares two strings in constant time to prevent timing attacks. + /// + /// Always iterates over the expected token (b) regardless of the input + /// length, so timing reveals neither whether lengths match nor how many + /// bytes are correct. When lengths differ, b is compared against itself + /// to keep the work constant. 
+ /// + /// **Do not "simplify" this to an early-return on length mismatch.** + /// An early return would let an attacker measure response time to discover + /// the expected token length, even though the token length is currently + /// fixed at 64 hex characters. This implementation is intentionally + /// branch-free in the hot path to avoid leaking any information. + private static func constantTimeEqual(_ a: String, _ b: String) -> Bool { + let aBytes = Array(a.utf8) + let bBytes = Array(b.utf8) + var result: UInt8 = aBytes.count == bBytes.count ? 0 : 1 + let comparand = aBytes.count == bBytes.count ? aBytes : bBytes + for i in bBytes.indices { + result |= comparand[i] ^ bBytes[i] + } + return result == 0 + } + + /// Strips characters from `name` that are not letters, digits, `.`, `-`, or `_`. + /// + /// The server `name` is embedded in filesystem paths (temp directory) and + /// dispatch queue labels. Allowing arbitrary characters (e.g. `../`) would + /// enable path traversal. This filter reduces the name to a safe subset. + private static func sanitizeName(_ name: String) -> String { + let sanitized = String(name.unicodeScalars.filter { + CharacterSet.alphanumerics.contains($0) || $0 == "." || $0 == "-" || $0 == "_" + }) + precondition(!sanitized.isEmpty, "Server name must contain at least one alphanumeric character, dot, hyphen, or underscore") + return sanitized + } + + private static func generateToken() -> String { + var bytes = [UInt8](repeating: 0, count: 32) + let status = SecRandomCopyBytes(kSecRandomDefault, bytes.count, &bytes) + precondition(status == noErr, "Failed to generate random token") + return bytes.map { String(format: "%02x", $0) }.joined() + } + + /// Removes orphaned temp files left by a previous crash. + /// + /// The parser creates temp files in a server-specific subdirectory under the + /// system temp directory (e.g., `GutenbergKitHTTP-media-proxy/`). Under normal + /// operation, `TempFileOwner.deinit` deletes them via ARC. 
After a crash, these + /// files survive — this method cleans them up on the next server start. + /// + /// Because each server `name` maps to its own subdirectory, cleanup is scoped + /// to a single server instance and will not affect files belonging to other + /// servers running concurrently. + /// + /// **Important:** Two server instances with the same `name` must not run + /// concurrently. On startup, this method deletes **all** files in the + /// server's temp subdirectory. If another instance with the same name is + /// still handling requests, its in-flight temp files will be removed, + /// causing `bufferIOError` failures. Callers must ensure each running + /// server uses a unique name, or that the previous instance is fully + /// stopped before starting a new one. + private static func cleanOrphanedTempFiles(in directory: URL) { + guard let contents = try? FileManager.default.contentsOfDirectory( + at: directory, includingPropertiesForKeys: nil + ) else { return } + for url in contents { + try? FileManager.default.removeItem(at: url) + } + } +} + +/// Thread-safe tracker for in-flight connection tasks, enabling graceful shutdown. +/// +/// Completed tasks are intentionally not removed. Entries are tiny (UUID + Task +/// reference) and accumulate only for the server's lifetime. Removing on completion +/// would require a `defer` inside each Task, but if the task finishes before +/// `track()` is called the `remove()` is a no-op — leaving a stale entry anyway. +/// Skipping removal avoids that race entirely. `cancelAll()` clears everything +/// on `stop()`. +/// +/// All mutable state is guarded by `lock`. 
+final class ConnectionTasks: @unchecked Sendable { + private let lock = NSLock() + private var tasks: [UUID: Task] = [:] + + func track(_ id: UUID, _ task: Task) { + lock.withLock { + tasks[id] = task + } + } + + func cancelAll() { + lock.withLock { + for task in tasks.values { + task.cancel() + } + tasks.removeAll() + } + } +} + +/// Thread-safe counter for tracking active connections. +final class ConnectionCounter: @unchecked Sendable { + private let lock = NSLock() + private let limit: Int + private var _count: Int = 0 + + init(limit: Int) { + self.limit = limit + } + + /// Attempts to increment the counter. Returns `true` if the connection is allowed. + func tryIncrement() -> Bool { + lock.withLock { + guard _count < limit else { return false } + _count += 1 + return true + } + } + + /// Decrements the counter when a connection completes. + func decrement() { + lock.withLock { + _count -= 1 + } + } +} + +// MARK: - Logger + +extension Logger { + static let httpServer = Logger(subsystem: "com.gutenbergkit.http", category: "server") +} + +#endif // canImport(Network) diff --git a/ios/Sources/GutenbergKitHTTP/HTTPServerError.swift b/ios/Sources/GutenbergKitHTTP/HTTPServerError.swift new file mode 100644 index 000000000..6c054329d --- /dev/null +++ b/ios/Sources/GutenbergKitHTTP/HTTPServerError.swift @@ -0,0 +1,33 @@ +#if canImport(Network) + +import Foundation +import Network + +/// Errors thrown by ``HTTPServer``. +public enum HTTPServerError: Error, LocalizedError, Sendable { + /// The server failed to bind to the requested port. + case failedToStart + /// The connection closed before a complete request was received. + case connectionClosed + /// The read timeout expired before a complete request was received. + case readTimeout + /// The request failed authentication (checked after headers, before body). + case authenticationFailed + /// The request method requires a Content-Length header but none was provided. 
+ case lengthRequired + /// A network-level error occurred on the connection. + case networkError(NWError) + + public var errorDescription: String? { + switch self { + case .failedToStart: "Failed to start HTTP server" + case .connectionClosed: "Connection closed before request was complete" + case .readTimeout: "Read timeout expired before request was complete" + case .authenticationFailed: "Request failed authentication" + case .lengthRequired: "Content-Length header is required for this method" + case .networkError(let error): "Network error: \(error.localizedDescription)" + } + } +} + +#endif // canImport(Network) diff --git a/ios/Sources/GutenbergKitHTTP/HeaderValue.swift b/ios/Sources/GutenbergKitHTTP/HeaderValue.swift new file mode 100644 index 000000000..f3f5b7d92 --- /dev/null +++ b/ios/Sources/GutenbergKitHTTP/HeaderValue.swift @@ -0,0 +1,121 @@ +import Foundation + +/// Utilities for parsing structured HTTP header values (RFC 9110 §5.6). +/// +/// HTTP headers like `Content-Type` and `Content-Disposition` carry parameters +/// in `key=value` or `key="value"` form. This enum provides a shared +/// implementation for extracting those parameters while correctly handling +/// quoted strings and backslash escapes per RFC 2045 §5.1. +enum HeaderValue { + + /// Extracts a parameter value from a header value string. + /// + /// Searches for `name=` while skipping occurrences that fall inside + /// quoted strings, then extracts the value — handling both quoted + /// (with backslash escapes per RFC 2045 §5.1) and unquoted forms. + /// + /// ```swift + /// // Content-Type: multipart/form-data; boundary=----WebKitFormBoundary + /// HeaderValue.extractParameter("boundary", from: contentType) + /// + /// // Content-Disposition: form-data; name="file"; filename="photo.jpg" + /// HeaderValue.extractParameter("filename", from: disposition) + /// ``` + /// + /// - Parameters: + /// - name: The parameter name to search for (case-insensitive). 
+ /// - headerValue: The full header value string to search. + /// - Returns: The extracted parameter value, or `nil` if not found. + static func extractParameter(_ name: String, from headerValue: String) -> String? { + let search = "\(name)=" + var searchStart = headerValue.startIndex + + while searchStart < headerValue.endIndex { + guard let paramRange = headerValue.range( + of: search, + options: .caseInsensitive, + range: searchStart.. String { + let valueStart = text.index(after: text.startIndex) + var index = valueStart + var result = "" + + while index < text.endIndex { + let char = text[index] + if char == "\\" { + let next = text.index(after: index) + if next < text.endIndex { + result.append(text[next]) + index = text.index(after: next) + } else { + break + } + } else if char == "\"" { + break + } else { + result.append(char) + index = text.index(after: index) + } + } + return result + } + + /// Returns whether the given position in the string falls inside a quoted string. + /// + /// Scans from the start, tracking quote open/close state while respecting + /// backslash escapes. + private static func isInsideQuotedString(_ string: String, position: String.Index) -> Bool { + var inQuote = false + var index = string.startIndex + while index < position { + let char = string[index] + if inQuote && char == "\\" { + // Skip escaped character + index = string.index(after: index) + if index < position { + index = string.index(after: index) + } + continue + } + if char == "\"" { + inQuote = !inQuote + } + index = string.index(after: index) + } + return inQuote + } +} diff --git a/ios/Sources/GutenbergKitHTTP/MultipartPart.swift b/ios/Sources/GutenbergKitHTTP/MultipartPart.swift new file mode 100644 index 000000000..6a2284657 --- /dev/null +++ b/ios/Sources/GutenbergKitHTTP/MultipartPart.swift @@ -0,0 +1,399 @@ +import Foundation + +/// A single part from a `multipart/form-data` body, per RFC 7578. 
+/// +/// Each part represents one form field or file upload, with its own +/// Content-Disposition parameters and optional Content-Type. +/// +/// Part bodies are represented as lightweight references (byte ranges) +/// back to the original request body. No part data is copied during parsing; +/// bytes are only read when ``body`` is accessed via ``RequestBody/makeInputStream()``. +/// +/// ```swift +/// let request = try parser.parseRequest() +/// for part in try request?.multipartParts() ?? [] { +/// print(part.name, part.filename, part.contentType) +/// } +/// ``` +public struct MultipartPart: Sendable, Equatable { + /// The field name from the `Content-Disposition: form-data; name="..."` parameter. + public let name: String + /// The filename, if present, from the `Content-Disposition: form-data; filename="..."` parameter. + public let filename: String? + /// The `Content-Type` of this part, or `"text/plain"` if not specified (RFC 7578 §4.4). + public let contentType: String + /// The part's body content, backed by a reference to the original request body. + public let body: RequestBody +} + +/// Errors thrown when parsing a multipart/form-data body fails. +public enum MultipartParseError: Error, Sendable, Equatable, LocalizedError { + /// The Content-Type is not `multipart/form-data` or is missing the `boundary` parameter. + case notMultipartFormData + /// The body is missing or the request is incomplete. + case missingBody + /// A part is missing the required `Content-Disposition: form-data` header. + case missingContentDisposition + /// A part's `Content-Disposition` header is missing the required `name` parameter. + case missingNameParameter + /// The multipart body structure is malformed (e.g., missing closing boundary). + case malformedBody + /// The multipart body contains more than 100 parts. + case tooManyParts + + public var errorDescription: String? 
{ + switch self { + case .notMultipartFormData: + return "The Content-Type is not multipart/form-data or is missing the boundary parameter." + case .missingBody: + return "The request body is missing or the request is incomplete." + case .missingContentDisposition: + return "A multipart part is missing the required Content-Disposition header." + case .missingNameParameter: + return "A multipart part's Content-Disposition header is missing the required name parameter." + case .malformedBody: + return "The multipart body is malformed." + case .tooManyParts: + return "The multipart body contains more than 100 parts." + } + } +} + +// MARK: - Parsing + +extension MultipartPart { + + private static let scanChunkSize = 65_536 + + /// Parses an in-memory `multipart/form-data` body into its constituent parts. + /// + /// Scans the body data to locate part boundaries and extract headers, but does + /// not copy part body bytes. Each part's ``body`` is a lightweight reference + /// (offset + length) back to the source `RequestBody`. + /// + /// - Parameters: + /// - source: The original request body to reference for part content. + /// - bodyData: The raw body bytes (read once for scanning, then released by the caller). + /// - bodyFileOffset: The byte offset of `bodyData` within `source`'s backing file + /// (0 for data-backed bodies). + /// - boundary: The boundary string from the Content-Type header. + /// - Returns: An array of parsed parts with lazy body references. + /// - Throws: ``MultipartParseError`` if the body is malformed. 
+ static func parse( + source: RequestBody, + bodyData: Data, + bodyFileOffset: UInt64, + boundary: String + ) throws -> [MultipartPart] { + let delimiter = Data("--\(boundary)".utf8) + let closeDelimiter = Data("--\(boundary)--".utf8) + let crlf = Data("\r\n".utf8) + let crlfcrlf = Data("\r\n\r\n".utf8) + + guard let firstRange = bodyData.range(of: delimiter) else { + throw MultipartParseError.malformedBody + } + + var parts: [MultipartPart] = [] + var searchStart = firstRange.upperBound + + while searchStart < bodyData.endIndex { + // RFC 2046 §5.1.1: skip optional transport padding (LWSP) after the boundary. + // delimiter = CRLF "--" boundary *(SP / HTAB) CRLF + while searchStart < bodyData.endIndex && + (bodyData[searchStart] == UInt8(ascii: " ") || bodyData[searchStart] == UInt8(ascii: "\t")) { + searchStart = bodyData.index(after: searchStart) + } + + // Skip the CRLF after the delimiter line + if bodyData[searchStart...].starts(with: crlf) { + searchStart = bodyData.index(searchStart, offsetBy: crlf.count) + } + + let remaining = bodyData[searchStart...] + if remaining.isEmpty { + break + } + + // Find the header/body separator within this part + guard let headerEnd = bodyData[searchStart...].range(of: crlfcrlf) else { + throw MultipartParseError.malformedBody + } + + let headerData = bodyData[searchStart..= minBodyEnd { + let beforeDelimiter = bodyData[bodyData.index(partBodyEnd, offsetBy: -crlf.count).. 100 { + throw MultipartParseError.tooManyParts + } + + // Check if the next delimiter is the closing one + if bodyData[nextDelimiter.lowerBound...].starts(with: closeDelimiter) { + break + } + + searchStart = nextDelimiter.upperBound + } + + return parts + } + + /// Parses a file-backed `multipart/form-data` body using chunked scanning. + /// + /// Reads the file in fixed-size chunks to find boundary offsets, keeping memory + /// usage at O(chunk_size) regardless of body size. Part bodies are file-slice + /// references, not copies. 
+ /// + /// - Parameters: + /// - source: The file-backed request body. + /// - boundary: The boundary string from the Content-Type header. + /// - Returns: An array of parsed parts with lazy body references. + /// - Throws: ``MultipartParseError`` if the body is malformed. + static func parseChunked( + source: RequestBody, + boundary: String + ) throws -> [MultipartPart] { + guard let fileURL = source.fileURL else { + throw MultipartParseError.malformedBody + } + + let delimiter = Data("--\(boundary)".utf8) + let crlfcrlf = Data("\r\n\r\n".utf8) + + let bodyStart = source.fileOffset + let bodyLength = UInt64(source.count) + let bodyEnd = bodyStart + bodyLength + + return try FileHandle.withReadHandle(forUrl: fileURL) { fileHandle in + // Phase 1: Scan for all boundary delimiter offsets using chunked reads. + // An overlap region (delimiter.count - 1 bytes) is carried between chunks + // so boundaries split across chunk boundaries are still found. + let overlapSize = delimiter.count - 1 + var delimiterOffsets: [UInt64] = [] + var position = bodyStart + var carryOver = Data() + + while position < bodyEnd { + let readSize = min(UInt64(scanChunkSize), bodyEnd - position) + try fileHandle.seek(toOffset: position) + guard let chunk = try fileHandle.read(upToCount: Int(readSize)), + !chunk.isEmpty else { + break + } + + let searchBuffer = carryOver.isEmpty ? 
chunk : carryOver + chunk + + var searchOffset = searchBuffer.startIndex + while let range = searchBuffer.range(of: delimiter, in: searchOffset..= bodyStart && absoluteOffset + UInt64(delimiter.count) <= bodyEnd { + delimiterOffsets.append(absoluteOffset) + } + searchOffset = searchBuffer.index(after: range.lowerBound) + } + + if chunk.count > overlapSize { + carryOver = chunk.suffix(overlapSize) + } else { + carryOver = chunk + } + position += UInt64(chunk.count) + } + + guard !delimiterOffsets.isEmpty else { + throw MultipartParseError.malformedBody + } + + // Phase 2: Extract parts from consecutive delimiter pairs. + var parts: [MultipartPart] = [] + let maxPartHeaderSize: UInt64 = 8192 + + for i in 0..= partBodyStart + 2 { + try fileHandle.seek(toOffset: nextDelimStart - 2) + if let peek = try fileHandle.read(upToCount: 2), + peek.count == 2, + peek[peek.startIndex] == 0x0D && peek[peek.startIndex + 1] == 0x0A { + partBodyEnd = nextDelimStart - 2 + } + } + + let partBodyLength = Int(partBodyEnd - partBodyStart) + let partBody = RequestBody( + fileURL: fileURL, + offset: partBodyStart, + length: max(0, partBodyLength), + owner: source.fileOwner + ) + + let part = try parsePartHeaders(headerData: headerData, body: partBody) + parts.append(part) + + if parts.count > 100 { + throw MultipartParseError.tooManyParts + } + } + + guard !parts.isEmpty else { + throw MultipartParseError.malformedBody + } + + return parts + } + } + + /// Creates a `RequestBody` for a part without copying bytes. + /// + /// For file-backed sources, returns a file-slice reference. For data-backed + /// sources, returns a Data slice (which shares storage via copy-on-write). 
+ private static func makePartBody( + source: RequestBody, + bodyData: Data, + partOffset: Int, + partLength: Int, + bodyFileOffset: UInt64 + ) -> RequestBody { + switch source.storage { + case .file(let url), .fileSlice(let url, _, _): + return RequestBody( + fileURL: url, + offset: bodyFileOffset + UInt64(partOffset), + length: partLength, + owner: source.fileOwner + ) + case .data: + let start = bodyData.startIndex + partOffset + let end = start + partLength + return RequestBody(data: bodyData[start.. MultipartPart { + guard let headerString = String(data: headerData, encoding: .utf8) else { + throw MultipartParseError.missingContentDisposition + } + + let lines = headerString.components(separatedBy: "\r\n") + + var contentDisposition: String? + var contentType: String? + + for line in lines where !line.isEmpty { + guard let colonIndex = line.firstIndex(of: ":") else { continue } + let key = line[line.startIndex.. String? { + let lowered = name.lowercased() + return headers.first(where: { $0.key.lowercased() == lowered })?.value + } + + /// Parses the body as `multipart/form-data` and returns the individual parts. + /// + /// Extracts the boundary from the `Content-Type` header automatically. + /// Part bodies are lazy references back to the original request body — no + /// part data is copied during parsing. Bytes are only read when a part's + /// body is accessed via ``RequestBody/makeInputStream()``. + /// + /// - Returns: The parsed parts. + /// - Throws: ``MultipartParseError`` if the Content-Type is not `multipart/form-data`, + /// the body is missing, or the multipart structure is malformed. 
+ public func multipartParts() throws -> [MultipartPart] { + guard let contentType = header("Content-Type"), + let boundary = Self.extractBoundary(from: contentType) else { + throw MultipartParseError.notMultipartFormData + } + + guard let body else { + throw MultipartParseError.missingBody + } + + if let data = body.inMemoryData { + // In-memory: scan the data directly (already in memory, no extra allocation). + return try MultipartPart.parse( + source: body, + bodyData: data, + bodyFileOffset: 0, + boundary: boundary + ) + } else { + // File-backed: scan in fixed-size chunks to avoid loading the entire + // body into memory. Memory usage is O(chunk_size) regardless of body size. + return try MultipartPart.parseChunked(source: body, boundary: boundary) + } + } + + /// Extracts the boundary parameter from a `multipart/form-data` Content-Type value. + /// + /// Uses ``HeaderValue/extractParameter(_:from:)`` for the actual extraction, + /// then validates the result against RFC 2046 §5.1.1 boundary constraints. + private static func extractBoundary(from contentType: String) -> String? { + guard contentType.lowercased().hasPrefix("multipart/form-data") else { + return nil + } + + guard let boundary = HeaderValue.extractParameter("boundary", from: contentType) else { + return nil + } + + guard !boundary.isEmpty, boundary.count <= 70 else { return nil } + // RFC 2046 §5.1.1: boundary characters must be from the bchars set. + guard boundary.allSatisfy({ isBoundaryChar($0) }) else { return nil } + // RFC 2046 §5.1.1: space cannot be the last character of a boundary. + guard !boundary.hasSuffix(" ") else { return nil } + return boundary + } + + /// Returns whether a character is valid in a MIME boundary (RFC 2046 §5.1.1 bchars). + /// + /// `bchars = bcharsnospace / " "` + /// `bcharsnospace = DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" / "," / "-" / "." 
/ "/" / ":" / "=" / "?"` + private static func isBoundaryChar(_ c: Character) -> Bool { + guard let ascii = c.asciiValue else { return false } + switch ascii { + case UInt8(ascii: "A")...UInt8(ascii: "Z"), + UInt8(ascii: "a")...UInt8(ascii: "z"), + UInt8(ascii: "0")...UInt8(ascii: "9"): + return true + case UInt8(ascii: "'"), UInt8(ascii: "("), UInt8(ascii: ")"), + UInt8(ascii: "+"), UInt8(ascii: "_"), UInt8(ascii: ","), + UInt8(ascii: "-"), UInt8(ascii: "."), UInt8(ascii: "/"), + UInt8(ascii: ":"), UInt8(ascii: "="), UInt8(ascii: "?"), + UInt8(ascii: " "): + return true + default: + return false + } + } + + #if canImport(Network) + /// Converts this parsed request into a `URLRequest` using the given base URL. + /// + /// The `target` (path and query) is resolved against `baseURL` to produce the + /// final request URL. If a body is present, it is attached as an `httpBodyStream`. + /// + /// - Parameter baseURL: The base URL to resolve the request target against. + /// - Returns: A configured `URLRequest`, or `nil` if the URL cannot be constructed. + public func urlRequest(relativeTo baseURL: URL) -> URLRequest? { + guard let url = URL(string: target, relativeTo: baseURL) else { + return nil + } + + var request = URLRequest(url: url) + request.httpMethod = method + + // RFC 9110 §7.6.1: hop-by-hop headers must not be forwarded by proxies. + // "proxy-authorization" carries the proxy's own bearer token + // (RFC 9110 §11.7.1) and must not be forwarded to the upstream server. + // "authorization" is intentionally kept so that the client's own + // credentials (e.g. HTTP Basic for the upstream server) pass through. + var hopByHop: Set = [ + "host", "connection", "transfer-encoding", "keep-alive", + "proxy-connection", "te", "upgrade", "trailer", + "proxy-authorization", + ] + + // Headers listed in Connection are also hop-by-hop (RFC 9110 §7.6.1). 
+ if let connectionValue = header("Connection") { + for name in connectionValue.split(separator: ",") { + hopByHop.insert(name.trimmingCharacters(in: .whitespaces).lowercased()) + } + } + + for (key, value) in headers { + guard !hopByHop.contains(key.lowercased()) else { continue } + request.setValue(value, forHTTPHeaderField: key) + } + + if let body { + request.httpBodyStream = try? body.makeInputStream() + } + + return request + } + #endif +} diff --git a/ios/Sources/GutenbergKitHTTP/README.md b/ios/Sources/GutenbergKitHTTP/README.md new file mode 100644 index 000000000..43721b305 --- /dev/null +++ b/ios/Sources/GutenbergKitHTTP/README.md @@ -0,0 +1,184 @@ +# GutenbergKitHTTP + +A zero-dependency Swift module providing an HTTP/1.1 request parser and a lightweight local server built on Network.framework. Designed for use as the front-end of an in-process HTTP proxy server, where raw bytes arrive over a socket and need to be converted into structured request objects suitable for forwarding via `URLSession`. + +## Why + +GutenbergKit's iOS integration uses an in-process HTTP server to bridge requests between the embedded web editor and native networking. This module handles the parsing side of that bridge — turning raw TCP bytes into `URLRequest` objects — without pulling in a full HTTP server framework. + +Key design goals: + +- **Incremental parsing** — data can arrive in arbitrary chunks (byte-by-byte if needed); the parser buffers to disk so memory usage stays flat regardless of body size. +- **Lazy validation** — `append()` does only lightweight scanning (finding `\r\n\r\n` and extracting `Content-Length`). Full RFC validation is deferred to `parseRequest()`, keeping the hot path fast. +- **Strict conformance** — rejects request smuggling vectors (obs-fold, whitespace before colon), validates `Content-Length` per RFC 9110 §8.6, and combines duplicate headers per RFC 9110 §5.3. +- **No dependencies** — uses only Foundation. 
+ +## Types + +| Type | Role | +|------|------| +| `HTTPServer` | Local HTTP/1.1 server on Network.framework. Binds to `127.0.0.1`, dispatches requests to an async handler. | +| `HTTPResponse` | Response struct with status, headers, and body. Can be initialized from a `(Data, URLResponse)` tuple for proxying. | +| `HTTPRequestParser` | Incremental, stateful parser. Feed it bytes with `append(_:)`, check `state`, then call `parseRequest()`. | +| `HTTPRequestSerializer` | Stateless header parser. Call `parseHeaders(from:)` with a complete `Data` buffer. | +| `ParsedHTTPRequest` | The result — either `.partial` (headers only) or `.complete` (headers + body). | +| `MultipartPart` | A parsed multipart/form-data part with `name`, `filename`, `contentType`, and `body`. | +| `RequestBody` | Abstracts body storage (in-memory or file-backed). Provides `count` (O(1) byte count), `data` (async accessor), and `makeInputStream()`. | +| `HTTPRequestParseError` | Error enum covering all rejection reasons, with human-readable `localizedDescription` messages. | + +## Usage + +### Running a local server + +`HTTPServer` listens on the loopback interface (`127.0.0.1`) using Network.framework. Each incoming request is parsed automatically and delivered to your handler as a `ServerRequest`, which bundles the parsed HTTP request with timing diagnostics (`.parseDuration`). Your handler returns an `HTTPResponse` — the server serializes it back over the socket. + +```swift +import GutenbergKitHTTP + +let server = try await HTTPServer.start(name: "my-server", port: 8080) { req in + print("\(req.parsed.method) \(req.parsed.target) (\(req.parseDuration))") + return HTTPResponse(status: 200, body: Data("OK".utf8)) +} +print("Listening on port \(server.port)") +// ... later ... +server.stop() +``` + +Pass `nil` (or omit `port`) to let the system assign an available port — useful for tests or when running multiple servers. 
+ +When `requiresAuthentication` is enabled (the default), each request must include a `Proxy-Authorization: Bearer <token>` header carrying the server's randomly-generated token. The server uses `Proxy-Authorization` per RFC 9110 §11.7.1 rather than `Authorization`, so the client's `Authorization` header remains available for upstream credentials (e.g. HTTP Basic auth to the remote server). Unauthenticated requests receive a `407 Proxy Authentication Required` response with a `Proxy-Authenticate: Bearer` challenge header. + +### Proxying via URLSession + +The most common use case: forward web editor requests to a remote WordPress site. `HTTPResponse` has a convenience initializer that accepts a `(Data, URLResponse)` tuple, so you can pipe `URLSession` results directly back. + +```swift +let server = try await HTTPServer.start { req in + let url = URL(string: "https://example.com\(req.parsed.target)")! + var upstream = URLRequest(url: url) + upstream.httpMethod = req.parsed.method + return try await HTTPResponse(URLSession.shared.data(for: upstream)) +} +``` + +### One-shot parsing + +Use this when the full HTTP request is already in memory (e.g., from a test fixture or a buffered read). Pass the raw string or `Data` to the parser's convenience initializer, then call `parseRequest()` to get a `ParsedHTTPRequest`. + +```swift +import GutenbergKitHTTP + +let raw = "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Length: 14\r\n\r\n{\"title\":\"Hi\"}" +let parser = HTTPRequestParser(raw) + +let request = try parser.parseRequest()! +print(request.method) // "POST" +print(request.target) // "/wp/v2/posts" +print(request.header("Host")) // Optional("localhost") +``` + +The `header(_:)` method performs case-insensitive lookup per RFC 9110. + +### Incremental parsing + +Use this when data arrives in chunks from a socket. Call `append(_:)` as bytes arrive — the parser buffers body data to a temporary file so memory stays flat even for large uploads. 
Check `state` to decide when to parse. + +```swift +let parser = HTTPRequestParser() + +// Feed data as it arrives +parser.append(firstChunk) +parser.append(secondChunk) + +switch parser.state { +case .needsMoreData: + // Keep reading from the socket + break +case .headersComplete: + // Headers are available but body is still arriving + let partial = try parser.parseRequest()! + print(partial.method, partial.target) +case .complete: + // Everything received — parse and forward + let request = try parser.parseRequest()! + // ... +} +``` + +You can call `parseRequest()` in either `.headersComplete` or `.complete` state. In `.headersComplete`, the returned `ParsedHTTPRequest` is `.partial` (headers only, no body). In `.complete`, it is `.complete` with the full body available via `request.body`. + +### Converting to URLRequest for forwarding + +`ParsedHTTPRequest` can generate a Foundation `URLRequest` for forwarding to a remote server. Pass a base URL — the request's target path is resolved relative to it. The method, body, and end-to-end headers are carried over automatically; hop-by-hop headers (such as `Host`, `Connection`, `Transfer-Encoding`, and `Proxy-Authorization`) are not forwarded. + +```swift +let baseURL = URL(string: "https://example.com")! +if let urlRequest = request.urlRequest(relativeTo: baseURL) { + let (data, response) = try await URLSession.shared.data(for: urlRequest) +} +``` + +### Multipart parsing + +For `multipart/form-data` requests (e.g., media uploads), call `multipartParts()` on a parsed request. The boundary is extracted automatically from the `Content-Type` header. Each `MultipartPart` gives you the field `name`, optional `filename` and `contentType`, and a `body` backed by the same `RequestBody` abstraction (in-memory or file-backed). + +```swift +let request = try parser.parseRequest()! +let parts = try request.multipartParts() + +for part in parts { + print("\(part.name): \(part.body.count) bytes") + if let filename = part.filename { + print(" filename: \(filename), contentType: \(part.contentType ?? 
"unknown")") + } +} +``` + +### Error handling + +`parseRequest()` throws `HTTPRequestParseError` for malformed input. Each case maps to a specific RFC violation or safety check, and carries a human-readable `localizedDescription`. + +```swift +do { + let request = try parser.parseRequest() +} catch let error as HTTPRequestParseError { + switch error { + case .emptyHeaderSection: // No request line before \r\n\r\n + case .malformedRequestLine: // Missing method or target + case .obsFoldDetected: // Continuation line (rejected per RFC 7230 §3.2.4) + case .whitespaceBeforeColon: // Space or tab between field-name and colon (RFC 7230 §3.2.4) + case .invalidContentLength: // Non-numeric or negative Content-Length + case .conflictingContentLength: // Multiple Content-Length headers disagree + case .unsupportedTransferEncoding: // Transfer-Encoding not supported + case .invalidHTTPVersion: // Unrecognized HTTP version + case .invalidFieldName: // Invalid characters in header field name + case .invalidFieldValue: // Invalid characters in header field value + case .missingHostHeader: // HTTP/1.1 requires Host + case .multipleHostHeaders: // Duplicate Host headers + case .payloadTooLarge: // Body exceeds maxBodySize (HTTP 413) + case .headersTooLarge: // Headers exceed limit (HTTP 431) + case .invalidEncoding: // Headers aren't valid UTF-8 + } + + // All cases provide a human-readable description: + print(error.localizedDescription) +} +``` + +You can also limit the maximum body size by passing `maxBodySize` to the parser initializer — requests exceeding the limit throw `.payloadTooLarge`. + +## RFC Conformance + +The parser enforces or documents behavior for the following: + +- **RFC 7230 §3.2.4** — Rejects obs-fold (continuation lines) and whitespace before colon in field names. +- **RFC 7230 §3.3.3** — Rejects conflicting `Content-Length` values across multiple headers. +- **RFC 9110 §5.3** — Combines duplicate header field lines with comma-separated values. 
+- **RFC 9110 §8.6** — Validates `Content-Length` values including comma-separated lists of identical values (e.g., `5, 5`). +- **RFC 9112 §3** — Parses the request line into method, target, and optional HTTP version. + +Conformance is verified by 115+ tests across `HTTPRequestParserTests`, `RFC7230ConformanceTests`, and `RFC9112ConformanceTests`, plus shared cross-platform JSON test fixtures (also used by the Kotlin test suite). + +## Debug Server + +The companion `GutenbergKitDebugServer` executable provides a ready-made server for manual testing. See its [README](../GutenbergKitDebugServer/README.md) for details. diff --git a/ios/Sources/GutenbergKitHTTP/RequestBody.swift b/ios/Sources/GutenbergKitHTTP/RequestBody.swift new file mode 100644 index 000000000..80d7a2d72 --- /dev/null +++ b/ios/Sources/GutenbergKitHTTP/RequestBody.swift @@ -0,0 +1,271 @@ +import Foundation + +/// Reference-counted owner for a temporary file. +/// +/// The file is deleted when the last reference is released. This allows +/// ``RequestBody`` (a value type) to share ownership of a temp file across +/// copies — including multipart part bodies that reference byte ranges within +/// the same file. +final class TempFileOwner: Sendable { + let url: URL + init(url: URL) { self.url = url } + deinit { try? FileManager.default.removeItem(at: url) } +} + +/// An HTTP request body with stream semantics. +/// +/// `RequestBody` abstracts over the underlying storage (in-memory data or a file on disk) +/// and provides uniform access regardless of backing: +/// - **Stream access**: Use ``makeInputStream()`` to read without loading everything into memory. +/// - **Materialized access**: Use ``data`` to get the full contents. For file-backed bodies, +/// this reads the entire file into memory. 
+public struct RequestBody: Sendable, Equatable { + + enum Storage: Sendable, Equatable { + case data(Data) + case file(URL) + case fileSlice(url: URL, offset: UInt64, length: Int) + } + + let storage: Storage + + /// Retains the backing file for parser-created bodies. + /// Ignored in equality comparisons. + private let _owner: TempFileOwner? + + public static func == (lhs: RequestBody, rhs: RequestBody) -> Bool { + lhs.storage == rhs.storage + } + + /// Creates a body backed by in-memory data. + public init(data: Data) { + self.storage = .data(data) + self._owner = nil + } + + /// Creates a body backed by a file on disk. + /// + /// The caller is responsible for ensuring the file exists for the lifetime of the body. + public init(fileURL: URL) { + self.storage = .file(fileURL) + self._owner = nil + } + + /// Creates a body backed by a byte range within a file on disk. + /// + /// The bytes are not read until ``makeInputStream()`` is called, keeping the + /// representation lightweight for use cases like multipart part bodies. + init(fileURL: URL, offset: UInt64, length: Int) { + self.storage = .fileSlice(url: fileURL, offset: offset, length: length) + self._owner = nil + } + + /// Creates a body backed by an owned temporary file. + /// + /// The file is automatically deleted when the last `RequestBody` referencing it + /// (including multipart part bodies derived from it) is released. + init(ownedFileURL: URL) { + self.storage = .file(ownedFileURL) + self._owner = TempFileOwner(url: ownedFileURL) + } + + /// Creates a body backed by a byte range within a file, sharing ownership + /// with the source body's temp file. + init(fileURL: URL, offset: UInt64, length: Int, owner: TempFileOwner?) { + self.storage = .fileSlice(url: fileURL, offset: offset, length: length) + self._owner = owner + } + + /// The temp file owner, if any. Used to propagate ownership to derived bodies + /// (e.g., multipart part slices). + var fileOwner: TempFileOwner? 
{ _owner } + + /// The number of bytes in the body. + /// + /// For in-memory and file-slice bodies this is O(1). For file-backed bodies + /// this queries the file system without reading any data. + public var count: Int { + switch storage { + case .data(let data): + return data.count + case .file(let url): + return (try? url.resourceValues(forKeys: [.fileSizeKey]))?.fileSize ?? 0 + case .fileSlice(_, _, let length): + return length + } + } + + /// The file URL backing this body, or `nil` for in-memory bodies. + public var fileURL: URL? { + switch storage { + case .data: return nil + case .file(let url), .fileSlice(let url, _, _): return url + } + } + + /// The byte offset within the backing file where this body begins. + /// Returns 0 for in-memory and whole-file bodies. + public var fileOffset: UInt64 { + switch storage { + case .fileSlice(_, let offset, _): return offset + default: return 0 + } + } + + /// The in-memory data backing this body, or `nil` for file-backed bodies. + /// + /// Unlike ``data``, this does **not** read from disk. + public var inMemoryData: Data? { + switch storage { + case .data(let data): return data + default: return nil + } + } + + /// The full body contents as `Data`. + /// + /// For in-memory bodies this returns the data directly. For file-backed + /// bodies the file is read on a background thread to avoid blocking. + /// + /// You should almost always use `InputStream` instead. + public var data: Data { + get async throws { + switch storage { + case .data(let data): + return data + case .file(let url): + return try Data(contentsOf: url) + case .fileSlice(let url, let offset, let length): + return try FileHandle.withReadHandle(forUrl: url) { + try $0.seek(toOffset: offset) + return try $0.read(upToCount: length) ?? Data() + } + } + } + } + + /// Creates an `InputStream` for reading the body contents. + /// + /// For file-slice bodies, a bound stream pair is used instead of subclassing + /// `InputStream`. 
Subclassing `InputStream` with `super.init(data:)` triggers + /// Foundation's class cluster design, causing `URLSession` to read from the + /// empty superclass `Data` instead of the overridden `read(_:maxLength:)`. + /// The bound stream pair avoids this because `Stream.getBoundStreams` returns + /// a native `InputStream` that `URLSession` handles correctly. + /// + /// - Throws: A `CocoaError` if the backing file does not exist, is not readable, or is a directory. + public func makeInputStream() throws -> InputStream { + switch storage { + case .data(let data): + return InputStream(data: data) + case .file(let url): + return try Self.openFileStream(url: url) + case .fileSlice(let url, let offset, let length): + return try Self.makePipedFileSliceStream(url: url, offset: offset, length: length) + } + } + + /// Reads the entire body contents into memory and returns the data along with the + /// file offset at which the data begins (0 for in-memory bodies). + /// + /// This is intended for scanning operations (e.g., multipart boundary detection) + /// where the full body must be examined. The caller should release the returned + /// `Data` as soon as scanning is complete. + func readAllData() throws -> (Data, UInt64) { + return switch storage { + case .data(let data): (data, 0) + case .file(let url): try (Data(contentsOf: url), 0) + case .fileSlice(let url, let offset, let length): + try FileHandle.withReadHandle(forUrl: url) { + try $0.seek(toOffset: offset) + return (try $0.read(upToCount: length) ?? Data(), offset) + } + } + } + + /// Creates an `InputStream` backed by a bound stream pair that reads a byte + /// range from a file on a background thread. + /// + /// The writer thread reads chunks from the file and pushes them into the + /// `OutputStream` end of the pair. The returned `InputStream` is a native + /// Foundation stream that `URLSession` and other consumers handle correctly. 
+ /// Backpressure is automatic: `OutputStream.write` blocks when the internal + /// buffer is full. + private static func makePipedFileSliceStream(url: URL, offset: UInt64, length: Int) throws -> InputStream { + guard length > 0 else { + return InputStream(data: Data()) + } + + let fileHandle = try FileHandle(forReadingFrom: url) + try fileHandle.seek(toOffset: offset) + + var readStream: InputStream? + var writeStream: OutputStream? + Stream.getBoundStreams(withBufferSize: 65_536, inputStream: &readStream, outputStream: &writeStream) + + guard let inputStream = readStream, let outputStream = writeStream else { + try? fileHandle.close() + throw CocoaError(.fileReadUnknown, userInfo: [NSFilePathErrorKey: url.path]) + } + + outputStream.open() + + // OutputStream is not Sendable but is safely transferred to the + // writer thread — only the thread accesses it after this point. + nonisolated(unsafe) let output = outputStream + + Thread.detachNewThread { + defer { + output.close() + try? fileHandle.close() + } + + var remaining = length + while remaining > 0 { + let chunkSize = min(65_536, remaining) + guard let chunk = try? 
fileHandle.read(upToCount: chunkSize), + !chunk.isEmpty else { + break + } + + var written = 0 + chunk.withUnsafeBytes { buffer in + guard let base = buffer.baseAddress?.assumingMemoryBound(to: UInt8.self) else { return } + while written < chunk.count { + let result = output.write(base.advanced(by: written), maxLength: chunk.count - written) + if result <= 0 { return } + written += result + } + } + + if written < chunk.count { break } + remaining -= chunk.count + } + } + + return inputStream + } + + private static func openFileStream(url: URL) throws -> InputStream { + let path = url.path + var isDirectory: ObjCBool = false + + guard FileManager.default.fileExists(atPath: path, isDirectory: &isDirectory) else { + throw CocoaError(.fileNoSuchFile, userInfo: [NSFilePathErrorKey: path]) + } + + guard !isDirectory.boolValue else { + throw CocoaError(.fileReadInvalidFileName, userInfo: [NSFilePathErrorKey: path]) + } + + guard FileManager.default.isReadableFile(atPath: path) else { + throw CocoaError(.fileReadNoPermission, userInfo: [NSFilePathErrorKey: path]) + } + + guard let stream = InputStream(url: url) else { + throw CocoaError(.fileReadUnknown, userInfo: [NSFilePathErrorKey: path]) + } + + return stream + } +} diff --git a/ios/Tests/GutenbergKitHTTPTests/ChunkedMultipartTests.swift b/ios/Tests/GutenbergKitHTTPTests/ChunkedMultipartTests.swift new file mode 100644 index 000000000..cf26dfbd4 --- /dev/null +++ b/ios/Tests/GutenbergKitHTTPTests/ChunkedMultipartTests.swift @@ -0,0 +1,342 @@ +import Foundation +import Testing +@testable import GutenbergKitHTTP + +/// Tests for the chunked (file-backed) multipart parsing path. +/// +/// The in-memory path is tested extensively in ``RFC7578ConformanceTests`` and +/// the shared fixture tests. These tests verify the chunked scanner that runs +/// when the body is backed by a file on disk. 
+@Suite("Chunked Multipart Parsing") +struct ChunkedMultipartTests { + + // MARK: - Basic Parsing + + @Test("single text field parsed from file-backed body") + func singleTextField() throws { + let (url, request) = try makeFileBackedRequest( + fields: [("title", nil, nil, Data("My Blog Post".utf8))], + boundary: "AaB03x" + ) + defer { try? FileManager.default.removeItem(at: url) } + + let parts = try request.multipartParts() + + #expect(parts.count == 1) + #expect(parts[0].name == "title") + #expect(parts[0].filename == nil) + #expect(parts[0].contentType == "text/plain") + #expect(try readAll(parts[0].body) == Data("My Blog Post".utf8)) + } + + @Test("multiple parts parsed from file-backed body") + func multipleParts() throws { + let (url, request) = try makeFileBackedRequest( + fields: [ + ("title", nil, nil, Data("Hello".utf8)), + ("file", "photo.jpg", "image/jpeg", Data("jpeg-data".utf8)), + ("caption", nil, nil, Data("A photo".utf8)), + ], + boundary: "WebKitBoundary123" + ) + defer { try? FileManager.default.removeItem(at: url) } + + let parts = try request.multipartParts() + + #expect(parts.count == 3) + #expect(parts[0].name == "title") + #expect(try readAll(parts[0].body) == Data("Hello".utf8)) + #expect(parts[1].name == "file") + #expect(parts[1].filename == "photo.jpg") + #expect(parts[1].contentType == "image/jpeg") + #expect(try readAll(parts[1].body) == Data("jpeg-data".utf8)) + #expect(parts[2].name == "caption") + #expect(try readAll(parts[2].body) == Data("A photo".utf8)) + } + + @Test("empty part body parsed correctly") + func emptyPartBody() throws { + let (url, request) = try makeFileBackedRequest( + fields: [("empty", nil, nil, Data())], + boundary: "AaB03x" + ) + defer { try? 
FileManager.default.removeItem(at: url) } + + let parts = try request.multipartParts() + + #expect(parts.count == 1) + #expect(parts[0].name == "empty") + #expect(try readAll(parts[0].body) == Data()) + } + + @Test("binary data preserved through file-backed parsing") + func binaryData() throws { + // Include bytes that would be problematic if treated as text: NUL, 0xFF, CRLF sequences. + var binaryContent = Data(repeating: 0x00, count: 128) + binaryContent.append(contentsOf: (0...255).map { UInt8($0) }) + binaryContent.append(Data(repeating: 0xFF, count: 128)) + + let (url, request) = try makeFileBackedRequest( + fields: [("file", "binary.bin", "application/octet-stream", binaryContent)], + boundary: "BinaryBoundary99" + ) + defer { try? FileManager.default.removeItem(at: url) } + + let parts = try request.multipartParts() + + #expect(parts.count == 1) + #expect(parts[0].filename == "binary.bin") + #expect(try readAll(parts[0].body) == binaryContent) + } + + @Test("preamble before first boundary is ignored") + func preambleIgnored() throws { + let boundary = "AaB03x" + let preamble = "This is the preamble. It should be ignored.\r\n" + let body = "\(preamble)--\(boundary)\r\nContent-Disposition: form-data; name=\"field\"\r\n\r\nvalue\r\n--\(boundary)--\r\n" + + let (url, request) = try makeFileBackedRequestFromRawBody(body: body, boundary: boundary) + defer { try? 
FileManager.default.removeItem(at: url) } + + let parts = try request.multipartParts() + + #expect(parts.count == 1) + #expect(parts[0].name == "field") + #expect(try readAll(parts[0].body) == Data("value".utf8)) + } + + @Test("transport padding after boundary is skipped") + func transportPadding() throws { + let boundary = "AaB03x" + // Add spaces and tabs after the boundary delimiter + let body = "--\(boundary) \t \r\nContent-Disposition: form-data; name=\"field\"\r\n\r\nvalue\r\n--\(boundary)--\r\n" + + let (url, request) = try makeFileBackedRequestFromRawBody(body: body, boundary: boundary) + defer { try? FileManager.default.removeItem(at: url) } + + let parts = try request.multipartParts() + + #expect(parts.count == 1) + #expect(parts[0].name == "field") + #expect(try readAll(parts[0].body) == Data("value".utf8)) + } + + // MARK: - Error Cases + + @Test("close-delimiter-only body throws malformedBody") + func closeDelimiterOnly() throws { + let boundary = "AaB03x" + let body = "--\(boundary)--\r\n" + + let (url, request) = try makeFileBackedRequestFromRawBody(body: body, boundary: boundary) + defer { try? FileManager.default.removeItem(at: url) } + + #expect(throws: MultipartParseError.malformedBody) { + try request.multipartParts() + } + } + + @Test("missing close delimiter throws malformedBody") + func missingCloseDelimiter() throws { + let boundary = "AaB03x" + let body = "--\(boundary)\r\nContent-Disposition: form-data; name=\"field\"\r\n\r\nvalue" + + let (url, request) = try makeFileBackedRequestFromRawBody(body: body, boundary: boundary) + defer { try? 
FileManager.default.removeItem(at: url) } + + #expect(throws: MultipartParseError.malformedBody) { + try request.multipartParts() + } + } + + // MARK: - Chunk Boundary Edge Cases + + @Test("boundary split across chunk boundary is found correctly") + func boundarySplitAcrossChunk() throws { + let boundary = "AaB03x" + let delimiter = "--\(boundary)" // 8 bytes + + // We want the second delimiter to start 5 bytes before the 65536 chunk boundary, + // so it straddles the boundary: 5 bytes in chunk 1, 3 bytes in chunk 2. + let splitPoint = 65_536 - 5 + + // Calculate the header overhead for the first part: + // "--AaB03x\r\n" = 10 bytes + // "Content-Disposition: form-data; name=\"pad\"\r\n" = 44 bytes + // "\r\n" = 2 bytes (header/body separator) + // Total: 56 bytes + // After the body: "\r\n" = 2 bytes (CRLF before next delimiter) + // So: padding_length = splitPoint - 56 - 2 = splitPoint - 58 + let headerOverhead = Data("--\(boundary)\r\nContent-Disposition: form-data; name=\"pad\"\r\n\r\n".utf8).count + let crlfBeforeDelimiter = 2 + let paddingLength = splitPoint - headerOverhead - crlfBeforeDelimiter + + let padding = Data(repeating: UInt8(ascii: "A"), count: paddingLength) + + let (url, request) = try makeFileBackedRequest( + fields: [ + ("pad", nil, nil, padding), + ("after", nil, nil, Data("found-it".utf8)), + ], + boundary: boundary + ) + defer { try? FileManager.default.removeItem(at: url) } + + // Verify the delimiter actually straddles the chunk boundary. + let fileData = try Data(contentsOf: url) + let delimData = Data(delimiter.utf8) + if let range = fileData.range(of: delimData, in: (fileData.startIndex + headerOverhead).. 128 KB). + let largeContent = Data(repeating: UInt8(ascii: "X"), count: 200_000) + + let (url, request) = try makeFileBackedRequest( + fields: [ + ("large", "big.bin", "application/octet-stream", largeContent), + ("meta", nil, nil, Data("description".utf8)), + ], + boundary: "LargeBoundary42" + ) + defer { try? 
FileManager.default.removeItem(at: url) } + + let parts = try request.multipartParts() + + #expect(parts.count == 2) + #expect(parts[0].name == "large") + #expect(parts[0].body.count == largeContent.count) + #expect(try readAll(parts[0].body) == largeContent) + #expect(parts[1].name == "meta") + #expect(try readAll(parts[1].body) == Data("description".utf8)) + } + + // MARK: - fileSlice Source + + @Test("file-backed body with non-zero offset (fileSlice) parses correctly") + func fileSliceSource() throws { + let boundary = "AaB03x" + let multipartBody = "--\(boundary)\r\nContent-Disposition: form-data; name=\"field\"\r\n\r\nvalue\r\n--\(boundary)--\r\n" + let multipartData = Data(multipartBody.utf8) + + // Write garbage prefix + multipart body to the file. + let garbagePrefix = Data(repeating: UInt8(ascii: "Z"), count: 500) + let url = FileManager.default.temporaryDirectory + .appendingPathComponent("slice-test-\(UUID().uuidString)") + try (garbagePrefix + multipartData).write(to: url) + defer { try? FileManager.default.removeItem(at: url) } + + // Create a fileSlice body that starts after the garbage prefix. + let body = RequestBody(fileURL: url, offset: UInt64(garbagePrefix.count), length: multipartData.count) + let request = ParsedHTTPRequest.complete( + method: "POST", + target: "/upload", + httpVersion: "HTTP/1.1", + headers: ["Content-Type": "multipart/form-data; boundary=\(boundary)", "Host": "localhost"], + body: body + ) + + let parts = try request.multipartParts() + + #expect(parts.count == 1) + #expect(parts[0].name == "field") + #expect(try readAll(parts[0].body) == Data("value".utf8)) + } + + // MARK: - Helpers + + /// Builds a multipart body from field descriptors, writes it to a temp file, + /// and returns a `ParsedHTTPRequest` with a file-backed body. 
+ private func makeFileBackedRequest( + fields: [(name: String, filename: String?, contentType: String?, value: Data)], + boundary: String + ) throws -> (URL, ParsedHTTPRequest) { + var body = Data() + for field in fields { + body.append(Data("--\(boundary)\r\n".utf8)) + var disposition = "Content-Disposition: form-data; name=\"\(field.name)\"" + if let filename = field.filename { + disposition += "; filename=\"\(filename)\"" + } + body.append(Data("\(disposition)\r\n".utf8)) + if let ct = field.contentType { + body.append(Data("Content-Type: \(ct)\r\n".utf8)) + } + body.append(Data("\r\n".utf8)) + body.append(field.value) + body.append(Data("\r\n".utf8)) + } + body.append(Data("--\(boundary)--\r\n".utf8)) + + let url = FileManager.default.temporaryDirectory + .appendingPathComponent("multipart-test-\(UUID().uuidString)") + try body.write(to: url) + + let requestBody = RequestBody(fileURL: url) + let request = ParsedHTTPRequest.complete( + method: "POST", + target: "/upload", + httpVersion: "HTTP/1.1", + headers: ["Content-Type": "multipart/form-data; boundary=\(boundary)", "Host": "localhost"], + body: requestBody + ) + + return (url, request) + } + + /// Writes a raw multipart body string to a temp file and returns a file-backed request. 
+ private func makeFileBackedRequestFromRawBody( + body: String, + boundary: String + ) throws -> (URL, ParsedHTTPRequest) { + let bodyData = Data(body.utf8) + let url = FileManager.default.temporaryDirectory + .appendingPathComponent("multipart-test-\(UUID().uuidString)") + try bodyData.write(to: url) + + let requestBody = RequestBody(fileURL: url) + let request = ParsedHTTPRequest.complete( + method: "POST", + target: "/upload", + httpVersion: "HTTP/1.1", + headers: ["Content-Type": "multipart/form-data; boundary=\(boundary)", "Host": "localhost"], + body: requestBody + ) + + return (url, request) + } + + private func readAll(_ body: RequestBody) throws -> Data { + let stream = try body.makeInputStream() + stream.open() + defer { stream.close() } + + var data = Data() + let bufferSize = 1024 + let buffer = UnsafeMutablePointer.allocate(capacity: bufferSize) + defer { buffer.deallocate() } + + // Use read() directly instead of hasBytesAvailable to avoid race + // conditions with bound stream pairs, where data from the writer + // thread may not have arrived yet when hasBytesAvailable is checked. 
+ while true { + let bytesRead = stream.read(buffer, maxLength: bufferSize) + if bytesRead <= 0 { break } + data.append(buffer, count: bytesRead) + } + + return data + } +} diff --git a/ios/Tests/GutenbergKitHTTPTests/ConnectionTasksTests.swift b/ios/Tests/GutenbergKitHTTPTests/ConnectionTasksTests.swift new file mode 100644 index 000000000..60c2b205a --- /dev/null +++ b/ios/Tests/GutenbergKitHTTPTests/ConnectionTasksTests.swift @@ -0,0 +1,129 @@ +#if canImport(Network) + +import Foundation +import Testing +@testable import GutenbergKitHTTP + +@Suite("ConnectionTasks") +struct ConnectionTasksTests { + + @Test("track registers a task") + func track() async { + let tasks = ConnectionTasks() + let id = UUID() + let task = Task {} + tasks.track(id, task) + + // Tracking the same ID again should not crash (overwrites) + let task2 = Task {} + tasks.track(id, task2) + } + + @Test("cancelAll cancels all tracked tasks") + func cancelAllCancelsTasks() async { + let tasks = ConnectionTasks() + + let cancelled1 = ManagedAtomic(false) + let cancelled2 = ManagedAtomic(false) + + let id1 = UUID() + let t1 = Task { + // Spin until cancelled + while !Task.isCancelled { + await Task.yield() + } + cancelled1.set(true) + } + tasks.track(id1, t1) + + let id2 = UUID() + let t2 = Task { + while !Task.isCancelled { + await Task.yield() + } + cancelled2.set(true) + } + tasks.track(id2, t2) + + // Give tasks a moment to start + await Task.yield() + + tasks.cancelAll() + + // Wait for tasks to finish + await t1.value + await t2.value + + #expect(cancelled1.get()) + #expect(cancelled2.get()) + } + + @Test("cancelAll is idempotent") + func cancelAllIdempotent() async { + let tasks = ConnectionTasks() + + let id = UUID() + let task = Task { + while !Task.isCancelled { + await Task.yield() + } + } + tasks.track(id, task) + + tasks.cancelAll() + await task.value + + // Second cancelAll should not crash + tasks.cancelAll() + } + + @Test("cancelAll on already-completed tasks does not crash") + func 
cancelAllWithCompletedTasks() async { + let tasks = ConnectionTasks() + let id = UUID() + let task = Task {} + tasks.track(id, task) + await task.value + + // Task is already done — cancelAll should not crash + tasks.cancelAll() + } + + @Test("concurrent track does not crash") + func concurrentAccess() async { + let tasks = ConnectionTasks() + + await withTaskGroup(of: Void.self) { group in + for _ in 0..<100 { + group.addTask { + let id = UUID() + let task = Task {} + tasks.track(id, task) + } + } + } + + // Final cleanup should not crash + tasks.cancelAll() + } +} + +/// Minimal thread-safe boolean for test assertions. +private final class ManagedAtomic: @unchecked Sendable { + private let lock = NSLock() + private var value: Bool + + init(_ value: Bool) { + self.value = value + } + + func get() -> Bool { + lock.withLock { value } + } + + func set(_ newValue: Bool) { + lock.withLock { value = newValue } + } +} + +#endif // canImport(Network) diff --git a/ios/Tests/GutenbergKitHTTPTests/FixtureTests.swift b/ios/Tests/GutenbergKitHTTPTests/FixtureTests.swift new file mode 100644 index 000000000..f361ccdb7 --- /dev/null +++ b/ios/Tests/GutenbergKitHTTPTests/FixtureTests.swift @@ -0,0 +1,457 @@ +import Foundation +import Testing +@testable import GutenbergKitHTTP + +// MARK: - Fixture Models + +struct HeaderValueFixtures: Decodable { + let tests: [HeaderValueTestCase] + + struct HeaderValueTestCase: Decodable { + let description: String + let parameter: String + let headerValue: String + let expected: String? + } +} + +struct RequestParsingFixtures: Decodable { + let tests: [RequestTestCase] + let errorTests: [RequestErrorTestCase] + let incrementalTests: [IncrementalTestCase] + + struct ExpectedAfterHeaders: Decodable { + var hasHeaders: Bool? + var isComplete: Bool? + var method: String? + var target: String? + } + + struct ExpectedRequest: Decodable { + var method: String? + var target: String? + var headers: [String: String]? + var isComplete: Bool? 
+ var hasHeaders: Bool? + var parseResult: String? + + // body uses explicit key presence tracking so we can distinguish + // "body key absent" (don't check) from "body": null (expect nil) + private(set) var body: String? + private(set) var hasBodyExpectation: Bool = false + + var afterHeaders: ExpectedAfterHeaders? + + private enum CodingKeys: String, CodingKey { + case method, target, headers, body, isComplete, hasHeaders, parseResult, afterHeaders + } + + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + method = try container.decodeIfPresent(String.self, forKey: .method) + target = try container.decodeIfPresent(String.self, forKey: .target) + headers = try container.decodeIfPresent([String: String].self, forKey: .headers) + isComplete = try container.decodeIfPresent(Bool.self, forKey: .isComplete) + hasHeaders = try container.decodeIfPresent(Bool.self, forKey: .hasHeaders) + parseResult = try container.decodeIfPresent(String.self, forKey: .parseResult) + hasBodyExpectation = container.contains(.body) + body = try container.decodeIfPresent(String.self, forKey: .body) + afterHeaders = try container.decodeIfPresent(ExpectedAfterHeaders.self, forKey: .afterHeaders) + } + } + + struct RequestTestCase: Decodable { + let description: String + let input: String + let expected: ExpectedRequest + var appendAfterComplete: String? + var maxBodySize: Int64? + } + + struct RequestErrorExpected: Decodable { + let error: String + } + + struct RequestErrorTestCase: Decodable { + let description: String + var input: String? + var inputBase64: String? + let expected: RequestErrorExpected + var maxBodySize: Int64? + } + + struct IncrementalTestCase: Decodable { + let description: String + var input: String? + var headers: String? + var bodyChunks: [String]? + var chunkSize: Int? 
+ let expected: ExpectedRequest + } +} + +struct MultipartFixtures: Decodable { + let tests: [MultipartTestCase] + let errorTests: [MultipartErrorTestCase] + + struct ExpectedPart: Decodable { + let name: String + var filename: String? + var contentType: String? + var body: String? + /// Whether the "filename" key was present in the JSON fixture (distinguishes absent from null). + var filenameSpecified: Bool = false + + private enum CodingKeys: String, CodingKey { + case name, filename, contentType, body + } + + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + name = try container.decode(String.self, forKey: .name) + contentType = try container.decodeIfPresent(String.self, forKey: .contentType) + body = try container.decodeIfPresent(String.self, forKey: .body) + if container.contains(.filename) { + filename = try container.decodeIfPresent(String.self, forKey: .filename) + filenameSpecified = true + } + } + } + + struct Expected: Decodable { + var contentType: String? + var parts: [ExpectedPart]? + var error: String? + } + + struct MultipartTestCase: Decodable { + let description: String + let boundary: String + var quotedBoundary: Bool? + let rawBody: String + let expected: Expected + } + + struct MultipartErrorTestCase: Decodable { + let description: String + var contentType: String? + var body: String? + var boundary: String? + var rawBody: String? + let expected: Expected + } +} + +// MARK: - Fixture Loading + +private func fixtureURL(_ name: String) -> URL { + Bundle.module.url(forResource: name, withExtension: "json", subdirectory: "http")! +} + +private func loadFixture(_ name: String) throws -> T { + let data = try Data(contentsOf: fixtureURL(name)) + return try JSONDecoder().decode(T.self, from: data) +} + +// MARK: - Header Value Fixture Tests + +@Suite("Header Value Fixtures") +struct HeaderValueFixtureTests { + + @Test("All fixture cases pass", arguments: try! 
loadHeaderValueTests()) + func fixtureCase(_ testCase: HeaderValueFixtures.HeaderValueTestCase) { + let result = HeaderValue.extractParameter(testCase.parameter, from: testCase.headerValue) + #expect(result == testCase.expected, "\(testCase.description)") + } +} + +private func loadHeaderValueTests() throws -> [HeaderValueFixtures.HeaderValueTestCase] { + let fixtures: HeaderValueFixtures = try loadFixture("header-value-parsing") + return fixtures.tests +} + +extension HeaderValueFixtures.HeaderValueTestCase: CustomTestStringConvertible { + var testDescription: String { description } +} + +// MARK: - Request Parsing Fixture Tests + +@Suite("Request Parsing Fixtures") +struct RequestParsingFixtureTests { + + @Test("All basic parsing cases pass", arguments: try! loadRequestTests()) + func basicCase(_ testCase: RequestParsingFixtures.RequestTestCase) throws { + let raw = testCase.input + let parser: HTTPRequestParser + if let maxBodySize = testCase.maxBodySize { + parser = HTTPRequestParser(maxBodySize: maxBodySize) + parser.append(Data(raw.utf8)) + } else { + parser = HTTPRequestParser(raw) + } + + if let extra = testCase.appendAfterComplete { + parser.append(Data(extra.utf8)) + } + + let exp = testCase.expected + + if exp.isComplete == false && exp.hasHeaders == false { + #expect(!parser.state.hasHeaders) + #expect(try parser.parseRequest() == nil) + return + } + + let request = try #require(try parser.parseRequest()) + + if let method = exp.method { + #expect(request.method == method, "\(testCase.description): method") + } + if let target = exp.target { + #expect(request.target == target, "\(testCase.description): target") + } + if let isComplete = exp.isComplete { + if isComplete { + #expect(parser.state.isComplete, "\(testCase.description): isComplete") + } + } + if let headers = exp.headers { + for (key, value) in headers { + #expect(request.header(key) == value, "\(testCase.description): header \(key)") + } + } + if exp.hasBodyExpectation { + if let 
expectedBody = exp.body { + let requestBody = try #require(request.body) + #expect(try readAll(requestBody) == Data(expectedBody.utf8), "\(testCase.description): body content") + } else { + #expect(request.body == nil, "\(testCase.description): body should be nil") + } + } + } + + @Test("All error cases pass", arguments: try! loadRequestErrorTests()) + func errorCase(_ testCase: RequestParsingFixtures.RequestErrorTestCase) { + let parser: HTTPRequestParser + + if let base64 = testCase.inputBase64 { + let data = Data(base64Encoded: base64)! + if let maxBodySize = testCase.maxBodySize { + parser = HTTPRequestParser(maxBodySize: maxBodySize) + } else { + parser = HTTPRequestParser() + } + parser.append(data) + } else { + let raw = testCase.input! + if let maxBodySize = testCase.maxBodySize { + parser = HTTPRequestParser(maxBodySize: maxBodySize) + parser.append(Data(raw.utf8)) + } else { + parser = HTTPRequestParser(raw) + } + } + + let expectedError = testCase.expected.error + do { + _ = try parser.parseRequest() + Issue.record("Expected error \(expectedError) but parsing succeeded — \(testCase.description)") + } catch { + let errorName = String(describing: error) + #expect(errorName == expectedError, "\(testCase.description): expected \(expectedError) but got \(errorName)") + } + } + + @Test("All incremental cases pass", arguments: try! loadIncrementalTests()) + func incrementalCase(_ testCase: RequestParsingFixtures.IncrementalTestCase) throws { + let parser = HTTPRequestParser() + + if let input = testCase.input, let chunkSize = testCase.chunkSize { + let raw = input + let data = Data(raw.utf8) + for i in stride(from: 0, to: data.count, by: chunkSize) { + let end = min(i + chunkSize, data.count) + parser.append(data[i.. 
[RequestParsingFixtures.RequestTestCase] { + let fixtures: RequestParsingFixtures = try loadFixture("request-parsing") + return fixtures.tests +} + +private func loadRequestErrorTests() throws -> [RequestParsingFixtures.RequestErrorTestCase] { + let fixtures: RequestParsingFixtures = try loadFixture("request-parsing") + return fixtures.errorTests +} + +private func loadIncrementalTests() throws -> [RequestParsingFixtures.IncrementalTestCase] { + let fixtures: RequestParsingFixtures = try loadFixture("request-parsing") + return fixtures.incrementalTests +} + +extension RequestParsingFixtures.RequestTestCase: CustomTestStringConvertible { + var testDescription: String { description } +} + +extension RequestParsingFixtures.RequestErrorTestCase: CustomTestStringConvertible { + var testDescription: String { description } +} + +extension RequestParsingFixtures.IncrementalTestCase: CustomTestStringConvertible { + var testDescription: String { description } +} + +// MARK: - Multipart Parsing Fixture Tests + +@Suite("Multipart Parsing Fixtures") +struct MultipartParsingFixtureTests { + + @Test("All cases pass", arguments: try! loadMultipartTests()) + func testCase(_ testCase: MultipartFixtures.MultipartTestCase) throws { + let request = try buildRawMultipartRequest( + body: testCase.rawBody, + boundary: testCase.boundary, + quotedBoundary: testCase.quotedBoundary ?? false + ) + + if let expectedCT = testCase.expected.contentType { + #expect(request.header("Content-Type") == expectedCT, "\(testCase.description): Content-Type") + } + + let expectedParts = testCase.expected.parts ?? 
[] + let parts = try request.multipartParts() + #expect(parts.count == expectedParts.count, "\(testCase.description): part count") + + for (i, expectedPart) in expectedParts.enumerated() where i < parts.count { + #expect(parts[i].name == expectedPart.name, "\(testCase.description): part[\(i)].name") + if expectedPart.filenameSpecified { + #expect(parts[i].filename == expectedPart.filename, "\(testCase.description): part[\(i)].filename") + } + if let ct = expectedPart.contentType { + #expect(parts[i].contentType == ct, "\(testCase.description): part[\(i)].contentType") + } + if let body = expectedPart.body { + #expect(try readAll(parts[i].body) == Data(body.utf8), "\(testCase.description): part[\(i)].body") + } + } + } + + @Test("All error cases pass", arguments: try! loadMultipartErrorTests()) + func errorCase(_ testCase: MultipartFixtures.MultipartErrorTestCase) throws { + let request: ParsedHTTPRequest + + let contentType = testCase.contentType ?? testCase.expected.contentType + + if let rawBody = testCase.rawBody, let boundary = testCase.boundary { + request = try buildRawMultipartRequest(body: rawBody, boundary: boundary) + } else if let contentType, let body = testCase.body { + let raw = "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Type: \(contentType)\r\nContent-Length: \(body.utf8.count)\r\n\r\n\(body)" + let parser = HTTPRequestParser(raw) + request = try #require(try parser.parseRequest()) + } else if let contentType { + let raw = "GET /upload HTTP/1.1\r\nHost: localhost\r\nContent-Type: \(contentType)\r\n\r\n" + let parser = HTTPRequestParser(raw) + request = try #require(try parser.parseRequest()) + } else { + Issue.record("Invalid error test case: \(testCase.description)") + return + } + + let expectedError = testCase.expected.error! 
+ do { + _ = try request.multipartParts() + Issue.record("Expected error \(expectedError) but succeeded — \(testCase.description)") + } catch { + let errorName = String(describing: error) + #expect(errorName == expectedError, "\(testCase.description): expected \(expectedError) but got \(errorName)") + } + } +} + +private func loadMultipartTests() throws -> [MultipartFixtures.MultipartTestCase] { + let fixtures: MultipartFixtures = try loadFixture("multipart-parsing") + return fixtures.tests +} + +private func loadMultipartErrorTests() throws -> [MultipartFixtures.MultipartErrorTestCase] { + let fixtures: MultipartFixtures = try loadFixture("multipart-parsing") + return fixtures.errorTests +} + +extension MultipartFixtures.MultipartTestCase: CustomTestStringConvertible { + var testDescription: String { description } +} + +extension MultipartFixtures.MultipartErrorTestCase: CustomTestStringConvertible { + var testDescription: String { description } +} + +private func buildRawMultipartRequest( + body: String, + boundary: String, + quotedBoundary: Bool = false +) throws -> ParsedHTTPRequest { + let boundaryParam = quotedBoundary ? 
"\"\(boundary)\"" : boundary + let raw = "POST /wp/v2/media HTTP/1.1\r\nHost: localhost\r\nContent-Type: multipart/form-data; boundary=\(boundaryParam)\r\nContent-Length: \(body.utf8.count)\r\n\r\n\(body)" + let parser = HTTPRequestParser(raw) + return try #require(try parser.parseRequest()) +} diff --git a/ios/Tests/GutenbergKitHTTPTests/HTTPRequestParserTests.swift b/ios/Tests/GutenbergKitHTTPTests/HTTPRequestParserTests.swift new file mode 100644 index 000000000..f4df6bd08 --- /dev/null +++ b/ios/Tests/GutenbergKitHTTPTests/HTTPRequestParserTests.swift @@ -0,0 +1,497 @@ +import Foundation +import Testing +@testable import GutenbergKitHTTP + +@Suite("HTTPRequestParser") +struct HTTPRequestParserTests { + + // MARK: - Basic Request Parsing + + @Test("parses a simple GET request") + func parsesSimpleGet() throws { + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nHost: localhost:8080\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(parser.state.isComplete) + #expect(request.method == "GET") + #expect(request.target == "/wp/v2/posts") + #expect(request.body == nil) + } + + @Test("parses request target with query string") + func parsesQueryString() throws { + let parser = HTTPRequestParser("GET /wp/v2/posts?per_page=10&status=publish HTTP/1.1\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.target == "/wp/v2/posts?per_page=10&status=publish") + } + + @Test("parses POST request with JSON body") + func parsesPostWithBody() throws { + let body = #"{"title":"Hello","content":"World"}"# + let parser = HTTPRequestParser("POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Type: application/json\r\nContent-Length: \(body.utf8.count)\r\n\r\n\(body)") + let request = try #require(try parser.parseRequest()) + + #expect(parser.state.isComplete) + #expect(request.method == "POST") + #expect(request.target == "/wp/v2/posts") + #expect(request.header("Content-Type") == 
"application/json") + + let requestBody = try #require(request.body) + #expect(try readAll(requestBody) == Data(body.utf8)) + } + + @Test("parses DELETE request") + func parsesDelete() throws { + let parser = HTTPRequestParser("DELETE /wp/v2/posts/42 HTTP/1.1\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.method == "DELETE") + #expect(request.target == "/wp/v2/posts/42") + } + + @Test("parses PUT request with body") + func parsesPutWithBody() throws { + let body = #"{"title":"Updated"}"# + let parser = HTTPRequestParser("PUT /wp/v2/posts/42 HTTP/1.1\r\nHost: localhost\r\nContent-Length: \(body.utf8.count)\r\n\r\n\(body)") + let request = try #require(try parser.parseRequest()) + + #expect(request.method == "PUT") + let requestBody = try #require(request.body) + #expect(try readAll(requestBody) == Data(body.utf8)) + } + + // MARK: - Header Parsing + + @Test("parses multiple headers") + func parsesMultipleHeaders() throws { + let parser = HTTPRequestParser("GET / HTTP/1.1\r\nHost: localhost\r\nAccept: application/json\r\nAuthorization: Bearer token123\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.headers["Host"] == "localhost") + #expect(request.headers["Accept"] == "application/json") + #expect(request.headers["Authorization"] == "Bearer token123") + } + + @Test("header lookup is case-insensitive") + func headerLookupCaseInsensitive() throws { + let parser = HTTPRequestParser("GET / HTTP/1.1\r\nHost: localhost\r\nContent-Type: text/html\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.header("content-type") == "text/html") + #expect(request.header("CONTENT-TYPE") == "text/html") + #expect(request.header("Content-Type") == "text/html") + } + + @Test("parses header values containing colons") + func parsesHeaderValuesWithColons() throws { + let parser = HTTPRequestParser("GET / HTTP/1.1\r\nHost: localhost\r\nAuthorization: Basic 
dXNlcjpwYXNz\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.header("Authorization") == "Basic dXNlcjpwYXNz") + } + + // MARK: - Incremental Parsing + + @Test("handles data arriving in chunks") + func handlesIncrementalData() throws { + let raw = "GET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n\r\n" + let data = Data(raw.utf8) + + let parser = HTTPRequestParser() + + // Feed data byte by byte + for i in 0.. String { + let connection = NWConnection( + host: .ipv4(.loopback), + port: NWEndpoint.Port(rawValue: port)!, + using: .tcp + ) + defer { connection.cancel() } + + try await withCheckedThrowingContinuation { (cont: CheckedContinuation) in + connection.stateUpdateHandler = { state in + switch state { + case .ready: + connection.stateUpdateHandler = nil + cont.resume() + case .failed(let error): + connection.stateUpdateHandler = nil + cont.resume(throwing: error) + default: + break + } + } + connection.start(queue: .global()) + } + + try await withCheckedThrowingContinuation { (cont: CheckedContinuation) in + connection.send(content: Data(request.utf8), completion: .contentProcessed { error in + if let error { cont.resume(throwing: error) } else { cont.resume() } + }) + } + + return try await withCheckedThrowingContinuation { cont in + connection.receive(minimumIncompleteLength: 1, maximumLength: 8192) { data, _, _, error in + if let error { + cont.resume(throwing: error) + } else { + cont.resume(returning: String(data: data ?? Data(), encoding: .utf8) ?? 
"") + } + } + } + } +} + +#endif // canImport(Network) diff --git a/ios/Tests/GutenbergKitHTTPTests/HeaderValueTests.swift b/ios/Tests/GutenbergKitHTTPTests/HeaderValueTests.swift new file mode 100644 index 000000000..d7fcdcaad --- /dev/null +++ b/ios/Tests/GutenbergKitHTTPTests/HeaderValueTests.swift @@ -0,0 +1,147 @@ +import Foundation +import Testing +@testable import GutenbergKitHTTP + +@Suite("HeaderValue") +struct HeaderValueTests { + + // MARK: - Unquoted Values + + @Test("Extracts unquoted parameter value") + func unquotedValue() { + let result = HeaderValue.extractParameter("boundary", from: "multipart/form-data; boundary=AaB03x") + #expect(result == "AaB03x") + } + + @Test("Unquoted value terminated by semicolon") + func unquotedValueTerminatedBySemicolon() { + let result = HeaderValue.extractParameter("boundary", from: "multipart/form-data; boundary=AaB03x; charset=utf-8") + #expect(result == "AaB03x") + } + + @Test("Unquoted value at end of string") + func unquotedValueAtEnd() { + let result = HeaderValue.extractParameter("charset", from: "text/plain; charset=utf-8") + #expect(result == "utf-8") + } + + // MARK: - Quoted Values + + @Test("Extracts quoted parameter value") + func quotedValue() { + let result = HeaderValue.extractParameter("name", from: "form-data; name=\"field1\"") + #expect(result == "field1") + } + + @Test("Empty quoted value") + func emptyQuotedValue() { + let result = HeaderValue.extractParameter("name", from: "form-data; name=\"\"") + #expect(result == "") + } + + @Test("Quoted value with spaces") + func quotedValueWithSpaces() { + let result = HeaderValue.extractParameter("name", from: "form-data; name=\"my field\"") + #expect(result == "my field") + } + + // MARK: - Backslash Escapes + + @Test("Quoted value with escaped quote") + func escapedQuote() { + let result = HeaderValue.extractParameter("name", from: #"form-data; name="field\"name""#) + #expect(result == "field\"name") + } + + @Test("Quoted value with escaped backslash") + 
func escapedBackslash() { + let result = HeaderValue.extractParameter("filename", from: #"form-data; filename="C:\\path\\file.txt""#) + #expect(result == "C:\\path\\file.txt") + } + + @Test("Quoted value with escaped single-quote") + func escapedSingleQuote() { + let result = HeaderValue.extractParameter("boundary", from: #"multipart/form-data; boundary="abc\'def""#) + #expect(result == "abc'def") + } + + // MARK: - Case Insensitivity + + @Test("Parameter name matching is case-insensitive") + func caseInsensitive() { + let result = HeaderValue.extractParameter("Boundary", from: "multipart/form-data; boundary=AaB03x") + #expect(result == "AaB03x") + } + + // MARK: - Skipping Quoted Strings + + @Test("Parameter inside another quoted value is not matched") + func paramInsideQuotedValueSkipped() { + let result = HeaderValue.extractParameter("name", from: #"form-data; dummy="name=evil"; name="real""#) + #expect(result == "real") + } + + @Test("boundary= inside quoted value is skipped") + func boundaryInsideQuotedValueSkipped() { + let result = HeaderValue.extractParameter("boundary", from: #"multipart/form-data; charset="boundary=fake"; boundary=RealBoundary"#) + #expect(result == "RealBoundary") + } + + // MARK: - Missing Parameters + + @Test("Missing parameter returns nil") + func missingParameter() { + let result = HeaderValue.extractParameter("filename", from: "form-data; name=\"field1\"") + #expect(result == nil) + } + + @Test("Empty header value returns nil") + func emptyHeaderValue() { + let result = HeaderValue.extractParameter("name", from: "") + #expect(result == nil) + } + + // MARK: - Multiple Parameters + + @Test("Extracts correct parameter when multiple are present") + func multipleParameters() { + let result = HeaderValue.extractParameter("filename", from: "form-data; name=\"file\"; filename=\"photo.jpg\"") + #expect(result == "photo.jpg") + } + + // MARK: - Substring Matching + + @Test("Does not match parameter name as substring of another parameter 
name") + func noSubstringMatch() { + // "name=" appears inside "filename=" — should not match it. + let result = HeaderValue.extractParameter("name", from: #"form-data; filename="test.jpg"; name="real""#) + #expect(result == "real") + } + + // MARK: - Edge Cases + + @Test("Unterminated quoted value returns accumulated content") + func unterminatedQuotedValue() { + let result = HeaderValue.extractParameter("name", from: "form-data; name=\"unclosed") + #expect(result == "unclosed") + } + + @Test("Quoted value containing semicolons") + func quotedValueWithSemicolons() { + let result = HeaderValue.extractParameter("name", from: "form-data; name=\"a;b;c\"") + #expect(result == "a;b;c") + } + + @Test("No space after semicolon") + func noSpaceAfterSemicolon() { + let result = HeaderValue.extractParameter("name", from: "form-data;name=\"field1\"") + #expect(result == "field1") + } + + @Test("Backslash at end of quoted value") + func backslashAtEndOfQuotedValue() { + // Backslash with nothing after it — should stop extraction. + let result = HeaderValue.extractParameter("name", from: #"form-data; name="trailing\"#) + #expect(result == "trailing") + } +} diff --git a/ios/Tests/GutenbergKitHTTPTests/ParsedHTTPRequestTests.swift b/ios/Tests/GutenbergKitHTTPTests/ParsedHTTPRequestTests.swift new file mode 100644 index 000000000..042336ed9 --- /dev/null +++ b/ios/Tests/GutenbergKitHTTPTests/ParsedHTTPRequestTests.swift @@ -0,0 +1,239 @@ +import Foundation +import Testing +@testable import GutenbergKitHTTP + +@Suite("ParsedHTTPRequest") +struct ParsedHTTPRequestTests { + + // MARK: - urlRequest(relativeTo:) + + @Test("urlRequest resolves path against base URL") + func urlRequestResolvesPath() { + let request = ParsedHTTPRequest.complete( + method: "GET", + target: "/wp/v2/posts?per_page=10", + httpVersion: "HTTP/1.1", + headers: ["Accept": "application/json"], + body: nil + ) + + let baseURL = URL(string: "https://example.com/wp-json")! 
+ let urlRequest = request.urlRequest(relativeTo: baseURL) + + #expect(urlRequest != nil) + #expect(urlRequest?.url?.absoluteString == "https://example.com/wp/v2/posts?per_page=10") + #expect(urlRequest?.httpMethod == "GET") + #expect(urlRequest?.value(forHTTPHeaderField: "Accept") == "application/json") + } + + @Test("urlRequest includes body stream") + func urlRequestIncludesBody() throws { + let body = Data(#"{"title":"Test"}"#.utf8) + let request = ParsedHTTPRequest.complete( + method: "POST", + target: "/wp/v2/posts", + httpVersion: "HTTP/1.1", + headers: ["Content-Type": "application/json"], + body: RequestBody(data: body) + ) + + let baseURL = URL(string: "https://example.com/wp-json")! + let urlRequest = request.urlRequest(relativeTo: baseURL) + + #expect(urlRequest?.httpBodyStream != nil) + #expect(urlRequest?.httpMethod == "POST") + } + + @Test("urlRequest strips hop-by-hop headers") + func urlRequestStripsHopByHopHeaders() { + let request = ParsedHTTPRequest.complete( + method: "GET", + target: "/wp/v2/posts", + httpVersion: "HTTP/1.1", + headers: [ + "Host": "localhost:8080", + "Connection": "keep-alive", + "Accept": "application/json", + "Transfer-Encoding": "chunked", + "Keep-Alive": "timeout=5", + "Proxy-Connection": "keep-alive", + ], + body: nil + ) + + let baseURL = URL(string: "https://example.com")! + let urlRequest = request.urlRequest(relativeTo: baseURL)! 
+ + #expect(urlRequest.value(forHTTPHeaderField: "Host") == nil) + #expect(urlRequest.value(forHTTPHeaderField: "Connection") == nil) + #expect(urlRequest.value(forHTTPHeaderField: "Transfer-Encoding") == nil) + #expect(urlRequest.value(forHTTPHeaderField: "Keep-Alive") == nil) + #expect(urlRequest.value(forHTTPHeaderField: "Proxy-Connection") == nil) + #expect(urlRequest.value(forHTTPHeaderField: "Accept") == "application/json") + } + + // MARK: - header(_:) + + @Test("header returns nil for missing header") + func headerReturnsNilForMissing() { + let request = ParsedHTTPRequest.complete( + method: "GET", + target: "/", + httpVersion: "HTTP/1.1", + headers: ["Accept": "text/html"], + body: nil + ) + + #expect(request.header("Authorization") == nil) + } + + @Test("header is case-insensitive") + func headerCaseInsensitive() { + let request = ParsedHTTPRequest.complete( + method: "GET", + target: "/", + httpVersion: "HTTP/1.1", + headers: ["X-Custom-Header": "value123"], + body: nil + ) + + #expect(request.header("x-custom-header") == "value123") + #expect(request.header("X-CUSTOM-HEADER") == "value123") + } + + // MARK: - Partial vs Complete + + @Test("partial request has no body") + func partialHasNoBody() { + let request = ParsedHTTPRequest.partial( + method: "POST", + target: "/wp/v2/posts", + httpVersion: "HTTP/1.1", + headers: ["Content-Type": "application/json"] + ) + + #expect(!request.isComplete) + #expect(request.body == nil) + #expect(request.method == "POST") + #expect(request.target == "/wp/v2/posts") + } + + @Test("complete request without body") + func completeWithoutBody() { + let request = ParsedHTTPRequest.complete( + method: "GET", + target: "/wp/v2/settings", + httpVersion: "HTTP/1.1", + headers: [:], + body: nil + ) + + #expect(request.isComplete) + #expect(request.body == nil) + } + + // MARK: - urlRequest Edge Cases + + @Test("urlRequest returns nil for malformed target") + func urlRequestReturnsNilForMalformedTarget() { + let request = 
ParsedHTTPRequest.complete( + method: "GET", + target: "://not a valid url", + httpVersion: "HTTP/1.1", + headers: [:], + body: nil + ) + + let baseURL = URL(string: "https://example.com")! + #expect(request.urlRequest(relativeTo: baseURL) == nil) + } + + // MARK: - Proxy-Authorization is stripped, Authorization passes through + + @Test("urlRequest strips Proxy-Authorization header (proxy token)") + func urlRequestStripsProxyAuthorizationHeader() { + let request = ParsedHTTPRequest.complete( + method: "GET", + target: "/wp/v2/posts", + httpVersion: "HTTP/1.1", + headers: [ + "Proxy-Authorization": "Bearer secret-proxy-token", + "Authorization": "Basic dXNlcjpwYXNz", + "Accept": "application/json", + ], + body: nil + ) + + let baseURL = URL(string: "https://example.com")! + let urlRequest = request.urlRequest(relativeTo: baseURL)! + + #expect(urlRequest.value(forHTTPHeaderField: "Proxy-Authorization") == nil) + #expect(urlRequest.value(forHTTPHeaderField: "Authorization") == "Basic dXNlcjpwYXNz") + #expect(urlRequest.value(forHTTPHeaderField: "Accept") == "application/json") + } + + @Test("urlRequest strips lowercase proxy-authorization header") + func urlRequestStripsLowercaseProxyAuthorizationHeader() { + let request = ParsedHTTPRequest.complete( + method: "GET", + target: "/wp/v2/posts", + httpVersion: "HTTP/1.1", + headers: [ + "proxy-authorization": "Bearer secret-proxy-token", + "Accept": "application/json", + ], + body: nil + ) + + let baseURL = URL(string: "https://example.com")! + let urlRequest = request.urlRequest(relativeTo: baseURL)! 
+ + #expect(urlRequest.value(forHTTPHeaderField: "proxy-authorization") == nil) + #expect(urlRequest.value(forHTTPHeaderField: "Accept") == "application/json") + } + + // MARK: - Fix #2: Case-insensitive Connection header for hop-by-hop extension + + @Test("urlRequest strips headers listed in lowercase connection header") + func urlRequestStripsHeadersFromLowercaseConnectionHeader() { + let request = ParsedHTTPRequest.complete( + method: "GET", + target: "/wp/v2/posts", + httpVersion: "HTTP/1.1", + headers: [ + "connection": "X-Custom, close", + "X-Custom": "should-be-stripped", + "Accept": "application/json", + ], + body: nil + ) + + let baseURL = URL(string: "https://example.com")! + let urlRequest = request.urlRequest(relativeTo: baseURL)! + + #expect(urlRequest.value(forHTTPHeaderField: "X-Custom") == nil) + #expect(urlRequest.value(forHTTPHeaderField: "connection") == nil) + #expect(urlRequest.value(forHTTPHeaderField: "Accept") == "application/json") + } + + @Test("urlRequest strips headers listed in mixed-case CONNECTION header") + func urlRequestStripsHeadersFromMixedCaseConnectionHeader() { + let request = ParsedHTTPRequest.complete( + method: "GET", + target: "/wp/v2/posts", + httpVersion: "HTTP/1.1", + headers: [ + "CONNECTION": "X-Private", + "X-Private": "should-be-stripped", + "Accept": "text/html", + ], + body: nil + ) + + let baseURL = URL(string: "https://example.com")! + let urlRequest = request.urlRequest(relativeTo: baseURL)! 
+ + #expect(urlRequest.value(forHTTPHeaderField: "X-Private") == nil) + #expect(urlRequest.value(forHTTPHeaderField: "Accept") == "text/html") + } +} diff --git a/ios/Tests/GutenbergKitHTTPTests/RFC7230ConformanceTests.swift b/ios/Tests/GutenbergKitHTTPTests/RFC7230ConformanceTests.swift new file mode 100644 index 000000000..579eead96 --- /dev/null +++ b/ios/Tests/GutenbergKitHTTPTests/RFC7230ConformanceTests.swift @@ -0,0 +1,373 @@ +import Foundation +import Testing +@testable import GutenbergKitHTTP + +@Suite("RFC 7230 Conformance") +struct RFC7230ConformanceTests { + + // MARK: - Section 3.5 (Message Parsing Robustness) + + @Test("RFC 7230 §3.5: single leading CRLF before request line is ignored") + func singleLeadingCRLFIsIgnored() throws { + // RFC 7230 §3.5: server SHOULD ignore at least one leading CRLF. + let parser = HTTPRequestParser("\r\nGET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.method == "GET") + #expect(request.target == "/wp/v2/posts") + #expect(request.header("Host") == "localhost") + } + + @Test("RFC 7230 §3.5: multiple leading CRLFs before request line are ignored") + func multipleLeadingCRLFsAreIgnored() throws { + // RFC 7230 §3.5: server SHOULD ignore at least one leading CRLF. + let parser = HTTPRequestParser("\r\n\r\nGET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.method == "GET") + #expect(request.target == "/wp/v2/posts") + #expect(request.header("Host") == "localhost") + } + + // MARK: - Section 3.2 (Header Fields) + + @Test("RFC 7230 §3.2.4: whitespace between field-name and colon is rejected") + func whitespaceBetweenFieldNameAndColon() { + // RFC 7230 §3.2.4: "No whitespace is allowed between the header field-name + // and colon." 
This is a request smuggling vector — the parser returns the + // specific .whitespaceBeforeColon error rather than the generic .invalidFieldName. + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nHost : localhost\r\nX-WP-Nonce : abc123\r\n\r\n") + + #expect(throws: HTTPRequestParseError.whitespaceBeforeColon) { + try parser.parseRequest() + } + } + + @Test("RFC 7230 §3.2.4: obs-fold continuation line is rejected") + func obsFoldContinuationLineRejected() { + // RFC 7230 says server MUST reject with 400 or replace obs-fold with SP. + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nX-WP-Custom: value1\r\n continued-value\r\nHost: localhost\r\n\r\n") + + #expect(throws: HTTPRequestParseError.obsFoldDetected) { + try parser.parseRequest() + } + } + + @Test("RFC 7230 §3.2.4: obs-fold with tab continuation is rejected") + func obsFoldTabContinuationRejected() { + // Tab-prefixed continuation lines are rejected per RFC 7230 §3.2.4. + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nAuthorization: Bearer\r\n\ttok123\r\nHost: localhost\r\n\r\n") + + #expect(throws: HTTPRequestParseError.obsFoldDetected) { + try parser.parseRequest() + } + } + + @Test("RFC 7230 §3.2: field-name is a token — preserved without normalization") + func fieldNameTokenPreservedVerbatim() throws { + // Header field names should be treated as opaque tokens + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nX-WP-Nonce: abc\r\nX_Underscore_Header: val\r\nX-123-Numeric: num\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.headers["X-WP-Nonce"] == "abc") + #expect(request.headers["X_Underscore_Header"] == "val") + #expect(request.headers["X-123-Numeric"] == "num") + } + + // MARK: - Section 3.3 (Message Body) + + @Test("RFC 7230 §3.3.3: Transfer-Encoding: chunked is rejected") + func transferEncodingChunkedRejected() { + let body = #"{"title":"Test"}"# + let raw = "POST /wp/v2/posts HTTP/1.1\r\nHost: 
localhost\r\nTransfer-Encoding: chunked\r\nContent-Length: \(body.utf8.count)\r\n\r\n\(body)" + let parser = HTTPRequestParser(raw) + + #expect(throws: HTTPRequestParseError.unsupportedTransferEncoding) { + try parser.parseRequest() + } + } + + @Test("RFC 7230 §3.3.3: Transfer-Encoding without Content-Length is rejected") + func transferEncodingWithoutContentLengthRejected() { + let raw = "POST /wp/v2/posts HTTP/1.1\r\nTransfer-Encoding: chunked\r\nHost: localhost\r\n\r\n" + let parser = HTTPRequestParser(raw) + + #expect(throws: HTTPRequestParseError.unsupportedTransferEncoding) { + try parser.parseRequest() + } + } + + @Test("RFC 7230 §3.3.3: Transfer-Encoding: identity is also rejected") + func transferEncodingIdentityRejected() { + // Even `identity` is rejected — this server only supports Content-Length framing. + let raw = "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nTransfer-Encoding: identity\r\nContent-Length: 5\r\n\r\nhello" + let parser = HTTPRequestParser(raw) + + #expect(throws: HTTPRequestParseError.unsupportedTransferEncoding) { + try parser.parseRequest() + } + } + + @Test("RFC 7230 §3.3.3: Transfer-Encoding with mixed case is rejected") + func transferEncodingMixedCaseRejected() { + let raw = "POST /wp/v2/posts HTTP/1.1\r\nTRANSFER-ENCODING: chunked\r\nHost: localhost\r\n\r\n" + let parser = HTTPRequestParser(raw) + + #expect(throws: HTTPRequestParseError.unsupportedTransferEncoding) { + try parser.parseRequest() + } + } + + @Test("RFC 7230 §3.3.3: duplicate identical Content-Length values — first value used by scanner") + func duplicateIdenticalContentLength() throws { + // RFC says recipient MUST reject or consolidate. Our parser's scanContentLength + // finds the first match and the header dict overwrites with the last. 
+ let body = #"{"id":1}"# + let raw = "POST /wp/v2/posts HTTP/1.1\r\nContent-Length: \(body.utf8.count)\r\nContent-Length: \(body.utf8.count)\r\nHost: localhost\r\n\r\n\(body)" + let parser = HTTPRequestParser(raw) + + #expect(parser.state.isComplete) + let request = try #require(try parser.parseRequest()) + let requestBody = try #require(request.body) + #expect(try readAll(requestBody) == Data(body.utf8)) + } + + @Test("RFC 7230 §3.3.3: conflicting Content-Length values are rejected as invalid") + func conflictingContentLengthRejected() { + // RFC says conflicting Content-Length is unrecoverable error (MUST reject). + let parser = HTTPRequestParser("POST /wp/v2/posts HTTP/1.1\r\nContent-Length: 100\r\nContent-Length: 5\r\nHost: localhost\r\n\r\nhello") + + #expect(throws: HTTPRequestParseError.conflictingContentLength) { + try parser.parseRequest() + } + } + + @Test("RFC 7230 §3.3.3 step 6: request with no Content-Length and no Transfer-Encoding has zero-length body") + func noContentLengthNoTransferEncodingMeansZeroBody() throws { + let parser = HTTPRequestParser("DELETE /wp/v2/posts/42?force=true HTTP/1.1\r\nHost: localhost\r\nAuthorization: Bearer tok\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(parser.state.isComplete) + #expect(request.body == nil) + } + + @Test("RFC 7230 §3.3: Content-Length: 0 is explicitly zero-length body") + func contentLengthZeroIsExplicitNoBody() throws { + let parser = HTTPRequestParser("POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Length: 0\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(parser.state.isComplete) + #expect(request.body == nil) + #expect(parser.expectedBodyLength == 0) + } + + // MARK: - Section 3.3.3 (Message Body Length — Incremental) + + @Test("RFC 7230 §3.3.3: body arriving after headers in separate chunk") + func bodyArrivesInSeparateChunk() throws { + let body = #"{"title":"Hello","status":"publish"}"# + let headers = "POST /wp/v2/posts 
HTTP/1.1\r\nContent-Length: \(body.utf8.count)\r\nHost: localhost\r\n\r\n" + + let parser = HTTPRequestParser() + parser.append(Data(headers.utf8)) + #expect(parser.state.hasHeaders) + #expect(!parser.state.isComplete) + + parser.append(Data(body.utf8)) + #expect(parser.state.isComplete) + + let request = try #require(try parser.parseRequest()) + let requestBody = try #require(request.body) + #expect(try readAll(requestBody) == Data(body.utf8)) + } + + @Test("RFC 7230 §3.4: incomplete body — fewer bytes than Content-Length") + func incompleteBodyFewerBytesThanContentLength() throws { + let parser = HTTPRequestParser() + parser.append(Data("POST /wp/v2/media HTTP/1.1\r\nHost: localhost\r\nContent-Length: 500\r\n\r\n".utf8)) + parser.append(Data("partial data".utf8)) + + #expect(parser.state.hasHeaders) + #expect(!parser.state.isComplete) + + let request = try #require(try parser.parseRequest()) + #expect(!request.isComplete) + #expect(request.method == "POST") + #expect(request.target == "/wp/v2/media") + } + + // MARK: - Section 5.3 (Request Target) + + @Test("RFC 7230 §5.3.1: origin-form with empty query string") + func originFormEmptyQuery() throws { + let parser = HTTPRequestParser("GET /wp/v2/posts? 
HTTP/1.1\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.target == "/wp/v2/posts?") + } + + @Test("RFC 7230 §5.3.1: origin-form must start with /") + func originFormStartsWithSlash() { + // Relative path without leading slash — rejected per RFC 9112 §3.2 + let parser = HTTPRequestParser("GET wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n\r\n") + + #expect(throws: HTTPRequestParseError.self) { + try parser.parseRequest() + } + } + + @Test("RFC 7230 §5.3.2: absolute-form with HTTPS scheme") + func absoluteFormHTTPS() throws { + let parser = HTTPRequestParser("GET https://example.com/wp/v2/posts HTTP/1.1\r\nHost: example.com\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.target == "https://example.com/wp/v2/posts") + } + + @Test("RFC 7230 §5.3.2: absolute-form with userinfo in authority") + func absoluteFormWithUserinfo() throws { + let parser = HTTPRequestParser("GET http://admin:pass@example.com/wp/v2/posts HTTP/1.1\r\nHost: example.com\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.target == "http://admin:pass@example.com/wp/v2/posts") + } + + @Test("RFC 7230 §5.3.3: authority-form with port") + func authorityFormWithPort() throws { + let parser = HTTPRequestParser("CONNECT wordpress.org:8443 HTTP/1.1\r\nHost: wordpress.org:8443\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.method == "CONNECT") + #expect(request.target == "wordpress.org:8443") + } + + @Test("RFC 7230 §5.3.4: asterisk-form for server-wide OPTIONS") + func asteriskFormServerWideOptions() throws { + let parser = HTTPRequestParser("OPTIONS * HTTP/1.1\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.method == "OPTIONS") + #expect(request.target == "*") + } + + // MARK: - Section 5.4 (Host) + + @Test("RFC 7230 §5.4: request without Host header is rejected in HTTP/1.1") + func 
requestWithoutHostRejected() { + // RFC 9112 §3.2 (formerly RFC 7230 §5.4): server MUST respond 400 if Host is missing in HTTP/1.1. + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nAccept: application/json\r\n\r\n") + + #expect(throws: HTTPRequestParseError.missingHostHeader) { + try parser.parseRequest() + } + } + + @Test("RFC 7230 §5.4: Host header with port") + func hostHeaderWithPort() throws { + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nHost: localhost:8080\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.header("Host") == "localhost:8080") + } + + @Test("RFC 7230 §5.4: empty Host header is accepted") + func emptyHostHeaderAccepted() throws { + // RFC 7230 §5.4: Host carries an empty field-value when the target URI has no authority component. + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nHost:\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.header("Host") == "") + } + + // MARK: - Section 3.1.1 (Request Line Edge Cases) + + @Test("RFC 7230 §3.1.1: request line with extra spaces between components is rejected") + func requestLineExtraSpaces() { + // RFC 9112 §3: request-line = method SP request-target SP HTTP-version + // Double space produces an empty target which fails validation. 
+ let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n\r\n") + + #expect(throws: HTTPRequestParseError.invalidHTTPVersion) { + try parser.parseRequest() + } + } + + @Test("RFC 7230 §3.1.1: request with very long method token") + func veryLongMethodToken() throws { + let longMethod = String(repeating: "X", count: 100) + let parser = HTTPRequestParser("\(longMethod) /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.method == longMethod) + } + + // MARK: - Section 3 (General Message Format) + + @Test("RFC 7230 §3: only CRLF (no headers, no request line content) is needsMoreData") + func onlyCRLFIsNeedsMoreData() throws { + // Leading CRLFs are stripped per RFC 7230 §3.5, leaving no data — needsMoreData. + let parser = HTTPRequestParser("\r\n\r\n") + + #expect(!parser.state.hasHeaders) + #expect(try parser.parseRequest() == nil) + } + + @Test("RFC 7230 §3: request with many headers") + func requestWithManyHeaders() throws { + var raw = "GET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n" + for i in 0..<50 { + raw += "X-WP-Header-\(i): value-\(i)\r\n" + } + raw += "\r\n" + + let parser = HTTPRequestParser(raw) + let request = try #require(try parser.parseRequest()) + + #expect(parser.state.isComplete) + #expect(request.headers.count == 51) // 50 X-WP-Header + Host + #expect(request.header("X-WP-Header-0") == "value-0") + #expect(request.header("X-WP-Header-49") == "value-49") + } + + // MARK: - Header Count Limit + + @Test("rejects requests with more than 100 header field lines") + func tooManyHeaders() { + // 1 Host + 100 X-Headers = 101 total header lines → rejected + var raw = "GET / HTTP/1.1\r\nHost: localhost\r\n" + for i in 0..<100 { + raw += "X-Header-\(i): value\r\n" + } + raw += "\r\n" + let parser = HTTPRequestParser(raw) + + #expect(throws: HTTPRequestParseError.tooManyHeaders) { + try parser.parseRequest() + } + } + + @Test("accepts requests with exactly 
100 header field lines") + func maxHeadersAccepted() throws { + // 1 Host + 99 X-Headers = 100 total header lines → accepted + var raw = "GET / HTTP/1.1\r\nHost: localhost\r\n" + for i in 0..<99 { + raw += "X-Header-\(i): value\r\n" + } + raw += "\r\n" + let parser = HTTPRequestParser(raw) + let request = try #require(try parser.parseRequest()) + + #expect(request.method == "GET") + } + + @Test("RFC 7230 §3: header value with all printable ASCII characters") + func headerValueAllPrintableASCII() throws { + // Field values can contain any VCHAR (0x21-0x7E) plus SP and HTAB. + // Leading/trailing whitespace in the value is stripped by the parser (OWS trimming). + let printable = "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nX-WP-Test: \(printable)\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.header("X-WP-Test") == printable) + } +} diff --git a/ios/Tests/GutenbergKitHTTPTests/RFC7578ConformanceTests.swift b/ios/Tests/GutenbergKitHTTPTests/RFC7578ConformanceTests.swift new file mode 100644 index 000000000..2bd84db0f --- /dev/null +++ b/ios/Tests/GutenbergKitHTTPTests/RFC7578ConformanceTests.swift @@ -0,0 +1,694 @@ +import Foundation +import Testing +@testable import GutenbergKitHTTP + +/// Tests multipart/form-data parsing per RFC 7578. 
+@Suite("RFC 7578 Conformance") +struct RFC7578ConformanceTests { + + // MARK: - Content-Type / Boundary Extraction + + @Test("RFC 7578 §4.1: Content-Type with boundary parameter is preserved") + func contentTypeWithBoundary() throws { + let request = try parse(fields: [("field1", nil, nil, "value1")], boundary: "AaB03x") + + #expect(request.header("Content-Type") == "multipart/form-data; boundary=AaB03x") + } + + @Test("RFC 7578 §4.1: Content-Type with quoted boundary extracts correctly") + func contentTypeWithQuotedBoundary() throws { + let boundary = "----WebKitFormBoundary7MA4YWxk" + let body = "--\(boundary)\r\nContent-Disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--\(boundary)--\r\n" + let raw = "POST /wp/v2/media HTTP/1.1\r\nHost: localhost\r\nContent-Type: multipart/form-data; boundary=\"\(boundary)\"\r\nContent-Length: \(body.utf8.count)\r\n\r\n\(body)" + let parser = HTTPRequestParser(raw) + let request = try #require(try parser.parseRequest()) + let parts = try request.multipartParts() + + #expect(parts.count == 1) + #expect(parts[0].name == "field1") + } + + @Test("RFC 7578 §4.1: non-multipart Content-Type throws notMultipartFormData") + func nonMultipartContentTypeThrows() throws { + let body = #"{"title":"Test"}"# + let raw = "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Type: application/json\r\nContent-Length: \(body.utf8.count)\r\n\r\n\(body)" + let parser = HTTPRequestParser(raw) + let request = try #require(try parser.parseRequest()) + + #expect(throws: MultipartParseError.notMultipartFormData) { + try request.multipartParts() + } + } + + @Test("RFC 7578 §4.1: missing boundary parameter throws notMultipartFormData") + func missingBoundaryThrows() throws { + let body = "some data" + let raw = "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Type: multipart/form-data\r\nContent-Length: \(body.utf8.count)\r\n\r\n\(body)" + let parser = HTTPRequestParser(raw) + let request = try #require(try parser.parseRequest()) + + 
#expect(throws: MultipartParseError.notMultipartFormData) { + try request.multipartParts() + } + } + + // MARK: - Single Text Field + + @Test("RFC 7578 §4.2: single text field parsed correctly") + func singleTextField() throws { + let request = try parse(fields: [("title", nil, nil, "My Blog Post")], boundary: "AaB03x") + let parts = try request.multipartParts() + + #expect(parts.count == 1) + #expect(parts[0].name == "title") + #expect(parts[0].filename == nil) + #expect(parts[0].contentType == "text/plain") + #expect(try readAll(parts[0].body) == Data("My Blog Post".utf8)) + } + + @Test("RFC 7578 §4.2: field with empty value") + func fieldWithEmptyValue() throws { + let request = try parse(fields: [("excerpt", nil, nil, "")], boundary: "AaB03x") + let parts = try request.multipartParts() + + #expect(parts.count == 1) + #expect(parts[0].name == "excerpt") + #expect(try readAll(parts[0].body) == Data()) + } + + // MARK: - Multiple Fields + + @Test("RFC 7578 §4.2: multiple text fields in order") + func multipleTextFields() throws { + let request = try parse(fields: [ + ("title", nil, nil, "My Post"), + ("status", nil, nil, "publish"), + ("content", nil, nil, "

Hello world

"), + ], boundary: "AaB03x") + let parts = try request.multipartParts() + + #expect(parts.count == 3) + #expect(parts[0].name == "title") + #expect(try readAll(parts[0].body) == Data("My Post".utf8)) + #expect(parts[1].name == "status") + #expect(try readAll(parts[1].body) == Data("publish".utf8)) + #expect(parts[2].name == "content") + #expect(try readAll(parts[2].body) == Data("

Hello world

".utf8)) + } + + // MARK: - File Upload + + @Test("RFC 7578 §4.2: file upload with filename and content-type") + func fileUploadWithFilename() throws { + let fileContent = "Hello, this is a test file." + let request = try parse(fields: [ + ("file", "test.txt", "text/plain", fileContent), + ], boundary: "AaB03x") + let parts = try request.multipartParts() + + #expect(parts.count == 1) + #expect(parts[0].name == "file") + #expect(parts[0].filename == "test.txt") + #expect(parts[0].contentType == "text/plain") + #expect(try readAll(parts[0].body) == Data(fileContent.utf8)) + } + + @Test("RFC 7578 §4.2: file upload with application/octet-stream") + func fileUploadOctetStream() throws { + let request = try parse(fields: [ + ("upload", "data.bin", "application/octet-stream", "binary-content"), + ], boundary: "boundary42") + let parts = try request.multipartParts() + + #expect(parts[0].filename == "data.bin") + #expect(parts[0].contentType == "application/octet-stream") + } + + @Test("RFC 7578 §4.4: part without Content-Type defaults to text/plain") + func partWithoutContentTypeDefaultsToTextPlain() throws { + let request = try parse(fields: [("field", nil, nil, "value")], boundary: "AaB03x") + let parts = try request.multipartParts() + + #expect(parts[0].contentType == "text/plain") + } + + // MARK: - Mixed Fields and Files + + @Test("RFC 7578 §4.2: form with text fields and file upload") + func formWithTextAndFile() throws { + let request = try parse(fields: [ + ("title", nil, nil, "My Image Post"), + ("file", "image.jpg", "image/jpeg", "JFIF-binary-data"), + ], boundary: "AaB03x") + let parts = try request.multipartParts() + + #expect(parts.count == 2) + #expect(parts[0].name == "title") + #expect(parts[0].filename == nil) + #expect(parts[1].name == "file") + #expect(parts[1].filename == "image.jpg") + #expect(parts[1].contentType == "image/jpeg") + } + + // MARK: - Section 5.1 (Multiple Files for One Field) + + @Test("RFC 7578 §5.1: multiple files with same field name 
in separate parts") + func multipleFilesWithSameFieldName() throws { + let request = try parse(fields: [ + ("documents", "file1.txt", "text/plain", "First file"), + ("documents", "file2.txt", "text/plain", "Second file"), + ], boundary: "AaB03x") + let parts = try request.multipartParts() + + #expect(parts.count == 2) + #expect(parts[0].name == "documents") + #expect(parts[0].filename == "file1.txt") + #expect(try readAll(parts[0].body) == Data("First file".utf8)) + #expect(parts[1].name == "documents") + #expect(parts[1].filename == "file2.txt") + #expect(try readAll(parts[1].body) == Data("Second file".utf8)) + } + + // MARK: - Section 5.1.2 (Filenames with Special Characters) + + @Test("RFC 7578 §5.1.2: filename with spaces") + func filenameWithSpaces() throws { + let request = try parse(fields: [ + ("file", "my document.pdf", "application/pdf", "pdf-data"), + ], boundary: "AaB03x") + let parts = try request.multipartParts() + + #expect(parts[0].filename == "my document.pdf") + } + + @Test("RFC 7578 §5.1.2: filename with percent-encoded UTF-8") + func filenameWithPercentEncodedUTF8() throws { + let request = try parse(fields: [ + ("file", "caf%C3%A9.txt", "text/plain", "data"), + ], boundary: "AaB03x") + let parts = try request.multipartParts() + + // The parser preserves the raw filename — percent-decoding is the caller's concern + #expect(parts[0].filename == "caf%C3%A9.txt") + } + + @Test("RFC 7578 §5.1.2: filename with direct UTF-8 encoding") + func filenameWithDirectUTF8() throws { + let request = try parse(fields: [ + ("file", "café.txt", "text/plain", "data"), + ], boundary: "AaB03x") + let parts = try request.multipartParts() + + #expect(parts[0].filename == "café.txt") + } + + // MARK: - Section 5.1.3 (_charset_ Field) + + @Test("RFC 7578 §5.1.3: _charset_ field is parsed as a normal field") + func charsetFieldParsed() throws { + let request = try parse(fields: [ + ("_charset_", nil, nil, "UTF-8"), + ("title", nil, nil, "My Post"), + ], boundary: 
"AaB03x") + let parts = try request.multipartParts() + + #expect(parts.count == 2) + #expect(parts[0].name == "_charset_") + #expect(try readAll(parts[0].body) == Data("UTF-8".utf8)) + } + + // MARK: - Part Content-Type Variations + + @Test("RFC 7578 §4.4: part with charset parameter in Content-Type") + func partWithCharsetParameter() throws { + let boundary = "AaB03x" + let body = "--\(boundary)\r\nContent-Disposition: form-data; name=\"bio\"\r\nContent-Type: text/plain; charset=UTF-8\r\n\r\nHello world\r\n--\(boundary)--\r\n" + let request = try parseRaw(body: body, boundary: boundary) + let parts = try request.multipartParts() + + #expect(parts[0].contentType == "text/plain; charset=UTF-8") + } + + // MARK: - Boundary Edge Cases + + @Test("RFC 7578: boundary with hyphens (common browser format)") + func boundaryWithHyphens() throws { + let request = try parse( + fields: [("title", nil, nil, "test")], + boundary: "----WebKitFormBoundary7MA4YWxkTrZu0gW" + ) + let parts = try request.multipartParts() + + #expect(parts.count == 1) + #expect(parts[0].name == "title") + } + + @Test("RFC 7578: long boundary (70 characters)") + func longBoundary() throws { + let request = try parse( + fields: [("f", nil, nil, "v")], + boundary: String(repeating: "x", count: 70) + ) + let parts = try request.multipartParts() + + #expect(parts.count == 1) + } + + @Test("RFC 7578: body content resembling boundary is not split") + func bodyContentResemblingBoundary() throws { + let boundary = "AaB03x" + let body = "--\(boundary)\r\nContent-Disposition: form-data; name=\"data\"\r\n\r\n--AaB03 not a boundary\r\n--\(boundary)--\r\n" + let request = try parseRaw(body: body, boundary: boundary) + let parts = try request.multipartParts() + + #expect(parts.count == 1) + #expect(try readAll(parts[0].body) == Data("--AaB03 not a boundary".utf8)) + } + + // MARK: - Error Cases + + @Test("RFC 7578: part missing Content-Disposition throws error") + func missingContentDisposition() throws { + let 
boundary = "AaB03x" + let body = "--\(boundary)\r\nContent-Type: text/plain\r\n\r\nvalue\r\n--\(boundary)--\r\n" + let request = try parseRaw(body: body, boundary: boundary) + + #expect(throws: MultipartParseError.missingContentDisposition) { + try request.multipartParts() + } + } + + @Test("RFC 7578: part missing name parameter throws error") + func missingNameParameter() throws { + let boundary = "AaB03x" + let body = "--\(boundary)\r\nContent-Disposition: form-data\r\n\r\nvalue\r\n--\(boundary)--\r\n" + let request = try parseRaw(body: body, boundary: boundary) + + #expect(throws: MultipartParseError.missingNameParameter) { + try request.multipartParts() + } + } + + @Test("RFC 7578: malformed body throws error") + func malformedBody() throws { + let boundary = "AaB03x" + let body = "this is not multipart at all" + let request = try parseRaw(body: body, boundary: boundary) + + #expect(throws: MultipartParseError.malformedBody) { + try request.multipartParts() + } + } + + @Test("RFC 7578: incomplete request throws missingBody") + func incompleteRequestThrowsMissingBody() throws { + let parser = HTTPRequestParser("POST /wp/v2/media HTTP/1.1\r\nHost: localhost\r\nContent-Type: multipart/form-data; boundary=AaB03x\r\nContent-Length: 1000\r\n\r\npartial") + let request = try #require(try parser.parseRequest()) + + // Request is partial — body not fully received + #expect(!request.isComplete) + #expect(throws: MultipartParseError.missingBody) { + try request.multipartParts() + } + } + + // MARK: - Incremental Arrival + + @Test("RFC 7578: multipart body arriving incrementally parses correctly") + func multipartBodyArrivingIncrementally() throws { + let boundary = "AaB03x" + let body = "--\(boundary)\r\nContent-Disposition: form-data; name=\"title\"\r\n\r\nMy Post\r\n--\(boundary)\r\nContent-Disposition: form-data; name=\"content\"\r\n\r\n

Hello

\r\n--\(boundary)--\r\n" + let headers = "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Type: multipart/form-data; boundary=\(boundary)\r\nContent-Length: \(body.utf8.count)\r\n\r\n" + + let parser = HTTPRequestParser() + parser.append(Data(headers.utf8)) + + let bodyData = Data(body.utf8) + let midpoint = bodyData.count / 2 + parser.append(bodyData[0..Hello

".utf8)) + } + + // MARK: - Large Bodies + + @Test("RFC 7578: large part body is captured completely") + func largePartBody() throws { + let largeContent = String(repeating: "x", count: 10000) + let request = try parse(fields: [("content", nil, nil, largeContent)], boundary: "AaB03x") + let parts = try request.multipartParts() + + #expect(try readAll(parts[0].body) == Data(largeContent.utf8)) + } + + // MARK: - Body Content Edge Cases + + @Test("RFC 7578: binary data (non-UTF-8 bytes) in part body") + func binaryDataInPartBody() throws { + // PNG file signature bytes (includes 0x0D 0x0A which is CRLF) + let binaryBytes: [UInt8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A] + let binaryData = Data(binaryBytes) + let boundary = "AaB03x" + + var bodyData = Data() + bodyData.append(Data("--\(boundary)\r\nContent-Disposition: form-data; name=\"file\"; filename=\"image.png\"\r\nContent-Type: image/png\r\n\r\n".utf8)) + bodyData.append(binaryData) + bodyData.append(Data("\r\n--\(boundary)--\r\n".utf8)) + + let raw = "POST /wp/v2/media HTTP/1.1\r\nHost: localhost\r\nContent-Type: multipart/form-data; boundary=\(boundary)\r\nContent-Length: \(bodyData.count)\r\n\r\n" + + let parser = HTTPRequestParser() + parser.append(Data(raw.utf8)) + parser.append(bodyData) + + let request = try #require(try parser.parseRequest()) + let parts = try request.multipartParts() + + #expect(parts.count == 1) + #expect(parts[0].name == "file") + #expect(parts[0].filename == "image.png") + #expect(try readAll(parts[0].body) == binaryData) + } + + @Test("RFC 7578: part body containing CRLF sequences") + func partBodyContainingCRLF() throws { + let boundary = "AaB03x" + let body = "--\(boundary)\r\nContent-Disposition: form-data; name=\"content\"\r\n\r\nline1\r\nline2\r\nline3\r\n--\(boundary)--\r\n" + let request = try parseRaw(body: body, boundary: boundary) + let parts = try request.multipartParts() + + #expect(parts.count == 1) + #expect(try readAll(parts[0].body) == 
Data("line1\r\nline2\r\nline3".utf8))
    }

    /// The part body contains `--other--`, which is not this request's boundary;
    /// the test checks the body text is returned whole.
    @Test("RFC 7578: part body containing text resembling a different closing delimiter")
    func partBodyResemblingOtherClosingDelimiter() throws {
        let boundary = "AaB03x"
        let body = "--\(boundary)\r\nContent-Disposition: form-data; name=\"data\"\r\n\r\nsome --other-- text\r\n--\(boundary)--\r\n"
        let request = try parseRaw(body: body, boundary: boundary)
        let parts = try request.multipartParts()

        // #require halts the test on a wrong count so the subscript below cannot trap.
        try #require(parts.count == 1)
        #expect(try readAll(parts[0].body) == Data("some --other-- text".utf8))
    }

    /// A body consisting only of the close delimiter must make `multipartParts()`
    /// throw `MultipartParseError.malformedBody`.
    @Test("RFC 2046: empty multipart body with only close delimiter throws malformedBody")
    func emptyMultipartBodyThrows() throws {
        let boundary = "AaB03x"
        // RFC 2046 requires at least one body part
        let body = "--\(boundary)--\r\n"
        let request = try parseRaw(body: body, boundary: boundary)

        #expect(throws: MultipartParseError.malformedBody) {
            try request.multipartParts()
        }
    }

    // MARK: - Header Edge Cases

    /// Parses a single `form-data` part and checks its `name` parameter.
    @Test("RFC 7578: Content-Disposition with extra whitespace around parameters")
    func contentDispositionExtraWhitespace() throws {
        let boundary = "AaB03x"
        // NOTE(review): the header below shows only single spaces in this view; confirm the
        // fixture still carries the extra whitespace the test title describes.
        let body = "--\(boundary)\r\nContent-Disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--\(boundary)--\r\n"
        let request = try parseRaw(body: body, boundary: boundary)
        let parts = try request.multipartParts()

        // Halt on a wrong count instead of trapping on the subscript.
        try #require(parts.count == 1)
        #expect(parts[0].name == "field1")
    }

    /// The `name` value contains a backslash-escaped double quote; the parsed name is
    /// expected to contain the bare quote.
    @Test("RFC 7578: Content-Disposition name with escaped quotes")
    func contentDispositionEscapedQuotes() throws {
        let boundary = "AaB03x"
        let body = "--\(boundary)\r\nContent-Disposition: form-data; name=\"field\\\"name\"\r\n\r\nvalue\r\n--\(boundary)--\r\n"
        let request = try parseRaw(body: body, boundary: boundary)
        let parts = try request.multipartParts()

        // Halt on a wrong count instead of trapping on the subscript.
        try #require(parts.count == 1)
        #expect(parts[0].name == "field\"name")
    }

    @Test("RFC 7578: additional part headers beyond Content-Disposition and Content-Type are ignored")
    func 
additionalPartHeadersIgnored() throws {
        // The part carries Content-Transfer-Encoding and X-Custom-Header in addition to the
        // two headers whose values are asserted below.
        let boundary = "AaB03x"
        let body = "--\(boundary)\r\nContent-Disposition: form-data; name=\"file\"; filename=\"test.txt\"\r\nContent-Type: text/plain\r\nContent-Transfer-Encoding: binary\r\nX-Custom-Header: custom-value\r\n\r\nfile content\r\n--\(boundary)--\r\n"
        let request = try parseRaw(body: body, boundary: boundary)
        let parts = try request.multipartParts()

        // #require halts the test on a wrong count so the subscripts below cannot trap.
        try #require(parts.count == 1)
        #expect(parts[0].name == "file")
        #expect(parts[0].contentType == "text/plain")
        #expect(try readAll(parts[0].body) == Data("file content".utf8))
    }

    /// Uses an all-lowercase `content-disposition` header name and checks the part name
    /// is still extracted.
    @Test("RFC 7578: case-insensitive Content-Disposition header name")
    func caseInsensitiveContentDisposition() throws {
        let boundary = "AaB03x"
        let body = "--\(boundary)\r\ncontent-disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--\(boundary)--\r\n"
        let request = try parseRaw(body: body, boundary: boundary)
        let parts = try request.multipartParts()

        // Halt on a wrong count instead of trapping on the subscript.
        try #require(parts.count == 1)
        #expect(parts[0].name == "field1")
    }

    /// Uses an upper-case `FORM-DATA` disposition token and checks the part name is
    /// still extracted.
    @Test("RFC 7578: case-insensitive form-data token in Content-Disposition")
    func caseInsensitiveFormData() throws {
        let boundary = "AaB03x"
        let body = "--\(boundary)\r\nContent-Disposition: FORM-DATA; name=\"field1\"\r\n\r\nvalue1\r\n--\(boundary)--\r\n"
        let request = try parseRaw(body: body, boundary: boundary)
        let parts = try request.multipartParts()

        // Halt on a wrong count instead of trapping on the subscript.
        try #require(parts.count == 1)
        #expect(parts[0].name == "field1")
    }

    // MARK: - Boundary Edge Cases

    @Test("RFC 7578 §4.2: name= inside another parameter's quoted value is not matched")
    func nameInsideQuotedValueNotMatched() throws {
        let boundary = "AaB03x"
        let body = "--\(boundary)\r\nContent-Disposition: form-data; dummy=\"name=evil\"; name=\"real\"\r\n\r\nvalue\r\n--\(boundary)--\r\n"
        let raw = "POST /upload HTTP/1.1\r\nHost: localhost\r\nContent-Type: multipart/form-data; boundary=\(boundary)\r\nContent-Length: \(body.utf8.count)\r\n\r\n\(body)"
        let parser = 
HTTPRequestParser(raw)
        let request = try #require(try parser.parseRequest())
        let parts = try request.multipartParts()

        // The original subscripted parts[0] with no count check at all; unwrap the first
        // part so an empty result is reported as a test failure rather than a trap.
        let firstPart = try #require(parts.first)
        #expect(firstPart.name == "real")
    }

    /// The Content-Type header declares the boundary as the quoted string `"abc\'def"`
    /// while the body is delimited with `abc'def`; the test expects one part named `field`.
    @Test("RFC 2045 §5.1: quoted boundary with backslash-escaped single-quote")
    func quotedBoundaryWithEscapedSingleQuote() throws {
        let unescapedBoundary = "abc'def"
        let body = "--\(unescapedBoundary)\r\nContent-Disposition: form-data; name=\"field\"\r\n\r\nvalue\r\n--\(unescapedBoundary)--\r\n"
        let raw = "POST /upload HTTP/1.1\r\nHost: localhost\r\nContent-Type: multipart/form-data; boundary=\"abc\\'def\"\r\nContent-Length: \(body.utf8.count)\r\n\r\n\(body)"
        let parser = HTTPRequestParser(raw)
        let request = try #require(try parser.parseRequest())
        let parts = try request.multipartParts()

        // #require halts the test on a wrong count so the subscript below cannot trap.
        try #require(parts.count == 1)
        #expect(parts[0].name == "field")
    }

    /// The Content-Type header carries `charset="boundary=fake"` before the real
    /// `boundary=` parameter; the body is delimited with the real boundary.
    @Test("RFC 2045 §5.1: boundary= inside another parameter's quoted value is not matched")
    func boundaryInsideQuotedParameterNotMatched() throws {
        let realBoundary = "RealBoundary123"
        let body = "--\(realBoundary)\r\nContent-Disposition: form-data; name=\"field\"\r\n\r\nvalue\r\n--\(realBoundary)--\r\n"
        let raw = "POST /upload HTTP/1.1\r\nHost: localhost\r\nContent-Type: multipart/form-data; charset=\"boundary=fake\"; boundary=\(realBoundary)\r\nContent-Length: \(body.utf8.count)\r\n\r\n\(body)"
        let parser = HTTPRequestParser(raw)
        let request = try #require(try parser.parseRequest())
        let parts = try request.multipartParts()

        // Halt on a wrong count instead of trapping on the subscript.
        try #require(parts.count == 1)
        #expect(parts[0].name == "field")
    }

    /// Uses a boundary containing `+`, `/` and `=` and expects one part named `field`.
    @Test("RFC 7578: boundary with special characters (plus, equals, slash)")
    func boundaryWithSpecialCharacters() throws {
        let boundary = "abc+def/ghi=123"
        let request = try parse(fields: [("field", nil, nil, "value")], boundary: boundary)
        let parts = try request.multipartParts()

        // Halt on a wrong count instead of trapping on the subscript.
        try #require(parts.count == 1)
        #expect(parts[0].name == "field")
    }

    @Test("RFC 2046: preamble before first boundary is ignored")
    func 
preambleBeforeFirstBoundaryIgnored() throws {
        // Text placed before the first delimiter line must not become a part or leak into one.
        let boundary = "AaB03x"
        let body = "This is the preamble. It should be ignored.\r\n--\(boundary)\r\nContent-Disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--\(boundary)--\r\n"
        let request = try parseRaw(body: body, boundary: boundary)
        let parts = try request.multipartParts()

        // #require halts the test on a wrong count so the subscripts below cannot trap.
        try #require(parts.count == 1)
        #expect(parts[0].name == "field1")
        #expect(try readAll(parts[0].body) == Data("value1".utf8))
    }

    /// Text placed after the close delimiter must not become a part or alter the
    /// preceding part's body.
    @Test("RFC 2046: epilogue after closing boundary is ignored")
    func epilogueAfterClosingBoundaryIgnored() throws {
        let boundary = "AaB03x"
        let body = "--\(boundary)\r\nContent-Disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--\(boundary)--\r\nThis is the epilogue. It should be ignored.\r\n"
        let request = try parseRaw(body: body, boundary: boundary)
        let parts = try request.multipartParts()

        // Halt on a wrong count instead of trapping on the subscripts.
        try #require(parts.count == 1)
        #expect(parts[0].name == "field1")
        #expect(try readAll(parts[0].body) == Data("value1".utf8))
    }

    @Test("RFC 2046 §5.1.1: transport padding between parts does not corrupt body data")
    func transportPaddingBetweenPartsDoesNotCorruptBody() throws {
        let boundary = "AaB03x"
        // Transport padding (spaces) after the boundary delimiter, between two parts.
        // RFC 2046 §5.1.1: delimiter = CRLF "--" boundary *( SP / HTAB ) CRLF
        // The parser should strip the padding so it doesn't end up in part bodies.
        let body = "--\(boundary) \r\nContent-Disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--\(boundary) \r\nContent-Disposition: form-data; name=\"field2\"\r\n\r\nvalue2\r\n--\(boundary)--\r\n"
        let request = try parseRaw(body: body, boundary: boundary)
        let parts = try request.multipartParts()

        // Halt on a wrong count: the assertions that follow subscript parts[0] and parts[1].
        try #require(parts.count == 2)
        #expect(parts[0].name == "field1")
        // The body should be exactly "value1", not "value1" with trailing padding artifacts. 
#expect(try readAll(parts[0].body) == Data("value1".utf8))
        #expect(parts[1].name == "field2")
        #expect(try readAll(parts[1].body) == Data("value2".utf8))
    }

    /// Places two tab characters between the opening delimiter and its CRLF and checks
    /// the part's name and body are unaffected.
    @Test("RFC 2046 §5.1.1: transport padding (tabs) after boundary does not corrupt headers")
    func transportPaddingTabsDoNotCorruptHeaders() throws {
        let boundary = "AaB03x"
        // Tabs after the boundary delimiter before the CRLF
        let body = "--\(boundary)\t\t\r\nContent-Disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--\(boundary)--\r\n"
        let request = try parseRaw(body: body, boundary: boundary)
        let parts = try request.multipartParts()

        // #require halts the test on a wrong count so the subscripts below cannot trap.
        try #require(parts.count == 1)
        // The name should be "field1" — padding should not cause header parsing to break.
        #expect(parts[0].name == "field1")
        #expect(try readAll(parts[0].body) == Data("value1".utf8))
    }

    // MARK: - WordPress / Real-World Scenarios

    /// Builds a five-part upload (four text fields plus one file) and checks the order,
    /// names, selected bodies and the file part's filename and content type.
    @Test("WordPress: media upload with file and metadata fields")
    func wordPressMediaUpload() throws {
        let request = try parse(fields: [
            ("title", nil, nil, "My Featured Image"),
            ("alt_text", nil, nil, "A beautiful sunset over the mountains"),
            ("caption", nil, nil, "Photo taken at Yosemite National Park"),
            ("description", nil, nil, "Full resolution sunset photo"),
            ("file", "sunset.jpg", "image/jpeg", "JFIF-binary-data-here"),
        ], boundary: "----WebKitFormBoundary7MA4YWxk")
        let parts = try request.multipartParts()

        // Halt on a wrong count: the assertions below subscript indices 0 through 4.
        try #require(parts.count == 5)
        #expect(parts[0].name == "title")
        #expect(try readAll(parts[0].body) == Data("My Featured Image".utf8))
        #expect(parts[1].name == "alt_text")
        #expect(try readAll(parts[1].body) == Data("A beautiful sunset over the mountains".utf8))
        #expect(parts[2].name == "caption")
        #expect(parts[3].name == "description")
        #expect(parts[4].name == "file")
        #expect(parts[4].filename == "sunset.jpg")
        #expect(parts[4].contentType == "image/jpeg")
    }

    @Test("WordPress: multiple image uploads in a single request")
    func 
multipleImageUploads() throws {
        // Three file parts share the field name "files"; each must keep its own
        // filename, content type and body.
        let request = try parse(fields: [
            ("files", "photo1.jpg", "image/jpeg", "jpeg-data-1"),
            ("files", "photo2.png", "image/png", "png-data-2"),
            ("files", "photo3.gif", "image/gif", "gif-data-3"),
        ], boundary: "----WebKitFormBoundary9876")
        let parts = try request.multipartParts()

        // #require halts the test on a wrong count so the subscripts below cannot trap.
        try #require(parts.count == 3)
        #expect(parts[0].filename == "photo1.jpg")
        #expect(parts[0].contentType == "image/jpeg")
        #expect(parts[1].filename == "photo2.png")
        #expect(parts[1].contentType == "image/png")
        #expect(parts[2].filename == "photo3.gif")
        #expect(parts[2].contentType == "image/gif")
        #expect(try readAll(parts[0].body) == Data("jpeg-data-1".utf8))
        #expect(try readAll(parts[1].body) == Data("png-data-2".utf8))
        #expect(try readAll(parts[2].body) == Data("gif-data-3".utf8))
    }

    /// A file part with an empty value must still be reported, with an empty body.
    @Test("WordPress: file upload with zero-byte body (empty file)")
    func emptyFileUpload() throws {
        let request = try parse(fields: [
            ("file", "empty.txt", "text/plain", ""),
        ], boundary: "AaB03x")
        let parts = try request.multipartParts()

        // Halt on a wrong count instead of trapping on the subscripts.
        try #require(parts.count == 1)
        #expect(parts[0].name == "file")
        #expect(parts[0].filename == "empty.txt")
        #expect(try readAll(parts[0].body) == Data())
    }

    // MARK: - Part Count Limit

    /// Builds 101 simple fields and expects `multipartParts()` to throw
    /// `MultipartParseError.tooManyParts`.
    @Test("rejects multipart body with more than 100 parts")
    func tooManyParts() throws {
        var fields: [(name: String, filename: String?, contentType: String?, value: String)] = []
        for i in 0..<101 {
            fields.append(("field\(i)", nil, nil, "value\(i)"))
        }
        let request = try parse(fields: fields, boundary: "AaB03x")

        #expect(throws: MultipartParseError.tooManyParts) {
            try request.multipartParts()
        }
    }

    @Test("accepts multipart body with exactly 100 parts")
    func maxPartsAccepted() throws {
        var fields: [(name: String, filename: String?, contentType: String?, value: String)] = []
        for i in 0..<100 {
            fields.append(("field\(i)", nil, nil, "value\(i)"))
        }
        let request = try parse(fields: fields, 
boundary: "AaB03x")
        let parts = try request.multipartParts()

        #expect(parts.count == 100)
    }

    // MARK: - Helpers

    /// Serialises `fields` as a multipart/form-data body delimited by `boundary`
    /// and hands the result to `parseRaw(body:boundary:)`.
    ///
    /// Each field becomes one part: a Content-Disposition header carrying `name` (plus
    /// `filename` when present), an optional Content-Type header, a blank line, then `value`.
    private func parse(
        fields: [(name: String, filename: String?, contentType: String?, value: String)],
        boundary: String
    ) throws -> ParsedHTTPRequest {
        let encodedParts = fields.map { field -> String in
            var headerBlock = "Content-Disposition: form-data; name=\"\(field.name)\""
            if let filename = field.filename {
                headerBlock += "; filename=\"\(filename)\""
            }
            if let contentType = field.contentType {
                headerBlock += "\r\nContent-Type: \(contentType)"
            }
            return "--\(boundary)\r\n\(headerBlock)\r\n\r\n\(field.value)"
        }
        // Parts are separated by CRLF and the whole body ends with the close delimiter.
        let closeDelimiter = "\r\n--\(boundary)--\r\n"
        let body = encodedParts.joined(separator: "\r\n") + closeDelimiter

        return try parseRaw(body: body, boundary: boundary)
    }

    /// Wraps `body` in a POST request whose Content-Type names `boundary` and whose
    /// Content-Length is the body's UTF-8 byte count, then parses it.
    private func parseRaw(body: String, boundary: String) throws -> ParsedHTTPRequest {
        let requestHead = "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Type: multipart/form-data; boundary=\(boundary)\r\nContent-Length: \(body.utf8.count)\r\n\r\n"
        let requestParser = HTTPRequestParser(requestHead + body)
        return try #require(try requestParser.parseRequest())
    }
}
diff --git a/ios/Tests/GutenbergKitHTTPTests/RFC8941ConformanceTests.swift b/ios/Tests/GutenbergKitHTTPTests/RFC8941ConformanceTests.swift new file mode 100644 index 000000000..1df36f6db --- /dev/null +++ b/ios/Tests/GutenbergKitHTTPTests/RFC8941ConformanceTests.swift @@ -0,0 +1,423 @@ +import Foundation +import Testing +@testable import GutenbergKitHTTP + +/// Tests that the HTTP parser correctly preserves RFC 8941 Structured Field Values. +/// +/// Our parser treats field values as opaque strings — it does not parse structured +/// fields internally. These tests verify that all RFC 8941 syntax constructs pass +/// through the parser without being mangled, truncated, or misinterpreted. 
@Suite("RFC 8941 Conformance")
struct RFC8941ConformanceTests {

    // MARK: - Section 3.1 (Lists)

    /// A comma-separated token list must be returned by `header(_:)` exactly as sent.
    @Test("RFC 8941 §3.1: simple list of tokens")
    func simpleListOfTokens() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-List: sugar, tea, rum\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-List")
        #expect(fieldValue == "sugar, tea, rum")
    }

    /// List members carrying `;key=value` parameters must be returned unchanged.
    @Test("RFC 8941 §3.1: list with parameters on members")
    func listWithParameters() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-List: abc;a=1;b=2, cde_456\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-List")
        #expect(fieldValue == "abc;a=1;b=2, cde_456")
    }

    /// When the request has no Example-List header, `header(_:)` must return nil.
    @Test("RFC 8941 §3.1: empty list is represented by absent header")
    func emptyListAbsentHeader() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-List")
        #expect(fieldValue == nil)
    }

    /// Parenthesised inner lists, including an empty `()`, must be returned unchanged.
    @Test("RFC 8941 §3.1: list with inner lists")
    func listWithInnerLists() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-List: (\"foo\" \"bar\"), (\"baz\"), (\"bat\" \"one\"), ()\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-List")
        #expect(fieldValue == "(\"foo\" \"bar\"), (\"baz\"), (\"bat\" \"one\"), ()")
    }

    /// Inner lists with parameters on both the items and the list must be returned unchanged.
    @Test("RFC 8941 §3.1: list with parameterised inner lists")
    func listWithParameterisedInnerLists() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-List: (\"foo\";a=1;b=2);lvl=5, (\"bar\" \"baz\");lvl=1\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-List")
        #expect(fieldValue == "(\"foo\";a=1;b=2);lvl=5, (\"bar\" \"baz\");lvl=1")
    }

    @Test("RFC 8941 §3.1: list spread across multiple header lines 
is combined")
    func listSpreadAcrossMultipleHeaderLines() throws {
        // RFC 8941 notes that list-based fields can be split across multiple lines
        // and combined per RFC 9110 §5.3
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-List: sugar, tea\r\nExample-List: rum\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        // The two Example-List lines are expected back as one comma-joined value.
        let fieldValue = parsed.header("Example-List")
        #expect(fieldValue == "sugar, tea, rum")
    }

    // MARK: - Section 3.2 (Dictionaries)

    /// A dictionary with a quoted-string member and a byte-sequence member must be
    /// returned unchanged.
    @Test("RFC 8941 §3.2: simple dictionary")
    func simpleDictionary() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Dict: en=\"Applepie\", da=:w4teleAA=:\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Dict")
        #expect(fieldValue == "en=\"Applepie\", da=:w4teleAA=:")
    }

    /// Bare dictionary keys (`b`, `c`) must be returned as sent, with no value synthesised.
    @Test("RFC 8941 §3.2: dictionary with boolean true values (value omitted)")
    func dictionaryWithBooleanTrueOmitted() throws {
        // When a dictionary value is boolean true, the =?1 is omitted
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Dict: a=?0, b, c; foo=bar\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Dict")
        #expect(fieldValue == "a=?0, b, c; foo=bar")
    }

    /// A dictionary member whose value is an inner list must be returned unchanged.
    @Test("RFC 8941 §3.2: dictionary with inner list values")
    func dictionaryWithInnerListValues() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Dict: rating=1.5, feelings=(joy sadness)\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Dict")
        #expect(fieldValue == "rating=1.5, feelings=(joy sadness)")
    }

    @Test("RFC 8941 §3.2: dictionary with parameters on members")
    func dictionaryWithParametersOnMembers() throws {
        let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nExample-Dict: abc=123;a=1;b=2, def=456, ghi=789;q=9;r=\"+w\"\r\nHost: localhost\r\n\r\n")
        let request = try #require(try 
parser.parseRequest())

        #expect(request.header("Example-Dict") == "abc=123;a=1;b=2, def=456, ghi=789;q=9;r=\"+w\"")
    }

    /// Two Example-Dict lines are expected back as one comma-joined value.
    @Test("RFC 8941 §3.2: dictionary spread across multiple header lines")
    func dictionarySpreadAcrossMultipleHeaderLines() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Dict: a=1, b=2\r\nExample-Dict: c=3\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Dict")
        #expect(fieldValue == "a=1, b=2, c=3")
    }

    // MARK: - Section 3.3 (Items)

    /// A bare item followed by a parameter must be returned unchanged.
    @Test("RFC 8941 §3.3: item with parameters")
    func itemWithParameters() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Item: 5;foo=bar\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Item")
        #expect(fieldValue == "5;foo=bar")
    }

    // MARK: - Section 3.3.1 (Integers)

    /// The integer `42` must be returned as the string "42".
    @Test("RFC 8941 §3.3.1: positive integer")
    func positiveInteger() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Integer: 42\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Integer")
        #expect(fieldValue == "42")
    }

    /// The leading minus sign of `-42` must be kept.
    @Test("RFC 8941 §3.3.1: negative integer")
    func negativeInteger() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Integer: -42\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Integer")
        #expect(fieldValue == "-42")
    }

    /// The single digit `0` must be returned as the string "0".
    @Test("RFC 8941 §3.3.1: zero")
    func zeroInteger() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Integer: 0\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Integer")
        #expect(fieldValue == "0")
    }

    @Test("RFC 8941 §3.3.1: maximum 15-digit integer")
    func maximumInteger() throws {
        let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nExample-Integer: 
999999999999999\r\nHost: localhost\r\n\r\n")
        let request = try #require(try parser.parseRequest())

        #expect(request.header("Example-Integer") == "999999999999999")
    }

    /// The 15-digit negative integer must be returned with its sign and all digits.
    @Test("RFC 8941 §3.3.1: minimum 15-digit negative integer")
    func minimumInteger() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Integer: -999999999999999\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Integer")
        #expect(fieldValue == "-999999999999999")
    }

    // MARK: - Section 3.3.2 (Decimals)

    /// The decimal `4.5` must be returned as the string "4.5".
    @Test("RFC 8941 §3.3.2: simple decimal")
    func simpleDecimal() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Decimal: 4.5\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Decimal")
        #expect(fieldValue == "4.5")
    }

    /// The leading minus sign of `-3.14` must be kept.
    @Test("RFC 8941 §3.3.2: negative decimal")
    func negativeDecimal() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Decimal: -3.14\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Decimal")
        #expect(fieldValue == "-3.14")
    }

    /// A decimal with twelve integer digits and three fractional digits must be
    /// returned with every digit intact.
    @Test("RFC 8941 §3.3.2: decimal with three fractional digits (maximum precision)")
    func decimalMaxPrecision() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Decimal: 123456789012.123\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Decimal")
        #expect(fieldValue == "123456789012.123")
    }

    // MARK: - Section 3.3.3 (Strings)

    /// A quoted string must be returned with its surrounding quotes.
    @Test("RFC 8941 §3.3.3: simple string")
    func simpleString() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-String: \"hello world\"\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-String")
        #expect(fieldValue == "\"hello world\"")
    }

    @Test("RFC 8941 §3.3.3: string with escaped backslash")
    func 
stringWithEscapedBackslash() throws {
        // On the wire each backslash is doubled; the value must come back still doubled.
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-String: \"path\\\\to\\\\file\"\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-String")
        #expect(fieldValue == "\"path\\\\to\\\\file\"")
    }

    /// Backslash-escaped quotes inside a quoted string must be returned still escaped.
    @Test("RFC 8941 §3.3.3: string with escaped double quote")
    func stringWithEscapedQuote() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-String: \"she said \\\"hi\\\"\"\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-String")
        #expect(fieldValue == "\"she said \\\"hi\\\"\"")
    }

    /// An empty quoted string must be returned as two quote characters.
    @Test("RFC 8941 §3.3.3: empty string")
    func emptyString() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-String: \"\"\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-String")
        #expect(fieldValue == "\"\"")
    }

    /// A quoted string made of ASCII punctuation must be returned unchanged.
    @Test("RFC 8941 §3.3.3: string with special printable ASCII characters")
    func stringWithSpecialASCII() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-String: \"!#$%&'()*+,-./:;<=>?@[]^_`{|}~\"\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-String")
        #expect(fieldValue == "\"!#$%&'()*+,-./:;<=>?@[]^_`{|}~\"")
    }

    // MARK: - Section 3.3.4 (Tokens)

    /// An alphanumeric token must be returned unchanged.
    @Test("RFC 8941 §3.3.4: simple token")
    func simpleToken() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Token: foo123\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Token")
        #expect(fieldValue == "foo123")
    }

    @Test("RFC 8941 §3.3.4: token starting with asterisk")
    func tokenStartingWithAsterisk() throws {
        let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nExample-Token: *foo\r\nHost: localhost\r\n\r\n")
        let request = try #require(try parser.parseRequest())

#expect(request.header("Example-Token") == "*foo")
    }

    /// A token containing `/` and `:` must be returned unchanged.
    @Test("RFC 8941 §3.3.4: token with colon and slash")
    func tokenWithColonAndSlash() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Token: foo/bar:baz\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Token")
        #expect(fieldValue == "foo/bar:baz")
    }

    /// A media-type-shaped token with `/` and `-` must be returned unchanged.
    @Test("RFC 8941 §3.3.4: token with tchar characters")
    func tokenWithTcharCharacters() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Token: application/x-www-form-urlencoded\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Token")
        #expect(fieldValue == "application/x-www-form-urlencoded")
    }

    // MARK: - Section 3.3.5 (Byte Sequences)

    /// A colon-delimited base64 byte sequence must be returned with both colons.
    @Test("RFC 8941 §3.3.5: base64-encoded byte sequence")
    func base64ByteSequence() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-ByteSeq: :cHJldGVuZCB0aGlzIGlzIGJpbmFyeS8=:\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-ByteSeq")
        #expect(fieldValue == ":cHJldGVuZCB0aGlzIGlzIGJpbmFyeS8=:")
    }

    /// An empty byte sequence must be returned as two colon characters.
    @Test("RFC 8941 §3.3.5: empty byte sequence")
    func emptyByteSequence() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-ByteSeq: ::\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-ByteSeq")
        #expect(fieldValue == "::")
    }

    /// A value that starts and ends with a colon must be returned whole.
    @Test("RFC 8941 §3.3.5: byte sequence colon delimiters are not confused with header field syntax")
    func byteSequenceColonsNotConfusedWithFieldSyntax() throws {
        // The colons in :base64: must not be misinterpreted by the header parser
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-ByteSeq: :AQID:\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        #expect(parsed.header("Example-ByteSeq") == ":AQID:")
}

    // MARK: - Section 3.3.6 (Booleans)

    /// The boolean `?1` must be returned as the string "?1".
    @Test("RFC 8941 §3.3.6: boolean true")
    func booleanTrue() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Boolean: ?1\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Boolean")
        #expect(fieldValue == "?1")
    }

    /// The boolean `?0` must be returned as the string "?0".
    @Test("RFC 8941 §3.3.6: boolean false")
    func booleanFalse() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Boolean: ?0\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Boolean")
        #expect(fieldValue == "?0")
    }

    // MARK: - Section 3.1.2 (Parameters)

    /// Parameters holding a string, integer, decimal, boolean and byte sequence must all
    /// be returned unchanged.
    @Test("RFC 8941 §3.1.2: parameters with various value types")
    func parametersWithVariousTypes() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Param: token;str=\"val\";int=42;dec=1.5;bool=?1;bin=:AQID:\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Param")
        #expect(fieldValue == "token;str=\"val\";int=42;dec=1.5;bool=?1;bin=:AQID:")
    }

    /// A bare parameter key (`a`) must be returned as sent, with no value synthesised.
    @Test("RFC 8941 §3.1.2: boolean true parameter with value omitted")
    func booleanTrueParameterOmitted() throws {
        // Serialised form of a boolean true parameter omits =?1
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Item: 1; a; b=?0\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        let fieldValue = parsed.header("Example-Item")
        #expect(fieldValue == "1; a; b=?0")
    }

    /// Both occurrences of the repeated key `a` must be present in the returned value.
    @Test("RFC 8941 §3.1.2: multiple parameters with same key — last wins")
    func duplicateParameterKeysLastWins() throws {
        let wire = "GET /wp/v2/posts HTTP/1.1\r\nExample-Item: token;a=1;a=2\r\nHost: localhost\r\n\r\n"
        let requestParser = HTTPRequestParser(wire)
        let parsed = try #require(try requestParser.parseRequest())

        // Parser preserves the raw value — structured field parsing is application-level
        let fieldValue = parsed.header("Example-Item")
        #expect(fieldValue == "token;a=1;a=2")
    }

@Test("RFC 8941 §3.1.2: parameter keys use lowercase and special characters") + func parameterKeysLowercaseAndSpecial() throws { + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nExample-Item: token;*key=1;a-b=2;c.d=3;e_f=4\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.header("Example-Item") == "token;*key=1;a-b=2;c.d=3;e_f=4") + } + + // MARK: - Section 3.1.1 (Inner Lists) + + @Test("RFC 8941 §3.1.1: inner list with parameters on items and list") + func innerListWithParametersOnItemsAndList() throws { + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nExample-List: (\"foo\";a=1 \"bar\";b=2);lvl=5\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.header("Example-List") == "(\"foo\";a=1 \"bar\";b=2);lvl=5") + } + + @Test("RFC 8941 §3.1.1: empty inner list") + func emptyInnerList() throws { + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nExample-List: ()\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.header("Example-List") == "()") + } + + @Test("RFC 8941 §3.1.1: inner list with mixed item types") + func innerListWithMixedTypes() throws { + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nExample-List: (token 42 3.14 \"string\" ?1 :AQID:)\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.header("Example-List") == "(token 42 3.14 \"string\" ?1 :AQID:)") + } + + // MARK: - Complex / Real-World Structured Headers + + @Test("RFC 8941: Priority header (RFC 9218) uses structured dictionary") + func priorityHeader() throws { + // Priority is a real-world structured dictionary header + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nPriority: u=3, i\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.header("Priority") == "u=3, i") + } 
+ + @Test("RFC 8941: complex nested structure with all types") + func complexNestedStructure() throws { + let value = "a=(1 2.0 \"three\");q=0.9, b=:AQID:;flag, c=token;*key=?0" + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nExample-Complex: \(value)\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.header("Example-Complex") == value) + } + + @Test("RFC 8941: value with semicolons is not split by parser") + func semicolonsNotSplitByParser() throws { + // Semicolons in structured field values must not be misinterpreted + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nExample-Item: token;a=1;b=2;c=3\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + // The entire value including semicolons must be preserved as one string + #expect(request.header("Example-Item") == "token;a=1;b=2;c=3") + } + + @Test("RFC 8941: value with parentheses is not misinterpreted") + func parenthesesNotMisinterpreted() throws { + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nExample-List: (a b c), (d e)\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + #expect(request.header("Example-List") == "(a b c), (d e)") + } + + @Test("RFC 8941: value with equals signs is not misinterpreted") + func equalsSignsNotMisinterpreted() throws { + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nExample-Dict: a=1, b=\"hello=world\", c=:YQ==:\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + // Equals signs in strings and base64 must be preserved + #expect(request.header("Example-Dict") == "a=1, b=\"hello=world\", c=:YQ==:") + } + + @Test("RFC 8941: value with question marks is not misinterpreted") + func questionMarksNotMisinterpreted() throws { + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nExample-Dict: enabled=?1, disabled=?0\r\nHost: localhost\r\n\r\n") + let request 
= try #require(try parser.parseRequest()) + + #expect(request.header("Example-Dict") == "enabled=?1, disabled=?0") + } +} diff --git a/ios/Tests/GutenbergKitHTTPTests/RFC9110ConformanceTests.swift b/ios/Tests/GutenbergKitHTTPTests/RFC9110ConformanceTests.swift new file mode 100644 index 000000000..4e7a2f1b7 --- /dev/null +++ b/ios/Tests/GutenbergKitHTTPTests/RFC9110ConformanceTests.swift @@ -0,0 +1,235 @@ +import Foundation +import Testing +@testable import GutenbergKitHTTP + +#if canImport(Network) +import Network +#endif + +/// Tests that require platform-specific APIs (URLRequest conversion, response +/// serialization, server behavior) or conditional logic that cannot be expressed +/// in the shared JSON fixture format. All pure parse-input → expected-output +/// tests have been migrated to test-fixtures/http/request-parsing.json. +@Suite("RFC 9110 Conformance") +struct RFC9110ConformanceTests { + + // MARK: - Section 5.3 (Internal Dict Representation) + + @Test("RFC 9110 §5.3: duplicate headers preserve first occurrence's key casing") + func duplicateHeadersPreserveFirstKeyCasing() throws { + let parser = HTTPRequestParser("GET /wp/v2/posts HTTP/1.1\r\nX-Custom: first\r\nx-custom: second\r\nHost: localhost\r\n\r\n") + let request = try #require(try parser.parseRequest()) + + // The dict key should use the casing from the first occurrence + #expect(request.headers["X-Custom"] == "first, second") + #expect(request.headers["x-custom"] == nil) + } + + // MARK: - Section 7.1 (Edge Cases) + + @Test("RFC 9110 §7.1: request target with empty path") + func requestTargetWithEmptyPath() throws { + // An empty request-target should be rejected or treated as "/" + let parser = HTTPRequestParser("GET HTTP/1.1\r\nHost: localhost\r\n\r\n") + let request = try? 
parser.parseRequest() + + // With split(maxSplits: 2) on "GET HTTP/1.1", this splits to ["GET", "", "HTTP/1.1"] + // The target becomes "" + if let request { + #expect(request.method == "GET") + } + } + + // MARK: - Section 15 (Security - Request Splitting / Smuggling) + + @Test("RFC 9110 §15.6: LF in header value should not cause request splitting") + func lfInHeaderValueNoRequestSplitting() throws { + // A bare LF in a header value within a CRLF-terminated message + // The parser should not split on bare LF + let raw = "GET /wp/v2/posts HTTP/1.1\r\nX-Test: val\nue\r\nHost: localhost\r\n\r\n" + let parser = HTTPRequestParser(raw) + let request = try? parser.parseRequest() + + // Whether it parses or not, it shouldn't create a request smuggling vector + if let request { + #expect(request.header("Host") == "localhost") + } + } + + // MARK: - Response Serialization + + @Test("RFC 9110 §15.5.2: 401 response with WWW-Authenticate header serializes correctly") + func wwwAuthenticateHeaderOn401() { + let response = HTTPResponse( + status: 401, + headers: [("WWW-Authenticate", "Bearer")] + ) + let serialized = String(data: response.serialized(), encoding: .utf8)! + + #expect(serialized.hasPrefix("HTTP/1.1 401 Unauthorized\r\n")) + #expect(serialized.contains("WWW-Authenticate: Bearer\r\n")) + } + + // MARK: - Section 15.5.9 (408 Request Timeout) + + #if canImport(Network) + @Test("RFC 9110 §15.5.9: server sends 408 before closing on read timeout", .disabled("HTTPServer does not yet send 408 on idle timeout — needs NWConnection write support"), .timeLimit(.minutes(1))) + func serverSends408OnReadTimeout() async throws { + // Per RFC 9110 §15.5.9, a server that decides to close an idle connection + // SHOULD send a 408 (Request Timeout) response. 
+ let server = try await HTTPServer.start( + name: "timeout-test", + port: nil, + requiresAuthentication: false, + readTimeout: .milliseconds(500) + ) { _ in + HTTPResponse(status: 200) + } + defer { server.stop() } + + // Connect and immediately read — don't send any request data. + // URLSession won't work here (it always sends a request), so use raw sockets. + let fd = socket(AF_INET, SOCK_STREAM, 0) + #expect(fd >= 0, "Failed to create socket") + defer { close(fd) } + + var addr = sockaddr_in() + addr.sin_family = sa_family_t(AF_INET) + addr.sin_port = server.port.bigEndian + addr.sin_addr.s_addr = inet_addr("127.0.0.1") + + let connectResult = withUnsafePointer(to: &addr) { + $0.withMemoryRebound(to: sockaddr.self, capacity: 1) { + connect(fd, $0, socklen_t(MemoryLayout.size)) + } + } + #expect(connectResult == 0, "Failed to connect to server") + + // Set a 5-second read timeout so we don't block forever. + var timeout = timeval(tv_sec: 5, tv_usec: 0) + setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, socklen_t(MemoryLayout.size)) + + // Wait for the server to respond (it should send 408 within its 500ms timeout). + var buffer = [UInt8](repeating: 0, count: 4096) + let bytesRead = recv(fd, &buffer, buffer.count, 0) + + // The server should have sent a 408 response, not just closed silently. 
+ #expect(bytesRead > 0, "Server closed connection without sending a response") + + if bytesRead > 0 { + let response = String(bytes: buffer[...allocate(capacity: 10) + defer { buffer.deallocate() } + + // First read gets all bytes + let n1 = stream.read(buffer, maxLength: 10) + #expect(n1 == 2) + + // Second read returns 0 (at end) + let n2 = stream.read(buffer, maxLength: 10) + #expect(n2 == 0) + + stream.close() + } + + @Test("fileSlice init throws for missing file") + func fileSliceThrowsForMissingFile() { + let url = FileManager.default.temporaryDirectory + .appendingPathComponent("nonexistent-\(UUID().uuidString)") + let body = RequestBody(fileURL: url, offset: 0, length: 10) + + #expect(throws: Error.self) { + _ = try body.makeInputStream() + } + } + + @Test("fileSlice stream with zero length returns no data") + func fileSliceZeroLength() throws { + let contents = Data("ABCDE".utf8) + let url = makeTemporaryFile(contents: contents) + defer { try? FileManager.default.removeItem(at: url) } + + let body = RequestBody(fileURL: url, offset: 2, length: 0) + + let stream = try body.makeInputStream() + #expect(readAll(stream) == Data()) + } + + @Test("fileSlice stream does not read beyond slice boundary") + func fileSliceDoesNotReadBeyondBoundary() throws { + let contents = Data("ABCDEFGHIJ".utf8) + let url = makeTemporaryFile(contents: contents) + defer { try? FileManager.default.removeItem(at: url) } + + // Slice is "CDE" (offset 2, length 3) — must not return "FGHIJ" + let body = RequestBody(fileURL: url, offset: 2, length: 3) + + let stream = try body.makeInputStream() + let result = readAll(stream) + #expect(result == Data("CDE".utf8)) + #expect(result.count == 3) + } + + @Test("fileSlice stream with binary data preserves all bytes") + func fileSliceBinaryData() throws { + // All byte values 0x00-0xFF + let contents = Data(0...255) + let url = makeTemporaryFile(contents: contents) + defer { try? 
FileManager.default.removeItem(at: url) } + + let body = RequestBody(fileURL: url, offset: 100, length: 50) + + let stream = try body.makeInputStream() + let result = readAll(stream) + #expect(result == Data(100..<150)) + } + + @Test("multiple fileSlice streams from same file read independently") + func fileSliceMultipleStreamsIndependent() throws { + let contents = Data("ABCDEFGHIJKLMNOP".utf8) + let url = makeTemporaryFile(contents: contents) + defer { try? FileManager.default.removeItem(at: url) } + + let body1 = RequestBody(fileURL: url, offset: 0, length: 4) + let body2 = RequestBody(fileURL: url, offset: 8, length: 4) + + let stream1 = try body1.makeInputStream() + let stream2 = try body2.makeInputStream() + + #expect(readAll(stream1) == Data("ABCD".utf8)) + #expect(readAll(stream2) == Data("IJKL".utf8)) + } + + // MARK: - Equatable + + @Test("data-backed bodies with same data are equal") + func dataEquality() { + let data = Data("same".utf8) + #expect(RequestBody(data: data) == RequestBody(data: data)) + } + + @Test("data-backed bodies with different data are not equal") + func dataInequality() { + #expect(RequestBody(data: Data("a".utf8)) != RequestBody(data: Data("b".utf8))) + } + + @Test("file-backed bodies with same URL are equal") + func fileEquality() { + let url = URL(fileURLWithPath: "/tmp/same-file") + #expect(RequestBody(fileURL: url) == RequestBody(fileURL: url)) + } + + @Test("data-backed and file-backed bodies are not equal") + func dataVsFileInequality() { + let data = Data("hello".utf8) + let url = URL(fileURLWithPath: "/tmp/hello") + #expect(RequestBody(data: data) != RequestBody(fileURL: url)) + } + + // MARK: - URLSession integration (file-slice via httpBodyStream) + + #if canImport(Network) + @Test("fileSlice body sent via URLSession httpBodyStream delivers correct bytes") + func fileSliceBodyStreamWorksWithURLSession() async throws { + // Write a known payload to a temp file + let payload = Data("The quick brown fox jumps over the lazy 
dog".utf8) + let fileURL = makeTemporaryFile(contents: payload) + defer { try? FileManager.default.removeItem(at: fileURL) } + + // Slice out "brown fox" (offset 10, length 9) + let expectedSlice = Data("brown fox".utf8) + let body = RequestBody(fileURL: fileURL, offset: 10, length: 9) + + // Start an echo server that returns the request body as the response + let server = try await HTTPServer.start( + name: "body-echo", + requiresAuthentication: false + ) { request in + guard let body = request.parsed.body, + let data = try? await body.data else { + return HTTPResponse(status: 200, body: Data()) + } + return HTTPResponse(status: 200, body: data) + } + defer { server.stop() } + + // Build a URLRequest using the same code path as the proxy: + // this assigns body.makeInputStream() to request.httpBodyStream. + let baseURL = URL(string: "http://127.0.0.1:\(server.port)")! + let parsed = ParsedHTTPRequest.complete( + method: "POST", + target: "/echo", + httpVersion: "HTTP/1.1", + headers: ["Host": "localhost", "Content-Length": "9"], + body: body + ) + var request = try #require(parsed.urlRequest(relativeTo: baseURL)) + request.setValue("application/octet-stream", forHTTPHeaderField: "Content-Type") + + let (responseData, response) = try await URLSession.shared.data(for: request) + let http = try #require(response as? HTTPURLResponse) + + #expect(http.statusCode == 200) + // This assertion would fail with the old FileSliceInputStream subclass + // because URLSession reads from the empty Data() superclass instead of + // the overridden read(_:maxLength:) — resulting in an empty body. 
+ #expect(responseData == expectedSlice) + } + #endif + + // MARK: - Helpers + + private func makeTemporaryFile(contents: Data) -> URL { + let url = FileManager.default.temporaryDirectory + .appendingPathComponent("RequestBodyTests-\(UUID().uuidString)") + FileManager.default.createFile(atPath: url.path, contents: contents) + return url + } + + private func readAll(_ stream: InputStream) -> Data { + readAllWithBufferSize(stream, bufferSize: 1024) + } + + private func readAllWithBufferSize(_ stream: InputStream, bufferSize: Int) -> Data { + stream.open() + defer { stream.close() } + + var data = Data() + let buffer = UnsafeMutablePointer.allocate(capacity: bufferSize) + defer { buffer.deallocate() } + + // Use read() directly instead of hasBytesAvailable to avoid race + // conditions with bound stream pairs, where data from the writer + // thread may not have arrived yet when hasBytesAvailable is checked. + while true { + let bytesRead = stream.read(buffer, maxLength: bufferSize) + if bytesRead <= 0 { break } + data.append(buffer, count: bytesRead) + } + + return data + } +} diff --git a/ios/Tests/GutenbergKitHTTPTests/TestHelpers.swift b/ios/Tests/GutenbergKitHTTPTests/TestHelpers.swift new file mode 100644 index 000000000..27396fc78 --- /dev/null +++ b/ios/Tests/GutenbergKitHTTPTests/TestHelpers.swift @@ -0,0 +1,26 @@ +import Foundation +@testable import GutenbergKitHTTP + +/// Reads the full contents of a `RequestBody` by streaming through its `InputStream`. +/// +/// Shared across test files that need to verify body data. This version uses +/// `hasBytesAvailable`, which is correct for data-backed and file-backed streams. +/// For bound stream pairs, use a `while true` loop instead (see `ChunkedMultipartTests`). 
+func readAll(_ body: RequestBody) throws -> Data { + let stream = try body.makeInputStream() + stream.open() + defer { stream.close() } + + var data = Data() + let bufferSize = 1024 + let buffer = UnsafeMutablePointer.allocate(capacity: bufferSize) + defer { buffer.deallocate() } + + while stream.hasBytesAvailable { + let bytesRead = stream.read(buffer, maxLength: bufferSize) + guard bytesRead > 0 else { break } + data.append(buffer, count: bytesRead) + } + + return data +} diff --git a/test-fixtures/http/header-value-parsing.json b/test-fixtures/http/header-value-parsing.json new file mode 100644 index 000000000..30692bacc --- /dev/null +++ b/test-fixtures/http/header-value-parsing.json @@ -0,0 +1,125 @@ +{ + "description": "Header value parameter extraction tests. Given a parameter name and a header value string, extract the parameter's value.", + "tests": [ + { + "description": "Extracts unquoted parameter value", + "parameter": "boundary", + "headerValue": "multipart/form-data; boundary=AaB03x", + "expected": "AaB03x" + }, + { + "description": "Unquoted value terminated by semicolon", + "parameter": "boundary", + "headerValue": "multipart/form-data; boundary=AaB03x; charset=utf-8", + "expected": "AaB03x" + }, + { + "description": "Unquoted value at end of string", + "parameter": "charset", + "headerValue": "text/plain; charset=utf-8", + "expected": "utf-8" + }, + { + "description": "Extracts quoted parameter value", + "parameter": "name", + "headerValue": "form-data; name=\"field1\"", + "expected": "field1" + }, + { + "description": "Empty quoted value", + "parameter": "name", + "headerValue": "form-data; name=\"\"", + "expected": "" + }, + { + "description": "Quoted value with spaces", + "parameter": "name", + "headerValue": "form-data; name=\"my field\"", + "expected": "my field" + }, + { + "description": "Quoted value with escaped quote", + "parameter": "name", + "headerValue": "form-data; name=\"field\\\"name\"", + "expected": "field\"name" + }, + { + 
"description": "Quoted value with escaped backslash", + "parameter": "filename", + "headerValue": "form-data; filename=\"C:\\\\path\\\\file.txt\"", + "expected": "C:\\path\\file.txt" + }, + { + "description": "Quoted value with escaped single-quote", + "parameter": "boundary", + "headerValue": "multipart/form-data; boundary=\"abc\\'def\"", + "expected": "abc'def" + }, + { + "description": "Parameter name matching is case-insensitive", + "parameter": "Boundary", + "headerValue": "multipart/form-data; boundary=AaB03x", + "expected": "AaB03x" + }, + { + "description": "Parameter inside another quoted value is not matched", + "parameter": "name", + "headerValue": "form-data; dummy=\"name=evil\"; name=\"real\"", + "expected": "real" + }, + { + "description": "boundary= inside quoted value is skipped", + "parameter": "boundary", + "headerValue": "multipart/form-data; charset=\"boundary=fake\"; boundary=RealBoundary", + "expected": "RealBoundary" + }, + { + "description": "Missing parameter returns null", + "parameter": "filename", + "headerValue": "form-data; name=\"field1\"", + "expected": null + }, + { + "description": "Empty header value returns null", + "parameter": "name", + "headerValue": "", + "expected": null + }, + { + "description": "Extracts correct parameter when multiple are present", + "parameter": "filename", + "headerValue": "form-data; name=\"file\"; filename=\"photo.jpg\"", + "expected": "photo.jpg" + }, + { + "description": "Does not match parameter name as substring of another parameter name", + "parameter": "name", + "headerValue": "form-data; filename=\"test.jpg\"; name=\"real\"", + "expected": "real" + }, + { + "description": "Unterminated quoted value returns accumulated content", + "parameter": "name", + "headerValue": "form-data; name=\"unclosed", + "expected": "unclosed" + }, + { + "description": "Quoted value containing semicolons", + "parameter": "name", + "headerValue": "form-data; name=\"a;b;c\"", + "expected": "a;b;c" + }, + { + 
"description": "No space after semicolon", + "parameter": "name", + "headerValue": "form-data;name=\"field1\"", + "expected": "field1" + }, + { + "description": "Backslash at end of quoted value", + "parameter": "name", + "headerValue": "form-data; name=\"trailing\\", + "expected": "trailing" + } + ] +} diff --git a/test-fixtures/http/multipart-parsing.json b/test-fixtures/http/multipart-parsing.json new file mode 100644 index 000000000..b63e7de7b --- /dev/null +++ b/test-fixtures/http/multipart-parsing.json @@ -0,0 +1,504 @@ +{ + "description": "Multipart/form-data parsing tests (RFC 7578). Given a multipart request body, parse into individual parts with name, filename, contentType, and body.", + "tests": [ + { + "description": "RFC 7578 §4.1: Content-Type with boundary parameter is preserved", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--AaB03x--\r\n", + "expected": { + "contentType": "multipart/form-data; boundary=AaB03x", + "parts": [ + { + "name": "field1" + } + ] + } + }, + { + "description": "RFC 7578 §4.1: Content-Type with quoted boundary extracts correctly", + "boundary": "----WebKitFormBoundary7MA4YWxk", + "quotedBoundary": true, + "rawBody": "------WebKitFormBoundary7MA4YWxk\r\nContent-Disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n------WebKitFormBoundary7MA4YWxk--\r\n", + "expected": { + "parts": [ + { + "name": "field1" + } + ] + } + }, + { + "description": "RFC 7578 §4.2: single text field parsed correctly", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"title\"\r\n\r\nMy Blog Post\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "title", + "filename": null, + "contentType": "text/plain", + "body": "My Blog Post" + } + ] + } + }, + { + "description": "RFC 7578 §4.2: field with empty value", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; 
name=\"excerpt\"\r\n\r\n\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "excerpt", + "body": "" + } + ] + } + }, + { + "description": "RFC 7578 §4.2: multiple text fields in order", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"title\"\r\n\r\nMy Post\r\n--AaB03x\r\nContent-Disposition: form-data; name=\"status\"\r\n\r\npublish\r\n--AaB03x\r\nContent-Disposition: form-data; name=\"content\"\r\n\r\n

Hello world

\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "title", + "body": "My Post" + }, + { + "name": "status", + "body": "publish" + }, + { + "name": "content", + "body": "

Hello world

" + } + ] + } + }, + { + "description": "RFC 7578 §4.2: file upload with filename and content-type", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"file\"; filename=\"test.txt\"\r\nContent-Type: text/plain\r\n\r\nHello, this is a test file.\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "file", + "filename": "test.txt", + "contentType": "text/plain", + "body": "Hello, this is a test file." + } + ] + } + }, + { + "description": "RFC 7578 §4.2: file upload with application/octet-stream", + "boundary": "boundary42", + "rawBody": "--boundary42\r\nContent-Disposition: form-data; name=\"upload\"; filename=\"data.bin\"\r\nContent-Type: application/octet-stream\r\n\r\nbinary-content\r\n--boundary42--\r\n", + "expected": { + "parts": [ + { + "name": "upload", + "filename": "data.bin", + "contentType": "application/octet-stream" + } + ] + } + }, + { + "description": "RFC 7578 §4.4: part without Content-Type defaults to text/plain", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"field\"\r\n\r\nvalue\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "field", + "contentType": "text/plain" + } + ] + } + }, + { + "description": "RFC 7578 §4.2: form with text fields and file upload", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"title\"\r\n\r\nMy Image Post\r\n--AaB03x\r\nContent-Disposition: form-data; name=\"file\"; filename=\"image.jpg\"\r\nContent-Type: image/jpeg\r\n\r\nJFIF-binary-data\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "title", + "filename": null + }, + { + "name": "file", + "filename": "image.jpg", + "contentType": "image/jpeg" + } + ] + } + }, + { + "description": "RFC 7578 §5.1: multiple files with same field name in separate parts", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"documents\"; filename=\"file1.txt\"\r\nContent-Type: 
text/plain\r\n\r\nFirst file\r\n--AaB03x\r\nContent-Disposition: form-data; name=\"documents\"; filename=\"file2.txt\"\r\nContent-Type: text/plain\r\n\r\nSecond file\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "documents", + "filename": "file1.txt", + "body": "First file" + }, + { + "name": "documents", + "filename": "file2.txt", + "body": "Second file" + } + ] + } + }, + { + "description": "RFC 7578 §5.1.2: filename with spaces", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"file\"; filename=\"my document.pdf\"\r\nContent-Type: application/pdf\r\n\r\npdf-data\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "file", + "filename": "my document.pdf" + } + ] + } + }, + { + "description": "RFC 7578 §5.1.2: filename with percent-encoded UTF-8 is preserved", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"file\"; filename=\"caf%C3%A9.txt\"\r\nContent-Type: text/plain\r\n\r\ndata\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "file", + "filename": "caf%C3%A9.txt" + } + ] + } + }, + { + "description": "RFC 7578 §5.1.2: filename with direct UTF-8 encoding", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"file\"; filename=\"café.txt\"\r\nContent-Type: text/plain\r\n\r\ndata\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "file", + "filename": "café.txt" + } + ] + } + }, + { + "description": "RFC 7578 §5.1.3: _charset_ field is parsed as a normal field", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"_charset_\"\r\n\r\nUTF-8\r\n--AaB03x\r\nContent-Disposition: form-data; name=\"title\"\r\n\r\nMy Post\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "_charset_", + "body": "UTF-8" + }, + { + "name": "title", + "body": "My Post" + } + ] + } + }, + { + "description": "RFC 7578: boundary with hyphens (common browser format)", + "boundary": 
"----WebKitFormBoundary7MA4YWxkTrZu0gW", + "rawBody": "------WebKitFormBoundary7MA4YWxkTrZu0gW\r\nContent-Disposition: form-data; name=\"title\"\r\n\r\ntest\r\n------WebKitFormBoundary7MA4YWxkTrZu0gW--\r\n", + "expected": { + "parts": [ + { + "name": "title" + } + ] + } + }, + { + "description": "RFC 7578: long boundary (70 characters)", + "boundary": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "rawBody": "--xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\r\nContent-Disposition: form-data; name=\"f\"\r\n\r\nv\r\n--xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx--\r\n", + "expected": { + "parts": [ + { + "name": "f" + } + ] + } + }, + { + "description": "RFC 7578: boundary with special characters (plus, equals, slash)", + "boundary": "abc+def/ghi=123", + "rawBody": "--abc+def/ghi=123\r\nContent-Disposition: form-data; name=\"field\"\r\n\r\nvalue\r\n--abc+def/ghi=123--\r\n", + "expected": { + "parts": [ + { + "name": "field" + } + ] + } + }, + { + "description": "WordPress: file upload with zero-byte body (empty file)", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"file\"; filename=\"empty.txt\"\r\nContent-Type: text/plain\r\n\r\n\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "file", + "filename": "empty.txt", + "body": "" + } + ] + } + }, + { + "description": "RFC 7578: body content resembling boundary is not split", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"data\"\r\n\r\n--AaB03 not a boundary\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "data", + "body": "--AaB03 not a boundary" + } + ] + } + }, + { + "description": "RFC 7578 §4.4: part with charset parameter in Content-Type", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"bio\"\r\nContent-Type: text/plain; charset=UTF-8\r\n\r\nHello world\r\n--AaB03x--\r\n", + "expected": 
{ + "parts": [ + { + "name": "bio", + "contentType": "text/plain; charset=UTF-8" + } + ] + } + }, + { + "description": "RFC 7578: Content-Disposition with extra whitespace around parameters", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "field1" + } + ] + } + }, + { + "description": "RFC 7578: Content-Disposition name with escaped quotes", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"field\\\"name\"\r\n\r\nvalue\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "field\"name" + } + ] + } + }, + { + "description": "RFC 7578: additional part headers beyond Content-Disposition and Content-Type are ignored", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"file\"; filename=\"test.txt\"\r\nContent-Type: text/plain\r\nContent-Transfer-Encoding: binary\r\nX-Custom-Header: custom-value\r\n\r\nfile content\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "file", + "contentType": "text/plain", + "body": "file content" + } + ] + } + }, + { + "description": "RFC 7578: case-insensitive Content-Disposition header name", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\ncontent-disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "field1" + } + ] + } + }, + { + "description": "RFC 7578: case-insensitive form-data token in Content-Disposition", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: FORM-DATA; name=\"field1\"\r\n\r\nvalue1\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "field1" + } + ] + } + }, + { + "description": "RFC 7578: part body containing CRLF sequences", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"content\"\r\n\r\nline1\r\nline2\r\nline3\r\n--AaB03x--\r\n", + "expected": { + 
"parts": [ + { + "name": "content", + "body": "line1\r\nline2\r\nline3" + } + ] + } + }, + { + "description": "RFC 7578: part body containing text resembling a different closing delimiter", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"data\"\r\n\r\nsome --other-- text\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "data", + "body": "some --other-- text" + } + ] + } + }, + { + "description": "RFC 2046: preamble before first boundary is ignored", + "boundary": "AaB03x", + "rawBody": "This is the preamble. It should be ignored.\r\n--AaB03x\r\nContent-Disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "field1", + "body": "value1" + } + ] + } + }, + { + "description": "RFC 2046: epilogue after closing boundary is ignored", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--AaB03x--\r\nThis is the epilogue. 
It should be ignored.\r\n", + "expected": { + "parts": [ + { + "name": "field1", + "body": "value1" + } + ] + } + }, + { + "description": "RFC 2046 §5.1.1: transport padding between parts does not corrupt body data", + "boundary": "AaB03x", + "rawBody": "--AaB03x \r\nContent-Disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--AaB03x \r\nContent-Disposition: form-data; name=\"field2\"\r\n\r\nvalue2\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "field1", + "body": "value1" + }, + { + "name": "field2", + "body": "value2" + } + ] + } + }, + { + "description": "RFC 2046 §5.1.1: transport padding (tabs) after boundary does not corrupt headers", + "boundary": "AaB03x", + "rawBody": "--AaB03x\t\t\r\nContent-Disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "field1", + "body": "value1" + } + ] + } + }, + { + "description": "RFC 7578 §4.2: name= inside another parameter's quoted value is not matched", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data; dummy=\"name=evil\"; name=\"real\"\r\n\r\nvalue\r\n--AaB03x--\r\n", + "expected": { + "parts": [ + { + "name": "real" + } + ] + } + } + ], + "errorTests": [ + { + "description": "RFC 7578 §4.1: non-multipart Content-Type throws notMultipartFormData", + "contentType": "application/json", + "body": "{\"title\":\"Test\"}", + "expected": { + "error": "notMultipartFormData" + } + }, + { + "description": "RFC 7578 §4.1: missing boundary parameter throws notMultipartFormData", + "contentType": "multipart/form-data", + "body": "some data", + "expected": { + "error": "notMultipartFormData" + } + }, + { + "description": "RFC 7578: part missing Content-Disposition throws error", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Type: text/plain\r\n\r\nvalue\r\n--AaB03x--\r\n", + "expected": { + "error": "missingContentDisposition" + } + }, + { + "description": "RFC 7578: part missing name parameter throws 
error", + "boundary": "AaB03x", + "rawBody": "--AaB03x\r\nContent-Disposition: form-data\r\n\r\nvalue\r\n--AaB03x--\r\n", + "expected": { + "error": "missingNameParameter" + } + }, + { + "description": "RFC 7578: malformed body throws error", + "boundary": "AaB03x", + "rawBody": "this is not multipart at all", + "expected": { + "error": "malformedBody" + } + }, + { + "description": "RFC 2046: empty multipart body with only close delimiter throws malformedBody", + "boundary": "AaB03x", + "rawBody": "--AaB03x--\r\n", + "expected": { + "error": "malformedBody" + } + }, + { + "description": "RFC 7578: multipart request with no body throws missingBody", + "expected": { + "contentType": "multipart/form-data; boundary=AaB03x", + "error": "missingBody" + } + } + ] +} diff --git a/test-fixtures/http/request-parsing.json b/test-fixtures/http/request-parsing.json new file mode 100644 index 000000000..b7601cca9 --- /dev/null +++ b/test-fixtures/http/request-parsing.json @@ -0,0 +1,923 @@ +{ + "description": "HTTP request parsing tests. 
Given a raw HTTP request string, parse into method, target, headers, and body.", + "tests": [ + { + "description": "Parses a simple GET request", + "input": "GET /wp/v2/posts HTTP/1.1\r\nHost: localhost:8080\r\n\r\n", + "expected": { + "method": "GET", + "target": "/wp/v2/posts", + "headers": { + "Host": "localhost:8080" + }, + "body": null, + "isComplete": true + } + }, + { + "description": "Parses request target with query string", + "input": "GET /wp/v2/posts?per_page=10&status=publish HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "method": "GET", + "target": "/wp/v2/posts?per_page=10&status=publish", + "body": null, + "isComplete": true + } + }, + { + "description": "Parses POST request with JSON body", + "input": "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Type: application/json\r\nContent-Length: 35\r\n\r\n{\"title\":\"Hello\",\"content\":\"World\"}", + "expected": { + "method": "POST", + "target": "/wp/v2/posts", + "headers": { + "Host": "localhost", + "Content-Type": "application/json", + "Content-Length": "35" + }, + "body": "{\"title\":\"Hello\",\"content\":\"World\"}", + "isComplete": true + } + }, + { + "description": "Parses DELETE request", + "input": "DELETE /wp/v2/posts/42 HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "method": "DELETE", + "target": "/wp/v2/posts/42", + "body": null, + "isComplete": true + } + }, + { + "description": "Parses PUT request with body", + "input": "PUT /wp/v2/posts/42 HTTP/1.1\r\nHost: localhost\r\nContent-Length: 19\r\n\r\n{\"title\":\"Updated\"}", + "expected": { + "method": "PUT", + "target": "/wp/v2/posts/42", + "body": "{\"title\":\"Updated\"}", + "isComplete": true + } + }, + { + "description": "Parses multiple headers", + "input": "GET / HTTP/1.1\r\nHost: localhost\r\nAccept: application/json\r\nAuthorization: Bearer token123\r\n\r\n", + "expected": { + "method": "GET", + "target": "/", + "headers": { + "Host": "localhost", + "Accept": "application/json", + "Authorization": 
"Bearer token123" + }, + "body": null, + "isComplete": true + } + }, + { + "description": "Header lookup is case-insensitive", + "input": "GET / HTTP/1.1\r\nHost: localhost\r\nContent-Type: text/html\r\n\r\n", + "expected": { + "method": "GET", + "target": "/", + "isComplete": true, + "headers": { + "content-type": "text/html", + "CONTENT-TYPE": "text/html", + "Content-Type": "text/html" + } + } + }, + { + "description": "Parses header values containing colons", + "input": "GET / HTTP/1.1\r\nHost: localhost\r\nAuthorization: Basic dXNlcjpwYXNz\r\n\r\n", + "expected": { + "method": "GET", + "target": "/", + "isComplete": true, + "headers": { + "Authorization": "Basic dXNlcjpwYXNz" + } + } + }, + { + "description": "Content-Length with non-standard casing", + "input": "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\ncontent-length: 4\r\n\r\ndata", + "expected": { + "method": "POST", + "body": "data", + "isComplete": true + } + }, + { + "description": "Parses PATCH request", + "input": "PATCH /wp/v2/posts/1 HTTP/1.1\r\nHost: localhost\r\nContent-Length: 19\r\n\r\n{\"title\":\"Patched\"}", + "expected": { + "method": "PATCH", + "target": "/wp/v2/posts/1", + "isComplete": true + } + }, + { + "description": "Parses OPTIONS request", + "input": "OPTIONS /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "method": "OPTIONS", + "body": null, + "isComplete": true + } + }, + { + "description": "Preserves semicolon path parameters in target", + "input": "GET /wp/v2/posts;embed?per_page=5 HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "target": "/wp/v2/posts;embed?per_page=5", + "isComplete": true + } + }, + { + "description": "Preserves percent-encoded characters in target", + "input": "GET /wp/v2/posts?search=%E4%BD%A0%E5%A5%BD HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "target": "/wp/v2/posts?search=%E4%BD%A0%E5%A5%BD", + "isComplete": true + } + }, + { + "description": "Preserves multiple consecutive slashes in target", + "input": 
"GET //wp//v2//posts HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "target": "//wp//v2//posts", + "isComplete": true + } + }, + { + "description": "Preserves double question marks in target", + "input": "GET /wp/v2/posts?search=what??&per_page=10 HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "target": "/wp/v2/posts?search=what??&per_page=10", + "isComplete": true + } + }, + { + "description": "Preserves at-sign and brackets in target", + "input": "GET /wp/v2/users/@admin/[meta] HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "target": "/wp/v2/users/@admin/[meta]", + "isComplete": true + } + }, + { + "description": "Preserves dot segments in target", + "input": "GET /wp/v2/media/..uploads/...pending HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "target": "/wp/v2/media/..uploads/...pending", + "isComplete": true + } + }, + { + "description": "Preserves absolute-form URI as target", + "input": "GET https://example.com:443/wp/v2/posts HTTP/1.1\r\nHost: example.com\r\n\r\n", + "expected": { + "target": "https://example.com:443/wp/v2/posts", + "isComplete": true + } + }, + { + "description": "Preserves fragment in target", + "input": "GET /wp/v2/posts?page=1&per_page=10#post-17408 HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "target": "/wp/v2/posts?page=1&per_page=10#post-17408", + "isComplete": true + } + }, + { + "description": "Preserves quotes in target", + "input": "GET /wp/v2/posts?search=\"hello+world\" HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "target": "/wp/v2/posts?search=\"hello+world\"", + "isComplete": true + } + }, + { + "description": "Parses HEAD request", + "input": "HEAD /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "method": "HEAD", + "body": null, + "isComplete": true + } + }, + { + "description": "Parses CONNECT request with authority-form target", + "input": "CONNECT wordpress.org:443 HTTP/1.1\r\nHost: wordpress.org:443\r\n\r\n", + "expected": { + "method": 
"CONNECT", + "target": "wordpress.org:443", + "isComplete": true + } + }, + { + "description": "Parses OPTIONS request with asterisk-form target", + "input": "OPTIONS * HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "method": "OPTIONS", + "target": "*", + "isComplete": true + } + }, + { + "description": "Parses header value with no space after colon", + "input": "GET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nX-WP-Nonce:abc123\r\n\r\n", + "expected": { + "method": "GET", + "isComplete": true, + "headers": { + "X-WP-Nonce": "abc123" + } + } + }, + { + "description": "Bare CRLF returns needsMoreData after leading CRLF stripping", + "input": "\r\n\r\n", + "expected": { + "isComplete": false, + "hasHeaders": false, + "parseResult": null + } + }, + { + "description": "Handles HTTP/1.0 request with no headers", + "input": "GET / HTTP/1.0\r\n\r\n", + "expected": { + "method": "GET", + "target": "/", + "headers": {}, + "isComplete": true + } + }, + { + "description": "Handles zero Content-Length", + "input": "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Length: 0\r\n\r\n", + "expected": { + "method": "POST", + "body": null, + "isComplete": true + } + }, + { + "description": "Append after complete is a no-op", + "input": "GET /wp/v2/settings HTTP/1.1\r\nHost: localhost\r\n\r\n", + "appendAfterComplete": "extra garbage", + "expected": { + "method": "GET", + "target": "/wp/v2/settings", + "isComplete": true + } + }, + { + "description": "No Content-Length is allowed regardless of maxBodySize", + "input": "GET /posts HTTP/1.1\r\nHost: localhost\r\n\r\n", + "maxBodySize": 0, + "expected": { + "method": "GET", + "body": null, + "isComplete": true + } + }, + { + "description": "RFC 9110 §5.1: field names with dot, underscore, tilde are valid tokens", + "input": "GET /wp/v2/posts HTTP/1.1\r\nX-My.Header: dotted\r\nX_Under_Score: underscore\r\nX~Tilde: tilde\r\nHost: localhost\r\n\r\n", + "expected": { + "method": "GET", + "isComplete": true, + "headers": { + 
"X-My.Header": "dotted", + "X_Under_Score": "underscore", + "X~Tilde": "tilde" + } + } + }, + { + "description": "RFC 9110 §5.2: multiple field lines with same name combined with comma", + "input": "GET /wp/v2/posts HTTP/1.1\r\nAccept: text/html\r\nAccept: application/json\r\nHost: localhost\r\n\r\n", + "expected": { + "method": "GET", + "isComplete": true, + "headers": { + "Accept": "text/html, application/json" + } + } + }, + { + "description": "RFC 9110 §5.2: single field line with comma-separated values", + "input": "GET /wp/v2/posts HTTP/1.1\r\nAccept: text/html, application/json\r\nHost: localhost\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "Accept": "text/html, application/json" + } + } + }, + { + "description": "RFC 9110 §5.3: header field order is preserved", + "input": "GET /wp/v2/posts HTTP/1.1\r\nX-First: 1\r\nX-Second: 2\r\nX-Third: 3\r\nHost: localhost\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "X-First": "1", + "X-Second": "2", + "X-Third": "3" + } + } + }, + { + "description": "RFC 9110 §5.3: three duplicate Cache-Control headers combined in order", + "input": "GET /wp/v2/posts HTTP/1.1\r\nCache-Control: no-cache\r\nCache-Control: no-store\r\nCache-Control: must-revalidate\r\nHost: localhost\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "Cache-Control": "no-cache, no-store, must-revalidate" + } + } + }, + { + "description": "RFC 9110 §5.3: duplicate headers with different casing are combined", + "input": "GET /wp/v2/posts HTTP/1.1\r\nAccept: text/html\r\naccept: application/json\r\nHost: localhost\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "Accept": "text/html, application/json" + } + } + }, + { + "description": "RFC 9110 §5.3: duplicate header where one value is empty", + "input": "GET /wp/v2/posts HTTP/1.1\r\nX-Foo: bar\r\nX-Foo:\r\nHost: localhost\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "X-Foo": "bar, " + } + } + }, + { + "description": 
"RFC 9110 §5.3: duplicate Content-Length with identical values still parses", + "input": "POST /wp/v2/posts HTTP/1.1\r\nContent-Length: 8\r\nContent-Length: 8\r\nHost: localhost\r\n\r\n{\"id\":1}", + "expected": { + "isComplete": true, + "body": "{\"id\":1}" + } + }, + { + "description": "RFC 9110 §5.3: combined value order matches field line order", + "input": "GET /wp/v2/posts HTTP/1.1\r\nVia: 1.0 proxy-a\r\nVia: 1.1 proxy-b\r\nVia: 1.0 proxy-c\r\nHost: localhost\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "Via": "1.0 proxy-a, 1.1 proxy-b, 1.0 proxy-c" + } + } + }, + { + "description": "RFC 9110 §5.5: field value with VCHAR, SP, and HTAB", + "input": "GET /wp/v2/posts HTTP/1.1\r\nX-WP-Test: hello\tworld 123!@#\r\nHost: localhost\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "X-WP-Test": "hello\tworld 123!@#" + } + } + }, + { + "description": "RFC 9110 §5.5: field value with obs-text bytes is permitted", + "input": "GET /wp/v2/posts HTTP/1.1\r\nX-WP-Custom: café\r\nHost: localhost\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "X-WP-Custom": "café" + } + } + }, + { + "description": "RFC 9110 §7.2: Host header not required in HTTP/1.0", + "input": "GET /wp/v2/posts HTTP/1.0\r\nAccept: text/html\r\n\r\n", + "expected": { + "method": "GET", + "isComplete": true, + "headers": { + "Accept": "text/html" + } + } + }, + { + "description": "RFC 9110 §7.2: Host header with IPv6 address", + "input": "GET /wp/v2/posts HTTP/1.1\r\nHost: [::1]:8080\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "Host": "[::1]:8080" + } + } + }, + { + "description": "RFC 9110 §7.2: Host header with internationalized domain", + "input": "GET /wp/v2/posts HTTP/1.1\r\nHost: xn--nxasmq6b.example\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "Host": "xn--nxasmq6b.example" + } + } + }, + { + "description": "RFC 9110 §8.6: Content-Length comma-separated identical values accepted", + "input": "POST 
/wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5, 5\r\n\r\nhello", + "expected": { + "isComplete": true, + "headers": { + "Content-Length": "5" + }, + "body": "hello" + } + }, + { + "description": "RFC 9110 §8.6: Content-Length with leading zeros compared as integers", + "input": "POST /upload HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5, 05\r\n\r\nhello", + "expected": { + "isComplete": true, + "body": "hello" + } + }, + { + "description": "RFC 9110 §9.1: method token is case-sensitive — lowercase preserved", + "input": "get /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "method": "get", + "isComplete": true, + "body": null + } + }, + { + "description": "RFC 9110 §9.3.8: TRACE request", + "input": "TRACE /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "method": "TRACE", + "target": "/wp/v2/posts", + "body": null, + "isComplete": true + } + }, + { + "description": "RFC 9110 §9.1: extension method (custom token) is accepted", + "input": "PURGE /wp/v2/cache HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "method": "PURGE", + "isComplete": true + } + }, + { + "description": "RFC 9110 §7.1: absolute-form target with http scheme and port", + "input": "GET http://localhost:3000/wp/v2/posts HTTP/1.1\r\nHost: localhost:3000\r\n\r\n", + "expected": { + "target": "http://localhost:3000/wp/v2/posts", + "isComplete": true + } + }, + { + "description": "RFC 9110 §7.1: request target with multiple query parameters", + "input": "GET /wp/v2/posts?per_page=10&page=2&status=publish&orderby=date HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "target": "/wp/v2/posts?per_page=10&page=2&status=publish&orderby=date", + "isComplete": true + } + }, + { + "description": "RFC 9110 §10.1.3: Referer header preserves full URI", + "input": "GET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nReferer: https://example.com/editor?post=42&action=edit\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "Referer": 
"https://example.com/editor?post=42&action=edit" + } + } + }, + { + "description": "RFC 9110 §10.1.5: User-Agent header with product and comment", + "input": "GET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nUser-Agent: GutenbergKit/1.0 (iOS 17.0; iPhone)\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "User-Agent": "GutenbergKit/1.0 (iOS 17.0; iPhone)" + } + } + }, + { + "description": "RFC 9110 §12.5.1: Accept header with quality values", + "input": "GET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nAccept: text/html;q=0.9, application/json;q=1.0, */*;q=0.1\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "Accept": "text/html;q=0.9, application/json;q=1.0, */*;q=0.1" + } + } + }, + { + "description": "RFC 9110 §12.5.3: Accept-Encoding with multiple encodings", + "input": "GET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nAccept-Encoding: gzip, deflate, br\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "Accept-Encoding": "gzip, deflate, br" + } + } + }, + { + "description": "RFC 9110 §12.5.4: Accept-Language with quality values", + "input": "GET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nAccept-Language: en-US,en;q=0.9,fr;q=0.5\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "Accept-Language": "en-US,en;q=0.9,fr;q=0.5" + } + } + }, + { + "description": "RFC 9110 §8.3: Content-Type with charset parameter", + "input": "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Type: application/json; charset=utf-8\r\nContent-Length: 16\r\n\r\n{\"title\":\"Test\"}", + "expected": { + "isComplete": true, + "headers": { + "Content-Type": "application/json; charset=utf-8" + }, + "body": "{\"title\":\"Test\"}" + } + }, + { + "description": "RFC 9110 §11.6.2: Authorization header with Bearer token", + "input": "GET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nAuthorization: Bearer eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxIn0.signature\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "Authorization": 
"Bearer eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxIn0.signature" + } + } + }, + { + "description": "RFC 9110 §11.6.2: case-insensitive Authorization header lookup", + "input": "GET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nauthorization: Bearer tok123\r\n\r\n", + "expected": { + "isComplete": true, + "headers": { + "Authorization": "Bearer tok123", + "authorization": "Bearer tok123", + "AUTHORIZATION": "Bearer tok123" + } + } + } + ], + "errorTests": [ + { + "description": "Rejects non-UTF-8 header bytes (0xFF 0xFE BOM-like)", + "inputBase64": "//4NCg0K", + "expected": { + "error": "invalidEncoding" + } + }, + { + "description": "Rejects truncated 2-byte UTF-8 sequence (0xC0 alone) in header value", + "inputBase64": "R0VUIC8gSFRUUC8xLjENCkhvc3Q6IGxvY2FsaG9zdA0KWC1UZXN0OiDADQoNCg==", + "expected": { + "error": "invalidEncoding" + } + }, + { + "description": "Rejects overlong UTF-8 encoding (0xC0 0xAF for U+002F) in header value", + "inputBase64": "R0VUIC8gSFRUUC8xLjENCkhvc3Q6IGxvY2FsaG9zdA0KWC1UZXN0OiDArw0KDQo=", + "expected": { + "error": "invalidEncoding" + } + }, + { + "description": "Rejects lone continuation byte (0x80) in header value", + "inputBase64": "R0VUIC8gSFRUUC8xLjENCkhvc3Q6IGxvY2FsaG9zdA0KWC1UZXN0OiCADQoNCg==", + "expected": { + "error": "invalidEncoding" + } + }, + { + "description": "Rejects code point above U+10FFFF (0xF4 0x90 0x80 0x80) in header value", + "inputBase64": "R0VUIC8gSFRUUC8xLjENCkhvc3Q6IGxvY2FsaG9zdA0KWC1UZXN0OiD0kICADQoNCg==", + "expected": { + "error": "invalidEncoding" + } + }, + { + "description": "Rejects truncated 3-byte UTF-8 sequence in header value", + "inputBase64": "R0VUIC8gSFRUUC8xLjENCkhvc3Q6IGxvY2FsaG9zdA0KWC1UZXN0OiDggA0KDQo=", + "expected": { + "error": "invalidEncoding" + } + }, + { + "description": "Rejects 0xFE byte (never valid in UTF-8) in header value", + "inputBase64": "R0VUIC8gSFRUUC8xLjENCkhvc3Q6IGxvY2FsaG9zdA0KWC1UZXN0OiD+DQoNCg==", + "expected": { + "error": "invalidEncoding" + } + }, + { + "description": "Rejects 
lone surrogate half (U+D800 as CESU-8) in header value", + "inputBase64": "R0VUIC8gSFRUUC8xLjENCkhvc3Q6IGxvY2FsaG9zdA0KWC1UZXN0OiDtoIANCg0K", + "expected": { + "error": "invalidEncoding" + } + }, + { + "description": "Rejects header with space before colon (request smuggling vector)", + "input": "GET / HTTP/1.1\r\nHost: localhost\r\nContent-Length : 0\r\n\r\n", + "expected": { + "error": "whitespaceBeforeColon" + } + }, + { + "description": "Rejects header with tab before colon", + "input": "GET / HTTP/1.1\r\nHost: localhost\r\nX-Custom\t: value\r\n\r\n", + "expected": { + "error": "whitespaceBeforeColon" + } + }, + { + "description": "Rejects request when Content-Length exceeds maxBodySize", + "input": "POST /upload HTTP/1.1\r\nHost: localhost\r\nContent-Length: 101\r\n\r\n", + "maxBodySize": 100, + "expected": { + "error": "payloadTooLarge" + } + }, + { + "description": "Rejects oversized Content-Length even when body data hasn't arrived", + "input": "POST /upload HTTP/1.1\r\nHost: localhost\r\nContent-Length: 999999\r\n\r\n", + "maxBodySize": 50, + "expected": { + "error": "payloadTooLarge" + } + }, + { + "description": "RFC 9112 §6.1: rejects Transfer-Encoding (not supported by this parser)", + "input": "POST / HTTP/1.1\r\nHost: localhost\r\nTransfer-Encoding: chunked\r\n\r\n", + "expected": { + "error": "unsupportedTransferEncoding" + } + }, + { + "description": "RFC 9110 §8.6: rejects Transfer-Encoding even with identity encoding", + "input": "GET / HTTP/1.1\r\nHost: localhost\r\nTransfer-Encoding: identity\r\n\r\n", + "expected": { + "error": "unsupportedTransferEncoding" + } + }, + { + "description": "RFC 9110 §8.6: rejects conflicting Content-Length values (request smuggling vector)", + "input": "POST / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5\r\nContent-Length: 10\r\n\r\nhello", + "expected": { + "error": "conflictingContentLength" + } + }, + { + "description": "RFC 9110 §8.6: rejects Content-Length with different values via comma-separated list", 
+ "input": "POST / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5, 10\r\n\r\nhello", + "expected": { + "error": "conflictingContentLength" + } + }, + { + "description": "RFC 9110 §8.6: rejects Content-Length with trailing comma (empty segment)", + "input": "POST / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5,\r\n\r\nhello", + "expected": { + "error": "conflictingContentLength" + } + }, + { + "description": "RFC 9110 §8.6: rejects Content-Length with leading comma (empty segment)", + "input": "POST / HTTP/1.1\r\nHost: localhost\r\nContent-Length: ,5\r\n\r\nhello", + "expected": { + "error": "invalidContentLength" + } + }, + { + "description": "RFC 9110 §8.6: rejects Content-Length with consecutive commas (empty segment)", + "input": "POST / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5,,5\r\n\r\nhello", + "expected": { + "error": "conflictingContentLength" + } + }, + { + "description": "RFC 9112 §3: rejects request line with double space between method and target", + "input": "GET  /path HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "error": "invalidHTTPVersion" + } + }, + { + "description": "RFC 9112 §3.2: rejects request with missing Host header", + "input": "GET / HTTP/1.1\r\nAccept: */*\r\n\r\n", + "expected": { + "error": "missingHostHeader" + } + }, + { + "description": "RFC 9112 §3.2: rejects request with multiple Host headers (request smuggling vector)", + "input": "GET / HTTP/1.1\r\nHost: example.com\r\nHost: evil.com\r\n\r\n", + "expected": { + "error": "multipleHostHeaders" + } + }, + { + "description": "RFC 9112 §5.2: rejects obs-fold (line folding) in header value", + "input": "GET / HTTP/1.1\r\nHost: localhost\r\nX-Custom: value\r\n continued\r\n\r\n", + "expected": { + "error": "obsFoldDetected" + } + }, + { + "description": "RFC 9112 §5.2: rejects obs-fold with tab continuation", + "input": "GET / HTTP/1.1\r\nHost: localhost\r\nX-Custom: value\r\n\tcontinued\r\n\r\n", + "expected": { + "error": "obsFoldDetected" + } + }, + { + 
"description": "RFC 9110 §5.1: empty field name before colon is rejected", + "input": "GET /wp/v2/posts HTTP/1.1\r\n: empty-name\r\nHost: localhost\r\n\r\n", + "expected": { + "error": "invalidFieldName" + } + }, + { + "description": "RFC 9110 §5.5: NUL byte (0x00) in field value is rejected", + "input": "GET /wp/v2/posts HTTP/1.1\r\nX-Bad: hel\u0000lo\r\nHost: localhost\r\n\r\n", + "expected": { + "error": "invalidFieldValue" + } + }, + { + "description": "RFC 9110 §5.5: bare CR in field value is rejected", + "input": "GET /wp/v2/posts HTTP/1.1\r\nX-Bad: hello\rworld\r\nHost: localhost\r\n\r\n", + "expected": { + "error": "invalidFieldValue" + } + }, + { + "description": "RFC 9110 §5.5: BEL (0x07) in field value is rejected", + "input": "GET / HTTP/1.1\r\nX-Test: hel\u0007lo\r\nHost: localhost\r\n\r\n", + "expected": { + "error": "invalidFieldValue" + } + }, + { + "description": "RFC 9110 §5.5: BS (0x08) in field value is rejected", + "input": "GET / HTTP/1.1\r\nX-Test: hel\blo\r\nHost: localhost\r\n\r\n", + "expected": { + "error": "invalidFieldValue" + } + }, + { + "description": "RFC 9110 §5.5: VT (0x0B) in field value is rejected", + "input": "GET / HTTP/1.1\r\nX-Test: hel\u000Blo\r\nHost: localhost\r\n\r\n", + "expected": { + "error": "invalidFieldValue" + } + }, + { + "description": "RFC 9110 §5.5: FF (0x0C) in field value is rejected", + "input": "GET / HTTP/1.1\r\nX-Test: hel\flo\r\nHost: localhost\r\n\r\n", + "expected": { + "error": "invalidFieldValue" + } + }, + { + "description": "RFC 9110 §5.5: ESC (0x1B) in field value is rejected", + "input": "GET / HTTP/1.1\r\nX-Test: hel\u001Blo\r\nHost: localhost\r\n\r\n", + "expected": { + "error": "invalidFieldValue" + } + }, + { + "description": "RFC 9110 §5.5: DEL (0x7F) in field value is rejected", + "input": "GET / HTTP/1.1\r\nX-Test: hel\u007Flo\r\nHost: localhost\r\n\r\n", + "expected": { + "error": "invalidFieldValue" + } + }, + { + "description": "RFC 9110 §5.5: bare LF in field value is rejected", + 
"input": "GET / HTTP/1.1\r\nX-Test: val\nue\r\nHost: localhost\r\n\r\n", + "expected": { + "error": "invalidFieldValue" + } + }, + { + "description": "RFC 9110 §8.6: Content-Length rejects floating point", + "input": "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5.0\r\n\r\nhello", + "expected": { + "error": "invalidContentLength" + } + }, + { + "description": "RFC 9110 §8.6: Content-Length rejects hex", + "input": "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Length: 0x0A\r\n\r\nhello12345", + "expected": { + "error": "invalidContentLength" + } + }, + { + "description": "RFC 9110 §8.6: Content-Length rejects plus sign prefix", + "input": "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Length: +5\r\n\r\nhello", + "expected": { + "error": "invalidContentLength" + } + }, + { + "description": "RFC 9110 §8.6: Content-Length rejects whitespace between digits", + "input": "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Length: 1 0\r\n\r\nhello12345", + "expected": { + "error": "invalidContentLength" + } + }, + { + "description": "RFC 9110 §9.1: method containing parenthesis is rejected", + "input": "G(T / HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "error": "malformedRequestLine" + } + }, + { + "description": "RFC 9110 §9.1: method containing comma is rejected", + "input": "GE,T / HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "error": "malformedRequestLine" + } + }, + { + "description": "RFC 9110 §9.1: method containing slash is rejected", + "input": "GE/T / HTTP/1.1\r\nHost: localhost\r\n\r\n", + "expected": { + "error": "malformedRequestLine" + } + }, + { + "description": "RFC 9110 §5.3: conflicting Content-Length values rejected after header combining", + "input": "POST /wp/v2/posts HTTP/1.1\r\nContent-Length: 100\r\nContent-Length: 5\r\nHost: localhost\r\n\r\nhello", + "expected": { + "error": "conflictingContentLength" + } + } + ], + "incrementalTests": [ + { + "description": "Handles data 
arriving byte by byte", + "input": "GET /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\n\r\n", + "chunkSize": 1, + "expected": { + "method": "GET", + "target": "/wp/v2/posts", + "isComplete": true + } + }, + { + "description": "Body arriving in multiple small chunks", + "headers": "POST /wp/v2/media HTTP/1.1\r\nHost: localhost\r\nContent-Length: 10\r\n\r\n", + "bodyChunks": [ "ab", "cd", "ef", "gh", "ij" ], + "expected": { + "method": "POST", + "body": "abcdefghij", + "isComplete": true + } + }, + { + "description": "Returns needsMoreData for incomplete headers", + "input": "GET /wp/v2/posts HTTP/1.1\r\nHost: loc", + "expected": { + "hasHeaders": false, + "isComplete": false, + "parseResult": null + } + }, + { + "description": "Transitions to headersComplete then complete as body arrives", + "headers": "POST /wp/v2/posts HTTP/1.1\r\nHost: localhost\r\nContent-Length: 11\r\n\r\n", + "bodyChunks": [ "hello world" ], + "expected": { + "body": "hello world", + "isComplete": true, + "afterHeaders": { + "hasHeaders": true, + "isComplete": false, + "method": "POST", + "target": "/wp/v2/posts" + } + } + } + ] +}