From 0594704664b7233f3fd02eceea0c0420a53d4336 Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Fri, 26 Jun 2026 09:28:22 -0700 Subject: [PATCH 1/3] fix: return clear 400 for keyless writes instead of misleading sha256 error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A PUT/DELETE to `/{account}/{product}` (no object key) was forwarded upstream and rejected with the confusing "x-amz-content-sha256 header is invalid". This happens easily with `aws s3 cp f s3://account/product` (no trailing slash): the CLI uploads `f` as the object literally named `product`, so the request carries a streaming-payload content-sha256 against a keyless path. Short-circuit PUT/DELETE to a keyless path with a 400 InvalidRequest that names the real cause — the missing object key — so callers stop chasing the sha256 red herring. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/lib.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 96b05e2..c09c421 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -154,6 +154,35 @@ async fn fetch(req: web_sys::Request, env: Env, ctx: Context) -> Result Date: Fri, 26 Jun 2026 09:45:29 -0700 Subject: [PATCH 2/3] test: cover keyless-write detection Extract `extract_path_segments` and the new `is_keyless_write` predicate into a wasm-free `object_path` module so they can be unit-tested natively via the `#[path]` pattern already used by `tests/authz.rs` (the lib is `cdylib` with `[lib] test = false`). Adds `tests/object_path.rs` covering segment parsing, keyless PUT/DELETE, keyed writes, and the read / multi-object-delete cases that must not be flagged. Co-Authored-By: Claude Opus 4.8 (1M context) --- Cargo.toml | 4 +++ src/lib.rs | 35 ++++++------------------ src/object_path.rs | 42 +++++++++++++++++++++++++++++ tests/object_path.rs | 63 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 117 insertions(+), 27 deletions(-) create mode 100644 src/object_path.rs create mode 100644 tests/object_path.rs diff --git a/Cargo.toml b/Cargo.toml index 2b0aa77..04037f9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,10 @@ path = "tests/backend_auth.rs" name = "authz" path = "tests/authz.rs" +[[test]] +name = "object_path" +path = "tests/object_path.rs" + [dependencies] # Multistore multistore = { version = "0.6.1", features = ["azure"] } diff --git a/src/lib.rs b/src/lib.rs index c09c421..b11ae3d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,7 @@ mod backend_auth; mod config; mod handlers; mod location; +mod object_path; mod pagination; mod source_api; mod sts; @@ -34,6 +35,7 @@ use multistore_oidc_provider::{HttpExchange, OidcCredentialProvider, OidcProvide use multistore_path_mapping::{MappedRegistry, PathMapping}; use multistore_sts::jwks::JwksCache; use multistore_sts::route_handler::StsRouterExt; +use object_path::{extract_path_segments, is_keyless_write}; use std::sync::OnceLock; use sts::StsCredentialRegistry; use worker::{event, Context, Env, Result}; @@ -155,19 +157,12 @@ async fn fetch(req: web_sys::Request, env: Env, ctx: Context) -> Result(headers: &'a http::HeaderMap, name: &str) -> &'a st .unwrap_or("") } -/// Split `/{account}/{product}[/{key}]` into its segments; any segment not -/// present is `None`. Used to tag the analytics event and the location broadcast. -fn extract_path_segments(path: &str) -> (Option<&str>, Option<&str>, Option<&str>) { - let trimmed = path.trim_matches('/'); - if trimmed.is_empty() { - return (None, None, None); - } - let mut parts = trimmed.splitn(3, '/'); - let account = parts.next(); - let product = parts.next(); - let key = parts.next(); - (account, product, key) -} - // ── CORS ──────────────────────────────────────────────────────────── fn add_cors(resp: web_sys::Response) -> web_sys::Response { diff --git a/src/object_path.rs b/src/object_path.rs new file mode 100644 index 0000000..7e99e80 --- /dev/null +++ b/src/object_path.rs @@ -0,0 +1,42 @@ +//! Object-path parsing for the Source Cooperative path model +//! (`/{account}/{product}/{key}`). Kept wasm-free so it can be unit-tested +//! natively (see `tests/object_path.rs`), despite the crate's `[lib] test = false`. + +/// Split a request path into `(account, product, key)`. +/// +/// The key is everything after the first two segments, so nested keys stay +/// intact. Leading/trailing slashes are ignored, so `/a/b` and `/a/b/` both +/// parse with `key = None`. Examples: +/// `/` → (None, None, None) +/// `/acct` → (Some("acct"), None, None) +/// `/acct/prod` → (Some("acct"), Some("prod"), None) +/// `/acct/prod/dir/f` → (Some("acct"), Some("prod"), Some("dir/f")) +pub(crate) fn extract_path_segments(path: &str) -> (Option<&str>, Option<&str>, Option<&str>) { + let trimmed = path.trim_matches('/'); + if trimmed.is_empty() { + return (None, None, None); + } + let mut parts = trimmed.splitn(3, '/'); + let account = parts.next(); + let product = parts.next(); + let key = parts.next(); + (account, product, key) +} + +/// Whether `method` writes to a single object but `path` carries no object key. +/// +/// `PUT`/`DELETE` (PutObject/DeleteObject) address one object, so they need a +/// key: `/{account}/{product}/{key}`. A request to `/{account}/{product}` (or +/// shorter) targets the product root, which has no key — e.g. +/// `aws s3 cp f s3://account/product` (no trailing slash) uploads `f` as the +/// object literally named `product`. Such a request can't be served and, if +/// forwarded, the upstream rejects the streaming upload with a misleading +/// "x-amz-content-sha256 header is invalid"; callers should be told the real +/// cause instead. +/// +/// `POST` is intentionally excluded: keyless `POST /{account}/{product}?delete` +/// (multi-object delete) is a legitimate bucket-level operation. +pub(crate) fn is_keyless_write(method: &http::Method, path: &str) -> bool { + (*method == http::Method::PUT || *method == http::Method::DELETE) + && extract_path_segments(path).2.is_none_or(str::is_empty) +} diff --git a/tests/object_path.rs b/tests/object_path.rs new file mode 100644 index 0000000..58f32b6 --- /dev/null +++ b/tests/object_path.rs @@ -0,0 +1,63 @@ +//! Native unit tests for the wasm-free `object_path` module, included via +//! `#[path]` (the lib itself is `cdylib` with `test = false`). Mirrors the +//! pattern in `tests/authz.rs`. + +#[path = "../src/object_path.rs"] +mod object_path; + +use http::Method; +use object_path::{extract_path_segments, is_keyless_write}; + +#[test] +fn extract_splits_account_product_key() { + assert_eq!(extract_path_segments("/"), (None, None, None)); + assert_eq!(extract_path_segments("/acct"), (Some("acct"), None, None)); + assert_eq!( + extract_path_segments("/acct/prod"), + (Some("acct"), Some("prod"), None) + ); + assert_eq!( + extract_path_segments("/acct/prod/README.md"), + (Some("acct"), Some("prod"), Some("README.md")) + ); + // Nested keys stay intact. + assert_eq!( + extract_path_segments("/acct/prod/dir/sub/f.parquet"), + (Some("acct"), Some("prod"), Some("dir/sub/f.parquet")) + ); + // A trailing slash is not a key. + assert_eq!( + extract_path_segments("/acct/prod/"), + (Some("acct"), Some("prod"), None) + ); +} + +#[test] +fn keyless_writes_are_flagged() { + // The reported bug: PUT to the product root (no trailing slash, no key). + assert!(is_keyless_write(&Method::PUT, "/acct/prod")); + assert!(is_keyless_write(&Method::PUT, "/acct/prod/")); + assert!(is_keyless_write(&Method::PUT, "/acct")); + assert!(is_keyless_write(&Method::PUT, "/")); + // DELETE shares the same failure mode (DeleteObject needs a key). + assert!(is_keyless_write(&Method::DELETE, "/acct/prod")); +} + +#[test] +fn writes_with_a_key_are_allowed() { + assert!(!is_keyless_write(&Method::PUT, "/acct/prod/README.md")); + assert!(!is_keyless_write( + &Method::DELETE, + "/acct/prod/dir/f.parquet" + )); +} + +#[test] +fn reads_and_multi_delete_are_not_flagged() { + // Reads to the product root are legitimate (account/product listings). + assert!(!is_keyless_write(&Method::GET, "/acct/prod")); + assert!(!is_keyless_write(&Method::HEAD, "/acct")); + // Keyless POST is left to the gateway: `POST /{account}/{product}?delete` + // (multi-object delete) is a valid bucket-level operation. + assert!(!is_keyless_write(&Method::POST, "/acct/prod")); +} From 8e25e7d79c42aaf9b765981e551b08e5c69d1a21 Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Mon, 29 Jun 2026 15:45:53 -0700 Subject: [PATCH 3/3] refactor(object-path): drop dead is_empty branch in keyless-write check extract_path_segments trims all leading/trailing slashes before splitting, so the key slot can never be Some(""). The is_none_or(str::is_empty) check therefore had a dead empty-string branch; is_none() alone is equivalent. As a bonus this drops the is_none_or call (stable only since Rust 1.82), easing MSRV concerns. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/object_path.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/object_path.rs b/src/object_path.rs index 7e99e80..68c75f6 100644 --- a/src/object_path.rs +++ b/src/object_path.rs @@ -38,5 +38,5 @@ pub(crate) fn extract_path_segments(path: &str) -> (Option<&str>, Option<&str>, /// (multi-object delete) is a legitimate bucket-level operation. pub(crate) fn is_keyless_write(method: &http::Method, path: &str) -> bool { (*method == http::Method::PUT || *method == http::Method::DELETE) - && extract_path_segments(path).2.is_none_or(str::is_empty) + && extract_path_segments(path).2.is_none() }