diff --git a/Cargo.toml b/Cargo.toml index 2b0aa77..04037f9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,10 @@ path = "tests/backend_auth.rs" name = "authz" path = "tests/authz.rs" +[[test]] +name = "object_path" +path = "tests/object_path.rs" + [dependencies] # Multistore multistore = { version = "0.6.1", features = ["azure"] } diff --git a/src/lib.rs b/src/lib.rs index 96b05e2..b11ae3d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,7 @@ mod backend_auth; mod config; mod handlers; mod location; +mod object_path; mod pagination; mod source_api; mod sts; @@ -34,6 +35,7 @@ use multistore_oidc_provider::{HttpExchange, OidcCredentialProvider, OidcProvide use multistore_path_mapping::{MappedRegistry, PathMapping}; use multistore_sts::jwks::JwksCache; use multistore_sts::route_handler::StsRouterExt; +use object_path::{extract_path_segments, is_keyless_write}; use std::sync::OnceLock; use sts::StsCredentialRegistry; use worker::{event, Context, Env, Result}; @@ -154,6 +156,28 @@ async fn fetch(req: web_sys::Request, env: Env, ctx: Context) -> Result(headers: &'a http::HeaderMap, name: &str) -> &'a st .unwrap_or("") } -/// Split `/{account}/{product}[/{key}]` into its segments; any segment not -/// present is `None`. Used to tag the analytics event and the location broadcast. -fn extract_path_segments(path: &str) -> (Option<&str>, Option<&str>, Option<&str>) { - let trimmed = path.trim_matches('/'); - if trimmed.is_empty() { - return (None, None, None); - } - let mut parts = trimmed.splitn(3, '/'); - let account = parts.next(); - let product = parts.next(); - let key = parts.next(); - (account, product, key) -} - // ── CORS ──────────────────────────────────────────────────────────── fn add_cors(resp: web_sys::Response) -> web_sys::Response { diff --git a/src/object_path.rs b/src/object_path.rs new file mode 100644 index 0000000..68c75f6 --- /dev/null +++ b/src/object_path.rs @@ -0,0 +1,42 @@ +//! Object-path parsing for the Source Cooperative path model +//! (`/{account}/{product}/{key}`). Kept wasm-free so it can be unit-tested +//! natively (see `tests/object_path.rs`), despite the crate's `[lib] test = false`. + +/// Split a request path into `(account, product, key)`. +/// +/// The key is everything after the first two segments, so nested keys stay +/// intact. Leading/trailing slashes are ignored, so `/a/b` and `/a/b/` both +/// parse with `key = None`. Examples: +/// `/` → (None, None, None) +/// `/acct` → (Some("acct"), None, None) +/// `/acct/prod` → (Some("acct"), Some("prod"), None) +/// `/acct/prod/dir/f` → (Some("acct"), Some("prod"), Some("dir/f")) +pub(crate) fn extract_path_segments(path: &str) -> (Option<&str>, Option<&str>, Option<&str>) { + let trimmed = path.trim_matches('/'); + if trimmed.is_empty() { + return (None, None, None); + } + let mut parts = trimmed.splitn(3, '/'); + let account = parts.next(); + let product = parts.next(); + let key = parts.next(); + (account, product, key) +} + +/// Whether `method` writes to a single object but `path` carries no object key. +/// +/// `PUT`/`DELETE` (PutObject/DeleteObject) address one object, so they need a +/// key: `/{account}/{product}/{key}`. A request to `/{account}/{product}` (or +/// shorter) targets the product root, which has no key — e.g. +/// `aws s3 cp f s3://account/product` (no trailing slash) uploads `f` as the +/// object literally named `product`. Such a request can't be served and, if +/// forwarded, the upstream rejects the streaming upload with a misleading +/// "x-amz-content-sha256 header is invalid"; callers should be told the real +/// cause instead. +/// +/// `POST` is intentionally excluded: keyless `POST /{account}/{product}?delete` +/// (multi-object delete) is a legitimate bucket-level operation. +pub(crate) fn is_keyless_write(method: &http::Method, path: &str) -> bool { + (*method == http::Method::PUT || *method == http::Method::DELETE) + && extract_path_segments(path).2.is_none() +} diff --git a/tests/object_path.rs b/tests/object_path.rs new file mode 100644 index 0000000..58f32b6 --- /dev/null +++ b/tests/object_path.rs @@ -0,0 +1,63 @@ +//! Native unit tests for the wasm-free `object_path` module, included via +//! `#[path]` (the lib itself is `cdylib` with `test = false`). Mirrors the +//! pattern in `tests/authz.rs`. + +#[path = "../src/object_path.rs"] +mod object_path; + +use http::Method; +use object_path::{extract_path_segments, is_keyless_write}; + +#[test] +fn extract_splits_account_product_key() { + assert_eq!(extract_path_segments("/"), (None, None, None)); + assert_eq!(extract_path_segments("/acct"), (Some("acct"), None, None)); + assert_eq!( + extract_path_segments("/acct/prod"), + (Some("acct"), Some("prod"), None) + ); + assert_eq!( + extract_path_segments("/acct/prod/README.md"), + (Some("acct"), Some("prod"), Some("README.md")) + ); + // Nested keys stay intact. + assert_eq!( + extract_path_segments("/acct/prod/dir/sub/f.parquet"), + (Some("acct"), Some("prod"), Some("dir/sub/f.parquet")) + ); + // A trailing slash is not a key. + assert_eq!( + extract_path_segments("/acct/prod/"), + (Some("acct"), Some("prod"), None) + ); +} + +#[test] +fn keyless_writes_are_flagged() { + // The reported bug: PUT to the product root (no trailing slash, no key). + assert!(is_keyless_write(&Method::PUT, "/acct/prod")); + assert!(is_keyless_write(&Method::PUT, "/acct/prod/")); + assert!(is_keyless_write(&Method::PUT, "/acct")); + assert!(is_keyless_write(&Method::PUT, "/")); + // DELETE shares the same failure mode (DeleteObject needs a key). + assert!(is_keyless_write(&Method::DELETE, "/acct/prod")); +} + +#[test] +fn writes_with_a_key_are_allowed() { + assert!(!is_keyless_write(&Method::PUT, "/acct/prod/README.md")); + assert!(!is_keyless_write( + &Method::DELETE, + "/acct/prod/dir/f.parquet" + )); +} + +#[test] +fn reads_and_multi_delete_are_not_flagged() { + // Reads to the product root are legitimate (account/product listings). + assert!(!is_keyless_write(&Method::GET, "/acct/prod")); + assert!(!is_keyless_write(&Method::HEAD, "/acct")); + // Keyless POST is left to the gateway: `POST /{account}/{product}?delete` + // (multi-object delete) is a valid bucket-level operation. + assert!(!is_keyless_write(&Method::POST, "/acct/prod")); +}