Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ path = "tests/backend_auth.rs"
name = "authz"
path = "tests/authz.rs"

[[test]]
name = "object_path"
path = "tests/object_path.rs"

[dependencies]
# Multistore
multistore = { version = "0.6.1", features = ["azure"] }
Expand Down
38 changes: 24 additions & 14 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mod backend_auth;
mod config;
mod handlers;
mod location;
mod object_path;
mod pagination;
mod source_api;
mod sts;
Expand All @@ -34,6 +35,7 @@ use multistore_oidc_provider::{HttpExchange, OidcCredentialProvider, OidcProvide
use multistore_path_mapping::{MappedRegistry, PathMapping};
use multistore_sts::jwks::JwksCache;
use multistore_sts::route_handler::StsRouterExt;
use object_path::{extract_path_segments, is_keyless_write};
use std::sync::OnceLock;
use sts::StsCredentialRegistry;
use worker::{event, Context, Env, Result};
Expand Down Expand Up @@ -154,6 +156,28 @@ async fn fetch(req: web_sys::Request, env: Env, ctx: Context) -> Result<web_sys:
));
}

// ── Short-circuit: write to a keyless path ──────────────────────
// A keyless PUT/DELETE (e.g. `aws s3 cp f s3://account/product` with no
// trailing slash) targets the product root, which has no object key.
// Forwarding it makes the upstream reject the streaming upload with a
// misleading "x-amz-content-sha256 header is invalid"; return an actionable
// 400 instead so the caller sees the real cause. See `is_keyless_write`.
if !is_special_path && is_keyless_write(&parts.method, &parts.path) {
let resp = ErrorResponse {
code: "InvalidRequest".to_string(),
message: format!(
"Missing object key: a {} must address an object at \
/{{account}}/{{product}}/{{key}}, not the product root.",
parts.method.as_str()
),
resource: parts.path.clone(),
request_id: request_id.clone(),
};
return Ok(add_cors(
GatewayResponse::Response(ProxyResult::xml(400, resp.to_xml())).into_web_sys(),
));
}

// ── Path rewriting ─────────────────────────────────────────────
// Source Cooperative path mapping: `/{account}/{product}/{key}`
// → internal bucket `account:product`, display name shows just `account`.
Expand Down Expand Up @@ -343,20 +367,6 @@ pub(crate) fn header_str<'a>(headers: &'a http::HeaderMap, name: &str) -> &'a st
.unwrap_or("")
}

/// Split `/{account}/{product}[/{key}]` into its segments; any segment not
/// present is `None`. Used to tag the analytics event and the location broadcast.
fn extract_path_segments(path: &str) -> (Option<&str>, Option<&str>, Option<&str>) {
let trimmed = path.trim_matches('/');
if trimmed.is_empty() {
return (None, None, None);
}
let mut parts = trimmed.splitn(3, '/');
let account = parts.next();
let product = parts.next();
let key = parts.next();
(account, product, key)
}

// ── CORS ────────────────────────────────────────────────────────────

fn add_cors(resp: web_sys::Response) -> web_sys::Response {
Expand Down
42 changes: 42 additions & 0 deletions src/object_path.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
//! Object-path parsing for the Source Cooperative path model
//! (`/{account}/{product}/{key}`). Kept wasm-free so it can be unit-tested
//! natively (see `tests/object_path.rs`), despite the crate's `[lib] test = false`.

/// Split a request path into `(account, product, key)`.
///
/// The key is everything after the first two segments, so nested keys stay
/// intact. Leading/trailing slashes are ignored, so `/a/b` and `/a/b/` both
/// parse with `key = None`. Examples:
/// `/` → (None, None, None)
/// `/acct` → (Some("acct"), None, None)
/// `/acct/prod` → (Some("acct"), Some("prod"), None)
/// `/acct/prod/dir/f` → (Some("acct"), Some("prod"), Some("dir/f"))
pub(crate) fn extract_path_segments(path: &str) -> (Option<&str>, Option<&str>, Option<&str>) {
let trimmed = path.trim_matches('/');
if trimmed.is_empty() {
return (None, None, None);
}
let mut parts = trimmed.splitn(3, '/');
let account = parts.next();
let product = parts.next();
let key = parts.next();
(account, product, key)
}

/// Whether `method` writes to a single object but `path` carries no object key.
///
/// `PUT`/`DELETE` (PutObject/DeleteObject) address one object, so they need a
/// key: `/{account}/{product}/{key}`. A request to `/{account}/{product}` (or
/// shorter) targets the product root, which has no key — e.g.
/// `aws s3 cp f s3://account/product` (no trailing slash) uploads `f` as the
/// object literally named `product`. Such a request can't be served and, if
/// forwarded, the upstream rejects the streaming upload with a misleading
/// "x-amz-content-sha256 header is invalid"; callers should be told the real
/// cause instead.
///
/// `POST` is intentionally excluded: keyless `POST /{account}/{product}?delete`
/// (multi-object delete) is a legitimate bucket-level operation.
pub(crate) fn is_keyless_write(method: &http::Method, path: &str) -> bool {
(*method == http::Method::PUT || *method == http::Method::DELETE)
&& extract_path_segments(path).2.is_none()
}
63 changes: 63 additions & 0 deletions tests/object_path.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
//! Native unit tests for the wasm-free `object_path` module, included via
//! `#[path]` (the lib itself is `cdylib` with `test = false`). Mirrors the
//! pattern in `tests/authz.rs`.

#[path = "../src/object_path.rs"]
mod object_path;

use http::Method;
use object_path::{extract_path_segments, is_keyless_write};

#[test]
fn extract_splits_account_product_key() {
assert_eq!(extract_path_segments("/"), (None, None, None));
assert_eq!(extract_path_segments("/acct"), (Some("acct"), None, None));
assert_eq!(
extract_path_segments("/acct/prod"),
(Some("acct"), Some("prod"), None)
);
assert_eq!(
extract_path_segments("/acct/prod/README.md"),
(Some("acct"), Some("prod"), Some("README.md"))
);
// Nested keys stay intact.
assert_eq!(
extract_path_segments("/acct/prod/dir/sub/f.parquet"),
(Some("acct"), Some("prod"), Some("dir/sub/f.parquet"))
);
// A trailing slash is not a key.
assert_eq!(
extract_path_segments("/acct/prod/"),
(Some("acct"), Some("prod"), None)
);
}

#[test]
fn keyless_writes_are_flagged() {
// The reported bug: PUT to the product root (no trailing slash, no key).
assert!(is_keyless_write(&Method::PUT, "/acct/prod"));
assert!(is_keyless_write(&Method::PUT, "/acct/prod/"));
assert!(is_keyless_write(&Method::PUT, "/acct"));
assert!(is_keyless_write(&Method::PUT, "/"));
// DELETE shares the same failure mode (DeleteObject needs a key).
assert!(is_keyless_write(&Method::DELETE, "/acct/prod"));
}

#[test]
fn writes_with_a_key_are_allowed() {
assert!(!is_keyless_write(&Method::PUT, "/acct/prod/README.md"));
assert!(!is_keyless_write(
&Method::DELETE,
"/acct/prod/dir/f.parquet"
));
}

#[test]
fn reads_and_multi_delete_are_not_flagged() {
// Reads to the product root are legitimate (account/product listings).
assert!(!is_keyless_write(&Method::GET, "/acct/prod"));
assert!(!is_keyless_write(&Method::HEAD, "/acct"));
// Keyless POST is left to the gateway: `POST /{account}/{product}?delete`
// (multi-object delete) is a valid bucket-level operation.
assert!(!is_keyless_write(&Method::POST, "/acct/prod"));
}
Loading