diff --git a/Cargo.lock b/Cargo.lock index bc21bec..9a79638 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1601,6 +1601,7 @@ dependencies = [ name = "sandlock-ffi" version = "0.7.0" dependencies = [ + "libc", "sandlock-core", "serde_json", "tokio", diff --git a/crates/sandlock-core/src/lib.rs b/crates/sandlock-core/src/lib.rs index c9d272d..19323ae 100644 --- a/crates/sandlock-core/src/lib.rs +++ b/crates/sandlock-core/src/lib.rs @@ -28,6 +28,7 @@ pub mod dry_run; pub(crate) mod http_acl; pub use error::SandlockError; +pub use sys::structs::{SeccompData, SeccompNotif}; pub use checkpoint::Checkpoint; pub use sandbox::{Confinement, ConfinementBuilder, Sandbox, SandboxBuilder}; pub use result::{RunResult, ExitStatus}; diff --git a/crates/sandlock-ffi/Cargo.toml b/crates/sandlock-ffi/Cargo.toml index 2c1505b..8722299 100644 --- a/crates/sandlock-ffi/Cargo.toml +++ b/crates/sandlock-ffi/Cargo.toml @@ -9,10 +9,14 @@ readme = "../../README.md" description = "C ABI for sandlock process sandbox" [lib] -crate-type = ["cdylib", "staticlib"] +crate-type = ["cdylib", "staticlib", "rlib"] [dependencies] +libc = "0.2" sandlock-core = { version = "0.7.0", path = "../sandlock-core" } serde_json = "1" tokio = { version = "1", features = ["rt-multi-thread"] } +[dev-dependencies] +tokio = { version = "1", features = ["macros"] } + diff --git a/crates/sandlock-ffi/include/sandlock.h b/crates/sandlock-ffi/include/sandlock.h index d98d9f2..9c52aeb 100644 --- a/crates/sandlock-ffi/include/sandlock.h +++ b/crates/sandlock-ffi/include/sandlock.h @@ -118,6 +118,209 @@ sandlock_result_t *sandlock_pipeline_run(sandlock_pipeline_t *pipe, uint64_t tim void sandlock_pipeline_free(sandlock_pipeline_t *pipe); +/* ---------------------------------------------------------------- + * Handler ABI — extension handlers for seccomp-notif syscalls. + * ---------------------------------------------------------------- */ + +/** Snapshot of a kernel seccomp notification. 
Field layout must stay + * in lock-step with `sandlock_ffi::notif_repr::sandlock_notif_data_t`. */ +typedef struct sandlock_notif_data_t { + uint64_t id; + uint32_t pid; + uint32_t flags; + int32_t syscall_nr; + uint32_t arch; + uint64_t instruction_pointer; + uint64_t args[6]; +} sandlock_notif_data_t; + +/** Opaque child-memory accessor (lifetime: single callback invocation). */ +typedef struct sandlock_mem_handle_t sandlock_mem_handle_t; + +/** Read a NUL-terminated string. Returns 0 on success, -1 on failure. + * On success the buffer is NUL-terminated and `*out_len` holds the byte + * count copied (excluding NUL); `max_len` must be at least 1 to fit the + * NUL. */ +int sandlock_mem_read_cstr(const sandlock_mem_handle_t *handle, + uint64_t addr, + uint8_t *buf, size_t max_len, + size_t *out_len); + +/** Raw memory read. Returns 0/-1; `*out_len` holds actual bytes copied. */ +int sandlock_mem_read(const sandlock_mem_handle_t *handle, + uint64_t addr, + uint8_t *buf, size_t len, + size_t *out_len); + +/** Raw memory write. Returns 0/-1. 
*/ +int sandlock_mem_write(const sandlock_mem_handle_t *handle, + uint64_t addr, + const uint8_t *buf, size_t len); + +typedef enum sandlock_action_kind { + SANDLOCK_ACTION_UNSET = 0, + SANDLOCK_ACTION_CONTINUE = 1, + SANDLOCK_ACTION_ERRNO = 2, + SANDLOCK_ACTION_RETURN_VALUE = 3, + SANDLOCK_ACTION_INJECT_FD_SEND = 4, + SANDLOCK_ACTION_INJECT_FD_SEND_TRACKED = 5, + SANDLOCK_ACTION_HOLD = 6, + SANDLOCK_ACTION_KILL = 7, +} sandlock_action_kind_t; + +typedef struct { int32_t sig; int32_t pgid; } sandlock_action_kill_t; + +typedef struct { + int32_t srcfd; + uint32_t newfd_flags; +} sandlock_action_inject_t; + +typedef uint64_t sandlock_inject_tracker_t; + +typedef struct { + int32_t srcfd; + uint32_t newfd_flags; + sandlock_inject_tracker_t tracker; +} sandlock_action_inject_tracked_t; + +typedef union { + uint64_t none; + int32_t errno_value; + int64_t return_value; + sandlock_action_inject_t inject_send; + sandlock_action_inject_tracked_t inject_send_tracked; + sandlock_action_kill_t kill; +} sandlock_action_payload_t; + +typedef struct sandlock_action_out_t { + uint32_t kind; /* sandlock_action_kind_t */ + sandlock_action_payload_t payload; +} sandlock_action_out_t; + +/* Setters — exactly one tag is written; the payload is filled in + * accordingly. Calling a setter overwrites any prior setting. */ +void sandlock_action_set_continue(sandlock_action_out_t *out); +void sandlock_action_set_errno(sandlock_action_out_t *out, int32_t errno_value); +void sandlock_action_set_return_value(sandlock_action_out_t *out, int64_t value); +/** Ownership of `srcfd` transfers from the caller to the supervisor + * only when the resulting action is actually dispatched. If the + * caller subsequently calls a different setter on the same + * `sandlock_action_out_t` (overwriting the kind tag before the + * supervisor reads it), `srcfd` is NOT closed and leaks. Pick one + * setter per action. 
*/ +void sandlock_action_set_inject_fd_send(sandlock_action_out_t *out, + int32_t srcfd, uint32_t newfd_flags); +/* NOTE: `SANDLOCK_ACTION_INJECT_FD_SEND_TRACKED` (= 5) and + * `sandlock_action_inject_tracked_t` are reserved for a future + * tracker-aware inject variant. No setter is exposed in this release; + * actions left with that kind tag are treated as `UNSET` and routed + * through the handler's exception policy. */ +void sandlock_action_set_hold(sandlock_action_out_t *out); +/** Kill action setter. `pgid == 0` is a sentinel — the supervisor + * substitutes the child process group id (resolved via getpgid(pid) + * on the notification's pid). To target a specific group, pass an + * explicit non-zero pgid. */ +void sandlock_action_set_kill(sandlock_action_out_t *out, int32_t sig, int32_t pgid); + +typedef enum sandlock_exception_policy { + SANDLOCK_EXCEPTION_KILL = 0, + SANDLOCK_EXCEPTION_DENY_EPERM = 1, + SANDLOCK_EXCEPTION_CONTINUE = 2, +} sandlock_exception_policy_t; + +/** Opaque handler container. + * + * Ownership: allocated by `sandlock_handler_new` and freed by either + * `sandlock_handler_free` (if never registered) or by the supervisor + * after a successful or failed `sandlock_run_with_handlers` call. + * + * Thread safety: the supervisor MAY invoke the handler callback from + * multiple worker threads concurrently across different notifications + * (today's dispatch loop is largely serial; the public ABI makes no + * concurrency guarantee, so a future dispatcher could parallelise + * without breaking compatibility). The caller MUST ensure their `ud` + * pointer is thread-safe — either immutable, or guarded by their own + * synchronization primitives (atomics, mutex, etc.). Rust provides no + * synchronization for an opaque `void*`. */ +typedef struct sandlock_handler_t sandlock_handler_t; + +/** C handler signature. Return 0 on success; a non-zero return triggers + * the handler's exception policy. 
The callee MUST call exactly one + * sandlock_action_set_*() on `out` before returning 0. + * + * Thread safety: see `sandlock_handler_t` — this function may be + * invoked concurrently from multiple worker threads. Any state + * reachable through `ud` must be thread-safe. */ +typedef int (*sandlock_handler_fn_t)(void *ud, + const sandlock_notif_data_t *notif, + sandlock_mem_handle_t *mem, + sandlock_action_out_t *out); + +typedef void (*sandlock_handler_ud_drop_t)(void *ud); + +/** Allocate a handler container. Returns NULL when `handler_fn` is NULL + * or when `on_exception` is not one of the documented `SANDLOCK_EXCEPTION_*` + * values. + * + * `ud` must be thread-safe to access — see `sandlock_handler_t` for + * the concurrency contract. `ud_drop`, if non-NULL, is invoked exactly + * once when the container is freed. */ +sandlock_handler_t *sandlock_handler_new(sandlock_handler_fn_t handler_fn, + void *ud, + sandlock_handler_ud_drop_t ud_drop, + sandlock_exception_policy_t on_exception); + +/** Free a handler container that has not been handed to the supervisor. */ +void sandlock_handler_free(sandlock_handler_t *h); + +typedef struct sandlock_handler_registration_t { + int64_t syscall_nr; + sandlock_handler_t *handler; /* ownership transfers to the run call on entry, success or failure */ +} sandlock_handler_registration_t; + +/** Run the policy with extra C handlers. Returns NULL on failure. + * + * `name` may be NULL to auto-generate as `sandbox-{pid}`, mirroring the + * convention used by `sandlock_run`. + * + * Ownership of every `registrations[i].handler` pointer transfers into + * the call on entry. After this function returns, the caller MUST NOT + * call `sandlock_handler_free` on any handler pointer that was passed + * in — successful or not, the supervisor is responsible for freeing + * the containers (which also invokes the registered `ud_drop`).
+ * + * Null handler pointers in the array are treated as a validation error + * and the call returns NULL; non-null entries in the same array are + * still freed by the supervisor (the array is consumed as a whole). */ +sandlock_result_t *sandlock_run_with_handlers( + const sandlock_sandbox_t *policy, + const char *name, + const char *const *argv, unsigned int argc, + const sandlock_handler_registration_t *registrations, + size_t nregistrations); + +/** Interactive-stdio variant of `sandlock_run_with_handlers`. Returns + * NULL on failure. + * + * `name` may be NULL to auto-generate as `sandbox-{pid}`, mirroring the + * convention used by `sandlock_run_interactive`. + * + * Ownership of every `registrations[i].handler` pointer transfers into + * the call on entry. After this function returns, the caller MUST NOT + * call `sandlock_handler_free` on any handler pointer that was passed + * in — successful or not, the supervisor is responsible for freeing + * the containers (which also invokes the registered `ud_drop`). + * + * Null handler pointers in the array are treated as a validation error + * and the call returns NULL; non-null entries in the same array are + * still freed by the supervisor (the array is consumed as a whole). */ +sandlock_result_t *sandlock_run_interactive_with_handlers( + const sandlock_sandbox_t *policy, + const char *name, + const char *const *argv, unsigned int argc, + const sandlock_handler_registration_t *registrations, + size_t nregistrations); + #ifdef __cplusplus } #endif diff --git a/crates/sandlock-ffi/src/handler/abi.rs b/crates/sandlock-ffi/src/handler/abi.rs new file mode 100644 index 0000000..5241674 --- /dev/null +++ b/crates/sandlock-ffi/src/handler/abi.rs @@ -0,0 +1,487 @@ +//! Public ABI types, setters, and accessor entry points exposed by the +//! handler module. No Rust-side dispatch logic lives here — only the +//! data layout and the thin `extern "C-unwind"` wrappers around it. 
+ +use std::os::unix::io::RawFd; +use std::slice; + +use sandlock_core::seccomp::notif::{read_child_cstr, read_child_mem, write_child_mem}; + +/// Opaque child-memory accessor handed to a C handler callback. +/// +/// Constructed on the stack inside the Rust adapter just before the +/// callback fires, invalidated when the callback returns. C handlers +/// must not store the pointer beyond the callback's return. +#[repr(C)] +#[allow(non_camel_case_types)] +pub struct sandlock_mem_handle_t { + notif_fd: RawFd, + notif_id: u64, + pid: u32, +} + +impl sandlock_mem_handle_t { + pub(super) fn new(notif_fd: RawFd, notif_id: u64, pid: u32) -> Self { + Self { notif_fd, notif_id, pid } + } +} + +/// Read up to `max_len-1` bytes of a NUL-terminated string at `addr` from the +/// traced child. On success the destination buffer is NUL-terminated and +/// `*out_len` holds the byte count copied (excluding the NUL); returns 0. +/// On failure returns -1 and leaves `*out_len` untouched. `max_len` must be +/// at least 1 to fit the NUL terminator. +/// +/// # Safety +/// `handle` must point to a live `sandlock_mem_handle_t` provided by the +/// supervisor; `buf` must be writable for `max_len` bytes; `out_len` must +/// be a valid `size_t*`. +#[no_mangle] +pub unsafe extern "C" fn sandlock_mem_read_cstr( + handle: *const sandlock_mem_handle_t, + addr: u64, + buf: *mut u8, + max_len: usize, + out_len: *mut usize, +) -> i32 { + if handle.is_null() || buf.is_null() || out_len.is_null() || max_len == 0 { + return -1; + } + let h = &*handle; + // `max_len` is the caller-supplied buffer size including space for the + // trailing NUL. The C header documents `max_len >= 1` as sufficient + // (the buffer holds at least the NUL terminator), so a 1-byte buffer + // must succeed when the target string is empty. The general path + // below computes `cap = max_len - 1`, which is 0 for `max_len == 1` + // — and `read_child_cstr` rejects `max_len == 0` outright. 
Take the + // edge case via an explicit fast-path: probe the target for one + // byte; on a NUL (= empty string) write the terminator and return + // success, otherwise the caller's buffer cannot fit the payload. + if max_len == 1 { + match read_child_cstr(h.notif_fd, h.notif_id, h.pid, addr, 1) { + Some(s) if s.is_empty() => { + *buf = 0; + *out_len = 0; + return 0; + } + // Either the target string is non-empty (we have no room + // for it) or the read failed entirely. Either way, -1. + _ => return -1, + } + } + let cap = max_len - 1; + let s = match read_child_cstr(h.notif_fd, h.notif_id, h.pid, addr, cap) { + Some(s) => s, + None => return -1, + }; + let bytes = s.as_bytes(); + let n = bytes.len().min(cap); + std::ptr::copy_nonoverlapping(bytes.as_ptr(), buf, n); + *buf.add(n) = 0; + *out_len = n; + 0 +} + +/// Raw byte read at `addr` of exactly `len` bytes. Writes byte count +/// actually read to `*out_len`. Returns 0 on success, -1 on failure. +/// +/// # Safety +/// Same constraints as `sandlock_mem_read_cstr`. +#[no_mangle] +pub unsafe extern "C" fn sandlock_mem_read( + handle: *const sandlock_mem_handle_t, + addr: u64, + buf: *mut u8, + len: usize, + out_len: *mut usize, +) -> i32 { + if handle.is_null() || buf.is_null() || out_len.is_null() { + return -1; + } + let h = &*handle; + let v = match read_child_mem(h.notif_fd, h.notif_id, h.pid, addr, len) { + Ok(v) => v, + Err(_) => return -1, + }; + let n = v.len(); + std::ptr::copy_nonoverlapping(v.as_ptr(), buf, n); + *out_len = n; + 0 +} + +/// Write `len` bytes from `buf` into the child at `addr`. Returns 0 on +/// success, -1 on failure. +/// +/// # Safety +/// Same constraints as `sandlock_mem_read_cstr`; `buf` must be readable +/// for `len` bytes. 
+#[no_mangle] +pub unsafe extern "C" fn sandlock_mem_write( + handle: *const sandlock_mem_handle_t, + addr: u64, + buf: *const u8, + len: usize, +) -> i32 { + if handle.is_null() || buf.is_null() { + return -1; + } + let h = &*handle; + let data = slice::from_raw_parts(buf, len); + match write_child_mem(h.notif_fd, h.notif_id, h.pid, addr, data) { + Ok(()) => 0, + Err(_) => -1, + } +} + +/// Tag distinguishing payload variants of `sandlock_action_out_t`. +#[repr(u32)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[allow(non_camel_case_types)] +pub enum sandlock_action_kind_t { + /// No action set yet; the supervisor treats this as "fall through to + /// the handler's on_exception policy" (see `exception_action` in + /// `FfiHandler`). + Unset = 0, + Continue = 1, + Errno = 2, + ReturnValue = 3, + InjectFdSend = 4, + InjectFdSendTracked = 5, + Hold = 6, + Kill = 7, +} + +#[repr(C)] +#[derive(Clone, Copy)] +#[allow(non_camel_case_types)] +pub struct sandlock_action_kill_t { + pub sig: i32, + pub pgid: i32, +} + +#[repr(C)] +#[derive(Clone, Copy)] +#[allow(non_camel_case_types)] +pub struct sandlock_action_inject_t { + /// Owned by the C caller; ownership transfers to the supervisor only + /// when the armed action is actually dispatched, not when the setter runs. + pub srcfd: i32, + pub newfd_flags: u32, +} + +/// Token reserved for a future tracker-aware inject variant. Currently +/// unimplemented — kept as a type alias so the ABI of the +/// `sandlock_action_inject_tracked_t` payload stays stable across the +/// future release that wires the tracker callback.
+#[allow(non_camel_case_types)] +pub type sandlock_inject_tracker_t = u64; + +#[repr(C)] +#[derive(Clone, Copy)] +#[allow(non_camel_case_types)] +pub struct sandlock_action_inject_tracked_t { + pub srcfd: i32, + pub newfd_flags: u32, + pub tracker: sandlock_inject_tracker_t, +} + +#[repr(C)] +#[allow(non_camel_case_types)] +pub union sandlock_action_payload_t { + pub none: u64, + /// `errno_value` rather than `errno` to mirror the C header field + /// (the C side avoids the name `errno` because `<errno.h>` macros + /// it). Keeping both languages in sync removes a documentation + /// hazard for callers that grep across Rust and C sources. + pub errno_value: i32, + pub return_value: i64, + pub inject_send: sandlock_action_inject_t, + pub inject_send_tracked: sandlock_action_inject_tracked_t, + pub kill: sandlock_action_kill_t, +} + +#[repr(C)] +#[allow(non_camel_case_types)] +pub struct sandlock_action_out_t { + pub kind: u32, + pub payload: sandlock_action_payload_t, +} + +impl sandlock_action_out_t { + /// Construct an `Unset` action with all payload bytes zero. The payload + /// union has variants up to 16 bytes; this ensures all bytes are + /// initialised before the C handler writes its decision. + pub fn zeroed() -> Self { + // Safety: `sandlock_action_payload_t` is `#[repr(C)]` with only + // integer-and-integer-aggregate variants; the zero bit-pattern is + // valid for all of them. + Self { + kind: sandlock_action_kind_t::Unset as u32, + payload: unsafe { std::mem::MaybeUninit::zeroed().assume_init() }, + } + } +} + +/// Mark the action as `Continue` (let the syscall proceed unchanged). +/// +/// # Safety +/// `out` must be a valid pointer to a `sandlock_action_out_t` writable +/// for the duration of the call, or null (in which case the call is a +/// no-op).
+#[no_mangle] +pub unsafe extern "C" fn sandlock_action_set_continue(out: *mut sandlock_action_out_t) { + if out.is_null() { return; } + (*out).kind = sandlock_action_kind_t::Continue as u32; +} + +/// Fail the syscall with `errno`. +/// +/// # Safety +/// Same constraints as `sandlock_action_set_continue`. +#[no_mangle] +pub unsafe extern "C" fn sandlock_action_set_errno( + out: *mut sandlock_action_out_t, + errno_value: i32, +) { + if out.is_null() { return; } + (*out).kind = sandlock_action_kind_t::Errno as u32; + (*out).payload.errno_value = errno_value; +} + +/// Return a specific value from the syscall without entering the kernel. +/// +/// # Safety +/// Same constraints as `sandlock_action_set_continue`. +#[no_mangle] +pub unsafe extern "C" fn sandlock_action_set_return_value( + out: *mut sandlock_action_out_t, + value: i64, +) { + if out.is_null() { return; } + (*out).kind = sandlock_action_kind_t::ReturnValue as u32; + (*out).payload.return_value = value; +} + +/// Inject the supervisor-side fd `srcfd` into the traced child as a new +/// fd (number chosen by the kernel via `SECCOMP_IOCTL_NOTIF_ADDFD`). +/// +/// Note: ownership of `srcfd` transfers from the C caller to the +/// supervisor only when the resulting action is actually dispatched. +/// If the C caller subsequently calls a different setter on the same +/// `sandlock_action_out_t` (overwriting the kind tag before the +/// supervisor reads it), `srcfd` is NOT closed and leaks. Pick one +/// setter per action. +/// +/// # Safety +/// Same constraints as `sandlock_action_set_continue`; `srcfd` must be +/// a valid open fd in the supervisor process at the moment of the +/// supervisor's dispatch. 
+#[no_mangle] +pub unsafe extern "C" fn sandlock_action_set_inject_fd_send( + out: *mut sandlock_action_out_t, + srcfd: RawFd, + newfd_flags: u32, +) { + if out.is_null() { return; } + (*out).kind = sandlock_action_kind_t::InjectFdSend as u32; + (*out).payload.inject_send = sandlock_action_inject_t { srcfd, newfd_flags }; +} + +/// Hold the syscall pending until the supervisor explicitly releases it. +/// +/// # Safety +/// Same constraints as `sandlock_action_set_continue`. +#[no_mangle] +pub unsafe extern "C" fn sandlock_action_set_hold(out: *mut sandlock_action_out_t) { + if out.is_null() { return; } + (*out).kind = sandlock_action_kind_t::Hold as u32; +} + +/// Kill the target with signal `sig`. Pass `pgid > 0` to target an +/// explicit process group; `pgid == 0` is a sentinel — the supervisor +/// substitutes the child process group id resolved via `getpgid(pid)` +/// on the notification's pid. +/// +/// # Safety +/// Same constraints as `sandlock_action_set_continue`. +#[no_mangle] +pub unsafe extern "C" fn sandlock_action_set_kill( + out: *mut sandlock_action_out_t, + sig: i32, + pgid: i32, +) { + if out.is_null() { return; } + (*out).kind = sandlock_action_kind_t::Kill as u32; + (*out).payload.kill = sandlock_action_kill_t { sig, pgid }; +} + +/// Exception policy applied when the handler callback fails to set a +/// valid action (returns non-zero rc, leaves `kind == Unset`, or panics +/// across the FFI boundary). +#[repr(u32)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[allow(non_camel_case_types)] +pub enum sandlock_exception_policy_t { + /// Treat the failure as `NotifAction::Kill { sig: SIGKILL, pgid: child_pgid }`. + /// Default; "fail-closed" — the safe option. + Kill = 0, + /// Treat the failure as `NotifAction::Errno(EPERM)`. Useful for + /// audit-style handlers where the syscall is what failed rather than + /// the supervisor. + DenyEperm = 1, + /// Treat the failure as `NotifAction::Continue`. 
Explicit fail-open; + /// only safe when the syscall is *also* allowed by the BPF filter and + /// Landlock layer (e.g. observability handlers). + Continue = 2, +} + +/// C-callable handler entry point. +/// +/// Returns 0 on success (and must have called exactly one setter on +/// `out`). Returns non-zero to signal a handler-internal error; the +/// supervisor then applies the configured exception policy. +/// +/// The ABI is `extern "C-unwind"` rather than plain `extern "C"`. Pure-C +/// callers see no difference (C has no unwinding); Rust handlers plugged +/// into this C ABI surface may panic and the supervisor's `catch_unwind` +/// in [`super::adapter::FfiHandler::handle`] will route the panic to the +/// configured exception policy instead of aborting the process. +#[allow(non_camel_case_types)] +pub type sandlock_handler_fn_t = extern "C-unwind" fn( + ud: *mut std::ffi::c_void, + notif: *const crate::notif_repr::sandlock_notif_data_t, + mem: *mut sandlock_mem_handle_t, + out: *mut sandlock_action_out_t, +) -> i32; + +/// Optional destructor invoked when the container is freed. +/// +/// Uses `extern "C-unwind"` for consistency with [`sandlock_handler_fn_t`] +/// and so that a Rust-side destructor panicking through this pointer +/// unwinds rather than aborts (panic-safety in destructors is good +/// practice even though no in-tree caller currently relies on it). +#[allow(non_camel_case_types)] +pub type sandlock_handler_ud_drop_t = extern "C-unwind" fn(ud: *mut std::ffi::c_void); + +/// Opaque handler container (B4 — opaque box). +#[repr(C)] +#[allow(non_camel_case_types)] +pub struct sandlock_handler_t { + pub(super) handler_fn: Option<sandlock_handler_fn_t>, + pub(super) ud: *mut std::ffi::c_void, + pub(super) ud_drop: Option<sandlock_handler_ud_drop_t>, + pub(super) on_exception: sandlock_exception_policy_t, +} + +// Safety: +// +// `Send`: required so the supervisor can move the handler container into +// a `tokio::task::spawn_blocking` closure.
The struct contains only +// pointers (function pointer + `void*` user-data) and a `#[repr(u32)]` +// enum, all of which are `Send`-safe to move across threads. +// +// `Sync`: required because the dispatch table stores handlers behind an +// `Arc` (presumably `Arc<dyn Handler>`), and `Arc<T>` is only `Send`/`Sync` when `T: Send + Sync`. The +// supervisor MAY dispatch handler invocations concurrently across +// different notifications (today's loop is largely serial, but the +// contract makes no guarantee — a future dispatcher could parallelise +// without breaking the public ABI). Consequently the C caller MUST +// ensure their `ud` is either immutable, or guarded by thread-safe +// state of their own (atomics, mutex, etc.). Rust offers no +// synchronization for an opaque `void*` — the responsibility is on +// the C side. +unsafe impl Send for sandlock_handler_t {} +unsafe impl Sync for sandlock_handler_t {} + +impl Drop for sandlock_handler_t { + fn drop(&mut self) { + if let Some(drop_fn) = self.ud_drop.take() { + // Per the C header contract on `sandlock_handler_ud_drop_t`: + // the dropper fires exactly once when the container is freed, + // regardless of whether `ud` is null. C callers that store + // metadata via `ud_drop` (e.g., for lifecycle logging) need + // the call even with null ud; idiomatic C dropper code can + // mirror `free(NULL)` semantics on its own. + (drop_fn)(self.ud); + self.ud = std::ptr::null_mut(); + } + } +} + +/// Allocate a handler container. `handler_fn` must be non-null; passing +/// `ud_drop = None` is legal when `ud` does not require cleanup. +/// +/// # Safety +/// `ud` is opaque to Rust — the caller guarantees that the pointer +/// remains valid until either (a) `sandlock_handler_free` is called or +/// (b) the supervisor takes ownership via `sandlock_run_with_handlers` +/// and the run completes. +/// If `on_exception` does not match a defined `sandlock_exception_policy_t` +/// discriminant (0, 1, or 2), the call returns null and no allocation occurs.
+#[no_mangle] +pub unsafe extern "C" fn sandlock_handler_new( + handler_fn: Option, + ud: *mut std::ffi::c_void, + ud_drop: Option, + on_exception: u32, +) -> *mut sandlock_handler_t { + if handler_fn.is_none() { + return std::ptr::null_mut(); + } + let on_exception = match on_exception { + 0 => sandlock_exception_policy_t::Kill, + 1 => sandlock_exception_policy_t::DenyEperm, + 2 => sandlock_exception_policy_t::Continue, + // Reject out-of-range discriminants at the FFI boundary so we never + // store an invalid enum value into the struct — reading one later + // via `match` would be undefined behaviour. + _ => return std::ptr::null_mut(), + }; + let h = Box::new(sandlock_handler_t { + handler_fn, + ud, + ud_drop, + on_exception, + }); + Box::into_raw(h) +} + +/// Free a handler container that has *not* been registered with a +/// sandbox. After successful registration the supervisor owns the +/// handler; calling this on a registered handler is undefined behaviour +/// (the supervisor's later free would double-free). +/// +/// The ABI is `extern "C-unwind"` rather than plain `extern "C"` so a +/// panic propagated from a Rust-side `ud_drop` (declared as +/// [`sandlock_handler_ud_drop_t`], itself `extern "C-unwind"`) unwinds +/// the caller rather than aborting the process. Pure-C callers see no +/// difference (C has no unwinding). +/// +/// # Safety +/// `h` must be either null or a pointer previously returned by +/// `sandlock_handler_new` that has not yet been registered with the +/// supervisor and has not already been freed. +#[no_mangle] +pub unsafe extern "C-unwind" fn sandlock_handler_free(h: *mut sandlock_handler_t) { + if h.is_null() { return; } + drop(Box::from_raw(h)); +} + +/// C-side pair of `(syscall_nr, handler*)` consumed by +/// `sandlock_run_with_handlers`. Ownership of `handler` transfers into +/// the run on success; the supervisor frees the container. 
+#[repr(C)] +#[derive(Clone, Copy)] +#[allow(non_camel_case_types)] +pub struct sandlock_handler_registration_t { + pub syscall_nr: i64, + pub handler: *mut sandlock_handler_t, +} + +// Safety: the raw pointer field is opaque to Rust. The supervisor moves +// the registration array into a worker thread once it has been turned +// into `(i64, FfiHandler)` pairs; the registration struct itself never +// crosses thread boundaries while holding the raw pointer. We mark +// `Send` to allow the input array to be borrowed inside `unsafe` +// contexts without per-call wrapper structs. +unsafe impl Send for sandlock_handler_registration_t {} diff --git a/crates/sandlock-ffi/src/handler/adapter.rs b/crates/sandlock-ffi/src/handler/adapter.rs new file mode 100644 index 0000000..a6b9af8 --- /dev/null +++ b/crates/sandlock-ffi/src/handler/adapter.rs @@ -0,0 +1,325 @@ +//! Rust-side adapter wiring the C ABI to the `Handler` trait. +//! +//! `FfiHandler` owns the `sandlock_handler_t` container produced by +//! `sandlock_handler_new` and implements `Handler` so the supervisor's +//! dispatch loop can invoke C callbacks transparently. + +use std::future::Future; +use std::os::unix::io::FromRawFd; +use std::pin::Pin; + +use sandlock_core::seccomp::dispatch::{Handler, HandlerCtx}; +use sandlock_core::seccomp::notif::NotifAction; + +use super::abi::{ + sandlock_action_kind_t, sandlock_action_out_t, sandlock_exception_policy_t, + sandlock_handler_t, sandlock_mem_handle_t, +}; + +/// Sentinel for "we cannot safely resolve a process-group id for the +/// trapped child." `i32::MIN` is not a valid pgid: `killpg(i32::MIN, _)` +/// returns `ESRCH` rather than harming the supervisor or any real +/// process group. Used in three places: +/// +/// * adapter.rs `child_pgid` resolution falls back to this when the +/// bare pid would otherwise be `0` (POSIX `killpg(0)` would target +/// the supervisor's own group) or whenever `getpgid(pid)` failed or +/// produced the supervisor's pgid.
+/// * `translate_action`'s `Kill` arm refuses to produce an action +/// with this pgid, routing the dispatcher onto the configured +/// exception policy instead. +/// * `exception_action`'s `Kill` arm degrades to `Errno(EPERM)` if +/// it sees the sentinel, so the policy default never lets the +/// suicide vector through either. +pub(crate) const UNSAFE_PGID: i32 = i32::MIN; + +/// Rust adapter wrapping an owned `sandlock_handler_t` and implementing +/// `Handler`. Constructed when the supervisor accepts handlers passed +/// through `sandlock_run_with_handlers`. +pub struct FfiHandler { + inner: Box<sandlock_handler_t>, +} + +impl FfiHandler { + /// Take ownership of a raw `sandlock_handler_t*` produced by + /// `sandlock_handler_new`. + /// + /// # Safety + /// `raw` must be a non-null pointer returned by `sandlock_handler_new` + /// and never freed via `sandlock_handler_free`. After this call the + /// supervisor owns the container. + pub unsafe fn from_raw(raw: *mut sandlock_handler_t) -> Self { + assert!(!raw.is_null(), "FfiHandler::from_raw on null pointer"); + Self { inner: Box::from_raw(raw) } + } + + fn exception_action(&self, child_pgid: i32) -> NotifAction { + match self.inner.on_exception { + sandlock_exception_policy_t::Kill => { + if child_pgid == UNSAFE_PGID { + // No safe pgid is available (nested PID namespace, + // ESRCH, or supervisor-pgid collision). Degrading + // to EPERM keeps the suicide vector closed: the + // syscall is rejected with EPERM and the child can + // retry. Killing the supervisor would be strictly + // worse than letting the sandboxed process see one + // failed syscall. + NotifAction::Errno(libc::EPERM) + } else { + NotifAction::Kill { sig: libc::SIGKILL, pgid: child_pgid } + } + } + sandlock_exception_policy_t::DenyEperm => NotifAction::Errno(libc::EPERM), + sandlock_exception_policy_t::Continue => NotifAction::Continue, + } + } +} + +/// `Send`-only wrapper around the C user-data pointer so it can travel +/// into `spawn_blocking`.
Only the move (not sharing across threads) is +/// required; the deeper Send/Sync rationale for the underlying handler +/// container lives on `sandlock_handler_t`. +struct UdPtr(*mut std::ffi::c_void); +// Safety: ud is opaque to Rust; the spawn_blocking pipeline only moves +// (not shares) the wrapper. See `sandlock_handler_t` for the deeper +// Send/Sync rationale that justifies the underlying handler container. +unsafe impl Send for UdPtr {} + +impl Handler for FfiHandler { + fn handle<'a>( + &'a self, + cx: &'a HandlerCtx, + ) -> Pin<Box<dyn Future<Output = NotifAction> + Send + 'a>> { + // Capture the pieces we need by value so spawn_blocking can run + // the C callback on a worker thread without &self lifetime games. + let notif_snap = crate::notif_repr::sandlock_notif_data_t::from(&cx.notif); + let notif_fd = cx.notif_fd; + let notif_id = cx.notif.id; + let pid = cx.notif.pid; + // Resolve the trapped child's process group id for use as a fallback + // pgid in Kill actions where the caller passed pgid == 0. Three guard + // rails, all routed through the `UNSAFE_PGID` sentinel when they + // trip: + // + // 1. `notif.pid <= 0` can occur in nested PID namespaces (e.g., + // Kubernetes pod-in-pod, KubeVirt, DinD). The kernel reports + // the trapped task as invisible. The bare pid is unusable + // (POSIX `killpg(0)` targets the caller's group — supervisor + // suicide) and there is no other safe substitute. Signal + // failure via `UNSAFE_PGID`. + // + // 2. `getpgid(pid) <= 0` indicates ESRCH (child exited between + // notif and our query) or another kernel-side failure — no + // pgid we can safely use. + // + // 3. Even on success, the resolved pgid must differ from the + // supervisor's own pgid. If sandlock-core does not call + // `setpgid(0, 0)` after fork, the child inherits the parent's + // pgid — sending `killpg(supervisor_pgid)` would kill the + // supervisor along with the child. + // + // Earlier versions fell back to the bare `pid`.
That looked safe + // for guards 2 and 3 (kernel rejects `killpg(pid)` with ESRCH if + // it does not name a group), but for guard 1 the bare pid is `0` + // or negative; `killpg(0, sig)` is supervisor suicide per POSIX, + // and `killpg(-1, sig)` broadcasts to every process the caller + // can signal. Routing all three branches through `UNSAFE_PGID` + // is the only way to keep guard 1 from re-introducing the very + // suicide vector this resolution exists to close. + let child_pgid = { + let pid = cx.notif.pid as i32; + if pid <= 0 { + UNSAFE_PGID + } else { + // SAFETY: `getpgid` is signal-safe; positive pid is the only + // documented precondition. `getpgid(0)` reports the caller's + // (supervisor's) pgid; both calls have no other preconditions. + let supervisor_pgid = unsafe { libc::getpgid(0) }; + let pgid = unsafe { libc::getpgid(pid) }; + if pgid <= 0 || pgid == supervisor_pgid { + UNSAFE_PGID + } else { + pgid + } + } + }; + let handler_fn = self.inner.handler_fn; + let ud = UdPtr(self.inner.ud); + let on_exception_fallback = self.exception_action(child_pgid); + + Box::pin(async move { + let join = tokio::task::spawn_blocking(move || { + // Rust 2021 disjoint closure captures (RFC 2229) would + // otherwise capture `ud.0` (a bare `*mut c_void`, not + // `Send`) rather than the whole `UdPtr`. Binding `ud` to + // a fresh local at the top of the closure forces a + // whole-struct capture so the `Send` impl on `UdPtr` + // applies to the outer closure. 
+ let ud = ud; + let UdPtr(ud_raw) = ud; + let mut mem = sandlock_mem_handle_t::new(notif_fd, notif_id, pid); + let mut out = sandlock_action_out_t::zeroed(); + let rc = match handler_fn { + Some(f) => std::panic::catch_unwind(std::panic::AssertUnwindSafe( + || f(ud_raw, ¬if_snap, &mut mem, &mut out), + )), + None => Ok(-1), + }; + (rc, out) + }).await; + + let (rc_or_panic, out) = match join { + Ok(pair) => pair, + Err(_join_err) => return on_exception_fallback, + }; + + match rc_or_panic { + Ok(0) => match translate_action(&out, child_pgid) { + Some(action) => action, + None => { + // Action kind ended up Unset, unknown, or the + // reserved InjectFdSendTracked discriminant. + // Drain any inject-fd payload before falling + // back to the exception policy — otherwise the + // supervisor leaks the srcfd that was armed by + // the (failed) callback. + // SAFETY: `drain_pending_inject_fd` inspects + // `out.kind` itself before touching the union, + // and `out.kind` matches the union variant per + // the action setters' contract. + unsafe { drain_pending_inject_fd(&out) }; + on_exception_fallback + } + }, + _ => { + // Either the callback returned a non-zero rc OR + // `catch_unwind` caught a panic. The callback may + // have armed an InjectFdSend{,Tracked} payload + // before failing; drain it so its srcfd doesn't + // leak in the supervisor. + // SAFETY: see the `Ok(0) -> None` branch above. + unsafe { drain_pending_inject_fd(&out) }; + on_exception_fallback + } + } + }) + } +} + +/// Drains a still-pending `InjectFdSend` or `InjectFdSendTracked` +/// payload by consuming the contained `srcfd` into an `OwnedFd` and +/// dropping it (which closes the fd). Called from error paths in +/// [`FfiHandler::handle`] that fall back to the exception policy +/// without dispatching the action — without this, the supervisor +/// silently leaks fds armed by a C handler that subsequently panicked +/// or returned a non-zero rc. 
+///
+/// No-op for any other action kind (including `Unset`).
+///
+/// # Safety
+/// `out` must point at a fully-initialised `sandlock_action_out_t`.
+/// The function inspects only `out.kind` and the union arm matching
+/// that kind, which is sound because the action setters establish the
+/// invariant "the `kind` tag selects the union arm".
+unsafe fn drain_pending_inject_fd(out: &sandlock_action_out_t) {
+    use sandlock_action_kind_t as K;
+    if out.kind == K::InjectFdSend as u32 {
+        // SAFETY: `kind == InjectFdSend` selects the `inject_send`
+        // arm per the setter contract. Wrapping the raw fd in an
+        // `OwnedFd` and dropping it closes the fd.
+        drop(std::os::unix::io::OwnedFd::from_raw_fd(
+            out.payload.inject_send.srcfd,
+        ));
+    } else if out.kind == K::InjectFdSendTracked as u32 {
+        // The C header exposes the discriminant value publicly even
+        // though we don't ship a setter for it. A C caller can still
+        // assign `out->kind = 5; out->payload.inject_send_tracked.srcfd = X;`
+        // by hand. Treat it like `InjectFdSend` for cleanup purposes:
+        // the srcfd was armed and must be released.
+        // SAFETY: see `InjectFdSend` arm above.
+        drop(std::os::unix::io::OwnedFd::from_raw_fd(
+            out.payload.inject_send_tracked.srcfd,
+        ));
+    }
+}
+
+/// Convert the C-side decision into a `NotifAction`. Returns `None` if
+/// the kind is `Unset`, unknown, or `InjectFdSendTracked` (no setter
+/// exposed; treated as fallback). The caller then falls back to the
+/// exception policy, and is responsible for invoking
+/// [`drain_pending_inject_fd`] to release any armed inject-fd payload.
+///
+/// Note: this function takes `&sandlock_action_out_t` rather than
+/// consuming the struct so that the caller can still inspect `out.kind`
+/// on the `None` branch and drain any pending fd payload. The
+/// `InjectFdSend` arm uses `OwnedFd::from_raw_fd` on the union field,
+/// which is what materialises the ownership transfer from the C caller
+/// to the supervisor when this branch is taken.
+fn translate_action(out: &sandlock_action_out_t, child_pgid: i32) -> Option<NotifAction> {
+    use sandlock_action_kind_t as K;
+    let kind = match out.kind {
+        x if x == K::Continue as u32 => K::Continue,
+        x if x == K::Errno as u32 => K::Errno,
+        x if x == K::ReturnValue as u32 => K::ReturnValue,
+        x if x == K::InjectFdSend as u32 => K::InjectFdSend,
+        // Discriminant reserved for a future tracker-injection ABI; no
+        // setter is exposed in this release. A C caller can still set
+        // it by hand (the value is public in the C header). Return
+        // `None` so the caller drains the srcfd and falls back to the
+        // exception policy.
+        x if x == K::InjectFdSendTracked as u32 => return None,
+        x if x == K::Hold as u32 => K::Hold,
+        x if x == K::Kill as u32 => K::Kill,
+        _ => return None, // Unset or unknown
+    };
+
+    // Safety: the `out.payload` union variant matched here was just
+    // selected by the `kind` discriminant above. The C action setters
+    // documented in this module pair each `kind` value with exactly one
+    // payload variant, so reading that variant is the only legal access.
+    // For `InjectFdSend` the documented contract on
+    // `sandlock_action_set_inject_fd_send` transfers ownership of
+    // `srcfd` to the supervisor; wrapping it in an `OwnedFd` here is
+    // what materialises that transfer.
+    let action = unsafe {
+        match kind {
+            K::Continue => NotifAction::Continue,
+            K::Errno => NotifAction::Errno(out.payload.errno_value),
+            K::ReturnValue => NotifAction::ReturnValue(out.payload.return_value),
+            K::Hold => NotifAction::Hold,
+            K::Kill => {
+                let user_pgid = out.payload.kill.pgid;
+                if user_pgid == 0 {
+                    // Caller asked us to substitute the child's pgid.
+                    // Refuse if we have no safe value: routing through
+                    // `None` falls back to the configured exception
+                    // policy, whose `Kill` arm also checks for the
+                    // sentinel and degrades to EPERM.
+                    if child_pgid == UNSAFE_PGID {
+                        return None;
+                    }
+                    NotifAction::Kill { sig: out.payload.kill.sig, pgid: child_pgid }
+                } else {
+                    // Caller passed an explicit pgid. Defence in depth:
+                    // refuse a negative value (never a valid group id,
+                    // and it includes the `UNSAFE_PGID` sentinel) or the
+                    // supervisor's own group. We re-query `getpgid(0)`
+                    // here because the earlier resolution path only
+                    // computes supervisor_pgid in the `pid > 0` branch.
+                    // Already inside the outer `unsafe` block.
+                    let supervisor_pgid = libc::getpgid(0);
+                    if user_pgid < 0 || user_pgid == supervisor_pgid {
+                        return None;
+                    }
+                    NotifAction::Kill { sig: out.payload.kill.sig, pgid: user_pgid }
+                }
+            }
+            K::InjectFdSend => NotifAction::InjectFdSend {
+                srcfd: std::os::unix::io::OwnedFd::from_raw_fd(out.payload.inject_send.srcfd),
+                newfd_flags: out.payload.inject_send.newfd_flags,
+            },
+            K::InjectFdSendTracked | K::Unset => unreachable!(),
+        }
+    };
+    Some(action)
+}
diff --git a/crates/sandlock-ffi/src/handler/mod.rs b/crates/sandlock-ffi/src/handler/mod.rs
new file mode 100644
index 0000000..f269aba
--- /dev/null
+++ b/crates/sandlock-ffi/src/handler/mod.rs
@@ -0,0 +1,16 @@
+//! FFI surface for the sandlock `Handler` trait. See `docs/extension-handlers.md`.
+//!
+//! Split across three submodules for clarity:
+//! * [`abi`] — public ABI types, setters, and accessor entry points.
+//! * [`adapter`] — `FfiHandler` adapter implementing `Handler`.
+//! * [`run`] — `sandlock_run_with_handlers` entry points and helpers.
+
+pub mod abi;
+pub mod adapter;
+pub mod run;
+
+// Re-export every symbol that was at `sandlock_ffi::handler::FOO` before
+// the split so external tests and downstream consumers do not break.
+pub use abi::*;
+pub use adapter::FfiHandler;
+pub use run::{sandlock_run_interactive_with_handlers, sandlock_run_with_handlers};
diff --git a/crates/sandlock-ffi/src/handler/run.rs b/crates/sandlock-ffi/src/handler/run.rs
new file mode 100644
index 0000000..8a1bed5
--- /dev/null
+++ b/crates/sandlock-ffi/src/handler/run.rs
@@ -0,0 +1,319 @@
+//! `sandlock_run_with_handlers` entry points and their plumbing helpers.
+//!
+//! This module owns the FFI surface that takes an array of
+//! `sandlock_handler_registration_t`, converts them into `FfiHandler`
+//! instances, and drives the supervisor runtime.
+
+use std::ffi::CStr;
+use std::slice;
+
+use sandlock_core::{RunResult, Sandbox, SandlockError};
+
+use super::abi::sandlock_handler_registration_t;
+use super::adapter::FfiHandler;
+
+/// Defensive upper bound on `argc`. Linux's `ARG_MAX` is typically
+/// 128 KiB-2 MiB of *characters* across all argv+envp; an argv with
+/// 4096 entries is already preposterous in practice. Bounding here
+/// turns a malicious or buggy caller passing `argc = u32::MAX` (which
+/// would otherwise drive an unbounded deref loop) into a fast NULL
+/// return at the FFI boundary.
+const MAX_ARGV: u32 = 4096;
+
+/// Defensive upper bound on `nregistrations`. The kernel exposes
+/// ~400-500 syscalls on Linux; registering even all of them is well
+/// under this cap. Bounding here closes the same unbounded-deref vector
+/// for the registration array.
+const MAX_REGISTRATIONS: usize = 4096;
+
+fn argv_from_c(
+    argv: *const *const std::os::raw::c_char,
+    argc: u32,
+) -> Option<Vec<String>> {
+    if argv.is_null() {
+        return None;
+    }
+    // Reject argc == 0 here: an empty argv would have us hand the
+    // sandbox an empty command vector, which the supervisor cannot
+    // execute. Failing fast keeps the error surfacing at the FFI
+    // boundary where the C caller can react.
+    if argc == 0 {
+        return None;
+    }
+    // Reject implausible `argc` values before we start dereferencing
+    // `argv`. Without this cap, a caller passing `argc = u32::MAX`
+    // would have us walk 4 billion pointer slots looking for nulls.
+    if argc > MAX_ARGV {
+        return None;
+    }
+    let mut out = Vec::with_capacity(argc as usize);
+    for i in 0..(argc as isize) {
+        let p = unsafe { *argv.offset(i) };
+        if p.is_null() {
+            return None;
+        }
+        let s = unsafe { CStr::from_ptr(p) }.to_str().ok()?.to_owned();
+        out.push(s);
+    }
+    Some(out)
+}
+
+fn collect_registrations(
+    regs: *const sandlock_handler_registration_t,
+    nregs: usize,
+) -> Option<Vec<(i64, FfiHandler)>> {
+    if regs.is_null() && nregs > 0 {
+        return None;
+    }
+    if nregs == 0 {
+        return Some(Vec::new());
+    }
+    // Bound `nregs` before we materialise the slice. An attacker-supplied
+    // `nregs = usize::MAX` would otherwise hand `slice::from_raw_parts`
+    // a length larger than the underlying allocation — UB. The cap is
+    // generous enough for any legitimate caller.
+    if nregs > MAX_REGISTRATIONS {
+        return None;
+    }
+    let slice = unsafe { slice::from_raw_parts(regs, nregs) };
+    // First pass: validate all entries before taking ownership of any.
+    // Without this, a null pointer at index k+1 would leave us having
+    // already consumed handlers [0..k] via `Box::from_raw`; dropping the
+    // partial `out` would free them while the C caller still believes it
+    // owns the originals — a latent double-free via
+    // `sandlock_handler_free`.
+    for r in slice {
+        if r.handler.is_null() {
+            return None;
+        }
+    }
+    // Second pass: ownership transfer. Every pointer is non-null per the
+    // pass above.
+    let mut out = Vec::with_capacity(nregs);
+    for r in slice {
+        // SAFETY: validated non-null above; caller provided pointer from
+        // `sandlock_handler_new` and must not reuse after this call (the
+        // public C ABI doc states ownership transfers in).
+        let h = unsafe { FfiHandler::from_raw(r.handler) };
+        out.push((r.syscall_nr, h));
+    }
+    Some(out)
+}
+
+fn block_on_run(
+    sandbox: &Sandbox,
+    name: Option<String>,
+    cmd: Vec<String>,
+    handlers: Vec<(i64, FfiHandler)>,
+    interactive: bool,
+) -> Option<Result<RunResult, SandlockError>> {
+    // Use a fresh runtime — sandlock-core already pulls in tokio with
+    // rt-multi-thread; this matches the pattern used by the existing
+    // `sandlock_run` path. A panic in an `extern "C"`-reachable path is
+    // UB, so we report runtime-build failure to the caller via `None`
+    // instead of unwrapping.
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .ok()?;
+    let cmd_refs: Vec<&str> = cmd.iter().map(String::as_str).collect();
+    // Apply `name` via the builder method on a clone — mirrors the
+    // pattern used by `sandlock_run` in lib.rs. A `None` here means
+    // "auto-generate `sandbox-{pid}`", matching the C ABI contract.
+    let mut sb = match name {
+        Some(n) => sandbox.clone().with_name(n),
+        None => sandbox.clone(),
+    };
+    Some(rt.block_on(async move {
+        if interactive {
+            sb.run_interactive_with_extra_handlers(&cmd_refs, handlers).await
+        } else {
+            sb.run_with_extra_handlers(&cmd_refs, handlers).await
+        }
+    }))
+}
+
+/// Run the policy with extra C handlers. Returns NULL on failure.
+///
+/// `name` may be NULL to auto-generate `sandbox-{pid}`, or a valid
+/// NUL-terminated UTF-8 C string; the placement mirrors the existing
+/// `sandlock_run` entry point in `lib.rs`.
+///
+/// Declared `extern "C-unwind"` because the handler containers reach
+/// this entry point as part of the registration array and their
+/// user-supplied `ud_drop` may panic when the supervisor frees them
+/// (either during a normal Box-drop or on the early-return cleanup in
+/// `release_registrations`). Unwinding across an `extern "C"` boundary
+/// is undefined behaviour and aborts the process under modern
+/// rustc — `extern "C-unwind"` is the only legal way to let such a
+/// panic propagate to the caller, who can then decide whether to
+/// catch it.
+///
+/// # Safety
+/// All pointer arguments must be valid for their documented lifetimes:
+/// `policy` must come from `sandlock_sandbox_build`, `argv` must be a
+/// readable array of `argc` NUL-terminated strings, and each handler
+/// pointer must come from `sandlock_handler_new` and must not be reused
+/// after this call (ownership transfers in).
+#[no_mangle]
+pub unsafe extern "C-unwind" fn sandlock_run_with_handlers(
+    policy: *const crate::sandlock_sandbox_t,
+    name: *const std::os::raw::c_char,
+    argv: *const *const std::os::raw::c_char,
+    argc: u32,
+    registrations: *const sandlock_handler_registration_t,
+    nregistrations: usize,
+) -> *mut crate::sandlock_result_t {
+    run_with_handlers_inner(policy, name, argv, argc, registrations, nregistrations, false)
+}
+
+/// Interactive-stdio variant of `sandlock_run_with_handlers`.
+///
+/// `name` follows the same convention as `sandlock_run_with_handlers`.
+/// The `extern "C-unwind"` declaration carries the same rationale: a
+/// panicking `ud_drop` must be able to unwind out of this entry point
+/// without process abort.
+///
+/// # Safety
+/// Same constraints as `sandlock_run_with_handlers`.
+#[no_mangle]
+pub unsafe extern "C-unwind" fn sandlock_run_interactive_with_handlers(
+    policy: *const crate::sandlock_sandbox_t,
+    name: *const std::os::raw::c_char,
+    argv: *const *const std::os::raw::c_char,
+    argc: u32,
+    registrations: *const sandlock_handler_registration_t,
+    nregistrations: usize,
+) -> *mut crate::sandlock_result_t {
+    run_with_handlers_inner(policy, name, argv, argc, registrations, nregistrations, true)
+}
+
+/// Drops every non-null handler pointer in the registration array.
+/// Used by [`run_with_handlers_inner`] on early-return paths where
+/// `collect_registrations` was not reached — guarantees the C ABI
+/// contract "all handler pointers are consumed by this call".
+///
+/// Each per-element drop runs an arbitrary, user-supplied `ud_drop`
+/// that may panic. Without protection, a panic mid-loop would unwind
+/// past the remaining handlers — leaving them allocated and violating
+/// the "array consumed as a whole" contract (partial-consume leak).
+/// We wrap each drop in `catch_unwind`, remember the first panic, and
+/// re-raise it after the loop completes via `resume_unwind`. The
+/// caller is `extern "C-unwind"` so the propagated panic is legal at
+/// the FFI boundary, while every handler container is still released.
+///
+/// # Safety
+/// `regs` is either null (no-op) or points to `nregs` valid
+/// `sandlock_handler_registration_t` slots whose `handler` pointer is
+/// either null or comes from `sandlock_handler_new` and has not been
+/// freed by anyone else.
+unsafe fn release_registrations(
+    regs: *const sandlock_handler_registration_t,
+    nregs: usize,
+) {
+    if regs.is_null() || nregs == 0 {
+        return;
+    }
+    // Apply the same defensive cap as `collect_registrations`. We get
+    // here from early-return paths in `run_with_handlers_inner` where
+    // `collect_registrations` may not have validated yet — without the
+    // cap, an attacker-supplied `nregs = usize::MAX` would feed
+    // `slice::from_raw_parts` a bogus length. Out-of-range counts
+    // can't have come from a valid registration array; refuse the
+    // walk entirely. The C-ABI "always consume" contract is then
+    // moot because no legitimate caller can hit this branch.
+    if nregs > MAX_REGISTRATIONS {
+        return;
+    }
+    let slice = slice::from_raw_parts(regs, nregs);
+    let mut first_panic: Option<Box<dyn std::any::Any + Send>> = None;
+    for r in slice {
+        if !r.handler.is_null() {
+            let h = r.handler;
+            // SAFETY: `h` is non-null and came from `sandlock_handler_new`
+            // per the type contract. The closure is `AssertUnwindSafe`
+            // because the only state crossing the unwind boundary is the
+            // raw pointer (consumed by `Box::from_raw`) — no shared
+            // references with broken invariants.
+            let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+                drop(Box::from_raw(h));
+            }));
+            if let Err(payload) = result {
+                if first_panic.is_none() {
+                    first_panic = Some(payload);
+                }
+                // Subsequent panics are dropped: they would compose
+                // into "panic during panic" → abort. Keeping only the
+                // first preserves the original failure context for the
+                // outer caller while still finishing the loop.
+            }
+        }
+    }
+    if let Some(payload) = first_panic {
+        // Re-raise the first captured panic. The outer entry point is
+        // `extern "C-unwind"` so this propagates legally to the C
+        // caller, who can decide whether to catch it.
+        std::panic::resume_unwind(payload);
+    }
+}
+
+unsafe fn run_with_handlers_inner(
+    policy: *const crate::sandlock_sandbox_t,
+    name: *const std::os::raw::c_char,
+    argv: *const *const std::os::raw::c_char,
+    argc: u32,
+    registrations: *const sandlock_handler_registration_t,
+    nregistrations: usize,
+    interactive: bool,
+) -> *mut crate::sandlock_result_t {
+    if policy.is_null() {
+        // Honour the documented contract: ownership of every handler
+        // pointer transfers in on entry, regardless of return value.
+        release_registrations(registrations, nregistrations);
+        return std::ptr::null_mut();
+    }
+    // Decode the optional name eagerly so a malformed (non-UTF-8) C
+    // string fails the call fast, before we take ownership of any
+    // handler containers via `collect_registrations`. Matches the
+    // contract used by `sandlock_run`.
+    let name_opt: Option<String> = if name.is_null() {
+        None
+    } else {
+        match CStr::from_ptr(name).to_str() {
+            Ok(s) => Some(s.to_owned()),
+            Err(_) => {
+                release_registrations(registrations, nregistrations);
+                return std::ptr::null_mut();
+            }
+        }
+    };
+    let cmd = match argv_from_c(argv, argc) {
+        Some(v) => v,
+        None => {
+            release_registrations(registrations, nregistrations);
+            return std::ptr::null_mut();
+        }
+    };
+    let handlers = match collect_registrations(registrations, nregistrations) {
+        Some(v) => v,
+        None => {
+            // Validation failed (null handler in the array). The
+            // non-null handlers in the array have not been taken into
+            // FfiHandler ownership by `collect_registrations` (it is
+            // validate-first), but the public C-ABI contract guarantees
+            // "array consumed as a whole" — release them here so the C
+            // caller is never responsible for any registered pointer
+            // after this call returns.
+            release_registrations(registrations, nregistrations);
+            return std::ptr::null_mut();
+        }
+    };
+    let sandbox_ref: &Sandbox = (*policy).inner();
+    match block_on_run(sandbox_ref, name_opt, cmd, handlers, interactive) {
+        Some(Ok(rr)) => {
+            let boxed = Box::new(crate::sandlock_result_t::from_run_result(rr));
+            Box::into_raw(boxed)
+        }
+        _ => std::ptr::null_mut(),
+    }
+}
diff --git a/crates/sandlock-ffi/src/lib.rs b/crates/sandlock-ffi/src/lib.rs
index edf2917..d6b165a 100644
--- a/crates/sandlock-ffi/src/lib.rs
+++ b/crates/sandlock-ffi/src/lib.rs
@@ -12,6 +12,9 @@ use sandlock_core::pipeline::Stage;
 use sandlock_core::sandbox::{BranchAction, ByteSize, FsIsolation, SandboxBuilder};
 use sandlock_core::{Sandbox, RunResult};
 
+pub mod handler;
+pub mod notif_repr;
+
 // ----------------------------------------------------------------
 // Opaque wrapper types
 // ----------------------------------------------------------------
@@ -22,6 +25,24 @@ pub struct sandlock_sandbox_t {
     _private: Sandbox,
 }
 
+impl sandlock_sandbox_t {
+    /// Crate-private accessor used by `handler/run.rs` to reach the inner
+    /// `Sandbox` when wiring `sandlock_run_with_handlers`. Public-API
+    /// callers still go through the opaque-pointer functions in this
+    /// module.
+    pub(crate) fn inner(&self) -> &Sandbox {
+        &self._private
+    }
+}
+
+impl sandlock_result_t {
+    /// Crate-private constructor used by `handler/run.rs` to wrap a
+    /// freshly-produced [`RunResult`] in the opaque public type.
+    pub(crate) fn from_run_result(rr: RunResult) -> Self {
+        Self { _private: rr }
+    }
+}
+
 /// Opaque handle wrapping a [`RunResult`].
 #[repr(C)]
 pub struct sandlock_result_t {
diff --git a/crates/sandlock-ffi/src/notif_repr.rs b/crates/sandlock-ffi/src/notif_repr.rs
new file mode 100644
index 0000000..60c7e96
--- /dev/null
+++ b/crates/sandlock-ffi/src/notif_repr.rs
@@ -0,0 +1,37 @@
+//! `repr(C)` snapshot of a `SeccompNotif`. The C side reads this struct
+//! by value; no pointers into Rust memory live past the callback return.
+
+use sandlock_core::SeccompNotif;
+
+/// Stable wire-layout snapshot of a seccomp notification.
+///
+/// Field order, types, and padding must match `sandlock.h` exactly. The
+/// size assertion in `tests/handler_smoke.rs` guards against accidental
+/// drift; if a new field is added, bump the documented size and update
+/// the C header in the same commit.
+#[repr(C)]
+#[derive(Clone, Copy)]
+#[allow(non_camel_case_types)]
+pub struct sandlock_notif_data_t {
+    pub id: u64,
+    pub pid: u32,
+    pub flags: u32,
+    pub syscall_nr: i32,
+    pub arch: u32,
+    pub instruction_pointer: u64,
+    pub args: [u64; 6],
+}
+
+impl From<&SeccompNotif> for sandlock_notif_data_t {
+    fn from(n: &SeccompNotif) -> Self {
+        Self {
+            id: n.id,
+            pid: n.pid,
+            flags: n.flags,
+            syscall_nr: n.data.nr,
+            arch: n.data.arch,
+            instruction_pointer: n.data.instruction_pointer,
+            args: n.data.args,
+        }
+    }
+}
diff --git a/crates/sandlock-ffi/tests/c/handler_smoke.c b/crates/sandlock-ffi/tests/c/handler_smoke.c
new file mode 100644
index 0000000..e061a59
--- /dev/null
+++ b/crates/sandlock-ffi/tests/c/handler_smoke.c
@@ -0,0 +1,108 @@
+/* Canonical C example for sandlock's Handler ABI.
+ *
+ * Builds a sandbox, registers a single handler on SYS_getpid that
+ * forces a synthetic return value of 777, runs the system python3
+ * interpreter with an inline script that prints os.getpid(), and
+ * asserts that the captured stdout contains "777".
+ *
+ * Downstream consumers writing C/Python/etc. bindings can copy this
+ * file as a starting point.
+ */
+#define _GNU_SOURCE
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "sandlock.h"
+
+static int force_getpid_to_777(
+    void *ud,
+    const sandlock_notif_data_t *notif,
+    sandlock_mem_handle_t *mem,
+    sandlock_action_out_t *out
+) {
+    (void)ud;
+    (void)notif;
+    (void)mem;
+    sandlock_action_set_return_value(out, 777);
+    return 0;
+}
+
+int main(void) {
+    /* Build a sandbox that exposes just enough of the host for the
+     * system python3 interpreter to start. Mirrors the read mounts
+     * from the Rust integration test in tests/handler_smoke.rs. */
+    sandlock_builder_t *b = sandlock_sandbox_builder_new();
+    b = sandlock_sandbox_builder_fs_read(b, "/usr");
+    b = sandlock_sandbox_builder_fs_read(b, "/bin");
+    b = sandlock_sandbox_builder_fs_read(b, "/lib");
+    b = sandlock_sandbox_builder_fs_read(b, "/lib64");
+    b = sandlock_sandbox_builder_fs_read(b, "/etc");
+    b = sandlock_sandbox_builder_fs_write(b, "/tmp");
+
+    int err = 0;
+    sandlock_sandbox_t *p = sandlock_sandbox_build(b, &err, NULL);
+    if (p == NULL) {
+        fprintf(stderr, "sandlock: sandbox build failed: err=%d\n", err);
+        return 1;
+    }
+
+    sandlock_handler_t *h = sandlock_handler_new(
+        force_getpid_to_777, NULL, NULL, SANDLOCK_EXCEPTION_KILL);
+    if (h == NULL) {
+        fprintf(stderr, "sandlock: handler_new returned NULL\n");
+        sandlock_sandbox_free(p);
+        return 1;
+    }
+
+    sandlock_handler_registration_t regs[1] = {
+        { .syscall_nr = SYS_getpid, .handler = h },
+    };
+
+    /* Invoke python3 directly (no `/usr/bin/env` shim) so the
+     * interpreter does not chase venv pyvenv.cfg files outside the
+     * sandbox's read allowlist. */
+    const char *argv[] = {
+        "/usr/bin/python3",
+        "-c",
+        "import os, sys; sys.stdout.write('GOT:' + str(os.getpid()))",
+    };
+
+    sandlock_result_t *rr = sandlock_run_with_handlers(
+        p, NULL /* name: auto-generate sandbox-{pid} */, argv, 3, regs, 1);
+    if (rr == NULL) {
+        fprintf(stderr, "sandlock: run_with_handlers returned NULL\n");
+        /* On NULL return, do NOT free handler `h`: ownership of every
+         * registered handler transfers in on entry regardless of the
+         * return value, so freeing it here would be a double-free. */
+        sandlock_sandbox_free(p);
+        return 1;
+    }
+
+    size_t len = 0;
+    const uint8_t *stdout_bytes = sandlock_result_stdout_bytes(rr, &len);
+    if (stdout_bytes == NULL) {
+        fprintf(stderr, "sandlock: no stdout captured\n");
+        sandlock_result_free(rr);
+        sandlock_sandbox_free(p);
+        return 1;
+    }
+    fwrite(stdout_bytes, 1, len, stdout);
+    fputc('\n', stdout);
+
+    int contains_777 =
+        (memmem(stdout_bytes, len, "GOT:777", 7) != NULL);
+
+    sandlock_result_free(rr);
+    sandlock_sandbox_free(p);
+
+    if (!contains_777) {
+        fprintf(stderr, "expected 'GOT:777' in child stdout\n");
+        return 1;
+    }
+    return 0;
+}
diff --git a/crates/sandlock-ffi/tests/c_smoke.rs b/crates/sandlock-ffi/tests/c_smoke.rs
new file mode 100644
index 0000000..a1c8ad2
--- /dev/null
+++ b/crates/sandlock-ffi/tests/c_smoke.rs
@@ -0,0 +1,52 @@
+//! Compile and run the pure-C smoke test against the cdylib.
+
+#[test]
+fn c_smoke_compiles_and_runs() {
+    use std::path::PathBuf;
+    use std::process::Command;
+
+    let out_dir = PathBuf::from(env!("CARGO_TARGET_TMPDIR"));
+    let bin = out_dir.join("handler_smoke");
+    let profile = if cfg!(debug_assertions) { "debug" } else { "release" };
+    let cdylib_dir = std::env::var_os("CARGO_TARGET_DIR")
+        .map(PathBuf::from)
+        .unwrap_or_else(|| {
+            PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+                .parent()
+                .unwrap()
+                .parent()
+                .unwrap()
+                .join("target")
+        })
+        .join(profile);
+
+    let rpath_arg = format!("-Wl,-rpath,{}", cdylib_dir.to_str().unwrap());
+
+    let status = Command::new("cc")
+        .args([
+            "-std=c11",
+            "-Wall",
+            "-Wextra",
+            "-Werror",
+            "-I",
+            concat!(env!("CARGO_MANIFEST_DIR"), "/include"),
+            concat!(env!("CARGO_MANIFEST_DIR"), "/tests/c/handler_smoke.c"),
+            "-L",
+            cdylib_dir.to_str().unwrap(),
+            &rpath_arg,
+            "-lsandlock_ffi",
+            "-o",
+            bin.to_str().unwrap(),
+        ])
+        .status()
+        .expect("cc invocation");
+    assert!(status.success(), "C compile failed");
+
+    let out = Command::new(&bin).output().expect("run handler_smoke");
+    assert!(
+        out.status.success(),
+        "handler_smoke exited non-zero: stdout={:?} stderr={:?}",
+        String::from_utf8_lossy(&out.stdout),
+        String::from_utf8_lossy(&out.stderr)
+    );
+}
diff --git a/crates/sandlock-ffi/tests/handler_smoke.rs b/crates/sandlock-ffi/tests/handler_smoke.rs
new file mode 100644
index 0000000..2e41224
--- /dev/null
+++ b/crates/sandlock-ffi/tests/handler_smoke.rs
@@ -0,0 +1,2097 @@
+//! Integration smoke tests for the FFI handler ABI.
+
+use sandlock_ffi::notif_repr::sandlock_notif_data_t;
+
+#[test]
+fn notif_data_layout_matches_documented_size() {
+    // 8 + 4 + 4 + 4 + 4 + 8 + 6*8 = 80 bytes. If this changes, the C header
+    // and any external consumers need to be updated together.
+    assert_eq!(std::mem::size_of::<sandlock_notif_data_t>(), 80);
+    assert_eq!(std::mem::align_of::<sandlock_notif_data_t>(), 8);
+}
+
+#[test]
+fn notif_data_from_seccomp_notif_copies_all_fields() {
+    use sandlock_core::{SeccompData, SeccompNotif};
+
+    let notif = SeccompNotif {
+        id: 0xDEAD_BEEF_CAFE_F00D,
+        pid: 4242,
+        flags: 7,
+        data: SeccompData {
+            nr: 21, // SYS_access on x86_64
+            arch: 0xC000_003E,
+            instruction_pointer: 0x7FFF_FFFF_AAAA,
+            args: [1, 2, 3, 4, 5, 6],
+        },
+    };
+    let snap = sandlock_notif_data_t::from(&notif);
+    assert_eq!(snap.id, 0xDEAD_BEEF_CAFE_F00D);
+    assert_eq!(snap.pid, 4242);
+    assert_eq!(snap.flags, 7);
+    assert_eq!(snap.syscall_nr, 21);
+    assert_eq!(snap.arch, 0xC000_003E);
+    assert_eq!(snap.instruction_pointer, 0x7FFF_FFFF_AAAA);
+    assert_eq!(snap.args, [1, 2, 3, 4, 5, 6]);
+}
+
+use sandlock_ffi::handler::{
+    sandlock_mem_read, sandlock_mem_read_cstr, sandlock_mem_write,
+};
+
+#[test]
+fn mem_accessors_reject_null_arguments() {
+    // Verifies the null-pointer guards in each accessor. Happy-path
+    // coverage with a live notif_fd is exercised by the end-to-end
+    // tests further down this file.
+    let mut buf = [0u8; 4];
+    let mut out_len: usize = 0;
+    let p = std::ptr::null();
+    unsafe {
+        assert_eq!(
+            sandlock_mem_read_cstr(p, 0, buf.as_mut_ptr(), buf.len(), &mut out_len),
+            -1,
+            "read_cstr should reject null handle",
+        );
+        assert_eq!(
+            sandlock_mem_read(p, 0, buf.as_mut_ptr(), buf.len(), &mut out_len),
+            -1,
+            "read should reject null handle",
+        );
+        assert_eq!(
+            sandlock_mem_write(p, 0, buf.as_ptr(), buf.len()),
+            -1,
+            "write should reject null handle",
+        );
+    }
+}
+
+use sandlock_ffi::handler::{
+    sandlock_action_kind_t, sandlock_action_out_t, sandlock_action_set_continue,
+    sandlock_action_set_errno, sandlock_action_set_hold, sandlock_action_set_kill,
+    sandlock_action_set_return_value,
+};
+
+#[test]
+fn action_setters_record_kind_and_payload() {
+    let mut a = sandlock_action_out_t::zeroed();
+
+    // Plant a sentinel covering the first 8 bytes of the union (the
+    // largest scalar variant) before each tag-only setter. A setter
+    // documented as "kind only" that accidentally stomps the payload
+    // would clobber the sentinel.
+    const SENTINEL: u64 = 0xDEAD_BEEF_CAFE_F00D;
+
+    // Writing through a union field is safe; reading is unsafe (we
+    // might be looking at bytes deposited by a different variant). The
+    // sentinel writes therefore need no `unsafe`, the post-condition
+    // reads do.
+    a.payload.none = SENTINEL;
+    unsafe { sandlock_action_set_continue(&mut a) };
+    assert_eq!(a.kind, sandlock_action_kind_t::Continue as u32);
+    assert_eq!(unsafe { a.payload.none }, SENTINEL,
+        "set_continue must be tag-only and leave payload untouched");
+
+    unsafe { sandlock_action_set_errno(&mut a, 13) };
+    assert_eq!(a.kind, sandlock_action_kind_t::Errno as u32);
+    assert_eq!(unsafe { a.payload.errno_value }, 13);
+
+    unsafe { sandlock_action_set_return_value(&mut a, -1) };
+    assert_eq!(a.kind, sandlock_action_kind_t::ReturnValue as u32);
+    assert_eq!(unsafe { a.payload.return_value }, -1);
+
+    a.payload.none = SENTINEL;
+    unsafe { sandlock_action_set_hold(&mut a) };
+    assert_eq!(a.kind, sandlock_action_kind_t::Hold as u32);
+    assert_eq!(unsafe { a.payload.none }, SENTINEL,
+        "set_hold must be tag-only and leave payload untouched");
+
+    unsafe { sandlock_action_set_kill(&mut a, libc::SIGKILL, 4321) };
+    assert_eq!(a.kind, sandlock_action_kind_t::Kill as u32);
+    assert_eq!(unsafe { a.payload.kill.sig }, libc::SIGKILL);
+    assert_eq!(unsafe { a.payload.kill.pgid }, 4321);
+}
+
+#[test]
+fn action_out_layout_is_stable() {
+    // Size + align are gross guards; pin down field offsets so a
+    // field reorder that preserves size still gets caught.
+    use std::mem::{align_of, size_of, MaybeUninit};
+    use sandlock_ffi::handler::sandlock_action_out_t;
+
+    assert_eq!(size_of::<sandlock_action_out_t>(), 24,
+        "size drift breaks the C ABI layout");
+    assert_eq!(align_of::<sandlock_action_out_t>(), 8,
+        "align drift breaks the C ABI layout");
+
+    // Hand-roll offset_of through MaybeUninit — works on stable Rust
+    // without an extra crate. The C header has kind at offset 0 and
+    // payload at offset 8 (4 bytes implicit padding after kind).
+    let mut probe = MaybeUninit::<sandlock_action_out_t>::uninit();
+    let base = probe.as_mut_ptr() as usize;
+    let kind_offset = unsafe { std::ptr::addr_of_mut!((*probe.as_mut_ptr()).kind) as usize - base };
+    let payload_offset = unsafe { std::ptr::addr_of_mut!((*probe.as_mut_ptr()).payload) as usize - base };
+    assert_eq!(kind_offset, 0, "kind must be at offset 0");
+    assert_eq!(payload_offset, 8, "payload must be at offset 8 (kind+4 bytes padding)");
+}
+
+#[test]
+fn notif_data_field_offsets_are_stable() {
+    use std::mem::MaybeUninit;
+    use sandlock_ffi::notif_repr::sandlock_notif_data_t;
+
+    let probe = MaybeUninit::<sandlock_notif_data_t>::uninit();
+    let base = probe.as_ptr() as usize;
+
+    // C header order: id(u64), pid(u32), flags(u32), syscall_nr(i32),
+    // arch(u32), instruction_pointer(u64), args([u64;6]). Each
+    // `addr_of!` is cast to `*const u8` so the closure-free subtraction
+    // works uniformly across the heterogeneous field types.
+    assert_eq!(
+        unsafe { std::ptr::addr_of!((*probe.as_ptr()).id) as *const u8 as usize - base },
+        0,
+        "id must be at offset 0",
+    );
+    assert_eq!(
+        unsafe { std::ptr::addr_of!((*probe.as_ptr()).pid) as *const u8 as usize - base },
+        8,
+        "pid must be at offset 8",
+    );
+    assert_eq!(
+        unsafe { std::ptr::addr_of!((*probe.as_ptr()).flags) as *const u8 as usize - base },
+        12,
+        "flags must be at offset 12",
+    );
+    assert_eq!(
+        unsafe { std::ptr::addr_of!((*probe.as_ptr()).syscall_nr) as *const u8 as usize - base },
+        16,
+        "syscall_nr must be at offset 16",
+    );
+    assert_eq!(
+        unsafe { std::ptr::addr_of!((*probe.as_ptr()).arch) as *const u8 as usize - base },
+        20,
+        "arch must be at offset 20",
+    );
+    assert_eq!(
+        unsafe { std::ptr::addr_of!((*probe.as_ptr()).instruction_pointer) as *const u8 as usize - base },
+        24,
+        "instruction_pointer must be at offset 24",
+    );
+    assert_eq!(
+        unsafe { std::ptr::addr_of!((*probe.as_ptr()).args) as *const u8 as usize - base },
+        32,
+        "args must be at offset 32",
+    );
+}
+
+use sandlock_ffi::handler::{
+    sandlock_exception_policy_t, sandlock_handler_free, sandlock_handler_fn_t,
+    sandlock_handler_new, sandlock_handler_t,
+};
+
+extern "C-unwind" fn test_handler(
+    _ud: *mut std::ffi::c_void,
+    _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t,
+    _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t,
+    out: *mut sandlock_ffi::handler::sandlock_action_out_t,
+) -> i32 {
+    unsafe { sandlock_ffi::handler::sandlock_action_set_continue(out) };
+    0
+}
+
+static ROUND_TRIP_DROPPER_CALLS: std::sync::atomic::AtomicUsize =
+    std::sync::atomic::AtomicUsize::new(0);
+
+extern "C-unwind" fn round_trip_dropper(ud: *mut std::ffi::c_void) {
+    // Reclaim the leaked Box so its destructor runs (real drop path).
+    unsafe { drop(Box::from_raw(ud as *mut u32)); }
+    ROUND_TRIP_DROPPER_CALLS.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
+}
+
+#[test]
+fn handler_new_and_free_round_trip() {
+    // Reset in case another test in the binary touched this counter.
+    ROUND_TRIP_DROPPER_CALLS.store(0, std::sync::atomic::Ordering::SeqCst);
+
+    let ud = Box::into_raw(Box::new(0xABCDu32)) as *mut std::ffi::c_void;
+    let on_ex = sandlock_exception_policy_t::Kill as u32;
+    let h: *mut sandlock_handler_t = unsafe {
+        sandlock_handler_new(
+            Some(test_handler as sandlock_handler_fn_t),
+            ud,
+            Some(round_trip_dropper),
+            on_ex,
+        )
+    };
+    assert!(!h.is_null());
+    assert_eq!(
+        ROUND_TRIP_DROPPER_CALLS.load(std::sync::atomic::Ordering::SeqCst),
+        0,
+        "dropper must not fire before sandlock_handler_free",
+    );
+
+    unsafe { sandlock_handler_free(h) };
+
+    assert_eq!(
+        ROUND_TRIP_DROPPER_CALLS.load(std::sync::atomic::Ordering::SeqCst),
+        1,
+        "dropper must fire exactly once during Drop",
+    );
+}
+
+#[test]
+fn handler_new_rejects_invalid_exception_policy() {
+    // Cover the boundary (one past the highest valid Continue=2),
+    // a mid-range value, and the extreme u32::MAX. A mutation that
+    // rejects only specific values would fail at least one of these.
+ for bad in [3u32, 4u32, 99u32, u32::MAX] { + let h = unsafe { + sandlock_handler_new( + Some(test_handler as sandlock_handler_fn_t), + std::ptr::null_mut(), + None, + bad, + ) + }; + assert!(h.is_null(), "expected null for on_exception={bad}"); + } +} + +use sandlock_core::seccomp::dispatch::{Handler, HandlerCtx}; +use sandlock_core::seccomp::notif::NotifAction; +use sandlock_core::{SeccompData, SeccompNotif}; +use sandlock_ffi::handler::FfiHandler; + +fn fake_ctx() -> HandlerCtx { + HandlerCtx { + notif: SeccompNotif { + id: 1, pid: std::process::id(), flags: 0, + data: SeccompData { nr: 39, arch: 0xC000003E, + instruction_pointer: 0, args: [0; 6] }, + }, + notif_fd: -1, + } +} + +/// Spawn a `sleep 30` child that immediately calls `setpgid(0, 0)` so +/// it becomes its own pgid leader (distinct from the supervisor's +/// pgid). Returns a `HandlerCtx` carrying the child's pid plus the +/// `Child` handle so the caller can reap it. +/// +/// Use this in tests that need `FfiHandler::handle` to produce +/// `child_pgid != UNSAFE_PGID` — i.e., where the exception policy's +/// `Kill` arm must remain observable. `fake_ctx()` cannot satisfy +/// that requirement because the test process IS the supervisor, so +/// `getpgid(std::process::id()) == getpgid(0)` and the +/// `pgid == supervisor_pgid` guard would trip, yielding `UNSAFE_PGID` +/// and degrading the policy to `Errno(EPERM)`. +fn fake_ctx_with_isolated_child() -> (HandlerCtx, std::process::Child) { + use std::os::unix::process::CommandExt; + let mut cmd = std::process::Command::new("sleep"); + cmd.arg("30"); + // SAFETY: `setpgid` is async-signal-safe; pid=0 acts on the + // calling process; pgid=0 creates a new group whose leader is the + // calling process. 
+ unsafe { + cmd.pre_exec(|| { + if libc::setpgid(0, 0) != 0 { + return Err(std::io::Error::last_os_error()); + } + Ok(()) + }); + } + let child = cmd.spawn().expect("spawn sleep child"); + let child_pid = child.id() as i32; + // pre_exec runs after fork; poll briefly for the kernel to + // observe the pgid change. + let supervisor_pgid = unsafe { libc::getpgid(0) }; + for _ in 0..50 { + // SAFETY: signal-safe; positive pid. + let resolved = unsafe { libc::getpgid(child_pid) }; + if resolved == child_pid && resolved != supervisor_pgid { + break; + } + std::thread::sleep(std::time::Duration::from_millis(10)); + } + let resolved = unsafe { libc::getpgid(child_pid) }; + assert_eq!( + resolved, child_pid, + "precondition: pre_exec setpgid(0,0) did not take effect (resolved={resolved})", + ); + assert_ne!( + resolved, supervisor_pgid, + "precondition: child's pgid must differ from supervisor's", + ); + let ctx = HandlerCtx { + notif: SeccompNotif { + id: 1, pid: child_pid as u32, flags: 0, + data: SeccompData { nr: 39, arch: 0xC000003E, + instruction_pointer: 0, args: [0; 6] }, + }, + notif_fd: -1, + }; + (ctx, child) +} + +extern "C-unwind" fn return_value_42( + _ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + unsafe { sandlock_ffi::handler::sandlock_action_set_return_value(out, 42) }; + 0 +} + +extern "C-unwind" fn returns_error_with_unset_action( + _ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + _out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + -1 +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn ffi_handler_translates_return_value() { + let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(return_value_42), + 
std::ptr::null_mut(), + None, + sandlock_exception_policy_t::Kill as u32, + ) + }; + let h = unsafe { FfiHandler::from_raw(raw) }; + let cx = fake_ctx(); + let action = h.handle(&cx).await; + assert!(matches!(action, NotifAction::ReturnValue(42))); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn ffi_handler_applies_exception_policy_on_failure() { + let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(returns_error_with_unset_action), + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::DenyEperm as u32, + ) + }; + let h = unsafe { FfiHandler::from_raw(raw) }; + let cx = fake_ctx(); + let action = h.handle(&cx).await; + assert!(matches!(action, NotifAction::Errno(e) if e == libc::EPERM)); +} + +use std::ffi::CString; +use sandlock_ffi::handler::{ + sandlock_handler_registration_t, sandlock_run_with_handlers, +}; + +extern "C-unwind" fn force_getpid_to_777( + _ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + unsafe { sandlock_ffi::handler::sandlock_action_set_return_value(out, 777) }; + 0 +} + +#[test] +fn run_with_handlers_intercepts_getpid() { + use sandlock_ffi::*; // bring in builder + result symbols + + let builder = sandlock_sandbox_builder_new(); + // Allow the runtime bits the child needs. The exact mounts mirror + // sandlock's own integration tests — read-only access to the system + // libraries and the python interpreter, plus a writable /tmp. 
+ let builder = unsafe { + let p = CString::new("/usr").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/bin").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/lib").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/lib64").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/etc").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/tmp").unwrap(); + sandlock_sandbox_builder_fs_write(builder, p.as_ptr()) + }; + + let policy = { + let mut err: i32 = 0; + unsafe { sandlock_sandbox_build(builder, &mut err, std::ptr::null_mut()) } + }; + assert!(!policy.is_null(), "policy build failed"); + + let handler = unsafe { + handler::sandlock_handler_new( + Some(force_getpid_to_777), + std::ptr::null_mut(), + None, + handler::sandlock_exception_policy_t::Kill as u32, + ) + }; + assert!(!handler.is_null(), "handler_new returned null"); + let registrations = [sandlock_handler_registration_t { + syscall_nr: libc::SYS_getpid, + handler, + }]; + + let script = CString::new( + "import os, sys; sys.stdout.write(str(os.getpid()))", + ).unwrap(); + // Use the system python3 directly. Running through `/usr/bin/env + // python3` would pick up any venv shim in $PATH whose pyvenv.cfg + // sits outside the sandbox's read allowlist and fail before our + // handler ever gets a chance to fire. 
+ let arg0 = CString::new("/usr/bin/python3").unwrap(); + let arg1 = CString::new("-c").unwrap(); + let argv = [ + arg0.as_ptr(), + arg1.as_ptr(), + script.as_ptr(), + ]; + + let rr = unsafe { + sandlock_run_with_handlers( + policy, + std::ptr::null(), // name: auto-generate `sandbox-{pid}` + argv.as_ptr(), + argv.len() as u32, + registrations.as_ptr(), + registrations.len(), + ) + }; + assert!(!rr.is_null(), "sandlock_run_with_handlers returned null"); + let stdout = unsafe { + let mut len: usize = 0; + let p = sandlock_result_stdout_bytes(rr, &mut len); + if p.is_null() { Vec::new() } else { std::slice::from_raw_parts(p, len).to_vec() } + }; + let stderr = unsafe { + let mut len: usize = 0; + let p = sandlock_result_stderr_bytes(rr, &mut len); + if p.is_null() { Vec::new() } else { std::slice::from_raw_parts(p, len).to_vec() } + }; + let stdout_str = String::from_utf8_lossy(&stdout); + let stderr_str = String::from_utf8_lossy(&stderr); + let exit_code = unsafe { sandlock_result_exit_code(rr) }; + // The child script writes exactly `str(os.getpid())` with + // `sys.stdout.write`, so no trailing newline is expected. Match + // the full stdout — a substring check would silently pass on a + // mutation that broke dispatch when the real pid happened to + // contain "777" (pids 7770-7779, 17770-17779, ...). + assert_eq!(stdout_str.trim_end_matches('\n'), "777", + "expected getpid to be intercepted; exit={} stdout={:?} stderr={:?}", + exit_code, stdout_str, stderr_str); + + unsafe { sandlock_result_free(rr); } + unsafe { sandlock_sandbox_free(policy); } +} + +// --------------------------------------------------------------------------- +// Expanded coverage +// --------------------------------------------------------------------------- +// +// The tests below probe each remaining branch of the handler ABI surface: +// +// * Group A: setters for the inject-fd variants and null-pointer safety. +// * Group B: every `NotifAction` translation the dispatcher must produce. 
+// * Group C: exception-policy fallbacks beyond the default `DenyEperm`. +// * Group D: panic recovery across the FFI boundary. +// * Group E: `Unset` action when the callback returns 0 but never sets one. +// * Group F: `sandlock_handler_new` edge cases (null fn / null ud + dropper). +// * Group G: `sandlock_run_with_handlers` failure paths and ownership. +// * Group H: multiple handlers each firing for their own syscall. +// * Group I: live-fd `sandlock_mem_read_cstr` via an intercepted `openat`. +// +// Style mirrors the existing end-to-end test: explicit `extern "C-unwind"` +// handler fns, no helper macros, `assert!(matches!(...))` for action +// variants. + +use sandlock_ffi::handler::sandlock_action_set_inject_fd_send; + +// ---- Group A: action setters -------------------------------------------- + +#[test] +fn action_inject_fd_send_setter_records_payload() { + let mut a = sandlock_action_out_t::zeroed(); + // O_CLOEXEC is the canonical flag a handler would pass through. + unsafe { sandlock_action_set_inject_fd_send(&mut a, 42, 0o2000000) }; + assert_eq!(a.kind, sandlock_action_kind_t::InjectFdSend as u32); + // Safety: kind == InjectFdSend selects the `inject_send` union arm + // (matches the ABI contract documented on the setter). + assert_eq!(unsafe { a.payload.inject_send.srcfd }, 42); + assert_eq!(unsafe { a.payload.inject_send.newfd_flags }, 0o2000000); +} + +#[test] +fn action_setters_are_null_safe() { + // Safety: each setter documents null as a no-op; this test is the + // executable form of that contract. If any setter dereferences null + // the process aborts and the test reports failure. 
+ unsafe { + sandlock_action_set_continue(std::ptr::null_mut()); + sandlock_action_set_errno(std::ptr::null_mut(), 13); + sandlock_action_set_return_value(std::ptr::null_mut(), -1); + sandlock_action_set_hold(std::ptr::null_mut()); + sandlock_action_set_kill(std::ptr::null_mut(), libc::SIGKILL, 0); + sandlock_action_set_inject_fd_send(std::ptr::null_mut(), 0, 0); + } +} + +// ---- Group B: FfiHandler translation ------------------------------------ +// +// Each variant gets its own explicit `extern "C-unwind"` handler so the +// test retains the line-by-line transparency of the existing tests rather +// than hiding setup behind a macro. + +extern "C-unwind" fn handler_set_continue( + _ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + unsafe { sandlock_ffi::handler::sandlock_action_set_continue(out) }; + 0 +} + +extern "C-unwind" fn handler_set_errno_eacces( + _ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + unsafe { sandlock_ffi::handler::sandlock_action_set_errno(out, 13) }; + 0 +} + +extern "C-unwind" fn handler_set_hold( + _ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + unsafe { sandlock_ffi::handler::sandlock_action_set_hold(out) }; + 0 +} + +extern "C-unwind" fn handler_set_kill_sigterm_1234( + _ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + unsafe { 
sandlock_ffi::handler::sandlock_action_set_kill(out, libc::SIGTERM, 1234) }; + 0 +} + +extern "C-unwind" fn handler_set_kill_sigkill_zero_pgid( + _ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + unsafe { sandlock_ffi::handler::sandlock_action_set_kill(out, libc::SIGKILL, 0) }; + 0 +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn ffi_handler_translates_continue() { + let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(handler_set_continue), + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::Kill as u32, + ) + }; + // Safety: `raw` was just produced by `sandlock_handler_new` and is + // non-null; ownership transfers into the adapter. + let h = unsafe { FfiHandler::from_raw(raw) }; + let action = h.handle(&fake_ctx()).await; + assert!(matches!(action, NotifAction::Continue)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn ffi_handler_translates_errno() { + let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(handler_set_errno_eacces), + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::Kill as u32, + ) + }; + // Safety: see `ffi_handler_translates_continue`. + let h = unsafe { FfiHandler::from_raw(raw) }; + let action = h.handle(&fake_ctx()).await; + assert!(matches!(action, NotifAction::Errno(e) if e == 13)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn ffi_handler_translates_hold() { + let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(handler_set_hold), + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::Kill as u32, + ) + }; + // Safety: see `ffi_handler_translates_continue`. 
+ let h = unsafe { FfiHandler::from_raw(raw) }; + let action = h.handle(&fake_ctx()).await; + assert!(matches!(action, NotifAction::Hold)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn ffi_handler_translates_kill_with_explicit_pgid() { + let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(handler_set_kill_sigterm_1234), + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::Kill as u32, + ) + }; + // Safety: see `ffi_handler_translates_continue`. + let h = unsafe { FfiHandler::from_raw(raw) }; + let action = h.handle(&fake_ctx()).await; + assert!(matches!( + action, + NotifAction::Kill { sig, pgid } if sig == libc::SIGTERM && pgid == 1234 + )); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn ffi_handler_translates_kill_zero_pgid_substitutes_child_pgid() { + // Spawn a child that places itself into a fresh process group via + // `setpgid(0, 0)` in pre_exec. The child therefore becomes its own + // pgid leader: `getpgid(child_pid) == child_pid`, and crucially + // `getpgid(child_pid) != getpgid(0)` (the supervisor's pgid). + // + // The supervisor_pgid guard added in the defense-in-depth pass would + // otherwise refuse the substitution and fall back to the bare pid. + // By breaking the child away into its own group we keep this test + // exercising the happy path: zero pgid in a Kill action is replaced + // with the resolved pgid (here `== child_pid`, but reached through + // the substitution branch — not the supervisor-guard fallback). + use std::os::unix::process::CommandExt; + let mut cmd = std::process::Command::new("sleep"); + cmd.arg("30"); + unsafe { + cmd.pre_exec(|| { + // SAFETY: `setpgid` is async-signal-safe; pid=0 acts on the + // calling process; pgid=0 creates a new group whose leader + // is the calling process. 
+ if libc::setpgid(0, 0) != 0 { + return Err(std::io::Error::last_os_error()); + } + Ok(()) + }); + } + let mut child = cmd.spawn().expect("spawn sleep child"); + let child_pid = child.id() as i32; + let supervisor_pgid = unsafe { libc::getpgid(0) }; + // Poll briefly because pre_exec runs after fork but the parent may + // observe the pgid change asynchronously. + let mut resolved_pgid = unsafe { libc::getpgid(child_pid) }; + for _ in 0..50 { + if resolved_pgid == child_pid { + break; + } + std::thread::sleep(std::time::Duration::from_millis(10)); + resolved_pgid = unsafe { libc::getpgid(child_pid) }; + } + // The child is its own pgid leader; supervisor's pgid is distinct. + assert_eq!( + resolved_pgid, child_pid, + "precondition: pre_exec setpgid(0,0) should leave child as its own pgid leader", + ); + assert_ne!( + resolved_pgid, supervisor_pgid, + "precondition: child's pgid must differ from supervisor's pgid for the substitution branch to fire", + ); + let expected_pgid = child_pid; + + let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(handler_set_kill_sigkill_zero_pgid), + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::Kill as u32, + ) + }; + // Safety: see `ffi_handler_translates_continue`. + let h = unsafe { FfiHandler::from_raw(raw) }; + let cx = HandlerCtx { + notif: SeccompNotif { + id: 1, + pid: child_pid as u32, + flags: 0, + data: SeccompData { + nr: 39, + arch: 0xC000_003E, + instruction_pointer: 0, + args: [0; 6], + }, + }, + notif_fd: -1, + }; + let action = h.handle(&cx).await; + + // Reap the child regardless of assertion outcome. 
+ let _ = child.kill(); + let _ = child.wait(); + + assert!( + matches!( + action, + NotifAction::Kill { sig, pgid } + if sig == libc::SIGKILL && pgid == expected_pgid + ), + "expected Kill {{ sig: SIGKILL, pgid: {expected_pgid} }}, got {action:?}", + ); +} + +// ---- Group K: defense-in-depth guards for child pgid resolution ---------- +// +// These tests verify the three guard rails in `FfiHandler::handle` that +// protect against the supervisor-suicide vector when resolving the +// fallback pgid for `Kill { pgid: 0 }` actions. + +fn fake_ctx_with_pid(pid: u32) -> HandlerCtx { + HandlerCtx { + notif: SeccompNotif { + id: 1, + pid, + flags: 0, + data: SeccompData { + nr: 39, + arch: 0xC000_003E, + instruction_pointer: 0, + args: [0; 6], + }, + }, + notif_fd: -1, + } +} + +extern "C-unwind" fn k_handler_set_kill_sigkill_zero_pgid( + _ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + unsafe { sandlock_ffi::handler::sandlock_action_set_kill(out, libc::SIGKILL, 0) }; + 0 +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn k1_pgid_resolution_rejects_pid_zero() { + // notif.pid == 0 can occur in nested PID namespaces (Kubernetes + // pod-in-pod, KubeVirt, DinD). The earlier resolution fell back to + // the bare pid (`0`) here, and `translate_action`'s `Kill` arm then + // produced `Kill { pgid: 0 }`. POSIX `killpg(0, sig)` is "signal + // the caller's process group" — supervisor suicide, the very + // vector this resolution exists to close. + // + // The new resolution flags this case via `UNSAFE_PGID`. + // `translate_action`'s `Kill` arm refuses substitution and returns + // `None`, which routes the dispatcher onto the configured + // exception policy (here `Continue`). 
+ let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(k_handler_set_kill_sigkill_zero_pgid), + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::Continue as u32, + ) + }; + let handler = unsafe { FfiHandler::from_raw(raw) }; + let cx = fake_ctx_with_pid(0); + let action = handler.handle(&cx).await; + assert!( + matches!(action, NotifAction::Continue), + "expected exception-policy fallback (Continue) when no safe pgid available, got {action:?}", + ); +} + +// Defence-in-depth: in addition to the unit-level assertion above, +// verify directly that the supervisor's process group is NOT signalled +// when the lethal-pgid path triggers. We register a SIGURG handler on +// the test process (a signal not used by tokio or by the test runtime), +// run a callback that arms a `Kill { sig: SIGURG, pgid: 0 }` action +// through the FFI handler dispatch, and assert the counter never +// increments. If the old behaviour (substitute pgid=0 and dispatch) +// regressed, the supervisor's group would receive SIGURG and the +// assertion would fail. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn k1_no_supervisor_signal_on_pid_zero_kill() { + use std::sync::atomic::{AtomicUsize, Ordering}; + static SIGURG_COUNT: AtomicUsize = AtomicUsize::new(0); + + extern "C" fn sigurg_handler(_: libc::c_int) { + SIGURG_COUNT.fetch_add(1, Ordering::SeqCst); + } + // SAFETY: installing a signal handler is signal-safe; the handler + // itself touches only an AtomicUsize (lock-free, async-signal-safe + // on Linux). 
+ unsafe { + let mut sa: libc::sigaction = std::mem::zeroed(); + sa.sa_sigaction = sigurg_handler as usize; + libc::sigemptyset(&mut sa.sa_mask); + libc::sigaction(libc::SIGURG, &sa, std::ptr::null_mut()); + } + SIGURG_COUNT.store(0, Ordering::SeqCst); + + extern "C-unwind" fn arm_lethal_kill( + _ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + out: *mut sandlock_ffi::handler::sandlock_action_out_t, + ) -> i32 { + unsafe { sandlock_ffi::handler::sandlock_action_set_kill(out, libc::SIGURG, 0) }; + 0 + } + let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(arm_lethal_kill), + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::Continue as u32, + ) + }; + let handler = unsafe { FfiHandler::from_raw(raw) }; + let cx = fake_ctx_with_pid(0); // pid=0 -> UNSAFE_PGID + let action = handler.handle(&cx).await; + + // The action must be Continue (exception-policy fallback), NOT a + // Kill that send_response would forward to killpg(0). + assert!( + matches!(action, NotifAction::Continue), + "action must not be Kill when no safe pgid is available; got {action:?}", + ); + + // Give the OS a moment in case SIGURG was actually delivered. + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + assert_eq!( + SIGURG_COUNT.load(Ordering::SeqCst), + 0, + "supervisor's process group must NOT receive the signal", + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn k2_pgid_resolution_rejects_supervisor_pgid_match() { + // Spawn a child WITHOUT pre_exec setpgid, so it inherits the + // supervisor's process group. `getpgid(child_pid) == getpgid(0) == + // supervisor_pgid`. Earlier versions fell back to the bare pid here + // (Kill { pgid: child_pid }), but that left the substitution + // semantics under-defined: `killpg(child_pid)` succeeds only if + // child_pid happens to also be a pgid. 
With the new resolution + // we flag the case via `UNSAFE_PGID`, and `translate_action`'s + // `Kill` arm refuses substitution — routing the dispatcher onto + // the exception policy (here `Continue`). + let supervisor_pgid = unsafe { libc::getpgid(0) }; + let mut child = std::process::Command::new("sleep") + .arg("30") + .spawn() + .expect("spawn sleep child"); + let child_pid = child.id() as i32; + + // The child inherits the supervisor's pgid by default. Confirm the + // precondition holds; otherwise this test cannot discriminate. + let resolved_pgid = unsafe { libc::getpgid(child_pid) }; + assert_eq!( + resolved_pgid, supervisor_pgid, + "precondition: child should inherit supervisor's pgid; got {resolved_pgid}, supervisor={supervisor_pgid}", + ); + + let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(k_handler_set_kill_sigkill_zero_pgid), + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::Continue as u32, + ) + }; + let handler = unsafe { FfiHandler::from_raw(raw) }; + let cx = fake_ctx_with_pid(child_pid as u32); + let action = handler.handle(&cx).await; + + // Reap the child regardless of assertion outcome. + let _ = child.kill(); + let _ = child.wait(); + + assert!( + matches!(action, NotifAction::Continue), + "expected exception-policy fallback (Continue) when child's pgid matches supervisor's (supervisor_pgid={supervisor_pgid}), got {action:?}", + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn k3_pgid_resolution_falls_back_on_esrch() { + // Use a clearly-dead pid that will never exist on this host. + // `getpgid(i32::MAX)` returns -1 with ESRCH on Linux. Earlier + // versions fell back to the bare pid here, producing + // `Kill { pgid: i32::MAX }` — which the kernel would reject with + // ESRCH in the response path, but only after `translate_action` + // had emitted a Kill action. 
The new resolution flags the case + // via `UNSAFE_PGID`; `translate_action`'s `Kill` arm refuses + // substitution and routes through the exception policy + // (here `Continue`). + let dead_pid: u32 = i32::MAX as u32; + + let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(k_handler_set_kill_sigkill_zero_pgid), + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::Continue as u32, + ) + }; + let handler = unsafe { FfiHandler::from_raw(raw) }; + let cx = fake_ctx_with_pid(dead_pid); + let action = handler.handle(&cx).await; + assert!( + matches!(action, NotifAction::Continue), + "expected exception-policy fallback (Continue) on ESRCH, got {action:?}", + ); +} + +// ---- Group C: exception policy fallbacks -------------------------------- + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn ffi_handler_kill_policy_on_callback_rc_nonzero() { + let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(returns_error_with_unset_action), + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::Kill as u32, + ) + }; + // Safety: see `ffi_handler_translates_continue`. + let h = unsafe { FfiHandler::from_raw(raw) }; + // Use an isolated child so the resolved child_pgid is not + // UNSAFE_PGID — otherwise the exception policy's Kill arm + // (correctly) degrades to Errno(EPERM) to avoid supervisor + // suicide, and the assertion below would not exercise the + // Kill-path the test exists to cover. + let (cx, mut child) = fake_ctx_with_isolated_child(); + let action = h.handle(&cx).await; + let _ = child.kill(); + let _ = child.wait(); + assert!(matches!(action, NotifAction::Kill { sig, .. 
} if sig == libc::SIGKILL)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn ffi_handler_continue_policy_on_callback_rc_nonzero() { + let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(returns_error_with_unset_action), + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::Continue as u32, + ) + }; + // Safety: see `ffi_handler_translates_continue`. + let h = unsafe { FfiHandler::from_raw(raw) }; + let action = h.handle(&fake_ctx()).await; + assert!(matches!(action, NotifAction::Continue)); +} + +// ---- Group D: panic recovery -------------------------------------------- + +extern "C-unwind" fn panicking_handler( + _ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + _out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + panic!("test panic from extern C handler"); +} + +// `sandlock_handler_fn_t` is `extern "C-unwind" fn`, so a panic raised +// inside the Rust handler unwinds across the C ABI boundary and is +// caught by the `std::panic::catch_unwind` in `FfiHandler::handle`. The +// dispatcher then falls back to the configured exception policy — here +// `Kill` — which the assertion below verifies. Pure-C callers cannot +// panic, so this stability claim is exclusively for Rust handlers +// exposed through the C ABI (the integration-test pattern here). +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn ffi_handler_recovers_from_callback_panic() { + let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(panicking_handler), + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::Kill as u32, + ) + }; + // Safety: see `ffi_handler_translates_continue`. + let h = unsafe { FfiHandler::from_raw(raw) }; + // Use an isolated child so the Kill exception policy is observable + // (rationale identical to `ffi_handler_kill_policy_on_callback_rc_nonzero`). 
+ let (cx, mut child) = fake_ctx_with_isolated_child(); + let action = h.handle(&cx).await; + let _ = child.kill(); + let _ = child.wait(); + // The `catch_unwind` inside `spawn_blocking` swallows the panic and + // the dispatcher falls back to the configured exception policy. + assert!(matches!(action, NotifAction::Kill { sig, .. } if sig == libc::SIGKILL)); +} + +// ---- Group E: Unset action with zero rc --------------------------------- + +extern "C-unwind" fn never_sets_action( + _ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + _out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + 0 +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn ffi_handler_callback_returns_zero_but_never_sets_action_triggers_fallback() { + let raw = unsafe { + sandlock_ffi::handler::sandlock_handler_new( + Some(never_sets_action), + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::DenyEperm as u32, + ) + }; + // Safety: see `ffi_handler_translates_continue`. + let h = unsafe { FfiHandler::from_raw(raw) }; + let action = h.handle(&fake_ctx()).await; + // `translate_action` returns `None` for `Unset`, which routes the + // dispatcher onto the exception policy fallback. + assert!(matches!(action, NotifAction::Errno(e) if e == libc::EPERM)); +} + +// ---- Group F: handler_new edge cases ------------------------------------ + +static NULL_UD_DROP_CALLS: std::sync::atomic::AtomicUsize = + std::sync::atomic::AtomicUsize::new(0); + +extern "C-unwind" fn counting_null_ud_dropper(ud: *mut std::ffi::c_void) { + // Sanity: confirm the dropper sees the null ud we passed in. 
+ assert!(ud.is_null(), "dropper invoked with non-null ud unexpectedly"); + NULL_UD_DROP_CALLS.fetch_add(1, std::sync::atomic::Ordering::SeqCst); +} + +#[test] +fn handler_new_with_null_handler_fn_returns_null() { + let h = unsafe { + sandlock_handler_new( + None, + std::ptr::null_mut(), + None, + sandlock_exception_policy_t::Kill as u32, + ) + }; + assert!(h.is_null(), "expected null handle when handler_fn is None"); +} + +#[test] +fn handler_new_with_null_ud_still_invokes_dropper() { + // C header guarantees ud_drop fires exactly once on free, regardless + // of whether ud is null. C-side droppers can mirror free(NULL) + // semantics themselves; the Rust container does not gate on ud. + + NULL_UD_DROP_CALLS.store(0, std::sync::atomic::Ordering::SeqCst); + let h = unsafe { + sandlock_handler_new( + Some(test_handler as sandlock_handler_fn_t), + std::ptr::null_mut(), // <-- null ud + Some(counting_null_ud_dropper), + sandlock_exception_policy_t::Kill as u32, + ) + }; + assert!(!h.is_null()); + assert_eq!( + NULL_UD_DROP_CALLS.load(std::sync::atomic::Ordering::SeqCst), + 0, + "dropper must not fire before sandlock_handler_free", + ); + unsafe { sandlock_handler_free(h) }; + assert_eq!( + NULL_UD_DROP_CALLS.load(std::sync::atomic::Ordering::SeqCst), + 1, + "dropper must fire exactly once during Drop", + ); +} + +// ---- Group G: run_with_handlers failure paths --------------------------- + +#[test] +fn run_with_handlers_null_policy_returns_null() { + let arg0 = CString::new("/bin/true").unwrap(); + let argv = [arg0.as_ptr()]; + let rr = unsafe { + sandlock_run_with_handlers( + std::ptr::null(), + std::ptr::null(), // name: auto-generate `sandbox-{pid}` + argv.as_ptr(), + argv.len() as u32, + std::ptr::null(), + 0, + ) + }; + assert!(rr.is_null(), "expected null result for null policy"); +} + +#[test] +fn run_with_handlers_null_argv_returns_null() { + use sandlock_ffi::*; + let builder = sandlock_sandbox_builder_new(); + let policy = { + let mut err: i32 = 0; + unsafe 
{ sandlock_sandbox_build(builder, &mut err, std::ptr::null_mut()) } + }; + assert!(!policy.is_null(), "policy build failed"); + + let rr = unsafe { + sandlock_run_with_handlers( + policy, + std::ptr::null(), // name: auto-generate `sandbox-{pid}` + std::ptr::null(), + 3, // argc > 0 with null argv must fail validation + std::ptr::null(), + 0, + ) + }; + assert!(rr.is_null(), "expected null result for null argv with argc > 0"); + + unsafe { sandlock_sandbox_free(policy); } +} + +#[test] +fn run_with_handlers_zero_argc_returns_null() { + // argc == 0 means "no command to execute" — the sandbox cannot + // exec an empty argv, so the FFI must reject it at the boundary + // before consuming handler containers. + use sandlock_ffi::*; + let builder = sandlock_sandbox_builder_new(); + let policy = { + let mut err: i32 = 0; + unsafe { sandlock_sandbox_build(builder, &mut err, std::ptr::null_mut()) } + }; + assert!(!policy.is_null(), "policy build failed"); + + let arg0 = CString::new("/bin/true").unwrap(); + let argv = [arg0.as_ptr()]; + let rr = unsafe { + sandlock_run_with_handlers( + policy, + std::ptr::null(), // name: auto-generate `sandbox-{pid}` + argv.as_ptr(), + 0, // zero argc must reject + std::ptr::null(), + 0, + ) + }; + assert!(rr.is_null(), "expected null result for argc == 0"); + + unsafe { sandlock_sandbox_free(policy); } +} + +#[test] +fn run_with_handlers_null_registrations_with_nonzero_count_returns_null() { + use sandlock_ffi::*; + let builder = sandlock_sandbox_builder_new(); + let policy = { + let mut err: i32 = 0; + unsafe { sandlock_sandbox_build(builder, &mut err, std::ptr::null_mut()) } + }; + assert!(!policy.is_null(), "policy build failed"); + + let arg0 = CString::new("/bin/true").unwrap(); + let argv = [arg0.as_ptr()]; + let rr = unsafe { + sandlock_run_with_handlers( + policy, + std::ptr::null(), // name: auto-generate `sandbox-{pid}` + argv.as_ptr(), + argv.len() as u32, + std::ptr::null(), // null registrations with nregistrations > 0 + 1, 
+ ) + }; + assert!(rr.is_null(), "expected null result for null registrations + count > 0"); + + unsafe { sandlock_sandbox_free(policy); } +} + +#[test] +fn run_with_handlers_rejects_oversize_argc() { + // Defence-in-depth: `argc` is a `u32` from C, so a malicious or + // buggy caller could pass e.g. `u32::MAX` with a small backing + // array. Without an upper bound, `argv_from_c` would dereference + // four billion pointer slots before returning. We cap at 4096 + // (vastly larger than any plausible argv) and reject anything + // above. + use sandlock_ffi::*; + let builder = sandlock_sandbox_builder_new(); + let policy = { + let mut err: i32 = 0; + unsafe { sandlock_sandbox_build(builder, &mut err, std::ptr::null_mut()) } + }; + assert!(!policy.is_null(), "policy build failed"); + + let arg0 = CString::new("/bin/true").unwrap(); + // Backing argv has only one real entry; we lie about argc to + // exercise the bound check. The FFI must reject before reading + // past the first slot. + let argv = [arg0.as_ptr()]; + let rr = unsafe { + sandlock_run_with_handlers( + policy, + std::ptr::null(), + argv.as_ptr(), + 5000, // > MAX_ARGV (4096) + std::ptr::null(), + 0, + ) + }; + assert!(rr.is_null(), "expected null result for argc > MAX_ARGV"); + + unsafe { sandlock_sandbox_free(policy); } +} + +#[test] +fn run_with_handlers_rejects_oversize_nregistrations() { + // Mirror of `..._oversize_argc` for the registration count. + // A `nregistrations = usize::MAX` with a small backing array + // would hand `slice::from_raw_parts` a length larger than the + // allocation — UB. The FFI must refuse before that point. 
+ use sandlock_ffi::*; + let builder = sandlock_sandbox_builder_new(); + let policy = { + let mut err: i32 = 0; + unsafe { sandlock_sandbox_build(builder, &mut err, std::ptr::null_mut()) } + }; + assert!(!policy.is_null(), "policy build failed"); + + let arg0 = CString::new("/bin/true").unwrap(); + let argv = [arg0.as_ptr()]; + // Single real registration slot; we lie about the count. + // `handler` is null so even if the bound check were bypassed the + // validation pass would still fail — that is fine because the + // bound check must trip first (a missing check would have us + // walk 5000 invalid slots before noticing). + let regs = [sandlock_handler_registration_t { + syscall_nr: libc::SYS_getpid, + handler: std::ptr::null_mut(), + }]; + let rr = unsafe { + sandlock_run_with_handlers( + policy, + std::ptr::null(), + argv.as_ptr(), + argv.len() as u32, + regs.as_ptr(), + 5000, // > MAX_REGISTRATIONS (4096) + ) + }; + assert!(rr.is_null(), "expected null result for nregistrations > MAX_REGISTRATIONS"); + + unsafe { sandlock_sandbox_free(policy); } +} + +#[test] +fn run_with_handlers_empty_registrations_runs_normally() { + use sandlock_ffi::*; + + let builder = sandlock_sandbox_builder_new(); + // Same allowlist as the existing end-to-end test — /bin/true links + // against libc and ld.so so it still needs /lib + /lib64 + /usr. 
+ let builder = unsafe { + let p = CString::new("/usr").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/bin").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/lib").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/lib64").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/etc").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/tmp").unwrap(); + sandlock_sandbox_builder_fs_write(builder, p.as_ptr()) + }; + + let policy = { + let mut err: i32 = 0; + unsafe { sandlock_sandbox_build(builder, &mut err, std::ptr::null_mut()) } + }; + assert!(!policy.is_null(), "policy build failed"); + + let arg0 = CString::new("/bin/true").unwrap(); + let argv = [arg0.as_ptr()]; + + let rr = unsafe { + sandlock_run_with_handlers( + policy, + std::ptr::null(), // name: auto-generate `sandbox-{pid}` + argv.as_ptr(), + argv.len() as u32, + std::ptr::null(), + 0, + ) + }; + assert!(!rr.is_null(), "empty registrations should still run /bin/true"); + let success = unsafe { sandlock_result_success(rr) }; + let exit_code = unsafe { sandlock_result_exit_code(rr) }; + assert!(success, "/bin/true should exit successfully; exit={}", exit_code); + + unsafe { sandlock_result_free(rr); } + unsafe { sandlock_sandbox_free(policy); } +} + +static ONE_SHOT_DROPPER_CALLS: std::sync::atomic::AtomicUsize = + std::sync::atomic::AtomicUsize::new(0); + +extern "C-unwind" fn one_shot_dropper(ud: *mut std::ffi::c_void) { + ONE_SHOT_DROPPER_CALLS.fetch_add(1, std::sync::atomic::Ordering::SeqCst); + if !ud.is_null() { + // Reclaim the leaked Box so leak-sanitizer builds stay clean. 
+ unsafe { drop(Box::from_raw(ud as *mut u32)); } + } +} + +#[test] +fn run_with_handlers_null_handler_in_array_returns_null() { + use sandlock_ffi::*; + + let builder = sandlock_sandbox_builder_new(); + let policy = { + let mut err: i32 = 0; + unsafe { sandlock_sandbox_build(builder, &mut err, std::ptr::null_mut()) } + }; + assert!(!policy.is_null(), "policy build failed"); + + // The supervisor owns and frees the valid handler even when the + // call rejects the array because of a null entry. We assert this + // by registering `one_shot_dropper` and verifying it fires + // exactly once — from the supervisor's `release_registrations`, + // not from a manual `sandlock_handler_free` (which would now be + // a double-free per the always-consume contract documented in + // sandlock.h). + ONE_SHOT_DROPPER_CALLS.store(0, std::sync::atomic::Ordering::SeqCst); + let ud = Box::into_raw(Box::new(0xAAu32)) as *mut std::ffi::c_void; + let valid = unsafe { + sandlock_handler_new( + Some(test_handler as sandlock_handler_fn_t), + ud, + Some(one_shot_dropper), + sandlock_exception_policy_t::Kill as u32, + ) + }; + assert!(!valid.is_null()); + + let regs = [ + sandlock_handler_registration_t { + syscall_nr: libc::SYS_getpid, + handler: valid, + }, + sandlock_handler_registration_t { + syscall_nr: libc::SYS_getppid, + handler: std::ptr::null_mut(), // forces validation failure + }, + ]; + + let arg0 = CString::new("/bin/true").unwrap(); + let argv = [arg0.as_ptr()]; + + let rr = unsafe { + sandlock_run_with_handlers( + policy, + std::ptr::null(), // name: auto-generate `sandbox-{pid}` + argv.as_ptr(), + argv.len() as u32, + regs.as_ptr(), + regs.len(), + ) + }; + assert!(rr.is_null(), "expected null result when an array entry is null"); + assert_eq!( + ONE_SHOT_DROPPER_CALLS.load(std::sync::atomic::Ordering::SeqCst), + 1, + "dropper must fire exactly once (from the supervisor's release_registrations)", + ); + + unsafe { sandlock_sandbox_free(policy); } +} + +// ---- Group H: 
multiple handlers ----------------------------------------- + +extern "C-unwind" fn force_getpid_to_111( + _ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + unsafe { sandlock_ffi::handler::sandlock_action_set_return_value(out, 111) }; + 0 +} + +extern "C-unwind" fn force_getppid_to_222( + _ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + unsafe { sandlock_ffi::handler::sandlock_action_set_return_value(out, 222) }; + 0 +} + +#[test] +fn run_with_handlers_two_handlers_each_fires_for_own_syscall() { + use sandlock_ffi::*; + + let builder = sandlock_sandbox_builder_new(); + let builder = unsafe { + let p = CString::new("/usr").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/bin").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/lib").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/lib64").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/etc").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/tmp").unwrap(); + sandlock_sandbox_builder_fs_write(builder, p.as_ptr()) + }; + + let policy = { + let mut err: i32 = 0; + unsafe { sandlock_sandbox_build(builder, &mut err, std::ptr::null_mut()) } + }; + assert!(!policy.is_null(), "policy build failed"); + + let h_pid = unsafe { + handler::sandlock_handler_new( + Some(force_getpid_to_111), + std::ptr::null_mut(), + None, + 
handler::sandlock_exception_policy_t::Kill as u32, + ) + }; + let h_ppid = unsafe { + handler::sandlock_handler_new( + Some(force_getppid_to_222), + std::ptr::null_mut(), + None, + handler::sandlock_exception_policy_t::Kill as u32, + ) + }; + assert!(!h_pid.is_null() && !h_ppid.is_null()); + + let registrations = [ + sandlock_handler_registration_t { + syscall_nr: libc::SYS_getpid, + handler: h_pid, + }, + sandlock_handler_registration_t { + syscall_nr: libc::SYS_getppid, + handler: h_ppid, + }, + ]; + + let script = CString::new( + "import os, sys; sys.stdout.write(str(os.getpid())+'|'+str(os.getppid()))", + ).unwrap(); + let arg0 = CString::new("/usr/bin/python3").unwrap(); + let arg1 = CString::new("-c").unwrap(); + let argv = [ + arg0.as_ptr(), + arg1.as_ptr(), + script.as_ptr(), + ]; + + let rr = unsafe { + sandlock_run_with_handlers( + policy, + std::ptr::null(), // name: auto-generate `sandbox-{pid}` + argv.as_ptr(), + argv.len() as u32, + registrations.as_ptr(), + registrations.len(), + ) + }; + assert!(!rr.is_null(), "sandlock_run_with_handlers returned null"); + let stdout = unsafe { + let mut len: usize = 0; + let p = sandlock_result_stdout_bytes(rr, &mut len); + if p.is_null() { Vec::new() } else { std::slice::from_raw_parts(p, len).to_vec() } + }; + let stderr = unsafe { + let mut len: usize = 0; + let p = sandlock_result_stderr_bytes(rr, &mut len); + if p.is_null() { Vec::new() } else { std::slice::from_raw_parts(p, len).to_vec() } + }; + let stdout_str = String::from_utf8_lossy(&stdout); + let stderr_str = String::from_utf8_lossy(&stderr); + let exit_code = unsafe { sandlock_result_exit_code(rr) }; + // The child writes exactly `getpid|getppid` with `sys.stdout.write` + // — no trailing newline. Exact-match catches mutations where one + // handler silently fails but the real pid/ppid still contains the + // sentinel substring. 
+ assert_eq!( + stdout_str.trim_end_matches('\n'), + "111|222", + "expected both handlers to fire; exit={} stdout={:?} stderr={:?}", + exit_code, stdout_str, stderr_str, + ); + + unsafe { sandlock_result_free(rr); } + unsafe { sandlock_sandbox_free(policy); } +} + +// ---- Group I: live-fd mem_read_cstr ------------------------------------- + +extern "C-unwind" fn deny_magic_marker_path( + _ud: *mut std::ffi::c_void, + notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + // openat(dirfd, pathname, flags, ...) — pathname is args[1]. + // Safety: `notif` and `mem` are valid pointers supplied by the + // dispatcher for the duration of this callback; `out` is the + // caller-allocated action-out buffer. + let addr = unsafe { (*notif).args[1] }; + let mut buf = [0u8; 256]; + let mut n: usize = 0; + let rc = unsafe { + sandlock_ffi::handler::sandlock_mem_read_cstr( + mem, addr, buf.as_mut_ptr(), buf.len(), &mut n, + ) + }; + if rc != 0 { + // Read failed — fall back to letting the syscall through so the + // test runner sees a clean ENOENT rather than a fabricated EACCES. 
+ unsafe { sandlock_ffi::handler::sandlock_action_set_continue(out) }; + return 0; + } + let path = std::str::from_utf8(&buf[..n]).unwrap_or(""); + if path == "/sandlock-test-magic-marker" { + unsafe { sandlock_ffi::handler::sandlock_action_set_errno(out, libc::EACCES) }; + } else { + unsafe { sandlock_ffi::handler::sandlock_action_set_continue(out) }; + } + 0 +} + +#[test] +fn mem_read_cstr_reads_path_from_intercepted_openat() { + use sandlock_ffi::*; + + let builder = sandlock_sandbox_builder_new(); + let builder = unsafe { + let p = CString::new("/usr").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/bin").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/lib").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/lib64").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/etc").unwrap(); + sandlock_sandbox_builder_fs_read(builder, p.as_ptr()) + }; + let builder = unsafe { + let p = CString::new("/tmp").unwrap(); + sandlock_sandbox_builder_fs_write(builder, p.as_ptr()) + }; + + let policy = { + let mut err: i32 = 0; + unsafe { sandlock_sandbox_build(builder, &mut err, std::ptr::null_mut()) } + }; + assert!(!policy.is_null(), "policy build failed"); + + let handler = unsafe { + handler::sandlock_handler_new( + Some(deny_magic_marker_path), + std::ptr::null_mut(), + None, + handler::sandlock_exception_policy_t::Kill as u32, + ) + }; + assert!(!handler.is_null()); + let registrations = [sandlock_handler_registration_t { + syscall_nr: libc::SYS_openat, + handler, + }]; + + // Child opens the magic path and prints the errno on failure. 
+ let script = CString::new( + "import os, sys\n\ + try:\n\ + \x20 os.open('/sandlock-test-magic-marker', os.O_RDONLY)\n\ + \x20 sys.exit(0)\n\ + except OSError as e:\n\ + \x20 sys.stderr.write('errno=' + str(e.errno) + '\\n')\n\ + \x20 sys.exit(1)\n", + ).unwrap(); + let arg0 = CString::new("/usr/bin/python3").unwrap(); + let arg1 = CString::new("-c").unwrap(); + let argv = [ + arg0.as_ptr(), + arg1.as_ptr(), + script.as_ptr(), + ]; + + let rr = unsafe { + sandlock_run_with_handlers( + policy, + std::ptr::null(), // name: auto-generate `sandbox-{pid}` + argv.as_ptr(), + argv.len() as u32, + registrations.as_ptr(), + registrations.len(), + ) + }; + assert!(!rr.is_null(), "sandlock_run_with_handlers returned null"); + let stderr = unsafe { + let mut len: usize = 0; + let p = sandlock_result_stderr_bytes(rr, &mut len); + if p.is_null() { Vec::new() } else { std::slice::from_raw_parts(p, len).to_vec() } + }; + let stdout = unsafe { + let mut len: usize = 0; + let p = sandlock_result_stdout_bytes(rr, &mut len); + if p.is_null() { Vec::new() } else { std::slice::from_raw_parts(p, len).to_vec() } + }; + let stderr_str = String::from_utf8_lossy(&stderr); + let stdout_str = String::from_utf8_lossy(&stdout); + let exit_code = unsafe { sandlock_result_exit_code(rr) }; + // EACCES is 13; if the path-read worked the child saw errno=13. If a + // different errno appears the handler ran but `mem_read_cstr` failed + // and we fell through — fail with a diagnostic message rather than + // silently masking. 
+ assert!( + stderr_str.contains("errno=13"), + "expected handler to inject EACCES via mem_read_cstr; \ + exit={} stdout={:?} stderr={:?}", + exit_code, stdout_str, stderr_str, + ); + + unsafe { sandlock_result_free(rr); } + unsafe { sandlock_sandbox_free(policy); } +} + +// --------------------------------------------------------------------------- +// Ownership regression tests (A1, A2, A3, A5) +// --------------------------------------------------------------------------- +// +// These exercise the four ownership/leak gaps that adversarial review +// surfaced after the initial handler ABI landed: +// +// * A1: a callback that arms `InjectFdSend` then panics or returns +// non-zero must NOT leak the supervisor-side srcfd. +// * A2: a callback that writes the `InjectFdSendTracked` discriminant +// by hand (no setter is exposed but the value is public in the C +// header) must NOT leak the supervisor-side srcfd. +// * A3: `sandlock_run_with_handlers` early-return paths (null policy, +// invalid argv, invalid name) must still consume the registered +// handler containers — the documented contract is "ownership +// transfers on entry, regardless of return value". +// * A5: `sandlock_handler_free` was `extern "C"`, so a panicking +// `ud_drop` would abort. Switched to `extern "C-unwind"`; verify a +// panic propagates back instead of aborting the process. + +// A small pipe helper used by the inject-fd drain tests below. Returns +// `(read_end, write_end)`. The write end is what the handler hands to +// the supervisor as the "inject" srcfd; the read end stays in this +// test and observes EOF once the drain path closes the write end. +fn make_pipe() -> (i32, i32) { + // Use `pipe2` with `O_CLOEXEC` so concurrent tests that spawn + // children (via std::process::Command, including + // `fake_ctx_with_isolated_child`) do not inherit a copy of the + // write end. 
Without this, an inherited duplicate keeps the read + // end from observing EOF even after the supervisor's drain path + // closes its own copy — the EOF-drain assertion would then hang + // on EAGAIN instead of returning 0. + // + // SAFETY: `libc::pipe2` writes exactly two fds into the array on + // success and returns 0; we assert success below. + let mut fds = [0i32; 2]; + let rc = unsafe { libc::pipe2(fds.as_mut_ptr(), libc::O_CLOEXEC) }; + assert_eq!(rc, 0, "pipe2() failed: errno={}", std::io::Error::last_os_error()); + (fds[0], fds[1]) +} + +// Reads up to one byte from `fd` with `O_NONBLOCK` set first. Returns +// the value `libc::read` returned (>=0 byte count, or -1 on error; +// `errno` is preserved in that case so the caller can distinguish EOF +// from EAGAIN). +fn read_eof_or_eagain(fd: i32) -> isize { + // SAFETY: `F_SETFL` with `O_NONBLOCK` is a simple flag set; `read` + // reads at most one byte into the on-stack buffer. + unsafe { + libc::fcntl(fd, libc::F_SETFL, libc::O_NONBLOCK); + let mut buf = [0u8; 1]; + libc::read(fd, buf.as_mut_ptr() as *mut std::ffi::c_void, 1) + } +} + +extern "C-unwind" fn arm_inject_fd_then_panic( + ud: *mut std::ffi::c_void, + _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t, + _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t, + out: *mut sandlock_ffi::handler::sandlock_action_out_t, +) -> i32 { + // The test stashes the write-end fd in a heap-allocated i32 and + // passes its pointer as `ud`. Read the fd, arm the inject action, + // then panic — the dispatcher must still drain the fd. + // SAFETY: `ud` points to a live `i32` for the duration of this call + // (owned by the test). 
let fd = unsafe { *(ud as *const i32) };
unsafe { sandlock_ffi::handler::sandlock_action_set_inject_fd_send(out, fd, 0) };
panic!("test panic after arming InjectFdSend");
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn a1_ffi_handler_drains_inject_fd_on_panic() {
    // Bug A1 regression hook: a C handler that calls
    // `sandlock_action_set_inject_fd_send` and then panics used to leak
    // the supervisor-side srcfd. After the fix, the dispatcher's
    // catch-unwind path drains the pending payload, closing the fd.
    //
    // The exception policy below is `Kill`. With `fake_ctx()` (test
    // process's own pid), the pgid resolution sees
    // `pgid == supervisor_pgid` and yields `UNSAFE_PGID`. The Kill
    // exception arm then degrades to `Errno(EPERM)` (D-new-1: avoid
    // supervisor suicide via killpg(0)). The drain assertion below is
    // the load-bearing one for this regression hook — the exception
    // action just demonstrates that the dispatcher routed onto the
    // policy fallback at all.
    let (read_fd, write_fd) = make_pipe();
    // Heap-allocated so the pointer stays valid across spawn_blocking.
    // The explicit `Box<i32>` annotation documents the pointee type that
    // the `Box::from_raw(.. as *mut i32)` reclaim at the end relies on.
    let fd_holder: Box<i32> = Box::new(write_fd);
    let fd_ptr = Box::into_raw(fd_holder) as *mut std::ffi::c_void;

    let raw = unsafe {
        sandlock_ffi::handler::sandlock_handler_new(
            Some(arm_inject_fd_then_panic),
            fd_ptr,
            None,
            sandlock_exception_policy_t::Kill as u32,
        )
    };
    // SAFETY: `raw` was just produced and is non-null.
    let h = unsafe { FfiHandler::from_raw(raw) };
    let action = h.handle(&fake_ctx()).await;
    assert!(
        matches!(action, NotifAction::Errno(e) if e == libc::EPERM),
        "panic must route to the exception-policy fallback (Kill degraded to EPERM under UNSAFE_PGID), got {action:?}",
    );

    // After `handle` returns, the drain path should have closed
    // `write_fd`. Reading from `read_fd` (with O_NONBLOCK) returns 0
    // (EOF). If the leak were still present, the write end would
    // remain open and `read` would return -1/EAGAIN.
    let n = read_eof_or_eagain(read_fd);
    // Capture errno immediately after the read so the failure message
    // below reports the read's error, not a later call's.
    let errno = std::io::Error::last_os_error();
    assert_eq!(
        n, 0,
        "expected EOF on read end (write end closed by drain); got n={n} errno={errno}",
    );

    // Reclaim the heap allocation for the fd holder so the test is
    // leak-clean. `write_fd` itself is owned by the drain path; do NOT
    // close it here.
    // SAFETY: `fd_ptr` came from `Box::into_raw` on a `Box<i32>`.
    unsafe { drop(Box::from_raw(fd_ptr as *mut i32)); }
    // SAFETY: `read_fd` is still open; close it.
    unsafe { libc::close(read_fd); }
}

// Handler callback for the A2 test: reads the srcfd stored behind `ud`
// and hand-writes an `InjectFdSendTracked` action into `out`, then
// returns 0.
extern "C-unwind" fn arm_inject_fd_send_tracked_discriminant(
    ud: *mut std::ffi::c_void,
    _notif: *const sandlock_ffi::notif_repr::sandlock_notif_data_t,
    _mem: *mut sandlock_ffi::handler::sandlock_mem_handle_t,
    out: *mut sandlock_ffi::handler::sandlock_action_out_t,
) -> i32 {
    // Write the InjectFdSendTracked discriminant by hand. The setter
    // is not exposed in this release, but the discriminant value is
    // public in the C header, so a C caller could do exactly this.
    // SAFETY: `ud` and `out` are valid for the duration of this call.
    let fd = unsafe { *(ud as *const i32) };
    unsafe {
        (*out).kind = sandlock_ffi::handler::sandlock_action_kind_t::InjectFdSendTracked as u32;
        (*out).payload.inject_send_tracked =
            sandlock_ffi::handler::sandlock_action_inject_tracked_t {
                srcfd: fd,
                newfd_flags: 0,
                tracker: 0,
            };
    }
    0
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn a2_ffi_handler_drains_inject_fd_tracked_discriminant() {
    // Bug A2 regression hook: a C handler that writes the
    // `InjectFdSendTracked` discriminant directly used to leak the
    // srcfd because `translate_action`'s `K::InjectFdSendTracked` arm
    // returned None and dropped the value without reclaiming the fd.
    //
    // See `a1_ffi_handler_drains_inject_fd_on_panic` for why the
    // exception action below is `Errno(EPERM)` rather than `Kill`.
    let (read_fd, write_fd) = make_pipe();
    // Heap-allocated fd holder; reclaimed as `*mut i32` at the end of
    // the test.
    let fd_holder: Box<i32> = Box::new(write_fd);
    let fd_ptr = Box::into_raw(fd_holder) as *mut std::ffi::c_void;

    let raw = unsafe {
        sandlock_ffi::handler::sandlock_handler_new(
            Some(arm_inject_fd_send_tracked_discriminant),
            fd_ptr,
            None,
            sandlock_exception_policy_t::Kill as u32,
        )
    };
    // SAFETY: `raw` was just produced and is non-null.
    let h = unsafe { FfiHandler::from_raw(raw) };
    let action = h.handle(&fake_ctx()).await;
    assert!(
        matches!(action, NotifAction::Errno(e) if e == libc::EPERM),
        "unsupported tracked discriminant must route to the exception-policy fallback (Kill degraded to EPERM under UNSAFE_PGID), got {action:?}",
    );

    let n = read_eof_or_eagain(read_fd);
    let errno = std::io::Error::last_os_error();
    assert_eq!(
        n, 0,
        "expected EOF on read end (write end closed by drain); got n={n} errno={errno}",
    );

    // SAFETY: `fd_ptr` came from `Box::into_raw` on a `Box<i32>`.
    unsafe { drop(Box::from_raw(fd_ptr as *mut i32)); }
    // SAFETY: `read_fd` is still open; close it.
    unsafe { libc::close(read_fd); }
}

// Number of times `a3_counter_dropper` has run; the A3 test resets it
// to zero before creating its handler.
static A3_UD_DROPPER_CALLS: std::sync::atomic::AtomicUsize =
    std::sync::atomic::AtomicUsize::new(0);

// `ud_drop` callback that only counts invocations; it never reads
// through `ud`.
extern "C-unwind" fn a3_counter_dropper(_ud: *mut std::ffi::c_void) {
    A3_UD_DROPPER_CALLS.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
}

#[test]
fn a3_run_with_handlers_releases_registrations_on_null_policy() {
    // Bug A3 regression hook: the null-policy early-return path used to
    // abandon the registration array. After the fix, the supervisor
    // consumes every non-null handler pointer on entry, regardless of
    // return value.
    A3_UD_DROPPER_CALLS.store(0, std::sync::atomic::Ordering::SeqCst);
    // Non-null ud — the dropper itself ignores the value, so any
    // non-null bit pattern works.
(Null ud would also fire the + // dropper per the C header contract; we just pick a non-null + // sentinel here for clarity of intent.) + let h = unsafe { + sandlock_handler_new( + Some(test_handler as sandlock_handler_fn_t), + 0xFEED_FACEusize as *mut std::ffi::c_void, + Some(a3_counter_dropper), + sandlock_exception_policy_t::Kill as u32, + ) + }; + assert!(!h.is_null(), "handler_new must produce a valid container"); + let regs = [sandlock_handler_registration_t { + syscall_nr: libc::SYS_getpid, + handler: h, + }]; + let rr = unsafe { + sandlock_run_with_handlers( + std::ptr::null(), // null policy triggers the early-return path + std::ptr::null(), // name + std::ptr::null(), // argv + 0, // argc + regs.as_ptr(), + regs.len(), + ) + }; + assert!(rr.is_null(), "expected null result for null policy"); + assert_eq!( + A3_UD_DROPPER_CALLS.load(std::sync::atomic::Ordering::SeqCst), + 1, + "ud_drop must fire on the early-return path (handler consumed by supervisor)", + ); +} + +extern "C-unwind" fn a5_panicking_dropper(_ud: *mut std::ffi::c_void) { + panic!("test panic from dropper"); +} + +static C_NEW_1_DROPPER_A: std::sync::atomic::AtomicUsize = + std::sync::atomic::AtomicUsize::new(0); +static C_NEW_1_DROPPER_B: std::sync::atomic::AtomicUsize = + std::sync::atomic::AtomicUsize::new(0); + +extern "C-unwind" fn c_new_1_dropper_a(_ud: *mut std::ffi::c_void) { + C_NEW_1_DROPPER_A.fetch_add(1, std::sync::atomic::Ordering::SeqCst); + panic!("c_new_1 dropper_a panic"); +} + +extern "C-unwind" fn c_new_1_dropper_b(_ud: *mut std::ffi::c_void) { + C_NEW_1_DROPPER_B.fetch_add(1, std::sync::atomic::Ordering::SeqCst); +} + +#[test] +fn release_registrations_continues_after_mid_loop_panic() { + // Bug C-new-1 regression hook: `release_registrations` used to + // drop each container in a bare loop. 
A mid-loop panic from a + // user-supplied `ud_drop` would unwind past the remaining slots, + // leaving handler containers leaked (partial-consume — violates + // the "array consumed as a whole" C-ABI contract). After the fix, + // each drop runs inside `catch_unwind`, the first panic is + // captured, the loop completes, and the panic is then re-raised + // through the `extern "C-unwind"` entry point. + C_NEW_1_DROPPER_A.store(0, std::sync::atomic::Ordering::SeqCst); + C_NEW_1_DROPPER_B.store(0, std::sync::atomic::Ordering::SeqCst); + + let h1 = unsafe { + sandlock_handler_new( + Some(test_handler as sandlock_handler_fn_t), + // Non-null ud sentinel; the dropper does not read the + // pointer. Null ud would also fire the dropper per the + // C header contract. + 0xDEAD_BEEFusize as *mut std::ffi::c_void, + Some(c_new_1_dropper_a), + sandlock_exception_policy_t::Kill as u32, + ) + }; + let h2 = unsafe { + sandlock_handler_new( + Some(test_handler as sandlock_handler_fn_t), + 0xCAFE_F00Dusize as *mut std::ffi::c_void, + Some(c_new_1_dropper_b), + sandlock_exception_policy_t::Kill as u32, + ) + }; + assert!(!h1.is_null() && !h2.is_null(), "handler_new must succeed"); + let regs = [ + sandlock_handler_registration_t { syscall_nr: libc::SYS_getpid, handler: h1 }, + sandlock_handler_registration_t { syscall_nr: libc::SYS_getppid, handler: h2 }, + ]; + // Null policy triggers `release_registrations` on the + // early-return path. With the fix, `sandlock_run_with_handlers` + // unwinds (extern "C-unwind") because dropper_a panics; + // `catch_unwind` here captures it. 
+ let result = std::panic::catch_unwind(|| { + unsafe { + sandlock_run_with_handlers( + std::ptr::null(), + std::ptr::null(), + std::ptr::null(), + 0, + regs.as_ptr(), + regs.len(), + ) + } + }); + assert!( + result.is_err(), + "expected sandlock_run_with_handlers to propagate the captured panic out of release_registrations", + ); + assert_eq!( + C_NEW_1_DROPPER_A.load(std::sync::atomic::Ordering::SeqCst), + 1, + "dropper_a must have fired exactly once", + ); + assert_eq!( + C_NEW_1_DROPPER_B.load(std::sync::atomic::Ordering::SeqCst), + 1, + "dropper_b must have fired despite dropper_a panicking (no partial-consume leak)", + ); +} + +#[test] +fn a5_handler_free_unwinds_on_panicking_dropper() { + // Bug A5 regression hook: `sandlock_handler_free` used to be + // `extern "C"`, which aborts on unwind. After the fix it is + // `extern "C-unwind"` and a panicking `ud_drop` propagates back to + // the caller's `catch_unwind`. + // + // Note: with the bug still present, the process aborts here and + // the test binary dies — `catch_unwind` cannot recover from an + // abort. So we write the test against the FIXED code; the + // destructive sanity check (manually flipping the ABI back to + // `extern "C"`) is a one-shot manual confirmation. + let h = unsafe { + sandlock_handler_new( + Some(test_handler as sandlock_handler_fn_t), + // Any non-null bit pattern works because the dropper + // itself never reads through the pointer — it just panics. + // Null ud would also fire the dropper per the C header + // contract. + 0xDEAD_BEEFusize as *mut std::ffi::c_void, + Some(a5_panicking_dropper), + sandlock_exception_policy_t::Kill as u32, + ) + }; + assert!(!h.is_null(), "handler_new must produce a valid container"); + let result = std::panic::catch_unwind(|| { + // SAFETY: `h` is a valid, unregistered container; we + // intentionally trigger the panicking dropper by freeing it. 
+ unsafe { sandlock_handler_free(h) }; + }); + assert!( + result.is_err(), + "expected sandlock_handler_free to unwind a panicking dropper instead of aborting", + ); +} diff --git a/docs/extension-handlers.md b/docs/extension-handlers.md index cd24e2e..fd966ef 100644 --- a/docs/extension-handlers.md +++ b/docs/extension-handlers.md @@ -380,15 +380,18 @@ The contract is exercised at two layers: ### Continue-site safety -The supervisor processes notifications sequentially in a single tokio task, so the response sent -for one notification gates the kernel resumption of the trapped syscall. Sandlock-internal -locks (`tokio::sync::Mutex`/`RwLock`) live on the supervisor; user handlers do not have access -to them through `HandlerCtx`, so the contract here is local to handler-owned state on `&self`: -a `tokio::sync::Mutex` or `RwLock` field on your handler must not be held across an -`.await` point. If the guard is alive when control returns to the supervisor loop, the next -notification that needs the same lock parks, the response for the current notification is not -sent, and the child stays trapped in the syscall. Acquire, mutate, drop — `await` only after -the guard is out of scope. +Today's supervisor processes notifications sequentially in a single tokio task, so the response +sent for one notification gates the kernel resumption of the trapped syscall. Treat this as an +implementation detail, not a contract — the public API makes no promise that a future +dispatcher will not parallelise. The `Handler` trait already requires `Send + Sync`, and the C +ABI requires `ud` to be thread-safe (see [C ABI → Thread safety](#thread-safety)) for exactly +this reason. Sandlock-internal locks (`tokio::sync::Mutex`/`RwLock`) live on the supervisor; +user handlers do not have access to them through `HandlerCtx`, so the contract here is local to +handler-owned state on `&self`: a `tokio::sync::Mutex` or `RwLock` field on your handler +must not be held across an `.await` point. 
If the guard is alive when control returns to the +supervisor loop, the next notification that needs the same lock parks, the response for the +current notification is not sent, and the child stays trapped in the syscall. Acquire, mutate, +drop — `await` only after the guard is out of scope. See [issue #27][i27] for the underlying supervisor-loop contract that this convention extends to user handlers. @@ -604,3 +607,48 @@ For a single concrete handler type the bare struct works without the `Box::new` The crate links against `sandlock-core` as an ordinary dependency — no fork, no `[patch.crates-io]`, no duplication of `notif::supervisor`. + +## C ABI + +The same handler model is available to non-Rust callers via the +`sandlock-ffi` cdylib (header: `crates/sandlock-ffi/include/sandlock.h`). + +### Lifetimes + +| Object | Allocated by | Freed by | +|--------------------------------|----------------------------------------|---------------------------------------------| +| `sandlock_handler_t*` | `sandlock_handler_new` | `sandlock_handler_free` (if never registered)<br>or the supervisor (once passed to `sandlock_run_with_handlers`, whether or not the run succeeds) | +| `sandlock_action_out_t` | Rust adapter (stack), pointer to C | Adapter (stack-scoped to one callback) | +| `sandlock_mem_handle_t*` | Rust adapter (stack) | Adapter (do not retain past callback return) | +| `sandlock_notif_data_t` | Rust adapter (stack), pointer to C | Adapter (do not retain past callback return) | + +### Callback contract + +A C handler must: + +1. Return `0` exactly when it has called one — and only one — of the + `sandlock_action_set_*` setters on `out`. +2. Return non-zero on any internal error. The supervisor then applies + the handler's `on_exception` policy (default: `SANDLOCK_EXCEPTION_KILL`). +3. Not retain `notif`, `mem`, or `out` past the return statement. +4. Panic only if it is a Rust-side handler exposed through the + C ABI — the supervisor catches the unwind via `catch_unwind` and + applies the configured exception policy. Pure-C callers cannot + panic (C has no unwinding); this clause is for Rust handlers + plugged into the C ABI surface. + +### Thread safety + +The supervisor MAY invoke a C handler callback from multiple worker +threads concurrently across different notifications. Today's dispatch +loop is largely serial, but the public C ABI makes no concurrency +guarantee — a future dispatcher could parallelise without an ABI +break. Consequently the caller MUST ensure their `ud` pointer is +thread-safe: either immutable, or guarded by their own synchronization +primitives (atomics, mutex, etc.). Rust offers no synchronization for +an opaque `void*`; the responsibility is on the C side. + +### Minimal example + +See `crates/sandlock-ffi/tests/c/handler_smoke.c` for the canonical +end-to-end example.