diff --git a/Cargo.toml b/Cargo.toml index 7086d4b3..a7b26166 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,7 @@ composefs-ctl = { version = "0.7.0", path = "crates/composefs-ctl", default-feat composefs-ioctls = { version = "0.7.0", path = "crates/composefs-ioctls", default-features = false } composefs-oci = { version = "0.7.0", path = "crates/composefs-oci", default-features = false } composefs-boot = { version = "0.7.0", path = "crates/composefs-boot", default-features = false } +composefs-fuse = { version = "0.7.0", path = "crates/composefs-fuse", default-features = false } composefs-http = { version = "0.7.0", path = "crates/composefs-http", default-features = false } composefs-ostree = { version = "0.7.0", path = "crates/composefs-ostree", default-features = false } cap-std-ext = "5.1.2" diff --git a/crates/composefs-ctl/Cargo.toml b/crates/composefs-ctl/Cargo.toml index b7155319..b4b47ba0 100644 --- a/crates/composefs-ctl/Cargo.toml +++ b/crates/composefs-ctl/Cargo.toml @@ -17,11 +17,12 @@ name = "cfsctl" path = "src/main.rs" [features] -default = ['pre-6.15', 'oci', 'containers-storage', 'ostree'] +default = ['pre-6.15', 'oci', 'containers-storage', 'ostree', 'fuse'] http = ['composefs-http'] oci = ['composefs-oci', 'composefs-oci/varlink'] containers-storage = ['composefs-oci/containers-storage', 'cstorage'] ostree = ['composefs-ostree'] +fuse = ['dep:composefs-fuse'] rhel9 = ['composefs/rhel9'] 'pre-6.15' = ['composefs/pre-6.15'] @@ -35,13 +36,14 @@ composefs-boot = { workspace = true } composefs-oci = { workspace = true, optional = true, features = ["boot"] } composefs-http = { workspace = true, optional = true } cstorage = { package = "composefs-storage", path = "../composefs-storage", version = "0.7.0", features = ["userns-helper"], optional = true } +composefs-fuse = { workspace = true, optional = true } composefs-ostree = { workspace = true, optional = true } env_logger = { version = "0.11.0", default-features = false } hex = { version = "0.4.0", default-features = false } indicatif = { version = "0.17.0", default-features = false } libsystemd = { version = "0.7" } log = { version = "0.4", default-features = false } -rustix = { version = "1.0.0", default-features = false, features = ["fs", "process"] } +rustix = { version = "1.0.0", default-features = false, features = ["fs", "pipe", "process", "thread"] } serde = { version = "1.0", default-features = false, features = ["derive"] } serde_json = { version = "1.0", default-features = false, features = ["std"] } tokio = { version = "1.24.2", default-features = false, features = ["io-std", "io-util", "net", "rt", "sync"] } diff --git a/crates/composefs-ctl/src/lib.rs b/crates/composefs-ctl/src/lib.rs index 9a8c592e..64fcdbaf 100644 --- a/crates/composefs-ctl/src/lib.rs +++ b/crates/composefs-ctl/src/lib.rs @@ -48,6 +48,10 @@ use comfy_table::{Table, presets::UTF8_FULL}; #[cfg(any(feature = "oci", feature = "http"))] use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use rustix::fs::{CWD, Mode, OFlags}; +#[cfg(feature = "fuse")] +use rustix::process::getuid; +#[cfg(feature = "fuse")] +use rustix::thread::{CapabilitySet, capabilities}; #[cfg(any(feature = "oci", feature = "http"))] use composefs::progress::{ @@ -441,6 +445,18 @@ enum OciCommand { /// Mount the bootable variant instead of the regular EROFS image #[arg(long)] bootable: bool, + /// Force FUSE mount instead of auto-detecting + #[cfg(feature = "fuse")] + #[arg(long, conflicts_with = "no_fuse")] + fuse: bool, + /// Force kernel mount instead of auto-detecting + #[cfg(feature = "fuse")] + #[arg(long, conflicts_with = "fuse")] + no_fuse: bool, + /// Run FUSE server in the foreground (don't daemonize) + #[cfg(feature = "fuse")] + #[arg(long)] + foreground: bool, /// Writable upper layer directory for overlayfs #[arg(long, requires = "workdir")] upperdir: Option, @@ -525,6 +541,18 @@ enum OstreeCommand { commit: String, /// Target mountpoint mountpoint: String, + /// Force FUSE mount instead of auto-detecting + #[cfg(feature = "fuse")] + #[arg(long, conflicts_with = "no_fuse")] + fuse: bool, + /// Force kernel mount instead of auto-detecting + #[cfg(feature = "fuse")] + #[arg(long, conflicts_with = "fuse")] + no_fuse: bool, + /// Run FUSE server in the foreground (don't daemonize) + #[cfg(feature = "fuse")] + #[arg(long)] + foreground: bool, /// Writable upper layer directory for overlayfs #[arg(long, requires = "workdir")] upperdir: Option, @@ -652,6 +680,18 @@ enum Command { name: String, /// the mountpoint mountpoint: String, + /// Force FUSE mount instead of auto-detecting + #[cfg(feature = "fuse")] + #[arg(long, conflicts_with = "no_fuse")] + fuse: bool, + /// Force kernel mount instead of auto-detecting + #[cfg(feature = "fuse")] + #[arg(long, conflicts_with = "fuse")] + no_fuse: bool, + /// Run FUSE server in the foreground (don't daemonize) + #[cfg(feature = "fuse")] + #[arg(long)] + foreground: bool, /// Writable upper layer directory for overlayfs #[arg(long, requires = "workdir")] upperdir: Option, @@ -810,6 +850,275 @@ fn get_mount_options( Ok(options) } +#[cfg(feature = "fuse")] +enum MountMode { + Kernel, + Fuse, + FuseOverlay, +} + +#[cfg(feature = "fuse")] +fn in_init_user_namespace() -> bool { + std::fs::read_to_string("/proc/self/uid_map") + .map(|s| s.trim() == "0 0 4294967295") + .unwrap_or(false) +} + +#[cfg(feature = "fuse")] +fn has_cap_sys_admin() -> bool { + if let Ok(caps) = capabilities(None) { + caps.effective.contains(CapabilitySet::SYS_ADMIN) + } else { + false + } +} + +#[cfg(feature = "fuse")] +fn detect_mount_mode(force_fuse: bool, no_fuse: bool, has_upper: bool) -> MountMode { + let use_fuse = if force_fuse { + true + } else if no_fuse { + false + } else { + !(getuid().is_root() && in_init_user_namespace()) + }; + + if !use_fuse { + return MountMode::Kernel; + } + + if has_upper || has_cap_sys_admin() { + MountMode::FuseOverlay + } else { + MountMode::Fuse + } +} + +#[cfg(feature = "fuse")] +fn run_fuse_foreground( + image_fd: std::os::fd::OwnedFd, + objects_fd: Arc, + mountpoint: &str, + mode: MountMode, + mount_options: MountOptions, + enable_verity: bool, + ready_fd: Option, +) -> Result<()> { + match mode { + MountMode::Kernel => unreachable!(), + MountMode::Fuse => { + let options = composefs_fuse::ServeFuseOptions::default(); + composefs_fuse::serve_fuse(mountpoint, image_fd, objects_fd, &options, ready_fd) + .context("FUSE server error")?; + } + MountMode::FuseOverlay => { + let dev_fuse = composefs_fuse::open_fuse()?; + let fuse_options = composefs_fuse::FuseMountOptions::default(); + let fuse_mnt = + composefs_fuse::mount_fuse(&dev_fuse, &fuse_options).context("FUSE mount")?; + + let mut serve_options = composefs_fuse::ServeFuseOptions::default(); + serve_options.set_overlay_xattr(Some(composefs_fuse::OverlayXattrMode::User)); + + let serve_objects = Arc::clone(&objects_fd); + let serve_dev = dev_fuse; + let join_handle = std::thread::spawn(move || { + composefs_fuse::serve_fuse_fd(serve_dev, image_fd, serve_objects, &serve_options) + }); + + let read_write = mount_options.read_write(); + let mut overlay_options = composefs_fuse::OverlayMountOptions::default(); + if let Some((upper_fd, work_fd)) = mount_options.into_overlay() { + overlay_options.set_overlay(upper_fd, work_fd); + } + overlay_options.set_read_write(read_write); + overlay_options.set_enable_verity(enable_verity); + + let overlay_mnt = + composefs_fuse::mount_fuse_overlay(fuse_mnt, &*objects_fd, &overlay_options) + .context("overlay mount")?; + composefs::mount::mount_at(overlay_mnt, CWD, mountpoint)?; + + if let Some(fd) = ready_fd { + let _ = rustix::io::write(&fd, b"r"); + } + + join_handle + .join() + .map_err(|_| anyhow::anyhow!("FUSE server thread panicked"))? + .context("FUSE server error")?; + } + } + Ok(()) +} + +/// Re-exec ourselves as `--internal-fuse-serve` to run the FUSE server in a +/// clean process without the tokio runtime. The parent waits on a pipe for +/// mount readiness, then returns. +#[cfg(feature = "fuse")] +#[allow(unsafe_code)] +fn run_fuse_mount( + repo: &Arc>, + name: &str, + mountpoint: &str, + mode: MountMode, + mount_options: MountOptions, + foreground: bool, +) -> Result<()> { + if foreground { + let (image_fd, enable_verity) = repo.open_image(name)?; + let objects_fd = Arc::new(repo.objects_dir()?.try_clone()?); + return run_fuse_foreground( + image_fd, + objects_fd, + mountpoint, + mode, + mount_options, + enable_verity, + None, + ); + } + + use std::os::fd::AsRawFd; + use std::os::unix::process::CommandExt; + + let (image_fd, enable_verity) = repo.open_image(name)?; + let (read_pipe, write_pipe) = rustix::pipe::pipe_with(rustix::pipe::PipeFlags::CLOEXEC)?; + + let self_exe = std::env::current_exe().context("resolving own binary path")?; + let mut cmd = std::process::Command::new(&self_exe); + cmd.arg("--internal-fuse-serve"); + cmd.arg("--mountpoint").arg(mountpoint); + cmd.arg("--image-fd").arg(image_fd.as_raw_fd().to_string()); + let repo_fd = repo.repo_fd().try_clone_to_owned()?; + cmd.arg("--repo-fd").arg(repo_fd.as_raw_fd().to_string()); + cmd.arg("--ready-fd") + .arg(write_pipe.as_raw_fd().to_string()); + + match mode { + MountMode::Kernel => unreachable!(), + MountMode::Fuse => cmd.arg("--mode").arg("fuse"), + MountMode::FuseOverlay => cmd.arg("--mode").arg("fuse-overlay"), + }; + + if enable_verity { + cmd.arg("--enable-verity"); + } + if mount_options.read_write() { + cmd.arg("--read-write"); + } + if let Some((upper_fd, work_fd)) = mount_options.into_overlay() { + cmd.arg("--upper-fd").arg(upper_fd.as_raw_fd().to_string()); + cmd.arg("--work-fd").arg(work_fd.as_raw_fd().to_string()); + clear_cloexec(&upper_fd); + clear_cloexec(&work_fd); + std::mem::forget(upper_fd); + std::mem::forget(work_fd); + } + + clear_cloexec(&image_fd); + clear_cloexec(&repo_fd); + clear_cloexec(&write_pipe); + + unsafe { + cmd.pre_exec(|| { + let _ = rustix::process::setsid(); + Ok(()) + }); + } + + std::mem::forget(image_fd); + std::mem::forget(repo_fd); + std::mem::forget(write_pipe); + + cmd.stdin(std::process::Stdio::null()); + cmd.stdout(std::process::Stdio::null()); + cmd.stderr(std::process::Stdio::inherit()); + + let _child = cmd.spawn().context("spawning FUSE server process")?; + + // Wait for mount readiness + let mut buf = [0u8; 1]; + let _ = rustix::io::read(&read_pipe, &mut buf); + + Ok(()) +} + +#[cfg(feature = "fuse")] +fn clear_cloexec(fd: &impl std::os::fd::AsFd) { + let _ = rustix::io::fcntl_setfd(fd, rustix::io::FdFlags::empty()); +} + +/// Arguments for the internal FUSE server process. +#[cfg(feature = "fuse")] +#[derive(Debug, clap::Parser)] +pub struct InternalFuseServeArgs { + #[arg(long)] + mountpoint: String, + #[arg(long)] + image_fd: i32, + #[arg(long)] + repo_fd: i32, + #[arg(long)] + ready_fd: i32, + #[arg(long, value_parser = ["fuse", "fuse-overlay"])] + mode: String, + #[arg(long)] + enable_verity: bool, + #[arg(long)] + read_write: bool, + #[arg(long)] + upper_fd: Option, + #[arg(long)] + work_fd: Option, +} + +/// Entry point for the internal FUSE server process, called from main() +/// before the tokio runtime is created. +#[cfg(feature = "fuse")] +#[allow(unsafe_code)] +pub fn run_internal_fuse_serve(args: InternalFuseServeArgs) -> Result<()> { + use std::os::fd::FromRawFd; + + let image_fd = unsafe { std::os::fd::OwnedFd::from_raw_fd(args.image_fd) }; + let repo_fd = unsafe { std::os::fd::OwnedFd::from_raw_fd(args.repo_fd) }; + let ready_fd = unsafe { std::os::fd::OwnedFd::from_raw_fd(args.ready_fd) }; + + let objects_fd = Arc::new( + rustix::fs::openat( + &repo_fd, + "objects", + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) + .context("opening objects dir")?, + ); + + let mode = match args.mode.as_str() { + "fuse" => MountMode::Fuse, + "fuse-overlay" => MountMode::FuseOverlay, + _ => unreachable!(), + }; + + let mut mount_options = MountOptions::default(); + if let (Some(upper_raw), Some(work_raw)) = (args.upper_fd, args.work_fd) { + let upper_fd = unsafe { std::os::fd::OwnedFd::from_raw_fd(upper_raw) }; + let work_fd = unsafe { std::os::fd::OwnedFd::from_raw_fd(work_raw) }; + mount_options.set_overlay(upper_fd, work_fd); + } + mount_options.set_read_write(args.read_write); + + run_fuse_foreground( + image_fd, + objects_fd, + &args.mountpoint, + mode, + mount_options, + args.enable_verity, + Some(ready_fd), + ) +} + #[cfg(feature = "oci")] pub(crate) fn verity_opt(opt: &Option) -> Result> where @@ -1388,12 +1697,16 @@ where ref image, ref mountpoint, bootable, + #[cfg(feature = "fuse")] + fuse, + #[cfg(feature = "fuse")] + no_fuse, + #[cfg(feature = "fuse")] + foreground, ref upperdir, ref workdir, read_write, } => { - let mount_options = - get_mount_options(upperdir.as_deref(), workdir.as_deref(), read_write)?; let img = if image.starts_with("sha256:") { let digest: composefs_oci::OciDigest = image.parse().context("Parsing manifest digest")?; @@ -1416,7 +1729,25 @@ where ), } }; - repo.mount_at(&erofs_id.to_hex(), mountpoint.as_str(), &mount_options)?; + let erofs_name = erofs_id.to_hex(); + let mount_options = + get_mount_options(upperdir.as_deref(), workdir.as_deref(), read_write)?; + + #[cfg(feature = "fuse")] + if let mode @ (MountMode::Fuse | MountMode::FuseOverlay) = + detect_mount_mode(fuse, no_fuse, upperdir.is_some()) + { + run_fuse_mount( + &repo, + &erofs_name, + mountpoint.as_str(), + mode, + mount_options, + foreground, + )?; + } else { + repo.mount_at(&erofs_name, mountpoint.as_str(), &mount_options)?; + } } OciCommand::ComputeId { config_opts } => { let fs = load_filesystem_from_oci_image(&repo, config_opts)?; @@ -1699,14 +2030,36 @@ where OstreeCommand::Mount { ref commit, ref mountpoint, + #[cfg(feature = "fuse")] + fuse, + #[cfg(feature = "fuse")] + no_fuse, + #[cfg(feature = "fuse")] + foreground, ref upperdir, ref workdir, read_write, } => { + let image_id = composefs_ostree::get_image_ref(&repo, commit)?; + let image_name = image_id.to_hex(); let mount_options = get_mount_options(upperdir.as_deref(), workdir.as_deref(), read_write)?; - let image_id = composefs_ostree::get_image_ref(&repo, commit)?; - repo.mount_at(&image_id.to_hex(), mountpoint.as_str(), &mount_options)?; + + #[cfg(feature = "fuse")] + if let mode @ (MountMode::Fuse | MountMode::FuseOverlay) = + detect_mount_mode(fuse, no_fuse, upperdir.is_some()) + { + run_fuse_mount( + &repo, + &image_name, + mountpoint.as_str(), + mode, + mount_options, + foreground, + )?; + } else { + repo.mount_at(&image_name, mountpoint.as_str(), &mount_options)?; + } } OstreeCommand::Dump { ref commit_name } => { let fs = composefs_ostree::create_filesystem(&repo, commit_name)?; @@ -1766,13 +2119,27 @@ where Command::Mount { name, mountpoint, + #[cfg(feature = "fuse")] + fuse, + #[cfg(feature = "fuse")] + no_fuse, + #[cfg(feature = "fuse")] + foreground, ref upperdir, ref workdir, read_write, } => { let mount_options = get_mount_options(upperdir.as_deref(), workdir.as_deref(), read_write)?; - repo.mount_at(&name, &mountpoint, &mount_options)?; + + #[cfg(feature = "fuse")] + if let mode @ (MountMode::Fuse | MountMode::FuseOverlay) = + detect_mount_mode(fuse, no_fuse, upperdir.is_some()) + { + run_fuse_mount(&repo, &name, &mountpoint, mode, mount_options, foreground)?; + } else { + repo.mount_at(&name, &mountpoint, &mount_options)?; + } } Command::ImageObjects { name } => { let objects = repo.objects_for_image(&name)?; diff --git a/crates/composefs-ctl/src/main.rs b/crates/composefs-ctl/src/main.rs index 942f1fc0..4ace5574 100644 --- a/crates/composefs-ctl/src/main.rs +++ b/crates/composefs-ctl/src/main.rs @@ -59,6 +59,19 @@ fn main() -> Result<()> { _ if std::env::args_os().nth(1).as_deref() == Some(OsStr::new("mount.composefs")) => { composefs_ctl::mountcomposefs::run_from_args(rest_of_args()) } + _ if std::env::args_os().nth(1).as_deref() == Some(OsStr::new("--internal-fuse-serve")) => { + #[cfg(feature = "fuse")] + { + use clap::Parser; + let args = + composefs_ctl::InternalFuseServeArgs::parse_from(std::env::args_os().skip(1)); + composefs_ctl::run_internal_fuse_serve(args) + } + #[cfg(not(feature = "fuse"))] + { + anyhow::bail!("--internal-fuse-serve requires the 'fuse' feature"); + } + } _ => { // If we were spawned as a userns helper process, handle that and exit. // This MUST be called before the tokio runtime is created. diff --git a/crates/composefs-fuse/Cargo.toml b/crates/composefs-fuse/Cargo.toml index 05f6fcfe..f1558228 100644 --- a/crates/composefs-fuse/Cargo.toml +++ b/crates/composefs-fuse/Cargo.toml @@ -13,6 +13,8 @@ version.workspace = true [dependencies] anyhow = { version = "1.0.98", default-features = false } composefs = { workspace = true } -fuser = { version = "0.15.1", default-features = false, features = ["abi-7-31"] } +fuser = { version = "0.17.0", default-features = false } log = { version = "0.4.8", default-features = false } +memmap2 = { version = "0.9", default-features = false } rustix = { version = "1.0.0", default-features = false, features = ["fs", "mount"] } +zerocopy = { version = "0.8.0", default-features = false } diff --git a/crates/composefs-fuse/src/lib.rs b/crates/composefs-fuse/src/lib.rs index 20d108f3..45bcef79 100644 --- a/crates/composefs-fuse/src/lib.rs +++ b/crates/composefs-fuse/src/lib.rs @@ -1,205 +1,248 @@ -//! FUSE filesystem implementation for composefs trees. +//! FUSE filesystem implementation for composefs EROFS images. //! -//! This crate provides a userspace filesystem implementation that exposes composefs -//! directory trees through FUSE. It supports read-only access to files, directories, -//! symlinks, and extended attributes, with data served from a composefs repository. +//! This crate serves a composefs EROFS image directly over FUSE without +//! parsing the entire image into a high-level tree. FUSE inode numbers +//! are EROFS NIDs, and all metadata is resolved on demand from the +//! on-disk structures. -#![forbid(unsafe_code)] +#![deny(unsafe_code)] use std::{ + borrow::Cow, collections::HashMap, ffi::OsStr, os::{ fd::{AsFd, AsRawFd, OwnedFd}, unix::ffi::OsStrExt, }, + path::Path, + sync::{Arc, Mutex}, time::{Duration, SystemTime}, }; use anyhow::Context; use fuser::{ - FileAttr, FileType, Filesystem, ReplyAttr, ReplyData, ReplyDirectory, ReplyEntry, ReplyOpen, - Request, Session, SessionACL, + Config, FileAttr, FileHandle, FileType, Filesystem, FopenFlags, Generation, INodeNo, + MountOption, OpenFlags, ReplyAttr, ReplyData, ReplyDirectory, ReplyDirectoryPlus, ReplyEntry, + ReplyOpen, Request, Session, SessionACL, }; use rustix::{ buffer::spare_capacity, - fs::{Mode, OFlags, open}, - io::{Errno, pread}, + fs::{Mode, OFlags, open, openat}, + io::pread, mount::{ FsMountFlags, MountAttrFlags, fsconfig_create, fsconfig_set_flag, fsconfig_set_string, fsmount, }, }; +use zerocopy::FromBytes as _; + use composefs::{ - fsverity::FsVerityHashValue, - generic_tree::LeafId, + erofs::{ + format::{ + self, DataLayout, FileType as ErofsFileType, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, + S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, XATTR_PREFIXES, + }, + reader::{DirectoryBlock, Image, InodeHeader, InodeOps, InodeType}, + }, mount::FsHandle, - repository::Repository, - tree::{Directory, FileSystem, Inode, Leaf, LeafContent, RegularFile, Stat}, + mountcompat::{overlayfs_set_fd, overlayfs_set_lower_and_data_fds, prepare_mount}, }; const TTL: Duration = Duration::from_secs(1_000_000); -/// FUSE inode number. Assigned eagerly at mount time. -/// -/// Inode 1 is the root directory, then all other nodes get sequential -/// numbers from a depth-first walk. The numbering is an internal FUSE -/// concern and not exposed in the public API. +/// Controls the overlay xattr namespace. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +#[non_exhaustive] +pub enum OverlayXattrMode { + /// Synthesize `user.overlay.*` xattrs (for unprivileged `userxattr` mounts). + #[default] + User, + /// Synthesize `trusted.overlay.*` xattrs (requires CAP_SYS_ADMIN). + Trusted, +} + +/// FUSE inode number = EROFS NID. type Ino = u64; -/// Precomputed inode number assignments for the entire filesystem tree. -/// -/// Directories are identified by pointer (stable because the tree is -/// borrowed immutably for the lifetime of the FUSE session). Leaves -/// are identified by `LeafId`. -#[derive(Debug)] -struct InodeMap { - /// Directory pointer → inode number. - dir_inos: HashMap<*const Directory, Ino>, - /// LeafId → inode number. Indexed by `LeafId.0`. - /// Hardlinked leaves (same `LeafId`) naturally get the same ino. - leaf_inos: Vec, -} - -impl InodeMap { - /// Walk the tree and assign sequential inode numbers. - fn build(fs: &FileSystem) -> Self { - let mut next_ino: Ino = 1; // root = 1 - let mut dir_inos = HashMap::new(); - let mut leaf_inos = vec![0u64; fs.leaves.len()]; - - fn walk( - dir: &Directory, - next_ino: &mut Ino, - dir_inos: &mut HashMap<*const Directory, Ino>, - leaf_inos: &mut [Ino], - ) { - let ino = *next_ino; - *next_ino += 1; - dir_inos.insert(dir as *const _, ino); - - for (_, inode) in dir.entries() { - match inode { - Inode::Directory(subdir) => walk(subdir, next_ino, dir_inos, leaf_inos), - Inode::Leaf(id, _) => { - if leaf_inos[id.0] == 0 { - leaf_inos[id.0] = *next_ino; - *next_ino += 1; - } - // Hardlinks: same LeafId keeps the same ino. - } - } - } - } +fn mode_to_filetype(mode: u16) -> FileType { + match mode & S_IFMT { + S_IFREG => FileType::RegularFile, + S_IFDIR => FileType::Directory, + S_IFCHR => FileType::CharDevice, + S_IFBLK => FileType::BlockDevice, + S_IFIFO => FileType::NamedPipe, + S_IFLNK => FileType::Symlink, + S_IFSOCK => FileType::Socket, + _ => FileType::RegularFile, + } +} - walk(&fs.root, &mut next_ino, &mut dir_inos, &mut leaf_inos); - InodeMap { - dir_inos, - leaf_inos, - } +fn inode_rdev(inode: &InodeType) -> u32 { + let mode = inode.mode().0.get(); + match mode & S_IFMT { + S_IFCHR | S_IFBLK => inode.u(), + _ => 0, } +} - fn dir_ino(&self, dir: &Directory) -> Ino { - self.dir_inos[&(dir as *const _)] +fn inode_fileattr(image: &Image, nid: Ino, inode: &InodeType) -> FileAttr { + let mode = inode.mode().0.get(); + let mtime = match inode { + InodeType::Extended(i) => { + let secs = (i.header.mtime.get() as i64).max(0) as u64; + SystemTime::UNIX_EPOCH + Duration::from_secs(secs) + } + InodeType::Compact(_) => { + let secs = (image.sb.build_time.get() as i64).max(0) as u64; + SystemTime::UNIX_EPOCH + Duration::from_secs(secs) + } + }; + let (uid, gid) = match inode { + InodeType::Extended(i) => (i.header.uid.get(), i.header.gid.get()), + InodeType::Compact(i) => (i.header.uid.get() as u32, i.header.gid.get() as u32), + }; + let size = match mode & S_IFMT { + S_IFDIR => 0, + _ => inode.size(), + }; + + FileAttr { + ino: INodeNo(nid), + size, + blocks: 1, + atime: mtime, + mtime, + ctime: mtime, + crtime: mtime, + kind: mode_to_filetype(mode), + perm: mode & 0o7777, + nlink: inode.nlink(), + uid, + gid, + rdev: inode_rdev(inode), + blksize: 4096, + flags: 0, } +} - fn leaf_ino(&self, id: LeafId) -> Ino { - self.leaf_inos[id.0] +fn inode_fileattr_overlay(image: &Image, nid: Ino, inode: &InodeType) -> FileAttr { + let mut attr = inode_fileattr(image, nid, inode); + if is_whiteout(image, inode) { + attr.kind = FileType::RegularFile; + attr.size = 0; + attr.rdev = 0; } + attr +} + +fn is_whiteout(image: &Image, inode: &InodeType) -> bool { + has_xattr(image, inode, b"trusted.overlay.overlay.whiteout") +} - fn inode_ino(&self, inode: &Inode) -> Ino { - match inode { - Inode::Directory(dir) => self.dir_ino(dir), - Inode::Leaf(id, _) => self.leaf_ino(*id), +fn has_xattr(image: &Image, inode: &InodeType, name: &[u8]) -> bool { + find_raw_xattr(image, inode, name).is_some() +} + +fn find_raw_xattr(image: &Image, inode: &InodeType, name: &[u8]) -> Option> { + let xattrs_section = inode.xattrs().ok()??; + for id in xattrs_section.shared().ok()? { + let xattr = image.shared_xattr(id.get()).ok()?; + if xattr_full_name(xattr) == name { + return Some(xattr.value().ok()?.to_vec()); + } + } + for xattr_result in xattrs_section.local().ok()? { + let xattr = xattr_result.ok()?; + if xattr_full_name(xattr) == name { + return Some(xattr.value().ok()?.to_vec()); } } + None } -/// A reference to a filesystem node, used for FUSE inode lookup. -#[derive(Debug, Clone)] -enum InodeRef<'a, ObjectID: FsVerityHashValue> { - Directory(&'a Directory, Ino), - Leaf(LeafId, &'a Leaf), +fn xattr_full_name(xattr: &composefs::erofs::reader::XAttr) -> Vec { + let idx = xattr.header.name_index as usize; + let prefix = if idx < XATTR_PREFIXES.len() { + XATTR_PREFIXES[idx] + } else { + b"" + }; + let suffix = xattr.suffix().unwrap_or(b""); + let mut name = Vec::with_capacity(prefix.len() + suffix.len()); + name.extend_from_slice(prefix); + name.extend_from_slice(suffix); + name } -impl<'a, ObjectID: FsVerityHashValue> InodeRef<'a, ObjectID> { - fn nlink(&self, nlink_map: &[u32]) -> u32 { - (match self { - InodeRef::Directory(dir, ..) => { - 2 + dir - .inodes() - .filter(|i| matches!(i, Inode::Directory(..))) - .count() - } - InodeRef::Leaf(leaf_id, _) => nlink_map[leaf_id.0] as usize, - }) as u32 - } +const TRUSTED_OVERLAY_PREFIX: &[u8] = b"trusted.overlay."; +const USER_OVERLAY_PREFIX: &[u8] = b"user.overlay."; +const ESCAPED_OVERLAY_PREFIX: &[u8] = b"trusted.overlay.overlay."; - fn rdev(&self) -> u32 { - (match self { - InodeRef::Directory(..) => 0, - InodeRef::Leaf(_, leaf) => match &leaf.content { - LeafContent::BlockDevice(rdev) | LeafContent::CharacterDevice(rdev) => *rdev, - _ => 0, - }, - }) as u32 - } - - fn kind(&self) -> FileType { - match self { - InodeRef::Directory(..) => FileType::Directory, - InodeRef::Leaf(_, leaf) => match leaf.content { - LeafContent::BlockDevice(..) => FileType::BlockDevice, - LeafContent::CharacterDevice(..) => FileType::CharDevice, - LeafContent::Fifo => FileType::NamedPipe, - LeafContent::Regular(..) => FileType::RegularFile, - LeafContent::Socket => FileType::Socket, - LeafContent::Symlink(..) => FileType::Symlink, - }, - } +fn is_composefs_internal_xattr(name: &[u8]) -> bool { + name == format::XATTR_OVERLAY_METACOPY + || name == format::XATTR_OVERLAY_REDIRECT + || name.starts_with(ESCAPED_OVERLAY_PREFIX) +} + +fn unescape_xattr_name(name: &[u8]) -> Cow<'_, [u8]> { + if let Some(rest) = name.strip_prefix(ESCAPED_OVERLAY_PREFIX) { + let mut unescaped = Vec::with_capacity(TRUSTED_OVERLAY_PREFIX.len() + rest.len()); + unescaped.extend_from_slice(TRUSTED_OVERLAY_PREFIX); + unescaped.extend_from_slice(rest); + Cow::Owned(unescaped) + } else { + Cow::Borrowed(name) } +} - fn stat(&self) -> &'a Stat { - match self { - InodeRef::Directory(dir, ..) => &dir.stat, - InodeRef::Leaf(_, leaf) => &leaf.stat, - } +fn rewrite_xattr_name_for_user(name: &[u8]) -> Option> { + if let Some(rest) = name.strip_prefix(TRUSTED_OVERLAY_PREFIX) { + let mut rewritten = Vec::with_capacity(USER_OVERLAY_PREFIX.len() + rest.len()); + rewritten.extend_from_slice(USER_OVERLAY_PREFIX); + rewritten.extend_from_slice(rest); + Some(rewritten) + } else { + None } +} - fn size(&self) -> u64 { - match self { - InodeRef::Directory(..) => 0, - InodeRef::Leaf(_, leaf) => match &leaf.content { - LeafContent::Regular(RegularFile::Inline(data)) => data.len() as u64, - LeafContent::Regular(RegularFile::External(.., size)) => *size, - _ => 0, - }, +/// Iterate directory entries across inline data and blocks. +fn for_each_dir_entry(image: &Image, inode: &InodeType, mut f: F) -> Result<(), fuser::Errno> +where + F: FnMut(&composefs::erofs::reader::DirectoryEntry) -> std::ops::ControlFlow<()>, +{ + if let Some(inline) = inode.inline() + && let Ok(block) = DirectoryBlock::ref_from_bytes(inline) + && let Ok(entries) = block.entries() + { + for entry in entries.flatten() { + if entry.name == b"." || entry.name == b".." { + continue; + } + if f(&entry).is_break() { + return Ok(()); + } } } - - fn fileattr(&self, ino: Ino, nlink_map: &[u32]) -> FileAttr { - let stat = self.stat(); - let mtime = SystemTime::UNIX_EPOCH + Duration::from_secs(stat.st_mtim_sec as u64); - - FileAttr { - ino, - size: self.size(), - blocks: 1, - atime: mtime, - mtime, - ctime: mtime, - crtime: mtime, - kind: self.kind(), - perm: stat.st_mode as u16, - nlink: self.nlink(nlink_map), - uid: stat.st_uid, - gid: stat.st_gid, - rdev: self.rdev(), - blksize: 4096, - flags: 0, + if let Ok(block_range) = image.inode_blocks(inode) { + for block_id in block_range { + if let Ok(block) = image.directory_block(block_id) + && let Ok(entries) = block.entries() + { + for entry in entries.flatten() { + if entry.name == b"." || entry.name == b".." { + continue; + } + if f(&entry).is_break() { + return Ok(()); + } + } + } } } + Ok(()) } #[derive(Debug)] @@ -208,281 +251,540 @@ enum OpenHandle { Data(Box<[u8]>), } -#[derive(Debug)] -struct TreeFuse<'a, ObjectID: FsVerityHashValue> { - repo: &'a Repository, - fs: &'a FileSystem, - inode_map: InodeMap, - nlink_map: Vec, - inodes: HashMap>, - attrs: HashMap, +#[derive(Debug, Default)] +struct FuseHandles { handles: HashMap, next_fh: u64, } -impl<'a, ObjectID: FsVerityHashValue> TreeFuse<'a, ObjectID> { - fn register_inode(&mut self, inode: &'a Inode, parent: Ino) -> (Ino, FileType) { - let ino = self.inode_map.inode_ino(inode); - let iref = match inode { - Inode::Directory(dir) => InodeRef::Directory(dir, parent), - Inode::Leaf(leaf_id, _) => InodeRef::Leaf(*leaf_id, self.fs.leaf(*leaf_id)), - }; - let kind = iref.kind(); - self.attrs.insert(ino, iref.fileattr(ino, &self.nlink_map)); - self.inodes.insert(ino, iref); - (ino, kind) - } +#[derive(Debug)] +struct ComposefsFuse { + image: Image<'static>, + objects_fd: Arc, + overlay_xattr: Option, + handles: Mutex, } -impl Filesystem for TreeFuse<'_, ObjectID> { - fn statfs(&mut self, _req: &Request<'_>, _ino: u64, reply: fuser::ReplyStatfs) { - reply.statfs(0, 0, 0, 0, 0, 4096, 255, 4096); +impl ComposefsFuse { + fn root_nid(&self) -> Ino { + self.image.sb.root_nid.get() as Ino } - fn lookup(&mut self, _req: &Request, parent: u64, name: &OsStr, reply: ReplyEntry) { - log::trace!("lookup {parent} {name:?}"); - let Some(InodeRef::Directory(dir, ..)) = self.inodes.get(&parent) else { - log::error!("lookup({parent}, {name:?}) parent does not exist"); - return reply.error(Errno::BADF.raw_os_error()); - }; - let dir = *dir; + /// Translate a FUSE inode number to an EROFS NID. + /// FUSE always uses inode 1 for the root, but EROFS root NID may differ. + fn fuse_ino_to_nid(&self, ino: Ino) -> Ino { + if ino == 1 { self.root_nid() } else { ino } + } - match dir.lookup(name) { - Some(inode) => { - let (ino, _) = self.register_inode(inode, parent); - reply.entry(&TTL, self.attrs.get(&ino).unwrap(), 0); - } - None => reply.error(Errno::NOENT.raw_os_error()), - } + /// Translate an EROFS NID to a FUSE inode number. + fn nid_to_fuse_ino(&self, nid: Ino) -> Ino { + if nid == self.root_nid() { 1 } else { nid } } - fn getattr(&mut self, _req: &Request, ino: u64, _fh: Option, reply: ReplyAttr) { - if let Some(attrs) = self.attrs.get(&ino) { - return reply.attr(&TTL, attrs); + fn get_inode(&self, nid: Ino) -> Result, fuser::Errno> { + self.image.inode(nid).map_err(|e| { + log::error!("inode({nid}): {e}"); + fuser::Errno::EIO + }) + } + + fn get_fileattr(&self, fuse_ino: Ino) -> Result { + let nid = self.fuse_ino_to_nid(fuse_ino); + let inode = self.get_inode(nid)?; + if self.overlay_xattr.is_some() { + Ok(inode_fileattr_overlay(&self.image, fuse_ino, &inode)) + } else { + Ok(inode_fileattr(&self.image, fuse_ino, &inode)) } + } + + fn open_object_by_redirect(&self, inode: &InodeType) -> Result { + let redirect = find_raw_xattr(&self.image, inode, format::XATTR_OVERLAY_REDIRECT) + .ok_or(fuser::Errno::EIO)?; + let path = redirect.strip_prefix(b"/").unwrap_or(&redirect); + openat( + &*self.objects_fd, + OsStr::from_bytes(path), + OFlags::RDONLY | OFlags::CLOEXEC | OFlags::NOFOLLOW, + Mode::empty(), + ) + .map_err(|e| { + log::error!("open object {}: {e}", String::from_utf8_lossy(path)); + fuser::Errno::EIO + }) + } + + fn collect_xattr_names(&self, _nid: Ino, inode: &InodeType) -> Vec> { + let mut names = Vec::new(); + let Some(xattrs_section) = inode.xattrs().ok().flatten() else { + return names; + }; - let Some(iref) = self.inodes.get(&ino) else { - log::error!("getattr({ino}) inode does not exist"); - return reply.error(Errno::BADF.raw_os_error()); + let process_xattr = |names: &mut Vec>, raw_name: Vec| match self.overlay_xattr { + Some(OverlayXattrMode::User) => { + if let Some(rewritten) = rewrite_xattr_name_for_user(&raw_name) { + names.push(rewritten); + } else { + names.push(raw_name); + } + } + Some(OverlayXattrMode::Trusted) => { + names.push(raw_name); + } + None => { + if is_composefs_internal_xattr(&raw_name) { + let unescaped = unescape_xattr_name(&raw_name); + if unescaped != raw_name.as_slice() { + names.push(unescaped.into_owned()); + } + } else { + names.push(raw_name); + } + } }; - let iref = iref.clone(); - let attr = iref.fileattr(ino, &self.nlink_map); - self.attrs.insert(ino, attr); - reply.attr(&TTL, self.attrs.get(&ino).unwrap()); + if let Ok(shared) = xattrs_section.shared() { + for id in shared { + if let Ok(xattr) = self.image.shared_xattr(id.get()) { + process_xattr(&mut names, xattr_full_name(xattr)); + } + } + } + if let Ok(local) = xattrs_section.local() { + for xattr in local.flatten() { + process_xattr(&mut names, xattr_full_name(xattr)); + } + } + names } - fn readlink(&mut self, _req: &Request<'_>, ino: u64, reply: ReplyData) { - let Some(InodeRef::Leaf(_, leaf)) = self.inodes.get(&ino) else { - return reply.error(Errno::INVAL.raw_os_error()); + fn find_xattr_value(&self, _nid: Ino, inode: &InodeType, name: &[u8]) -> Option> { + let lookup_name: Cow<'_, [u8]> = match self.overlay_xattr { + Some(OverlayXattrMode::User) => { + if let Some(rest) = name.strip_prefix(USER_OVERLAY_PREFIX) { + let mut trusted = Vec::with_capacity(TRUSTED_OVERLAY_PREFIX.len() + rest.len()); + trusted.extend_from_slice(TRUSTED_OVERLAY_PREFIX); + trusted.extend_from_slice(rest); + Cow::Owned(trusted) + } else { + Cow::Borrowed(name) + } + } + Some(OverlayXattrMode::Trusted) => Cow::Borrowed(name), + None => { + if let Some(rest) = name.strip_prefix(TRUSTED_OVERLAY_PREFIX) { + let mut escaped = Vec::with_capacity(ESCAPED_OVERLAY_PREFIX.len() + rest.len()); + escaped.extend_from_slice(ESCAPED_OVERLAY_PREFIX); + escaped.extend_from_slice(rest); + if let Some(val) = find_raw_xattr(&self.image, inode, &escaped) { + return Some(val.to_vec()); + } + } + if is_composefs_internal_xattr(name) { + return None; + } + Cow::Borrowed(name) + } }; + find_raw_xattr(&self.image, inode, &lookup_name).map(|v| v.to_vec()) + } +} - let LeafContent::Symlink(target) = &leaf.content else { - return reply.error(Errno::INVAL.raw_os_error()); +impl Filesystem for ComposefsFuse { + fn statfs(&self, _req: &Request, _ino: INodeNo, reply: fuser::ReplyStatfs) { + reply.statfs(0, 0, 0, 0, 0, 4096, 255, 4096); + } + + fn forget(&self, _req: &Request, _ino: INodeNo, _nlookup: u64) {} + + fn lookup(&self, _req: &Request, parent: INodeNo, name: &OsStr, reply: ReplyEntry) { + let parent_nid = self.fuse_ino_to_nid(parent.0); + log::trace!("lookup {parent_nid} {name:?}"); + + let Ok(parent_inode) = self.get_inode(parent_nid) else { + return reply.error(fuser::Errno::EBADF); }; - reply.data(target.as_bytes()); + let name_bytes = name.as_bytes(); + let mut found = None; + let _ = for_each_dir_entry(&self.image, &parent_inode, |entry| { + if entry.name == name_bytes { + found = Some(entry.nid()); + std::ops::ControlFlow::Break(()) + } else { + std::ops::ControlFlow::Continue(()) + } + }); + + match found { + Some(child_nid) => { + let child_fuse_ino = self.nid_to_fuse_ino(child_nid); + match self.get_fileattr(child_fuse_ino) { + Ok(attrs) => reply.entry(&TTL, &attrs, Generation(0)), + Err(e) => reply.error(e), + } + } + None => reply.error(fuser::Errno::ENOENT), + } } - fn opendir(&mut self, _req: &Request<'_>, _ino: u64, _flags: i32, reply: ReplyOpen) { - reply.opened(0, 0); + fn getattr(&self, _req: &Request, ino: INodeNo, _fh: Option, reply: ReplyAttr) { + match self.get_fileattr(ino.0) { + Ok(attrs) => reply.attr(&TTL, &attrs), + Err(e) => reply.error(e), + } + } + + fn readlink(&self, _req: &Request, ino: INodeNo, reply: ReplyData) { + let Ok(inode) = self.get_inode(self.fuse_ino_to_nid(ino.0)) else { + return reply.error(fuser::Errno::EINVAL); + }; + match inode.inline() { + Some(data) => reply.data(data), + None => reply.error(fuser::Errno::EINVAL), + } + } + + fn opendir(&self, _req: &Request, _ino: INodeNo, _flags: OpenFlags, reply: ReplyOpen) { + reply.opened(FileHandle(0), FopenFlags::empty()); } fn readdir( - &mut self, + &self, _req: &Request, - ino: u64, - _fh: u64, - mut offset: i64, + ino: INodeNo, + _fh: FileHandle, + offset: u64, mut reply: ReplyDirectory, ) { - let Some(InodeRef::Directory(dir, parent)) = self.inodes.get(&ino) else { - log::error!("readdir({ino}) inode is not a directory"); - return reply.error(Errno::BADF.raw_os_error()); + let fuse_ino = ino.0; + let nid = self.fuse_ino_to_nid(fuse_ino); + let Ok(inode) = self.get_inode(nid) else { + return reply.error(fuser::Errno::EBADF); }; - let (dir, parent) = (*dir, *parent); - if offset == 0 { - offset += 1; - if reply.add(ino, offset, FileType::Directory, ".") { + let mut cur_offset = offset; + + if cur_offset == 0 { + cur_offset += 1; + if reply.add(INodeNo(fuse_ino), cur_offset, FileType::Directory, ".") { return reply.ok(); } } - if offset == 1 { - offset += 1; - if reply.add(parent, offset, FileType::Directory, "..") { + if cur_offset == 1 { + cur_offset += 1; + if reply.add(INodeNo(fuse_ino), cur_offset, FileType::Directory, "..") { return reply.ok(); } } - for (name, inode) in dir.sorted_entries().skip(offset as usize - 2) { - let (child_ino, kind) = self.register_inode(inode, ino); + let mut entry_idx: u64 = 2; + let _ = for_each_dir_entry(&self.image, &inode, |entry| { + if entry_idx < cur_offset { + entry_idx += 1; + return std::ops::ControlFlow::Continue(()); + } + let child_fuse_ino = self.nid_to_fuse_ino(entry.nid()); + let kind = match ErofsFileType::from(entry.header.file_type) { + ErofsFileType::RegularFile => FileType::RegularFile, + ErofsFileType::Directory => FileType::Directory, + ErofsFileType::CharacterDevice => FileType::CharDevice, + ErofsFileType::BlockDevice => FileType::BlockDevice, + ErofsFileType::Fifo => FileType::NamedPipe, + ErofsFileType::Socket => FileType::Socket, + ErofsFileType::Symlink => FileType::Symlink, + ErofsFileType::Unknown => FileType::RegularFile, + }; + entry_idx += 1; + if reply.add( + INodeNo(child_fuse_ino), + entry_idx, + kind, + OsStr::from_bytes(entry.name), + ) { + return std::ops::ControlFlow::Break(()); + } + std::ops::ControlFlow::Continue(()) + }); + + reply.ok(); + } + + fn readdirplus( + &self, + _req: &Request, + ino: INodeNo, + _fh: FileHandle, + offset: u64, + mut reply: ReplyDirectoryPlus, + ) { + let fuse_ino = ino.0; + let nid = self.fuse_ino_to_nid(fuse_ino); + let Ok(inode) = self.get_inode(nid) else { + return reply.error(fuser::Errno::EBADF); + }; + + let Ok(dir_attrs) = self.get_fileattr(fuse_ino) else { + return reply.error(fuser::Errno::EIO); + }; - offset += 1; - if reply.add(child_ino, offset, kind, name) { - break; + let mut cur_offset = offset; + + if cur_offset == 0 { + cur_offset += 1; + if reply.add( + INodeNo(fuse_ino), + cur_offset, + ".", + &TTL, + &dir_attrs, + Generation(0), + ) { + return reply.ok(); + } + } + + if cur_offset == 1 { + cur_offset += 1; + if reply.add( + INodeNo(fuse_ino), + cur_offset, + "..", + &TTL, + &dir_attrs, + Generation(0), + ) { + return reply.ok(); } } + let mut entry_idx: u64 = 2; + let _ = for_each_dir_entry(&self.image, &inode, |entry| { + if entry_idx < cur_offset { + entry_idx += 1; + return std::ops::ControlFlow::Continue(()); + } + let child_fuse_ino = self.nid_to_fuse_ino(entry.nid()); + let child_attrs = match self.get_fileattr(child_fuse_ino) { + Ok(a) => a, + Err(_) => { + entry_idx += 1; + return std::ops::ControlFlow::Continue(()); + } + }; + entry_idx += 1; + if reply.add( + INodeNo(child_fuse_ino), + entry_idx, + OsStr::from_bytes(entry.name), + &TTL, + &child_attrs, + Generation(0), + ) { + return std::ops::ControlFlow::Break(()); + } + std::ops::ControlFlow::Continue(()) + }); + reply.ok(); } fn releasedir( - &mut self, - _req: &Request<'_>, - _ino: u64, - _fh: u64, - _flags: i32, + &self, + _req: &Request, + _ino: INodeNo, + _fh: FileHandle, + _flags: OpenFlags, reply: fuser::ReplyEmpty, ) { reply.ok(); } fn getxattr( - &mut self, - _req: &Request<'_>, - ino: u64, + &self, + _req: &Request, + ino: INodeNo, name: &OsStr, size: u32, reply: fuser::ReplyXattr, ) { - let Some(iref) = self.inodes.get(&ino) else { - log::error!("getxattr({ino}, {name:?}, {size}) inode does not exist"); - return reply.error(Errno::BADF.raw_os_error()); + let nid = self.fuse_ino_to_nid(ino.0); + let Ok(inode) = self.get_inode(nid) else { + return reply.error(fuser::Errno::EBADF); }; - let xattrs = &iref.stat().xattrs; - let Some(value) = xattrs.get(name) else { - return reply.error(Errno::NODATA.raw_os_error()); - }; - - if size == 0 { - return reply.size(value.len() as u32); - } else if value.len() > size as usize { - return reply.error(Errno::RANGE.raw_os_error()); + match self.find_xattr_value(nid, &inode, name.as_bytes()) { + Some(value) => { + if size == 0 { + reply.size(value.len() as u32); + } else if value.len() > size as usize { + reply.error(fuser::Errno::ERANGE); + } else { + reply.data(&value); + } + } + None => reply.error(fuser::Errno::ENODATA), } - - reply.data(value); } - fn listxattr(&mut self, _req: &Request<'_>, ino: u64, size: u32, reply: fuser::ReplyXattr) { - let Some(iref) = self.inodes.get(&ino) else { - log::error!("listxattr({ino}, {size}) inode does not exist"); - return reply.error(Errno::BADF.raw_os_error()); + fn listxattr(&self, _req: &Request, ino: INodeNo, size: u32, reply: fuser::ReplyXattr) { + let nid = self.fuse_ino_to_nid(ino.0); + let Ok(inode) = self.get_inode(nid) else { + return reply.error(fuser::Errno::EBADF); }; - let mut list = vec![]; - for name in iref.stat().xattrs.keys() { - list.extend_from_slice(name.as_bytes()); + let names = self.collect_xattr_names(nid, &inode); + let mut list = Vec::new(); + for name in &names { + list.extend_from_slice(name); list.push(b'\0'); } if size == 0 { - return reply.size(list.len() as u32); + reply.size(list.len() as u32); } else if list.len() > size as usize { - return reply.error(Errno::RANGE.raw_os_error()); + reply.error(fuser::Errno::ERANGE); + } else { + reply.data(&list); } - - reply.data(&list); } - fn open(&mut self, _req: &Request<'_>, ino: u64, _flags: i32, reply: ReplyOpen) { - log::trace!("open({ino})"); - let Some(iref) = self.inodes.get(&ino) else { - log::error!("open({ino}) inode does not exist"); - return reply.error(Errno::BADF.raw_os_error()); + fn open(&self, _req: &Request, ino: INodeNo, _flags: OpenFlags, reply: ReplyOpen) { + let nid = self.fuse_ino_to_nid(ino.0); + log::trace!("open({nid})"); + + let Ok(inode) = self.get_inode(nid) else { + return reply.error(fuser::Errno::EBADF); }; - let InodeRef::Leaf(_, leaf) = iref else { - log::error!("open({ino}) inode is a directory"); - return reply.error(Errno::BADF.raw_os_error()); + let Ok(layout) = inode.data_layout() else { + return reply.error(fuser::Errno::EIO); }; - let handle = match &leaf.content { - LeafContent::Regular(RegularFile::External(id, ..)) => { - let Ok(fd) = self.repo.open_object(id) else { - log::error!("open({ino}) open object failed"); - return reply.error(Errno::INVAL.raw_os_error()); - }; - OpenHandle::Fd(fd) + let handle = match layout { + DataLayout::FlatInline => match inode.inline() { + Some(data) => OpenHandle::Data(data.into()), + None => OpenHandle::Data(Box::new([])), + }, + DataLayout::FlatPlain => { + if self.overlay_xattr.is_some() { + return reply.error(errno_to_fuser(rustix::io::Errno::OPNOTSUPP)); + } + match self.open_object_by_redirect(&inode) { + Ok(fd) => OpenHandle::Fd(fd), + Err(e) => return reply.error(e), + } } - LeafContent::Regular(RegularFile::Inline(data)) => OpenHandle::Data(data.clone()), - _ => { - log::error!("open({ino}) non-regular file"); - return reply.error(Errno::BADF.raw_os_error()); + DataLayout::ChunkBased => { + if self.overlay_xattr.is_some() { + return reply.error(errno_to_fuser(rustix::io::Errno::OPNOTSUPP)); + } + match self.open_object_by_redirect(&inode) { + Ok(fd) => OpenHandle::Fd(fd), + Err(e) => return reply.error(e), + } } }; - let fh = self.next_fh; - self.next_fh += 1; - log::debug!("self.handles.insert({fh}, {handle:?})"); - self.handles.insert(fh, handle); - reply.opened(fh, 0); + let mut state = self.handles.lock().expect("fuse handles mutex poisoned"); + let fh = state.next_fh; + state.next_fh += 1; + state.handles.insert(fh, handle); + reply.opened(FileHandle(fh), FopenFlags::FOPEN_KEEP_CACHE); } fn read( - &mut self, - _req: &Request<'_>, - _ino: u64, - fh: u64, - offset: i64, + &self, + _req: &Request, + _ino: INodeNo, + fh: FileHandle, + offset: u64, size: u32, - _flags: i32, - _lock_owner: Option, - reply: fuser::ReplyData, + _flags: OpenFlags, + _lock_owner: Option, + reply: ReplyData, ) { - match self.handles.get(&fh) { + let state = self.handles.lock().expect("fuse handles mutex poisoned"); + match state.handles.get(&fh.0) { Some(OpenHandle::Fd(fd)) => { let mut data = Vec::with_capacity(size as usize); - match pread(fd, spare_capacity(&mut data), offset as u64) { + match pread(fd, spare_capacity(&mut data), offset) { Ok(_) => reply.data(&data), - Err(errno) => reply.error(errno.raw_os_error()), + Err(errno) => reply.error(errno_to_fuser(errno)), } } Some(OpenHandle::Data(data)) => { - if offset as usize > data.len() { - reply.data(b""); - } else { - let mut data = &data[offset as usize..]; - if data.len() > size as usize { - data = &data[..size as usize]; - } - reply.data(data); - } + let start = (offset as usize).min(data.len()); + let end = (start + size as usize).min(data.len()); + reply.data(&data[start..end]); } None => { - log::error!("Handle doesn't exist: pread({fh}, {size}, {offset})"); - reply.error(Errno::BADF.raw_os_error()); + log::error!("read(fh={fh}): handle does not exist"); + reply.error(fuser::Errno::EBADF); } } } fn release( - &mut self, - _req: &Request<'_>, - _ino: u64, - fh: u64, - _flags: i32, - _lock_owner: Option, + &self, + _req: &Request, + _ino: INodeNo, + fh: FileHandle, + _flags: OpenFlags, + _lock_owner: Option, _flush: bool, reply: fuser::ReplyEmpty, ) { - match self.handles.remove(&fh) { + let mut state = self.handles.lock().expect("fuse handles mutex poisoned"); + match state.handles.remove(&fh.0) { Some(_) => reply.ok(), None => { - log::error!("Handle doesn't exist: close({fh})"); - reply.error(Errno::BADF.raw_os_error()) + log::error!("release(fh={fh}): handle does not exist"); + reply.error(fuser::Errno::EBADF); } } } } -/// Opens /dev/fuse. +fn errno_to_fuser(errno: rustix::io::Errno) -> fuser::Errno { + fuser::Errno::from(std::io::Error::from_raw_os_error(errno.raw_os_error())) +} + +/// Check if an fd has fs-verity enabled, meaning its contents cannot change. +fn is_safe_to_mmap(fd: &impl AsFd) -> bool { + composefs::fsverity::measure_verity_opt::(fd) + .ok() + .flatten() + .is_some() +} + +/// Load an EROFS image from a file descriptor. +/// +/// If the image has fs-verity enabled (contents guaranteed immutable), +/// it is memory-mapped for zero-copy access. Otherwise it is read into +/// an owned buffer. /// -/// After you do this, you can mount it using mount_fuse() and then start serving requests using -/// serve_tree_fuse(). You might want to do this in different threads, which is why these -/// operations are defined separately. +/// Returns a `&'static [u8]` via `Box::leak` — the FUSE server process +/// lives until unmount, so the leak is harmless. +#[allow(unsafe_code)] +fn load_image(fd: OwnedFd) -> anyhow::Result<&'static [u8]> { + if is_safe_to_mmap(&fd) { + let file = std::fs::File::from(fd); + let mmap = unsafe { memmap2::Mmap::map(&file) }.context("mmap EROFS image")?; + let leaked: &'static memmap2::Mmap = Box::leak(Box::new(mmap)); + Ok(leaked.as_ref()) + } else { + use std::io::Read as _; + let mut buf = Vec::new(); + std::fs::File::from(fd) + .read_to_end(&mut buf) + .context("reading EROFS image")?; + Ok(Vec::leak(buf)) + } +} + +/// Opens /dev/fuse. pub fn open_fuse() -> anyhow::Result { open("/dev/fuse", OFlags::RDWR | OFlags::CLOEXEC, Mode::empty()) .context("Unable to open fuse device /dev/fuse") @@ -497,10 +799,6 @@ pub struct FuseMountOptions { impl FuseMountOptions { /// Allow users other than the mounter to access the filesystem. - /// - /// Requires either CAP_SYS_ADMIN in the init user namespace or - /// `user_allow_other` in `/etc/fuse.conf`. Should be set to false - /// when mounting inside a user namespace. pub fn set_allow_other(&mut self, allow_other: bool) -> &mut Self { self.allow_other = allow_other; self @@ -509,9 +807,11 @@ impl FuseMountOptions { /// Mounts a FUSE filesystem with the given /dev/fuse fd. /// -/// This does the necessary dance of creating the mount object, given a /dev/fuse device node. In -/// order for this to be useful, you'll also need to call serve_tree_fuse() to actually satisfy the -/// requests for data. +/// Returns a detached FUSE mount fd. You'll need to call +/// [`serve_fuse`] to actually satisfy the FUSE requests. +/// +/// For overlay-lower mode, call [`mount_fuse_overlay`] *after* the FUSE +/// server is running to layer an overlayfs on top. pub fn mount_fuse(dev_fuse: impl AsFd, options: &FuseMountOptions) -> anyhow::Result { let fusefs = FsHandle::open("fuse")?; fsconfig_set_flag(fusefs.as_fd(), "ro")?; @@ -536,30 +836,148 @@ pub fn mount_fuse(dev_fuse: impl AsFd, options: &FuseMountOptions) -> anyhow::Re )?) } -/// Serves a FUSE filesystem exposing the content of `filesystem`, backed by `repo`. +/// Options controlling how an overlayfs is created on top of a FUSE mount. +#[derive(Debug, Default)] +#[non_exhaustive] +pub struct OverlayMountOptions { + overlay_xattr: OverlayXattrMode, + upperdirs: Option<(OwnedFd, OwnedFd)>, + read_write: bool, + enable_verity: bool, +} + +impl OverlayMountOptions { + /// Set the overlay xattr mode. Defaults to [`OverlayXattrMode::User`]. + pub fn set_overlay_xattr(&mut self, mode: OverlayXattrMode) -> &mut Self { + self.overlay_xattr = mode; + self + } + + /// Add an overlayfs upper layer and work directory. + pub fn set_overlay(&mut self, upperdir: OwnedFd, workdir: OwnedFd) -> &mut Self { + self.upperdirs = Some((upperdir, workdir)); + self + } + + /// Make the mount read-write. + pub fn set_read_write(&mut self, read_write: bool) -> &mut Self { + self.read_write = read_write; + self + } + + /// Require fs-verity for overlay metacopy verification. + pub fn set_enable_verity(&mut self, enable_verity: bool) -> &mut Self { + self.enable_verity = enable_verity; + self + } +} + +/// Creates an overlayfs on top of a FUSE mount. +pub fn mount_fuse_overlay( + fuse_mnt: OwnedFd, + basedir: impl AsFd, + options: &OverlayMountOptions, +) -> anyhow::Result { + let prepared = prepare_mount(fuse_mnt)?; + + let overlayfs = FsHandle::open("overlay")?; + fsconfig_set_string(overlayfs.as_fd(), "source", "composefs-fuse")?; + if options.overlay_xattr == OverlayXattrMode::User { + fsconfig_set_flag(overlayfs.as_fd(), "userxattr")?; + } + if options.enable_verity { + fsconfig_set_string(overlayfs.as_fd(), "verity", "require")?; + } + if let Some((upperdir, workdir)) = &options.upperdirs { + overlayfs_set_fd(overlayfs.as_fd(), "upperdir", upperdir.as_fd())?; + overlayfs_set_fd(overlayfs.as_fd(), "workdir", workdir.as_fd())?; + } + overlayfs_set_lower_and_data_fds(&overlayfs, &prepared, &[basedir.as_fd()])?; + fsconfig_create(overlayfs.as_fd())?; + + let mount_attr = if options.read_write { + MountAttrFlags::empty() + } else { + MountAttrFlags::MOUNT_ATTR_RDONLY + }; + Ok(fsmount( + overlayfs.as_fd(), + FsMountFlags::FSMOUNT_CLOEXEC, + mount_attr, + )?) +} + +/// Options controlling how the FUSE server behaves. +#[derive(Debug, Default)] +#[non_exhaustive] +pub struct ServeFuseOptions { + overlay_xattr: Option, +} + +impl ServeFuseOptions { + /// Set the overlay xattr mode. When `Some`, the server presents overlay + /// xattrs and refuses to open external files. When `None` (the default), + /// the server follows redirects and serves file content from the + /// repository directly. + pub fn set_overlay_xattr(&mut self, mode: Option) -> &mut Self { + self.overlay_xattr = mode; + self + } +} + +fn build_fuse( + image_fd: OwnedFd, + objects_fd: Arc, + options: &ServeFuseOptions, +) -> std::io::Result<(ComposefsFuse, Config)> { + let image_bytes = load_image(image_fd).map_err(|e| std::io::Error::other(format!("{e:#}")))?; + let image = Image::open(image_bytes).map_err(|e| std::io::Error::other(format!("{e}")))?; + + let tf = ComposefsFuse { + image, + objects_fd, + overlay_xattr: options.overlay_xattr, + handles: Mutex::new(FuseHandles::default()), + }; + + Ok((tf, Config::default())) +} + +/// Mounts and serves a FUSE filesystem at `mountpoint`. +/// +/// Uses `Session::new` which handles `fusermount3` fallback for unprivileged +/// callers. Blocks until the session ends. /// -/// You should have called mount_fuse() on the dev_fuse fd to establish a mount point. -pub fn serve_tree_fuse<'a, ObjectID: FsVerityHashValue>( +/// If `ready_fd` is provided, a single byte is written after the mount is +/// established but before serving starts. +pub fn serve_fuse( + mountpoint: impl AsRef, + image_fd: OwnedFd, + objects_fd: Arc, + options: &ServeFuseOptions, + ready_fd: Option, +) -> std::io::Result<()> { + let (tf, mut config) = build_fuse(image_fd, objects_fd, options)?; + config.mount_options = vec![MountOption::RO, MountOption::DefaultPermissions]; + let session = Session::new(tf, mountpoint.as_ref(), &config)?; + if let Some(fd) = ready_fd { + let _ = rustix::io::write(&fd, b"r"); + } + session.spawn()?.join() +} + +/// Serves a FUSE filesystem over a pre-mounted `/dev/fuse` fd. +/// +/// Use together with [`open_fuse`] and [`mount_fuse`] when you need control +/// over the mount lifecycle. Blocks until the session ends. +pub fn serve_fuse_fd( dev_fuse: OwnedFd, - filesystem: &'a FileSystem, - repo: &'a Repository, + image_fd: OwnedFd, + objects_fd: Arc, + options: &ServeFuseOptions, ) -> std::io::Result<()> { - let inode_map = InodeMap::build(filesystem); - let nlink_map = filesystem.nlinks(); - - let root_ino = inode_map.dir_ino(&filesystem.root); - let root_ref = InodeRef::Directory(&filesystem.root, root_ino); - let root_attr = root_ref.fileattr(root_ino, &nlink_map); - - let tf = TreeFuse:: { - repo, - fs: filesystem, - inode_map, - nlink_map, - inodes: HashMap::from([(root_ino, root_ref)]), - attrs: HashMap::from([(root_ino, root_attr)]), - handles: Default::default(), - next_fh: 1, - }; - Session::from_fd(tf, dev_fuse, SessionACL::All).run() + let (tf, config) = build_fuse(image_fd, objects_fd, options)?; + Session::from_fd(tf, dev_fuse, SessionACL::All, config)? + .spawn()? + .join() } diff --git a/crates/composefs-integration-tests/Cargo.toml b/crates/composefs-integration-tests/Cargo.toml index b7b89b0c..d946ae7c 100644 --- a/crates/composefs-integration-tests/Cargo.toml +++ b/crates/composefs-integration-tests/Cargo.toml @@ -46,7 +46,7 @@ libtest-mimic = "0.8" linkme = "0.3" ocidir = { workspace = true } paste = "1" -rustix = { version = "1", features = ["fs", "process"] } +rustix = { version = "1", features = ["fs", "mount", "process"] } serde = { version = "1", features = ["derive"] } serde_json = "1" similar-asserts = "1" diff --git a/crates/composefs-integration-tests/src/tests/privileged.rs b/crates/composefs-integration-tests/src/tests/privileged.rs index a5c5f922..a1495f8d 100644 --- a/crates/composefs-integration-tests/src/tests/privileged.rs +++ b/crates/composefs-integration-tests/src/tests/privileged.rs @@ -968,3 +968,293 @@ fn privileged_cstor_import_xfs_reflink() -> Result<()> { Ok(()) } integration_test!(privileged_cstor_import_xfs_reflink); + +// ============================================================================ +// FUSE integration test +// ============================================================================ + +struct MountGuard { + mountpoint: PathBuf, + child: Option, +} + +impl Drop for MountGuard { + fn drop(&mut self) { + if let Some(mut child) = self.child.take() { + let _ = child.kill(); + let _ = child.wait(); + } + let _ = rustix::mount::unmount(&self.mountpoint, rustix::mount::UnmountFlags::DETACH); + } +} + +fn bigfile_content() -> Vec { + vec![b'A'; 600] +} + +fn biglib_content() -> Vec { + (0u8..=255).cycle().take(800).collect() +} + +fn build_test_filesystem( + repo: &Repository, +) -> Result> { + use std::collections::BTreeMap; + use std::ffi::OsStr; + + use composefs_oci::composefs::generic_tree::{LeafId, Stat}; + use composefs_oci::composefs::tree::{ + Directory, FileSystem, Inode, Leaf, LeafContent, RegularFile, + }; + + fn mkstat(mode: u32, uid: u32, gid: u32, mtime: i64) -> Stat { + Stat { + st_mode: mode, + st_uid: uid, + st_gid: gid, + st_mtim_sec: mtime, + st_mtim_nsec: 0, + xattrs: BTreeMap::new(), + } + } + + let root_stat = mkstat(0o755, 0, 0, 1_700_000_000); + + let mut fs = FileSystem::::new(root_stat); + + let hello_id = LeafId(fs.leaves.len()); + { + let mut xattrs = BTreeMap::new(); + xattrs.insert( + OsStr::new("user.test").into(), + Box::from(b"hello-value".as_ref()), + ); + fs.leaves.push(Leaf { + stat: Stat { + st_mode: 0o755, + st_uid: 0, + st_gid: 0, + st_mtim_sec: 1_700_000_001, + st_mtim_nsec: 0, + xattrs, + }, + content: LeafContent::Regular(RegularFile::Inline( + b"hello world binary stub".as_ref().into(), + )), + }); + } + + let readme_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o644, 0, 0, 1_700_000_002), + content: LeafContent::Regular(RegularFile::Inline( + b"readme text content\n".as_ref().into(), + )), + }); + + let hostname_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o644, 0, 0, 1_700_000_003), + content: LeafContent::Regular(RegularFile::Inline(b"integration-test\n".as_ref().into())), + }); + + let os_release_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o644, 0, 0, 1_700_000_004), + content: LeafContent::Regular(RegularFile::Inline(b"ID=test\nNAME=Test\n".as_ref().into())), + }); + + let symlink_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o777, 0, 0, 1_700_000_005), + content: LeafContent::Symlink(OsStr::new("../usr/lib/os-release").into()), + }); + + let devnull_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o666, 0, 0, 0), + content: LeafContent::CharacterDevice(rustix::fs::makedev(1, 3)), + }); + + let fifo_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o644, 0, 0, 1_700_000_006), + content: LeafContent::Fifo, + }); + + let bigfile_data = bigfile_content(); + let bigfile_hash = repo.ensure_object(&bigfile_data)?; + let bigfile_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o755, 0, 0, 1_700_000_007), + content: LeafContent::Regular(RegularFile::External( + bigfile_hash, + bigfile_data.len() as u64, + )), + }); + + let biglib_data = biglib_content(); + let biglib_hash = repo.ensure_object(&biglib_data)?; + let biglib_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o755, 0, 0, 1_700_000_008), + content: LeafContent::Regular(RegularFile::External(biglib_hash, biglib_data.len() as u64)), + }); + + let mut usr_bin = Directory::::new(mkstat(0o755, 0, 0, 1_700_000_010)); + usr_bin.insert(OsStr::new("hello"), Inode::leaf(hello_id)); + usr_bin.insert(OsStr::new("hello2"), Inode::leaf(hello_id)); + usr_bin.insert(OsStr::new("bigfile"), Inode::leaf(bigfile_id)); + + let mut usr_lib = Directory::::new(mkstat(0o755, 0, 0, 1_700_000_011)); + usr_lib.insert(OsStr::new("readme.txt"), Inode::leaf(readme_id)); + usr_lib.insert(OsStr::new("os-release"), Inode::leaf(os_release_id)); + usr_lib.insert(OsStr::new("biglib.so"), Inode::leaf(biglib_id)); + + let mut usr = Directory::::new(mkstat(0o755, 0, 0, 1_700_000_012)); + usr.insert(OsStr::new("bin"), Inode::Directory(Box::new(usr_bin))); + usr.insert(OsStr::new("lib"), Inode::Directory(Box::new(usr_lib))); + + let mut etc = Directory::::new(mkstat(0o755, 0, 0, 1_700_000_013)); + etc.insert(OsStr::new("hostname"), Inode::leaf(hostname_id)); + etc.insert(OsStr::new("os-release"), Inode::leaf(symlink_id)); + + let mut dev = Directory::::new(mkstat(0o755, 0, 0, 1_700_000_014)); + dev.insert(OsStr::new("null"), Inode::leaf(devnull_id)); + + let mut tmp_dir = Directory::::new(mkstat(0o1777, 0, 0, 1_700_000_015)); + tmp_dir.insert(OsStr::new("fifo"), Inode::leaf(fifo_id)); + + fs.root + .insert(OsStr::new("usr"), Inode::Directory(Box::new(usr))); + fs.root + .insert(OsStr::new("etc"), Inode::Directory(Box::new(etc))); + fs.root + .insert(OsStr::new("dev"), Inode::Directory(Box::new(dev))); + fs.root + .insert(OsStr::new("tmp"), Inode::Directory(Box::new(tmp_dir))); + + Ok(fs) +} + +fn privileged_fuse_dumpfile_roundtrip() -> Result<()> { + use std::os::unix::fs::MetadataExt as _; + use std::time::{Duration, Instant}; + + use composefs_oci::composefs::{ + dumpfile::write_dumpfile, + erofs::{ + reader::erofs_to_filesystem, + writer::{ValidatedFileSystem, mkfs_erofs}, + }, + repository::{Repository, RepositoryConfig}, + }; + + if require_privileged("privileged_fuse_dumpfile_roundtrip")?.is_some() { + return Ok(()); + } + + let work_dir = tempfile::tempdir()?; + let mountpoint = work_dir.path().join("mnt"); + let repo_path = work_dir.path().join("repo"); + std::fs::create_dir(&mountpoint)?; + std::fs::create_dir(&repo_path)?; + + let repo_fd = rustix::fs::open( + &repo_path, + rustix::fs::OFlags::CLOEXEC | rustix::fs::OFlags::RDONLY, + rustix::fs::Mode::empty(), + )?; + let (mut repo, _created) = Repository::::init_path( + &repo_fd, + ".", + RepositoryConfig::default().set_insecure(), + )?; + repo.set_insecure(); + + let synthetic = build_test_filesystem(&repo)?; + let erofs_bytes = mkfs_erofs(&mut ValidatedFileSystem::new(synthetic)?); + let canonical_fs = erofs_to_filesystem::(&erofs_bytes)?; + + let image_id = repo.write_image(None, &erofs_bytes)?; + let image_name = image_id.to_hex(); + + let mut expected_buf = Vec::new(); + write_dumpfile(&mut expected_buf, &canonical_fs)?; + let expected_dump = String::from_utf8(expected_buf)?; + + let pre_mount_dev = std::fs::metadata(&mountpoint)?.dev(); + + let cfsctl_bin = cfsctl()?; + let child = std::process::Command::new(&cfsctl_bin) + .arg("--repo") + .arg(&repo_path) + .arg("mount") + .arg("--fuse") + .arg("--foreground") + .arg(&image_name) + .arg(&mountpoint) + .spawn() + .context("spawning cfsctl mount --fuse")?; + + let mut guard = MountGuard { + mountpoint: mountpoint.clone(), + child: Some(child), + }; + + let deadline = Instant::now() + Duration::from_secs(30); + loop { + if let Some(child) = guard.child.as_mut() + && let Some(status) = child.try_wait()? + { + bail!("cfsctl mount --fuse exited before mount was ready: {status}"); + } + if std::fs::metadata(&mountpoint) + .map(|m| m.dev()) + .unwrap_or(pre_mount_dev) + != pre_mount_dev + { + break; + } + if Instant::now() >= deadline { + bail!("timed out waiting for FUSE mount"); + } + std::thread::sleep(Duration::from_millis(20)); + } + + let bigfile_actual = std::fs::read(mountpoint.join("usr/bin/bigfile")) + .context("reading bigfile from FUSE mount")?; + ensure!( + bigfile_actual == bigfile_content(), + "bigfile content mismatch: got {} bytes, expected {}", + bigfile_actual.len(), + bigfile_content().len(), + ); + let biglib_actual = std::fs::read(mountpoint.join("usr/lib/biglib.so")) + .context("reading biglib.so from FUSE mount")?; + ensure!( + biglib_actual == biglib_content(), + "biglib.so content mismatch: got {} bytes, expected {}", + biglib_actual.len(), + biglib_content().len(), + ); + + let sh = Shell::new()?; + let mp = mountpoint.to_str().context("non-UTF-8 mountpoint")?; + let repo_arg = repo_path.to_str().context("non-UTF-8 repo path")?; + let actual_dump = cmd!( + sh, + "{cfsctl_bin} --repo {repo_arg} create-dumpfile --no-propagate-usr-to-root {mp}" + ) + .read()?; + + drop(guard); + + similar_asserts::assert_eq!( + expected_dump.trim_end_matches('\n'), + actual_dump.trim_end_matches('\n') + ); + + Ok(()) +} +integration_test!(privileged_fuse_dumpfile_roundtrip); diff --git a/crates/composefs/src/mount.rs b/crates/composefs/src/mount.rs index 335040a9..83d73b3e 100644 --- a/crates/composefs/src/mount.rs +++ b/crates/composefs/src/mount.rs @@ -167,6 +167,21 @@ impl MountOptions { self.idmap_fd = Some(fd); self } + + /// Whether an overlay upper layer was configured. + pub fn has_overlay(&self) -> bool { + self.upperdirs.is_some() + } + + /// Whether the mount should be read-write. + pub fn read_write(&self) -> bool { + self.read_write + } + + /// Consume the options, returning the overlay fds if set. + pub fn into_overlay(self) -> Option<(OwnedFd, OwnedFd)> { + self.upperdirs + } } /// Creates a composefs mount using overlayfs with an erofs image and base directories.