From 87d5ecbdb57f03209f8f31b68dcb9c0ad26fb6df Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 2 Jun 2026 18:33:42 -0400 Subject: [PATCH 1/4] fuse: Reimplement based on direct erofs access Rather than serving a tree we serve directly from an erofs image passed as an fd. This should allow much less latency at startup, as we don't have to parse the entire file. It also allows us to use memmap, which should be safe at least for fs-verity (i.e. readonly) files. Fuse inodes are erofs nids, except the root inode which is always 1 in fuse. Fortunately erofs nids can't be 1, so we just map 1 <-> root nid. fuse requires the memory to be send and 'static, which is problematic for the self-referencing that happens if we store both Image and the owning buffer in ComposefsFuse. For now we just leak the erofs data chunk to make it 'static, as we expect the fuse process to keep it around until exit anyway. Assisted-by: Claude Code (Opus 4.6) Signed-off-by: Alexander Larsson --- crates/composefs-fuse/Cargo.toml | 4 +- crates/composefs-fuse/src/lib.rs | 1031 ++++++++++++++++++++---------- 2 files changed, 692 insertions(+), 343 deletions(-) diff --git a/crates/composefs-fuse/Cargo.toml b/crates/composefs-fuse/Cargo.toml index 05f6fcfe..f1558228 100644 --- a/crates/composefs-fuse/Cargo.toml +++ b/crates/composefs-fuse/Cargo.toml @@ -13,6 +13,8 @@ version.workspace = true [dependencies] anyhow = { version = "1.0.98", default-features = false } composefs = { workspace = true } -fuser = { version = "0.15.1", default-features = false, features = ["abi-7-31"] } +fuser = { version = "0.17.0", default-features = false } log = { version = "0.4.8", default-features = false } +memmap2 = { version = "0.9", default-features = false } rustix = { version = "1.0.0", default-features = false, features = ["fs", "mount"] } +zerocopy = { version = "0.8.0", default-features = false } diff --git a/crates/composefs-fuse/src/lib.rs b/crates/composefs-fuse/src/lib.rs index 20d108f3..75d0b279 100644 
--- a/crates/composefs-fuse/src/lib.rs +++ b/crates/composefs-fuse/src/lib.rs @@ -1,205 +1,248 @@ -//! FUSE filesystem implementation for composefs trees. +//! FUSE filesystem implementation for composefs EROFS images. //! -//! This crate provides a userspace filesystem implementation that exposes composefs -//! directory trees through FUSE. It supports read-only access to files, directories, -//! symlinks, and extended attributes, with data served from a composefs repository. +//! This crate serves a composefs EROFS image directly over FUSE without +//! parsing the entire image into a high-level tree. FUSE inode numbers +//! are EROFS NIDs, and all metadata is resolved on demand from the +//! on-disk structures. -#![forbid(unsafe_code)] +#![deny(unsafe_code)] use std::{ + borrow::Cow, collections::HashMap, ffi::OsStr, os::{ fd::{AsFd, AsRawFd, OwnedFd}, unix::ffi::OsStrExt, }, + path::Path, + sync::{Arc, Mutex}, time::{Duration, SystemTime}, }; use anyhow::Context; use fuser::{ - FileAttr, FileType, Filesystem, ReplyAttr, ReplyData, ReplyDirectory, ReplyEntry, ReplyOpen, - Request, Session, SessionACL, + Config, FileAttr, FileHandle, FileType, Filesystem, FopenFlags, Generation, INodeNo, + MountOption, OpenFlags, ReplyAttr, ReplyData, ReplyDirectory, ReplyDirectoryPlus, ReplyEntry, + ReplyOpen, Request, Session, SessionACL, }; use rustix::{ buffer::spare_capacity, - fs::{Mode, OFlags, open}, - io::{Errno, pread}, + fs::{Mode, OFlags, open, openat}, + io::pread, mount::{ FsMountFlags, MountAttrFlags, fsconfig_create, fsconfig_set_flag, fsconfig_set_string, fsmount, }, }; +use zerocopy::FromBytes as _; + use composefs::{ - fsverity::FsVerityHashValue, - generic_tree::LeafId, + erofs::{ + format::{ + self, DataLayout, FileType as ErofsFileType, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, + S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, XATTR_PREFIXES, + }, + reader::{DirectoryBlock, Image, InodeHeader, InodeOps, InodeType}, + }, mount::FsHandle, - repository::Repository, - 
tree::{Directory, FileSystem, Inode, Leaf, LeafContent, RegularFile, Stat}, + mountcompat::{overlayfs_set_fd, overlayfs_set_lower_and_data_fds, prepare_mount}, }; const TTL: Duration = Duration::from_secs(1_000_000); -/// FUSE inode number. Assigned eagerly at mount time. -/// -/// Inode 1 is the root directory, then all other nodes get sequential -/// numbers from a depth-first walk. The numbering is an internal FUSE -/// concern and not exposed in the public API. +/// Controls the overlay xattr namespace. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +#[non_exhaustive] +pub enum OverlayXattrMode { + /// Synthesize `user.overlay.*` xattrs (for unprivileged `userxattr` mounts). + #[default] + User, + /// Synthesize `trusted.overlay.*` xattrs (requires CAP_SYS_ADMIN). + Trusted, +} + +/// FUSE inode number = EROFS NID. type Ino = u64; -/// Precomputed inode number assignments for the entire filesystem tree. -/// -/// Directories are identified by pointer (stable because the tree is -/// borrowed immutably for the lifetime of the FUSE session). Leaves -/// are identified by `LeafId`. -#[derive(Debug)] -struct InodeMap { - /// Directory pointer → inode number. - dir_inos: HashMap<*const Directory, Ino>, - /// LeafId → inode number. Indexed by `LeafId.0`. - /// Hardlinked leaves (same `LeafId`) naturally get the same ino. - leaf_inos: Vec, +fn mode_to_filetype(mode: u16) -> FileType { + match mode & S_IFMT { + S_IFREG => FileType::RegularFile, + S_IFDIR => FileType::Directory, + S_IFCHR => FileType::CharDevice, + S_IFBLK => FileType::BlockDevice, + S_IFIFO => FileType::NamedPipe, + S_IFLNK => FileType::Symlink, + S_IFSOCK => FileType::Socket, + _ => FileType::RegularFile, + } } -impl InodeMap { - /// Walk the tree and assign sequential inode numbers. 
- fn build(fs: &FileSystem) -> Self { - let mut next_ino: Ino = 1; // root = 1 - let mut dir_inos = HashMap::new(); - let mut leaf_inos = vec![0u64; fs.leaves.len()]; - - fn walk( - dir: &Directory, - next_ino: &mut Ino, - dir_inos: &mut HashMap<*const Directory, Ino>, - leaf_inos: &mut [Ino], - ) { - let ino = *next_ino; - *next_ino += 1; - dir_inos.insert(dir as *const _, ino); - - for (_, inode) in dir.entries() { - match inode { - Inode::Directory(subdir) => walk(subdir, next_ino, dir_inos, leaf_inos), - Inode::Leaf(id, _) => { - if leaf_inos[id.0] == 0 { - leaf_inos[id.0] = *next_ino; - *next_ino += 1; - } - // Hardlinks: same LeafId keeps the same ino. - } - } - } - } +fn inode_rdev(inode: &InodeType) -> u32 { + let mode = inode.mode().0.get(); + match mode & S_IFMT { + S_IFCHR | S_IFBLK => inode.u(), + _ => 0, + } +} - walk(&fs.root, &mut next_ino, &mut dir_inos, &mut leaf_inos); - InodeMap { - dir_inos, - leaf_inos, +fn inode_fileattr(image: &Image, nid: Ino, inode: &InodeType) -> FileAttr { + let mode = inode.mode().0.get(); + let mtime = match inode { + InodeType::Extended(i) => { + let secs = (i.header.mtime.get() as i64).max(0) as u64; + SystemTime::UNIX_EPOCH + Duration::from_secs(secs) } - } + InodeType::Compact(_) => { + let secs = (image.sb.build_time.get() as i64).max(0) as u64; + SystemTime::UNIX_EPOCH + Duration::from_secs(secs) + } + }; + let (uid, gid) = match inode { + InodeType::Extended(i) => (i.header.uid.get(), i.header.gid.get()), + InodeType::Compact(i) => (i.header.uid.get() as u32, i.header.gid.get() as u32), + }; + let size = match mode & S_IFMT { + S_IFDIR => 0, + _ => inode.size(), + }; - fn dir_ino(&self, dir: &Directory) -> Ino { - self.dir_inos[&(dir as *const _)] + FileAttr { + ino: INodeNo(nid), + size, + blocks: 1, + atime: mtime, + mtime, + ctime: mtime, + crtime: mtime, + kind: mode_to_filetype(mode), + perm: mode & 0o7777, + nlink: inode.nlink(), + uid, + gid, + rdev: inode_rdev(inode), + blksize: 4096, + flags: 0, } +} - 
fn leaf_ino(&self, id: LeafId) -> Ino { - self.leaf_inos[id.0] +fn inode_fileattr_overlay(image: &Image, nid: Ino, inode: &InodeType) -> FileAttr { + let mut attr = inode_fileattr(image, nid, inode); + if is_whiteout(image, inode) { + attr.kind = FileType::RegularFile; + attr.size = 0; + attr.rdev = 0; } + attr +} - fn inode_ino(&self, inode: &Inode) -> Ino { - match inode { - Inode::Directory(dir) => self.dir_ino(dir), - Inode::Leaf(id, _) => self.leaf_ino(*id), +fn is_whiteout(image: &Image, inode: &InodeType) -> bool { + has_xattr(image, inode, b"trusted.overlay.overlay.whiteout") +} + +fn has_xattr(image: &Image, inode: &InodeType, name: &[u8]) -> bool { + find_raw_xattr(image, inode, name).is_some() +} + +fn find_raw_xattr(image: &Image, inode: &InodeType, name: &[u8]) -> Option> { + let xattrs_section = inode.xattrs().ok()??; + for id in xattrs_section.shared().ok()? { + let xattr = image.shared_xattr(id.get()).ok()?; + if xattr_full_name(xattr) == name { + return Some(xattr.value().ok()?.to_vec()); + } + } + for xattr_result in xattrs_section.local().ok()? { + let xattr = xattr_result.ok()?; + if xattr_full_name(xattr) == name { + return Some(xattr.value().ok()?.to_vec()); } } + None } -/// A reference to a filesystem node, used for FUSE inode lookup. -#[derive(Debug, Clone)] -enum InodeRef<'a, ObjectID: FsVerityHashValue> { - Directory(&'a Directory, Ino), - Leaf(LeafId, &'a Leaf), +fn xattr_full_name(xattr: &composefs::erofs::reader::XAttr) -> Vec { + let idx = xattr.header.name_index as usize; + let prefix = if idx < XATTR_PREFIXES.len() { + XATTR_PREFIXES[idx] + } else { + b"" + }; + let suffix = xattr.suffix().unwrap_or(b""); + let mut name = Vec::with_capacity(prefix.len() + suffix.len()); + name.extend_from_slice(prefix); + name.extend_from_slice(suffix); + name } -impl<'a, ObjectID: FsVerityHashValue> InodeRef<'a, ObjectID> { - fn nlink(&self, nlink_map: &[u32]) -> u32 { - (match self { - InodeRef::Directory(dir, ..) 
=> { - 2 + dir - .inodes() - .filter(|i| matches!(i, Inode::Directory(..))) - .count() - } - InodeRef::Leaf(leaf_id, _) => nlink_map[leaf_id.0] as usize, - }) as u32 - } +const TRUSTED_OVERLAY_PREFIX: &[u8] = b"trusted.overlay."; +const USER_OVERLAY_PREFIX: &[u8] = b"user.overlay."; +const ESCAPED_OVERLAY_PREFIX: &[u8] = b"trusted.overlay.overlay."; - fn rdev(&self) -> u32 { - (match self { - InodeRef::Directory(..) => 0, - InodeRef::Leaf(_, leaf) => match &leaf.content { - LeafContent::BlockDevice(rdev) | LeafContent::CharacterDevice(rdev) => *rdev, - _ => 0, - }, - }) as u32 - } - - fn kind(&self) -> FileType { - match self { - InodeRef::Directory(..) => FileType::Directory, - InodeRef::Leaf(_, leaf) => match leaf.content { - LeafContent::BlockDevice(..) => FileType::BlockDevice, - LeafContent::CharacterDevice(..) => FileType::CharDevice, - LeafContent::Fifo => FileType::NamedPipe, - LeafContent::Regular(..) => FileType::RegularFile, - LeafContent::Socket => FileType::Socket, - LeafContent::Symlink(..) => FileType::Symlink, - }, - } +fn is_composefs_internal_xattr(name: &[u8]) -> bool { + name == format::XATTR_OVERLAY_METACOPY + || name == format::XATTR_OVERLAY_REDIRECT + || name.starts_with(ESCAPED_OVERLAY_PREFIX) +} + +fn unescape_xattr_name(name: &[u8]) -> Cow<'_, [u8]> { + if let Some(rest) = name.strip_prefix(ESCAPED_OVERLAY_PREFIX) { + let mut unescaped = Vec::with_capacity(TRUSTED_OVERLAY_PREFIX.len() + rest.len()); + unescaped.extend_from_slice(TRUSTED_OVERLAY_PREFIX); + unescaped.extend_from_slice(rest); + Cow::Owned(unescaped) + } else { + Cow::Borrowed(name) } +} - fn stat(&self) -> &'a Stat { - match self { - InodeRef::Directory(dir, ..) 
=> &dir.stat, - InodeRef::Leaf(_, leaf) => &leaf.stat, - } +fn rewrite_xattr_name_for_user(name: &[u8]) -> Option> { + if let Some(rest) = name.strip_prefix(TRUSTED_OVERLAY_PREFIX) { + let mut rewritten = Vec::with_capacity(USER_OVERLAY_PREFIX.len() + rest.len()); + rewritten.extend_from_slice(USER_OVERLAY_PREFIX); + rewritten.extend_from_slice(rest); + Some(rewritten) + } else { + None } +} - fn size(&self) -> u64 { - match self { - InodeRef::Directory(..) => 0, - InodeRef::Leaf(_, leaf) => match &leaf.content { - LeafContent::Regular(RegularFile::Inline(data)) => data.len() as u64, - LeafContent::Regular(RegularFile::External(.., size)) => *size, - _ => 0, - }, +/// Iterate directory entries across inline data and blocks. +fn for_each_dir_entry(image: &Image, inode: &InodeType, mut f: F) -> Result<(), fuser::Errno> +where + F: FnMut(&composefs::erofs::reader::DirectoryEntry) -> std::ops::ControlFlow<()>, +{ + if let Some(inline) = inode.inline() + && let Ok(block) = DirectoryBlock::ref_from_bytes(inline) + && let Ok(entries) = block.entries() + { + for entry in entries.flatten() { + if entry.name == b"." || entry.name == b".." { + continue; + } + if f(&entry).is_break() { + return Ok(()); + } } } - - fn fileattr(&self, ino: Ino, nlink_map: &[u32]) -> FileAttr { - let stat = self.stat(); - let mtime = SystemTime::UNIX_EPOCH + Duration::from_secs(stat.st_mtim_sec as u64); - - FileAttr { - ino, - size: self.size(), - blocks: 1, - atime: mtime, - mtime, - ctime: mtime, - crtime: mtime, - kind: self.kind(), - perm: stat.st_mode as u16, - nlink: self.nlink(nlink_map), - uid: stat.st_uid, - gid: stat.st_gid, - rdev: self.rdev(), - blksize: 4096, - flags: 0, + if let Ok(block_range) = image.inode_blocks(inode) { + for block_id in block_range { + if let Ok(block) = image.directory_block(block_id) + && let Ok(entries) = block.entries() + { + for entry in entries.flatten() { + if entry.name == b"." || entry.name == b".." 
{ + continue; + } + if f(&entry).is_break() { + return Ok(()); + } + } + } } } + Ok(()) } #[derive(Debug)] @@ -208,281 +251,540 @@ enum OpenHandle { Data(Box<[u8]>), } -#[derive(Debug)] -struct TreeFuse<'a, ObjectID: FsVerityHashValue> { - repo: &'a Repository, - fs: &'a FileSystem, - inode_map: InodeMap, - nlink_map: Vec, - inodes: HashMap>, - attrs: HashMap, +#[derive(Debug, Default)] +struct FuseHandles { handles: HashMap, next_fh: u64, } -impl<'a, ObjectID: FsVerityHashValue> TreeFuse<'a, ObjectID> { - fn register_inode(&mut self, inode: &'a Inode, parent: Ino) -> (Ino, FileType) { - let ino = self.inode_map.inode_ino(inode); - let iref = match inode { - Inode::Directory(dir) => InodeRef::Directory(dir, parent), - Inode::Leaf(leaf_id, _) => InodeRef::Leaf(*leaf_id, self.fs.leaf(*leaf_id)), - }; - let kind = iref.kind(); - self.attrs.insert(ino, iref.fileattr(ino, &self.nlink_map)); - self.inodes.insert(ino, iref); - (ino, kind) - } +#[derive(Debug)] +struct ComposefsFuse { + image: Image<'static>, + objects_fd: Arc, + overlay_xattr: Option, + handles: Mutex, } -impl Filesystem for TreeFuse<'_, ObjectID> { - fn statfs(&mut self, _req: &Request<'_>, _ino: u64, reply: fuser::ReplyStatfs) { - reply.statfs(0, 0, 0, 0, 0, 4096, 255, 4096); +impl ComposefsFuse { + fn root_nid(&self) -> Ino { + self.image.sb.root_nid.get() as Ino } - fn lookup(&mut self, _req: &Request, parent: u64, name: &OsStr, reply: ReplyEntry) { - log::trace!("lookup {parent} {name:?}"); - let Some(InodeRef::Directory(dir, ..)) = self.inodes.get(&parent) else { - log::error!("lookup({parent}, {name:?}) parent does not exist"); - return reply.error(Errno::BADF.raw_os_error()); - }; - let dir = *dir; + /// Translate a FUSE inode number to an EROFS NID. + /// FUSE always uses inode 1 for the root, but EROFS root NID may differ. 
+ fn fuse_ino_to_nid(&self, ino: Ino) -> Ino { + if ino == 1 { self.root_nid() } else { ino } + } - match dir.lookup(name) { - Some(inode) => { - let (ino, _) = self.register_inode(inode, parent); - reply.entry(&TTL, self.attrs.get(&ino).unwrap(), 0); - } - None => reply.error(Errno::NOENT.raw_os_error()), - } + /// Translate an EROFS NID to a FUSE inode number. + fn nid_to_fuse_ino(&self, nid: Ino) -> Ino { + if nid == self.root_nid() { 1 } else { nid } + } + + fn get_inode(&self, nid: Ino) -> Result, fuser::Errno> { + self.image.inode(nid).map_err(|e| { + log::error!("inode({nid}): {e}"); + fuser::Errno::EIO + }) } - fn getattr(&mut self, _req: &Request, ino: u64, _fh: Option, reply: ReplyAttr) { - if let Some(attrs) = self.attrs.get(&ino) { - return reply.attr(&TTL, attrs); + fn get_fileattr(&self, fuse_ino: Ino) -> Result { + let nid = self.fuse_ino_to_nid(fuse_ino); + let inode = self.get_inode(nid)?; + if self.overlay_xattr.is_some() { + Ok(inode_fileattr_overlay(&self.image, fuse_ino, &inode)) + } else { + Ok(inode_fileattr(&self.image, fuse_ino, &inode)) } + } - let Some(iref) = self.inodes.get(&ino) else { - log::error!("getattr({ino}) inode does not exist"); - return reply.error(Errno::BADF.raw_os_error()); + fn open_object_by_redirect(&self, inode: &InodeType) -> Result { + let redirect = find_raw_xattr(&self.image, inode, format::XATTR_OVERLAY_REDIRECT) + .ok_or(fuser::Errno::EIO)?; + let path = redirect.strip_prefix(b"/").unwrap_or(&redirect); + openat( + &*self.objects_fd, + OsStr::from_bytes(path), + OFlags::RDONLY | OFlags::CLOEXEC | OFlags::NOFOLLOW, + Mode::empty(), + ) + .map_err(|e| { + log::error!("open object {}: {e}", String::from_utf8_lossy(path)); + fuser::Errno::EIO + }) + } + + fn collect_xattr_names(&self, _nid: Ino, inode: &InodeType) -> Vec> { + let mut names = Vec::new(); + let Some(xattrs_section) = inode.xattrs().ok().flatten() else { + return names; + }; + + let process_xattr = |names: &mut Vec>, raw_name: Vec| match 
self.overlay_xattr { + Some(OverlayXattrMode::User) => { + if let Some(rewritten) = rewrite_xattr_name_for_user(&raw_name) { + names.push(rewritten); + } else { + names.push(raw_name); + } + } + Some(OverlayXattrMode::Trusted) => { + names.push(raw_name); + } + None => { + if is_composefs_internal_xattr(&raw_name) { + let unescaped = unescape_xattr_name(&raw_name); + if unescaped != raw_name.as_slice() { + names.push(unescaped.into_owned()); + } + } else { + names.push(raw_name); + } + } }; - let iref = iref.clone(); - let attr = iref.fileattr(ino, &self.nlink_map); - self.attrs.insert(ino, attr); - reply.attr(&TTL, self.attrs.get(&ino).unwrap()); + if let Ok(shared) = xattrs_section.shared() { + for id in shared { + if let Ok(xattr) = self.image.shared_xattr(id.get()) { + process_xattr(&mut names, xattr_full_name(xattr)); + } + } + } + if let Ok(local) = xattrs_section.local() { + for xattr in local.flatten() { + process_xattr(&mut names, xattr_full_name(xattr)); + } + } + names } - fn readlink(&mut self, _req: &Request<'_>, ino: u64, reply: ReplyData) { - let Some(InodeRef::Leaf(_, leaf)) = self.inodes.get(&ino) else { - return reply.error(Errno::INVAL.raw_os_error()); + fn find_xattr_value(&self, _nid: Ino, inode: &InodeType, name: &[u8]) -> Option> { + let lookup_name: Cow<'_, [u8]> = match self.overlay_xattr { + Some(OverlayXattrMode::User) => { + if let Some(rest) = name.strip_prefix(USER_OVERLAY_PREFIX) { + let mut trusted = Vec::with_capacity(TRUSTED_OVERLAY_PREFIX.len() + rest.len()); + trusted.extend_from_slice(TRUSTED_OVERLAY_PREFIX); + trusted.extend_from_slice(rest); + Cow::Owned(trusted) + } else { + Cow::Borrowed(name) + } + } + Some(OverlayXattrMode::Trusted) => Cow::Borrowed(name), + None => { + if let Some(rest) = name.strip_prefix(TRUSTED_OVERLAY_PREFIX) { + let mut escaped = Vec::with_capacity(ESCAPED_OVERLAY_PREFIX.len() + rest.len()); + escaped.extend_from_slice(ESCAPED_OVERLAY_PREFIX); + escaped.extend_from_slice(rest); + if let Some(val) = 
find_raw_xattr(&self.image, inode, &escaped) { + return Some(val.to_vec()); + } + } + if is_composefs_internal_xattr(name) { + return None; + } + Cow::Borrowed(name) + } }; + find_raw_xattr(&self.image, inode, &lookup_name).map(|v| v.to_vec()) + } +} - let LeafContent::Symlink(target) = &leaf.content else { - return reply.error(Errno::INVAL.raw_os_error()); +impl Filesystem for ComposefsFuse { + fn statfs(&self, _req: &Request, _ino: INodeNo, reply: fuser::ReplyStatfs) { + reply.statfs(0, 0, 0, 0, 0, 4096, 255, 4096); + } + + fn forget(&self, _req: &Request, _ino: INodeNo, _nlookup: u64) {} + + fn lookup(&self, _req: &Request, parent: INodeNo, name: &OsStr, reply: ReplyEntry) { + let parent_nid = self.fuse_ino_to_nid(parent.0); + log::trace!("lookup {parent_nid} {name:?}"); + + let Ok(parent_inode) = self.get_inode(parent_nid) else { + return reply.error(fuser::Errno::EBADF); }; - reply.data(target.as_bytes()); + let name_bytes = name.as_bytes(); + let mut found = None; + let _ = for_each_dir_entry(&self.image, &parent_inode, |entry| { + if entry.name == name_bytes { + found = Some(entry.nid()); + std::ops::ControlFlow::Break(()) + } else { + std::ops::ControlFlow::Continue(()) + } + }); + + match found { + Some(child_nid) => { + let child_fuse_ino = self.nid_to_fuse_ino(child_nid); + match self.get_fileattr(child_fuse_ino) { + Ok(attrs) => reply.entry(&TTL, &attrs, Generation(0)), + Err(e) => reply.error(e), + } + } + None => reply.error(fuser::Errno::ENOENT), + } + } + + fn getattr(&self, _req: &Request, ino: INodeNo, _fh: Option, reply: ReplyAttr) { + match self.get_fileattr(ino.0) { + Ok(attrs) => reply.attr(&TTL, &attrs), + Err(e) => reply.error(e), + } + } + + fn readlink(&self, _req: &Request, ino: INodeNo, reply: ReplyData) { + let Ok(inode) = self.get_inode(self.fuse_ino_to_nid(ino.0)) else { + return reply.error(fuser::Errno::EINVAL); + }; + match inode.inline() { + Some(data) => reply.data(data), + None => reply.error(fuser::Errno::EINVAL), + } } - fn 
opendir(&mut self, _req: &Request<'_>, _ino: u64, _flags: i32, reply: ReplyOpen) { - reply.opened(0, 0); + fn opendir(&self, _req: &Request, _ino: INodeNo, _flags: OpenFlags, reply: ReplyOpen) { + reply.opened(FileHandle(0), FopenFlags::empty()); } fn readdir( - &mut self, + &self, _req: &Request, - ino: u64, - _fh: u64, - mut offset: i64, + ino: INodeNo, + _fh: FileHandle, + offset: u64, mut reply: ReplyDirectory, ) { - let Some(InodeRef::Directory(dir, parent)) = self.inodes.get(&ino) else { - log::error!("readdir({ino}) inode is not a directory"); - return reply.error(Errno::BADF.raw_os_error()); + let fuse_ino = ino.0; + let nid = self.fuse_ino_to_nid(fuse_ino); + let Ok(inode) = self.get_inode(nid) else { + return reply.error(fuser::Errno::EBADF); }; - let (dir, parent) = (*dir, *parent); - if offset == 0 { - offset += 1; - if reply.add(ino, offset, FileType::Directory, ".") { + let mut cur_offset = offset; + + if cur_offset == 0 { + cur_offset += 1; + if reply.add(INodeNo(fuse_ino), cur_offset, FileType::Directory, ".") { return reply.ok(); } } - if offset == 1 { - offset += 1; - if reply.add(parent, offset, FileType::Directory, "..") { + if cur_offset == 1 { + cur_offset += 1; + if reply.add(INodeNo(fuse_ino), cur_offset, FileType::Directory, "..") { return reply.ok(); } } - for (name, inode) in dir.sorted_entries().skip(offset as usize - 2) { - let (child_ino, kind) = self.register_inode(inode, ino); + let mut entry_idx: u64 = 2; + let _ = for_each_dir_entry(&self.image, &inode, |entry| { + if entry_idx < cur_offset { + entry_idx += 1; + return std::ops::ControlFlow::Continue(()); + } + let child_fuse_ino = self.nid_to_fuse_ino(entry.nid()); + let kind = match ErofsFileType::from(entry.header.file_type) { + ErofsFileType::RegularFile => FileType::RegularFile, + ErofsFileType::Directory => FileType::Directory, + ErofsFileType::CharacterDevice => FileType::CharDevice, + ErofsFileType::BlockDevice => FileType::BlockDevice, + ErofsFileType::Fifo => 
FileType::NamedPipe, + ErofsFileType::Socket => FileType::Socket, + ErofsFileType::Symlink => FileType::Symlink, + ErofsFileType::Unknown => FileType::RegularFile, + }; + entry_idx += 1; + if reply.add( + INodeNo(child_fuse_ino), + entry_idx, + kind, + OsStr::from_bytes(entry.name), + ) { + return std::ops::ControlFlow::Break(()); + } + std::ops::ControlFlow::Continue(()) + }); + + reply.ok(); + } + + fn readdirplus( + &self, + _req: &Request, + ino: INodeNo, + _fh: FileHandle, + offset: u64, + mut reply: ReplyDirectoryPlus, + ) { + let fuse_ino = ino.0; + let nid = self.fuse_ino_to_nid(fuse_ino); + let Ok(inode) = self.get_inode(nid) else { + return reply.error(fuser::Errno::EBADF); + }; + + let Ok(dir_attrs) = self.get_fileattr(fuse_ino) else { + return reply.error(fuser::Errno::EIO); + }; - offset += 1; - if reply.add(child_ino, offset, kind, name) { - break; + let mut cur_offset = offset; + + if cur_offset == 0 { + cur_offset += 1; + if reply.add( + INodeNo(fuse_ino), + cur_offset, + ".", + &TTL, + &dir_attrs, + Generation(0), + ) { + return reply.ok(); + } + } + + if cur_offset == 1 { + cur_offset += 1; + if reply.add( + INodeNo(fuse_ino), + cur_offset, + "..", + &TTL, + &dir_attrs, + Generation(0), + ) { + return reply.ok(); } } + let mut entry_idx: u64 = 2; + let _ = for_each_dir_entry(&self.image, &inode, |entry| { + if entry_idx < cur_offset { + entry_idx += 1; + return std::ops::ControlFlow::Continue(()); + } + let child_fuse_ino = self.nid_to_fuse_ino(entry.nid()); + let child_attrs = match self.get_fileattr(child_fuse_ino) { + Ok(a) => a, + Err(_) => { + entry_idx += 1; + return std::ops::ControlFlow::Continue(()); + } + }; + entry_idx += 1; + if reply.add( + INodeNo(child_fuse_ino), + entry_idx, + OsStr::from_bytes(entry.name), + &TTL, + &child_attrs, + Generation(0), + ) { + return std::ops::ControlFlow::Break(()); + } + std::ops::ControlFlow::Continue(()) + }); + reply.ok(); } fn releasedir( - &mut self, - _req: &Request<'_>, - _ino: u64, - _fh: u64, 
- _flags: i32, + &self, + _req: &Request, + _ino: INodeNo, + _fh: FileHandle, + _flags: OpenFlags, reply: fuser::ReplyEmpty, ) { reply.ok(); } fn getxattr( - &mut self, - _req: &Request<'_>, - ino: u64, + &self, + _req: &Request, + ino: INodeNo, name: &OsStr, size: u32, reply: fuser::ReplyXattr, ) { - let Some(iref) = self.inodes.get(&ino) else { - log::error!("getxattr({ino}, {name:?}, {size}) inode does not exist"); - return reply.error(Errno::BADF.raw_os_error()); + let nid = self.fuse_ino_to_nid(ino.0); + let Ok(inode) = self.get_inode(nid) else { + return reply.error(fuser::Errno::EBADF); }; - let xattrs = &iref.stat().xattrs; - let Some(value) = xattrs.get(name) else { - return reply.error(Errno::NODATA.raw_os_error()); - }; - - if size == 0 { - return reply.size(value.len() as u32); - } else if value.len() > size as usize { - return reply.error(Errno::RANGE.raw_os_error()); + match self.find_xattr_value(nid, &inode, name.as_bytes()) { + Some(value) => { + if size == 0 { + reply.size(value.len() as u32); + } else if value.len() > size as usize { + reply.error(fuser::Errno::ERANGE); + } else { + reply.data(&value); + } + } + None => reply.error(fuser::Errno::ENODATA), } - - reply.data(value); } - fn listxattr(&mut self, _req: &Request<'_>, ino: u64, size: u32, reply: fuser::ReplyXattr) { - let Some(iref) = self.inodes.get(&ino) else { - log::error!("listxattr({ino}, {size}) inode does not exist"); - return reply.error(Errno::BADF.raw_os_error()); + fn listxattr(&self, _req: &Request, ino: INodeNo, size: u32, reply: fuser::ReplyXattr) { + let nid = self.fuse_ino_to_nid(ino.0); + let Ok(inode) = self.get_inode(nid) else { + return reply.error(fuser::Errno::EBADF); }; - let mut list = vec![]; - for name in iref.stat().xattrs.keys() { - list.extend_from_slice(name.as_bytes()); + let names = self.collect_xattr_names(nid, &inode); + let mut list = Vec::new(); + for name in &names { + list.extend_from_slice(name); list.push(b'\0'); } if size == 0 { - return 
reply.size(list.len() as u32); + reply.size(list.len() as u32); } else if list.len() > size as usize { - return reply.error(Errno::RANGE.raw_os_error()); + reply.error(fuser::Errno::ERANGE); + } else { + reply.data(&list); } - - reply.data(&list); } - fn open(&mut self, _req: &Request<'_>, ino: u64, _flags: i32, reply: ReplyOpen) { - log::trace!("open({ino})"); - let Some(iref) = self.inodes.get(&ino) else { - log::error!("open({ino}) inode does not exist"); - return reply.error(Errno::BADF.raw_os_error()); + fn open(&self, _req: &Request, ino: INodeNo, _flags: OpenFlags, reply: ReplyOpen) { + let nid = self.fuse_ino_to_nid(ino.0); + log::trace!("open({nid})"); + + let Ok(inode) = self.get_inode(nid) else { + return reply.error(fuser::Errno::EBADF); }; - let InodeRef::Leaf(_, leaf) = iref else { - log::error!("open({ino}) inode is a directory"); - return reply.error(Errno::BADF.raw_os_error()); + let Ok(layout) = inode.data_layout() else { + return reply.error(fuser::Errno::EIO); }; - let handle = match &leaf.content { - LeafContent::Regular(RegularFile::External(id, ..)) => { - let Ok(fd) = self.repo.open_object(id) else { - log::error!("open({ino}) open object failed"); - return reply.error(Errno::INVAL.raw_os_error()); - }; - OpenHandle::Fd(fd) + let handle = match layout { + DataLayout::FlatInline => match inode.inline() { + Some(data) => OpenHandle::Data(data.into()), + None => OpenHandle::Data(Box::new([])), + }, + DataLayout::FlatPlain => { + if self.overlay_xattr.is_some() { + return reply.error(errno_to_fuser(rustix::io::Errno::OPNOTSUPP)); + } + match self.open_object_by_redirect(&inode) { + Ok(fd) => OpenHandle::Fd(fd), + Err(e) => return reply.error(e), + } } - LeafContent::Regular(RegularFile::Inline(data)) => OpenHandle::Data(data.clone()), - _ => { - log::error!("open({ino}) non-regular file"); - return reply.error(Errno::BADF.raw_os_error()); + DataLayout::ChunkBased => { + if self.overlay_xattr.is_some() { + return 
reply.error(errno_to_fuser(rustix::io::Errno::OPNOTSUPP)); + } + match self.open_object_by_redirect(&inode) { + Ok(fd) => OpenHandle::Fd(fd), + Err(e) => return reply.error(e), + } } }; - let fh = self.next_fh; - self.next_fh += 1; - log::debug!("self.handles.insert({fh}, {handle:?})"); - self.handles.insert(fh, handle); - reply.opened(fh, 0); + let mut state = self.handles.lock().expect("fuse handles mutex poisoned"); + let fh = state.next_fh; + state.next_fh += 1; + state.handles.insert(fh, handle); + reply.opened(FileHandle(fh), FopenFlags::FOPEN_KEEP_CACHE); } fn read( - &mut self, - _req: &Request<'_>, - _ino: u64, - fh: u64, - offset: i64, + &self, + _req: &Request, + _ino: INodeNo, + fh: FileHandle, + offset: u64, size: u32, - _flags: i32, - _lock_owner: Option, - reply: fuser::ReplyData, + _flags: OpenFlags, + _lock_owner: Option, + reply: ReplyData, ) { - match self.handles.get(&fh) { + let state = self.handles.lock().expect("fuse handles mutex poisoned"); + match state.handles.get(&fh.0) { Some(OpenHandle::Fd(fd)) => { let mut data = Vec::with_capacity(size as usize); - match pread(fd, spare_capacity(&mut data), offset as u64) { + match pread(fd, spare_capacity(&mut data), offset) { Ok(_) => reply.data(&data), - Err(errno) => reply.error(errno.raw_os_error()), + Err(errno) => reply.error(errno_to_fuser(errno)), } } Some(OpenHandle::Data(data)) => { - if offset as usize > data.len() { - reply.data(b""); - } else { - let mut data = &data[offset as usize..]; - if data.len() > size as usize { - data = &data[..size as usize]; - } - reply.data(data); - } + let start = (offset as usize).min(data.len()); + let end = (start + size as usize).min(data.len()); + reply.data(&data[start..end]); } None => { - log::error!("Handle doesn't exist: pread({fh}, {size}, {offset})"); - reply.error(Errno::BADF.raw_os_error()); + log::error!("read(fh={fh}): handle does not exist"); + reply.error(fuser::Errno::EBADF); } } } fn release( - &mut self, - _req: &Request<'_>, - _ino: 
u64, - fh: u64, - _flags: i32, - _lock_owner: Option, + &self, + _req: &Request, + _ino: INodeNo, + fh: FileHandle, + _flags: OpenFlags, + _lock_owner: Option, _flush: bool, reply: fuser::ReplyEmpty, ) { - match self.handles.remove(&fh) { + let mut state = self.handles.lock().expect("fuse handles mutex poisoned"); + match state.handles.remove(&fh.0) { Some(_) => reply.ok(), None => { - log::error!("Handle doesn't exist: close({fh})"); - reply.error(Errno::BADF.raw_os_error()) + log::error!("release(fh={fh}): handle does not exist"); + reply.error(fuser::Errno::EBADF); } } } } -/// Opens /dev/fuse. +fn errno_to_fuser(errno: rustix::io::Errno) -> fuser::Errno { + fuser::Errno::from(std::io::Error::from_raw_os_error(errno.raw_os_error())) +} + +/// Check if an fd has fs-verity enabled, meaning its contents cannot change. +fn is_safe_to_mmap(fd: &impl AsFd) -> bool { + composefs::fsverity::measure_verity_opt::(fd) + .ok() + .flatten() + .is_some() +} + +/// Load an EROFS image from a file descriptor. +/// +/// If the image has fs-verity enabled (contents guaranteed immutable), +/// it is memory-mapped for zero-copy access. Otherwise it is read into +/// an owned buffer. /// -/// After you do this, you can mount it using mount_fuse() and then start serving requests using -/// serve_tree_fuse(). You might want to do this in different threads, which is why these -/// operations are defined separately. +/// Returns a `&'static [u8]` via `Box::leak` — the FUSE server process +/// lives until unmount, so the leak is harmless. 
+#[allow(unsafe_code)] +fn load_image(fd: OwnedFd) -> anyhow::Result<&'static [u8]> { + if is_safe_to_mmap(&fd) { + let file = std::fs::File::from(fd); + let mmap = unsafe { memmap2::Mmap::map(&file) }.context("mmap EROFS image")?; + let leaked: &'static memmap2::Mmap = Box::leak(Box::new(mmap)); + Ok(leaked.as_ref()) + } else { + use std::io::Read as _; + let mut buf = Vec::new(); + std::fs::File::from(fd) + .read_to_end(&mut buf) + .context("reading EROFS image")?; + Ok(Vec::leak(buf)) + } +} + +/// Opens /dev/fuse. pub fn open_fuse() -> anyhow::Result { open("/dev/fuse", OFlags::RDWR | OFlags::CLOEXEC, Mode::empty()) .context("Unable to open fuse device /dev/fuse") @@ -497,10 +799,6 @@ pub struct FuseMountOptions { impl FuseMountOptions { /// Allow users other than the mounter to access the filesystem. - /// - /// Requires either CAP_SYS_ADMIN in the init user namespace or - /// `user_allow_other` in `/etc/fuse.conf`. Should be set to false - /// when mounting inside a user namespace. pub fn set_allow_other(&mut self, allow_other: bool) -> &mut Self { self.allow_other = allow_other; self @@ -509,9 +807,11 @@ impl FuseMountOptions { /// Mounts a FUSE filesystem with the given /dev/fuse fd. /// -/// This does the necessary dance of creating the mount object, given a /dev/fuse device node. In -/// order for this to be useful, you'll also need to call serve_tree_fuse() to actually satisfy the -/// requests for data. +/// Returns a detached FUSE mount fd. You'll need to call +/// [`serve_fuse`] to actually satisfy the FUSE requests. +/// +/// For overlay-lower mode, call [`mount_fuse_overlay`] *after* the FUSE +/// server is running to layer an overlayfs on top. pub fn mount_fuse(dev_fuse: impl AsFd, options: &FuseMountOptions) -> anyhow::Result { let fusefs = FsHandle::open("fuse")?; fsconfig_set_flag(fusefs.as_fd(), "ro")?; @@ -536,30 +836,77 @@ pub fn mount_fuse(dev_fuse: impl AsFd, options: &FuseMountOptions) -> anyhow::Re )?) 
} -/// Serves a FUSE filesystem exposing the content of `filesystem`, backed by `repo`. +/// Options controlling how the FUSE server behaves. +#[derive(Debug, Default)] +#[non_exhaustive] +pub struct ServeFuseOptions { + overlay_xattr: Option, +} + +impl ServeFuseOptions { + /// Set the overlay xattr mode. When `Some`, the server presents overlay + /// xattrs and refuses to open external files. When `None` (the default), + /// the server follows redirects and serves file content from the + /// repository directly. + pub fn set_overlay_xattr(&mut self, mode: Option) -> &mut Self { + self.overlay_xattr = mode; + self + } +} + +fn build_fuse( + image_fd: OwnedFd, + objects_fd: Arc, + options: &ServeFuseOptions, +) -> std::io::Result<(ComposefsFuse, Config)> { + let image_bytes = load_image(image_fd).map_err(|e| std::io::Error::other(format!("{e:#}")))?; + let image = Image::open(image_bytes).map_err(|e| std::io::Error::other(format!("{e}")))?; + + let tf = ComposefsFuse { + image, + objects_fd, + overlay_xattr: options.overlay_xattr, + handles: Mutex::new(FuseHandles::default()), + }; + + Ok((tf, Config::default())) +} + +/// Mounts and serves a FUSE filesystem at `mountpoint`. +/// +/// Uses `Session::new` which handles `fusermount3` fallback for unprivileged +/// callers. Blocks until the session ends. /// -/// You should have called mount_fuse() on the dev_fuse fd to establish a mount point. -pub fn serve_tree_fuse<'a, ObjectID: FsVerityHashValue>( +/// If `ready_fd` is provided, a single byte is written after the mount is +/// established but before serving starts. 
+pub fn serve_fuse( + mountpoint: impl AsRef, + image_fd: OwnedFd, + objects_fd: Arc, + options: &ServeFuseOptions, + ready_fd: Option, +) -> std::io::Result<()> { + let (tf, mut config) = build_fuse(image_fd, objects_fd, options)?; + config.mount_options = vec![MountOption::RO, MountOption::DefaultPermissions]; + let session = Session::new(tf, mountpoint.as_ref(), &config)?; + if let Some(fd) = ready_fd { + let _ = rustix::io::write(&fd, b"r"); + } + session.spawn()?.join() +} + +/// Serves a FUSE filesystem over a pre-mounted `/dev/fuse` fd. +/// +/// Use together with [`open_fuse`] and [`mount_fuse`] when you need control +/// over the mount lifecycle. Blocks until the session ends. +pub fn serve_fuse_fd( dev_fuse: OwnedFd, - filesystem: &'a FileSystem, - repo: &'a Repository, + image_fd: OwnedFd, + objects_fd: Arc, + options: &ServeFuseOptions, ) -> std::io::Result<()> { - let inode_map = InodeMap::build(filesystem); - let nlink_map = filesystem.nlinks(); - - let root_ino = inode_map.dir_ino(&filesystem.root); - let root_ref = InodeRef::Directory(&filesystem.root, root_ino); - let root_attr = root_ref.fileattr(root_ino, &nlink_map); - - let tf = TreeFuse:: { - repo, - fs: filesystem, - inode_map, - nlink_map, - inodes: HashMap::from([(root_ino, root_ref)]), - attrs: HashMap::from([(root_ino, root_attr)]), - handles: Default::default(), - next_fh: 1, - }; - Session::from_fd(tf, dev_fuse, SessionACL::All).run() + let (tf, config) = build_fuse(image_fd, objects_fd, options)?; + Session::from_fd(tf, dev_fuse, SessionACL::All, config)? + .spawn()? + .join() } From 0f9ec3cc330eade7a84120dfc82d14cc847a8bc6 Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Fri, 26 Jun 2026 17:36:31 +0200 Subject: [PATCH 2/4] composefs-fuse: Add mount_fuse_overlay for overlayfs-on-FUSE mounts Add mount_fuse_overlay() which creates an overlayfs on top of a FUSE mount, using userxattr mode and data-only lower layers for file content. 
The FUSE server must already be running before calling this, since overlayfs probes the lower layer during setup. OverlayMountOptions controls the overlay configuration: upper/work dirs for writable mounts, read-write mode, and fs-verity enforcement. This is needed when mount_fuse() is used with a FUSE server running in overlay xattr mode, since that server does not follow redirects itself. Assisted-by: Claude Code (Opus 4.6) Signed-off-by: Alexander Larsson --- crates/composefs-fuse/src/lib.rs | 71 ++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/crates/composefs-fuse/src/lib.rs b/crates/composefs-fuse/src/lib.rs index 75d0b279..45bcef79 100644 --- a/crates/composefs-fuse/src/lib.rs +++ b/crates/composefs-fuse/src/lib.rs @@ -836,6 +836,77 @@ pub fn mount_fuse(dev_fuse: impl AsFd, options: &FuseMountOptions) -> anyhow::Re )?) } +/// Options controlling how an overlayfs is created on top of a FUSE mount. +#[derive(Debug, Default)] +#[non_exhaustive] +pub struct OverlayMountOptions { + overlay_xattr: OverlayXattrMode, + upperdirs: Option<(OwnedFd, OwnedFd)>, + read_write: bool, + enable_verity: bool, +} + +impl OverlayMountOptions { + /// Set the overlay xattr mode. Defaults to [`OverlayXattrMode::User`]. + pub fn set_overlay_xattr(&mut self, mode: OverlayXattrMode) -> &mut Self { + self.overlay_xattr = mode; + self + } + + /// Add an overlayfs upper layer and work directory. + pub fn set_overlay(&mut self, upperdir: OwnedFd, workdir: OwnedFd) -> &mut Self { + self.upperdirs = Some((upperdir, workdir)); + self + } + + /// Make the mount read-write. + pub fn set_read_write(&mut self, read_write: bool) -> &mut Self { + self.read_write = read_write; + self + } + + /// Require fs-verity for overlay metacopy verification. + pub fn set_enable_verity(&mut self, enable_verity: bool) -> &mut Self { + self.enable_verity = enable_verity; + self + } +} + +/// Creates an overlayfs on top of a FUSE mount.
+pub fn mount_fuse_overlay( + fuse_mnt: OwnedFd, + basedir: impl AsFd, + options: &OverlayMountOptions, +) -> anyhow::Result { + let prepared = prepare_mount(fuse_mnt)?; + + let overlayfs = FsHandle::open("overlay")?; + fsconfig_set_string(overlayfs.as_fd(), "source", "composefs-fuse")?; + if options.overlay_xattr == OverlayXattrMode::User { + fsconfig_set_flag(overlayfs.as_fd(), "userxattr")?; + } + if options.enable_verity { + fsconfig_set_string(overlayfs.as_fd(), "verity", "require")?; + } + if let Some((upperdir, workdir)) = &options.upperdirs { + overlayfs_set_fd(overlayfs.as_fd(), "upperdir", upperdir.as_fd())?; + overlayfs_set_fd(overlayfs.as_fd(), "workdir", workdir.as_fd())?; + } + overlayfs_set_lower_and_data_fds(&overlayfs, &prepared, &[basedir.as_fd()])?; + fsconfig_create(overlayfs.as_fd())?; + + let mount_attr = if options.read_write { + MountAttrFlags::empty() + } else { + MountAttrFlags::MOUNT_ATTR_RDONLY + }; + Ok(fsmount( + overlayfs.as_fd(), + FsMountFlags::FSMOUNT_CLOEXEC, + mount_attr, + )?) +} + /// Options controlling how the FUSE server behaves. #[derive(Debug, Default)] #[non_exhaustive] From 870e6592174e7aca4ae5b22b3dfddb8d0616ee8d Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Fri, 26 Jun 2026 15:46:18 +0200 Subject: [PATCH 3/4] composefs-ctl: Add FUSE mount support to CLI Add --fuse/--no-fuse flags to 'cfsctl mount' and 'cfsctl oci mount' with automatic mount mode detection based on privilege level: - Root in the init user namespace: kernel composefs mount (default) - Non-init namespace with CAP_SYS_ADMIN: FUSE with overlayfs (kernel reads data directly via data-only lower layer) - Non-init namespace without CAP_SYS_ADMIN: plain FUSE The --fuse flag forces FUSE, --no-fuse forces kernel mount, and omitting both auto-detects. When --upperdir is given, overlay mode is always used regardless of capabilities. 
By default the FUSE server daemonizes by re-executing itself as --internal-fuse-serve, passing the repo, image, and overlay fds via inherited file descriptors. The parent waits on a pipe for mount readiness then returns, matching the kernel mount's fire-and-forget behaviour. Use --foreground to keep the server in the calling process (useful for tests and debugging). Init namespace detection reads /proc/self/uid_map for the characteristic "0 0 4294967295" identity mapping. The composefs-fuse crate is an optional dependency behind the 'fuse' feature (on by default). MountOptions gains has_overlay(), read_write(), and into_overlay() accessors. serve_fuse() takes an optional ready_fd parameter for signaling mount readiness. Assisted-by: Claude Code (Opus 4.6) Signed-off-by: Alexander Larsson --- Cargo.toml | 1 + crates/composefs-ctl/Cargo.toml | 6 +- crates/composefs-ctl/src/lib.rs | 341 ++++++++++++++++++++++++++++++- crates/composefs-ctl/src/main.rs | 13 ++ crates/composefs/src/mount.rs | 15 ++ 5 files changed, 370 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7086d4b3..a7b26166 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,7 @@ composefs-ctl = { version = "0.7.0", path = "crates/composefs-ctl", default-feat composefs-ioctls = { version = "0.7.0", path = "crates/composefs-ioctls", default-features = false } composefs-oci = { version = "0.7.0", path = "crates/composefs-oci", default-features = false } composefs-boot = { version = "0.7.0", path = "crates/composefs-boot", default-features = false } +composefs-fuse = { version = "0.7.0", path = "crates/composefs-fuse", default-features = false } composefs-http = { version = "0.7.0", path = "crates/composefs-http", default-features = false } composefs-ostree = { version = "0.7.0", path = "crates/composefs-ostree", default-features = false } cap-std-ext = "5.1.2" diff --git a/crates/composefs-ctl/Cargo.toml b/crates/composefs-ctl/Cargo.toml index b7155319..b4b47ba0 100644 ---
a/crates/composefs-ctl/Cargo.toml +++ b/crates/composefs-ctl/Cargo.toml @@ -17,11 +17,12 @@ name = "cfsctl" path = "src/main.rs" [features] -default = ['pre-6.15', 'oci', 'containers-storage', 'ostree'] +default = ['pre-6.15', 'oci', 'containers-storage', 'ostree', 'fuse'] http = ['composefs-http'] oci = ['composefs-oci', 'composefs-oci/varlink'] containers-storage = ['composefs-oci/containers-storage', 'cstorage'] ostree = ['composefs-ostree'] +fuse = ['dep:composefs-fuse'] rhel9 = ['composefs/rhel9'] 'pre-6.15' = ['composefs/pre-6.15'] @@ -35,13 +36,14 @@ composefs-boot = { workspace = true } composefs-oci = { workspace = true, optional = true, features = ["boot"] } composefs-http = { workspace = true, optional = true } cstorage = { package = "composefs-storage", path = "../composefs-storage", version = "0.7.0", features = ["userns-helper"], optional = true } +composefs-fuse = { workspace = true, optional = true } composefs-ostree = { workspace = true, optional = true } env_logger = { version = "0.11.0", default-features = false } hex = { version = "0.4.0", default-features = false } indicatif = { version = "0.17.0", default-features = false } libsystemd = { version = "0.7" } log = { version = "0.4", default-features = false } -rustix = { version = "1.0.0", default-features = false, features = ["fs", "process"] } +rustix = { version = "1.0.0", default-features = false, features = ["fs", "pipe", "process", "thread"] } serde = { version = "1.0", default-features = false, features = ["derive"] } serde_json = { version = "1.0", default-features = false, features = ["std"] } tokio = { version = "1.24.2", default-features = false, features = ["io-std", "io-util", "net", "rt", "sync"] } diff --git a/crates/composefs-ctl/src/lib.rs b/crates/composefs-ctl/src/lib.rs index 9a8c592e..db6de88d 100644 --- a/crates/composefs-ctl/src/lib.rs +++ b/crates/composefs-ctl/src/lib.rs @@ -48,6 +48,10 @@ use comfy_table::{Table, presets::UTF8_FULL}; #[cfg(any(feature = "oci", feature = 
"http"))] use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use rustix::fs::{CWD, Mode, OFlags}; +#[cfg(feature = "fuse")] +use rustix::process::getuid; +#[cfg(feature = "fuse")] +use rustix::thread::{CapabilitySet, capabilities}; #[cfg(any(feature = "oci", feature = "http"))] use composefs::progress::{ @@ -441,6 +445,18 @@ enum OciCommand { /// Mount the bootable variant instead of the regular EROFS image #[arg(long)] bootable: bool, + /// Force FUSE mount instead of auto-detecting + #[cfg(feature = "fuse")] + #[arg(long, conflicts_with = "no_fuse")] + fuse: bool, + /// Force kernel mount instead of auto-detecting + #[cfg(feature = "fuse")] + #[arg(long, conflicts_with = "fuse")] + no_fuse: bool, + /// Run FUSE server in the foreground (don't daemonize) + #[cfg(feature = "fuse")] + #[arg(long)] + foreground: bool, /// Writable upper layer directory for overlayfs #[arg(long, requires = "workdir")] upperdir: Option, @@ -652,6 +668,18 @@ enum Command { name: String, /// the mountpoint mountpoint: String, + /// Force FUSE mount instead of auto-detecting + #[cfg(feature = "fuse")] + #[arg(long, conflicts_with = "no_fuse")] + fuse: bool, + /// Force kernel mount instead of auto-detecting + #[cfg(feature = "fuse")] + #[arg(long, conflicts_with = "fuse")] + no_fuse: bool, + /// Run FUSE server in the foreground (don't daemonize) + #[cfg(feature = "fuse")] + #[arg(long)] + foreground: bool, /// Writable upper layer directory for overlayfs #[arg(long, requires = "workdir")] upperdir: Option, @@ -810,6 +838,275 @@ fn get_mount_options( Ok(options) } +#[cfg(feature = "fuse")] +enum MountMode { + Kernel, + Fuse, + FuseOverlay, +} + +#[cfg(feature = "fuse")] +fn in_init_user_namespace() -> bool { + std::fs::read_to_string("/proc/self/uid_map") + .map(|s| s.trim() == "0 0 4294967295") + .unwrap_or(false) +} + +#[cfg(feature = "fuse")] +fn has_cap_sys_admin() -> bool { + if let Ok(caps) = capabilities(None) { + caps.effective.contains(CapabilitySet::SYS_ADMIN) + } else 
{ + false + } +} + +#[cfg(feature = "fuse")] +fn detect_mount_mode(force_fuse: bool, no_fuse: bool, has_upper: bool) -> MountMode { + let use_fuse = if force_fuse { + true + } else if no_fuse { + false + } else { + !(getuid().is_root() && in_init_user_namespace()) + }; + + if !use_fuse { + return MountMode::Kernel; + } + + if has_upper || has_cap_sys_admin() { + MountMode::FuseOverlay + } else { + MountMode::Fuse + } +} + +#[cfg(feature = "fuse")] +fn run_fuse_foreground( + image_fd: std::os::fd::OwnedFd, + objects_fd: Arc, + mountpoint: &str, + mode: MountMode, + mount_options: MountOptions, + enable_verity: bool, + ready_fd: Option, +) -> Result<()> { + match mode { + MountMode::Kernel => unreachable!(), + MountMode::Fuse => { + let options = composefs_fuse::ServeFuseOptions::default(); + composefs_fuse::serve_fuse(mountpoint, image_fd, objects_fd, &options, ready_fd) + .context("FUSE server error")?; + } + MountMode::FuseOverlay => { + let dev_fuse = composefs_fuse::open_fuse()?; + let fuse_options = composefs_fuse::FuseMountOptions::default(); + let fuse_mnt = + composefs_fuse::mount_fuse(&dev_fuse, &fuse_options).context("FUSE mount")?; + + let mut serve_options = composefs_fuse::ServeFuseOptions::default(); + serve_options.set_overlay_xattr(Some(composefs_fuse::OverlayXattrMode::User)); + + let serve_objects = Arc::clone(&objects_fd); + let serve_dev = dev_fuse; + let join_handle = std::thread::spawn(move || { + composefs_fuse::serve_fuse_fd(serve_dev, image_fd, serve_objects, &serve_options) + }); + + let read_write = mount_options.read_write(); + let mut overlay_options = composefs_fuse::OverlayMountOptions::default(); + if let Some((upper_fd, work_fd)) = mount_options.into_overlay() { + overlay_options.set_overlay(upper_fd, work_fd); + } + overlay_options.set_read_write(read_write); + overlay_options.set_enable_verity(enable_verity); + + let overlay_mnt = + composefs_fuse::mount_fuse_overlay(fuse_mnt, &*objects_fd, &overlay_options) + .context("overlay 
mount")?; + composefs::mount::mount_at(overlay_mnt, CWD, mountpoint)?; + + if let Some(fd) = ready_fd { + let _ = rustix::io::write(&fd, b"r"); + } + + join_handle + .join() + .map_err(|_| anyhow::anyhow!("FUSE server thread panicked"))? + .context("FUSE server error")?; + } + } + Ok(()) +} + +/// Re-exec ourselves as `--internal-fuse-serve` to run the FUSE server in a +/// clean process without the tokio runtime. The parent waits on a pipe for +/// mount readiness, then returns. +#[cfg(feature = "fuse")] +#[allow(unsafe_code)] +fn run_fuse_mount( + repo: &Arc>, + name: &str, + mountpoint: &str, + mode: MountMode, + mount_options: MountOptions, + foreground: bool, +) -> Result<()> { + if foreground { + let (image_fd, enable_verity) = repo.open_image(name)?; + let objects_fd = Arc::new(repo.objects_dir()?.try_clone()?); + return run_fuse_foreground( + image_fd, + objects_fd, + mountpoint, + mode, + mount_options, + enable_verity, + None, + ); + } + + use std::os::fd::AsRawFd; + use std::os::unix::process::CommandExt; + + let (image_fd, enable_verity) = repo.open_image(name)?; + let (read_pipe, write_pipe) = rustix::pipe::pipe_with(rustix::pipe::PipeFlags::CLOEXEC)?; + + let self_exe = std::env::current_exe().context("resolving own binary path")?; + let mut cmd = std::process::Command::new(&self_exe); + cmd.arg("--internal-fuse-serve"); + cmd.arg("--mountpoint").arg(mountpoint); + cmd.arg("--image-fd").arg(image_fd.as_raw_fd().to_string()); + let repo_fd = repo.repo_fd().try_clone_to_owned()?; + cmd.arg("--repo-fd").arg(repo_fd.as_raw_fd().to_string()); + cmd.arg("--ready-fd") + .arg(write_pipe.as_raw_fd().to_string()); + + match mode { + MountMode::Kernel => unreachable!(), + MountMode::Fuse => cmd.arg("--mode").arg("fuse"), + MountMode::FuseOverlay => cmd.arg("--mode").arg("fuse-overlay"), + }; + + if enable_verity { + cmd.arg("--enable-verity"); + } + if mount_options.read_write() { + cmd.arg("--read-write"); + } + if let Some((upper_fd, work_fd)) = 
mount_options.into_overlay() { + cmd.arg("--upper-fd").arg(upper_fd.as_raw_fd().to_string()); + cmd.arg("--work-fd").arg(work_fd.as_raw_fd().to_string()); + clear_cloexec(&upper_fd); + clear_cloexec(&work_fd); + std::mem::forget(upper_fd); + std::mem::forget(work_fd); + } + + clear_cloexec(&image_fd); + clear_cloexec(&repo_fd); + clear_cloexec(&write_pipe); + + unsafe { + cmd.pre_exec(|| { + let _ = rustix::process::setsid(); + Ok(()) + }); + } + + std::mem::forget(image_fd); + std::mem::forget(repo_fd); + std::mem::forget(write_pipe); + + cmd.stdin(std::process::Stdio::null()); + cmd.stdout(std::process::Stdio::null()); + cmd.stderr(std::process::Stdio::inherit()); + + let _child = cmd.spawn().context("spawning FUSE server process")?; + + // Wait for mount readiness + let mut buf = [0u8; 1]; + let _ = rustix::io::read(&read_pipe, &mut buf); + + Ok(()) +} + +#[cfg(feature = "fuse")] +fn clear_cloexec(fd: &impl std::os::fd::AsFd) { + let _ = rustix::io::fcntl_setfd(fd, rustix::io::FdFlags::empty()); +} + +/// Arguments for the internal FUSE server process. +#[cfg(feature = "fuse")] +#[derive(Debug, clap::Parser)] +pub struct InternalFuseServeArgs { + #[arg(long)] + mountpoint: String, + #[arg(long)] + image_fd: i32, + #[arg(long)] + repo_fd: i32, + #[arg(long)] + ready_fd: i32, + #[arg(long, value_parser = ["fuse", "fuse-overlay"])] + mode: String, + #[arg(long)] + enable_verity: bool, + #[arg(long)] + read_write: bool, + #[arg(long)] + upper_fd: Option, + #[arg(long)] + work_fd: Option, +} + +/// Entry point for the internal FUSE server process, called from main() +/// before the tokio runtime is created. 
+#[cfg(feature = "fuse")] +#[allow(unsafe_code)] +pub fn run_internal_fuse_serve(args: InternalFuseServeArgs) -> Result<()> { + use std::os::fd::FromRawFd; + + let image_fd = unsafe { std::os::fd::OwnedFd::from_raw_fd(args.image_fd) }; + let repo_fd = unsafe { std::os::fd::OwnedFd::from_raw_fd(args.repo_fd) }; + let ready_fd = unsafe { std::os::fd::OwnedFd::from_raw_fd(args.ready_fd) }; + + let objects_fd = Arc::new( + rustix::fs::openat( + &repo_fd, + "objects", + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) + .context("opening objects dir")?, + ); + + let mode = match args.mode.as_str() { + "fuse" => MountMode::Fuse, + "fuse-overlay" => MountMode::FuseOverlay, + _ => unreachable!(), + }; + + let mut mount_options = MountOptions::default(); + if let (Some(upper_raw), Some(work_raw)) = (args.upper_fd, args.work_fd) { + let upper_fd = unsafe { std::os::fd::OwnedFd::from_raw_fd(upper_raw) }; + let work_fd = unsafe { std::os::fd::OwnedFd::from_raw_fd(work_raw) }; + mount_options.set_overlay(upper_fd, work_fd); + } + mount_options.set_read_write(args.read_write); + + run_fuse_foreground( + image_fd, + objects_fd, + &args.mountpoint, + mode, + mount_options, + args.enable_verity, + Some(ready_fd), + ) +} + #[cfg(feature = "oci")] pub(crate) fn verity_opt(opt: &Option) -> Result> where @@ -1388,12 +1685,16 @@ where ref image, ref mountpoint, bootable, + #[cfg(feature = "fuse")] + fuse, + #[cfg(feature = "fuse")] + no_fuse, + #[cfg(feature = "fuse")] + foreground, ref upperdir, ref workdir, read_write, } => { - let mount_options = - get_mount_options(upperdir.as_deref(), workdir.as_deref(), read_write)?; let img = if image.starts_with("sha256:") { let digest: composefs_oci::OciDigest = image.parse().context("Parsing manifest digest")?; @@ -1416,7 +1717,25 @@ where ), } }; - repo.mount_at(&erofs_id.to_hex(), mountpoint.as_str(), &mount_options)?; + let erofs_name = erofs_id.to_hex(); + let mount_options = + 
get_mount_options(upperdir.as_deref(), workdir.as_deref(), read_write)?; + + #[cfg(feature = "fuse")] + if let mode @ (MountMode::Fuse | MountMode::FuseOverlay) = + detect_mount_mode(fuse, no_fuse, upperdir.is_some()) + { + run_fuse_mount( + &repo, + &erofs_name, + mountpoint.as_str(), + mode, + mount_options, + foreground, + )?; + } else { + repo.mount_at(&erofs_name, mountpoint.as_str(), &mount_options)?; + } } OciCommand::ComputeId { config_opts } => { let fs = load_filesystem_from_oci_image(&repo, config_opts)?; @@ -1766,13 +2085,27 @@ where Command::Mount { name, mountpoint, + #[cfg(feature = "fuse")] + fuse, + #[cfg(feature = "fuse")] + no_fuse, + #[cfg(feature = "fuse")] + foreground, ref upperdir, ref workdir, read_write, } => { let mount_options = get_mount_options(upperdir.as_deref(), workdir.as_deref(), read_write)?; - repo.mount_at(&name, &mountpoint, &mount_options)?; + + #[cfg(feature = "fuse")] + if let mode @ (MountMode::Fuse | MountMode::FuseOverlay) = + detect_mount_mode(fuse, no_fuse, upperdir.is_some()) + { + run_fuse_mount(&repo, &name, &mountpoint, mode, mount_options, foreground)?; + } else { + repo.mount_at(&name, &mountpoint, &mount_options)?; + } } Command::ImageObjects { name } => { let objects = repo.objects_for_image(&name)?; diff --git a/crates/composefs-ctl/src/main.rs b/crates/composefs-ctl/src/main.rs index 942f1fc0..4ace5574 100644 --- a/crates/composefs-ctl/src/main.rs +++ b/crates/composefs-ctl/src/main.rs @@ -59,6 +59,19 @@ fn main() -> Result<()> { _ if std::env::args_os().nth(1).as_deref() == Some(OsStr::new("mount.composefs")) => { composefs_ctl::mountcomposefs::run_from_args(rest_of_args()) } + _ if std::env::args_os().nth(1).as_deref() == Some(OsStr::new("--internal-fuse-serve")) => { + #[cfg(feature = "fuse")] + { + use clap::Parser; + let args = + composefs_ctl::InternalFuseServeArgs::parse_from(std::env::args_os().skip(1)); + composefs_ctl::run_internal_fuse_serve(args) + } + #[cfg(not(feature = "fuse"))] + { + 
anyhow::bail!("--internal-fuse-serve requires the 'fuse' feature"); + } + } _ => { // If we were spawned as a userns helper process, handle that and exit. // This MUST be called before the tokio runtime is created. diff --git a/crates/composefs/src/mount.rs b/crates/composefs/src/mount.rs index 335040a9..83d73b3e 100644 --- a/crates/composefs/src/mount.rs +++ b/crates/composefs/src/mount.rs @@ -167,6 +167,21 @@ impl MountOptions { self.idmap_fd = Some(fd); self } + + /// Whether an overlay upper layer was configured. + pub fn has_overlay(&self) -> bool { + self.upperdirs.is_some() + } + + /// Whether the mount should be read-write. + pub fn read_write(&self) -> bool { + self.read_write + } + + /// Consume the options, returning the overlay fds if set. + pub fn into_overlay(self) -> Option<(OwnedFd, OwnedFd)> { + self.upperdirs + } } /// Creates a composefs mount using overlayfs with an erofs image and base directories. From d82e9abc2160323b0f2fa71ab01cbe4d6cdf4149 Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Fri, 26 Jun 2026 15:46:26 +0200 Subject: [PATCH 4/4] composefs-integration-tests: Add FUSE mount roundtrip test Add privileged_fuse_dumpfile_roundtrip test that validates the FUSE implementation by building a synthetic filesystem with diverse content (directories, inline files, external files, symlinks, xattrs, hardlinks, character devices, FIFOs), mounting it via `cfsctl mount --fuse --foreground`, and comparing the dumpfile output against the expected canonical form. The test uses --foreground so the FUSE server runs as a child process that the test can manage directly (kill + unmount on cleanup). The test also reads external file content from the FUSE mount to verify the repository object serving path works correctly. 
Based-on-work-by: Colin Walters Assisted-by: Claude Code (Opus 4.6) Signed-off-by: Alexander Larsson --- crates/composefs-ctl/src/lib.rs | 38 ++- crates/composefs-integration-tests/Cargo.toml | 2 +- .../src/tests/privileged.rs | 290 ++++++++++++++++++ 3 files changed, 327 insertions(+), 3 deletions(-) diff --git a/crates/composefs-ctl/src/lib.rs b/crates/composefs-ctl/src/lib.rs index db6de88d..64fcdbaf 100644 --- a/crates/composefs-ctl/src/lib.rs +++ b/crates/composefs-ctl/src/lib.rs @@ -541,6 +541,18 @@ enum OstreeCommand { commit: String, /// Target mountpoint mountpoint: String, + /// Force FUSE mount instead of auto-detecting + #[cfg(feature = "fuse")] + #[arg(long, conflicts_with = "no_fuse")] + fuse: bool, + /// Force kernel mount instead of auto-detecting + #[cfg(feature = "fuse")] + #[arg(long, conflicts_with = "fuse")] + no_fuse: bool, + /// Run FUSE server in the foreground (don't daemonize) + #[cfg(feature = "fuse")] + #[arg(long)] + foreground: bool, /// Writable upper layer directory for overlayfs #[arg(long, requires = "workdir")] upperdir: Option, @@ -2018,14 +2030,36 @@ where OstreeCommand::Mount { ref commit, ref mountpoint, + #[cfg(feature = "fuse")] + fuse, + #[cfg(feature = "fuse")] + no_fuse, + #[cfg(feature = "fuse")] + foreground, ref upperdir, ref workdir, read_write, } => { + let image_id = composefs_ostree::get_image_ref(&repo, commit)?; + let image_name = image_id.to_hex(); let mount_options = get_mount_options(upperdir.as_deref(), workdir.as_deref(), read_write)?; - let image_id = composefs_ostree::get_image_ref(&repo, commit)?; - repo.mount_at(&image_id.to_hex(), mountpoint.as_str(), &mount_options)?; + + #[cfg(feature = "fuse")] + if let mode @ (MountMode::Fuse | MountMode::FuseOverlay) = + detect_mount_mode(fuse, no_fuse, upperdir.is_some()) + { + run_fuse_mount( + &repo, + &image_name, + mountpoint.as_str(), + mode, + mount_options, + foreground, + )?; + } else { + repo.mount_at(&image_name, mountpoint.as_str(), &mount_options)?; + 
} } OstreeCommand::Dump { ref commit_name } => { let fs = composefs_ostree::create_filesystem(&repo, commit_name)?; diff --git a/crates/composefs-integration-tests/Cargo.toml b/crates/composefs-integration-tests/Cargo.toml index b7b89b0c..d946ae7c 100644 --- a/crates/composefs-integration-tests/Cargo.toml +++ b/crates/composefs-integration-tests/Cargo.toml @@ -46,7 +46,7 @@ libtest-mimic = "0.8" linkme = "0.3" ocidir = { workspace = true } paste = "1" -rustix = { version = "1", features = ["fs", "process"] } +rustix = { version = "1", features = ["fs", "mount", "process"] } serde = { version = "1", features = ["derive"] } serde_json = "1" similar-asserts = "1" diff --git a/crates/composefs-integration-tests/src/tests/privileged.rs b/crates/composefs-integration-tests/src/tests/privileged.rs index a5c5f922..a1495f8d 100644 --- a/crates/composefs-integration-tests/src/tests/privileged.rs +++ b/crates/composefs-integration-tests/src/tests/privileged.rs @@ -968,3 +968,293 @@ fn privileged_cstor_import_xfs_reflink() -> Result<()> { Ok(()) } integration_test!(privileged_cstor_import_xfs_reflink); + +// ============================================================================ +// FUSE integration test +// ============================================================================ + +/* NOTE(review): in this copy of the patch Option, Vec, Repository and Result> carry no type arguments and the ::::new turbofish is empty; the angle-bracket contents look stripped and need restoring before the patch is applied. */ /** Cleanup guard for the FUSE mount used by the test below. Dropping it kills and reaps the child process, if one is still held, and then unmounts mountpoint with the DETACH flag. Every error on that path is ignored. */ struct MountGuard { + /* directory handed to unmount on drop */ mountpoint: PathBuf, + /* spawned process; taken out of the Option on drop, then killed and waited on */ child: Option, +} + +impl Drop for MountGuard { + fn drop(&mut self) { + /* best-effort teardown: the results of kill, wait and unmount are all discarded */ if let Some(mut child) = self.child.take() { + let _ = child.kill(); + let _ = child.wait(); + } + let _ = rustix::mount::unmount(&self.mountpoint, rustix::mount::UnmountFlags::DETACH); + } +} + +/** Returns 600 bytes, each the ASCII letter A. Used below as the payload of the external file usr/bin/bigfile. */ fn bigfile_content() -> Vec { + vec![b'A'; 600] +} + +/** Returns 800 bytes that repeat the sequence 0, 1, ..., 255. Used below as the payload of the external file usr/lib/biglib.so. */ fn biglib_content() -> Vec { + (0u8..=255).cycle().take(800).collect() +} + +/** Builds the synthetic tree mounted by the FUSE round-trip test: inline regular files (one with an xattr), a symlink, a character device, a fifo and two external files. The external payloads are written into repo through ensure_object; any error from that call is propagated. */ fn build_test_filesystem( + repo: &Repository, +) -> Result> { + use std::collections::BTreeMap; + use std::ffi::OsStr; + + use composefs_oci::composefs::generic_tree::{LeafId, Stat}; + use composefs_oci::composefs::tree::{ + Directory, FileSystem, 
Inode, Leaf, LeafContent, RegularFile, + }; + + /* Shorthand for a Stat with the given mode, owner and mtime seconds, zero nanoseconds and an empty xattr map. */ fn mkstat(mode: u32, uid: u32, gid: u32, mtime: i64) -> Stat { + Stat { + st_mode: mode, + st_uid: uid, + st_gid: gid, + st_mtim_sec: mtime, + st_mtim_nsec: 0, + xattrs: BTreeMap::new(), + } + } + + let root_stat = mkstat(0o755, 0, 0, 1_700_000_000); + + let mut fs = FileSystem::::new(root_stat); + + /* Every LeafId below is the current length of fs.leaves, i.e. the index the leaf receives from the push that follows it. */ let hello_id = LeafId(fs.leaves.len()); + { + /* hello spells out its Stat by hand because mkstat always yields an empty xattr map and this leaf carries user.test */ let mut xattrs = BTreeMap::new(); + xattrs.insert( + OsStr::new("user.test").into(), + Box::from(b"hello-value".as_ref()), + ); + fs.leaves.push(Leaf { + stat: Stat { + st_mode: 0o755, + st_uid: 0, + st_gid: 0, + st_mtim_sec: 1_700_000_001, + st_mtim_nsec: 0, + xattrs, + }, + content: LeafContent::Regular(RegularFile::Inline( + b"hello world binary stub".as_ref().into(), + )), + }); + } + + let readme_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o644, 0, 0, 1_700_000_002), + content: LeafContent::Regular(RegularFile::Inline( + b"readme text content\n".as_ref().into(), + )), + }); + + let hostname_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o644, 0, 0, 1_700_000_003), + content: LeafContent::Regular(RegularFile::Inline(b"integration-test\n".as_ref().into())), + }); + + let os_release_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o644, 0, 0, 1_700_000_004), + content: LeafContent::Regular(RegularFile::Inline(b"ID=test\nNAME=Test\n".as_ref().into())), + }); + + /* symlink leaf; it is linked into the tree as etc/os-release further down */ let symlink_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o777, 0, 0, 1_700_000_005), + content: LeafContent::Symlink(OsStr::new("../usr/lib/os-release").into()), + }); + + /* character device 1:3 with mtime 0, linked in as dev/null further down */ let devnull_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o666, 0, 0, 0), + content: LeafContent::CharacterDevice(rustix::fs::makedev(1, 3)), + }); + + let fifo_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o644, 0, 0, 1_700_000_006), + content: LeafContent::Fifo, + }); + + /* External files: the bytes go into the repo via ensure_object and the leaf records only the returned hash plus the byte length. */ let bigfile_data = 
bigfile_content(); + let bigfile_hash = repo.ensure_object(&bigfile_data)?; + let bigfile_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o755, 0, 0, 1_700_000_007), + content: LeafContent::Regular(RegularFile::External( + bigfile_hash, + bigfile_data.len() as u64, + )), + }); + + let biglib_data = biglib_content(); + let biglib_hash = repo.ensure_object(&biglib_data)?; + let biglib_id = LeafId(fs.leaves.len()); + fs.leaves.push(Leaf { + stat: mkstat(0o755, 0, 0, 1_700_000_008), + content: LeafContent::Regular(RegularFile::External(biglib_hash, biglib_data.len() as u64)), + }); + + /* hello and hello2 are two names for the same leaf id (presumably surfacing as hard links; confirm against the tree model) */ let mut usr_bin = Directory::::new(mkstat(0o755, 0, 0, 1_700_000_010)); + usr_bin.insert(OsStr::new("hello"), Inode::leaf(hello_id)); + usr_bin.insert(OsStr::new("hello2"), Inode::leaf(hello_id)); + usr_bin.insert(OsStr::new("bigfile"), Inode::leaf(bigfile_id)); + + let mut usr_lib = Directory::::new(mkstat(0o755, 0, 0, 1_700_000_011)); + usr_lib.insert(OsStr::new("readme.txt"), Inode::leaf(readme_id)); + usr_lib.insert(OsStr::new("os-release"), Inode::leaf(os_release_id)); + usr_lib.insert(OsStr::new("biglib.so"), Inode::leaf(biglib_id)); + + let mut usr = Directory::::new(mkstat(0o755, 0, 0, 1_700_000_012)); + usr.insert(OsStr::new("bin"), Inode::Directory(Box::new(usr_bin))); + usr.insert(OsStr::new("lib"), Inode::Directory(Box::new(usr_lib))); + + let mut etc = Directory::::new(mkstat(0o755, 0, 0, 1_700_000_013)); + etc.insert(OsStr::new("hostname"), Inode::leaf(hostname_id)); + etc.insert(OsStr::new("os-release"), Inode::leaf(symlink_id)); + + let mut dev = Directory::::new(mkstat(0o755, 0, 0, 1_700_000_014)); + dev.insert(OsStr::new("null"), Inode::leaf(devnull_id)); + + let mut tmp_dir = Directory::::new(mkstat(0o1777, 0, 0, 1_700_000_015)); + tmp_dir.insert(OsStr::new("fifo"), Inode::leaf(fifo_id)); + + /* attach the four top-level directories (usr, etc, dev, tmp) to the root */ fs.root + .insert(OsStr::new("usr"), Inode::Directory(Box::new(usr))); + fs.root + .insert(OsStr::new("etc"), Inode::Directory(Box::new(etc))); + fs.root + 
.insert(OsStr::new("dev"), Inode::Directory(Box::new(dev))); + fs.root + .insert(OsStr::new("tmp"), Inode::Directory(Box::new(tmp_dir))); + + Ok(fs) +} + +/** Round-trip test for the FUSE mount. It writes the synthetic tree as an EROFS image into a fresh repo, mounts that image with cfsctl mount --fuse --foreground, reads the two external files back through the mount, and finally compares the output of cfsctl create-dumpfile on the mountpoint with a dumpfile written from the image parsed back out of the EROFS bytes. Returns Ok without doing any of this when require_privileged yields Some. */ fn privileged_fuse_dumpfile_roundtrip() -> Result<()> { + use std::os::unix::fs::MetadataExt as _; + use std::time::{Duration, Instant}; + + use composefs_oci::composefs::{ + dumpfile::write_dumpfile, + erofs::{ + reader::erofs_to_filesystem, + writer::{ValidatedFileSystem, mkfs_erofs}, + }, + repository::{Repository, RepositoryConfig}, + }; + + /* NOTE(review): Some presumably means the helper already handled or skipped the test; confirm against require_privileged */ if require_privileged("privileged_fuse_dumpfile_roundtrip")?.is_some() { + return Ok(()); + } + + /* mountpoint and repo both live inside one temporary directory */ let work_dir = tempfile::tempdir()?; + let mountpoint = work_dir.path().join("mnt"); + let repo_path = work_dir.path().join("repo"); + std::fs::create_dir(&mountpoint)?; + std::fs::create_dir(&repo_path)?; + + let repo_fd = rustix::fs::open( + &repo_path, + rustix::fs::OFlags::CLOEXEC | rustix::fs::OFlags::RDONLY, + rustix::fs::Mode::empty(), + )?; + let (mut repo, _created) = Repository::::init_path( + &repo_fd, + ".", + RepositoryConfig::default().set_insecure(), + )?; + /* NOTE(review): the config above already calls set_insecure; confirm whether this second call is still needed */ repo.set_insecure(); + + let synthetic = build_test_filesystem(&repo)?; + let erofs_bytes = mkfs_erofs(&mut ValidatedFileSystem::new(synthetic)?); + /* the expected dump comes from the image parsed back from the EROFS bytes, not from the synthetic tree directly */ let canonical_fs = erofs_to_filesystem::(&erofs_bytes)?; + + let image_id = repo.write_image(None, &erofs_bytes)?; + let image_name = image_id.to_hex(); + + let mut expected_buf = Vec::new(); + write_dumpfile(&mut expected_buf, &canonical_fs)?; + let expected_dump = String::from_utf8(expected_buf)?; + + /* st_dev of the empty mountpoint; a different value later is taken as the sign that the mount is up */ let pre_mount_dev = std::fs::metadata(&mountpoint)?.dev(); + + let cfsctl_bin = cfsctl()?; + let child = std::process::Command::new(&cfsctl_bin) + .arg("--repo") + .arg(&repo_path) + .arg("mount") + .arg("--fuse") + .arg("--foreground") + .arg(&image_name) + .arg(&mountpoint) + .spawn() + .context("spawning cfsctl mount --fuse")?; + + /* from here on every exit path, including the bail and ? returns below, goes through the guard's Drop */ let mut guard = MountGuard { + mountpoint: mountpoint.clone(), + child: Some(child), + }; + + /* Poll every 20 ms for at most 30 s. The loop fails at once if the child has already exited; a failed stat is treated the same as the mount not being there yet. */ let deadline = Instant::now() + 
Duration::from_secs(30); + loop { + if let Some(child) = guard.child.as_mut() + && let Some(status) = child.try_wait()? + { + bail!("cfsctl mount --fuse exited before mount was ready: {status}"); + } + if std::fs::metadata(&mountpoint) + .map(|m| m.dev()) + .unwrap_or(pre_mount_dev) + != pre_mount_dev + { + break; + } + if Instant::now() >= deadline { + bail!("timed out waiting for FUSE mount"); + } + std::thread::sleep(Duration::from_millis(20)); + } + + /* read both external files back through the mount and compare them with their generators */ let bigfile_actual = std::fs::read(mountpoint.join("usr/bin/bigfile")) + .context("reading bigfile from FUSE mount")?; + ensure!( + bigfile_actual == bigfile_content(), + "bigfile content mismatch: got {} bytes, expected {}", + bigfile_actual.len(), + bigfile_content().len(), + ); + let biglib_actual = std::fs::read(mountpoint.join("usr/lib/biglib.so")) + .context("reading biglib.so from FUSE mount")?; + ensure!( + biglib_actual == biglib_content(), + "biglib.so content mismatch: got {} bytes, expected {}", + biglib_actual.len(), + biglib_content().len(), + ); + + /* dump the mounted tree with cfsctl create-dumpfile, pointing it at the same repo */ let sh = Shell::new()?; + let mp = mountpoint.to_str().context("non-UTF-8 mountpoint")?; + let repo_arg = repo_path.to_str().context("non-UTF-8 repo path")?; + let actual_dump = cmd!( + sh, + "{cfsctl_bin} --repo {repo_arg} create-dumpfile --no-propagate-usr-to-root {mp}" + ) + .read()?; + + /* kill the child and unmount before comparing the two dumps */ drop(guard); + + /* trailing newlines are trimmed on both sides before the comparison */ similar_asserts::assert_eq!( + expected_dump.trim_end_matches('\n'), + actual_dump.trim_end_matches('\n') + ); + + Ok(()) +} +integration_test!(privileged_fuse_dumpfile_roundtrip);