diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ec60eda9..df210b28 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: options: "--privileged --pid=host -v /var/tmp:/var/tmp --tmpfs /tmp:rw,exec,nosuid,nodev -v /:/run/host" steps: - - run: dnf -y install cargo clippy composefs-devel e2fsprogs just rustfmt gcc-c++ + - run: dnf -y install cargo clippy composefs-devel e2fsprogs just ostree rustfmt gcc-c++ - name: Enable fs-verity on / run: tune2fs -O verity $(findmnt -vno SOURCE /run/host) - uses: actions/checkout@v7 @@ -53,6 +53,8 @@ jobs: - uses: dtolnay/rust-toolchain@stable - uses: taiki-e/install-action@nextest - uses: Swatinem/rust-cache@v2 + - name: Install ostree + run: sudo apt-get update && sudo apt-get install -y ostree - run: just test-integration # Fuzz smoke test — runs each fuzz target briefly to catch panics @@ -123,6 +125,9 @@ jobs: - uses: Swatinem/rust-cache@v2 + - name: Install ostree + run: sudo apt-get update && sudo apt-get install -y ostree + - name: Run integration tests (unprivileged + privileged via VM) run: just test-integration-vm diff --git a/Cargo.toml b/Cargo.toml index 9760c8db..7086d4b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ default-members = [ "crates/composefs-http", "crates/composefs-ioctls", "crates/composefs-oci", + "crates/composefs-ostree", "crates/composefs-setup-root", "crates/composefs-storage", "crates/composefs-erofs-debug", @@ -38,6 +39,7 @@ composefs-ioctls = { version = "0.7.0", path = "crates/composefs-ioctls", defaul composefs-oci = { version = "0.7.0", path = "crates/composefs-oci", default-features = false } composefs-boot = { version = "0.7.0", path = "crates/composefs-boot", default-features = false } composefs-http = { version = "0.7.0", path = "crates/composefs-http", default-features = false } +composefs-ostree = { version = "0.7.0", path = "crates/composefs-ostree", default-features = false } cap-std-ext = "5.1.2" ocidir = "0.7.2" diff --git a/contrib/packaging/install-test-deps.sh b/contrib/packaging/install-test-deps.sh index f1be65e6..982d839c 100755 --- a/contrib/packaging/install-test-deps.sh +++ b/contrib/packaging/install-test-deps.sh @@ -11,12 +11,12 @@ set -euo pipefail case "${ID}" in centos|fedora|rhel) - pkg_install composefs openssl podman skopeo xfsprogs + pkg_install composefs openssl ostree podman skopeo xfsprogs ;; debian|ubuntu) pkg_install \ openssl e2fsprogs bubblewrap openssh-server \ - podman skopeo + ostree podman skopeo # OSTree symlink targets — /root, /home, /srv, etc. are symlinks # into /var on OSTree systems, so the target directories must exist. diff --git a/crates/composefs-ctl/Cargo.toml b/crates/composefs-ctl/Cargo.toml index 59cf132b..b7155319 100644 --- a/crates/composefs-ctl/Cargo.toml +++ b/crates/composefs-ctl/Cargo.toml @@ -17,10 +17,11 @@ name = "cfsctl" path = "src/main.rs" [features] -default = ['pre-6.15', 'oci', 'containers-storage'] +default = ['pre-6.15', 'oci', 'containers-storage', 'ostree'] http = ['composefs-http'] oci = ['composefs-oci', 'composefs-oci/varlink'] containers-storage = ['composefs-oci/containers-storage', 'cstorage'] +ostree = ['composefs-ostree'] rhel9 = ['composefs/rhel9'] 'pre-6.15' = ['composefs/pre-6.15'] @@ -34,6 +35,7 @@ composefs-boot = { workspace = true } composefs-oci = { workspace = true, optional = true, features = ["boot"] } composefs-http = { workspace = true, optional = true } cstorage = { package = "composefs-storage", path = "../composefs-storage", version = "0.7.0", features = ["userns-helper"], optional = true } +composefs-ostree = { workspace = true, optional = true } env_logger = { version = "0.11.0", default-features = false } hex = { version = "0.4.0", default-features = false } indicatif = { version = "0.17.0", default-features = false } diff --git a/crates/composefs-ctl/src/lib.rs b/crates/composefs-ctl/src/lib.rs index bbad52c3..f6ccd2a1 100644 --- a/crates/composefs-ctl/src/lib.rs +++ b/crates/composefs-ctl/src/lib.rs @@ -42,7 +42,7 @@ use std::sync::Arc; use anyhow::{Context as _, Result}; use clap::{Parser, Subcommand, ValueEnum}; -#[cfg(feature = "oci")] +#[cfg(any(feature = "oci", feature = "ostree"))] use comfy_table::{Table, presets::UTF8_FULL}; #[cfg(any(feature = "oci", feature = "http"))] use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; @@ -501,6 +501,76 @@ enum OciCommand { }, } +#[cfg(feature = "ostree")] +#[derive(Debug, Subcommand)] +enum OstreeCommand { + PullLocal { + ostree_repo_path: PathBuf, + /// Ostree ref name or commit ID (64-character hex) + ostree_ref: String, + #[clap(long)] + base_name: Option, + }, + Pull { + ostree_repo_url: String, + /// Ostree ref name or commit ID (64-character hex) + ostree_ref: String, + #[clap(long)] + base_name: Option, + }, + /// Mount an ostree commit's composefs EROFS at the given mountpoint + Mount { + /// Ostree commit ref or commit ID + commit: String, + /// Target mountpoint + mountpoint: String, + /// Writable upper layer directory for overlayfs + #[arg(long, requires = "workdir")] + upperdir: Option, + /// Work directory for overlayfs (required with --upperdir) + #[arg(long, requires = "upperdir")] + workdir: Option, + /// Mount read-write (requires --upperdir) + #[arg(long, requires = "upperdir")] + read_write: bool, + }, + /// Dump the filesystem of an ostree commit as a composefs dumpfile to stdout + Dump { + /// Ostree commit ref name + commit_name: String, + }, + /// Compute the composefs image ID of an ostree commit + ComputeId { + /// Ostree commit ref name + commit_name: String, + }, + /// Show the contents of an ostree commit + Inspect { + /// Ostree ref name, commit ID, or commit ID prefix + source: String, + /// Print only the commit metadata key-value pairs + #[clap(long)] + metadata: bool, + }, + /// Tag an ostree commit with a name + /// + /// The source can be an ostree commit checksum or an existing ref name. + Tag { + /// Ostree commit checksum (hex) or existing ref name + source: String, + /// Tag name to assign + name: String, + }, + /// Remove a named ostree reference + Untag { + /// Tag name to remove + name: String, + }, + /// List all ostree commits in the repository + #[clap(name = "images")] + ListCommits, +} + /// Common options for reading a filesystem from a path #[derive(Debug, Parser)] struct FsReadOptions { @@ -570,6 +640,11 @@ enum Command { #[clap(subcommand)] cmd: OciCommand, }, + #[cfg(feature = "ostree")] + Ostree { + #[clap(subcommand)] + cmd: OstreeCommand, + }, /// Mounts a composefs image, possibly enforcing fsverity of the image Mount { /// the name of the image to mount, either an fs-verity hash or prefixed with 'ref/' @@ -1566,6 +1641,111 @@ where unreachable!("oci varlink is handled before opening a repository"); } }, + #[cfg(feature = "ostree")] + Command::Ostree { cmd: ostree_cmd } => match ostree_cmd { + OstreeCommand::PullLocal { + ref ostree_repo_path, + ref ostree_ref, + base_name, + } => { + eprintln!("Fetching {ostree_ref}"); + let (verity, stats) = composefs_ostree::pull_local( + &repo, + ostree_repo_path, + ostree_ref, + base_name.as_deref(), + ) + .await?; + + let image_id = composefs_ostree::get_image_ref(&repo, &stats.commit_id)?; + println!("commit {}", stats.commit_id); + println!("verity {}", verity.to_hex()); + println!("image {}", image_id.to_hex()); + if !composefs_ostree::is_commit_id(ostree_ref) { + println!("tagged {ostree_ref}"); + } + println!( + "objects {} metadata + {} files fetched", + stats.metadata_fetched, stats.files_fetched + ); + } + OstreeCommand::Pull { + ref ostree_repo_url, + ref ostree_ref, + base_name, + } => { + eprintln!("Fetching {ostree_ref}"); + let (verity, stats) = composefs_ostree::pull( + &repo, + ostree_repo_url, + ostree_ref, + base_name.as_deref(), + ) + .await?; + + let image_id = composefs_ostree::get_image_ref(&repo, &stats.commit_id)?; + println!("commit {}", stats.commit_id); + println!("verity {}", verity.to_hex()); + println!("image {}", image_id.to_hex()); + if !composefs_ostree::is_commit_id(ostree_ref) { + println!("tagged {ostree_ref}"); + } + println!( + "objects {} metadata + {} files fetched", + stats.metadata_fetched, stats.files_fetched + ); + } + OstreeCommand::Mount { + ref commit, + ref mountpoint, + ref upperdir, + ref workdir, + read_write, + } => { + let mount_options = + get_mount_options(upperdir.as_deref(), workdir.as_deref(), read_write)?; + let image_id = composefs_ostree::get_image_ref(&repo, commit)?; + repo.mount_at(&image_id.to_hex(), mountpoint.as_str(), &mount_options)?; + } + OstreeCommand::Dump { ref commit_name } => { + let fs = composefs_ostree::create_filesystem(&repo, commit_name)?; + fs.print_dumpfile()?; + } + OstreeCommand::ComputeId { ref commit_name } => { + let image_id = composefs_ostree::ensure_ostree_erofs(&repo, commit_name)?; + println!("{}", image_id.to_hex()); + } + OstreeCommand::Inspect { + ref source, + metadata, + } => { + composefs_ostree::inspect(&repo, source, metadata)?; + } + OstreeCommand::Tag { + ref source, + ref name, + } => { + composefs_ostree::tag(&repo, source, name)?; + println!("Tagged {source} as {name}"); + } + OstreeCommand::Untag { ref name } => { + composefs_ostree::untag(&repo, name)?; + } + OstreeCommand::ListCommits => { + let commits = composefs_ostree::list_commits(&repo)?; + if commits.is_empty() { + println!("No ostree commits found"); + } else { + let mut table = Table::new(); + table.load_preset(UTF8_FULL); + table.set_header(["NAME", "COMMIT"]); + for c in commits { + table.add_row([c.name.as_str(), &c.commit_id]); + } + println!("{table}"); + } + } + }, Command::CreateImage { fs_opts, ref image_name, diff --git a/crates/composefs-integration-tests/Cargo.toml b/crates/composefs-integration-tests/Cargo.toml index b7bfaa75..b7b89b0c 100644 --- a/crates/composefs-integration-tests/Cargo.toml +++ b/crates/composefs-integration-tests/Cargo.toml @@ -36,6 +36,7 @@ composefs = { workspace = true } # Only the test_util module is used — for creating test OCI images. # All verification must go through the cfsctl CLI. composefs-oci = { workspace = true, features = ["test", "boot", "containers-storage"] } +composefs-ostree = { workspace = true } # Used by the varlink tests to drive the service through zlink's native typed # proxy bindings (alongside the external `varlinkctl` CLI), and to assert on the # typed wire reply/error types. diff --git a/crates/composefs-integration-tests/src/tests/mod.rs b/crates/composefs-integration-tests/src/tests/mod.rs index a9b180db..6a89a406 100644 --- a/crates/composefs-integration-tests/src/tests/mod.rs +++ b/crates/composefs-integration-tests/src/tests/mod.rs @@ -5,5 +5,6 @@ pub mod cstor; pub mod digest_stability; pub mod oci_compat; pub mod old_format; +pub mod ostree; pub mod privileged; pub mod varlink; diff --git a/crates/composefs-integration-tests/src/tests/ostree.rs b/crates/composefs-integration-tests/src/tests/ostree.rs new file mode 100644 index 00000000..ce2e4434 --- /dev/null +++ b/crates/composefs-integration-tests/src/tests/ostree.rs @@ -0,0 +1,279 @@ +//! Integration tests for ostree pull functionality. + +use std::net::TcpListener; +use std::path::Path; +use std::process::Command; +use std::sync::Arc; + +use anyhow::Result; +use tempfile::TempDir; +use xshell::{Shell, cmd}; + +use composefs_oci::composefs::fsverity::Sha256HashValue; +use composefs_oci::composefs::repository::Repository; + +use crate::integration_test; +use composefs_integration_tests::create_test_repository; + +fn create_ostree_test_content(parent: &Path) -> Result { + let root = parent.join("content"); + std::fs::create_dir_all(root.join("subdir"))?; + + // Small file (will be inlined by ostree) + std::fs::write(root.join("small.txt"), "hello")?; + // Large file (external object) + std::fs::write(root.join("large.bin"), vec![0xABu8; 128 * 1024])?; + // Duplicate of large file (shared object) + std::fs::write(root.join("large-dup.bin"), vec![0xABu8; 128 * 1024])?; + // Symlink + std::os::unix::fs::symlink("small.txt", root.join("link.txt"))?; + // Nested file + std::fs::write(root.join("subdir/nested.txt"), "nested content")?; + + // File with xattr + let xattr_file = root.join("with-xattr.txt"); + std::fs::write(&xattr_file, "has xattr")?; + rustix::fs::setxattr( + &xattr_file, + c"user.testxattr", + b"testvalue", + rustix::fs::XattrFlags::CREATE, + )?; + + Ok(root) +} + +fn init_ostree_repo(sh: &Shell, path: &Path, mode: &str) -> Result<()> { + let path = path.to_str().unwrap(); + cmd!(sh, "ostree init --repo={path} --mode={mode}").run()?; + Ok(()) +} + +fn commit_to_ostree(sh: &Shell, repo: &Path, branch: &str, srcdir: &Path) -> Result { + let repo = repo.to_str().unwrap(); + let srcdir = srcdir.to_str().unwrap(); + let output = cmd!(sh, "ostree commit --repo={repo} --branch={branch} {srcdir}").read()?; + Ok(output.trim().to_string()) +} + +fn pull_and_get_image_id( + repo: &Arc>, + ostree_repo_path: &Path, + ostree_ref: &str, + base_name: Option<&str>, +) -> Result<(Sha256HashValue, composefs_ostree::PullStats)> { + let rt = tokio::runtime::Runtime::new()?; + let (_obj_id, stats) = rt.block_on(composefs_ostree::pull_local( + repo, + ostree_repo_path, + ostree_ref, + base_name, + ))?; + + let image_id = composefs_ostree::get_image_ref(repo, ostree_ref)?; + + Ok((image_id, stats)) +} + +fn test_ostree_pull_local_all_modes() -> Result<()> { + let sh = Shell::new()?; + let tmpdir = TempDir::new()?; + let content = create_ostree_test_content(tmpdir.path())?; + + // Commit to archive-z2 first — some modes can't be committed to directly + let archive_repo = tmpdir.path().join("ostree-archive-z2"); + init_ostree_repo(&sh, &archive_repo, "archive-z2")?; + commit_to_ostree(&sh, &archive_repo, "test", &content)?; + + // bare-user-only needs a commit with uid=0, gid=0, no xattrs + let buo_archive = tmpdir.path().join("ostree-buo-archive"); + init_ostree_repo(&sh, &buo_archive, "archive-z2")?; + let buo_archive_str = buo_archive.to_str().unwrap(); + let content_str = content.to_str().unwrap(); + cmd!( + sh, + "ostree commit --repo={buo_archive_str} --branch=test --owner-uid=0 --owner-gid=0 --no-xattrs {content_str}" + ) + .run()?; + + // bare-split-xattrs: ostree CLI doesn't support committing/pulling into this mode. + let modes = ["archive-z2", "bare-user", "bare", "bare-user-only"]; + + let mut image_ids: Vec<(String, Sha256HashValue)> = Vec::new(); + + for mode in &modes { + let ostree_repo_path = if *mode == "archive-z2" { + archive_repo.clone() + } else { + let p = tmpdir.path().join(format!("ostree-{mode}")); + init_ostree_repo(&sh, &p, mode)?; + let source = if *mode == "bare-user-only" { + &buo_archive + } else { + &archive_repo + }; + let src = source.to_str().unwrap(); + let dst = p.to_str().unwrap(); + cmd!(sh, "ostree pull-local --repo={dst} {src} test").run()?; + p + }; + + let composefs_dir = TempDir::new()?; + let repo = create_test_repository(&composefs_dir)?; + + let (image_id, stats) = pull_and_get_image_id(&repo, &ostree_repo_path, "test", None)?; + + assert!( + stats.metadata_fetched > 0, + "{mode}: expected metadata_fetched > 0" + ); + assert!( + stats.files_fetched > 0, + "{mode}: expected files_fetched > 0" + ); + + image_ids.push((mode.to_string(), image_id)); + } + + // bare-user-only uses a different commit (uid=0, gid=0, no xattrs), + // so its image ID will differ from the others. + let first_id = &image_ids[0].1; + for (mode, id) in &image_ids[1..] { + if mode == "bare-user-only" { + continue; + } + assert_eq!(first_id, id, "image ID from {mode} differs from archive-z2"); + } + + Ok(()) +} +integration_test!(test_ostree_pull_local_all_modes); + +fn test_ostree_pull_remote_archive() -> Result<()> { + let sh = Shell::new()?; + let tmpdir = TempDir::new()?; + let content = create_ostree_test_content(tmpdir.path())?; + + // Create archive-z2 repo and commit + let ostree_repo_path = tmpdir.path().join("ostree-archive"); + init_ostree_repo(&sh, &ostree_repo_path, "archive-z2")?; + commit_to_ostree(&sh, &ostree_repo_path, "test", &content)?; + + // Pull locally first to get reference image ID + let composefs_dir_local = TempDir::new()?; + let repo_local = create_test_repository(&composefs_dir_local)?; + let (local_image_id, _) = pull_and_get_image_id(&repo_local, &ostree_repo_path, "test", None)?; + + // Find a free port and start HTTP server + let listener = TcpListener::bind("127.0.0.1:0")?; + let port = listener.local_addr()?.port(); + drop(listener); + + let repo_path_str = ostree_repo_path.to_str().unwrap().to_string(); + let mut server = Command::new("python3") + .args([ + "-m", + "http.server", + &port.to_string(), + "--directory", + &repo_path_str, + ]) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .spawn()?; + + // Give the server a moment to start + std::thread::sleep(std::time::Duration::from_millis(500)); + + let result = (|| -> Result<()> { + let composefs_dir_remote = TempDir::new()?; + let repo_remote = create_test_repository(&composefs_dir_remote)?; + + let rt = tokio::runtime::Runtime::new()?; + let url = format!("http://127.0.0.1:{port}"); + let (_obj_id, stats) = + rt.block_on(composefs_ostree::pull(&repo_remote, &url, "test", None))?; + + assert!(stats.metadata_fetched > 0); + assert!(stats.files_fetched > 0); + + let remote_image_id = composefs_ostree::get_image_ref(&repo_remote, "test")?; + + assert_eq!( + local_image_id, remote_image_id, + "remote pull image ID differs from local pull" + ); + + Ok(()) + })(); + + server.kill().ok(); + server.wait().ok(); + + result +} +integration_test!(test_ostree_pull_remote_archive); + +fn test_ostree_pull_with_base() -> Result<()> { + let sh = Shell::new()?; + let tmpdir = TempDir::new()?; + let content = create_ostree_test_content(tmpdir.path())?; + + // Create archive-z2 repo with initial commit on branch "commit-a" + let ostree_repo_path = tmpdir.path().join("ostree-repo"); + init_ostree_repo(&sh, &ostree_repo_path, "archive-z2")?; + commit_to_ostree(&sh, &ostree_repo_path, "commit-a", &content)?; + + // Pull commit A + let composefs_dir = TempDir::new()?; + let repo = create_test_repository(&composefs_dir)?; + + let rt = tokio::runtime::Runtime::new()?; + let (_, _stats_a) = rt.block_on(composefs_ostree::pull_local( + &repo, + &ostree_repo_path, + "commit-a", + None, + ))?; + + // Modify content slightly and make commit B on branch "commit-b" + std::fs::write(content.join("new-file.txt"), "new content for commit B")?; + commit_to_ostree(&sh, &ostree_repo_path, "commit-b", &content)?; + + // Pull commit B with base + let (_, stats_with_base) = rt.block_on(composefs_ostree::pull_local( + &repo, + &ostree_repo_path, + "commit-b", + Some("commit-a"), + ))?; + + // Pull commit B without base into a fresh repo + let composefs_dir2 = TempDir::new()?; + let repo2 = create_test_repository(&composefs_dir2)?; + let (_, stats_without_base) = rt.block_on(composefs_ostree::pull_local( + &repo2, + &ostree_repo_path, + "commit-b", + None, + ))?; + + assert!( + stats_with_base.files_fetched < stats_without_base.files_fetched, + "expected base pull to fetch fewer files: with_base={} vs without_base={}", + stats_with_base.files_fetched, + stats_without_base.files_fetched, + ); + + // Both should produce the same image + let image_id1 = composefs_ostree::get_image_ref(&repo, "commit-b")?; + let image_id2 = composefs_ostree::get_image_ref(&repo2, "commit-b")?; + + assert_eq!( + image_id1, image_id2, + "image IDs should match with and without base" + ); + + Ok(()) +} +integration_test!(test_ostree_pull_with_base); diff --git a/crates/composefs-ostree/Cargo.toml b/crates/composefs-ostree/Cargo.toml new file mode 100644 index 00000000..46689946 --- /dev/null +++ b/crates/composefs-ostree/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "composefs-ostree" +description = "ostree support for composefs" +keywords = ["composefs", "ostree"] + +edition.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[dependencies] +anyhow = { version = "1.0.87", default-features = false } +chrono = { version = "0.4", default-features = false, features = ["alloc"] } +composefs = { workspace = true } +configparser = { version = "3.1.0", features = [] } +flate2 = { version = "1.1.2", default-features = true } +gvariant = { version = "0.5.1", default-features = true} +hex = { version = "0.4.0", default-features = false, features = ["std"] } +indicatif = { version = "0.17.0", default-features = false } +rustix = { version = "1.0.0", default-features = false, features = ["fs", "mount", "process", "std"] } +sha2 = { version = "0.11.0", default-features = false } +zerocopy = { version = "0.8.0", default-features = false, features = ["derive", "std"] } +reqwest = { version = "0.12.15", features = ["stream", "zstd"] } +tokio = { version = "1.24.2", default-features = false, features = ["rt"] } +tokio-stream = "0.1.18" +tokio-util = { version = "0.7", default-features = false, features = ["io", "io-util"] } + +[dev-dependencies] +similar-asserts = "1.7.0" + +[lints] +workspace = true diff --git a/crates/composefs-ostree/src/commit.rs b/crates/composefs-ostree/src/commit.rs new file mode 100644 index 00000000..3e1ce2c2 --- /dev/null +++ b/crates/composefs-ostree/src/commit.rs @@ -0,0 +1,588 @@ +//! Ostree commit splitstream serialization and deserialization. +//! +//! Implements the binary format described in `doc/ostree.md`: a sorted array +//! of ostree object IDs with bucket-indexed lookup, per-object data refs +//! (with optional external content references), and an 8-byte-aligned content +//! region. +use anyhow::{Result, anyhow, bail, ensure}; +use gvariant::aligned_bytes::{A8, AlignedBuf, AlignedSlice, TryAsAligned}; +use std::{fmt, io::Read, mem::size_of, sync::Arc}; +use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; + +use sha2::{Digest, Sha256}; +use std::{ + collections::{BTreeMap, HashMap}, + ffi::OsStr, + os::unix::ffi::OsStrExt, +}; + +use composefs::{ + fsverity::FsVerityHashValue, + generic_tree::LeafId, + repository::Repository, + tree::{Directory, FileSystem, Inode, LeafContent, RegularFile, Stat}, + util::Sha256Digest, +}; + +use crate::ostree::{ + OstreeCommit, OstreeDirMeta, OstreeDirTree, OstreeFileHeader, split_sized_variant, +}; + +const OSTREE_COMMIT_CONTENT_TYPE: u64 = 0xAFE138C18C463EF1; + +const S_IFMT: u32 = 0o170000; +const S_IFLNK: u32 = 0o120000; + +fn xattrs_to_btreemap(xattrs: &[(Vec, Vec)]) -> BTreeMap, Box<[u8]>> { + xattrs + .iter() + .map(|(k, v)| (OsStr::from_bytes(k).into(), Box::from(v.as_slice()))) + .collect() +} + +#[derive(Debug, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[repr(C)] +struct CommitHeader { + commit_id: u32, + flags: u32, + bucket_ends: [u32; 256], +} + +#[derive(Debug, FromBytes, Immutable, KnownLayout)] +#[repr(C)] +struct Sha256DigestArray { + ids: [Sha256Digest], +} + +const NO_EXTERNAL_INDEX: u32 = u32::MAX; + +#[derive(Debug, FromBytes, Immutable, IntoBytes, KnownLayout, Clone)] +#[repr(C)] +struct DataRef { + offset: u32, + size: u32, + external_index: u32, +} + +impl DataRef { + pub fn new(offset: usize, size: usize, external_index: Option) -> Self { + DataRef { + offset: u32::to_le(offset as u32), + size: u32::to_le(size as u32), + external_index: u32::to_le(match external_index { + Some(idx) => idx as u32, + None => NO_EXTERNAL_INDEX, + }), + } + } + pub fn get_offset(&self) -> usize { + u32::from_le(self.offset) as usize + } + pub fn get_size(&self) -> usize { + u32::from_le(self.size) as usize + } + pub fn get_external_index(&self) -> Option { + match u32::from_le(self.external_index) { + NO_EXTERNAL_INDEX => None, + idx => Some(idx as usize), + } + } +} + +#[derive(Debug, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[repr(C)] +struct DataRefs { + datas: [DataRef], +} + +#[derive(Debug)] +struct WriterEntry { + ostree_id: Sha256Digest, + external_object: Option, + data: AlignedBuf, +} + +/// Accumulates ostree objects and serializes them into a commit splitstream. +#[derive(Debug)] +pub(crate) struct CommitWriter { + commit_id: Option, + map: Vec>, +} + +fn align8(x: usize) -> usize { + (x + 7) & !7 +} + +impl CommitWriter { + pub fn new() -> Self { + CommitWriter { + commit_id: None, + map: vec![], + } + } + + fn lookup_idx(&self, ostree_id: &Sha256Digest) -> Option { + self.map + .binary_search_by_key(ostree_id, |e| e.ostree_id) + .ok() + } + + pub fn contains(&self, ostree_id: &Sha256Digest) -> bool { + self.lookup_idx(ostree_id).is_some() + } + + pub fn set_commit_id(&mut self, id: &Sha256Digest) { + self.commit_id = Some(*id); + } + + pub fn insert( + &mut self, + ostree_id: &Sha256Digest, + external_object: Option<&ObjectID>, + data: &[u8], + ) { + if let Err(idx) = self.map.binary_search_by_key(ostree_id, |e| e.ostree_id) { + let mut aligned_data = AlignedBuf::new(); + aligned_data.with_vec(|v| v.extend_from_slice(data)); + self.map.insert( + idx, + WriterEntry { + ostree_id: *ostree_id, + external_object: external_object.cloned(), + data: aligned_data, + }, + ); + } + } + + pub fn from_reader(reader: &CommitReader) -> Result { + let mut writer = CommitWriter::new(); + for (ostree_id, external_object, data) in reader.iter() { + let data = data?; + writer.insert(ostree_id, external_object, data); + } + writer.set_commit_id(&reader.commit_id()); + Ok(writer) + } + + pub fn serialize( + &self, + repo: &Arc>, + content_id: &str, + reference: Option<&str>, + image_ref: Option<&ObjectID>, + ) -> Result { + let mut ss = repo.create_stream(OSTREE_COMMIT_CONTENT_TYPE)?; + + if let Some(image_id) = image_ref { + ss.add_named_stream_ref(crate::IMAGE_REF_KEY, image_id); + } + + /* Ensure we can index and count items using u32 (leaving one for NO_EXTERNAL_INDEX) */ + let item_count = self.map.len(); + ensure!( + item_count <= (NO_EXTERNAL_INDEX - 1) as usize, + "Too many items in object map" + ); + + let objid = self + .commit_id + .as_ref() + .ok_or_else(|| anyhow!("No commit id set"))?; + let main_idx = self + .lookup_idx(objid) + .ok_or_else(|| anyhow!("commit object not in commit"))?; + + let mut header = CommitHeader { + commit_id: u32::to_le(main_idx as u32), + flags: 0, + bucket_ends: [0; 256], + }; + + // Compute data offsets (running sum of aligned sizes) + let mut data_size = 0usize; + let data_offsets: Vec = self + .map + .iter() + .map(|e| { + let offset = data_size; + data_size += align8(e.data.len()); + offset + }) + .collect(); + + // Ensure all data can be indexed by u32 + ensure!( + data_size <= u32::MAX as usize, + "Too large data in object map" + ); + + // Compute bucket ends + for e in self.map.iter() { + // Initially end is just the count + header.bucket_ends[e.ostree_id[0] as usize] += 1; + } + for i in 1..256 { + // Then we sum them up to the end + header.bucket_ends[i] += header.bucket_ends[i - 1]; + } + // Convert buckets to little endian + header + .bucket_ends + .iter_mut() + .for_each(|b| *b = u32::to_le(*b)); + + // Add header + ss.write_inline(header.as_bytes()); + // Add mapped ids + for e in self.map.iter() { + ss.write_inline(&e.ostree_id); + } + // Add data refs + for (e, &offset) in self.map.iter().zip(&data_offsets) { + let idx = e + .external_object + .as_ref() + .map(|external_object| ss.add_object_ref(external_object)); + let d = DataRef::new(offset, e.data.len(), idx); + ss.write_inline(d.as_bytes()); + } + + // Add 8-aligned data chunks + const ZERO_PAD: [u8; 7] = [0; 7]; + for e in self.map.iter() { + ss.write_inline(&e.data); + let padding = align8(e.data.len()) - e.data.len(); + if padding > 0 { + ss.write_inline(&ZERO_PAD[..padding]); + } + } + + repo.write_stream(ss, content_id, reference) + } +} + +#[derive(Debug)] +struct ReaderEntry { + ostree_id: Sha256Digest, + data_offset: usize, + data_size: usize, + external_object: Option, +} + +/// Reads and queries an ostree commit splitstream. +/// +/// Provides lookup by ostree object ID (using bucket-accelerated binary search) +/// and can reconstruct a [`FileSystem`] tree from the stored commit DAG. +pub(crate) struct CommitReader { + map: Vec>, + commit_id: Sha256Digest, + bucket_ends: [u32; 256], + data: AlignedBuf, +} + +impl fmt::Debug for CommitReader { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut m = f.debug_map(); + for e in self.map.iter() { + m.entry( + &hex::encode(e.ostree_id), + &format!("{:?}", self.lookup(&e.ostree_id)), + ); + } + m.finish() + } +} + +fn validate_buckets(buckets: &[u32; 256]) -> Result<()> { + ensure!( + buckets.windows(2).all(|w| w[0] <= w[1]), + "Invalid commit bucket data" + ); + Ok(()) +} + +impl CommitReader { + pub fn load(repo: &Repository, content_id: &str) -> Result { + let mut ss = repo.open_stream(content_id, None, Some(OSTREE_COMMIT_CONTENT_TYPE))?; + + let mut buf = AlignedBuf::new(); + + // Read and parse header + buf.with_vec(|v| v.resize(size_of::(), 0u8)); + Read::read_exact(&mut ss, &mut buf)?; + + let h = CommitHeader::ref_from_bytes(&buf).map_err(|_| anyhow!("Invalid commit header"))?; + + let commit_id_idx = u32::from_le(h.commit_id) as usize; + let buckets = h.bucket_ends.map(u32::from_le); + validate_buckets(&buckets)?; + let item_count = buckets[255] as usize; + + ensure!(commit_id_idx < item_count, "commit id out of bounds"); + + // Read object IDs + buf.with_vec(|v| v.resize(item_count * size_of::(), 0u8)); + Read::read_exact(&mut ss, &mut buf)?; + + let ostree_ids = Sha256DigestArray::ref_from_bytes(&buf) + .map_err(|_| anyhow!("Invalid object ID array"))?; + ensure!( + ostree_ids.ids.len() == item_count, + "Invalid object ID array" + ); + + let commit_id = ostree_ids.ids[commit_id_idx]; + + // Read data refs + let mut data_buf = AlignedBuf::new(); + data_buf.with_vec(|v| v.resize(item_count * size_of::(), 0u8)); + Read::read_exact(&mut ss, &mut data_buf)?; + + let data_refs = + DataRefs::ref_from_bytes(&data_buf).map_err(|_| anyhow!("Invalid data refs array"))?; + ensure!( + data_refs.datas.len() == item_count, + "Invalid data refs array" + ); + + // Combine object ids and data into ReaderEntry map + let map = ostree_ids + .ids + .iter() + .zip(data_refs.datas.iter()) + .map(|(id, dref)| { + let external_object = dref + .get_external_index() + .map(|idx| { + ss.lookup_external_ref(idx) + .ok_or_else(|| anyhow!("External ref index {idx} out of range")) + .cloned() + }) + .transpose()?; + Ok(ReaderEntry { + ostree_id: *id, + data_offset: dref.get_offset(), + data_size: dref.get_size(), + external_object, + }) + }) + .collect::>>()?; + + // Read remaining content data + buf.with_vec(|v| { + v.clear(); + ss.read_to_end(v) + })?; + + Ok(CommitReader { + map, + commit_id, + data: buf, + bucket_ends: buckets, + }) + } + + fn get_data(&self, entry: &ReaderEntry) -> Result<&AlignedSlice> { + let start = entry.data_offset; + let end = start + .checked_add(entry.data_size) + .ok_or_else(|| anyhow!("Object data offset/size overflow"))?; + self.data + .get(start..end) + .ok_or_else(|| anyhow!("Object data offset/size out of bounds"))? + .try_as_aligned() + .map_err(|_| anyhow!("Object data not 8-byte aligned")) + } + + fn get_bucket(&self, ostree_id: &Sha256Digest) -> (usize, usize) { + let first = ostree_id[0] as usize; + let start = if first == 0 { + 0 + } else { + self.bucket_ends[first - 1] + }; + let end = self.bucket_ends[first]; + (start as usize, end as usize) + } + + pub fn commit_id(&self) -> Sha256Digest { + self.commit_id + } + + pub fn lookup( + &self, + ostree_id: &Sha256Digest, + ) -> Result, &AlignedSlice)>> { + let (start, end) = self.get_bucket(ostree_id); + let in_bucket = self + .map + .get(start..end) + .ok_or_else(|| anyhow!("Bucket range out of bounds"))?; + let index = match in_bucket.binary_search_by_key(ostree_id, |e| e.ostree_id) { + Ok(i) => i, + Err(..) => return Ok(None), + }; + let entry = &in_bucket[index]; + Ok(Some(( + entry.external_object.as_ref(), + self.get_data(entry)?, + ))) + } + + pub fn lookup_data(&self, ostree_id: &Sha256Digest) -> Result>> { + match self.lookup(ostree_id)? { + Some((None, data)) => Ok(Some(data)), + _ => Ok(None), + } + } + + pub fn iter( + &self, + ) -> impl Iterator, Result<&AlignedSlice>)> { + self.map + .iter() + .map(|e| (&e.ostree_id, e.external_object.as_ref(), self.get_data(e))) + } + + fn create_filesystem_file( + &self, + fs: &mut FileSystem, + file_cache: &mut HashMap, + id: &Sha256Digest, + ) -> Result { + // Make hardlinks for files that are already used + if let Some(&leaf_id) = file_cache.get(id) { + return Ok(leaf_id); + } + + let (maybe_obj_id, file_header) = self.lookup(id)?.ok_or_else(|| { + anyhow!( + "Unexpectedly missing ostree file object {}", + hex::encode(id) + ) + })?; + + let (sized_data, remaining_data) = split_sized_variant(file_header)?; + + let file_header = OstreeFileHeader::from_zlib_sized(sized_data)?; + let xattrs = xattrs_to_btreemap(&file_header.xattrs); + + let stat = Stat { + st_mode: file_header.mode, + st_uid: file_header.uid, + st_gid: file_header.gid, + st_mtim_sec: 0, + st_mtim_nsec: 0, + xattrs, + }; + + let content = if (stat.st_mode & S_IFMT) == S_IFLNK { + LeafContent::Symlink(OsStr::new(&file_header.symlink_target).into()) + } else { + let file = if let Some(obj_id) = maybe_obj_id { + if !remaining_data.is_empty() { + bail!("Unexpected trailing file data"); + } + RegularFile::External(obj_id.clone(), file_header.size) + } else { + RegularFile::Inline(remaining_data.into()) + }; + LeafContent::Regular(file) + }; + + let leaf_id = fs.push_leaf(stat, content); + file_cache.insert(*id, leaf_id); + Ok(leaf_id) + } + + fn parse_dirmeta(&self, dirmeta_id: &Sha256Digest) -> Result { + let (_obj_id, dirmeta) = self.lookup(dirmeta_id)?.ok_or_else(|| { + anyhow!( + "Unexpectedly missing ostree dirmeta object {}", + hex::encode(dirmeta_id) + ) + })?; + + let dirmeta_sha = Sha256::digest(dirmeta); + if *dirmeta_sha != *dirmeta_id { + bail!( + "Invalid dirmeta checksum {:?}, expected {:?}", + dirmeta_sha, + dirmeta_id + ); + } + + let dm = OstreeDirMeta::from_data(dirmeta)?; + let xattrs = xattrs_to_btreemap(&dm.xattrs); + + Ok(Stat { + st_mode: dm.mode, + st_uid: dm.uid, + st_gid: dm.gid, + st_mtim_sec: 0, + st_mtim_nsec: 0, + xattrs, + }) + } + + fn create_filesystem_dir( + &self, + fs: &mut FileSystem, + file_cache: &mut HashMap, + dirtree_id: &Sha256Digest, + ) -> Result, Inode)>> { + let (_obj_id, dirtree) = self.lookup(dirtree_id)?.ok_or_else(|| { + anyhow!( + "Unexpectedly missing ostree dirtree object {}", + hex::encode(dirtree_id) + ) + })?; + + let tree = OstreeDirTree::from_data(dirtree)?; + + let mut entries = Vec::<(Box, Inode)>::new(); + + for (name, checksum) in &tree.files { + let leaf_id = self.create_filesystem_file(fs, file_cache, checksum)?; + entries.push((OsStr::new(name).into(), Inode::leaf(leaf_id))); + } + + for (name, tree_checksum, meta_checksum) in &tree.dirs { + let stat = self.parse_dirmeta(meta_checksum)?; + let mut subdir = Directory::new(stat); + for (name, inode) in self.create_filesystem_dir(fs, file_cache, tree_checksum)? { + subdir.insert(&name, inode); + } + + entries.push(( + OsStr::new(name.as_str()).into(), + Inode::Directory(Box::new(subdir)), + )); + } + + Ok(entries) + } + + /// Create a tree::Filesystem for the commit + pub fn create_filesystem(&self) -> Result> { + let commit = self + .lookup_data(&self.commit_id)? + .ok_or_else(|| anyhow!("Unexpectedly missing commit object"))?; + + let commit = OstreeCommit::from_data(commit)?; + + let stat = self.parse_dirmeta(&commit.root_metadata)?; + + let mut fs = FileSystem::::new(stat); + + let mut file_cache = HashMap::new(); + for (name, inode) in + self.create_filesystem_dir(&mut fs, &mut file_cache, &commit.root_tree)? + { + fs.root.insert(&name, inode); + } + + Ok(fs) + } +} diff --git a/crates/composefs-ostree/src/lib.rs b/crates/composefs-ostree/src/lib.rs new file mode 100644 index 00000000..e292188c --- /dev/null +++ b/crates/composefs-ostree/src/lib.rs @@ -0,0 +1,341 @@ +//! OSTree support for composefs repositories. +//! +//! This crate enables importing images from OSTree repositories into a composefs +//! repository, where they can be mounted as composefs images and share storage +//! (deduplication) with OCI or other image types. +//! +//! The main entry points are [`pull_local()`] and [`pull()`], which fetch an +//! ostree commit (from a local path or HTTP URL respectively), store it as a +//! splitstream, and produce an EROFS image that can be mounted. Additional +//! functions provide reference management ([`tag`]/[`untag`]), listing +//! ([`list_commits`]), and inspection ([`inspect`]). +//! + +use anyhow::{Context, Result, bail}; +use rustix::fs::{AtFlags, CWD, Dir, OFlags, readlinkat, unlinkat}; +use std::{path::Path, sync::Arc}; + +use composefs::{ + fsverity::FsVerityHashValue, repository::Repository, tree::FileSystem, util::parse_sha256, +}; + +/// Information about a stored ostree commit. +#[derive(Debug, Clone)] +pub struct CommitInfo { + /// The decoded ref name (e.g. "fedora/40/x86_64") + pub name: String, + /// The ostree commit ID (hex) + pub commit_id: String, +} + +mod commit; +mod ostree; +mod pull; +mod repo; + +use crate::commit::{CommitReader, CommitWriter}; +use crate::pull::PullOperation; +pub use crate::pull::PullStats; +use crate::repo::{LocalRepo, RemoteRepo}; + +const OSTREE_REF_PREFIX: &str = "ostree/"; +const IMAGE_REF_KEY: &str = "composefs.image"; + +fn ostree_ref_path(name: &str) -> String { + format!( + "{OSTREE_REF_PREFIX}{}", + name.replace('%', "%25").replace('/', "%2F") + ) +} + +/// Pull from a local ostree repo into the repository. +/// +/// Automatically creates the EROFS image and links it from the commit splitstream. +/// `ostree_ref` can be either a ref name or a 64-character hex commit ID. +pub async fn pull_local( + repo: &Arc>, + ostree_repo_path: &Path, + ostree_ref: &str, + base_reference: Option<&str>, +) -> Result<(ObjectID, PullStats)> { + let ostree_repo = LocalRepo::open_path(repo, CWD, ostree_repo_path)?; + + let (commit_checksum, reference) = if is_commit_id(ostree_ref) { + (parse_sha256(ostree_ref)?, None) + } else { + let checksum = ostree_repo.read_ref(ostree_ref)?; + (checksum, Some(ostree_ref_path(ostree_ref))) + }; + + let mut op = PullOperation::>::new(repo, ostree_repo); + if let Some(base_name) = base_reference { + let base_ref = format!("refs/{}", ostree_ref_path(base_name)); + op.add_base(&base_ref)?; + } + + let (verity, stats) = op + .pull_commit(&commit_checksum, reference.as_deref()) + .await?; + ensure_ostree_erofs(repo, &stats.commit_id)?; + Ok((verity, stats)) +} + +/// Pull from a remote ostree repo into the repository. +/// +/// Automatically creates the EROFS image and links it from the commit splitstream. +/// `ostree_ref` can be either a ref name or a 64-character hex commit ID. +pub async fn pull( + repo: &Arc>, + ostree_repo_url: &str, + ostree_ref: &str, + base_reference: Option<&str>, +) -> Result<(ObjectID, PullStats)> { + let ostree_repo = RemoteRepo::new(repo, ostree_repo_url)?; + + let (commit_checksum, reference) = if is_commit_id(ostree_ref) { + (parse_sha256(ostree_ref)?, None) + } else { + let checksum = ostree_repo.resolve_ref(ostree_ref).await?; + (checksum, Some(ostree_ref_path(ostree_ref))) + }; + + let mut op = PullOperation::>::new(repo, ostree_repo); + if let Some(base_name) = base_reference { + let base_ref = format!("refs/{}", ostree_ref_path(base_name)); + op.add_base(&base_ref)?; + } + + let (verity, stats) = op + .pull_commit(&commit_checksum, reference.as_deref()) + .await?; + ensure_ostree_erofs(repo, &stats.commit_id)?; + Ok((verity, stats)) +} + +fn is_hex(s: &str) -> bool { + !s.is_empty() && s.bytes().all(|b| b.is_ascii_hexdigit()) +} + +/// Returns `true` if the string is a full 64-character hex commit ID. +pub fn is_commit_id(s: &str) -> bool { + s.len() == 64 && is_hex(s) +} + +/// Finds the unique ostree commit stream whose commit ID starts with `prefix`. +fn resolve_commit_prefix( + repo: &Repository, + prefix: &str, +) -> Result { + let stream_prefix = format!("ostree-commit-{prefix}"); + + if prefix.len() == 64 { + if repo.has_stream(&stream_prefix)?.is_some() { + return Ok(stream_prefix); + } + bail!("ostree commit {prefix} not found in repository"); + } + + let dir_fd = rustix::fs::openat( + repo.repo_fd(), + "streams", + OFlags::RDONLY | OFlags::DIRECTORY, + rustix::fs::Mode::empty(), + )?; + let mut match_name: Option = None; + for item in Dir::read_from(&dir_fd)? { + let entry = item?; + let name = entry.file_name().to_bytes(); + if let Ok(s) = std::str::from_utf8(name) + && s.starts_with(&stream_prefix) + { + if match_name.is_some() { + bail!("ambiguous commit ID prefix '{prefix}'"); + } + match_name = Some(s.to_string()); + } + } + + match_name + .ok_or_else(|| anyhow::anyhow!("ostree commit prefix '{prefix}' not found in repository")) +} + +/// Resolves a source (either an ostree ref name or a commit ID / prefix) to a stream content_id. +/// +/// Validates that the resolved stream exists in the repository. +fn resolve_source( + repo: &Repository, + source: &str, +) -> Result { + // Try as an ostree ref first + let ref_path = format!("streams/refs/{}", ostree_ref_path(source)); + if let Ok(target) = readlinkat(repo.repo_fd(), ref_path.as_str(), vec![]) + && let Ok(target_str) = target.into_string() + { + let content_id = target_str + .rsplit('/') + .next() + .unwrap_or(&target_str) + .to_string(); + if repo.has_stream(&content_id)?.is_none() { + bail!("ref '{source}' points to missing stream '{content_id}'"); + } + return Ok(content_id); + } + + // Try as a commit ID or prefix + if !is_hex(source) || source.len() > 64 { + bail!("'{source}' is not a known ostree ref or valid commit ID"); + } + + resolve_commit_prefix(repo, source) +} + +/// Tags an ostree commit with a named reference. +/// +/// The `source` can be either an existing ostree ref name or a hex-encoded +/// ostree commit checksum. Creates a ref at `refs/ostree/{name}` pointing +/// to the commit's stream. +pub fn tag( + repo: &Repository, + source: &str, + name: &str, +) -> Result<()> { + let content_id = resolve_source(repo, source)?; + let ref_name = ostree_ref_path(name); + repo.name_stream(&content_id, &ref_name) +} + +/// Removes a named ostree reference. +/// +/// The commit data is not deleted; it becomes eligible for garbage collection +/// if no other references point to it. +pub fn untag(repo: &Repository, name: &str) -> Result<()> { + let ref_path = format!("streams/refs/{}", ostree_ref_path(name)); + unlinkat(repo.repo_fd(), &ref_path, AtFlags::empty()) + .with_context(|| format!("Failed to remove tag {name}"))?; + Ok(()) +} + +fn decode_ref(encoded: &str) -> String { + encoded.replace("%2F", "/").replace("%25", "%") +} + +/// Lists all ostree commits stored in the repository. +pub fn list_commits( + repo: &Repository, +) -> Result> { + let commits = repo + .list_stream_refs(OSTREE_REF_PREFIX)? + .into_iter() + .filter_map(|(encoded_name, target)| { + let target_name = target.rsplit('/').next().unwrap_or(&target); + target_name + .strip_prefix("ostree-commit-") + .map(|commit_id| CommitInfo { + name: decode_ref(&encoded_name), + commit_id: commit_id.to_string(), + }) + }) + .collect(); + + Ok(commits) +} + +/// Ensures the EROFS image exists for a commit and stores a named ref to it +/// in the commit splitstream. +/// +/// `source` can be an ostree ref name or a commit ID / prefix. +pub fn ensure_ostree_erofs( + repo: &Arc>, + source: &str, +) -> Result { + let content_id = resolve_source(repo, source)?; + + let ss = repo.open_stream(&content_id, None, None)?; + if let Some(id) = ss.lookup_named_ref(IMAGE_REF_KEY) { + return Ok(id.clone()); + } + + let commit = CommitReader::::load(repo, &content_id)?; + let fs = commit.create_filesystem()?; + let image_id = fs.commit_image(repo, None)?; + + let writer = CommitWriter::from_reader(&commit)?; + writer.serialize(repo, &content_id, None, Some(&image_id))?; + + Ok(image_id) +} + +/// Returns the EROFS image ObjectID stored in a commit's splitstream. +/// +/// The `source` can be an ostree ref name or a commit ID / prefix. +pub fn get_image_ref( + repo: &Repository, + source: &str, +) -> Result { + let content_id = resolve_source(repo, source)?; + let ss = repo.open_stream(&content_id, None, None)?; + ss.lookup_named_ref(IMAGE_REF_KEY) + .cloned() + .ok_or_else(|| anyhow::anyhow!("no composefs image linked to '{source}' — try re-pulling")) +} + +/// Creates a filesystem from the given OSTree commit. +/// +/// The `commit_name` is looked up as a ref under `refs/ostree/`. +pub fn create_filesystem( + repo: &Repository, + commit_name: &str, +) -> Result> { + let ref_path = format!("refs/{}", ostree_ref_path(commit_name)); + let commit = CommitReader::::load(repo, &ref_path)?; + let fs = commit.create_filesystem()?; + + Ok(fs) +} + +/// Prints the contents of an ostree commit object. +/// +/// `source` can be an ostree ref name or a commit ID / prefix. +/// If `metadata_only` is true, only the commit metadata key-value pairs are printed. +pub fn inspect( + repo: &Repository, + source: &str, + metadata_only: bool, +) -> Result<()> { + let content_id = resolve_source(repo, source)?; + let reader = CommitReader::::load(repo, &content_id)?; + let commit_id = reader.commit_id(); + + let commit_data = reader + .lookup_data(&commit_id)? + .ok_or_else(|| anyhow::anyhow!("commit object not found in stream"))?; + let commit = crate::ostree::OstreeCommit::from_data(commit_data)?; + + if metadata_only { + for (key, value) in &commit.metadata { + println!("{key}={value}"); + } + return Ok(()); + } + + println!("commit {}", hex::encode(commit_id)); + if let Some(parent) = &commit.parent_commit { + println!("parent {}", hex::encode(parent)); + } + if let Some(dt) = chrono::DateTime::from_timestamp(commit.timestamp as i64, 0) { + println!("date {}", dt.format("%Y-%m-%d %H:%M:%S UTC")); + } else { + println!("date {}", commit.timestamp); + } + println!("tree {}", hex::encode(commit.root_tree)); + println!("dirmeta {}", hex::encode(commit.root_metadata)); + if !commit.subject.is_empty() { + println!("\n{}", commit.subject); + } + if !commit.body.is_empty() { + println!("\n{}", commit.body); + } + + Ok(()) +} diff --git a/crates/composefs-ostree/src/ostree.rs b/crates/composefs-ostree/src/ostree.rs new file mode 100644 index 00000000..f509d26d --- /dev/null +++ b/crates/composefs-ostree/src/ostree.rs @@ -0,0 +1,364 @@ +//! Core ostree on-disk format types and gvariant deserialization. +//! +//! Defines the Rust representations of ostree objects (commits, directory +//! trees, directory metadata, file headers) and their gvariant wire formats. + +use anyhow::{Result, anyhow}; +use gvariant::aligned_bytes::{A8, AlignedBuf, AlignedSlice, AsAligned, TryAsAligned}; +use gvariant::{Marker, Structure, gv}; +use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; + +use composefs::{fsverity::FsVerityHashValue, util::Sha256Digest}; + +#[derive(Debug, PartialEq, Copy, Clone)] +pub(crate) enum RepoMode { + Bare, + Archive, + BareUser, + BareUserOnly, + BareSplitXAttrs, +} + +impl std::str::FromStr for RepoMode { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + match s { + "bare" => Ok(RepoMode::Bare), + "archive" | "archive-z2" => Ok(RepoMode::Archive), + "bare-user" => Ok(RepoMode::BareUser), + "bare-user-only" => Ok(RepoMode::BareUserOnly), + "bare-split-xattrs" => Ok(RepoMode::BareSplitXAttrs), + _ => Err(anyhow!("Unsupported repo mode {}", s)), + } + } +} + +#[allow(dead_code)] +#[derive(Debug, PartialEq, Copy, Clone)] +pub(crate) enum ObjectType { + File, + DirTree, + DirMeta, + Commit, + TombstoneCommit, + PayloadLink, + FileXAttrs, + FileXAttrsLink, +} + +impl ObjectType { + pub fn extension(&self, repo_mode: RepoMode) -> &'static str { + match self { + ObjectType::File => { + if repo_mode == RepoMode::Archive { + ".filez" + } else { + ".file" + } + } + ObjectType::DirTree => ".dirtree", + ObjectType::DirMeta => ".dirmeta", + ObjectType::Commit => ".commit", + ObjectType::TombstoneCommit => ".commit-tombstone", + ObjectType::PayloadLink => ".payload-link", + ObjectType::FileXAttrs => ".file-xattrs", + ObjectType::FileXAttrsLink => ".file-xattrs-link", + } + } +} + +pub(crate) fn get_object_pathname( + mode: RepoMode, + checksum: &Sha256Digest, + object_type: ObjectType, +) -> String { + format!( + "{:02x}/{}{}", + checksum[0], + hex::encode(&checksum[1..]), + object_type.extension(mode) + ) +} + +pub(crate) fn should_inline_file(file_size: usize) -> bool { + file_size <= size_of::() * 2 +} + +/// On-disk header prefixed to gvariant data in ostree objects +#[derive(Debug, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[repr(C)] +pub(crate) struct SizedVariantHeader { + size: u32, + padding: u32, +} + +pub(crate) fn size_prefix(data: &[u8]) -> AlignedBuf { + let mut buf = AlignedBuf::new(); + let svh = SizedVariantHeader { + size: u32::to_be(u32::try_from(data.len()).expect("data exceeds u32::MAX")), + padding: 0, + }; + buf.with_vec(|v| { + v.extend_from_slice(svh.as_bytes()); + v.extend_from_slice(data); + }); + buf +} + +pub(crate) fn get_sized_variant_size(data: &[u8]) -> Result { + let variant_header_size = size_of::(); + let header_data = data + .get(..variant_header_size) + .ok_or_else(|| anyhow!("Sized variant too small"))?; + + let aligned: AlignedBuf = header_data.to_vec().into(); + let h = SizedVariantHeader::ref_from_bytes(&aligned) + .map_err(|e| anyhow!("Sized variant header: {:?}", e))?; + Ok(u32::from_be(h.size) as usize) +} + +pub(crate) fn split_sized_variant(data: &AlignedSlice) -> Result<(&AlignedSlice, &[u8])> { + let variant_size = get_sized_variant_size(data)?; + let header_size = size_of::(); + let total_size = header_size + .checked_add(variant_size) + .ok_or_else(|| anyhow!("Sized variant overflow"))?; + + let sized_data: &AlignedSlice = data + .get(..total_size) + .ok_or_else(|| anyhow!("Sized variant too small"))? + .try_as_aligned() + .map_err(|_| anyhow!("Sized variant data not aligned"))?; + let remaining_data = data + .get(total_size..) + .ok_or_else(|| anyhow!("Sized variant too small"))?; + + Ok((sized_data, remaining_data)) +} + +/// Decoded ostree file header (uid, gid, mode, xattrs, symlink target). +pub(crate) struct OstreeFileHeader { + pub size: u64, + pub uid: u32, + pub gid: u32, + pub mode: u32, + pub symlink_target: String, + pub xattrs: Vec<(Vec, Vec)>, +} + +impl OstreeFileHeader { + pub fn from_zlib_sized(data: &AlignedSlice) -> Result { + let header_size = size_of::(); + let variant_data: &AlignedSlice = data + .get(header_size..) + .ok_or_else(|| anyhow!("Zlib file header too small"))? + .try_as_aligned() + .map_err(|_| anyhow!("Zlib file header not aligned"))?; + + let gv = gv!("(tuuuusa(ayay))").cast(variant_data.as_aligned()); + let (size, uid, gid, mode, _zero, symlink_target, xattrs_data) = gv.to_tuple(); + let xattrs = xattrs_data + .iter() + .map(|x| { + let (key, value) = x.to_tuple(); + (key.to_vec(), value.to_vec()) + }) + .collect(); + Ok(OstreeFileHeader { + size: u64::from_be(*size), + uid: u32::from_be(*uid), + gid: u32::from_be(*gid), + mode: u32::from_be(*mode), + symlink_target: symlink_target.to_str().to_string(), + xattrs, + }) + } + + fn xattrs_ref(&self) -> Vec<(&[u8], &[u8])> { + self.xattrs + .iter() + .map(|(k, v)| (k.as_slice(), v.as_slice())) + .collect() + } + + /// Serializes as the "zlib" format used in archive-mode .filez objects, + /// which includes the file size, prefixed with a SizedVariantHeader. + pub fn serialize_zlib_sized(&self) -> AlignedBuf { + let xattrs = self.xattrs_ref(); + let data = gv!("(tuuuusa(ayay))").serialize_to_vec(&( + u64::to_be(self.size), + u32::to_be(self.uid), + u32::to_be(self.gid), + u32::to_be(self.mode), + u32::to_be(0), + &self.symlink_target, + &xattrs, + )); + size_prefix(&data) + } + + /// Serializes as the "regular" format used for ostree content checksums, + /// which omits the file size, prefixed with a SizedVariantHeader. + pub fn serialize_regular_sized(&self) -> AlignedBuf { + let xattrs = self.xattrs_ref(); + let data = gv!("(uuuusa(ayay))").serialize_to_vec(&( + u32::to_be(self.uid), + u32::to_be(self.gid), + u32::to_be(self.mode), + u32::to_be(0), + &self.symlink_target, + &xattrs, + )); + size_prefix(&data) + } +} + +/// Decoded ostree directory metadata (uid, gid, mode, xattrs). +pub(crate) struct OstreeDirMeta { + pub uid: u32, + pub gid: u32, + pub mode: u32, + pub xattrs: Vec<(Vec, Vec)>, +} + +impl OstreeDirMeta { + pub fn from_data(data: &AlignedSlice) -> Result { + let gv = gv!("(uuua(ayay))").cast(data.as_aligned()); + let (uid, gid, mode, xattrs_data) = gv.to_tuple(); + let xattrs = xattrs_data + .iter() + .map(|x| { + let (key, value) = x.to_tuple(); + (key.to_vec(), value.to_vec()) + }) + .collect(); + Ok(OstreeDirMeta { + uid: u32::from_be(*uid), + gid: u32::from_be(*gid), + mode: u32::from_be(*mode), + xattrs, + }) + } +} + +pub(crate) fn parse_xattr_data(data: &AlignedSlice) -> Result, Vec)>> { + let gv = gv!("a(ayay)").cast(data.as_aligned()); + Ok(gv + .iter() + .map(|x| { + let (key, value) = x.to_tuple(); + (key.to_vec(), value.to_vec()) + }) + .collect()) +} + +/// Decoded ostree directory tree listing files and subdirectories. +pub(crate) struct OstreeDirTree { + pub files: Vec<(String, Sha256Digest)>, + pub dirs: Vec<(String, Sha256Digest, Sha256Digest)>, +} + +impl OstreeDirTree { + pub fn from_data(data: &AlignedSlice) -> Result { + let gv = gv!("(a(say)a(sayay))").cast(data.as_aligned()); + let (files_data, dirs_data) = gv.to_tuple(); + + let files = files_data + .iter() + .map(|f| { + let (name, checksum) = f.to_tuple(); + Ok((name.to_str().to_string(), checksum.try_into()?)) + }) + .collect::>>()?; + + let dirs = dirs_data + .iter() + .map(|d| { + let (name, tree_checksum, meta_checksum) = d.to_tuple(); + Ok(( + name.to_str().to_string(), + tree_checksum.try_into()?, + meta_checksum.try_into()?, + )) + }) + .collect::>>()?; + + Ok(OstreeDirTree { files, dirs }) + } +} + +/// Decoded ostree commit object with metadata, tree root, and optional parent. +pub(crate) struct OstreeCommit { + pub parent_commit: Option, + pub metadata: Vec<(String, String)>, + pub subject: String, + pub body: String, + pub timestamp: u64, + pub root_tree: Sha256Digest, + pub root_metadata: Sha256Digest, +} + +fn format_variant(v: &gvariant::Variant) -> String { + if let Some(s) = v.get(gv!("s")) { + return s.to_str().to_string(); + } + if let Some(b) = v.get(gv!("b")) { + return bool::from(*b).to_string(); + } + if let Some(u) = v.get(gv!("u")) { + return u.to_string(); + } + if let Some(t) = v.get(gv!("t")) { + return t.to_string(); + } + if let Some(arr) = v.get(gv!("as")) { + let items: Vec<&str> = arr.iter().map(|s| s.to_str()).collect(); + return format!("[{}]", items.join(", ")); + } + if let Some(ay) = v.get(gv!("ay")) { + return hex::encode(ay); + } + let (typestr, data) = v.split(); + format!( + "<{}:{}>", + std::str::from_utf8(typestr).unwrap_or("?"), + hex::encode(data) + ) +} + +impl OstreeCommit { + pub fn from_data(data: &AlignedSlice) -> Result { + let gv = gv!("(a{sv}aya(say)sstayay)").cast(data.as_aligned()); + let ( + metadata_data, + parent_checksum, + _related_objects, + subject, + body, + timestamp, + root_tree, + root_metadata, + ) = gv.to_tuple(); + + let parent_commit: Option = parent_checksum.try_into().ok(); + + let metadata = metadata_data + .iter() + .map(|entry| { + let (key, value) = entry.to_tuple(); + (key.to_str().to_string(), format_variant(value)) + }) + .collect(); + + Ok(OstreeCommit { + parent_commit, + metadata, + subject: subject.to_str().to_string(), + body: body.to_str().to_string(), + timestamp: u64::from_be(*timestamp), + root_tree: root_tree.try_into()?, + root_metadata: root_metadata.try_into()?, + }) + } +} diff --git a/crates/composefs-ostree/src/pull.rs b/crates/composefs-ostree/src/pull.rs new file mode 100644 index 00000000..d2b285cb --- /dev/null +++ b/crates/composefs-ostree/src/pull.rs @@ -0,0 +1,395 @@ +//! Orchestrates pulling ostree commits into a composefs repository. +//! +//! A pull proceeds in two phases: first all metadata objects (commits, dirtrees, +//! dirmetas) are fetched with high concurrency, then file content objects are +//! fetched. Previously-pulled commits can serve as a base to avoid re-fetching +//! shared objects. + +use anyhow::{Result, bail}; +use composefs::{fsverity::FsVerityHashValue, repository::Repository, util::Sha256Digest}; +use gvariant::aligned_bytes::AlignedBuf; +use indicatif::ProgressBar; +use sha2::{Digest, Sha256}; +use std::collections::{HashSet, VecDeque}; +use std::{fmt, sync::Arc}; +use tokio::sync::Semaphore; +use tokio::task::JoinSet; + +use crate::commit::{CommitReader, CommitWriter}; +use crate::ostree::{ObjectType, OstreeCommit, OstreeDirTree}; +use crate::repo::OstreeRepo; + +/// Statistics from a pull operation. +#[derive(Debug, Default)] +pub struct PullStats { + /// The ostree commit ID (hex). + pub commit_id: String, + /// Number of metadata objects (commits, dirtrees, dirmetas) fetched. + pub metadata_fetched: usize, + /// Number of file objects fetched. + pub files_fetched: usize, +} + +const MAX_CONCURRENT_METADATA_FETCHES: usize = 32; +const MAX_CONCURRENT_CONTENT_FETCHES: usize = 8; + +struct Outstanding { + id: Sha256Digest, + obj_type: ObjectType, +} + +impl fmt::Debug for Outstanding { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Outstanding") + .field("id", &hex::encode(self.id)) + .field("obj_type", &self.obj_type) + .finish() + } +} + +enum FetchResult { + Metadata { + id: Sha256Digest, + obj_type: ObjectType, + data: AlignedBuf, + }, + File { + id: Sha256Digest, + file_header: AlignedBuf, + obj_id: Option, + }, +} + +/// Drives a two-phase pull of an ostree commit into a composefs repository. +#[derive(Debug)] +pub(crate) struct PullOperation> { + repo: Arc>, + writer: CommitWriter, + ostree_repo: Arc, + base_commits: Vec>, + outstanding: VecDeque, + // All ids that were ever enqueued (including already fetched and currently being fetched) + fetched: HashSet, + stats: PullStats, +} + +impl + 'static> + PullOperation +{ + pub fn new(repo: &Arc>, ostree_repo: RepoType) -> Self { + PullOperation { + repo: repo.clone(), + writer: CommitWriter::::new(), + ostree_repo: Arc::new(ostree_repo), + outstanding: VecDeque::new(), + base_commits: vec![], + fetched: HashSet::new(), + stats: PullStats::default(), + } + } + + pub fn add_base(&mut self, base_name: &str) -> Result<()> { + let base = CommitReader::::load(&self.repo, base_name)?; + self.base_commits.push(base); + Ok(()) + } + + fn enqueue_fetch(&mut self, id: &Sha256Digest, obj_type: ObjectType) { + // To avoid fetching twice, even if the id is not in the outstanding list + // (for example we may be currenly downloading it) we keep all ids we ever + // fetch in a map + if self.fetched.contains(id) { + return; + } + self.fetched.insert(*id); + // We request metadata objects first + if obj_type == ObjectType::File { + self.outstanding + .push_back(Outstanding { id: *id, obj_type }); + } else { + self.outstanding + .push_front(Outstanding { id: *id, obj_type }); + } + } + + fn insert_commit(&mut self, id: &Sha256Digest, data: &[u8]) { + self.writer.insert(id, None, data); + self.writer.set_commit_id(id); + } + + fn insert_dirmeta(&mut self, id: &Sha256Digest, data: &[u8]) { + self.writer.insert(id, None, data); + } + + fn insert_dirtree(&mut self, id: &Sha256Digest, data: &[u8]) { + self.writer.insert(id, None, data); + } + + fn insert_file( + &mut self, + id: &Sha256Digest, + obj_id: Option<&ObjectID>, + file_header: AlignedBuf, + ) { + self.writer.insert(id, obj_id, &file_header); + } + + fn maybe_fetch_file(&mut self, id: &Sha256Digest) -> Result<()> { + if self.writer.contains(id) { + return Ok(()); + } + + for base in self.base_commits.iter() { + if let Some((obj_id, file_header)) = base.lookup(id)? { + self.add_file(id, obj_id.cloned().as_ref(), file_header.to_owned()); + return Ok(()); + } + } + + self.enqueue_fetch(id, ObjectType::File); + Ok(()) + } + + fn add_file(&mut self, id: &Sha256Digest, obj_id: Option<&ObjectID>, file_header: AlignedBuf) { + self.insert_file(id, obj_id, file_header); + } + + fn maybe_fetch_dirmeta(&mut self, id: &Sha256Digest) -> Result<()> { + if self.writer.contains(id) { + return Ok(()); + } + + for base in self.base_commits.iter() { + if let Some(dirmeta) = base.lookup_data(id)? { + self.add_dirmeta(id, dirmeta.to_owned()); + return Ok(()); + } + } + + self.enqueue_fetch(id, ObjectType::DirMeta); + Ok(()) + } + + fn add_dirmeta(&mut self, id: &Sha256Digest, data: AlignedBuf) { + self.insert_dirmeta(id, &data); + } + + fn maybe_fetch_dirtree(&mut self, id: &Sha256Digest) -> Result<()> { + if self.writer.contains(id) { + return Ok(()); + } + + for base in self.base_commits.iter() { + if let Some(dirtree) = base.lookup_data(id)? { + return self.add_dirtree(id, dirtree.to_owned()); + } + } + + self.enqueue_fetch(id, ObjectType::DirTree); + + Ok(()) + } + + fn add_dirtree(&mut self, id: &Sha256Digest, buf: AlignedBuf) -> Result<()> { + let dirtree = OstreeDirTree::from_data(&buf)?; + + for (_name, checksum) in &dirtree.files { + self.maybe_fetch_file(checksum)?; + } + + for (_name, tree_checksum, meta_checksum) in &dirtree.dirs { + self.maybe_fetch_dirmeta(meta_checksum)?; + self.maybe_fetch_dirtree(tree_checksum)?; + } + + self.insert_dirtree(id, &buf); + Ok(()) + } + + fn add_commit(&mut self, id: &Sha256Digest, buf: AlignedBuf) -> Result<()> { + let commit = OstreeCommit::from_data(&buf)?; + + if let Some(parent_id) = &commit.parent_commit { + let parent_stream = format!("ostree-commit-{}", hex::encode(parent_id)); + if self.repo.has_stream(&parent_stream)?.is_some() + && !self + .base_commits + .iter() + .any(|b| b.commit_id() == *parent_id) + { + let base = CommitReader::::load(&self.repo, &parent_stream)?; + self.base_commits.push(base); + } + } + + self.maybe_fetch_dirmeta(&commit.root_metadata)?; + self.maybe_fetch_dirtree(&commit.root_tree)?; + + self.insert_commit(id, &buf); + + Ok(()) + } + + fn process_metadata( + &mut self, + id: &Sha256Digest, + obj_type: ObjectType, + data: AlignedBuf, + ) -> Result<()> { + let data_sha = Sha256::digest(&*data); + if *data_sha != *id { + bail!( + "Invalid {:?} checksum {:?}, expected {:?}", + obj_type, + data_sha, + id + ); + } + match obj_type { + ObjectType::Commit => self.add_commit(id, data), + ObjectType::DirTree => self.add_dirtree(id, data), + ObjectType::DirMeta => { + self.add_dirmeta(id, data); + Ok(()) + } + _ => bail!("Unexpected metadata object type {:?}", obj_type), + } + } + + fn pop_metadata(&mut self) -> Option { + match self.outstanding.front() { + Some(front) if front.obj_type != ObjectType::File => self.outstanding.pop_front(), + _ => None, + } + } + + fn drain_files(&mut self) -> Vec { + let files: Vec<_> = self.outstanding.drain(..).collect(); + debug_assert!(files.iter().all(|f| f.obj_type == ObjectType::File)); + files + } + + pub async fn pull_commit( + &mut self, + commit_id: &Sha256Digest, + reference: Option<&str>, + ) -> Result<(ObjectID, PullStats)> { + let commit_hex = hex::encode(commit_id); + let content_id = format!("ostree-commit-{commit_hex}"); + if let Some(objid) = self.repo.has_stream(&content_id)? { + return Ok(( + objid, + PullStats { + commit_id: commit_hex, + ..Default::default() + }, + )); + } + self.stats.commit_id = commit_hex; + + self.enqueue_fetch(commit_id, ObjectType::Commit); + + // TODO: Support deltas + + let metadata_bar = ProgressBar::new_spinner().with_message("Fetching metadata"); + let metadata_semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_METADATA_FETCHES)); + + // Phase 1: Fetch all metadata (commits, dirtrees, dirmetas) in parallel. + // Processing results may discover new metadata to fetch, so we loop + // until the queue is drained and all in-flight fetches have completed. + let mut join_set: JoinSet>> = JoinSet::new(); + + loop { + while let Some(item) = self.pop_metadata() { + let permit = match metadata_semaphore.clone().try_acquire_owned() { + Ok(permit) => permit, + Err(_) => { + self.outstanding.push_front(item); + break; + } + }; + let ostree_repo = self.ostree_repo.clone(); + join_set.spawn(async move { + let data = ostree_repo.fetch_object(&item.id, item.obj_type).await?; + drop(permit); + Ok(FetchResult::Metadata { + id: item.id, + obj_type: item.obj_type, + data, + }) + }); + } + + match join_set.join_next().await { + Some(result) => { + metadata_bar.tick(); + let fetch = result??; + match fetch { + FetchResult::Metadata { id, obj_type, data } => { + self.stats.metadata_fetched += 1; + self.process_metadata(&id, obj_type, data)?; + } + _ => unreachable!(), + } + } + None => break, + } + } + + metadata_bar.finish_and_clear(); + + // Phase 2: Fetch all files in parallel. Files are leaf objects with + // no dependencies on each other. + let files = self.drain_files(); + if files.is_empty() { + let commit_id = self + .writer + .serialize(&self.repo, &content_id, reference, None)?; + let stats = std::mem::take(&mut self.stats); + return Ok((commit_id, stats)); + } + + let files_bar = ProgressBar::new(files.len() as u64); + let content_semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_CONTENT_FETCHES)); + let mut join_set: JoinSet>> = JoinSet::new(); + + for item in files { + let ostree_repo = self.ostree_repo.clone(); + let permit = content_semaphore.clone().acquire_owned().await?; + join_set.spawn(async move { + let (file_header, obj_id) = ostree_repo.fetch_file(&item.id).await?; + drop(permit); + Ok(FetchResult::File { + id: item.id, + file_header, + obj_id, + }) + }); + } + + while let Some(result) = join_set.join_next().await { + let fetch = result??; + match fetch { + FetchResult::File { + id, + file_header, + obj_id, + } => { + self.stats.files_fetched += 1; + self.add_file(&id, obj_id.as_ref(), file_header); + files_bar.inc(1); + } + _ => unreachable!(), + } + } + + files_bar.finish(); + + let commit_id = self + .writer + .serialize(&self.repo, &content_id, reference, None)?; + let stats = std::mem::take(&mut self.stats); + + Ok((commit_id, stats)) + } +} diff --git a/crates/composefs-ostree/src/repo.rs b/crates/composefs-ostree/src/repo.rs new file mode 100644 index 00000000..1462363b --- /dev/null +++ b/crates/composefs-ostree/src/repo.rs @@ -0,0 +1,509 @@ +//! Access layer for local and remote ostree repositories. +//! +//! Provides the [`OstreeRepo`] trait for fetching objects and files, with +//! concrete implementations for local filesystem repos ([`LocalRepo`]) and +//! HTTP-served repos ([`RemoteRepo`]). + +use anyhow::{Context, Result, anyhow, bail}; +use configparser::ini::Ini; +use flate2::read::DeflateDecoder; +use gvariant::aligned_bytes::AlignedBuf; +use reqwest::{Client, Url}; +use rustix::fd::AsRawFd; +use rustix::fs::{FileType, Mode, OFlags, fstat, getxattr, listxattr, openat, readlinkat}; +use rustix::io::Errno; +use sha2::{Digest, Sha256}; +use std::ffi::CStr; +use std::mem::MaybeUninit; +use std::{ + fs::File, + future::Future, + io::{Read, empty}, + os::fd::{AsFd, OwnedFd}, + path::Path, + sync::Arc, +}; +use tokio::io::AsyncReadExt; +use tokio_stream::StreamExt; +use tokio_util::io::StreamReader; + +use composefs::{ + fsverity::FsVerityHashValue, + repository::Repository, + util::{ErrnoFilter, Sha256Digest, parse_sha256}, +}; + +use crate::ostree::{ + ObjectType, OstreeDirMeta, OstreeFileHeader, RepoMode, SizedVariantHeader, get_object_pathname, + get_sized_variant_size, parse_xattr_data, should_inline_file, +}; + +struct HashingReader<'a, R: Read> { + inner: &'a mut R, + hasher: &'a mut Sha256, +} + +impl Read for HashingReader<'_, R> { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let n = self.inner.read(buf)?; + if n > 0 { + self.hasher.update(&buf[..n]); + } + Ok(n) + } +} + +fn hash_and_store_file( + repo: &Repository, + header: &OstreeFileHeader, + mut file_data: AlignedBuf, + reader: &mut impl Read, + expected_checksum: &Sha256Digest, +) -> Result<(AlignedBuf, Option)> { + let mut hasher = Sha256::new(); + let sized_regular_header = header.serialize_regular_sized(); + hasher.update(&*sized_regular_header); + + let obj_id = if should_inline_file::(header.size as usize) { + let mut file_content = Vec::new(); + reader.read_to_end(&mut file_content)?; + hasher.update(&file_content); + file_data.with_vec(|v| v.extend_from_slice(&file_content)); + None + } else { + let hashing_reader = HashingReader { + inner: reader, + hasher: &mut hasher, + }; + let obj_id = repo.ensure_object_from_reader(hashing_reader, header.size)?; + Some(obj_id) + }; + + let actual_checksum = hasher.finalize(); + if *actual_checksum != *expected_checksum { + bail!( + "Unexpected file checksum {}, expected {}", + hex::encode(actual_checksum), + hex::encode(expected_checksum) + ); + } + + Ok((file_data, obj_id)) +} + +/// Abstraction over local and remote ostree repository access. +pub(crate) trait OstreeRepo: Send + Sync { + fn fetch_object( + &self, + checksum: &Sha256Digest, + object_type: ObjectType, + ) -> impl Future> + Send; + fn fetch_file( + &self, + checksum: &Sha256Digest, + ) -> impl Future)>> + Send; +} + +/// Fetches ostree objects over HTTP from an archive-z2 repository. +#[derive(Debug)] +pub(crate) struct RemoteRepo { + repo: Arc>, + client: Client, + url: Url, +} + +impl RemoteRepo { + pub fn new(repo: &Arc>, url: &str) -> Result { + Ok(RemoteRepo { + repo: repo.clone(), + client: Client::new(), + url: Url::parse(url)?, + }) + } + + fn url_for(&self, segments: &[&str]) -> Url { + let mut url = self.url.clone(); + url.path_segments_mut() + .expect("repo URL is not cannot-be-a-base") + .pop_if_empty() + .extend(segments); + url + } + + pub async fn resolve_ref(&self, ref_name: &str) -> Result { + // TODO: Support summary format + let url = self.url_for(&["refs", "heads", ref_name]); + + let response = self.client.get(url.clone()).send().await?; + response.error_for_status_ref()?; + let t = response + .text() + .await + .with_context(|| format!("Cannot get ostree ref at {}", url))?; + + Ok(parse_sha256(t.trim())?) + } +} + +impl OstreeRepo for RemoteRepo { + async fn fetch_object( + &self, + checksum: &Sha256Digest, + object_type: ObjectType, + ) -> Result { + let dir = format!("{:02x}", checksum[0]); + let name = format!( + "{}{}", + hex::encode(&checksum[1..]), + object_type.extension(RepoMode::Archive) + ); + let url = self.url_for(&["objects", &dir, &name]); + + let response = self.client.get(url.clone()).send().await?; + response.error_for_status_ref()?; + let b = response + .bytes() + .await + .with_context(|| format!("Cannot get ostree object at {}", url))?; + + Ok(b.to_vec().into()) + } + + async fn fetch_file(&self, checksum: &Sha256Digest) -> Result<(AlignedBuf, Option)> { + let dir = format!("{:02x}", checksum[0]); + let name = format!( + "{}{}", + hex::encode(&checksum[1..]), + ObjectType::File.extension(RepoMode::Archive) + ); + let url = self.url_for(&["objects", &dir, &name]); + + let response = self.client.get(url.clone()).send().await?; + response.error_for_status_ref()?; + + let byte_stream = response + .bytes_stream() + .map(|r| r.map_err(std::io::Error::other)); + let mut reader = StreamReader::new(byte_stream); + + // Read the sized variant header from the stream + let header_size = size_of::(); + let mut header_buf = vec![0u8; header_size]; + reader + .read_exact(&mut header_buf) + .await + .with_context(|| format!("Cannot read ostree file header at {}", url))?; + + let variant_size = get_sized_variant_size(&header_buf)?; + header_buf.resize(header_size + variant_size, 0u8); + reader + .read_exact(&mut header_buf[header_size..]) + .await + .with_context(|| format!("Cannot read ostree file variant at {}", url))?; + + let file_header: AlignedBuf = header_buf.into(); + + let header = OstreeFileHeader::from_zlib_sized(&file_header)?; + + let checksum = *checksum; + let repo = self.repo.clone(); + + // Convert the async stream to a sync reader for decompression + let sync_reader = tokio_util::io::SyncIoBridge::new(reader); + let mut decompressor = DeflateDecoder::new(sync_reader); + + tokio::task::spawn_blocking(move || { + hash_and_store_file(&repo, &header, file_header, &mut decompressor, &checksum) + }) + .await + .context("spawn_blocking failed")? + } +} + +fn proc_self_fd(fd: &impl AsFd) -> String { + format!("/proc/self/fd/{}", fd.as_fd().as_raw_fd()) +} + +// Returns empty string instead of None for non-symlinks to match the ostree metadata format +fn read_symlink_target(fd: &impl AsFd, is_symlink: bool) -> Result { + if is_symlink { + readlinkat(fd, "", [])? + .into_string() + .map_err(|_| anyhow!("symlink target is not valid UTF-8")) + } else { + Ok(String::new()) + } +} + +fn read_xattr_value(path: &str, name: &CStr) -> Result> { + let mut buffer = [MaybeUninit::new(0u8); 65536]; + let (value, _) = getxattr(path, name, &mut buffer)?; + Ok(value.to_vec()) +} + +fn read_xattrs_from_path(fd: &impl AsFd) -> Result, Vec)>> { + let filename = proc_self_fd(fd); + + let mut names_buf = [MaybeUninit::new(0); 65536]; + let (names, _) = listxattr(&filename, &mut names_buf)?; + + let mut xattrs = names + .split_inclusive(|c| *c == 0) + .map(|name| { + let name = CStr::from_bytes_with_nul(name)?; + let value = read_xattr_value(&filename, name)?; + Ok((name.to_bytes_with_nul().to_vec(), value)) + }) + .collect::>>()?; + + xattrs.sort_by(|a, b| a.0.cmp(&b.0)); + Ok(xattrs) +} + +/// Reads ostree objects from a local on-disk repository (any mode). +#[derive(Debug)] +pub(crate) struct LocalRepo { + repo: Arc>, + mode: RepoMode, + dir: OwnedFd, + objects: OwnedFd, +} + +impl LocalRepo { + pub fn open_path( + repo: &Arc>, + dirfd: impl AsFd, + path: impl AsRef, + ) -> Result { + let path = path.as_ref(); + let repofd = openat( + &dirfd, + path, + OFlags::RDONLY | OFlags::CLOEXEC, + Mode::empty(), + ) + .with_context(|| format!("Cannot open ostree repository at {}", path.display()))?; + + let configfd = openat( + &repofd, + "config", + OFlags::RDONLY | OFlags::CLOEXEC, + Mode::empty(), + ) + .with_context(|| format!("Cannot open ostree repo config file at {}", path.display()))?; + + let mut config_data = String::new(); + + File::from(configfd) + .read_to_string(&mut config_data) + .with_context(|| "Can't read config file")?; + + let mut config = Ini::new(); + let map = config + .read(config_data) + .map_err(|e| anyhow!(e)) + .context("Can't read config file")?; + + let core = map + .get("core") + .ok_or_else(|| anyhow!("No [core] section in config"))?; + + let mode: RepoMode = core + .get("mode") + .and_then(|v| v.as_deref()) + .ok_or_else(|| anyhow!("No mode in [core] section in config"))? + .parse()?; + + let objectsfd = openat( + &repofd, + "objects", + OFlags::PATH | OFlags::CLOEXEC | OFlags::DIRECTORY, + 0o666.into(), + ) + .with_context(|| { + format!( + "Cannot open ostree repository objects directory at {}", + path.display() + ) + })?; + + Ok(Self { + repo: repo.clone(), + mode, + dir: repofd, + objects: objectsfd, + }) + } + + pub fn open_object_flags( + &self, + checksum: &Sha256Digest, + object_type: ObjectType, + flags: OFlags, + ) -> Result { + let path = get_object_pathname(self.mode, checksum, object_type); + + openat(&self.objects, &path, flags | OFlags::CLOEXEC, Mode::empty()) + .with_context(|| format!("Cannot open ostree objects object at {}", path)) + } + + pub fn open_object(&self, checksum: &Sha256Digest, object_type: ObjectType) -> Result { + self.open_object_flags(checksum, object_type, OFlags::RDONLY | OFlags::NOFOLLOW) + } + + pub fn read_ref(&self, ref_name: &str) -> Result { + let path1 = format!("refs/{}", ref_name); + let path2 = format!("refs/heads/{}", ref_name); + + let fd1 = openat( + &self.dir, + &path1, + OFlags::RDONLY | OFlags::CLOEXEC, + Mode::empty(), + ) + .filter_errno(Errno::NOENT) + .with_context(|| format!("Cannot open ostree ref at {}", path1))?; + + let fd = match fd1 { + Some(fd) => fd, + None => openat( + &self.dir, + &path2, + OFlags::RDONLY | OFlags::CLOEXEC, + Mode::empty(), + ) + .with_context(|| format!("Cannot open ostree ref at {}", path2))?, + }; + + let mut buffer = String::new(); + File::from(fd) + .read_to_string(&mut buffer) + .with_context(|| "Can't read ref file")?; + + Ok(parse_sha256(buffer.trim())?) + } + + async fn fetch_file_bare( + &self, + checksum: &Sha256Digest, + ) -> Result<(AlignedBuf, Box)> { + let path_fd = + self.open_object_flags(checksum, ObjectType::File, OFlags::PATH | OFlags::NOFOLLOW)?; + + let st = fstat(&path_fd)?; + let disk_filetype = FileType::from_raw_mode(st.st_mode); + + let (uid, gid, mode, xattrs, symlink_target) = match self.mode { + RepoMode::Bare => { + let xattrs = read_xattrs_from_path(&path_fd)?; + let symlink_target = read_symlink_target(&path_fd, disk_filetype.is_symlink())?; + (st.st_uid, st.st_gid, st.st_mode, xattrs, symlink_target) + } + RepoMode::BareUser => { + let fd_path = proc_self_fd(&path_fd); + let name = c"user.ostreemeta"; + let aligned: AlignedBuf = read_xattr_value(&fd_path, name)?.into(); + let meta = OstreeDirMeta::from_data(&aligned)?; + + let is_symlink = FileType::from_raw_mode(meta.mode).is_symlink(); + let symlink_target = if is_symlink { + let mut target = Vec::new(); + File::open(&fd_path)?.read_to_end(&mut target)?; + if target.last() == Some(&0) { + target.pop(); + } + String::from_utf8(target) + .map_err(|_| anyhow!("symlink target is not valid UTF-8"))? + } else { + String::new() + }; + (meta.uid, meta.gid, meta.mode, meta.xattrs, symlink_target) + } + RepoMode::BareUserOnly => { + let symlink_target = read_symlink_target(&path_fd, disk_filetype.is_symlink())?; + (0, 0, st.st_mode, vec![], symlink_target) + } + RepoMode::BareSplitXAttrs => { + let xattr_fd = self.open_object(checksum, ObjectType::FileXAttrsLink)?; + let mut xattr_data = Vec::new(); + File::from(xattr_fd).read_to_end(&mut xattr_data)?; + let aligned: AlignedBuf = xattr_data.into(); + let xattrs = parse_xattr_data(&aligned)?; + let symlink_target = read_symlink_target(&path_fd, disk_filetype.is_symlink())?; + (st.st_uid, st.st_gid, st.st_mode, xattrs, symlink_target) + } + RepoMode::Archive => { + bail!("Archive mode should not use fetch_file_bare"); + } + }; + + let is_symlink = FileType::from_raw_mode(mode).is_symlink(); + let header = OstreeFileHeader { + size: st.st_size as u64, + uid, + gid, + mode, + symlink_target, + xattrs, + }; + let zlib_header = header.serialize_zlib_sized(); + + if is_symlink { + Ok((zlib_header, Box::new(empty()))) + } else { + Ok((zlib_header, Box::new(File::open(proc_self_fd(&path_fd))?))) + } + } + + async fn fetch_file_archive( + &self, + checksum: &Sha256Digest, + ) -> Result<(AlignedBuf, Box)> { + let fd = self.open_object(checksum, ObjectType::File)?; + let mut file = File::from(fd); + + let mut header_buf = AlignedBuf::new(); + + // Read variant size header + let header_size = size_of::(); + header_buf.with_vec(|v| { + v.resize(header_size, 0u8); + file.read_exact(v) + })?; + + // Read variant + let variant_size = get_sized_variant_size(&header_buf)?; + header_buf.with_vec(|v| { + v.resize(header_size + variant_size, 0u8); + file.read_exact(&mut v[header_size..]) + })?; + + // Decompress rest + Ok((header_buf, Box::new(DeflateDecoder::new(file)))) + } +} + +impl OstreeRepo for LocalRepo { + async fn fetch_object( + &self, + checksum: &Sha256Digest, + object_type: ObjectType, + ) -> Result { + let fd = self.open_object(checksum, object_type)?; + + let mut buffer = Vec::new(); + File::from(fd).read_to_end(&mut buffer)?; + Ok(buffer.into()) + } + + async fn fetch_file(&self, checksum: &Sha256Digest) -> Result<(AlignedBuf, Option)> { + let (header_buf, mut rest) = if self.mode == RepoMode::Archive { + self.fetch_file_archive(checksum).await? + } else { + self.fetch_file_bare(checksum).await? + }; + + let header = OstreeFileHeader::from_zlib_sized(&header_buf)?; + hash_and_store_file(&self.repo, &header, header_buf, &mut rest, checksum) + } +} diff --git a/crates/composefs/src/repository.rs b/crates/composefs/src/repository.rs index bd6eb5c8..915e2302 100644 --- a/crates/composefs/src/repository.rs +++ b/crates/composefs/src/repository.rs @@ -1528,14 +1528,22 @@ impl Repository { /// avoiding a second read pass. #[context("Ensuring object from file descriptor")] pub(crate) fn ensure_object_from_fd(&self, source: OwnedFd, size: u64) -> Result { + self.ensure_object_from_reader(File::from(source), size) + } + + /// Ensures that the given data is stored as an object in the repository, + /// reading from any source that implements [`Read`]. + /// + /// In insecure mode, the fs-verity digest is computed while copying, + /// avoiding a second read pass. + #[context("Ensuring object from reader")] + pub fn ensure_object_from_reader(&self, mut source: impl Read, size: u64) -> Result { let writable = self.ensure_writable_token()?; let tmpfile_fd = self.create_object_tmpfile_impl(&writable)?; if self.insecure { - // Insecure mode: compute verity digest while copying, avoiding - // a second read of the data in finalize_object_tmpfile_impl. let mut hasher = FsVerityHasher::::new(); - let mut src = std::io::BufReader::with_capacity(IO_BUF_CAPACITY, File::from(source)); + let mut src = std::io::BufReader::with_capacity(IO_BUF_CAPACITY, &mut source); let mut dst = File::from(tmpfile_fd.try_clone()?); loop { @@ -1561,9 +1569,8 @@ impl Repository { // Secure mode: let std::io::copy use copy_file_range for // potential reflinks, then finalize_object_tmpfile_impl // enables kernel verity and measures the digest. - let mut src = File::from(source); let mut dst = File::from(tmpfile_fd.try_clone()?); - let copied = std::io::copy(&mut src, &mut dst)?; + let copied = std::io::copy(&mut source, &mut dst)?; ensure!(copied == size, "Expected {size} bytes, got {copied}"); drop(dst); diff --git a/crates/composefs/src/splitstream.rs b/crates/composefs/src/splitstream.rs index 8fd45f0b..2f827749 100644 --- a/crates/composefs/src/splitstream.rs +++ b/crates/composefs/src/splitstream.rs @@ -928,6 +928,11 @@ impl SplitStreamReader { self.named_refs } + /// Look up the digest of an external reference by index + pub fn lookup_external_ref(&self, idx: usize) -> Option<&ObjectID> { + self.object_refs.get(idx) + } + fn ensure_chunk( &mut self, eof_ok: bool, diff --git a/crates/composefs/src/util.rs b/crates/composefs/src/util.rs index bdc43485..00375f00 100644 --- a/crates/composefs/src/util.rs +++ b/crates/composefs/src/util.rs @@ -173,7 +173,10 @@ pub fn parse_sha256(string: impl AsRef) -> Result { Ok(value) } -pub(crate) trait ErrnoFilter { +/// Utility for filtering ErrnoResult errors. +pub trait ErrnoFilter { + /// Parse a ErrnoResult into ErrnoResult> where the option is + /// None if the errno was a specified errno (often used with ENOENT). fn filter_errno(self, ignored: Errno) -> ErrnoResult>; } diff --git a/doc/ostree.md b/doc/ostree.md new file mode 100644 index 00000000..e96b85d7 --- /dev/null +++ b/doc/ostree.md @@ -0,0 +1,139 @@ +# OSTree + +composefs-rs has support for importing images from OSTree +repositories, by pulling from local or remote OSTree +repositories. These images can then be mounted as composefs images, +sharing disk (deduplication) with other ostree or other types of +images in the composefs repository. + +Native OSTree repositories are a format similar to a composefs +repository, but not quite the same. This means we need some +conversions when handling ostree commits in a composefs repository. + +OSTree images (commits) are fundamentally made up of many small sha256 +content-addressed objects that reference each other. Each commit is +the root of a DAG that defines the total image. Some of the OSTree +objects are metadata like directory permissions, or list of files in a +directory. These don't really exist in composefs where all metadata is +part of the erofs image. However, some objects are large file objects, +and these are similar to the file objects in composefs +images. However, even these differ, because the checksum defining the +object is made up of both the file content and the file metadata. + +When an OSTree commit is stored in a composefs repo it is stored as a +single splitstream file, named `ostree-commit-$commit_id`, which uses +external object references to all the file content objects that will +be used when creating an erofs image for it. This means OSTree objects +for files that would be inlined in the erofs image will not be +external objects. + +OStree commit splitstream objects are created during a pull operation +and are used for two things, creating a composefs image by walking the +DAG, and serving as a source of already available OSTree object during +a pull operation. Such sources are found automatically during pull +(e.g. parent commit, or old commit for a ref being pulled) or can be +manually specified. + +## File format + +This describes the format of the `ostree-commit-$commit_id` files. + +### Splitstream header + +Since the commit file is a split stream it starts with the splitstream +headers. Of these we use two, the named refs and the object +refs: + + * When an erofs image is created for the commit, it is referenced by + the `composefs.image` named ref. + + * Any external file content objects are in the external_refs + table. The index of the references in this header table is used to + refer to the file in the splitstream itself. + +The splitstream content type used for commits is 0xAFE138C18C463EF1. + +### Splitstream content + +A splitstream is normally a series of internal and external chunks, +but the ostree commit uses only one inline chunk. This chunk is +basically a serialized form of the "objects" directory of an OSTree +repository. I.e. it has a mapping of sha256 to ostree object data. +All objects except file objects are stored in the standard ostree +object format. + +OSTree file objects are stored in the archive-z2 format, except not +compressed, and optionally the file content part of it may be stored +as referencing the index of an external object. The z2 format is, +first an 8-byte header that gives the size (in bytes) of a gvariant, +then comes the gvariant with the file meta in +OSTREE_ZLIB_FILE_HEADER_GVARIANT_FORMAT format, and then the +file/symlink inline data. If an external object is referenced for the +object then it is expected that there is no inline file data. + +The high level view of the file looks like this: +``` ++---------------+ +| Header | ++---------------| +| Object IDs | ++---------------| +| Object Info | ++---------------| +| Content | ++---------------+ +``` + +The Object IDs is a sorted array of sha256 digests, and you would do +lookups in it using a binary search. The buckets in the header can be +used to quickly limit the binary search based on the first byte of a +digest. + +Then, at the same index as the binary searched object you can look up +the object info which gives you the offset/length of the object +content data and optionally a reference to an external object. + +The exact form of the data looks like this, packed in order from the +start of the splitstream content. All ints are in little endian. + +### Header +``` ++-----------------------------------+ +| u32: index of commit object | +| u32: flags (currently unused) | +| [u32; 256]: end index of bucket | ++-----------------------------------+ +``` + +The bucket list contains the end index (in the object ids table) of +objects starting with that particular byte, and can be used to quickly +limit the search. We can also compute the total number of objects +(n_objects) by looking in the last bucket. + +### Object ids +``` + n_objects x ++-----------------------------------+ +| [u8; 32] ostree object id | ++-----------------------------------+ +``` + +### Object Info +``` + n_objects x ++-----------------------------------+ +| u32: Offset to per-object data | +| u32: Length of per-object data | +| u32: Index of external object ref | +| or MAXUINT32 if none. | ++-----------------------------------+ +``` + +This is an array of information for each object. Once you have found +the object id in the object ids table, you would look at the same +index in this table to find the information. Offsets to per-object +data are in bytes from the start of the content area, which starts at +the end of the Objects Info table. All data chunks references are +aligned to 8 bytes with respect to the start of the content area. +This is useful because GVariants (used by ostree) naturally want +8-byte alignment.