From ff05f717acd19dbfc9a0498c401bb40a1c247ff3 Mon Sep 17 00:00:00 2001 From: Ahmed Kamal Date: Sun, 17 May 2026 19:33:47 +0300 Subject: [PATCH 1/5] contrib/guix: deterministic, reproducible release pipeline for cuprated Adds a Guix-first build pipeline that produces byte-identical cuprated-<version>-x86_64-unknown-linux-gnu.tar.gz artifacts from this repository. Pipeline layout: contrib/guix/ channels.scm pinned Guix instance (commit + channel introduction for the official channel) manifest.scm pinned build profile (bash, coreutils, git, gcc-toolchain, cmake, make, pkg-config, openssl, perl, python, rust, rust:cargo, gzip, tar, findutils, diffutils, gawk, nss-certs) guix-mk-distsrc create a deterministic source archive (cargo vendor --locked + deterministic tar flags) inside guix shell --container guix-build build cuprated inside a hermetic guix shell --container --pure profile; captures guix-describe.json on the outer host (the container has no `guix`) guix-checksums aggregate SHA256SUMS over output tarballs only (ldd output is intentionally excluded as host-loader-variable) guix-verify integrity check vs sidecar .SHA256SUM (documented narrowly: integrity, not authenticity) guix-attest GPG-sign a canonical JSON attestation (builder_id, distsrc sha, artifact sha, guix channel commit, rustc/cargo versions); fail-closed when gpg missing libexec/build.sh inner build driver: - per-run mktemp build root - CARGO_NET_OFFLINE set before any cargo invocation - distsrc content-equivalence check (diff -rq vs git archive of the claimed git_commit) - CXXFLAGS workaround for Guix gcc-15.2 libstdc++'s undefined _GLIBCXX_HAVE_FENV_H libexec/package.sh deterministic tar.gz packaging of cuprated + license + service file smoke-reproducible.sh self-check that builds twice and compares ALL determinism-sensitive outputs (distsrc, artifact, metadata, rustc/cargo versions, guix-describe); fails if any build log contains -march=native / -mcpu=native / target-cpu=native; preserves work dirs on
failure for debugging Determinism inputs are pinned at every layer: - Guix instance pinned by commit in channels.scm - Build profile pinned via Guix commit + manifest.scm - Source tree pinned by git commit, verified at build time via diff -rq against git archive - Rust deps Cargo.lock + cargo vendor --locked --versioned-dirs - Build flags --remap-path-prefix, -ffile-prefix-map, codegen-units=1 - Time SOURCE_DATE_EPOCH = git commit time - Tar/gzip metadata sorted names, fixed mtime/uid/gid/mode, --pax-option strips atime/ctime, gzip -n --- contrib/guix/channels.scm | 23 ++++ contrib/guix/guix-attest | 171 ++++++++++++++++++++++++ contrib/guix/guix-build | 77 +++++++++++ contrib/guix/guix-checksums | 61 +++++++++ contrib/guix/guix-mk-distsrc | 35 +++++ contrib/guix/guix-verify | 26 ++++ contrib/guix/libexec/build.sh | 202 +++++++++++++++++++++++++++++ contrib/guix/libexec/package.sh | 38 ++++++ contrib/guix/manifest.scm | 20 +++ contrib/guix/mk-distsrc | 80 ++++++++++++ contrib/guix/smoke-reproducible.sh | 107 +++++++++++++++ 11 files changed, 840 insertions(+) create mode 100644 contrib/guix/channels.scm create mode 100755 contrib/guix/guix-attest create mode 100755 contrib/guix/guix-build create mode 100755 contrib/guix/guix-checksums create mode 100755 contrib/guix/guix-mk-distsrc create mode 100755 contrib/guix/guix-verify create mode 100755 contrib/guix/libexec/build.sh create mode 100755 contrib/guix/libexec/package.sh create mode 100644 contrib/guix/manifest.scm create mode 100755 contrib/guix/mk-distsrc create mode 100755 contrib/guix/smoke-reproducible.sh diff --git a/contrib/guix/channels.scm b/contrib/guix/channels.scm new file mode 100644 index 000000000..7ea7ad469 --- /dev/null +++ b/contrib/guix/channels.scm @@ -0,0 +1,23 @@ +(list + (channel + (name 'guix) + (url "https://git.savannah.gnu.org/git/guix.git") + (branch "master") + ;; Commit pin policy: pin to a tagged Guix release whenever the rust + ;; available there is >= the maximum rustc version 
pulled in by + ;; cuprate's workspace deps. Today (May 2026) v1.5.0 (230aa373f3) only + ;; ships rust up to 1.88, but several deps (fjall, lsm-tree, + ;; typed-index-collections, monero-daemon-rpc, ...) require up to 1.91, + ;; so we pin to a recent master commit that ships rust-1.93. Repin to + ;; the next stable Guix tag as soon as one carrying rust >= 1.91 lands. + (commit "7041be9c117cbae2a5238bb22a0ff93ef11ca91a") + ;; Guix v1.5.0 requires every channel to carry an `introduction` with the + ;; commit + OpenPGP fingerprint that started the chain of trust; without + ;; this `guix time-machine` aborts with "channel 'guix' lacks an + ;; introduction and cannot be authenticated". These values are the + ;; canonical introduction for the official Guix channel. + (introduction + (make-channel-introduction + "9edb3f66fd807b096b48283debdcddccfea34bad" + (openpgp-fingerprint + "BBB0 2DDF 2CEA F6A8 0D1D E643 A2A0 6DF2 A33A 54FA"))))) diff --git a/contrib/guix/guix-attest b/contrib/guix/guix-attest new file mode 100755 index 000000000..4b60c5473 --- /dev/null +++ b/contrib/guix/guix-attest @@ -0,0 +1,171 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Write a signed release attestation. +# +# The attestation is a stable, sorted-key JSON document (Python json.dumps +# with sort_keys=True, indent=2) containing every fact a downstream +# verifier needs to bind a binary to its inputs: +# +# - package, version +# - rust_target, guix_system +# - source_date_epoch, git_commit +# - distsrc filename + sha256 +# - artifact filename + sha256 + length +# - guix channel commit (from guix-describe.json) +# - rustc / cargo versions (from rustc-version.txt / cargo-version.txt) +# - builder identity (passed in) +# - attestation timestamp +# +# We then GPG-sign that JSON. Signing concatenated text was the previous +# shape and has weak filename/section boundaries; a single signed JSON +# closes that gap. 
The JSON itself is intentionally NOT byte-stable across
+# runs (attested_at and the detached GPG sig are both fresh per run); the
+# binding is via the sha256 fields, not via byte-identity of the payload.
+#
+# Fails closed: if gpg is not on PATH, or the signing key is unavailable,
+# the script exits non-zero rather than writing an unsigned attestation -
+# an unsigned attestation that "looks signed" is a footgun.
+#
+# Usage: guix-attest <out_dir> <builder-identity> <version>
+#   <out_dir>           directory holding the build outputs; an empty value
+#                       selects contrib/guix/out under the repository root
+#   <builder-identity>  signer id; every character outside A-Za-z0-9_.@+-
+#                       is replaced with "_" to form `identity`
+#   <version>           release version being attested
+
+repo_root="$(git rev-parse --show-toplevel)"
+out_dir="${1:-$repo_root/contrib/guix/out}"
+if [[ ! -d "$out_dir" ]]; then
+    echo "output directory not found: $out_dir" >&2
+    exit 1
+fi
+
+identity_raw="${2:-}"
+version="${3:-}"
+if [[ -z "$identity_raw" || -z "$version" ]]; then
+    echo "usage: $0 <out_dir> <builder-identity> <version>" >&2
+    exit 1
+fi
+identity="$(printf '%s' "$identity_raw" | tr -c 'A-Za-z0-9_.@+-' '_')"
+# The sanitiser keeps dots, so "." and ".." pass through unchanged.
+# `identity` is later used as a directory name under contrib/guix/sigs/,
+# where either value would resolve to the parent level instead of a
+# per-builder directory. Refuse them here.
+if [[ "$identity" == "." || "$identity" == ".." ]]; then
+    echo "ERROR: builder identity must not be '.' or '..'" >&2
+    exit 1
+fi
+
+if ! command -v gpg >/dev/null 2>&1; then
+    echo "ERROR: gpg not found; refusing to write unsigned attestation" >&2
+    exit 1
+fi
+
+# Collect required inputs.
+for f in SHA256SUMS build-metadata.json guix-describe.json rustc-version.txt cargo-version.txt; do
+    if [[ ! -f "$out_dir/$f" ]]; then
+        echo "ERROR: missing $out_dir/$f - run guix-checksums before guix-attest" >&2
+        exit 1
+    fi
+done
+
+# Derive the artifact and distsrc filenames from build-metadata.json rather
+# than globbing the directory. A `find | sort | head` approach can attest
+# the lexicographically first stale tarball from a previous build/version/
+# target if out_dir is not pristine; binding to the metadata file the build
+# just wrote eliminates that class of mistake.
+#
+# Use one-field-per-line + `mapfile -t` so a value containing whitespace
+# (none today, but cheap insurance) can't corrupt later fields.
+mapfile -t meta_fields < <(
+    python3 - "$out_dir/build-metadata.json" <<'PY'
+import json, sys
+with open(sys.argv[1]) as fh:
+    m = json.load(fh)
+for k in ("version", "rust_target", "distsrc", "distsrc_sha256"):
+    print(m[k])
+PY
+)
+# A failed python run, a missing key, or a value with an embedded newline
+# all surface here as a field count other than four.
+if [[ "${#meta_fields[@]}" -ne 4 ]]; then
+    echo "ERROR: could not extract version/rust_target/distsrc/distsrc_sha256 from build-metadata.json" >&2
+    exit 1
+fi
+meta_version="${meta_fields[0]}"
+meta_rust_target="${meta_fields[1]}"
+meta_distsrc_name="${meta_fields[2]}"
+meta_distsrc_sha="${meta_fields[3]}"
+if [[ -z "$meta_version" || -z "$meta_rust_target" || -z "$meta_distsrc_name" || -z "$meta_distsrc_sha" ]]; then
+    echo "ERROR: build-metadata.json has empty version/rust_target/distsrc/distsrc_sha256" >&2
+    exit 1
+fi
+if [[ "$meta_version" != "$version" ]]; then
+    echo "ERROR: build-metadata version ($meta_version) != passed version ($version)" >&2
+    exit 1
+fi
+# Reject path-traversal or shell-meaningful characters in the metadata
+# version before using it in the sigs/ path layout below. The CLI version
+# arg already had to match meta_version above, so this also gates the CLI
+# input transitively.
+if [[ ! "$meta_version" =~ ^[A-Za-z0-9._+-]+$ ]]; then
+    echo "ERROR: build-metadata version contains unsafe characters: $meta_version" >&2
+    exit 1
+fi
+# The character class above has to allow dots (1.2.3), which means the
+# whole-component values "." and ".." also match it. As a directory name
+# those traverse rather than name a version, so refuse them explicitly.
+if [[ "$meta_version" == "." || "$meta_version" == ".." ]]; then
+    echo "ERROR: build-metadata version is not a usable path component: $meta_version" >&2
+    exit 1
+fi
+
+# Now that the metadata version is validated, it's safe to compose the
+# signatures directory. The CLI `version` arg is no longer used past this
+# point; meta_version is the authoritative value.
+sig_dir="$repo_root/contrib/guix/sigs/${meta_version}/${identity}" +mkdir -p "$sig_dir" + +artifact="$out_dir/cuprated-${meta_version}-${meta_rust_target}.tar.gz" +distsrc="$out_dir/${meta_distsrc_name}" +[[ -f "$artifact" ]] || { echo "ERROR: expected artifact missing: $artifact" >&2; exit 1; } +[[ -f "$distsrc" ]] || { echo "ERROR: expected distsrc missing: $distsrc" >&2; exit 1; } + +# Reject a stale distsrc with the same filename: build-metadata.json +# records the sha256 the build saw; if the file on disk now hashes +# differently, somebody swapped it under us. +actual_distsrc_sha="$(sha256sum "$distsrc" | awk '{print $1}')" +if [[ "$actual_distsrc_sha" != "$meta_distsrc_sha" ]]; then + echo "ERROR: distsrc sha mismatch for $distsrc" >&2 + echo " build-metadata.json: $meta_distsrc_sha" >&2 + echo " on disk now: $actual_distsrc_sha" >&2 + exit 1 +fi + +artifact_sha="$(sha256sum "$artifact" | awk '{print $1}')" +distsrc_sha="$actual_distsrc_sha" +artifact_size="$(wc -c <"$artifact" | tr -d ' ')" +attest_iso="$(date -u +%Y-%m-%dT%H:%M:%SZ)" + +# Emit stable, sorted-key JSON (Python json.dumps with sort_keys=True). +python3 - "$out_dir" "$artifact" "$distsrc" "$artifact_sha" "$distsrc_sha" "$artifact_size" "$identity_raw" "$attest_iso" > "$sig_dir/attestation.json" <<'PY' +import json, sys, pathlib +out_dir, artifact, distsrc, asha, dsha, asize, identity, when = sys.argv[1:9] +meta = json.loads(pathlib.Path(out_dir, "build-metadata.json").read_text()) +describe = json.loads(pathlib.Path(out_dir, "guix-describe.json").read_text()) +rustc = pathlib.Path(out_dir, "rustc-version.txt").read_text().strip() +cargo = pathlib.Path(out_dir, "cargo-version.txt").read_text().strip() +# Extract the official-guix channel commit, if present. 
+guix_commit = None +if isinstance(describe, list): + for ch in describe: + if ch.get("name") == "guix": + guix_commit = ch.get("commit") + break +elif isinstance(describe, dict): + guix_commit = describe.get("commit") +doc = { + "schema": "cuprate-guix-attestation/v1", + "package": meta["package"], + "version": meta["version"], + "rust_target": meta["rust_target"], + "guix_system": meta["guix_system"], + "source_date_epoch": meta["source_date_epoch"], + "git_commit": meta["git_commit"], + "distsrc": { + "filename": pathlib.Path(distsrc).name, + "sha256": dsha, + }, + "artifact": { + "filename": pathlib.Path(artifact).name, + "sha256": asha, + "size": int(asize), + }, + "guix_channel_commit": guix_commit, + "rustc_version": rustc.splitlines()[0] if rustc else None, + "cargo_version": cargo.splitlines()[0] if cargo else None, + "builder_id": identity, + "attested_at": when, +} +print(json.dumps(doc, sort_keys=True, indent=2)) +PY + +# Detached ASCII-armored signature over the JSON payload. +(cd "$sig_dir" && gpg --detach-sign --armor --output attestation.json.asc attestation.json) +echo "Wrote $sig_dir/attestation.json + attestation.json.asc" diff --git a/contrib/guix/guix-build b/contrib/guix/guix-build new file mode 100755 index 000000000..046156af5 --- /dev/null +++ b/contrib/guix/guix-build @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +set -euo pipefail + +script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +# realpath the repo root so the containment check below compares two +# physical paths. Without this, a checkout reached via a symlink would +# return a logical path from `git rev-parse` while `realpath` further +# down returns the physical path, and an in-repo --out-dir would be +# falsely rejected. +repo_root="$(realpath "$(git -C "$script_dir/../.." 
rev-parse --show-toplevel)")"
+cd "$repo_root"
+
+# Defaults; each can be overridden by the matching command-line option.
+guix_system="x86_64-linux"
+rust_target="x86_64-unknown-linux-gnu"
+package_name="cuprated"
+distsrc=""
+out_dir=""
+
+# Abort with a readable message when an option is the last word on the
+# command line. Without this, `set -u` kills the script on the bare "$2"
+# with only "unbound variable", which does not say which option was wrong.
+# Arguments: the remaining positional parameters ("$@").
+require_value() {
+    if [[ $# -lt 2 ]]; then
+        echo "missing value for $1" >&2
+        exit 1
+    fi
+}
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --guix-system) require_value "$@"; guix_system="$2"; shift 2 ;;
+        --target) require_value "$@"; rust_target="$2"; shift 2 ;;
+        --distsrc) require_value "$@"; distsrc="$2"; shift 2 ;;
+        --package) require_value "$@"; package_name="$2"; shift 2 ;;
+        --out-dir) require_value "$@"; out_dir="$2"; shift 2 ;;
+        *) echo "unknown argument: $1" >&2; exit 1 ;;
+    esac
+done
+
+[[ "$package_name" == "cuprated" ]] || { echo "only --package cuprated is supported" >&2; exit 1; }
+[[ -n "$distsrc" ]] || { echo "missing required --distsrc <path>" >&2; exit 1; }
+[[ -f "$distsrc" ]] || { echo "distsrc not found: $distsrc" >&2; exit 1; }
+
+# Resolve to a physical path and require it to sit under the repo root,
+# mirroring the --out-dir constraint below.
+dist_src="$(realpath "$distsrc")"
+case "$dist_src" in
+    "$repo_root"/*) ;;
+    *) echo "distsrc must be inside the repository" >&2; exit 1 ;;
+esac
+
+if [[ -z "$out_dir" ]]; then
+    out_dir="$repo_root/contrib/guix/out"
+fi
+mkdir -p "$out_dir"
+# Normalize and constrain to inside the repo. `guix shell --container
+# --pure` does not bind-mount arbitrary paths; an absolute path outside
+# the repo would silently fail or land in a non-persisted overlay inside
+# the container. Same goes for relative paths that resolve outside.
+out_dir="$(realpath "$out_dir")"
+case "$out_dir" in
+    "$repo_root"/*) ;;
+    *) echo "ERROR: --out-dir must resolve inside the repository ($repo_root); got $out_dir" >&2; exit 1 ;;
+esac
+
+# Capture the exact Guix instance that will be used BEFORE entering the
+# hermetic container. `guix` itself isn't in manifest.scm (and shouldn't be -
+# the whole point of `guix shell --container --pure` is no host tools), so
+# `guix describe` cannot run inside the container. Running it here, on the
+# outer host, against the same channels.scm gives the authoritative pinned
+# instance metadata for build-metadata.json / attestation.
+if command -v guix >/dev/null 2>&1; then + guix time-machine -C "$script_dir/channels.scm" -- describe --format=json \ + > "$out_dir/guix-describe.json" +else + echo "ERROR: guix not on PATH; cannot capture guix-describe.json" >&2 + exit 1 +fi + +guix time-machine -C "$script_dir/channels.scm" \ + -- shell --system="$guix_system" -m "$script_dir/manifest.scm" --container --pure -- \ + env \ + GUIX_BUILD_SYSTEM="$guix_system" \ + GUIX_RUST_TARGET="$rust_target" \ + GUIX_DIST_SRC="$dist_src" \ + GUIX_OUT_DIR="$out_dir" \ + GUIX_ALLOW_COMMIT_MISMATCH="${GUIX_ALLOW_COMMIT_MISMATCH:-0}" \ + GUIX_SKIP_FENV_WORKAROUND="${GUIX_SKIP_FENV_WORKAROUND:-0}" \ + bash "$script_dir/libexec/build.sh" diff --git a/contrib/guix/guix-checksums b/contrib/guix/guix-checksums new file mode 100755 index 000000000..bbffe3631 --- /dev/null +++ b/contrib/guix/guix-checksums @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Aggregate the two release tarballs (distsrc + artifact) into a single +# SHA256SUMS file. We deliberately do NOT glob *.tar.gz: a stale tarball +# left over from a previous build/version/target could end up in the +# aggregate and be subsequently signed by guix-attest. Instead, read the +# canonical filenames from build-metadata.json, sha each, and require +# both to exist. + +out_dir="${1:-contrib/guix/out}" +if [[ ! -d "$out_dir" ]]; then + echo "output directory not found: $out_dir" >&2 + exit 1 +fi +if [[ ! -f "$out_dir/build-metadata.json" ]]; then + echo "ERROR: $out_dir/build-metadata.json not found - run a build first" >&2 + exit 1 +fi + +# One-field-per-line + `mapfile -t` so a metadata value containing +# whitespace (none today, but cheap insurance) can't corrupt later fields. 
+mapfile -t meta_fields < <( + python3 - "$out_dir/build-metadata.json" <<'PY' +import json, sys +m = json.load(open(sys.argv[1])) +for k in ("version", "rust_target", "distsrc"): + print(m[k]) +PY +) +if [[ "${#meta_fields[@]}" -ne 3 ]]; then + echo "ERROR: could not extract version/rust_target/distsrc from build-metadata.json" >&2 + exit 1 +fi +meta_version="${meta_fields[0]}" +meta_rust_target="${meta_fields[1]}" +meta_distsrc_name="${meta_fields[2]}" +if [[ -z "$meta_version" || -z "$meta_rust_target" || -z "$meta_distsrc_name" ]]; then + echo "ERROR: build-metadata.json has empty version/rust_target/distsrc" >&2 + exit 1 +fi + +artifact_name="cuprated-${meta_version}-${meta_rust_target}.tar.gz" +distsrc_name="$meta_distsrc_name" + +[[ -f "$out_dir/$artifact_name" ]] || { echo "ERROR: missing $out_dir/$artifact_name" >&2; exit 1; } +[[ -f "$out_dir/$distsrc_name" ]] || { echo "ERROR: missing $out_dir/$distsrc_name" >&2; exit 1; } + +( + cd "$out_dir" + # Sort the two filenames into a stable order before hashing. Use a + # `while read` loop rather than `xargs -d` so this stays portable + # across BSD/macOS hosts that don't ship GNU xargs. + printf '%s\n%s\n' "$distsrc_name" "$artifact_name" \ + | LC_ALL=C sort \ + | while IFS= read -r f; do + sha256sum "$f" + done > SHA256SUMS +) + +echo "Wrote $out_dir/SHA256SUMS" diff --git a/contrib/guix/guix-mk-distsrc b/contrib/guix/guix-mk-distsrc new file mode 100755 index 000000000..4bf40188d --- /dev/null +++ b/contrib/guix/guix-mk-distsrc @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +set -euo pipefail +script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +# Match guix-build: realpath so the containment check below compares +# physical paths consistently with `realpath "$out_dir"`. +repo_root="$(realpath "$(git -C "$script_dir/../.." 
rev-parse --show-toplevel)")"
+cd "$repo_root"
+
+# Usage: guix-mk-distsrc [--out-dir <dir>] [<guix-system>]
+# A bare word selects the Guix system; "--" stops option parsing.
+guix_system="x86_64-linux"
+out_dir=""
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --out-dir)
+            # Check for the value explicitly: under `set -u` a bare "$2"
+            # would abort with only "unbound variable".
+            [[ $# -ge 2 ]] || { echo "missing value for $1" >&2; exit 1; }
+            out_dir="$2"; shift 2 ;;
+        --) shift; break ;;
+        -*) echo "unknown argument: $1" >&2; exit 1 ;;
+        *) guix_system="$1"; shift ;;
+    esac
+done
+
+if [[ -z "$out_dir" ]]; then
+    out_dir="$repo_root/contrib/guix/out"
+fi
+mkdir -p "$out_dir"
+# Same constraint as guix-build: out_dir must live inside the repo so
+# `guix shell --container --pure` can see it.
+out_dir="$(realpath "$out_dir")"
+case "$out_dir" in
+    "$repo_root"/*) ;;
+    *) echo "ERROR: --out-dir must resolve inside the repository ($repo_root); got $out_dir" >&2; exit 1 ;;
+esac
+
+# --network is required here (and only here): mk-distsrc runs `cargo vendor`.
+guix time-machine -C "$script_dir/channels.scm" \
+    -- shell --system="$guix_system" -m "$script_dir/manifest.scm" --container --pure --network -- \
+    env GUIX_OUT_DIR="$out_dir" bash "$script_dir/mk-distsrc"
diff --git a/contrib/guix/guix-verify b/contrib/guix/guix-verify
new file mode 100755
index 000000000..e7e53af21
--- /dev/null
+++ b/contrib/guix/guix-verify
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Integrity check (NOT authentication) of a release artifact against the
+# sidecar SHA256SUM that lives next to it. Use this to check that the
+# tarball matches the sum it was published with - it does NOT verify who
+# produced the tarball or that the sum itself is trustworthy. For
+# authenticity, fetch and verify the GPG-signed attestation under
+# contrib/guix/sigs/<version>/<identity>/attestation.json.asc.
+
+archive="${1:-}"
+if [[ -z "$archive" ]]; then
+    echo "usage: $0 <archive>" >&2
+    echo " integrity check vs the sidecar <archive>.SHA256SUM" >&2
+    exit 1
+fi
+
+dir="$(cd -- "$(dirname -- "$archive")" && pwd)"
+base="$(basename -- "$archive")"
+sum_file="$dir/${base}.SHA256SUM"
+if [[ !
-f "$sum_file" ]]; then + echo "missing checksum file: $sum_file" >&2 + exit 1 +fi + +( cd "$dir" && sha256sum --check "${base}.SHA256SUM" ) diff --git a/contrib/guix/libexec/build.sh b/contrib/guix/libexec/build.sh new file mode 100755 index 000000000..e3724822d --- /dev/null +++ b/contrib/guix/libexec/build.sh @@ -0,0 +1,202 @@ +#!/usr/bin/env bash +set -euo pipefail + +repo_root="$(git rev-parse --show-toplevel)" + +export LC_ALL=C +export TZ=UTC + +# Hold all the "no network after distsrc creation" invariants up front so a +# later `cargo metadata` or anything else can't accidentally hit the network. +export CARGO_INCREMENTAL=0 +export CARGO_NET_OFFLINE=true + +dist_src="${GUIX_DIST_SRC:-}" +if [[ -z "$dist_src" || ! -f "$dist_src" ]]; then + echo "GUIX_DIST_SRC must point to a source archive produced by contrib/guix/mk-distsrc" >&2 + exit 1 +fi + +# Use a per-run build root so two concurrent invocations in the same checkout +# don't trample each other. Inside `guix shell --container --pure`, `$TMPDIR` +# is private to the container, so collisions across simultaneous outer +# invocations are also avoided. +build_root="$(mktemp -d "${TMPDIR:-/tmp}/guix-build-src.XXXXXX")" +src_dir="$build_root/src" +aside_dir="$build_root/distsrc-aside" +cleanup_build_root() { + # On success, drop the tree. On failure keep it so the user can inspect + # state - this is hugely valuable when distsrc verification or CMake + # configure fails inside a sealed container. + local code=$? + # If the distsrc-equivalence check exited mid-way (between move-aside + # and the explicit restore), put the three legitimately-added paths + # back so the preserved tree reflects the original distsrc contents. + # Idempotent: no-op if aside_dir was already cleaned up on success. 
+ if [[ -d "$aside_dir" ]]; then + [[ -e "$aside_dir/vendor" ]] && mv "$aside_dir/vendor" "$src_dir/vendor" 2>/dev/null || true + [[ -e "$aside_dir/cargo" ]] && mv "$aside_dir/cargo" "$src_dir/.cargo" 2>/dev/null || true + [[ -e "$aside_dir/distsrc-meta.json" ]] && mv "$aside_dir/distsrc-meta.json" "$src_dir/.cuprate-distsrc.json" 2>/dev/null || true + rmdir "$aside_dir" 2>/dev/null || true + fi + if [[ $code -ne 0 ]]; then + echo "build failed; preserving build root for inspection: $build_root" >&2 + else + rm -rf "$build_root" + fi +} +trap cleanup_build_root EXIT + +export CARGO_HOME="$build_root/cargo-home" +mkdir -p "$src_dir" "$CARGO_HOME" + +# See mk-distsrc: guix shell --container runs as a mapped non-root user that +# cannot honor the archive's stored uid/gid; pass --no-same-owner so tar +# accepts the unprivileged extraction. +tar -xf "$dist_src" --no-same-owner --no-same-permissions -C "$src_dir" + +# Parse the manifest we just extracted. cargo can pick up its own +# vendor config now that CARGO_HOME and CARGO_NET_OFFLINE are set. 
+version="$({ cd "$src_dir" && cargo metadata --locked --format-version=1 --no-deps | python3 -c 'import json,sys; print(next(p["version"] for p in json.load(sys.stdin)["packages"] if p["name"]=="cuprated"))'; })"
+: "${version:?unable to parse version}"
+
+# Read the provenance fields recorded in the distsrc. The file path is
+# handed to python as an argument instead of being spliced into the program
+# text, so a quote or backslash in $src_dir (which derives from $TMPDIR)
+# cannot alter the python source.
+distsrc_meta="$src_dir/.cuprate-distsrc.json"
+SOURCE_DATE_EPOCH="$(python3 -c 'import json,sys; print(json.load(open(sys.argv[1]))["source_date_epoch"])' "$distsrc_meta")"
+git_commit="$(python3 -c 'import json,sys; print(json.load(open(sys.argv[1]))["git_commit"])' "$distsrc_meta")"
+# Both values come from the archive that is still being verified, so check
+# their shape before use: git_commit is later handed to git as a revision
+# argument (a leading "-" would be parsed as an option), and
+# SOURCE_DATE_EPOCH is exported to every build tool.
+if [[ ! "$SOURCE_DATE_EPOCH" =~ ^[0-9]+$ ]]; then
+    echo "distsrc metadata has a non-numeric source_date_epoch: $SOURCE_DATE_EPOCH" >&2
+    exit 1
+fi
+if [[ ! "$git_commit" =~ ^[0-9a-fA-F]{40}$ ]]; then
+    echo "distsrc metadata has a malformed git_commit: $git_commit" >&2
+    exit 1
+fi
+distsrc_sha256="$(sha256sum "$dist_src" | awk '{print $1}')"
+outer_commit="$(git -C "$repo_root" rev-parse HEAD)"
+if [[ "$outer_commit" != "$git_commit" && "${GUIX_ALLOW_COMMIT_MISMATCH:-0}" != "1" ]]; then
+    echo "outer checkout $outer_commit differs from distsrc commit $git_commit" >&2
+    exit 1
+fi
+export SOURCE_DATE_EPOCH
+
+# cuprate-constants/build.rs hardcodes the embedded git commit by running
+# `git show -s --format=%H` in CARGO_MANIFEST_DIR (relying on git walking up
+# to find a .git/). The distsrc tarball legitimately contains no .git/, so
+# `git show` finds nothing and the build script's assert(commit.len() == 40)
+# trips. constants/build.rs also honors a GITHUB_SHA env var as an override -
+# set it from the distsrc's authoritative git_commit so the embedded commit
+# is bound to the distsrc, not to whatever outer checkout the build happens
+# to run inside.
+export GITHUB_SHA="$git_commit"
+
+# Distsrc content equivalence: verify the extracted source tree matches
+# `git archive $git_commit` from the outer checkout. The three paths
+# `mk-distsrc` legitimately adds (vendor/, .cargo/, .cuprate-distsrc.json)
+# are moved aside before the diff and restored afterwards, so any nested
+# directory or file that *happens* to share one of those basenames is
+# still compared. This catches a tampered distsrc that lies about its
+# git_commit but contains modified source files.
The git_commit equality +# check above is metadata-only and not sufficient on its own. +# +# The cleanup_build_root EXIT trap installed near the top of this script +# also restores the aside-d paths on any failure that lands here mid-way, +# so the preserved $build_root reflects the as-extracted distsrc. +verify_dir="$build_root/git-baseline" +mkdir -p "$verify_dir" +git -C "$repo_root" archive --format=tar "$git_commit" | \ + tar -xf - --no-same-owner --no-same-permissions -C "$verify_dir" +mkdir -p "$aside_dir" +mv "$src_dir/vendor" "$aside_dir/vendor" +mv "$src_dir/.cargo" "$aside_dir/cargo" +mv "$src_dir/.cuprate-distsrc.json" "$aside_dir/distsrc-meta.json" +if ! diff -rq "$verify_dir" "$src_dir" > "$build_root/distsrc-diff.log" 2>&1; then + echo "ERROR: distsrc contents diverge from git commit $git_commit" >&2 + echo "see diff log:" >&2 + cat "$build_root/distsrc-diff.log" >&2 + exit 1 +fi +mv "$aside_dir/vendor" "$src_dir/vendor" +mv "$aside_dir/cargo" "$src_dir/.cargo" +mv "$aside_dir/distsrc-meta.json" "$src_dir/.cuprate-distsrc.json" +rmdir "$aside_dir" +rm -rf "$verify_dir" + +cd "$src_dir" + +# RandomX builds with CMake's ARCH default ("default"), which uses +# compiler-capability-gated -maes -mssse3 -mavx2 - all compiler-deterministic, +# none host-CPU-specific. The bundled randomx-rs build.rs passes +# .define("DARCH", "native"), but cmake reads ARCH, not DARCH, so that line +# is a silent no-op (a years-old typo that fortuitously keeps the build +# portable). See contrib/guix/README.md > "RandomX" for the longer story. +export RUSTFLAGS="--remap-path-prefix=$src_dir=/cuprate -C codegen-units=1" +export CFLAGS="-ffile-prefix-map=$src_dir=/cuprate" + +# Workaround: Guix gcc-15.2 libstdc++ ships with both _GLIBCXX_HAVE_FENV_H +# and _GLIBCXX_USE_C99_FENV undefined in bits/c++config.h, so never +# pulls in glibc's and `fesetround` is absent in the global +# namespace - even though the underlying glibc 2.41 obviously has it. 
Define +# both macros so any C++ caller of (RandomX, plus future C++ deps) +# compiles cleanly. +# +# This block is REMOVABLE once Guix's libstdc++ packaging is fixed upstream. +# To check: build with GUIX_SKIP_FENV_WORKAROUND=1 and see if RandomX still +# compiles. If yes, this whole `if` block can be deleted. +fenv_workaround="" +if [[ "${GUIX_SKIP_FENV_WORKAROUND:-0}" != "1" ]]; then + fenv_workaround=" -D_GLIBCXX_HAVE_FENV_H=1 -D_GLIBCXX_USE_C99_FENV=1" +fi +export CXXFLAGS="-ffile-prefix-map=$src_dir=/cuprate${fenv_workaround}" +# Guix's gcc-toolchain profile only provides `gcc`/`g++`, not the legacy `cc` +# alias; cc-rs (used by -sys crates such as libsqlite3-sys, openssl-sys, +# randomx-rs, ring, etc.) defaults to `cc` and fails with +# ToolNotFound: failed to find tool "cc": No such file or directory +# Pointing CC/CXX/AR/AS at the actual binaries fixes every -sys crate. +export CC=gcc +export CXX=g++ +export AR=ar +export AS=as +export LD=ld +export RANLIB=ranlib +export STRIP=strip +# Force openssl-sys to use the system openssl provided by the manifest via +# pkg-config rather than building it from source via openssl-src (which would +# also need `make`, but is wasteful when we already ship openssl). +export OPENSSL_NO_VENDOR=1 +# Some -sys crates use $GUIX_ENVIRONMENT to find headers/libs when pkg-config +# is not available; provide explicit hints. +if [[ -n "${GUIX_ENVIRONMENT:-}" ]]; then + export OPENSSL_DIR="$GUIX_ENVIRONMENT" + export PKG_CONFIG_PATH="$GUIX_ENVIRONMENT/lib/pkgconfig:${PKG_CONFIG_PATH:-}" +fi + +rust_target="${GUIX_RUST_TARGET:-x86_64-unknown-linux-gnu}" + +out_dir="${GUIX_OUT_DIR:-$repo_root/contrib/guix/out}" +mkdir -p "$out_dir" + +# Stream verbose-makefile + verbose-build output to a file so smoke tests +# can mechanically grep for -march=native / -mcpu=native / target-cpu=native +# regressions (RandomX or any future cc-rs crate could re-introduce them). 
+build_log="$out_dir/build-${rust_target}.log" +{ + cargo build --frozen --release --package cuprated --target "$rust_target" --verbose +} 2>&1 | tee "$build_log" + +bash "$repo_root/contrib/guix/libexec/package.sh" "$version" "$rust_target" "$SOURCE_DATE_EPOCH" "$out_dir" "$src_dir" + +binary="$src_dir/target/${rust_target}/release/cuprated" +# ldd output is host/loader-dependent and varies by system; keep it as a +# diagnostic file only - it MUST NOT be included in SHA256SUMS or +# attestation. guix-checksums excludes it explicitly. +if command -v ldd >/dev/null 2>&1; then + ldd "$binary" > "$out_dir/ldd-${rust_target}.diag.txt" 2>&1 || true +fi + +rustc --version --verbose > "$out_dir/rustc-version.txt" +cargo --version --verbose > "$out_dir/cargo-version.txt" + +cat > "$out_dir/build-metadata.json" < "$out_dir/$archive" +) + +( + cd "$out_dir" + sha256sum "$archive" > "$archive.SHA256SUM" +) diff --git a/contrib/guix/manifest.scm b/contrib/guix/manifest.scm new file mode 100644 index 000000000..a1e61c375 --- /dev/null +++ b/contrib/guix/manifest.scm @@ -0,0 +1,20 @@ +(specifications->manifest + (list + "bash" + "coreutils" + "git" + "gcc-toolchain" + "cmake" + "make" + "pkg-config" + "openssl" + "perl" + "python" + "rust" + "rust:cargo" + "gzip" + "tar" + "findutils" + "diffutils" + "gawk" + "nss-certs")) diff --git a/contrib/guix/mk-distsrc b/contrib/guix/mk-distsrc new file mode 100755 index 000000000..29763c95a --- /dev/null +++ b/contrib/guix/mk-distsrc @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +set -euo pipefail + +repo_root="$(git rev-parse --show-toplevel)" +cd "$repo_root" +export LC_ALL=C +export TZ=UTC + +# Refresh the stat cache first: guix shell --container can serve files with +# stat metadata that differs from what was indexed on the host, which makes +# `git diff-index --quiet` report dirty without an actual content diff. +git update-index -q --refresh >/dev/null 2>&1 || true +if ! 
git diff-index --quiet HEAD --; then + echo "working tree is dirty; commit changes before creating distsrc" >&2 + echo "--- git status --porcelain ---" >&2 + git status --porcelain >&2 + echo "--- git diff-index --name-only HEAD ---" >&2 + git diff-index --name-only HEAD -- >&2 + exit 1 +fi + +out_dir="${GUIX_OUT_DIR:-$repo_root/contrib/guix/out}" +mkdir -p "$out_dir" + +# Per-run work dir so two concurrent invocations don't trample each other's +# extracted source / cargo vendor output. +work_dir="$(mktemp -d "${TMPDIR:-/tmp}/guix-distsrc.XXXXXX")" +cleanup_work_dir() { + local code=$? + if [[ $code -ne 0 ]]; then + echo "mk-distsrc failed; preserving work dir for inspection: $work_dir" >&2 + else + rm -rf "$work_dir" + fi +} +trap cleanup_work_dir EXIT + +mkdir -p "$work_dir/src" +export CARGO_HOME="$work_dir/cargo-home" +mkdir -p "$CARGO_HOME" + +version="$({ cargo metadata --locked --format-version=1 --no-deps | python3 -c 'import json,sys; print(next(p["version"] for p in json.load(sys.stdin)["packages"] if p["name"]=="cuprated"))'; })" +commit="$(git rev-parse HEAD)" +short_commit="$(git rev-parse --short=12 HEAD)" +source_date_epoch="$(git log -1 --pretty=%ct)" +archive="cuprate-${version}-${short_commit}-src.tar.gz" + +if [[ -n "${GUIX_ENVIRONMENT:-}" ]]; then + cert_file="$GUIX_ENVIRONMENT/etc/ssl/certs/ca-certificates.crt" + cert_dir="$GUIX_ENVIRONMENT/etc/ssl/certs" + [[ -f "$cert_file" ]] && export SSL_CERT_FILE="$cert_file" GIT_SSL_CAINFO="$cert_file" CURL_CA_BUNDLE="$cert_file" + [[ -d "$cert_dir" ]] && export SSL_CERT_DIR="$cert_dir" +fi + +# --no-same-owner: inside guix shell --container we run as a mapped user +# without privilege to chown to uid 0/gid 0; without this flag, tar fails +# with "Cannot change ownership ... Invalid argument" on every entry. +# --no-same-permissions: similarly avoid setting setuid/setgid bits. 
+git archive --format=tar HEAD | tar -xf - --no-same-owner --no-same-permissions -C "$work_dir/src" +( + cd "$work_dir/src" + mkdir -p .cargo + cargo vendor --locked --versioned-dirs vendor > .cargo/config.toml + # No post-vendor patching needed. The bundled randomx-rs has a years-old + # build.rs typo (`.define("DARCH", ...)` instead of `ARCH`) that means + # RandomX's CMake never sees the override and falls through to + # ARCH=default, which is exactly the portable, compiler-deterministic + # build we want. The libstdc++ /fesetround workaround is handled + # in libexec/build.sh via CXXFLAGS, not by mutating vendored sources. + cat > .cuprate-distsrc.json < "$out_dir/$archive" +) +(cd "$out_dir" && sha256sum "$archive" > "$archive.SHA256SUM") +printf '%s\n' "$out_dir/$archive" diff --git a/contrib/guix/smoke-reproducible.sh b/contrib/guix/smoke-reproducible.sh new file mode 100755 index 000000000..d87edb1dd --- /dev/null +++ b/contrib/guix/smoke-reproducible.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +set -euo pipefail + +# End-to-end reproducibility self-check. +# +# Builds cuprated twice in two independent checkouts (different paths, +# different temp dirs) and asserts that ALL determinism-sensitive outputs +# match - not just the final tarball. Specifically: +# +# - cuprate---src.tar.gz (deterministic source archive) +# - cuprated--.tar.gz (release artifact) +# - build-metadata.json (records SOURCE_DATE_EPOCH, distsrc hash) +# - rustc-version.txt (proves manifest pinned the same rustc) +# - cargo-version.txt (proves manifest pinned the same cargo) +# - guix-describe.json (proves time-machine pinned the same +# channel instance) +# +# Also greps the build log for `-march=native`, `-mcpu=native`, and +# `target-cpu=native` and FAILS if any of them appear - protection against a +# future cc-rs / rustc / CMake change quietly re-introducing host-CPU codegen. 
+ +repo_root="$(git rev-parse --show-toplevel)" +tmp="$(mktemp -d "${TMPDIR:-/tmp}/cuprate-smoke.XXXXXX")" + +# Preserve the working tree on failure so the user can dig through logs and +# intermediate artifacts. On success, clean up. +on_exit() { + local code=$? + if [[ $code -ne 0 ]]; then + echo "smoke FAILED; preserving working dir for inspection: $tmp" >&2 + else + rm -rf "$tmp" + fi +} +trap on_exit EXIT + +run_once() { + local src="$1" + local commit + commit="$(git -C "$repo_root" rev-parse HEAD)" + + git clone --quiet --no-local "$repo_root" "$src" >&2 + git -C "$src" checkout --quiet --detach "$commit" >&2 + + ( + cd "$src" + local distsrc artifact + distsrc="$(./contrib/guix/guix-mk-distsrc x86_64-linux 2>"$src/mk-distsrc.log" | tail -n1)" + ./contrib/guix/guix-build \ + --guix-system x86_64-linux \ + --target x86_64-unknown-linux-gnu \ + --package cuprated \ + --distsrc "$distsrc" >"$src/build.log" 2>&1 + + artifact="$({ find contrib/guix/out -maxdepth 1 -type f -name 'cuprated-*-x86_64-unknown-linux-gnu.tar.gz' | LC_ALL=C sort | tail -n1; })" + sha256sum "$artifact" | awk '{print $1}' + ) +} + +# Mechanical regression guard: native-arch flags should never appear in the +# build log. Catches both the obvious (-march=native, -mcpu=native) and the +# rustc form (target-cpu=native) that any future cc-rs crate or rustc config +# could re-introduce. +assert_no_native_flags() { + local src="$1" + local pat='-march=native|-mcpu=native|target-cpu=native' + if grep -E "$pat" "$src/contrib/guix/out/build-x86_64-unknown-linux-gnu.log" >/dev/null 2>&1; then + echo "FAIL: host-CPU-native build flag detected in $src build log:" >&2 + grep -nE "$pat" "$src/contrib/guix/out/build-x86_64-unknown-linux-gnu.log" >&2 | head -5 + return 1 + fi +} + +h1="$(run_once "$tmp/a")" +h2="$(run_once "$tmp/b")" + +# Compare every reproducibility-sensitive output, not just the final binary. 
+compare() { + local rel="$1" + local a="$tmp/a/contrib/guix/out/$rel" + local b="$tmp/b/contrib/guix/out/$rel" + if [[ ! -f "$a" || ! -f "$b" ]]; then + echo "FAIL: missing comparison file '$rel' in one of the runs" >&2 + return 1 + fi + if ! cmp -s "$a" "$b"; then + echo "FAIL: '$rel' differs between runs:" >&2 + diff -u "$a" "$b" | head -50 >&2 || true + return 1 + fi +} + +distsrc_basename="$(basename "$(find "$tmp/a/contrib/guix/out" -maxdepth 1 -type f -name 'cuprate-*-src.tar.gz' | head -n1)")" +artifact_basename="cuprated-$(awk -F'\"' '/"version"/{print $4; exit}' "$tmp/a/contrib/guix/out/build-metadata.json")-x86_64-unknown-linux-gnu.tar.gz" + +compare "$distsrc_basename" +compare "$artifact_basename" +compare "build-metadata.json" +compare "rustc-version.txt" +compare "cargo-version.txt" +compare "guix-describe.json" + +assert_no_native_flags "$tmp/a" +assert_no_native_flags "$tmp/b" + +[[ "$h1" == "$h2" ]] || { echo "FAIL: final artifact sha mismatch ($h1 vs $h2)" >&2; exit 1; } +echo "reproducibility smoke test: PASS ($h1)" From 31623c97efb4afe79bf360018b3576eadc0cc0f1 Mon Sep 17 00:00:00 2001 From: Ahmed Kamal Date: Sun, 17 May 2026 19:34:12 +0300 Subject: [PATCH 2/5] ci: smoke job for the contrib/guix reproducible pipeline Runs ./contrib/guix/smoke-reproducible.sh on every PR that touches the pipeline scripts or workspace Cargo metadata, and on demand via workflow_dispatch. Trust-anchor pinning: - actions/checkout pinned by full commit SHA (de0fac2e... = v6.0.2), not by mutable tag - Guix binary tarball verified by BOTH: * pinned SHA256 (aa41025489c5061543e9c48873eaa829b900b2da75d40f9648913622f5f47817) * pinned signer fingerprint (A28BF40C3E551372662D14F741AAE7DCCA3D8351 - Efraim Flashner, Guix release signer, expires 2029-01-18) Both checks run BEFORE the tarball is extracted; a single compromised anchor cannot bootstrap a malicious daemon. - No third-party actions other than actions/checkout. 
Disk cleanup, Guix install, and verification are all inline so the workflow file IS the full supply-chain spec. The job also runs a top-level mechanical grep for native-arch flags (-march=native / -mcpu=native / target-cpu=native) across every build log, in addition to the same check inside the smoke script, so a regression surfaces directly in the GH job summary. Path filter is intentionally narrow (~25-35 min per run). For PRs that change crate source without touching the pipeline, trigger the smoke manually via workflow_dispatch. --- .github/workflows/guix-reproducibility.yml | 235 +++++++++++++++++++++ contrib/guix/smoke-reproducible.sh | 59 +++++- 2 files changed, 290 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/guix-reproducibility.yml diff --git a/.github/workflows/guix-reproducibility.yml b/.github/workflows/guix-reproducibility.yml new file mode 100644 index 000000000..01a86452c --- /dev/null +++ b/.github/workflows/guix-reproducibility.yml @@ -0,0 +1,235 @@ +name: Guix reproducible build smoke + +# Verify that the contrib/guix pipeline still produces byte-identical +# cuprated artifacts across two independent runs. Runs on: +# - every PR touching the pipeline itself or workspace Cargo +# metadata (catches a regression in the toolchain pin, scripts, +# or any vendored crate that affects determinism) +# - workflow_dispatch (manual; use this when changing crate source +# code without touching the pipeline) +# +# We do NOT run on every source-tree change. The smoke job takes +# ~25-35 min on a stock ubuntu-24.04 runner and we'd burn that budget +# on every PR otherwise; the mechanical native-flag grep below catches +# the most common regression class anyway. If a change touches a crate +# that ends up linking into cuprated and might affect determinism, +# trigger the workflow manually before merging. 
+ +on: + pull_request: + paths: + - 'contrib/guix/**' + - 'Cargo.toml' + - 'Cargo.lock' + - '.github/workflows/guix-reproducibility.yml' + workflow_dispatch: + schedule: + # Weekly smoke run on the default branch. The path-filtered PR + # trigger above intentionally skips source-only PRs (to keep CI + # cost bounded); this catches drift those PRs would introduce. + - cron: '0 7 * * 1' # Mondays 07:00 UTC + +env: + # Guix binary tarball: pin BOTH the SHA256 of the bytes and the GPG + # signer fingerprint. The SHA256 is the primary trust anchor (a + # mismatched tarball fails the workflow before any key import). The + # signature verification is defense in depth and forces a maintainer + # to update both pins together when bumping Guix versions. + GUIX_VER: '1.5.0' + GUIX_ARCH: 'x86_64-linux' + GUIX_TARBALL_SHA256: 'aa41025489c5061543e9c48873eaa829b900b2da75d40f9648913622f5f47817' + GUIX_SIGNER_FPR: 'A28BF40C3E551372662D14F741AAE7DCCA3D8351' # Efraim Flashner, Guix release signer (expires 2029-01-18) + +jobs: + smoke: + name: smoke-reproducible.sh + runs-on: ubuntu-24.04 + timeout-minutes: 90 + permissions: + contents: read + steps: + # Pinned by full commit SHA (NOT tag) - tags are mutable, commit SHAs + # are not. v6.0.2 -> de0fac2e4500dabe0009e67214ff5f5447ce83dd + # Bump together with any deliberate action update. + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + # The build needs ~20 GiB of writable disk for Guix substitutes + # plus two cargo build trees. The default runner ships with only + # ~14 GiB free; reclaim the rest by removing pre-installed + # toolchains we don't use. Inline to avoid a third-party action. 
+ - name: Reclaim runner disk + run: | + set -euxo pipefail + df -h / + sudo rm -rf \ + /usr/share/dotnet \ + /opt/ghc \ + /usr/local/lib/android \ + /usr/local/share/boost \ + /opt/hostedtoolcache/CodeQL \ + /opt/hostedtoolcache/Java_* \ + /opt/hostedtoolcache/Ruby \ + /opt/hostedtoolcache/PyPy \ + /opt/hostedtoolcache/go \ + /opt/hostedtoolcache/node \ + "$AGENT_TOOLSDIRECTORY" || true + sudo docker system prune -af || true + df -h / + + - name: Install Guix + run: | + set -euxo pipefail + export DEBIAN_FRONTEND=noninteractive + sudo apt-get update -qq + sudo apt-get install -y --no-install-recommends \ + xz-utils gpg gpg-agent ca-certificates curl jq + + tarball="guix-binary-${GUIX_VER}.${GUIX_ARCH}.tar.xz" + base="https://ftp.gnu.org/gnu/guix" + + # 1. Fetch tarball + detached sig. + curl -fsSL "$base/$tarball" -o "/tmp/$tarball" + curl -fsSL "$base/$tarball.sig" -o "/tmp/$tarball.sig" + + # 2. SHA256 check first - this is the primary trust anchor and + # runs before any key import. + echo "$GUIX_TARBALL_SHA256 /tmp/$tarball" > /tmp/expected.sha256 + sha256sum -c /tmp/expected.sha256 + + # 3. GPG verification against the pinned signer fingerprint. + # + # Public keyservers flake on GitHub runners (we saw an empty + # response from the dirmngr default within 70ms in a previous + # run). Try a list of well-known keyservers in order, and do + # the FULL verification (--status-fd + VALIDSIG check) inside + # each iteration with a per-keyserver scratch homedir. This + # way, a malformed-but-importable key from server A doesn't + # poison the homedir and block trying server B - we only + # commit the homedir on a successful pinned VALIDSIG. 
+ verified=0 + for ks in \ + hkps://keyserver.ubuntu.com \ + hkps://keys.openpgp.org \ + hkps://pgp.mit.edu; do + echo "Trying keyserver: $ks" + try_home="$(mktemp -d /tmp/gpg-try.XXXXXX)" + chmod 0700 "$try_home" + if gpg --homedir "$try_home" --batch --no-tty --quiet \ + --keyserver "$ks" --recv-keys "$GUIX_SIGNER_FPR" \ + >/dev/null 2>&1 \ + && gpg --homedir "$try_home" --batch --no-tty --status-fd 1 --verify \ + "/tmp/$tarball.sig" "/tmp/$tarball" 2>/dev/null \ + | tee "$try_home/status" >/dev/null \ + && grep -q "^\[GNUPG:\] VALIDSIG $GUIX_SIGNER_FPR " "$try_home/status"; then + # Commit this homedir as the trusted one. + rm -rf /tmp/gpghome + mv "$try_home" /tmp/gpghome + echo "Verified Guix tarball signature with $GUIX_SIGNER_FPR via $ks" + verified=1 + break + fi + rm -rf "$try_home" + done + if [[ "$verified" -ne 1 ]]; then + echo "FATAL: no keyserver yielded a key that verifies the Guix tarball signature against pinned fingerprint $GUIX_SIGNER_FPR" >&2 + exit 1 + fi + + # 4. Only now extract. + sudo tar --warning=no-timestamp -xJf "/tmp/$tarball" -C / + + # Create build users (Guix daemon needs an isolated UID pool). + sudo groupadd --system guixbuild || true + for i in $(seq -w 1 10); do + id "guixbuilder$i" >/dev/null 2>&1 || sudo useradd \ + -g guixbuild -G guixbuild -d /var/empty -s /usr/sbin/nologin \ + -c "Guix build user $i" --system "guixbuilder$i" + done + + # Profile symlink for root + sudo mkdir -p /root/.config/guix + sudo ln -sf /var/guix/profiles/per-user/root/current-guix /root/.config/guix/current + + # Authorize substitute keys (the daemon will only accept + # substitutes signed by these). These come out of the + # verified tarball we extracted above. No `|| true`: a + # failure here would leave the daemon with no authorized + # keys, which is a real problem worth surfacing loudly. 
+ GUIX_BIN=/var/guix/profiles/per-user/root/current-guix/bin + key_count=0 + for key in /var/guix/profiles/per-user/root/current-guix/share/guix/*.pub; do + [[ -f "$key" ]] || continue + # `cat | sudo` rather than `sudo ... < "$key"`: the input + # redirect is opened by the unprivileged shell, not by sudo, + # so this is the shellcheck-clean way to feed the key to the + # privileged process (also robust if the .pub ever stops + # being world-readable to non-root). + cat "$key" | sudo "$GUIX_BIN/guix" archive --authorize + key_count=$((key_count + 1)) + done + if [[ "$key_count" -eq 0 ]]; then + echo "FATAL: no substitute keys authorized from verified tarball" >&2 + exit 1 + fi + echo "Authorized $key_count substitute key(s) from verified tarball" + + # Start the daemon. The `> /tmp/guix-daemon.log 2>&1` redirect + # has to happen INSIDE the sudo shell - otherwise the redirect + # is opened by the unprivileged shell (SC2024). The + # `setsid /tmp/guix-daemon.log 2>&1" > "$GITHUB_PATH" + + - name: Run smoke-reproducible.sh + run: | + set -euxo pipefail + sudo -E PATH="$PATH" ./contrib/guix/smoke-reproducible.sh + + # Mechanical regression guard. smoke-reproducible.sh already does + # this check itself; running it again at the workflow level makes + # the regression render directly in the job summary. The smoke + # script exports its per-run build logs to contrib/guix/smoke-logs/ + # before its cleanup runs (and `chmod a+rX` them so this non-sudo + # step can read them even when the smoke script ran under sudo). + - name: Assert no host-CPU-native flags in build logs + if: always() + run: | + set -uo pipefail + shopt -s globstar nullglob + pat='-march=native|-mcpu=native|target-cpu=native' + # Positive self-test: a known-bad line MUST trip the grep. 
Without + # this, a future regression in `pat` or the grep invocation could + # silently turn the guard into a no-op (the regex starts with + # `-m`, which grep parses as an option unless `--` or `-e` is + # used - exactly the trap this self-test catches). + selftest="$(mktemp)" + printf '%s\n' 'cc -march=native foo.c' > "$selftest" + if ! grep -nE -- "$pat" "$selftest" >/dev/null 2>&1; then + echo "::error::native-flag guard self-test failed; regex is broken" >&2 + rm -f "$selftest" + exit 2 + fi + rm -f "$selftest" + logs=( contrib/guix/smoke-logs/build-*.log ) + if [[ "${#logs[@]}" -eq 0 ]]; then + echo "::warning::no exported smoke logs found at contrib/guix/smoke-logs/build-*.log; native-flag scan not run" + exit 0 + fi + found=0 + for f in "${logs[@]}"; do + if grep -nE -- "$pat" "$f" >/dev/null 2>&1; then + echo "::error file=$f::native-arch flag detected" + grep -nE -- "$pat" "$f" | head -5 + found=1 + fi + done + exit "$found" + + - name: Surface daemon log on failure + if: failure() + run: tail -200 /tmp/guix-daemon.log || true diff --git a/contrib/guix/smoke-reproducible.sh b/contrib/guix/smoke-reproducible.sh index d87edb1dd..0b257ee5e 100755 --- a/contrib/guix/smoke-reproducible.sh +++ b/contrib/guix/smoke-reproducible.sh @@ -22,10 +22,32 @@ set -euo pipefail repo_root="$(git rev-parse --show-toplevel)" tmp="$(mktemp -d "${TMPDIR:-/tmp}/cuprate-smoke.XXXXXX")" +# Where to drop the per-run build logs so a later CI step (or a curious +# operator) can scan them. The smoke script is typically invoked under +# `sudo` in CI, which makes $tmp root-owned mode 0700 - so the later +# non-sudo workflow step that greps for native-arch flags can't read +# anything in /tmp/cuprate-smoke.*. Exporting the logs to a workspace- +# relative path and chmod a+rX fixes that for both success and failure +# without changing the failure-preserves-tmp behaviour. 
+LOG_EXPORT_DIR="${LOG_EXPORT_DIR:-$repo_root/contrib/guix/smoke-logs}" + # Preserve the working tree on failure so the user can dig through logs and -# intermediate artifacts. On success, clean up. +# intermediate artifacts. On success, clean up. Always export the build +# logs first so the assert step (in this script and at the workflow level) +# has something to scan even on success. on_exit() { local code=$? + if [[ -d "$tmp" ]]; then + mkdir -p "$LOG_EXPORT_DIR" + local sub + for sub in a b; do + local src_log="$tmp/$sub/contrib/guix/out/build-x86_64-unknown-linux-gnu.log" + if [[ -f "$src_log" ]]; then + cp "$src_log" "$LOG_EXPORT_DIR/build-$sub.log" 2>/dev/null || true + fi + done + chmod -R a+rX "$LOG_EXPORT_DIR" 2>/dev/null || true + fi if [[ $code -ne 0 ]]; then echo "smoke FAILED; preserving working dir for inspection: $tmp" >&2 else @@ -61,16 +83,45 @@ run_once() { # build log. Catches both the obvious (-march=native, -mcpu=native) and the # rustc form (target-cpu=native) that any future cc-rs crate or rustc config # could re-introduce. +# +# NOTE: the `--` before "$pat" is load-bearing. The regex begins with `-m`, +# which grep otherwise parses as the `-m` (max-count) option, resulting in +# `grep: invalid max count` and a non-zero exit that the `if` block +# silently treats as "not found" - i.e. the guard becomes fail-open. +NATIVE_FLAG_REGEX='-march=native|-mcpu=native|target-cpu=native' assert_no_native_flags() { local src="$1" - local pat='-march=native|-mcpu=native|target-cpu=native' - if grep -E "$pat" "$src/contrib/guix/out/build-x86_64-unknown-linux-gnu.log" >/dev/null 2>&1; then + local log="$src/contrib/guix/out/build-x86_64-unknown-linux-gnu.log" + if [[ ! 
-s "$log" ]]; then + echo "FAIL: build log is missing or empty: $log" >&2 + return 1 + fi + if grep -E -- "$NATIVE_FLAG_REGEX" "$log" >/dev/null 2>&1; then echo "FAIL: host-CPU-native build flag detected in $src build log:" >&2 - grep -nE "$pat" "$src/contrib/guix/out/build-x86_64-unknown-linux-gnu.log" >&2 | head -5 + # `head -5 >&2`: redirect head's output to stderr, not grep's + # (otherwise head reads from an empty pipe and prints nothing). + grep -nE -- "$NATIVE_FLAG_REGEX" "$log" | head -5 >&2 return 1 fi } +# Positive self-test for the regex/guard: a known-bad line MUST trip the +# grep. If this ever stops firing, the regression guard is broken (the +# leading `-m` parses as an option without `--`/`-e`). +native_flag_selftest() { + local tmpfile + tmpfile="$(mktemp "${TMPDIR:-/tmp}/native-flag-selftest.XXXXXX")" + printf '%s\n' 'cc -march=native foo.c' > "$tmpfile" + if ! grep -E -- "$NATIVE_FLAG_REGEX" "$tmpfile" >/dev/null 2>&1; then + echo "FAIL: native-flag guard self-test failed; regex is broken" >&2 + rm -f "$tmpfile" + return 1 + fi + rm -f "$tmpfile" +} + +native_flag_selftest + h1="$(run_once "$tmp/a")" h2="$(run_once "$tmp/b")" From 260bc19ab12cf7ddb10da66d3729b923bdf85fb6 Mon Sep 17 00:00:00 2001 From: Ahmed Kamal Date: Sun, 17 May 2026 19:34:12 +0300 Subject: [PATCH 3/5] contrib/guix: README + threat model Operator-facing documentation for the Guix reproducible build flow. Notable sections: Scope x86_64-unknown-linux-gnu only; aarch64 / macOS on the roadmap. RandomX Documents the years-old `cmake::Config::define("DARCH", "native")` typo in Cuprate/randomx-rs's build.rs. cmake reads ARCH (not DARCH), so the line is a silent no-op and the actual build uses CMake's ARCH=default - which produces compiler-capability-gated -maes/-mssse3/-mavx2 (all host-CPU-independent under a pinned toolchain). 
Filing a typo fix upstream would unblock the `RANDOMX_ARCH=native` env-var path for miners; until then, the smoke job's -march=native grep is the regression guard. Distsrc content equivalence Explains the libexec/build.sh check that compares the extracted distsrc source tree against git archive of the claimed commit, excluding only mk-distsrc-added paths (vendor/, .cargo/, .cuprate-distsrc.json). A tampered distsrc that lies about its git_commit fails this check before the build starts. Threat model Spells out the trust roots (Guix substitute keys, Guix binary tarball, channel commit+introduction, release signing key, git tree integrity) and what the pipeline protects against vs not. Substitute trust is described in Guix-precise terms (signed by authorized substitute keys), not the looser "content-addressed and signed". Known workarounds Documents the CXXFLAGS _GLIBCXX_HAVE_FENV_H workaround for Guix's gcc-15.2 libstdc++, with a clear `GUIX_SKIP_FENV_WORKAROUND=1` knob to test whether it's still needed. --- contrib/guix/README.md | 258 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 contrib/guix/README.md diff --git a/contrib/guix/README.md b/contrib/guix/README.md new file mode 100644 index 000000000..96ee41afb --- /dev/null +++ b/contrib/guix/README.md @@ -0,0 +1,258 @@ +# Guix reproducible build flow (cuprated) + +This directory provides a Guix-first reproducible release pipeline for +`cuprated` on Linux. The goal is that anyone with the right toolchain +can rebuild the published `cuprated-<version>-x86_64-unknown-linux-gnu.tar.gz` +from this repository and get **byte-identical** output, then verify the +release's SHA256 against the upstream publication. + +## Scope + +| | | +|---|---| +| Supported target today | `x86_64-unknown-linux-gnu` | +| Supported Guix system | `x86_64-linux` | +| Other architectures | not yet — see [Roadmap](#roadmap) | + +This is a node binary, not a miner.
RandomX is built in CMake's +`ARCH=default` mode (compiler-capability-gated `-maes -mssse3 -mavx2`, +no `-march=native`), so the binary is identical across x86_64 CPUs +under the same toolchain. Block-verification uses RandomX's "light" +mode, which doesn't depend on CPU-specific codegen for correctness; +miners using `randomx-rs` directly should override via the +`RANDOMX_ARCH` env var to opt in to host-specific instructions +(currently a no-op because of the `DARCH` typo described under +[RandomX](#randomx)). + +## Build flow + +1. Create a deterministic source archive (vendored Cargo deps, fixed + mtimes/uid/gid/path-prefix), inside a hermetic Guix shell: + + ```bash + ./contrib/guix/guix-mk-distsrc x86_64-linux + ``` + +2. Build `cuprated` from the source archive using `guix time-machine` + pinned to the channel in `channels.scm`: + + ```bash + ./contrib/guix/guix-build \ + --guix-system x86_64-linux \ + --target x86_64-unknown-linux-gnu \ + --package cuprated \ + --distsrc contrib/guix/out/cuprate-<version>-<commit>-src.tar.gz + ``` + +3. Aggregate checksums + GPG-signed JSON attestation: + + ```bash + ./contrib/guix/guix-checksums contrib/guix/out + ./contrib/guix/guix-attest contrib/guix/out + ``` + +4. Sidecar SHA256 integrity check against a published `.SHA256SUM`: + + ```bash + ./contrib/guix/guix-verify contrib/guix/out/cuprated-<version>-x86_64-unknown-linux-gnu.tar.gz + ``` + +5. End-to-end reproducibility self-check (builds twice and compares + every determinism-sensitive output): + + ```bash + ./contrib/guix/smoke-reproducible.sh + ``` + +### Concurrency + +Each script uses per-run `mktemp` working directories, so concurrent +invocations cannot trample each other's *intermediate* state. **Final +outputs share fixed names by default** (e.g. `cuprated-<version>-<target>.tar.gz`, +`build-metadata.json`, `build-<target>.log`), so two concurrent runs +writing into the same `contrib/guix/out` would overwrite each other's +results.
Use `--out-dir <dir>` (on `guix-mk-distsrc` and `guix-build`) +to give each parallel run its own output directory; the path is +required to live inside the repository so the Guix container can see +it. `smoke-reproducible.sh` sidesteps this entirely by performing each +run in its own `git clone` of the working tree. + +## Output files + +`contrib/guix/out/` after a full run: + +- `cuprate-<version>-<commit>-src.tar.gz` (+ `.SHA256SUM`) — deterministic source +- `cuprated-<version>-<target>.tar.gz` (+ `.SHA256SUM`) — release artifact +- `SHA256SUMS` — aggregate sum over the two tarballs only +- `build-metadata.json` — package, version, target, `SOURCE_DATE_EPOCH`, + git commit, distsrc sha256, RandomX arch mode +- `guix-describe.json` — channel + commit metadata for the Guix instance + used to build (captured on the *outer* host before entering the + container, since the container doesn't ship `guix` itself) +- `rustc-version.txt`, `cargo-version.txt` — toolchain proof +- `build-<target>.log` — full verbose build log (used by the + smoke test's native-flag regression guard) +- `ldd-<target>.diag.txt` — **diagnostic only**; host-loader output + varies by system and is intentionally excluded from `SHA256SUMS` and + the signed attestation + +## Determinism inputs + +Everything that goes into a build is pinned at one of these layers: + +| Layer | Pin | Where | +|---|---|---| +| Guix instance | commit sha + channel introduction | `channels.scm` | +| Build profile (rust, gcc-toolchain, cmake, make, openssl, …) | by Guix package name; transitively pinned via Guix commit | `manifest.scm` | +| Rust source tree | git commit, verified at build time against `git archive` of the same commit (`libexec/build.sh`'s distsrc-equivalence check) | `.cuprate-distsrc.json` | +| Rust deps | `Cargo.lock`, then `cargo vendor --locked --versioned-dirs` | `Cargo.lock`, `vendor/` | +| C/C++ build flags | `--remap-path-prefix`, `-ffile-prefix-map`, `-C codegen-units=1` | `libexec/build.sh` | +| Time | `SOURCE_DATE_EPOCH` = git-commit time |
`libexec/build.sh` | +| Tar metadata | `--sort=name --mtime=@EPOCH --owner=0 --group=0 --numeric-owner --mode='a=rX,u+w'` | `mk-distsrc`, `libexec/package.sh` | +| Gzip metadata | `gzip -n` (no name, no timestamp) | same | + +## Distsrc content equivalence + +`libexec/build.sh` does **not** trust `.cuprate-distsrc.json`'s +`git_commit` field on its own. After extracting the distsrc tarball, +it runs `git archive` on that commit from the outer checkout. The +three paths `mk-distsrc` legitimately adds (`vendor/`, `.cargo/`, +`.cuprate-distsrc.json`) are moved aside to a temporary directory +under `$build_root`, then `diff -rq` runs over the rest of the tree +with no excludes — so a nested file or directory that happens to +share one of those basenames (a hypothetical future `tests/vendor/`, +for example) is still compared. After the diff succeeds, the three +paths are restored so the build can use them; on diff failure, the +build root is preserved with the paths restored so the operator can +inspect the as-extracted distsrc. A distsrc that claims a git commit +but contains modified sources fails this check before the build +starts. + +## Threat model + +**Trust roots** (compromise of any of these defeats reproducibility +verification): + +1. **The Guix substitute key set authorised on the building host.** + Guix accepts substitutes only when they validate under one of the + keys passed to `guix archive --authorize`. The CI workflow loads + those keys from the verified Guix binary tarball (see #2 below); + any host with a substitute key controls what binary artifacts the + Guix daemon will accept as cached toolchain pieces. +2. **The Guix binary bootstrap tarball.** The CI workflow downloads + `guix-binary-<version>.<arch>.tar.xz` from `ftp.gnu.org`, then verifies + it against both a pinned SHA256 *and* a pinned OpenPGP fingerprint + (`A28BF40C…3D8351`, Efraim Flashner). Compromise of *both* trust + anchors at the same time would let a malicious tarball through. +3.
**The Guix channel commit + introduction.** `channels.scm` pins the + channel by commit sha and supplies the canonical + `make-channel-introduction` for the official Guix channel. `guix + time-machine` authenticates the channel head against that + introduction before evaluating anything. +4. **The release signing key + publication channel.** Reproducibility + proves *what got shipped matches the source*; it does not prove + that the shipped binary is the one users should run. Users still + need to verify the published `cuprated-*.tar.gz.SHA256SUM` (or the + signed JSON attestation under `contrib/guix/sigs/`) against + maintainer-controlled distribution. +5. **The git tree itself.** A malicious commit landing on `main` + yields a malicious-but-reproducibly-built binary. Reproducibility + shifts the trust requirement onto code review, not away from it. + +**This pipeline protects against:** + +- A future toolchain regression silently changing artifact contents + (Guix pin + lockfile + vendor make any drift a load-bearing diff). +- A maintainer (or attacker with access to one) shipping a binary + that doesn't correspond to the published source. +- Supply-chain bit-rot in transitive Cargo deps — the lockfile + + vendor freezes everything; new upstream versions only enter via a + reviewable commit. +- A tampered distsrc tarball that claims a git commit but contains + modified sources (see [Distsrc content + equivalence](#distsrc-content-equivalence)). + +**This pipeline does NOT protect against:** + +- Compromise of any of the trust roots listed above. +- Hardware-level attacks on the build host (compromised microcode, + flashed firmware, etc.). +- A bug in `cuprated` itself. Reproducibility is about *what got + shipped*, not whether what got shipped is correct. + +## RandomX + +This pipeline depends on `randomx-rs` building RandomX in a +host-CPU-independent mode. 
As of `Cuprate/randomx-rs@567bdca`, this +happens **by accident** — a useful one to be aware of: + +- `randomx-rs/build.rs` calls `cmake::Config::new(…).define("DARCH", + "native")`, which emits `-DDARCH=native` to CMake. +- Upstream [`tevador/RandomX`](https://github.com/tevador/RandomX)'s + `CMakeLists.txt` has **zero** references to `DARCH`. It reads + `ARCH`, which defaults to `"default"` when unset. +- So the `.define("DARCH", …)` line is a years-old silent typo. The + actual ARCH value that takes effect is the CMake default, + `"default"`, regardless of what `DARCH` is set to. +- `ARCH="default"` produces a build with `-maes -mssse3 -mavx2` + flags, but each of those is gated on a *compiler-capability* check + (`check_c_compiler_flag`), not on the build host's CPU. With the + Guix toolchain pinned, all three flags resolve identically across + hosts, so the produced object code is identical. + +If `randomx-rs` ever corrects the typo (legitimately, since the line +is meaningless as written), the *named-by-intent* behaviour +(`-march=native`) would kick in and the build would silently stop +being reproducible across CPUs. The CI smoke job grep-fails on +`-march=native` / `-mcpu=native` / `target-cpu=native` to catch that +regression before merge. + +Miners using `randomx-rs` directly and wanting host-specific +performance can set `RANDOMX_ARCH=native` in their environment — but +that has no effect today because of the same typo. Filing a one-line +fix upstream at `Cuprate/randomx-rs` would unblock both that and our +regression guard simultaneously. + +## Known workarounds (remove when upstream fixes land) + +- **`_GLIBCXX_HAVE_FENV_H` / `_GLIBCXX_USE_C99_FENV` defined in + `CXXFLAGS`** (`libexec/build.sh`). Guix's gcc-15.2 libstdc++ ships + with these undefined in `bits/c++config.h`, so `<cfenv>` doesn't + pull in `<fenv.h>` and `fesetround` is absent from the global + namespace. Any C++ caller of `<cfenv>` (RandomX, + `src/instructions_portable.cpp`) then fails to link.
Setting both + macros restores the expected `<cfenv>` behaviour. To check whether + this is still needed, run with `GUIX_SKIP_FENV_WORKAROUND=1` and + see if RandomX still compiles. + +- **`OPENSSL_NO_VENDOR=1`** to force `openssl-sys` to link the audited + openssl from `manifest.scm` instead of recompiling its bundled + `openssl-src` from source. This is a behavioural improvement, not a + workaround for a bug; it's recorded here as a knob. + +## CI + +`.github/workflows/guix-reproducibility.yml` runs `smoke-reproducible.sh` +on every PR that touches `contrib/guix/**`, `Cargo.toml`, `Cargo.lock`, +or the workflow itself. It can also be triggered manually via +`workflow_dispatch` for PRs that change crate source without touching +the pipeline, and runs weekly on the default branch via `schedule` to +catch drift those source-only PRs would otherwise miss. The workflow: + +- pins `actions/checkout` by commit sha (not tag) +- pins the Guix binary tarball by SHA256 AND by GPG signer + fingerprint, and fails before extracting if either check fails +- runs the smoke script, which itself fails on any divergence in + distsrc / artifact / metadata / toolchain versions / guix-describe + output, and on any `-march=native` / `-mcpu=native` / + `target-cpu=native` flag appearing in the build log + +## Roadmap + +- aarch64-linux target +- macOS (`x86_64-darwin`, `aarch64-darwin`) — needs an alternative + hermetic-build story; Guix doesn't run natively on macOS +- A signed-attestation flow compatible with sigstore / SLSA + provenance (the current `guix-attest` produces a stable, sorted-key + signed JSON attestation; SLSA-format export is a future step) +- Re-pinning `channels.scm` to a tagged Guix release once one ships + with rust ≥ 1.91 (v1.5.0 only carries 1.88; we currently pin to a + recent commit on master) From 69872d7cf0d6470417355dda150d0f0507428de2 Mon Sep 17 00:00:00 2001 From: kim0 Date: Mon, 18 May 2026 00:20:06 +0300 Subject: [PATCH 4/5] contrib/guix, ci: round-5
path-safety + smoke forensics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply review feedback from a second round of GPT-5.5-pro oracle review: - guix-attest, guix-checksums: reject `.`/`..` and non-basename path components for every metadata value used as a filename or directory name (version, rust_target, distsrc, sanitized identity). The existing `^[A-Za-z0-9._+-]+$` regex was correct against slashes but permitted `.` and `..` segments. Also pass `--` to `sha256sum` as belt-and-braces against filenames starting with `-`. - workflow: rewrite the `cat "$key" | sudo …` comment to accurately describe why the pipe form is preferred (SC2024 cleanliness; the cat itself runs unprivileged) instead of overclaiming robustness against non-world-readable keys. - smoke-reproducible.sh: turn the silent `cp`/`chmod` swallows in the EXIT trap into explicit warnings so a degraded workflow-level log scan is visible; export `build.log` and `mk-distsrc.log` per run alongside the cargo-verbose log. When `guix-build` exits 0 but no artifact appears, dump `ls -la $out_dir`, the last 100 lines of `build.log`, and the last 50 lines of `mk-distsrc.log` so the failure mode is no longer mute. - workflow: add an `actions/upload-artifact@v5.0.0` step (pinned by full SHA) that uploads `contrib/guix/smoke-logs/` on failure, so the next recurrence of the silent-no-artifact failure has the full forensic set instead of just the timestamped tail. 
--- .github/workflows/guix-reproducibility.yml | 28 ++++++++-- contrib/guix/guix-attest | 56 ++++++++++++++++--- contrib/guix/guix-checksums | 26 ++++++++- contrib/guix/smoke-reproducible.sh | 65 +++++++++++++++++++--- 4 files changed, 151 insertions(+), 24 deletions(-) diff --git a/.github/workflows/guix-reproducibility.yml b/.github/workflows/guix-reproducibility.yml index 01a86452c..81b613a1c 100644 --- a/.github/workflows/guix-reproducibility.yml +++ b/.github/workflows/guix-reproducibility.yml @@ -159,11 +159,13 @@ jobs: key_count=0 for key in /var/guix/profiles/per-user/root/current-guix/share/guix/*.pub; do [[ -f "$key" ]] || continue - # `cat | sudo` rather than `sudo ... < "$key"`: the input - # redirect is opened by the unprivileged shell, not by sudo, - # so this is the shellcheck-clean way to feed the key to the - # privileged process (also robust if the .pub ever stops - # being world-readable to non-root). + # `cat | sudo` rather than `sudo ... < "$key"`: the + # redirect form is SC2024 (the redirect is opened by the + # unprivileged shell, not by sudo). The `cat` here is also + # unprivileged - this is purely about shellcheck-clean + # piping, not about reading a root-only key. Today the .pub + # files in the extracted tarball are world-readable, so a + # plain unprivileged read works. cat "$key" | sudo "$GUIX_BIN/guix" archive --authorize key_count=$((key_count + 1)) done @@ -233,3 +235,19 @@ jobs: - name: Surface daemon log on failure if: failure() run: tail -200 /tmp/guix-daemon.log || true + + # On failure, upload every log smoke-reproducible.sh's EXIT trap + # exported (cargo-verbose + guix-build wrapper output + mk-distsrc + # stderr, per run a/b). Without this the runner is torn down and + # the only artifact left is the timestamped grep at the end of the + # job log, which is rarely enough to root-cause a silent failure + # like "guix-build returned 0 but no cuprated tarball appeared". 
+ # Pinned by full SHA (v5.0.0 -> 330a01c490aca151604b8cf639adc76d48f6c5d4). + - name: Upload smoke logs on failure + if: failure() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: smoke-logs + path: contrib/guix/smoke-logs/ + if-no-files-found: warn + retention-days: 7 diff --git a/contrib/guix/guix-attest b/contrib/guix/guix-attest index 4b60c5473..907043d73 100755 --- a/contrib/guix/guix-attest +++ b/contrib/guix/guix-attest @@ -40,7 +40,32 @@ if [[ -z "$identity_raw" || -z "$version" ]]; then echo "usage: $0 " >&2 exit 1 fi -identity="$(printf '%s' "$identity_raw" | tr -c 'A-Za-z0-9_.@+-' '_')" +# Sanitize the raw builder identity to a path-safe form. The keep-set +# matches what `safe_path_component` (defined below) will accept, so a +# common email-shaped builder id like `kim0@example.org` becomes +# `kim0_example.org` rather than surviving sanitization and then being +# rejected by the validator. Anything outside the keep-set collapses to +# underscore. +identity="$(printf '%s' "$identity_raw" | tr -c 'A-Za-z0-9_.+-' '_')" + +# Path-component validator. Used on every metadata value that ends up as +# a filename or directory name below. The character regex allows `.`, +# which means `.` and `..` would slip through it - those are valid path +# traversal in a way slashes are not, so reject them explicitly. +safe_path_component() { + local s="$1" + [[ -n "$s" ]] || return 1 + [[ "$s" != "." && "$s" != ".." ]] || return 1 + [[ "$s" =~ ^[A-Za-z0-9._+-]+$ ]] || return 1 + return 0 +} + +# Identity is sanitized via `tr -c '...' '_'` above, but the surviving +# charset still admits "." and ".." - explicitly reject those. +if ! safe_path_component "$identity"; then + echo "ERROR: builder identity sanitizes to unsafe path component: '$identity'" >&2 + exit 1 +fi if ! 
command -v gpg >/dev/null 2>&1; then echo "ERROR: gpg not found; refusing to write unsigned attestation" >&2 @@ -87,12 +112,23 @@ if [[ "$meta_version" != "$version" ]]; then echo "ERROR: build-metadata version ($meta_version) != passed version ($version)" >&2 exit 1 fi -# Reject path-traversal or shell-meaningful characters in the metadata -# version before using it in the sigs/ path layout below. The CLI version +# Reject path-traversal or shell-meaningful characters in every metadata +# value that becomes a filename or directory name below. The CLI version # arg already had to match meta_version above, so this also gates the CLI -# input transitively. -if [[ ! "$meta_version" =~ ^[A-Za-z0-9._+-]+$ ]]; then - echo "ERROR: build-metadata version contains unsafe characters: $meta_version" >&2 +# input transitively. meta_rust_target and meta_distsrc_name are also +# checked because they end up in the artifact / distsrc paths and a +# malicious/corrupted build-metadata.json could otherwise smuggle slashes +# or `..` segments into those paths. +if ! safe_path_component "$meta_version"; then + echo "ERROR: build-metadata version is not a safe path component: '$meta_version'" >&2 + exit 1 +fi +if ! safe_path_component "$meta_rust_target"; then + echo "ERROR: build-metadata rust_target is not a safe path component: '$meta_rust_target'" >&2 + exit 1 +fi +if ! safe_path_component "$meta_distsrc_name"; then + echo "ERROR: build-metadata distsrc is not a safe path component: '$meta_distsrc_name'" >&2 exit 1 fi @@ -109,8 +145,10 @@ distsrc="$out_dir/${meta_distsrc_name}" # Reject a stale distsrc with the same filename: build-metadata.json # records the sha256 the build saw; if the file on disk now hashes -# differently, somebody swapped it under us. -actual_distsrc_sha="$(sha256sum "$distsrc" | awk '{print $1}')" +# differently, somebody swapped it under us. 
The `--` separator on +# sha256sum is belt-and-braces against a filename starting with `-` +# (already excluded by safe_path_component, but cheap). +actual_distsrc_sha="$(sha256sum -- "$distsrc" | awk '{print $1}')" if [[ "$actual_distsrc_sha" != "$meta_distsrc_sha" ]]; then echo "ERROR: distsrc sha mismatch for $distsrc" >&2 echo " build-metadata.json: $meta_distsrc_sha" >&2 @@ -118,7 +156,7 @@ if [[ "$actual_distsrc_sha" != "$meta_distsrc_sha" ]]; then exit 1 fi -artifact_sha="$(sha256sum "$artifact" | awk '{print $1}')" +artifact_sha="$(sha256sum -- "$artifact" | awk '{print $1}')" distsrc_sha="$actual_distsrc_sha" artifact_size="$(wc -c <"$artifact" | tr -d ' ')" attest_iso="$(date -u +%Y-%m-%dT%H:%M:%SZ)" diff --git a/contrib/guix/guix-checksums b/contrib/guix/guix-checksums index bbffe3631..9a407eb63 100755 --- a/contrib/guix/guix-checksums +++ b/contrib/guix/guix-checksums @@ -40,6 +40,26 @@ if [[ -z "$meta_version" || -z "$meta_rust_target" || -z "$meta_distsrc_name" ]] exit 1 fi +# Each metadata value below ends up as a filename component fed to +# sha256sum. The non-empty check above is necessary but not sufficient: +# the regex permits `.` and `..` (which are valid path-traversal +# segments), and the field could in principle contain slashes from a +# corrupted/malicious build-metadata.json. Reject everything we don't +# want to see in an output filename. +safe_path_component() { + local s="$1" + [[ -n "$s" ]] || return 1 + [[ "$s" != "." && "$s" != ".." ]] || return 1 + [[ "$s" =~ ^[A-Za-z0-9._+-]+$ ]] || return 1 + return 0 +} +for v in "$meta_version" "$meta_rust_target" "$meta_distsrc_name"; do + if ! 
safe_path_component "$v"; then + echo "ERROR: build-metadata.json contains unsafe path component: '$v'" >&2 + exit 1 + fi +done + artifact_name="cuprated-${meta_version}-${meta_rust_target}.tar.gz" distsrc_name="$meta_distsrc_name" @@ -50,11 +70,13 @@ distsrc_name="$meta_distsrc_name" cd "$out_dir" # Sort the two filenames into a stable order before hashing. Use a # `while read` loop rather than `xargs -d` so this stays portable - # across BSD/macOS hosts that don't ship GNU xargs. + # across BSD/macOS hosts that don't ship GNU xargs. `sha256sum --` is + # belt-and-braces against any filename starting with `-` (already + # excluded by safe_path_component above). printf '%s\n%s\n' "$distsrc_name" "$artifact_name" \ | LC_ALL=C sort \ | while IFS= read -r f; do - sha256sum "$f" + sha256sum -- "$f" done > SHA256SUMS ) diff --git a/contrib/guix/smoke-reproducible.sh b/contrib/guix/smoke-reproducible.sh index 0b257ee5e..65a5fcbb8 100755 --- a/contrib/guix/smoke-reproducible.sh +++ b/contrib/guix/smoke-reproducible.sh @@ -35,18 +35,51 @@ LOG_EXPORT_DIR="${LOG_EXPORT_DIR:-$repo_root/contrib/guix/smoke-logs}" # intermediate artifacts. On success, clean up. Always export the build # logs first so the assert step (in this script and at the workflow level) # has something to scan even on success. +# +# Export failures (mkdir/cp/chmod) emit warnings rather than fail the +# trap, because: +# - the trap is firing on exit; its job is to preserve forensic state, +# not to surface a NEW failure that obscures the original one; +# - the native-flag scan in this script runs on `$tmp/$sub/...` BEFORE +# cleanup, so the primary guard does not depend on the workspace +# copy; the export is purely for the workflow-level duplicate scan; +# - we still want a visible signal if the workflow-level scan is about +# to silently degrade to "no logs found". on_exit() { local code=$? 
if [[ -d "$tmp" ]]; then - mkdir -p "$LOG_EXPORT_DIR" - local sub - for sub in a b; do - local src_log="$tmp/$sub/contrib/guix/out/build-x86_64-unknown-linux-gnu.log" - if [[ -f "$src_log" ]]; then - cp "$src_log" "$LOG_EXPORT_DIR/build-$sub.log" 2>/dev/null || true + if ! mkdir -p "$LOG_EXPORT_DIR"; then + echo "warning: could not create smoke log export dir: $LOG_EXPORT_DIR" >&2 + else + local sub + for sub in a b; do + # cargo's --verbose build log (this is what the workflow scans + # for native-arch flags). + local cargo_log="$tmp/$sub/contrib/guix/out/build-x86_64-unknown-linux-gnu.log" + if [[ -f "$cargo_log" ]]; then + if ! cp "$cargo_log" "$LOG_EXPORT_DIR/build-$sub.log"; then + echo "warning: could not export smoke log $cargo_log -> $LOG_EXPORT_DIR/build-$sub.log" >&2 + fi + fi + # The smoke script also captures guix-build's wrapper output and + # mk-distsrc's stderr - these contain the actual failure + # explanation when the build dies before producing the cargo + # log (e.g. distsrc-equivalence diff, OOM-killed rustc, + # substitute fetch failure). Export them too so a workflow + # `upload-artifact` step can collect the full forensic set. + for inner in build.log mk-distsrc.log; do + local src_log="$tmp/$sub/$inner" + if [[ -f "$src_log" ]]; then + if ! cp "$src_log" "$LOG_EXPORT_DIR/$sub-$inner"; then + echo "warning: could not export smoke log $src_log -> $LOG_EXPORT_DIR/$sub-$inner" >&2 + fi + fi + done + done + if ! 
chmod -R a+rX "$LOG_EXPORT_DIR" 2>/dev/null; then + echo "warning: could not chmod a+rX $LOG_EXPORT_DIR (workflow log scan may be unable to read it)" >&2 fi - done - chmod -R a+rX "$LOG_EXPORT_DIR" 2>/dev/null || true + fi fi if [[ $code -ne 0 ]]; then echo "smoke FAILED; preserving working dir for inspection: $tmp" >&2 @@ -75,6 +108,22 @@ run_once() { --distsrc "$distsrc" >"$src/build.log" 2>&1 artifact="$({ find contrib/guix/out -maxdepth 1 -type f -name 'cuprated-*-x86_64-unknown-linux-gnu.tar.gz' | LC_ALL=C sort | tail -n1; })" + if [[ -z "$artifact" || ! -f "$artifact" ]]; then + # guix-build returned 0 (we're past `set -e`) but didn't write an + # artifact - print enough state to diagnose without leaving the + # caller to guess. This is the classic "build silently succeeded + # without producing output" failure mode; the immediate diagnostic + # is the tail of build.log, which the on_exit trap also exports to + # LOG_EXPORT_DIR for the workflow to archive. + echo "FAIL: guix-build exited 0 but no cuprated artifact found in $src/contrib/guix/out" >&2 + echo "--- contents of $src/contrib/guix/out ---" >&2 + ls -la "$src/contrib/guix/out" >&2 || true + echo "--- last 100 lines of $src/build.log ---" >&2 + tail -100 "$src/build.log" >&2 || true + echo "--- last 50 lines of $src/mk-distsrc.log ---" >&2 + tail -50 "$src/mk-distsrc.log" >&2 || true + return 1 + fi sha256sum "$artifact" | awk '{print $1}' ) } From c73db9d41124818936604955006b39150476e8d7 Mon Sep 17 00:00:00 2001 From: kim0 Date: Mon, 18 May 2026 01:06:13 +0300 Subject: [PATCH 5/5] contrib/guix: route inner TMPDIR onto the bind-mounted host The first GHA run after the round-5 forensics commit finally surfaced the real reason GHA smoke kept silently producing no artifact: error: failed to sync Caused by: failed to unpack `windows-0.62.2/...` Caused by: No space left on device (os error 28) The host /dev/root had 112 GB free after the Reclaim runner disk step, so this is not a host-disk problem. 
It's the container's /tmp: `guix shell --container` mounts a private tmpfs over /tmp, sized at the kernel's tmpfs default (~50% of RAM ~= 8 GB on a 16 GB runner). cuprate's full cargo-vendor tree exceeds that - `windows-0.62.2` alone unpacks to a couple of GB. Fix: redirect TMPDIR inside the container to a bind-mounted `$repo_root/contrib/guix/.work/` directory. Bind-mounted host paths are NOT shadowed by the container's tmpfs, so mktemp/cargo-vendor target the host volume. While here, also fix the secondary bug that made this so confusing to diagnose: bash's `set -e` is silently disabled inside `$(...)` command substitutions unless `shopt -s inherit_errexit` is set. The smoke script was assigning `distsrc="$(./guix-mk-distsrc ...)"`, mk-distsrc was failing, but the empty distsrc fell through to `guix-build --distsrc ""` which printed a misleading "missing required --distsrc" instead of surfacing the original disk-full error. With inherit_errexit, the substitution exits non-zero and set -e fires at the assignment. Also gitignore .work/ and the existing out/ + smoke-logs/ paths so local runs don't dirty the working tree. Cluster smoke is unaffected (32 GB RAM, plenty of container tmpfs). GHA smoke should now actually finish. 
--- .gitignore | 3 +++ contrib/guix/guix-build | 7 +++++++ contrib/guix/guix-mk-distsrc | 15 ++++++++++++++- contrib/guix/smoke-reproducible.sh | 8 ++++++++ 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 533eaa4c7..d6507a8f9 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,6 @@ fast_sync_hashes.bin /books/user/Cuprated.toml fuzz/corpus fuzz/artifacts +/contrib/guix/out/ +/contrib/guix/.work/ +/contrib/guix/smoke-logs/ diff --git a/contrib/guix/guix-build b/contrib/guix/guix-build index 046156af5..01560e22e 100755 --- a/contrib/guix/guix-build +++ b/contrib/guix/guix-build @@ -65,6 +65,12 @@ else exit 1 fi +# See guix-mk-distsrc for the rationale: redirect TMPDIR inside the +# container to a bind-mounted host directory so the cargo target tree +# isn't constrained by the container's private tmpfs (~50% of RAM ~= +# 8 GB on a 16 GB GHA runner, smaller than a release build of cuprated). +work_root="$repo_root/contrib/guix/.work" +mkdir -p "$work_root" guix time-machine -C "$script_dir/channels.scm" \ -- shell --system="$guix_system" -m "$script_dir/manifest.scm" --container --pure -- \ env \ @@ -74,4 +80,5 @@ guix time-machine -C "$script_dir/channels.scm" \ GUIX_OUT_DIR="$out_dir" \ GUIX_ALLOW_COMMIT_MISMATCH="${GUIX_ALLOW_COMMIT_MISMATCH:-0}" \ GUIX_SKIP_FENV_WORKAROUND="${GUIX_SKIP_FENV_WORKAROUND:-0}" \ + TMPDIR="$work_root" \ bash "$script_dir/libexec/build.sh" diff --git a/contrib/guix/guix-mk-distsrc b/contrib/guix/guix-mk-distsrc index 4bf40188d..3ef40fbed 100755 --- a/contrib/guix/guix-mk-distsrc +++ b/contrib/guix/guix-mk-distsrc @@ -30,6 +30,19 @@ case "$out_dir" in *) echo "ERROR: --out-dir must resolve inside the repository ($repo_root); got $out_dir" >&2; exit 1 ;; esac +# `guix shell --container` mounts a private tmpfs over /tmp inside the +# container, sized at the kernel's tmpfs default (~50% of RAM). 
On a +# 16 GB GHA runner that's ~8 GB - smaller than the full cuprate cargo- +# vendor tree (windows-0.62.2 alone unpacks to ~2 GB, and the whole +# tree easily exceeds 10 GB). Redirect TMPDIR inside the container to +# a bind-mounted work directory on the host filesystem so mktemp and +# cargo vendor target the host volume (typically 100+ GB free) rather +# than the constrained tmpfs. +work_root="$repo_root/contrib/guix/.work" +mkdir -p "$work_root" guix time-machine -C "$script_dir/channels.scm" \ -- shell --system="$guix_system" -m "$script_dir/manifest.scm" --container --pure --network -- \ - env GUIX_OUT_DIR="$out_dir" bash "$script_dir/mk-distsrc" + env \ + GUIX_OUT_DIR="$out_dir" \ + TMPDIR="$work_root" \ + bash "$script_dir/mk-distsrc" diff --git a/contrib/guix/smoke-reproducible.sh b/contrib/guix/smoke-reproducible.sh index 65a5fcbb8..91ed8c27e 100755 --- a/contrib/guix/smoke-reproducible.sh +++ b/contrib/guix/smoke-reproducible.sh @@ -1,5 +1,13 @@ #!/usr/bin/env bash set -euo pipefail +# Without inherit_errexit, set -e is silently disabled inside `$(...)` +# command substitutions - so a failing `distsrc="$(./guix-mk-distsrc ...)"` +# would set $distsrc to "" and the script would soldier on with an empty +# `--distsrc` arg, producing the confusing "guix-build returned 0 but no +# artifact" tail-of-the-iceberg failure mode. With inherit_errexit, the +# substitution exits non-zero and set -e fires at the assignment, so the +# real error (e.g. disk-full in cargo vendor) surfaces immediately. +shopt -s inherit_errexit # End-to-end reproducibility self-check. #