From a9d1e706f9af8700a235323a6e41cc9cf7dbfebd Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Sat, 30 May 2026 07:45:52 -0400 Subject: [PATCH 01/32] Add scripts for testing older Linux distros and kernels via qemu --- .github/workflows/pull_request.yml | 16 + .swift-version | 2 +- scripts/prep-linux-swift.sh | 65 +++ scripts/test-using-qemu.sh | 898 +++++++++++++++++++++++++++++ 4 files changed, 980 insertions(+), 1 deletion(-) create mode 100755 scripts/prep-linux-swift.sh create mode 100755 scripts/test-using-qemu.sh diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 2720a19f..d0ce7376 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -58,6 +58,22 @@ jobs: done # empty line to ignore the --swift-sdk given by swiftlang/github-workflows/.github/workflows/scripts/install-and-build-with-sdk.sh \ + test_linux_kernel: + name: Test Linux Kernel / ${{ matrix.dist-kern }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + # These are specific distro and kernel versions that the test qemu script supports from here: https://images.linuxcontainers.org + dist-kern: ["al2-4.18", "al2-5.10"] + container: + image: ubuntu:24.04 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Run Test + run: bash -c './scripts/test-using-qemu.sh ${{ matrix.dist-kern }}' + soundness: name: Soundness uses: swiftlang/github-workflows/.github/workflows/soundness.yml@0.0.11 diff --git a/.swift-version b/.swift-version index 4ac4fded..91e4a9f2 100644 --- a/.swift-version +++ b/.swift-version @@ -1 +1 @@ -6.2.0 \ No newline at end of file +6.3.2 diff --git a/scripts/prep-linux-swift.sh b/scripts/prep-linux-swift.sh new file mode 100755 index 00000000..3175a238 --- /dev/null +++ b/scripts/prep-linux-swift.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# This script does a bit of extra preparation of the docker containers used to run the GitHub workflows +# that are specific to this 
project's needs when building/testing. Note that this script runs on
+# every supported Linux distribution so it must adapt to the distribution that it is running.
+
+if [[ "$(uname -s)" == "Linux" ]]; then
+    # Install the basic utilities depending on the type of Linux distribution (runs before `set -e`; a missing package manager is expected). curl is installed via yum only when it is absent.
+    apt-get --help && apt-get update && TZ=Etc/UTC apt-get -y install curl make gpg tzdata
+    yum --help && (curl --help || yum -y install curl) && yum -y install make gpg tar procps
+fi
+
+set -e
+
+while [ $# -ne 0 ]; do
+    arg="$1"
+    case "$arg" in
+    --install-swiftly)
+        installSwiftly=true
+        ;;
+    --swift-snapshot)
+        swiftSnapshot="$2"
+        shift;
+        ;;
+    *)
+        ;;
+    esac
+    shift
+done
+
+if [ "$installSwiftly" == true ]; then
+    echo "Installing swiftly"
+
+    curl -O https://download.swift.org/swiftly/linux/swiftly-$(uname -m).tar.gz && tar zxf swiftly-*.tar.gz && ./swiftly init -y --skip-install
+    . "/root/.local/share/swiftly/env.sh"
+
+    hash -r
+
+    selector=()
+    runSelector=()
+
+    if [ "$swiftSnapshot" != "" ]; then
+        echo "Installing latest $swiftSnapshot-snapshot toolchain"
+        selector=("$swiftSnapshot-snapshot")
+        runSelector=("+$swiftSnapshot-snapshot")
+    elif [ -f .swift-version ]; then
+        echo "Installing selected swift toolchain from .swift-version file"
+        selector=()
+        runSelector=()
+    else
+        echo "Installing latest toolchain"
+        selector=("latest")
+        runSelector=("+latest")
+    fi
+
+    TMPDIR=/var/tmp swiftly install --post-install-file=post-install.sh "${selector[@]}"
+
+    if [ -f post-install.sh ]; then
+        echo "Performing swift toolchain post-installation"
+        chmod u+x post-install.sh && ./post-install.sh
+    fi
+
+    echo "Displaying swift version"
+    swiftly run "${runSelector[@]}" swift --version
+fi
diff --git a/scripts/test-using-qemu.sh b/scripts/test-using-qemu.sh
new file mode 100755
index 00000000..25246112
--- /dev/null
+++ b/scripts/test-using-qemu.sh
@@ -0,0 +1,898 @@
+#!/usr/bin/env bash
+# test-using-qemu.sh
+#
+# Boots a Linux VM under QEMU to run commands (default: swift test) 
against a +# specific distro+kernel combination. Supports two modes: +# +# disk mode — downloads a pre-built disk.qcow2 from images.linuxcontainers.org +# and injects credentials via a cloud-init seed ISO. +# +# rootfs mode — downloads a rootfs.tar.xz from images.linuxcontainers.org and +# fetches the kernel separately (e.g. from an RPM repo), then +# builds the disk image locally with mkfs.ext4 -d. +# +# Known profiles live in the PROFILES table near the top of this file. +# Must run as root on Linux. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LXC_BASE="https://images.linuxcontainers.org/images" +LXC_GPG_KEY_IDS=( + "602F567663359FCDE9BCD0E79F93B4C4F3D4444A" + "C2DE3F5BDE2F6068" +) + +# ── Defaults ────────────────────────────────────────────────────────────────── +WORK_DIR="${WORK_DIR:-${TMPDIR:-/tmp}/qemu-swift-$$}" +VM_MEMORY="${VM_MEMORY:-2048}" +VM_CPUS="${VM_CPUS:-2}" +SSH_HOST_PORT="${SSH_PORT:-2222}" +KEEP_WORK="${KEEP_WORK:-false}" + +QEMU_PID="" +SSH_OPTS=() + +# ── Logging ─────────────────────────────────────────────────────────────────── +info() { printf '\033[32m[INFO]\033[0m %s\n' "$*"; } +warn() { printf '\033[33m[WARN]\033[0m %s\n' "$*" >&2; } +error() { printf '\033[31m[ERROR]\033[0m %s\n' "$*" >&2; exit 1; } +step() { printf '\033[36m[STEP]\033[0m %s\n' "$*"; } + +# ── Profile registry ────────────────────────────────────────────────────────── +# Each entry is a pipe-separated string: +# LXC_SPEC | USERSPACE_FILE | KERNEL_MODE | KERNEL_DONOR_SPEC +# +# USERSPACE_FILE: "disk.qcow2" → disk mode (cloud-init SSH injection) +# "rootfs.tar.xz" → rootfs mode (direct SSH config, -kernel boot) +# +# KERNEL_MODE: "disk" → kernel lives inside the disk.qcow2 (GRUB boots it) +# "lxc-disk" → extract kernel+initrd from a second LXC disk.qcow2; +# KERNEL_DONOR_SPEC names that donor image's LXC path +# +# KERNEL_DONOR_SPEC: LXC path (distro/release/arch/variant) of the kernel donor image. +# Used only for lxc-disk mode. 
Debian Bullseye ships kernel 5.10 LTS +# and is publicly accessible, making it a good donor for AL2 userspace. +# +# To add a new combination, append a line here — no other code changes needed. + +declare -A PROFILES +# LXC spec | file | kernel | kernel donor spec +PROFILES["al2-5.10"]="amazonlinux/2/amd64/default | rootfs.tar.xz | lxc-disk | debian/bullseye/amd64/cloud" +PROFILES["al2-5.10-arm64"]="amazonlinux/2/arm64/default | rootfs.tar.xz | lxc-disk | debian/bullseye/arm64/cloud" +# almalinux/8/amd64/default (not cloud) carries kernel 4.18 and has disk.qcow2; +# no arm64 equivalent exists on LXC so this profile is amd64-only. +PROFILES["al2-4.18"]="amazonlinux/2/amd64/default | rootfs.tar.xz | lxc-disk | almalinux/8/amd64/default" +PROFILES["almalinux-8"]="almalinux/8/amd64/cloud | disk.qcow2 | disk | " +PROFILES["almalinux-8-arm64"]="almalinux/8/arm64/cloud | disk.qcow2 | disk | " +PROFILES["almalinux-9"]="almalinux/9/amd64/cloud | disk.qcow2 | disk | " +PROFILES["almalinux-9-arm64"]="almalinux/9/arm64/cloud | disk.qcow2 | disk | " +PROFILES["rockylinux-8"]="rockylinux/8/amd64/cloud | disk.qcow2 | disk | " +PROFILES["rockylinux-8-arm64"]="rockylinux/8/arm64/cloud | disk.qcow2 | disk | " +PROFILES["rockylinux-9"]="rockylinux/9/amd64/cloud | disk.qcow2 | disk | " +PROFILES["rockylinux-9-arm64"]="rockylinux/9/arm64/cloud | disk.qcow2 | disk | " +PROFILES["ubuntu-22.04"]="ubuntu/jammy/amd64/cloud | disk.qcow2 | disk | " +PROFILES["ubuntu-22.04-arm64"]="ubuntu/jammy/arm64/cloud | disk.qcow2 | disk | " +PROFILES["ubuntu-24.04"]="ubuntu/noble/amd64/cloud | disk.qcow2 | disk | " +PROFILES["ubuntu-24.04-arm64"]="ubuntu/noble/arm64/cloud | disk.qcow2 | disk | " +PROFILES["debian-12"]="debian/bookworm/amd64/cloud | disk.qcow2 | disk | " +PROFILES["debian-12-arm64"]="debian/bookworm/arm64/cloud | disk.qcow2 | disk | " + +list_profiles() { + echo "Available profiles:" + for key in $(echo "${!PROFILES[@]}" | tr ' ' '\n' | sort); do + local lxc_spec; lxc_spec=$(awk -F'|' 
'{gsub(/^[ \t]+|[ \t]+$/,"",$1); print $1}' <<< "${PROFILES[$key]}")
+        printf "  %-20s %s\n" "$key" "$lxc_spec"
+    done
+}
+
+# ── Cleanup ───────────────────────────────────────────────────────────────────
+# Best-effort teardown, run once from the EXIT trap: asks the guest to power
+# off over SSH, then kills and reaps the QEMU process, then removes WORK_DIR
+# unless KEEP_WORK=true. Errors are ignored (set +e) so teardown never masks
+# the script's real exit status.
+cleanup() {
+    set +e
+    if [[ -n "$QEMU_PID" ]] && kill -0 "$QEMU_PID" 2>/dev/null; then
+        info "Shutting down VM (PID $QEMU_PID)..."
+        ssh "${SSH_OPTS[@]}" root@localhost "poweroff" 2>/dev/null || true
+        sleep 3
+        kill "$QEMU_PID" 2>/dev/null || true
+        wait "$QEMU_PID" 2>/dev/null || true
+    fi
+    if [[ "$KEEP_WORK" != "true" && -d "$WORK_DIR" ]]; then
+        rm -rf "$WORK_DIR"
+    elif [[ "$KEEP_WORK" == "true" ]]; then
+        info "Work directory preserved: $WORK_DIR"
+    fi
+}
+# Only EXIT runs cleanup. INT/TERM just exit with the conventional 128+signal
+# status so the EXIT trap fires exactly once and the script really stops; a
+# signal handler that merely returns lets bash resume with the next command
+# after the VM and work directory are already gone.
+trap cleanup EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
+
+# ── Usage ─────────────────────────────────────────────────────────────────────
+# Prints the help text and exits 0.
+usage() {
+    # NOTE(review): the here-doc opener and the "Usage:" line were rebuilt from
+    # a damaged copy of this file — confirm the placeholder wording.
+    cat <<EOF
+Usage: $(basename "$0") [OPTIONS] [<PROFILE>] [-- <GUEST COMMAND>]
+
+Boot a Linux VM under QEMU and run a command inside it.
+
+OPTIONS:
+  -h, --help           Show this help
+  --list-profiles      List available profiles and exit
+  -m, --memory MB      VM RAM in MB (default: $VM_MEMORY, env: VM_MEMORY)
+  -c, --cpus N         VM CPU count (default: $VM_CPUS, env: VM_CPUS)
+  -w, --workdir DIR    Host directory to copy to /mnt/host (default: CWD)
+  -p, --ssh-port PORT  Host-side SSH forwarding port (default: $SSH_HOST_PORT, env: SSH_PORT)
+  --keep-image         Reuse existing disk image in WORK_DIR (skip download/build)
+  --keep-work          Preserve WORK_DIR after exit
+  --skip-install       Skip tool installation
+  --no-swift           Skip Swift toolchain installation
+
+PROFILE (default: al2-5.10):
+  A named entry from the built-in profile table. Run --list-profiles to see all.
+  Examples: al2-5.10 almalinux-8 ubuntu-22.04 debian-12
+
+GUEST COMMAND (after --):
+  Runs as root inside the VM, in /mnt/host, with Swift on PATH.
+  Default: swift test
+
+ENVIRONMENT: VM_MEMORY VM_CPUS SSH_PORT KEEP_WORK WORK_DIR VM_DISK_SIZE TMPDIR
+  Note: use "sudo VAR=val ./$(basename "$0")" — plain "VAR=val sudo ..." is stripped by sudo.
+ +NOTES: + - Host must be Ubuntu (apt-get is used for tool installation); python3 must be available. + - disk mode images require cloud-init in the guest (Ubuntu, Debian, Fedora, AlmaLinux do). + - rootfs mode (e.g. al2-5.10) fetches userspace from LXC and kernel from a donor disk image. + - lxc-disk kernel mode downloads a second LXC disk.qcow2 and extracts vmlinuz+initrd via debugfs. + - rootfs mode waits up to 10 min for SSH; first boot installs openssh-server if absent. + - amd64: QEMU's built-in SeaBIOS handles GRUB boot; no extra firmware needed. + - arm64: install qemu-efi-aarch64 for UEFI firmware. +EOF + exit 0 +} + +# ── Argument parsing ────────────────────────────────────────────────────────── +HOST_WORKDIR="$(pwd)" +PROFILE_NAME="al2-5.10" +GUEST_COMMAND="swift test" +KEEP_IMAGE=false +SKIP_INSTALL=false +NO_SWIFT=false + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) usage ;; + --list-profiles) list_profiles; exit 0 ;; + -m|--memory) VM_MEMORY="$2"; shift 2 ;; + -c|--cpus) VM_CPUS="$2"; shift 2 ;; + -w|--workdir) HOST_WORKDIR="$2"; shift 2 ;; + -p|--ssh-port) SSH_HOST_PORT="$2"; shift 2 ;; + --keep-image) KEEP_IMAGE=true; shift ;; + --keep-work) KEEP_WORK=true; shift ;; + --skip-install) SKIP_INSTALL=true; shift ;; + --no-swift) NO_SWIFT=true; shift ;; + --) shift; GUEST_COMMAND="$*"; break ;; + -*) error "Unknown option: $1" ;; + *) PROFILE_NAME="$1"; shift ;; + esac +done + +[[ ! -d "$HOST_WORKDIR" ]] && error "Workdir not found: $HOST_WORKDIR" +[[ "$(id -u)" -ne 0 ]] && error "Must run as root." + +# ── Load profile ────────────────────────────────────────────────────────────── +[[ -z "${PROFILES[$PROFILE_NAME]+_}" ]] && { + error "Unknown profile '$PROFILE_NAME'. Run --list-profiles to see available profiles." 
+} + +IFS='|' read -r P_LXC_SPEC P_USERSPACE_FILE P_KERNEL_MODE P_KERNEL_DONOR_SPEC \ + <<< "${PROFILES[$PROFILE_NAME]}" + +# Trim whitespace from each field +P_LXC_SPEC=$(echo "$P_LXC_SPEC" | xargs) +P_USERSPACE_FILE=$(echo "$P_USERSPACE_FILE" | xargs) +P_KERNEL_MODE=$(echo "$P_KERNEL_MODE" | xargs) +P_KERNEL_DONOR_SPEC=$(echo "$P_KERNEL_DONOR_SPEC" | xargs) + +IFS='/' read -r DISTRO RELEASE ARCH VARIANT <<< "$P_LXC_SPEC" +VARIANT="${VARIANT:-default}" + +info "Profile: $PROFILE_NAME ($P_LXC_SPEC, $P_USERSPACE_FILE, kernel: $P_KERNEL_MODE)" +info "Workdir: $HOST_WORKDIR" +info "Command: $GUEST_COMMAND" +info "VM: ${VM_MEMORY} MB RAM, ${VM_CPUS} CPU(s)" +info "SSH port: $SSH_HOST_PORT" + +mkdir -p "$WORK_DIR" + +# ── Architecture ────────────────────────────────────────────────────────────── +# amd64: q35 + built-in SeaBIOS — no firmware file needed. +# arm64: virt machine needs UEFI (EDK2); install qemu-efi-aarch64. +case "$ARCH" in + amd64|x86_64) + QEMU_BIN="qemu-system-x86_64" + # -cpu max exposes the broadest feature set QEMU can emulate (SSE4, AES-NI, + # AVX, etc.). Without it QEMU defaults to qemu64 — a bare x86-64 baseline + # that causes SIGILL in Swift/LLVM binaries that use SSE4.2 or newer. + # When -enable-kvm is also present, max becomes equivalent to host. + # mpx=off,pku=off: MPX and Protection Keys (PKU) require QEMU TCG to JIT + # BNDMOV/WRPKRU instructions; under macOS Rosetta this fills the JIT page + # budget and triggers mprotect(ENOMEM) → SIGTRAP. Neither feature is needed + # by Swift/LLVM, and Linux 5.6+ already dropped MPX support. 
+        QEMU_MACHINE_ARGS=(-machine q35 -cpu max,mpx=off,pku=off)
+        UEFI_PKG=""
+        UEFI_FIRMWARE_SEARCH=()
+        ;;
+    arm64|aarch64)
+        QEMU_BIN="qemu-system-aarch64"
+        QEMU_MACHINE_ARGS=(-machine virt -cpu cortex-a57)
+        UEFI_PKG="qemu-efi-aarch64"
+        UEFI_FIRMWARE_SEARCH=(
+            /usr/share/qemu-efi-aarch64/QEMU_EFI.fd
+            /usr/share/edk2/aarch64/QEMU_EFI.fd
+            /usr/share/edk2-aarch64/QEMU_EFI.fd
+        )
+        ;;
+    *) error "Unsupported architecture: $ARCH" ;;
+esac
+
+# ── Install host tools (host must be Ubuntu) ──────────────────────────────────
+# Installs the host-side packages with apt-get: the common set, the QEMU system
+# emulator matching $ARCH, and the extra tools that only rootfs mode calls.
+install_tools() {
+    step "Installing required tools..."
+    apt-get update -qq
+    local pkgs=(qemu-utils openssh-client gpg curl genisoimage xz-utils)
+    case "$ARCH" in
+        amd64|x86_64)  pkgs+=(qemu-system-x86) ;;
+        arm64|aarch64) pkgs+=(qemu-system-arm qemu-efi-aarch64) ;;
+    esac
+    # rootfs mode additionally needs e2fsprogs (debugfs) and python3 (partition-table parsing, CDN pre-resolve) for lxc-disk kernel extraction; minimal container images ship neither.
+    [[ "$P_USERSPACE_FILE" != "disk.qcow2" ]] && pkgs+=(e2fsprogs python3)
+    DEBIAN_FRONTEND=noninteractive apt-get install -y "${pkgs[@]}"
+}
+
+[[ "$SKIP_INSTALL" != "true" ]] && install_tools
+
+# Verify required binaries
+command -v "$QEMU_BIN" &>/dev/null || error "$QEMU_BIN not found."
+command -v ssh &>/dev/null || error "ssh not found."
+command -v curl &>/dev/null || error "curl not found."
+command -v sha256sum &>/dev/null || error "sha256sum not found."
+
+MKISO_CMD=""
+for c in genisoimage mkisofs xorriso; do
+    command -v "$c" &>/dev/null && { MKISO_CMD="$c"; break; }
+done
+
+# ── Locate UEFI firmware (arm64 only) ────────────────────────────────────────
+UEFI_FIRMWARE=""
+for f in "${UEFI_FIRMWARE_SEARCH[@]:-}"; do
+    [[ -f "$f" ]] && { UEFI_FIRMWARE="$f"; break; }
+done
+[[ "${#UEFI_FIRMWARE_SEARCH[@]}" -gt 0 && -z "$UEFI_FIRMWARE" ]] && \
+    error "ARM64 UEFI firmware not found. 
Install: $UEFI_PKG" +[[ -n "$UEFI_FIRMWARE" ]] && info "UEFI firmware: $UEFI_FIRMWARE" + +# ── Find latest complete LXC build ─────────────────────────────────────────── +# Builds are published incrementally: SHA256SUMS appears before the images are +# fully uploaded. Walk candidates newest-first and stop at the first build +# whose SHA256SUMS already lists the file we need. +step "Finding latest build for $P_LXC_SPEC..." +IMAGE_URL_BASE="$LXC_BASE/$DISTRO/$RELEASE/$ARCH/$VARIANT" +BUILDS=$(curl -sf "$IMAGE_URL_BASE/" \ + | grep -oE '[0-9]{8}_[0-9]{2}:[0-9]{2}' \ + | sort -r) || true +[[ -z "$BUILDS" ]] && error "No builds found at $IMAGE_URL_BASE/" + +SHA256SUMS_FILE="$WORK_DIR/SHA256SUMS" +SHA256SUMS_SIG="$WORK_DIR/SHA256SUMS.asc" +BUILD="" +for candidate in $BUILDS; do + if curl -sf "$IMAGE_URL_BASE/$candidate/SHA256SUMS" -o "$SHA256SUMS_FILE" && \ + grep -qE '(^|\s)\.?/?'"$P_USERSPACE_FILE"'(\s|$)' "$SHA256SUMS_FILE"; then + BUILD="$candidate" + break + fi +done +[[ -z "$BUILD" ]] && error "No complete build found for $P_LXC_SPEC (tried all candidates)" +info "Latest complete build: $BUILD" +BUILD_URL="$IMAGE_URL_BASE/$BUILD" + + +# ── Download userspace image ────────────────────────────────────────────────── +USERSPACE_FILE="$WORK_DIR/$P_USERSPACE_FILE" + +if [[ "$KEEP_IMAGE" == "true" && -f "$USERSPACE_FILE" ]]; then + info "Reusing existing userspace image: $USERSPACE_FILE" +else + step "Downloading $P_USERSPACE_FILE..." + curl -f --progress-bar -o "$USERSPACE_FILE" "$BUILD_URL/$P_USERSPACE_FILE" +fi + +if ! curl -f -sS -o "$SHA256SUMS_SIG" "$BUILD_URL/SHA256SUMS.asc" 2>/dev/null; then + curl -f -sS -o "$SHA256SUMS_SIG" "$BUILD_URL/SHA256SUMS.gpg" 2>/dev/null || \ + warn "No signature file; skipping GPG verification." +fi + +# ── Verify checksum ─────────────────────────────────────────────────────────── +step "Verifying image integrity..." 
+export GNUPGHOME="$WORK_DIR/.gnupg"
+mkdir -p "$GNUPGHOME" && chmod 700 "$GNUPGHOME"
+
+# GPG check of SHA256SUMS is best-effort: try each known key ID against two
+# keyservers; on failure only warn and fall through to the SHA256 comparison.
+if [[ -s "$SHA256SUMS_SIG" ]]; then
+    GPG_VERIFIED=false
+    for key_id in "${LXC_GPG_KEY_IDS[@]}"; do
+        if gpg --keyserver hkp://keyserver.ubuntu.com --recv-keys "$key_id" 2>/dev/null ||
+           gpg --keyserver hkps://keys.openpgp.org --recv-keys "$key_id" 2>/dev/null; then
+            if gpg --verify "$SHA256SUMS_SIG" "$SHA256SUMS_FILE" 2>/dev/null; then
+                info "GPG signature verified (key: $key_id)"
+                GPG_VERIFIED=true; break
+            fi
+        fi
+    done
+    [[ "$GPG_VERIFIED" != "true" ]] && warn "GPG verification failed; continuing with SHA256 only."
+else
+    warn "No signature file; skipping GPG verification."
+fi
+
+EXPECTED_HASH=$(awk -v f="$P_USERSPACE_FILE" '$2==f || $2=="./"f {print $1}' "$SHA256SUMS_FILE")
+[[ -z "$EXPECTED_HASH" ]] && error "No checksum for $P_USERSPACE_FILE in SHA256SUMS"
+ACTUAL_HASH=$(sha256sum "$USERSPACE_FILE" | awk '{print $1}')
+# error() prints its argument through %s, so a "\n" escape would be shown
+# literally; splice in real newlines with $'\n' instead.
+[[ "$EXPECTED_HASH" == "$ACTUAL_HASH" ]] || \
+    error "SHA256 mismatch!"$'\n'"  Expected: $EXPECTED_HASH"$'\n'"  Actual:   $ACTUAL_HASH"
+info "SHA256 checksum OK"
+
+# ── Generate SSH key (used by both modes) ─────────────────────────────────────
+# Keep an existing key pair when WORK_DIR is reused (--keep-image): ssh-keygen
+# would otherwise stop at its overwrite prompt, and a reused rootfs-mode disk
+# image already has the old public key in root's authorized_keys.
+if [[ ! -f "$WORK_DIR/vm_key" || ! -f "$WORK_DIR/vm_key.pub" ]]; then
+    rm -f "$WORK_DIR/vm_key" "$WORK_DIR/vm_key.pub"
+    ssh-keygen -t ed25519 -f "$WORK_DIR/vm_key" -N "" -C "qemu-swift-test" -q
+fi
+VM_PUBKEY=$(cat "$WORK_DIR/vm_key.pub")
+
+SSH_OPTS=(
+    -o StrictHostKeyChecking=no
+    -o ConnectTimeout=5
+    -o BatchMode=yes
+    -o LogLevel=ERROR
+    -i "$WORK_DIR/vm_key"
+    -p "$SSH_HOST_PORT"
+)
+
+# ══════════════════════════════════════════════════════════════════════════════
+# DISK MODE — pre-built qcow2, cloud-init seed ISO for credentials
+# ══════════════════════════════════════════════════════════════════════════════
+if [[ "$P_USERSPACE_FILE" == "disk.qcow2" ]]; then
+
+    # Cloud-init NoCloud seed ISO: injects our SSH key into root's authorized_keys
+    # on first boot via runcmd. All images in this mode have cloud-init installed.
+    [[ -z "$MKISO_CMD" ]] && error "No ISO tool found. Install genisoimage, mkisofs, or xorriso."
+ + step "Creating cloud-init seed ISO..." + CLOUD_DIR="$WORK_DIR/cloud-init" + mkdir -p "$CLOUD_DIR" + + cat > "$CLOUD_DIR/meta-data" < "$CLOUD_DIR/user-data" < /root/.ssh/authorized_keys + - chmod 600 /root/.ssh/authorized_keys + - mkdir -p /etc/ssh/sshd_config.d + - echo "PermitRootLogin yes" > /etc/ssh/sshd_config.d/99-root.conf + - systemctl enable --now ssh 2>/dev/null || systemctl enable --now sshd 2>/dev/null || true + - systemctl reload-or-restart ssh 2>/dev/null || systemctl reload-or-restart sshd 2>/dev/null || true +USERDATA + + SEED_ISO="$WORK_DIR/seed.iso" + case "$MKISO_CMD" in + genisoimage|mkisofs) + "$MKISO_CMD" -output "$SEED_ISO" -volid cidata -joliet -rock \ + "$CLOUD_DIR/user-data" "$CLOUD_DIR/meta-data" 2>/dev/null ;; + xorriso) + xorriso -as mkisofs -output "$SEED_ISO" -volid cidata -joliet -rock \ + "$CLOUD_DIR/user-data" "$CLOUD_DIR/meta-data" 2>/dev/null ;; + esac + + # COW overlay keeps the base image pristine; --keep-image reuses the base. + # Resize the overlay if VM_DISK_SIZE is set — cloud-init's growpart module + # will expand the partition and filesystem to fill the new size on first boot. 
+ OVERLAY_IMAGE="$WORK_DIR/overlay.qcow2" + qemu-img create -q -f qcow2 -b "$USERSPACE_FILE" -F qcow2 "$OVERLAY_IMAGE" + if [[ -n "${VM_DISK_SIZE:-}" ]]; then + qemu-img resize -q "$OVERLAY_IMAGE" "$VM_DISK_SIZE" + info "Overlay resized to $VM_DISK_SIZE (cloud-init will expand partition on first boot)" + fi + + QEMU_CMD=( + "$QEMU_BIN" + "${QEMU_MACHINE_ARGS[@]}" + -m "$VM_MEMORY" + -smp "$VM_CPUS" + -drive "file=${OVERLAY_IMAGE},if=virtio,format=qcow2" + -drive "file=${SEED_ISO},if=virtio,format=raw" + -netdev "user,id=net0,hostfwd=tcp::${SSH_HOST_PORT}-:22" + -device virtio-net-pci,netdev=net0 + -display none + -serial "file:${WORK_DIR}/console.log" + ) + [[ -n "$UEFI_FIRMWARE" ]] && QEMU_CMD+=(-bios "$UEFI_FIRMWARE") + if [[ -e /dev/kvm ]]; then + QEMU_CMD+=(-enable-kvm) + else + QEMU_CMD+=(-accel tcg,tb-size=512) + fi + +# ══════════════════════════════════════════════════════════════════════════════ +# ROOTFS MODE — assemble kernel + userspace from separate sources, -kernel boot +# ══════════════════════════════════════════════════════════════════════════════ +else + + ROOTFS_DIR="$WORK_DIR/rootfs" + DISK_IMAGE="$WORK_DIR/vm.qcow2" + + if [[ "$KEEP_IMAGE" == "true" && -f "$DISK_IMAGE" ]]; then + info "Reusing existing disk image: $DISK_IMAGE" + else + + # ── Extract rootfs ──────────────────────────────────────────────────── + step "Extracting rootfs to $ROOTFS_DIR..." + rm -rf "$ROOTFS_DIR" + mkdir -p "$ROOTFS_DIR" + # --no-same-owner avoids chown() failures when CAP_CHOWN is restricted + tar -C "$ROOTFS_DIR" -xf "$USERSPACE_FILE" --no-same-owner + [[ -d "$ROOTFS_DIR/etc" ]] || error "Rootfs extraction looks empty; check the tarball." + + # resolv.conf is often a dangling symlink in LXC images. Use QEMU's + # built-in DNS proxy (10.0.2.3) as primary: it forwards to the host's + # resolver, which in CI-on-AWS environments is the VPC DNS that can + # resolve AWS-internal names like amazonlinux.default.amazonaws.com. 
+        # Fall back to public DNS for non-AWS environments. Prevent
+        # NetworkManager from overwriting this file.
+        rm -f "$ROOTFS_DIR/etc/resolv.conf"
+        printf 'nameserver 10.0.2.3\nnameserver 8.8.8.8\nnameserver 1.1.1.1\n' > "$ROOTFS_DIR/etc/resolv.conf"
+        mkdir -p "$ROOTFS_DIR/etc/NetworkManager/conf.d"
+        printf '[main]\ndns=none\n' > "$ROOTFS_DIR/etc/NetworkManager/conf.d/90-dns-none.conf"
+
+        # LXC container rootfs images have no ifcfg for the VM NIC (containers
+        # use host networking). Without it NetworkManager burns time on DHCP
+        # retries. QEMU SLIRP NAT always uses fixed addresses, so a static
+        # config is safe and comes up instantly.
+        # Use TYPE=Ethernet (not DEVICE=eth0) so the config matches regardless
+        # of whether RHEL 8 udev renames virtio-net to ens3/enp0s2/etc.
+        mkdir -p "$ROOTFS_DIR/etc/sysconfig/network-scripts"
+        cat > "$ROOTFS_DIR/etc/sysconfig/network-scripts/ifcfg-eth0" << 'EOF'
+TYPE=Ethernet
+ONBOOT=yes
+BOOTPROTO=none
+IPADDR=10.0.2.15
+PREFIX=24
+GATEWAY=10.0.2.2
+NM_CONTROLLED=yes
+DEFROUTE=yes
+PEERDNS=no
+IPV4_FAILURE_FATAL=no
+IPV6INIT=no
+EOF
+
+        # AL2 .repo files use yum variables ($awsproto, $amazonlinux, $awsregion,
+        # $awsdomain) that resolve to AWS-internal hostnames — only reachable from
+        # inside an AWS VPC. Override the vars so the URL template
+        #   $awsproto://$amazonlinux.$awsregion.$awsdomain/...
+        # expands to the public CloudFront CDN instead:
+        #   https://cdn.amazonlinux.com/...
+        # (same path structure; works everywhere, including in AWS CI)
+        if [[ -d "$ROOTFS_DIR/etc/yum.repos.d" ]]; then
+            mkdir -p "$ROOTFS_DIR/etc/yum/vars"
+            printf 'cdn\n' > "$ROOTFS_DIR/etc/yum/vars/amazonlinux"
+            printf 'amazonlinux\n' > "$ROOTFS_DIR/etc/yum/vars/awsregion"
+            printf 'com\n' > "$ROOTFS_DIR/etc/yum/vars/awsdomain"
+            printf 'https\n' > "$ROOTFS_DIR/etc/yum/vars/awsproto"
+
+            # QEMU's DNS proxy (10.0.2.3) may not resolve in Docker CI environments
+            # (libslirp skips 127.0.0.11, direct UDP to 8.8.8.8:53 is blocked).
+ # Pre-resolve the CDN hostname on the HOST (where DNS always works) and + # pin it in the VM's /etc/hosts so the bootstrap install needs no DNS. + CDN_IP=$(python3 -c " +import socket +r = socket.getaddrinfo('cdn.amazonlinux.com', 80, socket.AF_INET) +print(r[0][4][0]) +" 2>/dev/null || true) + if [[ -n "$CDN_IP" ]]; then + printf '%s\t%s\n' "$CDN_IP" "cdn.amazonlinux.com" >> "$ROOTFS_DIR/etc/hosts" + info " Pinned cdn.amazonlinux.com → $CDN_IP in /etc/hosts" + else + warn " Could not resolve cdn.amazonlinux.com on host; VM must use in-VM DNS" + fi + fi + + step "Rootfs network/repo state:" + info " resolv.conf: $(cat "$ROOTFS_DIR/etc/resolv.conf" 2>/dev/null | tr '\n' ' ')" + info " ifcfg-eth0: $(grep -s BOOTPROTO "$ROOTFS_DIR/etc/sysconfig/network-scripts/ifcfg-eth0" 2>/dev/null || echo 'absent')" + info " yum repos: $(grep -rh 'mirrorlist\|baseurl' "$ROOTFS_DIR/etc/yum.repos.d/" 2>/dev/null | tr '\n' '|' || echo 'none')" + info " yum vars: awsproto=$(cat "$ROOTFS_DIR/etc/yum/vars/awsproto" 2>/dev/null || echo 'unset') amazonlinux=$(cat "$ROOTFS_DIR/etc/yum/vars/amazonlinux" 2>/dev/null || echo 'unset') awsregion=$(cat "$ROOTFS_DIR/etc/yum/vars/awsregion" 2>/dev/null || echo 'unset') awsdomain=$(cat "$ROOTFS_DIR/etc/yum/vars/awsdomain" 2>/dev/null || echo 'unset')" + + # The AlmaLinux 8 dracut initrd is built for local-disk boot and does not + # include the network module, so ip= on the kernel cmdline is ignored. + # The Debian initrd does DHCP in early boot, which is why al2-5.10 gets + # a working network for free. For al2-4.18 we inject a minimal early + # service that configures the first non-loopback NIC with QEMU SLIRP's + # fixed addresses before network.target is reached. + # NOTE: net.ifnames=0 suppresses the kernel's own predictable naming, but + # RHEL 8 udev's 80-net-setup-link.rules may still rename virtio-net to + # e.g. ens3 or enp0s2 via the "path" policy in 99-default.link. 
The + # service therefore detects the interface name dynamically instead of + # hardcoding eth0. + mkdir -p "$ROOTFS_DIR/etc/systemd/system" + cat > "$ROOTFS_DIR/etc/systemd/system/qemu-network-setup.service" << 'SVCEOF' +[Unit] +Description=QEMU SLIRP NAT early network setup +Before=network.target network-pre.target +After=basic.target + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/bin/sh -c '\ + echo "NETSETUP: finding interface..."; \ + IFACE=; \ + for i in $(seq 60); do \ + IFACE=$(ip link | grep -v ": lo:" | grep -m1 "^[0-9]*:" | cut -d: -f2 | tr -d " " | sed "s/@.*//"); \ + [ -n "$IFACE" ] && break; \ + sleep 0.5; \ + done; \ + echo "NETSETUP: configuring ${IFACE:-NONE}"; \ + [ -z "$IFACE" ] && exit 1; \ + ip link set "$IFACE" up; \ + ip addr add 10.0.2.15/24 dev "$IFACE" 2>/dev/null || true; \ + ip route add default via 10.0.2.2 2>/dev/null || true' +StandardOutput=journal+console +StandardError=journal+console + +[Install] +WantedBy=network.target +SVCEOF + mkdir -p "$ROOTFS_DIR/etc/systemd/system/network.target.wants" + ln -sf /etc/systemd/system/qemu-network-setup.service \ + "$ROOTFS_DIR/etc/systemd/system/network.target.wants/qemu-network-setup.service" + + # ── Configure SSH access (no chroot needed) ─────────────────────────── + step "Configuring SSH access in rootfs..." 
+ mkdir -p "$ROOTFS_DIR/root/.ssh" + chmod 700 "$ROOTFS_DIR/root/.ssh" + echo "$VM_PUBKEY" > "$ROOTFS_DIR/root/.ssh/authorized_keys" + chmod 600 "$ROOTFS_DIR/root/.ssh/authorized_keys" + + mkdir -p "$ROOTFS_DIR/etc/ssh/sshd_config.d" + echo "PermitRootLogin yes" > "$ROOTFS_DIR/etc/ssh/sshd_config.d/99-root.conf" + + # Pre-generate SSH host keys so sshd can start without /dev/urandom issues + ssh-keygen -A -f "$ROOTFS_DIR" 2>/dev/null || true + + # Enable sshd at boot via systemd wants symlink + mkdir -p "$ROOTFS_DIR/etc/systemd/system/multi-user.target.wants" + for svc in \ + "$ROOTFS_DIR/usr/lib/systemd/system/sshd.service" \ + "$ROOTFS_DIR/lib/systemd/system/sshd.service" \ + "$ROOTFS_DIR/usr/lib/systemd/system/ssh.service" \ + "$ROOTFS_DIR/lib/systemd/system/ssh.service"; do + if [[ -f "$svc" ]]; then + ln -sf "${svc#"$ROOTFS_DIR"}" \ + "$ROOTFS_DIR/etc/systemd/system/multi-user.target.wants/$(basename "$svc")" + break + fi + done + + # ── Bootstrap sshd if not pre-installed ────────────────────────────────── + # LXC container images are minimal; openssh-server is often absent (e.g. AL2). + # If sshd is missing, drop a one-shot systemd unit that installs it on first + # boot via the guest's own package manager. QEMU's user-mode NAT gives the VM + # internet access, so yum/apt can reach their public CDN mirrors. + # The ConditionPathExists guard makes it a no-op on subsequent boots. + if [[ ! -f "$ROOTFS_DIR/usr/sbin/sshd" && ! -f "$ROOTFS_DIR/sbin/sshd" ]]; then + warn "sshd not found in rootfs — adding first-boot service to install it (~30–60 s extra)." 
+ cat > "$ROOTFS_DIR/etc/systemd/system/qemu-bootstrap-sshd.service" <<'SVCEOF' +[Unit] +Description=Bootstrap: install openssh-server for QEMU SSH access +After=network.target +Wants=network.target +ConditionPathExists=!/root/.qemu-sshd-bootstrapped + +[Service] +Type=oneshot +TimeoutStartSec=300 +ExecStart=/bin/bash -c '\ + echo "BOOTSTRAP: configuring network..."; \ + IFACE=$(ip link | grep -v ": lo:" | grep -m1 "^[0-9]*:" | cut -d: -f2 | tr -d " " | sed "s/@.*//"); \ + echo "BOOTSTRAP: iface=${IFACE:-NONE}"; \ + if [ -n "$IFACE" ]; then \ + ip link set "$IFACE" up 2>/dev/null || true; \ + ip addr add 10.0.2.15/24 dev "$IFACE" 2>/dev/null || true; \ + ip route add default via 10.0.2.2 dev "$IFACE" 2>/dev/null || true; \ + fi; \ + echo "BOOTSTRAP: addrs=$(ip addr show dev "$IFACE" 2>/dev/null | grep "inet " | tr -s " ")"; \ + echo "BOOTSTRAP: routes=$(ip route 2>/dev/null | tr "\n" "|")"; \ + echo "BOOTSTRAP: redirecting yum to public CDN..."; \ + if [ -d /etc/yum.repos.d ]; then \ + mkdir -p /etc/yum/vars; \ + printf cdn > /etc/yum/vars/amazonlinux; \ + printf amazonlinux > /etc/yum/vars/awsregion; \ + printf com > /etc/yum/vars/awsdomain; \ + printf https > /etc/yum/vars/awsproto; \ + fi; \ + printf "nameserver 10.0.2.3\nnameserver 8.8.8.8\nnameserver 1.1.1.1\n" > /etc/resolv.conf; \ + echo "BOOTSTRAP: waiting for TCP connectivity to cdn.amazonlinux.com:443..."; \ + CONNECTED=false; \ + for i in $(seq 1 30); do \ + bash -c "exec 3<>/dev/tcp/cdn.amazonlinux.com/443" 2>/dev/null && CONNECTED=true && break; \ + sleep 2; \ + done; \ + echo "BOOTSTRAP: TCP connected=$CONNECTED"; \ + echo "BOOTSTRAP: hosts=$(grep cdn.amazon /etc/hosts 2>/dev/null || echo none)"; \ + echo "BOOTSTRAP: installing openssh-server"; \ + if command -v yum >/dev/null 2>&1; then \ + yum install -y openssh-server; \ + elif command -v apt-get >/dev/null 2>&1; then \ + DEBIAN_FRONTEND=noninteractive apt-get install -y openssh-server; \ + fi' +ExecStartPost=/bin/bash -c 'systemctl enable --now 
sshd 2>/dev/null || systemctl enable --now ssh 2>/dev/null || true'
+ExecStartPost=/bin/bash -c 'touch /root/.qemu-sshd-bootstrapped'
+RemainAfterExit=yes
+StandardOutput=journal+console
+StandardError=journal+console
+
+[Install]
+WantedBy=multi-user.target
+SVCEOF
+            ln -sf /etc/systemd/system/qemu-bootstrap-sshd.service \
+                "$ROOTFS_DIR/etc/systemd/system/multi-user.target.wants/qemu-bootstrap-sshd.service"
+        fi
+
+        printf '/dev/vda / ext4 defaults 0 1\ntmpfs /tmp tmpfs defaults 0 0\n' \
+            > "$ROOTFS_DIR/etc/fstab"
+
+        # ── Fetch kernel from LXC donor disk image (lxc-disk mode) ──────────────
+        if [[ "$P_KERNEL_MODE" == "lxc-disk" ]]; then
+            [[ -z "$P_KERNEL_DONOR_SPEC" ]] && error "lxc-disk mode requires a KERNEL_DONOR_SPEC"
+            step "Fetching kernel from LXC donor: $P_KERNEL_DONOR_SPEC..."
+
+            IFS='/' read -r D_DISTRO D_RELEASE D_ARCH D_VARIANT <<< "$P_KERNEL_DONOR_SPEC"
+            D_VARIANT="${D_VARIANT:-cloud}"
+            DONOR_URL_BASE="$LXC_BASE/$D_DISTRO/$D_RELEASE/$D_ARCH/$D_VARIANT"
+
+            # Walk builds newest-first and take the first one whose SHA256SUMS
+            # lists disk.qcow2, so a build that is still uploading is skipped
+            # instead of failing the download. The listed hash is kept so the
+            # downloaded donor image can be checked below.
+            DONOR_BUILDS=$(curl -sf "$DONOR_URL_BASE/" \
+                | grep -oE '[0-9]{8}_[0-9]{2}:[0-9]{2}' \
+                | sort -ru) || true
+            [[ -z "$DONOR_BUILDS" ]] && error "No builds found for kernel donor at $DONOR_URL_BASE/"
+            DONOR_BUILD=""
+            DONOR_EXPECTED_HASH=""
+            for donor_candidate in $DONOR_BUILDS; do
+                DONOR_EXPECTED_HASH=$(curl -sf "$DONOR_URL_BASE/$donor_candidate/SHA256SUMS" \
+                    | awk '$2=="disk.qcow2" || $2=="./disk.qcow2" {print $1}') || true
+                if [[ -n "$DONOR_EXPECTED_HASH" ]]; then
+                    DONOR_BUILD="$donor_candidate"
+                    break
+                fi
+            done
+            [[ -z "$DONOR_BUILD" ]] && error "No complete build found for kernel donor $P_KERNEL_DONOR_SPEC (tried all candidates)"
+            info "Donor build: $DONOR_BUILD"
+
+            DONOR_QCOW="$WORK_DIR/kernel-donor.qcow2"
+            step "Downloading donor disk.qcow2..."
+            curl -f --progress-bar -o "$DONOR_QCOW" "$DONOR_URL_BASE/$DONOR_BUILD/disk.qcow2"
+
+            # Guards against a truncated or corrupted download; the donor's
+            # SHA256SUMS signature is not checked here.
+            DONOR_ACTUAL_HASH=$(sha256sum "$DONOR_QCOW" | awk '{print $1}')
+            [[ "$DONOR_EXPECTED_HASH" == "$DONOR_ACTUAL_HASH" ]] || \
+                error "SHA256 mismatch for donor disk.qcow2 (expected $DONOR_EXPECTED_HASH, got $DONOR_ACTUAL_HASH)"
+            info "Donor SHA256 checksum OK"
+
+            # Convert to raw so dd can extract partitions without losetup
+            DONOR_RAW="$WORK_DIR/kernel-donor.raw"
+            step "Converting donor image to raw..."
+            qemu-img convert -f qcow2 -O raw "$DONOR_QCOW" "$DONOR_RAW"
+            rm -f "$DONOR_QCOW"
+
+            # Parse the partition table with Python3 — reads MBR/GPT bytes directly
+            # from the raw file, no sfdisk/losetup/block device access needed.
+            # Outputs one "START SIZE" line per partition (in 512-byte sectors).
+ VMLINUZ_FOUND=false + DONOR_PART="$WORK_DIR/donor-part.raw" + while IFS=' ' read -r START SIZE; do + [[ -z "$START" || -z "$SIZE" ]] && continue + dd if="$DONOR_RAW" of="$DONOR_PART" bs=512 \ + skip="$START" count="$SIZE" conv=sparse 2>/dev/null + # Kernel files may be at root level (separate /boot partition on RHEL-style) + # or inside /boot (single-partition Debian-style root). + LS_ROOT=$(debugfs -R "ls /" "$DONOR_PART" 2>/dev/null || true) + LS_BOOT=$(debugfs -R "ls /boot" "$DONOR_PART" 2>/dev/null || true) + KDIR="" + LS_OUT="" + if echo "$LS_ROOT" | grep -q "vmlinuz-"; then + KDIR="/"; LS_OUT="$LS_ROOT" + elif echo "$LS_BOOT" | grep -q "vmlinuz-"; then + KDIR="/boot/"; LS_OUT="$LS_BOOT" + else + continue + fi + step "Extracting kernel+initrd from partition at sector $START (kdir=$KDIR)..." + VMLINUZ_NAME=$(echo "$LS_OUT" | grep -oE 'vmlinuz-[^ <]+' | grep -v '\.hmac$' | sort -V | tail -1 || true) + # RHEL names it initramfs-*.img; Debian names it initrd.img-* + INITRD_NAME=$(echo "$LS_OUT" | grep -oE 'initramfs-[^ <]+\.img' | sort -V | tail -1 || true) + if [[ -z "$INITRD_NAME" ]]; then + INITRD_NAME=$(echo "$LS_OUT" | grep -oE 'initrd\.img-[^ <]+' | sort -V | tail -1 || true) + fi + [[ -z "$VMLINUZ_NAME" ]] && continue + debugfs -R "dump ${KDIR}${VMLINUZ_NAME} $WORK_DIR/vmlinuz" "$DONOR_PART" 2>/dev/null || true + [[ -s "$WORK_DIR/vmlinuz" ]] || { warn "debugfs dump of vmlinuz empty (tried ${KDIR}${VMLINUZ_NAME})"; continue; } + if [[ -n "$INITRD_NAME" ]]; then + debugfs -R "dump ${KDIR}${INITRD_NAME} $WORK_DIR/initrd.img" "$DONOR_PART" 2>/dev/null || true + fi + VMLINUZ_FOUND=true + info "Kernel: $VMLINUZ_NAME" + [[ -n "$INITRD_NAME" ]] && info "Initrd: $INITRD_NAME" + break + done < <(python3 - "$DONOR_RAW" <<'PYEOF' +import struct, sys + +def parse(path): + with open(path, 'rb') as f: + s0 = f.read(512) + if len(s0) < 512 or s0[510:512] != b'\x55\xaa': + return + if s0[446 + 4] == 0xEE: # protective MBR → GPT disk + with open(path, 'rb') as f: + 
f.seek(512); h = f.read(512) + if h[:8] != b'EFI PART': + return + elba = struct.unpack_from(' 0: + print(s, end - s + 1) + else: # MBR / DOS partition table + for i in range(4): + e = s0[446 + i * 16:462 + i * 16] + s, n = struct.unpack_from(' 0 and n > 0 and e[4] != 0: + print(s, n) + +parse(sys.argv[1]) +PYEOF +) + # When kdir=/boot/ the donor partition IS the root filesystem; + # /lib/modules/ lives on the same partition. LXC container rootfs + # images (AL2, etc.) ship no kernel modules because containers share + # the host kernel. Copy the donor's modules into the rootfs so that + # udev can load virtio_net (and other drivers) after switch_root. + if [[ "$VMLINUZ_FOUND" == "true" && "$KDIR" == "/boot/" && -n "$VMLINUZ_NAME" ]]; then + KERN_VER="${VMLINUZ_NAME#vmlinuz-}" + if [[ ! -d "$ROOTFS_DIR/lib/modules/$KERN_VER" ]]; then + step "Extracting kernel modules from donor (for $KERN_VER)..." + DONOR_MOD_TMP="$WORK_DIR/donor-mod-tmp" + rm -rf "$DONOR_MOD_TMP" + mkdir -p "$DONOR_MOD_TMP" + # debugfs rdump SOURCE DEST creates SOURCE's last path component + # as a subdirectory of DEST, so /lib/modules → DEST/modules/ + debugfs -R "rdump /lib/modules $DONOR_MOD_TMP" "$DONOR_PART" 2>/dev/null || true + DONOR_KERN_DIR="" + for _cand in \ + "$DONOR_MOD_TMP/modules/$KERN_VER" \ + "$DONOR_MOD_TMP/$KERN_VER"; do + [[ -d "$_cand" ]] && DONOR_KERN_DIR="$_cand" && break + done + if [[ -n "$DONOR_KERN_DIR" ]]; then + mkdir -p "$ROOTFS_DIR/lib/modules" + cp -a "$DONOR_KERN_DIR" "$ROOTFS_DIR/lib/modules/" + info " Kernel modules ($KERN_VER) installed in rootfs" + else + warn " Could not find /lib/modules/$KERN_VER in donor; virtio_net may not load" + fi + rm -rf "$DONOR_MOD_TMP" + fi + fi + + rm -f "$DONOR_PART" "$DONOR_RAW" + [[ "$VMLINUZ_FOUND" != "true" ]] && error "No vmlinuz found in donor disk image" + fi + + # ── Create disk image from rootfs directory (no mount needed) ───────── + step "Creating disk image via mkfs.ext4 -d (no losetup required)..." 
+ RAW_IMAGE="$WORK_DIR/vm.raw" + DISK_SIZE="${VM_DISK_SIZE:-10G}" + truncate -s "$DISK_SIZE" "$RAW_IMAGE" + # Write the filesystem directly onto the raw file — no partition table needed + # since we boot with -kernel (root=/dev/vda, no partition suffix). + # This also eliminates the parted + losetup dependency entirely. + mkfs.ext4 -F -L root -d "$ROOTFS_DIR" "$RAW_IMAGE" + # Some e2fsprogs versions size the filesystem to directory contents rather + # than the full file when -d is used; resize2fs corrects that. + resize2fs "$RAW_IMAGE" &>/dev/null || true + + step "Converting to qcow2..." + qemu-img convert -f raw -O qcow2 "$RAW_IMAGE" "$DISK_IMAGE" + rm -f "$RAW_IMAGE" + info "Disk image ready: $DISK_IMAGE" + fi + + # Build QEMU command for direct kernel boot + QEMU_KERNEL="$WORK_DIR/vmlinuz" + QEMU_INITRD="$WORK_DIR/initrd.img" + [[ -f "$QEMU_KERNEL" ]] || error "Kernel not found at $QEMU_KERNEL" + + # net.ifnames=0: use eth0 naming so NetworkManager/ifcfg finds the NIC + KERNEL_APPEND="root=/dev/vda rw console=ttyS0,115200n8 net.ifnames=0 biosdevname=0 ip=10.0.2.15::10.0.2.2:255.255.255.0::eth0:off" + + QEMU_CMD=( + "$QEMU_BIN" + "${QEMU_MACHINE_ARGS[@]}" + -m "$VM_MEMORY" + -smp "$VM_CPUS" + -kernel "$QEMU_KERNEL" + -append "$KERNEL_APPEND" + -drive "file=${DISK_IMAGE},if=virtio,format=qcow2" + -netdev "user,id=net0,hostfwd=tcp::${SSH_HOST_PORT}-:22" + -device virtio-net-pci,netdev=net0 + -display none + -serial "file:${WORK_DIR}/console.log" + ) + [[ -f "$QEMU_INITRD" ]] && QEMU_CMD+=(-initrd "$QEMU_INITRD") + [[ -n "$UEFI_FIRMWARE" ]] && QEMU_CMD+=(-bios "$UEFI_FIRMWARE") + if [[ -e /dev/kvm ]]; then + QEMU_CMD+=(-enable-kvm) + else + # Without KVM (macOS/Rosetta), QEMU uses TCG software emulation. The + # default 32 MB JIT translation-block cache fills up during a heavy + # dracut initrd boot (full systemd init), and the subsequent forced + # tb_flush triggers mprotect(PROT_WRITE) on exec pages — which Rosetta + # blocks with ENOMEM. 
A 512 MB cache avoids eviction entirely. + QEMU_CMD+=(-accel tcg,tb-size=512) + fi + +fi + +# ── Boot ────────────────────────────────────────────────────────────────────── +step "Booting VM..." +info "QEMU: ${QEMU_CMD[*]}" +> "$WORK_DIR/console.log" +"${QEMU_CMD[@]}" & +QEMU_PID=$! +info "QEMU PID: $QEMU_PID" + +# ── Wait for SSH ────────────────────────────────────────────────────────────── +# rootfs mode may need extra time on first boot to install openssh-server via yum/apt. +SSH_MAX_WAIT=60 +[[ "$P_USERSPACE_FILE" != "disk.qcow2" ]] && SSH_MAX_WAIT=120 +step "Waiting for VM SSH (up to $((SSH_MAX_WAIT * 5 / 60)) min)..." +SSH_CONNECTED=false +for i in $(seq 1 $SSH_MAX_WAIT); do + if ssh "${SSH_OPTS[@]}" root@localhost "echo ok" &>/dev/null; then + SSH_CONNECTED=true + info "SSH connected (${i}×5 s = $((i*5)) s elapsed)." + break + fi + sleep 5 + if ! kill -0 "$QEMU_PID" 2>/dev/null; then + warn "QEMU exited unexpectedly. Boot console:" + cat "$WORK_DIR/console.log" >&2 + error "VM boot failed." + fi +done + +if [[ "$SSH_CONNECTED" != "true" ]]; then + warn "SSH timeout. Boot console:" + cat "$WORK_DIR/console.log" >&2 + error "Could not SSH into VM after $((SSH_MAX_WAIT * 5 / 60)) minutes." +fi + +# ── Copy working directory into VM ─────────────────────────────────────────── +step "Copying workdir to VM at /mnt/host..." +ssh "${SSH_OPTS[@]}" root@localhost "mkdir -p /mnt/host" +scp -r -P "$SSH_HOST_PORT" -i "$WORK_DIR/vm_key" \ + -o StrictHostKeyChecking=no -o BatchMode=yes -o LogLevel=ERROR \ + "$HOST_WORKDIR/." root@localhost:/mnt/host/ + +# ── Install Swift toolchain ─────────────────────────────────────────────────── +if [[ "$NO_SWIFT" != "true" ]]; then + step "Installing Swift toolchain via swiftly..." 
+ ssh "${SSH_OPTS[@]}" root@localhost \ + "cd /mnt/host && bash scripts/prep-linux-swift.sh --install-swiftly" +fi + +# ── Run guest command ───────────────────────────────────────────────────────── +step "Running guest command: $GUEST_COMMAND" +ssh "${SSH_OPTS[@]}" root@localhost bash < Date: Sat, 30 May 2026 08:08:23 -0400 Subject: [PATCH 02/32] Code cleanup and fix various checks --- .github/workflows/pull_request.yml | 4 ++-- scripts/prep-linux-swift.sh | 13 ++++++++++++- scripts/test-using-qemu.sh | 22 +++++++++++++++++++--- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index d0ce7376..bde0c945 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -64,8 +64,8 @@ jobs: strategy: fail-fast: false matrix: - # These are specific distro and kernel versions that the test qemu script supports from here: https://images.linuxcontainers.org - dist-kern: ["al2-4.18", "al2-5.10"] + # These are specific distro and kernel versions that the test qemu script supports from here: https://images.linuxcontainers.org + dist-kern: ["al2-4.18", "al2-5.10"] container: image: ubuntu:24.04 steps: diff --git a/scripts/prep-linux-swift.sh b/scripts/prep-linux-swift.sh index 3175a238..60a9447e 100755 --- a/scripts/prep-linux-swift.sh +++ b/scripts/prep-linux-swift.sh @@ -1,4 +1,14 @@ #!/bin/bash +##===----------------------------------------------------------------------===## +## +## This source file is part of the Swift.org open source project +## +## Copyright (c) 2026 Apple Inc. 
and the Swift project authors +## Licensed under Apache License v2.0 with Runtime Library Exception +## +## See https://swift.org/LICENSE.txt for license information +## +##===----------------------------------------------------------------------===## # This script does a bit of extra preparation of the docker containers used to run the GitHub workflows # that are specific to this project's needs when building/testing. Note that this script runs on @@ -31,7 +41,8 @@ done if [ "$installSwiftly" == true ]; then echo "Installing swiftly" - curl -O https://download.swift.org/swiftly/linux/swiftly-$(uname -m).tar.gz && tar zxf swiftly-*.tar.gz && ./swiftly init -y --skip-install + curl -O "https://download.swift.org/swiftly/linux/swiftly-$(uname -m).tar.gz" && tar zxf swiftly-*.tar.gz && ./swiftly init -y --skip-install + # shellcheck source=/dev/null . "/root/.local/share/swiftly/env.sh" hash -r diff --git a/scripts/test-using-qemu.sh b/scripts/test-using-qemu.sh index 25246112..70108f28 100755 --- a/scripts/test-using-qemu.sh +++ b/scripts/test-using-qemu.sh @@ -1,4 +1,14 @@ #!/usr/bin/env bash +##===----------------------------------------------------------------------===## +## +## This source file is part of the Swift.org open source project +## +## Copyright (c) 2026 Apple Inc. 
and the Swift project authors +## Licensed under Apache License v2.0 with Runtime Library Exception +## +## See https://swift.org/LICENSE.txt for license information +## +##===----------------------------------------------------------------------===## # test-using-qemu.sh # # Boots a Linux VM under QEMU to run commands (default: swift test) against a @@ -16,7 +26,6 @@ set -euo pipefail -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" LXC_BASE="https://images.linuxcontainers.org/images" LXC_GPG_KEY_IDS=( "602F567663359FCDE9BCD0E79F93B4C4F3D4444A" @@ -214,6 +223,7 @@ case "$ARCH" in # BNDMOV/WRPKRU instructions; under macOS Rosetta this fills the JIT page # budget and triggers mprotect(ENOMEM) → SIGTRAP. Neither feature is needed # by Swift/LLVM, and Linux 5.6+ already dropped MPX support. + # shellcheck disable=SC2054 QEMU_MACHINE_ARGS=(-machine q35 -cpu max,mpx=off,pku=off) UEFI_PKG="" UEFI_FIRMWARE_SEARCH=() @@ -402,6 +412,7 @@ USERDATA info "Overlay resized to $VM_DISK_SIZE (cloud-init will expand partition on first boot)" fi + # shellcheck disable=SC2054 QEMU_CMD=( "$QEMU_BIN" "${QEMU_MACHINE_ARGS[@]}" @@ -418,6 +429,7 @@ USERDATA if [[ -e /dev/kvm ]]; then QEMU_CMD+=(-enable-kvm) else + # shellcheck disable=SC2054 QEMU_CMD+=(-accel tcg,tb-size=512) fi @@ -505,7 +517,7 @@ print(r[0][4][0]) fi step "Rootfs network/repo state:" - info " resolv.conf: $(cat "$ROOTFS_DIR/etc/resolv.conf" 2>/dev/null | tr '\n' ' ')" + info " resolv.conf: $(tr '\n' ' ' < "$ROOTFS_DIR/etc/resolv.conf" 2>/dev/null)" info " ifcfg-eth0: $(grep -s BOOTPROTO "$ROOTFS_DIR/etc/sysconfig/network-scripts/ifcfg-eth0" 2>/dev/null || echo 'absent')" info " yum repos: $(grep -rh 'mirrorlist\|baseurl' "$ROOTFS_DIR/etc/yum.repos.d/" 2>/dev/null | tr '\n' '|' || echo 'none')" info " yum vars: awsproto=$(cat "$ROOTFS_DIR/etc/yum/vars/awsproto" 2>/dev/null || echo 'unset') amazonlinux=$(cat "$ROOTFS_DIR/etc/yum/vars/amazonlinux" 2>/dev/null || echo 'unset') awsregion=$(cat 
"$ROOTFS_DIR/etc/yum/vars/awsregion" 2>/dev/null || echo 'unset') awsdomain=$(cat "$ROOTFS_DIR/etc/yum/vars/awsdomain" 2>/dev/null || echo 'unset')" @@ -811,6 +823,7 @@ PYEOF # net.ifnames=0: use eth0 naming so NetworkManager/ifcfg finds the NIC KERNEL_APPEND="root=/dev/vda rw console=ttyS0,115200n8 net.ifnames=0 biosdevname=0 ip=10.0.2.15::10.0.2.2:255.255.255.0::eth0:off" + # shellcheck disable=SC2054 QEMU_CMD=( "$QEMU_BIN" "${QEMU_MACHINE_ARGS[@]}" @@ -834,6 +847,7 @@ PYEOF # dracut initrd boot (full systemd init), and the subsequent forced # tb_flush triggers mprotect(PROT_WRITE) on exec pages — which Rosetta # blocks with ENOMEM. A 512 MB cache avoids eviction entirely. + # shellcheck disable=SC2054 QEMU_CMD+=(-accel tcg,tb-size=512) fi @@ -842,7 +856,7 @@ fi # ── Boot ────────────────────────────────────────────────────────────────────── step "Booting VM..." info "QEMU: ${QEMU_CMD[*]}" -> "$WORK_DIR/console.log" +: > "$WORK_DIR/console.log" "${QEMU_CMD[@]}" & QEMU_PID=$! info "QEMU PID: $QEMU_PID" @@ -889,6 +903,8 @@ fi # ── Run guest command ───────────────────────────────────────────────────────── step "Running guest command: $GUEST_COMMAND" +# $GUEST_COMMAND is intentionally expanded on the client side. 
+# shellcheck disable=SC2087 ssh "${SSH_OPTS[@]}" root@localhost bash < Date: Sat, 30 May 2026 08:20:09 -0400 Subject: [PATCH 03/32] Fix remaining license header check failure --- scripts/prep-linux-swift.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/prep-linux-swift.sh b/scripts/prep-linux-swift.sh index 60a9447e..df07ab7d 100755 --- a/scripts/prep-linux-swift.sh +++ b/scripts/prep-linux-swift.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash ##===----------------------------------------------------------------------===## ## ## This source file is part of the Swift.org open source project From 05d9966e780cb39e5bf75f26f54766069e88d9c0 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Sat, 30 May 2026 08:21:29 -0400 Subject: [PATCH 04/32] Fix remaining license header check failure --- scripts/prep-linux-swift.sh | 2 +- scripts/test-using-qemu.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/prep-linux-swift.sh b/scripts/prep-linux-swift.sh index df07ab7d..60a9447e 100755 --- a/scripts/prep-linux-swift.sh +++ b/scripts/prep-linux-swift.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash +#!/bin/bash ##===----------------------------------------------------------------------===## ## ## This source file is part of the Swift.org open source project diff --git a/scripts/test-using-qemu.sh b/scripts/test-using-qemu.sh index 70108f28..7a575494 100755 --- a/scripts/test-using-qemu.sh +++ b/scripts/test-using-qemu.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash +#!/bin/bash ##===----------------------------------------------------------------------===## ## ## This source file is part of the Swift.org open source project From c33326bb2ac2e4880f4dcd337c968ba9b7fd834a Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Sat, 30 May 2026 12:49:55 -0400 Subject: [PATCH 05/32] Fix tests so that they work in a slow qemu environment --- Tests/SubprocessTests/UnixTests.swift | 28 +++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 
deletions(-) diff --git a/Tests/SubprocessTests/UnixTests.swift b/Tests/SubprocessTests/UnixTests.swift index fd28d298..9164424b 100644 --- a/Tests/SubprocessTests/UnixTests.swift +++ b/Tests/SubprocessTests/UnixTests.swift @@ -591,7 +591,8 @@ extension SubprocessUnixTests { // // Spawns 16 `bash` children, each with a SIGTERM trap that sleeps one second // before exiting. Every body closure sends SIGTERM at roughly the same instant, - // so all 16 children finish their trap and become zombies inside a single small. + // so all 16 children finish their trap and become zombies inside a single small + // window. @Test(.requiresBash) func testConcurrentSlowExitsDoNotHang() async throws { // 16 concurrent slow-to-exit children is enough to flood the // monitor and trigger the burst on Linux. @@ -600,7 +601,13 @@ extension SubprocessUnixTests { // POSIX sh) is required: `sh -c 'trap ...; sleep 300'` would defer // the trap until `sleep 300` completes, while bash interrupts // `wait` immediately on signal. - let script = "trap 'sleep 1; exit 0' TERM; sleep 300 & wait" + // + // `echo x` prints a readiness byte to stdout after the trap is + // installed. bash flushes stdio before forking `sleep 300`, so + // by the time the parent reads that byte the trap is guaranteed + // to be in place. This replaces the fixed 100 ms sleep which is + // too short on slow systems (e.g. QEMU without KVM). + let script = "trap 'sleep 1; exit 0' TERM; echo x; sleep 300 & wait" try await withThrowingTaskGroup(of: TerminationStatus.self) { group in for _ in 0.. 
Date: Sat, 30 May 2026 16:07:43 -0400 Subject: [PATCH 06/32] Improved ergonomics of test script for interactive debugging --- .github/workflows/pull_request.yml | 2 +- scripts/test-using-qemu.sh | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index bde0c945..7e50862f 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -72,7 +72,7 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 - name: Run Test - run: bash -c './scripts/test-using-qemu.sh ${{ matrix.dist-kern }}' + run: bash -c './scripts/test-using-qemu.sh ${{ matrix.dist-kern }} -- swift test --no-parallel' soundness: name: Soundness diff --git a/scripts/test-using-qemu.sh b/scripts/test-using-qemu.sh index 7a575494..ddc0bc9f 100755 --- a/scripts/test-using-qemu.sh +++ b/scripts/test-using-qemu.sh @@ -138,7 +138,7 @@ PROFILE (default: al2-5.10): GUEST COMMAND (after --): Runs as root inside the VM, in /mnt/host, with Swift on PATH. - Default: swift test + Default: bash (interactive, when stdin is a terminal) or swift test (otherwise) ENVIRONMENT: VM_MEMORY VM_CPUS SSH_PORT KEEP_WORK WORK_DIR VM_DISK_SIZE TMPDIR Note: use "sudo VAR=val ./$(basename "$0")" — plain "VAR=val sudo ..." is stripped by sudo. @@ -158,7 +158,8 @@ EOF # ── Argument parsing ────────────────────────────────────────────────────────── HOST_WORKDIR="$(pwd)" PROFILE_NAME="al2-5.10" -GUEST_COMMAND="swift test" +# Default to an interactive shell when stdin is a terminal, swift test otherwise +[[ -t 0 ]] && GUEST_COMMAND="bash" || GUEST_COMMAND="swift test" KEEP_IMAGE=false SKIP_INSTALL=false NO_SWIFT=false @@ -903,12 +904,20 @@ fi # ── Run guest command ───────────────────────────────────────────────────────── step "Running guest command: $GUEST_COMMAND" -# $GUEST_COMMAND is intentionally expanded on the client side. 
-# shellcheck disable=SC2087 -ssh "${SSH_OPTS[@]}" root@localhost bash < Date: Sat, 30 May 2026 17:18:05 -0400 Subject: [PATCH 07/32] Unlock subprocess fork lock after failures in clone3 --- Sources/_SubprocessCShims/process_shims.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Sources/_SubprocessCShims/process_shims.c b/Sources/_SubprocessCShims/process_shims.c index 76f46d72..f699dad3 100644 --- a/Sources/_SubprocessCShims/process_shims.c +++ b/Sources/_SubprocessCShims/process_shims.c @@ -576,6 +576,7 @@ int _subprocess_fork_exec( if (rc != 0) { close(pipefd[0]); close(pipefd[1]); + pthread_mutex_unlock(&_subprocess_fork_lock); return errno; } @@ -594,6 +595,7 @@ int _subprocess_fork_exec( // Report all other errors close(pipefd[0]); close(pipefd[1]); + pthread_mutex_unlock(&_subprocess_fork_lock); return errno; } } @@ -602,6 +604,7 @@ int _subprocess_fork_exec( // Fork failed close(pipefd[0]); close(pipefd[1]); + pthread_mutex_unlock(&_subprocess_fork_lock); return errno; } @@ -758,6 +761,7 @@ int _subprocess_fork_exec( // Restore old signmask rc = pthread_sigmask(SIG_SETMASK, &old_sigmask, NULL); if (rc != 0) { + pthread_mutex_unlock(&_subprocess_fork_lock); reap_child_process_and_return_errno; } From 80d682ab4d65fff2de2f8aff03021b1fe7f51d7f Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Sun, 31 May 2026 08:43:19 -0400 Subject: [PATCH 08/32] Close the pidfd once epoll no longer references it --- Sources/Subprocess/Platforms/Subprocess+Linux.swift | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Sources/Subprocess/Platforms/Subprocess+Linux.swift b/Sources/Subprocess/Platforms/Subprocess+Linux.swift index 198c97b8..953b79bc 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Linux.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Linux.swift @@ -456,6 +456,10 @@ private func _unregisterProcessDescriptorAndNotify(_ pidfd: CInt, context: Monit ) return (continuationList, error) } + // Close the pidfd now that epoll no longer references it. 
+ // reapProcess uses P_PID (not P_PIDFD) so the fd is not + // needed past this point. + _ = Glibc.close(pidfd) return (continuationList, nil) } From 0943dca73bb4ca52d18d8bd6cc6bbfe6fa8f23ee Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Sun, 31 May 2026 12:44:15 -0400 Subject: [PATCH 09/32] Revert closing pidfds, adjust concurrency limit used for tests --- Sources/Subprocess/Platforms/Subprocess+Linux.swift | 9 +++++---- Tests/SubprocessTests/UnixTests.swift | 10 ++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/Sources/Subprocess/Platforms/Subprocess+Linux.swift b/Sources/Subprocess/Platforms/Subprocess+Linux.swift index 953b79bc..ff13de67 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Linux.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Linux.swift @@ -456,10 +456,11 @@ private func _unregisterProcessDescriptorAndNotify(_ pidfd: CInt, context: Monit ) return (continuationList, error) } - // Close the pidfd now that epoll no longer references it. - // reapProcess uses P_PID (not P_PIDFD) so the fd is not - // needed past this point. - _ = Glibc.close(pidfd) + // The pidfd is intentionally left open here. It is owned by + // ProcessIdentifier and will be closed by processIdentifier.close() + // in the defer in Configuration.swift once monitoring is fully done. + // Closing it here would free the fd number and allow it to be recycled + // before that defer runs, causing a close-the-wrong-fd race. return (continuationList, nil) } diff --git a/Tests/SubprocessTests/UnixTests.swift b/Tests/SubprocessTests/UnixTests.swift index 9164424b..df50da5f 100644 --- a/Tests/SubprocessTests/UnixTests.swift +++ b/Tests/SubprocessTests/UnixTests.swift @@ -714,10 +714,12 @@ extension SubprocessUnixTests { // Constrain to an ultimate upper limit of 4096, since Docker containers can have limits like 2^20 which is a bit too high for this test. // Common defaults are 2560 for macOS and 1024 for Linux. 
let limit = min(ulimit, 4096) - // Since we open two pipes per `run`, launch - // limit / 4 subprocesses should reveal any - // file descriptor leaks - let maxConcurrent = limit / 4 + // Each concurrent spawn holds both ends of the stdout and stderr pipes + // plus a temporary exec-error notification pipe while the child's exec() + // completes — roughly 6 fds per in-flight spawn regardless of whether + // stdin is connected. Divide by 8 to leave headroom for the process's + // own fds and avoid EMFILE under high concurrency. + let maxConcurrent = limit / 8 try await withThrowingTaskGroup(of: Void.self) { group in var running = 0 let byteCount = 1000 From 683b38dda2de056a6ee956306399040d65e2132a Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Sun, 31 May 2026 16:31:16 -0400 Subject: [PATCH 10/32] Better handling for epoll errors --- .../Platforms/Subprocess+Linux.swift | 35 ++++++++----------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/Sources/Subprocess/Platforms/Subprocess+Linux.swift b/Sources/Subprocess/Platforms/Subprocess+Linux.swift index ff13de67..0350f340 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Linux.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Linux.swift @@ -430,8 +430,7 @@ internal func _setupMonitorSignalHandler() { } private func _unregisterProcessDescriptorAndNotify(_ pidfd: CInt, context: MonitorThreadContext) { - // Remove the continuation - let result = _processMonitorState.withLock { state -> (continuations: [CheckedContinuation], error: SubprocessError?)? in + let continuations = _processMonitorState.withLock { state -> [CheckedContinuation]? 
in guard case .started(let storage) = state, let continuationList = storage.continuations[pidfd] else { @@ -442,41 +441,35 @@ private func _unregisterProcessDescriptorAndNotify(_ pidfd: CInt, context: Monit newStorage.continuations.removeValue(forKey: pidfd) state = .started(newStorage) - // Remove this pidfd from epoll to prevent further notifications - let rc = epoll_ctl( + // Remove this pidfd from epoll to prevent further notifications. + // Ignore the return value: if DEL fails (e.g., ENOENT due to a + // concurrent removal, or a transient kernel error on older 5.x kernels), + // the process has still exited and the continuation must be resumed + // normally. The fd is removed from epoll automatically by the kernel + // when processIdentifier.close() closes it, so a failed DEL here is + // never permanent. Propagating a DEL error as a monitoring failure + // would trigger onCleanup → SIGKILL against an already-dead process. + _ = epoll_ctl( context.epollFileDescriptor, EPOLL_CTL_DEL, pidfd, nil ) - if rc != 0 { - let epollErrno = errno - let error = SubprocessError.failedToMonitor( - withUnderlyingError: Errno(rawValue: epollErrno) - ) - return (continuationList, error) - } // The pidfd is intentionally left open here. It is owned by // ProcessIdentifier and will be closed by processIdentifier.close() // in the defer in Configuration.swift once monitoring is fully done. // Closing it here would free the fd number and allow it to be recycled // before that defer runs, causing a close-the-wrong-fd race. 
- return (continuationList, nil) + return continuationList } - guard let result else { + guard let continuations else { return } - if let error = result.error { - for c in result.continuations { - c.resume(throwing: error) - } - } else { - for c in result.continuations { - c.resume() - } + for c in continuations { + c.resume() } } From b70141e5449e5193b77403c0dad90732a0b2f206 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Sun, 31 May 2026 18:50:42 -0400 Subject: [PATCH 11/32] Bump memory for test VM --- scripts/test-using-qemu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test-using-qemu.sh b/scripts/test-using-qemu.sh index ddc0bc9f..3e451775 100755 --- a/scripts/test-using-qemu.sh +++ b/scripts/test-using-qemu.sh @@ -34,7 +34,7 @@ LXC_GPG_KEY_IDS=( # ── Defaults ────────────────────────────────────────────────────────────────── WORK_DIR="${WORK_DIR:-${TMPDIR:-/tmp}/qemu-swift-$$}" -VM_MEMORY="${VM_MEMORY:-2048}" +VM_MEMORY="${VM_MEMORY:-4096}" VM_CPUS="${VM_CPUS:-2}" SSH_HOST_PORT="${SSH_PORT:-2222}" KEEP_WORK="${KEEP_WORK:-false}" From c540777bf43f6b19254f3b9fb193224149e38bab Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Sun, 31 May 2026 19:28:18 -0400 Subject: [PATCH 12/32] Gracefully handle epoll_ctl(DEL) failures, prevent single task failure cascades during testing Throttle thresholds for constrainted qemu environments --- Sources/Subprocess/Configuration.swift | 12 ++++++-- Tests/SubprocessTests/UnixTests.swift | 40 ++++++++++++++++---------- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/Sources/Subprocess/Configuration.swift b/Sources/Subprocess/Configuration.swift index d64b1b64..cbf65368 100644 --- a/Sources/Subprocess/Configuration.swift +++ b/Sources/Subprocess/Configuration.swift @@ -134,8 +134,16 @@ public struct Configuration: Sendable { if taskFinishFlag.addOne() == 1 { // The body closure hasn't finished but the child // process has terminated. Cancel all active - // AsyncIO now. 
- try AsyncIO.shared.cancelAsyncIO(for: processIdentifier) + // AsyncIO now. Use try? rather than try: the + // process has already exited, so any epoll DEL + // failure here is harmless — dropped continuations + // will terminate the I/O streams on their own. + // Propagating this error as monitorError would + // trigger onCleanup → SIGKILL on an already-dead + // process, and if Subprocess.run() then throws, + // the caller's task group cascades cancellation to + // other live processes. + try? AsyncIO.shared.cancelAsyncIO(for: processIdentifier) } return nil } catch { diff --git a/Tests/SubprocessTests/UnixTests.swift b/Tests/SubprocessTests/UnixTests.swift index df50da5f..1859f197 100644 --- a/Tests/SubprocessTests/UnixTests.swift +++ b/Tests/SubprocessTests/UnixTests.swift @@ -725,24 +725,34 @@ extension SubprocessUnixTests { let byteCount = 1000 for _ in 0..&2"#, "--", String(repeating: "X", count: byteCount), - ], - output: .data(limit: .max), - error: .data(limit: .max) - ) - guard r.terminationStatus.isSuccess else { - Issue.record("Unexpected exit \(r.terminationStatus) from \(r.processIdentifier)") - return + // Catch errors so a single spawn/monitor failure doesn't + // cascade-cancel sibling tasks (which would SIGKILL their + // live subprocesses and flood the log with false failures). 
+ do { + // This invocation specifically requires bash semantics; sh (on FreeBSD at least) does not consistently support -s in this way + let r = try await Subprocess.run( + .name("bash"), + arguments: [ + "-sc", #"echo "$1" && echo "$1" >&2"#, "--", String(repeating: "X", count: byteCount), + ], + output: .data(limit: .max), + error: .data(limit: .max) + ) + guard r.terminationStatus.isSuccess else { + Issue.record("Unexpected exit \(r.terminationStatus) from \(r.processIdentifier)") + return + } + #expect(r.standardOutput.count == byteCount + 1, "\(r.standardOutput)") + #expect(r.standardError.count == byteCount + 1, "\(r.standardError)") + } catch { + Issue.record("Subprocess.run threw: \(error)") } - #expect(r.standardOutput.count == byteCount + 1, "\(r.standardOutput)") - #expect(r.standardError.count == byteCount + 1, "\(r.standardError)") } running += 1 - if running >= maxConcurrent / 4 { + // Throttle to maxConcurrent/8 live subprocesses at a time + // (rather than /4) to reduce peak memory pressure on + // memory-constrained kernel-testing VMs (e.g. QEMU + 5.10). 
+ if running >= maxConcurrent / 8 { try await group.next() } } From 6db447a3c48261886fdd6cf6412b85607ced02dd Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Sun, 31 May 2026 20:09:13 -0400 Subject: [PATCH 13/32] Add EMFILE and ENFILE to the ENOSYS fallback condition --- Sources/_SubprocessCShims/process_shims.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Sources/_SubprocessCShims/process_shims.c b/Sources/_SubprocessCShims/process_shims.c index f699dad3..5afe07fb 100644 --- a/Sources/_SubprocessCShims/process_shims.c +++ b/Sources/_SubprocessCShims/process_shims.c @@ -585,8 +585,9 @@ int _subprocess_fork_exec( // First attempt to create a process file descriptor on supported platforms, only fall back to fork if those are not available pid_t childPid = _subprocess_pdfork(&_pidfd); if (childPid < 0) { - if (errno == ENOSYS) { - // process file descriptor is not implemented. Use fork instead + if (errno == ENOSYS || errno == EMFILE || errno == ENFILE) { + // pidfd not available (ENOSYS) or no fd slots available (EMFILE/ENFILE); + // fall back to plain fork() without a pidfd. 
#pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated" childPid = fork(); From c3c96e678e4617e4ce5ac9749f5f89bc38a35e37 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Mon, 1 Jun 2026 07:08:11 -0400 Subject: [PATCH 14/32] Revert change to catch EMFILE/ENFILE errors on pdfork --- Sources/_SubprocessCShims/process_shims.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Sources/_SubprocessCShims/process_shims.c b/Sources/_SubprocessCShims/process_shims.c index 5afe07fb..f699dad3 100644 --- a/Sources/_SubprocessCShims/process_shims.c +++ b/Sources/_SubprocessCShims/process_shims.c @@ -585,9 +585,8 @@ int _subprocess_fork_exec( // First attempt to create a process file descriptor on supported platforms, only fall back to fork if those are not available pid_t childPid = _subprocess_pdfork(&_pidfd); if (childPid < 0) { - if (errno == ENOSYS || errno == EMFILE || errno == ENFILE) { - // pidfd not available (ENOSYS) or no fd slots available (EMFILE/ENFILE); - // fall back to plain fork() without a pidfd. + if (errno == ENOSYS) { + // process file descriptor is not implemented. 
Use fork instead #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated" childPid = fork(); From e086ce20d449c357f5b8d9bc64542350de7b5bdd Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Mon, 1 Jun 2026 08:45:26 -0400 Subject: [PATCH 15/32] Fix two pidfd leak cases, and calculate available concurrency by reading ulimit directly on Linux --- .../Platforms/Subprocess+Linux.swift | 1 + .../Platforms/Subprocess+Unix.swift | 6 +++ Tests/SubprocessTests/UnixTests.swift | 44 +++++++++---------- 3 files changed, 27 insertions(+), 24 deletions(-) diff --git a/Sources/Subprocess/Platforms/Subprocess+Linux.swift b/Sources/Subprocess/Platforms/Subprocess+Linux.swift index 0350f340..48c2e06d 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Linux.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Linux.swift @@ -545,6 +545,7 @@ internal func _isWaitprocessDescriptorSupported() -> Bool { // If we can not retrieve pidfd, the system does not support waitid(P_PIDFD) return false } + defer { try? FileDescriptor(rawValue: selfPidfd).close() } /// The following call will fail either with /// - ECHILD: in this case we know P_PIDFD is supported and waitid correctly /// reported that we don't have a child with the same selfPidfd; diff --git a/Sources/Subprocess/Platforms/Subprocess+Unix.swift b/Sources/Subprocess/Platforms/Subprocess+Unix.swift index 87eafe71..b2e02282 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Unix.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Unix.swift @@ -526,6 +526,12 @@ extension Configuration { // Spawn error if spawnError != 0 { if [ENOENT, EACCES, ENOTDIR].contains(spawnError) { + // clone3(CLONE_PIDFD) allocates a pidfd before exec runs. + // If exec fails we retry with the next candidate path, so + // close the pidfd here to avoid leaking it across retries. + if processDescriptor > 0 { + try? 
FileDescriptor(rawValue: processDescriptor).close() + } // Move on to another possible path continue } diff --git a/Tests/SubprocessTests/UnixTests.swift b/Tests/SubprocessTests/UnixTests.swift index 1859f197..b05d55a7 100644 --- a/Tests/SubprocessTests/UnixTests.swift +++ b/Tests/SubprocessTests/UnixTests.swift @@ -695,31 +695,27 @@ internal func assertNewSessionCreated( extension SubprocessUnixTests { #if SubprocessFoundation @Test(.requiresBash) func testConcurrentRun() async throws { - // Launch as many processes as we can - // Figure out the max open file limit - let limitResult = try await Subprocess.run( - .path("/bin/sh"), - arguments: ["-c", "ulimit -n"], - output: .string(limit: 32) - ) - guard - let limitString = limitResult - .standardOutput? - .trimmingCharacters(in: .whitespacesAndNewlines), - let ulimit = Int(limitString) - else { - Issue.record("Failed to run ulimit -n") - return - } - // Constrain to an ultimate upper limit of 4096, since Docker containers can have limits like 2^20 which is a bit too high for this test. - // Common defaults are 2560 for macOS and 1024 for Linux. - let limit = min(ulimit, 4096) + // Read the soft fd limit directly rather than spawning a helper process. + // Cap at 4096: Docker containers can report limits like 2^20. + var rl = rlimit() + getrlimit(Int32(RLIMIT_NOFILE.rawValue), &rl) + let softLimit = Int(min(rl.rlim_cur, rlim_t(4096))) + + // On Linux, account for any fds already open (e.g. from prior tests in + // the same suite) to avoid hitting EMFILE during the concurrent spawn + // burst. /proc/self/fd lists every open descriptor; subtracting the + // current count plus a small margin gives the true available headroom. + #if os(Linux) + let currentFds = (try? FileManager.default.contentsOfDirectory(atPath: "/proc/self/fd"))?.count ?? 
50 + let available = max(32, softLimit - currentFds - 50) + #else + let available = softLimit + #endif // Each concurrent spawn holds both ends of the stdout and stderr pipes - // plus a temporary exec-error notification pipe while the child's exec() - // completes — roughly 6 fds per in-flight spawn regardless of whether - // stdin is connected. Divide by 8 to leave headroom for the process's - // own fds and avoid EMFILE under high concurrency. - let maxConcurrent = limit / 8 + // plus a temporary exec-error notification pipe while the child's + // exec() completes — roughly 6–8 fds per in-flight spawn. Divide by + // 8 to leave headroom and avoid EMFILE under high concurrency. + let maxConcurrent = available / 8 try await withThrowingTaskGroup(of: Void.self) { group in var running = 0 let byteCount = 1000 From dcf84fa4f1fdcfc564a5d0d75e925fe9ca574688 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Mon, 1 Jun 2026 09:38:22 -0400 Subject: [PATCH 16/32] Fix compile error for RLIMIT_NOFILE usage with FreeBSD --- Tests/SubprocessTests/UnixTests.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/SubprocessTests/UnixTests.swift b/Tests/SubprocessTests/UnixTests.swift index b05d55a7..0c6cd443 100644 --- a/Tests/SubprocessTests/UnixTests.swift +++ b/Tests/SubprocessTests/UnixTests.swift @@ -698,7 +698,7 @@ extension SubprocessUnixTests { // Read the soft fd limit directly rather than spawning a helper process. // Cap at 4096: Docker containers can report limits like 2^20. var rl = rlimit() - getrlimit(Int32(RLIMIT_NOFILE.rawValue), &rl) + getrlimit(RLIMIT_NOFILE, &rl) let softLimit = Int(min(rl.rlim_cur, rlim_t(4096))) // On Linux, account for any fds already open (e.g. 
from prior tests in From c603101438feb6f3bd328347eb6987dcd4e1fd23 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Mon, 1 Jun 2026 09:58:02 -0400 Subject: [PATCH 17/32] Fix compile error for RLIMIT_NOFILE usage with FreeBSD --- Tests/SubprocessTests/UnixTests.swift | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Tests/SubprocessTests/UnixTests.swift b/Tests/SubprocessTests/UnixTests.swift index 0c6cd443..b96a48d3 100644 --- a/Tests/SubprocessTests/UnixTests.swift +++ b/Tests/SubprocessTests/UnixTests.swift @@ -698,7 +698,13 @@ extension SubprocessUnixTests { // Read the soft fd limit directly rather than spawning a helper process. // Cap at 4096: Docker containers can report limits like 2^20. var rl = rlimit() + #if canImport(Glibc) + // On Linux/Glibc RLIMIT_NOFILE is __rlimit_resource (enum) but + // getrlimit expects __rlimit_resource_t (Int32); rawValue converts it. + getrlimit(Int32(RLIMIT_NOFILE.rawValue), &rl) + #else getrlimit(RLIMIT_NOFILE, &rl) + #endif let softLimit = Int(min(rl.rlim_cur, rlim_t(4096))) // On Linux, account for any fds already open (e.g. 
from prior tests in From f099368f3e2bf5bbbb274161a170dbef31fe399a Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Mon, 1 Jun 2026 10:08:39 -0400 Subject: [PATCH 18/32] Fix compile error for RLIMIT_NOFILE usage with FreeBSD --- Sources/_SubprocessCShims/include/process_shims.h | 6 ++++++ Sources/_SubprocessCShims/process_shims.c | 8 ++++++++ Tests/SubprocessTests/UnixTests.swift | 14 ++++---------- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/Sources/_SubprocessCShims/include/process_shims.h b/Sources/_SubprocessCShims/include/process_shims.h index ac076d84..c6813bc8 100644 --- a/Sources/_SubprocessCShims/include/process_shims.h +++ b/Sources/_SubprocessCShims/include/process_shims.h @@ -17,6 +17,7 @@ #if !TARGET_OS_WINDOWS #include #include +#include #if _POSIX_SPAWN #include @@ -95,6 +96,11 @@ int _was_process_signaled(int status); int _get_signal_code(int status); int _was_process_suspended(int status); +/// Returns the soft RLIMIT_NOFILE value for the current process, or 0 on +/// error. Implemented in C so that RLIMIT_NOFILE always resolves to the +/// correct type regardless of how the Swift Glibc/Darwin overlay imports it. 
+uint64_t _subprocess_nofile_soft_limit(void); + void _subprocess_lock_environ(void); void _subprocess_unlock_environ(void); char * _Nullable * _Nullable _subprocess_get_environ(void); diff --git a/Sources/_SubprocessCShims/process_shims.c b/Sources/_SubprocessCShims/process_shims.c index f699dad3..b3a89d71 100644 --- a/Sources/_SubprocessCShims/process_shims.c +++ b/Sources/_SubprocessCShims/process_shims.c @@ -78,6 +78,14 @@ int _was_process_suspended(int status) { return WIFSTOPPED(status); } +uint64_t _subprocess_nofile_soft_limit(void) { + struct rlimit rl; + if (getrlimit(RLIMIT_NOFILE, &rl) != 0) { + return 0; + } + return (uint64_t)rl.rlim_cur; +} + int _subprocess_pthread_create( pthread_t * _Nonnull ptr, pthread_attr_t const * _Nullable attr, diff --git a/Tests/SubprocessTests/UnixTests.swift b/Tests/SubprocessTests/UnixTests.swift index b96a48d3..7176deba 100644 --- a/Tests/SubprocessTests/UnixTests.swift +++ b/Tests/SubprocessTests/UnixTests.swift @@ -695,17 +695,11 @@ internal func assertNewSessionCreated( extension SubprocessUnixTests { #if SubprocessFoundation @Test(.requiresBash) func testConcurrentRun() async throws { - // Read the soft fd limit directly rather than spawning a helper process. + // Read the soft fd limit via a C shim: RLIMIT_NOFILE's Swift type + // varies across platforms and Swift versions, so calling getrlimit + // directly from Swift is not reliably portable. // Cap at 4096: Docker containers can report limits like 2^20. - var rl = rlimit() - #if canImport(Glibc) - // On Linux/Glibc RLIMIT_NOFILE is __rlimit_resource (enum) but - // getrlimit expects __rlimit_resource_t (Int32); rawValue converts it. - getrlimit(Int32(RLIMIT_NOFILE.rawValue), &rl) - #else - getrlimit(RLIMIT_NOFILE, &rl) - #endif - let softLimit = Int(min(rl.rlim_cur, rlim_t(4096))) + let softLimit = Int(min(_subprocess_nofile_soft_limit(), UInt64(4096))) // On Linux, account for any fds already open (e.g. 
from prior tests in // the same suite) to avoid hitting EMFILE during the concurrent spawn From 7eeb59606c4e7d4bf320d31d58872fa6f69a1945 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Tue, 2 Jun 2026 07:00:31 -0400 Subject: [PATCH 19/32] Fix occasional test hangs on FreeBSD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thread.swift — WorkQueue.dequeue() now uses while queue.isEmpty && !isShuttingDown so spurious pthread_cond_wait wakeups (which POSIX permits, and FreeBSD exercises frequently) no longer permanently kill the worker thread. shutdown() sets isShuttingDown = true before signaling so the thread exits cleanly. Subprocess+BSD.swift — source.resume() now happens before peekIfExited(). The AtomicCounter prevents double-resume in the case where both the DispatchSource event handler and the backup peek fire for the same exit. The backup is still necessary because kqueue may not deliver NOTE_EXIT retroactively if the process was already a zombie at registration time. --- .../Subprocess/Platforms/Subprocess+BSD.swift | 40 +++++++++++++------ Sources/Subprocess/Thread.swift | 28 ++++++++----- 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/Sources/Subprocess/Platforms/Subprocess+BSD.swift b/Sources/Subprocess/Platforms/Subprocess+BSD.swift index 50030e52..ab021380 100644 --- a/Sources/Subprocess/Platforms/Subprocess+BSD.swift +++ b/Sources/Subprocess/Platforms/Subprocess+BSD.swift @@ -30,28 +30,44 @@ internal import Dispatch internal func waitForProcessTermination( for processIdentifier: ProcessIdentifier ) async throws(SubprocessError) { - // Fast path: if the process is already a zombie, return immediately. - // Using WNOWAIT leaves the zombie in place for the eventual `reapProcess`. 
- do throws(Errno) { - if try processIdentifier.peekIfExited() { - return - } - } catch { - throw .failedToMonitor(withUnderlyingError: error) - } - return try await _castError { try await withCheckedThrowingContinuation { continuation in + // Guard against double-resume: the event handler and the backup + // peekIfExited() check below can both fire for the same exit. + let alreadyResumed = AtomicCounter() let source = DispatchSource.makeProcessSource( identifier: processIdentifier.value, eventMask: [.exit], queue: .global() ) source.setEventHandler { - source.cancel() - continuation.resume() + if alreadyResumed.addOne() == 1 { + source.cancel() + continuation.resume() + } } + // Register the source BEFORE checking peekIfExited() to eliminate + // the TOCTOU race: if the process exits between the peek and resume() + // the NOTE_EXIT event would be lost and the continuation would hang. source.resume() + // Backup: if the process already exited before we registered the + // source above, kqueue may not fire NOTE_EXIT retroactively. + // Uses WNOWAIT so it doesn't reap the zombie (that's done elsewhere). + do throws(Errno) { + if try processIdentifier.peekIfExited() { + if alreadyResumed.addOne() == 1 { + source.cancel() + continuation.resume() + } + } + } catch { + if alreadyResumed.addOne() == 1 { + source.cancel() + continuation.resume( + throwing: SubprocessError.failedToMonitor(withUnderlyingError: error) + ) + } + } } } } diff --git a/Sources/Subprocess/Thread.swift b/Sources/Subprocess/Thread.swift index a678236a..9cc1280a 100644 --- a/Sources/Subprocess/Thread.swift +++ b/Sources/Subprocess/Thread.swift @@ -91,6 +91,7 @@ private struct BackgroundWorkItem { // exposed so we can use it with `pthread_cond_wait`. 
private final class WorkQueue: Sendable { private nonisolated(unsafe) var queue: [BackgroundWorkItem] + private nonisolated(unsafe) var isShuttingDown: Bool = false internal nonisolated(unsafe) let mutex: UnsafeMutablePointer internal nonisolated(unsafe) let waitCondition: UnsafeMutablePointer @@ -138,17 +139,25 @@ private final class WorkQueue: Sendable { return body(mutex, &queue) } - // Only called in worker thread. Sleeps the thread if there's no more item + // Only called in worker thread. Sleeps the thread if there's no more item. + // Uses an explicit while loop to handle spurious pthread_cond_wait wakeups, + // which are common on FreeBSD and permitted by POSIX. func dequeue() -> BackgroundWorkItem? { - return self.withUnsafeUnderlyingLock { queue in - // Sleep the worker thread if there's no more work - queue.isEmpty - } body: { mutex, queue in - guard !queue.isEmpty else { - return nil - } - return queue.removeFirst() + #if canImport(WinSDK) + EnterCriticalSection(self.mutex) + defer { LeaveCriticalSection(self.mutex) } + while queue.isEmpty && !isShuttingDown { + SleepConditionVariableCS(self.waitCondition, self.mutex, INFINITE) } + #else + pthread_mutex_lock(self.mutex) + defer { pthread_mutex_unlock(self.mutex) } + while queue.isEmpty && !isShuttingDown { + pthread_cond_wait(self.waitCondition, self.mutex) + } + #endif + guard !queue.isEmpty else { return nil } + return queue.removeFirst() } // Only called in parent thread. 
Signals wait condition to wake up worker thread @@ -165,6 +174,7 @@ private final class WorkQueue: Sendable { func shutdown() { self.withLock { queue in + isShuttingDown = true queue.removeAll() #if canImport(WinSDK) WakeConditionVariable(self.waitCondition) From 404067b24705514f40dd1f78b197e3d9baaef8ea Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Tue, 2 Jun 2026 09:14:30 -0400 Subject: [PATCH 20/32] Fix FreeBSD hang while testing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thread.swift — WorkQueue.dequeue() now uses while queue.isEmpty && !isShuttingDown with an explicit isShuttingDown flag, so spurious pthread_cond_wait wakeups (permitted by POSIX, common on FreeBSD) no longer permanently kill the single worker thread. Subprocess+BSD.swift — Replaced DispatchSource NOTE_EXIT entirely with blocking waitid(WEXITED | WNOWAIT) dispatched to DispatchQueue.global(). This closes the race at its root: the kernel holds waitid until the process exits regardless of whether it was already a zombie at call time, the zombie is preserved for reapProcess, and GCD's thread pool handles concurrent subprocess waits without serialisation. --- .../Subprocess/Platforms/Subprocess+BSD.swift | 49 +++++++------------ 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/Sources/Subprocess/Platforms/Subprocess+BSD.swift b/Sources/Subprocess/Platforms/Subprocess+BSD.swift index ab021380..d457ebee 100644 --- a/Sources/Subprocess/Platforms/Subprocess+BSD.swift +++ b/Sources/Subprocess/Platforms/Subprocess+BSD.swift @@ -30,39 +30,26 @@ internal import Dispatch internal func waitForProcessTermination( for processIdentifier: ProcessIdentifier ) async throws(SubprocessError) { + // Use blocking waitid(WNOWAIT) dispatched on a global queue rather than + // DispatchSource NOTE_EXIT. 
On FreeBSD (and macOS), kqueue does not + // retroactively deliver NOTE_EXIT if the process exits before the + // EVFILT_PROC filter is registered, and libdispatch registers that filter + // asynchronously — leaving an unavoidable TOCTOU window. Blocking waitid + // with WNOWAIT is race-free: the kernel holds the call until the process + // exits and the zombie is left intact for reapProcess. DispatchQueue.global + // is used instead of runOnBackgroundThread so concurrent subprocess waits + // are not serialised on the single worker thread. return try await _castError { - try await withCheckedThrowingContinuation { continuation in - // Guard against double-resume: the event handler and the backup - // peekIfExited() check below can both fire for the same exit. - let alreadyResumed = AtomicCounter() - let source = DispatchSource.makeProcessSource( - identifier: processIdentifier.value, - eventMask: [.exit], - queue: .global() - ) - source.setEventHandler { - if alreadyResumed.addOne() == 1 { - source.cancel() + try await withCheckedThrowingContinuation { (continuation: CheckedContinuation) in + DispatchQueue.global().async { + do throws(Errno) { + _ = try _waitid( + idtype: P_PID, + id: id_t(processIdentifier.value), + flags: WEXITED | WNOWAIT + ) continuation.resume() - } - } - // Register the source BEFORE checking peekIfExited() to eliminate - // the TOCTOU race: if the process exits between the peek and resume() - // the NOTE_EXIT event would be lost and the continuation would hang. - source.resume() - // Backup: if the process already exited before we registered the - // source above, kqueue may not fire NOTE_EXIT retroactively. - // Uses WNOWAIT so it doesn't reap the zombie (that's done elsewhere). 
- do throws(Errno) { - if try processIdentifier.peekIfExited() { - if alreadyResumed.addOne() == 1 { - source.cancel() - continuation.resume() - } - } - } catch { - if alreadyResumed.addOne() == 1 { - source.cancel() + } catch { continuation.resume( throwing: SubprocessError.failedToMonitor(withUnderlyingError: error) ) From d4cde49446229e0e9b10b5a727efc13855f5ca76 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Tue, 2 Jun 2026 09:51:36 -0400 Subject: [PATCH 21/32] Code review feedback --- .../Platforms/Subprocess+Linux.swift | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Sources/Subprocess/Platforms/Subprocess+Linux.swift b/Sources/Subprocess/Platforms/Subprocess+Linux.swift index 48c2e06d..0028d654 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Linux.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Linux.swift @@ -545,15 +545,17 @@ internal func _isWaitprocessDescriptorSupported() -> Bool { // If we can not retrieve pidfd, the system does not support waitid(P_PIDFD) return false } - defer { try? FileDescriptor(rawValue: selfPidfd).close() } - /// The following call will fail either with - /// - ECHILD: in this case we know P_PIDFD is supported and waitid correctly - /// reported that we don't have a child with the same selfPidfd; - /// - EINVAL: in this case we know P_PIDFD is not supported because it does not - /// recognize the `P_PIDFD` type - errno = 0 - waitid(idtype_t(UInt32(P_PIDFD)), id_t(selfPidfd), &siginfo, WEXITED | WNOWAIT) - return errno == ECHILD + + return try? 
FileDescriptor(rawValue: selfPidfd).closeAfter() { + /// The following call will fail either with + /// - ECHILD: in this case we know P_PIDFD is supported and waitid correctly + /// reported that we don't have a child with the same selfPidfd; + /// - EINVAL: in this case we know P_PIDFD is not supported because it does not + /// recognize the `P_PIDFD` type + errno = 0 + waitid(idtype_t(UInt32(P_PIDFD)), id_t(selfPidfd), &siginfo, WEXITED | WNOWAIT) + return errno == ECHILD + } } #endif // canImport(Glibc) || canImport(Android) || canImport(Musl) From 899b624986bb1e79ba7dce154c6488fca017a952 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Tue, 2 Jun 2026 10:00:57 -0400 Subject: [PATCH 22/32] Revert converting defer close to closeAfter --- .../Platforms/Subprocess+Linux.swift | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/Sources/Subprocess/Platforms/Subprocess+Linux.swift b/Sources/Subprocess/Platforms/Subprocess+Linux.swift index 0028d654..48c2e06d 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Linux.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Linux.swift @@ -545,17 +545,15 @@ internal func _isWaitprocessDescriptorSupported() -> Bool { // If we can not retrieve pidfd, the system does not support waitid(P_PIDFD) return false } - - return try? FileDescriptor(rawValue: selfPidfd).closeAfter() { - /// The following call will fail either with - /// - ECHILD: in this case we know P_PIDFD is supported and waitid correctly - /// reported that we don't have a child with the same selfPidfd; - /// - EINVAL: in this case we know P_PIDFD is not supported because it does not - /// recognize the `P_PIDFD` type - errno = 0 - waitid(idtype_t(UInt32(P_PIDFD)), id_t(selfPidfd), &siginfo, WEXITED | WNOWAIT) - return errno == ECHILD - } + defer { try? 
FileDescriptor(rawValue: selfPidfd).close() } + /// The following call will fail either with + /// - ECHILD: in this case we know P_PIDFD is supported and waitid correctly + /// reported that we don't have a child with the same selfPidfd; + /// - EINVAL: in this case we know P_PIDFD is not supported because it does not + /// recognize the `P_PIDFD` type + errno = 0 + waitid(idtype_t(UInt32(P_PIDFD)), id_t(selfPidfd), &siginfo, WEXITED | WNOWAIT) + return errno == ECHILD } #endif // canImport(Glibc) || canImport(Android) || canImport(Musl) From 8ff3fd412d3aaa95d5a83d03a221aebe5988de23 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Tue, 2 Jun 2026 11:44:53 -0400 Subject: [PATCH 23/32] Add detailed description for the continuation resume --- Sources/Subprocess/Platforms/Subprocess+Linux.swift | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Sources/Subprocess/Platforms/Subprocess+Linux.swift b/Sources/Subprocess/Platforms/Subprocess+Linux.swift index 48c2e06d..bd9ec3f0 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Linux.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Linux.swift @@ -468,6 +468,16 @@ private func _unregisterProcessDescriptorAndNotify(_ pidfd: CInt, context: Monit return } + // This function is only called because epoll fired an event for this pidfd, + // which means the process has definitively exited — monitoring succeeded. Any + // failure from the preceding epoll_ctl(DEL) is a cleanup detail, not a + // monitoring failure: on older 5.x kernels epoll_ctl(DEL) can return ENOENT + // transiently even when the process is dead, and on all kernels the pidfd is + // removed from epoll automatically when processIdentifier.close() closes it. + // Resuming the continuations with that DEL error would incorrectly signal a + // monitoring failure, which triggers onCleanup → SIGKILL against an already- + // dead process and would cause Subprocess.run() to throw, cascading task + // cancellation to other live processes. 
for c in continuations { c.resume() } From fbfd28f53ed4f139cbfbb1f98e7b979da61271d6 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Wed, 3 Jun 2026 06:26:04 -0400 Subject: [PATCH 24/32] Revert changes to Thread --- Sources/Subprocess/Thread.swift | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/Sources/Subprocess/Thread.swift b/Sources/Subprocess/Thread.swift index 9cc1280a..a678236a 100644 --- a/Sources/Subprocess/Thread.swift +++ b/Sources/Subprocess/Thread.swift @@ -91,7 +91,6 @@ private struct BackgroundWorkItem { // exposed so we can use it with `pthread_cond_wait`. private final class WorkQueue: Sendable { private nonisolated(unsafe) var queue: [BackgroundWorkItem] - private nonisolated(unsafe) var isShuttingDown: Bool = false internal nonisolated(unsafe) let mutex: UnsafeMutablePointer internal nonisolated(unsafe) let waitCondition: UnsafeMutablePointer @@ -139,25 +138,17 @@ private final class WorkQueue: Sendable { return body(mutex, &queue) } - // Only called in worker thread. Sleeps the thread if there's no more item. - // Uses an explicit while loop to handle spurious pthread_cond_wait wakeups, - // which are common on FreeBSD and permitted by POSIX. + // Only called in worker thread. Sleeps the thread if there's no more item func dequeue() -> BackgroundWorkItem? 
{ - #if canImport(WinSDK) - EnterCriticalSection(self.mutex) - defer { LeaveCriticalSection(self.mutex) } - while queue.isEmpty && !isShuttingDown { - SleepConditionVariableCS(self.waitCondition, self.mutex, INFINITE) - } - #else - pthread_mutex_lock(self.mutex) - defer { pthread_mutex_unlock(self.mutex) } - while queue.isEmpty && !isShuttingDown { - pthread_cond_wait(self.waitCondition, self.mutex) + return self.withUnsafeUnderlyingLock { queue in + // Sleep the worker thread if there's no more work + queue.isEmpty + } body: { mutex, queue in + guard !queue.isEmpty else { + return nil + } + return queue.removeFirst() } - #endif - guard !queue.isEmpty else { return nil } - return queue.removeFirst() } // Only called in parent thread. Signals wait condition to wake up worker thread @@ -174,7 +165,6 @@ private final class WorkQueue: Sendable { func shutdown() { self.withLock { queue in - isShuttingDown = true queue.removeAll() #if canImport(WinSDK) WakeConditionVariable(self.waitCondition) From d542ed544647eb44419adf20328df05bf2138d77 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Wed, 3 Jun 2026 06:30:08 -0400 Subject: [PATCH 25/32] Revert vestigial BSD changes --- .../Subprocess/Platforms/Subprocess+BSD.swift | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/Sources/Subprocess/Platforms/Subprocess+BSD.swift b/Sources/Subprocess/Platforms/Subprocess+BSD.swift index f4966b32..ceeb11b1 100644 --- a/Sources/Subprocess/Platforms/Subprocess+BSD.swift +++ b/Sources/Subprocess/Platforms/Subprocess+BSD.swift @@ -58,15 +58,16 @@ private func _monitorTarget( internal func waitForProcessTermination( for processIdentifier: ProcessIdentifier ) async throws(SubprocessError) { - // Use blocking waitid(WNOWAIT) dispatched on a global queue rather than - // DispatchSource NOTE_EXIT. 
On FreeBSD (and macOS), kqueue does not - // retroactively deliver NOTE_EXIT if the process exits before the - // EVFILT_PROC filter is registered, and libdispatch registers that filter - // asynchronously — leaving an unavoidable TOCTOU window. Blocking waitid - // with WNOWAIT is race-free: the kernel holds the call until the process - // exits and the zombie is left intact for reapProcess. DispatchQueue.global - // is used instead of runOnBackgroundThread so concurrent subprocess waits - // are not serialised on the single worker thread. + // Fast path: if the process is already a zombie, return immediately. + // Using WNOWAIT leaves the zombie in place for the eventual `reapProcess`. + do throws(Errno) { + if try processIdentifier.peekIfExited() { + return + } + } catch { + throw .failedToMonitor(withUnderlyingError: error) + } + return try await _castError { return try await withCheckedThrowingContinuation { (continuation: CheckedContinuation) in let status = _processMonitorState.withLock { state -> Result? in From 3a3dcc7fcc36ff3e02dd20df1ec0529db94eac36 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Thu, 4 Jun 2026 14:03:21 -0400 Subject: [PATCH 26/32] Code review feedback --- Sources/Subprocess/Platforms/Subprocess+Unix.swift | 2 +- Tests/SubprocessTests/UnixTests.swift | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/Subprocess/Platforms/Subprocess+Unix.swift b/Sources/Subprocess/Platforms/Subprocess+Unix.swift index 89d4f676..2c1dfc62 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Unix.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Unix.swift @@ -531,7 +531,7 @@ extension Configuration { // clone3(CLONE_PIDFD) allocates a pidfd before exec runs. // If exec fails we retry with the next candidate path, so // close the pidfd here to avoid leaking it across retries. - if processDescriptor > 0 { + if processDescriptor >= 0 { try? 
FileDescriptor(rawValue: processDescriptor).close() } // Move on to another possible path diff --git a/Tests/SubprocessTests/UnixTests.swift b/Tests/SubprocessTests/UnixTests.swift index 691bcf9d..493e302b 100644 --- a/Tests/SubprocessTests/UnixTests.swift +++ b/Tests/SubprocessTests/UnixTests.swift @@ -905,7 +905,7 @@ extension SubprocessUnixTests { // the same suite) to avoid hitting EMFILE during the concurrent spawn // burst. /proc/self/fd lists every open descriptor; subtracting the // current count plus a small margin gives the true available headroom. - #if os(Linux) + #if os(Linux) || os(Android) let currentFds = (try? FileManager.default.contentsOfDirectory(atPath: "/proc/self/fd"))?.count ?? 50 let available = max(32, softLimit - currentFds - 50) #else From e18bbc1d5011306cda63b36d9a482310b983c7b3 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Thu, 4 Jun 2026 14:30:56 -0400 Subject: [PATCH 27/32] Try without the no-parallel option for the qemu tests --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index e19d7b81..57be1811 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -72,7 +72,7 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 - name: Run Test - run: bash -c './scripts/test-using-qemu.sh ${{ matrix.dist-kern }} -- swift test --no-parallel' + run: bash -c './scripts/test-using-qemu.sh ${{ matrix.dist-kern }} -- swift test' soundness: name: Soundness From 49beeff740bf1f919a4193a462aa6b0bc63e7e6c Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Thu, 4 Jun 2026 15:10:03 -0400 Subject: [PATCH 28/32] Revert process descriptor check --- Sources/Subprocess/Platforms/Subprocess+Unix.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Subprocess/Platforms/Subprocess+Unix.swift b/Sources/Subprocess/Platforms/Subprocess+Unix.swift index 
2c1dfc62..89d4f676 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Unix.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Unix.swift @@ -531,7 +531,7 @@ extension Configuration { // clone3(CLONE_PIDFD) allocates a pidfd before exec runs. // If exec fails we retry with the next candidate path, so // close the pidfd here to avoid leaking it across retries. - if processDescriptor >= 0 { + if processDescriptor > 0 { try? FileDescriptor(rawValue: processDescriptor).close() } // Move on to another possible path From b9cb6cf05fd1ba2a3a1380614a043c4d3b723386 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Fri, 5 Jun 2026 09:31:46 -0400 Subject: [PATCH 29/32] Code review feedback Assert programming errors on epoll_ctl(DEL) Rethrow file descriptor closure errors Remove Linux assumption in Configuration leading to swallowing of cancellation errors --- Sources/Subprocess/Configuration.swift | 12 ++-------- .../Platforms/Subprocess+Linux.swift | 24 ++++++++++++------- .../Platforms/Subprocess+Unix.swift | 6 ++++- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/Sources/Subprocess/Configuration.swift b/Sources/Subprocess/Configuration.swift index b936d478..6d84a085 100644 --- a/Sources/Subprocess/Configuration.swift +++ b/Sources/Subprocess/Configuration.swift @@ -118,16 +118,8 @@ public struct Configuration: Sendable { if taskFinishFlag.addOne() == 1 { // The body closure hasn't finished but the child // process has terminated. Cancel all active - // AsyncIO now. Use try? rather than try: the - // process has already exited, so any epoll DEL - // failure here is harmless — dropped continuations - // will terminate the I/O streams on their own. - // Propagating this error as monitorError would - // trigger onCleanup → SIGKILL on an already-dead - // process, and if Subprocess.run() then throws, - // the caller's task group cascades cancellation to - // other live processes. - try? AsyncIO.shared.cancelAsyncIO(for: processIdentifier) + // AsyncIO now. 
+ try AsyncIO.shared.cancelAsyncIO(for: processIdentifier) } return nil } catch { diff --git a/Sources/Subprocess/Platforms/Subprocess+Linux.swift b/Sources/Subprocess/Platforms/Subprocess+Linux.swift index d56b65fe..383451b7 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Linux.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Linux.swift @@ -447,19 +447,27 @@ private func _unregisterProcessDescriptorAndNotify(_ pidfd: CInt, context: Monit state = .started(newStorage) // Remove this pidfd from epoll to prevent further notifications. - // Ignore the return value: if DEL fails (e.g., ENOENT due to a - // concurrent removal, or a transient kernel error on older 5.x kernels), - // the process has still exited and the continuation must be resumed - // normally. The fd is removed from epoll automatically by the kernel - // when processIdentifier.close() closes it, so a failed DEL here is - // never permanent. Propagating a DEL error as a monitoring failure - // would trigger onCleanup → SIGKILL against an already-dead process. - _ = epoll_ctl( + // The return value is intentionally not propagated to the continuation: + // epoll firing this event means the process has already exited, so + // monitoring succeeded regardless of cleanup outcome. + // + // ENOENT is silently ignored: it means the fd is not (or is no longer) + // in the epoll instance, which is harmless — this occurs on concurrent + // removals and on older 5.x kernels where epoll_ctl(DEL) incorrectly + // reports ENOENT for pidfds after process exit. The fd is removed from + // epoll automatically by the kernel when processIdentifier.close() closes + // it anyway, so a failed DEL is never permanent. + // + // Any other error (EBADF, EINVAL, …) would indicate a programming error + // in fd lifecycle management — e.g. the pidfd was closed prematurely — + // and is surfaced as an assertion failure in debug builds. 
+ let delRC = epoll_ctl( context.epollFileDescriptor, EPOLL_CTL_DEL, pidfd, nil ) + assert(delRC == 0 || errno == ENOENT, "epoll_ctl(DEL) failed unexpectedly: \(errno)") // The pidfd is intentionally left open here. It is owned by // ProcessIdentifier and will be closed by processIdentifier.close() // in the defer in Configuration.swift once monitoring is fully done. diff --git a/Sources/Subprocess/Platforms/Subprocess+Unix.swift b/Sources/Subprocess/Platforms/Subprocess+Unix.swift index 89d4f676..000cfa76 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Unix.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Unix.swift @@ -532,7 +532,11 @@ extension Configuration { // If exec fails we retry with the next candidate path, so // close the pidfd here to avoid leaking it across retries. if processDescriptor > 0 { - try? FileDescriptor(rawValue: processDescriptor).close() + do { + try FileDescriptor(rawValue: processDescriptor).close() + } catch Error { + throw SubprocessError.spawnFailed(withUnderlyingError: error) + } } // Move on to another possible path continue From 346c6e749eeed43994e190c10c9e3c666525fc9c Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Fri, 5 Jun 2026 09:37:42 -0400 Subject: [PATCH 30/32] Fix compile error --- Sources/Subprocess/Platforms/Subprocess+Unix.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/Subprocess/Platforms/Subprocess+Unix.swift b/Sources/Subprocess/Platforms/Subprocess+Unix.swift index 000cfa76..e727f688 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Unix.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Unix.swift @@ -534,8 +534,8 @@ extension Configuration { if processDescriptor > 0 { do { try FileDescriptor(rawValue: processDescriptor).close() - } catch Error { - throw SubprocessError.spawnFailed(withUnderlyingError: error) + } catch { + throw SubprocessError.spawnFailed(withUnderlyingError: error as? 
SubprocessError.UnderlyingError) } } // Move on to another possible path From 2a41bd437076a68f88e5148b67280ca2e14bfee0 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Fri, 5 Jun 2026 15:05:51 -0400 Subject: [PATCH 31/32] Code review feedback --- .../Platforms/Subprocess+Linux.swift | 36 ++++++++++--------- .../Platforms/Subprocess+Unix.swift | 4 +-- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/Sources/Subprocess/Platforms/Subprocess+Linux.swift b/Sources/Subprocess/Platforms/Subprocess+Linux.swift index 383451b7..a9106a03 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Linux.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Linux.swift @@ -435,7 +435,8 @@ internal func _setupMonitorSignalHandler() { } private func _unregisterProcessDescriptorAndNotify(_ pidfd: CInt, context: MonitorThreadContext) { - let continuations = _processMonitorState.withLock { state -> [CheckedContinuation]? in + // Remove the continuation + let result = _processMonitorState.withLock { state -> (continuations: [CheckedContinuation], error: SubprocessError?)? in guard case .started(let storage) = state, let continuationList = storage.continuations[pidfd] else { @@ -467,32 +468,35 @@ private func _unregisterProcessDescriptorAndNotify(_ pidfd: CInt, context: Monit pidfd, nil ) - assert(delRC == 0 || errno == ENOENT, "epoll_ctl(DEL) failed unexpectedly: \(errno)") + // The pidfd is intentionally left open here. It is owned by // ProcessIdentifier and will be closed by processIdentifier.close() // in the defer in Configuration.swift once monitoring is fully done. // Closing it here would free the fd number and allow it to be recycled // before that defer runs, causing a close-the-wrong-fd race. 
- return continuationList + if delRC != 0 && errno != ENOENT { + let error = SubprocessError.failedToMonitor( + withUnderlyingError: Errno(rawValue: errno) + ) + return (continuationList, error) + } + + return (continuationList, nil) } - guard let continuations else { + guard let result else { return } - // This function is only called because epoll fired an event for this pidfd, - // which means the process has definitively exited — monitoring succeeded. Any - // failure from the preceding epoll_ctl(DEL) is a cleanup detail, not a - // monitoring failure: on older 5.x kernels epoll_ctl(DEL) can return ENOENT - // transiently even when the process is dead, and on all kernels the pidfd is - // removed from epoll automatically when processIdentifier.close() closes it. - // Resuming the continuations with that DEL error would incorrectly signal a - // monitoring failure, which triggers onCleanup → SIGKILL against an already- - // dead process and would cause Subprocess.run() to throw, cascading task - // cancellation to other live processes. - for c in continuations { - c.resume() + if let error = result.error { + for c in result.continuations { + c.resume(throwing: error) + } + } else { + for c in result.continuations { + c.resume() + } } } diff --git a/Sources/Subprocess/Platforms/Subprocess+Unix.swift b/Sources/Subprocess/Platforms/Subprocess+Unix.swift index e727f688..c4405c9b 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Unix.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Unix.swift @@ -531,11 +531,11 @@ extension Configuration { // clone3(CLONE_PIDFD) allocates a pidfd before exec runs. // If exec fails we retry with the next candidate path, so // close the pidfd here to avoid leaking it across retries. - if processDescriptor > 0 { + if processDescriptor != .invalidDescriptor { do { try FileDescriptor(rawValue: processDescriptor).close() } catch { - throw SubprocessError.spawnFailed(withUnderlyingError: error as? 
SubprocessError.UnderlyingError) + throw SubprocessError.spawnFailed(withUnderlyingError: error as? SubprocessError.UnderlyingError ) } } // Move on to another possible path From 8499acd7b8c70cced5e23773c8420a098b006cd5 Mon Sep 17 00:00:00 2001 From: Chris McGee Date: Fri, 5 Jun 2026 15:31:36 -0400 Subject: [PATCH 32/32] Fix formatting --- Sources/Subprocess/Platforms/Subprocess+Unix.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Subprocess/Platforms/Subprocess+Unix.swift b/Sources/Subprocess/Platforms/Subprocess+Unix.swift index c4405c9b..dae5a13b 100644 --- a/Sources/Subprocess/Platforms/Subprocess+Unix.swift +++ b/Sources/Subprocess/Platforms/Subprocess+Unix.swift @@ -535,7 +535,7 @@ extension Configuration { do { try FileDescriptor(rawValue: processDescriptor).close() } catch { - throw SubprocessError.spawnFailed(withUnderlyingError: error as? SubprocessError.UnderlyingError ) + throw SubprocessError.spawnFailed(withUnderlyingError: error as? SubprocessError.UnderlyingError) } } // Move on to another possible path