diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7c1cc036..b7a750d9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -161,6 +161,185 @@ jobs: - name: Run tests run: cargo test --all-features + # ────────────────────────────────────────────────────────────── + # Interface boot smoke: build each changed Rust worker from source, + # boot it against a live engine exactly as the registry-publish flow + # does, and assert its interface is collectable & non-empty. This is + # the PR-time guard for the class of failure that broke publish in + # #104 (database/v0.2.6): the worker compiled and passed the 2s + # liveness check but then crashed building its SQLite pool because + # the default `sqlite:./data/iii.db` parent dir was missing — so + # `Collect worker interface` timed out at release time, never on PRs. + # Scoped to Rust workers (the binary-deploy bucket where this lives); + # building from source means a clean checkout with no `data/` dir, + # which is precisely the condition that triggered the crash. + # ────────────────────────────────────────────────────────────── + interface-smoke: + name: "${{ matrix.worker }}: interface boot smoke" + needs: discover + if: needs.discover.outputs.rust != '[]' + runs-on: ubuntu-latest + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + worker: ${{ fromJSON(needs.discover.outputs.rust) }} + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - run: pip install --quiet pyyaml + + - name: Rewrite SSH to HTTPS for public deps + run: git config --global url."https://github.com/".insteadOf "ssh://git@github.com/" + + - uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + with: + workspaces: ${{ matrix.worker }} -> target + + # Workers that embed a Vite SPA into the binary (e.g. console) have a + # build.rs that shells out to pnpm. Pre-build the dist so `cargo build` + # short-circuits it — mirrors the `rust` job above. + - name: Setup pnpm (workers with bundled web/) + if: hashFiles(format('{0}/web/package.json', matrix.worker)) != '' + uses: pnpm/action-setup@v4 + with: + version: 10 + + - name: Setup Node (workers with bundled web/) + if: hashFiles(format('{0}/web/package.json', matrix.worker)) != '' + uses: actions/setup-node@v4 + with: + node-version: 22 + cache: 'pnpm' + cache-dependency-path: ${{ matrix.worker }}/web/pnpm-lock.yaml + + - name: Pre-build SPA bundle + if: hashFiles(format('{0}/web/package.json', matrix.worker)) != '' + working-directory: ${{ matrix.worker }}/web + run: | + pnpm install --frozen-lockfile + pnpm build + + # Build with default features so we boot the same artifact that ships + # in the GitHub Release (`_rust-binary.yml` builds default features). + - name: Build worker binary + working-directory: ${{ matrix.worker }} + run: cargo build + + - name: Install iii CLI + engine + env: + VERSION: '0.17.0' + run: | + set -euo pipefail + curl -fsSL https://install.iii.dev/iii/main/install.sh -o /tmp/install-iii.sh + sh /tmp/install-iii.sh + { + echo "$HOME/.local/bin" + echo "$HOME/.iii/bin" + } >> "$GITHUB_PATH" + export PATH="$HOME/.local/bin:$HOME/.iii/bin:$PATH" + iii --version + + - name: Start III engine + run: | + set -euo pipefail + printf 'workers: []\n' > config.yaml + iii --config config.yaml --no-update-check > iii-engine.log 2>&1 & + echo "$!" > iii-engine.pid + + for _ in {1..60}; do + if ! kill -0 "$(cat iii-engine.pid)" 2>/dev/null; then + echo "::error::iii engine exited before becoming ready" + cat iii-engine.log + exit 1 + fi + if iii trigger 'engine::workers::list' --json '{}' >/dev/null 2>&1; then + exit 0 + fi + sleep 1 + done + + echo "::error::iii engine did not become ready" + cat iii-engine.log || true + exit 1 + + - name: Snapshot engine baselines + run: | + set -euo pipefail + iii trigger 'engine::triggers::list' --json '{"include_internal": false}' \ + > trigger-types-baseline.json + iii trigger 'engine::workers::list' --json '{}' > workers-baseline.json + + - name: Start worker from source + env: + WORKER: ${{ matrix.worker }} + run: | + set -euo pipefail + # Resolve the cargo bin name the same way the publish flow does + # (iii.worker.yaml `bin:`, falling back to the worker name). + bin=$(WORKER="$WORKER" python3 -c 'import os, sys, pathlib; sys.path.insert(0, ".github/scripts"); import _lib; m = _lib.read_iii_worker_yaml(pathlib.Path(os.environ["WORKER"])); print(m.bin or m.name)') + + worker_log="worker-$WORKER.log" + # Run from inside the worker dir — this is what makes a relative + # default db path (sqlite:./data/iii.db) resolve against a clean + # checkout with no data/ dir, reproducing the publish boot exactly. + pushd "$WORKER" >/dev/null + echo "Starting local worker: (cd $WORKER && ./target/debug/$bin)" + "./target/debug/$bin" > "../$worker_log" 2>&1 & + echo "$!" > ../worker.pid + popd >/dev/null + + sleep 2 + if ! kill -0 "$(cat worker.pid)" 2>/dev/null; then + echo "::error::$WORKER exited before interface collection" + cat "$worker_log" || true + exit 1 + fi + + - name: Collect worker interface + env: + WORKER: ${{ matrix.worker }} + run: | + set -euo pipefail + python3 .github/scripts/collect_worker_interface.py \ + --worker "$WORKER" \ + --out worker-interface.json \ + --wait-seconds 120 \ + --trigger-types-baseline trigger-types-baseline.json \ + --workers-baseline workers-baseline.json + + - name: Assert worker interface was collected + run: | + set -euo pipefail + python3 .github/scripts/collect_worker_interface.py \ + --assert-non-empty --assert-file worker-interface.json + + - name: Dump logs on failure + if: failure() + run: | + echo "::group::iii engine log" + tail -n 200 iii-engine.log || true + echo "::endgroup::" + echo "::group::worker log" + tail -n 200 "worker-${{ matrix.worker }}.log" || true + echo "::endgroup::" + + - name: Stop processes + if: always() + run: | + if [[ -f worker.pid ]]; then + kill "$(cat worker.pid)" 2>/dev/null || true + fi + if [[ -f iii-engine.pid ]]; then + kill "$(cat iii-engine.pid)" 2>/dev/null || true + fi + # ────────────────────────────────────────────────────────────── # Node per-worker lint (biome) + test # ────────────────────────────────────────────────────────────── diff --git a/database/Cargo.lock b/database/Cargo.lock index 80e21e38..69dc9110 100644 --- a/database/Cargo.lock +++ b/database/Cargo.lock @@ -596,7 +596,7 @@ checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" [[package]] name = "database" -version = "0.2.5" +version = "0.2.7" dependencies = [ "anyhow", "async-trait", diff --git a/database/Cargo.toml b/database/Cargo.toml index b87ccc04..2c72efff 100644 --- a/database/Cargo.toml +++ b/database/Cargo.toml @@ -2,7 +2,7 @@ [package] name = "database" -version = "0.2.6" +version = "0.2.7" edition = "2021" publish = false diff --git a/database/src/pool/sqlite.rs b/database/src/pool/sqlite.rs index 933627a0..3a3ebe3d 100644 --- a/database/src/pool/sqlite.rs +++ b/database/src/pool/sqlite.rs @@ -37,6 +37,23 @@ impl SqlitePool { let manager = if path == ":memory:" || path.starts_with(":memory:") { SqliteConnectionManager::memory() } else { + // SQLite opens (and, with the default flags, creates) the database + // *file*, but it will NOT create missing parent directories — a + // fresh `sqlite:./data/iii.db` boot where `./data` does not yet + // exist fails at pool-build time with `unable to open database + // file`. r2d2 eagerly opens connections in `build()`, so this + // surfaces as a hard startup crash (exactly what broke the registry + // publish CI: the worker is launched from a clean checkout with no + // `data/` dir). Create the parent dir up front so the default + // config — and any relative/nested sqlite path — boots cleanly. + if let Some(parent) = parent_dir_to_create(path) { + std::fs::create_dir_all(&parent).map_err(|e| DbError::ConfigError { + message: format!( + "sqlite: could not create parent directory {}: {e}", + parent.display() + ), + })?; + } SqliteConnectionManager::file(path) }; let inner = R2Pool::builder() @@ -78,6 +95,28 @@ impl SqlitePool { } } +/// Compute the parent directory that must exist before SQLite can open +/// `path`, or `None` when there is nothing to create. Pure (does no IO) so +/// the path-parsing rules can be unit-tested without touching the filesystem. +/// +/// Returns `None` for: +/// - in-memory databases (`:memory:`, `file::memory:?cache=shared`), +/// - bare filenames (`iii.db`) whose parent is the current directory. +/// +/// Handles `file:` URI forms and strips any `?query` suffix (e.g. +/// `file:./data/iii.db?mode=rwc`) before extracting the parent. +fn parent_dir_to_create(path: &str) -> Option { + let without_scheme = path.strip_prefix("file:").unwrap_or(path); + let file_part = without_scheme.split('?').next().unwrap_or(without_scheme); + if file_part.is_empty() || file_part.contains(":memory:") { + return None; + } + match std::path::Path::new(file_part).parent() { + Some(parent) if !parent.as_os_str().is_empty() => Some(parent.to_path_buf()), + _ => None, + } +} + /// `r2d2::get_timeout` returns one error type (`r2d2::Error`) for both /// "no connection became free in time" and "the underlying connection /// manager kept failing to open a connection until we hit the timeout". @@ -194,4 +233,67 @@ mod tests { other => panic!("expected PoolTimeout, got {other:?}"), } } + + #[test] + fn parent_dir_to_create_skips_in_memory_forms() { + assert_eq!(parent_dir_to_create(":memory:"), None); + assert_eq!(parent_dir_to_create("file::memory:?cache=shared"), None); + } + + #[test] + fn parent_dir_to_create_skips_bare_filename() { + // No directory component → SQLite creates the file in the CWD. + assert_eq!(parent_dir_to_create("iii.db"), None); + assert_eq!(parent_dir_to_create(""), None); + } + + #[test] + fn parent_dir_to_create_returns_nested_dir() { + assert_eq!( + parent_dir_to_create("./data/iii.db"), + Some(std::path::PathBuf::from("./data")) + ); + assert_eq!( + parent_dir_to_create("/var/lib/iii/db.sqlite"), + Some(std::path::PathBuf::from("/var/lib/iii")) + ); + } + + #[test] + fn parent_dir_to_create_handles_file_uri_and_query() { + assert_eq!( + parent_dir_to_create("file:./data/iii.db?mode=rwc"), + Some(std::path::PathBuf::from("./data")) + ); + } + + /// Regression for the registry-publish crash: a fresh boot with the + /// default `sqlite:./data/iii.db` config (and no pre-existing `data/` + /// dir) must succeed. r2d2 opens connections eagerly in `build()`, so + /// before the fix `SqlitePool::new` returned + /// `unable to open database file` and the worker died on startup, making + /// interface collection time out. `SqlitePool::new` now creates the + /// missing parent directory, so the pool builds and a query runs. + #[tokio::test(flavor = "multi_thread")] + async fn file_pool_creates_missing_parent_dir() { + let tmp = tempfile::tempdir().unwrap(); + // Point at a *nested* path that does not exist yet, mirroring the + // default `./data/iii.db` shape (two missing levels for good measure). + let db_path = tmp.path().join("data").join("nested").join("iii.db"); + assert!(!db_path.parent().unwrap().exists()); + let url = format!("sqlite:{}", db_path.display()); + + let pool = SqlitePool::new(&url, &PoolConfig::default()) + .expect("pool should build after creating the missing parent dir"); + assert!(db_path.parent().unwrap().exists()); + + let conn = pool.acquire().await.unwrap(); + let result: i64 = tokio::task::spawn_blocking(move || { + conn.with(|c| c.query_row("SELECT 1", [], |row| row.get(0))) + .unwrap() + }) + .await + .unwrap(); + assert_eq!(result, 1); + } } diff --git a/database/tests/integration.rs b/database/tests/integration.rs index c08d84f3..b7d40274 100644 --- a/database/tests/integration.rs +++ b/database/tests/integration.rs @@ -186,3 +186,61 @@ async fn apply_config_updates_list_snapshot() { fn binary_name_matches_manifest() { assert_eq!(database::worker_name(), "database"); } + +/// Regression for the registry-publish crash (`unable to open database file: +/// ./data/iii.db`). This drives the *startup* dispatch path the worker uses at +/// boot — `pool::build` (called by `main.rs` via `configuration::build_pools`) +/// — for a file-backed sqlite database whose parent directory does not exist +/// yet, mirroring the default `sqlite:./data/iii.db` config running from a +/// clean checkout. The pool must build (creating the parent dir) and serve a +/// connection rather than dying on startup. +#[tokio::test(flavor = "multi_thread")] +async fn build_pool_creates_missing_sqlite_parent_dir() { + let tmp = tempfile::tempdir().unwrap(); + let db_path = tmp.path().join("data").join("iii.db"); + assert!(!db_path.parent().unwrap().exists()); + + let url = format!("sqlite:{}", db_path.display()); + let yaml = format!("databases:\n primary:\n url: \"{url}\"\n"); + let cfg = WorkerConfig::from_yaml(&yaml).unwrap(); + + let mut pools = HashMap::new(); + for (name, db) in &cfg.databases { + let p = pool::build(name, db) + .await + .expect("startup pool build must create the missing parent dir"); + pools.insert(name.clone(), p); + } + assert!(db_path.parent().unwrap().exists()); + + let st = AppState { + pools: Arc::new(RwLock::new(pools)), + config: Arc::new(RwLock::new(cfg)), + handles: Arc::new(HandleRegistry::new()), + transactions: TxRegistry::new(), + log: Logger::new(), + }; + + // The freshly-created on-disk db is usable end-to-end. + execute::handle( + &st, + serde_json::from_value::(json!({ + "db": "primary", + "sql": "CREATE TABLE t (id INTEGER PRIMARY KEY)" + })) + .unwrap(), + ) + .await + .unwrap(); + let r = query::handle( + &st, + serde_json::from_value::(json!({ + "db": "primary", + "sql": "SELECT count(*) AS n FROM t" + })) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(r.row_count, 1); +} diff --git a/harness/README.md b/harness/README.md index 758e9cbb..1c15bff6 100644 --- a/harness/README.md +++ b/harness/README.md @@ -49,7 +49,7 @@ Rust workers (`shell`, `iii-directory`) and engine builtins (`state::*`, `stream 1. Install iii: `curl -fsSL https://install.iii.dev/iii/main/install.sh | sh` 2. Verify the install: `iii --version` -3. Add the harness and console workers: `iii worker add harness console` +3. Add the harness and console workers: `iii worker add harness console coder` 4. Start the engine: `iii --config config.yaml` 5. Open the [console](https://workers.iii.dev/workers/console) at `http://127.0.0.1:3113`