From a8fdc16fb1530fb8d40c44de52e057d34f8a3ad7 Mon Sep 17 00:00:00 2001 From: Sergio Marcelino Date: Fri, 5 Jun 2026 15:38:45 -0300 Subject: [PATCH 1/2] feat(database): update SQLite pool to create missing parent directories This commit enhances the SQLite connection pool to ensure that any missing parent directories for the database file are created during initialization. This change addresses a critical issue where the worker would crash on startup if the default database path did not exist. Additionally, it includes tests to verify the new behavior and updates the version to 0.2.7 in both `Cargo.toml` and `Cargo.lock` files. --- .github/workflows/ci.yml | 179 ++++++++++++++++++++++++++++++++++ database/Cargo.lock | 2 +- database/Cargo.toml | 2 +- database/src/pool/sqlite.rs | 102 +++++++++++++++++++ database/tests/integration.rs | 57 +++++++++++ harness/README.md | 2 +- 6 files changed, 341 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7c1cc036..b7a750d9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -161,6 +161,185 @@ jobs: - name: Run tests run: cargo test --all-features + # ────────────────────────────────────────────────────────────── + # Interface boot smoke: build each changed Rust worker from source, + # boot it against a live engine exactly as the registry-publish flow + # does, and assert its interface is collectable & non-empty. This is + # the PR-time guard for the class of failure that broke publish in + # #104 (database/v0.2.6): the worker compiled and passed the 2s + # liveness check but then crashed building its SQLite pool because + # the default `sqlite:./data/iii.db` parent dir was missing — so + # `Collect worker interface` timed out at release time, never on PRs. + # Scoped to Rust workers (the binary-deploy bucket where this lives); + # building from source means a clean checkout with no `data/` dir, + # which is precisely the condition that triggered the crash. + # ────────────────────────────────────────────────────────────── + interface-smoke: + name: "${{ matrix.worker }}: interface boot smoke" + needs: discover + if: needs.discover.outputs.rust != '[]' + runs-on: ubuntu-latest + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + worker: ${{ fromJSON(needs.discover.outputs.rust) }} + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - run: pip install --quiet pyyaml + + - name: Rewrite SSH to HTTPS for public deps + run: git config --global url."https://github.com/".insteadOf "ssh://git@github.com/" + + - uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + with: + workspaces: ${{ matrix.worker }} -> target + + # Workers that embed a Vite SPA into the binary (e.g. console) have a + # build.rs that shells out to pnpm. Pre-build the dist so `cargo build` + # short-circuits it — mirrors the `rust` job above. + - name: Setup pnpm (workers with bundled web/) + if: hashFiles(format('{0}/web/package.json', matrix.worker)) != '' + uses: pnpm/action-setup@v4 + with: + version: 10 + + - name: Setup Node (workers with bundled web/) + if: hashFiles(format('{0}/web/package.json', matrix.worker)) != '' + uses: actions/setup-node@v4 + with: + node-version: 22 + cache: 'pnpm' + cache-dependency-path: ${{ matrix.worker }}/web/pnpm-lock.yaml + + - name: Pre-build SPA bundle + if: hashFiles(format('{0}/web/package.json', matrix.worker)) != '' + working-directory: ${{ matrix.worker }}/web + run: | + pnpm install --frozen-lockfile + pnpm build + + # Build with default features so we boot the same artifact that ships + # in the GitHub Release (`_rust-binary.yml` builds default features). + - name: Build worker binary + working-directory: ${{ matrix.worker }} + run: cargo build + + - name: Install iii CLI + engine + env: + VERSION: '0.17.0' + run: | + set -euo pipefail + curl -fsSL https://install.iii.dev/iii/main/install.sh -o /tmp/install-iii.sh + sh /tmp/install-iii.sh + { + echo "$HOME/.local/bin" + echo "$HOME/.iii/bin" + } >> "$GITHUB_PATH" + export PATH="$HOME/.local/bin:$HOME/.iii/bin:$PATH" + iii --version + + - name: Start III engine + run: | + set -euo pipefail + printf 'workers: []\n' > config.yaml + iii --config config.yaml --no-update-check > iii-engine.log 2>&1 & + echo "$!" > iii-engine.pid + + for _ in {1..60}; do + if ! kill -0 "$(cat iii-engine.pid)" 2>/dev/null; then + echo "::error::iii engine exited before becoming ready" + cat iii-engine.log + exit 1 + fi + if iii trigger 'engine::workers::list' --json '{}' >/dev/null 2>&1; then + exit 0 + fi + sleep 1 + done + + echo "::error::iii engine did not become ready" + cat iii-engine.log || true + exit 1 + + - name: Snapshot engine baselines + run: | + set -euo pipefail + iii trigger 'engine::triggers::list' --json '{"include_internal": false}' \ + > trigger-types-baseline.json + iii trigger 'engine::workers::list' --json '{}' > workers-baseline.json + + - name: Start worker from source + env: + WORKER: ${{ matrix.worker }} + run: | + set -euo pipefail + # Resolve the cargo bin name the same way the publish flow does + # (iii.worker.yaml `bin:`, falling back to the worker name). + bin=$(WORKER="$WORKER" python3 -c 'import os, sys, pathlib; sys.path.insert(0, ".github/scripts"); import _lib; m = _lib.read_iii_worker_yaml(pathlib.Path(os.environ["WORKER"])); print(m.bin or m.name)') + + worker_log="worker-$WORKER.log" + # Run from inside the worker dir — this is what makes a relative + # default db path (sqlite:./data/iii.db) resolve against a clean + # checkout with no data/ dir, reproducing the publish boot exactly. + pushd "$WORKER" >/dev/null + echo "Starting local worker: (cd $WORKER && ./target/debug/$bin)" + "./target/debug/$bin" > "../$worker_log" 2>&1 & + echo "$!" > ../worker.pid + popd >/dev/null + + sleep 2 + if ! kill -0 "$(cat worker.pid)" 2>/dev/null; then + echo "::error::$WORKER exited before interface collection" + cat "$worker_log" || true + exit 1 + fi + + - name: Collect worker interface + env: + WORKER: ${{ matrix.worker }} + run: | + set -euo pipefail + python3 .github/scripts/collect_worker_interface.py \ + --worker "$WORKER" \ + --out worker-interface.json \ + --wait-seconds 120 \ + --trigger-types-baseline trigger-types-baseline.json \ + --workers-baseline workers-baseline.json + + - name: Assert worker interface was collected + run: | + set -euo pipefail + python3 .github/scripts/collect_worker_interface.py \ + --assert-non-empty --assert-file worker-interface.json + + - name: Dump logs on failure + if: failure() + run: | + echo "::group::iii engine log" + tail -n 200 iii-engine.log || true + echo "::endgroup::" + echo "::group::worker log" + tail -n 200 "worker-${{ matrix.worker }}.log" || true + echo "::endgroup::" + + - name: Stop processes + if: always() + run: | + if [[ -f worker.pid ]]; then + kill "$(cat worker.pid)" 2>/dev/null || true + fi + if [[ -f iii-engine.pid ]]; then + kill "$(cat iii-engine.pid)" 2>/dev/null || true + fi + # ────────────────────────────────────────────────────────────── # Node per-worker lint (biome) + test # ────────────────────────────────────────────────────────────── diff --git a/database/Cargo.lock b/database/Cargo.lock index 80e21e38..69dc9110 100644 --- a/database/Cargo.lock +++ b/database/Cargo.lock @@ -596,7 +596,7 @@ checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" [[package]] name = "database" -version = "0.2.5" +version = "0.2.7" dependencies = [ "anyhow", "async-trait", diff --git a/database/Cargo.toml b/database/Cargo.toml index b87ccc04..2c72efff 100644 --- a/database/Cargo.toml +++ b/database/Cargo.toml @@ -2,7 +2,7 @@ [package] name = "database" -version = "0.2.6" +version = "0.2.7" edition = "2021" publish = false diff --git a/database/src/pool/sqlite.rs b/database/src/pool/sqlite.rs index 933627a0..3a3ebe3d 100644 --- a/database/src/pool/sqlite.rs +++ b/database/src/pool/sqlite.rs @@ -37,6 +37,23 @@ impl SqlitePool { let manager = if path == ":memory:" || path.starts_with(":memory:") { SqliteConnectionManager::memory() } else { + // SQLite opens (and, with the default flags, creates) the database + // *file*, but it will NOT create missing parent directories — a + // fresh `sqlite:./data/iii.db` boot where `./data` does not yet + // exist fails at pool-build time with `unable to open database + // file`. r2d2 eagerly opens connections in `build()`, so this + // surfaces as a hard startup crash (exactly what broke the registry + // publish CI: the worker is launched from a clean checkout with no + // `data/` dir). Create the parent dir up front so the default + // config — and any relative/nested sqlite path — boots cleanly. + if let Some(parent) = parent_dir_to_create(path) { + std::fs::create_dir_all(&parent).map_err(|e| DbError::ConfigError { + message: format!( + "sqlite: could not create parent directory {}: {e}", + parent.display() + ), + })?; + } SqliteConnectionManager::file(path) }; let inner = R2Pool::builder() @@ -78,6 +95,28 @@ impl SqlitePool { } } +/// Compute the parent directory that must exist before SQLite can open +/// `path`, or `None` when there is nothing to create. Pure (does no IO) so +/// the path-parsing rules can be unit-tested without touching the filesystem. +/// +/// Returns `None` for: +/// - in-memory databases (`:memory:`, `file::memory:?cache=shared`), +/// - bare filenames (`iii.db`) whose parent is the current directory. +/// +/// Handles `file:` URI forms and strips any `?query` suffix (e.g. +/// `file:./data/iii.db?mode=rwc`) before extracting the parent. +fn parent_dir_to_create(path: &str) -> Option { + let without_scheme = path.strip_prefix("file:").unwrap_or(path); + let file_part = without_scheme.split('?').next().unwrap_or(without_scheme); + if file_part.is_empty() || file_part.contains(":memory:") { + return None; + } + match std::path::Path::new(file_part).parent() { + Some(parent) if !parent.as_os_str().is_empty() => Some(parent.to_path_buf()), + _ => None, + } +} + /// `r2d2::get_timeout` returns one error type (`r2d2::Error`) for both /// "no connection became free in time" and "the underlying connection /// manager kept failing to open a connection until we hit the timeout". @@ -194,4 +233,67 @@ mod tests { other => panic!("expected PoolTimeout, got {other:?}"), } } + + #[test] + fn parent_dir_to_create_skips_in_memory_forms() { + assert_eq!(parent_dir_to_create(":memory:"), None); + assert_eq!(parent_dir_to_create("file::memory:?cache=shared"), None); + } + + #[test] + fn parent_dir_to_create_skips_bare_filename() { + // No directory component → SQLite creates the file in the CWD. + assert_eq!(parent_dir_to_create("iii.db"), None); + assert_eq!(parent_dir_to_create(""), None); + } + + #[test] + fn parent_dir_to_create_returns_nested_dir() { + assert_eq!( + parent_dir_to_create("./data/iii.db"), + Some(std::path::PathBuf::from("./data")) + ); + assert_eq!( + parent_dir_to_create("/var/lib/iii/db.sqlite"), + Some(std::path::PathBuf::from("/var/lib/iii")) + ); + } + + #[test] + fn parent_dir_to_create_handles_file_uri_and_query() { + assert_eq!( + parent_dir_to_create("file:./data/iii.db?mode=rwc"), + Some(std::path::PathBuf::from("./data")) + ); + } + + /// Regression for the registry-publish crash: a fresh boot with the + /// default `sqlite:./data/iii.db` config (and no pre-existing `data/` + /// dir) must succeed. r2d2 opens connections eagerly in `build()`, so + /// before the fix `SqlitePool::new` returned + /// `unable to open database file` and the worker died on startup, making + /// interface collection time out. `SqlitePool::new` now creates the + /// missing parent directory, so the pool builds and a query runs. + #[tokio::test(flavor = "multi_thread")] + async fn file_pool_creates_missing_parent_dir() { + let tmp = tempfile::tempdir().unwrap(); + // Point at a *nested* path that does not exist yet, mirroring the + // default `./data/iii.db` shape (two missing levels for good measure). + let db_path = tmp.path().join("data").join("nested").join("iii.db"); + assert!(!db_path.parent().unwrap().exists()); + let url = format!("sqlite:{}", db_path.display()); + + let pool = SqlitePool::new(&url, &PoolConfig::default()) + .expect("pool should build after creating the missing parent dir"); + assert!(db_path.parent().unwrap().exists()); + + let conn = pool.acquire().await.unwrap(); + let result: i64 = tokio::task::spawn_blocking(move || { + conn.with(|c| c.query_row("SELECT 1", [], |row| row.get(0))) + .unwrap() + }) + .await + .unwrap(); + assert_eq!(result, 1); + } } diff --git a/database/tests/integration.rs b/database/tests/integration.rs index c08d84f3..b493d2dd 100644 --- a/database/tests/integration.rs +++ b/database/tests/integration.rs @@ -186,3 +186,60 @@ async fn apply_config_updates_list_snapshot() { fn binary_name_matches_manifest() { assert_eq!(database::worker_name(), "database"); } + +/// Regression for the registry-publish crash (`unable to open database file: +/// ./data/iii.db`). This drives the *startup* dispatch path the worker uses at +/// boot — `pool::build` (called by `main.rs` via `configuration::build_pools`) +/// — for a file-backed sqlite database whose parent directory does not exist +/// yet, mirroring the default `sqlite:./data/iii.db` config running from a +/// clean checkout. The pool must build (creating the parent dir) and serve a +/// connection rather than dying on startup. +#[tokio::test(flavor = "multi_thread")] +async fn build_pool_creates_missing_sqlite_parent_dir() { + let tmp = tempfile::tempdir().unwrap(); + let db_path = tmp.path().join("data").join("iii.db"); + assert!(!db_path.parent().unwrap().exists()); + + let url = format!("sqlite:{}", db_path.display()); + let yaml = format!("databases:\n primary:\n url: \"{url}\"\n"); + let cfg = WorkerConfig::from_yaml(&yaml).unwrap(); + + let mut pools = HashMap::new(); + for (name, db) in &cfg.databases { + let p = pool::build(name, db) + .await + .expect("startup pool build must create the missing parent dir"); + pools.insert(name.clone(), p); + } + assert!(db_path.parent().unwrap().exists()); + + let st = AppState { + pools: Arc::new(RwLock::new(pools)), + handles: Arc::new(HandleRegistry::new()), + transactions: TxRegistry::new(), + log: Logger::new(), + }; + + // The freshly-created on-disk db is usable end-to-end. + execute::handle( + &st, + serde_json::from_value::(json!({ + "db": "primary", + "sql": "CREATE TABLE t (id INTEGER PRIMARY KEY)" + })) + .unwrap(), + ) + .await + .unwrap(); + let r = query::handle( + &st, + serde_json::from_value::(json!({ + "db": "primary", + "sql": "SELECT count(*) AS n FROM t" + })) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(r.row_count, 1); +} diff --git a/harness/README.md b/harness/README.md index 758e9cbb..1c15bff6 100644 --- a/harness/README.md +++ b/harness/README.md @@ -49,7 +49,7 @@ Rust workers (`shell`, `iii-directory`) and engine builtins (`state::*`, `stream 1. Install iii: `curl -fsSL https://install.iii.dev/iii/main/install.sh | sh` 2. Verify the install: `iii --version` -3. Add the harness and console workers: `iii worker add harness console` +3. Add the harness and console workers: `iii worker add harness console coder` 4. Start the engine: `iii --config config.yaml` 5. Open the [console](https://workers.iii.dev/workers/console) at `http://127.0.0.1:3113` From 47d7c4c4ad8dc14ade0b13d118bb5f60e636aded Mon Sep 17 00:00:00 2001 From: Sergio Marcelino Date: Fri, 5 Jun 2026 16:46:35 -0300 Subject: [PATCH 2/2] fix(database): add missing config field in integration test AppState The new SQLite parent-dir regression test omitted the required config field, causing clippy and E2E harness jobs to fail at compile time. Co-authored-by: Cursor --- database/tests/integration.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/database/tests/integration.rs b/database/tests/integration.rs index b493d2dd..b7d40274 100644 --- a/database/tests/integration.rs +++ b/database/tests/integration.rs @@ -215,6 +215,7 @@ async fn build_pool_creates_missing_sqlite_parent_dir() { let st = AppState { pools: Arc::new(RwLock::new(pools)), + config: Arc::new(RwLock::new(cfg)), handles: Arc::new(HandleRegistry::new()), transactions: TxRegistry::new(), log: Logger::new(),