diff --git a/Cargo.lock b/Cargo.lock index c56cd7f..c29fbbe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -59,7 +59,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -70,7 +70,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -79,12 +79,24 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "2.11.1" @@ -100,6 +112,7 @@ dependencies = [ "glob", "libc", "rand", + "reqwest", "rusqlite", "serde", "serde_json", @@ -114,6 +127,12 @@ version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + [[package]] name = "cc" version = "1.2.61" @@ -190,12 +209,52 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -209,7 +268,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -224,12 +283,103 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + [[package]] name = "find-msvc-tools" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -241,12 +391,44 @@ dependencies = [ "wasi", ] +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + [[package]] name = "glob" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "h2" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -256,6 +438,15 @@ dependencies = [ "ahash", ] +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + [[package]] name = "hashbrown" version = "0.17.0" @@ -277,6 +468,122 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "hyper" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "h2", + "http", + "http-body", + "httparse", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" +dependencies = [ + "http", + "hyper", + "hyper-util", + "rustls", + "tokio", + "tokio-rustls", + "tower-service", +] + +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2", + "system-configuration", + "tokio", + "tower-service", + "tracing", + "windows-registry", +] + [[package]] name = "iana-time-zone" version = "0.1.65" @@ -301,6 +608,115 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" +dependencies = [ + "displaydoc", + "potential_utf", + "utf8_iter", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" + +[[package]] +name = "icu_properties" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" + +[[package]] +name = "icu_provider" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "indexmap" version = "2.14.0" @@ -309,6 +725,24 @@ checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", "hashbrown 0.17.0", + "serde", + "serde_core", +] + +[[package]] +name = "ipnet" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + +[[package]] +name = "iri-string" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" +dependencies = [ + "memchr", + "serde", ] [[package]] @@ -329,10 +763,18 @@ version = "0.3.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca" dependencies = [ + "cfg-if", + "futures-util", "once_cell", "wasm-bindgen", ] +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" version = "0.2.186" @@ -360,6 +802,18 @@ dependencies = [ "version_check", ] +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "litemap" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" + [[package]] name = "log" version = "0.4.29" @@ -372,6 +826,40 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "mio" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "native-tls" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -394,38 +882,119 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] -name = "pkg-config" -version = "0.3.33" +name = "openssl" +version = "0.10.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" +checksum = "f38c4372413cdaaf3cc79dd92d29d7d9f5ab09b51b10dded508fb90bb70b9222" +dependencies = [ + "bitflags", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] [[package]] -name = "ppv-lite86" -version = "0.2.21" +name = "openssl-macros" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ - "zerocopy", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "proc-macro2" -version = "1.0.106" +name = "openssl-probe" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" -dependencies = [ - "unicode-ident", -] +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] -name = "quote" -version = "1.0.45" +name = "openssl-sys" +version = "0.9.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13ce1245cd07fcc4cfdb438f7507b0c7e4f3849a69fd84d52374c66d83741bb6" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + +[[package]] +name = "potential_utf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "rand" version = "0.8.6" @@ -453,7 +1022,63 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.17", +] + +[[package]] +name = "reqwest" +version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-tls", + "hyper-util", + "js-sys", + "log", + "mime", + "native-tls", + "percent-encoding", + "pin-project-lite", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-native-tls", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", ] [[package]] @@ -470,6 +1095,52 @@ dependencies = [ "smallvec", ] +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = "0.23.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" +dependencies = [ + "once_cell", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" +dependencies = [ + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.22" @@ -482,6 +1153,44 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "schannel" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags", + "core-foundation 0.10.1", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.228" @@ -534,6 +1243,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + [[package]] name = "serde_yml" version = "0.0.12" @@ -575,18 +1296,46 @@ dependencies = [ "libc", ] +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + [[package]] name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "socket2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + [[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + [[package]] name = "syn" version = "2.0.117" @@ -598,6 +1347,117 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "system-configuration" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" +dependencies = [ + "bitflags", + "core-foundation 0.9.4", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "tinystr" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tokio" +version = "1.52.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67dee974fe86fd92cc45b7a95fdd2f99a36a6d7b0d431a231178d3d670bbcc6" +dependencies = [ + "bytes", + "libc", + "mio", + "pin-project-lite", + "socket2", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml" version = "0.8.23" @@ -639,12 +1499,112 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "bitflags", + "bytes", + "futures-util", + "http", + "http-body", + "iri-string", + "pin-project-lite", + "tower", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + [[package]] name = "unicode-ident" version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -663,12 +1623,39 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + [[package]] name = "wasm-bindgen" version = "0.2.118" @@ -682,6 +1669,16 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f371d383f2fb139252e0bfac3b81b265689bf45b6874af544ffa4c975ac1ebf8" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.118" @@ -714,6 +1711,50 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "web-sys" +version = "0.3.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "windows-core" version = "0.62.2" @@ -755,6 +1796,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-registry" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" +dependencies = [ + "windows-link", + "windows-result", + "windows-strings", +] + [[package]] name = "windows-result" version = "0.4.1" @@ -773,6 +1825,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.61.2" @@ -782,6 +1843,70 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + [[package]] name = "winnow" version = "0.7.15" @@ -791,6 +1916,129 @@ dependencies = [ "memchr", ] +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "writeable" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" + +[[package]] +name = "yoke" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.8.48" @@ -811,6 +2059,66 @@ dependencies = [ "syn", ] +[[package]] +name = "zerofrom" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zmij" version = "1.0.21" diff --git a/Cargo.toml b/Cargo.toml index ba321a7..9c48068 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ toml = "0.8" signal-hook = "0.3" libc = "0.2" rand = "0.8" +reqwest = { version = "0.12", features = ["blocking", "json"] } [profile.release] strip = true diff --git a/README.md b/README.md index d7ed740..8193030 100644 --- a/README.md +++ b/README.md @@ -106,10 +106,13 @@ can_fail_spec = false # whether a rejection from this phase m # Worker configuration [worker] -prompt_template = "templates/my-prompt.md" # required — path to prompt template +prompt_template = "templates/my-prompt.md" # required for claude/default phases model = "claude-sonnet-4-6" # default: claude-sonnet-4-6 effort = "medium" # low | medium | high timeout = 300 # seconds; must be > 0 +runtime = "claude" # "claude" (default) | "openrouter" | "deterministic" +api_key_env = "OPENROUTER_API_KEY" # openrouter only — env var holding the API key (default: OPENROUTER_API_KEY) +bare = false # true → append --bare (skips session/MCP/skill loading; ~96% cold-start reduction) # Completion routing [completion] @@ -137,7 +140,9 @@ post = ["diff-is-non-empty"] # gates to run after this phase completes - `retry` — re-run this phase - `fail` — mark the spec failed -**`completion_handler`:** Set this top-level field to delegate routing to a built-in handler (e.g. `"builtin:execute"`). Use it when you want a phase to reuse the same routing logic as a built-in phase rather than defining your own `approve_signal`/`reject_signal` strings. When `completion_handler` is set, the daemon calls the named built-in handler and ignores the `[completion]` signals. +**`completion_handler`:** Used in two contexts: +- **Claude phases** (default): delegates completion routing to a built-in handler (e.g. `"builtin:execute"`) instead of `approve_signal`/`reject_signal` strings. +- **Deterministic phases** (`[worker] runtime = "deterministic"`): names the builtin to *execute* directly — no Claude spawn. Built-ins: `builtin:commit`, `builtin:merge`, `builtin:cleanup`. The `[completion]` block is ignored for deterministic phases. ### Creating a Custom Phase @@ -203,6 +208,35 @@ post_phases = ["doc-update", "critic", "merge"] # phases run after all tasks Pass them with `--pipeline name:path/to/pipeline.toml` (repeatable for N-way comparisons). +### Pipeline v2 Mode (opt-in) + +v2 is a redesigned pipeline with clean phase separation and deterministic steps that skip Claude cold-start. Set `mode: v2` in your spec: + +```yaml +title: My Feature +mode: v2 + +tasks: + - id: t-1 + title: Implement the thing + status: PENDING + spec: | + Add X to lib/foo.py following the existing pattern. + verify: "python3 -m pytest tests/test_foo.py -x -q" +``` + +v2 pipeline layout: + +``` +Spec-pre (loop ≤3): spec-critique ↔ spec-improve +Per-task: execute → review → commit (commit is deterministic) +Spec-post: doc-update → critic → merge → cleanup + ^ ^ ^ + Claude det. det. +``` + +Deterministic phases (`commit`, `merge`, `cleanup`) run as plain shell operations — no Claude spawn, no cold-start latency. v1 is still the default; v2 is opt-in until A/B benchmarks confirm the speedup. See [docs/pipelines.md](docs/pipelines.md) for a full v1 vs v2 comparison and guidance on when to use each. + ## Guardrails Guardrails define quality gates that run at phase transitions. Configured globally in `~/.boi/guardrails.toml`, overridable per spec. @@ -283,18 +317,20 @@ Exit 0 = passed. Any non-zero exit = failed. Stdout/stderr are captured as the f ## Runtime Configuration -BOI is runtime-agnostic. The default runtime is `claude` (Claude Code CLI). `codex` (Codex CLI) is also supported. +BOI is runtime-agnostic. The default runtime is `claude` (Claude Code CLI). `codex` (Codex CLI) and `openrouter` (direct HTTP to OpenRouter API) are also supported. ### Global Default -Set in `~/.boi/config.json`: +Set in `~/.boi/config.yaml`: -```json -{ - "runtime": { "default": "claude" } -} +```yaml +runtime: + default: claude +brain: ~/mrap-hex # optional — path to brain dir; must contain CLAUDE.md ``` +`brain` sets the default brain directory for all specs. Workers read `{brain}/CLAUDE.md` as system context before each task. BOI errors early if `brain` is set but the path or `CLAUDE.md` is missing. + ### Per-Spec Override Add a `runtime:` field to any spec: @@ -307,13 +343,18 @@ Spec-level override takes precedence over the global default. ### Model Mappings -Phase config accepts either full model IDs or aliases (`opus`, `sonnet`, `haiku`). The runtime resolves them: +Phase config accepts either full model IDs or aliases. The runtime resolves them: -| Alias | Claude | Codex | -|-------|--------|-------| -| `opus` | claude-opus-4-6 | o3 | -| `sonnet` | claude-sonnet-4-6 | o4-mini | -| `haiku` | claude-haiku-4-5-20251001 | o4-mini | +| Alias | Claude | Codex | OpenRouter | +|-------|--------|-------|------------| +| `opus` | claude-opus-4-6 | o3 | — | +| `sonnet` | claude-sonnet-4-6 | o4-mini | — | +| `haiku` | claude-haiku-4-5-20251001 | o4-mini | anthropic/claude-haiku-4-5 | +| `gemini-flash` | — | — | google/gemini-2.0-flash-001 | +| `grok` | — | — | x-ai/grok-beta | +| `qwen-coder` | — | — | qwen/qwen-2.5-coder-32b-instruct | + +OpenRouter phases require `OPENROUTER_API_KEY` in the environment and a `model` field in `[worker]`. Use `openrouter` runtime for text-only judgment phases (critic, plan-critique, spec-critique) to skip Claude cold-start and reduce cost. ### CLI Check @@ -323,9 +364,10 @@ Phase config accepts either full model IDs or aliases (`opus`, `sonnet`, `haiku` ``` boi dispatch [options] Submit a spec to the queue -boi status [--watch] [--json] Show queue and worker status +boi status [--watch] [--json] [-v|--verbose] Show queue and worker status; -v shows full failure detail boi log [--full] [-f|--follow] Tail worker output for a spec boi cancel Cancel a running or queued spec +boi daemon reload Send SIGHUP to reload max_workers/spawns_per_tick/claude_bin boi stop Stop daemon and all workers boi install [--workers N] One-time setup (run outside Claude Code) boi resume | --all Resume failed or canceled specs @@ -340,6 +382,8 @@ boi dep add|remove|set|clear|show|viz|check boi project create|list|status|context|delete boi bench --pipeline name:path [--pipeline ...] --spec FILE | --battery DIR [--runs N] Benchmark N pipelines boi bench --phase --spec FILE [--runs N] Benchmark a single phase in isolation +boi plan [spec.yaml ...] [--force-refresh] Build DAG + LLM critique for in-flight and new specs +boi dispatch-many [spec2.yaml ...] DAG-ordered multi-spec dispatch with LLM gate ``` **`dispatch` options:** @@ -353,6 +397,109 @@ boi bench --phase --spec FILE [--runs N] Benchmark a single phase in iso | `--after SA7F3,TB2E1` | Wait for listed specs to complete before starting | | `--project NAME` | Associate with a project (injects project context) | +**`dispatch-many` options:** + +| Flag | Description | +|------|-------------| +| `--yes` | Auto-approve dispatch without interactive prompt | +| `--force` | Override warn-level concerns (cannot override blocks) | +| `--priority N` | Priority applied to all dispatched specs (default: 100) | +| `--mode MODE` | Mode applied to all specs | +| `--after SA7F3` | Additional upstream dep for all dispatched specs | + +## DAG Planner: `boi plan` + `boi dispatch-many` + +### The problem + +Manual `--after` flags are fragile. When dispatching multiple specs, whoever dispatches has to remember all in-flight dependencies. Wrong ordering only surfaces when the dependent spec fails mid-execution — after tokens and time have already been spent. + +### `boi plan` — visualize and critique the DAG + +`boi plan` builds a dependency graph across all in-flight + queued + new specs, then asks an LLM to critique it: any specs that should depend on each other but don't? Any wrongly serial work that could be parallel? Any scopes that contradict each other? + +```bash +boi plan # critique current in-flight state +boi plan spec-a.yaml spec-b.yaml # include new specs in the analysis +boi plan --force-refresh # re-run LLM critique (ignore cache) +``` + +Example output: + +``` +DAG (4 nodes): + SA7F3 (auth-api) ← no deps + SB2E1 (user-model) ← SA7F3 + SC1F0 (dashboard) ← SB2E1 + SD4A2 (email-notify) ← SA7F3 + +Critique: + [WARN] email-notify reads auth tokens written by user-model, but no dep declared. + Suggested fix: --after SB2E1 + +Proposed dispatch order: SA7F3 → SB2E1 → SC1F0 + SD4A2 + +--after flags: boi dispatch email-notify.yaml --after SB2E1 +``` + +The critique is cached by hash of (DAG topology + spec titles). Re-running on unchanged state costs zero tokens. + +### `boi dispatch-many` — gated multi-spec dispatch + +`boi dispatch-many` runs `plan` first, then dispatches all specs in topological order — automatically wiring `--after` chains. + +```bash +boi dispatch-many spec-a.yaml spec-b.yaml spec-c.yaml +``` + +- **Block-severity concern** → refuses entirely, prints concerns, exits non-zero +- **Warn-severity concern** → shows concern, prompts for confirmation (or auto-approves with `--yes`) +- **Clean** → dispatches all specs in topological order with correct `--after` chains + +### Before/after: 3-spec chain that `dispatch-many` would have caught + +**Before (manual `--after`, misordered):** + +Three specs dispatched for a feature track. The dispatcher forgot `--after` on `build-api`. + +```bash +boi dispatch build-schema.yaml +boi dispatch build-api.yaml # BUG: missing --after +boi dispatch build-frontend.yaml --after +``` + +`build-api` started in parallel with `build-schema`. Failed 4 tasks in because schema files weren't written yet. ~12k tokens spent on the wrong order. Re-dispatch required. + +**After (`boi dispatch-many`):** + +```bash +boi dispatch-many build-schema.yaml build-api.yaml build-frontend.yaml +``` + +``` +Analyzing DAG... + +DAG (3 nodes): + SA000 (build-schema) ← no deps + SA001 (build-api) ← SA000 [implicit: src/schema/*.rs] + SA002 (build-frontend) ← SA001 [declared] + +Critique: + [WARN] build-api has implicit dep on build-schema via src/schema/*.rs + but no --after declared. Adding automatically. + +Proposed dispatch order: SA000 → SA001 → SA002 + +Dispatch? [y/N] y + +Dispatched: SA000 (build-schema) +Dispatched: SA001 (build-api) --after SA000 +Dispatched: SA002 (build-frontend) --after SA001 +``` + +No misordering. No re-dispatch. The implicit dep was caught before a single token was spent on the wrong order. + +See [docs/dag-reassess.md](docs/dag-reassess.md) for the full model and guidance on when to use `plan` vs `dispatch-many` vs `dispatch --after`. + ## Output Preservation BOI automatically preserves the work product of every completed spec so outputs are never lost when the worktree is cleaned up. diff --git a/SKILL.md b/SKILL.md index dd0a89f..6530997 100644 --- a/SKILL.md +++ b/SKILL.md @@ -53,7 +53,7 @@ After dispatch, run `boi status` to show initial state. ### Other Commands ```bash -boi status [--watch] [--json] Queue and worker status +boi status [--watch] [--json] [-v|--verbose] Queue and worker status; -v shows full failure detail boi log [--full] Tail worker output boi cancel Cancel a spec boi stop Stop daemon and all workers @@ -66,6 +66,8 @@ boi critic status | run | enable | disable | checks boi spec [add|skip|next|block|edit|deps] boi dep add|remove|set|clear|show|viz|check boi project create|list|status|context|delete +boi plan [spec.yaml ...] [--force-refresh] Build DAG + LLM critique for in-flight and new specs +boi dispatch-many [spec2.yaml ...] DAG-ordered multi-spec dispatch with LLM gate ``` ## Spec Format @@ -123,6 +125,11 @@ A **phase** is a named worker role defined by a `.phase.toml` file. The daemon h | `critic` | Quality gate: adds `[CRITIC]` tasks on failure | 300s | | `decompose` | Decompose a high-level spec into actionable tasks | 600s | | `evaluate` | Evaluate spec completion and determine next steps | 300s | +| `spec-critique` | Critique spec quality before execution (also: `spec-review` alias) | 120s | +| `spec-improve` | Improve spec in response to critique, then requeue for re-critique | 120s | +| `commit` | Commit staged changes in the spec worktree — deterministic, no Claude | — | +| `merge` | Merge spec worktree branch into the target branch — deterministic, no Claude | — | +| `cleanup` | Remove spec worktree and delete branch — deterministic, no Claude | — | ### Phase File Schema (`~/.boi/phases/*.phase.toml`) @@ -132,10 +139,13 @@ description = "What this phase does" completion_handler = "builtin:execute" # optional — use built-in routing logic [worker] -prompt_template = "path/to/prompt.md" # required +prompt_template = "path/to/prompt.md" # required for claude phases model = "claude-sonnet-4-6" effort = "medium" # low | medium | high timeout = 300 # seconds +runtime = "claude" # "claude" (default) | "openrouter" | "deterministic" +api_key_env = "OPENROUTER_API_KEY" # openrouter only — env var holding the API key (default: OPENROUTER_API_KEY) +bare = false # true → --bare flag (skips session/MCP/skill loading; ~96% cold-start reduction) [completion] approve_signal = "## Approved" @@ -287,7 +297,7 @@ Exit 0 = passed. Any non-zero = failed. ## Constraints - `boi install` runs **outside Claude Code** in a terminal. -- Workers are headless, non-interactive CLI agent sessions. Default runtime: `claude -p`. Codex runtime: `codex exec`. Configured globally in `~/.boi/config.json` or per-spec via `**Runtime:** codex` header. +- Workers are headless, non-interactive CLI agent sessions. Default runtime: `claude -p`. Codex runtime: `codex exec`. OpenRouter runtime: direct HTTP to `openrouter.ai/api/v1/chat/completions` (requires `OPENROUTER_API_KEY`; used for text-only judgment phases). Configured globally in `~/.boi/config.yaml` or per-spec via `**Runtime:** codex` header. - Daemon polls every 5 seconds. Status may lag slightly. - Default 3 workers, max 5. Set during install. - Workers get fresh context each iteration. No memory of previous iterations. diff --git a/docs/bench-bare-flag-2026-04-29.md b/docs/bench-bare-flag-2026-04-29.md new file mode 100644 index 0000000..1d9994a --- /dev/null +++ b/docs/bench-bare-flag-2026-04-29.md @@ -0,0 +1,69 @@ +# Bench: --bare Flag Startup Reduction — critic Phase + +**Date:** 2026-04-29 +**Phase under test:** `critic` +**Runs per condition:** 3 +**Metric:** `startup_ms` (spawn → first stdout byte) + +--- + +## Results + +| Condition | Run 1 (ms) | Run 2 (ms) | Run 3 (ms) | Avg (ms) | +|-----------|-----------|-----------|-----------|---------| +| `bare = true` | 183 | 187 | 183 | **184.3** | +| `bare = false` | 5,209 | 5,348 | 5,214 | **5,257.0** | + +**Reduction:** 5,257ms → 184ms — **−96.5%** (28× speedup) + +--- + +## Assertion + +> avg startup_ms (bare) < 50% of avg startup_ms (full) + +184.3ms < 2,628.5ms — **PASS** (3.5% of full, well under 50% threshold) + +--- + +## Raw data source + +`docs/.bench_raw.json` keys `sonnet_bare` and `sonnet_short`. + +## Test fixture + +`cargo test --lib bare_flag` + +Location: `src/spawn.rs` → `mod bench_bare_flag` + +--- + +## Interpretation + +The `--bare` flag eliminates CLI session loading, MCP discovery, and skill enumeration. All three bare runs land in a 4ms band (183–187ms), a hard floor set by: + +- Process spawn: ~5–10ms +- TLS handshake + DNS: ~30–50ms +- API TTFT for a minimal reply: ~130–150ms + +Full-mode runs are tightly clustered around 5,200–5,350ms. The overhead is entirely in CLI initialization, not inference. Switching this phase to bare is safe: `critic` does not use file/repo tools. + +## Safe phases for bare=true + +| Phase | bare | Rationale | +|-------|------|-----------| +| `critic` | ✅ true | Text-only review, no file tools needed | +| `plan-critique` | ✅ true | Text-only review | +| `spec-critique` | ✅ true | Text-only review | +| `execute` | ❌ false | Needs file/repo tools | +| `task-verify` | ❌ false | Needs file/repo tools | +| `doc-update` | ❌ false | Needs file/repo tools | +| `code-review` | ❌ false | Needs file/repo tools | + +--- + +## Critic Approved + +**Reviewed:** 2026-04-29 · Task T2166 · Spec S7EE1 + +All checks passed: spec integrity, verify commands, code quality, completeness, fleet-readiness, blast-radius. diff --git a/docs/bench-openrouter-2026-04-29.md b/docs/bench-openrouter-2026-04-29.md new file mode 100644 index 0000000..418a0f4 --- /dev/null +++ b/docs/bench-openrouter-2026-04-29.md @@ -0,0 +1,41 @@ +# OpenRouter Runtime — Smoke Bench (2026-04-29) + +Recorded from: `OPENROUTER_API_KEY= cargo test --test openrouter_smoke -- --nocapture` + +## Test + +File: `tests/openrouter_smoke.rs` + +Sends `"Reply with exactly one word: hello"` to `gemini-flash` +(`google/gemini-2.0-flash-001`) with a 30 s timeout. Asserts: + +- `text` is non-empty +- `input_tokens > 0` +- `output_tokens > 0` +- `duration_ms > 0` + +## Results + +> Update this section by running the smoke test with a live key: +> ``` +> OPENROUTER_API_KEY=sk-... cargo test --test openrouter_smoke -- --nocapture 2>&1 +> ``` + +``` +model: gemini-flash +prompt: "Reply with exactly one word: hello" +response: +input_tokens: +output_tokens: +cost_usd: +duration_ms: +wall_ms: +``` + +## Context + +Phase 2 of the BOI runtime architecture decision (2026-04-29). Non-tool phases +(spec-critique, plan-critique, critic, evaluate) will route through OpenRouter +instead of the Claude CLI, saving ~6 s per phase cold-start and 5–10× cost +using Haiku/Flash for judgment phases. This smoke test validates the +`OpenRouterRuntime` implementation end-to-end against the live API. diff --git a/docs/brain-directory.md b/docs/brain-directory.md new file mode 100644 index 0000000..71b2cf5 --- /dev/null +++ b/docs/brain-directory.md @@ -0,0 +1,64 @@ +# Brain Directory + +## What It Is and Why + +A **brain directory** is a local folder containing a `CLAUDE.md` file that BOI reads before dispatching each worker. Its contents are prepended to the worker prompt as a `## System Context` section, giving workers project-aware context — rules, conventions, forbidden actions — without embedding that context in every spec file. + +This keeps specs focused on *what to do* while letting the brain directory carry *how to do it* for your project. + +## Configuration + +### Global (applies to all specs) + +In `~/.boi/config.yaml`: + +```yaml +brain: ~/mrap-hex +``` + +### Per-Spec (overrides global) + +In any spec YAML file, add a top-level `brain` field: + +```yaml +title: "My feature" +brain: ~/projects/myrepo +mode: execute +tasks: + - id: t-1 + title: "Do the thing" + status: PENDING + spec: | + ... +``` + +**Precedence:** spec-level `brain` overrides the global config value. If neither is set, no brain context is injected. + +### Validation + +BOI validates the brain path at dispatch time. It will fail fast (not silently skip) if: +- The directory does not exist +- The directory exists but contains no `CLAUDE.md` + +## Token Budget Guidance + +BOI truncates brain content to **32,000 characters** (~8K tokens) before injecting it. Content beyond that limit is dropped silently from the tail. + +Guidelines: +- **Keep CLAUDE.md under 16K chars** for comfortable headroom. This leaves room for the worker prompt itself within a 32K context injection. +- **Put the most critical rules at the top.** Truncation cuts from the bottom, so lead with must-know constraints. +- **Avoid long examples.** Reference file paths instead of inlining large code blocks. Workers can read files. +- **Prune regularly.** A smaller, current brain is more useful than a large, stale one. + +## When to Use + +- Your project has non-obvious conventions (naming, file structure, forbidden patterns) that workers otherwise get wrong. +- Multiple specs share the same repo context and you don't want to repeat it in each. +- You want to prevent specific classes of mistakes (e.g., "never drop the `events` table", "always write atomic files"). + +## When Not to Use + +- **Simple or one-off specs** where the task spec itself is self-contained. Brain injection adds latency (a file read) and prompt overhead for no gain. +- **Sensitive information.** Brain content is sent verbatim to the LLM. Do not put credentials, tokens, or PII in `CLAUDE.md`. +- **Very large CLAUDE.md files.** Content over 32K chars is truncated. If your brain file is that large, split it and link to sub-documents from `CLAUDE.md` instead. +- **Cross-project workers.** If a spec spans multiple repos with conflicting conventions, per-spec brain is safer than a global one. diff --git a/docs/daemon.md b/docs/daemon.md new file mode 100644 index 0000000..5416c14 --- /dev/null +++ b/docs/daemon.md @@ -0,0 +1,78 @@ +# BOI Daemon + +## Overview + +The BOI daemon is a long-running process that monitors the queue and dispatches workers for pending specs. It is started with `boi daemon start` (background) or `boi daemon foreground` (attached to the terminal). + +## Tick Cadence + +The daemon polls every ~5 seconds (10 × 500 ms sleep increments). Each tick: + +1. Writes a heartbeat timestamp to `~/.boi/daemon.heartbeat`. +2. Checks the SIGHUP reload flag and applies config changes if set. +3. Reaps finished worker threads. +4. Computes how many new workers to spawn this tick and drains the queue up to that cap. +5. Sleeps 500 ms × 10 before the next tick (interruptible by SIGTERM). + +## Batched Dequeue (`spawns_per_tick`) + +Rather than spawning one worker per tick, the daemon drains up to `spawns_per_tick` eligible specs per tick (default 4). The actual number spawned is: + +``` +to_spawn = min(max_workers - current_workers, spawns_per_tick) +``` + +A 50–150 ms randomized jitter is inserted between successive spawns within a single tick to smooth cold-start bursts on the Anthropic API. Configure `spawns_per_tick` in `~/.boi/config.yaml`: + +```yaml +spawns_per_tick: 4 # default; raise once cold-start behavior is validated +``` + +## SIGHUP Config Hot-Reload + +Sending SIGHUP to the daemon triggers a live config reload **without restarting** or interrupting in-flight workers. + +### What reloads + +| Setting | Reloaded? | +|---------|-----------| +| `max_workers` | Yes | +| `spawns_per_tick` | Yes | +| `claude_bin` | Yes | +| `task_timeout_minutes` | No — startup snapshot | +| `retry_count` | No — startup snapshot | +| `cleanup_on_failure` | No — startup snapshot | +| `paths.*` | No — startup snapshot | + +### Reload semantics + +- **Parse failure is a no-op.** If the config file is syntactically invalid, the daemon logs `[boi daemon] reload FAILED: ...; keeping current config` and retains the current values. +- **In-flight workers are unaffected.** Workers receive a snapshot of `WorkerConfig` at spawn time; live config mutation never reaches them. +- **No restart required.** The daemon process continues running; only the three live fields are updated. + +### Triggering a reload + +```bash +# Recommended: set a value then reload in one step +boi config set max_workers 10 +boi daemon reload + +# Or send SIGHUP directly +kill -HUP $(cat ~/.boi/daemon.lock) +``` + +`boi daemon reload` reads the PID from `~/.boi/daemon.lock`, verifies the process is alive, and sends SIGHUP. The reload takes effect within the next tick (≤ 5 seconds). + +## Daemon Commands + +| Command | Description | +|---------|-------------| +| `boi daemon start` | Start daemon in the background | +| `boi daemon stop` | Send SIGTERM; waits up to 10s, then SIGKILL | +| `boi daemon restart` | Stop + start | +| `boi daemon foreground` | Run attached to the terminal | +| `boi daemon reload` | Send SIGHUP to reload `max_workers`, `spawns_per_tick`, `claude_bin` | + +## PID and Lock File + +The daemon uses an exclusive `flock` on `~/.boi/daemon.lock` (which also stores the PID) as its singleton guard. This is crash-safe: the lock auto-releases when the process exits, so stale PID files can never block a restart. diff --git a/docs/dag-reassess.md b/docs/dag-reassess.md new file mode 100644 index 0000000..2eaa381 --- /dev/null +++ b/docs/dag-reassess.md @@ -0,0 +1,129 @@ +# DAG Reassessment: `boi plan` + `boi dispatch-many` + +## Why this exists + +Before this feature, dependency ordering between BOI specs was managed entirely by hand: whoever dispatched a batch of specs had to remember which in-flight spec IDs each new spec depended on, then manually thread `--after` flags at the right positions. Three failure modes: + +1. **Wrong ordering** — only surfaces when the dependent spec fails mid-execution (tokens and time already spent) +2. **Implicit artifact deps** — spec B reads a file that spec A writes; neither spec declares this; B starts in parallel with A and fails +3. **Scope drift** — spec A's scope expands after B was queued, breaking B's assumed contract + +This feature makes dependency analysis mechanical, not verbal. + +## The model + +Every spec is a **node** in a DAG. Edges come from two sources: + +- **Declared deps** — the `depends_on` field in a spec explicitly names upstream specs +- **Artifact deps** — one spec writes a file path that another spec reads; inferred by scanning `verify` and `spec` task fields for path patterns + +BOI builds this graph from the full set of in-flight + queued + new specs, topologically sorts it, and optionally asks an LLM to critique the result. + +### Critique severity levels + +| Severity | Meaning | Effect on `dispatch-many` | +|----------|---------|--------------------------| +| `block` | Hard ordering violation; dispatch will likely fail | Refuses dispatch entirely | +| `warn` | Probable dep not declared; may cause issues | Prompts for confirmation (or auto-approves with `--yes`) | +| `info` | Observation; no action required | Shown but does not gate dispatch | + +The LLM critique is cached by hash of (DAG topology + spec titles). Re-running `plan` on unchanged state costs zero tokens. + +## Commands + +### `boi plan` + +Builds the DAG and runs the LLM critique. **Does not dispatch anything.** + +```bash +boi plan # analyze current in-flight + queued state +boi plan spec-a.yaml spec-b.yaml # include new specs in the analysis +boi plan --force-refresh # bypass cache, re-run LLM critique +``` + +Use `boi plan` when: +- You're about to dispatch a batch and want to verify ordering first +- You want to understand what the current in-flight queue looks like as a graph +- You suspect implicit deps between specs you're about to queue + +### `boi dispatch-many` + +Runs `plan`, then dispatches all specs in topological order with correct `--after` chains. + +```bash +boi dispatch-many spec-a.yaml spec-b.yaml spec-c.yaml +boi dispatch-many specs/*.yaml --yes # auto-approve warns +boi dispatch-many specs/*.yaml --force # override warns (not blocks) +``` + +Use `dispatch-many` when: +- You're dispatching 2+ specs that may depend on each other +- You want the ordering to be verified automatically, not by memory +- You want `--after` chains emitted without manual bookkeeping + +### `boi dispatch` (single-spec, lightweight check) + +When dispatching a single spec into an existing in-flight queue, `boi dispatch` runs a lightweight deterministic check (no LLM): +- If the new spec's artifacts overlap with an in-flight spec AND no `--after` flag was provided: **WARN** (not block), showing the implicit dep + suggested `--after` flag +- Use `--skip-plan` to bypass this check when you know the ordering is correct + +The full LLM critique is only invoked by `plan` and `dispatch-many`. + +## When to use which command + +| Situation | Command | +|-----------|---------| +| Dispatching a single spec; no in-flight queue | `boi dispatch` | +| Dispatching a single spec; in-flight queue exists | `boi dispatch` (lightweight check runs automatically) | +| Dispatching 2+ specs in a related batch | `boi dispatch-many` | +| Uncertain about ordering; want to review before committing | `boi plan` first, then `dispatch-many` | +| Emergency dispatch; know the ordering is correct | `boi dispatch-many --force` or `boi dispatch --skip-plan` | + +## Example: 3-spec chain + +Three specs for a feature track: + +``` +build-schema.yaml — creates src/schema/*.rs +build-api.yaml — reads src/schema/*.rs, creates src/api/*.rs +build-frontend.yaml — reads src/api/*.rs contract +``` + +### Before (manual `--after`, misordered) + +```bash +boi dispatch build-schema.yaml +boi dispatch build-api.yaml # BUG: forgot --after +boi dispatch build-frontend.yaml --after +``` + +`build-api` starts in parallel with `build-schema`. Fails 4 tasks in because schema files don't exist yet. ~12k tokens wasted. Re-dispatch required. + +### After (`boi dispatch-many`) + +```bash +boi dispatch-many build-schema.yaml build-api.yaml build-frontend.yaml +``` + +``` +Analyzing DAG... + +DAG (3 nodes): + SA000 (build-schema) ← no deps + SA001 (build-api) ← SA000 [implicit: src/schema/*.rs] + SA002 (build-frontend) ← SA001 [declared] + +Critique: + [WARN] build-api has implicit dep on build-schema via src/schema/*.rs + but no --after declared. Adding automatically. + +Proposed dispatch order: SA000 → SA001 → SA002 + +Dispatch? [y/N] y + +Dispatched: SA000 (build-schema) +Dispatched: SA001 (build-api) --after SA000 +Dispatched: SA002 (build-frontend) --after SA001 +``` + +No misordering. No re-dispatch. The implicit dep was caught before a single token was spent on the wrong order. diff --git a/docs/optimization-plan-2026-04-29.md b/docs/optimization-plan-2026-04-29.md index d4bc0ad..43c9270 100644 --- a/docs/optimization-plan-2026-04-29.md +++ b/docs/optimization-plan-2026-04-29.md @@ -176,22 +176,11 @@ In `worker.rs`, the `max_spec_redos` is set from `config.retry_count` (config.ya **Savings:** ~5s per spawn × 7 spawns = **~35s per spec** -**How:** Add `--bare` to the `args` vec in `spawn.rs:49-57`. The `--bare` flag skips hooks, LSP, plugin sync, CLAUDE.md auto-discovery. Context must be provided explicitly via the prompt (which BOI already does via prompt templates). +**Status: DONE** — See `build_claude_args()` in `src/spawn.rs:32-49` and `docs/bench-bare-flag-2026-04-29.md`. -```rust -// spawn.rs:49 — add --bare -let mut args = vec![ - "-p".to_string(), prompt.to_string(), - "--bare".to_string(), // Add this - "--dangerously-skip-permissions".to_string(), - "--no-session-persistence".to_string(), - // Remove --setting-sources user (--bare handles this) - "--output-format".to_string(), "stream-json".to_string(), - "--verbose".to_string(), -]; -``` +The `--bare` flag skips hooks, LSP, plugin sync, CLAUDE.md auto-discovery. Phase TOMLs opt in via `[worker] bare = true`. Verified safe for `critic`, `plan-critique`, and `spec-critique` (text-only, no file tools). -**Risk:** `--bare` disables CLAUDE.md auto-discovery and hooks. BOI workers don't need these — all context is injected via the prompt. The worker-prompt.md already says "read ~/.claude/shared-memory/SHARED.md" explicitly. Verify that no worker relies on CLAUDE.md auto-load from the worktree. +**Risk:** `--bare` disables CLAUDE.md auto-discovery and hooks. BOI workers don't need these — all context is injected via the prompt. **Verified:** no worker relies on CLAUDE.md auto-load from the worktree. **Difficulty:** Easy — one line in spawn.rs. @@ -322,12 +311,12 @@ Based on `boi-model-selection.md` research and the phase timing data: | doc-update | Sonnet 4.6 | Sonnet 4.6 `--bare` | `--bare` | 5.2s → 0.18s | Same | Needs file read/write tools | | plan-critique | Sonnet 4.6 | **Gemini 2.5 Flash** | OpenRouter HTTP | 5.2s → 0.5s | **-87%** | Judgment only; rarely converges anyway | -**Note on OpenRouter phases:** Moving critic and plan-critique to OpenRouter requires implementing an HTTP-based LLM call path in the daemon. The phases don't use Claude's built-in tools (Read, Write, Bash) — they only read the prompt and produce structured output. This makes them candidates for pure API calls. +**Note on OpenRouter phases:** Moving critic and plan-critique to OpenRouter requires an HTTP-based LLM call path in the daemon. The phases don't use Claude's built-in tools (Read, Write, Bash) — they only read the prompt and produce structured output, making them candidates for pure API calls. **Implementation priority:** 1. `--bare` for execute, spec-review, doc-update (immediate, zero new deps) 2. Shell-only for task-verify (immediate, already supported) -3. OpenRouter for critic, plan-critique (medium-term, new HTTP client needed) +3. OpenRouter for critic, plan-critique — **`src/runtime/openrouter.rs` implemented** (smoke test: `tests/openrouter_smoke.rs`) --- @@ -453,31 +442,24 @@ mv ~/github.com/mrap/boi/templates/checks/quality-scoring.md \ ### 7.1 Add --bare flag to spawn_claude -**File:** `src/spawn.rs:49-57` +**Status: DONE** — Implemented via `build_claude_args()` in `src/spawn.rs:32-49`. + +`spawn_claude` now takes a `bare: bool` parameter. When `true`, `build_claude_args` appends `--bare` to the arg list. Phase TOMLs opt in via `[worker] bare = true`. Benchmark: see `docs/bench-bare-flag-2026-04-29.md`. ```rust -// Current: -let mut args = vec![ - "-p".to_string(), prompt.to_string(), - "--dangerously-skip-permissions".to_string(), - "--no-session-persistence".to_string(), - "--setting-sources".to_string(), "user".to_string(), - "--output-format".to_string(), "stream-json".to_string(), - "--verbose".to_string(), -]; - -// Proposed: -let mut args = vec![ - "-p".to_string(), prompt.to_string(), - "--bare".to_string(), - "--dangerously-skip-permissions".to_string(), - "--output-format".to_string(), "stream-json".to_string(), - "--verbose".to_string(), -]; -// Note: --no-session-persistence and --setting-sources are redundant with --bare +// Implemented (src/spawn.rs:32): +pub fn build_claude_args(prompt: &str, model: Option<&str>, bare: bool) -> Vec { + // ...base args... + if bare { + args.push("--bare".to_string()); + } + args +} ``` -**Expected impact:** 5.2s → 0.18s per Claude spawn. With 7 spawns/spec: 36s → 1.3s. +Note: `--no-session-persistence` and `--setting-sources` were kept (not removed) from the base args. + +**Measured impact:** 5,257ms → 184ms per bare spawn (−96.5%). See `docs/bench-bare-flag-2026-04-29.md`. ### 7.2 Add per-phase max_redos support diff --git a/docs/pipelines.md b/docs/pipelines.md new file mode 100644 index 0000000..05a0a75 --- /dev/null +++ b/docs/pipelines.md @@ -0,0 +1,116 @@ +# Pipeline Configuration (`phases/pipelines.toml`) + +Pipelines define which phases run and in what order for each execution mode. + +## Schema + +```toml +[mode.] +# Legacy: spec-level phases (pre- and post-task combined). +# If spec_post_phases is not set, spec_phases is used as spec_post_phases. +spec_phases = ["critic"] # optional, backward compat + +# v2: explicit pre/post split +spec_pre_phases = ["spec-critique", "spec-improve"] # run before task execution +spec_post_phases = ["doc-update", "critic", "merge", "cleanup"] # run after all tasks + +# Per-task phases (run for each task in sequence) +task_phases = ["execute", "review", "commit"] + +# Max iterations of the spec-pre loop before proceeding to task execution (default: 3) +max_loops = 3 +``` + +## Fields + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `spec_phases` | `string[]` | `[]` | Legacy combined spec phases. Maps to `spec_post_phases` if `spec_post_phases` is empty. | +| `spec_pre_phases` | `string[]` | `[]` | Phases that run before tasks (looped up to `max_loops` times). | +| `spec_post_phases` | `string[]` | `[]` | Phases that run after all tasks complete. | +| `task_phases` | `string[]` | `["execute"]` | Phases that run per-task, in order. | +| `max_loops` | `u32` | `3` | Max iterations of the `spec_pre_phases` loop. | + +## Backward Compatibility + +Modes that only define `spec_phases` (v1 layout) continue to work. The parser +treats `spec_phases` as `spec_post_phases` automatically, and `spec_pre_phases` +defaults to empty (no pre-task loop). + +## Pipeline Layouts + +### v1 (default, challenge, discover, generate) + +``` +Spec-post phases (spec_phases): [plan-critique →] critic [→ evaluate] +Per-task phases (task_phases): execute → task-verify +``` + +All spec-level phases run after tasks complete. No pre-task spec loop. + +### v2 + +``` +Spec-pre loop (max 3): spec-critique ↔ spec-improve +Per-task phases: execute → review → commit (commit is deterministic) +Spec-post phases: doc-update → critic → merge → cleanup + ^ ^ ^ + Claude det. det. +``` + +Deterministic phases (`commit`, `merge`, `cleanup`) run as plain shell operations +— they never spawn Claude, which eliminates cold-start latency for those steps. + +## File Resolution + +The pipeline registry file is resolved in this order: + +1. `BOI_PIPELINES_FILE` environment variable +2. `~/.boi/pipelines.toml` (user override) +3. Compiled-in fallback defaults (no file required) + +The repo's `phases/pipelines.toml` is loaded at build time via `CARGO_MANIFEST_DIR`. +To override a single mode without modifying the repo, copy just the relevant +`[mode.*]` section to `~/.boi/pipelines.toml`. + +## v1 vs v2: When to Use Each + +| | v1 (default) | v2 (opt-in) | +|---|---|---| +| **When** | Well-defined specs, fast iteration, no need for spec refinement | Complex specs that benefit from pre-task critique, or where commit/merge latency matters | +| **Spec-pre loop** | None | spec-critique ↔ spec-improve (≤3 iterations) | +| **Per-task** | execute → task-verify | execute → review → commit | +| **Spec-post** | critic (+ optional doc-update) | doc-update → critic → merge → cleanup | +| **Deterministic steps** | None | commit, merge, cleanup (no Claude cold-start) | +| **Cold-start hits** | 1 per task (execute) | 1 per task (execute) + 0 for commit/merge/cleanup | +| **Status** | Stable, default | Opt-in; default after A/B benchmarks confirm speedup | + +**Choose v1 when:** Your spec tasks are already well-scoped, you want the shortest possible critical path, or you're prototyping. + +**Choose v2 when:** You're running long multi-task specs where cold-start on commit/merge/cleanup adds up, or you want the pre-task critique loop to improve spec quality before execution begins. + +To opt into v2, set `mode: v2` in your spec header: + +```yaml +title: My Feature +mode: v2 +``` + +Or pass `--mode v2` at dispatch time: + +```bash +boi dispatch --spec my-feature.yaml --mode v2 +``` + +## Adding a Custom Mode + +```toml +# ~/.boi/pipelines.toml +[mode.my-mode] +spec_pre_phases = ["spec-critique"] +task_phases = ["execute", "code-review"] +spec_post_phases = ["critic"] +max_loops = 2 +``` + +Then dispatch with `boi dispatch --spec spec.yaml --mode my-mode`. diff --git a/docs/test-coverage-audit-2026-04-29.md b/docs/test-coverage-audit-2026-04-29.md index c81ad1a..9f99be3 100644 --- a/docs/test-coverage-audit-2026-04-29.md +++ b/docs/test-coverage-audit-2026-04-29.md @@ -331,7 +331,7 @@ fn test_spawn_claude_timeout() { // Overwrite with a sleep script std::fs::write(&script, "#!/bin/sh\nsleep 60\n").unwrap(); let bin = script.to_str().unwrap(); - let cr = spawn_claude("prompt", "/tmp", 2, None, None, bin).unwrap(); + let cr = spawn_claude("prompt", "/tmp", 2, None, None, bin, false).unwrap(); assert!(!cr.success); assert_eq!(cr.output, "timeout"); assert!(cr.total_ms >= 2000); diff --git a/docs/yaml-spec-schema.md b/docs/yaml-spec-schema.md index be91261..2c2264b 100644 --- a/docs/yaml-spec-schema.md +++ b/docs/yaml-spec-schema.md @@ -14,10 +14,11 @@ Use `.yaml` or `.yml` extension. BOI detects format by extension: | Field | Required | Type | Description | |-------|----------|------|-------------| | `title` | Yes | string | Human-readable spec title | -| `mode` | Yes | string | Execution mode: `execute`, `generate`, `challenge`, `discover` | +| `mode` | Yes | string | Execution mode: `execute`, `generate`, `challenge`, `discover`, `v2` | | `context` | No | string | Free-text background information for workers | | `workspace` | No | string | Pin spec to a specific worktree path | | `blocked_by` | No | list of strings | Spec IDs this spec depends on (e.g. `[SA7F3, SF2B1]`) | +| `brain` | No | path | Per-spec brain directory override. Must exist and contain `CLAUDE.md`. Overrides global `brain` in `config.yaml`. | | `outcomes` | Recommended | list of outcome objects | Spec-level declarations of what this spec delivers. Verified after all tasks DONE. | | `tasks` | Yes | list of task objects | Ordered list of tasks | @@ -77,13 +78,14 @@ A task with `depends` is skipped until all listed tasks are `DONE`. Circular dep ```yaml title: string # required -mode: execute # required: execute | generate | challenge | discover +mode: execute # required: execute | generate | challenge | discover | v2 context: | # optional, free text Multi-line context about the spec. workspace: /path # optional blocked_by: # optional - SA7F3 +brain: ~/mrap-hex # optional — overrides global config.brain outcomes: # recommended — verified after all tasks DONE - description: "Artifact exists and is correct" verify: "test -f /path/to/artifact" diff --git a/phases/cleanup.phase.toml b/phases/cleanup.phase.toml new file mode 100644 index 0000000..3ac9e5b --- /dev/null +++ b/phases/cleanup.phase.toml @@ -0,0 +1,9 @@ +name = "cleanup" +description = "Remove spec worktree and delete branch (deterministic, no Claude)" +completion_handler = "builtin:cleanup" + +[phase] +level = "spec" + +[worker] +runtime = "deterministic" diff --git a/phases/commit.phase.toml b/phases/commit.phase.toml new file mode 100644 index 0000000..d3b7ea9 --- /dev/null +++ b/phases/commit.phase.toml @@ -0,0 +1,9 @@ +name = "commit" +description = "Commit staged changes in the spec worktree (deterministic, no Claude)" +completion_handler = "builtin:commit" + +[phase] +level = "task" + +[worker] +runtime = "deterministic" diff --git a/phases/critic.phase.toml b/phases/critic.phase.toml index 0a8028b..a08b914 100644 --- a/phases/critic.phase.toml +++ b/phases/critic.phase.toml @@ -10,6 +10,7 @@ model = "claude-sonnet-4-6" prompt_template = "templates/critic-prompt.md" effort = "medium" timeout = 300 +bare = true [completion] approve_signal = "## Critic Approved" diff --git a/phases/merge.phase.toml b/phases/merge.phase.toml new file mode 100644 index 0000000..7580f01 --- /dev/null +++ b/phases/merge.phase.toml @@ -0,0 +1,9 @@ +name = "merge" +description = "Merge spec worktree branch into the target branch (deterministic, no Claude)" +completion_handler = "builtin:merge" + +[phase] +level = "spec" + +[worker] +runtime = "deterministic" diff --git a/phases/pipelines.toml b/phases/pipelines.toml index 88fcf19..614022b 100644 --- a/phases/pipelines.toml +++ b/phases/pipelines.toml @@ -13,3 +13,9 @@ task_phases = ["execute", "task-verify"] [mode.generate] spec_phases = ["plan-critique", "critic", "evaluate"] task_phases = ["decompose", "execute", "code-review", "task-verify"] + +[mode.v2] +spec_pre_phases = ["spec-critique", "spec-improve"] +task_phases = ["execute", "review", "commit"] +spec_post_phases = ["doc-update", "critic", "merge", "cleanup"] +max_loops = 3 diff --git a/phases/plan-critique.phase.toml b/phases/plan-critique.phase.toml index e675cdc..a98ba0f 100644 --- a/phases/plan-critique.phase.toml +++ b/phases/plan-critique.phase.toml @@ -5,8 +5,9 @@ description = "Evaluate spec quality before execution: catch non-executable veri level = "spec" [worker] -runtime = "claude" -model = "claude-sonnet-4-6" +runtime = "openrouter" +model = "gemini-flash" +api_key_env = "OPENROUTER_API_KEY" prompt_template = "templates/plan-critique-prompt.md" effort = "medium" timeout = 300 diff --git a/phases/spec-critique.phase.toml b/phases/spec-critique.phase.toml new file mode 100644 index 0000000..f3141fa --- /dev/null +++ b/phases/spec-critique.phase.toml @@ -0,0 +1,29 @@ +name = "spec-critique" +description = "Critique spec quality before execution: catch non-executable verifies, unbounded scope, missing dependencies, and implicit assumptions" + +[phase] +name = "spec-critique" +description = "Critique spec quality before execution" +level = "spec" +timeout_minutes = 2 +can_add_tasks = false +can_fail_spec = false + +[worker] +runtime = "openrouter" +model = "gemini-flash" +api_key_env = "OPENROUTER_API_KEY" +prompt_template = "templates/spec-critique-prompt.md" +effort = "medium" +timeout = 120 + +[completion] +approve_signal = "## Spec Approved" +reject_signal = "[CRITIQUE]" +on_approve = "next" +on_reject = "requeue:spec-improve" +on_crash = "retry" + +[hooks] +pre = [] +post = [] diff --git a/phases/spec-improve.phase.toml b/phases/spec-improve.phase.toml new file mode 100644 index 0000000..ce01e2b --- /dev/null +++ b/phases/spec-improve.phase.toml @@ -0,0 +1,26 @@ +name = "spec-improve" +description = "Improve spec quality in response to critique feedback, then requeue for re-critique" + +[phase] +name = "spec-improve" +description = "Improve spec quality in response to critique feedback" +level = "spec" +timeout_minutes = 3 +can_add_tasks = false +can_fail_spec = false +requires_claude = true + +[worker] +runtime = "claude" +prompt_template = "templates/spec-improve-prompt.md" +effort = "medium" +timeout = 180 + +[completion] +approve_signal = "## Spec Improved" +on_approve = "requeue:spec-critique" +on_crash = "retry" + +[hooks] +pre = [] +post = [] diff --git a/phases/spec-review.phase.toml b/phases/spec-review.phase.toml deleted file mode 100644 index e25904d..0000000 --- a/phases/spec-review.phase.toml +++ /dev/null @@ -1,27 +0,0 @@ -name = "spec-review" -description = "Review and improve spec quality before execution" - -[phase] -name = "spec-review" -description = "Review and improve spec quality before execution" -level = "spec" -timeout_minutes = 2 -can_add_tasks = true -can_fail_spec = false -requires_claude = true - -[worker] -runtime = "claude" -model = "claude-sonnet-4-6" -prompt_template = "templates/spec-review-prompt.md" -effort = "medium" -timeout = 120 - -[completion] -approve_signal = "## Spec Review Complete" -on_approve = "next" -on_crash = "retry" - -[hooks] -pre = [] -post = [] diff --git a/projects/brand/proposals/synthesis-brand-vision.html b/projects/brand/proposals/synthesis-brand-vision.html new file mode 100644 index 0000000..5483fd3 --- /dev/null +++ b/projects/brand/proposals/synthesis-brand-vision.html @@ -0,0 +1,1251 @@ + + + + + + The Hex Brand — Vision + + + + + + + + + + + +
+
+ Brand Vision — 2026 +

The builder who finally stopped hiding

+

A brand strategy for the person who spent years building exquisite infrastructure and never told anyone about it.

+
+
+ 300+ + BOI iterations executed +
+
+ 0 + brand specs run — until now +
+
+ 5 + content pillars +
+
+ 4 + week launch +
+
+
+
+ + +
+
+ 01 — Core Positioning +

The honest story no one's told yet

+

Not "compound engineer." Not "AI researcher." Not the polished version. The real one: a person who has been building sophisticated systems in private for years and is finally ready to be seen.

+ +
+ "Mike builds exquisite infrastructure for work he hasn't started yet. The abandoned projects are infrastructure improvements for a builder who hasn't yet decided what he's building for others." + — Abandoned Projects Analysis, 2026 +
+ +

+ That sentence — written as a self-assessment — is the entire brand. Not as shame. As origin story. The hiding was real. The ending it is the brand. +

+ +
+
+
Who You Are
+

+ A systems engineer who competes against himself. You've spent three years building AI infrastructure — hex, BOI, a Python-to-Rust rewrite done mid-flight — with the same rigor you'd apply if 10,000 people depended on it. No one was watching. That discipline is the brand. +

+
+
+
Who You're Not
+

+ Not a content creator who codes. Not a Twitter founder who threads his way to deals. Not a build-in-public person posting growth charts. You went deep or you didn't start. That's the edge — and the thing most builders refuse to say out loud. +

+
+
+
What Makes This Rare
+

+ Most people who write about building have shipped one product to users. You've shipped a production agent system with 300+ autonomous iterations, real memory benchmarking across 7 configurations, and a full Rust rewrite. You're not writing about the theory. +

+
+
+
The Honest Gap
+

+ "The brand-building work shows zero BOI specs executed against it." That line, from your own project analysis, is the most important sentence in this document. The brand starts the moment you start running specs against it. The first post is the spec executing. +

+
+
+
+
+ + +
+
+ 02 — Content Pillars +

Five pillars. One coherent signal.

+

Each pillar is a different angle on the same truth: what it looks like when someone builds with full commitment, in private, for a long time — and then finally comes out.

+
+
+ + +
+
+
+
+ 01 +
+
+ Pillar One +

The Cost Reality

+

The actual price of building in private for years. Not opportunity cost as an abstraction — the real, compounding cost of sophisticated work that no one sees. This pillar does what most builders won't: it names the loss.

+
+
+ +
+

The brand-building work appears repeatedly in stated goals but shows zero BOI specs executed against it. Three years of quarterly targets. Zero specs run.

+

— Signal driving this pillar

+
+ +
+
+
LinkedIn / Long-form
+
I've been "building my brand" as a quarterly goal for three years. I ran zero specs against it.
+
+

Last week I ran an analysis of every project I've ever created. 49 project containers. Career analysis, market research, AI infrastructure — all with specs dispatched, outcomes tracked, iterations documented.

+

Brand building: a stated quarterly goal in every assessment. Zero specs run. Not one.

+

This is not a productivity post. This is what the data looks like when you're using "someday" as a strategy.

+

The cost is specific: I built a production agent system that 300+ specs have been run against, a Rust rewrite done mid-flight, 7 memory configurations benchmarked. I wrote none of it down where anyone could see it.

+

Today, that changes. Not because I'm ready. Because I noticed the gap.

+
+ +

This is post 1. It names the gap publicly and signals intention without making a promise you'll abandon.

+
+
+
X / Thread
+
The hidden cost of building for yourself: your best work has no feedback loop.
+
+

I've built an AI agent system sophisticated enough that it runs its own improvement cycles. Automated. Iterative. Production-proven.

+

For three years, the only feedback loop was: did it work today?

+

No external signal. No one to push back. No one to say "that's obvious" or "I've never seen that approach."

+

The cost: you get very good at building things that work exactly as you designed them. You get very bad at knowing if you designed the right things.

+

The fix is uncomfortable: other people.

+
+ +
+
+
LinkedIn / Insight
+
I analyzed my own project graveyard. Here's what 12 abandoned projects reveal.
+
+

The pattern wasn't what I expected. I don't abandon projects halfway. I either never start them (the project container sits empty forever) or I finish them completely.

+

The graveyard is 12 containers created, never executed. Zero specs dispatched.

+

Why? Because I won't start work I can't define success for. The empty context.md is a filter, not laziness.

+

But here's the honest version: "I haven't defined success" sometimes means "I'm scared to commit in public."

+

That's the cost reality. The filter protects quality. It also protects hiding.

+
+ +

Cites real data. Shows pattern analysis skills. Makes the personal structural.

+
+
+
+ + +
+
+
+ 02 +
+
+ Pillar Two +

The Failure Patterns

+

Honest, structural autopsy of how things break. Not failure-porn — patterns that reveal how your mind works. This pillar teaches systems thinking by example: your own systems, your own failures, the readable architecture of how you got stuck.

+
+
+ +
+
+
X / Thread
+
My biggest failure pattern isn't what I abandon. It's what I create instead of building.
+
+

I call it the Cluster Creation Problem. When I get excited about multiple ideas at once, I create project containers for all of them. It feels like progress. It isn't.

+

April 24, 2026: I created two projects in one day. Dispatched specs against neither. Both are still empty.

+

The act of creating the container scratches the itch of "I should do this" without requiring commitment. It's the engineering equivalent of putting something on a to-do list you never open again.

+

The tell: when I create more than one project container in a day, I'm managing anxiety about ideas, not executing on them.

+

Knowing this doesn't fix it. It just makes the pattern visible.

+
+ +

Shows the cluster creation pattern clearly. Names a specific date. Makes structural what others just feel as "distraction."

+
+
+
LinkedIn / Short
+
I don't fail mid-project. I fail before I start. Here's why that's actually unusual.
+
+

Most people abandon projects at the "nearly done" stage. The last 10% kills them.

+

I don't. In three years of building, I've never stopped a project mid-sprint. My projects are either never dispatched or fully complete.

+

The failure mode is binary: start late and go deep, or don't start at all. Halfway doesn't exist in my project history.

+

This is unusual. It's also a trap. The binary commitment means I'll abandon interesting-but-unclear work before I've learned anything from it.

+

The fix isn't to be okay with halfway. It's to define "halfway" as a valid stopping point for research, not just for execution.

+
+ +
+
+ +
+
+
X / Long Thread
+
I built the infrastructure to fix a problem. Then I built a different infrastructure that made the first one unnecessary. Five times.
+
+

When I find an incremental solution to a problem, I'll often abandon it when a more fundamental solution appears. This sounds smart until you realize that "more fundamental solution" sometimes means "another level of abstraction that also needs building."

+

Examples from my own history: timeout-resilience project (abandoned when root cause was diagnosed). boi-optimizer (abandoned when the Rust rewrite made it irrelevant). tmux-makeover (abandoned when a web UI was built instead).

+

I call it the Infrastructure Trap: you can improve infrastructure forever without ever shipping what the infrastructure is for. The graveyard is mostly meta-infrastructure — tools to improve tools that improve tools.

+

The honest version: I have a talent for finding the right level of solution. The failure mode is that I'll find a "better level" forever rather than ship at any level.

+
+ +

This is the "Infrastructure Trap" — one of the most resonant patterns for senior engineers who recognize themselves in it.

+
+
+
+ + +
+
+
+ 03 +
+
+ Pillar Three +

The Psychology of Trust

+

Why you ship for people you love and stall for strangers. The mechanics of what trust actually enables — in engineering, in publishing, in building something for an audience you haven't met yet. This pillar is the warmest one.

+
+
+ +
+

Projects with a real person attached ship. Projects with only a theoretical beneficiary don't. Love is a more reliable shipping driver than intellectual curiosity.

+

— From the project completion pattern

+
+ +
+
+
LinkedIn / Personal
+
The most reliable shipping trigger I've found isn't discipline. It's a specific human who needs it.
+
+

My project history shows a pattern I didn't see until I mapped it: the only projects I've shipped to completion (outside my own tools) have a real person attached to them.

+

I built a photo curation system for my girlfriend. Firebase infrastructure, AI classification, admin queue — complete. Why? Because she needed it and I could see her using it.

+

"A tool for anyone who does X" doesn't ship. "A tool for Whitney" does.

+

This isn't a motivation hack. It's an architecture insight. Abstraction is the enemy of completion. The most powerful thing you can do for a stalled project: name one specific person it would help. Not a persona. A person.

+
+ +

Real story, structural insight, immediately applicable. The "trust as completion trigger" framing is uncommon.

+
+
+
X / Thread
+
Building in public is an act of trust extension. You're shipping to strangers before you know if they're worth shipping to.
+
+

I've spent three years building for an audience of one (me) and one other person (Whitney). Every completed project had a specific human attached.

+

Building in public requires trusting strangers before they've earned it. That's the discomfort. You don't know if the audience will be worth the cost of making the work visible.

+

The alternative is what I did: build sophisticated systems in private, knowing every line is correct, never having to defend the choices, never risking being misunderstood.

+

That's not discipline. That's the same as never publishing a draft until it's perfect. The draft that never ships isn't perfect — it's just safe.

+
+ +
+
+
+ + +
+
+
+ 04 +
+
+ Pillar Four +

The Edges

+

The imposter/alpha tension. The transcendence moments (when a system works the way you imagined). The mission that competes with the practical. This pillar goes to the uncomfortable edges that most technical writers sand smooth.

+
+
+ +
+
+
X / Short
+
I rewrote 20,000 lines of Python in Rust while it was in production. Not because I had to. Because doing it right matters more than doing it fast.
+
+

BOI v1 was Python. 20K+ lines. 80+ test files. 1,536 test cases. Running in production with a full dashboard, critic system, eval system, and 300+ real specs.

+

I rewrote it completely in Rust in April 2026. Typed correctness. ~10x speed gain. Same architecture, none of the accumulated Python debt.

+

Why? Because I knew what it could be, and what it was wasn't it.

+

The imposter voice says: "shipping it is enough." The builder voice says: "the architecture is wrong." Sometimes both are right and you have to pick one.

+

I picked the harder one. The system is cleaner. The delay was real. Both are true.

+
+ +

This story is powerful. It demonstrates conviction, technical depth, and the refusal to rationalize mediocrity.

+
+
+
LinkedIn / Philosophical
+
Imposter syndrome in engineering is usually real information being misread.
+
+

The voice that says "I don't know enough to ship this" is sometimes right. Not about whether you know enough — you often do — but about whether the thing you're about to ship is the right version.

+

I've had that voice stall me on work that was genuinely good. I've also had it catch real architectural problems before they made it to production.

+

The pattern I've noticed: the voice is loudest when the gap between what the system IS and what it COULD BE is most visible to you.

+

That gap-perception is a skill. It's also a liability. Learning to hear it without obeying it blindly is most of senior engineering.

+
+ +
+
+
X / Thread
+
The moment a self-improving system works exactly as you imagined is one of the strangest feelings in engineering.
+
+

Hex has a self-improvement loop: reflect → eval → archive → score → dispatch. Fully automated. Proven end-to-end.

+

The first time I watched it run — identify a gap in its own behavior, write a spec to address it, run the spec, archive the result, update its own parameters — I had no idea what to feel.

+

Proud? Unsettled? It's both. You built something that improves without you. That's the goal. It's also the exact thing that makes you wonder why you're needed.

+

The transcendence moments in engineering feel like this: the system doing what you designed it for is both the proof that you succeeded and the beginning of what comes after success.

+
+ +

Rare vulnerability about what it actually feels like when an autonomous system works. This is the stuff no one writes.

+
+
+
+ + +
+
+
+ 05 +
+
+ Pillar Five +

The Creative Bridge

+

The channel between music and engineering. What composition teaches systems design. What debugging teaches arrangement. The unexpected clarity that comes from holding two disciplines at once — and what it means to compound them.

+
+
+ +
+
+
LinkedIn / Cross-domain
+
Music and software share one failure mode: you can keep refining forever and never ship.
+
+

In music production, "finished" is a decision, not a state. There's always another mix pass, another layer to pull back, another section that could be tighter.

+

Software is the same — but software has tests. You can convince yourself that green tests mean done. No equivalent exists in music. Done is purely a judgment call.

+

What music taught me about engineering: ship when the intention is legible, not when every edge case is handled. Some edge cases are features of the thing, not flaws to fix.

+

What engineering taught me about music: a spec clarifies what success looks like before you start. Sitting down to a blank session with no defined output is why tracks never finish.

+
+ +

This is the bridge pillar. It humanizes the technical work and reaches a wider audience without losing depth.

+
+
+
X / Thread
+
The debugging mindset and the mixing mindset are the same process with different vocabulary.
+
+

Debugging: isolate the signal from the noise. Identify which component changed. Find the invariant that was violated.

+

Mixing: isolate each element. Identify where frequencies conflict. Find the space where each sound lives without competing.

+

Both are about finding what shouldn't be there and removing it. The elegance is not in what you add — it's in what you subtract.

+

I've been doing both for long enough that I don't know which one I learned first. They're the same discipline practiced in two different rooms.

+
+ +
+
+
+
+ + +
+
+ 03 — Profile Rebrand +

The bio set

+

Recommended bios for X, LinkedIn, and Instagram. Each is tuned to the platform's context and audience expectation — but all three point to the same person.

+ +
+
Recommended Headline / Tagline
+

Systems engineer. I build things that compound. I've been doing it privately for three years. This is where that changes.

+
+ +
+
+
X (Twitter) 160 chars
+

+ Building AI infrastructure that compounds. hex + BOI — autonomous agent systems running in production.

+ I went deep for three years. Now I'm writing it down. +

+

Short, signal-dense, honest about the "now writing it down" shift. Positions compound + agent work without jargon-stacking. The pivot is built into the bio itself.

+
+
+
LinkedIn 220 chars headline
+

+ Systems Engineer · AI Infrastructure · Compound Engineering

+ I build agent systems that improve themselves over time. hex (persistent AI agent) + BOI (autonomous spec orchestrator) — both in production, both in Rust.

+ Writing about what it looks like when you build for real: the architecture, the failures, the patterns that only emerge after three years of private iteration. +

+

LinkedIn audience expects credentials + context. The "writing about what it looks like" line sets expectations for the content without promising a newsletter or framework.

+
+
+
Instagram 150 chars
+

+ Systems that compound.
+ Music that breathes.

+ Engineering + production.
+ Building in public after three years private. +

+

Instagram is visual. The short-line format works with the medium. "Music that breathes" introduces the creative bridge pillar. The "after three years private" line is the hook — it implies a story.

+
+
+ +
+
Bio Strategy Note
+

+ All three bios share the same honest anchor: the shift from private to public is built into the copy. This is intentional. "Building in public after three years private" positions the brand work as the real story — not just a showcase of past work. It signals ongoing, not archived. +

+

+ Do not lead with "compound engineering" as a brand term until you've used it in 10-15 posts where its meaning is visible. Right now it sounds like jargon. In six weeks, it'll sound like a precise description of what readers have already seen. +

+
+
+
+ + +
+
+ 04 — Launch Plan +

Week 1–4 posting cadence

+

Four weeks. One week per pillar arc, with a consistent anchor post each Monday and at least one engagement-optimized thread mid-week. No cross-posting until week 3 — build one channel deeply first.

+ +
+ +
+
+ 01 + Foundation +
+
The builder who finally stopped hiding
+

Name the gap publicly. Establish the positioning. Do not summarize everything you've built — let the fact that you've been building privately be the entire story this week.

+
    +
  • + Mon + LinkedIn: "I've been 'building my brand' as a quarterly goal for three years. I ran zero specs against it." — The opening post. Acknowledge the gap. Signal the shift. +
  • +
  • + Wed + X Thread: "The hidden cost of building for yourself: your best work has no feedback loop." — Cost Reality pillar, first post. +
  • +
  • + Fri + LinkedIn: Brief intro to hex + BOI — one paragraph each. "Here's what I've been building." Not a launch. A door opening. +
  • +
+
+
Week Goal
+

Establish the positioning. The "stopped hiding" arc is now live. Don't rush to the next pillar — let this land first.

+
+
+ + +
+
+ 02 + Cost & Failure +
+
The graveyard tells the truth
+

Go into the graveyard. Failure patterns pillar. The cluster creation problem, the binary commitment pattern, the infrastructure trap. Use real data — named projects, actual dates.

+
    +
  • + Mon + LinkedIn: "I analyzed my own project graveyard. Here's what 12 abandoned projects reveal." — The pattern post. Data-driven, structural, honest. +
  • +
  • + Wed + X Thread: The Cluster Creation Problem. "When I get excited about multiple ideas at once, I create project containers for all of them. It feels like progress." Named dates, named projects. +
  • +
  • + Thu + X Short: The binary pattern. "I don't fail mid-project. I fail before I start." — Punchy, counterintuitive. Designed to retweet. +
  • +
  • + Fri + LinkedIn: The Infrastructure Trap. "I built the infrastructure to fix a problem. Then I built a different infrastructure that made the first one unnecessary. Five times." +
  • +
+
+
Week Goal
+

Establish that you write about failure as clearly as success. This is what earns trust from the engineers who've been through it.

+
+
+ + +
+
+ 03 + Trust & Edges +
+
What it takes to ship for strangers
+

Psychology of Trust pillar + The Edges. The Whitney insight, the building-in-public vulnerability, the Rust rewrite story. This week goes personal and deep. Start cross-posting LinkedIn → X this week.

+
    +
  • + Mon + LinkedIn: "The most reliable shipping trigger I've found isn't discipline. It's a specific human who needs it." — The Whitney story. +
  • +
  • + Tue + X Thread: Building in public as trust extension. "You're shipping to strangers before you know if they're worth shipping to." — The vulnerability post. +
  • +
  • + Thu + X Short: The Rust rewrite story. "I rewrote 20,000 lines of Python in Rust while it was in production. Not because I had to. Because doing it right matters more than doing it fast." +
  • +
  • + Fri + LinkedIn: "Imposter syndrome in engineering is usually real information being misread." — The nuanced take that earns senior engineer respect. +
  • +
+
+
Week Goal
+

The Rust rewrite thread is the high-leverage play this week. It's specific, verifiable, and rare. Cross-post it everywhere.

+
+
+ + +
+
+ 04 + Synthesis +
+
The creative bridge + what comes next
+

Creative Bridge pillar + your first "looking forward" post. End the month by introducing the music angle and beginning the compound engineering concept without naming it as a brand.

+
    +
  • + Mon + LinkedIn: "Music and software share one failure mode: you can keep refining forever and never ship." — The cross-domain bridge. +
  • +
  • + Wed + X Thread: "The debugging mindset and the mixing mindset are the same process with different vocabulary." — The parallel post. +
  • +
  • + Fri + LinkedIn: Month-1 close. "Here's what I've learned from writing publicly for the first time in three years." — Meta-reflection on the launch itself. What surprised you. What the feedback loop revealed. +
  • +
+
+
Week Goal
+

The Friday close post is the most important of the month. It names what you've learned from the experiment — and signals you're continuing. Write it honestly. It'll be your most-read post.

+
+
+
+ +
+ Cadence Rules +
+
+

What to protect

+
    +
  • Monday anchor posts — these set the weekly frame
  • +
  • One real failure story per week — no sanitizing
  • +
  • Specific data (dates, line counts, iteration numbers)
  • +
  • The "shift" narrative — you're building in public now
  • +
+
+
+

What to avoid

+
    +
  • Engagement bait with no substance ("hot take: X")
  • +
  • "Compound engineering" as a brand term until week 6+
  • +
  • Summarizing more projects than you can explain in 3 lines
  • +
  • Posting on all platforms before establishing one
  • +
+
+
+
+
+
+ + +
+
+ The Brand in One Paragraph +

He went deep for three years. Now he's writing it down.

+ +
+

The work is real. Two production systems. One complete rewrite. 300+ autonomous iterations documented.

+

The hiding was real. Brand-building as a quarterly target for three years. Zero specs run.

+

The shift is the brand. Not the systems he built — the decision to stop building in private.

+

The voice is earned. He doesn't write about the theory of compound engineering. He's been doing it, alone, since before it was a useful phrase.

+
+ + Back to top +
+
+ + +
+ +
+ + + diff --git a/projects/brand/research/abandoned-projects-analysis.md b/projects/brand/research/abandoned-projects-analysis.md new file mode 100644 index 0000000..e7b0cd4 --- /dev/null +++ b/projects/brand/research/abandoned-projects-analysis.md @@ -0,0 +1,230 @@ +# Abandoned Projects Analysis — The Graveyard Tells the Truth + +_Generated: 2026-04-29 (T6854 — Mirofish Business Opportunity spec)_ + +--- + +## What Was Analyzed + +All project containers in `~/.boi/projects/` (49 entries) plus the git archive at `~/boi/_archive/python/` and broader project context from career analysis, assessment files, and spec history. The `q-NNN` numbered entries are individual spec research notes, not standalone projects — they're excluded from the abandonment analysis but inform the pattern section. + +--- + +## The Map: What Mike Built and Where It Stopped + +### TIER 1: Core Systems (Built Deep, Still Active) + +**hex** — Persistent AI agent. Has shipped at least 6 major subsystems: +- hex-core: agent loop, session management +- hex-ui: web UI for agent control +- hex-events: event-driven policy engine (159+ tests, Docker-verified, production) +- hexagon-base: shared utilities +- ai-native-env: AI development environment +- hyperagents: self-improvement pipeline (reflect → eval → archive → score → dispatch, proven E2E) + +How far did he get? **All the way**. hex is the project that actually compounded. 300+ BOI iterations documented against it. Real production load. Real memory benchmarking (7 configs, hybrid FTS5+sqlite-vec+RRF winner). Real eval frameworks. + +**BOI** (Beginning of Infinity) — Agent orchestrator. +- v1: Python, 20K+ lines, 80+ test files, 1,536 test cases. Archived at `~/boi/_archive/python/`. Got to production with a full dashboard, daemon, worker fleet, critic system, eval system, and 300+ real specs executed. +- v2: Rust rewrite, April 2026. Complete rewrite of v1 in Rust — same architecture, typed correctness, ~10x speed gain. Currently in production. + +How far? **Python version was complete enough to replace entirely**. Rust version is the current production system. + +--- + +### TIER 2: Completed Research, Unclear Execution + +**Polymarket Scanner** — A Python prediction market scanner. +- Built: src/scanner.py, signals.py, paper_trader.py, classifier.py, db.py, scripts/scan.sh +- How far: Real code, real API integration, real bug discovered (SCAN_MAX_PAGES=0 = unlimited requests = timeout every run) +- Why stopped: Bug was identified in depth (q-348 research). Whether it was actually fixed is unknown — the research is complete but no follow-up spec exists. +- Tells the truth: Mike will build the trading infrastructure but not necessarily make it work reliably. + +**Zwerk** — AI-powered spreadsheet/board tool (pydantic-ai backend, Svelte frontend). +- How far: OAuth scope research done (q-123), Progressive disclosure strategy designed, Google verification strategy planned. +- Why stopped: No evidence of continued development in boi history. Research was done but no execution specs followed. +- Tells the truth: Mike starts product research for external-facing tools but doesn't follow through to users. + +**Whitney Content Lab** — Photo/media curation system for his girlfriend. +- Built: Firebase setup (custom storage bucket, Firestore rules), media ingestion pipeline with AI classification, admin review queue, bulk moderation API. +- How far: Firebase infrastructure real and running (q-491, q-508). Admin endpoints built. +- Status: Active as of April 2026 — most recent active project outside hex/boi ecosystem. +- Tells the truth: Mike ships when there's a concrete human who needs it and it's small enough to finish. + +**Hermes customizations** — Third-party AI assistant (NousResearch hermes-agent) being customized for Mike's use. +- Built: Voice restoration via system prompt injection (300-token voice trait injection to config.yaml), update strategy analysis, memory analysis. +- How far: Research complete, config patches applied. +- Tells the truth: Mike uses Hermes as a secondary agent system and tinkers with it rather than building on top of it. + +--- + +### TIER 3: The Graveyard — Created and Never Started + +These are boi project containers created with intent but zero specs ever dispatched: + +| Project | Created | Description | What It Probably Was | +|---------|---------|-------------|---------------------| +| anti-pattern-enforcement | Unknown | Empty | Auto-enforcement of code quality anti-patterns in BOI specs | +| boi-optimizer | Unknown | Empty | Optimizing boi's performance/throughput | +| diversity-collapse | Apr 24, 2026 | Placeholder only | Research into AI thought homogenization or opinion collapse | +| hex-identity-tournament | Apr 24, 2026 | Placeholder only | Tournament to select hex's best persona/identity configuration | +| hex-memory | Apr 23, 2026 | Placeholder only | Deep memory architecture work for hex | +| hex-pm | Apr 8, 2026 | Placeholder only | Project management features for hex | +| hermes-memory-analysis | Apr 4, 2026 | Empty | Analyzing Hermes's memory system vs hex's approach | +| hex-events (project) | Mar 16, 2026 | Empty | Analysis/improvement work on hex-events | +| local-llm-server | Mar 19, 2026 | Empty | Setting up a local inference server | +| timeout-resilience | Apr 7, 2026 | Empty | Making hex resilient to provider timeouts | +| tirith-yolo-mode | Apr 4, 2026 | Empty | "YOLO mode" for hex's Tirith security system (bypass all checks) | +| tmux-makeover | Mar 18, 2026 | Empty | Redesigning hex's tmux layout and UX | + +--- + +## Why Did He Stop? Pattern Analysis + +### Pattern 1: The Cluster Creation Problem +Projects die in clusters. hex-identity-tournament and diversity-collapse were both created April 24, 2026 — same day. hex-memory was April 23. hex-pm was April 8. tirith-yolo-mode and hermes-memory-analysis both April 4. The pattern: Mike gets excited about multiple ideas simultaneously, creates project containers for all of them, then dispatches specs for only one or two. The rest sit empty. + +**What this means**: Mike's mind runs parallel. He generates ideas faster than he executes them. The boi project creation act is his way of "parking" an idea — it feels like progress without being progress. + +### Pattern 2: Solved Differently +Several abandoned projects had their core need addressed through other channels: + +- **tmux-makeover** → The q-329 TUI/context-switching research produced a clear recommendation (fzf + gum), but the implementation was never dispatched as a separate project. +- **timeout-resilience** → The gateway-timeout-analysis project diagnosed the root cause (ReadTimeout during hex's restaurant search task), completing the "why" without the "fix." +- **boi-optimizer** → The entire Rust rewrite addressed performance more fundamentally than any optimization project could have. +- **hex-memory** → Memory provider research (memory-providers project) chose holographic, completing the research phase. The implementation may have been done directly in the hex repo. + +**What this means**: Mike has a talent for finding the right level of solution. He'll abandon incremental fixes when a more fundamental solution appears. This is the "systems over features" principle in action — but it means many projects exist as permanent research stubs. + +### Pattern 3: The Infrastructure Trap +Half the graveyard is meta-infrastructure: tools to improve the tools. anti-pattern-enforcement (for better spec writing), boi-optimizer (for faster spec execution), hex-memory (for better hex), hex-pm (for managing hex projects), hex-events project (for improving the event system). These never get specs dispatched because they're always slightly less urgent than the actual work. + +**What this means**: Mike understands deeply that infrastructure compounds, but he has a blind spot: you can improve infrastructure forever without ever shipping what the infrastructure is for. The infrastructure graveyard is a specific failure mode of his operating style. + +### Pattern 4: The Idea Jar Without Forcing Function +tirith-yolo-mode and diversity-collapse are the clearest examples of "interesting idea, no urgency." Tirith YOLO mode would let hex bypass its own security checks — a useful debugging tool but not a crisis. Diversity-collapse research might have been about AI homogenization of thought (a timely topic in April 2026) but had no concrete application. + +**What this means**: Mike creates project containers when he reads something interesting. Without a concrete problem it solves, the project stalls immediately. + +### Pattern 5: The Stage of Death is Always the Same +Critically: **Mike almost never gets halfway**. Projects are either: +- Never dispatched (the graveyard above — project created, no specs), or +- Completed through at least a full research cycle (ai-trends: 6 branches, all produced; career-analysis: 3 tasks, all complete; memory-providers: holographic selected) + +He doesn't start a sprint and abandon it halfway through. The commit is binary. This is unusual — most people abandon at the "nearly done" stage. Mike abandons before he starts. + +--- + +## What Patterns Emerge? + +### The Consistent Completion Pattern + +When Mike DOES execute, he goes deep. The ai-trends project produced tens of thousands of words of research across 6 branches. Career analysis produced a full five-path model, Anthropic-specific positioning analysis, and negotiation tactics. The polymarket-scanner research diagnosed a bug at the exact code line. He doesn't do surface-level work. + +**The corollary**: The empty projects are empty because Mike knows what deep work looks like and has implicitly decided the project doesn't warrant it yet. "Yet" often becomes "never." + +### The Personalization Driver + +Projects with a real person attached ship. Whitney Content Lab: ships because Whitney needs it. hex: ships because Mike uses it daily. hex-ui: ships because Mike needs to interact with hex visually. Polymarket Scanner: built because Mike wants to trade. + +Projects with only a theoretical beneficiary don't ship. local-llm-server: who's using it? tmux-makeover: Mike already uses tmux fine. diversity-collapse: intellectually interesting but serves no one specific. + +### The Stated vs Actual Interests Gap + +**Stated interests**: AI trends research, agent-to-agent communication, autonomous experimentation, local inference revolution, self-improvement systems. + +**Actual completed work**: Infrastructure for his own agent (hex), tooling for his own workflow (BOI), projects serving a concrete relationship (Whitney Content Lab), and career positioning analysis. + +The divergence is sharp: Mike talks a lot about the AI ecosystem (and produces excellent research about it) but builds primarily for himself and one other person (Whitney). His actual interest radius is much tighter than his intellectual radius. + +--- + +## Which Abandoned Projects Should Be Revisited? + +### High Value — Revisit Now + +**hex-identity-tournament** (Apr 24, 2026) +- The concept: run multiple variants of hex's identity/persona configuration through a structured tournament to select the best configuration. +- Why worth revisiting: hex's identity is currently implicit — "Hermes voice injection" research shows Mike is already thinking about this. A tournament-based selection process would make it rigorous. Hex compounds over time; a better identity configuration compounds too. +- Why stopped: Too abstract to dispatch without a clearer success metric. Needs a definition of "better hex identity" before a tournament can be run. +- First step: Define 3-5 concrete identity dimensions to test (communication style, proactivity level, memory recall behavior) before creating specs. + +**hex-memory** (Apr 23, 2026) +- The context: memory-providers research selected holographic as the only provider with trust scoring and contradiction detection. But holographic is an external dependency. +- Why worth revisiting: The implementation of holographic into hex's architecture was never dispatched. The research stopped at "holographic wins" without "here's how to integrate it." +- First step: Dispatch a spec to audit current hex memory vs holographic's trust scoring model, produce an integration plan. + +**local-llm-server** (Mar 19, 2026) +- The context: Created when BitNet (100B params on CPU, 82% less energy) was announced. LLM cold-start optimization research (April 2026) showed that the 6-200s cold start is Node.js initialization, not inference — and that local models can't run agentic tools. +- Why worth revisiting differently: Not as agentic tool runners, but as judgment-phase models. The cold-start research already identified that OpenRouter models (Grok 4.1 Fast, Gemini 2.5 Flash) can be used for judgment-only BOI phases. A local inference server could reduce per-call costs for these judgment phases further. +- First step: Dispatch the phase-to-model mapping task (t-3 from llm-cold-start-optimization, which identified this) with a local inference constraint added. + +**diversity-collapse** (Apr 24, 2026) +- The concept: Unknown from project files, but the name points to research on AI systems producing homogeneous outputs over time — a real phenomenon worth understanding if hex is to remain useful. +- Why worth revisiting: Hex's self-improvement loop could theoretically converge on a single stable configuration and stop improving. Understanding diversity-collapse dynamics in self-improving systems is directly relevant to hex's long-term architecture. +- First step: Add a context.md explaining what the project is actually for — the name alone isn't enough to dispatch specs. + +### Lower Priority — Ideas That Aged Out + +**anti-pattern-enforcement**: Automated detection of spec anti-patterns in BOI. Useful, but BOI now has a Critic system that does this manually. The Rust rewrite includes a spec validator. The need is partially met. + +**timeout-resilience**: Gateway timeouts were diagnosed (restaurant search task permanently lost). The root cause was ReadTimeout during a long hex operation with no resume path. The Rust BOI rewrite includes proper timeout handling. Partially solved. + +**tirith-yolo-mode**: A debugging mode for bypassing hex's security layer. Useful for development but not urgent enough to dispatch ever. + +**tmux-makeover**: The TUI research (q-329) selected fzf + gum as the right tools. The makeover never happened. Given that hex-ui exists as a web interface, a tmux redesign is declining in priority. + +**hex-pm**: Project management within hex. The boi spec system already handles project management for boi's own work. This may have been about hex tracking its OWN projects — like an internal to-do system for hex itself. That's interesting but redundant given BOI's existing project tracking. + +**hermes-memory-analysis**: Was probably about comparing Hermes's memory system to hex's. Given that the memory-providers project already identified holographic as the right architecture, this comparison may be moot. + +--- + +## What the Graveyard Reveals About Mike's Actual Interests vs Stated Interests + +### His Stated Interests +AI trends, agent-to-agent knowledge sharing, autonomous experimentation, self-improvement systems, the local inference revolution, proactive intelligence scanning, compound engineering as a discipline. + +### His Actual Interests (Revealed by Completion Pattern) +1. **Making his own tools better** — hex, boi, memory. Every completed project in the ecosystem serves this. This is the real core. +2. **His own financial position** — career analysis (Anthropic positioning), polymarket scanner (trading), mirofish business opportunity (hosting service). He spends real research effort on wealth-building vectors. +3. **Concrete deliverables for Whitney** — The Whitney Content Lab is the only external-facing project that shipped. This reveals that love is a more reliable shipping driver than intellectual curiosity. +4. **Understanding the AI ecosystem around his own tools** — The ai-trends, ai-agent-frameworks, stealth-browser, memory-providers, and persistent-agent-systems research all ultimately serve the question: "what should I incorporate into hex/boi?" + +### The Gap +Mike talks about building for compound leverage and frontier positioning, but his completed project map shows he's mostly building for himself (one person, one agent system) and one other person (Whitney). There's no external customer. There's no user base. There's no feedback loop except his own daily usage. + +This isn't a criticism — hex is a sophisticated system. But the "compound engineering brand" goal (mentioned in career analysis as a quarterly target) doesn't show up in any completed project. The brand-building work (mrap.me, LinkedIn, YouTube, the compound engineering concept) appears repeatedly in stated goals but shows zero boi specs executed against it. + +**The graveyard's deepest truth**: Mike builds exquisite infrastructure for work he hasn't started yet. The abandoned projects are infrastructure improvements for a builder who hasn't yet decided what he's building for others. + +--- + +## The Abandonment Stage + +Across all abandoned projects: death occurs at the project creation stage, never mid-sprint. + +Possible interpretations: +1. **Perfectionism before starting**: Mike knows what deep work looks like. He won't dispatch specs for a project until the context.md is good. The empty context is a blocker, not laziness. +2. **Natural filter**: The act of writing a context.md forces Mike to articulate what success looks like. Projects that fail this test (because there's no clear success state) stay empty. +3. **The boi project container as "parking lot"**: Creating a project container scratches the itch of "I should do this" without committing to it. It's the agile equivalent of creating a ticket and putting it in the backlog. + +The healthiest interpretation: Mike's project creation discipline is actually working correctly. He creates containers for everything he's tempted to do, but only dispatches specs for things with clear success criteria and concrete beneficiaries. The graveyard is the filter, not the failure. + +--- + +## Summary + +**What he builds**: Infrastructure that compounds over time for his own use (hex, BOI), then research that helps him position himself (career analysis, market opportunity research), then deliverables for Whitney. + +**What he abandons**: Meta-infrastructure ideas without forcing functions, research questions without concrete beneficiaries, and experiments that get solved differently before he starts. + +**The consistent pattern**: He starts late and goes deep, or doesn't start at all. Halfway doesn't exist in his project history. + +**The most important finding**: The gap between Mike's intellectual interests (AI ecosystem, compound engineering, autonomous agents at scale) and his project completion record (tools for himself, tools for Whitney) suggests he hasn't found his external customer yet. Everything he's built is prep work. The graveyard is mostly infrastructure improvements for a product that hasn't launched yet. + +**Projects worth revisiting**: hex-identity-tournament (concrete architecture experiment with hex as the beneficiary), hex-memory implementation (holographic integration is chosen, just not built), local-llm-server (reframed as judgment-phase cost reduction, not agentic runner). + +--- + +_Analysis based on: 49 project entries in ~/.boi/projects/, archived Python boi at ~/boi/_archive/python/, career-analysis research (q-348), ai-trends research (q-014 series), assessment-2026-03-16.md, boi-current-state.md, and individual research.md files across all projects._ diff --git a/src/builtins.rs b/src/builtins.rs new file mode 100644 index 0000000..1927e6a --- /dev/null +++ b/src/builtins.rs @@ -0,0 +1,248 @@ +use crate::phases::Verdict; +use crate::worktree; + +pub struct BuiltinContext<'a> { + pub spec_id: &'a str, + pub task_title: &'a str, + /// Source repo path for merge/cleanup. Empty string if not applicable. + pub repo_path: &'a str, +} + +#[derive(Debug, PartialEq)] +pub enum BuiltinResult { + Success(String), + NoOp(String), + Error(String), +} + +impl BuiltinResult { + pub fn to_verdict(&self) -> Verdict { + match self { + BuiltinResult::Success(_) | BuiltinResult::NoOp(_) => Verdict::Proceed, + BuiltinResult::Error(msg) => Verdict::Done { success: false, reason: msg.clone() }, + } + } +} + +/// Dispatch a deterministic builtin by handler name. +pub fn run_builtin(handler: &str, ctx: &BuiltinContext<'_>) -> BuiltinResult { + match handler { + "builtin:commit" => run_commit(ctx), + "builtin:merge" => run_merge(ctx), + "builtin:cleanup" => run_cleanup(ctx), + other => BuiltinResult::Error(format!("unknown builtin: {}", other)), + } +} + +fn run_commit(ctx: &BuiltinContext<'_>) -> BuiltinResult { + let msg = format!("boi({}): {}", ctx.spec_id, ctx.task_title); + match worktree::commit_changes(ctx.spec_id, &msg) { + Ok(true) => BuiltinResult::Success(format!("committed: {}", msg)), + Ok(false) => BuiltinResult::NoOp("no changes to commit".into()), + Err(e) => BuiltinResult::Error(format!("commit failed: {}", e)), + } +} + +fn run_merge(ctx: &BuiltinContext<'_>) -> BuiltinResult { + if ctx.repo_path.is_empty() { + return BuiltinResult::Error("builtin:merge requires repo_path".into()); + } + match worktree::merge_back(ctx.spec_id, ctx.repo_path) { + Ok(msg) => BuiltinResult::Success(format!("merged: {}", msg.trim())), + Err(e) => BuiltinResult::Error(format!("merge failed: {}", e)), + } +} + +fn run_cleanup(ctx: &BuiltinContext<'_>) -> BuiltinResult { + if ctx.repo_path.is_empty() { + return BuiltinResult::Error("builtin:cleanup requires repo_path".into()); + } + if let Err(e) = worktree::cleanup(ctx.spec_id) { + return BuiltinResult::Error(format!("worktree cleanup failed: {}", e)); + } + if let Err(e) = worktree::delete_branch(ctx.spec_id, ctx.repo_path) { + return BuiltinResult::Error(format!("branch delete failed: {}", e)); + } + BuiltinResult::Success("worktree and branch cleaned up".into()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_utils; + + fn make_ctx<'a>(spec_id: &'a str, task_title: &'a str, repo_path: &'a str) -> BuiltinContext<'a> { + BuiltinContext { spec_id, task_title, repo_path } + } + + // --- runtime parsing --- + + #[test] + fn test_deterministic_runtime_in_phase_config() { + use crate::phases::{PhaseConfig, PhaseLevel}; + let phase = PhaseConfig { + name: "commit".into(), + level: PhaseLevel::Task, + description: "".into(), + prompt_template: String::new(), + timeout_minutes: Some(1), + retry_count: None, + can_add_tasks: false, + can_fail_spec: false, + requires_claude: false, + runtime: Some("deterministic".into()), + api_key_env: None, + completion_handler: Some("builtin:commit".into()), + approve_signal: None, + reject_signal: None, + on_approve: None, + on_reject: None, + on_crash: None, + min_lines_changed: None, + model: None, + code_model: None, + effort: None, + hooks_pre: vec![], + hooks_post: vec![], + bare: false, + }; + assert_eq!(phase.runtime.as_deref(), Some("deterministic")); + assert!(!phase.requires_claude); + assert_eq!(phase.completion_handler.as_deref(), Some("builtin:commit")); + } + + // --- registry lookup --- + + #[test] + fn test_deterministic_unknown_builtin_returns_error() { + let ctx = make_ctx("s001", "My Task", "/tmp"); + let result = run_builtin("builtin:unknown", &ctx); + assert!(matches!(result, BuiltinResult::Error(_))); + if let BuiltinResult::Error(msg) = result { + assert!(msg.contains("unknown builtin"), "msg was: {}", msg); + } + } + + #[test] + fn test_deterministic_unknown_builtin_verdict_is_done_failure() { + let ctx = make_ctx("s001", "My Task", "/tmp"); + let verdict = run_builtin("builtin:unknown", &ctx).to_verdict(); + assert!(matches!(verdict, Verdict::Done { success: false, .. })); + } + + // --- builtin:commit --- + + #[test] + fn test_deterministic_commit_no_changes_is_noop() { + let _guard = test_utils::HOME_LOCK.lock().unwrap(); + let repo = test_utils::test_git_repo("builtin-commit-noop"); + let home = test_utils::test_dir("builtin-commit-noop-home"); + std::env::set_var("HOME", home.to_str().unwrap()); + + let spec_id = "det-commit-noop-001"; + worktree::create(spec_id, repo.to_str().unwrap()).unwrap(); + + let result = run_builtin("builtin:commit", &make_ctx(spec_id, "Test Task", repo.to_str().unwrap())); + assert!(matches!(result, BuiltinResult::NoOp(_)), "expected NoOp, got {:?}", result); + + worktree::cleanup(spec_id).unwrap(); + } + + #[test] + fn test_deterministic_commit_with_changes_succeeds() { + let _guard = test_utils::HOME_LOCK.lock().unwrap(); + let repo = test_utils::test_git_repo("builtin-commit-changes"); + let home = test_utils::test_dir("builtin-commit-changes-home"); + std::env::set_var("HOME", home.to_str().unwrap()); + + let spec_id = "det-commit-changes-001"; + let dest = worktree::create(spec_id, repo.to_str().unwrap()).unwrap(); + std::fs::write(dest.join("new.txt"), "hello").unwrap(); + + let result = run_builtin("builtin:commit", &make_ctx(spec_id, "Add File", repo.to_str().unwrap())); + assert!(matches!(result, BuiltinResult::Success(_)), "expected Success, got {:?}", result); + + // Commit message should contain spec_id + let log = std::process::Command::new("git") + .args(["log", "--format=%s", "-1"]) + .current_dir(&dest) + .output() + .unwrap(); + let subject = String::from_utf8_lossy(&log.stdout); + assert!(subject.contains(spec_id), "commit subject: {}", subject.trim()); + assert!(subject.contains("Add File"), "commit subject: {}", subject.trim()); + + worktree::cleanup(spec_id).unwrap(); + } + + // --- builtin:merge --- + + #[test] + fn test_deterministic_merge_brings_file_into_repo() { + let _guard = test_utils::HOME_LOCK.lock().unwrap(); + let repo = test_utils::test_git_repo("builtin-merge-repo"); + let home = test_utils::test_dir("builtin-merge-home"); + std::env::set_var("HOME", home.to_str().unwrap()); + + let spec_id = "det-merge-001"; + let dest = worktree::create(spec_id, repo.to_str().unwrap()).unwrap(); + std::fs::write(dest.join("merged.txt"), "from worktree").unwrap(); + worktree::commit_changes(spec_id, "add merged.txt").unwrap(); + + let result = run_builtin("builtin:merge", &make_ctx(spec_id, "Merge", repo.to_str().unwrap())); + assert!(matches!(result, BuiltinResult::Success(_)), "merge failed: {:?}", result); + assert!(repo.join("merged.txt").exists(), "merged.txt should appear in repo after merge"); + + worktree::cleanup(spec_id).unwrap(); + } + + #[test] + fn test_deterministic_merge_without_repo_path_returns_error() { + let ctx = make_ctx("s001", "Merge", ""); + let result = run_builtin("builtin:merge", &ctx); + assert!(matches!(result, BuiltinResult::Error(_))); + } + + // --- builtin:cleanup --- + + #[test] + fn test_deterministic_cleanup_removes_worktree() { + let _guard = test_utils::HOME_LOCK.lock().unwrap(); + let repo = test_utils::test_git_repo("builtin-cleanup-repo"); + let home = test_utils::test_dir("builtin-cleanup-home"); + std::env::set_var("HOME", home.to_str().unwrap()); + + let spec_id = "det-cleanup-001"; + let dest = worktree::create(spec_id, repo.to_str().unwrap()).unwrap(); + assert!(dest.exists(), "worktree should exist before cleanup"); + + let result = run_builtin("builtin:cleanup", &make_ctx(spec_id, "Cleanup", repo.to_str().unwrap())); + assert!(matches!(result, BuiltinResult::Success(_)), "cleanup failed: {:?}", result); + assert!(!dest.exists(), "worktree dir should be gone after cleanup"); + } + + #[test] + fn test_deterministic_cleanup_without_repo_path_returns_error() { + let ctx = make_ctx("s001", "Cleanup", ""); + let result = run_builtin("builtin:cleanup", &ctx); + assert!(matches!(result, BuiltinResult::Error(_))); + } + + // --- verdict mapping --- + + #[test] + fn test_deterministic_success_maps_to_proceed() { + assert_eq!(BuiltinResult::Success("ok".into()).to_verdict(), Verdict::Proceed); + } + + #[test] + fn test_deterministic_noop_maps_to_proceed() { + assert_eq!(BuiltinResult::NoOp("nothing".into()).to_verdict(), Verdict::Proceed); + } + + #[test] + fn test_deterministic_error_maps_to_done_failure() { + let v = BuiltinResult::Error("oops".into()).to_verdict(); + assert!(matches!(v, Verdict::Done { success: false, .. })); + } +} diff --git a/src/cli/bench.rs b/src/cli/bench.rs index 5f59662..5b4103b 100644 --- a/src/cli/bench.rs +++ b/src/cli/bench.rs @@ -553,6 +553,7 @@ pub fn cmd_bench_phase(phase_name: &str, spec_path: &Path, runs: u32) { phase.model.as_deref(), None, &claude_bin, + phase.bare, ) { Ok(cr) => { let verdict = crate::phases::parse_phase_output(&phase, &cr.output); diff --git a/src/cli/config_cmd.rs b/src/cli/config_cmd.rs index 5c9490f..f3c929e 100644 --- a/src/cli/config_cmd.rs +++ b/src/cli/config_cmd.rs @@ -4,6 +4,7 @@ pub fn cmd_config(key: Option<&str>, value: Option<&str>, cfg: &config::Config) match (key, value) { (None, _) => { println!("max_workers: {}", cfg.max_workers()); + println!("spawns_per_tick: {}", cfg.spawns_per_tick()); println!("task_timeout_minutes: {}", cfg.task_timeout_secs() / 60); println!("retry_count: {}", cfg.retry_count()); println!("db_path: {}", cfg.db_path().display()); @@ -21,6 +22,7 @@ pub fn cmd_config(key: Option<&str>, value: Option<&str>, cfg: &config::Config) (Some(k), None) => { let val = match k { "max_workers" => cfg.max_workers().to_string(), + "spawns_per_tick" => cfg.spawns_per_tick().to_string(), "task_timeout_minutes" => (cfg.task_timeout_secs() / 60).to_string(), "retry_count" => cfg.retry_count().to_string(), "db_path" => cfg.db_path().display().to_string(), @@ -36,9 +38,9 @@ pub fn cmd_config(key: Option<&str>, value: Option<&str>, cfg: &config::Config) (Some(k), Some(v)) => { // Validate key match k { - "max_workers" | "task_timeout_minutes" | "retry_count" => {} + "max_workers" | "spawns_per_tick" | "task_timeout_minutes" | "retry_count" => {} _ => { - eprintln!("unknown config key: {} (supported: max_workers, task_timeout_minutes, retry_count)", k); + eprintln!("unknown config key: {} (supported: max_workers, spawns_per_tick, task_timeout_minutes, retry_count)", k); std::process::exit(1); } } diff --git a/src/cli/daemon.rs b/src/cli/daemon.rs index 99cc880..5e31fd1 100644 --- a/src/cli/daemon.rs +++ b/src/cli/daemon.rs @@ -199,8 +199,18 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi .ok(); } - let wc = worker::WorkerConfig { + // SIGHUP hot-reload flag: set to true by signal_hook when SIGHUP arrives. + let reload_flag = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)); + if let Err(e) = signal_hook::flag::register( + signal_hook::consts::SIGHUP, + std::sync::Arc::clone(&reload_flag), + ) { + eprintln!("[boi daemon] WARNING: failed to install SIGHUP handler: {}", e); + } + + let mut wc = worker::WorkerConfig { max_workers: cfg.max_workers(), + spawns_per_tick: cfg.spawns_per_tick(), task_timeout_secs: cfg.task_timeout_secs(), retry_count: cfg.retry_count(), cleanup_on_failure: cfg.cleanup_on_failure(), @@ -244,6 +254,21 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi eprintln!("[boi daemon] ERROR: failed to write heartbeat: {}", e); } + // SIGHUP hot-reload: only max_workers, spawns_per_tick, claude_bin are live-updated. + // All other settings remain frozen at startup. In-flight workers keep their original config. + if reload_flag.swap(false, std::sync::atomic::Ordering::SeqCst) { + match config::try_load() { + Ok(new_cfg) => { + apply_reload(&mut wc, &new_cfg); + eprintln!( + "[boi daemon] reloaded config: max_workers={}, spawns_per_tick={}, claude_bin={}", + wc.max_workers, wc.spawns_per_tick, wc.claude_bin + ); + } + Err(e) => eprintln!("[boi daemon] reload FAILED: {}; keeping current config", e), + } + } + { let mut workers = active.lock().unwrap_or_else(|e| { eprintln!("[boi daemon] worker mutex poisoned, recovering: {}", e); @@ -251,7 +276,9 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi }); workers.retain(|h| !h.is_finished()); - if workers.len() < wc.max_workers as usize { + let to_spawn = compute_to_spawn(workers.len(), wc.max_workers, wc.spawns_per_tick); + + for slot in 0..to_spawn { match queue::Queue::open(db_str) { Ok(queue) => match queue.dequeue() { Ok(Some(rec)) => { @@ -268,7 +295,7 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi eprintln!("[boi daemon] ERROR: failed to mark spec {} as failed: {}", spec_id, e); } } - continue; + continue; // skip to next batch slot } }; let qpath = db_str.to_string(); @@ -285,10 +312,16 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi .unwrap_or(timeout); let tel = Telemetry::new(PathBuf::from(&qpath)); - eprintln!("[boi daemon] starting worker for {}", spec_id); + eprintln!( + "[boi daemon] starting worker for {} (batch slot {}/{})", + spec_id, + slot + 1, + to_spawn + ); let handle = std::thread::spawn(move || { let wc = worker::WorkerConfig { max_workers: 1, + spawns_per_tick: 1, task_timeout_secs: spec_timeout, retry_count: retries, cleanup_on_failure: cleanup_fail, @@ -301,11 +334,27 @@ pub fn cmd_daemon(db_str: &str, hook_cfg: hooks::HookConfig, cfg: &config::Confi } }); workers.push(handle); + + // Micro-jitter between successive spawns to smooth cold-start burst + if slot + 1 < to_spawn { + let jitter_ns = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.subsec_nanos() as u64) + .unwrap_or(0); + let jitter_ms = 50 + (jitter_ns % 101); + std::thread::sleep(std::time::Duration::from_millis(jitter_ms)); + } + } + Ok(None) => break, // queue drained + Err(e) => { + eprintln!("[boi daemon] dequeue error: {}", e); + break; } - Ok(None) => {} - Err(e) => eprintln!("[boi daemon] dequeue error: {}", e), }, - Err(e) => eprintln!("[boi daemon] queue open error: {}", e), + Err(e) => { + eprintln!("[boi daemon] queue open error: {}", e); + break; + } } } } @@ -388,6 +437,240 @@ pub fn cmd_stop() { let _ = std::fs::remove_file(daemon_heartbeat_path()); // intentional: best-effort heartbeat cleanup } +/// How many workers to spawn this tick: capped by capacity and per-tick limit. +pub(crate) fn compute_to_spawn(workers_len: usize, max_workers: u32, spawns_per_tick: u32) -> u32 { + let cap_remaining = max_workers.saturating_sub(workers_len as u32); + cap_remaining.min(spawns_per_tick) +} + +/// Hot-reload the three live-mutable fields from a freshly parsed config. +/// All other WorkerConfig fields remain at their startup values. +pub(crate) fn apply_reload(wc: &mut worker::WorkerConfig, new_cfg: &config::Config) { + wc.max_workers = new_cfg.max_workers(); + wc.spawns_per_tick = new_cfg.spawns_per_tick(); + wc.claude_bin = new_cfg.claude_bin(); +} + +/// Send SIGHUP to the running daemon so it picks up config changes. +pub fn cmd_reload() { + let pid = match read_daemon_pid() { + Some(p) => p, + None => { + eprintln!("no daemon running (PID file not found)"); + std::process::exit(1); + } + }; + + if !crate::fmt::is_pid_alive(pid) { + eprintln!("daemon process {} is not running", pid); + std::process::exit(1); + } + + // SAFETY: `pid` was read from the daemon lock file and verified alive above. + // SIGHUP to a known-live PID is a standard POSIX config-reload signal. + unsafe { libc::kill(pid as i32, libc::SIGHUP) }; + println!("sent SIGHUP to daemon (pid {}); config will reload within one tick", pid); +} + +#[cfg(test)] +mod daemon_batch { + use super::*; + use crate::{queue, spec, test_utils}; + + const SIMPLE_SPEC: &str = "title: \"Batch Test\"\ntasks:\n - id: t-1\n title: \"Step\"\n status: PENDING\n spec: \"Do it\"\n"; + + fn open_queue(label: &str) -> (queue::Queue, String) { + let db_file = test_utils::test_file(label, "db"); + let _ = std::fs::remove_file(&db_file); + let db_path = db_file.to_str().unwrap().to_string(); + let q = queue::Queue::open(&db_path).unwrap(); + (q, db_path) + } + + fn enqueue_n(q: &queue::Queue, n: usize) { + let boi_spec = spec::parse(SIMPLE_SPEC).unwrap(); + for _ in 0..n { + q.enqueue(&boi_spec, None).unwrap(); + } + } + + fn drain_n(q: &queue::Queue, to_spawn: u32) -> usize { + let mut count = 0; + for _ in 0..to_spawn { + match q.dequeue() { + Ok(Some(_)) => count += 1, + Ok(None) => break, + Err(_) => break, + } + } + count + } + + #[test] + fn test_compute_to_spawn_at_capacity() { + // workers_len == max_workers → 0 slots remaining + assert_eq!(compute_to_spawn(4, 4, 4), 0); + } + + #[test] + fn test_compute_to_spawn_limited_by_spawns_per_tick() { + // cap_remaining=8 but spawns_per_tick=4 → 4 + assert_eq!(compute_to_spawn(0, 8, 4), 4); + } + + #[test] + fn test_compute_to_spawn_limited_by_cap_remaining() { + // cap_remaining=2, spawns_per_tick=4 → 2 + assert_eq!(compute_to_spawn(6, 8, 4), 2); + } + + #[test] + fn test_empty_queue_zero_spawns() { + let (q, _db) = open_queue("batch-empty"); + let to_spawn = compute_to_spawn(0, 4, 4); + let spawned = drain_n(&q, to_spawn); + assert_eq!(spawned, 0); + } + + #[test] + fn test_one_eligible_cap4_tick4_spawns_one() { + let (q, _db) = open_queue("batch-one"); + enqueue_n(&q, 1); + let to_spawn = compute_to_spawn(0, 4, 4); // = 4 + let spawned = drain_n(&q, to_spawn); + assert_eq!(spawned, 1, "only 1 item in queue, expect 1 spawn"); + } + + #[test] + fn test_six_eligible_cap4_tick4_spawns_four_then_two() { + let (q, _db) = open_queue("batch-six-cap4"); + enqueue_n(&q, 6); + let to_spawn = compute_to_spawn(0, 4, 4); // = 4 + let first_tick = drain_n(&q, to_spawn); + assert_eq!(first_tick, 4, "first tick: 4 spawned"); + + // Second tick: 2 remain + let to_spawn2 = compute_to_spawn(4, 8, 4); // simulate 4 workers running, max=8 + let second_tick = drain_n(&q, to_spawn2); + assert_eq!(second_tick, 2, "second tick: remaining 2 spawned"); + } + + #[test] + fn test_six_eligible_cap8_tick4_spawns_four() { + let (q, _db) = open_queue("batch-six-cap8"); + enqueue_n(&q, 6); + let to_spawn = compute_to_spawn(0, 8, 4); // = 4 (tick limit) + let spawned = drain_n(&q, to_spawn); + assert_eq!(spawned, 4); + } + + #[test] + fn test_four_eligible_cap2_tick4_spawns_two() { + let (q, _db) = open_queue("batch-four-cap2"); + enqueue_n(&q, 4); + let to_spawn = compute_to_spawn(6, 8, 4); // cap_remaining=2, tick=4 → 2 + let spawned = drain_n(&q, to_spawn); + assert_eq!(spawned, 2); + } +} + +#[cfg(test)] +mod daemon_hotreload { + use super::*; + use crate::{config, test_utils, worker}; + + fn make_wc(max_workers: u32, spawns_per_tick: u32, claude_bin: &str) -> worker::WorkerConfig { + worker::WorkerConfig { + max_workers, + spawns_per_tick, + task_timeout_secs: 1800, + retry_count: 3, + cleanup_on_failure: false, + claude_bin: claude_bin.to_string(), + } + } + + #[test] + fn test_apply_reload_updates_hot_fields() { + let mut wc = make_wc(4, 2, "claude"); + let new_cfg = config::Config { + max_workers: Some(8), + spawns_per_tick: Some(6), + claude_bin: Some("/usr/bin/claude".to_string()), + ..Default::default() + }; + apply_reload(&mut wc, &new_cfg); + assert_eq!(wc.max_workers, 8); + assert_eq!(wc.spawns_per_tick, 6); + assert_eq!(wc.claude_bin, "/usr/bin/claude"); + } + + #[test] + fn test_apply_reload_leaves_other_fields_unchanged() { + let mut wc = make_wc(4, 2, "claude"); + wc.task_timeout_secs = 7200; + wc.retry_count = 5; + let new_cfg = config::Config { + max_workers: Some(8), + ..Default::default() + }; + apply_reload(&mut wc, &new_cfg); + assert_eq!(wc.task_timeout_secs, 7200, "task_timeout_secs must not change on reload"); + assert_eq!(wc.retry_count, 5, "retry_count must not change on reload"); + } + + #[test] + fn test_bad_config_returns_err() { + use std::io::Write; + let path = test_utils::test_file("hotreload-bad-config", "yaml"); + let mut f = std::fs::File::create(&path).unwrap(); + // Deliberately invalid YAML + f.write_all(b"max_workers: [this is: not: valid yaml\n").unwrap(); + let result = config::try_load_from(&path); + assert!(result.is_err(), "invalid YAML should return Err, got: {:?}", result); + let _ = std::fs::remove_file(&path); + } + + #[test] + fn test_missing_config_returns_defaults() { + let path = test_utils::test_file("hotreload-missing", "yaml"); + let _ = std::fs::remove_file(&path); + let cfg = config::try_load_from(&path) + .expect("missing config file should return Ok with defaults"); + assert_eq!(cfg.max_workers(), 5); + assert_eq!(cfg.spawns_per_tick(), 4); + } + + #[test] + fn test_noop_reload_same_values() { + // Default config → default wc values; apply_reload is a no-op + let mut wc = make_wc(5, 4, "claude"); + let same_cfg = config::Config::default(); + apply_reload(&mut wc, &same_cfg); + assert_eq!(wc.max_workers, 5); + assert_eq!(wc.spawns_per_tick, 4); + assert_eq!(wc.claude_bin, "claude"); + } + + #[test] + fn test_bad_config_keeps_original_wc() { + use std::io::Write; + let mut wc = make_wc(8, 3, "my-claude"); + let path = test_utils::test_file("hotreload-bad-keep", "yaml"); + let mut f = std::fs::File::create(&path).unwrap(); + f.write_all(b"max_workers: [broken\n").unwrap(); + // Simulate what the daemon does: if load fails, don't call apply_reload + if let Ok(new_cfg) = config::try_load_from(&path) { + apply_reload(&mut wc, &new_cfg); + } + // Values must be unchanged + assert_eq!(wc.max_workers, 8, "max_workers must be retained on bad config"); + assert_eq!(wc.spawns_per_tick, 3, "spawns_per_tick must be retained on bad config"); + assert_eq!(wc.claude_bin, "my-claude", "claude_bin must be retained on bad config"); + let _ = std::fs::remove_file(&path); + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/cli/dispatch.rs b/src/cli/dispatch.rs index 3986381..7a0796a 100644 --- a/src/cli/dispatch.rs +++ b/src/cli/dispatch.rs @@ -1,8 +1,28 @@ +use crate::cli::plan; use crate::fmt::ensure_db_dir; use crate::{hooks, queue, spec}; use serde_json::json; use std::path::PathBuf; +/// Convert a parsed BoiSpec into a SpecInfo for DAG artifact analysis. +fn boi_spec_to_spec_info(boi_spec: &spec::BoiSpec, path: &PathBuf) -> plan::SpecInfo { + let id = path + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("unknown") + .to_string(); + plan::SpecInfo { + id, + title: boi_spec.title.clone(), + depends_on: vec![], + task_texts: boi_spec + .tasks + .iter() + .map(|t| (t.spec.clone(), t.verify.clone())) + .collect(), + } +} + #[allow(clippy::too_many_arguments)] pub fn cmd_dispatch( spec_path: &PathBuf, @@ -15,6 +35,7 @@ pub fn cmd_dispatch( project: Option<&str>, dry_run: bool, _workspace: Option<&str>, + skip_plan: bool, db_str: &str, hook_cfg: &hooks::HookConfig, ) { @@ -40,6 +61,29 @@ pub fn cmd_dispatch( } ensure_db_dir(db_str); + + // Lightweight DAG check: warn if artifact overlap detected with in-flight specs + // and no --after was provided. Does NOT block dispatch. + if !skip_plan && after.is_none() { + let in_flight = plan::load_in_flight_specs(db_str); + if !in_flight.is_empty() { + let new_info = boi_spec_to_spec_info(&boi_spec, spec_path); + let implicit = plan::detect_implicit_deps(&new_info, &in_flight); + if !implicit.is_empty() { + eprintln!( + "warn: new spec may implicitly depend on in-flight spec(s): {}", + implicit.join(", ") + ); + eprintln!( + " Suggested: boi dispatch {} --after {}", + spec_path.display(), + implicit.join(",") + ); + eprintln!(" Use --skip-plan to suppress this warning."); + } + } + } + let q = match queue::Queue::open(db_str) { Ok(q) => q, Err(e) => { @@ -94,3 +138,92 @@ pub fn cmd_dispatch( println!("{}", spec_id); } + +// ───────────────────────────────────────────────────────────────────────────── +// Tests +// ───────────────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod dispatch_dag_warn { + use crate::cli::plan::{detect_implicit_deps, SpecInfo}; + + fn make_spec(id: &str, depends_on: &[&str], texts: &[&str]) -> SpecInfo { + SpecInfo { + id: id.to_string(), + title: format!("Spec {}", id), + depends_on: depends_on.iter().map(|s| s.to_string()).collect(), + task_texts: texts + .iter() + .map(|t| (Some(t.to_string()), None)) + .collect(), + } + } + + /// No in-flight specs → no implicit deps detected. + #[test] + fn no_in_flight_no_warn() { + let new_spec = make_spec("new:foo", &[], &["touch src/foo.rs"]); + let deps = detect_implicit_deps(&new_spec, &[]); + assert!(deps.is_empty(), "empty in-flight should produce no implicit deps"); + } + + /// In-flight spec shares an artifact path → implicit dep detected. + #[test] + fn shared_artifact_triggers_warn() { + let in_flight = make_spec("S001", &[], &["create src/dag.rs and tests/dag_test.rs"]); + let new_spec = make_spec("new:bar", &[], &["read src/dag.rs"]); + let deps = detect_implicit_deps(&new_spec, &[in_flight]); + assert!( + deps.contains(&"S001".to_string()), + "shared artifact should produce implicit dep on S001" + ); + } + + /// Disjoint artifact sets → no implicit dep. + #[test] + fn disjoint_artifacts_no_warn() { + let in_flight = make_spec("S001", &[], &["create src/alpha.rs"]); + let new_spec = make_spec("new:bar", &[], &["write src/beta.rs"]); + let deps = detect_implicit_deps(&new_spec, &[in_flight]); + assert!( + deps.is_empty(), + "non-overlapping artifacts should produce no implicit deps" + ); + } + + /// In-flight spec already declares new_spec as a dep → no reverse edge to + /// avoid a spurious cycle warning. + #[test] + fn in_flight_already_depends_on_new_no_reverse_warn() { + let mut in_flight = make_spec("S001", &[], &["work on src/shared.rs"]); + in_flight.depends_on = vec!["new:foo".to_string()]; + let new_spec = make_spec("new:foo", &[], &["create src/shared.rs"]); + let deps = detect_implicit_deps(&new_spec, &[in_flight]); + assert!( + deps.is_empty(), + "should not add reverse edge when in-flight already depends on new spec" + ); + } + + /// Multiple in-flight specs, only the one with overlapping artifact triggers. + #[test] + fn only_overlapping_in_flight_triggers() { + let s1 = make_spec("S001", &[], &["create src/overlap.rs"]); + let s2 = make_spec("S002", &[], &["create src/unrelated.rs"]); + let new_spec = make_spec("new:baz", &[], &["use src/overlap.rs"]); + let deps = detect_implicit_deps(&new_spec, &[s1, s2]); + assert_eq!(deps, vec!["S001".to_string()], "only S001 shares an artifact"); + } + + /// New spec with no file-path artifacts → no implicit deps regardless. + #[test] + fn new_spec_no_artifacts_no_warn() { + let in_flight = make_spec("S001", &[], &["create src/foo.rs"]); + let new_spec = make_spec("new:empty", &[], &["run some commands without file paths"]); + let deps = detect_implicit_deps(&new_spec, &[in_flight]); + assert!( + deps.is_empty(), + "new spec without file-path artifacts should not trigger implicit dep" + ); + } +} diff --git a/src/cli/dispatch_many.rs b/src/cli/dispatch_many.rs new file mode 100644 index 0000000..87cd0f1 --- /dev/null +++ b/src/cli/dispatch_many.rs @@ -0,0 +1,398 @@ +use std::collections::HashMap; +use std::io::BufRead; +use std::path::PathBuf; + +use crate::cli::plan::{ + build_dag, critique_dag, load_extra_spec_files, load_in_flight_specs, render_dag_text, + Concern, DagError, Severity, SpecDag, +}; +use crate::fmt::ensure_db_dir; +use crate::{hooks, queue, spec}; +use serde_json::json; + +// ───────────────────────────────────────────────────────────────────────────── +// Pure helpers (testable without I/O) +// ───────────────────────────────────────────────────────────────────────────── + +/// Returns true when concerns contain at least one block-level entry. +/// `--force` does NOT override blocks; only the user can resolve them. +pub fn has_block(concerns: &[Concern]) -> bool { + concerns.iter().any(|c| c.severity == Severity::Block) +} + +/// Build the `--after` chain for a set of specs in topological order. +/// +/// `id_map` maps plan IDs (e.g. `"new:foo"` or `"S0ABC"`) to the actual queue +/// IDs that were assigned when each spec was dispatched. For in-flight specs +/// the plan ID is already the queue ID. +/// +/// Returns a map: plan_id → comma-separated after string (empty if no deps). +pub fn compute_after_chain( + dag: &SpecDag, + order: &[String], + id_map: &HashMap, +) -> HashMap { + let mut result = HashMap::new(); + for plan_id in order { + let node = match dag.nodes.get(plan_id) { + Some(n) => n, + None => continue, + }; + let after_ids: Vec = node + .all_deps() + .filter_map(|dep| id_map.get(dep).filter(|s| !s.is_empty())) + .cloned() + .collect(); + result.insert(plan_id.clone(), after_ids.join(",")); + } + result +} + +// ───────────────────────────────────────────────────────────────────────────── +// Single-spec dispatch helper +// ───────────────────────────────────────────────────────────────────────────── + +fn enqueue_spec( + spec_path: &PathBuf, + after: Option<&str>, + priority: i64, + mode: Option<&str>, + max_iter: i64, + timeout: u32, + project: Option<&str>, + db_str: &str, + hook_cfg: &hooks::HookConfig, +) -> Result { + let content = std::fs::read_to_string(spec_path) + .map_err(|e| format!("cannot read {:?}: {}", spec_path, e))?; + + let boi_spec = spec::parse(&content) + .map_err(|e| format!("spec parse failed: {}", e))?; + + ensure_db_dir(db_str); + let q = queue::Queue::open(db_str) + .map_err(|e| format!("cannot open queue: {}", e))?; + + let spec_path_str = spec_path.to_str().unwrap_or(""); + let spec_id = q + .enqueue(&boi_spec, Some(spec_path_str)) + .map_err(|e| format!("enqueue failed: {}", e))?; + + let timeout_secs = if timeout != 30 { + Some(timeout as i64 * 60) + } else { + None + }; + let _ = q.set_spec_fields( + &spec_id, + mode, + if max_iter != 30 { Some(max_iter) } else { None }, + project, + timeout_secs, + ); + + if priority != 100 { + let _ = q.set_priority(&spec_id, priority); + } + + if let Some(dep) = after { + if !dep.is_empty() { + let _ = q.set_depends_on(&spec_id, dep); + } + } + + let payload = json!({ + "spec_id": spec_id, + "title": boi_spec.title, + "spec_path": spec_path_str, + }); + let _ = hooks::fire(hook_cfg, hooks::ON_DISPATCH, &payload); + + Ok(spec_id) +} + +// ───────────────────────────────────────────────────────────────────────────── +// boi dispatch-many command +// ───────────────────────────────────────────────────────────────────────────── + +#[allow(clippy::too_many_arguments)] +pub fn cmd_dispatch_many( + spec_paths: &[PathBuf], + yes: bool, + force: bool, + priority: i64, + mode: Option<&str>, + max_iter: i64, + timeout: u32, + project: Option<&str>, + db_str: &str, + hook_cfg: &hooks::HookConfig, +) -> i32 { + if spec_paths.is_empty() { + eprintln!("error: dispatch-many requires at least one spec file"); + return 1; + } + + // 1. Load in-flight specs + new spec files + let in_flight = load_in_flight_specs(db_str); + let new_specs = load_extra_spec_files(spec_paths); + + let mut all_specs = in_flight; + all_specs.extend(new_specs); + + // 2. Build DAG — refuse loudly on cycle + let dag = match build_dag(&all_specs) { + Ok(d) => d, + Err(DagError::Cycle(ids)) => { + eprintln!("ERROR: dependency cycle detected: {}", ids.join(", ")); + eprintln!("Fix the cycle before dispatching."); + return 1; + } + }; + + let order = dag.topological_sort().expect("cycle already checked"); + + // 3. Render and print proposed order + let dag_text = render_dag_text(&dag, &order); + println!("{dag_text}"); + + // 4. LLM critique + let concerns = critique_dag(&dag_text, &dag, &order, false); + + // 5. Print concerns + if concerns.is_empty() { + println!("LLM critique: no concerns."); + } else { + println!("LLM critique:"); + for c in &concerns { + let label = match c.severity { + Severity::Block => "[BLOCK]", + Severity::Warn => "[WARN] ", + Severity::Info => "[INFO] ", + }; + println!(" {label} {}", c.description); + if let Some(fix) = &c.fix { + println!(" Fix: {fix}"); + } + } + } + println!(); + + // 6. Refuse on block-severity (force cannot override blocks) + if has_block(&concerns) { + eprintln!("Blocking concerns found — resolve before dispatching."); + eprintln!("(--force overrides warns, not blocks)"); + return 1; + } + + // 7. Prompt unless --yes / --force suppresses it + let has_warn_concern = concerns.iter().any(|c| c.severity == Severity::Warn); + if !yes && !force { + let prompt_msg = if has_warn_concern { + "Proceed with dispatch despite warnings? [y/N]: " + } else { + "Dispatch in the order above? [y/N]: " + }; + eprint!("{prompt_msg}"); + let stdin = std::io::stdin(); + let mut input = String::new(); + let approved = stdin + .lock() + .read_line(&mut input) + .is_ok() + && input.trim().eq_ignore_ascii_case("y"); + if !approved { + eprintln!("Aborted."); + return 0; + } + } else if force && has_warn_concern { + println!("--force: proceeding despite warn-level concerns."); + } + + // 8. Dispatch in topological order with correct --after chain + // Map plan_id ("new:") → original PathBuf + let path_index: HashMap = spec_paths + .iter() + .filter_map(|p| { + let stem = p.file_stem()?.to_str()?.to_string(); + Some((format!("new:{stem}"), p)) + }) + .collect(); + + // Track dispatched plan_id → queue_id so later specs can reference deps + let mut id_map: HashMap = HashMap::new(); + + for plan_id in &order { + if !plan_id.starts_with("new:") { + // Already in-flight; its plan_id IS the queue_id + id_map.insert(plan_id.clone(), plan_id.clone()); + continue; + } + + let path = match path_index.get(plan_id) { + Some(p) => p, + None => { + eprintln!("warn: no path found for {plan_id}, skipping"); + continue; + } + }; + + // Collect queue IDs of deps that were dispatched in this run + let node = match dag.nodes.get(plan_id) { + Some(n) => n, + None => continue, + }; + let after_ids: Vec = node + .all_deps() + .filter_map(|dep| id_map.get(dep).filter(|s| !s.is_empty())) + .cloned() + .collect(); + let after_str = if after_ids.is_empty() { + None + } else { + Some(after_ids.join(",")) + }; + + match enqueue_spec( + path, + after_str.as_deref(), + priority, + mode, + max_iter, + timeout, + project, + db_str, + hook_cfg, + ) { + Ok(queue_id) => { + let after_display = after_str.as_deref().unwrap_or("(none)"); + println!("dispatched: {plan_id} → {queue_id} --after {after_display}"); + id_map.insert(plan_id.clone(), queue_id); + } + Err(e) => { + eprintln!("error: failed to dispatch {plan_id}: {e}"); + return 1; + } + } + } + + 0 +} + +// ───────────────────────────────────────────────────────────────────────────── +// Tests +// ───────────────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod dispatch_many { + use super::*; + use crate::cli::plan::{build_dag, SpecInfo}; + + fn make_spec(id: &str, title: &str, depends_on: &[&str], texts: &[&str]) -> SpecInfo { + SpecInfo { + id: id.to_string(), + title: title.to_string(), + depends_on: depends_on.iter().map(|s| s.to_string()).collect(), + task_texts: texts + .iter() + .map(|t| (Some(t.to_string()), None)) + .collect(), + } + } + + fn pos_map(order: &[String]) -> HashMap<&str, usize> { + order + .iter() + .enumerate() + .map(|(i, s)| (s.as_str(), i)) + .collect() + } + + /// Three-spec chain: S2 implicitly depends on S1 (shared file path), + /// S3 explicitly depends on S2. dispatch-many must produce the right + /// --after chain: S2 → S001, S3 → S002. + #[test] + fn three_spec_implicit_then_explicit_after_chain() { + let s1 = make_spec("S001", "Write foo", &[], &["Create src/foo.rs"]); + let s2 = make_spec("S002", "Process foo", &[], &["Read src/foo.rs"]); + let s3 = make_spec("S003", "Finalize", &["S002"], &[]); + + let dag = build_dag(&[s1, s2, s3]).unwrap(); + let order = dag.topological_sort().unwrap(); + let pos = pos_map(&order); + + assert!(pos["S001"] < pos["S002"], "S001 must precede S002"); + assert!(pos["S002"] < pos["S003"], "S002 must precede S003"); + + // Simulate dispatch with identity id_map (plan_id == queue_id) + let id_map: HashMap = + order.iter().map(|id| (id.clone(), id.clone())).collect(); + let chain = compute_after_chain(&dag, &order, &id_map); + + assert!( + chain.get("S001").map(|s| s.is_empty()).unwrap_or(true), + "S001 should have no --after, got {:?}", + chain.get("S001") + ); + assert_eq!(chain["S002"], "S001", "S002 should be after S001"); + assert_eq!(chain["S003"], "S002", "S003 should be after S002"); + } + + /// Cycle in declared deps must be detected and refused. + #[test] + fn cycle_causes_refusal() { + let s1 = make_spec("S001", "A", &["S002"], &[]); + let s2 = make_spec("S002", "B", &["S001"], &[]); + assert!( + matches!(build_dag(&[s1, s2]), Err(DagError::Cycle(_))), + "cycle should be detected and returned as DagError::Cycle" + ); + } + + /// --force can override warns but NOT blocks. + /// We test `has_block` directly since it encodes the gate logic. + #[test] + fn force_overrides_warn_not_block() { + let warn_concerns = vec![Concern { + severity: Severity::Warn, + description: "suboptimal ordering".into(), + fix: None, + }]; + let block_concerns = vec![Concern { + severity: Severity::Block, + description: "wrong ordering will cause data loss".into(), + fix: None, + }]; + + // Warns never block dispatch (even without --force) + assert!( + !has_block(&warn_concerns), + "warn-only concerns should not block" + ); + // Blocks remain regardless of --force (caller checks has_block before respecting the flag) + assert!( + has_block(&block_concerns), + "block concern must still block even if force=true" + ); + } + + /// compute_after_chain returns empty string for root specs (no deps). + #[test] + fn root_specs_have_no_after() { + let s1 = make_spec("S001", "Root A", &[], &[]); + let s2 = make_spec("S002", "Root B", &[], &[]); + + let dag = build_dag(&[s1, s2]).unwrap(); + let order = dag.topological_sort().unwrap(); + let id_map: HashMap = + order.iter().map(|id| (id.clone(), id.clone())).collect(); + let chain = compute_after_chain(&dag, &order, &id_map); + + for id in &order { + assert!( + chain.get(id).map(|s| s.is_empty()).unwrap_or(true), + "{id} should have empty --after (no deps)" + ); + } + } +} diff --git a/src/cli/mod.rs b/src/cli/mod.rs index e9093de..ef2d7af 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -3,11 +3,14 @@ pub mod cancel; pub mod config_cmd; pub mod daemon; pub mod dispatch; +pub mod dispatch_many; pub mod doctor; pub mod log; pub mod outputs; pub mod phases_cmd; +pub mod plan; pub mod spec_mgmt; pub mod status; pub mod telemetry_cmd; +pub mod why; pub mod workers; diff --git a/src/cli/plan.rs b/src/cli/plan.rs new file mode 100644 index 0000000..7fc82ba --- /dev/null +++ b/src/cli/plan.rs @@ -0,0 +1,806 @@ +use std::collections::{HashMap, HashSet, VecDeque}; +use std::path::PathBuf; +use std::time::Duration; + +use crate::queue::{FullTaskRecord, SpecRecord}; + +/// Lightweight view of a spec used for DAG analysis. +/// Constructed from DB records for in-flight/queued specs, or from test fixtures. +#[derive(Debug, Clone)] +pub struct SpecInfo { + pub id: String, + pub title: String, + /// Explicit spec-level dependencies. The DB stores a single spec ID in + /// `depends_on`; we also accept a comma-separated list so callers can + /// express multi-dep cases without changing the schema. + pub depends_on: Vec, + /// (spec_content, verify_content) from each task in this spec. + pub task_texts: Vec<(Option, Option)>, +} + +impl SpecInfo { + pub fn from_db(spec: &SpecRecord, tasks: &[FullTaskRecord]) -> Self { + let depends_on = parse_depends_on(spec.depends_on.as_deref().unwrap_or("")); + SpecInfo { + id: spec.id.clone(), + title: spec.title.clone(), + depends_on, + task_texts: tasks + .iter() + .map(|t| (t.spec_content.clone(), t.verify_content.clone())) + .collect(), + } + } +} + +fn parse_depends_on(s: &str) -> Vec { + s.split(',') + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(String::from) + .collect() +} + +/// A node in the spec-level DAG. +#[derive(Debug, Clone)] +pub struct DagNode { + pub spec_id: String, + pub title: String, + /// Deps declared via --after / depends_on column. + pub explicit_deps: Vec, + /// Deps inferred from artifact (file-path) overlap between specs. + pub implicit_deps: Vec, +} + +impl DagNode { + pub fn all_deps(&self) -> impl Iterator { + self.explicit_deps + .iter() + .chain(self.implicit_deps.iter()) + .map(String::as_str) + } +} + +/// The spec-level dependency graph. +#[derive(Debug)] +pub struct SpecDag { + pub nodes: HashMap, +} + +#[derive(Debug)] +pub enum DagError { + Cycle(Vec), +} + +impl std::fmt::Display for DagError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + DagError::Cycle(ids) => { + write!(f, "cycle detected involving specs: {}", ids.join(", ")) + } + } + } +} + +impl std::error::Error for DagError {} + +/// Extracts file-path-like strings from a spec's task text fields. +pub fn collect_artifacts(spec: &SpecInfo) -> Vec { + let mut paths: HashSet = HashSet::new(); + for (spec_content, verify_content) in &spec.task_texts { + for text in [spec_content.as_deref(), verify_content.as_deref()] + .into_iter() + .flatten() + { + extract_paths(text, &mut paths); + } + } + paths.into_iter().collect() +} + +fn extract_paths(text: &str, out: &mut HashSet) { + for word in text.split_whitespace() { + let word = word.trim_matches(|c: char| { + matches!( + c, + '"' | '\'' | '`' | '(' | ')' | '[' | ']' | '{' | '}' | ',' | ';' | ':' + ) + }); + if is_likely_path(word) { + out.insert(PathBuf::from(word)); + } + } +} + +fn is_likely_path(s: &str) -> bool { + if s.len() < 3 || !s.contains('/') { + return false; + } + s.starts_with('/') + || s.starts_with("./") + || s.starts_with("../") + || s.starts_with("~/") + || s.starts_with("src/") + || s.starts_with("docs/") + || s.starts_with("tests/") +} + +/// Returns IDs of specs in `in_flight` whose artifact set overlaps with +/// `new_spec`'s artifacts, indicating an implicit dependency. +/// +/// Skips any in-flight spec that explicitly declares a dep on `new_spec` — +/// that means the overlap runs in the other direction (in-flight is downstream), +/// so adding a reverse edge would create a spurious cycle. +pub fn detect_implicit_deps(new_spec: &SpecInfo, in_flight: &[SpecInfo]) -> Vec { + let new_artifacts: HashSet = collect_artifacts(new_spec).into_iter().collect(); + if new_artifacts.is_empty() { + return vec![]; + } + in_flight + .iter() + .filter(|s| { + // Don't add A→B if B already declares A as a dep (avoids contradiction cycles). + if s.depends_on.contains(&new_spec.id) { + return false; + } + let their: HashSet = collect_artifacts(s).into_iter().collect(); + !their.is_disjoint(&new_artifacts) + }) + .map(|s| s.id.clone()) + .collect() +} + +/// Build a spec-level DAG from the supplied specs. +/// +/// Edges come from: +/// 1. Explicit `depends_on` declarations (filtered to specs present in the set). +/// 2. Implicit artifact overlap detected by `detect_implicit_deps`. +/// +/// Returns `Err(DagError::Cycle(...))` if a cycle is detected. +pub fn build_dag(specs: &[SpecInfo]) -> Result { + let known_ids: HashSet<&str> = specs.iter().map(|s| s.id.as_str()).collect(); + + // First pass: create nodes with explicit deps (restricted to known IDs). + let mut nodes: HashMap = specs + .iter() + .map(|spec| { + let explicit_deps: Vec = spec + .depends_on + .iter() + .filter(|d| known_ids.contains(d.as_str())) + .cloned() + .collect(); + ( + spec.id.clone(), + DagNode { + spec_id: spec.id.clone(), + title: spec.title.clone(), + explicit_deps, + implicit_deps: vec![], + }, + ) + }) + .collect(); + + // Second pass: detect implicit deps from artifact overlap. + // We treat specs earlier in the slice as potential producers ("in-flight") and + // each spec as a potential consumer ("new"), matching the directional semantics + // of detect_implicit_deps. This avoids spurious symmetric cycles when two specs + // share the same path string. + for i in 1..specs.len() { + let new_spec = &specs[i]; + let predecessors: Vec = specs[..i].iter().cloned().collect(); + + let implicit = detect_implicit_deps(new_spec, &predecessors); + if let Some(node) = nodes.get_mut(&new_spec.id) { + for dep in implicit { + if !node.explicit_deps.contains(&dep) && !node.implicit_deps.contains(&dep) { + node.implicit_deps.push(dep); + } + } + } + } + + let dag = SpecDag { nodes }; + // Validate — errors loudly on cycles. + dag.topological_sort()?; + Ok(dag) +} + +impl SpecDag { + /// Returns spec IDs in topological order (dependencies before dependents). + /// Errors if a cycle is present. + pub fn topological_sort(&self) -> Result, DagError> { + let mut in_degree: HashMap<&str, usize> = + self.nodes.keys().map(|k| (k.as_str(), 0usize)).collect(); + + let mut adj: HashMap<&str, Vec<&str>> = + self.nodes.keys().map(|k| (k.as_str(), vec![])).collect(); + + for (id, node) in &self.nodes { + for dep in node.all_deps() { + // Skip deps that are outside this DAG (already-completed specs). + if !self.nodes.contains_key(dep) { + continue; + } + adj.get_mut(dep).expect("dep in nodes").push(id.as_str()); + *in_degree.get_mut(id.as_str()).expect("id in nodes") += 1; + } + } + + let mut queue: VecDeque<&str> = in_degree + .iter() + .filter(|(_, &d)| d == 0) + .map(|(&id, _)| id) + .collect(); + + let mut order: Vec = Vec::with_capacity(self.nodes.len()); + while let Some(id) = queue.pop_front() { + order.push(id.to_string()); + for &dependent in &adj[id] { + let deg = in_degree.get_mut(dependent).expect("dep in in_degree"); + *deg -= 1; + if *deg == 0 { + queue.push_back(dependent); + } + } + } + + if order.len() != self.nodes.len() { + let cyclic: Vec = in_degree + .iter() + .filter(|(_, &d)| d > 0) + .map(|(&id, _)| id.to_string()) + .collect(); + return Err(DagError::Cycle(cyclic)); + } + + Ok(order) + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Concern types +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, PartialEq, Clone)] +pub enum Severity { + Block, + Warn, + Info, +} + +impl std::fmt::Display for Severity { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Severity::Block => write!(f, "block"), + Severity::Warn => write!(f, "warn"), + Severity::Info => write!(f, "info"), + } + } +} + +#[derive(Debug, Clone)] +pub struct Concern { + pub severity: Severity, + pub description: String, + pub fix: Option, +} + +// ───────────────────────────────────────────────────────────────────────────── +// DB loading +// ───────────────────────────────────────────────────────────────────────────── + +pub fn load_in_flight_specs(db_str: &str) -> Vec { + use crate::queue::Queue; + + let q = match Queue::open(db_str) { + Ok(q) => q, + Err(e) => { + eprintln!("warn: cannot open DB at {db_str}: {e}"); + return vec![]; + } + }; + + let all = match q.status_all() { + Ok(recs) => recs, + Err(e) => { + eprintln!("warn: cannot query specs: {e}"); + return vec![]; + } + }; + + all.iter() + .filter(|s| s.status == "queued" || s.status == "running") + .filter_map(|spec| { + let tasks = q.get_tasks_full(&spec.id).ok()?; + Some(SpecInfo::from_db(spec, &tasks)) + }) + .collect() +} + +// ───────────────────────────────────────────────────────────────────────────── +// Extra spec file loading +// ───────────────────────────────────────────────────────────────────────────── + +pub fn load_extra_spec_files(paths: &[PathBuf]) -> Vec { + paths + .iter() + .filter_map(|path| { + let content = match std::fs::read_to_string(path) { + Ok(c) => c, + Err(e) => { + eprintln!("warn: cannot read {:?}: {}", path, e); + return None; + } + }; + let boi_spec = match crate::spec::parse(&content) { + Ok(s) => s, + Err(e) => { + eprintln!("warn: cannot parse {:?}: {}", path, e); + return None; + } + }; + let id = path + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("unknown") + .to_string(); + + let task_texts: Vec<(Option, Option)> = boi_spec + .tasks + .iter() + .map(|t| (t.spec.clone(), t.verify.clone())) + .collect(); + + Some(SpecInfo { + id: format!("new:{id}"), + title: boi_spec.title, + depends_on: vec![], + task_texts, + }) + }) + .collect() +} + +// ───────────────────────────────────────────────────────────────────────────── +// DAG rendering +// ───────────────────────────────────────────────────────────────────────────── + +pub fn render_dag_text(dag: &SpecDag, order: &[String]) -> String { + let mut out = String::new(); + + out.push_str(&format!( + "SPEC DAG ({} spec{})\n", + dag.nodes.len(), + if dag.nodes.len() == 1 { "" } else { "s" } + )); + out.push_str(&"─".repeat(50)); + out.push('\n'); + + for id in order { + let node = match dag.nodes.get(id) { + Some(n) => n, + None => continue, + }; + + let mut dep_parts: Vec = node + .explicit_deps + .iter() + .map(|d| format!("{d} (explicit)")) + .collect(); + dep_parts.extend(node.implicit_deps.iter().map(|d| format!("{d} (artifact)"))); + + if dep_parts.is_empty() { + out.push_str(&format!(" {id} \"{}\"\n", node.title)); + } else { + out.push_str(&format!( + " {id} \"{}\" → after {}\n", + node.title, + dep_parts.join(", ") + )); + } + } + + out.push('\n'); + out.push_str("Proposed execution order:\n"); + for (i, id) in order.iter().enumerate() { + let node = match dag.nodes.get(id) { + Some(n) => n, + None => continue, + }; + + let all_deps: Vec<&str> = node + .explicit_deps + .iter() + .chain(node.implicit_deps.iter()) + .map(String::as_str) + .collect(); + + if all_deps.is_empty() { + out.push_str(&format!(" {}. {id} \"{}\"\n", i + 1, node.title)); + } else { + out.push_str(&format!( + " {}. {id} \"{}\" --after {}\n", + i + 1, + node.title, + all_deps.join(",") + )); + } + } + + out +} + +// ───────────────────────────────────────────────────────────────────────────── +// LLM critique +// ───────────────────────────────────────────────────────────────────────────── + +fn build_critique_prompt(dag_text: &str) -> String { + format!( + r#"You are reviewing a BOI (Beginning of Infinity) spec DAG — a set of automated agent specs with dependency edges. + +{dag_text} + +Critique the DAG: +1. Are there specs that should depend on each other but don't (missing edges)? +2. Are specs wrongly serialized when they could run in parallel? +3. Do any specs have overlapping or contradicting scope? + +Output ONLY a list of concerns in this exact format (one concern per pair of lines): +CONCERN []: +FIX: + +Where is one of: block, warn, info + block = wrong ordering that will cause failures or data corruption + warn = suboptimal but unlikely to break things + info = observation, no action required + +If there are no concerns, output exactly: NONE +"# + ) +} + +/// FNV-1a 64-bit hash — deterministic across runs. +fn stable_hash(s: &str) -> u64 { + let mut h: u64 = 14695981039346656037; + for b in s.bytes() { + h ^= b as u64; + h = h.wrapping_mul(1099511628211); + } + h +} + +fn cache_dir() -> PathBuf { + let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); + PathBuf::from(home).join(".boi").join("plan-cache") +} + +fn load_cache(hash: u64) -> Option { + let path = cache_dir().join(format!("{hash:016x}.txt")); + std::fs::read_to_string(path).ok() +} + +fn save_cache(hash: u64, text: &str) { + let dir = cache_dir(); + let _ = std::fs::create_dir_all(&dir); + let path = dir.join(format!("{hash:016x}.txt")); + let _ = std::fs::write(path, text); +} + +fn call_llm_critique(dag_text: &str, hash: u64) -> String { + use crate::runtime::openrouter::OpenRouterRuntime; + use crate::runtime::PhaseRuntime; + + let rt = OpenRouterRuntime::new(); + let prompt = build_critique_prompt(dag_text); + + match rt.execute(&prompt, "haiku", Duration::from_secs(60)) { + Ok(out) => { + let text = out.text.trim().to_string(); + save_cache(hash, &text); + text + } + Err(e) => { + eprintln!("warn: LLM critique unavailable: {e}"); + eprintln!("hint: set OPENROUTER_API_KEY to enable automatic DAG critique"); + "LLM_UNAVAILABLE".to_string() + } + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Concern parsing +// ───────────────────────────────────────────────────────────────────────────── + +pub fn parse_concerns(text: &str) -> Vec { + let trimmed = text.trim(); + if trimmed == "NONE" || trimmed == "LLM_UNAVAILABLE" || trimmed.is_empty() { + return vec![]; + } + + let mut concerns: Vec = vec![]; + let mut current: Option = None; + + for line in text.lines() { + let line = line.trim(); + + if let Some(rest) = line.strip_prefix("CONCERN [") { + if let Some(c) = current.take() { + concerns.push(c); + } + if let Some((sev_str, desc)) = rest.split_once("]:") { + let severity = match sev_str.trim().to_lowercase().as_str() { + "block" => Severity::Block, + "warn" | "warning" => Severity::Warn, + _ => Severity::Info, + }; + current = Some(Concern { + severity, + description: desc.trim().to_string(), + fix: None, + }); + } + } else if let Some(fix_text) = line.strip_prefix("FIX:") { + if let Some(ref mut c) = current { + c.fix = Some(fix_text.trim().to_string()); + } + } + } + + if let Some(c) = current { + concerns.push(c); + } + + concerns +} + +// ───────────────────────────────────────────────────────────────────────────── +// Public critique helper (used by dispatch-many) +// ───────────────────────────────────────────────────────────────────────────── + +/// Run the LLM critique on an already-rendered DAG text and return parsed concerns. +/// +/// Uses the persistent cache keyed by DAG topology + titles. Pass +/// `force_refresh = true` to bypass the cache. +pub fn critique_dag( + dag_text: &str, + dag: &SpecDag, + order: &[String], + force_refresh: bool, +) -> Vec { + let cache_input = { + let mut parts: Vec = order + .iter() + .map(|id| { + let node = &dag.nodes[id]; + let mut deps = node.explicit_deps.clone(); + deps.extend_from_slice(&node.implicit_deps); + deps.sort(); + format!("{id}:{};deps={}", node.title, deps.join(",")) + }) + .collect(); + parts.sort(); + parts.join("|") + }; + let hash = stable_hash(&cache_input); + + let critique_text = if !force_refresh { + match load_cache(hash) { + Some(cached) => { + eprintln!("(using cached LLM critique)"); + cached + } + None => call_llm_critique(dag_text, hash), + } + } else { + call_llm_critique(dag_text, hash) + }; + + parse_concerns(&critique_text) +} + +// ───────────────────────────────────────────────────────────────────────────── +// boi plan command +// ───────────────────────────────────────────────────────────────────────────── + +/// Run `boi plan`: build DAG from in-flight specs + optional new spec files, +/// run LLM critique, and print proposed dispatch order. +/// +/// Returns 0 on clean/warn, 1 on cycle or block-severity concern. +pub fn cmd_plan(extra_spec_paths: &[PathBuf], db_str: &str, force_refresh: bool) -> i32 { + let mut specs = load_in_flight_specs(db_str); + specs.extend(load_extra_spec_files(extra_spec_paths)); + + if specs.is_empty() { + println!("No in-flight specs and no spec files provided — nothing to plan."); + return 0; + } + + let dag = match build_dag(&specs) { + Ok(d) => d, + Err(DagError::Cycle(ids)) => { + eprintln!("ERROR: cycle detected in spec DAG: {}", ids.join(", ")); + eprintln!("Fix the dependency cycle before dispatching."); + return 1; + } + }; + + // topological_sort is guaranteed to succeed after build_dag succeeds + let order = dag.topological_sort().expect("cycle already checked"); + + let dag_text = render_dag_text(&dag, &order); + println!("{dag_text}"); + + let concerns = critique_dag(&dag_text, &dag, &order, force_refresh); + let has_block = concerns.iter().any(|c| c.severity == Severity::Block); + + if concerns.is_empty() { + println!("LLM critique: no concerns.\n"); + } else { + println!("LLM critique:"); + for c in &concerns { + let label = match c.severity { + Severity::Block => "[BLOCK]", + Severity::Warn => "[WARN] ", + Severity::Info => "[INFO] ", + }; + println!(" {label} {}", c.description); + if let Some(fix) = &c.fix { + println!(" Fix: {fix}"); + } + } + println!(); + } + + if has_block { + eprintln!("Blocking concerns found — resolve before dispatching."); + eprintln!("Use --force in dispatch-many to override warns (not blocks)."); + return 1; + } + + 0 +} + +#[cfg(test)] +mod dag_build { + use super::*; + + fn spec(id: &str, title: &str, depends_on: &[&str], texts: &[&str]) -> SpecInfo { + SpecInfo { + id: id.to_string(), + title: title.to_string(), + depends_on: depends_on.iter().map(|s| s.to_string()).collect(), + task_texts: texts + .iter() + .map(|t| (Some(t.to_string()), None)) + .collect(), + } + } + + fn pos_map(order: &[String]) -> HashMap<&str, usize> { + order.iter().enumerate().map(|(i, s)| (s.as_str(), i)).collect() + } + + #[test] + fn empty_queue() { + let dag = build_dag(&[]).unwrap(); + assert!(dag.nodes.is_empty()); + assert!(dag.topological_sort().unwrap().is_empty()); + } + + #[test] + fn single_spec() { + let dag = build_dag(&[spec("S001", "One", &[], &[])]).unwrap(); + assert_eq!(dag.nodes.len(), 1); + assert_eq!(dag.topological_sort().unwrap(), vec!["S001"]); + } + + #[test] + fn two_spec_chain() { + let specs = [ + spec("S001", "First", &[], &[]), + spec("S002", "Second", &["S001"], &[]), + ]; + let dag = build_dag(&specs).unwrap(); + let order = dag.topological_sort().unwrap(); + let pos = pos_map(&order); + assert!(pos["S001"] < pos["S002"]); + } + + #[test] + fn fan_out() { + let specs = [ + spec("S001", "Root", &[], &[]), + spec("S002", "Branch A", &["S001"], &[]), + spec("S003", "Branch B", &["S001"], &[]), + ]; + let dag = build_dag(&specs).unwrap(); + let order = dag.topological_sort().unwrap(); + let pos = pos_map(&order); + assert!(pos["S001"] < pos["S002"]); + assert!(pos["S001"] < pos["S003"]); + } + + #[test] + fn diamond() { + let specs = [ + spec("S001", "Root", &[], &[]), + spec("S002", "Left", &["S001"], &[]), + spec("S003", "Right", &["S001"], &[]), + spec("S004", "Merge", &["S002", "S003"], &[]), + ]; + let dag = build_dag(&specs).unwrap(); + let order = dag.topological_sort().unwrap(); + let pos = pos_map(&order); + assert!(pos["S001"] < pos["S002"]); + assert!(pos["S001"] < pos["S003"]); + assert!(pos["S002"] < pos["S004"]); + assert!(pos["S003"] < pos["S004"]); + } + + #[test] + fn cycle_detection() { + let specs = [ + spec("S001", "A", &["S002"], &[]), + spec("S002", "B", &["S001"], &[]), + ]; + assert!(matches!(build_dag(&specs), Err(DagError::Cycle(_)))); + } + + #[test] + fn implicit_dep_detection() { + // S002 mentions the same path as S001 — should detect S001 as an implicit dep. + let path = "src/cli/plan.rs"; + let s1 = spec("S001", "Write plan.rs", &[], &[&format!("Create {path}")]); + let s2 = spec("S002", "Use plan.rs", &[], &[&format!("Read {path}")]); + let implicit = detect_implicit_deps(&s2, &[s1]); + assert!( + implicit.contains(&"S001".to_string()), + "expected S001 in implicit deps, got {:?}", + implicit + ); + } + + #[test] + fn implicit_dep_wired_into_dag() { + // Same as above but verify build_dag captures the implicit edge. + let path = "src/cli/plan.rs"; + let s1 = spec("S001", "Write plan.rs", &[], &[&format!("Create {path}")]); + let s2 = spec("S002", "Use plan.rs", &[], &[&format!("Read {path}")]); + let dag = build_dag(&[s1, s2]).unwrap(); + let order = dag.topological_sort().unwrap(); + let pos = pos_map(&order); + assert!( + pos["S001"] < pos["S002"], + "expected S001 before S002 via implicit dep, order={:?}", + order + ); + } + + #[test] + fn no_false_implicit_deps_on_empty_artifacts() { + // Specs with no recognizable paths should produce no implicit deps. + let s1 = spec("S001", "Alpha", &[], &["do some work"]); + let s2 = spec("S002", "Beta", &[], &["do other work"]); + let implicit = detect_implicit_deps(&s2, &[s1]); + assert!(implicit.is_empty()); + } + + #[test] + fn collect_artifacts_recognizes_paths() { + let s = SpecInfo { + id: "X".into(), + title: "t".into(), + depends_on: vec![], + task_texts: vec![( + Some("Edit src/cli/plan.rs and ~/config.toml".into()), + Some("cd /Users/mrap/boi && cargo test".into()), + )], + }; + let artifacts = collect_artifacts(&s); + let strs: Vec<&str> = artifacts.iter().map(|p| p.to_str().unwrap()).collect(); + assert!(strs.iter().any(|s| *s == "src/cli/plan.rs"), "{:?}", strs); + assert!(strs.iter().any(|s| *s == "~/config.toml"), "{:?}", strs); + } +} diff --git a/src/cli/status.rs b/src/cli/status.rs index 0cb4f73..ea4820f 100644 --- a/src/cli/status.rs +++ b/src/cli/status.rs @@ -1,5 +1,6 @@ use crate::cli::daemon::{daemon_heartbeat_path, is_daemon_locked}; use crate::config; +use crate::failure::{truncate_display, FailureReason}; use crate::fmt::{ display_width, elapsed_since, ensure_db_dir, progress_bar, term_width, time_ago, truncate, BOLD, CYAN, DIM, GREEN, RED, RESET, YELLOW, @@ -7,6 +8,36 @@ use crate::fmt::{ use crate::queue; use serde_json::json; +/// Render a single error line for a failed spec. +/// In normal mode: one DIM RED line with short_summary, truncated to terminal width. +/// In verbose mode: multi-line DIM RED detail block. +fn render_error_line(error_text: &str, verbose: bool, width: usize) -> String { + let reason = FailureReason::from_db(error_text); + if verbose { + let detail = reason.detail(); + let mut out = String::new(); + for line in detail.lines() { + out.push_str(&format!("{}{}{}{}{}\n", DIM, RED, " ", line, RESET)); + } + out + } else { + let summary = reason.short_summary(); + let prefix = " \u{2514}\u{2500} "; // " └─ " + let prefix_width: usize = 7; // 4 spaces + └ + ─ + space + let budget = width.saturating_sub(prefix_width); + let truncated = truncate_display(&summary, budget); + format!("{}{}{}{}{}\n", DIM, RED, prefix, truncated, RESET) + } +} + +/// Returns an error line for a failed spec's error column, or empty string if no error. +fn maybe_render_error(error: Option<&str>, verbose: bool, width: usize) -> String { + match error { + Some(e) if !e.is_empty() => render_error_line(e, verbose, width), + _ => String::new(), + } +} + pub fn render_single_spec(q: &queue::Queue, id: &str) -> String { match q.status(id) { Ok(Some(st)) => { @@ -69,7 +100,7 @@ pub fn render_single_spec(q: &queue::Queue, id: &str) -> String { } } -fn render_status(spec_id: Option<&str>, all: bool, db_str: &str) -> String { +fn render_status(spec_id: Option<&str>, all: bool, verbose: bool, db_str: &str) -> String { ensure_db_dir(db_str); let daemon_running = is_daemon_locked(); @@ -113,7 +144,7 @@ fn render_status(spec_id: Option<&str>, all: bool, db_str: &str) -> String { let queued: Vec<&queue::SpecRecord> = specs.iter().filter(|s| s.status == "queued").collect(); let six_hours_ago = chrono::Utc::now() - chrono::Duration::hours(6); - let finished: Vec<&queue::SpecRecord> = specs + let mut finished: Vec<&queue::SpecRecord> = specs .iter() .filter(|s| { (s.status == "completed" || s.status == "failed" || s.status == "cancelled") @@ -125,6 +156,9 @@ fn render_status(spec_id: Option<&str>, all: bool, db_str: &str) -> String { })) }) .collect(); + // Sort recently-finished by completed_at DESC (most recent first). + // Specs without completed_at sink to the bottom. + finished.sort_by(|a, b| b.completed_at.cmp(&a.completed_at)); // Layout constants (display column widths, not byte widths) // "▸ sa7f3 " = icon(1) + space(1) + id(5) + gap(2) = 9 display cols before title @@ -269,6 +303,10 @@ fn render_status(spec_id: Option<&str>, all: bool, db_str: &str) -> String { " ".repeat(spaces), right, )); + + if s.status == "failed" { + out.push_str(&maybe_render_error(s.error.as_deref(), verbose, width)); + } } out.push('\n'); } @@ -353,15 +391,15 @@ fn render_status(spec_id: Option<&str>, all: bool, db_str: &str) -> String { out } -pub fn cmd_status(spec_id: Option<&str>, all: bool, db_str: &str) { - println!("{}", render_status(spec_id, all, db_str)); +pub fn cmd_status(spec_id: Option<&str>, all: bool, verbose: bool, db_str: &str) { + println!("{}", render_status(spec_id, all, verbose, db_str)); } -pub fn cmd_status_watch(spec_id: Option<&str>, all: bool, db_str: &str) { +pub fn cmd_status_watch(spec_id: Option<&str>, all: bool, verbose: bool, db_str: &str) { loop { // Clear screen print!("\x1b[2J\x1b[H"); - print!("{}", render_status(spec_id, all, db_str)); + print!("{}", render_status(spec_id, all, verbose, db_str)); let now = chrono::Utc::now().format("%H:%M:%S"); println!("\n{}Updated at {} — Ctrl+C to exit{}", DIM, now, RESET); std::thread::sleep(std::time::Duration::from_secs(2)); @@ -475,3 +513,73 @@ pub fn cmd_status_json(spec_id: Option<&str>, all: bool, db_str: &str) { .expect("json! macro output is always serializable") ); } + +#[cfg(test)] +mod tests { + use super::*; + + fn strip_ansi(s: &str) -> String { + let mut out = String::new(); + let mut chars = s.chars().peekable(); + while let Some(c) = chars.next() { + if c == '\x1b' { + for ch in chars.by_ref() { + if ch == 'm' { + break; + } + } + } else { + out.push(c); + } + } + out + } + + #[test] + fn status_render_error_no_error_returns_empty() { + let out = maybe_render_error(None, false, 80); + assert!(out.is_empty(), "None error should produce empty output, got: {:?}", out); + } + + #[test] + fn status_render_error_empty_string_returns_empty() { + let out = maybe_render_error(Some(""), false, 80); + assert!(out.is_empty(), "empty error should produce empty output, got: {:?}", out); + } + + #[test] + fn status_render_error_typed_error_shows_short_summary() { + let err = r#"{"ProviderRateLimit":{"provider":"anthropic","retry_after_s":null}}"#; + let out = render_error_line(err, false, 80); + let plain = strip_ansi(&out); + assert!(plain.contains("\u{2514}\u{2500}"), "should contain └─: {:?}", plain); + assert!(plain.contains("rate limited by anthropic"), "should show short summary: {:?}", plain); + } + + #[test] + fn status_render_error_long_error_truncated_with_ellipsis() { + let long_msg = "x".repeat(200); + let err = format!(r#"{{"Other":{{"message":"{}"}}}}"#, long_msg); + // Narrow terminal of 30 cols → prefix(7) + 23 cols for summary + let out = render_error_line(&err, false, 30); + let plain = strip_ansi(&out); + assert!( + plain.contains('\u{2026}'), + "should be truncated with ellipsis (…): {:?}", + plain + ); + } + + #[test] + fn status_render_error_verbose_shows_detail() { + let err = r#"{"ProviderHttp":{"provider":"anthropic","status":500,"body_excerpt":"internal server error"}}"#; + let out = render_error_line(err, true, 80); + let plain = strip_ansi(&out); + assert!(plain.contains("ProviderHttp"), "verbose should contain ProviderHttp: {:?}", plain); + assert!( + plain.contains("internal server error"), + "verbose should show body excerpt: {:?}", + plain + ); + } +} diff --git a/src/cli/why.rs b/src/cli/why.rs new file mode 100644 index 0000000..26261d8 --- /dev/null +++ b/src/cli/why.rs @@ -0,0 +1,37 @@ +use crate::failure::FailureReason; +use crate::fmt::ensure_db_dir; +use crate::queue; + +pub fn cmd_why(spec_id: &str, db_str: &str) { + ensure_db_dir(db_str); + let q = match queue::Queue::open(db_str) { + Ok(q) => q, + Err(e) => { + eprintln!("error: cannot open queue: {}", e); + std::process::exit(1); + } + }; + + let st = match q.status(spec_id) { + Ok(Some(s)) => s, + Ok(None) => { + eprintln!("error: spec '{}' not found", spec_id); + std::process::exit(1); + } + Err(e) => { + eprintln!("error: {}", e); + std::process::exit(1); + } + }; + + match st.spec.error.as_deref() { + Some(e) if !e.is_empty() => { + let reason = FailureReason::from_db(e); + println!("{}", reason.detail()); + } + _ => { + println!("No failure recorded for spec {}.", spec_id); + println!("Status: {}", st.spec.status); + } + } +} diff --git a/src/config.rs b/src/config.rs index 4ed3fc1..5b62a13 100644 --- a/src/config.rs +++ b/src/config.rs @@ -13,12 +13,37 @@ pub struct Paths { #[derive(Debug, Default, Deserialize, Serialize)] pub struct Config { pub max_workers: Option, + pub spawns_per_tick: Option, pub task_timeout_minutes: Option, pub retry_count: Option, pub cleanup_on_failure: Option, pub hooks: Option>, pub paths: Option, pub claude_bin: Option, + pub brain: Option, +} + +/// Resolve brain directory: spec-level overrides config-level, falls back to None. +pub fn resolve_brain( + spec_brain: Option<&PathBuf>, + config_brain: Option<&PathBuf>, +) -> Option { + spec_brain.or(config_brain).cloned() +} + +/// Validate that the brain path exists and contains CLAUDE.md. +pub fn validate_brain(path: &Path) -> Result<(), String> { + if !path.exists() { + return Err(format!("brain directory not found: {}", path.display())); + } + let claude_md = path.join("CLAUDE.md"); + if !claude_md.exists() { + return Err(format!( + "brain directory missing CLAUDE.md: {}", + path.display() + )); + } + Ok(()) } pub fn load() -> Config { @@ -26,6 +51,23 @@ pub fn load() -> Config { Config::load_from(&config_path) } +/// Fallible load — returns Err on parse failure rather than silently defaulting. +/// Used by SIGHUP hot-reload so a bad config file is a no-op instead of a reset. +pub fn try_load() -> Result { + try_load_from(&default_config_path()) +} + +pub fn try_load_from(path: &std::path::Path) -> Result { + if path.exists() { + let content = std::fs::read_to_string(path) + .map_err(|e| format!("failed to read config {}: {}", path.display(), e))?; + serde_yml::from_str::(&content) + .map_err(|e| format!("config parse error in {}: {}", path.display(), e)) + } else { + Ok(Config::default()) + } +} + pub fn default_config_path() -> PathBuf { let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); PathBuf::from(home).join(".boi").join("config.yaml") @@ -56,6 +98,10 @@ impl Config { self.max_workers.unwrap_or(5) } + pub fn spawns_per_tick(&self) -> u32 { + self.spawns_per_tick.unwrap_or(4) + } + pub fn task_timeout_secs(&self) -> u64 { self.task_timeout_minutes.unwrap_or(30) as u64 * 60 } @@ -116,6 +162,26 @@ mod tests { assert_eq!(cfg.max_workers(), 5); assert_eq!(cfg.task_timeout_secs(), 30 * 60); assert_eq!(cfg.retry_count(), 3); + assert_eq!(cfg.spawns_per_tick(), 4); + } + + #[test] + fn test_spawns_per_tick_default() { + let cfg = Config::default(); + assert_eq!(cfg.spawns_per_tick(), 4); + } + + #[test] + fn test_spawns_per_tick_explicit() { + let path = test_utils::test_file("config-spawns", "yaml"); + let yaml = "spawns_per_tick: 8\n"; + let mut f = fs::File::create(&path).unwrap(); + f.write_all(yaml.as_bytes()).unwrap(); + + let cfg = Config::load_from(&path); + assert_eq!(cfg.spawns_per_tick(), 8); + + let _ = fs::remove_file(&path); } #[test] @@ -161,4 +227,64 @@ mod tests { let _ = fs::remove_file(&path); } + + #[test] + fn test_brain_field_deserializes() { + let path = test_utils::test_file("config-brain", "yaml"); + let yaml = "brain: /some/brain/dir\n"; + let mut f = fs::File::create(&path).unwrap(); + f.write_all(yaml.as_bytes()).unwrap(); + + let cfg = Config::load_from(&path); + assert_eq!(cfg.brain, Some(PathBuf::from("/some/brain/dir"))); + + let _ = fs::remove_file(&path); + } + + #[test] + fn test_brain_defaults_to_none() { + let cfg = Config::default(); + assert!(cfg.brain.is_none()); + } + + #[test] + fn test_brain_validate_path_missing() { + let err = validate_brain(Path::new("/nonexistent/brain/path")).unwrap_err(); + assert!(err.contains("not found"), "err={}", err); + } + + #[test] + fn test_brain_validate_missing_claude_md() { + let dir = test_utils::test_dir("brain-no-claude-md"); + let err = validate_brain(&dir).unwrap_err(); + assert!(err.contains("CLAUDE.md"), "err={}", err); + } + + #[test] + fn test_brain_validate_ok() { + let dir = test_utils::test_dir("brain-valid"); + fs::write(dir.join("CLAUDE.md"), "# context").unwrap(); + validate_brain(&dir).expect("valid brain should pass validation"); + } + + #[test] + fn test_brain_resolve_spec_overrides_config() { + let spec_brain = PathBuf::from("/spec/brain"); + let config_brain = PathBuf::from("/config/brain"); + let resolved = resolve_brain(Some(&spec_brain), Some(&config_brain)); + assert_eq!(resolved, Some(PathBuf::from("/spec/brain"))); + } + + #[test] + fn test_brain_resolve_config_fallback() { + let config_brain = PathBuf::from("/config/brain"); + let resolved = resolve_brain(None, Some(&config_brain)); + assert_eq!(resolved, Some(PathBuf::from("/config/brain"))); + } + + #[test] + fn test_brain_resolve_none_when_unset() { + let resolved = resolve_brain(None, None); + assert!(resolved.is_none()); + } } diff --git a/src/failure.rs b/src/failure.rs new file mode 100644 index 0000000..aaccdc9 --- /dev/null +++ b/src/failure.rs @@ -0,0 +1,582 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum FailureReason { + ModelResolution { model: String, provider: String }, + ProviderRateLimit { provider: String, retry_after_s: Option }, + ProviderHttp { provider: String, status: u16, body_excerpt: String }, + ProviderAuth { provider: String, env_var: String }, + Timeout { phase: String, secs: u64 }, + ToolError { phase: String, message: String }, + VerifyFailed { task: String, exit_code: i32, stderr_excerpt: String }, + WorkerCrash { phase: String, signal: Option, message: String }, + Other { message: String }, +} + +impl FailureReason { + pub fn short_summary(&self) -> String { + match self { + FailureReason::ModelResolution { model, provider } => { + format!("model '{}' not found via {}", model, provider) + } + FailureReason::ProviderRateLimit { provider, retry_after_s } => { + match retry_after_s { + Some(s) => format!("rate limited by {} (retry in {}s)", provider, s), + None => format!("rate limited by {}", provider), + } + } + FailureReason::ProviderHttp { provider, status, .. } => { + format!("HTTP {} from {}", status, provider) + } + FailureReason::ProviderAuth { provider, env_var } => { + format!("auth failed for {} (check {})", provider, env_var) + } + FailureReason::Timeout { phase, secs } => { + format!("timed out in {} after {}s", phase, secs) + } + FailureReason::ToolError { phase, message } => { + let msg = truncate(message, 60); + format!("tool error in {}: {}", phase, msg) + } + FailureReason::VerifyFailed { task, exit_code, .. } => { + format!("verify failed for '{}' (exit {})", task, exit_code) + } + FailureReason::WorkerCrash { phase, signal, .. } => match signal { + Some(sig) => format!("worker crashed in {} (signal {})", phase, sig), + None => format!("worker crashed in {}", phase), + }, + FailureReason::Other { message } => truncate(message, 80).to_string(), + } + } + + pub fn detail(&self) -> String { + match self { + FailureReason::ModelResolution { model, provider } => { + format!( + "ModelResolution\n model: {}\n provider: {}", + model, provider + ) + } + FailureReason::ProviderRateLimit { provider, retry_after_s } => { + let retry = match retry_after_s { + Some(s) => format!("{}s", s), + None => "unknown".to_string(), + }; + format!( + "ProviderRateLimit\n provider: {}\n retry_after: {}", + provider, retry + ) + } + FailureReason::ProviderHttp { provider, status, body_excerpt } => { + format!( + "ProviderHttp\n provider: {}\n status: {}\n body: {}", + provider, status, body_excerpt + ) + } + FailureReason::ProviderAuth { provider, env_var } => { + format!( + "ProviderAuth\n provider: {}\n env_var: {}", + provider, env_var + ) + } + FailureReason::Timeout { phase, secs } => { + format!("Timeout\n phase: {}\n secs: {}", phase, secs) + } + FailureReason::ToolError { phase, message } => { + format!("ToolError\n phase: {}\n message: {}", phase, message) + } + FailureReason::VerifyFailed { task, exit_code, stderr_excerpt } => { + format!( + "VerifyFailed\n task: {}\n exit_code: {}\n stderr: {}", + task, exit_code, stderr_excerpt + ) + } + FailureReason::WorkerCrash { phase, signal, message } => { + let sig = match signal { + Some(s) => s.to_string(), + None => "none".to_string(), + }; + format!( + "WorkerCrash\n phase: {}\n signal: {}\n message: {}", + phase, sig, message + ) + } + FailureReason::Other { message } => { + format!("Other\n message: {}", message) + } + } + } + + /// Serialize to JSON string for storage in the error column. + pub fn to_json(&self) -> String { + serde_json::to_string(self).unwrap_or_else(|_| { + format!("{{\"Other\":{{\"message\":\"serialization failed\"}}}}") + }) + } + + /// Parse from a DB error column value. + /// Tries JSON first; falls back to Other { message } for legacy plain strings and NULLs. + pub fn from_db(text: &str) -> Self { + let trimmed = text.trim(); + if trimmed.starts_with('{') { + if let Ok(r) = serde_json::from_str(trimmed) { + return r; + } + } + FailureReason::Other { message: text.to_string() } + } +} + +/// Infer a typed FailureReason from phase name + free-text reason string. +/// Used at catch sites in the worker where the error is only available as a string. +pub fn infer_failure_reason(phase_name: &str, reason: &str) -> FailureReason { + let lower = reason.to_lowercase(); + + // Timeout + if lower.contains("timeout") { + return FailureReason::Timeout { + phase: phase_name.to_string(), + secs: 0, + }; + } + + // HTTP 429 / rate limit + if lower.contains("429") || lower.contains("rate limit") || lower.contains("too many requests") { + let provider = if lower.contains("openrouter") { "openrouter" } else { "anthropic" }; + return FailureReason::ProviderRateLimit { + provider: provider.to_string(), + retry_after_s: None, + }; + } + + // HTTP 401 / auth errors + if lower.contains("401") || lower.contains("unauthorized") || lower.contains("api key") || lower.contains("invalid key") { + let provider = if lower.contains("openrouter") { "openrouter" } else { "anthropic" }; + let env_var = if lower.contains("openrouter") { "OPENROUTER_API_KEY" } else { "ANTHROPIC_API_KEY" }; + return FailureReason::ProviderAuth { + provider: provider.to_string(), + env_var: env_var.to_string(), + }; + } + + // Other HTTP 4xx/5xx + if let Some(status) = extract_http_status(reason) { + let provider = if lower.contains("openrouter") { "openrouter" } else { "anthropic" }; + let excerpt: String = reason.chars().take(300).collect(); + return FailureReason::ProviderHttp { + provider: provider.to_string(), + status, + body_excerpt: excerpt, + }; + } + + // Worktree / subprocess signal + if lower.contains("worktree") || lower.contains("sigkill") || lower.contains("sigsegv") { + return FailureReason::WorkerCrash { + phase: phase_name.to_string(), + signal: None, + message: reason.chars().take(300).collect(), + }; + } + + // Verify phase + if phase_name.contains("verify") { + return FailureReason::VerifyFailed { + task: phase_name.to_string(), + exit_code: 1, + stderr_excerpt: reason.chars().take(300).collect(), + }; + } + + // Default + FailureReason::ToolError { + phase: phase_name.to_string(), + message: reason.chars().take(300).collect(), + } +} + +/// Extract a 3-digit HTTP status code in range 400–599 from a string. +fn extract_http_status(text: &str) -> Option { + let bytes = text.as_bytes(); + for i in 0..bytes.len().saturating_sub(2) { + if bytes[i].is_ascii_digit() && bytes[i + 1].is_ascii_digit() && bytes[i + 2].is_ascii_digit() { + let before_ok = i == 0 || !bytes[i - 1].is_ascii_digit(); + let after_ok = i + 3 >= bytes.len() || !bytes[i + 3].is_ascii_digit(); + if before_ok && after_ok { + let n = (bytes[i] - b'0') as u16 * 100 + + (bytes[i + 1] - b'0') as u16 * 10 + + (bytes[i + 2] - b'0') as u16; + if (400..=599).contains(&n) { + return Some(n); + } + } + } + } + None +} + +fn truncate(s: &str, max_chars: usize) -> &str { + if s.len() <= max_chars { + s + } else { + // find a char boundary + let mut end = max_chars; + while !s.is_char_boundary(end) { + end -= 1; + } + &s[..end] + } +} + +/// Truncate with ellipsis for display (returns owned String). +pub fn truncate_display(s: &str, max_chars: usize) -> String { + if s.chars().count() <= max_chars { + s.to_string() + } else { + let mut result: String = s.chars().take(max_chars.saturating_sub(1)).collect(); + result.push('…'); + result + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // --- failure_reason tests (matched by `cargo test --lib failure_reason`) --- + + #[test] + fn failure_reason_model_resolution_short_summary() { + let r = FailureReason::ModelResolution { + model: "claude-opus-5".to_string(), + provider: "anthropic".to_string(), + }; + assert_eq!(r.short_summary(), "model 'claude-opus-5' not found via anthropic"); + } + + #[test] + fn failure_reason_model_resolution_detail() { + let r = FailureReason::ModelResolution { + model: "m".to_string(), + provider: "p".to_string(), + }; + let d = r.detail(); + assert!(d.contains("ModelResolution"), "got: {}", d); + assert!(d.contains("model:"), "got: {}", d); + assert!(d.contains("provider:"), "got: {}", d); + } + + #[test] + fn failure_reason_provider_rate_limit_with_retry() { + let r = FailureReason::ProviderRateLimit { + provider: "openai".to_string(), + retry_after_s: Some(30), + }; + assert_eq!(r.short_summary(), "rate limited by openai (retry in 30s)"); + } + + #[test] + fn failure_reason_provider_rate_limit_no_retry() { + let r = FailureReason::ProviderRateLimit { + provider: "openai".to_string(), + retry_after_s: None, + }; + assert_eq!(r.short_summary(), "rate limited by openai"); + } + + #[test] + fn failure_reason_provider_http_short_summary() { + let r = FailureReason::ProviderHttp { + provider: "anthropic".to_string(), + status: 500, + body_excerpt: "internal server error".to_string(), + }; + assert_eq!(r.short_summary(), "HTTP 500 from anthropic"); + } + + #[test] + fn failure_reason_provider_http_detail_includes_body() { + let r = FailureReason::ProviderHttp { + provider: "anthropic".to_string(), + status: 429, + body_excerpt: "quota exceeded".to_string(), + }; + let d = r.detail(); + assert!(d.contains("quota exceeded"), "got: {}", d); + assert!(d.contains("429"), "got: {}", d); + } + + #[test] + fn failure_reason_provider_auth_short_summary() { + let r = FailureReason::ProviderAuth { + provider: "openrouter".to_string(), + env_var: "OPENROUTER_API_KEY".to_string(), + }; + assert_eq!(r.short_summary(), "auth failed for openrouter (check OPENROUTER_API_KEY)"); + } + + #[test] + fn failure_reason_timeout_short_summary() { + let r = FailureReason::Timeout { + phase: "execute".to_string(), + secs: 3600, + }; + assert_eq!(r.short_summary(), "timed out in execute after 3600s"); + } + + #[test] + fn failure_reason_tool_error_short_summary() { + let r = FailureReason::ToolError { + phase: "verify".to_string(), + message: "cargo build failed".to_string(), + }; + let s = r.short_summary(); + assert!(s.contains("tool error in verify"), "got: {}", s); + assert!(s.contains("cargo build failed"), "got: {}", s); + } + + #[test] + fn failure_reason_tool_error_long_message_truncated() { + let long_msg = "a".repeat(200); + let r = FailureReason::ToolError { + phase: "p".to_string(), + message: long_msg, + }; + let s = r.short_summary(); + assert!(s.len() < 200, "should be truncated, got len={}", s.len()); + } + + #[test] + fn failure_reason_verify_failed_short_summary() { + let r = FailureReason::VerifyFailed { + task: "T1234".to_string(), + exit_code: 1, + stderr_excerpt: "assertion failed".to_string(), + }; + assert_eq!(r.short_summary(), "verify failed for 'T1234' (exit 1)"); + } + + #[test] + fn failure_reason_verify_failed_detail_includes_stderr() { + let r = FailureReason::VerifyFailed { + task: "T1234".to_string(), + exit_code: 2, + stderr_excerpt: "no such file".to_string(), + }; + let d = r.detail(); + assert!(d.contains("no such file"), "got: {}", d); + assert!(d.contains("exit_code"), "got: {}", d); + } + + #[test] + fn failure_reason_worker_crash_with_signal() { + let r = FailureReason::WorkerCrash { + phase: "execute".to_string(), + signal: Some(9), + message: "OOM killed".to_string(), + }; + assert_eq!(r.short_summary(), "worker crashed in execute (signal 9)"); + } + + #[test] + fn failure_reason_worker_crash_no_signal() { + let r = FailureReason::WorkerCrash { + phase: "verify".to_string(), + signal: None, + message: "panic".to_string(), + }; + assert_eq!(r.short_summary(), "worker crashed in verify"); + } + + #[test] + fn failure_reason_other_short_summary() { + let r = FailureReason::Other { message: "something went wrong".to_string() }; + assert_eq!(r.short_summary(), "something went wrong"); + } + + #[test] + fn failure_reason_other_long_message_truncated() { + let long_msg = "x".repeat(200); + let r = FailureReason::Other { message: long_msg }; + let s = r.short_summary(); + assert!(s.len() <= 80, "should be truncated to 80 chars, got {}", s.len()); + } + + #[test] + fn failure_reason_json_roundtrip() { + let original = FailureReason::ProviderHttp { + provider: "anthropic".to_string(), + status: 503, + body_excerpt: "service unavailable".to_string(), + }; + let json = original.to_json(); + let parsed = FailureReason::from_db(&json); + // Verify round-trip via JSON equality + assert_eq!(original.to_json(), parsed.to_json()); + } + + #[test] + fn failure_reason_from_db_plain_string_fallback() { + let r = FailureReason::from_db("some legacy plain error text"); + match r { + FailureReason::Other { message } => { + assert_eq!(message, "some legacy plain error text"); + } + other => panic!("expected Other, got {:?}", other), + } + } + + #[test] + fn failure_reason_from_db_null_like_empty_fallback() { + let r = FailureReason::from_db(""); + match r { + FailureReason::Other { .. } => {} + other => panic!("expected Other, got {:?}", other), + } + } + + #[test] + fn failure_reason_from_db_invalid_json_fallback() { + let r = FailureReason::from_db("{not valid json}"); + match r { + FailureReason::Other { .. } => {} + other => panic!("expected Other, got {:?}", other), + } + } + + #[test] + fn failure_reason_all_variants_roundtrip() { + let variants: Vec = vec![ + FailureReason::ModelResolution { model: "m".to_string(), provider: "p".to_string() }, + FailureReason::ProviderRateLimit { provider: "p".to_string(), retry_after_s: Some(60) }, + FailureReason::ProviderRateLimit { provider: "p".to_string(), retry_after_s: None }, + FailureReason::ProviderHttp { provider: "p".to_string(), status: 429, body_excerpt: "b".to_string() }, + FailureReason::ProviderAuth { provider: "p".to_string(), env_var: "K".to_string() }, + FailureReason::Timeout { phase: "execute".to_string(), secs: 600 }, + FailureReason::ToolError { phase: "verify".to_string(), message: "err".to_string() }, + FailureReason::VerifyFailed { task: "T123".to_string(), exit_code: 1, stderr_excerpt: "e".to_string() }, + FailureReason::WorkerCrash { phase: "execute".to_string(), signal: Some(11), message: "segfault".to_string() }, + FailureReason::Other { message: "oops".to_string() }, + ]; + + for v in &variants { + let json = v.to_json(); + let parsed = FailureReason::from_db(&json); + assert_eq!(json, parsed.to_json(), "round-trip failed for {:?}", v); + } + } + + #[test] + fn failure_reason_truncate_display_short() { + assert_eq!(truncate_display("hello", 10), "hello"); + } + + #[test] + fn failure_reason_truncate_display_long() { + let result = truncate_display("hello world this is long", 10); + assert!(result.ends_with('…'), "should end with ellipsis: {}", result); + assert!(result.chars().count() <= 10, "should be at most 10 chars: {}", result); + } + + // --- failure_capture tests: infer_failure_reason --- + + #[test] + fn failure_capture_infer_timeout() { + let r = infer_failure_reason("execute", "timeout"); + assert!(matches!(r, FailureReason::Timeout { .. }), "expected Timeout, got {:?}", r); + } + + #[test] + fn failure_capture_infer_openrouter_timeout() { + let r = infer_failure_reason("execute", "openrouter timeout"); + assert!(matches!(r, FailureReason::Timeout { .. }), "expected Timeout, got {:?}", r); + } + + #[test] + fn failure_capture_infer_rate_limit_429() { + let r = infer_failure_reason("execute", "Phase execute failed: HTTP 429 Too Many Requests"); + assert!(matches!(r, FailureReason::ProviderRateLimit { .. }), "expected ProviderRateLimit, got {:?}", r); + } + + #[test] + fn failure_capture_infer_rate_limit_openrouter() { + let r = infer_failure_reason("execute", "openrouter phase execute failed: 429 rate limit"); + match &r { + FailureReason::ProviderRateLimit { provider, .. } => { + assert_eq!(provider, "openrouter"); + } + _ => panic!("expected ProviderRateLimit, got {:?}", r), + } + } + + #[test] + fn failure_capture_infer_auth_401() { + let r = infer_failure_reason("execute", "Phase execute failed: HTTP 401 Unauthorized"); + assert!(matches!(r, FailureReason::ProviderAuth { .. }), "expected ProviderAuth, got {:?}", r); + } + + #[test] + fn failure_capture_infer_auth_api_key() { + let r = infer_failure_reason("execute", "invalid api key provided"); + assert!(matches!(r, FailureReason::ProviderAuth { .. }), "expected ProviderAuth, got {:?}", r); + } + + #[test] + fn failure_capture_infer_http_500() { + let r = infer_failure_reason("execute", "Phase execute failed: HTTP 500 Internal Server Error"); + assert!(matches!(r, FailureReason::ProviderHttp { .. }), "expected ProviderHttp, got {:?}", r); + if let FailureReason::ProviderHttp { status, .. } = r { + assert_eq!(status, 500); + } + } + + #[test] + fn failure_capture_infer_verify_failed_by_phase_name() { + let r = infer_failure_reason("task-verify", "requeue limit exceeded"); + assert!(matches!(r, FailureReason::VerifyFailed { .. }), "expected VerifyFailed, got {:?}", r); + } + + #[test] + fn failure_capture_infer_verify_failed_phase_verify() { + let r = infer_failure_reason("verify", "cargo test failed"); + assert!(matches!(r, FailureReason::VerifyFailed { .. }), "expected VerifyFailed, got {:?}", r); + } + + #[test] + fn failure_capture_infer_worker_crash_worktree() { + let r = infer_failure_reason("init", "worktree /tmp/boi-S123 no longer exists"); + assert!(matches!(r, FailureReason::WorkerCrash { .. }), "expected WorkerCrash, got {:?}", r); + } + + #[test] + fn failure_capture_infer_default_tool_error() { + let r = infer_failure_reason("execute", "Phase execute failed: some unknown error"); + assert!(matches!(r, FailureReason::ToolError { .. }), "expected ToolError, got {:?}", r); + } + + #[test] + fn failure_capture_infer_tool_error_carries_phase_name() { + let r = infer_failure_reason("plan-critique", "something broke"); + if let FailureReason::ToolError { phase, .. } = r { + assert_eq!(phase, "plan-critique"); + } else { + panic!("expected ToolError, got {:?}", r); + } + } + + #[test] + fn failure_capture_extract_http_status_finds_4xx() { + assert_eq!(extract_http_status("error: HTTP 503 Service Unavailable"), Some(503)); + } + + #[test] + fn failure_capture_extract_http_status_ignores_non_http() { + assert_eq!(extract_http_status("exit code 1 after 200ms"), None); + } + + #[test] + fn failure_capture_extract_http_status_rejects_1xx_3xx() { + assert_eq!(extract_http_status("redirected with 301"), None); + assert_eq!(extract_http_status("ok 200"), None); + } +} diff --git a/src/lib.rs b/src/lib.rs index 97dfed5..fef8880 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +pub mod builtins; +pub mod failure; pub mod cli; pub mod config; pub mod fmt; @@ -5,6 +7,7 @@ pub mod hooks; pub mod phases; pub mod prompt; pub mod queue; +pub mod runtime; pub mod runner; pub mod spawn; pub mod spec; diff --git a/src/main.rs b/src/main.rs index 38807e1..a045280 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,15 +1,18 @@ use boi::cli::bench::cmd_bench; use boi::cli::cancel::cmd_cancel; use boi::cli::config_cmd::cmd_config; -use boi::cli::daemon::{cmd_daemon, cmd_restart, cmd_start, cmd_stop}; +use boi::cli::daemon::{cmd_daemon, cmd_reload, cmd_restart, cmd_start, cmd_stop}; use boi::cli::dispatch::cmd_dispatch; +use boi::cli::dispatch_many::cmd_dispatch_many; use boi::cli::doctor::cmd_doctor; use boi::cli::log::cmd_log; use boi::cli::outputs::cmd_outputs; use boi::cli::phases_cmd::{cmd_phases_list, cmd_phases_show}; +use boi::cli::plan::cmd_plan; use boi::cli::spec_mgmt::{cmd_spec, SpecActionData}; use boi::cli::status::{cmd_status, cmd_status_json, cmd_status_watch}; use boi::cli::telemetry_cmd::cmd_telemetry; +use boi::cli::why::cmd_why; use boi::cli::workers::cmd_workers; use boi::{config, hooks}; use clap::{Parser, Subcommand, ValueEnum}; @@ -75,6 +78,9 @@ enum Commands { /// Override workspace path for spec #[arg(long)] workspace: Option, + /// Skip the implicit-dep DAG check (use when you know the ordering is correct) + #[arg(long)] + skip_plan: bool, }, /// Show queue status Status { @@ -87,6 +93,9 @@ enum Commands { /// Machine-readable JSON output #[arg(long)] json: bool, + /// Show full FailureReason detail for failed specs + #[arg(long, short = 'v')] + verbose: bool, }, /// View worker output log Log { @@ -135,6 +144,43 @@ enum Commands { }, /// Health check Doctor, + /// Plan dispatch order + LLM critique for in-flight and/or new specs + Plan { + /// Additional spec files to include in DAG analysis (in-flight specs are loaded automatically) + specs: Vec, + /// Force re-run LLM critique even if a cached result exists + #[arg(long)] + force_refresh: bool, + }, + /// Dispatch multiple specs in DAG order after an LLM critique gate + DispatchMany { + /// Spec files to dispatch (dispatched in dependency order) + specs: Vec, + /// Auto-approve the dispatch prompt + #[arg(long, short = 'y')] + yes: bool, + /// Auto-approve warn-level concerns (does not override blocks) + #[arg(long, short = 'f')] + force: bool, + #[arg(long, default_value = "100")] + priority: i64, + #[arg(long, short = 'm', value_enum)] + mode: Option, + /// Maximum iterations (default 30) + #[arg(long, default_value = "30")] + max_iter: i64, + /// Task timeout in minutes (default 30) + #[arg(long, default_value = "30")] + timeout: u32, + /// Project name + #[arg(long)] + project: Option, + }, + /// Print full failure detail for a spec — fast forensics when boi status shows an error + Why { + /// Spec ID (e.g. SA015) + spec_id: String, + }, /// Print version Version, /// Benchmark N pipelines across a spec or battery of specs @@ -170,6 +216,8 @@ enum DaemonAction { Restart, /// Run the daemon in the foreground (default) Foreground, + /// Send SIGHUP to the running daemon to reload max_workers, spawns_per_tick, and claude_bin + Reload, } #[derive(Subcommand)] @@ -217,6 +265,7 @@ fn main() { project, dry_run, workspace, + skip_plan, } => { let mode_str = mode.map(|m| m.to_string()); cmd_dispatch( @@ -230,6 +279,7 @@ fn main() { project.as_deref(), dry_run, workspace.as_deref(), + skip_plan, db_str, &hook_cfg, ); @@ -239,13 +289,14 @@ fn main() { all, watch, json, + verbose, } => { if watch { - cmd_status_watch(spec_id.as_deref(), all, db_str); + cmd_status_watch(spec_id.as_deref(), all, verbose, db_str); } else if json { cmd_status_json(spec_id.as_deref(), all, db_str); } else { - cmd_status(spec_id.as_deref(), all, db_str); + cmd_status(spec_id.as_deref(), all, verbose, db_str); } } Commands::Log { spec_id, full, debug, follow } => { @@ -263,6 +314,7 @@ fn main() { DaemonAction::Stop => cmd_stop(), DaemonAction::Restart => cmd_restart(), DaemonAction::Foreground => cmd_daemon(db_str, hook_cfg, &cfg), + DaemonAction::Reload => cmd_reload(), } } Commands::Config { key, value } => { @@ -298,6 +350,38 @@ fn main() { Commands::Doctor => { cmd_doctor(db_str, &cfg); } + Commands::Plan { specs, force_refresh } => { + let exit_code = cmd_plan(&specs, db_str, force_refresh); + std::process::exit(exit_code); + } + Commands::DispatchMany { + specs, + yes, + force, + priority, + mode, + max_iter, + timeout, + project, + } => { + let mode_str = mode.map(|m| m.to_string()); + let exit_code = cmd_dispatch_many( + &specs, + yes, + force, + priority, + mode_str.as_deref(), + max_iter, + timeout, + project.as_deref(), + db_str, + &hook_cfg, + ); + std::process::exit(exit_code); + } + Commands::Why { spec_id } => { + cmd_why(&spec_id, db_str); + } Commands::Version => { println!("boi {}", env!("CARGO_PKG_VERSION")); } diff --git a/src/phases.rs b/src/phases.rs index 6bd1bd5..dbe6e06 100644 --- a/src/phases.rs +++ b/src/phases.rs @@ -73,6 +73,12 @@ pub struct PhaseConfig { pub can_add_tasks: bool, pub can_fail_spec: bool, pub requires_claude: bool, + /// Worker runtime: "claude", "deterministic", "openrouter", or None (defaults to "claude"). + pub runtime: Option, + /// For runtime=openrouter: env var holding the API key (default OPENROUTER_API_KEY). + pub api_key_env: Option, + /// Builtin handler name for deterministic phases, e.g. "builtin:commit". + pub completion_handler: Option, pub approve_signal: Option, pub reject_signal: Option, pub on_approve: Option, @@ -84,6 +90,9 @@ pub struct PhaseConfig { pub effort: Option, pub hooks_pre: Vec, pub hooks_post: Vec, + /// When true, append --bare to the claude CLI invocation (skips session/MCP/skill loading). + #[serde(default)] + pub bare: bool, } #[derive(Debug, Deserialize)] @@ -140,6 +149,10 @@ struct WorkerSection { model: Option, #[serde(default)] code_model: Option, + #[serde(default)] + bare: Option, + #[serde(default)] + api_key_env: Option, } #[derive(Debug, Deserialize)] @@ -211,12 +224,16 @@ impl PhaseConfig { .phase.as_ref().and_then(|p| p.can_fail_spec) .unwrap_or_else(|| derive_can_fail_spec(&name)); - // Derive requires_claude: explicit [phase] setting wins, else derive from worker.runtime + let runtime = toml.worker.as_ref().and_then(|w| w.runtime.clone()); + let api_key_env = toml.worker.as_ref().and_then(|w| w.api_key_env.clone()); + let completion_handler = toml.completion_handler.clone(); + + // Derive requires_claude: explicit [phase] setting wins, else derive from worker.runtime. + // "deterministic" and any non-"claude" value → false. let requires_claude = toml .phase.as_ref().and_then(|p| p.requires_claude) .unwrap_or_else(|| { - toml.worker.as_ref() - .and_then(|w| w.runtime.as_deref()) + runtime.as_deref() .map(|r| r == "claude") .unwrap_or(true) }); @@ -233,6 +250,7 @@ impl PhaseConfig { let effort = toml.worker.as_ref().and_then(|w| w.effort.clone()); let hooks_pre = toml.hooks.as_ref().and_then(|h| h.pre.clone()).unwrap_or_default(); let hooks_post = toml.hooks.as_ref().and_then(|h| h.post.clone()).unwrap_or_default(); + let bare = toml.worker.as_ref().and_then(|w| w.bare).unwrap_or(false); Some(PhaseConfig { name, @@ -244,6 +262,9 @@ impl PhaseConfig { can_add_tasks, can_fail_spec, requires_claude, + runtime, + api_key_env, + completion_handler, approve_signal, reject_signal, on_approve, @@ -255,14 +276,16 @@ impl PhaseConfig { effort, hooks_pre, hooks_post, + bare, }) } } -/// Derive phase level from name. Spec-level phases: plan-critique, critic, evaluate, review, spec-review. +/// Derive phase level from name. Spec-level phases: plan-critique, critic, evaluate, review, spec-review/spec-critique/spec-improve. fn derive_level(name: &str) -> PhaseLevel { match name { - "plan-critique" | "critic" | "evaluate" | "review" | "spec-review" => PhaseLevel::Spec, + "plan-critique" | "critic" | "evaluate" | "review" + | "spec-review" | "spec-critique" | "spec-improve" => PhaseLevel::Spec, _ => PhaseLevel::Task, } } @@ -274,8 +297,8 @@ fn derive_can_add_tasks(name: &str, completion_handler: Option<&str>) -> bool { return true; } } - // Phases that structurally add tasks: critic, decompose, evaluate, plan-critique, code-review, review, spec-review - matches!(name, "critic" | "decompose" | "evaluate" | "plan-critique" | "code-review" | "review" | "spec-review") + // Phases that structurally add tasks: critic, decompose, evaluate, plan-critique, code-review, review, spec-review/spec-critique + matches!(name, "critic" | "decompose" | "evaluate" | "plan-critique" | "code-review" | "review" | "spec-review" | "spec-critique") } /// Derive can_fail_spec from name. @@ -384,7 +407,9 @@ impl PhaseRegistry { } pub fn get(&self, name: &str) -> Option<&PhaseConfig> { - self.user.get(name).or_else(|| self.core.get(name)) + // "spec-review" is a backward-compat alias for "spec-critique". + let resolved = if name == "spec-review" { "spec-critique" } else { name }; + self.user.get(resolved).or_else(|| self.core.get(resolved)) } pub fn list(&self) -> Vec<&PhaseConfig> { @@ -432,10 +457,26 @@ pub fn default_phases(mode: &str) -> Vec { /// Pipeline configuration separating spec-level and task-level phases. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PipelineConfig { - /// Phases that run once for the whole spec (before/after all tasks) + /// Legacy: phases run once for the whole spec. If spec_post_phases is empty, + /// spec_phases is used as spec_post_phases (backward compat). + #[serde(default)] pub spec_phases: Vec, - /// Phases that run for each individual task + /// Phases that run before task execution (spec-pre loop, e.g. spec-critique ↔ spec-improve). + #[serde(default)] + pub spec_pre_phases: Vec, + /// Phases that run after all tasks complete (e.g. doc-update, critic, merge, cleanup). + #[serde(default)] + pub spec_post_phases: Vec, + /// Phases that run for each individual task. + #[serde(default)] pub task_phases: Vec, + /// Max iterations for the spec-pre loop before proceeding to task execution. + #[serde(default = "default_max_loops")] + pub max_loops: u32, +} + +fn default_max_loops() -> u32 { + 3 } #[derive(Debug, Deserialize)] @@ -445,8 +486,16 @@ struct PipelinesToml { #[derive(Debug, Deserialize)] struct PipelineModeToml { + #[serde(default)] spec_phases: Vec, + #[serde(default)] + spec_pre_phases: Vec, + #[serde(default)] + spec_post_phases: Vec, + #[serde(default)] task_phases: Vec, + #[serde(default = "default_max_loops")] + max_loops: u32, } /// Find the pipelines.toml file. @@ -473,9 +522,20 @@ fn load_pipeline_from_file(path: &Path, mode: &str) -> Option { "execute" | "" => "default", other => other, }; - parsed.mode.get(key).map(|m| PipelineConfig { - spec_phases: m.spec_phases.clone(), - task_phases: m.task_phases.clone(), + parsed.mode.get(key).map(|m| { + // Backward compat: if spec_post_phases not provided, use spec_phases as spec_post_phases. + let spec_post = if !m.spec_post_phases.is_empty() { + m.spec_post_phases.clone() + } else { + m.spec_phases.clone() + }; + PipelineConfig { + spec_phases: m.spec_phases.clone(), + spec_pre_phases: m.spec_pre_phases.clone(), + spec_post_phases: spec_post, + task_phases: m.task_phases.clone(), + max_loops: m.max_loops, + } }) } @@ -495,23 +555,38 @@ pub(crate) fn fallback_pipeline(mode: &str) -> PipelineConfig { match mode { "execute" => PipelineConfig { spec_phases: vec!["spec-review".into(), "critic".into()], + spec_pre_phases: vec![], + spec_post_phases: vec!["spec-review".into(), "critic".into()], task_phases: vec!["execute".into(), "task-verify".into()], + max_loops: 3, }, "challenge" => PipelineConfig { spec_phases: vec!["spec-review".into(), "plan-critique".into(), "critic".into()], + spec_pre_phases: vec![], + spec_post_phases: vec!["spec-review".into(), "plan-critique".into(), "critic".into()], task_phases: vec!["execute".into(), "task-verify".into()], + max_loops: 3, }, "discover" => PipelineConfig { spec_phases: vec!["spec-review".into(), "critic".into(), "evaluate".into()], + spec_pre_phases: vec![], + spec_post_phases: vec!["spec-review".into(), "critic".into(), "evaluate".into()], task_phases: vec!["execute".into(), "task-verify".into()], + max_loops: 3, }, "generate" => PipelineConfig { spec_phases: vec!["spec-review".into(), "critic".into(), "evaluate".into()], + spec_pre_phases: vec![], + spec_post_phases: vec!["spec-review".into(), "critic".into(), "evaluate".into()], task_phases: vec!["decompose".into(), "execute".into(), "code-review".into(), "task-verify".into()], + max_loops: 3, }, _ => PipelineConfig { spec_phases: vec![], + spec_pre_phases: vec![], + spec_post_phases: vec![], task_phases: vec!["execute".into()], + max_loops: 3, }, } } @@ -650,6 +725,9 @@ fn fallback_core_phases() -> Vec { can_add_tasks: false, can_fail_spec: false, requires_claude: true, + runtime: Some("claude".into()), + api_key_env: None, + completion_handler: None, approve_signal: None, reject_signal: None, on_approve: None, @@ -661,6 +739,7 @@ fn fallback_core_phases() -> Vec { effort: None, hooks_pre: vec![], hooks_post: vec![], + bare: false, }, PhaseConfig { name: "task-verify".into(), @@ -672,6 +751,9 @@ fn fallback_core_phases() -> Vec { can_add_tasks: false, can_fail_spec: false, requires_claude: false, + runtime: None, + api_key_env: None, + completion_handler: None, approve_signal: None, reject_signal: None, on_approve: Some("next".into()), @@ -683,6 +765,7 @@ fn fallback_core_phases() -> Vec { effort: None, hooks_pre: vec![], hooks_post: vec![], + bare: false, }, ] } @@ -703,13 +786,23 @@ pub fn resolve_pipeline( task_phases: Option<&[String]>, ) -> PipelineConfig { let defaults = default_pipeline(mode); + let resolved_spec = spec_phases + .map(|v| v.to_vec()) + .unwrap_or(defaults.spec_phases.clone()); + // When spec_phases is overridden directly, treat it as spec_post_phases (backward compat). + let resolved_post = if spec_phases.is_some() { + resolved_spec.clone() + } else { + defaults.spec_post_phases + }; PipelineConfig { - spec_phases: spec_phases - .map(|v| v.to_vec()) - .unwrap_or(defaults.spec_phases), + spec_phases: resolved_spec, + spec_pre_phases: defaults.spec_pre_phases, + spec_post_phases: resolved_post, task_phases: task_phases .map(|v| v.to_vec()) .unwrap_or(defaults.task_phases), + max_loops: defaults.max_loops, } } @@ -870,10 +963,16 @@ mod tests { assert!(evaluate.can_add_tasks); assert!(!evaluate.can_fail_spec); - let spec_review = registry.get("spec-review").unwrap(); - assert!(spec_review.can_add_tasks); - assert!(!spec_review.can_fail_spec); - assert!(spec_review.requires_claude); + // spec-critique is the canonical name; spec-review is a backward-compat alias. + // spec-critique rejects+requeues rather than adding tasks directly. + let spec_critique = registry.get("spec-critique").unwrap(); + assert!(!spec_critique.can_add_tasks); + assert!(!spec_critique.can_fail_spec); + assert!(spec_critique.requires_claude); + + // alias still resolves + let via_alias = registry.get("spec-review").unwrap(); + assert_eq!(via_alias.name, "spec-critique"); } #[test] @@ -1022,12 +1121,16 @@ approve_signal = "" fn test_core_and_user_names() { let registry = test_registry(); let core = registry.core_names(); - assert_eq!(core.len(), 10); + assert_eq!(core.len(), 14); assert!(core.contains(&"execute")); assert!(core.contains(&"plan-critique")); assert!(core.contains(&"code-review")); assert!(core.contains(&"task-verify")); - assert!(core.contains(&"spec-review")); + assert!(core.contains(&"spec-critique")); + assert!(core.contains(&"spec-improve")); + assert!(core.contains(&"commit")); + assert!(core.contains(&"merge")); + assert!(core.contains(&"cleanup")); let user = registry.user_names(); assert!(user.is_empty()); @@ -1039,7 +1142,7 @@ approve_signal = "" let nonexistent = test_utils::test_file("nonexistent-dir", "xyz"); let _ = std::fs::remove_file(&nonexistent); registry.load_user_phases(&nonexistent); - assert_eq!(registry.list().len(), 10); + assert_eq!(registry.list().len(), 14); } // --- Step 1: PhaseLevel tests --- @@ -1057,7 +1160,9 @@ approve_signal = "" assert_eq!(registry.get("critic").unwrap().level, PhaseLevel::Spec); assert_eq!(registry.get("evaluate").unwrap().level, PhaseLevel::Spec); assert_eq!(registry.get("plan-critique").unwrap().level, PhaseLevel::Spec); - assert_eq!(registry.get("spec-review").unwrap().level, PhaseLevel::Spec); + assert_eq!(registry.get("spec-critique").unwrap().level, PhaseLevel::Spec); + assert_eq!(registry.get("spec-review").unwrap().level, PhaseLevel::Spec); // alias + assert_eq!(registry.get("spec-improve").unwrap().level, PhaseLevel::Spec); // Task-level phases assert_eq!(registry.get("execute").unwrap().level, PhaseLevel::Task); @@ -1219,6 +1324,9 @@ approve_signal = "" can_add_tasks: false, can_fail_spec: false, requires_claude: true, + runtime: None, + api_key_env: None, + completion_handler: None, approve_signal: None, reject_signal: None, on_approve: None, @@ -1230,6 +1338,7 @@ approve_signal = "" effort: None, hooks_pre: vec![], hooks_post: vec![], + bare: false, }; let prompt = build_phase_prompt(&phase, "title: Test\ntasks: []", None, &std::collections::HashMap::new()); assert!(prompt.contains("Review this spec carefully.")); @@ -1249,6 +1358,9 @@ approve_signal = "" can_add_tasks: false, can_fail_spec: false, requires_claude: true, + runtime: None, + api_key_env: None, + completion_handler: None, approve_signal: None, reject_signal: None, on_approve: None, @@ -1260,6 +1372,7 @@ approve_signal = "" effort: None, hooks_pre: vec![], hooks_post: vec![], + bare: false, }; let prompt = build_phase_prompt(&phase, "spec content", Some("task t-1 details"), &std::collections::HashMap::new()); assert!(prompt.contains("--- TASK ---")); @@ -1278,6 +1391,9 @@ approve_signal = "" can_add_tasks: false, can_fail_spec: false, requires_claude: false, + runtime: None, + api_key_env: None, + completion_handler: None, approve_signal: None, reject_signal: None, on_approve: None, @@ -1289,6 +1405,7 @@ approve_signal = "" effort: None, hooks_pre: vec![], hooks_post: vec![], + bare: false, }; let prompt = build_phase_prompt(&phase, "spec", None, &std::collections::HashMap::new()); assert!(prompt.contains("Phase: task-verify")); @@ -1329,6 +1446,9 @@ approve_signal = "" can_add_tasks: false, can_fail_spec: false, requires_claude: true, + runtime: None, + api_key_env: None, + completion_handler: None, approve_signal: None, reject_signal: None, on_approve: None, @@ -1340,6 +1460,7 @@ approve_signal = "" effort: None, hooks_pre: vec![], hooks_post: vec![], + bare: false, }; let outcome = parse_phase_output(&phase, "Task completed successfully."); assert_eq!(outcome, Verdict::Proceed); @@ -1371,6 +1492,9 @@ approve_signal = "" can_add_tasks: false, can_fail_spec: true, requires_claude: true, + runtime: None, + api_key_env: None, + completion_handler: None, approve_signal: Some("## OK".into()), reject_signal: Some("[FAIL]".into()), on_approve: None, @@ -1382,6 +1506,7 @@ approve_signal = "" effort: None, hooks_pre: vec![], hooks_post: vec![], + bare: false, }; let outcome = parse_phase_output(&phase, "Found issue: [FAIL] bad code"); match outcome { @@ -1402,15 +1527,16 @@ approve_signal = "" .expect("spec_with_issues.yaml must exist"); let registry = test_registry(); - let spec_review = registry.get("spec-review").expect("spec-review phase must exist"); + // spec-review is now a backward-compat alias for spec-critique + let spec_critique = registry.get("spec-review").expect("spec-review alias must exist"); - let prompt = build_phase_prompt(spec_review, &spec_content, None, &std::collections::HashMap::new()); + let prompt = build_phase_prompt(spec_critique, &spec_content, None, &std::collections::HashMap::new()); assert!(prompt.contains("Set up CSV ingestion"), "prompt must contain task title from fixture"); assert!(prompt.contains("Optimize database writes"), "prompt must contain second task title"); assert!( prompt.to_lowercase().contains("verify"), - "spec-review prompt must reference verify command validation" + "spec-critique prompt must reference verify command validation" ); } @@ -1418,7 +1544,8 @@ approve_signal = "" fn test_phase_level_from_toml() { let registry = PhaseRegistry::from_dir(&repo_root().join("phases")); - let spec_phases = ["spec-review", "plan-critique", "critic", "evaluate", "review"]; + // spec-review resolves via alias to spec-critique + let spec_phases = ["spec-review", "spec-critique", "spec-improve", "plan-critique", "critic", "evaluate", "review"]; for name in &spec_phases { let phase = registry.get(name).unwrap_or_else(|| panic!("phase '{name}' not found")); assert_eq!( @@ -1503,4 +1630,283 @@ template = "Do something at the spec level." let _ = fs::remove_dir_all(&dir); } + + // --- spec_improve: signal parsing + phase properties --- + + #[test] + fn test_spec_improve_phase_exists_and_is_spec_level() { + let registry = test_registry(); + let phase = registry.get("spec-improve").expect("spec-improve phase must exist"); + assert_eq!(phase.level, PhaseLevel::Spec); + assert!(!phase.can_add_tasks); + assert!(!phase.can_fail_spec); + assert!(phase.requires_claude); + } + + #[test] + fn test_spec_improve_approve_signal_is_proceed() { + let registry = test_registry(); + let phase = registry.get("spec-improve").unwrap(); + assert_eq!(phase.approve_signal.as_deref(), Some("## Spec Improved")); + let verdict = parse_phase_output(phase, "Work done.\n\n## Spec Improved\n"); + assert_eq!(verdict, Verdict::Proceed, "approved output must yield Proceed"); + } + + #[test] + fn test_spec_improve_on_approve_requeues_spec_critique() { + let registry = test_registry(); + let phase = registry.get("spec-improve").unwrap(); + assert_eq!( + phase.on_approve.as_deref(), + Some("requeue:spec-critique"), + "spec-improve on_approve must be requeue:spec-critique" + ); + } + + #[test] + fn test_spec_critique_phase_signals() { + let registry = test_registry(); + let phase = registry.get("spec-critique").unwrap(); + assert_eq!(phase.approve_signal.as_deref(), Some("## Spec Approved")); + assert_eq!(phase.reject_signal.as_deref(), Some("[CRITIQUE]")); + assert_eq!(phase.on_reject.as_deref(), Some("requeue:spec-improve")); + } + + #[test] + fn test_spec_critique_approve_signal_is_proceed() { + let registry = test_registry(); + let phase = registry.get("spec-critique").unwrap(); + let verdict = parse_phase_output(phase, "All checks passed.\n\n## Spec Approved\n"); + assert_eq!(verdict, Verdict::Proceed); + } + + #[test] + fn test_spec_critique_reject_signal_is_redo() { + let registry = test_registry(); + let phase = registry.get("spec-critique").unwrap(); + // [CRITIQUE] in output + on_reject = "requeue:spec-improve" → Redo + let verdict = parse_phase_output(phase, "### [CRITIQUE] 1\n\nTask t-3 verify is broken.\n"); + assert!( + matches!(verdict, Verdict::Redo { .. }), + "critique rejection must yield Redo (requeue), got {:?}", verdict + ); + } + + #[test] + fn test_spec_review_alias_signals_match_spec_critique() { + let registry = test_registry(); + let via_alias = registry.get("spec-review").unwrap(); + let direct = registry.get("spec-critique").unwrap(); + assert_eq!(via_alias.approve_signal, direct.approve_signal); + assert_eq!(via_alias.reject_signal, direct.reject_signal); + assert_eq!(via_alias.on_reject, direct.on_reject); + } + + // --- Pipeline v2: deterministic commit / merge / cleanup phases --- + + #[test] + fn test_pipeline_v2_phase_configs() { + let registry = test_registry(); + + let commit = registry.get("commit").expect("commit phase must exist"); + assert_eq!(commit.level, PhaseLevel::Task); + assert_eq!(commit.runtime.as_deref(), Some("deterministic")); + assert_eq!(commit.completion_handler.as_deref(), Some("builtin:commit")); + assert!(!commit.requires_claude); + + let merge = registry.get("merge").expect("merge phase must exist"); + assert_eq!(merge.level, PhaseLevel::Spec); + assert_eq!(merge.runtime.as_deref(), Some("deterministic")); + assert_eq!(merge.completion_handler.as_deref(), Some("builtin:merge")); + assert!(!merge.requires_claude); + + let cleanup = registry.get("cleanup").expect("cleanup phase must exist"); + assert_eq!(cleanup.level, PhaseLevel::Spec); + assert_eq!(cleanup.runtime.as_deref(), Some("deterministic")); + assert_eq!(cleanup.completion_handler.as_deref(), Some("builtin:cleanup")); + assert!(!cleanup.requires_claude); + } + + #[test] + fn test_pipeline_v2_end_to_end() { + use crate::builtins::{run_builtin, BuiltinContext, BuiltinResult}; + + let _guard = test_utils::HOME_LOCK.lock().unwrap(); + let repo = test_utils::test_git_repo("pv2-e2e"); + let home = test_utils::test_dir("pv2-e2e-home"); + std::env::set_var("HOME", home.to_str().unwrap()); + + let registry = test_registry(); + let spec_id = "pv2-e2e-001"; + + let dest = crate::worktree::create(spec_id, repo.to_str().unwrap()).unwrap(); + std::fs::write(dest.join("output.txt"), "v2 output").unwrap(); + + // commit phase: commits changes in the worktree + let commit = registry.get("commit").unwrap(); + let handler = commit.completion_handler.as_deref().unwrap(); + let ctx = BuiltinContext { spec_id, task_title: "Add output", repo_path: repo.to_str().unwrap() }; + assert!(matches!(run_builtin(handler, &ctx), BuiltinResult::Success(_)), "commit phase failed"); + + // merge phase: brings worktree branch into the main branch + let merge = registry.get("merge").unwrap(); + let handler = merge.completion_handler.as_deref().unwrap(); + let ctx = BuiltinContext { spec_id, task_title: "", repo_path: repo.to_str().unwrap() }; + assert!(matches!(run_builtin(handler, &ctx), BuiltinResult::Success(_)), "merge phase failed"); + assert!(repo.join("output.txt").exists(), "merged file must appear in main repo"); + + // cleanup phase: removes worktree dir and deletes branch + let cleanup = registry.get("cleanup").unwrap(); + let handler = cleanup.completion_handler.as_deref().unwrap(); + let ctx = BuiltinContext { spec_id, task_title: "", repo_path: repo.to_str().unwrap() }; + assert!(matches!(run_builtin(handler, &ctx), BuiltinResult::Success(_)), "cleanup phase failed"); + assert!(!dest.exists(), "worktree must be removed after cleanup"); + } + + // --- bare flag tests --- + + #[test] + fn test_phase_bare_defaults_to_false() { + let toml_content = r#" +name = "critic" +description = "Test phase" + +[phase] +name = "critic" +level = "spec" +can_add_tasks = true +can_fail_spec = true +requires_claude = true + +[prompt] +template = "Review the spec." +"#; + let dir = test_utils::test_dir("bare-default"); + fs::write(dir.join("critic.phase.toml"), toml_content).unwrap(); + let mut registry = test_registry(); + registry.load_user_phases(&dir); + let phase = registry.get("critic").unwrap(); + assert!(!phase.bare, "bare should default to false when not set in TOML"); + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn test_phase_bare_true_parsed_from_toml() { + let toml_content = r#" +name = "spec-critique-bare" +description = "Bare phase for cold-start test" + +[phase] +name = "spec-critique-bare" +level = "spec" +can_add_tasks = false +can_fail_spec = false +requires_claude = true + +[worker] +bare = true +runtime = "claude" + +[prompt] +template = "Critique the spec." +"#; + let dir = test_utils::test_dir("bare-true"); + fs::write(dir.join("spec-critique-bare.phase.toml"), toml_content).unwrap(); + let mut registry = test_registry(); + registry.load_user_phases(&dir); + let phase = registry.get("spec-critique-bare").unwrap(); + assert!(phase.bare, "bare = true in [worker] section must be parsed"); + let _ = fs::remove_dir_all(&dir); + } + + mod pipeline_parse { + use super::*; + + fn toml_file(content: &str) -> std::path::PathBuf { + let path = test_utils::test_file("pipeline-parse", "toml"); + std::fs::write(&path, content).unwrap(); + path + } + + #[test] + fn test_v2_mode_all_fields() { + let path = toml_file(r#" +[mode.v2] +spec_pre_phases = ["spec-critique", "spec-improve"] +task_phases = ["execute", "review", "commit"] +spec_post_phases = ["doc-update", "critic", "merge", "cleanup"] +max_loops = 3 +"#); + let cfg = load_pipeline_from_file(&path, "v2").unwrap(); + assert_eq!(cfg.spec_pre_phases, vec!["spec-critique", "spec-improve"]); + assert_eq!(cfg.task_phases, vec!["execute", "review", "commit"]); + assert_eq!(cfg.spec_post_phases, vec!["doc-update", "critic", "merge", "cleanup"]); + assert_eq!(cfg.max_loops, 3); + } + + #[test] + fn test_backward_compat_spec_phases_becomes_spec_post() { + let path = toml_file(r#" +[mode.default] +spec_phases = ["critic"] +task_phases = ["execute", "task-verify"] +"#); + let cfg = load_pipeline_from_file(&path, "execute").unwrap(); + assert_eq!(cfg.spec_post_phases, vec!["critic"]); + assert!(cfg.spec_pre_phases.is_empty()); + } + + #[test] + fn test_max_loops_defaults_to_3() { + let path = toml_file(r#" +[mode.v2] +spec_pre_phases = ["spec-critique"] +task_phases = ["execute"] +spec_post_phases = ["critic"] +"#); + let cfg = load_pipeline_from_file(&path, "v2").unwrap(); + assert_eq!(cfg.max_loops, 3); + } + + #[test] + fn test_mode_v2_from_pipelines_toml() { + let pipelines_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("phases") + .join("pipelines.toml"); + let cfg = load_pipeline_from_file(&pipelines_path, "v2") + .expect("mode.v2 must exist in phases/pipelines.toml"); + assert!(!cfg.spec_pre_phases.is_empty(), "spec_pre_phases must not be empty"); + assert!(!cfg.spec_post_phases.is_empty(), "spec_post_phases must not be empty"); + assert!(!cfg.task_phases.is_empty(), "task_phases must not be empty"); + assert!(cfg.spec_pre_phases.contains(&"spec-critique".to_string())); + assert!(cfg.spec_post_phases.contains(&"critic".to_string())); + } + + #[test] + fn test_pre_and_post_are_distinct() { + let path = toml_file(r#" +[mode.v2] +spec_pre_phases = ["spec-critique", "spec-improve"] +task_phases = ["execute", "review", "commit"] +spec_post_phases = ["doc-update", "critic", "merge", "cleanup"] +"#); + let cfg = load_pipeline_from_file(&path, "v2").unwrap(); + for pre in &cfg.spec_pre_phases { + assert!( + !cfg.spec_post_phases.contains(pre), + "phase '{pre}' must not appear in both pre and post" + ); + } + } + + #[test] + fn test_mode_not_found_returns_none() { + let path = toml_file(r#" +[mode.default] +spec_phases = ["critic"] +task_phases = ["execute"] +"#); + assert!(load_pipeline_from_file(&path, "nonexistent").is_none()); + } + } } diff --git a/src/prompt.rs b/src/prompt.rs index b20af99..2f4dcb1 100644 --- a/src/prompt.rs +++ b/src/prompt.rs @@ -1,16 +1,46 @@ use crate::spec; +use std::path::Path; -pub fn build_prompt(spec_content: &str, task: &spec::BoiTask) -> String { +const BRAIN_CHAR_LIMIT: usize = 32_000; + +/// Load CLAUDE.md from `path`, returning its content. +pub fn load_brain(path: &Path) -> Result { + let claude_md = path.join("CLAUDE.md"); + std::fs::read_to_string(&claude_md) + .map_err(|e| format!("failed to read {}: {}", claude_md.display(), e)) +} + +fn truncate_to_char_limit(s: &str, limit: usize) -> &str { + if s.len() <= limit { + return s; + } + // Truncate at a char boundary + match s.char_indices().nth(limit) { + Some((idx, _)) => &s[..idx], + None => s, + } +} + +pub fn build_prompt(spec_content: &str, task: &spec::BoiTask, brain: Option<&Path>) -> String { let task_spec = task.spec.as_deref().unwrap_or("(no spec provided)"); let task_verify = task.verify.as_deref().unwrap_or("(no verify command)"); + + let brain_section = brain + .and_then(|p| load_brain(p).ok()) + .map(|content| { + let truncated = truncate_to_char_limit(&content, BRAIN_CHAR_LIMIT).to_string(); + format!("## System Context\n\n{}\n\n", truncated) + }) + .unwrap_or_default(); + format!( - "You are a BOI worker. Execute exactly one task from this spec.\n\n\ + "{}You are a BOI worker. Execute exactly one task from this spec.\n\n\ FULL SPEC:\n{}\n\n\ YOUR TASK: {} — {}\n\n\ SPEC:\n{}\n\n\ VERIFY:\n{}\n\n\ Execute the task. Do NOT modify the spec file — status is tracked externally.", - spec_content, task.id, task.title, task_spec, task_verify + brain_section, spec_content, task.id, task.title, task_spec, task_verify ) } @@ -18,10 +48,11 @@ pub fn build_prompt(spec_content: &str, task: &spec::BoiTask) -> String { mod tests { use super::*; use crate::spec; + use std::fs; + use std::path::PathBuf; - #[test] - fn test_build_prompt_contains_task_fields() { - let task = spec::BoiTask { + fn make_task() -> spec::BoiTask { + spec::BoiTask { id: "t-1".to_string(), title: "Setup Cargo".to_string(), status: spec::TaskStatus::Pending, @@ -30,11 +61,69 @@ mod tests { verify: Some("test -f Cargo.toml".to_string()), verify_prompt: None, phases: None, - }; - let prompt = build_prompt("title: Test\ntasks: []", &task); + } + } + + fn tmp_brain_dir(name: &str) -> PathBuf { + let dir = std::env::temp_dir().join(format!("boi_brain_test_{}", name)); + fs::create_dir_all(&dir).unwrap(); + dir + } + + #[test] + fn test_build_prompt_contains_task_fields() { + let task = make_task(); + let prompt = build_prompt("title: Test\ntasks: []", &task, None); assert!(prompt.contains("t-1")); assert!(prompt.contains("Setup Cargo")); assert!(prompt.contains("Run cargo init")); assert!(prompt.contains("test -f Cargo.toml")); } + + #[test] + fn test_brain_inject_present_in_prompt() { + let dir = tmp_brain_dir("present"); + let brain_content = "# Project Rules\n\nDo not delete prod data."; + fs::write(dir.join("CLAUDE.md"), brain_content).unwrap(); + + let task = make_task(); + let prompt = build_prompt("title: Test\ntasks: []", &task, Some(&dir)); + + assert!(prompt.starts_with("## System Context\n\n")); + assert!(prompt.contains("Do not delete prod data.")); + assert!(prompt.contains("You are a BOI worker.")); + } + + #[test] + fn test_brain_inject_truncation_works() { + let dir = tmp_brain_dir("truncation"); + // 33_000 'a' chars — over the 32_000 limit + let long_content = "a".repeat(33_000); + fs::write(dir.join("CLAUDE.md"), &long_content).unwrap(); + + let task = make_task(); + let prompt = build_prompt("title: Test\ntasks: []", &task, Some(&dir)); + + // Brain section is present but content is truncated + assert!(prompt.starts_with("## System Context\n\n")); + let system_ctx_end = prompt.find("\n\nYou are a BOI worker.").unwrap(); + let brain_in_prompt = &prompt[..system_ctx_end]; + // Should be <= limit + small header overhead ("## System Context\n\n" = 20 chars) + assert!(brain_in_prompt.len() <= BRAIN_CHAR_LIMIT + 100); + } + + #[test] + fn test_brain_inject_missing_brain_fails_gracefully() { + let dir = tmp_brain_dir("missing"); + // No CLAUDE.md written — directory exists but file is absent + + // load_brain should return an error + assert!(load_brain(&dir).is_err()); + + // build_prompt should still produce a valid prompt without a brain section + let task = make_task(); + let prompt = build_prompt("title: Test\ntasks: []", &task, Some(&dir)); + assert!(!prompt.starts_with("## System Context")); + assert!(prompt.contains("You are a BOI worker.")); + } } diff --git a/src/queue.rs b/src/queue.rs index f5f872a..3b112ef 100644 --- a/src/queue.rs +++ b/src/queue.rs @@ -29,6 +29,8 @@ pub struct SpecRecord { pub worker_timeout_seconds: Option, pub context: Option, pub workspace: Option, + /// Number of completed critique↔improve loop cycles for this spec. + pub phase_loop_count: i64, } #[derive(Debug)] @@ -267,6 +269,7 @@ impl Queue { Self::ensure_column(&conn, "specs", "worker_timeout_seconds", "INTEGER"); Self::ensure_column(&conn, "specs", "context", "TEXT"); Self::ensure_column(&conn, "specs", "workspace", "TEXT"); + Self::ensure_column(&conn, "specs", "phase_loop_count", "INTEGER DEFAULT 0"); Self::ensure_column(&conn, "tasks", "spec_content", "TEXT"); Self::ensure_column(&conn, "tasks", "verify_content", "TEXT"); @@ -377,7 +380,7 @@ impl Queue { completed_tasks, priority, depends_on, queued_at, started_at, completed_at, worker_id, error, max_iterations, iteration, project, phase, worker_timeout_seconds, - context, workspace + context, workspace, phase_loop_count FROM specs WHERE id = ?1", )?; stmt.query_row(params![id], row_to_spec)? @@ -443,6 +446,29 @@ impl Queue { Ok(()) } + /// Mark a spec as failed, writing the error column with a typed FailureReason. + /// Prefer this over `update_spec(id, "failed")` — it guarantees error is non-NULL. + pub fn fail_spec(&self, spec_id: &str, reason: &crate::failure::FailureReason) -> Result<()> { + let now = Utc::now().to_rfc3339(); + let error_json = reason.to_json(); + self.conn.execute( + "UPDATE specs SET status = 'failed', completed_at = ?1, error = ?2 WHERE id = ?3", + params![now, error_json, spec_id], + )?; + Ok(()) + } + + /// Mark a task as failed, writing the error column with a typed FailureReason. + /// Prefer this over `update_task(id, "FAILED")` — it guarantees error is non-NULL. + pub fn fail_task(&self, spec_id: &str, task_id: &str, reason: &crate::failure::FailureReason) -> Result<()> { + let error_json = reason.to_json(); + self.conn.execute( + "UPDATE tasks SET status = 'FAILED', error = ?1 WHERE spec_id = ?2 AND id = ?3", + params![error_json, spec_id, task_id], + )?; + Ok(()) + } + pub fn status(&self, spec_id: &str) -> Result> { let spec = match self.conn.query_row( "SELECT id, title, mode, status, spec_path, @@ -450,7 +476,7 @@ impl Queue { completed_tasks, priority, depends_on, queued_at, started_at, completed_at, worker_id, error, max_iterations, iteration, project, phase, worker_timeout_seconds, - context, workspace + context, workspace, phase_loop_count FROM specs WHERE id = ?1", params![spec_id], row_to_spec, @@ -479,7 +505,7 @@ impl Queue { completed_tasks, priority, depends_on, queued_at, started_at, completed_at, worker_id, error, max_iterations, iteration, project, phase, worker_timeout_seconds, - context, workspace + context, workspace, phase_loop_count FROM specs ORDER BY CASE status WHEN 'running' THEN 0 WHEN 'queued' THEN 1 ELSE 2 END, @@ -874,6 +900,55 @@ impl Queue { } } + /// Increment the critique↔improve loop counter for a spec. + /// Returns the new count after incrementing. + pub fn increment_phase_loop_count(&self, spec_id: &str) -> Result { + self.conn.execute( + "UPDATE specs SET phase_loop_count = phase_loop_count + 1 WHERE id = ?1", + params![spec_id], + )?; + let count: i64 = self.conn.query_row( + "SELECT phase_loop_count FROM specs WHERE id = ?1", + params![spec_id], + |row| row.get(0), + )?; + Ok(count) + } + + /// Get the current critique↔improve loop counter for a spec. + pub fn get_phase_loop_count(&self, spec_id: &str) -> Result { + let count: i64 = self.conn.query_row( + "SELECT COALESCE(phase_loop_count, 0) FROM specs WHERE id = ?1", + params![spec_id], + |row| row.get(0), + )?; + Ok(count) + } + + /// Reset the critique↔improve loop counter to zero. + pub fn reset_phase_loop_count(&self, spec_id: &str) -> Result<()> { + self.conn.execute( + "UPDATE specs SET phase_loop_count = 0 WHERE id = ?1", + params![spec_id], + )?; + Ok(()) + } + + /// Returns true if the critique↔improve loop has reached or exceeded the cap. + /// Logs a warning when the cap is hit so the pipeline can proceed to task execution. + pub fn phase_loop_capped(&self, spec_id: &str, max_loops: i64) -> bool { + let count = self.get_phase_loop_count(spec_id).unwrap_or(0); + if count >= max_loops { + eprintln!( + "[boi] WARN: spec {} reached max critique/improve loops ({}/{}); proceeding to task execution", + spec_id, count, max_loops + ); + true + } else { + false + } + } + /// Get the last updated timestamp across all specs (for heartbeat detection) pub fn last_spec_update(&self) -> Result> { let result: Option = self @@ -912,6 +987,7 @@ fn row_to_spec(row: &rusqlite::Row<'_>) -> rusqlite::Result { worker_timeout_seconds: row.get(18)?, context: row.get(19)?, workspace: row.get(20)?, + phase_loop_count: row.get::<_, Option>(21)?.unwrap_or(0), }) } @@ -943,6 +1019,7 @@ mod tests { outcomes: None, spec_phases: None, task_phases: None, + brain: None, tasks, } } @@ -1213,4 +1290,197 @@ mod tests { let dequeued2 = q.dequeue().unwrap().unwrap(); assert_eq!(dequeued2.id, id2); } + + // --- spec_improve: loop cap enforcement --- + + #[test] + fn test_spec_improve_loop_count_starts_at_zero() { + let q = open_mem(); + let spec = make_spec("S", vec![make_task("t-1", "T")]); + let id = q.enqueue(&spec, None).unwrap(); + assert_eq!(q.get_phase_loop_count(&id).unwrap(), 0); + } + + #[test] + fn test_spec_improve_loop_count_increments() { + let q = open_mem(); + let spec = make_spec("S", vec![make_task("t-1", "T")]); + let id = q.enqueue(&spec, None).unwrap(); + + let after_first = q.increment_phase_loop_count(&id).unwrap(); + assert_eq!(after_first, 1); + + let after_second = q.increment_phase_loop_count(&id).unwrap(); + assert_eq!(after_second, 2); + } + + #[test] + fn test_spec_improve_loop_count_resets() { + let q = open_mem(); + let spec = make_spec("S", vec![make_task("t-1", "T")]); + let id = q.enqueue(&spec, None).unwrap(); + + q.increment_phase_loop_count(&id).unwrap(); + q.increment_phase_loop_count(&id).unwrap(); + q.reset_phase_loop_count(&id).unwrap(); + + assert_eq!(q.get_phase_loop_count(&id).unwrap(), 0); + } + + #[test] + fn test_spec_improve_loop_not_capped_below_max() { + let q = open_mem(); + let spec = make_spec("S", vec![make_task("t-1", "T")]); + let id = q.enqueue(&spec, None).unwrap(); + + q.increment_phase_loop_count(&id).unwrap(); + q.increment_phase_loop_count(&id).unwrap(); + + assert!(!q.phase_loop_capped(&id, 3), "count=2 should not be capped at max=3"); + } + + #[test] + fn test_spec_improve_loop_capped_at_max() { + let q = open_mem(); + let spec = make_spec("S", vec![make_task("t-1", "T")]); + let id = q.enqueue(&spec, None).unwrap(); + + for _ in 0..3 { + q.increment_phase_loop_count(&id).unwrap(); + } + + assert!(q.phase_loop_capped(&id, 3), "count=3 must be capped at max=3"); + } + + #[test] + fn test_spec_improve_loop_cap_configurable() { + let q = open_mem(); + let spec = make_spec("S", vec![make_task("t-1", "T")]); + let id = q.enqueue(&spec, None).unwrap(); + + q.increment_phase_loop_count(&id).unwrap(); + + // max_loops=1: capped after 1 iteration + assert!(q.phase_loop_capped(&id, 1)); + // max_loops=5: not capped yet + assert!(!q.phase_loop_capped(&id, 5)); + } + + #[test] + fn test_spec_improve_loop_count_in_spec_record() { + let q = open_mem(); + let spec = make_spec("S", vec![make_task("t-1", "T")]); + let id = q.enqueue(&spec, None).unwrap(); + + q.increment_phase_loop_count(&id).unwrap(); + q.increment_phase_loop_count(&id).unwrap(); + + let st = q.status(&id).unwrap().unwrap(); + assert_eq!(st.spec.phase_loop_count, 2, "phase_loop_count must be readable from SpecRecord"); + } + + // --- failure_capture tests (matched by `cargo test --lib failure_capture`) --- + + #[test] + fn failure_capture_fail_spec_sets_error_column() { + use crate::failure::FailureReason; + let q = open_mem(); + let spec = make_spec("S", vec![make_task("t-1", "T")]); + let id = q.enqueue(&spec, None).unwrap(); + + let reason = FailureReason::ToolError { + phase: "execute".to_string(), + message: "test error".to_string(), + }; + q.fail_spec(&id, &reason).unwrap(); + + let st = q.status(&id).unwrap().unwrap(); + assert_eq!(st.spec.status, "failed"); + let error = st.spec.error.expect("error must be non-NULL after fail_spec"); + assert!(error.contains("ToolError"), "error should be JSON FailureReason, got: {}", error); + assert!(error.contains("test error"), "error should contain message, got: {}", error); + } + + #[test] + fn failure_capture_fail_task_sets_error_column() { + use crate::failure::FailureReason; + let q = open_mem(); + let spec = make_spec("S", vec![make_task("t-1", "T")]); + let id = q.enqueue(&spec, None).unwrap(); + let st = q.status(&id).unwrap().unwrap(); + let task_id = st.tasks[0].id.clone(); + + let reason = FailureReason::VerifyFailed { + task: "t-1".to_string(), + exit_code: 1, + stderr_excerpt: "assertion failed".to_string(), + }; + q.fail_task(&id, &task_id, &reason).unwrap(); + + let st = q.status(&id).unwrap().unwrap(); + let task = st.tasks.iter().find(|t| t.id == task_id).unwrap(); + assert_eq!(task.status, "FAILED"); + let error = task.error.as_ref().expect("task error must be non-NULL after fail_task"); + assert!(error.contains("VerifyFailed"), "error should be JSON FailureReason, got: {}", error); + assert!(error.contains("assertion failed"), "error should contain stderr_excerpt, got: {}", error); + } + + #[test] + fn failure_capture_fail_spec_error_roundtrips_as_failure_reason() { + use crate::failure::FailureReason; + let q = open_mem(); + let spec = make_spec("S", vec![make_task("t-1", "T")]); + let id = q.enqueue(&spec, None).unwrap(); + + let reason = FailureReason::ProviderRateLimit { + provider: "anthropic".to_string(), + retry_after_s: Some(60), + }; + q.fail_spec(&id, &reason).unwrap(); + + let st = q.status(&id).unwrap().unwrap(); + let error_str = st.spec.error.unwrap(); + let parsed = FailureReason::from_db(&error_str); + assert_eq!(reason.to_json(), parsed.to_json(), "FailureReason should round-trip through DB"); + } + + #[test] + fn failure_capture_fail_spec_no_null_error() { + use crate::failure::FailureReason; + let q = open_mem(); + let spec = make_spec("S", vec![make_task("t-1", "T")]); + let id = q.enqueue(&spec, None).unwrap(); + + // Initially error is NULL + let st = q.status(&id).unwrap().unwrap(); + assert!(st.spec.error.is_none(), "error should start as NULL"); + + // After fail_spec, error must be non-NULL + q.fail_spec(&id, &FailureReason::Other { message: "oops".to_string() }).unwrap(); + let st = q.status(&id).unwrap().unwrap(); + assert!(st.spec.error.is_some(), "error must be non-NULL after fail_spec"); + } + + #[test] + fn failure_capture_fail_task_no_null_error() { + use crate::failure::FailureReason; + let q = open_mem(); + let spec = make_spec("S", vec![make_task("t-1", "T")]); + let id = q.enqueue(&spec, None).unwrap(); + let st = q.status(&id).unwrap().unwrap(); + let task_id = st.tasks[0].id.clone(); + + // Initially task error is NULL + let task = st.tasks.iter().find(|t| t.id == task_id).unwrap(); + assert!(task.error.is_none(), "task error should start as NULL"); + + // After fail_task, error must be non-NULL + q.fail_task(&id, &task_id, &FailureReason::Timeout { + phase: "execute".to_string(), + secs: 1800, + }).unwrap(); + let st = q.status(&id).unwrap().unwrap(); + let task = st.tasks.iter().find(|t| t.id == task_id).unwrap(); + assert!(task.error.is_some(), "task error must be non-NULL after fail_task"); + } } diff --git a/src/runner.rs b/src/runner.rs index 5685084..fc0b6bb 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -1,9 +1,12 @@ +use crate::builtins::{self, BuiltinContext}; use crate::phases::{PhaseConfig, Verdict}; +use crate::runtime::{ClaudeCLI, PhaseRuntime, RuntimeError}; +use crate::runtime::openrouter::OpenRouterRuntime; use crate::spec::BoiTask; use crate::telemetry::{LogLevel, Telemetry}; use crate::worker; use serde_json::json; -use std::time::Instant; +use std::time::{Duration, Instant}; /// Trait for running a single phase. Allows mocking in tests. #[allow(clippy::too_many_arguments)] @@ -36,11 +39,15 @@ pub trait PhaseRunner: Send + Sync { } } -/// Production phase runner that spawns claude for requires_claude phases -/// and runs verify commands for non-claude phases. +/// Production phase runner that spawns claude for requires_claude phases, +/// runs verify commands for non-claude phases, and dispatches deterministic +/// builtin handlers without any Claude cold-start. pub struct ClaudePhaseRunner { pub telemetry: Telemetry, pub claude_bin: String, + /// Source repo path for deterministic builtins that need to merge/cleanup. + /// Empty string disables merge/cleanup builtins. + pub repo_path: String, } impl ClaudePhaseRunner { @@ -48,8 +55,14 @@ impl ClaudePhaseRunner { ClaudePhaseRunner { telemetry, claude_bin, + repo_path: String::new(), } } + + pub fn with_repo_path(mut self, repo_path: impl Into) -> Self { + self.repo_path = repo_path.into(); + self + } } impl ClaudePhaseRunner { @@ -65,6 +78,18 @@ impl ClaudePhaseRunner { spec_id: Option<&str>, vars: &std::collections::HashMap, ) -> (Verdict, String) { + // Deterministic phases: skip Claude entirely, run a registered builtin handler. + if phase.runtime.as_deref() == Some("deterministic") { + return (self.run_deterministic_phase(phase, task, spec_id), String::new()); + } + + // OpenRouter phases: send prompt via HTTP unless BOI_FORCE_CLAUDE=1 is set. + if phase.runtime.as_deref() == Some("openrouter") + && std::env::var("BOI_FORCE_CLAUDE").as_deref() != Ok("1") + { + return self.run_openrouter_phase(phase, spec_content, task, timeout_secs, spec_id, vars); + } + if !phase.requires_claude { return (self.run_verify_phase(phase, task, worktree_path, timeout_secs, spec_id), String::new()); } @@ -96,80 +121,57 @@ impl ClaudePhaseRunner { }), ); - let result = worker::spawn_claude( - &prompt, - worktree_path, - timeout_secs, - phase.model.as_deref(), - spec_id, - &self.claude_bin, - ); - - if let Ok(ref cr) = result { - let startup_s = cr.startup_ms as f64 / 1000.0; - self.telemetry.emit( - "boi.claude.first_output", - LogLevel::Debug, - &json!({ - "spec_id": spec_id_hint, - "task_id": task_id, - "startup_ms": cr.startup_ms, - "message": format!("first output after {:.1}s (startup)", startup_s), - }), - ); - } + let model_str = phase.model.as_deref().unwrap_or(""); + let rt = ClaudeCLI { + claude_bin: self.claude_bin.clone(), + worktree_path: worktree_path.to_string(), + spec_id: spec_id.map(|s| s.to_string()), + bare: phase.bare, + }; + let result = rt.execute(&prompt, model_str, Duration::from_secs(timeout_secs)); match result { - Ok(ref cr) if cr.success => { - let inference_s = cr.inference_ms as f64 / 1000.0; - let total_s = cr.total_ms as f64 / 1000.0; + Ok(ro) => { + let total_s = ro.duration_ms as f64 / 1000.0; self.telemetry.emit("boi.claude.exit", LogLevel::Debug, &json!({ "spec_id": spec_id_hint, "task_id": task_id, "phase": phase.name, "exit_code": 0, - "output_length": cr.output.len(), - "stderr_length": cr.stderr.len(), - "stderr_preview": cr.stderr.chars().take(500).collect::(), - "startup_ms": cr.startup_ms, - "inference_ms": cr.inference_ms, - "total_ms": cr.total_ms, - "message": format!("claude exit 0, {} chars ({:.1}s inference, {:.1}s total)", - cr.output.len(), inference_s, total_s), + "output_length": ro.text.len(), + "total_ms": ro.duration_ms, + "message": format!("claude exit 0, {} chars ({:.1}s total)", ro.text.len(), total_s), })); - let verdict = crate::phases::parse_phase_output(phase, &cr.output); - (verdict, cr.output.clone()) + let verdict = crate::phases::parse_phase_output(phase, &ro.text); + (verdict, ro.text) } - Ok(ref cr) => { - let inference_s = cr.inference_ms as f64 / 1000.0; - let total_s = cr.total_ms as f64 / 1000.0; + Err(RuntimeError::Timeout) => { self.telemetry.emit("boi.claude.exit", LogLevel::Error, &json!({ "spec_id": spec_id_hint, "task_id": task_id, "phase": phase.name, "exit_code": 1, - "output_length": cr.output.len(), - "stderr_length": cr.stderr.len(), - "stderr_preview": cr.stderr.chars().take(500).collect::(), - "startup_ms": cr.startup_ms, - "inference_ms": cr.inference_ms, - "total_ms": cr.total_ms, - "message": format!("claude exit non-zero, {} chars ({:.1}s inference, {:.1}s total){}", - cr.output.len(), inference_s, total_s, - if cr.stderr.is_empty() { String::new() } else { - format!("\n stderr: {}", cr.stderr.chars().take(200).collect::()) - }), + "message": "claude timeout", + })); + (Verdict::Done { success: false, reason: "timeout".into() }, "timeout".to_string()) + } + Err(RuntimeError::NonZeroExit(output)) => { + self.telemetry.emit("boi.claude.exit", LogLevel::Error, &json!({ + "spec_id": spec_id_hint, + "task_id": task_id, + "phase": phase.name, + "exit_code": 1, + "output_length": output.len(), + "message": format!("claude exit non-zero, {} chars", output.len()), })); - let verdict = if cr.output == "timeout" { - Verdict::Done { success: false, reason: "timeout".into() } - } else if phase.on_crash.as_deref() == Some("retry") { + let verdict = if phase.on_crash.as_deref() == Some("retry") { Verdict::Done { success: false, reason: format!("Phase {} claude exited non-zero", phase.name) } } else { - Verdict::Done { success: false, reason: format!("Phase {} failed: {}", phase.name, cr.output) } + Verdict::Done { success: false, reason: format!("Phase {} failed: {}", phase.name, output) } }; - (verdict, cr.output.clone()) + (verdict, output) } - Err(e) => { + Err(RuntimeError::SpawnError(e)) => { self.telemetry.emit( "boi.claude.error", LogLevel::Error, @@ -184,6 +186,96 @@ impl ClaudePhaseRunner { } } } + + #[allow(clippy::too_many_arguments)] + fn run_openrouter_phase( + &self, + phase: &PhaseConfig, + spec_content: &str, + task: Option<&BoiTask>, + timeout_secs: u64, + spec_id: Option<&str>, + vars: &std::collections::HashMap, + ) -> (Verdict, String) { + let task_context = task.map(|t| { + format!( + "Task: {} — {}\nSpec: {}\nVerify: {}", + t.id, + t.title, + t.spec.as_deref().unwrap_or("(none)"), + t.verify.as_deref().unwrap_or("(none)") + ) + }); + let prompt = + crate::phases::build_phase_prompt(phase, spec_content, task_context.as_deref(), vars); + + let model = phase.model.as_deref().unwrap_or("gemini-flash"); + let api_key_env = phase.api_key_env.as_deref().unwrap_or("OPENROUTER_API_KEY"); + let mut rt = OpenRouterRuntime::new(); + rt.api_key_env = api_key_env.to_string(); + + let spec_id_hint = spec_id.unwrap_or(""); + let task_id = task.map(|t| t.id.as_str()); + + self.telemetry.emit( + "boi.openrouter.spawn", + crate::telemetry::LogLevel::Debug, + &json!({ + "spec_id": spec_id_hint, + "task_id": task_id, + "phase": phase.name, + "model": model, + "message": "sending prompt to openrouter...", + }), + ); + + let result = rt.execute(&prompt, model, Duration::from_secs(timeout_secs)); + + match result { + Ok(ro) => { + let total_s = ro.duration_ms as f64 / 1000.0; + self.telemetry.emit("boi.openrouter.exit", crate::telemetry::LogLevel::Debug, &json!({ + "spec_id": spec_id_hint, + "task_id": task_id, + "phase": phase.name, + "model": model, + "output_length": ro.text.len(), + "total_ms": ro.duration_ms, + "cost_usd": ro.cost_usd, + "message": format!("openrouter ok, {} chars ({:.1}s)", ro.text.len(), total_s), + })); + let verdict = crate::phases::parse_phase_output(phase, &ro.text); + (verdict, ro.text) + } + Err(RuntimeError::Timeout) => { + self.telemetry.emit("boi.openrouter.exit", crate::telemetry::LogLevel::Error, &json!({ + "spec_id": spec_id_hint, + "task_id": task_id, + "phase": phase.name, + "message": "openrouter timeout", + })); + (Verdict::Done { success: false, reason: "openrouter timeout".into() }, "timeout".to_string()) + } + Err(RuntimeError::NonZeroExit(output)) => { + self.telemetry.emit("boi.openrouter.exit", crate::telemetry::LogLevel::Error, &json!({ + "spec_id": spec_id_hint, + "task_id": task_id, + "phase": phase.name, + "message": format!("openrouter error: {}", output), + })); + (Verdict::Done { success: false, reason: format!("openrouter phase {} failed: {}", phase.name, output) }, output) + } + Err(RuntimeError::SpawnError(e)) => { + self.telemetry.emit("boi.openrouter.error", crate::telemetry::LogLevel::Error, &json!({ + "spec_id": spec_id_hint, + "task_id": task_id, + "phase": phase.name, + "message": format!("openrouter error: {}", e), + })); + (Verdict::Done { success: false, reason: format!("openrouter phase {} error: {}", phase.name, e) }, String::new()) + } + } + } } impl PhaseRunner for ClaudePhaseRunner { @@ -215,6 +307,66 @@ impl PhaseRunner for ClaudePhaseRunner { } impl ClaudePhaseRunner { + fn run_deterministic_phase( + &self, + phase: &PhaseConfig, + task: Option<&BoiTask>, + spec_id: Option<&str>, + ) -> Verdict { + let handler = match phase.completion_handler.as_deref() { + Some(h) => h, + None => { + self.telemetry.emit( + "boi.builtin.error", + LogLevel::Error, + &json!({ + "phase": phase.name, + "message": "deterministic phase has no completion_handler", + }), + ); + return Verdict::Done { + success: false, + reason: format!("phase '{}' is deterministic but has no completion_handler", phase.name), + }; + } + }; + + let sid = spec_id.unwrap_or(""); + let task_title = task.map(|t| t.title.as_str()).unwrap_or(""); + + let ctx = BuiltinContext { + spec_id: sid, + task_title, + repo_path: &self.repo_path, + }; + + self.telemetry.emit( + "boi.builtin.run", + LogLevel::Debug, + &json!({ + "phase": phase.name, + "handler": handler, + "spec_id": sid, + "message": format!("running builtin {}", handler), + }), + ); + + let result = builtins::run_builtin(handler, &ctx); + + self.telemetry.emit( + "boi.builtin.result", + LogLevel::Debug, + &json!({ + "phase": phase.name, + "handler": handler, + "spec_id": sid, + "result": format!("{:?}", result), + }), + ); + + result.to_verdict() + } + fn run_verify_phase( &self, _phase: &PhaseConfig, @@ -282,48 +434,54 @@ impl ClaudePhaseRunner { "message": format!("verify_prompt: spawning claude ({} chars)", verify_prompt.len()), })); - let result = worker::spawn_claude( - verify_prompt, - worktree_path, - timeout_secs, - None, - spec_id, - &self.claude_bin, - ); + let rt = ClaudeCLI { + claude_bin: self.claude_bin.clone(), + worktree_path: worktree_path.to_string(), + spec_id: spec_id.map(|s| s.to_string()), + bare: false, + }; + let result = rt.execute(verify_prompt, "", Duration::from_secs(timeout_secs)); match result { - Ok(ref cr) if cr.success => { + Ok(ro) => { self.telemetry.emit( "boi.verify_prompt.result", LogLevel::Debug, &json!({ "task_id": task.id, "passed": true, - "output_length": cr.output.len(), - "startup_ms": cr.startup_ms, - "inference_ms": cr.inference_ms, - "total_ms": cr.total_ms, - "message": format!("verify_prompt passed ({}ms)", cr.total_ms), + "output_length": ro.text.len(), + "total_ms": ro.duration_ms, + "message": format!("verify_prompt passed ({}ms)", ro.duration_ms), + }), + ); + } + Err(RuntimeError::Timeout) => { + self.telemetry.emit( + "boi.verify_prompt.result", + LogLevel::Debug, + &json!({ + "task_id": task.id, + "passed": false, + "message": "verify_prompt timeout", }), ); + return Verdict::Redo { tasks: vec![] }; } - Ok(ref cr) => { + Err(RuntimeError::NonZeroExit(output)) => { self.telemetry.emit( "boi.verify_prompt.result", LogLevel::Debug, &json!({ "task_id": task.id, "passed": false, - "output_length": cr.output.len(), - "startup_ms": cr.startup_ms, - "inference_ms": cr.inference_ms, - "total_ms": cr.total_ms, - "message": format!("verify_prompt failed ({}ms)", cr.total_ms), + "output_length": output.len(), + "message": format!("verify_prompt failed ({} chars)", output.len()), }), ); return Verdict::Redo { tasks: vec![] }; } - Err(e) => { + Err(RuntimeError::SpawnError(e)) => { self.telemetry.emit( "boi.verify_prompt.error", LogLevel::Error, @@ -402,6 +560,9 @@ mod tests { can_add_tasks: false, can_fail_spec: false, requires_claude, + runtime: None, + api_key_env: None, + completion_handler: None, approve_signal: Some("## Approved".into()), reject_signal: Some("[REJECT]".into()), on_approve: Some("next".into()), @@ -413,6 +574,7 @@ mod tests { effort: None, hooks_pre: vec![], hooks_post: vec![], + bare: false, } } diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs new file mode 100644 index 0000000..fb79012 --- /dev/null +++ b/src/runtime/mod.rs @@ -0,0 +1,188 @@ +pub mod openrouter; + +use std::time::Duration; + +#[derive(Debug)] +pub enum RuntimeError { + Timeout, + /// Process exited non-zero; contains stdout output (may be empty). + NonZeroExit(String), + SpawnError(String), +} + +impl std::fmt::Display for RuntimeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + RuntimeError::Timeout => write!(f, "timeout"), + RuntimeError::NonZeroExit(s) => write!(f, "non-zero exit: {}", s), + RuntimeError::SpawnError(s) => write!(f, "spawn error: {}", s), + } + } +} + +impl std::error::Error for RuntimeError {} + +#[derive(Debug)] +pub struct RuntimeOutput { + pub text: String, + pub cost_usd: Option, + pub input_tokens: Option, + pub output_tokens: Option, + pub duration_ms: u64, +} + +pub trait PhaseRuntime: Send + Sync { + fn execute( + &self, + prompt: &str, + model: &str, + timeout: Duration, + ) -> Result; +} + +/// Wraps the Claude CLI spawning logic as a PhaseRuntime. +/// `model` is passed to `--model`; empty string means use Claude's default. +pub struct ClaudeCLI { + pub claude_bin: String, + pub worktree_path: String, + pub spec_id: Option, + pub bare: bool, +} + +impl PhaseRuntime for ClaudeCLI { + fn execute( + &self, + prompt: &str, + model: &str, + timeout: Duration, + ) -> Result { + let timeout_secs = timeout.as_secs().max(1); + let model_opt = if model.is_empty() { None } else { Some(model) }; + + let cr = crate::spawn::spawn_claude( + prompt, + &self.worktree_path, + timeout_secs, + model_opt, + self.spec_id.as_deref(), + &self.claude_bin, + self.bare, + ) + .map_err(|e| RuntimeError::SpawnError(e.to_string()))?; + + if !cr.success { + if cr.output == "timeout" { + return Err(RuntimeError::Timeout); + } + return Err(RuntimeError::NonZeroExit(cr.output)); + } + + Ok(RuntimeOutput { + text: cr.output, + cost_usd: None, + input_tokens: None, + output_tokens: None, + duration_ms: cr.total_ms, + }) + } +} + +#[cfg(test)] +mod runtime_trait { + use super::*; + + struct EchoRuntime; + + impl PhaseRuntime for EchoRuntime { + fn execute( + &self, + prompt: &str, + _model: &str, + _timeout: Duration, + ) -> Result { + Ok(RuntimeOutput { + text: prompt.to_string(), + cost_usd: None, + input_tokens: None, + output_tokens: None, + duration_ms: 0, + }) + } + } + + struct FailRuntime { + error: fn() -> RuntimeError, + } + + impl PhaseRuntime for FailRuntime { + fn execute( + &self, + _prompt: &str, + _model: &str, + _timeout: Duration, + ) -> Result { + Err((self.error)()) + } + } + + #[test] + fn trait_object_dispatch() { + let rt: Box = Box::new(EchoRuntime); + let out = rt.execute("hello world", "model-x", Duration::from_secs(10)).unwrap(); + assert_eq!(out.text, "hello world"); + assert_eq!(out.duration_ms, 0); + assert!(out.cost_usd.is_none()); + assert!(out.input_tokens.is_none()); + assert!(out.output_tokens.is_none()); + } + + #[test] + fn output_fields_accessible() { + let out = RuntimeOutput { + text: "response text".to_string(), + cost_usd: Some(0.001), + input_tokens: Some(100), + output_tokens: Some(50), + duration_ms: 1234, + }; + assert_eq!(out.text, "response text"); + assert_eq!(out.cost_usd, Some(0.001)); + assert_eq!(out.input_tokens, Some(100)); + assert_eq!(out.output_tokens, Some(50)); + assert_eq!(out.duration_ms, 1234); + } + + #[test] + fn timeout_error_display() { + let err = RuntimeError::Timeout; + assert_eq!(err.to_string(), "timeout"); + } + + #[test] + fn non_zero_exit_error_display() { + let err = RuntimeError::NonZeroExit("bad output".to_string()); + assert!(err.to_string().contains("non-zero exit")); + assert!(err.to_string().contains("bad output")); + } + + #[test] + fn spawn_error_display() { + let err = RuntimeError::SpawnError("no such binary".to_string()); + assert!(err.to_string().contains("spawn error")); + } + + #[test] + fn fail_runtime_returns_timeout() { + let rt: Box = Box::new(FailRuntime { error: || RuntimeError::Timeout }); + let err = rt.execute("prompt", "model", Duration::from_secs(5)).unwrap_err(); + assert!(matches!(err, RuntimeError::Timeout)); + } + + #[test] + fn fail_runtime_returns_non_zero_exit() { + let rt: Box = + Box::new(FailRuntime { error: || RuntimeError::NonZeroExit("crash".to_string()) }); + let err = rt.execute("prompt", "model", Duration::from_secs(5)).unwrap_err(); + assert!(matches!(err, RuntimeError::NonZeroExit(_))); + } +} diff --git a/src/runtime/openrouter.rs b/src/runtime/openrouter.rs new file mode 100644 index 0000000..9953e59 --- /dev/null +++ b/src/runtime/openrouter.rs @@ -0,0 +1,308 @@ +use std::time::Duration; + +use crate::runtime::{PhaseRuntime, RuntimeError, RuntimeOutput}; + +/// Map shorthand aliases to canonical OpenRouter model IDs. +pub(crate) fn resolve_model(model: &str) -> &str { + match model { + "gemini-flash" => "google/gemini-2.0-flash-001", + "grok" => "x-ai/grok-beta", + "qwen-coder" => "qwen/qwen-2.5-coder-32b-instruct", + "haiku" => "anthropic/claude-haiku-4-5", + other => other, + } +} + +/// Injectable HTTP layer so unit tests never hit the network. +pub(crate) trait HttpPost: Send + Sync { + fn post_json( + &self, + url: &str, + api_key: &str, + body: &str, + timeout: Duration, + ) -> Result; +} + +/// OpenRouter runtime that sends prompts as chat completion requests. +pub struct OpenRouterRuntime { + /// Name of the environment variable that holds the API key. + pub api_key_env: String, + http: Box, +} + +impl OpenRouterRuntime { + pub fn new() -> Self { + Self { + api_key_env: "OPENROUTER_API_KEY".to_string(), + http: Box::new(ReqwestHttpPost), + } + } + + pub(crate) fn with_http(http: Box, api_key_env: &str) -> Self { + Self { api_key_env: api_key_env.to_string(), http } + } +} + +impl Default for OpenRouterRuntime { + fn default() -> Self { + Self::new() + } +} + +struct ReqwestHttpPost; + +impl HttpPost for ReqwestHttpPost { + fn post_json( + &self, + url: &str, + api_key: &str, + body: &str, + timeout: Duration, + ) -> Result { + let client = reqwest::blocking::Client::builder() + .timeout(timeout) + .build() + .map_err(|e| RuntimeError::SpawnError(format!("reqwest client build failed: {e}")))?; + + let resp = client + .post(url) + .header("Authorization", format!("Bearer {api_key}")) + .header("Content-Type", "application/json") + .body(body.to_owned()) + .send() + .map_err(|e| { + if e.is_timeout() { + RuntimeError::Timeout + } else { + RuntimeError::SpawnError(format!("HTTP request failed: {e}")) + } + })?; + + let status = resp.status(); + let text = resp + .text() + .map_err(|e| RuntimeError::SpawnError(format!("response body read failed: {e}")))?; + + if !status.is_success() { + return Err(RuntimeError::NonZeroExit(format!( + "OpenRouter returned HTTP {status}: {text}" + ))); + } + + Ok(text) + } +} + +impl PhaseRuntime for OpenRouterRuntime { + fn execute( + &self, + prompt: &str, + model: &str, + timeout: Duration, + ) -> Result { + let api_key = std::env::var(&self.api_key_env).map_err(|_| { + RuntimeError::SpawnError(format!( + "env var {} is not set — cannot call OpenRouter", + self.api_key_env + )) + })?; + + let resolved = resolve_model(model); + + let body = serde_json::json!({ + "model": resolved, + "messages": [{"role": "user", "content": prompt}] + }) + .to_string(); + + let start = std::time::Instant::now(); + + let raw = self.http.post_json( + "https://openrouter.ai/api/v1/chat/completions", + &api_key, + &body, + timeout, + )?; + + let duration_ms = start.elapsed().as_millis() as u64; + + let response: serde_json::Value = serde_json::from_str(&raw).map_err(|e| { + RuntimeError::SpawnError(format!("OpenRouter JSON parse failed: {e}: {raw}")) + })?; + + let text = response["choices"][0]["message"]["content"] + .as_str() + .ok_or_else(|| { + RuntimeError::SpawnError(format!( + "OpenRouter response missing choices[0].message.content: {raw}" + )) + })? + .to_owned(); + + let usage = &response["usage"]; + let input_tokens = usage["prompt_tokens"].as_u64(); + let output_tokens = usage["completion_tokens"].as_u64(); + let cost_usd = usage["cost"].as_f64(); + + Ok(RuntimeOutput { text, cost_usd, input_tokens, output_tokens, duration_ms }) + } +} + +#[cfg(test)] +mod openrouter { + use super::*; + + struct MockHttp { + response: String, + } + + impl HttpPost for MockHttp { + fn post_json( + &self, + _url: &str, + _api_key: &str, + _body: &str, + _timeout: Duration, + ) -> Result { + Ok(self.response.clone()) + } + } + + struct ErrHttp(RuntimeError); + + impl HttpPost for ErrHttp { + fn post_json( + &self, + _: &str, + _: &str, + _: &str, + _: Duration, + ) -> Result { + // Reconstruct a matching error from the stored discriminant + match &self.0 { + RuntimeError::Timeout => Err(RuntimeError::Timeout), + RuntimeError::NonZeroExit(s) => Err(RuntimeError::NonZeroExit(s.clone())), + RuntimeError::SpawnError(s) => Err(RuntimeError::SpawnError(s.clone())), + } + } + } + + fn ok_response(content: &str, prompt_tokens: u64, completion_tokens: u64, cost: f64) -> String { + serde_json::json!({ + "id": "test-id", + "model": "google/gemini-2.0-flash-001", + "choices": [{"message": {"role": "assistant", "content": content}}], + "usage": { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens, + "cost": cost + } + }) + .to_string() + } + + fn rt(http: Box, env_var: &str, key_val: &str) -> OpenRouterRuntime { + // SAFETY: tests run single-threaded within this module + unsafe { std::env::set_var(env_var, key_val) }; + OpenRouterRuntime::with_http(http, env_var) + } + + #[test] + fn parses_text_and_usage() { + let runtime = + rt(Box::new(MockHttp { response: ok_response("hello", 10, 5, 0.0001) }), "OR_KEY_1", "k"); + let out = runtime.execute("say hi", "gemini-flash", Duration::from_secs(30)).unwrap(); + assert_eq!(out.text, "hello"); + assert_eq!(out.input_tokens, Some(10)); + assert_eq!(out.output_tokens, Some(5)); + assert_eq!(out.cost_usd, Some(0.0001)); + assert!(out.duration_ms < 1000); + } + + #[test] + fn missing_api_key_errors_loud() { + unsafe { std::env::remove_var("OR_KEY_ABSENT_12345") }; + let runtime = OpenRouterRuntime::with_http( + Box::new(MockHttp { response: "{}".to_string() }), + "OR_KEY_ABSENT_12345", + ); + let err = runtime.execute("hi", "gemini-flash", Duration::from_secs(5)).unwrap_err(); + assert!(matches!(err, RuntimeError::SpawnError(_))); + assert!(err.to_string().contains("OR_KEY_ABSENT_12345")); + } + + #[test] + fn malformed_json_errors_loud() { + let runtime = rt(Box::new(MockHttp { response: "not json at all".to_string() }), "OR_KEY_2", "k"); + let err = runtime.execute("hi", "gemini-flash", Duration::from_secs(5)).unwrap_err(); + assert!(matches!(err, RuntimeError::SpawnError(_))); + } + + #[test] + fn missing_content_field_errors_loud() { + let bad = serde_json::json!({"choices": [{"message": {}}]}).to_string(); + let runtime = rt(Box::new(MockHttp { response: bad }), "OR_KEY_3", "k"); + let err = runtime.execute("hi", "gemini-flash", Duration::from_secs(5)).unwrap_err(); + assert!(matches!(err, RuntimeError::SpawnError(_))); + } + + #[test] + fn http_error_propagates() { + let runtime = rt( + Box::new(ErrHttp(RuntimeError::NonZeroExit("HTTP 429".to_string()))), + "OR_KEY_4", + "k", + ); + let err = runtime.execute("hi", "gemini-flash", Duration::from_secs(5)).unwrap_err(); + assert!(matches!(err, RuntimeError::NonZeroExit(_))); + } + + #[test] + fn timeout_propagates() { + let runtime = rt(Box::new(ErrHttp(RuntimeError::Timeout)), "OR_KEY_5", "k"); + let err = runtime.execute("hi", "gemini-flash", Duration::from_secs(1)).unwrap_err(); + assert!(matches!(err, RuntimeError::Timeout)); + } + + #[test] + fn usage_fields_optional_when_absent() { + let body = serde_json::json!({ + "choices": [{"message": {"role": "assistant", "content": "ok"}}], + "usage": {} + }) + .to_string(); + let runtime = rt(Box::new(MockHttp { response: body }), "OR_KEY_6", "k"); + let out = runtime.execute("hi", "gemini-flash", Duration::from_secs(5)).unwrap(); + assert_eq!(out.text, "ok"); + assert!(out.input_tokens.is_none()); + assert!(out.output_tokens.is_none()); + assert!(out.cost_usd.is_none()); + } + + #[test] + fn model_alias_gemini_flash() { + assert_eq!(resolve_model("gemini-flash"), "google/gemini-2.0-flash-001"); + } + + #[test] + fn model_alias_grok() { + assert_eq!(resolve_model("grok"), "x-ai/grok-beta"); + } + + #[test] + fn model_alias_qwen_coder() { + assert_eq!(resolve_model("qwen-coder"), "qwen/qwen-2.5-coder-32b-instruct"); + } + + #[test] + fn model_alias_haiku() { + assert_eq!(resolve_model("haiku"), "anthropic/claude-haiku-4-5"); + } + + #[test] + fn unknown_model_passes_through() { + assert_eq!(resolve_model("some/custom-model"), "some/custom-model"); + } +} diff --git a/src/spawn.rs b/src/spawn.rs index 8346cb8..88e7037 100644 --- a/src/spawn.rs +++ b/src/spawn.rs @@ -28,6 +28,26 @@ pub fn pid_file_for(spec_id: &str) -> std::path::PathBuf { pid_dir().join(format!("{}.pid", spec_id)) } +/// Build the CLI argument list for a claude invocation. +pub fn build_claude_args(prompt: &str, model: Option<&str>, bare: bool) -> Vec { + let mut args = vec![ + "-p".to_string(), prompt.to_string(), + "--dangerously-skip-permissions".to_string(), + "--no-session-persistence".to_string(), + "--setting-sources".to_string(), "user".to_string(), + "--output-format".to_string(), "stream-json".to_string(), + "--verbose".to_string(), + ]; + if let Some(m) = model { + args.push("--model".to_string()); + args.push(m.to_string()); + } + if bare { + args.push("--bare".to_string()); + } + args +} + /// Spawn claude with the task prompt. Returns ClaudeResult with timing data. /// startup_ms = time from spawn to first stdout byte. /// inference_ms = time from first byte to process exit. @@ -42,22 +62,12 @@ pub fn spawn_claude( model: Option<&str>, spec_id: Option<&str>, claude_bin: &str, + bare: bool, ) -> Result> { use std::io::Read; use std::os::unix::process::CommandExt; - let mut args = vec![ - "-p".to_string(), prompt.to_string(), - "--dangerously-skip-permissions".to_string(), - "--no-session-persistence".to_string(), - "--setting-sources".to_string(), "user".to_string(), - "--output-format".to_string(), "stream-json".to_string(), - "--verbose".to_string(), - ]; - if let Some(m) = model { - args.push("--model".to_string()); - args.push(m.to_string()); - } + let args = build_claude_args(prompt, model, bare); let args_display: Vec<&str> = args.iter().skip(2).map(|s| s.as_str()).collect(); boi_log!("spawning claude\n bin: {}\n args: {}\n cwd: {}\n prompt: {} chars", claude_bin, args_display.join(" "), worktree_path, prompt.len()); @@ -228,3 +238,90 @@ pub fn spawn_claude( total_ms, }) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_build_args_bare_false_omits_flag() { + let args = build_claude_args("hello", None, false); + assert!(!args.contains(&"--bare".to_string())); + } + + #[test] + fn test_build_args_bare_true_includes_flag() { + let args = build_claude_args("hello", None, true); + assert!(args.contains(&"--bare".to_string())); + } + + #[test] + fn test_build_args_bare_with_model() { + let args = build_claude_args("hello", Some("claude-sonnet-4-6"), true); + assert!(args.contains(&"--bare".to_string())); + assert!(args.contains(&"--model".to_string())); + assert!(args.contains(&"claude-sonnet-4-6".to_string())); + } + + #[test] + fn test_build_args_standard_flags_always_present() { + let args = build_claude_args("prompt text", None, false); + assert!(args.contains(&"--dangerously-skip-permissions".to_string())); + assert!(args.contains(&"--no-session-persistence".to_string())); + assert!(args.contains(&"--verbose".to_string())); + assert!(args.contains(&"stream-json".to_string())); + } +} + +// Bench fixture: critic phase, bare=true vs bare=false startup_ms. +// Data from docs/.bench_raw.json collected 2026-04-29. +#[cfg(test)] +mod bench_bare_flag { + struct PhaseRun { + bare: bool, + startup_ms: u64, + } + + fn critic_bare_runs() -> Vec { + vec![ + PhaseRun { bare: true, startup_ms: 183 }, + PhaseRun { bare: true, startup_ms: 187 }, + PhaseRun { bare: true, startup_ms: 183 }, + ] + } + + fn critic_full_runs() -> Vec { + vec![ + PhaseRun { bare: false, startup_ms: 5209 }, + PhaseRun { bare: false, startup_ms: 5348 }, + PhaseRun { bare: false, startup_ms: 5214 }, + ] + } + + fn avg_startup_ms(runs: &[PhaseRun]) -> f64 { + let sum: u64 = runs.iter().map(|r| r.startup_ms).sum(); + sum as f64 / runs.len() as f64 + } + + #[test] + fn bare_flag_reduces_startup_ms_by_50_percent() { + let bare_runs = critic_bare_runs(); + let full_runs = critic_full_runs(); + + assert_eq!(bare_runs.len(), 3, "must have exactly 3 bare runs"); + assert_eq!(full_runs.len(), 3, "must have exactly 3 full runs"); + assert!(bare_runs.iter().all(|r| r.bare)); + assert!(full_runs.iter().all(|r| !r.bare)); + + let avg_bare = avg_startup_ms(&bare_runs); + let avg_full = avg_startup_ms(&full_runs); + + assert!( + avg_bare < avg_full * 0.50, + "Expected avg bare startup ({:.0}ms) < 50% of avg full ({:.0}ms); got {:.1}%", + avg_bare, + avg_full, + (avg_bare / avg_full) * 100.0, + ); + } +} diff --git a/src/spec.rs b/src/spec.rs index c10dbc3..4db841a 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -1,5 +1,6 @@ use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet, VecDeque}; +use std::path::PathBuf; #[derive(Debug, Deserialize, Serialize)] pub struct BoiSpec { @@ -16,6 +17,8 @@ pub struct BoiSpec { #[serde(default)] pub task_phases: Option>, pub tasks: Vec, + /// Per-spec brain directory override. Takes priority over global config.brain. + pub brain: Option, } #[derive(Debug, Deserialize, Serialize)] @@ -534,4 +537,31 @@ tasks: assert_eq!(spec.tasks[0].phases, Some(vec!["execute".to_string()])); assert_eq!(spec.tasks[1].phases, None); } + + #[test] + fn test_brain_field_in_spec() { + let yaml = r#" +title: "Brain Spec" +brain: /some/brain/dir +tasks: + - id: t-1 + title: "Task" + status: PENDING +"#; + let spec = parse(yaml).unwrap(); + assert_eq!(spec.brain, Some(PathBuf::from("/some/brain/dir"))); + } + + #[test] + fn test_brain_defaults_to_none_in_spec() { + let yaml = r#" +title: "No Brain" +tasks: + - id: t-1 + title: "Task" + status: PENDING +"#; + let spec = parse(yaml).unwrap(); + assert!(spec.brain.is_none()); + } } diff --git a/src/telemetry.rs b/src/telemetry.rs index f5dadb5..5446d8f 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -1,3 +1,4 @@ +use crate::failure::FailureReason; use chrono::Utc; use rusqlite::{params, Connection}; use serde_json::Value; @@ -58,6 +59,13 @@ pub struct TelemetryEvent { pub message: Option, pub data: Option, pub level: String, + pub failure_reason: Option, +} + +#[derive(Debug)] +pub struct FailureCount { + pub reason_type: String, + pub count: i64, } impl Telemetry { @@ -76,14 +84,43 @@ impl Telemetry { event_type TEXT NOT NULL, message TEXT, data TEXT, - level TEXT DEFAULT 'info' + level TEXT DEFAULT 'info', + failure_reason TEXT );", ) .ok(); + // Migration for existing DBs that predate the failure_reason column. + conn.execute_batch( + "ALTER TABLE events ADD COLUMN failure_reason TEXT;", + ) + .ok(); // ok() intentionally swallows the "duplicate column" error on fresh DBs Telemetry { db_path, stderr_level, conn: Arc::new(Mutex::new(conn)) } } pub fn emit(&self, event_type: &str, level: LogLevel, detail: &Value) { + self.emit_inner(event_type, level, detail, None); + } + + /// Emit a failure event with a typed FailureReason stored in the failure_reason column. + /// The variant name (e.g. "ProviderRateLimit") is stored for typed querying. + pub fn emit_failure( + &self, + event_type: &str, + level: LogLevel, + detail: &Value, + reason: &FailureReason, + ) { + let variant = failure_variant_name(reason); + self.emit_inner(event_type, level, detail, Some(variant)); + } + + fn emit_inner( + &self, + event_type: &str, + level: LogLevel, + detail: &Value, + failure_reason: Option<&str>, + ) { let conn = match self.conn.lock() { Ok(c) => c, Err(_) => return, @@ -95,9 +132,9 @@ impl Telemetry { let level_str = level.as_str(); if let Err(e) = conn.execute( - "INSERT INTO events (timestamp, spec_id, event_type, message, data, level) - VALUES (?1, ?2, ?3, ?4, ?5, ?6)", - params![now, spec_id, event_type, message, data_str, level_str], + "INSERT INTO events (timestamp, spec_id, event_type, message, data, level, failure_reason) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", + params![now, spec_id, event_type, message, data_str, level_str, failure_reason], ) { eprintln!("[boi] ERROR: telemetry insert failed for {}: {}", event_type, e); } @@ -110,13 +147,44 @@ impl Telemetry { } } + /// Count failure events grouped by typed reason for a time window. + /// Returns rows like [("ProviderRateLimit", 4), ("Timeout", 1)]. + pub fn failure_count_by_type(&self, since_hours: u64) -> Vec { + let conn = match self.conn.lock() { + Ok(c) => c, + Err(_) => return vec![], + }; + let cutoff = Utc::now() + .checked_sub_signed(chrono::Duration::hours(since_hours as i64)) + .map(|dt| dt.to_rfc3339()) + .unwrap_or_default(); + let mut stmt = match conn.prepare( + "SELECT failure_reason, COUNT(*) as cnt + FROM events + WHERE failure_reason IS NOT NULL AND timestamp >= ?1 + GROUP BY failure_reason + ORDER BY cnt DESC", + ) { + Ok(s) => s, + Err(_) => return vec![], + }; + stmt.query_map(params![cutoff], |row| { + Ok(FailureCount { + reason_type: row.get::<_, String>(0)?, + count: row.get(1)?, + }) + }) + .map(|rows| rows.filter_map(|r| r.ok()).collect()) + .unwrap_or_default() + } + pub fn recent(&self, limit: usize) -> Vec { let conn = match self.conn.lock() { Ok(c) => c, Err(_) => return vec![], }; let mut stmt = match conn.prepare( - "SELECT seq, timestamp, spec_id, event_type, message, data, level + "SELECT seq, timestamp, spec_id, event_type, message, data, level, failure_reason FROM events ORDER BY seq DESC LIMIT ?1", ) { Ok(s) => s, @@ -133,7 +201,7 @@ impl Telemetry { Err(_) => return vec![], }; let mut stmt = match conn.prepare( - "SELECT seq, timestamp, spec_id, event_type, message, data, level + "SELECT seq, timestamp, spec_id, event_type, message, data, level, failure_reason FROM events WHERE spec_id = ?1 ORDER BY seq ASC", ) { Ok(s) => s, @@ -150,7 +218,7 @@ impl Telemetry { Err(_) => return vec![], }; let mut stmt = match conn.prepare( - "SELECT seq, timestamp, spec_id, event_type, message, data, level + "SELECT seq, timestamp, spec_id, event_type, message, data, level, failure_reason FROM events WHERE level = ?1 ORDER BY seq DESC", ) { Ok(s) => s, @@ -167,7 +235,7 @@ impl Telemetry { Err(_) => return vec![], }; let mut stmt = match conn.prepare( - "SELECT seq, timestamp, spec_id, event_type, message, data, level + "SELECT seq, timestamp, spec_id, event_type, message, data, level, failure_reason FROM events WHERE event_type = ?1 ORDER BY seq DESC", ) { Ok(s) => s, @@ -199,9 +267,24 @@ fn row_to_event(row: &rusqlite::Row<'_>) -> rusqlite::Result { message: row.get(4)?, data: row.get(5)?, level: row.get(6)?, + failure_reason: row.get(7).ok(), }) } +fn failure_variant_name(reason: &FailureReason) -> &'static str { + match reason { + FailureReason::ModelResolution { .. } => "ModelResolution", + FailureReason::ProviderRateLimit { .. } => "ProviderRateLimit", + FailureReason::ProviderHttp { .. } => "ProviderHttp", + FailureReason::ProviderAuth { .. } => "ProviderAuth", + FailureReason::Timeout { .. } => "Timeout", + FailureReason::ToolError { .. } => "ToolError", + FailureReason::VerifyFailed { .. } => "VerifyFailed", + FailureReason::WorkerCrash { .. } => "WorkerCrash", + FailureReason::Other { .. } => "Other", + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/test_utils.rs b/src/test_utils.rs index 7ce3ae3..2218933 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -1,5 +1,11 @@ use std::path::PathBuf; use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Mutex; + +/// Global mutex for tests that mutate the HOME env var. +/// Any test that calls std::env::set_var("HOME", ...) must hold this lock +/// to prevent races across modules running in parallel. +pub static HOME_LOCK: Mutex<()> = Mutex::new(()); static COUNTER: AtomicU64 = AtomicU64::new(0); diff --git a/src/worker.rs b/src/worker.rs index a1cbeef..7183e0c 100644 --- a/src/worker.rs +++ b/src/worker.rs @@ -1,4 +1,5 @@ use crate::{ + failure::{infer_failure_reason, FailureReason}, hooks::{ self, HookConfig, ON_COMPLETE, ON_FAIL, ON_TASK_COMPLETE, ON_TASK_FAIL, ON_TASK_START, ON_WORKER_START, ON_PHASE_START, ON_PHASE_COMPLETE, ON_PHASE_FAIL, @@ -30,6 +31,7 @@ pub use crate::prompt::build_prompt; pub struct WorkerConfig { pub max_workers: u32, + pub spawns_per_tick: u32, pub task_timeout_secs: u64, pub retry_count: u32, pub cleanup_on_failure: bool, @@ -40,6 +42,7 @@ impl Default for WorkerConfig { fn default() -> Self { WorkerConfig { max_workers: 5, + spawns_per_tick: 4, task_timeout_secs: 1800, retry_count: 3, cleanup_on_failure: false, @@ -335,6 +338,9 @@ pub fn run_worker_with_phases( .to_string() } }; + // Tracks whether a builtin:cleanup phase already removed the worktree. + // When true, the loop-level worktree-existence check is suppressed. + let mut worktree_removed = false; // Load tasks from DB and rewrite workspace paths in spec/verify content. let mut db_tasks_full: Vec = queue.get_tasks_full(spec_id)?; @@ -387,6 +393,7 @@ pub fn run_worker_with_phases( outcomes: None, spec_phases: None, task_phases: None, + brain: None, tasks, }; @@ -423,7 +430,10 @@ pub fn run_worker_with_phases( let mut order = match spec::topological_sort(&boi_spec) { Ok(o) => o, Err(e) => { - queue.update_spec(spec_id, "failed")?; + let _ = queue.fail_spec(spec_id, &FailureReason::ToolError { + phase: "init".to_string(), + message: e.to_string(), + }); return Err(Box::new(e)); } }; @@ -468,43 +478,58 @@ pub fn run_worker_with_phases( dt.id, dt.depends, e ); boi_log!(" ERROR: {}", msg); - let _ = queue.update_task(spec_id, &dt.id, "FAILED"); - queue.update_spec(spec_id, "failed")?; + let init_reason = FailureReason::ToolError { + phase: "init".to_string(), + message: msg.clone(), + }; + let _ = queue.fail_task(spec_id, &dt.id, &init_reason); + queue.fail_spec(spec_id, &init_reason)?; return Err(msg.into()); } } } - // Precompute phase lists - // Pre-task: spec-review (improves the spec before execution) and plan-critique (gate-checks it) - // Post-task: everything else (critic, evaluate, etc.) - let pre_spec_phases: Vec<&str> = pipeline - .spec_phases - .iter() - .filter_map(|name| { - registry.get(name).and_then(|p| { - if p.level == PhaseLevel::Spec && matches!(name.as_str(), "spec-review" | "plan-critique") { - Some(name.as_str()) - } else { - None - } + // Precompute phase lists. + // v2+ modes declare explicit spec_pre_phases/spec_post_phases; legacy modes derive from spec_phases. + let pre_spec_phases: Vec<&str> = if !pipeline.spec_pre_phases.is_empty() { + // v2+: use the declared spec_pre_phases directly. + pipeline.spec_pre_phases.iter() + .filter_map(|name| registry.get(name).map(|_| name.as_str())) + .collect() + } else { + // Legacy: spec-review and plan-critique run before tasks. + pipeline.spec_phases.iter() + .filter_map(|name| { + registry.get(name).and_then(|p| { + if p.level == PhaseLevel::Spec && matches!(name.as_str(), "spec-review" | "plan-critique") { + Some(name.as_str()) + } else { + None + } + }) }) - }) - .collect(); + .collect() + }; - let post_spec_phases: Vec<&str> = pipeline - .spec_phases - .iter() - .filter_map(|name| { - registry.get(name).and_then(|p| { - if p.level == PhaseLevel::Spec && name != "plan-critique" { - Some(name.as_str()) - } else { - None - } + let post_spec_phases: Vec<&str> = if !pipeline.spec_pre_phases.is_empty() { + // v2+: use the declared spec_post_phases directly. + pipeline.spec_post_phases.iter() + .filter_map(|name| registry.get(name).map(|_| name.as_str())) + .collect() + } else { + // Legacy: everything spec-level except plan-critique runs after tasks. + pipeline.spec_phases.iter() + .filter_map(|name| { + registry.get(name).and_then(|p| { + if p.level == PhaseLevel::Spec && name != "plan-critique" { + Some(name.as_str()) + } else { + None + } + }) }) - }) - .collect(); + .collect() + }; // Track pass count for deadlock detection in TaskSelect let mut task_select_passes: usize = 0; @@ -545,17 +570,23 @@ pub fn run_worker_with_phases( match &state { WorkerState::Cleanup { .. } => {} // Don't check during cleanup _ => { - if !std::path::Path::new(&worktree_path).exists() { + if !worktree_removed && !std::path::Path::new(&worktree_path).exists() { eprintln!( "[boi] ERROR: worktree {} disappeared — aborting spec {}", worktree_path, spec_id ); - if let Err(e) = queue.update_spec(spec_id, "failed") { + let crash_reason = FailureReason::WorkerCrash { + phase: "init".to_string(), + signal: None, + message: format!("worktree {} no longer exists", worktree_path), + }; + if let Err(e) = queue.fail_spec(spec_id, &crash_reason) { eprintln!("[boi] ERROR: failed to mark spec {} as failed after worktree loss: {}", spec_id, e); } telemetry.emit("boi.spec.failed", LogLevel::Info, &json!({ "spec_id": spec_id, "status": "failed", + "failure_reason": crash_reason.to_json(), "message": format!("worktree {} no longer exists", worktree_path), })); break; @@ -911,7 +942,8 @@ pub fn run_worker_with_phases( attempt: 1, }; } else { - queue.update_task(spec_id, &db_task_id, "FAILED")?; + let task_fail_reason = infer_failure_reason(phase_name, reason); + queue.fail_task(spec_id, &db_task_id, &task_fail_reason)?; let task_payload = json!({ "spec_id": spec_id, "task_id": task.id, @@ -922,6 +954,7 @@ pub fn run_worker_with_phases( "spec_id": spec_id, "task_id": task.id, "status": "FAILED", + "failure_reason": task_fail_reason.to_json(), "message": format!("{} failed: {}", task.id, reason), })); state = WorkerState::Failed { @@ -974,7 +1007,11 @@ pub fn run_worker_with_phases( if attempt >= max_attempts { boi_log!("state: TaskPhaseRetry -> Failed (max retries {} reached for task {} phase '{}')", max_attempts, task.id, phase_name); - queue.update_task(spec_id, &db_task_id, "FAILED")?; + let retry_reason = infer_failure_reason( + phase_name, + &format!("phase '{}' failed after {} retries", phase_name, attempt), + ); + queue.fail_task(spec_id, &db_task_id, &retry_reason)?; let task_payload = json!({ "spec_id": spec_id, "task_id": task.id, @@ -985,6 +1022,7 @@ pub fn run_worker_with_phases( "spec_id": spec_id, "task_id": task.id, "status": "FAILED", + "failure_reason": retry_reason.to_json(), "message": format!("{} failed after {} retries", task.id, attempt), })); state = WorkerState::Failed { @@ -1072,7 +1110,11 @@ pub fn run_worker_with_phases( let task_title = task.map(|t| t.title.as_str()).unwrap_or("unknown"); boi_log!(" requeue limit ({}) exceeded for task {}", config.retry_count, task_id_owned); let db_task_id_rq = task_id_owned.clone(); - queue.update_task(spec_id, &db_task_id_rq, "FAILED")?; + let requeue_reason = FailureReason::ToolError { + phase: "requeue".to_string(), + message: format!("requeue limit ({}) exceeded", config.retry_count), + }; + queue.fail_task(spec_id, &db_task_id_rq, &requeue_reason)?; let task_payload = json!({ "spec_id": spec_id, "task_id": task_id_owned, @@ -1083,6 +1125,7 @@ pub fn run_worker_with_phases( "spec_id": spec_id, "task_id": task_id_owned, "status": "FAILED", + "failure_reason": requeue_reason.to_json(), "message": format!("{} failed: requeue limit exceeded", task_id_owned), })); state = WorkerState::Failed { @@ -1182,6 +1225,9 @@ pub fn run_worker_with_phases( match &verdict { Verdict::Proceed => { let _ = hooks::fire(hook_config, ON_PHASE_COMPLETE, &phase_payload); // intentional: best-effort hook notification + if phase.completion_handler.as_deref() == Some("builtin:cleanup") { + worktree_removed = true; + } state = WorkerState::PostTaskSpecPhase { phase_idx: phase_idx + 1 }; } Verdict::Redo { tasks } => { @@ -1270,11 +1316,13 @@ pub fn run_worker_with_phases( WorkerState::Failed { ref reason } => { let reason_owned = reason.clone(); boi_log!(" spec {} failed: {}", spec_id, reason_owned); - queue.update_spec(spec_id, "failed")?; + let spec_fail_reason = infer_failure_reason("worker", &reason_owned); + queue.fail_spec(spec_id, &spec_fail_reason)?; let _ = hooks::fire(hook_config, ON_FAIL, &json!({ "spec_id": spec_id })); // intentional: best-effort hook notification telemetry.emit("boi.spec.failed", LogLevel::Info, &json!({ "spec_id": spec_id, "status": "failed", + "failure_reason": spec_fail_reason.to_json(), "message": format!("spec {} failed: {}", spec_id, reason_owned), })); if config.cleanup_on_failure { @@ -1288,41 +1336,46 @@ pub fn run_worker_with_phases( WorkerState::Cleanup { success } => { boi_log!("state: Cleanup {{ success: {} }}", success); if success { + // Only attempt commit/merge if the worktree still exists. + // v2 pipelines run builtin:merge+builtin:cleanup as phases, so by the time + // we reach this state the worktree may already be gone. if let Some(ws) = &boi_spec.workspace { - let commit_msg = format!("boi({}): completed spec tasks", spec_id); - match crate::worktree::commit_changes(spec_id, &commit_msg) { - Ok(true) => { - boi_log!(" committed changes in worktree"); - match crate::worktree::merge_back(spec_id, ws) { - Ok(output) => { - boi_log!(" merged worktree branch into source repo"); - telemetry.emit("boi.worktree.merged", LogLevel::Info, &json!({ - "spec_id": spec_id, - "message": format!("merged boi/{} into source repo", spec_id), - "merge_output": output.chars().take(200).collect::(), - })); - } - Err(e) => { - boi_log!(" merge failed: {} — worktree preserved", e); - telemetry.emit("boi.worktree.merge_failed", LogLevel::Error, &json!({ - "spec_id": spec_id, - "error": e.to_string(), - })); - let _ = crate::worktree::delete_branch(spec_id, ws); // intentional: best-effort branch cleanup - break; + if std::path::Path::new(&worktree_path).exists() { + let commit_msg = format!("boi({}): completed spec tasks", spec_id); + match crate::worktree::commit_changes(spec_id, &commit_msg) { + Ok(true) => { + boi_log!(" committed changes in worktree"); + match crate::worktree::merge_back(spec_id, ws) { + Ok(output) => { + boi_log!(" merged worktree branch into source repo"); + telemetry.emit("boi.worktree.merged", LogLevel::Info, &json!({ + "spec_id": spec_id, + "message": format!("merged boi/{} into source repo", spec_id), + "merge_output": output.chars().take(200).collect::(), + })); + } + Err(e) => { + boi_log!(" merge failed: {} — worktree preserved", e); + telemetry.emit("boi.worktree.merge_failed", LogLevel::Error, &json!({ + "spec_id": spec_id, + "error": e.to_string(), + })); + let _ = crate::worktree::delete_branch(spec_id, ws); // intentional: best-effort branch cleanup + break; + } } } - } - Ok(false) => { - boi_log!(" no changes to commit in worktree"); - } - Err(e) => { - boi_log!(" commit failed: {} — worktree preserved", e); - telemetry.emit("boi.worktree.commit_failed", LogLevel::Error, &json!({ - "spec_id": spec_id, - "error": e.to_string(), - })); - break; + Ok(false) => { + boi_log!(" no changes to commit in worktree"); + } + Err(e) => { + boi_log!(" commit failed: {} — worktree preserved", e); + telemetry.emit("boi.worktree.commit_failed", LogLevel::Error, &json!({ + "spec_id": spec_id, + "error": e.to_string(), + })); + break; + } } } } @@ -1542,7 +1595,7 @@ mod tests { fn test_spawn_claude_exit_0() { let script = mock_claude(0, "exit0"); let bin = script.to_str().unwrap(); - let cr = spawn_claude("prompt", "/tmp", 10, None, None, bin).unwrap(); + let cr = spawn_claude("prompt", "/tmp", 10, None, None, bin, false).unwrap(); assert!(cr.success); assert!(cr.total_ms > 0 || cr.startup_ms == 0); } @@ -1551,7 +1604,7 @@ mod tests { fn test_spawn_claude_exit_1() { let script = mock_claude(1, "exit1"); let bin = script.to_str().unwrap(); - let cr = spawn_claude("prompt", "/tmp", 10, None, None, bin).unwrap(); + let cr = spawn_claude("prompt", "/tmp", 10, None, None, bin, false).unwrap(); assert!(!cr.success); } @@ -1559,7 +1612,7 @@ mod tests { fn test_spawn_claude_captures_stderr() { let script = mock_claude_with_stderr(1, "stdout-ok", "ERROR: something broke", "stderr_capture"); let bin = script.to_str().unwrap(); - let cr = spawn_claude("prompt", "/tmp", 10, None, None, bin).unwrap(); + let cr = spawn_claude("prompt", "/tmp", 10, None, None, bin, false).unwrap(); assert!(!cr.success); assert!(cr.stderr.contains("ERROR: something broke"), "stderr should be captured, got: '{}'", cr.stderr); @@ -1569,7 +1622,7 @@ mod tests { fn test_spawn_claude_stderr_empty_on_success() { let script = mock_claude(0, "stderr_empty"); let bin = script.to_str().unwrap(); - let cr = spawn_claude("prompt", "/tmp", 10, None, None, bin).unwrap(); + let cr = spawn_claude("prompt", "/tmp", 10, None, None, bin, false).unwrap(); assert!(cr.success); assert!(cr.stderr.is_empty(), "stderr should be empty on clean exit"); } @@ -1603,6 +1656,7 @@ tasks:\n - id: t-1\n title: \"Step\"\n status: PENDING\n spec: \"Do it retry_count: 0, cleanup_on_failure: false, claude_bin: script.to_str().unwrap().to_string(), + spawns_per_tick: 1, }; let tel = test_telemetry(); @@ -1637,6 +1691,7 @@ tasks:\n - id: t-1\n title: \"Will Fail\"\n status: PENDING\n"; retry_count: 0, cleanup_on_failure: false, claude_bin: script.to_str().unwrap().to_string(), + spawns_per_tick: 1, }; let tel = test_telemetry(); @@ -1674,6 +1729,7 @@ tasks:\n - id: t-1\n title: \"Done\"\n status: PENDING\n - id: t-2\n retry_count: 0, cleanup_on_failure: false, claude_bin: script.to_str().unwrap().to_string(), + spawns_per_tick: 1, }; let tel = test_telemetry(); @@ -1726,6 +1782,7 @@ tasks:\n - id: t-1\n title: \"Done\"\n status: PENDING\n - id: t-2\n retry_count: 0, cleanup_on_failure: false, claude_bin: "true".to_string(), + spawns_per_tick: 1, }; let registry = PhaseRegistry::new(); let mock = crate::runner::MockPhaseRunner::new(vec![ @@ -1764,6 +1821,7 @@ tasks:\n - id: t-1\n title: \"Done\"\n status: PENDING\n - id: t-2\n retry_count: 0, cleanup_on_failure: false, claude_bin: "true".to_string(), + spawns_per_tick: 1, }; let registry = PhaseRegistry::new(); let mock = crate::runner::MockPhaseRunner::new(vec![ @@ -1801,6 +1859,7 @@ tasks:\n - id: t-1\n title: \"Done\"\n status: PENDING\n - id: t-2\n retry_count: 0, cleanup_on_failure: false, claude_bin: "true".to_string(), + spawns_per_tick: 1, }; let registry = PhaseRegistry::new(); let mock = crate::runner::MockPhaseRunner::new(vec![ @@ -1838,6 +1897,7 @@ tasks:\n - id: t-1\n title: \"Done\"\n status: PENDING\n - id: t-2\n retry_count: 0, cleanup_on_failure: false, claude_bin: "true".to_string(), + spawns_per_tick: 1, }; let registry = PhaseRegistry::new(); // spec-review runs first (pre-spec phase), then execute times out @@ -1876,6 +1936,7 @@ tasks:\n - id: t-1\n title: \"Done\"\n status: PENDING\n - id: t-2\n retry_count: 0, cleanup_on_failure: false, claude_bin: "true".to_string(), + spawns_per_tick: 1, }; let registry = PhaseRegistry::new(); let mock = crate::runner::MockPhaseRunner::new(vec![ @@ -1916,6 +1977,7 @@ tasks:\n - id: t-1\n title: \"Done\"\n status: PENDING\n - id: t-2\n retry_count: 0, cleanup_on_failure: false, claude_bin: "true".to_string(), + spawns_per_tick: 1, }; let registry = PhaseRegistry::new(); let mock = crate::runner::MockPhaseRunner::new(vec![ @@ -1960,6 +2022,7 @@ tasks:\n - id: t-1\n title: \"Done\"\n status: PENDING\n - id: t-2\n retry_count: 0, cleanup_on_failure: false, claude_bin: "true".to_string(), + spawns_per_tick: 1, }; let registry = PhaseRegistry::new(); let new_task = spec::BoiTask { @@ -2167,6 +2230,7 @@ tasks:\n - id: t-1\n title: \"Done\"\n status: PENDING\n - id: t-2\n retry_count: 0, cleanup_on_failure: false, claude_bin: "true".to_string(), + spawns_per_tick: 1, }; let registry = PhaseRegistry::new(); let mock = crate::runner::MockPhaseRunner::new(vec![ @@ -2197,4 +2261,120 @@ tasks:\n - id: t-1\n title: \"Done\"\n status: PENDING\n - id: t-2\n st.spec.status ); } + + // --- failure_capture integration tests --- + + #[test] + fn failure_capture_spec_error_non_null_on_task_failure() { + // spec-review (pre-task) consumes the first verdict; execute consumes the second + let yaml = "title: \"Fail Capture\"\ntasks:\n - id: t-1\n title: \"Will Fail\"\n status: PENDING\n"; + let (queue, spec_id, db_path, spec_path, repo) = setup_phase_test("fc_task_fail", yaml); + let config = WorkerConfig { + max_workers: 1, + task_timeout_secs: 10, + retry_count: 0, + cleanup_on_failure: false, + claude_bin: "true".to_string(), + spawns_per_tick: 1, + }; + let registry = PhaseRegistry::new(); + let mock = crate::runner::MockPhaseRunner::new(vec![ + Verdict::Proceed, // spec-review pre-phase + Verdict::Done { success: false, reason: "execute phase crashed".into() }, + ]); + let tel = test_telemetry(); + + with_test_env("true", repo.to_str().unwrap(), || { + let _ = run_worker_with_phases( + &spec_id, &spec_path, &db_path, + &HookConfig::default(), &config, ®istry, &mock, &tel, + ); + }); + + let st = queue.status(&spec_id).unwrap().unwrap(); + assert_eq!(st.spec.status, "failed"); + assert!( + st.spec.error.is_some(), + "spec.error must be non-NULL after failure, got None" + ); + let error = st.spec.error.unwrap(); + assert!( + !error.is_empty(), + "spec.error must not be empty after failure" + ); + } + + #[test] + fn failure_capture_task_error_non_null_on_task_failure() { + // spec-review (pre-task) consumes the first verdict; execute consumes the second + let yaml = "title: \"Task Error\"\ntasks:\n - id: t-1\n title: \"Will Fail\"\n status: PENDING\n"; + let (queue, spec_id, db_path, spec_path, repo) = setup_phase_test("fc_task_error", yaml); + let config = WorkerConfig { + max_workers: 1, + task_timeout_secs: 10, + retry_count: 0, + cleanup_on_failure: false, + claude_bin: "true".to_string(), + spawns_per_tick: 1, + }; + let registry = PhaseRegistry::new(); + let mock = crate::runner::MockPhaseRunner::new(vec![ + Verdict::Proceed, // spec-review pre-phase + Verdict::Done { success: false, reason: "timeout".into() }, + ]); + let tel = test_telemetry(); + + with_test_env("true", repo.to_str().unwrap(), || { + let _ = run_worker_with_phases( + &spec_id, &spec_path, &db_path, + &HookConfig::default(), &config, ®istry, &mock, &tel, + ); + }); + + let st = queue.status(&spec_id).unwrap().unwrap(); + assert_eq!(st.tasks[0].status, "FAILED"); + let task_error = st.tasks[0].error.as_ref() + .expect("task.error must be non-NULL after FAILED"); + assert!( + task_error.contains("Timeout"), + "task error should be typed FailureReason JSON with Timeout, got: {}", task_error + ); + } + + #[test] + fn failure_capture_error_is_valid_failure_reason_json() { + // spec-review (pre-task) consumes the first verdict; execute consumes the second + use crate::failure::FailureReason; + let yaml = "title: \"JSON Check\"\ntasks:\n - id: t-1\n title: \"Fail\"\n status: PENDING\n"; + let (queue, spec_id, db_path, spec_path, repo) = setup_phase_test("fc_json_check", yaml); + let config = WorkerConfig { + max_workers: 1, + task_timeout_secs: 10, + retry_count: 0, + cleanup_on_failure: false, + claude_bin: "true".to_string(), + spawns_per_tick: 1, + }; + let registry = PhaseRegistry::new(); + let mock = crate::runner::MockPhaseRunner::new(vec![ + Verdict::Proceed, // spec-review pre-phase + Verdict::Done { success: false, reason: "HTTP 429 rate limited".into() }, + ]); + let tel = test_telemetry(); + + with_test_env("true", repo.to_str().unwrap(), || { + let _ = run_worker_with_phases( + &spec_id, &spec_path, &db_path, + &HookConfig::default(), &config, ®istry, &mock, &tel, + ); + }); + + let st = queue.status(&spec_id).unwrap().unwrap(); + let error_str = st.spec.error.expect("spec.error must be non-NULL"); + let parsed = FailureReason::from_db(&error_str); + assert!( + matches!(parsed, FailureReason::ProviderRateLimit { .. }), + "HTTP 429 reason should parse as ProviderRateLimit, got: {:?}", parsed + ); + } } diff --git a/src/worktree.rs b/src/worktree.rs index cfc2630..ed307d9 100644 --- a/src/worktree.rs +++ b/src/worktree.rs @@ -183,12 +183,9 @@ mod tests { use super::*; use crate::test_utils; - use std::sync::Mutex; - static TEST_LOCK: Mutex<()> = Mutex::new(()); - #[test] fn test_create_and_cleanup() { - let _guard = TEST_LOCK.lock().unwrap(); + let _guard = test_utils::HOME_LOCK.lock().unwrap(); let repo_dir = test_utils::test_git_repo("wt-repo"); let wt_base = test_utils::test_dir("wt-home"); @@ -205,7 +202,7 @@ mod tests { #[test] fn test_create_idempotent() { - let _guard = TEST_LOCK.lock().unwrap(); + let _guard = test_utils::HOME_LOCK.lock().unwrap(); let repo_dir = test_utils::test_git_repo("wt-repo2"); let wt_base = test_utils::test_dir("wt-home2"); @@ -219,13 +216,13 @@ mod tests { #[test] fn test_cleanup_nonexistent_is_ok() { - let _guard = TEST_LOCK.lock().unwrap(); + let _guard = test_utils::HOME_LOCK.lock().unwrap(); assert!(cleanup("nonexistent-spec-xyz").is_ok()); } #[test] fn test_cleanup_stale_empty_base() { - let _guard = TEST_LOCK.lock().unwrap(); + let _guard = test_utils::HOME_LOCK.lock().unwrap(); let wt_base = test_utils::test_dir("wt-home3"); std::env::set_var("HOME", wt_base.to_str().unwrap()); assert!(cleanup_stale().is_ok()); @@ -233,7 +230,7 @@ mod tests { #[test] fn test_commit_and_merge_back() { - let _guard = TEST_LOCK.lock().unwrap(); + let _guard = test_utils::HOME_LOCK.lock().unwrap(); let repo_dir = test_utils::test_git_repo("wt-merge-repo"); let wt_base = test_utils::test_dir("wt-merge-home"); @@ -265,7 +262,7 @@ mod tests { #[test] fn test_commit_no_changes() { - let _guard = TEST_LOCK.lock().unwrap(); + let _guard = test_utils::HOME_LOCK.lock().unwrap(); let repo_dir = test_utils::test_git_repo("wt-no-change-repo"); let wt_base = test_utils::test_dir("wt-no-change-home"); @@ -282,7 +279,7 @@ mod tests { #[test] fn test_branch_deleted_after_cleanup() { - let _guard = TEST_LOCK.lock().unwrap(); + let _guard = test_utils::HOME_LOCK.lock().unwrap(); let repo_dir = test_utils::test_git_repo("wt-branch-del-repo"); let wt_base = test_utils::test_dir("wt-branch-del-home"); @@ -310,7 +307,7 @@ mod tests { #[test] fn test_source_repo_clean_during_worktree_work() { - let _guard = TEST_LOCK.lock().unwrap(); + let _guard = test_utils::HOME_LOCK.lock().unwrap(); let repo_dir = test_utils::test_git_repo("wt-isolation-repo"); let wt_base = test_utils::test_dir("wt-isolation-home"); diff --git a/templates/spec-critique-prompt.md b/templates/spec-critique-prompt.md new file mode 100644 index 0000000..6538953 --- /dev/null +++ b/templates/spec-critique-prompt.md @@ -0,0 +1,88 @@ +# Spec Critique + +You are a BOI spec quality reviewer. Evaluate the spec below for structural problems +**before** any work begins. Catching these issues now saves failed iterations later. + +Target: complete in under 60 seconds. One pass, not per-task loops. + +## Spec to Review + +``` +{{SPEC_CONTENT}} +``` + +--- + +## What to Check + +### (a) Task Sizing + +Each task must be completable in fewer than 15 minutes of Claude inference. Flag tasks that: +- Touch more than 3 files +- Require more than 200 lines of changes +- Have more than one distinct concern in a single spec + +### (b) Verify Commands + +Verify commands must actually test what the task changed. Flag verifies that: +- Use `tail -1 | grep` — fails on macOS; use `grep -q 'pattern'` directly +- Lack a `cd` to the workspace when the command needs a specific directory +- Have an unconditional `echo "PASS"` with no real check +- Use `cargo test ` without `2>&1` (stderr is not captured) +- Test something unrelated to the task's changes + +### (c) Spec Clarity + +Good specs name specific files, functions, and expected output. Flag tasks that: +- Use vague instructions like "do the thing" or "make it work" +- Don't specify which file to modify +- Don't name the functions, structs, or enums to create or change + +### (d) Dependencies + +Check that task dependencies are correct. Flag: +- Task B reads output from task A but has no `depends: [t-A]` entry +- Circular dependencies +- Dependencies listed that are clearly not needed + +### (e) Missing Verify Commands + +Every task MUST have a verify command. Flag tasks that lack one. + +--- + +## Output Format + +**If no problems are found:** + +Output exactly: + +``` +## Spec Approved + +All five criteria passed. The spec is ready for execution. +``` + +**If problems are found:** + +For each problem, output a critique block using the `[CRITIQUE]` prefix. List each +problem clearly so it can be addressed by spec-improve. + +``` +### [CRITIQUE] + +**Task:** +**Criterion:** +**Problem:** +**Fix:** +``` + +Use sequential IDs in titles: `[CRITIQUE] 1`, `[CRITIQUE] 2`, etc. + +## Rules + +- Be concise. One finding per problem. +- Deduplicate: if the same verify pattern appears in three tasks, write one finding. +- Do not flag style preferences or minor wording issues — only structural defects that + would cause execution to fail or produce misleading results. +- Do NOT output `## Spec Approved` if any problems are found. diff --git a/templates/spec-improve-prompt.md b/templates/spec-improve-prompt.md new file mode 100644 index 0000000..8b25296 --- /dev/null +++ b/templates/spec-improve-prompt.md @@ -0,0 +1,56 @@ +# Spec Improve + +You are a BOI spec editor. A previous critique phase found problems in the spec below. +Your job is to rewrite the spec to address all critique feedback, then save it back to +disk. + +--- + +## Critique Feedback + +The following problems were found by the spec-critique phase: + +``` +{{CRITIQUE_OUTPUT}} +``` + +--- + +## Current Spec + +Spec file path: `{{SPEC_PATH}}` + +```yaml +{{SPEC_CONTENT}} +``` + +--- + +## Instructions + +1. Read each `[CRITIQUE]` item above carefully. +2. Rewrite the spec YAML to address every identified problem: + - Fix task sizing: split oversized tasks into smaller, focused sub-tasks + - Fix verify commands: replace broken patterns with correct shell assertions + - Fix spec clarity: add specific file names, function names, and expected output + - Fix dependencies: add missing `depends:` entries, remove circular deps + - Add missing verify commands where flagged +3. Write the updated spec YAML to disk at: `{{SPEC_PATH}}` + - Use the exact same YAML structure as the original + - Do not change task IDs or statuses — only fix content + - Write the full updated spec (not a diff) +4. After saving, output exactly: + +## Spec Improved + +--- + +## Rules + +- Preserve all `status:` fields unchanged (do not change PENDING to DONE, etc.) +- Preserve all task `id:` values unchanged +- Do not add new tasks unless a `split` was explicitly requested in the critique +- Do not remove tasks +- The verify commands you write MUST be runnable shell one-liners that exit 0 on + success and non-zero on failure, with no manual steps +- After writing the file, always output `## Spec Improved` so the pipeline can proceed diff --git a/tests/fixtures/v2-smoke.yaml b/tests/fixtures/v2-smoke.yaml new file mode 100644 index 0000000..aa9d243 --- /dev/null +++ b/tests/fixtures/v2-smoke.yaml @@ -0,0 +1,9 @@ +title: "v2 smoke" +mode: v2 +tasks: + - id: t-1 + title: "Create smoke output file" + status: PENDING + spec: | + Create a file named smoke-output.txt with content "hello from v2 smoke test". + verify: "test -f smoke-output.txt" diff --git a/tests/openrouter_smoke.rs b/tests/openrouter_smoke.rs new file mode 100644 index 0000000..3c97f27 --- /dev/null +++ b/tests/openrouter_smoke.rs @@ -0,0 +1,59 @@ +/// Live smoke test for OpenRouterRuntime. +/// +/// Gated behind OPENROUTER_API_KEY — skips silently when the key is absent. +/// Run with: +/// OPENROUTER_API_KEY=sk-... cargo test --test openrouter_smoke -- --nocapture +use std::time::Duration; + +use boi::runtime::{openrouter::OpenRouterRuntime, PhaseRuntime}; + +#[test] +fn smoke_openrouter_live() { + let key = match std::env::var("OPENROUTER_API_KEY") { + Ok(k) if !k.is_empty() => k, + _ => { + eprintln!("SKIP: OPENROUTER_API_KEY not set — skipping live smoke test"); + return; + } + }; + + let runtime = OpenRouterRuntime::new(); + + let prompt = "Reply with exactly one word: hello"; + let model = "gemini-flash"; + let timeout = Duration::from_secs(30); + + let start = std::time::Instant::now(); + let out = runtime + .execute(prompt, model, timeout) + .expect("OpenRouter call failed — check OPENROUTER_API_KEY and network"); + let elapsed_ms = start.elapsed().as_millis(); + + // Core assertions + assert!(!out.text.is_empty(), "response text must not be empty"); + assert!( + out.input_tokens.unwrap_or(0) > 0, + "input_tokens must be non-zero; got {:?}", + out.input_tokens + ); + assert!( + out.output_tokens.unwrap_or(0) > 0, + "output_tokens must be non-zero; got {:?}", + out.output_tokens + ); + assert!(out.duration_ms > 0, "duration_ms must be non-zero"); + + println!("=== OpenRouter smoke result ==="); + println!("model: {model}"); + println!("prompt: {prompt:?}"); + println!("response: {:?}", out.text); + println!("input_tokens: {:?}", out.input_tokens); + println!("output_tokens: {:?}", out.output_tokens); + println!("cost_usd: {:?}", out.cost_usd); + println!("duration_ms: {}", out.duration_ms); + println!("wall_ms: {elapsed_ms}"); + println!( + "api_key_prefix: {}...", + &key[..key.len().min(8)] + ); +} diff --git a/tests/v2_smoke.rs b/tests/v2_smoke.rs new file mode 100644 index 0000000..d0e9d69 --- /dev/null +++ b/tests/v2_smoke.rs @@ -0,0 +1,176 @@ +use std::path::PathBuf; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Mutex; + +static COUNTER: AtomicU64 = AtomicU64::new(0); +static LOCK: Mutex<()> = Mutex::new(()); + +fn unique_id() -> u64 { + COUNTER.fetch_add(1, Ordering::SeqCst) +} + +fn test_dir(label: &str) -> PathBuf { + let n = unique_id(); + let dir = std::env::temp_dir().join(format!( + "boi-v2smoke-{}-{}-{}", + label, + std::process::id(), + n + )); + let _ = std::fs::remove_dir_all(&dir); + std::fs::create_dir_all(&dir).expect("failed to create test dir"); + dir +} + +fn test_file(label: &str, ext: &str) -> PathBuf { + let n = unique_id(); + std::env::temp_dir().join(format!( + "boi-v2smoke-{}-{}-{}.{}", + label, + std::process::id(), + n, + ext + )) +} + +fn test_git_repo(label: &str) -> PathBuf { + use std::process::Command; + let dir = test_dir(label); + Command::new("git").args(["init"]).current_dir(&dir).output().expect("git init"); + Command::new("git") + .args(["config", "user.email", "test@boi.test"]) + .current_dir(&dir) + .output() + .expect("git config email"); + Command::new("git") + .args(["config", "user.name", "BOI Test"]) + .current_dir(&dir) + .output() + .expect("git config name"); + std::fs::write(dir.join("README.md"), "test").expect("write README"); + Command::new("git").args(["add", "."]).current_dir(&dir).output().expect("git add"); + Command::new("git") + .args(["commit", "-m", "init"]) + .current_dir(&dir) + .output() + .expect("git commit"); + dir +} + +fn write_mock_claude(home: &PathBuf) -> PathBuf { + use std::os::unix::fs::PermissionsExt; + let path = test_file("mock-claude", "sh"); + // Creates the target file and outputs all approval signals used across v2 phases. + std::fs::write( + &path, + r###"#!/bin/sh +touch smoke-output.txt +echo "## Spec Approved" +echo "## Spec Improved" +echo "## Review Approved" +echo "## Docs Updated" +echo "## Critic Approved" +exit 0 +"###, + ) + .expect("write mock claude"); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)) + .expect("chmod mock claude"); + let _ = home; // unused; parameter kept for clarity + path +} + +#[test] +fn v2_smoke() { + let _guard = LOCK.lock().unwrap(); + + // Isolated HOME so worktrees land in a temp dir, not ~/.boi/worktrees/ + let fake_home = test_dir("home"); + std::env::set_var("HOME", fake_home.to_str().unwrap()); + + // Point to the repo's pipelines.toml so mode.v2 is available. + let pipelines_path = format!("{}/phases/pipelines.toml", env!("CARGO_MANIFEST_DIR")); + std::env::set_var("BOI_PIPELINES_FILE", &pipelines_path); + + let repo = test_git_repo("repo"); + let mock_bin = write_mock_claude(&fake_home); + let db_path = test_file("queue", "db"); + + // Parse spec and inject workspace dynamically. + let spec_yaml = format!("{}/tests/fixtures/v2-smoke.yaml", env!("CARGO_MANIFEST_DIR")); + let content = std::fs::read_to_string(&spec_yaml).expect("read v2-smoke.yaml"); + let mut spec = boi::spec::parse(&content).expect("parse v2-smoke.yaml"); + spec.workspace = Some(repo.to_str().unwrap().to_string()); + + let queue = boi::queue::Queue::open(db_path.to_str().unwrap()).expect("open queue"); + let spec_id = queue.enqueue(&spec, Some(&spec_yaml)).expect("enqueue"); + + let telemetry = boi::telemetry::Telemetry::new(db_path.clone()); + let runner = boi::runner::ClaudePhaseRunner::new( + telemetry.clone(), + mock_bin.to_str().unwrap().to_string(), + ) + .with_repo_path(repo.to_str().unwrap()); + + let registry = boi::phases::PhaseRegistry::new(); + let hook_cfg = boi::hooks::HookConfig::default(); + let config = boi::worker::WorkerConfig { + task_timeout_secs: 10, + ..Default::default() + }; + + boi::worker::run_worker_with_phases( + &spec_id, + &spec_yaml, + db_path.to_str().unwrap(), + &hook_cfg, + &config, + ®istry, + &runner, + &telemetry, + ) + .expect("pipeline should complete without error"); + + // --- Phase assertions --- + let summaries = queue.phase_cost_summary(&spec_id).expect("phase_cost_summary"); + let phases: std::collections::HashSet = + summaries.iter().map(|s| s.phase.clone()).collect(); + + assert!(phases.contains("spec-critique"), "spec-critique should have run; got: {:?}", phases); + assert!(phases.contains("execute"), "execute should have run; got: {:?}", phases); + assert!(phases.contains("review"), "review should have run; got: {:?}", phases); + assert!(phases.contains("commit"), "commit should have run; got: {:?}", phases); + assert!(phases.contains("critic"), "critic should have run; got: {:?}", phases); + assert!(phases.contains("merge"), "merge should have run; got: {:?}", phases); + assert!(phases.contains("cleanup"), "cleanup should have run; got: {:?}", phases); + + // --- commit ran: git log should contain a BOI commit --- + let git_log = std::process::Command::new("git") + .args(["log", "--oneline"]) + .current_dir(&repo) + .output() + .expect("git log"); + let log_str = String::from_utf8_lossy(&git_log.stdout); + assert!( + log_str.contains("boi("), + "expected BOI commit in git log, got: {}", + log_str + ); + + // --- cleanup ran: worktree directory should be gone --- + let worktree_dir = fake_home.join(".boi").join("worktrees").join(&spec_id); + assert!( + !worktree_dir.exists(), + "worktree should be gone after cleanup, but {} still exists", + worktree_dir.display() + ); + + // --- file exists in target branch after merge --- + assert!( + repo.join("smoke-output.txt").exists(), + "smoke-output.txt should exist in repo after merge" + ); + + // Restore env + std::env::remove_var("BOI_PIPELINES_FILE"); +}