diff --git a/.github/workflows/on-merge.yml b/.github/workflows/on-merge.yml index 9244b4d..a081932 100644 --- a/.github/workflows/on-merge.yml +++ b/.github/workflows/on-merge.yml @@ -130,6 +130,49 @@ jobs: run: | sudo apt-get update sudo apt-get install -yq openssl libssl-dev musl-tools perl make + # `usearch` (pulled in transitively via cersei-agent -> cersei-tools -> + # cersei-embeddings) compiles C++ through `cxx`, so the musl target needs a + # musl-targeting C++ compiler. `musl-tools` only ships `musl-gcc` (C), not + # g++, so we install a full GNU musl-cross toolchain and point cc-rs + the + # Rust linker at it. GNU (not zig/clang) deliberately: `link-cplusplus` + # links GNU libstdc++, which this toolchain bundles statically — the ABI + # match is what lets the fully-static musl binary link the C++ objects. + # + # Sourced from the cross-tools/musl-cross GitHub release (version-pinned, + # checksum-verified) rather than musl.cc, which is unreachable from + # GitHub's Azure-hosted runners; GitHub release assets are always reachable. Kept in + # sync with release-prebuild.yml.stub (which dist injects into release.yml). 
+ - name: Install MUSL C++ cross toolchain + if: ${{ contains(join(matrix.targets, ','), '-linux-musl') }} + shell: bash + run: | + set -euo pipefail + ver=20260515 + file=x86_64-unknown-linux-musl.tar.xz + base="https://github.com/cross-tools/musl-cross/releases/download/${ver}" + for attempt in 1 2 3; do + if curl -fSL --retry 3 --connect-timeout 30 -o "/tmp/${file}" "${base}/${file}" \ + && curl -fSL --retry 3 --connect-timeout 30 -o "/tmp/${file}.sha256" "${base}/${file}.sha256"; then + break + fi + echo "download attempt $attempt failed; retrying in 10s" >&2 + sleep 10 + done + expected=$(awk '{print $1}' "/tmp/${file}.sha256") + actual=$(sha256sum "/tmp/${file}" | awk '{print $1}') + if [ "$expected" != "$actual" ]; then + echo "checksum mismatch for ${file}: got ${actual}, expected ${expected}" >&2 + exit 1 + fi + sudo tar -xJf "/tmp/${file}" -C /opt + bindir=/opt/x86_64-unknown-linux-musl/bin + echo "$bindir" >> "$GITHUB_PATH" + { + echo "CC_x86_64_unknown_linux_musl=$bindir/x86_64-unknown-linux-musl-gcc" + echo "CXX_x86_64_unknown_linux_musl=$bindir/x86_64-unknown-linux-musl-g++" + echo "AR_x86_64_unknown_linux_musl=$bindir/x86_64-unknown-linux-musl-ar" + echo "CARGO_TARGET_X86_64_UNKNOWN_LINUX_MUSL_LINKER=$bindir/x86_64-unknown-linux-musl-gcc" + } >> "$GITHUB_ENV" - name: Install dist run: ${{ matrix.install_dist.run }} - name: Install dependencies diff --git a/.github/workflows/release-prebuild.yml.stub b/.github/workflows/release-prebuild.yml.stub index 4ea67f2..b8396ca 100644 --- a/.github/workflows/release-prebuild.yml.stub +++ b/.github/workflows/release-prebuild.yml.stub @@ -5,3 +5,45 @@ run: | sudo apt-get update sudo apt-get install -yq openssl libssl-dev musl-tools perl make +# `usearch` (pulled in transitively via cersei-agent -> cersei-tools -> +# cersei-embeddings) compiles C++ through `cxx`, so the musl target needs a +# musl-targeting C++ compiler. 
`musl-tools` only ships `musl-gcc` (C), not g++, +# so we install a full GNU musl-cross toolchain and point cc-rs + the Rust +# linker at it. GNU (not zig/clang) deliberately: `link-cplusplus` links GNU +# libstdc++, which this toolchain bundles statically — the ABI match is what +# lets the fully-static musl binary link the C++ objects. +# +# Sourced from the cross-tools/musl-cross GitHub release (version-pinned, +# checksum-verified) rather than musl.cc, which is unreachable from GitHub's +# Azure-hosted runners; GitHub release assets always are. +- name: "Install MUSL C++ cross toolchain" + if: ${{ contains(join(matrix.targets, ','), '-linux-musl') }} + shell: bash + run: | + set -euo pipefail + ver=20260515 + file=x86_64-unknown-linux-musl.tar.xz + base="https://github.com/cross-tools/musl-cross/releases/download/${ver}" + for attempt in 1 2 3; do + if curl -fSL --retry 3 --connect-timeout 30 -o "/tmp/${file}" "${base}/${file}" \ + && curl -fSL --retry 3 --connect-timeout 30 -o "/tmp/${file}.sha256" "${base}/${file}.sha256"; then + break + fi + echo "download attempt $attempt failed; retrying in 10s" >&2 + sleep 10 + done + expected=$(awk '{print $1}' "/tmp/${file}.sha256") + actual=$(sha256sum "/tmp/${file}" | awk '{print $1}') + if [ "$expected" != "$actual" ]; then + echo "checksum mismatch for ${file}: got ${actual}, expected ${expected}" >&2 + exit 1 + fi + sudo tar -xJf "/tmp/${file}" -C /opt + bindir=/opt/x86_64-unknown-linux-musl/bin + echo "$bindir" >> "$GITHUB_PATH" + { + echo "CC_x86_64_unknown_linux_musl=$bindir/x86_64-unknown-linux-musl-gcc" + echo "CXX_x86_64_unknown_linux_musl=$bindir/x86_64-unknown-linux-musl-g++" + echo "AR_x86_64_unknown_linux_musl=$bindir/x86_64-unknown-linux-musl-ar" + echo "CARGO_TARGET_X86_64_UNKNOWN_LINUX_MUSL_LINKER=$bindir/x86_64-unknown-linux-musl-gcc" + } >> "$GITHUB_ENV" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e593cf6..2d5583a 100644 --- a/.github/workflows/release.yml +++ 
b/.github/workflows/release.yml @@ -136,6 +136,37 @@ jobs: run: | sudo apt-get update sudo apt-get install -yq openssl libssl-dev musl-tools perl make + - name: "Install MUSL C++ cross toolchain" + if: "${{ contains(join(matrix.targets, ','), '-linux-musl') }}" + run: | + set -euo pipefail + ver=20260515 + file=x86_64-unknown-linux-musl.tar.xz + base="https://github.com/cross-tools/musl-cross/releases/download/${ver}" + for attempt in 1 2 3; do + if curl -fSL --retry 3 --connect-timeout 30 -o "/tmp/${file}" "${base}/${file}" \ + && curl -fSL --retry 3 --connect-timeout 30 -o "/tmp/${file}.sha256" "${base}/${file}.sha256"; then + break + fi + echo "download attempt $attempt failed; retrying in 10s" >&2 + sleep 10 + done + expected=$(awk '{print $1}' "/tmp/${file}.sha256") + actual=$(sha256sum "/tmp/${file}" | awk '{print $1}') + if [ "$expected" != "$actual" ]; then + echo "checksum mismatch for ${file}: got ${actual}, expected ${expected}" >&2 + exit 1 + fi + sudo tar -xJf "/tmp/${file}" -C /opt + bindir=/opt/x86_64-unknown-linux-musl/bin + echo "$bindir" >> "$GITHUB_PATH" + { + echo "CC_x86_64_unknown_linux_musl=$bindir/x86_64-unknown-linux-musl-gcc" + echo "CXX_x86_64_unknown_linux_musl=$bindir/x86_64-unknown-linux-musl-g++" + echo "AR_x86_64_unknown_linux_musl=$bindir/x86_64-unknown-linux-musl-ar" + echo "CARGO_TARGET_X86_64_UNKNOWN_LINUX_MUSL_LINKER=$bindir/x86_64-unknown-linux-musl-gcc" + } >> "$GITHUB_ENV" + shell: "bash" - name: Install dist run: ${{ matrix.install_dist.run }} # Get the dist-manifest diff --git a/CHECKS.md b/CHECKS.md index 41c7f20..746eb0c 100644 --- a/CHECKS.md +++ b/CHECKS.md @@ -2,31 +2,47 @@ # Requirement Provider-Agnostic Execution -The agent executor must be swappable so we can evolve from shelling out to `claude -p` toward a Claude Code SDK (or another provider) without rewriting the execution phase. This extensibility is the whole point of the configuration/executor seam. 
+The agent executor is swappable behind a trait so execution can run over different engines (the in-process `cersei-agent` executor, the legacy `claude -p` fallback, or a test fake) without rewriting the execution phase. This extensibility is the whole point of the configuration/executor seam. ## Check Boxed Executor Trait -Inspect the Rust sources for the `multi check` feature. Confirm that agent execution is defined behind a trait (for example `CheckExecutor`) that is consumed as a boxed, dynamically-dispatched trait object — a type alias of the form `Box<dyn CheckExecutor>`, mirroring the existing `BoxedIngress`, `BoxedMonitor`, and `BoxedPlatform` aliases. The check passes only if the execution phase depends on this trait rather than naming a concrete `claude -p` executor type. Report a failure if the execution path references a concrete executor struct directly instead of the trait object. +Inspect the Rust sources for the `multi check` feature. Confirm that agent execution is defined behind a trait (for example `CheckExecutor`) that is consumed as a boxed or `Arc`-wrapped, dynamically-dispatched trait object — for example a type alias of the form `Box<dyn CheckExecutor>`, mirroring the existing `BoxedIngress`, `BoxedMonitor`, and `BoxedPlatform` aliases. The check passes only if the execution phase depends on this trait rather than naming a concrete executor type. Report a failure if the execution path references a concrete executor struct (such as the cersei or claude executor) directly instead of the trait object. + +# Requirement In-Process Agent Execution + +The default executor runs each check's agent **inside the CLI process** via the `cersei-agent` library — there is no longer any requirement to shell out to the `claude` CLI for a check to run. A `claude -p` executor is retained only as an optional, explicitly-selected migration fallback. + +## Check Default Executor Runs In-Process + +Inspect the executor implementations and how the default executor is selected from configuration. 
Confirm that the default execution engine builds and runs a `cersei_agent::Agent` in-process (calling its `run`/`run_stream` method) rather than spawning the `claude` CLI. Confirm that any use of `std::process::Command` / `tokio::process` to launch `claude -p` lives only in the non-default fallback executor, gated behind an explicit `executor` selection. The check fails if running a check with the default configuration requires spawning an external `claude` process. # Requirement Trustworthy In-Process Reporting -Check verdicts must arrive through the single `report-check-result` MCP tool, served from inside the CLI process. This guardrail is what makes results reliable despite agent nondeterminism; running the server out-of-process, or trusting agent stdout, would defeat it. +Check verdicts must arrive through a single in-process **judge tool**, `report-check-result`, registered on the agent — not from agent stdout, a sentinel file, or a process exit code. Capturing the verdict in-process is what makes results reliable despite agent nondeterminism. + +## Check Verdict Captured Via Judge Tool + +Inspect how a check's verdict is reported and captured. Confirm there is an in-process tool named `report-check-result` (implementing the cersei `Tool` trait) that is registered fresh on each check's agent and writes the verdict into an in-process sink the executor reads after the run. Confirm the verdict is **not** carried over a network MCP server or an external transport: there should be no in-process HTTP/`rmcp` server standing up per-check endpoints for reporting. The check fails if verdict reporting relies on an out-of-process server or a network endpoint. -## Check Server Runs In-Process +## Check Verdict From Tool Call Not Stdout -Inspect how the result-reporting MCP server is started. Confirm it is built with the `rmcp` framework and run on a Tokio task within the CLI process — not spawned as a child process. 
Search for any use of `std::process::Command` or `tokio::process` that would launch the server externally; if the MCP server runs as a subprocess, the check fails. It passes only if the server runs on a task in the same process as the CLI. +Confirm that a check's pass/fail verdict is derived from the `success` boolean reported through the `report-check-result` judge tool, and never from the agent's stdout, stderr, or process exit code. Verify that a check whose agent finishes without ever calling the tool (for example by hitting the turn limit) is treated as a failure or error rather than silently passing. The check fails if the verdict is obtained by parsing stdout or by reading an exit status. -## Check Verdict From Tool Call +# Requirement Least-Privilege Agent Permissions -Confirm that a check's pass/fail verdict is derived from the `success` boolean of the `report-check-result` tool call, and never from the agent's stdout, stderr, or process exit code. Verify that a check whose agent exits without ever calling the tool is treated as a failure or error rather than silently passing. The check fails if the verdict is obtained by parsing stdout or by reading the agent's exit status. +A verification agent should observe, not mutate. By default each check's agent gets a read-only tool set and a permission policy that denies anything beyond read-only, so a check cannot alter files even within its sandbox. -# Requirement Stateful MCP Sessions +## Check Read-Only Tools By Default -The Claude Code MCP client connects to the result-reporting server over Streamable HTTP and expects the standard *stateful* session flow: it sends `initialize`, receives an `Mcp-Session-Id`, and issues subsequent requests under that session. A stateless server stalls this multi-step handshake, so the in-process MCP server must run in stateful mode. +Inspect how the in-process executor configures its agent's tools and permission policy. 
Confirm that, by default, the agent is given a read-only tool set (such as file-read, grep, and glob) plus the reporting/judge tool, and a read-only permission policy (for example `AllowReadOnly`) — not a full read-write-execute tool set with an allow-all policy. The check fails if the default agent is granted write or shell-execution tools, or an allow-everything permission policy. -## Check Server Is Stateful +# Requirement Isolated Agent Sessions -Inspect how the `rmcp` result-reporting MCP server is configured (its `StreamableHttpServerConfig`). Confirm the server runs in stateful Streamable HTTP mode — that is, `stateful_mode` is true and is not set to `false`. The check passes only if the server is configured for stateful sessions; it fails if `stateful_mode` is set to `false` (stateless mode). +Checks run concurrently and the agent's shell tools persist per-session state in a process-global registry. Each check must therefore use a distinct session identifier so parallel agents cannot clobber one another's shell state. + +## Check Distinct Session Per Check + +Inspect how the in-process executor assigns a session identifier to each check's agent. Confirm that each check is given a unique `session_id` (for example derived from the check's id) rather than a shared constant, and that the per-session shell state is cleared on teardown. The check fails if all checks share one session id, or if per-check shell state is never cleared. # Requirement Checks Cannot Corrupt the Workspace @@ -64,6 +80,14 @@ Confirm that discovery-time validation produces `miette` diagnostics for both ma Confirm that the command exits with status code 0 when every requirement is satisfied (including the trivial case of an empty suite), and with status code 1 when one or more requirements are unsatisfied. The check fails if a run containing an unsatisfied requirement can exit 0, or if an all-satisfied run exits with a non-zero code. 
-# Requirement No Hidden Configuration +# Requirement Layered Configuration + +The model, provider, effort, and executor are resolved from three sources with standard CLI precedence — flag, then environment, then config file (flag wins) — while credentials are read only from each provider's native environment variable, never from the config file. + +## Check Precedence And Validation + +Inspect the configuration phase of `multi check`. Confirm that provider/model/effort/executor are merged from a config file, `MULTI_`-prefixed environment variables, and CLI flags, with flags overriding environment overriding file. Confirm the selected model is validated against a hardcoded allowlist of known IDs for the provider (an unknown ID is a clear error). The check fails if any of these values cannot be set from configuration, or if the merge precedence is not flag > env > file. + +## Check Credentials Are Environment-Only -For the MVP, the model, model-provider URL, and effort level are hardcoded and injected into the pipeline. Inspect the configuration phase of `multi check` and confirm these values are hardcoded, and that the discovery and execution phases do not read them from environment variables or from a configuration file. The requirement is satisfied only if configuration is hardcoded for the MVP; it is not satisfied if any model, provider, or effort value is sourced from the environment or a configuration file. +Confirm that provider API keys are read directly from each provider's native environment variable (for example `ANTHROPIC_API_KEY`) and are never loaded from the config file or from a `MULTI_`-prefixed variable. The check fails if a credential can be supplied through the config file or the `MULTI_` namespace. 
diff --git a/Cargo.lock b/Cargo.lock index 9476b98..4338c75 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -191,7 +191,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -663,54 +663,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "axum" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" -dependencies = [ - "axum-core", - "bytes", - "futures-util", - "http 1.3.1", - "http-body 1.0.1", - "http-body-util", - "hyper 1.7.0", - "hyper-util", - "itoa", - "matchit", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "serde_core", - "serde_json", - "serde_path_to_error", - "sync_wrapper 1.0.2", - "tokio", - "tower", - "tower-layer", - "tower-service", -] - -[[package]] -name = "axum-core" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" -dependencies = [ - "bytes", - "futures-core", - "http 1.3.1", - "http-body 1.0.1", - "http-body-util", - "mime", - "pin-project-lite", - "sync_wrapper 1.0.2", - "tower-layer", - "tower-service", -] - [[package]] name = "backtrace" version = "0.3.76" @@ -778,16 +730,16 @@ version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.13.0", "cexpr", "clang-sys", - "itertools", + "itertools 0.13.0", "log", "prettyplease", "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 2.1.1", "shlex", "syn 2.0.109", ] @@ -800,9 +752,18 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = 
"2.10.0" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" + +[[package]] +name = "bitpacking" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +checksum = "96a7139abd3d9cebf8cd6f920a389cf3dc9576172e32f4563f188cae3c3eb019" +dependencies = [ + "crunchy", +] [[package]] name = "blake2" @@ -838,7 +799,7 @@ version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645" dependencies = [ - "darling 0.21.3", + "darling", "ident_case", "prettyplease", "proc-macro2", @@ -948,10 +909,136 @@ dependencies = [ ] [[package]] -name = "cersei-provider" +name = "census" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" + +[[package]] +name = "cersei-agent" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a08fc0818d95193cd88b0c48614d0c4e365575c24dd661c77bfd1e7bed2aeb24" +dependencies = [ + "anyhow", + "async-trait", + "cersei-compression", + "cersei-hooks", + "cersei-mcp", + "cersei-memory", + "cersei-provider", + "cersei-tools", + "cersei-types", + "chrono", + "futures", + "parking_lot", + "serde", + "serde_json", + "tempfile", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", + "uuid", +] + +[[package]] +name = "cersei-compression" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebaa0636dc53483e65754b84c4c3e21f925eb993bb744fcf5e0a5f5af78dff2" +dependencies = [ + "anyhow", + "once_cell", + "regex", + "serde", + "serde_json", + "toml", + "tracing", +] + +[[package]] +name = "cersei-embeddings" +version = "0.1.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "10381161084bfe0e8667956569fb84c27bbae77ce7464418be7d494d778d83ea" +dependencies = [ + "async-trait", + "futures", + "reqwest 0.12.24", + "serde", + "serde_json", + "thiserror 2.0.17", + "tokio", + "tracing", + "usearch", +] + +[[package]] +name = "cersei-hooks" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7ba452fa659817bd9d156bfb76012db1e7b4742166a3fd306542df285889f0a" +dependencies = [ + "async-trait", + "cersei-types", + "serde", + "serde_json", + "tracing", +] + +[[package]] +name = "cersei-lsp" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "242d5870423770ca850cb26e2af874bf84d2fac0f370ebc398fd9a82fa33c468" +dependencies = [ + "dashmap", + "serde", + "serde_json", + "thiserror 2.0.17", + "tokio", + "tracing", + "which", +] + +[[package]] +name = "cersei-mcp" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82a89c0952ebfdb2001cc915150e9a65fc1400caabd34b8c3d27bd94d8d39fdc" +dependencies = [ + "async-trait", + "cersei-types", + "serde", + "serde_json", + "tokio", + "tracing", + "uuid", +] + +[[package]] +name = "cersei-memory" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac34f8f70b62c3e8cd7a7b2bf2bdfc015127a8fde80bc3d103e2ac078bb0e488" +checksum = "df45f5f8e75c59ee65f063c961be9278eb051e0b1494b4b3ea6de53c066953bc" +dependencies = [ + "async-trait", + "cersei-types", + "chrono", + "dirs", + "parking_lot", + "serde", + "serde_json", + "tempfile", + "tokio", + "tracing", + "uuid", +] + +[[package]] +name = "cersei-provider" +version = "0.1.9" dependencies = [ "async-trait", "base64 0.22.1", @@ -967,6 +1054,48 @@ dependencies = [ "url", ] +[[package]] +name = "cersei-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2b47981f73e04abfca64959ac2a82908c08f3027f03ba080e90de8042a438720" +dependencies = [ + "async-trait", + "base64 0.22.1", + "cersei-embeddings", + "cersei-lsp", + "cersei-mcp", + "cersei-types", + "chrono", + "dashmap", + "dirs", + "glob", + "html2text", + "nix 0.29.0", + "notify", + "once_cell", + "parking_lot", + "regex", + "reqwest 0.12.24", + "schemars 0.8.22", + "serde", + "serde_json", + "similar", + "tantivy", + "tempfile", + "tokio", + "tracing", + "tree-sitter", + "tree-sitter-bash", + "tree-sitter-go", + "tree-sitter-python", + "tree-sitter-rust", + "tree-sitter-typescript", + "uuid", + "walkdir", + "which", +] + [[package]] name = "cersei-types" version = "0.1.9" @@ -1127,6 +1256,17 @@ dependencies = [ "cc", ] +[[package]] +name = "codespan-reporting" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681" +dependencies = [ + "serde", + "termcolor", + "unicode-width 0.2.2", +] + [[package]] name = "colorchoice" version = "1.0.4" @@ -1143,9 +1283,9 @@ dependencies = [ "entities", "finl_unicode", "jetscii", - "phf", - "phf_codegen", - "rustc-hash", + "phf 0.13.1", + "phf_codegen 0.13.1", + "rustc-hash 2.1.1", "smallvec", "typed-arena", ] @@ -1259,6 +1399,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.6" @@ -1270,54 +1416,93 @@ dependencies = [ ] [[package]] -name = "daemonize" -version = "0.5.0" +name = "cxx" +version = "1.0.194" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab8bfdaacb3c887a54d41bdf48d3af8873b3f5566469f8ba21b92057509f116e" +checksum = 
"747d8437319e3a2f43d93b341c137927ca70c0f5dabeea7a005a73665e247c7e" dependencies = [ - "libc", + "cc", + "cxx-build", + "cxxbridge-cmd", + "cxxbridge-flags", + "cxxbridge-macro", + "foldhash 0.2.0", + "link-cplusplus", ] [[package]] -name = "darling" -version = "0.21.3" +name = "cxx-build" +version = "1.0.194" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +checksum = "b0f4697d190a142477b16aef7da8a99bfdc41e7e8b1687583c0d23a79c7afc1e" dependencies = [ - "darling_core 0.21.3", - "darling_macro 0.21.3", + "cc", + "codespan-reporting", + "indexmap 2.12.0", + "proc-macro2", + "quote", + "scratch", + "syn 2.0.109", ] [[package]] -name = "darling" -version = "0.23.0" +name = "cxxbridge-cmd" +version = "1.0.194" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +checksum = "d0956799fa8678d4c50eed028f2de1c0552ae183c76e976cf7ca8c4e36a7c328" dependencies = [ - "darling_core 0.23.0", - "darling_macro 0.23.0", + "clap 4.5.51", + "codespan-reporting", + "indexmap 2.12.0", + "proc-macro2", + "quote", + "syn 2.0.109", ] [[package]] -name = "darling_core" -version = "0.21.3" +name = "cxxbridge-flags" +version = "1.0.194" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" +checksum = "23384a836ab4f0ad98ace7e3955ad2de39de42378ab487dc28d3990392cb283a" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.194" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6acc6b5822b9526adfb4fc377b67128fdd60aac757cc4a741a6278603f763cf" dependencies = [ - "fnv", - "ident_case", + "indexmap 2.12.0", "proc-macro2", "quote", - "strsim 0.11.1", "syn 2.0.109", ] +[[package]] +name = "daemonize" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ab8bfdaacb3c887a54d41bdf48d3af8873b3f5566469f8ba21b92057509f116e" +dependencies = [ + "libc", +] + +[[package]] +name = "darling" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +dependencies = [ + "darling_core", + "darling_macro", +] + [[package]] name = "darling_core" -version = "0.23.0" +version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" +checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" dependencies = [ + "fnv", "ident_case", "proc-macro2", "quote", @@ -1331,20 +1516,23 @@ version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ - "darling_core 0.21.3", + "darling_core", "quote", "syn 2.0.109", ] [[package]] -name = "darling_macro" -version = "0.23.0" +name = "dashmap" +version = "6.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" +checksum = "e6361d5c062261c78a176addb82d4c821ae42bed6089de0e12603cd25de2059c" dependencies = [ - "darling_core 0.23.0", - "quote", - "syn 2.0.109", + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", ] [[package]] @@ -1414,7 +1602,28 @@ version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16f5094c54661b38d03bd7e50df373292118db60b585c08a411c6d840017fe7d" dependencies = [ - "dirs-sys", + "dirs-sys 0.5.0", +] + +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys 0.4.1", +] + +[[package]] +name = 
"dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.4.6", + "windows-sys 0.48.0", ] [[package]] @@ -1425,7 +1634,7 @@ checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" dependencies = [ "libc", "option-ext", - "redox_users", + "redox_users 0.5.2", "windows-sys 0.61.2", ] @@ -1446,6 +1655,12 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" +[[package]] +name = "downcast-rs" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" + [[package]] name = "dunce" version = "1.0.5" @@ -1485,6 +1700,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5320ae4c3782150d900b79807611a59a99fc9a1d61d686faafc24b93fc8d7ca" +[[package]] +name = "env_home" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" + [[package]] name = "equivalent" version = "1.0.2" @@ -1524,6 +1745,12 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "fastdivide" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" + [[package]] name = "fastrand" version = "2.3.0" @@ -1545,6 +1772,16 @@ dependencies = [ "version_check", ] +[[package]] +name = "filetime" +version = "0.2.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c287a33c7f0a620c38e641e7f60827713987b3c0f26e8ddc9462cc69cf75759" +dependencies = [ + "cfg-if", + "libc", +] + [[package]] name = "find-msvc-tools" version = 
"0.1.4" @@ -1574,6 +1811,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "foldhash" version = "0.2.0" @@ -1610,12 +1853,41 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28dd6caf6059519a65843af8fe2a3ae298b14b80179855aeb4adc2c1934ee619" +[[package]] +name = "fs4" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" +dependencies = [ + "rustix 0.38.44", + "windows-sys 0.52.0", +] + [[package]] name = "fs_extra" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +[[package]] +name = "fsevent-sys" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2" +dependencies = [ + "libc", +] + +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.31" @@ -1829,6 +2101,23 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + 
+[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.1.5", +] + [[package]] name = "hashbrown" version = "0.16.0" @@ -1837,7 +2126,7 @@ checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.2.0", ] [[package]] @@ -1861,6 +2150,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -1887,6 +2182,39 @@ dependencies = [ "winapi", ] +[[package]] +name = "html2text" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c66ee488a63a92237d5b48875b7e05bb293be8fb2894641c8118b60c08ab5ef" +dependencies = [ + "html5ever", + "markup5ever", + "tendril", + "thiserror 1.0.69", + "unicode-width 0.1.14", +] + +[[package]] +name = "html5ever" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn 2.0.109", +] + +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" + [[package]] name = "http" version = "0.2.12" @@ -1993,7 +2321,6 @@ dependencies = [ "http 1.3.1", "http-body 1.0.1", "httparse", - "httpdate", "itoa", "pin-project-lite", "pin-utils", @@ -2255,6 +2582,38 @@ version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" +[[package]] +name = "inotify" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd168d97690d0b8c412d6b6c10360277f4d7ee495c5d0d5d5fe0854923255cc" +dependencies = [ + "bitflags 1.3.2", + "inotify-sys", + "libc", +] + +[[package]] +name = "inotify-sys" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" +dependencies = [ + "libc", +] + +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -2283,6 +2642,15 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -2324,12 +2692,38 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "kqueue" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "273c0752728918e0ac4976f2b275b6fefb9ecd400585dec929419f3844cd87b5" +dependencies = [ + "kqueue-sys", + "libc", +] + +[[package]] +name = "kqueue-sys" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07293a4e297ac234359b510362495713f75ea345d5307140414f20c69ffeb087" +dependencies = [ + "bitflags 2.13.0", + "libc", +] + [[package]] name = "lazy_static" version = "1.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "levenshtein_automata" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" + [[package]] name = "libc" version = "0.2.186" @@ -2358,7 +2752,7 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.13.0", "libc", ] @@ -2372,12 +2766,27 @@ dependencies = [ "libc", ] +[[package]] +name = "link-cplusplus" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f78c730aaa7d0b9336a299029ea49f9ee53b0ed06e9202e8cb7db9bae7b8c82" +dependencies = [ + "cc", +] + [[package]] name = "linked-hash-map" version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -2405,6 +2814,15 @@ version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "lru" version = "0.16.2" @@ -2420,6 +2838,32 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "lz4_flex" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a" + +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" +dependencies = [ + "log", + "phf 0.11.3", + "phf_codegen 0.11.3", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "match_cfg" version = "0.1.0" @@ -2436,10 +2880,14 @@ dependencies = [ ] [[package]] -name = "matchit" -version = "0.8.4" +name = "measure_time" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" +checksum = "dbefd235b0aadd181626f281e1d684e116972988c14c264e42069d5e8a5775cc" +dependencies = [ + "instant", + "log", +] [[package]] name = "memchr" @@ -2447,6 +2895,15 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "memmap2" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1219ed1b7f229ee7104d281dd01d6802fe28bb6e95d292942c4daacdeb798c0" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.6.5" @@ -2525,6 +2982,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" dependencies = [ "libc", + "log", "wasi", "windows-sys 0.61.2", ] @@ -2584,11 +3042,13 @@ dependencies = [ 
"aws-sdk-lambda", "aws-smithy-runtime-api", "aws-smithy-types", - "axum", "base64 0.22.1", "bigdecimal", "bon", + "cersei-agent", "cersei-provider", + "cersei-tools", + "cersei-types", "chrono", "clap 4.5.51", "comrak", @@ -2612,8 +3072,6 @@ dependencies = [ "pretty_assertions", "rand 0.9.2", "reqwest 0.12.24", - "rmcp", - "schemars 1.1.0", "serde", "serde_json", "serde_json5", @@ -2647,6 +3105,12 @@ dependencies = [ "uuid", ] +[[package]] +name = "murmurhash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" + [[package]] name = "native-tls" version = "0.2.14" @@ -2664,6 +3128,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "nix" version = "0.24.3" @@ -2676,6 +3146,18 @@ dependencies = [ "memoffset", ] +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags 2.13.0", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "nom" version = "7.1.3" @@ -2686,6 +3168,34 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "notify" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c533b4c39709f9ba5005d8002048266593c1cfaf3c5f0739d5b8ab0c6c504009" +dependencies = [ + "bitflags 2.13.0", + "filetime", + "fsevent-sys", + "inotify", + "kqueue", + "libc", + "log", + "mio", + "notify-types", + "walkdir", + "windows-sys 0.52.0", +] + +[[package]] +name = "notify-types" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"585d3cb5e12e01aed9e8a1f70d5c6b5e86fe2a6e48fc8cd0b3e0b8df6f6eb174" +dependencies = [ + "instant", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -2727,6 +3237,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi 0.5.2", + "libc", ] [[package]] @@ -2749,9 +3270,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "once_cell_polyfill" @@ -2759,13 +3280,19 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oneshot" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" + [[package]] name = "openssl" version = "0.10.74" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24ad14dd45412269e1a30f52ad8f0664f0f4f4a89ee8fe28c3b3527021ebb654" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.13.0", "cfg-if", "foreign-types", "libc", @@ -2831,6 +3358,15 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" +[[package]] +name = "ownedbytes" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c3a059efb063b8f425b948e042e6b9bd85edfe60e913630ed727b23e2dfcc558" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "owo-colors" version = "4.2.3" @@ -2866,12 +3402,6 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" -[[package]] -name = "pastey" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ee67f1008b1ba2321834326597b8e186293b049a023cdef258527550b9935b4" - [[package]] name = "pear" version = "0.2.9" @@ -2944,24 +3474,53 @@ dependencies = [ "sha2", ] +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_shared 0.11.3", +] + [[package]] name = "phf" version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" dependencies = [ - "phf_shared", + "phf_shared 0.13.1", "serde", ] +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", +] + [[package]] name = "phf_codegen" version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.13.1", + "phf_shared 0.13.1", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared 0.11.3", + "rand 0.8.5", ] [[package]] @@ -2971,7 +3530,16 
@@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" dependencies = [ "fastrand", - "phf_shared", + "phf_shared 0.13.1", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", ] [[package]] @@ -3045,7 +3613,7 @@ dependencies = [ "httpdate", "indexmap 1.9.3", "log", - "lru", + "lru 0.16.2", "once_cell", "parking_lot", "pingora-core", @@ -3084,8 +3652,8 @@ dependencies = [ "httpdate", "libc", "log", - "lru", - "nix", + "lru 0.16.2", + "nix 0.24.3", "once_cell", "openssl-probe", "parking_lot", @@ -3209,7 +3777,7 @@ checksum = "7d10a7b0e4115c8098e454d77a96d8ac3d89cc9a95910a6ef10f2656160d463a" dependencies = [ "crossbeam-queue", "log", - "lru", + "lru 0.16.2", "parking_lot", "pingora-timeout", "thread_local", @@ -3295,6 +3863,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "predicates" version = "3.1.3" @@ -3419,7 +3993,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash", + "rustc-hash 2.1.1", "rustls 0.23.35", "socket2 0.6.1", "thiserror 2.0.17", @@ -3439,7 +4013,7 @@ dependencies = [ "lru-slab", "rand 0.9.2", "ring", - "rustc-hash", + "rustc-hash 2.1.1", "rustls 0.23.35", "rustls-pki-types", "slab", @@ -3560,13 +4134,54 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.13.0", +] + +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom 0.2.16", + "libredox", + "thiserror 1.0.69", ] [[package]] @@ -3737,64 +4352,21 @@ dependencies = [ "nom", "pin-project-lite", "reqwest 0.12.24", - "thiserror 1.0.69", -] - -[[package]] -name = "ring" -version = "0.17.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" -dependencies = [ - "cc", - "cfg-if", - "getrandom 0.2.16", - "libc", - "untrusted", - "windows-sys 0.52.0", -] - -[[package]] -name = "rmcp" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1f571c72940a19d9532fe52dbea8bc9912bf1d766c2970bb824056b86f3f59" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures", - "http 1.3.1", - "http-body 1.0.1", - "http-body-util", - "pastey", - "pin-project-lite", - "rand 0.10.1", - "rmcp-macros", - 
"schemars 1.1.0", - "serde", - "serde_json", - "sse-stream", - "thiserror 2.0.17", - "tokio", - "tokio-stream", - "tokio-util", - "tower-service", - "tracing", - "uuid", + "thiserror 1.0.69", ] [[package]] -name = "rmcp-macros" -version = "1.8.0" +name = "ring" +version = "0.17.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aad0035b69380782d78ea95b508327e6deaa2235909053e596eea8f27b5e1d5" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ - "darling 0.23.0", - "proc-macro2", - "quote", - "serde_json", - "syn 2.0.109", + "cc", + "cfg-if", + "getrandom 0.2.16", + "libc", + "untrusted", + "windows-sys 0.52.0", ] [[package]] @@ -3819,12 +4391,28 @@ dependencies = [ "serde", ] +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] + [[package]] name = "rustc-demangle" version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc-hash" version = "2.1.1" @@ -3840,16 +4428,29 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags 2.13.0", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + [[package]] name = "rustix" version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.13.0", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.11.0", "windows-sys 0.61.2", ] @@ -4002,6 +4603,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "schemars" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + [[package]] name = "schemars" version = "0.9.0" @@ -4020,19 +4633,17 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9558e172d4e8533736ba97870c4b2cd63f84b382a3d6eb063da41b91cce17289" dependencies = [ - "chrono", "dyn-clone", "ref-cast", - "schemars_derive", "serde", "serde_json", ] [[package]] name = "schemars_derive" -version = "1.1.0" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301858a4023d78debd2353c7426dc486001bddc91ae31a76fb1f55132f7e2633" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" dependencies = [ "proc-macro2", "quote", @@ -4046,6 +4657,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scratch" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d68f2ec51b097e4c1a75b681a8bec621909b5e91f15bb7b840c4f2f7b01148b2" + [[package]] name = "sct" version = "0.7.1" @@ -4062,7 +4679,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.13.0", "core-foundation 0.9.4", "core-foundation-sys", "libc", @@ -4075,7 +4692,7 @@ version = "3.5.1" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.13.0", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -4225,6 +4842,7 @@ version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ + "indexmap 2.12.0", "itoa", "memchr", "ryu", @@ -4243,17 +4861,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_path_to_error" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" -dependencies = [ - "itoa", - "serde", - "serde_core", -] - [[package]] name = "serde_repr" version = "0.1.20" @@ -4311,7 +4918,7 @@ version = "3.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b91a903660542fced4e99881aa481bdbaec1634568ee02e0b8bd57c64cb38955" dependencies = [ - "darling 0.21.3", + "darling", "proc-macro2", "quote", "syn 2.0.109", @@ -4387,12 +4994,27 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +[[package]] +name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + [[package]] name = "siphasher" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649" +[[package]] +name = "sketches-ddsketch" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c" +dependencies = [ + "serde", +] + [[package]] name = "slab" version = 
"0.4.11" @@ -4425,19 +5047,6 @@ dependencies = [ "windows-sys 0.60.2", ] -[[package]] -name = "sse-stream" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3962b63f038885f15bce2c6e02c0e7925c072f1ac86bb60fd44c5c6b762fb72" -dependencies = [ - "bytes", - "futures-util", - "http-body 1.0.1", - "http-body-util", - "pin-project-lite", -] - [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -4450,6 +5059,37 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "string_cache" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared 0.11.3", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", + "proc-macro2", + "quote", +] + [[package]] name = "strsim" version = "0.10.0" @@ -4573,7 +5213,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.13.0", "core-foundation 0.9.4", "system-configuration-sys 0.6.0", ] @@ -4598,6 +5238,147 @@ dependencies = [ "libc", ] +[[package]] +name = "tantivy" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"96599ea6fccd844fc833fed21d2eecac2e6a7c1afd9e044057391d78b1feb141" +dependencies = [ + "aho-corasick", + "arc-swap", + "base64 0.22.1", + "bitpacking", + "byteorder", + "census", + "crc32fast", + "crossbeam-channel", + "downcast-rs", + "fastdivide", + "fnv", + "fs4", + "htmlescape", + "itertools 0.12.1", + "levenshtein_automata", + "log", + "lru 0.12.5", + "lz4_flex", + "measure_time", + "memmap2", + "num_cpus", + "once_cell", + "oneshot", + "rayon", + "regex", + "rust-stemmers", + "rustc-hash 1.1.0", + "serde", + "serde_json", + "sketches-ddsketch", + "smallvec", + "tantivy-bitpacker", + "tantivy-columnar", + "tantivy-common", + "tantivy-fst", + "tantivy-query-grammar", + "tantivy-stacker", + "tantivy-tokenizer-api", + "tempfile", + "thiserror 1.0.69", + "time", + "uuid", + "winapi", +] + +[[package]] +name = "tantivy-bitpacker" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "284899c2325d6832203ac6ff5891b297fc5239c3dc754c5bc1977855b23c10df" +dependencies = [ + "bitpacking", +] + +[[package]] +name = "tantivy-columnar" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12722224ffbe346c7fec3275c699e508fd0d4710e629e933d5736ec524a1f44e" +dependencies = [ + "downcast-rs", + "fastdivide", + "itertools 0.12.1", + "serde", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-sstable", + "tantivy-stacker", +] + +[[package]] +name = "tantivy-common" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8019e3cabcfd20a1380b491e13ff42f57bb38bf97c3d5fa5c07e50816e0621f4" +dependencies = [ + "async-trait", + "byteorder", + "ownedbytes", + "serde", + "time", +] + +[[package]] +name = "tantivy-fst" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" +dependencies = [ + "byteorder", + "regex-syntax", + "utf8-ranges", +] + 
+[[package]] +name = "tantivy-query-grammar" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "847434d4af57b32e309f4ab1b4f1707a6c566656264caa427ff4285c4d9d0b82" +dependencies = [ + "nom", +] + +[[package]] +name = "tantivy-sstable" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c69578242e8e9fc989119f522ba5b49a38ac20f576fc778035b96cc94f41f98e" +dependencies = [ + "tantivy-bitpacker", + "tantivy-common", + "tantivy-fst", + "zstd", +] + +[[package]] +name = "tantivy-stacker" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56d6ff5591fc332739b3ce7035b57995a3ce29a93ffd6012660e0949c956ea8" +dependencies = [ + "murmurhash32", + "rand_distr", + "tantivy-common", +] + +[[package]] +name = "tantivy-tokenizer-api" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0dcade25819a89cfe6f17d932c9cedff11989936bf6dd4f336d50392053b04" +dependencies = [ + "serde", +] + [[package]] name = "tempfile" version = "3.23.0" @@ -4607,10 +5388,21 @@ dependencies = [ "fastrand", "getrandom 0.3.4", "once_cell", - "rustix", + "rustix 1.1.2", "windows-sys 0.61.2", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -4626,7 +5418,7 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0" dependencies = [ - "rustix", + "rustix 1.1.2", "windows-sys 0.60.2", ] @@ -4882,6 +5674,7 @@ dependencies = [ "bytes", "futures-core", "futures-sink", + "futures-util", "pin-project-lite", "tokio", ] @@ -4949,7 +5742,7 @@ version = "0.6.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.13.0", "bytes", "futures-util", "http 1.3.1", @@ -5078,6 +5871,76 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "tree-sitter" +version = "0.25.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-bash" +version = "0.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "329a4d48623ac337d42b1df84e81a1c9dbb2946907c102ca72db158c1964a52e" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-go" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b13d476345220dbe600147dd444165c5791bf85ef53e28acbedd46112ee18431" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" + +[[package]] +name = "tree-sitter-python" +version = "0.23.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8ccb3e3a3495c8a943f6c3fd24c3804c471fd7f4f16087623c7fa4c0068e8a" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "try-lock" version = "0.2.5" @@ -5192,6 +6055,28 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "usearch" +version = "2.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c08f764417012cf6aea6d1380ef9ea8712c5795a938b726fc67b9bf7ea8824b" +dependencies = [ + "cxx", + "cxx-build", +] + +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "utf8-ranges" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -5380,6 +6265,18 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "which" +version = "7.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d643ce3fd3e5b54854602a080f34fb10ab75e0b813ee32d00ca2b44fa74762" +dependencies = [ + "either", + "env_home", + "rustix 1.1.2", + "winsafe", +] + [[package]] name = "winapi" version = "0.3.9" @@ -5755,6 +6652,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "winsafe" +version = "0.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" + [[package]] name = "wit-bindgen" version = "0.46.0" diff --git a/Cargo.toml b/Cargo.toml index def680f..33fd2fa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,15 @@ path = "src/bin/main.rs" [workspace] members = ["crates/multi-core"] +# 
MultiTool local patch (MULTI-1367). cersei-provider 0.1.9 unconditionally sends +# a stale `anthropic-beta: interleaved-thinking-2025-04-14` header that the +# current Anthropic API rejects with HTTP 400, breaking every in-process check. +# Vendor the crate with that header corrected until upstream releases a fix. +# See third_party/cersei-provider/PATCH.md and +# https://github.com/pacifio/cersei/issues/20. +[patch.crates-io] +cersei-provider = { path = "third_party/cersei-provider" } + [workspace.package] authors = ["The MultiTool Team"] edition = "2024" @@ -80,10 +89,10 @@ base64.workspace = true tokio-util.workspace = true comrak = { version = "0.52", default-features = false } libc = "0.2.186" -rmcp = { version = "1.8", default-features = false, features = ["server", "macros", "schemars", "transport-streamable-http-server", "transport-streamable-http-server-session", "tower"] } -axum = { version = "0.8", default-features = false, features = ["http1", "tokio", "json"] } -schemars = "1" tempfile = "3" +cersei-agent = "0.1.9" +cersei-tools = "0.1.9" +cersei-types = "0.1.9" [workspace.dependencies] pretty_assertions = "1.4" @@ -97,13 +106,6 @@ miette = { version = "7", features = ["fancy"] } tokio-stream = { version = "0.1", features = ["time"] } async-stream = "0.3.6" thiserror = "2.0" -reqwest = { version = "0.12", default-features = false, features = [ - "rustls-tls", - "charset", - "http2", - "macos-system-configuration", -] } -openssl = { version = "0.10", features = ["vendored"] } [dev-dependencies] # The `test` feature enables `figment::Jail` for hermetic env/file config tests. diff --git a/guides/checks.md b/guides/checks.md index ab7ce14..114a34d 100644 --- a/guides/checks.md +++ b/guides/checks.md @@ -7,14 +7,19 @@ requirements** that have no direct, programmatic unit to test (e.g. "no serif fonts", "no images over 5 MB", "every public function is documented"). Each requirement is validated by one or more **checks**. 
In the MVP a check is a -`prompt`: a natural-language instruction that a Claude Code agent carries out to -decide whether the requirement is satisfied. A requirement is satisfied only if -**all** of its checks pass (logical AND). +`prompt`: a natural-language instruction that an AI agent carries out to decide +whether the requirement is satisfied. The agent runs **in-process** (no external +CLI), explores the sandbox with read-only tools, and reports its verdict. A +requirement is satisfied only if **all** of its checks pass (logical AND). ## ✅ Prerequisites -- [ ] A working [`claude`](https://docs.claude.com/en/docs/claude-code) CLI on - your `PATH` (checks shell out to `claude -p`). +- [ ] An API key for your chosen provider in the environment (e.g. + `ANTHROPIC_API_KEY`) — see [Configuration](#️-configuration). The default + in-process executor talks to the provider directly; **no `claude` CLI is + required**. (The optional `claude -p` fallback — `--executor claude` — does + need the [`claude`](https://docs.claude.com/en/docs/claude-code) CLI on your + `PATH`.) - [ ] **macOS** — the MVP sandboxes each check with an APFS copy-on-write clone. Other operating systems are not yet supported. @@ -144,19 +149,23 @@ to a clear plain-text form when disabled. Agents are nondeterministic, so `multi check` does **not** trust their stdout or any sentinel file. Instead, each agent reports its verdict by calling a single -MCP tool, `report-check-result(success, evidence?)`, served by an in-process MCP -server the CLI runs on `localhost` (one dedicated endpoint per check). An agent -that finishes **without** calling the tool fails its check. This keeps results +in-process **judge tool**, `report-check-result(success, evidence?)`, registered +fresh on that check's agent and closing over its own result sink. An agent that +finishes **without** calling the tool fails its check. This keeps results trustworthy despite agent nondeterminism. 
+Agents run with a **least-privilege, read-only** tool set by default (Read, Grep, +Glob, plus the judge tool) — a verification agent observes, it does not mutate. + ## ⚙️ Configuration -The default **provider**, **model**, and **effort** are resolved from three -sources, in order of precedence (highest wins): +The default **provider**, **model**, **effort**, and **executor** are resolved +from three sources, in order of precedence (highest wins): -1. **Flags** — `--provider`, `--model`, `--effort` on `multi check`. +1. **Flags** — `--provider`, `--model`, `--effort`, `--executor` on `multi check`. 2. **Environment** — `MULTI_`-prefixed vars mapped into the `checks` namespace, - e.g. `MULTI_CHECKS_MODEL`, `MULTI_CHECKS_PROVIDER`, `MULTI_CHECKS_EFFORT`. + e.g. `MULTI_CHECKS_MODEL`, `MULTI_CHECKS_PROVIDER`, `MULTI_CHECKS_EFFORT`, + `MULTI_CHECKS_EXECUTOR`. 3. **Config file** — the `[checks]` table of `MultiTool.toml` (or `.json` / `.jsonc`), discovered up the directory tree like any MultiTool manifest. @@ -164,7 +173,8 @@ sources, in order of precedence (highest wins): [checks] provider = "anthropic" # anthropic | openai | gemini model = "claude-sonnet-4-6" # must be a known model ID for the provider -effort = "low" # low | medium | high +effort = "low" # low | medium | high → sampling temperature +executor = "cersei" # cersei (in-process, default) | claude (fallback) # optional, non-secret base-URL overrides per provider [checks.providers.anthropic] @@ -173,7 +183,16 @@ base_url = "https://..." An unset flag contributes nothing — it never overrides a value from the environment or file. The `model` is validated against a hardcoded allowlist of -known IDs for the selected provider; an unknown ID is a clear error. +known IDs for the selected provider; an unknown ID is a clear error. 
`effort` +currently maps to the in-process agent's sampling temperature (`low` → most +deterministic, `high` → most exploratory); mapping it to an extended-thinking +budget is pending an upstream provider fix. + +The **`executor`** selects the execution engine. The default `cersei` runs each +check as an in-process agent (native multi-provider model swapping, no external +CLI). `claude` is the legacy `claude -p` shell-out fallback, kept selectable for +migration while the in-process path is validated; it requires the `claude` CLI on +your `PATH` and will be removed once cersei is proven out. **Credentials are environment-only.** API keys are read directly from each provider's native variable and never live in the config file or under the @@ -192,9 +211,12 @@ whose key is missing is an error. - **macOS only** — copy-on-write sandboxing uses APFS `clonefile`. Linux and Windows support is planned. -- **`prompt`-type checks only** — checks run via `claude -p` against the +- **`prompt`-type checks only** — checks run an in-process agent against the configured model (the `sonnet` family by default). A `shell` check type is planned. +- **Read-only agents** — checks observe the sandbox with read-only tools and + cannot execute code. Per-check execution capability (for checks that must run + the project to verify behavior) is planned. ## 📬 Need help? diff --git a/src/checks/config/mod.rs b/src/checks/config/mod.rs index 064870c..083c7a7 100644 --- a/src/checks/config/mod.rs +++ b/src/checks/config/mod.rs @@ -1,17 +1,16 @@ -//! The configuration phase (M2 #1341, global config #1359). +//! The configuration phase (M2 #1341, global config #1359, executor wiring #1367). //! -//! Resolves the global default **provider**, **model**, and **effort** from -//! three sources with standard CLI precedence — `flag > env var > config file` — -//! merged with [`figment`], and constructs a registry of ready-to-use model -//! providers for a future executor to consume. +//! 
Resolves the global default **provider**, **model**, **effort**, and +//! **executor** from three sources with standard CLI precedence — +//! `flag > env var > config file` — merged with [`figment`], constructs a +//! registry of ready-to-use model providers, and builds the selected +//! [`BoxedExecutor`] from a per-provider [`ProviderFactory`]. //! -//! The resolved [`Config`] is still **dependency-injected** forward: execution -//! receives a [`BoxedExecutor`] built from it rather than reading provider -//! details at point of use. -//! -//! The execution path is out of scope here: this phase *constructs and hands -//! off* providers (see [`Resolved::providers`]); a follow-up wires them into a -//! real executor. The MVP [`ClaudeExecutor`] still runs the checks. +//! The resolved [`Config`] is **dependency-injected** forward: execution +//! receives a [`BoxedExecutor`] (see [`Resolved::build_executor`]) rather than +//! reading provider details at point of use. The default executor is the +//! in-process [`CerseiExecutor`]; the legacy [`ClaudeExecutor`] remains +//! selectable as a migration fallback. mod file; mod models; @@ -27,10 +26,11 @@ use figment::{ use miette::{Result, miette}; use crate::checks::executor::BoxedExecutor; +use crate::checks::executor::cersei::CerseiExecutor; use crate::checks::executor::claude::ClaudeExecutor; -pub use providers::ProviderRegistry; -pub use schema::{CliOverrides, Effort, ProviderKind}; +pub use providers::{ProviderFactory, ProviderRegistry}; +pub use schema::{CliOverrides, Effort, ExecutorKind, ProviderKind}; /// Maximum number of checks executed concurrently. A small fan-out gives each /// (CPU-heavy) reasoning agent enough cores to finish promptly. @@ -46,29 +46,33 @@ const DEFAULT_MAX_ATTEMPTS: usize = 3; pub struct Config { /// The selected provider. pub provider: ProviderKind, - /// The selected provider's optional base-URL override, if configured. 
Passed - /// to the MVP executor as `ANTHROPIC_BASE_URL`; `None` uses the default. + /// The selected provider's optional base-URL override, if configured. Used + /// by the `claude -p` fallback as `ANTHROPIC_BASE_URL` (the in-process + /// executor applies it via the provider factory); `None` uses the default. pub provider_url: Option, /// The concrete model ID to run (validated against the hardcoded allowlist). pub model: String, /// The effort level. pub effort: Effort, + /// Which execution engine runs each check (default: in-process cersei). + pub executor: ExecutorKind, /// Maximum number of checks executed concurrently. pub concurrency: usize, /// Per-agent wall-clock timeout (reaps an agent that hangs before reporting). pub agent_timeout: Duration, /// How many times to (re)run a check whose agent fails to report. Agents are - /// nondeterministic and occasionally hang or finish without calling the - /// report tool; a fresh attempt against the same endpoint usually succeeds. - /// A check only resolves as errored after all attempts are exhausted. + /// nondeterministic and occasionally hit the turn cap or finish without + /// calling the judge tool; a fresh attempt usually succeeds. A check only + /// resolves as errored after all attempts are exhausted. pub max_attempts: usize, } impl Config { - /// Construct the concrete [`BoxedExecutor`] from this configuration. This is - /// the injection point: execution is handed the boxed executor, never a - /// concrete type or a global. - pub fn build_executor(&self) -> BoxedExecutor { + /// Construct the legacy `claude -p` fallback executor from this + /// configuration. The in-process cersei executor needs the resolved provider + /// factory and so is built from [`Resolved`]; this builder only covers the + /// fallback, which needs nothing beyond [`Config`]. 
+ pub fn build_claude_executor(&self) -> BoxedExecutor { Box::new(ClaudeExecutor::new( self.model.clone(), self.provider_url.clone(), @@ -101,11 +105,32 @@ fn resolve_layers( } /// The product of the configuration phase: the resolved [`Config`] the pipeline -/// consumes, plus the constructed [`ProviderRegistry`] handed off for a future -/// executor. +/// consumes, the constructed [`ProviderRegistry`] (one live handle per available +/// provider), and the [`ProviderFactory`] for the *selected* provider that the +/// in-process executor uses to mint a fresh handle per check. pub struct Resolved { pub config: Config, pub providers: ProviderRegistry, + pub factory: ProviderFactory, +} + +impl Resolved { + /// Construct the selected [`BoxedExecutor`]. This is the injection point and + /// the migration lever: `cersei` (default) runs the in-process agent; + /// `claude` runs the legacy `claude -p` fallback over the same checks. + pub fn build_executor(&self) -> Result { + let cfg = &self.config; + let executor: BoxedExecutor = match cfg.executor { + ExecutorKind::Cersei => Box::new(CerseiExecutor::new( + self.factory.clone(), + cfg.model.clone(), + cfg.effort, + cfg.agent_timeout, + )), + ExecutorKind::Claude => cfg.build_claude_executor(), + }; + Ok(executor) + } } /// The configuration phase: resolve provider/model/effort from file + env + @@ -122,6 +147,7 @@ pub fn load(overrides: CliOverrides) -> Result { .model .unwrap_or_else(|| models::default_model(provider).to_string()); let effort = checks.effort.unwrap_or(Effort::Low); + let executor = checks.executor.unwrap_or(ExecutorKind::Cersei); if !models::is_valid_model(provider, &model) { return Err(miette!( @@ -142,6 +168,11 @@ pub fn load(overrides: CliOverrides) -> Result { )); } + // The factory for the selected provider mints a fresh handle per check; its + // credential is guaranteed present by the availability check above. 
+ let factory = providers::build_factory(provider, &checks.providers) + .expect("selected provider availability was just verified"); + let provider_url = checks.providers.base_url(provider).map(ToOwned::to_owned); let config = Config { @@ -149,6 +180,7 @@ pub fn load(overrides: CliOverrides) -> Result { provider_url, model, effort, + executor, concurrency: DEFAULT_CONCURRENCY, agent_timeout: DEFAULT_AGENT_TIMEOUT, max_attempts: DEFAULT_MAX_ATTEMPTS, @@ -157,6 +189,7 @@ pub fn load(overrides: CliOverrides) -> Result { Ok(Resolved { config, providers: registry, + factory, }) } @@ -173,6 +206,7 @@ pub fn configuration() -> Config { // sonnet reasons efficiently and reports in well under a minute.) model: models::default_model(provider).to_string(), effort: Effort::Low, + executor: ExecutorKind::Cersei, concurrency: DEFAULT_CONCURRENCY, agent_timeout: DEFAULT_AGENT_TIMEOUT, max_attempts: DEFAULT_MAX_ATTEMPTS, @@ -195,6 +229,7 @@ mod tests { provider: Some(provider), model: Some(model.to_string()), effort: Some(Effort::Low), + executor: None, providers: ProvidersSection::default(), }, } @@ -205,17 +240,22 @@ mod tests { let cfg = configuration(); assert_eq!(cfg.provider, ProviderKind::Anthropic); assert_eq!(cfg.model, "claude-sonnet-4-6"); + assert_eq!(cfg.executor, ExecutorKind::Cersei); assert!(cfg.concurrency >= 1); - // The executor is constructible (DI seam works). - let _exec = cfg.build_executor(); + // The fallback executor is constructible from config alone (DI seam works). 
+ let _exec = cfg.build_claude_executor(); } #[test] fn flag_beats_file() { Jail::expect_with(|_jail| { let file = file_with(ProviderKind::Anthropic, "claude-haiku-4-5"); - let overrides = - CliOverrides::new(Some(ProviderKind::OpenAi), Some("gpt-4o".into()), None); + let overrides = CliOverrides::new( + Some(ProviderKind::OpenAi), + Some("gpt-4o".into()), + None, + None, + ); let checks = resolve_layers(file, overrides).unwrap(); assert_eq!(checks.provider, Some(ProviderKind::OpenAi)); assert_eq!(checks.model.as_deref(), Some("gpt-4o")); @@ -248,7 +288,7 @@ mod tests { assert_eq!(checks.model.as_deref(), Some("claude-haiku-4-5")); // ...and a flag outranks env. - let overrides = CliOverrides::new(None, Some("claude-opus-4-8".into()), None); + let overrides = CliOverrides::new(None, Some("claude-opus-4-8".into()), None, None); let checks = resolve_layers(file, overrides).unwrap(); assert_eq!(checks.model.as_deref(), Some("claude-opus-4-8")); Ok(()) diff --git a/src/checks/config/providers.rs b/src/checks/config/providers.rs index 75ea421..a4e8742 100644 --- a/src/checks/config/providers.rs +++ b/src/checks/config/providers.rs @@ -104,6 +104,53 @@ fn build_one( Ok(provider) } +/// A per-provider handle factory: the resolved provider kind + credential + +/// base URL needed to mint a **fresh** [`Box`] on demand. +/// +/// cersei's `Agent` takes an *owned* `Box` and checks run +/// concurrently (plus retries), so a single pre-built handle cannot be shared +/// across agents. The configuration phase resolves credentials once and hands +/// the executor this factory, which builds one handle per check run. +#[derive(Clone)] +pub struct ProviderFactory { + kind: ProviderKind, + key: String, + base_url: Option, +} + +impl std::fmt::Debug for ProviderFactory { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // Never print the credential. 
+ f.debug_struct("ProviderFactory") + .field("kind", &self.kind) + .field("base_url", &self.base_url) + .finish_non_exhaustive() + } +} + +impl ProviderFactory { + /// Mint a fresh provider handle. + pub fn build(&self) -> Result> { + build_one(self.kind, self.key.clone(), self.base_url.clone()) + } +} + +/// Build a [`ProviderFactory`] for `provider` if its credential is present, +/// resolving the same base-URL precedence as the registry. Returns `None` when +/// the provider has no credential (and therefore cannot be selected). +pub fn build_factory( + provider: ProviderKind, + overrides: &ProvidersSection, +) -> Option { + let key = credential(provider)?; + let base_url = resolve_base_url(provider, overrides); + Some(ProviderFactory { + kind: provider, + key, + base_url, + }) +} + /// Build the registry: one handle per provider whose credential is present. pub fn build_registry(overrides: &ProvidersSection) -> Result { let mut registry = ProviderRegistry::new(); diff --git a/src/checks/config/schema.rs b/src/checks/config/schema.rs index 8540fe1..16e81b0 100644 --- a/src/checks/config/schema.rs +++ b/src/checks/config/schema.rs @@ -32,7 +32,7 @@ impl ProviderKind { } /// The agent effort level. Carried through configuration and consumed by the -/// executor. `Medium`/`High` are reserved for richer providers. +/// executor, where it currently maps to a sampling temperature. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ValueEnum)] #[serde(rename_all = "lowercase")] pub enum Effort { @@ -41,6 +41,20 @@ pub enum Effort { High, } +/// Which execution engine runs each check. The default is the in-process +/// [`cersei`](crate::checks::executor::cersei) agent; `claude` selects the +/// legacy `claude -p` shell-out fallback, kept selectable during the migration +/// (MULTI-1367) so verdicts from both can be compared before the fallback is +/// retired. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ValueEnum)] +#[serde(rename_all = "lowercase")] +pub enum ExecutorKind { + /// The in-process `cersei-agent` executor (default). + Cersei, + /// The legacy `claude -p` shell-out fallback. + Claude, +} + /// The whole config file, of which only the `[checks]` table concerns us. Other /// top-level keys (the legacy manifest's `workspace`/`application`/`config`) are /// ignored rather than rejected, so a single `MultiTool.toml` can carry both. @@ -60,6 +74,9 @@ pub struct ChecksSection { pub model: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub effort: Option, + /// Which execution engine runs each check (`cersei` by default). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub executor: Option, /// Optional, non-secret per-provider base-URL overrides. #[serde(default)] pub providers: ProvidersSection, @@ -115,6 +132,8 @@ pub struct CliChecksOverrides { pub model: Option, #[serde(skip_serializing_if = "Option::is_none")] pub effort: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub executor: Option, } impl CliOverrides { @@ -123,12 +142,14 @@ impl CliOverrides { provider: Option, model: Option, effort: Option, + executor: Option, ) -> Self { Self { checks: CliChecksOverrides { provider, model, effort, + executor, }, } } diff --git a/src/checks/execution.rs b/src/checks/execution.rs index 5ddd615..4a75fa9 100644 --- a/src/checks/execution.rs +++ b/src/checks/execution.rs @@ -1,23 +1,22 @@ -//! The execution phase (M5). +//! The execution phase (M5; in-process rework in MULTI-1367). //! -//! Run every check **in parallel** (bounded), each in its own CoW sandbox and -//! against its own MCP endpoint; then reconcile each check's verdict (the -//! MCP-reported `success` is authoritative) and aggregate checks into -//! per-requirement outcomes via logical AND. +//! Run every check **in parallel** (bounded), each in its own CoW sandbox, via +//! 
an in-process agent that reports its verdict through a per-check judge tool; +//! then reconcile each check's verdict (the reported `success` is authoritative) +//! and aggregate checks into per-requirement outcomes via logical AND. use std::collections::HashMap; use std::path::Path; use std::sync::Arc; use miette::{IntoDiagnostic, Result}; -use tokio::sync::{Notify, Semaphore}; +use tokio::sync::Semaphore; use tokio::task::JoinSet; use crate::checks::config::Config; use crate::checks::executor::{ - AgentOutcome, AgentRunRequest, BoxedExecutor, CheckExecutor, assemble_instructions, + AgentOutcome, AgentRunRequest, BoxedExecutor, CheckExecutor, CheckReport, }; -use crate::checks::mcp::{CheckReport, ReportStore, ResultServer, mcp_config_json}; use crate::checks::model::{ Check, CheckId, CheckOutcome, Requirement, RequirementOutcome, Verdict, }; @@ -31,8 +30,8 @@ struct PlannedCheck { check: Check, } -/// Convenience wrapper used by the pipeline orchestrator: build the executor and -/// sandbox from configuration (DI) and run [`execute`]. +/// Convenience wrapper used by the pipeline orchestrator: build the sandbox from +/// configuration (DI) and run [`execute`] with the injected executor. pub async fn execution_phase( cfg: &Config, executor: BoxedExecutor, @@ -73,20 +72,14 @@ pub async fn execute( return Ok(aggregate(requirements, HashMap::new())); } - // Stand up the one MCP server with an endpoint per check. - let ids: Vec = planned.iter().map(|p| p.id).collect(); - let server = ResultServer::start(&ids).await?; - - // The most recent agent outcome per check. The MCP-reported verdict is folded - // into `AgentOutcome::reported` by `run_one` (which kills the agent the - // instant it reports). + // The most recent agent outcome per check. The reported verdict lives in + // `AgentOutcome::verdict`. let mut last_outcome: HashMap> = HashMap::new(); - // Re-run any check whose agent fails to report, up to `max_attempts`. 
Agents - // are nondeterministic and occasionally hang or stop without calling the - // tool; a fresh attempt usually succeeds. The per-check endpoint's - // single-call flag stays unset until a real report arrives, so a retry - // reports to the same endpoint. + // Re-run any check whose agent fails to report a verdict, up to + // `max_attempts`. Agents are nondeterministic and occasionally hit the turn + // cap, error, or time out without reporting; a fresh attempt usually + // succeeds. let mut pending: Vec<&PlannedCheck> = planned.iter().collect(); let attempts = cfg.max_attempts.max(1); for attempt in 1..=attempts { @@ -109,23 +102,11 @@ pub async fn execute( let executor = executor.clone(); let sandbox = sandbox.clone(); let id = p.id; - let instructions = assemble_instructions(&p.check); - let endpoint_url = server.endpoint_url(id); - let (notify, reports) = server.report_handle(id); + let check = p.check.clone(); let working_dir = working_dir.to_path_buf(); set.spawn(async move { let _permit = permit; - let outcome = run_one( - executor, - sandbox, - id, - instructions, - &endpoint_url, - &working_dir, - notify, - reports, - ) - .await; + let outcome = run_one(executor, sandbox, id, check, &working_dir).await; (id, outcome) }); } @@ -137,12 +118,10 @@ pub async fn execute( // Whatever still has no reported verdict is retried in the next round. pending = planned .iter() - .filter(|p| !has_report(last_outcome.get(&p.id))) + .filter(|p| !has_verdict(last_outcome.get(&p.id))) .collect(); } - server.shutdown().await; - // Reconcile each check, then aggregate per requirement. let mut outcomes: HashMap = HashMap::new(); for p in &planned { @@ -153,87 +132,40 @@ pub async fn execute( } /// Whether an agent outcome carries a reported verdict. 
-fn has_report(outcome: Option<&Result>) -> bool { - matches!(outcome, Some(Ok(o)) if o.reported.is_some()) +fn has_verdict(outcome: Option<&Result>) -> bool { + matches!(outcome, Some(Ok(o)) if o.has_verdict()) } -/// Drive one check: sandbox → mcp-config → dispatch the agent, racing the -/// agent's MCP report against its process. The agent's job is done the instant -/// it reports, so on a report we drop the run future — which kills the agent -/// (`kill_on_drop`) and avoids the post-report cleanup hangs some agents -/// exhibit. If the process exits (or the executor's timeout fires) first, we -/// fold in any report that landed alongside. -#[allow(clippy::too_many_arguments)] +/// Drive one check: create its CoW sandbox, run the agent against it, then tear +/// the sandbox down. The executor owns the agent lifecycle (the in-process +/// executor cancels its agent the instant it reports; the legacy fallback runs +/// the subprocess to completion or timeout). async fn run_one( executor: Arc, sandbox: Arc, id: CheckId, - instructions: String, - endpoint_url: &str, + check: Check, working_dir: &Path, - notify: Arc, - reports: ReportStore, ) -> Result { let handle = sandbox.create(working_dir).await?; - let config_file = write_mcp_config(&mcp_config_json(endpoint_url))?; let request = AgentRunRequest { check_id: id, - instructions, + check, working_dir: handle.path().to_path_buf(), - mcp_config_path: config_file.path().to_path_buf(), }; - let report_for = |reports: &ReportStore| reports.lock().unwrap().get(&id).cloned(); - // `run_check` already returns a boxed (Unpin) future, so `&mut run` is fine. - let mut run = executor.run_check(request); - let outcome = tokio::select! { - // The agent reported: take the verdict. `run` is dropped when this - // function returns (just below), which kills the now-redundant agent. 
- _ = notify.notified() => { - AgentOutcome { - exited_cleanly: true, - exit_code: None, - stderr: String::new(), - reported: report_for(&reports), - } - } - // The process finished (clean exit, error, or the executor's timeout). - result = &mut run => { - let mut o = result?; - if o.reported.is_none() { - o.reported = report_for(&reports); - } - o - } - }; + let outcome = executor.run_check(request).await; - // Drop the run future first so a still-running agent is killed before we - // tear down its sandbox and mcp-config file. - drop(run); - drop(config_file); + // Drop the sandbox after the run completes (RAII teardown of the clone). drop(handle); - Ok(outcome) -} - -/// Write the `--mcp-config` JSON to a temp file the agent can read. -fn write_mcp_config(json: &str) -> Result { - use std::io::Write; - let mut file = tempfile::Builder::new() - .prefix("multi-mcp-") - .suffix(".json") - .tempfile() - .into_diagnostic()?; - file.write_all(json.as_bytes()).into_diagnostic()?; - file.flush().into_diagnostic()?; - Ok(file) + outcome } -/// Reconcile a single check's verdict from its agent outcome. The report folded -/// into [`AgentOutcome::reported`] (the MCP-reported `success`) is authoritative; -/// its absence is an error. +/// Reconcile a single check's verdict from its agent outcome. The reported +/// verdict ([`AgentOutcome::verdict`]) is authoritative; its absence is an error. 
fn reconcile(agent: Option<&Result>, title: &str) -> CheckOutcome { - if let Some(report) = inline_report(agent) { + if let Some(report) = reported_verdict(agent) { let verdict = if report.success { Verdict::Satisfied } else { @@ -247,14 +179,14 @@ fn reconcile(agent: Option<&Result>, title: &str) -> CheckOutcome } let reason = match agent { - Some(Ok(o)) if o.exited_cleanly => { - "agent finished without calling report-check-result".to_string() - } - Some(Ok(o)) => format!( - "agent exited without reporting (exit {:?}){}", - o.exit_code, - stderr_suffix(&o.stderr) - ), + Some(Ok(o)) => match &o.error { + Some(err) => format!("agent errored without reporting: {err}"), + None => format!( + "agent finished without reporting a verdict{}{}", + stop_reason_suffix(o.stop_reason.as_deref()), + turns_suffix(o.turns), + ), + }, Some(Err(e)) => format!("execution error: {e}"), None => "no result was collected for this check".to_string(), }; @@ -265,20 +197,25 @@ fn reconcile(agent: Option<&Result>, title: &str) -> CheckOutcome } } -fn inline_report(agent: Option<&Result>) -> Option { +fn reported_verdict(agent: Option<&Result>) -> Option { match agent { - Some(Ok(o)) => o.reported.clone(), + Some(Ok(o)) => o.verdict.clone(), _ => None, } } -fn stderr_suffix(stderr: &str) -> String { - let trimmed = stderr.trim(); - if trimmed.is_empty() { - String::new() +fn stop_reason_suffix(stop_reason: Option<&str>) -> String { + match stop_reason { + Some(r) if !r.is_empty() => format!(" (stop: {r})"), + _ => String::new(), + } +} + +fn turns_suffix(turns: u32) -> String { + if turns > 0 { + format!(" after {turns} turns") } else { - let snippet: String = trimmed.chars().take(200).collect(); - format!(": {snippet}") + String::new() } } diff --git a/src/checks/executor/cersei.rs b/src/checks/executor/cersei.rs new file mode 100644 index 0000000..4895247 --- /dev/null +++ b/src/checks/executor/cersei.rs @@ -0,0 +1,169 @@ +//! 
The real, in-process [`CheckExecutor`] (MULTI-1367): run each check as a +//! `cersei_agent::Agent` in its CoW sandbox, capturing the verdict through a +//! per-check judge tool — no `claude -p` subprocess, no MCP endpoints. + +use std::time::Duration; + +use async_trait::async_trait; +use cersei_agent::Agent; +use cersei_tools::permissions::AllowReadOnly; +use cersei_tools::{Tool, clear_session_shell_state}; +use cersei_types::CerseiError; +use miette::{Result, miette}; +use tokio_util::sync::CancellationToken; + +use super::judge::{JudgeTool, VerdictSink}; +use super::{ + AgentOutcome, AgentRunRequest, CheckExecutor, assemble_instructions, judge_tool_directive, +}; +use crate::checks::config::Effort; +use crate::checks::config::ProviderFactory; + +/// How many agentic turns a check may take before it is treated as +/// "finished without reporting". Generous: the reasoning checks this feature +/// exists for explore several files before concluding. +const MAX_TURNS: u32 = 30; + +/// Runs each check by driving an in-process cersei agent. Model/provider/effort +/// come from injected configuration (see [`crate::checks::config::Config`]), +/// never hardcoded here. +pub struct CerseiExecutor { + /// Builds a fresh provider handle per check. cersei's `Agent` takes an owned + /// `Box`, and checks run concurrently, so we cannot share one + /// handle — the factory mints one per run from the resolved credentials. + factory: ProviderFactory, + /// The concrete model ID to run (e.g. `claude-sonnet-4-6`). + model: String, + /// The effort level, mapped to a sampling temperature (see + /// [`effort_temperature`]). + effort: Effort, + /// Per-agent wall-clock timeout; on expiry the run is dropped (which stops + /// the in-process agent) and the check resolves as errored. 
+ timeout: Duration, +} + +impl CerseiExecutor { + pub fn new(factory: ProviderFactory, model: String, effort: Effort, timeout: Duration) -> Self { + Self { + factory, + model, + effort, + timeout, + } + } +} + +/// The read-only tool set a verification agent gets by default: observe, do not +/// mutate. Execution-requiring checks (which would need Bash/Write) are gated +/// separately and are future work — the default is least privilege. +fn read_only_tools() -> Vec> { + vec![ + Box::new(cersei_tools::file_read::FileReadTool), + Box::new(cersei_tools::grep_tool::GrepTool), + Box::new(cersei_tools::glob_tool::GlobTool), + ] +} + +/// Map our coarse [`Effort`] onto a sampling temperature. +/// +/// Extended thinking would be the natural effort vehicle, but cersei-provider +/// 0.1.9 cannot round-trip Anthropic *thinking-block signatures*: its SSE parser +/// drops `signature_delta`, so the thinking block it sends back on the second +/// turn carries an empty signature and the API rejects it +/// (`Invalid signature in thinking block`). Until that is fixed upstream +/// (https://github.com/pacifio/cersei/issues/21) we leave thinking disabled and +/// apply effort as temperature instead — lower effort is more deterministic, +/// higher effort more exploratory. +fn effort_temperature(effort: Effort) -> f32 { + match effort { + Effort::Low => 0.0, + Effort::Medium => 0.5, + Effort::High => 1.0, + } +} + +#[async_trait] +impl CheckExecutor for CerseiExecutor { + async fn run_check(&self, req: AgentRunRequest) -> Result { + // Distinct session id per check: cersei's BashTool persists shell cwd/env + // in a process-global registry keyed by session_id, so a shared id would + // let parallel agents clobber each other's shell state. 
+ let session_id = format!("multi-check-{}", req.check_id); + + tracing::debug!( + check_id = req.check_id, + model = %self.model, + effort = ?self.effort, + session_id = %session_id, + "dispatching in-process cersei check", + ); + + let provider = self.factory.build()?; + + // The judge tool and the agent share a cancellation token: a recorded + // verdict cancels the agent so `run` returns the instant the check is + // decided, instead of burning the remaining turn budget. + let sink = VerdictSink::new(); + let cancel = CancellationToken::new(); + let judge = JudgeTool::new(sink.clone(), cancel.clone()); + + let instructions = assemble_instructions(&req.check, &judge_tool_directive()); + + let agent = Agent::builder() + .provider_boxed(provider) + .model(self.model.clone()) + .working_dir(req.working_dir.clone()) + .session_id(session_id.clone()) + // Least privilege: read-only tools + a policy that denies anything + // above ReadOnly (defense in depth if the tool set ever widens). + .permission_policy(AllowReadOnly) + .tools(read_only_tools()) + .tool(judge) + // Thinking is intentionally left disabled (see `effort_temperature`). + .temperature(effort_temperature(self.effort)) + .max_turns(MAX_TURNS) + .cancel_token(cancel.clone()) + .build() + .map_err(|e| miette!("building check agent: {e}"))?; + + let result = tokio::time::timeout(self.timeout, agent.run(&instructions)).await; + + // Clear this session's shell state so a retry (same id) or a later run + // never inherits stale cwd/env from the global registry. + clear_session_shell_state(&session_id); + + // The judge slot is authoritative: if a verdict landed, the run finished + // cleanly regardless of how `run` returned (we cancel it post-report, + // which surfaces as `CerseiError::Cancelled`). 
+ let verdict = sink.verdict(); + + let outcome = match result { + Ok(Ok(output)) => AgentOutcome { + verdict, + stop_reason: Some(format!("{:?}", output.stop_reason)), + turns: output.turns, + error: None, + }, + Ok(Err(err)) => { + let reported = verdict.is_some(); + AgentOutcome { + verdict, + // Our own post-report cancellation is not an error. + stop_reason: matches!(err, CerseiError::Cancelled) + .then(|| "cancelled".to_string()), + turns: 0, + error: (!reported).then(|| err.to_string()), + } + } + Err(_elapsed) => AgentOutcome { + // A verdict may have landed in the instant before the timeout. + verdict, + stop_reason: None, + turns: 0, + error: Some(format!("agent timed out after {:?}", self.timeout)), + }, + }; + + Ok(outcome) + } +} diff --git a/src/checks/executor/claude.rs b/src/checks/executor/claude.rs index 8343640..39fdbfa 100644 --- a/src/checks/executor/claude.rs +++ b/src/checks/executor/claude.rs @@ -1,28 +1,41 @@ -//! The concrete [`CheckExecutor`] for the MVP: shell out to the Claude Code CLI -//! via `claude -p`. The shell specifics live behind the trait so a future Claude -//! Code SDK executor can replace this without touching execution. +//! The legacy shell-out [`CheckExecutor`]: invoke the Claude Code CLI via +//! `claude -p`. Retained only as a **migration fallback** (selectable with +//! `--executor claude`) so its verdicts can be compared against the in-process +//! [`super::cersei::CerseiExecutor`] until cersei is validated, then retired. +//! +//! With the in-process MCP result server removed (MULTI-1367), this path can no +//! longer report through a localhost tool endpoint. Instead the agent is told to +//! write its verdict as JSON to a per-check sentinel file in the sandbox, which +//! the executor reads after the process exits. This is a deliberately simpler, +//! less-trustworthy channel than the in-process judge tool — acceptable for a +//! soon-to-be-removed fallback. 
+use std::path::Path; use std::time::Duration; use async_trait::async_trait; use miette::{IntoDiagnostic, Result}; use tokio::process::Command; -use super::{AgentOutcome, AgentRunRequest, CheckExecutor}; +use super::judge::CheckReport; +use super::{AgentOutcome, AgentRunRequest, CheckExecutor, assemble_instructions}; use crate::checks::config::Effort; +/// The sandbox-relative filename the fallback agent writes its verdict to. +const REPORT_FILE: &str = ".multitool-check-report.json"; + /// Runs each check by invoking `claude -p` non-interactively. Model/provider/ /// effort come from injected configuration (see [`crate::checks::config::Config`]), /// never hardcoded here. pub struct ClaudeExecutor { - /// The model family to run (e.g. the `sonnet` family for the MVP). + /// The model to run (a concrete ID, e.g. `claude-sonnet-4-6`). model: String, /// Optional model-provider base URL; when set, passed as `ANTHROPIC_BASE_URL`. provider_url: Option, - /// The effort level (logged for now; see TODO in `run_check`). + /// The effort level (logged for diagnostics; `claude -p` has no effort flag). effort: Effort, /// Per-agent wall-clock timeout; on expiry the child is killed and the check - /// resolves as errored (no report). + /// resolves as errored (no verdict). timeout: Duration, /// The CLI program to invoke (`claude`). program: String, @@ -45,28 +58,55 @@ impl ClaudeExecutor { } } +/// The reporting directive for the file-based fallback channel. +fn file_report_directive() -> String { + format!( + "Carry out the check described below. When — and only when — you have reached a conclusion, \ +write your verdict as a single JSON object to the file `{REPORT_FILE}` in your current working \ +directory, with this exact shape:\n\ + {{\"success\": true|false, \"evidence\": \"a short explanation\"}}\n\ +Set `success` to true if the check passes, or false if it fails. Write the file EXACTLY ONCE, then \ +stop. 
If you finish without writing `{REPORT_FILE}`, the check is treated as a FAILURE.", + ) +} + +/// Read and parse the sentinel verdict file, if the agent wrote one. +fn read_report(working_dir: &Path) -> Option { + #[derive(serde::Deserialize)] + struct Wire { + success: bool, + #[serde(default)] + evidence: Option, + } + let path = working_dir.join(REPORT_FILE); + let contents = std::fs::read_to_string(path).ok()?; + let wire: Wire = serde_json::from_str(&contents).ok()?; + Some(CheckReport { + success: wire.success, + evidence: wire.evidence, + }) +} + #[async_trait] impl CheckExecutor for ClaudeExecutor { async fn run_check(&self, req: AgentRunRequest) -> Result { - // TODO: "effort level" has no clean `claude -p` flag yet; for - // now it is recorded for diagnostics and wired through when a richer - // provider lands. tracing::debug!( check_id = req.check_id, model = %self.model, effort = ?self.effort, - "dispatching claude -p check", + "dispatching claude -p check (fallback)", ); + let instructions = assemble_instructions(&req.check, &file_report_directive()); + let mut cmd = Command::new(&self.program); cmd.arg("-p") - .arg(&req.instructions) + .arg(&instructions) .arg("--model") .arg(&self.model) - .arg("--mcp-config") - .arg(&req.mcp_config_path) // The sandbox is a throwaway CoW clone, so skip interactive - // permission prompts (the agent runs non-interactively). + // permission prompts (the agent runs non-interactively and must be + // able to write its verdict file). 
.arg("--dangerously-skip-permissions") .current_dir(&req.working_dir) .stdin(std::process::Stdio::null()) @@ -82,22 +122,44 @@ impl CheckExecutor for ClaudeExecutor { match tokio::time::timeout(self.timeout, child.wait_with_output()).await { Ok(result) => { let output = result.into_diagnostic()?; + let verdict = read_report(&req.working_dir); + let error = if verdict.is_none() && !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + Some(format!( + "claude -p exited without a verdict (exit {:?}){}", + output.status.code(), + stderr_suffix(&stderr), + )) + } else { + None + }; Ok(AgentOutcome { - exited_cleanly: output.status.success(), - exit_code: output.status.code(), - stderr: String::from_utf8_lossy(&output.stderr).into_owned(), - reported: None, // authoritative verdict arrives via MCP + verdict, + stop_reason: Some(format!("exit {:?}", output.status.code())), + turns: 0, + error, }) } Err(_elapsed) => { // The wait future is dropped here; `kill_on_drop` reaps the child. Ok(AgentOutcome { - exited_cleanly: false, - exit_code: None, - stderr: format!("agent timed out after {:?}", self.timeout), - reported: None, + // A verdict file may have been written just before the timeout. + verdict: read_report(&req.working_dir), + stop_reason: None, + turns: 0, + error: Some(format!("agent timed out after {:?}", self.timeout)), }) } } } } + +fn stderr_suffix(stderr: &str) -> String { + let trimmed = stderr.trim(); + if trimmed.is_empty() { + String::new() + } else { + let snippet: String = trimmed.chars().take(200).collect(); + format!(": {snippet}") + } +} diff --git a/src/checks/executor/fake.rs b/src/checks/executor/fake.rs index 4f4b658..e701551 100644 --- a/src/checks/executor/fake.rs +++ b/src/checks/executor/fake.rs @@ -1,7 +1,7 @@ -//! A test-only [`CheckExecutor`] double: returns scripted inline verdicts per -//! check id without spawning any process or touching the MCP server, so the +//! 
A test-only [`CheckExecutor`] double: returns scripted verdicts per check id +//! without spawning a process, building a model, or touching the network, so the //! execution → reconciliation → reporting pipeline can be driven -//! deterministically. (Tests & docs, MULTI-1354) +//! deterministically. (Tests & docs, MULTI-1354; updated for MULTI-1367.) use std::collections::{HashMap, HashSet}; use std::sync::Mutex; @@ -9,14 +9,14 @@ use std::sync::Mutex; use async_trait::async_trait; use miette::Result; +use super::judge::CheckReport; use super::{AgentOutcome, AgentRunRequest, CheckExecutor}; -use crate::checks::mcp::CheckReport; use crate::checks::model::CheckId; #[derive(Default)] pub struct FakeExecutor { scripted: HashMap, - /// Check ids that should simulate an agent crashing without reporting. + /// Check ids that should simulate an agent finishing without reporting. silent: HashSet, seen: Mutex>, } @@ -38,7 +38,7 @@ impl FakeExecutor { self } - /// Make `id` simulate an agent that crashes/exits without reporting. + /// Make `id` simulate an agent that finishes without reporting a verdict. 
pub fn with_silent(mut self, id: CheckId) -> Self { self.silent.insert(id); self @@ -56,17 +56,17 @@ impl CheckExecutor for FakeExecutor { self.seen.lock().unwrap().push(req.check_id); if self.silent.contains(&req.check_id) { return Ok(AgentOutcome { - exited_cleanly: false, - exit_code: Some(1), - stderr: "fake: agent crashed".into(), - reported: None, + verdict: None, + stop_reason: Some("fake: finished without reporting".into()), + turns: 1, + error: None, }); } Ok(AgentOutcome { - exited_cleanly: true, - exit_code: Some(0), - stderr: String::new(), - reported: self.scripted.get(&req.check_id).cloned(), + verdict: self.scripted.get(&req.check_id).cloned(), + stop_reason: Some("fake: reported".into()), + turns: 1, + error: None, }) } } diff --git a/src/checks/executor/judge.rs b/src/checks/executor/judge.rs new file mode 100644 index 0000000..4f74300 --- /dev/null +++ b/src/checks/executor/judge.rs @@ -0,0 +1,212 @@ +//! The per-check "judge" tool — the in-process verdict sink that replaces the +//! MCP result server (MULTI-1367). +//! +//! Agents are nondeterministic, so we do **not** trust their stdout or any +//! sentinel file: every agent reports its verdict by calling exactly one tool, +//! [`JUDGE_TOOL`]. A fresh [`JudgeTool`] is built **per check**, closing over its +//! own [`VerdictSink`] and the agent's [`CancellationToken`]. When the agent +//! calls it, the verdict is recorded into the slot (first call wins) and the +//! agent is cancelled — its job is done — so `Agent::run` returns promptly +//! instead of burning further turns. The executor reads the slot after the run. + +use std::sync::{Arc, Mutex}; + +use async_trait::async_trait; +use cersei_tools::{PermissionLevel, Tool, ToolCategory, ToolContext, ToolResult}; +use serde::Deserialize; +use serde_json::{Value, json}; +use tokio_util::sync::CancellationToken; + +/// The exact tool name an agent calls to report a verdict. 
Referenced verbatim +/// by the agent instructions (see [`super::assemble_instructions`]). Hyphens are +/// valid in Anthropic/OpenAI tool names; the name is unchanged from the MCP-era +/// `report-check-result` server so existing prompts and docs still read true. +pub const JUDGE_TOOL: &str = "report-check-result"; + +/// A verdict recorded for one check. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CheckReport { + /// The check's verdict — `true` means the requirement is satisfied. + pub success: bool, + /// Optional explanation of how the agent reached its conclusion. + pub evidence: Option, +} + +/// A write-once sink for one check's verdict, shared between the [`JudgeTool`] +/// handed to the agent and the executor that reads it after the run completes. +#[derive(Clone, Default)] +pub struct VerdictSink(Arc>>); + +impl VerdictSink { + pub fn new() -> Self { + Self::default() + } + + /// Record the verdict with first-call-wins semantics. Returns `true` if this + /// was the first (honored) call, `false` for an ignored duplicate. + fn record(&self, report: CheckReport) -> bool { + let mut slot = self.0.lock().unwrap(); + if slot.is_some() { + return false; + } + *slot = Some(report); + true + } + + /// The recorded verdict, if the agent reported one. + pub fn verdict(&self) -> Option { + self.0.lock().unwrap().clone() + } +} + +/// The arguments of a `report-check-result` call (the wire contract). +#[derive(Debug, Deserialize)] +struct ReportInput { + /// `true` if the check passes, `false` if it fails. + success: bool, + /// Optional short explanation of how the agent concluded. + #[serde(default)] + evidence: Option, +} + +/// The per-check judge tool. Built fresh per check, closing over its own +/// [`VerdictSink`] and the agent's [`CancellationToken`]. 
+pub struct JudgeTool { + sink: VerdictSink, + cancel: CancellationToken, +} + +impl JudgeTool { + pub fn new(sink: VerdictSink, cancel: CancellationToken) -> Self { + Self { sink, cancel } + } +} + +#[async_trait] +impl Tool for JudgeTool { + fn name(&self) -> &str { + JUDGE_TOOL + } + + fn description(&self) -> &str { + "Report whether this check passed. Call exactly once: set success=true if the check passes or false if it fails, with optional evidence explaining your reasoning." + } + + /// Always permitted, even under a read-only policy: reporting the verdict is + /// the agent's entire purpose and mutates nothing on disk. + fn permission_level(&self) -> PermissionLevel { + PermissionLevel::None + } + + fn category(&self) -> ToolCategory { + ToolCategory::Custom + } + + fn input_schema(&self) -> Value { + json!({ + "type": "object", + "properties": { + "success": { + "type": "boolean", + "description": "true if the check passes, false if it fails" + }, + "evidence": { + "type": "string", + "description": "Optional short explanation of how you concluded" + } + }, + "required": ["success"] + }) + } + + async fn execute(&self, input: Value, _ctx: &ToolContext) -> ToolResult { + let parsed: ReportInput = match serde_json::from_value(input) { + Ok(p) => p, + Err(e) => return ToolResult::error(format!("invalid report arguments: {e}")), + }; + let recorded = self.sink.record(CheckReport { + success: parsed.success, + evidence: parsed.evidence, + }); + if recorded { + // The agent's job is done; stop it after the current turn rather than + // letting it burn the remaining turn budget. 
+ self.cancel.cancel(); + ToolResult::success("result recorded") + } else { + ToolResult::success("result already recorded for this check; ignoring duplicate") + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sink_records_first_call_and_ignores_duplicates() { + let sink = VerdictSink::new(); + assert!(sink.verdict().is_none()); + + assert!(sink.record(CheckReport { + success: true, + evidence: Some("ok".into()), + })); + // A second call is ignored and does not overwrite. + assert!(!sink.record(CheckReport { + success: false, + evidence: None, + })); + + let stored = sink.verdict().expect("recorded"); + assert!(stored.success); + assert_eq!(stored.evidence.as_deref(), Some("ok")); + } + + #[tokio::test] + async fn execute_records_verdict_and_cancels_agent() { + let sink = VerdictSink::new(); + let cancel = CancellationToken::new(); + let tool = JudgeTool::new(sink.clone(), cancel.clone()); + let ctx = ToolContext { + working_dir: std::path::PathBuf::from("."), + session_id: "test".into(), + permissions: Arc::new(cersei_tools::permissions::AllowReadOnly), + cost_tracker: Arc::new(cersei_tools::CostTracker::new()), + mcp_manager: None, + extensions: cersei_tools::Extensions::default(), + }; + + assert!(!cancel.is_cancelled()); + let result = tool + .execute(json!({ "success": false, "evidence": "nope" }), &ctx) + .await; + assert!(!result.is_error); + assert!(cancel.is_cancelled(), "reporting must cancel the agent"); + + let verdict = sink.verdict().expect("verdict recorded"); + assert!(!verdict.success); + assert_eq!(verdict.evidence.as_deref(), Some("nope")); + + // A duplicate call is acknowledged but does not overwrite. 
+ let dup = tool.execute(json!({ "success": true }), &ctx).await; + assert!(!dup.is_error); + assert!(!sink.verdict().unwrap().success); + } + + #[tokio::test] + async fn execute_rejects_malformed_input() { + let tool = JudgeTool::new(VerdictSink::new(), CancellationToken::new()); + let ctx = ToolContext { + working_dir: std::path::PathBuf::from("."), + session_id: "test".into(), + permissions: Arc::new(cersei_tools::permissions::AllowReadOnly), + cost_tracker: Arc::new(cersei_tools::CostTracker::new()), + mcp_manager: None, + extensions: cersei_tools::Extensions::default(), + }; + // `success` is required. + let result = tool.execute(json!({ "evidence": "x" }), &ctx).await; + assert!(result.is_error); + } +} diff --git a/src/checks/executor/mod.rs b/src/checks/executor/mod.rs index fe97700..8468caf 100644 --- a/src/checks/executor/mod.rs +++ b/src/checks/executor/mod.rs @@ -1,19 +1,27 @@ -//! The agent-executor seam (M2). [`CheckExecutor`] abstracts "run one check's -//! agent" so the concrete `claude -p` executor can later be swapped for a Claude -//! Code SDK (or other provider) without touching the execution phase. Per the -//! spec it is a boxed trait object for dynamic dispatch, mirroring the repo's -//! `BoxedIngress` / `BoxedMonitor` / `BoxedPlatform` convention. +//! The agent-executor seam (M2, narrowed in MULTI-1367). [`CheckExecutor`] +//! abstracts "run one check's agent → verdict/outcome". cersei-agent absorbs the +//! *provider-abstraction* rationale the seam originally carried, but not its +//! *test-seam* rationale: `cersei_agent::Agent` is a concrete struct, so the +//! execution-phase tests still need a fake. The trait keeps one method with three +//! impls — the real in-process [`cersei::CerseiExecutor`], the soon-to-retire +//! shell-out [`claude::ClaudeExecutor`] fallback (selectable for migration), and +//! the test [`FakeExecutor`]. It is a boxed trait object for dynamic dispatch, +//! 
mirroring the repo's `BoxedIngress` / `BoxedMonitor` / `BoxedPlatform` +//! convention. +pub mod cersei; pub mod claude; #[cfg(test)] mod fake; +pub mod judge; use std::path::PathBuf; use async_trait::async_trait; use miette::Result; -use crate::checks::mcp::{CheckReport, REPORT_TOOL}; +pub use judge::{CheckReport, JUDGE_TOOL}; + use crate::checks::model::{Check, CheckId}; #[cfg(test)] @@ -21,62 +29,79 @@ pub use fake::FakeExecutor; /// Everything an executor needs to run one check's agent. pub struct AgentRunRequest { - /// The check this request runs, for routing/labelling. + /// The check this request runs, for routing/labelling and assembling the + /// agent's instructions. pub check_id: CheckId, - /// The assembled instructions + check prompt (see [`assemble_instructions`]). - pub instructions: String, + /// The check to validate (title + prompt). Each executor assembles its own + /// instructions from this so it can describe its own reporting channel. + pub check: Check, /// The sandbox directory to run the agent in (its working directory). pub working_dir: PathBuf, - /// Path to the per-check `--mcp-config` JSON file (points at this check's - /// dedicated MCP endpoint). - pub mcp_config_path: PathBuf, } -/// Process-level signal from running an agent. +/// The result of running one check's agent in-process. /// -/// **Note:** the authoritative verdict is the MCP-reported result, not this. -/// [`AgentOutcome::reported`] is an *optional* inline verdict for executors that -/// capture the tool call directly (a future in-process SDK, or the test fake); -/// the shell-out `claude -p` executor always leaves it `None`. +/// The authoritative signal is [`AgentOutcome::verdict`]: when present, the agent +/// reported via the judge tool. 
The remaining fields are diagnostics that +/// distinguish the *new* failure modes — an agent that hit `max_turns` without +/// reporting, a stream error, or a timeout — so execution can synthesize a clear +/// "errored" reason when no verdict arrived. #[derive(Debug, Clone, Default)] pub struct AgentOutcome { - /// Whether the agent process exited with a success status. - pub exited_cleanly: bool, - /// The process exit code, if one was produced. - pub exit_code: Option, - /// Captured stderr, for surfacing execution errors (distinct from a check - /// merely *failing*). - pub stderr: String, - /// An inline verdict obtained by the executor itself, if any. - pub reported: Option, + /// The verdict the agent reported via the judge tool, if it reported at all. + pub verdict: Option, + /// Why the agent's loop stopped (human-readable), for diagnostics when no + /// verdict was reported. + pub stop_reason: Option, + /// How many turns the agent took (best-effort; `0` when unavailable). + pub turns: u32, + /// An execution-level error distinct from a check merely *failing* (stream + /// error, agent-build error, or timeout). `None` on a clean finish. + pub error: Option, +} + +impl AgentOutcome { + /// Whether the agent reported a verdict (the only authoritative signal). + pub fn has_verdict(&self) -> bool { + self.verdict.is_some() + } } /// The abstraction over running a single check's agent. #[async_trait] pub trait CheckExecutor: Send + Sync { - /// Run a single check's agent against its dedicated MCP endpoint. + /// Run a single check's agent and return its verdict/outcome. async fn run_check(&self, req: AgentRunRequest) -> Result; } /// A boxed [`CheckExecutor`] for dynamic dispatch (DI seam). pub type BoxedExecutor = Box; +/// The reporting directive for the default (in-process) executor: call the judge +/// tool exactly once. Kept separate from [`assemble_instructions`] so the legacy +/// shell-out fallback can substitute its own reporting channel. 
+pub fn judge_tool_directive() -> String { + format!( + "Carry out the check described below. When — and only when — you have reached a conclusion, \ +you MUST call the `{JUDGE_TOOL}` tool EXACTLY ONCE:\n\ + - set `success` to true if the check passes, or false if it fails;\n\ + - optionally set `evidence` to a short explanation of how you concluded.\n\ +Report your result ONLY through `{JUDGE_TOOL}` — not via stdout, not via a file — and do not \ +call it more than once. After calling it, stop. If you finish without calling `{JUDGE_TOOL}`, \ +the check is treated as a FAILURE.", + ) +} + /// Assemble the instruction text handed to an agent: standing operating -/// instructions (it MUST call the report tool exactly once) plus the check -/// prompt verbatim. (MULTI-1350) -pub fn assemble_instructions(check: &Check) -> String { +/// instructions, the executor-supplied `reporting` directive, then the check +/// prompt verbatim. (MULTI-1350, parametrized in MULTI-1367.) +pub fn assemble_instructions(check: &Check, reporting: &str) -> String { format!( "You are validating a single requirement for the MultiTool Checks tool.\n\ Your current working directory is a sandboxed, throwaway copy of the user's repository; \ you may read it and run commands against it freely.\n\ \n\ -Carry out the check described below. When — and only when — you have reached a conclusion, \ -you MUST call the `{REPORT_TOOL}` tool EXACTLY ONCE:\n\ - - set `success` to true if the check passes, or false if it fails;\n\ - - optionally set `evidence` to a short explanation of how you concluded.\n\ -Report your result ONLY through `{REPORT_TOOL}` — not via stdout, not via a file — and do not \ -call it more than once. After calling it, stop. 
If you finish without calling `{REPORT_TOOL}`, \ -the check is treated as a FAILURE.\n\ +{reporting}\n\ \n\ --- CHECK: {title} ---\n\ {prompt}\n", @@ -98,9 +123,9 @@ mod tests { title: "No yellow".into(), prompt: "scan for yellow text".into(), }; - let text = assemble_instructions(&check); + let text = assemble_instructions(&check, &judge_tool_directive()); assert!(text.contains("scan for yellow text")); - assert!(text.contains(REPORT_TOOL)); + assert!(text.contains(JUDGE_TOOL)); assert!(text.contains("EXACTLY ONCE")); assert!(text.contains("No yellow")); } diff --git a/src/checks/mcp/mod.rs b/src/checks/mcp/mod.rs deleted file mode 100644 index 0182bae..0000000 --- a/src/checks/mcp/mod.rs +++ /dev/null @@ -1,302 +0,0 @@ -//! The in-process MCP result-reporting server — the trustworthy guardrail. -//! -//! Because agents are nondeterministic, we do **not** trust stdout or sentinel -//! files. Every agent reports its verdict by calling a single MCP tool, -//! [`REPORT_TOOL`] (`report-check-result`), served by **one** in-process `rmcp` -//! server bound to a localhost port and run on a dedicated tokio task within -//! this process (never a subprocess). -//! -//! The single server hosts **N endpoints — one per check** (`/checks/{id}`), so -//! each agent has a unique URL to write its singleton result to. Each check has -//! a [`tokio::sync::Notify`] and a slot in a shared map; when an agent reports, -//! the handler records the verdict and notifies, so execution can wake the -//! instant a check reports (and kill that agent — its job is done). 
- -use std::collections::HashMap; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Arc, Mutex}; - -use miette::{IntoDiagnostic, Result}; -use rmcp::handler::server::wrapper::Parameters; -use rmcp::model::{CallToolResult, Content}; -use rmcp::transport::streamable_http_server::session::local::LocalSessionManager; -use rmcp::transport::streamable_http_server::{StreamableHttpServerConfig, StreamableHttpService}; -use rmcp::{ErrorData, ServerHandler, tool, tool_handler, tool_router}; -use schemars::JsonSchema; -use serde::Deserialize; -use tokio::sync::Notify; -use tokio::task::JoinHandle; -use tokio_util::sync::CancellationToken; - -use crate::checks::model::CheckId; - -/// The exact MCP tool name agents call to report a verdict. Referenced verbatim -/// by the agent instructions (M5) and the `--mcp-config` payload. -pub const REPORT_TOOL: &str = "report-check-result"; - -/// The MCP server name advertised in the `--mcp-config` payload. -pub const SERVER_NAME: &str = "multitool-checks"; - -/// Shared store of reported verdicts, keyed by check id. -pub type ReportStore = Arc>>; - -/// The arguments of a `report-check-result` tool call (the wire contract). -#[derive(Debug, Clone, Deserialize, JsonSchema)] -pub struct ReportCheckResult { - /// The check's verdict — `true` means the requirement is satisfied. - pub success: bool, - /// Optional explanation of how the agent reached its conclusion. - #[serde(default)] - pub evidence: Option, -} - -/// A verdict recorded by the server for one check. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct CheckReport { - pub success: bool, - pub evidence: Option, -} - -/// The per-check MCP handler. One is constructed per session by the service -/// factory; all sessions for a given check share the same `reported` flag, -/// result store, and notifier, so single-call semantics hold across reconnects -/// and the report is observable to execution. 
-#[derive(Clone)] -struct ReportServer { - check_id: CheckId, - reported: Arc, - reports: ReportStore, - notify: Arc, -} - -#[tool_router] -impl ReportServer { - #[tool( - name = "report-check-result", - description = "Report whether this check passed. Call exactly once: set success=true if the check passes or false if it fails, with optional evidence explaining your reasoning." - )] - async fn report_check_result( - &self, - params: Parameters, - ) -> Result { - let Parameters(input) = params; - tracing::debug!( - check_id = self.check_id, - success = input.success, - "report-check-result received" - ); - let recorded = self.record(CheckReport { - success: input.success, - evidence: input.evidence, - }); - let msg = if recorded { - "result recorded" - } else { - "result already recorded for this check; ignoring duplicate" - }; - Ok(CallToolResult::success(vec![Content::text(msg)])) - } -} - -impl ReportServer { - /// Record the report with single-call semantics. Returns `true` if this was - /// the first (and only honored) call for the check, `false` for a duplicate. - fn record(&self, report: CheckReport) -> bool { - if self.reported.swap(true, Ordering::SeqCst) { - tracing::warn!( - check_id = self.check_id, - "duplicate report-check-result call ignored" - ); - return false; - } - self.reports.lock().unwrap().insert(self.check_id, report); - // `notify_one` stores a permit if no one is waiting yet, so a waiter that - // arrives after the report still wakes immediately (no lost wakeups). - self.notify.notify_one(); - true - } -} - -#[tool_handler] -impl ServerHandler for ReportServer {} - -/// A handle to the running result server: the bound port, the per-check result -/// store + notifiers, and the means to shut the server task down. -pub struct ResultServer { - base_url: String, - cancel: CancellationToken, - join: JoinHandle<()>, - reports: ReportStore, - notifiers: HashMap>, -} - -/// The URL path hosting the endpoint for `check_id`. 
-fn endpoint_path(check_id: CheckId) -> String { - format!("/checks/{check_id}") -} - -impl ResultServer { - /// Stand up the single server with one endpoint per check id, bound to an - /// OS-assigned localhost port, on a dedicated tokio task. - pub async fn start(check_ids: &[CheckId]) -> Result { - let cancel = CancellationToken::new(); - let session_manager = Arc::new(LocalSessionManager::default()); - let reports: ReportStore = Arc::new(Mutex::new(HashMap::new())); - let mut notifiers: HashMap> = HashMap::new(); - - let mut router = axum::Router::new(); - for &id in check_ids { - let notify = Arc::new(Notify::new()); - notifiers.insert(id, notify.clone()); - let reported = Arc::new(AtomicBool::new(false)); - let reports_for_check = reports.clone(); - // `StreamableHttpServerConfig` is `#[non_exhaustive]`, so build it - // from `default()` and override the fields we care about. We run in - // *stateful* Streamable HTTP mode: the Claude Code MCP client expects - // the standard session flow (initialize → `Mcp-Session-Id` → - // subsequent requests), and a stateless server stalls its multi-step - // handshake. 
- let mut config = StreamableHttpServerConfig::default(); - config.stateful_mode = true; - config.cancellation_token = cancel.clone(); - let factory = move || { - Ok::<_, std::io::Error>(ReportServer { - check_id: id, - reported: reported.clone(), - reports: reports_for_check.clone(), - notify: notify.clone(), - }) - }; - let service = StreamableHttpService::new(factory, session_manager.clone(), config); - router = router.nest_service(&endpoint_path(id), service); - } - - let listener = tokio::net::TcpListener::bind(("127.0.0.1", 0)) - .await - .into_diagnostic()?; - let port = listener.local_addr().into_diagnostic()?.port(); - - let shutdown = cancel.clone(); - let join = tokio::spawn(async move { - let server = axum::serve(listener, router) - .with_graceful_shutdown(async move { shutdown.cancelled().await }); - if let Err(e) = server.await { - tracing::error!("MCP result server error: {e}"); - } - }); - - Ok(Self { - base_url: format!("http://127.0.0.1:{port}"), - cancel, - join, - reports, - notifiers, - }) - } - - /// The full endpoint URL an agent should connect to for `check_id`. - pub fn endpoint_url(&self, check_id: CheckId) -> String { - format!("{}{}", self.base_url, endpoint_path(check_id)) - } - - /// The notifier + result store for `check_id`, so a caller can await the - /// check's report and read it once it arrives. - pub fn report_handle(&self, check_id: CheckId) -> (Arc, ReportStore) { - (self.notifiers[&check_id].clone(), self.reports.clone()) - } - - /// The verdict recorded for `check_id`, if any. - pub fn report_for(&self, check_id: CheckId) -> Option { - self.reports.lock().unwrap().get(&check_id).cloned() - } - - /// Signal the server task to stop and wait for it to wind down. - pub async fn shutdown(self) { - self.cancel.cancel(); - let _ = self.join.await; - } -} - -/// Build the `--mcp-config` JSON payload pointing an agent at `endpoint_url`, -/// declaring the `report-check-result` server under [`SERVER_NAME`]. 
(M4 #1348) -pub fn mcp_config_json(endpoint_url: &str) -> String { - let server = serde_json::json!({ "type": "http", "url": endpoint_url }); - let mut servers = serde_json::Map::new(); - servers.insert(SERVER_NAME.to_string(), server); - let mut root = serde_json::Map::new(); - root.insert("mcpServers".to_string(), serde_json::Value::Object(servers)); - serde_json::Value::Object(root).to_string() -} - -#[cfg(test)] -mod tests { - use super::*; - - fn report_server(check_id: CheckId) -> (ReportServer, ReportStore, Arc) { - let reports: ReportStore = Arc::new(Mutex::new(HashMap::new())); - let notify = Arc::new(Notify::new()); - let server = ReportServer { - check_id, - reported: Arc::new(AtomicBool::new(false)), - reports: reports.clone(), - notify: notify.clone(), - }; - (server, reports, notify) - } - - #[test] - fn record_enforces_single_call_and_delivers() { - let (server, reports, _notify) = report_server(7); - - assert!(server.record(CheckReport { - success: true, - evidence: Some("ok".into()) - })); - // Duplicate is ignored and does not overwrite. - assert!(!server.record(CheckReport { - success: false, - evidence: None - })); - - let stored = reports.lock().unwrap().get(&7).cloned().expect("recorded"); - assert!(stored.success); - assert_eq!(stored.evidence.as_deref(), Some("ok")); - assert_eq!(reports.lock().unwrap().len(), 1); - } - - #[tokio::test] - async fn record_wakes_a_waiter() { - let (server, _reports, notify) = report_server(0); - // A report that lands before the wait still wakes it (notify_one permit). - server.record(CheckReport { - success: true, - evidence: None, - }); - // Should return promptly rather than hang. 
- tokio::time::timeout(std::time::Duration::from_secs(1), notify.notified()) - .await - .expect("notified"); - } - - #[test] - fn mcp_config_targets_the_endpoint_and_tool_server() { - let json = mcp_config_json("http://127.0.0.1:5050/checks/3"); - let value: serde_json::Value = serde_json::from_str(&json).unwrap(); - assert_eq!( - value["mcpServers"]["multitool-checks"]["url"], - "http://127.0.0.1:5050/checks/3" - ); - assert_eq!(value["mcpServers"]["multitool-checks"]["type"], "http"); - } - - #[tokio::test] - async fn server_binds_a_port_and_shuts_down() { - let server = ResultServer::start(&[0, 1, 2]).await.unwrap(); - assert!(server.endpoint_url(1).ends_with("/checks/1")); - assert!(server.endpoint_url(1).starts_with("http://127.0.0.1:")); - // No reports arrived. - assert!(server.report_for(1).is_none()); - let _ = server.report_handle(2); - server.shutdown().await; - } -} diff --git a/src/checks/mod.rs b/src/checks/mod.rs index 8833e05..00ae9c3 100644 --- a/src/checks/mod.rs +++ b/src/checks/mod.rs @@ -6,14 +6,13 @@ //! 1. [`config`] — the (hardcoded, dependency-injected) configuration phase. //! 2. [`discovery`] — find/parse `CHECKS.md` files into a `Vec`. //! 3. [`execution`] — run each check in a CoW [`sandbox`] via a boxed -//! [`executor`], reporting verdicts through the [`mcp`] result server. +//! [`executor`], capturing verdicts through each agent's in-process judge tool. //! 4. [`reporting`] — render verdicts and produce the process exit code. pub mod config; mod discovery; mod execution; pub mod executor; -pub mod mcp; pub mod model; mod reporting; pub mod sandbox; @@ -35,17 +34,14 @@ use crate::checks::config::CliOverrides; /// Operational errors (e.g. an invalid `CHECKS.md`) surface as `Err` diagnostics /// rather than an exit code, so CI can tell "checks failed" from "tool errored". 
pub async fn run(terminal: &Terminal, working_dir: &Path, overrides: CliOverrides) -> Result { - // Phase 1: configuration — resolve provider/model/effort (flag > env > file) - // and construct the provider registry, injected forward. + // Phase 1: configuration — resolve provider/model/effort/executor + // (flag > env > file) and construct the provider registry, injected forward. let resolved = config::load(overrides)?; - let cfg = resolved.config; - // The provider registry is constructed and handed off here; wiring it into a - // real executor is a follow-up. For now the MVP `ClaudeExecutor` runs the - // checks, so just record what was built. tracing::debug!( - provider = cfg.provider.as_str(), - model = %cfg.model, + provider = resolved.config.provider.as_str(), + model = %resolved.config.model, + executor = ?resolved.config.executor, available_providers = ?resolved.providers.keys().collect::>(), "resolved checks configuration and provider registry", ); @@ -53,9 +49,11 @@ pub async fn run(terminal: &Terminal, working_dir: &Path, overrides: CliOverride // Phase 2: discovery. let requirements = discovery::discover(working_dir).await?; - // Phase 3: execution — the executor is built from config and injected. - let executor = cfg.build_executor(); - let outcomes = execution::execution_phase(&cfg, executor, working_dir, &requirements).await?; + // Phase 3: execution — the selected executor is built from config and + // injected (default: the in-process cersei agent). + let executor = resolved.build_executor()?; + let outcomes = + execution::execution_phase(&resolved.config, executor, working_dir, &requirements).await?; // Phase 4: reporting + exit code. 
reporting::report(terminal, &outcomes) diff --git a/src/config/check/mod.rs b/src/config/check/mod.rs index 18b5cae..94c34d6 100644 --- a/src/config/check/mod.rs +++ b/src/config/check/mod.rs @@ -2,7 +2,7 @@ use std::path::{Path, PathBuf}; use clap::Args; -use crate::checks::config::{CliOverrides, Effort, ProviderKind}; +use crate::checks::config::{CliOverrides, Effort, ExecutorKind, ProviderKind}; /// `multi check`: validate the requirements declared in `CHECKS.md` files. /// @@ -27,6 +27,11 @@ pub struct CheckSubcommand { /// The agent effort level. Overrides `checks.effort` from env/file. #[arg(long, value_enum)] effort: Option, + + /// The execution engine: `cersei` (in-process, default) or `claude` (the + /// legacy `claude -p` fallback). Overrides `checks.executor` from env/file. + #[arg(long, value_enum)] + executor: Option, } impl CheckSubcommand { @@ -38,6 +43,11 @@ impl CheckSubcommand { /// The flag layer for the config merge, carrying only the values the user /// actually passed. pub fn overrides(&self) -> CliOverrides { - CliOverrides::new(self.provider, self.model.clone(), self.effort) + CliOverrides::new( + self.provider, + self.model.clone(), + self.effort, + self.executor, + ) } } diff --git a/third_party/cersei-provider/Cargo.toml b/third_party/cersei-provider/Cargo.toml new file mode 100644 index 0000000..a901cd6 --- /dev/null +++ b/third_party/cersei-provider/Cargo.toml @@ -0,0 +1,76 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2021" +name = "cersei-provider" +version = "0.1.9" +authors = ["Adib Mohsin"] +build = false +autolib = false +autobins = false +autoexamples = false +autotests = false +autobenches = false +description = "Provider trait and built-in LLM providers for the Cersei SDK" +readme = "README.md" +license = "MIT" +repository = "https://github.com/pacifio/cersei" + +[lib] +name = "cersei_provider" +path = "src/lib.rs" + +[dependencies.async-trait] +version = "0.1" + +[dependencies.base64] +version = "0.22" + +[dependencies.cersei-types] +version = "0.1.9" + +[dependencies.chrono] +version = "0.4" +features = ["serde"] + +[dependencies.futures] +version = "0.3" + +[dependencies.reqwest] +version = "0.12" +features = [ + "json", + "stream", + "rustls-tls", + "rustls-tls-webpki-roots", +] +default-features = false + +[dependencies.reqwest-eventsource] +version = "0.6" + +[dependencies.serde] +version = "1" +features = ["derive"] + +[dependencies.serde_json] +version = "1" + +[dependencies.tokio] +version = "1.44" +features = ["full"] + +[dependencies.tracing] +version = "0.1" + +[dependencies.url] +version = "2" diff --git a/third_party/cersei-provider/PATCH.md b/third_party/cersei-provider/PATCH.md new file mode 100644 index 0000000..9f6549e --- /dev/null +++ b/third_party/cersei-provider/PATCH.md @@ -0,0 +1,50 @@ +# Vendored `cersei-provider` (local patch) + +This is a vendored copy of [`cersei-provider`](https://crates.io/crates/cersei-provider) +`0.1.9`, applied via `[patch.crates-io]` in the workspace `Cargo.toml`. + +## Why + +Upstream `0.1.9` (and `main` as of 2026-06) sends this header on **every** +Anthropic request, unconditionally: + +``` +anthropic-beta: interleaved-thinking-2025-04-14,token-efficient-tools-2025-02-19 +``` + +The current Anthropic API rejects `interleaved-thinking-2025-04-14` with: + +``` +HTTP 400 invalid_request_error: Unexpected value(s) `interleaved-thinking-2025-04-14` +for the `anthropic-beta` header. 
+``` + +Because the header is not gated on whether thinking is enabled, this breaks +**every** request — making the in-process check executor (MULTI-1367) unusable +against Anthropic. There is no builder/config knob to disable it. + +Upstream issue: https://github.com/pacifio/cersei/issues/20 + +## The change + +`src/anthropic.rs`: `ANTHROPIC_BETA_HEADER` no longer includes the stale +`interleaved-thinking-2025-04-14` value. Only the still-accepted +`token-efficient-tools-2025-02-19` beta is sent. Extended thinking continues to +work through the `thinking` request-body parameter (which is GA and needs no beta +header). This is the single, localized diff from upstream `0.1.9`. + +## Removing this patch + +Delete `third_party/cersei-provider/`, drop the `[patch.crates-io]` block in the +workspace `Cargo.toml`, and bump `cersei-provider` to the first upstream release +that corrects (or makes configurable) the `anthropic-beta` header +(https://github.com/pacifio/cersei/issues/20). + +## Related + +Extended thinking is also left disabled in the check executor because +cersei-provider drops Anthropic thinking-block signatures off the stream +(`signature_delta`), so thinking blocks round-trip with an empty signature and +the API rejects them on the second turn. Tracked separately at +https://github.com/pacifio/cersei/issues/21; see `effort_temperature` in +`src/checks/executor/cersei.rs`. diff --git a/third_party/cersei-provider/README.md b/third_party/cersei-provider/README.md new file mode 100644 index 0000000..b4e2ebf --- /dev/null +++ b/third_party/cersei-provider/README.md @@ -0,0 +1,526 @@ +# Cersei + +The complete Rust SDK for building coding agents. + +Cersei gives you every building block of a production coding agent — tool execution, LLM streaming, sub-agent orchestration, persistent memory, skills, MCP integration — as composable library functions. Build a Claude Code replacement, embed an agent in your app, or create something entirely new. 
+ +```rust +use cersei::prelude::*; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let output = Agent::builder() + .provider(Anthropic::from_env()?) + .tools(cersei::tools::coding()) + .permission_policy(AllowAll) + .run_with("Fix the failing tests in src/") + .await?; + + println!("{}", output.text()); + Ok(()) +} +``` + +**MIT License** | Built by [Adib Mohsin](https://github.com/pacifio) | [Docs](https://cersei.pacifio.dev/docs) | [GitHub](https://github.com/pacifio/cersei) + +--- + +## Why Cersei + +| | Claude Code | OpenCode | **Cersei SDK** | **Abstract CLI** | +|---|---|---|---|---| +| Form factor | CLI app | CLI app | **Library** | **CLI app** | +| Embeddable | No | No | **Yes** | No (uses SDK) | +| Provider | Anthropic only | Multi-provider | **Multi-provider** | **Multi-provider** | +| Language | TypeScript | TypeScript | **Rust** | **Rust** | +| Custom tools | Plugins | Plugins | **`impl Tool` / `#[derive(Tool)]`** | Via SDK | +| Startup | ~269ms | ~300ms | N/A (library) | **~34ms** | +| Binary / RSS | 174MB / 330MB | — | N/A | **5.8MB / 4.9MB** | +| Memory | File-based | SQLite | **File + Graph** | **File + Graph** | +| Skills | `.claude/commands/` | `.claude/skills/` | **Both formats** | **Both formats** | + +Cersei is built from the architecture of Claude Code (reverse-engineered Rust port) and designed so that anyone can build a complete, drop-in replacement for Claude Code, OpenCode, or any coding agent — as a library call. + +--- + +## Abstract — The CLI + +**Abstract** is a complete CLI coding agent built on Cersei. One binary, zero runtime dependencies, graph memory by default. 
+ +```bash +# Install +cargo install --path crates/abstract-cli + +# Use +abstract # Interactive REPL +abstract "fix the failing tests" # Single-shot +abstract --resume # Resume last session +abstract --model opus --max # Opus with max thinking +abstract --no-permissions --json # CI mode with NDJSON output +``` + +### Abstract vs Claude Code + +All numbers from `run_tool_bench.sh --full`. + +| Metric | Abstract | Claude Code | Winner | +|--------|----------|-------------|--------| +| Startup (warm) | **32ms** | 266ms | Abstract (8.2x) | +| Binary size | **6.0 MB** | 174 MB | Abstract (29x) | +| Memory (RSS) | **4.9 MB** | 333 MB | Abstract (68x) | +| Tool dispatch | **0.02-17ms** | 5-265ms+ | Abstract | +| Memory recall | **98us** (graph) | 7,545ms (LLM) | Abstract (77,000x) | +| Memory write | **30us** (graph) | 20,687ms (agent) | Abstract (689,000x) | +| MEMORY.md load | **9.6us** | 17.1ms | Abstract (1,781x) | +| Sequential throughput | **906ms/req** | 12,079ms/req | Abstract (13.3x) | +| System prompt tokens | **~2,200** | ~8,000+ | Abstract (3.6x fewer) | +| LLM call for recall | **Not needed** | Required (Sonnet) | Abstract | + +> Claude Code's memory recall calls Sonnet every turn to rank the top 5 files by relevance (7.5s measured). +> Abstract's graph does indexed lookups in 98 microseconds — same capability, no LLM call, no API cost. + +Full benchmark: [`crates/abstract-cli/benchmarks/REPORT.md`](crates/abstract-cli/benchmarks/REPORT.md) + +### Features + +- 34 built-in tools (file, shell, web, planning, orchestration, scheduling) +- Multi-provider: Anthropic + OpenAI (+ Ollama, Azure, vLLM) +- Graph memory (Grafeo) on by default +- Auto-compact, auto-dream, effort levels (Low/Medium/High/Max) +- MCP server support +- Session persistence (Claude Code-compatible JSONL) +- Interactive permissions with session caching +- 12 slash commands (`/help`, `/commit`, `/review`, `/memory`, `/model`, `/diff`, etc.) 
+- Streaming markdown rendering with syntax highlighting +- TOML config: `~/.abstract/config.toml` + `.abstract/config.toml` +- JSON output mode for piping (`--json`) + +--- + +## Install + +```toml +[dependencies] +cersei = { git = "https://github.com/pacifio/cersei" } +tokio = { version = "1", features = ["full"] } +anyhow = "1" +``` + +For graph-backed memory (optional): +```toml +cersei-memory = { git = "https://github.com/pacifio/cersei", features = ["graph"] } +``` + +--- + +## Architecture + +``` +cersei Facade crate — use cersei::prelude::*; + cersei-types Provider-agnostic messages, errors, stream events + cersei-provider Provider trait + Anthropic/OpenAI implementations + cersei-tools 30+ tools, permissions, bash classifier, skills, git utils + cersei-tools-derive #[derive(Tool)] proc macro + cersei-agent Agent builder, agentic loop, compact, coordinator, effort + cersei-memory Memory trait, memdir, CLAUDE.md, sessions, Grafeo graph + cersei-hooks Hook/middleware system + cersei-mcp MCP client (JSON-RPC 2.0, stdio transport) +abstract-cli CLI coding agent ("abstract") — REPL, commands, config, permissions +``` + +--- + +## Core Concepts + +### Provider + +Any LLM backend. Built-in: Anthropic (with OAuth), OpenAI (compatible with Ollama, Azure, vLLM). + +```rust +Agent::builder().provider(Anthropic::from_env()?) // Anthropic API key +Agent::builder().provider(OpenAi::builder() + .base_url("http://localhost:11434/v1") // Ollama + .model("llama3.1:70b").api_key("ollama").build()?) 
+Agent::builder().provider(MyCustomProvider) // impl Provider +``` + +### Tools (30+) + +Every tool a coding agent needs, organized into sets: + +```rust +cersei::tools::all() // 30+ tools +cersei::tools::coding() // filesystem + shell + web +cersei::tools::filesystem() // Read, Write, Edit, Glob, Grep, NotebookEdit +cersei::tools::shell() // Bash, PowerShell +cersei::tools::web() // WebFetch, WebSearch +cersei::tools::planning() // EnterPlanMode, ExitPlanMode, TodoWrite +cersei::tools::scheduling() // CronCreate/List/Delete, Sleep, RemoteTrigger +cersei::tools::orchestration() // SendMessage, Tasks (6 tools), Worktree +``` + +Custom tools in 10 lines: + +> The `#[derive(Tool)]` macro generates code with `#[async_trait::async_trait]` and `cercei-tools`, to make it work add both of it to depending on your project. +> ```toml +> async-trait = "0.1" +> cersei = { path = "path/to/cersei" } # or git +> cersei-tools = { path = "path/to/cersei/crates/cersei-tools" } +> ``` +> or write `use cersei::tools as cersei_tools;` when using `derive(Tool)`; + + +```rust +#[derive(Tool)] +#[tool(name = "search", description = "Search docs", permission = "read_only")] +struct SearchTool; + +#[async_trait] +impl ToolExecute for SearchTool { + type Input = SearchInput; // derives Deserialize + JsonSchema + async fn run(&self, input: SearchInput, ctx: &ToolContext) -> ToolResult { + ToolResult::success(format!("Found: {}", input.query)) + } +} +``` + +### Sub-Agent Orchestration + +Spawn parallel workers, coordinate tasks, pass messages between agents: + +```rust +// AgentTool — model spawns sub-agents autonomously +Agent::builder() + .tool(AgentTool::new(|| Box::new(Anthropic::from_env()?), cersei::tools::coding())) + +// Coordinator mode — orchestrate parallel workers +Agent::builder() + .tools(cersei::tools::all()) // includes Agent, Tasks, SendMessage + // Workers get filtered tools (no Agent — prevents recursion) + +// Task system +// TaskCreate → TaskUpdate → TaskGet → TaskList → 
TaskStop → TaskOutput +``` + +### Memory (Three-Tier) + +```rust +use cersei::memory::manager::MemoryManager; + +let mm = MemoryManager::new(project_root) + .with_graph(Path::new("./memory.grafeo"))?; // optional graph layer + +// Tier 1: Flat files (~/.claude/projects//memory/) +let metas = mm.scan(); // scan .md files with frontmatter +let content = mm.build_context(); // build system prompt injection + +// Tier 2: CLAUDE.md hierarchy (managed > user > project > local) +// Automatically merged into build_context() + +// Tier 3: Graph memory (Grafeo, optional) +let id = mm.store_memory("User prefers Rust", MemoryType::User, 0.9)?; +mm.tag_memory(&id, "preferences"); +let results = mm.recall("Rust", 5); // graph query with fallback to text match + +// Session persistence (JSONL, append-only, tombstone soft-delete) +mm.write_user_message("session-1", Message::user("Hello"))?; +let messages = mm.load_session_messages("session-1")?; +``` + +### Skills (Claude Code + OpenCode Compatible) + +```rust +// Auto-discovers skills from: +// .claude/commands/*.md (Claude Code format) +// .claude/skills/*/SKILL.md (OpenCode format) +// ~/.claude/commands/*.md (user-level) +// Bundled skills (simplify, debug, commit, verify, stuck, remember, loop) + +let skill_tool = SkillTool::new().with_project_root("."); +// skill="list" → lists all available skills +// skill="debug" args="tests are flaky" → expands $ARGUMENTS template +``` + +### Realtime Events + +Three observation mechanisms: + +```rust +// 1. Callback +Agent::builder().on_event(|e| match e { + AgentEvent::TextDelta(t) => print!("{}", t), + AgentEvent::ToolStart { name, .. } => eprintln!("[{}]", name), + _ => {} +}) + +// 2. Broadcast (multi-consumer) +let agent = Agent::builder().enable_broadcast(256).build()?; +let mut rx = agent.subscribe().unwrap(); +tokio::spawn(async move { while let Ok(e) = rx.recv().await { /* ... */ } }); + +// 3. 
Stream (bidirectional control) +let mut stream = agent.run_stream("Deploy"); +while let Some(e) = stream.next().await { + if let AgentEvent::PermissionRequired(req) = e { + stream.respond_permission(req.id, PermissionDecision::Allow); + } +} +``` + +### Context Management + +```rust +Agent::builder() + .auto_compact(true) // summarize old messages at 90% context usage + .compact_threshold(0.9) // trigger threshold + .tool_result_budget(50_000) // truncate oldest tool results above 50K chars + .thinking_budget(8192) // extended thinking tokens + .effort(EffortLevel::High) // Low/Medium/High/Max +``` + +### MCP (Model Context Protocol) + +```rust +let mcp = McpManager::connect(&[ + McpServerConfig::stdio("db", "npx", &["-y", "@my/db-mcp"]), + McpServerConfig::sse("docs", "https://mcp.example.com"), +]).await?; + +Agent::builder().tools(mcp.tool_definitions().await) +``` + +### OAuth (Anthropic Native) + +```rust +// Opens browser, PKCE flow, token storage, refresh +cargo run --example oauth_login +``` + +--- + +## Agent Builder — Complete API + +```rust +Agent::builder() + // Provider (required) + .provider(Anthropic::from_env()?) + + // Tools + .tool(MyTool) + .tools(cersei::tools::coding()) + + // Model & generation + .model("claude-sonnet-4-6") + .max_turns(10) + .max_tokens(16384) + .temperature(0.7) + .thinking_budget(8192) + + // Prompt + .system_prompt("You are a helpful assistant.") + .append_system_prompt("Extra context.") + + // Environment + .working_dir("./my-project") + .permission_policy(AllowAll) // or AllowReadOnly, DenyAll, RuleBased, Interactive + + // Memory + .memory(JsonlMemory::new("./sessions")) + .session_id("my-session") + + // Hooks & events + .hook(CostGuard { max_usd: 5.0 }) + .on_event(|e| { /* ... */ }) + .enable_broadcast(256) + .reporter(ConsoleReporter { verbose: true }) + + // Context management + .auto_compact(true) + .compact_threshold(0.9) + .tool_result_budget(50_000) + + // Execute + .build()? 
// -> Agent + .run_with("Fix the tests") // -> AgentOutput (shorthand) +``` + +--- + +## Benchmarks + +Measured on Apple Silicon, release build, 100 iterations with 3 warmup runs. + +### Tool I/O + +| Tool | Avg | Min | Max | +|------|-----|-----|-----| +| Edit | 0.04ms | 0.02ms | 0.05ms | +| Glob | 0.05ms | 0.05ms | 0.07ms | +| Write | 0.09ms | 0.07ms | 0.11ms | +| Read | 0.09ms | 0.08ms | 0.11ms | +| Grep | 5.85ms | 5.34ms | 8.51ms | +| Bash | 15.64ms | 14.50ms | 16.19ms | + +### vs Claude Code CLI + +> **Note:** Cersei is a library — tool dispatch happens in-process. Claude Code is a CLI where +> each sub-agent fork pays full startup cost. These are different layers; the comparison below +> shows the gap between in-process dispatch and CLI process overhead. + +| Metric | Cersei (SDK) | Claude Code (CLI) | Notes | +|--------|-------------|-------------------|-------| +| Tool dispatch (Read) | 0.09ms | ~5-15ms (est.) | In-process vs Node.js fs | +| CLI startup | N/A (library) | 269ms | Claude `--version` warm avg | +| Sub-agent spawn | ~1ms (in-process) | ~300ms (fork) | Agent tool overhead | + +For an apples-to-apples CLI comparison, see [Abstract CLI benchmarks](crates/abstract-cli/benchmarks/REPORT.md). + +### Memory I/O + +| Operation | Abstract (Cersei) | Claude Code (measured) | Ratio | +|-----------|------------------|----------------------|-------| +| Scan 100 files | **1.2ms** | 26.6ms (`find`) | 22x | +| Load MEMORY.md | **9.6μs** | 17.1ms | 1,781x | +| Memory recall (graph) | **98μs** | 7,545ms (LLM call) | 77,000x | +| Memory recall (text) | **1.3ms** | 17.5ms (`grep`) | 13x | +| Session write | **27μs/entry** | N/A | — | +| Session load (100) | **268μs** | N/A | — | +| Graph store | **30μs/node** | N/A (no graph) | — | +| Topic query | **77μs** | N/A (no graph) | — | + +### Benchmark suites + +Each bench lives in its own self-contained directory with its own runner and result schema. Add new benches as siblings. 
+ +| Suite | Path | What it measures | Runner | +|---|---|---|---| +| **General-agent frameworks** | [`bench/general-agents/`](bench/general-agents/) | Per-agent memory, instantiation time, max concurrent agents — Cersei vs Agno / PydanticAI / LangGraph / CrewAI. | `./bench/general-agents/run.sh` | +| **Terminal Bench 2.0** | [`bench/term-bench/`](bench/term-bench/) | End-to-end coding tasks inside Daytona sandboxes using the full `abstract` CLI (Linux x86_64 / arm64 binaries shipped in-tree). | `./bench/term-bench/run.sh` | +| **LongMemEval (long-term memory)** | [`bench/long-mem/`](bench/long-mem/) | Recall accuracy on the ICLR-25 LongMemEval 500-question benchmark — head-to-head vs Mastra / Zep / Supermemory with identical prompts and LLM-as-judge rubric. Four Cersei configs: full-context baseline, usearch-HNSW semantic, grafeo-graph substring, hybrid w/ LLM fact extraction + RRF fusion. | `cargo run --release -p longmem-bench -- --dataset s --config all` | +| **Compression (real LLMs)** | `crates/cersei-agent/tests/e2e_openai_compression.rs` | Input-token savings from `cersei-compression` on OpenAI (`gpt-4o-mini`) and Gemini (`gemini-2.5-flash`). `#[ignore]`, runs with real API keys. | `cargo test -p cersei-agent --test e2e_openai_compression -- --ignored --nocapture` | +| **SDK Tool I/O** | `examples/benchmark_io.rs` | In-process tool dispatch latency for Read / Write / Edit / Grep / Bash / Glob. | `cargo run --example benchmark_io --release` | +| **SDK Memory I/O** | `crates/abstract-cli/examples/memory_bench.rs` | Graph-memory vs filesystem vs Claude Code-style paths. | `cargo run -p abstract-cli --example memory_bench --release` | +| **vs Claude Code CLI** | `run_tool_bench_claude.sh` · `run_tool_bench_codex.sh` | CLI-vs-CLI startup, memory, and dispatch overhead. 
| `./run_tool_bench.sh --iterations 20 --full` | + +### Run benchmarks + +```bash +# Rust-side SDK benches (no external services) +cargo run --example benchmark_io --release +cargo run --release -p abstract-cli --example memory_bench + +# vs Claude Code / Codex CLIs +./run_tool_bench.sh --iterations 20 --full + +# Python-harness benches (uv-managed; each dir self-contained) +./bench/general-agents/run.sh # Cersei vs Agno / PydanticAI / LangGraph / CrewAI +./bench/term-bench/run.sh # Terminal Bench 2.0 via Daytona + +# LongMemEval memory benchmark (head-to-head vs Mastra / Zep / Supermemory) +./bench/long-mem/setup.sh # downloads oracle + s datasets +OPENAI_API_KEY=sk-… cargo run --release -p longmem-bench -- \ + --dataset s --config all --concurrency 8 + +# Real-LLM compression savings (requires API keys) +OPENAI_API_KEY=sk-… cargo test -p cersei-agent \ + --test e2e_openai_compression -- --ignored --nocapture +``` + +--- + +## Stress Tests + +```bash +cargo run --example stress_core_infrastructure --release # system prompt, compact, context, bash classifier +cargo run --example stress_tools --release # all 30+ tools, registry, performance +cargo run --example stress_orchestration --release # sub-agents, coordinator, tasks, messaging +cargo run --example stress_skills --release # bundled + disk skills, Claude Code + OpenCode format +cargo run --example stress_memory --release # memdir, CLAUDE.md, sessions, extraction, auto-dream +``` + +--- + +## Examples + +| Example | Description | +|---------|-------------| +| [`simple_agent`](examples/simple_agent.rs) | Minimal agent in 3 lines | +| [`custom_tools`](examples/custom_tools.rs) | Define and register custom tools | +| [`streaming_events`](examples/streaming_events.rs) | Real-time `run_stream()` with colored output | +| [`multi_listener`](examples/multi_listener.rs) | Broadcast channel with multiple consumers | +| [`resumable_session`](examples/resumable_session.rs) | Persist and resume with `JsonlMemory` | +| 
[`custom_provider`](examples/custom_provider.rs) | Echo provider + OpenAI-compatible endpoints | +| [`hooks_middleware`](examples/hooks_middleware.rs) | Cost guard + audit logger + tool blocker | +| [`benchmark_io`](examples/benchmark_io.rs) | Full I/O benchmark suite | +| [`usage_report`](examples/usage_report.rs) | Token/cost tracking and billing estimates | +| [`coding_agent`](examples/coding_agent.rs) | Build a Python todo CLI (end-to-end) | +| [`oauth_login`](examples/oauth_login.rs) | Anthropic OAuth PKCE login flow | + +```bash +cargo run --example simple_agent --release +``` + +--- + +## Test Suite + +```bash +# Run all 160 unit tests +cargo test --workspace + +# Run with graph memory (requires grafeo) +cargo test --workspace --features graph + +# Run specific crate +cargo test -p cersei-tools +cargo test -p cersei-agent +cargo test -p cersei-memory +cargo test -p cersei-mcp +``` + +**160 unit tests** | **262 stress checks** | **0 failures** | **Zero I/O regression** + +--- + +## Extension Points + +| What | How | Example | +|------|-----|---------| +| Custom provider | `impl Provider` | Local LLM, Azure, Bedrock | +| Custom tool | `#[derive(Tool)]` or `impl Tool` | DB query, deploy, search | +| Custom permissions | `impl PermissionPolicy` | RBAC, OAuth-scoped | +| Custom memory | `impl Memory` | PostgreSQL, Redis, S3 | +| Custom hooks | `impl Hook` | Cost gating, audit logging | +| Custom reporters | `impl Reporter` | Dashboards, WebSocket relay | +| MCP servers | `McpServerConfig` via builder | Any MCP-compatible server | +| Skills | `.claude/commands/*.md` | Custom prompt templates | +| Graph memory | `features = ["graph"]` | Grafeo relationship tracking | + +--- + +## Documentation + +**[cersei.pacifio.dev/docs](https://cersei.pacifio.dev/docs)** — full docs with API reference, architecture, cookbooks, benchmarks, and llms.txt support. 
+ +| Section | Content | +|---------|---------| +| [Quick Start](https://cersei.pacifio.dev/docs/quick-start) | First agent in 10 lines | +| [API Reference](https://cersei.pacifio.dev/docs/api-agent) | Agent, Provider, Tools, Memory, Hooks, MCP | +| [Architecture](https://cersei.pacifio.dev/docs/architecture) | Crate map, data flow, design principles | +| [Cookbooks](https://cersei.pacifio.dev/docs/cookbook-custom-tools) | Custom tools, deployment, embedding | +| [Abstract CLI](https://cersei.pacifio.dev/docs/abstract) | Reference CLI built on Cersei | +| [Benchmarks](https://cersei.pacifio.dev/docs/bench-vs-claude-code) | vs Claude Code vs Codex | + +--- + +## License + +MIT License + +Copyright (c) 2025 Adib Mohsin + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/third_party/cersei-provider/src/anthropic.rs b/third_party/cersei-provider/src/anthropic.rs new file mode 100644 index 0000000..98e7bdf --- /dev/null +++ b/third_party/cersei-provider/src/anthropic.rs @@ -0,0 +1,406 @@ +//! 
Anthropic provider: Claude API client with streaming SSE support. + +use crate::*; +use cersei_types::*; +use futures::StreamExt; +use tokio::sync::mpsc; + +const ANTHROPIC_API_BASE: &str = "https://api.anthropic.com"; +const ANTHROPIC_API_VERSION: &str = "2023-06-01"; +// MultiTool local patch (MULTI-1367): upstream cersei-provider 0.1.9 sends +// `interleaved-thinking-2025-04-14` unconditionally, which the current Anthropic +// API rejects with HTTP 400 ("Unexpected value(s) for the `anthropic-beta` +// header"), breaking every request. Drop the stale interleaved-thinking beta and +// keep only the still-accepted token-efficient-tools beta. See +// third_party/cersei-provider/PATCH.md and the upstream issue +// https://github.com/pacifio/cersei/issues/20. Remove once upstream ships a fix. +const ANTHROPIC_BETA_HEADER: &str = "token-efficient-tools-2025-02-19"; + +// ─── Anthropic provider ────────────────────────────────────────────────────── + +#[allow(dead_code)] +pub struct Anthropic { + auth: Auth, + base_url: String, + default_model: String, + thinking_budget: Option, + max_retries: u32, + client: reqwest::Client, +} + +impl Anthropic { + pub fn new(auth: Auth) -> Self { + let base_url = std::env::var("ANTHROPIC_BASE_URL") + .ok() + .filter(|u| !u.is_empty()) + .unwrap_or_else(|| ANTHROPIC_API_BASE.to_string()); + Self { + auth, + base_url, + default_model: "claude-sonnet-4-6".to_string(), + thinking_budget: None, + max_retries: 5, + client: reqwest::Client::new(), + } + } + + /// Create from `ANTHROPIC_API_KEY` environment variable. 
+ pub fn from_env() -> Result { + let key = std::env::var("ANTHROPIC_API_KEY") + .map_err(|_| CerseiError::Auth("ANTHROPIC_API_KEY not set".into()))?; + Ok(Self::new(Auth::ApiKey(key))) + } + + pub fn builder() -> AnthropicBuilder { + AnthropicBuilder::default() + } + + async fn auth_headers(&self) -> Result> { + match &self.auth { + Auth::ApiKey(key) => Ok(vec![("x-api-key".into(), key.clone())]), + Auth::Bearer(token) => Ok(vec![("authorization".into(), format!("Bearer {}", token))]), + Auth::OAuth { token, .. } => Ok(vec![( + "authorization".into(), + format!("Bearer {}", token.access_token), + )]), + Auth::Custom(provider) => { + let (name, value) = provider.get_credentials().await?; + Ok(vec![(name, value)]) + } + } + } +} + +#[async_trait::async_trait] +impl Provider for Anthropic { + fn name(&self) -> &str { + "anthropic" + } + + fn context_window(&self, model: &str) -> u64 { + match model { + m if m.contains("opus") => 200_000, + m if m.contains("sonnet") => 200_000, + m if m.contains("haiku") => 200_000, + _ => 200_000, + } + } + + fn capabilities(&self, _model: &str) -> ProviderCapabilities { + ProviderCapabilities { + streaming: true, + tool_use: true, + vision: true, + thinking: true, + system_prompt: true, + caching: true, + } + } + + async fn complete(&self, request: CompletionRequest) -> Result { + let model = if request.model.is_empty() { + self.default_model.clone() + } else { + request.model.clone() + }; + + // Build API messages + let api_messages: Vec = request + .messages + .iter() + .filter(|m| m.role != Role::System) + .map(|m| { + serde_json::json!({ + "role": m.role, + "content": m.content, + }) + }) + .collect(); + + // Build request body + let mut body = serde_json::json!({ + "model": model, + "max_tokens": request.max_tokens, + "messages": api_messages, + "stream": true, + }); + + if let Some(system) = &request.system { + body["system"] = serde_json::Value::String(system.clone()); + } + + if !request.tools.is_empty() { + let api_tools: 
Vec = request + .tools + .iter() + .map(|t| { + serde_json::json!({ + "name": t.name, + "description": t.description, + "input_schema": t.input_schema, + }) + }) + .collect(); + body["tools"] = serde_json::Value::Array(api_tools); + } + + if let Some(temp) = request.temperature { + body["temperature"] = serde_json::json!(temp); + } + + if !request.stop_sequences.is_empty() { + body["stop_sequences"] = serde_json::json!(request.stop_sequences); + } + + // Thinking config + let thinking_budget = request + .options + .get::("thinking_budget") + .or(self.thinking_budget); + if let Some(budget) = thinking_budget { + body["thinking"] = serde_json::json!({ + "type": "enabled", + "budget_tokens": budget, + }); + } + + // Build HTTP request + let url = format!("{}/v1/messages", self.base_url); + let mut req_builder = self + .client + .post(&url) + .header("anthropic-version", ANTHROPIC_API_VERSION) + .header("anthropic-beta", ANTHROPIC_BETA_HEADER) + .header("content-type", "application/json"); + + for (name, value) in self.auth_headers().await? 
{ + req_builder = req_builder.header(&name, &value); + } + + let (tx, rx) = mpsc::channel(256); + + let request = req_builder.json(&body).build().map_err(CerseiError::Http)?; + let client = self.client.clone(); + + // Spawn SSE consumer + tokio::spawn(async move { + match client.execute(request).await { + Ok(response) => { + if !response.status().is_success() { + let status = response.status().as_u16(); + let body = response.text().await.unwrap_or_default(); + let _ = tx + .send(StreamEvent::Error { + message: format!("HTTP {}: {}", status, body), + }) + .await; + return; + } + + let mut stream = response.bytes_stream(); + let mut buffer = String::new(); + + while let Some(chunk) = stream.next().await { + match chunk { + Ok(bytes) => { + buffer.push_str(&String::from_utf8_lossy(&bytes)); + // Process complete SSE events + while let Some(pos) = buffer.find("\n\n") { + let event_str = buffer[..pos].to_string(); + buffer = buffer[pos + 2..].to_string(); + + if let Some(event) = parse_sse_event(&event_str) { + if tx.send(event).await.is_err() { + return; + } + } + } + } + Err(e) => { + let _ = tx + .send(StreamEvent::Error { + message: e.to_string(), + }) + .await; + return; + } + } + } + } + Err(e) => { + let _ = tx + .send(StreamEvent::Error { + message: e.to_string(), + }) + .await; + } + } + }); + + Ok(CompletionStream::new(rx)) + } +} + +// ─── SSE parser ────────────────────────────────────────────────────────────── + +fn parse_sse_event(raw: &str) -> Option { + let mut event_type = String::new(); + let mut data = String::new(); + + for line in raw.lines() { + if let Some(rest) = line.strip_prefix("event: ") { + event_type = rest.trim().to_string(); + } else if let Some(rest) = line.strip_prefix("data: ") { + data = rest.trim().to_string(); + } + } + + let json: serde_json::Value = serde_json::from_str(&data).ok()?; + + match event_type.as_str() { + "message_start" => { + let msg = &json["message"]; + Some(StreamEvent::MessageStart { + id: 
msg["id"].as_str().unwrap_or("").to_string(), + model: msg["model"].as_str().unwrap_or("").to_string(), + }) + } + "content_block_start" => { + let index = json["index"].as_u64().unwrap_or(0) as usize; + let block_type = json["content_block"]["type"] + .as_str() + .unwrap_or("text") + .to_string(); + Some(StreamEvent::ContentBlockStart { + index, + block_type, + id: json["content_block"]["id"].as_str().map(String::from), + name: json["content_block"]["name"].as_str().map(String::from), + }) + } + "content_block_delta" => { + let index = json["index"].as_u64().unwrap_or(0) as usize; + let delta = &json["delta"]; + let delta_type = delta["type"].as_str().unwrap_or(""); + match delta_type { + "text_delta" => Some(StreamEvent::TextDelta { + index, + text: delta["text"].as_str().unwrap_or("").to_string(), + }), + "input_json_delta" => Some(StreamEvent::InputJsonDelta { + index, + partial_json: delta["partial_json"].as_str().unwrap_or("").to_string(), + }), + "thinking_delta" => Some(StreamEvent::ThinkingDelta { + index, + thinking: delta["thinking"].as_str().unwrap_or("").to_string(), + }), + _ => None, + } + } + "content_block_stop" => { + let index = json["index"].as_u64().unwrap_or(0) as usize; + Some(StreamEvent::ContentBlockStop { index }) + } + "message_delta" => { + let stop_reason = json["delta"]["stop_reason"].as_str().and_then(|s| match s { + "end_turn" => Some(StopReason::EndTurn), + "max_tokens" => Some(StopReason::MaxTokens), + "tool_use" => Some(StopReason::ToolUse), + "stop_sequence" => Some(StopReason::StopSequence), + _ => None, + }); + let usage = if let Some(u) = json["usage"].as_object() { + Some(Usage { + input_tokens: u.get("input_tokens").and_then(|v| v.as_u64()).unwrap_or(0), + output_tokens: u.get("output_tokens").and_then(|v| v.as_u64()).unwrap_or(0), + ..Default::default() + }) + } else { + None + }; + Some(StreamEvent::MessageDelta { stop_reason, usage }) + } + "message_stop" => Some(StreamEvent::MessageStop), + "ping" => 
Some(StreamEvent::Ping), + "error" => Some(StreamEvent::Error { + message: json["error"]["message"] + .as_str() + .unwrap_or("Unknown error") + .to_string(), + }), + _ => None, + } +} + +// ─── Builder ───────────────────────────────────────────────────────────────── + +#[derive(Default)] +pub struct AnthropicBuilder { + api_key: Option, + base_url: Option, + model: Option, + thinking_budget: Option, + oauth_token: Option, + max_retries: Option, +} + +impl AnthropicBuilder { + pub fn api_key(mut self, key: impl Into) -> Self { + self.api_key = Some(key.into()); + self + } + + pub fn base_url(mut self, url: impl Into) -> Self { + self.base_url = Some(url.into()); + self + } + + pub fn model(mut self, model: impl Into) -> Self { + self.model = Some(model.into()); + self + } + + pub fn thinking(mut self, budget_tokens: u32) -> Self { + self.thinking_budget = Some(budget_tokens); + self + } + + pub fn oauth(mut self, token: OAuthToken) -> Self { + self.oauth_token = Some(token); + self + } + + pub fn max_retries(mut self, n: u32) -> Self { + self.max_retries = Some(n); + self + } + + pub fn build(self) -> Result { + let auth = if let Some(token) = self.oauth_token { + Auth::OAuth { + client_id: String::new(), + token, + } + } else if let Some(key) = self.api_key { + Auth::ApiKey(key) + } else { + return Err(CerseiError::Auth( + "No API key or OAuth token provided. Set ANTHROPIC_API_KEY or use .oauth()".into(), + )); + }; + + Ok(Anthropic { + auth, + base_url: self + .base_url + .unwrap_or_else(|| ANTHROPIC_API_BASE.to_string()), + default_model: self + .model + .unwrap_or_else(|| "claude-sonnet-4-6".to_string()), + thinking_budget: self.thinking_budget, + max_retries: self.max_retries.unwrap_or(5), + client: reqwest::Client::new(), + }) + } +} diff --git a/third_party/cersei-provider/src/gemini.rs b/third_party/cersei-provider/src/gemini.rs new file mode 100644 index 0000000..2a7e22e --- /dev/null +++ b/third_party/cersei-provider/src/gemini.rs @@ -0,0 +1,556 @@ +//! 
Google Gemini provider: native Gemini API client with streaming support. +//! +//! Uses Google's `generateContent` API directly rather than the OpenAI-compatible +//! shim, enabling access to native Gemini features like safety settings, +//! grounding, and proper multimodal support. + +use crate::*; +use cersei_types::*; +use futures::StreamExt; +use tokio::sync::mpsc; + +const GEMINI_API_BASE: &str = "https://generativelanguage.googleapis.com/v1beta"; + +// ─── Gemini provider ──────────────────────────────────────────────────────── + +pub struct Gemini { + api_key: String, + base_url: String, + default_model: String, + client: reqwest::Client, +} + +impl Gemini { + pub fn new(api_key: impl Into) -> Self { + let base_url = std::env::var("GEMINI_BASE_URL") + .ok() + .filter(|u| !u.is_empty()) + .unwrap_or_else(|| GEMINI_API_BASE.to_string()); + Self { + api_key: api_key.into(), + base_url, + default_model: "gemini-3.1-pro-preview".to_string(), + client: reqwest::Client::new(), + } + } + + /// Create from `GOOGLE_API_KEY` or `GEMINI_API_KEY` environment variable. 
+ pub fn from_env() -> Result { + let key = std::env::var("GOOGLE_API_KEY") + .or_else(|_| std::env::var("GEMINI_API_KEY")) + .map_err(|_| CerseiError::Auth("GOOGLE_API_KEY or GEMINI_API_KEY not set".into()))?; + Ok(Self::new(key)) + } + + pub fn builder() -> GeminiBuilder { + GeminiBuilder::default() + } +} + +#[async_trait::async_trait] +impl Provider for Gemini { + fn name(&self) -> &str { + "google" + } + + fn context_window(&self, model: &str) -> u64 { + match model { + m if m.contains("gemini-3.1") => 2_000_000, + m if m.contains("gemini-3.0") => 1_000_000, + m if m.contains("gemini-2.0") => 1_000_000, + m if m.contains("gemini-1.5-pro") => 2_000_000, + m if m.contains("gemini-1.5-flash") => 1_000_000, + _ => 1_000_000, + } + } + + fn capabilities(&self, _model: &str) -> ProviderCapabilities { + ProviderCapabilities { + streaming: true, + tool_use: true, + vision: true, + thinking: false, + system_prompt: true, + caching: false, + } + } + + async fn complete(&self, request: CompletionRequest) -> Result { + let model = if request.model.is_empty() { + self.default_model.clone() + } else { + request.model.clone() + }; + + // Build a map of tool_use_id → tool_name from conversation history + let tool_name_map: std::collections::HashMap = request + .messages + .iter() + .flat_map(|m| match &m.content { + MessageContent::Blocks(blocks) => blocks + .iter() + .filter_map(|b| { + if let ContentBlock::ToolUse { id, name, .. 
} = b { + Some((id.clone(), name.clone())) + } else { + None + } + }) + .collect::>(), + _ => vec![], + }) + .collect(); + + // Build Gemini-native contents array + let mut contents: Vec = Vec::new(); + + for msg in &request.messages { + match msg.role { + Role::User => { + let mut parts: Vec = Vec::new(); + + if let MessageContent::Blocks(blocks) = &msg.content { + for block in blocks { + match block { + ContentBlock::Text { text } => { + parts.push(serde_json::json!({ "text": text })); + } + ContentBlock::ToolResult { + tool_use_id, + content, + .. + } => { + // Gemini requires the function NAME, not the call ID + let func_name = tool_name_map + .get(tool_use_id) + .cloned() + .unwrap_or_else(|| tool_use_id.clone()); + let content_str = match content { + ToolResultContent::Text(s) => s.clone(), + ToolResultContent::Blocks(blocks) => blocks + .iter() + .filter_map(|b| { + if let ContentBlock::Text { text } = b { + Some(text.as_str()) + } else { + None + } + }) + .collect::>() + .join("\n"), + }; + parts.push(serde_json::json!({ + "functionResponse": { + "name": func_name, + "response": { "content": content_str }, + } + })); + } + _ => {} + } + } + } else { + parts.push(serde_json::json!({ "text": msg.get_all_text() })); + } + + if !parts.is_empty() { + contents.push(serde_json::json!({ + "role": "user", + "parts": parts, + })); + } + } + Role::Assistant => { + let mut parts: Vec = Vec::new(); + + if let MessageContent::Blocks(blocks) = &msg.content { + for block in blocks { + match block { + ContentBlock::Text { text } => { + parts.push(serde_json::json!({ "text": text })); + } + ContentBlock::ToolUse { id, name, input } => { + // Extract fc_id and thoughtSignature from encoded tool_id + // Format: "gemini-tool-N::fc_id::thoughtSignature" or "gemini-tool-N" + let segments: Vec<&str> = id.splitn(3, "::").collect(); + let mut fc = serde_json::json!({ + "name": name, + "args": input, + }); + let mut part_obj = serde_json::Map::new(); + if segments.len() >= 3 { + // 
Has fc_id and thoughtSignature + fc["id"] = + serde_json::Value::String(segments[1].to_string()); + part_obj.insert("functionCall".to_string(), fc); + part_obj.insert( + "thoughtSignature".to_string(), + serde_json::Value::String(segments[2].to_string()), + ); + } else { + part_obj.insert("functionCall".to_string(), fc); + } + parts.push(serde_json::Value::Object(part_obj)); + } + _ => {} + } + } + } else { + parts.push(serde_json::json!({ "text": msg.get_all_text() })); + } + + if !parts.is_empty() { + contents.push(serde_json::json!({ + "role": "model", + "parts": parts, + })); + } + } + Role::System => { + // System messages handled separately via systemInstruction + } + } + } + + // Build request body + let mut body = serde_json::json!({ + "contents": contents, + "generationConfig": { + "maxOutputTokens": request.max_tokens, + }, + }); + + // System instruction (Gemini's equivalent of system prompt) + if let Some(system) = &request.system { + body["systemInstruction"] = serde_json::json!({ + "parts": [{ "text": system }], + }); + } + + if let Some(temp) = request.temperature { + body["generationConfig"]["temperature"] = serde_json::json!(temp); + } + + if !request.stop_sequences.is_empty() { + body["generationConfig"]["stopSequences"] = serde_json::json!(request.stop_sequences); + } + + // Tool declarations + if !request.tools.is_empty() { + let function_declarations: Vec = request + .tools + .iter() + .map(|t| { + serde_json::json!({ + "name": t.name, + "description": t.description, + "parameters": t.input_schema, + }) + }) + .collect(); + body["tools"] = serde_json::json!([{ + "functionDeclarations": function_declarations, + }]); + } + + // Safety settings: use least restrictive defaults to avoid unexpected blocks + body["safetySettings"] = serde_json::json!([ + { "category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH" }, + { "category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_ONLY_HIGH" }, + { "category": 
"HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_ONLY_HIGH" }, + { "category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_ONLY_HIGH" }, + ]); + + // SECURITY: never put the API key in the URL. Use the + // `x-goog-api-key` header so that reqwest's error `Display` (which + // prints the URL) cannot leak the secret into logs or error-wrapped + // output. + let url = format!( + "{}/models/{}:streamGenerateContent?alt=sse", + self.base_url, model + ); + + let (tx, rx) = mpsc::channel(256); + + let req = self + .client + .post(&url) + .header("x-goog-api-key", &self.api_key) + .header("content-type", "application/json") + .json(&body) + .build() + .map_err(CerseiError::Http)?; + + let client = self.client.clone(); + + tokio::spawn(async move { + match client.execute(req).await { + Ok(response) => { + if !response.status().is_success() { + let status = response.status().as_u16(); + let body = response.text().await.unwrap_or_default(); + let _ = tx + .send(StreamEvent::Error { + message: format!("HTTP {}: {}", status, body), + }) + .await; + return; + } + + let _ = tx + .send(StreamEvent::MessageStart { + id: String::new(), + model: String::new(), + }) + .await; + + let mut stream = response.bytes_stream(); + let mut buffer = String::new(); + let mut block_index: usize = 0; + let mut total_input_tokens: u64 = 0; + let mut total_output_tokens: u64 = 0; + let mut saw_function_calls = false; + + while let Some(chunk) = stream.next().await { + match chunk { + Ok(bytes) => { + buffer.push_str(&String::from_utf8_lossy(&bytes)); + + while let Some(pos) = buffer.find("\n") { + let line = buffer[..pos].to_string(); + buffer = buffer[pos + 1..].to_string(); + + if let Some(data) = line.strip_prefix("data: ") { + let data = data.trim(); + if data.is_empty() { + continue; + } + + if let Ok(json) = + serde_json::from_str::(data) + { + // Extract usage metadata + if let Some(metadata) = json.get("usageMetadata") { + total_input_tokens = metadata + 
.get("promptTokenCount") + .and_then(|v| v.as_u64()) + .unwrap_or(total_input_tokens); + total_output_tokens = metadata + .get("candidatesTokenCount") + .and_then(|v| v.as_u64()) + .unwrap_or(total_output_tokens); + } + + // Process candidates + if let Some(candidates) = + json.get("candidates").and_then(|c| c.as_array()) + { + for candidate in candidates { + if let Some(parts) = candidate + .get("content") + .and_then(|c| c.get("parts")) + .and_then(|p| p.as_array()) + { + for part in parts { + if let Some(text) = part + .get("text") + .and_then(|t| t.as_str()) + { + let _ = tx + .send(StreamEvent::ContentBlockStart { + index: block_index, + block_type: "text".into(), + id: None, + name: None, + }) + .await; + let _ = tx + .send(StreamEvent::TextDelta { + index: block_index, + text: text.to_string(), + }) + .await; + let _ = tx + .send(StreamEvent::ContentBlockStop { + index: block_index, + }) + .await; + block_index += 1; + } + + if let Some(fc) = + part.get("functionCall") + { + saw_function_calls = true; + let name = fc + .get("name") + .and_then(|n| n.as_str()) + .unwrap_or("") + .to_string(); + let args = fc + .get("args") + .cloned() + .unwrap_or( + serde_json::Value::Object( + Default::default(), + ), + ); + // Capture thoughtSignature (sibling of functionCall at part level, Gemini 3.1+) + let thought_sig = part + .get("thoughtSignature") + .and_then(|s| s.as_str()) + .unwrap_or(""); + // Capture functionCall.id if present + let fc_id = fc + .get("id") + .and_then(|s| s.as_str()) + .unwrap_or(""); + // Encode both in tool_id for roundtrip + let tool_id = + if thought_sig.is_empty() { + format!( + "gemini-tool-{}", + block_index + ) + } else { + format!( + "gemini-tool-{}::{}::{}", + block_index, + fc_id, + thought_sig + ) + }; + + let _ = tx + .send(StreamEvent::ContentBlockStart { + index: block_index, + block_type: "tool_use".into(), + id: Some(tool_id), + name: Some(name), + }) + .await; + let _ = tx + .send(StreamEvent::InputJsonDelta { + index: 
block_index, + partial_json: serde_json::to_string(&args) + .unwrap_or_default(), + }) + .await; + let _ = tx + .send(StreamEvent::ContentBlockStop { + index: block_index, + }) + .await; + block_index += 1; + } + } + } + + // Check finish reason + let finish_reason = candidate + .get("finishReason") + .and_then(|r| r.as_str()); + if let Some(reason) = finish_reason { + let stop = if saw_function_calls { + StopReason::ToolUse + } else { + match reason { + "STOP" => StopReason::EndTurn, + "MAX_TOKENS" => { + StopReason::MaxTokens + } + "SAFETY" => StopReason::EndTurn, + _ => StopReason::EndTurn, + } + }; + let _ = tx + .send(StreamEvent::MessageDelta { + stop_reason: Some(stop), + usage: Some(Usage { + input_tokens: + total_input_tokens, + output_tokens: + total_output_tokens, + ..Default::default() + }), + }) + .await; + } + } + } + } + } + } + } + Err(e) => { + let _ = tx + .send(StreamEvent::Error { + message: e.to_string(), + }) + .await; + return; + } + } + } + + let _ = tx.send(StreamEvent::MessageStop).await; + } + Err(e) => { + let _ = tx + .send(StreamEvent::Error { + message: e.to_string(), + }) + .await; + } + } + }); + + Ok(CompletionStream::new(rx)) + } +} + +// ─── Builder ───────────────────────────────────────────────────────────────── + +#[derive(Default)] +pub struct GeminiBuilder { + api_key: Option, + base_url: Option, + model: Option, +} + +impl GeminiBuilder { + pub fn api_key(mut self, key: impl Into) -> Self { + self.api_key = Some(key.into()); + self + } + + pub fn base_url(mut self, url: impl Into) -> Self { + self.base_url = Some(url.into()); + self + } + + pub fn model(mut self, model: impl Into) -> Self { + self.model = Some(model.into()); + self + } + + pub fn build(self) -> Result { + let api_key = if let Some(key) = self.api_key { + key + } else { + return Err(CerseiError::Auth( + "No API key provided. 
Set GOOGLE_API_KEY or GEMINI_API_KEY or use .api_key()" + .into(), + )); + }; + + Ok(Gemini { + api_key, + base_url: self.base_url.unwrap_or_else(|| GEMINI_API_BASE.to_string()), + default_model: self + .model + .unwrap_or_else(|| "gemini-3.1-pro-preview".to_string()), + client: reqwest::Client::new(), + }) + } +} diff --git a/third_party/cersei-provider/src/lib.rs b/third_party/cersei-provider/src/lib.rs new file mode 100644 index 0000000..1efba1e --- /dev/null +++ b/third_party/cersei-provider/src/lib.rs @@ -0,0 +1,220 @@ +//! cersei-provider: Provider trait and built-in LLM providers. +//! +//! Providers abstract over different LLM backends (Anthropic, OpenAI, local models). +//! Each provider implements streaming completion, token counting, and capability discovery. + +pub mod anthropic; +pub mod gemini; +pub mod openai; +pub mod registry; +pub mod router; +mod stream; + +use async_trait::async_trait; +use cersei_types::*; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use tokio::sync::mpsc; + +// Re-exports +pub use anthropic::Anthropic; +pub use gemini::Gemini; +pub use openai::OpenAi; +pub use router::from_model_string; +pub use stream::StreamAccumulator; + +// ─── Provider trait ────────────────────────────────────────────────────────── + +#[async_trait] +pub trait Provider: Send + Sync { + /// Human-readable provider name (e.g., "anthropic", "openai"). + fn name(&self) -> &str; + + /// Context window size for the given model. + fn context_window(&self, model: &str) -> u64; + + /// Capabilities supported by the given model. + fn capabilities(&self, model: &str) -> ProviderCapabilities; + + /// Send a streaming completion request. + async fn complete(&self, request: CompletionRequest) -> Result; + + /// Send a blocking (non-streaming) completion request. + async fn complete_blocking(&self, request: CompletionRequest) -> Result { + self.complete(request).await?.collect().await + } + + /// Count tokens for a message list. 
Returns an estimate if exact counting is unavailable. + async fn count_tokens(&self, messages: &[Message], _model: &str) -> Result { + // Default: rough estimate based on character count + let chars: usize = messages.iter().map(|m| m.get_all_text().len()).sum(); + Ok((chars as u64) / 4) // ~4 chars per token + } +} + +// Blanket impl: Box is itself a Provider. +#[async_trait] +impl Provider for Box { + fn name(&self) -> &str { + (**self).name() + } + fn context_window(&self, model: &str) -> u64 { + (**self).context_window(model) + } + fn capabilities(&self, model: &str) -> ProviderCapabilities { + (**self).capabilities(model) + } + async fn complete(&self, request: CompletionRequest) -> Result { + (**self).complete(request).await + } + async fn complete_blocking(&self, request: CompletionRequest) -> Result { + (**self).complete_blocking(request).await + } + async fn count_tokens(&self, messages: &[Message], model: &str) -> Result { + (**self).count_tokens(messages, model).await + } +} + +// ─── Authentication ────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub enum Auth { + /// API key sent as `x-api-key` header (Anthropic Console) or `Authorization: Bearer` (OpenAI). + ApiKey(String), + /// Bearer token sent as `Authorization: Bearer `. + Bearer(String), + /// OAuth flow with client ID and token. + OAuth { + client_id: String, + token: OAuthToken, + }, + /// Custom auth provider for non-standard flows. + Custom(std::sync::Arc), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OAuthToken { + pub access_token: String, + pub refresh_token: Option, + pub expires_at_ms: Option, + pub scopes: Vec, +} + +impl OAuthToken { + pub fn is_expired(&self) -> bool { + if let Some(exp) = self.expires_at_ms { + chrono::Utc::now().timestamp_millis() >= exp + } else { + false + } + } +} + +#[async_trait] +pub trait AuthProvider: Send + Sync + std::fmt::Debug { + /// Returns (header_name, header_value) for the request. 
+ async fn get_credentials(&self) -> Result<(String, String)>; + + /// Refresh credentials if they have expired. + async fn refresh(&self) -> Result<()>; +} + +// ─── Completion request/response ───────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct CompletionRequest { + pub model: String, + pub messages: Vec, + pub system: Option, + pub tools: Vec, + pub max_tokens: u32, + pub temperature: Option, + pub stop_sequences: Vec, + /// Provider-specific options (thinking budget, top_p, etc.) + pub options: ProviderOptions, +} + +impl CompletionRequest { + pub fn new(model: impl Into) -> Self { + Self { + model: model.into(), + messages: Vec::new(), + system: None, + tools: Vec::new(), + max_tokens: 16384, + temperature: None, + stop_sequences: Vec::new(), + options: ProviderOptions::default(), + } + } +} + +#[derive(Debug, Clone, Default)] +pub struct ProviderOptions { + entries: HashMap, +} + +impl ProviderOptions { + pub fn set(&mut self, key: impl Into, value: impl Serialize) { + if let Ok(v) = serde_json::to_value(value) { + self.entries.insert(key.into(), v); + } + } + + pub fn get Deserialize<'de>>(&self, key: &str) -> Option { + self.entries + .get(key) + .and_then(|v| serde_json::from_value(v.clone()).ok()) + } + + pub fn has(&self, key: &str) -> bool { + self.entries.contains_key(key) + } +} + +#[derive(Debug, Clone)] +pub struct CompletionResponse { + pub message: Message, + pub usage: Usage, + pub stop_reason: StopReason, +} + +#[derive(Debug, Clone, Default)] +pub struct ProviderCapabilities { + pub streaming: bool, + pub tool_use: bool, + pub vision: bool, + pub thinking: bool, + pub system_prompt: bool, + pub caching: bool, +} + +// ─── Completion stream ─────────────────────────────────────────────────────── + +/// A streaming response from a provider. Wraps a channel of StreamEvents. 
+pub struct CompletionStream { + rx: mpsc::Receiver, +} + +impl CompletionStream { + pub fn new(rx: mpsc::Receiver) -> Self { + Self { rx } + } + + /// Consume the stream and collect into a complete response. + pub async fn collect(mut self) -> Result { + let mut acc = StreamAccumulator::new(); + while let Some(event) = self.rx.recv().await { + if let StreamEvent::Error { message } = &event { + return Err(CerseiError::Provider(message.clone())); + } + acc.process_event(event); + } + acc.into_response() + } + + /// Access the underlying receiver for real-time event processing. + pub fn into_receiver(self) -> mpsc::Receiver { + self.rx + } +} diff --git a/third_party/cersei-provider/src/openai.rs b/third_party/cersei-provider/src/openai.rs new file mode 100644 index 0000000..2ae6272 --- /dev/null +++ b/third_party/cersei-provider/src/openai.rs @@ -0,0 +1,521 @@ +//! OpenAI-compatible provider (works with OpenAI, Azure, Ollama, etc.) + +use crate::*; +use cersei_types::*; +use futures::StreamExt; +use tokio::sync::mpsc; + +const OPENAI_API_BASE: &str = "https://api.openai.com/v1"; + +pub struct OpenAi { + auth: Auth, + base_url: String, + default_model: String, + client: reqwest::Client, +} + +impl OpenAi { + pub fn new(auth: Auth) -> Self { + let base_url = std::env::var("OPENAI_BASE_URL") + .ok() + .filter(|u| !u.is_empty()) + .unwrap_or_else(|| OPENAI_API_BASE.to_string()); + Self { + auth, + base_url, + default_model: "gpt-4o".to_string(), + client: reqwest::Client::new(), + } + } + + pub fn from_env() -> Result { + let key = std::env::var("OPENAI_API_KEY") + .map_err(|_| CerseiError::Auth("OPENAI_API_KEY not set".into()))?; + Ok(Self::new(Auth::ApiKey(key))) + } + + pub fn builder() -> OpenAiBuilder { + OpenAiBuilder::default() + } +} + +#[async_trait::async_trait] +impl Provider for OpenAi { + fn name(&self) -> &str { + "openai" + } + + fn context_window(&self, model: &str) -> u64 { + match model { + m if m.contains("gpt-5") => 1_000_000, + m if 
m.starts_with("o1") || m.starts_with("o3") => 200_000, + m if m.contains("gpt-4o") => 128_000, + m if m.contains("gpt-4-turbo") => 128_000, + m if m.contains("gpt-4") => 8_192, + m if m.contains("gpt-3.5") => 16_385, + _ => 128_000, + } + } + + fn capabilities(&self, _model: &str) -> ProviderCapabilities { + ProviderCapabilities { + streaming: true, + tool_use: true, + vision: true, + thinking: false, + system_prompt: true, + caching: false, + } + } + + async fn complete(&self, request: CompletionRequest) -> Result { + let model = if request.model.is_empty() { + self.default_model.clone() + } else { + request.model.clone() + }; + + // Build OpenAI-format messages + let mut api_messages: Vec = Vec::new(); + + if let Some(system) = &request.system { + api_messages.push(serde_json::json!({ + "role": "system", + "content": system, + })); + } + + for msg in &request.messages { + match msg.role { + Role::User => { + // Check if this is a tool result message + if let MessageContent::Blocks(blocks) = &msg.content { + for block in blocks { + if let ContentBlock::ToolResult { + tool_use_id, + content, + is_error, + } = block + { + api_messages.push(serde_json::json!({ + "role": "tool", + "tool_call_id": tool_use_id, + "content": content, + })); + } + } + // Also include any text blocks as a user message + let text: String = blocks + .iter() + .filter_map(|b| { + if let ContentBlock::Text { text } = b { + Some(text.as_str()) + } else { + None + } + }) + .collect::>() + .join("\n"); + if !text.is_empty() { + api_messages.push(serde_json::json!({ + "role": "user", + "content": text, + })); + } + } else { + api_messages.push(serde_json::json!({ + "role": "user", + "content": msg.get_all_text(), + })); + } + } + Role::Assistant => { + // Check for tool_use blocks — serialize as tool_calls + if let MessageContent::Blocks(blocks) = &msg.content { + let tool_uses: Vec<&ContentBlock> = blocks + .iter() + .filter(|b| matches!(b, ContentBlock::ToolUse { .. 
})) + .collect(); + if !tool_uses.is_empty() { + let tool_calls: Vec = tool_uses + .iter() + .map(|b| { + if let ContentBlock::ToolUse { id, name, input } = b { + serde_json::json!({ + "id": id, + "type": "function", + "function": { + "name": name, + "arguments": input.to_string(), + } + }) + } else { + serde_json::json!({}) + } + }) + .collect(); + + let text_content: String = blocks + .iter() + .filter_map(|b| { + if let ContentBlock::Text { text } = b { + Some(text.as_str()) + } else { + None + } + }) + .collect::>() + .join(""); + + let mut asst_msg = serde_json::json!({ + "role": "assistant", + "tool_calls": tool_calls, + }); + if !text_content.is_empty() { + asst_msg["content"] = serde_json::json!(text_content); + } + api_messages.push(asst_msg); + } else { + api_messages.push(serde_json::json!({ + "role": "assistant", + "content": msg.get_all_text(), + })); + } + } else { + api_messages.push(serde_json::json!({ + "role": "assistant", + "content": msg.get_all_text(), + })); + } + } + Role::System => { + api_messages.push(serde_json::json!({ + "role": "system", + "content": msg.get_all_text(), + })); + } + } + } + + // GPT-5+ and o-series use max_completion_tokens; older models use max_tokens + let use_new_param = + model.starts_with("gpt-5") || model.starts_with("o1") || model.starts_with("o3"); + + let mut body = if use_new_param { + serde_json::json!({ + "model": model, + "messages": api_messages, + "max_completion_tokens": request.max_tokens, + "stream": true, + "stream_options": { "include_usage": true }, + }) + } else { + serde_json::json!({ + "model": model, + "messages": api_messages, + "max_tokens": request.max_tokens, + "stream": true, + "stream_options": { "include_usage": true }, + }) + }; + + if let Some(temp) = request.temperature { + body["temperature"] = serde_json::json!(temp); + } + + if !request.tools.is_empty() { + let tools: Vec = request + .tools + .iter() + .map(|t| { + serde_json::json!({ + "type": "function", + "function": { + "name": 
t.name, + "description": t.description, + "parameters": t.input_schema, + } + }) + }) + .collect(); + body["tools"] = serde_json::Value::Array(tools); + } + + let url = format!("{}/chat/completions", self.base_url); + let auth_header = match &self.auth { + Auth::ApiKey(key) | Auth::Bearer(key) => format!("Bearer {}", key), + Auth::OAuth { token, .. } => format!("Bearer {}", token.access_token), + Auth::Custom(_) => String::new(), + }; + + let (tx, rx) = mpsc::channel(256); + + let req = self + .client + .post(&url) + .header("authorization", &auth_header) + .header("content-type", "application/json") + .json(&body) + .build() + .map_err(CerseiError::Http)?; + + let client = self.client.clone(); + + tokio::spawn(async move { + match client.execute(req).await { + Ok(response) => { + if !response.status().is_success() { + let status = response.status().as_u16(); + let body = response.text().await.unwrap_or_default(); + let _ = tx + .send(StreamEvent::Error { + message: format!("HTTP {}: {}", status, body), + }) + .await; + return; + } + + let _ = tx + .send(StreamEvent::MessageStart { + id: String::new(), + model: String::new(), + }) + .await; + let mut stream = response.bytes_stream(); + let mut buffer = String::new(); + let mut text_started = false; + // Track tool calls being assembled across chunks + // OpenAI sends: tool_calls[i].id, tool_calls[i].function.name (first chunk) + // tool_calls[i].function.arguments (subsequent chunks, accumulated) + let mut tool_calls: std::collections::HashMap = + std::collections::HashMap::new(); // index -> (id, name, args_json) + let mut has_tool_calls = false; + + while let Some(chunk) = stream.next().await { + match chunk { + Ok(bytes) => { + buffer.push_str(&String::from_utf8_lossy(&bytes)); + while let Some(pos) = buffer.find("\n") { + let line = buffer[..pos].to_string(); + buffer = buffer[pos + 1..].to_string(); + + if let Some(data) = line.strip_prefix("data: ") { + let data = data.trim(); + if data == "[DONE]" { + // 
Emit accumulated tool calls + for (idx, (id, name, args)) in &tool_calls { + let input: serde_json::Value = + serde_json::from_str(args) + .unwrap_or(serde_json::Value::Null); + let _ = tx + .send(StreamEvent::ContentBlockStart { + index: *idx + 1, + block_type: "tool_use".into(), + id: Some(id.clone()), + name: Some(name.clone()), + }) + .await; + // Send full args as InputJsonDelta + let _ = tx + .send(StreamEvent::InputJsonDelta { + index: *idx + 1, + partial_json: args.clone(), + }) + .await; + let _ = tx + .send(StreamEvent::ContentBlockStop { + index: *idx + 1, + }) + .await; + } + + if text_started { + let _ = tx + .send(StreamEvent::ContentBlockStop { + index: 0, + }) + .await; + } + + let stop = if has_tool_calls { + StopReason::ToolUse + } else { + StopReason::EndTurn + }; + + // Extract usage if available + let _ = tx + .send(StreamEvent::MessageDelta { + stop_reason: Some(stop), + usage: None, + }) + .await; + let _ = tx.send(StreamEvent::MessageStop).await; + return; + } + + if let Ok(json) = + serde_json::from_str::(data) + { + let delta = &json["choices"][0]["delta"]; + let finish_reason = + json["choices"][0]["finish_reason"].as_str(); + + // Text content + if let Some(text) = delta["content"].as_str() { + if !text_started { + text_started = true; + let _ = tx + .send(StreamEvent::ContentBlockStart { + index: 0, + block_type: "text".into(), + id: None, + name: None, + }) + .await; + } + let _ = tx + .send(StreamEvent::TextDelta { + index: 0, + text: text.to_string(), + }) + .await; + } + + // Tool calls (accumulated across chunks) + if let Some(tc_array) = delta["tool_calls"].as_array() { + has_tool_calls = true; + for tc in tc_array { + let idx = + tc["index"].as_u64().unwrap_or(0) as usize; + let entry = tool_calls + .entry(idx) + .or_insert_with(|| { + ( + String::new(), + String::new(), + String::new(), + ) + }); + + // First chunk has id and function.name + if let Some(id) = tc["id"].as_str() { + entry.0 = id.to_string(); + } + if let 
Some(name) = + tc["function"]["name"].as_str() + { + entry.1 = name.to_string(); + } + // Arguments accumulate across chunks + if let Some(args) = + tc["function"]["arguments"].as_str() + { + entry.2.push_str(args); + } + } + } + + // Usage from the final chunk + if let Some(usage) = json["usage"].as_object() { + let input_tokens = usage + .get("prompt_tokens") + .and_then(|v| v.as_u64()) + .unwrap_or(0); + let output_tokens = usage + .get("completion_tokens") + .and_then(|v| v.as_u64()) + .unwrap_or(0); + let _ = tx + .send(StreamEvent::MessageDelta { + stop_reason: finish_reason.and_then(|r| { + match r { + "stop" => Some(StopReason::EndTurn), + "tool_calls" => { + Some(StopReason::ToolUse) + } + "length" => { + Some(StopReason::MaxTokens) + } + _ => None, + } + }), + usage: Some(Usage { + input_tokens, + output_tokens, + ..Default::default() + }), + }) + .await; + } + } + } + } + } + Err(e) => { + let _ = tx + .send(StreamEvent::Error { + message: e.to_string(), + }) + .await; + return; + } + } + } + } + Err(e) => { + let _ = tx + .send(StreamEvent::Error { + message: e.to_string(), + }) + .await; + } + } + }); + + Ok(CompletionStream::new(rx)) + } +} + +// ─── Builder ───────────────────────────────────────────────────────────────── + +#[derive(Default)] +pub struct OpenAiBuilder { + api_key: Option, + base_url: Option, + model: Option, +} + +impl OpenAiBuilder { + pub fn api_key(mut self, key: impl Into) -> Self { + self.api_key = Some(key.into()); + self + } + + pub fn base_url(mut self, url: impl Into) -> Self { + self.base_url = Some(url.into()); + self + } + + pub fn model(mut self, model: impl Into) -> Self { + self.model = Some(model.into()); + self + } + + pub fn build(self) -> Result { + let auth = if let Some(key) = self.api_key { + Auth::ApiKey(key) + } else { + return Err(CerseiError::Auth( + "No API key provided. 
Set OPENAI_API_KEY or use .api_key()".into(), + )); + }; + + Ok(OpenAi { + auth, + base_url: self.base_url.unwrap_or_else(|| OPENAI_API_BASE.to_string()), + default_model: self.model.unwrap_or_else(|| "gpt-4o".to_string()), + client: reqwest::Client::new(), + }) + } +} diff --git a/third_party/cersei-provider/src/registry.rs b/third_party/cersei-provider/src/registry.rs new file mode 100644 index 0000000..6e96fa0 --- /dev/null +++ b/third_party/cersei-provider/src/registry.rs @@ -0,0 +1,492 @@ +//! Static registry of known LLM providers. +//! +//! Each entry contains the provider's API base URL, env var names for auth, +//! API format (Anthropic or OpenAI-compatible), and known models with +//! context windows and capabilities. + +use crate::ProviderCapabilities; + +/// API format used by a provider. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ApiFormat { + /// Anthropic's native API format (different SSE events, system prompt handling). + Anthropic, + /// OpenAI-compatible `/v1/chat/completions` format (used by most providers). + OpenAiCompatible, + /// Google Gemini native `generateContent` API format. + Google, +} + +/// A known LLM provider. +#[derive(Debug, Clone)] +pub struct ProviderEntry { + pub id: &'static str, + pub name: &'static str, + pub api_base: &'static str, + pub env_keys: &'static [&'static str], + pub api_format: ApiFormat, + pub default_model: &'static str, + pub models: &'static [ModelEntry], +} + +/// A known model within a provider. +#[derive(Debug, Clone)] +pub struct ModelEntry { + pub id: &'static str, + pub context_window: u64, + pub capabilities: ProviderCapabilities, +} + +impl ProviderEntry { + /// Try to read an API key from the environment using this provider's env key list. + pub fn api_key_from_env(&self) -> Option { + for key in self.env_keys { + if let Ok(val) = std::env::var(key) { + if !val.is_empty() { + return Some(val); + } + } + } + None + } + + /// Whether this provider requires an API key (Ollama does not). 
+    pub fn requires_key(&self) -> bool {
+        // Keyless (local) providers declare an empty `env_keys` list.
+        !self.env_keys.is_empty()
+    }
+
+    /// Reports whether this provider can be contacted right now.
+    ///
+    /// Providers that need an API key always report `true`; this method does
+    /// no network I/O for them. For providers with no `env_keys` (e.g. Ollama),
+    /// the `host:port` taken from `api_base` is resolved and each resulting
+    /// address is tried with a 200ms TCP connect timeout. Returns `true` as
+    /// soon as one connection succeeds, and `false` if `api_base` yields no
+    /// `host:port`, resolution fails, or every attempt fails.
+    pub fn is_reachable(&self) -> bool {
+        use std::net::{TcpStream, ToSocketAddrs};
+        use std::time::Duration;
+
+        if self.requires_key() {
+            return true;
+        }
+        let Some(target) = extract_host_port(self.api_base) else {
+            return false;
+        };
+        let Ok(mut resolved) = target.to_socket_addrs() else {
+            return false;
+        };
+        resolved.any(|addr| TcpStream::connect_timeout(&addr, Duration::from_millis(200)).is_ok())
+    }
+
+    /// Get the context window for a model, falling back to a default.
+ pub fn context_window(&self, model: &str) -> u64 { + self.models + .iter() + .find(|m| m.id == model) + .map(|m| m.context_window) + .unwrap_or(128_000) + } +} + +// ─── Capabilities shorthand ──────────────────────────────────────────────── + +const FULL: ProviderCapabilities = ProviderCapabilities { + streaming: true, + tool_use: true, + vision: true, + thinking: false, + system_prompt: true, + caching: false, +}; + +const FULL_THINKING: ProviderCapabilities = ProviderCapabilities { + streaming: true, + tool_use: true, + vision: true, + thinking: true, + system_prompt: true, + caching: true, +}; + +const BASIC: ProviderCapabilities = ProviderCapabilities { + streaming: true, + tool_use: true, + vision: false, + thinking: false, + system_prompt: true, + caching: false, +}; + +// ─── Provider Registry ───────────────────────────────────────────────────── + +pub static REGISTRY: &[ProviderEntry] = &[ + ProviderEntry { + id: "anthropic", + name: "Anthropic", + api_base: "https://api.anthropic.com", + env_keys: &["ANTHROPIC_API_KEY", "ANTHROPIC_KEY"], + api_format: ApiFormat::Anthropic, + default_model: "claude-sonnet-4-6", + models: &[ + ModelEntry { + id: "claude-opus-4-6", + context_window: 200_000, + capabilities: FULL_THINKING, + }, + ModelEntry { + id: "claude-sonnet-4-6", + context_window: 200_000, + capabilities: FULL_THINKING, + }, + ModelEntry { + id: "claude-haiku-4-5", + context_window: 200_000, + capabilities: FULL, + }, + ], + }, + ProviderEntry { + id: "openai", + name: "OpenAI", + api_base: "https://api.openai.com/v1", + env_keys: &["OPENAI_API_KEY"], + api_format: ApiFormat::OpenAiCompatible, + default_model: "gpt-5.4-2026-03-05", + models: &[ + ModelEntry { + id: "gpt-5.4-2026-03-05", + context_window: 1_000_000, + capabilities: FULL, + }, + ModelEntry { + id: "gpt-5.3-chat-latest", + context_window: 1_000_000, + capabilities: FULL, + }, + ModelEntry { + id: "gpt-5.3-chat", + context_window: 1_000_000, + capabilities: FULL, + }, + ModelEntry { + 
id: "gpt-5.3-codex", + context_window: 1_000_000, + capabilities: FULL, + }, + ModelEntry { + id: "gpt-5-chat", + context_window: 1_000_000, + capabilities: FULL, + }, + ModelEntry { + id: "gpt-4o", + context_window: 128_000, + capabilities: FULL, + }, + ModelEntry { + id: "gpt-4-turbo", + context_window: 128_000, + capabilities: FULL, + }, + ModelEntry { + id: "o1", + context_window: 200_000, + capabilities: FULL, + }, + ModelEntry { + id: "o3", + context_window: 200_000, + capabilities: FULL, + }, + ModelEntry { + id: "o3-pro", + context_window: 200_000, + capabilities: FULL, + }, + ], + }, + ProviderEntry { + id: "google", + name: "Google", + api_base: "https://generativelanguage.googleapis.com/v1beta", + env_keys: &["GOOGLE_API_KEY", "GEMINI_API_KEY"], + api_format: ApiFormat::Google, + default_model: "gemini-3.1-pro-preview", + models: &[ + ModelEntry { + id: "gemini-3.1-pro-preview", + context_window: 2_000_000, + capabilities: FULL, + }, + ModelEntry { + id: "gemini-3.0-flash", + context_window: 1_000_000, + capabilities: FULL, + }, + ModelEntry { + id: "gemini-2.0-flash", + context_window: 1_000_000, + capabilities: FULL, + }, + ModelEntry { + id: "gemini-2.0-pro", + context_window: 1_000_000, + capabilities: FULL, + }, + ModelEntry { + id: "gemini-1.5-pro", + context_window: 2_000_000, + capabilities: FULL, + }, + ModelEntry { + id: "gemini-1.5-flash", + context_window: 1_000_000, + capabilities: FULL, + }, + ], + }, + ProviderEntry { + id: "mistral", + name: "Mistral", + api_base: "https://api.mistral.ai/v1", + env_keys: &["MISTRAL_API_KEY"], + api_format: ApiFormat::OpenAiCompatible, + default_model: "mistral-large-latest", + models: &[ + ModelEntry { + id: "mistral-large-latest", + context_window: 128_000, + capabilities: FULL, + }, + ModelEntry { + id: "codestral-latest", + context_window: 256_000, + capabilities: BASIC, + }, + ], + }, + ProviderEntry { + id: "groq", + name: "Groq", + api_base: "https://api.groq.com/openai/v1", + env_keys: 
&["GROQ_API_KEY"], + api_format: ApiFormat::OpenAiCompatible, + default_model: "llama-3.1-70b-versatile", + models: &[ + ModelEntry { + id: "llama-3.1-70b-versatile", + context_window: 128_000, + capabilities: BASIC, + }, + ModelEntry { + id: "llama-3.1-8b-instant", + context_window: 128_000, + capabilities: BASIC, + }, + ModelEntry { + id: "mixtral-8x7b-32768", + context_window: 32_768, + capabilities: BASIC, + }, + ], + }, + ProviderEntry { + id: "deepseek", + name: "DeepSeek", + api_base: "https://api.deepseek.com/v1", + env_keys: &["DEEPSEEK_API_KEY"], + api_format: ApiFormat::OpenAiCompatible, + default_model: "deepseek-chat", + models: &[ + ModelEntry { + id: "deepseek-chat", + context_window: 64_000, + capabilities: FULL, + }, + ModelEntry { + id: "deepseek-coder", + context_window: 64_000, + capabilities: BASIC, + }, + ], + }, + ProviderEntry { + id: "xai", + name: "xAI", + api_base: "https://api.x.ai/v1", + env_keys: &["XAI_API_KEY"], + api_format: ApiFormat::OpenAiCompatible, + default_model: "grok-2", + models: &[ModelEntry { + id: "grok-2", + context_window: 128_000, + capabilities: FULL, + }], + }, + ProviderEntry { + id: "together", + name: "Together", + api_base: "https://api.together.xyz/v1", + env_keys: &["TOGETHER_API_KEY"], + api_format: ApiFormat::OpenAiCompatible, + default_model: "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", + models: &[ModelEntry { + id: "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", + context_window: 128_000, + capabilities: BASIC, + }], + }, + ProviderEntry { + id: "fireworks", + name: "Fireworks", + api_base: "https://api.fireworks.ai/inference/v1", + env_keys: &["FIREWORKS_API_KEY"], + api_format: ApiFormat::OpenAiCompatible, + default_model: "accounts/fireworks/models/llama-v3p1-70b-instruct", + models: &[ModelEntry { + id: "accounts/fireworks/models/llama-v3p1-70b-instruct", + context_window: 128_000, + capabilities: BASIC, + }], + }, + ProviderEntry { + id: "perplexity", + name: "Perplexity", + api_base: 
"https://api.perplexity.ai", + env_keys: &["PERPLEXITY_API_KEY"], + api_format: ApiFormat::OpenAiCompatible, + default_model: "llama-3.1-sonar-large-128k-online", + models: &[ModelEntry { + id: "llama-3.1-sonar-large-128k-online", + context_window: 128_000, + capabilities: BASIC, + }], + }, + ProviderEntry { + id: "cerebras", + name: "Cerebras", + api_base: "https://api.cerebras.ai/v1", + env_keys: &["CEREBRAS_API_KEY"], + api_format: ApiFormat::OpenAiCompatible, + default_model: "llama3.1-70b", + models: &[ModelEntry { + id: "llama3.1-70b", + context_window: 128_000, + capabilities: BASIC, + }], + }, + ProviderEntry { + id: "ollama", + name: "Ollama", + api_base: "http://localhost:11434/v1", + env_keys: &[], + api_format: ApiFormat::OpenAiCompatible, + default_model: "llama3.1", + models: &[], + }, + ProviderEntry { + id: "openrouter", + name: "OpenRouter", + api_base: "https://openrouter.ai/api/v1", + env_keys: &["OPENROUTER_API_KEY"], + api_format: ApiFormat::OpenAiCompatible, + default_model: "anthropic/claude-3.5-sonnet", + models: &[], + }, + ProviderEntry { + id: "cohere", + name: "Cohere", + api_base: "https://api.cohere.com/compatibility/v1", + env_keys: &["COHERE_API_KEY", "CO_API_KEY"], + api_format: ApiFormat::OpenAiCompatible, + default_model: "command-r-plus", + models: &[ + ModelEntry { + id: "command-r-plus", + context_window: 128_000, + capabilities: FULL, + }, + ModelEntry { + id: "command-r", + context_window: 128_000, + capabilities: FULL, + }, + ModelEntry { + id: "command-a", + context_window: 256_000, + capabilities: FULL, + }, + ], + }, + ProviderEntry { + id: "sambanova", + name: "SambaNova", + api_base: "https://api.sambanova.ai/v1", + env_keys: &["SAMBANOVA_API_KEY"], + api_format: ApiFormat::OpenAiCompatible, + default_model: "Meta-Llama-3.1-70B-Instruct", + models: &[ + ModelEntry { + id: "Meta-Llama-3.1-70B-Instruct", + context_window: 128_000, + capabilities: BASIC, + }, + ModelEntry { + id: "Meta-Llama-3.1-405B-Instruct", + 
context_window: 128_000,
+                capabilities: BASIC,
+            },
+        ],
+    },
+];
+
+/// Look up a provider by ID.
+///
+/// Returns `None` when no registry entry has exactly this `id`.
+pub fn lookup(provider_id: &str) -> Option<&'static ProviderEntry> {
+    REGISTRY.iter().find(|e| e.id == provider_id)
+}
+
+/// All registered providers, in registry order.
+pub fn all() -> &'static [ProviderEntry] {
+    REGISTRY
+}
+
+/// Providers that have valid auth configured in the environment **and** — for
+/// local providers without an API key (e.g. Ollama) — are actually reachable
+/// via a quick TCP probe.
+///
+/// The probe keeps unreachable local providers out of the list, so callers
+/// that fall back to "the first available provider" never silently pick
+/// Ollama when the daemon is not running, which was causing the CLI to
+/// default to `llama3.1` on machines without any LLM installed.
+pub fn available() -> Vec<&'static ProviderEntry> {
+    REGISTRY
+        .iter()
+        .filter(|e| {
+            if e.requires_key() {
+                e.api_key_from_env().is_some()
+            } else {
+                e.is_reachable()
+            }
+        })
+        .collect()
+}
+
+/// Extract a `host:port` string from an http(s) URL for TCP probing.
+///
+/// The scheme is stripped exactly once; any userinfo, path, query and
+/// fragment are dropped. When the authority carries no explicit port the
+/// scheme default is appended (443 for `https://`, 80 otherwise). Bracketed
+/// IPv6 literals such as `http://[::1]/v1` are handled.
+///
+/// Returns `None` when the URL contains no host.
+fn extract_host_port(api_base: &str) -> Option<String> {
+    // `strip_prefix` removes the scheme once; `trim_start_matches` would
+    // strip it repeatedly.
+    let (rest, default_port) = if let Some(rest) = api_base.strip_prefix("https://") {
+        (rest, 443)
+    } else if let Some(rest) = api_base.strip_prefix("http://") {
+        (rest, 80)
+    } else {
+        (api_base, 80)
+    };
+
+    // The authority ends at the first path, query or fragment delimiter.
+    let authority = rest
+        .split(|c: char| matches!(c, '/' | '?' | '#'))
+        .next()?;
+    // Drop any `user:pass@` prefix; its ':' must not be mistaken for a port.
+    let host_port = authority.rsplit('@').next()?;
+    if host_port.is_empty() {
+        return None;
+    }
+
+    // Inside a bracketed IPv6 literal, only a ':' directly after the closing
+    // ']' separates the port.
+    let has_port = match host_port.rfind(']') {
+        Some(close) => host_port[close + 1..].starts_with(':'),
+        None => host_port.contains(':'),
+    };
+
+    if has_port {
+        Some(host_port.to_string())
+    } else {
+        Some(format!("{host_port}:{default_port}"))
+    }
+}
diff --git a/third_party/cersei-provider/src/router.rs b/third_party/cersei-provider/src/router.rs
new file mode 100644
index 0000000..507f84a
--- /dev/null
+++ b/third_party/cersei-provider/src/router.rs
@@ -0,0 +1,274 @@
+//! Model router: parse `provider/model` strings and construct the right provider.
+//!
+//! ```rust,ignore
+//! use cersei_provider::router;
+//!
+//! let (provider, model) = router::from_model_string("openai/gpt-4o")?;
+//! let (provider, model) = router::from_model_string("groq/llama-3.1-70b-versatile")?;
+//! let (provider, model) = router::from_model_string("gpt-4o")?; // auto-detect
+//! ```
+
+use crate::registry::{self, ApiFormat, ProviderEntry};
+use crate::{Anthropic, Auth, Gemini, OpenAi, Provider};
+use cersei_types::*;
+
+/// Parse a model string and return a configured provider + resolved model name.
+///
+/// Accepts:
+/// - `"auto"` — the default model of the first registry entry that requires
+///   an API key and has one set in the environment
+/// - `"provider/model"` — explicit routing (e.g., `"groq/llama-3.1-70b-versatile"`)
+/// - `"model-name"` — auto-detect from known prefixes and env vars (e.g., `"gpt-4o"`)
+///
+/// Returns `(provider, model_name)` where `model_name` has the provider prefix stripped.
+///
+/// # Errors
+///
+/// - `CerseiError::Auth` when no usable API key is found.
+/// - `CerseiError::Config` when the provider prefix is unknown or the model
+///   part after the `/` is empty.
+pub fn from_model_string(model: &str) -> Result<(Box<dyn Provider>, String)> {
+    // "auto" — pick the first available *keyed* provider's default model.
+    //
+    // Local providers (Ollama, etc.) are skipped here on purpose: they need
+    // explicit opt-in via `--model ollama/<model>` so the CLI never silently
+    // starts talking to a daemon the user didn't ask for.
+    if model == "auto" {
+        // Query the registry directly instead of `registry::available()`:
+        // that helper TCP-probes every key-less provider, and those entries
+        // would be filtered out again right here.
+        let entry = registry::all()
+            .iter()
+            .find(|e| e.requires_key() && e.api_key_from_env().is_some())
+            .ok_or_else(|| {
+                let all_keys: Vec<String> = registry::all()
+                    .iter()
+                    .flat_map(|e| e.env_keys.iter().map(|k| k.to_string()))
+                    .collect();
+                CerseiError::Auth(format!(
+                    "No API keys found. Set one of: {}\n\nOr point at a local provider explicitly, e.g. --model ollama/llama3.1",
+                    all_keys.join(", ")
+                ))
+            })?;
+        let model_name = entry.default_model;
+        let provider = build_provider(entry, model_name)?;
+        return Ok((provider, model_name.to_string()));
+    }
+
+    if let Some((provider_id, model_name)) = model.split_once('/') {
+        // Explicit: "anthropic/claude-sonnet-4-6"
+        let entry = registry::lookup(provider_id).ok_or_else(|| {
+            let known: Vec<&str> = registry::all().iter().map(|e| e.id).collect();
+            CerseiError::Config(format!(
+                "Unknown provider: '{}'. Known providers: {}",
+                provider_id,
+                known.join(", ")
+            ))
+        })?;
+        // "provider/" would otherwise build a provider with an empty model
+        // name and only fail later, at request time.
+        if model_name.is_empty() {
+            return Err(CerseiError::Config(format!(
+                "Missing model name after '{}/'. Try '{}/{}'",
+                provider_id, entry.id, entry.default_model
+            )));
+        }
+        let provider = build_provider(entry, model_name)?;
+        Ok((provider, model_name.to_string()))
+    } else {
+        // Auto-detect: "gpt-4o" → openai
+        let (entry, resolved) = auto_detect(model)?;
+        let provider = build_provider(entry, resolved)?;
+        Ok((provider, resolved.to_string()))
+    }
+}
+
+/// List all providers that have valid auth configured.
+///
+/// Thin wrapper around `registry::available()`.
+pub fn available_providers() -> Vec<&'static ProviderEntry> {
+    registry::available()
+}
+
+/// List all known providers.
+pub fn all_providers() -> &'static [ProviderEntry] {
+    registry::all()
+}
+
+// ─── Internal ──────────────────────────────────────────────────────────────
+
+/// Read the API key for `entry` from the environment.
+///
+/// Returns a `CerseiError::Auth` naming the environment variables to set
+/// when none of them yields a key.
+fn require_api_key(entry: &ProviderEntry) -> Result<String> {
+    entry.api_key_from_env().ok_or_else(|| {
+        CerseiError::Auth(format!(
+            "No API key for {}. Set {} in your environment.",
+            entry.name,
+            entry.env_keys.join(" or ")
+        ))
+    })
+}
+
+/// Construct the concrete provider client for `entry`, configured for `model`.
+///
+/// The client type is chosen by `entry.api_format`. Providers that do not
+/// require a key (e.g. Ollama) get the placeholder key `"no-key"`.
+fn build_provider(entry: &ProviderEntry, model: &str) -> Result<Box<dyn Provider>> {
+    match entry.api_format {
+        ApiFormat::Anthropic => {
+            // NOTE(review): `model` is not forwarded to the Anthropic client
+            // here — presumably it is supplied per request; confirm.
+            let key = require_api_key(entry)?;
+            Ok(Box::new(Anthropic::new(Auth::ApiKey(key))))
+        }
+        ApiFormat::Google => {
+            let key = require_api_key(entry)?;
+            let provider = Gemini::builder().api_key(key).model(model).build()?;
+            Ok(Box::new(provider))
+        }
+        ApiFormat::OpenAiCompatible => {
+            let key = if entry.requires_key() {
+                require_api_key(entry)?
+            } else {
+                // Ollama and other local providers don't need a key
+                "no-key".to_string()
+            };
+
+            let provider = OpenAi::builder()
+                .base_url(entry.api_base)
+                .api_key(key)
+                .model(model)
+                .build()?;
+
+            Ok(Box::new(provider))
+        }
+    }
+}
+
+/// Auto-detect provider from a bare model name.
+fn auto_detect(model: &str) -> Result<(&'static ProviderEntry, &str)> { + // 1. Check known model prefixes + let prefix_match = match model { + m if m.starts_with("claude-") => Some("anthropic"), + m if m.starts_with("gpt-") + || m.starts_with("o1") + || m.starts_with("o3") + || m.starts_with("gpt5") => + { + Some("openai") + } + m if m.starts_with("gemini-") => Some("google"), + m if m.starts_with("mistral-") || m.starts_with("codestral-") => Some("mistral"), + m if m.starts_with("deepseek-") => Some("deepseek"), + m if m.starts_with("grok-") => Some("xai"), + m if m.starts_with("command-") => Some("cohere"), + m if m.starts_with("llama") => { + // llama models could be on Groq, Together, etc. + // Prefer Groq if key is set, otherwise Together + if std::env::var("GROQ_API_KEY") + .ok() + .filter(|k| !k.is_empty()) + .is_some() + { + Some("groq") + } else if std::env::var("TOGETHER_API_KEY") + .ok() + .filter(|k| !k.is_empty()) + .is_some() + { + Some("together") + } else { + Some("ollama") + } + } + _ => None, + }; + + if let Some(provider_id) = prefix_match { + if let Some(entry) = registry::lookup(provider_id) { + return Ok((entry, model)); + } + } + + // 2. Fall back to first available provider + let available = registry::available(); + if let Some(entry) = available.first() { + return Ok((entry, model)); + } + + // 3. Nothing available + let all_keys: Vec = registry::all() + .iter() + .flat_map(|e| e.env_keys.iter().map(|k| k.to_string())) + .collect(); + + Err(CerseiError::Auth(format!( + "Cannot detect provider for model '{}'. 
No API keys found.\n\nSet one of: {}", + model, + all_keys.join(", ") + ))) +} + +// ─── Tests ───────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_explicit_routing_unknown_provider() { + let result = from_model_string("nonexistent/some-model"); + assert!(result.is_err()); + match result { + Err(e) => { + let msg = e.to_string(); + assert!( + msg.contains("nonexistent"), + "Error should mention the provider name: {msg}" + ); + } + Ok(_) => panic!("Expected error for unknown provider"), + } + } + + #[test] + fn test_auto_detect_prefixes() { + // These test auto_detect logic without requiring env vars + let (entry, model) = auto_detect("claude-sonnet-4-6").unwrap_or_else(|_| { + // If no key is set, it still identifies the provider + (registry::lookup("anthropic").unwrap(), "claude-sonnet-4-6") + }); + assert_eq!(entry.id, "anthropic"); + assert_eq!(model, "claude-sonnet-4-6"); + } + + #[test] + fn test_registry_lookup() { + assert!(registry::lookup("anthropic").is_some()); + assert!(registry::lookup("openai").is_some()); + assert!(registry::lookup("groq").is_some()); + assert!(registry::lookup("ollama").is_some()); + assert!(registry::lookup("nonexistent").is_none()); + } + + #[test] + fn test_registry_lookup_new_providers() { + assert!(registry::lookup("cohere").is_some()); + assert!(registry::lookup("sambanova").is_some()); + } + + #[test] + fn test_google_native_format() { + let entry = registry::lookup("google").unwrap(); + assert_eq!(entry.api_format, ApiFormat::Google); + assert!(entry.api_base.contains("v1beta")); + assert!(!entry.api_base.contains("openai")); + } + + #[test] + fn test_auto_detect_cohere() { + let (entry, model) = auto_detect("command-r-plus") + .unwrap_or_else(|_| (registry::lookup("cohere").unwrap(), "command-r-plus")); + assert_eq!(entry.id, "cohere"); + assert_eq!(model, "command-r-plus"); + } + + #[test] + fn test_ollama_no_key_required() { + let entry = 
registry::lookup("ollama").unwrap(); + assert!(!entry.requires_key()); + } + + #[test] + fn test_all_providers_count() { + assert!(registry::all().len() >= 15); + } + + #[test] + fn test_provider_entry_context_window() { + let entry = registry::lookup("anthropic").unwrap(); + assert_eq!(entry.context_window("claude-sonnet-4-6"), 200_000); + assert_eq!(entry.context_window("unknown-model"), 128_000); // fallback + } +} diff --git a/third_party/cersei-provider/src/stream.rs b/third_party/cersei-provider/src/stream.rs new file mode 100644 index 0000000..afb2b18 --- /dev/null +++ b/third_party/cersei-provider/src/stream.rs @@ -0,0 +1,156 @@ +//! Stream accumulator: collects SSE stream events into a complete response. + +use cersei_types::*; +use std::collections::HashMap; + +/// Accumulates streaming events into content blocks. +pub struct StreamAccumulator { + content_blocks: Vec, + partial_text: HashMap, + partial_json: HashMap, + partial_thinking: HashMap, + block_types: HashMap, + tool_use_ids: HashMap, + tool_use_names: HashMap, + stop_reason: Option, + usage: Usage, + model: Option, + message_id: Option, +} + +impl StreamAccumulator { + pub fn new() -> Self { + Self { + content_blocks: Vec::new(), + partial_text: HashMap::new(), + partial_json: HashMap::new(), + partial_thinking: HashMap::new(), + block_types: HashMap::new(), + tool_use_ids: HashMap::new(), + tool_use_names: HashMap::new(), + stop_reason: None, + usage: Usage::default(), + model: None, + message_id: None, + } + } + + pub fn process_event(&mut self, event: StreamEvent) { + match event { + StreamEvent::MessageStart { id, model } => { + self.message_id = Some(id); + self.model = Some(model); + } + StreamEvent::ContentBlockStart { + index, + block_type, + id, + name, + } => { + self.block_types.insert(index, block_type); + if let Some(id) = id { + self.tool_use_ids.insert(index, id); + } + if let Some(name) = name { + self.tool_use_names.insert(index, name); + } + } + StreamEvent::TextDelta { index, 
text } => { + self.partial_text.entry(index).or_default().push_str(&text); + } + StreamEvent::InputJsonDelta { + index, + partial_json, + } => { + self.partial_json + .entry(index) + .or_default() + .push_str(&partial_json); + } + StreamEvent::ThinkingDelta { index, thinking } => { + self.partial_thinking + .entry(index) + .or_default() + .push_str(&thinking); + } + StreamEvent::ContentBlockStop { index } => { + let block_type = self.block_types.get(&index).cloned().unwrap_or_default(); + let block = match block_type.as_str() { + "text" => ContentBlock::Text { + text: self.partial_text.remove(&index).unwrap_or_default(), + }, + "tool_use" => { + let json_str = self.partial_json.remove(&index).unwrap_or_default(); + let input = + serde_json::from_str(&json_str).unwrap_or(serde_json::Value::Null); + ContentBlock::ToolUse { + id: self.tool_use_ids.remove(&index).unwrap_or_default(), + name: self.tool_use_names.remove(&index).unwrap_or_default(), + input, + } + } + "thinking" => ContentBlock::Thinking { + thinking: self.partial_thinking.remove(&index).unwrap_or_default(), + signature: String::new(), + }, + _ => ContentBlock::Text { + text: self.partial_text.remove(&index).unwrap_or_default(), + }, + }; + // Ensure we have enough slots + while self.content_blocks.len() <= index { + self.content_blocks.push(ContentBlock::Text { + text: String::new(), + }); + } + self.content_blocks[index] = block; + } + StreamEvent::MessageDelta { stop_reason, usage } => { + if let Some(sr) = stop_reason { + self.stop_reason = Some(sr); + } + if let Some(u) = usage { + self.usage.merge(&u); + } + } + StreamEvent::MessageStop => {} + StreamEvent::Ping => {} + StreamEvent::Error { .. 
} => {} + } + } + + pub fn into_response(self) -> Result { + let message = Message { + role: Role::Assistant, + content: if self.content_blocks.is_empty() { + MessageContent::Text(String::new()) + } else { + MessageContent::Blocks(self.content_blocks) + }, + id: self.message_id, + metadata: Some(MessageMetadata { + model: self.model, + usage: Some(self.usage.clone()), + stop_reason: self.stop_reason.clone(), + provider_data: serde_json::Value::Null, + }), + }; + + Ok(super::CompletionResponse { + message, + usage: self.usage, + stop_reason: self.stop_reason.unwrap_or(StopReason::EndTurn), + }) + } + + /// Get accumulated text so far (for streaming display). + pub fn current_text(&self) -> String { + self.partial_text.values().cloned().collect() + } +} + +impl Default for StreamAccumulator { + fn default() -> Self { + Self::new() + } +}