diff --git a/Cargo.lock b/Cargo.lock index caeb630..9476b98 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -119,6 +119,12 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + [[package]] name = "arc-swap" version = "1.7.1" @@ -941,6 +947,41 @@ dependencies = [ "shlex", ] +[[package]] +name = "cersei-provider" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac34f8f70b62c3e8cd7a7b2bf2bdfc015127a8fde80bc3d103e2ac078bb0e488" +dependencies = [ + "async-trait", + "base64 0.22.1", + "cersei-types", + "chrono", + "futures", + "reqwest 0.12.24", + "reqwest-eventsource", + "serde", + "serde_json", + "tokio", + "tracing", + "url", +] + +[[package]] +name = "cersei-types" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93b96148387c0a203671ed399209327cbfca47d5fa10425260deeaf3a0595470" +dependencies = [ + "anyhow", + "chrono", + "reqwest 0.12.24", + "serde", + "serde_json", + "thiserror 2.0.17", + "uuid", +] + [[package]] name = "cexpr" version = "0.6.0" @@ -1460,6 +1501,17 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "eventsource-stream" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab" +dependencies = [ + "futures-core", + "nom", + "pin-project-lite", +] + [[package]] name = "failsafe" version = "1.3.0" @@ -1478,6 +1530,21 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "figment" +version = "0.10.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3" +dependencies = [ + "atomic", + "parking_lot", + "pear", + "serde", + "tempfile", + "uncased", + "version_check", +] + [[package]] name = "find-msvc-tools" version = "0.1.4" @@ -1620,6 +1687,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = "futures-timer" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af43fadb8a98512d547e37b4e92e0ced13e205c061b87b4623eff01d918d6968" + [[package]] name = "futures-util" version = "0.3.31" @@ -2176,6 +2249,12 @@ dependencies = [ "serde_core", ] +[[package]] +name = "inlinable_string" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" + [[package]] name = "ipnet" version = "2.11.0" @@ -2509,6 +2588,7 @@ dependencies = [ "base64 0.22.1", "bigdecimal", "bon", + "cersei-provider", "chrono", "clap 4.5.51", "comrak", @@ -2517,6 +2597,7 @@ dependencies = [ "dialoguer", "directories", "failsafe", + "figment", "futures-core", "futures-util", "ignore", @@ -2791,6 +2872,29 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2ee67f1008b1ba2321834326597b8e186293b049a023cdef258527550b9935b4" +[[package]] +name = "pear" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467" +dependencies = [ + "inlinable_string", + "pear_codegen", + "yansi", +] + +[[package]] +name = "pear_codegen" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147" +dependencies = [ + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn 2.0.109", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -3270,6 +3374,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.109", + "version_check", + "yansi", +] + [[package]] name = "prometheus" version = "0.13.4" @@ -3595,16 +3712,34 @@ dependencies = [ "tokio", "tokio-native-tls", "tokio-rustls 0.26.4", + "tokio-util", "tower", "tower-http", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", "webpki-roots 1.0.4", ] +[[package]] +name = "reqwest-eventsource" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "632c55746dbb44275691640e7b40c907c16a2dc1a5842aa98aaec90da6ec6bde" +dependencies = [ + "eventsource-stream", + "futures-core", + "futures-timer", + "mime", + "nom", + "pin-project-lite", + "reqwest 0.12.24", + "thiserror 1.0.69", +] + [[package]] name = "ring" version = "0.17.14" @@ -4985,6 +5120,15 @@ dependencies = [ "libc", ] +[[package]] +name = "uncased" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697" +dependencies = [ + "version_check", +] + [[package]] name = "unicase" version = "2.8.1" @@ -5188,6 +5332,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "web-sys" version = "0.3.82" diff --git a/Cargo.toml b/Cargo.toml index a1c366a..def680f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,12 +30,14 @@ aws-smithy-runtime-api = "1.7.4" aws-smithy-types = "1.2.9" bigdecimal = { version = "0.4.7", features = ["serde-json"] } bon = "3.3.2" +cersei-provider = "0.1.9" chrono = "0.4.38" clap = { version = "4.3", features = ["derive", "env"] } console = "0.15.8" derive-getters = "0.5.0" dialoguer = "0.11.0" directories = "6.0" +figment = { version = "0.10", default-features = false, features = ["env"] } failsafe = "1.3.0" futures-core = "0.3.31" futures-util = "0.3.31" @@ -104,6 +106,8 @@ reqwest = { version = "0.12", default-features = false, features = [ openssl = { version = "0.10", features = ["vendored"] } [dev-dependencies] +# The `test` feature enables `figment::Jail` for hermetic env/file config tests. +figment = { version = "0.10", default-features = false, features = ["env", "test"] } pretty_assertions.workspace = true static_assertions.workspace = true tempfile = "3.8" diff --git a/guides/checks.md b/guides/checks.md index f36544c..ab7ce14 100644 --- a/guides/checks.md +++ b/guides/checks.md @@ -149,14 +149,52 @@ server the CLI runs on `localhost` (one dedicated endpoint per check). An agent that finishes **without** calling the tool fails its check. This keeps results trustworthy despite agent nondeterminism. +## ⚙️ Configuration + +The default **provider**, **model**, and **effort** are resolved from three +sources, in order of precedence (highest wins): + +1. **Flags** — `--provider`, `--model`, `--effort` on `multi check`. +2. **Environment** — `MULTI_`-prefixed vars mapped into the `checks` namespace, + e.g. `MULTI_CHECKS_MODEL`, `MULTI_CHECKS_PROVIDER`, `MULTI_CHECKS_EFFORT`. +3. **Config file** — the `[checks]` table of `MultiTool.toml` (or `.json` / + `.jsonc`), discovered up the directory tree like any MultiTool manifest. + +```toml +[checks] +provider = "anthropic" # anthropic | openai | gemini +model = "claude-sonnet-4-6" # must be a known model ID for the provider +effort = "low" # low | medium | high + +# optional, non-secret base-URL overrides per provider +[checks.providers.anthropic] +base_url = "https://..." +``` + +An unset flag contributes nothing — it never overrides a value from the +environment or file. The `model` is validated against a hardcoded allowlist of +known IDs for the selected provider; an unknown ID is a clear error. + +**Credentials are environment-only.** API keys are read directly from each +provider's native variable and never live in the config file or under the +`MULTI_` prefix: + +| Provider | API key | Base URL (optional) | +| --------- | ---------------------------------------- | -------------------- | +| Anthropic | `ANTHROPIC_API_KEY` | `ANTHROPIC_BASE_URL` | +| OpenAI | `OPENAI_API_KEY` | `OPENAI_BASE_URL` | +| Gemini | `GOOGLE_API_KEY` (or `GEMINI_API_KEY`) | `GEMINI_BASE_URL` | + +A provider is only selectable when its API key is present; selecting a provider +whose key is missing is an error. + ## ⚠️ MVP constraints - **macOS only** — copy-on-write sandboxing uses APFS `clonefile`. Linux and Windows support is planned. -- **`prompt`-type checks only** — checks run via `claude -p` against the `sonnet` - model family. A `shell` check type is planned. -- **Hardcoded configuration** — the model/provider are fixed for the MVP; there - is no environment or file-based configuration yet. +- **`prompt`-type checks only** — checks run via `claude -p` against the + configured model (the `sonnet` family by default). A `shell` check type is + planned. ## 📬 Need help? diff --git a/src/checks/config.rs b/src/checks/config.rs deleted file mode 100644 index 9488692..0000000 --- a/src/checks/config.rs +++ /dev/null @@ -1,95 +0,0 @@ -//! The configuration phase (M2 #1341). -//! -//! For the MVP this is **hardcoded** (no env/file loading), but its values are -//! **dependency-injected** forward — execution receives a [`BoxedExecutor`] built -//! from this [`Config`] rather than reading provider details at point of use. So -//! swapping providers later is a config change, not a rewrite. - -use std::time::Duration; - -use crate::checks::executor::BoxedExecutor; -use crate::checks::executor::claude::ClaudeExecutor; - -/// Effort level for the agent. Carried through configuration and logged by the -/// executor; not yet mapped to a concrete `claude -p` flag for the MVP -/// (see TODO in the executor). `Medium`/`High` are reserved for richer providers. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[allow(dead_code)] // Medium/High are reserved for future provider wiring. -pub enum Effort { - Low, - Medium, - High, -} - -/// The resolved configuration for a `multi check` run. -#[derive(Debug, Clone)] -pub struct Config { - /// Optional model-provider base URL. `None` uses the `claude` CLI default. - pub provider_url: Option, - /// The model family to run (default: the `sonnet` family). - pub model: String, - /// The effort level (default: low). - pub effort: Effort, - /// Maximum number of checks executed concurrently. - pub concurrency: usize, - /// Per-agent wall-clock timeout (reaps an agent that hangs before reporting). - pub agent_timeout: Duration, - /// How many times to (re)run a check whose agent fails to report. Agents are - /// nondeterministic and occasionally hang or finish without calling the - /// report tool; a fresh attempt against the same endpoint usually succeeds. - /// A check only resolves as errored after all attempts are exhausted. - pub max_attempts: usize, -} - -impl Config { - /// Construct the concrete [`BoxedExecutor`] from this configuration. This is - /// the injection point: execution is handed the boxed executor, never a - /// concrete type or a global. - pub fn build_executor(&self) -> BoxedExecutor { - Box::new(ClaudeExecutor::new( - self.model.clone(), - self.provider_url.clone(), - self.effort, - self.agent_timeout, - )) - } -} - -/// The configuration phase: produce the hardcoded MVP [`Config`]. -/// -/// Hardcoded to `claude -p` + the `sonnet` family. Environment/file loading is -/// explicitly out of scope (see *Future work: global model configuration*). -pub fn configuration() -> Config { - Config { - provider_url: None, - // The `sonnet` family. (The original MVP target was `haiku`, but on the - // multi-file *reasoning* checks this feature exists for, haiku reliably - // spins in a runaway exploration loop and never reaches a verdict; - // sonnet reasons efficiently and reports in well under a minute.) - model: "sonnet".to_string(), - effort: Effort::Low, - // Each check is a full `claude` agent process, killed the instant it - // reports (see execution::run_one), so they don't linger. A small fan-out - // gives each (CPU-heavy) reasoning agent enough cores to finish promptly. - concurrency: 2, - // Reaps an agent that hangs *before* reporting so the check can be - // retried. Generous: the heaviest reasoning checks can take a few minutes - // under contention before they report. - agent_timeout: Duration::from_secs(240), - max_attempts: 3, - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn defaults_are_hardcoded_and_bounded_concurrency() { - let cfg = configuration(); - assert_eq!(cfg.model, "sonnet"); - assert!(cfg.concurrency >= 1); - // The executor is constructible (DI seam works). - let _exec = cfg.build_executor(); - } -} diff --git a/src/checks/config/file.rs b/src/checks/config/file.rs new file mode 100644 index 0000000..f4b3857 --- /dev/null +++ b/src/checks/config/file.rs @@ -0,0 +1,114 @@ +//! Locating and reading the `MultiTool.toml` file layer. +//! +//! Discovery, XDG/app-root resolution, and toml/json/jsonc parsing all reuse the +//! existing [`crate::fs`] machinery — these are just three more `StaticFile` +//! marker types pointing at the same `MultiTool.*` file the legacy manifest +//! reads. We deserialise into [`RootFileConfig`], which ignores the manifest's +//! own keys, so both readers can share one file. + +use crate::fs::{DirectoryType, FileSystem, StaticFile}; + +use super::schema::RootFileConfig; + +/// The shared file stem, matching the legacy manifest so a single +/// `MultiTool.toml` carries both rollout and checks config. +const FILE_STEM: &str = "MultiTool"; + +/// `MultiTool.toml`, read as checks config. +struct TomlChecksFile; +impl StaticFile for TomlChecksFile { + type Data = RootFileConfig; + const DIR: DirectoryType = DirectoryType::ApplicationRoot; + const NAME: &'static str = FILE_STEM; + const EXTENSION: &'static str = "toml"; +} + +/// `MultiTool.json`, read as checks config. +struct JsonChecksFile; +impl StaticFile for JsonChecksFile { + type Data = RootFileConfig; + const DIR: DirectoryType = DirectoryType::ApplicationRoot; + const NAME: &'static str = FILE_STEM; + const EXTENSION: &'static str = "json"; +} + +/// `MultiTool.jsonc`, read as checks config. +struct JsoncChecksFile; +impl StaticFile for JsoncChecksFile { + type Data = RootFileConfig; + const DIR: DirectoryType = DirectoryType::ApplicationRoot; + const NAME: &'static str = FILE_STEM; + const EXTENSION: &'static str = "jsonc"; +} + +/// Load the file layer, trying toml → json → jsonc in the discovered +/// application root. A missing or unreadable file is **not** an error: the file +/// layer is optional, so we fall back to an empty [`RootFileConfig`] and let the +/// env/flag layers (and hardcoded defaults) supply the values. +pub fn load_file_layer() -> RootFileConfig { + let Ok(fs) = FileSystem::new() else { + return RootFileConfig::default(); + }; + fs.load_file(TomlChecksFile) + .or_else(|_| fs.load_file(JsonChecksFile)) + .or_else(|_| fs.load_file(JsoncChecksFile)) + .unwrap_or_default() +} + +#[cfg(test)] +mod tests { + // `figment::Jail`'s closure returns a large `Result`; unavoidable here. + #![allow(clippy::result_large_err)] + + use super::*; + use crate::checks::config::ProviderKind; + use figment::Jail; + + #[test] + fn reads_checks_table_alongside_legacy_manifest_keys() { + Jail::expect_with(|jail| { + // A single MultiTool.toml carrying both the legacy manifest's keys + // and the new `[checks]` table. The checks loader must read its + // table and ignore the rest rather than rejecting the file. + jail.create_file( + "MultiTool.toml", + r#" +workspace = "wack" +application = "multitool" + +[config.cloudflare] +project-dir = "src" + +[checks] +provider = "openai" +model = "gpt-4o" +effort = "high" + +[checks.providers.anthropic] +base_url = "https://anthropic.example" +"#, + )?; + + let cfg = load_file_layer(); + assert_eq!(cfg.checks.provider, Some(ProviderKind::OpenAi)); + assert_eq!(cfg.checks.model.as_deref(), Some("gpt-4o")); + assert_eq!( + cfg.checks.providers.base_url(ProviderKind::Anthropic), + Some("https://anthropic.example"), + ); + Ok(()) + }); + } + + #[test] + fn absent_file_yields_empty_layer() { + Jail::expect_with(|_jail| { + // Empty jail dir: no MultiTool.* present, so the file layer is empty + // (not an error) and lower-precedence defaults take over. + let cfg = load_file_layer(); + assert!(cfg.checks.provider.is_none()); + assert!(cfg.checks.model.is_none()); + Ok(()) + }); + } +} diff --git a/src/checks/config/mod.rs b/src/checks/config/mod.rs new file mode 100644 index 0000000..064870c --- /dev/null +++ b/src/checks/config/mod.rs @@ -0,0 +1,306 @@ +//! The configuration phase (M2 #1341, global config #1359). +//! +//! Resolves the global default **provider**, **model**, and **effort** from +//! three sources with standard CLI precedence — `flag > env var > config file` — +//! merged with [`figment`], and constructs a registry of ready-to-use model +//! providers for a future executor to consume. +//! +//! The resolved [`Config`] is still **dependency-injected** forward: execution +//! receives a [`BoxedExecutor`] built from it rather than reading provider +//! details at point of use. +//! +//! The execution path is out of scope here: this phase *constructs and hands +//! off* providers (see [`Resolved::providers`]); a follow-up wires them into a +//! real executor. The MVP [`ClaudeExecutor`] still runs the checks. + +mod file; +mod models; +mod providers; +mod schema; + +use std::time::Duration; + +use figment::{ + Figment, + providers::{Env, Serialized}, +}; +use miette::{Result, miette}; + +use crate::checks::executor::BoxedExecutor; +use crate::checks::executor::claude::ClaudeExecutor; + +pub use providers::ProviderRegistry; +pub use schema::{CliOverrides, Effort, ProviderKind}; + +/// Maximum number of checks executed concurrently. A small fan-out gives each +/// (CPU-heavy) reasoning agent enough cores to finish promptly. +const DEFAULT_CONCURRENCY: usize = 2; +/// Per-agent wall-clock timeout. Generous: the heaviest reasoning checks can +/// take a few minutes under contention before they report. +const DEFAULT_AGENT_TIMEOUT: Duration = Duration::from_secs(240); +/// How many times to (re)run a check whose agent fails to report. +const DEFAULT_MAX_ATTEMPTS: usize = 3; + +/// The resolved configuration for a `multi check` run. +#[derive(Debug, Clone)] +pub struct Config { + /// The selected provider. + pub provider: ProviderKind, + /// The selected provider's optional base-URL override, if configured. Passed + /// to the MVP executor as `ANTHROPIC_BASE_URL`; `None` uses the default. + pub provider_url: Option, + /// The concrete model ID to run (validated against the hardcoded allowlist). + pub model: String, + /// The effort level. + pub effort: Effort, + /// Maximum number of checks executed concurrently. + pub concurrency: usize, + /// Per-agent wall-clock timeout (reaps an agent that hangs before reporting). + pub agent_timeout: Duration, + /// How many times to (re)run a check whose agent fails to report. Agents are + /// nondeterministic and occasionally hang or finish without calling the + /// report tool; a fresh attempt against the same endpoint usually succeeds. + /// A check only resolves as errored after all attempts are exhausted. + pub max_attempts: usize, +} + +impl Config { + /// Construct the concrete [`BoxedExecutor`] from this configuration. This is + /// the injection point: execution is handed the boxed executor, never a + /// concrete type or a global. + pub fn build_executor(&self) -> BoxedExecutor { + Box::new(ClaudeExecutor::new( + self.model.clone(), + self.provider_url.clone(), + self.effort, + self.agent_timeout, + )) + } +} + +/// Merge the three config layers and extract the resolved `[checks]` table. +/// +/// Merge order (low → high) is `file → MULTI_-prefixed env → flags`, giving +/// `flag > env > file`. figment's own CLI example orders env-highest; we invert +/// to flag-highest. The flag layer only serialises values the user actually +/// passed (the CLI fields are `Option` with no clap default), so an unset +/// flag contributes nothing and does not clobber env/file. +fn resolve_layers( + file_layer: schema::RootFileConfig, + overrides: CliOverrides, +) -> Result { + let figment = Figment::new() + .merge(Serialized::defaults(file_layer)) + .merge(Env::prefixed("MULTI_").split("_")) + .merge(Serialized::defaults(overrides)); + + let root: schema::RootFileConfig = figment + .extract() + .map_err(|e| miette!("invalid checks configuration: {e}"))?; + Ok(root.checks) +} + +/// The product of the configuration phase: the resolved [`Config`] the pipeline +/// consumes, plus the constructed [`ProviderRegistry`] handed off for a future +/// executor. +pub struct Resolved { + pub config: Config, + pub providers: ProviderRegistry, +} + +/// The configuration phase: resolve provider/model/effort from file + env + +/// flags, validate the model, and build the provider registry. +/// +/// Merge order (low → high) is `file → MULTI_-prefixed env → flags`, giving +/// `flag > env > file`. Credentials are **not** part of this merge: API keys are +/// read straight from each provider's native env var at construction. +pub fn load(overrides: CliOverrides) -> Result { + let checks = resolve_layers(file::load_file_layer(), overrides)?; + + let provider = checks.provider.unwrap_or(ProviderKind::Anthropic); + let model = checks + .model + .unwrap_or_else(|| models::default_model(provider).to_string()); + let effort = checks.effort.unwrap_or(Effort::Low); + + if !models::is_valid_model(provider, &model) { + return Err(miette!( + "model `{model}` is not a valid model for provider `{}`. Valid models: {}", + provider.as_str(), + models::models_for(provider).join(", "), + )); + } + + // Build one handle per provider whose credential is present, then require + // that the *selected* provider actually resolved to an available handle. + let registry = providers::build_registry(&checks.providers)?; + if !registry.contains_key(provider.as_str()) { + return Err(miette!( + "configured provider `{}` is unavailable: set its API key ({}) in the environment", + provider.as_str(), + providers::credential_env_hint(provider), + )); + } + + let provider_url = checks.providers.base_url(provider).map(ToOwned::to_owned); + + let config = Config { + provider, + provider_url, + model, + effort, + concurrency: DEFAULT_CONCURRENCY, + agent_timeout: DEFAULT_AGENT_TIMEOUT, + max_attempts: DEFAULT_MAX_ATTEMPTS, + }; + + Ok(Resolved { + config, + providers: registry, + }) +} + +/// The hardcoded default [`Config`], with no file/env/flag loading. Used as the +/// baseline by tests and internal callers that don't need the resolved layer. +pub fn configuration() -> Config { + let provider = ProviderKind::Anthropic; + Config { + provider, + provider_url: None, + // The `sonnet` family. (The original MVP target was `haiku`, but on the + // multi-file *reasoning* checks this feature exists for, haiku reliably + // spins in a runaway exploration loop and never reaches a verdict; + // sonnet reasons efficiently and reports in well under a minute.) + model: models::default_model(provider).to_string(), + effort: Effort::Low, + concurrency: DEFAULT_CONCURRENCY, + agent_timeout: DEFAULT_AGENT_TIMEOUT, + max_attempts: DEFAULT_MAX_ATTEMPTS, + } +} + +#[cfg(test)] +mod tests { + // `figment::Jail`'s closure returns `Result<(), figment::Error>`, and that + // error is large — unavoidable given the API, so allow it in these tests. + #![allow(clippy::result_large_err)] + + use super::*; + use figment::Jail; + use schema::{ChecksSection, ProviderOverrides, ProvidersSection, RootFileConfig}; + + fn file_with(provider: ProviderKind, model: &str) -> RootFileConfig { + RootFileConfig { + checks: ChecksSection { + provider: Some(provider), + model: Some(model.to_string()), + effort: Some(Effort::Low), + providers: ProvidersSection::default(), + }, + } + } + + #[test] + fn defaults_are_hardcoded_and_bounded_concurrency() { + let cfg = configuration(); + assert_eq!(cfg.provider, ProviderKind::Anthropic); + assert_eq!(cfg.model, "claude-sonnet-4-6"); + assert!(cfg.concurrency >= 1); + // The executor is constructible (DI seam works). + let _exec = cfg.build_executor(); + } + + #[test] + fn flag_beats_file() { + Jail::expect_with(|_jail| { + let file = file_with(ProviderKind::Anthropic, "claude-haiku-4-5"); + let overrides = + CliOverrides::new(Some(ProviderKind::OpenAi), Some("gpt-4o".into()), None); + let checks = resolve_layers(file, overrides).unwrap(); + assert_eq!(checks.provider, Some(ProviderKind::OpenAi)); + assert_eq!(checks.model.as_deref(), Some("gpt-4o")); + // `effort` was unset on the flag layer, so the file value survives. + assert_eq!(checks.effort, Some(Effort::Low)); + Ok(()) + }); + } + + #[test] + fn unset_flag_does_not_clobber_file() { + Jail::expect_with(|_jail| { + let file = file_with(ProviderKind::Anthropic, "claude-haiku-4-5"); + let checks = resolve_layers(file, CliOverrides::default()).unwrap(); + assert_eq!(checks.provider, Some(ProviderKind::Anthropic)); + assert_eq!(checks.model.as_deref(), Some("claude-haiku-4-5")); + Ok(()) + }); + } + + #[test] + fn env_beats_file_and_flag_beats_env() { + Jail::expect_with(|jail| { + // `MULTI_CHECKS_MODEL` maps to `checks.model` via the `_` split. + jail.set_env("MULTI_CHECKS_MODEL", "claude-haiku-4-5"); + let file = file_with(ProviderKind::Anthropic, "claude-sonnet-4-6"); + + // Env outranks the file... + let checks = resolve_layers(file.clone(), CliOverrides::default()).unwrap(); + assert_eq!(checks.model.as_deref(), Some("claude-haiku-4-5")); + + // ...and a flag outranks env. + let overrides = CliOverrides::new(None, Some("claude-opus-4-8".into()), None); + let checks = resolve_layers(file, overrides).unwrap(); + assert_eq!(checks.model.as_deref(), Some("claude-opus-4-8")); + Ok(()) + }); + } + + #[test] + fn env_maps_provider_into_checks_namespace() { + Jail::expect_with(|jail| { + jail.set_env("MULTI_CHECKS_PROVIDER", "gemini"); + let checks = + resolve_layers(RootFileConfig::default(), CliOverrides::default()).unwrap(); + assert_eq!(checks.provider, Some(ProviderKind::Gemini)); + Ok(()) + }); + } + + #[test] + fn load_rejects_invalid_model() { + Jail::expect_with(|jail| { + // A syntactically fine but disallowed model ID. `load` validates + // against the hardcoded allowlist *before* touching the provider + // registry, so this fails deterministically without any API keys. + jail.create_file( + "MultiTool.toml", + r#" +[checks] +provider = "anthropic" +model = "claude-totally-made-up" +"#, + )?; + let err = load(CliOverrides::default()) + .err() + .expect("an invalid model must be rejected") + .to_string(); + assert!(err.contains("claude-totally-made-up"), "got: {err}"); + Ok(()) + }); + } + + #[test] + fn base_url_override_is_read_from_file() { + let providers = ProvidersSection { + anthropic: Some(ProviderOverrides { + base_url: Some("https://example.test".into()), + }), + ..Default::default() + }; + assert_eq!( + providers.base_url(ProviderKind::Anthropic), + Some("https://example.test"), + ); + assert_eq!(providers.base_url(ProviderKind::OpenAi), None); + } +} diff --git a/src/checks/config/models.rs b/src/checks/config/models.rs new file mode 100644 index 0000000..bb1d40d --- /dev/null +++ b/src/checks/config/models.rs @@ -0,0 +1,75 @@ +//! The hardcoded allowlist of valid model IDs, keyed by provider. +//! +//! The set of selectable models lives **in the binary**, not in config: a +//! configured `model` is only accepted if it appears here, and a future +//! per-check override will draw from the same table. These are the **concrete** +//! provider IDs the [`cersei_provider`] builders expect (e.g. +//! `claude-sonnet-4-6`), never the `claude` CLI's short aliases (`sonnet`). + +use super::schema::ProviderKind; + +/// Valid Anthropic model IDs. +pub const ANTHROPIC_MODELS: &[&str] = &["claude-opus-4-8", "claude-sonnet-4-6", "claude-haiku-4-5"]; + +/// Valid OpenAI model IDs. +pub const OPENAI_MODELS: &[&str] = &["gpt-4o", "gpt-4o-mini"]; + +/// Valid Gemini model IDs. +pub const GEMINI_MODELS: &[&str] = &["gemini-3.1-pro-preview"]; + +/// The default model for a provider when none is configured. Chosen to match +/// each `cersei_provider` builder's own default so behaviour is unsurprising. +pub const fn default_model(provider: ProviderKind) -> &'static str { + match provider { + ProviderKind::Anthropic => "claude-sonnet-4-6", + ProviderKind::OpenAi => "gpt-4o", + ProviderKind::Gemini => "gemini-3.1-pro-preview", + } +} + +/// The allowlist of valid model IDs for a given provider. +pub const fn models_for(provider: ProviderKind) -> &'static [&'static str] { + match provider { + ProviderKind::Anthropic => ANTHROPIC_MODELS, + ProviderKind::OpenAi => OPENAI_MODELS, + ProviderKind::Gemini => GEMINI_MODELS, + } +} + +/// Whether `model` is a valid ID for `provider`. +pub fn is_valid_model(provider: ProviderKind, model: &str) -> bool { + models_for(provider).contains(&model) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn defaults_are_in_their_own_allowlist() { + for provider in [ + ProviderKind::Anthropic, + ProviderKind::OpenAi, + ProviderKind::Gemini, + ] { + assert!( + is_valid_model(provider, default_model(provider)), + "default model for {provider:?} must be in its allowlist", + ); + } + } + + #[test] + fn rejects_cli_aliases() { + // The `claude` CLI alias `sonnet` is *not* a concrete ID and must not + // validate — config carries the real ID. + assert!(!is_valid_model(ProviderKind::Anthropic, "sonnet")); + assert!(is_valid_model(ProviderKind::Anthropic, "claude-sonnet-4-6")); + } + + #[test] + fn models_are_not_cross_provider() { + assert!(!is_valid_model(ProviderKind::OpenAi, "claude-sonnet-4-6")); + assert!(!is_valid_model(ProviderKind::Anthropic, "gpt-4o")); + } +} diff --git a/src/checks/config/providers.rs b/src/checks/config/providers.rs new file mode 100644 index 0000000..75ea421 --- /dev/null +++ b/src/checks/config/providers.rs @@ -0,0 +1,142 @@ +//! Constructing the provider registry: one ready-to-use handle per provider +//! whose credential is present. +//! +//! This is the ticket's deliverable. We build at most three handles +//! (anthropic / openai / gemini), keyed by provider name, dispatching on +//! **provider name only** — never `router::from_model_string`, whose env/prefix +//! auto-detection is exactly the guessing a config layer must avoid. cersei has +//! no per-model types, so a handle is not pinned to a model; a future executor +//! supplies the model per request. +//! +//! Credentials are read here, at construction, straight from each provider's +//! native env var — never from the config file and never under `MULTI_`. A +//! provider is added iff its credential is present; a missing key is not an +//! error (those models simply can't be selected). The caller is responsible for +//! verifying the *configured* provider ended up available. + +use std::collections::HashMap; + +use cersei_provider::{Anthropic, Gemini, OpenAi, Provider}; +use miette::{Result, miette}; + +use super::schema::{ProviderKind, ProvidersSection}; + +/// A provider name → constructed handle. At most three entries. +pub type ProviderRegistry = HashMap>; + +/// Read an env var, treating empty as unset. +fn env_nonempty(key: &str) -> Option { + std::env::var(key).ok().filter(|v| !v.is_empty()) +} + +/// The credential for `provider` from its native env var(s), if present. +fn credential(provider: ProviderKind) -> Option { + match provider { + ProviderKind::Anthropic => env_nonempty("ANTHROPIC_API_KEY"), + ProviderKind::OpenAi => env_nonempty("OPENAI_API_KEY"), + // Google's SDK convention: GOOGLE_API_KEY, falling back to GEMINI_API_KEY. + ProviderKind::Gemini => { + env_nonempty("GOOGLE_API_KEY").or_else(|| env_nonempty("GEMINI_API_KEY")) + } + } +} + +/// A human-readable hint naming the env var(s) that supply `provider`'s +/// credential, for the "provider unavailable" error. +pub const fn credential_env_hint(provider: ProviderKind) -> &'static str { + match provider { + ProviderKind::Anthropic => "ANTHROPIC_API_KEY", + ProviderKind::OpenAi => "OPENAI_API_KEY", + ProviderKind::Gemini => "GOOGLE_API_KEY or GEMINI_API_KEY", + } +} + +/// The native env var carrying `provider`'s base-URL override, if set. Used as a +/// fallback when no `[checks.providers.].base_url` is configured. +fn native_base_url(provider: ProviderKind) -> Option { + let key = match provider { + ProviderKind::Anthropic => "ANTHROPIC_BASE_URL", + ProviderKind::OpenAi => "OPENAI_BASE_URL", + ProviderKind::Gemini => "GEMINI_BASE_URL", + }; + env_nonempty(key) +} + +/// The base URL to use for `provider`: an explicit config override wins, +/// otherwise the provider's native env var, otherwise the cersei default. +fn resolve_base_url(provider: ProviderKind, overrides: &ProvidersSection) -> Option { + overrides + .base_url(provider) + .map(ToOwned::to_owned) + .or_else(|| native_base_url(provider)) +} + +/// Construct the handle for `provider` with `key`, applying the optional +/// `base_url` before `.build()`. Dispatches on provider name only. +fn build_one( + provider: ProviderKind, + key: String, + base_url: Option, +) -> Result> { + let provider: Box = match provider { + ProviderKind::Anthropic => { + let mut b = Anthropic::builder().api_key(key); + if let Some(url) = base_url { + b = b.base_url(url); + } + Box::new(b.build().map_err(|e| miette!("anthropic provider: {e}"))?) + } + ProviderKind::OpenAi => { + let mut b = OpenAi::builder().api_key(key); + if let Some(url) = base_url { + b = b.base_url(url); + } + Box::new(b.build().map_err(|e| miette!("openai provider: {e}"))?) + } + ProviderKind::Gemini => { + let mut b = Gemini::builder().api_key(key); + if let Some(url) = base_url { + b = b.base_url(url); + } + Box::new(b.build().map_err(|e| miette!("gemini provider: {e}"))?) + } + }; + Ok(provider) +} + +/// Build the registry: one handle per provider whose credential is present. +pub fn build_registry(overrides: &ProvidersSection) -> Result { + let mut registry = ProviderRegistry::new(); + for provider in [ + ProviderKind::Anthropic, + ProviderKind::OpenAi, + ProviderKind::Gemini, + ] { + let Some(key) = credential(provider) else { + continue; + }; + let base_url = resolve_base_url(provider, overrides); + let handle = build_one(provider, key, base_url)?; + registry.insert(provider.as_str().to_owned(), handle); + } + Ok(registry) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn registry_skips_providers_without_credentials() { + // We can't assume any keys are set in CI, but build_registry must never + // error purely from missing keys — absent providers are just omitted. + let overrides = ProvidersSection::default(); + let registry = build_registry(&overrides).expect("missing keys are not an error"); + // Only providers with a credential present appear; the map is a subset + // of the three known providers. + assert!(registry.len() <= 3); + for name in registry.keys() { + assert!(matches!(name.as_str(), "anthropic" | "openai" | "gemini")); + } + } +} diff --git a/src/checks/config/schema.rs b/src/checks/config/schema.rs new file mode 100644 index 0000000..8540fe1 --- /dev/null +++ b/src/checks/config/schema.rs @@ -0,0 +1,135 @@ +//! The serde/clap schema shared by the three config sources. +//! +//! The same nested shape (`{ checks: { provider, model, effort, providers } }`) +//! is produced by the file loader, figment's `Env` provider, and the CLI +//! overrides, so figment can merge them by key path with `flag > env > file` +//! precedence. See [`super::load`]. + +use clap::ValueEnum; +use serde::{Deserialize, Serialize}; + +/// A model provider. Serialises to its lowercase name (`anthropic`, `openai`, +/// `gemini`) in every source — TOML, `MULTI_CHECKS_PROVIDER`, and `--provider`. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, ValueEnum)] +#[serde(rename_all = "lowercase")] +pub enum ProviderKind { + Anthropic, + #[value(name = "openai")] + OpenAi, + Gemini, +} + +impl ProviderKind { + /// The canonical lowercase provider name, used as the registry key and as + /// the `[checks.providers.]` table name. + pub const fn as_str(self) -> &'static str { + match self { + ProviderKind::Anthropic => "anthropic", + ProviderKind::OpenAi => "openai", + ProviderKind::Gemini => "gemini", + } + } +} + +/// The agent effort level. Carried through configuration and consumed by the +/// executor. `Medium`/`High` are reserved for richer providers. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ValueEnum)] +#[serde(rename_all = "lowercase")] +pub enum Effort { + Low, + Medium, + High, +} + +/// The whole config file, of which only the `[checks]` table concerns us. Other +/// top-level keys (the legacy manifest's `workspace`/`application`/`config`) are +/// ignored rather than rejected, so a single `MultiTool.toml` can carry both. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct RootFileConfig { + #[serde(default)] + pub checks: ChecksSection, +} + +/// The `[checks]` table. Every selectable field is optional so an unset value in +/// a higher-precedence layer contributes nothing to the merge. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ChecksSection { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub provider: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub model: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub effort: Option, + /// Optional, non-secret per-provider base-URL overrides. + #[serde(default)] + pub providers: ProvidersSection, +} + +/// `[checks.providers]` — at most one table per provider, each carrying an +/// optional `base_url`. Credentials never live here (env-only). +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ProvidersSection { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub anthropic: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub openai: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub gemini: Option, +} + +impl ProvidersSection { + /// The configured base-URL override for `provider`, if any. + pub fn base_url(&self, provider: ProviderKind) -> Option<&str> { + let table = match provider { + ProviderKind::Anthropic => &self.anthropic, + ProviderKind::OpenAi => &self.openai, + ProviderKind::Gemini => &self.gemini, + }; + table.as_ref().and_then(|t| t.base_url.as_deref()) + } +} + +/// The non-secret overrides for one provider. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ProviderOverrides { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub base_url: Option, +} + +/// The flag layer, fed into figment via `Serialized::defaults`. Only the values +/// the user actually passed are serialised (`skip_serializing_if`), so unset +/// flags don't clobber the env/file layers — the clap-defaults gotcha the +/// ticket calls out. This is why the corresponding CLI fields carry no +/// `default_value`. +#[derive(Debug, Clone, Default, Serialize)] +pub struct CliOverrides { + pub checks: CliChecksOverrides, +} + +/// The `[checks]` subset settable from flags. +#[derive(Debug, Clone, Default, Serialize)] +pub struct CliChecksOverrides { + #[serde(skip_serializing_if = "Option::is_none")] + pub provider: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub effort: Option, +} + +impl CliOverrides { + /// Build the flag layer from the individual CLI values. + pub fn new( + provider: Option, + model: Option, + effort: Option, + ) -> Self { + Self { + checks: CliChecksOverrides { + provider, + model, + effort, + }, + } + } +} diff --git a/src/checks/mod.rs b/src/checks/mod.rs index ab2a838..8833e05 100644 --- a/src/checks/mod.rs +++ b/src/checks/mod.rs @@ -26,8 +26,7 @@ use std::path::Path; use miette::Result; use crate::Terminal; - -pub use config::configuration; +use crate::checks::config::CliOverrides; /// Run the full `multi check` pipeline rooted at `working_dir`. /// @@ -35,9 +34,21 @@ pub use config::configuration; /// empty tree counts as success), `1` if any requirement is unsatisfied. /// Operational errors (e.g. an invalid `CHECKS.md`) surface as `Err` diagnostics /// rather than an exit code, so CI can tell "checks failed" from "tool errored". -pub async fn run(terminal: &Terminal, working_dir: &Path) -> Result { - // Phase 1: configuration (hardcoded for the MVP, injected forward). - let cfg = configuration(); +pub async fn run(terminal: &Terminal, working_dir: &Path, overrides: CliOverrides) -> Result { + // Phase 1: configuration — resolve provider/model/effort (flag > env > file) + // and construct the provider registry, injected forward. + let resolved = config::load(overrides)?; + let cfg = resolved.config; + + // The provider registry is constructed and handed off here; wiring it into a + // real executor is a follow-up. For now the MVP `ClaudeExecutor` runs the + // checks, so just record what was built. + tracing::debug!( + provider = cfg.provider.as_str(), + model = %cfg.model, + available_providers = ?resolved.providers.keys().collect::>(), + "resolved checks configuration and provider registry", + ); // Phase 2: discovery. let requirements = discovery::discover(working_dir).await?; diff --git a/src/cmd/check.rs b/src/cmd/check.rs index 0b347ed..af8de28 100644 --- a/src/cmd/check.rs +++ b/src/cmd/check.rs @@ -24,7 +24,11 @@ impl Check { // The pipeline returns a process exit code distinct from operational // errors: check *failures* exit 1 cleanly; invalid input surfaces as a // `miette` diagnostic (non-zero) instead. - let code = rt.block_on(crate::checks::run(&self.terminal, self.args.directory()))?; + let code = rt.block_on(crate::checks::run( + &self.terminal, + self.args.directory(), + self.args.overrides(), + ))?; if code != 0 { std::process::exit(code); } diff --git a/src/config/check/mod.rs b/src/config/check/mod.rs index 4e94e59..18b5cae 100644 --- a/src/config/check/mod.rs +++ b/src/config/check/mod.rs @@ -2,15 +2,31 @@ use std::path::{Path, PathBuf}; use clap::Args; +use crate::checks::config::{CliOverrides, Effort, ProviderKind}; + /// `multi check`: validate the requirements declared in `CHECKS.md` files. /// -/// Model/provider flags are intentionally absent — configuration is hardcoded -/// for the MVP (see M2). Only the working directory is configurable. +/// The model/provider/effort flags are the highest-precedence config layer +/// (`flag > env > file`). They are intentionally `Option` with **no** +/// `default_value`: an unset flag must contribute nothing to the figment merge, +/// otherwise clap's defaults would silently clobber the env/file layers. #[derive(Args, Clone)] pub struct CheckSubcommand { /// The directory to recursively scan for `CHECKS.md` files. #[arg(default_value = ".")] directory: PathBuf, + + /// The model provider to use. Overrides `checks.provider` from env/file. + #[arg(long, value_enum)] + provider: Option, + + /// The concrete model ID to run. Overrides `checks.model` from env/file. + #[arg(long)] + model: Option, + + /// The agent effort level. Overrides `checks.effort` from env/file. + #[arg(long, value_enum)] + effort: Option, } impl CheckSubcommand { @@ -18,4 +34,10 @@ impl CheckSubcommand { pub fn directory(&self) -> &Path { &self.directory } + + /// The flag layer for the config merge, carrying only the values the user + /// actually passed. + pub fn overrides(&self) -> CliOverrides { + CliOverrides::new(self.provider, self.model.clone(), self.effort) + } } diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 2c61be6..3fce21d 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -1,5 +1,4 @@ use directories::ProjectDirs; -use file::StaticFile; use miette::{Diagnostic, IntoDiagnostic, Result, miette}; use std::fs; use thiserror::Error; @@ -10,6 +9,9 @@ use std::{ }; pub(crate) use file::File; +// Re-exported so config layers outside the `fs` module (e.g. `checks::config`) +// can declare their own `StaticFile` marker types and reuse the loader/discovery. +pub(crate) use file::StaticFile; pub(crate) use manifest::application_manifest; pub(crate) use session::{Session, SessionFile, UserCreds}; pub(crate) use wrangler::{JsonWranglerFile, JsoncWranglerFile, TomlWranglerFile};