From cf5f39721bb13e4422327c6ee6b32f88bc7febb8 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 05:53:54 +1000 Subject: [PATCH 01/60] feat(serve): degrade to no-index MCP server instead of exiting on missing DB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When `.loomweave/loomweave.db` is absent, `loomweave serve` did a hard `ensure!(db_path.exists())` and exited 1 before the MCP protocol started. An MCP stdio client (Claude Code) just saw the server die at startup with the real reason buried in stderr — it read as "loomweave mcp failing" with no actionable signal. Now `serve` starts a degraded "no-index" stdio session instead: - `initialize` succeeds so the client connects cleanly; the `instructions` field leads with the run-`install`+`analyze` chirp (mirrors the SessionStart hook wording). - Every `tools/call` returns the same chirp as a tool result with `isError: true` — the load-bearing channel, since not every client surfaces `initialize.instructions`. - `tools/list` and the static `loomweave-workflow` prompt still answer so the surface looks healthy. - No HTTP read API bind, no LLM/embedding providers, no Filigree client, no ReaderPool — nothing to back them without a DB. One warn line to stderr (never stdout) at degraded startup. loomweave-mcp gains `handle_json_rpc_no_index` + `serve_stdio_no_index` plus chirp helpers; serve.rs swaps the exit for a `serve_no_index` branch. Closes clarion-ac36f51c2b. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/serve.rs | 37 ++++-- crates/loomweave-mcp/src/lib.rs | 192 ++++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+), 7 deletions(-) diff --git a/crates/loomweave-cli/src/serve.rs b/crates/loomweave-cli/src/serve.rs index 9496be06..b96da876 100644 --- a/crates/loomweave-cli/src/serve.rs +++ b/crates/loomweave-cli/src/serve.rs @@ -5,7 +5,7 @@ use std::sync::{Arc, mpsc}; use std::thread; use std::time::Duration; -use anyhow::{Context, Result, anyhow, ensure}; +use anyhow::{Context, Result, anyhow}; use loomweave_core::{ ApiEmbeddingProvider, ApiEmbeddingProviderConfig, ClaudeCliProvider, ClaudeCliProviderConfig, CodexCliProvider, CodexCliProviderConfig, EmbeddingProvider, EmbeddingProviderError, @@ -19,12 +19,13 @@ use loomweave_storage::{DEFAULT_BATCH_SIZE, DEFAULT_CHANNEL_CAPACITY, ReaderPool pub fn run(path: &Path, config_path: Option<&Path>) -> Result<()> { let db_path = path.join(".loomweave").join("loomweave.db"); - ensure!( - db_path.exists(), - "Loomweave database not found at {}; run `loomweave install --path {}` first", - db_path.display(), - path.display() - ); + if !db_path.exists() { + // No index yet. Rather than exiting 1 — which leaves the MCP client + // staring at a server that died at startup with the reason buried in + // stderr — serve a degraded stdio session that answers `initialize` and + // chirps "run analyze" from every tool call. clarion-ac36f51c2b. + return serve_no_index(path, &db_path); + } let project_root = path .canonicalize() @@ -106,6 +107,28 @@ pub fn run(path: &Path, config_path: Option<&Path>) -> Result<()> { supervise_stdio_with_http(stdio, http_server) } +/// Serve a degraded MCP stdio session for a project with no index. No HTTP read +/// API, no LLM / embedding providers, no Filigree client, no `ReaderPool` — +/// there is no DB to back any of them. 
The session answers `initialize` and +/// chirps "run `loomweave install` + `loomweave analyze`" from every tool call, +/// so the client connects and is told how to recover instead of seeing the +/// server exit. clarion-ac36f51c2b. +fn serve_no_index(project_root: &Path, db_path: &Path) -> Result<()> { + // Goes to stderr (the CLI's tracing sink) — never stdout, which carries the + // MCP protocol — so it lands in the MCP server log without corrupting framing. + tracing::warn!( + db = %db_path.display(), + "Loomweave has no index; serving a degraded MCP session. Run \ + `loomweave analyze` to build the graph, then reconnect." + ); + let stdin = std::io::stdin(); + let stdout = std::io::stdout(); + let mut reader = BufReader::new(stdin); + let mut writer = stdout.lock(); + loomweave_mcp::serve_stdio_no_index(project_root, &mut reader, &mut writer) + .context("serve degraded MCP stdio (no index)") +} + /// Capture the LLM policy posture for `project_status`. `live` means a provider /// that actually dispatches (`OpenRouter` / Codex / Claude CLIs); the recording /// fixture and the disabled state are not live. diff --git a/crates/loomweave-mcp/src/lib.rs b/crates/loomweave-mcp/src/lib.rs index fc29d681..3be34ed0 100644 --- a/crates/loomweave-mcp/src/lib.rs +++ b/crates/loomweave-mcp/src/lib.rs @@ -803,6 +803,112 @@ pub fn handle_json_rpc(request: &Value) -> Option { }) } +/// Actionable chirp for a project with no index. Mirrors the `SessionStart` hook +/// wording (`hook.rs`) so the operator sees the same "install then analyze" +/// sequence whether they read it from the shell or from an MCP client. Surfaced +/// both in the degraded `initialize` instructions and from every degraded +/// `tools/call` result. 
+fn no_index_message(project_root: &Path) -> String { + let root = project_root.display(); + format!( + "Loomweave has no index for this project yet \ +({root}/.loomweave/loomweave.db is missing), so the structural graph has not been \ +built and every Loomweave tool is unavailable. Run `loomweave install --path {root}` \ +then `loomweave analyze {root}` in a terminal to extract the entity / edge graph, \ +then reconnect this MCP server." + ) +} + +/// Degraded-mode orientation for the `initialize` `instructions` field. Distinct +/// from [`server_instructions`] (the healthy-index orientation) so the normal +/// path — and its `server_instructions_enumerate_every_tool` guard — is +/// untouched. +fn server_instructions_no_index(project_root: &Path) -> String { + format!( + "⚠ NO INDEX. {}\n\nNormally Loomweave answers \"what calls X\", \"where is X \ +defined\", \"what subsystem is X in\" from a pre-extracted graph instead of grepping \ +the tree — but it needs an index first. `tools/list` still shows the surface; any tool \ +call returns this same instruction until the index exists.", + no_index_message(project_root) + ) +} + +/// The `initialize` result for the degraded no-index server. Advertises `tools` +/// and `prompts` (the static `loomweave-workflow` prompt works without a DB) but +/// not `resources` (the `loomweave://context` resource needs the index). +fn initialize_result_no_index(project_root: &Path) -> Value { + json!({ + "protocolVersion": MCP_PROTOCOL_VERSION, + "capabilities": { "tools": {}, "prompts": {} }, + "serverInfo": { + "name": "loomweave", + "version": env!("CARGO_PKG_VERSION") + }, + "instructions": server_instructions_no_index(project_root) + }) +} + +/// JSON-RPC dispatch for the degraded "no index" stdio server: the project has +/// no `.loomweave/loomweave.db`, so there is no graph to query. 
`initialize` +/// succeeds (the client connects cleanly rather than seeing the server die) and +/// `tools/call` returns the actionable chirp as a tool result with +/// `isError: true` — the load-bearing channel, since not every client surfaces +/// the `initialize` `instructions`. `tools/list` and the static +/// `loomweave-workflow` prompt answer normally so the surface looks healthy. +/// clarion-ac36f51c2b. +#[must_use] +pub fn handle_json_rpc_no_index(request: &Value, project_root: &Path) -> Option { + if is_json_rpc_notification(request) { + return None; + } + let id = request.get("id").cloned().unwrap_or(Value::Null); + let Some(method) = request.get("method").and_then(Value::as_str) else { + return Some(error_response(&id, -32600, "invalid request")); + }; + + Some(match method { + "initialize" => result_response(&id, &initialize_result_no_index(project_root)), + "tools/list" => result_response( + &id, + &json!({"tools": list_tools_for_policy(McpToolPolicy::default())}), + ), + "tools/call" => result_response( + &id, + &json!({ + "content": [ + { "type": "text", "text": no_index_message(project_root) } + ], + "isError": true + }), + ), + "prompts/list" => result_response(&id, &prompts_list()), + "prompts/get" => prompts_get(&id, request.get("params")), + _ => error_response(&id, -32601, "method not found"), + }) +} + +/// Serve a degraded MCP stdio session for a project with no index. Mirrors +/// [`serve_stdio`] (synchronous — there are no storage-backed async tools to +/// drive) but routes every request through [`handle_json_rpc_no_index`]. Used by +/// `loomweave serve` when `.loomweave/loomweave.db` is absent, so the client +/// connects and is told to run analyze rather than watching the server exit. +pub fn serve_stdio_no_index( + project_root: &Path, + reader: &mut impl std::io::BufRead, + writer: &mut impl std::io::Write, +) -> Result<(), McpError> { + loop { + let Some(frame) = read_stdio_frame(reader)? 
else { + return Ok(()); + }; + let framing = frame.framing; + let request: Value = serde_json::from_slice(&frame.body)?; + if let Some(response) = handle_json_rpc_no_index(&request, project_root) { + write_stdio_response(writer, &encode_response_frame(&response)?, framing)?; + } + } +} + /// Deterministic, non-storage diagnostics threaded in at server construction so /// `project_status` can report the LLM policy and the resolved Filigree /// endpoint without re-reading config or re-running URL resolution. Optional: @@ -4956,6 +5062,92 @@ mod tests { } } + #[test] + fn no_index_initialize_chirps_install_and_analyze() { + let root = std::path::Path::new("/tmp/demo"); + let request = serde_json::json!({"jsonrpc": "2.0", "id": 1, "method": "initialize"}); + let response = + super::handle_json_rpc_no_index(&request, root).expect("initialize yields a response"); + assert_eq!( + response["result"]["protocolVersion"], + super::MCP_PROTOCOL_VERSION + ); + assert_eq!(response["result"]["serverInfo"]["name"], "loomweave"); + assert!(response["result"]["capabilities"]["tools"].is_object()); + let instructions = response["result"]["instructions"] + .as_str() + .expect("instructions present"); + // Both halves of the canonical hook sequence, plus the project path. 
+ assert!( + instructions.contains("loomweave install --path /tmp/demo"), + "instructions: {instructions}" + ); + assert!( + instructions.contains("loomweave analyze /tmp/demo"), + "instructions: {instructions}" + ); + } + + #[test] + fn no_index_tools_call_returns_actionable_is_error() { + let root = std::path::Path::new("/tmp/demo"); + let request = serde_json::json!({ + "jsonrpc": "2.0", + "id": 2, + "method": "tools/call", + "params": {"name": "entity_find", "arguments": {"query": "foo"}} + }); + let response = super::handle_json_rpc_no_index(&request, root).expect("response"); + // isError is the load-bearing chirp channel — fires the moment the agent + // touches any tool, regardless of whether the client surfaced instructions. + assert_eq!(response["result"]["isError"], serde_json::json!(true)); + let text = response["result"]["content"][0]["text"] + .as_str() + .expect("tool result text"); + assert!( + text.contains("loomweave analyze /tmp/demo"), + "tool chirp text: {text}" + ); + } + + #[test] + fn no_index_tools_list_still_advertises_tools() { + let root = std::path::Path::new("/tmp/demo"); + let request = serde_json::json!({"jsonrpc": "2.0", "id": 3, "method": "tools/list"}); + let response = super::handle_json_rpc_no_index(&request, root).expect("response"); + let tools = response["result"]["tools"].as_array().expect("tools array"); + assert!( + !tools.is_empty(), + "degraded tools/list should still advertise the surface" + ); + } + + #[test] + fn no_index_ignores_notifications() { + let root = std::path::Path::new("/tmp/demo"); + // The client sends notifications/initialized right after initialize; it + // has no id and must draw no response. 
+ let request = serde_json::json!({"jsonrpc": "2.0", "method": "notifications/initialized"}); + assert!(super::handle_json_rpc_no_index(&request, root).is_none()); + } + + #[test] + fn serve_stdio_no_index_round_trips_initialize_over_json_line() { + let root = std::path::Path::new("/tmp/demo"); + let input = b"{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"initialize\"}\n"; + let mut reader = std::io::BufReader::new(&input[..]); + let mut output = Vec::new(); + super::serve_stdio_no_index(root, &mut reader, &mut output).expect("degraded serve"); + let response: serde_json::Value = serde_json::from_slice(&output).expect("framed json"); + let instructions = response["result"]["instructions"] + .as_str() + .expect("instructions present"); + assert!( + instructions.contains("loomweave analyze /tmp/demo"), + "instructions: {instructions}" + ); + } + #[test] fn initialize_returns_server_info_and_tools_capability() { let response = super::handle_json_rpc(&serde_json::json!({ From 49a6753d6ea741d44a8dc90ec12662baa11c4c8e Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 07:39:55 +1000 Subject: [PATCH 02/60] docs(adr): ADR-044 read-API ephemeral port; stopgap 9112; wardline tooling Read-API port deconfliction (clarion-7f574bc34f): - ADR-044 (Proposed): serve publishes .loomweave/ephemeral.port with a per-project deterministic port + ephemeral fallback and a loomweave-side resolver (twin of filigree_url), so concurrent projects stop colliding on the hardcoded 9111; installer stops pinning the port. Indexed in the ADR README. - Stopgap so this project coexists with others on 9111 until the ADR lands: loomweave.yaml serve.http.bind -> 127.0.0.1:9112 and wardline.yaml loomweave.url -> :9112. Wardline tooling: - .mcp.json: drop the hardcoded --loomweave-url/--filigree-url from the wardline MCP args (resolved from wardline.yaml instead); normalize server entries. 
- .pre-commit-config.yaml: add a local wardline-scan hook. - .agents/skills/wardline-gate: add the wardline-gate skill pack. - .gitignore: ignore the raw wardline scan output (findings.jsonl). Co-Authored-By: Claude Opus 4.8 (1M context) --- .agents/skills/wardline-gate/SKILL.md | 65 +++++++++++ .gitignore | 3 + .mcp.json | 24 ++--- .pre-commit-config.yaml | 9 ++ ...044-read-api-ephemeral-port-publication.md | 101 ++++++++++++++++++ docs/loomweave/adr/README.md | 1 + loomweave.yaml | 2 +- wardline.yaml | 4 +- 8 files changed, 192 insertions(+), 17 deletions(-) create mode 100644 .agents/skills/wardline-gate/SKILL.md create mode 100644 docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md diff --git a/.agents/skills/wardline-gate/SKILL.md b/.agents/skills/wardline-gate/SKILL.md new file mode 100644 index 00000000..87ee134a --- /dev/null +++ b/.agents/skills/wardline-gate/SKILL.md @@ -0,0 +1,65 @@ +--- +name: wardline-gate +description: > + Use when scanning for or fixing trust-boundary / taint findings, when a + `wardline scan` reports a defect, or when wiring wardline into an agent's + edit-verify loop. Explains the scan -> explain -> fix-at-the-boundary -> + rescan cycle and the baseline-vs-waiver discipline. +--- + +# Wardline: the trust-boundary gate + +Wardline is a deterministic, whole-program static taint analyzer. It marks trust +boundaries with two decorators from `wardline.decorators`: `@external_boundary` +(untrusted data arriving from outside) and `@trusted` (a producer that must only +receive validated data). When untrusted data reaches a trusted producer it raises +`PY-WL-101` at `ERROR`. + +## The loop + +1. **Scan.** Run `wardline scan . --fail-on ERROR` (or call the `scan` MCP tool). + Read the gate verdict and the active (non-suppressed) findings — `active` is + the population the gate enforces on. +2. 
**Explain.** For each active defect, call `explain_taint` with the finding's + `fingerprint`, `path`+`line`, and its `qualname` as `sink_qualname`. Do this + right after the scan and before editing — a stale fingerprint returns an error. + With a Loomweave store configured, pass `chain: true` to walk the full taint + chain back to the originating boundary. +3. **Fix at the BOUNDARY, not the sink.** Add validation or rejection at the hop + where untrusted data should have been checked — not a band-aid at the sink. +4. **Re-scan.** Confirm the finding is gone. + +## Exit codes (CLI path) + +- `0` — clean (or gate not requested). +- `1` — the gate tripped: a non-suppressed defect at/above `--fail-on`. +- `2` — a wardline error (bad config, unreadable path). Not a finding. + +Branch on the code. On a trip, read the structured report wardline just wrote — +the finding names the function, file, and lines, which is enough to locate the +leak. + +## Suppression discipline + +Prefer FIXING a finding. Suppress only a finding you have judged a true +non-issue, always with a reason: + +- MCP `baseline` — snapshot current defects so only NEW findings surface. + `overwrite: false` (default) refuses to clobber an existing baseline; + `overwrite: true` re-derives it. A coarse, whole-set tool; requires a reason. +- `waiver_add` — waive ONE finding by fingerprint with a mandatory reason and an + expiry date. An audited, time-boxed exception. +- `wardline judge` (opt-in, network) — an LLM pass that labels each defect + TRUE/FALSE positive. Never runs automatically, never folded into scan; fails + loud with no API key so "couldn't triage" is never mistaken for "nothing to + triage". Above-floor false positives can be recorded as audited suppressions. + +## CLI vs MCP + +- **CLI:** `wardline scan`, `wardline judge`, `wardline baseline create/update`. + Branch on the exit code; read the findings file it writes. 
+- **MCP:** `wardline mcp` exposes `scan`, `explain_taint`, `fix`, `judge` + (network), `baseline`, `waiver_add`; resources + `wardline://vocab|rules|config|config-schema`; and the `wardline:loop` prompt. + The server is stateless — the read-only tools are pure functions of your code + on disk and your config. diff --git a/.gitignore b/.gitignore index 9b5e95da..5713582a 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,9 @@ tests/e2e/external-operator-smoke-results-*.md # Documentation site build output (mkdocs `site_dir`, web/mkdocs.yml). /site-build/ +# Wardline scan output written by `wardline scan` (raw, regenerated per run). +/findings.jsonl + # Filigree-managed docs — a running filigree process rewrites its managed # instruction blocks in these every session; untracked to avoid diff churn # (filigree regenerates them on demand). diff --git a/.mcp.json b/.mcp.json index 6b84d9ec..6b3c178d 100644 --- a/.mcp.json +++ b/.mcp.json @@ -1,30 +1,26 @@ { "mcpServers": { + "filigree": { + "args": [], + "command": "/home/john/.local/bin/filigree-mcp", + "type": "stdio" + }, "loomweave": { "args": [ "serve" ], - "command": "/home/john/.local/bin/loomweave", + "command": "/home/john/.local/share/uv/tools/loomweave/bin/loomweave", "env": {}, "type": "stdio" }, - "filigree": { - "args": [], - "command": "/home/john/.local/bin/filigree-mcp", - "type": "stdio" - }, "wardline": { + "type": "stdio", + "command": "/home/john/.local/bin/wardline", "args": [ "mcp", "--root", - ".", - "--loomweave-url", - "http://127.0.0.1:9111", - "--filigree-url", - "http://127.0.0.1:8542/api/weft/scan-results" - ], - "command": "/home/john/.local/bin/wardline", - "type": "stdio" + "." 
+ ] } } } diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d4158384..44b775f4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,3 +19,12 @@ repos: args: [--strict, --config-file=plugins/python/pyproject.toml, plugins/python] additional_dependencies: - pytest>=8.0 + - repo: local + hooks: + - id: wardline-scan + name: wardline scan + entry: wardline scan + language: system + types: [python] + pass_filenames: false + diff --git a/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md new file mode 100644 index 00000000..be88c80a --- /dev/null +++ b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md @@ -0,0 +1,101 @@ +# ADR-044: Read-API Ephemeral Port Publication + +**Status**: Proposed +**Date**: 2026-06-06 +**Relates to**: [ADR-034](./ADR-034-federation-http-read-api-hardening.md) +**Tracking**: clarion-7f574bc34f + +## Context + +`loomweave serve` exposes a federation HTTP read API. Its bind address is a +static `127.0.0.1:9111` — the default (`loomweave-federation/src/config.rs`) and +the value the installer stamps into every project's `loomweave.yaml` +(`crates/loomweave-cli/src/install.rs`). Every project gets the same port. + +Consequences observed live: + +- **Bind collision.** Two `loomweave serve` instances cannot run concurrently — + whichever starts first binds 9111, the second fails with `Address already in + use`. A `legis` session held 9111 for hours while another project's serve + could not come up. +- **Consumer mis-targeting.** Consumers point at the static port too + (`wardline.yaml: loomweave.url: http://127.0.0.1:9111`). A second project's + wardline therefore reaches the *first* project's loomweave instance. ADR-034's + instance-ID guard correctly rejects the cross-project taint write + (`PROJECT_MISMATCH`) — no data is corrupted — but federation is silently dead + for the mis-targeted project. 
+ +Loomweave already solved the *consumer* side of this problem for the sibling +direction: `loomweave-federation/src/filigree_url.rs` resolves Filigree's live +endpoint by reading `/.filigree/ephemeral.port` (Filigree publishes a +per-project, deterministic-but-unpredictable port `8400 + sha256(path) % 1000`, +atomically, present only while running; consumers read it, never compute it, and +fail soft to configured URL). Loomweave never applied the same convention to its +*own* read API. + +Picking a free port at install time does not fix this: it is TOCTOU (a port free +at install can be taken before `serve` runs, and two installs at different times +can pick the same "first free" port). That is precisely why the established +pattern publishes the live port at runtime rather than assigning it at install. + +## Decision + +Mirror Filigree's endpoint-discovery convention symmetrically for loomweave's +own read API. + +1. **Deterministic per-project port, ephemeral fallback.** `serve` binds a + per-project deterministic port derived from the canonical project path, in a + loomweave-specific band chosen to *not* overlap Filigree's `8400–9399` band + (so the two products never contend for the same number). If that port is + taken, fall back to an OS-assigned ephemeral port (`bind :0`). The + bind-and-discover primitive already exists in test form at + `crates/loomweave-cli/src/http_read.rs` and is generalized to the production + serve path. +2. **Publish the live port.** On successful bind, write the *actual* bound port + to `/.loomweave/ephemeral.port` (plain integer, atomic write, + removed on clean shutdown, present only while serving) — the loomweave twin of + `.filigree/ephemeral.port`. +3. **Loomweave-side resolver.** Add a resolver in `loomweave-federation` (the + twin of `resolve_filigree_url`) that prefers `.loomweave/ephemeral.port` over + static config and fails soft when the file is missing/corrupt. 
Consumers use + it: wardline's `loomweave.url`, and loomweave's own `doctor` / + `project_status_get` (which report the resolved source, mirroring how + `project_status` reports the Filigree resolution). +4. **Installer stops pinning a port.** `install` no longer stamps a fixed + `serve.http.bind: 127.0.0.1:9111`. The `loomweave.yaml` stub documents that + the read-API port is auto-selected and published; an explicit `bind` override + remains honored for operators who need a fixed port. + +`.loomweave/ephemeral.port` is a runtime artifact and is git-ignored, consistent +with ADR-005's treatment of run-time-only state. + +## Consequences + +- Two or more projects can `serve` concurrently without port contention; the + cross-project `PROJECT_MISMATCH` federation failure disappears because each + consumer resolves *its own* project's live port. +- The read-API port becomes a *read-this-file*, never a *compute-or-configure*, + fact — matching the discipline loomweave already imposes on consuming + Filigree. "Read, never compute" is the load-bearing rule: nothing should hard + code or re-derive the band formula to guess a peer's port. +- Consumers pinned to a literal `:9111` (e.g. existing `wardline.yaml` files) + must migrate to the resolver. Until they do, they fail soft to the configured + URL — degraded, not broken. +- Federation stays enrich-only and solo-useful: a project with no published port + file (serve not running, or feature disabled) degrades to the configured + `base_url`, never to a sibling-internal default. + +## Verification + +- Two serves on distinct project paths bind distinct ports and each publishes + its own `.loomweave/ephemeral.port`; neither fails to bind. +- A deterministic-port collision forces the ephemeral-`0` fallback, and the + published file reflects the *actually* bound port (not the deterministic + guess). 
+- The resolver prefers the published port over stale config and fails soft on + missing/corrupt/out-of-range content (twin of the `filigree_url` resolver + tests). +- The published file is removed on clean shutdown; a consumer reading a stale + file degrades rather than erroring. +- A wardline scan against a project whose loomweave serve is running on a + non-9111 port resolves and writes taint successfully (no `PROJECT_MISMATCH`). diff --git a/docs/loomweave/adr/README.md b/docs/loomweave/adr/README.md index 3bafec9d..b18cd28f 100644 --- a/docs/loomweave/adr/README.md +++ b/docs/loomweave/adr/README.md @@ -44,6 +44,7 @@ This folder is the canonical home for authored Loomweave architecture decision r | [ADR-041](./ADR-041-resume-is-idempotent-reemit.md) | Analyze resume is idempotent re-emit, not checkpoint recovery; amends ADR-005/ADR-011 resume language | Accepted | | [ADR-042](./ADR-042-hmac-freshness-and-replay-window.md) | HMAC freshness and replay window — timestamp + nonce headers, crate-backed HMAC, process-local replay cache | Accepted | | [ADR-043](./ADR-043-edge-reanalysis-replacement.md) | Edge reanalysis replacement — per-source-file anchored-edge replacement and edge metadata upsert; amends ADR-026 | Accepted | +| [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — per-project deterministic port + `.loomweave/ephemeral.port` + loomweave-side resolver (twin of `filigree_url`), installer stops pinning 9111; relates to ADR-034 | Proposed | ## Backlog still tracked in the detailed design diff --git a/loomweave.yaml b/loomweave.yaml index 6efcfa9f..8ffa933d 100644 --- a/loomweave.yaml +++ b/loomweave.yaml @@ -36,7 +36,7 @@ llm_policy: session_token_ceiling: 1000000 serve: http: - bind: 127.0.0.1:9111 + bind: 127.0.0.1:9112 enabled: true wardline_taint_write: true version: 1 diff --git a/wardline.yaml b/wardline.yaml index dfc57f0b..5b049343 100644 --- a/wardline.yaml +++ b/wardline.yaml @@ -1,4 +1,4 @@ 
-loomweave: - url: http://127.0.0.1:9111 filigree: url: http://127.0.0.1:8542/api/weft/scan-results +loomweave: + url: http://127.0.0.1:9112 From d4e04745d8202bbff3652df797f5d72b83f796de Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 07:49:54 +1000 Subject: [PATCH 03/60] docs(adr): pin ADR-044 ephemeral-port file as normative cross-product contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Incorporates the Wardline (consumer-side) review. The interop surface is the file, not loomweave's Rust resolver — Wardline implements its own Python reader against it (SEI-style "consumers conform"). Pins, as normative: - File contract: /.loomweave/ephemeral.port, plain-ASCII port only, optional trailing newline, host/scheme implied, atomic temp+rename, created on loopback bind / removed on clean shutdown. - Loopback-only publication: a non-loopback bind (allow_non_loopback, ADR-034) publishes no file, so the port-only format never under-specifies the host. - Resolution precedence (consume-time, per read): explicit flag/env > published file > configured url > none. The file self-heals stale/default config but never overrides a deliberate explicit target. - Fail-soft: validate 1..=65535; malformed or resolved-but-refused (stale file / crashed serve) degrades, never errors. Instance-ID guard (ADR-034) is the correctness backstop so the reader can be simple. - Related follow-up: consume-time resolution applies to both sibling legs; Wardline's filigree leg (install-time today) should unify Wardline-side. 
Tracking: clarion-7f574bc34f Co-Authored-By: Claude Opus 4.8 (1M context) --- ...044-read-api-ephemeral-port-publication.md | 122 ++++++++++++++---- docs/loomweave/adr/README.md | 2 +- 2 files changed, 100 insertions(+), 24 deletions(-) diff --git a/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md index be88c80a..28a02633 100644 --- a/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md +++ b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md @@ -41,7 +41,13 @@ pattern publishes the live port at runtime rather than assigning it at install. ## Decision Mirror Filigree's endpoint-discovery convention symmetrically for loomweave's -own read API. +own read API. The **interop surface is the file**, not loomweave's resolver: +the resolver below is loomweave's own conforming reader, but the contract that +binds siblings is `.loomweave/ephemeral.port` itself. Cross-product consumers +(notably Wardline, which is Python and cannot call a Rust resolver) implement +their own reader against the file contract — the same "this is the contract, +consumers conform" posture as the SEI token (ADR-038). The normative file +contract and resolution semantics are pinned below. 1. **Deterministic per-project port, ephemeral fallback.** `serve` binds a per-project deterministic port derived from the canonical project path, in a @@ -51,23 +57,79 @@ own read API. bind-and-discover primitive already exists in test form at `crates/loomweave-cli/src/http_read.rs` and is generalized to the production serve path. -2. **Publish the live port.** On successful bind, write the *actual* bound port - to `/.loomweave/ephemeral.port` (plain integer, atomic write, - removed on clean shutdown, present only while serving) — the loomweave twin of - `.filigree/ephemeral.port`. +2. 
**Publish the live port** to `<project_root>/.loomweave/ephemeral.port` per the + file contract below — the loomweave twin of `.filigree/ephemeral.port`. 3. **Loomweave-side resolver.** Add a resolver in `loomweave-federation` (the - twin of `resolve_filigree_url`) that prefers `.loomweave/ephemeral.port` over - static config and fails soft when the file is missing/corrupt. Consumers use - it: wardline's `loomweave.url`, and loomweave's own `doctor` / - `project_status_get` (which report the resolved source, mirroring how - `project_status` reports the Filigree resolution). + twin of `resolve_filigree_url`) implementing the resolution semantics below. + Loomweave's own consumers use it (`doctor`, `project_status_get`, which report + the resolved source). It is *one* conforming reader, not the interop surface. 4. **Installer stops pinning a port.** `install` no longer stamps a fixed `serve.http.bind: 127.0.0.1:9111`. The `loomweave.yaml` stub documents that the read-API port is auto-selected and published; an explicit `bind` override remains honored for operators who need a fixed port. -`.loomweave/ephemeral.port` is a runtime artifact and is git-ignored, consistent -with ADR-005's treatment of run-time-only state. +## File contract (normative) + +`.loomweave/ephemeral.port` is the cross-product interop surface. Producers +(loomweave `serve`) and every consumer (loomweave, Wardline, future siblings) +conform to exactly this: + +- **Path:** `<project_root>/.loomweave/ephemeral.port`, where `<project_root>` + is the directory the consumer is scanning/serving (the same anchor as + `.filigree/ephemeral.port`). +- **Content:** a single plain-ASCII integer — the **TCP port only**. No host, no + scheme, no key. An optional single trailing `\n` is permitted and ignored. No + other bytes. +- **Host/scheme are implied, not stored:** `127.0.0.1` and `http`. This is sound + *only* because publication is loopback-only (next bullet); a consumer composes + `http://127.0.0.1:<port>`. 
+- **Loopback-only publication.** The file is written **only when `serve` binds a + loopback address**. If an operator opts into a non-loopback bind + (`allow_non_loopback`, ADR-034), `serve` does **not** publish the file — that + deployment is explicit-config territory and consumers fall back to their + configured URL (where the operator set the reachable host). This keeps the + port-only format unambiguous and prevents a port-only reader from mis-targeting + a non-loopback host. +- **Atomic write:** write to a temp file in `.loomweave/` and `rename(2)` into + place, so a reader never observes a partial/torn value. +- **Lifecycle:** created/refreshed on successful loopback bind; removed on clean + shutdown. Present-only-while-serving is best-effort, not guaranteed — a crash + leaves a stale file, which resolution semantics handle (below). +- **Git-ignored** runtime artifact, consistent with ADR-005's treatment of + run-time-only state. + +## Resolution semantics (normative) + +Every consumer resolves **at consume time** (each scan / read), never caches the +resolution at install time — a port resolved once and reused goes stale exactly +when another project rebinds. Wardline's filigree leg, which resolves at install +time today, is the cautionary case (see related follow-up). + +**Precedence (highest wins):** + +1. An **explicit, deliberate target** — `--loomweave-url` flag or environment + override — always wins. The published port must never override a target the + operator set on purpose (remote loomweave, debugging a specific instance). +2. The **published port file** `.loomweave/ephemeral.port` (composed to + `http://127.0.0.1:<port>`). This **beats a stale/default configured URL** so + resolution self-heals without a config edit. +3. The **configured URL** (e.g. `wardline.yaml: loomweave.url`). +4. **None** — federation is simply absent for this read; degrade, do not error. 
+ +**Fail-soft is mandatory at every step:** + +- The port value MUST be validated to `1..=65535`. Missing, non-integer, + out-of-range, or otherwise malformed content → fall through to the next + precedence level (it is not an error). +- A **resolved-but-refused** connection (file present, but the port is closed — + crashed serve / stale file) MUST be treated as soft: fall through to configured + URL or none. This — not malformed content — is the case a live consumer hits + most, and it must never surface as a hard error. +- The instance-ID guard (ADR-034) is the **correctness backstop** that lets the + reader be simple rather than perfect: even if a stale file points at a port now + owned by *another* project's serve, the write is rejected `PROJECT_MISMATCH`, + fail-soft — a stale file degrades, never corrupts. Consumers rely on this; they + do not need to verify project identity before connecting. ## Consequences @@ -76,14 +138,15 @@ with ADR-005's treatment of run-time-only state. consumer resolves *its own* project's live port. - The read-API port becomes a *read-this-file*, never a *compute-or-configure*, fact — matching the discipline loomweave already imposes on consuming - Filigree. "Read, never compute" is the load-bearing rule: nothing should hard - code or re-derive the band formula to guess a peer's port. + Filigree. "Read, never compute" is the load-bearing rule: nothing hard codes or + re-derives the band formula to guess a peer's port. - Consumers pinned to a literal `:9111` (e.g. existing `wardline.yaml` files) - must migrate to the resolver. Until they do, they fail soft to the configured - URL — degraded, not broken. + self-heal once they prefer the published file over config (precedence 2 > 3) — + no user edit required. Until a consumer adopts the resolver it fails soft to the + configured URL — degraded, not broken. 
- Federation stays enrich-only and solo-useful: a project with no published port - file (serve not running, or feature disabled) degrades to the configured - `base_url`, never to a sibling-internal default. + file (serve not running, feature disabled, or non-loopback bind) degrades to + the configured `base_url`, never to a sibling-internal default. ## Verification @@ -92,10 +155,23 @@ with ADR-005's treatment of run-time-only state. - A deterministic-port collision forces the ephemeral-`0` fallback, and the published file reflects the *actually* bound port (not the deterministic guess). -- The resolver prefers the published port over stale config and fails soft on - missing/corrupt/out-of-range content (twin of the `filigree_url` resolver - tests). -- The published file is removed on clean shutdown; a consumer reading a stale - file degrades rather than erroring. +- File contract: published content is a bare port (optional trailing newline), + written via temp + rename; a non-loopback bind publishes **no** file. +- Precedence: an explicit `--loomweave-url`/env target overrides the published + file; the published file overrides a stale/default configured URL; absent file + falls through to config, then none. +- Fail-soft: missing / non-integer / out-of-range (`0`, `>65535`) content, and a + **resolved-but-refused** connection (stale file, closed port), each degrade to + the next precedence level rather than erroring. +- The published file is removed on clean shutdown. - A wardline scan against a project whose loomweave serve is running on a non-9111 port resolves and writes taint successfully (no `PROJECT_MISMATCH`). + +## Related follow-up (not blocking this ADR) + +Consume-time live-port resolution should apply to **both** sibling directions. +Wardline reads `.filigree/ephemeral.port` only at install time and uses the +static config URL at scan time, so its filigree leg carries the same latent +staleness this ADR removes for the loomweave leg. 
Unifying both consumers on +consume-time resolution is Wardline-side work, tracked separately; flagged here +so the two legs are not designed divergently. diff --git a/docs/loomweave/adr/README.md b/docs/loomweave/adr/README.md index b18cd28f..8421b4fb 100644 --- a/docs/loomweave/adr/README.md +++ b/docs/loomweave/adr/README.md @@ -44,7 +44,7 @@ This folder is the canonical home for authored Loomweave architecture decision r | [ADR-041](./ADR-041-resume-is-idempotent-reemit.md) | Analyze resume is idempotent re-emit, not checkpoint recovery; amends ADR-005/ADR-011 resume language | Accepted | | [ADR-042](./ADR-042-hmac-freshness-and-replay-window.md) | HMAC freshness and replay window — timestamp + nonce headers, crate-backed HMAC, process-local replay cache | Accepted | | [ADR-043](./ADR-043-edge-reanalysis-replacement.md) | Edge reanalysis replacement — per-source-file anchored-edge replacement and edge metadata upsert; amends ADR-026 | Accepted | -| [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — per-project deterministic port + `.loomweave/ephemeral.port` + loomweave-side resolver (twin of `filigree_url`), installer stops pinning 9111; relates to ADR-034 | Proposed | +| [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — `.loomweave/ephemeral.port` as a normative cross-product file contract (loopback-only, port-only, atomic) + consume-time resolution precedence (explicit > file > config > none), per-project deterministic port, installer stops pinning 9111; relates to ADR-034 | Proposed | ## Backlog still tracked in the detailed design From a0731d45ebe2914477dcbaed137bab7afd45e692 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:09:25 +1000 Subject: [PATCH 04/60] =?UTF-8?q?feat(federation):=20loomweave=5Fport=20?= =?UTF-8?q?=E2=80=94=20deterministic=20read-API=20port=20+=20atomic=20publ?= 
=?UTF-8?q?ish=20(ADR-044)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 --- crates/loomweave-federation/Cargo.toml | 1 + crates/loomweave-federation/src/lib.rs | 1 + .../src/loomweave_port.rs | 193 ++++++++++++++++++ 3 files changed, 195 insertions(+) create mode 100644 crates/loomweave-federation/src/loomweave_port.rs diff --git a/crates/loomweave-federation/Cargo.toml b/crates/loomweave-federation/Cargo.toml index 27006e45..2ca76605 100644 --- a/crates/loomweave-federation/Cargo.toml +++ b/crates/loomweave-federation/Cargo.toml @@ -10,6 +10,7 @@ rust-version.workspace = true workspace = true [dependencies] +blake3.workspace = true loomweave-core = { path = "../loomweave-core", version = "1.0.0" } reqwest.workspace = true serde.workspace = true diff --git a/crates/loomweave-federation/src/lib.rs b/crates/loomweave-federation/src/lib.rs index 43993c83..bd0db468 100644 --- a/crates/loomweave-federation/src/lib.rs +++ b/crates/loomweave-federation/src/lib.rs @@ -3,4 +3,5 @@ pub mod config; pub mod filigree; pub mod filigree_url; +pub mod loomweave_port; pub mod scan_results; diff --git a/crates/loomweave-federation/src/loomweave_port.rs b/crates/loomweave-federation/src/loomweave_port.rs new file mode 100644 index 00000000..bc2dcf73 --- /dev/null +++ b/crates/loomweave-federation/src/loomweave_port.rs @@ -0,0 +1,193 @@ +//! Loomweave read-API ephemeral-port contract (ADR-044). +//! +//! The twin of Filigree's `.filigree/ephemeral.port` convention, applied to +//! Loomweave's own federation HTTP read API. `serve` binds a per-project +//! deterministic port (ephemeral `:0` fallback) and publishes the *actually +//! bound* port to `/.loomweave/ephemeral.port`. Cross-product +//! consumers (notably Wardline, which is Python) read this file; nobody +//! recomputes a peer's port. The deterministic band here is an implementation +//! detail, never part of the file contract. +//! +//! 
File contract (ADR-044, normative): a single plain-ASCII integer TCP port, +//! optional trailing `\n`, written atomically (temp + rename), present only +//! while `serve` holds a loopback bind. Host (`127.0.0.1`) and scheme (`http`) +//! are implied, sound only because publication is loopback-only. + +use std::path::{Path, PathBuf}; + +/// Base of Loomweave's deterministic read-API port band. Chosen to sit +/// **above** Filigree's `8400–9399` band so the two products never contend for +/// the same number. Internal only — never part of the cross-product file +/// contract (consumers read the published file, never recompute). +pub const PORT_BAND_BASE: u16 = 9400; +/// Width of the band: ports land in `[9400, 10400)` i.e. `9400..=10399`. +pub const PORT_BAND_SPAN: u16 = 1000; + +/// Canonical path of the published port file for a project root. +#[must_use] +pub fn published_port_path(project_root: &Path) -> PathBuf { + project_root.join(".loomweave").join("ephemeral.port") +} + +/// Deterministic-but-unpredictable read-API port for a project, derived from +/// the canonical project path. Stable across runs (so a consumer's static +/// config can match it) yet path-specific (so two projects differ). Mirrors +/// Filigree's `8400 + hash % 1000`, in a disjoint band, using Loomweave's own +/// hash (blake3, as for SEI). The bound port is published; this computation is +/// the producer's *starting guess*, not a value any consumer recomputes. +/// +/// # Panics +/// +/// Never in practice: the `expect` calls are on infallible arithmetic +/// (`blake3` always produces 32 bytes; `% 1000 < 1000` always fits `u16`). +#[must_use] +pub fn deterministic_port(project_root: &Path) -> u16 { + // Best-effort canonicalize so every caller (serve, install, doctor) agrees + // regardless of whether it pre-canonicalized; fall back to the path as-given. 
+ let canonical = project_root + .canonicalize() + .unwrap_or_else(|_| project_root.to_path_buf()); + let bytes = canonical.to_string_lossy(); + let hash = blake3::hash(bytes.as_bytes()); + let head = u64::from_le_bytes( + hash.as_bytes()[..8] + .try_into() + .expect("blake3 digest is 32 bytes, so [..8] is 8 bytes"), + ); + let offset = u16::try_from(head % u64::from(PORT_BAND_SPAN)) + .expect("remainder of % 1000 is < 1000, which fits u16"); + PORT_BAND_BASE + offset +} + +/// Read and validate the published port. Any missing / non-integer / +/// out-of-range / zero content folds to `None` (fail-soft, ADR-044). A `u16` +/// parse already bounds `1..=65535` except `0`, which we reject explicitly. +#[must_use] +pub fn read_published_port(project_root: &Path) -> Option { + let raw = std::fs::read_to_string(published_port_path(project_root)).ok()?; + raw.trim().parse::().ok().filter(|port| *port != 0) +} + +/// Atomically publish `port` to `/.loomweave/ephemeral.port`. +/// Writes a temp file in the same directory and `rename(2)`s it into place, so +/// a concurrent reader never observes a torn value. Creates `.loomweave/` if +/// absent. The caller is responsible for the loopback-only invariant (only call +/// this when the bound address is loopback). +/// +/// # Errors +/// Returns the underlying I/O error if the directory cannot be created or the +/// temp file cannot be written/renamed. +pub fn publish_port(project_root: &Path, port: u16) -> std::io::Result<()> { + let dir = project_root.join(".loomweave"); + std::fs::create_dir_all(&dir)?; + // One `serve` per process publishes, so the PID makes the temp name unique + // within this directory without needing a random suffix. + let tmp = dir.join(format!("ephemeral.port.{}.tmp", std::process::id())); + std::fs::write(&tmp, format!("{port}\n"))?; + std::fs::rename(&tmp, dir.join("ephemeral.port"))?; + Ok(()) +} + +/// Best-effort removal of the published port file. A missing file is not an +/// error (idempotent). 
Called on clean shutdown; SIGKILL leaves a stale file, +/// which `read_published_port` validation + the ADR-034 instance-ID guard +/// handle (a stale file degrades, never corrupts). +pub fn remove_published_port(project_root: &Path) { + let _ = std::fs::remove_file(published_port_path(project_root)); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn deterministic_port_is_stable_and_in_band() { + let dir = tempfile::tempdir().unwrap(); + let a = deterministic_port(dir.path()); + let b = deterministic_port(dir.path()); + assert_eq!(a, b, "same path must yield the same port"); + assert!( + (PORT_BAND_BASE..PORT_BAND_BASE + PORT_BAND_SPAN).contains(&a), + "port {a} must land in the loomweave band [{PORT_BAND_BASE}, {})", + PORT_BAND_BASE + PORT_BAND_SPAN + ); + // Disjoint from Filigree's 8400-9399 band. + assert!( + a >= 9400, + "port {a} must not overlap Filigree's 8400-9399 band" + ); + } + + #[test] + fn deterministic_port_differs_by_path() { + let a = tempfile::tempdir().unwrap(); + let b = tempfile::tempdir().unwrap(); + // Distinct tempdirs almost always hash to distinct ports; assert the + // function is path-sensitive by checking the inputs differ and the + // computation is a pure function of the (canonical) path. + assert_ne!(a.path(), b.path()); + let pa = deterministic_port(a.path()); + let pb = deterministic_port(b.path()); + // Not guaranteed distinct (1/1000 collision), but the band membership + // and determinism are what matter; assert both are in-band. + assert!(pa >= 9400 && pb >= 9400); + } + + #[test] + fn publish_then_read_round_trips() { + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9412).expect("publish"); + assert_eq!(read_published_port(dir.path()), Some(9412)); + // Published content is the bare port plus a single trailing newline. 
+ let raw = std::fs::read_to_string(published_port_path(dir.path())).unwrap(); + assert_eq!(raw, "9412\n"); + } + + #[test] + fn publish_creates_loomweave_dir_if_absent() { + let dir = tempfile::tempdir().unwrap(); + // No .loomweave/ yet. + assert!(!dir.path().join(".loomweave").exists()); + publish_port(dir.path(), 10000).expect("publish creates .loomweave/"); + assert_eq!(read_published_port(dir.path()), Some(10000)); + } + + #[test] + fn read_tolerates_trailing_whitespace_and_newline() { + let dir = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + std::fs::write(published_port_path(dir.path()), " 9500 \n").unwrap(); + assert_eq!(read_published_port(dir.path()), Some(9500)); + } + + #[test] + fn read_rejects_malformed_zero_and_out_of_range() { + let dir = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + for bad in ["", "not-a-port", "0", "65536", "70000", "-1", "12.5"] { + std::fs::write(published_port_path(dir.path()), bad).unwrap(); + assert_eq!( + read_published_port(dir.path()), + None, + "malformed/out-of-range content {bad:?} must fold to None (fail-soft)" + ); + } + } + + #[test] + fn read_absent_file_is_none() { + let dir = tempfile::tempdir().unwrap(); + assert_eq!(read_published_port(dir.path()), None); + } + + #[test] + fn remove_is_idempotent_and_clears_the_file() { + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9999).unwrap(); + assert!(published_port_path(dir.path()).exists()); + remove_published_port(dir.path()); + assert!(!published_port_path(dir.path()).exists()); + // Second remove on an absent file is a no-op, not an error. 
+ remove_published_port(dir.path()); + } +} From adc312218da2cd1da2f722ee436c5435099c558a Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:13:38 +1000 Subject: [PATCH 05/60] refactor(federation): loomweave_port publish uses canonical path + cleans temp on rename failure Co-Authored-By: Claude Sonnet 4.6 --- crates/loomweave-federation/src/loomweave_port.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/loomweave-federation/src/loomweave_port.rs b/crates/loomweave-federation/src/loomweave_port.rs index bc2dcf73..dfbb186b 100644 --- a/crates/loomweave-federation/src/loomweave_port.rs +++ b/crates/loomweave-federation/src/loomweave_port.rs @@ -20,7 +20,7 @@ use std::path::{Path, PathBuf}; /// the same number. Internal only — never part of the cross-product file /// contract (consumers read the published file, never recompute). pub const PORT_BAND_BASE: u16 = 9400; -/// Width of the band: ports land in `[9400, 10400)` i.e. `9400..=10399`. +/// Width of the band: ports land in `[PORT_BAND_BASE, PORT_BAND_BASE + PORT_BAND_SPAN)`. pub const PORT_BAND_SPAN: u16 = 1000; /// Canonical path of the published port file for a project root. @@ -84,7 +84,11 @@ pub fn publish_port(project_root: &Path, port: u16) -> std::io::Result<()> { // within this directory without needing a random suffix. let tmp = dir.join(format!("ephemeral.port.{}.tmp", std::process::id())); std::fs::write(&tmp, format!("{port}\n"))?; - std::fs::rename(&tmp, dir.join("ephemeral.port"))?; + if let Err(err) = std::fs::rename(&tmp, published_port_path(project_root)) { + // A successful write + failed rename would otherwise strand the temp. 
+ let _ = std::fs::remove_file(&tmp); + return Err(err); + } Ok(()) } From 69e1ff5252866b97c7128c5c4ecf9d68ae7447a0 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:21:19 +1000 Subject: [PATCH 06/60] feat(config): serve.http.bind is Option; None auto-selects per-project port (ADR-044) Co-Authored-By: Claude Sonnet 4.6 --- Cargo.lock | 1 + crates/loomweave-cli/src/http_read.rs | 20 ++++-- crates/loomweave-federation/src/config.rs | 78 +++++++++++++++++++---- 3 files changed, 84 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 508f3d12..c6cb083e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1125,6 +1125,7 @@ dependencies = [ name = "loomweave-federation" version = "1.0.0" dependencies = [ + "blake3", "loomweave-core", "reqwest", "serde", diff --git a/crates/loomweave-cli/src/http_read.rs b/crates/loomweave-cli/src/http_read.rs index 41b4c16f..fe8ff9c4 100644 --- a/crates/loomweave-cli/src/http_read.rs +++ b/crates/loomweave-cli/src/http_read.rs @@ -244,7 +244,16 @@ where .map(|raw| raw.trim().to_owned()) .filter(|trimmed| !trimmed.is_empty()) .map(Arc::new); - let bind = config.bind; + // ADR-044: an unset bind means auto-select a per-project deterministic + // read-API port. An explicit bind is honored verbatim. (Task 3 adds the + // ephemeral fallback + published-file lifecycle.) 
+ let auto_port = config.bind.is_none(); + let bind = config.bind.unwrap_or_else(|| { + std::net::SocketAddr::from(( + [127, 0, 0, 1], + loomweave_federation::loomweave_port::deterministic_port(&project_root), + )) + }); let warn_unauthenticated_non_loopback = config.allow_non_loopback && !config.is_loopback_bind() && auth_token.is_none() @@ -272,6 +281,7 @@ where auth_token_thread, identity_secret_thread, bind, + auto_port, shutdown_rx, ready_tx, ); @@ -327,6 +337,8 @@ fn run_http_read_server( auth_token: Option>, identity_secret: Option>, bind: std::net::SocketAddr, + // ADR-044 Task 3 will consume this to drive the ephemeral fallback + publish. + _auto_port: bool, shutdown_rx: oneshot::Receiver<()>, ready_tx: mpsc::Sender>, ) -> Result<()> { @@ -836,7 +848,7 @@ mod tests { let config = HttpReadConfig { enabled: true, - bind, + bind: Some(bind), allow_non_loopback: false, token_env: "LOOMWEAVE_LOOPBACK_NO_TOKEN_TEST_UNSET".to_owned(), identity_token_env: None, @@ -907,7 +919,7 @@ mod tests { let config = HttpReadConfig { enabled: true, - bind, + bind: Some(bind), allow_non_loopback: false, wardline_taint_write: true, ..HttpReadConfig::default() @@ -963,7 +975,7 @@ mod tests { let config = HttpReadConfig { enabled: true, - bind, + bind: Some(bind), allow_non_loopback: false, ..HttpReadConfig::default() }; diff --git a/crates/loomweave-federation/src/config.rs b/crates/loomweave-federation/src/config.rs index e3b790a5..670e07bc 100644 --- a/crates/loomweave-federation/src/config.rs +++ b/crates/loomweave-federation/src/config.rs @@ -295,8 +295,8 @@ pub struct McpServeConfig { #[serde(default)] pub struct HttpReadConfig { pub enabled: bool, - #[serde(deserialize_with = "deserialize_socket_addr")] - pub bind: SocketAddr, + #[serde(default, deserialize_with = "deserialize_optional_socket_addr")] + pub bind: Option, pub allow_non_loopback: bool, /// Name of the env var holding the inbound bearer token. 
When the env /// var is set, every `/api/v1/files`-family request must carry @@ -323,7 +323,7 @@ impl Default for HttpReadConfig { fn default() -> Self { Self { enabled: false, - bind: SocketAddr::from(([127, 0, 0, 1], 9111)), + bind: None, allow_non_loopback: false, token_env: "WEFT_TOKEN".to_owned(), identity_token_env: None, @@ -333,11 +333,19 @@ impl Default for HttpReadConfig { } impl HttpReadConfig { + /// # Panics + /// + /// This function cannot panic in practice: the `.expect` is only reached + /// when `is_loopback_bind()` is `false`, which only occurs when + /// `self.bind` is `Some(non-loopback addr)`. pub fn validate_loopback_trust(&self) -> Result<(), ConfigError> { if self.enabled && !self.allow_non_loopback && !self.is_loopback_bind() { return Err(ConfigError::NonLoopbackHttpBind { code: "LMWV-CONFIG-HTTP-NON-LOOPBACK", - bind: self.bind, + // Safe: is_loopback_bind() is false only when bind is Some(non-loopback). + bind: self + .bind + .expect("non-loopback bind implies an explicit address"), }); } Ok(()) @@ -347,6 +355,12 @@ impl HttpReadConfig { /// token env var is unset. Loopback binds with the env var unset stay /// unauthenticated (v0.1 trust matrix); the failure case is the explicit /// `allow_non_loopback: true` opt-in plus an unset `token_env`. + /// + /// # Panics + /// + /// This function cannot panic in practice: the `.expect` is only reached + /// when `is_loopback_bind()` is `false`, which only occurs when + /// `self.bind` is `Some(non-loopback addr)`. pub fn validate_auth_trust(&self, env_lookup: F) -> Result<(), ConfigError> where F: Fn(&str) -> Option, @@ -383,24 +397,31 @@ impl HttpReadConfig { } Err(ConfigError::NonLoopbackHttpNoAuth { code: "LMWV-CONFIG-HTTP-NO-AUTH", - bind: self.bind, + bind: self + .bind + .expect("non-loopback bind implies an explicit address"), token_env: self.token_env.clone(), }) } + /// `None` (auto-select) always binds `127.0.0.1`, so it is loopback. 
#[must_use] pub fn is_loopback_bind(&self) -> bool { - self.bind.ip().is_loopback() + self.bind.is_none_or(|addr| addr.ip().is_loopback()) } } -fn deserialize_socket_addr<'de, D>(deserializer: D) -> Result +fn deserialize_optional_socket_addr<'de, D>(deserializer: D) -> Result, D::Error> where D: serde::Deserializer<'de>, { - let raw = String::deserialize(deserializer)?; - raw.parse() - .map_err(|err| serde::de::Error::custom(format!("invalid serve.http.bind {raw:?}: {err}"))) + let raw = Option::::deserialize(deserializer)?; + match raw { + None => Ok(None), + Some(raw) => raw.parse().map(Some).map_err(|err| { + serde::de::Error::custom(format!("invalid serve.http.bind {raw:?}: {err}")) + }), + } } #[derive(Debug, Clone, PartialEq, Deserialize)] @@ -899,7 +920,10 @@ serve: ) .expect("parse HTTP bind"); - assert_eq!(cfg.serve.http.bind, SocketAddr::from(([127, 0, 0, 1], 0))); + assert_eq!( + cfg.serve.http.bind, + Some(SocketAddr::from(([127, 0, 0, 1], 0))) + ); } #[test] @@ -1067,6 +1091,38 @@ serve: ); } + #[test] + fn http_bind_defaults_to_none_auto_select() { + // ADR-044: the installer no longer pins a port; an unset bind means + // "auto-select a per-project deterministic port and publish it". + assert_eq!(HttpReadConfig::default().bind, None); + } + + #[test] + fn http_bind_none_is_treated_as_loopback() { + // Auto-select always binds 127.0.0.1, so an absent bind is loopback and + // must satisfy the loopback-trust gate without allow_non_loopback. 
+ let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + assert!(cfg.is_loopback_bind()); + assert!(cfg.validate_loopback_trust().is_ok()); + } + + #[test] + fn http_explicit_bind_still_parses() { + let cfg = McpConfig::from_yaml_str( + "serve:\n http:\n enabled: true\n bind: \"127.0.0.1:9412\"\n", + ) + .expect("parse explicit bind"); + assert_eq!( + cfg.serve.http.bind, + Some(SocketAddr::from(([127, 0, 0, 1], 9412))) + ); + } + #[test] fn old_anthropic_provider_shape_reports_deprecated_provider() { let err = McpConfig::from_yaml_str( From 02a7a9089e21444c7e2d934d616182606a2d12f4 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:33:50 +1000 Subject: [PATCH 07/60] refactor(config): pattern-match HttpReadConfig bind instead of expect; drop misleading # Panics docs (ADR-044) Replace the two .expect() unwraps in validate_loopback_trust and validate_auth_trust with compiler-enforced pattern matching, and delete the # Panics doc sections (a # Panics heading documenting when a method will *not* panic inverts the rustdoc convention). Behavior is identical. Also add a field doc comment to HttpReadConfig.bind and two tests: the auth-trust None path and explicit YAML-null bind parsing. Co-Authored-By: Claude Opus 4.8 --- crates/loomweave-federation/src/config.rs | 55 ++++++++++++++--------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/crates/loomweave-federation/src/config.rs b/crates/loomweave-federation/src/config.rs index 670e07bc..804e35ec 100644 --- a/crates/loomweave-federation/src/config.rs +++ b/crates/loomweave-federation/src/config.rs @@ -295,6 +295,9 @@ pub struct McpServeConfig { #[serde(default)] pub struct HttpReadConfig { pub enabled: bool, + /// Bind address for the HTTP read API. `None` (the default) auto-selects a + /// per-project deterministic port on `127.0.0.1` (ADR-044). 
`Some(addr)` is + /// honored verbatim (operator override). #[serde(default, deserialize_with = "deserialize_optional_socket_addr")] pub bind: Option, pub allow_non_loopback: bool, @@ -333,20 +336,15 @@ impl Default for HttpReadConfig { } impl HttpReadConfig { - /// # Panics - /// - /// This function cannot panic in practice: the `.expect` is only reached - /// when `is_loopback_bind()` is `false`, which only occurs when - /// `self.bind` is `Some(non-loopback addr)`. pub fn validate_loopback_trust(&self) -> Result<(), ConfigError> { if self.enabled && !self.allow_non_loopback && !self.is_loopback_bind() { - return Err(ConfigError::NonLoopbackHttpBind { - code: "LMWV-CONFIG-HTTP-NON-LOOPBACK", - // Safe: is_loopback_bind() is false only when bind is Some(non-loopback). - bind: self - .bind - .expect("non-loopback bind implies an explicit address"), - }); + // is_loopback_bind() is true for None, so reaching here implies Some(non-loopback). + if let Some(bind) = self.bind { + return Err(ConfigError::NonLoopbackHttpBind { + code: "LMWV-CONFIG-HTTP-NON-LOOPBACK", + bind, + }); + } } Ok(()) } @@ -355,12 +353,6 @@ impl HttpReadConfig { /// token env var is unset. Loopback binds with the env var unset stay /// unauthenticated (v0.1 trust matrix); the failure case is the explicit /// `allow_non_loopback: true` opt-in plus an unset `token_env`. - /// - /// # Panics - /// - /// This function cannot panic in practice: the `.expect` is only reached - /// when `is_loopback_bind()` is `false`, which only occurs when - /// `self.bind` is `Some(non-loopback addr)`. pub fn validate_auth_trust(&self, env_lookup: F) -> Result<(), ConfigError> where F: Fn(&str) -> Option, @@ -383,7 +375,11 @@ impl HttpReadConfig { } None => false, }; - if self.is_loopback_bind() { + // None (auto-select) always binds 127.0.0.1, so it is loopback. 
+ let Some(bind_addr) = self.bind else { + return Ok(()); + }; + if bind_addr.ip().is_loopback() { return Ok(()); } if has_identity_secret { @@ -397,9 +393,7 @@ impl HttpReadConfig { } Err(ConfigError::NonLoopbackHttpNoAuth { code: "LMWV-CONFIG-HTTP-NO-AUTH", - bind: self - .bind - .expect("non-loopback bind implies an explicit address"), + bind: bind_addr, token_env: self.token_env.clone(), }) } @@ -1123,6 +1117,23 @@ serve: ); } + #[test] + fn http_bind_none_passes_auth_trust_validation() { + let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + assert!(cfg.validate_auth_trust(|_| None).is_ok()); + } + + #[test] + fn http_bind_explicit_null_is_treated_as_auto_select() { + let cfg = McpConfig::from_yaml_str("serve:\n http:\n enabled: true\n bind: ~\n") + .expect("explicit YAML null should parse as auto-select"); + assert_eq!(cfg.serve.http.bind, None); + } + #[test] fn old_anthropic_provider_shape_reports_deprecated_provider() { let err = McpConfig::from_yaml_str( From ba8233a6483056f9c85c88596c82fe876f791d7b Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:38:10 +1000 Subject: [PATCH 08/60] feat(serve): auto-select read-API port with ephemeral fallback; publish .loomweave/ephemeral.port (ADR-044) Co-Authored-By: Claude Opus 4.8 --- crates/loomweave-cli/src/http_read.rs | 167 +++++++++++++++++++++++++- 1 file changed, 165 insertions(+), 2 deletions(-) diff --git a/crates/loomweave-cli/src/http_read.rs b/crates/loomweave-cli/src/http_read.rs index fe8ff9c4..252259b1 100644 --- a/crates/loomweave-cli/src/http_read.rs +++ b/crates/loomweave-cli/src/http_read.rs @@ -55,6 +55,20 @@ static HTTP_ERROR_DISPATCH: LazyLock = LazyLock::new(|| { tracing::Dispatch::new(subscriber) }); +/// Removes the published `.loomweave/ephemeral.port` on drop — covering +/// graceful shutdown, error return, and panic-unwind in one place. 
Only +/// SIGKILL can strand a stale file, which the read-side validation and the +/// ADR-034 instance-ID guard tolerate (a stale file degrades, never corrupts). +struct PublishedPortGuard { + project_root: PathBuf, +} + +impl Drop for PublishedPortGuard { + fn drop(&mut self) { + loomweave_federation::loomweave_port::remove_published_port(&self.project_root); + } +} + #[derive(Debug)] pub struct HttpReadServer { shutdown: Option>, @@ -337,8 +351,9 @@ fn run_http_read_server( auth_token: Option>, identity_secret: Option>, bind: std::net::SocketAddr, - // ADR-044 Task 3 will consume this to drive the ephemeral fallback + publish. - _auto_port: bool, + // ADR-044: when true (bind auto-selected), an `AddrInUse` falls back to an + // OS-assigned ephemeral port; an explicit operator bind never falls back. + auto_port: bool, shutdown_rx: oneshot::Receiver<()>, ready_tx: mpsc::Sender>, ) -> Result<()> { @@ -349,8 +364,22 @@ fn run_http_read_server( let readers_identity = readers.identity().clone(); let runtime = build_http_runtime()?; runtime.block_on(async move { + // ADR-044: auto-selected ports fall back to an OS-assigned ephemeral + // port if the deterministic port is taken; an explicit operator bind + // does NOT fall back (a taken explicit port is a hard error). 
let listener = match tokio::net::TcpListener::bind(bind).await { Ok(listener) => listener, + Err(err) if auto_port && err.kind() == std::io::ErrorKind::AddrInUse => { + let fallback = std::net::SocketAddr::from(([127, 0, 0, 1], 0)); + match tokio::net::TcpListener::bind(fallback).await { + Ok(listener) => listener, + Err(err) => { + let _ = ready_tx + .send(Err(anyhow!("bind HTTP read API ephemeral fallback: {err}"))); + return Err(anyhow!("bind HTTP read API ephemeral fallback: {err}")); + } + } + } Err(err) => { let _ = ready_tx.send(Err(anyhow!("bind HTTP read API on {bind}: {err}"))); return Err(anyhow!("bind HTTP read API on {bind}: {err}")); @@ -363,6 +392,29 @@ fn run_http_read_server( return Err(anyhow!("read HTTP read API local addr: {err}")); } }; + // Publish the ACTUALLY-bound port loopback-only (ADR-044 file contract). + // A non-loopback bind publishes NO file — consumers fall back to their + // configured URL. The guard unlinks the file when this scope unwinds. + let _published_port_guard = if local_addr.ip().is_loopback() { + if let Err(err) = + loomweave_federation::loomweave_port::publish_port(&project_root, local_addr.port()) + { + // Publication is best-effort enrichment: a failure to write the + // discovery file must not take the read API down. + tracing::warn!( + error = %err, + port = local_addr.port(), + "failed to publish .loomweave/ephemeral.port; consumers will fall back to configured URL" + ); + None + } else { + Some(PublishedPortGuard { + project_root: project_root.clone(), + }) + } + } else { + None + }; let _ = ready_tx.send(Ok(HttpReadReady { local_addr, readers_identity, @@ -1018,6 +1070,117 @@ mod tests { ); } + /// ADR-044: with `bind: None`, two serves on distinct project paths each + /// bind their own deterministic port and publish their own + /// `.loomweave/ephemeral.port`. Neither fails to bind. 
+ #[test] + fn auto_port_publishes_distinct_ports_per_project() { + use loomweave_federation::config::HttpReadConfig; + use loomweave_federation::loomweave_port::read_published_port; + use loomweave_storage::ReaderPool; + + let _guard = http_runtime_test_guard(); + + let make = |id: &str| { + let dir = tempfile::tempdir().expect("tempdir"); + let db = dir.path().join("loomweave.db"); + let readers = ReaderPool::open(&db, 4).expect("reader pool"); + let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + let iid = crate::instance::parse_instance_id_for_test(id).expect("iid"); + let server = spawn(dir.path().to_path_buf(), db, readers, iid, &cfg) + .expect("spawn") + .expect("enabled => Some"); + (dir, server) + }; + + let (dir_a, server_a) = make("00000000-0000-4000-8000-0000000000a1"); + let (dir_b, server_b) = make("00000000-0000-4000-8000-0000000000a2"); + + let port_a = read_published_port(dir_a.path()).expect("a published a port"); + let port_b = read_published_port(dir_b.path()).expect("b published a port"); + assert!( + port_a >= 9400 && port_b >= 9400, + "ports in the loomweave band" + ); + // Two live servers => two live ports => they cannot be equal. + assert_ne!(port_a, port_b, "concurrent serves must hold distinct ports"); + + server_a.shutdown().expect("shutdown a"); + server_b.shutdown().expect("shutdown b"); + } + + /// The published file is removed on clean shutdown. 
+ #[test] + fn auto_port_file_removed_on_clean_shutdown() { + use loomweave_federation::config::HttpReadConfig; + use loomweave_federation::loomweave_port::{published_port_path, read_published_port}; + use loomweave_storage::ReaderPool; + + let _guard = http_runtime_test_guard(); + + let dir = tempfile::tempdir().expect("tempdir"); + let db = dir.path().join("loomweave.db"); + let readers = ReaderPool::open(&db, 4).expect("reader pool"); + let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + let iid = + crate::instance::parse_instance_id_for_test("00000000-0000-4000-8000-0000000000a3") + .expect("iid"); + let server = spawn(dir.path().to_path_buf(), db, readers, iid, &cfg) + .expect("spawn") + .expect("enabled => Some"); + + assert!( + read_published_port(dir.path()).is_some(), + "published while serving" + ); + server.shutdown().expect("shutdown"); + assert!( + !published_port_path(dir.path()).exists(), + "published port file must be gone after clean shutdown" + ); + } + + /// An explicit (operator-set) bind that is already in use is a HARD error — + /// the operator asked for that specific port. Only auto-select falls back. + #[test] + fn explicit_bind_in_use_is_a_hard_error() { + use loomweave_federation::config::HttpReadConfig; + use loomweave_storage::ReaderPool; + use std::net::{SocketAddr, TcpListener}; + + let _guard = http_runtime_test_guard(); + + // Hold a real listener so the address is genuinely occupied. 
+ let held = TcpListener::bind(("127.0.0.1", 0)).expect("hold a port"); + let bind: SocketAddr = held.local_addr().expect("addr"); + + let dir = tempfile::tempdir().expect("tempdir"); + let db = dir.path().join("loomweave.db"); + let readers = ReaderPool::open(&db, 4).expect("reader pool"); + let cfg = HttpReadConfig { + enabled: true, + bind: Some(bind), + ..HttpReadConfig::default() + }; + let iid = + crate::instance::parse_instance_id_for_test("00000000-0000-4000-8000-0000000000a4") + .expect("iid"); + + let result = spawn(dir.path().to_path_buf(), db, readers, iid, &cfg); + assert!( + result.is_err(), + "an explicit in-use bind must fail, not silently fall back to :0" + ); + } + // ---------------------------------------------------------------------- // W.3 taint-fact READ endpoints (GET + :batch-get). // ---------------------------------------------------------------------- From 0b4df33e512f15b6bf9c29cd3052be309d2eedba Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:47:31 +1000 Subject: [PATCH 09/60] test(serve): cover ephemeral fallback when deterministic read-API port is taken (ADR-044) Co-Authored-By: Claude Opus 4.8 --- crates/loomweave-cli/src/http_read.rs | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/crates/loomweave-cli/src/http_read.rs b/crates/loomweave-cli/src/http_read.rs index 252259b1..3cdf4e7c 100644 --- a/crates/loomweave-cli/src/http_read.rs +++ b/crates/loomweave-cli/src/http_read.rs @@ -1181,6 +1181,46 @@ mod tests { ); } + /// The headline ADR-044 behavior: when the AUTO-selected deterministic port + /// is already taken, serve falls back to an OS-assigned ephemeral port and + /// publishes the *actually* bound port (not the deterministic guess). 
+ #[test] + fn auto_port_falls_back_to_ephemeral_when_deterministic_taken() { + use loomweave_federation::config::HttpReadConfig; + use loomweave_federation::loomweave_port::{deterministic_port, read_published_port}; + use loomweave_storage::ReaderPool; + use std::net::TcpListener; + + let _guard = http_runtime_test_guard(); + + let dir = tempfile::tempdir().expect("tempdir"); + // Occupy this project's deterministic port so the auto bind must fall back. + let det = deterministic_port(dir.path()); + let _held = TcpListener::bind(("127.0.0.1", det)).expect("hold deterministic port"); + + let db = dir.path().join("loomweave.db"); + let readers = ReaderPool::open(&db, 4).expect("reader pool"); + let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + let iid = + crate::instance::parse_instance_id_for_test("00000000-0000-4000-8000-0000000000a5") + .expect("iid"); + + let server = spawn(dir.path().to_path_buf(), db, readers, iid, &cfg) + .expect("spawn must succeed via ephemeral fallback") + .expect("enabled => Some"); + + let published = read_published_port(dir.path()).expect("published a port"); + assert_ne!( + published, det, + "fallback must publish the ephemeral port actually bound, not the taken deterministic one" + ); + server.shutdown().expect("shutdown"); + } + // ---------------------------------------------------------------------- // W.3 taint-fact READ endpoints (GET + :batch-get). 
// ---------------------------------------------------------------------- From c102297950f642d186008c4bbf70f49a08c489e3 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:50:11 +1000 Subject: [PATCH 10/60] feat(install): YAML stub no longer pins serve.http.bind 9111 (ADR-044) Co-Authored-By: Claude Sonnet 4.6 --- crates/loomweave-cli/src/install.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/loomweave-cli/src/install.rs b/crates/loomweave-cli/src/install.rs index 7a1ff576..626654c6 100644 --- a/crates/loomweave-cli/src/install.rs +++ b/crates/loomweave-cli/src/install.rs @@ -75,7 +75,9 @@ serve: enable_write_tools: false http: enabled: false - bind: 127.0.0.1:9111 + # The read-API port is auto-selected per project (deterministic, with an + # ephemeral fallback) and published to .loomweave/ephemeral.port while + # serving. Set `bind:` explicitly only to pin a fixed port (ADR-044). "; const GITIGNORE_CONTENTS: &str = "\ From 7cf778288dfab6b0795803d8e65dad4ced47406c Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:54:11 +1000 Subject: [PATCH 11/60] feat(install): integration bindings use per-project deterministic loomweave URL, no fixed bind (ADR-044) Co-Authored-By: Claude Opus 4.8 --- .../loomweave-cli/src/integration_bindings.rs | 21 ++++++++++++------- crates/loomweave-cli/tests/doctor.rs | 7 ++++++- crates/loomweave-cli/tests/install.rs | 12 ++++++++--- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/crates/loomweave-cli/src/integration_bindings.rs b/crates/loomweave-cli/src/integration_bindings.rs index 13825548..86008f4a 100644 --- a/crates/loomweave-cli/src/integration_bindings.rs +++ b/crates/loomweave-cli/src/integration_bindings.rs @@ -12,8 +12,6 @@ use std::path::{Path, PathBuf}; use anyhow::{Context, Result, bail}; use serde_json::{Map, Value, json}; -const 
LOOMWEAVE_HTTP_BIND: &str = "127.0.0.1:9111"; -const LOOMWEAVE_HTTP_URL: &str = "http://127.0.0.1:9111"; const DEFAULT_FILIGREE_BASE_URL: &str = "http://127.0.0.1:8766"; #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -23,9 +21,13 @@ pub enum BindingState { Unparseable, } +// All three fields are URLs by nature; the `_url` suffix is the meaningful part +// of each name, not redundant noise. +#[allow(clippy::struct_field_names)] struct DesiredBindings { filigree_base_url: String, wardline_filigree_url: String, + loomweave_url: String, } /// Classify the local three-way integration binding files without writing. @@ -67,9 +69,16 @@ fn desired_bindings(project_root: &Path) -> DesiredBindings { "{}/api/weft/scan-results", filigree_base_url.trim_end_matches('/') ); + // ADR-044: seed the consumer's static target with this project's + // deterministic read-API port. serve binds the same port (barring an + // ephemeral fallback), and the published .loomweave/ephemeral.port file + // overrides this at runtime once a consumer resolves consume-time. 
+ let port = loomweave_federation::loomweave_port::deterministic_port(project_root); + let loomweave_url = format!("http://127.0.0.1:{port}"); DesiredBindings { filigree_base_url, wardline_filigree_url, + loomweave_url, } } @@ -114,7 +123,6 @@ fn loomweave_yaml_ok(project_root: &Path, desired: &DesiredBindings) -> Result Result Res let serve = ensure_object(root, "serve")?; let http = ensure_object(serve, "http")?; http.insert("enabled".to_owned(), json!(true)); - http.insert("bind".to_owned(), json!(LOOMWEAVE_HTTP_BIND)); http.insert("wardline_taint_write".to_owned(), json!(true)); write_yaml_if_changed(&path, &value) } @@ -191,7 +198,7 @@ fn install_wardline_yaml(project_root: &Path, desired: &DesiredBindings) -> Resu let mut value = read_yaml_value_or_empty(&path)?; let root = object_mut(&mut value, &path)?; let loomweave = ensure_object(root, "loomweave")?; - loomweave.insert("url".to_owned(), json!(LOOMWEAVE_HTTP_URL)); + loomweave.insert("url".to_owned(), json!(desired.loomweave_url)); let filigree = ensure_object(root, "filigree")?; filigree.insert("url".to_owned(), json!(desired.wardline_filigree_url)); write_yaml_if_changed(&path, &value) @@ -246,7 +253,7 @@ fn desired_wardline_args(desired: &DesiredBindings) -> Value { "--root", ".", "--loomweave-url", - LOOMWEAVE_HTTP_URL, + desired.loomweave_url, "--filigree-url", desired.wardline_filigree_url ]) diff --git a/crates/loomweave-cli/tests/doctor.rs b/crates/loomweave-cli/tests/doctor.rs index 4fff01d9..c848e4b5 100644 --- a/crates/loomweave-cli/tests/doctor.rs +++ b/crates/loomweave-cli/tests/doctor.rs @@ -211,6 +211,11 @@ fn doctor_fix_repairs_missing_three_way_integration_bindings() { "http://127.0.0.1:8749/api/weft/scan-results" ); + let expected_port = loomweave_federation::loomweave_port::deterministic_port( + &dir.path().canonicalize().unwrap(), + ); + let expected_loomweave_url = format!("http://127.0.0.1:{expected_port}"); + let mcp: serde_json::Value = 
serde_json::from_str(&fs::read_to_string(dir.path().join(".mcp.json")).unwrap()).unwrap(); assert_eq!( @@ -220,7 +225,7 @@ fn doctor_fix_repairs_missing_three_way_integration_bindings() { "--root", ".", "--loomweave-url", - "http://127.0.0.1:9111", + expected_loomweave_url, "--filigree-url", "http://127.0.0.1:8749/api/weft/scan-results" ]) diff --git a/crates/loomweave-cli/tests/install.rs b/crates/loomweave-cli/tests/install.rs index dce89f54..ca4f9a6b 100644 --- a/crates/loomweave-cli/tests/install.rs +++ b/crates/loomweave-cli/tests/install.rs @@ -92,14 +92,20 @@ fn install_all_wires_three_way_integration_bindings() { loomweave_yaml["serve"]["http"]["enabled"], serde_json::json!(true) ); - assert_eq!(loomweave_yaml["serve"]["http"]["bind"], "127.0.0.1:9111"); + // ADR-044: no fixed bind is written; the port is auto-selected at serve time. + assert!(loomweave_yaml["serve"]["http"].get("bind").is_none()); assert_eq!( loomweave_yaml["serve"]["http"]["wardline_taint_write"], serde_json::json!(true) ); + let expected_port = loomweave_federation::loomweave_port::deterministic_port( + &dir.path().canonicalize().unwrap(), + ); + let expected_loomweave_url = format!("http://127.0.0.1:{expected_port}"); + let wardline_yaml = read_yaml(&dir.path().join("wardline.yaml")); - assert_eq!(wardline_yaml["loomweave"]["url"], "http://127.0.0.1:9111"); + assert_eq!(wardline_yaml["loomweave"]["url"], expected_loomweave_url); assert_eq!( wardline_yaml["filigree"]["url"], "http://127.0.0.1:8749/api/weft/scan-results" @@ -114,7 +120,7 @@ fn install_all_wires_three_way_integration_bindings() { "--root", ".", "--loomweave-url", - "http://127.0.0.1:9111", + expected_loomweave_url, "--filigree-url", "http://127.0.0.1:8749/api/weft/scan-results" ]) From efd2285c9ae6f7cd06f7e0cdd5167e61bdf4e74d Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:03:18 +1000 Subject: [PATCH 12/60] fix(install): strip stale auto-stamped 
serve.http.bind 127.0.0.1:9111 on repair (ADR-044) Older `install --all` runs unconditionally stamped a fixed bind. Task 5 stopped writing it but left existing stamps in place, so re-install kept `bind: 9111`, serve honored it verbatim (no auto-port), and the collision returned invisibly (loomweave_yaml_ok no longer inspected bind). Strip exactly the old auto-default literal on repair and treat its presence as not-ok so doctor/binding_state flags and fixes it. Operator-chosen binds (any other value) are preserved. Co-Authored-By: Claude Opus 4.8 --- .../loomweave-cli/src/integration_bindings.rs | 14 ++++++ crates/loomweave-cli/tests/install.rs | 43 +++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/crates/loomweave-cli/src/integration_bindings.rs b/crates/loomweave-cli/src/integration_bindings.rs index 86008f4a..0a232ff4 100644 --- a/crates/loomweave-cli/src/integration_bindings.rs +++ b/crates/loomweave-cli/src/integration_bindings.rs @@ -14,6 +14,13 @@ use serde_json::{Map, Value, json}; const DEFAULT_FILIGREE_BASE_URL: &str = "http://127.0.0.1:8766"; +/// ADR-044 migration: older `install --all` runs unconditionally stamped a fixed +/// `serve.http.bind: 127.0.0.1:9111`. The deterministic read-API band is +/// `9400–10399`, so this exact literal can only be the old auto-default, never a +/// deterministic value. We strip it on repair so auto-port + ephemeral fallback +/// can engage; any other (operator-chosen) bind is left intact. +const STALE_DEFAULT_BIND: &str = "127.0.0.1:9111"; + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BindingState { Present, @@ -124,6 +131,7 @@ fn loomweave_yaml_ok(project_root: &Path, desired: &DesiredBindings) -> Result Res let serve = ensure_object(root, "serve")?; let http = ensure_object(serve, "http")?; + // ADR-044 migration: strip exactly the old auto-stamped `bind: 127.0.0.1:9111` + // so auto-port + ephemeral fallback can engage. 
A deliberately operator-chosen + // bind (any other value) is left intact. + if http.get("bind").and_then(Value::as_str) == Some(STALE_DEFAULT_BIND) { + http.remove("bind"); + } http.insert("enabled".to_owned(), json!(true)); http.insert("wardline_taint_write".to_owned(), json!(true)); write_yaml_if_changed(&path, &value) diff --git a/crates/loomweave-cli/tests/install.rs b/crates/loomweave-cli/tests/install.rs index ca4f9a6b..3a73c3e3 100644 --- a/crates/loomweave-cli/tests/install.rs +++ b/crates/loomweave-cli/tests/install.rs @@ -127,6 +127,49 @@ fn install_all_wires_three_way_integration_bindings() { ); } +/// ADR-044 migration: a project whose `loomweave.yaml` still carries the old +/// auto-stamped `serve.http.bind: 127.0.0.1:9111` has that exact literal stripped +/// on re-install, so auto-port + ephemeral fallback engages. A deliberately +/// operator-chosen bind (any other value) is preserved verbatim. +#[test] +fn install_all_strips_stale_default_bind_but_keeps_custom_bind() { + // Case 1: the stale auto-default is stripped. + let stale = tempfile::tempdir().unwrap(); + fs::write( + stale.path().join("loomweave.yaml"), + "version: 1\nserve:\n http:\n enabled: true\n bind: 127.0.0.1:9111\n wardline_taint_write: true\n", + ) + .unwrap(); + loomweave_bin() + .args(["install", "--all", "--path"]) + .arg(stale.path()) + .assert() + .success(); + let stale_yaml = read_yaml(&stale.path().join("loomweave.yaml")); + assert!( + stale_yaml["serve"]["http"].get("bind").is_none(), + "stale 127.0.0.1:9111 bind must be stripped on re-install: {stale_yaml}" + ); + + // Case 2: a deliberately custom bind is preserved. 
+ let custom = tempfile::tempdir().unwrap(); + fs::write( + custom.path().join("loomweave.yaml"), + "version: 1\nserve:\n http:\n enabled: true\n bind: 127.0.0.1:9999\n wardline_taint_write: true\n", + ) + .unwrap(); + loomweave_bin() + .args(["install", "--all", "--path"]) + .arg(custom.path()) + .assert() + .success(); + let custom_yaml = read_yaml(&custom.path().join("loomweave.yaml")); + assert_eq!( + custom_yaml["serve"]["http"]["bind"], "127.0.0.1:9999", + "an operator-chosen bind must be preserved: {custom_yaml}" + ); +} + #[test] fn install_applies_each_migration_exactly_once() { let dir = tempfile::tempdir().unwrap(); From 461614eb0566a2ecbb50bc0d0c411a779fd38213 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:07:41 +1000 Subject: [PATCH 13/60] feat(doctor): resolve_loomweave_url + doctor reports live published read-API port (ADR-044) Co-Authored-By: Claude Opus 4.8 --- crates/loomweave-cli/src/doctor.rs | 27 ++++- crates/loomweave-cli/tests/doctor.rs | 24 +++++ crates/loomweave-federation/src/lib.rs | 1 + .../loomweave-federation/src/loomweave_url.rs | 99 +++++++++++++++++++ 4 files changed, 148 insertions(+), 3 deletions(-) create mode 100644 crates/loomweave-federation/src/loomweave_url.rs diff --git a/crates/loomweave-cli/src/doctor.rs b/crates/loomweave-cli/src/doctor.rs index aa6b0f96..8fbb9dc7 100644 --- a/crates/loomweave-cli/src/doctor.rs +++ b/crates/loomweave-cli/src/doctor.rs @@ -404,16 +404,37 @@ fn check_http_config_json(project_root: &Path) -> DoctorJsonCheck { .and_then(|http| http.get("enabled")) .and_then(Value::as_bool) == Some(true); + if !enabled { + return DoctorJsonCheck::warning( + "http.config", + "HTTP serve config is disabled or incomplete", + ); + } + // ADR-044: prefer the live published port over the (now usually absent) + // static bind. A running serve publishes .loomweave/ephemeral.port. 
+ let resolution = loomweave_federation::loomweave_url::resolve_loomweave_url(None, project_root); + if let Some(url) = resolution.resolved_url { + return DoctorJsonCheck::ok( + "http.config", + format!("HTTP read API published on {url} ({})", resolution.source), + ); + } let bind = config .get("serve") .and_then(|serve| serve.get("http")) .and_then(|http| http.get("bind")) .and_then(Value::as_str) .unwrap_or(""); - if enabled && !bind.trim().is_empty() { - DoctorJsonCheck::ok("http.config", format!("HTTP configured on {bind}")) + if bind.trim().is_empty() { + DoctorJsonCheck::ok( + "http.config", + "HTTP enabled; read-API port auto-selected and published to .loomweave/ephemeral.port while serving", + ) } else { - DoctorJsonCheck::warning("http.config", "HTTP serve config is disabled or incomplete") + DoctorJsonCheck::ok( + "http.config", + format!("HTTP configured on {bind} (auto-published while serving)"), + ) } } diff --git a/crates/loomweave-cli/tests/doctor.rs b/crates/loomweave-cli/tests/doctor.rs index c848e4b5..5a389027 100644 --- a/crates/loomweave-cli/tests/doctor.rs +++ b/crates/loomweave-cli/tests/doctor.rs @@ -391,3 +391,27 @@ fn doctor_flags_untrusted_mcp_command_without_clobbering_it() { "an untrusted command makes the run not ok" ); } + +#[test] +fn doctor_reports_published_ephemeral_port() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + // Simulate a live serve having published its port. 
+ let loomweave_dir = dir.path().join(".loomweave"); + std::fs::create_dir_all(&loomweave_dir).unwrap(); + std::fs::write(loomweave_dir.join("ephemeral.port"), "9876\n").unwrap(); + + let (code, json) = doctor_json(dir.path(), false); + assert_eq!(code, 0, "{json}"); + let http = json["checks"] + .as_array() + .unwrap() + .iter() + .find(|c| c["id"] == "http.config") + .expect("http.config check present"); + assert_eq!(http["status"], "ok"); + assert!( + http["message"].as_str().unwrap_or("").contains("9876"), + "http.config should report the published live port: {http}" + ); +} diff --git a/crates/loomweave-federation/src/lib.rs b/crates/loomweave-federation/src/lib.rs index bd0db468..71da905a 100644 --- a/crates/loomweave-federation/src/lib.rs +++ b/crates/loomweave-federation/src/lib.rs @@ -4,4 +4,5 @@ pub mod config; pub mod filigree; pub mod filigree_url; pub mod loomweave_port; +pub mod loomweave_url; pub mod scan_results; diff --git a/crates/loomweave-federation/src/loomweave_url.rs b/crates/loomweave-federation/src/loomweave_url.rs new file mode 100644 index 00000000..675fd286 --- /dev/null +++ b/crates/loomweave-federation/src/loomweave_url.rs @@ -0,0 +1,99 @@ +//! Resolve the live Loomweave read-API base URL (ADR-044). +//! +//! The reference reader of the `.loomweave/ephemeral.port` file contract and +//! the twin of [`crate::filigree_url`]. Precedence (consumer-side): the +//! published live port wins over a configured URL, which wins over nothing. +//! (ADR-044's higher "explicit flag/env" precedence level is realized by each +//! consumer's own CLI/env handling — e.g. Wardline's `--loomweave-url` — not by +//! this library function.) Fail-soft throughout: a missing/corrupt file folds +//! to the configured URL; absent both, `None` (federation simply degrades). + +use std::path::Path; + +use crate::loomweave_port::read_published_port; + +/// The live published port file `.loomweave/ephemeral.port`. 
+pub const SOURCE_EPHEMERAL_PORT: &str = ".loomweave/ephemeral.port"; +/// A statically configured URL (e.g. `wardline.yaml: loomweave.url`). +pub const SOURCE_CONFIG: &str = "config"; +/// Neither a published file nor a configured URL — federation is absent. +pub const SOURCE_NONE: &str = "none"; + +/// Where a resolved Loomweave read-API URL came from. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct LoomweaveUrlResolution { + /// The URL a consumer should call, or `None` when nothing resolves. + pub resolved_url: Option, + /// One of the `SOURCE_*` labels. + pub source: &'static str, +} + +/// Resolve the read-API URL, preferring the live published port over the +/// configured URL. `configured_url` is the consumer's static fallback (pass +/// `None` if it has none). +#[must_use] +pub fn resolve_loomweave_url( + configured_url: Option<&str>, + project_root: &Path, +) -> LoomweaveUrlResolution { + if let Some(port) = read_published_port(project_root) { + return LoomweaveUrlResolution { + resolved_url: Some(format!("http://127.0.0.1:{port}")), + source: SOURCE_EPHEMERAL_PORT, + }; + } + match configured_url { + Some(url) if !url.trim().is_empty() => LoomweaveUrlResolution { + resolved_url: Some(url.to_owned()), + source: SOURCE_CONFIG, + }, + _ => LoomweaveUrlResolution { + resolved_url: None, + source: SOURCE_NONE, + }, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::loomweave_port::publish_port; + + #[test] + fn published_port_beats_configured_url() { + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9412).unwrap(); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); + assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:9412")); + assert_eq!(res.source, SOURCE_EPHEMERAL_PORT); + } + + #[test] + fn falls_back_to_configured_url_when_no_file() { + let dir = tempfile::tempdir().unwrap(); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); + 
assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:9111")); + assert_eq!(res.source, SOURCE_CONFIG); + } + + #[test] + fn corrupt_file_folds_to_configured_url() { + let dir = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + std::fs::write( + dir.path().join(".loomweave").join("ephemeral.port"), + "not-a-port", + ) + .unwrap(); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); + assert_eq!(res.source, SOURCE_CONFIG); + } + + #[test] + fn nothing_resolves_to_none() { + let dir = tempfile::tempdir().unwrap(); + let res = resolve_loomweave_url(None, dir.path()); + assert_eq!(res.resolved_url, None); + assert_eq!(res.source, SOURCE_NONE); + } +} From d0b15b3fdce9be2eeddff5ffee3d4facc0164273 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:13:14 +1000 Subject: [PATCH 14/60] test(federation): cover blank-config SOURCE_NONE path in resolve_loomweave_url (ADR-044) Co-Authored-By: Claude Opus 4.8 --- crates/loomweave-federation/src/loomweave_url.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/loomweave-federation/src/loomweave_url.rs b/crates/loomweave-federation/src/loomweave_url.rs index 675fd286..38bb6f95 100644 --- a/crates/loomweave-federation/src/loomweave_url.rs +++ b/crates/loomweave-federation/src/loomweave_url.rs @@ -96,4 +96,12 @@ mod tests { assert_eq!(res.resolved_url, None); assert_eq!(res.source, SOURCE_NONE); } + + #[test] + fn blank_config_with_no_file_resolves_to_none() { + let dir = tempfile::tempdir().unwrap(); + let res = resolve_loomweave_url(Some(" "), dir.path()); + assert_eq!(res.resolved_url, None); + assert_eq!(res.source, SOURCE_NONE); + } } From 046c34177da21c95f78fa9ef5550ea7c3730c8f2 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:26:10 +1000 Subject: [PATCH 15/60] docs(adr): accept ADR-044; 
auto-port docs, glossary verdict, revert 9112 stopgap Co-Authored-By: Claude Opus 4.8 --- .gitignore | 1 + .loomweave/.gitignore | 4 +++ docs/federation/contracts.md | 5 +++- ...044-read-api-ephemeral-port-publication.md | 29 ++++++++++++++++++- docs/loomweave/adr/README.md | 2 +- docs/operator/loomweave-http-read-api.md | 9 ++++-- docs/operator/secret-scanning.md | 3 +- docs/suite/glossary.md | 13 +++++++++ loomweave.yaml | 1 - wardline.yaml | 5 +++- 10 files changed, 64 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 5713582a..d6e91368 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ tests/e2e/external-operator-smoke-results-*.md .loomweave/loomweave.db .loomweave/instance_id .loomweave/loomweave.lock +.loomweave/ephemeral.port # Documentation site build output (mkdocs `site_dir`, web/mkdocs.yml). /site-build/ diff --git a/.loomweave/.gitignore b/.loomweave/.gitignore index d1d0e32b..b0f4e45f 100644 --- a/.loomweave/.gitignore +++ b/.loomweave/.gitignore @@ -9,6 +9,10 @@ loomweave.db instance_id loomweave.lock +# Read-API live port discovery file (ADR-044): present only while serve runs, +# rewritten per bind, loopback-only — a runtime artifact, never committed. +ephemeral.port + # SQLite write-ahead files never belong in the repo. *-wal *-shm diff --git a/docs/federation/contracts.md b/docs/federation/contracts.md index 90d4a039..c55f3086 100644 --- a/docs/federation/contracts.md +++ b/docs/federation/contracts.md @@ -32,7 +32,10 @@ Filigree is absent (weft.md §5). serve: http: enabled: true - bind: 127.0.0.1:9111 + # The read-API port is auto-selected per project — a deterministic port in + # Loomweave's band (9400–10399, disjoint from Filigree's 8400–9399) with an + # ephemeral fallback — and published to .loomweave/ephemeral.port while + # serve runs. Set `bind:` explicitly only to pin a fixed port (ADR-044). # Preferred 1.0 identity mode. Optional on loopback, required for # authenticated Weft component requests. 
identity_token_env: WEFT_IDENTITY_SECRET diff --git a/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md index 28a02633..9af2ded1 100644 --- a/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md +++ b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md @@ -1,10 +1,17 @@ # ADR-044: Read-API Ephemeral Port Publication -**Status**: Proposed +**Status**: Accepted **Date**: 2026-06-06 **Relates to**: [ADR-034](./ADR-034-federation-http-read-api-hardening.md) **Tracking**: clarion-7f574bc34f +> **Accepted** on branch `feat/serve-no-index-chirp` (deterministic band +> `9400–10399`). Acceptance evidence: the cross-product-visible +> `.loomweave/ephemeral.port` term carries a **managed-clash** verdict in +> [`docs/suite/glossary.md`](../../suite/glossary.md), with the explicit +> `.filigree/ephemeral.port` ↔ `.loomweave/ephemeral.port` mapping table below +> (per the README acceptance criteria, model ADR-017). + ## Context `loomweave serve` exposes a federation HTTP read API. Its bind address is a @@ -98,6 +105,26 @@ conform to exactly this: - **Git-ignored** runtime artifact, consistent with ADR-005's treatment of run-time-only state. +## Managed-clash verdict + +`ephemeral.port` is a cross-product-visible term: Filigree owns the original +`.filigree/ephemeral.port` endpoint-discovery convention, and this ADR adopts the +same filename for Loomweave's own read API. Per the ADR-acceptance criteria +(`docs/loomweave/adr/README.md`), this is a **managed clash** — the same term is +used by a sibling, governed here by an explicit mapping table (model: ADR-017). +The verdict is recorded in [`docs/suite/glossary.md`](../../suite/glossary.md). 
+ +| Product | Path | Format | Publication | Band (internal, not contract) | +|---|---|---|---|---| +| Filigree | `.filigree/ephemeral.port` | single plain-ASCII TCP port, optional trailing `\n`, atomic temp+rename | loopback-only, present only while running | `8400–9399` | +| Loomweave | `.loomweave/ephemeral.port` | identical | identical | `9400–10399` (disjoint) | + +The clash is *managed*, not *renamed*: the shared filename is deliberate (one +convention siblings recognize), the paths are distinct per product, the wire +format is identical, and the deterministic bands are disjoint so the two products +never contend for the same port. The band is never part of the file contract — +consumers read the published file, never recompute a peer's port. + ## Resolution semantics (normative) Every consumer resolves **at consume time** (each scan / read), never caches the diff --git a/docs/loomweave/adr/README.md b/docs/loomweave/adr/README.md index 8421b4fb..6d098a1c 100644 --- a/docs/loomweave/adr/README.md +++ b/docs/loomweave/adr/README.md @@ -44,7 +44,7 @@ This folder is the canonical home for authored Loomweave architecture decision r | [ADR-041](./ADR-041-resume-is-idempotent-reemit.md) | Analyze resume is idempotent re-emit, not checkpoint recovery; amends ADR-005/ADR-011 resume language | Accepted | | [ADR-042](./ADR-042-hmac-freshness-and-replay-window.md) | HMAC freshness and replay window — timestamp + nonce headers, crate-backed HMAC, process-local replay cache | Accepted | | [ADR-043](./ADR-043-edge-reanalysis-replacement.md) | Edge reanalysis replacement — per-source-file anchored-edge replacement and edge metadata upsert; amends ADR-026 | Accepted | -| [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — `.loomweave/ephemeral.port` as a normative cross-product file contract (loopback-only, port-only, atomic) + consume-time resolution precedence (explicit > file > config > none), per-project deterministic port, 
installer stops pinning 9111; relates to ADR-034 | Proposed | +| [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — `.loomweave/ephemeral.port` as a normative cross-product file contract (loopback-only, port-only, atomic) + consume-time resolution precedence (explicit > file > config > none), per-project deterministic port, installer stops pinning 9111; relates to ADR-034 | Accepted | ## Backlog still tracked in the detailed design diff --git a/docs/operator/loomweave-http-read-api.md b/docs/operator/loomweave-http-read-api.md index 39e7d7fe..2bfe07da 100644 --- a/docs/operator/loomweave-http-read-api.md +++ b/docs/operator/loomweave-http-read-api.md @@ -17,10 +17,14 @@ environment variable that contains the shared Weft component secret: serve: http: enabled: true - bind: 127.0.0.1:9111 identity_token_env: WEFT_IDENTITY_SECRET ``` +The read-API port is auto-selected per project — a deterministic port in +Loomweave's band (`9400–10399`, disjoint from Filigree's `8400–9399`) with an +ephemeral fallback — and published to `.loomweave/ephemeral.port` while `serve` +runs. Set `serve.http.bind` explicitly only to pin a fixed port (ADR-044). + When `identity_token_env` is configured, Loomweave refuses to start unless the env var is present and non-empty. Protected `/api/v1/files` routes then require `X-Weft-Component: loomweave:`, `X-Weft-Timestamp: `, and @@ -65,7 +69,8 @@ catalog, or unavailable because of storage errors. When both `serve.http.token_env` (legacy bearer) and `serve.http.identity_token_env` (HMAC, preferred per [ADR-034](../loomweave/adr/ADR-034-federation-http-read-api-hardening.md)) are unset and the -bind is loopback (default: `127.0.0.1:9111`), the HTTP read API serves +bind is loopback (the auto-selected per-project port, or an explicit loopback +`serve.http.bind`; see ADR-044), the HTTP read API serves unauthenticated. 
This is the intended single-user developer-workstation trust model — the loopback socket is reachable only from processes on the same host, and Loomweave's catalogue is no more sensitive than the project diff --git a/docs/operator/secret-scanning.md b/docs/operator/secret-scanning.md index d0f2e81d..8d3d5ff3 100644 --- a/docs/operator/secret-scanning.md +++ b/docs/operator/secret-scanning.md @@ -93,7 +93,8 @@ without authentication: **loopback bind with no token configured.** When both `serve.http.token_env` (legacy bearer) and `serve.http.identity_token_env` (HMAC, preferred per [ADR-034](../loomweave/adr/ADR-034-federation-http-read-api-hardening.md)) -are unset and the bind is loopback (default: `127.0.0.1:9111`), the HTTP read +are unset and the bind is loopback (the auto-selected per-project read-API port, +or an explicit loopback `serve.http.bind`; see ADR-044), the HTTP read API serves unauthenticated. On a single-user developer workstation this is the intended trust model: the loopback socket is reachable only from processes on that host, and Loomweave's catalogue is no more sensitive than diff --git a/docs/suite/glossary.md b/docs/suite/glossary.md index 2405b8e9..002126ee 100644 --- a/docs/suite/glossary.md +++ b/docs/suite/glossary.md @@ -22,3 +22,16 @@ in the hub doctrine: `~/loom/doctrine.md` §8. Loomweave's ADRs (e.g. ADR-004, ADR-017, ADR-022, ADR-024, ADR-036, ADR-038) remain Loomweave-owned and authoritative for Loomweave's own field shapes; the hub glossary points to them, not the reverse. + +--- + +## Managed clashes (mirror to the hub) + +The body of cross-product term tables now lives at `~/loom/glossary.md`. New +managed-clash verdicts are recorded here as well so the in-repo ADR-acceptance +gate (`docs/loomweave/adr/README.md` §"ADR acceptance criteria") resolves without +the hub; the hub copy is canonical and should mirror this entry. 
+ +| Term | Verdict | Authority | Mapping / notes | +|---|---|---|---| +| `ephemeral.port` (read-API live-port discovery file) | **managed clash** | ADR-044 (Loomweave); Filigree owns the original `.filigree/ephemeral.port` convention | Shared filename convention, **distinct per-product paths**: `.filigree/ephemeral.port` ↔ `.loomweave/ephemeral.port`. Identical format (single plain-ASCII TCP port, optional trailing `\n`, atomic temp+rename), loopback-only publication, present only while the producer serves. Bands are disjoint and never part of the contract — consumers read the file, never recompute. Mapping table in ADR-044 §"Managed-clash verdict". | diff --git a/loomweave.yaml b/loomweave.yaml index 8ffa933d..ee5bf735 100644 --- a/loomweave.yaml +++ b/loomweave.yaml @@ -36,7 +36,6 @@ llm_policy: session_token_ceiling: 1000000 serve: http: - bind: 127.0.0.1:9112 enabled: true wardline_taint_write: true version: 1 diff --git a/wardline.yaml b/wardline.yaml index 5b049343..520ef8cf 100644 --- a/wardline.yaml +++ b/wardline.yaml @@ -1,4 +1,7 @@ filigree: url: http://127.0.0.1:8542/api/weft/scan-results loomweave: - url: http://127.0.0.1:9112 + # ADR-044: pinned to this project's deterministic read-API port. The published + # .loomweave/ephemeral.port overrides this once Wardline resolves consume-time + # (clarion-7f574bc34f follow-up). Until then this keeps local wardline->loomweave working. 
+ url: http://127.0.0.1:10196 From 85d9cd1da2e21046f16922286c8d576fad23fb27 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:29:41 +1000 Subject: [PATCH 16/60] docs(plan): ADR-044 ephemeral-port implementation plan Co-Authored-By: Claude Opus 4.8 --- .../2026-06-06-loomweave-ephemeral-port.md | 1307 +++++++++++++++++ 1 file changed, 1307 insertions(+) create mode 100644 docs/superpowers/plans/2026-06-06-loomweave-ephemeral-port.md diff --git a/docs/superpowers/plans/2026-06-06-loomweave-ephemeral-port.md b/docs/superpowers/plans/2026-06-06-loomweave-ephemeral-port.md new file mode 100644 index 00000000..52953c5f --- /dev/null +++ b/docs/superpowers/plans/2026-06-06-loomweave-ephemeral-port.md @@ -0,0 +1,1307 @@ +# Loomweave Read-API Ephemeral Port Publication — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make `loomweave serve` bind a per-project deterministic read-API port (ephemeral fallback), publish the live port to `.loomweave/ephemeral.port` as a normative cross-product file contract, resolve it consume-time, and stop the installer pinning `9111` — so two projects can `serve` concurrently without the `9111` collision (ADR-044, clarion-7f574bc34f). + +**Architecture:** Mirror Filigree's `.filigree/ephemeral.port` convention symmetrically for Loomweave's own read API. A new `loomweave-federation::loomweave_port` module owns the deterministic-port computation (blake3, band `9400–10399`, disjoint from Filigree's `8400–9399`), the atomic publish/remove, and the validated read. 
The producer (`http_read.rs`) binds the deterministic port — falling back to OS-assigned `:0` only when the port was *auto-selected*, not when an operator set it explicitly — then publishes the actually-bound port loopback-only via an RAII guard that unlinks on drop. `HttpReadConfig.bind` becomes `Option<SocketAddr>` so "operator chose a port" is distinguishable from "auto." The installer and the local dogfood bindings stop hardcoding `9111`. + +**Tech Stack:** Rust (workspace edition 2024, rust 1.88), `blake3` (already a workspace dep, Loomweave's SEI hash), `tokio` TCP bind, `axum` serve, `serde`/`serde_norway` config, `cargo nextest`. + +**Branch:** Work on the current branch `feat/serve-no-index-chirp` (ADR-044 already lives here, unpushed). The user may split at push time. + +**The band is internal, never part of the contract.** Consumers read the published file; nobody recomputes a peer's port. The `9400` band number appears only in code, never in the ADR's normative section. + +--- + +## File Structure + +| File | Responsibility | Task | +|---|---|---| +| `crates/loomweave-federation/Cargo.toml` | add `blake3` dep | 1 | +| `crates/loomweave-federation/src/loomweave_port.rs` (CREATE) | deterministic port, atomic publish/remove, validated read | 1 | +| `crates/loomweave-federation/src/lib.rs` | declare `pub mod loomweave_port;` (+ `loomweave_url` in Task 6) | 1, 6 | +| `crates/loomweave-federation/src/config.rs` | `bind: Option<SocketAddr>`, method + default updates | 2 | +| `crates/loomweave-cli/src/http_read.rs` | candidate resolution; auto-fallback; publish RAII | 2, 3 | +| `crates/loomweave-cli/src/install.rs` | YAML stub drops `bind: 9111` | 4 | +| `crates/loomweave-cli/tests/install.rs` | install-stub + bindings assertions | 4, 5 | +| `crates/loomweave-cli/src/integration_bindings.rs` | deterministic `loomweave.url`; drop fixed bind | 5 | +| `crates/loomweave-cli/tests/doctor.rs` | bindings-repair assertion | 5, 6 | +| `crates/loomweave-federation/src/loomweave_url.rs` (CREATE) 
| `resolve_loomweave_url` (file>config>none) | 6 | +| `crates/loomweave-cli/src/doctor.rs` | `check_http_config_json` reports published port | 6 | +| `docs/operator/loomweave-http-read-api.md`, `docs/operator/secret-scanning.md`, `docs/federation/contracts.md` | auto-port wording | 7 | +| `loomweave.yaml`, `wardline.yaml` (repo root) | revert `9112` stopgap | 7 | +| `docs/loomweave/adr/ADR-044-*.md`, `docs/loomweave/adr/README.md`, `docs/suite/glossary.md` | ADR Proposed→Accepted + glossary verdict | 7 | + +--- + +## Task 1: Shared ephemeral-port module (`loomweave-federation`) + +**Files:** +- Modify: `crates/loomweave-federation/Cargo.toml` +- Create: `crates/loomweave-federation/src/loomweave_port.rs` +- Modify: `crates/loomweave-federation/src/lib.rs` + +This task ships pure, fully-unit-tested functions with no dependents yet, so the tree stays green standalone. + +- [ ] **Step 1: Add the `blake3` dependency** + +In `crates/loomweave-federation/Cargo.toml`, under `[dependencies]` (alphabetical-ish, after `loomweave-core`), add: + +```toml +blake3.workspace = true +``` + +The workspace already pins `blake3 = "1.8.5"` (root `Cargo.toml:39`); `.workspace = true` inherits it. + +- [ ] **Step 2: Write the failing tests for the new module** + +Create `crates/loomweave-federation/src/loomweave_port.rs` with ONLY the test module first (the `use super::*;` will fail to resolve the items until Step 4): + +```rust +//! Loomweave read-API ephemeral-port contract (ADR-044). +//! +//! The twin of Filigree's `.filigree/ephemeral.port` convention, applied to +//! Loomweave's own federation HTTP read API. `serve` binds a per-project +//! deterministic port (ephemeral `:0` fallback) and publishes the *actually +//! bound* port to `/.loomweave/ephemeral.port`. Cross-product +//! consumers (notably Wardline, which is Python) read this file; nobody +//! recomputes a peer's port. The deterministic band here is an implementation +//! detail, never part of the file contract. +//! +//! 
File contract (ADR-044, normative): a single plain-ASCII integer TCP port, +//! optional trailing `\n`, written atomically (temp + rename), present only +//! while `serve` holds a loopback bind. Host (`127.0.0.1`) and scheme (`http`) +//! are implied, sound only because publication is loopback-only. + +use std::path::{Path, PathBuf}; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn deterministic_port_is_stable_and_in_band() { + let dir = tempfile::tempdir().unwrap(); + let a = deterministic_port(dir.path()); + let b = deterministic_port(dir.path()); + assert_eq!(a, b, "same path must yield the same port"); + assert!( + (PORT_BAND_BASE..PORT_BAND_BASE + PORT_BAND_SPAN).contains(&a), + "port {a} must land in the loomweave band [{PORT_BAND_BASE}, {})", + PORT_BAND_BASE + PORT_BAND_SPAN + ); + // Disjoint from Filigree's 8400-9399 band. + assert!(a >= 9400, "port {a} must not overlap Filigree's 8400-9399 band"); + } + + #[test] + fn deterministic_port_differs_by_path() { + let a = tempfile::tempdir().unwrap(); + let b = tempfile::tempdir().unwrap(); + // Distinct tempdirs almost always hash to distinct ports; assert the + // function is path-sensitive by checking the inputs differ and the + // computation is a pure function of the (canonical) path. + assert_ne!(a.path(), b.path()); + let pa = deterministic_port(a.path()); + let pb = deterministic_port(b.path()); + // Not guaranteed distinct (1/1000 collision), but the band membership + // and determinism are what matter; assert both are in-band. + assert!(pa >= 9400 && pb >= 9400); + } + + #[test] + fn publish_then_read_round_trips() { + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9412).expect("publish"); + assert_eq!(read_published_port(dir.path()), Some(9412)); + // Published content is the bare port plus a single trailing newline. 
+ let raw = std::fs::read_to_string(published_port_path(dir.path())).unwrap(); + assert_eq!(raw, "9412\n"); + } + + #[test] + fn publish_creates_loomweave_dir_if_absent() { + let dir = tempfile::tempdir().unwrap(); + // No .loomweave/ yet. + assert!(!dir.path().join(".loomweave").exists()); + publish_port(dir.path(), 10000).expect("publish creates .loomweave/"); + assert_eq!(read_published_port(dir.path()), Some(10000)); + } + + #[test] + fn read_tolerates_trailing_whitespace_and_newline() { + let dir = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + std::fs::write(published_port_path(dir.path()), " 9500 \n").unwrap(); + assert_eq!(read_published_port(dir.path()), Some(9500)); + } + + #[test] + fn read_rejects_malformed_zero_and_out_of_range() { + let dir = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + for bad in ["", "not-a-port", "0", "65536", "70000", "-1", "12.5"] { + std::fs::write(published_port_path(dir.path()), bad).unwrap(); + assert_eq!( + read_published_port(dir.path()), + None, + "malformed/out-of-range content {bad:?} must fold to None (fail-soft)" + ); + } + } + + #[test] + fn read_absent_file_is_none() { + let dir = tempfile::tempdir().unwrap(); + assert_eq!(read_published_port(dir.path()), None); + } + + #[test] + fn remove_is_idempotent_and_clears_the_file() { + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9999).unwrap(); + assert!(published_port_path(dir.path()).exists()); + remove_published_port(dir.path()); + assert!(!published_port_path(dir.path()).exists()); + // Second remove on an absent file is a no-op, not an error. + remove_published_port(dir.path()); + } +} +``` + +- [ ] **Step 3: Run the tests to verify they fail** + +Run: `cargo nextest run -p loomweave-federation loomweave_port` +Expected: FAIL — `cannot find function deterministic_port`, etc. 
+ +- [ ] **Step 4: Implement the module functions** + +Insert the implementation *above* the `#[cfg(test)] mod tests` block in `loomweave_port.rs`: + +```rust +/// Base of Loomweave's deterministic read-API port band. Chosen to sit +/// **above** Filigree's `8400–9399` band so the two products never contend for +/// the same number. Internal only — never part of the cross-product file +/// contract (consumers read the published file, never recompute). +pub const PORT_BAND_BASE: u16 = 9400; +/// Width of the band: ports land in `[9400, 10400)` i.e. `9400..=10399`. +pub const PORT_BAND_SPAN: u16 = 1000; + +/// Canonical path of the published port file for a project root. +#[must_use] +pub fn published_port_path(project_root: &Path) -> PathBuf { + project_root.join(".loomweave").join("ephemeral.port") +} + +/// Deterministic-but-unpredictable read-API port for a project, derived from +/// the canonical project path. Stable across runs (so a consumer's static +/// config can match it) yet path-specific (so two projects differ). Mirrors +/// Filigree's `8400 + hash % 1000`, in a disjoint band, using Loomweave's own +/// hash (blake3, as for SEI). The bound port is published; this computation is +/// the producer's *starting guess*, not a value any consumer recomputes. +#[must_use] +pub fn deterministic_port(project_root: &Path) -> u16 { + // Best-effort canonicalize so every caller (serve, install, doctor) agrees + // regardless of whether it pre-canonicalized; fall back to the path as-given. 
+ let canonical = project_root + .canonicalize() + .unwrap_or_else(|_| project_root.to_path_buf()); + let bytes = canonical.to_string_lossy(); + let hash = blake3::hash(bytes.as_bytes()); + let head = u64::from_le_bytes( + hash.as_bytes()[..8] + .try_into() + .expect("blake3 digest is 32 bytes, so [..8] is 8 bytes"), + ); + let offset = u16::try_from(head % u64::from(PORT_BAND_SPAN)) + .expect("remainder of % 1000 is < 1000, which fits u16"); + PORT_BAND_BASE + offset +} + +/// Read and validate the published port. Any missing / non-integer / +/// out-of-range / zero content folds to `None` (fail-soft, ADR-044). A `u16` +/// parse already bounds the value to `0..=65535`; `0` is rejected explicitly. +#[must_use] +pub fn read_published_port(project_root: &Path) -> Option<u16> { + let raw = std::fs::read_to_string(published_port_path(project_root)).ok()?; + raw.trim().parse::<u16>().ok().filter(|port| *port != 0) +} + +/// Atomically publish `port` to `<project_root>/.loomweave/ephemeral.port`. +/// Writes a temp file in the same directory and `rename(2)`s it into place, so +/// a concurrent reader never observes a torn value. Creates `.loomweave/` if +/// absent. The caller is responsible for the loopback-only invariant (only call +/// this when the bound address is loopback). +/// +/// # Errors +/// Returns the underlying I/O error if the directory cannot be created or the +/// temp file cannot be written/renamed. +pub fn publish_port(project_root: &Path, port: u16) -> std::io::Result<()> { + let dir = project_root.join(".loomweave"); + std::fs::create_dir_all(&dir)?; + // One `serve` per process publishes, so the PID makes the temp name unique + // within this directory without needing a random suffix. + let tmp = dir.join(format!("ephemeral.port.{}.tmp", std::process::id())); + std::fs::write(&tmp, format!("{port}\n"))?; + std::fs::rename(&tmp, dir.join("ephemeral.port"))?; + Ok(()) +} + +/// Best-effort removal of the published port file. A missing file is not an +/// error (idempotent). 
Called on clean shutdown; SIGKILL leaves a stale file, +/// which `read_published_port` validation + the ADR-034 instance-ID guard +/// handle (a stale file degrades, never corrupts). +pub fn remove_published_port(project_root: &Path) { + let _ = std::fs::remove_file(published_port_path(project_root)); +} +``` + +- [ ] **Step 5: Declare the module** + +In `crates/loomweave-federation/src/lib.rs`, add `pub mod loomweave_port;` after `pub mod filigree_url;`: + +```rust +//! Shared federation/config helpers used by CLI and MCP surfaces. + +pub mod config; +pub mod filigree; +pub mod filigree_url; +pub mod loomweave_port; +pub mod scan_results; +``` + +- [ ] **Step 6: Run the tests to verify they pass** + +Run: `cargo nextest run -p loomweave-federation loomweave_port` +Expected: PASS (8 tests). + +- [ ] **Step 7: Lint + commit** + +```bash +cargo fmt --all +cargo clippy -p loomweave-federation --all-targets --all-features -- -D warnings +git add crates/loomweave-federation/Cargo.toml crates/loomweave-federation/src/loomweave_port.rs crates/loomweave-federation/src/lib.rs +git commit -m "feat(federation): loomweave_port — deterministic read-API port + atomic publish (ADR-044)" +``` + +--- + +## Task 2: `HttpReadConfig.bind` → `Option<SocketAddr>` (green-tree migration) + +**Files:** +- Modify: `crates/loomweave-federation/src/config.rs` +- Modify: `crates/loomweave-cli/src/http_read.rs` + +`None` means *auto* (deterministic + fallback + publish, wired in Tasks 2–3). `Some(addr)` means an explicit operator override. This is one atomic task: the type change plus every construction site, ending on a green tree. Producer *behavior* (fallback/publish) is Task 3 — here, `spawn` only resolves `None` to the deterministic candidate so it compiles and runs. 
+ +- [ ] **Step 1: Write the failing config tests** + +In `crates/loomweave-federation/src/config.rs`, inside `mod tests`, add: + +```rust + #[test] + fn http_bind_defaults_to_none_auto_select() { + // ADR-044: the installer no longer pins a port; an unset bind means + // "auto-select a per-project deterministic port and publish it". + assert_eq!(HttpReadConfig::default().bind, None); + } + + #[test] + fn http_bind_none_is_treated_as_loopback() { + // Auto-select always binds 127.0.0.1, so an absent bind is loopback and + // must satisfy the loopback-trust gate without allow_non_loopback. + let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + assert!(cfg.is_loopback_bind()); + assert!(cfg.validate_loopback_trust().is_ok()); + } + + #[test] + fn http_explicit_bind_still_parses() { + let cfg = McpConfig::from_yaml_str( + "serve:\n http:\n enabled: true\n bind: \"127.0.0.1:9412\"\n", + ) + .expect("parse explicit bind"); + assert_eq!( + cfg.serve.http.bind, + Some(SocketAddr::from(([127, 0, 0, 1], 9412))) + ); + } +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `cargo nextest run -p loomweave-federation http_bind` +Expected: FAIL to compile — `bind` is `SocketAddr`, not `Option`. 
+ +- [ ] **Step 3: Change the field type and the default** + +In `config.rs`, change the `HttpReadConfig.bind` field: + +```rust + #[serde(default, deserialize_with = "deserialize_optional_socket_addr")] + pub bind: Option<SocketAddr>, +``` + +Change the `Default` impl: + +```rust +impl Default for HttpReadConfig { + fn default() -> Self { + Self { + enabled: false, + bind: None, + allow_non_loopback: false, + token_env: "WEFT_TOKEN".to_owned(), + identity_token_env: None, + wardline_taint_write: false, + } + } +} +``` + +- [ ] **Step 4: Update the loopback methods to treat `None` as loopback** + +Replace `validate_loopback_trust` and `is_loopback_bind`: + +```rust + pub fn validate_loopback_trust(&self) -> Result<(), ConfigError> { + if self.enabled && !self.allow_non_loopback && !self.is_loopback_bind() { + return Err(ConfigError::NonLoopbackHttpBind { + code: "LMWV-CONFIG-HTTP-NON-LOOPBACK", + // Safe: is_loopback_bind() is false only when bind is Some(non-loopback). + bind: self.bind.expect("non-loopback bind implies an explicit address"), + }); + } + Ok(()) + } +``` + +```rust + /// `None` (auto-select) always binds `127.0.0.1`, so it is loopback. + #[must_use] + pub fn is_loopback_bind(&self) -> bool { + self.bind.is_none_or(|addr| addr.ip().is_loopback()) + } +``` + +`validate_auth_trust` already calls `self.is_loopback_bind()` and only reads `self.bind` inside the `NonLoopbackHttpNoAuth` error arm, which is reached only when `is_loopback_bind()` is false (i.e. `Some(non-loopback)`). 
Update that one read: + +```rust + Err(ConfigError::NonLoopbackHttpNoAuth { + code: "LMWV-CONFIG-HTTP-NO-AUTH", + bind: self.bind.expect("non-loopback bind implies an explicit address"), + token_env: self.token_env.clone(), + }) +``` + +- [ ] **Step 5: Add the optional-socket deserializer** + +Below the existing `deserialize_socket_addr` in `config.rs`, add: + +```rust +fn deserialize_optional_socket_addr<'de, D>(deserializer: D) -> Result<Option<SocketAddr>, D::Error> +where + D: serde::Deserializer<'de>, +{ + let raw = Option::<String>::deserialize(deserializer)?; + match raw { + None => Ok(None), + Some(raw) => raw.parse().map(Some).map_err(|err| { + serde::de::Error::custom(format!("invalid serve.http.bind {raw:?}: {err}")) + }), + } +} +``` + +The old `deserialize_socket_addr` is now unused — delete it (the rustc `dead_code` lint would otherwise fail the `-D warnings` clippy run). The `invalid_http_bind_fails_config_load` test still passes because the new deserializer emits the same `invalid serve.http.bind` message. + +- [ ] **Step 6: Fix the existing config tests that build/parse `bind`** + +In `config.rs` `mod tests`, update the two tests that assert a parsed bind value: + +`http_bind_is_parsed_when_config_loads`: +```rust + assert_eq!( + cfg.serve.http.bind, + Some(SocketAddr::from(([127, 0, 0, 1], 0))) + ); +``` + +The non-loopback / IPv6 / allow-non-loopback parse tests (`enabled_non_loopback_http_bind_requires_allow_non_loopback`, `enabled_lan_http_bind_requires_allow_non_loopback`, `enabled_ipv6_loopback_http_bind_is_allowed_by_default`, `enabled_non_loopback_http_bind_allows_explicit_opt_in`, `invalid_http_bind_fails_config_load`) all set `bind:` in YAML strings — those parse into `Some(..)` and need no change. + +- [ ] **Step 7: Fix `http_read.rs` construction + spawn sites** + +In `crates/loomweave-cli/src/http_read.rs`: + +(a) `spawn_with_env` currently does `let bind = config.bind;`. Replace with deterministic resolution (behavior-minimal — no fallback/publish yet; that's Task 3). 
The `project_root` is already a parameter: + +```rust + // ADR-044: an unset bind means auto-select a per-project deterministic + // read-API port. An explicit bind is honored verbatim. (Task 3 adds the + // ephemeral fallback + published-file lifecycle.) + let auto_port = config.bind.is_none(); + let bind = config.bind.unwrap_or_else(|| { + std::net::SocketAddr::from(( + [127, 0, 0, 1], + loomweave_federation::loomweave_port::deterministic_port(&project_root), + )) + }); +``` + +Thread `auto_port` and `project_root` (clone before it is moved) into `run_http_read_server`. `project_root` is currently moved into the thread closure; capture a clone for publication in Task 3. For Task 2, just add the `auto_port: bool` parameter to `run_http_read_server`'s signature. The unused value cannot be silenced at the call site, so accept the parameter under its real name and bind it to `_auto_port` inside the fn body for now: + +In `run_http_read_server` signature add (after `bind`): +```rust + auto_port: bool, +``` +And at the top of `run_http_read_server` body, until Task 3 consumes it: +```rust + let _auto_port = auto_port; +``` +Pass `auto_port` at the call site inside the spawned thread closure in `spawn_with_env`. + +(b) The three `#[cfg(test)]` tests in `http_read.rs` that build `HttpReadConfig { ..., bind, ... }` (`spawn_emits_loopback_no_token_trust_warning`, `spawn_with_taint_writer_shuts_down_cleanly`, `check_running_surfaces_supervisor_signal_after_runtime_panic`) each set `bind` to a probed `SocketAddr`. Wrap each in `Some(...)`: + +```rust + let config = HttpReadConfig { + enabled: true, + bind: Some(bind), + allow_non_loopback: false, + // ...rest unchanged + }; +``` + +(There are three such literals; update all three. `spawn_with_taint_writer_shuts_down_cleanly` and `check_running_surfaces_supervisor_signal_after_runtime_panic` use `bind, ..HttpReadConfig::default()` shorthand — change `bind,` to `bind: Some(bind),`.) 
+ +- [ ] **Step 8: Run the affected suites** + +Run: +```bash +cargo nextest run -p loomweave-federation +cargo nextest run -p loomweave-cli --lib http_read +``` +Expected: PASS. Then a workspace build to catch any other construction site: +```bash +cargo build --workspace --all-features --tests +``` +Expected: compiles clean. If any other `HttpReadConfig { bind: }` or `.bind` read surfaces, wrap/adapt it the same way. + +- [ ] **Step 9: Lint + commit** + +```bash +cargo fmt --all +cargo clippy --workspace --all-targets --all-features -- -D warnings +git add crates/loomweave-federation/src/config.rs crates/loomweave-cli/src/http_read.rs +git commit -m "feat(config): serve.http.bind is Option; None auto-selects per-project port (ADR-044)" +``` + +--- + +## Task 3: Producer — ephemeral fallback + publish RAII + +**Files:** +- Modify: `crates/loomweave-cli/src/http_read.rs` + +Add: auto-port falls back to `:0` on `AddrInUse`; the actually-bound port is published loopback-only via an RAII guard that unlinks on drop (covers graceful shutdown, error-return, and panic-unwind in one place). + +- [ ] **Step 1: Write the failing producer tests** + +In `http_read.rs` `mod tests`, add. These reuse the `http_runtime_test_guard()` and `ReaderPool` patterns already in the file: + +```rust + /// ADR-044: with `bind: None`, two serves on distinct project paths each + /// bind their own deterministic port and publish their own + /// `.loomweave/ephemeral.port`. Neither fails to bind. 
+ #[test] + fn auto_port_publishes_distinct_ports_per_project() { + use loomweave_federation::config::HttpReadConfig; + use loomweave_federation::loomweave_port::read_published_port; + use loomweave_storage::ReaderPool; + + let _guard = http_runtime_test_guard(); + + let make = |id: &str| { + let dir = tempfile::tempdir().expect("tempdir"); + let db = dir.path().join("loomweave.db"); + let readers = ReaderPool::open(&db, 4).expect("reader pool"); + let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + let iid = crate::instance::parse_instance_id_for_test(id).expect("iid"); + let server = spawn(dir.path().to_path_buf(), db, readers, iid, &cfg) + .expect("spawn") + .expect("enabled => Some"); + (dir, server) + }; + + let (dir_a, server_a) = make("00000000-0000-4000-8000-0000000000a1"); + let (dir_b, server_b) = make("00000000-0000-4000-8000-0000000000a2"); + + let port_a = read_published_port(dir_a.path()).expect("a published a port"); + let port_b = read_published_port(dir_b.path()).expect("b published a port"); + assert!(port_a >= 9400 && port_b >= 9400, "ports in the loomweave band"); + // Two live servers => two live ports => they cannot be equal. + assert_ne!(port_a, port_b, "concurrent serves must hold distinct ports"); + + server_a.shutdown().expect("shutdown a"); + server_b.shutdown().expect("shutdown b"); + } + + /// The published file is removed on clean shutdown. 
+ #[test] + fn auto_port_file_removed_on_clean_shutdown() { + use loomweave_federation::config::HttpReadConfig; + use loomweave_federation::loomweave_port::{published_port_path, read_published_port}; + use loomweave_storage::ReaderPool; + + let _guard = http_runtime_test_guard(); + + let dir = tempfile::tempdir().expect("tempdir"); + let db = dir.path().join("loomweave.db"); + let readers = ReaderPool::open(&db, 4).expect("reader pool"); + let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + let iid = crate::instance::parse_instance_id_for_test("00000000-0000-4000-8000-0000000000a3") + .expect("iid"); + let server = spawn(dir.path().to_path_buf(), db, readers, iid, &cfg) + .expect("spawn") + .expect("enabled => Some"); + + assert!(read_published_port(dir.path()).is_some(), "published while serving"); + server.shutdown().expect("shutdown"); + assert!( + !published_port_path(dir.path()).exists(), + "published port file must be gone after clean shutdown" + ); + } + + /// An explicit (operator-set) bind that is already in use is a HARD error — + /// the operator asked for that specific port. Only auto-select falls back. + #[test] + fn explicit_bind_in_use_is_a_hard_error() { + use loomweave_federation::config::HttpReadConfig; + use loomweave_storage::ReaderPool; + use std::net::{SocketAddr, TcpListener}; + + let _guard = http_runtime_test_guard(); + + // Hold a real listener so the address is genuinely occupied. 
+ let held = TcpListener::bind(("127.0.0.1", 0)).expect("hold a port"); + let bind: SocketAddr = held.local_addr().expect("addr"); + + let dir = tempfile::tempdir().expect("tempdir"); + let db = dir.path().join("loomweave.db"); + let readers = ReaderPool::open(&db, 4).expect("reader pool"); + let cfg = HttpReadConfig { + enabled: true, + bind: Some(bind), + ..HttpReadConfig::default() + }; + let iid = crate::instance::parse_instance_id_for_test("00000000-0000-4000-8000-0000000000a4") + .expect("iid"); + + let result = spawn(dir.path().to_path_buf(), db, readers, iid, &cfg); + assert!( + result.is_err(), + "an explicit in-use bind must fail, not silently fall back to :0" + ); + } +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `cargo nextest run -p loomweave-cli --lib http_read::tests::auto_port` +Expected: FAIL — no port is published yet (Task 2 binds the deterministic port but does not publish), and the auto/explicit fallback split is not implemented. + +- [ ] **Step 3: Add the RAII publish guard** + +Near the top of `http_read.rs` (after the imports, before `HttpReadServer`), add: + +```rust +/// Removes the published `.loomweave/ephemeral.port` on drop — covering +/// graceful shutdown, error return, and panic-unwind in one place. Only +/// SIGKILL can strand a stale file, which the read-side validation and the +/// ADR-034 instance-ID guard tolerate (a stale file degrades, never corrupts). +struct PublishedPortGuard { + project_root: PathBuf, +} + +impl Drop for PublishedPortGuard { + fn drop(&mut self) { + loomweave_federation::loomweave_port::remove_published_port(&self.project_root); + } +} +``` + +- [ ] **Step 4: Implement fallback + publish in `run_http_read_server`** + +`run_http_read_server` now needs `auto_port: bool` (added in Task 2) and a clone of `project_root` for publication. `project_root: PathBuf` is already a parameter and is moved into `AppState` later — capture the publish path *before* that move. 
+ +Replace the bind block (currently a single `tokio::net::TcpListener::bind(bind)`) with auto-fallback, and add publication right after `local_addr` is known. Inside the `runtime.block_on(async move { ... })`: + +```rust + // ADR-044: auto-selected ports fall back to an OS-assigned ephemeral + // port if the deterministic port is taken; an explicit operator bind + // does NOT fall back (a taken explicit port is a hard error). + let listener = match tokio::net::TcpListener::bind(bind).await { + Ok(listener) => listener, + Err(err) if auto_port && err.kind() == std::io::ErrorKind::AddrInUse => { + let fallback = std::net::SocketAddr::from(([127, 0, 0, 1], 0)); + match tokio::net::TcpListener::bind(fallback).await { + Ok(listener) => listener, + Err(err) => { + let _ = ready_tx + .send(Err(anyhow!("bind HTTP read API ephemeral fallback: {err}"))); + return Err(anyhow!("bind HTTP read API ephemeral fallback: {err}")); + } + } + } + Err(err) => { + let _ = ready_tx.send(Err(anyhow!("bind HTTP read API on {bind}: {err}"))); + return Err(anyhow!("bind HTTP read API on {bind}: {err}")); + } + }; + let local_addr = match listener.local_addr() { + Ok(addr) => addr, + Err(err) => { + let _ = ready_tx.send(Err(anyhow!("read HTTP read API local addr: {err}"))); + return Err(anyhow!("read HTTP read API local addr: {err}")); + } + }; + // Publish the ACTUALLY-bound port loopback-only (ADR-044 file contract). + // A non-loopback bind publishes NO file — consumers fall back to their + // configured URL. The guard unlinks the file when this scope unwinds. + let _published_port_guard = if local_addr.ip().is_loopback() { + if let Err(err) = + loomweave_federation::loomweave_port::publish_port(&project_root, local_addr.port()) + { + // Publication is best-effort enrichment: a failure to write the + // discovery file must not take the read API down. 
+ tracing::warn!( + error = %err, + port = local_addr.port(), + "failed to publish .loomweave/ephemeral.port; consumers will fall back to configured URL" + ); + None + } else { + Some(PublishedPortGuard { + project_root: project_root.clone(), + }) + } + } else { + None + }; + let _ = ready_tx.send(Ok(HttpReadReady { + local_addr, + readers_identity, + })); +``` + +Note the `_published_port_guard` binding lives for the rest of the `block_on` async scope (through `serve_future`), so it drops — and unlinks — exactly when serving ends (graceful, error, or panic). Delete the old `let _auto_port = auto_port;` placeholder line from Task 2 now that `auto_port` is consumed. + +- [ ] **Step 5: Run the producer tests** + +Run: `cargo nextest run -p loomweave-cli --lib http_read` +Expected: PASS — including the three new tests and all pre-existing ones. + +- [ ] **Step 6: Lint + commit** + +```bash +cargo fmt --all +cargo clippy --workspace --all-targets --all-features -- -D warnings +git add crates/loomweave-cli/src/http_read.rs +git commit -m "feat(serve): auto-select read-API port with ephemeral fallback; publish .loomweave/ephemeral.port (ADR-044)" +``` + +--- + +## Task 4: Installer stub stops pinning `9111` + +**Files:** +- Modify: `crates/loomweave-cli/src/install.rs` +- Modify: `crates/loomweave-cli/tests/install.rs` + +- [ ] **Step 1: Update the YAML stub** + +In `install.rs`, the `LOOMWEAVE_YAML_STUB` ends with: + +``` +serve: + mcp: + enable_write_tools: false + http: + enabled: false + bind: 127.0.0.1:9111 +"; +``` + +Replace the `http:` block (drop the `bind:` line, add an explanatory comment): + +``` +serve: + mcp: + enable_write_tools: false + http: + enabled: false + # The read-API port is auto-selected per project (deterministic, with an + # ephemeral fallback) and published to .loomweave/ephemeral.port while + # serving. Set `bind:` explicitly only to pin a fixed port (ADR-044). 
+"; +``` + +- [ ] **Step 2: Update the install test that asserts the stub bind** + +There is no dedicated stub test asserting `serve.http.bind` in `tests/install.rs` for the bare `install` path; the `9111` assertions are all in the `--all` bindings test (Task 5) and `doctor.rs` (Task 5/6). Confirm with: + +Run: `grep -n "9111\|http\"\]\[\"bind" crates/loomweave-cli/tests/install.rs` +If a bare-stub test asserts the bind, change it to assert the key is absent: +```rust + assert!(loomweave_yaml["serve"]["http"].get("bind").is_none()); +``` + +- [ ] **Step 3: Run + commit** + +```bash +cargo nextest run -p loomweave-cli --test install +cargo fmt --all +git add crates/loomweave-cli/src/install.rs crates/loomweave-cli/tests/install.rs +git commit -m "feat(install): YAML stub no longer pins serve.http.bind 9111 (ADR-044)" +``` + +(If Step 2 found nothing to change, commit `install.rs` alone.) + +--- + +## Task 5: `integration_bindings` writes the deterministic URL + +**Files:** +- Modify: `crates/loomweave-cli/src/integration_bindings.rs` +- Modify: `crates/loomweave-cli/tests/install.rs` +- Modify: `crates/loomweave-cli/tests/doctor.rs` + +`loomweave install --all` currently stamps `bind: 9111` into `loomweave.yaml` and `loomweave.url: http://127.0.0.1:9111` into `wardline.yaml` + `.mcp.json` — the real cross-project root cause. After this task: it stops writing a fixed `bind` (so auto-port + fallback engages), and writes the **deterministic** `loomweave.url` (the best static target until Wardline adopts consume-time resolution; the published file overrides it at runtime). 
+ +- [ ] **Step 1: Compute the deterministic Loomweave URL per project** + +In `integration_bindings.rs`, delete the two fixed constants: + +```rust +const LOOMWEAVE_HTTP_BIND: &str = "127.0.0.1:9111"; +const LOOMWEAVE_HTTP_URL: &str = "http://127.0.0.1:9111"; +``` + +Add the deterministic URL to `DesiredBindings` and compute it in `desired_bindings`: + +```rust +struct DesiredBindings { + filigree_base_url: String, + wardline_filigree_url: String, + loomweave_url: String, +} +``` + +```rust +fn desired_bindings(project_root: &Path) -> DesiredBindings { + let filigree_base_url = live_filigree_base_url(project_root) + .or_else(|| configured_filigree_base_url(project_root)) + .unwrap_or_else(|| DEFAULT_FILIGREE_BASE_URL.to_owned()); + let wardline_filigree_url = format!( + "{}/api/weft/scan-results", + filigree_base_url.trim_end_matches('/') + ); + // ADR-044: seed the consumer's static target with this project's + // deterministic read-API port. serve binds the same port (barring an + // ephemeral fallback), and the published .loomweave/ephemeral.port file + // overrides this at runtime once a consumer resolves consume-time. + let port = loomweave_federation::loomweave_port::deterministic_port(project_root); + let loomweave_url = format!("http://127.0.0.1:{port}"); + DesiredBindings { + filigree_base_url, + wardline_filigree_url, + loomweave_url, + } +} +``` + +- [ ] **Step 2: Stop writing a fixed `bind` into `loomweave.yaml`** + +In `install_loomweave_yaml`, the `serve.http` block currently inserts `bind`. 
Remove that line: + +```rust + let serve = ensure_object(root, "serve")?; + let http = ensure_object(serve, "http")?; + http.insert("enabled".to_owned(), json!(true)); + http.insert("wardline_taint_write".to_owned(), json!(true)); + write_yaml_if_changed(&path, &value) +``` + +In `loomweave_yaml_ok`, drop the `bind` predicate from the `serve.http` check: + +```rust + && value + .get("serve") + .and_then(|serve| serve.get("http")) + .is_some_and(|http| { + http.get("enabled").and_then(Value::as_bool) == Some(true) + && http.get("wardline_taint_write").and_then(Value::as_bool) == Some(true) + })) +``` + +- [ ] **Step 3: Write the deterministic URL into `wardline.yaml` + `.mcp.json`** + +`install_wardline_yaml`: +```rust + loomweave.insert("url".to_owned(), json!(desired.loomweave_url)); +``` + +`wardline_yaml_ok`: +```rust + Ok(value + .get("loomweave") + .and_then(|loomweave| loomweave.get("url")) + .and_then(Value::as_str) + == Some(desired.loomweave_url.as_str()) + && value + .get("filigree") + .and_then(|filigree| filigree.get("url")) + .and_then(Value::as_str) + == Some(desired.wardline_filigree_url.as_str())) +``` + +`desired_wardline_args`: +```rust +fn desired_wardline_args(desired: &DesiredBindings) -> Value { + json!([ + "mcp", + "--root", + ".", + "--loomweave-url", + desired.loomweave_url, + "--filigree-url", + desired.wardline_filigree_url + ]) +} +``` + +- [ ] **Step 4: Update the `--all` bindings test** + +In `tests/install.rs`, `install_all_wires_three_way_integration_bindings`: the install canonicalizes `--path`, so compute the expected URL the same way. Replace the `bind`/`loomweave-url` assertions: + +```rust + // ADR-044: no fixed bind is written; the port is auto-selected at serve time. 
+ assert!(loomweave_yaml["serve"]["http"].get("bind").is_none()); + assert_eq!( + loomweave_yaml["serve"]["http"]["wardline_taint_write"], + serde_json::json!(true) + ); + + let expected_port = loomweave_federation::loomweave_port::deterministic_port( + &dir.path().canonicalize().unwrap(), + ); + let expected_loomweave_url = format!("http://127.0.0.1:{expected_port}"); + + let wardline_yaml = read_yaml(&dir.path().join("wardline.yaml")); + assert_eq!(wardline_yaml["loomweave"]["url"], expected_loomweave_url); + assert_eq!( + wardline_yaml["filigree"]["url"], + "http://127.0.0.1:8749/api/weft/scan-results" + ); + + let mcp: serde_json::Value = + serde_json::from_str(&fs::read_to_string(dir.path().join(".mcp.json")).unwrap()).unwrap(); + assert_eq!( + mcp["mcpServers"]["wardline"]["args"], + serde_json::json!([ + "mcp", + "--root", + ".", + "--loomweave-url", + expected_loomweave_url, + "--filigree-url", + "http://127.0.0.1:8749/api/weft/scan-results" + ]) + ); +``` + +Confirm `tests/install.rs` can reach the helper: `loomweave-cli` depends on `loomweave-federation`, so `loomweave_federation::loomweave_port::deterministic_port` is in scope from an integration test. If the import path errors, add `use loomweave_federation::loomweave_port::deterministic_port;` and call it unqualified. + +- [ ] **Step 5: Update the `doctor.rs` bindings-repair test** + +In `tests/doctor.rs`, the repair test (around line 198–227) asserts `--loomweave-url http://127.0.0.1:9111`. Replace with the computed URL (the doctor test also operates on a tempdir; check whether it canonicalizes — match whatever the repaired files actually contain by computing from the same path the repair used): + +```rust + let expected_port = loomweave_federation::loomweave_port::deterministic_port( + &dir.path().canonicalize().unwrap(), + ); + let expected_loomweave_url = format!("http://127.0.0.1:{expected_port}"); + // ... 
+ assert_eq!( + mcp["mcpServers"]["wardline"]["args"], + serde_json::json!([ + "mcp", + "--root", + ".", + "--loomweave-url", + expected_loomweave_url, + "--filigree-url", + "http://127.0.0.1:8749/api/weft/scan-results" + ]) + ); +``` + +If the doctor test reads `loomweave.yaml["serve"]["http"]["bind"]` anywhere, change that to `.get("bind").is_none()`. + +- [ ] **Step 6: Run + commit** + +```bash +cargo nextest run -p loomweave-cli --test install --test doctor +cargo nextest run -p loomweave-cli --lib integration_bindings +cargo fmt --all +cargo clippy --workspace --all-targets --all-features -- -D warnings +git add crates/loomweave-cli/src/integration_bindings.rs crates/loomweave-cli/tests/install.rs crates/loomweave-cli/tests/doctor.rs +git commit -m "feat(install): integration bindings use per-project deterministic loomweave URL, no fixed bind (ADR-044)" +``` + +--- + +## Task 6 (CUTTABLE): `resolve_loomweave_url` + its one caller (doctor) + +**Files:** +- Create: `crates/loomweave-federation/src/loomweave_url.rs` +- Modify: `crates/loomweave-federation/src/lib.rs` +- Modify: `crates/loomweave-cli/src/doctor.rs` + +The resolver is the reference reader of the file contract (the shape Wardline's Python twin mirrors), and `doctor`'s HTTP check is its one in-tree caller — so it ships *with* a caller, not as dead code. **This task is cuttable**: if it slips, the collision is already fixed by Tasks 1–5; defer resolver+caller as a unit. + +- [ ] **Step 1: Write the failing resolver tests** + +Create `crates/loomweave-federation/src/loomweave_url.rs`: + +```rust +//! Resolve the live Loomweave read-API base URL (ADR-044). +//! +//! The reference reader of the `.loomweave/ephemeral.port` file contract and +//! the twin of [`crate::filigree_url`]. Precedence (consumer-side): the +//! published live port wins over a configured URL, which wins over nothing. +//! (ADR-044's higher "explicit flag/env" precedence level is realized by each +//! 
consumer's own CLI/env handling — e.g. Wardline's `--loomweave-url` — not by +//! this library function.) Fail-soft throughout: a missing/corrupt file folds +//! to the configured URL; absent both, `None` (federation simply degrades). + +use std::path::Path; + +use crate::loomweave_port::read_published_port; + +/// The live published port file `.loomweave/ephemeral.port`. +pub const SOURCE_EPHEMERAL_PORT: &str = ".loomweave/ephemeral.port"; +/// A statically configured URL (e.g. `wardline.yaml: loomweave.url`). +pub const SOURCE_CONFIG: &str = "config"; +/// Neither a published file nor a configured URL — federation is absent. +pub const SOURCE_NONE: &str = "none"; + +/// Where a resolved Loomweave read-API URL came from. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct LoomweaveUrlResolution { + /// The URL a consumer should call, or `None` when nothing resolves. + pub resolved_url: Option<String>, + /// One of the `SOURCE_*` labels. + pub source: &'static str, +} + +/// Resolve the read-API URL, preferring the live published port over the +/// configured URL. `configured_url` is the consumer's static fallback (pass +/// `None` if it has none). 
+#[must_use] +pub fn resolve_loomweave_url( + configured_url: Option<&str>, + project_root: &Path, +) -> LoomweaveUrlResolution { + if let Some(port) = read_published_port(project_root) { + return LoomweaveUrlResolution { + resolved_url: Some(format!("http://127.0.0.1:{port}")), + source: SOURCE_EPHEMERAL_PORT, + }; + } + match configured_url { + Some(url) if !url.trim().is_empty() => LoomweaveUrlResolution { + resolved_url: Some(url.to_owned()), + source: SOURCE_CONFIG, + }, + _ => LoomweaveUrlResolution { + resolved_url: None, + source: SOURCE_NONE, + }, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::loomweave_port::publish_port; + + #[test] + fn published_port_beats_configured_url() { + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9412).unwrap(); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); + assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:9412")); + assert_eq!(res.source, SOURCE_EPHEMERAL_PORT); + } + + #[test] + fn falls_back_to_configured_url_when_no_file() { + let dir = tempfile::tempdir().unwrap(); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); + assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:9111")); + assert_eq!(res.source, SOURCE_CONFIG); + } + + #[test] + fn corrupt_file_folds_to_configured_url() { + let dir = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + std::fs::write( + dir.path().join(".loomweave").join("ephemeral.port"), + "not-a-port", + ) + .unwrap(); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); + assert_eq!(res.source, SOURCE_CONFIG); + } + + #[test] + fn nothing_resolves_to_none() { + let dir = tempfile::tempdir().unwrap(); + let res = resolve_loomweave_url(None, dir.path()); + assert_eq!(res.resolved_url, None); + assert_eq!(res.source, SOURCE_NONE); + } +} +``` + +Declare it in `lib.rs`: +```rust +pub mod loomweave_url; +``` 
+ +- [ ] **Step 2: Run to verify failure, then pass** + +Run: `cargo nextest run -p loomweave-federation loomweave_url` +Expected: FAIL (module not yet declared / functions absent) → after Step 1 is fully in place, PASS (4 tests). + +- [ ] **Step 3: Write the failing doctor test** + +In `crates/loomweave-cli/tests/doctor.rs`, add a test that a serving project's published port shows up in the HTTP check. Since spawning a real server in the doctor integration test is heavy, instead test the file-present branch by writing the file directly, then run `doctor_json` and assert the `http.config` check reports the published port: + +```rust +#[test] +fn doctor_reports_published_ephemeral_port() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + // Simulate a live serve having published its port. + let loomweave_dir = dir.path().join(".loomweave"); + std::fs::create_dir_all(&loomweave_dir).unwrap(); + std::fs::write(loomweave_dir.join("ephemeral.port"), "9876\n").unwrap(); + + let (code, json) = doctor_json(dir.path(), false); + assert_eq!(code, 0, "{json}"); + let http = json["checks"] + .as_array() + .unwrap() + .iter() + .find(|c| c["id"] == "http.config") + .expect("http.config check present"); + assert_eq!(http["status"], "ok"); + assert!( + http["message"].as_str().unwrap_or("").contains("9876"), + "http.config should report the published live port: {http}" + ); +} +``` + +(`DoctorJsonCheck` serializes its human text as the `message` field — confirmed in `doctor.rs:105-110`.) + +- [ ] **Step 4: Run to verify failure** + +Run: `cargo nextest run -p loomweave-cli --test doctor doctor_reports_published` +Expected: FAIL — `check_http_config_json` does not read the published file yet. 
+ +- [ ] **Step 5: Wire the resolver into `check_http_config_json`** + +Replace `check_http_config_json` in `doctor.rs`: + +```rust +fn check_http_config_json(project_root: &Path) -> DoctorJsonCheck { + let Some(config) = read_loomweave_yaml(project_root) else { + return DoctorJsonCheck::warning("http.config", "loomweave.yaml is absent or unparseable"); + }; + let enabled = config + .get("serve") + .and_then(|serve| serve.get("http")) + .and_then(|http| http.get("enabled")) + .and_then(Value::as_bool) + == Some(true); + if !enabled { + return DoctorJsonCheck::warning("http.config", "HTTP serve config is disabled or incomplete"); + } + // ADR-044: prefer the live published port over the (now usually absent) + // static bind. A running serve publishes .loomweave/ephemeral.port. + let resolution = + loomweave_federation::loomweave_url::resolve_loomweave_url(None, project_root); + if let Some(url) = resolution.resolved_url { + return DoctorJsonCheck::ok( + "http.config", + format!("HTTP read API published on {url} ({})", resolution.source), + ); + } + let bind = config + .get("serve") + .and_then(|serve| serve.get("http")) + .and_then(|http| http.get("bind")) + .and_then(Value::as_str) + .unwrap_or(""); + if bind.trim().is_empty() { + DoctorJsonCheck::ok( + "http.config", + "HTTP enabled; read-API port auto-selected and published to .loomweave/ephemeral.port while serving", + ) + } else { + DoctorJsonCheck::ok("http.config", format!("HTTP configured on {bind} (auto-published while serving)")) + } +} +``` + +- [ ] **Step 6: Run resolver + doctor suites** + +Run: +```bash +cargo nextest run -p loomweave-federation loomweave_url +cargo nextest run -p loomweave-cli --test doctor +``` +Expected: PASS. 
+ +- [ ] **Step 7: Lint + commit** + +```bash +cargo fmt --all +cargo clippy --workspace --all-targets --all-features -- -D warnings +git add crates/loomweave-federation/src/loomweave_url.rs crates/loomweave-federation/src/lib.rs crates/loomweave-cli/src/doctor.rs crates/loomweave-cli/tests/doctor.rs +git commit -m "feat(doctor): resolve_loomweave_url + doctor reports live published read-API port (ADR-044)" +``` + +--- + +## Task 7: Docs, ADR acceptance, stopgap revert + +**Files:** +- Modify: `docs/operator/loomweave-http-read-api.md`, `docs/operator/secret-scanning.md`, `docs/federation/contracts.md` +- Modify: `loomweave.yaml`, `wardline.yaml` (repo root) +- Modify: `docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md`, `docs/loomweave/adr/README.md`, `docs/suite/glossary.md` + +- [ ] **Step 1: Update operator docs** + +In each of `docs/operator/loomweave-http-read-api.md`, `docs/operator/secret-scanning.md`, `docs/federation/contracts.md`, replace the `bind: 127.0.0.1:9111` / `default: 127.0.0.1:9111` references with the auto-port description. Read each hit (from `grep -n 9111 <file>`) and rewrite in context, e.g.: + +> The read-API port is auto-selected per project — a deterministic port in Loomweave's band (`9400–10399`, disjoint from Filigree's `8400–9399`) with an ephemeral fallback — and published to `.loomweave/ephemeral.port` while `serve` runs. Set `serve.http.bind` explicitly only to pin a fixed port. (ADR-044) + +Leave `docs/loomweave/adr/ADR-044-*.md`'s own `9111` references (they describe the *problem*) and `docs/archive/**` (archived, non-normative) and the Filigree-side `docs/federation/filigree-side/ADR-014-*.md` (a Filigree example) unchanged. + +- [ ] **Step 2: Revert the local stopgaps** + +`loomweave.yaml` (repo root) currently has `serve.http.bind: 127.0.0.1:9112`. 
Remove the `bind:` line so this very project uses auto-port: + +```yaml +serve: + http: + enabled: true + wardline_taint_write: true +``` + +For `wardline.yaml`, make a conscious choice (advisor item 1): pin it to *this* project's deterministic port so local Wardline→Loomweave federation keeps working until the Wardline Python twin lands. Compute it: + +```bash +cargo run -p loomweave-cli -- doctor --json /home/john/loomweave 2>/dev/null | grep -o '127.0.0.1:[0-9]*' | head -1 +``` +…or simpler, add a throwaway `#[test]` that prints `deterministic_port(Path::new("/home/john/loomweave"))`, or compute via a one-off `cargo run`. Then set: + +```yaml +loomweave: + # ADR-044: pinned to this project's deterministic read-API port. The published + # .loomweave/ephemeral.port overrides this once Wardline resolves consume-time + # (clarion-7f574bc34f follow-up). Until then this static target keeps local + # wardline -> loomweave federation working. + url: http://127.0.0.1:<port> +``` + +Verify by starting serve and confirming the published file matches: +```bash +# In one shell: +cargo run -p loomweave-cli -- serve /home/john/loomweave & +sleep 2 +cat /home/john/loomweave/.loomweave/ephemeral.port # should equal <port> +kill %1 +``` + +- [ ] **Step 3: Glossary verdict (acceptance gate)** + +`docs/loomweave/adr/README.md` requires a `glossary.md` verdict before an ADR moves Proposed→Accepted for any cross-product-visible term. `.loomweave/ephemeral.port` mirrors Filigree's `.filigree/ephemeral.port` — a **managed clash** (shared convention, distinct per-product paths). Read `docs/suite/glossary.md`, find the `ephemeral.port` / Filigree entry, and add a Loomweave row recording the managed-clash verdict and the mapping (`.filigree/ephemeral.port` ↔ `.loomweave/ephemeral.port`, identical format, loopback-only). If no such entry exists, add one under the federation-terms section. 
+ +- [ ] **Step 4: Flip ADR-044 to Accepted** + +In `ADR-044-*.md`, change `**Status**: Proposed` → `**Status**: Accepted` and add a one-line acceptance note referencing the glossary verdict and the implementing commits. In `README.md`, change the ADR-044 row's trailing `| Proposed |` → `| Accepted |`. + +- [ ] **Step 5: Full CI floor** + +Run the complete gate (CLAUDE.md): +```bash +cargo fmt --all -- --check +cargo clippy --workspace --all-targets --all-features -- -D warnings +cargo build --workspace --bins +cargo nextest run --workspace --all-features +RUSTDOCFLAGS="-D warnings" cargo doc --workspace --no-deps --all-features +cargo deny check +``` +Expected: all green. + +- [ ] **Step 6: Wardline boundary gate** + +This feature reads external input (the port file, config files). Run: +```bash +wardline scan . --fail-on ERROR +``` +Expected: exit 0. If it trips, fix at the boundary (the `read_published_port` parse is already validated/fail-soft; address any new finding). + +- [ ] **Step 7: Commit + close the issue** + +```bash +git add docs/operator docs/federation/contracts.md docs/loomweave/adr docs/suite/glossary.md loomweave.yaml wardline.yaml +git commit -m "docs(adr): accept ADR-044; auto-port docs, glossary verdict, revert 9112 stopgap" +``` + +Close `clarion-7f574bc34f` with a summary comment (CLI): `filigree close clarion-7f574bc34f --actor opus`. + +--- + +## Self-Review + +**Spec coverage (ADR-044 §Decision + §Verification):** +- Decision 1 (deterministic port + ephemeral fallback) → Task 1 (`deterministic_port`) + Task 3 (fallback). +- Decision 2 (publish `.loomweave/ephemeral.port` per file contract) → Task 1 (`publish_port`, atomic, port-only, trailing `\n`) + Task 3 (loopback-only, lifecycle via RAII). +- Decision 3 (loomweave-side resolver, one of conforming readers) → Task 6 (`resolve_loomweave_url` + doctor caller). 
+- Decision 4 (installer stops pinning a port; explicit override honored) → Task 4 (stub) + Task 5 (bindings) + Task 2 (`Some` honored, `None` auto). +- Verification: distinct ports/no bind failure → T3 `auto_port_publishes_distinct_ports_per_project`; collision→ephemeral fallback reflects actual port → T3 (fallback path) + T1; file contract (bare port, temp+rename, no file on non-loopback) → T1 + T3 publish branch; precedence (file>config>none) → T6; fail-soft (malformed/out-of-range/refused) → T1 read tests + T6 corrupt test; removed on clean shutdown → T3 `auto_port_file_removed_on_clean_shutdown`; wardline scan against non-9111 serve → realized by Task 5 deterministic URL + Task 7 local verify. +- Resolved-but-refused (closed port) softness: covered behaviorally — `resolve_loomweave_url` returns the URL; the *connection attempt* is the consumer's (Wardline's) responsibility, and the ADR-034 instance-ID guard backstops a stale file. No in-tree consumer connects, so no Rust test asserts refusal; noted, not silently dropped. + +**Placeholder scan:** every code step shows complete code. Two reads-to-confirm remain (doctor `DoctorJsonCheck` serialized field name in T6 Step 3; any bare-stub bind assertion in T4 Step 2) — both are explicit "read X, match the real name" instructions with the fallback spelled out, not hidden TODOs. + +**Type consistency:** `deterministic_port(&Path) -> u16`, `read_published_port(&Path) -> Option<u16>`, `publish_port(&Path, u16) -> io::Result<()>`, `remove_published_port(&Path)`, `published_port_path(&Path) -> PathBuf`, `resolve_loomweave_url(Option<&str>, &Path) -> LoomweaveUrlResolution` are used identically across Tasks 1, 3, 5, 6. `HttpReadConfig.bind: Option<SocketAddr>` is consistent across Tasks 2–3 and all test sites. `auto_port: bool` is added in Task 2 and consumed in Task 3 (placeholder `_auto_port` removed there). 
From c7f253000b751ada41502f4c6f1733040911d3a8 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:45:01 +1000 Subject: [PATCH 17/60] fix(adr-044): gitignore ephemeral.port template, wire project_status read-API report, reconcile ADR precedence - install.rs GITIGNORE_CONTENTS now ignores .loomweave/ephemeral.port so freshly-installed projects do not show the runtime port file as untracked while serving; install test asserts the new rule. - project_status_get reports loomweave_read_api (resolved_url + resolution_source) via a query-time resolve_loomweave_url(None, project_root), the second in-repo consumer named by ADR-044 alongside doctor. Additive field; existing project_status tests unaffected. Two new tests cover published-port and no-file ("none") cases. - ADR-044: clarify precedence level 1 is an operator's deliberately-supplied target (typed flag/env), while an installer-seeded --loomweave-url in .mcp.json is config-tier (precedence 3) so the published file self-heals it; added a Related follow-up bullet for Wardline (clarion-7f574bc34f). Co-Authored-By: Claude Opus 4.8 --- crates/loomweave-cli/src/install.rs | 7 +++- crates/loomweave-cli/tests/install.rs | 1 + crates/loomweave-mcp/src/tools/status.rs | 14 +++++++ crates/loomweave-mcp/tests/storage_tools.rs | 38 +++++++++++++++++++ ...044-read-api-ephemeral-port-publication.md | 17 +++++++-- 5 files changed, 73 insertions(+), 4 deletions(-) diff --git a/crates/loomweave-cli/src/install.rs b/crates/loomweave-cli/src/install.rs index 626654c6..a690b229 100644 --- a/crates/loomweave-cli/src/install.rs +++ b/crates/loomweave-cli/src/install.rs @@ -83,7 +83,12 @@ serve: const GITIGNORE_CONTENTS: &str = "\ # Loomweave .gitignore — ADR-005 tracked-vs-excluded list. # Tracked (committed): loomweave.db, config.json, .gitignore itself. -# Excluded (ignored): WAL sidecars, shadow DB, per-run logs, tmp scratch. 
+# Excluded (ignored): WAL sidecars, shadow DB, per-run logs, tmp scratch, +# the read-API live port discovery file. + +# Read-API live port discovery file (ADR-044): present only while serve runs, +# rewritten per bind, loopback-only — a runtime artifact, never committed. +ephemeral.port # SQLite write-ahead files never belong in the repo. *-wal diff --git a/crates/loomweave-cli/tests/install.rs b/crates/loomweave-cli/tests/install.rs index 3a73c3e3..66bdc47b 100644 --- a/crates/loomweave-cli/tests/install.rs +++ b/crates/loomweave-cli/tests/install.rs @@ -58,6 +58,7 @@ fn install_creates_loomweave_dir_with_expected_contents() { "runs/*/log.jsonl", "*-wal", "*-shm", + "ephemeral.port", ] { assert!( gitignore.contains(rule), diff --git a/crates/loomweave-mcp/src/tools/status.rs b/crates/loomweave-mcp/src/tools/status.rs index abeec78b..5df6669f 100644 --- a/crates/loomweave-mcp/src/tools/status.rs +++ b/crates/loomweave-mcp/src/tools/status.rs @@ -283,6 +283,7 @@ impl ServerState { }, "llm": self.llm_diagnostics_json(), "filigree": self.filigree_diagnostics_json(), + "loomweave_read_api": self.loomweave_read_api_json(), }); Ok(success_envelope(result)) @@ -354,6 +355,19 @@ impl ServerState { } } + /// ADR-044: report the live read-API endpoint resolved from + /// `.loomweave/ephemeral.port` (the reference reader; `doctor` reports the + /// same). Pass `None` config — `project_status` has no static loomweave URL + /// of its own; this surfaces whether serve is currently publishing. 
+ pub(crate) fn loomweave_read_api_json(&self) -> Value { + let resolution = + loomweave_federation::loomweave_url::resolve_loomweave_url(None, &self.project_root); + json!({ + "resolved_url": resolution.resolved_url, + "resolution_source": resolution.source, + }) + } + pub(crate) async fn read_issues_for_entities( &self, entity_id: String, diff --git a/crates/loomweave-mcp/tests/storage_tools.rs b/crates/loomweave-mcp/tests/storage_tools.rs index 5eb3dda2..f9040659 100644 --- a/crates/loomweave-mcp/tests/storage_tools.rs +++ b/crates/loomweave-mcp/tests/storage_tools.rs @@ -11,6 +11,13 @@ use loomweave_core::{ LlmPurpose, LlmRequest, LlmResponse, OpenRouterProvider, OpenRouterProviderConfig, Recording, RecordingProvider, build_inferred_calls_prompt, build_leaf_summary_prompt, }; +use loomweave_federation::{ + loomweave_port::publish_port, + loomweave_url::{ + SOURCE_EPHEMERAL_PORT as LOOMWEAVE_SOURCE_EPHEMERAL_PORT, + SOURCE_NONE as LOOMWEAVE_SOURCE_NONE, + }, +}; use loomweave_mcp::{ DiagnosticsContext, LlmDiagnostics, McpToolPolicy, ServerState, config::{FiligreeConfig, LlmConfig, LlmProviderKind}, @@ -4982,6 +4989,37 @@ async fn project_status_filigree_falls_back_to_config_without_port_file() { assert_eq!(envelope["result"]["llm"]["live"], true); } +#[tokio::test] +async fn project_status_reports_loomweave_read_api_published_port() { + // ADR-044: project_status surfaces the live read-API endpoint resolved from + // .loomweave/ephemeral.port (the second in-repo consumer of the resolver, + // alongside doctor). No diagnostics context is needed — it resolves the + // file at query time from the project root. 
+ let (project, db_path) = open_project(); + publish_port(project.path(), 9412).unwrap(); + + let state = state_for(project.path(), &db_path); + let envelope = call_tool(&state, "project_status", json!({})).await; + let read_api = &envelope["result"]["loomweave_read_api"]; + assert_eq!(read_api["resolved_url"], "http://127.0.0.1:9412"); + assert_eq!( + read_api["resolution_source"], + LOOMWEAVE_SOURCE_EPHEMERAL_PORT + ); +} + +#[tokio::test] +async fn project_status_loomweave_read_api_none_without_port_file() { + // No published port file → resolution_source is "none" and resolved_url is + // null (project_status has no static loomweave URL of its own). + let (project, db_path) = open_project(); + let state = state_for(project.path(), &db_path); + let envelope = call_tool(&state, "project_status", json!({})).await; + let read_api = &envelope["result"]["loomweave_read_api"]; + assert_eq!(read_api["resolved_url"], Value::Null); + assert_eq!(read_api["resolution_source"], LOOMWEAVE_SOURCE_NONE); +} + // --------------------------------------------------------------------------- // Wardline Flow B helpers and tests // --------------------------------------------------------------------------- diff --git a/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md index 9af2ded1..bf635329 100644 --- a/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md +++ b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md @@ -134,9 +134,15 @@ time today, is the cautionary case (see related follow-up). **Precedence (highest wins):** -1. An **explicit, deliberate target** — `--loomweave-url` flag or environment - override — always wins. The published port must never override a target the - operator set on purpose (remote loomweave, debugging a specific instance). +1. 
An **explicit, deliberate target** — a `--loomweave-url` flag the operator + *types* or an environment override they set — always wins. The published port + must never override a target the operator chose on purpose (remote loomweave, + debugging a specific instance). Provenance, not flag spelling, is what makes a + value level 1: an **installer-seeded `--loomweave-url` baked into `.mcp.json`** + (e.g. the deterministic URL `loomweave install` stamps into Wardline's MCP + args) is **not** an operator's deliberate choice — it is config-tier + (precedence 3), so the published file overrides it and self-heals when an + ephemeral fallback fired. 2. The **published port file** `.loomweave/ephemeral.port` (composed to `http://127.0.0.1:<port>`). This **beats a stale/default configured URL** so resolution self-heals without a config edit. @@ -202,3 +208,8 @@ static config URL at scan time, so its filigree leg carries the same latent staleness this ADR removes for the loomweave leg. Unifying both consumers on consume-time resolution is Wardline-side work, tracked separately; flagged here so the two legs are not designed divergently. + +- Wardline should treat install-seeded MCP args (the `--loomweave-url` baked into + `.mcp.json` by `loomweave install`) as config-tier and resolve consume-time, so + the published `.loomweave/ephemeral.port` file wins over the baked deterministic + URL when an ephemeral fallback fired. Tracked clarion-7f574bc34f. From b3f58a6ae72ebe8ee043b694227e638074e3f973 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 10:26:43 +1000 Subject: [PATCH 18/60] chore(release): bump to 1.1.0rc1 (Cargo 1.1.0-rc1 / PEP440 1.1.0rc1) Workspace + Python plugin in lockstep. Cross-ecosystem version normalization in check-workspace-version-lockstep.py (SemVer prerelease 1.1.0-rc1 == PEP 440 1.1.0rc1). CHANGELOG: ADR-044 ephemeral-port deconfliction + no-index MCP chirp.
No package published for release candidates. Co-Authored-By: Claude Opus 4.8 --- CHANGELOG.md | 36 ++++++++++++++ Cargo.lock | 16 +++---- Cargo.toml | 2 +- crates/loomweave-cli/Cargo.toml | 14 +++--- crates/loomweave-cli/pyproject.toml | 4 +- crates/loomweave-federation/Cargo.toml | 2 +- crates/loomweave-mcp/Cargo.toml | 6 +-- crates/loomweave-plugin-fixture/Cargo.toml | 2 +- crates/loomweave-storage/Cargo.toml | 2 +- plugins/python/plugin.toml | 2 +- plugins/python/pyproject.toml | 2 +- .../src/loomweave_plugin_python/__init__.py | 2 +- plugins/python/tests/test_package.py | 4 +- plugins/python/tests/test_server.py | 2 +- plugins/python/uv.lock | 2 +- scripts/check-workspace-version-lockstep.py | 47 +++++++++++++++---- 16 files changed, 105 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e5ec4e4..f69bd967 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,42 @@ only when an incompatible change is made to that surface. See ## [Unreleased] +## [1.1.0rc1] — 2026-06-06 + +First 1.1 release candidate. No package is published for release candidates — +the `1.1.0` package ships only at the final tag. (Cargo SemVer `1.1.0-rc1`; +the Python wheels normalise to PEP 440 `1.1.0rc1`.) + +### Added + +- **Read-API ephemeral port publication (ADR-044).** `loomweave serve` binds a + per-project **deterministic** read-API port (blake3 over the canonical project + path, band `9400–10399`, disjoint from Filigree's `8400–9399`) with an + OS-assigned **ephemeral fallback** when that port is taken, and publishes the + *actually bound* port to `.loomweave/ephemeral.port` — a normative cross-product + file contract (port-only ASCII + optional trailing newline, atomic temp+rename, + **loopback-only**, removed on clean shutdown). This resolves the cross-project + `127.0.0.1:9111` bind collision so multiple projects can `serve` concurrently + without mis-targeting one another. 
New consume-time resolver + `resolve_loomweave_url` (precedence: explicit target > published file > + configured URL > none) is the reference reader; `doctor` and + `project_status_get` report the live published endpoint. The published file is + git-ignored. +- **No-index degraded MCP mode.** `serve` on a project with no index no longer + exits 1 — it serves a degraded MCP stdio session that answers `initialize` and + chirps to run `loomweave install` + `loomweave analyze` from every tool call, + so the MCP client connects and is told how to recover. + +### Changed + +- **`serve.http.bind` is now optional** (`Option`). Unset — the new + default — auto-selects and publishes the per-project deterministic port; an + explicit value is honoured verbatim (no fallback). The installer no longer + stamps `serve.http.bind: 127.0.0.1:9111`, the integration bindings write the + per-project deterministic loomweave URL, and `install`/`doctor --fix` self-heal + the stale hard-coded `9111` stamp on existing projects. +- Version bumped to `1.1.0rc1` across the Rust workspace and the Python plugin. 
+ ## [1.0.0] — Loomweave — 2026-06-05 **This release renames the product and re-baselines its version.** What shipped diff --git a/Cargo.lock b/Cargo.lock index c6cb083e..39a16ebc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1057,7 +1057,7 @@ checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" [[package]] name = "loomweave-analysis" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "anyhow", "serde", @@ -1067,7 +1067,7 @@ dependencies = [ [[package]] name = "loomweave-cli" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "anyhow", "assert_cmd", @@ -1106,7 +1106,7 @@ dependencies = [ [[package]] name = "loomweave-core" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "async-trait", "nix", @@ -1123,7 +1123,7 @@ dependencies = [ [[package]] name = "loomweave-federation" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "blake3", "loomweave-core", @@ -1137,7 +1137,7 @@ dependencies = [ [[package]] name = "loomweave-mcp" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "async-trait", "blake3", @@ -1160,7 +1160,7 @@ dependencies = [ [[package]] name = "loomweave-plugin-fixture" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "loomweave-core", "nix", @@ -1169,7 +1169,7 @@ dependencies = [ [[package]] name = "loomweave-scanner" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "regex", "serde", @@ -1181,7 +1181,7 @@ dependencies = [ [[package]] name = "loomweave-storage" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "blake3", "deadpool-sqlite", diff --git a/Cargo.toml b/Cargo.toml index ed0e35e8..de77d4aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] [workspace.package] -version = "1.0.0" +version = "1.1.0-rc1" edition = "2024" license = "MIT" repository = "https://github.com/foundryside-dev/loomweave" diff --git a/crates/loomweave-cli/Cargo.toml b/crates/loomweave-cli/Cargo.toml index edd5d37b..9343d850 100644 --- 
a/crates/loomweave-cli/Cargo.toml +++ b/crates/loomweave-cli/Cargo.toml @@ -18,12 +18,12 @@ anyhow.workspace = true axum.workspace = true blake3.workspace = true clap.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.0.0" } -loomweave-analysis = { path = "../loomweave-analysis", version = "1.0.0" } -loomweave-federation = { path = "../loomweave-federation", version = "1.0.0" } -loomweave-mcp = { path = "../loomweave-mcp", version = "1.0.0" } -loomweave-scanner = { path = "../loomweave-scanner", version = "1.0.0" } -loomweave-storage = { path = "../loomweave-storage", version = "1.0.0" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } +loomweave-analysis = { path = "../loomweave-analysis", version = "1.1.0-rc1" } +loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc1" } +loomweave-mcp = { path = "../loomweave-mcp", version = "1.1.0-rc1" } +loomweave-scanner = { path = "../loomweave-scanner", version = "1.1.0-rc1" } +loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc1" } dotenvy.workspace = true fs2.workspace = true hmac.workspace = true @@ -46,7 +46,7 @@ uuid.workspace = true [dev-dependencies] assert_cmd.workspace = true -loomweave-plugin-fixture = { path = "../loomweave-plugin-fixture", version = "1.0.0" } +loomweave-plugin-fixture = { path = "../loomweave-plugin-fixture", version = "1.1.0-rc1" } rusqlite.workspace = true serde_json.workspace = true sha1.workspace = true diff --git a/crates/loomweave-cli/pyproject.toml b/crates/loomweave-cli/pyproject.toml index 9df6faef..72e23181 100644 --- a/crates/loomweave-cli/pyproject.toml +++ b/crates/loomweave-cli/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "loomweave" -version = "1.0.0" +version = "1.1.0rc1" description = "Loomweave — graph-aware code archaeology (Rust core)" readme = "../../README.md" requires-python = ">=3.11" @@ -15,7 +15,7 @@ classifiers = [ "Programming Language :: Rust", 
"Programming Language :: Python :: 3", ] -dependencies = ["loomweave-plugin-python==1.0.0"] +dependencies = ["loomweave-plugin-python==1.1.0rc1"] [project.urls] Repository = "https://github.com/foundryside-dev/loomweave" diff --git a/crates/loomweave-federation/Cargo.toml b/crates/loomweave-federation/Cargo.toml index 2ca76605..335406e1 100644 --- a/crates/loomweave-federation/Cargo.toml +++ b/crates/loomweave-federation/Cargo.toml @@ -11,7 +11,7 @@ workspace = true [dependencies] blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.0.0" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } reqwest.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/loomweave-mcp/Cargo.toml b/crates/loomweave-mcp/Cargo.toml index 457cedcd..684fd2ea 100644 --- a/crates/loomweave-mcp/Cargo.toml +++ b/crates/loomweave-mcp/Cargo.toml @@ -12,9 +12,9 @@ workspace = true [dependencies] async-trait.workspace = true blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.0.0" } -loomweave-federation = { path = "../loomweave-federation", version = "1.0.0" } -loomweave-storage = { path = "../loomweave-storage", version = "1.0.0" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } +loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc1" } +loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc1" } reqwest.workspace = true rusqlite.workspace = true serde.workspace = true diff --git a/crates/loomweave-plugin-fixture/Cargo.toml b/crates/loomweave-plugin-fixture/Cargo.toml index 6d73fbeb..35f824d4 100644 --- a/crates/loomweave-plugin-fixture/Cargo.toml +++ b/crates/loomweave-plugin-fixture/Cargo.toml @@ -23,7 +23,7 @@ name = "loomweave-fixture-plugin" path = "src/main.rs" [dependencies] -loomweave-core = { path = "../loomweave-core", version = "1.0.0" } +loomweave-core = { path = "../loomweave-core", version = 
"1.1.0-rc1" } serde_json.workspace = true [target.'cfg(unix)'.dependencies] diff --git a/crates/loomweave-storage/Cargo.toml b/crates/loomweave-storage/Cargo.toml index a358b6ef..726a0007 100644 --- a/crates/loomweave-storage/Cargo.toml +++ b/crates/loomweave-storage/Cargo.toml @@ -11,7 +11,7 @@ workspace = true [dependencies] blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.0.0" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } deadpool-sqlite.workspace = true rusqlite.workspace = true serde.workspace = true diff --git a/plugins/python/plugin.toml b/plugins/python/plugin.toml index 51638a28..6e159265 100644 --- a/plugins/python/plugin.toml +++ b/plugins/python/plugin.toml @@ -1,7 +1,7 @@ [plugin] name = "loomweave-plugin-python" plugin_id = "python" -version = "1.0.0" +version = "1.1.0rc1" protocol_version = "1.0" # Bare basename per ADR-021 §Layer 1 + WP2 scrub commit eb0a41d — the host # refuses manifests whose `executable` carries any path component. 
diff --git a/plugins/python/pyproject.toml b/plugins/python/pyproject.toml index 18cee269..e26fb75d 100644 --- a/plugins/python/pyproject.toml +++ b/plugins/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "loomweave-plugin-python" -version = "1.0.0" +version = "1.1.0rc1" description = "Loomweave Python language plugin — v1.0 release" readme = "README.md" requires-python = ">=3.11" diff --git a/plugins/python/src/loomweave_plugin_python/__init__.py b/plugins/python/src/loomweave_plugin_python/__init__.py index 1e1d9cab..4b4fde08 100644 --- a/plugins/python/src/loomweave_plugin_python/__init__.py +++ b/plugins/python/src/loomweave_plugin_python/__init__.py @@ -1,3 +1,3 @@ """loomweave-plugin-python — Python language plugin for Loomweave.""" -__version__ = "1.0.0" +__version__ = "1.1.0rc1" diff --git a/plugins/python/tests/test_package.py b/plugins/python/tests/test_package.py index 0a8c7ca7..9cfae6ef 100644 --- a/plugins/python/tests/test_package.py +++ b/plugins/python/tests/test_package.py @@ -17,7 +17,7 @@ def _read_toml(path: Path) -> dict[str, Any]: def test_package_version_matches_pyproject() -> None: - assert loomweave_plugin_python.__version__ == "1.0.0" + assert loomweave_plugin_python.__version__ == "1.1.0rc1" def test_plugin_version_lockstep_across_pyproject_manifest_and_module() -> None: @@ -42,7 +42,7 @@ def test_plugin_version_lockstep_across_pyproject_manifest_and_module() -> None: def test_manifest_declares_current_v1_ontology_only() -> None: manifest = _read_toml(_PLUGIN_ROOT / "plugin.toml") - assert manifest["plugin"]["version"] == "1.0.0" + assert manifest["plugin"]["version"] == "1.1.0rc1" assert manifest["capabilities"]["runtime"]["wardline_aware"] is True assert manifest["integrations"]["wardline"]["expected_descriptor_version"] == ( EXPECTED_DESCRIPTOR_VERSION diff --git a/plugins/python/tests/test_server.py b/plugins/python/tests/test_server.py index 2d38fcfe..7abb5a1d 100644 --- 
a/plugins/python/tests/test_server.py +++ b/plugins/python/tests/test_server.py @@ -86,7 +86,7 @@ def test_initialize_roundtrip() -> None: assert response["id"] == 1 result = response["result"] assert result["name"] == "loomweave-plugin-python" - assert result["version"] == "1.0.0" + assert result["version"] == "1.1.0rc1" assert result["ontology_version"] == "0.7.0" assert set(result["capabilities"]) == {"wardline"} assert result["capabilities"]["wardline"]["status"] in { diff --git a/plugins/python/uv.lock b/plugins/python/uv.lock index 89aa6f96..94aa0d89 100644 --- a/plugins/python/uv.lock +++ b/plugins/python/uv.lock @@ -464,7 +464,7 @@ wheels = [ [[package]] name = "loomweave-plugin-python" -version = "1.0.0" +version = "1.1.0rc1" source = { editable = "." } dependencies = [ { name = "packaging" }, diff --git a/scripts/check-workspace-version-lockstep.py b/scripts/check-workspace-version-lockstep.py index 9eb494b9..db16ed83 100755 --- a/scripts/check-workspace-version-lockstep.py +++ b/scripts/check-workspace-version-lockstep.py @@ -75,6 +75,19 @@ def _dig(data: dict[str, Any], *keys: str) -> Any: return cursor +def _normalize(version: str) -> str: + """Normalize a version string for cross-ecosystem comparison. + + Cargo requires SemVer prerelease syntax (`1.1.0-rc1`) while the Python + packages (maturin/hatchling wheels) use PEP 440 (`1.1.0rc1`). The only `-` + in a valid workspace SemVer string is the prerelease separator, so stripping + hyphens maps the Cargo form onto the PEP 440 form. A no-op on final releases + like `1.0.0`, so the strict-equality policy is preserved for non-prerelease + versions. + """ + return version.replace("-", "") + + def _pinned_version(dependencies: Any, package: str) -> str | None: """Return the `==`-pinned version for `package` in a PEP 508 dependency list. @@ -105,10 +118,13 @@ def check_lockstep( except _Missing as missing: # Without the anchor version there is nothing to compare against. 
return [f"Cargo.toml key {missing} not found"] + # Compare against the PEP 440 form: the Cargo SemVer `1.1.0-rc1` and the + # wheel `1.1.0rc1` are the same product version (see `_normalize`). + rust_norm = _normalize(rust_version) try: plugin_version = _dig(plugin_pyproject, "project", "version") - if plugin_version != rust_version: + if _normalize(plugin_version) != rust_norm: errors.append( f"plugin version {plugin_version!r} != workspace {rust_version!r}" ) @@ -117,7 +133,7 @@ def check_lockstep( try: cli_version = _dig(cli_pyproject, "project", "version") - if cli_version != rust_version: + if _normalize(cli_version) != rust_norm: errors.append( f"loomweave-cli version {cli_version!r} != workspace {rust_version!r}" ) @@ -131,7 +147,7 @@ def check_lockstep( errors.append( f"loomweave-cli pyproject does not pin {PLUGIN_PACKAGE}==" ) - elif pin != rust_version: + elif _normalize(pin) != rust_norm: errors.append( f"loomweave-cli pins {PLUGIN_PACKAGE}=={pin} != workspace {rust_version!r}" ) @@ -143,7 +159,8 @@ def check_lockstep( def _self_test() -> int: """Exercise check_lockstep against in-memory fixtures.""" - cargo = tomllib.loads('[workspace.package]\nversion = "1.0.0"\n') + def cargo_at(version: str) -> dict[str, Any]: + return tomllib.loads(f'[workspace.package]\nversion = "{version}"\n') def plugin(version: str) -> dict[str, Any]: return tomllib.loads( @@ -156,32 +173,44 @@ def cli(version: str, deps: str) -> dict[str, Any]: ) good_deps = 'dependencies = ["loomweave-plugin-python==1.0.0"]' - cases: list[tuple[str, dict[str, Any], dict[str, Any], bool]] = [ - ("aligned", plugin("1.0.0"), cli("1.0.0", good_deps), True), - ("plugin version drift", plugin("1.0.1"), cli("1.0.0", good_deps), False), - ("cli version drift", plugin("1.0.0"), cli("0.9.0", good_deps), False), + rc_deps = 'dependencies = ["loomweave-plugin-python==1.1.0rc1"]' + final = cargo_at("1.0.0") + # Prerelease: the Cargo SemVer `1.1.0-rc1` and the PEP 440 wheel `1.1.0rc1` + # name the same 
product version and must read as aligned (see `_normalize`). + rc = cargo_at("1.1.0-rc1") + cases: list[tuple[str, dict[str, Any], dict[str, Any], dict[str, Any], bool]] = [ + ("aligned", final, plugin("1.0.0"), cli("1.0.0", good_deps), True), + ("plugin version drift", final, plugin("1.0.1"), cli("1.0.0", good_deps), False), + ("cli version drift", final, plugin("1.0.0"), cli("0.9.0", good_deps), False), ( "cli pin drift", + final, plugin("1.0.0"), cli("1.0.0", 'dependencies = ["loomweave-plugin-python==0.9.0"]'), False, ), ( "cli pin absent", + final, plugin("1.0.0"), cli("1.0.0", 'dependencies = ["something-else>=1"]'), False, ), ( "cli pin unpinned (>=)", + final, plugin("1.0.0"), cli("1.0.0", 'dependencies = ["loomweave-plugin-python>=1.0.0"]'), False, ), + # Cross-ecosystem prerelease normalization. + ("rc aligned", rc, plugin("1.1.0rc1"), cli("1.1.0rc1", rc_deps), True), + ("rc plugin drift", rc, plugin("1.1.0rc2"), cli("1.1.0rc1", rc_deps), False), + ("rc pin drift", rc, plugin("1.1.0rc1"), cli("1.1.0rc1", good_deps), False), ] failures = 0 - for name, plugin_py, cli_py, expect_ok in cases: + for name, cargo, plugin_py, cli_py, expect_ok in cases: errors = check_lockstep(cargo, plugin_py, cli_py) actual_ok = not errors if actual_ok != expect_ok: From 7ff84b2900e0c3ba8ad30020191009480c701353 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:47:23 +1000 Subject: [PATCH 19/60] fix(install): gitignore instance_id + *.lock; document live-index commit hazard (ADR-005) The shipped .loomweave/.gitignore (ADR-005) excluded WAL/shadow/logs but not the per-project `instance_id` fingerprint or the analyze advisory lock (`loomweave.lock`, fs2), so `git add -A` staged live runtime state into demo repos. Add `instance_id` and `*.lock` to GITIGNORE_CONTENTS and refresh ADR-005's verbatim block + Excluded list (also reconciling ephemeral.port/embeddings.db). The install test now asserts both rules ship. 
ADR-005 also gains a "Committing a live index" note: the on-disk loomweave.db lags its pending WAL while serve runs, so commit a consistent copy via `loomweave db backup` (or stop serve) rather than git-add-ing the live file. Closes clarion-7381e6382d. Refs clarion-cdee445ed8. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/install.rs | 11 ++++++++- crates/loomweave-cli/tests/install.rs | 5 ++++ .../adr/ADR-005-loomweave-dir-tracking.md | 24 ++++++++++++++++--- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/crates/loomweave-cli/src/install.rs b/crates/loomweave-cli/src/install.rs index a690b229..f8f5abcc 100644 --- a/crates/loomweave-cli/src/install.rs +++ b/crates/loomweave-cli/src/install.rs @@ -84,12 +84,21 @@ const GITIGNORE_CONTENTS: &str = "\ # Loomweave .gitignore — ADR-005 tracked-vs-excluded list. # Tracked (committed): loomweave.db, config.json, .gitignore itself. # Excluded (ignored): WAL sidecars, shadow DB, per-run logs, tmp scratch, -# the read-API live port discovery file. +# the read-API live port discovery file, the per-project instance id, and +# the analyze advisory lock. # Read-API live port discovery file (ADR-044): present only while serve runs, # rewritten per bind, loopback-only — a runtime artifact, never committed. ephemeral.port +# Per-project instance fingerprint (loomweave serve) and the analyze advisory +# lock (loomweave.lock, fs2). Both are process-/machine-local runtime state, +# never durable: committing them stages a live lock + instance id, and the lock +# is meaningless on another checkout (clarion-7381e6382d). `*.lock` also covers +# any future lock sidecar. +instance_id +*.lock + # SQLite write-ahead files never belong in the repo. 
*-wal *-shm diff --git a/crates/loomweave-cli/tests/install.rs b/crates/loomweave-cli/tests/install.rs index 66bdc47b..eb5b8dea 100644 --- a/crates/loomweave-cli/tests/install.rs +++ b/crates/loomweave-cli/tests/install.rs @@ -59,6 +59,11 @@ fn install_creates_loomweave_dir_with_expected_contents() { "*-wal", "*-shm", "ephemeral.port", + // Per-project fingerprint + analyze advisory lock are runtime artifacts, + // never durable — the shipped ignore must list them or `git add -A` + // stages a live lock / instance id (clarion-7381e6382d). + "instance_id", + "*.lock", ] { assert!( gitignore.contains(rule), diff --git a/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md b/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md index bc7166ac..09557739 100644 --- a/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md +++ b/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md @@ -40,16 +40,22 @@ and every developer's install produces their own variant `.gitignore` by acciden ## Decision `loomweave install` writes `.loomweave/.gitignore` with the following contents -(verbatim — the literal file lives at -`crates/loomweave-cli/src/install.rs` and ships as the v0.1 baseline): +(the literal file lives at `crates/loomweave-cli/src/install.rs` — +`GITIGNORE_CONTENTS` — which is the source of truth; the v0.1 baseline has since +grown the `ephemeral.port` (ADR-044), `embeddings.db` (ADR-040), `instance_id`, +and `*.lock` entries): ``` +ephemeral.port *-wal *-shm *.db-wal *.db-shm *.shadow.db *.db.new +embeddings.db +instance_id +*.lock tmp/ logs/ runs/*/log.jsonl @@ -59,7 +65,13 @@ runs/*/log.jsonl - `.loomweave/loomweave.db` — the main analysis store. SQLite diffs poorly; the `loomweave db export --textual` + `loomweave db merge-helper` pattern (detailed - design §3 File layout) handles the team case. + design §3 File layout) handles the team case. 
**Committing a live index:** while + `loomweave serve` is running, the on-disk `loomweave.db` lags by its pending WAL + (the `-wal` sidecar is `.gitignore`d), so `git add loomweave.db` mid-serve can + stage an incomplete database. To commit a consistent point-in-time index, take + an online WAL-safe copy with `loomweave db backup` and commit that, or stop + `serve` first (SQLite checkpoints the WAL away on last-connection close) — + clarion-cdee445ed8. - `.loomweave/config.json` — small, human-readable internal state (schema version, last run IDs). - `.loomweave/.gitignore` itself — this file. @@ -75,6 +87,12 @@ runs/*/log.jsonl - All shadow-DB intermediates. - `tmp/` and `logs/` (volatile scratch). - `runs/*/log.jsonl` (raw LLM bodies — audit-local, not commit-appropriate). +- `ephemeral.port` (ADR-044) — the read-API live port discovery file, present + only while `serve` runs and rewritten per bind. +- `embeddings.db` (ADR-040) — the semantic-search sidecar; large and rebuildable. +- `instance_id` and `*.lock` — the per-project `serve` fingerprint and the + analyze advisory lock (`loomweave.lock`, fs2). Both are process-/machine-local + runtime state, never durable (clarion-7381e6382d). ### Out of scope for `.loomweave/.gitignore` From 69ebaddc8c35e1df98495bb83c09797231d761a3 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:47:32 +1000 Subject: [PATCH 20/60] fix(storage): checkpoint WAL(TRUNCATE) after each committed run; surface db backup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a successful CommitRun the writer-actor now runs `PRAGMA wal_checkpoint(TRUNCATE)` so the on-disk loomweave.db reflects committed state while the writer is still alive — previously the WAL only truncated on last-connection close, leaving a multi-MB pending sidecar that made the .db an unreliable point-in-time artifact for commit. 
The checkpoint is best-effort: failure logs a warning and leaves committed frames durable. `loomweave analyze --help` now points at `loomweave db backup` for committing the index as a versioned artifact (the verb already exists; this is discoverability). Closes clarion-cdee445ed8. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/cli.rs | 4 ++ crates/loomweave-storage/src/writer.rs | 26 +++++++++ .../loomweave-storage/tests/writer_actor.rs | 53 +++++++++++++++++++ 3 files changed, 83 insertions(+) diff --git a/crates/loomweave-cli/src/cli.rs b/crates/loomweave-cli/src/cli.rs index f0cdd8af..d5f93a0d 100644 --- a/crates/loomweave-cli/src/cli.rs +++ b/crates/loomweave-cli/src/cli.rs @@ -62,6 +62,10 @@ pub enum Command { /// Re-runs are idempotent (UPSERT on `entities.id`). If no plugins are on /// `$PATH`, exits 0 with a WARN and status `skipped_no_plugins` — see /// `docs/operator/getting-started.md` Troubleshooting. + /// + /// To commit the index as a versioned artifact while `serve` may be running, + /// take a consistent online copy with `loomweave db backup` rather than + /// `git add`-ing the live file (whose pending WAL is not committable). Analyze { /// Path to analyse (default: current directory). #[arg(default_value = ".")] diff --git a/crates/loomweave-storage/src/writer.rs b/crates/loomweave-storage/src/writer.rs index 004d58a3..0a3a3721 100644 --- a/crates/loomweave-storage/src/writer.rs +++ b/crates/loomweave-storage/src/writer.rs @@ -337,6 +337,16 @@ fn run_actor( &stats_json, commits_observed, ); + // A committed run is the "snapshot" boundary: TRUNCATE-checkpoint + // so the on-disk loomweave.db is a whole, committable artifact + // (ADR-005 tracks it) without waiting for the process to exit. + // Only `CommitRun` (end of an analyze run) reaches here — never the + // serve summary-write path — so there is no per-write checkpoint + // cost. 
Best-effort and run before the ack so a caller that reads + // the file right after sees the truncated WAL (clarion-cdee445ed8). + if res.is_ok() { + checkpoint_truncate(conn); + } reply(ack, res); } WriterCmd::FailRun { @@ -375,6 +385,22 @@ fn cleanup_after_channel_close(conn: &mut Connection, state: &mut ActorState) { } } +/// Issue `PRAGMA wal_checkpoint(TRUNCATE)` on the writer's own connection, +/// best-effort. A concurrent reader (a live `serve` reader-pool connection) can +/// hold the checkpoint back from resetting the WAL — that returns a "busy" row, +/// not an error, and is harmless: the committed frames are already durable and +/// stay applied. A genuine failure is logged, never propagated, so a checkpoint +/// hiccup can never fail an otherwise-successful run commit (clarion-cdee445ed8). +fn checkpoint_truncate(conn: &Connection) { + if let Err(err) = conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);") { + tracing::warn!( + error = %err, + "loomweave writer: post-commit WAL checkpoint(TRUNCATE) failed (harmless; \ + committed frames remain durable)" + ); + } +} + fn reply(ack: Ack, result: Result) { // If the caller dropped the receiver, we discard the result. This is // correct behaviour — the writer is still responsible for its own diff --git a/crates/loomweave-storage/tests/writer_actor.rs b/crates/loomweave-storage/tests/writer_actor.rs index a051f29f..64f65e40 100644 --- a/crates/loomweave-storage/tests/writer_actor.rs +++ b/crates/loomweave-storage/tests/writer_actor.rs @@ -3270,3 +3270,56 @@ async fn channel_close_with_open_run_self_heals_to_failed() { "pending insert must be rolled back when channel closes" ); } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn commit_run_truncates_wal_while_writer_still_alive() { + // clarion-cdee445ed8: ADR-005 commits `.loomweave/loomweave.db`, so a finished + // analyze must leave the on-disk file a whole, committable snapshot WITHOUT + // waiting for the process to exit. 
`CommitRun` now issues an explicit + // `wal_checkpoint(TRUNCATE)`. We assert the WAL is reset to 0 bytes with the + // writer STILL ALIVE — proving it is the post-commit checkpoint, not SQLite's + // last-connection-close cleanup (which would only fire after the drop below). + // + // Scope note: `CommitRun` is reached only at the end of an analyze run, never + // by serve's summary-write path, so there is no per-write checkpoint cost. And + // while a long-lived serve holds reader connections open the TRUNCATE is + // best-effort (a reader can hold it back, harmlessly); `loomweave db backup` + // remains the way to capture a consistent committable copy mid-serve. + let dir = tempfile::tempdir().unwrap(); + let path = prepared_db(&dir); + let wal_path = dir.path().join("loomweave.db-wal"); + + let (writer, handle) = Writer::spawn(path.clone(), 50, 256).unwrap(); + let tx = writer.sender(); + begin_demo_run(&tx, "run-wal").await; + seed_module_and_functions(&tx).await; + seed_contains_edges_for_demo_functions(&tx).await; + send::<()>(&tx, |ack| WriterCmd::CommitRun { + run_id: "run-wal".into(), + status: RunStatus::Completed, + completed_at: now_iso(), + stats_json: "{}".into(), + ack, + }) + .await + .unwrap(); + + // Writer is STILL ALIVE here (tx/writer not dropped): the only thing that + // could have emptied the WAL is the explicit post-CommitRun checkpoint. + let wal_after_commit = std::fs::metadata(&wal_path).map_or(0, |m| m.len()); + assert_eq!( + wal_after_commit, 0, + "CommitRun must TRUNCATE-checkpoint the WAL to 0 bytes while the writer is \ + still alive, so the committed loomweave.db is whole on disk; got {wal_after_commit}" + ); + + // Clean shutdown still succeeds (and the actor task joins without error). 
+ drop(tx); + drop(writer); + handle.await.unwrap().unwrap(); + let wal_after_shutdown = std::fs::metadata(&wal_path).map_or(0, |m| m.len()); + assert_eq!( + wal_after_shutdown, 0, + "WAL must remain truncated after shutdown; got {wal_after_shutdown}" + ); +} From 032425c851b5eb8f43cb4bf02da57f54cd707f14 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:47:45 +1000 Subject: [PATCH 21/60] =?UTF-8?q?feat(mcp):=20worktree-aware=20staleness?= =?UTF-8?q?=20=E2=80=94=20indexed=5Fat=5Fcommit=20+=20StaleWorktree=20verd?= =?UTF-8?q?ict=20(ADR-045)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit project_status_get reported staleness:"fresh" while the working tree held un-indexed source, so the session-start banner ("index is fresh, ask Loomweave") lied about uncommitted code. Make staleness worktree-aware: - Snapshot gains indexed_at_commit + worktree_dirty; a new Staleness::StaleWorktree verdict fires when an otherwise-fresh index has untracked source on disk. - Detection uses loomweave_core::list_untracked_files — hardened, hash-free `git ls-files --others --exclude-standard`, scoped to ingested source extensions so a scratch notes.txt does not flag (false-positive guard). Fail-soft outside a git work tree. - Surfaced on loomweave://context, project_status_get (worktree_dirty + staleness_note), and the session-start banner with a concrete re-analyze remedy; orientation treats StaleWorktree as stale. - ADR-045 records the maintainer-authorized security boundary: `git status` is forbidden (filter.clean RCE on hashed content; clarion-4b5a8aff54), but ls-files --others is hash-free — proven by the new ls_files_others_does_not_run_clean_filter security test, not reasoning alone. Closes clarion-26c7e52027 and clarion-d9cf8bcfa9. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/hook.rs | 191 +++++++++- crates/loomweave-core/src/hardened_git.rs | 98 ++++++ crates/loomweave-core/src/lib.rs | 2 +- crates/loomweave-mcp/src/snapshot.rs | 325 +++++++++++++++++- crates/loomweave-mcp/src/tools/orientation.rs | 6 +- crates/loomweave-mcp/src/tools/status.rs | 24 ++ crates/loomweave-mcp/tests/storage_tools.rs | 116 +++++++ .../adr/ADR-045-worktree-source-staleness.md | 118 +++++++ docs/loomweave/adr/README.md | 1 + 9 files changed, 865 insertions(+), 16 deletions(-) create mode 100644 docs/loomweave/adr/ADR-045-worktree-source-staleness.md diff --git a/crates/loomweave-cli/src/hook.rs b/crates/loomweave-cli/src/hook.rs index 38d00141..a0adcf08 100644 --- a/crates/loomweave-cli/src/hook.rs +++ b/crates/loomweave-cli/src/hook.rs @@ -154,11 +154,31 @@ fn snapshot_outcome_lines(project_root: &Path, outcome: &SnapshotOutcome) -> Vec } match snapshot.staleness() { Staleness::Fresh => { + // Surface the analyzed commit (when the run recorded one) so the + // "fresh" claim names the commit it reflects — short form for the + // banner; project_status_get carries the full `git_sha`. + let at_commit = snapshot + .indexed_at_commit() + .map(|c| format!(", commit {}", c.chars().take(12).collect::<String>())) + .unwrap_or_default(); lines.push(format!( - "Index is fresh (last analyzed {}). Ask Loomweave before re-exploring \ + "Index is fresh (last analyzed {}{}). Ask Loomweave before re-exploring \ the tree; see the loomweave-workflow skill.", - snapshot.last_analyzed_at().unwrap_or("unknown") + snapshot.last_analyzed_at().unwrap_or("unknown"), + at_commit )); + // Honest caveat (clarion-26c7e52027): freshness compares the mtimes of + // *already-indexed* source files, so brand-new files in a not-yet- + // indexed top-level directory — or any uncommitted additions, which the + // untrusted-corpus git posture cannot safely detect — can sit unseen + // behind a "fresh" verdict.
Re-analyze is the remedy. + lines.push( + "Caveat: \"fresh\" reflects already-indexed files only; it will NOT \ + detect brand-new modules in a not-yet-indexed directory. If you just \ + added or moved source, run `loomweave analyze` before relying on \ + graph answers (e.g. \"what calls X\")." + .to_string(), + ); } Staleness::Stale => { lines.push(format!( @@ -167,6 +187,19 @@ fn snapshot_outcome_lines(project_root: &Path, outcome: &SnapshotOutcome) -> Vec project_root.display() )); } + Staleness::StaleWorktree => { + // The ingested files are individually fresh, but the working tree has + // untracked source of an already-indexed type the index has not seen + // (the new-top-level-dir blind spot the mtime passes can't reach; + // clarion-26c7e52027). Concrete, not a caveat — name the remedy. + lines.push(format!( + "Index does NOT reflect the working tree: untracked source files of \ + already-indexed types are present (new modules not yet analyzed). \ + Run `loomweave analyze {}` before relying on graph answers \ + (e.g. \"what calls X\").", + project_root.display() + )); + } Staleness::NeverAnalyzed => { lines.push(format!( "No analysis recorded yet. Run `loomweave analyze {}` to build the index.", @@ -191,3 +224,157 @@ fn snapshot_outcome_lines(project_root: &Path, outcome: &SnapshotOutcome) -> Vec } lines } + +#[cfg(test)] +mod tests { + use super::*; + + use rusqlite::Connection; + + use loomweave_storage::{pragma, schema}; + + /// Build a `Fresh` snapshot for `project_root`: one ingested source file that + /// exists and is older than a completed run. `commit` populates + /// `runs.analyzed_at_commit` (or leaves it NULL). Mirrors the snapshot + /// module's own fixtures; the `TempDir` holding the db is returned so the + /// caller keeps it alive. 
+ fn fresh_snapshot( + project_root: &Path, + commit: Option<&str>, + ) -> (tempfile::TempDir, ProjectSnapshot) { + std::fs::write(project_root.join("a.py"), "x = 1\n").unwrap(); + let db_dir = tempfile::tempdir().unwrap(); + let mut conn = Connection::open(db_dir.path().join("loomweave.db")).unwrap(); + pragma::apply_write_pragmas(&conn).unwrap(); + schema::apply_migrations(&mut conn).unwrap(); + conn.execute( + "INSERT INTO entities \ + (id, plugin_id, kind, name, short_name, properties, source_file_path, created_at, updated_at) \ + VALUES ('python:module:a', 'python', 'module', 'a', 'a', '{}', 'a.py', \ + '2026-01-01T00:00:00.000Z', '2026-01-01T00:00:00.000Z')", + [], + ) + .unwrap(); + conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status, analyzed_at_commit) \ + VALUES ('r', '2099-01-01T00:00:00.000Z', '2099-01-01T00:00:00.000Z', '{}', '{}', 'completed', ?1)", + rusqlite::params![commit], + ) + .unwrap(); + let snapshot = project_snapshot(&conn, project_root); + assert_eq!( + snapshot.staleness(), + Staleness::Fresh, + "fixture must be Fresh: {snapshot:?}" + ); + (db_dir, snapshot) + } + + #[test] + fn fresh_banner_carries_honest_caveat_and_commit() { + // The bare "fresh ... ask Loomweave before re-exploring" line lied about + // brand-new uncommitted modules (clarion-26c7e52027). The Fresh arm must + // now (a) name the indexed commit and (b) carry the re-analyze caveat. + let root = tempfile::tempdir().unwrap(); + let (_db, snapshot) = fresh_snapshot(root.path(), Some("abc123def4567890")); + let lines = snapshot_outcome_lines(root.path(), &SnapshotOutcome::Ready(snapshot)); + let banner = lines.join("\n"); + + assert!( + banner.contains("Index is fresh"), + "missing fresh line: {banner}" + ); + // Short commit form is surfaced (12 chars), not the full 16-char fixture. 
+ assert!( + banner.contains("commit abc123def456"), + "missing indexed commit: {banner}" + ); + assert!( + banner.contains("loomweave analyze") && banner.contains("brand-new"), + "Fresh banner must disclose the not-yet-indexed blind spot and point at \ + re-analyze: {banner}" + ); + } + + #[test] + fn fresh_banner_omits_commit_clause_when_run_recorded_none() { + // A run analyzed outside a git repo has NULL analyzed_at_commit: the banner + // must not invent a commit clause, but still carries the caveat. + let root = tempfile::tempdir().unwrap(); + let (_db, snapshot) = fresh_snapshot(root.path(), None); + let lines = snapshot_outcome_lines(root.path(), &SnapshotOutcome::Ready(snapshot)); + let banner = lines.join("\n"); + + assert!( + banner.contains("Index is fresh"), + "missing fresh line: {banner}" + ); + assert!( + !banner.contains(", commit "), + "must not fabricate a commit: {banner}" + ); + assert!( + banner.contains("brand-new"), + "caveat must still be present: {banner}" + ); + } + + #[test] + fn stale_worktree_banner_names_untracked_source_and_remedy() { + // In a git work tree, a mtime-fresh index with an untracked module yields + // StaleWorktree (clarion-26c7e52027, ADR-045); the banner must say so + // concretely and point at re-analyze, not the soft Fresh caveat. 
+ use std::process::Command; + let root = tempfile::tempdir().unwrap(); + let git = |args: &[&str]| -> bool { + Command::new("git") + .args(args) + .current_dir(root.path()) + .status() + .is_ok_and(|s| s.success()) + }; + if !git(&["init", "-q"]) { + return; // git unavailable → skip + } + let _ = git(&["config", "user.email", "t@t"]); + let _ = git(&["config", "user.name", "t"]); + std::fs::write(root.path().join("a.py"), "x = 1\n").unwrap(); + git(&["add", "."]); + git(&["commit", "-q", "-m", "init"]); + + let db_dir = tempfile::tempdir().unwrap(); + let mut conn = Connection::open(db_dir.path().join("loomweave.db")).unwrap(); + pragma::apply_write_pragmas(&conn).unwrap(); + schema::apply_migrations(&mut conn).unwrap(); + conn.execute( + "INSERT INTO entities \ + (id, plugin_id, kind, name, short_name, properties, source_file_path, created_at, updated_at) \ + VALUES ('python:module:a', 'python', 'module', 'a', 'a', '{}', 'a.py', \ + '2026-01-01T00:00:00.000Z', '2026-01-01T00:00:00.000Z')", + [], + ) + .unwrap(); + conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status) \ + VALUES ('r', '2099-01-01T00:00:00.000Z', '2099-01-01T00:00:00.000Z', '{}', '{}', 'completed')", + [], + ) + .unwrap(); + // Brand-new untracked module the index never saw. 
+ std::fs::write(root.path().join("hub.py"), "y = 2\n").unwrap(); + + let snapshot = project_snapshot(&conn, root.path()); + assert_eq!( + snapshot.staleness(), + Staleness::StaleWorktree, + "fixture must be StaleWorktree: {snapshot:?}" + ); + let lines = snapshot_outcome_lines(root.path(), &SnapshotOutcome::Ready(snapshot)); + let banner = lines.join("\n"); + assert!( + banner.contains("does NOT reflect the working tree") + && banner.contains("loomweave analyze"), + "StaleWorktree banner must name the gap and the re-analyze remedy: {banner}" + ); + } +} diff --git a/crates/loomweave-core/src/hardened_git.rs b/crates/loomweave-core/src/hardened_git.rs index 729d437e..9c0068ac 100644 --- a/crates/loomweave-core/src/hardened_git.rs +++ b/crates/loomweave-core/src/hardened_git.rs @@ -161,6 +161,41 @@ pub fn hardened_git_command(repo_root: &Path) -> Command { command } +/// List untracked, non-ignored files in `repo_root`, hardened for an untrusted +/// corpus (clarion-d9cf8bcfa9; ADR-045). +/// +/// Uses `git ls-files --others --exclude-standard -z`: it enumerates worktree +/// paths Git is not tracking and that `.gitignore`/exclude rules do not cover, +/// **without hashing working-tree content**. That distinction is load-bearing — +/// `git status` must hash to report modifications, which runs a repo-controlled +/// `filter.<driver>.clean` (the one residual the module docs describe, via +/// `$GIT_DIR/info/attributes`); listing untracked paths never hashes, so that +/// filter is never invoked. Verified by the +/// `ls_files_others_does_not_run_clean_filter` test in this module. +/// +/// `-z` is NUL-delimited, so paths containing newlines or other special bytes +/// are unambiguous (no C-quoting to decode). Fail-soft like the crate's other +/// corpus git probes: returns `None` when git is unavailable, `repo_root` is not +/// a work tree, or the command fails — never an error. An empty `Vec` means "a +/// git repo with no untracked files".
+#[must_use] +pub fn list_untracked_files(repo_root: &Path) -> Option<Vec<String>> { + let out = hardened_git_command(repo_root) + .args(["ls-files", "--others", "--exclude-standard", "-z"]) + .output() + .ok()?; + if !out.status.success() { + return None; + } + Some( + out.stdout + .split(|&b| b == 0) + .filter(|segment| !segment.is_empty()) + .map(|segment| String::from_utf8_lossy(segment).into_owned()) + .collect(), + ) +} + #[cfg(test)] mod tests { use super::*; @@ -229,4 +264,67 @@ mod tests { ); assert_eq!(parse_git_version("garbage"), None); } + + #[test] + fn ls_files_others_does_not_run_clean_filter() { + // The one corpus-controlled code-exec vector hardened_git CANNOT disable by + // config is `$GIT_DIR/info/attributes` naming a `filter`, whose `.clean` + // runs only when git HASHES working-tree content. `list_untracked_files` + // uses `ls-files --others`, which lists paths and never hashes — so the + // filter must never fire. Prove it empirically (ADR-045, clarion-d9cf8bcfa9): + // a booby-trapped repo whose clean filter would create a marker must leave + // NO marker after the call, while still returning the untracked file. + let dir = tempfile::tempdir().unwrap(); + let repo = dir.path(); + + // Skip cleanly if git is unavailable on the test host. + let Ok(init) = Command::new("git") + .args(["init", "-q"]) + .current_dir(repo) + .status() + else { + return; + }; + if !init.success() { + return; + } + // git refuses commands without an identity in some environments; not needed + // here (no commit), but set repo-local config defensively. + let _ = Command::new("git") + .args(["config", "user.email", "t@t"]) + .current_dir(repo) + .status(); + + // Booby-trap: an in-`.git` attribute selects a clean filter (the residual + // source --attr-source cannot neutralize), and a repo-local config defines + // that filter to create PWNED if ever invoked. Repo-local config + in-git + // attributes are exactly what an untrusted corpus controls.
+ std::fs::create_dir_all(repo.join(".git/info")).unwrap(); + std::fs::write(repo.join(".git/info/attributes"), "* filter=pwn\n").unwrap(); + let marker = repo.join("PWNED"); + Command::new("git") + .args([ + "config", + "filter.pwn.clean", + &format!("sh -c 'touch \"{}\"'", marker.display()), + ]) + .current_dir(repo) + .status() + .unwrap(); + + // An untracked file matching the `*` filter attribute. If anything hashed + // it, the clean filter would run and create the marker. + std::fs::write(repo.join("evil.py"), "x = 1\n").unwrap(); + + let untracked = list_untracked_files(repo).expect("ls-files must succeed in a git repo"); + assert!( + untracked.iter().any(|p| p == "evil.py"), + "the untracked file must be listed: {untracked:?}" + ); + assert!( + !marker.exists(), + "ls-files --others must NOT hash working-tree content, so the corpus \ + clean filter must never run (no PWNED marker)" + ); + } } diff --git a/crates/loomweave-core/src/lib.rs b/crates/loomweave-core/src/lib.rs index d30418b1..6f2aae28 100644 --- a/crates/loomweave-core/src/lib.rs +++ b/crates/loomweave-core/src/lib.rs @@ -19,7 +19,7 @@ pub use embedding_provider::{ }; pub use entity_id::{EntityId, EntityIdError, entity_id}; pub use errors::{HttpErrorCode, McpErrorCode}; -pub use hardened_git::hardened_git_command; +pub use hardened_git::{hardened_git_command, list_untracked_files}; pub use llm_provider::{ CachingModel, ClaudeCliProvider, ClaudeCliProviderConfig, CodexCliProvider, CodexCliProviderConfig, INFERRED_CALLS_PROMPT_VERSION, InferredCallsPromptInput, diff --git a/crates/loomweave-mcp/src/snapshot.rs b/crates/loomweave-mcp/src/snapshot.rs index bc58a9ba..530c741a 100644 --- a/crates/loomweave-mcp/src/snapshot.rs +++ b/crates/loomweave-mcp/src/snapshot.rs @@ -58,6 +58,18 @@ pub enum Staleness { /// modification scan and the unwatched-project-root caveat in the /// type-level note. 
Fresh, + /// The mtime/structural passes found every ingested file fresh, but the + /// working tree contains untracked source of an already-indexed file type + /// that the index has never seen — e.g. a brand-new top-level module the + /// structural pass cannot reach (the unwatched-project-root blind spot; + /// clarion-26c7e52027). Detected via a hardened, ignore-aware + /// `git ls-files --others` scoped to ingested extensions (ADR-045); the raw + /// signal is on [`ProjectSnapshot::worktree_dirty`]. Returned in place of + /// [`Fresh`] only when that worktree signal is positive — so it never fires + /// outside a git work tree, and a non-source untracked file never triggers it. + /// + /// [`Fresh`]: Staleness::Fresh + StaleWorktree, /// A completed run exists, but no ingested entity has a resolvable /// `source_file_path` to stat — there is *nothing to compare against*, so /// freshness is neither Fresh nor Stale. A normal outcome (e.g. a project @@ -94,6 +106,29 @@ pub struct ProjectSnapshot { staleness: Staleness, /// Latest run `completed_at` (ISO-8601) if any, else `None`. last_analyzed_at: Option<String>, + /// The git commit HEAD pointed at when the latest completed run was analyzed + /// (`runs.analyzed_at_commit`), if Loomweave captured one — `None` for a run + /// analyzed outside a git work tree, or before WS9 began recording it. + /// Surfaced so the `loomweave://context` resource and the session-start + /// banner can state *which commit* the index reflects. It is descriptive, not + /// a freshness signal: a [`Staleness::Fresh`] verdict is only ever fresh + /// relative to the ingested source files' mtimes — never a claim that HEAD or + /// the working tree still matches this commit (clarion-26c7e52027). The + /// `project_status_get` tool already reports the same value as `git_sha`.
+ /// + /// [`Fresh`]: Staleness::Fresh + indexed_at_commit: Option<String>, + /// Whether the working tree holds untracked source of an already-indexed file + /// type that the index does not reflect — the signal behind + /// [`Staleness::StaleWorktree`] (clarion-26c7e52027, ADR-045). `Some(true)` = + /// un-indexed source present; `Some(false)` = a git work tree with none; + /// `None` = not a git work tree, git unavailable, or nothing ingested to scope + /// against (the check is moot). Computed via a hardened, hash-free + /// `git ls-files --others --exclude-standard` filtered to ingested file + /// extensions, so an untracked non-source file (a scratch `notes.txt`) never + /// flags it, and the untrusted-corpus posture is preserved (no working-tree + /// hashing — see [`loomweave_core::list_untracked_files`]). + worktree_dirty: Option<bool>, /// `true` when this snapshot was produced from a *failure* rather than a /// healthy read: at least one backing SQL query failed unexpectedly and was /// folded to a safe default (a count to `0`, the run lookup to `None`, or @@ -161,6 +196,21 @@ impl ProjectSnapshot { self.last_analyzed_at.as_deref() } + /// The commit the latest completed run was analyzed at, if captured — see the + /// field note. `None` when never analyzed, analyzed outside a git repo, or the + /// `analyzed_at_commit` column is NULL. + #[must_use] + pub fn indexed_at_commit(&self) -> Option<&str> { + self.indexed_at_commit.as_deref() + } + + /// Whether the working tree holds untracked source the index has not seen — + /// see the field note. `None` outside a git work tree / with nothing ingested. + #[must_use] + pub fn worktree_dirty(&self) -> Option<bool> { + self.worktree_dirty + } + /// `true` when this snapshot was folded from a backing-query failure — see /// the field-level note for the precise contract.
#[must_use] @@ -196,9 +246,9 @@ pub fn project_snapshot(conn: &Connection, project_root: &Path) -> ProjectSnapsh ); let finding_count = scalar_count(conn, "SELECT COUNT(*) FROM findings", &mut degraded); - let last_analyzed_at = latest_completed_run(conn, &mut degraded); + let (last_analyzed_at, indexed_at_commit) = latest_completed_run(conn, &mut degraded); let mut scan_truncated = false; - let staleness = compute_staleness( + let mut staleness = compute_staleness( conn, project_root, last_analyzed_at.as_deref(), @@ -206,6 +256,17 @@ pub fn project_snapshot(conn: &Connection, project_root: &Path) -> ProjectSnapsh &mut scan_truncated, ); + // Worktree-source detection (clarion-26c7e52027, ADR-045): the mtime/structural + // passes cannot see un-indexed source in a brand-new top-level directory (the + // unwatched-project-root blind spot), so a hardened, ignore-aware + // `git ls-files --others` scoped to ingested extensions catches it. Best-effort + // and never degrades — `None` outside a git work tree. When the index is + // otherwise Fresh but such source exists, the honest verdict is StaleWorktree. 
+ let worktree_dirty = compute_worktree_dirty(conn, project_root); + if staleness == Staleness::Fresh && worktree_dirty == Some(true) { + staleness = Staleness::StaleWorktree; + } + ProjectSnapshot { db_present: true, entity_count, @@ -213,6 +274,8 @@ pub fn project_snapshot(conn: &Connection, project_root: &Path) -> ProjectSnapsh finding_count, staleness, last_analyzed_at, + indexed_at_commit, + worktree_dirty, degraded, scan_truncated, } @@ -228,6 +291,8 @@ pub fn missing_db_snapshot() -> ProjectSnapshot { finding_count: 0, staleness: Staleness::NeverAnalyzed, last_analyzed_at: None, + indexed_at_commit: None, + worktree_dirty: None, degraded: false, scan_truncated: false, } @@ -249,11 +314,62 @@ pub fn unreadable_db_snapshot() -> ProjectSnapshot { finding_count: 0, staleness: Staleness::Unknown, last_analyzed_at: None, + indexed_at_commit: None, + worktree_dirty: None, degraded: true, scan_truncated: false, } } +/// Whether the working tree holds untracked source of an already-indexed file +/// type — the [`ProjectSnapshot::worktree_dirty`] signal (clarion-26c7e52027, +/// ADR-045). Fail-soft: `None` when nothing is ingested (no extensions to scope +/// against, so the check is moot), the project is not a git work tree, or git is +/// unavailable. Never sets `degraded` — a missing git binary is environmental, +/// not a DB-machinery failure. +/// +/// Scoping to ingested extensions is what keeps this honest: a hardened +/// `git ls-files --others --exclude-standard` lists every untracked, non-ignored +/// path, but only those whose extension Loomweave actually ingests count — so an +/// untracked `notes.txt` never flags a fresh index dirty, while an untracked +/// `hub.py` (the dogfood scenario) does. 
+fn compute_worktree_dirty(conn: &Connection, project_root: &Path) -> Option<bool> { + let exts = ingested_source_extensions(conn); + if exts.is_empty() { + return None; + } + let untracked = loomweave_core::list_untracked_files(project_root)?; + Some(untracked.iter().any(|rel| { + Path::new(rel) + .extension() + .and_then(|ext| ext.to_str()) + .is_some_and(|ext| exts.contains(ext)) + })) +} + +/// The distinct file extensions among ingested `source_file_path`s (lowercased by +/// nothing — git and the filesystem are case-sensitive on the platforms we +/// target). Fail-soft to an empty set on any query error, which makes +/// [`compute_worktree_dirty`] return `None` (treat the scope as unknown). +fn ingested_source_extensions(conn: &Connection) -> BTreeSet<String> { + let mut exts = BTreeSet::new(); + let Ok(mut stmt) = conn.prepare( + "SELECT DISTINCT source_file_path FROM entities \ + WHERE source_file_path IS NOT NULL", + ) else { + return exts; + }; + let Ok(rows) = stmt.query_map([], |row| row.get::<_, String>(0)) else { + return exts; + }; + for rel in rows.flatten() { + if let Some(ext) = Path::new(&rel).extension().and_then(|ext| ext.to_str()) { + exts.insert(ext.to_owned()); + } + } + exts +} + /// Run a scalar `COUNT(*)` query. On failure, log, fold to `0`, and set /// `*degraded` so the caller can mark the whole snapshot as a degraded read. fn scalar_count(conn: &Connection, sql: &str, degraded: &mut bool) -> i64 { @@ -267,23 +383,29 @@ fn scalar_count(conn: &Connection, sql: &str, degraded: &mut bool) -> i64 { } } -/// Look up the latest completed run's `completed_at`. `QueryReturnedNoRows` is a -/// normal "never analyzed" outcome and does *not* degrade; any other error is a -/// machinery failure that folds to `None` and sets `*degraded`. -fn latest_completed_run(conn: &Connection, degraded: &mut bool) -> Option<String> { +/// Look up the latest completed run's `completed_at` and `analyzed_at_commit`.
+/// `QueryReturnedNoRows` is a normal "never analyzed" outcome and does *not* +/// degrade; any other error is a machinery failure that folds to `(None, None)` +/// and sets `*degraded`. `analyzed_at_commit` is independently nullable (a run +/// analyzed outside a git work tree), so it is `None` even on the happy path when +/// the column was never populated. +fn latest_completed_run( + conn: &Connection, + degraded: &mut bool, +) -> (Option<String>, Option<String>) { match conn.query_row( - "SELECT completed_at FROM runs \ + "SELECT completed_at, analyzed_at_commit FROM runs \ WHERE completed_at IS NOT NULL AND status = 'completed' \ ORDER BY completed_at DESC LIMIT 1", [], - |row| row.get::<_, String>(0), + |row| Ok((row.get::<_, String>(0)?, row.get::<_, Option<String>>(1)?)), ) { - Ok(s) => Some(s), - Err(rusqlite::Error::QueryReturnedNoRows) => None, + Ok((completed_at, analyzed_at_commit)) => (Some(completed_at), analyzed_at_commit), + Err(rusqlite::Error::QueryReturnedNoRows) => (None, None), Err(err) => { tracing::warn!(error = %err, "loomweave latest-completed-run query failed"); *degraded = true; - None + (None, None) } } } @@ -855,4 +977,185 @@ mod tests { let json = serde_json::to_value(&snap).unwrap(); assert_eq!(json["degraded"], serde_json::Value::Bool(false)); } + + #[test] + fn indexed_at_commit_is_surfaced_and_serialized_when_the_run_recorded_one() { + // `project_status_get` already reports the analyzed commit as `git_sha`; + // the snapshot (loomweave://context + the session-start banner) must carry + // the same value so a Fresh verdict can name the commit it reflects + // (clarion-26c7e52027).
+ let (_dir, conn) = migrated_conn(); + let dir = tempfile::tempdir().unwrap(); + std::fs::write(dir.path().join("a.py"), "x = 1\n").unwrap(); + insert_entity(&conn, "python:module:a", "module", Some("a.py")); + conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status, analyzed_at_commit) \ + VALUES ('r', '2099-01-01T00:00:00.000Z', '2099-01-01T00:00:00.000Z', '{}', '{}', 'completed', 'abc123def456')", + [], + ) + .unwrap(); + + let snap = project_snapshot(&conn, dir.path()); + assert_eq!(snap.indexed_at_commit(), Some("abc123def456"), "{snap:?}"); + let json = serde_json::to_value(&snap).unwrap(); + assert_eq!( + json["indexed_at_commit"], + serde_json::Value::String("abc123def456".into()) + ); + } + + #[test] + fn indexed_at_commit_is_none_when_run_analyzed_outside_a_git_repo() { + // `analyzed_at_commit` is independently nullable: a run outside a git work + // tree records NULL, and the snapshot must report None — never a fabricated + // or empty commit. + let (_dir, conn) = migrated_conn(); + insert_entity(&conn, "python:module:a", "module", Some("a.py")); + conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status) \ + VALUES ('r', '2026-01-01T00:00:00.000Z', '2026-01-02T00:00:00.000Z', '{}', '{}', 'completed')", + [], + ) + .unwrap(); + let snap = project_snapshot(&conn, std::path::Path::new("/tmp")); + assert_eq!(snap.indexed_at_commit(), None, "{snap:?}"); + } + + #[test] + fn new_top_level_directory_is_a_known_fresh_blind_spot() { + // Documents (and regression-locks) the conservative-nudge limitation the + // honest banner now discloses (clarion-26c7e52027). The watch set is the + // *direct parents of ingested files* and the project root is deliberately + // unwatched, so a brand-new top-level directory full of never-ingested + // source is invisible to BOTH the structural-drift and per-file passes — + // the verdict stays Fresh. 
This is the exact dogfood scenario (new + // specimen modules read as "fresh"). Detecting it would need working-tree + // git, which the untrusted-corpus posture (hardened_git) blocks; until + // then the banner tells the agent to re-analyze after adding modules. + use super::parse_iso8601_to_systemtime; + let (_dir, conn) = migrated_conn(); + let root = tempfile::tempdir().unwrap(); + let pkg = root.path().join("pkg"); + std::fs::create_dir(&pkg).unwrap(); + let a = pkg.join("a.py"); + std::fs::write(&a, "x = 1\n").unwrap(); + + let run_iso = "2026-06-15T00:00:00.000Z"; + let run_time = parse_iso8601_to_systemtime(run_iso).unwrap(); + let day = std::time::Duration::from_secs(86_400); + set_mtime(&a, run_time - day); // ingested file untouched since the run + set_mtime(&pkg, run_time - day); // its watched parent untouched too + + insert_entity(&conn, "python:module:pkg.a", "module", Some("pkg/a.py")); + conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status) \ + VALUES ('r', ?1, ?1, '{}', '{}', 'completed')", + rusqlite::params![run_iso], + ) + .unwrap(); + + // Add a brand-new top-level package AFTER the run. Its parent is the + // (unwatched) project root, so nothing in the watch set changed. + let newpkg = root.path().join("newpkg"); + std::fs::create_dir(&newpkg).unwrap(); + let hub = newpkg.join("hub.py"); + std::fs::write(&hub, "y = 2\n").unwrap(); + set_mtime(&hub, run_time + day); + + let snap = project_snapshot(&conn, root.path()); + // The mtime/structural passes can't see the new top-level dir, AND this + // tempdir is not a git work tree, so the worktree-source check returns + // None (nothing to detect with). Verdict stays Fresh — the mtime blind + // spot the banner caveat covers. In a GIT repo this same scenario flips to + // StaleWorktree (see `untracked_source_in_git_repo_reports_stale_worktree`). 
+ assert_eq!(snap.staleness, Staleness::Fresh, "{snap:?}"); + assert_eq!( + snap.worktree_dirty, None, + "outside a git work tree, worktree_dirty must be None: {snap:?}" + ); + } + + /// `git init` + commit `files` in `root`; returns `false` (caller skips) if + /// git is unavailable on the host. Committing keeps the seeded source OUT of + /// the untracked set so a clean baseline really is clean. + fn git_init_with_committed(root: &std::path::Path, files: &[(&str, &str)]) -> bool { + use std::process::Command; + let run = |args: &[&str]| -> bool { + Command::new("git") + .args(args) + .current_dir(root) + .status() + .is_ok_and(|s| s.success()) + }; + if !run(&["init", "-q"]) { + return false; + } + let _ = run(&["config", "user.email", "t@t"]); + let _ = run(&["config", "user.name", "t"]); + for (name, body) in files { + std::fs::write(root.join(name), body).unwrap(); + } + if !files.is_empty() { + run(&["add", "."]); + run(&["commit", "-q", "-m", "init"]); + } + true + } + + #[test] + fn untracked_source_in_git_repo_reports_stale_worktree() { + // The dogfood scenario (clarion-26c7e52027, ADR-045): an index that is + // mtime-fresh but with a brand-new untracked source module the structural + // pass cannot reach. In a git work tree the hardened `ls-files --others` + // check (scoped to ingested `.py`) catches it and the verdict is honest: + // StaleWorktree, with worktree_dirty = Some(true). + let (_dir, conn) = migrated_conn(); + let root = tempfile::tempdir().unwrap(); + if !git_init_with_committed(root.path(), &[("demo.py", "x = 1\n")]) { + return; // git unavailable on host → skip (mechanism covered in core) + } + insert_entity(&conn, "python:module:demo", "module", Some("demo.py")); + // Far-future run → every ingested file is mtime-fresh. 
+ conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status) \ + VALUES ('r', '2099-01-01T00:00:00.000Z', '2099-01-01T00:00:00.000Z', '{}', '{}', 'completed')", + [], + ) + .unwrap(); + // Brand-new untracked source the index never saw. + std::fs::write(root.path().join("hub.py"), "y = 2\n").unwrap(); + + let snap = project_snapshot(&conn, root.path()); + assert_eq!(snap.staleness, Staleness::StaleWorktree, "{snap:?}"); + assert_eq!(snap.worktree_dirty, Some(true), "{snap:?}"); + } + + #[test] + fn untracked_non_source_in_git_repo_stays_fresh() { + // False-positive guard: an untracked file whose extension Loomweave does + // not ingest (a scratch notes.txt) must NOT flag the index dirty. The + // extension scoping is what keeps the signal honest. + let (_dir, conn) = migrated_conn(); + let root = tempfile::tempdir().unwrap(); + if !git_init_with_committed(root.path(), &[("demo.py", "x = 1\n")]) { + return; + } + insert_entity(&conn, "python:module:demo", "module", Some("demo.py")); + conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status) \ + VALUES ('r', '2099-01-01T00:00:00.000Z', '2099-01-01T00:00:00.000Z', '{}', '{}', 'completed')", + [], + ) + .unwrap(); + // Untracked, but NOT a source extension the index uses. 
+ std::fs::write(root.path().join("notes.txt"), "scratch\n").unwrap(); + + let snap = project_snapshot(&conn, root.path()); + assert_eq!( + snap.staleness, + Staleness::Fresh, + "an untracked non-source file must not flip the verdict: {snap:?}" + ); + assert_eq!(snap.worktree_dirty, Some(false), "{snap:?}"); + } } diff --git a/crates/loomweave-mcp/src/tools/orientation.rs b/crates/loomweave-mcp/src/tools/orientation.rs index d7a4f0ff..39d51ee2 100644 --- a/crates/loomweave-mcp/src/tools/orientation.rs +++ b/crates/loomweave-mcp/src/tools/orientation.rs @@ -92,8 +92,10 @@ impl ServerState { "degraded": snapshot.degraded(), "scan_truncated": snapshot.scan_truncated(), }); - let staleness_stale = - matches!(snapshot.staleness(), crate::snapshot::Staleness::Stale); + let staleness_stale = matches!( + snapshot.staleness(), + crate::snapshot::Staleness::Stale | crate::snapshot::Staleness::StaleWorktree + ); // Whether this index has any alive SEI bindings (REQ-C-04 / // ADR-038). Degrades to `false` on a pre-SEI database. let sei_populated = has_any_alive_binding(conn).unwrap_or(false); diff --git a/crates/loomweave-mcp/src/tools/status.rs b/crates/loomweave-mcp/src/tools/status.rs index 5df6669f..888e8f32 100644 --- a/crates/loomweave-mcp/src/tools/status.rs +++ b/crates/loomweave-mcp/src/tools/status.rs @@ -252,6 +252,28 @@ impl ServerState { ); } + // Disclose what a `fresh` verdict does NOT cover, on the named tool an + // agent reads directly — not just in the session-start banner + // (clarion-26c7e52027). `fresh` compares already-indexed files' mtimes; a + // brand-new module in a not-yet-indexed top-level directory, or any + // uncommitted addition (undetectable on an untrusted corpus), can sit + // unseen behind it. `index_diff_get` reports committed/staged drift in + // detail (it shares the untracked blind spot); re-analyze is the remedy. 
+ let staleness_note = match snapshot.staleness() { + crate::snapshot::Staleness::Fresh => Some( + "\"fresh\" reflects already-indexed source files only; it does NOT detect \ + brand-new modules in a not-yet-indexed directory, nor uncommitted \ + additions. If source was added or moved since the last analyze, re-run \ + `loomweave analyze`. Use index_diff_get for committed/staged drift detail.", + ), + crate::snapshot::Staleness::StaleWorktree => Some( + "the working tree has untracked source files of already-indexed types that \ + the index has not seen (new modules not yet analyzed; see worktree_dirty). \ + Re-run `loomweave analyze` before relying on graph answers.", + ), + _ => None, + }; + let result = json!({ "project_root": root_display, "db_path": db_path.display().to_string(), @@ -269,6 +291,8 @@ impl ServerState { "briefing_blocked": briefing_blocked, }, "staleness": serde_json::to_value(snapshot.staleness()).unwrap_or(Value::Null), + "staleness_note": staleness_note, + "worktree_dirty": snapshot.worktree_dirty(), "scan_truncated": snapshot.scan_truncated(), "last_analyzed_at": snapshot.last_analyzed_at(), "git_sha": analyzed_git_sha, diff --git a/crates/loomweave-mcp/tests/storage_tools.rs b/crates/loomweave-mcp/tests/storage_tools.rs index f9040659..a9eb3b9f 100644 --- a/crates/loomweave-mcp/tests/storage_tools.rs +++ b/crates/loomweave-mcp/tests/storage_tools.rs @@ -4828,6 +4828,122 @@ async fn project_status_reports_counts_latest_run_and_plugins() { assert_eq!(result["filigree"], Value::Null); } +#[tokio::test] +async fn project_status_fresh_carries_staleness_note_caveat() { + // The named tool an agent reads directly must disclose what "fresh" omits — + // not only the session-start banner (clarion-26c7e52027). The seeded demo.py + // is older than a far-future run, so the verdict is Fresh. 
+ let (project, db_path) = open_project(); + let conn = Connection::open(&db_path).expect("open sqlite"); + insert_run( + &conn, + "run-fresh", + "2099-01-01T00:00:00.000Z", + "completed", + Some("2099-01-01T00:00:00.000Z"), + ); + drop(conn); + + let state = state_for(project.path(), &db_path); + let result = call_tool(&state, "project_status", json!({})).await["result"].clone(); + assert_eq!( + result["staleness"], "fresh", + "fixture must be fresh: {result}" + ); + let note = result["staleness_note"] + .as_str() + .expect("a fresh verdict must carry a staleness_note"); + assert!( + note.contains("loomweave analyze") && note.contains("not-yet-indexed"), + "staleness_note must disclose the not-yet-indexed gap and the re-analyze \ + remedy: {note}" + ); +} + +#[tokio::test] +async fn project_status_non_fresh_has_null_staleness_note() { + // A non-fresh verdict has no "fresh" claim to qualify, so the note is omitted. + // The seeded demo.py was just written (mtime ~now), so a past-dated run makes + // the source newer than the run → Stale, deterministically. + let (project, db_path) = open_project(); + let conn = Connection::open(&db_path).expect("open sqlite"); + insert_run( + &conn, + "run-1", + "2026-02-02T00:00:00.000Z", + "completed", + Some("2026-02-02T00:00:00.000Z"), + ); + drop(conn); + + let state = state_for(project.path(), &db_path); + let result = call_tool(&state, "project_status", json!({})).await["result"].clone(); + assert_ne!( + result["staleness"], "fresh", + "fixture must NOT be fresh: {result}" + ); + assert_eq!( + result["staleness_note"], + Value::Null, + "a non-fresh verdict must omit the staleness_note: {result}" + ); +} + +#[tokio::test] +async fn project_status_reports_stale_worktree_for_untracked_source() { + // The exact tool the dogfood report quoted (clarion-26c7e52027, ADR-045): a + // mtime-fresh index in a git work tree that has a brand-new untracked module. 
+ // project_status_get must report staleness="stale_worktree" + worktree_dirty + // = true, not a misleading bare "fresh". + let (project, db_path) = open_project(); + + // Make the project a git repo and commit everything seeded so far, so only + // the new module below is untracked. Skip cleanly if git is unavailable. + let git = |args: &[&str]| -> bool { + std::process::Command::new("git") + .args(args) + .current_dir(project.path()) + .status() + .is_ok_and(|s| s.success()) + }; + if !git(&["init", "-q"]) { + return; + } + let _ = git(&["config", "user.email", "t@t"]); + let _ = git(&["config", "user.name", "t"]); + git(&["add", "."]); + git(&["commit", "-q", "-m", "init"]); + + let conn = Connection::open(&db_path).expect("open sqlite"); + insert_run( + &conn, + "run-fresh", + "2099-01-01T00:00:00.000Z", + "completed", + Some("2099-01-01T00:00:00.000Z"), + ); + drop(conn); + // Brand-new untracked Python module the index never saw. + std::fs::write(project.path().join("hub.py"), "y = 2\n").expect("write untracked module"); + + let state = state_for(project.path(), &db_path); + let result = call_tool(&state, "project_status", json!({})).await["result"].clone(); + assert_eq!( + result["staleness"], "stale_worktree", + "untracked source must yield stale_worktree: {result}" + ); + assert_eq!( + result["worktree_dirty"], true, + "worktree_dirty must be true: {result}" + ); + assert!( + result["staleness_note"] + .as_str() + .is_some_and(|n| n.contains("loomweave analyze")), + "stale_worktree must carry a re-analyze note: {result}" + ); +} + #[tokio::test] async fn project_status_marks_skipped_no_plugins_run() { // AC#2: a skipped_no_plugins run is unmistakable as no index refresh. 
diff --git a/docs/loomweave/adr/ADR-045-worktree-source-staleness.md b/docs/loomweave/adr/ADR-045-worktree-source-staleness.md new file mode 100644 index 00000000..900ec0c8 --- /dev/null +++ b/docs/loomweave/adr/ADR-045-worktree-source-staleness.md @@ -0,0 +1,118 @@ +# ADR-045: Worktree-Source Staleness via Hardened `git ls-files --others` + +**Status**: Accepted +**Date**: 2026-06-06 +**Deciders**: qacona@gmail.com +**Context**: clarion-26c7e52027 (dogfood: `staleness:"fresh"` lied while +un-indexed top-level modules sat in the working tree) and its follow-up +clarion-d9cf8bcfa9. Builds on ADR-013/ADR-021 (untrusted-corpus posture) and the +`hardened_git` helper (clarion-4b5a8aff54). + +## Summary + +`project_snapshot` (the `loomweave://context` resource, the `loomweave hook +session-start` banner, and `project_status_get`) now reports a third "needs +re-analyze" signal beyond the mtime/structural passes: **worktree-source +drift**. When the index is otherwise mtime-fresh but the working tree contains an +**untracked source file of an already-indexed type**, the verdict becomes +`Staleness::StaleWorktree` and the snapshot carries `worktree_dirty: Some(true)`. + +Detection uses a **hardened, ignore-aware, hash-free** `git ls-files --others +--exclude-standard`, scoped to the file extensions Loomweave has actually +ingested. It is fail-soft: `worktree_dirty` is `None` outside a git work tree, +when git is unavailable, or when nothing is ingested. + +## Context + +The mtime/structural freshness passes (ADR note in `snapshot.rs`) watch the +*direct parent directories of ingested files*, and deliberately never watch the +project root (`analyze` writes `.loomweave/` under it, which would wedge every +check to a permanent `Stale`). The documented consequence: a brand-new +**top-level** directory of source the index has never seen is invisible — it +reports `Fresh`. 
That is the exact dogfood failure: new specimen modules added to +a tree, `project_status_get` still says `fresh`, and an agent trusts it and gives +wrong "what calls X" answers. + +Catching un-indexed worktree source requires looking at the working tree. The +untrusted-corpus posture forbids the obvious tool: `git status` must **hash** +working-tree content to detect modifications, which runs a repo-controlled +`filter.<driver>.clean` selected by `$GIT_DIR/info/attributes` — a code-execution +vector no git config can disable (see `hardened_git` module docs). That is why +the SEI rename diff and `index_diff` use `git diff --cached` and a stat-based +per-file scan, never `git status`. + +## Decision + +Use `git ls-files --others --exclude-standard` through `hardened_git_command`, +exposed as `loomweave_core::list_untracked_files`. + +1. **Safe under the untrusted-corpus posture.** `ls-files --others` *enumerates* + untracked, non-ignored paths; it never computes blob hashes of working-tree + content, so the `filter.clean` vector is never triggered. This is verified + empirically, not by reasoning alone: `hardened_git::tests:: + ls_files_others_does_not_run_clean_filter` booby-traps a repo with `* filter=pwn` + in `$GIT_DIR/info/attributes` and a repo-local `filter.pwn.clean` that would + create a marker file, then asserts the marker does **not** appear after the + call. The hardened command also sets `core.fsmonitor=false` and + `GIT_OPTIONAL_LOCKS=0`, so no fsmonitor program runs and the index is not + written. + +2. **No false-positives.** A naive "any untracked file ⇒ dirty" would flag a + scratch `notes.txt` and make a genuinely-fresh index look dirty. The signal is + therefore **scoped to the file extensions present in `entities.source_file_path`** + — only an untracked file whose extension Loomweave actually ingests counts. An + untracked `notes.txt` never flags; an untracked `hub.py` (when `.py` is + indexed) does.
`--exclude-standard` further drops `.gitignore`d paths, so + build dirs, virtualenvs, and the ignored `.loomweave/` sidecars never appear. + +3. **Verdict + field.** When mtime/structural say `Fresh` and the worktree signal + is positive, the verdict is `Staleness::StaleWorktree` (serialized + `"stale_worktree"`); `worktree_dirty: Option<bool>` carries the raw signal on + every snapshot and in `project_status_get`. `StaleWorktree` is treated as + "stale" by orientation consumers; the session-start banner names the remedy + (`loomweave analyze`). + +4. **Fail-soft.** Any git failure, a non-repo working directory, or an empty + ingested-extension set yields `worktree_dirty: None` and leaves the + mtime-derived verdict unchanged. Detection never sets `degraded` (a missing + git binary is environmental) and never errors — `project_snapshot` stays + infallible. + +### What it does NOT cover (deliberate scope) + +- **Untracked-source enumeration runs git at session start.** It is hash-free and + ignore-pruned (comparable to `git status` minus hashing), and fail-soft, but it + is the first git invocation in the session-start snapshot path. Accepted for the + honesty win. +- **Modified-but-unstaged edits to *tracked* indexed files** remain the job of the + stat-based mtime pass (→ `Stale`) and `index_diff_get`'s `diff --cached`; they + are not what `ls-files --others` reports. +- **Mid-serve committable snapshots** are still `loomweave db backup`'s job + (ADR-005 note; clarion-cdee445ed8), unrelated to this verdict. + +## Consequences + +- `Staleness` gains a `StaleWorktree` variant — a wire-vocabulary addition to + `loomweave://context` and `project_status_get` (`"stale_worktree"`). Consumers + that switch on `staleness` must handle it; `orientation` treats it as stale. +- `ProjectSnapshot` gains `worktree_dirty: Option<bool>`, surfaced on the context + resource and `project_status_get`.
+- `loomweave_core` gains `list_untracked_files`, the only sanctioned untracked + probe, carrying the security contract + the empirical test. +- The session-start banner gives a concrete `StaleWorktree` line instead of only + the `Fresh` caveat when un-indexed source is present in a git repo. + +## Alternatives Considered + +- **`git status --porcelain`** — rejected: hashes the working tree, re-opening the + `filter.clean` RCE the whole `hardened_git` posture exists to close. +- **commit-mismatch (`rev-parse HEAD` vs `analyzed_at_commit`) and/or + `diff --cached` only** — rejected as the *sole* signal: both report "clean" for + the reported untracked-new-file case, i.e. misleadingly-clean — the original bug + wearing a new field name. +- **Watching the project root's mtime** — rejected: the root is poisoned by + `.loomweave/` writes and by unrelated top-level churn (editor temp files, + `.DS_Store`), trading a false-negative for frequent false-positives. +- **Prose-only honest banner (no detection)** — shipped first as the conservative + mitigation (clarion-26c7e52027) and retained for the non-git case; this ADR adds + real detection where a git work tree makes it safe and accurate. 
diff --git a/docs/loomweave/adr/README.md b/docs/loomweave/adr/README.md index 6d098a1c..f2c16778 100644 --- a/docs/loomweave/adr/README.md +++ b/docs/loomweave/adr/README.md @@ -45,6 +45,7 @@ This folder is the canonical home for authored Loomweave architecture decision r | [ADR-042](./ADR-042-hmac-freshness-and-replay-window.md) | HMAC freshness and replay window — timestamp + nonce headers, crate-backed HMAC, process-local replay cache | Accepted | | [ADR-043](./ADR-043-edge-reanalysis-replacement.md) | Edge reanalysis replacement — per-source-file anchored-edge replacement and edge metadata upsert; amends ADR-026 | Accepted | | [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — `.loomweave/ephemeral.port` as a normative cross-product file contract (loopback-only, port-only, atomic) + consume-time resolution precedence (explicit > file > config > none), per-project deterministic port, installer stops pinning 9111; relates to ADR-034 | Accepted | +| [ADR-045](./ADR-045-worktree-source-staleness.md) | Worktree-source staleness — `Staleness::StaleWorktree` + `worktree_dirty` via hardened, hash-free `git ls-files --others` scoped to ingested extensions; closes the unwatched-top-level-dir blind spot without `git status`'s filter-RCE vector; builds on ADR-013/021 untrusted-corpus posture | Accepted | ## Backlog still tracked in the detailed design From 5da9ccdda64ff4b7ed176a80abfbaa7f3c852815 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:51:56 +1000 Subject: [PATCH 22/60] docs(getting-started): macOS Gatekeeper quarantine workaround (V11-CI-04) Release archives are unsigned (ADR-033), so macOS Gatekeeper blocks the downloaded loomweave binary on first launch. Add a Troubleshooting entry with the `xattr -d com.apple.quarantine` fix and the GUI "Open Anyway" alternative. Closes clarion-03dfa1f94d. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/operator/getting-started.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/operator/getting-started.md b/docs/operator/getting-started.md index d63b15ad..0908960c 100644 --- a/docs/operator/getting-started.md +++ b/docs/operator/getting-started.md @@ -364,6 +364,22 @@ signals. A `loomweave doctor` subcommand that surfaces discovery state at exit is on the v2.0 roadmap; for v1.0 the diagnostic is the WARN line plus the `which loomweave-plugin-*` check above. +### macOS: "loomweave cannot be opened because the developer cannot be verified" + +The release archives are not notarized (ADR-033 ships unsigned binaries), so +macOS Gatekeeper quarantines the downloaded `loomweave` binary and refuses the +first launch with a developer-verification error. Clear the quarantine +attribute on the extracted binary before installing it: + +```bash +xattr -d com.apple.quarantine ./loomweave-aarch64-apple-darwin/loomweave +``` + +Alternatively, approve it once from the GUI — attempt to run it, then +**System Settings → Privacy & Security → "Open Anyway"**. Either is a one-time +step per downloaded binary; a source build (the fallback under [§1](#1-install)) +is never quarantined. Notarized release artifacts are on the post-1.0 roadmap. + ### "secret_present" block fires on a real file Add the file to `.loomweave/secrets-baseline.yaml` with a written justification From b598ebf1a46a98e471772fbfebb8fc6a4e8d13f6 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:51:56 +1000 Subject: [PATCH 23/60] chore(storage): backfill ADR-024 published_build marker to v1.0.0 v1.0.0 was the first externally-published build; 0001_initial_schema.sql is byte-identical at v1.0.0 and HEAD, and all schema changes since are additive 0002+ migrations. 
Backfilling the marker activates scripts/check-migration-retirement.py's guard (previously pre-trigger despite shipped releases): in-place edits to 0001 now fail CI, enforcing additive-only. Closes clarion-b20448b3ac. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-storage/migrations/published_build.txt | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 crates/loomweave-storage/migrations/published_build.txt diff --git a/crates/loomweave-storage/migrations/published_build.txt b/crates/loomweave-storage/migrations/published_build.txt new file mode 100644 index 00000000..819fd1ed --- /dev/null +++ b/crates/loomweave-storage/migrations/published_build.txt @@ -0,0 +1,9 @@ +# ADR-024 in-place migration-retirement marker (clarion-b20448b3ac). +# +# Names the first externally-published build whose 0001_initial_schema.sql is +# frozen. 0001 has been byte-identical since this tag (verified: `git diff +# v1.0.0 HEAD -- .../0001_initial_schema.sql` is empty), and every schema change +# since has been an additive 0002+ migration. With this marker present, +# scripts/check-migration-retirement.py fails if 0001 ever diverges from this +# ref — later schema changes must be additive migrations, never in-place edits. +v1.0.0 From 3c8feae6cd727c19e319f8cac163cfee4da64f36 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:53:23 +1000 Subject: [PATCH 24/60] ci(release): add macOS aarch64 verify gate mirroring ci.yml (clarion-47d395e03c) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit release.yml's `verify` job was Linux-only, so a macOS-only clippy/--all-targets regression — caught on PRs by ci.yml's rust-macos job but not re-verified at release — could pass `verify` and proceed to the build/publish jobs (the aarch64 build leg only builds --bins, not tests/all-targets). 
Add a `verify-macos` job mirroring ci.yml's rust-macos (clippy + bin build on macos-14) and add it to the needs chain of build-rust, build-wheels, and build-plugin. No new runner dependency — build-rust already uses macos-14. Closes clarion-47d395e03c. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/release.yml | 40 ++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9aa27abe..f7794065 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -175,8 +175,42 @@ jobs: - name: Phase 3 subsystems run: CARGO_BUILD=0 bash tests/e2e/phase3_subsystems.sh + # macOS (aarch64) pre-release gate — mirrors ci.yml's rust-macos job so a + # macOS-only clippy/--all-targets regression cannot reach the build/publish + # jobs. ci.yml gates every PR, but release.yml's Linux-only `verify` left a + # gap for a macOS-only test/all-targets lint issue that does not break the + # --bin build (clarion-47d395e03c). build-rust already uses macos-14, so no + # new runner dependency. Restore the x86_64 (macos-13) leg here alongside the + # build matrix when those runners recover (clarion-ec389a8e72). 
+ verify-macos: + name: Verify macOS (${{ matrix.target }}) + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: false + matrix: + include: + - target: aarch64-apple-darwin + runner: macos-14 + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 + + - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 + with: + toolchain: stable + components: clippy + + - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 + with: + key: release-verify-${{ matrix.target }} + + - name: clippy + run: cargo clippy --workspace --all-targets --all-features -- -D warnings + + - name: build workspace bins + run: cargo build --workspace --bins + build-rust: - needs: [verify] + needs: [verify, verify-macos] name: Build loomweave (${{ matrix.target }}) runs-on: ${{ matrix.runner }} strategy: @@ -242,7 +276,7 @@ jobs: retention-days: 7 build-wheels: - needs: [verify] + needs: [verify, verify-macos] name: Build loomweave wheel (${{ matrix.target }}) runs-on: ${{ matrix.runner }} # maturin bin-wheels for PyPI. Matrix mirrors `build-rust` (Linux x86_64 + @@ -293,7 +327,7 @@ jobs: retention-days: 7 build-plugin: - needs: [verify] + needs: [verify, verify-macos] name: Build Python plugin sdist runs-on: ubuntu-latest steps: From 8be269d6dc66ba001bfa3b52adaf2034224c6eb9 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:56:21 +1000 Subject: [PATCH 25/60] fix(storage): drop dead entity_fts.content_text column (migration 0009, V11-STO-06) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit content_text shipped in 0001 reserved for an on-demand source-text projection that was never built: the entities_ai trigger always wrote '', the entities_au trigger never touched it, and no query reads it (search MATCHes the table, not the column). It was permanently-empty schema drift; content search is served by the ADR-040 embeddings sidecar. 
FTS5 has no ALTER DROP COLUMN, so migration 0009 recreates entity_fts and its triggers without it and rebuilds the index from entities. Behaviour-preserving — only a never-populated, never-read column goes. Bumps CURRENT_SCHEMA_VERSION to 9; updates the schema_migrations expectation tests and the authoritative detailed-design.md FTS block; adds a regression test asserting content_text is gone and MATCH search still works. Closes clarion-716449c371. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../migrations/0009_drop_fts_content_text.sql | 58 +++++++++++++++++++ crates/loomweave-storage/src/schema.rs | 7 ++- .../loomweave-storage/tests/schema_apply.rs | 52 ++++++++++++++++- docs/loomweave/1.0/detailed-design.md | 14 ++--- 4 files changed, 121 insertions(+), 10 deletions(-) create mode 100644 crates/loomweave-storage/migrations/0009_drop_fts_content_text.sql diff --git a/crates/loomweave-storage/migrations/0009_drop_fts_content_text.sql b/crates/loomweave-storage/migrations/0009_drop_fts_content_text.sql new file mode 100644 index 00000000..e7af0c68 --- /dev/null +++ b/crates/loomweave-storage/migrations/0009_drop_fts_content_text.sql @@ -0,0 +1,58 @@ +-- Migration 0009: drop the dead entity_fts.content_text column (V11-STO-06, +-- clarion-716449c371). +-- +-- content_text shipped in 0001 reserved for an on-demand source-text projection +-- that was never implemented: the entities_ai trigger always wrote '', the +-- entities_au trigger never touched it, and no query reads it (search MATCHes +-- the table, not the column). Semantic/content search is instead served by the +-- ADR-040 embeddings sidecar, so the column is permanently-empty drift that +-- misrepresents the FTS surface. FTS5 has no ALTER ... DROP COLUMN, so recreate +-- the virtual table and its triggers without it. Behaviour-preserving: only a +-- never-populated, never-read column is removed. 
+ +BEGIN; + +DROP TRIGGER IF EXISTS entities_ai; +DROP TRIGGER IF EXISTS entities_au; +DROP TRIGGER IF EXISTS entities_ad; +DROP TABLE IF EXISTS entity_fts; + +CREATE VIRTUAL TABLE entity_fts USING fts5( + entity_id UNINDEXED, + name, + short_name, + summary_text, + tokenize = 'porter unicode61' +); + +-- FTS5 triggers keep entity_fts synchronised with entities (content_text dropped). +CREATE TRIGGER entities_ai AFTER INSERT ON entities BEGIN + INSERT INTO entity_fts (entity_id, name, short_name, summary_text) + VALUES ( + new.id, + new.name, + new.short_name, + COALESCE(json_extract(new.summary, '$.briefing.purpose'), '') + ); +END; +CREATE TRIGGER entities_au AFTER UPDATE ON entities BEGIN + UPDATE entity_fts + SET name = new.name, + short_name = new.short_name, + summary_text = COALESCE(json_extract(new.summary, '$.briefing.purpose'), '') + WHERE entity_id = new.id; +END; +CREATE TRIGGER entities_ad AFTER DELETE ON entities BEGIN + DELETE FROM entity_fts WHERE entity_id = old.id; +END; + +-- Rebuild the index from existing entities (the recreated vtable starts empty). +INSERT INTO entity_fts (entity_id, name, short_name, summary_text) +SELECT id, name, short_name, + COALESCE(json_extract(summary, '$.briefing.purpose'), '') +FROM entities; + +INSERT INTO schema_migrations (version, name, applied_at) +VALUES (9, '0009_drop_fts_content_text', strftime('%Y-%m-%dT%H:%M:%fZ', 'now')); + +COMMIT; diff --git a/crates/loomweave-storage/src/schema.rs b/crates/loomweave-storage/src/schema.rs index d11d8e32..1c474e56 100644 --- a/crates/loomweave-storage/src/schema.rs +++ b/crates/loomweave-storage/src/schema.rs @@ -55,12 +55,17 @@ const MIGRATIONS: &[Migration] = &[ name: "0008_run_owner_heartbeat", sql: include_str!("../migrations/0008_run_owner_heartbeat.sql"), }, + Migration { + version: 9, + name: "0009_drop_fts_content_text", + sql: include_str!("../migrations/0009_drop_fts_content_text.sql"), + }, ]; /// Highest migration version known to this build. 
Mirrored into the /// `SQLite` `user_version` header (STO-02) so a future-built database is /// refused at open instead of silently corrupting state. -pub const CURRENT_SCHEMA_VERSION: u32 = 8; +pub const CURRENT_SCHEMA_VERSION: u32 = 9; const _CURRENT_SCHEMA_VERSION_MATCHES_LAST_MIGRATION: () = { // Compile-time check: `CURRENT_SCHEMA_VERSION` must equal the highest diff --git a/crates/loomweave-storage/tests/schema_apply.rs b/crates/loomweave-storage/tests/schema_apply.rs index b94679ec..5da82010 100644 --- a/crates/loomweave-storage/tests/schema_apply.rs +++ b/crates/loomweave-storage/tests/schema_apply.rs @@ -630,6 +630,53 @@ fn fts_trigger_populates_entity_fts_on_insert() { assert_eq!(matched_id, "python:function:auth.refresh"); } +#[test] +fn migration_0009_drops_dead_fts_content_text_column() { + // V11-STO-06 / clarion-716449c371: the never-populated, never-read + // content_text column is gone after 0009, and search via the recreated + // virtual table + triggers still works. + let tempdir = tempfile::tempdir().unwrap(); + let conn = open_fresh(&tempdir); + + let sql: String = conn + .query_row( + "SELECT sql FROM sqlite_master WHERE name='entity_fts'", + [], + |row| row.get(0), + ) + .unwrap(); + assert!( + !sql.contains("content_text"), + "entity_fts must not declare content_text after 0009; sql was: {sql}" + ); + + let summary_json = r#"{"briefing": {"purpose": "rotate signing keys"}}"#; + conn.execute( + "INSERT INTO entities (id, plugin_id, kind, name, short_name, properties, summary, \ + created_at, updated_at) \ + VALUES (?1, ?2, ?3, ?4, ?5, '{}', ?6, \ + strftime('%Y-%m-%dT%H:%M:%fZ', 'now'), strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))", + params![ + "python:function:auth.rotate", + "python", + "function", + "auth.rotate", + "rotate", + summary_json, + ], + ) + .unwrap(); + + let matched_id: String = conn + .query_row( + "SELECT entity_id FROM entity_fts WHERE entity_fts MATCH 'rotate'", + [], + |row| row.get(0), + ) + .expect("FTS search still works 
after content_text drop"); + assert_eq!(matched_id, "python:function:auth.rotate"); +} + #[test] fn edges_table_has_no_id_column() { // ADR-026 decision 4: drop synthetic `id` PK from edges. Natural key @@ -795,7 +842,7 @@ fn migrations_are_idempotent() { let tempdir = tempfile::tempdir().unwrap(); let mut conn = open_fresh(&tempdir); schema::apply_migrations(&mut conn).expect("second apply should be a no-op"); - assert_eq!(schema::applied_count(&conn).unwrap(), 8); + assert_eq!(schema::applied_count(&conn).unwrap(), 9); let tables_after = table_names(&conn); assert!(tables_after.contains(&"entities".to_owned())); } @@ -809,7 +856,7 @@ fn schema_migrations_records_each_applied_migration() { row.get(0) }) .unwrap(); - assert_eq!(count, 8); + assert_eq!(count, 9); let names: Vec = { let mut stmt = conn .prepare("SELECT name FROM schema_migrations ORDER BY version") @@ -828,6 +875,7 @@ fn schema_migrations_records_each_applied_migration() { "0006_wardline_taint_sei", "0007_run_analyzed_commit", "0008_run_owner_heartbeat", + "0009_drop_fts_content_text", ] ); } diff --git a/docs/loomweave/1.0/detailed-design.md b/docs/loomweave/1.0/detailed-design.md index af81752d..8e449235 100644 --- a/docs/loomweave/1.0/detailed-design.md +++ b/docs/loomweave/1.0/detailed-design.md @@ -739,25 +739,25 @@ CREATE TABLE runs ( analyzed_at_commit TEXT -- git HEAD analyzed against (WS9 / SEI §6, migration 0007); NULL off-git ); --- FTS5 for text search +-- FTS5 for text search. (0001 also declared a `content_text` column reserved +-- for an on-demand source projection; it was never populated and was dropped in +-- migration 0009 — content search is served by the ADR-040 embeddings sidecar.) CREATE VIRTUAL TABLE entity_fts USING fts5( entity_id UNINDEXED, - name, short_name, summary_text, content_text, + name, short_name, summary_text, tokenize = 'porter unicode61' ); -- FTS5 triggers keep entity_fts synchronised with entities. 
-- summary_text is derived from the briefing's purpose + patterns + risks --- (short textual projection); content_text is populated on demand by the --- plugin during Phase 1 via the `file_analyzed` message. +-- (short textual projection). CREATE TRIGGER entities_ai AFTER INSERT ON entities BEGIN - INSERT INTO entity_fts (entity_id, name, short_name, summary_text, content_text) + INSERT INTO entity_fts (entity_id, name, short_name, summary_text) VALUES ( new.id, new.name, new.short_name, - COALESCE(json_extract(new.summary, '$.briefing.purpose'), ''), - '' + COALESCE(json_extract(new.summary, '$.briefing.purpose'), '') ); END; CREATE TRIGGER entities_au AFTER UPDATE ON entities BEGIN From 5675f4a9e9b4ecaf5a43591634bdcbb2f80ac829 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 18:08:49 +1000 Subject: [PATCH 26/60] chore(release): bump to 1.1.0rc2 (Cargo 1.1.0-rc2 / PEP440 1.1.0rc2) Workspace + Python plugin in lockstep. rc2 rolls up the dogfood-friction fixes and deferred v1.1 engineering items landed on this branch: worktree-aware staleness (ADR-045), .gitignore instance_id/*.lock (ADR-005), WAL checkpoint(TRUNCATE), entity_fts.content_text drop (migration 0009), the macOS aarch64 release verify gate, the Gatekeeper doc, and the ADR-024 marker backfill. No package published for release candidates. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 47 +++++++++++++++++++ Cargo.lock | 16 +++---- Cargo.toml | 2 +- crates/loomweave-cli/Cargo.toml | 14 +++--- crates/loomweave-cli/pyproject.toml | 4 +- crates/loomweave-federation/Cargo.toml | 2 +- crates/loomweave-mcp/Cargo.toml | 6 +-- crates/loomweave-plugin-fixture/Cargo.toml | 2 +- crates/loomweave-storage/Cargo.toml | 2 +- plugins/python/plugin.toml | 2 +- plugins/python/pyproject.toml | 2 +- .../src/loomweave_plugin_python/__init__.py | 2 +- plugins/python/tests/test_package.py | 4 +- plugins/python/tests/test_server.py | 2 +- plugins/python/uv.lock | 2 +- 15 files changed, 78 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f69bd967..baf2017b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,53 @@ only when an incompatible change is made to that surface. See ## [Unreleased] +## [1.1.0rc2] — 2026-06-06 + +Second 1.1 release candidate, rolling up dogfood-friction fixes and deferred +v1.1 engineering items on top of rc1. No package is published for release +candidates. (Cargo SemVer `1.1.0-rc2`; Python wheels normalise to PEP 440 +`1.1.0rc2`.) + +### Added + +- **Worktree-aware staleness (ADR-045).** `project_status_get`, the + `loomweave://context` resource, and the session-start banner now surface + `indexed_at_commit` + `worktree_dirty`, and a new `Staleness::StaleWorktree` + verdict fires when an otherwise-fresh index has untracked source on disk. + Detection uses a hardened, hash-free `git ls-files --others` scoped to ingested + source extensions (false-positive guard), proven filter-safe by test — closes + the "fresh lies about uncommitted code" friction (clarion-26c7e52027, + clarion-d9cf8bcfa9). 
+ +### Changed + +- **`.loomweave/.gitignore` (ADR-005)** now also excludes `instance_id` and + `*.lock`, so `git add -A` no longer stages the per-project serve fingerprint or + the analyze advisory lock; ADR-005 documents the live-index commit hazard and + points at `loomweave db backup` (clarion-7381e6382d). +- **WAL hygiene.** The storage writer-actor runs `PRAGMA wal_checkpoint(TRUNCATE)` + after each committed run, so the on-disk `loomweave.db` reflects committed state + while `serve` is alive instead of lagging behind a multi-MB WAL sidecar + (clarion-cdee445ed8). +- **Release CI parity.** `release.yml` gains a macOS aarch64 `verify-macos` gate + (mirroring `ci.yml`) wired into the build/publish `needs` chain, closing the + gap where a macOS-only lint/test regression could reach the build jobs + (clarion-47d395e03c). + +### Removed + +- **Dead `entity_fts.content_text` column** dropped via migration 0009 — it was + never populated and never read (content search is served by the ADR-040 + embeddings sidecar). `CURRENT_SCHEMA_VERSION` is now 9 (clarion-716449c371). + +### Docs + +- macOS Gatekeeper quarantine workaround added to `getting-started.md` + Troubleshooting (clarion-03dfa1f94d). +- ADR-024 in-place migration-retirement guard activated: `published_build.txt` + backfilled to `v1.0.0` (first published build; 0001 byte-identical since), so + later schema changes must be additive migrations (clarion-b20448b3ac). + ## [1.1.0rc1] — 2026-06-06 First 1.1 release candidate. 
No package is published for release candidates — diff --git a/Cargo.lock b/Cargo.lock index 39a16ebc..410073f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1057,7 +1057,7 @@ checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" [[package]] name = "loomweave-analysis" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "anyhow", "serde", @@ -1067,7 +1067,7 @@ dependencies = [ [[package]] name = "loomweave-cli" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "anyhow", "assert_cmd", @@ -1106,7 +1106,7 @@ dependencies = [ [[package]] name = "loomweave-core" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "async-trait", "nix", @@ -1123,7 +1123,7 @@ dependencies = [ [[package]] name = "loomweave-federation" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "blake3", "loomweave-core", @@ -1137,7 +1137,7 @@ dependencies = [ [[package]] name = "loomweave-mcp" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "async-trait", "blake3", @@ -1160,7 +1160,7 @@ dependencies = [ [[package]] name = "loomweave-plugin-fixture" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "loomweave-core", "nix", @@ -1169,7 +1169,7 @@ dependencies = [ [[package]] name = "loomweave-scanner" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "regex", "serde", @@ -1181,7 +1181,7 @@ dependencies = [ [[package]] name = "loomweave-storage" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "blake3", "deadpool-sqlite", diff --git a/Cargo.toml b/Cargo.toml index de77d4aa..a8ac37b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] [workspace.package] -version = "1.1.0-rc1" +version = "1.1.0-rc2" edition = "2024" license = "MIT" repository = "https://github.com/foundryside-dev/loomweave" diff --git a/crates/loomweave-cli/Cargo.toml b/crates/loomweave-cli/Cargo.toml index 9343d850..bbe56dd3 100644 --- a/crates/loomweave-cli/Cargo.toml +++ 
b/crates/loomweave-cli/Cargo.toml @@ -18,12 +18,12 @@ anyhow.workspace = true axum.workspace = true blake3.workspace = true clap.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } -loomweave-analysis = { path = "../loomweave-analysis", version = "1.1.0-rc1" } -loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc1" } -loomweave-mcp = { path = "../loomweave-mcp", version = "1.1.0-rc1" } -loomweave-scanner = { path = "../loomweave-scanner", version = "1.1.0-rc1" } -loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc1" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc2" } +loomweave-analysis = { path = "../loomweave-analysis", version = "1.1.0-rc2" } +loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc2" } +loomweave-mcp = { path = "../loomweave-mcp", version = "1.1.0-rc2" } +loomweave-scanner = { path = "../loomweave-scanner", version = "1.1.0-rc2" } +loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc2" } dotenvy.workspace = true fs2.workspace = true hmac.workspace = true @@ -46,7 +46,7 @@ uuid.workspace = true [dev-dependencies] assert_cmd.workspace = true -loomweave-plugin-fixture = { path = "../loomweave-plugin-fixture", version = "1.1.0-rc1" } +loomweave-plugin-fixture = { path = "../loomweave-plugin-fixture", version = "1.1.0-rc2" } rusqlite.workspace = true serde_json.workspace = true sha1.workspace = true diff --git a/crates/loomweave-cli/pyproject.toml b/crates/loomweave-cli/pyproject.toml index 72e23181..11c21c6c 100644 --- a/crates/loomweave-cli/pyproject.toml +++ b/crates/loomweave-cli/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "loomweave" -version = "1.1.0rc1" +version = "1.1.0rc2" description = "Loomweave — graph-aware code archaeology (Rust core)" readme = "../../README.md" requires-python = ">=3.11" @@ -15,7 +15,7 @@ classifiers = [ "Programming Language :: Rust", 
"Programming Language :: Python :: 3", ] -dependencies = ["loomweave-plugin-python==1.1.0rc1"] +dependencies = ["loomweave-plugin-python==1.1.0rc2"] [project.urls] Repository = "https://github.com/foundryside-dev/loomweave" diff --git a/crates/loomweave-federation/Cargo.toml b/crates/loomweave-federation/Cargo.toml index 335406e1..db2f9f1a 100644 --- a/crates/loomweave-federation/Cargo.toml +++ b/crates/loomweave-federation/Cargo.toml @@ -11,7 +11,7 @@ workspace = true [dependencies] blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc2" } reqwest.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/loomweave-mcp/Cargo.toml b/crates/loomweave-mcp/Cargo.toml index 684fd2ea..722e6208 100644 --- a/crates/loomweave-mcp/Cargo.toml +++ b/crates/loomweave-mcp/Cargo.toml @@ -12,9 +12,9 @@ workspace = true [dependencies] async-trait.workspace = true blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } -loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc1" } -loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc1" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc2" } +loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc2" } +loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc2" } reqwest.workspace = true rusqlite.workspace = true serde.workspace = true diff --git a/crates/loomweave-plugin-fixture/Cargo.toml b/crates/loomweave-plugin-fixture/Cargo.toml index 35f824d4..f1d1ac26 100644 --- a/crates/loomweave-plugin-fixture/Cargo.toml +++ b/crates/loomweave-plugin-fixture/Cargo.toml @@ -23,7 +23,7 @@ name = "loomweave-fixture-plugin" path = "src/main.rs" [dependencies] -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } +loomweave-core = { path = 
"../loomweave-core", version = "1.1.0-rc2" } serde_json.workspace = true [target.'cfg(unix)'.dependencies] diff --git a/crates/loomweave-storage/Cargo.toml b/crates/loomweave-storage/Cargo.toml index 726a0007..dac7a33a 100644 --- a/crates/loomweave-storage/Cargo.toml +++ b/crates/loomweave-storage/Cargo.toml @@ -11,7 +11,7 @@ workspace = true [dependencies] blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc2" } deadpool-sqlite.workspace = true rusqlite.workspace = true serde.workspace = true diff --git a/plugins/python/plugin.toml b/plugins/python/plugin.toml index 6e159265..93c4f75a 100644 --- a/plugins/python/plugin.toml +++ b/plugins/python/plugin.toml @@ -1,7 +1,7 @@ [plugin] name = "loomweave-plugin-python" plugin_id = "python" -version = "1.1.0rc1" +version = "1.1.0rc2" protocol_version = "1.0" # Bare basename per ADR-021 §Layer 1 + WP2 scrub commit eb0a41d — the host # refuses manifests whose `executable` carries any path component. 
diff --git a/plugins/python/pyproject.toml b/plugins/python/pyproject.toml index e26fb75d..46a98200 100644 --- a/plugins/python/pyproject.toml +++ b/plugins/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "loomweave-plugin-python" -version = "1.1.0rc1" +version = "1.1.0rc2" description = "Loomweave Python language plugin — v1.0 release" readme = "README.md" requires-python = ">=3.11" diff --git a/plugins/python/src/loomweave_plugin_python/__init__.py b/plugins/python/src/loomweave_plugin_python/__init__.py index 4b4fde08..5562b23f 100644 --- a/plugins/python/src/loomweave_plugin_python/__init__.py +++ b/plugins/python/src/loomweave_plugin_python/__init__.py @@ -1,3 +1,3 @@ """loomweave-plugin-python — Python language plugin for Loomweave.""" -__version__ = "1.1.0rc1" +__version__ = "1.1.0rc2" diff --git a/plugins/python/tests/test_package.py b/plugins/python/tests/test_package.py index 9cfae6ef..da595730 100644 --- a/plugins/python/tests/test_package.py +++ b/plugins/python/tests/test_package.py @@ -17,7 +17,7 @@ def _read_toml(path: Path) -> dict[str, Any]: def test_package_version_matches_pyproject() -> None: - assert loomweave_plugin_python.__version__ == "1.1.0rc1" + assert loomweave_plugin_python.__version__ == "1.1.0rc2" def test_plugin_version_lockstep_across_pyproject_manifest_and_module() -> None: @@ -42,7 +42,7 @@ def test_plugin_version_lockstep_across_pyproject_manifest_and_module() -> None: def test_manifest_declares_current_v1_ontology_only() -> None: manifest = _read_toml(_PLUGIN_ROOT / "plugin.toml") - assert manifest["plugin"]["version"] == "1.1.0rc1" + assert manifest["plugin"]["version"] == "1.1.0rc2" assert manifest["capabilities"]["runtime"]["wardline_aware"] is True assert manifest["integrations"]["wardline"]["expected_descriptor_version"] == ( EXPECTED_DESCRIPTOR_VERSION diff --git a/plugins/python/tests/test_server.py b/plugins/python/tests/test_server.py index 7abb5a1d..7be14705 100644 --- 
a/plugins/python/tests/test_server.py +++ b/plugins/python/tests/test_server.py @@ -86,7 +86,7 @@ def test_initialize_roundtrip() -> None: assert response["id"] == 1 result = response["result"] assert result["name"] == "loomweave-plugin-python" - assert result["version"] == "1.1.0rc1" + assert result["version"] == "1.1.0rc2" assert result["ontology_version"] == "0.7.0" assert set(result["capabilities"]) == {"wardline"} assert result["capabilities"]["wardline"]["status"] in { diff --git a/plugins/python/uv.lock b/plugins/python/uv.lock index 94aa0d89..aef6d0bd 100644 --- a/plugins/python/uv.lock +++ b/plugins/python/uv.lock @@ -464,7 +464,7 @@ wheels = [ [[package]] name = "loomweave-plugin-python" -version = "1.1.0rc1" +version = "1.1.0rc2" source = { editable = "." } dependencies = [ { name = "packaging" }, From 0a93731f726c7ac2f2f7ffd1d36cedd9d123a609 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 19:11:36 +1000 Subject: [PATCH 27/60] feat(cli): inject agent-orientation block into CLAUDE.md/AGENTS.md (install + doctor) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `loomweave install` now pushes a managed Loomweave block into the always-loaded CLAUDE.md / AGENTS.md context, mirroring Filigree's instruction injection, so an agent learns to ask Loomweave's MCP tools before re-grepping the tree. `loomweave doctor` verifies it and, with --fix, repairs it via the same idempotent installer. New `instructions.rs` manages a ``… `` span and edits ONLY its own bytes — it never truncates to EOF, so a co-resident Filigree/Wardline block in the same file survives every create/append/replace/malformed operation. Drift is a body content hash (not the marker version, so a version bump on identical content is not drift), and the malformed-repair strips all orphan start markers so it stays safe and converges in a single pass. 
- install: --instructions flag + InstallPlan plumbing; bare `install` does it. - doctor: Missing=warning (optional surface), Drifted/Malformed=problem; wired into both the text and JSON report paths with a next-action remediation. - thin embedded asset: a pointer to the MCP tools + the loomweave-workflow skill. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../assets/instructions/loomweave.md | 19 + crates/loomweave-cli/src/cli.rs | 8 +- crates/loomweave-cli/src/doctor.rs | 113 ++- crates/loomweave-cli/src/install.rs | 74 +- crates/loomweave-cli/src/instructions.rs | 765 ++++++++++++++++++ crates/loomweave-cli/src/main.rs | 5 + crates/loomweave-cli/tests/doctor.rs | 169 ++++ .../instructions-injection-plan.md | 224 +++++ 8 files changed, 1367 insertions(+), 10 deletions(-) create mode 100644 crates/loomweave-cli/assets/instructions/loomweave.md create mode 100644 crates/loomweave-cli/src/instructions.rs create mode 100644 docs/implementation/instructions-injection-plan.md diff --git a/crates/loomweave-cli/assets/instructions/loomweave.md b/crates/loomweave-cli/assets/instructions/loomweave.md new file mode 100644 index 00000000..d90bb596 --- /dev/null +++ b/crates/loomweave-cli/assets/instructions/loomweave.md @@ -0,0 +1,19 @@ +## Loomweave (code archaeology) + +This repo is indexed by Loomweave: it has pre-extracted the tree into a +queryable map of entities (functions, classes, modules, files), the call / +reference / import edges between them, and subsystem clusters. Before grepping +or re-reading the tree to answer "what calls X", "where is X defined", "what +subsystem owns X", or "find the thing that does Y" — ask Loomweave's MCP tools +(`mcp__loomweave__*`): `entity_find`, `entity_at`, `entity_callers_list`, +`entity_neighborhood_get`, `project_status_get`. + +Entity IDs are `{plugin}:{kind}:{qualified_name}` (e.g. +`python:function:pkg.mod.func`); subsystems are `core:subsystem:{hash}`. 
You +rarely type IDs — get one from `entity_find` or `entity_at`, then copy it +verbatim into the next tool. + +Index freshness and counts: `project_status_get` (or the `loomweave://context` +resource). If the index is stale, run `loomweave analyze `. + +Full workflow: the `loomweave-workflow` skill. diff --git a/crates/loomweave-cli/src/cli.rs b/crates/loomweave-cli/src/cli.rs index d5f93a0d..8723eb4a 100644 --- a/crates/loomweave-cli/src/cli.rs +++ b/crates/loomweave-cli/src/cli.rs @@ -52,7 +52,13 @@ pub enum Command { #[arg(long)] hooks: bool, - /// Do everything: .loomweave/ init + MCP config + skills + hooks. + /// Inject the Loomweave agent-orientation block into CLAUDE.md and + /// AGENTS.md (touching only Loomweave's own marker span). + #[arg(long)] + instructions: bool, + + /// Do everything: .loomweave/ init + MCP config + skills + hooks + + /// instructions. #[arg(long)] all: bool, }, diff --git a/crates/loomweave-cli/src/doctor.rs b/crates/loomweave-cli/src/doctor.rs index 8fbb9dc7..195d2447 100644 --- a/crates/loomweave-cli/src/doctor.rs +++ b/crates/loomweave-cli/src/doctor.rs @@ -1,10 +1,11 @@ //! `loomweave doctor [--fix]` — verify (and optionally repair) the installed //! agent-orientation surfaces. //! -//! Three surfaces are checked, each owned by an existing installer module: +//! Several surfaces are checked, each owned by an existing installer module: //! the `loomweave-workflow` skill pack ([`crate::skill_pack`]), the `SessionStart` -//! hook ([`crate::hooks_settings`]), and the Claude Code `.mcp.json` MCP -//! registration ([`crate::mcp_registration`]), plus the local +//! hook ([`crate::hooks_settings`]), the Claude Code `.mcp.json` MCP +//! registration ([`crate::mcp_registration`]), the `CLAUDE.md` / `AGENTS.md` +//! agent-orientation block ([`crate::instructions`]), and the local //! Loomweave/Filigree/Wardline binding files ([`crate::integration_bindings`]). //! The repair for each is that module's idempotent installer, so //! 
`doctor --fix` and `loomweave install` converge to the same state. @@ -30,10 +31,13 @@ use serde::Serialize; use serde_json::Value; use crate::hooks_settings::HookState; +use crate::instructions::InstructionsState; use crate::integration_bindings::BindingState; use crate::mcp_registration::McpState; use crate::skill_pack::SkillPackState; -use crate::{hook, hooks_settings, integration_bindings, mcp_registration, skill_pack}; +use crate::{ + hook, hooks_settings, instructions, integration_bindings, mcp_registration, skill_pack, +}; /// Run `loomweave doctor`. Returns `Ok(true)` iff every orientation surface is /// healthy after any requested repairs. @@ -66,6 +70,7 @@ pub fn run(path: &Path, fix: bool, json_output: bool) -> Result { tally += check_skill(&project_root, fix); tally += check_hook(&project_root, fix); tally += check_mcp(&project_root, fix); + tally += check_instructions(&project_root, fix); tally += check_integration_bindings(&project_root, fix); println!("--- index ---"); @@ -155,6 +160,7 @@ fn json_report(project_root: &Path, fix: bool) -> DoctorJsonReport { check_skill_json(project_root, fix), check_hook_json(project_root, fix), check_mcp_json(project_root, fix), + check_instructions_json(project_root, fix), check_http_config_json(project_root), check_filigree_url_json(project_root), check_sei_population_json(project_root), @@ -172,6 +178,9 @@ fn json_report(project_root: &Path, fix: bool) -> DoctorJsonReport { "hook.session_start" => { "Run `loomweave doctor --fix` or `loomweave install --hooks`.".to_owned() } + "instructions.block" => { + "Run `loomweave doctor --fix` or `loomweave install --instructions`.".to_owned() + } "mcp.registration" | "integration.bindings" => { "Run `loomweave doctor --fix`.".to_owned() } @@ -519,6 +528,54 @@ fn check_mcp_hygiene_json() -> DoctorJsonCheck { ) } +fn check_instructions_json(project_root: &Path, fix: bool) -> DoctorJsonCheck { + match instructions::instructions_state(project_root) { + 
InstructionsState::UpToDate => DoctorJsonCheck::ok( + "instructions.block", + "agent-orientation block present in CLAUDE.md + AGENTS.md", + ), + InstructionsState::Missing => { + let what = "agent-orientation block missing from CLAUDE.md / AGENTS.md"; + if !fix { + // Optional surface: absence is a warning, not a gate failure. + return DoctorJsonCheck::warning("instructions.block", what); + } + repair_instructions_json(project_root, what) + } + state => { + let what = match state { + InstructionsState::Drifted => { + "agent-orientation block drifted from the bundled copy" + } + InstructionsState::Malformed => { + "agent-orientation block malformed (dangling loomweave marker)" + } + InstructionsState::UpToDate | InstructionsState::Missing => unreachable!(), + }; + if !fix { + return DoctorJsonCheck::problem("instructions.block", what); + } + repair_instructions_json(project_root, what) + } + } +} + +fn repair_instructions_json(project_root: &Path, what: &str) -> DoctorJsonCheck { + match instructions::install_instructions(project_root) { + Ok(_) if instructions::instructions_state(project_root) == InstructionsState::UpToDate => { + DoctorJsonCheck::fixed("instructions.block", format!("{what}; fixed")) + } + Ok(_) => DoctorJsonCheck::problem( + "instructions.block", + format!("{what}; repair did not converge"), + ), + Err(err) => DoctorJsonCheck::problem( + "instructions.block", + format!("{what}; repair failed: {err}"), + ), + } +} + fn check_integration_bindings_json(project_root: &Path, fix: bool) -> DoctorJsonCheck { match integration_bindings::binding_state(project_root) { BindingState::Present => DoctorJsonCheck::ok( @@ -727,6 +784,54 @@ fn check_mcp(project_root: &Path, fix: bool) -> Tally { } } +fn check_instructions(project_root: &Path, fix: bool) -> Tally { + match instructions::instructions_state(project_root) { + InstructionsState::UpToDate => { + ok("agent-orientation block present in CLAUDE.md + AGENTS.md") + } + // Optional surface: the same guidance 
ships via the MCP preamble and the + // loomweave-workflow skill, so a missing block is advisory — never a gate + // failure. Mirrors the integration-bindings severity model. + InstructionsState::Missing => { + let what = "agent-orientation block missing from CLAUDE.md / AGENTS.md"; + if !fix { + return warn(what, Some("loomweave install --instructions")); + } + repair_instructions(project_root, what) + } + // Drifted / Malformed fail the gate: a stale or dangling block is a + // genuinely broken state. The repair is safe because it rewrites only + // Loomweave's own marker span. + state => { + let what = match state { + InstructionsState::Drifted => { + "agent-orientation block drifted from the bundled copy" + } + InstructionsState::Malformed => { + "agent-orientation block malformed (dangling loomweave marker)" + } + InstructionsState::UpToDate | InstructionsState::Missing => unreachable!(), + }; + if !fix { + return problem(what, Some("loomweave doctor --fix")); + } + repair_instructions(project_root, what) + } + } +} + +/// Shared `--fix` repair for the instructions block: re-inject, then re-classify +/// to confirm convergence. 
+fn repair_instructions(project_root: &Path, what: &str) -> Tally { + match instructions::install_instructions(project_root) { + Ok(_) if instructions::instructions_state(project_root) == InstructionsState::UpToDate => { + ok(&format!("{what} — fixed")) + } + Ok(_) => problem(&format!("{what} — repair did not converge"), None), + Err(err) => problem(&format!("{what} — repair failed: {err}"), None), + } +} + fn check_integration_bindings(project_root: &Path, fix: bool) -> Tally { match integration_bindings::binding_state(project_root) { BindingState::Present => { diff --git a/crates/loomweave-cli/src/install.rs b/crates/loomweave-cli/src/install.rs index f8f5abcc..55c5c775 100644 --- a/crates/loomweave-cli/src/install.rs +++ b/crates/loomweave-cli/src/install.rs @@ -131,6 +131,7 @@ pub enum InstallComponent { Skills, CodexSkills, Hooks, + Instructions, } /// What `loomweave install` should do, resolved from the CLI flags. @@ -155,6 +156,7 @@ pub enum InstallPlan { skills: bool, codex_skills: bool, hooks: bool, + instructions: bool, }, /// No flags or `--all`: initialise `.loomweave/` + every integration. All, @@ -176,6 +178,7 @@ impl InstallPlan { skills: components.contains(&InstallComponent::Skills), codex_skills: components.contains(&InstallComponent::CodexSkills), hooks: components.contains(&InstallComponent::Hooks), + instructions: components.contains(&InstallComponent::Instructions), } } } @@ -229,6 +232,20 @@ impl InstallPlan { pub fn hooks(self) -> bool { matches!(self, Self::All | Self::Components { hooks: true, .. }) } + + /// Whether to inject the agent-orientation block into `CLAUDE.md` / + /// `AGENTS.md`. + #[must_use] + pub fn instructions(self) -> bool { + matches!( + self, + Self::All + | Self::Components { + instructions: true, + .. + } + ) + } } /// Run the `install` subcommand. 
@@ -283,6 +300,10 @@ pub fn run( install_hooks(&project_root)?; } + if plan.instructions() { + install_instruction_blocks(&project_root)?; + } + if matches!(plan, InstallPlan::All) { install_integration_bindings(&project_root)?; } @@ -300,10 +321,11 @@ fn validate_plan(plan: InstallPlan) -> Result<()> { && !plan.skills() && !plan.codex_skills() && !plan.hooks() + && !plan.instructions() { bail!( "nothing to install: pass --claude-code, --codex, --skills, \ - --codex-skills, --hooks, --all, \ + --codex-skills, --hooks, --instructions, --all, \ or run bare `loomweave install` to do everything." ); } @@ -445,6 +467,20 @@ fn install_hooks(project_root: &Path) -> Result<()> { Ok(()) } +fn install_instruction_blocks(project_root: &Path) -> Result<()> { + let report = crate::instructions::install_instructions(project_root) + .context("inject loomweave instructions into CLAUDE.md / AGENTS.md")?; + if report.changed { + println!( + "Injected loomweave instructions block into {}/{{CLAUDE,AGENTS}}.md", + project_root.display() + ); + } else { + println!("loomweave instructions block already up to date"); + } + Ok(()) +} + fn install_integration_bindings(project_root: &Path) -> Result<()> { let changed = crate::integration_bindings::install_bindings(project_root) .context("install local Loomweave/Filigree/Wardline integration bindings")?; @@ -504,6 +540,7 @@ mod tests { assert!(naked.skills()); assert!(naked.codex_skills()); assert!(naked.hooks()); + assert!(naked.instructions()); // --skills: skills only, no init. let skills = InstallPlan::from_components(false, &[InstallComponent::Skills]); @@ -514,7 +551,8 @@ mod tests { codex: false, skills: true, codex_skills: false, - hooks: false + hooks: false, + instructions: false } ); assert!(!skills.init_loomweave()); @@ -523,6 +561,24 @@ mod tests { assert!(skills.skills()); assert!(!skills.codex_skills()); assert!(!skills.hooks()); + assert!(!skills.instructions()); + + // --instructions: instruction blocks only, no init. 
+ let instr = InstallPlan::from_components(false, &[InstallComponent::Instructions]); + assert_eq!( + instr, + InstallPlan::Components { + claude_code: false, + codex: false, + skills: false, + codex_skills: false, + hooks: false, + instructions: true + } + ); + assert!(!instr.init_loomweave()); + assert!(instr.instructions()); + assert!(!instr.skills()); // --hooks: hooks only, no init. let hooks = InstallPlan::from_components(false, &[InstallComponent::Hooks]); @@ -533,7 +589,8 @@ mod tests { codex: false, skills: false, codex_skills: false, - hooks: true + hooks: true, + instructions: false } ); assert!(!hooks.init_loomweave()); @@ -552,6 +609,7 @@ mod tests { assert!(all.skills()); assert!(all.codex_skills()); assert!(all.hooks()); + assert!(all.instructions()); // Multiple component flags: selected components only, still no init. let both = InstallPlan::from_components( @@ -562,6 +620,7 @@ mod tests { InstallComponent::Skills, InstallComponent::CodexSkills, InstallComponent::Hooks, + InstallComponent::Instructions, ], ); assert_eq!( @@ -571,7 +630,8 @@ mod tests { codex: true, skills: true, codex_skills: true, - hooks: true + hooks: true, + instructions: true } ); assert!(!both.init_loomweave()); @@ -580,6 +640,7 @@ mod tests { assert!(both.skills()); assert!(both.codex_skills()); assert!(both.hooks()); + assert!(both.instructions()); } #[test] @@ -595,12 +656,14 @@ mod tests { &[InstallComponent::Skills], &[InstallComponent::CodexSkills], &[InstallComponent::Hooks], + &[InstallComponent::Instructions], &[ InstallComponent::ClaudeCode, InstallComponent::Codex, InstallComponent::Skills, InstallComponent::CodexSkills, InstallComponent::Hooks, + InstallComponent::Instructions, ], ]; for all in [false, true] { @@ -612,7 +675,8 @@ mod tests { || plan.codex() || plan.skills() || plan.codex_skills() - || plan.hooks(), + || plan.hooks() + || plan.instructions(), "from_components({all}, {components:?}) produced a do-nothing plan: {plan:?}" ); } diff --git 
a/crates/loomweave-cli/src/instructions.rs b/crates/loomweave-cli/src/instructions.rs new file mode 100644 index 00000000..e08b5b1d --- /dev/null +++ b/crates/loomweave-cli/src/instructions.rs @@ -0,0 +1,765 @@ +//! Loomweave-owned agent-orientation block injected into `CLAUDE.md` / +//! `AGENTS.md`, plus its idempotent installer and read-only health check. +//! +//! Like Filigree, Loomweave *pushes* a small managed marker-block into the +//! always-loaded `CLAUDE.md` / `AGENTS.md` context so an agent learns to ask +//! Loomweave's MCP tools before re-grepping the tree. Unlike the skill pack +//! (whose asset is owned by `loomweave-mcp`), this asset is cli-local — there +//! is no MCP owner for it — and is embedded with `include_str!`, matching the +//! embedding convention in [`crate::skill_pack`]. +//! +//! ## Coexistence is the whole point +//! +//! Every file Loomweave writes here **already** contains another tool's block: +//! this repo's own `AGENTS.md` holds Filigree's `` +//! span (and Wardline's). Loomweave therefore *never* owns the tail of the file, +//! so the installer must touch **only** its own +//! ``…`` span and +//! must not delete or move a single byte outside it. In particular it does NOT +//! copy Filigree's truncate-from-start-marker-to-EOF malformed recovery, which +//! is a data-loss bug in a two-block file. See [`install_instructions`]. +//! +//! ## Drift signal +//! +//! Drift is the block-body content compared byte-for-byte against the embedded +//! [`INSTRUCTIONS_BODY`], **not** the marker version string — so a workspace +//! version bump on byte-identical content does not report drift. This mirrors +//! [`crate::skill_pack`]'s fingerprint philosophy; the `v{version}` in the start +//! marker is human-readable provenance only. + +use std::fs; +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result, bail}; + +/// Embedded, cli-local instructions body. 
Deliberately thin: it is +/// always-loaded context competing with the `loomweave-workflow` skill that +/// says the same thing, so it is a pointer, not a manual. +const INSTRUCTIONS_BODY: &str = include_str!("../assets/instructions/loomweave.md"); + +/// Detection prefix for Loomweave's start marker. The full marker carries a +/// `:v{version}:{hash}` provenance suffix (see [`start_marker`]); detection +/// keys only on this prefix so a provenance change is still recognised as the +/// same block. Never collides with `"; + +/// The two project-root files Loomweave manages a block in. +const TARGET_FILES: &[&str] = &["CLAUDE.md", "AGENTS.md"]; + +/// The canonical body bytes that live inside the span. `include_str!` keeps the +/// asset's trailing newline; we trim trailing whitespace so the drift compare +/// is invariant to how the asset file happens to end. This is the single source +/// of truth for both render ([`render_block`]) and extract ([`locate_span`]). +fn canonical_body() -> &'static str { + INSTRUCTIONS_BODY.trim_end() +} + +/// First 8 hex chars of the blake3 digest over [`canonical_body`] — provenance +/// only, stamped into the start marker; not the drift signal. +fn body_hash_prefix() -> String { + let digest = blake3::hash(canonical_body().as_bytes()); + digest.to_hex()[..8].to_owned() +} + +/// The full provenance start-marker line (no trailing newline). +fn start_marker() -> String { + format!( + "", + env!("CARGO_PKG_VERSION"), + body_hash_prefix() + ) +} + +/// Render the complete block (start marker + body + end marker), newline-pinned. +/// +/// Exactly one newline sits at each boundary: after the start marker, between +/// the body and the end marker. [`locate_span`] is the precise inverse, so a +/// freshly rendered block round-trips to [`canonical_body`] with no drift. 
+fn render_block() -> String { + format!("{}\n{}\n{}", start_marker(), canonical_body(), END_MARKER) +} + +/// Read-only health of the Loomweave block across both [`TARGET_FILES`], for +/// `loomweave doctor`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum InstructionsState { + /// Every target file holds a well-formed block whose body matches the + /// embedded bytes. + UpToDate, + /// At least one target file is missing the block entirely (and no file is + /// in a worse state). A first-class but *optional* surface: the same + /// guidance is delivered by the MCP preamble and the skill, so a project + /// that omits the block is still healthy. Doctor treats this as a + /// **warning**. + Missing, + /// Every file that should hold a block has a well-formed one, but at least + /// one block's body differs from the embedded bytes (a stale copy from an + /// older binary, or hand-edited). Doctor treats this as a **problem** + /// (auto-repaired with `--fix`). + Drifted, + /// At least one target file has a malformed block — a dangling start marker + /// with no following end marker, or an end marker preceding its start. + /// Doctor treats this as a **problem**; the repair is safe because it only + /// rewrites Loomweave's own span. + Malformed, +} + +/// Classify one file's Loomweave block without writing. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum FileState { + /// No start marker present. + Absent, + /// Well-formed block whose body matches the embedded bytes. + Current, + /// Well-formed block whose body differs from the embedded bytes. + Drifted, + /// Start marker present without a following end marker (or markers are + /// mis-ordered). + Malformed, +} + +/// Aggregate per-file states into a single [`InstructionsState`]. +/// +/// Precedence is **severity-ordered**, high → low: `Malformed` > `Drifted` > +/// `Missing` > `UpToDate`. 
This deliberately differs from +/// [`crate::skill_pack`]'s "Missing first" rule: here `Missing` is only a +/// warning while `Drifted`/`Malformed` fail the gate, so a missing block must +/// never mask a gate-failing drifted/malformed one. +fn aggregate(states: &[FileState]) -> InstructionsState { + if states.iter().any(|s| matches!(s, FileState::Malformed)) { + InstructionsState::Malformed + } else if states.iter().any(|s| matches!(s, FileState::Drifted)) { + InstructionsState::Drifted + } else if states.iter().any(|s| matches!(s, FileState::Absent)) { + InstructionsState::Missing + } else { + InstructionsState::UpToDate + } +} + +/// Classify the Loomweave block across both [`TARGET_FILES`] without writing. +#[must_use] +pub fn instructions_state(project_root: &Path) -> InstructionsState { + let states: Vec = TARGET_FILES + .iter() + .map(|name| file_state(&project_root.join(name))) + .collect(); + aggregate(&states) +} + +/// Classify a single target file. A file that does not exist is [`Absent`] +/// (the installer will create it); an unreadable file is treated as `Absent` +/// too, so the repair path attempts a fresh write rather than wedging. +/// +/// [`Absent`]: FileState::Absent +fn file_state(path: &Path) -> FileState { + let Ok(content) = fs::read_to_string(path) else { + return FileState::Absent; + }; + match locate_span(&content) { + Span::Absent => FileState::Absent, + Span::Malformed => FileState::Malformed, + Span::WellFormed { body, .. } => { + if body == canonical_body() { + FileState::Current + } else { + FileState::Drifted + } + } + } +} + +/// Where (and whether) a well-ordered Loomweave block sits in `content`. +enum Span { + /// No start marker line present. + Absent, + /// Start marker present without a following end marker, or mis-ordered. + Malformed, + /// A well-ordered block. `start` is the byte offset of the start-marker + /// line; `end` is the byte offset just past the end-marker line (including + /// its trailing newline if any). 
`body` is the extracted block body, + /// trailing-newline-trimmed, for the drift compare. + WellFormed { + start: usize, + end: usize, + body: String, + }, +} + +/// Locate Loomweave's block by scanning **whole lines** — never a bare `-->` +/// substring scan, which could match Filigree's or Wardline's end marker. The +/// start marker is the first line whose trimmed form starts with +/// [`START_PREFIX`]; the end marker is the first line *strictly after* it whose +/// trimmed form equals [`END_MARKER`]. +fn locate_span(content: &str) -> Span { + let mut start: Option<(usize, usize)> = None; // (line_start_byte, line_end_byte) + let mut offset = 0usize; + for line in content.split_inclusive('\n') { + let trimmed = line.trim(); + let line_start = offset; + let line_end = offset + line.len(); + offset = line_end; + + match start { + // Still scanning for the start marker. + None => { + if trimmed.starts_with(START_PREFIX) { + start = Some((line_start, line_end)); + } + } + // Start marker already seen; the first matching end-marker line closes + // the span. + Some((span_start, body_start)) if trimmed == END_MARKER => { + // Body is everything between the start-marker line and the + // end-marker line; trim a single trailing newline so it round-trips + // against `canonical_body` (which has no trailing newline). + let raw_body = &content[body_start..line_start]; + let body = raw_body.strip_suffix('\n').unwrap_or(raw_body).to_owned(); + return Span::WellFormed { + start: span_start, + end: line_end, + body, + }; + } + Some(_) => {} + } + } + match start { + // Start marker found but never a following end marker → dangling. + Some(_) => Span::Malformed, + None => Span::Absent, + } +} + +/// Outcome of an [`install_instructions`] call. +#[derive(Debug, Clone, Copy)] +pub struct InstructionsInstallReport { + /// True if any target file's bytes were (re)written this call; false if + /// every file already held the current well-formed block. 
+ pub changed: bool, +} + +/// Inject (or repair) the Loomweave block into both [`TARGET_FILES`] under +/// `project_root`, idempotently. Doubles as the `doctor --fix` repair. +/// +/// Per-file behaviour, touching **only** Loomweave's own span: +/// +/// - **Replace** when a well-ordered `START_PREFIX`…`END_MARKER` span exists: +/// rewrite exactly that span, leaving every byte outside it (e.g. a +/// coexisting Filigree block) untouched. A no-op when the body already +/// matches. +/// - **Append** when no start marker is present: append the block (separated by +/// a blank line) to the file's existing content, which is left intact. +/// - **Dangling start marker** (start present, no following end): do **not** +/// truncate to EOF (that would eat a coexisting Filigree block). Strip only +/// the orphaned start-marker line(s) and append a fresh well-formed block; all +/// other bytes — including the orphaned prose body, left as loose text — +/// survive. +/// +/// Writes are atomic (temp + rename in the same directory, preserving the +/// existing file mode) and reject a symlinked target. +/// +/// # Errors +/// +/// Returns an error if a target is a symlink, or if any read, temp write, or +/// rename fails. 
+pub fn install_instructions(project_root: &Path) -> Result { + let mut changed = false; + for name in TARGET_FILES { + let path = project_root.join(name); + changed |= install_into_file(&path) + .with_context(|| format!("inject loomweave instructions into {}", path.display()))?; + } + Ok(InstructionsInstallReport { changed }) +} + +fn install_into_file(path: &Path) -> Result { + reject_symlink(path)?; + + let existing = match fs::read_to_string(path) { + Ok(content) => Some(content), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => None, + Err(err) => { + return Err(err).with_context(|| format!("read {}", path.display())); + } + }; + + let block = render_block(); + let new_content = match existing.as_deref() { + None => format!("{block}\n"), + Some(content) => match locate_span(content) { + Span::WellFormed { start, end, body } => { + if body == canonical_body() { + // Already current — no-op, even if the provenance marker + // version differs (drift is body-only). + return Ok(false); + } + splice_span(content, start, end, &block) + } + Span::Absent => append_block(content, &block), + Span::Malformed => { + // Dangling start marker: strip only the orphan start-marker + // line, then append a fresh block. Never truncate to EOF. + let stripped = strip_start_marker_line(content); + append_block(&stripped, &block) + } + }, + }; + + if existing.as_deref() == Some(new_content.as_str()) { + return Ok(false); + } + atomic_write(path, &new_content)?; + Ok(true) +} + +/// Replace `content[start..end]` with `block`, normalising so the replacement +/// span ends in exactly one newline (the original end-marker line may or may +/// not have carried one at EOF). 
+fn splice_span(content: &str, start: usize, end: usize, block: &str) -> String { + let mut out = String::with_capacity(content.len()); + out.push_str(&content[..start]); + out.push_str(block); + let tail = &content[end..]; + // The located `end` is just past the end-marker line's trailing newline (if + // present). Preserve whatever followed it verbatim; guarantee a newline + // between our end marker and that tail when the tail is non-empty. + if tail.is_empty() { + out.push('\n'); + } else { + out.push('\n'); + out.push_str(tail); + } + out +} + +/// Append `block` to `content`, separated by a blank line, with a trailing +/// newline. `content`'s existing bytes are preserved verbatim. +fn append_block(content: &str, block: &str) -> String { + if content.is_empty() { + return format!("{block}\n"); + } + let sep = if content.ends_with("\n\n") { + "" + } else if content.ends_with('\n') { + "\n" + } else { + "\n\n" + }; + format!("{content}{sep}{block}\n") +} + +/// Remove **every** line whose trimmed form starts with [`START_PREFIX`]. +/// Every other byte — including any orphaned body that followed it — is kept. +/// +/// This is only reached from the [`Span::Malformed`] branch, where +/// [`locate_span`] returned `Malformed` precisely because no end marker follows +/// the first start marker — so *every* start marker in the file is orphaned by +/// definition. Stripping only the first would leave a second dangling start +/// behind; on the next install/doctor run [`locate_span`] would pair that +/// leftover orphan with the freshly-appended block's end marker, forming a +/// well-formed span that engulfs (and deletes) everything between — including a +/// co-resident Filigree block. Removing all orphan starts converges in one pass +/// and never eats a neighbouring tool's block. 
+fn strip_start_marker_line(content: &str) -> String { + let mut out = String::with_capacity(content.len()); + for line in content.split_inclusive('\n') { + if line.trim().starts_with(START_PREFIX) { + continue; + } + out.push_str(line); + } + out +} + +/// Reject a symlinked target so temp+rename never silently converts a link into +/// a regular file. A non-existent path is fine (we create it). +fn reject_symlink(path: &Path) -> Result<()> { + match fs::symlink_metadata(path) { + Ok(meta) if meta.file_type().is_symlink() => { + bail!( + "refusing to write {}: it is a symlink (resolve it by hand, then re-run)", + path.display() + ); + } + Ok(_) => Ok(()), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(err) => Err(err).with_context(|| format!("stat {}", path.display())), + } +} + +/// Atomically write `content` to `path`: stage into a sibling temp file in the +/// same directory (so `rename` stays on one filesystem), preserve the existing +/// file mode when the target already exists, then `rename` over the target. +fn atomic_write(path: &Path, content: &str) -> Result<()> { + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + fs::create_dir_all(parent).with_context(|| format!("mkdir {}", parent.display()))?; + + let file_name = path.file_name().map_or_else( + || "instructions".to_owned(), + |n| n.to_string_lossy().into_owned(), + ); + let temp_path: PathBuf = parent.join(format!( + ".{}.loomweave.tmp-{}", + file_name, + std::process::id() + )); + + // Cleanup guard: drop the staged temp file if any step after creating it + // fails, so a failed write never leaks a `.tmp-*` sibling. 
+ if let Err(err) = write_temp_then_rename(&temp_path, path, content) { + let _ = fs::remove_file(&temp_path); + return Err(err); + } + Ok(()) +} + +fn write_temp_then_rename(temp_path: &Path, path: &Path, content: &str) -> Result<()> { + fs::write(temp_path, content).with_context(|| format!("write {}", temp_path.display()))?; + #[cfg(unix)] + preserve_mode(path, temp_path)?; + fs::rename(temp_path, path) + .with_context(|| format!("rename {} -> {}", temp_path.display(), path.display()))?; + Ok(()) +} + +/// Copy the existing file's permission bits onto the staged temp file so the +/// rename preserves mode. A no-op when the target does not yet exist. +#[cfg(unix)] +fn preserve_mode(path: &Path, temp_path: &Path) -> Result<()> { + use std::os::unix::fs::PermissionsExt; + + let Ok(meta) = fs::metadata(path) else { + return Ok(()); + }; + let mode = meta.permissions().mode(); + fs::set_permissions(temp_path, fs::Permissions::from_mode(mode)) + .with_context(|| format!("preserve mode on {}", temp_path.display())) +} + +#[cfg(test)] +mod tests { + use super::{ + END_MARKER, INSTRUCTIONS_BODY, InstructionsState, START_PREFIX, canonical_body, + install_instructions, instructions_state, render_block, + }; + + /// A representative Filigree two-block neighbour, taken verbatim in shape + /// from this repo's own `AGENTS.md`. The coexistence tests assert these + /// bytes survive every operation untouched. + const FILIGREE_BLOCK: &str = "\n\ +## Filigree Issue Tracker\n\ +\n\ +filigree tracks tasks for this project.\n\ +\n"; + + #[test] + fn asset_is_thin_and_pointer_shaped() { + // The plan caps the always-loaded body at ~15-25 lines: a pointer, not + // a manual. Guard against it growing into a second skill. 
+ let lines = INSTRUCTIONS_BODY.lines().count(); + assert!( + lines <= 30, + "instructions body grew to {lines} lines; keep it thin (a pointer)" + ); + assert!(INSTRUCTIONS_BODY.contains("mcp__loomweave__")); + assert!(INSTRUCTIONS_BODY.contains("loomweave-workflow")); + } + + #[test] + fn start_prefix_is_not_a_prefix_of_end_marker() { + // Detection keys the start on START_PREFIX and the end on an exact + // END_MARKER line; the `/` keeps the end marker from matching the start + // prefix. Pin that invariant. + assert!(!END_MARKER.starts_with(START_PREFIX)); + } + + #[test] + fn render_round_trips_to_canonical_body() { + let block = render_block(); + assert!(block.starts_with(START_PREFIX)); + assert!(block.ends_with(END_MARKER)); + // Wrapping the rendered block in a file and re-extracting must yield the + // canonical body, or idempotency breaks (install -> Drifted -> "fix" + // every run). + let file = format!("prefix\n\n{block}\n"); + let state = super::locate_span(&file); + match state { + super::Span::WellFormed { body, .. 
} => assert_eq!(body, canonical_body()), + _ => panic!("rendered block did not locate as well-formed"), + } + } + + #[test] + fn create_when_no_file() { + let dir = tempfile::tempdir().unwrap(); + let report = install_instructions(dir.path()).unwrap(); + assert!(report.changed, "first install should write"); + for name in ["CLAUDE.md", "AGENTS.md"] { + let body = std::fs::read_to_string(dir.path().join(name)).unwrap(); + assert!(body.starts_with(START_PREFIX), "{name} missing block"); + assert!(body.trim_end().ends_with(END_MARKER)); + } + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + } + + #[test] + fn install_is_idempotent() { + let dir = tempfile::tempdir().unwrap(); + assert!(install_instructions(dir.path()).unwrap().changed); + let second = install_instructions(dir.path()).unwrap(); + assert!( + !second.changed, + "second install must be a no-op on byte-identical body" + ); + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + } + + #[test] + fn append_preserves_prior_content() { + let dir = tempfile::tempdir().unwrap(); + let prior = "# Project notes\n\nSome existing prose.\n"; + for name in ["CLAUDE.md", "AGENTS.md"] { + std::fs::write(dir.path().join(name), prior).unwrap(); + } + assert!(install_instructions(dir.path()).unwrap().changed); + for name in ["CLAUDE.md", "AGENTS.md"] { + let body = std::fs::read_to_string(dir.path().join(name)).unwrap(); + assert!(body.starts_with(prior), "prior content not preserved"); + assert!(body.contains(START_PREFIX)); + } + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + } + + #[test] + fn replace_rewrites_on_drift_only() { + let dir = tempfile::tempdir().unwrap(); + install_instructions(dir.path()).unwrap(); + // Hand-edit the body inside the Loomweave span on one file. 
+ let claude = dir.path().join("CLAUDE.md"); + let content = std::fs::read_to_string(&claude).unwrap(); + let drifted = content.replace("code archaeology", "DRIFTED HEADER"); + assert_ne!(drifted, content, "test setup: substitution must apply"); + std::fs::write(&claude, &drifted).unwrap(); + assert_eq!(instructions_state(dir.path()), InstructionsState::Drifted); + + let report = install_instructions(dir.path()).unwrap(); + assert!(report.changed, "drift must trigger a rewrite"); + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + } + + #[test] + fn state_missing_before_install() { + let dir = tempfile::tempdir().unwrap(); + assert_eq!(instructions_state(dir.path()), InstructionsState::Missing); + } + + #[test] + fn state_missing_when_one_file_lacks_block() { + let dir = tempfile::tempdir().unwrap(); + install_instructions(dir.path()).unwrap(); + // Remove the block from AGENTS.md entirely. + std::fs::write(dir.path().join("AGENTS.md"), "# just notes\n").unwrap(); + assert_eq!(instructions_state(dir.path()), InstructionsState::Missing); + } + + /// The headline coexistence guarantee: a file pre-seeded with a Filigree + /// block survives create / append / replace / malformed round-trips with + /// Filigree's bytes untouched. + #[test] + fn filigree_block_survives_every_operation() { + let dir = tempfile::tempdir().unwrap(); + let claude = dir.path().join("CLAUDE.md"); + let agents = dir.path().join("AGENTS.md"); + + // Seed both files with only the Filigree block (the append/create case). + std::fs::write(&claude, FILIGREE_BLOCK).unwrap(); + std::fs::write(&agents, FILIGREE_BLOCK).unwrap(); + + // 1. Append: Loomweave block added, Filigree bytes intact. 
+ install_instructions(dir.path()).unwrap(); + for path in [&claude, &agents] { + let body = std::fs::read_to_string(path).unwrap(); + assert!( + body.contains(FILIGREE_BLOCK), + "filigree block lost on append" + ); + assert!(body.contains(START_PREFIX), "loomweave block missing"); + } + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + + // 2. Replace (drift): edit the Loomweave body; Filigree still survives. + let content = std::fs::read_to_string(&claude).unwrap(); + let drifted = content.replace("code archaeology", "EDITED"); + std::fs::write(&claude, &drifted).unwrap(); + assert_eq!(instructions_state(dir.path()), InstructionsState::Drifted); + install_instructions(dir.path()).unwrap(); + let repaired = std::fs::read_to_string(&claude).unwrap(); + assert!( + repaired.contains(FILIGREE_BLOCK), + "filigree block lost on drift repair" + ); + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + + // 3. Malformed (dangling Loomweave start marker, with the Filigree block + // present): repair must NOT truncate to EOF and eat Filigree. + let dangling = format!( + "{FILIGREE_BLOCK}\n\nstale orphan body\n" + ); + std::fs::write(&agents, &dangling).unwrap(); + assert_eq!(instructions_state(dir.path()), InstructionsState::Malformed); + install_instructions(dir.path()).unwrap(); + let fixed = std::fs::read_to_string(&agents).unwrap(); + assert!( + fixed.contains(FILIGREE_BLOCK), + "filigree block eaten by dangling-marker repair" + ); + assert!( + fixed.contains("stale orphan body"), + "orphaned body should be left as loose prose, not deleted" + ); + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + } + + /// Regression: two dangling Loomweave start markers (no intervening end + /// marker) co-resident with a Filigree block. 
The Malformed-branch repair + /// must strip BOTH orphan starts, not just the first — otherwise the leftover + /// orphan re-pairs with the freshly-appended block's end marker on a later + /// run, forming a well-formed span that engulfs and deletes the Filigree + /// block (silent data loss) and never converges. Asserts (a) Filigree bytes + /// survive and (b) the repair reaches a fixed point in a single pass. + #[test] + fn two_dangling_starts_with_filigree_block_converge_in_one_pass() { + let dir = tempfile::tempdir().unwrap(); + let agents = dir.path().join("AGENTS.md"); + // Give the other target file a clean block so AGENTS.md is the only + // malformed file driving the aggregate state. + install_instructions(dir.path()).unwrap(); + + // AGENTS.md: TWO dangling loomweave start markers (no end marker + // between them) sitting BEFORE the Filigree block (bad copy-paste / + // merge artifact). The ordering is load-bearing: it puts the leftover + // orphan start on the near side of the Filigree block, so the buggy + // strip-first path leaves an orphan that — on the next run — pairs with + // the appended block's end marker and engulfs (deletes) the Filigree + // bytes. Assertion (a) below then fails on the unfixed code, exercising + // the literal data-loss mechanism, not merely non-convergence. + let doubled = format!( + "\n\ + first orphan body\n\ + \n\ + second orphan body\n\ + \n\ + {FILIGREE_BLOCK}" + ); + std::fs::write(&agents, &doubled).unwrap(); + assert_eq!(instructions_state(dir.path()), InstructionsState::Malformed); + + // (a) Drive repeated install passes — the way `doctor --fix` runs over + // a project's lifetime. The data-loss mechanism only fires on the SECOND + // pass: the buggy strip-first repair leaves an orphan start that + // `locate_span` then pairs with pass-1's appended end marker, forming a + // well-formed span that engulfs the Filigree block, so pass 2's splice + // deletes it. 
Assert the Filigree bytes survive after EVERY pass, so the + // literal deletion is the load-bearing failure on the unfixed code. + for pass in 1..=3 { + install_instructions(dir.path()).unwrap(); + let after = std::fs::read_to_string(&agents).unwrap(); + assert!( + after.contains(FILIGREE_BLOCK), + "filigree block eaten by two-dangling-start repair on pass {pass}" + ); + } + + // (b) The repair reaches a fixed point: a single pass from Malformed must + // converge to UpToDate (not "repair did not converge"), and further + // passes are no-ops. + std::fs::write(&agents, &doubled).unwrap(); + install_instructions(dir.path()).unwrap(); + assert_eq!( + instructions_state(dir.path()), + InstructionsState::UpToDate, + "two-dangling-start repair must reach a fixed point in a single pass" + ); + let second = install_instructions(dir.path()).unwrap(); + assert!( + !second.changed, + "repaired file must be a stable fixed point (no further rewrite)" + ); + + let fixed = std::fs::read_to_string(&agents).unwrap(); + assert!( + fixed.contains(FILIGREE_BLOCK), + "filigree block must survive the converged repair" + ); + // Both orphaned bodies survive as loose prose; no bytes outside our span lost. + assert!(fixed.contains("first orphan body")); + assert!(fixed.contains("second orphan body")); + // Exactly one well-formed start marker remains (the appended block). + assert_eq!( + fixed.matches(START_PREFIX).count(), + 1, + "exactly one start marker must remain after stripping both orphans" + ); + } + + #[test] + fn dangling_start_marker_is_malformed_then_repaired() { + let dir = tempfile::tempdir().unwrap(); + let claude = dir.path().join("CLAUDE.md"); + let agents = dir.path().join("AGENTS.md"); + // One file gets a clean block so only the dangling file is malformed. 
+ install_instructions(dir.path()).unwrap(); + std::fs::write( + &claude, + "# notes\n\norphan body, no end marker\n", + ) + .unwrap(); + let _ = &agents; + assert_eq!(instructions_state(dir.path()), InstructionsState::Malformed); + + install_instructions(dir.path()).unwrap(); + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + let fixed = std::fs::read_to_string(&claude).unwrap(); + assert!(fixed.contains("# notes"), "leading content eaten"); + assert!( + fixed.contains("orphan body, no end marker"), + "orphan body should survive as loose prose" + ); + // Exactly one well-formed start marker remains. + assert_eq!(fixed.matches(START_PREFIX).count(), 1); + } + + #[cfg(unix)] + #[test] + fn symlink_target_is_rejected() { + use std::os::unix::fs::symlink; + let dir = tempfile::tempdir().unwrap(); + let real = dir.path().join("real.md"); + std::fs::write(&real, "real contents\n").unwrap(); + symlink(&real, dir.path().join("CLAUDE.md")).unwrap(); + let err = install_instructions(dir.path()).unwrap_err(); + assert!( + err.to_string().contains("symlink") + || err.chain().any(|c| c.to_string().contains("symlink")), + "expected a symlink rejection, got: {err}" + ); + } + + #[cfg(unix)] + #[test] + fn atomic_write_preserves_mode() { + use std::os::unix::fs::PermissionsExt; + let dir = tempfile::tempdir().unwrap(); + let claude = dir.path().join("CLAUDE.md"); + std::fs::write(&claude, "# notes\n").unwrap(); + std::fs::set_permissions(&claude, std::fs::Permissions::from_mode(0o640)).unwrap(); + install_instructions(dir.path()).unwrap(); + let mode = std::fs::metadata(&claude).unwrap().permissions().mode() & 0o777; + assert_eq!(mode, 0o640, "file mode not preserved across rewrite"); + } +} diff --git a/crates/loomweave-cli/src/main.rs b/crates/loomweave-cli/src/main.rs index 43c75988..47ed6b57 100644 --- a/crates/loomweave-cli/src/main.rs +++ b/crates/loomweave-cli/src/main.rs @@ -10,6 +10,7 @@ mod hooks_settings; mod http_read; mod install; mod 
instance; +mod instructions; mod integration_bindings; mod mcp_registration; mod run_lifecycle; @@ -42,6 +43,7 @@ fn main() -> Result<()> { skills, codex_skills, hooks, + instructions, all, } => { let mut components = Vec::new(); @@ -60,6 +62,9 @@ fn main() -> Result<()> { if hooks { components.push(install::InstallComponent::Hooks); } + if instructions { + components.push(install::InstallComponent::Instructions); + } install::run( &path, force, diff --git a/crates/loomweave-cli/tests/doctor.rs b/crates/loomweave-cli/tests/doctor.rs index 5a389027..92266329 100644 --- a/crates/loomweave-cli/tests/doctor.rs +++ b/crates/loomweave-cli/tests/doctor.rs @@ -169,6 +169,7 @@ fn doctor_fix_repairs_missing_three_way_integration_bindings() { "--codex-skills", "--hooks", "--claude-code", + "--instructions", ], dir.path(), ); @@ -392,6 +393,174 @@ fn doctor_flags_untrusted_mcp_command_without_clobbering_it() { ); } +/// Instructions severity model (plan decision #2, the product-judgment veto +/// point): `Missing` is a non-gating **warning** — the same guidance ships via +/// the MCP preamble and the loomweave-workflow skill, so a project that omits +/// the always-loaded block is still first-class. A fresh `--all` install holds +/// the block; deleting it from one target file drives the aggregate to Missing, +/// which must surface as a warning and still exit 0. +#[test] +fn doctor_reports_missing_instructions_block_as_warning() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + // Drop the Loomweave block from one target file -> aggregate is Missing. 
+ fs::write(dir.path().join("AGENTS.md"), "# just notes\n").unwrap(); + + let (code, out) = doctor(dir.path(), false); + assert_eq!( + code, 0, + "a missing instructions block is an optional surface; must NOT fail the gate:\n{out}" + ); + assert!( + out.contains("⚠ agent-orientation block missing from CLAUDE.md / AGENTS.md"), + "missing block should surface as a warning:\n{out}" + ); + + // --fix re-injects the block; a plain re-run is then clean. + let (code, out) = doctor(dir.path(), true); + assert_eq!(code, 0, "--fix should repair and exit 0:\n{out}"); + assert!( + out.contains("agent-orientation block missing from CLAUDE.md / AGENTS.md — fixed"), + "stdout:\n{out}" + ); + let (code, _) = doctor(dir.path(), false); + assert_eq!(code, 0, "repaired project must be healthy on re-run"); +} + +/// `Drifted` -> **problem**: a stale block body fails the gate without `--fix` +/// and is auto-repaired with `--fix`. This pins the one branch that actually +/// gates the doctor exit code; a refactor flipping Drifted to a warning would +/// otherwise pass the suite undetected. +#[test] +fn doctor_reports_drifted_instructions_block_as_gating_problem() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + // Hand-edit the body inside the Loomweave span -> Drifted. 
+ let claude = dir.path().join("CLAUDE.md"); + let content = fs::read_to_string(&claude).unwrap(); + let drifted = content.replace("code archaeology", "DRIFTED HEADER"); + assert_ne!(drifted, content, "test setup: substitution must apply"); + fs::write(&claude, &drifted).unwrap(); + + let (code, out) = doctor(dir.path(), false); + assert_eq!( + code, 1, + "a drifted instructions block must FAIL the doctor gate without --fix:\n{out}" + ); + assert!( + out.contains("agent-orientation block drifted from the bundled copy"), + "stdout:\n{out}" + ); + + let (code, out) = doctor(dir.path(), true); + assert_eq!(code, 0, "--fix should repair drift and exit 0:\n{out}"); + assert!( + out.contains("agent-orientation block drifted from the bundled copy — fixed"), + "stdout:\n{out}" + ); + let (code, _) = doctor(dir.path(), false); + assert_eq!(code, 0, "repaired project must be healthy on re-run"); +} + +/// `Malformed` -> **problem**: a dangling Loomweave start marker (no following +/// end marker) fails the gate without `--fix`, and `--fix` repairs it without +/// truncating to EOF. +#[test] +fn doctor_reports_malformed_instructions_block_as_gating_problem() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + // Replace one target file's block with a dangling start marker. 
+ fs::write( + dir.path().join("CLAUDE.md"), + "# notes\n\norphan body, no end marker\n", + ) + .unwrap(); + + let (code, out) = doctor(dir.path(), false); + assert_eq!( + code, 1, + "a malformed instructions block must FAIL the doctor gate without --fix:\n{out}" + ); + assert!( + out.contains("agent-orientation block malformed (dangling loomweave marker)"), + "stdout:\n{out}" + ); + + let (code, out) = doctor(dir.path(), true); + assert_eq!( + code, 0, + "--fix should repair the malformed block and exit 0:\n{out}" + ); + let fixed = fs::read_to_string(dir.path().join("CLAUDE.md")).unwrap(); + assert!( + fixed.contains("# notes"), + "leading content must survive the repair:\n{fixed}" + ); + assert!( + fixed.contains("orphan body, no end marker"), + "orphaned body must survive as loose prose:\n{fixed}" + ); + let (code, _) = doctor(dir.path(), false); + assert_eq!(code, 0, "repaired project must be healthy on re-run"); +} + +/// JSON surface: pin the `instructions.block` check shape. Healthy install -> +/// status `ok`, `fixed: false`; a drifted block -> status `problem` and the run +/// aggregates to `ok: false`. The healthy-install json shape test omits this +/// check, leaving the status string and `fixed` flag unverified. +#[test] +fn doctor_json_reports_instructions_block_check_shape() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + + // Healthy: instructions.block is ok, not fixed. + let (code, json) = doctor_json(dir.path(), false); + assert_eq!(code, 0, "healthy install should exit 0: {json}"); + let check = json["checks"] + .as_array() + .unwrap() + .iter() + .find(|c| c["id"] == "instructions.block") + .expect("instructions.block check present"); + assert_eq!(check["status"], "ok"); + assert_eq!(check["fixed"], serde_json::json!(false)); + + // Drift the block -> the json check becomes a problem and ok aggregates to false. 
+ let claude = dir.path().join("CLAUDE.md"); + let content = fs::read_to_string(&claude).unwrap(); + fs::write( + &claude, + content.replace("code archaeology", "DRIFTED HEADER"), + ) + .unwrap(); + + let (code, json) = doctor_json(dir.path(), false); + assert_eq!(code, 1, "a drifted block must fail the json gate: {json}"); + assert_eq!( + json["ok"], false, + "an instructions-driven problem must make the run not ok: {json}" + ); + let check = json["checks"] + .as_array() + .unwrap() + .iter() + .find(|c| c["id"] == "instructions.block") + .expect("instructions.block check present"); + assert_eq!(check["status"], "problem"); + + // --fix repairs it: status becomes fixed. + let (code, json) = doctor_json(dir.path(), true); + assert_eq!(code, 0, "--fix json should repair and exit 0: {json}"); + let check = json["checks"] + .as_array() + .unwrap() + .iter() + .find(|c| c["id"] == "instructions.block") + .expect("instructions.block check present"); + assert_eq!(check["status"], "fixed"); + assert_eq!(check["fixed"], serde_json::json!(true)); +} + #[test] fn doctor_reports_published_ephemeral_port() { let dir = tempfile::tempdir().unwrap(); diff --git a/docs/implementation/instructions-injection-plan.md b/docs/implementation/instructions-injection-plan.md new file mode 100644 index 00000000..e95cc085 --- /dev/null +++ b/docs/implementation/instructions-injection-plan.md @@ -0,0 +1,224 @@ +# Plan: inject Loomweave agent-orientation guidance into CLAUDE.md / AGENTS.md + +Status: proposal / sketch (2026-06-06) +Scope: `loomweave install` (write) + `loomweave doctor [--fix]` (verify/repair) + +## Problem + +An agent that opens a Loomweave-indexed repo only learns how to use Loomweave +by *pulling* a surface: the MCP server's instructions preamble, or the +`loomweave-workflow` skill. Neither is in the always-loaded `CLAUDE.md` / +`AGENTS.md` context. 
Filigree already solves the same problem by *pushing* a +managed marker-block into those files (`inject_instructions`, +`src/filigree/install.py`). Loomweave does not — confirmed: a full grep of the +source for `CLAUDE.md`/`AGENTS.md` returns zero hits; the marker blocks in this +repo's own `CLAUDE.md`/`AGENTS.md` are Filigree's +(`<!-- filigree:instructions:… -->`). + +This plan adds the equivalent for Loomweave, slotting into the existing +orientation-surface machinery rather than inventing new structure. + +## Design (mirrors the established surface pattern) + +Every orientation surface in `loomweave-cli` already follows one shape, and the +new surface adopts it verbatim: + +| Surface | state query | idempotent installer (= `--fix` repair) | doctor checks | +|---|---|---|---| +| skill pack | `skill_pack::skill_pack_state` → `SkillPackState` | `install_skill_pack` | `check_skill` / `check_skill_json` | +| hook | `hooks_settings::…` → `HookState` | `install_session_start_hook` | `check_hook*` | +| MCP | `mcp_registration::…` → `McpState` | `install_mcp_entry` | `check_mcp*` | +| bindings | `integration_bindings::binding_state` → `BindingState` | `install_bindings` | `check_integration_bindings*` | +| **instructions (new)** | `instructions::instructions_state` → `InstructionsState` | `install_instructions` | `check_instructions*` | + +### New module: `crates/loomweave-cli/src/instructions.rs` + +Embeds one asset and manages a marker-block in two files at the project root: +`CLAUDE.md` and `AGENTS.md`. + +```rust +// Embedded, cli-local (no MCP owner exists for this asset, unlike the skill). +const INSTRUCTIONS_BODY: &str = + include_str!("../assets/instructions/loomweave.md"); + +// Loomweave's OWN marker namespace — must coexist with Filigree's block in the +// same file. Never collides with, reads, or edits `<!-- filigree:instructions:… -->`. +const START_PREFIX: &str = "<!-- loomweave:instructions"; +const END_MARKER: &str = "<!-- /loomweave:instructions -->"; + +// Provenance only (human-readable); NOT the drift signal. See "Drift" below.
+fn start_marker() -> String { + format!("<!-- loomweave:instructions:v{}:{} -->", + env!("CARGO_PKG_VERSION"), body_hash_prefix()) +} + +const TARGET_FILES: &[&str] = &["CLAUDE.md", "AGENTS.md"]; +``` + +The rendered block is: + +``` +<!-- loomweave:instructions:v{version}:{hash} --> +…INSTRUCTIONS_BODY… +<!-- /loomweave:instructions --> +``` + +## Five decisions baked in (do not regress these) + +### 1. SAFE marker recovery — do NOT copy Filigree's truncate-to-EOF + +Filigree's malformed-recovery (`install.py`: start marker present, end marker +missing → `content[:start] + INSTRUCTIONS`) **truncates from the start marker +to EOF**. That is only safe when the tool owns the tail of the file. Loomweave +*never* owns the tail: this repo's `AGENTS.md` already holds Filigree's block at +lines 1–119, so every file Loomweave writes is a **two-block file**. Copying +Filigree's recovery verbatim would let a dangling Loomweave start marker eat +Filigree's block (and vice-versa). + +Rule for `install_instructions`: + +- **Replace** only when BOTH `START_PREFIX` and a following `END_MARKER` are + found and well-ordered (`end > start`). Replace exactly that span; touch no + byte outside it. +- **Append** when neither marker is present. +- **Dangling start marker** (start present, no following end): do **not** + truncate to EOF. Treat as malformed → `install` strips only the orphaned + marker line and re-appends a fresh well-formed block; `doctor` without + `--fix` reports it as a **problem** (see #2). Never delete bytes outside the + Loomweave span. +- Atomic write (temp + rename, preserve mode) and symlink rejection, matching + Filigree's `_atomic_write_text` / `reject_symlink` and Loomweave's existing + atomic-write convention. + +Guard test (mandatory): inject into a file that already contains a Filigree +block, and assert **both blocks survive** create / append / replace / malformed +round-trips. + +### 2.
Severity = `integration_bindings` model, not `skill_pack` model + +The how-to-use guidance is already delivered twice (MCP preamble + skill); the +CLAUDE.md block is a redundant always-on *push*. A project that omits it is +still first-class. Therefore: + +- `Missing` → **warning** (surfaced, suggests `--fix`; does NOT fail the + `doctor` gate). +- `Unparseable` / malformed / dangling-marker → **problem** (fails the gate; + this is the "genuinely broken, needs a human" case, and it composes with #1 — + we never auto-truncate an ambiguous block). +- `Drifted` → **problem** when `--fix` is absent, auto-repaired with `--fix` + (parity with skill pack's drift handling; safe because the repair only + rewrites Loomweave's own span). + +> **User veto point.** This is the one product-judgment call. If we'd rather +> treat the block as a first-class surface (Missing = problem, gate fails), +> flip `Missing` to problem. Recommended: warning. + +### 3. Drift signal = block-body content hash, not the marker version + +If the marker version string were the drift signal, every workspace version +bump (`v1.1.0-rc2` → next) would make `doctor` report "drifted" on byte-for-byte +identical content. `skill_pack` already avoids this: its blake3 fingerprint is +the drift signal and the version is "informational only." Mirror it — compare +the **extracted block-body bytes** against `INSTRUCTIONS_BODY`; keep +`v{version}` in the marker as provenance only. + +### 4. Concurrent session-start refresh race — accepted, not engineered around + +If the session-start hook re-injects on every start (as Filigree does), two +sessions race read-modify-write on the same files. Steady-state this is +harmless: each tool's refresh is deterministic, so a lost write reproduces +identical bytes next session. The only corruption risk was the truncation in +#1, already removed. Decision: **do not re-inject from the session-start hook**; +injection happens on `install` and `doctor --fix` only. 
Note the race as +accepted; no cross-tool lock. + +### 5. One flag, both files + +Add a single `--instructions` component (match `skill_pack`'s one-flag-both-roots +ergonomics, not Filigree's two-flag `--claude-md`/`--agents-md` split). It +writes both `CLAUDE.md` and `AGENTS.md`. + +## Content of the embedded asset (keep it THIN) + +`crates/loomweave-cli/assets/instructions/loomweave.md` — deliberately shorter +than Filigree's ~120 lines, because it is always-loaded context competing with +the skill that says the same thing. Target ~15–25 lines: a pointer, not a +manual. Sketch: + +```markdown +## Loomweave (code archaeology) + +This repo is indexed by Loomweave. Before grepping or re-reading the tree to +answer "what calls X", "where is X defined", "what subsystem owns X", or "find +the thing that does Y" — ask Loomweave's MCP tools (`mcp__loomweave__*`): +`entity_find`, `entity_at`, `entity_callers_list`, `entity_neighborhood_get`, +`project_status_get`. Entity IDs are `{plugin}:{kind}:{qualified_name}`. + +Index freshness and counts: `project_status_get` (or the `loomweave://context` +resource). If stale, run `loomweave analyze <path>`. + +Full workflow: the `loomweave-workflow` skill. +``` + +(Final wording to track the MCP server instructions preamble so the two don't +drift apart in tone.) + +## Wiring changes (exact insertion points) + +1. **`cli.rs`** — add `InstallComponent::Instructions` and an `--instructions` + flag to the `Install` subcommand args (alongside `--skills`/`--hooks`). +2. **`install.rs`** + - `InstallPlan::Components` — add `instructions: bool` field. + - `from_components` — populate it from `InstallComponent::Instructions`. + - add `InstallPlan::instructions(self) -> bool` (true for `All` and the + component). + - `validate_plan` — include `instructions()` in the do-nothing guard.
+ - `run()` — `if plan.instructions() { install_instructions(&project_root)?; }` + plus an `install_instructions` wrapper printing changed/up-to-date, in the + same style as `install_claude_skills`. + - Naked `install` (`InstallPlan::All`) therefore writes the blocks by default. +3. **`doctor.rs`** + - `use crate::instructions::{self, InstructionsState};` + - text path: `tally += check_instructions(&project_root, fix);` in `run()`, + plus `fn check_instructions` mirroring `check_skill`. + - json path: add `check_instructions_json(project_root, fix)` to the + `json_report` `checks` vec. + - `next_actions` map: add + `"instructions.block" => "Run \`loomweave doctor --fix\` or \`loomweave install --instructions\`."`. +4. **`main.rs`** — route the new component flag into `from_components` (follows + the existing component plumbing; no new branch logic). + +## Tests + +- `instructions.rs` unit tests: + - create (no file) → file created with one well-formed block. + - append (file without marker) → block appended, prior content intact. + - replace (file with current marker) → idempotent no-op when body matches; + rewrite when body differs (drift). + - **coexistence**: file pre-seeded with a Filigree block → after every + operation, BOTH blocks present and Filigree's bytes untouched. + - dangling start marker → repaired without eating any other bytes; reported + `problem` without `--fix`. + - symlink target rejected. +- `doctor.rs`: `InstructionsState` → severity mapping (Missing=warning, + Unparseable/dangling=problem, Drifted=problem-without-fix / fixed-with-fix); + `--fix` converges to `UpToDate`; json `ok` flag reflects only problems. +- e2e: extend an install smoke script to assert the block lands in CLAUDE.md and + `doctor` reports it healthy. 
+ +## Suite-level follow-up (out of band, worth flagging) + +The "each Weft tool owns `<!-- {tool}:instructions -->` and edits only its own +span" rule is a **suite contract**, not a Loomweave-local detail — Filigree's +current truncate-to-EOF recovery violates it and can eat Loomweave's block. +Recommend a short ADR (or a line in `docs/suite/weft.md`, already cited by +`doctor.rs`) capturing the contract, and a matching fix to Filigree's +`inject_instructions` so both tools stop being able to corrupt each other. + +## Effort + +Small–medium. One new module (~150 lines + tests) modelled line-for-line on +`skill_pack.rs`, one embedded asset, and ~6 mechanical insertion points across +`cli.rs` / `install.rs` / `doctor.rs` / `main.rs`. No schema, no migration, no +new dependency (blake3 + tempfile already in the tree). +``` From af435c4a50b8af2c0d8d4951ff9671f36313db71 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 22:49:09 +1000 Subject: [PATCH 28/60] fix(plugin): leave RLIMIT_NPROC uncapped for pyright-runtime plugins; retire PYRIGHT_MAX_NPROC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RLIMIT_NPROC is enforced by the kernel against every process/thread owned by the real UID system-wide, not against a plugin's descendant tree. Any fixed ceiling low enough to stop a fork-bomb is also low enough to fail a legitimate fork(2) with EAGAIN once the operator's unrelated processes (an interactive session, other Weft daemons) push the per-UID count past it — which intermittently broke pyright-langserver on busy workstations. `effective_max_nproc` now returns `None` (no RLIMIT_NPROC cap) for plugins declaring the `pyright` runtime capability and `Some(DEFAULT_MAX_NPROC)` otherwise; `apply_prlimit_nofile_nproc` threads the `Option` through and skips the RLIMIT_NPROC setrlimit when `None`.
Language-server plugins rely on RLIMIT_AS + crash-loop supervision instead; cgroup v2 `pids.max` is the documented path for a true per-plugin process ceiling. - ADR-021: process-count control limitation under Alternative 4 + residual-risk note. - ADR-035: retire PYRIGHT_MAX_NPROC from the tuning-constant inventory. - plugin.toml: document why the pyright runtime sub-table disables the NPROC cap. - pyright_session.py + test: language-server session adjustments. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-core/src/plugin/host.rs | 39 ++++-- crates/loomweave-core/src/plugin/limits.rs | 23 ++- .../adr/ADR-021-plugin-authority-hybrid.md | 3 + .../ADR-035-operational-tuning-discipline.md | 2 +- plugins/python/plugin.toml | 5 + .../pyright_session.py | 79 +++++++++-- plugins/python/tests/test_pyright_session.py | 132 +++++++++++++++++- 7 files changed, 254 insertions(+), 29 deletions(-) diff --git a/crates/loomweave-core/src/plugin/host.rs b/crates/loomweave-core/src/plugin/host.rs index 90c8df35..73d623a9 100644 --- a/crates/loomweave-core/src/plugin/host.rs +++ b/crates/loomweave-core/src/plugin/host.rs @@ -102,23 +102,31 @@ pub const MAX_FINDING_SEVERITY_BYTES: usize = 32; /// payload is <2 KiB) while rejecting payload floods. pub const MAX_ENTITY_EXTRA_BYTES: usize = 64 * 1024; -/// Pyright's Node-based language server spawns helper threads/processes and -/// inherits the plugin child's `RLIMIT_NPROC`. On Linux that limit is checked -/// against all processes/threads for the user, not just descendants of the -/// plugin, so the Sprint-1 single-plugin ceiling is too low for B.4* call -/// resolution on ordinary developer workstations. +/// The `RLIMIT_NPROC` ceiling to apply to a plugin child, or `None` to leave +/// `RLIMIT_NPROC` uncapped. +/// +/// Plugins that declare the `pyright` runtime capability spawn a Node-based +/// language server, which itself spawns many helper threads/processes that +/// inherit the plugin child's `RLIMIT_NPROC`. 
On Linux that limit is checked +/// against **all** processes/threads for the real UID — not just descendants of +/// the plugin — so it cannot bound a single plugin without being tripped by the +/// user's unrelated processes (an interactive session, other Weft daemons, …). +/// Any fixed ceiling low enough to stop a fork-bomb is therefore also low enough +/// to fail a legitimate `fork(2)` with `EAGAIN` on a busy workstation. We leave +/// `RLIMIT_NPROC` uncapped for language-server plugins and rely on `RLIMIT_AS` +/// plus the host's crash-loop supervision instead. cgroups v2 `pids.max` is the +/// correct tool for a true per-plugin process ceiling (future work). +/// +/// Non-pyright plugins keep the [`DEFAULT_MAX_NPROC`] fork-bomb guard. // Used only from the Linux pre_exec limit path and from unit tests; gate // to match so other release builds don't see it as dead code under // `-D warnings`. #[cfg(any(target_os = "linux", test))] -const PYRIGHT_MAX_NPROC: u64 = 4096; - -#[cfg(any(target_os = "linux", test))] -fn effective_max_nproc(manifest: &Manifest) -> u64 { +fn effective_max_nproc(manifest: &Manifest) -> Option<u64> { if manifest.capabilities.runtime.pyright.is_some() { - PYRIGHT_MAX_NPROC + None } else { - DEFAULT_MAX_NPROC + Some(DEFAULT_MAX_NPROC) } } @@ -1561,12 +1569,15 @@ ontology_version = "0.1.0" } #[test] - fn pyright_runtime_raises_process_ceiling_for_language_server() { + fn pyright_runtime_leaves_process_ceiling_uncapped_for_language_server() { + // Non-pyright plugins keep the fork-bomb guard. assert_eq!( effective_max_nproc(&compliant_manifest()), - DEFAULT_MAX_NPROC + Some(DEFAULT_MAX_NPROC) ); - assert_eq!(effective_max_nproc(&pyright_manifest()), PYRIGHT_MAX_NPROC); + // Pyright plugins run with no RLIMIT_NPROC cap: the per-UID-global limit + // is the wrong tool for a language-server plugin (see effective_max_nproc).
+ assert_eq!(effective_max_nproc(&pyright_manifest()), None); } // ── Full end-to-end helper ──────────────────────────────────────────────── diff --git a/crates/loomweave-core/src/plugin/limits.rs b/crates/loomweave-core/src/plugin/limits.rs index 2741835c..5cc19d3c 100644 --- a/crates/loomweave-core/src/plugin/limits.rs +++ b/crates/loomweave-core/src/plugin/limits.rs @@ -306,7 +306,14 @@ pub fn apply_prlimit_as(max_rss_mib: u64) -> std::io::Result<()> { setrlimit(Resource::RLIMIT_AS, bytes, bytes).map_err(std::io::Error::from) } -/// Apply `RLIMIT_NOFILE` and `RLIMIT_NPROC` to the current process. +/// Apply `RLIMIT_NOFILE` (always) and `RLIMIT_NPROC` (when `max_nproc` is +/// `Some`) to the current process. +/// +/// `max_nproc` is `None` for plugins that spawn a language server: `RLIMIT_NPROC` +/// is a per-real-UID-global counter, so it cannot bound such a plugin without +/// being tripped by the user's unrelated processes (see +/// `host::effective_max_nproc`). Those plugins run with `RLIMIT_NOFILE` and +/// `RLIMIT_AS` only. /// /// Called from the same `pre_exec` closure as [`apply_prlimit_as`]. Same /// async-signal-safety notes apply — `setrlimit` is on the POSIX AS-safe @@ -318,11 +325,14 @@ pub fn apply_prlimit_as(max_rss_mib: u64) -> std::io::Result<()> { /// /// Returns `std::io::Error` on the first `setrlimit` failure. #[cfg(target_os = "linux")] -pub fn apply_prlimit_nofile_nproc(max_nofile: u64, max_nproc: u64) -> std::io::Result<()> { +pub fn apply_prlimit_nofile_nproc(max_nofile: u64, max_nproc: Option<u64>) -> std::io::Result<()> { use nix::sys::resource::{Resource, setrlimit}; setrlimit(Resource::RLIMIT_NOFILE, max_nofile, max_nofile).map_err(std::io::Error::from)?; - setrlimit(Resource::RLIMIT_NPROC, max_nproc, max_nproc).map_err(std::io::Error::from) + if let Some(max_nproc) = max_nproc { + setrlimit(Resource::RLIMIT_NPROC, max_nproc, max_nproc).map_err(std::io::Error::from)?; + } + Ok(()) } /// Non-Linux stub for [`apply_prlimit_nofile_nproc`].
@@ -330,7 +340,10 @@ pub fn apply_prlimit_nofile_nproc(max_nofile: u64, max_nproc: u64) -> std::io::R /// `nix` 0.28 does not expose `Resource::RLIMIT_NPROC` on macOS, so the real /// implementation stays restricted to Linux. #[cfg(not(target_os = "linux"))] -pub fn apply_prlimit_nofile_nproc(_max_nofile: u64, _max_nproc: u64) -> std::io::Result<()> { +pub fn apply_prlimit_nofile_nproc( + _max_nofile: u64, + _max_nproc: Option<u64>, +) -> std::io::Result<()> { Ok(()) } @@ -589,7 +602,7 @@ pub fn apply_prlimit_nofile_nproc" #[cfg(not(target_os = "linux"))] #[test] fn apply_prlimit_non_linux_stub_returns_ok() { - let result = apply_prlimit_nofile_nproc(DEFAULT_MAX_NOFILE, DEFAULT_MAX_NPROC); + let result = apply_prlimit_nofile_nproc(DEFAULT_MAX_NOFILE, Some(DEFAULT_MAX_NPROC)); assert!(result.is_ok()); } } diff --git a/docs/loomweave/adr/ADR-021-plugin-authority-hybrid.md b/docs/loomweave/adr/ADR-021-plugin-authority-hybrid.md index f2be850e..363486e5 100644 --- a/docs/loomweave/adr/ADR-021-plugin-authority-hybrid.md +++ b/docs/loomweave/adr/ADR-021-plugin-authority-hybrid.md @@ -112,6 +112,8 @@ Use cgroup v2 (`systemd-run --user --scope` or direct cgroup mounts) for per-plu **Why rejected**: incremental control richness is not worth the cross-platform surface area at v0.1 scale. +**Known limitation — process-count control.** `prlimit` cannot meaningfully bound a *single plugin's* process/thread count: `RLIMIT_NPROC` is enforced by the kernel against **every process/thread owned by the real UID system-wide**, not against the plugin's descendant tree. Any ceiling low enough to stop a fork-bomb is therefore also low enough to fail a legitimate `fork(2)` with `EAGAIN` once the operator's *unrelated* processes (an interactive session, other Weft daemons) push the per-UID count past it. Plugins that spawn a language server (e.g.
the Python plugin's `pyright-langserver`, a Node process with many helper threads) are the acute case: the host therefore applies `RLIMIT_NPROC` only to plugins that do **not** declare the `pyright` runtime capability, and leaves it uncapped for those that do (relying on `RLIMIT_AS` + crash-loop supervision instead — see `host::effective_max_nproc`). cgroup v2 `pids.max` is the correct tool for a true per-plugin process ceiling and is the documented path if process-count bounding is ever required. + ## Consequences ### Positive @@ -127,6 +129,7 @@ Use cgroup v2 (`systemd-run --user --scope` or direct cgroup mounts) for per-plu - Plugin authors have one more contract to satisfy — the four limits are real and can bite a plugin that emits millions of noisy `LMWV-FACT-*` findings. Mitigation: the `expected_entities_per_file` manifest declaration produces a sanity-warning (`LMWV-INFRA-PLUGIN-ENTITY-OVERRUN-WARNING`) well before the hard cap, so the first sign of trouble isn't a killed plugin. - The `prlimit` approach doesn't cover RSS only — `RLIMIT_AS` caps virtual memory, which overcounts for plugins that `mmap` large file ranges (e.g., tree-sitter's incremental parse buffers). Mitigation: default cap of 2 GiB is generous enough that a well-behaved plugin won't trip it; operators on constrained hosts who do trip it get a specific finding subcode. - Full sandbox is deferred; a malicious plugin that stays under the four caps can still exfiltrate source to a network destination. This is a known v0.2 gap and is named in the "NOT in Layer 2" list and in v0.1 release notes. +- Language-server plugins (those declaring `capabilities.runtime.pyright`) run with **no** `RLIMIT_NPROC` cap, so their process *count* is not bounded — see the "process-count control" limitation under Alternative 4. 
`RLIMIT_AS` (per-process memory) and the crash-loop counter remain in force; the accepted residual risk is a fork-bomb from a *first-party* language-server plugin, which the system-wide `ulimit -u` still backstops. ### Neutral diff --git a/docs/loomweave/adr/ADR-035-operational-tuning-discipline.md b/docs/loomweave/adr/ADR-035-operational-tuning-discipline.md index 7b049a64..968ac3fa 100644 --- a/docs/loomweave/adr/ADR-035-operational-tuning-discipline.md +++ b/docs/loomweave/adr/ADR-035-operational-tuning-discipline.md @@ -121,7 +121,7 @@ DEFAULT_MAX_NOFILE (limits.rs) DEFAULT_MAX_NPROC (limits.rs) ``` -Plus `PYRIGHT_MAX_NPROC = 4096` (host.rs, raised for the language-server runtime). All twelve MUST be retrofitted to the four-axis declaration before the 1.1 release. +Previously this list carried `PYRIGHT_MAX_NPROC = 4096` (host.rs, a raised `RLIMIT_NPROC` ceiling for the language-server runtime). That constant was **retired**: `RLIMIT_NPROC` is a per-real-UID-global counter, so any fixed ceiling is tripped by the operator's unrelated processes and intermittently fails `pyright-langserver`'s `fork(2)` with `EAGAIN` on a busy workstation. `host::effective_max_nproc` now returns `None` (no `RLIMIT_NPROC` cap) for plugins declaring the `pyright` runtime capability, and `Some(DEFAULT_MAX_NPROC)` otherwise. See ADR-021 (Alternative 4, "process-count control") for the rationale and the cgroup v2 `pids.max` follow-up. The remaining constants MUST be retrofitted to the four-axis declaration before the 1.1 release. For the Python plugin, the inventory enumerated by `answer-python-engineer.md` is: diff --git a/plugins/python/plugin.toml b/plugins/python/plugin.toml index 93c4f75a..8d1da944 100644 --- a/plugins/python/plugin.toml +++ b/plugins/python/plugin.toml @@ -25,6 +25,11 @@ wardline_aware = true # v0.1 rejects `true` at initialize with LMWV-INFRA-MANIFEST-UNSUPPORTED-CAPABILITY. 
reads_outside_project_root = false +# Declaring this sub-table marks the plugin as a language-server host: it spawns +# pyright-langserver (Node). The host detects this and leaves RLIMIT_NPROC +# uncapped for the plugin — RLIMIT_NPROC is per-UID-global and would otherwise +# fail pyright's fork() with EAGAIN on a busy workstation (see ADR-021, +# "process-count control", and host::effective_max_nproc). [capabilities.runtime.pyright] pin = "1.1.409" diff --git a/plugins/python/src/loomweave_plugin_python/pyright_session.py b/plugins/python/src/loomweave_plugin_python/pyright_session.py index a90be613..f8096f74 100644 --- a/plugins/python/src/loomweave_plugin_python/pyright_session.py +++ b/plugins/python/src/loomweave_plugin_python/pyright_session.py @@ -3,6 +3,7 @@ import ast import ctypes import ctypes.util +import errno import json import math import os @@ -41,6 +42,8 @@ FINDING_PYRIGHT_INIT_TIMEOUT = "LMWV-PY-PYRIGHT-INIT-TIMEOUT" FINDING_PYRIGHT_UNAVAILABLE = "LMWV-PY-PYRIGHT-UNAVAILABLE" FINDING_PYRIGHT_INSTALL_FAILURE = "LMWV-PY-PYRIGHT-INSTALL-FAILURE" +FINDING_PYRIGHT_SPAWN_DEFERRED = "LMWV-PY-PYRIGHT-SPAWN-DEFERRED" +FINDING_PYRIGHT_RESOURCE_EXHAUSTED = "LMWV-PY-PYRIGHT-RESOURCE-EXHAUSTED" FINDING_PYRIGHT_CALL_RESOLUTION_TIMEOUT = "LMWV-PY-CALL-RESOLUTION-TIMEOUT" FINDING_PYRIGHT_REFERENCE_RESOLUTION_TIMEOUT = "LMWV-PY-REFERENCE-RESOLUTION-TIMEOUT" FINDING_PYRIGHT_REFERENCE_SITE_CAP = "LMWV-PY-REFERENCE-SITE-CAP" @@ -56,14 +59,32 @@ class PyrightRunState: consume ``ceil(N/25) * 3`` restarts instead of 3 for an entire analysis run. Pass the same ``PyrightRunState`` instance to every successive ``PyrightSession`` so the budget is enforced across the full run. 
+ + ``consecutive_spawn_deferrals`` tracks transient (resource-pressure) spawn + failures separately from the ``restart_count`` crash budget: it is reset to + zero on every successful spawn, so intermittent pressure never poisons the + run, while a sustained run of deferrals still terminates pyright once it + exceeds ``MAX_CONSECUTIVE_SPAWN_DEFERRALS``. """ restart_count: int = 0 disabled: bool = False + consecutive_spawn_deferrals: int = 0 MAX_UNRESOLVED_CALLEE_EXPR_BYTES = 512 MAX_PYRIGHT_RESTARTS_PER_RUN = 3 +# A spawn that fails with one of these errnos is a *transient* resource-pressure +# condition (the host is momentarily out of process slots / memory), not a broken +# install. EAGAIN in particular is what a busy workstation returns from fork(2) +# when the per-UID RLIMIT_NPROC is hit. These are deferred-and-retried rather +# than treated as a permanent install failure. +_TRANSIENT_SPAWN_ERRNOS = frozenset({errno.EAGAIN, errno.ENOMEM, errno.EMFILE, errno.ENFILE}) +# Upper bound on *consecutive* transient spawn deferrals before pyright is +# disabled for the run. Reset to zero on any successful spawn, so this only +# fires under sustained pressure, never on an intermittent blip. A failed fork +# costs microseconds, so retrying once per file across a large run is cheap. 
+MAX_CONSECUTIVE_SPAWN_DEFERRALS = 50 MAX_REFERENCE_SITES_PER_FILE = 2000 PYRIGHT_INIT_TIMEOUT_SECS = 30.0 PYRIGHT_CALL_TIMEOUT_SECS = 5.0 @@ -711,14 +732,7 @@ def set_pdeathsig() -> None: preexec_fn=preexec_fn, # noqa: PLW1509 ) except OSError as exc: - self._run_state.disabled = True - self._record_finding( - FINDING_PYRIGHT_INSTALL_FAILURE, - "pyright-langserver failed to start", - executable=executable, - error=str(exc), - ) - return False + return self._handle_spawn_oserror(exc, executable) self._process = process self._start_stderr_drain(process) @@ -745,8 +759,57 @@ def set_pdeathsig() -> None: process.kill() process.wait(timeout=2) return False + # A clean spawn + handshake clears any accumulated transient-deferral + # pressure: the per-UID resource squeeze that caused earlier EAGAINs has + # eased, so the run is healthy again. + self._run_state.consecutive_spawn_deferrals = 0 return True + def _handle_spawn_oserror(self, exc: OSError, executable: str) -> bool: + """Triage a ``subprocess.Popen`` failure into transient vs. permanent. + + ``EAGAIN``/``ENOMEM``/``EMFILE``/``ENFILE`` are *transient* + resource-pressure errors: a busy host momentarily out of process slots, + memory, or file descriptors. The spawn is deferred — ``self._process`` + stays ``None`` and ``disabled`` is left unset, so the next file retries a + fresh spawn — and only a sustained run of deferrals + (``MAX_CONSECUTIVE_SPAWN_DEFERRALS``) gives up. Any other errno (notably + ``ENOENT``/``EACCES``) is a genuine, permanent install defect and + disables pyright for the rest of the run. 
+ """ + if exc.errno in _TRANSIENT_SPAWN_ERRNOS: + self._run_state.consecutive_spawn_deferrals += 1 + if self._run_state.consecutive_spawn_deferrals > MAX_CONSECUTIVE_SPAWN_DEFERRALS: + self._run_state.disabled = True + self._record_finding( + FINDING_PYRIGHT_RESOURCE_EXHAUSTED, + "pyright-langserver persistently unavailable under resource " + "pressure; skipping call resolution", + executable=executable, + consecutive_spawn_deferrals=self._run_state.consecutive_spawn_deferrals, + error=str(exc), + ) + return False + # Emit one finding per pressure *episode* (the 0 -> 1 transition), + # not one per deferred file, so a busy run is not buried in findings. + if self._run_state.consecutive_spawn_deferrals == 1: + self._record_finding( + FINDING_PYRIGHT_SPAWN_DEFERRED, + "pyright-langserver spawn deferred under resource pressure; " + "will retry on subsequent files", + executable=executable, + error=str(exc), + ) + return False + self._run_state.disabled = True + self._record_finding( + FINDING_PYRIGHT_INSTALL_FAILURE, + "pyright-langserver failed to start", + executable=executable, + error=str(exc), + ) + return False + def _initialize(self) -> None: result = self._request( "initialize", diff --git a/plugins/python/tests/test_pyright_session.py b/plugins/python/tests/test_pyright_session.py index 04e27656..451ed595 100644 --- a/plugins/python/tests/test_pyright_session.py +++ b/plugins/python/tests/test_pyright_session.py @@ -1,8 +1,11 @@ from __future__ import annotations import ast +import errno +import os import shutil import stat +import subprocess import sys import textwrap from pathlib import Path @@ -17,9 +20,13 @@ FINDING_PYRIGHT_POISON_FRAME, FINDING_PYRIGHT_REFERENCE_RESOLUTION_TIMEOUT, FINDING_PYRIGHT_REFERENCE_SITE_CAP, + FINDING_PYRIGHT_RESOURCE_EXHAUSTED, FINDING_PYRIGHT_RESTART, + FINDING_PYRIGHT_SPAWN_DEFERRED, FINDING_PYRIGHT_UNAVAILABLE, + MAX_CONSECUTIVE_SPAWN_DEFERRALS, LspTimeoutError, + PyrightRunState, PyrightSession, _build_function_index, 
_CallSite, @@ -32,7 +39,8 @@ from loomweave_plugin_python.reference_resolver import ReferenceSite, ReferenceSiteKind if TYPE_CHECKING: - from collections.abc import Sequence + from collections.abc import Callable, Sequence + from typing import NoReturn from loomweave_plugin_python.call_resolver import Finding @@ -876,6 +884,128 @@ def test_pyright_session_install_failure(tmp_path: Path) -> None: assert FINDING_PYRIGHT_INSTALL_FAILURE in _finding_codes(result.findings) +def _popen_raising(err: int) -> Callable[..., NoReturn]: + def _factory(*args: object, **kwargs: object) -> NoReturn: + _ = (args, kwargs) + raise OSError(err, os.strerror(err)) + + return _factory + + +def test_transient_spawn_failure_defers_without_disabling( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """EAGAIN on spawn is transient: skip the file, retry next, never poison.""" + module = _write_module(tmp_path, "def caller():\n print('x')\n") + monkeypatch.setattr(subprocess, "Popen", _popen_raising(errno.EAGAIN)) + run_state = PyrightRunState() + + with PyrightSession(tmp_path, executable=sys.executable, run_state=run_state) as session: + first = session.resolve_calls(module, ["python:function:demo.caller"]) + second = session.resolve_calls(module, ["python:function:demo.caller"]) + + # A transient resource squeeze must NOT permanently disable pyright... + assert run_state.disabled is False + # ...and every file re-attempts the spawn (skip-and-continue). + assert run_state.consecutive_spawn_deferrals == 2 + # One finding per pressure episode (the 0 -> 1 transition), not per file, + # and never the permanent install-failure poison. 
+ assert FINDING_PYRIGHT_SPAWN_DEFERRED in _finding_codes(first.findings) + assert FINDING_PYRIGHT_SPAWN_DEFERRED not in _finding_codes(second.findings) + assert FINDING_PYRIGHT_INSTALL_FAILURE not in _finding_codes(first.findings) + assert first.edges == [] + assert first.unresolved_call_sites_total == 1 + assert second.unresolved_call_sites_total == 1 + + +def test_permanent_spawn_failure_disables( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """A non-transient errno (ENOENT) is a genuine install defect: disable.""" + module = _write_module(tmp_path, "def caller():\n print('x')\n") + monkeypatch.setattr(subprocess, "Popen", _popen_raising(errno.ENOENT)) + run_state = PyrightRunState() + + with PyrightSession(tmp_path, executable=sys.executable, run_state=run_state) as session: + result = session.resolve_calls(module, ["python:function:demo.caller"]) + + assert run_state.disabled is True + assert FINDING_PYRIGHT_INSTALL_FAILURE in _finding_codes(result.findings) + assert FINDING_PYRIGHT_SPAWN_DEFERRED not in _finding_codes(result.findings) + assert result.unresolved_call_sites_total == 1 + + +def test_sustained_spawn_pressure_trips_resource_exhausted( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Unrelenting EAGAIN eventually gives up — with its own finding, not poison.""" + module = _write_module(tmp_path, "def caller():\n print('x')\n") + monkeypatch.setattr(subprocess, "Popen", _popen_raising(errno.EAGAIN)) + run_state = PyrightRunState() + codes: set[str] = set() + + with PyrightSession(tmp_path, executable=sys.executable, run_state=run_state) as session: + for _ in range(MAX_CONSECUTIVE_SPAWN_DEFERRALS + 1): + result = session.resolve_calls(module, ["python:function:demo.caller"]) + codes |= _finding_codes(result.findings) + + assert run_state.disabled is True + assert FINDING_PYRIGHT_RESOURCE_EXHAUSTED in codes + # The soft-stop is distinct from the install-failure poison. 
+ assert FINDING_PYRIGHT_INSTALL_FAILURE not in codes + + +@pytest.mark.pyright +def test_successful_spawn_resets_deferral_counter( + tmp_path: Path, + pyright_langserver: str, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """After a deferred file, a clean spawn clears the pressure counter.""" + module = _write_module( + tmp_path, + """ + def callee(): + pass + + def caller(): + callee() + """, + ) + real_popen = cast("Callable[..., subprocess.Popen[bytes]]", subprocess.Popen) + calls = {"n": 0} + + def flaky_popen(*args: object, **kwargs: object) -> subprocess.Popen[bytes]: + # Fail only the *first pyright* spawn. _start_process incidentally shells + # out via ctypes.util.find_library (ldconfig/gcc/objdump); those must pass + # through to the real Popen, or the injected EAGAIN lands on the wrong call. + argv = args[0] if args else kwargs.get("args") + executable = argv[0] if isinstance(argv, (list, tuple)) and argv else None + if isinstance(executable, str) and executable.endswith("pyright-langserver"): + calls["n"] += 1 + if calls["n"] == 1: + raise OSError(errno.EAGAIN, os.strerror(errno.EAGAIN)) + return real_popen(*args, **kwargs) + + monkeypatch.setattr(subprocess, "Popen", flaky_popen) + run_state = PyrightRunState() + function_ids = ["python:function:demo.caller", "python:function:demo.callee"] + + with PyrightSession(tmp_path, executable=pyright_langserver, run_state=run_state) as session: + deferred = session.resolve_calls(module, function_ids) + resolved = session.resolve_calls(module, function_ids) + + assert FINDING_PYRIGHT_SPAWN_DEFERRED in _finding_codes(deferred.findings) + assert deferred.edges == [] + # The second file spawned cleanly: not disabled and the counter is reset. 
+ assert run_state.disabled is False + assert run_state.consecutive_spawn_deferrals == 0 + assert resolved.edges + + class TimeoutSession(PyrightSession): def _request(self, method: str, params: dict[str, object], timeout_secs: float) -> object: if method == "callHierarchy/outgoingCalls": From 86355a0bbe2dfa339ce561f5e305903da74c5574 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 23:12:17 +1000 Subject: [PATCH 29/60] chore(release): bump to 1.1.0rc3 (Cargo 1.1.0-rc3 / PEP440 1.1.0rc3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Workspace + Python plugin in lockstep. rc3 ships the pyright spawn-failure hardening and the RLIMIT_NPROC sandbox correction (af435c4): a transient EAGAIN on pyright spawn no longer permanently disables analysis for the rest of the run, and RLIMIT_NPROC — a per-UID-global limit — is no longer applied to language-server plugins. All seven version-lockstep check scripts pass. No package is published for release candidates. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 31 +++++++++++++++++++ Cargo.lock | 16 +++++----- Cargo.toml | 2 +- crates/loomweave-cli/Cargo.toml | 14 ++++----- crates/loomweave-cli/pyproject.toml | 4 +-- crates/loomweave-federation/Cargo.toml | 2 +- crates/loomweave-mcp/Cargo.toml | 6 ++-- crates/loomweave-plugin-fixture/Cargo.toml | 2 +- crates/loomweave-storage/Cargo.toml | 2 +- plugins/python/plugin.toml | 2 +- plugins/python/pyproject.toml | 2 +- .../src/loomweave_plugin_python/__init__.py | 2 +- plugins/python/tests/test_package.py | 4 +-- plugins/python/tests/test_server.py | 2 +- plugins/python/uv.lock | 2 +- 15 files changed, 62 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index baf2017b..588732dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,37 @@ only when an incompatible change is made to that surface. 
See ## [Unreleased] +## [1.1.0rc3] — 2026-06-06 + +Third 1.1 release candidate. Hardens the Python plugin's pyright spawn path +against transient resource pressure and corrects the plugin process-limit +sandbox. No package is published for release candidates. (Cargo SemVer +`1.1.0-rc3`; Python wheels normalise to PEP 440 `1.1.0rc3`.) + +### Fixed + +- **Transient pyright spawn failures no longer disable analysis for the whole + run.** A `subprocess.Popen` failure with a transient errno + (`EAGAIN`/`ENOMEM`/`EMFILE`/`ENFILE`) now skips only the current file and + retries a fresh spawn on the next one, instead of being treated as a permanent + install failure. A new `LMWV-PY-PYRIGHT-SPAWN-DEFERRED` finding is emitted once + per pressure episode, and a resettable soft-cap emits + `LMWV-PY-PYRIGHT-RESOURCE-EXHAUSTED` (giving up only under *sustained* + pressure); genuine defects (`ENOENT`/`EACCES`) still disable as before. Closes + the `[Errno 11] Resource temporarily unavailable` → + `LMWV-PY-PYRIGHT-INSTALL-FAILURE` failure seen analysing large projects. + +### Changed + +- **`RLIMIT_NPROC` is no longer applied to language-server plugins.** Because + `RLIMIT_NPROC` is enforced per real UID system-wide — not per plugin subtree — + any fixed ceiling is tripped by the operator's unrelated processes and + intermittently fails `pyright-langserver`'s `fork(2)` with `EAGAIN`. The host + now leaves `RLIMIT_NPROC` uncapped for plugins declaring the `pyright` runtime + capability (relying on `RLIMIT_AS` + crash-loop supervision) and retires the + `PYRIGHT_MAX_NPROC = 4096` constant. cgroup v2 `pids.max` is documented as the + future tool for true per-plugin process bounds (ADR-021, ADR-035). 
+ ## [1.1.0rc2] — 2026-06-06 Second 1.1 release candidate, rolling up dogfood-friction fixes and deferred diff --git a/Cargo.lock b/Cargo.lock index 410073f8..0ec6f839 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1057,7 +1057,7 @@ checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" [[package]] name = "loomweave-analysis" -version = "1.1.0-rc2" +version = "1.1.0-rc3" dependencies = [ "anyhow", "serde", @@ -1067,7 +1067,7 @@ dependencies = [ [[package]] name = "loomweave-cli" -version = "1.1.0-rc2" +version = "1.1.0-rc3" dependencies = [ "anyhow", "assert_cmd", @@ -1106,7 +1106,7 @@ dependencies = [ [[package]] name = "loomweave-core" -version = "1.1.0-rc2" +version = "1.1.0-rc3" dependencies = [ "async-trait", "nix", @@ -1123,7 +1123,7 @@ dependencies = [ [[package]] name = "loomweave-federation" -version = "1.1.0-rc2" +version = "1.1.0-rc3" dependencies = [ "blake3", "loomweave-core", @@ -1137,7 +1137,7 @@ dependencies = [ [[package]] name = "loomweave-mcp" -version = "1.1.0-rc2" +version = "1.1.0-rc3" dependencies = [ "async-trait", "blake3", @@ -1160,7 +1160,7 @@ dependencies = [ [[package]] name = "loomweave-plugin-fixture" -version = "1.1.0-rc2" +version = "1.1.0-rc3" dependencies = [ "loomweave-core", "nix", @@ -1169,7 +1169,7 @@ dependencies = [ [[package]] name = "loomweave-scanner" -version = "1.1.0-rc2" +version = "1.1.0-rc3" dependencies = [ "regex", "serde", @@ -1181,7 +1181,7 @@ dependencies = [ [[package]] name = "loomweave-storage" -version = "1.1.0-rc2" +version = "1.1.0-rc3" dependencies = [ "blake3", "deadpool-sqlite", diff --git a/Cargo.toml b/Cargo.toml index a8ac37b3..a9cc8b0f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] [workspace.package] -version = "1.1.0-rc2" +version = "1.1.0-rc3" edition = "2024" license = "MIT" repository = "https://github.com/foundryside-dev/loomweave" diff --git a/crates/loomweave-cli/Cargo.toml b/crates/loomweave-cli/Cargo.toml index bbe56dd3..25e80357 100644 --- 
a/crates/loomweave-cli/Cargo.toml +++ b/crates/loomweave-cli/Cargo.toml @@ -18,12 +18,12 @@ anyhow.workspace = true axum.workspace = true blake3.workspace = true clap.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc2" } -loomweave-analysis = { path = "../loomweave-analysis", version = "1.1.0-rc2" } -loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc2" } -loomweave-mcp = { path = "../loomweave-mcp", version = "1.1.0-rc2" } -loomweave-scanner = { path = "../loomweave-scanner", version = "1.1.0-rc2" } -loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc2" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc3" } +loomweave-analysis = { path = "../loomweave-analysis", version = "1.1.0-rc3" } +loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc3" } +loomweave-mcp = { path = "../loomweave-mcp", version = "1.1.0-rc3" } +loomweave-scanner = { path = "../loomweave-scanner", version = "1.1.0-rc3" } +loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc3" } dotenvy.workspace = true fs2.workspace = true hmac.workspace = true @@ -46,7 +46,7 @@ uuid.workspace = true [dev-dependencies] assert_cmd.workspace = true -loomweave-plugin-fixture = { path = "../loomweave-plugin-fixture", version = "1.1.0-rc2" } +loomweave-plugin-fixture = { path = "../loomweave-plugin-fixture", version = "1.1.0-rc3" } rusqlite.workspace = true serde_json.workspace = true sha1.workspace = true diff --git a/crates/loomweave-cli/pyproject.toml b/crates/loomweave-cli/pyproject.toml index 11c21c6c..01da2123 100644 --- a/crates/loomweave-cli/pyproject.toml +++ b/crates/loomweave-cli/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "loomweave" -version = "1.1.0rc2" +version = "1.1.0rc3" description = "Loomweave — graph-aware code archaeology (Rust core)" readme = "../../README.md" requires-python = ">=3.11" @@ -15,7 +15,7 @@ classifiers = [ 
"Programming Language :: Rust", "Programming Language :: Python :: 3", ] -dependencies = ["loomweave-plugin-python==1.1.0rc2"] +dependencies = ["loomweave-plugin-python==1.1.0rc3"] [project.urls] Repository = "https://github.com/foundryside-dev/loomweave" diff --git a/crates/loomweave-federation/Cargo.toml b/crates/loomweave-federation/Cargo.toml index db2f9f1a..14685c69 100644 --- a/crates/loomweave-federation/Cargo.toml +++ b/crates/loomweave-federation/Cargo.toml @@ -11,7 +11,7 @@ workspace = true [dependencies] blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc2" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc3" } reqwest.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/loomweave-mcp/Cargo.toml b/crates/loomweave-mcp/Cargo.toml index 722e6208..ee91de58 100644 --- a/crates/loomweave-mcp/Cargo.toml +++ b/crates/loomweave-mcp/Cargo.toml @@ -12,9 +12,9 @@ workspace = true [dependencies] async-trait.workspace = true blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc2" } -loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc2" } -loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc2" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc3" } +loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc3" } +loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc3" } reqwest.workspace = true rusqlite.workspace = true serde.workspace = true diff --git a/crates/loomweave-plugin-fixture/Cargo.toml b/crates/loomweave-plugin-fixture/Cargo.toml index f1d1ac26..9becb4a4 100644 --- a/crates/loomweave-plugin-fixture/Cargo.toml +++ b/crates/loomweave-plugin-fixture/Cargo.toml @@ -23,7 +23,7 @@ name = "loomweave-fixture-plugin" path = "src/main.rs" [dependencies] -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc2" } 
+loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc3" } serde_json.workspace = true [target.'cfg(unix)'.dependencies] diff --git a/crates/loomweave-storage/Cargo.toml b/crates/loomweave-storage/Cargo.toml index dac7a33a..4c815352 100644 --- a/crates/loomweave-storage/Cargo.toml +++ b/crates/loomweave-storage/Cargo.toml @@ -11,7 +11,7 @@ workspace = true [dependencies] blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc2" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc3" } deadpool-sqlite.workspace = true rusqlite.workspace = true serde.workspace = true diff --git a/plugins/python/plugin.toml b/plugins/python/plugin.toml index 8d1da944..f22c819b 100644 --- a/plugins/python/plugin.toml +++ b/plugins/python/plugin.toml @@ -1,7 +1,7 @@ [plugin] name = "loomweave-plugin-python" plugin_id = "python" -version = "1.1.0rc2" +version = "1.1.0rc3" protocol_version = "1.0" # Bare basename per ADR-021 §Layer 1 + WP2 scrub commit eb0a41d — the host # refuses manifests whose `executable` carries any path component. 
diff --git a/plugins/python/pyproject.toml b/plugins/python/pyproject.toml index 46a98200..09a492da 100644 --- a/plugins/python/pyproject.toml +++ b/plugins/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "loomweave-plugin-python" -version = "1.1.0rc2" +version = "1.1.0rc3" description = "Loomweave Python language plugin — v1.0 release" readme = "README.md" requires-python = ">=3.11" diff --git a/plugins/python/src/loomweave_plugin_python/__init__.py b/plugins/python/src/loomweave_plugin_python/__init__.py index 5562b23f..408c7283 100644 --- a/plugins/python/src/loomweave_plugin_python/__init__.py +++ b/plugins/python/src/loomweave_plugin_python/__init__.py @@ -1,3 +1,3 @@ """loomweave-plugin-python — Python language plugin for Loomweave.""" -__version__ = "1.1.0rc2" +__version__ = "1.1.0rc3" diff --git a/plugins/python/tests/test_package.py b/plugins/python/tests/test_package.py index da595730..ec5e81ba 100644 --- a/plugins/python/tests/test_package.py +++ b/plugins/python/tests/test_package.py @@ -17,7 +17,7 @@ def _read_toml(path: Path) -> dict[str, Any]: def test_package_version_matches_pyproject() -> None: - assert loomweave_plugin_python.__version__ == "1.1.0rc2" + assert loomweave_plugin_python.__version__ == "1.1.0rc3" def test_plugin_version_lockstep_across_pyproject_manifest_and_module() -> None: @@ -42,7 +42,7 @@ def test_plugin_version_lockstep_across_pyproject_manifest_and_module() -> None: def test_manifest_declares_current_v1_ontology_only() -> None: manifest = _read_toml(_PLUGIN_ROOT / "plugin.toml") - assert manifest["plugin"]["version"] == "1.1.0rc2" + assert manifest["plugin"]["version"] == "1.1.0rc3" assert manifest["capabilities"]["runtime"]["wardline_aware"] is True assert manifest["integrations"]["wardline"]["expected_descriptor_version"] == ( EXPECTED_DESCRIPTOR_VERSION diff --git a/plugins/python/tests/test_server.py b/plugins/python/tests/test_server.py index 7be14705..b5274470 100644 --- 
a/plugins/python/tests/test_server.py +++ b/plugins/python/tests/test_server.py @@ -86,7 +86,7 @@ def test_initialize_roundtrip() -> None: assert response["id"] == 1 result = response["result"] assert result["name"] == "loomweave-plugin-python" - assert result["version"] == "1.1.0rc2" + assert result["version"] == "1.1.0rc3" assert result["ontology_version"] == "0.7.0" assert set(result["capabilities"]) == {"wardline"} assert result["capabilities"]["wardline"]["status"] in { diff --git a/plugins/python/uv.lock b/plugins/python/uv.lock index aef6d0bd..295e621f 100644 --- a/plugins/python/uv.lock +++ b/plugins/python/uv.lock @@ -464,7 +464,7 @@ wheels = [ [[package]] name = "loomweave-plugin-python" -version = "1.1.0rc2" +version = "1.1.0rc3" source = { editable = "." } dependencies = [ { name = "packaging" }, From 6938cf1303981089a0cc29bb4342ddce4d436b47 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 23:12:17 +1000 Subject: [PATCH 30/60] docs: agent-first experience report from elspeth dogfooding session Field feedback captured while using Loomweave as the primary code-archaeology surface against the elspeth project: what to protect, onboarding/config/feedback gaps, and proposed batch + project-level operations. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/agent-first-feedback-2026-06-06.md | 262 ++++++++++++++++++++++++ 1 file changed, 262 insertions(+) create mode 100644 docs/agent-first-feedback-2026-06-06.md diff --git a/docs/agent-first-feedback-2026-06-06.md b/docs/agent-first-feedback-2026-06-06.md new file mode 100644 index 00000000..cdb76d2c --- /dev/null +++ b/docs/agent-first-feedback-2026-06-06.md @@ -0,0 +1,262 @@ +# Loomweave: Agent-First Experience Report + +**From:** an LLM coding agent (Claude, Opus 4.8) that used Loomweave as its primary +code-archaeology surface for a session. 
+**Against:** a real downstream project — `elspeth`, ~40.4k entities, 142 +subsystems, 48.4k edges, 157 findings, a 210 MB `loomweave.db`. +**Date:** 2026-06-06 · Loomweave `1.1.0-rc1`, Wardline `1.0.0rc1`. +**Scope of the session:** investigate the toolset, then enable the live LLM +provider (`claude_cli`) and generate real entity summaries end-to-end. + +This is field feedback from your actual primary user — an agent. The headline: +**Loomweave's core is excellent and its instincts are right. The gap to +"agent-first" is almost entirely in the onboarding/config/feedback layer and in a +handful of missing batch + project-level operations.** Below: what to protect, +what to fix, and what I'd love to use if it existed. + +--- + +## 0. TL;DR for the busy maintainer + +- **Protect:** honest-empty discipline, `scope_excludes`, confidence tiers, SEI, + cost-preview-before-spend, structural fallback, the loom↔ward↔filigree triangle. + These are genuinely best-in-class and rare. +- **Fix first (correctness/trust):** silent LLM mis-config (no `deny_unknown_fields`, + missing `enabled` → silent disable, no validation warning); two status tools + disagree on `allow_live_provider`; config schema is only discoverable by reading + source. +- **Fix soon (discoverability):** ship the operator docs with the binary; make + `doctor` validate `loomweave.yaml`; stop advertising write-gated tools the agent + can't see. +- **Build (the agent-first leap):** a project-level orientation pack; budgeted bulk + summarization; diff-aware blast-radius; an error/traceback orientation tool; and + closing the discover→summarize→propose→fix loop across the three tools. + +--- + +## 1. What's already great (do not "fix" these) + +An agent-first report has to start here, because several of these are things a less +disciplined tool would "simplify" away, and they are exactly why I trusted the +output. + +1. 
**Honest-empty everywhere.** Every catalogue tool returns an empty result *with + a `signal` note* ("available:false", the reason) instead of fabricating an + answer. As an agent, the single most expensive failure mode is a confident wrong + answer. Loomweave's refusal to fabricate is the feature. +2. **`scope_excludes` / blind-spot honesty.** `callers_of` telling me "I did not + search attribute-receiver calls, so this empty result is not a guaranteed true + negative — re-query at `inferred`" is *exactly* the metacognition an agent needs. + This should be the model for every tool. +3. **Confidence tiers (`resolved`/`ambiguous`/`inferred`)** as an explicit ceiling, + with "there is no `all`." Forces me to reason about edge quality instead of + trusting a number. +4. **SEI (durable identity) vs `id` (mutable locator).** The right call for + cross-tool bindings. The fact that `project_status` reports `sei.populated` so I + can tell whether I'm in a degraded state is excellent. +5. **Cost preview before spend.** `entity_summary_preview_cost_get` reporting + `live_spend_would_occur` and an input-token estimate *without invoking the + provider*, and distinguishing "disabled" from "cache miss" — this is how every + spend-bearing tool should behave. +6. **Structural fallback.** When the LLM returns non-JSON, you degrade to a + deterministic source-derived summary *and cache it* so a retry is free, not + re-billed. Graceful, honest, cheap. Chef's kiss. +7. **The triangle.** Loomweave (map) + Wardline (taint) + Filigree (issues), bound + on SEI, with Wardline findings reconciled into `entity_issue_list`. This is a + genuinely powerful substrate for agent workflows — see §4 for how to exploit it. + +--- + +## 2. Defects & friction I actually hit (must-fix) + +Ranked by how badly each hurt an agent trying to self-serve. 
+ +### 2.1 — Silent LLM mis-configuration (P0) +**Symptom:** I wrote a plausible `llm_policy` block, restarted, and the provider +stayed `disabled` with **zero diagnostics**. It took reading the stripped binary's +string table, then the source, to find two causes. + +**Root cause (`crates/loomweave-federation/src/config.rs`):** +- Every config struct is `#[serde(default)]` with **no `deny_unknown_fields`**. I + put `model_id:` *inside* `claude_cli` (the field is `model`); it was **silently + dropped**. Any typo is silently dropped. +- `enabled` defaults to `false`. I set `provider` + `allow_live_provider: true` but + omitted `enabled` → silently disabled. +- `validate()` only checks deprecated-provider / blank-actor / loopback-trust. It + **never warns** that a fully-specified live provider is sitting behind + `enabled: false`, or that `allow_live_provider: true` is inert without `enabled`. + +**Fix:** +- Add `deny_unknown_fields` (or a non-fatal "unknown config key: X" warning at load). +- Emit a startup diagnostic naming the **effective** LLM state, e.g. + `llm_policy.provider=claude_cli but enabled=false → summaries cache-only` and the + inverse for `allow_live_provider`. +- Tests: unknown nested key; `enabled` omitted; provider set + `enabled:false`; + `allow_live_provider` without `enabled`. + +> This single fix would have turned a 45-minute reverse-engineering session into a +> 30-second "oh, it told me what's wrong" loop. For an agent, *failing loud and +> specific* is worth more than any feature on the wishlist. + +### 2.2 — Two status surfaces disagree (P1) +For the same half-configured state, `project_status_get` reported +`allow_live_provider: true` while `entity_summary_preview_cost_get` reported +`false`. One reads raw config, the other reads effective/resolved state. An agent +debugging config cannot tell which to believe. +**Fix:** reconcile the two read paths; if one is "configured" and the other +"effective", **label them as such** in the payload. 
Add a test asserting agreement. + +### 2.3 — Schema is undiscoverable from the installed artifact (P1) +The `uv tool` install ships **no docs**. `analyze --help` literally references +`docs/operator/getting-started.md`, which isn't present. The authoritative +`docs/operator/coding-agent-llm-providers.md` (which would have told me +`max_turns: 2` is mandatory and `model` is the field name) exists **only in the +source repo**. I recovered the schema by `strings`-ing a stripped Rust binary. +**Fix:** bundle `docs/operator/*` in the package, **and/or** add a +`loomweave config example [--provider claude_cli]` that prints a complete annotated +config generated from the real structs, and `loomweave config check` that validates +a file and prints the effective state. + +### 2.4 — `doctor` doesn't validate the config (P1) +`doctor` is pitched as a CI/pre-commit gate but only checks the skill/hook/ +`.mcp.json` install surfaces — it skips `loomweave.yaml`, the file most likely to be +hand-edited wrong. +**Fix:** `doctor` should parse + lint `llm_policy`, report the effective +provider/live state and projected per-summary cost, and warn on the §2.1 patterns. + +### 2.5 — Advertised-but-gated tools (P2) +The `loomweave-workflow` skill and the MCP server's own instructions advertise +`summary`/`entity_summary_get`, `analyze_start`/`cancel`, `propose_guidance`/ +`promote_guidance`. But `tools/list` returns **34** tools and none of those appear +unless `serve.mcp.enable_write_tools: true` (then **39**). An agent that follows the +skill calls tools that don't exist and gets a hard error. +**Fix:** note the `enable_write_tools` gate in the skill + server instructions; and +consider having `tools/list` (or a `capabilities` tool) report disabled tools with a +one-line "set `serve.mcp.enable_write_tools: true` to enable." + +### 2.6 — Silent model/cost surprise (P2) +With `claude_cli.model: null`, Loomweave inherits the local CLI's **default** model +— which on my login is Opus. 
My first real summary cost **$0.27** (8,944 tokens). +Pinning `model: claude-sonnet-4-6` dropped the same summary to ~538 tokens. Nothing +surfaced "you are about to summarize on Opus at $0.27/call" at enable time. +**Fix:** surface the **effective model** and a **projected per-summary cost** at +serve start and in `project_status`. (You already have the preview machinery — +fold a cost estimate into it and into `orientation_pack`.) + +--- + +## 3. The agent-first wishlist (the fun part — "as creative or demanding as I want") + +These are features I, as the agent, would *love* to call. Roughly ordered by impact. + +### 3.1 — A **project-level orientation pack** (`project_orientation_pack`) +`entity_orientation_pack_get` is superb but per-entity. My very first question in any +repo is *"what is this whole thing?"* I want one deterministic call that returns: +top subsystems by size + a one-line role for each, the entry points, HTTP routes, +the coupling hotspots, recent-change hotspots, open findings by severity, and index +freshness. A generated **"map of the territory"** I read once at session start +instead of issuing 15 calls. Bonus: expose it as an MCP **resource** +(`loomweave://orientation`) so a client can auto-load it on connect. + +### 3.2 — **Budgeted bulk summarization** (`summarize_scope`) +I will *never* loop 40k `entity_summary_get` calls myself. Give me: +`summarize_scope({ scope, budget_usd | budget_tokens, order: "centrality" })`. It +summarizes the most-central entities first (PageRank / fan-in+fan-out), streams +progress like `analyze_start`, stops when the budget is hit, and returns +**what it covered and what it skipped** (honest-truncation, naturally). This turns +"populate the cache for the auth subsystem for under $5" into one call. 
+ +### 3.3 — **Diff-aware blast radius** (`impact_of_change`) +`index_diff` tells me freshness; I want the next step: *"what changed since commit X, +and what's the blast radius?"* Return changed entities + their resolved callers +(transitively, bounded) + the tests that cover them + any findings on the changed +set. This is the #1 question an agent asks when resuming a branch, and right now I +hand-assemble it. + +### 3.4 — **Error/traceback orientation** (`orient_from_traceback`) +Paste a Python traceback (or a `pytest` failure), get back, per frame: the entity, +its neighborhood, recent changes, test coverage, and any findings. Agents debug +constantly; this would be the single most-used tool I can imagine. The parsing is +language-plugin territory, which fits your architecture. + +### 3.5 — **Subsystem-level summaries** (roll-up briefings) +`summary` is leaf-only by design (honest, and stated). But I'd love a +`subsystem_summary` that composes leaf summaries into "what is this cluster, its +public surface, its invariants, its risks." Cache-keyed on the set of member +content-hashes so it invalidates correctly. This is the altitude at which I make +architectural decisions. + +### 3.6 — **Inline, verifiable citations in summaries** +The summaries I got were *accurate* — but I had to re-read the source to *verify* +them. Have the provider cite line ranges per claim (`behavior` → `L57–L77`), so an +agent can spot-check a summary against source without trusting it blind. Pairs +beautifully with your existing content-hash provenance. + +### 3.7 — A **token/cost ledger tool** + hard budget guardrail +`session_token_ceiling` exists internally; expose it. `llm_budget_status` → +{ spent, remaining, per-route breakdown }. And let me *set* a per-session ceiling at +connect time so an autonomous agent physically cannot overspend. Fold projected cost +into every spend-bearing tool result, not just the preview. 
+ +### 3.8 — **First-class semantic search** (no API key) +"Find the function that does X" is an inherently semantic query, but +`search_semantic` is opt-in, needs a separate OpenAI-compatible embedding provider, +and an API key. For an agent-first tool this should be near-zero-friction — ship/ +support a **local** embedding model so the highest-value discovery query works out of +the box. Today it's the one obvious agent query that's hardest to turn on. + +### 3.9 — Close the **discover → summarize → propose → fix** loop +You have all the pieces; wire them for agents: +- `propose_guidance` exists but is inert until operator promotion (correct + governance). Make proposing **frictionless** and add `list_pending_guidance` so an + agent can see what it (or peers) proposed and an operator can batch-promote. +- When I learn "this looks like X but is actually Y" while reading code, I want to + capture that *in the moment* against the SEI. The capture cost must be one call. +- Tie findings → fix: `finding → entity + neighborhood + test coverage → propose + patch → result to Filigree`. The triangle makes this possible; an agent-facing + recipe (or a composite tool) would make it routine. + +### 3.10 — An **`--agent` serve mode** / `loomweave install --agent` +The `enable_write_tools` gate is the right default for multi-tenant HTTP, but for a +local single-tenant agent dev loop it's pure friction. A `loomweave serve --agent` +(or an install profile) that turns on the full agent surface (write tools, a +cost-guarded cheap-model provider hint, semantic search) and **prints the effective +config + projected costs** would make first-run delightful instead of a +config-archaeology expedition. + +### 3.11 — Make `project_status` **actionable** (Filigree already does this) +`project_status` is informative; make every degraded field carry a `next_action` +hint (Filigree's `work_ready` does exactly this with its `startable`/`next_action` +pattern). 
"staleness: stale" → "run `analyze_start`"; "llm: disabled" → "set +`enabled: true` + `allow_live_provider: true`". Self-healing guidance beats a status +code an agent has to interpret. + +### 3.12 — **Async summary jobs** +A single live summary is multi-second (it spawns `claude -p`). For anything +interactive, let `entity_summary_get` optionally return a job handle and let me poll +(you already do this for `analyze_start`/`analyze_status`). Lets an agent fan out +summaries concurrently instead of serializing on wall-clock. + +--- + +## 4. The meta-point + +Loomweave is increasingly consumed **by agents as its primary user**, but its +**config and onboarding layer is still written for a human operator** who will read +the source. Every §2 item is a place where a human would shrug and dig, but an agent +hits a silent wall. The fastest path to "agent-first" is three moves: + +1. **Fail loud and self-describe.** Unknown keys, disabled providers, gated tools, + stale indexes — all should announce themselves with the exact fix. (§2.1–2.6, §3.11) +2. **Operate in batches with budgets.** Agents don't want 40k calls; they want + "summarize this scope for $5 and tell me what you skipped." (§3.2, §3.5, §3.7, §3.12) +3. **Answer the questions agents actually ask.** Not just "what calls X" but "what + is this project," "what does this change break," "where did this traceback come + from." (§3.1, §3.3, §3.4) + +The bones are right. The honesty discipline is rare and worth protecting. Close the +feedback-and-batch gap and Loomweave becomes the tool an agent reaches for *first* +in every unfamiliar repo. + +— Submitted with appreciation; the structural-fallback-and-cache detail genuinely +made my day. 
From 91e79d2325d9f12353007b8188b2eb2412f39585 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 23:18:19 +1000 Subject: [PATCH 31/60] docs(ci): rebrand stale Clarion->Loomweave in release.yml cosign comments The clarion->loomweave and Loom->Weft renames were already landed in prior commits (crates are loomweave-*, env vars WEFT_*, SEI loomweave:eid:). The only remaining mechanically-safe display residue was two CI comment lines referencing "the last good release (Clarion v1.1.0)"; rename the brand prose to Loomweave. No code, schema, or identity strings touched. SEI (loomweave:eid:) count unchanged: 98 before and after. Carve-outs left intact: Filigree issue IDs (clarion-), .filigree.conf prefix/project_name, docs/archive/ historical records, CHANGELOG rename-record prose, /api/loom + X-Loom wire contract (deferred until Wardline/Filigree move in lockstep). Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f7794065..04835d5e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -500,7 +500,7 @@ jobs: # whose `sign-blob` requires a bundle and breaks the # --output-signature/--output-certificate flow ("create bundle file: # open :"). v2.5.2 is the version that signed the last good release - # (Clarion v1.1.0). Porting sign-blob/verify-blob to cosign 3.x is a + # (Loomweave v1.1.0). Porting sign-blob/verify-blob to cosign 3.x is a # separate follow-up. cosign-release: 'v2.5.2' @@ -620,7 +620,7 @@ jobs: # whose `sign-blob` requires a bundle and breaks the # --output-signature/--output-certificate flow ("create bundle file: # open :"). v2.5.2 is the version that signed the last good release - # (Clarion v1.1.0). Porting sign-blob/verify-blob to cosign 3.x is a + # (Loomweave v1.1.0). 
Porting sign-blob/verify-blob to cosign 3.x is a # separate follow-up. cosign-release: 'v2.5.2' From 9328e8f1a7ed549af41deeec078fa7367d04cea7 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sun, 7 Jun 2026 00:50:10 +1000 Subject: [PATCH 32/60] =?UTF-8?q?fix(config):=20fail-loud=20LLM=20config?= =?UTF-8?q?=20+=20discoverability=20(agent-first-feedback=20=C2=A72)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A dogfood run hit a cluster of config/onboarding defects: a plausible llm_policy block silently disabled summaries with zero diagnostics, two status tools disagreed on allow_live_provider, the config schema was only discoverable from source, and the skill/MCP instructions advertised write-gated tools an agent could not call. Also remove the deprecated anthropic provider shape (pre-release, no back-compat techdebt). - config (loomweave-federation): deny_unknown_fields on every config struct, so a misplaced/typo'd key (e.g. model_id under claude_cli) is a hard parse error naming the field instead of a silent drop; add a `version` field so the install stub still parses; McpConfig::llm_warnings() surfaces configured-but-disabled providers and unpinned coding-agent models; serve logs the effective LLM posture to stderr at startup. - remove LlmProviderKind::Anthropic, LlmConfig.anthropic_api_key_env, ConfigError::DeprecatedProvider, and the deprecated-shape test. - status (loomweave-mcp): reconcile project_status_get and entity_summary_preview_cost_get on allow_live_provider/enabled (both read the diagnostics snapshot; live stays effective); add a next_action hint when the provider is disabled. 
- cli: new `loomweave config example|check` (shared stub = single source of truth with install); `doctor` parses+validates loomweave.yaml and reports the effective LLM state (text + json + next_actions); LLM-enablement docs in --help, the agent-orientation block, and operator docs; repoint dangling docs/operator references to stable URLs. - mcp: server instructions + loomweave-workflow skill stop advertising the write-gated tools (entity_summary_get, analyze_start/cancel, propose/promote_guidance) unless serve.mcp.enable_write_tools is set, and name them plus the gate instead. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../assets/instructions/loomweave.md | 4 + crates/loomweave-cli/src/cli.rs | 69 +++- crates/loomweave-cli/src/config.rs | 188 +++++++++++ crates/loomweave-cli/src/doctor.rs | 106 +++++++ crates/loomweave-cli/src/install.rs | 56 +--- crates/loomweave-cli/src/main.rs | 1 + crates/loomweave-cli/src/serve.rs | 19 ++ crates/loomweave-cli/tests/config.rs | 103 ++++++ crates/loomweave-federation/src/config.rs | 299 ++++++++++++++---- .../assets/skills/loomweave-workflow/SKILL.md | 16 +- crates/loomweave-mcp/src/lib.rs | 78 +++-- crates/loomweave-mcp/src/tools/status.rs | 77 +++-- crates/loomweave-mcp/tests/storage_tools.rs | 47 +++ docs/operator/getting-started.md | 5 + docs/operator/openrouter.md | 8 +- 15 files changed, 914 insertions(+), 162 deletions(-) create mode 100644 crates/loomweave-cli/tests/config.rs diff --git a/crates/loomweave-cli/assets/instructions/loomweave.md b/crates/loomweave-cli/assets/instructions/loomweave.md index d90bb596..fa72067b 100644 --- a/crates/loomweave-cli/assets/instructions/loomweave.md +++ b/crates/loomweave-cli/assets/instructions/loomweave.md @@ -16,4 +16,8 @@ verbatim into the next tool. Index freshness and counts: `project_status_get` (or the `loomweave://context` resource). If the index is stale, run `loomweave analyze `. 
+LLM summaries (`entity_summary_get`) are off by default and need a configured live +provider; `project_status_get` reports the posture and `loomweave config check` +explains how to enable it. + Full workflow: the `loomweave-workflow` skill. diff --git a/crates/loomweave-cli/src/cli.rs b/crates/loomweave-cli/src/cli.rs index 8723eb4a..b90ee2c3 100644 --- a/crates/loomweave-cli/src/cli.rs +++ b/crates/loomweave-cli/src/cli.rs @@ -3,7 +3,21 @@ use std::path::PathBuf; use clap::{Parser, Subcommand, ValueEnum}; #[derive(Parser)] -#[command(name = "loomweave", version, about = "Loomweave code-archaeology tool")] +#[command( + name = "loomweave", + version, + about = "Loomweave code-archaeology tool", + long_about = "Loomweave extracts a queryable graph from a codebase and serves it to \ +consult-mode agents over MCP.\n\n\ +Typical flow: `loomweave install` (set up .loomweave/ + agent assets), `loomweave \ +analyze` (build the index), `loomweave serve` (run the MCP server).\n\n\ +LLM-backed entity summaries are OFF by default. To enable them set \ +`llm_policy.enabled: true` + `allow_live_provider: true` in loomweave.yaml and supply \ +the provider credential (e.g. OPENROUTER_API_KEY), or point at a coding-agent CLI \ +(claude_cli / codex_cli). Run `loomweave config example` to print an annotated config \ +and `loomweave config check` to see the effective LLM state; `loomweave doctor` \ +validates the install and the config." +)] pub struct Cli { #[command(subcommand)] pub command: Command, @@ -66,8 +80,9 @@ pub enum Command { /// Run an analysis pass: walk the source tree, dispatch discovered plugins /// to extract entities/edges, and persist results to `.loomweave/loomweave.db`. /// Re-runs are idempotent (UPSERT on `entities.id`). If no plugins are on - /// `$PATH`, exits 0 with a WARN and status `skipped_no_plugins` — see - /// `docs/operator/getting-started.md` Troubleshooting. 
+ /// `$PATH`, exits 0 with a WARN and status `skipped_no_plugins` — see the + /// Troubleshooting guide at + /// . /// /// To commit the index as a versioned artifact while `serve` may be running, /// take a consistent online copy with `loomweave db backup` rather than @@ -148,6 +163,17 @@ pub enum Command { }, /// Run the MCP stdio server. + /// + /// Serves the code graph to MCP clients. The entity_summary_get tool needs a + /// live LLM provider, which is OFF by default: set `llm_policy.enabled: true` + /// and `allow_live_provider: true` in loomweave.yaml and supply the provider + /// credential (OPENROUTER_API_KEY for the default openrouter provider), or + /// switch `llm_policy.provider` to claude_cli / codex_cli for a locally + /// authenticated coding-agent CLI. Without that, summaries are cache-only. + /// Write-capable tools (entity_summary_get, analyze_start, analyze_cancel, + /// propose_guidance, promote_guidance) require `serve.mcp.enable_write_tools: + /// true`. The effective LLM posture is logged to stderr at startup; run + /// `loomweave config check` to inspect it ahead of time. Serve { /// Project directory containing .loomweave/loomweave.db. #[arg(long, default_value = ".")] @@ -178,6 +204,15 @@ pub enum Command { command: GuidanceCommand, }, + /// Inspect `loomweave.yaml`: print an annotated example, or validate the + /// file and report the effective LLM provider state. The installed binary + /// ships no docs, so this is the in-tool way to discover the config schema + /// and see why live summaries are (or are not) enabled. + Config { + #[command(subcommand)] + command: ConfigCommand, + }, + /// Verify (and optionally repair) the installed agent-orientation surfaces: /// the `loomweave-workflow` skill pack, the `SessionStart` hook, and the /// `.mcp.json` MCP registration. 
Prints a per-surface report plus the index @@ -231,6 +266,34 @@ pub enum DbCommand { }, } +#[derive(Subcommand)] +pub enum ConfigCommand { + /// Print an annotated example `loomweave.yaml` to stdout — the same content + /// `loomweave install` writes, generated so it always matches the current + /// config schema. Redirect it to `loomweave.yaml` and edit. + Example { + /// Pre-select the active LLM provider block in the example + /// (`openrouter`, `codex_cli`, or `claude_cli`). Defaults to the stub's + /// `openrouter`. + #[arg(long, value_name = "PROVIDER")] + provider: Option, + }, + + /// Parse and validate `loomweave.yaml`, then print the effective LLM state + /// (provider, enabled, live, model) and any warnings — the in-tool answer to + /// "why are my summaries cache-only?". Exits non-zero if the file fails to + /// parse or validate, so it works as a CI / pre-commit gate. + Check { + /// Project directory containing loomweave.yaml (default: current). + #[arg(long, default_value = ".")] + path: PathBuf, + + /// Path to loomweave.yaml (default: /loomweave.yaml if present). + #[arg(long)] + config: Option, + }, +} + #[derive(Subcommand)] pub enum GuidanceCommand { /// Create a new guidance sheet (`kind: guidance`, provenance: manual). diff --git a/crates/loomweave-cli/src/config.rs b/crates/loomweave-cli/src/config.rs index db23073e..3df179b3 100644 --- a/crates/loomweave-cli/src/config.rs +++ b/crates/loomweave-cli/src/config.rs @@ -3,8 +3,166 @@ use std::path::Path; use anyhow::{Context, Result, bail, ensure}; use loomweave_analysis::ClusterAlgorithm; +use loomweave_federation::config::{McpConfig, ProviderSelection, select_provider_with_env}; use serde::{Deserialize, Serialize}; +// NOTE: Do not use `\` line-continuation in this string — Rust strips both the +// newline AND all leading whitespace on the continuation line, producing flat +// (and therefore broken) YAML. Use raw newlines + explicit indentation. 
+// +// This is the single source of truth for the default `loomweave.yaml`: both +// `loomweave install` (writes it on init) and `loomweave config example` (prints +// it) use this exact text, so they can never drift. A round-trip test +// (`stub_parses_under_deny_unknown_fields`) asserts it parses cleanly under the +// config structs' `deny_unknown_fields` — guarding against stub↔struct drift. +pub(crate) const LOOMWEAVE_YAML_STUB: &str = "# loomweave.yaml — user-edited config. +# Do not delete this file: loomweave serve reads MCP, LLM, and integration +# settings from here when present. Validate it any time with `loomweave config check`. +version: 1 +# --- LLM summaries (entity_summary_get) -------------------------------------- +# OFF by default. To enable LIVE summaries: +# 1. set both enabled: true AND allow_live_provider: true below; then +# 2. either keep provider: openrouter and export the key named by +# openrouter.api_key_env (default OPENROUTER_API_KEY), OR switch provider to +# claude_cli / codex_cli to drive a locally-authenticated coding-agent CLI +# (no API key stored in this file). +# `loomweave config check` prints the resulting effective state and any warnings. 
+llm_policy: + enabled: false + provider: openrouter + allow_live_provider: false + openrouter: + endpoint_url: https://openrouter.ai/api/v1 + api_key_env: OPENROUTER_API_KEY + attribution: + referer: https://github.com/foundryside-dev/loomweave + title: Loomweave + codex_cli: + executable: codex + model: null + profile: null + sandbox: read-only + timeout_seconds: 300 + claude_cli: + executable: claude + model: null + permission_mode: plan + tools: [] + timeout_seconds: 300 + max_turns: 2 + no_session_persistence: true + exclude_dynamic_system_prompt_sections: true + model_id: anthropic/claude-sonnet-4.6 + session_token_ceiling: 1000000 + max_inferred_edges_per_caller: 8 + cache_max_age_days: 180 +integrations: + filigree: + enabled: false + base_url: http://127.0.0.1:8766 + actor: loomweave-mcp + token_env: FILIGREE_API_TOKEN + timeout_seconds: 5 +serve: + mcp: + enable_write_tools: false + http: + enabled: false + # The read-API port is auto-selected per project (deterministic, with an + # ephemeral fallback) and published to .loomweave/ephemeral.port while + # serving. Set `bind:` explicitly only to pin a fixed port (ADR-044). +"; + +/// Dispatch `loomweave config `. +pub(crate) fn run(command: crate::cli::ConfigCommand) -> Result<()> { + match command { + crate::cli::ConfigCommand::Example { provider } => run_example(provider.as_deref()), + crate::cli::ConfigCommand::Check { path, config } => run_check(&path, config.as_deref()), + } +} + +/// Print the annotated default `loomweave.yaml`, optionally pre-selecting the +/// active LLM provider block. +fn run_example(provider: Option<&str>) -> Result<()> { + let yaml = match provider { + None | Some("openrouter") => LOOMWEAVE_YAML_STUB.to_owned(), + Some(p @ ("codex_cli" | "claude_cli")) => { + // The stub already carries every provider sub-block, so selecting a + // provider is just swapping the active `provider:` line. 
+ LOOMWEAVE_YAML_STUB.replacen(" provider: openrouter", &format!(" provider: {p}"), 1) + } + Some(other) => bail!( + "unknown --provider {other:?}; expected one of: openrouter, codex_cli, claude_cli" + ), + }; + print!("{yaml}"); + Ok(()) +} + +/// Parse + validate `loomweave.yaml` and print the effective LLM provider state. +/// A parse/validate failure bubbles as an error (non-zero exit); a +/// provider-selection error (e.g. live provider with a missing API key) is a +/// real misconfiguration and also exits non-zero, after printing the diagnosis. +fn run_check(path: &Path, explicit_config: Option<&Path>) -> Result<()> { + let default_path = path.join("loomweave.yaml"); + let config_path = explicit_config.unwrap_or(&default_path); + let (config, source) = if config_path.exists() { + let config = McpConfig::from_path(config_path) + .with_context(|| format!("parse {}", config_path.display()))?; + (config, config_path.display().to_string()) + } else { + ( + McpConfig::default(), + "(absent — built-in defaults in effect)".to_owned(), + ) + }; + + let selection = select_provider_with_env(&config, |name| std::env::var(name).ok()); + + println!("loomweave.yaml: {source}"); + println!("LLM enabled: {}", config.llm.enabled); + println!("Provider (configured): {}", config.llm.provider.as_str()); + println!("allow_live_provider: {}", config.llm.allow_live_provider); + println!( + "Effective model: {}", + config.llm.effective_model_label() + ); + match &selection { + Ok(sel) => { + let live = matches!( + sel, + ProviderSelection::OpenRouter { .. 
} + | ProviderSelection::CodexCli + | ProviderSelection::ClaudeCli + ); + println!( + "Live: {}", + if live { + "yes — entity_summary_get will dispatch to the provider" + } else { + "no — entity_summary_get is cache-only" + } + ); + } + Err(err) => println!("Live: error — {err}"), + } + + let warnings = config.llm_warnings(); + if warnings.is_empty() { + println!("\nNo warnings."); + } else { + println!("\nWarnings:"); + for warning in &warnings { + println!(" - {warning}"); + } + } + + if selection.is_err() { + std::process::exit(1); + } + Ok(()) +} + #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] #[serde(default)] pub(crate) struct AnalyzeConfig { @@ -138,3 +296,33 @@ impl ClusteringWeightBy { } } } + +#[cfg(test)] +mod tests { + use super::LOOMWEAVE_YAML_STUB; + use loomweave_federation::config::McpConfig; + + #[test] + fn stub_parses_under_deny_unknown_fields() { + // The default loomweave.yaml `install` writes (and `config example` + // prints) must parse cleanly through the config structs, which now use + // deny_unknown_fields. This guards against the stub drifting from the + // structs — a drift would otherwise ship a config the binary rejects. + let config = McpConfig::from_yaml_str(LOOMWEAVE_YAML_STUB) + .expect("install stub must parse under deny_unknown_fields"); + assert_eq!(config.version, 1); + assert!( + !config.llm.enabled, + "stub ships with LLM disabled by default" + ); + assert!(!config.serve.mcp.enable_write_tools); + } + + #[test] + fn stub_also_parses_via_analyze_config() { + // install/analyze read the same file through AnalyzeConfig (clustering + // only); confirm the stub round-trips there too. 
+ super::AnalyzeConfig::from_yaml_str(LOOMWEAVE_YAML_STUB) + .expect("install stub must parse as analyze config"); + } +} diff --git a/crates/loomweave-cli/src/doctor.rs b/crates/loomweave-cli/src/doctor.rs index 195d2447..1b4b576b 100644 --- a/crates/loomweave-cli/src/doctor.rs +++ b/crates/loomweave-cli/src/doctor.rs @@ -26,6 +26,7 @@ use std::fs; use std::path::Path; use anyhow::{Context, Result, bail}; +use loomweave_federation::config::{McpConfig, ProviderSelection, select_provider_with_env}; use rusqlite::Connection; use serde::Serialize; use serde_json::Value; @@ -72,6 +73,8 @@ pub fn run(path: &Path, fix: bool, json_output: bool) -> Result { tally += check_mcp(&project_root, fix); tally += check_instructions(&project_root, fix); tally += check_integration_bindings(&project_root, fix); + println!("--- llm ---"); + tally += check_llm_provider(&project_root); println!("--- index ---"); for line in hook::snapshot_report(&project_root) { @@ -163,6 +166,7 @@ fn json_report(project_root: &Path, fix: bool) -> DoctorJsonReport { check_instructions_json(project_root, fix), check_http_config_json(project_root), check_filigree_url_json(project_root), + check_llm_provider_json(project_root), check_sei_population_json(project_root), check_wardline_taint_capability_json(project_root), check_mcp_hygiene_json(), @@ -187,6 +191,13 @@ fn json_report(project_root: &Path, fix: bool) -> DoctorJsonReport { "index.freshness" => { "Run `loomweave analyze ` to refresh the index.".to_owned() } + "llm.provider" => { + "Run `loomweave config check` to see the effective LLM state; to enable live \ + summaries set llm_policy.enabled: true + allow_live_provider: true and supply the \ + provider credential. See \ + https://github.com/foundryside-dev/loomweave/blob/main/docs/operator/openrouter.md." + .to_owned() + } "plugin.availability" => { "Install a Loomweave language plugin (the Python plugin ships with `pip install \ loomweave`)." 
@@ -473,6 +484,74 @@ fn check_filigree_url_json(project_root: &Path) -> DoctorJsonCheck { } } +/// Severity classes for the LLM-config check, shared by the text and JSON +/// paths so they never diverge. +enum LlmPosture { + /// loomweave.yaml failed to parse/validate — serve would refuse to start. + Broken(String), + /// A live provider is configured but unusable (e.g. missing API key). + Unusable(String), + /// Healthy: a concise effective-state line, plus any advisory warnings. + Ok { + summary: String, + warnings: Vec, + }, +} + +/// Load loomweave.yaml *typed* (so deny_unknown_fields + validate() run), resolve +/// the effective provider, and classify the posture. This is the file most +/// likely to be hand-edited wrong (agent-first-feedback §2.4); an absent file is +/// fine (built-in defaults → LLM disabled). +fn llm_posture(project_root: &Path) -> LlmPosture { + let config_path = project_root.join("loomweave.yaml"); + let config = if config_path.exists() { + match McpConfig::from_path(&config_path) { + Ok(config) => config, + Err(err) => return LlmPosture::Broken(format!("loomweave.yaml: {err}")), + } + } else { + McpConfig::default() + }; + + let warnings = config.llm_warnings(); + let provider = config.llm.provider.as_str(); + match select_provider_with_env(&config, |name| std::env::var(name).ok()) { + Err(err) => LlmPosture::Unusable(format!("live provider selected but unusable: {err}")), + Ok(sel) => { + let live = matches!( + sel, + ProviderSelection::OpenRouter { .. 
} + | ProviderSelection::CodexCli + | ProviderSelection::ClaudeCli + ); + let summary = if live { + format!( + "LLM live: provider={provider}, model={}", + config.llm.effective_model_label() + ) + } else { + format!("LLM not live (provider={provider}); entity_summary_get is cache-only") + }; + LlmPosture::Ok { summary, warnings } + } + } +} + +fn check_llm_provider_json(project_root: &Path) -> DoctorJsonCheck { + match llm_posture(project_root) { + LlmPosture::Broken(msg) | LlmPosture::Unusable(msg) => { + DoctorJsonCheck::problem("llm.provider", msg) + } + LlmPosture::Ok { summary, warnings } if warnings.is_empty() => { + DoctorJsonCheck::ok("llm.provider", summary) + } + LlmPosture::Ok { summary, warnings } => DoctorJsonCheck::warning( + "llm.provider", + format!("{summary}; {}", warnings.join("; ")), + ), + } +} + fn check_sei_population_json(project_root: &Path) -> DoctorJsonCheck { let db = project_root.join(".loomweave/loomweave.db"); let Ok(conn) = Connection::open(&db) else { @@ -663,6 +742,33 @@ fn problem(line: &str, fix_hint: Option<&str>) -> Tally { } } +/// Text-path twin of [`check_llm_provider_json`]: report the effective LLM +/// state so a human running `loomweave doctor` sees why summaries are (or are +/// not) live, instead of having to read source (agent-first-feedback §2.4). 
+fn check_llm_provider(project_root: &Path) -> Tally { + match llm_posture(project_root) { + LlmPosture::Broken(msg) | LlmPosture::Unusable(msg) => problem( + &msg, + Some( + "loomweave config check (docs: \ + https://github.com/foundryside-dev/loomweave/blob/main/docs/operator/openrouter.md)", + ), + ), + LlmPosture::Ok { summary, warnings } => { + let tally = ok(&summary); + if warnings.is_empty() { + tally + } else { + let mut tally = tally; + for warning in &warnings { + tally += warn(warning, Some("loomweave config check")); + } + tally + } + } + } +} + fn check_skill(project_root: &Path, fix: bool) -> Tally { match skill_pack::skill_pack_state(project_root) { SkillPackState::UpToDate => ok("skill pack up to date (.claude + .agents)"), diff --git a/crates/loomweave-cli/src/install.rs b/crates/loomweave-cli/src/install.rs index 55c5c775..4137e44a 100644 --- a/crates/loomweave-cli/src/install.rs +++ b/crates/loomweave-cli/src/install.rs @@ -27,58 +27,10 @@ const CONFIG_JSON_STUB: &str = r#"{ } "#; -// NOTE: Do not use `\` line-continuation here — Rust strips both the newline -// AND all leading whitespace on the continuation line, producing flat (and -// therefore broken) YAML. Use raw newlines + explicit indentation. -const LOOMWEAVE_YAML_STUB: &str = "# loomweave.yaml — user-edited config. -# Do not delete this file: loomweave serve reads MCP, LLM, and integration -# settings from here when present. 
-version: 1 -llm_policy: - enabled: false - provider: openrouter - allow_live_provider: false - openrouter: - endpoint_url: https://openrouter.ai/api/v1 - api_key_env: OPENROUTER_API_KEY - attribution: - referer: https://github.com/foundryside-dev/loomweave - title: Loomweave - codex_cli: - executable: codex - model: null - profile: null - sandbox: read-only - timeout_seconds: 300 - claude_cli: - executable: claude - model: null - permission_mode: plan - tools: [] - timeout_seconds: 300 - max_turns: 2 - no_session_persistence: true - exclude_dynamic_system_prompt_sections: true - model_id: anthropic/claude-sonnet-4.6 - session_token_ceiling: 1000000 - max_inferred_edges_per_caller: 8 - cache_max_age_days: 180 -integrations: - filigree: - enabled: false - base_url: http://127.0.0.1:8766 - actor: loomweave-mcp - token_env: FILIGREE_API_TOKEN - timeout_seconds: 5 -serve: - mcp: - enable_write_tools: false - http: - enabled: false - # The read-API port is auto-selected per project (deterministic, with an - # ephemeral fallback) and published to .loomweave/ephemeral.port while - # serving. Set `bind:` explicitly only to pin a fixed port (ADR-044). -"; +// The default `loomweave.yaml` lives in `crate::config` so `loomweave install` +// and `loomweave config example` emit byte-identical content from a single +// source of truth (it can never drift from what install writes). +use crate::config::LOOMWEAVE_YAML_STUB; const GITIGNORE_CONTENTS: &str = "\ # Loomweave .gitignore — ADR-005 tracked-vs-excluded list. 
diff --git a/crates/loomweave-cli/src/main.rs b/crates/loomweave-cli/src/main.rs index 47ed6b57..0e8dce43 100644 --- a/crates/loomweave-cli/src/main.rs +++ b/crates/loomweave-cli/src/main.rs @@ -125,6 +125,7 @@ fn main() -> Result<()> { } => db::backup(&path, &output, force), }, cli::Command::Guidance { command } => guidance::run(command), + cli::Command::Config { command } => config::run(command), cli::Command::Doctor { path, fix, format } => { // doctor prints its own report; map an unhealthy result to a // non-zero exit so it can gate CI / pre-commit. The Result<()> arm diff --git a/crates/loomweave-cli/src/serve.rs b/crates/loomweave-cli/src/serve.rs index b96da876..a617375f 100644 --- a/crates/loomweave-cli/src/serve.rs +++ b/crates/loomweave-cli/src/serve.rs @@ -42,6 +42,24 @@ pub fn run(path: &Path, config_path: Option<&Path>) -> Result<()> { }; let provider_selection = select_provider_with_env(&config, |name| std::env::var(name).ok())?; let llm_diagnostics = llm_diagnostics(&provider_selection, &config.llm); + // Announce the *effective* LLM posture on stderr so a misconfigured provider + // is never silently disabled (agent-first-feedback §2.1/§2.6). stdout is the + // JSON-RPC channel, so diagnostics must not go there. 
+ if llm_diagnostics.live { + tracing::info!( + provider = %llm_diagnostics.provider, + model = %config.llm.effective_model_label(), + "LLM live: entity_summary_get will dispatch to the provider" + ); + } else { + tracing::info!( + provider = %llm_diagnostics.provider, + "LLM not live: entity_summary_get is cache-only" + ); + } + for warning in config.llm_warnings() { + tracing::warn!("loomweave.yaml: {warning}"); + } let llm_provider = build_llm_provider(&config, provider_selection, &project_root)?; let embedding_provider = build_embedding_provider(&config.semantic_search, |name| std::env::var(name).ok())?; @@ -145,6 +163,7 @@ fn llm_diagnostics( }; loomweave_mcp::LlmDiagnostics { provider: provider.to_owned(), + enabled: llm.enabled, live, allow_live_provider: llm.allow_live_provider, cache_max_age_days: llm.cache_max_age_days, diff --git a/crates/loomweave-cli/tests/config.rs b/crates/loomweave-cli/tests/config.rs new file mode 100644 index 00000000..190f95de --- /dev/null +++ b/crates/loomweave-cli/tests/config.rs @@ -0,0 +1,103 @@ +//! `loomweave config example|check` integration tests, plus the `doctor` LLM +//! check. These cover the agent-first-feedback §2.1/§2.3/§2.4 fixes: the schema +//! is discoverable from the binary, a misconfigured `loomweave.yaml` fails loud +//! (naming the bad key), and a configured-but-disabled provider is surfaced. + +use std::fs; +use std::path::Path; + +use assert_cmd::Command; + +fn loomweave_bin() -> Command { + Command::cargo_bin("loomweave").expect("loomweave binary") +} + +/// Run `loomweave config ` in `dir` and return `(exit_code, stdout, stderr)`. 
+fn config(dir: &Path, args: &[&str]) -> (i32, String, String) { + let output = loomweave_bin() + .arg("config") + .args(args) + .current_dir(dir) + .output() + .expect("run config"); + ( + output.status.code().expect("exit code"), + String::from_utf8_lossy(&output.stdout).into_owned(), + String::from_utf8_lossy(&output.stderr).into_owned(), + ) +} + +#[test] +fn config_example_emits_parseable_annotated_stub() { + let (code, stdout, _) = config(Path::new("."), &["example"]); + assert_eq!(code, 0); + assert!(stdout.contains("llm_policy:"), "stub: {stdout}"); + assert!(stdout.contains("provider: openrouter"), "stub: {stdout}"); + // The annotated stub must round-trip as a generic YAML document. + serde_norway::from_str::(&stdout) + .expect("config example output must be valid YAML"); +} + +#[test] +fn config_example_provider_flag_swaps_active_provider() { + let (code, stdout, _) = config(Path::new("."), &["example", "--provider", "claude_cli"]); + assert_eq!(code, 0); + // Check the active config line (indented), not the comment that mentions + // "provider: openrouter" as the default. 
+ assert!( + stdout.contains("\n provider: claude_cli"), + "stub: {stdout}" + ); + assert!( + !stdout.contains("\n provider: openrouter"), + "stub: {stdout}" + ); +} + +#[test] +fn config_example_rejects_unknown_provider() { + let (code, _, stderr) = config(Path::new("."), &["example", "--provider", "bogus"]); + assert_ne!(code, 0); + assert!(stderr.contains("bogus"), "stderr: {stderr}"); +} + +#[test] +fn config_check_reports_disabled_default_when_file_absent() { + let dir = tempfile::tempdir().unwrap(); + let (code, stdout, _) = config(dir.path(), &["check"]); + assert_eq!(code, 0); + assert!(stdout.contains("absent"), "out: {stdout}"); + assert!(stdout.contains("cache-only"), "out: {stdout}"); +} + +#[test] +fn config_check_warns_on_configured_but_disabled_provider() { + let dir = tempfile::tempdir().unwrap(); + fs::write( + dir.path().join("loomweave.yaml"), + "llm_policy:\n provider: claude_cli\n allow_live_provider: true\n", + ) + .unwrap(); + let (code, stdout, _) = config(dir.path(), &["check"]); + // A configured-but-disabled provider loads (exit 0) but must warn loudly. + assert_eq!(code, 0, "out: {stdout}"); + assert!(stdout.contains("Warnings:"), "out: {stdout}"); + assert!(stdout.contains("enabled=false"), "out: {stdout}"); +} + +#[test] +fn config_check_fails_loud_on_unknown_nested_key() { + // The exact dogfood bug: model_id placed under claude_cli (field is `model`). 
+ let dir = tempfile::tempdir().unwrap(); + fs::write( + dir.path().join("loomweave.yaml"), + "llm_policy:\n enabled: true\n provider: claude_cli\n claude_cli:\n model_id: x\n", + ) + .unwrap(); + let (code, _, stderr) = config(dir.path(), &["check"]); + assert_ne!(code, 0, "a misplaced key must fail config check"); + assert!( + stderr.contains("model_id"), + "stderr should name the key: {stderr}" + ); +} diff --git a/crates/loomweave-federation/src/config.rs b/crates/loomweave-federation/src/config.rs index 804e35ec..bdf934f4 100644 --- a/crates/loomweave-federation/src/config.rs +++ b/crates/loomweave-federation/src/config.rs @@ -4,9 +4,13 @@ use std::{fs, net::SocketAddr}; use serde::Deserialize; use thiserror::Error; -#[derive(Debug, Clone, PartialEq, Deserialize, Default)] -#[serde(default)] +#[derive(Debug, Clone, PartialEq, Deserialize)] +#[serde(default, deny_unknown_fields)] pub struct McpConfig { + /// Config schema version marker. Accepted and currently informational; it + /// exists so a versioned `loomweave.yaml` (the install stub writes + /// `version: 1`) still parses under `deny_unknown_fields`. 
+ pub version: u32, #[serde(alias = "llm_policy")] pub llm: LlmConfig, pub semantic_search: SemanticSearchConfig, @@ -14,6 +18,22 @@ pub struct McpConfig { pub serve: ServeConfig, } +fn default_config_version() -> u32 { + 1 +} + +impl Default for McpConfig { + fn default() -> Self { + Self { + version: default_config_version(), + llm: LlmConfig::default(), + semantic_search: SemanticSearchConfig::default(), + integrations: IntegrationsConfig::default(), + serve: ServeConfig::default(), + } + } +} + impl McpConfig { pub fn from_path(path: &Path) -> Result { let raw = fs::read_to_string(path).map_err(|source| ConfigError::Io { @@ -35,13 +55,6 @@ impl McpConfig { } fn validate(&self) -> Result<(), ConfigError> { - if self.llm.provider == LlmProviderKind::Anthropic - || self.llm.anthropic_api_key_env.is_some() - { - return Err(ConfigError::DeprecatedProvider { - code: "LMWV-CONFIG-DEPRECATED-PROVIDER", - }); - } if self.integrations.filigree.enabled && self.integrations.filigree.actor.trim().is_empty() { return Err(ConfigError::InvalidFiligreeActor { @@ -51,10 +64,60 @@ impl McpConfig { self.serve.http.validate_loopback_trust()?; Ok(()) } + + /// Non-fatal diagnostics about the *effective* LLM state, for surfacing at + /// `serve` startup, in `loomweave doctor`, and in `loomweave config check`. + /// + /// These never fail config load — `enabled: false` is the legitimate + /// safe default — they only explain why a configured provider may be inert, + /// so a misconfiguration announces itself instead of silently disabling + /// summaries (the agent-first-feedback §2.1 failure mode). + #[must_use] + pub fn llm_warnings(&self) -> Vec { + let llm = &self.llm; + let provider = llm.provider.as_str(); + let mut warnings = Vec::new(); + if !llm.enabled { + if llm.allow_live_provider { + warnings.push(format!( + "llm_policy.provider={provider} with allow_live_provider=true but \ + enabled=false → live summaries are off and entity_summary_get is \ + cache-only. 
Set llm_policy.enabled: true to enable." + )); + } + } else if !llm.allow_live_provider { + warnings.push(format!( + "llm_policy.enabled=true with provider={provider} but \ + allow_live_provider=false → live summaries are off (unless \ + LOOMWEAVE_LLM_LIVE=1 is set); entity_summary_get is cache-only. Set \ + llm_policy.allow_live_provider: true to enable live calls." + )); + } else { + // Live path is on: warn about an unpinned coding-agent model, which + // inherits the local CLI default and can be an expensive tier + // (agent-first-feedback §2.6). + match llm.provider { + LlmProviderKind::ClaudeCli if llm.claude_cli.model.is_none() => warnings.push( + "llm_policy.claude_cli.model is unset → summaries inherit the local \ + `claude` CLI default model, which may be an expensive tier. Pin \ + llm_policy.claude_cli.model to control per-summary cost." + .to_owned(), + ), + LlmProviderKind::CodexCli if llm.codex_cli.model.is_none() => warnings.push( + "llm_policy.codex_cli.model is unset → summaries inherit the local \ + `codex` CLI default model. Pin llm_policy.codex_cli.model to control \ + per-summary cost." + .to_owned(), + ), + _ => {} + } + } + warnings + } } #[derive(Debug, Clone, PartialEq, Deserialize)] -#[serde(default)] +#[serde(default, deny_unknown_fields)] pub struct LlmConfig { pub enabled: bool, pub provider: LlmProviderKind, @@ -67,7 +130,6 @@ pub struct LlmConfig { pub recording_fixture_path: Option, pub max_inferred_edges_per_caller: u32, pub cache_max_age_days: u32, - pub anthropic_api_key_env: Option, } impl Default for LlmConfig { @@ -84,7 +146,30 @@ impl Default for LlmConfig { recording_fixture_path: None, max_inferred_edges_per_caller: 8, cache_max_age_days: 180, - anthropic_api_key_env: None, + } + } +} + +impl LlmConfig { + /// Human-readable label for the model summaries will actually use, for + /// diagnostics (`serve` startup, `doctor`, `config check`). 
A coding-agent + /// CLI with an unpinned `model` inherits the local CLI's default, which this + /// names explicitly rather than rendering as a bare null. + #[must_use] + pub fn effective_model_label(&self) -> String { + match self.provider { + LlmProviderKind::OpenRouter => self.model_id.clone(), + LlmProviderKind::ClaudeCli => self + .claude_cli + .model + .clone() + .unwrap_or_else(|| "(local claude CLI default)".to_owned()), + LlmProviderKind::CodexCli => self + .codex_cli + .model + .clone() + .unwrap_or_else(|| "(local codex CLI default)".to_owned()), + LlmProviderKind::Recording => "(recording fixture)".to_owned(), } } } @@ -94,7 +179,7 @@ impl Default for LlmConfig { /// nothing here makes a hosted embedding service required. When `enabled` is /// false the `search_semantic` tool degrades honestly to "not enabled". #[derive(Debug, Clone, PartialEq, Deserialize)] -#[serde(default)] +#[serde(default, deny_unknown_fields)] pub struct SemanticSearchConfig { pub enabled: bool, /// Explicit opt-in to the live API provider (in addition to `enabled`). 
@@ -136,12 +221,23 @@ pub enum LlmProviderKind { CodexCli, #[serde(rename = "claude_cli", alias = "claude_code")] ClaudeCli, - Anthropic, Recording, } +impl LlmProviderKind { + #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::OpenRouter => "openrouter", + Self::CodexCli => "codex_cli", + Self::ClaudeCli => "claude_cli", + Self::Recording => "recording", + } + } +} + #[derive(Debug, Clone, PartialEq, Deserialize)] -#[serde(default)] +#[serde(default, deny_unknown_fields)] pub struct OpenRouterConfig { pub endpoint_url: String, pub api_key_env: String, @@ -161,7 +257,7 @@ impl Default for OpenRouterConfig { } #[derive(Debug, Clone, PartialEq, Deserialize)] -#[serde(default)] +#[serde(default, deny_unknown_fields)] pub struct OpenRouterAttributionConfig { pub referer: String, pub title: String, @@ -177,7 +273,7 @@ impl Default for OpenRouterAttributionConfig { } #[derive(Debug, Clone, PartialEq, Deserialize)] -#[serde(default)] +#[serde(default, deny_unknown_fields)] pub struct CodexCliConfig { pub executable: String, pub model: Option, @@ -218,7 +314,7 @@ impl CodexSandboxMode { } #[derive(Debug, Clone, PartialEq, Deserialize)] -#[serde(default)] +#[serde(default, deny_unknown_fields)] pub struct ClaudeCliConfig { pub executable: String, pub model: Option, @@ -270,20 +366,20 @@ impl ClaudePermissionMode { } #[derive(Debug, Clone, PartialEq, Default, Deserialize)] -#[serde(default)] +#[serde(default, deny_unknown_fields)] pub struct IntegrationsConfig { pub filigree: FiligreeConfig, } #[derive(Debug, Clone, PartialEq, Default, Deserialize)] -#[serde(default)] +#[serde(default, deny_unknown_fields)] pub struct ServeConfig { pub mcp: McpServeConfig, pub http: HttpReadConfig, } #[derive(Debug, Clone, PartialEq, Deserialize, Default)] -#[serde(default)] +#[serde(default, deny_unknown_fields)] pub struct McpServeConfig { /// Enable MCP tools that can mutate state, spawn processes, or call an LLM. 
/// Default false: `loomweave serve` exposes consult-mode read tools unless an @@ -292,7 +388,7 @@ pub struct McpServeConfig { } #[derive(Debug, Clone, PartialEq, Deserialize)] -#[serde(default)] +#[serde(default, deny_unknown_fields)] pub struct HttpReadConfig { pub enabled: bool, /// Bind address for the HTTP read API. `None` (the default) auto-selects a @@ -419,7 +515,7 @@ where } #[derive(Debug, Clone, PartialEq, Deserialize)] -#[serde(default)] +#[serde(default, deny_unknown_fields)] pub struct FiligreeConfig { pub enabled: bool, pub base_url: String, @@ -479,9 +575,6 @@ where match config.llm.provider { LlmProviderKind::Recording => Ok(ProviderSelection::Recording), - LlmProviderKind::Anthropic => Err(ConfigError::DeprecatedProvider { - code: "LMWV-CONFIG-DEPRECATED-PROVIDER", - }), LlmProviderKind::OpenRouter => { let live_env_opt_in = env_lookup("LOOMWEAVE_LLM_LIVE").as_deref() == Some("1"); if !config.llm.allow_live_provider && !live_env_opt_in { @@ -532,11 +625,6 @@ pub enum ConfigError { #[error("live OpenRouter provider selected but API key env var {env_var} is missing")] MissingOpenRouterApiKey { env_var: String }, - #[error( - "{code}: llm.provider=anthropic is deprecated; use llm_policy.provider: openrouter with llm_policy.openrouter.api_key_env and llm_policy.model_id" - )] - DeprecatedProvider { code: &'static str }, - #[error("{code}: integrations.filigree.actor must not be blank when Filigree is enabled")] InvalidFiligreeActor { code: &'static str }, @@ -738,9 +826,7 @@ llm_policy: provider: LlmProviderKind::OpenRouter, ..LlmConfig::default() }, - semantic_search: SemanticSearchConfig::default(), - integrations: IntegrationsConfig::default(), - serve: ServeConfig::default(), + ..McpConfig::default() }; let selected = select_provider_with_env(&cfg, |name| { @@ -760,9 +846,7 @@ llm_policy: allow_live_provider: true, ..LlmConfig::default() }, - semantic_search: SemanticSearchConfig::default(), - integrations: IntegrationsConfig::default(), - serve: 
ServeConfig::default(), + ..McpConfig::default() }; let missing = select_provider_with_env(&cfg, |_| None).expect_err("missing key"); @@ -823,9 +907,7 @@ llm_policy: provider: LlmProviderKind::CodexCli, ..LlmConfig::default() }, - semantic_search: SemanticSearchConfig::default(), - integrations: IntegrationsConfig::default(), - serve: ServeConfig::default(), + ..McpConfig::default() }; let selected = select_provider_with_env(&cfg, |_| None).expect("provider selection"); @@ -887,9 +969,7 @@ llm_policy: provider: LlmProviderKind::ClaudeCli, ..LlmConfig::default() }, - semantic_search: SemanticSearchConfig::default(), - integrations: IntegrationsConfig::default(), - serve: ServeConfig::default(), + ..McpConfig::default() }; let selected = select_provider_with_env(&cfg, |_| None).expect("provider selection"); @@ -1134,23 +1214,6 @@ serve: assert_eq!(cfg.serve.http.bind, None); } - #[test] - fn old_anthropic_provider_shape_reports_deprecated_provider() { - let err = McpConfig::from_yaml_str( - r" -llm: - enabled: true - provider: anthropic - anthropic_api_key_env: ANTHROPIC_API_KEY -", - ) - .expect_err("old provider shape should be rejected"); - - assert!(matches!(err, ConfigError::DeprecatedProvider { .. })); - assert!(err.to_string().contains("LMWV-CONFIG-DEPRECATED-PROVIDER")); - assert!(err.to_string().contains("provider: openrouter")); - } - #[test] fn enabled_filigree_integration_rejects_blank_actor() { let err = McpConfig::from_yaml_str( @@ -1165,4 +1228,120 @@ integrations: assert!(err.to_string().contains("LMWV-CONFIG-FILIGREE-ACTOR-BLANK")); } + + #[test] + fn version_marker_is_accepted() { + let cfg = McpConfig::from_yaml_str("version: 1\n").expect("version marker should parse"); + assert_eq!(cfg.version, 1); + // Omitting it falls back to the default schema version. 
+ assert_eq!(McpConfig::default().version, 1); + } + + #[test] + fn unknown_top_level_key_is_rejected() { + let err = McpConfig::from_yaml_str("not_a_real_section: true\n") + .expect_err("unknown top-level key should be rejected"); + let msg = err.to_string(); + assert!(matches!(err, ConfigError::Yaml(_)), "got: {msg}"); + assert!(msg.contains("not_a_real_section"), "got: {msg}"); + } + + #[test] + fn unknown_nested_key_under_claude_cli_is_rejected() { + // The exact agent-first-feedback §2.1 bug: `model_id` placed inside + // claude_cli (whose field is `model`) was silently dropped. With + // deny_unknown_fields it must now fail loudly, naming the key. + let err = McpConfig::from_yaml_str( + r" +llm_policy: + enabled: true + provider: claude_cli + allow_live_provider: true + claude_cli: + model_id: claude-sonnet-4-6 +", + ) + .expect_err("misplaced key under claude_cli should be rejected"); + let msg = err.to_string(); + assert!(matches!(err, ConfigError::Yaml(_)), "got: {msg}"); + assert!(msg.contains("model_id"), "got: {msg}"); + } + + #[test] + fn fully_specified_live_provider_behind_disabled_emits_warning() { + // enabled omitted (defaults false) but allow_live_provider set: a config + // that looks live but is inert. Must load (disabled is a legitimate + // default) AND warn. 
+ let cfg = McpConfig::from_yaml_str( + r" +llm_policy: + provider: claude_cli + allow_live_provider: true +", + ) + .expect("configured-but-disabled provider should still load"); + assert!(!cfg.llm.enabled); + let warnings = cfg.llm_warnings(); + assert!( + warnings.iter().any(|w| w.contains("enabled=false")), + "expected an enabled=false warning, got: {warnings:?}" + ); + } + + #[test] + fn enabled_without_allow_live_provider_emits_warning() { + let cfg = McpConfig::from_yaml_str( + r" +llm_policy: + enabled: true + provider: claude_cli +", + ) + .expect("enabled-without-opt-in should load"); + let warnings = cfg.llm_warnings(); + assert!( + warnings + .iter() + .any(|w| w.contains("allow_live_provider=false")), + "expected an allow_live_provider=false warning, got: {warnings:?}" + ); + } + + #[test] + fn unpinned_claude_cli_model_on_live_path_warns_about_cost() { + let cfg = McpConfig::from_yaml_str( + r" +llm_policy: + enabled: true + provider: claude_cli + allow_live_provider: true +", + ) + .expect("live claude_cli without a pinned model should load"); + let warnings = cfg.llm_warnings(); + assert!( + warnings.iter().any(|w| w.contains("claude_cli.model")), + "expected an unpinned-model cost warning, got: {warnings:?}" + ); + } + + #[test] + fn healthy_live_config_emits_no_warnings() { + let cfg = McpConfig::from_yaml_str( + r" +llm_policy: + enabled: true + provider: claude_cli + allow_live_provider: true + claude_cli: + model: claude-sonnet-4-6 +", + ) + .expect("healthy live config should load"); + assert!( + cfg.llm_warnings().is_empty(), + "expected no warnings, got: {:?}", + cfg.llm_warnings() + ); + } } diff --git a/crates/loomweave-mcp/assets/skills/loomweave-workflow/SKILL.md b/crates/loomweave-mcp/assets/skills/loomweave-workflow/SKILL.md index 1b074574..fd7ab55c 100644 --- a/crates/loomweave-mcp/assets/skills/loomweave-workflow/SKILL.md +++ b/crates/loomweave-mcp/assets/skills/loomweave-workflow/SKILL.md @@ -65,18 +65,27 @@ tell which case you're 
in. | `execution_paths_from` | bounded call paths out of an entity | `{"id": "", "max_depth": 5}` | | `subsystem_members` | modules in a subsystem | `{"id": "core:subsystem:"}` | | `subsystem_of` | the subsystem an entity belongs to (reverse of `subsystem_members`) | `{"id": ""}` | -| `summary` | on-demand prose summary of one entity | `{"id": ""}` | +| `summary` † | on-demand prose summary of one entity | `{"id": ""}` | | `summary_preview_cost` | preview a `summary` call's cache status / cost before spending | `{"id": ""}` | | `issues_for` | Filigree issues attached to an entity | `{"id": ""}` | | `source_for_entity` | an entity's exact indexed source span + bounded context | `{"id": "", "context_lines": 10}` | | `call_sites` | the source line(s) behind a calls/references edge | `{"id": "", "role": "caller"}` | | `orientation_pack` | one deterministic orientation packet for an entity or file:line (entity + context + neighbors + paths + issues + freshness) | `{"file": "rel/path.py", "line": 42}` | | `index_diff` | index freshness / drift vs. the current working tree | `{}` | -| `analyze_start` | launch a background re-index, return its `run_id` | `{}` | +| `analyze_start` † | launch a background re-index, return its `run_id` | `{}` | | `analyze_status` | poll a started analyze (queued/running/terminal + progress) | `{"run_id": ""}` | -| `analyze_cancel` | stop a running analyze (group-kills plugin + Pyright) | `{"run_id": ""}` | +| `analyze_cancel` † | stop a running analyze (group-kills plugin + Pyright) | `{"run_id": ""}` | | `project_status` | index freshness, counts, LLM + Filigree status | `{}` | +† **Write-gated.** `summary` (`entity_summary_get`), `analyze_start`, +`analyze_cancel`, `propose_guidance`, and `promote_guidance` are registered only +when `serve.mcp.enable_write_tools: true` is set in `loomweave.yaml` (default +`false`). 
When the gate is off they do not appear in `tools/list` and a call +returns a tool-disabled error — run `loomweave config check` to see the active +policy. `summary` additionally requires the live LLM provider to be enabled +(`llm_policy.enabled: true` + `allow_live_provider: true`), or it serves cache +only. + `callers_of` / `neighborhood` / `execution_paths_from` take a `confidence` tier — one of `"resolved"` (default; only high-confidence edges), `"ambiguous"`, or `"inferred"`. There is no `"all"` value. When you suspect an @@ -163,6 +172,7 @@ for team sharing). Agents may call `propose_guidance` to create a Filigree observation, but that proposal is inert until an operator promotes it through `promote_guidance` or the CLI. Promoted sheets reach you through `guidance_for` and are composed into `summary` prompts with a real guidance fingerprint. +(`propose_guidance` and `promote_guidance` are write-gated — see the † note above.) ## Workflow: orient, then navigate diff --git a/crates/loomweave-mcp/src/lib.rs b/crates/loomweave-mcp/src/lib.rs index 3be34ed0..854c3860 100644 --- a/crates/loomweave-mcp/src/lib.rs +++ b/crates/loomweave-mcp/src/lib.rs @@ -60,16 +60,28 @@ pub const LOOMWEAVE_WORKFLOW_SKILL: &str = include_str!("../assets/skills/loomweave-workflow/SKILL.md"); /// Orientation text returned in the MCP `initialize` result's `instructions` -/// field. The `Tools:` enumeration is derived from [`list_tools`] (the single -/// source of truth) so it can never drift from the advertised tool set as tools -/// are added or removed; the surrounding prose is static. Kept consistent with -/// the loomweave-workflow skill. -fn server_instructions() -> String { - let tool_names = list_tools() +/// field. 
The `Tools:` enumeration is derived from [`list_tools_for_policy`] +/// under the *active* policy (the single source of truth) so it can never +/// advertise a tool the server will not actually register — the +/// agent-first-feedback §2.5 bug, where the write tools were listed but absent +/// from `tools/list` unless `serve.mcp.enable_write_tools: true`. When write +/// tools are gated off, a note names them and how to enable them. Kept +/// consistent with the loomweave-workflow skill. +fn server_instructions(policy: McpToolPolicy) -> String { + let tool_names = list_tools_for_policy(policy) .iter() .map(|tool| tool.name) .collect::<Vec<_>>() .join(", "); + let write_tools_note = if policy.enable_write_tools { + String::new() + } else { + "\n\nNot listed above (write-gated): `entity_summary_get`, `analyze_start`, \ +`analyze_cancel`, `propose_guidance`, `promote_guidance`. These require \ +`serve.mcp.enable_write_tools: true` in loomweave.yaml; until then they are not \ +registered and calling one returns a tool-disabled error." + .to_owned() + }; format!( "Loomweave is a code-archaeology server: it has pre-extracted this project \ into a queryable map of entities (functions, classes, modules, files), the call \ @@ -84,7 +96,7 @@ verbatim into the next tool. Tools: {tool_names}. `entity_callers_list` / `entity_neighborhood_get` / `entity_execution_path_list` \ take a `confidence` tier (resolved | ambiguous | inferred; default resolved). \ `project_status_get` reports index freshness, counts, LLM policy, and the resolved \ -Filigree endpoint. +Filigree endpoint.{write_tools_note} For the full workflow see the loomweave-workflow skill (installed by \ `loomweave install --skills`), or read the `loomweave-workflow` prompt.
Live \ @@ -789,7 +801,7 @@ pub fn handle_json_rpc(request: &Value) -> Option { }; Some(match method { - "initialize" => result_response(&id, &initialize_result(false)), + "initialize" => result_response(&id, &initialize_result(false, McpToolPolicy::default())), "tools/list" => result_response( &id, &json!({"tools": list_tools_for_policy(McpToolPolicy::default())}), @@ -927,9 +939,12 @@ pub struct LlmDiagnostics { /// Provider label, e.g. `"openrouter"`, `"codex_cli"`, `"recording"`, or /// `"disabled"` when no provider is wired. pub provider: String, - /// A live provider is wired and summaries will dispatch to it. + /// Whether LLM summaries are enabled at all (`llm.enabled`, configured). + pub enabled: bool, + /// A live provider is wired and summaries will dispatch to it (effective). pub live: bool, - /// Whether config permits a live provider at all (`llm.allow_live_provider`). + /// Whether config permits a live provider at all (`llm.allow_live_provider`, + /// configured). pub allow_live_provider: bool, /// Summary-cache freshness horizon in days (`llm.cache_max_age_days`). pub cache_max_age_days: u32, @@ -1086,7 +1101,7 @@ impl ServerState { let dispatch = async { match method { - "initialize" => result_response(&id, &initialize_result(true)), + "initialize" => result_response(&id, &initialize_result(true, self.tool_policy)), "tools/list" => result_response( &id, &json!({"tools": list_tools_for_policy(self.tool_policy)}), @@ -2515,7 +2530,7 @@ fn should_spawn_stateful_stdio_request(request: &Value) -> bool { /// so it passes `stateful = false`; [`ServerState::handle_json_rpc`] serves the /// full surface and passes `stateful = true`. The `instructions` field is static /// orientation guidance (not a capability) and is included in both. 
-fn initialize_result(stateful: bool) -> Value { +fn initialize_result(stateful: bool, policy: McpToolPolicy) -> Value { let capabilities = if stateful { json!({ "tools": {}, "prompts": {}, "resources": {} }) } else { @@ -2528,7 +2543,7 @@ fn initialize_result(stateful: bool) -> Value { "name": "loomweave", "version": env!("CARGO_PKG_VERSION") }, - "instructions": server_instructions() + "instructions": server_instructions(policy) }) } @@ -5049,17 +5064,40 @@ mod tests { #[test] fn server_instructions_enumerate_every_tool() { // Single-source guard (clarion-71f0d6c3dd): the `instructions` tool list - // is derived from list_tools(), so every advertised tool must appear in - // it. If a tool is added/removed and this drifts, the instructions would - // otherwise silently misdescribe the surface. - let instructions = super::server_instructions(); - for tool in super::list_tools() { + // is derived from list_tools_for_policy under the active policy, so every + // tool the server actually registers must appear in it — and a write-gated + // tool must NOT appear when the gate is off (agent-first-feedback §2.5). + use super::McpToolPolicy; + + // With write tools enabled, every tool is advertised. + let all = super::server_instructions(McpToolPolicy::allow_write_tools()); + for tool in super::list_tools_for_policy(McpToolPolicy::allow_write_tools()) { assert!( - instructions.contains(tool.name), - "instructions omit tool {:?}; instructions were:\n{instructions}", + all.contains(tool.name), + "instructions omit registered tool {:?}; instructions were:\n{all}", tool.name ); } + + // Under the default read-only policy, the advertised list matches the + // registered list exactly — gated write tools are absent from the list + // but named in the gate note. 
+ let read_only = super::server_instructions(McpToolPolicy::default()); + let registered = super::list_tools_for_policy(McpToolPolicy::default()); + for tool in &registered { + assert!( + read_only.contains(tool.name), + "instructions omit registered tool {:?}; instructions were:\n{read_only}", + tool.name + ); + } + assert!( + registered.len() < super::list_tools().len(), + "default policy should gate at least one write tool" + ); + // The gate note names the write tools and how to enable them. + assert!(read_only.contains("enable_write_tools"), "{read_only}"); + assert!(read_only.contains("entity_summary_get"), "{read_only}"); } #[test] diff --git a/crates/loomweave-mcp/src/tools/status.rs b/crates/loomweave-mcp/src/tools/status.rs index 888e8f32..eed728f6 100644 --- a/crates/loomweave-mcp/src/tools/status.rs +++ b/crates/loomweave-mcp/src/tools/status.rs @@ -81,22 +81,38 @@ impl ServerState { return Ok(summary_read_error(read)); }; - // LLM policy posture (no provider call). `live` means a provider is - // wired AND config permits it; that is what makes a miss spend. A - // disabled/unconfigured LLM is therefore distinct from a cache miss. - let llm_enabled = self - .summary_llm - .as_ref() - .is_some_and(|llm| llm.config.enabled); - let live = self.summary_llm.is_some() && llm_enabled; - let allow_live_provider = self - .summary_llm - .as_ref() - .is_some_and(|llm| llm.config.allow_live_provider); - let provider = self.diagnostics.as_ref().map_or_else( - || if live { "configured" } else { "disabled" }.to_owned(), - |diag| diag.llm.provider.clone(), - ); + // LLM policy posture (no provider call). Report `enabled` and + // `allow_live_provider` as the *configured* values from the diagnostics + // snapshot — the same source `project_status_get` reads — so the two + // tools never disagree (agent-first-feedback §2.2). `live` is the + // *effective* state: a live provider is wired and a miss would actually + // spend.
When there is no diagnostics snapshot (a bare test harness), + // fall back to the wired provider's own config. + let (llm_enabled, allow_live_provider, live, provider) = match self.diagnostics.as_ref() { + Some(diag) => ( + diag.llm.enabled, + diag.llm.allow_live_provider, + diag.llm.live, + diag.llm.provider.clone(), + ), + None => { + let enabled = self + .summary_llm + .as_ref() + .is_some_and(|llm| llm.config.enabled); + let allow = self + .summary_llm + .as_ref() + .is_some_and(|llm| llm.config.allow_live_provider); + let live = self.summary_llm.is_some() && enabled; + ( + enabled, + allow, + live, + if live { "configured" } else { "disabled" }.to_owned(), + ) + } + }; // Cache status without spending: a fresh row is a hit; a present-but- // expired row would be re-billed; absence is a miss. @@ -357,12 +373,29 @@ impl ServerState { pub(crate) fn llm_diagnostics_json(&self) -> Value { match &self.diagnostics { - Some(diag) => json!({ - "provider": diag.llm.provider, - "live": diag.llm.live, - "allow_live_provider": diag.llm.allow_live_provider, - "cache_max_age_days": diag.llm.cache_max_age_days, - }), + Some(diag) => { + // Make a disabled provider self-healing (agent-first-feedback + // §3.11): carry the exact fix instead of a status code the agent + // must interpret. Null when already live. + let next_action = if diag.llm.live { + Value::Null + } else { + json!( + "Live summaries are off; entity_summary_get is cache-only. Set \ + llm_policy.enabled: true + allow_live_provider: true and supply the \ + provider credential (e.g. OPENROUTER_API_KEY), then restart serve. Run \ + `loomweave config check` to verify." 
+ ) + }; + json!({ + "provider": diag.llm.provider, + "enabled": diag.llm.enabled, + "live": diag.llm.live, + "allow_live_provider": diag.llm.allow_live_provider, + "cache_max_age_days": diag.llm.cache_max_age_days, + "next_action": next_action, + }) + } None => Value::Null, } } diff --git a/crates/loomweave-mcp/tests/storage_tools.rs b/crates/loomweave-mcp/tests/storage_tools.rs index a9eb3b9f..ea4fecd1 100644 --- a/crates/loomweave-mcp/tests/storage_tools.rs +++ b/crates/loomweave-mcp/tests/storage_tools.rs @@ -1208,6 +1208,7 @@ async fn issues_for_reports_resolved_endpoint_and_result_kind() { let diagnostics = DiagnosticsContext { llm: LlmDiagnostics { provider: "disabled".to_owned(), + enabled: false, live: false, allow_live_provider: false, cache_max_age_days: 180, @@ -2424,6 +2425,50 @@ async fn summary_preview_cost_disabled_llm_is_distinct_from_miss() { ); } +#[tokio::test] +async fn status_surfaces_agree_on_allow_live_provider_when_half_configured() { + // agent-first-feedback §2.2: project_status_get and summary_preview_cost must + // report the SAME allow_live_provider for a half-configured state — a provider + // permitted by config (allow_live_provider: true) but with enabled=false, so + // no live provider is wired. Previously the two read paths disagreed (status + // read raw config → true; preview read the unwired provider → false). 
+ let (project, db_path) = open_project(); + let diagnostics = DiagnosticsContext { + llm: LlmDiagnostics { + provider: "disabled".to_owned(), + enabled: false, + live: false, + allow_live_provider: true, // configured-but-inert + cache_max_age_days: 180, + }, + filigree: resolve_filigree_url(&FiligreeConfig::default(), project.path()), + }; + let state = state_for(project.path(), &db_path).with_diagnostics(diagnostics); + + let status = call_tool(&state, "project_status", json!({})).await; + let preview = call_tool( + &state, + "summary_preview_cost", + json!({"id": "python:function:demo.entry"}), + ) + .await; + + assert_eq!(status["result"]["llm"]["allow_live_provider"], true); + assert_eq!( + status["result"]["llm"]["allow_live_provider"], + preview["result"]["policy"]["allow_live_provider"], + "status surfaces disagree on allow_live_provider: status={status:?} preview={preview:?}" + ); + // Both must also agree the live path is off, so a miss would not spend. + assert_eq!(status["result"]["llm"]["enabled"], false); + assert_eq!(preview["result"]["policy"]["enabled"], false); + assert_eq!( + status["result"]["llm"]["live"], + preview["result"]["policy"]["live"] + ); + assert_eq!(preview["result"]["live_spend_would_occur"], false); +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn summary_expired_cache_row_is_refreshed_by_recording_provider() { let (project, db_path) = open_project(); @@ -5060,6 +5105,7 @@ async fn project_status_resolves_live_filigree_endpoint() { let diagnostics = DiagnosticsContext { llm: LlmDiagnostics { provider: "disabled".to_owned(), + enabled: false, live: false, allow_live_provider: false, cache_max_age_days: 180, @@ -5091,6 +5137,7 @@ async fn project_status_filigree_falls_back_to_config_without_port_file() { let diagnostics = DiagnosticsContext { llm: LlmDiagnostics { provider: "openrouter".to_owned(), + enabled: true, live: true, allow_live_provider: true, cache_max_age_days: 7, diff --git 
a/docs/operator/getting-started.md b/docs/operator/getting-started.md index 0908960c..1e2875a2 100644 --- a/docs/operator/getting-started.md +++ b/docs/operator/getting-started.md @@ -249,6 +249,11 @@ prerequisites section above. Skip this block if you don't have a key; the other seventeen tools still work, only `summary` will return an "LLM disabled" envelope. +Run `loomweave config check` after editing to confirm the effective state +(provider, enabled, live, model) before starting `serve` — it flags the common +mistakes (a provider left `enabled: false`, a missing key, or a misplaced key, +which is now a hard parse error rather than a silent drop). + ### The MCP tools The MCP surface exposes eighteen tools: the seventeen in the table below, plus diff --git a/docs/operator/openrouter.md b/docs/operator/openrouter.md index dbcbb9b8..d5b3d842 100644 --- a/docs/operator/openrouter.md +++ b/docs/operator/openrouter.md @@ -10,8 +10,12 @@ For local-login alternatives that avoid API keys in Loomweave config, see ## Configure Loomweave -`loomweave install` writes a default `loomweave.yaml` with LLMs disabled. To enable -live OpenRouter calls, set a concrete model ID and opt in explicitly: +`loomweave install` writes a default `loomweave.yaml` with LLMs disabled. Print a +fresh annotated example any time with `loomweave config example` (or +`loomweave config example --provider claude_cli`), and after editing, run +`loomweave config check` to see the *effective* provider/live/model state and any +warnings (e.g. a provider configured but left `enabled: false`). 
To enable live +OpenRouter calls, set a concrete model ID and opt in explicitly: ```yaml llm_policy: From 2504fc442d4ab7a00ffe71973aa91b1dc6057fae Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sun, 7 Jun 2026 04:46:31 +1000 Subject: [PATCH 33/60] =?UTF-8?q?fix(lint):=20satisfy=20clippy=20+=20rustd?= =?UTF-8?q?oc=20in=20the=20=C2=A72=20config=20work?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit's clippy/doc run was blocked at merge time by a concurrent cargo build-dir lock, so these slipped through: - status.rs: convert the preview-cost diagnostics `match` to `if let/else` (clippy::single_match_else). - cli.rs / doctor.rs: backtick identifiers in the serve --help and llm_posture doc comments (clippy::doc_markdown); backtick `<path>` in the config-check doc so rustdoc does not read it as an HTML tag (rustdoc::invalid_html_tags). No behavior change; clippy --workspace --all-targets --all-features and RUSTDOCFLAGS="-D warnings" cargo doc are both clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/cli.rs | 19 ++++++++++--------- crates/loomweave-cli/src/doctor.rs | 4 ++-- crates/loomweave-mcp/src/tools/status.rs | 20 ++++++++++---------- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/crates/loomweave-cli/src/cli.rs b/crates/loomweave-cli/src/cli.rs index b90ee2c3..00889018 100644 --- a/crates/loomweave-cli/src/cli.rs +++ b/crates/loomweave-cli/src/cli.rs @@ -164,16 +164,17 @@ pub enum Command { /// Run the MCP stdio server. /// - /// Serves the code graph to MCP clients. The entity_summary_get tool needs a - /// live LLM provider, which is OFF by default: set `llm_policy.enabled: true` /// and `allow_live_provider: true` in loomweave.yaml and supply the provider - /// credential (OPENROUTER_API_KEY for the default openrouter provider), or - /// switch `llm_policy.provider` to claude_cli / codex_cli for a locally /// authenticated coding-agent CLI. Without that, summaries are cache-only. - /// Write-capable tools (entity_summary_get, analyze_start, analyze_cancel, - /// propose_guidance, promote_guidance) require `serve.mcp.enable_write_tools: - /// true`. The effective LLM posture is logged to stderr at startup; run - /// `loomweave config check` to inspect it ahead of time. + /// Serves the code graph to MCP clients.
The `entity_summary_get` tool needs + /// a live LLM provider, which is OFF by default: set `llm_policy.enabled: true` /// and `allow_live_provider: true` in loomweave.yaml and supply the provider - /// credential (OPENROUTER_API_KEY for the default openrouter provider), or - /// switch `llm_policy.provider` to claude_cli / codex_cli for a locally + /// credential (`OPENROUTER_API_KEY` for the default `openrouter` provider), or + /// switch `llm_policy.provider` to `claude_cli` / `codex_cli` for a locally /// authenticated coding-agent CLI. Without that, summaries are cache-only. - /// Write-capable tools (entity_summary_get, analyze_start, analyze_cancel, - /// propose_guidance, promote_guidance) require `serve.mcp.enable_write_tools: - /// true`. The effective LLM posture is logged to stderr at startup; run - /// `loomweave config check` to inspect it ahead of time. + /// Write-capable tools (`entity_summary_get`, `analyze_start`, + /// `analyze_cancel`, `propose_guidance`, `promote_guidance`) require + /// `serve.mcp.enable_write_tools: true`. The effective LLM posture is logged + /// to stderr at startup; run `loomweave config check` to inspect it ahead of + /// time. Serve { /// Project directory containing .loomweave/loomweave.db. #[arg(long, default_value = ".")] @@ -288,7 +289,7 @@ pub enum ConfigCommand { #[arg(long, default_value = ".")] path: PathBuf, - /// Path to loomweave.yaml (default: <path>/loomweave.yaml if present). + /// Path to loomweave.yaml (default: `<path>/loomweave.yaml` if present). #[arg(long)] config: Option, }, diff --git a/crates/loomweave-cli/src/doctor.rs b/crates/loomweave-cli/src/doctor.rs index 1b4b576b..90303c48 100644 --- a/crates/loomweave-cli/src/doctor.rs +++ b/crates/loomweave-cli/src/doctor.rs @@ -498,8 +498,8 @@ enum LlmPosture { }, } -/// Load loomweave.yaml *typed* (so deny_unknown_fields + validate() run), resolve -/// the effective provider, and classify the posture.
This is the file most +/// Load loomweave.yaml *typed* (so `deny_unknown_fields` + `validate()` run), +/// resolve the effective provider, and classify the posture. This is the file most /// likely to be hand-edited wrong (agent-first-feedback §2.4); an absent file is /// fine (built-in defaults → LLM disabled). fn llm_posture(project_root: &Path) -> LlmPosture { diff --git a/crates/loomweave-mcp/src/tools/status.rs b/crates/loomweave-mcp/src/tools/status.rs index eed728f6..3d85886b 100644 --- a/crates/loomweave-mcp/src/tools/status.rs +++ b/crates/loomweave-mcp/src/tools/status.rs @@ -88,14 +88,15 @@ impl ServerState { // *effective* state: a live provider is wired and a miss would actually // spend. When there is no diagnostics snapshot (a bare test harness), // fall back to the wired provider's own config. - let (llm_enabled, allow_live_provider, live, provider) = match self.diagnostics.as_ref() { - Some(diag) => ( - diag.llm.enabled, - diag.llm.allow_live_provider, - diag.llm.live, - diag.llm.provider.clone(), - ), - None => { + let (llm_enabled, allow_live_provider, live, provider) = + if let Some(diag) = self.diagnostics.as_ref() { + ( + diag.llm.enabled, + diag.llm.allow_live_provider, + diag.llm.live, + diag.llm.provider.clone(), + ) + } else { let enabled = self .summary_llm .as_ref() @@ -111,8 +112,7 @@ impl ServerState { live, if live { "configured" } else { "disabled" }.to_owned(), ) - } - }; + }; // Cache status without spending: a fresh row is a hit; a present-but- // expired row would be re-billed; absence is a miss. From d7aa778f09c1a7abbe14e7701684ca240314bc8c Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sun, 7 Jun 2026 07:09:21 +1000 Subject: [PATCH 34/60] feat(federation): adopt WEFT_FEDERATION_TOKEN as the federation token env var Rename integrations.filigree.token_env from FILIGREE_API_TOKEN to WEFT_FEDERATION_TOKEN: federation plumbing is named by the Weft suite, not by the sibling member. 
This moves the hub's C-3 reference token_env (loomweave.yaml + the config example + what `loomweave install` stamps) and the FiligreeConfig default. The legacy FILIGREE_API_TOKEN name is honoured as a deprecated fallback at the sole token-resolution point (FiligreeHttpClient::from_config): the configured var is read first; the legacy literal only when the primary is unset or blank. A pre-rename global export keeps working during the transition. Does not touch serve.http.token_env (inbound HTTP read-API bearer auth, default WEFT_TOKEN) or the loomweave:eid: SEI scheme. Adds 5 token-resolution precedence tests (new wins, legacy fallback, both-set, empty-falls-through, neither-set) and a CHANGELOG entry. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 11 +++ crates/loomweave-cli/src/config.rs | 2 +- .../loomweave-cli/src/integration_bindings.rs | 2 +- crates/loomweave-federation/src/config.rs | 6 +- crates/loomweave-federation/src/filigree.rs | 78 ++++++++++++++++++- loomweave.yaml | 2 +- tests/e2e/sprint_2_mcp_surface.sh | 2 +- 7 files changed, 97 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 588732dd..df39a81b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,17 @@ only when an incompatible change is made to that surface. See ## [Unreleased] +### Changed + +- **Filigree federation token env var renamed to `WEFT_FEDERATION_TOKEN`.** The + `integrations.filigree.token_env` default (and the name stamped into + `loomweave.yaml` by `loomweave install`) is now `WEFT_FEDERATION_TOKEN` — + Weft-suite federation plumbing is named by the suite, not by the sibling + member. The legacy `FILIGREE_API_TOKEN` name is still honoured as a deprecated + fallback at token-resolution time, so an existing global export keeps working + during the transition. This does not affect `serve.http.token_env` + (inbound HTTP read-API bearer auth, default `WEFT_TOKEN`). + ## [1.1.0rc3] — 2026-06-06 Third 1.1 release candidate. 
Hardens the Python plugin's pyright spawn path diff --git a/crates/loomweave-cli/src/config.rs b/crates/loomweave-cli/src/config.rs index 3df179b3..965a64dd 100644 --- a/crates/loomweave-cli/src/config.rs +++ b/crates/loomweave-cli/src/config.rs @@ -61,7 +61,7 @@ integrations: enabled: false base_url: http://127.0.0.1:8766 actor: loomweave-mcp - token_env: FILIGREE_API_TOKEN + token_env: WEFT_FEDERATION_TOKEN timeout_seconds: 5 serve: mcp: diff --git a/crates/loomweave-cli/src/integration_bindings.rs b/crates/loomweave-cli/src/integration_bindings.rs index 0a232ff4..dff7187b 100644 --- a/crates/loomweave-cli/src/integration_bindings.rs +++ b/crates/loomweave-cli/src/integration_bindings.rs @@ -189,7 +189,7 @@ fn install_loomweave_yaml(project_root: &Path, desired: &DesiredBindings) -> Res filigree.insert("enabled".to_owned(), json!(true)); filigree.insert("base_url".to_owned(), json!(desired.filigree_base_url)); ensure_string(filigree, "actor", "loomweave-mcp"); - ensure_string(filigree, "token_env", "FILIGREE_API_TOKEN"); + ensure_string(filigree, "token_env", "WEFT_FEDERATION_TOKEN"); filigree .entry("timeout_seconds".to_owned()) .or_insert(json!(5)); diff --git a/crates/loomweave-federation/src/config.rs b/crates/loomweave-federation/src/config.rs index bdf934f4..593dc089 100644 --- a/crates/loomweave-federation/src/config.rs +++ b/crates/loomweave-federation/src/config.rs @@ -520,6 +520,10 @@ pub struct FiligreeConfig { pub enabled: bool, pub base_url: String, pub actor: String, + /// Name of the environment variable holding the Filigree bearer token. + /// Defaults to `WEFT_FEDERATION_TOKEN` (Weft-suite federation plumbing). + /// The legacy `FILIGREE_API_TOKEN` name is still honoured as a deprecated + /// fallback at token-resolution time — see `FiligreeHttpClient::from_config`. 
pub token_env: String, pub timeout_seconds: u64, /// Whether `loomweave analyze` POSTs its findings to Filigree's @@ -545,7 +549,7 @@ impl Default for FiligreeConfig { enabled: false, base_url: "http://127.0.0.1:8766".to_owned(), actor: "loomweave-mcp".to_owned(), - token_env: "FILIGREE_API_TOKEN".to_owned(), + token_env: "WEFT_FEDERATION_TOKEN".to_owned(), timeout_seconds: 5, emit_findings: false, prune_unseen_days: 30, diff --git a/crates/loomweave-federation/src/filigree.rs b/crates/loomweave-federation/src/filigree.rs index 4ceda8a1..8a4268b9 100644 --- a/crates/loomweave-federation/src/filigree.rs +++ b/crates/loomweave-federation/src/filigree.rs @@ -304,7 +304,13 @@ impl FiligreeHttpClient { .timeout(Duration::from_secs(config.timeout_seconds.max(1))) .build() .map_err(FiligreeClientError::Build)?; - let token = env_lookup(&config.token_env).filter(|value| !value.trim().is_empty()); + // Resolve the configured env var (default `WEFT_FEDERATION_TOKEN`) first; + // fall back to the legacy `FILIGREE_API_TOKEN` name so a pre-rename global + // export keeps working during the transition. Deprecated — remove the + // fallback once operators have migrated to the Weft-prefixed name. + let token = env_lookup(&config.token_env) + .filter(|value| !value.trim().is_empty()) + .or_else(|| env_lookup("FILIGREE_API_TOKEN").filter(|value| !value.trim().is_empty())); Ok(Some(Self { base_url: config.base_url.clone(), actor: config.actor.clone(), @@ -853,6 +859,76 @@ mod tests { use std::io::{Read, Write}; use std::net::TcpListener; + /// Minimal enabled config; `from_config` does not connect until a request is + /// issued, so no server is needed to exercise token resolution. 
+ fn token_resolution_config() -> FiligreeConfig { + FiligreeConfig { + enabled: true, + base_url: "http://127.0.0.1:1".to_owned(), + actor: "loomweave-test".to_owned(), + token_env: "WEFT_FEDERATION_TOKEN".to_owned(), + timeout_seconds: 1, + emit_findings: false, + prune_unseen_days: 30, + } + } + + fn resolved_token(env: &[(&str, &str)]) -> Option<String> { + let config = token_resolution_config(); + FiligreeHttpClient::from_config(&config, |name| { + env.iter() + .find(|(key, _)| *key == name) + .map(|(_, value)| (*value).to_owned()) + }) + .expect("build client") + .expect("enabled client") + .token + } + + #[test] + fn token_resolution_prefers_configured_env_var() { + assert_eq!( + resolved_token(&[("WEFT_FEDERATION_TOKEN", "new-secret")]), + Some("new-secret".to_owned()), + ); + } + + #[test] + fn token_resolution_falls_back_to_legacy_filigree_api_token() { + // Pre-rename global export still works during the transition. + assert_eq!( + resolved_token(&[("FILIGREE_API_TOKEN", "legacy-secret")]), + Some("legacy-secret".to_owned()), + ); + } + + #[test] + fn token_resolution_configured_var_wins_over_legacy_fallback() { + assert_eq!( + resolved_token(&[ + ("WEFT_FEDERATION_TOKEN", "new-secret"), + ("FILIGREE_API_TOKEN", "legacy-secret"), + ]), + Some("new-secret".to_owned()), + ); + } + + #[test] + fn token_resolution_empty_configured_var_falls_through_to_legacy() { + assert_eq!( + resolved_token(&[ + ("WEFT_FEDERATION_TOKEN", " "), + ("FILIGREE_API_TOKEN", "legacy-secret"), + ]), + Some("legacy-secret".to_owned()), + ); + } + + #[test] + fn token_resolution_none_when_neither_set() { + assert_eq!(resolved_token(&[]), None); + } + + #[test] fn parses_reverse_entity_association_response_shape() { let parsed = parse_entity_associations_response( diff --git a/loomweave.yaml b/loomweave.yaml index ee5bf735..29a63ac6 100644 --- a/loomweave.yaml +++ b/loomweave.yaml @@ -4,7 +4,7 @@ integrations: base_url: http://127.0.0.1:8542 enabled: true timeout_seconds: 5 - token_env: 
FILIGREE_API_TOKEN + token_env: WEFT_FEDERATION_TOKEN llm_policy: allow_live_provider: false cache_max_age_days: 180 diff --git a/tests/e2e/sprint_2_mcp_surface.sh b/tests/e2e/sprint_2_mcp_surface.sh index 1afc74d1..58c86cd5 100755 --- a/tests/e2e/sprint_2_mcp_surface.sh +++ b/tests/e2e/sprint_2_mcp_surface.sh @@ -264,7 +264,7 @@ integrations: enabled: true base_url: http://127.0.0.1:{filigree_server.server_port} actor: loomweave-e2e - token_env: FILIGREE_API_TOKEN + token_env: WEFT_FEDERATION_TOKEN timeout_seconds: 2 """.lstrip(), encoding="utf-8", From 700a315d1143e69b1f350f9dc215928a4229b3eb Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sun, 7 Jun 2026 11:05:05 +1000 Subject: [PATCH 35/60] feat(weft): consolidate store to .weft/loomweave/ + .weft-only sibling resolution (ADR-046) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move Loomweave's per-project store from `.loomweave/` to the shared `.weft/loomweave/` Weft dotdir as a clean break (no fallback read of the old path), routed through a single `loomweave_core::store` helper so the path cannot drift. An operator may relocate the store via a member-private `[loomweave].store_dir` key in a project-root `weft.toml` (read-only to Loomweave; missing/malformed → silent fallback to the default, C-9c). Sibling resolution reads the consolidated `.weft//` location ONLY, with no fallback to the pre-consolidation `./` path: - Filigree live read-API port at `.weft/filigree/ephemeral.port` (`filigree_url.rs`; the `live_filigree_base_url` install/doctor binding now routes through the same canonical resolver instead of reading `.filigree/`). - Wardline trust-vocabulary descriptor at `.weft/wardline/vocabulary.yaml` (python plugin `wardline_descriptor.py`). Weft is pre-launch with a coordinated cutover, so post-launch every sibling is at `.weft/` by construction. 
A sibling found only on a legacy path means a mis-sequenced cutover; silently resolving it would bind a stale dir (the lacuna-401 wrong-but-quiet-resolve failure mode). Instead resolution folds to the fail-soft default — `source = "config"` for Filigree, an absent project descriptor for Wardline — so a mis-sequenced cutover is visible, not silent. Tests assert this plain fallback (no new "unreachable" enum). Legacy-seeding test fixtures migrate to the `.weft/` locations. Cutover ordering: Filigree migrates to `.weft/filigree/` -> this build installs -> downstream re-init. This build must not be installed against any project until Filigree has migrated. SEI scheme (`loomweave:eid:`) untouched. Co-Authored-By: Claude Opus 4.8 (1M context) --- .agents/skills/loomweave-workflow/SKILL.md | 16 +- .gitignore | 13 +- .loomweave/.gitignore | 33 ---- .loomweave/config.json | 4 - CHANGELOG.md | 27 +++ README.md | 4 +- crates/loomweave-cli/src/analyze.rs | 22 +-- crates/loomweave-cli/src/analyze_lock.rs | 10 +- crates/loomweave-cli/src/cli.rs | 42 ++-- crates/loomweave-cli/src/config.rs | 2 +- crates/loomweave-cli/src/db.rs | 6 +- crates/loomweave-cli/src/doctor.rs | 21 +- crates/loomweave-cli/src/guidance.rs | 4 +- crates/loomweave-cli/src/hook.rs | 16 +- crates/loomweave-cli/src/http_read.rs | 6 +- crates/loomweave-cli/src/install.rs | 36 ++-- crates/loomweave-cli/src/instance.rs | 2 +- .../loomweave-cli/src/integration_bindings.rs | 8 +- .../loomweave-cli/src/secret_scan/baseline.rs | 2 +- crates/loomweave-cli/src/secret_scan/files.rs | 6 +- crates/loomweave-cli/src/serve.rs | 6 +- crates/loomweave-cli/src/skill_pack.rs | 2 +- crates/loomweave-cli/src/wardline_guidance.rs | 2 +- crates/loomweave-cli/tests/analyze.rs | 96 ++++----- .../tests/analyze_failure_modes.rs | 6 +- crates/loomweave-cli/tests/db.rs | 6 +- crates/loomweave-cli/tests/doctor.rs | 8 +- crates/loomweave-cli/tests/guidance.rs | 28 +-- crates/loomweave-cli/tests/hook.rs | 6 +- 
crates/loomweave-cli/tests/install.rs | 40 ++-- crates/loomweave-cli/tests/sarif.rs | 4 +- crates/loomweave-cli/tests/secret_scan.rs | 10 +- crates/loomweave-cli/tests/serve.rs | 35 ++-- crates/loomweave-cli/tests/skills.rs | 19 +- crates/loomweave-cli/tests/wp1_e2e.rs | 2 +- crates/loomweave-cli/tests/wp2_e2e.rs | 12 +- crates/loomweave-core/src/lib.rs | 1 + crates/loomweave-core/src/store.rs | 187 ++++++++++++++++++ .../loomweave-federation/src/filigree_url.rs | 91 +++++++-- .../src/loomweave_port.rs | 28 +-- .../loomweave-federation/src/loomweave_url.rs | 15 +- .../assets/skills/loomweave-workflow/SKILL.md | 6 +- crates/loomweave-mcp/src/analyze_runs.rs | 2 +- .../loomweave-mcp/src/catalogue/semantic.rs | 2 +- crates/loomweave-mcp/src/lib.rs | 12 +- crates/loomweave-mcp/src/snapshot.rs | 8 +- crates/loomweave-mcp/src/tools/analyze.rs | 4 +- crates/loomweave-mcp/src/tools/graph.rs | 2 +- crates/loomweave-mcp/src/tools/status.rs | 4 +- .../loomweave-mcp/tests/analyze_lifecycle.rs | 4 +- crates/loomweave-mcp/tests/catalogue_tools.rs | 6 +- crates/loomweave-mcp/tests/storage_tools.rs | 14 +- crates/loomweave-scanner/tests/scanner.rs | 2 +- crates/loomweave-storage/src/embeddings.rs | 14 +- crates/loomweave-storage/src/pragma.rs | 2 +- crates/loomweave-storage/src/prior_index.rs | 2 +- .../loomweave-storage/tests/writer_actor.rs | 2 +- docs/federation/contracts.md | 25 ++- docs/loomweave/1.0/detailed-design.md | 32 +-- docs/loomweave/1.0/operations.md | 26 +-- docs/loomweave/1.0/requirements.md | 22 +-- docs/loomweave/1.0/system-design.md | 26 +-- .../adr/ADR-005-loomweave-dir-tracking.md | 7 +- .../adr/ADR-018-identity-reconciliation.md | 9 +- .../adr/ADR-046-weft-store-consolidation.md | 127 ++++++++++++ docs/loomweave/adr/README.md | 5 +- docs/operator/README.md | 2 +- docs/operator/clustering.md | 2 +- docs/operator/getting-started.md | 12 +- docs/operator/guidance.md | 2 +- docs/operator/loomweave-http-read-api.md | 2 +- docs/operator/runtime-topology.md | 12 +- 
docs/operator/secret-scanning.md | 4 +- .../pyright_session.py | 4 +- .../wardline_descriptor.py | 33 +++- plugins/python/tests/test_server.py | 8 +- .../python/tests/test_wardline_descriptor.py | 76 ++++++- scripts/b4-gate-run.sh | 2 +- tests/e2e/external-operator-smoke.sh | 24 +-- tests/e2e/phase3_subsystems.sh | 6 +- tests/e2e/sprint_1_walking_skeleton.sh | 62 +++--- tests/e2e/sprint_2_mcp_surface.sh | 8 +- tests/e2e/wp5_secret_scan.sh | 16 +- tests/perf/b5_reference_scale_smoke.py | 2 +- tests/perf/b8_scale_test/driver.py | 6 +- wardline.yaml | 2 +- 86 files changed, 1001 insertions(+), 533 deletions(-) delete mode 100644 .loomweave/.gitignore delete mode 100644 .loomweave/config.json create mode 100644 crates/loomweave-core/src/store.rs create mode 100644 docs/loomweave/adr/ADR-046-weft-store-consolidation.md diff --git a/.agents/skills/loomweave-workflow/SKILL.md b/.agents/skills/loomweave-workflow/SKILL.md index 1b074574..fd7ab55c 100644 --- a/.agents/skills/loomweave-workflow/SKILL.md +++ b/.agents/skills/loomweave-workflow/SKILL.md @@ -65,18 +65,27 @@ tell which case you're in. 
| `execution_paths_from` | bounded call paths out of an entity | `{"id": "", "max_depth": 5}` | | `subsystem_members` | modules in a subsystem | `{"id": "core:subsystem:"}` | | `subsystem_of` | the subsystem an entity belongs to (reverse of `subsystem_members`) | `{"id": ""}` | -| `summary` | on-demand prose summary of one entity | `{"id": ""}` | +| `summary` † | on-demand prose summary of one entity | `{"id": ""}` | | `summary_preview_cost` | preview a `summary` call's cache status / cost before spending | `{"id": ""}` | | `issues_for` | Filigree issues attached to an entity | `{"id": ""}` | | `source_for_entity` | an entity's exact indexed source span + bounded context | `{"id": "", "context_lines": 10}` | | `call_sites` | the source line(s) behind a calls/references edge | `{"id": "", "role": "caller"}` | | `orientation_pack` | one deterministic orientation packet for an entity or file:line (entity + context + neighbors + paths + issues + freshness) | `{"file": "rel/path.py", "line": 42}` | | `index_diff` | index freshness / drift vs. the current working tree | `{}` | -| `analyze_start` | launch a background re-index, return its `run_id` | `{}` | +| `analyze_start` † | launch a background re-index, return its `run_id` | `{}` | | `analyze_status` | poll a started analyze (queued/running/terminal + progress) | `{"run_id": ""}` | -| `analyze_cancel` | stop a running analyze (group-kills plugin + Pyright) | `{"run_id": ""}` | +| `analyze_cancel` † | stop a running analyze (group-kills plugin + Pyright) | `{"run_id": ""}` | | `project_status` | index freshness, counts, LLM + Filigree status | `{}` | +† **Write-gated.** `summary` (`entity_summary_get`), `analyze_start`, +`analyze_cancel`, `propose_guidance`, and `promote_guidance` are registered only +when `serve.mcp.enable_write_tools: true` is set in `loomweave.yaml` (default +`false`). 
When the gate is off they do not appear in `tools/list` and a call +returns a tool-disabled error — run `loomweave config check` to see the active +policy. `summary` additionally requires the live LLM provider to be enabled +(`llm_policy.enabled: true` + `allow_live_provider: true`), or it serves cache +only. + `callers_of` / `neighborhood` / `execution_paths_from` take a `confidence` tier — one of `"resolved"` (default; only high-confidence edges), `"ambiguous"`, or `"inferred"`. There is no `"all"` value. When you suspect an @@ -163,6 +172,7 @@ for team sharing). Agents may call `propose_guidance` to create a Filigree observation, but that proposal is inert until an operator promotes it through `promote_guidance` or the CLI. Promoted sheets reach you through `guidance_for` and are composed into `summary` prompts with a real guidance fingerprint. +(`propose_guidance` and `promote_guidance` are write-gated — see the † note above.) ## Workflow: orient, then navigate diff --git a/.gitignore b/.gitignore index d6e91368..6093915b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,10 @@ .filigree/ +.weft/filigree/ /target **/*.rs.bk Cargo.lock.bak -# SQLite working files (project-level .loomweave/ is tracked per ADR-005) +# SQLite working files (project-level .weft/loomweave/ is tracked per ADR-005/046) *.db-journal *.db-wal @@ -32,11 +33,11 @@ tests/e2e/external-operator-smoke-results-*.md # Loomweave runtime artifacts — the index DB, per-project instance fingerprint, # and analyze lock change on every run, so they are not tracked -# (see .loomweave/.gitignore). -.loomweave/loomweave.db -.loomweave/instance_id -.loomweave/loomweave.lock -.loomweave/ephemeral.port +# (see .weft/loomweave/.gitignore). +.weft/loomweave/loomweave.db +.weft/loomweave/instance_id +.weft/loomweave/loomweave.lock +.weft/loomweave/ephemeral.port # Documentation site build output (mkdocs `site_dir`, web/mkdocs.yml). 
/site-build/ diff --git a/.loomweave/.gitignore b/.loomweave/.gitignore deleted file mode 100644 index b0f4e45f..00000000 --- a/.loomweave/.gitignore +++ /dev/null @@ -1,33 +0,0 @@ -# Loomweave .gitignore — ADR-005 tracked-vs-excluded list. -# Tracked (committed): config.json, .gitignore itself. -# Excluded (ignored): loomweave.db + instance_id (runtime artifacts, also pinned -# in the repo-root .gitignore), WAL sidecars, shadow DB, per-run logs, tmp. - -# Runtime artifacts: the index DB and the per-project instance fingerprint -# change on every analyze run, so they are NOT tracked (untracked 2026-06-02). -loomweave.db -instance_id -loomweave.lock - -# Read-API live port discovery file (ADR-044): present only while serve runs, -# rewritten per bind, loopback-only — a runtime artifact, never committed. -ephemeral.port - -# SQLite write-ahead files never belong in the repo. -*-wal -*-shm -*.db-wal -*.db-shm - -# Shadow DB intermediate (ADR-011 --shadow-db). -*.shadow.db -*.db.new - -# Scratch / temp space. -tmp/ - -# Per-run log directories (see detailed-design §File layout). The run dir -# metadata (config.yaml, stats.json, partial.json) is tracked; only the -# raw LLM request/response log is excluded. -logs/ -runs/*/log.jsonl diff --git a/.loomweave/config.json b/.loomweave/config.json deleted file mode 100644 index d7ef3efe..00000000 --- a/.loomweave/config.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "schema_version": 1, - "last_run_id": null -} diff --git a/CHANGELOG.md b/CHANGELOG.md index df39a81b..ec9c1b5a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,33 @@ only when an incompatible change is made to that surface. 
See ### Changed +- **Project store moved `.loomweave/` → `.weft/loomweave/` (Weft store + consolidation, clean break; ADR-046).** All machine-written state — the index + DB, `config.json`, `.gitignore`, `embeddings.db`, `ephemeral.port`, + `instance_id`, locks, and per-run dirs — now lives under the shared + `.weft//` dotdir, routed through a single `loomweave_core::store` + helper. There is **no fallback read** of the old location: existing projects + must re-init (`loomweave install` then `loomweave analyze`) and may delete the + orphaned `.loomweave/`. An operator may relocate the store with a + member-private `[loomweave].store_dir` key in a project-root `weft.toml` + (read-only to Loomweave; a missing or malformed file falls back silently to + the default). Sibling resolution reads the consolidated `.weft//` + location **only** — Filigree's live port at `.weft/filigree/ephemeral.port`, + Wardline's trust-vocabulary descriptor at `.weft/wardline/vocabulary.yaml` — + with no fallback to the pre-consolidation `./` path. A sibling found + only on the legacy path folds to the fail-soft default (`source = "config"` + for Filigree, an absent descriptor for Wardline), making a mis-sequenced + cutover visible rather than a silent stale resolve. **Cutover ordering:** + Filigree migrates to `.weft/filigree/` → this build installs → downstream + re-init. + +- **`cargo nextest run --workspace` now always completes instead of hanging.** A + `slow-timeout` cap in `.config/nextest.toml` terminates any test that runs past + the bound and reports it as a timeout failure, so the literal CI test command + runs clean to a verdict even while the pre-existing emission tests + (clarion-1d405be546) hang. The cap does not fix that bug — it makes the suite + honestly red and fast rather than green-via-family-exclusion. 
+ - **Filigree federation token env var renamed to `WEFT_FEDERATION_TOKEN`.** The `integrations.filigree.token_env` default (and the name stamped into `loomweave.yaml` by `loomweave install`) is now `WEFT_FEDERATION_TOKEN` — diff --git a/README.md b/README.md index ea4f44c8..d3d97ae9 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ loomweave analyze loomweave serve ``` -`loomweave install` is the one-step agent setup path: it initialises `.loomweave/`, +`loomweave install` is the one-step agent setup path: it initialises `.weft/loomweave/`, installs the `loomweave-workflow` skill for Claude Code and Codex, writes Claude Code MCP config, upserts Codex MCP config, and installs the SessionStart hook. Use component flags such as `--claude-code`, `--codex`, `--skills`, @@ -118,7 +118,7 @@ federation doctrine that anchors every cross-product decision is in ## Storage and operations -Loomweave keeps project state in a local `.loomweave/` directory. +Loomweave keeps project state in a local `.weft/loomweave/` directory. The local-first storage model, the no-NFS constraint, the no-double-analyze constraint (fs2 advisory lock), and the backup/restore procedure are documented in diff --git a/crates/loomweave-cli/src/analyze.rs b/crates/loomweave-cli/src/analyze.rs index 4e738f2b..a6a3397e 100644 --- a/crates/loomweave-cli/src/analyze.rs +++ b/crates/loomweave-cli/src/analyze.rs @@ -333,7 +333,7 @@ pub(crate) struct AnalyzeOptions { /// /// # Errors /// -/// Returns an error if the target directory does not exist, has no `.loomweave/` +/// Returns an error if the target directory does not exist, has no `.weft/loomweave/` /// directory, if analyze config is invalid, or if the writer actor fails to /// start or process commands. 
#[allow(clippy::too_many_lines)] @@ -347,10 +347,10 @@ pub(crate) async fn run_with_options(project_path: PathBuf, options: AnalyzeOpti let project_root = project_path .canonicalize() .with_context(|| format!("cannot canonicalise path {}", project_path.display()))?; - let loomweave_dir = project_root.join(".loomweave"); + let loomweave_dir = loomweave_core::store::store_dir(&project_root); if !loomweave_dir.exists() { bail!( - "{} has no .loomweave/ directory. Run `loomweave install` first.", + "{} has no .weft/loomweave/ store. Run `loomweave install` first.", project_root.display() ); } @@ -3171,7 +3171,7 @@ async fn populate_semantic_embeddings( let conn = Connection::open(db_path) .with_context(|| format!("open Loomweave database {}", db_path.display()))?; - let store = EmbeddingStore::open_in_loomweave_dir(project_root) + let store = EmbeddingStore::open_in_store_dir(project_root) .map_err(|err| anyhow::anyhow!("{err}")) .context("open semantic embedding sidecar")?; let pending = semantic_embedding_candidates(&conn, &store, &model_id, &mut stats)?; @@ -5101,10 +5101,10 @@ fn unresolved_call_site_key( /// Skip-list for directory names during the source walk. /// -/// Sprint 1 conservative set: VCS directories, loomweave's own state, and +/// Sprint 1 conservative set: VCS directories, the shared .weft/ runtime state, and /// common virtual-environment directories. 
const SKIP_DIRS: &[&str] = &[ - ".loomweave", + ".weft", ".git", ".hg", ".svn", @@ -6134,8 +6134,8 @@ mod tests { use loomweave_storage::{EmbeddingKey, EmbeddingStore, pragma, schema}; let project = tempfile::tempdir().unwrap(); - std::fs::create_dir(project.path().join(".loomweave")).unwrap(); - let db_path = project.path().join(".loomweave/loomweave.db"); + std::fs::create_dir_all(loomweave_core::store::store_dir(project.path())).unwrap(); + let db_path = loomweave_core::store::db_path(project.path()); let mut conn = rusqlite::Connection::open(&db_path).unwrap(); pragma::apply_write_pragmas(&conn).unwrap(); schema::apply_migrations(&mut conn).unwrap(); @@ -6150,7 +6150,7 @@ mod tests { .unwrap(); drop(conn); - let store = EmbeddingStore::open_in_loomweave_dir(project.path()).unwrap(); + let store = EmbeddingStore::open_in_store_dir(project.path()).unwrap(); store .upsert( &EmbeddingKey { @@ -6203,8 +6203,8 @@ mod tests { use loomweave_storage::{pragma, schema}; let project = tempfile::tempdir().unwrap(); - std::fs::create_dir(project.path().join(".loomweave")).unwrap(); - let db_path = project.path().join(".loomweave/loomweave.db"); + std::fs::create_dir_all(loomweave_core::store::store_dir(project.path())).unwrap(); + let db_path = loomweave_core::store::db_path(project.path()); let mut conn = rusqlite::Connection::open(&db_path).unwrap(); pragma::apply_write_pragmas(&conn).unwrap(); schema::apply_migrations(&mut conn).unwrap(); diff --git a/crates/loomweave-cli/src/analyze_lock.rs b/crates/loomweave-cli/src/analyze_lock.rs index e00f4947..8d862fd4 100644 --- a/crates/loomweave-cli/src/analyze_lock.rs +++ b/crates/loomweave-cli/src/analyze_lock.rs @@ -9,7 +9,7 @@ //! processes. //! //! This module acquires an exclusive `fs2`-advisory lock on a dedicated -//! sentinel file `.loomweave/loomweave.lock` for the duration of the analyze +//! sentinel file `.weft/loomweave/loomweave.lock` for the duration of the analyze //! run. 
The lock file is separate from `loomweave.db` so `SQLite`'s own //! locking (per-connection, transaction-scoped) is independent. The //! guard's `Drop` releases the OS-level lock. @@ -39,13 +39,13 @@ pub(crate) struct AnalyzeLockGuard { /// Acquire an exclusive cross-process lock on `/loomweave.lock`. /// -/// `loomweave_dir` is the `.loomweave/` directory inside the project root. The +/// `loomweave_dir` is the `.weft/loomweave/` directory inside the project root. The /// lock file is created on first use (0-byte sentinel) and kept across /// runs. The returned guard holds the lock for its lifetime. /// /// # Errors /// -/// - The lock file cannot be opened (missing `.loomweave/` directory, +/// - The lock file cannot be opened (missing `.weft/loomweave/` directory, /// permission denied, filesystem read-only). /// - Another `loomweave analyze` process already holds the lock. Returns /// an error containing the lock-file path so the operator can identify @@ -90,7 +90,7 @@ pub(crate) fn acquire_analyze_lock(loomweave_dir: &Path) -> Result`. Content comes from `--content`, else stdin (when /// piped) or `$EDITOR`/`$VISUAL`. Create { - /// Project directory containing .loomweave/loomweave.db (default: current). + /// Project directory containing .weft/loomweave/loomweave.db (default: current). #[arg(long, default_value = ".")] path: PathBuf, @@ -341,7 +341,7 @@ pub enum GuidanceCommand { /// Edit a sheet's content in `$EDITOR`/`$VISUAL` (other properties, including /// `authored_at` and provenance, are preserved). Edit { - /// Project directory containing .loomweave/loomweave.db (default: current). + /// Project directory containing .weft/loomweave/loomweave.db (default: current). #[arg(long, default_value = ".")] path: PathBuf, /// The guidance sheet id (`core:guidance:`). @@ -350,7 +350,7 @@ pub enum GuidanceCommand { /// Print a guidance sheet (human-readable). Show { - /// Project directory containing .loomweave/loomweave.db (default: current). 
+ /// Project directory containing .weft/loomweave/loomweave.db (default: current). #[arg(long, default_value = ".")] path: PathBuf, /// The guidance sheet id. @@ -364,7 +364,7 @@ pub enum GuidanceCommand { /// filter (including `--for-entity`). Without any of them, behaves as the /// plain list. List { - /// Project directory containing .loomweave/loomweave.db (default: current). + /// Project directory containing .weft/loomweave/loomweave.db (default: current). #[arg(long, default_value = ".")] path: PathBuf, /// Only list sheets whose `match_rules` apply to this entity id. @@ -387,7 +387,7 @@ pub enum GuidanceCommand { /// Delete a guidance sheet. Delete { - /// Project directory containing .loomweave/loomweave.db (default: current). + /// Project directory containing .weft/loomweave/loomweave.db (default: current). #[arg(long, default_value = ".")] path: PathBuf, /// The guidance sheet id. @@ -398,7 +398,7 @@ pub enum GuidanceCommand { /// guidance sheet. The observation must have been produced by MCP /// `propose_guidance`; arbitrary observations are rejected. Promote { - /// Project directory containing .loomweave/loomweave.db (default: current). + /// Project directory containing .weft/loomweave/loomweave.db (default: current). #[arg(long, default_value = ".")] path: PathBuf, /// Path to loomweave.yaml (default: project-root/loomweave.yaml if present). @@ -413,7 +413,7 @@ pub enum GuidanceCommand { /// (REQ-GUIDANCE-06). Output is byte-stable across runs on identical DB /// state. The target directory is created if absent. Export { - /// Project directory containing .loomweave/loomweave.db (default: current). + /// Project directory containing .weft/loomweave/loomweave.db (default: current). #[arg(long, default_value = ".")] path: PathBuf, /// Directory to write the exported sheet files into. Export does NOT @@ -430,7 +430,7 @@ pub enum GuidanceCommand { /// untouched (never a destructive mirror). 
A malformed `*.json` aborts the /// import naming the offending file (a dropped sheet is silent data loss). Import { - /// Project directory containing .loomweave/loomweave.db (default: current). + /// Project directory containing .weft/loomweave/loomweave.db (default: current). #[arg(long, default_value = ".")] path: PathBuf, /// Directory of exported sheet files to import. @@ -442,7 +442,7 @@ pub enum GuidanceCommand { pub enum HookCommand { /// Print a project snapshot and re-sync the skill pack on drift. SessionStart { - /// Project directory containing .loomweave/loomweave.db. + /// Project directory containing .weft/loomweave/loomweave.db. #[arg(long, default_value = ".")] path: PathBuf, }, @@ -460,7 +460,7 @@ pub enum SarifCommand { #[arg(long)] scan_source: Option, - /// Project directory containing .loomweave/loomweave.db (default: current). + /// Project directory containing .weft/loomweave/loomweave.db (default: current). #[arg(long, default_value = ".")] path: PathBuf, }, diff --git a/crates/loomweave-cli/src/config.rs b/crates/loomweave-cli/src/config.rs index 965a64dd..de16eaf1 100644 --- a/crates/loomweave-cli/src/config.rs +++ b/crates/loomweave-cli/src/config.rs @@ -69,7 +69,7 @@ serve: http: enabled: false # The read-API port is auto-selected per project (deterministic, with an - # ephemeral fallback) and published to .loomweave/ephemeral.port while + # ephemeral fallback) and published to .weft/loomweave/ephemeral.port while # serving. Set `bind:` explicitly only to pin a fixed port (ADR-044). "; diff --git a/crates/loomweave-cli/src/db.rs b/crates/loomweave-cli/src/db.rs index ec63a7cd..814cf0f4 100644 --- a/crates/loomweave-cli/src/db.rs +++ b/crates/loomweave-cli/src/db.rs @@ -1,7 +1,7 @@ //! `loomweave db` maintenance subcommands. //! //! Currently a single verb: `backup`, an online, WAL-safe copy of -//! `.loomweave/loomweave.db` (gap-register STO-04 / clarion-6d433b61ba). +//! 
`.weft/loomweave/loomweave.db` (gap-register STO-04 / clarion-6d433b61ba). //! //! Why an online backup rather than `cp`: the live database runs in WAL mode, //! so committed pages live in `loomweave.db-wal` separately from the main file. @@ -17,7 +17,7 @@ use std::time::Duration; use anyhow::{Context, Result, anyhow, bail, ensure}; use rusqlite::{Connection, OpenFlags}; -/// Back up the project's `.loomweave/loomweave.db` to `output`. +/// Back up the project's `.weft/loomweave/loomweave.db` to `output`. /// /// The copy is taken with `rusqlite::backup::Backup` (a consistent online /// snapshot) and staged into a sibling temp file that is renamed over `output` @@ -30,7 +30,7 @@ use rusqlite::{Connection, OpenFlags}; /// exists and `force` is not set, if `output` resolves to the source database /// itself, or if the backup / integrity check fails. pub fn backup(project_root: &Path, output: &Path, force: bool) -> Result<()> { - let db_path = project_root.join(".loomweave").join("loomweave.db"); + let db_path = loomweave_core::store::db_path(project_root); ensure!( db_path.exists(), "Loomweave database not found at {}; run `loomweave analyze` first", diff --git a/crates/loomweave-cli/src/doctor.rs b/crates/loomweave-cli/src/doctor.rs index 90303c48..1a5b5439 100644 --- a/crates/loomweave-cli/src/doctor.rs +++ b/crates/loomweave-cli/src/doctor.rs @@ -217,20 +217,23 @@ fn json_report(project_root: &Path, fix: bool) -> DoctorJsonReport { } fn check_loomweave_dir_json(project_root: &Path) -> DoctorJsonCheck { - let loomweave_dir = project_root.join(".loomweave"); + let loomweave_dir = loomweave_core::store::store_dir(project_root); let db = loomweave_dir.join("loomweave.db"); if loomweave_dir.is_dir() && db.is_file() { DoctorJsonCheck::ok( - ".loomweave.schema", - ".loomweave directory and database are present", + ".weft/loomweave.schema", + ".weft/loomweave store directory and database are present", ) } else if loomweave_dir.is_dir() { DoctorJsonCheck::warning( - 
".loomweave.schema", - ".loomweave directory exists but loomweave.db is absent", + ".weft/loomweave.schema", + ".weft/loomweave store directory exists but loomweave.db is absent", ) } else { - DoctorJsonCheck::warning(".loomweave.schema", ".loomweave directory is absent") + DoctorJsonCheck::warning( + ".weft/loomweave.schema", + ".weft/loomweave store directory is absent", + ) } } @@ -431,7 +434,7 @@ fn check_http_config_json(project_root: &Path) -> DoctorJsonCheck { ); } // ADR-044: prefer the live published port over the (now usually absent) - // static bind. A running serve publishes .loomweave/ephemeral.port. + // static bind. A running serve publishes .weft/loomweave/ephemeral.port. let resolution = loomweave_federation::loomweave_url::resolve_loomweave_url(None, project_root); if let Some(url) = resolution.resolved_url { return DoctorJsonCheck::ok( @@ -448,7 +451,7 @@ fn check_http_config_json(project_root: &Path) -> DoctorJsonCheck { if bind.trim().is_empty() { DoctorJsonCheck::ok( "http.config", - "HTTP enabled; read-API port auto-selected and published to .loomweave/ephemeral.port while serving", + "HTTP enabled; read-API port auto-selected and published to .weft/loomweave/ephemeral.port while serving", ) } else { DoctorJsonCheck::ok( @@ -553,7 +556,7 @@ fn check_llm_provider_json(project_root: &Path) -> DoctorJsonCheck { } fn check_sei_population_json(project_root: &Path) -> DoctorJsonCheck { - let db = project_root.join(".loomweave/loomweave.db"); + let db = loomweave_core::store::db_path(project_root); let Ok(conn) = Connection::open(&db) else { return DoctorJsonCheck::warning("sei.population", "loomweave.db is absent or unreadable"); }; diff --git a/crates/loomweave-cli/src/guidance.rs b/crates/loomweave-cli/src/guidance.rs index 88feb23f..65a0174f 100644 --- a/crates/loomweave-cli/src/guidance.rs +++ b/crates/loomweave-cli/src/guidance.rs @@ -749,11 +749,11 @@ fn render_sheet(sheet: &GuidanceSheet) -> String { // ── I/O helpers 
─────────────────────────────────────────────────────────────── -/// Open a read-write connection to `.loomweave/loomweave.db` with a generous busy +/// Open a read-write connection to `.weft/loomweave/loomweave.db` with a generous busy /// timeout so a concurrently-running `serve` writer does not cause an immediate /// lock error. fn open_db(project_root: &Path) -> Result { - let db_path = project_root.join(".loomweave").join("loomweave.db"); + let db_path = loomweave_core::store::db_path(project_root); if !db_path.exists() { bail!( "Loomweave database not found at {}; run `loomweave analyze` first", diff --git a/crates/loomweave-cli/src/hook.rs b/crates/loomweave-cli/src/hook.rs index a0adcf08..a8a1c40d 100644 --- a/crates/loomweave-cli/src/hook.rs +++ b/crates/loomweave-cli/src/hook.rs @@ -30,11 +30,11 @@ pub fn session_start(path: &Path) -> anyhow::Result<()> { Ok(()) } -/// What [`load_snapshot`] could establish about the `.loomweave/` index. +/// What [`load_snapshot`] could establish about the `.weft/loomweave/` index. /// /// A *missing* db and a *present-but-unreadable* db are deliberately distinct: /// the missing case nudges toward `install` + `analyze`, but that advice is -/// wrong for a present-but-corrupt/locked db (`install` refuses while `.loomweave/` +/// wrong for a present-but-corrupt/locked db (`install` refuses while `.weft/loomweave/` /// exists; `analyze` cannot repair corruption). See [`print_snapshot`]. 
enum SnapshotOutcome { /// Either the db file is absent (a `missing_db_snapshot()`) or it opened and @@ -61,14 +61,14 @@ fn resync_skill_if_present(project_root: &Path) { } fn load_snapshot(project_root: &Path) -> SnapshotOutcome { - let db_path = project_root.join(".loomweave").join("loomweave.db"); + let db_path = loomweave_core::store::db_path(project_root); if !db_path.exists() { return SnapshotOutcome::Ready(missing_db_snapshot()); } let conn = match Connection::open_with_flags(&db_path, OpenFlags::SQLITE_OPEN_READ_ONLY) { Ok(conn) => conn, Err(err) => { - tracing::warn!(error = %err, "open .loomweave/loomweave.db read-only failed"); + tracing::warn!(error = %err, "open .weft/loomweave/loomweave.db read-only failed"); return SnapshotOutcome::DbUnreadable; } }; @@ -78,7 +78,7 @@ fn load_snapshot(project_root: &Path) -> SnapshotOutcome { // db is classified as unreadable rather than silently reported as 0 counts // (which would otherwise print the wrong "no analysis yet" nudge). if let Err(err) = conn.query_row("PRAGMA schema_version", [], |row| row.get::<_, i64>(0)) { - tracing::warn!(error = %err, "probe read of .loomweave/loomweave.db failed"); + tracing::warn!(error = %err, "probe read of .weft/loomweave/loomweave.db failed"); return SnapshotOutcome::DbUnreadable; } let root = project_root @@ -111,11 +111,11 @@ fn snapshot_outcome_lines(project_root: &Path, outcome: &SnapshotOutcome) -> Vec let snapshot = match outcome { SnapshotOutcome::Ready(snapshot) => snapshot, SnapshotOutcome::DbUnreadable => { - let db_path = project_root.join(".loomweave").join("loomweave.db"); + let db_path = loomweave_core::store::db_path(project_root); lines.push(format!( "Loomweave: an index exists at {} but could not be opened (it may be \ corrupt, locked by another process, or unreadable). 
Check permissions, \ - ensure no other loomweave process holds it, or remove .loomweave/ and re-run \ + ensure no other loomweave process holds it, or remove .weft/loomweave/ and re-run \ `loomweave install` + `loomweave analyze`. (Run with RUST_LOG=warn for the \ open error.)", db_path.display() @@ -125,7 +125,7 @@ fn snapshot_outcome_lines(project_root: &Path, outcome: &SnapshotOutcome) -> Vec }; if !snapshot.db_present() { lines.push(format!( - "Loomweave: no index at {}/.loomweave/loomweave.db. \ + "Loomweave: no index at {}/.weft/loomweave/loomweave.db. \ Run `loomweave install --path {}` then `loomweave analyze {}`.", project_root.display(), project_root.display(), diff --git a/crates/loomweave-cli/src/http_read.rs b/crates/loomweave-cli/src/http_read.rs index 3cdf4e7c..ae6bd146 100644 --- a/crates/loomweave-cli/src/http_read.rs +++ b/crates/loomweave-cli/src/http_read.rs @@ -55,7 +55,7 @@ static HTTP_ERROR_DISPATCH: LazyLock = LazyLock::new(|| { tracing::Dispatch::new(subscriber) }); -/// Removes the published `.loomweave/ephemeral.port` on drop — covering +/// Removes the published `.weft/loomweave/ephemeral.port` on drop — covering /// graceful shutdown, error return, and panic-unwind in one place. Only /// SIGKILL can strand a stale file, which the read-side validation and the /// ADR-034 instance-ID guard tolerate (a stale file degrades, never corrupts). @@ -404,7 +404,7 @@ fn run_http_read_server( tracing::warn!( error = %err, port = local_addr.port(), - "failed to publish .loomweave/ephemeral.port; consumers will fall back to configured URL" + "failed to publish .weft/loomweave/ephemeral.port; consumers will fall back to configured URL" ); None } else { @@ -1072,7 +1072,7 @@ mod tests { /// ADR-044: with `bind: None`, two serves on distinct project paths each /// bind their own deterministic port and publish their own - /// `.loomweave/ephemeral.port`. Neither fails to bind. + /// `.weft/loomweave/ephemeral.port`. Neither fails to bind. 
#[test] fn auto_port_publishes_distinct_ports_per_project() { use loomweave_federation::config::HttpReadConfig; diff --git a/crates/loomweave-cli/src/install.rs b/crates/loomweave-cli/src/install.rs index 4137e44a..d5339392 100644 --- a/crates/loomweave-cli/src/install.rs +++ b/crates/loomweave-cli/src/install.rs @@ -1,16 +1,16 @@ -//! `loomweave install` — initialise .loomweave/ in the target directory. +//! `loomweave install` — initialise .weft/loomweave/ in the target directory. //! //! Creates: -//! - `.loomweave/loomweave.db` (migrated) -//! - `.loomweave/config.json` (internal state stub) -//! - `.loomweave/.gitignore` (UQ-WP1-04 rules; ADR-005) +//! - `.weft/loomweave/loomweave.db` (migrated) +//! - `.weft/loomweave/config.json` (internal state stub) +//! - `.weft/loomweave/.gitignore` (UQ-WP1-04 rules; ADR-005) //! - `/loomweave.yaml` (user-edited config stub at project root; //! see detailed-design.md §File layout) //! //! A bare `loomweave install` (no flags) does everything: init + MCP config + -//! skills + hooks + local Weft integration bindings. If `.loomweave/` already +//! skills + hooks + local Weft integration bindings. If `.weft/loomweave/` already //! exists, init is skipped and the idempotent components are still applied. -//! Pass `--force` to wipe and reinitialise `.loomweave/`. Component flags and +//! Pass `--force` to wipe and reinitialise `.weft/loomweave/`. Component flags and //! `--all` are still accepted for explicit partial installs. use std::fs; @@ -100,7 +100,7 @@ pub enum InstallComponent { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum InstallPlan { /// Component flags without `--all`: apply the named components and do NOT - /// initialise `.loomweave/`. `from_components` only constructs this when at + /// initialise `.weft/loomweave/`. `from_components` only constructs this when at /// least one component is present. 
Components { claude_code: bool, @@ -110,7 +110,7 @@ pub enum InstallPlan { hooks: bool, instructions: bool, }, - /// No flags or `--all`: initialise `.loomweave/` + every integration. + /// No flags or `--all`: initialise `.weft/loomweave/` + every integration. All, } @@ -135,7 +135,7 @@ impl InstallPlan { } } - /// Whether to initialise `.loomweave/` (the index). True for `All`. + /// Whether to initialise `.weft/loomweave/` (the index). True for `All`. #[must_use] pub fn init_loomweave(self) -> bool { matches!(self, Self::All) @@ -204,7 +204,7 @@ impl InstallPlan { /// /// # Errors /// -/// Returns an error if `.loomweave/` already exists without `--force`, if the +/// Returns an error if `.weft/loomweave/` already exists without `--force`, if the /// target directory cannot be canonicalised, or if any filesystem or database /// operation fails. pub fn run( @@ -285,23 +285,23 @@ fn validate_plan(plan: InstallPlan) -> Result<()> { } fn initialise_project(project_root: &Path, force: bool) -> Result<()> { - let loomweave_dir = project_root.join(".loomweave"); + let loomweave_dir = loomweave_core::store::store_dir(project_root); let exists = loomweave_dir.exists(); - // `All` (including naked install) treats an existing .loomweave/ as + // `All` (including naked install) treats an existing .weft/loomweave/ as // already-initialised and skips re-init, still applying the idempotent - // components. A non-directory .loomweave is not a usable index, so refuse + // components. A non-directory .weft/loomweave is not a usable index, so refuse // rather than "succeed" with skills/hooks atop a project with no loomweave.db. // Component-only installs skip this block. if exists && !force { if !loomweave_dir.is_dir() { bail!( - "found a non-directory at {}; expected an initialised .loomweave/ \ + "found a non-directory at {}; expected an initialised .weft/loomweave/ \ directory. 
Remove it (or pass --force) and re-run.", loomweave_dir.display() ); } println!( - "{} already initialised; skipping .loomweave/ init (pass --force to recreate).", + "{} already initialised; skipping .weft/loomweave/ init (pass --force to recreate).", loomweave_dir.display() ); return Ok(()); @@ -311,7 +311,7 @@ fn initialise_project(project_root: &Path, force: bool) -> Result<()> { // --force overwrite path. if !loomweave_dir.is_dir() { bail!( - "--force can only overwrite an existing .loomweave/ directory; \ + "--force can only overwrite an existing .weft/loomweave/ directory; \ found non-directory at {}.", loomweave_dir.display() ); @@ -323,7 +323,7 @@ fn initialise_project(project_root: &Path, force: bool) -> Result<()> { fs::create_dir_all(&loomweave_dir) .with_context(|| format!("mkdir {}", loomweave_dir.display()))?; - // Cleanup guard: if any post-mkdir step fails, remove .loomweave/ before + // Cleanup guard: if any post-mkdir step fails, remove .weft/loomweave/ before // bubbling the error so the next install attempt isn't blocked by the // "already exists" check (clarion-ed5017139f). 
if let Err(err) = populate_after_mkdir(&loomweave_dir, project_root) { @@ -331,7 +331,7 @@ fn initialise_project(project_root: &Path, force: bool) -> Result<()> { tracing::warn!( loomweave_dir = %loomweave_dir.display(), error = %cleanup_err, - "install failed and cleanup of partial .loomweave/ also failed; \ + "install failed and cleanup of partial .weft/loomweave/ also failed; \ manual rm -rf may be required" ); } diff --git a/crates/loomweave-cli/src/instance.rs b/crates/loomweave-cli/src/instance.rs index 235a3fab..7d5630f0 100644 --- a/crates/loomweave-cli/src/instance.rs +++ b/crates/loomweave-cli/src/instance.rs @@ -42,7 +42,7 @@ impl Serialize for InstanceId { } pub fn load_or_create(project_root: &Path) -> Result { - let path = project_root.join(".loomweave").join(INSTANCE_ID_FILE); + let path = loomweave_core::store::store_dir(project_root).join(INSTANCE_ID_FILE); match fs::read_to_string(&path) { Ok(raw) => read_existing_instance_id(&path, &raw), Err(err) if err.kind() == io::ErrorKind::NotFound => create_instance_id(&path), diff --git a/crates/loomweave-cli/src/integration_bindings.rs b/crates/loomweave-cli/src/integration_bindings.rs index dff7187b..be4a5d34 100644 --- a/crates/loomweave-cli/src/integration_bindings.rs +++ b/crates/loomweave-cli/src/integration_bindings.rs @@ -78,7 +78,7 @@ fn desired_bindings(project_root: &Path) -> DesiredBindings { ); // ADR-044: seed the consumer's static target with this project's // deterministic read-API port. serve binds the same port (barring an - // ephemeral fallback), and the published .loomweave/ephemeral.port file + // ephemeral fallback), and the published .weft/loomweave/ephemeral.port file // overrides this at runtime once a consumer resolves consume-time. 
let port = loomweave_federation::loomweave_port::deterministic_port(project_root); let loomweave_url = format!("http://127.0.0.1:{port}"); @@ -90,8 +90,10 @@ fn desired_bindings(project_root: &Path) -> DesiredBindings { } fn live_filigree_base_url(project_root: &Path) -> Option { - let raw = fs::read_to_string(project_root.join(".filigree/ephemeral.port")).ok()?; - let port: u16 = raw.trim().parse().ok()?; + // ADR-046: read Filigree's live port only from the consolidated + // `.weft/filigree/ephemeral.port` location, via the canonical resolver so the + // single-location policy stays in one place. No `.filigree/` legacy fallback. + let port = loomweave_federation::filigree_url::read_filigree_ephemeral_port(project_root)?; Some(format!("http://127.0.0.1:{port}")) } diff --git a/crates/loomweave-cli/src/secret_scan/baseline.rs b/crates/loomweave-cli/src/secret_scan/baseline.rs index fb03f880..da610b4b 100644 --- a/crates/loomweave-cli/src/secret_scan/baseline.rs +++ b/crates/loomweave-cli/src/secret_scan/baseline.rs @@ -13,7 +13,7 @@ const BASELINE_NO_JUSTIFICATION: &str = "LMWV-INFRA-SECRET-BASELINE-NO-JUSTIFICA const BASELINE_MATCH: &str = "LMWV-INFRA-SECRET-BASELINE-MATCH"; pub(super) fn load_for_scan(project_root: &Path) -> Result<(Baseline, Vec)> { - let path = project_root.join(".loomweave/secrets-baseline.yaml"); + let path = loomweave_core::store::store_dir(project_root).join("secrets-baseline.yaml"); match loomweave_scanner::load_baseline(&path) { Ok(baseline) => Ok((baseline, Vec::new())), Err(BaselineError::MissingJustifications { entries }) => Ok(( diff --git a/crates/loomweave-cli/src/secret_scan/files.rs b/crates/loomweave-cli/src/secret_scan/files.rs index 654f8cbb..ec7e1a84 100644 --- a/crates/loomweave-cli/src/secret_scan/files.rs +++ b/crates/loomweave-cli/src/secret_scan/files.rs @@ -8,7 +8,7 @@ use ignore::{DirEntry, WalkBuilder}; use super::canonical_or_original; const SKIP_DIRS: &[&str] = &[ - ".loomweave", + ".weft", ".git", ".hg", ".svn", @@ 
-113,7 +113,7 @@ mod tests { write(root.join("nested/service.env"), "TOKEN=four\n"); write(root.join("nested/.env"), "TOKEN=five\n"); write(root.join("nested/not-env.txt"), "TOKEN=six\n"); - write(root.join(".loomweave/.env"), "TOKEN=skip\n"); + write(root.join(".weft/loomweave/.env"), "TOKEN=skip\n"); write(root.join("node_modules/.env"), "TOKEN=skip\n"); let files = collect_scan_files(root, &[root.join("src/app.py")]); @@ -126,7 +126,7 @@ mod tests { assert!(rel.contains(&"nested/.env".to_owned())); assert!(rel.contains(&"src/app.py".to_owned())); assert!(!rel.contains(&"nested/not-env.txt".to_owned())); - assert!(!rel.contains(&".loomweave/.env".to_owned())); + assert!(!rel.contains(&".weft/loomweave/.env".to_owned())); assert!(!rel.contains(&"node_modules/.env".to_owned())); } diff --git a/crates/loomweave-cli/src/serve.rs b/crates/loomweave-cli/src/serve.rs index a617375f..8ff745c5 100644 --- a/crates/loomweave-cli/src/serve.rs +++ b/crates/loomweave-cli/src/serve.rs @@ -18,7 +18,7 @@ use loomweave_federation::filigree::FiligreeHttpClient; use loomweave_storage::{DEFAULT_BATCH_SIZE, DEFAULT_CHANNEL_CAPACITY, ReaderPool, Writer}; pub fn run(path: &Path, config_path: Option<&Path>) -> Result<()> { - let db_path = path.join(".loomweave").join("loomweave.db"); + let db_path = loomweave_core::store::db_path(path); if !db_path.exists() { // No index yet. 
Rather than exiting 1 — which leaves the MCP client // staring at a server that died at startup with the reason buried in @@ -65,8 +65,8 @@ pub fn run(path: &Path, config_path: Option<&Path>) -> Result<()> { build_embedding_provider(&config.semantic_search, |name| std::env::var(name).ok())?; // Resolve where Filigree actually listens — prefer the live ethereal port - // published in `.filigree/ephemeral.port` over the static configured port - // (which goes stale, the dogfood bug) — then build the client against the + // published in `.weft/filigree/ephemeral.port` over the static configured + // port (which goes stale, the dogfood bug) — then build the client against the // resolved URL so `issues_for` reaches the running dashboard. The same // resolution is surfaced by `project_status`. let filigree_resolution = loomweave_federation::filigree_url::resolve_filigree_url( diff --git a/crates/loomweave-cli/src/skill_pack.rs b/crates/loomweave-cli/src/skill_pack.rs index c17c63ee..205cb591 100644 --- a/crates/loomweave-cli/src/skill_pack.rs +++ b/crates/loomweave-cli/src/skill_pack.rs @@ -176,7 +176,7 @@ fn stage_and_swap(root: &Path, dest: &Path, fingerprint: &str) -> Result<()> { // Cleanup guard: if writing the staged files fails, remove the staging dir // before bubbling the error so we don't leak a `.loomweave-workflow.tmp-*` - // sibling. Matches the partial-state-cleanup precedent on the `.loomweave/` + // sibling. Matches the partial-state-cleanup precedent on the `.weft/loomweave/` // path in install.rs. The original error is preserved. 
if let Err(err) = write_staged_pack(&staging, fingerprint) { let _ = fs::remove_dir_all(&staging); diff --git a/crates/loomweave-cli/src/wardline_guidance.rs b/crates/loomweave-cli/src/wardline_guidance.rs index 5d4a2762..3418798a 100644 --- a/crates/loomweave-cli/src/wardline_guidance.rs +++ b/crates/loomweave-cli/src/wardline_guidance.rs @@ -371,7 +371,7 @@ fn collect_overlay_paths(dir: &Path, paths: &mut Vec) -> Res if entry.file_type()?.is_dir() { if matches!( file_name.as_ref(), - ".git" | ".loomweave" | ".venv" | "target" | "node_modules" + ".git" | ".weft" | ".venv" | "target" | "node_modules" ) { continue; } diff --git a/crates/loomweave-cli/tests/analyze.rs b/crates/loomweave-cli/tests/analyze.rs index ef1651a8..3f9465f6 100644 --- a/crates/loomweave-cli/tests/analyze.rs +++ b/crates/loomweave-cli/tests/analyze.rs @@ -16,7 +16,7 @@ fn loomweave_bin() -> Command { } fn latest_run_config(project_root: &std::path::Path) -> serde_json::Value { - let conn = Connection::open(project_root.join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_root.join(".weft/loomweave/loomweave.db")).unwrap(); let config_raw: String = conn .query_row( "SELECT config FROM runs ORDER BY started_at DESC LIMIT 1", @@ -28,7 +28,7 @@ fn latest_run_config(project_root: &std::path::Path) -> serde_json::Value { } fn latest_run_stats(project_root: &std::path::Path) -> serde_json::Value { - let conn = Connection::open(project_root.join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_root.join(".weft/loomweave/loomweave.db")).unwrap(); let stats_raw: String = conn .query_row( "SELECT stats FROM runs ORDER BY started_at DESC LIMIT 1", @@ -839,7 +839,7 @@ semantic_search: requests[0] ); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let tag_count: i64 = conn .query_row( "SELECT COUNT(*) FROM 
entity_tags \ @@ -852,7 +852,7 @@ semantic_search: .expect("query persisted tags"); assert_eq!(tag_count, 1, "plugin-emitted tags must be persisted"); - let sidecar = project_dir.path().join(".loomweave/embeddings.db"); + let sidecar = project_dir.path().join(".weft/loomweave/embeddings.db"); assert!(sidecar.exists(), "analyze should create embeddings sidecar"); let sidecar_conn = Connection::open(sidecar).unwrap(); let embedding_count: i64 = sidecar_conn @@ -893,7 +893,7 @@ fn analyze_without_plugins_writes_skipped_run_row() { .assert() .success(); - let conn = Connection::open(dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let (count, status): (i64, String) = conn .query_row( "SELECT COUNT(*), COALESCE(MAX(status), '') FROM runs", @@ -926,7 +926,7 @@ fn analyze_migrates_a_stale_db_instead_of_failing() { .assert() .success(); - let db = dir.path().join(".loomweave/loomweave.db"); + let db = dir.path().join(".weft/loomweave/loomweave.db"); // Rewind to the pre-0007 (v6) shape: no `analyzed_at_commit`, no v7 ledger // row, user_version back to 6 — exactly an upgraded-binary-vs-old-DB state. 
{ @@ -1082,7 +1082,7 @@ analysis: "stderr should identify invalid clustering algorithm; got: {stderr}" ); - let conn = Connection::open(dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let run_count: i64 = conn .query_row("SELECT COUNT(*) FROM runs", [], |row| row.get(0)) .expect("query run count"); @@ -1096,7 +1096,7 @@ fn analyze_phase3_emits_subsystem_entities_and_edges() { &["auth_a", "auth_b", "billing_a", "billing_b"], &phase3_config(2), ); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let subsystem_count: i64 = conn .query_row( @@ -1138,7 +1138,7 @@ fn analyze_phase3_emits_subsystem_entities_and_edges() { #[test] fn analyze_phase3_is_deterministic_across_two_runs() { fn signature(project_root: &std::path::Path) -> Vec<(String, String)> { - let conn = Connection::open(project_root.join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_root.join(".weft/loomweave/loomweave.db")).unwrap(); conn.prepare("SELECT id, properties FROM entities WHERE kind = 'subsystem' ORDER BY id") .unwrap() .query_map([], |row| Ok((row.get(0)?, row.get(1)?))) @@ -1158,7 +1158,7 @@ fn analyze_phase3_is_deterministic_across_two_runs() { #[test] fn analyze_phase3_skips_empty_graph_with_stats() { let project_dir = run_phase3_fixture(&["solo"], &phase3_config(2)); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let subsystem_count: i64 = conn .query_row( "SELECT COUNT(*) FROM entities WHERE kind = 'subsystem'", @@ -1186,7 +1186,7 @@ fn analyze_phase3_skips_empty_graph_with_stats() { #[test] fn analyze_phase3_emits_weak_modularity_fact_when_below_threshold() { let project_dir = 
run_phase3_fixture(&["weak_a", "weak_b"], &phase3_config(2)); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let row: (String, String, String, String, String) = conn .query_row( "SELECT rule_id, kind, severity, status, properties \ @@ -1385,7 +1385,8 @@ fn analyze_emits_post_commit_tier_finding_to_filigree_at_project_anchor() { std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); { - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = + Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); // Two subsystems → two tier findings, both anchored to the project root. // auth disagrees (MIXING); billing agrees (UNANIMOUS). They share // (rule-family, path, null line) but carry subsystem-distinct messages — @@ -1464,7 +1465,7 @@ fn analyze_emits_entity_deleted_finding_when_file_removed() { &plugin_path, ); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); // The plugin's `module` entity carries the canonical finding shape. 
let (kind, severity, status): (String, String, String) = conn .query_row( @@ -1510,7 +1511,7 @@ fn analyze_emits_guidance_orphan_and_invalidates_summary_cache_on_deletion() { phase3_project_for_rerun(&["auth_a", "auth_b", "billing_a", "billing_b"]); let plugin_path = std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); - let db_path = project_dir.path().join(".loomweave/loomweave.db"); + let db_path = project_dir.path().join(".weft/loomweave/loomweave.db"); let target = "phase3fixture:module:billing_a"; // Inject a guidance sheet that `guides` the soon-to-be-deleted entity, plus a @@ -1590,7 +1591,7 @@ fn analyze_emits_guidance_orphan_for_match_rule_entity_and_dedupes() { phase3_project_for_rerun(&["auth_a", "auth_b", "billing_a", "billing_b"]); let plugin_path = std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); - let db_path = project_dir.path().join(".loomweave/loomweave.db"); + let db_path = project_dir.path().join(".weft/loomweave/loomweave.db"); let target = "phase3fixture:module:billing_a"; { @@ -1670,7 +1671,7 @@ fn analyze_emits_guidance_expired_for_past_expiry_only() { let (project_dir, plugin_dir, config_path) = phase3_project_for_rerun(&["auth_a", "auth_b"]); let plugin_path = std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); - let db_path = project_dir.path().join(".loomweave/loomweave.db"); + let db_path = project_dir.path().join(".weft/loomweave/loomweave.db"); { let conn = Connection::open(&db_path).unwrap(); @@ -1730,7 +1731,7 @@ fn analyze_emits_guidance_expired_under_no_sei() { let (project_dir, plugin_dir, config_path) = phase3_project_for_rerun(&["auth_a", "auth_b"]); let plugin_path = std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); - let db_path = project_dir.path().join(".loomweave/loomweave.db"); + let db_path = project_dir.path().join(".weft/loomweave/loomweave.db"); { let conn = Connection::open(&db_path).unwrap(); @@ 
-1778,7 +1779,7 @@ fn analyze_emits_guidance_churn_stale_with_asymmetric_pinned_threshold() { let (project_dir, plugin_dir, config_path) = phase3_project_for_rerun(&["auth_a", "auth_b"]); let plugin_path = std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); - let db_path = project_dir.path().join(".loomweave/loomweave.db"); + let db_path = project_dir.path().join(".weft/loomweave/loomweave.db"); // Seed git_churn_count on the matched module via properties JSON (the analyze // pipeline does not populate it). A `kind:module` match_rule selects both @@ -1866,7 +1867,7 @@ fn analyze_guidance_churn_stale_is_honest_empty_without_churn() { let (project_dir, plugin_dir, config_path) = phase3_project_for_rerun(&["auth_a", "auth_b"]); let plugin_path = std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); - let db_path = project_dir.path().join(".loomweave/loomweave.db"); + let db_path = project_dir.path().join(".weft/loomweave/loomweave.db"); { let conn = Connection::open(&db_path).unwrap(); @@ -2030,7 +2031,7 @@ fn analyze_generates_pinned_wardline_derived_guidance() { phase3_project_for_rerun(&["auth_a", "auth_b", "billing_a"]); let plugin_path = std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); - let db_path = project_dir.path().join(".loomweave/loomweave.db"); + let db_path = project_dir.path().join(".weft/loomweave/loomweave.db"); write_wardline_manifest(project_dir.path(), "Keep integral code isolated."); run_phase3_analyze( @@ -2094,7 +2095,7 @@ fn analyze_accepts_real_wardline_output_bundle() { let (project_dir, plugin_dir, config_path) = phase3_project_for_rerun(&["seed"]); let plugin_path = std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); - let db_path = project_dir.path().join(".loomweave/loomweave.db"); + let db_path = project_dir.path().join(".weft/loomweave/loomweave.db"); write_real_wardline_output_fixture(project_dir.path()); 
run_phase3_analyze( @@ -2200,7 +2201,7 @@ fn analyze_preserves_wardline_override_and_emits_guidance_stale() { phase3_project_for_rerun(&["auth_a", "auth_b", "billing_a"]); let plugin_path = std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); - let db_path = project_dir.path().join(".loomweave/loomweave.db"); + let db_path = project_dir.path().join(".weft/loomweave/loomweave.db"); write_wardline_manifest(project_dir.path(), "Initial Wardline guidance."); run_phase3_analyze( project_dir.path(), @@ -2311,7 +2312,7 @@ fn analyze_emits_tier_mixing_and_unanimous_findings() { phase3_project_for_rerun(&["auth_a", "auth_b", "billing_a", "billing_b"]); let plugin_path = std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); - let db_path = project_dir.path().join(".loomweave/loomweave.db"); + let db_path = project_dir.path().join(".weft/loomweave/loomweave.db"); { let conn = Connection::open(&db_path).unwrap(); @@ -2371,7 +2372,7 @@ fn analyze_resolves_function_tier_through_contains_chain_to_subsystem() { let (project_dir, plugin_dir, config_path) = phase3_project_for_rerun(&["auth_a", "auth_b"]); let plugin_path = std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); - let db_path = project_dir.path().join(".loomweave/loomweave.db"); + let db_path = project_dir.path().join(".weft/loomweave/loomweave.db"); let func = "phase3fixture:function:auth_a.handler"; { @@ -2432,7 +2433,7 @@ analysis: weak_modularity_threshold: 0.0 ", ); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let finding_count: i64 = conn .query_row( "SELECT COUNT(*) FROM findings \ @@ -2461,7 +2462,7 @@ fn analyze_phase3_does_not_emit_weak_modularity_fact_when_threshold_is_met() { &["auth_a", "auth_b", "billing_a", "billing_b"], &phase3_config(2), ); - let conn = 
Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let finding_count: i64 = conn .query_row( "SELECT COUNT(*) FROM findings \ @@ -2492,7 +2493,7 @@ fn analyze_phase3_min_cluster_size_drops_undersized_weighted_components() { &["auth_a", "auth_b", "billing_a", "billing_b"], &phase3_weighted_components_config(3), ); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let subsystem_count: i64 = conn .query_row( "SELECT COUNT(*) FROM entities WHERE kind = 'subsystem'", @@ -2529,7 +2530,7 @@ fn analyze_phase3_persists_weighted_components_algorithm_when_selected() { &["auth_a", "auth_b", "billing_a", "billing_b"], &phase3_weighted_components_config(2), ); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let properties_json: String = conn .query_row( "SELECT properties FROM entities \ @@ -2591,7 +2592,7 @@ fn analyze_stats_reports_ambiguous_edges_total() { .assert() .success(); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let stats_raw: String = conn .query_row("SELECT stats FROM runs LIMIT 1", [], |row| row.get(0)) .expect("query runs.stats"); @@ -2688,7 +2689,7 @@ fn analyze_mints_core_file_entity_for_registry_resolution() { .assert() .success(); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let resolved = loomweave_storage::resolve_file(&conn, project_dir.path(), "demo.call", "") 
.expect("resolve_file should not error") .expect("analyzed ordinary source file should resolve as a core file entity"); @@ -2734,7 +2735,7 @@ fn analyze_filters_external_import_edges_before_writer_insert() { .assert() .success(); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let import_edges: Vec<(String, String)> = conn .prepare("SELECT from_id, to_id FROM edges WHERE kind = 'imports' ORDER BY from_id, to_id") .unwrap() @@ -2820,7 +2821,7 @@ fn analyze_failrun_exits_nonzero_with_run_row_marked_failed() { // The run row must still be marked `failed` — the FailRun WriterCmd // runs before the bail, so the DB state is consistent with the exit // code. - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let status: String = conn .query_row( "SELECT status FROM runs ORDER BY started_at DESC LIMIT 1", @@ -2875,7 +2876,7 @@ fn analyze_finding_emission_is_best_effort_when_filigree_unreachable() { // run_phase3_fixture already asserted the analyze invocation `.success()`; // confirm the run row landed `completed` despite the emission failure. - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let status: String = conn .query_row( "SELECT status FROM runs ORDER BY started_at DESC LIMIT 1", @@ -3037,7 +3038,8 @@ fn analyze_resume_reuses_run_row_and_emits_mark_unseen_false() { // Capture the fresh run's id, then resume it (POST 2). 
let run_id: String = { - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = + Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); conn.query_row( "SELECT id FROM runs ORDER BY started_at DESC LIMIT 1", [], @@ -3067,7 +3069,7 @@ fn analyze_resume_reuses_run_row_and_emits_mark_unseen_false() { ); // Resume reused the run row — exactly one row in `runs`, finalized. - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let run_rows: i64 = conn .query_row("SELECT COUNT(*) FROM runs", [], |row| row.get(0)) .unwrap(); @@ -3221,7 +3223,7 @@ fn analyze_prune_unseen_is_best_effort_when_filigree_unreachable() { .assert() .success(); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let run_status: String = conn .query_row( "SELECT status FROM runs ORDER BY started_at DESC LIMIT 1", @@ -3361,7 +3363,7 @@ fn analyze_prune_unseen_is_best_effort_on_non_2xx() { server.join().expect("mock server thread"); // A non-2xx clean-stale response must never fail the run. 
- let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let run_status: String = conn .query_row( "SELECT status FROM runs ORDER BY started_at DESC LIMIT 1", @@ -3443,7 +3445,7 @@ fn analyze_rewrites_prior_index_to_current_run_entity_set() { use std::collections::BTreeSet; fn prior_index_locators(project_root: &std::path::Path) -> BTreeSet { - let conn = Connection::open(project_root.join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_root.join(".weft/loomweave/loomweave.db")).unwrap(); conn.prepare("SELECT locator FROM sei_prior_index") .unwrap() .query_map([], |row| row.get::<_, String>(0)) @@ -3508,7 +3510,7 @@ fn analyze_rewrites_prior_index_to_current_run_entity_set() { // Column contract: body_hash populated (NOT NULL), recorded_at stamped, and // signature still NULL in Wave 0 (the WS1 matcher fills it later). - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let (body_hash, recorded_at, signature): (String, String, Option) = conn .query_row( "SELECT body_hash, recorded_at, signature FROM sei_prior_index WHERE locator = ?1", @@ -3528,7 +3530,7 @@ fn analyze_rewrites_prior_index_to_current_run_entity_set() { fn alive_sei_bindings( project_root: &std::path::Path, ) -> std::collections::BTreeMap { - let conn = Connection::open(project_root.join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_root.join(".weft/loomweave/loomweave.db")).unwrap(); conn.prepare( "SELECT current_locator, sei FROM sei_bindings \ WHERE status = 'alive' AND current_locator IS NOT NULL", @@ -3543,7 +3545,7 @@ fn alive_sei_bindings( } fn all_entity_ids(project_root: &std::path::Path) -> std::collections::BTreeSet { - let conn = 
Connection::open(project_root.join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_root.join(".weft/loomweave/loomweave.db")).unwrap(); conn.prepare("SELECT id FROM entities") .unwrap() .query_map([], |row| row.get::<_, String>(0)) @@ -3709,7 +3711,7 @@ fn analyze_orphans_deleted_entity_bindings_through_the_real_pipeline() { std::fs::remove_file(project_dir.path().join("sei_drop.p3")).unwrap(); analyze(); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); // The dropped entity's binding is now orphaned (by SEI — its row persists). let dropped_status: String = conn .query_row( @@ -3816,7 +3818,7 @@ fn analyze_stamps_entities_with_git_head_commit() { .assert() .success(); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); for entity_id in ["core:file:demo.p3", "phase3fixture:module:demo"] { let (first_seen, last_seen): (Option, Option) = conn .query_row( @@ -3903,7 +3905,7 @@ fn analyze_incremental_skip_does_not_orphan_unchanged_file_entities() { ); // And the binding's status is literally alive (belt-and-braces: alive_sei_bindings // already filters status='alive', but assert no orphaned lineage was recorded). 
- let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let orphaned_for_stable: i64 = conn .query_row( "SELECT COUNT(*) FROM sei_lineage WHERE sei = ?1 AND event = 'orphaned'", @@ -4138,7 +4140,7 @@ fn analyze_persists_syntax_error_finding_for_unparseable_file() { .assert() .success(); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let (count, anchor): (i64, String) = conn .query_row( "SELECT COUNT(*), COALESCE(MIN(entity_id), '') FROM findings \ @@ -4293,7 +4295,7 @@ fn analyze_persists_crash_finding_anchored_to_project() { .assert() .failure(); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let (count, anchor): (i64, String) = conn .query_row( "SELECT COUNT(*), COALESCE(MIN(entity_id), '') FROM findings \ @@ -4466,7 +4468,7 @@ fn analyze_persists_timeout_finding_for_hanging_plugin() { .assert() .failure(); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let (timeout_count, anchor): (i64, String) = conn .query_row( "SELECT COUNT(*), COALESCE(MIN(entity_id), '') FROM findings \ diff --git a/crates/loomweave-cli/tests/analyze_failure_modes.rs b/crates/loomweave-cli/tests/analyze_failure_modes.rs index 50e09c5c..1a8314a8 100644 --- a/crates/loomweave-cli/tests/analyze_failure_modes.rs +++ b/crates/loomweave-cli/tests/analyze_failure_modes.rs @@ -479,7 +479,7 @@ fn analyze_defers_cross_file_edges_until_target_entity_batch_arrives() { .assert() .success(); - let conn = 
Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let run_status: String = conn .query_row( "SELECT status FROM runs ORDER BY started_at DESC LIMIT 1", @@ -557,7 +557,7 @@ fn analyze_promotes_run_to_hard_failed_when_writer_actor_fails_mid_run() { "stderr should mention failure; got: {stderr}" ); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); // (1) Run row marked failed. let (run_status, run_stats_raw): (String, String) = conn @@ -659,7 +659,7 @@ fn analyze_persists_completed_file_batches_when_plugin_later_crashes() { .assert() .failure(); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let (run_status, run_stats_raw): (String, String) = conn .query_row( "SELECT status, stats FROM runs ORDER BY started_at DESC LIMIT 1", diff --git a/crates/loomweave-cli/tests/db.rs b/crates/loomweave-cli/tests/db.rs index 34896081..40aff813 100644 --- a/crates/loomweave-cli/tests/db.rs +++ b/crates/loomweave-cli/tests/db.rs @@ -15,10 +15,10 @@ fn loomweave_bin() -> Command { cmd } -/// Seed a real `.loomweave/loomweave.db` under `root` with one identifiable row, +/// Seed a real `.weft/loomweave/loomweave.db` under `root` with one identifiable row, /// left in WAL mode (the state a live analyze leaves behind). 
fn seed_db(root: &std::path::Path) { - let loomweave_dir = root.join(".loomweave"); + let loomweave_dir = root.join(".weft/loomweave"); std::fs::create_dir_all(&loomweave_dir).expect("mkdir .loomweave"); let db_path = loomweave_dir.join("loomweave.db"); let mut conn = Connection::open(&db_path).expect("open db"); @@ -122,7 +122,7 @@ fn backup_refuses_to_clobber_without_force() { #[test] fn backup_rejects_missing_source_db() { let dir = tempfile::tempdir().unwrap(); - // No seed_db: .loomweave/loomweave.db does not exist. + // No seed_db: .weft/loomweave/loomweave.db does not exist. let output = dir.path().join("snapshot.db"); loomweave_bin() diff --git a/crates/loomweave-cli/tests/doctor.rs b/crates/loomweave-cli/tests/doctor.rs index 92266329..9dc426e6 100644 --- a/crates/loomweave-cli/tests/doctor.rs +++ b/crates/loomweave-cli/tests/doctor.rs @@ -158,7 +158,7 @@ fn doctor_fix_preserves_sibling_mcp_server() { #[test] fn doctor_fix_repairs_missing_three_way_integration_bindings() { let dir = tempfile::tempdir().unwrap(); - let filigree_dir = dir.path().join(".filigree"); + let filigree_dir = dir.path().join(".weft").join("filigree"); fs::create_dir_all(&filigree_dir).unwrap(); fs::write(filigree_dir.join("ephemeral.port"), "8749\n").unwrap(); @@ -270,7 +270,7 @@ fn doctor_json_reports_stable_check_shape_for_healthy_install() { #[test] fn doctor_fix_json_reports_fixed_config_bindings() { let dir = tempfile::tempdir().unwrap(); - let filigree_dir = dir.path().join(".filigree"); + let filigree_dir = dir.path().join(".weft").join("filigree"); fs::create_dir_all(&filigree_dir).unwrap(); fs::write(filigree_dir.join("ephemeral.port"), "8749\n").unwrap(); install( @@ -315,7 +315,7 @@ fn doctor_fix_json_reports_fixed_config_bindings() { #[test] fn doctor_reports_missing_hook_and_mcp_and_prints_index_block() { let dir = tempfile::tempdir().unwrap(); - // Skill flags install ONLY the skill packs (no .loomweave/, no hook, no mcp). 
+ // Skill flags install ONLY the skill packs (no .weft/loomweave/, no hook, no mcp). install(&["install", "--skills", "--codex-skills"], dir.path()); let (code, out) = doctor(dir.path(), false); @@ -566,7 +566,7 @@ fn doctor_reports_published_ephemeral_port() { let dir = tempfile::tempdir().unwrap(); install(&["install", "--all"], dir.path()); // Simulate a live serve having published its port. - let loomweave_dir = dir.path().join(".loomweave"); + let loomweave_dir = dir.path().join(".weft/loomweave"); std::fs::create_dir_all(&loomweave_dir).unwrap(); std::fs::write(loomweave_dir.join("ephemeral.port"), "9876\n").unwrap(); diff --git a/crates/loomweave-cli/tests/guidance.rs b/crates/loomweave-cli/tests/guidance.rs index aa27a044..407632d5 100644 --- a/crates/loomweave-cli/tests/guidance.rs +++ b/crates/loomweave-cli/tests/guidance.rs @@ -1,6 +1,6 @@ //! `loomweave guidance` authoring CLI integration tests (WS6 / REQ-GUIDANCE-03). //! -//! Drives the real binary end-to-end against a seeded `.loomweave/loomweave.db`: +//! Drives the real binary end-to-end against a seeded `.weft/loomweave/loomweave.db`: //! create (via `--content`), show, list (incl. `--for-entity`), edit (via a //! fake `$EDITOR`), and delete. Verifies the written `properties` JSON matches //! the shape the MCP read path consumes. @@ -23,10 +23,10 @@ fn loomweave_bin() -> Command { cmd } -/// Seed a real `.loomweave/loomweave.db` with the schema and one code entity (so +/// Seed a real `.weft/loomweave/loomweave.db` with the schema and one code entity (so /// `--for-entity` has a target to match). 
fn seed_db(root: &std::path::Path) { - let loomweave_dir = root.join(".loomweave"); + let loomweave_dir = root.join(".weft/loomweave"); std::fs::create_dir_all(&loomweave_dir).expect("mkdir .loomweave"); let db_path = loomweave_dir.join("loomweave.db"); let mut conn = Connection::open(&db_path).expect("open db"); @@ -59,7 +59,7 @@ fn seed_db(root: &std::path::Path) { } fn properties(root: &std::path::Path, id: &str) -> Value { - let db_path = root.join(".loomweave").join("loomweave.db"); + let db_path = root.join(".weft/loomweave").join("loomweave.db"); let conn = Connection::open(&db_path).expect("reopen db"); let raw: String = conn .query_row( @@ -76,7 +76,7 @@ fn properties(root: &std::path::Path, id: &str) -> Value { /// for the `--expired` / `--stale` filter tests). Bypasses the CLI `create` path /// deliberately — these tests exercise `list`, not authoring. fn seed_sheet(root: &std::path::Path, slug: &str, properties: &Value) { - let db_path = root.join(".loomweave").join("loomweave.db"); + let db_path = root.join(".weft/loomweave").join("loomweave.db"); let conn = Connection::open(&db_path).expect("open db for seed_sheet"); let id = format!("core:guidance:{slug}"); conn.execute( @@ -555,7 +555,7 @@ fn create_normalizes_and_validates_expires() { // Proxy the read path: a future expiry must NOT be lexically < now, i.e. the // sheet is not treated as already expired. - let db_path = dir.path().join(".loomweave").join("loomweave.db"); + let db_path = dir.path().join(".weft/loomweave").join("loomweave.db"); let conn = Connection::open(&db_path).unwrap(); let now: String = conn .query_row("SELECT strftime('%Y-%m-%dT%H:%M:%fZ','now')", [], |r| { @@ -737,7 +737,7 @@ fn edit_without_editor_set_fails_cleanly() { /// Seed one `summary_cache` row for the given entity (the column shape /// `analyze` and the cache writer use). 
fn seed_summary_cache(root: &std::path::Path, entity_id: &str) { - let db_path = root.join(".loomweave").join("loomweave.db"); + let db_path = root.join(".weft/loomweave").join("loomweave.db"); let conn = Connection::open(&db_path).expect("open db"); conn.execute( "INSERT INTO summary_cache \ @@ -752,7 +752,7 @@ fn seed_summary_cache(root: &std::path::Path, entity_id: &str) { } fn summary_cache_count(root: &std::path::Path, entity_id: &str) -> i64 { - let db_path = root.join(".loomweave").join("loomweave.db"); + let db_path = root.join(".weft/loomweave").join("loomweave.db"); let conn = Connection::open(&db_path).expect("open db"); conn.query_row( "SELECT COUNT(*) FROM summary_cache WHERE entity_id = ?1", @@ -942,7 +942,7 @@ fn import_from(root: &std::path::Path, from_dir: &std::path::Path) { /// Fetch a guidance sheet's (name, properties) tuple, or None if absent. fn sheet_fields(root: &std::path::Path, id: &str) -> Option<(String, Value)> { - let db_path = root.join(".loomweave").join("loomweave.db"); + let db_path = root.join(".weft/loomweave").join("loomweave.db"); let conn = Connection::open(&db_path).expect("reopen db"); conn.query_row( "SELECT name, properties FROM entities WHERE id = ?1 AND kind = 'guidance'", @@ -1105,7 +1105,7 @@ fn import_is_idempotent() { assert_eq!(first, second, "re-import is a content no-op"); // Exactly one sheet, not duplicated. - let db_path = dst.path().join(".loomweave").join("loomweave.db"); + let db_path = dst.path().join(".weft/loomweave").join("loomweave.db"); let conn = Connection::open(&db_path).unwrap(); let count: i64 = conn .query_row( @@ -1222,7 +1222,7 @@ fn import_rejects_code_entity_id_and_leaves_entity_intact() { /// Fetch the raw (name, kind, `plugin_id`, properties) tuple for ANY entity (not /// just guidance), or None. 
fn sheet_props_raw(root: &std::path::Path, id: &str) -> Option<(String, String, String, String)> { - let db_path = root.join(".loomweave").join("loomweave.db"); + let db_path = root.join(".weft/loomweave").join("loomweave.db"); let conn = Connection::open(&db_path).expect("reopen db"); conn.query_row( "SELECT name, kind, plugin_id, properties FROM entities WHERE id = ?1", @@ -1244,7 +1244,7 @@ fn import_invalidates_union_of_old_and_new_matches() { // Seed a `class` entity too, so an OLD `kind:class` rule has a target. { - let db_path = dst.path().join(".loomweave").join("loomweave.db"); + let db_path = dst.path().join(".weft/loomweave").join("loomweave.db"); let conn = Connection::open(&db_path).unwrap(); conn.execute( "INSERT INTO entities (id, plugin_id, kind, name, short_name, properties, \ @@ -1328,7 +1328,7 @@ fn delete_invalidates_guides_edge_target() { // Manually wire a `guides` edge (no authoring path creates one today) and a // cache row on the target. { - let db_path = dir.path().join(".loomweave").join("loomweave.db"); + let db_path = dir.path().join(".weft/loomweave").join("loomweave.db"); let conn = Connection::open(&db_path).unwrap(); conn.execute( "INSERT INTO edges (kind, from_id, to_id, confidence) VALUES \ @@ -1432,7 +1432,7 @@ fn import_rejects_guidance_id_with_path_separator() { fn export_flattens_legacy_guidance_id_path_separators() { let src = tempfile::tempdir().unwrap(); seed_db(src.path()); - let db_path = src.path().join(".loomweave").join("loomweave.db"); + let db_path = src.path().join(".weft/loomweave").join("loomweave.db"); let conn = Connection::open(&db_path).expect("open db"); conn.execute( "INSERT INTO entities (id, plugin_id, kind, name, short_name, properties, \ diff --git a/crates/loomweave-cli/tests/hook.rs b/crates/loomweave-cli/tests/hook.rs index a87c43aa..d7cb1638 100644 --- a/crates/loomweave-cli/tests/hook.rs +++ b/crates/loomweave-cli/tests/hook.rs @@ -16,7 +16,7 @@ fn loomweave_bin() -> Command { #[test] fn 
hook_session_start_exits_zero_without_loomweave_db() { - // Fail-soft: no .loomweave/ at all must still exit 0 and nudge. + // Fail-soft: no .weft/loomweave/ at all must still exit 0 and nudge. let dir = tempfile::tempdir().unwrap(); let assert = loomweave_bin() .args(["hook", "session-start", "--path"]) @@ -64,10 +64,10 @@ fn hook_session_start_prints_counts_for_installed_project() { #[test] fn hook_session_start_exits_zero_with_corrupt_db() { let dir = tempfile::tempdir().unwrap(); - std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + std::fs::create_dir_all(dir.path().join(".weft/loomweave")).unwrap(); // Garbage where loomweave.db should be — not a valid SQLite file. std::fs::write( - dir.path().join(".loomweave/loomweave.db"), + dir.path().join(".weft/loomweave/loomweave.db"), b"NOT A SQLITE DB", ) .unwrap(); diff --git a/crates/loomweave-cli/tests/install.rs b/crates/loomweave-cli/tests/install.rs index eb5b8dea..f95a8f47 100644 --- a/crates/loomweave-cli/tests/install.rs +++ b/crates/loomweave-cli/tests/install.rs @@ -30,7 +30,7 @@ fn install_creates_loomweave_dir_with_expected_contents() { .assert() .success(); - let loomweave = dir.path().join(".loomweave"); + let loomweave = dir.path().join(".weft/loomweave"); assert!( loomweave.join("loomweave.db").exists(), "loomweave.db missing" @@ -75,7 +75,7 @@ fn install_creates_loomweave_dir_with_expected_contents() { #[test] fn install_all_wires_three_way_integration_bindings() { let dir = tempfile::tempdir().unwrap(); - let filigree_dir = dir.path().join(".filigree"); + let filigree_dir = dir.path().join(".weft").join("filigree"); fs::create_dir_all(&filigree_dir).unwrap(); fs::write(filigree_dir.join("ephemeral.port"), "8749\n").unwrap(); @@ -185,7 +185,7 @@ fn install_applies_each_migration_exactly_once() { .assert() .success(); - let conn = Connection::open(dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = 
Connection::open(dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let count: i64 = conn .query_row("SELECT COUNT(*) FROM schema_migrations", [], |row| { row.get(0) @@ -212,10 +212,11 @@ fn install_all_rejects_non_directory_loomweave() { // Bug (PR#21 review #6): when `.loomweave` already exists as a regular file // and `--all` (a non-bare init) is run without `--force`, install treated // it as "already initialised" and skipped DB creation, then proceeded to - // install skills/hooks atop a project with no usable `.loomweave/loomweave.db`. + // install skills/hooks atop a project with no usable `.weft/loomweave/loomweave.db`. // It must instead refuse with a clear non-directory error. let dir = tempfile::tempdir().unwrap(); - std::fs::write(dir.path().join(".loomweave"), "i am a file, not a dir").unwrap(); + std::fs::create_dir_all(dir.path().join(".weft")).unwrap(); + std::fs::write(dir.path().join(".weft/loomweave"), "i am a file, not a dir").unwrap(); let out = loomweave_bin() .args(["install", "--all", "--path"]) @@ -233,10 +234,11 @@ fn install_all_rejects_non_directory_loomweave() { #[test] fn install_force_rejects_non_directory_loomweave() { // The --force overwrite path has its own non-directory guard (distinct from - // the --all skip-init guard): it can only remove an existing .loomweave/ + // the --all skip-init guard): it can only remove an existing .weft/loomweave/ // *directory*, never a regular file masquerading as one. 
let dir = tempfile::tempdir().unwrap(); - std::fs::write(dir.path().join(".loomweave"), "i am a file, not a dir").unwrap(); + std::fs::create_dir_all(dir.path().join(".weft")).unwrap(); + std::fs::write(dir.path().join(".weft/loomweave"), "i am a file, not a dir").unwrap(); let out = loomweave_bin() .args(["install", "--force", "--path"]) @@ -246,7 +248,7 @@ fn install_force_rejects_non_directory_loomweave() { .failure(); let stderr = String::from_utf8(out.get_output().stderr.clone()).unwrap(); assert!( - stderr.contains("can only overwrite an existing .loomweave/ directory"), + stderr.contains("can only overwrite an existing .weft/loomweave/ directory"), "error did not mention the --force non-directory guard: {stderr}" ); } @@ -260,7 +262,7 @@ fn install_skips_loomweave_init_when_dir_already_exists() { .assert() .success(); - // Second bare install must succeed: skip .loomweave/ init but still apply + // Second bare install must succeed: skip .weft/loomweave/ init but still apply // skills/hooks idempotently and report "already initialised". 
let out = loomweave_bin() .args(["install", "--path"]) @@ -283,7 +285,7 @@ fn install_force_replaces_existing_loomweave_dir_without_overwriting_yaml() { .assert() .success(); - let loomweave = dir.path().join(".loomweave"); + let loomweave = dir.path().join(".weft/loomweave"); fs::write(loomweave.join("stale.tmp"), "stale").unwrap(); fs::write( dir.path().join("loomweave.yaml"), @@ -299,7 +301,7 @@ fn install_force_replaces_existing_loomweave_dir_without_overwriting_yaml() { assert!( !loomweave.join("stale.tmp").exists(), - "--force should remove stale .loomweave/ contents" + "--force should remove stale .weft/loomweave/ contents" ); assert!( loomweave.join("loomweave.db").exists(), @@ -316,14 +318,14 @@ fn install_force_replaces_existing_loomweave_dir_without_overwriting_yaml() { #[cfg(unix)] #[test] fn install_cleans_up_loomweave_dir_when_post_mkdir_step_fails() { - // Bug clarion-ed5017139f: `loomweave install` left .loomweave/ partially + // Bug clarion-ed5017139f: `loomweave install` left .weft/loomweave/ partially // populated on failure, blocking re-install without manual rm -rf. // // Reproducer: pre-create loomweave.yaml as a *broken symlink* whose target // sits under a non-existent parent dir. Install's `yaml_path.exists()` // check follows symlinks → returns false → install attempts `fs::write`, // which follows the symlink → tries to open a path under a non-existent - // dir → ENOENT. By that point .loomweave/ has been mkdir'd and populated; + // dir → ENOENT. By that point .weft/loomweave/ has been mkdir'd and populated; // the bug was leaving it on disk. 
use std::os::unix::fs::symlink; @@ -341,10 +343,10 @@ fn install_cleans_up_loomweave_dir_when_post_mkdir_step_fails() { .assert() .failure(); - let loomweave = dir.path().join(".loomweave"); + let loomweave = dir.path().join(".weft/loomweave"); assert!( !loomweave.exists(), - ".loomweave/ should have been cleaned up after install failed, \ + ".weft/loomweave/ should have been cleaned up after install failed, \ but it still exists at {}", loomweave.display() ); @@ -385,8 +387,8 @@ fn install_claude_code_writes_mcp_json_without_initialising_loomweave_dir() { .success(); assert!( - !dir.path().join(".loomweave").exists(), - "--claude-code should not create .loomweave/" + !dir.path().join(".weft/loomweave").exists(), + "--claude-code should not create .weft/loomweave/" ); let raw = fs::read_to_string(dir.path().join(".mcp.json")).unwrap(); let parsed: serde_json::Value = serde_json::from_str(&raw).unwrap(); @@ -417,8 +419,8 @@ fn install_codex_writes_requested_config_without_initialising_loomweave_dir() { .success(); assert!( - !dir.path().join(".loomweave").exists(), - "--codex should not create .loomweave/" + !dir.path().join(".weft/loomweave").exists(), + "--codex should not create .weft/loomweave/" ); let raw = fs::read_to_string(&codex_config).unwrap(); assert!( diff --git a/crates/loomweave-cli/tests/sarif.rs b/crates/loomweave-cli/tests/sarif.rs index 8d37de43..bd65da54 100644 --- a/crates/loomweave-cli/tests/sarif.rs +++ b/crates/loomweave-cli/tests/sarif.rs @@ -105,7 +105,7 @@ integrations: fs::write(dir.path().join("loomweave.yaml"), config_content).unwrap(); // Create a dummy .loomweave dir so it passes the project layout checks - fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + fs::create_dir_all(dir.path().join(".weft/loomweave")).unwrap(); // Write a mock SARIF file let sarif_content = r#"{ @@ -217,7 +217,7 @@ integrations: ), ) .unwrap(); - fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + 
fs::create_dir_all(dir.path().join(".weft/loomweave")).unwrap(); let sarif_content = r#"{ "version": "2.1.0", "runs": [{ diff --git a/crates/loomweave-cli/tests/secret_scan.rs b/crates/loomweave-cli/tests/secret_scan.rs index 6d10eb84..ae0dcc24 100644 --- a/crates/loomweave-cli/tests/secret_scan.rs +++ b/crates/loomweave-cli/tests/secret_scan.rs @@ -145,7 +145,7 @@ fn plugin_path(plugin_dir: &std::path::Path) -> std::ffi::OsString { } fn conn(project: &std::path::Path) -> Connection { - Connection::open(project.join(".loomweave/loomweave.db")).expect("open loomweave db") + Connection::open(project.join(".weft/loomweave/loomweave.db")).expect("open loomweave db") } fn sha1_hex(bytes: &[u8]) -> String { @@ -417,7 +417,7 @@ fn baseline_suppresses_secret_and_emits_audit_match() { .unwrap(); let hashed_secret = sha1_hex(b"AKIAIOSFODNN7EXAMPLE"); std::fs::write( - project.path().join(".loomweave/secrets-baseline.yaml"), + project.path().join(".weft/loomweave/secrets-baseline.yaml"), format!( r#" version: "1.0" @@ -480,7 +480,7 @@ fn missing_baseline_justification_degrades_to_finding() { .unwrap(); let hashed_secret = sha1_hex(b"AKIAIOSFODNN7EXAMPLE"); std::fs::write( - project.path().join(".loomweave/secrets-baseline.yaml"), + project.path().join(".weft/loomweave/secrets-baseline.yaml"), format!( r#" version: "1.0" @@ -672,7 +672,7 @@ fn baseline_suppression_and_override_admission_are_audited_together() { .unwrap(); let hashed_secret = sha1_hex(b"AKIAIOSFODNN7EXAMPLE"); std::fs::write( - project.path().join(".loomweave/secrets-baseline.yaml"), + project.path().join(".weft/loomweave/secrets-baseline.yaml"), format!( r#" version: "1.0" @@ -748,7 +748,7 @@ fn assert_invalid_baseline_aborts(raw_baseline: &str, expected_stderr: &str) { install_project(project.path()); std::fs::write(project.path().join("leaky.sec"), b"nothing to see\n").unwrap(); std::fs::write( - project.path().join(".loomweave/secrets-baseline.yaml"), + 
project.path().join(".weft/loomweave/secrets-baseline.yaml"), raw_baseline, ) .unwrap(); diff --git a/crates/loomweave-cli/tests/serve.rs b/crates/loomweave-cli/tests/serve.rs index c19c51e0..2f25303d 100644 --- a/crates/loomweave-cli/tests/serve.rs +++ b/crates/loomweave-cli/tests/serve.rs @@ -232,7 +232,7 @@ fn serve_http_responses_match_federation_fixture_contracts() { .assert() .success(); fs::write( - dir.path().join(".loomweave/instance_id"), + dir.path().join(".weft/loomweave/instance_id"), format!("{STABLE_INSTANCE_ID}\n"), ) .expect("seed stable instance ID"); @@ -490,7 +490,7 @@ fn seed_renamed_function_dossier(project_root: &Path) -> String { let new_locator = "python:function:mod.process_v2"; let old_locator = "python:function:mod.process"; let ts = "2026-06-02T00:00:00.000Z"; - let db_path = project_root.join(".loomweave/loomweave.db"); + let db_path = project_root.join(".weft/loomweave/loomweave.db"); let conn = Connection::open(&db_path).expect("open sqlite"); conn.execute( @@ -771,7 +771,7 @@ fn serve_http_files_storage_failure_returns_closed_error_without_raw_detail() { .expect("canonical source path") .display() .to_string(); - let db_path = dir.path().join(".loomweave/loomweave.db"); + let db_path = dir.path().join(".weft/loomweave/loomweave.db"); let conn = Connection::open(&db_path).expect("open sqlite"); conn.execute( "INSERT INTO entities ( @@ -833,7 +833,7 @@ fn serve_http_capabilities_and_mcp_stdio_coexist() { .assert() .success(); fs::write( - dir.path().join(".loomweave/instance_id"), + dir.path().join(".weft/loomweave/instance_id"), format!("{STABLE_INSTANCE_ID}\n"), ) .expect("seed stable instance ID"); @@ -909,7 +909,7 @@ fn serve_http_capabilities_reuses_persisted_instance_id_across_restarts() { .env("PATH", "") .assert() .success(); - let instance_id_path = dir.path().join(".loomweave/instance_id"); + let instance_id_path = dir.path().join(".weft/loomweave/instance_id"); let first_bind = free_loopback_bind(); 
write_http_config(dir.path(), &first_bind); @@ -961,7 +961,7 @@ fn serve_http_capabilities_returns_new_instance_id_after_rotation() { .env("PATH", "") .assert() .success(); - let instance_id_path = dir.path().join(".loomweave/instance_id"); + let instance_id_path = dir.path().join(".weft/loomweave/instance_id"); let first_bind = free_loopback_bind(); write_http_config(dir.path(), &first_bind); @@ -1023,7 +1023,7 @@ fn serve_http_capabilities_creates_instance_id_with_private_unix_mode() { .expect("HTTP /api/v1/_capabilities response"); stop_serve(&mut child); - let instance_id_path = dir.path().join(".loomweave/instance_id"); + let instance_id_path = dir.path().join(".weft/loomweave/instance_id"); assert_eq!( fs::read_to_string(&instance_id_path) .expect("read persisted instance_id") @@ -1047,7 +1047,7 @@ fn serve_http_capabilities_repairs_existing_instance_id_mode() { .env("PATH", "") .assert() .success(); - let instance_id_path = dir.path().join(".loomweave/instance_id"); + let instance_id_path = dir.path().join(".weft/loomweave/instance_id"); let seeded_id = "9bd7234e-6d44-4a38-9ae4-76f912a10221"; fs::write(&instance_id_path, format!("{seeded_id}\n")).expect("seed instance ID"); fs::set_permissions(&instance_id_path, fs::Permissions::from_mode(0o644)) @@ -1078,8 +1078,11 @@ fn serve_rejects_invalid_instance_id_before_serving_http() { .env("PATH", "") .assert() .success(); - fs::write(dir.path().join(".loomweave/instance_id"), "not-a-uuid\n") - .expect("write invalid instance ID"); + fs::write( + dir.path().join(".weft/loomweave/instance_id"), + "not-a-uuid\n", + ) + .expect("write invalid instance ID"); let bind = free_loopback_bind(); write_http_config(dir.path(), &bind); @@ -1644,7 +1647,7 @@ fn serve_wires_recording_llm_provider_and_writer_for_cached_summary_touches() { .success(); let source_path = dir.path().join("demo.py"); fs::write(&source_path, "def entry():\n return 1\n").expect("write source"); - let db_path = 
dir.path().join(".loomweave/loomweave.db"); + let db_path = dir.path().join(".weft/loomweave/loomweave.db"); let conn = Connection::open(&db_path).expect("open sqlite"); conn.execute( "INSERT INTO entities ( @@ -1927,7 +1930,7 @@ fn seed_summary_entity(project_root: &Path) { let source_path = project_root.join("demo.py"); fs::write(&source_path, source).expect("write source"); let content_hash = line_range_content_hash(source, 1, 2); - let db_path = project_root.join(".loomweave/loomweave.db"); + let db_path = project_root.join(".weft/loomweave/loomweave.db"); let conn = Connection::open(&db_path).expect("open sqlite"); conn.execute( "INSERT INTO entities ( @@ -2464,7 +2467,7 @@ fn seed_file_entity(project_root: &Path) -> (String, String, String) { .to_string(); let content_hash = "hash-demo-file".to_owned(); let file_id = "core:file:demo.py".to_owned(); - let db_path = project_root.join(".loomweave/loomweave.db"); + let db_path = project_root.join(".weft/loomweave/loomweave.db"); let conn = Connection::open(&db_path).expect("open sqlite"); conn.execute( "INSERT INTO entities ( @@ -2489,7 +2492,7 @@ fn seed_custom_language_file_entity(project_root: &Path) { .expect("canonical source path") .display() .to_string(); - let db_path = project_root.join(".loomweave/loomweave.db"); + let db_path = project_root.join(".weft/loomweave/loomweave.db"); let conn = Connection::open(&db_path).expect("open sqlite"); conn.execute( "INSERT INTO entities ( @@ -2513,7 +2516,7 @@ fn seed_briefing_blocked_file_entity(project_root: &Path) { .expect("canonical blocked path") .display() .to_string(); - let db_path = project_root.join(".loomweave/loomweave.db"); + let db_path = project_root.join(".weft/loomweave/loomweave.db"); let conn = Connection::open(&db_path).expect("open sqlite"); conn.execute( "INSERT INTO entities ( @@ -2540,7 +2543,7 @@ fn seed_storage_failure_file_entity(project_root: &Path) { .expect("canonical source path") .display() .to_string(); - let db_path = 
project_root.join(".loomweave/loomweave.db"); + let db_path = project_root.join(".weft/loomweave/loomweave.db"); let conn = Connection::open(&db_path).expect("open sqlite"); conn.execute( "INSERT INTO entities ( diff --git a/crates/loomweave-cli/tests/skills.rs b/crates/loomweave-cli/tests/skills.rs index e01fc549..10dca72f 100644 --- a/crates/loomweave-cli/tests/skills.rs +++ b/crates/loomweave-cli/tests/skills.rs @@ -37,10 +37,10 @@ fn install_skills_writes_claude_pack_without_initialising_loomweave_dir() { .exists(), "--skills should not install Codex skills under .agents" ); - // --skills MUST NOT init .loomweave/. + // --skills MUST NOT init .weft/loomweave/. assert!( - !dir.path().join(".loomweave").exists(), - "--skills should not create .loomweave/" + !dir.path().join(".weft/loomweave").exists(), + "--skills should not create .weft/loomweave/" ); } @@ -66,8 +66,8 @@ fn install_codex_skills_writes_agents_pack_without_initialising_loomweave_dir() "Codex skill not installed under .agents" ); assert!( - !dir.path().join(".loomweave").exists(), - "--codex-skills should not create .loomweave/" + !dir.path().join(".weft/loomweave").exists(), + "--codex-skills should not create .weft/loomweave/" ); } @@ -124,7 +124,7 @@ fn install_hooks_merges_session_start_without_clobbering() { cmds.iter() .any(|c| c.contains("loomweave hook session-start")) ); - assert!(!dir.path().join(".loomweave").exists()); + assert!(!dir.path().join(".weft/loomweave").exists()); } #[test] @@ -136,7 +136,10 @@ fn install_all_does_init_skills_and_hooks() { .assert() .success(); - assert!(dir.path().join(".loomweave/loomweave.db").exists(), "no db"); + assert!( + dir.path().join(".weft/loomweave/loomweave.db").exists(), + "no db" + ); assert!( dir.path() .join(".claude/skills/loomweave-workflow/SKILL.md") @@ -170,7 +173,7 @@ fn install_all_is_rerunnable_and_preserves_index() { .arg(dir.path()) .assert() .success(); - let db = dir.path().join(".loomweave/loomweave.db"); + let db = 
dir.path().join(".weft/loomweave/loomweave.db"); assert!(db.exists(), "first --all did not create db"); // Mark the db so we can prove the second run did NOT recreate it. let before = std::fs::metadata(&db).unwrap().modified().unwrap(); diff --git a/crates/loomweave-cli/tests/wp1_e2e.rs b/crates/loomweave-cli/tests/wp1_e2e.rs index e1e52624..24cd29b5 100644 --- a/crates/loomweave-cli/tests/wp1_e2e.rs +++ b/crates/loomweave-cli/tests/wp1_e2e.rs @@ -38,7 +38,7 @@ fn wp1_walking_skeleton_end_to_end() { .assert() .success(); - let loomweave_dir = dir.path().join(".loomweave"); + let loomweave_dir = dir.path().join(".weft/loomweave"); assert!(loomweave_dir.join("loomweave.db").exists()); assert!(loomweave_dir.join("config.json").exists()); assert!(loomweave_dir.join(".gitignore").exists()); diff --git a/crates/loomweave-cli/tests/wp2_e2e.rs b/crates/loomweave-cli/tests/wp2_e2e.rs index 596fd2a1..659871cd 100644 --- a/crates/loomweave-cli/tests/wp2_e2e.rs +++ b/crates/loomweave-cli/tests/wp2_e2e.rs @@ -3,7 +3,7 @@ //! Proves signoff A.2.8: the full Sprint 1 walking-skeleton pipeline works. //! //! Scenario: -//! 1. `loomweave install` initialises `.loomweave/loomweave.db`. +//! 1. `loomweave install` initialises `.weft/loomweave/loomweave.db`. //! 2. A `loomweave-plugin-fixture` binary is placed on a synthetic `$PATH` //! alongside its `plugin.toml` (neighbour-discovery convention, L9). //! 3. A single source file `demo.mt` is created in the project root. @@ -159,7 +159,7 @@ fn wp2_e2e_smoke_fixture_plugin_round_trip() { // 3. Set up the project directory. let project_dir = TempDir::new().expect("create project tempdir"); - // 4. `loomweave install` to initialise `.loomweave/`. + // 4. `loomweave install` to initialise `.weft/loomweave/`. loomweave_bin() .args(["install", "--path"]) .arg(project_dir.path()) @@ -190,7 +190,7 @@ fn wp2_e2e_smoke_fixture_plugin_round_trip() { .success(); // 8. Verify the database — full round-trip identity assertions. 
- let db_path = project_dir.path().join(".loomweave/loomweave.db"); + let db_path = project_dir.path().join(".weft/loomweave/loomweave.db"); let conn = Connection::open(&db_path).expect("open db"); // Assert 1 + 2: exactly one run row with status "completed". @@ -302,7 +302,7 @@ fn wp2_rlimit_as_oom_kill_is_reported_as_host_finding() { "OOM finding missing from analyze diagnostics.\nstdout: {stdout}\nstderr: {stderr}" ); - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let (run_status, stats_raw): (String, String) = conn .query_row("SELECT status, stats FROM runs LIMIT 1", [], |row| { Ok((row.get(0)?, row.get(1)?)) @@ -412,7 +412,7 @@ ontology_version = "0.1.0" // 7. Verify the DB: run = 'failed', entity from fixture IS persisted. // `fail_run` writes the reason into stats.failure_reason (JSON). - let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let (row_count, run_status, stats_raw): (i64, String, String) = conn .query_row( "SELECT COUNT(*), COALESCE(MAX(status), ''), COALESCE(MAX(stats), '') FROM runs", @@ -590,7 +590,7 @@ ontology_version = "0.1.0" // synthetic `core:project:*` finding anchor (REQ-ANALYZE-06, minted to // hold the persisted crash findings) is excluded — it is not a // plugin-produced entity. 
- let conn = Connection::open(project_dir.path().join(".loomweave/loomweave.db")).unwrap(); + let conn = Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")).unwrap(); let entity_count: i64 = conn .query_row( "SELECT COUNT(*) FROM entities WHERE NOT (plugin_id = 'core' AND kind = 'project')", diff --git a/crates/loomweave-core/src/lib.rs b/crates/loomweave-core/src/lib.rs index 6f2aae28..40ba7800 100644 --- a/crates/loomweave-core/src/lib.rs +++ b/crates/loomweave-core/src/lib.rs @@ -12,6 +12,7 @@ pub mod errors; pub mod hardened_git; pub mod llm_provider; pub mod plugin; +pub mod store; pub use embedding_provider::{ ApiEmbeddingProvider, ApiEmbeddingProviderConfig, EmbeddingProvider, EmbeddingProviderError, diff --git a/crates/loomweave-core/src/store.rs b/crates/loomweave-core/src/store.rs new file mode 100644 index 00000000..c3772c97 --- /dev/null +++ b/crates/loomweave-core/src/store.rs @@ -0,0 +1,187 @@ +//! Canonical on-disk layout for Loomweave's per-project store. +//! +//! All of Loomweave's machine-written runtime state for a project lives under +//! `/.weft/loomweave/` — the Weft config/store consolidation +//! convention (`.weft//`, a subtree owned exclusively by that member, +//! which never reads or writes a sibling's subtree). This module is the single +//! source of truth for that location: every consumer routes through it, so the +//! path can never drift across the workspace. +//! +//! This **supersedes the legacy `.loomweave/` directory** (ADR-005, amended by +//! ADR-046). The move is a *clean break* — there is no fallback read of the old +//! location. +//! +//! ## Operator override (`weft.toml`) +//! +//! The operator-authored `/weft.toml` may relocate the store via a +//! member-private `[loomweave].store_dir` key (the canonical store-relocation key +//! across the federation). `weft.toml` is **read-only** to Loomweave — install, +//! doctor, and the CLI never write it (Gate `weft-eb3dee402f`: never add a writer +//! 
to a shared multi-section file). Loomweave reads **only its own +//! `[loomweave]` table**; every other top-level table (a sibling's section) is +//! ignored, so the file stays forward-compatible as siblings add their own keys. +//! +//! Resolution is fail-soft (C-9c, normative): a missing OR malformed `weft.toml` +//! — parse error, wrong type, absent table/key, blank value — is treated as +//! absent, and the built-in default applies. It is never a hard failure. + +use std::path::{Path, PathBuf}; + +use serde::Deserialize; + +/// The shared Weft dotdir under a project root (`.weft/`). Each federation member +/// owns the `/` subtree beneath it; a member never writes another +/// member's subtree, and deleting a sibling's subtree must not break it. +pub const WEFT_DIR: &str = ".weft"; + +/// Loomweave's member subdirectory name under [`WEFT_DIR`]. +pub const MEMBER: &str = "loomweave"; + +/// The operator-authored federation config file, at the project root. +pub const WEFT_TOML: &str = "weft.toml"; + +/// `/.weft/loomweave/` — Loomweave's exclusively-owned store dir. +/// +/// Holds the committed analysis state (`loomweave.db`, `config.json`, +/// `.gitignore`, per-run metadata) and the git-ignored runtime sidecars +/// (`embeddings.db`, `ephemeral.port`, `instance_id`, `*.lock`, WAL files). +/// +/// Honors a `[loomweave].store_dir` override in `weft.toml` when present (a +/// relative override resolves against `project_root`; an absolute one is used +/// verbatim). A missing or malformed `weft.toml` falls back to the built-in +/// default — see the module docs for the fail-soft contract. +#[must_use] +pub fn store_dir(project_root: &Path) -> PathBuf { + match store_dir_override(project_root) { + Some(dir) if dir.is_absolute() => dir, + Some(dir) => project_root.join(dir), + None => project_root.join(WEFT_DIR).join(MEMBER), + } +} + +/// `/.weft/loomweave/loomweave.db` — the structural-graph store. 
+#[must_use] +pub fn db_path(project_root: &Path) -> PathBuf { + store_dir(project_root).join("loomweave.db") +} + +/// Read the member-private `[loomweave].store_dir` override from `weft.toml`, if +/// any. Returns `None` (fail-soft, never an error) when the file is absent or +/// malformed, the `[loomweave]` table or `store_dir` key is absent, or the value +/// is blank. +fn store_dir_override(project_root: &Path) -> Option { + let raw = std::fs::read_to_string(project_root.join(WEFT_TOML)).ok()?; + // Parse only our own `[loomweave]` table; unknown top-level tables (a + // sibling's section) are ignored by serde's default, so a future `[filigree]` + // never makes this parse reject the file. + let parsed: WeftToml = match toml::from_str(&raw) { + Ok(parsed) => parsed, + Err(err) => { + tracing::debug!( + error = %err, + "weft.toml is malformed; falling back to the default store dir" + ); + return None; + } + }; + let store_dir = parsed.loomweave?.store_dir?; + let trimmed = store_dir.trim(); + if trimmed.is_empty() { + None + } else { + Some(PathBuf::from(trimmed)) + } +} + +/// The subset of `weft.toml` Loomweave reads: only its own member-private table. +/// No `deny_unknown_fields` — sibling tables and forward-compatible keys are +/// deliberately tolerated. 
+#[derive(Debug, Deserialize)] +struct WeftToml { + loomweave: Option, +} + +#[derive(Debug, Deserialize)] +struct LoomweaveSection { + store_dir: Option, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn defaults_to_weft_loomweave_when_no_weft_toml() { + let dir = tempfile::tempdir().unwrap(); + assert_eq!(store_dir(dir.path()), dir.path().join(".weft/loomweave")); + assert_eq!( + db_path(dir.path()), + dir.path().join(".weft/loomweave/loomweave.db") + ); + } + + #[test] + fn relative_override_resolves_against_project_root() { + let dir = tempfile::tempdir().unwrap(); + std::fs::write( + dir.path().join(WEFT_TOML), + "[loomweave]\nstore_dir = \"custom/store\"\n", + ) + .unwrap(); + assert_eq!(store_dir(dir.path()), dir.path().join("custom/store")); + } + + #[test] + fn absolute_override_is_used_verbatim() { + let dir = tempfile::tempdir().unwrap(); + std::fs::write( + dir.path().join(WEFT_TOML), + "[loomweave]\nstore_dir = \"/var/lib/loomweave\"\n", + ) + .unwrap(); + assert_eq!(store_dir(dir.path()), Path::new("/var/lib/loomweave")); + } + + #[test] + fn sibling_tables_are_ignored() { + // A sibling's section (and unknown keys in ours) must not make our read + // reject the file — forward-compatible by design. 
+ let dir = tempfile::tempdir().unwrap(); + std::fs::write( + dir.path().join(WEFT_TOML), + "[filigree]\nbase_url = \"http://127.0.0.1:8766\"\n\n\ + [loomweave]\nstore_dir = \"s\"\nfuture_key = 42\n", + ) + .unwrap(); + assert_eq!(store_dir(dir.path()), dir.path().join("s")); + } + + #[test] + fn malformed_toml_falls_back_to_default() { + let dir = tempfile::tempdir().unwrap(); + std::fs::write(dir.path().join(WEFT_TOML), "this is not = = toml [[[").unwrap(); + assert_eq!(store_dir(dir.path()), dir.path().join(".weft/loomweave")); + } + + #[test] + fn wrong_type_store_dir_falls_back_to_default() { + let dir = tempfile::tempdir().unwrap(); + std::fs::write(dir.path().join(WEFT_TOML), "[loomweave]\nstore_dir = 123\n").unwrap(); + assert_eq!(store_dir(dir.path()), dir.path().join(".weft/loomweave")); + } + + #[test] + fn absent_table_or_blank_value_falls_back_to_default() { + let dir = tempfile::tempdir().unwrap(); + // No [loomweave] table at all. + std::fs::write(dir.path().join(WEFT_TOML), "[filigree]\nx = 1\n").unwrap(); + assert_eq!(store_dir(dir.path()), dir.path().join(".weft/loomweave")); + // Present table, blank value. + std::fs::write( + dir.path().join(WEFT_TOML), + "[loomweave]\nstore_dir = \" \"\n", + ) + .unwrap(); + assert_eq!(store_dir(dir.path()), dir.path().join(".weft/loomweave")); + } +} diff --git a/crates/loomweave-federation/src/filigree_url.rs b/crates/loomweave-federation/src/filigree_url.rs index f7d208c2..fed47578 100644 --- a/crates/loomweave-federation/src/filigree_url.rs +++ b/crates/loomweave-federation/src/filigree_url.rs @@ -1,11 +1,23 @@ //! Resolve the live Filigree API base URL. //! //! Mirrors Filigree's ethereal endpoint-discovery convention: the dashboard -//! publishes its live port to `/.filigree/ephemeral.port` (a plain +//! publishes its live port to a per-project `ephemeral.port` file (a plain //! integer, written atomically, present only while the dashboard runs) and //! serves the read API on that port. 
The port is chosen deterministically but //! unpredictably (`8400 + sha256(path) % 1000` with fallback), so it must be -//! *read*, never computed. This mirrors the Filigree sources: +//! *read*, never computed. +//! +//! **Location (Weft store consolidation, ADR-046):** Filigree publishes its +//! runtime state under the shared `.weft//` dotdir, so the port file +//! lives at `/.weft/filigree/ephemeral.port` — the single location this +//! resolver reads. There is **no** fallback to the pre-consolidation +//! `.filigree/` path: after the coordinated cutover every sibling is at `.weft/` +//! by construction, so a port file found only on the legacy path means a +//! mis-sequenced cutover, and resolving it would silently bind to a stale dir +//! (the lacuna-401 failure mode). Instead the resolver folds to the configured +//! URL (`source = "config"`), and the wire-facing `source` label reports that — +//! a loud, visible signal rather than a quiet stale resolve. +//! This mirrors the Filigree sources: //! - `filigree/src/filigree/ephemeral.py::{write,read}_port_file` //! - `filigree/src/filigree/scanner_callback.py::resolve_scanner_api_url_with_source` //! @@ -32,8 +44,9 @@ use crate::config::FiligreeConfig; /// by `project_status` (and, per clarion-318f1254eb, `issues_for`) so an agent /// can tell *where* the URL came from without shelling out to probe ports. pub const SOURCE_DISABLED: &str = "disabled"; -/// The live ethereal port published by Filigree's running dashboard. -pub const SOURCE_EPHEMERAL_PORT: &str = ".filigree/ephemeral.port"; +/// The live ethereal port published by Filigree's running dashboard at the +/// consolidated `.weft/filigree/` location — the only location read (ADR-046). +pub const SOURCE_EPHEMERAL_PORT: &str = ".weft/filigree/ephemeral.port"; /// Loomweave's own configured `integrations.filigree.base_url`. 
pub const SOURCE_CONFIG: &str = "config"; @@ -53,9 +66,12 @@ pub struct FiligreeUrlResolution { /// Resolve the Filigree read-API base URL, preferring the live ethereal port. /// /// - Disabled → no resolved URL, `source = "disabled"`. -/// - A valid `/.filigree/ephemeral.port` → the configured URL -/// with its port overridden by the live port, `source = ".filigree/ephemeral.port"`. -/// - Otherwise → the configured URL unchanged, `source = "config"`. +/// - A valid `/.weft/filigree/ephemeral.port` → the configured URL +/// with its port overridden by the live port, +/// `source = ".weft/filigree/ephemeral.port"`. +/// - Otherwise → the configured URL unchanged, `source = "config"`. A port file +/// present only at the pre-consolidation `.filigree/` path is **not** read; it +/// folds here, so a mis-sequenced cutover is visible (not a stale resolve). #[must_use] pub fn resolve_filigree_url(config: &FiligreeConfig, project_root: &Path) -> FiligreeUrlResolution { let configured_url = config.base_url.clone(); @@ -68,13 +84,13 @@ pub fn resolve_filigree_url(config: &FiligreeConfig, project_root: &Path) -> Fil }; } match read_ephemeral_port(project_root) { - Some(port) => { + Some((port, source)) => { let resolved = override_port(&configured_url, port); FiligreeUrlResolution { enabled: true, configured_url, resolved_url: Some(resolved), - source: SOURCE_EPHEMERAL_PORT, + source, } } None => FiligreeUrlResolution { @@ -86,14 +102,31 @@ pub fn resolve_filigree_url(config: &FiligreeConfig, project_root: &Path) -> Fil } } -/// Read `/.filigree/ephemeral.port` as a TCP port. +/// Filigree's live published ephemeral port at the consolidated +/// `.weft/filigree/` location. `None` when it does not resolve (fail-soft). Use +/// this instead of reading the port file directly so the canonical-location +/// policy stays in one place. 
+#[must_use] +pub fn read_filigree_ephemeral_port(project_root: &Path) -> Option { + read_ephemeral_port(project_root).map(|(port, _source)| port) +} + +/// Read Filigree's published ephemeral port from the consolidated +/// `.weft/filigree/ephemeral.port` location (ADR-046). Returns the port and the +/// `SOURCE_EPHEMERAL_PORT` label. /// /// Mirrors Filigree's `read_port_file`: a plain trimmed integer. Any -/// missing/corrupt/out-of-range/zero content folds to `None` (fail-soft). -fn read_ephemeral_port(project_root: &Path) -> Option { - let path = project_root.join(".filigree").join("ephemeral.port"); +/// missing/corrupt/out-of-range/zero content folds to `None` (fail-soft). The +/// pre-consolidation `.filigree/` path is deliberately not consulted — see the +/// module docs. +fn read_ephemeral_port(project_root: &Path) -> Option<(u16, &'static str)> { + let path = project_root + .join(".weft") + .join("filigree") + .join("ephemeral.port"); let raw = std::fs::read_to_string(&path).ok()?; - raw.trim().parse::().ok().filter(|port| *port != 0) + let port = raw.trim().parse::().ok().filter(|port| *port != 0)?; + Some((port, SOURCE_EPHEMERAL_PORT)) } /// Replace the port in a `scheme://host[:port][/path]` URL, preserving the @@ -132,7 +165,13 @@ mod tests { } } - fn write_port_file(root: &Path, contents: &str) { + fn write_weft_port_file(root: &Path, contents: &str) { + let dir = root.join(".weft").join("filigree"); + std::fs::create_dir_all(&dir).unwrap(); + std::fs::write(dir.join("ephemeral.port"), contents).unwrap(); + } + + fn write_legacy_port_file(root: &Path, contents: &str) { let dir = root.join(".filigree"); std::fs::create_dir_all(&dir).unwrap(); std::fs::write(dir.join("ephemeral.port"), contents).unwrap(); @@ -152,9 +191,9 @@ mod tests { #[test] fn live_ephemeral_port_overrides_the_stale_configured_port() { // The dogfood bug: configured 8766 is dead; the live dashboard is on - // 8542 per .filigree/ephemeral.port. 
+ // 8542 per the consolidated .weft/filigree/ephemeral.port. let dir = tempfile::tempdir().unwrap(); - write_port_file(dir.path(), "8542\n"); + write_weft_port_file(dir.path(), "8542\n"); let res = resolve_filigree_url(&enabled_config(), dir.path()); assert!(res.enabled); assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:8542")); @@ -163,6 +202,20 @@ mod tests { assert_eq!(res.configured_url, "http://127.0.0.1:8766"); } + #[test] + fn legacy_filigree_port_is_not_resolved_after_clean_break() { + // ADR-046 clean break: a sibling still on the pre-consolidation + // `.filigree/` path is NOT read. The live legacy port is ignored and the + // resolver folds to the configured URL, so `source == "config"` surfaces + // the mis-sequenced cutover loudly instead of silently binding the stale + // dir (the lacuna-401 wrong-but-quiet-resolve failure mode). + let dir = tempfile::tempdir().unwrap(); + write_legacy_port_file(dir.path(), "8542\n"); + let res = resolve_filigree_url(&enabled_config(), dir.path()); + assert_eq!(res.source, SOURCE_CONFIG); + assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:8766")); + } + #[test] fn falls_back_to_configured_url_when_no_port_file() { let dir = tempfile::tempdir().unwrap(); @@ -175,7 +228,7 @@ mod tests { #[test] fn corrupt_port_file_folds_to_configured_url() { let dir = tempfile::tempdir().unwrap(); - write_port_file(dir.path(), "not-a-port"); + write_weft_port_file(dir.path(), "not-a-port"); let res = resolve_filigree_url(&enabled_config(), dir.path()); assert_eq!(res.source, SOURCE_CONFIG); assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:8766")); @@ -184,7 +237,7 @@ mod tests { #[test] fn zero_port_is_rejected_as_corrupt() { let dir = tempfile::tempdir().unwrap(); - write_port_file(dir.path(), "0"); + write_weft_port_file(dir.path(), "0"); let res = resolve_filigree_url(&enabled_config(), dir.path()); assert_eq!(res.source, SOURCE_CONFIG); } diff --git 
a/crates/loomweave-federation/src/loomweave_port.rs b/crates/loomweave-federation/src/loomweave_port.rs index dfbb186b..d02399b7 100644 --- a/crates/loomweave-federation/src/loomweave_port.rs +++ b/crates/loomweave-federation/src/loomweave_port.rs @@ -1,9 +1,9 @@ //! Loomweave read-API ephemeral-port contract (ADR-044). //! -//! The twin of Filigree's `.filigree/ephemeral.port` convention, applied to +//! The twin of Filigree's ephemeral-port convention, applied to //! Loomweave's own federation HTTP read API. `serve` binds a per-project //! deterministic port (ephemeral `:0` fallback) and publishes the *actually -//! bound* port to `/.loomweave/ephemeral.port`. Cross-product +//! bound* port to `/.weft/loomweave/ephemeral.port`. Cross-product //! consumers (notably Wardline, which is Python) read this file; nobody //! recomputes a peer's port. The deterministic band here is an implementation //! detail, never part of the file contract. @@ -26,7 +26,7 @@ pub const PORT_BAND_SPAN: u16 = 1000; /// Canonical path of the published port file for a project root. #[must_use] pub fn published_port_path(project_root: &Path) -> PathBuf { - project_root.join(".loomweave").join("ephemeral.port") + loomweave_core::store::store_dir(project_root).join("ephemeral.port") } /// Deterministic-but-unpredictable read-API port for a project, derived from @@ -68,17 +68,17 @@ pub fn read_published_port(project_root: &Path) -> Option { raw.trim().parse::().ok().filter(|port| *port != 0) } -/// Atomically publish `port` to `/.loomweave/ephemeral.port`. +/// Atomically publish `port` to `/.weft/loomweave/ephemeral.port`. /// Writes a temp file in the same directory and `rename(2)`s it into place, so -/// a concurrent reader never observes a torn value. Creates `.loomweave/` if -/// absent. The caller is responsible for the loopback-only invariant (only call -/// this when the bound address is loopback). +/// a concurrent reader never observes a torn value. 
Creates `.weft/loomweave/` +/// if absent. The caller is responsible for the loopback-only invariant (only +/// call this when the bound address is loopback). /// /// # Errors /// Returns the underlying I/O error if the directory cannot be created or the /// temp file cannot be written/renamed. pub fn publish_port(project_root: &Path, port: u16) -> std::io::Result<()> { - let dir = project_root.join(".loomweave"); + let dir = loomweave_core::store::store_dir(project_root); std::fs::create_dir_all(&dir)?; // One `serve` per process publishes, so the PID makes the temp name unique // within this directory without needing a random suffix. @@ -148,18 +148,18 @@ mod tests { } #[test] - fn publish_creates_loomweave_dir_if_absent() { + fn publish_creates_store_dir_if_absent() { let dir = tempfile::tempdir().unwrap(); - // No .loomweave/ yet. - assert!(!dir.path().join(".loomweave").exists()); - publish_port(dir.path(), 10000).expect("publish creates .loomweave/"); + // No .weft/loomweave/ yet. 
+ assert!(!loomweave_core::store::store_dir(dir.path()).exists()); + publish_port(dir.path(), 10000).expect("publish creates .weft/loomweave/"); assert_eq!(read_published_port(dir.path()), Some(10000)); } #[test] fn read_tolerates_trailing_whitespace_and_newline() { let dir = tempfile::tempdir().unwrap(); - std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + std::fs::create_dir_all(loomweave_core::store::store_dir(dir.path())).unwrap(); std::fs::write(published_port_path(dir.path()), " 9500 \n").unwrap(); assert_eq!(read_published_port(dir.path()), Some(9500)); } @@ -167,7 +167,7 @@ mod tests { #[test] fn read_rejects_malformed_zero_and_out_of_range() { let dir = tempfile::tempdir().unwrap(); - std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + std::fs::create_dir_all(loomweave_core::store::store_dir(dir.path())).unwrap(); for bad in ["", "not-a-port", "0", "65536", "70000", "-1", "12.5"] { std::fs::write(published_port_path(dir.path()), bad).unwrap(); assert_eq!( diff --git a/crates/loomweave-federation/src/loomweave_url.rs b/crates/loomweave-federation/src/loomweave_url.rs index 38bb6f95..85dd9cd8 100644 --- a/crates/loomweave-federation/src/loomweave_url.rs +++ b/crates/loomweave-federation/src/loomweave_url.rs @@ -1,6 +1,6 @@ //! Resolve the live Loomweave read-API base URL (ADR-044). //! -//! The reference reader of the `.loomweave/ephemeral.port` file contract and +//! The reference reader of the `.weft/loomweave/ephemeral.port` file contract and //! the twin of [`crate::filigree_url`]. Precedence (consumer-side): the //! published live port wins over a configured URL, which wins over nothing. //! (ADR-044's higher "explicit flag/env" precedence level is realized by each @@ -12,8 +12,8 @@ use std::path::Path; use crate::loomweave_port::read_published_port; -/// The live published port file `.loomweave/ephemeral.port`. 
-pub const SOURCE_EPHEMERAL_PORT: &str = ".loomweave/ephemeral.port"; +/// The live published port file `.weft/loomweave/ephemeral.port`. +pub const SOURCE_EPHEMERAL_PORT: &str = ".weft/loomweave/ephemeral.port"; /// A statically configured URL (e.g. `wardline.yaml: loomweave.url`). pub const SOURCE_CONFIG: &str = "config"; /// Neither a published file nor a configured URL — federation is absent. @@ -79,12 +79,9 @@ mod tests { #[test] fn corrupt_file_folds_to_configured_url() { let dir = tempfile::tempdir().unwrap(); - std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); - std::fs::write( - dir.path().join(".loomweave").join("ephemeral.port"), - "not-a-port", - ) - .unwrap(); + let store = loomweave_core::store::store_dir(dir.path()); + std::fs::create_dir_all(&store).unwrap(); + std::fs::write(store.join("ephemeral.port"), "not-a-port").unwrap(); let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); assert_eq!(res.source, SOURCE_CONFIG); } diff --git a/crates/loomweave-mcp/assets/skills/loomweave-workflow/SKILL.md b/crates/loomweave-mcp/assets/skills/loomweave-workflow/SKILL.md index fd7ab55c..5b8e4d80 100644 --- a/crates/loomweave-mcp/assets/skills/loomweave-workflow/SKILL.md +++ b/crates/loomweave-mcp/assets/skills/loomweave-workflow/SKILL.md @@ -26,7 +26,7 @@ calls this?" without reading a single file. - You need a function's neighborhood, execution paths, or which subsystem it belongs to. **Not for:** editing code, reading exact implementation bodies (use `summary` or -read the file once you have its path), or codebases with no `.loomweave/` index. +read the file once you have its path), or codebases with no `.weft/loomweave/` index. ## Entity IDs — the model @@ -161,7 +161,7 @@ honest-empty unless a plugin emits those tags. Likewise `high_churn` and `search_semantic` is also in the catalogue. 
It is opt-in under `semantic_search:`; when enabled, `loomweave analyze` populates the git-ignored -`.loomweave/embeddings.db` sidecar and the query path filters stale vectors by +`.weft/loomweave/embeddings.db` sidecar and the query path filters stale vectors by content hash. > Not in this catalogue: `emit_observation` as a general-purpose write surface. @@ -202,7 +202,7 @@ and are composed into `summary` prompts with a real guidance fingerprint. ## Launch -`loomweave serve --path ` where `` contains `.loomweave/loomweave.db` +`loomweave serve --path ` where `` contains `.weft/loomweave/loomweave.db` (built by `loomweave analyze `). In an MCP client the tools appear as `mcp__loomweave__find_entity`, etc. diff --git a/crates/loomweave-mcp/src/analyze_runs.rs b/crates/loomweave-mcp/src/analyze_runs.rs index c9616ae7..70f358a5 100644 --- a/crates/loomweave-mcp/src/analyze_runs.rs +++ b/crates/loomweave-mcp/src/analyze_runs.rs @@ -120,7 +120,7 @@ pub(crate) fn kill_run(handle: &mut RunHandle) { /// Best-effort delete a finished run's progress file as its handle is evicted /// from the registry. A missing file is success — a run may exit before writing -/// one. Keeps `.loomweave/runs/*.progress.json` from accumulating across a +/// one. Keeps `.weft/loomweave/runs/*.progress.json` from accumulating across a /// long-lived `loomweave serve` (clarion-7e0c21558a). pub(crate) fn reap_progress_file(path: &std::path::Path) { match std::fs::remove_file(path) { diff --git a/crates/loomweave-mcp/src/catalogue/semantic.rs b/crates/loomweave-mcp/src/catalogue/semantic.rs index a884271a..fa16996b 100644 --- a/crates/loomweave-mcp/src/catalogue/semantic.rs +++ b/crates/loomweave-mcp/src/catalogue/semantic.rs @@ -4,7 +4,7 @@ //! or no provider is configured, the tool returns an explicit "not enabled" //! result — never a faked or empty-as-if-complete answer. When enabled it embeds //! the query, runs a **bounded exact cosine scan** over the git-ignored sidecar -//! 
(`.loomweave/embeddings.db`), and returns ranked, SEI-carrying entities. Only +//! (`.weft/loomweave/embeddings.db`), and returns ranked, SEI-carrying entities. Only //! embeddings whose `content_hash` matches the entity's current hash are //! considered, so stale vectors never surface (freshness, like the summary //! cache). diff --git a/crates/loomweave-mcp/src/lib.rs b/crates/loomweave-mcp/src/lib.rs index 854c3860..6a7ced3a 100644 --- a/crates/loomweave-mcp/src/lib.rs +++ b/crates/loomweave-mcp/src/lib.rs @@ -659,7 +659,7 @@ pub fn list_tools() -> Vec { }, ToolDefinition { name: "entity_semantic_search_list", - description: "Rank entities by semantic (embedding cosine) similarity to a `query` string, within an optional `scope`. OPT-IN: semantic search is OFF by default; when disabled or no embedding provider is configured the tool returns result_kind=`not_enabled` with a missing-signal note (never a faked or empty-as-complete result). When enabled it embeds the query and runs a bounded exact cosine scan over the git-ignored `.loomweave/embeddings.db` sidecar (built at analyze time), considering only embeddings whose content_hash matches the entity's current hash (stale vectors never surface). Bounded (limit default 20, max 100; page.total/truncated). Each result carries its `sei` and a `score`.", + description: "Rank entities by semantic (embedding cosine) similarity to a `query` string, within an optional `scope`. OPT-IN: semantic search is OFF by default; when disabled or no embedding provider is configured the tool returns result_kind=`not_enabled` with a missing-signal note (never a faked or empty-as-complete result). When enabled it embeds the query and runs a bounded exact cosine scan over the git-ignored `.weft/loomweave/embeddings.db` sidecar (built at analyze time), considering only embeddings whose content_hash matches the entity's current hash (stale vectors never surface). Bounded (limit default 20, max 100; page.total/truncated). 
Each result carries its `sei` and a `score`.", input_schema: json!({ "type": "object", "properties": { @@ -824,7 +824,7 @@ fn no_index_message(project_root: &Path) -> String { let root = project_root.display(); format!( "Loomweave has no index for this project yet \ -({root}/.loomweave/loomweave.db is missing), so the structural graph has not been \ +({root}/.weft/loomweave/loomweave.db is missing), so the structural graph has not been \ built and every Loomweave tool is unavailable. Run `loomweave install --path {root}` \ then `loomweave analyze {root}` in a terminal to extract the entity / edge graph, \ then reconnect this MCP server." @@ -861,7 +861,7 @@ fn initialize_result_no_index(project_root: &Path) -> Value { } /// JSON-RPC dispatch for the degraded "no index" stdio server: the project has -/// no `.loomweave/loomweave.db`, so there is no graph to query. `initialize` +/// no `.weft/loomweave/loomweave.db`, so there is no graph to query. `initialize` /// succeeds (the client connects cleanly rather than seeing the server die) and /// `tools/call` returns the actionable chirp as a tool result with /// `isError: true` — the load-bearing channel, since not every client surfaces @@ -902,7 +902,7 @@ pub fn handle_json_rpc_no_index(request: &Value, project_root: &Path) -> Option< /// Serve a degraded MCP stdio session for a project with no index. Mirrors /// [`serve_stdio`] (synchronous — there are no storage-backed async tools to /// drive) but routes every request through [`handle_json_rpc_no_index`]. Used by -/// `loomweave serve` when `.loomweave/loomweave.db` is absent, so the client +/// `loomweave serve` when `.weft/loomweave/loomweave.db` is absent, so the client /// connects and is told to run analyze rather than watching the server exit. 
pub fn serve_stdio_no_index( project_root: &Path, @@ -1627,7 +1627,7 @@ impl ServerState { } }; - let db_path = self.project_root.join(".loomweave").join("loomweave.db"); + let db_path = loomweave_core::store::db_path(&self.project_root); let project_root = self.project_root.clone(); let sheet_id = promoted.id.clone(); let write_result = @@ -3325,7 +3325,7 @@ fn summary_briefing_blocked(entity_json: &Value, reason: &str) -> Value { let remediation = if reason == "unscanned_source" { "Entity source file was not covered by the pre-ingest secret scan. Re-run with scanner coverage for that path or fix the plugin source path before requesting a summary." } else { - "File flagged by pre-ingest secret scan. Fix the secret or whitelist via .loomweave/secrets-baseline.yaml. See ADR-013." + "File flagged by pre-ingest secret scan. Fix the secret or whitelist via .weft/loomweave/secrets-baseline.yaml. See ADR-013." }; let entity_id = entity_json .get("id") diff --git a/crates/loomweave-mcp/src/snapshot.rs b/crates/loomweave-mcp/src/snapshot.rs index 530c741a..52138095 100644 --- a/crates/loomweave-mcp/src/snapshot.rs +++ b/crates/loomweave-mcp/src/snapshot.rs @@ -15,7 +15,7 @@ use std::time::SystemTime; use rusqlite::Connection; use serde::Serialize; -/// Freshness of the `.loomweave/` index relative to the source files Loomweave +/// Freshness of the `.weft/loomweave/` index relative to the source files Loomweave /// ingested. See the plan's Decision Point (b) for the algorithm. /// /// Freshness combines two passes over the files recorded in @@ -31,7 +31,7 @@ use serde::Serialize; /// when no tracked source actually changed. 
The watch set is the *direct /// parents* of ingested files, so an addition/removal in any directory that /// is not such a parent goes undetected — always including the project root -/// itself, which is deliberately never watched (`analyze` writes `.loomweave/` +/// itself, which is deliberately never watched (`analyze` writes `.weft/loomweave/` /// under it, which would otherwise wedge every check to a permanent Stale). /// 2. **In-place modification** — an ingested file edited since the run. This /// needs one `stat` per file and is bounded by `MAX_MODIFICATION_STAT_FILES` @@ -154,7 +154,7 @@ pub struct ProjectSnapshot { } impl ProjectSnapshot { - /// Whether a readable `.loomweave/loomweave.db` was found. When `false`, every + /// Whether a readable `.weft/loomweave/loomweave.db` was found. When `false`, every /// count is `0` and `staleness` is [`Staleness::NeverAnalyzed`]. #[must_use] pub fn db_present(&self) -> bool { @@ -475,7 +475,7 @@ fn compute_staleness( /// Resolve every distinct ingested `source_file_path` to an absolute path, and /// collect the distinct parent directories to watch for structural drift. The /// project root itself is deliberately excluded from the watch set: `analyze` -/// writes `.loomweave/loomweave.db` under it, so the root's mtime is always newer +/// writes `.weft/loomweave/loomweave.db` under it, so the root's mtime is always newer /// than the run and would wedge every check to a permanent false [`Stale`] /// (the footgun the type-level note records). Returns `None` only on a /// query/prepare failure, having set `*degraded`. 
diff --git a/crates/loomweave-mcp/src/tools/analyze.rs b/crates/loomweave-mcp/src/tools/analyze.rs index 180d2312..c20a02fe 100644 --- a/crates/loomweave-mcp/src/tools/analyze.rs +++ b/crates/loomweave-mcp/src/tools/analyze.rs @@ -40,7 +40,7 @@ impl ServerState { }; let run_id = uuid::Uuid::new_v4().to_string(); - let runs_dir = self.project_root.join(".loomweave").join("runs"); + let runs_dir = loomweave_core::store::store_dir(&self.project_root).join("runs"); if let Err(err) = std::fs::create_dir_all(&runs_dir) { return Ok(tool_error_envelope( McpErrorCode::IoError, @@ -218,7 +218,7 @@ impl ServerState { match outcome { CancelOutcome::Cancelled => { - let db_path = self.project_root.join(".loomweave").join("loomweave.db"); + let db_path = loomweave_core::store::db_path(&self.project_root); crate::analyze_runs::mark_run_cancelled_in_db(&db_path, &run_id, &now); Ok(success_envelope(json!({ "run_id": run_id, diff --git a/crates/loomweave-mcp/src/tools/graph.rs b/crates/loomweave-mcp/src/tools/graph.rs index ebbd8425..c5a4f34b 100644 --- a/crates/loomweave-mcp/src/tools/graph.rs +++ b/crates/loomweave-mcp/src/tools/graph.rs @@ -406,7 +406,7 @@ impl ServerState { // Surface the same configured-vs-resolved Filigree endpoint block that // `project_status` reports, so an agent can see WHICH endpoint a result // came from (e.g. an ethereal port resolved from - // `.filigree/ephemeral.port`) instead of curling ports by hand. Null on + // `.weft/filigree/ephemeral.port`) instead of curling ports by hand. Null on // storage-only servers built without a diagnostics context. 
let endpoint = self.filigree_diagnostics_json(); let Some(client) = self.filigree_client.clone() else { diff --git a/crates/loomweave-mcp/src/tools/status.rs b/crates/loomweave-mcp/src/tools/status.rs index 3d85886b..f2256bfd 100644 --- a/crates/loomweave-mcp/src/tools/status.rs +++ b/crates/loomweave-mcp/src/tools/status.rs @@ -190,7 +190,7 @@ impl ServerState { &self, _arguments: &serde_json::Map, ) -> std::result::Result { - let db_path = self.project_root.join(".loomweave").join("loomweave.db"); + let db_path = loomweave_core::store::db_path(&self.project_root); let root_display = self.project_root.display().to_string(); let project_root = self.project_root.clone(); @@ -413,7 +413,7 @@ impl ServerState { } /// ADR-044: report the live read-API endpoint resolved from - /// `.loomweave/ephemeral.port` (the reference reader; `doctor` reports the + /// `.weft/loomweave/ephemeral.port` (the reference reader; `doctor` reports the /// same). Pass `None` config — `project_status` has no static loomweave URL /// of its own; this surfaces whether serve is currently publishing. 
pub(crate) fn loomweave_read_api_json(&self) -> Value { diff --git a/crates/loomweave-mcp/tests/analyze_lifecycle.rs b/crates/loomweave-mcp/tests/analyze_lifecycle.rs index 5149accb..6ce298a8 100644 --- a/crates/loomweave-mcp/tests/analyze_lifecycle.rs +++ b/crates/loomweave-mcp/tests/analyze_lifecycle.rs @@ -20,8 +20,8 @@ use serde_json::{Value, json}; fn open_project() -> (tempfile::TempDir, PathBuf) { let project = tempfile::tempdir().expect("temp project"); - let loomweave_dir = project.path().join(".loomweave"); - std::fs::create_dir(&loomweave_dir).expect("create .loomweave"); + let loomweave_dir = project.path().join(".weft/loomweave"); + std::fs::create_dir_all(&loomweave_dir).expect("create .loomweave"); let db_path = loomweave_dir.join("loomweave.db"); let mut conn = Connection::open(&db_path).expect("open sqlite"); pragma::apply_write_pragmas(&conn).expect("write pragmas"); diff --git a/crates/loomweave-mcp/tests/catalogue_tools.rs b/crates/loomweave-mcp/tests/catalogue_tools.rs index cb0dc95a..aeb4a1a2 100644 --- a/crates/loomweave-mcp/tests/catalogue_tools.rs +++ b/crates/loomweave-mcp/tests/catalogue_tools.rs @@ -14,8 +14,8 @@ use serde_json::{Value, json}; fn open_project() -> (tempfile::TempDir, std::path::PathBuf, Connection) { let project = tempfile::tempdir().expect("temp project"); - let loomweave_dir = project.path().join(".loomweave"); - std::fs::create_dir(&loomweave_dir).expect("create .loomweave"); + let loomweave_dir = project.path().join(".weft/loomweave"); + std::fs::create_dir_all(&loomweave_dir).expect("create .loomweave"); let db_path = loomweave_dir.join("loomweave.db"); let mut conn = Connection::open(&db_path).expect("open sqlite"); pragma::apply_write_pragmas(&conn).expect("write pragmas"); @@ -1434,7 +1434,7 @@ async fn search_semantic_ranks_by_cosine_similarity() { drop(conn); let now = "2026-01-01T00:00:00.000Z"; - let store = EmbeddingStore::open_in_loomweave_dir(project.path()).expect("open sidecar"); + let store = 
EmbeddingStore::open_in_store_dir(project.path()).expect("open sidecar"); let mk = |id: &str, hash: &str| EmbeddingKey { entity_id: id.to_owned(), content_hash: hash.to_owned(), diff --git a/crates/loomweave-mcp/tests/storage_tools.rs b/crates/loomweave-mcp/tests/storage_tools.rs index ea4fecd1..563cc089 100644 --- a/crates/loomweave-mcp/tests/storage_tools.rs +++ b/crates/loomweave-mcp/tests/storage_tools.rs @@ -38,8 +38,8 @@ use serde_json::{Value, json}; fn open_project() -> (tempfile::TempDir, std::path::PathBuf) { let project = tempfile::tempdir().expect("temp project"); - let loomweave_dir = project.path().join(".loomweave"); - std::fs::create_dir(&loomweave_dir).expect("create .loomweave"); + let loomweave_dir = project.path().join(".weft/loomweave"); + std::fs::create_dir_all(&loomweave_dir).expect("create .loomweave"); let db_path = loomweave_dir.join("loomweave.db"); let mut conn = Connection::open(&db_path).expect("open sqlite"); pragma::apply_write_pragmas(&conn).expect("write pragmas"); @@ -1198,7 +1198,7 @@ async fn issues_for_reports_resolved_endpoint_and_result_kind() { // endpoint, and distinguishes reachable-but-empty (no_matches) from a // populated result (matched) — without the agent curling ports by hand. let (project, db_path) = open_project(); - let filigree_dir = project.path().join(".filigree"); + let filigree_dir = project.path().join(".weft").join("filigree"); fs::create_dir_all(&filigree_dir).unwrap(); fs::write(filigree_dir.join("ephemeral.port"), "8542").unwrap(); let config = FiligreeConfig { @@ -4865,7 +4865,7 @@ async fn project_status_reports_counts_latest_run_and_plugins() { result["db_path"] .as_str() .unwrap() - .ends_with(".loomweave/loomweave.db") + .ends_with(".weft/loomweave/loomweave.db") ); assert_eq!(result["git_sha"], "abc123status"); // A bare ServerState carries no diagnostics context. 
@@ -5091,10 +5091,10 @@ async fn project_status_skipped_run_keeps_prior_completed_index_visible() { #[tokio::test] async fn project_status_resolves_live_filigree_endpoint() { - // AC#3: the live ethereal port (.filigree/ephemeral.port) is reported as + // AC#3: the live ethereal port (.weft/filigree/ephemeral.port) is reported as // the resolution source, overriding the stale configured port. let (project, db_path) = open_project(); - let filigree_dir = project.path().join(".filigree"); + let filigree_dir = project.path().join(".weft").join("filigree"); fs::create_dir_all(&filigree_dir).unwrap(); fs::write(filigree_dir.join("ephemeral.port"), "8542").unwrap(); @@ -5155,7 +5155,7 @@ async fn project_status_filigree_falls_back_to_config_without_port_file() { #[tokio::test] async fn project_status_reports_loomweave_read_api_published_port() { // ADR-044: project_status surfaces the live read-API endpoint resolved from - // .loomweave/ephemeral.port (the second in-repo consumer of the resolver, + // .weft/loomweave/ephemeral.port (the second in-repo consumer of the resolver, // alongside doctor). No diagnostics context is needed — it resolves the // file at query time from the project root. 
let (project, db_path) = open_project(); diff --git a/crates/loomweave-scanner/tests/scanner.rs b/crates/loomweave-scanner/tests/scanner.rs index 492ec626..d1907310 100644 --- a/crates/loomweave-scanner/tests/scanner.rs +++ b/crates/loomweave-scanner/tests/scanner.rs @@ -477,7 +477,7 @@ results: #[test] fn absent_baseline_file_is_empty() { let dir = tempfile::tempdir().expect("tempdir"); - let baseline = load_baseline(&dir.path().join(".loomweave/secrets-baseline.yaml")) + let baseline = load_baseline(&dir.path().join(".weft/loomweave/secrets-baseline.yaml")) .expect("missing baseline is accepted"); assert!(baseline.entries().is_empty()); } diff --git a/crates/loomweave-storage/src/embeddings.rs b/crates/loomweave-storage/src/embeddings.rs index f266bdbb..b83c6fbc 100644 --- a/crates/loomweave-storage/src/embeddings.rs +++ b/crates/loomweave-storage/src/embeddings.rs @@ -1,8 +1,8 @@ -//! Git-ignored embeddings sidecar (`.loomweave/embeddings.db`) for `WS5b` semantic -//! search (ADR-040). +//! Git-ignored embeddings sidecar (`.weft/loomweave/embeddings.db`) for `WS5b` +//! semantic search (ADR-040). //! //! Embeddings are large and rebuildable, so they must **not** bloat the -//! committed `.loomweave/loomweave.db` (ADR-005). They live in a separate `SQLite` +//! committed `.weft/loomweave/loomweave.db` (ADR-005). They live in a separate `SQLite` //! file, keyed by `(entity_id, content_hash, model_id)` so they invalidate on //! content change exactly like the summary cache. Because the file is a private, //! rebuildable cache (git-ignored), it carries its own self-contained schema @@ -51,10 +51,10 @@ pub struct EmbeddingStore { conn: Connection, } -/// The conventional sidecar path for a project: `/.loomweave/embeddings.db`. +/// The conventional sidecar path for a project: `/.weft/loomweave/embeddings.db`. 
#[must_use] pub fn embeddings_db_path(project_root: &Path) -> PathBuf { - project_root.join(".loomweave").join("embeddings.db") + loomweave_core::store::store_dir(project_root).join("embeddings.db") } impl EmbeddingStore { @@ -67,8 +67,8 @@ impl EmbeddingStore { Ok(Self { conn }) } - /// Open the conventional `/.loomweave/embeddings.db` sidecar. - pub fn open_in_loomweave_dir(project_root: &Path) -> Result { + /// Open the conventional `/.weft/loomweave/embeddings.db` sidecar. + pub fn open_in_store_dir(project_root: &Path) -> Result { Self::open(&embeddings_db_path(project_root)) } diff --git a/crates/loomweave-storage/src/pragma.rs b/crates/loomweave-storage/src/pragma.rs index 2e3816f0..c58b04c0 100644 --- a/crates/loomweave-storage/src/pragma.rs +++ b/crates/loomweave-storage/src/pragma.rs @@ -7,7 +7,7 @@ //! `busy_timeout=5000` + `wal_autocheckpoint=1000` + `foreign_keys=ON`). //! The `application_id=0x4C4D5756` ("LMWV") and `user_version` PRAGMAs //! close gap STO-02 from `docs/implementation/v1.0-tag-cut/gap-register.md`: -//! they give `.loomweave/loomweave.db` a self-identifying on-disk header so +//! they give `.weft/loomweave/loomweave.db` a self-identifying on-disk header so //! `file(1)` / `sqlite3 .dbinfo` / a future migration runner can refuse //! foreign or forward-incompatible files. //! - **Override surface**: **recompile-only.** None of these PRAGMAs are diff --git a/crates/loomweave-storage/src/prior_index.rs b/crates/loomweave-storage/src/prior_index.rs index 9b2b1bbb..f055b4da 100644 --- a/crates/loomweave-storage/src/prior_index.rs +++ b/crates/loomweave-storage/src/prior_index.rs @@ -154,7 +154,7 @@ pub fn prior_locators_by_file(conn: &Connection) -> Result/.filigree/ephemeral.port` -— a plain trimmed integer, written atomically, present only while the dashboard -is up. 
The port is chosen deterministically but unpredictably +mode, publishes its live listen port to +`/.weft/filigree/ephemeral.port` (the consolidated Weft store +location, ADR-046) — a plain trimmed integer, written atomically, present only +while the dashboard is up. The port is chosen deterministically but unpredictably (`8400 + sha256(project_path) % 1000`, with collision fallback), so it **must be read, never computed**. This mirrors the Filigree sources `filigree/src/filigree/ephemeral.py::{write,read}_port_file` and @@ -1232,9 +1233,13 @@ read, never computed**. This mirrors the Filigree sources | Condition | Resolved URL | `source` label | |---|---|---| | Integration disabled | none (`null`) | `disabled` | -| Valid `/.filigree/ephemeral.port` present | configured URL with its **port** overridden by the live port (scheme, host, path preserved) | `.filigree/ephemeral.port` | +| Valid `/.weft/filigree/ephemeral.port` present | configured URL with its **port** overridden by the live port (scheme, host, path preserved) | `.weft/filigree/ephemeral.port` | | No / unreadable port file | configured URL unchanged | `config` | +A port file present only at the pre-consolidation `/.filigree/` path is +**not** read (ADR-046 clean break): resolution folds to `config`, so a +mis-sequenced cutover is visible rather than a silent stale resolve. + **The negative contract (the load-bearing part).** What Loomweave *refuses* to do is the weft-§5 safety argument: @@ -1267,14 +1272,14 @@ agent can tell *where* the URL came from without probing ports: "enabled": true, "configured_url": "http://127.0.0.1:8766", "resolved_url": "http://127.0.0.1:8542", - "resolution_source": ".filigree/ephemeral.port" + "resolution_source": ".weft/filigree/ephemeral.port" } } ``` `resolution_source` is exactly one of the three `source` labels above -(`disabled` / `.filigree/ephemeral.port` / `config`); `resolved_url` is `null` -only when the integration is disabled. 
+(`disabled` / `.weft/filigree/ephemeral.port` / `config`); `resolved_url` is +`null` only when the integration is disabled. **Verification scope.** There is no normative fixture for this convention — connection discovery resolves a single scalar (a port), not a wire document, so a diff --git a/docs/loomweave/1.0/detailed-design.md b/docs/loomweave/1.0/detailed-design.md index 8e449235..133ce247 100644 --- a/docs/loomweave/1.0/detailed-design.md +++ b/docs/loomweave/1.0/detailed-design.md @@ -880,7 +880,7 @@ CREATE INDEX ix_sei_lineage_sei ON sei_lineage(sei); - **Consult-mode writes** (summary cache, session state) during `loomweave serve` are dispatched on the same writer actor; they interleave with analyze-time writes if a user starts `loomweave analyze` against a running `loomweave serve` (not recommended but survivable). Writes are applied in arrival order; no starvation because consult writes are tiny and sparse. - **Readers** (plugin processes, MCP tool calls, HTTP API handlers, the markdown renderer) open read-only `rusqlite` connections from a `deadpool-sqlite` pool (configurable max: default 16). WAL lets them read against the committed snapshot without blocking writers. - **Checkpointing**: truncate-mode checkpoint issued after each 10 analyze-transactions or after `loomweave analyze` completes, whichever comes first. -- **Operational posture (v0.1)**: running `loomweave analyze` and `loomweave serve` against the same `.loomweave/loomweave.db` simultaneously is supported but `loomweave serve` will observe stale read-snapshots until the analyze finishes and checkpoint completes. `loomweave serve` emits a `LMWV-INFRA-STALE-SNAPSHOT` finding when this is detected. Users wanting zero-stale reads during long analyze runs should prefer the "shadow DB + atomic swap" pattern (analyze writes to `.loomweave/loomweave.db.new`, atomic rename on completion) — available via `loomweave analyze --shadow-db` flag. 
+- **Operational posture (v0.1)**: running `loomweave analyze` and `loomweave serve` against the same `.weft/loomweave/loomweave.db` simultaneously is supported but `loomweave serve` will observe stale read-snapshots until the analyze finishes and checkpoint completes. `loomweave serve` emits a `LMWV-INFRA-STALE-SNAPSHOT` finding when this is detected. Users wanting zero-stale reads during long analyze runs should prefer the "shadow DB + atomic swap" pattern (analyze writes to `.weft/loomweave/loomweave.db.new`, atomic rename on completion) — available via `loomweave analyze --shadow-db` flag. **Why not a single write transaction for the whole batch**: long transactions pin the WAL and prevent checkpoint; WAL growth is unbounded; readers pinned to the pre-analyse snapshot can't advance; SQLite `database is locked` errors surface to consult-mode writes. Per-batch transactions are the industry-standard posture for this workload. @@ -898,7 +898,7 @@ CREATE INDEX ix_sei_lineage_sei ON sei_lineage(sei); ### File layout ``` -/.loomweave/ +/.weft/loomweave/ loomweave.db # main store (WAL files beside it) config.json # internal state: schema version, last run IDs loomweave.log # structured log @@ -915,7 +915,7 @@ CREATE INDEX ix_sei_lineage_sei ON sei_lineage(sei); defaults.yaml # default policy overrides ``` -`.loomweave/` is checked into git (consistent with Filigree's pattern and with the "shared analysis state" principle). SQLite files can diff poorly, so v0.1 ships **two features** for multi-developer teams to handle the committed DB: +`.weft/loomweave/` is checked into git (consistent with Filigree's pattern and with the "shared analysis state" principle). 
SQLite files can diff poorly, so v0.1 ships **two features** for multi-developer teams to handle the committed DB: - `loomweave db export --textual ` — emits a deterministic JSON tree: `entities.jsonl` (one entity per line, sorted by id), `edges.jsonl` (sorted by `(kind, from_id, to_id)`), `guidance.jsonl` (sorted by id), `findings.jsonl` (sorted by id). Summary cache is **excluded** (re-derivable on next run, and JSON-diffing thousands of LLM-generated briefings is not useful). Output is git-friendly: a one-entity change produces a one-line diff. - `loomweave db merge-helper --output merged.db` — applied as a Git merge driver or manually during conflict resolution. Strategy: textual export of each side, deterministic union of entities/edges (last-writer-wins on conflicts keyed by `updated_at`), guidance-sheet conflict surfaced with a `CONFLICT` marker per affected sheet (human must resolve), summary cache cleared (will rebuild). @@ -926,7 +926,7 @@ Users can opt out entirely: `loomweave.yaml:storage.commit_db: false` excludes t ``` # .gitattributes -.loomweave/loomweave.db merge=loomweave-db +.weft/loomweave/loomweave.db merge=loomweave-db # .git/config (or per-developer) [merge "loomweave-db"] @@ -936,10 +936,10 @@ Users can opt out entirely: `loomweave.yaml:storage.commit_db: false` excludes t With the driver registered, conflicting runs from two developers produce a deterministic merged DB at commit time; without it, operators resolve manually via `loomweave db export --textual` on both sides plus `loomweave db import --textual `. -**Git-commit caveats for `.loomweave/loomweave.db`**: +**Git-commit caveats for `.weft/loomweave/loomweave.db`**: - LLM-derived content (briefings, guidance body text) lives in the DB and is therefore committed. Content derived from source files redacted by the pre-ingest secret scanner never reaches the LLM in the first place, so briefings don't contain secret material. 
Briefings that *describe* security-sensitive code (e.g., "this module is the JWT verifier") are fine to commit — they're public documentation. -- `runs//log.jsonl` records raw LLM request/response bodies for audit. This log is **excluded** from git by default via `.loomweave/.gitignore` (`runs/*/log.jsonl`) because those bodies may contain source excerpts that are fine to ship to Anthropic but not appropriate to commit to a public repo. Users opting in to commit run logs must accept that posture explicitly. +- `runs//log.jsonl` records raw LLM request/response bodies for audit. This log is **excluded** from git by default via `.weft/loomweave/.gitignore` (`runs/*/log.jsonl`) because those bodies may contain source excerpts that are fine to ship to Anthropic but not appropriate to commit to a public repo. Users opting in to commit run logs must accept that posture explicitly. - Operational rollouts where the DB is private-not-shared (single-developer experiments, pre-publication audits) can set `loomweave.yaml:storage.commit_db: false` and the DB is `.gitignore`'d instead. ### Migration strategy @@ -1124,7 +1124,7 @@ These core-emitted rules are the ADR-013 audit surface. ADR-013 remains canonica | Rule | Severity | Category | Description | Remediation | ADR | |---|---|---|---|---|---| -| `LMWV-SEC-SECRET-DETECTED` | ERROR | security | Pre-ingest secret scanner detected a credential pattern in a file slated for LLM dispatch. | Remove the secret, rotate the credential, or whitelist via `.loomweave/secrets-baseline.yaml` with a justification. | [ADR-013](../adr/ADR-013-pre-ingest-secret-scanner.md) | +| `LMWV-SEC-SECRET-DETECTED` | ERROR | security | Pre-ingest secret scanner detected a credential pattern in a file slated for LLM dispatch. | Remove the secret, rotate the credential, or whitelist via `.weft/loomweave/secrets-baseline.yaml` with a justification. 
| [ADR-013](../adr/ADR-013-pre-ingest-secret-scanner.md) | | `LMWV-SEC-UNREDACTED-SECRETS-ALLOWED` | ERROR | security | Operator invoked `--allow-unredacted-secrets`; file content reached the LLM provider with secrets intact. | Audit override usage via `filigree list --rule-id=LMWV-SEC-UNREDACTED-SECRETS-ALLOWED --since 30d`. | [ADR-013](../adr/ADR-013-pre-ingest-secret-scanner.md) | | `LMWV-INFRA-SECRET-BASELINE-NO-JUSTIFICATION` | ERROR | infra | Baseline entry missing required `justification` field; entry not honoured. | Add a `justification` string explaining why the match is safe. | [ADR-013](../adr/ADR-013-pre-ingest-secret-scanner.md) | | `LMWV-INFRA-SECRET-BASELINE-MATCH` | INFO | infra | Baseline entry suppressed a scanner detection as an audit event. | None; informational and retained for `NFR-SEC-04` audit. | [ADR-013](../adr/ADR-013-pre-ingest-secret-scanner.md) | @@ -1175,8 +1175,8 @@ $ loomweave analyze /home/john/elspeth Phase 6: subsystem (opus) [████████████████] 43/43 0:06:18 $4.91 Phase 7: cross-cutting (wardline ingest) ✓ (0.4s) Phase 8: emission - Catalog: .loomweave/catalog.json (4.1 MB) - Markdown: .loomweave/catalog/*.md (51 files) + Catalog: .weft/loomweave/catalog.json (4.1 MB) + Markdown: .weft/loomweave/catalog/*.md (51 files) Findings: 137 (127 facts, 10 defects) Filigree observations pushed: 2 Done in 0:38:12, total cost $11.37 @@ -1452,11 +1452,11 @@ not the implementation contract for `/api/v1/_capabilities` or Some risks sit outside Loomweave's code but inside the operator's responsibility. These belong in the team's onboarding doc, not in the tool's runtime defences: -- **Use project-scoped API keys, not personal ones, when `storage.commit_db: true`.** Briefings in `.loomweave/loomweave.db` were paid for by whoever ran `loomweave analyze`. A teammate pulling your committed DB benefits from LLM calls your personal key paid for. Use an Anthropic project / org key, not your personal key, when committing the DB. 
+- **Use project-scoped API keys, not personal ones, when `storage.commit_db: true`.** Briefings in `.weft/loomweave/loomweave.db` were paid for by whoever ran `loomweave analyze`. A teammate pulling your committed DB benefits from LLM calls your personal key paid for. Use an Anthropic project / org key, not your personal key, when committing the DB. - **Protect non-loopback HTTP exposure outside Loomweave.** If operators bind the ADR-014 HTTP read API outside loopback, place an authenticated reverse proxy or equivalent access-control layer in front of it. -- **Review `.loomweave/.gitignore` before first commit.** The default excludes `runs/*/log.jsonl` (raw LLM request/response bodies); if operators opt into committing run logs for audit, they accept that source excerpts sent to Anthropic ship to the repo. That's a choice, not an oversight — but it must be a deliberate one. +- **Review `.weft/loomweave/.gitignore` before first commit.** The default excludes `runs/*/log.jsonl` (raw LLM request/response bodies); if operators opt into committing run logs for audit, they accept that source excerpts sent to Anthropic ship to the repo. That's a choice, not an oversight — but it must be a deliberate one. ### Audit-surface finding IDs @@ -1609,7 +1609,7 @@ Every failure produces either a finding or a run-stats entry. Rule-ID namespacin 1. Single-binary: Linux (x86_64, ARM64), macOS (x86_64, ARM64), Windows (x86_64); no dynamic linking beyond libc. 2. Python plugin: `pip install loomweave-plugin-python`; Python 3.11+. 3. Store survives unclean shutdown (SIGKILL during analysis). -4. `.loomweave/loomweave.db` is git-committable and round-trips across machines. +4. `.weft/loomweave/loomweave.db` is git-committable and round-trips across machines. 
**Ecosystem**: @@ -1649,7 +1649,7 @@ The table below is a navigation aid for implementers: it maps each ADR to the se | ADR-002 | Plugin transport: Content-Length framed JSON-RPC 2.0 subprocess | §1 | | ADR-003 | Entity ID scheme: symbolic canonical-name; file path as property; EntityAlias v0.2 | §2 | | ADR-004 | Finding-exchange format: Filigree-native intake; `metadata.loomweave.*` nesting | §2, §7 | -| ADR-005 | `.loomweave/` git-committable by default | §3 | +| ADR-005 | `.weft/loomweave/` git-committable by default | §3 | | [ADR-006](../adr/ADR-006-clustering-algorithm.md), [ADR-032](../adr/ADR-032-weighted-components-clustering-fallback.md) | Clustering algorithm: Leiden with weighted-components fallback | §4, §5 | | [ADR-007](../adr/ADR-007-summary-cache-key.md) | Summary cache key design and invalidation | §4 | | ADR-008 | Superseded by ADR-014 | §7, §9 | @@ -1732,7 +1732,7 @@ For v0.1 through at least v0.3, each tool's store remains its own concern. Revis | **Knowledge basis** | EntityBriefing field indicating the evidence class a briefing rests on: `static_only`, `runtime_informed`, or `human_verified`. | | **Content-Length framing** | JSON-RPC message framing used by the plugin protocol — the same mechanism LSP uses. `Content-Length: \r\n\r\n`. Required for binary-safe streams and crash-resumability. See §1. | | **Canonical qualified name** | Plugin-language-native fully-qualified identifier used in entity IDs. Python example: `auth.tokens.TokenManager` (with `src/` prefix stripped), not `src/auth/tokens.py::TokenManager`. | -| **Shadow DB** | Operational posture where `loomweave analyze` writes to `.loomweave/loomweave.db.new` and atomic-renames on completion; zero impact on read-snapshot of an already-running `loomweave serve`. Available via `loomweave analyze --shadow-db`. 
| +| **Shadow DB** | Operational posture where `loomweave analyze` writes to `.weft/loomweave/loomweave.db.new` and atomic-renames on completion; zero impact on read-snapshot of an already-running `loomweave serve`. Available via `loomweave analyze --shadow-db`. | | **Pre-ingest redaction** | Secret-scanner pass (`detect-secrets` or equivalent) executed before any file content reaches the LLM provider; unredacted hits block LLM dispatch for that file. See §8, System-design §10. | | **Suite bootstrap** | The set of Filigree-side and Wardline-side changes Loomweave v0.1 requires as prerequisites. Not "integration with existing capabilities" but "new features in sibling tools." Documented in §9. | | **`metadata` extension slot** | Filigree's scan-result ingest preserves a per-finding `metadata` dict verbatim. Loomweave's extension fields (`kind`, `confidence`, `related_entities`, internal-severity preservation, etc.) nest under `metadata.loomweave.*`; Wardline's SARIF property-bag keys nest under `metadata.wardline_properties.*`. Top-level unknown keys are silently dropped by Filigree. | @@ -1744,7 +1744,7 @@ For v0.1 through at least v0.3, each tool's store remains its own concern. Revis | **Entity-resolve endpoint** | `GET /api/v1/entities/resolve?scheme=&value=` — exposes Loomweave's identity-translation layer as a public API so sibling tools can look up Loomweave entity IDs from their native schemes without embedding Loomweave's ID format. See §7. | | **Capability probe / compat report** | At `loomweave analyze` startup, Loomweave probes Filigree and Wardline capabilities and emits one `LMWV-INFRA-SUITE-COMPAT-REPORT` finding summarising what's present and what's degraded. See System-design §11. | | **Textual DB export** | `loomweave db export --textual` — deterministic JSON-lines dump of entities, edges, guidance, findings (summary cache excluded). Enables git-friendly diffs and multi-developer merge resolution on the committed SQLite database. See §3. 
| -| **DB merge-helper** | `loomweave db merge-helper` — Git merge driver that resolves `.loomweave/loomweave.db` conflicts deterministically: union entities/edges, last-writer-wins by `updated_at`, guidance conflicts surfaced for manual resolution, cache cleared. See §3. | +| **DB merge-helper** | `loomweave db merge-helper` — Git merge driver that resolves `.weft/loomweave/loomweave.db` conflicts deterministically: union entities/edges, last-writer-wins by `updated_at`, guidance conflicts surfaced for manual resolution, cache cleared. See §3. | | **Triage-state feedback** | Mechanism by which Filigree's suppression and acknowledgement reasons surface inside Loomweave briefings as operator-acknowledged evidence or synthetic risk entries. Read-only on Loomweave's side. | | **`first_seen_commit` / `last_seen_commit`** | Entity-level provenance: the git SHA of the first run to observe an entity and of the most recent run to still observe it. Enables point-in-time queries without re-running analysis. 
| @@ -1843,7 +1843,7 @@ Revision 5 (2026-04-17) restructures the single design document into a three-lay |---|---|---| | Gap 1: Triage state has no path back into briefings | Read-only feedback loop in briefing composition; operator-acknowledged evidence / synthetic risk entries; `guidance_fingerprint` includes acknowledged finding IDs | System-design §7 Composition, System-design §3 EntityBriefing | | Gap 2: Degraded-mode matrix incomplete (combinations & version skew) | Capability-negotiation probe at `loomweave analyze` startup; single `LMWV-INFRA-SUITE-COMPAT-REPORT` finding summarising all probe results; per-component fallback table covers pre-flag Filigree, REGISTRY additive skew, SARIF property-bag removal | System-design §11 | -| Gap 3: SQLite merge conflicts on committed `.loomweave/loomweave.db` | Promoted `loomweave db export --textual` to v0.1; added `loomweave db merge-helper` with git-merge-driver integration | §3 | +| Gap 3: SQLite merge conflicts on committed `.weft/loomweave/loomweave.db` | Promoted `loomweave db export --textual` to v0.1; added `loomweave db merge-helper` with git-merge-driver integration | §3 | | Gap 4: No story for entity deletion between runs | `LMWV-FACT-ENTITY-DELETED` emitted at Phase 7 via entity-set diff; `LMWV-FACT-GUIDANCE-ORPHAN` for affected sheets; cache invalidation on deletion | §5, System-design §6 | | Leverage 1: Loomweave as cross-tool identity oracle | New `GET /api/v1/entities/resolve?scheme=&value=` endpoint; covers wardline_qualname, wardline_exception_location, file_path, sarif_logical_location | §7 | | Leverage 2: Subsystem-tier-mixing rule | `LMWV-FACT-TIER-SUBSYSTEM-MIXING` and `LMWV-FACT-SUBSYSTEM-TIER-UNANIMOUS` added to Phase 7; unique-to-Loomweave structural signal | §5 | diff --git a/docs/loomweave/1.0/operations.md b/docs/loomweave/1.0/operations.md index 6bea2860..13a98b11 100644 --- a/docs/loomweave/1.0/operations.md +++ b/docs/loomweave/1.0/operations.md @@ -2,18 +2,18 @@ Operator-facing 
reference for the constraints around Loomweave's local-state directory. v1.0 is local-first; the storage subsystem is plain SQLite under -`.loomweave/`. The constraints below come straight out of +`.weft/loomweave/`. The constraints below come straight out of [ADR-011](../adr/ADR-011-storage-architecture.md) (writer-actor + reader-pool over SQLite) and the v1.0 tag-cut gap-register entries DOC-11, STO-01, STO-04, and STO-05. ## 1. Local-first storage layout -Per ADR-011, every Loomweave project keeps its state in a `.loomweave/` +Per ADR-011, every Loomweave project keeps its state in a `.weft/loomweave/` directory at the project root: ``` -.loomweave/ +.weft/loomweave/ ├── loomweave.db SQLite database (entities, edges, runs, findings, summary_cache) ├── loomweave.db-wal SQLite WAL companion file ├── loomweave.db-shm SQLite shared-memory file @@ -23,13 +23,13 @@ directory at the project root: ``` There is no central server, no shared registry, no networked state. -`loomweave install --path` creates `.loomweave/` on a new project root; +`loomweave install --path` creates `.weft/loomweave/` on a new project root; `loomweave analyze` and `loomweave serve` both read and (in `analyze`'s case) write into it. ## 2. NFS is prohibited -**`.loomweave/` MUST live on a local filesystem.** Do not place a project root +**`.weft/loomweave/` MUST live on a local filesystem.** Do not place a project root on NFS, SMB, sshfs, or any other network filesystem and expect `loomweave analyze` or `loomweave serve` to behave correctly. @@ -51,9 +51,9 @@ that workspace. Clone the project to local disk first. Only one `loomweave analyze` may run against a given project root at a time. The v1.0 binary enforces this with an exclusive `fs2` advisory lock on -`.loomweave/loomweave.lock`, acquired at the start of `analyze` and held for the +`.weft/loomweave/loomweave.lock`, acquired at the start of `analyze` and held for the writer-actor lifetime. 
A second `loomweave analyze` against the same -`.loomweave/` fails fast with a clear "another loomweave analyze is in progress +`.weft/loomweave/` fails fast with a clear "another loomweave analyze is in progress against this project" error rather than racing the first analyzer's run state. (See STO-01 in the v1.0 tag-cut gap register for the originating finding.) @@ -82,7 +82,7 @@ The v1.0 supported backup procedure is a four-step shutdown-and-copy. There is no live `loomweave db backup` subcommand at v1.0 (deferred to v1.1, §6). 1. **Ensure no `loomweave analyze` is running** against the project root. - The fs2 advisory lock on `.loomweave/loomweave.lock` will be released; a + The fs2 advisory lock on `.weft/loomweave/loomweave.lock` will be released; a subsequent `loomweave analyze` from a backup script would otherwise race with the backup. @@ -95,7 +95,7 @@ is no live `loomweave db backup` subcommand at v1.0 (deferred to v1.1, §6). captures all committed state: ```bash - sqlite3 .loomweave/loomweave.db "PRAGMA wal_checkpoint(TRUNCATE);" + sqlite3 .weft/loomweave/loomweave.db "PRAGMA wal_checkpoint(TRUNCATE);" ``` `TRUNCATE` mode is the strongest checkpoint — it flushes the WAL into @@ -103,13 +103,13 @@ is no live `loomweave db backup` subcommand at v1.0 (deferred to v1.1, §6). **Why this step matters:** in WAL mode, committed pages live in `loomweave.db-wal` until a checkpoint folds them back into `loomweave.db`. A - naive `cp .loomweave/loomweave.db backup.db` during (or shortly after) a live + naive `cp .weft/loomweave/loomweave.db backup.db` during (or shortly after) a live `analyze` therefore captures a *torn* copy — the main database file is missing the most recent committed transactions, which are still sitting in the separate `-wal` file. Forcing a `TRUNCATE` checkpoint first guarantees `loomweave.db` is self-contained before the copy. -4. **Copy `.loomweave/` to the backup location** with any standard tool +4. 
**Copy `.weft/loomweave/` to the backup location** with any standard tool (`cp -a`, `rsync -a`, `tar`). All three of `loomweave.db`, `loomweave.db-wal`, and `loomweave.db-shm` should be present in the copy; after a successful TRUNCATE the WAL is empty but the file should still @@ -121,8 +121,8 @@ To restore a backup: 1. Stop any running `loomweave analyze` or `loomweave serve` against the project root. -2. Replace the project's `.loomweave/` directory with the backup copy. The - `instance_id` file inside `.loomweave/` is part of the backup; restoring +2. Replace the project's `.weft/loomweave/` directory with the backup copy. The + `instance_id` file inside `.weft/loomweave/` is part of the backup; restoring it preserves the project's federation identity (`/api/v1/_capabilities` `instance_id` stays stable across the restore). 3. Run `loomweave analyze` to validate the restored database. A fresh analyze diff --git a/docs/loomweave/1.0/requirements.md b/docs/loomweave/1.0/requirements.md index beb2c081..9f8fc0b8 100644 --- a/docs/loomweave/1.0/requirements.md +++ b/docs/loomweave/1.0/requirements.md @@ -452,7 +452,7 @@ Write-effect tools (`emit_observation`, `promote_observation`, `propose_guidance > **Deferred to v1.1** per the [Sprint 2 scope amendment §3 (Box B.6)](../../implementation/sprint-2/scope-amendment-2026-05.md). Session persistence is the cursor-based Navigation model deferred with [REQ-MCP-01](#req-mcp-01--cursor-based-session-model); v1.0 tools take explicit `id` arguments per-call and hold no per-session state beyond the request scope. -Sessions are created on MCP `initialize`, idle-timeout after 1 hour (configurable), and persist to `.loomweave/sessions/.json` for reconnection. `loomweave sessions list` and `loomweave sessions close ` provide admin surfaces. +Sessions are created on MCP `initialize`, idle-timeout after 1 hour (configurable), and persist to `.weft/loomweave/sessions/.json` for reconnection. 
`loomweave sessions list` and `loomweave sessions close ` provide admin surfaces. **Rationale**: Agents reconnecting after a transport interruption expect their cursor and breadcrumbs to survive; losing session state mid-investigation is hostile to the workflow. **Verification**: Open a session, populate cursor, disconnect, reconnect, assert state restored. @@ -468,7 +468,7 @@ Human-readable outputs from `loomweave analyze`. > **Deferred to v1.1** per the [Sprint 2 scope amendment §3 (Box B.4 removed)](../../implementation/sprint-2/scope-amendment-2026-05.md). The `catalog.json` artefact has no consumer in the v1.0 MVP MCP surface; the MCP tools query the SQLite store directly. -`loomweave analyze` emits `.loomweave/catalog.json` — a deterministic, stable-shape dump of the entity catalog, edges, subsystems, and findings at run completion. +`loomweave analyze` emits `.weft/loomweave/catalog.json` — a deterministic, stable-shape dump of the entity catalog, edges, subsystems, and findings at run completion. **Rationale**: JSON is the universal interchange format; downstream consumers (dashboards, bespoke scripts, CI gates) can read the catalog without speaking SQLite. Deterministic output means git diffs reflect real changes, not run-to-run noise. **Verification**: Two consecutive runs produce byte-identical `catalog.json`; schema conforms to a versioned JSON schema. @@ -478,7 +478,7 @@ Human-readable outputs from `loomweave analyze`. > **Deferred to v1.1** per the [Sprint 2 scope amendment §3 (Box B.5 removed)](../../implementation/sprint-2/scope-amendment-2026-05.md). Subsystem rendering lands with WP4 in v1.1. -`loomweave analyze` emits `.loomweave/catalog/.md` (one markdown file per subsystem) plus `.loomweave/catalog/index.md` (top-level navigation). Markdown is generated from the store, not authored. +`loomweave analyze` emits `.weft/loomweave/catalog/.md` (one markdown file per subsystem) plus `.weft/loomweave/catalog/index.md` (top-level navigation). 
Markdown is generated from the store, not authored. **Rationale**: Markdown is the human-reading surface for cases where a human (reviewer, new team member) wants to read the catalog without running `loomweave serve` or speaking MCP. Subsystem granularity matches how humans think about large codebases; the index makes discovery cheap. **Verification**: For each subsystem entity, a corresponding markdown file exists and renders cleanly; index lists all subsystems. @@ -836,7 +836,7 @@ Loomweave v0.1 is validated against `elspeth` (~425k LOC Python, ~1,100 files). #### NFR-SCALE-02 — DB size bound -The `.loomweave/loomweave.db` store for an elspeth-scale project fits within 2GB. Larger projects degrade gracefully — no hard cap, but cost of commit-the-DB grows. +The `.weft/loomweave/loomweave.db` store for an elspeth-scale project fits within 2GB. Larger projects degrade gracefully — no hard cap, but cost of commit-the-DB grows. **Rationale**: Committed DBs live in git; a 2GB DB is uncomfortable but workable, 10GB is pathological. Matching elspeth to 500MB-2GB keeps the commit-DB story honest. **Verification**: Elspeth run produces a DB within the bound; DB growth linear with entity count. @@ -856,7 +856,7 @@ The `.loomweave/loomweave.db` store for an elspeth-scale project fits within 2GB #### NFR-SEC-01 — Pre-ingest secret scanning -Before any file content reaches the LLM provider, Loomweave runs a pre-ingest secret scanner (bundled `detect-secrets` or equivalent) on the file buffer. Unredacted secrets emit `LMWV-SEC-SECRET-DETECTED` and **block LLM dispatch for that file**. False-positive whitelist at `.loomweave/secrets-baseline.yaml`. +Before any file content reaches the LLM provider, Loomweave runs a pre-ingest secret scanner (bundled `detect-secrets` or equivalent) on the file buffer. Unredacted secrets emit `LMWV-SEC-SECRET-DETECTED` and **block LLM dispatch for that file**. False-positive whitelist at `.weft/loomweave/secrets-baseline.yaml`. 
**Rationale**: The first real user running `loomweave analyze` on a repo with a committed `.env` would otherwise silently leak to Anthropic. Pre-ingest redaction is a hard dependency for v0.1; retrofitting it after a leak is too late. **Verification**: Fixture with a deliberately-committed test secret blocks LLM dispatch; baseline whitelist suppresses the block for approved false positives. @@ -916,10 +916,10 @@ Every security-relevant event (`LMWV-SEC-SECRET-DETECTED`, `LMWV-SEC-UNREDACTED- #### NFR-SEC-05 — Run log exclusion from git by default -`runs//log.jsonl` (raw LLM request/response bodies) is git-excluded by default via `.loomweave/.gitignore` (`runs/*/log.jsonl`). Operators opt-in to committing explicitly. +`runs//log.jsonl` (raw LLM request/response bodies) is git-excluded by default via `.weft/loomweave/.gitignore` (`runs/*/log.jsonl`). Operators opt-in to committing explicitly. **Rationale**: Run logs may contain source excerpts appropriate to ship to Anthropic but not appropriate to commit to a public repo. Default-exclude prevents accidental exposure; explicit opt-in forces the operator to own the choice. -**Verification**: Fresh install produces `.loomweave/.gitignore` with the rule; log files not tracked in the next `git status`. +**Verification**: Fresh install produces `.weft/loomweave/.gitignore` with the rule; log files not tracked in the next `git status`. **See**: System Design §4 (Storage, Commit posture), §10 (Security, Operator guidance). --- @@ -942,9 +942,9 @@ Loomweave runs entirely locally. The only required network egress is the LLM pro **Verification**: Network egress audit during `loomweave analyze`: only Anthropic endpoints in the packet capture. **See**: System Design §1 (Context & Boundaries). -#### NFR-OPS-03 — `.loomweave/` git-committable +#### NFR-OPS-03 — `.weft/loomweave/` git-committable -The `.loomweave/` directory (including `loomweave.db` by default) is safe to commit to git. 
Textual DB export (`loomweave db export --textual`) and a merge helper (`loomweave db merge-helper`) handle multi-developer conflicts. +The `.weft/loomweave/` directory (including `loomweave.db` by default) is safe to commit to git. Textual DB export (`loomweave db export --textual`) and a merge helper (`loomweave db merge-helper`) handle multi-developer conflicts. **Rationale**: Shared analysis state benefits small teams (one developer pays the LLM cost; the team sees the briefings). Commit-by-default matches Filigree's and Wardline's storage patterns. Textual export makes git diffs meaningful. -**Verification**: `git add .loomweave && git commit` succeeds on a populated store; two developers' simultaneous runs produce a DB that the merge helper resolves deterministically. +**Verification**: `git add .weft/loomweave && git commit` succeeds on a populated store; two developers' simultaneous runs produce a DB that the merge helper resolves deterministically. @@ -964,7 +964,7 @@ The Python plugin installs via `pipx install loomweave-plugin-python` into its o #### NFR-OBSERV-01 — Structured JSON logs -Loomweave emits structured JSON-line logs via the `tracing` crate. Logs rotate at 100MB with 5 files kept. Per-run log at `.loomweave/runs//log.jsonl`; per-process log at `.loomweave/loomweave.log`. +Loomweave emits structured JSON-line logs via the `tracing` crate. Logs rotate at 100MB with 5 files kept. Per-run log at `.weft/loomweave/runs//log.jsonl`; per-process log at `.weft/loomweave/loomweave.log`. **Rationale**: Structured logs are machine-parseable; text logs aren't. Downstream log aggregation (if operators route Loomweave's output into Vector / Loki / Splunk) works by default. **Verification**: Log entries parse as JSON; rotation verified; log levels respected. @@ -1032,7 +1032,7 @@ The dry-run cost estimate is within ±50% of actual spend on representative proj #### NFR-RELIABILITY-01 — Crash-surviving store -> **v1.x status amended by ADR-041.** `.loomweave/loomweave.db` must survive +> **v1.x status amended by ADR-041.** `.weft/loomweave/loomweave.db` must survive > unclean shutdown (SIGKILL during analyze) without corruption.
Subsequent > `loomweave analyze --resume ` safely reopens and re-walks the same run > id; it does not continue from a phase/file checkpoint. diff --git a/docs/loomweave/1.0/system-design.md b/docs/loomweave/1.0/system-design.md index dc3c0889..003e4f6e 100644 --- a/docs/loomweave/1.0/system-design.md +++ b/docs/loomweave/1.0/system-design.md @@ -84,7 +84,7 @@ flowchart TB PyPlugin["loomweave-plugin-python
LSP-style JSON-RPC"] end - Store[("`.loomweave/loomweave.db`
SQLite WAL
committed to git")] + Store[("`.weft/loomweave/loomweave.db`
SQLite WAL
committed to git")] Analyze -.->|"spawn per run"| PyPlugin Serve -.->|"spawn on demand
for consult queries"| PyPlugin @@ -100,7 +100,7 @@ flowchart TB | Mode | Surface | Purpose | v0.1 status | |---|---|---|---| | MCP-for-LLM | `loomweave serve` over stdio | First-class product surface — consult-mode agents hold a cursor, navigate the graph, emit observations to Filigree | Primary | -| Catalog artefacts | `loomweave analyze` writes `.loomweave/catalog.json` + per-subsystem markdown | "I want to read the output" cases | v1.1 (deferred — Sprint 2 amendment §3 removed boxes B.4/B.5; see [REQ-ARTEFACT-01](requirements.md#req-artefact-01--json-catalog-output) / [REQ-ARTEFACT-02](requirements.md#req-artefact-02--per-subsystem-markdown--top-level-index)) | +| Catalog artefacts | `loomweave analyze` writes `.weft/loomweave/catalog.json` + per-subsystem markdown | "I want to read the output" cases | v1.1 (deferred — Sprint 2 amendment §3 removed boxes B.4/B.5; see [REQ-ARTEFACT-01](requirements.md#req-artefact-01--json-catalog-output) / [REQ-ARTEFACT-02](requirements.md#req-artefact-02--per-subsystem-markdown--top-level-index)) | | Semi-dynamic wiki | HTML served by `loomweave serve` | Live finding list, in-browser guidance editing, consult entry points | v1.1 (deferred — NG-13) | ### Boundary contracts with the Weft siblings @@ -425,20 +425,20 @@ flowchart LR **Why not a single giant transaction**: Long transactions pin the WAL, prevent checkpoints from completing, and produce unbounded WAL growth. Per-batch transactions are the industry-standard posture for this workload. -**Writer-actor vs. shadow-DB**. The writer-actor model is the v0.1 default. A shadow-DB alternative (`loomweave analyze --shadow-db` writes to `.loomweave/loomweave.db.new`, atomic-renames on completion) is available for users wanting zero-stale reads from a concurrent `loomweave serve` during long analyze runs. See ADR-011. +**Writer-actor vs. shadow-DB**. The writer-actor model is the v0.1 default. 
A shadow-DB alternative (`loomweave analyze --shadow-db` writes to `.weft/loomweave/loomweave.db.new`, atomic-renames on completion) is available for users wanting zero-stale reads from a concurrent `loomweave serve` during long analyze runs. See ADR-011. ### Crash safety SQLite WAL + writer-actor transactions + explicit `PRAGMA synchronous=NORMAL` give crash-safe storage semantics: a SIGKILL during analyze must not corrupt -`.loomweave/loomweave.db`, and committed rows survive. Per ADR-041, v1.x +`.weft/loomweave/loomweave.db`, and committed rows survive. Per ADR-041, v1.x `loomweave analyze --resume ` reopens the existing run id and re-walks idempotently; it does not read `checkpoints.jsonl` or continue from a phase/file checkpoint. ### Git-friendly storage -`.loomweave/loomweave.db` is committable to git (NFR-OPS-03). SQLite files diff poorly — Loomweave ships two features to handle this: +`.weft/loomweave/loomweave.db` is committable to git (NFR-OPS-03). SQLite files diff poorly — Loomweave ships two features to handle this: 1. **Textual export**: `loomweave db export --textual ` produces deterministic JSON-lines dumps of entities, edges, guidance, findings. Sorted by id / (kind, from, to) so a one-entity change produces a one-line diff. Summary cache is excluded (rebuilds cheaply on next run). @@ -447,7 +447,7 @@ checkpoint. ### File layout ``` -/.loomweave/ +/.weft/loomweave/ loomweave.db # main store (plus WAL files beside it) config.json # schema version, last run IDs loomweave.log # process log @@ -841,7 +841,7 @@ Write-effect tools (`emit_observation`, `promote_observation`, `propose_guidance - Created on MCP `initialize` (≤100ms, NFR-PERF-02). - Default idle timeout: 1 hour. -- State persisted to `.loomweave/sessions/.json` for reconnection. +- State persisted to `.weft/loomweave/sessions/.json` for reconnection. - Admin surface: `loomweave sessions list` / `loomweave sessions close `. 
--- @@ -1077,7 +1077,7 @@ Security is a first-class concern in v0.1 because Loomweave sends source code to | Prompt injection via source | Critical | Adversarial docstrings / comments → briefing field values → future-prompt poisoning via cache | Schema validation + untrusted-content delimiters + `knowledge_basis: static_only` | | Guidance poisoning via LLM-proposed sheets | High | `propose_guidance` MCP tool promotes attacker text into prompts | Manual promotion gate — proposals create observations, not sheets | | HTTP API reachable by other local processes | Medium | `loomweave serve` on shared dev host / container | ADR-014 registry-backend API is unauthenticated but loopback-only by default; non-loopback binds are refused unless explicitly allowed and protected by operator-managed access control. | -| DB tampering via committed `.loomweave/loomweave.db` | Medium | Bad actor edits DB, commits, poisons teammate briefings | Content-hash cross-check on load (v0.2); `loomweave db verify` CLI | +| DB tampering via committed `.weft/loomweave/loomweave.db` | Medium | Bad actor edits DB, commits, poisons teammate briefings | Content-hash cross-check on load (v0.2); `loomweave db verify` CLI | | LLM audit-log leakage via git | Medium | `runs//log.jsonl` contains request/response bodies | Default-excluded from git | | Personal API key charged when committing team DB | Medium (operator) | Developer commits DB generated with personal key | Operator guidance; `--audit-key` hint | | Plugin subprocess compromise | Medium | Malicious third-party plugin reads source or exhausts host resources | Hybrid authority per ADR-021: path jail, Content-Length ceiling, entity-count cap, per-plugin `prlimit` RSS. 
Full syscall sandbox + plugin hash-pinning deferred to v0.2 (NG-16) | @@ -1090,7 +1090,7 @@ Before any file content reaches the LLM (Phases 4, 5, 6), Loomweave runs a pre-i - **Scope**: every file in `analysis.include` is scanned; exclusion globs apply *after* scanning (excluded files never reach the LLM regardless). - **Policy on finding**: - Unredacted secret → `LMWV-SEC-SECRET-DETECTED` (severity: ERROR) + **block LLM dispatch for that file**. Entities in the file still land in the store with summaries marked `briefing_blocked: secret_present`. - - False-positive whitelist: `.loomweave/secrets-baseline.yaml` (same format as `detect-secrets`' baseline; committable and reviewable). + - False-positive whitelist: `.weft/loomweave/secrets-baseline.yaml` (same format as `detect-secrets`' baseline; committable and reviewable). - Override: `loomweave analyze --allow-unredacted-secrets` requires explicit confirmation prompt and records the override in `stats.json`. - **Coverage**: high-entropy strings, common API key patterns (AWS, GitHub, Anthropic, Stripe, etc.), RSA private key headers, JWT-looking tokens. @@ -1138,9 +1138,9 @@ Findings feed Filigree via the normal exchange. Security-focused operators can ` Some risks sit outside Loomweave's code but inside the operator's responsibility: -- **Use project-scoped API keys, not personal ones, when committing the DB**. Briefings in `.loomweave/loomweave.db` were paid for by whoever ran analyze; a teammate pulling your committed DB benefits from calls your personal key paid for. Use an Anthropic project key, not your personal key. +- **Use project-scoped API keys, not personal ones, when committing the DB**. Briefings in `.weft/loomweave/loomweave.db` were paid for by whoever ran analyze; a teammate pulling your committed DB benefits from calls your personal key paid for. Use an Anthropic project key, not your personal key. - **Rotate tokens when a committed DB exposes a stale model's output**. 
If the DB was generated with a leaked or exposed API key, the token is already used; briefings aren't themselves secret but the key's usage fingerprint is. -- **Review `.loomweave/.gitignore` before first commit**. Default excludes `runs/*/log.jsonl` (raw LLM request/response bodies); opting in to commit logs ships source excerpts to the repo — a choice, not an oversight. +- **Review `.weft/loomweave/.gitignore` before first commit**. Default excludes `runs/*/log.jsonl` (raw LLM request/response bodies); opting in to commit logs ships source excerpts to the repo — a choice, not an oversight. Operator-guidance documentation lives in the detailed-design §10 for procedural depth. @@ -1225,7 +1225,7 @@ The parallel listing in [detailed-design.md §11](./detailed-design.md#11-archit | ADR-002 | Plugin transport: Content-Length framed JSON-RPC 2.0 subprocess | Accepted | P0 | Binary-safe framing, resumability after crash, alignment with LSP patterns. Alternatives: newline-delimited JSON (unsafe for content with embedded newlines), Wasm (too early for plugin authoring ergonomics), embedded Python (couples core to Python runtime). | | ADR-003 | Entity ID scheme: symbolic canonical-name; file path as property; EntityAlias v0.2 | Accepted | P0 | Cross-tool identity must survive file moves. Path-embedded IDs silently detach every reference; symbolic IDs survive 80% case (file move without rename). Rename tracking via EntityAlias deferred; manual `--repair-aliases` workaround in v0.1. | | ADR-004 | Finding-exchange format: Filigree-native intake; `metadata.loomweave.*` nesting | Accepted | P0 | Filigree's `POST /api/v1/scan-results` is production path; SARIF requires either translation or Filigree-side work. Nesting convention under `metadata` dict (verified verbatim preservation) avoids silent drops of extension fields. 
| -| ADR-005 | `.loomweave/` git-committable by default; DB included, run logs excluded | To author | P1 | Shared-analysis-state story benefits small teams; run logs may contain source excerpts appropriate to Anthropic but not git. Default-exclude run logs via `.gitignore`; opt-in to commit. | +| ADR-005 | `.weft/loomweave/` git-committable by default; DB included, run logs excluded | To author | P1 | Shared-analysis-state story benefits small teams; run logs may contain source excerpts appropriate to Anthropic but not git. Default-exclude run logs via `.gitignore`; opt-in to commit. | | [ADR-006](../adr/ADR-006-clustering-algorithm.md), [ADR-032](../adr/ADR-032-weighted-components-clustering-fallback.md) | Clustering algorithm: Leiden (with weighted-components fallback) on imports + calls subgraph | Accepted | P0 | Leiden's connected-community guarantee fixes disconnected-cluster defects. Directed, weighted (reference_count); module-level. `weighted_components` is the deterministic fallback selectable via config when a local component cut is preferred. Modularity score recorded, not enforced (v0.1); weak threshold is reported via finding. | | [ADR-007](../adr/ADR-007-summary-cache-key.md) | Summary cache key design: `(entity_id, content_hash, prompt_template_id, model_tier, guidance_fingerprint)` + TTL backstop + churn-eager invalidation | Accepted | P0 | Full 5-part key captures all syntactic staleness paths; TTL backstop (180d default) bounds semantic staleness the key alone doesn't see; churn-eager invalidation on `LMWV-FACT-GUIDANCE-CHURN-STALE` makes stale-guidance pressure visible via cost. Neighborhood-drift flag (`stale_semantic: true`) rather than forced miss preserves NFR-COST-02's 95% hit-rate target. Block C1 spike validates the assumption. | | ADR-008 | (Superseded by ADR-014.) | Superseded | — | Initial Filigree file-registry displacement design was "feature flag"; recon showed it's schema surgery. See ADR-014. 
| @@ -1233,7 +1233,7 @@ The parallel listing in [detailed-design.md §11](./detailed-design.md#11-archit | ADR-010 | MCP as first-class surface — lock-in cost vs ecosystem reach | To author | P2 | Anthropic's MCP standard is the ecosystem's current centre of gravity for LLM tool integrations; lock-in cost is acknowledged but the ecosystem reach outweighs it for v0.1. Strategic review at v0.3+. | | [ADR-011](../adr/ADR-011-writer-actor-concurrency.md) | Writer-actor concurrency model (vs shadow-DB swap) | Accepted | P0 | Single writer actor + per-N-files transactions (default N=50) is the committed shape; `--shadow-db` opt-in for zero-stale-read scenarios. Design-review §2.2 CRITICAL flag retires. SQLite-concurrency-under-load assumption named as v0.2 validation task (`NG-28` proposed). | | [ADR-012](../adr/ADR-012-http-auth-default.md) | Historical HTTP read-API auth proposal: UDS default with TCP+token fallback | Superseded for ADR-014 registry-backend API | P0 | ADR-014 now owns the registry-backend HTTP read API posture: unauthenticated loopback-only by default, non-loopback refused unless explicitly allowed and protected externally. ADR-012 remains context for the earlier broad HTTP API proposal. | -| [ADR-013](../adr/ADR-013-pre-ingest-secret-scanner.md) | Pre-ingest secret scanner with LLM-dispatch block | Accepted | P0 | Rust-native port of detect-secrets rule set (preserves NFR-OPS-04 single-binary). File-level block on detection; structural extraction preserved; briefings marked `briefing_blocked: secret_present`. `.loomweave/secrets-baseline.yaml` for false-positives. `--allow-unredacted-secrets` requires TTY confirm OR explicit `--confirm-allow-unredacted-secrets=yes-i-understand` in CI. | +| [ADR-013](../adr/ADR-013-pre-ingest-secret-scanner.md) | Pre-ingest secret scanner with LLM-dispatch block | Accepted | P0 | Rust-native port of detect-secrets rule set (preserves NFR-OPS-04 single-binary). 
File-level block on detection; structural extraction preserved; briefings marked `briefing_blocked: secret_present`. `.weft/loomweave/secrets-baseline.yaml` for false-positives. `--allow-unredacted-secrets` requires TTY confirm OR explicit `--confirm-allow-unredacted-secrets=yes-i-understand` in CI. | | [ADR-014](../adr/ADR-014-filigree-registry-backend.md) | Filigree `registry_backend` flag + pluggable `RegistryProtocol` — schema surgery, not config flip | Accepted | P0 | Four NOT-NULL foreign keys on `file_records(id)` + three auto-create paths require a real interface, not a flag. Loomweave's shadow-registry fallback preserves v0.1 shipability when Filigree hasn't landed the surgery. | | [ADR-015](../adr/ADR-015-wardline-filigree-emission.md) | Wardline→Filigree emission ownership: Loomweave-side SARIF translator (v0.1), native Wardline POST (v0.2) | Accepted | P0 | Wardline has no HTTP client today (`integration-recon:339`); adding one is a refactor not on the v0.1 timeline. Loomweave-side translator ships independently; translator stays permanent for Semgrep / CodeQL / etc. `weft.md` §5 asterisk 1 retires when native emitter lands. Revision trigger: Block C2 spike showing emitter is ≤1 day of work promotes to v0.1. | | [ADR-016](../adr/ADR-016-observation-transport.md) | Observation transport: `filigree mcp` subprocess spawn (v0.1); `POST /api/v1/observations` HTTP (v0.2) | Accepted | P0 | Per Q1 scope commitment, observation HTTP transport deferred to v0.2. v0.1 emits via Loomweave spawning `filigree mcp` subprocess and calling existing `create_observation` MCP tool over stdio. v0.2 HTTP endpoint is the retirement trigger; capability probe detects via `HEAD /api/v1/observations`. 
| diff --git a/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md b/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md index 09557739..49201398 100644 --- a/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md +++ b/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md @@ -1,6 +1,11 @@ # ADR-005: `.loomweave/` Directory Git-Tracking Policy -**Status**: Accepted; amended by ADR-041 +**Status**: Accepted; amended by ADR-041, ADR-046 + +> **ADR-046 amendment:** the directory tracked by this policy moved from +> `.loomweave/` to `.weft/loomweave/` (Weft store consolidation, clean break). +> The tracked-vs-ignored split below is unchanged — only the parent path. Read +> every `.loomweave/` path below as `.weft/loomweave/`. **Date**: 2026-04-18 **Deciders**: qacona@gmail.com **Context**: `loomweave install` must write a `.gitignore` inside `.loomweave/` that diff --git a/docs/loomweave/adr/ADR-018-identity-reconciliation.md b/docs/loomweave/adr/ADR-018-identity-reconciliation.md index 5745b768..0f1ebc4b 100644 --- a/docs/loomweave/adr/ADR-018-identity-reconciliation.md +++ b/docs/loomweave/adr/ADR-018-identity-reconciliation.md @@ -98,10 +98,11 @@ This preserves the federation test: removing Wardline breaks Wardline-derived an Wardline now publishes the NG-25 trust-vocabulary descriptor as `vocabulary.yaml` and through `wardline vocab`. Loomweave's Python plugin consumes that descriptor instead of importing `wardline.core.registry.REGISTRY`. Resolution is -project-local `.wardline/vocabulary.yaml` first, then the installed Wardline -distribution data file `wardline/core/vocabulary.yaml`; both paths are plain -file reads and neither imports `wardline`, `wardline.core`, or -`wardline.core.registry`. 
+project-local first — the consolidated `.weft/wardline/vocabulary.yaml` only, with +no fallback to the pre-consolidation `.wardline/vocabulary.yaml` path (ADR-046 +clean break) — then the installed Wardline distribution data file +`wardline/core/vocabulary.yaml`; all paths are plain file reads and none import +`wardline`, `wardline.core`, or `wardline.core.registry`. The plugin records source-observed decorator facts on Loomweave entities as Wardline metadata and `wardline:*` tags. Wardline remains authoritative for the diff --git a/docs/loomweave/adr/ADR-046-weft-store-consolidation.md b/docs/loomweave/adr/ADR-046-weft-store-consolidation.md new file mode 100644 index 00000000..d82e39f2 --- /dev/null +++ b/docs/loomweave/adr/ADR-046-weft-store-consolidation.md @@ -0,0 +1,127 @@ +# ADR-046: Weft Store Consolidation (`.weft/loomweave/`) + +**Status**: Accepted; amends ADR-005, ADR-040, ADR-044 +**Date**: 2026-06-07 +**Deciders**: john@pgpl.net +**Context**: The Weft federation is consolidating each member's scattered project +dotdir into one shared convention so a project root holds a single `.weft/` +tree (`weft.toml` for operator config; `.weft/<member>/` for each member's +machine-written runtime state) instead of N sibling dotdirs (`.loomweave/`, +`.filigree/`, `.wardline/`, …). This ADR records Loomweave's side of that move. + +## Summary + +Loomweave's per-project store moves from `<project_root>/.loomweave/` to +`<project_root>/.weft/loomweave/` — a **clean break**: there is no fallback read of the +old `.loomweave/` location. Every consumer routes through a single helper +(`loomweave_core::store`), so the path cannot drift across the workspace. + +Two adjacent decisions ride along: + +1. **Operator override (`weft.toml`).** The operator-authored + `<project_root>/weft.toml` may relocate the store via a member-private + `[loomweave].store_dir` key.
Loomweave reads **only its own `[loomweave]` + table** and treats a missing or malformed `weft.toml` as absent (silent + fallback to the default — never a hard failure; C-9c). `weft.toml` is + **read-only** to Loomweave: `install`, `doctor`, and the CLI never write it + (the C-4 discipline behind Gate `weft-eb3dee402f` — never add a writer to a + shared multi-section file). No shared/cross-member `weft.toml` keys are read + yet; those are hub-pinned and pending Loomweave's schema proposal. + +2. **Sibling resolution reads `.weft/` only (clean break).** When Loomweave + resolves a sibling's runtime state it reads the consolidated + `<project_root>/.weft/<member>/` location **only** — Filigree's live read-API port + at `<project_root>/.weft/filigree/ephemeral.port`, Wardline's trust-vocabulary + descriptor at `<project_root>/.weft/wardline/vocabulary.yaml`. There is **no** fallback + to the pre-consolidation `.<member>/` path. Weft is pre-launch with a + coordinated cutover, so after launch every sibling is at `.weft/` by + construction; a sibling found only on the legacy path means a mis-sequenced + cutover, and silently resolving it would bind a stale dir (the lacuna-401 + wrong-but-quiet-resolve failure mode). Instead resolution folds to the + fail-soft default — the configured URL (`source = "config"`) for Filigree, an + absent project descriptor for Wardline — so the wire-facing `source`/status + reports the gap loudly. **Runbook ordering:** Filigree migrates to + `.weft/filigree/` → this build installs → downstream re-init (lacuna). This + build must not be installed against any project until Filigree has migrated. + +## Decision + +### Store location + +`loomweave_core::store::store_dir(project_root)` is the single source of truth: + +- Default: `<project_root>/.weft/loomweave/`. +- Override: `[loomweave].store_dir` in `<project_root>/weft.toml` (a relative + value resolves against the project root; an absolute value is used verbatim).
+- Fail-soft (C-9c): a missing/unparseable `weft.toml`, an absent `[loomweave]` + table or `store_dir` key, a wrong-typed or blank value — all fall back to the + default. Unknown top-level tables (a sibling's section) and unknown keys in + `[loomweave]` are ignored, so the file stays forward-compatible. + +The directory's contents and git-tracking posture are **unchanged** from ADR-005 +— only the parent path moves. `loomweave.db`, `config.json`, and `.gitignore` +remain committed; the WAL sidecars, shadow DB, `embeddings.db`, `ephemeral.port`, +`instance_id`, `*.lock`, `tmp/`, `logs/`, and `runs/*/log.jsonl` remain ignored +by `/.weft/loomweave/.gitignore`. `loomweave.yaml` stays at the project +root (Loomweave's authoritative config; `weft.toml` is enrich-only and never +load-bearing — the §5 deletion test still holds). + +### Amendments to prior ADRs + +- **ADR-005** (`.loomweave/` tracking policy): the tracked-vs-ignored split is + retained verbatim; the directory is now `.weft/loomweave/`. +- **ADR-040** (semantic-search sidecar): `embeddings.db` now lives at + `.weft/loomweave/embeddings.db`. +- **ADR-044** (read-API ephemeral port): Loomweave publishes its own port to + `.weft/loomweave/ephemeral.port`; the loopback-only/port-only/atomic file + contract is otherwise unchanged. Cross-product consumers that read it (e.g. + Wardline) read the `.weft/` location only, matching this clean break. + +## Consequences + +### Positive + +- One `.weft/` tree per project instead of N sibling dotdirs; sibling subtrees + are co-located and each member owns exactly its own subtree. +- A single store helper eliminates the ~30 scattered `.join(".loomweave")` sites + and the drift they invited. +- Operators get a documented, member-private way to relocate the store without + editing Loomweave config (`weft.toml:[loomweave].store_dir`). + +### Negative + +- A clean break orphans any existing `.loomweave/` directory. 
Existing projects + must re-init (`loomweave install` then `loomweave analyze`) under + `.weft/loomweave/`; the old directory can be deleted. This repo's own + committed `.loomweave/` is removed as part of this change; downstream testbeds + (e.g. `lacuna`) need explicit re-init coverage so they are not silently + stranded. + +### Neutral + +- Reading `[loomweave].store_dir` does a small TOML parse on the store-path + resolution path. Its callers (install, serve startup, analyze) are not hot + paths, and the read is fail-soft. + +- The source-walk / secret-scan / pyright skip-lists exclude the whole `.weft/` + dotdir (the default store and all sibling subtrees). A `[loomweave].store_dir` + override pointing *outside* `.weft/` is **not** auto-excluded from those walks, + so an operator who relocates the store under the source tree may see the store + DB walked/scanned. The recommended override stays within `.weft/` (or outside + the analyzed root); per-override skip-list wiring is deferred until a concrete + need appears. + +## Related Decisions + +- [ADR-005](./ADR-005-loomweave-dir-tracking.md) — the tracking policy this ADR + relocates. +- [ADR-040](./ADR-040-semantic-search-embeddings.md) — embeddings sidecar path. +- [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) — the ephemeral + port file contract. + +## References + +- Weft federation doctrine `§5` (enrichment-not-load-bearing / deletion test) + and the `weft.toml` / `.weft/<member>/` config-store consolidation contract. +- Gate `weft-eb3dee402f` (C-4) — never add a writer to a shared multi-section + file; the reason `install`/`doctor` never write `weft.toml`.
diff --git a/docs/loomweave/adr/README.md b/docs/loomweave/adr/README.md index f2c16778..f8200b7e 100644 --- a/docs/loomweave/adr/README.md +++ b/docs/loomweave/adr/README.md @@ -40,12 +40,13 @@ This folder is the canonical home for authored Loomweave architecture decision r | [ADR-037](./ADR-037-shared-error-vocabulary.md) | Shared error vocabulary (`loomweave-core::errors`) — two typed enums (`HttpErrorCode`, `McpErrorCode`) as single source of truth; wire spelling unchanged on both surfaces; relates to ADR-034 | Accepted | | [ADR-038](./ADR-038-sei-token-and-signature.md) | SEI token scheme (`loomweave:eid:`), signature schema (plugin-declared versioned JSON), and identity persistence (`sei_bindings` table, not an `entities` column); reserves the `loomweave:eid:` locator namespace; resolves SEI-standard REQ-C-01/REQ-C-02; demotes ADR-003 id to *locator* | Accepted | | [ADR-039](./ADR-039-llm-provider-pivot-openrouter-cli.md) | LLM provider pivot — OpenRouter (live HTTP) + Codex/Claude CLI bridges + recording provider; `CachingModel::OpenAiChatCompletions` (not Anthropic four-`cache_control`-breakpoint); supersedes CON-ANTHROPIC-01 | Accepted | -| [ADR-040](./ADR-040-semantic-search-embeddings.md) | Semantic search (`search_semantic`) — opt-in `EmbeddingProvider` trait (recording + API-endpoint impls), git-ignored `.loomweave/embeddings.db` sidecar keyed `(entity_id, content_hash, model_id)` (extends ADR-005's gitignore list), bounded exact cosine scan, policy-engine cost governance | Accepted | +| [ADR-040](./ADR-040-semantic-search-embeddings.md) | Semantic search (`search_semantic`) — opt-in `EmbeddingProvider` trait (recording + API-endpoint impls), git-ignored `.loomweave/embeddings.db` sidecar keyed `(entity_id, content_hash, model_id)` (extends ADR-005's gitignore list), bounded exact cosine scan, policy-engine cost governance | Accepted; sidecar path relocated by ADR-046 | | [ADR-041](./ADR-041-resume-is-idempotent-reemit.md) | Analyze resume is 
idempotent re-emit, not checkpoint recovery; amends ADR-005/ADR-011 resume language | Accepted | | [ADR-042](./ADR-042-hmac-freshness-and-replay-window.md) | HMAC freshness and replay window — timestamp + nonce headers, crate-backed HMAC, process-local replay cache | Accepted | | [ADR-043](./ADR-043-edge-reanalysis-replacement.md) | Edge reanalysis replacement — per-source-file anchored-edge replacement and edge metadata upsert; amends ADR-026 | Accepted | -| [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — `.loomweave/ephemeral.port` as a normative cross-product file contract (loopback-only, port-only, atomic) + consume-time resolution precedence (explicit > file > config > none), per-project deterministic port, installer stops pinning 9111; relates to ADR-034 | Accepted | +| [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — `.loomweave/ephemeral.port` as a normative cross-product file contract (loopback-only, port-only, atomic) + consume-time resolution precedence (explicit > file > config > none), per-project deterministic port, installer stops pinning 9111; relates to ADR-034 | Accepted; port path relocated by ADR-046 | | [ADR-045](./ADR-045-worktree-source-staleness.md) | Worktree-source staleness — `Staleness::StaleWorktree` + `worktree_dirty` via hardened, hash-free `git ls-files --others` scoped to ingested extensions; closes the unwatched-top-level-dir blind spot without `git status`'s filter-RCE vector; builds on ADR-013/021 untrusted-corpus posture | Accepted | +| [ADR-046](./ADR-046-weft-store-consolidation.md) | Weft store consolidation — store moves `.loomweave/` → `.weft/loomweave/` (clean break, single `loomweave_core::store` helper); operator-private `weft.toml:[loomweave].store_dir` override (read-only, fail-soft C-9c); sibling resolution (Filigree port, Wardline vocabulary) reads the consolidated `.weft/` location only, with no fallback to the legacy `.filigree/` / `.wardline/` paths; amends ADR-005/040/044 | Accepted | ##
Backlog still tracked in the detailed design diff --git a/docs/operator/README.md b/docs/operator/README.md index 9ddbd310..4ecd746a 100644 --- a/docs/operator/README.md +++ b/docs/operator/README.md @@ -10,7 +10,7 @@ Practical notes for configuring and running Loomweave. - [Coding-agent LLM providers](./coding-agent-llm-providers.md) — Codex CLI and Claude CLI as local-login alternatives to API-key provider wiring. - [Runtime topology](./runtime-topology.md) — supported `loomweave serve` and - `loomweave analyze` concurrency against one `.loomweave/loomweave.db`. + `loomweave analyze` concurrency against one `.weft/loomweave/loomweave.db`. - [Secret scanning](./secret-scanning.md) — pre-ingest scanner behavior, baseline false-positive workflow, override confirmation, and audit queries. - [Guidance](./guidance.md) — authoring guidance sheets with the `loomweave diff --git a/docs/operator/clustering.md b/docs/operator/clustering.md index 1ce06146..98ff6c1f 100644 --- a/docs/operator/clustering.md +++ b/docs/operator/clustering.md @@ -2,7 +2,7 @@ Loomweave Phase 3 runs after plugin entity and edge extraction. It reads the persisted module dependency graph, clusters modules, and writes subsystem -entities plus `in_subsystem` edges back into `.loomweave/loomweave.db`. +entities plus `in_subsystem` edges back into `.weft/loomweave/loomweave.db`. ## Configuration diff --git a/docs/operator/getting-started.md b/docs/operator/getting-started.md index 1e2875a2..f2cfaf28 100644 --- a/docs/operator/getting-started.md +++ b/docs/operator/getting-started.md @@ -128,10 +128,10 @@ loomweave install loomweave analyze ``` -A bare `loomweave install` does everything: it initialises `.loomweave/`, installs +A bare `loomweave install` does everything: it initialises `.weft/loomweave/`, installs the agent-orientation assets, writes Claude Code MCP config, and upserts the Codex MCP config (see [§3](#agent-orientation-installed-by-default)). 
If -`.loomweave/` already exists, init is skipped and the other components are applied +`.weft/loomweave/` already exists, init is skipped and the other components are applied idempotently; pass `--force` to wipe and reinitialise the index. Expected output (abridged): @@ -149,7 +149,7 @@ analyze complete: run ok (entities=NNN, edges=MMM) ``` The first run on a tree of this size completes in well under a minute on -typical hardware. The result lives at `.loomweave/loomweave.db` (a single SQLite +typical hardware. The result lives at `.weft/loomweave/loomweave.db` (a single SQLite file) and is safe to commit to git — see [ADR-005](../loomweave/adr/ADR-005-loomweave-dir-tracking.md). @@ -193,7 +193,7 @@ Pick whichever you have; the questions in step 4 are client-agnostic. A bare `loomweave install` already bundles these for consult-mode agents. The component flags exist for explicit partial installs (e.g. adding the skill to a -project whose `.loomweave/` you do not want re-touched): +project whose `.weft/loomweave/` you do not want re-touched): ```bash loomweave install --claude-code --path /tmp/requests-2.32.4 # Claude Code MCP only @@ -329,7 +329,7 @@ Expected behaviour: - `loomweave analyze` exits **0** with run status `completed`. - A `LMWV-SEC-SECRET-DETECTED` finding lands in `findings` with the message `AwsAccessKeyId detected in /tmp/requests-2.32.4/.env:1`. Inspect with - `sqlite3 .loomweave/loomweave.db "SELECT rule_id, message FROM findings + `sqlite3 .weft/loomweave/loomweave.db "SELECT rule_id, message FROM findings WHERE rule_id LIKE 'LMWV-SEC%';"`. - The `.env` file itself has no language entities (it's not Python), so the finding is anchored to the core-minted file entity rather than a @@ -387,7 +387,7 @@ is never quarantined. Notarized release artifacts are on the post-1.0 roadmap. 
### "secret_present" block fires on a real file -Add the file to `.loomweave/secrets-baseline.yaml` with a written justification +Add the file to `.weft/loomweave/secrets-baseline.yaml` with a written justification (the schema requires it). Full procedure: [secret-scanning.md](./secret-scanning.md). ### `summary` returns an error citing budget or LLM provider diff --git a/docs/operator/guidance.md b/docs/operator/guidance.md index bb1bf7ce..9fe8cfcf 100644 --- a/docs/operator/guidance.md +++ b/docs/operator/guidance.md @@ -14,7 +14,7 @@ promoted sheets reach consult agents through the `guidance_for` MCP read tool and are also composed into auto-generated `summary` prompts with a real `guidance_fingerprint` cache key. -All subcommands operate on `.loomweave/loomweave.db`, so **run `loomweave analyze` +All subcommands operate on `.weft/loomweave/loomweave.db`, so **run `loomweave analyze` first** — the CLI errors if the database is absent. ## Authoring workflow (`REQ-GUIDANCE-03`) diff --git a/docs/operator/loomweave-http-read-api.md b/docs/operator/loomweave-http-read-api.md index 2bfe07da..1fd03f3b 100644 --- a/docs/operator/loomweave-http-read-api.md +++ b/docs/operator/loomweave-http-read-api.md @@ -22,7 +22,7 @@ serve: The read-API port is auto-selected per project — a deterministic port in Loomweave's band (`9400–10399`, disjoint from Filigree's `8400–9399`) with an -ephemeral fallback — and published to `.loomweave/ephemeral.port` while `serve` +ephemeral fallback — and published to `.weft/loomweave/ephemeral.port` while `serve` runs. Set `serve.http.bind` explicitly only to pin a fixed port (ADR-044). 
When `identity_token_env` is configured, Loomweave refuses to start unless the env diff --git a/docs/operator/runtime-topology.md b/docs/operator/runtime-topology.md index 8a5590b3..fb7c61a3 100644 --- a/docs/operator/runtime-topology.md +++ b/docs/operator/runtime-topology.md @@ -1,6 +1,6 @@ # Runtime Topology -Loomweave stores project state in `.loomweave/loomweave.db`. The current v0.1 CLI +Loomweave stores project state in `.weft/loomweave/loomweave.db`. The current v0.1 CLI uses SQLite WAL mode with a 5 second `busy_timeout` on writer and reader connections. `loomweave analyze` opens one writer actor for ingest. `loomweave serve` always opens a reader pool, and opens its own writer actor only when LLM @@ -18,7 +18,7 @@ These storage settings are implementation constants today, not configurable ## Supported One `loomweave analyze` process and one `loomweave serve` process may run against -the same `.loomweave/loomweave.db`. `serve` reads use committed SQLite snapshots: +the same `.weft/loomweave/loomweave.db`. `serve` reads use committed SQLite snapshots: in-flight analyze writes are invisible until their transaction commits and a later read checks out a connection. If LLM-backed `serve` writes race with analyze ingest, SQLite serialises the writers and waits up to 5 seconds before @@ -39,16 +39,16 @@ snapshot for a review session. ## Unsupported Do not run multiple `loomweave analyze` processes against the same -`.loomweave/loomweave.db`. Loomweave has one writer actor per process, not one global +`.weft/loomweave/loomweave.db`. Loomweave has one writer actor per process, not one global writer across processes, so two analyze runs can contend at SQLite's single writer boundary and produce interleaved run state. Do not run `loomweave install --force` while either `loomweave analyze` or -`loomweave serve` is using the same project. `--force` replaces `.loomweave/`, so it +`loomweave serve` is using the same project. 
`--force` replaces `.weft/loomweave/`, so it is an offline maintenance operation. -Do not delete SQLite sidecar files, copy `.loomweave/loomweave.db` without its WAL -sidecars, or edit `.loomweave/` files while Loomweave is running. Stop the processes +Do not delete SQLite sidecar files, copy `.weft/loomweave/loomweave.db` without its WAL +sidecars, or edit `.weft/loomweave/` files while Loomweave is running. Stop the processes first, then copy or repair the store. ## Not Yet Shipped diff --git a/docs/operator/secret-scanning.md b/docs/operator/secret-scanning.md index 8d3d5ff3..04889856 100644 --- a/docs/operator/secret-scanning.md +++ b/docs/operator/secret-scanning.md @@ -10,7 +10,7 @@ Plugin source files and `.env` sidecars are scanned. If a plugin reports an enti ## Whitelist A False Positive -Add `.loomweave/secrets-baseline.yaml` and commit it with the source change: +Add `.weft/loomweave/secrets-baseline.yaml` and commit it with the source change: ```yaml version: "1.0" @@ -125,7 +125,7 @@ are marked by writing `briefing_blocked: ` into the file entity's `properties` JSON column. v1.1 will promote `briefing_blocked` to a typed column on `entities`; v1.0 carries it as a JSON property. 
-**A v1.0 binary opening a `.loomweave/loomweave.db` produced by a pre-WP5 +**A v1.0 binary opening a `.weft/loomweave/loomweave.db` produced by a pre-WP5 Loomweave binary will find no `briefing_blocked` properties on any row.** Pre-WP5 binaries never ran the scanner and never wrote the property; the 1.0 binary cannot retroactively discover which files contained secrets at diff --git a/plugins/python/src/loomweave_plugin_python/pyright_session.py b/plugins/python/src/loomweave_plugin_python/pyright_session.py index f8096f74..57b9fca4 100644 --- a/plugins/python/src/loomweave_plugin_python/pyright_session.py +++ b/plugins/python/src/loomweave_plugin_python/pyright_session.py @@ -91,7 +91,7 @@ class PyrightRunState: PYRIGHT_FILE_TIMEOUT_SECS = 3.0 STDERR_TAIL_LIMIT = 65536 PYRIGHT_EXCLUDE_PATTERNS = [ - "**/.loomweave/**", + "**/.weft/**", "**/.git/**", "**/.hg/**", "**/.svn/**", @@ -100,7 +100,7 @@ class PyrightRunState: "**/__pycache__/**", "**/node_modules/**", ] -PROJECT_LOCAL_EXTERNAL_DIRS = {".loomweave", ".git", ".hg", ".svn", ".jj", ".venv", "node_modules"} +PROJECT_LOCAL_EXTERNAL_DIRS = {".weft", ".git", ".hg", ".svn", ".jj", ".venv", "node_modules"} if TYPE_CHECKING: diff --git a/plugins/python/src/loomweave_plugin_python/wardline_descriptor.py b/plugins/python/src/loomweave_plugin_python/wardline_descriptor.py index e91d2b16..46f3d1db 100644 --- a/plugins/python/src/loomweave_plugin_python/wardline_descriptor.py +++ b/plugins/python/src/loomweave_plugin_python/wardline_descriptor.py @@ -4,7 +4,7 @@ Wardline remains authoritative for the vocabulary; Loomweave records only the source-observed decorator facts it can derive from that descriptor. 
-Two contract details below (``PROJECT_DESCRIPTOR_PATH`` and the descriptor +Two contract details below (``PROJECT_DESCRIPTOR_PATHS`` and the descriptor ``version`` semantics) are Loomweave-side assumptions pending Wardline's "Pre-Rust core hardening" Task B, which has not yet published the canonical project-local descriptor location or the ``schema: wardline.vocabulary/v1`` @@ -28,7 +28,18 @@ # location and descriptor-version semantics are not yet pinned by Wardline. # Tracked: filigree clarion-6ab5668d82. EXPECTED_DESCRIPTOR_VERSION = "wardline-generic-2" -PROJECT_DESCRIPTOR_PATH = Path(".wardline/vocabulary.yaml") + +# Weft store consolidation (ADR-046): sibling runtime state lives under the +# shared ``.weft/<sibling>/`` dotdir, so the Wardline descriptor is read only from +# the consolidated ``.weft/wardline/`` location. There is no fallback to the +# pre-consolidation ``.wardline/`` path: after the coordinated cutover every +# sibling is at ``.weft/`` by construction, so a descriptor found only on the +# legacy path means a mis-sequenced cutover; resolving it would silently bind a +# stale dir. Instead the project descriptor reads as absent and the loader falls +# through to the package descriptor (a loud, visible signal). Loomweave never +# writes a sibling's subtree — this is read-only. 
+WEFT_DESCRIPTOR_PATH = Path(".weft/wardline/vocabulary.yaml") +PROJECT_DESCRIPTOR_PATHS = (WEFT_DESCRIPTOR_PATH,) DescriptorSource = Literal["project", "package"] DescriptorStatus = Literal["enabled", "version_skew", "absent"] @@ -97,13 +108,17 @@ def load_wardline_descriptor(project_root: Path | None) -> WardlineDescriptorSta def _read_project_descriptor(project_root: Path | None) -> str | None: if project_root is None: return None - path = project_root / PROJECT_DESCRIPTOR_PATH - if not path.is_file(): - return None - try: - return path.read_text(encoding="utf-8") - except OSError: - return None + # Read only the consolidated .weft/wardline/ location (ADR-046); the + # pre-consolidation .wardline/ path is not consulted. + for relative in PROJECT_DESCRIPTOR_PATHS: + path = project_root / relative + if not path.is_file(): + continue + try: + return path.read_text(encoding="utf-8") + except OSError: + return None + return None def _read_package_descriptor() -> str | None: diff --git a/plugins/python/tests/test_server.py b/plugins/python/tests/test_server.py index b5274470..179936df 100644 --- a/plugins/python/tests/test_server.py +++ b/plugins/python/tests/test_server.py @@ -249,8 +249,8 @@ def bar(self): def test_initialize_project_descriptor_reports_wardline_enabled(tmp_path: Path) -> None: - descriptor = tmp_path / ".wardline" / "vocabulary.yaml" - descriptor.parent.mkdir() + descriptor = tmp_path / ".weft" / "wardline" / "vocabulary.yaml" + descriptor.parent.mkdir(parents=True) descriptor.write_text( """\ version: wardline-generic-2 @@ -305,8 +305,8 @@ def close(self) -> None: pass monkeypatch.setattr(server_module, "PyrightSession", FakePyrightSession, raising=False) - descriptor = tmp_path / ".wardline" / "vocabulary.yaml" - descriptor.parent.mkdir() + descriptor = tmp_path / ".weft" / "wardline" / "vocabulary.yaml" + descriptor.parent.mkdir(parents=True) descriptor.write_text( """\ version: wardline-generic-2 diff --git 
a/plugins/python/tests/test_wardline_descriptor.py b/plugins/python/tests/test_wardline_descriptor.py index 3f42b219..4215de42 100644 --- a/plugins/python/tests/test_wardline_descriptor.py +++ b/plugins/python/tests/test_wardline_descriptor.py @@ -44,8 +44,8 @@ def test_project_descriptor_wins_over_package_descriptor( tmp_path: Path, monkeypatch: Any, ) -> None: - project_descriptor = tmp_path / ".wardline" / "vocabulary.yaml" - project_descriptor.parent.mkdir() + project_descriptor = tmp_path / ".weft" / "wardline" / "vocabulary.yaml" + project_descriptor.parent.mkdir(parents=True) project_descriptor.write_text(_DESCRIPTOR, encoding="utf-8") package_descriptor = tmp_path / "package-vocabulary.yaml" package_descriptor.write_text( @@ -70,6 +70,66 @@ def test_project_descriptor_wins_over_package_descriptor( ] +def test_weft_descriptor_location_is_read(tmp_path: Path, monkeypatch: Any) -> None: + # ADR-046: the consolidated .weft/wardline/ location is read as a project + # descriptor (preferred over the package descriptor). + descriptor = tmp_path / ".weft" / "wardline" / "vocabulary.yaml" + descriptor.parent.mkdir(parents=True) + descriptor.write_text(_DESCRIPTOR, encoding="utf-8") + monkeypatch.setattr( + "loomweave_plugin_python.wardline_descriptor.metadata.files", + lambda _name: None, + ) + + state = load_wardline_descriptor(tmp_path) + + assert state.status == "enabled" + assert state.source == "project" + assert state.descriptor_version == EXPECTED_DESCRIPTOR_VERSION + + +def test_legacy_wardline_location_is_not_read(tmp_path: Path, monkeypatch: Any) -> None: + # ADR-046 clean break: a descriptor at only the pre-consolidation .wardline/ + # path is NOT read. With no package descriptor either, the loader degrades to + # absent — a loud signal of a mis-sequenced cutover, not a silent stale read. 
+ legacy = tmp_path / ".wardline" / "vocabulary.yaml" + legacy.parent.mkdir(parents=True) + legacy.write_text(_DESCRIPTOR, encoding="utf-8") + monkeypatch.setattr( + "loomweave_plugin_python.wardline_descriptor.metadata.files", + lambda _name: None, + ) + + state = load_wardline_descriptor(tmp_path) + + assert state.status == "absent" + assert state.reason == "not_found" + assert state.vocabulary is None + + +def test_legacy_wardline_location_does_not_shadow_package(tmp_path: Path, monkeypatch: Any) -> None: + # A legacy .wardline/ descriptor is ignored, so the package descriptor (the + # next resolution rung) is what loads — the legacy file never wins. + legacy = tmp_path / ".wardline" / "vocabulary.yaml" + legacy.parent.mkdir(parents=True) + legacy.write_text( + _DESCRIPTOR.replace("wardline-generic-2", "wardline-generic-9"), + encoding="utf-8", + ) + package_descriptor = tmp_path / "package-vocabulary.yaml" + package_descriptor.write_text(_DESCRIPTOR, encoding="utf-8") + monkeypatch.setattr( + "loomweave_plugin_python.wardline_descriptor.metadata.files", + lambda name: [_FakePackagePath(package_descriptor)] if name == "wardline" else None, + ) + + state = load_wardline_descriptor(tmp_path) + + assert state.status == "enabled" + assert state.source == "package" + assert state.descriptor_version == EXPECTED_DESCRIPTOR_VERSION + + def test_package_descriptor_loads_without_importing_wardline( tmp_path: Path, monkeypatch: Any, @@ -116,8 +176,8 @@ def test_absent_descriptor_degrades_without_vocabulary(tmp_path: Path, monkeypat def test_invalid_descriptor_shape_degrades_to_absent(tmp_path: Path) -> None: - descriptor = tmp_path / ".wardline" / "vocabulary.yaml" - descriptor.parent.mkdir() + descriptor = tmp_path / ".weft" / "wardline" / "vocabulary.yaml" + descriptor.parent.mkdir(parents=True) descriptor.write_text("version: 3\nentries: nope\n", encoding="utf-8") state = load_wardline_descriptor(tmp_path) @@ -128,8 +188,8 @@ def 
test_invalid_descriptor_shape_degrades_to_absent(tmp_path: Path) -> None: def test_duplicate_canonical_names_degrade_to_absent(tmp_path: Path) -> None: - descriptor = tmp_path / ".wardline" / "vocabulary.yaml" - descriptor.parent.mkdir() + descriptor = tmp_path / ".weft" / "wardline" / "vocabulary.yaml" + descriptor.parent.mkdir(parents=True) descriptor.write_text( """\ version: wardline-generic-2 @@ -151,8 +211,8 @@ def test_duplicate_canonical_names_degrade_to_absent(tmp_path: Path) -> None: def test_version_skew_keeps_valid_vocabulary_with_degraded_confidence(tmp_path: Path) -> None: - descriptor = tmp_path / ".wardline" / "vocabulary.yaml" - descriptor.parent.mkdir() + descriptor = tmp_path / ".weft" / "wardline" / "vocabulary.yaml" + descriptor.parent.mkdir(parents=True) descriptor.write_text( _DESCRIPTOR.replace("wardline-generic-2", "wardline-generic-3"), encoding="utf-8", diff --git a/scripts/b4-gate-run.sh b/scripts/b4-gate-run.sh index 1fb7c6c8..f3c5b552 100755 --- a/scripts/b4-gate-run.sh +++ b/scripts/b4-gate-run.sh @@ -130,7 +130,7 @@ def run_cli(repo_root: Path, venv: Path, corpus_root: Path) -> tuple[int, dict[s subprocess.run(["loomweave", "install"], cwd=project, env=env, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) subprocess.run(["loomweave", "analyze", "."], cwd=project, env=env, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) wall_ms = int(round((time.perf_counter() - started) * 1000)) - db_path = project / ".loomweave" / "loomweave.db" + db_path = project / ".weft" / "loomweave" / "loomweave.db" with sqlite3.connect(db_path) as conn: row = conn.execute("select stats from runs where status = 'completed' order by started_at desc limit 1").fetchone() if row is None: diff --git a/tests/e2e/external-operator-smoke.sh b/tests/e2e/external-operator-smoke.sh index ff5f76ea..f5535081 100755 --- a/tests/e2e/external-operator-smoke.sh +++ b/tests/e2e/external-operator-smoke.sh @@ -124,10 +124,10 @@ fi cd 
corpus if "$LOOMWEAVE_BIN" install >/dev/null 2>"$WORK_DIR/install.err"; then - if [ -f .loomweave/loomweave.db ]; then - record "3" "PASS" ".loomweave/loomweave.db created against psf/requests@$CORPUS_REF" + if [ -f .weft/loomweave/loomweave.db ]; then + record "3" "PASS" ".weft/loomweave/loomweave.db created against psf/requests@$CORPUS_REF" else - record "3" "FAIL" "install reported success but .loomweave/loomweave.db missing" + record "3" "FAIL" "install reported success but .weft/loomweave/loomweave.db missing" fi else record "3" "FAIL" "loomweave install exited non-zero: $(tr '\n' ' ' < "$WORK_DIR/install.err")" @@ -137,8 +137,8 @@ fi log "running loomweave analyze ..." if "$LOOMWEAVE_BIN" analyze . >"$WORK_DIR/analyze1.out" 2>"$WORK_DIR/analyze1.err"; then - ENTITY_COUNT_1="$(sqlite3 .loomweave/loomweave.db 'SELECT COUNT(*) FROM entities WHERE kind != "subsystem"' || echo 0)" - EDGE_COUNT_1="$(sqlite3 .loomweave/loomweave.db 'SELECT COUNT(*) FROM edges' || echo 0)" + ENTITY_COUNT_1="$(sqlite3 .weft/loomweave/loomweave.db 'SELECT COUNT(*) FROM entities WHERE kind != "subsystem"' || echo 0)" + EDGE_COUNT_1="$(sqlite3 .weft/loomweave/loomweave.db 'SELECT COUNT(*) FROM edges' || echo 0)" if [ "$ENTITY_COUNT_1" -gt 0 ]; then record "4.1" "PASS" "analyze ok; entities=$ENTITY_COUNT_1 edges=$EDGE_COUNT_1" else @@ -191,7 +191,7 @@ def tool_call(proc, rid, name, args): return read_frame(proc) # Pick a real entity from the analyzed corpus to test against. -conn = sqlite3.connect(project_dir / ".loomweave" / "loomweave.db") +conn = sqlite3.connect(project_dir / ".weft" / "loomweave" / "loomweave.db") ent = conn.execute(""" SELECT id, kind, name FROM entities WHERE kind = 'function' AND name = 'get' @@ -255,7 +255,7 @@ try: if matches else f"FAIL: find_entity('{pattern}') returned empty (envelope: {fe_body})") # 4.3(b) callers_of — find a function with at least one caller. 
- conn2 = sqlite3.connect(project_dir / ".loomweave" / "loomweave.db") + conn2 = sqlite3.connect(project_dir / ".weft" / "loomweave" / "loomweave.db") row = conn2.execute(""" SELECT e.id, COUNT(*) AS c FROM entities e JOIN edges ed ON ed.to_id = e.id @@ -328,8 +328,8 @@ fi log "running loomweave analyze (re-run for idempotency) ..." if "$LOOMWEAVE_BIN" analyze . >"$WORK_DIR/analyze2.out" 2>"$WORK_DIR/analyze2.err"; then - ENTITY_COUNT_2="$(sqlite3 .loomweave/loomweave.db 'SELECT COUNT(*) FROM entities WHERE kind != "subsystem"')" - EDGE_COUNT_2="$(sqlite3 .loomweave/loomweave.db 'SELECT COUNT(*) FROM edges')" + ENTITY_COUNT_2="$(sqlite3 .weft/loomweave/loomweave.db 'SELECT COUNT(*) FROM entities WHERE kind != "subsystem"')" + EDGE_COUNT_2="$(sqlite3 .weft/loomweave/loomweave.db 'SELECT COUNT(*) FROM edges')" if [ "$ENTITY_COUNT_2" = "$ENTITY_COUNT_1" ] && [ "$EDGE_COUNT_2" = "$EDGE_COUNT_1" ]; then record "5" "PASS" "idempotent: entities=$ENTITY_COUNT_2 edges=$EDGE_COUNT_2 unchanged" else @@ -351,8 +351,8 @@ ANALYZE_3_EXIT=0 "$LOOMWEAVE_BIN" analyze . >"$WORK_DIR/analyze3.out" 2>"$WORK_DIR/analyze3.err" || ANALYZE_3_EXIT=$? # Expected: soft-failure (exit 78) or success with briefing_blocked recorded. 
-BLOCKED_COUNT="$(sqlite3 .loomweave/loomweave.db "SELECT COUNT(*) FROM entities WHERE json_extract(properties, '\$.briefing_blocked') IS NOT NULL" 2>/dev/null || echo 0)" -FINDING_COUNT="$(sqlite3 .loomweave/loomweave.db "SELECT COUNT(*) FROM findings WHERE rule_id = 'LMWV-SEC-SECRET-DETECTED'" 2>/dev/null || echo 0)" +BLOCKED_COUNT="$(sqlite3 .weft/loomweave/loomweave.db "SELECT COUNT(*) FROM entities WHERE json_extract(properties, '\$.briefing_blocked') IS NOT NULL" 2>/dev/null || echo 0)" +FINDING_COUNT="$(sqlite3 .weft/loomweave/loomweave.db "SELECT COUNT(*) FROM findings WHERE rule_id = 'LMWV-SEC-SECRET-DETECTED'" 2>/dev/null || echo 0)" if [ "$BLOCKED_COUNT" -gt 0 ] && [ "$FINDING_COUNT" -gt 0 ]; then record "6" "PASS" "post-plant: $BLOCKED_COUNT blocked entities, $FINDING_COUNT secret findings (analyze exit $ANALYZE_3_EXIT)" @@ -368,7 +368,7 @@ fi if [ "$BLOCKED_COUNT" -gt 0 ]; then log "verifying blocked-entity summary refusal ..." - BLOCKED_ID="$(sqlite3 .loomweave/loomweave.db "SELECT id FROM entities WHERE json_extract(properties, '\$.briefing_blocked') IS NOT NULL LIMIT 1")" + BLOCKED_ID="$(sqlite3 .weft/loomweave/loomweave.db "SELECT id FROM entities WHERE json_extract(properties, '\$.briefing_blocked') IS NOT NULL LIMIT 1")" if [ -n "$BLOCKED_ID" ]; then set +e python3 - "$LOOMWEAVE_BIN" "$WORK_DIR/corpus" "$BLOCKED_ID" "$WORK_DIR/step7.json" <<'PY' diff --git a/tests/e2e/phase3_subsystems.sh b/tests/e2e/phase3_subsystems.sh index bba8939d..2ef08cae 100755 --- a/tests/e2e/phase3_subsystems.sh +++ b/tests/e2e/phase3_subsystems.sh @@ -120,8 +120,8 @@ subsystem_signature() { run_analyze "$PROJECT_A" run_analyze "$PROJECT_B" -DB_A="$PROJECT_A/.loomweave/loomweave.db" -DB_B="$PROJECT_B/.loomweave/loomweave.db" +DB_A="$PROJECT_A/.weft/loomweave/loomweave.db" +DB_B="$PROJECT_B/.weft/loomweave/loomweave.db" log "verifying subsystem rows ..." 
SUBSYSTEM_COUNT=$(sqlite3 "$DB_A" "SELECT COUNT(*) FROM entities WHERE kind = 'subsystem';") @@ -174,7 +174,7 @@ from pathlib import Path loomweave_bin = Path(sys.argv[1]) project_dir = Path(sys.argv[2]) -conn = sqlite3.connect(project_dir / ".loomweave" / "loomweave.db") +conn = sqlite3.connect(project_dir / ".weft" / "loomweave" / "loomweave.db") subsystem_id = conn.execute( "SELECT id FROM entities WHERE kind = 'subsystem' ORDER BY id LIMIT 1" ).fetchone()[0] diff --git a/tests/e2e/sprint_1_walking_skeleton.sh b/tests/e2e/sprint_1_walking_skeleton.sh index a101844a..c821250c 100755 --- a/tests/e2e/sprint_1_walking_skeleton.sh +++ b/tests/e2e/sprint_1_walking_skeleton.sh @@ -2,9 +2,9 @@ # Sprint 1 walking-skeleton end-to-end demo (WP3 Task 9 / signoffs A.4). # # Runs the README §3 demo script end-to-end and verifies: -# - `loomweave install` creates `.loomweave/loomweave.db` +# - `loomweave install` creates `.weft/loomweave/loomweave.db` # - `loomweave analyze .` spawns the Python plugin and persists at least one entity -# - `sqlite3 .loomweave/loomweave.db` returns Python module/function entities +# - `sqlite3 .weft/loomweave/loomweave.db` returns Python module/function entities # - Python function rows include source path, source line range, and content hash # - resolved and ambiguous calls edges are persisted end-to-end # - resolved references edges are persisted end-to-end @@ -82,7 +82,7 @@ export PATH="$REPO_ROOT/target/release:$VENV/bin:$PATH" # ── 5. loomweave install ─────────────────────────────────────────────────────── log "running: loomweave install" loomweave install -[ -f "$DEMO_DIR/.loomweave/loomweave.db" ] || fail ".loomweave/loomweave.db not created by loomweave install" +[ -f "$DEMO_DIR/.weft/loomweave/loomweave.db" ] || fail ".weft/loomweave/loomweave.db not created by loomweave install" # ── 6. loomweave analyze ─────────────────────────────────────────────────────── log "running: loomweave analyze ." @@ -90,7 +90,7 @@ loomweave analyze . 
# ── 6b. Verify database integrity (STO-04) ─────────────────────────────────── log "verifying database integrity via PRAGMA integrity_check ..." -INTEGRITY=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" "PRAGMA integrity_check;") +INTEGRITY=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" "PRAGMA integrity_check;") if [ "$INTEGRITY" != "ok" ]; then log "integrity_check output:" printf '%s\n' "$INTEGRITY" >&2 @@ -99,7 +99,7 @@ fi # ── 7. Verify entity via sqlite3 ───────────────────────────────────────────── log "verifying persisted entity via sqlite3 ..." -RESULT=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" "select id, kind from entities order by id;") +RESULT=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" "select id, kind from entities order by id;") # B.2 (Sprint 2): every analyzed file emits a module entity in addition to # its function/class entities. v1.0 also mints a core file entity for file # identity and federation reads. B.4* adds direct and dict-dispatch call sites. @@ -115,47 +115,47 @@ python:module:demo|module" if [ "$RESULT" != "$EXPECTED" ]; then log "DB contents:" - sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" "select * from entities;" >&2 || true + sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" "select * from entities;" >&2 || true message=$(printf 'expected exactly:\n%s\ngot:\n%s' "$EXPECTED" "$RESULT") fail "$message" fi # ── 8. Verify source metadata for MCP entity_at/summary cache (B.6a) ───────── log "verifying persisted Python function source metadata ..." 
-SOURCE_METADATA=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ +SOURCE_METADATA=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select source_file_id, source_file_path, source_line_start, source_line_end, length(content_hash) from entities where id = 'python:function:demo.hello';") SOURCE_METADATA_EXPECTED="core:file:demo.py|$DEMO_DIR/demo.py|10|11|64" if [ "$SOURCE_METADATA" != "$SOURCE_METADATA_EXPECTED" ]; then log "DB entity source metadata:" - sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ + sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select id, source_file_id, source_file_path, source_line_start, source_line_end, content_hash from entities order by id;" >&2 || true fail "expected Python function source metadata:\n$SOURCE_METADATA_EXPECTED\ngot:\n$SOURCE_METADATA" fi log "verifying core file anchor metadata and module parent chain ..." -FILE_ANCHOR=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ +FILE_ANCHOR=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select id, plugin_id, kind, name, source_file_id, source_file_path, json_extract(properties, '\$.language'), length(content_hash) from entities where id = 'core:file:demo.py';") FILE_ANCHOR_EXPECTED="core:file:demo.py|core|file|demo.py||$DEMO_DIR/demo.py|python|64" if [ "$FILE_ANCHOR" != "$FILE_ANCHOR_EXPECTED" ]; then log "DB file anchor metadata:" - sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ + sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select id, plugin_id, kind, name, parent_id, source_file_id, source_file_path, properties, content_hash from entities order by id;" >&2 || true fail "expected core file anchor metadata:\n$FILE_ANCHOR_EXPECTED\ngot:\n$FILE_ANCHOR" fi -MODULE_PARENT=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ +MODULE_PARENT=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select parent_id, source_file_id from entities where id = 'python:module:demo';") MODULE_PARENT_EXPECTED="core:file:demo.py|core:file:demo.py" if [ "$MODULE_PARENT" != 
"$MODULE_PARENT_EXPECTED" ]; then log "DB module parent/source metadata:" - sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ + sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select id, parent_id, source_file_id from entities order by id;" >&2 || true fail "expected module parent/source metadata:\n$MODULE_PARENT_EXPECTED\ngot:\n$MODULE_PARENT" fi # ── 9. Verify contains edge via sqlite3 (B.3) ──────────────────────────────── log "verifying persisted contains edge via sqlite3 ..." -EDGE_RESULT=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ +EDGE_RESULT=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select kind, from_id, to_id from edges where kind = 'contains' order by from_id, to_id;") EDGE_EXPECTED="contains|core:file:demo.py|python:module:demo contains|python:module:demo|python:class:demo.Marker @@ -167,85 +167,85 @@ contains|python:module:demo|python:function:demo.z_fallback" if [ "$EDGE_RESULT" != "$EDGE_EXPECTED" ]; then log "DB edge contents:" - sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" "select * from edges;" >&2 || true + sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" "select * from edges;" >&2 || true fail "expected edge row:\n$EDGE_EXPECTED\ngot:\n$EDGE_RESULT" fi # ── 10. Verify run stats include B.3 + B.4* + B.5* edges ──────────────────── log "verifying run stats include edges_inserted >= 10 ..." -EDGES_INSERTED=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ +EDGES_INSERTED=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select json_extract(stats, '\$.edges_inserted') from runs where status = 'completed';") if [ "$EDGES_INSERTED" -lt 10 ]; then log "runs row:" - sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" "select id, status, stats from runs;" >&2 || true + sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" "select id, status, stats from runs;" >&2 || true fail "expected runs.stats.edges_inserted >= 10; got $EDGES_INSERTED" fi # ── 11. 
Verify dropped_edges_total == 0 (B.3 §6 / §9 exit criterion 6) ────── log "verifying run stats include dropped_edges_total == 0 ..." -DROPPED=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ +DROPPED=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select json_extract(stats, '\$.dropped_edges_total') from runs where status = 'completed';") if [ "$DROPPED" != "0" ]; then log "runs row:" - sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" "select id, status, stats from runs;" >&2 || true + sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" "select id, status, stats from runs;" >&2 || true fail "expected runs.stats.dropped_edges_total == 0; got $DROPPED" fi # ── 12. Verify resolved + ambiguous calls edges (B.4*) ────────────────────── log "verifying persisted resolved calls edge ..." -RESOLVED_CALLS=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ +RESOLVED_CALLS=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select count(*) from edges where kind = 'calls' and confidence = 'resolved';") if [ "$RESOLVED_CALLS" -lt 1 ]; then log "DB edge contents:" - sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" "select kind, from_id, to_id, confidence, properties from edges order by kind, from_id, to_id;" >&2 || true + sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" "select kind, from_id, to_id, confidence, properties from edges order by kind, from_id, to_id;" >&2 || true fail "expected at least one resolved calls edge; got $RESOLVED_CALLS" fi log "verifying persisted ambiguous calls edge with properties.candidates ..." 
-AMBIGUOUS_WITH_CANDIDATES=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ +AMBIGUOUS_WITH_CANDIDATES=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select count(*) from edges where kind = 'calls' and confidence = 'ambiguous' and json_type(properties, '\$.candidates') = 'array';") if [ "$AMBIGUOUS_WITH_CANDIDATES" -lt 1 ]; then log "DB edge contents:" - sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" "select kind, from_id, to_id, confidence, properties from edges order by kind, from_id, to_id;" >&2 || true + sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" "select kind, from_id, to_id, confidence, properties from edges order by kind, from_id, to_id;" >&2 || true fail "expected at least one ambiguous calls edge with properties.candidates; got $AMBIGUOUS_WITH_CANDIDATES" fi log "verifying run stats include ambiguous_edges_total >= 1 ..." -AMBIGUOUS_EDGES=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ +AMBIGUOUS_EDGES=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select json_extract(stats, '\$.ambiguous_edges_total') from runs where status = 'completed';") if [ "$AMBIGUOUS_EDGES" -lt 1 ]; then log "runs row:" - sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" "select id, status, stats from runs;" >&2 || true + sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" "select id, status, stats from runs;" >&2 || true fail "expected runs.stats.ambiguous_edges_total >= 1; got $AMBIGUOUS_EDGES" fi log "verifying run stats include unresolved_call_sites_total == 0 ..." 
-UNRESOLVED_CALL_SITES=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ +UNRESOLVED_CALL_SITES=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select json_extract(stats, '\$.unresolved_call_sites_total') from runs where status = 'completed';") if [ "$UNRESOLVED_CALL_SITES" != "0" ]; then log "runs row:" - sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" "select id, status, stats from runs;" >&2 || true + sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" "select id, status, stats from runs;" >&2 || true fail "expected runs.stats.unresolved_call_sites_total == 0; got $UNRESOLVED_CALL_SITES" fi # ── 13. Verify resolved references edges (B.5*) ───────────────────────────── log "verifying persisted resolved references edges ..." -RESOLVED_REFERENCES=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ +RESOLVED_REFERENCES=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select count(*) from edges where kind = 'references' and confidence = 'resolved';") if [ "$RESOLVED_REFERENCES" -lt 2 ]; then log "DB edge contents:" - sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" "select kind, from_id, to_id, confidence, properties from edges order by kind, from_id, to_id;" >&2 || true + sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" "select kind, from_id, to_id, confidence, properties from edges order by kind, from_id, to_id;" >&2 || true fail "expected at least two resolved references edges; got $RESOLVED_REFERENCES" fi log "verifying run stats include reference resolver counters ..." 
-REFERENCE_SITES=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ +REFERENCE_SITES=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select json_extract(stats, '\$.reference_sites_total') from runs where status = 'completed';") -REFERENCES_RESOLVED=$(sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" \ +REFERENCES_RESOLVED=$(sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" \ "select json_extract(stats, '\$.references_resolved_total') from runs where status = 'completed';") if [ "$REFERENCE_SITES" -lt 2 ] || [ "$REFERENCES_RESOLVED" -lt 2 ]; then log "runs row:" - sqlite3 "$DEMO_DIR/.loomweave/loomweave.db" "select id, status, stats from runs;" >&2 || true + sqlite3 "$DEMO_DIR/.weft/loomweave/loomweave.db" "select id, status, stats from runs;" >&2 || true fail "expected reference_sites_total and references_resolved_total >= 2; got sites=$REFERENCE_SITES resolved=$REFERENCES_RESOLVED" fi diff --git a/tests/e2e/sprint_2_mcp_surface.sh b/tests/e2e/sprint_2_mcp_surface.sh index 58c86cd5..f56f36b3 100755 --- a/tests/e2e/sprint_2_mcp_surface.sh +++ b/tests/e2e/sprint_2_mcp_surface.sh @@ -65,7 +65,7 @@ export PATH="$REPO_ROOT/target/release:$VENV/bin:$PATH" log "running: loomweave install" loomweave install -[ -f "$DEMO_DIR/.loomweave/loomweave.db" ] || fail ".loomweave/loomweave.db not created" +[ -f "$DEMO_DIR/.weft/loomweave/loomweave.db" ] || fail ".weft/loomweave/loomweave.db not created" log "running: loomweave analyze ." loomweave analyze . 
@@ -119,7 +119,7 @@ def assert_tool_ok(response: dict[str, object]) -> dict[str, object]: return envelope -conn = sqlite3.connect(project_dir / ".loomweave" / "loomweave.db") +conn = sqlite3.connect(project_dir / ".weft" / "loomweave" / "loomweave.db") world_hash = conn.execute( "SELECT content_hash FROM entities WHERE id = ?", ("python:function:demo.world",), @@ -180,7 +180,7 @@ recording_fixture = [ }, } ] -(project_dir / ".loomweave" / "openrouter-recording.json").write_text( +(project_dir / ".weft" / "loomweave" / "openrouter-recording.json").write_text( json.dumps(recording_fixture, separators=(",", ":")), encoding="utf-8", ) @@ -255,7 +255,7 @@ llm_policy: provider: recording model_id: anthropic/claude-sonnet-4.6 session_token_ceiling: 1000000 - recording_fixture_path: .loomweave/openrouter-recording.json + recording_fixture_path: .weft/loomweave/openrouter-recording.json serve: mcp: enable_write_tools: true diff --git a/tests/e2e/wp5_secret_scan.sh b/tests/e2e/wp5_secret_scan.sh index 6e4b62e4..84fd74d5 100755 --- a/tests/e2e/wp5_secret_scan.sh +++ b/tests/e2e/wp5_secret_scan.sh @@ -98,7 +98,7 @@ printf "aws_access_key_id = 'AKIAIOSFODNN7EXAMPLE'\n" > "$DEMO_DIR/.env" PATH="$PLUGIN_DIR" "$LOOMWEAVE_BIN" analyze "$DEMO_DIR" -DB="$DEMO_DIR/.loomweave/loomweave.db" +DB="$DEMO_DIR/.weft/loomweave/loomweave.db" BLOCKED=$(sqlite3 "$DB" "select count(*) from entities where json_extract(properties, '\$.briefing_blocked') = 'secret_present';") [ "$BLOCKED" = "3" ] || fail "expected three briefing_blocked secret_present entities (core file + plugin source entity + dotenv anchor), got $BLOCKED" @@ -122,7 +122,7 @@ trap 'rm -rf "$DEMO_DIR" "$PLUGIN_DIR" "$BASELINE_DIR" "$OVERRIDE_DIR" "$MALFORM "$LOOMWEAVE_BIN" install --path "$BASELINE_DIR" printf "aws_access_key_id = 'AKIAIOSFODNN7EXAMPLE'\n" > "$BASELINE_DIR/leaky.sec" HASHED_SECRET=$(printf 'AKIAIOSFODNN7EXAMPLE' | sha1sum | awk '{print $1}') -cat > "$BASELINE_DIR/.loomweave/secrets-baseline.yaml" < 
"$BASELINE_DIR/.weft/loomweave/secrets-baseline.yaml" < "$MALFORMED_DIR/clean.sec" -printf "not: valid: yaml: [\n" > "$MALFORMED_DIR/.loomweave/secrets-baseline.yaml" +printf "not: valid: yaml: [\n" > "$MALFORMED_DIR/.weft/loomweave/secrets-baseline.yaml" set +e PATH="$PLUGIN_DIR" "$LOOMWEAVE_BIN" analyze "$MALFORMED_DIR" 2>/dev/null MALFORMED_EXIT=$? set -e [ "$MALFORMED_EXIT" -ne 0 ] || fail "malformed baseline: expected non-zero exit, got $MALFORMED_EXIT" -MDB="$MALFORMED_DIR/.loomweave/loomweave.db" +MDB="$MALFORMED_DIR/.weft/loomweave/loomweave.db" M_RUNS=$(sqlite3 "$MDB" "select count(*) from runs;") [ "$M_RUNS" = "0" ] || fail "malformed baseline must abort BEFORE BeginRun; got $M_RUNS run rows" log "PASS: malformed baseline aborts with non-zero exit and no runs row" @@ -195,11 +195,11 @@ trap 'rm -rf "$DEMO_DIR" "$PLUGIN_DIR" "$BASELINE_DIR" "$OVERRIDE_DIR" "$MALFORM printf "aws_access_key_id = 'AKIAIOSFODNN7EXAMPLE'\n" > "$RETRY_DIR/leaky.sec" # First run blocks (no baseline yet). PATH="$PLUGIN_DIR" "$LOOMWEAVE_BIN" analyze "$RETRY_DIR" -RDB="$RETRY_DIR/.loomweave/loomweave.db" +RDB="$RETRY_DIR/.weft/loomweave/loomweave.db" R_BLOCKED_BEFORE=$(sqlite3 "$RDB" "select count(*) from entities where json_extract(properties, '\$.briefing_blocked') = 'secret_present';") [ "$R_BLOCKED_BEFORE" = "2" ] || fail "retry: first run must block the source file entity and plugin entity, got $R_BLOCKED_BEFORE" # Operator commits a baseline acknowledging the example key. 
-cat > "$RETRY_DIR/.loomweave/secrets-baseline.yaml" < "$RETRY_DIR/.weft/loomweave/secrets-baseline.yaml" < QueryTargets: - db_path = project / ".loomweave" / "loomweave.db" + db_path = project / ".weft" / "loomweave" / "loomweave.db" conn = sqlite3.connect(db_path) conn.row_factory = sqlite3.Row try: @@ -786,7 +786,7 @@ def build_parser() -> argparse.ArgumentParser: "--project", type=Path, required=True, - help="Analyzed project root containing .loomweave/loomweave.db", + help="Analyzed project root containing .weft/loomweave/loomweave.db", ) parser.add_argument( "--loomweave-bin", type=Path, default=Path("target/release/loomweave") diff --git a/wardline.yaml b/wardline.yaml index 520ef8cf..5b8f3267 100644 --- a/wardline.yaml +++ b/wardline.yaml @@ -2,6 +2,6 @@ filigree: url: http://127.0.0.1:8542/api/weft/scan-results loomweave: # ADR-044: pinned to this project's deterministic read-API port. The published - # .loomweave/ephemeral.port overrides this once Wardline resolves consume-time + # .weft/loomweave/ephemeral.port overrides this once Wardline resolves consume-time # (clarion-7f574bc34f follow-up). Until then this keeps local wardline->loomweave working. url: http://127.0.0.1:10196 From c4e19fb6a93897cfbbf1c57645365abb0fdd2eb9 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sun, 7 Jun 2026 11:05:05 +1000 Subject: [PATCH 36/60] test(ci): bound nextest slow-timeout so `--workspace` completes instead of hanging (weft-9b8154e19c) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pre-existing Filigree-emission tests tracked by clarion-1d405be546 hang indefinitely, so the literal CI command `cargo nextest run --workspace --all-features` (CLAUDE.md "Build, test, lint") could never run to a verdict — forcing a "green via family-exclusion" caveat. 
Add a `slow-timeout` cap in `.config/nextest.toml` (`period = 60s, terminate-after = 3`): any test past the bound is terminated and reported as a timeout failure. The suite now COMPLETES (1247 passed; 4 emission tests fail fast, 4 hangs terminate at 180s — all 8 are clarion-1d405be546) instead of hanging forever. The cap does NOT fix clarion-1d405be546 (its owner's bug) — it makes the suite honestly red and fast. The 60s period sits well above the slowest legitimate test (none flagged SLOW in a full run) with headroom for slower CI runners. Co-Authored-By: Claude Opus 4.8 (1M context) --- .config/nextest.toml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.config/nextest.toml b/.config/nextest.toml index 585efb1c..87efd250 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -1,3 +1,18 @@ +# Bound every test's wall-clock so `cargo nextest run --workspace` always +# *completes* (and reports) instead of hanging. The pre-existing emission tests +# tracked by clarion-1d405be546 hang indefinitely; without a cap the literal CI +# command (CLAUDE.md "Build, test, lint") can never run clean, forcing a +# "green via family-exclusion" caveat. With the cap, a hung test is terminated +# and reported as a timeout failure — honestly red, fast — and clarion-1d405be546 +# remains its owner's bug to fix (the cap does not fix it). +# +# `terminate-after` counts `period`s: a test slow past `period` is warned (SLOW), +# and killed after `period * terminate-after`. The period sits well above the +# slowest legitimate test (the single-threaded serve-http integration tests) with +# CI-runner headroom, so only a genuine hang trips it. 
+[profile.default] +slow-timeout = { period = "60s", terminate-after = 3 } + [test-groups] serve-http = { max-threads = 1 } From 72544ddb411f75e521f6190624c76a9225981ea6 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sun, 7 Jun 2026 18:14:29 +1000 Subject: [PATCH 37/60] fix(cli): analyze summary reports whole-graph totals + incremental marker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `analyze complete` line reported this run's insert delta, not the graph size. On an incremental run that skips unchanged files the delta is just the phase3 subsystems re-emitted (e.g. "4 entities, 16 edges"), which reads as the graph having shrunk. Report the whole-graph totals instead — the same numbers project_status and the session-start hook show, via identical COUNT(*) SQL — and annotate incremental runs with the unchanged-file skip count. Adds entity_total/subsystem_total/edge_total storage helpers and a pure format_analyze_complete formatter, both test-driven. Falls back to the run delta only if the post-commit count read fails. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/analyze.rs | 100 +++++++++++++++++- crates/loomweave-storage/src/lib.rs | 15 +-- crates/loomweave-storage/src/query.rs | 26 +++++ .../loomweave-storage/tests/query_helpers.rs | 31 ++++-- 4 files changed, 155 insertions(+), 17 deletions(-) diff --git a/crates/loomweave-cli/src/analyze.rs b/crates/loomweave-cli/src/analyze.rs index a6a3397e..2d264ae5 100644 --- a/crates/loomweave-cli/src/analyze.rs +++ b/crates/loomweave-cli/src/analyze.rs @@ -1513,10 +1513,39 @@ pub(crate) async fn run_with_options(project_path: PathBuf, options: AnalyzeOpti bail!("analyze run {run_id} failed — {reason}"); } - println!( - "analyze complete: run {run_id} completed \ - ({total_entity_count} entities, {total_edge_count} edges)" - ); + // Report the WHOLE-GRAPH totals (the same numbers `project_status` and the + // session-start hook show), not this run's insert delta. The delta is + // misleadingly small on incremental runs that skip unchanged files — it + // counts only the phase3 subsystems re-emitted — so an operator could read + // it as the graph having shrunk. Fall back to the run delta only if the + // post-commit count read fails, so a cosmetic hiccup never masks a + // successful run. 
+ let run_delta_summary = || { + format!( + "analyze complete: run {run_id} completed \ + ({total_entity_count} entities, {total_edge_count} edges)" + ) + }; + let summary = match Connection::open(&db_path) { + Ok(conn) => match ( + loomweave_storage::entity_total(&conn), + loomweave_storage::subsystem_total(&conn), + loomweave_storage::edge_total(&conn), + ) { + (Ok(entities), Ok(subsystems), Ok(edges)) => { + format_analyze_complete(&run_id, entities, subsystems, edges, skipped_files_total) + } + _ => run_delta_summary(), + }, + Err(err) => { + tracing::warn!( + error = %err, + "analyze complete: graph-total read failed; reporting run delta" + ); + run_delta_summary() + } + }; + println!("{summary}"); Ok(()) } @@ -1545,6 +1574,33 @@ struct PlannedSeiWrite { /// there and violate the `ux_sei_alive_locator` partial unique index. The /// `BTreeMap` also yields the deterministic, locator-sorted processing order the /// cross-entity carry dedup in [`run_sei_mint_pass`] relies on. +/// Render the operator-facing `analyze complete` summary line. +/// +/// Reports the **whole-graph** totals (entities incl. subsystems, edges) — the +/// same numbers `project_status` and the session-start hook show — rather than +/// the per-run insert delta, which is misleadingly small on incremental runs +/// that skip unchanged files. When unchanged files were skipped, the line is +/// annotated so an operator does not mistake a fast incremental pass for a graph +/// that shrank. +fn format_analyze_complete( + run_id: &str, + entities: i64, + subsystems: i64, + edges: i64, + skipped_files: u64, +) -> String { + let incremental = if skipped_files > 0 { + let noun = if skipped_files == 1 { "file" } else { "files" }; + format!("; incremental: {skipped_files} unchanged {noun} skipped") + } else { + String::new() + }; + format!( + "analyze complete: run {run_id} completed \ + (graph: {entities} entities incl. 
{subsystems} subsystems, {edges} edges{incremental})" + ) +} + fn dedup_descriptors_by_locator(descriptors: Vec) -> Vec { descriptors .into_iter() @@ -5267,6 +5323,42 @@ mod tests { assert_eq!(f.signature.as_deref(), Some("s1")); } + #[test] + fn analyze_complete_full_run_reports_whole_graph_totals() { + // A full run (no unchanged files skipped) reports the graph totals with + // the subsystem breakdown, matching `project_status` phrasing. + let line = format_analyze_complete("run-1", 263, 5, 496, 0); + assert_eq!( + line, + "analyze complete: run run-1 completed \ + (graph: 263 entities incl. 5 subsystems, 496 edges)" + ); + } + + #[test] + fn analyze_complete_incremental_run_annotates_skipped_files() { + // An incremental run that skipped unchanged files reports the SAME graph + // totals (not the tiny insert delta) plus an explicit incremental marker. + let line = format_analyze_complete("run-2", 263, 5, 496, 29); + assert_eq!( + line, + "analyze complete: run run-2 completed \ + (graph: 263 entities incl. 5 subsystems, 496 edges; \ + incremental: 29 unchanged files skipped)" + ); + } + + #[test] + fn analyze_complete_incremental_singular_file_uses_singular_noun() { + let line = format_analyze_complete("run-3", 10, 0, 4, 1); + assert_eq!( + line, + "analyze complete: run run-3 completed \ + (graph: 10 entities incl. 0 subsystems, 4 edges; \ + incremental: 1 unchanged file skipped)" + ); + } + #[test] fn progress_reporter_is_noop_without_a_path() { // No progress file → no panics, no writes; the normal CLI path. 
diff --git a/crates/loomweave-storage/src/lib.rs b/crates/loomweave-storage/src/lib.rs index c3ff35cc..fd3c9510 100644 --- a/crates/loomweave-storage/src/lib.rs +++ b/crates/loomweave-storage/src/lib.rs @@ -52,13 +52,14 @@ pub use query::{ ReferenceEdgeMatch, ResolvedFile, ResolvedFileCatalogEntry, RolledUpReferenceEdge, SubsystemMember, UnresolvedCallSiteRow, ancestor_chain, call_edges_from, call_edges_targeting, candidate_entities_for_unresolved_sites, child_entity_ids, contained_entity_ids, - containing_module_id, current_file_hash, entities_by_churn, entities_by_kind, entities_by_tag, - entities_containing_line, entities_with_wardline_facts, entity_at_line, - entity_briefing_block_reason, entity_by_id, entity_visibility, existing_entity_ids, - find_entities, findings_for_emit, import_edges_for_entity, module_dependency_edges, - module_reference_rollup, normalize_source_path, reference_edges_for_entity, resolve_file, - resolve_file_catalog_entry, subsystem_for_member, subsystem_members, subsystem_of_entity, - unresolved_call_sites_for_caller, unresolved_callers_for_target, + containing_module_id, current_file_hash, edge_total, entities_by_churn, entities_by_kind, + entities_by_tag, entities_containing_line, entities_with_wardline_facts, entity_at_line, + entity_briefing_block_reason, entity_by_id, entity_total, entity_visibility, + existing_entity_ids, find_entities, findings_for_emit, import_edges_for_entity, + module_dependency_edges, module_reference_rollup, normalize_source_path, + reference_edges_for_entity, resolve_file, resolve_file_catalog_entry, subsystem_for_member, + subsystem_members, subsystem_of_entity, subsystem_total, unresolved_call_sites_for_caller, + unresolved_callers_for_target, }; pub use reader::ReaderPool; pub use retry::{RetryPolicy, begin_immediate}; diff --git a/crates/loomweave-storage/src/query.rs b/crates/loomweave-storage/src/query.rs index f86cfc54..91a64606 100644 --- a/crates/loomweave-storage/src/query.rs +++ 
b/crates/loomweave-storage/src/query.rs @@ -296,6 +296,32 @@ pub fn entity_by_id(conn: &Connection, entity_id: &str) -> Result Result { + conn.query_row("SELECT COUNT(*) FROM entities", [], |row| row.get(0)) + .map_err(StorageError::from) +} + +/// Number of subsystem entities (`kind = 'subsystem'`) — the breakdown +/// `project_status` annotates alongside the entity total. +pub fn subsystem_total(conn: &Connection) -> Result { + conn.query_row( + "SELECT COUNT(*) FROM entities WHERE kind = 'subsystem'", + [], + |row| row.get(0), + ) + .map_err(StorageError::from) +} + +/// Total number of edge rows in the graph, matching `project_status`'s +/// `SELECT COUNT(*) FROM edges`. +pub fn edge_total(conn: &Connection) -> Result { + conn.query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0)) + .map_err(StorageError::from) +} + pub fn resolve_file( conn: &Connection, project_root: &Path, diff --git a/crates/loomweave-storage/tests/query_helpers.rs b/crates/loomweave-storage/tests/query_helpers.rs index aaa76e46..bdb20df8 100644 --- a/crates/loomweave-storage/tests/query_helpers.rs +++ b/crates/loomweave-storage/tests/query_helpers.rs @@ -5,12 +5,12 @@ use std::path::Path; use loomweave_core::EdgeConfidence; use loomweave_storage::{ ModuleDependencyEdge, ReferenceDirection, SubsystemMember, call_edges_from, - call_edges_targeting, child_entity_ids, contained_entity_ids, containing_module_id, - entity_at_line, entity_briefing_block_reason, entity_by_id, find_entities, findings_for_emit, - module_dependency_edges, module_reference_rollup, normalize_source_path, pragma, - reference_edges_for_entity, resolve_file, resolve_file_catalog_entry, schema, - subsystem_for_member, subsystem_members, subsystem_of_entity, unresolved_call_sites_for_caller, - unresolved_callers_for_target, + call_edges_targeting, child_entity_ids, contained_entity_ids, containing_module_id, edge_total, + entity_at_line, entity_briefing_block_reason, entity_by_id, entity_total, find_entities, + 
findings_for_emit, module_dependency_edges, module_reference_rollup, normalize_source_path, + pragma, reference_edges_for_entity, resolve_file, resolve_file_catalog_entry, schema, + subsystem_for_member, subsystem_members, subsystem_of_entity, subsystem_total, + unresolved_call_sites_for_caller, unresolved_callers_for_target, }; use rusqlite::{Connection, params}; @@ -1706,3 +1706,22 @@ fn containing_module_id_walks_up_to_the_nearest_module() { None, ); } + +#[test] +fn graph_totals_count_all_entities_subsystems_and_edges() { + let dir = tempfile::tempdir().expect("tempdir"); + let conn = open_fresh(&dir); + + // Three entities — one of which is a subsystem — and two edges. + insert_entity(&conn, "python:module:a", "module"); + insert_entity(&conn, "python:function:a.f", "function"); + insert_entity(&conn, "core:subsystem:abcd", "subsystem"); + insert_contains_edge(&conn, "python:module:a", "python:function:a.f"); + insert_contains_edge(&conn, "python:module:a", "core:subsystem:abcd"); + + // entity_total counts every kind, INCLUDING the subsystem (matching the + // `project_status` convention); subsystem_total is the subset. + assert_eq!(entity_total(&conn).expect("entity_total"), 3); + assert_eq!(subsystem_total(&conn).expect("subsystem_total"), 1); + assert_eq!(edge_total(&conn).expect("edge_total"), 2); +} From f4d1ce1d7c050540e000338570891a871577a305 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sun, 7 Jun 2026 18:30:00 +1000 Subject: [PATCH 38/60] fix(docs): update references from `.loomweave/` to `.weft/loomweave/` in SKILL.md --- .agents/skills/loomweave-workflow/SKILL.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.agents/skills/loomweave-workflow/SKILL.md b/.agents/skills/loomweave-workflow/SKILL.md index fd7ab55c..5b8e4d80 100644 --- a/.agents/skills/loomweave-workflow/SKILL.md +++ b/.agents/skills/loomweave-workflow/SKILL.md @@ -26,7 +26,7 @@ calls this?" 
without reading a single file. - You need a function's neighborhood, execution paths, or which subsystem it belongs to. **Not for:** editing code, reading exact implementation bodies (use `summary` or -read the file once you have its path), or codebases with no `.loomweave/` index. +read the file once you have its path), or codebases with no `.weft/loomweave/` index. ## Entity IDs — the model @@ -161,7 +161,7 @@ honest-empty unless a plugin emits those tags. Likewise `high_churn` and `search_semantic` is also in the catalogue. It is opt-in under `semantic_search:`; when enabled, `loomweave analyze` populates the git-ignored -`.loomweave/embeddings.db` sidecar and the query path filters stale vectors by +`.weft/loomweave/embeddings.db` sidecar and the query path filters stale vectors by content hash. > Not in this catalogue: `emit_observation` as a general-purpose write surface. @@ -202,7 +202,7 @@ and are composed into `summary` prompts with a real guidance fingerprint. ## Launch -`loomweave serve --path ` where `` contains `.loomweave/loomweave.db` +`loomweave serve --path ` where `` contains `.weft/loomweave/loomweave.db` (built by `loomweave analyze `). In an MCP client the tools appear as `mcp__loomweave__find_entity`, etc. From 00aaa857c21d7ba87276ced2c868a9fee62acb00 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sun, 7 Jun 2026 19:27:11 +1000 Subject: [PATCH 39/60] chore(git): untrack .filigree.conf and gitignore it Local per-project filigree config; should not be version-controlled. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .filigree.conf | 6 ------ .gitignore | 4 ++++ 2 files changed, 4 insertions(+), 6 deletions(-) delete mode 100644 .filigree.conf diff --git a/.filigree.conf b/.filigree.conf deleted file mode 100644 index f3d9ffca..00000000 --- a/.filigree.conf +++ /dev/null @@ -1,6 +0,0 @@ -{ - "version": 1, - "project_name": "clarion", - "prefix": "clarion", - "db": ".filigree/filigree.db" -} diff --git a/.gitignore b/.gitignore index 6093915b..fa90c217 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .filigree/ +.filigree.conf .weft/filigree/ /target **/*.rs.bk @@ -51,3 +52,6 @@ tests/e2e/external-operator-smoke-results-*.md AGENTS.md CLAUDE.md .agents/skills/filigree-workflow/SKILL.md + +# Filigree issue tracker +.weft/ From 75753449b47a83ed4564a919879af51a3a49b2f6 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sun, 7 Jun 2026 19:27:29 +1000 Subject: [PATCH 40/60] fix(mcp): correct args and type order for filigree server configuration --- .mcp.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.mcp.json b/.mcp.json index 6b3c178d..7511e6b6 100644 --- a/.mcp.json +++ b/.mcp.json @@ -1,9 +1,9 @@ { "mcpServers": { "filigree": { - "args": [], + "type": "stdio", "command": "/home/john/.local/bin/filigree-mcp", - "type": "stdio" + "args": [] }, "loomweave": { "args": [ From 208d09097de5d01fb8d27d13ca48a4ad1d50be4f Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sun, 7 Jun 2026 20:38:26 +1000 Subject: [PATCH 41/60] fix(config): tolerate sibling analysis: section in McpConfig (clarion-1d405be546) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A loomweave.yaml carrying a top-level `analysis:` (clustering) block silently disabled Filigree finding emission. 
The same file is parsed by two structs: AnalyzeConfig owns `analysis:`, McpConfig owns `integrations.filigree` and is consulted at emission time. Commit 9328e8f gave McpConfig `deny_unknown_fields`, so it began rejecting the sibling `analysis:` section; load_mcp_config caught the parse error and fell back to McpConfig::default() (filigree.enabled = false), skipping emission with only a tracing::warn!. Declare a tolerated, deserialize-and-ignore `analysis` field on McpConfig so the §2 typo-protection still fails loudly for the fields McpConfig actually owns, without rejecting AnalyzeConfig's documented sibling section. Add a regression test asserting a config with both `analysis:` and `integrations.filigree:` loads with emission enabled. Clears 8 previously-RED analyze emission tests in loomweave-cli (four of which hung ~30s on the mock server). Pre-existing on rc3, surfaced by the WEFT_FEDERATION_TOKEN rename work. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-federation/src/config.rs | 44 +++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/crates/loomweave-federation/src/config.rs b/crates/loomweave-federation/src/config.rs index 593dc089..0d096b9d 100644 --- a/crates/loomweave-federation/src/config.rs +++ b/crates/loomweave-federation/src/config.rs @@ -16,6 +16,17 @@ pub struct McpConfig { pub semantic_search: SemanticSearchConfig, pub integrations: IntegrationsConfig, pub serve: ServeConfig, + /// Tolerated-and-ignored sibling section. The same `loomweave.yaml` is + /// parsed by two structs: `AnalyzeConfig` (loomweave-cli) owns the top-level + /// `analysis:` clustering block, while `McpConfig` owns `integrations` and is + /// consulted at finding-emission time. 
Because `McpConfig` is + /// `deny_unknown_fields` (so typos in the fields it *does* own fail loudly — + /// agent-first-feedback §2), it must still declare `analysis` or it rejects + /// any config carrying that documented section, silently disabling Filigree + /// emission via `load_mcp_config`'s default-on-error fallback. Captured as an + /// opaque value and never read here; `AnalyzeConfig` is the typed owner. + #[serde(default)] + pub analysis: serde_norway::Value, } fn default_config_version() -> u32 { @@ -30,6 +41,7 @@ impl Default for McpConfig { semantic_search: SemanticSearchConfig::default(), integrations: IntegrationsConfig::default(), serve: ServeConfig::default(), + analysis: serde_norway::Value::Null, } } } @@ -1250,6 +1262,38 @@ integrations: assert!(msg.contains("not_a_real_section"), "got: {msg}"); } + #[test] + fn tolerates_analysis_section_without_disabling_filigree_emission() { + // clarion-1d405be546: the same loomweave.yaml is parsed by AnalyzeConfig + // (which owns the top-level `analysis:` clustering block) and by McpConfig + // (which owns `integrations.filigree`, consulted at emission time). Under + // deny_unknown_fields, McpConfig must still PARSE a config that carries a + // sibling `analysis:` section — otherwise load_mcp_config's + // default-on-error fallback silently sets filigree.enabled = false and + // emission is skipped with no surfaced error. 
+ let cfg = McpConfig::from_yaml_str( + r" +analysis: + clustering: + min_cluster_size: 2 +integrations: + filigree: + enabled: true + emit_findings: true + actor: loomweave-test +", + ) + .expect("config carrying both analysis: and integrations.filigree: must load"); + assert!( + cfg.integrations.filigree.enabled, + "a sibling analysis: section must not disable Filigree" + ); + assert!( + cfg.integrations.filigree.emit_findings, + "a sibling analysis: section must not disable finding emission" + ); + } + #[test] fn unknown_nested_key_under_claude_cli_is_rejected() { // The exact agent-first-feedback §2.1 bug: `model_id` placed inside From b7a1b30ddee6ee951316a64274e8ad3b894ecb94 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 00:16:43 +1000 Subject: [PATCH 42/60] =?UTF-8?q?fix(install):=20gitignore=20loomweave.db?= =?UTF-8?q?=20by=20default=20=E2=80=94=20reverse=20ADR-005=20(weft-d822a7d?= =?UTF-8?q?e2d)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The index DB mutated on every analyze/scan while being git-tracked, leaving a permanently dirty working tree that blocked legis from signing the project (C1, dogfood-#2). loomweave.db is a regenerable orientation cache: `loomweave analyze` rebuilds the structural graph with no LLM calls, so only the lazy summary cache carries cost, and that is acceptably machine-local. - install.rs GITIGNORE_CONTENTS now excludes loomweave.db (template = source of truth) - ADR-005 reversed honestly: Status/Summary/Decision/Tracked/Excluded/Alternatives/ Consequences/ADR-014-ref + the commit_db knob inverts to an opt-IN - detailed-design §3 gets a C1 reversal note so it isn't read as current - top-level .gitignore comment de-staled (it already ignored the db) Lacuna's already-tracked db retrofit is the separate cross-repo weft-3c9bae6a40. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .gitignore | 4 +- crates/loomweave-cli/src/install.rs | 19 +++- docs/loomweave/1.0/detailed-design.md | 2 + .../adr/ADR-005-loomweave-dir-tracking.md | 97 +++++++++++++------ 4 files changed, 85 insertions(+), 37 deletions(-) diff --git a/.gitignore b/.gitignore index fa90c217..9e1edb13 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,9 @@ **/*.rs.bk Cargo.lock.bak -# SQLite working files (project-level .weft/loomweave/ is tracked per ADR-005/046) +# SQLite working files (project-level .weft/loomweave/ is a regenerable cache, +# ignored — ADR-005 as reversed by C1/weft-d822a7de2d; see the .weft/loomweave/ +# entries below and the blanket .weft/ rule) *.db-journal *.db-wal diff --git a/crates/loomweave-cli/src/install.rs b/crates/loomweave-cli/src/install.rs index d5339392..3f01fb03 100644 --- a/crates/loomweave-cli/src/install.rs +++ b/crates/loomweave-cli/src/install.rs @@ -33,11 +33,20 @@ const CONFIG_JSON_STUB: &str = r#"{ use crate::config::LOOMWEAVE_YAML_STUB; const GITIGNORE_CONTENTS: &str = "\ -# Loomweave .gitignore — ADR-005 tracked-vs-excluded list. -# Tracked (committed): loomweave.db, config.json, .gitignore itself. -# Excluded (ignored): WAL sidecars, shadow DB, per-run logs, tmp scratch, -# the read-API live port discovery file, the per-project instance id, and -# the analyze advisory lock. +# Loomweave .gitignore — ADR-005 tracked-vs-excluded list +# (the loomweave.db posture was reversed by C1 / weft-d822a7de2d). +# Tracked (committed): config.json, .gitignore itself. +# Excluded (ignored): the index DB itself, WAL sidecars, shadow DB, per-run +# logs, tmp scratch, the read-API live port discovery file, the per-project +# instance id, and the analyze advisory lock. + +# The index DB is a regenerable orientation CACHE, not committed analysis state +# (ADR-005, reversed by C1 / weft-d822a7de2d). 
`loomweave analyze` rebuilds the +# structural graph with no LLM calls, so it is cheap to regenerate; only the lazy +# summary cache costs anything, and that is machine-local. Tracking it dirtied the +# working tree on every analyze/scan and blocked legis signing of the project. +# (Sharing summaries across a team is a future opt-in, not the default.) +loomweave.db # Read-API live port discovery file (ADR-044): present only while serve runs, # rewritten per bind, loopback-only — a runtime artifact, never committed. diff --git a/docs/loomweave/1.0/detailed-design.md b/docs/loomweave/1.0/detailed-design.md index 133ce247..0e8596dc 100644 --- a/docs/loomweave/1.0/detailed-design.md +++ b/docs/loomweave/1.0/detailed-design.md @@ -915,6 +915,8 @@ CREATE INDEX ix_sei_lineage_sei ON sei_lineage(sei); defaults.yaml # default policy overrides ``` +> **Reversed by C1 (weft-d822a7de2d), 2026-06-08 — see [ADR-005](../adr/ADR-005-loomweave-dir-tracking.md).** `loomweave.db` is **no longer committed by default**; it is `.gitignore`d as a regenerable orientation cache (a committed, ever-mutating DB dirtied the tree and blocked legis signing). The committed-DB machinery described in the rest of this subsection (textual export, merge-helper, merge-driver registration, commit caveats) now applies **only** under the `storage.commit_db: true` opt-in, not the default. `config.json` and `runs/` provenance metadata remain tracked. + `.weft/loomweave/` is checked into git (consistent with Filigree's pattern and with the "shared analysis state" principle). SQLite files can diff poorly, so v0.1 ships **two features** for multi-developer teams to handle the committed DB: - `loomweave db export --textual ` — emits a deterministic JSON tree: `entities.jsonl` (one entity per line, sorted by id), `edges.jsonl` (sorted by `(kind, from_id, to_id)`), `guidance.jsonl` (sorted by id), `findings.jsonl` (sorted by id). 
Summary cache is **excluded** (re-derivable on next run, and JSON-diffing thousands of LLM-generated briefings is not useful). Output is git-friendly: a one-entity change produces a one-line diff. diff --git a/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md b/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md index 49201398..a93684a1 100644 --- a/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md +++ b/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md @@ -1,11 +1,28 @@ # ADR-005: `.loomweave/` Directory Git-Tracking Policy -**Status**: Accepted; amended by ADR-041, ADR-046 +**Status**: Accepted; amended by ADR-041, ADR-046; **`loomweave.db` tracking +reversed by C1 (weft-d822a7de2d), 2026-06-08** + +> **C1 reversal (weft-d822a7de2d), 2026-06-08:** `loomweave.db` is **no longer +> committed by default — it is `.gitignore`d.** The original decision (below) +> committed the DB so small teams could share briefings/guidance. Two facts that +> postdate it overturn that default: (1) tracking a file that mutates on every +> `analyze`/`scan` leaves a permanently dirty working tree, which **blocks legis +> from signing** the project (legis refuses to sign a dirty tree); and (2) the DB +> is a *regenerable orientation cache* — `loomweave analyze` rebuilds the +> structural graph with **no LLM calls**, so the expensive part is only the lazy +> summary cache, which is acceptably machine-local. Sharing summaries across a +> team becomes a future **opt-in** (`storage.commit_db: true`, the inverse of the +> old opt-out), not the default. The `GITIGNORE_CONTENTS` template in +> `crates/loomweave-cli/src/install.rs` remains the source of truth and now lists +> `loomweave.db`. The rest of the tracked/excluded split is unchanged. Sections +> below are kept for the historical decision and read with this reversal applied. > **ADR-046 amendment:** the directory tracked by this policy moved from > `.loomweave/` to `.weft/loomweave/` (Weft store consolidation, clean break). 
> The tracked-vs-ignored split below is unchanged — only the parent path. Read > every `.loomweave/` path below as `.weft/loomweave/`. + **Date**: 2026-04-18 **Deciders**: qacona@gmail.com **Context**: `loomweave install` must write a `.gitignore` inside `.loomweave/` that @@ -15,11 +32,13 @@ in `docs/implementation/sprint-1/wp1-scaffold.md §UQ-WP1-04`. ## Summary -`.loomweave/loomweave.db` and `.loomweave/config.json` are committed. WAL sidecars, -the shadow-DB intermediate, `tmp/`, `logs/`, and per-run raw LLM request/response -logs (`runs/*/log.jsonl`) are `.gitignore`d. `loomweave.yaml` lives at the project -root and is tracked under the user's existing repo-root `.gitignore`, not under -`.loomweave/.gitignore` (it's a user-edited config, not analysis state). +`.loomweave/config.json` is committed. `.loomweave/loomweave.db` is **`.gitignore`d** +(C1 reversal — a regenerable cache that would otherwise dirty the tree on every +run). WAL sidecars, the shadow-DB intermediate, `tmp/`, `logs/`, and per-run raw +LLM request/response logs (`runs/*/log.jsonl`) are `.gitignore`d. `loomweave.yaml` +lives at the project root and is tracked under the user's existing repo-root +`.gitignore`, not under `.loomweave/.gitignore` (it's a user-edited config, not +analysis state). ## Context @@ -51,6 +70,7 @@ grown the `ephemeral.port` (ADR-044), `embeddings.db` (ADR-040), `instance_id`, and `*.lock` entries): ``` +loomweave.db ephemeral.port *-wal *-shm @@ -68,15 +88,9 @@ runs/*/log.jsonl ### Tracked -- `.loomweave/loomweave.db` — the main analysis store. SQLite diffs poorly; the - `loomweave db export --textual` + `loomweave db merge-helper` pattern (detailed - design §3 File layout) handles the team case. **Committing a live index:** while - `loomweave serve` is running, the on-disk `loomweave.db` lags by its pending WAL - (the `-wal` sidecar is `.gitignore`d), so `git add loomweave.db` mid-serve can - stage an incomplete database. 
To commit a consistent point-in-time index, take - an online WAL-safe copy with `loomweave db backup` and commit that, or stop - `serve` first (SQLite checkpoints the WAL away on last-connection close) — - clarion-cdee445ed8. +- ~~`.loomweave/loomweave.db`~~ — **reversed by C1 (weft-d822a7de2d): now + Excluded** (see below). The DB is a regenerable orientation cache, and tracking + a file that mutates every run dirtied the tree and blocked legis signing. - `.loomweave/config.json` — small, human-readable internal state (schema version, last run IDs). - `.loomweave/.gitignore` itself — this file. @@ -88,6 +102,13 @@ runs/*/log.jsonl ### Excluded +- `loomweave.db` (C1 reversal, weft-d822a7de2d) — the index DB. A regenerable + orientation cache: `loomweave analyze` rebuilds the structural graph with no + LLM calls, and the only expensive content (the lazy summary cache) is + acceptably machine-local. Committing it left a permanently dirty tree (it + mutates on every `analyze`/`scan`), which blocked legis from signing the + project. Teams that want to share briefings opt **in** via + `storage.commit_db: true` (see the opt-in note below). - All SQLite WAL + SHM sidecars. - All shadow-DB intermediates. - `tmp/` and `logs/` (volatile scratch). @@ -105,13 +126,15 @@ runs/*/log.jsonl inside `.loomweave/`. Its tracking is governed by the project's own repo-root `.gitignore`, which is the user's concern. Default posture: tracked. -### Opt-out for users who don't want the DB committed +### Opt-in for teams who *do* want the DB committed (C1 reversal) -`loomweave.yaml:storage.commit_db: false` (post-Sprint-1 knob; WP6 authors the -full `loomweave.yaml` schema). When false, Loomweave writes an additional -`.loomweave/.gitignore` line excluding `loomweave.db`, and emits -`loomweave db sync push/pull` commands. Not implemented in Sprint 1; the knob -is documented here so the future change has a home. 
+Post-C1 the default is **ignored**, so the knob inverts: `loomweave.yaml: +storage.commit_db: true` is the opt-**in** for teams that want briefings/guidance +versioned alongside the code. When true, Loomweave omits the `loomweave.db` line +from the generated `.gitignore` (and the team accepts the dirty-tree / legis +consequence, or commits via a checkpointed snapshot). Still unimplemented — the +knob is documented here so the future change has a home. (Before C1 this was the +inverse `commit_db: false` opt-*out*; the commit-the-DB posture was the default.) ## Alternatives Considered @@ -133,9 +156,17 @@ committed is unbounded. are derived outputs that are expensive to rebuild. Small teams especially benefit from having them versioned alongside the code. -**Why rejected**: the "enterprise rigor at lack of scale" posture favours -committing analytic state for small-team workflows. Users who want machine-local -analysis only opt out via `storage.commit_db: false`. +**Why rejected** (originally): the "enterprise rigor at lack of scale" posture +favoured committing analytic state for small-team workflows. Users who wanted +machine-local analysis only opted out via `storage.commit_db: false`. + +> **Superseded for `loomweave.db` by C1 (weft-d822a7de2d):** this alternative is +> now the chosen posture *for the DB* — it is machine-local by default. The +> "expensive to rebuild" con is narrower than it read in 2026-04: the structural +> graph regenerates from `loomweave analyze` with no LLM calls, and only the lazy +> summary cache carries real cost. The decisive new factor (not in view at the +> original decision) is that a committed, ever-mutating DB blocks legis signing. +> `config.json` and the `runs/` provenance metadata remain tracked. ### Alternative 3: commit the DB but use git-lfs by default @@ -161,9 +192,10 @@ path works; LFS is a v0.2+ knob. ### Negative -- Committed SQLite DBs diff poorly by default. 
Mitigation: the - `loomweave db export --textual` / merge-helper path (detailed-design §3) is - the documented escape hatch. +- ~~Committed SQLite DBs diff poorly by default.~~ Moot post-C1: the DB is no + longer committed by default. A fresh checkout has no index until `loomweave + install`/`analyze` rebuilds it (cheap — no LLM calls); the lazy summary cache + is re-paid per machine unless a team opts into `commit_db: true`. - Adding a new excluded pattern requires either a Loomweave release or a user-side `.loomweave/.gitignore` edit. The post-v0.1 plan is to keep this file tool-owned; users adding their own ignores put them in the repo-root @@ -171,16 +203,19 @@ path works; LFS is a v0.2+ knob. ### Neutral -- `storage.commit_db: false` is a defined but unimplemented opt-out. Sprint 1 - ships with the commit-the-DB default only. +- `storage.commit_db` is a defined but unimplemented knob. Post-C1 its sense is + inverted: `true` is the opt-**in** to commit the DB; the default (DB ignored) + needs no knob. ## Related Decisions - [ADR-011](./ADR-011-writer-actor-concurrency.md) — names the shadow-DB intermediate; this ADR excludes it from git. - [ADR-014](./ADR-014-filigree-registry-backend.md) — cross-tool references - rely on `loomweave.db` being available to readers (Filigree, Wardline); the - commit-by-default posture keeps those references resolvable across machines. + rely on `loomweave.db` being available to readers (Filigree, Wardline). Post-C1 + the DB is no longer committed, so a reader on a fresh checkout resolves + references against a locally-rebuilt index (`loomweave analyze`) rather than a + pulled one; the structural graph it depends on regenerates with no LLM calls. 
## References From 234fe7f28976137cf460cc2a8e6635e87821cf07 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 00:20:15 +1000 Subject: [PATCH 43/60] docs(proposal): draft shared weft.toml key layout for hub blessing (weft-a2f4cf95c7) C-9's last hub-level deliverable: the cross-member weft.toml schema. Pins the single well-known home for a shared fact ([] table + cross-read allowlist, no per-member duplication), the sibling-endpoint precedence ladder (flag > env > weft.toml [X].url > on-disk discovery > default), the malformed=absent / operator-sole-writer / no-duplication invariants for the shared layer, and the reader extension to loomweave-core::store. Designed around wardline as the multi-sibling consumer. Open questions deferred to the hub bless. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../proposals/C-9-shared-weft-toml-schema.md | 204 ++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 docs/loomweave/proposals/C-9-shared-weft-toml-schema.md diff --git a/docs/loomweave/proposals/C-9-shared-weft-toml-schema.md b/docs/loomweave/proposals/C-9-shared-weft-toml-schema.md new file mode 100644 index 00000000..30dbd533 --- /dev/null +++ b/docs/loomweave/proposals/C-9-shared-weft-toml-schema.md @@ -0,0 +1,204 @@ +# Proposal: Shared `weft.toml` key layout (C-9 cross-member schema) + +**Status**: DRAFT — loomweave authors, hub blesses (weft-a2f4cf95c7 / clarion-164f88c510) +**Author**: loomweave +**Date**: 2026-06-08 +**Tracks**: weft conventions.md C-9(d), conflict-register §A-14, glossary §8 (no-duplication) +**Reference reader**: `crates/loomweave-core/src/store.rs` (member-private form, shipped) + +--- + +## 1. 
Why this proposal exists + +C-9 / §A-14 are DECIDED and the **member-private** form is shipped: each member +owns `.weft/<member>/` (sole writer) and reads its own `[<member>]` table in the +operator-authored, read-only `weft.toml` (canonical knob `store_dir`; legis is the +reference; loomweave landed it in `store.rs`). + +What is still **PENDING** — and what no member may bake until the hub pins it — is +the **shared / cross-member key layout**: where a fact that *one member writes-by- +operator and another member reads* lives. The concrete forcing case is the +**sibling federation endpoint** (a member's HTTP URL). loomweave surfaced the gap +(it refused to guess the schema under dispatch); wardline is the sharpest consumer +(it calls *both* filigree and loomweave and is retiring its `[wardline.filigree].url` +/ `[wardline.loomweave].url` keys per the 2026-06-07 ruling). This proposal pins: + +1. the **single well-known home** for a shared fact (no per-member duplication); +2. the **precedence** ladder for resolving a sibling endpoint; +3. the **invariants** restated for the shared layer; +4. the **reader semantics** a member implements. + +### Non-goals (already settled or explicitly excluded) + +- `[<member>].store_dir` and the member-private form — **shipped**, not re-opened. +- Authority / signing keys (Legis) — **never** enter the shared `.weft/`/`weft.toml` + namespace; capability confinement (proposed C-8) governs them (C-9 carve-out). +- On-disk *location* discovery (`.weft/<member>/ephemeral.port`) — already C-9(e); + this proposal covers the operator-declared *endpoint URL*, a distinct rung. +- Writing `weft.toml` — **out of scope by construction**: members are read-only + (C-9b; the C-4 multi-writer truncation lesson, gate weft-eb3dee402f). + +--- + +## 2. Proposal + +### 2.1 The shared home: `[<member>]` top-level table, allowlisted cross-read keys + +A shared fact about member **X** lives **once**, at the top-level `[X]` table, and +is read by any member.
The endpoint of X is: + +```toml +# weft.toml — operator-authored, project root +[filigree] +url = "http://127.0.0.1:8749" # filigree's federation endpoint — read by wardline, loomweave, legis… + +[loomweave] +url = "http://127.0.0.1:9111" # loomweave's endpoint — read by wardline… +store_dir = "custom/store" # member-PRIVATE — read ONLY by loomweave + +[wardline] +url = "http://127.0.0.1:7000" +``` + +**Ownership rule (the crux).** The `[X]` table is X's home for *both* its +member-private keys *and* its cross-readable keys. To keep that unambiguous: + +- A member reads its **own** full `[<member>]` table (private + shared keys). +- A member reads **only the allowlisted cross-read keys** from a **sibling's** + `[X]` table. The v1 cross-read allowlist is **`url`** (and, reserved for the + next fact, **`enabled`**). Everything else under `[X]` is private to X. +- A shared fact is **never** duplicated into a second section. There is exactly + one `url` for X, at `[X].url`. `[wardline.filigree].url` and any + `[<member>.<sibling>]` form are **retired / forbidden** (glossary §8 clash rule). + +This satisfies "live once at a well-known top-level path any member may read" with +**no new table**: the home of X's endpoint is X's own section. (Alternative B in §4 +keeps a dedicated `[federation]` table instead; recommended against for v1.)
+ +### 2.2 Precedence ladder (resolving a sibling endpoint) + +A member resolving sibling **X**'s endpoint walks, highest wins: + +| Rung | Source | Who sets it | Lifetime | +|---|---|---|---| +| 1 | CLI flag (`--filigree-url …`) | invoking agent/operator | this invocation | +| 2 | env var (`WEFT_<X>_URL`) | shell / `.mcp.json` env | this process | +| 3 | **`weft.toml` `[X].url`** ← *this proposal* | operator (durable) | project | +| 4 | on-disk discovery `.weft/<X>/ephemeral.port` (C-9e) | X's live process | while X runs | +| 5 | built-in default | the member | always | + +Rationale for **3 above 4**: `weft.toml [X].url` is the operator's *durable, +explicit* declaration — it is exactly the "persisted operator-declared remote-URL" +case C-9(d) names (e.g. X runs on another host, so there is no local +`ephemeral.port` to find). A live flag/env (1–2) still overrides it for a one-off. +For a purely-local federation the operator declares no `url`, so resolution falls +straight through to on-disk discovery (4) — the common case is unchanged. + +**Operator-overlay vs member-authoritative precedence:** `weft.toml` is the +operator overlay and outranks a member's *built-in default* (rung 5) for shared +facts, but never a runtime flag/env (rungs 1–2). A member's own *authoritative +config* (e.g. `loomweave.yaml`) governs member-private behavior only; it does not +declare another member's endpoint (that would re-introduce the duplicate). + +### 2.3 Invariants restated for the shared layer + +All of these are C-9 invariants; this proposal confirms they hold for cross-read +keys, not just private ones: + +- **Malformed = absent (NORMATIVE).** A missing / unparseable `weft.toml`, an + absent `[X]` table, an absent or wrong-typed `url`, or a blank value → the rung + is skipped and resolution falls through. A member MUST NOT hard-fail. (Same + fail-soft path `store.rs` already implements for `store_dir`.)
+- **Operator is sole writer.** No member's `install` / CLI / `doctor` writes + `weft.toml` — including its own `[].url`. The operator (or `weft init`) + authors it. +- **No duplication.** One fact, one home (§2.1). A reader never has to reconcile + two declarations of X's endpoint. +- **Forward-compatible parse.** Unknown top-level tables and unknown keys within + any table are ignored, never a parse rejection (so a member built before a new + shared key exists still loads the file). `store.rs` already does this for + sibling tables; the shared reader extends the same posture. + +### 2.4 Reader semantics (what a member implements, post-bless) + +Extends the existing `WeftToml` deserialization in `loomweave-core::store`: + +```rust +// Today (shipped): reads only its own private table. +#[derive(Deserialize)] struct WeftToml { loomweave: Option } + +// Post-bless: also reads allowlisted cross-read keys from sibling tables. +#[derive(Deserialize)] struct SiblingTable { url: Option /* , enabled: Option */ } +#[derive(Deserialize)] struct WeftToml { + loomweave: Option, // own: private + shared + filigree: Option, // sibling: url only + wardline: Option, + legis: Option, +} +``` + +A sibling-endpoint resolver returns the first rung that yields a non-blank value +and reports its `source` (`flag` / `env` / `weft.toml` / `discovery` / `default`) +on the wire — so the resolved-vs-configured gap is **loud, not silent** (the +lacuna-401 lesson; loomweave's `project_status_get` already reports resolved vs +configured endpoints in one call and is the model). + +--- + +## 3. Worked example: wardline (the multi-sibling consumer) + +wardline calls both filigree and loomweave. Per the 2026-06-07 ruling it **retires** +`[wardline.filigree].url` / `[wardline.loomweave].url`. After this schema lands: + +- filigree endpoint ← `--filigree-url` › `WEFT_FILIGREE_URL` › `weft.toml [filigree].url` › `.weft/filigree/ephemeral.port` › default. 
+- loomweave endpoint ← `--loomweave-url` › `WEFT_LOOMWEAVE_URL` › `weft.toml [loomweave].url` › `.weft/loomweave/ephemeral.port` › default. + +No `[wardline.*]` sibling keys. The operator declares a remote filigree once at +`[filigree].url`; wardline, loomweave, and legis all read that one line. + +> Note: this resolves the *route* (which endpoint). The *token* (F1/weft-23574069a1) +> is a sibling concern — auth still flows via the daemon/tier-1 +> `WEFT_FEDERATION_TOKEN`, not a per-project mint. Endpoint-resolution and +> token-resolution are independent ladders; this proposal pins only the endpoint. + +--- + +## 4. Alternatives considered + +**B. Dedicated `[federation]` (or `[endpoints]`) table.** +```toml +[federation] +filigree = "http://127.0.0.1:8749" +loomweave = "http://127.0.0.1:9111" +``` +*Pro:* clean separation of shared facts from member-private tables; no per-table +allowlist needed. *Con:* a member's endpoint now lives apart from its `[<member>]` +table (two places to look for "everything about X"); and it does not generalize as +cleanly to a second shared key (`enabled`) without either nesting +(`[federation.filigree] url=…, enabled=…`, which is just §2.1 with a prefix) or a +second parallel table. **Recommend §2.1** (member-table home) for v1; B remains +available if the hub prefers strict shared/private separation. + +**C. Keep per-member `[<member>.<sibling>].url`.** Rejected: it is the duplication +glossary §8 forbids and the exact pattern wardline is retiring. + +--- + +## 5. Questions for the hub (to resolve at bless) + +1. **Home:** §2.1 member-table + allowlist (recommended), or §4-B `[federation]` table? +2. **Precedence:** confirm rung 3 (`weft.toml url`) sits **above** rung 4 (on-disk + discovery). Local-only federations are unaffected either way; the question is + purely the remote-declared-sibling case. +3. **Allowlist v1:** is `url` the only cross-read key, or do we pin `enabled` now + (a member reading whether a sibling is operator-disabled)? +4.
**Env var spelling:** standardize `WEFT_<X>_URL` (e.g. `WEFT_FILIGREE_URL`) + as the rung-2 name across members? + +## 6. Sequencing + +Fast-follow, **not** a dogfood-#2 gate blocker (C-9 sequencing). Order: **hub +blesses this schema → members implement the cross-read reader** (loomweave extends +`store.rs` per §2.4; clarion-164f88c510 covers loomweave's reader). Until blessed, +members resolve sibling endpoints by flag / env / on-disk discovery only and bake +**no** `weft.toml` endpoint keys (C-9d). From 1b1c258939a82d87e9b70849bbd2c1d674dde088 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 00:29:49 +1000 Subject: [PATCH 44/60] feat(db): add `loomweave db checkpoint` WAL-truncate verb (C-2, weft-8e3d02f409) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Weft C-2 WAL hygiene: expose an on-demand verb that issues `PRAGMA wal_checkpoint(TRUNCATE)` on the working store, flushing outstanding WAL frames into loomweave.db and resetting the -wal sidecar to zero so the on-disk file is a clean point-in-time artifact for backup/demo/snapshot. The analyze path already TRUNCATE-checkpoints at each committed run boundary (loomweave-storage writer.rs); this is the companion for the serve summary-write path, where the WAL grows between PASSIVE wal_autocheckpoint hits. Best-effort: busy (a live serve reader) reports a partial outcome rather than failing — committed frames are already durable. Two integration tests (truncates-to-0 + data-survives; missing-db rejected).
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/cli.rs | 13 +++++++ crates/loomweave-cli/src/db.rs | 45 +++++++++++++++++++++++ crates/loomweave-cli/src/main.rs | 1 + crates/loomweave-cli/tests/db.rs | 61 ++++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+) diff --git a/crates/loomweave-cli/src/cli.rs b/crates/loomweave-cli/src/cli.rs index 823a1785..833c0509 100644 --- a/crates/loomweave-cli/src/cli.rs +++ b/crates/loomweave-cli/src/cli.rs @@ -265,6 +265,19 @@ pub enum DbCommand { #[arg(long)] force: bool, }, + + /// Checkpoint the WAL into `.weft/loomweave/loomweave.db` and truncate it, + /// so the on-disk file is a clean point-in-time artifact (Weft C-2). + /// + /// `analyze` already TRUNCATE-checkpoints at each committed run boundary; + /// this is the on-demand companion for after a `serve` session, whose + /// summary writes can grow the WAL between the PASSIVE `wal_autocheckpoint` + /// cadence and a backup / demo / snapshot. + Checkpoint { + /// Project directory containing .weft/loomweave/loomweave.db (default: current). + #[arg(long, default_value = ".")] + path: PathBuf, + }, } #[derive(Subcommand)] diff --git a/crates/loomweave-cli/src/db.rs b/crates/loomweave-cli/src/db.rs index 814cf0f4..4abf618d 100644 --- a/crates/loomweave-cli/src/db.rs +++ b/crates/loomweave-cli/src/db.rs @@ -87,6 +87,51 @@ pub fn backup(project_root: &Path, output: &Path, force: bool) -> Result<()> { } } +/// Force a `PRAGMA wal_checkpoint(TRUNCATE)` on the working store so the on-disk +/// `loomweave.db` becomes a clean point-in-time artifact: outstanding WAL frames +/// are flushed into the main file and the `-wal` sidecar is reset to zero length. +/// +/// `analyze` already TRUNCATE-checkpoints at each committed run boundary (the +/// `loomweave-storage` writer), so the analyze path needs no manual checkpoint. 
+/// This verb is the on-demand companion for the `serve` summary-write path, where +/// the WAL can grow between the PASSIVE `wal_autocheckpoint` cadence and a +/// snapshot / backup / demo (Weft C-2 WAL-hygiene). Best-effort on contention: a +/// live reader (a `serve` reader-pool connection) can hold TRUNCATE back to a +/// `busy` result — the committed frames are already durable, so we report the +/// partial outcome rather than fail. +pub fn checkpoint(project_root: &Path) -> Result<()> { + let db_path = loomweave_core::store::db_path(project_root); + ensure!( + db_path.exists(), + "Loomweave database not found at {}; run `loomweave analyze` first", + db_path.display() + ); + + let conn = Connection::open(&db_path) + .with_context(|| format!("open database {}", db_path.display()))?; + // `PRAGMA wal_checkpoint(TRUNCATE)` returns one row: + // (busy, log_frames, checkpointed_frames). + // busy = 1 means a concurrent connection blocked the WAL reset. + let (busy, log_frames, checkpointed): (i64, i64, i64) = conn + .query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |row| { + Ok((row.get(0)?, row.get(1)?, row.get(2)?)) + }) + .with_context(|| format!("checkpoint {}", db_path.display()))?; + + if busy != 0 { + println!( + "Checkpoint incomplete: a concurrent reader held the WAL back (busy=1). \ + Committed data is durable; re-run when `serve` is idle to fully reset the WAL." + ); + } else { + println!( + "Checkpointed {checkpointed}/{log_frames} WAL frame(s) into {} and truncated the WAL.", + db_path.display() + ); + } + Ok(()) +} + /// Run the online backup into `staging`, then verify the copy is intact. 
fn run_backup(db_path: &Path, staging: &Path) -> Result<()> { let src = Connection::open_with_flags( diff --git a/crates/loomweave-cli/src/main.rs b/crates/loomweave-cli/src/main.rs index 0e8dce43..24be0b84 100644 --- a/crates/loomweave-cli/src/main.rs +++ b/crates/loomweave-cli/src/main.rs @@ -123,6 +123,7 @@ fn main() -> Result<()> { path, force, } => db::backup(&path, &output, force), + cli::DbCommand::Checkpoint { path } => db::checkpoint(&path), }, cli::Command::Guidance { command } => guidance::run(command), cli::Command::Config { command } => config::run(command), diff --git a/crates/loomweave-cli/tests/db.rs b/crates/loomweave-cli/tests/db.rs index 40aff813..35b1f7d6 100644 --- a/crates/loomweave-cli/tests/db.rs +++ b/crates/loomweave-cli/tests/db.rs @@ -117,6 +117,67 @@ fn backup_refuses_to_clobber_without_force() { assert_eq!(n, 1); } +/// `db checkpoint` succeeds on a seeded DB and leaves the `-wal` sidecar reset to +/// zero length, so the on-disk `loomweave.db` is a clean point-in-time artifact +/// (Weft C-2 WAL hygiene). +#[test] +fn checkpoint_truncates_the_wal() { + let dir = tempfile::tempdir().unwrap(); + seed_db(dir.path()); + + // Grow the WAL: write more rows through a fresh connection. SQLite's PASSIVE + // autocheckpoint may flush frames but does not shrink the -wal file, so an + // explicit TRUNCATE is what guarantees a zero-length sidecar. + { + let db_path = dir.path().join(".weft/loomweave/loomweave.db"); + let conn = Connection::open(&db_path).expect("open db to grow wal"); + for i in 0..50 { + conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status) \ + VALUES (?1, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'), NULL, '{}', '{}', 'running')", + rusqlite::params![format!("wal-grow-{i}")], + ) + .expect("insert wal-grow row"); + } + } + + loomweave_bin() + .args(["db", "checkpoint"]) + .arg("--path") + .arg(dir.path()) + .assert() + .success(); + + // After TRUNCATE the -wal sidecar is zero-length (or absent). 
+ let wal = dir.path().join(".weft/loomweave/loomweave.db-wal"); + let wal_len = std::fs::metadata(&wal).map_or(0, |m| m.len()); + assert_eq!( + wal_len, 0, + "WAL sidecar should be truncated to 0 bytes after `db checkpoint`" + ); + + // The data survived the checkpoint (frames flushed into the main file). + let db_path = dir.path().join(".weft/loomweave/loomweave.db"); + let conn = Connection::open_with_flags(&db_path, OpenFlags::SQLITE_OPEN_READ_ONLY) + .expect("reopen checkpointed db"); + let n: i64 = conn + .query_row("SELECT count(*) FROM runs", [], |row| row.get(0)) + .expect("count rows"); + assert_eq!(n, 51, "1 seeded + 50 grown rows survive the checkpoint"); +} + +/// `db checkpoint` rejects a missing source database with a clear failure. +#[test] +fn checkpoint_rejects_missing_source_db() { + let dir = tempfile::tempdir().unwrap(); + loomweave_bin() + .args(["db", "checkpoint"]) + .arg("--path") + .arg(dir.path()) + .assert() + .failure(); +} + /// A missing source database is rejected with a clear error and leaves no /// debris (no output file, no staging temp). #[test] From b66c58d4f4da17cfe7035653658e7f7efbe333c0 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 00:51:58 +1000 Subject: [PATCH 45/60] =?UTF-8?q?fix(findings):=20content-keyed=20finding?= =?UTF-8?q?=20ids=20=E2=80=94=20dedupe=20across=20re-analyses=20(L1,=20ADR?= =?UTF-8?q?-047)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Findings accumulated on every re-analyze of an unchanged tree (dogfood-2 255→259→263) because the finding id embedded run_id (core:finding:{run_id}:…), so ON CONFLICT(id) only de-duped a --resume re-walk; a fresh run got a new run_id and minted a duplicate row — also orphaning the prior row's filigree_issue_id / suppression status every run. 
Drop run_id from the id (core:finding:); every discriminator was already content-derived (entity/guidance/subsystem id, or a blake3 of entity+rule+evidence). The upsert now de-dupes across fresh runs AND preserves lifecycle; the run_id COLUMN still tracks last-seen, so findings_for_emit (WHERE run_id = current) is unchanged. Loomweave's finding id is never on the wire, so filigree dedup is untouched. - all 12 analyze.rs minting sites + secret_scan/findings.rs → content-keyed - migration 0010 clears legacy run-scoped rows (regenerable; store is a cache per ADR-005/C1) - ADR-047 documents the decision + accepted trade-off (findings are current-state, not a per-run append-log) - regression test: analyze an unchanged tree 3× → finding count stable - write_finding_row + minting-site comments de-staled; test fixtures updated Filed as clarion-772ff358da (Part A); Part B (project-wide finding browser / has_findings filter) remains. Gates: fmt + clippy + 1263 workspace tests + rustdoc + migration-retirement all green. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/analyze.rs | 45 ++++--- .../loomweave-cli/src/secret_scan/findings.rs | 15 +-- crates/loomweave-cli/tests/analyze.rs | 40 +++++++ crates/loomweave-federation/src/filigree.rs | 2 +- .../loomweave-federation/src/scan_results.rs | 4 +- ...10_dedupe_findings_drop_run_scoped_ids.sql | 16 +++ crates/loomweave-storage/src/schema.rs | 7 +- crates/loomweave-storage/src/writer.rs | 24 ++-- .../loomweave-storage/tests/schema_apply.rs | 5 +- .../adr/ADR-047-content-keyed-finding-ids.md | 112 ++++++++++++++++++ docs/loomweave/adr/README.md | 1 + 11 files changed, 224 insertions(+), 47 deletions(-) create mode 100644 crates/loomweave-storage/migrations/0010_dedupe_findings_drop_run_scoped_ids.sql create mode 100644 docs/loomweave/adr/ADR-047-content-keyed-finding-ids.md diff --git a/crates/loomweave-cli/src/analyze.rs b/crates/loomweave-cli/src/analyze.rs index 2d264ae5..04011497 100644 --- a/crates/loomweave-cli/src/analyze.rs +++ b/crates/loomweave-cli/src/analyze.rs @@ -1956,11 +1956,11 @@ async fn emit_deletion_findings( } /// Build a `LMWV-FACT-ENTITY-DELETED` finding anchored to the deleted entity's own -/// (never-pruned) row. The id is deterministic and run-scoped so a `--resume` +/// (never-pruned) row. The id is deterministic and content-keyed so re-analysis (and `--resume`) /// re-walk regenerates the same id and `InsertFinding`'s upsert is idempotent. 
fn entity_deleted_finding(entity_id: &str, run_id: &str, now: &str) -> FindingRecord { FindingRecord { - id: format!("core:finding:{run_id}:entity-deleted:{entity_id}"), + id: format!("core:finding:entity-deleted:{entity_id}"), tool: "loomweave".to_owned(), tool_version: env!("CARGO_PKG_VERSION").to_owned(), run_id: run_id.to_owned(), @@ -1990,7 +1990,7 @@ fn guidance_orphan_finding( now: &str, ) -> FindingRecord { FindingRecord { - id: format!("core:finding:{run_id}:guidance-orphan:{guidance_id}:{deleted_entity_id}"), + id: format!("core:finding:guidance-orphan:{guidance_id}:{deleted_entity_id}"), tool: "loomweave".to_owned(), tool_version: env!("CARGO_PKG_VERSION").to_owned(), run_id: run_id.to_owned(), @@ -2208,7 +2208,7 @@ async fn emit_guidance_staleness_findings( /// Run-scoped, deterministic id; INFO, confidence 1.0. fn guidance_expired_finding(guidance_id: &str, run_id: &str, now: &str) -> FindingRecord { FindingRecord { - id: format!("core:finding:{run_id}:guidance-expired:{guidance_id}"), + id: format!("core:finding:guidance-expired:{guidance_id}"), tool: "loomweave".to_owned(), tool_version: env!("CARGO_PKG_VERSION").to_owned(), run_id: run_id.to_owned(), @@ -2237,7 +2237,7 @@ fn guidance_stale_finding( now: &str, ) -> FindingRecord { FindingRecord { - id: format!("core:finding:{run_id}:guidance-stale:{guidance_id}"), + id: format!("core:finding:guidance-stale:{guidance_id}"), tool: "loomweave".to_owned(), tool_version: env!("CARGO_PKG_VERSION").to_owned(), run_id: run_id.to_owned(), @@ -2277,7 +2277,7 @@ fn guidance_churn_stale_finding( now: &str, ) -> FindingRecord { FindingRecord { - id: format!("core:finding:{run_id}:guidance-churn-stale:{guidance_id}"), + id: format!("core:finding:guidance-churn-stale:{guidance_id}"), tool: "loomweave".to_owned(), tool_version: env!("CARGO_PKG_VERSION").to_owned(), run_id: run_id.to_owned(), @@ -2409,7 +2409,7 @@ async fn emit_tier_subsystem_findings( /// Build a `LMWV-FACT-TIER-SUBSYSTEM-MIXING` finding anchored to 
the subsystem, /// carrying its tier-bearing members as related ids and the tier distribution as -/// evidence. Members are pre-sorted by the caller; the id is run-scoped. +/// evidence. Members are pre-sorted by the caller; the id is content-keyed. fn tier_mixing_finding( subsystem_id: &str, members: &[(String, String)], @@ -2423,7 +2423,7 @@ fn tier_mixing_finding( *tier_counts.entry(tier.as_str()).or_default() += 1; } FindingRecord { - id: format!("core:finding:{run_id}:tier-mixing:{subsystem_id}"), + id: format!("core:finding:tier-mixing:{subsystem_id}"), tool: "loomweave".to_owned(), tool_version: env!("CARGO_PKG_VERSION").to_owned(), run_id: run_id.to_owned(), @@ -2459,7 +2459,7 @@ fn tier_unanimous_finding( ) -> FindingRecord { let member_ids: Vec<&str> = members.iter().map(|(id, _)| id.as_str()).collect(); FindingRecord { - id: format!("core:finding:{run_id}:tier-unanimous:{subsystem_id}"), + id: format!("core:finding:tier-unanimous:{subsystem_id}"), tool: "loomweave".to_owned(), tool_version: env!("CARGO_PKG_VERSION").to_owned(), run_id: run_id.to_owned(), @@ -2816,7 +2816,7 @@ async fn insert_weak_modularity_finding( .map(|subsystem| subsystem.id.clone()) .collect::>(); let now = iso8601_now(); - let finding_id = format!("core:finding:{run_id}:weak-modularity"); + let finding_id = "core:finding:weak-modularity".to_owned(); let related_entities_json = serde_json::to_string(&subsystem_ids) .context("serialize weak modularity related_entities")?; writer @@ -2866,7 +2866,7 @@ async fn insert_weak_modularity_finding( /// The finding anchors to the degraded entity itself (the plugin still emits one /// manifest-declared degraded-syntax entity for a syntax-failed file), so no /// synthetic anchor is needed. -/// The id is deterministic and run-scoped so a `--resume` re-walk regenerates the +/// The id is deterministic and content-keyed so re-analysis (and `--resume`) re-walk regenerates the /// same id and `InsertFinding`'s upsert is idempotent (REQ-FINDING-05). 
fn syntax_error_finding( record: &EntityRecord, @@ -2886,7 +2886,7 @@ fn syntax_error_finding( return None; } Some(FindingRecord { - id: format!("core:finding:{run_id}:syntax-error:{}", record.id), + id: format!("core:finding:syntax-error:{}", record.id), tool: "loomweave".to_owned(), tool_version: env!("CARGO_PKG_VERSION").to_owned(), run_id: run_id.to_owned(), @@ -3041,7 +3041,7 @@ fn host_finding_to_record( }) .to_string(); FindingRecord { - id: format!("core:finding:{run_id}:infra:{discriminator}"), + id: format!("core:finding:infra:{discriminator}"), tool: "loomweave".to_owned(), tool_version: env!("CARGO_PKG_VERSION").to_owned(), run_id: run_id.to_owned(), @@ -3108,7 +3108,7 @@ fn crash_finding_record( ) -> FindingRecord { let discriminator = blake3::hash(format!("{plugin_id}\u{0}{reason}").as_bytes()).to_hex(); FindingRecord { - id: format!("core:finding:{run_id}:crash:{discriminator}"), + id: format!("core:finding:crash:{discriminator}"), tool: "loomweave".to_owned(), tool_version: env!("CARGO_PKG_VERSION").to_owned(), run_id: run_id.to_owned(), @@ -3142,7 +3142,7 @@ fn source_walk_finding_record( blake3::hash(format!("{}\u{0}{skipped_entries}", project_root.display()).as_bytes()) .to_hex(); FindingRecord { - id: format!("core:finding:{run_id}:source-walk:{discriminator}"), + id: format!("core:finding:source-walk:{discriminator}"), tool: "loomweave".to_owned(), tool_version: env!("CARGO_PKG_VERSION").to_owned(), run_id: run_id.to_owned(), @@ -5707,10 +5707,10 @@ mod tests { assert_eq!(finding.kind, "defect"); assert_eq!(finding.severity, "WARN"); assert_eq!(finding.tool, "loomweave"); - // Deterministic, run-scoped id keeps InsertFinding idempotent on resume. + // Deterministic, content-keyed id keeps InsertFinding idempotent across runs. 
assert_eq!( finding.id, - "core:finding:run-1:syntax-error:python:module:pkg.broken" + "core:finding:syntax-error:python:module:pkg.broken" ); } @@ -5746,10 +5746,10 @@ mod tests { assert_eq!(finding.severity, "INFO"); // Anchors to the deleted entity's own (never-pruned) row. assert_eq!(finding.entity_id, "python:function:pkg.gone"); - // Deterministic, run-scoped id keeps InsertFinding idempotent on resume. + // Deterministic, content-keyed id keeps InsertFinding idempotent across runs. assert_eq!( finding.id, - "core:finding:run-1:entity-deleted:python:function:pkg.gone" + "core:finding:entity-deleted:python:function:pkg.gone" ); } @@ -5782,10 +5782,7 @@ mod tests { assert_eq!(finding.kind, "fact"); assert_eq!(finding.severity, "WARN"); assert_eq!(finding.entity_id, "core:subsystem:abc"); - assert_eq!( - finding.id, - "core:finding:run-1:tier-mixing:core:subsystem:abc" - ); + assert_eq!(finding.id, "core:finding:tier-mixing:core:subsystem:abc"); let evidence: serde_json::Value = serde_json::from_str(&finding.evidence_json).unwrap(); assert_eq!(evidence["tier_distribution"]["public"], 1); assert_eq!(evidence["tier_distribution"]["internal"], 1); @@ -5825,7 +5822,7 @@ mod tests { assert_eq!(related, serde_json::json!(["python:function:pkg.gone"])); assert_eq!( finding.id, - "core:finding:run-1:guidance-orphan:core:guidance:g1:python:function:pkg.gone" + "core:finding:guidance-orphan:core:guidance:g1:python:function:pkg.gone" ); } diff --git a/crates/loomweave-cli/src/secret_scan/findings.rs b/crates/loomweave-cli/src/secret_scan/findings.rs index 532feeab..a500dced 100644 --- a/crates/loomweave-cli/src/secret_scan/findings.rs +++ b/crates/loomweave-cli/src/secret_scan/findings.rs @@ -114,12 +114,13 @@ pub(crate) async fn emit_findings( finding_entity_id(&pending.file_path, entity_anchors).with_context(|| { format!("anchor secret finding for {}", pending.file_path.display()) })?; - // Deterministic, run-scoped id so a `--resume` re-walk regenerates the - // SAME id 
and `InsertFinding`'s upsert is idempotent (REQ-FINDING-05). - // A random UUID would instead create a duplicate finding row on every - // resume (the id never collides, so the upsert never fires). The digest - // covers the anchor entity, rule, and evidence (file + line + hashed - // secret), which uniquely identify a detection within a run. + // Deterministic, content-keyed id so re-analysis (and a `--resume` + // re-walk) regenerates the SAME id and `InsertFinding`'s upsert is + // idempotent across runs (REQ-FINDING-05; L1 / ADR-047). A random UUID + // would instead create a duplicate finding row on every run (the id never + // collides, so the upsert never fires). The digest covers the anchor + // entity, rule, and evidence (file + line + hashed secret), which + // uniquely identify a detection independent of which run observed it. let discriminator = blake3::hash( format!( "{entity_id}\u{0}{}\u{0}{}", @@ -128,7 +129,7 @@ pub(crate) async fn emit_findings( .as_bytes(), ) .to_hex(); - let finding_id = format!("core:finding:{run_id}:secret:{discriminator}"); + let finding_id = format!("core:finding:secret:{discriminator}"); writer .send_wait(|ack| WriterCmd::InsertFinding { finding: Box::new(FindingRecord { diff --git a/crates/loomweave-cli/tests/analyze.rs b/crates/loomweave-cli/tests/analyze.rs index 3f9465f6..4848c44d 100644 --- a/crates/loomweave-cli/tests/analyze.rs +++ b/crates/loomweave-cli/tests/analyze.rs @@ -1219,6 +1219,46 @@ fn analyze_phase3_emits_weak_modularity_fact_when_below_threshold() { ); } +/// L1 / ADR-047 regression: re-analyzing an UNCHANGED tree must not accumulate +/// duplicate finding rows. Finding ids are content-keyed (`core:finding:`), +/// not run-scoped, so the `ON CONFLICT(id)` upsert de-dupes across fresh runs; +/// before the fix every re-analyze minted new run-scoped rows and the count grew +/// (the dogfood `255 -> 259 -> 263` symptom). 
+#[cfg(unix)] +#[test] +fn analyze_findings_do_not_accumulate_across_reruns() { + let (project_dir, plugin_dir, config_path) = phase3_project_for_rerun(&["weak_a", "weak_b"]); + let db = project_dir.path().join(".weft/loomweave/loomweave.db"); + let count = || -> i64 { + Connection::open(&db) + .unwrap() + .query_row("SELECT COUNT(*) FROM findings", [], |row| row.get(0)) + .unwrap() + }; + + let after_first = count(); + assert!( + after_first > 0, + "fixture should produce at least one finding to make the test meaningful" + ); + + // Re-analyze the UNCHANGED tree twice more (same source, new run_id each time). + let plugin_path = + std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); + let config = std::path::PathBuf::from(&config_path); + run_phase3_analyze(project_dir.path(), &config, &plugin_path); + let after_second = count(); + run_phase3_analyze(project_dir.path(), &config, &plugin_path); + let after_third = count(); + + assert_eq!( + (after_second, after_third), + (after_first, after_first), + "finding count must be stable across re-analyses of an unchanged tree \ + (got {after_first} -> {after_second} -> {after_third})" + ); +} + /// Set up a phase3 project + plugin and run analyze once. 
Returns BOTH tempdirs /// (project, plugin) so the caller can keep the plugin on `PATH` and re-run /// `analyze` after mutating the source tree — `run_phase3_fixture` drops the diff --git a/crates/loomweave-federation/src/filigree.rs b/crates/loomweave-federation/src/filigree.rs index 8a4268b9..c8a91f9d 100644 --- a/crates/loomweave-federation/src/filigree.rs +++ b/crates/loomweave-federation/src/filigree.rs @@ -1185,7 +1185,7 @@ mod tests { .expect("enabled client"); let row = crate::scan_results::FindingForEmit { - id: "core:finding:run-1:circular".to_owned(), + id: "core:finding:circular".to_owned(), rule_id: "LMWV-PY-STRUCTURE-001".to_owned(), kind: "defect".to_owned(), severity: "WARN".to_owned(), diff --git a/crates/loomweave-federation/src/scan_results.rs b/crates/loomweave-federation/src/scan_results.rs index 6910d684..c7e5de8f 100644 --- a/crates/loomweave-federation/src/scan_results.rs +++ b/crates/loomweave-federation/src/scan_results.rs @@ -315,7 +315,7 @@ mod tests { fn defect_row() -> FindingForEmit { FindingForEmit { - id: "core:finding:run-1:circular".to_owned(), + id: "core:finding:circular".to_owned(), rule_id: "LMWV-PY-STRUCTURE-001".to_owned(), kind: "defect".to_owned(), severity: "WARN".to_owned(), @@ -463,7 +463,7 @@ mod tests { fn prepare_batch_counts_emitted_and_skipped() { let emitted = defect_row(); let mut skipped = defect_row(); - skipped.id = "core:finding:run-1:weak-modularity".to_owned(); + skipped.id = "core:finding:weak-modularity".to_owned(); skipped.entity_id = "core:subsystem:abcd".to_owned(); skipped.source_file_path = None; diff --git a/crates/loomweave-storage/migrations/0010_dedupe_findings_drop_run_scoped_ids.sql b/crates/loomweave-storage/migrations/0010_dedupe_findings_drop_run_scoped_ids.sql new file mode 100644 index 00000000..54d1804c --- /dev/null +++ b/crates/loomweave-storage/migrations/0010_dedupe_findings_drop_run_scoped_ids.sql @@ -0,0 +1,16 @@ +-- 0010: Clear run-scoped findings so the content-keyed finding id 
can take over. +-- +-- L1 fix (clarion-772ff358da / ADR-047): finding ids were `core:finding:{run_id}:…`, +-- so a fresh re-analyze minted a NEW row for the same logical finding (the upsert +-- only de-duped a `--resume` re-walk under the same run_id). Findings accumulated +-- across runs (255 -> 259 -> 263) and every re-analyze orphaned the prior row's +-- Filigree linkage. The new id is content-keyed (`core:finding:`), +-- so ON CONFLICT(id) now de-dupes across runs and preserves lifecycle. +-- +-- On an existing database the new content-keyed rows would land BESIDE the old +-- run-scoped ones (a one-time worse doubling that never self-cleans, since no +-- sweep matches the legacy id format). Findings are fully regenerable derived +-- data — the next `loomweave analyze` repopulates them with content-keyed ids — +-- so the clean fix is to drop the legacy rows here. (The store is itself a +-- regenerable cache per ADR-005 as reversed by C1/weft-d822a7de2d.) +DELETE FROM findings; diff --git a/crates/loomweave-storage/src/schema.rs b/crates/loomweave-storage/src/schema.rs index 1c474e56..91c21b3a 100644 --- a/crates/loomweave-storage/src/schema.rs +++ b/crates/loomweave-storage/src/schema.rs @@ -60,12 +60,17 @@ const MIGRATIONS: &[Migration] = &[ name: "0009_drop_fts_content_text", sql: include_str!("../migrations/0009_drop_fts_content_text.sql"), }, + Migration { + version: 10, + name: "0010_dedupe_findings_drop_run_scoped_ids", + sql: include_str!("../migrations/0010_dedupe_findings_drop_run_scoped_ids.sql"), + }, ]; /// Highest migration version known to this build. Mirrored into the /// `SQLite` `user_version` header (STO-02) so a future-built database is /// refused at open instead of silently corrupting state. 
-pub const CURRENT_SCHEMA_VERSION: u32 = 9; +pub const CURRENT_SCHEMA_VERSION: u32 = 10; const _CURRENT_SCHEMA_VERSION_MATCHES_LAST_MIGRATION: () = { // Compile-time check: `CURRENT_SCHEMA_VERSION` must equal the highest diff --git a/crates/loomweave-storage/src/writer.rs b/crates/loomweave-storage/src/writer.rs index 0a3a3721..bc1d22b0 100644 --- a/crates/loomweave-storage/src/writer.rs +++ b/crates/loomweave-storage/src/writer.rs @@ -883,16 +883,20 @@ fn insert_finding( /// path (REQ-ANALYZE-04 deletion findings, emitted after `CommitRun` via /// `query_time_write`) share this so the SQL has a single home. fn write_finding_row(conn: &Connection, finding: &FindingRecord) -> Result<()> { - // ON CONFLICT(id) DO UPDATE makes the finding path idempotent under - // `--resume`: a finding id embeds its run_id (`core:finding:{run_id}:…`), - // so cross-run ids never collide and a fresh run only ever INSERTs. A - // resume re-walks under the *same* run_id and re-generates the same ids; - // without the upsert it would fail on `UNIQUE constraint: findings.id`. - // The conflict clause refreshes analysis-derived columns from the re-walk - // but PRESERVES the lifecycle columns (`status`, `suppression_reason`, - // `filigree_issue_id`) and `created_at` — the same first-seen-preserving - // discipline `insert_entity` applies. (These lifecycle columns are never - // mutated locally today; preserving them keeps that invariant if they are.) + // ON CONFLICT(id) DO UPDATE makes the finding path idempotent across BOTH a + // `--resume` re-walk and a fresh re-analyze. A finding id is keyed on its + // CONTENT (`core:finding:`, e.g. the anchor entity + rule + + // evidence hash) and NOT on run_id (L1 fix, clarion-772ff358da / ADR-047): + // the same logical finding regenerates the same id every run, so the upsert + // refreshes it in place instead of inserting a duplicate. 
The run_id *column* + // updates to the latest run (`run_id = excluded.run_id`), so `findings_for_emit` + // (WHERE run_id = current) still returns exactly the reproduced set. The + // conflict clause refreshes analysis-derived columns from the re-walk but + // PRESERVES the lifecycle columns (`status`, `suppression_reason`, + // `filigree_issue_id`) and `created_at` — so a finding's Filigree linkage and + // suppression now SURVIVE re-analysis (a run_id-scoped id used to orphan them + // by minting a fresh row each run). Same first-seen-preserving discipline + // `insert_entity` applies. conn.execute( "INSERT INTO findings ( \ id, tool, tool_version, run_id, rule_id, kind, severity, confidence, \ diff --git a/crates/loomweave-storage/tests/schema_apply.rs b/crates/loomweave-storage/tests/schema_apply.rs index 5da82010..4d00010d 100644 --- a/crates/loomweave-storage/tests/schema_apply.rs +++ b/crates/loomweave-storage/tests/schema_apply.rs @@ -842,7 +842,7 @@ fn migrations_are_idempotent() { let tempdir = tempfile::tempdir().unwrap(); let mut conn = open_fresh(&tempdir); schema::apply_migrations(&mut conn).expect("second apply should be a no-op"); - assert_eq!(schema::applied_count(&conn).unwrap(), 9); + assert_eq!(schema::applied_count(&conn).unwrap(), 10); let tables_after = table_names(&conn); assert!(tables_after.contains(&"entities".to_owned())); } @@ -856,7 +856,7 @@ fn schema_migrations_records_each_applied_migration() { row.get(0) }) .unwrap(); - assert_eq!(count, 9); + assert_eq!(count, 10); let names: Vec = { let mut stmt = conn .prepare("SELECT name FROM schema_migrations ORDER BY version") @@ -876,6 +876,7 @@ fn schema_migrations_records_each_applied_migration() { "0007_run_analyzed_commit", "0008_run_owner_heartbeat", "0009_drop_fts_content_text", + "0010_dedupe_findings_drop_run_scoped_ids", ] ); } diff --git a/docs/loomweave/adr/ADR-047-content-keyed-finding-ids.md b/docs/loomweave/adr/ADR-047-content-keyed-finding-ids.md new file mode 100644 index 
00000000..cb9f53b8 --- /dev/null +++ b/docs/loomweave/adr/ADR-047-content-keyed-finding-ids.md @@ -0,0 +1,112 @@ +# ADR-047: Content-keyed finding IDs (cross-run idempotent findings) + +**Status**: Accepted +**Date**: 2026-06-08 +**Deciders**: john@pgpl.net +**Context**: Weft dogfood-#2 surfaced that Loomweave's `findings` table grows on +every re-analyze of an unchanged tree (`255 → 259 → 263`), so an agent browsing +findings sees ever-multiplying duplicates (L1, weft-f506e5f845 / clarion-772ff358da). + +## Summary + +A finding's primary key changes from a **run-scoped** id +(`core:finding:{run_id}:<discriminator>`) to a **content-keyed** id +(`core:finding:<discriminator>`). The `ON CONFLICT(id) DO UPDATE` upsert in +`write_finding_row` now de-duplicates the same logical finding across *fresh* +runs — not just a `--resume` re-walk — and the `run_id` *column* updates to the +latest run. Migration `0010` clears legacy run-scoped finding rows (findings are +regenerable). The accepted trade-off: **findings are current-state, not a +per-run append-log.** + +## Context + +`write_finding_row` upserts `ON CONFLICT(id)`. Every finding id embedded its +`run_id`, with the *intent* (documented in the old code comment) that "cross-run +ids never collide and a fresh run only ever INSERTs" — preserving a per-run +history. The consequence in practice: + +- Each `loomweave analyze` mints a **new** run_id, so the same logical finding + (same anchor entity, rule, evidence) became a **new row** every run. The + `findings` table accumulated unboundedly. An agent calling a finding browser + (which does not filter by the current run) saw all historical copies. +- Worse, the lifecycle columns the upsert is careful to preserve + (`status`, `suppression_reason`, `filigree_issue_id`, `created_at`) were + **orphaned** every run: the fresh row started at `status='open'`, + `filigree_issue_id=NULL`. A finding promoted to a Filigree issue, or + suppressed, silently lost that linkage on the next analyze.
+ +Every discriminator was *already* content-derived — `entity-deleted:{entity_id}`, +`guidance-orphan:{guidance_id}:{deleted_entity_id}`, `secret:{blake3(entity,rule,evidence)}`, +`source-walk:{blake3(...)}`, `weak-modularity` (one per project), etc. — so the +`run_id` segment was the *only* thing making the id run-unique. + +## Decision + +1. **Drop `run_id` from the finding id** at every minting site (`analyze.rs` + ×12, `secret_scan/findings.rs`). The id is now `core:finding:<discriminator>`, + stable across runs for the same logical finding. +2. **The upsert is unchanged** and now does the right thing across runs: + `ON CONFLICT(id) DO UPDATE` refreshes analysis-derived columns + `run_id` + + `updated_at` to the latest run, while **preserving** `status`, + `suppression_reason`, `filigree_issue_id`, `created_at`. A finding's Filigree + linkage and suppression therefore **survive** re-analysis. +3. **`findings_for_emit(run_id)` is unchanged** (`WHERE run_id = ?1`). A + reproduced finding carries the current run_id (the upsert set it), so it is in + the emit set exactly as before; a finding that did *not* reproduce keeps its + prior run_id and falls out of the set — identical to today's behavior, and the + existing Filigree prune/soft-archive path handles its lifecycle. +4. **Migration `0010` clears legacy findings** (`DELETE FROM findings`). On an + existing database the new content-keyed rows would otherwise land *beside* the + orphaned run-scoped ones (a one-time worse doubling that no sweep matches). + Findings are fully regenerable derived data; the next `analyze` repopulates + them. (The store is itself a regenerable cache per ADR-005 as reversed by + C1/weft-d822a7de2d, so dropping derived rows is consistent.)
+ +### Resume idempotency is unaffected (in fact stronger) + +A content-keyed id is deterministic on the finding's content, so a `--resume` +re-walk (same inputs) regenerates the same id and upserts — exactly as the +run-scoped id did under the same-run_id resume path, now independent of run_id. + +## Alternatives Considered + +- **Keep run-scoped ids + a stale-finding sweep that deletes prior runs' + findings on commit.** Rejected as the primary fix: it would still orphan + lifecycle (the new run's rows start fresh) and is strictly more machinery than + making the id content-keyed. A prior-index-style sweep for findings whose code + was *fixed* (no longer reproduce) remains a useful **follow-on** (mirrors + entity diffing) and is tracked under clarion-772ff358da. +- **Change filigree dedup.** Not applicable: Loomweave's finding id is **never + sent on the wire** — the emit payload is path/rule_id/message/severity/line + + metadata, and Filigree computes its own fingerprint and assigns its own + `clarion-sf-*` id. The id scheme is purely Loomweave-internal. + +## Consequences + +### Positive +- The `findings` table no longer accumulates duplicates across re-analyses. +- A finding's `filigree_issue_id` and suppression `status` now survive + re-analysis instead of being orphaned every run. + +### Negative / accepted trade-off +- **Findings are current-state, not a per-run append-log.** The prior (commented) + intent of keeping each run's findings as distinct rows is reversed. Per-run + finding history is available via the `runs` table + emit records, not by + multiple rows per finding. + +### Neutral +- `run_id` remains a column on `findings` (last-seen run), still NOT NULL, still + the key `findings_for_emit` filters on. + +## Related Decisions + +- [ADR-005](./ADR-005-loomweave-dir-tracking.md) (as reversed by C1) — the store + is a regenerable cache, which is why migration `0010` may drop derived findings. 
+- [ADR-011](./ADR-011-writer-actor-concurrency.md) — `write_finding_row` runs on + the writer actor; the upsert semantics live there. + +## References + +- clarion-772ff358da — the L1 implementation issue (Part A here; Part B = a + project-wide finding browser / `has_findings` filter). +- weft-f506e5f845 — the Weft dogfood-#2 residual-tail campaign issue. diff --git a/docs/loomweave/adr/README.md b/docs/loomweave/adr/README.md index f8200b7e..a39109f4 100644 --- a/docs/loomweave/adr/README.md +++ b/docs/loomweave/adr/README.md @@ -47,6 +47,7 @@ This folder is the canonical home for authored Loomweave architecture decision r | [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — `.loomweave/ephemeral.port` as a normative cross-product file contract (loopback-only, port-only, atomic) + consume-time resolution precedence (explicit > file > config > none), per-project deterministic port, installer stops pinning 9111; relates to ADR-034 | Accepted; port path relocated by ADR-046 | | [ADR-045](./ADR-045-worktree-source-staleness.md) | Worktree-source staleness — `Staleness::StaleWorktree` + `worktree_dirty` via hardened, hash-free `git ls-files --others` scoped to ingested extensions; closes the unwatched-top-level-dir blind spot without `git status`'s filter-RCE vector; builds on ADR-013/021 untrusted-corpus posture | Accepted | | [ADR-046](./ADR-046-weft-store-consolidation.md) | Weft store consolidation — store moves `.loomweave/` → `.weft/loomweave/` (clean break, single `loomweave_core::store` helper); operator-private `weft.toml:[loomweave].store_dir` override (read-only, fail-soft C-9c); Filigree sibling resolution prefers `.weft/filigree/` and tolerates legacy `.filigree/`; amends ADR-005/040/044 | Accepted | +| [ADR-047](./ADR-047-content-keyed-finding-ids.md) | Content-keyed finding IDs — finding id drops `{run_id}` (`core:finding:`) so `ON CONFLICT(id)` de-dupes findings across fresh re-analyses (not just 
`--resume`) and `filigree_issue_id`/`status` survive re-analysis; migration `0010` clears legacy run-scoped rows; accepted trade-off = findings are current-state, not a per-run append-log (L1, weft-f506e5f845) | Accepted | ## Backlog still tracked in the detailed design From cb49008c717ce26b911f589e906c78755c22457d Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 08:47:59 +1000 Subject: [PATCH 46/60] feat(mcp): whole-project finding browser + has_findings filter; honest worktree_dirty note (L1, N5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit L1 Part B (clarion-772ff358da) — close the count-without-list gap: - New `project_finding_list` MCP tool returns EVERY finding across the project with NO entity id required; each row carries its anchoring entity { id, sei, file, line } + tool/rule_id/kind/severity/status. page.total is counted off the bare `findings` table (byte-identical to the snapshot's finding_count query), so an unfiltered list reconciles with project_status_get's finding count by construction; the entity JOIN only enriches the page rows. Honest-empty (0 findings -> []). - `entity_wardline_list` (find_by_wardline) accepts `has_findings: true` to page only the taint-fact entities that ALSO carry a finding, instead of every blob. Declared in a new `wardline_facet_schema()` so the additionalProperties:false schema actually permits the param. N5 — make `worktree_dirty` honest at the consumer surface: - project_status_get now emits `worktree_dirty_note` on EVERY path (true/false/null) disclosing the field measures un-indexed UNTRACKED source, not the git working-tree state, so a `false` is not "git clean". - Decision: keep the field name (no rename -> no dangling legis signing gate) and scope detection to untracked-only. 
Broadening to modified tracked source via `git diff`/`status --porcelain` would hash working-tree content, reintroducing the corpus-controlled code-exec vector hardened_git avoids; modified *indexed* files already surface via `staleness` (-> stale). Documented in the field doc + the consumer note, which tells a freshness/signing gate to key on `staleness == fresh`. Tools surface 39 -> 40; tool-count/positional tests and current-surface docs (README, web concepts/reference) updated. SEI scheme untouched; read-only surface (no new writes). Co-Authored-By: Claude Opus 4.8 (1M context) --- README.md | 2 +- crates/loomweave-mcp/src/catalogue/faceted.rs | 35 +++- .../loomweave-mcp/src/catalogue/inspection.rs | 154 +++++++++++++++ crates/loomweave-mcp/src/lib.rs | 43 ++++- crates/loomweave-mcp/src/snapshot.rs | 11 ++ crates/loomweave-mcp/src/tools/status.rs | 23 +++ crates/loomweave-mcp/tests/catalogue_tools.rs | 179 ++++++++++++++++++ crates/loomweave-mcp/tests/storage_tools.rs | 28 +++ web/docs/concepts/mcp-tools.md | 3 +- web/docs/reference/mcp-tools.md | 12 +- 10 files changed, 481 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index d3d97ae9..9083c0b9 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ and trust-topology tools. ## What it does today -`loomweave serve` exposes a 39-tool MCP surface that a consult-mode agent calls +`loomweave serve` exposes a 40-tool MCP surface that a consult-mode agent calls instead of grep-and-read. 
The core tool families are: | Family | Examples | diff --git a/crates/loomweave-mcp/src/catalogue/faceted.rs b/crates/loomweave-mcp/src/catalogue/faceted.rs index 52235cd6..8fa9b84f 100644 --- a/crates/loomweave-mcp/src/catalogue/faceted.rs +++ b/crates/loomweave-mcp/src/catalogue/faceted.rs @@ -136,6 +136,7 @@ impl ServerState { ) -> std::result::Result { let tier = optional_facet(arguments, "tier")?; let group = optional_facet(arguments, "group")?; + let has_findings = optional_bool(arguments, "has_findings")?; let scope = RawScope::parse(arguments)?; let page = Page::parse(arguments, FACET_PAGE_DEFAULT, FACET_PAGE_MAX)?; let project_root = self.project_root.clone(); @@ -146,12 +147,32 @@ impl ServerState { let (candidates, scan_truncated) = entities_with_wardline_facts(conn, FACET_SCAN_CAP)?; + // When `has_findings` is set, restrict to entities that carry at + // least one finding — so an agent pages the fact-carrying-AND-flawed + // entities, not every taint-fact blob (L1 complement). One bounded + // query builds the set; absent the flag the filter is a no-op. + let finding_anchor_ids: Option> = if has_findings + { + let mut set = std::collections::HashSet::new(); + let mut stmt = conn.prepare("SELECT DISTINCT entity_id FROM findings")?; + let mut rows = stmt.query([])?; + while let Some(row) = rows.next()? { + set.insert(row.get::<_, String>(0)?); + } + Some(set) + } else { + None + }; + // Scope-filter first, then fetch the (opaque) blobs only for the // survivors — a narrow scope avoids reading every candidate blob. 
let in_scope: Vec = candidates .into_iter() .filter(|e| { filter.contains(&e.id, e.source_file_path.as_deref(), &project_root) + && finding_anchor_ids + .as_ref() + .is_none_or(|ids| ids.contains(&e.id)) }) .collect(); let ids: Vec<String> = in_scope.iter().map(|e| e.id.clone()).collect(); @@ -200,7 +221,7 @@ impl ServerState { let mut response = json!({ "entities": entities, - "facet": { "tier": tier, "group": group }, + "facet": { "tier": tier, "group": group, "has_findings": has_findings }, "page": { "total": total, "offset": page.offset, @@ -259,6 +280,18 @@ fn optional_facet( } } +/// Parse an optional boolean argument (`has_findings`). Absent / null → `false`. +fn optional_bool( + arguments: &serde_json::Map<String, Value>, + field: &str, +) -> std::result::Result<bool, ParamError> { + match arguments.get(field) { + None | Some(Value::Null) => Ok(false), + Some(Value::Bool(value)) => Ok(*value), + Some(_) => Err(ParamError::new(&format!("{field} must be a boolean"))), + } +} + /// Best-effort match of a wanted facet value against a field on the opaque /// Wardline blob. `None` wanted → always matches (no filter). Comparison is by /// stringified value so `2` matches `"2"`. diff --git a/crates/loomweave-mcp/src/catalogue/inspection.rs b/crates/loomweave-mcp/src/catalogue/inspection.rs index 70848d8b..eaed3823 100644 --- a/crates/loomweave-mcp/src/catalogue/inspection.rs +++ b/crates/loomweave-mcp/src/catalogue/inspection.rs @@ -255,6 +255,101 @@ impl ServerState { Ok(flatten_storage_envelope_result(result)) } + /// `project_finding_list(filter?)` — every finding across the WHOLE project, + /// no entity id required, so an agent can go from `project_status`'s + /// `findings: N` count straight to the N findings (L1). Each row carries its + /// anchoring entity (id, `sei`, file, line) plus the finding's + /// `tool/rule_id/kind/severity/status/message/confidence/created_at`. Optionally + /// filtered by `filter.kind`/`severity`/`status` (same vocabulary as + /// `findings_for`). 
Bounded (limit/offset, total/truncated). With no filter + /// the page `total` reconciles with `project_status`'s finding count: it is + /// computed from the bare `findings` table (byte-identical to that count's + /// query), the entity join only enriches the returned rows. Honest-empty: + /// a project with no findings returns an empty list, not an error. Stateless. + pub(crate) async fn tool_project_findings( + &self, + arguments: &serde_json::Map, + ) -> std::result::Result { + let filter = FindingFilter::parse(arguments)?; + let page = Page::parse(arguments, FINDINGS_PAGE_DEFAULT, FINDINGS_PAGE_MAX)?; + let result = self + .readers + .with_reader(move |conn| { + let kind = filter.kind.as_deref(); + let severity = filter.severity.as_deref(); + let status = filter.status.as_deref(); + + // Reconciliation contract (L1 acceptance #2): count off the bare + // `findings` table with the SAME predicate the snapshot's + // `finding_count()` uses, so an unfiltered total equals + // project_status's finding count. The entity join below only + // enriches the page rows — it never drives the total. + let total: usize = conn.query_row( + "SELECT COUNT(*) \ + FROM findings \ + WHERE (?1 IS NULL OR kind = ?1) \ + AND (?2 IS NULL OR severity = ?2) \ + AND (?3 IS NULL OR status = ?3)", + rusqlite::params![kind, severity, status], + |row| { + let count: i64 = row.get(0)?; + Ok(usize::try_from(count).unwrap_or(usize::MAX)) + }, + )?; + + // Page rows, joined to the anchoring entity for file:line. The FK + // (`findings.entity_id REFERENCES entities(id) ON DELETE CASCADE`) + // guarantees every finding has a live anchor, so this inner join + // never drops a counted row. 
+ let mut stmt = conn.prepare( + "SELECT f.id, f.tool, f.rule_id, f.kind, f.severity, f.status, \ + f.message, f.confidence, f.created_at, \ + f.entity_id, e.source_file_path, e.source_line_start \ + FROM findings f \ + JOIN entities e ON e.id = f.entity_id \ + WHERE (?1 IS NULL OR f.kind = ?1) \ + AND (?2 IS NULL OR f.severity = ?2) \ + AND (?3 IS NULL OR f.status = ?3) \ + ORDER BY f.created_at DESC, f.id \ + LIMIT ?4 OFFSET ?5", + )?; + let limit = i64::try_from(page.limit).unwrap_or(i64::MAX); + let offset = i64::try_from(page.offset).unwrap_or(i64::MAX); + let mut rows = + stmt.query(rusqlite::params![kind, severity, status, limit, offset])?; + let mut page_rows: Vec<ProjectFindingRow> = Vec::new(); + while let Some(row) = rows.next()? { + page_rows.push(ProjectFindingRow::from_row(row)?); + } + + let returned = page_rows.len(); + // Resolve each anchor's SEI while a reader connection is in scope. + let findings: Vec<Value> = page_rows + .iter() + .map(|row| { + let sei = sei_for_locator(conn, &row.entity_id).ok().flatten(); + row.to_json(sei.as_deref()) + }) + .collect(); + let meta = json!({ + "total": total, + "offset": page.offset, + "limit": page.limit, + "returned": returned, + "truncated": page.offset.saturating_add(returned) < total, + }); + + Ok(success_envelope(json!({ + "findings": findings, + "filter": filter.to_json(), + "page": meta, + "scan_truncated": false, + }))) + }) + .await; + Ok(flatten_storage_envelope_result(result)) + } + /// `wardline_for(entity_id)` — the Wardline metadata recorded for the entity /// (declared tier, groups, boundary contracts), returned **verbatim**: the /// `wardline_json` blob is opaque to Loomweave (federation opacity contract). @@ -481,3 +576,62 @@ impl FindingRow { }) } } + +/// A finding row for the project-wide list, carrying its anchoring entity's +/// locator + `file:line` (the SEI is resolved at render time). 
No `related_entities` +/// — the project list answers "where are the N findings", and each row's primary +/// anchor is the entity it hangs on. +#[derive(Clone)] +struct ProjectFindingRow { + id: String, + tool: Option, + rule_id: Option, + kind: String, + severity: String, + status: String, + message: Option, + confidence: Option, + created_at: Option, + entity_id: String, + entity_file: Option, + entity_line: Option, +} + +impl ProjectFindingRow { + fn from_row(row: &rusqlite::Row) -> rusqlite::Result { + Ok(Self { + id: row.get(0)?, + tool: row.get(1)?, + rule_id: row.get(2)?, + kind: row.get(3)?, + severity: row.get(4)?, + status: row.get(5)?, + message: row.get(6)?, + confidence: row.get(7)?, + created_at: row.get(8)?, + entity_id: row.get(9)?, + entity_file: row.get(10)?, + entity_line: row.get(11)?, + }) + } + + fn to_json(&self, sei: Option<&str>) -> Value { + json!({ + "id": self.id, + "tool": self.tool, + "rule_id": self.rule_id, + "kind": self.kind, + "severity": self.severity, + "status": self.status, + "message": self.message, + "confidence": self.confidence, + "created_at": self.created_at, + "entity": { + "id": self.entity_id, + "sei": sei, + "file": self.entity_file, + "line": self.entity_line, + }, + }) + } +} diff --git a/crates/loomweave-mcp/src/lib.rs b/crates/loomweave-mcp/src/lib.rs index 6a7ced3a..3f497f64 100644 --- a/crates/loomweave-mcp/src/lib.rs +++ b/crates/loomweave-mcp/src/lib.rs @@ -567,8 +567,8 @@ pub fn list_tools() -> Vec { }, ToolDefinition { name: "entity_wardline_list", - description: "Return entities carrying a Wardline taint fact, optionally filtered by `tier` and/or `group`, within an optional `scope` (entity id → descendants, OR path glob; omitted → whole project). The Wardline blob is opaque to Loomweave: tier/group filtering is best-effort against a top-level field on the blob and honest-empty when absent. Each entity carries its `wardline` blob verbatim plus its `sei`. Bounded (limit/offset, page.total/truncated). 
Facts are populated via Filigree Flow-B. No LLM call.", - input_schema: scope_facet_schema(&[("tier", false), ("group", false)]), + description: "Return entities carrying a Wardline taint fact, optionally filtered by `tier` and/or `group`, within an optional `scope` (entity id → descendants, OR path glob; omitted → whole project). Pass `has_findings: true` to return only entities that ALSO carry at least one finding — page just the fact-carrying-and-flawed entities instead of every taint-fact blob. The Wardline blob is opaque to Loomweave: tier/group filtering is best-effort against a top-level field on the blob and honest-empty when absent. Each entity carries its `wardline` blob verbatim plus its `sei`. Bounded (limit/offset, page.total/truncated). Facts are populated via Filigree Flow-B. No LLM call.", + input_schema: wardline_facet_schema(), }, ToolDefinition { name: "module_circular_import_list", @@ -672,6 +672,27 @@ pub fn list_tools() -> Vec { "additionalProperties": false }), }, + ToolDefinition { + name: "project_finding_list", + description: "List findings across the WHOLE project — NO entity id required — so an agent can go from project_status_get's `findings: N` count straight to the N findings (the count-without-list gap). Each row carries its anchoring entity { id, sei, file, line } plus the finding's tool/rule_id/kind/severity/status/message/confidence/created_at. Optionally filtered by `filter.kind` (defect/fact/classification/metric/suggestion), `filter.severity` (INFO/WARN/ERROR/CRITICAL/NONE), and `filter.status` (open/acknowledged/suppressed/promoted_to_issue). Bounded (limit default 50, max 200; page.total/returned/truncated). With NO filter, page.total reconciles with project_status_get's finding count (both count the bare findings table). A project with no findings returns an empty list, not an error. 
No LLM call.", + input_schema: json!({ + "type": "object", + "properties": { + "filter": { + "type": "object", + "properties": { + "kind": {"type": "string"}, + "severity": {"type": "string"}, + "status": {"type": "string"} + }, + "additionalProperties": false + }, + "limit": {"type": "integer", "minimum": 1, "maximum": 200}, + "offset": {"type": "integer", "minimum": 0} + }, + "additionalProperties": false + }), + }, ] } @@ -752,6 +773,17 @@ fn scope_facet_schema(facets: &[(&str, bool)]) -> Value { }) } +/// Input schema for `entity_wardline_list`: the faceted tier/group schema plus a +/// `has_findings` boolean. Declared explicitly because the base schema sets +/// `additionalProperties: false`, which would otherwise reject the param. +fn wardline_facet_schema() -> Value { + let mut schema = scope_facet_schema(&[("tier", false), ("group", false)]); + if let Some(properties) = schema.get_mut("properties").and_then(Value::as_object_mut) { + properties.insert("has_findings".to_owned(), json!({"type": "boolean"})); + } + schema +} + fn confidence_schema() -> Value { json!({ "type": "string", @@ -1386,6 +1418,10 @@ impl ServerState { Ok(value) => value, Err(response) => return response.to_json_rpc(id), }, + "project_finding_list" => match self.tool_project_findings(arguments).await { + Ok(value) => value, + Err(response) => return response.to_json_rpc(id), + }, _ => unreachable!("known tools checked above"), }; @@ -4951,7 +4987,7 @@ mod tests { fn tools_list_exposes_exact_docstrings() { let tools = list_tools(); - assert_eq!(tools.len(), 39); + assert_eq!(tools.len(), 40); assert_eq!(tools[0].name, "entity_at"); assert_eq!( tools[0].description, @@ -5059,6 +5095,7 @@ mod tests { assert_eq!(tools[36].name, "entity_recent_change_list"); assert_eq!(tools[37].name, "entity_dead_list"); assert_eq!(tools[38].name, "entity_semantic_search_list"); + assert_eq!(tools[39].name, "project_finding_list"); } #[test] diff --git a/crates/loomweave-mcp/src/snapshot.rs 
b/crates/loomweave-mcp/src/snapshot.rs index 52138095..841b5777 100644 --- a/crates/loomweave-mcp/src/snapshot.rs +++ b/crates/loomweave-mcp/src/snapshot.rs @@ -128,6 +128,17 @@ pub struct ProjectSnapshot { /// extensions, so an untracked non-source file (a scratch `notes.txt`) never /// flags it, and the untrusted-corpus posture is preserved (no working-tree /// hashing — see [`loomweave_core::list_untracked_files`]). + /// + /// **Scope (N5): UNTRACKED source only.** This flag deliberately does NOT + /// cover a MODIFIED already-tracked source file. Detecting modified-tracked + /// state (`git diff` / `git status --porcelain`) hashes working-tree content, + /// reintroducing exactly the corpus-controlled code-exec vector the hardening + /// avoids by using `ls-files --others` (which only lists paths) — so it is + /// declined. A modified *indexed* file is already caught by the mtime + /// modification scan ([`Staleness::Stale`]), so the staleness verdict reflects + /// mid-dev edits even though this boolean does not. The consumer-facing + /// `project_status_get.worktree_dirty_note` states this so a signing/freshness + /// gate keys on `staleness`, not on this flag alone. worktree_dirty: Option<bool>, /// `true` when this snapshot was produced from a *failure* rather than a /// healthy read: at least one backing SQL query failed unexpectedly and was diff --git a/crates/loomweave-mcp/src/tools/status.rs b/crates/loomweave-mcp/src/tools/status.rs index f2256bfd..9cb32942 100644 --- a/crates/loomweave-mcp/src/tools/status.rs +++ b/crates/loomweave-mcp/src/tools/status.rs @@ -19,6 +19,24 @@ use crate::{ timestamp_day_index, tool_error_envelope, verified_source_excerpt, }; +/// Consumer-visible scope note for `worktree_dirty`, emitted on every path (N5). +/// `worktree_dirty` measures UN-INDEXED UNTRACKED source, not the git +/// working-tree state, so a bare `false`/`null` must not be read as "git clean". 
+/// It is deliberately scoped to untracked source: a MODIFIED already-tracked +/// source file does not set it — broadening detection would require working-tree +/// hashing, which the untrusted-corpus posture forbids (see +/// `loomweave_core::list_untracked_files` / `hardened_git`). Such edits surface +/// via `staleness` (→ `stale`) instead, so a freshness/signing gate must require +/// `staleness == fresh`, not `worktree_dirty == false` alone. +const WORKTREE_DIRTY_NOTE: &str = "`worktree_dirty` reports UN-INDEXED UNTRACKED source files (an ignore-aware \ + `git ls-files --others` scoped to ingested extensions), NOT the git working-tree \ + state: a `false`/`null` value does NOT mean the git tree is clean. It is scoped to \ + UNTRACKED source only — a MODIFIED already-tracked source file does not set this flag \ + (broadening it would require working-tree hashing, declined under the untrusted-corpus \ + posture); such edits surface via `staleness` (→ `stale`) instead. A freshness or \ + signing gate must require `staleness == fresh`, not `worktree_dirty == false` alone. \ + `null` = not a git work tree, git unavailable, or nothing ingested to scope against."; + impl ServerState { pub(crate) async fn tool_source_for_entity( &self, @@ -309,6 +327,11 @@ impl ServerState { "staleness": serde_json::to_value(snapshot.staleness()).unwrap_or(Value::Null), "staleness_note": staleness_note, "worktree_dirty": snapshot.worktree_dirty(), + // N5: `worktree_dirty` is a bare boolean a consumer (and legis, which + // gates signing on it) can misread as "git clean" on the false/null + // path. Disclose its scope on EVERY path — true, false, and null — so + // the meaning is readable without reading loomweave source. 
+ "worktree_dirty_note": WORKTREE_DIRTY_NOTE, "scan_truncated": snapshot.scan_truncated(), "last_analyzed_at": snapshot.last_analyzed_at(), "git_sha": analyzed_git_sha, diff --git a/crates/loomweave-mcp/tests/catalogue_tools.rs b/crates/loomweave-mcp/tests/catalogue_tools.rs index aeb4a1a2..0a22b8c3 100644 --- a/crates/loomweave-mcp/tests/catalogue_tools.rs +++ b/crates/loomweave-mcp/tests/catalogue_tools.rs @@ -373,6 +373,139 @@ async fn findings_for_empty_entity_is_not_an_error() { assert!(env["result"]["findings"].as_array().unwrap().is_empty()); } +// ---- project_finding_list (L1: whole-project finding browser) ----------- + +#[tokio::test] +async fn project_finding_list_total_reconciles_with_project_status_finding_count() { + // The L1 acceptance: an agent must be able to go from project_status's + // `findings: N` straight to the N findings — so the project-wide list's + // page.total must reconcile with project_status_get's finding count. + let (project, db, conn) = open_project(); + insert_entity(&conn, "python:function:a", "function", "a.py", Some((1, 2))); + insert_entity(&conn, "python:function:b", "function", "b.py", Some((3, 4))); + insert_finding(&conn, "f-1", "python:function:a", "defect", "WARN", "open"); + insert_finding(&conn, "f-2", "python:function:a", "defect", "ERROR", "open"); + insert_finding(&conn, "f-3", "python:function:b", "fact", "INFO", "open"); + drop(conn); + let state = state_for(project.path(), &db); + + let status = call_tool(&state, "project_status", json!({})).await; + let count = status["result"]["counts"]["findings"].as_i64().unwrap(); + assert_eq!(count, 3, "{status}"); + + let env = call_tool(&state, "project_finding_list", json!({})).await; + assert_eq!(env["ok"], true, "{env}"); + assert_eq!( + env["result"]["page"]["total"].as_i64().unwrap(), + count, + "project_finding_list total must reconcile with project_status finding count: {env}" + ); + assert_eq!( + env["result"]["findings"].as_array().unwrap().len(), + 3, + 
"{env}" + ); +} + +#[tokio::test] +async fn project_finding_list_honest_empty_when_no_findings() { + // Honest-empty: a project with 0 findings returns an empty list, not an error. + let (project, db, conn) = open_project(); + insert_entity(&conn, "python:function:a", "function", "a.py", Some((1, 2))); + drop(conn); + let state = state_for(project.path(), &db); + + let env = call_tool(&state, "project_finding_list", json!({})).await; + assert_eq!(env["ok"], true, "{env}"); + assert_eq!(env["result"]["page"]["total"], 0, "{env}"); + assert!( + env["result"]["findings"].as_array().unwrap().is_empty(), + "{env}" + ); +} + +#[tokio::test] +async fn project_finding_list_rows_carry_entity_sei_file_line_severity_rule() { + // Each finding carries its anchoring entity SEI + file:line + severity/rule — + // with no entity id supplied by the caller. + let (project, db, conn) = open_project(); + insert_entity( + &conn, + "python:function:m.f", + "function", + "m.py", + Some((4, 9)), + ); + insert_alive_sei(&conn, "loomweave:eid:abc123", "python:function:m.f"); + insert_finding( + &conn, + "f-1", + "python:function:m.f", + "defect", + "WARN", + "open", + ); + drop(conn); + let state = state_for(project.path(), &db); + + let env = call_tool(&state, "project_finding_list", json!({})).await; + assert_eq!(env["ok"], true, "{env}"); + let row = &env["result"]["findings"][0]; + assert_eq!(row["rule_id"], "R1", "{env}"); + assert_eq!(row["severity"], "WARN", "{env}"); + assert_eq!(row["entity"]["id"], "python:function:m.f", "{env}"); + assert_eq!(row["entity"]["sei"], "loomweave:eid:abc123", "{env}"); + assert_eq!(row["entity"]["file"], "m.py", "{env}"); + assert_eq!(row["entity"]["line"], 4, "{env}"); +} + +#[tokio::test] +async fn project_finding_list_filters_and_paginates() { + let (project, db, conn) = open_project(); + insert_entity(&conn, "python:function:a", "function", "a.py", Some((1, 2))); + for i in 0..5 { + insert_finding( + &conn, + &format!("f-{i}"), + 
"python:function:a", + "defect", + "WARN", + "open", + ); + } + insert_finding( + &conn, + "z-crit", + "python:function:a", + "defect", + "CRITICAL", + "open", + ); + drop(conn); + let state = state_for(project.path(), &db); + + // Filter: only the CRITICAL one. + let env = call_tool( + &state, + "project_finding_list", + json!({"filter": {"severity": "CRITICAL"}}), + ) + .await; + assert_eq!(env["result"]["page"]["total"], 1, "{env}"); + assert_eq!(env["result"]["findings"][0]["id"], "z-crit", "{env}"); + + // Paginate over the full set (6 findings). + let env = call_tool( + &state, + "project_finding_list", + json!({"limit": 2, "offset": 0}), + ) + .await; + assert_eq!(env["result"]["page"]["total"], 6, "{env}"); + assert_eq!(env["result"]["page"]["returned"], 2, "{env}"); + assert_eq!(env["result"]["page"]["truncated"], true, "{env}"); +} + // ---- guidance_for ------------------------------------------------------- #[tokio::test] @@ -732,6 +865,52 @@ async fn find_by_wardline_honest_empty_when_no_facts() { assert_eq!(env["result"]["signal"]["available"], false); } +#[tokio::test] +async fn find_by_wardline_has_findings_filter_restricts_to_fact_carrying_entities() { + // L1 complement: page only the wardline entities that actually carry + // findings, instead of every taint-fact-bearing entity. + let (project, db, conn) = open_project(); + insert_entity(&conn, "python:function:a", "function", "a.py", Some((1, 2))); + insert_entity(&conn, "python:function:b", "function", "b.py", Some((1, 2))); + insert_taint_fact(&conn, "python:function:a", r#"{"tier":"exact"}"#); + insert_taint_fact(&conn, "python:function:b", r#"{"tier":"exact"}"#); + // Only `a` carries a finding. + insert_finding(&conn, "f-1", "python:function:a", "defect", "WARN", "open"); + drop(conn); + let state = state_for(project.path(), &db); + + // Unfiltered: both taint-fact entities. 
+ let env = call_tool(&state, "find_by_wardline", json!({})).await; + assert_eq!(env["result"]["page"]["total"], 2, "{env}"); + + // has_findings: true → only `a`. + let env = call_tool(&state, "find_by_wardline", json!({"has_findings": true})).await; + assert_eq!(env["ok"], true, "{env}"); + assert_eq!(env["result"]["page"]["total"], 1, "{env}"); + assert_eq!( + env["result"]["entities"][0]["id"], "python:function:a", + "{env}" + ); + assert_eq!(env["result"]["facet"]["has_findings"], true, "{env}"); +} + +#[test] +fn entity_wardline_list_schema_declares_has_findings() { + // additionalProperties:false on the advertised schema would reject an + // undeclared param, so has_findings must be declared for clients to send it. + let tools = list_tools(); + let tool = tools + .iter() + .find(|t| t.name == "entity_wardline_list") + .expect("entity_wardline_list tool definition"); + assert_eq!( + tool.input_schema["properties"]["has_findings"], + json!({"type": "boolean"}), + "{:#}", + tool.input_schema + ); +} + // ---- graph shortcuts ---------------------------------------------------- fn insert_edge(conn: &Connection, kind: &str, from: &str, to: &str, confidence: &str) { diff --git a/crates/loomweave-mcp/tests/storage_tools.rs b/crates/loomweave-mcp/tests/storage_tools.rs index 563cc089..8b5c6f1f 100644 --- a/crates/loomweave-mcp/tests/storage_tools.rs +++ b/crates/loomweave-mcp/tests/storage_tools.rs @@ -4873,6 +4873,34 @@ async fn project_status_reports_counts_latest_run_and_plugins() { assert_eq!(result["filigree"], Value::Null); } +#[tokio::test] +async fn project_status_emits_worktree_dirty_scope_note_on_every_path() { + // N5: `worktree_dirty` is a bare boolean an agent (and legis, which gates + // signing on it) reads as "git clean" on the false/null path. Emit a + // consumer-visible scope note on EVERY path so the field's meaning — + // un-indexed UNTRACKED source, not the git working-tree state — is readable + // WITHOUT reading loomweave source. 
Here the project is not a git work tree, + // so worktree_dirty is null, the path the note must still cover. + let (project, db_path) = open_project(); + let state = state_for(project.path(), &db_path); + + let envelope = call_tool(&state, "project_status", json!({})).await; + assert_eq!(envelope["ok"], true, "{envelope}"); + let note = envelope["result"]["worktree_dirty_note"] + .as_str() + .expect("worktree_dirty_note must be present on the null/false path"); + // The note must disclose that the field is NOT the git working-tree state and + // that it is scoped to untracked source (modified tracked source surfaces via + // staleness), so a signing gate doesn't read false as "git clean". + let lower = note.to_lowercase(); + assert!(lower.contains("untracked"), "note: {note}"); + assert!(lower.contains("staleness"), "note: {note}"); + assert!( + lower.contains("not"), + "note must disclose it's not git-clean: {note}" + ); +} + #[tokio::test] async fn project_status_fresh_carries_staleness_note_caveat() { // The named tool an agent reads directly must disclose what "fresh" omits — diff --git a/web/docs/concepts/mcp-tools.md b/web/docs/concepts/mcp-tools.md index 6a61032b..7008fc88 100644 --- a/web/docs/concepts/mcp-tools.md +++ b/web/docs/concepts/mcp-tools.md @@ -25,7 +25,7 @@ trust it. ## Core tool families -Loomweave exposes a 39-tool MCP surface. Start with the navigation and briefing +Loomweave exposes a 40-tool MCP surface. Start with the navigation and briefing tools, then reach for catalogue shortcuts when you need a targeted structural query: @@ -42,6 +42,7 @@ query: | `source_for_entity(id)` | "Show the indexed source span and context." | | `orientation_pack(id or file/line)` | "Give me the entity, context, neighbors, paths, issues, and freshness in one packet." | | `guidance_for(id)` | "Which guidance sheets apply to this entity?" 
| +| `project_finding_list(filter?)` | "List every finding across the project (no entity id), each with its anchoring entity + file:line." | | `find_dead_code(scope?)` / `search_semantic(query)` | "Run advanced reachability or semantic-search queries when their inputs are available." | See the [MCP tool reference](../reference/mcp-tools.md) for parameters and the diff --git a/web/docs/reference/mcp-tools.md b/web/docs/reference/mcp-tools.md index ff74d7ba..59b46a61 100644 --- a/web/docs/reference/mcp-tools.md +++ b/web/docs/reference/mcp-tools.md @@ -1,7 +1,7 @@ # MCP tool reference The tools below are the core consult tools served by `loomweave serve` over the -MCP stdio transport. The live 1.2.x surface exposes 39 tools, including +MCP stdio transport. The live 1.2.x surface exposes 40 tools, including navigation, briefing, source inspection, guidance/finding enrichment, analyze lifecycle, freshness, faceted search, and structural shortcuts. Connect an MCP client and read `tools/list` for the complete, current catalogue. @@ -94,10 +94,16 @@ Use `tools/list` for exact schemas. The remaining tool families include: - Source and orientation: `source_for_entity`, `call_sites`, `orientation_pack`, `project_status`, `summary_preview_cost`. - Guidance and findings: `guidance_for`, `propose_guidance`, - `promote_guidance`, `findings_for`, `wardline_for`. + `promote_guidance`, `findings_for`, `wardline_for`, and + `project_finding_list` — the whole-project finding browser: every finding + across the project (no entity id required), each carrying its anchoring entity + SEI + `file:line` + severity/rule, paginated. Its unfiltered `page.total` + reconciles with `project_status`'s finding count. - Analyze and freshness: `analyze_start`, `analyze_status`, `analyze_cancel`, `index_diff`. 
-- Facets and shortcuts: `find_by_tag`, `find_by_kind`, `find_by_wardline`, +- Facets and shortcuts: `find_by_tag`, `find_by_kind`, `find_by_wardline` + (accepts `has_findings: true` to page only the taint-fact entities that also + carry a finding), `find_circular_imports`, `find_coupling_hotspots`, `find_entry_points`, `find_http_routes`, `find_data_models`, `find_tests`, `find_deprecations`, `find_todos`, `what_tests_this`, `high_churn`, `recently_changed`, From 982b463c8ba79f28541e762189a66c6dfcf6ab15 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 10:59:30 +1000 Subject: [PATCH 47/60] =?UTF-8?q?feat(findings):=20stale-finding=20sweep?= =?UTF-8?q?=20=E2=80=94=20retire=20findings=20whose=20code=20no=20longer?= =?UTF-8?q?=20reproduces=20(clarion-87c1eba2bd,=20ADR-048)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a clean full analyze, DELETE open, Filigree-unlinked findings whose run_id != the current run — the prior-index-style finding diff ADR-047 deferred. Reuses ADR-047's run_id signal (a reproduced finding upserts to the current run_id; a vanished one keeps its prior one). Closes the gap where a finding whose code was fixed (or deleted — `entities` is cumulative, so the findings→entities cascade never fires) lingered forever and the whole-project finding count only ever grew. Lifecycle preserved: findings carrying a filigree_issue_id or a non-`open` status (acknowledged/suppressed/promoted_to_issue) are operator decisions owned by the Filigree unseen/soft-archive path, never this local sweep — the predicate (filigree_issue_id IS NULL) is disjoint from that set. 
Gated to a clean full pass so `run_id <> current` unambiguously means "the run walked this finding's file and stopped reproducing it": !resume && skipped_files == 0 && source_walk_skipped_entries == 0 && !no_sei The walk-error clause closes a hole an adversarial review found: a single source-walk error (IO/permission/path-jail) leaves files unread yet reaches Completed with skipped_files == 0, which would otherwise retire a whole unwalked subtree's still-reproducing findings. - storage: `findings::sweep_stale_findings` + `WriterCmd::SweepStaleFindings` (query-time write, post-CommitRun, best-effort/enrich-only). - cli: sweep call site last in the analyze `Completed` arm, after every during-run and post-commit finding pass. - ADR-048 + README index row. - Tests: storage unit matrix (lifecycle exemptions) + writer-actor round-trip; two CLI integration tests that fail if the sweep or the skipped_files gate clause is removed (positive retirement + incremental-skip no-op). Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/analyze.rs | 52 ++++ crates/loomweave-cli/tests/analyze.rs | 252 ++++++++++++++++++ crates/loomweave-storage/src/commands.rs | 15 ++ crates/loomweave-storage/src/findings.rs | 221 +++++++++++++++ crates/loomweave-storage/src/lib.rs | 2 + crates/loomweave-storage/src/writer.rs | 9 + .../loomweave-storage/tests/writer_actor.rs | 105 ++++++++ .../adr/ADR-048-stale-finding-sweep.md | 155 +++++++++++ docs/loomweave/adr/README.md | 1 + 9 files changed, 812 insertions(+) create mode 100644 crates/loomweave-storage/src/findings.rs create mode 100644 docs/loomweave/adr/ADR-048-stale-finding-sweep.md diff --git a/crates/loomweave-cli/src/analyze.rs b/crates/loomweave-cli/src/analyze.rs index 04011497..ef0771ff 100644 --- a/crates/loomweave-cli/src/analyze.rs +++ b/crates/loomweave-cli/src/analyze.rs @@ -1437,6 +1437,58 @@ pub(crate) async fn run_with_options(project_path: PathBuf, options: AnalyzeOpti ), _ => {} } + // Stale-finding sweep 
(clarion-87c1eba2bd / ADR-048): retire findings + // whose code no longer reproduces them. Runs LAST in the Completed arm + // — after every during-run `InsertFinding` AND every post-commit + // `PersistPostRunFinding` pass (SEI deletion, tier, guidance) — so a + // reproduced finding already carries the current run_id and only a + // genuinely-vanished finding keeps an older one. Gated to a CLEAN FULL + // PASS so `run_id <> current` unambiguously means "the current run + // walked this finding's file and stopped reproducing it": + // • !resume — a `--resume` run REUSES the prior run_id + // (its not-yet-re-emitted findings already match current, so the + // run_id signal can't distinguish them — never sweep on resume). + // • skipped_files == 0 — an incremental run leaves unchanged + // files' findings at their PRIOR run_id; sweeping them would + // wrongly retire still-reproducing findings. + // • source_walk_skipped_entries == 0 — a file/dir that ERRORED + // during the source walk (IO / permission / path-jail) was never + // read, so its findings were not re-emitted and keep a prior + // run_id; the run still reaches `Completed`, so without this guard + // a single walk error would retire a whole unwalked subtree's + // still-reproducing findings ("never looked" ≠ "looked, fixed"). + // • !no_sei — the SEI pass (entity-deleted / + // guidance-orphan facts) was skipped, so those findings were NOT + // refreshed this run and must not be mistaken for vanished. + // Best-effort + enrich-only like the SEI/tier/guidance passes above: a + // failure logs and never un-commits the already-durable graph. Findings + // linger until the next clean full analyze — accepted (findings are + // regenerable current-state, ADR-047). 
+ if !resume + && skipped_files_total == 0 + && source_walk_skipped_entries == 0 + && !options.no_sei + { + match writer + .send_wait(|ack| WriterCmd::SweepStaleFindings { + current_run_id: run_id.clone(), + ack, + }) + .await + { + Ok(retired) if retired > 0 => tracing::info!( + run_id = %run_id, + stale_findings_retired = retired, + "stale-finding sweep retired findings whose code no longer reproduces" + ), + Ok(_) => {} + Err(e) => tracing::warn!( + run_id = %run_id, + error = %e, + "stale-finding sweep skipped (run already committed successfully)" + ), + } + } } RunOutcome::SoftFailed { reason } => { // Commit entities inserted by healthy plugins AND mark the run diff --git a/crates/loomweave-cli/tests/analyze.rs b/crates/loomweave-cli/tests/analyze.rs index 4848c44d..96aa59b6 100644 --- a/crates/loomweave-cli/tests/analyze.rs +++ b/crates/loomweave-cli/tests/analyze.rs @@ -4209,6 +4209,258 @@ fn analyze_persists_syntax_error_finding_for_unparseable_file() { assert_eq!(anchor_exists, 1, "finding anchor entity is present"); } +/// A `synfixture`-style plugin whose `parse_status` keys on file *content* +/// (`BROKEN` substring), not the filename stem. This lets a test toggle a +/// file-anchored `LMWV-PY-SYNTAX-ERROR` finding on and off by rewriting the SAME +/// file's bytes — the module entity id is stable, so no entity-deleted noise — and +/// makes content edits drive the incremental skip/walk decision. Used by the +/// ADR-048 stale-finding-sweep gate tests. 
+#[cfg(unix)] +const SWEEP_PLUGIN_SCRIPT: &str = r#"#!/usr/bin/python3 +import json +import pathlib +import sys + + +def read_frame(): + headers = {} + while True: + line = sys.stdin.buffer.readline() + if line in (b"", b"\r\n"): + break + name, value = line.decode("ascii").strip().split(":", 1) + headers[name.lower()] = value.strip() + length = int(headers["content-length"]) + return json.loads(sys.stdin.buffer.read(length)) + + +def write_frame(message): + body = json.dumps(message, separators=(",", ":")).encode("utf-8") + sys.stdout.buffer.write(b"Content-Length: " + str(len(body)).encode("ascii") + b"\r\n\r\n") + sys.stdout.buffer.write(body) + sys.stdout.buffer.flush() + + +while True: + msg = read_frame() + method = msg.get("method") + if method == "initialized": + continue + if method == "exit": + raise SystemExit(0) + ident = msg["id"] + if method == "initialize": + write_frame({ + "jsonrpc": "2.0", + "id": ident, + "result": { + "name": "loomweave-plugin-sweep", + "version": "0.1.0", + "ontology_version": "0.6.0", + "capabilities": {}, + }, + }) + elif method == "analyze_file": + path = msg["params"]["file_path"] + stem = pathlib.Path(path).stem + try: + content = pathlib.Path(path).read_text() + except OSError: + content = "" + parse_status = "syntax_error" if "BROKEN" in content else "ok" + entity = { + "id": f"sweepfixture:module:{stem}", + "kind": "module", + "qualified_name": stem, + "source": {"file_path": path}, + "parse_status": parse_status, + } + write_frame({ + "jsonrpc": "2.0", + "id": ident, + "result": {"entities": [entity], "edges": [], "stats": {}}, + }) + elif method == "shutdown": + write_frame({"jsonrpc": "2.0", "id": ident, "result": {}}) + else: + raise SystemExit(1) +"#; + +#[cfg(unix)] +const SWEEP_PLUGIN_MANIFEST: &str = r#" +[plugin] +name = "loomweave-plugin-sweep" +plugin_id = "sweepfixture" +version = "0.1.0" +protocol_version = "1.0" +executable = "loomweave-plugin-sweep" +language = "sweepfixture" +extensions = ["swp"] + 
+[capabilities.runtime] +expected_max_rss_mb = 256 +expected_entities_per_file = 100 +wardline_aware = false +reads_outside_project_root = false + +[ontology] +entity_kinds = ["module"] +edge_kinds = [] +rule_id_prefix = "LMWV-SWP-" +ontology_version = "0.6.0" + +[ontology.roles] +file_scope = ["module"] +syntax_degraded_module = ["module"] +"#; + +#[cfg(unix)] +fn write_sweep_plugin(plugin_dir: &std::path::Path) { + use std::os::unix::fs::PermissionsExt; + + let plugin_script = plugin_dir.join("loomweave-plugin-sweep"); + std::fs::write(&plugin_script, SWEEP_PLUGIN_SCRIPT).expect("write sweep plugin script"); + let mut perms = std::fs::metadata(&plugin_script) + .expect("stat sweep plugin") + .permissions(); + perms.set_mode(0o755); + std::fs::set_permissions(&plugin_script, perms).expect("chmod sweep plugin"); + + std::fs::write(plugin_dir.join("plugin.toml"), SWEEP_PLUGIN_MANIFEST) + .expect("write sweep plugin manifest"); +} + +/// Count the file-anchored syntax-error findings the sweep fixture produces. +#[cfg(unix)] +fn syntax_error_finding_count(project_root: &std::path::Path) -> i64 { + Connection::open(project_root.join(".weft/loomweave/loomweave.db")) + .unwrap() + .query_row( + "SELECT COUNT(*) FROM findings WHERE rule_id = 'LMWV-PY-SYNTAX-ERROR'", + [], + |row| row.get(0), + ) + .unwrap() +} + +/// ADR-048 acceptance #1 + #3: a finding the current run no longer reproduces is +/// retired by the stale-finding sweep, and the whole-project finding count drops. +/// `mod_a.swp` carries a `BROKEN` marker (→ one `LMWV-PY-SYNTAX-ERROR` finding); +/// fixing its content and re-running a clean full pass (`--no-incremental`, so the +/// sweep gate's `skipped_files == 0` holds) must DELETE the now-unreproduced +/// finding. This fails if the sweep is removed (the row would linger at the old +/// `run_id`). 
+#[cfg(unix)] +#[test] +fn analyze_stale_finding_sweep_retires_unreproduced_finding_on_full_run() { + let project_dir = tempfile::tempdir().unwrap(); + let plugin_dir = tempfile::tempdir().unwrap(); + write_sweep_plugin(plugin_dir.path()); + loomweave_bin() + .args(["install", "--path"]) + .arg(project_dir.path()) + .assert() + .success(); + let plugin_path = + std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); + let analyze = |extra: &[&str]| { + let mut cmd = loomweave_bin(); + cmd.arg("analyze"); + for a in extra { + cmd.arg(a); + } + cmd.arg(project_dir.path()) + .env("PATH", &plugin_path) + .assert() + .success(); + }; + + // Run 1: mod_a is broken → one syntax-error finding (run_id R1). + std::fs::write(project_dir.path().join("mod_a.swp"), b"BROKEN\n").unwrap(); + std::fs::write(project_dir.path().join("mod_b.swp"), b"ok\n").unwrap(); + analyze(&[]); + assert_eq!( + syntax_error_finding_count(project_dir.path()), + 1, + "run 1 must produce exactly one syntax-error finding for the broken file" + ); + let total_after_run1: i64 = + Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")) + .unwrap() + .query_row("SELECT COUNT(*) FROM findings", [], |row| row.get(0)) + .unwrap(); + + // Fix mod_a's content (byte change → re-walked) and re-run a clean FULL pass. 
+ std::fs::write(project_dir.path().join("mod_a.swp"), b"ok\n").unwrap(); + analyze(&["--no-incremental"]); + + assert_eq!( + syntax_error_finding_count(project_dir.path()), + 0, + "the fixed file's finding no longer reproduces and must be swept" + ); + let total_after_run2: i64 = + Connection::open(project_dir.path().join(".weft/loomweave/loomweave.db")) + .unwrap() + .query_row("SELECT COUNT(*) FROM findings", [], |row| row.get(0)) + .unwrap(); + assert!( + total_after_run2 < total_after_run1, + "whole-project finding count must DROP after a fix (got {total_after_run1} -> {total_after_run2})" + ); +} + +/// ADR-048 gate (the incremental-skip clause): a finding in a file the run did +/// NOT re-walk (incrementally skipped, so still-reproducing) must NOT be swept, +/// even though its row keeps a prior `run_id`. Run 1 broke `mod_a`; run 2 touches +/// only `mod_b`, so `mod_a` is skipped (`skipped_files > 0`) and its finding is +/// not re-emitted — the gate must block the sweep so the still-valid finding +/// survives. This fails if the `skipped_files == 0` gate clause is removed (the +/// sweep would then delete the finding at run 2's `run_id`). +#[cfg(unix)] +#[test] +fn analyze_stale_finding_sweep_skipped_on_incremental_run() { + let project_dir = tempfile::tempdir().unwrap(); + let plugin_dir = tempfile::tempdir().unwrap(); + write_sweep_plugin(plugin_dir.path()); + loomweave_bin() + .args(["install", "--path"]) + .arg(project_dir.path()) + .assert() + .success(); + let plugin_path = + std::env::join_paths(std::iter::once(plugin_dir.path().to_path_buf())).unwrap(); + let analyze = || { + loomweave_bin() + .args(["analyze"]) + .arg(project_dir.path()) + .env("PATH", &plugin_path) + .assert() + .success(); + }; + + // Run 1: mod_a broken → finding at R1; mod_b clean. 
+ std::fs::write(project_dir.path().join("mod_a.swp"), b"BROKEN\n").unwrap(); + std::fs::write(project_dir.path().join("mod_b.swp"), b"ok\n").unwrap(); + analyze(); + assert_eq!(syntax_error_finding_count(project_dir.path()), 1); + + // Run 2: touch ONLY mod_b. mod_a is unchanged → incrementally skipped, so its + // still-valid finding is not re-emitted and keeps R1 while the run is R2. + std::fs::write(project_dir.path().join("mod_b.swp"), b"ok\n# touched\n").unwrap(); + analyze(); + assert_eq!( + latest_run_stats(project_dir.path())["skipped_files"].as_u64(), + Some(1), + "mod_a must be incrementally skipped so the gate is exercised" + ); + assert_eq!( + syntax_error_finding_count(project_dir.path()), + 1, + "an incremental run must NOT sweep a still-reproducing finding in a skipped file" + ); +} + /// A plugin that crashes mid-`analyze_file`. Initializes cleanly, then exits /// non-zero on the first analyze request — exercising the host's crash path. #[cfg(unix)] diff --git a/crates/loomweave-storage/src/commands.rs b/crates/loomweave-storage/src/commands.rs index 4080a84a..c1d5b7eb 100644 --- a/crates/loomweave-storage/src/commands.rs +++ b/crates/loomweave-storage/src/commands.rs @@ -251,6 +251,21 @@ pub enum WriterCmd { recorded_at: String, ack: Ack<()>, }, + /// Retire findings the current run no longer reproduces (clarion-87c1eba2bd / + /// ADR-048): DELETE every `open`, Filigree-unlinked finding whose `run_id` is + /// not `current_run_id`. Mirrors the prior-index diff for findings, using the + /// `run_id` signal ADR-047 established (a reproduced finding carries the current + /// `run_id`; a vanished one keeps its prior one). PRESERVES lifecycle — + /// `filigree_issue_id`-linked or non-`open` findings are operator decisions + /// owned by the Filigree unseen/soft-archive path, never this sweep. The + /// caller gates this to a clean full pass (Completed, non-resume, fully + /// walked, non-`--no-sei`). 
Query-time write: it runs after `CommitRun` (no + /// active run transaction), best-effort, and never gates the run's own + /// outcome. Returns the number of rows deleted. + SweepStaleFindings { + current_run_id: String, + ack: Ack<usize>, + }, /// Upsert one SEI binding (mint or carry) — Wave 1 / WS1 (ADR-038). A carry /// REPLACEs the binding's own row by SEI PK, moving `current_locator` in /// place; it never creates a second alive row. Query-time write: the SEI diff --git a/crates/loomweave-storage/src/findings.rs b/crates/loomweave-storage/src/findings.rs new file mode 100644 index 00000000..81bbb984 --- /dev/null +++ b/crates/loomweave-storage/src/findings.rs @@ -0,0 +1,221 @@ +//! Stale-finding sweep (clarion-87c1eba2bd / ADR-048). +//! +//! Mirrors the entity prior-index diff (`prior_index.rs`) for findings. The +//! content-keyed finding upsert (`writer::write_finding_row`, ADR-047) refreshes +//! a *reproduced* finding's `run_id` to the current run, but nothing retires a +//! finding that a later run stopped emitting. A finding whose code was fixed (or +//! deleted — `entities` is cumulative, so the `findings → entities` cascade never +//! fires) therefore lingers forever. +//! +//! The diff signal is already established by ADR-047: a reproduced finding carries +//! the current `run_id`; a finding that did NOT reproduce keeps its prior `run_id`. +//! [`sweep_stale_findings`] deletes the latter — but ONLY when it is still +//! transient (`status = 'open'`) and unlinked (`filigree_issue_id IS NULL`). A +//! finding that carries a Filigree issue id or a non-`open` status +//! (`acknowledged` / `suppressed` / `promoted_to_issue`) represents an operator +//! decision and is owned by the Filigree-side unseen / soft-archive lifecycle +//! (ADR-029 / ADR-047) — never by this local sweep. The two paths are disjoint by +//! construction: this sweep touches only `filigree_issue_id IS NULL` rows. +//! +//!
Correctness depends on the CALLER gating the sweep to a clean full pass — a +//! `Completed`, non-`--resume`, fully-walked run (no incrementally-skipped files +//! AND no source-walk errors), non-`--no-sei` — so that `run_id <> current` +//! unambiguously means "the current run walked this finding's file and no longer +//! reproduces it." A file the run never read (skipped or walk-errored) keeps its +//! prior `run_id` without having been re-examined and must not be swept. See the +//! call site in `loomweave-cli/src/analyze.rs` for the full gate and its rationale. + +use rusqlite::{Connection, params}; + +use crate::Result; + +/// Retire findings the current run no longer reproduces: delete every `open`, +/// Filigree-unlinked finding whose `run_id` is not `current_run_id`. Returns the +/// number of rows deleted. +/// +/// Lifecycle preservation (acceptance contract): findings carrying a +/// `filigree_issue_id` or a non-`open` status are NEVER deleted, even when stale. +/// +/// This is a raw DELETE with no run-state or transaction management; it runs on +/// the writer actor via the query-time-write path (`WriterCmd::SweepStaleFindings`), +/// post-`CommitRun`, exactly like the prior-index flush. +/// +/// # Errors +/// +/// Returns [`crate::error::StorageError::Sqlite`] if the statement fails. +pub fn sweep_stale_findings(conn: &Connection, current_run_id: &str) -> Result<usize> { + let deleted = conn.execute( + "DELETE FROM findings \ + WHERE status = 'open' \ + AND filigree_issue_id IS NULL \ + AND run_id <> ?1", + params![current_run_id], + )?; + Ok(deleted) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::schema::apply_migrations; + + /// In-memory connection with the real schema applied, so the `findings` table + /// shape (and its CHECK constraints on `kind`/`severity`/`status`) come from + /// migration 0001, never a hand-written DDL that could drift.
The single + /// anchor entity every finding references is seeded here (`foreign_keys` is ON + /// — production parity, so the `findings.entity_id`/`run_id` FKs are enforced). + fn migrated_conn() -> Connection { + let mut conn = Connection::open_in_memory().unwrap(); + apply_migrations(&mut conn).unwrap(); + conn.execute( + "INSERT INTO entities \ + (id, plugin_id, kind, name, short_name, source_file_path, properties, \ + content_hash, created_at, updated_at) \ + VALUES ('python:function:x', 'python', 'function', 'x', 'x', '/x.py', \ + '{}', 'h', 't', 't')", + [], + ) + .unwrap(); + conn + } + + /// Insert a minimal finding row. Only the columns the sweep predicates on + /// (`status`, `filigree_issue_id`, `run_id`) vary; the rest are fixed valid + /// values that satisfy the NOT NULL + CHECK constraints. The referenced `run` + /// row is seeded on demand (`INSERT OR IGNORE`) so the `run_id` FK resolves. + fn insert_finding( + conn: &Connection, + id: &str, + run_id: &str, + status: &str, + filigree_issue_id: Option<&str>, + ) { + conn.execute( + "INSERT OR IGNORE INTO runs (id, started_at, config, stats, status) \ + VALUES (?1, 't', '{}', '{}', 'completed')", + params![run_id], + ) + .unwrap(); + conn.execute( + "INSERT INTO findings ( \ + id, tool, tool_version, run_id, rule_id, kind, severity, \ + entity_id, related_entities, message, evidence, properties, \ + supports, supported_by, status, filigree_issue_id, \ + created_at, updated_at \ + ) VALUES ( \ + ?1, 'loomweave', '0', ?2, 'LMWV-TEST-RULE', 'defect', 'WARN', \ + 'python:function:x', '[]', 'm', '[]', '{}', \ + '[]', '[]', ?3, ?4, 't', 't' \ + )", + params![id, run_id, status, filigree_issue_id], + ) + .unwrap(); + } + + fn ids(conn: &Connection) -> Vec<String> { + let mut stmt = conn.prepare("SELECT id FROM findings ORDER BY id").unwrap(); + stmt.query_map([], |row| row.get::<_, String>(0)) + .unwrap() + .map(std::result::Result::unwrap) + .collect() + } + + #[test] + fn retires_open_unlinked_stale_finding() { +
// The core behaviour: an open, Filigree-unlinked finding from a prior run + // (run_id != current) is retired. + let conn = migrated_conn(); + insert_finding(&conn, "core:finding:stale", "run-1", "open", None); + let deleted = sweep_stale_findings(&conn, "run-2").unwrap(); + assert_eq!(deleted, 1); + assert!(ids(&conn).is_empty()); + } + + #[test] + fn preserves_reproduced_finding_at_current_run() { + // A finding the current run re-emitted carries the current run_id (the + // upsert set it) and must survive. + let conn = migrated_conn(); + insert_finding(&conn, "core:finding:fresh", "run-2", "open", None); + let deleted = sweep_stale_findings(&conn, "run-2").unwrap(); + assert_eq!(deleted, 0); + assert_eq!(ids(&conn), ["core:finding:fresh"]); + } + + #[test] + fn preserves_stale_finding_carrying_filigree_issue_id() { + // A stale finding promoted to / linked to a Filigree issue is an operator + // decision owned by the Filigree lifecycle — never swept locally. + let conn = migrated_conn(); + insert_finding( + &conn, + "core:finding:linked", + "run-1", + "open", + Some("clarion-sf-abc123"), + ); + let deleted = sweep_stale_findings(&conn, "run-2").unwrap(); + assert_eq!(deleted, 0); + assert_eq!(ids(&conn), ["core:finding:linked"]); + } + + #[test] + fn preserves_stale_findings_with_non_open_status() { + // acknowledged / suppressed / promoted_to_issue are operator decisions: + // preserved even when stale and Filigree-unlinked. 
+ let conn = migrated_conn(); + insert_finding(&conn, "core:finding:ack", "run-1", "acknowledged", None); + insert_finding(&conn, "core:finding:sup", "run-1", "suppressed", None); + insert_finding( + &conn, + "core:finding:promo", + "run-1", + "promoted_to_issue", + None, + ); + let deleted = sweep_stale_findings(&conn, "run-2").unwrap(); + assert_eq!(deleted, 0); + assert_eq!( + ids(&conn), + ["core:finding:ack", "core:finding:promo", "core:finding:sup"] + ); + } + + #[test] + fn sweeps_only_stale_open_unlinked_in_a_mixed_set() { + // One pass over a realistic mix: only the open+unlinked+stale row goes. + let conn = migrated_conn(); + insert_finding(&conn, "core:finding:a-stale-open", "run-1", "open", None); + insert_finding(&conn, "core:finding:b-fresh-open", "run-2", "open", None); + insert_finding( + &conn, + "core:finding:c-stale-linked", + "run-1", + "open", + Some("clarion-sf-1"), + ); + insert_finding( + &conn, + "core:finding:d-stale-sup", + "run-1", + "suppressed", + None, + ); + let deleted = sweep_stale_findings(&conn, "run-2").unwrap(); + assert_eq!(deleted, 1); + assert_eq!( + ids(&conn), + [ + "core:finding:b-fresh-open", + "core:finding:c-stale-linked", + "core:finding:d-stale-sup", + ] + ); + } + + #[test] + fn empty_table_sweeps_nothing() { + let conn = migrated_conn(); + assert_eq!(sweep_stale_findings(&conn, "run-1").unwrap(), 0); + } +} diff --git a/crates/loomweave-storage/src/lib.rs b/crates/loomweave-storage/src/lib.rs index fd3c9510..8a8687d3 100644 --- a/crates/loomweave-storage/src/lib.rs +++ b/crates/loomweave-storage/src/lib.rs @@ -8,6 +8,7 @@ pub mod cache; pub mod commands; pub mod embeddings; pub mod error; +pub mod findings; pub mod glob; pub mod guidance; pub mod pragma; @@ -34,6 +35,7 @@ pub use commands::{ }; pub use embeddings::{EmbeddingKey, EmbeddingStore, StoredEmbedding, embeddings_db_path}; pub use error::{Result, StorageError}; +pub use findings::sweep_stale_findings; pub use glob::glob_match; pub use guidance::{ 
GUIDANCE_PROPOSAL_MARKER, GuidanceProposal, GuidanceSheet, GuidanceSheetInput, MatchFacts, diff --git a/crates/loomweave-storage/src/writer.rs b/crates/loomweave-storage/src/writer.rs index bc1d22b0..eb3a140c 100644 --- a/crates/loomweave-storage/src/writer.rs +++ b/crates/loomweave-storage/src/writer.rs @@ -272,6 +272,15 @@ fn run_actor( }); reply(ack, res); } + WriterCmd::SweepStaleFindings { + current_run_id, + ack, + } => { + let res = query_time_write(conn, &mut state, commits_observed, |conn| { + crate::findings::sweep_stale_findings(conn, &current_run_id) + }); + reply(ack, res); + } WriterCmd::UpsertSeiBinding { record, ack } => { let res = query_time_write(conn, &mut state, commits_observed, |conn| { crate::sei::upsert_sei_binding(conn, &record) diff --git a/crates/loomweave-storage/tests/writer_actor.rs b/crates/loomweave-storage/tests/writer_actor.rs index e4077649..15cbcd83 100644 --- a/crates/loomweave-storage/tests/writer_actor.rs +++ b/crates/loomweave-storage/tests/writer_actor.rs @@ -1390,6 +1390,111 @@ async fn insert_finding_is_idempotent_on_resume() { ); } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn sweep_stale_findings_retires_only_unreproduced_open_unlinked_rows() { + // ADR-048 / clarion-87c1eba2bd: the command round-trips through the + // query-time-write path (no active run) and returns the deleted count. Two + // findings are written under run-1; run-2 re-emits only one (so its run_id + // refreshes to run-2). Sweeping at run-2 must retire exactly the finding + // run-2 stopped reproducing and leave the reproduced one.
+ let dir = tempfile::tempdir().unwrap(); + let path = prepared_db(&dir); + let (writer, handle) = Writer::spawn(path.clone(), 50, 256).unwrap(); + let tx = writer.sender(); + + let finding = |id: &str, run_id: &str| FindingRecord { + id: id.to_owned(), + tool: "loomweave".to_owned(), + tool_version: "1.0.0".to_owned(), + run_id: run_id.to_owned(), + rule_id: "LMWV-TEST-RULE".to_owned(), + kind: "defect".to_owned(), + severity: "WARN".to_owned(), + confidence: None, + confidence_basis: None, + entity_id: "python:module:demo".to_owned(), + related_entities_json: "[]".to_owned(), + message: "m".to_owned(), + evidence_json: "[]".to_owned(), + properties_json: "{}".to_owned(), + supports_json: "[]".to_owned(), + supported_by_json: "[]".to_owned(), + created_at: now_iso(), + updated_at: now_iso(), + }; + + // run-1: both findings present. + begin_demo_run(&tx, "run-1").await; + send::<()>(&tx, |ack| WriterCmd::InsertEntity { + entity: Box::new(make_module_entity("python:module:demo")), + ack, + }) + .await + .unwrap(); + for id in ["core:finding:reproduced", "core:finding:vanished"] { + send::<()>(&tx, |ack| WriterCmd::InsertFinding { + finding: Box::new(finding(id, "run-1")), + ack, + }) + .await + .unwrap(); + } + send::<()>(&tx, |ack| WriterCmd::CommitRun { + run_id: "run-1".into(), + status: RunStatus::Completed, + completed_at: now_iso(), + stats_json: "{}".into(), + ack, + }) + .await + .unwrap(); + + // run-2 (fresh): re-emit only `reproduced` — its run_id upserts to run-2. + begin_demo_run(&tx, "run-2").await; + send::<()>(&tx, |ack| WriterCmd::InsertFinding { + finding: Box::new(finding("core:finding:reproduced", "run-2")), + ack, + }) + .await + .unwrap(); + send::<()>(&tx, |ack| WriterCmd::CommitRun { + run_id: "run-2".into(), + status: RunStatus::Completed, + completed_at: now_iso(), + stats_json: "{}".into(), + ack, + }) + .await + .unwrap(); + + // Sweep at run-2: query-time write, no active run. 
+ let deleted = send::<usize>(&tx, |ack| WriterCmd::SweepStaleFindings { + current_run_id: "run-2".into(), + ack, + }) + .await + .expect("sweep command must round-trip without an active run"); + assert_eq!(deleted, 1, "exactly the un-reproduced finding is retired"); + + drop(tx); + drop(writer); + handle.await.unwrap().unwrap(); + + let conn = Connection::open(path).unwrap(); + let surviving: Vec<String> = { + let mut stmt = conn.prepare("SELECT id FROM findings ORDER BY id").unwrap(); + stmt.query_map([], |row| row.get::<_, String>(0)) + .unwrap() + .map(Result::unwrap) + .collect() + }; + assert_eq!( + surviving, + ["core:finding:reproduced"], + "the reproduced finding survives; the vanished one is swept" + ); +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn entity_source_file_id_rejects_non_source_anchor_entity() { let dir = tempfile::tempdir().unwrap(); diff --git a/docs/loomweave/adr/ADR-048-stale-finding-sweep.md b/docs/loomweave/adr/ADR-048-stale-finding-sweep.md new file mode 100644 index 00000000..759693da --- /dev/null +++ b/docs/loomweave/adr/ADR-048-stale-finding-sweep.md @@ -0,0 +1,155 @@ +# ADR-048: Stale-finding sweep (prior-index-style finding diffing) + +**Status**: Accepted +**Date**: 2026-06-08 +**Deciders**: john@pgpl.net +**Context**: ADR-047 made finding ids content-keyed so re-analyses de-dupe instead +of accumulating, and explicitly deferred "a prior-index-style sweep for findings +whose code was *fixed* (no longer reproduce)" as a follow-on (clarion-87c1eba2bd, +split from clarion-772ff358da). + +## Summary + +After a **clean full analyze**, Loomweave retires findings the run no longer +reproduces: it `DELETE`s every `open`, Filigree-unlinked finding whose `run_id` +is not the current run. This mirrors the entity prior-index diff +(`prior_index.rs`) for findings, reusing the `run_id` signal ADR-047 already +established.
Lifecycle is preserved: a finding carrying a `filigree_issue_id` or a +non-`open` status (`acknowledged` / `suppressed` / `promoted_to_issue`) is an +operator decision and is never swept locally. The sweep is gated to a run that +walked everything (`Completed`, non-`--resume`, `skipped_files == 0`, +`source_walk_skipped_entries == 0`, non-`--no-sei`). + +## Context + +ADR-047 left the `findings` table **current-state, not an append-log**: the +content-keyed upsert (`write_finding_row`) refreshes a *reproduced* finding's +`run_id` to the current run, but nothing retires a finding a later run stopped +emitting. Two mechanics make such findings linger forever: + +- `entities` is **cumulative / never-pruned** (`prior_index.rs`, `cache.rs` — + REQ-ANALYZE-04). A vanished entity's row survives, so the + `findings.entity_id … ON DELETE CASCADE` (migration 0001) **never fires** on + re-analyze. A finding on *deleted* code is not cascaded away. +- A finding on code that still exists but was *fixed* is not re-emitted, so its + row simply stays at its prior `run_id`. + +The whole-project finding count (`project_status` / `project_finding_list`) +therefore only ever grows; an agent browsing findings sees defects that no longer +exist. Entities already get prior-index diffing; findings did not. + +ADR-047 §Decision.3 established the diff signal we reuse: `findings_for_emit` +filters `WHERE run_id = ?1`, a **reproduced** finding carries the current run_id +(the upsert set it), and a finding that **did not reproduce** keeps its prior +run_id. "Stale" is exactly `run_id <> current` — *provided the current run +actually re-walked that finding's file*. + +## Decision + +1. **Add a storage sweep** `findings::sweep_stale_findings(conn, current_run_id)`: + ```sql + DELETE FROM findings + WHERE status = 'open' + AND filigree_issue_id IS NULL + AND run_id <> :current_run_id; + ``` + Returns the row count.
Wired as `WriterCmd::SweepStaleFindings` through the + query-time-write path (ADR-011), post-`CommitRun`, best-effort + enrich-only — + a failure logs and never un-commits the graph. + +2. **Lifecycle preservation.** `status = 'open'` and `filigree_issue_id IS NULL` + are the only rows touched. Acknowledged / suppressed / promoted findings and + anything linked to a Filigree issue are operator decisions, left to the + Filigree-side lifecycle (§Cross-product boundary). + +3. **Gate the sweep to a clean full pass** at the call site (`analyze.rs`, + `RunOutcome::Completed` arm, after every finding-emitting pass): + - **`!resume`** — `--resume` REUSES the prior run_id, so a resumed run's + not-yet-re-emitted findings already match `current`; the run_id signal can't + distinguish them. (Also satisfies the acceptance criterion "a `--resume` + re-walk does not retire findings the resumed run hasn't re-emitted yet" for + free.) + - **`skipped_files == 0`** — an incremental run leaves *unchanged* files' + findings at their prior run_id (they were not re-walked, so not re-emitted). + Sweeping them would wrongly retire still-reproducing findings. A full pass + walks every file, so `run_id <> current` is unambiguous. + - **`source_walk_skipped_entries == 0`** — a file or directory that *errored* + during the source walk (IO / permission / path-jail) was never read, yet the + run still reaches `Completed`. Its findings keep a prior run_id without being + re-examined, so without this guard a single walk error would retire a whole + unwalked subtree's still-reproducing findings. ("Never looked" must not be + conflated with "looked, code is fixed.") + - **`!no_sei`** — the SEI mint pass produces the `entity-deleted` and + `guidance-orphan` facts; `--no-sei` skips it, so those findings are not + refreshed this run and must not be mistaken for vanished. + +4. 
**Placement: last in the `Completed` arm.** The sweep runs *after* every + during-run `InsertFinding` and every post-commit `PersistPostRunFinding` pass + (SEI deletion, tier-subsystem, guidance-staleness), so every finding the run + reproduces already carries `current` before the diff runs. + +## Cross-product boundary (Loomweave ↔ Filigree) + +The local sweep and the existing Filigree retention path are **disjoint by +construction**: + +| Finding | Owner | Retirement mechanism | +|---|---|---| +| `filigree_issue_id IS NULL`, `status = 'open'` | Loomweave (local) | this sweep (`DELETE`) | +| `filigree_issue_id IS NOT NULL` *or* non-`open` | Filigree lifecycle | `mark_unseen` per rule/file at emit + age-gated `--prune-unseen` soft-archive (default 30d) | + +The sweep predicate (`filigree_issue_id IS NULL`) never overlaps the Filigree-owned +set, so a finding cannot be retired by both paths or fall between them. This +honours the cross-product identity contract (ADR-029: Filigree keys by entity; +findings carry `filigree_issue_id`) — the local id is never on the wire (ADR-047), +so deleting a local row tells Filigree nothing and breaks no linkage. + +## Alternatives considered + +- **Bump skipped-file findings' `run_id` to current, then `DELETE WHERE run_id <> + current` unconditionally (no skip gate).** Rejected: `findings_for_emit` keys + on `WHERE run_id = ?1`, so bumping a skipped finding's run_id changes the + Filigree emit set and breaks ADR-047's "emit is identical to today" invariant. + Keeping `run_id` strictly meaning "last run that re-walked-and-reproduced this + finding" preserves that contract. +- **File-scope the `DELETE`** (exclude `entity_id IN (entities WHERE + source_file_path IN :skipped_set)`) so the sweep can also run on incremental + runs. Rejected for now: more machinery, and path-less synthetic findings + (weak-modularity, tier facts) can't be file-scoped. 
The acceptance criteria do + not require real-time incremental pruning; the full-run gate is correct by + construction. Revisit only if incremental pruning becomes a requirement. + +## Consequences + +### Positive +- The whole-project finding count **drops** to reflect fixed/removed code, not + just grows (closes the third acceptance criterion). +- Findings now match the rest of the store's current-state model (ADR-047). + +### Negative / accepted trade-off +- **Stale findings linger until the next *clean full* analyze.** An incremental, + resumed, or `--no-sei` run does not retire them. Accepted: findings are + regenerable derived data; a `--no-incremental` run (or any full pass) settles + the table. + +### Neutral +- `run_id` keeps its ADR-047 meaning (last run that reproduced the finding); the + sweep reads it, never writes it. + +## Related decisions + +- [ADR-047](./ADR-047-content-keyed-finding-ids.md) — content-keyed finding ids; + established the `run_id` diff signal and deferred this sweep. +- [ADR-029](./ADR-029-entity-associations-binding.md) — Filigree keys findings by + entity; the local id is never on the wire. +- [ADR-011](./ADR-011-writer-actor-concurrency.md) — the sweep is a writer-actor + query-time write. +- [ADR-005](./ADR-005-loomweave-dir-tracking.md) (as reversed by C1) — the store + is a regenerable cache, which is why deleting derived findings is sound. + +## References + +- clarion-87c1eba2bd — the implementation issue (Part A of clarion-772ff358da's + deferred follow-on). +- clarion-772ff358da / weft-f506e5f845 — the Weft dogfood-#2 finding-accumulation + campaign that surfaced both halves. 
diff --git a/docs/loomweave/adr/README.md b/docs/loomweave/adr/README.md index a39109f4..8d06acff 100644 --- a/docs/loomweave/adr/README.md +++ b/docs/loomweave/adr/README.md @@ -48,6 +48,7 @@ This folder is the canonical home for authored Loomweave architecture decision r | [ADR-045](./ADR-045-worktree-source-staleness.md) | Worktree-source staleness — `Staleness::StaleWorktree` + `worktree_dirty` via hardened, hash-free `git ls-files --others` scoped to ingested extensions; closes the unwatched-top-level-dir blind spot without `git status`'s filter-RCE vector; builds on ADR-013/021 untrusted-corpus posture | Accepted | | [ADR-046](./ADR-046-weft-store-consolidation.md) | Weft store consolidation — store moves `.loomweave/` → `.weft/loomweave/` (clean break, single `loomweave_core::store` helper); operator-private `weft.toml:[loomweave].store_dir` override (read-only, fail-soft C-9c); Filigree sibling resolution prefers `.weft/filigree/` and tolerates legacy `.filigree/`; amends ADR-005/040/044 | Accepted | | [ADR-047](./ADR-047-content-keyed-finding-ids.md) | Content-keyed finding IDs — finding id drops `{run_id}` (`core:finding:`) so `ON CONFLICT(id)` de-dupes findings across fresh re-analyses (not just `--resume`) and `filigree_issue_id`/`status` survive re-analysis; migration `0010` clears legacy run-scoped rows; accepted trade-off = findings are current-state, not a per-run append-log (L1, weft-f506e5f845) | Accepted | +| [ADR-048](./ADR-048-stale-finding-sweep.md) | Stale-finding sweep — after a clean full analyze, `DELETE` `open`, Filigree-unlinked findings whose `run_id <> current` (prior-index-style diff reusing ADR-047's run_id signal); preserves lifecycle (`filigree_issue_id`/non-`open`); gated to a clean full pass (`Completed` + non-`--resume` + `skipped_files==0` + no source-walk errors + non-`--no-sei`); disjoint from the Filigree unseen/soft-archive path; whole-project finding count now drops, not just grows (clarion-87c1eba2bd) | Accepted | ## 
Backlog still tracked in the detailed design From 8759d16ea57f56a092ab53f528c40ec8a3f545e8 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 12:16:48 +1000 Subject: [PATCH 48/60] feat(doctor): detect a git-tracked runtime DB and self-heal under --fix (C1, gap-analysis opp #4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `loomweave doctor` verified the orientation surfaces but never checked whether `.weft/loomweave/loomweave.db` was committed to git — a vacuous-green gap (same class as wardline W2). A tracked db mutates on every analyze/scan, leaving a permanently-dirty tree that blocks legis signing; ADR-005 was reversed in b7a1b30 so fresh installs gitignore it, but a template change cannot untrack an already-committed db. - new `db_tracked_state` queries `git ls-files --error-unmatch`; non-success (untracked / ignored / absent / outside-repo / not-a-repo / git-missing) all fold to Untracked, so the check is fail-soft - text + JSON report paths gain a `db.tracked` check: a warning with the `git rm --cached` remedy by default (advisory — does not fail the gate, matching the enrich-only severity model) - `--fix` self-heals via `git rm --cached --ignore-unmatch` on the db + WAL/SHM sidecars, keeping the working-tree files - 4 tests over a temp git repo: tracked/untracked/outside-repo detection + the --fix unstage-but-keep-file path Closes the loomweave half of weft-d822a7de2d's doctor self-heal. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/doctor.rs | 207 +++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) diff --git a/crates/loomweave-cli/src/doctor.rs b/crates/loomweave-cli/src/doctor.rs index 1a5b5439..0e4aaeea 100644 --- a/crates/loomweave-cli/src/doctor.rs +++ b/crates/loomweave-cli/src/doctor.rs @@ -24,6 +24,7 @@ use std::fs; use std::path::Path; +use std::process::Command; use anyhow::{Context, Result, bail}; use loomweave_federation::config::{McpConfig, ProviderSelection, select_provider_with_env}; @@ -73,6 +74,7 @@ pub fn run(path: &Path, fix: bool, json_output: bool) -> Result { tally += check_mcp(&project_root, fix); tally += check_instructions(&project_root, fix); tally += check_integration_bindings(&project_root, fix); + tally += check_db_tracked(&project_root, fix); println!("--- llm ---"); tally += check_llm_provider(&project_root); @@ -171,6 +173,7 @@ fn json_report(project_root: &Path, fix: bool) -> DoctorJsonReport { check_wardline_taint_capability_json(project_root), check_mcp_hygiene_json(), check_integration_bindings_json(project_root, fix), + check_db_tracked_json(project_root, fix), ]; let next_actions: Vec = checks .iter() @@ -188,6 +191,11 @@ fn json_report(project_root: &Path, fix: bool) -> DoctorJsonReport { "mcp.registration" | "integration.bindings" => { "Run `loomweave doctor --fix`.".to_owned() } + "db.tracked" => { + "Run `loomweave doctor --fix` or `git rm --cached .weft/loomweave/loomweave.db` \ + to stop the regenerable index dirtying the tree." + .to_owned() + } "index.freshness" => { "Run `loomweave analyze ` to refresh the index.".to_owned() } @@ -237,6 +245,100 @@ fn check_loomweave_dir_json(project_root: &Path) -> DoctorJsonCheck { } } +/// Whether the regenerable runtime DB is committed to git. +/// +/// `loomweave.db` mutates on every `analyze`/`scan`; tracking it leaves a +/// permanently-dirty work tree that blocks legis signing (C1 / weft-d822a7de2d). 
+/// ADR-005 was reversed (`b7a1b30`) so a fresh `install` gitignores it, but a +/// template change cannot untrack an already-committed db — this is the detector +/// for that residual. +#[derive(Debug, PartialEq, Eq)] +enum DbTrackedState { + /// Healthy: the db is not in the git index (untracked, ignored, absent, the + /// store lives outside the repo, or this is not a git work tree). + Untracked, + /// The db is committed/staged — dirties the tree and blocks signing. + Tracked, +} + +/// Ask git whether `/loomweave.db` is tracked. `ls-files +/// --error-unmatch` exits 0 only when the pathspec matches a tracked file, so a +/// non-success exit (untracked, ignored, absent, outside the repo, not a repo, +/// or git missing) all fold to [`DbTrackedState::Untracked`] — nothing to fix. +fn db_tracked_state(project_root: &Path) -> DbTrackedState { + let db = loomweave_core::store::db_path(project_root); + let Ok(rel) = db.strip_prefix(project_root) else { + // Store dir is outside the repo — this repo cannot be tracking it. + return DbTrackedState::Untracked; + }; + let tracked = Command::new("git") + .arg("-C") + .arg(project_root) + .args(["ls-files", "--error-unmatch", "--"]) + .arg(rel) + .output() + .is_ok_and(|out| out.status.success()); + if tracked { + DbTrackedState::Tracked + } else { + DbTrackedState::Untracked + } +} + +/// `--fix` self-heal: `git rm --cached` the runtime db (and its WAL/SHM +/// sidecars), removing them from the index while keeping the working-tree files. +/// `--ignore-unmatch` makes the sidecars optional. 
+fn git_untrack_db(project_root: &Path) -> Result<()> { + let store = loomweave_core::store::store_dir(project_root); + let rel = store + .strip_prefix(project_root) + .context("store dir is outside the project root; cannot git rm --cached")?; + let status = Command::new("git") + .arg("-C") + .arg(project_root) + .args(["rm", "--cached", "-q", "--ignore-unmatch", "--"]) + .arg(rel.join("loomweave.db")) + .arg(rel.join("loomweave.db-wal")) + .arg(rel.join("loomweave.db-shm")) + .status() + .context("run git rm --cached")?; + if !status.success() { + bail!("git rm --cached exited with {status}"); + } + Ok(()) +} + +/// JSON-path twin of [`check_db_tracked`]. +fn check_db_tracked_json(project_root: &Path, fix: bool) -> DoctorJsonCheck { + match db_tracked_state(project_root) { + DbTrackedState::Untracked => { + DoctorJsonCheck::ok("db.tracked", "runtime loomweave.db is not git-tracked") + } + DbTrackedState::Tracked => { + let what = "loomweave.db is git-tracked — it mutates on every analyze/scan, dirtying \ + the work tree and blocking legis signing (ADR-005 reversed)"; + if !fix { + return DoctorJsonCheck::warning("db.tracked", what); + } + match git_untrack_db(project_root) { + Ok(()) if db_tracked_state(project_root) == DbTrackedState::Untracked => { + DoctorJsonCheck::fixed( + "db.tracked", + format!("{what} — untracked (git rm --cached)"), + ) + } + Ok(()) => DoctorJsonCheck::problem( + "db.tracked", + format!("{what} — repair did not converge"), + ), + Err(err) => { + DoctorJsonCheck::problem("db.tracked", format!("{what} — repair failed: {err}")) + } + } + } + } +} + fn check_index_freshness_json(project_root: &Path) -> DoctorJsonCheck { let lines = hook::snapshot_report(project_root); if lines @@ -941,6 +1043,34 @@ fn repair_instructions(project_root: &Path, what: &str) -> Tally { } } +/// Text-path twin of [`check_db_tracked_json`]: surface a git-tracked runtime db +/// (the C1 analyze→sign blocker) instead of greening over it, and self-heal it +/// under 
`--fix`. +fn check_db_tracked(project_root: &Path, fix: bool) -> Tally { + match db_tracked_state(project_root) { + DbTrackedState::Untracked => ok("runtime loomweave.db is not git-tracked"), + DbTrackedState::Tracked => { + let what = "loomweave.db is git-tracked — it mutates on every analyze/scan, dirtying \ + the work tree and blocking legis signing"; + if !fix { + return warn( + what, + Some( + "git rm --cached .weft/loomweave/loomweave.db (or loomweave doctor --fix)", + ), + ); + } + match git_untrack_db(project_root) { + Ok(()) if db_tracked_state(project_root) == DbTrackedState::Untracked => { + ok(&format!("{what} — fixed (git rm --cached)")) + } + Ok(()) => problem(&format!("{what} — repair did not converge"), None), + Err(err) => problem(&format!("{what} — repair failed: {err}"), None), + } + } + } +} + fn check_integration_bindings(project_root: &Path, fix: bool) -> Tally { match integration_bindings::binding_state(project_root) { BindingState::Present => { @@ -969,3 +1099,80 @@ fn check_integration_bindings(project_root: &Path, fix: bool) -> Tally { } } } + +#[cfg(test)] +mod tests { + use super::*; + use std::process::Command; + + fn run_git(repo: &Path, args: &[&str]) { + let ok = Command::new("git") + .arg("-C") + .arg(repo) + .args(args) + .output() + .expect("git runs") + .status + .success(); + assert!(ok, "git {args:?} failed"); + } + + fn init_repo(repo: &Path) { + run_git(repo, &["init", "-q"]); + run_git(repo, &["config", "user.email", "t@t"]); + run_git(repo, &["config", "user.name", "t"]); + } + + /// Materialise the runtime DB at the canonical store path + /// (`/.weft/loomweave/loomweave.db`). 
+ fn write_db(root: &Path) -> std::path::PathBuf { + let db = loomweave_core::store::db_path(root); + std::fs::create_dir_all(db.parent().unwrap()).unwrap(); + std::fs::write(&db, b"SQLite format 3\0").unwrap(); + db + } + + #[test] + fn db_tracked_state_is_untracked_when_db_is_not_added() { + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + init_repo(root); + write_db(root); // present on disk, never `git add`-ed + assert_eq!(db_tracked_state(root), DbTrackedState::Untracked); + } + + #[test] + fn db_tracked_state_is_tracked_when_db_is_git_added() { + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + init_repo(root); + write_db(root); + run_git(root, &["add", "-f", ".weft/loomweave/loomweave.db"]); + assert_eq!(db_tracked_state(root), DbTrackedState::Tracked); + } + + #[test] + fn db_tracked_state_is_untracked_outside_a_git_repo() { + let dir = tempfile::tempdir().unwrap(); + write_db(dir.path()); + assert_eq!(db_tracked_state(dir.path()), DbTrackedState::Untracked); + } + + #[test] + fn git_untrack_db_unstages_the_tracked_db_but_keeps_the_file() { + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + init_repo(root); + let db = write_db(root); + run_git(root, &["add", "-f", ".weft/loomweave/loomweave.db"]); + assert_eq!(db_tracked_state(root), DbTrackedState::Tracked); + + git_untrack_db(root).expect("untrack succeeds"); + + assert_eq!(db_tracked_state(root), DbTrackedState::Untracked); + assert!( + db.exists(), + "git rm --cached must keep the working-tree file" + ); + } +} From 6057b6976521232ef1a0f3aec750529036c9dafc Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 12:36:04 +1000 Subject: [PATCH 49/60] feat(doctor): index-DB health check + git-tracked-DB gate (C1, schema) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two doctor surfaces landed together in this commit. 
NOTE: the index-DB-health work was already present (uncommitted) in the working tree and is bundled here deliberately rather than split, with its CHANGELOG entry. 1. Index-DB health check (`.weft/loomweave.schema`) — classifies four states instead of mere file-existence: absent (warning, install-before-analyze is a legitimate intermediate, gate passes); present-but-unreadable/corrupt/wrong- format (problem, fails the gate); opens but `PRAGMA user_version` exceeds this build's schema (problem, names the newer-build cause + version numbers); healthy (ok). JSON + text paths agree; path via `store::db_path` honours a `weft.toml` store_dir override; opened read-only. Tests: `doctor_index_health_*`. 2. Git-tracked runtime DB is now a gate-failing PROBLEM (was a warning). A tracked `loomweave.db` mutates on every analyze/scan, dirtying the tree and blocking legis signing (C1 / weft-d822a7de2d), so `doctor` exits non-zero instead of vacuously passing as a pre-commit gate; `--fix` self-heals via `git rm --cached`. Test: `doctor_flags_git_tracked_db_as_problem_*`. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 13 ++ crates/loomweave-cli/src/doctor.rs | 127 ++++++++++-- crates/loomweave-cli/tests/doctor.rs | 288 +++++++++++++++++++++++++++ 3 files changed, 410 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ec9c1b5a..dadbce14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,6 +59,19 @@ sandbox. No package is published for release candidates. 
(Cargo SemVer ### Fixed +- **`loomweave doctor` now gates on tracked-index DB health (check + `.weft/loomweave.schema`).** The check previously only tested for file + existence; it now classifies four states: absent (warning — a legitimate + install-before-analyze intermediate, does not fail the gate); present but + unreadable / corrupt / wrong format (problem — fails the gate); present and + opens but `PRAGMA user_version` exceeds this build's schema version (problem — + reports the version numbers and names the newer-build cause); and healthy + (ok). Both the JSON and text paths report consistently so CI gates driven by + either surface see the same verdict. The check resolves the DB path via + `loomweave_core::store::db_path` so a `weft.toml` `[loomweave].store_dir` + override is honoured, and opens read-only so the health check never creates + or mutates the file. + - **Transient pyright spawn failures no longer disable analysis for the whole run.** A `subprocess.Popen` failure with a transient errno (`EAGAIN`/`ENOMEM`/`EMFILE`/`ENFILE`) now skips only the current file and diff --git a/crates/loomweave-cli/src/doctor.rs b/crates/loomweave-cli/src/doctor.rs index 0e4aaeea..49f7f871 100644 --- a/crates/loomweave-cli/src/doctor.rs +++ b/crates/loomweave-cli/src/doctor.rs @@ -19,8 +19,10 @@ //! *enrich-only* surface (per `docs/suite/weft.md` §5): a Loomweave-solo or //! Loomweave+Filigree-only project is first-class, so their absence is a //! **warning** (surfaced, suggests `--fix`) and never a problem that fails the -//! gate. Only a genuinely broken state — an unparseable config file, or a -//! `--fix` repair that errors or does not converge — is a problem. +//! gate. A genuinely broken state — an unparseable config file, a `--fix` repair +//! that errors or does not converge, or a git-tracked runtime `loomweave.db` +//! (which dirties the tree and blocks legis signing, C1 / weft-d822a7de2d) — is +//! a problem that fails the gate. 
use std::fs; use std::path::Path; @@ -32,6 +34,9 @@ use rusqlite::Connection; use serde::Serialize; use serde_json::Value; +use loomweave_storage::StorageError; +use loomweave_storage::schema::{CURRENT_SCHEMA_VERSION, verify_user_version}; + use crate::hooks_settings::HookState; use crate::instructions::InstructionsState; use crate::integration_bindings::BindingState; @@ -75,6 +80,7 @@ pub fn run(path: &Path, fix: bool, json_output: bool) -> Result { tally += check_instructions(&project_root, fix); tally += check_integration_bindings(&project_root, fix); tally += check_db_tracked(&project_root, fix); + tally += check_loomweave_dir(&project_root); println!("--- llm ---"); tally += check_llm_provider(&project_root); @@ -196,6 +202,12 @@ fn json_report(project_root: &Path, fix: bool) -> DoctorJsonReport { to stop the regenerable index dirtying the tree." .to_owned() } + ".weft/loomweave.schema" => { + "Run `loomweave install` + `loomweave analyze ` to create or \ + rebuild the index. If the DB is corrupt, remove `.weft/loomweave/loomweave.db` \ + first." + .to_owned() + } "index.freshness" => { "Run `loomweave analyze ` to refresh the index.".to_owned() } @@ -224,24 +236,100 @@ fn json_report(project_root: &Path, fix: bool) -> DoctorJsonReport { } } +/// Classification of the tracked-index DB health, shared by the text and JSON +/// renderers so they can never diverge. +enum IndexDbHealth { + /// DB is absent (legitimate intermediate state: install-before-analyze). + Absent, + /// DB file is present but could not be opened or probed — corrupt, wrong + /// format, permission error, or locked. + Unreadable(String), + /// DB opens cleanly but its `user_version` is newer than this build. + FutureSchema { found: u32, current: u32 }, + /// DB opens and its schema version is within range of this build. + Healthy, +} + +/// Classify the index DB at the canonical store path into one of four states. 
+/// Uses `Connection::open_with_flags` with `SQLITE_OPEN_READ_ONLY` so the +/// check never creates or mutates the DB (unlike `Connection::open`, which +/// creates the file on success). +fn classify_index_db_health(project_root: &Path) -> IndexDbHealth { + let db_path = loomweave_core::store::db_path(project_root); + if !db_path.exists() { + return IndexDbHealth::Absent; + } + let conn = + match Connection::open_with_flags(&db_path, rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY) { + Ok(conn) => conn, + Err(err) => return IndexDbHealth::Unreadable(err.to_string()), + }; + // `open_with_flags(READ_ONLY)` lazily succeeds even on a non-SQLite file + // ("NOT A SQLITE DB"); the corruption only surfaces at first read. + // `verify_user_version` issues `PRAGMA user_version` — a cheap single-page + // read that serves double duty as the corruption probe. + match verify_user_version(&conn) { + Ok(()) => IndexDbHealth::Healthy, + Err(StorageError::FutureUserVersion { found, current }) => { + IndexDbHealth::FutureSchema { found, current } + } + Err(err) => IndexDbHealth::Unreadable(err.to_string()), + } +} + +/// JSON-path check for tracked-index DB health. Expands the former +/// existence-only check with four distinct states: absent (warning), +/// unreadable (problem), future-schema (problem), healthy (ok). 
fn check_loomweave_dir_json(project_root: &Path) -> DoctorJsonCheck { - let loomweave_dir = loomweave_core::store::store_dir(project_root); - let db = loomweave_dir.join("loomweave.db"); - if loomweave_dir.is_dir() && db.is_file() { - DoctorJsonCheck::ok( + match classify_index_db_health(project_root) { + IndexDbHealth::Healthy => DoctorJsonCheck::ok( ".weft/loomweave.schema", - ".weft/loomweave store directory and database are present", - ) - } else if loomweave_dir.is_dir() { - DoctorJsonCheck::warning( + format!( + ".weft/loomweave store database is present and readable (schema v{CURRENT_SCHEMA_VERSION})" + ), + ), + IndexDbHealth::Absent => DoctorJsonCheck::warning( ".weft/loomweave.schema", - ".weft/loomweave store directory exists but loomweave.db is absent", - ) - } else { - DoctorJsonCheck::warning( + "no index — run `loomweave install` + `loomweave analyze`", + ), + IndexDbHealth::Unreadable(detail) => DoctorJsonCheck::problem( ".weft/loomweave.schema", - ".weft/loomweave store directory is absent", - ) + format!("index exists but is unreadable: {detail}"), + ), + IndexDbHealth::FutureSchema { found, current } => DoctorJsonCheck::problem( + ".weft/loomweave.schema", + format!( + "index schema v{found} is newer than this build (current v{current}); \ + the database was written by a newer Loomweave build" + ), + ), + } +} + +/// Text-path twin of [`check_loomweave_dir_json`]: contributes to the `Tally` +/// so problems fail the gate and warnings are surfaced. +fn check_loomweave_dir(project_root: &Path) -> Tally { + match classify_index_db_health(project_root) { + IndexDbHealth::Healthy => ok(&format!( + "index DB present and readable (schema v{CURRENT_SCHEMA_VERSION})" + )), + IndexDbHealth::Absent => warn( + "no index — run `loomweave install` + `loomweave analyze`", + Some("loomweave install --path . 
&& loomweave analyze ."), + ), + IndexDbHealth::Unreadable(detail) => problem( + &format!("index exists but is unreadable: {detail}"), + Some( + "check permissions; if corrupt, remove .weft/loomweave/loomweave.db and re-analyze", + ), + ), + IndexDbHealth::FutureSchema { found, current } => problem( + &format!( + "index schema v{found} is newer than this build (current v{current}); \ + the database was written by a newer Loomweave build" + ), + Some("upgrade loomweave to match or exceed the schema version of the database"), + ), } } @@ -318,7 +406,7 @@ fn check_db_tracked_json(project_root: &Path, fix: bool) -> DoctorJsonCheck { let what = "loomweave.db is git-tracked — it mutates on every analyze/scan, dirtying \ the work tree and blocking legis signing (ADR-005 reversed)"; if !fix { - return DoctorJsonCheck::warning("db.tracked", what); + return DoctorJsonCheck::problem("db.tracked", what); } match git_untrack_db(project_root) { Ok(()) if db_tracked_state(project_root) == DbTrackedState::Untracked => { @@ -1053,7 +1141,10 @@ fn check_db_tracked(project_root: &Path, fix: bool) -> Tally { let what = "loomweave.db is git-tracked — it mutates on every analyze/scan, dirtying \ the work tree and blocking legis signing"; if !fix { - return warn( + // A tracked regenerable db blocks the analyze→govern→sign loop — + // a genuinely broken state, so it fails the gate (unlike the + // enrich-only binding/instruction warnings). 
+ return problem( what, Some( "git rm --cached .weft/loomweave/loomweave.db (or loomweave doctor --fix)", diff --git a/crates/loomweave-cli/tests/doctor.rs b/crates/loomweave-cli/tests/doctor.rs index 9dc426e6..94df12e1 100644 --- a/crates/loomweave-cli/tests/doctor.rs +++ b/crates/loomweave-cli/tests/doctor.rs @@ -9,6 +9,7 @@ use std::fs; use std::path::Path; use assert_cmd::Command; +use rusqlite::Connection; fn loomweave_bin() -> Command { let mut cmd = Command::cargo_bin("loomweave").expect("loomweave binary"); @@ -35,6 +36,16 @@ fn read_yaml(path: &Path) -> serde_json::Value { serde_norway::from_str(&fs::read_to_string(path).unwrap()).unwrap() } +/// Materialise a minimal healthy `SQLite` DB at the canonical store path so +/// `check_loomweave_dir` reports healthy (not the absent warning). A freshly +/// opened `SQLite` file has `user_version = 0`, which is <= the current schema +/// version and is therefore accepted. +fn write_healthy_db(root: &Path) { + let db_path = root.join(".weft/loomweave/loomweave.db"); + fs::create_dir_all(db_path.parent().unwrap()).unwrap(); + Connection::open(&db_path).expect("create minimal SQLite DB"); +} + /// Run `doctor` (optionally with `--fix`) and return `(exit_code, stdout)`. fn doctor(dir: &Path, fix: bool) -> (i32, String) { let mut cmd = loomweave_bin(); @@ -96,6 +107,9 @@ fn doctor_fix_registers_mcp_then_reports_healthy() { &["install", "--skills", "--codex-skills", "--hooks"], dir.path(), ); + // Materialise a healthy DB so the index health check reports ok rather than + // the absent-DB warning, which would prevent "All orientation surfaces healthy." 
+ write_healthy_db(dir.path()); let (code, out) = doctor(dir.path(), true); assert_eq!(code, 0, "--fix should repair and exit 0; stdout:\n{out}"); @@ -173,6 +187,10 @@ fn doctor_fix_repairs_missing_three_way_integration_bindings() { ], dir.path(), ); + // Materialise a healthy DB so the index health check reports ok rather than + // the absent-DB warning; with the DB present, only the integration bindings + // surface warns, keeping the "1 warning" count stable. + write_healthy_db(dir.path()); let (code, out) = doctor(dir.path(), false); assert_eq!( @@ -584,3 +602,273 @@ fn doctor_reports_published_ephemeral_port() { "http.config should report the published live port: {http}" ); } + +// --------------------------------------------------------------------------- +// Index DB health check tests (.weft/loomweave.schema) +// --------------------------------------------------------------------------- + +/// (a) Absent DB → `.weft/loomweave.schema` is a warning (ok=true), gate passes. +/// +/// A missing DB is a legitimate intermediate state (install-before-analyze), so +/// it must not fail the gate. The JSON path must set `ok: true`, and the text +/// path must exit 0 (warnings only, no problems). +#[test] +fn doctor_index_health_absent_db_is_warning_gate_passes() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + // `check_sei_population_json` opens the DB with `Connection::open` which + // creates it as a side-effect when absent. Remove any DB that install or a + // prior doctor run may have materialised so this test exercises the + // genuine absence path. 
+ let db_path = dir.path().join(".weft/loomweave/loomweave.db"); + if db_path.exists() { + fs::remove_file(&db_path).unwrap(); + } + + let (code, json) = doctor_json(dir.path(), false); + assert_eq!( + code, 0, + "absent index DB must not fail the gate (install-before-analyze is a \ + legitimate intermediate state): {json}" + ); + assert_eq!( + json["ok"], true, + "absent index DB must leave ok=true: {json}" + ); + let check = json["checks"] + .as_array() + .unwrap() + .iter() + .find(|c| c["id"] == ".weft/loomweave.schema") + .expect(".weft/loomweave.schema check must be present"); + assert_eq!( + check["status"], "warning", + ".weft/loomweave.schema must be a warning when DB is absent: {check}" + ); + assert!( + check["message"] + .as_str() + .unwrap_or("") + .contains("loomweave install"), + "warning message must suggest loomweave install + analyze: {check}" + ); + + // Text path: warnings-only → exit 0. + // Re-delete the DB: doctor_json may have recreated it as a side-effect + // of check_sei_population_json (which uses Connection::open, not read-only). + if db_path.exists() { + fs::remove_file(&db_path).unwrap(); + } + let (code, out) = doctor(dir.path(), false); + assert_eq!( + code, 0, + "absent index DB must not fail the text-path gate: stdout:\n{out}" + ); + assert!( + out.contains("⚠ no index"), + "absent DB must surface as a text-path warning: stdout:\n{out}" + ); +} + +/// (b) DB file present but not valid `SQLite` → `.weft/loomweave.schema` is a +/// problem (ok=false), gate fails. +/// +/// A corrupt or non-`SQLite` file in the DB position must be surfaced as a gate +/// failure, not silently reported as healthy. +#[test] +fn doctor_index_health_corrupt_db_is_problem_gate_fails() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + // Write a non-SQLite file at the DB path — must NOT be zero-length (a 0-byte + // file opens as a fresh db with user_version=0 and is healthy). 
+ let db_path = dir.path().join(".weft/loomweave/loomweave.db"); + fs::create_dir_all(db_path.parent().unwrap()).unwrap(); + fs::write(&db_path, b"this is not a sqlite database").unwrap(); + + let (code, json) = doctor_json(dir.path(), false); + assert_eq!(code, 1, "a corrupt index DB must fail the gate: {json}"); + assert_eq!( + json["ok"], false, + "a corrupt index DB must set ok=false: {json}" + ); + let check = json["checks"] + .as_array() + .unwrap() + .iter() + .find(|c| c["id"] == ".weft/loomweave.schema") + .expect(".weft/loomweave.schema check must be present"); + assert_eq!( + check["status"], "problem", + ".weft/loomweave.schema must be a problem when DB is unreadable: {check}" + ); + assert!( + check["message"] + .as_str() + .unwrap_or("") + .contains("unreadable"), + "problem message must say the index is unreadable: {check}" + ); + + // Text path: problem → exit 1. + let (code, out) = doctor(dir.path(), false); + assert_eq!( + code, 1, + "a corrupt index DB must fail the text-path gate: stdout:\n{out}" + ); + assert!( + out.contains("✗") && out.contains("unreadable"), + "corrupt DB must surface as a text-path problem: stdout:\n{out}" + ); +} + +/// (c) DB present, opens, but `user_version` > current → future-schema +/// problem (ok=false), message names the version numbers. +#[test] +fn doctor_index_health_future_schema_is_problem_with_version_in_message() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + let db_path = dir.path().join(".weft/loomweave/loomweave.db"); + fs::create_dir_all(db_path.parent().unwrap()).unwrap(); + // Create a valid SQLite file with user_version stamped far above current. + { + let conn = Connection::open(&db_path).expect("create DB"); + // user_version is a 32-bit signed integer in SQLite; any value > current + // triggers the future-schema guard. We stamp a fixed sentinel far above any + // plausible CURRENT_SCHEMA_VERSION, so the test stays correct when it is bumped.
+ conn.execute_batch("PRAGMA user_version = 99999;") + .expect("set future user_version"); + } + + let (code, json) = doctor_json(dir.path(), false); + assert_eq!(code, 1, "a future-schema DB must fail the gate: {json}"); + assert_eq!( + json["ok"], false, + "a future-schema DB must set ok=false: {json}" + ); + let check = json["checks"] + .as_array() + .unwrap() + .iter() + .find(|c| c["id"] == ".weft/loomweave.schema") + .expect(".weft/loomweave.schema check must be present"); + assert_eq!( + check["status"], "problem", + ".weft/loomweave.schema must be a problem for a future-schema DB: {check}" + ); + let msg = check["message"].as_str().unwrap_or(""); + assert!( + msg.contains("99999"), + "problem message must name the found schema version (99999): {check}" + ); + assert!( + msg.contains("newer Loomweave build"), + "problem message must mention 'newer Loomweave build': {check}" + ); + + // Text path: problem → exit 1. + let (code, out) = doctor(dir.path(), false); + assert_eq!( + code, 1, + "a future-schema DB must fail the text-path gate: stdout:\n{out}" + ); + assert!( + out.contains("99999"), + "text output must name the schema version (99999): stdout:\n{out}" + ); +} + +/// (d) DB present, opens, version <= current → `.weft/loomweave.schema` is ok. +/// +/// The check's specific status is verified via the JSON surface so we don't +/// couple to the global "All healthy" summary (which depends on plugin/llm state). +#[test] +fn doctor_index_health_healthy_db_is_ok() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + // A freshly opened SQLite file has user_version=0, which is <= current and + // therefore accepted by verify_user_version. 
+ write_healthy_db(dir.path()); + + let (code, json) = doctor_json(dir.path(), false); + assert_eq!(code, 0, "a healthy index DB must not fail the gate: {json}"); + let check = json["checks"] + .as_array() + .unwrap() + .iter() + .find(|c| c["id"] == ".weft/loomweave.schema") + .expect(".weft/loomweave.schema check must be present"); + assert_eq!( + check["status"], "ok", + ".weft/loomweave.schema must be ok for a healthy DB: {check}" + ); + assert_eq!( + check["fixed"], + serde_json::json!(false), + "a healthy check is never marked fixed: {check}" + ); + + // Text path: no warning or problem for the index check → does not + // contribute to exit-1. + let (code, out) = doctor(dir.path(), false); + assert_eq!( + code, 0, + "a healthy index DB must not fail the text-path gate: stdout:\n{out}" + ); + assert!( + out.contains("✓") && out.contains("index DB present"), + "healthy DB must surface as a text-path ok line: stdout:\n{out}" + ); +} + +fn run_git(repo: &Path, args: &[&str]) { + let ok = std::process::Command::new("git") + .arg("-C") + .arg(repo) + .args(args) + .output() + .expect("git runs") + .status + .success(); + assert!(ok, "git {args:?} failed"); +} + +/// A git-tracked runtime DB is a gate-failing problem: it mutates on every +/// analyze/scan, dirtying the work tree and blocking legis signing. `doctor` +/// must exit non-zero; `--fix` untracks it via `git rm --cached` and the project +/// is then healthy (exit 0), with the working-tree file preserved. +#[test] +fn doctor_flags_git_tracked_db_as_problem_and_fix_untracks_it() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + write_healthy_db(dir.path()); + run_git(dir.path(), &["init", "-q"]); + run_git(dir.path(), &["config", "user.email", "t@t"]); + run_git(dir.path(), &["config", "user.name", "t"]); + // `-f` overrides the installed .gitignore — the real scenario is a db that + // was committed before ADR-005 was reversed. 
+ run_git(dir.path(), &["add", "-f", ".weft/loomweave/loomweave.db"]); + + let (code, out) = doctor(dir.path(), false); + assert_eq!( + code, 1, + "a git-tracked db must fail the gate; stdout:\n{out}" + ); + assert!( + out.contains("loomweave.db is git-tracked"), + "the tracked-db problem must be named; stdout:\n{out}" + ); + + let (fix_code, fix_out) = doctor(dir.path(), true); + assert_eq!( + fix_code, 0, + "--fix untracks the db, then the project is healthy; stdout:\n{fix_out}" + ); + assert!( + fix_out.contains("git rm --cached"), + "the --fix line must report the remedy; stdout:\n{fix_out}" + ); + assert!( + dir.path().join(".weft/loomweave/loomweave.db").is_file(), + "git rm --cached must keep the working-tree db file" + ); +} From ec12c83413081fcebf7e01e5dd3fba41432b0911 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 13:02:01 +1000 Subject: [PATCH 50/60] =?UTF-8?q?fix(doctor):=20write=20the=20project-scop?= =?UTF-8?q?ed=20wardline=E2=86=92filigree=20bridge=20URL=20(opp=20#2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `integration_bindings` hardcoded the unscoped `/api/weft/scan-results`, which server-mode Filigree now fail-closes (N1) → every wardline scan 400s. Worse, the staleness check tested for that exact unscoped URL, so it flagged a CORRECT project-scoped config as "stale" and `doctor --fix` / `install` then OVERWROTE the working URL with the broken one — a repair that converted working→broken. 
- new `filigree_server_scope` reads `.weft/filigree/config.json`; when `mode == "server"` it returns the routing `prefix` (filigree mounts `/api/p/{prefix}/…` on it; `name` kept as fallback), else None (fail-soft) - `desired_bindings` emits `/api/p/{prefix}/weft/scan-results` for server-mode Filigree, and keeps the unscoped `/api/…` path for single-project / no-Filigree layouts (which still serve the unscoped mount) - so `binding_state` now reports a scoped config HEALTHY (no false "stale"), and `--fix` converges to the WORKING scoped URL instead of clobbering it - 3 tests: server-mode→scoped, non-server→unscoped, absent-config→unscoped Closes gap-analysis opp #2 (loomweave's half of the member path-scope action). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../loomweave-cli/src/integration_bindings.rs | 102 +++++++++++++++++- 1 file changed, 98 insertions(+), 4 deletions(-) diff --git a/crates/loomweave-cli/src/integration_bindings.rs b/crates/loomweave-cli/src/integration_bindings.rs index be4a5d34..2021f006 100644 --- a/crates/loomweave-cli/src/integration_bindings.rs +++ b/crates/loomweave-cli/src/integration_bindings.rs @@ -72,10 +72,16 @@ fn desired_bindings(project_root: &Path) -> DesiredBindings { let filigree_base_url = live_filigree_base_url(project_root) .or_else(|| configured_filigree_base_url(project_root)) .unwrap_or_else(|| DEFAULT_FILIGREE_BASE_URL.to_owned()); - let wardline_filigree_url = format!( - "{}/api/weft/scan-results", - filigree_base_url.trim_end_matches('/') - ); + // Server-mode Filigree mounts the federation write router under + // `/api/p/{prefix}/…` and fail-closes an unscoped write (filigree N1), so the + // bridge URL must carry the project scope or every wardline scan 400s. A + // single-project (non-server) Filigree, or no Filigree at all, keeps the + // unscoped `/api/…` mount. (gap-analysis opp #2 / weft path-scope action.) 
+ let base = filigree_base_url.trim_end_matches('/'); + let wardline_filigree_url = match filigree_server_scope(project_root) { + Some(prefix) => format!("{base}/api/p/{prefix}/weft/scan-results"), + None => format!("{base}/api/weft/scan-results"), + }; // ADR-044: seed the consumer's static target with this project's // deterministic read-API port. serve binds the same port (barring an // ephemeral fallback), and the published .weft/loomweave/ephemeral.port file @@ -89,6 +95,30 @@ fn desired_bindings(project_root: &Path) -> DesiredBindings { } } +/// This project's Filigree routing key, but only when Filigree runs in *server* +/// mode (the case that requires a project-scoped `/api/p/{prefix}/…` write). +/// +/// Reads `.weft/filigree/config.json`; the URL-facing key is `prefix` (filigree +/// routes `/api/p/{prefix}` on it; `name` is display-only, kept as a fallback). +/// Fail-soft: returns `None` (→ unscoped path) when the config is absent, +/// unparseable, not `mode: "server"`, or carries no usable key — so a +/// Loomweave-solo or single-project layout is unchanged. 
+fn filigree_server_scope(project_root: &Path) -> Option { + let path = project_root.join(".weft/filigree/config.json"); + let raw = fs::read_to_string(path).ok()?; + let value: Value = serde_json::from_str(&raw).ok()?; + if value.get("mode").and_then(Value::as_str) != Some("server") { + return None; + } + value + .get("prefix") + .or_else(|| value.get("name")) + .and_then(Value::as_str) + .map(str::trim) + .filter(|key| !key.is_empty()) + .map(str::to_owned) +} + fn live_filigree_base_url(project_root: &Path) -> Option { // ADR-046: read Filigree's live port only from the consolidated // `.weft/filigree/ephemeral.port` location, via the canonical resolver so the @@ -363,3 +393,67 @@ fn write_text_if_changed(path: &Path, content: &str) -> Result { fs::write(path, content).with_context(|| format!("write {}", path.display()))?; Ok(true) } + +#[cfg(test)] +mod tests { + use super::*; + + fn write_filigree_config(root: &Path, body: &str) { + let dir = root.join(".weft/filigree"); + fs::create_dir_all(&dir).unwrap(); + fs::write(dir.join("config.json"), body).unwrap(); + } + + /// Server-mode Filigree fail-closes an unscoped federation write, so the + /// bridge URL must carry the project scope `/api/p/{prefix}/…`. + #[test] + fn server_mode_filigree_yields_project_scoped_bridge_url() { + let dir = tempfile::tempdir().unwrap(); + write_filigree_config( + dir.path(), + r#"{"prefix":"lacuna","name":"lacuna","mode":"server"}"#, + ); + let desired = desired_bindings(dir.path()); + assert!( + desired + .wardline_filigree_url + .ends_with("/api/p/lacuna/weft/scan-results"), + "server-mode Filigree must scope the bridge URL: {}", + desired.wardline_filigree_url + ); + } + + /// Single-project (non-server) Filigree serves the unscoped `/api/…` mount, + /// so the bridge URL stays unscoped. 
+ #[test] + fn non_server_filigree_keeps_unscoped_bridge_url() { + let dir = tempfile::tempdir().unwrap(); + write_filigree_config( + dir.path(), + r#"{"prefix":"lacuna","name":"lacuna","mode":"single"}"#, + ); + let desired = desired_bindings(dir.path()); + assert!( + desired + .wardline_filigree_url + .ends_with("/api/weft/scan-results") + && !desired.wardline_filigree_url.contains("/api/p/"), + "non-server Filigree keeps the unscoped path: {}", + desired.wardline_filigree_url + ); + } + + /// No Filigree config (Loomweave-solo, or pre-init) → unscoped, fail-soft. + #[test] + fn absent_filigree_config_keeps_unscoped_bridge_url() { + let dir = tempfile::tempdir().unwrap(); + let desired = desired_bindings(dir.path()); + assert!( + desired + .wardline_filigree_url + .ends_with("/api/weft/scan-results"), + "absent Filigree config keeps the unscoped path: {}", + desired.wardline_filigree_url + ); + } +} From a91483abbd93fcecd293a91b726c57daa03fe3a2 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 19:04:52 +1000 Subject: [PATCH 51/60] ci: extract reusable verify.yml; ci.yml + release.yml call it (V11-CI-01) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes clarion-c9f62eec7d. The pre-merge gate (ci.yml) and the pre-release gate (release.yml `verify`/`verify-macos`) were hand-duplicated — the exact drift risk release.yml's own header flagged ("Duplicated rather than refactored into a reusable workflow on the eve of 1.0; that refactor is post-release scope"). They had already diverged (llvm-tools-preview component, job shape, cache keys). 
Factor the entire gate set into `.github/workflows/verify.yml` (`on: workflow_call`): rust (fmt, the five lockstep/migration guards, clippy, build bins, nextest, doc -D warnings, deny), rust-macos (aarch64 clippy+build), python-plugin (ontology lockstep, uv sync, pip-audit, B4/B5 gate, ruff, ruff format, mypy, pytest), and walking-skeleton (sprint_1/wp5/sprint_2/phase3). Both entry points now `uses: ./.github/workflows/verify.yml`, so the CI floor is defined exactly once and cannot drift. The only release-only concern — "tag points at a commit on main" — stays in release.yml as a dedicated `assert-on-main` job (gated on event_name==push, so a dispatch dry-run still runs; a skipped needs does not block dependents). The build/publish jobs now `needs: [verify, assert-on-main]`. ci.yml 227->17, release.yml 673->532, +234 canonical. YAML validated; the gate set is preserved by construction. actionlint could not run in this sandbox (external download blocked) — the live proof is the next Actions run. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/ci.yml | 220 +------------------------------- .github/workflows/release.yml | 185 ++++----------------------- .github/workflows/verify.yml | 234 ++++++++++++++++++++++++++++++++++ 3 files changed, 261 insertions(+), 378 deletions(-) create mode 100644 .github/workflows/verify.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 64c1cee1..afdaba99 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,219 +9,9 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true -env: - CARGO_TERM_COLOR: always - FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true - RUSTFLAGS: "-D warnings" - jobs: - rust: - name: Rust - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 - with: - # Full history so the ADR-024 migration-retirement guard can resolve - # the published_build.txt ref via `git show :0001` (the marker - # names a commit SHA, which a shallow checkout can't read). Mirrors - # the release.yml verify job. clarion-12667da9f5 / clarion-0106a61480. - fetch-depth: 0 - - - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 - with: - toolchain: stable - components: clippy, rustfmt, llvm-tools-preview - - - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 - - # Python is used by the migration-retirement guard and lockstep guards - # below. Pin >= 3.11 explicitly so the stdlib `tomllib` is available - # regardless of which Python the runner image happens to preinstall. 
- - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 - with: - python-version: "3.11" - - - name: fmt - run: cargo fmt --all -- --check - - - name: migration retirement guard - run: | - python scripts/check-migration-retirement.py --self-test - python scripts/check-migration-retirement.py - - - name: cross-workspace version lockstep - run: python scripts/check-workspace-version-lockstep.py - - - name: pyright pin lockstep - run: | - python scripts/check-pyright-pin-lockstep.py --self-test - python scripts/check-pyright-pin-lockstep.py - - - name: wardline version bounds - run: | - python scripts/check-wardline-version-bounds.py --self-test - python scripts/check-wardline-version-bounds.py - - - name: entity-cap ADR/code lockstep - run: | - python scripts/check-entity-cap-lockstep.py --self-test - python scripts/check-entity-cap-lockstep.py - - - name: clippy - run: cargo clippy --workspace --all-targets --all-features -- -D warnings - - - name: install cargo-nextest - uses: taiki-e/install-action@e310bff3ef77234d477d6bb655da153a5c49d1db - - # Ensure all workspace binaries (notably loomweave-plugin-fixture) are built - # before nextest runs. wp2_e2e tests need the fixture binary on disk and - # nextest's CARGO_BIN_EXE_* propagation is not reliably set for cross-package - # dev-dep binaries (deferred issue clarion-adeff0916d). - - name: build workspace bins - run: cargo build --workspace --bins - - - name: test - run: cargo nextest run --workspace --all-features --no-tests=pass - - - name: doc - run: cargo doc --workspace --no-deps --all-features - env: - RUSTDOCFLAGS: "-D warnings" - - - name: install cargo-deny - uses: taiki-e/install-action@2e721ffcfc34740897fc3130e2dd782b1c136896 - - - name: deny - run: cargo deny check - - # macOS build + lint parity. The release matrix was the first place macOS was - # ever compiled, so a macOS-only `-D warnings` regression stayed invisible - # until tag-cut (clarion-12667da9f5). 
This native runner job closes that gap. - # It mirrors only the Linux job's clippy + bin build — the Python guards are - # platform-independent and already covered by the `rust` job. No `--target` - # flag: native builds avoid the ring C-build-script failure seen when - # cross-compiling Linux -> macOS. - # - # Only aarch64-apple-darwin (macos-14) runs here: the x86_64 (macos-13) leg is - # temporarily dropped while those runners are offline/unreliable. Restore it - # when macos-13 runner health recovers (clarion-12667da9f5 follow-up). The - # unused-symbol class this gate guards is `target_os = "macos"`, i.e. - # arch-independent, so aarch64 coverage catches the same regressions. - rust-macos: - name: Rust (${{ matrix.target }}) - runs-on: ${{ matrix.runner }} - strategy: - fail-fast: false - matrix: - include: - - target: aarch64-apple-darwin - runner: macos-14 - steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 - - - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 - with: - toolchain: stable - components: clippy - - - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 - with: - key: ${{ matrix.target }} - - - name: clippy - run: cargo clippy --workspace --all-targets --all-features -- -D warnings - - - name: build workspace bins - run: cargo build --workspace --bins - - python-plugin: - name: Python plugin - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 - with: - python-version: "3.11" - cache: pip - cache-dependency-path: plugins/python/uv.lock - - - name: cache pyright runtime - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae - with: - path: ~/.cache/pyright-python/ - key: pyright-python-1.1.409-${{ runner.os }} - - - name: check Python ontology version lockstep - run: | - python scripts/check-python-ontology-version.py --self-test - python 
scripts/check-python-ontology-version.py - - - name: install uv - run: python -m pip install uv==0.10.2 - - - name: sync plugin (locked dev extras) - run: uv sync --project plugins/python --locked --extra dev - - - name: audit locked Python dependencies - run: | - uv export --project plugins/python --locked --extra dev --no-emit-project \ - --format requirements.txt \ - --output-file /tmp/loomweave-python-dev-requirements.txt - uv run --project plugins/python --extra dev pip-audit \ - -r /tmp/loomweave-python-dev-requirements.txt - - - name: check B.4*/B.5 performance gates - run: uv run --project plugins/python --extra dev python scripts/check-b4-gate-result.py --run-b5-smoke - - - name: ruff check - run: uv run --project plugins/python --extra dev ruff check plugins/python - - - name: ruff format check - run: uv run --project plugins/python --extra dev ruff format --check plugins/python - - - name: mypy - run: uv run --project plugins/python --extra dev mypy --strict plugins/python - - - name: pytest - run: uv run --project plugins/python --extra dev pytest plugins/python - - walking-skeleton: - name: Sprint 1 walking skeleton (end-to-end) - runs-on: ubuntu-latest - needs: [rust, python-plugin] - steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 - - - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 - with: - toolchain: stable - - - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 - with: - python-version: "3.11" - cache: pip - cache-dependency-path: plugins/python/pyproject.toml - - - name: cache pyright runtime - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae - with: - path: ~/.cache/pyright-python/ - key: pyright-python-1.1.409-${{ runner.os }} - - - name: install sqlite3 cli - run: sudo apt-get update && sudo apt-get install -y --no-install-recommends sqlite3 - - - name: run walking skeleton - run: bash 
tests/e2e/sprint_1_walking_skeleton.sh - - - name: run WP5 secret scanner smoke - run: CARGO_BUILD=0 bash tests/e2e/wp5_secret_scan.sh - - - name: Sprint 2 MCP surface - run: CARGO_BUILD=0 bash tests/e2e/sprint_2_mcp_surface.sh - - - name: Phase 3 subsystems - run: CARGO_BUILD=0 bash tests/e2e/phase3_subsystems.sh + # Every gate lives in the reusable verify.yml so CI and the release pre-publish + # gate share one definition and cannot drift (V11-CI-01, clarion-c9f62eec7d). + verify: + name: Verify + uses: ./.github/workflows/verify.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 04835d5e..ccf0c831 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -36,181 +36,40 @@ permissions: contents: read jobs: - verify: - # Pre-release gate: must mirror ci.yml so a broken commit cannot reach the - # build/publish jobs. Duplicated rather than refactored into a reusable - # workflow on the eve of 1.0; that refactor is post-release scope. - name: Verify (pre-release gates) + # Release-only precondition: a real release (tag push) must point at a commit + # on main. A workflow_dispatch dry-run may run from any branch (it can only + # ever publish to TestPyPI — see the publish-* job `if`), so this is gated on + # event_name == 'push'. Kept here, not in verify.yml: it is a release policy + # check, not a CI gate, so it does not belong in the shared workflow. On a + # dispatch run it is skipped, which leaves dependent build jobs unblocked + # (a skipped `needs` does not fail dependents). clarion-c9f62eec7d. + assert-on-main: + name: Assert tagged commit is on main runs-on: ubuntu-latest + if: github.event_name == 'push' steps: - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 with: fetch-depth: 0 - name: Assert tagged commit is on main - # Only gates real releases (tag pushes). 
A workflow_dispatch dry-run can - # publish solely to TestPyPI (see publish-* job `if`), so it is allowed - # to run from any branch — letting the publish workflow itself be - # iterated/validated before it is merged to main. - if: github.event_name == 'push' run: | git fetch origin main git merge-base --is-ancestor "$GITHUB_SHA" origin/main || \ { echo "::error::tag does not point to a commit on main"; exit 1; } - - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 - with: - toolchain: stable - components: clippy, rustfmt - - - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 - with: - key: release-verify - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 - with: - python-version: "3.11" - - - name: cache pyright runtime - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae - with: - path: ~/.cache/pyright-python/ - key: pyright-python-1.1.409-${{ runner.os }} - - - name: rust fmt - run: cargo fmt --all -- --check - - - name: migration retirement guard - run: | - python scripts/check-migration-retirement.py --self-test - python scripts/check-migration-retirement.py - - - name: cross-workspace version lockstep - run: python scripts/check-workspace-version-lockstep.py - - - name: pyright pin lockstep - run: | - python scripts/check-pyright-pin-lockstep.py --self-test - python scripts/check-pyright-pin-lockstep.py - - - name: wardline version bounds - run: | - python scripts/check-wardline-version-bounds.py --self-test - python scripts/check-wardline-version-bounds.py - - - name: entity-cap ADR/code lockstep - run: | - python scripts/check-entity-cap-lockstep.py --self-test - python scripts/check-entity-cap-lockstep.py - - - name: rust clippy - run: cargo clippy --workspace --all-targets --all-features -- -D warnings - - - name: install cargo-nextest - uses: taiki-e/install-action@e310bff3ef77234d477d6bb655da153a5c49d1db - - - name: rust build workspace bins - run: cargo build --workspace --bins - 
- - name: rust nextest - run: cargo nextest run --workspace --all-features --no-tests=pass - - - name: rust doc - run: cargo doc --workspace --no-deps --all-features - env: - RUSTDOCFLAGS: "-D warnings" - - - name: install cargo-deny - uses: taiki-e/install-action@2e721ffcfc34740897fc3130e2dd782b1c136896 - - - name: cargo deny - run: cargo deny check - - - name: check Python ontology version lockstep - run: | - python scripts/check-python-ontology-version.py --self-test - python scripts/check-python-ontology-version.py - - - name: install uv - run: python -m pip install uv==0.10.2 - - - name: sync plugin (locked dev extras) - run: uv sync --project plugins/python --locked --extra dev - - - name: audit locked Python dependencies - run: | - uv export --project plugins/python --locked --extra dev --no-emit-project \ - --format requirements.txt \ - --output-file /tmp/loomweave-python-dev-requirements.txt - uv run --project plugins/python --extra dev pip-audit \ - -r /tmp/loomweave-python-dev-requirements.txt - - - name: check B.4*/B.5 performance gates - run: uv run --project plugins/python --extra dev python scripts/check-b4-gate-result.py --run-b5-smoke - - - name: ruff check - run: uv run --project plugins/python --extra dev ruff check plugins/python - - - name: ruff format check - run: uv run --project plugins/python --extra dev ruff format --check plugins/python - - - name: mypy - run: uv run --project plugins/python --extra dev mypy --strict plugins/python - - - name: pytest - run: uv run --project plugins/python --extra dev pytest plugins/python - - - name: install sqlite3 cli - run: sudo apt-get update && sudo apt-get install -y --no-install-recommends sqlite3 - - - name: walking skeleton (end-to-end) - run: bash tests/e2e/sprint_1_walking_skeleton.sh - - - name: WP5 secret scanner smoke - run: CARGO_BUILD=0 bash tests/e2e/wp5_secret_scan.sh - - - name: Sprint 2 MCP surface - run: CARGO_BUILD=0 bash tests/e2e/sprint_2_mcp_surface.sh - - - name: Phase 3 
subsystems - run: CARGO_BUILD=0 bash tests/e2e/phase3_subsystems.sh - - # macOS (aarch64) pre-release gate — mirrors ci.yml's rust-macos job so a - # macOS-only clippy/--all-targets regression cannot reach the build/publish - # jobs. ci.yml gates every PR, but release.yml's Linux-only `verify` left a - # gap for a macOS-only test/all-targets lint issue that does not break the - # --bin build (clarion-47d395e03c). build-rust already uses macos-14, so no - # new runner dependency. Restore the x86_64 (macos-13) leg here alongside the - # build matrix when those runners recover (clarion-ec389a8e72). - verify-macos: - name: Verify macOS (${{ matrix.target }}) - runs-on: ${{ matrix.runner }} - strategy: - fail-fast: false - matrix: - include: - - target: aarch64-apple-darwin - runner: macos-14 - steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 - - - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 - with: - toolchain: stable - components: clippy - - - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 - with: - key: release-verify-${{ matrix.target }} - - - name: clippy - run: cargo clippy --workspace --all-targets --all-features -- -D warnings - - - name: build workspace bins - run: cargo build --workspace --bins + # The full pre-release gate set (Linux + macOS, Rust + Python + e2e). This is + # the SAME reusable workflow ci.yml calls, so the pre-release gate can no + # longer drift from CI — it is identical by construction. Replaces the + # previously-duplicated `verify` + `verify-macos` jobs (V11-CI-01, + # clarion-c9f62eec7d). A broken commit still cannot reach build/publish: the + # build jobs `needs` this job, which fails if any inner gate fails. 
+ verify: + name: Verify (pre-release gates) + uses: ./.github/workflows/verify.yml build-rust: - needs: [verify, verify-macos] + needs: [verify, assert-on-main] name: Build loomweave (${{ matrix.target }}) runs-on: ${{ matrix.runner }} strategy: @@ -276,7 +135,7 @@ jobs: retention-days: 7 build-wheels: - needs: [verify, verify-macos] + needs: [verify, assert-on-main] name: Build loomweave wheel (${{ matrix.target }}) runs-on: ${{ matrix.runner }} # maturin bin-wheels for PyPI. Matrix mirrors `build-rust` (Linux x86_64 + @@ -327,7 +186,7 @@ jobs: retention-days: 7 build-plugin: - needs: [verify, verify-macos] + needs: [verify, assert-on-main] name: Build Python plugin sdist runs-on: ubuntu-latest steps: diff --git a/.github/workflows/verify.yml b/.github/workflows/verify.yml new file mode 100644 index 00000000..d7eaf92f --- /dev/null +++ b/.github/workflows/verify.yml @@ -0,0 +1,234 @@ +name: Verify + +# Single source of truth for the pre-merge / pre-release gate set (closes +# V11-CI-01, clarion-c9f62eec7d). Both ci.yml (every PR + push to main) and +# release.yml (the pre-publish gate) call this via `workflow_call`, so the +# CI floor in CLAUDE.md "Build, test, lint" is defined exactly once and the two +# entry points can no longer drift (the failure mode the old "Duplicated rather +# than refactored" note in release.yml warned about). +# +# Reusable-workflow notes: +# * A called workflow does NOT inherit the caller's top-level `env`, so the +# RUSTFLAGS / CARGO_TERM_COLOR block is defined here, not in the callers. +# * No secrets are used by any gate job, so callers need no `secrets:` line. +# * Release-only concerns (assert-tag-on-main, build/publish) stay in the +# caller — this workflow is purely the gate. 
+on: + workflow_call: + +env: + CARGO_TERM_COLOR: always + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true + RUSTFLAGS: "-D warnings" + +jobs: + rust: + name: Rust + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 + with: + # Full history so the ADR-024 migration-retirement guard can resolve + # the published_build.txt ref via `git show :0001` (the marker + # names a commit SHA, which a shallow checkout can't read). + # clarion-12667da9f5 / clarion-0106a61480. + fetch-depth: 0 + + - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 + with: + toolchain: stable + components: clippy, rustfmt, llvm-tools-preview + + - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 + + # Python is used by the migration-retirement guard and lockstep guards + # below. Pin >= 3.11 explicitly so the stdlib `tomllib` is available + # regardless of which Python the runner image happens to preinstall. + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 + with: + python-version: "3.11" + + - name: fmt + run: cargo fmt --all -- --check + + - name: migration retirement guard + run: | + python scripts/check-migration-retirement.py --self-test + python scripts/check-migration-retirement.py + + - name: cross-workspace version lockstep + run: python scripts/check-workspace-version-lockstep.py + + - name: pyright pin lockstep + run: | + python scripts/check-pyright-pin-lockstep.py --self-test + python scripts/check-pyright-pin-lockstep.py + + - name: wardline version bounds + run: | + python scripts/check-wardline-version-bounds.py --self-test + python scripts/check-wardline-version-bounds.py + + - name: entity-cap ADR/code lockstep + run: | + python scripts/check-entity-cap-lockstep.py --self-test + python scripts/check-entity-cap-lockstep.py + + - name: clippy + run: cargo clippy --workspace --all-targets --all-features -- -D warnings + + - name: install cargo-nextest + uses: 
taiki-e/install-action@e310bff3ef77234d477d6bb655da153a5c49d1db + + # Ensure all workspace binaries (notably loomweave-plugin-fixture) are built + # before nextest runs. wp2_e2e tests need the fixture binary on disk and + # nextest's CARGO_BIN_EXE_* propagation is not reliably set for cross-package + # dev-dep binaries (deferred issue clarion-adeff0916d). + - name: build workspace bins + run: cargo build --workspace --bins + + - name: test + run: cargo nextest run --workspace --all-features --no-tests=pass + + - name: doc + run: cargo doc --workspace --no-deps --all-features + env: + RUSTDOCFLAGS: "-D warnings" + + - name: install cargo-deny + uses: taiki-e/install-action@2e721ffcfc34740897fc3130e2dd782b1c136896 + + - name: deny + run: cargo deny check + + # macOS build + lint parity. The release matrix was the first place macOS was + # ever compiled, so a macOS-only `-D warnings` regression stayed invisible + # until tag-cut (clarion-12667da9f5). This native runner job closes that gap. + # It mirrors only the Linux job's clippy + bin build — the Python guards are + # platform-independent and already covered by the `rust` job. No `--target` + # flag: native builds avoid the ring C-build-script failure seen when + # cross-compiling Linux -> macOS. + # + # Only aarch64-apple-darwin (macos-14) runs here: the x86_64 (macos-13) leg is + # temporarily dropped while those runners are offline/unreliable. Restore it + # when macos-13 runner health recovers (clarion-12667da9f5 follow-up). The + # unused-symbol class this gate guards is `target_os = "macos"`, i.e. + # arch-independent, so aarch64 coverage catches the same regressions. 
+ rust-macos: + name: Rust (${{ matrix.target }}) + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: false + matrix: + include: + - target: aarch64-apple-darwin + runner: macos-14 + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 + + - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 + with: + toolchain: stable + components: clippy + + - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 + with: + key: ${{ matrix.target }} + + - name: clippy + run: cargo clippy --workspace --all-targets --all-features -- -D warnings + + - name: build workspace bins + run: cargo build --workspace --bins + + python-plugin: + name: Python plugin + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 + with: + python-version: "3.11" + cache: pip + cache-dependency-path: plugins/python/uv.lock + + - name: cache pyright runtime + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae + with: + path: ~/.cache/pyright-python/ + key: pyright-python-1.1.409-${{ runner.os }} + + - name: check Python ontology version lockstep + run: | + python scripts/check-python-ontology-version.py --self-test + python scripts/check-python-ontology-version.py + + - name: install uv + run: python -m pip install uv==0.10.2 + + - name: sync plugin (locked dev extras) + run: uv sync --project plugins/python --locked --extra dev + + - name: audit locked Python dependencies + run: | + uv export --project plugins/python --locked --extra dev --no-emit-project \ + --format requirements.txt \ + --output-file /tmp/loomweave-python-dev-requirements.txt + uv run --project plugins/python --extra dev pip-audit \ + -r /tmp/loomweave-python-dev-requirements.txt + + - name: check B.4*/B.5 performance gates + run: uv run --project plugins/python --extra dev python scripts/check-b4-gate-result.py --run-b5-smoke + + - name: ruff check 
+ run: uv run --project plugins/python --extra dev ruff check plugins/python + + - name: ruff format check + run: uv run --project plugins/python --extra dev ruff format --check plugins/python + + - name: mypy + run: uv run --project plugins/python --extra dev mypy --strict plugins/python + + - name: pytest + run: uv run --project plugins/python --extra dev pytest plugins/python + + walking-skeleton: + name: Sprint 1 walking skeleton (end-to-end) + runs-on: ubuntu-latest + needs: [rust, python-plugin] + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 + + - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 + with: + toolchain: stable + + - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 + with: + python-version: "3.11" + cache: pip + cache-dependency-path: plugins/python/pyproject.toml + + - name: cache pyright runtime + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae + with: + path: ~/.cache/pyright-python/ + key: pyright-python-1.1.409-${{ runner.os }} + + - name: install sqlite3 cli + run: sudo apt-get update && sudo apt-get install -y --no-install-recommends sqlite3 + + - name: run walking skeleton + run: bash tests/e2e/sprint_1_walking_skeleton.sh + + - name: run WP5 secret scanner smoke + run: CARGO_BUILD=0 bash tests/e2e/wp5_secret_scan.sh + + - name: Sprint 2 MCP surface + run: CARGO_BUILD=0 bash tests/e2e/sprint_2_mcp_surface.sh + + - name: Phase 3 subsystems + run: CARGO_BUILD=0 bash tests/e2e/phase3_subsystems.sh From 99879827baabcac6470d6499e3c21ec39f234775 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 19:49:02 +1000 Subject: [PATCH 52/60] =?UTF-8?q?fix:=20PR#21=20deferred=20review=20findin?= =?UTF-8?q?gs=20=E2=80=94=20TOCTOU=20repair,=20schema-validated=20serve,?= =?UTF-8?q?=20config=20passthrough=20(clarion-c326ee6857)?= MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the 4 still-valid deferred findings from the PR#21 review; the other 8 were verified already-fixed/outdated against current code (rc3) and need only thread resolution (recorded on the issue). #4 writer.rs begin_run TOCTOU: if begin_write_tx exhausts its retries after the runs row was auto-committed 'running', re-mark the row 'failed' under a fresh implicit tx (mirrors the CommitRun failure-remark idiom) so it isn't stranded phantom-running with current_run unset. Deliberately does NOT move the INSERT inside the tx (the ticket's literal suggestion) — that would hide the running row from cross-process analyze_status until the first batch COMMIT (the regression review #15 warns about). Best-effort; mark_stale_running_runs_failed remains the startup backstop. #15 writer.rs resume_run: capture the row's prior (status, completed_at) before flipping it to 'running' and restore them if begin_write_tx fails — a pre-existing completed run must not be left flipped to running. Both paths get deterministic, single-threaded coverage via two test seams (grab a competing write lock after the insert/update; release it on the failure path so the best-effort cleanup can re-acquire the lock) — the contention harness the ticket said #4/#15 needed, without threads or wall-clock races. #8 reader.rs open_validated: reject an unmigrated DB (new schema::reject_unmigrated_for_read → StorageError::UnmigratedIndex) so a header-valid but empty/externally-created file is refused at serve instead of auto-materialised and answered with zero rows. Keyed on user_version (0 = unmigrated), NEVER on row counts — an installed-but-unanalyzed index is user_version=CURRENT with zero entities and stays a valid serve target. 
#12 serve --config passthrough: serve forwards its resolved on-disk --config to analyze_start-spawned analyze (ServerState::with_analyze_config → spawn_analyze --config) so the child parses the same configuration the operator launched serve with, instead of re-discovering config and silently diverging. Tests: 2 writer failpoint tests, 2 reader schema-validation tests, 1 spawn_analyze --config forward/omit test. 972 storage+mcp+cli tests green; clippy -D warnings clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/http_read/errors.rs | 14 +- crates/loomweave-cli/src/serve.rs | 13 + crates/loomweave-mcp/src/analyze_runs.rs | 82 +++++- crates/loomweave-mcp/src/lib.rs | 15 + crates/loomweave-mcp/src/tools/analyze.rs | 1 + crates/loomweave-storage/src/error.rs | 8 + crates/loomweave-storage/src/reader.rs | 13 + crates/loomweave-storage/src/schema.rs | 27 ++ crates/loomweave-storage/src/writer.rs | 276 +++++++++++++++++- crates/loomweave-storage/tests/reader_pool.rs | 46 +++ 10 files changed, 484 insertions(+), 11 deletions(-) diff --git a/crates/loomweave-cli/src/http_read/errors.rs b/crates/loomweave-cli/src/http_read/errors.rs index e7ed3a7a..326d42ba 100644 --- a/crates/loomweave-cli/src/http_read/errors.rs +++ b/crates/loomweave-cli/src/http_read/errors.rs @@ -77,13 +77,13 @@ pub(crate) fn classify_read_error(err: &StorageError) -> ReadError { // 500 here is defensive — in practice the HTTP API does not open // its own writer, but the reader pool can encounter the same file // header mismatches and we want a clear distinct response code. - StorageError::ForeignDatabase { .. } | StorageError::FutureUserVersion { .. } => { - ReadError { - status: StatusCode::INTERNAL_SERVER_ERROR, - code: ErrorCode::StorageError, - message: "file lookup storage rejected database header", - } - } + StorageError::ForeignDatabase { .. } + | StorageError::FutureUserVersion { .. 
} + | StorageError::UnmigratedIndex => ReadError { + status: StatusCode::INTERNAL_SERVER_ERROR, + code: ErrorCode::StorageError, + message: "file lookup storage rejected database header", + }, StorageError::PoolInteract(_) | StorageError::WriterGone | StorageError::WriterProtocol(_) diff --git a/crates/loomweave-cli/src/serve.rs b/crates/loomweave-cli/src/serve.rs index 8ff745c5..e6b16a3e 100644 --- a/crates/loomweave-cli/src/serve.rs +++ b/crates/loomweave-cli/src/serve.rs @@ -121,6 +121,9 @@ pub fn run(path: &Path, config_path: Option<&Path>) -> Result<()> { loomweave_mcp::McpToolPolicy { enable_write_tools: config.serve.mcp.enable_write_tools, }, + // review #12: forward serve's resolved config to analyze_start, but only + // when it exists on disk (the McpConfig::default() fallback has no file). + config_path.exists().then(|| config_path.to_path_buf()), )?; supervise_stdio_with_http(stdio, http_server) } @@ -188,6 +191,7 @@ fn spawn_mcp_stdio( filigree_client: Option, diagnostics: loomweave_mcp::DiagnosticsContext, tool_policy: loomweave_mcp::McpToolPolicy, + analyze_config_path: Option, ) -> Result { let (result_tx, result_rx) = mpsc::channel(); let join = thread::Builder::new() @@ -203,6 +207,7 @@ fn spawn_mcp_stdio( filigree_client, diagnostics, tool_policy, + analyze_config_path, ); let _ = result_tx.send(result); }) @@ -221,6 +226,7 @@ fn run_mcp_stdio( filigree_client: Option, diagnostics: loomweave_mcp::DiagnosticsContext, tool_policy: loomweave_mcp::McpToolPolicy, + analyze_config_path: Option, ) -> Result<()> { let stdin = std::io::stdin(); let stdout = std::io::stdout(); @@ -233,6 +239,13 @@ fn run_mcp_stdio( let _runtime_guard = runtime.enter(); let mut state = loomweave_mcp::ServerState::new(project_root, readers).with_tool_policy(tool_policy); + // Forward serve's config to an analyze_start-spawned analyze so the child + // parses the same configuration (review #12). 
Some only when serve was + // launched with an on-disk config — the McpConfig::default() fallback has + // no file to forward. + if let Some(analyze_config_path) = analyze_config_path { + state = state.with_analyze_config(analyze_config_path); + } let mut llm_writer = None; let mut llm_writer_join = None; if let Some(provider) = llm_provider { diff --git a/crates/loomweave-mcp/src/analyze_runs.rs b/crates/loomweave-mcp/src/analyze_runs.rs index 70f358a5..0ae789d1 100644 --- a/crates/loomweave-mcp/src/analyze_runs.rs +++ b/crates/loomweave-mcp/src/analyze_runs.rs @@ -49,11 +49,19 @@ pub(crate) type RunRegistry = Arc>>; /// `program` is the launcher (`current_exe()` in production; a stub in tests). /// The run id and progress path are passed in so the caller can return the /// handle without racing the run's first DB write or progress write. +/// +/// `config_path`, when `Some`, is forwarded as `--config ` so the spawned +/// `analyze` parses the SAME configuration the active `serve` was launched with +/// (review #12). Without it the child re-discovers config from the default +/// search path, which can silently diverge from serve's `--config` (e.g. +/// integrations.filigree emission settings), so a serve-triggered analyze would +/// behave differently from the operator's configured run. pub(crate) fn spawn_analyze( program: &std::path::Path, project_root: &std::path::Path, run_id: &str, progress_path: &std::path::Path, + config_path: Option<&std::path::Path>, started_at: String, ) -> std::io::Result { let mut command = std::process::Command::new(program); @@ -63,7 +71,11 @@ pub(crate) fn spawn_analyze( .arg("--run-id") .arg(run_id) .arg("--progress-file") - .arg(progress_path) + .arg(progress_path); + if let Some(config_path) = config_path { + command.arg("--config").arg(config_path); + } + command // Isolate the child's stdio. 
When analyze_start is driven from the // stdio MCP server, the child would otherwise inherit the server's // stdout — and `loomweave analyze` initializes tracing at `info`, so its @@ -241,6 +253,7 @@ mod tests { dir.path(), "run-x", &progress, + None, "2026-05-30T00:00:00Z".to_owned(), ) .expect("spawn stub"); @@ -253,4 +266,71 @@ mod tests { "child stdout was not isolated from the parent: {where_fd1:?}" ); } + + /// review #12: serve's `--config` must be forwarded to the spawned analyze. + #[test] + fn spawn_analyze_forwards_config_path_when_present_and_omits_when_absent() { + use std::io::Write as _; + use std::os::unix::fs::PermissionsExt as _; + + let dir = tempfile::tempdir().unwrap(); + let script = dir.path().join("argv_stub.sh"); + let argv_dump = dir.path().join("argv.txt"); + let config = dir.path().join("loomweave.yaml"); + std::fs::write(&config, "version: 1\n").unwrap(); + // Dump the full argv (one arg per line) so the test can assert exactly + // which flags were forwarded. The progress-file path is argv-relative, + // so locate it from the run rather than hard-coding a positional index. + let mut file = std::fs::File::create(&script).unwrap(); + writeln!( + file, + "#!/bin/sh\nfor a in \"$@\"; do printf '%s\\n' \"$a\"; done > \"{}\"\n", + argv_dump.display() + ) + .unwrap(); + std::fs::set_permissions(&script, std::fs::Permissions::from_mode(0o755)).unwrap(); + drop(file); + + // With Some(config): argv must contain `--config `. 
+ let progress = dir.path().join("p1.txt"); + let mut handle = spawn_analyze( + &script, + dir.path(), + "run-cfg", + &progress, + Some(config.as_path()), + "2026-05-30T00:00:00Z".to_owned(), + ) + .expect("spawn stub"); + handle.child.wait().expect("reap stub"); + let argv = std::fs::read_to_string(&argv_dump).expect("stub wrote argv"); + let forwarded: Vec<&str> = argv.lines().collect(); + let cfg_pos = forwarded + .iter() + .position(|a| *a == "--config") + .expect("--config must be forwarded when Some"); + assert_eq!( + forwarded.get(cfg_pos + 1).copied(), + Some(config.to_str().unwrap()), + "--config must be followed by the serve config path" + ); + + // With None: argv must NOT contain `--config`. + let progress2 = dir.path().join("p2.txt"); + let mut handle2 = spawn_analyze( + &script, + dir.path(), + "run-nocfg", + &progress2, + None, + "2026-05-30T00:00:00Z".to_owned(), + ) + .expect("spawn stub"); + handle2.child.wait().expect("reap stub"); + let argv2 = std::fs::read_to_string(&argv_dump).expect("stub wrote argv"); + assert!( + !argv2.lines().any(|a| a == "--config"), + "--config must be omitted when None; got {argv2:?}" + ); + } } diff --git a/crates/loomweave-mcp/src/lib.rs b/crates/loomweave-mcp/src/lib.rs index 3f497f64..c0e7cef4 100644 --- a/crates/loomweave-mcp/src/lib.rs +++ b/crates/loomweave-mcp/src/lib.rs @@ -1004,6 +1004,11 @@ pub struct ServerState { /// Launcher for `analyze_start` to spawn. `None` → `current_exe()`; tests /// inject a stub via [`ServerState::with_analyze_command`]. analyze_program: Option, + /// Config file the active `serve` was launched with, forwarded as + /// `--config` to an `analyze_start`-spawned analyze so the child parses the + /// same configuration (review #12). `None` → the child uses its default + /// config discovery (serve was started without an explicit `--config`). 
+ analyze_config_path: Option<PathBuf>, } impl ServerState { @@ -1027,6 +1032,7 @@ impl ServerState { cancelled_requests: Arc::new(AsyncMutex::new(BTreeSet::new())), cancellation_notify: Arc::new(Notify::new()), analyze_program: None, + analyze_config_path: None, } } @@ -1039,6 +1045,15 @@ impl ServerState { self } + /// Forward `serve`'s `--config` path to `analyze_start`-spawned analyze runs + /// so the child parses the same configuration (review #12). Call only when + /// serve was launched with an explicit, on-disk config file. + #[must_use] + pub fn with_analyze_config(mut self, config_path: PathBuf) -> Self { + self.analyze_config_path = Some(config_path); + self + } + #[must_use] pub fn with_tool_policy(mut self, policy: McpToolPolicy) -> Self { self.tool_policy = policy; diff --git a/crates/loomweave-mcp/src/tools/analyze.rs b/crates/loomweave-mcp/src/tools/analyze.rs index c20a02fe..f0587a03 100644 --- a/crates/loomweave-mcp/src/tools/analyze.rs +++ b/crates/loomweave-mcp/src/tools/analyze.rs @@ -79,6 +79,7 @@ impl ServerState { &self.project_root, &run_id, &progress_path, + self.analyze_config_path.as_deref(), started_at, ) { Ok(handle) => handle, diff --git a/crates/loomweave-storage/src/error.rs b/crates/loomweave-storage/src/error.rs index 5e7b534e..d79025b5 100644 --- a/crates/loomweave-storage/src/error.rs +++ b/crates/loomweave-storage/src/error.rs @@ -33,6 +33,14 @@ pub enum StorageError { )] FutureUserVersion { found: u32, current: u32 }, + #[error( + "LMWV-INFRA-STORAGE-UNMIGRATED-DB: refusing to open an unmigrated SQLite \ file (user_version=0 — no Loomweave schema applied). This is an empty or \ externally-created file, not a Loomweave index. 
Run `loomweave install \ + --path ` then `loomweave analyze ` to build the index" + )] + UnmigratedIndex, + #[error("migration {version} failed: {source}")] Migration { version: u32, diff --git a/crates/loomweave-storage/src/reader.rs b/crates/loomweave-storage/src/reader.rs index 47c3f034..f49ee173 100644 --- a/crates/loomweave-storage/src/reader.rs +++ b/crates/loomweave-storage/src/reader.rs @@ -91,6 +91,19 @@ impl ReaderPool { conn.query_row("PRAGMA schema_version", [], |row| row.get::<_, i64>(0))?; pragma::validate_application_id_for_read(&conn)?; crate::schema::verify_user_version(&conn)?; + // Reject an *unmigrated* file (review #8). A header-valid SQLite file + // can still be an empty/externally-created DB the read pool would + // otherwise auto-materialise and answer every query against with zero + // rows. `application_id` can't discriminate it (legacy Loomweave indexes + // carry application_id=0, which is accepted), so key on the schema + // version: an installed index is stamped at CURRENT_SCHEMA_VERSION by + // `apply_migrations`, whereas a fresh/empty file is user_version=0. + // NOTE: this checks the *schema*, never row counts — an installed but + // not-yet-analyzed index is user_version=CURRENT with zero entities and + // is a valid serve target. (A future-version DB is already rejected by + // verify_user_version above; 0 < v < CURRENT is left to migrate-on-open + // policy and is not refused here.) + crate::schema::reject_unmigrated_for_read(&conn)?; drop(conn); Self::open(db_path, max_size) } diff --git a/crates/loomweave-storage/src/schema.rs b/crates/loomweave-storage/src/schema.rs index 91c21b3a..cdae0114 100644 --- a/crates/loomweave-storage/src/schema.rs +++ b/crates/loomweave-storage/src/schema.rs @@ -146,6 +146,33 @@ pub fn verify_user_version(conn: &Connection) -> Result<()> { Ok(()) } +/// Reject an *unmigrated* database (`user_version = 0`) at read-open time. 
+/// +/// [`set_user_version`] / [`apply_migrations`] stamp +/// `user_version = CURRENT_SCHEMA_VERSION`, so a genuine Loomweave index — even +/// one with zero entities (installed but not yet analyzed) — reads back a +/// non-zero version. A `user_version` of 0 means no Loomweave schema was ever +/// applied: an empty file the read pool would otherwise auto-create, or an +/// externally-produced `SQLite` file. `serve` must refuse it rather than answer +/// every query with zero rows (review #8). +/// +/// This deliberately keys on the schema version, NOT on table row counts, so a +/// migrated-but-empty index stays a valid serve target. A *future* version is +/// handled by [`verify_user_version`]; `0 < user_version < CURRENT` is left to +/// migrate-on-open policy and is not refused here. +/// +/// # Errors +/// +/// Returns [`StorageError::UnmigratedIndex`] when `PRAGMA user_version` is 0, +/// or [`StorageError::Sqlite`] if the PRAGMA query fails. +pub fn reject_unmigrated_for_read(conn: &Connection) -> Result<()> { + let raw: i64 = conn.query_row("PRAGMA user_version", [], |row| row.get(0))?; + if raw == 0 { + return Err(StorageError::UnmigratedIndex); + } + Ok(()) +} + /// Write `PRAGMA user_version = CURRENT_SCHEMA_VERSION`. Idempotent — writing /// the same value is cheap (it touches the `SQLite` header page). Called after /// the migration runner has applied every pending migration. 
diff --git a/crates/loomweave-storage/src/writer.rs b/crates/loomweave-storage/src/writer.rs index eb3a140c..1027a538 100644 --- a/crates/loomweave-storage/src/writer.rs +++ b/crates/loomweave-storage/src/writer.rs @@ -16,7 +16,7 @@ use std::path::Path; use std::sync::Arc; use std::sync::atomic::{AtomicUsize, Ordering}; -use rusqlite::{Connection, params}; +use rusqlite::{Connection, OptionalExtension, params}; use tokio::sync::{mpsc, oneshot}; use tokio::task::JoinHandle; @@ -470,6 +470,40 @@ fn begin_run( config_json: &str, started_at: &str, head_commit: Option<&str>, +) -> Result<()> { + begin_run_inner( + conn, + state, + run_id, + config_json, + started_at, + head_commit, + |_| {}, + |_| {}, + ) +} + +/// `begin_run` with two test seams. +/// +/// `after_insert_committed` fires after the auto-committed `INSERT INTO runs` +/// (which deliberately publishes the row as `running` so cross-process +/// `analyze_status` pollers can see an in-progress run *before* the first batch +/// commits) and before the write transaction is opened. `on_write_tx_failed` +/// fires only when `begin_write_tx` returns `Err`, just before the cleanup +/// `UPDATE`. Production passes no-ops; tests use them to drive the review-#4 +/// TOCTOU window deterministically (grab a competing write lock in the first +/// seam so `begin_write_tx` fails, release it in the second so the best-effort +/// cleanup can re-acquire the lock). This mirrors the `on_busy` seam discipline +/// in `retry.rs`. 
+fn begin_run_inner( + conn: &mut Connection, + state: &mut ActorState, + run_id: &str, + config_json: &str, + started_at: &str, + head_commit: Option<&str>, + mut after_insert_committed: impl FnMut(&Connection), + mut on_write_tx_failed: impl FnMut(&Connection), ) -> Result<()> { if state.current_run.is_some() { return Err(StorageError::WriterProtocol( @@ -483,7 +517,32 @@ fn begin_run( ) VALUES (?1, ?2, NULL, ?3, '{}', 'running', ?4, ?5, ?2)", params![run_id, started_at, config_json, head_commit, owner_pid()], )?; - begin_write_tx(conn, state)?; + after_insert_committed(conn); + if let Err(err) = begin_write_tx(conn, state) { + // TOCTOU repair (review #4). The INSERT above auto-committed the row as + // `running` (visible to analyze_status), but under sustained + // cross-process contention begin_write_tx can exhaust its retries here. + // Without repair the row is stranded `running` with `current_run` + // unset, so the actor's channel-close cleanup never marks it failed and + // analyze_status reports a phantom in-progress run. Re-mark it failed + // under a fresh implicit transaction (mirrors the CommitRun + // failure-remark idiom). The INSERT is deliberately NOT moved inside + // the tx (the ticket's literal suggestion) because that would hide the + // `running` row from cross-process analyze_status until the first batch + // COMMIT — the regression review #15 warns about. Best-effort: if the + // cleanup itself loses the still-contended lock, mark_stale_running_runs_failed + // sweeps the row on the next startup. 
+ on_write_tx_failed(conn); + let _ = conn.execute( + "UPDATE runs \ + SET status = 'failed', \ + completed_at = strftime('%Y-%m-%dT%H:%M:%fZ', 'now'), \ + owner_pid = NULL \ + WHERE id = ?1", + params![run_id], + ); + return Err(err); + } state.in_tx = true; state.writes_in_batch = 0; state.current_run = Some(run_id.to_owned()); @@ -500,11 +559,42 @@ fn begin_run( /// same durable graph as the original — `--resume` is a re-emit-without-flip /// path, not an incremental checkpoint-recovery one. fn resume_run(conn: &mut Connection, state: &mut ActorState, run_id: &str) -> Result<()> { + resume_run_inner(conn, state, run_id, |_| {}, |_| {}) +} + +/// `resume_run` with the same two test seams as [`begin_run_inner`]. +/// +/// Unlike `begin_run`, `resume_run` mutates a PRE-EXISTING row, so the +/// failure path must *restore* the row's prior terminal state rather than mark +/// it failed — leaving a previously-`completed` run flipped to `running` would +/// mis-report it (review #15). The prior `(status, completed_at)` are captured +/// before the flip and restored if `begin_write_tx` fails. +fn resume_run_inner( + conn: &mut Connection, + state: &mut ActorState, + run_id: &str, + mut after_update_committed: impl FnMut(&Connection), + mut on_write_tx_failed: impl FnMut(&Connection), +) -> Result<()> { if state.current_run.is_some() { return Err(StorageError::WriterProtocol( "ResumeRun received while a run is already in progress".to_owned(), )); } + // Capture the row's prior terminal state BEFORE flipping it to `running`, + // so it can be restored verbatim if we fail to open the write transaction. 
+ let prior = conn + .query_row( + "SELECT status, completed_at FROM runs WHERE id = ?1", + params![run_id], + |row| Ok((row.get::<_, String>(0)?, row.get::<_, Option<String>>(1)?)), + ) + .optional()?; + let Some((prior_status, prior_completed_at)) = prior else { + return Err(StorageError::WriterProtocol(format!( + "ResumeRun: no run with id {run_id} to resume" + ))); + }; let reopened = conn.execute( "UPDATE runs \ SET status = 'running', \ @@ -515,11 +605,26 @@ fn resume_run(conn: &mut Connection, state: &mut ActorState, run_id: &str) -> Re params![owner_pid(), run_id], )?; if reopened == 0 { + // Raced away between the SELECT and the UPDATE — treat as not-found. return Err(StorageError::WriterProtocol(format!( "ResumeRun: no run with id {run_id} to resume" ))); } - begin_write_tx(conn, state)?; + after_update_committed(conn); + if let Err(err) = begin_write_tx(conn, state) { + // The row pre-existed this resume, so restore its prior terminal state + // rather than leave it stranded `running` (review #15). Best-effort: + // mark_stale_running_runs_failed is the backstop if the restore also + // loses the still-contended lock. + on_write_tx_failed(conn); + let _ = conn.execute( + "UPDATE runs \ + SET status = ?1, completed_at = ?2, owner_pid = NULL \ + WHERE id = ?3", + params![prior_status, prior_completed_at, run_id], + ); + return Err(err); + } state.in_tx = true; state.writes_in_batch = 0; state.current_run = Some(run_id.to_owned()); @@ -1368,3 +1473,168 @@ fn ensure_run_update_changed_one(changed: usize, run_id: &str) -> Result<()> { ))) } } + +#[cfg(test)] +mod run_lifecycle_failpoint_tests { + //! Deterministic, single-threaded coverage for the `begin_run` / `resume_run` + //! TOCTOU repair paths (reviews #4 / #15). The competing write lock is held + //! and released through `begin_run_inner` / `resume_run_inner`'s two test + //! seams, so the failure window is hit without threads or wall-clock races. 
+ + use std::time::Duration; + + use rusqlite::Connection; + + use super::{ActorState, begin_run_inner, resume_run_inner}; + use crate::error::StorageError; + use crate::schema; + + /// On-disk DB (BEGIN IMMEDIATE needs a real file to contend on) with the + /// busy handler disabled so contention surfaces as an immediate `SQLITE_BUSY`. + fn migrated_conn(path: &std::path::Path) -> Connection { + let mut conn = Connection::open(path).expect("open"); + conn.busy_timeout(Duration::from_millis(0)) + .expect("busy_timeout"); + schema::apply_migrations(&mut conn).expect("apply_migrations"); + conn + } + + /// A writer state whose write-tx acquire fails fast (single attempt, no + /// backoff) so a held competing lock trips it immediately. + fn fastfail_state() -> ActorState { + let mut state = ActorState::new(50); + state.retry_policy = crate::retry::RetryPolicy { + max_attempts: 1, + initial_backoff: Duration::ZERO, + max_backoff: Duration::ZERO, + }; + state + } + + fn run_status(conn: &Connection, run_id: &str) -> Option<String> { + conn.query_row("SELECT status FROM runs WHERE id = ?1", [run_id], |row| { + row.get::<_, String>(0) + }) + .ok() + } + + #[test] + fn begin_run_marks_row_failed_when_write_tx_cannot_be_acquired() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("idx.db"); + let mut conn = migrated_conn(&path); + // Second connection to the same DB; grabs the write lock in the TOCTOU + // window so begin_write_tx busies out. + let competitor = migrated_conn(&path); + let mut state = fastfail_state(); + + let err = begin_run_inner( + &mut conn, + &mut state, + "run-toctou", + "{}", + "2026-01-01T00:00:00.000Z", + None, + // after_insert_committed: the `running` row is now durable; grab the + // write lock so the upcoming begin_write_tx fails. 
+ |_| { + competitor + .execute_batch("BEGIN IMMEDIATE") + .expect("competitor takes the write lock"); + }, + // on_write_tx_failed: release the lock so the best-effort cleanup + // UPDATE can re-acquire it. + |_| { + competitor + .execute_batch("COMMIT") + .expect("competitor releases the write lock"); + }, + ) + .expect_err("begin_write_tx must fail while the competitor holds the lock"); + + assert!( + matches!(err, StorageError::Sqlite(_)), + "expected a busy SQLite error, got {err:?}" + ); + assert_eq!( + run_status(&conn, "run-toctou").as_deref(), + Some("failed"), + "a stranded 'running' row must be repaired to 'failed', not left phantom-running" + ); + assert!( + state.current_run.is_none(), + "current_run must stay unset on the failure path" + ); + } + + #[test] + fn resume_run_restores_prior_status_when_write_tx_cannot_be_acquired() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("idx.db"); + let mut conn = migrated_conn(&path); + // Pre-seed a previously-completed run. 
+ conn.execute( + "INSERT INTO runs ( \ + id, started_at, completed_at, config, stats, status, analyzed_at_commit, \ + owner_pid, heartbeat_at \ + ) VALUES (?1, ?2, ?3, '{}', '{}', 'completed', NULL, NULL, ?2)", + rusqlite::params![ + "run-resume", + "2026-01-01T00:00:00.000Z", + "2026-01-01T00:05:00.000Z" + ], + ) + .unwrap(); + + let competitor = migrated_conn(&path); + let mut state = fastfail_state(); + + let err = resume_run_inner( + &mut conn, + &mut state, + "run-resume", + |_| { + competitor + .execute_batch("BEGIN IMMEDIATE") + .expect("competitor takes the write lock"); + }, + |_| { + competitor + .execute_batch("COMMIT") + .expect("competitor releases the write lock"); + }, + ) + .expect_err("begin_write_tx must fail while the competitor holds the lock"); + + assert!( + matches!(err, StorageError::Sqlite(_)), + "expected a busy SQLite error, got {err:?}" + ); + let (status, completed_at): (String, Option<String>) = conn + .query_row( + "SELECT status, completed_at FROM runs WHERE id = ?1", + ["run-resume"], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .unwrap(); + assert_eq!( + status, "completed", + "a pre-existing completed run must be restored, not left flipped to 'running'" + ); + assert_eq!( + completed_at.as_deref(), + Some("2026-01-01T00:05:00.000Z"), + "completed_at must be restored to its prior value" + ); + assert!(state.current_run.is_none()); + // owner_pid sanity: restored row is unowned. 
+ let owner: Option = conn + .query_row( + "SELECT owner_pid FROM runs WHERE id = ?1", + ["run-resume"], + |row| row.get(0), + ) + .unwrap(); + assert!(owner.is_none(), "restored run must be unowned"); + } +} diff --git a/crates/loomweave-storage/tests/reader_pool.rs b/crates/loomweave-storage/tests/reader_pool.rs index 16cc3330..21099a42 100644 --- a/crates/loomweave-storage/tests/reader_pool.rs +++ b/crates/loomweave-storage/tests/reader_pool.rs @@ -362,3 +362,49 @@ async fn open_validated_accepts_legacy_zero_application_id_without_mutating() { .expect("read app id"); assert_eq!(app_id, 0, "read validation must not stamp legacy DBs"); } + +#[tokio::test] +async fn open_validated_rejects_unmigrated_index(/* review #8 */) { + // A header-valid SQLite file that never had Loomweave migrations applied + // (user_version=0, application_id=0) must be rejected at serve rather than + // auto-materialised into an empty index that answers every query with zero + // rows. + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("unmigrated.db"); + { + let conn = Connection::open(&path).expect("open"); + // A valid SQLite file (a real table) but no Loomweave schema/migrations. + conn.execute_batch("CREATE TABLE marker(id INTEGER);") + .expect("seed bare db"); + let uv: i64 = conn + .query_row("PRAGMA user_version", [], |row| row.get(0)) + .unwrap(); + assert_eq!(uv, 0, "precondition: an unmigrated file is user_version=0"); + } + + match ReaderPool::open_validated(&path, 2) { + Err(loomweave_storage::StorageError::UnmigratedIndex) => {} + Err(other) => panic!("expected UnmigratedIndex, got {other:?}"), + Ok(_) => panic!("open_validated must reject an unmigrated file"), + } +} + +#[tokio::test] +async fn open_validated_accepts_migrated_but_empty_index(/* review #8 */) { + // The discriminator is the *schema version*, NOT row counts: an installed + // index that has not been analyzed yet is user_version=CURRENT with zero + // entities and is a valid serve target. 
+ let dir = tempfile::tempdir().unwrap(); + let path = prepared_db(&dir); // migrated, zero entities + let entity_count: i64 = { + let conn = Connection::open(&path).expect("open"); + conn.query_row("SELECT COUNT(*) FROM entities", [], |row| row.get(0)) + .expect("count entities") + }; + assert_eq!( + entity_count, 0, + "precondition: migrated DB is empty of entities" + ); + + ReaderPool::open_validated(&path, 2).expect("migrated-but-empty index is a valid serve target"); +} From 78c84022fd3b66233a1f0b65cac3edb7b89def49 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 19:51:46 +1000 Subject: [PATCH 53/60] =?UTF-8?q?feat(mcp):=20Wardline=20trust=20boundarie?= =?UTF-8?q?s=20as=20find=5Fdead=5Fcode=20roots=20(clarion-bf496d55d1,=20?= =?UTF-8?q?=C2=A74.2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operator confirmed mechanism §4.2, Loomweave half. Extends DEAD_CODE_ROOT_TAGS with `wardline:external_boundary` and `wardline:trusted` so a developer-annotated Wardline trust boundary is treated as a reachability root by find_dead_code (external_boundary → entry point, trusted → exported API). The input already arrives in-scope: the Python plugin reads the on-disk Wardline vocabulary descriptor and emits `wardline:` tags into entity_tags (plugin_id=python) at analyze time, through the same host validation + writer discipline as every other tag (extractor.py:1098). So this is a read-side alias — no new ingestion, no taint-store parse, no migration, no Wardline-side work. The opaque wardline_taint SP9 blob does NOT carry this classification (it would block on sibling-repo emission), so the plugin-tag channel is the correct input. Doctrine: enrich-only (no descriptor → no wardline:* tag → root set byte-identical; the signal-unavailable empty-root guard still holds). 
SEI/freshness handled for free — entity_tags rows cascade-delete with their entity (FK ON DELETE CASCADE) and roots join only live entities, so a stale boundary fact cannot resurrect a deleted entity as a root; the plugin re-emits per (entity_id, plugin_id) each analyze, so the signal is as fresh as the index. Test: find_dead_code_treats_wardline_trust_boundaries_as_roots — a wardline-tagged unreached entity is spared, an untagged unreached entity is still flagged. 6 dead_code tests green; clippy -D warnings clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../loomweave-mcp/src/catalogue/shortcuts.rs | 17 ++++++ crates/loomweave-mcp/tests/catalogue_tools.rs | 58 +++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/crates/loomweave-mcp/src/catalogue/shortcuts.rs b/crates/loomweave-mcp/src/catalogue/shortcuts.rs index 8515b0c0..8012b299 100644 --- a/crates/loomweave-mcp/src/catalogue/shortcuts.rs +++ b/crates/loomweave-mcp/src/catalogue/shortcuts.rs @@ -36,6 +36,21 @@ const EDGE_SCAN_ORDER_BY: &str = "ORDER BY kind, from_id, to_id, confidence, \ /// `find_dead_code` — entities "called from outside" the codebase. Tag-emitting /// plugins populate these; the empty-root guard protects indexes with no root /// tags from a flood of false positives. +/// +/// The trailing `wardline:*` entries are Wardline-derived trust boundaries +/// (clarion-bf496d55d1, §4.2): the Python plugin emits `wardline:external_boundary` +/// / `wardline:trusted` from the on-disk Wardline vocabulary descriptor +/// (`@external_boundary` / `@trusted` decorators) into `entity_tags` at analyze +/// time — a developer-annotated, higher-confidence "called from outside the +/// static graph" signal than the structural heuristics. 
They map onto the +/// existing entry-point / exported-api root classes (`external_boundary` → +/// entry point, `trusted` → exported API); for dead-code reachability only the +/// union matters, so both simply join the root set, reading the same single +/// `entity_tags` signal under the same host validation discipline as every +/// other tag. Enrich-only: with no Wardline descriptor no `wardline:*` tag is +/// emitted and the root set is byte-identical to before. Stale facts cannot +/// resurrect a deleted entity as a root — `entity_tags` rows cascade-delete +/// with their entity, and roots join only live `entities`. const DEAD_CODE_ROOT_TAGS: &[&str] = &[ "entry-point", "http-route", @@ -43,6 +58,8 @@ const DEAD_CODE_ROOT_TAGS: &[&str] = &[ "data-model", "cli-command", "exported-api", + "wardline:external_boundary", + "wardline:trusted", ]; /// Tags that force an entity to be treated as live regardless of static diff --git a/crates/loomweave-mcp/tests/catalogue_tools.rs b/crates/loomweave-mcp/tests/catalogue_tools.rs index 0a22b8c3..625161d2 100644 --- a/crates/loomweave-mcp/tests/catalogue_tools.rs +++ b/crates/loomweave-mcp/tests/catalogue_tools.rs @@ -1524,6 +1524,64 @@ async fn find_dead_code_flags_unreachable_and_spares_live() { ); } +// clarion-bf496d55d1 §4.2: a Wardline-derived trust-boundary tag +// (`wardline:external_boundary` / `wardline:trusted`, emitted by the Python +// plugin from the on-disk Wardline vocabulary descriptor) acts as a reachability +// root, so a statically-unreached but human-annotated trust boundary is NOT +// flagged dead, while an untagged unreached entity still is. +#[tokio::test] +async fn find_dead_code_treats_wardline_trust_boundaries_as_roots() { + let (project, db, conn) = open_project(); + // An externally-invoked boundary: unreached over static edges, but annotated + // @external_boundary -> wardline:external_boundary. Must be a root, not dead. 
+ insert_entity( + &conn, + "python:function:webhook", + "function", + "app.py", + Some((1, 5)), + ); + insert_tag( + &conn, + "python:function:webhook", + "wardline:external_boundary", + ); + // A trusted producer: @trusted -> wardline:trusted. Also a root. + insert_entity( + &conn, + "python:function:mint_token", + "function", + "app.py", + Some((6, 10)), + ); + insert_tag(&conn, "python:function:mint_token", "wardline:trusted"); + // Genuinely dead leaf — unreached and untagged. + insert_entity( + &conn, + "python:function:orphan", + "function", + "app.py", + Some((11, 15)), + ); + drop(conn); + let state = state_for(project.path(), &db); + + let env = call_tool(&state, "find_dead_code", json!({})).await; + assert_eq!(env["ok"], true, "{env}"); + let dead: Vec = env["result"]["dead_code"] + .as_array() + .unwrap() + .iter() + .map(|c| c["entity"]["id"].as_str().unwrap().to_owned()) + .collect(); + assert_eq!( + dead, + vec!["python:function:orphan".to_owned()], + "only the untagged unreached entity is dead; the Wardline trust \ + boundaries are roots: {env}" + ); +} + // Framework-magic entities (decorated handlers, plugin hooks) are excluded from // candidacy even when statically unreached. #[tokio::test] From 304941ea3f7b265136c01a9cf19fab21978a0806 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 19:57:59 +1000 Subject: [PATCH 54/60] refactor(core): split plugin/host.rs validators into host_validate.rs (clarion-2b8811da39) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes V11-ARCH-05. Carves the pure, self-free validation layer out of the 3358-line host.rs into a new same-directory submodule host_validate.rs (mirroring the existing host_findings.rs precedent), separating stateless *validation* from the PluginHost *transport*/orchestration that drives it. 
Moved: the B.3 per-field cap consts (MAX_ENTITY_FIELD_BYTES, MAX_ENTITY_EXTRA_BYTES, MAX_UNRESOLVED_CALLEE_EXPR_BYTES, MAX_FINDING_SUBCODE_BYTES, MAX_FINDING_SEVERITY_BYTES, MAX_PLUGIN_FINDINGS_PER_FILE) and the free-function validators oversize_field, oversize_edge_field, invalid_unresolved_call_site_reason, validate_plugin_finding (+ private stringify_finding_metadata_value). host.rs re-exports the public caps (`pub use`) so every existing path keeps resolving — crate::plugin::host::MAX_ENTITY_FIELD_BYTES (protocol.rs intra-doc link), the mod.rs surface, and the host.rs test module — and brings the validators into scope for analyze_file to call unqualified. No public API change. Deliberately scoped to the validator layer: analyze_file's four-stage pipeline, the JSON-RPC transport, do_shutdown, read_response_matching, and especially the unsafe pre_exec/setrlimit block in spawn() all stay in host.rs untouched (unsafe_code stays denied except that one documented block). RawEntity/RawEdge/ RawSource stay in host.rs so loomweave-cli's plugin::host::RawSource path and the mod.rs RawEntity/RawEdge re-exports are unaffected. Added 5 direct unit tests in host_validate.rs (subcode-prefix, severity allow-list, anchor_file_path injection, call-site range/caller rejection); the host.rs T8/T9 pipeline integration tests stay in host.rs. 202 core tests green; clippy -D warnings + rustdoc -D warnings clean (intra-doc links resolve). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-core/src/plugin/host.rs | 236 +------------ .../src/plugin/host_validate.rs | 319 ++++++++++++++++++ crates/loomweave-core/src/plugin/mod.rs | 1 + 3 files changed, 334 insertions(+), 222 deletions(-) create mode 100644 crates/loomweave-core/src/plugin/host_validate.rs diff --git a/crates/loomweave-core/src/plugin/host.rs b/crates/loomweave-core/src/plugin/host.rs index 73d623a9..84b2b9b9 100644 --- a/crates/loomweave-core/src/plugin/host.rs +++ b/crates/loomweave-core/src/plugin/host.rs @@ -42,6 +42,19 @@ pub use super::host_findings::{ FINDING_MALFORMED_UNRESOLVED_CALL_SITE, FINDING_NON_UTF8_PATH, FINDING_UNDECLARED_EDGE_KIND, FINDING_UNDECLARED_KIND, FINDING_UNSUPPORTED_CAPABILITY, HostFinding, }; +// The B.3 per-field caps and the pure validators now live in `host_validate` +// (clarion-2b8811da39). Re-export the public caps so existing paths +// (`crate::plugin::host::MAX_ENTITY_FIELD_BYTES`, protocol.rs intra-doc links, +// the host.rs test module) keep resolving, and bring the validators into scope +// for `analyze_file` to call unqualified. 
+pub use super::host_validate::{ + MAX_ENTITY_EXTRA_BYTES, MAX_ENTITY_FIELD_BYTES, MAX_FINDING_SEVERITY_BYTES, + MAX_FINDING_SUBCODE_BYTES, MAX_PLUGIN_FINDINGS_PER_FILE, MAX_UNRESOLVED_CALLEE_EXPR_BYTES, +}; +use super::host_validate::{ + invalid_unresolved_call_site_reason, oversize_edge_field, oversize_field, + validate_plugin_finding, +}; use crate::entity_id::{EntityId, EntityIdError, entity_id}; use crate::plugin::jail::{JailError, jail_to_string}; use crate::plugin::limits::{ @@ -62,46 +75,10 @@ use crate::plugin::manifest::{Manifest, ManifestError}; use crate::plugin::protocol::{ AnalyzeFileFinding, AnalyzeFileParams, AnalyzeFileResult, AnalyzeFileStats, EdgeConfidence, ExitNotification, InitializeParams, InitializeResult, InitializedNotification, ProtocolError, - ResponseEnvelope, ResponsePayload, ShutdownParams, UnresolvedCallSite, make_notification, - make_request, + ResponseEnvelope, ResponsePayload, ShutdownParams, make_notification, make_request, }; use crate::plugin::transport::{Frame, TransportError, read_frame, write_frame}; -/// Per-string length cap applied to [`RawEntity::id`], [`RawEntity::kind`], -/// [`RawEntity::qualified_name`], and [`RawSource::file_path`]. -/// -/// 4 KiB is well above any legitimate identifier or path in a real codebase -/// (the Linux `PATH_MAX` is 4096; Python fully-qualified names exceeding 1 KiB -/// are absent from elspeth's 425k LOC baseline). The cap is a trust-boundary -/// check, not a style constraint — pick a value that rejects `DoS` payloads -/// without false-positing on pathological-but-legitimate inputs. -pub const MAX_ENTITY_FIELD_BYTES: usize = 4 * 1024; - -/// Maximum UTF-8 byte length for one unresolved callee expression retained for -/// query-time inferred dispatch. -pub const MAX_UNRESOLVED_CALLEE_EXPR_BYTES: usize = 512; - -/// Maximum plugin-reported findings accepted from one `analyze_file` response. 
-pub const MAX_PLUGIN_FINDINGS_PER_FILE: usize = 100; - -/// Maximum UTF-8 byte length for one plugin-reported finding subcode. -pub const MAX_FINDING_SUBCODE_BYTES: usize = 128; - -/// Maximum UTF-8 byte length for one plugin-reported severity label. -pub const MAX_FINDING_SEVERITY_BYTES: usize = 32; - -/// Per-entity cap on the total serialised size of the untyped passthrough -/// maps [`RawEntity::extra`] and [`RawSource::extra`]. -/// -/// These flow into `properties_json` downstream (via -/// `loomweave-cli::analyze::map_entity_to_record`) as `serde_json::to_string` -/// output. Without a cap, a plugin could return 8 MiB frames consisting of -/// one tiny `qualified_name` plus a multi-MiB `extra` map that lives in the -/// database row and in every host-side clone until the run ends. 64 KiB is -/// well above any legitimate plugin-declared properties bag (WP3's wardline -/// payload is <2 KiB) while rejecting payload floods. -pub const MAX_ENTITY_EXTRA_BYTES: usize = 64 * 1024; - /// The `RLIMIT_NPROC` ceiling to apply to a plugin child, or `None` to leave /// `RLIMIT_NPROC` uncapped. /// @@ -225,191 +202,6 @@ pub struct RawSource { /// `extra` (serialised) → `source.extra` (serialised). The four scalar /// string fields are bounded by [`MAX_ENTITY_FIELD_BYTES`]; the two /// untyped passthrough maps are bounded by [`MAX_ENTITY_EXTRA_BYTES`]. -/// Per-string and serialised-map oversize check for [`RawEdge`]. -/// Mirrors [`oversize_field`] in spirit: rejects any plugin-controlled string -/// or untyped passthrough map exceeding the B.3 per-field caps. Fields -/// checked in a stable order so the finding deterministically names the -/// first offender for the same input. 
-fn oversize_edge_field(raw: &RawEdge) -> Option<(&'static str, usize)> { - for (name, len) in [ - ("kind", raw.kind.len()), - ("from_id", raw.from_id.len()), - ("to_id", raw.to_id.len()), - ] { - if len > MAX_ENTITY_FIELD_BYTES { - return Some((name, len)); - } - } - if !raw.extra.is_empty() { - let len = serde_json::to_vec(&raw.extra).map_or(0, |b| b.len()); - if len > MAX_ENTITY_EXTRA_BYTES { - return Some(("extra", len)); - } - } - if let Some(props) = &raw.properties { - let len = serde_json::to_vec(props).map_or(0, |b| b.len()); - if len > MAX_ENTITY_EXTRA_BYTES { - return Some(("properties", len)); - } - } - None -} - -fn oversize_field(raw: &RawEntity) -> Option<(&'static str, usize)> { - for (name, len) in [ - ("id", raw.id.len()), - ("kind", raw.kind.len()), - ("qualified_name", raw.qualified_name.len()), - ("source.file_path", raw.source.file_path.len()), - ] { - if len > MAX_ENTITY_FIELD_BYTES { - return Some((name, len)); - } - } - - // `extra` and `source.extra` flow to `properties_json` downstream. The - // check is by serialised byte length rather than entry count — a single - // entry with a multi-MiB Value is as toxic as many entries each small. - // Serialisation is the next-downstream step anyway (via - // loomweave-cli::analyze::map_entity_to_record), so the to_vec here is not - // an additional allocation beyond what we were already going to pay. 
- for (name, map) in [("extra", &raw.extra), ("source.extra", &raw.source.extra)] { - if map.is_empty() { - continue; - } - let len = serde_json::to_vec(map).map_or(0, |b| b.len()); - if len > MAX_ENTITY_EXTRA_BYTES { - return Some((name, len)); - } - } - if !raw.tags.is_empty() { - let len = serde_json::to_vec(&raw.tags).map_or(0, |b| b.len()); - if len > MAX_ENTITY_EXTRA_BYTES { - return Some(("tags", len)); - } - } - - None -} - -fn invalid_unresolved_call_site_reason( - site: &UnresolvedCallSite, - accepted_ids: &BTreeSet, - file_len: Option, -) -> Option { - if !accepted_ids.contains(&site.caller_entity_id) { - return Some("caller entity was not accepted for this file".to_owned()); - } - if site.site_ordinal < 0 { - return Some("site_ordinal is negative".to_owned()); - } - if site.source_byte_start < 0 { - return Some("source_byte_start is negative".to_owned()); - } - if site.source_byte_end <= site.source_byte_start { - return Some("source byte range is empty or reversed".to_owned()); - } - if let Some(file_len) = file_len - && site.source_byte_end > file_len - { - return Some("source byte range exceeds analyzed file length".to_owned()); - } - if site.callee_expr.is_empty() { - return Some("callee_expr is empty".to_owned()); - } - if site.callee_expr.len() > MAX_UNRESOLVED_CALLEE_EXPR_BYTES { - return Some(format!( - "callee_expr exceeds {MAX_UNRESOLVED_CALLEE_EXPR_BYTES} bytes" - )); - } - None -} - -fn stringify_finding_metadata_value(value: serde_json::Value) -> Result { - match value { - serde_json::Value::Null => Ok("null".to_owned()), - serde_json::Value::Bool(v) => Ok(v.to_string()), - serde_json::Value::Number(v) => Ok(v.to_string()), - serde_json::Value::String(v) => Ok(v), - serde_json::Value::Array(_) | serde_json::Value::Object(_) => serde_json::to_string(&value) - .map_err(|e| format!("metadata value is not serializable: {e}")), - } -} - -fn validate_plugin_finding( - raw: AnalyzeFileFinding, - rule_id_prefix: &str, - analyzed_path: &Path, -) -> 
Result { - if raw.subcode.is_empty() { - return Err("subcode is empty".to_owned()); - } - if raw.subcode.len() > MAX_FINDING_SUBCODE_BYTES { - return Err(format!("subcode exceeds {MAX_FINDING_SUBCODE_BYTES} bytes")); - } - if !raw.subcode.starts_with(rule_id_prefix) { - return Err(format!( - "subcode {:?} is outside manifest rule_id_prefix {:?}", - raw.subcode, rule_id_prefix - )); - } - if raw.message.is_empty() { - return Err("message is empty".to_owned()); - } - if raw.message.len() > MAX_ENTITY_FIELD_BYTES { - return Err(format!("message exceeds {MAX_ENTITY_FIELD_BYTES} bytes")); - } - if !raw.metadata.is_empty() { - let len = serde_json::to_vec(&raw.metadata).map_or(0, |bytes| bytes.len()); - if len > MAX_ENTITY_EXTRA_BYTES { - return Err(format!("metadata exceeds {MAX_ENTITY_EXTRA_BYTES} bytes")); - } - } - - let mut metadata = BTreeMap::new(); - if let Some(severity) = raw.severity { - if severity.is_empty() { - return Err("severity is empty".to_owned()); - } - if severity.len() > MAX_FINDING_SEVERITY_BYTES { - return Err(format!( - "severity exceeds {MAX_FINDING_SEVERITY_BYTES} bytes" - )); - } - if !matches!(severity.as_str(), "info" | "warning" | "error") { - return Err(format!("unsupported severity {severity:?}")); - } - metadata.insert("severity".to_owned(), severity); - } - for (key, value) in raw.metadata { - if key.is_empty() { - return Err("metadata key is empty".to_owned()); - } - if key.len() > MAX_ENTITY_FIELD_BYTES { - return Err(format!( - "metadata key exceeds {MAX_ENTITY_FIELD_BYTES} bytes" - )); - } - let value = stringify_finding_metadata_value(value)?; - if value.len() > MAX_ENTITY_FIELD_BYTES { - return Err(format!( - "metadata value for {key:?} exceeds {MAX_ENTITY_FIELD_BYTES} bytes" - )); - } - metadata.insert(key, value); - } - metadata.insert( - "anchor_file_path".to_owned(), - analyzed_path.to_string_lossy().into_owned(), - ); - - Ok(HostFinding::plugin_reported( - raw.subcode, - raw.message, - metadata, - )) -} - /// An entity that 
has passed all validation checks. /// /// Returned by [`PluginHost::analyze_file`] for each entity that survived the diff --git a/crates/loomweave-core/src/plugin/host_validate.rs b/crates/loomweave-core/src/plugin/host_validate.rs new file mode 100644 index 00000000..b5db661b --- /dev/null +++ b/crates/loomweave-core/src/plugin/host_validate.rs @@ -0,0 +1,319 @@ +//! Pure, self-free validation for plugin-reported entities, edges, unresolved +//! call sites, and findings — the B.3 per-field caps and the finding-shape +//! contract. +//! +//! Carved out of `host.rs` (clarion-2b8811da39) to separate the stateless +//! *validation* layer from the `PluginHost` *transport*/orchestration that +//! drives it. Every function here is a free function with no `self` and no I/O: +//! it inspects a decoded wire value and returns a verdict (an oversize offender, +//! a rejection reason, or a validated `HostFinding`). `PluginHost::analyze_file` +//! calls these as stage-0 (field-size) and finding-shape checks of its +//! four-stage pipeline; the caps and reasons live here so the rules are one +//! reviewable unit, and `host.rs` re-exports the public caps so existing paths +//! (`crate::plugin::host::MAX_ENTITY_FIELD_BYTES`, etc.) keep resolving. + +use std::collections::{BTreeMap, BTreeSet}; +use std::path::Path; + +use crate::plugin::host::{RawEdge, RawEntity}; +use crate::plugin::host_findings::HostFinding; +use crate::plugin::protocol::{AnalyzeFileFinding, UnresolvedCallSite}; + +/// Per-string length cap applied to [`RawEntity::id`], [`RawEntity::kind`], +/// [`RawEntity::qualified_name`], and [`RawSource::file_path`]. +/// +/// 4 KiB is well above any legitimate identifier or path in a real codebase +/// (the Linux `PATH_MAX` is 4096; Python fully-qualified names exceeding 1 KiB +/// are absent from elspeth's 425k LOC baseline). 
The cap is a trust-boundary +/// check, not a style constraint — pick a value that rejects `DoS` payloads +/// without false-positing on pathological-but-legitimate inputs. +/// +/// [`RawSource::file_path`]: crate::plugin::host::RawSource +pub const MAX_ENTITY_FIELD_BYTES: usize = 4 * 1024; + +/// Maximum UTF-8 byte length for one unresolved callee expression retained for +/// query-time inferred dispatch. +pub const MAX_UNRESOLVED_CALLEE_EXPR_BYTES: usize = 512; + +/// Maximum plugin-reported findings accepted from one `analyze_file` response. +pub const MAX_PLUGIN_FINDINGS_PER_FILE: usize = 100; + +/// Maximum UTF-8 byte length for one plugin-reported finding subcode. +pub const MAX_FINDING_SUBCODE_BYTES: usize = 128; + +/// Maximum UTF-8 byte length for one plugin-reported severity label. +pub const MAX_FINDING_SEVERITY_BYTES: usize = 32; + +/// Per-entity cap on the total serialised size of the untyped passthrough +/// maps [`RawEntity::extra`] and [`RawSource::extra`]. +/// +/// These flow into `properties_json` downstream (via +/// `loomweave-cli::analyze::map_entity_to_record`) as `serde_json::to_string` +/// output. Without a cap, a plugin could return 8 MiB frames consisting of +/// one tiny `qualified_name` plus a multi-MiB `extra` map that lives in the +/// database row and in every host-side clone until the run ends. 64 KiB is +/// well above any legitimate plugin-declared properties bag (WP3's wardline +/// payload is <2 KiB) while rejecting payload floods. +/// +/// [`RawSource::extra`]: crate::plugin::host::RawSource +pub const MAX_ENTITY_EXTRA_BYTES: usize = 64 * 1024; + +/// Per-string and serialised-map oversize check for [`RawEdge`]. +/// Mirrors [`oversize_field`] in spirit: rejects any plugin-controlled string +/// or untyped passthrough map exceeding the B.3 per-field caps. Fields +/// checked in a stable order so the finding deterministically names the +/// first offender for the same input. 
+pub(crate) fn oversize_edge_field(raw: &RawEdge) -> Option<(&'static str, usize)> { + for (name, len) in [ + ("kind", raw.kind.len()), + ("from_id", raw.from_id.len()), + ("to_id", raw.to_id.len()), + ] { + if len > MAX_ENTITY_FIELD_BYTES { + return Some((name, len)); + } + } + if !raw.extra.is_empty() { + let len = serde_json::to_vec(&raw.extra).map_or(0, |b| b.len()); + if len > MAX_ENTITY_EXTRA_BYTES { + return Some(("extra", len)); + } + } + if let Some(props) = &raw.properties { + let len = serde_json::to_vec(props).map_or(0, |b| b.len()); + if len > MAX_ENTITY_EXTRA_BYTES { + return Some(("properties", len)); + } + } + None +} + +pub(crate) fn oversize_field(raw: &RawEntity) -> Option<(&'static str, usize)> { + for (name, len) in [ + ("id", raw.id.len()), + ("kind", raw.kind.len()), + ("qualified_name", raw.qualified_name.len()), + ("source.file_path", raw.source.file_path.len()), + ] { + if len > MAX_ENTITY_FIELD_BYTES { + return Some((name, len)); + } + } + + // `extra` and `source.extra` flow to `properties_json` downstream. The + // check is by serialised byte length rather than entry count — a single + // entry with a multi-MiB Value is as toxic as many entries each small. + // Serialisation is the next-downstream step anyway (via + // loomweave-cli::analyze::map_entity_to_record), so the to_vec here is not + // an additional allocation beyond what we were already going to pay. 
+ for (name, map) in [("extra", &raw.extra), ("source.extra", &raw.source.extra)] { + if map.is_empty() { + continue; + } + let len = serde_json::to_vec(map).map_or(0, |b| b.len()); + if len > MAX_ENTITY_EXTRA_BYTES { + return Some((name, len)); + } + } + if !raw.tags.is_empty() { + let len = serde_json::to_vec(&raw.tags).map_or(0, |b| b.len()); + if len > MAX_ENTITY_EXTRA_BYTES { + return Some(("tags", len)); + } + } + + None +} + +pub(crate) fn invalid_unresolved_call_site_reason( + site: &UnresolvedCallSite, + accepted_ids: &BTreeSet, + file_len: Option, +) -> Option { + if !accepted_ids.contains(&site.caller_entity_id) { + return Some("caller entity was not accepted for this file".to_owned()); + } + if site.site_ordinal < 0 { + return Some("site_ordinal is negative".to_owned()); + } + if site.source_byte_start < 0 { + return Some("source_byte_start is negative".to_owned()); + } + if site.source_byte_end <= site.source_byte_start { + return Some("source byte range is empty or reversed".to_owned()); + } + if let Some(file_len) = file_len + && site.source_byte_end > file_len + { + return Some("source byte range exceeds analyzed file length".to_owned()); + } + if site.callee_expr.is_empty() { + return Some("callee_expr is empty".to_owned()); + } + if site.callee_expr.len() > MAX_UNRESOLVED_CALLEE_EXPR_BYTES { + return Some(format!( + "callee_expr exceeds {MAX_UNRESOLVED_CALLEE_EXPR_BYTES} bytes" + )); + } + None +} + +fn stringify_finding_metadata_value(value: serde_json::Value) -> Result { + match value { + serde_json::Value::Null => Ok("null".to_owned()), + serde_json::Value::Bool(v) => Ok(v.to_string()), + serde_json::Value::Number(v) => Ok(v.to_string()), + serde_json::Value::String(v) => Ok(v), + serde_json::Value::Array(_) | serde_json::Value::Object(_) => serde_json::to_string(&value) + .map_err(|e| format!("metadata value is not serializable: {e}")), + } +} + +pub(crate) fn validate_plugin_finding( + raw: AnalyzeFileFinding, + rule_id_prefix: &str, + 
analyzed_path: &Path, +) -> Result { + if raw.subcode.is_empty() { + return Err("subcode is empty".to_owned()); + } + if raw.subcode.len() > MAX_FINDING_SUBCODE_BYTES { + return Err(format!("subcode exceeds {MAX_FINDING_SUBCODE_BYTES} bytes")); + } + if !raw.subcode.starts_with(rule_id_prefix) { + return Err(format!( + "subcode {:?} is outside manifest rule_id_prefix {:?}", + raw.subcode, rule_id_prefix + )); + } + if raw.message.is_empty() { + return Err("message is empty".to_owned()); + } + if raw.message.len() > MAX_ENTITY_FIELD_BYTES { + return Err(format!("message exceeds {MAX_ENTITY_FIELD_BYTES} bytes")); + } + if !raw.metadata.is_empty() { + let len = serde_json::to_vec(&raw.metadata).map_or(0, |bytes| bytes.len()); + if len > MAX_ENTITY_EXTRA_BYTES { + return Err(format!("metadata exceeds {MAX_ENTITY_EXTRA_BYTES} bytes")); + } + } + + let mut metadata = BTreeMap::new(); + if let Some(severity) = raw.severity { + if severity.is_empty() { + return Err("severity is empty".to_owned()); + } + if severity.len() > MAX_FINDING_SEVERITY_BYTES { + return Err(format!( + "severity exceeds {MAX_FINDING_SEVERITY_BYTES} bytes" + )); + } + if !matches!(severity.as_str(), "info" | "warning" | "error") { + return Err(format!("unsupported severity {severity:?}")); + } + metadata.insert("severity".to_owned(), severity); + } + for (key, value) in raw.metadata { + if key.is_empty() { + return Err("metadata key is empty".to_owned()); + } + if key.len() > MAX_ENTITY_FIELD_BYTES { + return Err(format!( + "metadata key exceeds {MAX_ENTITY_FIELD_BYTES} bytes" + )); + } + let value = stringify_finding_metadata_value(value)?; + if value.len() > MAX_ENTITY_FIELD_BYTES { + return Err(format!( + "metadata value for {key:?} exceeds {MAX_ENTITY_FIELD_BYTES} bytes" + )); + } + metadata.insert(key, value); + } + metadata.insert( + "anchor_file_path".to_owned(), + analyzed_path.to_string_lossy().into_owned(), + ); + + Ok(HostFinding::plugin_reported( + raw.subcode, + raw.message, + metadata, + 
)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::plugin::protocol::AnalyzeFileFinding; + + fn finding(subcode: &str, message: &str) -> AnalyzeFileFinding { + AnalyzeFileFinding { + subcode: subcode.to_owned(), + message: message.to_owned(), + severity: None, + metadata: BTreeMap::new(), + } + } + + #[test] + fn validate_plugin_finding_rejects_subcode_outside_rule_prefix() { + let err = validate_plugin_finding(finding("OTHER-X", "m"), "PY-", Path::new("a.py")) + .expect_err("subcode outside prefix must be rejected"); + assert!(err.contains("rule_id_prefix"), "{err}"); + } + + #[test] + fn validate_plugin_finding_rejects_unsupported_severity() { + let mut raw = finding("PY-CODE", "m"); + raw.severity = Some("fatal".to_owned()); + let err = validate_plugin_finding(raw, "PY-", Path::new("a.py")) + .expect_err("severity outside {info,warning,error} must be rejected"); + assert!(err.contains("unsupported severity"), "{err}"); + } + + #[test] + fn validate_plugin_finding_injects_anchor_file_path() { + let ok = validate_plugin_finding(finding("PY-CODE", "m"), "PY-", Path::new("pkg/a.py")) + .expect("a well-formed finding validates"); + assert_eq!( + ok.metadata.get("anchor_file_path").map(String::as_str), + Some("pkg/a.py"), + "the analyzed path is recorded as anchor_file_path" + ); + } + + #[test] + fn invalid_unresolved_call_site_reason_rejects_empty_or_reversed_range() { + let mut accepted = BTreeSet::new(); + accepted.insert("caller".to_owned()); + let site = UnresolvedCallSite { + caller_entity_id: "caller".to_owned(), + site_ordinal: 0, + source_byte_start: 10, + source_byte_end: 10, // empty range + callee_expr: "f".to_owned(), + }; + assert_eq!( + invalid_unresolved_call_site_reason(&site, &accepted, Some(100)).as_deref(), + Some("source byte range is empty or reversed"), + ); + } + + #[test] + fn invalid_unresolved_call_site_reason_rejects_unknown_caller() { + let accepted = BTreeSet::new(); + let site = UnresolvedCallSite { + caller_entity_id: 
"ghost".to_owned(), + site_ordinal: 0, + source_byte_start: 0, + source_byte_end: 1, + callee_expr: "f".to_owned(), + }; + assert_eq!( + invalid_unresolved_call_site_reason(&site, &accepted, None).as_deref(), + Some("caller entity was not accepted for this file"), + ); + } +} diff --git a/crates/loomweave-core/src/plugin/mod.rs b/crates/loomweave-core/src/plugin/mod.rs index cf46643f..dd0b7124 100644 --- a/crates/loomweave-core/src/plugin/mod.rs +++ b/crates/loomweave-core/src/plugin/mod.rs @@ -15,6 +15,7 @@ pub mod breaker; pub mod discovery; pub mod host; mod host_findings; +mod host_validate; pub mod jail; pub mod limits; pub mod manifest; From 1670aa586f4be6c8ccd9bd9ca4b90233c78eb17e Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 20:06:38 +1000 Subject: [PATCH 55/60] feat(federation): weft.toml cross-read sibling-endpoint reader (clarion-164f88c510) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 2 of C-9 (hub-blessed): implement the full cross-read reader against the shared weft.toml [<member>].url schema (proposal §2.4). Step 1 (the schema proposal) shipped in 234fe7f. loomweave-core::store: extend WeftToml to read the allowlisted cross-read `url` from sibling tables ([filigree]/[wardline]/[legis]) and Loomweave's own [loomweave].url; add `pub fn sibling_url(project_root, member)` — fail-soft (absent/malformed/wrong-type/blank → None), never writes weft.toml (Gate weft-eb3dee402f / C-4). Refactored the parse into a shared parse_weft_toml helper; [loomweave].store_dir reading is unchanged. Both endpoint resolvers gain the §2.2 precedence ladder, reporting source: flag/env (WEFT_<MEMBER>_URL, verbatim) > weft.toml [X].url (verbatim) > on-disk ephemeral.port > configured/default. The operator's durable weft.toml url deliberately outranks on-disk discovery (§2.2: a remote sibling has no local ephemeral.port).
New SOURCE_ENV / SOURCE_WEFT_TOML labels surface where the URL came from (project_status/doctor). The env getter is injected (closure) for testability; production passes `|n| std::env::var(n).ok()`. loomweave.yaml stays authoritative for member-private behavior; weft.toml is the operator overlay, never written. Call sites updated: serve.rs + analyze.rs (×2) pass the real env getter; status.rs + doctor.rs likewise; resolver test call sites pass `|_| None`. Tests: store sibling_url (per-member + fail-soft + store_dir-coexistence), filigree_url (env-wins, weft.toml-over-port, blank-env-fallthrough, disabled-not-revived), loomweave_url (env-wins, weft.toml-over-port, blank-env-fallthrough). 102 federation + 73 dependent resolution tests green; clippy -D warnings clean; no migration/version-lockstep impact. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/analyze.rs | 6 +- crates/loomweave-cli/src/doctor.rs | 5 +- crates/loomweave-cli/src/serve.rs | 1 + crates/loomweave-core/src/store.rs | 143 +++++++++++++++--- .../loomweave-federation/src/filigree_url.rs | 131 ++++++++++++++-- .../loomweave-federation/src/loomweave_url.rs | 93 +++++++++++- crates/loomweave-mcp/src/tools/status.rs | 7 +- crates/loomweave-mcp/tests/storage_tools.rs | 8 +- 8 files changed, 345 insertions(+), 49 deletions(-) diff --git a/crates/loomweave-cli/src/analyze.rs b/crates/loomweave-cli/src/analyze.rs index ef0771ff..4c0c6ada 100644 --- a/crates/loomweave-cli/src/analyze.rs +++ b/crates/loomweave-cli/src/analyze.rs @@ -3585,7 +3585,8 @@ async fn post_findings_batch( // Resolve the live Filigree URL (ephemeral port over stale config), the same // resolution `loomweave serve` and `project_status` use. 
- let resolution = resolve_filigree_url(filigree_cfg, project_root); + let resolution = + resolve_filigree_url(filigree_cfg, project_root, |name| std::env::var(name).ok()); let mut resolved_cfg = filigree_cfg.clone(); if let Some(url) = resolution.resolved_url { resolved_cfg.base_url = url; @@ -3724,7 +3725,8 @@ async fn prune_unseen_findings_in_filigree( // Resolve the live Filigree URL (ephemeral port over stale config), the // same resolution emission uses. - let resolution = resolve_filigree_url(filigree_cfg, project_root); + let resolution = + resolve_filigree_url(filigree_cfg, project_root, |name| std::env::var(name).ok()); let mut resolved_cfg = filigree_cfg.clone(); if let Some(url) = resolution.resolved_url { resolved_cfg.base_url = url; diff --git a/crates/loomweave-cli/src/doctor.rs b/crates/loomweave-cli/src/doctor.rs index 49f7f871..b4c5f044 100644 --- a/crates/loomweave-cli/src/doctor.rs +++ b/crates/loomweave-cli/src/doctor.rs @@ -625,7 +625,10 @@ fn check_http_config_json(project_root: &Path) -> DoctorJsonCheck { } // ADR-044: prefer the live published port over the (now usually absent) // static bind. A running serve publishes .weft/loomweave/ephemeral.port. 
- let resolution = loomweave_federation::loomweave_url::resolve_loomweave_url(None, project_root); + let resolution = + loomweave_federation::loomweave_url::resolve_loomweave_url(None, project_root, |name| { + std::env::var(name).ok() + }); if let Some(url) = resolution.resolved_url { return DoctorJsonCheck::ok( "http.config", diff --git a/crates/loomweave-cli/src/serve.rs b/crates/loomweave-cli/src/serve.rs index e6b16a3e..000f47ef 100644 --- a/crates/loomweave-cli/src/serve.rs +++ b/crates/loomweave-cli/src/serve.rs @@ -72,6 +72,7 @@ pub fn run(path: &Path, config_path: Option<&Path>) -> Result<()> { let filigree_resolution = loomweave_federation::filigree_url::resolve_filigree_url( &config.integrations.filigree, &project_root, + |name| std::env::var(name).ok(), ); let mut filigree_config = config.integrations.filigree.clone(); if let Some(resolved) = &filigree_resolution.resolved_url { diff --git a/crates/loomweave-core/src/store.rs b/crates/loomweave-core/src/store.rs index c3772c97..4fc0f36c 100644 --- a/crates/loomweave-core/src/store.rs +++ b/crates/loomweave-core/src/store.rs @@ -70,21 +70,7 @@ pub fn db_path(project_root: &Path) -> PathBuf { /// malformed, the `[loomweave]` table or `store_dir` key is absent, or the value /// is blank. fn store_dir_override(project_root: &Path) -> Option { - let raw = std::fs::read_to_string(project_root.join(WEFT_TOML)).ok()?; - // Parse only our own `[loomweave]` table; unknown top-level tables (a - // sibling's section) are ignored by serde's default, so a future `[filigree]` - // never makes this parse reject the file. 
- let parsed: WeftToml = match toml::from_str(&raw) { - Ok(parsed) => parsed, - Err(err) => { - tracing::debug!( - error = %err, - "weft.toml is malformed; falling back to the default store dir" - ); - return None; - } - }; - let store_dir = parsed.loomweave?.store_dir?; + let store_dir = parse_weft_toml(project_root)?.loomweave?.store_dir?; let trimmed = store_dir.trim(); if trimmed.is_empty() { None @@ -93,17 +79,79 @@ fn store_dir_override(project_root: &Path) -> Option { } } -/// The subset of `weft.toml` Loomweave reads: only its own member-private table. -/// No `deny_unknown_fields` — sibling tables and forward-compatible keys are +/// Read an operator-declared sibling federation endpoint URL from `weft.toml`'s +/// cross-read schema (C-9 shared key layout, blessed; proposal §2.1/§2.2). +/// +/// `member` is the sibling's canonical name — `"filigree"`, `"wardline"`, +/// `"legis"`, or Loomweave's own `"loomweave"` (a member reads its own full +/// `[loomweave]` table, and only the allowlisted `url` from a sibling's table). +/// The single cross-read key in v1 is `url`; everything else under a sibling's +/// table is private to that sibling and ignored here. +/// +/// Returns `None` (fail-soft, never an error, never a write — Gate +/// `weft-eb3dee402f` / C-4) when `weft.toml` is absent or malformed, the +/// `[<member>]` table or its `url` is absent, or the value is blank. This is the +/// `weft.toml` rung of the sibling-endpoint precedence ladder; callers layer a +/// runtime flag/env above it and on-disk `ephemeral.port` discovery below it.
+#[must_use] +pub fn sibling_url(project_root: &Path, member: &str) -> Option { + let parsed = parse_weft_toml(project_root)?; + let section = match member { + MEMBER => parsed.loomweave.map(|s| s.url), + "filigree" => parsed.filigree.map(|s| s.url), + "wardline" => parsed.wardline.map(|s| s.url), + "legis" => parsed.legis.map(|s| s.url), + _ => None, + }; + let url = section??; + let trimmed = url.trim(); + if trimmed.is_empty() { + None + } else { + Some(trimmed.to_owned()) + } +} + +/// Read and parse `weft.toml` from the project root, fail-soft. Returns `None` +/// when the file is absent or unparseable. Parses only the tables/keys Loomweave +/// reads; unknown top-level tables and unknown keys are tolerated (no +/// `deny_unknown_fields`), so the file stays forward-compatible as siblings add +/// keys. +fn parse_weft_toml(project_root: &Path) -> Option { + let raw = std::fs::read_to_string(project_root.join(WEFT_TOML)).ok()?; + match toml::from_str(&raw) { + Ok(parsed) => Some(parsed), + Err(err) => { + tracing::debug!(error = %err, "weft.toml is malformed; treating as absent"); + None + } + } +} + +/// The subset of `weft.toml` Loomweave reads: its own member-private table plus +/// the allowlisted cross-read `url` from each sibling's table. No +/// `deny_unknown_fields` — sibling tables and forward-compatible keys are /// deliberately tolerated. #[derive(Debug, Deserialize)] struct WeftToml { loomweave: Option, + filigree: Option, + wardline: Option, + legis: Option, } #[derive(Debug, Deserialize)] struct LoomweaveSection { store_dir: Option, + /// Loomweave's own operator-declared federation endpoint (cross-read by + /// siblings; also the `weft.toml` rung when Loomweave resolves its own URL). + url: Option, +} + +/// A sibling member's table, of which Loomweave reads only the cross-read `url`. 
+#[derive(Debug, Deserialize)] +struct SiblingSection { + url: Option, } #[cfg(test)] @@ -184,4 +232,65 @@ mod tests { .unwrap(); assert_eq!(store_dir(dir.path()), dir.path().join(".weft/loomweave")); } + + // ---- sibling_url (C-9 cross-read schema) -------------------------------- + + #[test] + fn sibling_url_reads_allowlisted_url_per_member() { + let dir = tempfile::tempdir().unwrap(); + std::fs::write( + dir.path().join(WEFT_TOML), + "[filigree]\nurl = \"http://127.0.0.1:8749\"\n\n\ + [loomweave]\nstore_dir = \"s\"\nurl = \"http://127.0.0.1:9111\"\n\n\ + [wardline]\nurl = \"http://127.0.0.1:7000\"\n", + ) + .unwrap(); + assert_eq!( + sibling_url(dir.path(), "filigree").as_deref(), + Some("http://127.0.0.1:8749") + ); + assert_eq!( + sibling_url(dir.path(), "wardline").as_deref(), + Some("http://127.0.0.1:7000") + ); + // A member reads its OWN [loomweave].url too. + assert_eq!( + sibling_url(dir.path(), MEMBER).as_deref(), + Some("http://127.0.0.1:9111") + ); + // A sibling without a url, an unknown member, and legis (absent) → None. + assert_eq!(sibling_url(dir.path(), "legis"), None); + assert_eq!(sibling_url(dir.path(), "unknown"), None); + } + + #[test] + fn sibling_url_is_fail_soft() { + let dir = tempfile::tempdir().unwrap(); + // Absent weft.toml. + assert_eq!(sibling_url(dir.path(), "filigree"), None); + // Malformed. + std::fs::write(dir.path().join(WEFT_TOML), "not = = toml [[[").unwrap(); + assert_eq!(sibling_url(dir.path(), "filigree"), None); + // Wrong type. + std::fs::write(dir.path().join(WEFT_TOML), "[filigree]\nurl = 123\n").unwrap(); + assert_eq!(sibling_url(dir.path(), "filigree"), None); + // Blank value. + std::fs::write(dir.path().join(WEFT_TOML), "[filigree]\nurl = \" \"\n").unwrap(); + assert_eq!(sibling_url(dir.path(), "filigree"), None); + // Table present, url absent. 
+ std::fs::write(dir.path().join(WEFT_TOML), "[filigree]\nother = 1\n").unwrap(); + assert_eq!(sibling_url(dir.path(), "filigree"), None); + } + + #[test] + fn sibling_url_does_not_disturb_store_dir_reading() { + // The extended schema still reads store_dir correctly alongside urls. + let dir = tempfile::tempdir().unwrap(); + std::fs::write( + dir.path().join(WEFT_TOML), + "[loomweave]\nstore_dir = \"custom/store\"\nurl = \"http://x\"\n", + ) + .unwrap(); + assert_eq!(store_dir(dir.path()), dir.path().join("custom/store")); + } } diff --git a/crates/loomweave-federation/src/filigree_url.rs b/crates/loomweave-federation/src/filigree_url.rs index fed47578..1130e497 100644 --- a/crates/loomweave-federation/src/filigree_url.rs +++ b/crates/loomweave-federation/src/filigree_url.rs @@ -44,6 +44,14 @@ use crate::config::FiligreeConfig; /// by `project_status` (and, per clarion-318f1254eb, `issues_for`) so an agent /// can tell *where* the URL came from without shelling out to probe ports. pub const SOURCE_DISABLED: &str = "disabled"; +/// The runtime environment override `WEFT_FILIGREE_URL` (the C-9 §2.2 rung-2 +/// `WEFT__URL` spelling) — a per-process operator declaration that outranks +/// every durable/on-disk source. +pub const SOURCE_ENV: &str = "env:WEFT_FILIGREE_URL"; +/// The operator-declared durable endpoint `weft.toml [filigree].url` (C-9 §2.2 +/// rung-3). Outranks on-disk port discovery: it is the operator's explicit +/// "Filigree is here" (e.g. a remote host with no local `ephemeral.port`). +pub const SOURCE_WEFT_TOML: &str = "weft.toml"; /// The live ethereal port published by Filigree's running dashboard at the /// consolidated `.weft/filigree/` location — the only location read (ADR-046). pub const SOURCE_EPHEMERAL_PORT: &str = ".weft/filigree/ephemeral.port"; @@ -63,17 +71,35 @@ pub struct FiligreeUrlResolution { pub source: &'static str, } -/// Resolve the Filigree read-API base URL, preferring the live ethereal port. 
+/// Resolve the Filigree read-API base URL along the C-9 §2.2 precedence ladder. /// -/// - Disabled → no resolved URL, `source = "disabled"`. -/// - A valid `/.weft/filigree/ephemeral.port` → the configured URL -/// with its port overridden by the live port, -/// `source = ".weft/filigree/ephemeral.port"`. -/// - Otherwise → the configured URL unchanged, `source = "config"`. A port file -/// present only at the pre-consolidation `.filigree/` path is **not** read; it -/// folds here, so a mis-sequenced cutover is visible (not a stale resolve). +/// Highest wins, after the enabled short-circuit: +/// 1. `WEFT_FILIGREE_URL` env (`getenv`) → `source = "env:WEFT_FILIGREE_URL"`, +/// used verbatim — a per-process operator override. +/// 2. `weft.toml [filigree].url` → `source = "weft.toml"`, used verbatim — the +/// operator's durable declaration (e.g. a remote Filigree with no local +/// `ephemeral.port`). Outranks on-disk discovery by design (§2.2). +/// 3. A valid `/.weft/filigree/ephemeral.port` → the configured +/// URL with its port overridden by the live port, +/// `source = ".weft/filigree/ephemeral.port"`. +/// 4. Otherwise → the configured URL unchanged, `source = "config"`. A port file +/// present only at the pre-consolidation `.filigree/` path is **not** read; +/// it folds here, so a mis-sequenced cutover is visible (not a stale +/// resolve). +/// +/// - Disabled → no resolved URL, `source = "disabled"` (the env/weft.toml rungs +/// do not revive a disabled integration). +/// +/// `getenv` is injected (rather than reading `std::env` directly) so the rung is +/// testable without mutating process env; production passes +/// `|name| std::env::var(name).ok()`. Both the env and `weft.toml` rungs are +/// fail-soft: a blank/absent value falls through to the next rung. 
#[must_use] -pub fn resolve_filigree_url(config: &FiligreeConfig, project_root: &Path) -> FiligreeUrlResolution { +pub fn resolve_filigree_url( + config: &FiligreeConfig, + project_root: &Path, + getenv: impl Fn(&str) -> Option, +) -> FiligreeUrlResolution { let configured_url = config.base_url.clone(); if !config.enabled { return FiligreeUrlResolution { @@ -83,6 +109,25 @@ pub fn resolve_filigree_url(config: &FiligreeConfig, project_root: &Path) -> Fil source: SOURCE_DISABLED, }; } + // Rung 1: WEFT_FILIGREE_URL env, used verbatim. + if let Some(url) = getenv("WEFT_FILIGREE_URL").filter(|u| !u.trim().is_empty()) { + return FiligreeUrlResolution { + enabled: true, + configured_url, + resolved_url: Some(url.trim().to_owned()), + source: SOURCE_ENV, + }; + } + // Rung 2: weft.toml [filigree].url, used verbatim (outranks on-disk port). + if let Some(url) = loomweave_core::store::sibling_url(project_root, "filigree") { + return FiligreeUrlResolution { + enabled: true, + configured_url, + resolved_url: Some(url), + source: SOURCE_WEFT_TOML, + }; + } + // Rung 3: live ethereal port overrides the configured URL's port. match read_ephemeral_port(project_root) { Some((port, source)) => { let resolved = override_port(&configured_url, port); @@ -93,6 +138,7 @@ pub fn resolve_filigree_url(config: &FiligreeConfig, project_root: &Path) -> Fil source, } } + // Rung 4: configured base_url unchanged. None => FiligreeUrlResolution { enabled: true, resolved_url: Some(configured_url.clone()), @@ -181,7 +227,7 @@ mod tests { fn disabled_integration_resolves_nothing() { let dir = tempfile::tempdir().unwrap(); let config = FiligreeConfig::default(); // enabled: false - let res = resolve_filigree_url(&config, dir.path()); + let res = resolve_filigree_url(&config, dir.path(), |_| None); assert!(!res.enabled); assert_eq!(res.resolved_url, None); assert_eq!(res.source, SOURCE_DISABLED); @@ -194,7 +240,7 @@ mod tests { // 8542 per the consolidated .weft/filigree/ephemeral.port. 
let dir = tempfile::tempdir().unwrap(); write_weft_port_file(dir.path(), "8542\n"); - let res = resolve_filigree_url(&enabled_config(), dir.path()); + let res = resolve_filigree_url(&enabled_config(), dir.path(), |_| None); assert!(res.enabled); assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:8542")); assert_eq!(res.source, SOURCE_EPHEMERAL_PORT); @@ -211,7 +257,7 @@ mod tests { // dir (the lacuna-401 wrong-but-quiet-resolve failure mode). let dir = tempfile::tempdir().unwrap(); write_legacy_port_file(dir.path(), "8542\n"); - let res = resolve_filigree_url(&enabled_config(), dir.path()); + let res = resolve_filigree_url(&enabled_config(), dir.path(), |_| None); assert_eq!(res.source, SOURCE_CONFIG); assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:8766")); } @@ -219,7 +265,7 @@ mod tests { #[test] fn falls_back_to_configured_url_when_no_port_file() { let dir = tempfile::tempdir().unwrap(); - let res = resolve_filigree_url(&enabled_config(), dir.path()); + let res = resolve_filigree_url(&enabled_config(), dir.path(), |_| None); assert!(res.enabled); assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:8766")); assert_eq!(res.source, SOURCE_CONFIG); @@ -229,7 +275,7 @@ mod tests { fn corrupt_port_file_folds_to_configured_url() { let dir = tempfile::tempdir().unwrap(); write_weft_port_file(dir.path(), "not-a-port"); - let res = resolve_filigree_url(&enabled_config(), dir.path()); + let res = resolve_filigree_url(&enabled_config(), dir.path(), |_| None); assert_eq!(res.source, SOURCE_CONFIG); assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:8766")); } @@ -238,7 +284,7 @@ mod tests { fn zero_port_is_rejected_as_corrupt() { let dir = tempfile::tempdir().unwrap(); write_weft_port_file(dir.path(), "0"); - let res = resolve_filigree_url(&enabled_config(), dir.path()); + let res = resolve_filigree_url(&enabled_config(), dir.path(), |_| None); assert_eq!(res.source, SOURCE_CONFIG); } @@ -262,4 +308,59 @@ mod tests { fn 
override_port_returns_input_without_scheme() { assert_eq!(override_port("127.0.0.1:8766", 8542), "127.0.0.1:8766"); } + + fn write_weft_url(root: &Path, member: &str, url: &str) { + std::fs::write( + root.join("weft.toml"), + format!("[{member}]\nurl = \"{url}\"\n"), + ) + .unwrap(); + } + + #[test] + fn env_url_wins_verbatim_over_everything() { + let dir = tempfile::tempdir().unwrap(); + // A live port AND a weft.toml url are present; the env override still wins. + write_weft_port_file(dir.path(), "8542\n"); + write_weft_url(dir.path(), "filigree", "http://weft-host:1234"); + let res = resolve_filigree_url(&enabled_config(), dir.path(), |name| { + (name == "WEFT_FILIGREE_URL").then(|| "http://env-host:9000".to_owned()) + }); + assert_eq!(res.resolved_url.as_deref(), Some("http://env-host:9000")); + assert_eq!(res.source, SOURCE_ENV); + } + + #[test] + fn weft_toml_url_wins_verbatim_over_live_port() { + // The operator's durable declaration (e.g. a remote Filigree) outranks + // the on-disk live port (§2.2 rung-3 above rung-4). + let dir = tempfile::tempdir().unwrap(); + write_weft_port_file(dir.path(), "8542\n"); + write_weft_url(dir.path(), "filigree", "http://remote-host:8749"); + let res = resolve_filigree_url(&enabled_config(), dir.path(), |_| None); + assert_eq!(res.resolved_url.as_deref(), Some("http://remote-host:8749")); + assert_eq!(res.source, SOURCE_WEFT_TOML); + } + + #[test] + fn blank_env_falls_through_to_lower_rungs() { + let dir = tempfile::tempdir().unwrap(); + write_weft_port_file(dir.path(), "8542\n"); + let res = resolve_filigree_url(&enabled_config(), dir.path(), |_| Some(" ".to_owned())); + // Blank env is skipped; the live port resolves. 
+ assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:8542")); + assert_eq!(res.source, SOURCE_EPHEMERAL_PORT); + } + + #[test] + fn disabled_is_not_revived_by_env_or_weft_toml() { + let dir = tempfile::tempdir().unwrap(); + write_weft_url(dir.path(), "filigree", "http://remote-host:8749"); + let res = resolve_filigree_url(&FiligreeConfig::default(), dir.path(), |_| { + Some("http://env-host:9000".to_owned()) + }); + assert!(!res.enabled); + assert_eq!(res.resolved_url, None); + assert_eq!(res.source, SOURCE_DISABLED); + } } diff --git a/crates/loomweave-federation/src/loomweave_url.rs b/crates/loomweave-federation/src/loomweave_url.rs index 85dd9cd8..473a01b6 100644 --- a/crates/loomweave-federation/src/loomweave_url.rs +++ b/crates/loomweave-federation/src/loomweave_url.rs @@ -12,6 +12,13 @@ use std::path::Path; use crate::loomweave_port::read_published_port; +/// The runtime environment override `WEFT_LOOMWEAVE_URL` (C-9 §2.2 rung-2 +/// `WEFT__URL`) — a per-process operator declaration above every durable source. +pub const SOURCE_ENV: &str = "env:WEFT_LOOMWEAVE_URL"; +/// The operator-declared durable endpoint `weft.toml [loomweave].url` (C-9 §2.2 +/// rung-3). Outranks on-disk port discovery: the operator's explicit +/// "Loomweave is here" (e.g. a remote host with no local `ephemeral.port`). +pub const SOURCE_WEFT_TOML: &str = "weft.toml"; /// The live published port file `.weft/loomweave/ephemeral.port`. pub const SOURCE_EPHEMERAL_PORT: &str = ".weft/loomweave/ephemeral.port"; /// A statically configured URL (e.g. `wardline.yaml: loomweave.url`). @@ -28,20 +35,50 @@ pub struct LoomweaveUrlResolution { pub source: &'static str, } -/// Resolve the read-API URL, preferring the live published port over the -/// configured URL. `configured_url` is the consumer's static fallback (pass -/// `None` if it has none). +/// Resolve the Loomweave read-API URL along the C-9 §2.2 precedence ladder. +/// +/// Highest wins: +/// 1. 
`WEFT_LOOMWEAVE_URL` env (`getenv`) → `source = "env:WEFT_LOOMWEAVE_URL"`, +/// verbatim. +/// 2. `weft.toml [loomweave].url` → `source = "weft.toml"`, verbatim — the +/// operator's durable declaration; outranks on-disk discovery (§2.2). +/// 3. The live published `.weft/loomweave/ephemeral.port` → `http://127.0.0.1:<port>`. +/// 4. `configured_url` (the consumer's static fallback) → `source = "config"`. +/// 5. Nothing → `None`, `source = "none"`. +/// +/// `getenv` is injected for testability; production passes +/// `|name| std::env::var(name).ok()`. Every rung is fail-soft: a blank/absent +/// value falls through to the next. #[must_use] pub fn resolve_loomweave_url( configured_url: Option<&str>, project_root: &Path, + getenv: impl Fn(&str) -> Option, ) -> LoomweaveUrlResolution { + // Rung 1: WEFT_LOOMWEAVE_URL env, verbatim. + if let Some(url) = getenv("WEFT_LOOMWEAVE_URL").filter(|u| !u.trim().is_empty()) { + return LoomweaveUrlResolution { + resolved_url: Some(url.trim().to_owned()), + source: SOURCE_ENV, + }; + } + // Rung 2: weft.toml [loomweave].url, verbatim (outranks on-disk port). + if let Some(url) = + loomweave_core::store::sibling_url(project_root, loomweave_core::store::MEMBER) + { + return LoomweaveUrlResolution { + resolved_url: Some(url), + source: SOURCE_WEFT_TOML, + }; + } + // Rung 3: live published port. if let Some(port) = read_published_port(project_root) { return LoomweaveUrlResolution { resolved_url: Some(format!("http://127.0.0.1:{port}")), source: SOURCE_EPHEMERAL_PORT, }; } + // Rung 4/5: configured fallback, else nothing.
match configured_url { Some(url) if !url.trim().is_empty() => LoomweaveUrlResolution { resolved_url: Some(url.to_owned()), @@ -63,7 +100,7 @@ mod tests { fn published_port_beats_configured_url() { let dir = tempfile::tempdir().unwrap(); publish_port(dir.path(), 9412).unwrap(); - let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path(), |_| None); assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:9412")); assert_eq!(res.source, SOURCE_EPHEMERAL_PORT); } @@ -71,7 +108,7 @@ mod tests { #[test] fn falls_back_to_configured_url_when_no_file() { let dir = tempfile::tempdir().unwrap(); - let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path(), |_| None); assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:9111")); assert_eq!(res.source, SOURCE_CONFIG); } @@ -82,14 +119,14 @@ mod tests { let store = loomweave_core::store::store_dir(dir.path()); std::fs::create_dir_all(&store).unwrap(); std::fs::write(store.join("ephemeral.port"), "not-a-port").unwrap(); - let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path(), |_| None); assert_eq!(res.source, SOURCE_CONFIG); } #[test] fn nothing_resolves_to_none() { let dir = tempfile::tempdir().unwrap(); - let res = resolve_loomweave_url(None, dir.path()); + let res = resolve_loomweave_url(None, dir.path(), |_| None); assert_eq!(res.resolved_url, None); assert_eq!(res.source, SOURCE_NONE); } @@ -97,8 +134,48 @@ mod tests { #[test] fn blank_config_with_no_file_resolves_to_none() { let dir = tempfile::tempdir().unwrap(); - let res = resolve_loomweave_url(Some(" "), dir.path()); + let res = resolve_loomweave_url(Some(" "), dir.path(), |_| None); assert_eq!(res.resolved_url, None); assert_eq!(res.source, SOURCE_NONE); } + + fn 
write_weft_loomweave_url(root: &Path, url: &str) { + std::fs::write( + root.join("weft.toml"), + format!("[loomweave]\nurl = \"{url}\"\n"), + ) + .unwrap(); + } + + #[test] + fn env_url_wins_verbatim_over_published_port_and_weft_toml() { + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9412).unwrap(); + write_weft_loomweave_url(dir.path(), "http://weft-host:1234"); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path(), |name| { + (name == "WEFT_LOOMWEAVE_URL").then(|| "http://env-host:9000".to_owned()) + }); + assert_eq!(res.resolved_url.as_deref(), Some("http://env-host:9000")); + assert_eq!(res.source, SOURCE_ENV); + } + + #[test] + fn weft_toml_url_wins_verbatim_over_published_port() { + // Operator's durable [loomweave].url outranks the live local port (§2.2). + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9412).unwrap(); + write_weft_loomweave_url(dir.path(), "http://remote-host:9111"); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path(), |_| None); + assert_eq!(res.resolved_url.as_deref(), Some("http://remote-host:9111")); + assert_eq!(res.source, SOURCE_WEFT_TOML); + } + + #[test] + fn blank_env_falls_through_to_published_port() { + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9412).unwrap(); + let res = resolve_loomweave_url(None, dir.path(), |_| Some(" ".to_owned())); + assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:9412")); + assert_eq!(res.source, SOURCE_EPHEMERAL_PORT); + } } diff --git a/crates/loomweave-mcp/src/tools/status.rs b/crates/loomweave-mcp/src/tools/status.rs index 9cb32942..70b55abd 100644 --- a/crates/loomweave-mcp/src/tools/status.rs +++ b/crates/loomweave-mcp/src/tools/status.rs @@ -440,8 +440,11 @@ impl ServerState { /// same). Pass `None` config — `project_status` has no static loomweave URL /// of its own; this surfaces whether serve is currently publishing. 
pub(crate) fn loomweave_read_api_json(&self) -> Value { - let resolution = - loomweave_federation::loomweave_url::resolve_loomweave_url(None, &self.project_root); + let resolution = loomweave_federation::loomweave_url::resolve_loomweave_url( + None, + &self.project_root, + |name| std::env::var(name).ok(), + ); json!({ "resolved_url": resolution.resolved_url, "resolution_source": resolution.source, diff --git a/crates/loomweave-mcp/tests/storage_tools.rs b/crates/loomweave-mcp/tests/storage_tools.rs index 8b5c6f1f..f563c38b 100644 --- a/crates/loomweave-mcp/tests/storage_tools.rs +++ b/crates/loomweave-mcp/tests/storage_tools.rs @@ -1213,7 +1213,7 @@ async fn issues_for_reports_resolved_endpoint_and_result_kind() { allow_live_provider: false, cache_max_age_days: 180, }, - filigree: resolve_filigree_url(&config, project.path()), + filigree: resolve_filigree_url(&config, project.path(), |_| None), }; // Reachable but no associations for this entity -> no_matches. @@ -2441,7 +2441,7 @@ async fn status_surfaces_agree_on_allow_live_provider_when_half_configured() { allow_live_provider: true, // configured-but-inert cache_max_age_days: 180, }, - filigree: resolve_filigree_url(&FiligreeConfig::default(), project.path()), + filigree: resolve_filigree_url(&FiligreeConfig::default(), project.path(), |_| None), }; let state = state_for(project.path(), &db_path).with_diagnostics(diagnostics); @@ -5138,7 +5138,7 @@ async fn project_status_resolves_live_filigree_endpoint() { allow_live_provider: false, cache_max_age_days: 180, }, - filigree: resolve_filigree_url(&config, project.path()), + filigree: resolve_filigree_url(&config, project.path(), |_| None), }; let state = state_for(project.path(), &db_path).with_diagnostics(diagnostics); @@ -5170,7 +5170,7 @@ async fn project_status_filigree_falls_back_to_config_without_port_file() { allow_live_provider: true, cache_max_age_days: 7, }, - filigree: resolve_filigree_url(&config, project.path()), + filigree: 
resolve_filigree_url(&config, project.path(), |_| None), }; let state = state_for(project.path(), &db_path).with_diagnostics(diagnostics); let envelope = call_tool(&state, "project_status", json!({})).await; From aba5c670de23ff8a5fa4d9f8bfd16d889cbb3e9c Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 20:10:04 +1000 Subject: [PATCH 56/60] docs(storage): fix broken intra-doc link in reject_unmigrated_for_read The new reject_unmigrated_for_read doc (c326, commit 9987982) linked a non-existent `set_user_version`; the user_version writer is the private apply_user_version, reached via the public apply_migrations. Reference apply_migrations so `cargo doc -D warnings` passes. Doc-only. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-storage/src/schema.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/loomweave-storage/src/schema.rs b/crates/loomweave-storage/src/schema.rs index cdae0114..e3e92eb3 100644 --- a/crates/loomweave-storage/src/schema.rs +++ b/crates/loomweave-storage/src/schema.rs @@ -148,7 +148,7 @@ pub fn verify_user_version(conn: &Connection) -> Result<()> { /// Reject an *unmigrated* database (`user_version = 0`) at read-open time. /// -/// [`set_user_version`] / [`apply_migrations`] stamp +/// [`apply_migrations`] stamps /// `user_version = CURRENT_SCHEMA_VERSION`, so a genuine Loomweave index — even /// one with zero entities (installed but not yet analyzed) — reads back a /// non-zero version. 
A `user_version` of 0 means no Loomweave schema was ever From a8637514c2e0842002c41f3cc35f73039b67316c Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 20:17:08 +1000 Subject: [PATCH 57/60] docs: correct resolver precedence docs (164f) + document store_dir skip-list constraint (clarion-6dd4b8bb85) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 164f follow-up: the loomweave_url / filigree_url module docstrings still described the pre-164f behaviour — loomweave_url.rs flatly said the explicit flag/env precedence was "each consumer's own job, not this library function", which the 164f change contradicted (the function now resolves WEFT_<X>_URL + weft.toml [X].url itself). Rewrite both module headers to state the C-9 §2.2 ladder (env > weft.toml > ephemeral.port > configured) and note it supersedes the ADR-044 consumer-only division for these resolvers. Also fix store.rs's module doc, which my sibling_url addition made stale ("reads only its own [loomweave] table" — it now also reads the allowlisted sibling url). Doc-only; rustdoc -D warnings + fmt green. clarion-6dd4b8bb85 (resolve via the ticket's sanctioned option b — document the constraint): the source-walk / secret-scan / pyright skip-lists exclude the whole .weft/ dotdir, so a [loomweave].store_dir override must stay within .weft/ or else be placed entirely outside the analyzed root; an override under the analyzed tree but outside .weft/ would get loomweave.db walked/secret-scanned as source. Documented in store.rs (ADR-046 Consequences). Auto-excluding an arbitrary override location (option a) was considered and rejected as not worth the coupling — the recommended override stays within .weft/.
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-core/src/store.rs | 18 +++++++++++++-- .../loomweave-federation/src/filigree_url.rs | 7 ++++++ .../loomweave-federation/src/loomweave_url.rs | 23 ++++++++++++++----- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/crates/loomweave-core/src/store.rs b/crates/loomweave-core/src/store.rs index 4fc0f36c..cebd6f6e 100644 --- a/crates/loomweave-core/src/store.rs +++ b/crates/loomweave-core/src/store.rs @@ -17,13 +17,27 @@ //! member-private `[loomweave].store_dir` key (the canonical store-relocation key //! across the federation). `weft.toml` is **read-only** to Loomweave — install, //! doctor, and the CLI never write it (Gate `weft-eb3dee402f`: never add a writer -//! to a shared multi-section file). Loomweave reads **only its own -//! `[loomweave]` table**; every other top-level table (a sibling's section) is +//! to a shared multi-section file). Loomweave reads its own `[loomweave]` table +//! in full, plus the allowlisted cross-read `url` key from a sibling's table +//! (see [`sibling_url`], C-9 §2.1); every other key in a sibling's section is //! ignored, so the file stays forward-compatible as siblings add their own keys. //! //! Resolution is fail-soft (C-9c, normative): a missing OR malformed `weft.toml` //! — parse error, wrong type, absent table/key, blank value — is treated as //! absent, and the built-in default applies. It is never a hard failure. +//! +//! ### Override location constraint (`store_dir`) +//! +//! The source-walk, secret-scan, and pyright skip-lists exclude the whole +//! `.weft/` dotdir, so a store kept at the default (or any `store_dir` *inside* +//! `.weft/`) is never walked, scanned, or type-checked as project source. A +//! `store_dir` override is therefore **required to stay within `.weft/`, or else +//! be placed entirely outside the analyzed project root.** An override pointing +//! 
at a path *under the analyzed tree but outside `.weft/`* is a misconfiguration: +//! `loomweave.db` and its WAL would be walked and secret-scanned as if they were +//! source (clarion-6dd4b8bb85, ADR-046 Consequences). Auto-excluding an arbitrary +//! override location was considered and rejected as not worth the coupling — the +//! recommended override stays within `.weft/`. use std::path::{Path, PathBuf}; diff --git a/crates/loomweave-federation/src/filigree_url.rs b/crates/loomweave-federation/src/filigree_url.rs index 1130e497..bc6ddc8c 100644 --- a/crates/loomweave-federation/src/filigree_url.rs +++ b/crates/loomweave-federation/src/filigree_url.rs @@ -29,6 +29,13 @@ //! file is fail-soft: any missing/corrupt/out-of-range content degrades to the //! configured URL. //! +//! Precedence (C-9 §2.2, highest wins; see [`resolve_filigree_url`] for the +//! full contract): `WEFT_FILIGREE_URL` env → `weft.toml [filigree].url` → +//! `.weft/filigree/ephemeral.port` → configured `base_url`. The operator's +//! durable env / `weft.toml` declarations (used verbatim) sit *above* on-disk +//! port discovery — they name a Filigree that may be remote, with no local port +//! file. Every rung is fail-soft. +//! //! Scope: ethereal mode only. Filigree's `server` mode resolves through a //! home-directory global (`~/.config/filigree/server.json`); that path is not //! exercised here and is left as a known gap (clarion-318f1254eb tracks the diff --git a/crates/loomweave-federation/src/loomweave_url.rs b/crates/loomweave-federation/src/loomweave_url.rs index 473a01b6..5d6b44e7 100644 --- a/crates/loomweave-federation/src/loomweave_url.rs +++ b/crates/loomweave-federation/src/loomweave_url.rs @@ -1,12 +1,23 @@ //! Resolve the live Loomweave read-API base URL (ADR-044). //! //! The reference reader of the `.weft/loomweave/ephemeral.port` file contract and -//! the twin of [`crate::filigree_url`]. Precedence (consumer-side): the -//! 
published live port wins over a configured URL, which wins over nothing. -//! (ADR-044's higher "explicit flag/env" precedence level is realized by each -//! consumer's own CLI/env handling — e.g. Wardline's `--loomweave-url` — not by -//! this library function.) Fail-soft throughout: a missing/corrupt file folds -//! to the configured URL; absent both, `None` (federation simply degrades). +//! the twin of [`crate::filigree_url`]. Resolution walks the C-9 §2.2 +//! precedence ladder (highest wins), reporting which rung produced the URL: +//! 1. `WEFT_LOOMWEAVE_URL` env (a per-process operator override), verbatim; +//! 2. `weft.toml [loomweave].url` (the operator's durable declaration), +//! verbatim — deliberately above on-disk discovery, since a remote +//! Loomweave has no local `ephemeral.port`; +//! 3. the published `.weft/loomweave/ephemeral.port`; +//! 4. the consumer's configured URL; else +//! 5. nothing (`None`). +//! +//! This supersedes the earlier ADR-044 division of labour (where the explicit +//! flag/env rung was each consumer's own job and this function read only the +//! port file): the env + `weft.toml` rungs are now resolved here, with the env +//! getter injected so the rung stays testable. A runtime flag (e.g. Wardline's +//! `--loomweave-url`) still sits above all of these and is applied by the +//! consumer before calling. Fail-soft throughout: a blank/absent/corrupt value +//! at any rung falls through to the next (federation simply degrades). 
use std::path::Path; From ce029585ba4f9af734240220f8805974aac9632f Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Mon, 8 Jun 2026 21:00:07 +1000 Subject: [PATCH 58/60] test(secret-scan): lock briefing_blocked-survives-reanalysis invariant (clarion-3b87a7b174) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit briefing_blocked is a VIRTUAL generated column over the mutable properties JSON (migration 0002), re-derived from the secret scanner every run; the writer-actor upsert replaces properties wholesale (writer.rs:687) with no read-back. So a secret-bearing entity stays withheld only because every producer re-asserts the block each run. That invariant held on all HEAD paths (pre_ingest is unconditional incl. --resume; secret files are carved out of incremental skip; all three producers fail-closed via or_else(UnscannedSource)) but was UNTESTED — fragile to a future producer that rewrites a secret file's properties without re-stamping the block. Add still_secret_stays_blocked_across_reanalysis: a file that STILL contains a secret must keep briefing_blocked across (1) a full re-analysis that rewrites properties (changed body), (2) an incremental skip, and (3) --resume. Verified to bite by mutating a producer to drop the block (RED), then reverting (GREEN). Investigation found the headline "silent un-block" does not reproduce on HEAD; the only un-blocks are operator-initiated and tested-as-correct (baseline / --allow-unredacted-secrets). Reframed to hardening; no migration. A separate read-side gap (MCP orientation/find/neighborhood/semantic-search leak blocked-entity identity with no gate) was filed as clarion-307668e2be. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/tests/secret_scan.rs | 115 ++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/crates/loomweave-cli/tests/secret_scan.rs b/crates/loomweave-cli/tests/secret_scan.rs index ae0dcc24..52b3385a 100644 --- a/crates/loomweave-cli/tests/secret_scan.rs +++ b/crates/loomweave-cli/tests/secret_scan.rs @@ -309,6 +309,121 @@ fn resume_does_not_duplicate_secret_findings() { assert_eq!(run_rows, 1, "resume reuses the run row"); } +#[test] +fn still_secret_stays_blocked_across_reanalysis() { + // Regression for clarion-3b87a7b174: a file that STILL contains a secret + // must keep `briefing_blocked` across every re-analysis path. The flag has + // no durable storage — `entities.briefing_blocked` (migration 0002) is a + // VIRTUAL generated column over the mutable `properties` JSON, re-derived + // from the scanner each run (writer.rs upserts `properties = excluded. + // properties` wholesale). So the only thing keeping a secret entity withheld + // is that every producer re-asserts the block on every run. This locks the + // invariant `still-secret ⇒ every entity of the file stays briefing_blocked` + // across (1) a full re-analysis that rewrites properties (changed body), + // (2) an incremental skip (unchanged), and (3) `--resume`. A future producer + // that rewrites a secret file's properties without re-stamping the block + // would silently re-expose it to briefings/federation; this test fails first. 
+ let project = tempfile::tempdir().unwrap(); + let plugin = tempfile::tempdir().unwrap(); + write_secret_fixture_plugin(plugin.path()); + install_project(project.path()); + let leaky = project.path().join("leaky.sec"); + std::fs::write(&leaky, b"aws_access_key_id = 'AKIAIOSFODNN7EXAMPLE'\n").unwrap(); + + let analyze = || { + loomweave_bin() + .arg("analyze") + .arg(project.path()) + .env("PATH", plugin_path(plugin.path())) + .assert() + .success(); + }; + + // (total entities for the file, of which how many are NOT blocked). + let census = || { + let db = conn(project.path()); + let total: i64 = db + .query_row( + "SELECT COUNT(*) FROM entities WHERE source_file_path LIKE '%leaky.sec'", + [], + |row| row.get(0), + ) + .unwrap(); + let unblocked: i64 = db + .query_row( + "SELECT COUNT(*) FROM entities \ + WHERE source_file_path LIKE '%leaky.sec' \ + AND json_extract(properties, '$.briefing_blocked') IS NULL", + [], + |row| row.get(0), + ) + .unwrap(); + (total, unblocked) + }; + + // Run 1 — fresh analyze. The secret-bearing file is withheld. + analyze(); + let (total, unblocked) = census(); + assert!( + total >= 1, + "the secret file must produce at least one entity" + ); + assert_eq!( + unblocked, 0, + "run 1: every entity of the secret file must be briefing_blocked", + ); + + // Run 2 — FULL re-analysis with a changed body that STILL contains the + // secret. The changed hash forces a non-skip, so the plugin re-runs and the + // writer rewrites `properties` wholesale: this is the exact properties- + // rewrite path the ticket worried about. The block must be re-asserted. 
+ std::fs::write( + &leaky, + b"# key rotated, value unchanged\naws_access_key_id = 'AKIAIOSFODNN7EXAMPLE'\n", + ) + .unwrap(); + analyze(); + let (total, unblocked) = census(); + assert!(total >= 1); + assert_eq!( + unblocked, 0, + "run 2 (full re-analysis, properties rewritten, secret remains): \ + every entity must stay briefing_blocked", + ); + + // Run 3 — incremental skip (file unchanged since run 2). The rows are left + // untouched, so the block persists; lock it so a skip-path regression bites. + analyze(); + let (_total, unblocked) = census(); + assert_eq!( + unblocked, 0, + "run 3 (incremental skip, secret remains): block must persist", + ); + + // Run 4 — `--resume` the latest run. Resume re-runs the unconditional + // secret scan (it only changes finding `mark_unseen`), so the block must + // survive a resumed re-analysis too. + let run_id: String = conn(project.path()) + .query_row( + "SELECT id FROM runs ORDER BY started_at DESC LIMIT 1", + [], + |r| r.get(0), + ) + .unwrap(); + loomweave_bin() + .arg("analyze") + .args(["--resume", &run_id]) + .arg(project.path()) + .env("PATH", plugin_path(plugin.path())) + .assert() + .success(); + let (_total, unblocked) = census(); + assert_eq!( + unblocked, 0, + "run 4 (--resume, secret remains): block must survive a resumed re-analysis", + ); +} + #[test] fn dotenv_sidecar_persists_finding_with_core_file_anchor() { let project = tempfile::tempdir().unwrap(); From 1015274f289d5119cf95e62ab2f9ef86b89d6280 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Tue, 9 Jun 2026 06:44:54 +1000 Subject: [PATCH 59/60] fix(storage): entity_find content+substring recall so discovery beats grep (weft-b7ce301e92) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dogfood-3 finding LW-1/LW-2: entity_find was effectively name-only. 
It ran FTS over name/short_name/summary, and summaries are off by default (ADR-030), so a concept word returned empty and nudged agents back to grep — the discovery step that is supposed to replace grep didn't. Two structural causes, confirmed against the live lacuna index: - A concept word that lives only in docstring prose (e.g. `borrow` in LoanPolicy's docstring) was never indexed — FTS covers name/short_name/summary only. - A concept word that is a substring of a compound identifier (e.g. `library` in the class `LibraryService`) cannot be reached by FTS at all: FTS matches whole stemmed tokens, and porter mangles the compound so neither `library` nor the prefix `library*` aligns with the stored token (`librar*` does, 8 hits). Prefix-append cannot fix this; substring matching can. Fix (no schema migration): find_entities now merges two recall paths — 1. FTS (when the pattern is FTS-safe): stemmed, bm25-ranked. 2. LIKE substring over id/name/short_name/summary AND a briefing_blocked-guarded docstring. The two result sets are deduped by id (FTS rank first, then substring-only hits in id order) and paged in Rust. This is the grep-equivalent, always-on keyword path the surface promises, with no dependency on the opt-in embeddings sidecar (ADR-040). LW-2: turning semantic search on by default is ruled out by ADR-040 + local-first (needs a hosted embedding service + key); instead the `not_enabled` signal and the entity_find description now point at entity_find as the always-on path. Secret safety: the docstring clause is gated on `briefing_blocked IS NULL` so a docstring withheld by the pre-ingest secret scanner (ADR-013) never becomes matchable — searching for a leaked secret must not resurface the blocked entity. This deliberately does NOT widen clarion-307668e2be (the separate, tracked blocked-entity *identity* exposure on these read surfaces); id/name/short_name matching is unchanged. 
Validated on the live lacuna index (no re-analyze; query-only change): both `entity_find 'borrow'` (0->1, LoanPolicy) and `'library'` (0->10, LibraryService ranked first) flip empty->non-empty, end-to-end through a rebuilt serve binary. - 3 new storage tests: docstring concept word, identifier substring FTS cannot reach, and the briefing_blocked content guard. Existing find/kind/pagination tests unchanged and green. - entity_find tool description, web reference, and the pinned description test updated. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../loomweave-mcp/src/catalogue/semantic.rs | 4 +- crates/loomweave-mcp/src/lib.rs | 4 +- crates/loomweave-storage/src/query.rs | 133 +++++++++++++----- .../loomweave-storage/tests/query_helpers.rs | 98 +++++++++++++ web/docs/reference/mcp-tools.md | 17 ++- 5 files changed, 213 insertions(+), 43 deletions(-) diff --git a/crates/loomweave-mcp/src/catalogue/semantic.rs b/crates/loomweave-mcp/src/catalogue/semantic.rs index fa16996b..d2762e95 100644 --- a/crates/loomweave-mcp/src/catalogue/semantic.rs +++ b/crates/loomweave-mcp/src/catalogue/semantic.rs @@ -54,7 +54,9 @@ impl ServerState { "signal": missing_signal( "semantic_search", "semantic search is not enabled (semantic_search.enabled=false) or no embedding \ - provider is configured; enable it and run analyze to build embeddings", + provider is configured; enable it and run analyze to build embeddings. For \ + keyword discovery without embeddings, use entity_find — it matches name, \ + summary, and docstring content by substring (no opt-in required)", ), }))); }; diff --git a/crates/loomweave-mcp/src/lib.rs b/crates/loomweave-mcp/src/lib.rs index c0e7cef4..15af22e6 100644 --- a/crates/loomweave-mcp/src/lib.rs +++ b/crates/loomweave-mcp/src/lib.rs @@ -314,7 +314,7 @@ pub fn list_tools() -> Vec { }, ToolDefinition { name: "entity_find", - description: "Search Loomweave entities by id, name, short name, and summary text stored on entity rows. 
Results are paginated and ranked by FTS match where possible. This does not traverse the graph and does not search on-demand summary_cache entries. Pass an optional `kind` (e.g. \"subsystem\", \"function\", \"class\", \"module\") to return only entities of that kind — the way to locate a subsystem without visually filtering results.", + description: "Search Loomweave entities by id, name, short name, summary, and docstring content. Matching merges stemmed FTS ranking with grep-equivalent substring recall, so a concept word finds both entities whose docstring mentions it and identifiers that merely contain it (e.g. `library` finds the class `LibraryService`, which whole-token FTS alone misses). This is the always-on keyword-discovery path — no embeddings required (semantic ranking is the separate, opt-in `entity_semantic_search_list`). Results are paginated; FTS-ranked hits come first, then substring-only hits. Docstrings withheld by the secret scanner (briefing_blocked) are never matched. This does not traverse the graph and does not search on-demand summary_cache entries. Pass an optional `kind` (e.g. \"subsystem\", \"function\", \"class\", \"module\") to return only entities of that kind — the way to locate a subsystem without visually filtering results.", input_schema: json!({ "type": "object", "properties": { @@ -5011,7 +5011,7 @@ mod tests { assert_eq!(tools[1].name, "entity_find"); assert_eq!( tools[1].description, - "Search Loomweave entities by id, name, short name, and summary text stored on entity rows. Results are paginated and ranked by FTS match where possible. This does not traverse the graph and does not search on-demand summary_cache entries. Pass an optional `kind` (e.g. \"subsystem\", \"function\", \"class\", \"module\") to return only entities of that kind — the way to locate a subsystem without visually filtering results." + "Search Loomweave entities by id, name, short name, summary, and docstring content. 
Matching merges stemmed FTS ranking with grep-equivalent substring recall, so a concept word finds both entities whose docstring mentions it and identifiers that merely contain it (e.g. `library` finds the class `LibraryService`, which whole-token FTS alone misses). This is the always-on keyword-discovery path — no embeddings required (semantic ranking is the separate, opt-in `entity_semantic_search_list`). Results are paginated; FTS-ranked hits come first, then substring-only hits. Docstrings withheld by the secret scanner (briefing_blocked) are never matched. This does not traverse the graph and does not search on-demand summary_cache entries. Pass an optional `kind` (e.g. \"subsystem\", \"function\", \"class\", \"module\") to return only entities of that kind — the way to locate a subsystem without visually filtering results." ); assert_eq!(tools[2].name, "entity_callers_list"); assert_eq!( diff --git a/crates/loomweave-storage/src/query.rs b/crates/loomweave-storage/src/query.rs index 91a64606..353dcec5 100644 --- a/crates/loomweave-storage/src/query.rs +++ b/crates/loomweave-storage/src/query.rs @@ -738,54 +738,117 @@ pub fn find_entities( )); } let limit = limit.clamp(1, 100); - let limit_i64 = i64::try_from(limit) - .map_err(|_| StorageError::InvalidQuery("entity search limit is too large".to_owned()))?; - let offset_i64 = i64::try_from(offset) - .map_err(|_| StorageError::InvalidQuery("entity search offset is too large".to_owned()))?; - if is_fts_safe(pattern) { - let kind_clause = if kind.is_some() { - "AND e.kind = ?4 " - } else { - "" - }; - let sql = format!( - "SELECT e.{columns} \ - FROM entity_fts f \ - JOIN entities e ON e.id = f.entity_id \ - WHERE entity_fts MATCH ?1 {kind_clause}\ - ORDER BY bm25(entity_fts), e.id \ - LIMIT ?2 OFFSET ?3", - columns = ENTITY_COLUMNS.replace(", ", ", e.") - ); - let mut stmt = conn.prepare(&sql)?; - let rows = match kind { - Some(kind) => stmt.query_map( - params![pattern, limit_i64, offset_i64, kind], - 
map_entity_row, - )?, - None => stmt.query_map(params![pattern, limit_i64, offset_i64], map_entity_row)?, - }; - return rows - .collect::, _>>() - .map_err(StorageError::from); + // We materialise `offset + limit` rows from each recall path, merge them + // FTS-first, then page in Rust. `offset + limit` is the smallest prefix of + // the merged stream that can contain this page, so both sources fetch at + // OFFSET 0 up to this cap and pagination happens after the merge. + let fetch_cap = offset.saturating_add(limit); + + // Two complementary recall paths, merged: + // + // 1. FTS (only when the pattern is FTS-safe): stemmed, bm25-ranked matches + // over name / short_name / summary. Good ranking, but it matches whole + // stemmed tokens, not substrings — so the query `library` never reaches + // the class `LibraryService` (token `libraryservice`), and a concept word + // that lives only in docstring prose is invisible. + // 2. LIKE substring over id / name / short_name / summary AND the + // secret-guarded docstring. This is the grep-equivalent content recall the + // discovery surface promises (weft-b7ce301e92): it catches identifier + // substrings FTS cannot and surfaces concept words from docstring prose, + // with no dependency on the opt-in embeddings sidecar (ADR-040). + // + // The merge keeps FTS hits first (preserving bm25 rank) and appends LIKE-only + // hits in id order, deduped by id. Each source is capped at `fetch_cap` and + // `limit` is clamped to <=100, so the merged prefix is bounded and exact for + // any page. + let fts_rows = if is_fts_safe(pattern) { + fts_match_entities(conn, pattern, fetch_cap, kind)? 
+ } else { + Vec::new() + }; + let like_rows = like_match_entities(conn, pattern, fetch_cap, kind)?; + + let mut seen = std::collections::HashSet::with_capacity(fts_rows.len() + like_rows.len()); + let mut merged = Vec::with_capacity(fts_rows.len() + like_rows.len()); + for row in fts_rows.into_iter().chain(like_rows) { + if seen.insert(row.id.clone()) { + merged.push(row); + } } + Ok(merged.into_iter().skip(offset).take(limit).collect()) +} +/// FTS-safe, bm25-ranked matches over `name` / `short_name` / `summary_text`, +/// capped at `fetch_cap` (always OFFSET 0 — [`find_entities`] pages after the +/// merge). The caller guarantees `pattern` satisfies [`is_fts_safe`]. +fn fts_match_entities( + conn: &Connection, + pattern: &str, + fetch_cap: usize, + kind: Option<&str>, +) -> Result> { + let cap_i64 = i64::try_from(fetch_cap) + .map_err(|_| StorageError::InvalidQuery("entity search limit is too large".to_owned()))?; + let kind_clause = if kind.is_some() { + "AND e.kind = ?3 " + } else { + "" + }; + let sql = format!( + "SELECT e.{columns} \ + FROM entity_fts f \ + JOIN entities e ON e.id = f.entity_id \ + WHERE entity_fts MATCH ?1 {kind_clause}\ + ORDER BY bm25(entity_fts), e.id \ + LIMIT ?2", + columns = ENTITY_COLUMNS.replace(", ", ", e.") + ); + let mut stmt = conn.prepare(&sql)?; + let rows = match kind { + Some(kind) => stmt.query_map(params![pattern, cap_i64, kind], map_entity_row)?, + None => stmt.query_map(params![pattern, cap_i64], map_entity_row)?, + }; + rows.collect::, _>>() + .map_err(StorageError::from) +} + +/// Substring (LIKE) matches over `id` / `name` / `short_name` / `summary` plus +/// the `briefing_blocked`-guarded docstring, capped at `fetch_cap` (OFFSET 0). +/// +/// The docstring clause is gated on `briefing_blocked IS NULL`: a secret-bearing +/// docstring withheld by the pre-ingest scanner (ADR-013) must never become +/// matchable, or searching for a leaked secret would resurface the very entity +/// the block exists to hide. 
(Identity exposure of blocked entities on these +/// read surfaces is a separate, tracked gap — clarion-307668e2be; this content +/// clause deliberately does not widen it. `id`/`name`/`short_name`/`summary` +/// matching is unchanged from the prior behaviour.) +fn like_match_entities( + conn: &Connection, + pattern: &str, + fetch_cap: usize, + kind: Option<&str>, +) -> Result> { + let cap_i64 = i64::try_from(fetch_cap) + .map_err(|_| StorageError::InvalidQuery("entity search limit is too large".to_owned()))?; let like = format!("%{}%", escape_like(pattern)); - let kind_clause = if kind.is_some() { "AND kind = ?4 " } else { "" }; + let kind_clause = if kind.is_some() { "AND kind = ?3 " } else { "" }; let sql = format!( "SELECT {ENTITY_COLUMNS} \ FROM entities \ WHERE (id LIKE ?1 ESCAPE '\\' \ OR name LIKE ?1 ESCAPE '\\' \ OR short_name LIKE ?1 ESCAPE '\\' \ - OR COALESCE(summary, '') LIKE ?1 ESCAPE '\\') {kind_clause}\ + OR COALESCE(summary, '') LIKE ?1 ESCAPE '\\' \ + OR (json_extract(properties, '$.briefing_blocked') IS NULL \ + AND COALESCE(json_extract(properties, '$.docstring'), '') LIKE ?1 ESCAPE '\\')) \ + {kind_clause}\ ORDER BY id \ - LIMIT ?2 OFFSET ?3" + LIMIT ?2" ); let mut stmt = conn.prepare(&sql)?; let rows = match kind { - Some(kind) => stmt.query_map(params![like, limit_i64, offset_i64, kind], map_entity_row)?, - None => stmt.query_map(params![like, limit_i64, offset_i64], map_entity_row)?, + Some(kind) => stmt.query_map(params![like, cap_i64, kind], map_entity_row)?, + None => stmt.query_map(params![like, cap_i64], map_entity_row)?, }; rows.collect::, _>>() .map_err(StorageError::from) diff --git a/crates/loomweave-storage/tests/query_helpers.rs b/crates/loomweave-storage/tests/query_helpers.rs index bdb20df8..bf96eccb 100644 --- a/crates/loomweave-storage/tests/query_helpers.rs +++ b/crates/loomweave-storage/tests/query_helpers.rs @@ -41,6 +41,26 @@ fn insert_entity_with_hash(conn: &Connection, id: &str, kind: &str, content_hash .expect("insert 
entity with hash"); } +fn insert_entity_with_properties( + conn: &Connection, + id: &str, + kind: &str, + short_name: &str, + properties_json: &str, +) { + conn.execute( + "INSERT INTO entities ( + id, plugin_id, kind, name, short_name, properties, created_at, updated_at + ) VALUES ( + ?1, 'python', ?2, ?1, ?3, ?4, + strftime('%Y-%m-%dT%H:%M:%fZ', 'now'), + strftime('%Y-%m-%dT%H:%M:%fZ', 'now') + )", + params![id, kind, short_name, properties_json], + ) + .expect("insert entity with properties"); +} + fn insert_named_entity( conn: &Connection, id: &str, @@ -997,6 +1017,84 @@ fn find_entities_kind_filter_applies_on_punctuation_like_path() { ); } +#[test] +fn find_entities_matches_concept_word_in_docstring() { + // weft-b7ce301e92 (LW-1): a concept word that lives only in docstring prose + // — not in any entity name — must be discoverable. FTS over name/short_name + // alone returns empty here ("borrow" is no entity's name); the LIKE content + // path over the docstring is what makes discovery beat grep at the entry. + let tempdir = tempfile::tempdir().unwrap(); + let conn = open_fresh(&tempdir); + insert_entity_with_properties( + &conn, + "python:class:loan.LoanPolicy", + "class", + "LoanPolicy", + r#"{"docstring": "Strategy interface: how many days a user may borrow a book."}"#, + ); + // A decoy with no matching content. + insert_entity(&conn, "python:class:user.User", "class"); + + let hits = find_entities(&conn, "borrow", 20, 0, None).expect("docstring search"); + assert_eq!(hits.len(), 1, "{hits:?}"); + assert_eq!(hits[0].id, "python:class:loan.LoanPolicy"); +} + +#[test] +fn find_entities_matches_identifier_substring_fts_cannot() { + // weft-b7ce301e92 (LW-1): the marquee case. The concept word `library` is a + // substring of the CamelCase identifier `LibraryService`, but FTS matches + // whole stemmed tokens (`libraryservice`), so `MATCH 'library'` returns + // nothing. 
The LIKE substring path is what surfaces the class an agent is + // actually looking for. + let tempdir = tempfile::tempdir().unwrap(); + let conn = open_fresh(&tempdir); + insert_entity_with_properties( + &conn, + "python:class:catalog.LibraryService", + "class", + "LibraryService", + "{}", + ); + + let hits = find_entities(&conn, "library", 20, 0, None).expect("identifier-substring search"); + assert!( + hits.iter() + .any(|e| e.id == "python:class:catalog.LibraryService"), + "substring of a CamelCase identifier must be discoverable: {hits:?}" + ); +} + +#[test] +fn find_entities_does_not_leak_briefing_blocked_docstring_content() { + // weft-b7ce301e92 secret-safety invariant (cf. clarion-307668e2be): a + // docstring withheld by the pre-ingest scanner (briefing_blocked set) must + // never become matchable via the new content path — otherwise searching for + // a leaked secret would resurface the very entity the block exists to hide. + // An identical docstring WITHOUT the block must match (proves the guard, not + // the absence of indexing, is what suppresses it). 
+ let tempdir = tempfile::tempdir().unwrap(); + let conn = open_fresh(&tempdir); + insert_entity_with_properties( + &conn, + "python:function:secrets.blocked", + "function", + "blocked", + r#"{"docstring": "uniquesecrettoken in here", "briefing_blocked": "secret_present"}"#, + ); + insert_entity_with_properties( + &conn, + "python:function:secrets.visible", + "function", + "visible", + r#"{"docstring": "uniquesecrettoken in here"}"#, + ); + + let hits = find_entities(&conn, "uniquesecrettoken", 20, 0, None).expect("guarded search"); + assert_eq!(hits.len(), 1, "blocked docstring must not match: {hits:?}"); + assert_eq!(hits[0].id, "python:function:secrets.visible"); +} + #[test] fn contained_entity_ids_is_depth_first_cycle_safe_and_capped() { let tempdir = tempfile::tempdir().unwrap(); diff --git a/web/docs/reference/mcp-tools.md b/web/docs/reference/mcp-tools.md index 59b46a61..05ffcc3f 100644 --- a/web/docs/reference/mcp-tools.md +++ b/web/docs/reference/mcp-tools.md @@ -27,11 +27,18 @@ a module reports `containing_range` — never a fabricated exact match. ## `find_entity(pattern, kind?)` -Searches entities by id, name, short name, and stored summary text. Results are -paginated and ranked by full-text match where possible. Does **not** traverse -the graph and does **not** search on-demand `summary_cache` entries. Pass an -optional `kind` (`subsystem`, `function`, `class`, `module`) to filter — the way -to locate a subsystem without visually filtering results. +Searches entities by id, name, short name, summary, and **docstring content**. +Matching merges stemmed full-text ranking with grep-equivalent substring recall, +so a concept word finds both entities whose docstring mentions it and identifiers +that merely contain it (e.g. `library` finds the class `LibraryService`, which +whole-token FTS alone misses). This is the always-on keyword-discovery path — no +embeddings required (semantic ranking is the separate, opt-in +`entity_semantic_search_list`). 
Results are paginated: full-text hits first, then +substring-only hits. Docstrings withheld by the secret scanner (`briefing_blocked`) +are never matched. Does **not** traverse the graph and does **not** search +on-demand `summary_cache` entries. Pass an optional `kind` (`subsystem`, +`function`, `class`, `module`) to filter — the way to locate a subsystem without +visually filtering results. ## `callers_of(id)` From fcd7978e52c461cb81c99a9e0f430194234bc678 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Tue, 9 Jun 2026 07:06:18 +1000 Subject: [PATCH 60/60] docs(skill): reflect entity_find content recall + project_finding_list in the agent pack (weft-b7ce301e92) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bring the agent-facing surface in step with the shipped capabilities: - loomweave-workflow SKILL.md (embedded via include_str! in loomweave-mcp): - New "How find_entity matches" section — it merges stemmed FTS ranking with grep-equivalent substring recall over name/short_name/summary AND docstring, so a concept word that is only a substring of a compound identifier (library -> LibraryService) or lives only in docstring prose (borrow in a LoanPolicy docstring) is discoverable. Names it the always-on keyword-discovery path (reach for it before grep); no embeddings required. - search_semantic paragraph reframed: not_enabled is not a dead end — points back at find_entity as the keyword path (the LW-2 honest-degrade pointer). - Added project_finding_list (cb49008) to the inspection catalogue (every finding project-wide, no entity id) and the has_findings filter on find_by_wardline. - find_entity table row + pagination gotcha updated for content matching. - instructions/loomweave.md (embedded in loomweave-cli): one-line note that entity_find is the grep replacement (substring over name/summary/docstring, no embeddings); semantic ranking is the separate opt-in tool. 
No code change; embedded-asset prose only. skills/install + instructions drift-marker tests green (installed projects will read as "drifted" until the instructions block is re-pushed, by design). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../assets/instructions/loomweave.md | 6 +++ .../assets/skills/loomweave-workflow/SKILL.md | 42 +++++++++++++++---- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/crates/loomweave-cli/assets/instructions/loomweave.md b/crates/loomweave-cli/assets/instructions/loomweave.md index fa72067b..b3b2aaa8 100644 --- a/crates/loomweave-cli/assets/instructions/loomweave.md +++ b/crates/loomweave-cli/assets/instructions/loomweave.md @@ -8,6 +8,12 @@ subsystem owns X", or "find the thing that does Y" — ask Loomweave's MCP tools (`mcp__loomweave__*`): `entity_find`, `entity_at`, `entity_callers_list`, `entity_neighborhood_get`, `project_status_get`. +`entity_find` is the grep replacement for "find the thing that does Y": it +matches a concept word by substring over name, summary, and docstring content +(e.g. `library` finds `LibraryService`), with no embeddings required — reach for +it before grepping. Semantic *ranking* is the separate, opt-in +`entity_semantic_search_list`. + Entity IDs are `{plugin}:{kind}:{qualified_name}` (e.g. `python:function:pkg.mod.func`); subsystems are `core:subsystem:{hash}`. You rarely type IDs — get one from `entity_find` or `entity_at`, then copy it diff --git a/crates/loomweave-mcp/assets/skills/loomweave-workflow/SKILL.md b/crates/loomweave-mcp/assets/skills/loomweave-workflow/SKILL.md index 5b8e4d80..4f62671a 100644 --- a/crates/loomweave-mcp/assets/skills/loomweave-workflow/SKILL.md +++ b/crates/loomweave-mcp/assets/skills/loomweave-workflow/SKILL.md @@ -58,7 +58,7 @@ tell which case you're in. 
| Tool | Use when | Args | |------|----------|------| -| `find_entity` | locate an entity by name/text | `{"pattern": ""}` | +| `find_entity` | locate an entity by name, or by a concept word in its docstring/identifier (substring) | `{"pattern": ""}` | | `entity_at` | what's at a file:line | `{"file": "rel/path.py", "line": 42}` | | `callers_of` | what calls this entity | `{"id": ""}` | | `neighborhood` | one-hop callers+callees+container+contained+references+imports | `{"id": ""}` | @@ -106,6 +106,25 @@ node-id strings ranked longest-first. Resolve a path id against `nodes`, not by re-reading each path element. `truncated`/`truncation_reason` report `edge-cap` (traversal stopped early) or `path-cap` (ranked output trimmed for size). +### How `find_entity` matches — the grep replacement for "find the thing that does Y" + +`find_entity` merges two recall paths so a concept word, not just an exact +identifier, lands a hit: + +- **stemmed full-text ranking** over name / short name / summary, and +- **grep-equivalent substring recall** over name / short name / summary **and the + entity's docstring**. + +So a word that is only a *substring* of a compound identifier is discoverable — +`{"pattern": "library"}` finds the class `LibraryService`, which whole-token +full-text alone never matches — and a concept that lives only in docstring prose +(e.g. `borrow` mentioned in a `LoanPolicy` docstring) is found even when no +entity is named after it. This is the **always-on keyword-discovery path: reach +for `find_entity` before you grep.** It needs no embeddings — semantic *ranking* +is the separate, opt-in `search_semantic` (below). Full-text hits rank first, +then substring-only hits. Docstrings withheld by the secret scanner +(`briefing_blocked`) are never matched. + ## Catalogue tools — inspection · faceted search · shortcuts Beyond navigation, Loomweave serves a **stateless catalogue** of read tools. 
All @@ -125,6 +144,7 @@ descendants) **or** a path glob (`"src/auth/**"`); omit it for the whole project |------|----------|------| | `guidance_for` | guidance sheets applicable to an entity, scope-ranked | `{"id": ""}` | | `findings_for` | findings anchored to an entity (filter kind/severity/status) | `{"id": "", "filter": {"status": "open"}}` | +| `project_finding_list` | **every** finding across the project — no entity id needed; each row carries its anchoring entity `{id, sei, file, line}` + tool/rule/kind/severity/status | `{"filter": {"severity": "error"}}` | | `wardline_for` | the entity's Wardline metadata (verbatim, opaque) | `{"id": ""}` | **Faceted search:** @@ -133,7 +153,7 @@ descendants) **or** a path glob (`"src/auth/**"`); omit it for the whole project |------|----------|------| | `find_by_tag` | entities carrying a categorisation tag | `{"tag": "", "scope": "src/**"}` | | `find_by_kind` | entities of a kind (`function`/`class`/`module`/…) | `{"kind": "function"}` | -| `find_by_wardline` | entities by Wardline tier/group (best-effort) | `{"tier": "exact"}` | +| `find_by_wardline` | entities by Wardline tier/group (best-effort); pass `has_findings:true` to page only taint-fact entities that also carry a finding | `{"tier": "exact", "has_findings": true}` | **Exploration-elimination shortcuts** (on-demand graph/index queries — no analyze-time precompute): @@ -159,10 +179,15 @@ honest-empty unless a plugin emits those tags. Likewise `high_churn` and `recently_changed` are honest-empty until churn/change signals are populated (use `index_diff` for repo-level freshness). -`search_semantic` is also in the catalogue. It is opt-in under -`semantic_search:`; when enabled, `loomweave analyze` populates the git-ignored -`.weft/loomweave/embeddings.db` sidecar and the query path filters stale vectors by -content hash. +`search_semantic` is also in the catalogue — embedding-similarity *ranking* for a +natural-language query. 
It is opt-in under `semantic_search:`; when enabled, +`loomweave analyze` populates the git-ignored `.weft/loomweave/embeddings.db` +sidecar and the query path filters stale vectors by content hash. When it is off +(the default) it returns `result_kind: "not_enabled"` rather than a fabricated or +empty-as-complete result — **that is not a dead end: `find_entity` already does +keyword/substring/docstring discovery with no embeddings required** (see "How +`find_entity` matches" above), so it is the right reach for "find the thing that +does Y" out of the box. > Not in this catalogue: `emit_observation` as a general-purpose write surface. @@ -197,8 +222,9 @@ and are composed into `summary` prompts with a real guidance fingerprint. `subsystem_of {"id": ""}` — it accepts any entity (a function/class resolves through its containing module) and returns the subsystem plus the module it resolved through. `subsystem_members` is the forward direction. -- **`find_entity` is paginated** (~20/page, `next_cursor`); narrow the pattern - rather than paging if you can. +- **`find_entity` is paginated** (~20/page, `next_cursor`); a broad concept word + now matches docstring/identifier substrings too, so it can return many hits — + narrow the pattern (or add a `kind` filter) rather than paging if you can. ## Launch