From cf5f39721bb13e4422327c6ee6b32f88bc7febb8 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 05:53:54 +1000 Subject: [PATCH 01/27] feat(serve): degrade to no-index MCP server instead of exiting on missing DB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When `.loomweave/loomweave.db` is absent, `loomweave serve` did a hard `ensure!(db_path.exists())` and exited 1 before the MCP protocol started. An MCP stdio client (Claude Code) just saw the server die at startup with the real reason buried in stderr — it read as "loomweave mcp failing" with no actionable signal. Now `serve` starts a degraded "no-index" stdio session instead: - `initialize` succeeds so the client connects cleanly; the `instructions` field leads with the run-`install`+`analyze` chirp (mirrors the SessionStart hook wording). - Every `tools/call` returns the same chirp as a tool result with `isError: true` — the load-bearing channel, since not every client surfaces `initialize.instructions`. - `tools/list` and the static `loomweave-workflow` prompt still answer so the surface looks healthy. - No HTTP read API bind, no LLM/embedding providers, no Filigree client, no ReaderPool — nothing to back them without a DB. One warn line to stderr (never stdout) at degraded startup. loomweave-mcp gains `handle_json_rpc_no_index` + `serve_stdio_no_index` plus chirp helpers; serve.rs swaps the exit for a `serve_no_index` branch. Closes clarion-ac36f51c2b. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/serve.rs | 37 ++++-- crates/loomweave-mcp/src/lib.rs | 192 ++++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+), 7 deletions(-) diff --git a/crates/loomweave-cli/src/serve.rs b/crates/loomweave-cli/src/serve.rs index 9496be06..b96da876 100644 --- a/crates/loomweave-cli/src/serve.rs +++ b/crates/loomweave-cli/src/serve.rs @@ -5,7 +5,7 @@ use std::sync::{Arc, mpsc}; use std::thread; use std::time::Duration; -use anyhow::{Context, Result, anyhow, ensure}; +use anyhow::{Context, Result, anyhow}; use loomweave_core::{ ApiEmbeddingProvider, ApiEmbeddingProviderConfig, ClaudeCliProvider, ClaudeCliProviderConfig, CodexCliProvider, CodexCliProviderConfig, EmbeddingProvider, EmbeddingProviderError, @@ -19,12 +19,13 @@ use loomweave_storage::{DEFAULT_BATCH_SIZE, DEFAULT_CHANNEL_CAPACITY, ReaderPool pub fn run(path: &Path, config_path: Option<&Path>) -> Result<()> { let db_path = path.join(".loomweave").join("loomweave.db"); - ensure!( - db_path.exists(), - "Loomweave database not found at {}; run `loomweave install --path {}` first", - db_path.display(), - path.display() - ); + if !db_path.exists() { + // No index yet. Rather than exiting 1 — which leaves the MCP client + // staring at a server that died at startup with the reason buried in + // stderr — serve a degraded stdio session that answers `initialize` and + // chirps "run analyze" from every tool call. clarion-ac36f51c2b. + return serve_no_index(path, &db_path); + } let project_root = path .canonicalize() @@ -106,6 +107,28 @@ pub fn run(path: &Path, config_path: Option<&Path>) -> Result<()> { supervise_stdio_with_http(stdio, http_server) } +/// Serve a degraded MCP stdio session for a project with no index. No HTTP read +/// API, no LLM / embedding providers, no Filigree client, no `ReaderPool` — +/// there is no DB to back any of them. 
The session answers `initialize` and +/// chirps "run `loomweave install` + `loomweave analyze`" from every tool call, +/// so the client connects and is told how to recover instead of seeing the +/// server exit. clarion-ac36f51c2b. +fn serve_no_index(project_root: &Path, db_path: &Path) -> Result<()> { + // Goes to stderr (the CLI's tracing sink) — never stdout, which carries the + // MCP protocol — so it lands in the MCP server log without corrupting framing. + tracing::warn!( + db = %db_path.display(), + "Loomweave has no index; serving a degraded MCP session. Run \ + `loomweave analyze` to build the graph, then reconnect." + ); + let stdin = std::io::stdin(); + let stdout = std::io::stdout(); + let mut reader = BufReader::new(stdin); + let mut writer = stdout.lock(); + loomweave_mcp::serve_stdio_no_index(project_root, &mut reader, &mut writer) + .context("serve degraded MCP stdio (no index)") +} + /// Capture the LLM policy posture for `project_status`. `live` means a provider /// that actually dispatches (`OpenRouter` / Codex / Claude CLIs); the recording /// fixture and the disabled state are not live. diff --git a/crates/loomweave-mcp/src/lib.rs b/crates/loomweave-mcp/src/lib.rs index fc29d681..3be34ed0 100644 --- a/crates/loomweave-mcp/src/lib.rs +++ b/crates/loomweave-mcp/src/lib.rs @@ -803,6 +803,112 @@ pub fn handle_json_rpc(request: &Value) -> Option { }) } +/// Actionable chirp for a project with no index. Mirrors the `SessionStart` hook +/// wording (`hook.rs`) so the operator sees the same "install then analyze" +/// sequence whether they read it from the shell or from an MCP client. Surfaced +/// both in the degraded `initialize` instructions and from every degraded +/// `tools/call` result. 
+fn no_index_message(project_root: &Path) -> String { + let root = project_root.display(); + format!( + "Loomweave has no index for this project yet \ +({root}/.loomweave/loomweave.db is missing), so the structural graph has not been \ +built and every Loomweave tool is unavailable. Run `loomweave install --path {root}` \ +then `loomweave analyze {root}` in a terminal to extract the entity / edge graph, \ +then reconnect this MCP server." + ) +} + +/// Degraded-mode orientation for the `initialize` `instructions` field. Distinct +/// from [`server_instructions`] (the healthy-index orientation) so the normal +/// path — and its `server_instructions_enumerate_every_tool` guard — is +/// untouched. +fn server_instructions_no_index(project_root: &Path) -> String { + format!( + "⚠ NO INDEX. {}\n\nNormally Loomweave answers \"what calls X\", \"where is X \ +defined\", \"what subsystem is X in\" from a pre-extracted graph instead of grepping \ +the tree — but it needs an index first. `tools/list` still shows the surface; any tool \ +call returns this same instruction until the index exists.", + no_index_message(project_root) + ) +} + +/// The `initialize` result for the degraded no-index server. Advertises `tools` +/// and `prompts` (the static `loomweave-workflow` prompt works without a DB) but +/// not `resources` (the `loomweave://context` resource needs the index). +fn initialize_result_no_index(project_root: &Path) -> Value { + json!({ + "protocolVersion": MCP_PROTOCOL_VERSION, + "capabilities": { "tools": {}, "prompts": {} }, + "serverInfo": { + "name": "loomweave", + "version": env!("CARGO_PKG_VERSION") + }, + "instructions": server_instructions_no_index(project_root) + }) +} + +/// JSON-RPC dispatch for the degraded "no index" stdio server: the project has +/// no `.loomweave/loomweave.db`, so there is no graph to query. 
`initialize` +/// succeeds (the client connects cleanly rather than seeing the server die) and +/// `tools/call` returns the actionable chirp as a tool result with +/// `isError: true` — the load-bearing channel, since not every client surfaces +/// the `initialize` `instructions`. `tools/list` and the static +/// `loomweave-workflow` prompt answer normally so the surface looks healthy. +/// clarion-ac36f51c2b. +#[must_use] +pub fn handle_json_rpc_no_index(request: &Value, project_root: &Path) -> Option<Value> { + if is_json_rpc_notification(request) { + return None; + } + let id = request.get("id").cloned().unwrap_or(Value::Null); + let Some(method) = request.get("method").and_then(Value::as_str) else { + return Some(error_response(&id, -32600, "invalid request")); + }; + + Some(match method { + "initialize" => result_response(&id, &initialize_result_no_index(project_root)), + "tools/list" => result_response( + &id, + &json!({"tools": list_tools_for_policy(McpToolPolicy::default())}), + ), + "tools/call" => result_response( + &id, + &json!({ + "content": [ + { "type": "text", "text": no_index_message(project_root) } + ], + "isError": true + }), + ), + "prompts/list" => result_response(&id, &prompts_list()), + "prompts/get" => prompts_get(&id, request.get("params")), + _ => error_response(&id, -32601, "method not found"), + }) +} + +/// Serve a degraded MCP stdio session for a project with no index. Mirrors +/// [`serve_stdio`] (synchronous — there are no storage-backed async tools to +/// drive) but routes every request through [`handle_json_rpc_no_index`]. Used by +/// `loomweave serve` when `.loomweave/loomweave.db` is absent, so the client +/// connects and is told to run analyze rather than watching the server exit. +pub fn serve_stdio_no_index( + project_root: &Path, + reader: &mut impl std::io::BufRead, + writer: &mut impl std::io::Write, +) -> Result<(), McpError> { + loop { + let Some(frame) = read_stdio_frame(reader)? 
else { + return Ok(()); + }; + let framing = frame.framing; + let request: Value = serde_json::from_slice(&frame.body)?; + if let Some(response) = handle_json_rpc_no_index(&request, project_root) { + write_stdio_response(writer, &encode_response_frame(&response)?, framing)?; + } + } +} + /// Deterministic, non-storage diagnostics threaded in at server construction so /// `project_status` can report the LLM policy and the resolved Filigree /// endpoint without re-reading config or re-running URL resolution. Optional: @@ -4956,6 +5062,92 @@ mod tests { } } + #[test] + fn no_index_initialize_chirps_install_and_analyze() { + let root = std::path::Path::new("/tmp/demo"); + let request = serde_json::json!({"jsonrpc": "2.0", "id": 1, "method": "initialize"}); + let response = + super::handle_json_rpc_no_index(&request, root).expect("initialize yields a response"); + assert_eq!( + response["result"]["protocolVersion"], + super::MCP_PROTOCOL_VERSION + ); + assert_eq!(response["result"]["serverInfo"]["name"], "loomweave"); + assert!(response["result"]["capabilities"]["tools"].is_object()); + let instructions = response["result"]["instructions"] + .as_str() + .expect("instructions present"); + // Both halves of the canonical hook sequence, plus the project path. 
+ assert!( + instructions.contains("loomweave install --path /tmp/demo"), + "instructions: {instructions}" + ); + assert!( + instructions.contains("loomweave analyze /tmp/demo"), + "instructions: {instructions}" + ); + } + + #[test] + fn no_index_tools_call_returns_actionable_is_error() { + let root = std::path::Path::new("/tmp/demo"); + let request = serde_json::json!({ + "jsonrpc": "2.0", + "id": 2, + "method": "tools/call", + "params": {"name": "entity_find", "arguments": {"query": "foo"}} + }); + let response = super::handle_json_rpc_no_index(&request, root).expect("response"); + // isError is the load-bearing chirp channel — fires the moment the agent + // touches any tool, regardless of whether the client surfaced instructions. + assert_eq!(response["result"]["isError"], serde_json::json!(true)); + let text = response["result"]["content"][0]["text"] + .as_str() + .expect("tool result text"); + assert!( + text.contains("loomweave analyze /tmp/demo"), + "tool chirp text: {text}" + ); + } + + #[test] + fn no_index_tools_list_still_advertises_tools() { + let root = std::path::Path::new("/tmp/demo"); + let request = serde_json::json!({"jsonrpc": "2.0", "id": 3, "method": "tools/list"}); + let response = super::handle_json_rpc_no_index(&request, root).expect("response"); + let tools = response["result"]["tools"].as_array().expect("tools array"); + assert!( + !tools.is_empty(), + "degraded tools/list should still advertise the surface" + ); + } + + #[test] + fn no_index_ignores_notifications() { + let root = std::path::Path::new("/tmp/demo"); + // The client sends notifications/initialized right after initialize; it + // has no id and must draw no response. 
+ let request = serde_json::json!({"jsonrpc": "2.0", "method": "notifications/initialized"}); + assert!(super::handle_json_rpc_no_index(&request, root).is_none()); + } + + #[test] + fn serve_stdio_no_index_round_trips_initialize_over_json_line() { + let root = std::path::Path::new("/tmp/demo"); + let input = b"{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"initialize\"}\n"; + let mut reader = std::io::BufReader::new(&input[..]); + let mut output = Vec::new(); + super::serve_stdio_no_index(root, &mut reader, &mut output).expect("degraded serve"); + let response: serde_json::Value = serde_json::from_slice(&output).expect("framed json"); + let instructions = response["result"]["instructions"] + .as_str() + .expect("instructions present"); + assert!( + instructions.contains("loomweave analyze /tmp/demo"), + "instructions: {instructions}" + ); + } + #[test] fn initialize_returns_server_info_and_tools_capability() { let response = super::handle_json_rpc(&serde_json::json!({ From 49a6753d6ea741d44a8dc90ec12662baa11c4c8e Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 07:39:55 +1000 Subject: [PATCH 02/27] docs(adr): ADR-044 read-API ephemeral port; stopgap 9112; wardline tooling Read-API port deconfliction (clarion-7f574bc34f): - ADR-044 (Proposed): serve publishes .loomweave/ephemeral.port with a per-project deterministic port + ephemeral fallback and a loomweave-side resolver (twin of filigree_url), so concurrent projects stop colliding on the hardcoded 9111; installer stops pinning the port. Indexed in the ADR README. - Stopgap so this project coexists with others on 9111 until the ADR lands: loomweave.yaml serve.http.bind -> 127.0.0.1:9112 and wardline.yaml loomweave.url -> :9112. Wardline tooling: - .mcp.json: drop the hardcoded --loomweave-url/--filigree-url from the wardline MCP args (resolved from wardline.yaml instead); normalize server entries. 
- .pre-commit-config.yaml: add a local wardline-scan hook. - .agents/skills/wardline-gate: add the wardline-gate skill pack. - .gitignore: ignore the raw wardline scan output (findings.jsonl). Co-Authored-By: Claude Opus 4.8 (1M context) --- .agents/skills/wardline-gate/SKILL.md | 65 +++++++++++ .gitignore | 3 + .mcp.json | 24 ++--- .pre-commit-config.yaml | 9 ++ ...044-read-api-ephemeral-port-publication.md | 101 ++++++++++++++++++ docs/loomweave/adr/README.md | 1 + loomweave.yaml | 2 +- wardline.yaml | 4 +- 8 files changed, 192 insertions(+), 17 deletions(-) create mode 100644 .agents/skills/wardline-gate/SKILL.md create mode 100644 docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md diff --git a/.agents/skills/wardline-gate/SKILL.md b/.agents/skills/wardline-gate/SKILL.md new file mode 100644 index 00000000..87ee134a --- /dev/null +++ b/.agents/skills/wardline-gate/SKILL.md @@ -0,0 +1,65 @@ +--- +name: wardline-gate +description: > + Use when scanning for or fixing trust-boundary / taint findings, when a + `wardline scan` reports a defect, or when wiring wardline into an agent's + edit-verify loop. Explains the scan -> explain -> fix-at-the-boundary -> + rescan cycle and the baseline-vs-waiver discipline. +--- + +# Wardline: the trust-boundary gate + +Wardline is a deterministic, whole-program static taint analyzer. It marks trust +boundaries with two decorators from `wardline.decorators`: `@external_boundary` +(untrusted data arriving from outside) and `@trusted` (a producer that must only +receive validated data). When untrusted data reaches a trusted producer it raises +`PY-WL-101` at `ERROR`. + +## The loop + +1. **Scan.** Run `wardline scan . --fail-on ERROR` (or call the `scan` MCP tool). + Read the gate verdict and the active (non-suppressed) findings — `active` is + the population the gate enforces on. +2. 
**Explain.** For each active defect, call `explain_taint` with the finding's + `fingerprint`, `path`+`line`, and its `qualname` as `sink_qualname`. Do this + right after the scan and before editing — a stale fingerprint returns an error. + With a Loomweave store configured, pass `chain: true` to walk the full taint + chain back to the originating boundary. +3. **Fix at the BOUNDARY, not the sink.** Add validation or rejection at the hop + where untrusted data should have been checked — not a band-aid at the sink. +4. **Re-scan.** Confirm the finding is gone. + +## Exit codes (CLI path) + +- `0` — clean (or gate not requested). +- `1` — the gate tripped: a non-suppressed defect at/above `--fail-on`. +- `2` — a wardline error (bad config, unreadable path). Not a finding. + +Branch on the code. On a trip, read the structured report wardline just wrote — +the finding names the function, file, and lines, which is enough to locate the +leak. + +## Suppression discipline + +Prefer FIXING a finding. Suppress only a finding you have judged a true +non-issue, always with a reason: + +- MCP `baseline` — snapshot current defects so only NEW findings surface. + `overwrite: false` (default) refuses to clobber an existing baseline; + `overwrite: true` re-derives it. A coarse, whole-set tool; requires a reason. +- `waiver_add` — waive ONE finding by fingerprint with a mandatory reason and an + expiry date. An audited, time-boxed exception. +- `wardline judge` (opt-in, network) — an LLM pass that labels each defect + TRUE/FALSE positive. Never runs automatically, never folded into scan; fails + loud with no API key so "couldn't triage" is never mistaken for "nothing to + triage". Above-floor false positives can be recorded as audited suppressions. + +## CLI vs MCP + +- **CLI:** `wardline scan`, `wardline judge`, `wardline baseline create/update`. + Branch on the exit code; read the findings file it writes. 
+- **MCP:** `wardline mcp` exposes `scan`, `explain_taint`, `fix`, `judge` + (network), `baseline`, `waiver_add`; resources + `wardline://vocab|rules|config|config-schema`; and the `wardline:loop` prompt. + The server is stateless — the read-only tools are pure functions of your code + on disk and your config. diff --git a/.gitignore b/.gitignore index 9b5e95da..5713582a 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,9 @@ tests/e2e/external-operator-smoke-results-*.md # Documentation site build output (mkdocs `site_dir`, web/mkdocs.yml). /site-build/ +# Wardline scan output written by `wardline scan` (raw, regenerated per run). +/findings.jsonl + # Filigree-managed docs — a running filigree process rewrites its managed # instruction blocks in these every session; untracked to avoid diff churn # (filigree regenerates them on demand). diff --git a/.mcp.json b/.mcp.json index 6b84d9ec..6b3c178d 100644 --- a/.mcp.json +++ b/.mcp.json @@ -1,30 +1,26 @@ { "mcpServers": { + "filigree": { + "args": [], + "command": "/home/john/.local/bin/filigree-mcp", + "type": "stdio" + }, "loomweave": { "args": [ "serve" ], - "command": "/home/john/.local/bin/loomweave", + "command": "/home/john/.local/share/uv/tools/loomweave/bin/loomweave", "env": {}, "type": "stdio" }, - "filigree": { - "args": [], - "command": "/home/john/.local/bin/filigree-mcp", - "type": "stdio" - }, "wardline": { + "type": "stdio", + "command": "/home/john/.local/bin/wardline", "args": [ "mcp", "--root", - ".", - "--loomweave-url", - "http://127.0.0.1:9111", - "--filigree-url", - "http://127.0.0.1:8542/api/weft/scan-results" - ], - "command": "/home/john/.local/bin/wardline", - "type": "stdio" + "." 
+ ] } } } diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d4158384..44b775f4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,3 +19,12 @@ repos: args: [--strict, --config-file=plugins/python/pyproject.toml, plugins/python] additional_dependencies: - pytest>=8.0 + - repo: local + hooks: + - id: wardline-scan + name: wardline scan + entry: wardline scan + language: system + types: [python] + pass_filenames: false + diff --git a/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md new file mode 100644 index 00000000..be88c80a --- /dev/null +++ b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md @@ -0,0 +1,101 @@ +# ADR-044: Read-API Ephemeral Port Publication + +**Status**: Proposed +**Date**: 2026-06-06 +**Relates to**: [ADR-034](./ADR-034-federation-http-read-api-hardening.md) +**Tracking**: clarion-7f574bc34f + +## Context + +`loomweave serve` exposes a federation HTTP read API. Its bind address is a +static `127.0.0.1:9111` — the default (`loomweave-federation/src/config.rs`) and +the value the installer stamps into every project's `loomweave.yaml` +(`crates/loomweave-cli/src/install.rs`). Every project gets the same port. + +Consequences observed live: + +- **Bind collision.** Two `loomweave serve` instances cannot run concurrently — + whichever starts first binds 9111, the second fails with `Address already in + use`. A `legis` session held 9111 for hours while another project's serve + could not come up. +- **Consumer mis-targeting.** Consumers point at the static port too + (`wardline.yaml: loomweave.url: http://127.0.0.1:9111`). A second project's + wardline therefore reaches the *first* project's loomweave instance. ADR-034's + instance-ID guard correctly rejects the cross-project taint write + (`PROJECT_MISMATCH`) — no data is corrupted — but federation is silently dead + for the mis-targeted project. 
+ +Loomweave already solved the *consumer* side of this problem for the sibling +direction: `loomweave-federation/src/filigree_url.rs` resolves Filigree's live +endpoint by reading `/.filigree/ephemeral.port` (Filigree publishes a +per-project, deterministic-but-unpredictable port `8400 + sha256(path) % 1000`, +atomically, present only while running; consumers read it, never compute it, and +fail soft to configured URL). Loomweave never applied the same convention to its +*own* read API. + +Picking a free port at install time does not fix this: it is TOCTOU (a port free +at install can be taken before `serve` runs, and two installs at different times +can pick the same "first free" port). That is precisely why the established +pattern publishes the live port at runtime rather than assigning it at install. + +## Decision + +Mirror Filigree's endpoint-discovery convention symmetrically for loomweave's +own read API. + +1. **Deterministic per-project port, ephemeral fallback.** `serve` binds a + per-project deterministic port derived from the canonical project path, in a + loomweave-specific band chosen to *not* overlap Filigree's `8400–9399` band + (so the two products never contend for the same number). If that port is + taken, fall back to an OS-assigned ephemeral port (`bind :0`). The + bind-and-discover primitive already exists in test form at + `crates/loomweave-cli/src/http_read.rs` and is generalized to the production + serve path. +2. **Publish the live port.** On successful bind, write the *actual* bound port + to `/.loomweave/ephemeral.port` (plain integer, atomic write, + removed on clean shutdown, present only while serving) — the loomweave twin of + `.filigree/ephemeral.port`. +3. **Loomweave-side resolver.** Add a resolver in `loomweave-federation` (the + twin of `resolve_filigree_url`) that prefers `.loomweave/ephemeral.port` over + static config and fails soft when the file is missing/corrupt. 
Consumers use + it: wardline's `loomweave.url`, and loomweave's own `doctor` / + `project_status_get` (which report the resolved source, mirroring how + `project_status` reports the Filigree resolution). +4. **Installer stops pinning a port.** `install` no longer stamps a fixed + `serve.http.bind: 127.0.0.1:9111`. The `loomweave.yaml` stub documents that + the read-API port is auto-selected and published; an explicit `bind` override + remains honored for operators who need a fixed port. + +`.loomweave/ephemeral.port` is a runtime artifact and is git-ignored, consistent +with ADR-005's treatment of run-time-only state. + +## Consequences + +- Two or more projects can `serve` concurrently without port contention; the + cross-project `PROJECT_MISMATCH` federation failure disappears because each + consumer resolves *its own* project's live port. +- The read-API port becomes a *read-this-file*, never a *compute-or-configure*, + fact — matching the discipline loomweave already imposes on consuming + Filigree. "Read, never compute" is the load-bearing rule: nothing should hard + code or re-derive the band formula to guess a peer's port. +- Consumers pinned to a literal `:9111` (e.g. existing `wardline.yaml` files) + must migrate to the resolver. Until they do, they fail soft to the configured + URL — degraded, not broken. +- Federation stays enrich-only and solo-useful: a project with no published port + file (serve not running, or feature disabled) degrades to the configured + `base_url`, never to a sibling-internal default. + +## Verification + +- Two serves on distinct project paths bind distinct ports and each publishes + its own `.loomweave/ephemeral.port`; neither fails to bind. +- A deterministic-port collision forces the ephemeral-`0` fallback, and the + published file reflects the *actually* bound port (not the deterministic + guess). 
+- The resolver prefers the published port over stale config and fails soft on + missing/corrupt/out-of-range content (twin of the `filigree_url` resolver + tests). +- The published file is removed on clean shutdown; a consumer reading a stale + file degrades rather than erroring. +- A wardline scan against a project whose loomweave serve is running on a + non-9111 port resolves and writes taint successfully (no `PROJECT_MISMATCH`). diff --git a/docs/loomweave/adr/README.md b/docs/loomweave/adr/README.md index 3bafec9d..b18cd28f 100644 --- a/docs/loomweave/adr/README.md +++ b/docs/loomweave/adr/README.md @@ -44,6 +44,7 @@ This folder is the canonical home for authored Loomweave architecture decision r | [ADR-041](./ADR-041-resume-is-idempotent-reemit.md) | Analyze resume is idempotent re-emit, not checkpoint recovery; amends ADR-005/ADR-011 resume language | Accepted | | [ADR-042](./ADR-042-hmac-freshness-and-replay-window.md) | HMAC freshness and replay window — timestamp + nonce headers, crate-backed HMAC, process-local replay cache | Accepted | | [ADR-043](./ADR-043-edge-reanalysis-replacement.md) | Edge reanalysis replacement — per-source-file anchored-edge replacement and edge metadata upsert; amends ADR-026 | Accepted | +| [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — per-project deterministic port + `.loomweave/ephemeral.port` + loomweave-side resolver (twin of `filigree_url`), installer stops pinning 9111; relates to ADR-034 | Proposed | ## Backlog still tracked in the detailed design diff --git a/loomweave.yaml b/loomweave.yaml index 6efcfa9f..8ffa933d 100644 --- a/loomweave.yaml +++ b/loomweave.yaml @@ -36,7 +36,7 @@ llm_policy: session_token_ceiling: 1000000 serve: http: - bind: 127.0.0.1:9111 + bind: 127.0.0.1:9112 enabled: true wardline_taint_write: true version: 1 diff --git a/wardline.yaml b/wardline.yaml index dfc57f0b..5b049343 100644 --- a/wardline.yaml +++ b/wardline.yaml @@ -1,4 +1,4 @@ 
-loomweave: - url: http://127.0.0.1:9111 filigree: url: http://127.0.0.1:8542/api/weft/scan-results +loomweave: + url: http://127.0.0.1:9112 From d4e04745d8202bbff3652df797f5d72b83f796de Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 07:49:54 +1000 Subject: [PATCH 03/27] docs(adr): pin ADR-044 ephemeral-port file as normative cross-product contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Incorporates the Wardline (consumer-side) review. The interop surface is the file, not loomweave's Rust resolver — Wardline implements its own Python reader against it (SEI-style "consumers conform"). Pins, as normative: - File contract: /.loomweave/ephemeral.port, plain-ASCII port only, optional trailing newline, host/scheme implied, atomic temp+rename, created on loopback bind / removed on clean shutdown. - Loopback-only publication: a non-loopback bind (allow_non_loopback, ADR-034) publishes no file, so the port-only format never under-specifies the host. - Resolution precedence (consume-time, per read): explicit flag/env > published file > configured url > none. The file self-heals stale/default config but never overrides a deliberate explicit target. - Fail-soft: validate 1..=65535; malformed or resolved-but-refused (stale file / crashed serve) degrades, never errors. Instance-ID guard (ADR-034) is the correctness backstop so the reader can be simple. - Related follow-up: consume-time resolution applies to both sibling legs; Wardline's filigree leg (install-time today) should unify Wardline-side. 
Tracking: clarion-7f574bc34f Co-Authored-By: Claude Opus 4.8 (1M context) --- ...044-read-api-ephemeral-port-publication.md | 122 ++++++++++++++---- docs/loomweave/adr/README.md | 2 +- 2 files changed, 100 insertions(+), 24 deletions(-) diff --git a/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md index be88c80a..28a02633 100644 --- a/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md +++ b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md @@ -41,7 +41,13 @@ pattern publishes the live port at runtime rather than assigning it at install. ## Decision Mirror Filigree's endpoint-discovery convention symmetrically for loomweave's -own read API. +own read API. The **interop surface is the file**, not loomweave's resolver: +the resolver below is loomweave's own conforming reader, but the contract that +binds siblings is `.loomweave/ephemeral.port` itself. Cross-product consumers +(notably Wardline, which is Python and cannot call a Rust resolver) implement +their own reader against the file contract — the same "this is the contract, +consumers conform" posture as the SEI token (ADR-038). The normative file +contract and resolution semantics are pinned below. 1. **Deterministic per-project port, ephemeral fallback.** `serve` binds a per-project deterministic port derived from the canonical project path, in a @@ -51,23 +57,79 @@ own read API. bind-and-discover primitive already exists in test form at `crates/loomweave-cli/src/http_read.rs` and is generalized to the production serve path. -2. **Publish the live port.** On successful bind, write the *actual* bound port - to `/.loomweave/ephemeral.port` (plain integer, atomic write, - removed on clean shutdown, present only while serving) — the loomweave twin of - `.filigree/ephemeral.port`. +2. 
**Publish the live port** to `<project_root>/.loomweave/ephemeral.port` per the + file contract below — the loomweave twin of `.filigree/ephemeral.port`. 3. **Loomweave-side resolver.** Add a resolver in `loomweave-federation` (the - twin of `resolve_filigree_url`) that prefers `.loomweave/ephemeral.port` over - static config and fails soft when the file is missing/corrupt. Consumers use - it: wardline's `loomweave.url`, and loomweave's own `doctor` / - `project_status_get` (which report the resolved source, mirroring how - `project_status` reports the Filigree resolution). + twin of `resolve_filigree_url`) implementing the resolution semantics below. + Loomweave's own consumers use it (`doctor`, `project_status_get`, which report + the resolved source). It is *one* conforming reader, not the interop surface. 4. **Installer stops pinning a port.** `install` no longer stamps a fixed `serve.http.bind: 127.0.0.1:9111`. The `loomweave.yaml` stub documents that the read-API port is auto-selected and published; an explicit `bind` override remains honored for operators who need a fixed port. -`.loomweave/ephemeral.port` is a runtime artifact and is git-ignored, consistent -with ADR-005's treatment of run-time-only state. +## File contract (normative) + +`.loomweave/ephemeral.port` is the cross-product interop surface. Producers +(loomweave `serve`) and every consumer (loomweave, Wardline, future siblings) +conform to exactly this: + +- **Path:** `<project_root>/.loomweave/ephemeral.port`, where `<project_root>` + is the directory the consumer is scanning/serving (the same anchor as + `.filigree/ephemeral.port`). +- **Content:** a single plain-ASCII integer — the **TCP port only**. No host, no + scheme, no key. An optional single trailing `\n` is permitted and ignored. No + other bytes. +- **Host/scheme are implied, not stored:** `127.0.0.1` and `http`. This is sound + *only* because publication is loopback-only (next bullet); a consumer composes + `http://127.0.0.1:<port>`. 
+- **Loopback-only publication.** The file is written **only when `serve` binds a + loopback address**. If an operator opts into a non-loopback bind + (`allow_non_loopback`, ADR-034), `serve` does **not** publish the file — that + deployment is explicit-config territory and consumers fall back to their + configured URL (where the operator set the reachable host). This keeps the + port-only format unambiguous and prevents a port-only reader from mis-targeting + a non-loopback host. +- **Atomic write:** write to a temp file in `.loomweave/` and `rename(2)` into + place, so a reader never observes a partial/torn value. +- **Lifecycle:** created/refreshed on successful loopback bind; removed on clean + shutdown. Present-only-while-serving is best-effort, not guaranteed — a crash + leaves a stale file, which resolution semantics handle (below). +- **Git-ignored** runtime artifact, consistent with ADR-005's treatment of + run-time-only state. + +## Resolution semantics (normative) + +Every consumer resolves **at consume time** (each scan / read), never caches the +resolution at install time — a port resolved once and reused goes stale exactly +when another project rebinds. Wardline's filigree leg, which resolves at install +time today, is the cautionary case (see related follow-up). + +**Precedence (highest wins):** + +1. An **explicit, deliberate target** — `--loomweave-url` flag or environment + override — always wins. The published port must never override a target the + operator set on purpose (remote loomweave, debugging a specific instance). +2. The **published port file** `.loomweave/ephemeral.port` (composed to + `http://127.0.0.1:<port>`). This **beats a stale/default configured URL** so + resolution self-heals without a config edit. +3. The **configured URL** (e.g. `wardline.yaml: loomweave.url`). +4. **None** — federation is simply absent for this read; degrade, do not error.
+ +**Fail-soft is mandatory at every step:** + +- The port value MUST be validated to `1..=65535`. Missing, non-integer, + out-of-range, or otherwise malformed content → fall through to the next + precedence level (it is not an error). +- A **resolved-but-refused** connection (file present, but the port is closed — + crashed serve / stale file) MUST be treated as soft: fall through to configured + URL or none. This — not malformed content — is the case a live consumer hits + most, and it must never surface as a hard error. +- The instance-ID guard (ADR-034) is the **correctness backstop** that lets the + reader be simple rather than perfect: even if a stale file points at a port now + owned by *another* project's serve, the write is rejected `PROJECT_MISMATCH`, + fail-soft — a stale file degrades, never corrupts. Consumers rely on this; they + do not need to verify project identity before connecting. ## Consequences @@ -76,14 +138,15 @@ with ADR-005's treatment of run-time-only state. consumer resolves *its own* project's live port. - The read-API port becomes a *read-this-file*, never a *compute-or-configure*, fact — matching the discipline loomweave already imposes on consuming - Filigree. "Read, never compute" is the load-bearing rule: nothing should hard - code or re-derive the band formula to guess a peer's port. + Filigree. "Read, never compute" is the load-bearing rule: nothing hard codes or + re-derives the band formula to guess a peer's port. - Consumers pinned to a literal `:9111` (e.g. existing `wardline.yaml` files) - must migrate to the resolver. Until they do, they fail soft to the configured - URL — degraded, not broken. + self-heal once they prefer the published file over config (precedence 2 > 3) — + no user edit required. Until a consumer adopts the resolver it fails soft to the + configured URL — degraded, not broken. 
- Federation stays enrich-only and solo-useful: a project with no published port - file (serve not running, or feature disabled) degrades to the configured - `base_url`, never to a sibling-internal default. + file (serve not running, feature disabled, or non-loopback bind) degrades to + the configured `base_url`, never to a sibling-internal default. ## Verification @@ -92,10 +155,23 @@ with ADR-005's treatment of run-time-only state. - A deterministic-port collision forces the ephemeral-`0` fallback, and the published file reflects the *actually* bound port (not the deterministic guess). -- The resolver prefers the published port over stale config and fails soft on - missing/corrupt/out-of-range content (twin of the `filigree_url` resolver - tests). -- The published file is removed on clean shutdown; a consumer reading a stale - file degrades rather than erroring. +- File contract: published content is a bare port (optional trailing newline), + written via temp + rename; a non-loopback bind publishes **no** file. +- Precedence: an explicit `--loomweave-url`/env target overrides the published + file; the published file overrides a stale/default configured URL; absent file + falls through to config, then none. +- Fail-soft: missing / non-integer / out-of-range (`0`, `>65535`) content, and a + **resolved-but-refused** connection (stale file, closed port), each degrade to + the next precedence level rather than erroring. +- The published file is removed on clean shutdown. - A wardline scan against a project whose loomweave serve is running on a non-9111 port resolves and writes taint successfully (no `PROJECT_MISMATCH`). + +## Related follow-up (not blocking this ADR) + +Consume-time live-port resolution should apply to **both** sibling directions. +Wardline reads `.filigree/ephemeral.port` only at install time and uses the +static config URL at scan time, so its filigree leg carries the same latent +staleness this ADR removes for the loomweave leg. 
Unifying both consumers on +consume-time resolution is Wardline-side work, tracked separately; flagged here +so the two legs are not designed divergently. diff --git a/docs/loomweave/adr/README.md b/docs/loomweave/adr/README.md index b18cd28f..8421b4fb 100644 --- a/docs/loomweave/adr/README.md +++ b/docs/loomweave/adr/README.md @@ -44,7 +44,7 @@ This folder is the canonical home for authored Loomweave architecture decision r | [ADR-041](./ADR-041-resume-is-idempotent-reemit.md) | Analyze resume is idempotent re-emit, not checkpoint recovery; amends ADR-005/ADR-011 resume language | Accepted | | [ADR-042](./ADR-042-hmac-freshness-and-replay-window.md) | HMAC freshness and replay window — timestamp + nonce headers, crate-backed HMAC, process-local replay cache | Accepted | | [ADR-043](./ADR-043-edge-reanalysis-replacement.md) | Edge reanalysis replacement — per-source-file anchored-edge replacement and edge metadata upsert; amends ADR-026 | Accepted | -| [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — per-project deterministic port + `.loomweave/ephemeral.port` + loomweave-side resolver (twin of `filigree_url`), installer stops pinning 9111; relates to ADR-034 | Proposed | +| [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — `.loomweave/ephemeral.port` as a normative cross-product file contract (loopback-only, port-only, atomic) + consume-time resolution precedence (explicit > file > config > none), per-project deterministic port, installer stops pinning 9111; relates to ADR-034 | Proposed | ## Backlog still tracked in the detailed design From a0731d45ebe2914477dcbaed137bab7afd45e692 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:09:25 +1000 Subject: [PATCH 04/27] =?UTF-8?q?feat(federation):=20loomweave=5Fport=20?= =?UTF-8?q?=E2=80=94=20deterministic=20read-API=20port=20+=20atomic=20publ?= 
=?UTF-8?q?ish=20(ADR-044)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 --- crates/loomweave-federation/Cargo.toml | 1 + crates/loomweave-federation/src/lib.rs | 1 + .../src/loomweave_port.rs | 193 ++++++++++++++++++ 3 files changed, 195 insertions(+) create mode 100644 crates/loomweave-federation/src/loomweave_port.rs diff --git a/crates/loomweave-federation/Cargo.toml b/crates/loomweave-federation/Cargo.toml index 27006e45..2ca76605 100644 --- a/crates/loomweave-federation/Cargo.toml +++ b/crates/loomweave-federation/Cargo.toml @@ -10,6 +10,7 @@ rust-version.workspace = true workspace = true [dependencies] +blake3.workspace = true loomweave-core = { path = "../loomweave-core", version = "1.0.0" } reqwest.workspace = true serde.workspace = true diff --git a/crates/loomweave-federation/src/lib.rs b/crates/loomweave-federation/src/lib.rs index 43993c83..bd0db468 100644 --- a/crates/loomweave-federation/src/lib.rs +++ b/crates/loomweave-federation/src/lib.rs @@ -3,4 +3,5 @@ pub mod config; pub mod filigree; pub mod filigree_url; +pub mod loomweave_port; pub mod scan_results; diff --git a/crates/loomweave-federation/src/loomweave_port.rs b/crates/loomweave-federation/src/loomweave_port.rs new file mode 100644 index 00000000..bc2dcf73 --- /dev/null +++ b/crates/loomweave-federation/src/loomweave_port.rs @@ -0,0 +1,193 @@ +//! Loomweave read-API ephemeral-port contract (ADR-044). +//! +//! The twin of Filigree's `.filigree/ephemeral.port` convention, applied to +//! Loomweave's own federation HTTP read API. `serve` binds a per-project +//! deterministic port (ephemeral `:0` fallback) and publishes the *actually +//! bound* port to `<project_root>/.loomweave/ephemeral.port`. Cross-product +//! consumers (notably Wardline, which is Python) read this file; nobody +//! recomputes a peer's port. The deterministic band here is an implementation +//! detail, never part of the file contract. +//! +//!
File contract (ADR-044, normative): a single plain-ASCII integer TCP port, +//! optional trailing `\n`, written atomically (temp + rename), present only +//! while `serve` holds a loopback bind. Host (`127.0.0.1`) and scheme (`http`) +//! are implied, sound only because publication is loopback-only. + +use std::path::{Path, PathBuf}; + +/// Base of Loomweave's deterministic read-API port band. Chosen to sit +/// **above** Filigree's `8400–9399` band so the two products never contend for +/// the same number. Internal only — never part of the cross-product file +/// contract (consumers read the published file, never recompute). +pub const PORT_BAND_BASE: u16 = 9400; +/// Width of the band: ports land in `[9400, 10400)` i.e. `9400..=10399`. +pub const PORT_BAND_SPAN: u16 = 1000; + +/// Canonical path of the published port file for a project root. +#[must_use] +pub fn published_port_path(project_root: &Path) -> PathBuf { + project_root.join(".loomweave").join("ephemeral.port") +} + +/// Deterministic-but-unpredictable read-API port for a project, derived from +/// the canonical project path. Stable across runs (so a consumer's static +/// config can match it) yet path-specific (so two projects differ). Mirrors +/// Filigree's `8400 + hash % 1000`, in a disjoint band, using Loomweave's own +/// hash (blake3, as for SEI). The bound port is published; this computation is +/// the producer's *starting guess*, not a value any consumer recomputes. +/// +/// # Panics +/// +/// Never in practice: the `expect` calls are on infallible arithmetic +/// (`blake3` always produces 32 bytes; `% 1000 < 1000` always fits `u16`). +#[must_use] +pub fn deterministic_port(project_root: &Path) -> u16 { + // Best-effort canonicalize so every caller (serve, install, doctor) agrees + // regardless of whether it pre-canonicalized; fall back to the path as-given. 
+ let canonical = project_root + .canonicalize() + .unwrap_or_else(|_| project_root.to_path_buf()); + let bytes = canonical.to_string_lossy(); + let hash = blake3::hash(bytes.as_bytes()); + let head = u64::from_le_bytes( + hash.as_bytes()[..8] + .try_into() + .expect("blake3 digest is 32 bytes, so [..8] is 8 bytes"), + ); + let offset = u16::try_from(head % u64::from(PORT_BAND_SPAN)) + .expect("remainder of % 1000 is < 1000, which fits u16"); + PORT_BAND_BASE + offset +} + +/// Read and validate the published port. Any missing / non-integer / +/// out-of-range / zero content folds to `None` (fail-soft, ADR-044). A `u16` +/// parse already bounds `1..=65535` except `0`, which we reject explicitly. +#[must_use] +pub fn read_published_port(project_root: &Path) -> Option<u16> { + let raw = std::fs::read_to_string(published_port_path(project_root)).ok()?; + raw.trim().parse::<u16>().ok().filter(|port| *port != 0) +} + +/// Atomically publish `port` to `<project_root>/.loomweave/ephemeral.port`. +/// Writes a temp file in the same directory and `rename(2)`s it into place, so +/// a concurrent reader never observes a torn value. Creates `.loomweave/` if +/// absent. The caller is responsible for the loopback-only invariant (only call +/// this when the bound address is loopback). +/// +/// # Errors +/// Returns the underlying I/O error if the directory cannot be created or the +/// temp file cannot be written/renamed. +pub fn publish_port(project_root: &Path, port: u16) -> std::io::Result<()> { + let dir = project_root.join(".loomweave"); + std::fs::create_dir_all(&dir)?; + // One `serve` per process publishes, so the PID makes the temp name unique + // within this directory without needing a random suffix. + let tmp = dir.join(format!("ephemeral.port.{}.tmp", std::process::id())); + std::fs::write(&tmp, format!("{port}\n"))?; + std::fs::rename(&tmp, dir.join("ephemeral.port"))?; + Ok(()) +} + +/// Best-effort removal of the published port file. A missing file is not an +/// error (idempotent).
Called on clean shutdown; SIGKILL leaves a stale file, +/// which `read_published_port` validation + the ADR-034 instance-ID guard +/// handle (a stale file degrades, never corrupts). +pub fn remove_published_port(project_root: &Path) { + let _ = std::fs::remove_file(published_port_path(project_root)); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn deterministic_port_is_stable_and_in_band() { + let dir = tempfile::tempdir().unwrap(); + let a = deterministic_port(dir.path()); + let b = deterministic_port(dir.path()); + assert_eq!(a, b, "same path must yield the same port"); + assert!( + (PORT_BAND_BASE..PORT_BAND_BASE + PORT_BAND_SPAN).contains(&a), + "port {a} must land in the loomweave band [{PORT_BAND_BASE}, {})", + PORT_BAND_BASE + PORT_BAND_SPAN + ); + // Disjoint from Filigree's 8400-9399 band. + assert!( + a >= 9400, + "port {a} must not overlap Filigree's 8400-9399 band" + ); + } + + #[test] + fn deterministic_port_differs_by_path() { + let a = tempfile::tempdir().unwrap(); + let b = tempfile::tempdir().unwrap(); + // Distinct tempdirs almost always hash to distinct ports; assert the + // function is path-sensitive by checking the inputs differ and the + // computation is a pure function of the (canonical) path. + assert_ne!(a.path(), b.path()); + let pa = deterministic_port(a.path()); + let pb = deterministic_port(b.path()); + // Not guaranteed distinct (1/1000 collision), but the band membership + // and determinism are what matter; assert both are in-band. + assert!(pa >= 9400 && pb >= 9400); + } + + #[test] + fn publish_then_read_round_trips() { + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9412).expect("publish"); + assert_eq!(read_published_port(dir.path()), Some(9412)); + // Published content is the bare port plus a single trailing newline. 
+ let raw = std::fs::read_to_string(published_port_path(dir.path())).unwrap(); + assert_eq!(raw, "9412\n"); + } + + #[test] + fn publish_creates_loomweave_dir_if_absent() { + let dir = tempfile::tempdir().unwrap(); + // No .loomweave/ yet. + assert!(!dir.path().join(".loomweave").exists()); + publish_port(dir.path(), 10000).expect("publish creates .loomweave/"); + assert_eq!(read_published_port(dir.path()), Some(10000)); + } + + #[test] + fn read_tolerates_trailing_whitespace_and_newline() { + let dir = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + std::fs::write(published_port_path(dir.path()), " 9500 \n").unwrap(); + assert_eq!(read_published_port(dir.path()), Some(9500)); + } + + #[test] + fn read_rejects_malformed_zero_and_out_of_range() { + let dir = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + for bad in ["", "not-a-port", "0", "65536", "70000", "-1", "12.5"] { + std::fs::write(published_port_path(dir.path()), bad).unwrap(); + assert_eq!( + read_published_port(dir.path()), + None, + "malformed/out-of-range content {bad:?} must fold to None (fail-soft)" + ); + } + } + + #[test] + fn read_absent_file_is_none() { + let dir = tempfile::tempdir().unwrap(); + assert_eq!(read_published_port(dir.path()), None); + } + + #[test] + fn remove_is_idempotent_and_clears_the_file() { + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9999).unwrap(); + assert!(published_port_path(dir.path()).exists()); + remove_published_port(dir.path()); + assert!(!published_port_path(dir.path()).exists()); + // Second remove on an absent file is a no-op, not an error. 
+ remove_published_port(dir.path()); + } +} From adc312218da2cd1da2f722ee436c5435099c558a Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:13:38 +1000 Subject: [PATCH 05/27] refactor(federation): loomweave_port publish uses canonical path + cleans temp on rename failure Co-Authored-By: Claude Sonnet 4.6 --- crates/loomweave-federation/src/loomweave_port.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/loomweave-federation/src/loomweave_port.rs b/crates/loomweave-federation/src/loomweave_port.rs index bc2dcf73..dfbb186b 100644 --- a/crates/loomweave-federation/src/loomweave_port.rs +++ b/crates/loomweave-federation/src/loomweave_port.rs @@ -20,7 +20,7 @@ use std::path::{Path, PathBuf}; /// the same number. Internal only — never part of the cross-product file /// contract (consumers read the published file, never recompute). pub const PORT_BAND_BASE: u16 = 9400; -/// Width of the band: ports land in `[9400, 10400)` i.e. `9400..=10399`. +/// Width of the band: ports land in `[PORT_BAND_BASE, PORT_BAND_BASE + PORT_BAND_SPAN)`. pub const PORT_BAND_SPAN: u16 = 1000; /// Canonical path of the published port file for a project root. @@ -84,7 +84,11 @@ pub fn publish_port(project_root: &Path, port: u16) -> std::io::Result<()> { // within this directory without needing a random suffix. let tmp = dir.join(format!("ephemeral.port.{}.tmp", std::process::id())); std::fs::write(&tmp, format!("{port}\n"))?; - std::fs::rename(&tmp, dir.join("ephemeral.port"))?; + if let Err(err) = std::fs::rename(&tmp, published_port_path(project_root)) { + // A successful write + failed rename would otherwise strand the temp. 
+ let _ = std::fs::remove_file(&tmp); + return Err(err); + } Ok(()) } From 69e1ff5252866b97c7128c5c4ecf9d68ae7447a0 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:21:19 +1000 Subject: [PATCH 06/27] feat(config): serve.http.bind is Option; None auto-selects per-project port (ADR-044) Co-Authored-By: Claude Sonnet 4.6 --- Cargo.lock | 1 + crates/loomweave-cli/src/http_read.rs | 20 ++++-- crates/loomweave-federation/src/config.rs | 78 +++++++++++++++++++---- 3 files changed, 84 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 508f3d12..c6cb083e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1125,6 +1125,7 @@ dependencies = [ name = "loomweave-federation" version = "1.0.0" dependencies = [ + "blake3", "loomweave-core", "reqwest", "serde", diff --git a/crates/loomweave-cli/src/http_read.rs b/crates/loomweave-cli/src/http_read.rs index 41b4c16f..fe8ff9c4 100644 --- a/crates/loomweave-cli/src/http_read.rs +++ b/crates/loomweave-cli/src/http_read.rs @@ -244,7 +244,16 @@ where .map(|raw| raw.trim().to_owned()) .filter(|trimmed| !trimmed.is_empty()) .map(Arc::new); - let bind = config.bind; + // ADR-044: an unset bind means auto-select a per-project deterministic + // read-API port. An explicit bind is honored verbatim. (Task 3 adds the + // ephemeral fallback + published-file lifecycle.) 
+ let auto_port = config.bind.is_none(); + let bind = config.bind.unwrap_or_else(|| { + std::net::SocketAddr::from(( + [127, 0, 0, 1], + loomweave_federation::loomweave_port::deterministic_port(&project_root), + )) + }); let warn_unauthenticated_non_loopback = config.allow_non_loopback && !config.is_loopback_bind() && auth_token.is_none() @@ -272,6 +281,7 @@ where auth_token_thread, identity_secret_thread, bind, + auto_port, shutdown_rx, ready_tx, ); @@ -327,6 +337,8 @@ fn run_http_read_server( auth_token: Option>, identity_secret: Option>, bind: std::net::SocketAddr, + // ADR-044 Task 3 will consume this to drive the ephemeral fallback + publish. + _auto_port: bool, shutdown_rx: oneshot::Receiver<()>, ready_tx: mpsc::Sender>, ) -> Result<()> { @@ -836,7 +848,7 @@ mod tests { let config = HttpReadConfig { enabled: true, - bind, + bind: Some(bind), allow_non_loopback: false, token_env: "LOOMWEAVE_LOOPBACK_NO_TOKEN_TEST_UNSET".to_owned(), identity_token_env: None, @@ -907,7 +919,7 @@ mod tests { let config = HttpReadConfig { enabled: true, - bind, + bind: Some(bind), allow_non_loopback: false, wardline_taint_write: true, ..HttpReadConfig::default() @@ -963,7 +975,7 @@ mod tests { let config = HttpReadConfig { enabled: true, - bind, + bind: Some(bind), allow_non_loopback: false, ..HttpReadConfig::default() }; diff --git a/crates/loomweave-federation/src/config.rs b/crates/loomweave-federation/src/config.rs index e3b790a5..670e07bc 100644 --- a/crates/loomweave-federation/src/config.rs +++ b/crates/loomweave-federation/src/config.rs @@ -295,8 +295,8 @@ pub struct McpServeConfig { #[serde(default)] pub struct HttpReadConfig { pub enabled: bool, - #[serde(deserialize_with = "deserialize_socket_addr")] - pub bind: SocketAddr, + #[serde(default, deserialize_with = "deserialize_optional_socket_addr")] + pub bind: Option, pub allow_non_loopback: bool, /// Name of the env var holding the inbound bearer token. 
When the env /// var is set, every `/api/v1/files`-family request must carry @@ -323,7 +323,7 @@ impl Default for HttpReadConfig { fn default() -> Self { Self { enabled: false, - bind: SocketAddr::from(([127, 0, 0, 1], 9111)), + bind: None, allow_non_loopback: false, token_env: "WEFT_TOKEN".to_owned(), identity_token_env: None, @@ -333,11 +333,19 @@ impl Default for HttpReadConfig { } impl HttpReadConfig { + /// # Panics + /// + /// This function cannot panic in practice: the `.expect` is only reached + /// when `is_loopback_bind()` is `false`, which only occurs when + /// `self.bind` is `Some(non-loopback addr)`. pub fn validate_loopback_trust(&self) -> Result<(), ConfigError> { if self.enabled && !self.allow_non_loopback && !self.is_loopback_bind() { return Err(ConfigError::NonLoopbackHttpBind { code: "LMWV-CONFIG-HTTP-NON-LOOPBACK", - bind: self.bind, + // Safe: is_loopback_bind() is false only when bind is Some(non-loopback). + bind: self + .bind + .expect("non-loopback bind implies an explicit address"), }); } Ok(()) @@ -347,6 +355,12 @@ impl HttpReadConfig { /// token env var is unset. Loopback binds with the env var unset stay /// unauthenticated (v0.1 trust matrix); the failure case is the explicit /// `allow_non_loopback: true` opt-in plus an unset `token_env`. + /// + /// # Panics + /// + /// This function cannot panic in practice: the `.expect` is only reached + /// when `is_loopback_bind()` is `false`, which only occurs when + /// `self.bind` is `Some(non-loopback addr)`. pub fn validate_auth_trust(&self, env_lookup: F) -> Result<(), ConfigError> where F: Fn(&str) -> Option, @@ -383,24 +397,31 @@ impl HttpReadConfig { } Err(ConfigError::NonLoopbackHttpNoAuth { code: "LMWV-CONFIG-HTTP-NO-AUTH", - bind: self.bind, + bind: self + .bind + .expect("non-loopback bind implies an explicit address"), token_env: self.token_env.clone(), }) } + /// `None` (auto-select) always binds `127.0.0.1`, so it is loopback. 
#[must_use] pub fn is_loopback_bind(&self) -> bool { - self.bind.ip().is_loopback() + self.bind.is_none_or(|addr| addr.ip().is_loopback()) } } -fn deserialize_socket_addr<'de, D>(deserializer: D) -> Result +fn deserialize_optional_socket_addr<'de, D>(deserializer: D) -> Result, D::Error> where D: serde::Deserializer<'de>, { - let raw = String::deserialize(deserializer)?; - raw.parse() - .map_err(|err| serde::de::Error::custom(format!("invalid serve.http.bind {raw:?}: {err}"))) + let raw = Option::::deserialize(deserializer)?; + match raw { + None => Ok(None), + Some(raw) => raw.parse().map(Some).map_err(|err| { + serde::de::Error::custom(format!("invalid serve.http.bind {raw:?}: {err}")) + }), + } } #[derive(Debug, Clone, PartialEq, Deserialize)] @@ -899,7 +920,10 @@ serve: ) .expect("parse HTTP bind"); - assert_eq!(cfg.serve.http.bind, SocketAddr::from(([127, 0, 0, 1], 0))); + assert_eq!( + cfg.serve.http.bind, + Some(SocketAddr::from(([127, 0, 0, 1], 0))) + ); } #[test] @@ -1067,6 +1091,38 @@ serve: ); } + #[test] + fn http_bind_defaults_to_none_auto_select() { + // ADR-044: the installer no longer pins a port; an unset bind means + // "auto-select a per-project deterministic port and publish it". + assert_eq!(HttpReadConfig::default().bind, None); + } + + #[test] + fn http_bind_none_is_treated_as_loopback() { + // Auto-select always binds 127.0.0.1, so an absent bind is loopback and + // must satisfy the loopback-trust gate without allow_non_loopback. 
+ let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + assert!(cfg.is_loopback_bind()); + assert!(cfg.validate_loopback_trust().is_ok()); + } + + #[test] + fn http_explicit_bind_still_parses() { + let cfg = McpConfig::from_yaml_str( + "serve:\n http:\n enabled: true\n bind: \"127.0.0.1:9412\"\n", + ) + .expect("parse explicit bind"); + assert_eq!( + cfg.serve.http.bind, + Some(SocketAddr::from(([127, 0, 0, 1], 9412))) + ); + } + #[test] fn old_anthropic_provider_shape_reports_deprecated_provider() { let err = McpConfig::from_yaml_str( From 02a7a9089e21444c7e2d934d616182606a2d12f4 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:33:50 +1000 Subject: [PATCH 07/27] refactor(config): pattern-match HttpReadConfig bind instead of expect; drop misleading # Panics docs (ADR-044) Replace the two .expect() unwraps in validate_loopback_trust and validate_auth_trust with compiler-enforced pattern matching, and delete the # Panics doc sections (a # Panics heading documenting when a method will *not* panic inverts the rustdoc convention). Behavior is identical. Also add a field doc comment to HttpReadConfig.bind and two tests: the auth-trust None path and explicit YAML-null bind parsing. Co-Authored-By: Claude Opus 4.8 --- crates/loomweave-federation/src/config.rs | 55 ++++++++++++++--------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/crates/loomweave-federation/src/config.rs b/crates/loomweave-federation/src/config.rs index 670e07bc..804e35ec 100644 --- a/crates/loomweave-federation/src/config.rs +++ b/crates/loomweave-federation/src/config.rs @@ -295,6 +295,9 @@ pub struct McpServeConfig { #[serde(default)] pub struct HttpReadConfig { pub enabled: bool, + /// Bind address for the HTTP read API. `None` (the default) auto-selects a + /// per-project deterministic port on `127.0.0.1` (ADR-044). 
`Some(addr)` is + /// honored verbatim (operator override). #[serde(default, deserialize_with = "deserialize_optional_socket_addr")] pub bind: Option, pub allow_non_loopback: bool, @@ -333,20 +336,15 @@ impl Default for HttpReadConfig { } impl HttpReadConfig { - /// # Panics - /// - /// This function cannot panic in practice: the `.expect` is only reached - /// when `is_loopback_bind()` is `false`, which only occurs when - /// `self.bind` is `Some(non-loopback addr)`. pub fn validate_loopback_trust(&self) -> Result<(), ConfigError> { if self.enabled && !self.allow_non_loopback && !self.is_loopback_bind() { - return Err(ConfigError::NonLoopbackHttpBind { - code: "LMWV-CONFIG-HTTP-NON-LOOPBACK", - // Safe: is_loopback_bind() is false only when bind is Some(non-loopback). - bind: self - .bind - .expect("non-loopback bind implies an explicit address"), - }); + // is_loopback_bind() is true for None, so reaching here implies Some(non-loopback). + if let Some(bind) = self.bind { + return Err(ConfigError::NonLoopbackHttpBind { + code: "LMWV-CONFIG-HTTP-NON-LOOPBACK", + bind, + }); + } } Ok(()) } @@ -355,12 +353,6 @@ impl HttpReadConfig { /// token env var is unset. Loopback binds with the env var unset stay /// unauthenticated (v0.1 trust matrix); the failure case is the explicit /// `allow_non_loopback: true` opt-in plus an unset `token_env`. - /// - /// # Panics - /// - /// This function cannot panic in practice: the `.expect` is only reached - /// when `is_loopback_bind()` is `false`, which only occurs when - /// `self.bind` is `Some(non-loopback addr)`. pub fn validate_auth_trust(&self, env_lookup: F) -> Result<(), ConfigError> where F: Fn(&str) -> Option, @@ -383,7 +375,11 @@ impl HttpReadConfig { } None => false, }; - if self.is_loopback_bind() { + // None (auto-select) always binds 127.0.0.1, so it is loopback. 
+ let Some(bind_addr) = self.bind else { + return Ok(()); + }; + if bind_addr.ip().is_loopback() { return Ok(()); } if has_identity_secret { @@ -397,9 +393,7 @@ impl HttpReadConfig { } Err(ConfigError::NonLoopbackHttpNoAuth { code: "LMWV-CONFIG-HTTP-NO-AUTH", - bind: self - .bind - .expect("non-loopback bind implies an explicit address"), + bind: bind_addr, token_env: self.token_env.clone(), }) } @@ -1123,6 +1117,23 @@ serve: ); } + #[test] + fn http_bind_none_passes_auth_trust_validation() { + let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + assert!(cfg.validate_auth_trust(|_| None).is_ok()); + } + + #[test] + fn http_bind_explicit_null_is_treated_as_auto_select() { + let cfg = McpConfig::from_yaml_str("serve:\n http:\n enabled: true\n bind: ~\n") + .expect("explicit YAML null should parse as auto-select"); + assert_eq!(cfg.serve.http.bind, None); + } + #[test] fn old_anthropic_provider_shape_reports_deprecated_provider() { let err = McpConfig::from_yaml_str( From ba8233a6483056f9c85c88596c82fe876f791d7b Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:38:10 +1000 Subject: [PATCH 08/27] feat(serve): auto-select read-API port with ephemeral fallback; publish .loomweave/ephemeral.port (ADR-044) Co-Authored-By: Claude Opus 4.8 --- crates/loomweave-cli/src/http_read.rs | 167 +++++++++++++++++++++++++- 1 file changed, 165 insertions(+), 2 deletions(-) diff --git a/crates/loomweave-cli/src/http_read.rs b/crates/loomweave-cli/src/http_read.rs index fe8ff9c4..252259b1 100644 --- a/crates/loomweave-cli/src/http_read.rs +++ b/crates/loomweave-cli/src/http_read.rs @@ -55,6 +55,20 @@ static HTTP_ERROR_DISPATCH: LazyLock = LazyLock::new(|| { tracing::Dispatch::new(subscriber) }); +/// Removes the published `.loomweave/ephemeral.port` on drop — covering +/// graceful shutdown, error return, and panic-unwind in one place. 
Only +/// SIGKILL can strand a stale file, which the read-side validation and the +/// ADR-034 instance-ID guard tolerate (a stale file degrades, never corrupts). +struct PublishedPortGuard { + project_root: PathBuf, +} + +impl Drop for PublishedPortGuard { + fn drop(&mut self) { + loomweave_federation::loomweave_port::remove_published_port(&self.project_root); + } +} + #[derive(Debug)] pub struct HttpReadServer { shutdown: Option>, @@ -337,8 +351,9 @@ fn run_http_read_server( auth_token: Option>, identity_secret: Option>, bind: std::net::SocketAddr, - // ADR-044 Task 3 will consume this to drive the ephemeral fallback + publish. - _auto_port: bool, + // ADR-044: when true (bind auto-selected), an `AddrInUse` falls back to an + // OS-assigned ephemeral port; an explicit operator bind never falls back. + auto_port: bool, shutdown_rx: oneshot::Receiver<()>, ready_tx: mpsc::Sender>, ) -> Result<()> { @@ -349,8 +364,22 @@ fn run_http_read_server( let readers_identity = readers.identity().clone(); let runtime = build_http_runtime()?; runtime.block_on(async move { + // ADR-044: auto-selected ports fall back to an OS-assigned ephemeral + // port if the deterministic port is taken; an explicit operator bind + // does NOT fall back (a taken explicit port is a hard error). 
let listener = match tokio::net::TcpListener::bind(bind).await { Ok(listener) => listener, + Err(err) if auto_port && err.kind() == std::io::ErrorKind::AddrInUse => { + let fallback = std::net::SocketAddr::from(([127, 0, 0, 1], 0)); + match tokio::net::TcpListener::bind(fallback).await { + Ok(listener) => listener, + Err(err) => { + let _ = ready_tx + .send(Err(anyhow!("bind HTTP read API ephemeral fallback: {err}"))); + return Err(anyhow!("bind HTTP read API ephemeral fallback: {err}")); + } + } + } Err(err) => { let _ = ready_tx.send(Err(anyhow!("bind HTTP read API on {bind}: {err}"))); return Err(anyhow!("bind HTTP read API on {bind}: {err}")); @@ -363,6 +392,29 @@ fn run_http_read_server( return Err(anyhow!("read HTTP read API local addr: {err}")); } }; + // Publish the ACTUALLY-bound port loopback-only (ADR-044 file contract). + // A non-loopback bind publishes NO file — consumers fall back to their + // configured URL. The guard unlinks the file when this scope unwinds. + let _published_port_guard = if local_addr.ip().is_loopback() { + if let Err(err) = + loomweave_federation::loomweave_port::publish_port(&project_root, local_addr.port()) + { + // Publication is best-effort enrichment: a failure to write the + // discovery file must not take the read API down. + tracing::warn!( + error = %err, + port = local_addr.port(), + "failed to publish .loomweave/ephemeral.port; consumers will fall back to configured URL" + ); + None + } else { + Some(PublishedPortGuard { + project_root: project_root.clone(), + }) + } + } else { + None + }; let _ = ready_tx.send(Ok(HttpReadReady { local_addr, readers_identity, @@ -1018,6 +1070,117 @@ mod tests { ); } + /// ADR-044: with `bind: None`, two serves on distinct project paths each + /// bind their own deterministic port and publish their own + /// `.loomweave/ephemeral.port`. Neither fails to bind. 
+ #[test] + fn auto_port_publishes_distinct_ports_per_project() { + use loomweave_federation::config::HttpReadConfig; + use loomweave_federation::loomweave_port::read_published_port; + use loomweave_storage::ReaderPool; + + let _guard = http_runtime_test_guard(); + + let make = |id: &str| { + let dir = tempfile::tempdir().expect("tempdir"); + let db = dir.path().join("loomweave.db"); + let readers = ReaderPool::open(&db, 4).expect("reader pool"); + let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + let iid = crate::instance::parse_instance_id_for_test(id).expect("iid"); + let server = spawn(dir.path().to_path_buf(), db, readers, iid, &cfg) + .expect("spawn") + .expect("enabled => Some"); + (dir, server) + }; + + let (dir_a, server_a) = make("00000000-0000-4000-8000-0000000000a1"); + let (dir_b, server_b) = make("00000000-0000-4000-8000-0000000000a2"); + + let port_a = read_published_port(dir_a.path()).expect("a published a port"); + let port_b = read_published_port(dir_b.path()).expect("b published a port"); + assert!( + port_a >= 9400 && port_b >= 9400, + "ports in the loomweave band" + ); + // Two live servers => two live ports => they cannot be equal. + assert_ne!(port_a, port_b, "concurrent serves must hold distinct ports"); + + server_a.shutdown().expect("shutdown a"); + server_b.shutdown().expect("shutdown b"); + } + + /// The published file is removed on clean shutdown. 
+ #[test] + fn auto_port_file_removed_on_clean_shutdown() { + use loomweave_federation::config::HttpReadConfig; + use loomweave_federation::loomweave_port::{published_port_path, read_published_port}; + use loomweave_storage::ReaderPool; + + let _guard = http_runtime_test_guard(); + + let dir = tempfile::tempdir().expect("tempdir"); + let db = dir.path().join("loomweave.db"); + let readers = ReaderPool::open(&db, 4).expect("reader pool"); + let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + let iid = + crate::instance::parse_instance_id_for_test("00000000-0000-4000-8000-0000000000a3") + .expect("iid"); + let server = spawn(dir.path().to_path_buf(), db, readers, iid, &cfg) + .expect("spawn") + .expect("enabled => Some"); + + assert!( + read_published_port(dir.path()).is_some(), + "published while serving" + ); + server.shutdown().expect("shutdown"); + assert!( + !published_port_path(dir.path()).exists(), + "published port file must be gone after clean shutdown" + ); + } + + /// An explicit (operator-set) bind that is already in use is a HARD error — + /// the operator asked for that specific port. Only auto-select falls back. + #[test] + fn explicit_bind_in_use_is_a_hard_error() { + use loomweave_federation::config::HttpReadConfig; + use loomweave_storage::ReaderPool; + use std::net::{SocketAddr, TcpListener}; + + let _guard = http_runtime_test_guard(); + + // Hold a real listener so the address is genuinely occupied. 
+ let held = TcpListener::bind(("127.0.0.1", 0)).expect("hold a port"); + let bind: SocketAddr = held.local_addr().expect("addr"); + + let dir = tempfile::tempdir().expect("tempdir"); + let db = dir.path().join("loomweave.db"); + let readers = ReaderPool::open(&db, 4).expect("reader pool"); + let cfg = HttpReadConfig { + enabled: true, + bind: Some(bind), + ..HttpReadConfig::default() + }; + let iid = + crate::instance::parse_instance_id_for_test("00000000-0000-4000-8000-0000000000a4") + .expect("iid"); + + let result = spawn(dir.path().to_path_buf(), db, readers, iid, &cfg); + assert!( + result.is_err(), + "an explicit in-use bind must fail, not silently fall back to :0" + ); + } + // ---------------------------------------------------------------------- // W.3 taint-fact READ endpoints (GET + :batch-get). // ---------------------------------------------------------------------- From 0b4df33e512f15b6bf9c29cd3052be309d2eedba Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:47:31 +1000 Subject: [PATCH 09/27] test(serve): cover ephemeral fallback when deterministic read-API port is taken (ADR-044) Co-Authored-By: Claude Opus 4.8 --- crates/loomweave-cli/src/http_read.rs | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/crates/loomweave-cli/src/http_read.rs b/crates/loomweave-cli/src/http_read.rs index 252259b1..3cdf4e7c 100644 --- a/crates/loomweave-cli/src/http_read.rs +++ b/crates/loomweave-cli/src/http_read.rs @@ -1181,6 +1181,46 @@ mod tests { ); } + /// The headline ADR-044 behavior: when the AUTO-selected deterministic port + /// is already taken, serve falls back to an OS-assigned ephemeral port and + /// publishes the *actually* bound port (not the deterministic guess). 
+ #[test] + fn auto_port_falls_back_to_ephemeral_when_deterministic_taken() { + use loomweave_federation::config::HttpReadConfig; + use loomweave_federation::loomweave_port::{deterministic_port, read_published_port}; + use loomweave_storage::ReaderPool; + use std::net::TcpListener; + + let _guard = http_runtime_test_guard(); + + let dir = tempfile::tempdir().expect("tempdir"); + // Occupy this project's deterministic port so the auto bind must fall back. + let det = deterministic_port(dir.path()); + let _held = TcpListener::bind(("127.0.0.1", det)).expect("hold deterministic port"); + + let db = dir.path().join("loomweave.db"); + let readers = ReaderPool::open(&db, 4).expect("reader pool"); + let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + let iid = + crate::instance::parse_instance_id_for_test("00000000-0000-4000-8000-0000000000a5") + .expect("iid"); + + let server = spawn(dir.path().to_path_buf(), db, readers, iid, &cfg) + .expect("spawn must succeed via ephemeral fallback") + .expect("enabled => Some"); + + let published = read_published_port(dir.path()).expect("published a port"); + assert_ne!( + published, det, + "fallback must publish the ephemeral port actually bound, not the taken deterministic one" + ); + server.shutdown().expect("shutdown"); + } + // ---------------------------------------------------------------------- // W.3 taint-fact READ endpoints (GET + :batch-get). 
// ---------------------------------------------------------------------- From c102297950f642d186008c4bbf70f49a08c489e3 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:50:11 +1000 Subject: [PATCH 10/27] feat(install): YAML stub no longer pins serve.http.bind 9111 (ADR-044) Co-Authored-By: Claude Sonnet 4.6 --- crates/loomweave-cli/src/install.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/loomweave-cli/src/install.rs b/crates/loomweave-cli/src/install.rs index 7a1ff576..626654c6 100644 --- a/crates/loomweave-cli/src/install.rs +++ b/crates/loomweave-cli/src/install.rs @@ -75,7 +75,9 @@ serve: enable_write_tools: false http: enabled: false - bind: 127.0.0.1:9111 + # The read-API port is auto-selected per project (deterministic, with an + # ephemeral fallback) and published to .loomweave/ephemeral.port while + # serving. Set `bind:` explicitly only to pin a fixed port (ADR-044). "; const GITIGNORE_CONTENTS: &str = "\ From 7cf778288dfab6b0795803d8e65dad4ced47406c Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 08:54:11 +1000 Subject: [PATCH 11/27] feat(install): integration bindings use per-project deterministic loomweave URL, no fixed bind (ADR-044) Co-Authored-By: Claude Opus 4.8 --- .../loomweave-cli/src/integration_bindings.rs | 21 ++++++++++++------- crates/loomweave-cli/tests/doctor.rs | 7 ++++++- crates/loomweave-cli/tests/install.rs | 12 ++++++++--- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/crates/loomweave-cli/src/integration_bindings.rs b/crates/loomweave-cli/src/integration_bindings.rs index 13825548..86008f4a 100644 --- a/crates/loomweave-cli/src/integration_bindings.rs +++ b/crates/loomweave-cli/src/integration_bindings.rs @@ -12,8 +12,6 @@ use std::path::{Path, PathBuf}; use anyhow::{Context, Result, bail}; use serde_json::{Map, Value, json}; -const 
LOOMWEAVE_HTTP_BIND: &str = "127.0.0.1:9111"; -const LOOMWEAVE_HTTP_URL: &str = "http://127.0.0.1:9111"; const DEFAULT_FILIGREE_BASE_URL: &str = "http://127.0.0.1:8766"; #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -23,9 +21,13 @@ pub enum BindingState { Unparseable, } +// All three fields are URLs by nature; the `_url` suffix is the meaningful part +// of each name, not redundant noise. +#[allow(clippy::struct_field_names)] struct DesiredBindings { filigree_base_url: String, wardline_filigree_url: String, + loomweave_url: String, } /// Classify the local three-way integration binding files without writing. @@ -67,9 +69,16 @@ fn desired_bindings(project_root: &Path) -> DesiredBindings { "{}/api/weft/scan-results", filigree_base_url.trim_end_matches('/') ); + // ADR-044: seed the consumer's static target with this project's + // deterministic read-API port. serve binds the same port (barring an + // ephemeral fallback), and the published .loomweave/ephemeral.port file + // overrides this at runtime once a consumer resolves consume-time. 
+ let port = loomweave_federation::loomweave_port::deterministic_port(project_root); + let loomweave_url = format!("http://127.0.0.1:{port}"); DesiredBindings { filigree_base_url, wardline_filigree_url, + loomweave_url, } } @@ -114,7 +123,6 @@ fn loomweave_yaml_ok(project_root: &Path, desired: &DesiredBindings) -> Result Result Res let serve = ensure_object(root, "serve")?; let http = ensure_object(serve, "http")?; http.insert("enabled".to_owned(), json!(true)); - http.insert("bind".to_owned(), json!(LOOMWEAVE_HTTP_BIND)); http.insert("wardline_taint_write".to_owned(), json!(true)); write_yaml_if_changed(&path, &value) } @@ -191,7 +198,7 @@ fn install_wardline_yaml(project_root: &Path, desired: &DesiredBindings) -> Resu let mut value = read_yaml_value_or_empty(&path)?; let root = object_mut(&mut value, &path)?; let loomweave = ensure_object(root, "loomweave")?; - loomweave.insert("url".to_owned(), json!(LOOMWEAVE_HTTP_URL)); + loomweave.insert("url".to_owned(), json!(desired.loomweave_url)); let filigree = ensure_object(root, "filigree")?; filigree.insert("url".to_owned(), json!(desired.wardline_filigree_url)); write_yaml_if_changed(&path, &value) @@ -246,7 +253,7 @@ fn desired_wardline_args(desired: &DesiredBindings) -> Value { "--root", ".", "--loomweave-url", - LOOMWEAVE_HTTP_URL, + desired.loomweave_url, "--filigree-url", desired.wardline_filigree_url ]) diff --git a/crates/loomweave-cli/tests/doctor.rs b/crates/loomweave-cli/tests/doctor.rs index 4fff01d9..c848e4b5 100644 --- a/crates/loomweave-cli/tests/doctor.rs +++ b/crates/loomweave-cli/tests/doctor.rs @@ -211,6 +211,11 @@ fn doctor_fix_repairs_missing_three_way_integration_bindings() { "http://127.0.0.1:8749/api/weft/scan-results" ); + let expected_port = loomweave_federation::loomweave_port::deterministic_port( + &dir.path().canonicalize().unwrap(), + ); + let expected_loomweave_url = format!("http://127.0.0.1:{expected_port}"); + let mcp: serde_json::Value = 
serde_json::from_str(&fs::read_to_string(dir.path().join(".mcp.json")).unwrap()).unwrap(); assert_eq!( @@ -220,7 +225,7 @@ fn doctor_fix_repairs_missing_three_way_integration_bindings() { "--root", ".", "--loomweave-url", - "http://127.0.0.1:9111", + expected_loomweave_url, "--filigree-url", "http://127.0.0.1:8749/api/weft/scan-results" ]) diff --git a/crates/loomweave-cli/tests/install.rs b/crates/loomweave-cli/tests/install.rs index dce89f54..ca4f9a6b 100644 --- a/crates/loomweave-cli/tests/install.rs +++ b/crates/loomweave-cli/tests/install.rs @@ -92,14 +92,20 @@ fn install_all_wires_three_way_integration_bindings() { loomweave_yaml["serve"]["http"]["enabled"], serde_json::json!(true) ); - assert_eq!(loomweave_yaml["serve"]["http"]["bind"], "127.0.0.1:9111"); + // ADR-044: no fixed bind is written; the port is auto-selected at serve time. + assert!(loomweave_yaml["serve"]["http"].get("bind").is_none()); assert_eq!( loomweave_yaml["serve"]["http"]["wardline_taint_write"], serde_json::json!(true) ); + let expected_port = loomweave_federation::loomweave_port::deterministic_port( + &dir.path().canonicalize().unwrap(), + ); + let expected_loomweave_url = format!("http://127.0.0.1:{expected_port}"); + let wardline_yaml = read_yaml(&dir.path().join("wardline.yaml")); - assert_eq!(wardline_yaml["loomweave"]["url"], "http://127.0.0.1:9111"); + assert_eq!(wardline_yaml["loomweave"]["url"], expected_loomweave_url); assert_eq!( wardline_yaml["filigree"]["url"], "http://127.0.0.1:8749/api/weft/scan-results" @@ -114,7 +120,7 @@ fn install_all_wires_three_way_integration_bindings() { "--root", ".", "--loomweave-url", - "http://127.0.0.1:9111", + expected_loomweave_url, "--filigree-url", "http://127.0.0.1:8749/api/weft/scan-results" ]) From efd2285c9ae6f7cd06f7e0cdd5167e61bdf4e74d Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:03:18 +1000 Subject: [PATCH 12/27] fix(install): strip stale auto-stamped 
serve.http.bind 127.0.0.1:9111 on repair (ADR-044) Older `install --all` runs unconditionally stamped a fixed bind. Task 5 stopped writing it but left existing stamps in place, so re-install kept `bind: 9111`, serve honored it verbatim (no auto-port), and the collision returned invisibly (loomweave_yaml_ok no longer inspected bind). Strip exactly the old auto-default literal on repair and treat its presence as not-ok so doctor/binding_state flags and fixes it. Operator-chosen binds (any other value) are preserved. Co-Authored-By: Claude Opus 4.8 --- .../loomweave-cli/src/integration_bindings.rs | 14 ++++++ crates/loomweave-cli/tests/install.rs | 43 +++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/crates/loomweave-cli/src/integration_bindings.rs b/crates/loomweave-cli/src/integration_bindings.rs index 86008f4a..0a232ff4 100644 --- a/crates/loomweave-cli/src/integration_bindings.rs +++ b/crates/loomweave-cli/src/integration_bindings.rs @@ -14,6 +14,13 @@ use serde_json::{Map, Value, json}; const DEFAULT_FILIGREE_BASE_URL: &str = "http://127.0.0.1:8766"; +/// ADR-044 migration: older `install --all` runs unconditionally stamped a fixed +/// `serve.http.bind: 127.0.0.1:9111`. The deterministic read-API band is +/// `9400–10399`, so this exact literal can only be the old auto-default, never a +/// deterministic value. We strip it on repair so auto-port + ephemeral fallback +/// can engage; any other (operator-chosen) bind is left intact. +const STALE_DEFAULT_BIND: &str = "127.0.0.1:9111"; + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BindingState { Present, @@ -124,6 +131,7 @@ fn loomweave_yaml_ok(project_root: &Path, desired: &DesiredBindings) -> Result Res let serve = ensure_object(root, "serve")?; let http = ensure_object(serve, "http")?; + // ADR-044 migration: strip exactly the old auto-stamped `bind: 127.0.0.1:9111` + // so auto-port + ephemeral fallback can engage. 
A deliberately operator-chosen + // bind (any other value) is left intact. + if http.get("bind").and_then(Value::as_str) == Some(STALE_DEFAULT_BIND) { + http.remove("bind"); + } http.insert("enabled".to_owned(), json!(true)); http.insert("wardline_taint_write".to_owned(), json!(true)); write_yaml_if_changed(&path, &value) diff --git a/crates/loomweave-cli/tests/install.rs b/crates/loomweave-cli/tests/install.rs index ca4f9a6b..3a73c3e3 100644 --- a/crates/loomweave-cli/tests/install.rs +++ b/crates/loomweave-cli/tests/install.rs @@ -127,6 +127,49 @@ fn install_all_wires_three_way_integration_bindings() { ); } +/// ADR-044 migration: a project whose `loomweave.yaml` still carries the old +/// auto-stamped `serve.http.bind: 127.0.0.1:9111` has that exact literal stripped +/// on re-install, so auto-port + ephemeral fallback engages. A deliberately +/// operator-chosen bind (any other value) is preserved verbatim. +#[test] +fn install_all_strips_stale_default_bind_but_keeps_custom_bind() { + // Case 1: the stale auto-default is stripped. + let stale = tempfile::tempdir().unwrap(); + fs::write( + stale.path().join("loomweave.yaml"), + "version: 1\nserve:\n http:\n enabled: true\n bind: 127.0.0.1:9111\n wardline_taint_write: true\n", + ) + .unwrap(); + loomweave_bin() + .args(["install", "--all", "--path"]) + .arg(stale.path()) + .assert() + .success(); + let stale_yaml = read_yaml(&stale.path().join("loomweave.yaml")); + assert!( + stale_yaml["serve"]["http"].get("bind").is_none(), + "stale 127.0.0.1:9111 bind must be stripped on re-install: {stale_yaml}" + ); + + // Case 2: a deliberately custom bind is preserved. 
+ let custom = tempfile::tempdir().unwrap(); + fs::write( + custom.path().join("loomweave.yaml"), + "version: 1\nserve:\n http:\n enabled: true\n bind: 127.0.0.1:9999\n wardline_taint_write: true\n", + ) + .unwrap(); + loomweave_bin() + .args(["install", "--all", "--path"]) + .arg(custom.path()) + .assert() + .success(); + let custom_yaml = read_yaml(&custom.path().join("loomweave.yaml")); + assert_eq!( + custom_yaml["serve"]["http"]["bind"], "127.0.0.1:9999", + "an operator-chosen bind must be preserved: {custom_yaml}" + ); +} + #[test] fn install_applies_each_migration_exactly_once() { let dir = tempfile::tempdir().unwrap(); From 461614eb0566a2ecbb50bc0d0c411a779fd38213 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:07:41 +1000 Subject: [PATCH 13/27] feat(doctor): resolve_loomweave_url + doctor reports live published read-API port (ADR-044) Co-Authored-By: Claude Opus 4.8 --- crates/loomweave-cli/src/doctor.rs | 27 ++++- crates/loomweave-cli/tests/doctor.rs | 24 +++++ crates/loomweave-federation/src/lib.rs | 1 + .../loomweave-federation/src/loomweave_url.rs | 99 +++++++++++++++++++ 4 files changed, 148 insertions(+), 3 deletions(-) create mode 100644 crates/loomweave-federation/src/loomweave_url.rs diff --git a/crates/loomweave-cli/src/doctor.rs b/crates/loomweave-cli/src/doctor.rs index aa6b0f96..8fbb9dc7 100644 --- a/crates/loomweave-cli/src/doctor.rs +++ b/crates/loomweave-cli/src/doctor.rs @@ -404,16 +404,37 @@ fn check_http_config_json(project_root: &Path) -> DoctorJsonCheck { .and_then(|http| http.get("enabled")) .and_then(Value::as_bool) == Some(true); + if !enabled { + return DoctorJsonCheck::warning( + "http.config", + "HTTP serve config is disabled or incomplete", + ); + } + // ADR-044: prefer the live published port over the (now usually absent) + // static bind. A running serve publishes .loomweave/ephemeral.port. 
+ let resolution = loomweave_federation::loomweave_url::resolve_loomweave_url(None, project_root); + if let Some(url) = resolution.resolved_url { + return DoctorJsonCheck::ok( + "http.config", + format!("HTTP read API published on {url} ({})", resolution.source), + ); + } let bind = config .get("serve") .and_then(|serve| serve.get("http")) .and_then(|http| http.get("bind")) .and_then(Value::as_str) .unwrap_or(""); - if enabled && !bind.trim().is_empty() { - DoctorJsonCheck::ok("http.config", format!("HTTP configured on {bind}")) + if bind.trim().is_empty() { + DoctorJsonCheck::ok( + "http.config", + "HTTP enabled; read-API port auto-selected and published to .loomweave/ephemeral.port while serving", + ) } else { - DoctorJsonCheck::warning("http.config", "HTTP serve config is disabled or incomplete") + DoctorJsonCheck::ok( + "http.config", + format!("HTTP configured on {bind} (auto-published while serving)"), + ) } } diff --git a/crates/loomweave-cli/tests/doctor.rs b/crates/loomweave-cli/tests/doctor.rs index c848e4b5..5a389027 100644 --- a/crates/loomweave-cli/tests/doctor.rs +++ b/crates/loomweave-cli/tests/doctor.rs @@ -391,3 +391,27 @@ fn doctor_flags_untrusted_mcp_command_without_clobbering_it() { "an untrusted command makes the run not ok" ); } + +#[test] +fn doctor_reports_published_ephemeral_port() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + // Simulate a live serve having published its port. 
+ let loomweave_dir = dir.path().join(".loomweave"); + std::fs::create_dir_all(&loomweave_dir).unwrap(); + std::fs::write(loomweave_dir.join("ephemeral.port"), "9876\n").unwrap(); + + let (code, json) = doctor_json(dir.path(), false); + assert_eq!(code, 0, "{json}"); + let http = json["checks"] + .as_array() + .unwrap() + .iter() + .find(|c| c["id"] == "http.config") + .expect("http.config check present"); + assert_eq!(http["status"], "ok"); + assert!( + http["message"].as_str().unwrap_or("").contains("9876"), + "http.config should report the published live port: {http}" + ); +} diff --git a/crates/loomweave-federation/src/lib.rs b/crates/loomweave-federation/src/lib.rs index bd0db468..71da905a 100644 --- a/crates/loomweave-federation/src/lib.rs +++ b/crates/loomweave-federation/src/lib.rs @@ -4,4 +4,5 @@ pub mod config; pub mod filigree; pub mod filigree_url; pub mod loomweave_port; +pub mod loomweave_url; pub mod scan_results; diff --git a/crates/loomweave-federation/src/loomweave_url.rs b/crates/loomweave-federation/src/loomweave_url.rs new file mode 100644 index 00000000..675fd286 --- /dev/null +++ b/crates/loomweave-federation/src/loomweave_url.rs @@ -0,0 +1,99 @@ +//! Resolve the live Loomweave read-API base URL (ADR-044). +//! +//! The reference reader of the `.loomweave/ephemeral.port` file contract and +//! the twin of [`crate::filigree_url`]. Precedence (consumer-side): the +//! published live port wins over a configured URL, which wins over nothing. +//! (ADR-044's higher "explicit flag/env" precedence level is realized by each +//! consumer's own CLI/env handling — e.g. Wardline's `--loomweave-url` — not by +//! this library function.) Fail-soft throughout: a missing/corrupt file folds +//! to the configured URL; absent both, `None` (federation simply degrades). + +use std::path::Path; + +use crate::loomweave_port::read_published_port; + +/// The live published port file `.loomweave/ephemeral.port`. 
+pub const SOURCE_EPHEMERAL_PORT: &str = ".loomweave/ephemeral.port"; +/// A statically configured URL (e.g. `wardline.yaml: loomweave.url`). +pub const SOURCE_CONFIG: &str = "config"; +/// Neither a published file nor a configured URL — federation is absent. +pub const SOURCE_NONE: &str = "none"; + +/// Where a resolved Loomweave read-API URL came from. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct LoomweaveUrlResolution { + /// The URL a consumer should call, or `None` when nothing resolves. + pub resolved_url: Option<String>, + /// One of the `SOURCE_*` labels. + pub source: &'static str, +} + +/// Resolve the read-API URL, preferring the live published port over the +/// configured URL. `configured_url` is the consumer's static fallback (pass +/// `None` if it has none). +#[must_use] +pub fn resolve_loomweave_url( + configured_url: Option<&str>, + project_root: &Path, +) -> LoomweaveUrlResolution { + if let Some(port) = read_published_port(project_root) { + return LoomweaveUrlResolution { + resolved_url: Some(format!("http://127.0.0.1:{port}")), + source: SOURCE_EPHEMERAL_PORT, + }; + } + match configured_url { + Some(url) if !url.trim().is_empty() => LoomweaveUrlResolution { + resolved_url: Some(url.to_owned()), + source: SOURCE_CONFIG, + }, + _ => LoomweaveUrlResolution { + resolved_url: None, + source: SOURCE_NONE, + }, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::loomweave_port::publish_port; + + #[test] + fn published_port_beats_configured_url() { + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9412).unwrap(); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); + assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:9412")); + assert_eq!(res.source, SOURCE_EPHEMERAL_PORT); + } + + #[test] + fn falls_back_to_configured_url_when_no_file() { + let dir = tempfile::tempdir().unwrap(); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); +
assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:9111")); + assert_eq!(res.source, SOURCE_CONFIG); + } + + #[test] + fn corrupt_file_folds_to_configured_url() { + let dir = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + std::fs::write( + dir.path().join(".loomweave").join("ephemeral.port"), + "not-a-port", + ) + .unwrap(); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); + assert_eq!(res.source, SOURCE_CONFIG); + } + + #[test] + fn nothing_resolves_to_none() { + let dir = tempfile::tempdir().unwrap(); + let res = resolve_loomweave_url(None, dir.path()); + assert_eq!(res.resolved_url, None); + assert_eq!(res.source, SOURCE_NONE); + } +} From d0b15b3fdce9be2eeddff5ffee3d4facc0164273 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:13:14 +1000 Subject: [PATCH 14/27] test(federation): cover blank-config SOURCE_NONE path in resolve_loomweave_url (ADR-044) Co-Authored-By: Claude Opus 4.8 --- crates/loomweave-federation/src/loomweave_url.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/loomweave-federation/src/loomweave_url.rs b/crates/loomweave-federation/src/loomweave_url.rs index 675fd286..38bb6f95 100644 --- a/crates/loomweave-federation/src/loomweave_url.rs +++ b/crates/loomweave-federation/src/loomweave_url.rs @@ -96,4 +96,12 @@ mod tests { assert_eq!(res.resolved_url, None); assert_eq!(res.source, SOURCE_NONE); } + + #[test] + fn blank_config_with_no_file_resolves_to_none() { + let dir = tempfile::tempdir().unwrap(); + let res = resolve_loomweave_url(Some(" "), dir.path()); + assert_eq!(res.resolved_url, None); + assert_eq!(res.source, SOURCE_NONE); + } } From 046c34177da21c95f78fa9ef5550ea7c3730c8f2 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:26:10 +1000 Subject: [PATCH 15/27] docs(adr): accept ADR-044; 
auto-port docs, glossary verdict, revert 9112 stopgap Co-Authored-By: Claude Opus 4.8 --- .gitignore | 1 + .loomweave/.gitignore | 4 +++ docs/federation/contracts.md | 5 +++- ...044-read-api-ephemeral-port-publication.md | 29 ++++++++++++++++++- docs/loomweave/adr/README.md | 2 +- docs/operator/loomweave-http-read-api.md | 9 ++++-- docs/operator/secret-scanning.md | 3 +- docs/suite/glossary.md | 13 +++++++++ loomweave.yaml | 1 - wardline.yaml | 5 +++- 10 files changed, 64 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 5713582a..d6e91368 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ tests/e2e/external-operator-smoke-results-*.md .loomweave/loomweave.db .loomweave/instance_id .loomweave/loomweave.lock +.loomweave/ephemeral.port # Documentation site build output (mkdocs `site_dir`, web/mkdocs.yml). /site-build/ diff --git a/.loomweave/.gitignore b/.loomweave/.gitignore index d1d0e32b..b0f4e45f 100644 --- a/.loomweave/.gitignore +++ b/.loomweave/.gitignore @@ -9,6 +9,10 @@ loomweave.db instance_id loomweave.lock +# Read-API live port discovery file (ADR-044): present only while serve runs, +# rewritten per bind, loopback-only — a runtime artifact, never committed. +ephemeral.port + # SQLite write-ahead files never belong in the repo. *-wal *-shm diff --git a/docs/federation/contracts.md b/docs/federation/contracts.md index 90d4a039..c55f3086 100644 --- a/docs/federation/contracts.md +++ b/docs/federation/contracts.md @@ -32,7 +32,10 @@ Filigree is absent (weft.md §5). serve: http: enabled: true - bind: 127.0.0.1:9111 + # The read-API port is auto-selected per project — a deterministic port in + # Loomweave's band (9400–10399, disjoint from Filigree's 8400–9399) with an + # ephemeral fallback — and published to .loomweave/ephemeral.port while + # serve runs. Set `bind:` explicitly only to pin a fixed port (ADR-044). # Preferred 1.0 identity mode. Optional on loopback, required for # authenticated Weft component requests. 
identity_token_env: WEFT_IDENTITY_SECRET diff --git a/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md index 28a02633..9af2ded1 100644 --- a/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md +++ b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md @@ -1,10 +1,17 @@ # ADR-044: Read-API Ephemeral Port Publication -**Status**: Proposed +**Status**: Accepted **Date**: 2026-06-06 **Relates to**: [ADR-034](./ADR-034-federation-http-read-api-hardening.md) **Tracking**: clarion-7f574bc34f +> **Accepted** on branch `feat/serve-no-index-chirp` (deterministic band +> `9400–10399`). Acceptance evidence: the cross-product-visible +> `.loomweave/ephemeral.port` term carries a **managed-clash** verdict in +> [`docs/suite/glossary.md`](../../suite/glossary.md), with the explicit +> `.filigree/ephemeral.port` ↔ `.loomweave/ephemeral.port` mapping table below +> (per the README acceptance criteria, model ADR-017). + ## Context `loomweave serve` exposes a federation HTTP read API. Its bind address is a @@ -98,6 +105,26 @@ conform to exactly this: - **Git-ignored** runtime artifact, consistent with ADR-005's treatment of run-time-only state. +## Managed-clash verdict + +`ephemeral.port` is a cross-product-visible term: Filigree owns the original +`.filigree/ephemeral.port` endpoint-discovery convention, and this ADR adopts the +same filename for Loomweave's own read API. Per the ADR-acceptance criteria +(`docs/loomweave/adr/README.md`), this is a **managed clash** — the same term is +used by a sibling, governed here by an explicit mapping table (model: ADR-017). +The verdict is recorded in [`docs/suite/glossary.md`](../../suite/glossary.md). 
+ +| Product | Path | Format | Publication | Band (internal, not contract) | +|---|---|---|---|---| +| Filigree | `.filigree/ephemeral.port` | single plain-ASCII TCP port, optional trailing `\n`, atomic temp+rename | loopback-only, present only while running | `8400–9399` | +| Loomweave | `.loomweave/ephemeral.port` | identical | identical | `9400–10399` (disjoint) | + +The clash is *managed*, not *renamed*: the shared filename is deliberate (one +convention siblings recognize), the paths are distinct per product, the wire +format is identical, and the deterministic bands are disjoint so the two products +never contend for the same port. The band is never part of the file contract — +consumers read the published file, never recompute a peer's port. + ## Resolution semantics (normative) Every consumer resolves **at consume time** (each scan / read), never caches the diff --git a/docs/loomweave/adr/README.md b/docs/loomweave/adr/README.md index 8421b4fb..6d098a1c 100644 --- a/docs/loomweave/adr/README.md +++ b/docs/loomweave/adr/README.md @@ -44,7 +44,7 @@ This folder is the canonical home for authored Loomweave architecture decision r | [ADR-041](./ADR-041-resume-is-idempotent-reemit.md) | Analyze resume is idempotent re-emit, not checkpoint recovery; amends ADR-005/ADR-011 resume language | Accepted | | [ADR-042](./ADR-042-hmac-freshness-and-replay-window.md) | HMAC freshness and replay window — timestamp + nonce headers, crate-backed HMAC, process-local replay cache | Accepted | | [ADR-043](./ADR-043-edge-reanalysis-replacement.md) | Edge reanalysis replacement — per-source-file anchored-edge replacement and edge metadata upsert; amends ADR-026 | Accepted | -| [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — `.loomweave/ephemeral.port` as a normative cross-product file contract (loopback-only, port-only, atomic) + consume-time resolution precedence (explicit > file > config > none), per-project deterministic port, 
installer stops pinning 9111; relates to ADR-034 | Proposed | +| [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — `.loomweave/ephemeral.port` as a normative cross-product file contract (loopback-only, port-only, atomic) + consume-time resolution precedence (explicit > file > config > none), per-project deterministic port, installer stops pinning 9111; relates to ADR-034 | Accepted | ## Backlog still tracked in the detailed design diff --git a/docs/operator/loomweave-http-read-api.md b/docs/operator/loomweave-http-read-api.md index 39e7d7fe..2bfe07da 100644 --- a/docs/operator/loomweave-http-read-api.md +++ b/docs/operator/loomweave-http-read-api.md @@ -17,10 +17,14 @@ environment variable that contains the shared Weft component secret: serve: http: enabled: true - bind: 127.0.0.1:9111 identity_token_env: WEFT_IDENTITY_SECRET ``` +The read-API port is auto-selected per project — a deterministic port in +Loomweave's band (`9400–10399`, disjoint from Filigree's `8400–9399`) with an +ephemeral fallback — and published to `.loomweave/ephemeral.port` while `serve` +runs. Set `serve.http.bind` explicitly only to pin a fixed port (ADR-044). + When `identity_token_env` is configured, Loomweave refuses to start unless the env var is present and non-empty. Protected `/api/v1/files` routes then require `X-Weft-Component: loomweave:`, `X-Weft-Timestamp: `, and @@ -65,7 +69,8 @@ catalog, or unavailable because of storage errors. When both `serve.http.token_env` (legacy bearer) and `serve.http.identity_token_env` (HMAC, preferred per [ADR-034](../loomweave/adr/ADR-034-federation-http-read-api-hardening.md)) are unset and the -bind is loopback (default: `127.0.0.1:9111`), the HTTP read API serves +bind is loopback (the auto-selected per-project port, or an explicit loopback +`serve.http.bind`; see ADR-044), the HTTP read API serves unauthenticated. 
This is the intended single-user developer-workstation trust model — the loopback socket is reachable only from processes on the same host, and Loomweave's catalogue is no more sensitive than the project diff --git a/docs/operator/secret-scanning.md b/docs/operator/secret-scanning.md index d0f2e81d..8d3d5ff3 100644 --- a/docs/operator/secret-scanning.md +++ b/docs/operator/secret-scanning.md @@ -93,7 +93,8 @@ without authentication: **loopback bind with no token configured.** When both `serve.http.token_env` (legacy bearer) and `serve.http.identity_token_env` (HMAC, preferred per [ADR-034](../loomweave/adr/ADR-034-federation-http-read-api-hardening.md)) -are unset and the bind is loopback (default: `127.0.0.1:9111`), the HTTP read +are unset and the bind is loopback (the auto-selected per-project read-API port, +or an explicit loopback `serve.http.bind`; see ADR-044), the HTTP read API serves unauthenticated. On a single-user developer workstation this is the intended trust model: the loopback socket is reachable only from processes on that host, and Loomweave's catalogue is no more sensitive than diff --git a/docs/suite/glossary.md b/docs/suite/glossary.md index 2405b8e9..002126ee 100644 --- a/docs/suite/glossary.md +++ b/docs/suite/glossary.md @@ -22,3 +22,16 @@ in the hub doctrine: `~/loom/doctrine.md` §8. Loomweave's ADRs (e.g. ADR-004, ADR-017, ADR-022, ADR-024, ADR-036, ADR-038) remain Loomweave-owned and authoritative for Loomweave's own field shapes; the hub glossary points to them, not the reverse. + +--- + +## Managed clashes (mirror to the hub) + +The body of cross-product term tables now lives at `~/loom/glossary.md`. New +managed-clash verdicts are recorded here as well so the in-repo ADR-acceptance +gate (`docs/loomweave/adr/README.md` §"ADR acceptance criteria") resolves without +the hub; the hub copy is canonical and should mirror this entry. 
+ +| Term | Verdict | Authority | Mapping / notes | +|---|---|---|---| +| `ephemeral.port` (read-API live-port discovery file) | **managed clash** | ADR-044 (Loomweave); Filigree owns the original `.filigree/ephemeral.port` convention | Shared filename convention, **distinct per-product paths**: `.filigree/ephemeral.port` ↔ `.loomweave/ephemeral.port`. Identical format (single plain-ASCII TCP port, optional trailing `\n`, atomic temp+rename), loopback-only publication, present only while the producer serves. Bands are disjoint and never part of the contract — consumers read the file, never recompute. Mapping table in ADR-044 §"Managed-clash verdict". | diff --git a/loomweave.yaml b/loomweave.yaml index 8ffa933d..ee5bf735 100644 --- a/loomweave.yaml +++ b/loomweave.yaml @@ -36,7 +36,6 @@ llm_policy: session_token_ceiling: 1000000 serve: http: - bind: 127.0.0.1:9112 enabled: true wardline_taint_write: true version: 1 diff --git a/wardline.yaml b/wardline.yaml index 5b049343..520ef8cf 100644 --- a/wardline.yaml +++ b/wardline.yaml @@ -1,4 +1,7 @@ filigree: url: http://127.0.0.1:8542/api/weft/scan-results loomweave: - url: http://127.0.0.1:9112 + # ADR-044: pinned to this project's deterministic read-API port. The published + # .loomweave/ephemeral.port overrides this once Wardline resolves consume-time + # (clarion-7f574bc34f follow-up). Until then this keeps local wardline->loomweave working. 
+ url: http://127.0.0.1:10196 From 85d9cd1da2e21046f16922286c8d576fad23fb27 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:29:41 +1000 Subject: [PATCH 16/27] docs(plan): ADR-044 ephemeral-port implementation plan Co-Authored-By: Claude Opus 4.8 --- .../2026-06-06-loomweave-ephemeral-port.md | 1307 +++++++++++++++++ 1 file changed, 1307 insertions(+) create mode 100644 docs/superpowers/plans/2026-06-06-loomweave-ephemeral-port.md diff --git a/docs/superpowers/plans/2026-06-06-loomweave-ephemeral-port.md b/docs/superpowers/plans/2026-06-06-loomweave-ephemeral-port.md new file mode 100644 index 00000000..52953c5f --- /dev/null +++ b/docs/superpowers/plans/2026-06-06-loomweave-ephemeral-port.md @@ -0,0 +1,1307 @@ +# Loomweave Read-API Ephemeral Port Publication — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make `loomweave serve` bind a per-project deterministic read-API port (ephemeral fallback), publish the live port to `.loomweave/ephemeral.port` as a normative cross-product file contract, resolve it consume-time, and stop the installer pinning `9111` — so two projects can `serve` concurrently without the `9111` collision (ADR-044, clarion-7f574bc34f). + +**Architecture:** Mirror Filigree's `.filigree/ephemeral.port` convention symmetrically for Loomweave's own read API. A new `loomweave-federation::loomweave_port` module owns the deterministic-port computation (blake3, band `9400–10399`, disjoint from Filigree's `8400–9399`), the atomic publish/remove, and the validated read. 
The producer (`http_read.rs`) binds the deterministic port — falling back to OS-assigned `:0` only when the port was *auto-selected*, not when an operator set it explicitly — then publishes the actually-bound port loopback-only via an RAII guard that unlinks on drop. `HttpReadConfig.bind` becomes `Option<SocketAddr>` so "operator chose a port" is distinguishable from "auto." The installer and the local dogfood bindings stop hardcoding `9111`. + +**Tech Stack:** Rust (workspace edition 2024, rust 1.88), `blake3` (already a workspace dep, Loomweave's SEI hash), `tokio` TCP bind, `axum` serve, `serde`/`serde_norway` config, `cargo nextest`. + +**Branch:** Work on the current branch `feat/serve-no-index-chirp` (ADR-044 already lives here, unpushed). The user may split at push time. + +**The band is internal, never part of the contract.** Consumers read the published file; nobody recomputes a peer's port. The `9400` band number appears only in code, never in the ADR's normative section. + +--- + +## File Structure + +| File | Responsibility | Task | +|---|---|---| +| `crates/loomweave-federation/Cargo.toml` | add `blake3` dep | 1 | +| `crates/loomweave-federation/src/loomweave_port.rs` (CREATE) | deterministic port, atomic publish/remove, validated read | 1 | +| `crates/loomweave-federation/src/lib.rs` | declare `pub mod loomweave_port;` (+ `loomweave_url` in Task 6) | 1, 6 | +| `crates/loomweave-federation/src/config.rs` | `bind: Option<SocketAddr>`, method + default updates | 2 | +| `crates/loomweave-cli/src/http_read.rs` | candidate resolution; auto-fallback; publish RAII | 2, 3 | +| `crates/loomweave-cli/src/install.rs` | YAML stub drops `bind: 9111` | 4 | +| `crates/loomweave-cli/tests/install.rs` | install-stub + bindings assertions | 4, 5 | +| `crates/loomweave-cli/src/integration_bindings.rs` | deterministic `loomweave.url`; drop fixed bind | 5 | +| `crates/loomweave-cli/tests/doctor.rs` | bindings-repair assertion | 5, 6 | +| `crates/loomweave-federation/src/loomweave_url.rs` (CREATE) 
| `resolve_loomweave_url` (file>config>none) | 6 | +| `crates/loomweave-cli/src/doctor.rs` | `check_http_config_json` reports published port | 6 | +| `docs/operator/loomweave-http-read-api.md`, `docs/operator/secret-scanning.md`, `docs/federation/contracts.md` | auto-port wording | 7 | +| `loomweave.yaml`, `wardline.yaml` (repo root) | revert `9112` stopgap | 7 | +| `docs/loomweave/adr/ADR-044-*.md`, `docs/loomweave/adr/README.md`, `docs/suite/glossary.md` | ADR Proposed→Accepted + glossary verdict | 7 | + +--- + +## Task 1: Shared ephemeral-port module (`loomweave-federation`) + +**Files:** +- Modify: `crates/loomweave-federation/Cargo.toml` +- Create: `crates/loomweave-federation/src/loomweave_port.rs` +- Modify: `crates/loomweave-federation/src/lib.rs` + +This task ships pure, fully-unit-tested functions with no dependents yet, so the tree stays green standalone. + +- [ ] **Step 1: Add the `blake3` dependency** + +In `crates/loomweave-federation/Cargo.toml`, under `[dependencies]` (alphabetical-ish, after `loomweave-core`), add: + +```toml +blake3.workspace = true +``` + +The workspace already pins `blake3 = "1.8.5"` (root `Cargo.toml:39`); `.workspace = true` inherits it. + +- [ ] **Step 2: Write the failing tests for the new module** + +Create `crates/loomweave-federation/src/loomweave_port.rs` with ONLY the test module first (the `use super::*;` will fail to resolve the items until Step 4): + +```rust +//! Loomweave read-API ephemeral-port contract (ADR-044). +//! +//! The twin of Filigree's `.filigree/ephemeral.port` convention, applied to +//! Loomweave's own federation HTTP read API. `serve` binds a per-project +//! deterministic port (ephemeral `:0` fallback) and publishes the *actually +//! bound* port to `/.loomweave/ephemeral.port`. Cross-product +//! consumers (notably Wardline, which is Python) read this file; nobody +//! recomputes a peer's port. The deterministic band here is an implementation +//! detail, never part of the file contract. +//! +//! 
File contract (ADR-044, normative): a single plain-ASCII integer TCP port, +//! optional trailing `\n`, written atomically (temp + rename), present only +//! while `serve` holds a loopback bind. Host (`127.0.0.1`) and scheme (`http`) +//! are implied, sound only because publication is loopback-only. + +use std::path::{Path, PathBuf}; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn deterministic_port_is_stable_and_in_band() { + let dir = tempfile::tempdir().unwrap(); + let a = deterministic_port(dir.path()); + let b = deterministic_port(dir.path()); + assert_eq!(a, b, "same path must yield the same port"); + assert!( + (PORT_BAND_BASE..PORT_BAND_BASE + PORT_BAND_SPAN).contains(&a), + "port {a} must land in the loomweave band [{PORT_BAND_BASE}, {})", + PORT_BAND_BASE + PORT_BAND_SPAN + ); + // Disjoint from Filigree's 8400-9399 band. + assert!(a >= 9400, "port {a} must not overlap Filigree's 8400-9399 band"); + } + + #[test] + fn deterministic_port_differs_by_path() { + let a = tempfile::tempdir().unwrap(); + let b = tempfile::tempdir().unwrap(); + // Distinct tempdirs almost always hash to distinct ports; assert the + // function is path-sensitive by checking the inputs differ and the + // computation is a pure function of the (canonical) path. + assert_ne!(a.path(), b.path()); + let pa = deterministic_port(a.path()); + let pb = deterministic_port(b.path()); + // Not guaranteed distinct (1/1000 collision), but the band membership + // and determinism are what matter; assert both are in-band. + assert!(pa >= 9400 && pb >= 9400); + } + + #[test] + fn publish_then_read_round_trips() { + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9412).expect("publish"); + assert_eq!(read_published_port(dir.path()), Some(9412)); + // Published content is the bare port plus a single trailing newline. 
+ let raw = std::fs::read_to_string(published_port_path(dir.path())).unwrap(); + assert_eq!(raw, "9412\n"); + } + + #[test] + fn publish_creates_loomweave_dir_if_absent() { + let dir = tempfile::tempdir().unwrap(); + // No .loomweave/ yet. + assert!(!dir.path().join(".loomweave").exists()); + publish_port(dir.path(), 10000).expect("publish creates .loomweave/"); + assert_eq!(read_published_port(dir.path()), Some(10000)); + } + + #[test] + fn read_tolerates_trailing_whitespace_and_newline() { + let dir = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + std::fs::write(published_port_path(dir.path()), " 9500 \n").unwrap(); + assert_eq!(read_published_port(dir.path()), Some(9500)); + } + + #[test] + fn read_rejects_malformed_zero_and_out_of_range() { + let dir = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + for bad in ["", "not-a-port", "0", "65536", "70000", "-1", "12.5"] { + std::fs::write(published_port_path(dir.path()), bad).unwrap(); + assert_eq!( + read_published_port(dir.path()), + None, + "malformed/out-of-range content {bad:?} must fold to None (fail-soft)" + ); + } + } + + #[test] + fn read_absent_file_is_none() { + let dir = tempfile::tempdir().unwrap(); + assert_eq!(read_published_port(dir.path()), None); + } + + #[test] + fn remove_is_idempotent_and_clears_the_file() { + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9999).unwrap(); + assert!(published_port_path(dir.path()).exists()); + remove_published_port(dir.path()); + assert!(!published_port_path(dir.path()).exists()); + // Second remove on an absent file is a no-op, not an error. + remove_published_port(dir.path()); + } +} +``` + +- [ ] **Step 3: Run the tests to verify they fail** + +Run: `cargo nextest run -p loomweave-federation loomweave_port` +Expected: FAIL — `cannot find function deterministic_port`, etc. 
+ +- [ ] **Step 4: Implement the module functions** + +Insert the implementation *above* the `#[cfg(test)] mod tests` block in `loomweave_port.rs`: + +```rust +/// Base of Loomweave's deterministic read-API port band. Chosen to sit +/// **above** Filigree's `8400–9399` band so the two products never contend for +/// the same number. Internal only — never part of the cross-product file +/// contract (consumers read the published file, never recompute). +pub const PORT_BAND_BASE: u16 = 9400; +/// Width of the band: ports land in `[9400, 10400)` i.e. `9400..=10399`. +pub const PORT_BAND_SPAN: u16 = 1000; + +/// Canonical path of the published port file for a project root. +#[must_use] +pub fn published_port_path(project_root: &Path) -> PathBuf { + project_root.join(".loomweave").join("ephemeral.port") +} + +/// Deterministic-but-unpredictable read-API port for a project, derived from +/// the canonical project path. Stable across runs (so a consumer's static +/// config can match it) yet path-specific (so two projects differ). Mirrors +/// Filigree's `8400 + hash % 1000`, in a disjoint band, using Loomweave's own +/// hash (blake3, as for SEI). The bound port is published; this computation is +/// the producer's *starting guess*, not a value any consumer recomputes. +#[must_use] +pub fn deterministic_port(project_root: &Path) -> u16 { + // Best-effort canonicalize so every caller (serve, install, doctor) agrees + // regardless of whether it pre-canonicalized; fall back to the path as-given. 
+ let canonical = project_root + .canonicalize() + .unwrap_or_else(|_| project_root.to_path_buf()); + let bytes = canonical.to_string_lossy(); + let hash = blake3::hash(bytes.as_bytes()); + let head = u64::from_le_bytes( + hash.as_bytes()[..8] + .try_into() + .expect("blake3 digest is 32 bytes, so [..8] is 8 bytes"), + ); + let offset = u16::try_from(head % u64::from(PORT_BAND_SPAN)) + .expect("remainder of % 1000 is < 1000, which fits u16"); + PORT_BAND_BASE + offset +} + +/// Read and validate the published port. Any missing / non-integer / +/// out-of-range / zero content folds to `None` (fail-soft, ADR-044). A `u16` +/// parse already bounds the value to `0..=65535`; `0` is rejected explicitly. +#[must_use] +pub fn read_published_port(project_root: &Path) -> Option<u16> { + let raw = std::fs::read_to_string(published_port_path(project_root)).ok()?; + raw.trim().parse::<u16>().ok().filter(|port| *port != 0) +} + +/// Atomically publish `port` to `<project_root>/.loomweave/ephemeral.port`. +/// Writes a temp file in the same directory and `rename(2)`s it into place, so +/// a concurrent reader never observes a torn value. Creates `.loomweave/` if +/// absent. The caller is responsible for the loopback-only invariant (only call +/// this when the bound address is loopback). +/// +/// # Errors +/// Returns the underlying I/O error if the directory cannot be created or the +/// temp file cannot be written/renamed. +pub fn publish_port(project_root: &Path, port: u16) -> std::io::Result<()> { + let dir = project_root.join(".loomweave"); + std::fs::create_dir_all(&dir)?; + // One `serve` per process publishes, so the PID makes the temp name unique + // within this directory without needing a random suffix. + let tmp = dir.join(format!("ephemeral.port.{}.tmp", std::process::id())); + std::fs::write(&tmp, format!("{port}\n"))?; + std::fs::rename(&tmp, dir.join("ephemeral.port"))?; + Ok(()) +} + +/// Best-effort removal of the published port file. A missing file is not an +/// error (idempotent). 
Called on clean shutdown; SIGKILL leaves a stale file, +/// which `read_published_port` validation + the ADR-034 instance-ID guard +/// handle (a stale file degrades, never corrupts). +pub fn remove_published_port(project_root: &Path) { + let _ = std::fs::remove_file(published_port_path(project_root)); +} +``` + +- [ ] **Step 5: Declare the module** + +In `crates/loomweave-federation/src/lib.rs`, add `pub mod loomweave_port;` after `pub mod filigree_url;`: + +```rust +//! Shared federation/config helpers used by CLI and MCP surfaces. + +pub mod config; +pub mod filigree; +pub mod filigree_url; +pub mod loomweave_port; +pub mod scan_results; +``` + +- [ ] **Step 6: Run the tests to verify they pass** + +Run: `cargo nextest run -p loomweave-federation loomweave_port` +Expected: PASS (8 tests). + +- [ ] **Step 7: Lint + commit** + +```bash +cargo fmt --all +cargo clippy -p loomweave-federation --all-targets --all-features -- -D warnings +git add crates/loomweave-federation/Cargo.toml crates/loomweave-federation/src/loomweave_port.rs crates/loomweave-federation/src/lib.rs +git commit -m "feat(federation): loomweave_port — deterministic read-API port + atomic publish (ADR-044)" +``` + +--- + +## Task 2: `HttpReadConfig.bind` → `Option<SocketAddr>` (green-tree migration) + +**Files:** +- Modify: `crates/loomweave-federation/src/config.rs` +- Modify: `crates/loomweave-cli/src/http_read.rs` + +`None` means *auto* (deterministic + fallback + publish, wired in Tasks 2–3). `Some(addr)` means an explicit operator override. This is one atomic task: the type change plus every construction site, ending on a green tree. Producer *behavior* (fallback/publish) is Task 3 — here, `spawn` only resolves `None` to the deterministic candidate so it compiles and runs. 
+ +- [ ] **Step 1: Write the failing config tests** + +In `crates/loomweave-federation/src/config.rs`, inside `mod tests`, add: + +```rust + #[test] + fn http_bind_defaults_to_none_auto_select() { + // ADR-044: the installer no longer pins a port; an unset bind means + // "auto-select a per-project deterministic port and publish it". + assert_eq!(HttpReadConfig::default().bind, None); + } + + #[test] + fn http_bind_none_is_treated_as_loopback() { + // Auto-select always binds 127.0.0.1, so an absent bind is loopback and + // must satisfy the loopback-trust gate without allow_non_loopback. + let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + assert!(cfg.is_loopback_bind()); + assert!(cfg.validate_loopback_trust().is_ok()); + } + + #[test] + fn http_explicit_bind_still_parses() { + let cfg = McpConfig::from_yaml_str( + "serve:\n http:\n enabled: true\n bind: \"127.0.0.1:9412\"\n", + ) + .expect("parse explicit bind"); + assert_eq!( + cfg.serve.http.bind, + Some(SocketAddr::from(([127, 0, 0, 1], 9412))) + ); + } +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `cargo nextest run -p loomweave-federation http_bind` +Expected: FAIL to compile — `bind` is `SocketAddr`, not `Option<SocketAddr>`. 
+ +- [ ] **Step 3: Change the field type and the default** + +In `config.rs`, change the `HttpReadConfig.bind` field: + +```rust + #[serde(default, deserialize_with = "deserialize_optional_socket_addr")] + pub bind: Option<SocketAddr>, +``` + +Change the `Default` impl: + +```rust +impl Default for HttpReadConfig { + fn default() -> Self { + Self { + enabled: false, + bind: None, + allow_non_loopback: false, + token_env: "WEFT_TOKEN".to_owned(), + identity_token_env: None, + wardline_taint_write: false, + } + } +} +``` + +- [ ] **Step 4: Update the loopback methods to treat `None` as loopback** + +Replace `validate_loopback_trust` and `is_loopback_bind`: + +```rust + pub fn validate_loopback_trust(&self) -> Result<(), ConfigError> { + if self.enabled && !self.allow_non_loopback && !self.is_loopback_bind() { + return Err(ConfigError::NonLoopbackHttpBind { + code: "LMWV-CONFIG-HTTP-NON-LOOPBACK", + // Safe: is_loopback_bind() is false only when bind is Some(non-loopback). + bind: self.bind.expect("non-loopback bind implies an explicit address"), + }); + } + Ok(()) + } +``` + +```rust + /// `None` (auto-select) always binds `127.0.0.1`, so it is loopback. + #[must_use] + pub fn is_loopback_bind(&self) -> bool { + self.bind.is_none_or(|addr| addr.ip().is_loopback()) + } +``` + +`validate_auth_trust` already calls `self.is_loopback_bind()` and only reads `self.bind` inside the `NonLoopbackHttpNoAuth` error arm, which is reached only when `is_loopback_bind()` is false (i.e. `Some(non-loopback)`). 
Update that one read: + +```rust + Err(ConfigError::NonLoopbackHttpNoAuth { + code: "LMWV-CONFIG-HTTP-NO-AUTH", + bind: self.bind.expect("non-loopback bind implies an explicit address"), + token_env: self.token_env.clone(), + }) +``` + +- [ ] **Step 5: Add the optional-socket deserializer** + +Below the existing `deserialize_socket_addr` in `config.rs`, add: + +```rust +fn deserialize_optional_socket_addr<'de, D>(deserializer: D) -> Result<Option<SocketAddr>, D::Error> +where + D: serde::Deserializer<'de>, +{ + let raw = Option::<String>::deserialize(deserializer)?; + match raw { + None => Ok(None), + Some(raw) => raw.parse().map(Some).map_err(|err| { + serde::de::Error::custom(format!("invalid serve.http.bind {raw:?}: {err}")) + }), + } +} +``` + +The old `deserialize_socket_addr` is now unused — delete it (clippy `dead_code` would otherwise fire). The `invalid_http_bind_fails_config_load` test still passes because the new deserializer emits the same `invalid serve.http.bind` message. + +- [ ] **Step 6: Fix the existing config tests that build/parse `bind`** + +In `config.rs` `mod tests`, update the two tests that assert a parsed bind value: + +`http_bind_is_parsed_when_config_loads`: +```rust + assert_eq!( + cfg.serve.http.bind, + Some(SocketAddr::from(([127, 0, 0, 1], 0))) + ); +``` + +The non-loopback / IPv6 / allow-non-loopback parse tests (`enabled_non_loopback_http_bind_requires_allow_non_loopback`, `enabled_lan_http_bind_requires_allow_non_loopback`, `enabled_ipv6_loopback_http_bind_is_allowed_by_default`, `enabled_non_loopback_http_bind_allows_explicit_opt_in`, `invalid_http_bind_fails_config_load`) all set `bind:` in YAML strings — those parse into `Some(..)` and need no change. + +- [ ] **Step 7: Fix `http_read.rs` construction + spawn sites** + +In `crates/loomweave-cli/src/http_read.rs`: + +(a) `spawn_with_env` currently does `let bind = config.bind;`. Replace with deterministic resolution (behavior-minimal — no fallback/publish yet; that's Task 3). 
The `project_root` is already a parameter: + +```rust + // ADR-044: an unset bind means auto-select a per-project deterministic + // read-API port. An explicit bind is honored verbatim. (Task 3 adds the + // ephemeral fallback + published-file lifecycle.) + let auto_port = config.bind.is_none(); + let bind = config.bind.unwrap_or_else(|| { + std::net::SocketAddr::from(( + [127, 0, 0, 1], + loomweave_federation::loomweave_port::deterministic_port(&project_root), + )) + }); +``` + +Thread `auto_port` and `project_root` (clone before it is moved) into `run_http_read_server`. `project_root` is currently moved into the thread closure; capture a clone for publication in Task 3. For Task 2, just add the `auto_port: bool` parameter to `run_http_read_server`'s signature. Ignoring it with a leading underscore at the call site is not allowed for a named param, so accept it and bind it to `_auto_port` inside the fn body for now: + +In `run_http_read_server` signature add (after `bind`): +```rust + auto_port: bool, +``` +And at the top of `run_http_read_server` body, until Task 3 consumes it: +```rust + let _auto_port = auto_port; +``` +Pass `auto_port` at the call site inside the spawned thread closure in `spawn_with_env`. + +(b) The three `#[cfg(test)]` tests in `http_read.rs` that build `HttpReadConfig { ..., bind, ... }` (`spawn_emits_loopback_no_token_trust_warning`, `spawn_with_taint_writer_shuts_down_cleanly`, `check_running_surfaces_supervisor_signal_after_runtime_panic`) each set `bind` to a probed `SocketAddr`. Wrap each in `Some(...)`: + +```rust + let config = HttpReadConfig { + enabled: true, + bind: Some(bind), + allow_non_loopback: false, + // ...rest unchanged + }; +``` + +(There are three such literals; update all three. `spawn_with_taint_writer_shuts_down_cleanly` and `check_running_surfaces_supervisor_signal_after_runtime_panic` use `bind, ..HttpReadConfig::default()` shorthand — change `bind,` to `bind: Some(bind),`.) 
+ +- [ ] **Step 8: Run the affected suites** + +Run: +```bash +cargo nextest run -p loomweave-federation +cargo nextest run -p loomweave-cli --lib http_read +``` +Expected: PASS. Then a workspace build to catch any other construction site: +```bash +cargo build --workspace --all-features --tests +``` +Expected: compiles clean. If any other `HttpReadConfig { bind: }` or `.bind` read surfaces, wrap/adapt it the same way. + +- [ ] **Step 9: Lint + commit** + +```bash +cargo fmt --all +cargo clippy --workspace --all-targets --all-features -- -D warnings +git add crates/loomweave-federation/src/config.rs crates/loomweave-cli/src/http_read.rs +git commit -m "feat(config): serve.http.bind is Option; None auto-selects per-project port (ADR-044)" +``` + +--- + +## Task 3: Producer — ephemeral fallback + publish RAII + +**Files:** +- Modify: `crates/loomweave-cli/src/http_read.rs` + +Add: auto-port falls back to `:0` on `AddrInUse`; the actually-bound port is published loopback-only via an RAII guard that unlinks on drop (covers graceful shutdown, error-return, and panic-unwind in one place). + +- [ ] **Step 1: Write the failing producer tests** + +In `http_read.rs` `mod tests`, add. These reuse the `http_runtime_test_guard()` and `ReaderPool` patterns already in the file: + +```rust + /// ADR-044: with `bind: None`, two serves on distinct project paths each + /// bind their own deterministic port and publish their own + /// `.loomweave/ephemeral.port`. Neither fails to bind. 
+ #[test] + fn auto_port_publishes_distinct_ports_per_project() { + use loomweave_federation::config::HttpReadConfig; + use loomweave_federation::loomweave_port::read_published_port; + use loomweave_storage::ReaderPool; + + let _guard = http_runtime_test_guard(); + + let make = |id: &str| { + let dir = tempfile::tempdir().expect("tempdir"); + let db = dir.path().join("loomweave.db"); + let readers = ReaderPool::open(&db, 4).expect("reader pool"); + let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + let iid = crate::instance::parse_instance_id_for_test(id).expect("iid"); + let server = spawn(dir.path().to_path_buf(), db, readers, iid, &cfg) + .expect("spawn") + .expect("enabled => Some"); + (dir, server) + }; + + let (dir_a, server_a) = make("00000000-0000-4000-8000-0000000000a1"); + let (dir_b, server_b) = make("00000000-0000-4000-8000-0000000000a2"); + + let port_a = read_published_port(dir_a.path()).expect("a published a port"); + let port_b = read_published_port(dir_b.path()).expect("b published a port"); + assert!(port_a >= 9400 && port_b >= 9400, "ports in the loomweave band"); + // Two live servers => two live ports => they cannot be equal. + assert_ne!(port_a, port_b, "concurrent serves must hold distinct ports"); + + server_a.shutdown().expect("shutdown a"); + server_b.shutdown().expect("shutdown b"); + } + + /// The published file is removed on clean shutdown. 
+ #[test] + fn auto_port_file_removed_on_clean_shutdown() { + use loomweave_federation::config::HttpReadConfig; + use loomweave_federation::loomweave_port::{published_port_path, read_published_port}; + use loomweave_storage::ReaderPool; + + let _guard = http_runtime_test_guard(); + + let dir = tempfile::tempdir().expect("tempdir"); + let db = dir.path().join("loomweave.db"); + let readers = ReaderPool::open(&db, 4).expect("reader pool"); + let cfg = HttpReadConfig { + enabled: true, + bind: None, + ..HttpReadConfig::default() + }; + let iid = crate::instance::parse_instance_id_for_test("00000000-0000-4000-8000-0000000000a3") + .expect("iid"); + let server = spawn(dir.path().to_path_buf(), db, readers, iid, &cfg) + .expect("spawn") + .expect("enabled => Some"); + + assert!(read_published_port(dir.path()).is_some(), "published while serving"); + server.shutdown().expect("shutdown"); + assert!( + !published_port_path(dir.path()).exists(), + "published port file must be gone after clean shutdown" + ); + } + + /// An explicit (operator-set) bind that is already in use is a HARD error — + /// the operator asked for that specific port. Only auto-select falls back. + #[test] + fn explicit_bind_in_use_is_a_hard_error() { + use loomweave_federation::config::HttpReadConfig; + use loomweave_storage::ReaderPool; + use std::net::{SocketAddr, TcpListener}; + + let _guard = http_runtime_test_guard(); + + // Hold a real listener so the address is genuinely occupied. 
+ let held = TcpListener::bind(("127.0.0.1", 0)).expect("hold a port"); + let bind: SocketAddr = held.local_addr().expect("addr"); + + let dir = tempfile::tempdir().expect("tempdir"); + let db = dir.path().join("loomweave.db"); + let readers = ReaderPool::open(&db, 4).expect("reader pool"); + let cfg = HttpReadConfig { + enabled: true, + bind: Some(bind), + ..HttpReadConfig::default() + }; + let iid = crate::instance::parse_instance_id_for_test("00000000-0000-4000-8000-0000000000a4") + .expect("iid"); + + let result = spawn(dir.path().to_path_buf(), db, readers, iid, &cfg); + assert!( + result.is_err(), + "an explicit in-use bind must fail, not silently fall back to :0" + ); + } +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `cargo nextest run -p loomweave-cli --lib http_read::tests::auto_port` +Expected: FAIL — no port is published yet (Task 2 binds the deterministic port but does not publish), and the auto/explicit fallback split is not implemented. + +- [ ] **Step 3: Add the RAII publish guard** + +Near the top of `http_read.rs` (after the imports, before `HttpReadServer`), add: + +```rust +/// Removes the published `.loomweave/ephemeral.port` on drop — covering +/// graceful shutdown, error return, and panic-unwind in one place. Only +/// SIGKILL can strand a stale file, which the read-side validation and the +/// ADR-034 instance-ID guard tolerate (a stale file degrades, never corrupts). +struct PublishedPortGuard { + project_root: PathBuf, +} + +impl Drop for PublishedPortGuard { + fn drop(&mut self) { + loomweave_federation::loomweave_port::remove_published_port(&self.project_root); + } +} +``` + +- [ ] **Step 4: Implement fallback + publish in `run_http_read_server`** + +`run_http_read_server` now needs `auto_port: bool` (added in Task 2) and a clone of `project_root` for publication. `project_root: PathBuf` is already a parameter and is moved into `AppState` later — capture the publish path *before* that move. 
+ +Replace the bind block (currently a single `tokio::net::TcpListener::bind(bind)`) with auto-fallback, and add publication right after `local_addr` is known. Inside the `runtime.block_on(async move { ... })`: + +```rust + // ADR-044: auto-selected ports fall back to an OS-assigned ephemeral + // port if the deterministic port is taken; an explicit operator bind + // does NOT fall back (a taken explicit port is a hard error). + let listener = match tokio::net::TcpListener::bind(bind).await { + Ok(listener) => listener, + Err(err) if auto_port && err.kind() == std::io::ErrorKind::AddrInUse => { + let fallback = std::net::SocketAddr::from(([127, 0, 0, 1], 0)); + match tokio::net::TcpListener::bind(fallback).await { + Ok(listener) => listener, + Err(err) => { + let _ = ready_tx + .send(Err(anyhow!("bind HTTP read API ephemeral fallback: {err}"))); + return Err(anyhow!("bind HTTP read API ephemeral fallback: {err}")); + } + } + } + Err(err) => { + let _ = ready_tx.send(Err(anyhow!("bind HTTP read API on {bind}: {err}"))); + return Err(anyhow!("bind HTTP read API on {bind}: {err}")); + } + }; + let local_addr = match listener.local_addr() { + Ok(addr) => addr, + Err(err) => { + let _ = ready_tx.send(Err(anyhow!("read HTTP read API local addr: {err}"))); + return Err(anyhow!("read HTTP read API local addr: {err}")); + } + }; + // Publish the ACTUALLY-bound port loopback-only (ADR-044 file contract). + // A non-loopback bind publishes NO file — consumers fall back to their + // configured URL. The guard unlinks the file when this scope unwinds. + let _published_port_guard = if local_addr.ip().is_loopback() { + if let Err(err) = + loomweave_federation::loomweave_port::publish_port(&project_root, local_addr.port()) + { + // Publication is best-effort enrichment: a failure to write the + // discovery file must not take the read API down. 
+ tracing::warn!( + error = %err, + port = local_addr.port(), + "failed to publish .loomweave/ephemeral.port; consumers will fall back to configured URL" + ); + None + } else { + Some(PublishedPortGuard { + project_root: project_root.clone(), + }) + } + } else { + None + }; + let _ = ready_tx.send(Ok(HttpReadReady { + local_addr, + readers_identity, + })); +``` + +Note the `_published_port_guard` binding lives for the rest of the `block_on` async scope (through `serve_future`), so it drops — and unlinks — exactly when serving ends (graceful, error, or panic). Delete the old `let _auto_port = auto_port;` placeholder line from Task 2 now that `auto_port` is consumed. + +- [ ] **Step 5: Run the producer tests** + +Run: `cargo nextest run -p loomweave-cli --lib http_read` +Expected: PASS — including the three new tests and all pre-existing ones. + +- [ ] **Step 6: Lint + commit** + +```bash +cargo fmt --all +cargo clippy --workspace --all-targets --all-features -- -D warnings +git add crates/loomweave-cli/src/http_read.rs +git commit -m "feat(serve): auto-select read-API port with ephemeral fallback; publish .loomweave/ephemeral.port (ADR-044)" +``` + +--- + +## Task 4: Installer stub stops pinning `9111` + +**Files:** +- Modify: `crates/loomweave-cli/src/install.rs` +- Modify: `crates/loomweave-cli/tests/install.rs` + +- [ ] **Step 1: Update the YAML stub** + +In `install.rs`, the `LOOMWEAVE_YAML_STUB` ends with: + +``` +serve: + mcp: + enable_write_tools: false + http: + enabled: false + bind: 127.0.0.1:9111 +"; +``` + +Replace the `http:` block (drop the `bind:` line, add an explanatory comment): + +``` +serve: + mcp: + enable_write_tools: false + http: + enabled: false + # The read-API port is auto-selected per project (deterministic, with an + # ephemeral fallback) and published to .loomweave/ephemeral.port while + # serving. Set `bind:` explicitly only to pin a fixed port (ADR-044). 
+"; +``` + +- [ ] **Step 2: Update the install test that asserts the stub bind** + +There is no dedicated stub test asserting `serve.http.bind` in `tests/install.rs` for the bare `install` path; the `9111` assertions are all in the `--all` bindings test (Task 5) and `doctor.rs` (Task 5/6). Confirm with: + +Run: `grep -n "9111\|http\"\]\[\"bind" crates/loomweave-cli/tests/install.rs` +If a bare-stub test asserts the bind, change it to assert the key is absent: +```rust + assert!(loomweave_yaml["serve"]["http"].get("bind").is_none()); +``` + +- [ ] **Step 3: Run + commit** + +```bash +cargo nextest run -p loomweave-cli --test install +cargo fmt --all +git add crates/loomweave-cli/src/install.rs crates/loomweave-cli/tests/install.rs +git commit -m "feat(install): YAML stub no longer pins serve.http.bind 9111 (ADR-044)" +``` + +(If Step 2 found nothing to change, commit `install.rs` alone.) + +--- + +## Task 5: `integration_bindings` writes the deterministic URL + +**Files:** +- Modify: `crates/loomweave-cli/src/integration_bindings.rs` +- Modify: `crates/loomweave-cli/tests/install.rs` +- Modify: `crates/loomweave-cli/tests/doctor.rs` + +`loomweave install --all` currently stamps `bind: 9111` into `loomweave.yaml` and `loomweave.url: http://127.0.0.1:9111` into `wardline.yaml` + `.mcp.json` — the real cross-project root cause. After this task: it stops writing a fixed `bind` (so auto-port + fallback engages), and writes the **deterministic** `loomweave.url` (the best static target until Wardline adopts consume-time resolution; the published file overrides it at runtime). 
+ +- [ ] **Step 1: Compute the deterministic Loomweave URL per project** + +In `integration_bindings.rs`, delete the two fixed constants: + +```rust +const LOOMWEAVE_HTTP_BIND: &str = "127.0.0.1:9111"; +const LOOMWEAVE_HTTP_URL: &str = "http://127.0.0.1:9111"; +``` + +Add the deterministic URL to `DesiredBindings` and compute it in `desired_bindings`: + +```rust +struct DesiredBindings { + filigree_base_url: String, + wardline_filigree_url: String, + loomweave_url: String, +} +``` + +```rust +fn desired_bindings(project_root: &Path) -> DesiredBindings { + let filigree_base_url = live_filigree_base_url(project_root) + .or_else(|| configured_filigree_base_url(project_root)) + .unwrap_or_else(|| DEFAULT_FILIGREE_BASE_URL.to_owned()); + let wardline_filigree_url = format!( + "{}/api/weft/scan-results", + filigree_base_url.trim_end_matches('/') + ); + // ADR-044: seed the consumer's static target with this project's + // deterministic read-API port. serve binds the same port (barring an + // ephemeral fallback), and the published .loomweave/ephemeral.port file + // overrides this at runtime once a consumer resolves consume-time. + let port = loomweave_federation::loomweave_port::deterministic_port(project_root); + let loomweave_url = format!("http://127.0.0.1:{port}"); + DesiredBindings { + filigree_base_url, + wardline_filigree_url, + loomweave_url, + } +} +``` + +- [ ] **Step 2: Stop writing a fixed `bind` into `loomweave.yaml`** + +In `install_loomweave_yaml`, the `serve.http` block currently inserts `bind`. 
Remove that line: + +```rust + let serve = ensure_object(root, "serve")?; + let http = ensure_object(serve, "http")?; + http.insert("enabled".to_owned(), json!(true)); + http.insert("wardline_taint_write".to_owned(), json!(true)); + write_yaml_if_changed(&path, &value) +``` + +In `loomweave_yaml_ok`, drop the `bind` predicate from the `serve.http` check: + +```rust + && value + .get("serve") + .and_then(|serve| serve.get("http")) + .is_some_and(|http| { + http.get("enabled").and_then(Value::as_bool) == Some(true) + && http.get("wardline_taint_write").and_then(Value::as_bool) == Some(true) + })) +``` + +- [ ] **Step 3: Write the deterministic URL into `wardline.yaml` + `.mcp.json`** + +`install_wardline_yaml`: +```rust + loomweave.insert("url".to_owned(), json!(desired.loomweave_url)); +``` + +`wardline_yaml_ok`: +```rust + Ok(value + .get("loomweave") + .and_then(|loomweave| loomweave.get("url")) + .and_then(Value::as_str) + == Some(desired.loomweave_url.as_str()) + && value + .get("filigree") + .and_then(|filigree| filigree.get("url")) + .and_then(Value::as_str) + == Some(desired.wardline_filigree_url.as_str())) +``` + +`desired_wardline_args`: +```rust +fn desired_wardline_args(desired: &DesiredBindings) -> Value { + json!([ + "mcp", + "--root", + ".", + "--loomweave-url", + desired.loomweave_url, + "--filigree-url", + desired.wardline_filigree_url + ]) +} +``` + +- [ ] **Step 4: Update the `--all` bindings test** + +In `tests/install.rs`, `install_all_wires_three_way_integration_bindings`: the install canonicalizes `--path`, so compute the expected URL the same way. Replace the `bind`/`loomweave-url` assertions: + +```rust + // ADR-044: no fixed bind is written; the port is auto-selected at serve time. 
+ assert!(loomweave_yaml["serve"]["http"].get("bind").is_none()); + assert_eq!( + loomweave_yaml["serve"]["http"]["wardline_taint_write"], + serde_json::json!(true) + ); + + let expected_port = loomweave_federation::loomweave_port::deterministic_port( + &dir.path().canonicalize().unwrap(), + ); + let expected_loomweave_url = format!("http://127.0.0.1:{expected_port}"); + + let wardline_yaml = read_yaml(&dir.path().join("wardline.yaml")); + assert_eq!(wardline_yaml["loomweave"]["url"], expected_loomweave_url); + assert_eq!( + wardline_yaml["filigree"]["url"], + "http://127.0.0.1:8749/api/weft/scan-results" + ); + + let mcp: serde_json::Value = + serde_json::from_str(&fs::read_to_string(dir.path().join(".mcp.json")).unwrap()).unwrap(); + assert_eq!( + mcp["mcpServers"]["wardline"]["args"], + serde_json::json!([ + "mcp", + "--root", + ".", + "--loomweave-url", + expected_loomweave_url, + "--filigree-url", + "http://127.0.0.1:8749/api/weft/scan-results" + ]) + ); +``` + +Confirm `tests/install.rs` can reach the helper: `loomweave-cli` depends on `loomweave-federation`, so `loomweave_federation::loomweave_port::deterministic_port` is in scope from an integration test. If the import path errors, add `use loomweave_federation::loomweave_port::deterministic_port;` and call it unqualified. + +- [ ] **Step 5: Update the `doctor.rs` bindings-repair test** + +In `tests/doctor.rs`, the repair test (around line 198–227) asserts `--loomweave-url http://127.0.0.1:9111`. Replace with the computed URL (the doctor test also operates on a tempdir; check whether it canonicalizes — match whatever the repaired files actually contain by computing from the same path the repair used): + +```rust + let expected_port = loomweave_federation::loomweave_port::deterministic_port( + &dir.path().canonicalize().unwrap(), + ); + let expected_loomweave_url = format!("http://127.0.0.1:{expected_port}"); + // ... 
+ assert_eq!( + mcp["mcpServers"]["wardline"]["args"], + serde_json::json!([ + "mcp", + "--root", + ".", + "--loomweave-url", + expected_loomweave_url, + "--filigree-url", + "http://127.0.0.1:8749/api/weft/scan-results" + ]) + ); +``` + +If the doctor test reads `loomweave.yaml["serve"]["http"]["bind"]` anywhere, change that to `.get("bind").is_none()`. + +- [ ] **Step 6: Run + commit** + +```bash +cargo nextest run -p loomweave-cli --test install --test doctor +cargo nextest run -p loomweave-cli --lib integration_bindings +cargo fmt --all +cargo clippy --workspace --all-targets --all-features -- -D warnings +git add crates/loomweave-cli/src/integration_bindings.rs crates/loomweave-cli/tests/install.rs crates/loomweave-cli/tests/doctor.rs +git commit -m "feat(install): integration bindings use per-project deterministic loomweave URL, no fixed bind (ADR-044)" +``` + +--- + +## Task 6 (CUTTABLE): `resolve_loomweave_url` + its one caller (doctor) + +**Files:** +- Create: `crates/loomweave-federation/src/loomweave_url.rs` +- Modify: `crates/loomweave-federation/src/lib.rs` +- Modify: `crates/loomweave-cli/src/doctor.rs` + +The resolver is the reference reader of the file contract (the shape Wardline's Python twin mirrors), and `doctor`'s HTTP check is its one in-tree caller — so it ships *with* a caller, not as dead code. **This task is cuttable**: if it slips, the collision is already fixed by Tasks 1–5; defer resolver+caller as a unit. + +- [ ] **Step 1: Write the failing resolver tests** + +Create `crates/loomweave-federation/src/loomweave_url.rs`: + +```rust +//! Resolve the live Loomweave read-API base URL (ADR-044). +//! +//! The reference reader of the `.loomweave/ephemeral.port` file contract and +//! the twin of [`crate::filigree_url`]. Precedence (consumer-side): the +//! published live port wins over a configured URL, which wins over nothing. +//! (ADR-044's higher "explicit flag/env" precedence level is realized by each +//! 
consumer's own CLI/env handling — e.g. Wardline's `--loomweave-url` — not by +//! this library function.) Fail-soft throughout: a missing/corrupt file folds +//! to the configured URL; absent both, `None` (federation simply degrades). + +use std::path::Path; + +use crate::loomweave_port::read_published_port; + +/// The live published port file `.loomweave/ephemeral.port`. +pub const SOURCE_EPHEMERAL_PORT: &str = ".loomweave/ephemeral.port"; +/// A statically configured URL (e.g. `wardline.yaml: loomweave.url`). +pub const SOURCE_CONFIG: &str = "config"; +/// Neither a published file nor a configured URL — federation is absent. +pub const SOURCE_NONE: &str = "none"; + +/// Where a resolved Loomweave read-API URL came from. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct LoomweaveUrlResolution { + /// The URL a consumer should call, or `None` when nothing resolves. + pub resolved_url: Option<String>, + /// One of the `SOURCE_*` labels. + pub source: &'static str, +} + +/// Resolve the read-API URL, preferring the live published port over the +/// configured URL. `configured_url` is the consumer's static fallback (pass +/// `None` if it has none). 
+#[must_use] +pub fn resolve_loomweave_url( + configured_url: Option<&str>, + project_root: &Path, +) -> LoomweaveUrlResolution { + if let Some(port) = read_published_port(project_root) { + return LoomweaveUrlResolution { + resolved_url: Some(format!("http://127.0.0.1:{port}")), + source: SOURCE_EPHEMERAL_PORT, + }; + } + match configured_url { + Some(url) if !url.trim().is_empty() => LoomweaveUrlResolution { + resolved_url: Some(url.to_owned()), + source: SOURCE_CONFIG, + }, + _ => LoomweaveUrlResolution { + resolved_url: None, + source: SOURCE_NONE, + }, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::loomweave_port::publish_port; + + #[test] + fn published_port_beats_configured_url() { + let dir = tempfile::tempdir().unwrap(); + publish_port(dir.path(), 9412).unwrap(); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); + assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:9412")); + assert_eq!(res.source, SOURCE_EPHEMERAL_PORT); + } + + #[test] + fn falls_back_to_configured_url_when_no_file() { + let dir = tempfile::tempdir().unwrap(); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); + assert_eq!(res.resolved_url.as_deref(), Some("http://127.0.0.1:9111")); + assert_eq!(res.source, SOURCE_CONFIG); + } + + #[test] + fn corrupt_file_folds_to_configured_url() { + let dir = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(dir.path().join(".loomweave")).unwrap(); + std::fs::write( + dir.path().join(".loomweave").join("ephemeral.port"), + "not-a-port", + ) + .unwrap(); + let res = resolve_loomweave_url(Some("http://127.0.0.1:9111"), dir.path()); + assert_eq!(res.source, SOURCE_CONFIG); + } + + #[test] + fn nothing_resolves_to_none() { + let dir = tempfile::tempdir().unwrap(); + let res = resolve_loomweave_url(None, dir.path()); + assert_eq!(res.resolved_url, None); + assert_eq!(res.source, SOURCE_NONE); + } +} +``` + +Declare it in `lib.rs`: +```rust +pub mod loomweave_url; +``` 
+ +- [ ] **Step 2: Run to verify failure, then pass** + +Run: `cargo nextest run -p loomweave-federation loomweave_url` +Expected: FAIL (module not yet declared / functions absent) → after Step 1 is fully in place, PASS (4 tests). + +- [ ] **Step 3: Write the failing doctor test** + +In `crates/loomweave-cli/tests/doctor.rs`, add a test that a serving project's published port shows up in the HTTP check. Since spawning a real server in the doctor integration test is heavy, instead test the file-present branch by writing the file directly, then run `doctor_json` and assert the `http.config` check reports the published port: + +```rust +#[test] +fn doctor_reports_published_ephemeral_port() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + // Simulate a live serve having published its port. + let loomweave_dir = dir.path().join(".loomweave"); + std::fs::create_dir_all(&loomweave_dir).unwrap(); + std::fs::write(loomweave_dir.join("ephemeral.port"), "9876\n").unwrap(); + + let (code, json) = doctor_json(dir.path(), false); + assert_eq!(code, 0, "{json}"); + let http = json["checks"] + .as_array() + .unwrap() + .iter() + .find(|c| c["id"] == "http.config") + .expect("http.config check present"); + assert_eq!(http["status"], "ok"); + assert!( + http["message"].as_str().unwrap_or("").contains("9876"), + "http.config should report the published live port: {http}" + ); +} +``` + +(`DoctorJsonCheck` serializes its human text as the `message` field — confirmed in `doctor.rs:105-110`.) + +- [ ] **Step 4: Run to verify failure** + +Run: `cargo nextest run -p loomweave-cli --test doctor doctor_reports_published` +Expected: FAIL — `check_http_config_json` does not read the published file yet. 
+ +- [ ] **Step 5: Wire the resolver into `check_http_config_json`** + +Replace `check_http_config_json` in `doctor.rs`: + +```rust +fn check_http_config_json(project_root: &Path) -> DoctorJsonCheck { + let Some(config) = read_loomweave_yaml(project_root) else { + return DoctorJsonCheck::warning("http.config", "loomweave.yaml is absent or unparseable"); + }; + let enabled = config + .get("serve") + .and_then(|serve| serve.get("http")) + .and_then(|http| http.get("enabled")) + .and_then(Value::as_bool) + == Some(true); + if !enabled { + return DoctorJsonCheck::warning("http.config", "HTTP serve config is disabled or incomplete"); + } + // ADR-044: prefer the live published port over the (now usually absent) + // static bind. A running serve publishes .loomweave/ephemeral.port. + let resolution = + loomweave_federation::loomweave_url::resolve_loomweave_url(None, project_root); + if let Some(url) = resolution.resolved_url { + return DoctorJsonCheck::ok( + "http.config", + format!("HTTP read API published on {url} ({})", resolution.source), + ); + } + let bind = config + .get("serve") + .and_then(|serve| serve.get("http")) + .and_then(|http| http.get("bind")) + .and_then(Value::as_str) + .unwrap_or(""); + if bind.trim().is_empty() { + DoctorJsonCheck::ok( + "http.config", + "HTTP enabled; read-API port auto-selected and published to .loomweave/ephemeral.port while serving", + ) + } else { + DoctorJsonCheck::ok("http.config", format!("HTTP configured on {bind} (auto-published while serving)")) + } +} +``` + +- [ ] **Step 6: Run resolver + doctor suites** + +Run: +```bash +cargo nextest run -p loomweave-federation loomweave_url +cargo nextest run -p loomweave-cli --test doctor +``` +Expected: PASS. 
+ +- [ ] **Step 7: Lint + commit** + +```bash +cargo fmt --all +cargo clippy --workspace --all-targets --all-features -- -D warnings +git add crates/loomweave-federation/src/loomweave_url.rs crates/loomweave-federation/src/lib.rs crates/loomweave-cli/src/doctor.rs crates/loomweave-cli/tests/doctor.rs +git commit -m "feat(doctor): resolve_loomweave_url + doctor reports live published read-API port (ADR-044)" +``` + +--- + +## Task 7: Docs, ADR acceptance, stopgap revert + +**Files:** +- Modify: `docs/operator/loomweave-http-read-api.md`, `docs/operator/secret-scanning.md`, `docs/federation/contracts.md` +- Modify: `loomweave.yaml`, `wardline.yaml` (repo root) +- Modify: `docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md`, `docs/loomweave/adr/README.md`, `docs/suite/glossary.md` + +- [ ] **Step 1: Update operator docs** + +In each of `docs/operator/loomweave-http-read-api.md`, `docs/operator/secret-scanning.md`, `docs/federation/contracts.md`, replace the `bind: 127.0.0.1:9111` / `default: 127.0.0.1:9111` references with the auto-port description. Read each hit (from `grep -n 9111 <file>`) and rewrite in context, e.g.: + +> The read-API port is auto-selected per project — a deterministic port in Loomweave's band (`9400–10399`, disjoint from Filigree's `8400–9399`) with an ephemeral fallback — and published to `.loomweave/ephemeral.port` while `serve` runs. Set `serve.http.bind` explicitly only to pin a fixed port. (ADR-044) + +Leave `docs/loomweave/adr/ADR-044-*.md`'s own `9111` references (they describe the *problem*) and `docs/archive/**` (archived, non-normative) and the Filigree-side `docs/federation/filigree-side/ADR-014-*.md` (a Filigree example) unchanged. + +- [ ] **Step 2: Revert the local stopgaps** + +`loomweave.yaml` (repo root) currently has `serve.http.bind: 127.0.0.1:9112`. 
Remove the `bind:` line so this very project uses auto-port: + +```yaml +serve: + http: + enabled: true + wardline_taint_write: true +``` + +For `wardline.yaml`, make a conscious choice (advisor item 1): pin it to *this* project's deterministic port so local Wardline→Loomweave federation keeps working until the Wardline Python twin lands. Compute it: + +```bash +cargo run -p loomweave-cli -- doctor --json /home/john/loomweave 2>/dev/null | grep -o '127.0.0.1:[0-9]*' | head -1 +``` +…or simpler, add a throwaway `#[test]` that prints `deterministic_port(Path::new("/home/john/loomweave"))`, or compute via a one-off `cargo run`. Then set: + +```yaml +loomweave: + # ADR-044: pinned to this project's deterministic read-API port. The published + # .loomweave/ephemeral.port overrides this once Wardline resolves consume-time + # (clarion-7f574bc34f follow-up). Until then this static target keeps local + # wardline -> loomweave federation working. + url: http://127.0.0.1:<port> +``` + +Verify by starting serve and confirming the published file matches: +```bash +# In one shell: +cargo run -p loomweave-cli -- serve /home/john/loomweave & +sleep 2 +cat /home/john/loomweave/.loomweave/ephemeral.port # should equal <port> +kill %1 +``` + +- [ ] **Step 3: Glossary verdict (acceptance gate)** + +`docs/loomweave/adr/README.md` requires a `glossary.md` verdict before an ADR moves Proposed→Accepted for any cross-product-visible term. `.loomweave/ephemeral.port` mirrors Filigree's `.filigree/ephemeral.port` — a **managed clash** (shared convention, distinct per-product paths). Read `docs/suite/glossary.md`, find the `ephemeral.port` / Filigree entry, and add a Loomweave row recording the managed-clash verdict and the mapping (`.filigree/ephemeral.port` ↔ `.loomweave/ephemeral.port`, identical format, loopback-only). If no such entry exists, add one under the federation-terms section. 
+ +- [ ] **Step 4: Flip ADR-044 to Accepted** + +In `ADR-044-*.md`, change `**Status**: Proposed` → `**Status**: Accepted` and add a one-line acceptance note referencing the glossary verdict and the implementing commits. In `README.md`, change the ADR-044 row's trailing `| Proposed |` → `| Accepted |`. + +- [ ] **Step 5: Full CI floor** + +Run the complete gate (CLAUDE.md): +```bash +cargo fmt --all -- --check +cargo clippy --workspace --all-targets --all-features -- -D warnings +cargo build --workspace --bins +cargo nextest run --workspace --all-features +RUSTDOCFLAGS="-D warnings" cargo doc --workspace --no-deps --all-features +cargo deny check +``` +Expected: all green. + +- [ ] **Step 6: Wardline boundary gate** + +This feature reads external input (the port file, config files). Run: +```bash +wardline scan . --fail-on ERROR +``` +Expected: exit 0. If it trips, fix at the boundary (the `read_published_port` parse is already validated/fail-soft; address any new finding). + +- [ ] **Step 7: Commit + close the issue** + +```bash +git add docs/operator docs/federation/contracts.md docs/loomweave/adr docs/suite/glossary.md loomweave.yaml wardline.yaml +git commit -m "docs(adr): accept ADR-044; auto-port docs, glossary verdict, revert 9112 stopgap" +``` + +Close `clarion-7f574bc34f` with a summary comment (CLI): `filigree close clarion-7f574bc34f --actor opus`. + +--- + +## Self-Review + +**Spec coverage (ADR-044 §Decision + §Verification):** +- Decision 1 (deterministic port + ephemeral fallback) → Task 1 (`deterministic_port`) + Task 3 (fallback). +- Decision 2 (publish `.loomweave/ephemeral.port` per file contract) → Task 1 (`publish_port`, atomic, port-only, trailing `\n`) + Task 3 (loopback-only, lifecycle via RAII). +- Decision 3 (loomweave-side resolver, one of conforming readers) → Task 6 (`resolve_loomweave_url` + doctor caller). 
+- Decision 4 (installer stops pinning a port; explicit override honored) → Task 4 (stub) + Task 5 (bindings) + Task 2 (`Some` honored, `None` auto). +- Verification: distinct ports/no bind failure → T3 `auto_port_publishes_distinct_ports_per_project`; collision→ephemeral fallback reflects actual port → T3 (fallback path) + T1; file contract (bare port, temp+rename, no file on non-loopback) → T1 + T3 publish branch; precedence (file>config>none) → T6; fail-soft (malformed/out-of-range/refused) → T1 read tests + T6 corrupt test; removed on clean shutdown → T3 `auto_port_file_removed_on_clean_shutdown`; wardline scan against non-9111 serve → realized by Task 5 deterministic URL + Task 7 local verify. +- Resolved-but-refused (closed port) softness: covered behaviorally — `resolve_loomweave_url` returns the URL; the *connection attempt* is the consumer's (Wardline's) responsibility, and the ADR-034 instance-ID guard backstops a stale file. No in-tree consumer connects, so no Rust test asserts refusal; noted, not silently dropped. + +**Placeholder scan:** every code step shows complete code. Two reads-to-confirm remain (doctor `DoctorJsonCheck` serialized field name in T6 Step 3; any bare-stub bind assertion in T4 Step 2) — both are explicit "read X, match the real name" instructions with the fallback spelled out, not hidden TODOs. + +**Type consistency:** `deterministic_port(&Path) -> u16`, `read_published_port(&Path) -> Option<u16>`, `publish_port(&Path, u16) -> io::Result<()>`, `remove_published_port(&Path)`, `published_port_path(&Path) -> PathBuf`, `resolve_loomweave_url(Option<&str>, &Path) -> LoomweaveUrlResolution` are used identically across Tasks 1, 3, 5, 6. `HttpReadConfig.bind: Option<SocketAddr>` is consistent across Tasks 2–3 and all test sites. `auto_port: bool` is added in Task 2 and consumed in Task 3 (placeholder `_auto_port` removed there). 
From c7f253000b751ada41502f4c6f1733040911d3a8 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:45:01 +1000 Subject: [PATCH 17/27] fix(adr-044): gitignore ephemeral.port template, wire project_status read-API report, reconcile ADR precedence - install.rs GITIGNORE_CONTENTS now ignores .loomweave/ephemeral.port so freshly-installed projects do not show the runtime port file as untracked while serving; install test asserts the new rule. - project_status_get reports loomweave_read_api (resolved_url + resolution_source) via a query-time resolve_loomweave_url(None, project_root), the second in-repo consumer named by ADR-044 alongside doctor. Additive field; existing project_status tests unaffected. Two new tests cover published-port and no-file ("none") cases. - ADR-044: clarify precedence level 1 is an operator's deliberately-supplied target (typed flag/env), while an installer-seeded --loomweave-url in .mcp.json is config-tier (precedence 3) so the published file self-heals it; added a Related follow-up bullet for Wardline (clarion-7f574bc34f). Co-Authored-By: Claude Opus 4.8 --- crates/loomweave-cli/src/install.rs | 7 +++- crates/loomweave-cli/tests/install.rs | 1 + crates/loomweave-mcp/src/tools/status.rs | 14 +++++++ crates/loomweave-mcp/tests/storage_tools.rs | 38 +++++++++++++++++++ ...044-read-api-ephemeral-port-publication.md | 17 +++++++-- 5 files changed, 73 insertions(+), 4 deletions(-) diff --git a/crates/loomweave-cli/src/install.rs b/crates/loomweave-cli/src/install.rs index 626654c6..a690b229 100644 --- a/crates/loomweave-cli/src/install.rs +++ b/crates/loomweave-cli/src/install.rs @@ -83,7 +83,12 @@ serve: const GITIGNORE_CONTENTS: &str = "\ # Loomweave .gitignore — ADR-005 tracked-vs-excluded list. # Tracked (committed): loomweave.db, config.json, .gitignore itself. -# Excluded (ignored): WAL sidecars, shadow DB, per-run logs, tmp scratch. 
+# Excluded (ignored): WAL sidecars, shadow DB, per-run logs, tmp scratch, +# the read-API live port discovery file. + +# Read-API live port discovery file (ADR-044): present only while serve runs, +# rewritten per bind, loopback-only — a runtime artifact, never committed. +ephemeral.port # SQLite write-ahead files never belong in the repo. *-wal diff --git a/crates/loomweave-cli/tests/install.rs b/crates/loomweave-cli/tests/install.rs index 3a73c3e3..66bdc47b 100644 --- a/crates/loomweave-cli/tests/install.rs +++ b/crates/loomweave-cli/tests/install.rs @@ -58,6 +58,7 @@ fn install_creates_loomweave_dir_with_expected_contents() { "runs/*/log.jsonl", "*-wal", "*-shm", + "ephemeral.port", ] { assert!( gitignore.contains(rule), diff --git a/crates/loomweave-mcp/src/tools/status.rs b/crates/loomweave-mcp/src/tools/status.rs index abeec78b..5df6669f 100644 --- a/crates/loomweave-mcp/src/tools/status.rs +++ b/crates/loomweave-mcp/src/tools/status.rs @@ -283,6 +283,7 @@ impl ServerState { }, "llm": self.llm_diagnostics_json(), "filigree": self.filigree_diagnostics_json(), + "loomweave_read_api": self.loomweave_read_api_json(), }); Ok(success_envelope(result)) @@ -354,6 +355,19 @@ impl ServerState { } } + /// ADR-044: report the live read-API endpoint resolved from + /// `.loomweave/ephemeral.port` (the reference reader; `doctor` reports the + /// same). Pass `None` config — `project_status` has no static loomweave URL + /// of its own; this surfaces whether serve is currently publishing. 
+ pub(crate) fn loomweave_read_api_json(&self) -> Value { + let resolution = + loomweave_federation::loomweave_url::resolve_loomweave_url(None, &self.project_root); + json!({ + "resolved_url": resolution.resolved_url, + "resolution_source": resolution.source, + }) + } + pub(crate) async fn read_issues_for_entities( &self, entity_id: String, diff --git a/crates/loomweave-mcp/tests/storage_tools.rs b/crates/loomweave-mcp/tests/storage_tools.rs index 5eb3dda2..f9040659 100644 --- a/crates/loomweave-mcp/tests/storage_tools.rs +++ b/crates/loomweave-mcp/tests/storage_tools.rs @@ -11,6 +11,13 @@ use loomweave_core::{ LlmPurpose, LlmRequest, LlmResponse, OpenRouterProvider, OpenRouterProviderConfig, Recording, RecordingProvider, build_inferred_calls_prompt, build_leaf_summary_prompt, }; +use loomweave_federation::{ + loomweave_port::publish_port, + loomweave_url::{ + SOURCE_EPHEMERAL_PORT as LOOMWEAVE_SOURCE_EPHEMERAL_PORT, + SOURCE_NONE as LOOMWEAVE_SOURCE_NONE, + }, +}; use loomweave_mcp::{ DiagnosticsContext, LlmDiagnostics, McpToolPolicy, ServerState, config::{FiligreeConfig, LlmConfig, LlmProviderKind}, @@ -4982,6 +4989,37 @@ async fn project_status_filigree_falls_back_to_config_without_port_file() { assert_eq!(envelope["result"]["llm"]["live"], true); } +#[tokio::test] +async fn project_status_reports_loomweave_read_api_published_port() { + // ADR-044: project_status surfaces the live read-API endpoint resolved from + // .loomweave/ephemeral.port (the second in-repo consumer of the resolver, + // alongside doctor). No diagnostics context is needed — it resolves the + // file at query time from the project root. 
+ let (project, db_path) = open_project(); + publish_port(project.path(), 9412).unwrap(); + + let state = state_for(project.path(), &db_path); + let envelope = call_tool(&state, "project_status", json!({})).await; + let read_api = &envelope["result"]["loomweave_read_api"]; + assert_eq!(read_api["resolved_url"], "http://127.0.0.1:9412"); + assert_eq!( + read_api["resolution_source"], + LOOMWEAVE_SOURCE_EPHEMERAL_PORT + ); +} + +#[tokio::test] +async fn project_status_loomweave_read_api_none_without_port_file() { + // No published port file → resolution_source is "none" and resolved_url is + // null (project_status has no static loomweave URL of its own). + let (project, db_path) = open_project(); + let state = state_for(project.path(), &db_path); + let envelope = call_tool(&state, "project_status", json!({})).await; + let read_api = &envelope["result"]["loomweave_read_api"]; + assert_eq!(read_api["resolved_url"], Value::Null); + assert_eq!(read_api["resolution_source"], LOOMWEAVE_SOURCE_NONE); +} + // --------------------------------------------------------------------------- // Wardline Flow B helpers and tests // --------------------------------------------------------------------------- diff --git a/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md index 9af2ded1..bf635329 100644 --- a/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md +++ b/docs/loomweave/adr/ADR-044-read-api-ephemeral-port-publication.md @@ -134,9 +134,15 @@ time today, is the cautionary case (see related follow-up). **Precedence (highest wins):** -1. An **explicit, deliberate target** — `--loomweave-url` flag or environment - override — always wins. The published port must never override a target the - operator set on purpose (remote loomweave, debugging a specific instance). +1. 
An **explicit, deliberate target** — a `--loomweave-url` flag the operator + *types* or an environment override they set — always wins. The published port + must never override a target the operator chose on purpose (remote loomweave, + debugging a specific instance). Provenance, not flag spelling, is what makes a + value level 1: an **installer-seeded `--loomweave-url` baked into `.mcp.json`** + (e.g. the deterministic URL `loomweave install` stamps into Wardline's MCP + args) is **not** an operator's deliberate choice — it is config-tier + (precedence 3), so the published file overrides it and self-heals when an + ephemeral fallback fired. 2. The **published port file** `.loomweave/ephemeral.port` (composed to `http://127.0.0.1:<port>`). This **beats a stale/default configured URL** so resolution self-heals without a config edit. @@ -202,3 +208,8 @@ static config URL at scan time, so its filigree leg carries the same latent staleness this ADR removes for the loomweave leg. Unifying both consumers on consume-time resolution is Wardline-side work, tracked separately; flagged here so the two legs are not designed divergently. + +- Wardline should treat install-seeded MCP args (the `--loomweave-url` baked into + `.mcp.json` by `loomweave install`) as config-tier and resolve consume-time, so + the published `.loomweave/ephemeral.port` file wins over the baked deterministic + URL when an ephemeral fallback fired. Tracked clarion-7f574bc34f. From b3f58a6ae72ebe8ee043b694227e638074e3f973 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 10:26:43 +1000 Subject: [PATCH 18/27] chore(release): bump to 1.1.0rc1 (Cargo 1.1.0-rc1 / PEP440 1.1.0rc1) Workspace + Python plugin in lockstep. Cross-ecosystem version normalization in check-workspace-version-lockstep.py (SemVer prerelease 1.1.0-rc1 == PEP 440 1.1.0rc1). CHANGELOG: ADR-044 ephemeral-port deconfliction + no-index MCP chirp. 
No package published for release candidates. Co-Authored-By: Claude Opus 4.8 --- CHANGELOG.md | 36 ++++++++++++++ Cargo.lock | 16 +++---- Cargo.toml | 2 +- crates/loomweave-cli/Cargo.toml | 14 +++--- crates/loomweave-cli/pyproject.toml | 4 +- crates/loomweave-federation/Cargo.toml | 2 +- crates/loomweave-mcp/Cargo.toml | 6 +-- crates/loomweave-plugin-fixture/Cargo.toml | 2 +- crates/loomweave-storage/Cargo.toml | 2 +- plugins/python/plugin.toml | 2 +- plugins/python/pyproject.toml | 2 +- .../src/loomweave_plugin_python/__init__.py | 2 +- plugins/python/tests/test_package.py | 4 +- plugins/python/tests/test_server.py | 2 +- plugins/python/uv.lock | 2 +- scripts/check-workspace-version-lockstep.py | 47 +++++++++++++++---- 16 files changed, 105 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e5ec4e4..f69bd967 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,42 @@ only when an incompatible change is made to that surface. See ## [Unreleased] +## [1.1.0rc1] — 2026-06-06 + +First 1.1 release candidate. No package is published for release candidates — +the `1.1.0` package ships only at the final tag. (Cargo SemVer `1.1.0-rc1`; +the Python wheels normalise to PEP 440 `1.1.0rc1`.) + +### Added + +- **Read-API ephemeral port publication (ADR-044).** `loomweave serve` binds a + per-project **deterministic** read-API port (blake3 over the canonical project + path, band `9400–10399`, disjoint from Filigree's `8400–9399`) with an + OS-assigned **ephemeral fallback** when that port is taken, and publishes the + *actually bound* port to `.loomweave/ephemeral.port` — a normative cross-product + file contract (port-only ASCII + optional trailing newline, atomic temp+rename, + **loopback-only**, removed on clean shutdown). This resolves the cross-project + `127.0.0.1:9111` bind collision so multiple projects can `serve` concurrently + without mis-targeting one another. 
New consume-time resolver + `resolve_loomweave_url` (precedence: explicit target > published file > + configured URL > none) is the reference reader; `doctor` and + `project_status_get` report the live published endpoint. The published file is + git-ignored. +- **No-index degraded MCP mode.** `serve` on a project with no index no longer + exits 1 — it serves a degraded MCP stdio session that answers `initialize` and + chirps to run `loomweave install` + `loomweave analyze` from every tool call, + so the MCP client connects and is told how to recover. + +### Changed + +- **`serve.http.bind` is now optional** (`Option`). Unset — the new + default — auto-selects and publishes the per-project deterministic port; an + explicit value is honoured verbatim (no fallback). The installer no longer + stamps `serve.http.bind: 127.0.0.1:9111`, the integration bindings write the + per-project deterministic loomweave URL, and `install`/`doctor --fix` self-heal + the stale hard-coded `9111` stamp on existing projects. +- Version bumped to `1.1.0rc1` across the Rust workspace and the Python plugin. 
+ ## [1.0.0] — Loomweave — 2026-06-05 **This release renames the product and re-baselines its version.** What shipped diff --git a/Cargo.lock b/Cargo.lock index c6cb083e..39a16ebc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1057,7 +1057,7 @@ checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" [[package]] name = "loomweave-analysis" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "anyhow", "serde", @@ -1067,7 +1067,7 @@ dependencies = [ [[package]] name = "loomweave-cli" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "anyhow", "assert_cmd", @@ -1106,7 +1106,7 @@ dependencies = [ [[package]] name = "loomweave-core" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "async-trait", "nix", @@ -1123,7 +1123,7 @@ dependencies = [ [[package]] name = "loomweave-federation" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "blake3", "loomweave-core", @@ -1137,7 +1137,7 @@ dependencies = [ [[package]] name = "loomweave-mcp" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "async-trait", "blake3", @@ -1160,7 +1160,7 @@ dependencies = [ [[package]] name = "loomweave-plugin-fixture" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "loomweave-core", "nix", @@ -1169,7 +1169,7 @@ dependencies = [ [[package]] name = "loomweave-scanner" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "regex", "serde", @@ -1181,7 +1181,7 @@ dependencies = [ [[package]] name = "loomweave-storage" -version = "1.0.0" +version = "1.1.0-rc1" dependencies = [ "blake3", "deadpool-sqlite", diff --git a/Cargo.toml b/Cargo.toml index ed0e35e8..de77d4aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] [workspace.package] -version = "1.0.0" +version = "1.1.0-rc1" edition = "2024" license = "MIT" repository = "https://github.com/foundryside-dev/loomweave" diff --git a/crates/loomweave-cli/Cargo.toml b/crates/loomweave-cli/Cargo.toml index edd5d37b..9343d850 100644 --- 
a/crates/loomweave-cli/Cargo.toml +++ b/crates/loomweave-cli/Cargo.toml @@ -18,12 +18,12 @@ anyhow.workspace = true axum.workspace = true blake3.workspace = true clap.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.0.0" } -loomweave-analysis = { path = "../loomweave-analysis", version = "1.0.0" } -loomweave-federation = { path = "../loomweave-federation", version = "1.0.0" } -loomweave-mcp = { path = "../loomweave-mcp", version = "1.0.0" } -loomweave-scanner = { path = "../loomweave-scanner", version = "1.0.0" } -loomweave-storage = { path = "../loomweave-storage", version = "1.0.0" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } +loomweave-analysis = { path = "../loomweave-analysis", version = "1.1.0-rc1" } +loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc1" } +loomweave-mcp = { path = "../loomweave-mcp", version = "1.1.0-rc1" } +loomweave-scanner = { path = "../loomweave-scanner", version = "1.1.0-rc1" } +loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc1" } dotenvy.workspace = true fs2.workspace = true hmac.workspace = true @@ -46,7 +46,7 @@ uuid.workspace = true [dev-dependencies] assert_cmd.workspace = true -loomweave-plugin-fixture = { path = "../loomweave-plugin-fixture", version = "1.0.0" } +loomweave-plugin-fixture = { path = "../loomweave-plugin-fixture", version = "1.1.0-rc1" } rusqlite.workspace = true serde_json.workspace = true sha1.workspace = true diff --git a/crates/loomweave-cli/pyproject.toml b/crates/loomweave-cli/pyproject.toml index 9df6faef..72e23181 100644 --- a/crates/loomweave-cli/pyproject.toml +++ b/crates/loomweave-cli/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "loomweave" -version = "1.0.0" +version = "1.1.0rc1" description = "Loomweave — graph-aware code archaeology (Rust core)" readme = "../../README.md" requires-python = ">=3.11" @@ -15,7 +15,7 @@ classifiers = [ "Programming Language :: Rust", 
"Programming Language :: Python :: 3", ] -dependencies = ["loomweave-plugin-python==1.0.0"] +dependencies = ["loomweave-plugin-python==1.1.0rc1"] [project.urls] Repository = "https://github.com/foundryside-dev/loomweave" diff --git a/crates/loomweave-federation/Cargo.toml b/crates/loomweave-federation/Cargo.toml index 2ca76605..335406e1 100644 --- a/crates/loomweave-federation/Cargo.toml +++ b/crates/loomweave-federation/Cargo.toml @@ -11,7 +11,7 @@ workspace = true [dependencies] blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.0.0" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } reqwest.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/loomweave-mcp/Cargo.toml b/crates/loomweave-mcp/Cargo.toml index 457cedcd..684fd2ea 100644 --- a/crates/loomweave-mcp/Cargo.toml +++ b/crates/loomweave-mcp/Cargo.toml @@ -12,9 +12,9 @@ workspace = true [dependencies] async-trait.workspace = true blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.0.0" } -loomweave-federation = { path = "../loomweave-federation", version = "1.0.0" } -loomweave-storage = { path = "../loomweave-storage", version = "1.0.0" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } +loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc1" } +loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc1" } reqwest.workspace = true rusqlite.workspace = true serde.workspace = true diff --git a/crates/loomweave-plugin-fixture/Cargo.toml b/crates/loomweave-plugin-fixture/Cargo.toml index 6d73fbeb..35f824d4 100644 --- a/crates/loomweave-plugin-fixture/Cargo.toml +++ b/crates/loomweave-plugin-fixture/Cargo.toml @@ -23,7 +23,7 @@ name = "loomweave-fixture-plugin" path = "src/main.rs" [dependencies] -loomweave-core = { path = "../loomweave-core", version = "1.0.0" } +loomweave-core = { path = "../loomweave-core", version = 
"1.1.0-rc1" } serde_json.workspace = true [target.'cfg(unix)'.dependencies] diff --git a/crates/loomweave-storage/Cargo.toml b/crates/loomweave-storage/Cargo.toml index a358b6ef..726a0007 100644 --- a/crates/loomweave-storage/Cargo.toml +++ b/crates/loomweave-storage/Cargo.toml @@ -11,7 +11,7 @@ workspace = true [dependencies] blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.0.0" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } deadpool-sqlite.workspace = true rusqlite.workspace = true serde.workspace = true diff --git a/plugins/python/plugin.toml b/plugins/python/plugin.toml index 51638a28..6e159265 100644 --- a/plugins/python/plugin.toml +++ b/plugins/python/plugin.toml @@ -1,7 +1,7 @@ [plugin] name = "loomweave-plugin-python" plugin_id = "python" -version = "1.0.0" +version = "1.1.0rc1" protocol_version = "1.0" # Bare basename per ADR-021 §Layer 1 + WP2 scrub commit eb0a41d — the host # refuses manifests whose `executable` carries any path component. 
diff --git a/plugins/python/pyproject.toml b/plugins/python/pyproject.toml index 18cee269..e26fb75d 100644 --- a/plugins/python/pyproject.toml +++ b/plugins/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "loomweave-plugin-python" -version = "1.0.0" +version = "1.1.0rc1" description = "Loomweave Python language plugin — v1.0 release" readme = "README.md" requires-python = ">=3.11" diff --git a/plugins/python/src/loomweave_plugin_python/__init__.py b/plugins/python/src/loomweave_plugin_python/__init__.py index 1e1d9cab..4b4fde08 100644 --- a/plugins/python/src/loomweave_plugin_python/__init__.py +++ b/plugins/python/src/loomweave_plugin_python/__init__.py @@ -1,3 +1,3 @@ """loomweave-plugin-python — Python language plugin for Loomweave.""" -__version__ = "1.0.0" +__version__ = "1.1.0rc1" diff --git a/plugins/python/tests/test_package.py b/plugins/python/tests/test_package.py index 0a8c7ca7..9cfae6ef 100644 --- a/plugins/python/tests/test_package.py +++ b/plugins/python/tests/test_package.py @@ -17,7 +17,7 @@ def _read_toml(path: Path) -> dict[str, Any]: def test_package_version_matches_pyproject() -> None: - assert loomweave_plugin_python.__version__ == "1.0.0" + assert loomweave_plugin_python.__version__ == "1.1.0rc1" def test_plugin_version_lockstep_across_pyproject_manifest_and_module() -> None: @@ -42,7 +42,7 @@ def test_plugin_version_lockstep_across_pyproject_manifest_and_module() -> None: def test_manifest_declares_current_v1_ontology_only() -> None: manifest = _read_toml(_PLUGIN_ROOT / "plugin.toml") - assert manifest["plugin"]["version"] == "1.0.0" + assert manifest["plugin"]["version"] == "1.1.0rc1" assert manifest["capabilities"]["runtime"]["wardline_aware"] is True assert manifest["integrations"]["wardline"]["expected_descriptor_version"] == ( EXPECTED_DESCRIPTOR_VERSION diff --git a/plugins/python/tests/test_server.py b/plugins/python/tests/test_server.py index 2d38fcfe..7abb5a1d 100644 --- 
a/plugins/python/tests/test_server.py +++ b/plugins/python/tests/test_server.py @@ -86,7 +86,7 @@ def test_initialize_roundtrip() -> None: assert response["id"] == 1 result = response["result"] assert result["name"] == "loomweave-plugin-python" - assert result["version"] == "1.0.0" + assert result["version"] == "1.1.0rc1" assert result["ontology_version"] == "0.7.0" assert set(result["capabilities"]) == {"wardline"} assert result["capabilities"]["wardline"]["status"] in { diff --git a/plugins/python/uv.lock b/plugins/python/uv.lock index 89aa6f96..94aa0d89 100644 --- a/plugins/python/uv.lock +++ b/plugins/python/uv.lock @@ -464,7 +464,7 @@ wheels = [ [[package]] name = "loomweave-plugin-python" -version = "1.0.0" +version = "1.1.0rc1" source = { editable = "." } dependencies = [ { name = "packaging" }, diff --git a/scripts/check-workspace-version-lockstep.py b/scripts/check-workspace-version-lockstep.py index 9eb494b9..db16ed83 100755 --- a/scripts/check-workspace-version-lockstep.py +++ b/scripts/check-workspace-version-lockstep.py @@ -75,6 +75,19 @@ def _dig(data: dict[str, Any], *keys: str) -> Any: return cursor +def _normalize(version: str) -> str: + """Normalize a version string for cross-ecosystem comparison. + + Cargo requires SemVer prerelease syntax (`1.1.0-rc1`) while the Python + packages (maturin/hatchling wheels) use PEP 440 (`1.1.0rc1`). The only `-` + in a valid workspace SemVer string is the prerelease separator, so stripping + hyphens maps the Cargo form onto the PEP 440 form. A no-op on final releases + like `1.0.0`, so the strict-equality policy is preserved for non-prerelease + versions. + """ + return version.replace("-", "") + + def _pinned_version(dependencies: Any, package: str) -> str | None: """Return the `==`-pinned version for `package` in a PEP 508 dependency list. @@ -105,10 +118,13 @@ def check_lockstep( except _Missing as missing: # Without the anchor version there is nothing to compare against. 
return [f"Cargo.toml key {missing} not found"] + # Compare against the PEP 440 form: the Cargo SemVer `1.1.0-rc1` and the + # wheel `1.1.0rc1` are the same product version (see `_normalize`). + rust_norm = _normalize(rust_version) try: plugin_version = _dig(plugin_pyproject, "project", "version") - if plugin_version != rust_version: + if _normalize(plugin_version) != rust_norm: errors.append( f"plugin version {plugin_version!r} != workspace {rust_version!r}" ) @@ -117,7 +133,7 @@ def check_lockstep( try: cli_version = _dig(cli_pyproject, "project", "version") - if cli_version != rust_version: + if _normalize(cli_version) != rust_norm: errors.append( f"loomweave-cli version {cli_version!r} != workspace {rust_version!r}" ) @@ -131,7 +147,7 @@ def check_lockstep( errors.append( f"loomweave-cli pyproject does not pin {PLUGIN_PACKAGE}==" ) - elif pin != rust_version: + elif _normalize(pin) != rust_norm: errors.append( f"loomweave-cli pins {PLUGIN_PACKAGE}=={pin} != workspace {rust_version!r}" ) @@ -143,7 +159,8 @@ def check_lockstep( def _self_test() -> int: """Exercise check_lockstep against in-memory fixtures.""" - cargo = tomllib.loads('[workspace.package]\nversion = "1.0.0"\n') + def cargo_at(version: str) -> dict[str, Any]: + return tomllib.loads(f'[workspace.package]\nversion = "{version}"\n') def plugin(version: str) -> dict[str, Any]: return tomllib.loads( @@ -156,32 +173,44 @@ def cli(version: str, deps: str) -> dict[str, Any]: ) good_deps = 'dependencies = ["loomweave-plugin-python==1.0.0"]' - cases: list[tuple[str, dict[str, Any], dict[str, Any], bool]] = [ - ("aligned", plugin("1.0.0"), cli("1.0.0", good_deps), True), - ("plugin version drift", plugin("1.0.1"), cli("1.0.0", good_deps), False), - ("cli version drift", plugin("1.0.0"), cli("0.9.0", good_deps), False), + rc_deps = 'dependencies = ["loomweave-plugin-python==1.1.0rc1"]' + final = cargo_at("1.0.0") + # Prerelease: the Cargo SemVer `1.1.0-rc1` and the PEP 440 wheel `1.1.0rc1` + # name the same 
product version and must read as aligned (see `_normalize`). + rc = cargo_at("1.1.0-rc1") + cases: list[tuple[str, dict[str, Any], dict[str, Any], dict[str, Any], bool]] = [ + ("aligned", final, plugin("1.0.0"), cli("1.0.0", good_deps), True), + ("plugin version drift", final, plugin("1.0.1"), cli("1.0.0", good_deps), False), + ("cli version drift", final, plugin("1.0.0"), cli("0.9.0", good_deps), False), ( "cli pin drift", + final, plugin("1.0.0"), cli("1.0.0", 'dependencies = ["loomweave-plugin-python==0.9.0"]'), False, ), ( "cli pin absent", + final, plugin("1.0.0"), cli("1.0.0", 'dependencies = ["something-else>=1"]'), False, ), ( "cli pin unpinned (>=)", + final, plugin("1.0.0"), cli("1.0.0", 'dependencies = ["loomweave-plugin-python>=1.0.0"]'), False, ), + # Cross-ecosystem prerelease normalization. + ("rc aligned", rc, plugin("1.1.0rc1"), cli("1.1.0rc1", rc_deps), True), + ("rc plugin drift", rc, plugin("1.1.0rc2"), cli("1.1.0rc1", rc_deps), False), + ("rc pin drift", rc, plugin("1.1.0rc1"), cli("1.1.0rc1", good_deps), False), ] failures = 0 - for name, plugin_py, cli_py, expect_ok in cases: + for name, cargo, plugin_py, cli_py, expect_ok in cases: errors = check_lockstep(cargo, plugin_py, cli_py) actual_ok = not errors if actual_ok != expect_ok: From 7ff84b2900e0c3ba8ad30020191009480c701353 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:47:23 +1000 Subject: [PATCH 19/27] fix(install): gitignore instance_id + *.lock; document live-index commit hazard (ADR-005) The shipped .loomweave/.gitignore (ADR-005) excluded WAL/shadow/logs but not the per-project `instance_id` fingerprint or the analyze advisory lock (`loomweave.lock`, fs2), so `git add -A` staged live runtime state into demo repos. Add `instance_id` and `*.lock` to GITIGNORE_CONTENTS and refresh ADR-005's verbatim block + Excluded list (also reconciling ephemeral.port/embeddings.db). The install test now asserts both rules ship. 
ADR-005 also gains a "Committing a live index" note: the on-disk loomweave.db lags its pending WAL while serve runs, so commit a consistent copy via `loomweave db backup` (or stop serve) rather than git-add-ing the live file. Closes clarion-7381e6382d. Refs clarion-cdee445ed8. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/install.rs | 11 ++++++++- crates/loomweave-cli/tests/install.rs | 5 ++++ .../adr/ADR-005-loomweave-dir-tracking.md | 24 ++++++++++++++++--- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/crates/loomweave-cli/src/install.rs b/crates/loomweave-cli/src/install.rs index a690b229..f8f5abcc 100644 --- a/crates/loomweave-cli/src/install.rs +++ b/crates/loomweave-cli/src/install.rs @@ -84,12 +84,21 @@ const GITIGNORE_CONTENTS: &str = "\ # Loomweave .gitignore — ADR-005 tracked-vs-excluded list. # Tracked (committed): loomweave.db, config.json, .gitignore itself. # Excluded (ignored): WAL sidecars, shadow DB, per-run logs, tmp scratch, -# the read-API live port discovery file. +# the read-API live port discovery file, the per-project instance id, and +# the analyze advisory lock. # Read-API live port discovery file (ADR-044): present only while serve runs, # rewritten per bind, loopback-only — a runtime artifact, never committed. ephemeral.port +# Per-project instance fingerprint (loomweave serve) and the analyze advisory +# lock (loomweave.lock, fs2). Both are process-/machine-local runtime state, +# never durable: committing them stages a live lock + instance id, and the lock +# is meaningless on another checkout (clarion-7381e6382d). `*.lock` also covers +# any future lock sidecar. +instance_id +*.lock + # SQLite write-ahead files never belong in the repo. 
*-wal *-shm diff --git a/crates/loomweave-cli/tests/install.rs b/crates/loomweave-cli/tests/install.rs index 66bdc47b..eb5b8dea 100644 --- a/crates/loomweave-cli/tests/install.rs +++ b/crates/loomweave-cli/tests/install.rs @@ -59,6 +59,11 @@ fn install_creates_loomweave_dir_with_expected_contents() { "*-wal", "*-shm", "ephemeral.port", + // Per-project fingerprint + analyze advisory lock are runtime artifacts, + // never durable — the shipped ignore must list them or `git add -A` + // stages a live lock / instance id (clarion-7381e6382d). + "instance_id", + "*.lock", ] { assert!( gitignore.contains(rule), diff --git a/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md b/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md index bc7166ac..09557739 100644 --- a/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md +++ b/docs/loomweave/adr/ADR-005-loomweave-dir-tracking.md @@ -40,16 +40,22 @@ and every developer's install produces their own variant `.gitignore` by acciden ## Decision `loomweave install` writes `.loomweave/.gitignore` with the following contents -(verbatim — the literal file lives at -`crates/loomweave-cli/src/install.rs` and ships as the v0.1 baseline): +(the literal file lives at `crates/loomweave-cli/src/install.rs` — +`GITIGNORE_CONTENTS` — which is the source of truth; the v0.1 baseline has since +grown the `ephemeral.port` (ADR-044), `embeddings.db` (ADR-040), `instance_id`, +and `*.lock` entries): ``` +ephemeral.port *-wal *-shm *.db-wal *.db-shm *.shadow.db *.db.new +embeddings.db +instance_id +*.lock tmp/ logs/ runs/*/log.jsonl @@ -59,7 +65,13 @@ runs/*/log.jsonl - `.loomweave/loomweave.db` — the main analysis store. SQLite diffs poorly; the `loomweave db export --textual` + `loomweave db merge-helper` pattern (detailed - design §3 File layout) handles the team case. + design §3 File layout) handles the team case. 
**Committing a live index:** while + `loomweave serve` is running, the on-disk `loomweave.db` lags by its pending WAL + (the `-wal` sidecar is `.gitignore`d), so `git add loomweave.db` mid-serve can + stage an incomplete database. To commit a consistent point-in-time index, take + an online WAL-safe copy with `loomweave db backup` and commit that, or stop + `serve` first (SQLite checkpoints the WAL away on last-connection close) — + clarion-cdee445ed8. - `.loomweave/config.json` — small, human-readable internal state (schema version, last run IDs). - `.loomweave/.gitignore` itself — this file. @@ -75,6 +87,12 @@ runs/*/log.jsonl - All shadow-DB intermediates. - `tmp/` and `logs/` (volatile scratch). - `runs/*/log.jsonl` (raw LLM bodies — audit-local, not commit-appropriate). +- `ephemeral.port` (ADR-044) — the read-API live port discovery file, present + only while `serve` runs and rewritten per bind. +- `embeddings.db` (ADR-040) — the semantic-search sidecar; large and rebuildable. +- `instance_id` and `*.lock` — the per-project `serve` fingerprint and the + analyze advisory lock (`loomweave.lock`, fs2). Both are process-/machine-local + runtime state, never durable (clarion-7381e6382d). ### Out of scope for `.loomweave/.gitignore` From 69ebaddc8c35e1df98495bb83c09797231d761a3 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:47:32 +1000 Subject: [PATCH 20/27] fix(storage): checkpoint WAL(TRUNCATE) after each committed run; surface db backup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a successful CommitRun the writer-actor now runs `PRAGMA wal_checkpoint(TRUNCATE)` so the on-disk loomweave.db reflects committed state while the writer is still alive — previously the WAL only truncated on last-connection close, leaving a multi-MB pending sidecar that made the .db an unreliable point-in-time artifact for commit. 
The checkpoint is best-effort: failure logs a warning and leaves committed frames durable. `loomweave analyze --help` now points at `loomweave db backup` for committing the index as a versioned artifact (the verb already exists; this is discoverability). Closes clarion-cdee445ed8. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/cli.rs | 4 ++ crates/loomweave-storage/src/writer.rs | 26 +++++++++ .../loomweave-storage/tests/writer_actor.rs | 53 +++++++++++++++++++ 3 files changed, 83 insertions(+) diff --git a/crates/loomweave-cli/src/cli.rs b/crates/loomweave-cli/src/cli.rs index f0cdd8af..d5f93a0d 100644 --- a/crates/loomweave-cli/src/cli.rs +++ b/crates/loomweave-cli/src/cli.rs @@ -62,6 +62,10 @@ pub enum Command { /// Re-runs are idempotent (UPSERT on `entities.id`). If no plugins are on /// `$PATH`, exits 0 with a WARN and status `skipped_no_plugins` — see /// `docs/operator/getting-started.md` Troubleshooting. + /// + /// To commit the index as a versioned artifact while `serve` may be running, + /// take a consistent online copy with `loomweave db backup` rather than + /// `git add`-ing the live file (whose pending WAL is not committable). Analyze { /// Path to analyse (default: current directory). #[arg(default_value = ".")] diff --git a/crates/loomweave-storage/src/writer.rs b/crates/loomweave-storage/src/writer.rs index 004d58a3..0a3a3721 100644 --- a/crates/loomweave-storage/src/writer.rs +++ b/crates/loomweave-storage/src/writer.rs @@ -337,6 +337,16 @@ fn run_actor( &stats_json, commits_observed, ); + // A committed run is the "snapshot" boundary: TRUNCATE-checkpoint + // so the on-disk loomweave.db is a whole, committable artifact + // (ADR-005 tracks it) without waiting for the process to exit. + // Only `CommitRun` (end of an analyze run) reaches here — never the + // serve summary-write path — so there is no per-write checkpoint + // cost. 
Best-effort and run before the ack so a caller that reads + // the file right after sees the truncated WAL (clarion-cdee445ed8). + if res.is_ok() { + checkpoint_truncate(conn); + } reply(ack, res); } WriterCmd::FailRun { @@ -375,6 +385,22 @@ fn cleanup_after_channel_close(conn: &mut Connection, state: &mut ActorState) { } } +/// Issue `PRAGMA wal_checkpoint(TRUNCATE)` on the writer's own connection, +/// best-effort. A concurrent reader (a live `serve` reader-pool connection) can +/// hold the checkpoint back from resetting the WAL — that returns a "busy" row, +/// not an error, and is harmless: the committed frames are already durable and +/// stay applied. A genuine failure is logged, never propagated, so a checkpoint +/// hiccup can never fail an otherwise-successful run commit (clarion-cdee445ed8). +fn checkpoint_truncate(conn: &Connection) { + if let Err(err) = conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);") { + tracing::warn!( + error = %err, + "loomweave writer: post-commit WAL checkpoint(TRUNCATE) failed (harmless; \ + committed frames remain durable)" + ); + } +} + fn reply(ack: Ack, result: Result) { // If the caller dropped the receiver, we discard the result. This is // correct behaviour — the writer is still responsible for its own diff --git a/crates/loomweave-storage/tests/writer_actor.rs b/crates/loomweave-storage/tests/writer_actor.rs index a051f29f..64f65e40 100644 --- a/crates/loomweave-storage/tests/writer_actor.rs +++ b/crates/loomweave-storage/tests/writer_actor.rs @@ -3270,3 +3270,56 @@ async fn channel_close_with_open_run_self_heals_to_failed() { "pending insert must be rolled back when channel closes" ); } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn commit_run_truncates_wal_while_writer_still_alive() { + // clarion-cdee445ed8: ADR-005 commits `.loomweave/loomweave.db`, so a finished + // analyze must leave the on-disk file a whole, committable snapshot WITHOUT + // waiting for the process to exit. 
`CommitRun` now issues an explicit + // `wal_checkpoint(TRUNCATE)`. We assert the WAL is reset to 0 bytes with the + // writer STILL ALIVE — proving it is the post-commit checkpoint, not SQLite's + // last-connection-close cleanup (which would only fire after the drop below). + // + // Scope note: `CommitRun` is reached only at the end of an analyze run, never + // by serve's summary-write path, so there is no per-write checkpoint cost. And + // while a long-lived serve holds reader connections open the TRUNCATE is + // best-effort (a reader can hold it back, harmlessly); `loomweave db backup` + // remains the way to capture a consistent committable copy mid-serve. + let dir = tempfile::tempdir().unwrap(); + let path = prepared_db(&dir); + let wal_path = dir.path().join("loomweave.db-wal"); + + let (writer, handle) = Writer::spawn(path.clone(), 50, 256).unwrap(); + let tx = writer.sender(); + begin_demo_run(&tx, "run-wal").await; + seed_module_and_functions(&tx).await; + seed_contains_edges_for_demo_functions(&tx).await; + send::<()>(&tx, |ack| WriterCmd::CommitRun { + run_id: "run-wal".into(), + status: RunStatus::Completed, + completed_at: now_iso(), + stats_json: "{}".into(), + ack, + }) + .await + .unwrap(); + + // Writer is STILL ALIVE here (tx/writer not dropped): the only thing that + // could have emptied the WAL is the explicit post-CommitRun checkpoint. + let wal_after_commit = std::fs::metadata(&wal_path).map_or(0, |m| m.len()); + assert_eq!( + wal_after_commit, 0, + "CommitRun must TRUNCATE-checkpoint the WAL to 0 bytes while the writer is \ + still alive, so the committed loomweave.db is whole on disk; got {wal_after_commit}" + ); + + // Clean shutdown still succeeds (and the actor task joins without error). 
+ drop(tx); + drop(writer); + handle.await.unwrap().unwrap(); + let wal_after_shutdown = std::fs::metadata(&wal_path).map_or(0, |m| m.len()); + assert_eq!( + wal_after_shutdown, 0, + "WAL must remain truncated after shutdown; got {wal_after_shutdown}" + ); +} From 032425c851b5eb8f43cb4bf02da57f54cd707f14 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:47:45 +1000 Subject: [PATCH 21/27] =?UTF-8?q?feat(mcp):=20worktree-aware=20staleness?= =?UTF-8?q?=20=E2=80=94=20indexed=5Fat=5Fcommit=20+=20StaleWorktree=20verd?= =?UTF-8?q?ict=20(ADR-045)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit project_status_get reported staleness:"fresh" while the working tree held un-indexed source, so the session-start banner ("index is fresh, ask Loomweave") lied about uncommitted code. Make staleness worktree-aware: - Snapshot gains indexed_at_commit + worktree_dirty; a new Staleness::StaleWorktree verdict fires when an otherwise-fresh index has untracked source on disk. - Detection uses loomweave_core::list_untracked_files — hardened, hash-free `git ls-files --others --exclude-standard`, scoped to ingested source extensions so a scratch notes.txt does not flag (false-positive guard). Fail-soft outside a git work tree. - Surfaced on loomweave://context, project_status_get (worktree_dirty + staleness_note), and the session-start banner with a concrete re-analyze remedy; orientation treats StaleWorktree as stale. - ADR-045 records the maintainer-authorized security boundary: `git status` is forbidden (filter.clean RCE on hashed content; clarion-4b5a8aff54), but ls-files --others is hash-free — proven by the new ls_files_others_does_not_run_clean_filter security test, not reasoning alone. Closes clarion-26c7e52027 and clarion-d9cf8bcfa9. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-cli/src/hook.rs | 191 +++++++++- crates/loomweave-core/src/hardened_git.rs | 98 ++++++ crates/loomweave-core/src/lib.rs | 2 +- crates/loomweave-mcp/src/snapshot.rs | 325 +++++++++++++++++- crates/loomweave-mcp/src/tools/orientation.rs | 6 +- crates/loomweave-mcp/src/tools/status.rs | 24 ++ crates/loomweave-mcp/tests/storage_tools.rs | 116 +++++++ .../adr/ADR-045-worktree-source-staleness.md | 118 +++++++ docs/loomweave/adr/README.md | 1 + 9 files changed, 865 insertions(+), 16 deletions(-) create mode 100644 docs/loomweave/adr/ADR-045-worktree-source-staleness.md diff --git a/crates/loomweave-cli/src/hook.rs b/crates/loomweave-cli/src/hook.rs index 38d00141..a0adcf08 100644 --- a/crates/loomweave-cli/src/hook.rs +++ b/crates/loomweave-cli/src/hook.rs @@ -154,11 +154,31 @@ fn snapshot_outcome_lines(project_root: &Path, outcome: &SnapshotOutcome) -> Vec } match snapshot.staleness() { Staleness::Fresh => { + // Surface the analyzed commit (when the run recorded one) so the + // "fresh" claim names the commit it reflects — short form for the + // banner; project_status_get carries the full `git_sha`. + let at_commit = snapshot + .indexed_at_commit() + .map(|c| format!(", commit {}", c.chars().take(12).collect::<String>())) + .unwrap_or_default(); lines.push(format!( - "Index is fresh (last analyzed {}). Ask Loomweave before re-exploring \ + "Index is fresh (last analyzed {}{}). Ask Loomweave before re-exploring \ the tree; see the loomweave-workflow skill.", - snapshot.last_analyzed_at().unwrap_or("unknown") + snapshot.last_analyzed_at().unwrap_or("unknown"), + at_commit )); + // Honest caveat (clarion-26c7e52027): freshness compares the mtimes of + // *already-indexed* source files, so brand-new files in a not-yet- + // indexed top-level directory — or any uncommitted additions, which the + // untrusted-corpus git posture cannot safely detect — can sit unseen + // behind a "fresh" verdict. 
Re-analyze is the remedy. + lines.push( + "Caveat: \"fresh\" reflects already-indexed files only; it will NOT \ + detect brand-new modules in a not-yet-indexed directory. If you just \ + added or moved source, run `loomweave analyze` before relying on \ + graph answers (e.g. \"what calls X\")." + .to_string(), + ); } Staleness::Stale => { lines.push(format!( @@ -167,6 +187,19 @@ fn snapshot_outcome_lines(project_root: &Path, outcome: &SnapshotOutcome) -> Vec project_root.display() )); } + Staleness::StaleWorktree => { + // The ingested files are individually fresh, but the working tree has + // untracked source of an already-indexed type the index has not seen + // (the new-top-level-dir blind spot the mtime passes can't reach; + // clarion-26c7e52027). Concrete, not a caveat — name the remedy. + lines.push(format!( + "Index does NOT reflect the working tree: untracked source files of \ + already-indexed types are present (new modules not yet analyzed). \ + Run `loomweave analyze {}` before relying on graph answers \ + (e.g. \"what calls X\").", + project_root.display() + )); + } Staleness::NeverAnalyzed => { lines.push(format!( "No analysis recorded yet. Run `loomweave analyze {}` to build the index.", @@ -191,3 +224,157 @@ fn snapshot_outcome_lines(project_root: &Path, outcome: &SnapshotOutcome) -> Vec } lines } + +#[cfg(test)] +mod tests { + use super::*; + + use rusqlite::Connection; + + use loomweave_storage::{pragma, schema}; + + /// Build a `Fresh` snapshot for `project_root`: one ingested source file that + /// exists and is older than a completed run. `commit` populates + /// `runs.analyzed_at_commit` (or leaves it NULL). Mirrors the snapshot + /// module's own fixtures; the `TempDir` holding the db is returned so the + /// caller keeps it alive. 
+ fn fresh_snapshot( + project_root: &Path, + commit: Option<&str>, + ) -> (tempfile::TempDir, ProjectSnapshot) { + std::fs::write(project_root.join("a.py"), "x = 1\n").unwrap(); + let db_dir = tempfile::tempdir().unwrap(); + let mut conn = Connection::open(db_dir.path().join("loomweave.db")).unwrap(); + pragma::apply_write_pragmas(&conn).unwrap(); + schema::apply_migrations(&mut conn).unwrap(); + conn.execute( + "INSERT INTO entities \ + (id, plugin_id, kind, name, short_name, properties, source_file_path, created_at, updated_at) \ + VALUES ('python:module:a', 'python', 'module', 'a', 'a', '{}', 'a.py', \ + '2026-01-01T00:00:00.000Z', '2026-01-01T00:00:00.000Z')", + [], + ) + .unwrap(); + conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status, analyzed_at_commit) \ + VALUES ('r', '2099-01-01T00:00:00.000Z', '2099-01-01T00:00:00.000Z', '{}', '{}', 'completed', ?1)", + rusqlite::params![commit], + ) + .unwrap(); + let snapshot = project_snapshot(&conn, project_root); + assert_eq!( + snapshot.staleness(), + Staleness::Fresh, + "fixture must be Fresh: {snapshot:?}" + ); + (db_dir, snapshot) + } + + #[test] + fn fresh_banner_carries_honest_caveat_and_commit() { + // The bare "fresh ... ask Loomweave before re-exploring" line lied about + // brand-new uncommitted modules (clarion-26c7e52027). The Fresh arm must + // now (a) name the indexed commit and (b) carry the re-analyze caveat. + let root = tempfile::tempdir().unwrap(); + let (_db, snapshot) = fresh_snapshot(root.path(), Some("abc123def4567890")); + let lines = snapshot_outcome_lines(root.path(), &SnapshotOutcome::Ready(snapshot)); + let banner = lines.join("\n"); + + assert!( + banner.contains("Index is fresh"), + "missing fresh line: {banner}" + ); + // Short commit form is surfaced (12 chars), not the full 16-char fixture. 
+ assert!( + banner.contains("commit abc123def456"), + "missing indexed commit: {banner}" + ); + assert!( + banner.contains("loomweave analyze") && banner.contains("brand-new"), + "Fresh banner must disclose the not-yet-indexed blind spot and point at \ + re-analyze: {banner}" + ); + } + + #[test] + fn fresh_banner_omits_commit_clause_when_run_recorded_none() { + // A run analyzed outside a git repo has NULL analyzed_at_commit: the banner + // must not invent a commit clause, but still carries the caveat. + let root = tempfile::tempdir().unwrap(); + let (_db, snapshot) = fresh_snapshot(root.path(), None); + let lines = snapshot_outcome_lines(root.path(), &SnapshotOutcome::Ready(snapshot)); + let banner = lines.join("\n"); + + assert!( + banner.contains("Index is fresh"), + "missing fresh line: {banner}" + ); + assert!( + !banner.contains(", commit "), + "must not fabricate a commit: {banner}" + ); + assert!( + banner.contains("brand-new"), + "caveat must still be present: {banner}" + ); + } + + #[test] + fn stale_worktree_banner_names_untracked_source_and_remedy() { + // In a git work tree, a mtime-fresh index with an untracked module yields + // StaleWorktree (clarion-26c7e52027, ADR-045); the banner must say so + // concretely and point at re-analyze, not the soft Fresh caveat. 
+ use std::process::Command; + let root = tempfile::tempdir().unwrap(); + let git = |args: &[&str]| -> bool { + Command::new("git") + .args(args) + .current_dir(root.path()) + .status() + .is_ok_and(|s| s.success()) + }; + if !git(&["init", "-q"]) { + return; // git unavailable → skip + } + let _ = git(&["config", "user.email", "t@t"]); + let _ = git(&["config", "user.name", "t"]); + std::fs::write(root.path().join("a.py"), "x = 1\n").unwrap(); + git(&["add", "."]); + git(&["commit", "-q", "-m", "init"]); + + let db_dir = tempfile::tempdir().unwrap(); + let mut conn = Connection::open(db_dir.path().join("loomweave.db")).unwrap(); + pragma::apply_write_pragmas(&conn).unwrap(); + schema::apply_migrations(&mut conn).unwrap(); + conn.execute( + "INSERT INTO entities \ + (id, plugin_id, kind, name, short_name, properties, source_file_path, created_at, updated_at) \ + VALUES ('python:module:a', 'python', 'module', 'a', 'a', '{}', 'a.py', \ + '2026-01-01T00:00:00.000Z', '2026-01-01T00:00:00.000Z')", + [], + ) + .unwrap(); + conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status) \ + VALUES ('r', '2099-01-01T00:00:00.000Z', '2099-01-01T00:00:00.000Z', '{}', '{}', 'completed')", + [], + ) + .unwrap(); + // Brand-new untracked module the index never saw. 
+ std::fs::write(root.path().join("hub.py"), "y = 2\n").unwrap(); + + let snapshot = project_snapshot(&conn, root.path()); + assert_eq!( + snapshot.staleness(), + Staleness::StaleWorktree, + "fixture must be StaleWorktree: {snapshot:?}" + ); + let lines = snapshot_outcome_lines(root.path(), &SnapshotOutcome::Ready(snapshot)); + let banner = lines.join("\n"); + assert!( + banner.contains("does NOT reflect the working tree") + && banner.contains("loomweave analyze"), + "StaleWorktree banner must name the gap and the re-analyze remedy: {banner}" + ); + } +} diff --git a/crates/loomweave-core/src/hardened_git.rs b/crates/loomweave-core/src/hardened_git.rs index 729d437e..9c0068ac 100644 --- a/crates/loomweave-core/src/hardened_git.rs +++ b/crates/loomweave-core/src/hardened_git.rs @@ -161,6 +161,41 @@ pub fn hardened_git_command(repo_root: &Path) -> Command { command } +/// List untracked, non-ignored files in `repo_root`, hardened for an untrusted +/// corpus (clarion-d9cf8bcfa9; ADR-045). +/// +/// Uses `git ls-files --others --exclude-standard -z`: it enumerates worktree +/// paths Git is not tracking and that `.gitignore`/exclude rules do not cover, +/// **without hashing working-tree content**. That distinction is load-bearing — +/// `git status` must hash to report modifications, which runs a repo-controlled +/// `filter.<driver>.clean` (the one residual the module docs describe, via +/// `$GIT_DIR/info/attributes`); listing untracked paths never hashes, so that +/// filter is never invoked. Verified by the +/// `ls_files_others_does_not_run_clean_filter` test in this module. +/// +/// `-z` is NUL-delimited, so paths containing newlines or other special bytes +/// are unambiguous (no C-quoting to decode). Fail-soft like the crate's other +/// corpus git probes: returns `None` when git is unavailable, `repo_root` is not +/// a work tree, or the command fails — never an error. An empty `Vec` means "a +/// git repo with no untracked files". 
+#[must_use] +pub fn list_untracked_files(repo_root: &Path) -> Option<Vec<String>> { + let out = hardened_git_command(repo_root) + .args(["ls-files", "--others", "--exclude-standard", "-z"]) + .output() + .ok()?; + if !out.status.success() { + return None; + } + Some( + out.stdout + .split(|&b| b == 0) + .filter(|segment| !segment.is_empty()) + .map(|segment| String::from_utf8_lossy(segment).into_owned()) + .collect(), + ) +} + #[cfg(test)] mod tests { use super::*; @@ -229,4 +264,67 @@ mod tests { ); assert_eq!(parse_git_version("garbage"), None); } + + #[test] + fn ls_files_others_does_not_run_clean_filter() { + // The one corpus-controlled code-exec vector hardened_git CANNOT disable by + // config is `$GIT_DIR/info/attributes` naming a `filter`, whose `.clean` + // runs only when git HASHES working-tree content. `list_untracked_files` + // uses `ls-files --others`, which lists paths and never hashes — so the + // filter must never fire. Prove it empirically (ADR-045, clarion-d9cf8bcfa9): + // a booby-trapped repo whose clean filter would create a marker must leave + // NO marker after the call, while still returning the untracked file. + let dir = tempfile::tempdir().unwrap(); + let repo = dir.path(); + + // Skip cleanly if git is unavailable on the test host. + let Ok(init) = Command::new("git") + .args(["init", "-q"]) + .current_dir(repo) + .status() + else { + return; + }; + if !init.success() { + return; + } + // git refuses commands without an identity in some environments; not needed + // here (no commit), but set repo-local config defensively. + let _ = Command::new("git") + .args(["config", "user.email", "t@t"]) + .current_dir(repo) + .status(); + + // Booby-trap: an in-`.git` attribute selects a clean filter (the residual + // source --attr-source cannot neutralize), and a repo-local config defines + // that filter to create PWNED if ever invoked. Repo-local config + in-git + // attributes are exactly what an untrusted corpus controls. 
+ std::fs::create_dir_all(repo.join(".git/info")).unwrap(); + std::fs::write(repo.join(".git/info/attributes"), "* filter=pwn\n").unwrap(); + let marker = repo.join("PWNED"); + Command::new("git") + .args([ + "config", + "filter.pwn.clean", + &format!("sh -c 'touch \"{}\"'", marker.display()), + ]) + .current_dir(repo) + .status() + .unwrap(); + + // An untracked file matching the `*` filter attribute. If anything hashed + // it, the clean filter would run and create the marker. + std::fs::write(repo.join("evil.py"), "x = 1\n").unwrap(); + + let untracked = list_untracked_files(repo).expect("ls-files must succeed in a git repo"); + assert!( + untracked.iter().any(|p| p == "evil.py"), + "the untracked file must be listed: {untracked:?}" + ); + assert!( + !marker.exists(), + "ls-files --others must NOT hash working-tree content, so the corpus \ + clean filter must never run (no PWNED marker)" + ); + } } diff --git a/crates/loomweave-core/src/lib.rs b/crates/loomweave-core/src/lib.rs index d30418b1..6f2aae28 100644 --- a/crates/loomweave-core/src/lib.rs +++ b/crates/loomweave-core/src/lib.rs @@ -19,7 +19,7 @@ pub use embedding_provider::{ }; pub use entity_id::{EntityId, EntityIdError, entity_id}; pub use errors::{HttpErrorCode, McpErrorCode}; -pub use hardened_git::hardened_git_command; +pub use hardened_git::{hardened_git_command, list_untracked_files}; pub use llm_provider::{ CachingModel, ClaudeCliProvider, ClaudeCliProviderConfig, CodexCliProvider, CodexCliProviderConfig, INFERRED_CALLS_PROMPT_VERSION, InferredCallsPromptInput, diff --git a/crates/loomweave-mcp/src/snapshot.rs b/crates/loomweave-mcp/src/snapshot.rs index bc58a9ba..530c741a 100644 --- a/crates/loomweave-mcp/src/snapshot.rs +++ b/crates/loomweave-mcp/src/snapshot.rs @@ -58,6 +58,18 @@ pub enum Staleness { /// modification scan and the unwatched-project-root caveat in the /// type-level note. 
Fresh, + /// The mtime/structural passes found every ingested file fresh, but the + /// working tree contains untracked source of an already-indexed file type + /// that the index has never seen — e.g. a brand-new top-level module the + /// structural pass cannot reach (the unwatched-project-root blind spot; + /// clarion-26c7e52027). Detected via a hardened, ignore-aware + /// `git ls-files --others` scoped to ingested extensions (ADR-045); the raw + /// signal is on [`ProjectSnapshot::worktree_dirty`]. Returned in place of + /// [`Fresh`] only when that worktree signal is positive — so it never fires + /// outside a git work tree, and a non-source untracked file never triggers it. + /// + /// [`Fresh`]: Staleness::Fresh + StaleWorktree, /// A completed run exists, but no ingested entity has a resolvable /// `source_file_path` to stat — there is *nothing to compare against*, so /// freshness is neither Fresh nor Stale. A normal outcome (e.g. a project @@ -94,6 +106,29 @@ pub struct ProjectSnapshot { staleness: Staleness, /// Latest run `completed_at` (ISO-8601) if any, else `None`. last_analyzed_at: Option<String>, + /// The git commit HEAD pointed at when the latest completed run was analyzed + /// (`runs.analyzed_at_commit`), if Loomweave captured one — `None` for a run + /// analyzed outside a git work tree, or before WS9 began recording it. + /// Surfaced so the `loomweave://context` resource and the session-start + /// banner can state *which commit* the index reflects. It is descriptive, not + /// a freshness signal: a [`Staleness::Fresh`] verdict is only ever fresh + /// relative to the ingested source files' mtimes — never a claim that HEAD or + /// the working tree still matches this commit (clarion-26c7e52027). The + /// `project_status_get` tool already reports the same value as `git_sha`. 
+ /// + /// [`Fresh`]: Staleness::Fresh + indexed_at_commit: Option<String>, + /// Whether the working tree holds untracked source of an already-indexed file + /// type that the index does not reflect — the signal behind + /// [`Staleness::StaleWorktree`] (clarion-26c7e52027, ADR-045). `Some(true)` = + /// un-indexed source present; `Some(false)` = a git work tree with none; + /// `None` = not a git work tree, git unavailable, or nothing ingested to scope + /// against (the check is moot). Computed via a hardened, hash-free + /// `git ls-files --others --exclude-standard` filtered to ingested file + /// extensions, so an untracked non-source file (a scratch `notes.txt`) never + /// flags it, and the untrusted-corpus posture is preserved (no working-tree + /// hashing — see [`loomweave_core::list_untracked_files`]). + worktree_dirty: Option<bool>, /// `true` when this snapshot was produced from a *failure* rather than a /// healthy read: at least one backing SQL query failed unexpectedly and was /// folded to a safe default (a count to `0`, the run lookup to `None`, or @@ -161,6 +196,21 @@ impl ProjectSnapshot { self.last_analyzed_at.as_deref() } + /// The commit the latest completed run was analyzed at, if captured — see the + /// field note. `None` when never analyzed, analyzed outside a git repo, or the + /// `analyzed_at_commit` column is NULL. + #[must_use] + pub fn indexed_at_commit(&self) -> Option<&str> { + self.indexed_at_commit.as_deref() + } + + /// Whether the working tree holds untracked source the index has not seen — + /// see the field note. `None` outside a git work tree / with nothing ingested. + #[must_use] + pub fn worktree_dirty(&self) -> Option<bool> { + self.worktree_dirty + } + /// `true` when this snapshot was folded from a backing-query failure — see /// the field-level note for the precise contract. 
#[must_use] @@ -196,9 +246,9 @@ pub fn project_snapshot(conn: &Connection, project_root: &Path) -> ProjectSnapsh ); let finding_count = scalar_count(conn, "SELECT COUNT(*) FROM findings", &mut degraded); - let last_analyzed_at = latest_completed_run(conn, &mut degraded); + let (last_analyzed_at, indexed_at_commit) = latest_completed_run(conn, &mut degraded); let mut scan_truncated = false; - let staleness = compute_staleness( + let mut staleness = compute_staleness( conn, project_root, last_analyzed_at.as_deref(), @@ -206,6 +256,17 @@ pub fn project_snapshot(conn: &Connection, project_root: &Path) -> ProjectSnapsh &mut scan_truncated, ); + // Worktree-source detection (clarion-26c7e52027, ADR-045): the mtime/structural + // passes cannot see un-indexed source in a brand-new top-level directory (the + // unwatched-project-root blind spot), so a hardened, ignore-aware + // `git ls-files --others` scoped to ingested extensions catches it. Best-effort + // and never degrades — `None` outside a git work tree. When the index is + // otherwise Fresh but such source exists, the honest verdict is StaleWorktree. 
+ let worktree_dirty = compute_worktree_dirty(conn, project_root); + if staleness == Staleness::Fresh && worktree_dirty == Some(true) { + staleness = Staleness::StaleWorktree; + } + ProjectSnapshot { db_present: true, entity_count, @@ -213,6 +274,8 @@ pub fn project_snapshot(conn: &Connection, project_root: &Path) -> ProjectSnapsh finding_count, staleness, last_analyzed_at, + indexed_at_commit, + worktree_dirty, degraded, scan_truncated, } @@ -228,6 +291,8 @@ pub fn missing_db_snapshot() -> ProjectSnapshot { finding_count: 0, staleness: Staleness::NeverAnalyzed, last_analyzed_at: None, + indexed_at_commit: None, + worktree_dirty: None, degraded: false, scan_truncated: false, } @@ -249,11 +314,62 @@ pub fn unreadable_db_snapshot() -> ProjectSnapshot { finding_count: 0, staleness: Staleness::Unknown, last_analyzed_at: None, + indexed_at_commit: None, + worktree_dirty: None, degraded: true, scan_truncated: false, } } +/// Whether the working tree holds untracked source of an already-indexed file +/// type — the [`ProjectSnapshot::worktree_dirty`] signal (clarion-26c7e52027, +/// ADR-045). Fail-soft: `None` when nothing is ingested (no extensions to scope +/// against, so the check is moot), the project is not a git work tree, or git is +/// unavailable. Never sets `degraded` — a missing git binary is environmental, +/// not a DB-machinery failure. +/// +/// Scoping to ingested extensions is what keeps this honest: a hardened +/// `git ls-files --others --exclude-standard` lists every untracked, non-ignored +/// path, but only those whose extension Loomweave actually ingests count — so an +/// untracked `notes.txt` never flags a fresh index dirty, while an untracked +/// `hub.py` (the dogfood scenario) does. 
+fn compute_worktree_dirty(conn: &Connection, project_root: &Path) -> Option<bool> { + let exts = ingested_source_extensions(conn); + if exts.is_empty() { + return None; + } + let untracked = loomweave_core::list_untracked_files(project_root)?; + Some(untracked.iter().any(|rel| { + Path::new(rel) + .extension() + .and_then(|ext| ext.to_str()) + .is_some_and(|ext| exts.contains(ext)) + })) +} + +/// The distinct file extensions among ingested `source_file_path`s (lowercased by +/// nothing — git and the filesystem are case-sensitive on the platforms we +/// target). Fail-soft to an empty set on any query error, which makes +/// [`compute_worktree_dirty`] return `None` (treat the scope as unknown). +fn ingested_source_extensions(conn: &Connection) -> BTreeSet<String> { + let mut exts = BTreeSet::new(); + let Ok(mut stmt) = conn.prepare( + "SELECT DISTINCT source_file_path FROM entities \ + WHERE source_file_path IS NOT NULL", + ) else { + return exts; + }; + let Ok(rows) = stmt.query_map([], |row| row.get::<_, String>(0)) else { + return exts; + }; + for rel in rows.flatten() { + if let Some(ext) = Path::new(&rel).extension().and_then(|ext| ext.to_str()) { + exts.insert(ext.to_owned()); + } + } + exts +} + /// Run a scalar `COUNT(*)` query. On failure, log, fold to `0`, and set /// `*degraded` so the caller can mark the whole snapshot as a degraded read. fn scalar_count(conn: &Connection, sql: &str, degraded: &mut bool) -> i64 { @@ -267,23 +383,29 @@ fn scalar_count(conn: &Connection, sql: &str, degraded: &mut bool) -> i64 { } } -/// Look up the latest completed run's `completed_at`. `QueryReturnedNoRows` is a -/// normal "never analyzed" outcome and does *not* degrade; any other error is a -/// machinery failure that folds to `None` and sets `*degraded`. -fn latest_completed_run(conn: &Connection, degraded: &mut bool) -> Option<String> { +/// Look up the latest completed run's `completed_at` and `analyzed_at_commit`. 
+/// `QueryReturnedNoRows` is a normal "never analyzed" outcome and does *not* +/// degrade; any other error is a machinery failure that folds to `(None, None)` +/// and sets `*degraded`. `analyzed_at_commit` is independently nullable (a run +/// analyzed outside a git work tree), so it is `None` even on the happy path when +/// the column was never populated. +fn latest_completed_run( + conn: &Connection, + degraded: &mut bool, +) -> (Option<String>, Option<String>) { match conn.query_row( - "SELECT completed_at FROM runs \ + "SELECT completed_at, analyzed_at_commit FROM runs \ WHERE completed_at IS NOT NULL AND status = 'completed' \ ORDER BY completed_at DESC LIMIT 1", [], - |row| row.get::<_, String>(0), + |row| Ok((row.get::<_, String>(0)?, row.get::<_, Option<String>>(1)?)), ) { - Ok(s) => Some(s), - Err(rusqlite::Error::QueryReturnedNoRows) => None, + Ok((completed_at, analyzed_at_commit)) => (Some(completed_at), analyzed_at_commit), + Err(rusqlite::Error::QueryReturnedNoRows) => (None, None), Err(err) => { tracing::warn!(error = %err, "loomweave latest-completed-run query failed"); *degraded = true; - None + (None, None) } } } @@ -855,4 +977,185 @@ mod tests { let json = serde_json::to_value(&snap).unwrap(); assert_eq!(json["degraded"], serde_json::Value::Bool(false)); } + + #[test] + fn indexed_at_commit_is_surfaced_and_serialized_when_the_run_recorded_one() { + // `project_status_get` already reports the analyzed commit as `git_sha`; + // the snapshot (loomweave://context + the session-start banner) must carry + // the same value so a Fresh verdict can name the commit it reflects + // (clarion-26c7e52027). 
+ let (_dir, conn) = migrated_conn(); + let dir = tempfile::tempdir().unwrap(); + std::fs::write(dir.path().join("a.py"), "x = 1\n").unwrap(); + insert_entity(&conn, "python:module:a", "module", Some("a.py")); + conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status, analyzed_at_commit) \ + VALUES ('r', '2099-01-01T00:00:00.000Z', '2099-01-01T00:00:00.000Z', '{}', '{}', 'completed', 'abc123def456')", + [], + ) + .unwrap(); + + let snap = project_snapshot(&conn, dir.path()); + assert_eq!(snap.indexed_at_commit(), Some("abc123def456"), "{snap:?}"); + let json = serde_json::to_value(&snap).unwrap(); + assert_eq!( + json["indexed_at_commit"], + serde_json::Value::String("abc123def456".into()) + ); + } + + #[test] + fn indexed_at_commit_is_none_when_run_analyzed_outside_a_git_repo() { + // `analyzed_at_commit` is independently nullable: a run outside a git work + // tree records NULL, and the snapshot must report None — never a fabricated + // or empty commit. + let (_dir, conn) = migrated_conn(); + insert_entity(&conn, "python:module:a", "module", Some("a.py")); + conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status) \ + VALUES ('r', '2026-01-01T00:00:00.000Z', '2026-01-02T00:00:00.000Z', '{}', '{}', 'completed')", + [], + ) + .unwrap(); + let snap = project_snapshot(&conn, std::path::Path::new("/tmp")); + assert_eq!(snap.indexed_at_commit(), None, "{snap:?}"); + } + + #[test] + fn new_top_level_directory_is_a_known_fresh_blind_spot() { + // Documents (and regression-locks) the conservative-nudge limitation the + // honest banner now discloses (clarion-26c7e52027). The watch set is the + // *direct parents of ingested files* and the project root is deliberately + // unwatched, so a brand-new top-level directory full of never-ingested + // source is invisible to BOTH the structural-drift and per-file passes — + // the verdict stays Fresh. 
This is the exact dogfood scenario (new + // specimen modules read as "fresh"). Detecting it would need working-tree + // git, which the untrusted-corpus posture (hardened_git) blocks; until + // then the banner tells the agent to re-analyze after adding modules. + use super::parse_iso8601_to_systemtime; + let (_dir, conn) = migrated_conn(); + let root = tempfile::tempdir().unwrap(); + let pkg = root.path().join("pkg"); + std::fs::create_dir(&pkg).unwrap(); + let a = pkg.join("a.py"); + std::fs::write(&a, "x = 1\n").unwrap(); + + let run_iso = "2026-06-15T00:00:00.000Z"; + let run_time = parse_iso8601_to_systemtime(run_iso).unwrap(); + let day = std::time::Duration::from_secs(86_400); + set_mtime(&a, run_time - day); // ingested file untouched since the run + set_mtime(&pkg, run_time - day); // its watched parent untouched too + + insert_entity(&conn, "python:module:pkg.a", "module", Some("pkg/a.py")); + conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status) \ + VALUES ('r', ?1, ?1, '{}', '{}', 'completed')", + rusqlite::params![run_iso], + ) + .unwrap(); + + // Add a brand-new top-level package AFTER the run. Its parent is the + // (unwatched) project root, so nothing in the watch set changed. + let newpkg = root.path().join("newpkg"); + std::fs::create_dir(&newpkg).unwrap(); + let hub = newpkg.join("hub.py"); + std::fs::write(&hub, "y = 2\n").unwrap(); + set_mtime(&hub, run_time + day); + + let snap = project_snapshot(&conn, root.path()); + // The mtime/structural passes can't see the new top-level dir, AND this + // tempdir is not a git work tree, so the worktree-source check returns + // None (nothing to detect with). Verdict stays Fresh — the mtime blind + // spot the banner caveat covers. In a GIT repo this same scenario flips to + // StaleWorktree (see `untracked_source_in_git_repo_reports_stale_worktree`). 
+ assert_eq!(snap.staleness, Staleness::Fresh, "{snap:?}"); + assert_eq!( + snap.worktree_dirty, None, + "outside a git work tree, worktree_dirty must be None: {snap:?}" + ); + } + + /// `git init` + commit `files` in `root`; returns `false` (caller skips) if + /// git is unavailable on the host. Committing keeps the seeded source OUT of + /// the untracked set so a clean baseline really is clean. + fn git_init_with_committed(root: &std::path::Path, files: &[(&str, &str)]) -> bool { + use std::process::Command; + let run = |args: &[&str]| -> bool { + Command::new("git") + .args(args) + .current_dir(root) + .status() + .is_ok_and(|s| s.success()) + }; + if !run(&["init", "-q"]) { + return false; + } + let _ = run(&["config", "user.email", "t@t"]); + let _ = run(&["config", "user.name", "t"]); + for (name, body) in files { + std::fs::write(root.join(name), body).unwrap(); + } + if !files.is_empty() { + run(&["add", "."]); + run(&["commit", "-q", "-m", "init"]); + } + true + } + + #[test] + fn untracked_source_in_git_repo_reports_stale_worktree() { + // The dogfood scenario (clarion-26c7e52027, ADR-045): an index that is + // mtime-fresh but with a brand-new untracked source module the structural + // pass cannot reach. In a git work tree the hardened `ls-files --others` + // check (scoped to ingested `.py`) catches it and the verdict is honest: + // StaleWorktree, with worktree_dirty = Some(true). + let (_dir, conn) = migrated_conn(); + let root = tempfile::tempdir().unwrap(); + if !git_init_with_committed(root.path(), &[("demo.py", "x = 1\n")]) { + return; // git unavailable on host → skip (mechanism covered in core) + } + insert_entity(&conn, "python:module:demo", "module", Some("demo.py")); + // Far-future run → every ingested file is mtime-fresh. 
+ conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status) \ + VALUES ('r', '2099-01-01T00:00:00.000Z', '2099-01-01T00:00:00.000Z', '{}', '{}', 'completed')", + [], + ) + .unwrap(); + // Brand-new untracked source the index never saw. + std::fs::write(root.path().join("hub.py"), "y = 2\n").unwrap(); + + let snap = project_snapshot(&conn, root.path()); + assert_eq!(snap.staleness, Staleness::StaleWorktree, "{snap:?}"); + assert_eq!(snap.worktree_dirty, Some(true), "{snap:?}"); + } + + #[test] + fn untracked_non_source_in_git_repo_stays_fresh() { + // False-positive guard: an untracked file whose extension Loomweave does + // not ingest (a scratch notes.txt) must NOT flag the index dirty. The + // extension scoping is what keeps the signal honest. + let (_dir, conn) = migrated_conn(); + let root = tempfile::tempdir().unwrap(); + if !git_init_with_committed(root.path(), &[("demo.py", "x = 1\n")]) { + return; + } + insert_entity(&conn, "python:module:demo", "module", Some("demo.py")); + conn.execute( + "INSERT INTO runs (id, started_at, completed_at, config, stats, status) \ + VALUES ('r', '2099-01-01T00:00:00.000Z', '2099-01-01T00:00:00.000Z', '{}', '{}', 'completed')", + [], + ) + .unwrap(); + // Untracked, but NOT a source extension the index uses. 
+ std::fs::write(root.path().join("notes.txt"), "scratch\n").unwrap(); + + let snap = project_snapshot(&conn, root.path()); + assert_eq!( + snap.staleness, + Staleness::Fresh, + "an untracked non-source file must not flip the verdict: {snap:?}" + ); + assert_eq!(snap.worktree_dirty, Some(false), "{snap:?}"); + } } diff --git a/crates/loomweave-mcp/src/tools/orientation.rs b/crates/loomweave-mcp/src/tools/orientation.rs index d7a4f0ff..39d51ee2 100644 --- a/crates/loomweave-mcp/src/tools/orientation.rs +++ b/crates/loomweave-mcp/src/tools/orientation.rs @@ -92,8 +92,10 @@ impl ServerState { "degraded": snapshot.degraded(), "scan_truncated": snapshot.scan_truncated(), }); - let staleness_stale = - matches!(snapshot.staleness(), crate::snapshot::Staleness::Stale); + let staleness_stale = matches!( + snapshot.staleness(), + crate::snapshot::Staleness::Stale | crate::snapshot::Staleness::StaleWorktree + ); // Whether this index has any alive SEI bindings (REQ-C-04 / // ADR-038). Degrades to `false` on a pre-SEI database. let sei_populated = has_any_alive_binding(conn).unwrap_or(false); diff --git a/crates/loomweave-mcp/src/tools/status.rs b/crates/loomweave-mcp/src/tools/status.rs index 5df6669f..888e8f32 100644 --- a/crates/loomweave-mcp/src/tools/status.rs +++ b/crates/loomweave-mcp/src/tools/status.rs @@ -252,6 +252,28 @@ impl ServerState { ); } + // Disclose what a `fresh` verdict does NOT cover, on the named tool an + // agent reads directly — not just in the session-start banner + // (clarion-26c7e52027). `fresh` compares already-indexed files' mtimes; a + // brand-new module in a not-yet-indexed top-level directory, or any + // uncommitted addition (undetectable on an untrusted corpus), can sit + // unseen behind it. `index_diff_get` reports committed/staged drift in + // detail (it shares the untracked blind spot); re-analyze is the remedy. 
+ let staleness_note = match snapshot.staleness() { + crate::snapshot::Staleness::Fresh => Some( + "\"fresh\" reflects already-indexed source files only; it does NOT detect \ + brand-new modules in a not-yet-indexed directory, nor uncommitted \ + additions. If source was added or moved since the last analyze, re-run \ + `loomweave analyze`. Use index_diff_get for committed/staged drift detail.", + ), + crate::snapshot::Staleness::StaleWorktree => Some( + "the working tree has untracked source files of already-indexed types that \ + the index has not seen (new modules not yet analyzed; see worktree_dirty). \ + Re-run `loomweave analyze` before relying on graph answers.", + ), + _ => None, + }; + let result = json!({ "project_root": root_display, "db_path": db_path.display().to_string(), @@ -269,6 +291,8 @@ impl ServerState { "briefing_blocked": briefing_blocked, }, "staleness": serde_json::to_value(snapshot.staleness()).unwrap_or(Value::Null), + "staleness_note": staleness_note, + "worktree_dirty": snapshot.worktree_dirty(), "scan_truncated": snapshot.scan_truncated(), "last_analyzed_at": snapshot.last_analyzed_at(), "git_sha": analyzed_git_sha, diff --git a/crates/loomweave-mcp/tests/storage_tools.rs b/crates/loomweave-mcp/tests/storage_tools.rs index f9040659..a9eb3b9f 100644 --- a/crates/loomweave-mcp/tests/storage_tools.rs +++ b/crates/loomweave-mcp/tests/storage_tools.rs @@ -4828,6 +4828,122 @@ async fn project_status_reports_counts_latest_run_and_plugins() { assert_eq!(result["filigree"], Value::Null); } +#[tokio::test] +async fn project_status_fresh_carries_staleness_note_caveat() { + // The named tool an agent reads directly must disclose what "fresh" omits — + // not only the session-start banner (clarion-26c7e52027). The seeded demo.py + // is older than a far-future run, so the verdict is Fresh. 
+ let (project, db_path) = open_project(); + let conn = Connection::open(&db_path).expect("open sqlite"); + insert_run( + &conn, + "run-fresh", + "2099-01-01T00:00:00.000Z", + "completed", + Some("2099-01-01T00:00:00.000Z"), + ); + drop(conn); + + let state = state_for(project.path(), &db_path); + let result = call_tool(&state, "project_status", json!({})).await["result"].clone(); + assert_eq!( + result["staleness"], "fresh", + "fixture must be fresh: {result}" + ); + let note = result["staleness_note"] + .as_str() + .expect("a fresh verdict must carry a staleness_note"); + assert!( + note.contains("loomweave analyze") && note.contains("not-yet-indexed"), + "staleness_note must disclose the not-yet-indexed gap and the re-analyze \ + remedy: {note}" + ); +} + +#[tokio::test] +async fn project_status_non_fresh_has_null_staleness_note() { + // A non-fresh verdict has no "fresh" claim to qualify, so the note is omitted. + // The seeded demo.py was just written (mtime ~now), so a past-dated run makes + // the source newer than the run → Stale, deterministically. + let (project, db_path) = open_project(); + let conn = Connection::open(&db_path).expect("open sqlite"); + insert_run( + &conn, + "run-1", + "2026-02-02T00:00:00.000Z", + "completed", + Some("2026-02-02T00:00:00.000Z"), + ); + drop(conn); + + let state = state_for(project.path(), &db_path); + let result = call_tool(&state, "project_status", json!({})).await["result"].clone(); + assert_ne!( + result["staleness"], "fresh", + "fixture must NOT be fresh: {result}" + ); + assert_eq!( + result["staleness_note"], + Value::Null, + "a non-fresh verdict must omit the staleness_note: {result}" + ); +} + +#[tokio::test] +async fn project_status_reports_stale_worktree_for_untracked_source() { + // The exact tool the dogfood report quoted (clarion-26c7e52027, ADR-045): a + // mtime-fresh index in a git work tree that has a brand-new untracked module. 
+ // project_status_get must report staleness="stale_worktree" + worktree_dirty + // = true, not a misleading bare "fresh". + let (project, db_path) = open_project(); + + // Make the project a git repo and commit everything seeded so far, so only + // the new module below is untracked. Skip cleanly if git is unavailable. + let git = |args: &[&str]| -> bool { + std::process::Command::new("git") + .args(args) + .current_dir(project.path()) + .status() + .is_ok_and(|s| s.success()) + }; + if !git(&["init", "-q"]) { + return; + } + let _ = git(&["config", "user.email", "t@t"]); + let _ = git(&["config", "user.name", "t"]); + git(&["add", "."]); + git(&["commit", "-q", "-m", "init"]); + + let conn = Connection::open(&db_path).expect("open sqlite"); + insert_run( + &conn, + "run-fresh", + "2099-01-01T00:00:00.000Z", + "completed", + Some("2099-01-01T00:00:00.000Z"), + ); + drop(conn); + // Brand-new untracked Python module the index never saw. + std::fs::write(project.path().join("hub.py"), "y = 2\n").expect("write untracked module"); + + let state = state_for(project.path(), &db_path); + let result = call_tool(&state, "project_status", json!({})).await["result"].clone(); + assert_eq!( + result["staleness"], "stale_worktree", + "untracked source must yield stale_worktree: {result}" + ); + assert_eq!( + result["worktree_dirty"], true, + "worktree_dirty must be true: {result}" + ); + assert!( + result["staleness_note"] + .as_str() + .is_some_and(|n| n.contains("loomweave analyze")), + "stale_worktree must carry a re-analyze note: {result}" + ); +} + #[tokio::test] async fn project_status_marks_skipped_no_plugins_run() { // AC#2: a skipped_no_plugins run is unmistakable as no index refresh. 
diff --git a/docs/loomweave/adr/ADR-045-worktree-source-staleness.md b/docs/loomweave/adr/ADR-045-worktree-source-staleness.md new file mode 100644 index 00000000..900ec0c8 --- /dev/null +++ b/docs/loomweave/adr/ADR-045-worktree-source-staleness.md @@ -0,0 +1,118 @@ +# ADR-045: Worktree-Source Staleness via Hardened `git ls-files --others` + +**Status**: Accepted +**Date**: 2026-06-06 +**Deciders**: qacona@gmail.com +**Context**: clarion-26c7e52027 (dogfood: `staleness:"fresh"` lied while +un-indexed top-level modules sat in the working tree) and its follow-up +clarion-d9cf8bcfa9. Builds on ADR-013/ADR-021 (untrusted-corpus posture) and the +`hardened_git` helper (clarion-4b5a8aff54). + +## Summary + +`project_snapshot` (the `loomweave://context` resource, the `loomweave hook +session-start` banner, and `project_status_get`) now reports a third "needs +re-analyze" signal beyond the mtime/structural passes: **worktree-source +drift**. When the index is otherwise mtime-fresh but the working tree contains an +**untracked source file of an already-indexed type**, the verdict becomes +`Staleness::StaleWorktree` and the snapshot carries `worktree_dirty: Some(true)`. + +Detection uses a **hardened, ignore-aware, hash-free** `git ls-files --others +--exclude-standard`, scoped to the file extensions Loomweave has actually +ingested. It is fail-soft: `worktree_dirty` is `None` outside a git work tree, +when git is unavailable, or when nothing is ingested. + +## Context + +The mtime/structural freshness passes (ADR note in `snapshot.rs`) watch the +*direct parent directories of ingested files*, and deliberately never watch the +project root (`analyze` writes `.loomweave/` under it, which would wedge every +check to a permanent `Stale`). The documented consequence: a brand-new +**top-level** directory of source the index has never seen is invisible — it +reports `Fresh`. 
That is the exact dogfood failure: new specimen modules added to +a tree, `project_status_get` still says `fresh`, and an agent trusts it and gives +wrong "what calls X" answers. + +Catching un-indexed worktree source requires looking at the working tree. The +untrusted-corpus posture forbids the obvious tool: `git status` must **hash** +working-tree content to detect modifications, which runs a repo-controlled +`filter.<driver>.clean` selected by `$GIT_DIR/info/attributes` — a code-execution +vector no git config can disable (see `hardened_git` module docs). That is why +the SEI rename diff and `index_diff` use `git diff --cached` and a stat-based +per-file scan, never `git status`. + +## Decision + +Use `git ls-files --others --exclude-standard` through `hardened_git_command`, +exposed as `loomweave_core::list_untracked_files`. + +1. **Safe under the untrusted-corpus posture.** `ls-files --others` *enumerates* + untracked, non-ignored paths; it never computes blob hashes of working-tree + content, so the `filter.clean` vector is never triggered. This is verified + empirically, not by reasoning alone: `hardened_git::tests:: + ls_files_others_does_not_run_clean_filter` booby-traps a repo with `* filter=pwn` + in `$GIT_DIR/info/attributes` and a repo-local `filter.pwn.clean` that would + create a marker file, then asserts the marker does **not** appear after the + call. The hardened command also sets `core.fsmonitor=false` and + `GIT_OPTIONAL_LOCKS=0`, so no fsmonitor program runs and the index is not + written. + +2. **No false-positives.** A naive "any untracked file ⇒ dirty" would flag a + scratch `notes.txt` and make a genuinely-fresh index look dirty. The signal is + therefore **scoped to the file extensions present in `entities.source_file_path`** + — only an untracked file whose extension Loomweave actually ingests counts. An + untracked `notes.txt` never flags; an untracked `hub.py` (when `.py` is + indexed) does. 
`--exclude-standard` further drops `.gitignore`d paths, so + build dirs, virtualenvs, and the ignored `.loomweave/` sidecars never appear. + +3. **Verdict + field.** When mtime/structural say `Fresh` and the worktree signal + is positive, the verdict is `Staleness::StaleWorktree` (serialized + `"stale_worktree"`); `worktree_dirty: Option<bool>` carries the raw signal on + every snapshot and in `project_status_get`. `StaleWorktree` is treated as + "stale" by orientation consumers; the session-start banner names the remedy + (`loomweave analyze`). + +4. **Fail-soft.** Any git failure, a non-repo working directory, or an empty + ingested-extension set yields `worktree_dirty: None` and leaves the + mtime-derived verdict unchanged. Detection never sets `degraded` (a missing + git binary is environmental) and never errors — `project_snapshot` stays + infallible. + +### What it does NOT cover (deliberate scope) + +- **Untracked-source enumeration runs git at session start.** It is hash-free and + ignore-pruned (comparable to `git status` minus hashing), and fail-soft, but it + is the first git invocation in the session-start snapshot path. Accepted for the + honesty win. +- **Modified-but-unstaged edits to *tracked* indexed files** remain the job of the + stat-based mtime pass (→ `Stale`) and `index_diff_get`'s `diff --cached`; they + are not what `ls-files --others` reports. +- **Mid-serve committable snapshots** are still `loomweave db backup`'s job + (ADR-005 note; clarion-cdee445ed8), unrelated to this verdict. + +## Consequences + +- `Staleness` gains a `StaleWorktree` variant — a wire-vocabulary addition to + `loomweave://context` and `project_status_get` (`"stale_worktree"`). Consumers + that switch on `staleness` must handle it; `orientation` treats it as stale. +- `ProjectSnapshot` gains `worktree_dirty: Option<bool>`, surfaced on the context + resource and `project_status_get`. 
+- `loomweave_core` gains `list_untracked_files`, the only sanctioned untracked + probe, carrying the security contract + the empirical test. +- The session-start banner gives a concrete `StaleWorktree` line instead of only + the `Fresh` caveat when un-indexed source is present in a git repo. + +## Alternatives Considered + +- **`git status --porcelain`** — rejected: hashes the working tree, re-opening the + `filter.clean` RCE the whole `hardened_git` posture exists to close. +- **commit-mismatch (`rev-parse HEAD` vs `analyzed_at_commit`) and/or + `diff --cached` only** — rejected as the *sole* signal: both report "clean" for + the reported untracked-new-file case, i.e. misleadingly-clean — the original bug + wearing a new field name. +- **Watching the project root's mtime** — rejected: the root is poisoned by + `.loomweave/` writes and by unrelated top-level churn (editor temp files, + `.DS_Store`), trading a false-negative for frequent false-positives. +- **Prose-only honest banner (no detection)** — shipped first as the conservative + mitigation (clarion-26c7e52027) and retained for the non-git case; this ADR adds + real detection where a git work tree makes it safe and accurate. 
diff --git a/docs/loomweave/adr/README.md b/docs/loomweave/adr/README.md index 6d098a1c..f2c16778 100644 --- a/docs/loomweave/adr/README.md +++ b/docs/loomweave/adr/README.md @@ -45,6 +45,7 @@ This folder is the canonical home for authored Loomweave architecture decision r | [ADR-042](./ADR-042-hmac-freshness-and-replay-window.md) | HMAC freshness and replay window — timestamp + nonce headers, crate-backed HMAC, process-local replay cache | Accepted | | [ADR-043](./ADR-043-edge-reanalysis-replacement.md) | Edge reanalysis replacement — per-source-file anchored-edge replacement and edge metadata upsert; amends ADR-026 | Accepted | | [ADR-044](./ADR-044-read-api-ephemeral-port-publication.md) | Read-API ephemeral port publication — `.loomweave/ephemeral.port` as a normative cross-product file contract (loopback-only, port-only, atomic) + consume-time resolution precedence (explicit > file > config > none), per-project deterministic port, installer stops pinning 9111; relates to ADR-034 | Accepted | +| [ADR-045](./ADR-045-worktree-source-staleness.md) | Worktree-source staleness — `Staleness::StaleWorktree` + `worktree_dirty` via hardened, hash-free `git ls-files --others` scoped to ingested extensions; closes the unwatched-top-level-dir blind spot without `git status`'s filter-RCE vector; builds on ADR-013/021 untrusted-corpus posture | Accepted | ## Backlog still tracked in the detailed design From 5da9ccdda64ff4b7ed176a80abfbaa7f3c852815 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:51:56 +1000 Subject: [PATCH 22/27] docs(getting-started): macOS Gatekeeper quarantine workaround (V11-CI-04) Release archives are unsigned (ADR-033), so macOS Gatekeeper blocks the downloaded loomweave binary on first launch. Add a Troubleshooting entry with the `xattr -d com.apple.quarantine` fix and the GUI "Open Anyway" alternative. Closes clarion-03dfa1f94d. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/operator/getting-started.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/operator/getting-started.md b/docs/operator/getting-started.md index d63b15ad..0908960c 100644 --- a/docs/operator/getting-started.md +++ b/docs/operator/getting-started.md @@ -364,6 +364,22 @@ signals. A `loomweave doctor` subcommand that surfaces discovery state at exit is on the v2.0 roadmap; for v1.0 the diagnostic is the WARN line plus the `which loomweave-plugin-*` check above. +### macOS: "loomweave cannot be opened because the developer cannot be verified" + +The release archives are not notarized (ADR-033 ships unsigned binaries), so +macOS Gatekeeper quarantines the downloaded `loomweave` binary and refuses the +first launch with a developer-verification error. Clear the quarantine +attribute on the extracted binary before installing it: + +```bash +xattr -d com.apple.quarantine ./loomweave-aarch64-apple-darwin/loomweave +``` + +Alternatively, approve it once from the GUI — attempt to run it, then +**System Settings → Privacy & Security → "Open Anyway"**. Either is a one-time +step per downloaded binary; a source build (the fallback under [§1](#1-install)) +is never quarantined. Notarized release artifacts are on the post-1.0 roadmap. + ### "secret_present" block fires on a real file Add the file to `.loomweave/secrets-baseline.yaml` with a written justification From b598ebf1a46a98e471772fbfebb8fc6a4e8d13f6 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:51:56 +1000 Subject: [PATCH 23/27] chore(storage): backfill ADR-024 published_build marker to v1.0.0 v1.0.0 was the first externally-published build; 0001_initial_schema.sql is byte-identical at v1.0.0 and HEAD, and all schema changes since are additive 0002+ migrations. 
Backfilling the marker activates scripts/check-migration-retirement.py's guard (previously pre-trigger despite shipped releases): in-place edits to 0001 now fail CI, enforcing additive-only. Closes clarion-b20448b3ac. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-storage/migrations/published_build.txt | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 crates/loomweave-storage/migrations/published_build.txt diff --git a/crates/loomweave-storage/migrations/published_build.txt b/crates/loomweave-storage/migrations/published_build.txt new file mode 100644 index 00000000..819fd1ed --- /dev/null +++ b/crates/loomweave-storage/migrations/published_build.txt @@ -0,0 +1,9 @@ +# ADR-024 in-place migration-retirement marker (clarion-b20448b3ac). +# +# Names the first externally-published build whose 0001_initial_schema.sql is +# frozen. 0001 has been byte-identical since this tag (verified: `git diff +# v1.0.0 HEAD -- .../0001_initial_schema.sql` is empty), and every schema change +# since has been an additive 0002+ migration. With this marker present, +# scripts/check-migration-retirement.py fails if 0001 ever diverges from this +# ref — later schema changes must be additive migrations, never in-place edits. +v1.0.0 From 3c8feae6cd727c19e319f8cac163cfee4da64f36 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:53:23 +1000 Subject: [PATCH 24/27] ci(release): add macOS aarch64 verify gate mirroring ci.yml (clarion-47d395e03c) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit release.yml's `verify` job was Linux-only, so a macOS-only clippy/--all-targets regression — caught on PRs by ci.yml's rust-macos job but not re-verified at release — could pass `verify` and proceed to the build/publish jobs (the aarch64 build leg only builds --bins, not tests/all-targets). 
Add a `verify-macos` job mirroring ci.yml's rust-macos (clippy + bin build on macos-14) and add it to the needs chain of build-rust, build-wheels, and build-plugin. No new runner dependency — build-rust already uses macos-14. Closes clarion-47d395e03c. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/release.yml | 40 ++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9aa27abe..f7794065 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -175,8 +175,42 @@ jobs: - name: Phase 3 subsystems run: CARGO_BUILD=0 bash tests/e2e/phase3_subsystems.sh + # macOS (aarch64) pre-release gate — mirrors ci.yml's rust-macos job so a + # macOS-only clippy/--all-targets regression cannot reach the build/publish + # jobs. ci.yml gates every PR, but release.yml's Linux-only `verify` left a + # gap for a macOS-only test/all-targets lint issue that does not break the + # --bin build (clarion-47d395e03c). build-rust already uses macos-14, so no + # new runner dependency. Restore the x86_64 (macos-13) leg here alongside the + # build matrix when those runners recover (clarion-ec389a8e72). 
+ verify-macos: + name: Verify macOS (${{ matrix.target }}) + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: false + matrix: + include: + - target: aarch64-apple-darwin + runner: macos-14 + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 + + - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 + with: + toolchain: stable + components: clippy + + - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 + with: + key: release-verify-${{ matrix.target }} + + - name: clippy + run: cargo clippy --workspace --all-targets --all-features -- -D warnings + + - name: build workspace bins + run: cargo build --workspace --bins + build-rust: - needs: [verify] + needs: [verify, verify-macos] name: Build loomweave (${{ matrix.target }}) runs-on: ${{ matrix.runner }} strategy: @@ -242,7 +276,7 @@ jobs: retention-days: 7 build-wheels: - needs: [verify] + needs: [verify, verify-macos] name: Build loomweave wheel (${{ matrix.target }}) runs-on: ${{ matrix.runner }} # maturin bin-wheels for PyPI. Matrix mirrors `build-rust` (Linux x86_64 + @@ -293,7 +327,7 @@ jobs: retention-days: 7 build-plugin: - needs: [verify] + needs: [verify, verify-macos] name: Build Python plugin sdist runs-on: ubuntu-latest steps: From 8be269d6dc66ba001bfa3b52adaf2034224c6eb9 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:56:21 +1000 Subject: [PATCH 25/27] fix(storage): drop dead entity_fts.content_text column (migration 0009, V11-STO-06) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit content_text shipped in 0001 reserved for an on-demand source-text projection that was never built: the entities_ai trigger always wrote '', the entities_au trigger never touched it, and no query reads it (search MATCHes the table, not the column). It was permanently-empty schema drift; content search is served by the ADR-040 embeddings sidecar. 
FTS5 has no ALTER DROP COLUMN, so migration 0009 recreates entity_fts and its triggers without it and rebuilds the index from entities. Behaviour-preserving — only a never-populated, never-read column goes. Bumps CURRENT_SCHEMA_VERSION to 9; updates the schema_migrations expectation tests and the authoritative detailed-design.md FTS block; adds a regression test asserting content_text is gone and MATCH search still works. Closes clarion-716449c371. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../migrations/0009_drop_fts_content_text.sql | 58 +++++++++++++++++++ crates/loomweave-storage/src/schema.rs | 7 ++- .../loomweave-storage/tests/schema_apply.rs | 52 ++++++++++++++++- docs/loomweave/1.0/detailed-design.md | 14 ++--- 4 files changed, 121 insertions(+), 10 deletions(-) create mode 100644 crates/loomweave-storage/migrations/0009_drop_fts_content_text.sql diff --git a/crates/loomweave-storage/migrations/0009_drop_fts_content_text.sql b/crates/loomweave-storage/migrations/0009_drop_fts_content_text.sql new file mode 100644 index 00000000..e7af0c68 --- /dev/null +++ b/crates/loomweave-storage/migrations/0009_drop_fts_content_text.sql @@ -0,0 +1,58 @@ +-- Migration 0009: drop the dead entity_fts.content_text column (V11-STO-06, +-- clarion-716449c371). +-- +-- content_text shipped in 0001 reserved for an on-demand source-text projection +-- that was never implemented: the entities_ai trigger always wrote '', the +-- entities_au trigger never touched it, and no query reads it (search MATCHes +-- the table, not the column). Semantic/content search is instead served by the +-- ADR-040 embeddings sidecar, so the column is permanently-empty drift that +-- misrepresents the FTS surface. FTS5 has no ALTER ... DROP COLUMN, so recreate +-- the virtual table and its triggers without it. Behaviour-preserving: only a +-- never-populated, never-read column is removed. 
+ +BEGIN; + +DROP TRIGGER IF EXISTS entities_ai; +DROP TRIGGER IF EXISTS entities_au; +DROP TRIGGER IF EXISTS entities_ad; +DROP TABLE IF EXISTS entity_fts; + +CREATE VIRTUAL TABLE entity_fts USING fts5( + entity_id UNINDEXED, + name, + short_name, + summary_text, + tokenize = 'porter unicode61' +); + +-- FTS5 triggers keep entity_fts synchronised with entities (content_text dropped). +CREATE TRIGGER entities_ai AFTER INSERT ON entities BEGIN + INSERT INTO entity_fts (entity_id, name, short_name, summary_text) + VALUES ( + new.id, + new.name, + new.short_name, + COALESCE(json_extract(new.summary, '$.briefing.purpose'), '') + ); +END; +CREATE TRIGGER entities_au AFTER UPDATE ON entities BEGIN + UPDATE entity_fts + SET name = new.name, + short_name = new.short_name, + summary_text = COALESCE(json_extract(new.summary, '$.briefing.purpose'), '') + WHERE entity_id = new.id; +END; +CREATE TRIGGER entities_ad AFTER DELETE ON entities BEGIN + DELETE FROM entity_fts WHERE entity_id = old.id; +END; + +-- Rebuild the index from existing entities (the recreated vtable starts empty). +INSERT INTO entity_fts (entity_id, name, short_name, summary_text) +SELECT id, name, short_name, + COALESCE(json_extract(summary, '$.briefing.purpose'), '') +FROM entities; + +INSERT INTO schema_migrations (version, name, applied_at) +VALUES (9, '0009_drop_fts_content_text', strftime('%Y-%m-%dT%H:%M:%fZ', 'now')); + +COMMIT; diff --git a/crates/loomweave-storage/src/schema.rs b/crates/loomweave-storage/src/schema.rs index d11d8e32..1c474e56 100644 --- a/crates/loomweave-storage/src/schema.rs +++ b/crates/loomweave-storage/src/schema.rs @@ -55,12 +55,17 @@ const MIGRATIONS: &[Migration] = &[ name: "0008_run_owner_heartbeat", sql: include_str!("../migrations/0008_run_owner_heartbeat.sql"), }, + Migration { + version: 9, + name: "0009_drop_fts_content_text", + sql: include_str!("../migrations/0009_drop_fts_content_text.sql"), + }, ]; /// Highest migration version known to this build. 
Mirrored into the /// `SQLite` `user_version` header (STO-02) so a future-built database is /// refused at open instead of silently corrupting state. -pub const CURRENT_SCHEMA_VERSION: u32 = 8; +pub const CURRENT_SCHEMA_VERSION: u32 = 9; const _CURRENT_SCHEMA_VERSION_MATCHES_LAST_MIGRATION: () = { // Compile-time check: `CURRENT_SCHEMA_VERSION` must equal the highest diff --git a/crates/loomweave-storage/tests/schema_apply.rs b/crates/loomweave-storage/tests/schema_apply.rs index b94679ec..5da82010 100644 --- a/crates/loomweave-storage/tests/schema_apply.rs +++ b/crates/loomweave-storage/tests/schema_apply.rs @@ -630,6 +630,53 @@ fn fts_trigger_populates_entity_fts_on_insert() { assert_eq!(matched_id, "python:function:auth.refresh"); } +#[test] +fn migration_0009_drops_dead_fts_content_text_column() { + // V11-STO-06 / clarion-716449c371: the never-populated, never-read + // content_text column is gone after 0009, and search via the recreated + // virtual table + triggers still works. + let tempdir = tempfile::tempdir().unwrap(); + let conn = open_fresh(&tempdir); + + let sql: String = conn + .query_row( + "SELECT sql FROM sqlite_master WHERE name='entity_fts'", + [], + |row| row.get(0), + ) + .unwrap(); + assert!( + !sql.contains("content_text"), + "entity_fts must not declare content_text after 0009; sql was: {sql}" + ); + + let summary_json = r#"{"briefing": {"purpose": "rotate signing keys"}}"#; + conn.execute( + "INSERT INTO entities (id, plugin_id, kind, name, short_name, properties, summary, \ + created_at, updated_at) \ + VALUES (?1, ?2, ?3, ?4, ?5, '{}', ?6, \ + strftime('%Y-%m-%dT%H:%M:%fZ', 'now'), strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))", + params![ + "python:function:auth.rotate", + "python", + "function", + "auth.rotate", + "rotate", + summary_json, + ], + ) + .unwrap(); + + let matched_id: String = conn + .query_row( + "SELECT entity_id FROM entity_fts WHERE entity_fts MATCH 'rotate'", + [], + |row| row.get(0), + ) + .expect("FTS search still works 
after content_text drop"); + assert_eq!(matched_id, "python:function:auth.rotate"); +} + #[test] fn edges_table_has_no_id_column() { // ADR-026 decision 4: drop synthetic `id` PK from edges. Natural key @@ -795,7 +842,7 @@ fn migrations_are_idempotent() { let tempdir = tempfile::tempdir().unwrap(); let mut conn = open_fresh(&tempdir); schema::apply_migrations(&mut conn).expect("second apply should be a no-op"); - assert_eq!(schema::applied_count(&conn).unwrap(), 8); + assert_eq!(schema::applied_count(&conn).unwrap(), 9); let tables_after = table_names(&conn); assert!(tables_after.contains(&"entities".to_owned())); } @@ -809,7 +856,7 @@ fn schema_migrations_records_each_applied_migration() { row.get(0) }) .unwrap(); - assert_eq!(count, 8); + assert_eq!(count, 9); let names: Vec = { let mut stmt = conn .prepare("SELECT name FROM schema_migrations ORDER BY version") @@ -828,6 +875,7 @@ fn schema_migrations_records_each_applied_migration() { "0006_wardline_taint_sei", "0007_run_analyzed_commit", "0008_run_owner_heartbeat", + "0009_drop_fts_content_text", ] ); } diff --git a/docs/loomweave/1.0/detailed-design.md b/docs/loomweave/1.0/detailed-design.md index af81752d..8e449235 100644 --- a/docs/loomweave/1.0/detailed-design.md +++ b/docs/loomweave/1.0/detailed-design.md @@ -739,25 +739,25 @@ CREATE TABLE runs ( analyzed_at_commit TEXT -- git HEAD analyzed against (WS9 / SEI §6, migration 0007); NULL off-git ); --- FTS5 for text search +-- FTS5 for text search. (0001 also declared a `content_text` column reserved +-- for an on-demand source projection; it was never populated and was dropped in +-- migration 0009 — content search is served by the ADR-040 embeddings sidecar.) CREATE VIRTUAL TABLE entity_fts USING fts5( entity_id UNINDEXED, - name, short_name, summary_text, content_text, + name, short_name, summary_text, tokenize = 'porter unicode61' ); -- FTS5 triggers keep entity_fts synchronised with entities. 
-- summary_text is derived from the briefing's purpose + patterns + risks --- (short textual projection); content_text is populated on demand by the --- plugin during Phase 1 via the `file_analyzed` message. +-- (short textual projection). CREATE TRIGGER entities_ai AFTER INSERT ON entities BEGIN - INSERT INTO entity_fts (entity_id, name, short_name, summary_text, content_text) + INSERT INTO entity_fts (entity_id, name, short_name, summary_text) VALUES ( new.id, new.name, new.short_name, - COALESCE(json_extract(new.summary, '$.briefing.purpose'), ''), - '' + COALESCE(json_extract(new.summary, '$.briefing.purpose'), '') ); END; CREATE TRIGGER entities_au AFTER UPDATE ON entities BEGIN From 5675f4a9e9b4ecaf5a43591634bdcbb2f80ac829 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 18:08:49 +1000 Subject: [PATCH 26/27] chore(release): bump to 1.1.0rc2 (Cargo 1.1.0-rc2 / PEP440 1.1.0rc2) Workspace + Python plugin in lockstep. rc2 rolls up the dogfood-friction fixes and deferred v1.1 engineering items landed on this branch: worktree-aware staleness (ADR-045), .gitignore instance_id/*.lock (ADR-005), WAL checkpoint(TRUNCATE), entity_fts.content_text drop (migration 0009), the macOS aarch64 release verify gate, the Gatekeeper doc, and the ADR-024 marker backfill. No package published for release candidates. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 47 +++++++++++++++++++ Cargo.lock | 16 +++---- Cargo.toml | 2 +- crates/loomweave-cli/Cargo.toml | 14 +++--- crates/loomweave-cli/pyproject.toml | 4 +- crates/loomweave-federation/Cargo.toml | 2 +- crates/loomweave-mcp/Cargo.toml | 6 +-- crates/loomweave-plugin-fixture/Cargo.toml | 2 +- crates/loomweave-storage/Cargo.toml | 2 +- plugins/python/plugin.toml | 2 +- plugins/python/pyproject.toml | 2 +- .../src/loomweave_plugin_python/__init__.py | 2 +- plugins/python/tests/test_package.py | 4 +- plugins/python/tests/test_server.py | 2 +- plugins/python/uv.lock | 2 +- 15 files changed, 78 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f69bd967..baf2017b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,53 @@ only when an incompatible change is made to that surface. See ## [Unreleased] +## [1.1.0rc2] — 2026-06-06 + +Second 1.1 release candidate, rolling up dogfood-friction fixes and deferred +v1.1 engineering items on top of rc1. No package is published for release +candidates. (Cargo SemVer `1.1.0-rc2`; Python wheels normalise to PEP 440 +`1.1.0rc2`.) + +### Added + +- **Worktree-aware staleness (ADR-045).** `project_status_get`, the + `loomweave://context` resource, and the session-start banner now surface + `indexed_at_commit` + `worktree_dirty`, and a new `Staleness::StaleWorktree` + verdict fires when an otherwise-fresh index has untracked source on disk. + Detection uses a hardened, hash-free `git ls-files --others` scoped to ingested + source extensions (false-positive guard), proven filter-safe by test — closes + the "fresh lies about uncommitted code" friction (clarion-26c7e52027, + clarion-d9cf8bcfa9). 
+ +### Changed + +- **`.loomweave/.gitignore` (ADR-005)** now also excludes `instance_id` and + `*.lock`, so `git add -A` no longer stages the per-project serve fingerprint or + the analyze advisory lock; ADR-005 documents the live-index commit hazard and + points at `loomweave db backup` (clarion-7381e6382d). +- **WAL hygiene.** The storage writer-actor runs `PRAGMA wal_checkpoint(TRUNCATE)` + after each committed run, so the on-disk `loomweave.db` reflects committed state + while `serve` is alive instead of lagging behind a multi-MB WAL sidecar + (clarion-cdee445ed8). +- **Release CI parity.** `release.yml` gains a macOS aarch64 `verify-macos` gate + (mirroring `ci.yml`) wired into the build/publish `needs` chain, closing the + gap where a macOS-only lint/test regression could reach the build jobs + (clarion-47d395e03c). + +### Removed + +- **Dead `entity_fts.content_text` column** dropped via migration 0009 — it was + never populated and never read (content search is served by the ADR-040 + embeddings sidecar). `CURRENT_SCHEMA_VERSION` is now 9 (clarion-716449c371). + +### Docs + +- macOS Gatekeeper quarantine workaround added to `getting-started.md` + Troubleshooting (clarion-03dfa1f94d). +- ADR-024 in-place migration-retirement guard activated: `published_build.txt` + backfilled to `v1.0.0` (first published build; 0001 byte-identical since), so + later schema changes must be additive migrations (clarion-b20448b3ac). + ## [1.1.0rc1] — 2026-06-06 First 1.1 release candidate. 
No package is published for release candidates — diff --git a/Cargo.lock b/Cargo.lock index 39a16ebc..410073f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1057,7 +1057,7 @@ checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" [[package]] name = "loomweave-analysis" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "anyhow", "serde", @@ -1067,7 +1067,7 @@ dependencies = [ [[package]] name = "loomweave-cli" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "anyhow", "assert_cmd", @@ -1106,7 +1106,7 @@ dependencies = [ [[package]] name = "loomweave-core" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "async-trait", "nix", @@ -1123,7 +1123,7 @@ dependencies = [ [[package]] name = "loomweave-federation" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "blake3", "loomweave-core", @@ -1137,7 +1137,7 @@ dependencies = [ [[package]] name = "loomweave-mcp" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "async-trait", "blake3", @@ -1160,7 +1160,7 @@ dependencies = [ [[package]] name = "loomweave-plugin-fixture" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "loomweave-core", "nix", @@ -1169,7 +1169,7 @@ dependencies = [ [[package]] name = "loomweave-scanner" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "regex", "serde", @@ -1181,7 +1181,7 @@ dependencies = [ [[package]] name = "loomweave-storage" -version = "1.1.0-rc1" +version = "1.1.0-rc2" dependencies = [ "blake3", "deadpool-sqlite", diff --git a/Cargo.toml b/Cargo.toml index de77d4aa..a8ac37b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] [workspace.package] -version = "1.1.0-rc1" +version = "1.1.0-rc2" edition = "2024" license = "MIT" repository = "https://github.com/foundryside-dev/loomweave" diff --git a/crates/loomweave-cli/Cargo.toml b/crates/loomweave-cli/Cargo.toml index 9343d850..bbe56dd3 100644 --- a/crates/loomweave-cli/Cargo.toml +++ 
b/crates/loomweave-cli/Cargo.toml @@ -18,12 +18,12 @@ anyhow.workspace = true axum.workspace = true blake3.workspace = true clap.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } -loomweave-analysis = { path = "../loomweave-analysis", version = "1.1.0-rc1" } -loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc1" } -loomweave-mcp = { path = "../loomweave-mcp", version = "1.1.0-rc1" } -loomweave-scanner = { path = "../loomweave-scanner", version = "1.1.0-rc1" } -loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc1" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc2" } +loomweave-analysis = { path = "../loomweave-analysis", version = "1.1.0-rc2" } +loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc2" } +loomweave-mcp = { path = "../loomweave-mcp", version = "1.1.0-rc2" } +loomweave-scanner = { path = "../loomweave-scanner", version = "1.1.0-rc2" } +loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc2" } dotenvy.workspace = true fs2.workspace = true hmac.workspace = true @@ -46,7 +46,7 @@ uuid.workspace = true [dev-dependencies] assert_cmd.workspace = true -loomweave-plugin-fixture = { path = "../loomweave-plugin-fixture", version = "1.1.0-rc1" } +loomweave-plugin-fixture = { path = "../loomweave-plugin-fixture", version = "1.1.0-rc2" } rusqlite.workspace = true serde_json.workspace = true sha1.workspace = true diff --git a/crates/loomweave-cli/pyproject.toml b/crates/loomweave-cli/pyproject.toml index 72e23181..11c21c6c 100644 --- a/crates/loomweave-cli/pyproject.toml +++ b/crates/loomweave-cli/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "loomweave" -version = "1.1.0rc1" +version = "1.1.0rc2" description = "Loomweave — graph-aware code archaeology (Rust core)" readme = "../../README.md" requires-python = ">=3.11" @@ -15,7 +15,7 @@ classifiers = [ "Programming Language :: Rust", 
"Programming Language :: Python :: 3", ] -dependencies = ["loomweave-plugin-python==1.1.0rc1"] +dependencies = ["loomweave-plugin-python==1.1.0rc2"] [project.urls] Repository = "https://github.com/foundryside-dev/loomweave" diff --git a/crates/loomweave-federation/Cargo.toml b/crates/loomweave-federation/Cargo.toml index 335406e1..db2f9f1a 100644 --- a/crates/loomweave-federation/Cargo.toml +++ b/crates/loomweave-federation/Cargo.toml @@ -11,7 +11,7 @@ workspace = true [dependencies] blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc2" } reqwest.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/loomweave-mcp/Cargo.toml b/crates/loomweave-mcp/Cargo.toml index 684fd2ea..722e6208 100644 --- a/crates/loomweave-mcp/Cargo.toml +++ b/crates/loomweave-mcp/Cargo.toml @@ -12,9 +12,9 @@ workspace = true [dependencies] async-trait.workspace = true blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } -loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc1" } -loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc1" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc2" } +loomweave-federation = { path = "../loomweave-federation", version = "1.1.0-rc2" } +loomweave-storage = { path = "../loomweave-storage", version = "1.1.0-rc2" } reqwest.workspace = true rusqlite.workspace = true serde.workspace = true diff --git a/crates/loomweave-plugin-fixture/Cargo.toml b/crates/loomweave-plugin-fixture/Cargo.toml index 35f824d4..f1d1ac26 100644 --- a/crates/loomweave-plugin-fixture/Cargo.toml +++ b/crates/loomweave-plugin-fixture/Cargo.toml @@ -23,7 +23,7 @@ name = "loomweave-fixture-plugin" path = "src/main.rs" [dependencies] -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } +loomweave-core = { path = 
"../loomweave-core", version = "1.1.0-rc2" } serde_json.workspace = true [target.'cfg(unix)'.dependencies] diff --git a/crates/loomweave-storage/Cargo.toml b/crates/loomweave-storage/Cargo.toml index 726a0007..dac7a33a 100644 --- a/crates/loomweave-storage/Cargo.toml +++ b/crates/loomweave-storage/Cargo.toml @@ -11,7 +11,7 @@ workspace = true [dependencies] blake3.workspace = true -loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc1" } +loomweave-core = { path = "../loomweave-core", version = "1.1.0-rc2" } deadpool-sqlite.workspace = true rusqlite.workspace = true serde.workspace = true diff --git a/plugins/python/plugin.toml b/plugins/python/plugin.toml index 6e159265..93c4f75a 100644 --- a/plugins/python/plugin.toml +++ b/plugins/python/plugin.toml @@ -1,7 +1,7 @@ [plugin] name = "loomweave-plugin-python" plugin_id = "python" -version = "1.1.0rc1" +version = "1.1.0rc2" protocol_version = "1.0" # Bare basename per ADR-021 §Layer 1 + WP2 scrub commit eb0a41d — the host # refuses manifests whose `executable` carries any path component. 
diff --git a/plugins/python/pyproject.toml b/plugins/python/pyproject.toml index e26fb75d..46a98200 100644 --- a/plugins/python/pyproject.toml +++ b/plugins/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "loomweave-plugin-python" -version = "1.1.0rc1" +version = "1.1.0rc2" description = "Loomweave Python language plugin — v1.0 release" readme = "README.md" requires-python = ">=3.11" diff --git a/plugins/python/src/loomweave_plugin_python/__init__.py b/plugins/python/src/loomweave_plugin_python/__init__.py index 4b4fde08..5562b23f 100644 --- a/plugins/python/src/loomweave_plugin_python/__init__.py +++ b/plugins/python/src/loomweave_plugin_python/__init__.py @@ -1,3 +1,3 @@ """loomweave-plugin-python — Python language plugin for Loomweave.""" -__version__ = "1.1.0rc1" +__version__ = "1.1.0rc2" diff --git a/plugins/python/tests/test_package.py b/plugins/python/tests/test_package.py index 9cfae6ef..da595730 100644 --- a/plugins/python/tests/test_package.py +++ b/plugins/python/tests/test_package.py @@ -17,7 +17,7 @@ def _read_toml(path: Path) -> dict[str, Any]: def test_package_version_matches_pyproject() -> None: - assert loomweave_plugin_python.__version__ == "1.1.0rc1" + assert loomweave_plugin_python.__version__ == "1.1.0rc2" def test_plugin_version_lockstep_across_pyproject_manifest_and_module() -> None: @@ -42,7 +42,7 @@ def test_plugin_version_lockstep_across_pyproject_manifest_and_module() -> None: def test_manifest_declares_current_v1_ontology_only() -> None: manifest = _read_toml(_PLUGIN_ROOT / "plugin.toml") - assert manifest["plugin"]["version"] == "1.1.0rc1" + assert manifest["plugin"]["version"] == "1.1.0rc2" assert manifest["capabilities"]["runtime"]["wardline_aware"] is True assert manifest["integrations"]["wardline"]["expected_descriptor_version"] == ( EXPECTED_DESCRIPTOR_VERSION diff --git a/plugins/python/tests/test_server.py b/plugins/python/tests/test_server.py index 7abb5a1d..7be14705 100644 --- 
a/plugins/python/tests/test_server.py +++ b/plugins/python/tests/test_server.py @@ -86,7 +86,7 @@ def test_initialize_roundtrip() -> None: assert response["id"] == 1 result = response["result"] assert result["name"] == "loomweave-plugin-python" - assert result["version"] == "1.1.0rc1" + assert result["version"] == "1.1.0rc2" assert result["ontology_version"] == "0.7.0" assert set(result["capabilities"]) == {"wardline"} assert result["capabilities"]["wardline"]["status"] in { diff --git a/plugins/python/uv.lock b/plugins/python/uv.lock index 94aa0d89..aef6d0bd 100644 --- a/plugins/python/uv.lock +++ b/plugins/python/uv.lock @@ -464,7 +464,7 @@ wheels = [ [[package]] name = "loomweave-plugin-python" -version = "1.1.0rc1" +version = "1.1.0rc2" source = { editable = "." } dependencies = [ { name = "packaging" }, From 0a93731f726c7ac2f2f7ffd1d36cedd9d123a609 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Sat, 6 Jun 2026 19:11:36 +1000 Subject: [PATCH 27/27] feat(cli): inject agent-orientation block into CLAUDE.md/AGENTS.md (install + doctor) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `loomweave install` now pushes a managed Loomweave block into the always-loaded CLAUDE.md / AGENTS.md context, mirroring Filigree's instruction injection, so an agent learns to ask Loomweave's MCP tools before re-grepping the tree. `loomweave doctor` verifies it and, with --fix, repairs it via the same idempotent installer. New `instructions.rs` manages a `<!-- loomweave:instructions:v… -->`… `<!-- /loomweave:instructions -->` span and edits ONLY its own bytes — it never truncates to EOF, so a co-resident Filigree/Wardline block in the same file survives every create/append/replace/malformed operation. Drift is a body content hash (not the marker version, so a version bump on identical content is not drift), and the malformed-repair strips all orphan start markers so it stays safe and converges in a single pass.
- install: --instructions flag + InstallPlan plumbing; bare `install` does it. - doctor: Missing=warning (optional surface), Drifted/Malformed=problem; wired into both the text and JSON report paths with a next-action remediation. - thin embedded asset: a pointer to the MCP tools + the loomweave-workflow skill. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../assets/instructions/loomweave.md | 19 + crates/loomweave-cli/src/cli.rs | 8 +- crates/loomweave-cli/src/doctor.rs | 113 ++- crates/loomweave-cli/src/install.rs | 74 +- crates/loomweave-cli/src/instructions.rs | 765 ++++++++++++++++++ crates/loomweave-cli/src/main.rs | 5 + crates/loomweave-cli/tests/doctor.rs | 169 ++++ .../instructions-injection-plan.md | 224 +++++ 8 files changed, 1367 insertions(+), 10 deletions(-) create mode 100644 crates/loomweave-cli/assets/instructions/loomweave.md create mode 100644 crates/loomweave-cli/src/instructions.rs create mode 100644 docs/implementation/instructions-injection-plan.md diff --git a/crates/loomweave-cli/assets/instructions/loomweave.md b/crates/loomweave-cli/assets/instructions/loomweave.md new file mode 100644 index 00000000..d90bb596 --- /dev/null +++ b/crates/loomweave-cli/assets/instructions/loomweave.md @@ -0,0 +1,19 @@ +## Loomweave (code archaeology) + +This repo is indexed by Loomweave: it has pre-extracted the tree into a +queryable map of entities (functions, classes, modules, files), the call / +reference / import edges between them, and subsystem clusters. Before grepping +or re-reading the tree to answer "what calls X", "where is X defined", "what +subsystem owns X", or "find the thing that does Y" — ask Loomweave's MCP tools +(`mcp__loomweave__*`): `entity_find`, `entity_at`, `entity_callers_list`, +`entity_neighborhood_get`, `project_status_get`. + +Entity IDs are `{plugin}:{kind}:{qualified_name}` (e.g. +`python:function:pkg.mod.func`); subsystems are `core:subsystem:{hash}`. 
You +rarely type IDs — get one from `entity_find` or `entity_at`, then copy it +verbatim into the next tool. + +Index freshness and counts: `project_status_get` (or the `loomweave://context` +resource). If the index is stale, run `loomweave analyze `. + +Full workflow: the `loomweave-workflow` skill. diff --git a/crates/loomweave-cli/src/cli.rs b/crates/loomweave-cli/src/cli.rs index d5f93a0d..8723eb4a 100644 --- a/crates/loomweave-cli/src/cli.rs +++ b/crates/loomweave-cli/src/cli.rs @@ -52,7 +52,13 @@ pub enum Command { #[arg(long)] hooks: bool, - /// Do everything: .loomweave/ init + MCP config + skills + hooks. + /// Inject the Loomweave agent-orientation block into CLAUDE.md and + /// AGENTS.md (touching only Loomweave's own marker span). + #[arg(long)] + instructions: bool, + + /// Do everything: .loomweave/ init + MCP config + skills + hooks + + /// instructions. #[arg(long)] all: bool, }, diff --git a/crates/loomweave-cli/src/doctor.rs b/crates/loomweave-cli/src/doctor.rs index 8fbb9dc7..195d2447 100644 --- a/crates/loomweave-cli/src/doctor.rs +++ b/crates/loomweave-cli/src/doctor.rs @@ -1,10 +1,11 @@ //! `loomweave doctor [--fix]` — verify (and optionally repair) the installed //! agent-orientation surfaces. //! -//! Three surfaces are checked, each owned by an existing installer module: +//! Several surfaces are checked, each owned by an existing installer module: //! the `loomweave-workflow` skill pack ([`crate::skill_pack`]), the `SessionStart` -//! hook ([`crate::hooks_settings`]), and the Claude Code `.mcp.json` MCP -//! registration ([`crate::mcp_registration`]), plus the local +//! hook ([`crate::hooks_settings`]), the Claude Code `.mcp.json` MCP +//! registration ([`crate::mcp_registration`]), the `CLAUDE.md` / `AGENTS.md` +//! agent-orientation block ([`crate::instructions`]), and the local //! Loomweave/Filigree/Wardline binding files ([`crate::integration_bindings`]). //! The repair for each is that module's idempotent installer, so //! 
`doctor --fix` and `loomweave install` converge to the same state. @@ -30,10 +31,13 @@ use serde::Serialize; use serde_json::Value; use crate::hooks_settings::HookState; +use crate::instructions::InstructionsState; use crate::integration_bindings::BindingState; use crate::mcp_registration::McpState; use crate::skill_pack::SkillPackState; -use crate::{hook, hooks_settings, integration_bindings, mcp_registration, skill_pack}; +use crate::{ + hook, hooks_settings, instructions, integration_bindings, mcp_registration, skill_pack, +}; /// Run `loomweave doctor`. Returns `Ok(true)` iff every orientation surface is /// healthy after any requested repairs. @@ -66,6 +70,7 @@ pub fn run(path: &Path, fix: bool, json_output: bool) -> Result { tally += check_skill(&project_root, fix); tally += check_hook(&project_root, fix); tally += check_mcp(&project_root, fix); + tally += check_instructions(&project_root, fix); tally += check_integration_bindings(&project_root, fix); println!("--- index ---"); @@ -155,6 +160,7 @@ fn json_report(project_root: &Path, fix: bool) -> DoctorJsonReport { check_skill_json(project_root, fix), check_hook_json(project_root, fix), check_mcp_json(project_root, fix), + check_instructions_json(project_root, fix), check_http_config_json(project_root), check_filigree_url_json(project_root), check_sei_population_json(project_root), @@ -172,6 +178,9 @@ fn json_report(project_root: &Path, fix: bool) -> DoctorJsonReport { "hook.session_start" => { "Run `loomweave doctor --fix` or `loomweave install --hooks`.".to_owned() } + "instructions.block" => { + "Run `loomweave doctor --fix` or `loomweave install --instructions`.".to_owned() + } "mcp.registration" | "integration.bindings" => { "Run `loomweave doctor --fix`.".to_owned() } @@ -519,6 +528,54 @@ fn check_mcp_hygiene_json() -> DoctorJsonCheck { ) } +fn check_instructions_json(project_root: &Path, fix: bool) -> DoctorJsonCheck { + match instructions::instructions_state(project_root) { + 
InstructionsState::UpToDate => DoctorJsonCheck::ok( + "instructions.block", + "agent-orientation block present in CLAUDE.md + AGENTS.md", + ), + InstructionsState::Missing => { + let what = "agent-orientation block missing from CLAUDE.md / AGENTS.md"; + if !fix { + // Optional surface: absence is a warning, not a gate failure. + return DoctorJsonCheck::warning("instructions.block", what); + } + repair_instructions_json(project_root, what) + } + state => { + let what = match state { + InstructionsState::Drifted => { + "agent-orientation block drifted from the bundled copy" + } + InstructionsState::Malformed => { + "agent-orientation block malformed (dangling loomweave marker)" + } + InstructionsState::UpToDate | InstructionsState::Missing => unreachable!(), + }; + if !fix { + return DoctorJsonCheck::problem("instructions.block", what); + } + repair_instructions_json(project_root, what) + } + } +} + +fn repair_instructions_json(project_root: &Path, what: &str) -> DoctorJsonCheck { + match instructions::install_instructions(project_root) { + Ok(_) if instructions::instructions_state(project_root) == InstructionsState::UpToDate => { + DoctorJsonCheck::fixed("instructions.block", format!("{what}; fixed")) + } + Ok(_) => DoctorJsonCheck::problem( + "instructions.block", + format!("{what}; repair did not converge"), + ), + Err(err) => DoctorJsonCheck::problem( + "instructions.block", + format!("{what}; repair failed: {err}"), + ), + } +} + fn check_integration_bindings_json(project_root: &Path, fix: bool) -> DoctorJsonCheck { match integration_bindings::binding_state(project_root) { BindingState::Present => DoctorJsonCheck::ok( @@ -727,6 +784,54 @@ fn check_mcp(project_root: &Path, fix: bool) -> Tally { } } +fn check_instructions(project_root: &Path, fix: bool) -> Tally { + match instructions::instructions_state(project_root) { + InstructionsState::UpToDate => { + ok("agent-orientation block present in CLAUDE.md + AGENTS.md") + } + // Optional surface: the same guidance 
ships via the MCP preamble and the + // loomweave-workflow skill, so a missing block is advisory — never a gate + // failure. Mirrors the integration-bindings severity model. + InstructionsState::Missing => { + let what = "agent-orientation block missing from CLAUDE.md / AGENTS.md"; + if !fix { + return warn(what, Some("loomweave install --instructions")); + } + repair_instructions(project_root, what) + } + // Drifted / Malformed fail the gate: a stale or dangling block is a + // genuinely broken state. The repair is safe because it rewrites only + // Loomweave's own marker span. + state => { + let what = match state { + InstructionsState::Drifted => { + "agent-orientation block drifted from the bundled copy" + } + InstructionsState::Malformed => { + "agent-orientation block malformed (dangling loomweave marker)" + } + InstructionsState::UpToDate | InstructionsState::Missing => unreachable!(), + }; + if !fix { + return problem(what, Some("loomweave doctor --fix")); + } + repair_instructions(project_root, what) + } + } +} + +/// Shared `--fix` repair for the instructions block: re-inject, then re-classify +/// to confirm convergence. 
+fn repair_instructions(project_root: &Path, what: &str) -> Tally { + match instructions::install_instructions(project_root) { + Ok(_) if instructions::instructions_state(project_root) == InstructionsState::UpToDate => { + ok(&format!("{what} — fixed")) + } + Ok(_) => problem(&format!("{what} — repair did not converge"), None), + Err(err) => problem(&format!("{what} — repair failed: {err}"), None), + } +} + fn check_integration_bindings(project_root: &Path, fix: bool) -> Tally { match integration_bindings::binding_state(project_root) { BindingState::Present => { diff --git a/crates/loomweave-cli/src/install.rs b/crates/loomweave-cli/src/install.rs index f8f5abcc..55c5c775 100644 --- a/crates/loomweave-cli/src/install.rs +++ b/crates/loomweave-cli/src/install.rs @@ -131,6 +131,7 @@ pub enum InstallComponent { Skills, CodexSkills, Hooks, + Instructions, } /// What `loomweave install` should do, resolved from the CLI flags. @@ -155,6 +156,7 @@ pub enum InstallPlan { skills: bool, codex_skills: bool, hooks: bool, + instructions: bool, }, /// No flags or `--all`: initialise `.loomweave/` + every integration. All, @@ -176,6 +178,7 @@ impl InstallPlan { skills: components.contains(&InstallComponent::Skills), codex_skills: components.contains(&InstallComponent::CodexSkills), hooks: components.contains(&InstallComponent::Hooks), + instructions: components.contains(&InstallComponent::Instructions), } } } @@ -229,6 +232,20 @@ impl InstallPlan { pub fn hooks(self) -> bool { matches!(self, Self::All | Self::Components { hooks: true, .. }) } + + /// Whether to inject the agent-orientation block into `CLAUDE.md` / + /// `AGENTS.md`. + #[must_use] + pub fn instructions(self) -> bool { + matches!( + self, + Self::All + | Self::Components { + instructions: true, + .. + } + ) + } } /// Run the `install` subcommand. 
@@ -283,6 +300,10 @@ pub fn run( install_hooks(&project_root)?; } + if plan.instructions() { + install_instruction_blocks(&project_root)?; + } + if matches!(plan, InstallPlan::All) { install_integration_bindings(&project_root)?; } @@ -300,10 +321,11 @@ fn validate_plan(plan: InstallPlan) -> Result<()> { && !plan.skills() && !plan.codex_skills() && !plan.hooks() + && !plan.instructions() { bail!( "nothing to install: pass --claude-code, --codex, --skills, \ - --codex-skills, --hooks, --all, \ + --codex-skills, --hooks, --instructions, --all, \ or run bare `loomweave install` to do everything." ); } @@ -445,6 +467,20 @@ fn install_hooks(project_root: &Path) -> Result<()> { Ok(()) } +fn install_instruction_blocks(project_root: &Path) -> Result<()> { + let report = crate::instructions::install_instructions(project_root) + .context("inject loomweave instructions into CLAUDE.md / AGENTS.md")?; + if report.changed { + println!( + "Injected loomweave instructions block into {}/{{CLAUDE,AGENTS}}.md", + project_root.display() + ); + } else { + println!("loomweave instructions block already up to date"); + } + Ok(()) +} + fn install_integration_bindings(project_root: &Path) -> Result<()> { let changed = crate::integration_bindings::install_bindings(project_root) .context("install local Loomweave/Filigree/Wardline integration bindings")?; @@ -504,6 +540,7 @@ mod tests { assert!(naked.skills()); assert!(naked.codex_skills()); assert!(naked.hooks()); + assert!(naked.instructions()); // --skills: skills only, no init. let skills = InstallPlan::from_components(false, &[InstallComponent::Skills]); @@ -514,7 +551,8 @@ mod tests { codex: false, skills: true, codex_skills: false, - hooks: false + hooks: false, + instructions: false } ); assert!(!skills.init_loomweave()); @@ -523,6 +561,24 @@ mod tests { assert!(skills.skills()); assert!(!skills.codex_skills()); assert!(!skills.hooks()); + assert!(!skills.instructions()); + + // --instructions: instruction blocks only, no init. 
+ let instr = InstallPlan::from_components(false, &[InstallComponent::Instructions]); + assert_eq!( + instr, + InstallPlan::Components { + claude_code: false, + codex: false, + skills: false, + codex_skills: false, + hooks: false, + instructions: true + } + ); + assert!(!instr.init_loomweave()); + assert!(instr.instructions()); + assert!(!instr.skills()); // --hooks: hooks only, no init. let hooks = InstallPlan::from_components(false, &[InstallComponent::Hooks]); @@ -533,7 +589,8 @@ mod tests { codex: false, skills: false, codex_skills: false, - hooks: true + hooks: true, + instructions: false } ); assert!(!hooks.init_loomweave()); @@ -552,6 +609,7 @@ mod tests { assert!(all.skills()); assert!(all.codex_skills()); assert!(all.hooks()); + assert!(all.instructions()); // Multiple component flags: selected components only, still no init. let both = InstallPlan::from_components( @@ -562,6 +620,7 @@ mod tests { InstallComponent::Skills, InstallComponent::CodexSkills, InstallComponent::Hooks, + InstallComponent::Instructions, ], ); assert_eq!( @@ -571,7 +630,8 @@ mod tests { codex: true, skills: true, codex_skills: true, - hooks: true + hooks: true, + instructions: true } ); assert!(!both.init_loomweave()); @@ -580,6 +640,7 @@ mod tests { assert!(both.skills()); assert!(both.codex_skills()); assert!(both.hooks()); + assert!(both.instructions()); } #[test] @@ -595,12 +656,14 @@ mod tests { &[InstallComponent::Skills], &[InstallComponent::CodexSkills], &[InstallComponent::Hooks], + &[InstallComponent::Instructions], &[ InstallComponent::ClaudeCode, InstallComponent::Codex, InstallComponent::Skills, InstallComponent::CodexSkills, InstallComponent::Hooks, + InstallComponent::Instructions, ], ]; for all in [false, true] { @@ -612,7 +675,8 @@ mod tests { || plan.codex() || plan.skills() || plan.codex_skills() - || plan.hooks(), + || plan.hooks() + || plan.instructions(), "from_components({all}, {components:?}) produced a do-nothing plan: {plan:?}" ); } diff --git 
a/crates/loomweave-cli/src/instructions.rs b/crates/loomweave-cli/src/instructions.rs new file mode 100644 index 00000000..e08b5b1d --- /dev/null +++ b/crates/loomweave-cli/src/instructions.rs @@ -0,0 +1,765 @@ +//! Loomweave-owned agent-orientation block injected into `CLAUDE.md` / +//! `AGENTS.md`, plus its idempotent installer and read-only health check. +//! +//! Like Filigree, Loomweave *pushes* a small managed marker-block into the +//! always-loaded `CLAUDE.md` / `AGENTS.md` context so an agent learns to ask +//! Loomweave's MCP tools before re-grepping the tree. Unlike the skill pack +//! (whose asset is owned by `loomweave-mcp`), this asset is cli-local — there +//! is no MCP owner for it — and is embedded with `include_str!`, matching the +//! embedding convention in [`crate::skill_pack`]. +//! +//! ## Coexistence is the whole point +//! +//! Every file Loomweave writes here **already** contains another tool's block: +//! this repo's own `AGENTS.md` holds Filigree's `` +//! span (and Wardline's). Loomweave therefore *never* owns the tail of the file, +//! so the installer must touch **only** its own +//! ``…`` span and +//! must not delete or move a single byte outside it. In particular it does NOT +//! copy Filigree's truncate-from-start-marker-to-EOF malformed recovery, which +//! is a data-loss bug in a two-block file. See [`install_instructions`]. +//! +//! ## Drift signal +//! +//! Drift is the block-body content compared byte-for-byte against the embedded +//! [`INSTRUCTIONS_BODY`], **not** the marker version string — so a workspace +//! version bump on byte-identical content does not report drift. This mirrors +//! [`crate::skill_pack`]'s fingerprint philosophy; the `v{version}` in the start +//! marker is human-readable provenance only. + +use std::fs; +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result, bail}; + +/// Embedded, cli-local instructions body. 
Deliberately thin: it is +/// always-loaded context competing with the `loomweave-workflow` skill that +/// says the same thing, so it is a pointer, not a manual. +const INSTRUCTIONS_BODY: &str = include_str!("../assets/instructions/loomweave.md"); + +/// Detection prefix for Loomweave's start marker. The full marker carries a +/// `:v{version}:{hash}` provenance suffix (see [`start_marker`]); detection +/// keys only on this prefix so a provenance change is still recognised as the +/// same block. Never collides with `<!-- filigree:instructions`. +const START_PREFIX: &str = "<!-- loomweave:instructions"; + +/// The end-marker line that closes Loomweave's block. +const END_MARKER: &str = "<!-- /loomweave:instructions -->"; + +/// The two project-root files Loomweave manages a block in. +const TARGET_FILES: &[&str] = &["CLAUDE.md", "AGENTS.md"]; + +/// The canonical body bytes that live inside the span. `include_str!` keeps the +/// asset's trailing newline; we trim trailing whitespace so the drift compare +/// is invariant to how the asset file happens to end. This is the single source +/// of truth for both render ([`render_block`]) and extract ([`locate_span`]). +fn canonical_body() -> &'static str { + INSTRUCTIONS_BODY.trim_end() +} + +/// First 8 hex chars of the blake3 digest over [`canonical_body`] — provenance +/// only, stamped into the start marker; not the drift signal. +fn body_hash_prefix() -> String { + let digest = blake3::hash(canonical_body().as_bytes()); + digest.to_hex()[..8].to_owned() +} + +/// The full provenance start-marker line (no trailing newline). +fn start_marker() -> String { + format!( + "<!-- loomweave:instructions:v{}:{} -->", + env!("CARGO_PKG_VERSION"), + body_hash_prefix() + ) +} + +/// Render the complete block (start marker + body + end marker), newline-pinned. +/// +/// Exactly one newline sits at each boundary: after the start marker, between +/// the body and the end marker. [`locate_span`] is the precise inverse, so a +/// freshly rendered block round-trips to [`canonical_body`] with no drift.
+fn render_block() -> String { + format!("{}\n{}\n{}", start_marker(), canonical_body(), END_MARKER) +} + +/// Read-only health of the Loomweave block across both [`TARGET_FILES`], for +/// `loomweave doctor`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum InstructionsState { + /// Every target file holds a well-formed block whose body matches the + /// embedded bytes. + UpToDate, + /// At least one target file is missing the block entirely (and no file is + /// in a worse state). A first-class but *optional* surface: the same + /// guidance is delivered by the MCP preamble and the skill, so a project + /// that omits the block is still healthy. Doctor treats this as a + /// **warning**. + Missing, + /// Every file that should hold a block has a well-formed one, but at least + /// one block's body differs from the embedded bytes (a stale copy from an + /// older binary, or hand-edited). Doctor treats this as a **problem** + /// (auto-repaired with `--fix`). + Drifted, + /// At least one target file has a malformed block — a dangling start marker + /// with no following end marker, or an end marker preceding its start. + /// Doctor treats this as a **problem**; the repair is safe because it only + /// rewrites Loomweave's own span. + Malformed, +} + +/// Classify one file's Loomweave block without writing. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum FileState { + /// No start marker present. + Absent, + /// Well-formed block whose body matches the embedded bytes. + Current, + /// Well-formed block whose body differs from the embedded bytes. + Drifted, + /// Start marker present without a following end marker (or markers are + /// mis-ordered). + Malformed, +} + +/// Aggregate per-file states into a single [`InstructionsState`]. +/// +/// Precedence is **severity-ordered**, high → low: `Malformed` > `Drifted` > +/// `Missing` > `UpToDate`. 
This deliberately differs from +/// [`crate::skill_pack`]'s "Missing first" rule: here `Missing` is only a +/// warning while `Drifted`/`Malformed` fail the gate, so a missing block must +/// never mask a gate-failing drifted/malformed one. +fn aggregate(states: &[FileState]) -> InstructionsState { + if states.iter().any(|s| matches!(s, FileState::Malformed)) { + InstructionsState::Malformed + } else if states.iter().any(|s| matches!(s, FileState::Drifted)) { + InstructionsState::Drifted + } else if states.iter().any(|s| matches!(s, FileState::Absent)) { + InstructionsState::Missing + } else { + InstructionsState::UpToDate + } +} + +/// Classify the Loomweave block across both [`TARGET_FILES`] without writing. +#[must_use] +pub fn instructions_state(project_root: &Path) -> InstructionsState { + let states: Vec = TARGET_FILES + .iter() + .map(|name| file_state(&project_root.join(name))) + .collect(); + aggregate(&states) +} + +/// Classify a single target file. A file that does not exist is [`Absent`] +/// (the installer will create it); an unreadable file is treated as `Absent` +/// too, so the repair path attempts a fresh write rather than wedging. +/// +/// [`Absent`]: FileState::Absent +fn file_state(path: &Path) -> FileState { + let Ok(content) = fs::read_to_string(path) else { + return FileState::Absent; + }; + match locate_span(&content) { + Span::Absent => FileState::Absent, + Span::Malformed => FileState::Malformed, + Span::WellFormed { body, .. } => { + if body == canonical_body() { + FileState::Current + } else { + FileState::Drifted + } + } + } +} + +/// Where (and whether) a well-ordered Loomweave block sits in `content`. +enum Span { + /// No start marker line present. + Absent, + /// Start marker present without a following end marker, or mis-ordered. + Malformed, + /// A well-ordered block. `start` is the byte offset of the start-marker + /// line; `end` is the byte offset just past the end-marker line (including + /// its trailing newline if any). 
`body` is the extracted block body, + /// trailing-newline-trimmed, for the drift compare. + WellFormed { + start: usize, + end: usize, + body: String, + }, +} + +/// Locate Loomweave's block by scanning **whole lines** — never a bare `-->` +/// substring scan, which could match Filigree's or Wardline's end marker. The +/// start marker is the first line whose trimmed form starts with +/// [`START_PREFIX`]; the end marker is the first line *strictly after* it whose +/// trimmed form equals [`END_MARKER`]. +fn locate_span(content: &str) -> Span { + let mut start: Option<(usize, usize)> = None; // (line_start_byte, line_end_byte) + let mut offset = 0usize; + for line in content.split_inclusive('\n') { + let trimmed = line.trim(); + let line_start = offset; + let line_end = offset + line.len(); + offset = line_end; + + match start { + // Still scanning for the start marker. + None => { + if trimmed.starts_with(START_PREFIX) { + start = Some((line_start, line_end)); + } + } + // Start marker already seen; the first matching end-marker line closes + // the span. + Some((span_start, body_start)) if trimmed == END_MARKER => { + // Body is everything between the start-marker line and the + // end-marker line; trim a single trailing newline so it round-trips + // against `canonical_body` (which has no trailing newline). + let raw_body = &content[body_start..line_start]; + let body = raw_body.strip_suffix('\n').unwrap_or(raw_body).to_owned(); + return Span::WellFormed { + start: span_start, + end: line_end, + body, + }; + } + Some(_) => {} + } + } + match start { + // Start marker found but never a following end marker → dangling. + Some(_) => Span::Malformed, + None => Span::Absent, + } +} + +/// Outcome of an [`install_instructions`] call. +#[derive(Debug, Clone, Copy)] +pub struct InstructionsInstallReport { + /// True if any target file's bytes were (re)written this call; false if + /// every file already held the current well-formed block. 
+ pub changed: bool, +} + +/// Inject (or repair) the Loomweave block into both [`TARGET_FILES`] under +/// `project_root`, idempotently. Doubles as the `doctor --fix` repair. +/// +/// Per-file behaviour, touching **only** Loomweave's own span: +/// +/// - **Replace** when a well-ordered `START_PREFIX`…`END_MARKER` span exists: +/// rewrite exactly that span, leaving every byte outside it (e.g. a +/// coexisting Filigree block) untouched. A no-op when the body already +/// matches. +/// - **Append** when no start marker is present: append the block (separated by +/// a blank line) to the file's existing content, which is left intact. +/// - **Dangling start marker** (start present, no following end): do **not** +/// truncate to EOF (that would eat a coexisting Filigree block). Strip only +/// the orphaned start-marker line and append a fresh well-formed block; all +/// other bytes — including the orphaned prose body, left as loose text — +/// survive. +/// +/// Writes are atomic (temp + rename in the same directory, preserving the +/// existing file mode) and reject a symlinked target. +/// +/// # Errors +/// +/// Returns an error if a target is a symlink, or if any read, temp write, or +/// rename fails. 
+pub fn install_instructions(project_root: &Path) -> Result { + let mut changed = false; + for name in TARGET_FILES { + let path = project_root.join(name); + changed |= install_into_file(&path) + .with_context(|| format!("inject loomweave instructions into {}", path.display()))?; + } + Ok(InstructionsInstallReport { changed }) +} + +fn install_into_file(path: &Path) -> Result { + reject_symlink(path)?; + + let existing = match fs::read_to_string(path) { + Ok(content) => Some(content), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => None, + Err(err) => { + return Err(err).with_context(|| format!("read {}", path.display())); + } + }; + + let block = render_block(); + let new_content = match existing.as_deref() { + None => format!("{block}\n"), + Some(content) => match locate_span(content) { + Span::WellFormed { start, end, body } => { + if body == canonical_body() { + // Already current — no-op, even if the provenance marker + // version differs (drift is body-only). + return Ok(false); + } + splice_span(content, start, end, &block) + } + Span::Absent => append_block(content, &block), + Span::Malformed => { + // Dangling start marker: strip only the orphan start-marker + // line, then append a fresh block. Never truncate to EOF. + let stripped = strip_start_marker_line(content); + append_block(&stripped, &block) + } + }, + }; + + if existing.as_deref() == Some(new_content.as_str()) { + return Ok(false); + } + atomic_write(path, &new_content)?; + Ok(true) +} + +/// Replace `content[start..end]` with `block`, normalising so the replacement +/// span ends in exactly one newline (the original end-marker line may or may +/// not have carried one at EOF). 
+fn splice_span(content: &str, start: usize, end: usize, block: &str) -> String { + let mut out = String::with_capacity(content.len()); + out.push_str(&content[..start]); + out.push_str(block); + let tail = &content[end..]; + // The located `end` is just past the end-marker line's trailing newline (if + // present). Preserve whatever followed it verbatim; guarantee a newline + // between our end marker and that tail when the tail is non-empty. + if tail.is_empty() { + out.push('\n'); + } else { + out.push('\n'); + out.push_str(tail); + } + out +} + +/// Append `block` to `content`, separated by a blank line, with a trailing +/// newline. `content`'s existing bytes are preserved verbatim. +fn append_block(content: &str, block: &str) -> String { + if content.is_empty() { + return format!("{block}\n"); + } + let sep = if content.ends_with("\n\n") { + "" + } else if content.ends_with('\n') { + "\n" + } else { + "\n\n" + }; + format!("{content}{sep}{block}\n") +} + +/// Remove **every** line whose trimmed form starts with [`START_PREFIX`]. +/// Every other byte — including any orphaned body that followed it — is kept. +/// +/// This is only reached from the [`Span::Malformed`] branch, where +/// [`locate_span`] returned `Malformed` precisely because no end marker follows +/// the first start marker — so *every* start marker in the file is orphaned by +/// definition. Stripping only the first would leave a second dangling start +/// behind; on the next install/doctor run [`locate_span`] would pair that +/// leftover orphan with the freshly-appended block's end marker, forming a +/// well-formed span that engulfs (and deletes) everything between — including a +/// co-resident Filigree block. Removing all orphan starts converges in one pass +/// and never eats a neighbouring tool's block. 
+fn strip_start_marker_line(content: &str) -> String { + let mut out = String::with_capacity(content.len()); + for line in content.split_inclusive('\n') { + if line.trim().starts_with(START_PREFIX) { + continue; + } + out.push_str(line); + } + out +} + +/// Reject a symlinked target so temp+rename never silently converts a link into +/// a regular file. A non-existent path is fine (we create it). +fn reject_symlink(path: &Path) -> Result<()> { + match fs::symlink_metadata(path) { + Ok(meta) if meta.file_type().is_symlink() => { + bail!( + "refusing to write {}: it is a symlink (resolve it by hand, then re-run)", + path.display() + ); + } + Ok(_) => Ok(()), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(err) => Err(err).with_context(|| format!("stat {}", path.display())), + } +} + +/// Atomically write `content` to `path`: stage into a sibling temp file in the +/// same directory (so `rename` stays on one filesystem), preserve the existing +/// file mode when the target already exists, then `rename` over the target. +fn atomic_write(path: &Path, content: &str) -> Result<()> { + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + fs::create_dir_all(parent).with_context(|| format!("mkdir {}", parent.display()))?; + + let file_name = path.file_name().map_or_else( + || "instructions".to_owned(), + |n| n.to_string_lossy().into_owned(), + ); + let temp_path: PathBuf = parent.join(format!( + ".{}.loomweave.tmp-{}", + file_name, + std::process::id() + )); + + // Cleanup guard: drop the staged temp file if any step after creating it + // fails, so a failed write never leaks a `.tmp-*` sibling. 
+ if let Err(err) = write_temp_then_rename(&temp_path, path, content) { + let _ = fs::remove_file(&temp_path); + return Err(err); + } + Ok(()) +} + +fn write_temp_then_rename(temp_path: &Path, path: &Path, content: &str) -> Result<()> { + fs::write(temp_path, content).with_context(|| format!("write {}", temp_path.display()))?; + #[cfg(unix)] + preserve_mode(path, temp_path)?; + fs::rename(temp_path, path) + .with_context(|| format!("rename {} -> {}", temp_path.display(), path.display()))?; + Ok(()) +} + +/// Copy the existing file's permission bits onto the staged temp file so the +/// rename preserves mode. A no-op when the target does not yet exist. +#[cfg(unix)] +fn preserve_mode(path: &Path, temp_path: &Path) -> Result<()> { + use std::os::unix::fs::PermissionsExt; + + let Ok(meta) = fs::metadata(path) else { + return Ok(()); + }; + let mode = meta.permissions().mode(); + fs::set_permissions(temp_path, fs::Permissions::from_mode(mode)) + .with_context(|| format!("preserve mode on {}", temp_path.display())) +} + +#[cfg(test)] +mod tests { + use super::{ + END_MARKER, INSTRUCTIONS_BODY, InstructionsState, START_PREFIX, canonical_body, + install_instructions, instructions_state, render_block, + }; + + /// A representative Filigree two-block neighbour, taken verbatim in shape + /// from this repo's own `AGENTS.md`. The coexistence tests assert these + /// bytes survive every operation untouched. + const FILIGREE_BLOCK: &str = "\n\ +## Filigree Issue Tracker\n\ +\n\ +filigree tracks tasks for this project.\n\ +\n"; + + #[test] + fn asset_is_thin_and_pointer_shaped() { + // The plan caps the always-loaded body at ~15-25 lines: a pointer, not + // a manual. Guard against it growing into a second skill. 
+ let lines = INSTRUCTIONS_BODY.lines().count(); + assert!( + lines <= 30, + "instructions body grew to {lines} lines; keep it thin (a pointer)" + ); + assert!(INSTRUCTIONS_BODY.contains("mcp__loomweave__")); + assert!(INSTRUCTIONS_BODY.contains("loomweave-workflow")); + } + + #[test] + fn start_prefix_is_not_a_prefix_of_end_marker() { + // Detection keys the start on START_PREFIX and the end on an exact + // END_MARKER line; the `/` keeps the end marker from matching the start + // prefix. Pin that invariant. + assert!(!END_MARKER.starts_with(START_PREFIX)); + } + + #[test] + fn render_round_trips_to_canonical_body() { + let block = render_block(); + assert!(block.starts_with(START_PREFIX)); + assert!(block.ends_with(END_MARKER)); + // Wrapping the rendered block in a file and re-extracting must yield the + // canonical body, or idempotency breaks (install -> Drifted -> "fix" + // every run). + let file = format!("prefix\n\n{block}\n"); + let state = super::locate_span(&file); + match state { + super::Span::WellFormed { body, .. 
} => assert_eq!(body, canonical_body()), + _ => panic!("rendered block did not locate as well-formed"), + } + } + + #[test] + fn create_when_no_file() { + let dir = tempfile::tempdir().unwrap(); + let report = install_instructions(dir.path()).unwrap(); + assert!(report.changed, "first install should write"); + for name in ["CLAUDE.md", "AGENTS.md"] { + let body = std::fs::read_to_string(dir.path().join(name)).unwrap(); + assert!(body.starts_with(START_PREFIX), "{name} missing block"); + assert!(body.trim_end().ends_with(END_MARKER)); + } + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + } + + #[test] + fn install_is_idempotent() { + let dir = tempfile::tempdir().unwrap(); + assert!(install_instructions(dir.path()).unwrap().changed); + let second = install_instructions(dir.path()).unwrap(); + assert!( + !second.changed, + "second install must be a no-op on byte-identical body" + ); + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + } + + #[test] + fn append_preserves_prior_content() { + let dir = tempfile::tempdir().unwrap(); + let prior = "# Project notes\n\nSome existing prose.\n"; + for name in ["CLAUDE.md", "AGENTS.md"] { + std::fs::write(dir.path().join(name), prior).unwrap(); + } + assert!(install_instructions(dir.path()).unwrap().changed); + for name in ["CLAUDE.md", "AGENTS.md"] { + let body = std::fs::read_to_string(dir.path().join(name)).unwrap(); + assert!(body.starts_with(prior), "prior content not preserved"); + assert!(body.contains(START_PREFIX)); + } + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + } + + #[test] + fn replace_rewrites_on_drift_only() { + let dir = tempfile::tempdir().unwrap(); + install_instructions(dir.path()).unwrap(); + // Hand-edit the body inside the Loomweave span on one file. 
+ let claude = dir.path().join("CLAUDE.md"); + let content = std::fs::read_to_string(&claude).unwrap(); + let drifted = content.replace("code archaeology", "DRIFTED HEADER"); + assert_ne!(drifted, content, "test setup: substitution must apply"); + std::fs::write(&claude, &drifted).unwrap(); + assert_eq!(instructions_state(dir.path()), InstructionsState::Drifted); + + let report = install_instructions(dir.path()).unwrap(); + assert!(report.changed, "drift must trigger a rewrite"); + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + } + + #[test] + fn state_missing_before_install() { + let dir = tempfile::tempdir().unwrap(); + assert_eq!(instructions_state(dir.path()), InstructionsState::Missing); + } + + #[test] + fn state_missing_when_one_file_lacks_block() { + let dir = tempfile::tempdir().unwrap(); + install_instructions(dir.path()).unwrap(); + // Remove the block from AGENTS.md entirely. + std::fs::write(dir.path().join("AGENTS.md"), "# just notes\n").unwrap(); + assert_eq!(instructions_state(dir.path()), InstructionsState::Missing); + } + + /// The headline coexistence guarantee: a file pre-seeded with a Filigree + /// block survives create / append / replace / malformed round-trips with + /// Filigree's bytes untouched. + #[test] + fn filigree_block_survives_every_operation() { + let dir = tempfile::tempdir().unwrap(); + let claude = dir.path().join("CLAUDE.md"); + let agents = dir.path().join("AGENTS.md"); + + // Seed both files with only the Filigree block (the append/create case). + std::fs::write(&claude, FILIGREE_BLOCK).unwrap(); + std::fs::write(&agents, FILIGREE_BLOCK).unwrap(); + + // 1. Append: Loomweave block added, Filigree bytes intact. 
+ install_instructions(dir.path()).unwrap(); + for path in [&claude, &agents] { + let body = std::fs::read_to_string(path).unwrap(); + assert!( + body.contains(FILIGREE_BLOCK), + "filigree block lost on append" + ); + assert!(body.contains(START_PREFIX), "loomweave block missing"); + } + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + + // 2. Replace (drift): edit the Loomweave body; Filigree still survives. + let content = std::fs::read_to_string(&claude).unwrap(); + let drifted = content.replace("code archaeology", "EDITED"); + std::fs::write(&claude, &drifted).unwrap(); + assert_eq!(instructions_state(dir.path()), InstructionsState::Drifted); + install_instructions(dir.path()).unwrap(); + let repaired = std::fs::read_to_string(&claude).unwrap(); + assert!( + repaired.contains(FILIGREE_BLOCK), + "filigree block lost on drift repair" + ); + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + + // 3. Malformed (dangling Loomweave start marker, with the Filigree block + // present): repair must NOT truncate to EOF and eat Filigree. + let dangling = format!( + "{FILIGREE_BLOCK}\n\nstale orphan body\n" + ); + std::fs::write(&agents, &dangling).unwrap(); + assert_eq!(instructions_state(dir.path()), InstructionsState::Malformed); + install_instructions(dir.path()).unwrap(); + let fixed = std::fs::read_to_string(&agents).unwrap(); + assert!( + fixed.contains(FILIGREE_BLOCK), + "filigree block eaten by dangling-marker repair" + ); + assert!( + fixed.contains("stale orphan body"), + "orphaned body should be left as loose prose, not deleted" + ); + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + } + + /// Regression: two dangling Loomweave start markers (no intervening end + /// marker) co-resident with a Filigree block. 
The Malformed-branch repair + /// must strip BOTH orphan starts, not just the first — otherwise the leftover + /// orphan re-pairs with the freshly-appended block's end marker on a later + /// run, forming a well-formed span that engulfs and deletes the Filigree + /// block (silent data loss) and never converges. Asserts (a) Filigree bytes + /// survive and (b) the repair reaches a fixed point in a single pass. + #[test] + fn two_dangling_starts_with_filigree_block_converge_in_one_pass() { + let dir = tempfile::tempdir().unwrap(); + let agents = dir.path().join("AGENTS.md"); + // Give the other target file a clean block so AGENTS.md is the only + // malformed file driving the aggregate state. + install_instructions(dir.path()).unwrap(); + + // AGENTS.md: TWO dangling loomweave start markers (no end marker + // between them) sitting BEFORE the Filigree block (bad copy-paste / + // merge artifact). The ordering is load-bearing: it puts the leftover + // orphan start on the near side of the Filigree block, so the buggy + // strip-first path leaves an orphan that — on the next run — pairs with + // the appended block's end marker and engulfs (deletes) the Filigree + // bytes. Assertion (a) below then fails on the unfixed code, exercising + // the literal data-loss mechanism, not merely non-convergence. + let doubled = format!( + "\n\ + first orphan body\n\ + \n\ + second orphan body\n\ + \n\ + {FILIGREE_BLOCK}" + ); + std::fs::write(&agents, &doubled).unwrap(); + assert_eq!(instructions_state(dir.path()), InstructionsState::Malformed); + + // (a) Drive repeated install passes — the way `doctor --fix` runs over + // a project's lifetime. The data-loss mechanism only fires on the SECOND + // pass: the buggy strip-first repair leaves an orphan start that + // `locate_span` then pairs with pass-1's appended end marker, forming a + // well-formed span that engulfs the Filigree block, so pass 2's splice + // deletes it. 
Assert the Filigree bytes survive after EVERY pass, so the + // literal deletion is the load-bearing failure on the unfixed code. + for pass in 1..=3 { + install_instructions(dir.path()).unwrap(); + let after = std::fs::read_to_string(&agents).unwrap(); + assert!( + after.contains(FILIGREE_BLOCK), + "filigree block eaten by two-dangling-start repair on pass {pass}" + ); + } + + // (b) The repair reaches a fixed point: a single pass from Malformed must + // converge to UpToDate (not "repair did not converge"), and further + // passes are no-ops. + std::fs::write(&agents, &doubled).unwrap(); + install_instructions(dir.path()).unwrap(); + assert_eq!( + instructions_state(dir.path()), + InstructionsState::UpToDate, + "two-dangling-start repair must reach a fixed point in a single pass" + ); + let second = install_instructions(dir.path()).unwrap(); + assert!( + !second.changed, + "repaired file must be a stable fixed point (no further rewrite)" + ); + + let fixed = std::fs::read_to_string(&agents).unwrap(); + assert!( + fixed.contains(FILIGREE_BLOCK), + "filigree block must survive the converged repair" + ); + // Both orphaned bodies survive as loose prose; no bytes outside our span lost. + assert!(fixed.contains("first orphan body")); + assert!(fixed.contains("second orphan body")); + // Exactly one well-formed start marker remains (the appended block). + assert_eq!( + fixed.matches(START_PREFIX).count(), + 1, + "exactly one start marker must remain after stripping both orphans" + ); + } + + #[test] + fn dangling_start_marker_is_malformed_then_repaired() { + let dir = tempfile::tempdir().unwrap(); + let claude = dir.path().join("CLAUDE.md"); + let agents = dir.path().join("AGENTS.md"); + // One file gets a clean block so only the dangling file is malformed. 
+ install_instructions(dir.path()).unwrap(); + std::fs::write( + &claude, + "# notes\n\norphan body, no end marker\n", + ) + .unwrap(); + let _ = &agents; + assert_eq!(instructions_state(dir.path()), InstructionsState::Malformed); + + install_instructions(dir.path()).unwrap(); + assert_eq!(instructions_state(dir.path()), InstructionsState::UpToDate); + let fixed = std::fs::read_to_string(&claude).unwrap(); + assert!(fixed.contains("# notes"), "leading content eaten"); + assert!( + fixed.contains("orphan body, no end marker"), + "orphan body should survive as loose prose" + ); + // Exactly one well-formed start marker remains. + assert_eq!(fixed.matches(START_PREFIX).count(), 1); + } + + #[cfg(unix)] + #[test] + fn symlink_target_is_rejected() { + use std::os::unix::fs::symlink; + let dir = tempfile::tempdir().unwrap(); + let real = dir.path().join("real.md"); + std::fs::write(&real, "real contents\n").unwrap(); + symlink(&real, dir.path().join("CLAUDE.md")).unwrap(); + let err = install_instructions(dir.path()).unwrap_err(); + assert!( + err.to_string().contains("symlink") + || err.chain().any(|c| c.to_string().contains("symlink")), + "expected a symlink rejection, got: {err}" + ); + } + + #[cfg(unix)] + #[test] + fn atomic_write_preserves_mode() { + use std::os::unix::fs::PermissionsExt; + let dir = tempfile::tempdir().unwrap(); + let claude = dir.path().join("CLAUDE.md"); + std::fs::write(&claude, "# notes\n").unwrap(); + std::fs::set_permissions(&claude, std::fs::Permissions::from_mode(0o640)).unwrap(); + install_instructions(dir.path()).unwrap(); + let mode = std::fs::metadata(&claude).unwrap().permissions().mode() & 0o777; + assert_eq!(mode, 0o640, "file mode not preserved across rewrite"); + } +} diff --git a/crates/loomweave-cli/src/main.rs b/crates/loomweave-cli/src/main.rs index 43c75988..47ed6b57 100644 --- a/crates/loomweave-cli/src/main.rs +++ b/crates/loomweave-cli/src/main.rs @@ -10,6 +10,7 @@ mod hooks_settings; mod http_read; mod install; mod 
instance; +mod instructions; mod integration_bindings; mod mcp_registration; mod run_lifecycle; @@ -42,6 +43,7 @@ fn main() -> Result<()> { skills, codex_skills, hooks, + instructions, all, } => { let mut components = Vec::new(); @@ -60,6 +62,9 @@ fn main() -> Result<()> { if hooks { components.push(install::InstallComponent::Hooks); } + if instructions { + components.push(install::InstallComponent::Instructions); + } install::run( &path, force, diff --git a/crates/loomweave-cli/tests/doctor.rs b/crates/loomweave-cli/tests/doctor.rs index 5a389027..92266329 100644 --- a/crates/loomweave-cli/tests/doctor.rs +++ b/crates/loomweave-cli/tests/doctor.rs @@ -169,6 +169,7 @@ fn doctor_fix_repairs_missing_three_way_integration_bindings() { "--codex-skills", "--hooks", "--claude-code", + "--instructions", ], dir.path(), ); @@ -392,6 +393,174 @@ fn doctor_flags_untrusted_mcp_command_without_clobbering_it() { ); } +/// Instructions severity model (plan decision #2, the product-judgment veto +/// point): `Missing` is a non-gating **warning** — the same guidance ships via +/// the MCP preamble and the loomweave-workflow skill, so a project that omits +/// the always-loaded block is still first-class. A fresh `--all` install holds +/// the block; deleting it from one target file drives the aggregate to Missing, +/// which must surface as a warning and still exit 0. +#[test] +fn doctor_reports_missing_instructions_block_as_warning() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + // Drop the Loomweave block from one target file -> aggregate is Missing. 
+ fs::write(dir.path().join("AGENTS.md"), "# just notes\n").unwrap(); + + let (code, out) = doctor(dir.path(), false); + assert_eq!( + code, 0, + "a missing instructions block is an optional surface; must NOT fail the gate:\n{out}" + ); + assert!( + out.contains("⚠ agent-orientation block missing from CLAUDE.md / AGENTS.md"), + "missing block should surface as a warning:\n{out}" + ); + + // --fix re-injects the block; a plain re-run is then clean. + let (code, out) = doctor(dir.path(), true); + assert_eq!(code, 0, "--fix should repair and exit 0:\n{out}"); + assert!( + out.contains("agent-orientation block missing from CLAUDE.md / AGENTS.md — fixed"), + "stdout:\n{out}" + ); + let (code, _) = doctor(dir.path(), false); + assert_eq!(code, 0, "repaired project must be healthy on re-run"); +} + +/// `Drifted` -> **problem**: a stale block body fails the gate without `--fix` +/// and is auto-repaired with `--fix`. This pins the one branch that actually +/// gates the doctor exit code; a refactor flipping Drifted to a warning would +/// otherwise pass the suite undetected. +#[test] +fn doctor_reports_drifted_instructions_block_as_gating_problem() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + // Hand-edit the body inside the Loomweave span -> Drifted. 
+ let claude = dir.path().join("CLAUDE.md"); + let content = fs::read_to_string(&claude).unwrap(); + let drifted = content.replace("code archaeology", "DRIFTED HEADER"); + assert_ne!(drifted, content, "test setup: substitution must apply"); + fs::write(&claude, &drifted).unwrap(); + + let (code, out) = doctor(dir.path(), false); + assert_eq!( + code, 1, + "a drifted instructions block must FAIL the doctor gate without --fix:\n{out}" + ); + assert!( + out.contains("agent-orientation block drifted from the bundled copy"), + "stdout:\n{out}" + ); + + let (code, out) = doctor(dir.path(), true); + assert_eq!(code, 0, "--fix should repair drift and exit 0:\n{out}"); + assert!( + out.contains("agent-orientation block drifted from the bundled copy — fixed"), + "stdout:\n{out}" + ); + let (code, _) = doctor(dir.path(), false); + assert_eq!(code, 0, "repaired project must be healthy on re-run"); +} + +/// `Malformed` -> **problem**: a dangling Loomweave start marker (no following +/// end marker) fails the gate without `--fix`, and `--fix` repairs it without +/// truncating to EOF. +#[test] +fn doctor_reports_malformed_instructions_block_as_gating_problem() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + // Replace one target file's block with a dangling start marker. 
+ fs::write( + dir.path().join("CLAUDE.md"), + "# notes\n\norphan body, no end marker\n", + ) + .unwrap(); + + let (code, out) = doctor(dir.path(), false); + assert_eq!( + code, 1, + "a malformed instructions block must FAIL the doctor gate without --fix:\n{out}" + ); + assert!( + out.contains("agent-orientation block malformed (dangling loomweave marker)"), + "stdout:\n{out}" + ); + + let (code, out) = doctor(dir.path(), true); + assert_eq!( + code, 0, + "--fix should repair the malformed block and exit 0:\n{out}" + ); + let fixed = fs::read_to_string(dir.path().join("CLAUDE.md")).unwrap(); + assert!( + fixed.contains("# notes"), + "leading content must survive the repair:\n{fixed}" + ); + assert!( + fixed.contains("orphan body, no end marker"), + "orphaned body must survive as loose prose:\n{fixed}" + ); + let (code, _) = doctor(dir.path(), false); + assert_eq!(code, 0, "repaired project must be healthy on re-run"); +} + +/// JSON surface: pin the `instructions.block` check shape. Healthy install -> +/// status `ok`, `fixed: false`; a drifted block -> status `problem` and the run +/// aggregates to `ok: false`. The healthy-install json shape test omits this +/// check, leaving the status string and `fixed` flag unverified. +#[test] +fn doctor_json_reports_instructions_block_check_shape() { + let dir = tempfile::tempdir().unwrap(); + install(&["install", "--all"], dir.path()); + + // Healthy: instructions.block is ok, not fixed. + let (code, json) = doctor_json(dir.path(), false); + assert_eq!(code, 0, "healthy install should exit 0: {json}"); + let check = json["checks"] + .as_array() + .unwrap() + .iter() + .find(|c| c["id"] == "instructions.block") + .expect("instructions.block check present"); + assert_eq!(check["status"], "ok"); + assert_eq!(check["fixed"], serde_json::json!(false)); + + // Drift the block -> the json check becomes a problem and ok aggregates to false. 
+ let claude = dir.path().join("CLAUDE.md"); + let content = fs::read_to_string(&claude).unwrap(); + fs::write( + &claude, + content.replace("code archaeology", "DRIFTED HEADER"), + ) + .unwrap(); + + let (code, json) = doctor_json(dir.path(), false); + assert_eq!(code, 1, "a drifted block must fail the json gate: {json}"); + assert_eq!( + json["ok"], false, + "an instructions-driven problem must make the run not ok: {json}" + ); + let check = json["checks"] + .as_array() + .unwrap() + .iter() + .find(|c| c["id"] == "instructions.block") + .expect("instructions.block check present"); + assert_eq!(check["status"], "problem"); + + // --fix repairs it: status becomes fixed. + let (code, json) = doctor_json(dir.path(), true); + assert_eq!(code, 0, "--fix json should repair and exit 0: {json}"); + let check = json["checks"] + .as_array() + .unwrap() + .iter() + .find(|c| c["id"] == "instructions.block") + .expect("instructions.block check present"); + assert_eq!(check["status"], "fixed"); + assert_eq!(check["fixed"], serde_json::json!(true)); +} + #[test] fn doctor_reports_published_ephemeral_port() { let dir = tempfile::tempdir().unwrap(); diff --git a/docs/implementation/instructions-injection-plan.md b/docs/implementation/instructions-injection-plan.md new file mode 100644 index 00000000..e95cc085 --- /dev/null +++ b/docs/implementation/instructions-injection-plan.md @@ -0,0 +1,224 @@ +# Plan: inject Loomweave agent-orientation guidance into CLAUDE.md / AGENTS.md + +Status: proposal / sketch (2026-06-06) +Scope: `loomweave install` (write) + `loomweave doctor [--fix]` (verify/repair) + +## Problem + +An agent that opens a Loomweave-indexed repo only learns how to use Loomweave +by *pulling* a surface: the MCP server's instructions preamble, or the +`loomweave-workflow` skill. Neither is in the always-loaded `CLAUDE.md` / +`AGENTS.md` context. 
Filigree already solves the same problem by *pushing* a +managed marker-block into those files (`inject_instructions`, +`src/filigree/install.py`). Loomweave does not — confirmed: a full grep of the +source for `CLAUDE.md`/`AGENTS.md` returns zero hits; the marker blocks in this +repo's own `CLAUDE.md`/`AGENTS.md` are Filigree's +(`<!-- filigree:instructions:… -->`). + +This plan adds the equivalent for Loomweave, slotting into the existing +orientation-surface machinery rather than inventing new structure. + +## Design (mirrors the established surface pattern) + +Every orientation surface in `loomweave-cli` already follows one shape, and the +new surface adopts it verbatim: + +| Surface | state query | idempotent installer (= `--fix` repair) | doctor checks | +|---|---|---|---| +| skill pack | `skill_pack::skill_pack_state` → `SkillPackState` | `install_skill_pack` | `check_skill` / `check_skill_json` | +| hook | `hooks_settings::…` → `HookState` | `install_session_start_hook` | `check_hook*` | +| MCP | `mcp_registration::…` → `McpState` | `install_mcp_entry` | `check_mcp*` | +| bindings | `integration_bindings::binding_state` → `BindingState` | `install_bindings` | `check_integration_bindings*` | +| **instructions (new)** | `instructions::instructions_state` → `InstructionsState` | `install_instructions` | `check_instructions*` | + +### New module: `crates/loomweave-cli/src/instructions.rs` + +Embeds one asset and manages a marker-block in two files at the project root: +`CLAUDE.md` and `AGENTS.md`. + +```rust +// Embedded, cli-local (no MCP owner exists for this asset, unlike the skill). +const INSTRUCTIONS_BODY: &str = + include_str!("../assets/instructions/loomweave.md"); + +// Loomweave's OWN marker namespace — must coexist with Filigree's block in the +// same file. Never collides with, reads, or edits `<!-- filigree:instructions:… -->`. +const START_PREFIX: &str = "<!-- loomweave:instructions:"; +const END_MARKER: &str = "<!-- /loomweave:instructions -->"; + +// Provenance only (human-readable); NOT the drift signal. See "Drift" below.
+fn start_marker() -> String { + format!("<!-- loomweave:instructions:v{}:{} -->", + env!("CARGO_PKG_VERSION"), body_hash_prefix()) +} + +const TARGET_FILES: &[&str] = &["CLAUDE.md", "AGENTS.md"]; +``` + +The rendered block is: + +``` +<!-- loomweave:instructions:v{version}:{hash} --> +…INSTRUCTIONS_BODY… +<!-- /loomweave:instructions --> +``` + +## Five decisions baked in (do not regress these) + +### 1. SAFE marker recovery — do NOT copy Filigree's truncate-to-EOF + +Filigree's malformed-recovery (`install.py`: start marker present, end marker +missing → `content[:start] + INSTRUCTIONS`) **truncates from the start marker +to EOF**. That is only safe when the tool owns the tail of the file. Loomweave +*never* owns the tail: this repo's `AGENTS.md` already holds Filigree's block at +lines 1–119, so every file Loomweave writes is a **two-block file**. Copying +Filigree's recovery verbatim would let a dangling Loomweave start marker eat +Filigree's block (and vice-versa). + +Rule for `install_instructions`: + +- **Replace** only when BOTH `START_PREFIX` and a following `END_MARKER` are + found and well-ordered (`end > start`). Replace exactly that span; touch no + byte outside it. +- **Append** when neither marker is present. +- **Dangling start marker** (start present, no following end): do **not** + truncate to EOF. Treat as malformed → `install` strips only the orphaned + marker line and re-appends a fresh well-formed block; `doctor` without + `--fix` reports it as a **problem** (see #2). Never delete bytes outside the + Loomweave span. +- Atomic write (temp + rename, preserve mode) and symlink rejection, matching + Filigree's `_atomic_write_text` / `reject_symlink` and Loomweave's existing + atomic-write convention. + +Guard test (mandatory): inject into a file that already contains a Filigree +block, and assert **both blocks survive** create / append / replace / malformed +round-trips. + +### 2.
Severity = `integration_bindings` model, not `skill_pack` model + +The how-to-use guidance is already delivered twice (MCP preamble + skill); the +CLAUDE.md block is a redundant always-on *push*. A project that omits it is +still first-class. Therefore: + +- `Missing` → **warning** (surfaced, suggests `--fix`; does NOT fail the + `doctor` gate). +- `Unparseable` / malformed / dangling-marker → **problem** (fails the gate; + this is the "genuinely broken, needs a human" case, and it composes with #1 — + we never auto-truncate an ambiguous block). +- `Drifted` → **problem** when `--fix` is absent, auto-repaired with `--fix` + (parity with skill pack's drift handling; safe because the repair only + rewrites Loomweave's own span). + +> **User veto point.** This is the one product-judgment call. If we'd rather +> treat the block as a first-class surface (Missing = problem, gate fails), +> flip `Missing` to problem. Recommended: warning. + +### 3. Drift signal = block-body content hash, not the marker version + +If the marker version string were the drift signal, every workspace version +bump (`v1.1.0-rc2` → next) would make `doctor` report "drifted" on byte-for-byte +identical content. `skill_pack` already avoids this: its blake3 fingerprint is +the drift signal and the version is "informational only." Mirror it — compare +the **extracted block-body bytes** against `INSTRUCTIONS_BODY`; keep +`v{version}` in the marker as provenance only. + +### 4. Concurrent session-start refresh race — accepted, not engineered around + +If the session-start hook re-injects on every start (as Filigree does), two +sessions race read-modify-write on the same files. Steady-state this is +harmless: each tool's refresh is deterministic, so a lost write reproduces +identical bytes next session. The only corruption risk was the truncation in +#1, already removed. Decision: **do not re-inject from the session-start hook**; +injection happens on `install` and `doctor --fix` only. 
Note the race as +accepted; no cross-tool lock. + +### 5. One flag, both files + +Add a single `--instructions` component (match `skill_pack`'s one-flag-both-roots +ergonomics, not Filigree's two-flag `--claude-md`/`--agents-md` split). It +writes both `CLAUDE.md` and `AGENTS.md`. + +## Content of the embedded asset (keep it THIN) + +`crates/loomweave-cli/assets/instructions/loomweave.md` — deliberately shorter +than Filigree's ~120 lines, because it is always-loaded context competing with +the skill that says the same thing. Target ~15–25 lines: a pointer, not a +manual. Sketch: + +```markdown +## Loomweave (code archaeology) + +This repo is indexed by Loomweave. Before grepping or re-reading the tree to +answer "what calls X", "where is X defined", "what subsystem owns X", or "find +the thing that does Y" — ask Loomweave's MCP tools (`mcp__loomweave__*`): +`entity_find`, `entity_at`, `entity_callers_list`, `entity_neighborhood_get`, +`project_status_get`. Entity IDs are `{plugin}:{kind}:{qualified_name}`. + +Index freshness and counts: `project_status_get` (or the `loomweave://context` +resource). If stale, run `loomweave analyze <path>`. + +Full workflow: the `loomweave-workflow` skill. +``` + +(Final wording to track the MCP server instructions preamble so the two don't +drift apart in tone.) + +## Wiring changes (exact insertion points) + +1. **`cli.rs`** — add `InstallComponent::Instructions` and an `--instructions` + flag to the `Install` subcommand args (alongside `--skills`/`--hooks`). +2. **`install.rs`** + - `InstallPlan::Components` — add `instructions: bool` field. + - `from_components` — populate it from `InstallComponent::Instructions`. + - add `InstallPlan::instructions(self) -> bool` (true for `All` and the + component). + - `validate_plan` — include `instructions()` in the do-nothing guard.
+ - `run()` — `if plan.instructions() { install_instructions(&project_root)?; }` + plus an `install_instructions` wrapper printing changed/up-to-date, in the + same style as `install_claude_skills`. + - Naked `install` (`InstallPlan::All`) therefore writes the blocks by default. +3. **`doctor.rs`** + - `use crate::instructions::{self, InstructionsState};` + - text path: `tally += check_instructions(&project_root, fix);` in `run()`, + plus `fn check_instructions` mirroring `check_skill`. + - json path: add `check_instructions_json(project_root, fix)` to the + `json_report` `checks` vec. + - `next_actions` map: add + `"instructions.block" => "Run \`loomweave doctor --fix\` or \`loomweave install --instructions\`."`. +4. **`main.rs`** — route the new component flag into `from_components` (follows + the existing component plumbing; no new branch logic). + +## Tests + +- `instructions.rs` unit tests: + - create (no file) → file created with one well-formed block. + - append (file without marker) → block appended, prior content intact. + - replace (file with current marker) → idempotent no-op when body matches; + rewrite when body differs (drift). + - **coexistence**: file pre-seeded with a Filigree block → after every + operation, BOTH blocks present and Filigree's bytes untouched. + - dangling start marker → repaired without eating any other bytes; reported + `problem` without `--fix`. + - symlink target rejected. +- `doctor.rs`: `InstructionsState` → severity mapping (Missing=warning, + Unparseable/dangling=problem, Drifted=problem-without-fix / fixed-with-fix); + `--fix` converges to `UpToDate`; json `ok` flag reflects only problems. +- e2e: extend an install smoke script to assert the block lands in CLAUDE.md and + `doctor` reports it healthy. 
+ +## Suite-level follow-up (out of band, worth flagging) + +The "each Weft tool owns `<!-- {tool}:instructions:… -->` and edits only its own +span" rule is a **suite contract**, not a Loomweave-local detail — Filigree's +current truncate-to-EOF recovery violates it and can eat Loomweave's block. +Recommend a short ADR (or a line in `docs/suite/weft.md`, already cited by +`doctor.rs`) capturing the contract, and a matching fix to Filigree's +`inject_instructions` so both tools stop being able to corrupt each other. + +## Effort + +Small–medium. One new module (~150 lines + tests) modelled line-for-line on +`skill_pack.rs`, one embedded asset, and ~6 mechanical insertion points across +`cli.rs` / `install.rs` / `doctor.rs` / `main.rs`. No schema, no migration, no +new dependency (blake3 + tempfile already in the tree). +```