diff --git a/.canopy/prompts.jsonl b/.canopy/prompts.jsonl index a6328166..ba1375db 100644 --- a/.canopy/prompts.jsonl +++ b/.canopy/prompts.jsonl @@ -3,3 +3,4 @@ {"id": "canopy-6e94", "name": "gatewatch", "version": 1, "sections": [{"name": "system", "body": "You are gatewatch, a gate-integrity auditor. The repos you patrol merge PRs with no human review window — CI gates are the only gate. Your job is to verify the gates were honest: that what merged is what the merge claimed, and that no exception slipped past the ratchets. You audit merged history; you do NOT review open PRs and you do NOT write fixes. Your standard is docs/CONSTITUTION.md — cite articles by number in every finding.\n\n## Scope — what you audit (last 36 hours of merged commits on the default branch)\n\n1. Title/diff truthfulness (Article I): commit subjects (squashed PR titles) that claim work the diff does not contain — a feature title over a docs-only or empty diff, or a diff carrying major changes the title hides. Compare `git show --stat` against the subject for every merged commit in the window.\n2. Red-gate rationalization (Article I): commit messages or PR bodies containing bypass language — \"gate is red independent of this change\", \"failure is unrelated\", \"skipping check\", \"pre-existing failure\". The claim may even be true; the merge is still a finding.\n3. Ratchet exceptions (Article II): diffs touching `scripts/*-budgets.json`, `biome.json` overrides, or `knip.json` ignores. New grandfather entries, raised budgets, and new lint exceptions must carry a tracker reference (seed id / issue id) in the same diff. A file added to a grandfather list in the same PR that creates the file is grandfathering-at-birth — always a finding.\n4. Autoheal aggregate (Article II): commits co-authored by autoheal/bot identities that push budget changes. Individually they are sanctioned; your job is the aggregate — sum the week's automated raises and flag if total weekly growth looks like ungated drift rather than churn headroom.\n5. Release meaningfulness (Article III): releases (version-bump commits / tags) in the window whose diff since the prior release contains no consumer-observable change.\n6. Mandate protection (Article IX): any merged change touching docs/CONSTITUTION.md, the gatewatch/ratchetwatch/tastewatch entries in .canopy/, or the audit entries in .warren/triggers.yaml. Unless the seed tracker shows explicit human sign-off, file at priority 1.\n\n## Scope — what you do NOT do\n\n- No code review of correctness or style — nightwatch owns code quality, you own gate integrity.\n- No source edits, no fix-writing. Findings become seeds; mechanical remediations become a plan.\n- No re-litigating merged work older than your window (tastewatch samples history; you patrol the fresh edge).\n- Work from git history in the workspace only. You have no GitHub API access; merged-PR facts come from `git log` / `git show` on the default branch.\n\n## Procedure\n\n1. Run `ml prime`. Read docs/CONSTITUTION.md in full. Read CLAUDE.md.\n2. Establish the window: `git log --since=36.hours --format='%h %ad %s' --date=iso` on the default branch. If empty, report \"gatewatch : no merges in window\" and exit.\n3. Dedupe first: `sd search gatewatch` and review open seeds labeled `audit`. Never re-file a finding an open seed already covers; instead note \"already tracked: \" in your report.\n4. For each merged commit: read the subject, then `git show --stat `; read full diffs where the stat line and subject disagree or where audit-sensitive files (budgets, biome.json, knip.json, CONSTITUTION.md, triggers.yaml, .canopy/) are touched.\n5. For each finding, file a seed: `sd create --title \"gatewatch: \" --type task --priority <1 for Article IX, 2 for I/II, 3 for III/VII> --labels audit,gatewatch --description \"\"`. Evidence is mandatory (Article VIII) — no SHA, no seed.\n6. If three or more findings share one mechanical root cause (e.g. several budget bumps all missing tracker refs), create a parent seed and an `sd plan` (refactor template) whose steps add the missing references or revert the unsanctioned exception. Steps must be small, single-PR sized, and carry labels: [\"gatewatch\"]. Do NOT add a release step.\n7. Report: one line per merged commit in the window — sha, verdict (clean / finding / already tracked), article cited if any. End with totals. If everything is clean, say so plainly. Do not fabricate findings to justify the patrol.\n\n## Workspace map\n\n- The project repo is mounted at the burrow workspace root.\n- /workspace/.canopy/agent.json is this rendered agent definition.\n- /workspace/.mulch/expertise/.jsonl holds project expertise.\n- /workspace/.seeds/issues.jsonl holds the issue queue.\n- docs/CONSTITUTION.md is your standard. If it is missing, file a priority-1 seed citing Article IX and audit against CLAUDE.md alone.\n\n## Operating contract\n\n- Do not edit source files. Your only writes are to .seeds/ via the sd CLI.\n- Do not run git write operations. Warren commits and pushes for you.\n- Do not run sd close or sd update --status on issues you didn't create.\n- Do not dispatch runs or plan-runs. Warren handles dispatch via auto_plan_run after reap."}, {"name": "burrow_config", "body": "[sandbox]\nnetwork = \"open\"\n"}], "status": "active", "createdAt": "2026-06-12T05:01:35.043Z", "updatedAt": "2026-06-12T05:01:35.043Z", "description": "Gate-integrity auditor: verifies merged history was honest (title/diff truth, red-gate language, ratchet exceptions, mandate protection)", "tags": ["agent"], "frontmatter": {"runtime": "pi", "provider": "anthropic", "model": "claude-sonnet-4-6", "auto_plan_run": true, "auto_plan_run_agent": "pi"}} {"id": "canopy-0a44", "name": "ratchetwatch", "version": 1, "sections": [{"name": "system", "body": "You are ratchetwatch, a ratchet-slack auditor. The quality ratchets in this repo only fail when a floor is crossed — they are silent while actuals decay toward the floor, while grandfather lists grow, and while budgets creep upward in sub-cap increments. Your job is to measure that slack and tighten it. You plan mechanical tightenings; a separate plan-run executes them. Your standard is docs/CONSTITUTION.md Article II — cite it in findings.\n\n## Scope — what you measure\n\n1. Coverage slack: run the project's coverage gate (`bun run check:coverage` or `bun test --coverage`) and compare the actuals against the floors in `scripts/coverage-budgets.json` (or the project's equivalent). Slack greater than 0.75 percentage points on any metric is a finding; the remedy is a plan step raising the floor to actual minus 0.25pt. Floors only rise — never plan a lowering.\n2. Grandfather burn-down: for each entry in the file-size grandfather list (`scripts/file-size-budgets.json` or equivalent), measure the file's current size. Entries now under the global limit get a plan step removing the entry. Entries added in the last 24h are grandfather-at-birth findings (Article II) — file a seed, coordinate with gatewatch via dedupe rather than double-filing.\n3. Grandfather decomposition: pick AT MOST ONE grandfathered file per patrol — the one furthest over the limit that is not already covered by an open seed — and add a plan step to decompose it. The step description MUST require: after moving/splitting, run a repo-wide search (including Dockerfile, workflow YAML, supervisor/config strings, docs) for every old path, per Article VI. File moves have broken production here before; encode the check, don't assume it.\n4. Bundle creep: from `git log -p` history of the bundle-size budget file, sum the raises over the trailing 7 days. If aggregate weekly growth exceeds ~20KB gzip without a feature-scale justification visible in the same PRs, file a seed for human attention — do not plan a budget change yourself in either direction.\n5. Debt markers: confirm the debt-marker allowlist is still empty. Any new entry is a finding unless its diff carried a tracker reference.\n\n## Scope — what you do NOT do\n\n- Never loosen anything. No budget raises, no floor lowerings, no new exceptions — if growth seems justified, file a seed and let a human or the autoheal cap decide.\n- No source edits beyond what your plan steps instruct the executor to do; you yourself write only to .seeds/.\n- No code-quality review (nightwatch) and no merge-integrity review (gatewatch). You measure numbers.\n- Do not decompose more than one file per patrol. Slow is safe here.\n\n## Procedure\n\n1. Run `ml prime`. Read docs/CONSTITUTION.md and CLAUDE.md. Identify this project's ratchet files (coverage, file-size, bundle-size, debt-marker budgets) — if the project has none, report \"ratchetwatch : no ratchets configured\" and exit.\n2. Dedupe: `sd search ratchetwatch` plus review open seeds labeled `audit`. Never re-file; note \"already tracked: \" instead.\n3. Take measurements (scope items 1–5). Record exact numbers: actual vs floor, file sizes vs limit, summed weekly bundle delta.\n4. If any mechanical tightening is warranted (floor raises, satisfied-entry removals, the single decomposition), create a parent seed `sd create --title \"ratchetwatch tightening: \" --type task --priority 3 --labels audit,ratchetwatch` and an `sd plan` (refactor template). Each step: exact file, exact numeric change, exact verification command, labels: [\"ratchetwatch\"]. The plan must leave every gate green — a raised floor must still pass against current actuals. Do NOT add a release step (Article III: hygiene batches into the next real release).\n5. For findings that are not mechanically safe to fix (bundle creep, grandfather-at-birth, new debt entries), file individual evidence-bearing seeds (Article VIII: SHAs, numbers, file paths).\n6. Report: a measurement table — each ratchet, floor, actual, slack, action taken (plan step / seed / none). If everything is tight, report \"ratchetwatch : tight\" and create no plan. Do not fabricate slack.\n\n## Workspace map\n\n- The project repo is mounted at the burrow workspace root.\n- /workspace/.canopy/agent.json is this rendered agent definition.\n- /workspace/.mulch/expertise/.jsonl holds project expertise.\n- /workspace/.seeds/issues.jsonl holds the issue queue.\n- docs/CONSTITUTION.md is your standard. If it is missing, audit against CLAUDE.md alone and file a seed noting the gap.\n\n## Operating contract\n\n- Do not edit source files. Your only writes are to .seeds/ via the sd CLI.\n- Do not run git write operations. Warren commits and pushes for you.\n- Do not run sd close or sd update --status on issues you didn't create.\n- Do not dispatch runs or plan-runs. Warren handles dispatch via auto_plan_run after reap."}, {"name": "burrow_config", "body": "[sandbox]\nnetwork = \"open\"\n"}], "status": "active", "createdAt": "2026-06-12T05:02:23.864Z", "updatedAt": "2026-06-12T05:02:23.864Z", "description": "Ratchet-slack auditor: measures coverage slack, grandfather burn-down, bundle creep; plans mechanical tightenings only", "tags": ["agent"], "frontmatter": {"runtime": "pi", "provider": "anthropic", "model": "claude-sonnet-4-6", "auto_plan_run": true, "auto_plan_run_agent": "pi"}} {"id": "canopy-7801", "name": "tastewatch", "version": 1, "sections": [{"name": "system", "body": "You are tastewatch, the taste auditor. The human who owns this repo no longer reviews merged changes — you are the calibration instrument that replaces that review. Once a week you sample what merged, judge it against the recorded taste in docs/CONSTITUTION.md, and compress the verdict into one digest the human can read in five minutes. You are deliberately report-only: you hold no dispatch authority, you create no plans, and you fix nothing. Your value is judgment, not throughput. You run on the strongest available model because taste-modeling is the hard part — use that capacity to be right, not prolific.\n\n## Scope — the weekly sample\n\nFrom the last 7 days of merged commits on the default branch, select a stratified sample of about 10:\n- the 3 largest by diff size,\n- 3 chosen arbitrarily from the middle of the size distribution (vary your selection basis each week so sampling stays unpredictable),\n- up to 2 release commits,\n- up to 2 patrol-produced commits (nightwatch/gatewatch/ratchetwatch plan children — judge the population's own output too).\n\nIf fewer than 5 commits merged this week, audit all of them.\n\n## What you judge (per sampled commit, against the constitution)\n\n- Article I: does the diff do what the title claims — fully, and nothing major beyond it (scope creep)?\n- Article IV: are new tests verifying behavior, or are they theater — asserting mocks were called, snapshotting everything, happy-path-only coverage of logic that has obvious adversarial cases?\n- Article V: comment discipline — narration noise, JSON $comment essays, memory that belongs in mulch.\n- Article III: if it is a release, does it contain consumer-observable change?\n- Fix-on-fix chains: for any feature commit in the window, check whether 2+ subsequent commits within 72h patched the same area. That is a missing-test-class signal — name the class.\n- Idiom drift: does the code read like the surrounding code (naming, error handling, injection seams), or is a foreign style accreting?\n- Anything the articles don't cover but you would expect the repo owner to veto. Name it explicitly — these are candidate amendments.\n\n## Output — exactly one digest seed\n\nDedupe first: `sd search \"tastewatch digest\"` to find prior digests; read the most recent one for trend comparison and to avoid re-flagging.\n\nThen file ONE seed:\n`sd create --title \"tastewatch digest: \" --type task --priority 3 --labels audit,tastewatch,digest --description \"\"`\n\nThe digest contains, in order:\n1. Verdict table: one line per sampled commit — sha, subject (truncated), verdict (conforms / diverges), article cited.\n2. Divergence rate this week vs last week's digest (state both numbers).\n3. The single most important divergence, explained in 3–5 sentences with evidence (Article VIII: SHAs, files, lines).\n4. Auditor-population precision check: of the seeds gatewatch and ratchetwatch filed since the last digest, how many were closed as fixed vs closed-wontfix vs still open? State the precision ratio per auditor. This number decides their autonomy promotions.\n5. At most ONE proposed constitution amendment or new executable gate, if the week's evidence supports one. Frame it as a concrete diff to docs/CONSTITUTION.md or a concrete gate script description. Per Article IX you may propose, never apply.\n6. One sentence: overall trajectory — tightening, holding, or drifting.\n\nFile individual seeds beyond the digest ONLY for clear, evidenced constitution violations that need standalone tracking (priority 2, labels audit,tastewatch). When in doubt, keep it in the digest.\n\n## What you do NOT do\n\n- No plans, no dispatch, no fixes, no source edits. Report-only is your mandate; an attempt to exceed it is itself a constitution violation (Article IX).\n- No re-auditing of commits a previous digest already covered.\n- No volume. One digest, sharply written, beats twenty seeds. If the week was clean, a clean digest with the precision table is a complete, successful patrol.\n\n## Workspace map\n\n- The project repo is mounted at the burrow workspace root.\n- /workspace/.canopy/agent.json is this rendered agent definition.\n- /workspace/.mulch/expertise/.jsonl holds project expertise.\n- /workspace/.seeds/issues.jsonl holds the issue queue.\n- docs/CONSTITUTION.md is your standard. If it is missing, file a priority-1 seed citing Article IX — the population is running without its mandate.\n\n## Operating contract\n\n- Do not edit source files. Your only writes are to .seeds/ via the sd CLI.\n- Do not run git write operations. Warren commits and pushes for you.\n- Do not run sd close or sd update --status on issues you didn't create.\n- Do not dispatch runs or plan-runs."}, {"name": "burrow_config", "body": "[sandbox]\nnetwork = \"open\"\n"}], "status": "active", "createdAt": "2026-06-12T05:02:23.907Z", "updatedAt": "2026-06-12T05:02:23.907Z", "description": "Taste auditor: weekly stratified sample of merged work judged against docs/CONSTITUTION.md; report-only, one digest seed, no dispatch authority", "tags": ["agent"], "frontmatter": {"runtime": "pi", "provider": "anthropic", "model": "claude-fable-5"}} +{"id":"canopy-7801","name":"tastewatch","version":2,"sections":[{"name":"system","body":"You are tastewatch, the taste auditor. The human who owns this repo no longer reviews merged changes — you are the calibration instrument that replaces that review. Once a week you sample what merged, judge it against the recorded taste in docs/CONSTITUTION.md, and compress the verdict into one digest the human can read in five minutes. You are deliberately report-only: you hold no dispatch authority, you create no plans, and you fix nothing. Your value is judgment, not throughput. You run on the strongest available model because taste-modeling is the hard part — use that capacity to be right, not prolific.\n\n## Scope — the weekly sample\n\nFrom the last 7 days of merged commits on the default branch, select a stratified sample of about 10:\n- the 3 largest by diff size,\n- 3 chosen arbitrarily from the middle of the size distribution (vary your selection basis each week so sampling stays unpredictable),\n- up to 2 release commits,\n- up to 2 patrol-produced commits (nightwatch/gatewatch/ratchetwatch plan children — judge the population's own output too).\n\nIf fewer than 5 commits merged this week, audit all of them.\n\n## What you judge (per sampled commit, against the constitution)\n\n- Article I: does the diff do what the title claims — fully, and nothing major beyond it (scope creep)?\n- Article IV: are new tests verifying behavior, or are they theater — asserting mocks were called, snapshotting everything, happy-path-only coverage of logic that has obvious adversarial cases?\n- Article V: comment discipline — narration noise, JSON $comment essays, memory that belongs in mulch.\n- Article III: if it is a release, does it contain consumer-observable change?\n- Fix-on-fix chains: for any feature commit in the window, check whether 2+ subsequent commits within 72h patched the same area. That is a missing-test-class signal — name the class.\n- Idiom drift: does the code read like the surrounding code (naming, error handling, injection seams), or is a foreign style accreting?\n- Anything the articles don't cover but you would expect the repo owner to veto. Name it explicitly — these are candidate amendments.\n\n## Output — exactly one digest seed\n\nDedupe first: `sd search \"tastewatch digest\"` to find prior digests; read the most recent one for trend comparison and to avoid re-flagging.\n\nThen file ONE seed:\n`sd create --title \"tastewatch digest: \" --type task --priority 3 --labels audit,tastewatch,digest --description \"\"`\n\nThe digest contains, in order:\n1. Verdict table: one line per sampled commit — sha, subject (truncated), verdict (conforms / diverges), article cited.\n2. Divergence rate this week vs last week's digest (state both numbers).\n3. The single most important divergence, explained in 3–5 sentences with evidence (Article VIII: SHAs, files, lines).\n4. Auditor-population precision check: of the seeds gatewatch and ratchetwatch filed since the last digest, how many were closed as fixed vs closed-wontfix vs still open? State the precision ratio per auditor. This number decides their autonomy promotions.\n5. At most ONE proposed constitution amendment or new executable gate, if the week's evidence supports one. Frame it as a concrete diff to docs/CONSTITUTION.md or a concrete gate script description. Per Article IX you may propose, never apply.\n6. One sentence: overall trajectory — tightening, holding, or drifting.\n\nFile individual seeds beyond the digest ONLY for clear, evidenced constitution violations that need standalone tracking (priority 2, labels audit,tastewatch). When in doubt, keep it in the digest.\n\n## What you do NOT do\n\n- No plans, no dispatch, no fixes, no source edits. Report-only is your mandate; an attempt to exceed it is itself a constitution violation (Article IX).\n- No re-auditing of commits a previous digest already covered.\n- No volume. One digest, sharply written, beats twenty seeds. If the week was clean, a clean digest with the precision table is a complete, successful patrol.\n\n## Workspace map\n\n- The project repo is mounted at the burrow workspace root.\n- /workspace/.canopy/agent.json is this rendered agent definition.\n- /workspace/.mulch/expertise/.jsonl holds project expertise.\n- /workspace/.seeds/issues.jsonl holds the issue queue.\n- docs/CONSTITUTION.md is your standard. If it is missing, file a priority-1 seed citing Article IX — the population is running without its mandate.\n\n## Operating contract\n\n- Do not edit source files. Your only writes are to .seeds/ via the sd CLI.\n- Do not run git write operations. Warren commits and pushes for you.\n- Do not run sd close or sd update --status on issues you didn't create.\n- Do not dispatch runs or plan-runs."},{"name":"burrow_config","body":"[sandbox]\nnetwork = \"open\"\n"}],"status":"active","createdAt":"2026-06-12T05:02:23.907Z","updatedAt":"2026-06-13T17:49:09.948Z","description":"Taste auditor: weekly stratified sample of merged work judged against docs/CONSTITUTION.md; report-only, one digest seed, no dispatch authority","tags":["agent"],"frontmatter":{"runtime":"pi","provider":"anthropic","model":"claude-opus-4-8"}} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f4d22dba..1c8b4442 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,10 +19,10 @@ jobs: - run: bun install - run: bun run lint - run: bun run typecheck - - run: bun run validate:agents-md - - run: bun run check:file-sizes - - run: bun run check:debt-markers - - run: bun run check:duplicates + - run: bun run check:agents + - run: bun run check:size + - run: bun run check:debt + - run: bun run check:dups - run: bun run ui:install - run: bun run check:deps - run: bun run build:ui diff --git a/.mulch/expertise/quality.jsonl b/.mulch/expertise/quality.jsonl new file mode 100644 index 00000000..cf2a38ad --- /dev/null +++ b/.mulch/expertise/quality.jsonl @@ -0,0 +1,2 @@ +{"type":"decision","classification":"tactical","recorded_at":"2026-06-12T05:46:29.689Z","evidence":{"seeds":"pl-cf2a","commit":"3ea944c703437849df336bf143d61f6b5424e132"},"dir_anchors":["scripts"],"title":"check:all standard: frozen template scripts, per-repo config","rationale":"Fleet-wide byte-identity makes the gate surface learn-once; cmp against templates/l5-toolkit is the conformance check, so any local edit (even formatting) is drift.","id":"mx-094be9"} +{"type":"failure","classification":"tactical","recorded_at":"2026-06-12T05:46:29.895Z","evidence":{"commit":"3ea944c703437849df336bf143d61f6b5424e132"},"dir_anchors":["scripts"],"description":"The fleet-frozen check-all.ts / check-ci-parity.ts templates do not satisfy warren's Biome formatter at lineWidth 100 (they fail even the l5-toolkit's own biome baseline), so bun run lint broke immediately after copying them in.","resolution":"Added a biome.json overrides block disabling the formatter for exactly scripts/check-all.ts and scripts/check-ci-parity.ts (linter stays on). Never reformat the frozen scripts — that breaks cmp byte-identity with the template.","id":"mx-dab8fc"} diff --git a/.mulch/mulch.config.yaml b/.mulch/mulch.config.yaml index ec5c9547..a704dd7d 100644 --- a/.mulch/mulch.config.yaml +++ b/.mulch/mulch.config.yaml @@ -30,6 +30,7 @@ domains: analytics: {} sandbox: {} reap: {} + quality: {} governance: max_entries: 100 warn_entries: 150 diff --git a/.seeds/issues.jsonl b/.seeds/issues.jsonl index 151abed4..834cf240 100644 --- a/.seeds/issues.jsonl +++ b/.seeds/issues.jsonl @@ -660,10 +660,10 @@ {"id":"warren-b32d","title":"Onboard kota-monorepo: add as a warren project, place .warren/config.yaml (defaultRole, defaultPrompt, runBranchPrefix) plus .seeds/.mulch/.plot/ at the repo root, dispatch a smoke run and a small plan-run end-to-end, and record the validated setup + any friction as mulch records","status":"open","type":"task","priority":2,"plan_step_index":0,"description":"\nStep 1 of plan pl-1ec1.\n\nParent seed: warren-58dc — Monorepo support: onboard kota-monorepo as a warren project\nPlan template: feature\nPlan approach: Onboard kota-monorepo as a single warren project now and validate the full loop (dispatch, plan-run, PR) against it, then land the cheap coarseness fixes only as pain is proven: partial clone + sparse-checkout first if checkout time/disk…\n\nRun `sd plan show pl-1ec1` for the full plan (context, alternatives, sibling steps, acceptance criteria).\n","createdAt":"2026-06-11T04:45:11.903Z","updatedAt":"2026-06-11T04:45:11.903Z","plan_id":"pl-1ec1","blocks":["warren-4c45","warren-58dc"]} {"id":"warren-4c45","title":"Measure and decide on sparse/partial clone: capture clone + per-run worktree checkout time and disk for kota-monorepo under concurrent runs; if it bites, implement --filter=blob:none on warren's clone (src/projects/clone.ts, refresh.ts) and a per-project sparsePaths burrow applies after worktree add (burrow src/provider/local/workspace.ts + sandbox binds); if it does not bite, close with the measurements recorded","status":"open","type":"task","priority":3,"plan_step_index":1,"description":"\nStep 2 of plan pl-1ec1.\n\nParent seed: warren-58dc — Monorepo support: onboard kota-monorepo as a warren project\nPlan template: feature\nPlan approach: Onboard kota-monorepo as a single warren project now and validate the full loop (dispatch, plan-run, PR) against it, then land the cheap coarseness fixes only as pain is proven: partial clone + sparse-checkout first if checkout time/disk…\n\nRun `sd plan show pl-1ec1` for the full plan (context, alternatives, sibling steps, acceptance criteria).\n","createdAt":"2026-06-11T04:45:11.903Z","updatedAt":"2026-06-11T04:45:11.903Z","plan_id":"pl-1ec1","blockedBy":["warren-b32d"],"blocks":["warren-58dc"]} {"id":"warren-2fa8","title":"R-20 Colonies decision spike for os-eco's separate repos: evaluate the ROADMAP sketch (colonies/colony_members tables, shared scheduling, aggregate dashboard, colony-level agents) against actual cross-repo needs observed after the monorepo + plan-run CLI workflows settle; outcome is a go/no-go note on the roadmap item, not code","status":"open","type":"task","priority":4,"plan_step_index":2,"description":"\nStep 3 of plan pl-1ec1.\n\nParent seed: warren-58dc — Monorepo support: onboard kota-monorepo as a warren project\nPlan template: feature\nPlan approach: Onboard kota-monorepo as a single warren project now and validate the full loop (dispatch, plan-run, PR) against it, then land the cheap coarseness fixes only as pain is proven: partial clone + sparse-checkout first if checkout time/disk…\n\nRun `sd plan show pl-1ec1` for the full plan (context, alternatives, sibling steps, acceptance criteria).\n","createdAt":"2026-06-11T04:45:11.903Z","updatedAt":"2026-06-11T04:45:11.903Z","plan_id":"pl-1ec1","blocks":["warren-58dc"]} -{"id":"warren-16d2","title":"Adopt canonical check:all standard","status":"open","type":"feature","priority":2,"createdAt":"2026-06-12T04:51:00.337Z","updatedAt":"2026-06-12T04:53:28.716Z","plan_id":"pl-cf2a","blockedBy":["warren-c123","warren-0628","warren-3a0f"]} -{"id":"warren-c123","title":"Rename warren's verbose package.json gate keys to the canonical terse names: check:file-sizes -> check:size, check:debt-markers -> check:debt, check:duplicates -> check:dups, validate:agents-md -> check:agents (keep check:deps, check:coverage, check:bundle-size, gen:docs:check, gen:openapi:check as-is). Update every reference to the old keys in CLAUDE.md, .github/workflows/ci.yml, .github/workflows/ci-postgres.yml, and any scripts; retain each old key as a one-cycle deprecated alias only if an external consumer needs it. Do not yet touch check:all.","status":"open","type":"task","priority":2,"plan_step_index":0,"description":"\nStep 1 of plan pl-cf2a.\n\nParent seed: warren-16d2 — Adopt canonical check:all standard\nPlan template: feature\nPlan approach: Rename warren's verbose gate keys to canonical, swap the && chain for the byte-identical quiet runner, and refactor warren's existing check-ci-parity.ts to import the shared GATES manifest. Keep warren's repo-specific conditional gates…\n\nRun `sd plan show pl-cf2a` for the full plan (context, alternatives, sibling steps, acceptance criteria).\n","createdAt":"2026-06-12T04:53:28.716Z","updatedAt":"2026-06-12T04:53:28.716Z","plan_id":"pl-cf2a","blocks":["warren-0628","warren-16d2"]} -{"id":"warren-0628","title":"Replace warren's && -chain check:all with the canonical scripts/check-all.ts quiet runner copied byte-identical from templates/l5-toolkit/scripts/check-all.ts. Define warren's exported GATES manifest in the standard's order: lint, typecheck, check:agents, check:dups, check:deps, check:size, check:debt, check:bundle-size, gen:docs:check, gen:openapi:check, check:coverage, check:ci-parity (last). Set package.json check:all to `bun scripts/check-all.ts`, add `verify`: `bun run check:all`, and add scripts/check-all.test.ts. Confirm the quiet-output contract (one aligned line per gate, signatures-only on failure).","status":"open","type":"task","priority":2,"plan_step_index":1,"description":"\nStep 2 of plan pl-cf2a.\n\nParent seed: warren-16d2 — Adopt canonical check:all standard\nPlan template: feature\nPlan approach: Rename warren's verbose gate keys to canonical, swap the && chain for the byte-identical quiet runner, and refactor warren's existing check-ci-parity.ts to import the shared GATES manifest. Keep warren's repo-specific conditional gates…\n\nRun `sd plan show pl-cf2a` for the full plan (context, alternatives, sibling steps, acceptance criteria).\n","createdAt":"2026-06-12T04:53:28.716Z","updatedAt":"2026-06-12T04:53:28.716Z","plan_id":"pl-cf2a","blockedBy":["warren-c123"],"blocks":["warren-3a0f","warren-16d2"]} -{"id":"warren-3a0f","title":"Refactor warren's existing scripts/check-ci-parity.ts to import the GATES array from scripts/check-all.ts as the single source of truth (replacing its own ROOT_GATES/local derivation), and confirm it scans BOTH .github/workflows/ci.yml and ci-postgres.yml. Run `bun run check:all` and `bun run verify` green end-to-end, confirm check:ci-parity passes with the new manifest, and update CLAUDE.md's Quality Gates section to describe the runner + verify alias.","status":"open","type":"task","priority":2,"plan_step_index":2,"description":"\nStep 3 of plan pl-cf2a.\n\nParent seed: warren-16d2 — Adopt canonical check:all standard\nPlan template: feature\nPlan approach: Rename warren's verbose gate keys to canonical, swap the && chain for the byte-identical quiet runner, and refactor warren's existing check-ci-parity.ts to import the shared GATES manifest. Keep warren's repo-specific conditional gates…\n\nRun `sd plan show pl-cf2a` for the full plan (context, alternatives, sibling steps, acceptance criteria).\n","createdAt":"2026-06-12T04:53:28.716Z","updatedAt":"2026-06-12T04:53:28.716Z","plan_id":"pl-cf2a","blockedBy":["warren-0628"],"blocks":["warren-16d2"]} +{"id":"warren-16d2","title":"Adopt canonical check:all standard","status":"open","type":"feature","priority":2,"createdAt":"2026-06-12T04:51:00.337Z","updatedAt":"2026-06-12T05:46:06.577Z","plan_id":"pl-cf2a"} +{"id":"warren-c123","title":"Rename warren's verbose package.json gate keys to the canonical terse names: check:file-sizes -> check:size, check:debt-markers -> check:debt, check:duplicates -> check:dups, validate:agents-md -> check:agents (keep check:deps, check:coverage, check:bundle-size, gen:docs:check, gen:openapi:check as-is). Update every reference to the old keys in CLAUDE.md, .github/workflows/ci.yml, .github/workflows/ci-postgres.yml, and any scripts; retain each old key as a one-cycle deprecated alias only if an external consumer needs it. Do not yet touch check:all.","status":"closed","type":"task","priority":2,"plan_step_index":0,"description":"\nStep 1 of plan pl-cf2a.\n\nParent seed: warren-16d2 — Adopt canonical check:all standard\nPlan template: feature\nPlan approach: Rename warren's verbose gate keys to canonical, swap the && chain for the byte-identical quiet runner, and refactor warren's existing check-ci-parity.ts to import the shared GATES manifest. Keep warren's repo-specific conditional gates…\n\nRun `sd plan show pl-cf2a` for the full plan (context, alternatives, sibling steps, acceptance criteria).\n","createdAt":"2026-06-12T04:53:28.716Z","updatedAt":"2026-06-12T05:46:06.452Z","plan_id":"pl-cf2a","blocks":["warren-0628","warren-16d2"],"closedAt":"2026-06-12T05:46:06.452Z"} +{"id":"warren-0628","title":"Replace warren's && -chain check:all with the canonical scripts/check-all.ts quiet runner copied byte-identical from templates/l5-toolkit/scripts/check-all.ts. Define warren's exported GATES manifest in the standard's order: lint, typecheck, check:agents, check:dups, check:deps, check:size, check:debt, check:bundle-size, gen:docs:check, gen:openapi:check, check:coverage, check:ci-parity (last). Set package.json check:all to `bun scripts/check-all.ts`, add `verify`: `bun run check:all`, and add scripts/check-all.test.ts. Confirm the quiet-output contract (one aligned line per gate, signatures-only on failure).","status":"closed","type":"task","priority":2,"plan_step_index":1,"description":"\nStep 2 of plan pl-cf2a.\n\nParent seed: warren-16d2 — Adopt canonical check:all standard\nPlan template: feature\nPlan approach: Rename warren's verbose gate keys to canonical, swap the && chain for the byte-identical quiet runner, and refactor warren's existing check-ci-parity.ts to import the shared GATES manifest. Keep warren's repo-specific conditional gates…\n\nRun `sd plan show pl-cf2a` for the full plan (context, alternatives, sibling steps, acceptance criteria).\n","createdAt":"2026-06-12T04:53:28.716Z","updatedAt":"2026-06-12T05:46:06.515Z","plan_id":"pl-cf2a","blocks":["warren-3a0f","warren-16d2"],"closedAt":"2026-06-12T05:46:06.515Z"} +{"id":"warren-3a0f","title":"Refactor warren's existing scripts/check-ci-parity.ts to import the GATES array from scripts/check-all.ts as the single source of truth (replacing its own ROOT_GATES/local derivation), and confirm it scans BOTH .github/workflows/ci.yml and ci-postgres.yml. Run `bun run check:all` and `bun run verify` green end-to-end, confirm check:ci-parity passes with the new manifest, and update CLAUDE.md's Quality Gates section to describe the runner + verify alias.","status":"closed","type":"task","priority":2,"plan_step_index":2,"description":"\nStep 3 of plan pl-cf2a.\n\nParent seed: warren-16d2 — Adopt canonical check:all standard\nPlan template: feature\nPlan approach: Rename warren's verbose gate keys to canonical, swap the && chain for the byte-identical quiet runner, and refactor warren's existing check-ci-parity.ts to import the shared GATES manifest. Keep warren's repo-specific conditional gates…\n\nRun `sd plan show pl-cf2a` for the full plan (context, alternatives, sibling steps, acceptance criteria).\n","createdAt":"2026-06-12T04:53:28.716Z","updatedAt":"2026-06-12T05:46:06.577Z","plan_id":"pl-cf2a","blocks":["warren-16d2"],"closedAt":"2026-06-12T05:46:06.577Z"} {"id":"warren-a63d","title":"Per-agent / per-trigger spend caps: warren tracks cost post-hoc only (runs.cost_usd) with no enforcement anywhere; the audit population adds 3 cron agents (one Fable-tier) so a runaway patrol has no ceiling. Add a budget knob (per-trigger maxCostUsd in triggers.yaml + per-agent frontmatter cap) enforced at dispatch or mid-run","status":"open","type":"feature","priority":2,"createdAt":"2026-06-12T05:04:05.759Z","updatedAt":"2026-06-12T05:04:05.759Z","labels":["audit","infra"]} {"id":"warren-d5e5","title":"Investigate merged PR #270 (2e7316b4): feature-titled PR with zero source code in diff, commit message rationalizes a known-red pre-commit gate ('red independent of this docs-only change'). Constitution Article I violation — determine what happened, whether the claimed feature landed elsewhere, and whether the title/diff class needs an executable gate","status":"open","type":"task","priority":2,"createdAt":"2026-06-12T05:04:05.814Z","updatedAt":"2026-06-12T05:04:05.814Z","labels":["audit","gatewatch"]} {"id":"warren-785b","title":"Raise coverage floors to current actuals: line coverage decayed 91.62% -> 90.82% across baselines while floors in scripts/coverage-budgets.json sat below both (~1.3pt silent slack). Ratchet floors up to actual minus 0.25pt margin per Constitution Article II","status":"open","type":"task","priority":3,"createdAt":"2026-06-12T05:04:05.877Z","updatedAt":"2026-06-12T05:04:05.877Z","labels":["audit","ratchetwatch"]} diff --git a/.seeds/plans.jsonl b/.seeds/plans.jsonl index ee571af2..37342992 100644 --- a/.seeds/plans.jsonl +++ b/.seeds/plans.jsonl @@ -56,7 +56,7 @@ {"id":"pl-f700","seed":"warren-3679","template":"feature","status":"approved","revision":1,"sections":{"context":"Audit (2026-06-10) found warren's logging hygiene is good (pino, no stray console.*, X-Request-ID child loggers on HTTP) but coverage has specific holes that make real operator questions unanswerable: src/runs/spawn/dispatch.ts has zero log statements and its rollback swallows finalize/destroy failures with bare catch {} — a run that dies at spawn shows cancelled with no failure_reason, no system event, and no log line; there is no HTTP access log and auth denials are returned silently (cannot see traffic, error rates, or token guessing); warren-af76: the bridge reconnect loop spins forever on burrow socket timeouts without finalizing the run; run-lifecycle code uses an optional-logger pattern (input.logger?.info?.()) so a missed wiring silently drops a subsystem's logs; runId is hand-threaded into log objects instead of bound once via child loggers; docker-compose.yml sets no logging driver options so container logs grow unbounded. Note: cost/token data is NOT a gap — claude/pi usage is already parsed and persisted to runs.cost_usd / tokens_* (src/runs/usage-aggregate.ts, stats.ts).","approach":"Quick wins first (spawn instrumentation + durable spawn_failed trail, access/auth-denial logging, log rotation, the af76 correctness fix), then the structural pass (per-run child loggers, non-optional loggers, naming convention) that prevents the same class of gap recurring. Persisting spawn failures mirrors the existing reap_failed system-event pattern (src/runs/reap/run.ts:64-69) so the UI surfaces them with no UI work.","alternatives":[{"name":"Adopt OpenTelemetry tracing now","rejected_because":"Heavyweight for a single-process control plane; structured pino with bound run_id/request_id answers the actual operator questions, and OTel can layer on later"},{"name":"Add a /metrics Prometheus endpoint in this pass","rejected_because":"Deferred: the data lives in SQLite and a periodic stats log line covers the home-server case; a metrics endpoint is a follow-up if a real scraper appears"}],"steps":[{"title":"Instrument spawnRun (src/runs/spawn/dispatch.ts): log placement result, provision + dispatch success and latency, and every rollback branch including the currently-swallowed runs.finalize and burrow-destroy failures, carrying run_id and the caller's request_id","type":"task","priority":1,"blocks":[2]},{"title":"Persist spawn failures durably: emit a spawn_failed system event into the events table and set runs.failure_reason instead of a bare cancelled row, mirroring the reap_failed pattern so RunDetail shows the cause for free","type":"task","priority":1,"blocks":[]},{"existing_seed":"warren-af76","blocks":[]},{"title":"HTTP access log + auth-denial logging in src/server/server.ts handleRequest: one info line per request (method, path, status, duration_ms; request_id already bound) and a warn on every denyResponse; add docker-compose json-file logging driver max-size/max-file rotation","type":"task","priority":2,"blocks":[]},{"title":"Structural logger pass: bind logger.child({run_id, burrow_run_id, worker}) once in bridge/reap/detector entry points instead of hand-threading fields; make loggers non-optional in run-lifecycle inputs (required narrow logger + noop default); standardize on the dotted subsystem.action event-name convention repo-wide","type":"task","priority":2,"blocks":[6]},{"title":"Defense and visibility: pino redact config for token-shaped fields (GITHUB_TOKEN, bearer values); periodic operational stats log line (runs by state, active bridges, tick durations, cost aggregates) from data already in SQLite","type":"task","priority":3,"blocks":[]}],"risks":["Access logging on the NDJSON streaming endpoints must log at response-start or completion without buffering the stream","Making loggers non-optional touches many call sites and test fixtures at once; the noop default keeps tests quiet but must not mask the new required wiring in production paths","warren-af76 is a correctness fix in the bridge reconnect path — the failure mode (burrow socket timeout vs burrow run genuinely missing) must be distinguished carefully to avoid finalizing live runs"],"acceptance":["A run that fails at spawn shows a spawn_failed system event and a failure_reason in the API/UI, and the log line carrying its run_id and request_id is greppable","Every HTTP request produces exactly one access-log line including status and duration; bad-token requests produce a warn","A burrow socket-timeout during a bridge no longer loops forever: the run reaches a terminal state with a logged resolution trail (warren-af76 closed)","All run-lifecycle log lines carry run_id via child loggers; grep by run_id reconstructs the full lifecycle of any run","docker compose logs are rotation-bounded","bun run check:all passes"]},"children":["warren-c686","warren-fc6e","warren-af76","warren-26c2","warren-9f06","warren-b2dd"],"createdAt":"2026-06-11T04:45:11.739Z","updatedAt":"2026-06-11T04:45:11.739Z","name":"Logging and traceability hardening","adoptedChildren":["warren-af76"]} {"id":"pl-55df","seed":"warren-e8d1","template":"feature","status":"approved","revision":1,"sections":{"context":"The operator drives serial plan execution today with os-eco/scripts/run-plan.sh (local claude -p per open child of an sd plan, human-readable .log + raw .jsonl artifacts). Warren's server already has the full surface to replace it in the cloud: POST /plan-runs walks a seeds plan's children serially gated on PR merge, GET /plan-runs/:id returns planRun+children+runs in one round trip, POST /plan-runs/:id/cancel exists, and GET /plan-runs/:id/events?follow=1 is an NDJSON union stream of all children that picks up newly dispatched children mid-stream and carries plan_run.* lifecycle events. A typed WarrenClient SDK already ships in src/client/ (WARREN_BASE_URL + WARREN_API_TOKEN, bearer auth, NDJSON streaming for per-run events) but is missing cancelPlanRun and streamPlanRunEvents, and no CLI command uses it — every existing command opens the DB directly via withCliDb. Zero server work is needed.","approach":"Extend WarrenClient with the two missing plan-run methods (sharing the existing NDJSON reader with streamRunEvents) plus a waitForPlanRun mirroring waitForRun, then add a `warren plan run ` command under a new `plan` subcommand group (pattern: existing config/db groups) as the first thin-HTTP-client CLI command: probe() for a friendly unreachable error, POST the plan-run, tail the union stream, and exit with the terminal state code. Ship two output modes: raw NDJSON (matching warren run's contract) and a human-readable renderer porting run-plan.sh's jq event filter so tail -f muscle memory transfers. Resume semantics come free from the server (re-POST skips closed children).","alternatives":[{"name":"Standalone mode against the local DB like warren run","rejected_because":"Rejected by the operator: thin client over HTTP only"},{"name":"Server-side no-PR-gate mode to match run-plan.sh's commit-chain semantics","rejected_because":"Deferred: the mandatory PR-merge gate is the operator's existing taste-gate formalized; revisit only if merge ceremony proves too heavy in practice"},{"name":"SSE instead of NDJSON for tailing","rejected_because":"The server already speaks NDJSON and the client reader exists"}],"steps":[{"title":"Extend src/client/ WarrenClient: cancelPlanRun(id), streamPlanRunEvents(id, {follow, signal}) reusing a shared NDJSON reader with streamRunEvents (note: no sinceSeq on the plan-run stream — document client-side dedupe by (runId, seq) on reconnect), and waitForPlanRun using isTerminalPlanRunState; tests in src/client/","type":"task","priority":2,"blocks":[2]},{"title":"New CLI command src/cli/commands/plan-run.ts + `plan` subcommand group in src/cli/main.ts: warren plan run --project --agent [--prompt-template --ref --provider --model --plot --no-follow] built on WarrenClient.fromEnv (first command without withCliDb); probe first, POST, print {planRun, children} summary, tail events, map terminal state to exit code (succeeded=0 else 1); SIGINT stops tailing without cancelling, second SIGINT exits (mirroring warren run), explicit `warren plan cancel ` for cancellation","type":"task","priority":2,"blocks":[3]},{"title":"Human-readable renderer: port run-plan.sh's jq event filter to TS as a --output pretty mode (timestamps, assistant/thinking/tool_use/tool_result/result blocks with truncation, plan_run.* lifecycle lines for child advancement and merge waits); default stays NDJSON for pipeline parity","type":"task","priority":2,"blocks":[4]},{"title":"Round out the surface: warren plan status (one-shot GET rendering child-state table with cost/duration per child from runs rows) and warren plan list [--project --state]; README Client SDK + CLI docs updates; command tests","type":"task","priority":3,"blocks":[]}],"risks":["The plan-run union stream has no ?since= replay — a dropped connection requires client-side dedupe by (runId, seq) on reconnect or progress display can double-print","Progress stalls silently if the server's GITHUB_TOKEN is missing or WARREN_AUTO_OPEN_PR is disabled (children eventually fail child_succeeded_without_pr); the pretty renderer should surface waiting_for_merge states loudly so the operator knows to go merge","WARREN_API_TOKEN doubles as the server's own token in a shared shell; acceptable for a single-operator deployment but worth a doc note"],"acceptance":["warren plan run --project --agent claude-code against a remote warren dispatches the plan-run and live-tails all children's events on one connection, exiting 0 on success and 1 on failure or cancellation","Re-running the same command after a failure resumes from the next open child (server re-POST semantics verified end-to-end)","--output pretty renders a human log equivalent to run-plan.sh's .log format including plan-level lifecycle lines","warren plan status and warren plan list render child states; warren plan cancel cancels an in-flight plan-run","bun run check:all passes; no server route changes (gen:docs / gen:openapi untouched)"]},"children":["warren-fcc8","warren-ec6a","warren-ae0a","warren-5e3f"],"createdAt":"2026-06-11T04:45:11.819Z","updatedAt":"2026-06-11T04:45:11.819Z","name":"warren plan run thin client"} {"id":"pl-1ec1","seed":"warren-58dc","template":"feature","status":"approved","revision":1,"sections":{"context":"The operator runs agents at the root of ~/Projects/kota-monorepo (many intertwined projects, one git repo) more often than inside individual projects. Investigation (2026-06-10) found warren supports this today with zero code changes: a project is one repo, burrow materializes workspaces as git worktrees of warren's host clone, agents run at repo root, and one branch/one PR per run is the natural monorepo shape. Opt-in features light up from root-level .seeds/, .mulch/, .plot/, .warren/. The rough edges are coarseness, not blockers: feature detection probes the clone root only (src/projects/refresh.ts:73-81), there is no sparse/partial-clone support (full checkout per concurrent run), one preview config and one pr-template for the whole repo, and git-hooks arming reads only the root package.json. True multi-repo burrow workspaces were sized at several weeks across burrow SPEC + dispatch + reap + plan-run PR gating and deliberately deferred; ROADMAP R-20 Colonies covers cross-repo policy grouping without joint workspaces.","approach":"Onboard kota-monorepo as a single warren project now and validate the full loop (dispatch, plan-run, PR) against it, then land the cheap coarseness fixes only as pain is proven: partial clone + sparse-checkout first if checkout time/disk bites, and an R-20 colonies decision spike for the genuinely separate os-eco repos. Explicitly do not build multi-repo workspaces in this arc.","alternatives":[{"name":"True multi-repo burrow workspaces (N clones per sandbox)","rejected_because":"Deferred: several weeks across the most failure-sensitive paths (burrow SPEC, dispatch, reap, plan-run PR gating), and the monorepo path removes the primary motivation"},{"name":"Splitting kota-monorepo packages into separate warren projects","rejected_because":"The projects are closely intertwined and the operator's working habit is monorepo-root runs"}],"steps":[{"title":"Onboard kota-monorepo: add as a warren project, place .warren/config.yaml (defaultRole, defaultPrompt, runBranchPrefix) plus .seeds/.mulch/.plot/ at the repo root, dispatch a smoke run and a small plan-run end-to-end, and record the validated setup + any friction as mulch records","type":"task","priority":2,"blocks":[2]},{"title":"Measure and decide on sparse/partial clone: capture clone + per-run worktree checkout time and disk for kota-monorepo under concurrent runs; if it bites, implement --filter=blob:none on warren's clone (src/projects/clone.ts, refresh.ts) and a per-project sparsePaths burrow applies after worktree add (burrow src/provider/local/workspace.ts + sandbox binds); if it does not bite, close with the measurements recorded","type":"task","priority":3,"blocks":[]},{"title":"R-20 Colonies decision spike for os-eco's separate repos: evaluate the ROADMAP sketch (colonies/colony_members tables, shared scheduling, aggregate dashboard, colony-level agents) against actual cross-repo needs observed after the monorepo + plan-run CLI workflows settle; outcome is a go/no-go note on the roadmap item, not code","type":"task","priority":4,"blocks":[]}],"risks":["kota-monorepo's on-disk size is unverified — if very large, full checkouts per concurrent run could hurt before step 2's measurements land","Root-level .seeds/ means all packages share one issue queue and one PR stream; acceptable by design but per-package scoping pressure may emerge","Sparse-checkout in burrow touches the sandbox bind set and worktree lifecycle — needs burrow-side review against ../burrow/SPEC.md before implementation"],"acceptance":["kota-monorepo is a registered warren project and a plan-run executed against it end-to-end (dispatch, child PRs, merge-gated advancement)","A documented measurement of clone/checkout cost exists with an explicit go/no-go on sparse support","R-20 has a recorded decision note","Any code changes pass bun run check:all (and burrow's gates if sparse lands)"]},"children":["warren-b32d","warren-4c45","warren-2fa8"],"createdAt":"2026-06-11T04:45:11.903Z","updatedAt":"2026-06-11T04:45:11.903Z","name":"Monorepo onboarding (kota-monorepo)"} -{"id":"pl-cf2a","seed":"warren-16d2","template":"feature","status":"approved","revision":1,"sections":{"context":"warren is the os-eco reference L5 repo and originated the check:ci-parity gate, but its check:all is a 12-gate && chain using verbose names (check:file-sizes, check:debt-markers, check:duplicates, validate:agents-md) that diverge from the canonical terse vocabulary frozen in docs/check-all-standard.md (os-eco-9048) and shipped as the portable runner in templates/l5-toolkit/scripts (os-eco-5db7). warren has no quiet runner (it chains with &&) and no `verify` alias. Conforming warren makes the reference repo match the standard it anchors. Depends on root tracker pl-760e steps 1-2.","approach":"Rename warren's verbose gate keys to canonical, swap the && chain for the byte-identical quiet runner, and refactor warren's existing check-ci-parity.ts to import the shared GATES manifest. Keep warren's repo-specific conditional gates (check:bundle-size, gen:docs:check, gen:openapi:check) under their canonical names, slotted before check:coverage with check:ci-parity last.","steps":[{"title":"Rename warren's verbose package.json gate keys to the canonical terse names: check:file-sizes -> check:size, check:debt-markers -> check:debt, check:duplicates -> check:dups, validate:agents-md -> check:agents (keep check:deps, check:coverage, check:bundle-size, gen:docs:check, gen:openapi:check as-is). Update every reference to the old keys in CLAUDE.md, .github/workflows/ci.yml, .github/workflows/ci-postgres.yml, and any scripts; retain each old key as a one-cycle deprecated alias only if an external consumer needs it. Do not yet touch check:all.","type":"task","priority":2,"blocks":[2]},{"title":"Replace warren's && -chain check:all with the canonical scripts/check-all.ts quiet runner copied byte-identical from templates/l5-toolkit/scripts/check-all.ts. Define warren's exported GATES manifest in the standard's order: lint, typecheck, check:agents, check:dups, check:deps, check:size, check:debt, check:bundle-size, gen:docs:check, gen:openapi:check, check:coverage, check:ci-parity (last). Set package.json check:all to `bun scripts/check-all.ts`, add `verify`: `bun run check:all`, and add scripts/check-all.test.ts. Confirm the quiet-output contract (one aligned line per gate, signatures-only on failure).","type":"task","priority":2,"blocks":[3]},{"title":"Refactor warren's existing scripts/check-ci-parity.ts to import the GATES array from scripts/check-all.ts as the single source of truth (replacing its own ROOT_GATES/local derivation), and confirm it scans BOTH .github/workflows/ci.yml and ci-postgres.yml. Run `bun run check:all` and `bun run verify` green end-to-end, confirm check:ci-parity passes with the new manifest, and update CLAUDE.md's Quality Gates section to describe the runner + verify alias.","type":"task","priority":2,"blocks":[]}],"acceptance":["warren package.json uses canonical terse gate names; no verbose key remains except optional documented one-cycle aliases.","check:all is `bun scripts/check-all.ts` with a byte-identical runner and a GATES manifest in the standard's order; verify aliases check:all.","scripts/check-ci-parity.ts imports GATES from check-all.ts and passes against ci.yml + ci-postgres.yml.","`bun run check:all` runs green with the quiet-output contract."]},"children":["warren-c123","warren-0628","warren-3a0f"],"createdAt":"2026-06-12T04:53:28.716Z","updatedAt":"2026-06-12T04:53:28.716Z","name":"Adopt canonical check:all standard (warren)"} +{"id":"pl-cf2a","seed":"warren-16d2","template":"feature","status":"done","revision":1,"sections":{"context":"warren is the os-eco reference L5 repo and originated the check:ci-parity gate, but its check:all is a 12-gate && chain using verbose names (check:file-sizes, check:debt-markers, check:duplicates, validate:agents-md) that diverge from the canonical terse vocabulary frozen in docs/check-all-standard.md (os-eco-9048) and shipped as the portable runner in templates/l5-toolkit/scripts (os-eco-5db7). warren has no quiet runner (it chains with &&) and no `verify` alias. Conforming warren makes the reference repo match the standard it anchors. Depends on root tracker pl-760e steps 1-2.","approach":"Rename warren's verbose gate keys to canonical, swap the && chain for the byte-identical quiet runner, and refactor warren's existing check-ci-parity.ts to import the shared GATES manifest. Keep warren's repo-specific conditional gates (check:bundle-size, gen:docs:check, gen:openapi:check) under their canonical names, slotted before check:coverage with check:ci-parity last.","steps":[{"title":"Rename warren's verbose package.json gate keys to the canonical terse names: check:file-sizes -> check:size, check:debt-markers -> check:debt, check:duplicates -> check:dups, validate:agents-md -> check:agents (keep check:deps, check:coverage, check:bundle-size, gen:docs:check, gen:openapi:check as-is). Update every reference to the old keys in CLAUDE.md, .github/workflows/ci.yml, .github/workflows/ci-postgres.yml, and any scripts; retain each old key as a one-cycle deprecated alias only if an external consumer needs it. Do not yet touch check:all.","type":"task","priority":2,"blocks":[2]},{"title":"Replace warren's && -chain check:all with the canonical scripts/check-all.ts quiet runner copied byte-identical from templates/l5-toolkit/scripts/check-all.ts. Define warren's exported GATES manifest in the standard's order: lint, typecheck, check:agents, check:dups, check:deps, check:size, check:debt, check:bundle-size, gen:docs:check, gen:openapi:check, check:coverage, check:ci-parity (last). Set package.json check:all to `bun scripts/check-all.ts`, add `verify`: `bun run check:all`, and add scripts/check-all.test.ts. Confirm the quiet-output contract (one aligned line per gate, signatures-only on failure).","type":"task","priority":2,"blocks":[3]},{"title":"Refactor warren's existing scripts/check-ci-parity.ts to import the GATES array from scripts/check-all.ts as the single source of truth (replacing its own ROOT_GATES/local derivation), and confirm it scans BOTH .github/workflows/ci.yml and ci-postgres.yml. Run `bun run check:all` and `bun run verify` green end-to-end, confirm check:ci-parity passes with the new manifest, and update CLAUDE.md's Quality Gates section to describe the runner + verify alias.","type":"task","priority":2,"blocks":[]}],"acceptance":["warren package.json uses canonical terse gate names; no verbose key remains except optional documented one-cycle aliases.","check:all is `bun scripts/check-all.ts` with a byte-identical runner and a GATES manifest in the standard's order; verify aliases check:all.","scripts/check-ci-parity.ts imports GATES from check-all.ts and passes against ci.yml + ci-postgres.yml.","`bun run check:all` runs green with the quiet-output contract."]},"children":["warren-c123","warren-0628","warren-3a0f"],"createdAt":"2026-06-12T04:53:28.716Z","updatedAt":"2026-06-12T05:46:06.641Z","name":"Adopt canonical check:all standard (warren)","outcome":"success"} {"id":"pl-9796","seed":"warren-027f","template":"refactor","status":"done","revision":1,"sections":{"context":"Nightwatch patrol of 2026-06-11. Quality gates are green (bun test: 2632 pass / 0 fail, bun run lint, bun run typecheck all clean). Two small, in-scope quality issues surfaced during the scan: (1) a doc-comment table count in the SQLite schema header has drifted from the actual schema, and (2) two query-param validation errors in the runs list handler omit the standard '; got ' suffix that every other enumerated/literal validation error in src/server/handlers carries. Both are low-risk, single-PR fixes that improve doc accuracy and error-message consistency without touching behavior.","behavior_invariant":"No runtime behavior changes. The SQLite/Postgres physical schema (table names, columns, FKs, indexes) stays byte-identical — only a header doc comment changes. For the validation-message fix, the same inputs that throw ValidationError today must still throw ValidationError (same 400 status, same field), and the same valid inputs must still pass; only the human-readable error string gains the '; got ' suffix to match sibling messages. All existing tests must continue to pass; any test asserting the exact error strings for ?sort / ?dir is updated in lockstep.","approach":"Two independent, mechanical edits, each landing as its own PR, followed by the standard release step. Neither edit changes a public API signature, adds dependencies, or reorganizes modules. Order is irrelevant between the two fixes (no shared files), so they run in parallel; the release step is gated on both.","steps":[{"title":"Fix 'Seven tables' doc drift in src/db/schema/sqlite.ts header","type":"task","priority":3,"labels":["nightwatch"],"blocks":[3]},{"title":"Add '; got ' suffix to ?sort/?dir validation errors in runs/lifecycle.ts","type":"task","priority":3,"labels":["nightwatch"],"blocks":[3]},{"title":"Release: run /release per .claude/commands/release.md","type":"task","priority":3,"labels":["nightwatch"],"blocks":[]}],"acceptance":["src/db/schema/sqlite.ts header comment names the correct table count (12) and lists the tables actually defined in the file (agents, projects, runs, events, triggers, workers, burrows, planRuns, planRunChildren, plots, conversations, messages); no schema definitions changed; src/db/schema/drift.test.ts still passes.","src/server/handlers/runs/lifecycle.ts:19 and :26 throw ValidationError messages ending in \"; got ''\" (using the rejected raw value), matching the ?limit/?offset messages directly below them and the conversations ?status message; behavior (status code, which inputs are rejected) is unchanged.","Full quality suite is green: bun test, bun run lint, bun run typecheck all pass; any test asserting the old ?sort/?dir strings is updated to match.","Release step completes per .claude/commands/release.md after both fixes merge."]},"children":["warren-897b","warren-879c","warren-e338"],"createdAt":"2026-06-11T09:05:59.371Z","updatedAt":"2026-06-11T09:23:51.640Z","name":"nightwatch patrol 2026-06-11: doc-drift + validation-message consistency"} {"id":"pl-a04a","seed":"warren-6702","template":"refactor","status":"done","revision":1,"sections":{"context":"Patrol scan of 2026-06-12 found one coherent cross-call-site inconsistency: warren parses \"is this env flag truthy\" (default-off flags) at least five different ways with divergent accepted-token sets and case handling, so the same conceptual flag behaves differently depending on which subsystem reads it.\n\nDefault-OFF (truthy) parsers, today:\n- src/server/probe.ts:75 — inline `disabledRaw === \"1\" || disabledRaw === \"true\"` for WARREN_WORKER_PROBE_DISABLED. Case-sensitive, no trim, accepts only 1/true. `=TRUE`, `=Yes`, `=on` are silently ignored.\n- src/server/config.ts:128 — inline `disabled === \"1\" || disabled === \"true\"` for WARREN_DISABLE_UI. Case-sensitive, accepts only 1/true (the doc comment at config.ts:20 even says \"'1'/'true' to disable\"), so `=on`/`=yes`/`=TRUE` are ignored.\n- src/supervisor/main.ts:382 `parseBoolEnv` — `raw === \"1\" || raw.toLowerCase() === \"true\"` for WARREN_BURROW_NO_AUTH (controls burrow `--no-auth`). Lowercases only `true`, accepts 1/true, not yes/on.\n- src/server/main/utils.ts:80 `parseTrueEnv` — trim+lowercase, accepts 1/true/yes (NOT on). Used by detector/poller/idle flags.\n- src/preview/eviction/config.ts:74 and src/runs/reap/gc.ts:107 `isTruthy` (byte-identical) — trim+lowercase, accepts 1/true/yes/on.\n\nBy contrast the default-ON (falsy/opt-out) parsers already agree on one set — `0/false/no/off` — across src/triggers/config.ts:63, src/plan-runs/config.ts:77, and src/runs/pr.ts:387. The truthy side should reach the same internal consistency. The canonical target token set is `1/true/yes/on`, trimmed and case-insensitive (the broadest existing behavior, symmetric with the falsy `0/false/no/off`).\n\nThis is a correctness/consistency finding, not new behavior: an operator setting WARREN_WORKER_PROBE_DISABLED=on or WARREN_DISABLE_UI=TRUE today gets surprising no-ops. No public API signature changes; no new module is required (parseTrueEnv already exists as the reference implementation).","behavior_invariant":"Every truthy env-var parse keeps its existing default (absent or empty value => false) and keeps accepting the values it accepts today: `1` and `true` must remain truthy at every call site. The HTTP query-param boolean parser `parseBoolean` in src/server/handlers/index.ts (which intentionally rejects unknown values rather than coercing) is OUT of scope and must NOT change. The default-ON opt-out parsers (triggers/config.ts, plan-runs/config.ts, runs/pr.ts) must NOT change. No route, response shape, or public function signature changes. The only observable difference is that previously-ignored truthy spellings (case variants, surrounding whitespace, and the `yes`/`on` tokens) are now honored uniformly.","approach":"Converge all default-OFF truthy env parsers on a single accepted token set — `1`, `true`, `yes`, `on` — compared after `.trim().toLowerCase()`, matching the existing `isTruthy` in eviction/config.ts and reap/gc.ts and symmetric with the falsy `0/false/no/off` set. Use the already-exported `parseTrueEnv` (src/server/main/utils.ts) as the canonical reference: extend it to also accept `on`, then route the divergent inline parsers through the same token logic. Keep changes minimal and local — do not introduce a new shared module or change any signatures; where a call site cannot cleanly import the canonical helper (e.g. the supervisor, which crosses the warren↔burrow boundary), harmonize its inline token set in place. Split the supervisor change into its own step because WARREN_BURROW_NO_AUTH gates burrow's `--no-auth` and lives across the warren↔burrow boundary that AGENTS.md flags for extra review.","steps":[{"title":"Harmonize app-side truthy env parsers on 1/true/yes/on (case-insensitive)","type":"task","priority":3,"blocks":[3],"labels":["nightwatch"]},{"title":"Harmonize supervisor parseBoolEnv for WARREN_BURROW_NO_AUTH","type":"task","priority":3,"blocks":[3],"labels":["nightwatch"]},{"title":"Release: run /release per .claude/commands/release.md","type":"task","priority":3,"blocks":[],"labels":["nightwatch"]}],"acceptance":["Step 1 — src/server/probe.ts (WARREN_WORKER_PROBE_DISABLED) and src/server/config.ts (WARREN_DISABLE_UI) trim+lowercase their raw value and accept 1/true/yes/on; src/server/main/utils.ts parseTrueEnv also accepts `on` so it, isTruthy (eviction/config.ts, reap/gc.ts), probe.ts and config.ts all recognize the same four tokens. The WARREN_DISABLE_UI doc comment in src/server/config.ts:20 is updated to list the accepted set. New/updated unit tests assert each parser treats `On`, `YES`, ` true `, and `1` as true and `0`/empty/undefined/`off` as false.","Step 2 — src/supervisor/main.ts parseBoolEnv trims, lowercases, and accepts 1/true/yes/on; an added test asserts WARREN_BURROW_NO_AUTH variants (`On`, `YES`, ` true `) enable no-auth while absent/empty/`0`/`false` keep auth on (fail-safe default preserved).","Regression: `1` and `true` remain truthy everywhere they were before; the opt-out parsers (triggers/config.ts, plan-runs/config.ts, runs/pr.ts) and the HTTP query parseBoolean are unchanged.","bun test, bun run lint, and bun run typecheck all pass; bun run check:all is green."]},"children":["warren-c27f","warren-83ae","warren-1b53"],"createdAt":"2026-06-12T09:06:24.600Z","updatedAt":"2026-06-12T09:23:59.362Z","name":"Harmonize truthy boolean env-var parsers"} {"id":"pl-723f","seed":"warren-1339","template":"refactor","status":"done","revision":1,"sections":{"context":"Ratchetwatch patrol 2026-06-12 measured two units of silent ratchet slack (Constitution Article II — 'Ratchets only tighten'). (1) Coverage floors in scripts/coverage-budgets.json sit well below current actuals: functions floor 87.09% vs actual 88.85% (slack 1.76pt), lines floor 90.32% vs actual 91.79% (slack 1.47pt) — both exceed the ~0.75pt debt threshold (measured twice via `bun run check:coverage`, stable). This is the standing finding warren-785b. (2) The file-size grandfather list (scripts/file-size-budgets.json) carries src/server/handlers/plots.list.test.ts at 700 lines — the entry furthest over the global 500-line limit (200 lines over) and not covered by any open seed (its creator warren-3f46 is closed). Both tightenings are mechanical and leave every gate green. Out of scope this patrol: bundle creep is ~7KB gzip over the trailing 7 days (pre-window 2026-05-29 gzip js 297562 -> current 304567), well under the ~20KB threshold and feature-justified (Leveret UI, token KPI charts); debt-marker allowlist is empty; no file-size entries were grandfathered in the last 24h.","behavior_invariant":"No production behavior changes. The coverage gate (`bun run check:coverage`) must still PASS after the floor raise — current actuals (functions 88.85%, lines 91.79%) must remain at or above the new floors (88.60% / 91.54%), preserving the 0.25pt noise margin. The plots.list.test.ts decomposition must preserve every existing test: the same describe/test cases must run and pass after the split (`bun test src/server/handlers/` green), and the HTTP behavior they exercise (GET /plots, GET /plots?filter=needs_attention, GET /plots/needs-attention/count, POST /plots) is unchanged — only the test file is reorganized. No floor may be lowered and no budget raised.","approach":"Two independent, append-only ratchet tightenings landed as separate child steps. Step 1 edits only scripts/coverage-budgets.json, raising the two floors to actual-minus-0.25pt. Step 2 splits the 700-line plots.list.test.ts along its existing describe-block seams into sibling test files each under the 500-line global limit, removes the now-obsolete grandfather entry from scripts/file-size-budgets.json, and runs the Article VI repo-wide old-path search before declaring done. The plan deliberately decomposes exactly ONE file (ratchetwatch's one-per-patrol cap) and adds NO release step (Article III — hygiene batches into the next real release).","steps":[{"title":"Raise coverage floors to current actuals minus 0.25pt margin (addresses warren-785b). In scripts/coverage-budgets.json set \"functions\": 88.60 (was 87.09; actual 88.85) and \"lines\": 91.54 (was 90.32; actual 91.79). Update the _comment's parenthetical baseline note to record the new measurement (functions 88.85%, lines 91.79%, ratchetwatch 2026-06-12). Change nothing else. Verify: `bun run check:coverage` exits 0 and prints functions/lines at or above the new floors.","labels":["ratchetwatch"]},{"title":"Decompose src/server/handlers/plots.list.test.ts (700 lines, furthest-over grandfathered file) below the 500-line global limit. Split along its existing top-level describe seams into sibling files under src/server/handlers/ (e.g. plots.list.test.ts keeps GET /plots; new plots.needs-attention.test.ts for the GET /plots?filter=needs_attention + GET /plots/needs-attention/count blocks; new plots.create.test.ts for the POST /plots block) so every resulting file is < 500 lines and each test still runs. Preserve all shared setup/fixtures (hoist into a sibling helper if duplicated). Then REMOVE the \"src/server/handlers/plots.list.test.ts\": 700 entry from scripts/file-size-budgets.json (do not re-add entries for the new files — they must default-pass under threshold). Per Constitution Article VI, before declaring done run a repo-wide search for the old path across ALL file types — `rg -n \"plots.list.test\" --hidden -g '!node_modules'` plus an explicit sweep of Dockerfile, .github/workflows/*.yml, supervisor/config strings, and docs/ — and fix any reference (file moves have broken production here before; encode the check, do not assume it). Verify: `bun test src/server/handlers/` is green (same test count) AND `bun run check:file-sizes` exits 0.","labels":["ratchetwatch"]}],"acceptance":["scripts/coverage-budgets.json floors are functions 88.60 and lines 91.54; `bun run check:coverage` passes with actuals at or above both.","No coverage floor was lowered and no bundle/file-size budget was raised anywhere in the diff.","src/server/handlers/plots.list.test.ts and every sibling file produced by the split are each under 500 lines; the grandfather entry for plots.list.test.ts is removed from scripts/file-size-budgets.json with no replacement entries added.","`bun test src/server/handlers/` runs the same set of test cases as before the split and is green; `bun run check:file-sizes` exits 0.","The Article VI repo-wide old-path search for `plots.list.test` (including Dockerfile, workflow YAML, config strings, and docs) found and fixed every stale reference, or confirmed there were none.","No release step is included (Article III)."]},"children":["warren-8ff2","warren-8e04"],"createdAt":"2026-06-12T10:45:31.416Z","updatedAt":"2026-06-12T10:59:47.327Z","name":"ratchetwatch tightening: 2026-06-12"} diff --git a/AGENTS.md b/AGENTS.md index b3cc431a..790f3cf2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -77,20 +77,26 @@ bun run ui:install # cd src/ui && bun install Run all checks before committing — warnings count as failures: ```bash -bun run check:all +bun run check:all # or its agent-facing alias: bun run verify ``` -This runs: `check:coverage` (tests + coverage ratchet), `lint`, -`typecheck`, `validate:agents-md`, `check:file-sizes`, -`check:debt-markers`, `check:duplicates` (jscpd), `check:deps`, -`check:bundle-size:build`, `gen:docs:check`, and `gen:openapi:check` -— the same set CI enforces (see -`.github/workflows/ci.yml`). Do not merge with lint warnings; fix at -write time or promote to error in `biome.json`. +`check:all` is the os-eco canonical quiet runner (`scripts/check-all.ts`, +byte-identical to `../templates/l5-toolkit/scripts/check-all.ts` at the +os-eco root — never edit it in place). It prints one aligned status line +per gate and a `12/12 gates passed` tally; on failure it shows parsed +failure signatures plus a `re-run: bun run ` hint +(`CHECK_ALL_VERBOSE=1` streams full output, `--bail` stops early). +Warren's resolved manifest, in order: `lint`, `typecheck`, +`check:agents`, `check:dups` (jscpd), `check:deps`, `check:size`, +`check:debt`, `check:bundle-size`, `gen:docs:check`, `gen:openapi:check`, +`check:coverage` (tests + coverage ratchet), and `check:ci-parity` — +the same set CI enforces (see `.github/workflows/ci.yml`; escape +hatches live in `scripts/ci-parity-config.json`). Do not merge with +lint warnings; fix at write time or promote to error in `biome.json`. Details on the additional checks: -- **`check:file-sizes`** (warren-4553) — enforces a per-file line-count +- **`check:size`** (warren-4553) — enforces a per-file line-count budget. New `.ts`/`.tsx` files under `src/` and `scripts/` must stay ≤ 500 lines; existing oversized files are grandfathered in `scripts/file-size-budgets.json` and may not grow past their frozen @@ -98,14 +104,14 @@ Details on the additional checks: `noExcessiveLinesPerFunction` rule (also 500-line cap) enforces the same budget at the function level, with the same baseline exceptions called out in `biome.json`'s `overrides`. -- **`check:debt-markers`** (warren-7f2b) — scans `src/` and `scripts/` +- **`check:debt`** (warren-7f2b) — scans `src/` and `scripts/` `.ts`/`.tsx` for `TODO` / `FIXME` / `HACK` / `XXX` and fails if any marker lacks a tracker reference on the same line (`warren-XXXX`, `pl-XXXX`, `mx-XXXX`, `#NNN`, or a URL). The ratchet grandfather list lives in `scripts/debt-marker-allowlist.json` and only goes down — pair new markers with an id (or remove them) rather than appending to the allowlist. -- **`validate:agents-md`** — validates that `AGENTS.md` references +- **`check:agents`** — validates that `AGENTS.md` references (`bun run ` commands and backtick-quoted paths) still exist. Biome's `noExcessiveCognitiveComplexity` rule (warren-d3a6, cognitive @@ -121,16 +127,16 @@ entries. extension (`.js`, `.css`) and the largest single chunk's gzipped size. Never hand-edit the budget JSON from Vite's build-log gzip number — it runs ~2KB cooler than this guard, so eyeballed budgets - fail CI. Re-baseline with `bun run check:bundle-size:build --update`, + fail CI. Re-baseline with `bun run check:bundle-size --update`, which writes the authoritative measured numbers: lowering always applies, ordinary growth auto-raises within `AUTO_RAISE_CAP`, and a heavy new dep past the cap needs `WARREN_BUNDLE_SIZE_ALLOW_RAISE=1`. The `bundle-size-autoheal` workflow re-baselines + pushes for you when a PR fails on a within-cap overshoot, so a few-hundred-byte miss never - halts a run. Run `bun run check:bundle-size` against an existing - `src/ui/dist` tree, or `bun run check:bundle-size:build` to build - first; CI uses the explicit `build:ui` + `check:bundle-size` pair so - the build step is visible in logs. + halts a run. The script body carries `--build`, so `bun run + check:bundle-size` is self-contained (frozen-lockfile UI install + + build, then measure); CI additionally keeps an explicit `build:ui` + step so the build is visible in logs. - **`check:coverage`** (warren-e4b1) — wraps `bun test --coverage` (text + lcov reporters) and enforces the floors in @@ -163,7 +169,7 @@ workspace. The fix for a knip hit is almost always `bun remove ` (or `cd src/ui && bun remove `) — only ignore a dep when it's resolved by string at runtime (e.g. a pino transport target). -`check:all` runs `bun run check:duplicates` (warren-61e9), which invokes +`check:all` runs `bun run check:dups` (warren-61e9), which invokes [jscpd](https://github.com/kucherenko/jscpd) over `src/**/*.{ts,tsx}` to detect copy-pasted code. Config lives in `.jscpd.json`: tests, auto-generated migrations (`src/db/migrations/`), drizzle schema @@ -243,8 +249,8 @@ shape change with `WARREN_UPDATE_GOLDENS=1 bun test src/server/responses.golden.test.ts`, then `git diff` the fixtures and commit only the diffs you meant. The directory name mirrors the upstream burrow convention (the `__golden__` fixture dirs under -burrow's parser tree) and is already excluded from `check:file-sizes`, `check:debt-markers`, -`check:duplicates`, and Biome's filename-convention rule — keep new +burrow's parser tree) and is already excluded from `check:size`, `check:debt`, +`check:dups`, and Biome's filename-convention rule — keep new golden directories under the same name so those exclusions keep applying without churn. diff --git a/CHANGELOG.md b/CHANGELOG.md index e4eea7e4..52c344ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- **`quality(dx)`** — adopted the os-eco canonical `check:all` standard + (pl-cf2a; `docs/check-all-standard.md` at the os-eco root). Verbose + gate names renamed to the frozen terse vocabulary: + `check:file-sizes` → `check:size`, `check:debt-markers` → + `check:debt`, `check:duplicates` → `check:dups`, `validate:agents-md` + → `check:agents` (no deprecated aliases kept). `check:all` is now the + byte-identical quiet runner `scripts/check-all.ts` (one aligned line + per gate, signature-only failures, `CHECK_ALL_VERBOSE=1` / `--bail`), + with a new `verify` alias. `scripts/check-ci-parity.ts` is now the + fleet-canonical copy that imports `GATES` from `check-all.ts`; escape + hatches moved to `scripts/ci-parity-config.json`. `check:bundle-size` + now carries `--build` (self-contained frozen-lockfile UI build) and + the separate `check:bundle-size:build` script was removed. + ## [0.8.9] — 2026-06-13 Default-OFF kill-switch env-flag parser harmonization from the nightwatch diff --git a/CLAUDE.md b/CLAUDE.md index 82503197..b5be7ffb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -153,18 +153,49 @@ bun run ui:install # cd src/ui && bun install Run all checks before committing — warnings count as failures: ```bash -bun run check:all +bun run check:all # or its agent-facing alias: bun run verify ``` -This runs: `check:coverage` (tests + coverage ratchet), `lint`, -`typecheck`, `validate:agents-md`, `check:file-sizes`, -`check:debt-markers`, `check:duplicates` (jscpd), `check:deps`, -`check:bundle-size:build`, `gen:docs:check`, and `gen:openapi:check` +`check:all` is `bun scripts/check-all.ts` — the os-eco fleet's canonical +quiet runner (see `docs/check-all-standard.md` at the os-eco root). The +script is **byte-identical** to +`templates/l5-toolkit/scripts/check-all.ts`; never edit it in place — +all per-repo variation comes from `package.json`, which the runner +filters against the frozen canonical gate order. (Both frozen scripts +are exempted from Biome's formatter via a `biome.json` override so the +local formatter can't break byte-identity; the linter still covers +them.) Warren's resolved +manifest (exported as `GATES`) is: `lint`, `typecheck`, `check:agents`, +`check:dups` (jscpd), `check:deps`, `check:size`, `check:debt`, +`check:bundle-size`, `gen:docs:check`, `gen:openapi:check` (warren-b46b: keeps the `docs/openapi.yaml` OpenAPI 3.1 schema in sync -with `ROUTE_TABLE`) — the same set CI enforces (see +with `ROUTE_TABLE`), `check:coverage` (tests + coverage ratchet), and +`check:ci-parity` — the same set CI enforces (see `.github/workflows/ci.yml`). Don't merge with lint warnings; fix at write time or promote to error in `biome.json`. +Output contract ("quiet"): one aligned `<✓|✗> (N.Ns)` line per +gate, then a one-line tally on success (`12/12 gates passed (…s)`). On +failure it prints the failing gate names plus parsed failure signatures +(bun-test `(fail)` lines, tsc/biome errors, budget violations) — never +the full log — and a `re-run: bun run ` hint. Set +`CHECK_ALL_VERBOSE=1` to stream full output; pass `--bail` to stop at +the first failing gate. + +`verify` is the standard agent-facing entry point and is always exactly +`bun run check:all` — neither name may diverge from the other. + +`check:ci-parity` (`bun scripts/check-ci-parity.ts`, also byte-identical +to the template copy) imports `GATES` from `check-all.ts`, parses every +`.github/workflows/ci*.yml` (today `ci.yml` + `ci-postgres.yml`), and +fails when a CI `bun run ` step is not transitively reachable from +the manifest. Per-repo escape hatches live in +`scripts/ci-parity-config.json` — `aliases` (e.g. `check:coverage:ci` → +`check:coverage`) for same-gate-different-reporter variants, `ciOnly` +(`ui:install`, `build:ui`, `report:test-timing`, +`report:quality-metrics`) for intentionally CI-only steps. Justify every +entry in the config's `$comment`; never edit the script itself. + `check:coverage` (warren-e4b1) wraps `bun test --coverage` and enforces the floors in `scripts/coverage-budgets.json` against the "All files" row of Bun's text coverage reporter. CI runs `check:coverage:ci`, which @@ -201,7 +232,8 @@ CI's fresh install.** The build is byte-reproducible across machines — measures the exact same bytes as CI. If your numbers disagree with CI, `rm -rf src/ui/node_modules` and rebuild; don't pad the budget. Never hand-edit the numbers — to re-baseline, run `bun run -check:bundle-size:build --update`, which writes budgets straight from +check:bundle-size --update` (the script body carries `--build`, so it +always builds first), which writes budgets straight from the measured build plus a small churn headroom, using the SAME Node-zlib gzip the guard enforces (so a budget it writes always passes — this is what closes the Vite parity gap; stop copying Vite's cooler number). diff --git a/agents/tastewatch.md b/agents/tastewatch.md index 6f9c5b07..bf11e55b 100644 --- a/agents/tastewatch.md +++ b/agents/tastewatch.md @@ -3,7 +3,7 @@ name: tastewatch description: "Taste auditor: weekly stratified sample of merged work judged against docs/CONSTITUTION.md; report-only, one digest seed, no dispatch authority" runtime: pi provider: anthropic -model: claude-fable-5 +model: claude-opus-4-8 --- ## system diff --git a/biome.json b/biome.json index df38aa0f..4fb5748d 100644 --- a/biome.json +++ b/biome.json @@ -124,6 +124,12 @@ } } } + }, + { + "includes": ["scripts/check-all.ts", "scripts/check-ci-parity.ts"], + "formatter": { + "enabled": false + } } ] } diff --git a/bun.lock b/bun.lock index c460f4fd..ba3e7092 100644 --- a/bun.lock +++ b/bun.lock @@ -24,6 +24,7 @@ "jscpd": "^4.2.4", "knip": "^6.14.2", "typescript": "^6.0.3", + "yaml": "^2.8.2", }, }, }, diff --git a/package.json b/package.json index aa39003d..fad7191d 100644 --- a/package.json +++ b/package.json @@ -42,19 +42,19 @@ "lint": "biome check --error-on-warnings .", "lint:fix": "biome check --write --error-on-warnings .", "typecheck": "tsc --noEmit", - "validate:agents-md": "bun run scripts/validate-agents-md.ts", - "check:file-sizes": "bun run scripts/check-file-sizes.ts", - "check:debt-markers": "bun run scripts/check-debt-markers.ts", - "check:duplicates": "jscpd", + "check:agents": "bun run scripts/validate-agents-md.ts", + "check:size": "bun run scripts/check-file-sizes.ts", + "check:debt": "bun run scripts/check-debt-markers.ts", + "check:dups": "jscpd", "check:deps": "knip --dependencies", - "check:bundle-size": "bun run scripts/check-bundle-size.ts", - "check:bundle-size:build": "bun run scripts/check-bundle-size.ts --build", + "check:bundle-size": "bun run scripts/check-bundle-size.ts --build", "gen:docs": "bun run scripts/generate-docs.ts", "gen:docs:check": "bun run scripts/generate-docs.ts --check", "gen:openapi": "bun run scripts/generate-openapi.ts", "gen:openapi:check": "bun run scripts/generate-openapi.ts --check", - "check:ci-parity": "bun run scripts/check-ci-parity.ts", - "check:all": "bun run check:coverage && bun run lint && bun run typecheck && bun run validate:agents-md && bun run check:file-sizes && bun run check:debt-markers && bun run check:duplicates && bun run check:deps && bun run check:bundle-size:build && bun run gen:docs:check && bun run gen:openapi:check && bun run check:ci-parity", + "check:ci-parity": "bun scripts/check-ci-parity.ts", + "check:all": "bun scripts/check-all.ts", + "verify": "bun run check:all", "db:generate": "bun run db:generate:sqlite && bun run db:generate:postgres", "db:generate:sqlite": "drizzle-kit generate --config=drizzle.config.sqlite.ts", "db:generate:postgres": "drizzle-kit generate --config=drizzle.config.postgres.ts", @@ -73,7 +73,8 @@ "drizzle-kit": "^0.31.10", "jscpd": "^4.2.4", "knip": "^6.14.2", - "typescript": "^6.0.3" + "typescript": "^6.0.3", + "yaml": "^2.8.2" }, "dependencies": { "@os-eco/burrow-cli": "^0.3.12", diff --git a/scripts/check-all.test.ts b/scripts/check-all.test.ts new file mode 100644 index 00000000..e041a6bd --- /dev/null +++ b/scripts/check-all.test.ts @@ -0,0 +1,91 @@ +import { describe, expect, test } from "bun:test"; +import { + CANONICAL_GATES, + extractFailureSignatures, + formatGateLine, + GATES, + loadScripts, + resolveGates, +} from "./check-all.ts"; + +const CANONICAL_ORDER = CANONICAL_GATES.map((g) => g.name); + +describe("check-all", () => { + test("canonical order: lint first, coverage second-to-last, ci-parity last", () => { + expect(CANONICAL_ORDER[0]).toBe("lint"); + expect(CANONICAL_ORDER[CANONICAL_ORDER.length - 2]).toBe("check:coverage"); + expect(CANONICAL_ORDER[CANONICAL_ORDER.length - 1]).toBe("check:ci-parity"); + }); + + test("resolveGates includes every core gate even when scripts are missing", () => { + const gates = resolveGates({}); + expect(gates).toEqual(CANONICAL_GATES.filter((g) => !g.conditional).map((g) => g.name)); + }); + + test("resolveGates includes conditional gates only when defined, preserving order", () => { + const gates = resolveGates({ + "gen:docs:check": "bun run scripts/generate-docs.ts --check", + }); + expect(gates).toContain("gen:docs:check"); + expect(gates).not.toContain("check:bundle-size"); + expect(gates).not.toContain("gen:openapi:check"); + expect(gates.indexOf("gen:docs:check")).toBeGreaterThan(gates.indexOf("check:debt")); + expect(gates.indexOf("gen:docs:check")).toBeLessThan(gates.indexOf("check:coverage")); + }); + + test("resolveGates with all conditionals defined yields the full canonical list", () => { + const gates = resolveGates({ + "check:bundle-size": "x", + "gen:docs:check": "x", + "gen:openapi:check": "x", + }); + expect(gates).toEqual(CANONICAL_ORDER); + }); + + test("GATES is a canonical-order subsequence ending in check:ci-parity", () => { + const indices = GATES.map((g) => CANONICAL_ORDER.indexOf(g)); + expect(indices).not.toContain(-1); + expect([...indices].sort((a, b) => a - b)).toEqual(indices); + expect(GATES[GATES.length - 1]).toBe("check:ci-parity"); + }); + + test("loadScripts tolerates a missing package.json", () => { + expect(loadScripts("/nonexistent/package.json")).toEqual({}); + }); + + test("formatGateLine aligns names and renders status marks", () => { + expect(formatGateLine("ok", "lint", 1.23, 10)).toBe("✓ lint (1.2s)"); + expect(formatGateLine("fail", "check:dups", 0.05, 10)).toBe("✗ check:dups (0.1s)"); + }); + + test("extractFailureSignatures picks bun-test fail lines over noise", () => { + const output = [ + "bun test v1.2.0", + "(pass) suite > passing test [0.10ms]", + "(fail) suite > broken test [0.42ms]", + " expected 1, got 2", + "(fail) suite > other broken test [0.11ms]", + " 12 pass", + " 2 fail", + ].join("\n"); + const sig = extractFailureSignatures(output); + expect(sig).toContain("(fail) suite > broken test [0.42ms]"); + expect(sig).toContain("(fail) suite > other broken test [0.11ms]"); + expect(sig).not.toContain("(pass) suite > passing test [0.10ms]"); + }); + + test("extractFailureSignatures picks tsc error lines", () => { + const output = [ + "src/foo.ts(12,5): error TS2322: Type 'string' is not assignable to type 'number'.", + "Found 1 error.", + ].join("\n"); + expect(extractFailureSignatures(output)[0]).toContain("error TS2322"); + }); + + test("extractFailureSignatures falls back to the output tail", () => { + const output = ["line one", "", "line two", "budget exceeded somehow"].join("\n"); + const sig = extractFailureSignatures(output); + expect(sig.length).toBeGreaterThan(0); + expect(sig).not.toContain(""); + }); +}); diff --git a/scripts/check-all.ts b/scripts/check-all.ts new file mode 100644 index 00000000..d6f9dafe --- /dev/null +++ b/scripts/check-all.ts @@ -0,0 +1,157 @@ +#!/usr/bin/env bun +/** + * Canonical quiet runner for the os-eco fleet `check:all` standard + * (docs/check-all-standard.md at the os-eco root, os-eco-5db7). + * + * This file is BYTE-IDENTICAL across every conforming repo — do not + * edit it in place. Per-repo variation comes exclusively from + * package.json: the runner resolves its gate manifest by filtering the + * canonical ordered gate list against the scripts the host repo + * actually defines. Core gates are mandatory (a repo missing one fails + * the run); conditional gates (check:bundle-size, gen:docs:check, + * gen:openapi:check) run only where package.json defines them. + * + * Output contract ("quiet"): + * - one aligned ` (N.Ns)` line per gate + * - a one-line tally on success + * - on failure: the failing gate names plus parsed failure + * signatures from the captured output — never the full log + * - CHECK_ALL_VERBOSE=1 streams every gate's full output instead + * - --bail stops at the first failing gate + */ + +import { existsSync, readFileSync } from "node:fs"; +import { resolve } from "node:path"; + +const REPO_ROOT = resolve(import.meta.dir, ".."); +const PACKAGE_JSON = resolve(REPO_ROOT, "package.json"); + +export type CanonicalGate = { name: string; conditional: boolean }; + +/** + * The frozen, ordered gate vocabulary. Cheap static gates first, + * conditional gates next, the expensive test+coverage gate + * second-to-last, and the CI-parity meta-gate always LAST so it sees + * the final manifest. + */ +export const CANONICAL_GATES: readonly CanonicalGate[] = [ + { name: "lint", conditional: false }, + { name: "typecheck", conditional: false }, + { name: "check:agents", conditional: false }, + { name: "check:dups", conditional: false }, + { name: "check:deps", conditional: false }, + { name: "check:size", conditional: false }, + { name: "check:debt", conditional: false }, + { name: "check:bundle-size", conditional: true }, + { name: "gen:docs:check", conditional: true }, + { name: "gen:openapi:check", conditional: true }, + { name: "check:coverage", conditional: false }, + { name: "check:ci-parity", conditional: false }, +]; + +type PackageJson = { scripts?: Record }; + +export function loadScripts(packageJsonPath: string = PACKAGE_JSON): Record { + if (!existsSync(packageJsonPath)) return {}; + const pkg = JSON.parse(readFileSync(packageJsonPath, "utf8")) as PackageJson; + return pkg.scripts ?? {}; +} + +/** + * Resolve the host repo's gate manifest: every core gate (whether or + * not the repo defines it — a missing core gate must fail loudly, not + * silently narrow the manifest) plus each conditional gate the repo's + * package.json defines. + */ +export function resolveGates(scripts: Record): string[] { + return CANONICAL_GATES.filter((g) => !g.conditional || scripts[g.name] !== undefined).map( + (g) => g.name, + ); +} + +/** The host repo's resolved manifest — the single source of truth that + * check-ci-parity.ts imports. */ +export const GATES: readonly string[] = resolveGates(loadScripts()); + +export function formatGateLine( + status: "ok" | "fail", + gate: string, + seconds: number, + width: number, +): string { + const mark = status === "ok" ? "✓" : "✗"; + return `${mark} ${gate.padEnd(width)} (${seconds.toFixed(1)}s)`; +} + +const SIGNATURE_RE = + /^\(fail\) |^✗ |error TS\d+|: error |^✖|^Error: |\berror\b.*\bbudget\b|exceeds? .*budget/i; +const MAX_SIGNATURE_LINES = 50; +const TAIL_FALLBACK_LINES = 25; + +/** + * Pull the failure-relevant lines out of a gate's captured output: + * known failure signatures (bun test `(fail)` lines, tsc/biome error + * lines, budget-ratchet violations) when present, otherwise the tail + * of the output. + */ +export function extractFailureSignatures(output: string): string[] { + const lines = output.split("\n"); + const matched = lines.filter((l) => SIGNATURE_RE.test(l.trim())); + if (matched.length > 0) return matched.slice(0, MAX_SIGNATURE_LINES); + return lines.filter((l) => l.trim() !== "").slice(-TAIL_FALLBACK_LINES); +} + +type GateResult = { gate: string; ok: boolean; seconds: number; output: string }; + +function runGate(gate: string, verbose: boolean): GateResult { + const start = performance.now(); + const proc = Bun.spawnSync(["bun", "run", gate], { + cwd: REPO_ROOT, + stdout: verbose ? "inherit" : "pipe", + stderr: verbose ? "inherit" : "pipe", + env: process.env, + }); + const seconds = (performance.now() - start) / 1000; + const output = verbose + ? "" + : `${proc.stdout?.toString() ?? ""}\n${proc.stderr?.toString() ?? ""}`; + return { gate, ok: proc.exitCode === 0, seconds, output }; +} + +function main(): void { + const verbose = process.env.CHECK_ALL_VERBOSE === "1"; + const bail = process.argv.includes("--bail"); + const width = Math.max(...GATES.map((g) => g.length)); + const results: GateResult[] = []; + const overallStart = performance.now(); + + for (const gate of GATES) { + if (verbose) console.log(`\n── ${gate} ──`); + const result = runGate(gate, verbose); + results.push(result); + console.log(formatGateLine(result.ok ? "ok" : "fail", gate, result.seconds, width)); + if (!result.ok && bail) break; + } + + const failures = results.filter((r) => !r.ok); + const totalSeconds = (performance.now() - overallStart) / 1000; + + if (failures.length === 0) { + console.log(`\n${results.length}/${GATES.length} gates passed (${totalSeconds.toFixed(1)}s)`); + return; + } + + console.error( + `\n${failures.length} gate(s) failed: ${failures.map((f) => f.gate).join(", ")}\n`, + ); + for (const f of failures) { + console.error(`── ${f.gate} ──`); + for (const line of extractFailureSignatures(f.output)) console.error(` ${line}`); + console.error(` ↳ re-run: bun run ${f.gate} (or CHECK_ALL_VERBOSE=1 bun run check:all)`); + } + process.exit(1); +} + +if (import.meta.main) { + main(); +} diff --git a/scripts/check-ci-parity.test.ts b/scripts/check-ci-parity.test.ts index 0d1098a1..5ad0e9ba 100644 --- a/scripts/check-ci-parity.test.ts +++ b/scripts/check-ci-parity.test.ts @@ -3,11 +3,13 @@ import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { - checkParity, + type CiInvocation, computeReachable, + evaluateParity, extractBunRunTargets, extractCiInvocations, - listWorkflows, + listCiWorkflows, + loadParityConfig, } from "./check-ci-parity.ts"; describe("check-ci-parity", () => { @@ -18,9 +20,6 @@ describe("check-ci-parity", () => { "typecheck", ]); expect(extractBunRunTargets("bun run check:coverage")).toEqual(["check:coverage"]); - expect(extractBunRunTargets("bun run check:bundle-size:build")).toEqual([ - "check:bundle-size:build", - ]); // Multi-line shell still works. expect(extractBunRunTargets("set -euo pipefail\nbun run lint\nbun run typecheck\n")).toEqual([ "lint", @@ -30,23 +29,22 @@ describe("check-ci-parity", () => { expect(extractBunRunTargets("bun run scripts/foo.ts")).toEqual([]); }); - test("computeReachable walks the script dep graph transitively", () => { + test("computeReachable seeds from the gate manifest and walks transitively", () => { const scripts = { - root: "bun run a && bun run b", - a: "bun run c", - b: "echo hi", - c: "bun run d", - d: "echo leaf", - unrelated: "bun run a", + "check:all": "bun scripts/check-all.ts", + verify: "bun run check:all", + lint: "biome check .", + "check:coverage": "bun run scripts/check-coverage.ts", + "check:size": "bun run check:size:inner", + "check:size:inner": "echo leaf", + unrelated: "bun run lint", }; - const { reachable } = computeReachable(scripts, "root"); - expect([...reachable].sort()).toEqual(["a", "b", "c", "d", "root"]); - }); - - test("listWorkflows finds the repo's ci yamls", () => { - const found = listWorkflows().map((p) => p.split("/").pop()); - expect(found).toContain("ci.yml"); - expect(found).toContain("ci-postgres.yml"); + const reachable = computeReachable(scripts, ["lint", "check:coverage", "check:size"]); + expect(reachable.has("lint")).toBe(true); + expect(reachable.has("check:size:inner")).toBe(true); + expect(reachable.has("check:all")).toBe(true); + expect(reachable.has("verify")).toBe(true); + expect(reachable.has("unrelated")).toBe(false); }); test("extractCiInvocations parses a synthetic workflow", () => { @@ -71,24 +69,61 @@ describe("check-ci-parity", () => { ].join("\n"), ); try { - const invocations = extractCiInvocations(file); + const invocations = extractCiInvocations(file, dir); const scripts = invocations.map((i) => i.script).sort(); expect(scripts).toEqual(["lint", "test:ci", "typecheck"]); + expect(invocations[0]?.workflow).toBe("ci.yml"); } finally { rmSync(dir, { recursive: true, force: true }); } }); - test("current CI workflows are in parity with check:all", () => { - const { failures, invocations } = checkParity(); - // Sanity: we should actually be checking something — empty - // invocations would silently pass. - expect(invocations.length).toBeGreaterThan(0); - if (failures.length > 0) { - const detail = failures - .map((f) => ` ${f.workflow} job=${f.job} step=${f.step}: ${f.script} — ${f.reason}`) - .join("\n"); - throw new Error(`CI parity drift:\n${detail}`); + test("listCiWorkflows returns only ci*.yml, tolerating a missing dir", () => { + expect(listCiWorkflows("/nonexistent/workflows")).toEqual([]); + const dir = mkdtempSync(join(tmpdir(), "ci-parity-wf-")); + try { + writeFileSync(join(dir, "ci.yml"), "jobs: {}"); + writeFileSync(join(dir, "ci-postgres.yml"), "jobs: {}"); + writeFileSync(join(dir, "release.yml"), "jobs: {}"); + const found = listCiWorkflows(dir).map((p) => p.split("/").pop()); + expect(found).toEqual(["ci-postgres.yml", "ci.yml"]); + } finally { + rmSync(dir, { recursive: true, force: true }); } }); + + test("loadParityConfig tolerates a missing config file", () => { + const config = loadParityConfig("/nonexistent/ci-parity-config.json"); + expect(config.aliases).toEqual({}); + expect(config.ciOnly.size).toBe(0); + }); + + test("evaluateParity flags drift and honors aliases + ciOnly", () => { + const inv = (script: string): CiInvocation => ({ + workflow: "ci.yml", + job: "ci", + step: 0, + script, + }); + const reachable = new Set(["check:all", "lint", "check:coverage"]); + const config = { + aliases: { "check:coverage:ci": "check:coverage", "lint:special": "lint:missing" }, + ciOnly: new Set(["report:test-timing"]), + }; + // Reachable directly — passes. + expect(evaluateParity([inv("lint")], reachable, config)).toEqual([]); + // Reachable via alias — passes. + expect(evaluateParity([inv("check:coverage:ci")], reachable, config)).toEqual([]); + // Allowlisted CI-only — passes. + expect(evaluateParity([inv("report:test-timing")], reachable, config)).toEqual([]); + // Unreachable — drift. + const drift = evaluateParity([inv("test:ci")], reachable, config); + expect(drift).toHaveLength(1); + expect(drift[0]?.reason).toContain("not reachable"); + // Aliased to something itself unreachable — drift with the aliased reason. + const aliasDrift = evaluateParity([inv("lint:special")], reachable, config); + expect(aliasDrift).toHaveLength(1); + expect(aliasDrift[0]?.canonical).toBe("lint:missing"); + expect(aliasDrift[0]?.reason).toContain("aliased to"); + }); }); diff --git a/scripts/check-ci-parity.ts b/scripts/check-ci-parity.ts index 8ce70d0c..af557abd 100644 --- a/scripts/check-ci-parity.ts +++ b/scripts/check-ci-parity.ts @@ -1,85 +1,58 @@ #!/usr/bin/env bun /** - * CI <-> `check:all` parity drift detector (warren-6296, plan pl-da5b step 3). + * CI <-> `check:all` parity drift detector — fleet-canonical port of + * warren's original (warren-6296), generalized for the os-eco + * check:all standard (docs/check-all-standard.md, os-eco-5db7). * - * Parses the GitHub Actions workflows under `.github/workflows/` (today - * `ci.yml` + `ci-postgres.yml`) and, for every `bun run ` invoked by - * any `run:` step, verifies that `` is transitively reachable from - * the `check:all` script in `package.json` — i.e. running - * `bun run check:all` locally exercises the same gate that CI does. + * This file is BYTE-IDENTICAL across every conforming repo — do not + * edit it in place. It imports the resolved GATES manifest from + * ./check-all.ts as the single source of truth, parses every + * `.github/workflows/ci*.yml`, and fails when any `bun run ` + * invoked by a CI `run:` step is not transitively reachable from the + * gate manifest — i.e. when CI enforces something `bun run check:all` + * does not exercise locally. * - * Two structured escape hatches keep the detector usable without - * collapsing every CI step into `check:all` verbatim: + * Per-repo escape hatches live OUTSIDE this file, in an optional + * `scripts/ci-parity-config.json`: * - * - `ALIASES` — maps a CI-side script name onto a canonical - * check:all-reachable equivalent. Use this for variants that do - * "the same gate, formatted differently" (e.g. `check:coverage:ci` - * is `check:coverage` plus a JUnit reporter; `check:bundle-size` - * is `check:bundle-size:build` minus the embedded `build:ui`). + * { + * "aliases": { "check:coverage:ci": "check:coverage" }, + * "ciOnly": ["report:test-timing", "report:quality-metrics"] + * } * - * - `CI_ONLY` — explicit allowlist of scripts that are intentionally - * CI-only (summaries / reports / setup steps with no local - * equivalent). Adding to this list is the only sanctioned way to - * diverge — and each entry should be justified. + * - `aliases` maps a CI-side script name onto a canonical + * gate-reachable equivalent. Use for variants that run the same + * gate with a different reporter / preamble (e.g. a junit + * emitter). + * - `ciOnly` is the explicit allowlist of scripts that are + * intentionally CI-only (summaries / setup with no local + * equivalent). Adding here is the only sanctioned way to diverge; + * justify each entry in the config's "$comment". * - * Anything outside those two sinks is treated as drift: either - * `check:all` needs to grow to cover the CI step, the workflow needs - * to invoke a different script, or — if it really is CI-only — - * `CI_ONLY` needs a new entry. - * - * Wired into `bun run check:all` so a PR that adds a CI step without - * also wiring it into the local gate fails before merge. + * Anything outside those two sinks is drift: grow the manifest, change + * the workflow, or add a justified escape-hatch entry. */ import { existsSync, readdirSync, readFileSync } from "node:fs"; import { join, relative, resolve } from "node:path"; -import yaml from "js-yaml"; +import { parse } from "yaml"; +import { GATES } from "./check-all.ts"; const REPO_ROOT = resolve(import.meta.dir, ".."); const WORKFLOWS_DIR = resolve(REPO_ROOT, ".github/workflows"); const PACKAGE_JSON = resolve(REPO_ROOT, "package.json"); +const PARITY_CONFIG = resolve(import.meta.dir, "ci-parity-config.json"); const ROOT_GATE = "check:all"; -/** - * CI-side script names whose semantics are covered (under a different - * name) by something reachable from `check:all`. Add an entry only - * when the two scripts truly run the same gate, just with different - * reporters / preamble / output paths. - */ -const ALIASES: Record = { - // check:coverage:ci is check:coverage + JUnit reporter for the CI - // test-timing summary. The underlying gate (tests + coverage - // ratchet) is identical. - "check:coverage:ci": "check:coverage", - // check:bundle-size (without :build) assumes `src/ui/dist/` already - // exists; check:bundle-size:build calls `bun run build:ui` first via - // spawnSync. The assertion logic — the bundle-size ratchet — is the - // same. check:all uses the self-contained :build variant. - "check:bundle-size": "check:bundle-size:build", -}; +export type ParityConfig = { aliases: Record; ciOnly: ReadonlySet }; -/** - * Scripts CI is allowed to invoke without a check:all-side counterpart. - * These are non-gating (informational summaries) or CI-environment - * setup (UI dep install, UI build prereqs not needed locally because - * the :build variants embed them). - */ -const CI_ONLY: ReadonlySet = new Set([ - // UI workspace install — local `bun install` at the repo root + - // `bun run build:ui` (embedded in check:bundle-size:build) already - // covers this for the gate path. - "ui:install", - // build:ui — invoked by check:bundle-size:build via spawnSync - // rather than `bun run`, so it doesn't show up in the reachability - // walk; CI invokes it explicitly so the build step is visible in - // logs (see warren-5abc design note in check-bundle-size.ts). - "build:ui", - // Reporting steps — they post step-summary panels from artifacts - // produced by the gate proper. Local devs read the same data from - // `coverage/` and `test-results/` directly. - "report:test-timing", - "report:quality-metrics", -]); +type RawParityConfig = { aliases?: Record; ciOnly?: string[] }; + +export function loadParityConfig(configPath: string = PARITY_CONFIG): ParityConfig { + if (!existsSync(configPath)) return { aliases: {}, ciOnly: new Set() }; + const raw = JSON.parse(readFileSync(configPath, "utf8")) as RawParityConfig; + return { aliases: raw.aliases ?? {}, ciOnly: new Set(raw.ciOnly ?? []) }; +} type PackageJson = { scripts?: Record }; @@ -96,35 +69,34 @@ export function extractBunRunTargets(command: string): string[] { return out; } -export function loadScripts(): Record { - const pkg = JSON.parse(readFileSync(PACKAGE_JSON, "utf8")) as PackageJson; +export function loadScripts(packageJsonPath: string = PACKAGE_JSON): Record { + if (!existsSync(packageJsonPath)) return {}; + const pkg = JSON.parse(readFileSync(packageJsonPath, "utf8")) as PackageJson; return pkg.scripts ?? {}; } +/** + * Everything reachable from the gate manifest: the manifest itself, + * the check:all / verify entry points, and the transitive closure of + * `bun run ` references in script bodies. + */ export function computeReachable( scripts: Record, - root: string, -): { reachable: Set; missing: string[] } { + gates: readonly string[], +): Set { const reachable = new Set(); - const missing: string[] = []; - const stack: string[] = [root]; + const stack: string[] = [ROOT_GATE, "verify", ...gates]; while (stack.length > 0) { const name = stack.pop(); if (!name || reachable.has(name)) continue; reachable.add(name); const body = scripts[name]; - if (body === undefined) { - // Root or referenced-but-undefined script. We tolerate - // missing-from-package here only for the root (caller's - // responsibility) — log all of them, the caller decides. - missing.push(name); - continue; - } + if (body === undefined) continue; for (const dep of extractBunRunTargets(body)) { if (!reachable.has(dep)) stack.push(dep); } } - return { reachable, missing }; + return reachable; } type WorkflowStep = { run?: unknown }; @@ -133,10 +105,10 @@ type WorkflowFile = { jobs?: Record }; export type CiInvocation = { workflow: string; job: string; step: number; script: string }; -export function extractCiInvocations(filePath: string): CiInvocation[] { +export function extractCiInvocations(filePath: string, repoRoot: string = REPO_ROOT): CiInvocation[] { const text = readFileSync(filePath, "utf8"); - const doc = yaml.load(text) as WorkflowFile | null; - const workflow = relative(REPO_ROOT, filePath); + const doc = parse(text) as WorkflowFile | null; + const workflow = relative(repoRoot, filePath); const out: CiInvocation[] = []; if (!doc || typeof doc !== "object" || !doc.jobs) return out; for (const [jobName, job] of Object.entries(doc.jobs)) { @@ -153,36 +125,27 @@ export function extractCiInvocations(filePath: string): CiInvocation[] { return out; } -export function listWorkflows(dir: string = WORKFLOWS_DIR): string[] { +/** Gate workflows only (ci*.yml / ci*.yaml) — release/publish + * orchestration is intentionally out-of-band from the per-PR gate. */ +export function listCiWorkflows(dir: string = WORKFLOWS_DIR): string[] { if (!existsSync(dir)) return []; return readdirSync(dir) - .filter((f) => f.endsWith(".yml") || f.endsWith(".yaml")) + .filter((f) => (f.endsWith(".yml") || f.endsWith(".yaml")) && f.startsWith("ci")) .map((f) => join(dir, f)) .sort(); } export type ParityFailure = CiInvocation & { canonical: string; reason: string }; -export function checkParity(): { - invocations: CiInvocation[]; - reachable: Set; - failures: ParityFailure[]; -} { - const scripts = loadScripts(); - const { reachable } = computeReachable(scripts, ROOT_GATE); +export function evaluateParity( + invocations: CiInvocation[], + reachable: ReadonlySet, + config: ParityConfig, +): ParityFailure[] { const failures: ParityFailure[] = []; - const invocations: CiInvocation[] = []; - for (const wf of listWorkflows()) { - // Skip release.yml: release-time orchestration (tag, publish, - // changelog) is intentionally out-of-band from the per-PR gate. - // We only check the gate workflows (anything matching ci*.yml). - const base = wf.split("/").pop() ?? wf; - if (!base.startsWith("ci")) continue; - invocations.push(...extractCiInvocations(wf)); - } for (const inv of invocations) { - const canonical = ALIASES[inv.script] ?? inv.script; - if (CI_ONLY.has(canonical)) continue; + const canonical = config.aliases[inv.script] ?? inv.script; + if (config.ciOnly.has(canonical)) continue; if (reachable.has(canonical)) continue; const reason = canonical === inv.script @@ -190,6 +153,20 @@ export function checkParity(): { : `aliased to "${canonical}", which is not reachable from ${ROOT_GATE}`; failures.push({ ...inv, canonical, reason }); } + return failures; +} + +export function checkParity(): { + invocations: CiInvocation[]; + reachable: Set; + failures: ParityFailure[]; +} { + const reachable = computeReachable(loadScripts(), GATES); + const invocations: CiInvocation[] = []; + for (const wf of listCiWorkflows()) { + invocations.push(...extractCiInvocations(wf)); + } + const failures = evaluateParity(invocations, reachable, loadParityConfig()); return { invocations, reachable, failures }; } @@ -213,13 +190,13 @@ function main(): void { for (const f of failures) console.error(formatFailure(f)); console.error( `\nFix one of:\n` + - ` - Wire the script into the "${ROOT_GATE}" chain in package.json.\n` + - ` - Change CI to invoke a script that is already in the chain.\n` + + ` - Wire the script into the GATES manifest / a gate's script body.\n` + + ` - Change CI to invoke a script that is already reachable.\n` + ` - If the step is intentionally CI-only (summary / setup with no local\n` + - ` equivalent), add the canonical name to CI_ONLY in scripts/check-ci-parity.ts\n` + - ` with a justification comment.\n` + + ` equivalent), add it to "ciOnly" in scripts/ci-parity-config.json with a\n` + + ` justification in the config's "$comment".\n` + ` - If two scripts run the same gate under different names, map the CI name\n` + - ` to its canonical equivalent in ALIASES.`, + ` to its canonical equivalent in "aliases" in scripts/ci-parity-config.json.`, ); process.exit(1); } diff --git a/scripts/ci-parity-config.json b/scripts/ci-parity-config.json new file mode 100644 index 00000000..8da23ea5 --- /dev/null +++ b/scripts/ci-parity-config.json @@ -0,0 +1,22 @@ +{ + "$comment": [ + "Per-repo escape hatches for the byte-identical scripts/check-ci-parity.ts", + "(docs/check-all-standard.md §6). Justify every entry:", + "aliases.check:coverage:ci — check:coverage plus a JUnit reporter for the CI", + " test-timing summary; the underlying gate (tests + coverage ratchet) is identical.", + "ciOnly.ui:install — UI workspace dep install so check:deps (knip) can resolve the", + " src/ui workspace in CI; locally covered by the root bun install + the", + " frozen-lockfile install embedded in check:bundle-size --build.", + "ciOnly.build:ui — explicit UI build step kept in CI so the build is visible in", + " logs (warren-5abc); locally check:bundle-size runs the same build itself via", + " its --build flag (spawnSync, so it never shows up in the reachability walk).", + "ciOnly.report:test-timing — non-gating step-summary panel built from", + " test-results/junit.xml; local devs read the artifact directly.", + "ciOnly.report:quality-metrics — non-gating consolidated code-quality panel", + " appended to $GITHUB_STEP_SUMMARY; enforces nothing (warren-5b95)." + ], + "aliases": { + "check:coverage:ci": "check:coverage" + }, + "ciOnly": ["ui:install", "build:ui", "report:test-timing", "report:quality-metrics"] +} diff --git a/scripts/validate-agents-md.test.ts b/scripts/validate-agents-md.test.ts index 231652f0..e5cb5291 100644 --- a/scripts/validate-agents-md.test.ts +++ b/scripts/validate-agents-md.test.ts @@ -42,11 +42,11 @@ describe("validate-agents-md helpers", () => { test("extractBunRunScripts captures colon-namespaced and hyphenated names", () => { const scripts = extractBunRunScripts([ - "bun run lint && bun run db:generate:sqlite && bun run validate:agents-md", + "bun run lint && bun run db:generate:sqlite && bun run check:bundle-size", ]); expect(scripts.has("lint")).toBe(true); expect(scripts.has("db:generate:sqlite")).toBe(true); - expect(scripts.has("validate:agents-md")).toBe(true); + expect(scripts.has("check:bundle-size")).toBe(true); }); test("extractBacktickedPaths skips non-path tokens", () => { diff --git a/scripts/validate-agents-md.ts b/scripts/validate-agents-md.ts index 74607e2c..a9462446 100644 --- a/scripts/validate-agents-md.ts +++ b/scripts/validate-agents-md.ts @@ -28,6 +28,7 @@ const KNOWN_MISSING_PATHS = new Set([ "src/ui/dist", // built by `bun run build:ui` "src/ui/dist/assets", // built by `bun run build:ui` "../burrow/SPEC.md", // sibling repo, not vendored + "../templates/l5-toolkit/scripts/check-all.ts", // os-eco sibling repo, not vendored ".warren/config.yaml", // per-project file, written at runtime "kebab-case.ts", // naming-convention illustration, not a real file "PascalCase.tsx", // naming-convention illustration, not a real file diff --git a/src/server/__golden__/responses/README.md b/src/server/__golden__/responses/README.md index 1d3b77ae..eeb639f7 100644 --- a/src/server/__golden__/responses/README.md +++ b/src/server/__golden__/responses/README.md @@ -32,8 +32,8 @@ roll the change back and stabilise the producer instead. The directory name mirrors the [burrow parser golden fixtures](https://github.com/jayminwest/burrow) convention (`burrow/src/runtime/parsers/__golden__/`) and is -already excluded from `check:file-sizes`, `check:debt-markers`, -`check:duplicates`, and Biome's filename-convention rule (see +already excluded from `check:size`, `check:debt`, +`check:dups`, and Biome's filename-convention rule (see `scripts/check-file-sizes.ts`, `scripts/check-debt-markers.ts`, `.jscpd.json`, `biome.json`). New golden directories under `src/` should follow the same name so those exclusions keep working.