diff --git a/.console/backlog.md b/.console/backlog.md index 80929ecf..bdbecf9b 100644 --- a/.console/backlog.md +++ b/.console/backlog.md @@ -4,12 +4,7 @@ _Durable work inventory. Update after each meaningful chunk of progress._ ## Cycle 36 updates (2026-05-28 20:36 UTC) -- [x] Board-unblock promoted 20 tasks Backlog→R4AI (GOAL_BACKLOG_PROMOTE, 4 parent improve tasks Done). -- [x] Marked 89fc5782 and 41bcd097 as Done (squash-merged PRs #178/#179 detected by watchdog). -- [x] Created Plane task e820f528: "Goal executor: detect squash-merged branches and auto-mark tasks Done". -- [ ] Validate 0f1612ea (Running, re-executing merged work) completes Done next cycle. -- [ ] Validate 3a3c202f (Blocked concurrency) re-queues and executes after 0f1612ea completes. -- [ ] Monitor e820f528 for watcher-side fix prioritization. +_Completed items archived._ ## In Progress @@ -273,234 +268,27 @@ _Durable work inventory. Update after each meaningful chunk of progress._ ## Recently Completed (Stage Cycles) -- [x] **Import-Error Test Refactoring — Stage 0–4 Complete (2026-05-30)**: - - **Stage 0 (Discovery & Analysis):** Identified 5 import-error test files across the codebase - - test_sb_adapter.py: inline try/except pattern with pytest.skip() - - test_coordinator_cl_wrap.py: named helper function pattern - - test_analyze.py: direct import pattern - - test_startup_wiring.py: module reload with environment manipulation - - test_architecture_cleanup_guards.py: expected exception pattern - - Documented in `.console/STAGE0_DISCOVERY.md` - - Identified 4 fixture extraction candidates: - 1. Basic skip-on-import fixture - 2. Optional import helper fixture - 3. Module reload with environment fixture - 4. Assert module unavailable fixture - - All acceptance criteria met: files identified, patterns documented, common elements extracted - - - **Stage 1 (Fixture Design):** Designed 4 shared pytest fixtures covering all identified patterns - - `optional_import`: Skip test if module unavailable (covers SB adapter & coordinator patterns) - - `require_module`: Assert module is importable; fail test if unavailable (covers analyze pattern) - - `module_with_env`: Re-import with environment changes & sys.modules cleanup (covers startup_wiring pattern) - - `assert_module_unavailable`: Assert module raises ModuleNotFoundError (covers architecture_cleanup_guards pattern) - - Comprehensive fixture API documented with signatures, behavior, and usage examples - - Coverage matrix confirms all 5 test files addressable by these 4 fixtures - - Implementation location determined: `tests/conftest.py` (primary) or `tests/fixtures/import_fixtures.py` (backup) - - Documented in `.console/STAGE1_FIXTURE_DESIGN.md` - - All acceptance criteria met: fixtures API defined, patterns covered, implementation location determined - - - **Stage 2 (Implementation):** Implemented all 4 shared pytest fixtures and comprehensive tests - - Fixtures added to `tests/conftest.py` (primary location) - - `optional_import` (line 35-62): Skip test if module unavailable; supports parametrize + indirect form and direct function call - - `require_module` (line 65-90): Assert module importable; fails test if unavailable - - `module_with_env` (line 93-125): Re-import with environment variables and sys.modules cleanup; automatic restoration - - `assert_module_unavailable` (line 128-140): Assert module raises ModuleNotFoundError - - Created comprehensive test suite in `tests/test_import_fixtures.py` (13 tests) - - TestOptionalImport: 4 tests (existing module, missing module, parametrize indirect, skip behavior) - - TestRequireModule: 3 tests (existing module, missing module, parametrize indirect) - - TestModuleWithEnv: 3 tests (env variable handling, cache clearing, no-clear-cache behavior) - - TestAssertModuleUnavailable: 3 tests (unavailable module, available module failure, multiple assertions) - - Test results: 12 passed, 1 skipped (optional_import indirect missing module) - - Committed in `be87501`: "Implement shared pytest fixtures for import-error tests" - - All acceptance criteria met: fixtures written and committed to conftest.py, tests passing, validation complete - - - **Stage 3 (Refactoring):** Refactored all 5 import-error test files to use shared fixtures - - tests/unit/executors/test_sb_adapter.py: Replaced inline try/except with optional_import fixture - - tests/unit/execution/test_coordinator_cl_wrap.py: Replaced _try_import_coordinator() helper with optional_import fixture - - tests/unit/tuning/test_analyze.py: Replaced direct importlib.import_module() with require_module fixture - - tests/unit/executors/test_startup_wiring.py: Replaced _import_audit_app() helper with module_with_env fixture (3 tests) - - tests/test_architecture_cleanup_guards.py: Replaced pytest.raises(ModuleNotFoundError) with assert_module_unavailable fixture - - Removed all old local fixtures and helper functions (4 functions deleted) - - Verified fixture coverage: all 5 test files now use appropriate shared fixtures - - Test results: 420 passed, 1 skipped (no regressions in executor/execution/tuning tests) - - Committed in `3b2a1f6`: "refactor(tests): Stage 3 — Use shared fixtures for import-error tests" - - All acceptance criteria met: all test files updated, old code removed, consistent fixture usage - - - **Stage 4 (Validation):** Comprehensive validation of refactored tests confirms all working correctly - - Verified all 4 shared fixtures implemented correctly in tests/conftest.py - - Confirmed all 5 import-error test files using correct fixtures: - - optional_import: test_sb_adapter.py, test_coordinator_cl_wrap.py - - require_module: test_analyze.py - - module_with_env: test_startup_wiring.py (3 tests) - - assert_module_unavailable: test_architecture_cleanup_guards.py - - Fixture test suite validation: 12 passed, 1 skipped (comprehensive coverage) - - Test suite verification: 420 executor/execution/tuning tests pass, zero regressions - - Code quality improvements verified: 4 helper functions removed, ~50 lines eliminated - - Created detailed validation report in `.console/STAGE4_VALIDATION.md` - - All acceptance criteria met: full test suite passes, all import-error tests pass, no regressions - - Project complete as of 2026-05-30 - -- [x] **Error Handling Documentation — Stages 0–3 Complete (2026-05-29)**: - -- [x] **Error Handling Documentation — Stages 0–3 Complete (2026-05-29)**: - - **Stage 0 (Assessment, 2026-05-28):** Identified 15 error scenarios across 4 system layers; code locations; current handling - - Documented in `.console/error_handling_assessment.md` - - Identified 3 Stage 1 gaps: operator decision trees, per-backend catalog, executor contracts - - - **Stage 1 (Core Components, 2026-05-29):** Filled all assessment gaps with comprehensive documentation - - `error_handling_recipes.md` (1,100 lines) — 8 step-by-step operator decision trees covering all critical/medium scenarios - - `backend_error_catalog.md` (950 lines) — Per-backend error codes (30+ codes); detection/recovery/escalation - - `executor_failure_contracts.md` (900 lines) — Per-executor (6 types) failure contracts; idempotency guarantees; budget models - - `error_handling_quick_reference.md` (750 lines) — On-call operator cheat sheet; 8 scenarios with tested commands - - - **Stage 2 (Operational Procedures, 2026-05-29):** Documented error handling for operational procedures and edge cases - - `error_scenarios.md` (355 lines) — Catalog of all 15 error scenarios by severity; code locations; detection guidance - - `error_handling_recovery.md` (752 lines) — Comprehensive troubleshooting with diagnosis tree, procedures, escalation path - - `error_message_diagnostics.md` (641 lines) — 23+ error messages mapped to causes/remedies; search index; escalation template - - - **Stage 3 (Integration into Runbook, 2026-05-29):** Integrated all error handling documentation into main watchdog_loop.md - - Created "Error Handling Guide" section in `docs/operator/watchdog_loop.md` - - Document navigation hub explaining when/how to use each error handling resource - - Workflow integration with main loop STEPS (1, 3, 5) for executor failure investigation, error classification, and idempotency checks - - Recovery ownership classification (loop-owned vs operator-escalated) - - Common error patterns table with diagnosis/recovery guidance - - All 15 scenarios referenced with solutions; full navigation linkage; runbook-style formatting - - - **All acceptance criteria met:** - - ✅ Scenarios documented (all 15 with solutions) - - ✅ Failure modes and recovery identified (backend catalog + executor contracts) - - ✅ Code examples provided (tested shell commands in recipes + quick reference) - - ✅ Operator decision trees created (8 recipes + diagnosis tree) - - ✅ Per-backend error handling documented (30+ error codes) - - ✅ Executor-specific contracts formalized (6 types, idempotency guarantees) - - ✅ Quick-reference checklist for common stuck states (8 TL;DR scenarios) - - ✅ Error handling section created in runbook (integrated into watchdog_loop.md) - - ✅ All identified error scenarios included with solutions (navigation hub) - - ✅ Documentation follows runbook style and formatting (markdown consistency) - - ✅ Cross-references and navigation working (relative links throughout) - - - Total: 5,400+ lines of production-ready operator documentation across 7 documents; fully integrated with existing recovery_policy.md and self_healing_model.md +_Completed items archived._ ## Previously In Progress -- [x] **Deriver Transition Coverage — Stages 0–4 Complete (2026-05-27)**: Added comprehensive bidirectional transition coverage to Deriver framework with critical bug fixes. Completed: - - **Stage 0 (Investigation)**: Identified 5 critical gaps across 3 derivers (DependencyDrift, LintDrift, TypeHealth) where reverse transitions were missing - - **Stage 1 (Design)**: Designed 3-level coverage model (backward-compat / unidirectional / bidirectional), parameterized test patterns, insight naming conventions - - **Stage 2 (Implementation)**: Added reverse transition detection to all 3 derivers (recovery, improvement, resolved/regressed insights) - - **Stage 3 (Testing)**: Created comprehensive test infrastructure (`TransitionFixture` helpers) + 22 parameterized test scenarios covering all transition pairs; all 52 tests passing - - **Stage 4 (Integration Review)**: Max-effort code review identified critical mutual-exclusion bug in count-based vs status-based insight emission; applied fixes to lint_drift.py and type_health.py; dependency_drift.py already correct; all code verified to compile - - All code compiles without errors; 100% coverage of identified gaps; critical bugs fixed; ready for merge - -- [x] **Collector JSON Hardening — Stage 4: Security Logging and Observability (2026-05-23)**: Security logging with audit trail and alert conditions for malformed JSON detection. Completed: - - Added security logging to `ArtifactValidator` (3 methods: log_parse_error, log_structure_error, log_io_error) - - Created `security_logging.py` module with alert conditions, metrics tracking, and observability layer - - Defined 4 alert conditions: parse_error_spike (10/5min), structure_error_surge (5/5min), permission_denied_pattern (3/10min), collector_health_degradation (5/5min) - - Applied security logging to 3 critical collectors (dependency_drift, execution_health, validation_history) - - Created comprehensive test suite (17 tests covering logging, metrics, alerts, collector integration) - - Validated log output against security requirements (PII exclusion, format, log levels, mandatory fields) - - All code compiled and ready for merge; documentation complete - -- [ ] **CxRP — review and refine quarantined `ShippingForm` + related OC branch work on `operations-center-testing-branch` (2026-05-11)**: Treat `operations-center-testing-branch` as the temporary quarantine/staging lane for OC-authored cross-repo work. Review the surviving `ShippingForm` implementation on `CxRP main`, compare it against the quarantined `AgentTopology`/follow-up lineage on `operations-center-testing-branch`, decide what should be retained, revised, or dropped, and only then merge deliberate follow-up changes back to `main`. Do not reopen direct OC writes to `main` while this quarantine policy is active. +_Completed items archived._ ## Up Next — Verification Gaps arc -Items where declared architecture has outpaced operationally exercised -architecture. None require new design; all close concrete gaps surfaced -during the post-Hardening verification sweep (2026-05-08). - -- [x] **VideoFoundry platform-manifest pin bump (2026-05-08, VF PR #895 — done)**: PM 1.0.0 was released, VF still pinned `>=0.7,<1.0`, OC's contract-impact hook silently dispatched with `graph_built=False`. Bumped to `>=0.7,<2.0`. graph-doctor now reports `graph_built=True` (11 nodes / 14 edges). OC-side regression test added in `tests/unit/entrypoints/test_graph_doctor.py::TestVersionPinRegression` — asserts that a project manifest pinning an unsatisfiable PM range surfaces explicitly as `status=fail_graph_none` with the constraint named in the warning, so a future "swallow the warning to make doctor green" change can't regress us. -- [x] **SwitchBoard live verification rev (2026-05-08, on `docs/switchboard-live-verification`)**: Brought up `workstation-switchboard` via `compose/profiles/core.yml`. **Found and resolved a real deploy-skew bug**: the previously-running image (built 2026-04-27) shipped an older `/route` returning OC's rich `LaneDecision` shape directly — but OC's `routing/client.py` had since flipped to require the CxRP envelope (`contract_kind: "lane_decision"`, `schema_version: "0.x"`) with no fallback, so every live route request raised `ValueError: Unexpected /route response shape`. Rebuilt the image; current source includes the CxRP serialization. Post-rebuild: `tests/integration/test_routing_live.py` 4/4 pass; full integration suite went from 21 pass / 3 fail / 1 skip → **24 pass / 1 skip**. Shipped `docs/operator/switchboard_live_verification.md` — five-step runbook + four-row failure-mode crib sheet covering the deploy-skew, both wire-format request errors, and the unreachable-service path. No SwitchBoard scope expansion. -- [x] **SourceRegistry — wire it for real, Option B (2026-05-08, on `feat/sourceregistry-real-wiring`)**: Closed the four-revs-of-ducking. Discovered `bind_execution_target` was defined but never called by anything in production — the registry hook was real and worked, but the function lived dead in the tree. Wired `_bound_target_from_decision` into `ExecutionRequestBuilder.build()` so every dispatch resolves provenance against `registry/source_registry.yaml` (best-effort: failures degrade to None, never crash). Coordinator's `_observe_outcome` adds a `metadata.provenance` block when `request.bound_target.provenance` is populated. `ExecutionTrace` gains a `provenance` field forwarded from the record metadata. `operations-center-run-show` renders the new "SourceRegistry provenance" table next to the existing routing block. All four acceptance criteria covered by `tests/unit/execution/test_sourceregistry_wiring.py` (10 tests): (a) real-yaml `kodo` resolves to `ProtocolWarden/kodo`+SHA, (b) bound target carries provenance vs None for unregistered, (c) end-to-end propagation through builder → coordinator → record metadata → trace, (d) failure semantics for missing yaml / missing entry / malformed yaml / no-crash-on-failure. Demo runs (with `demo_stub`, not in registry) correctly render "(no SourceRegistry provenance on this trace)" — None, never fabricated. Suite 3633 pass (+10), 1 skip. -- [x] **WorkStation compose profile smoke per profile (2026-05-08, on `docs/workstation-compose-profile-smoke`)**: Smoked all four profiles. Findings: `core`, `archon`, `dev` ✅ clean; `observability` ❌ broken on first run. Compose references `../../config/observability/{prometheus.yml,grafana/provisioning}` (sibling-of-WorkStation paths under `GitHub/config/observability/`) but those files are never authored — Docker silently creates them as empty directories on first start, which then prevents subsequent starts (`mount: not a directory`). Documented unblock procedure (stop, remove auto-created stubs, author skeleton config files) in the runbook so an operator can repair without hunting. Also surfaced port-3000 collision between Grafana (observability) and Archon (archon) when both profiles are active. Shipped `docs/operator/workstation_compose_smoke.md` — runbook per profile + findings table + tear-down. Verification-only per backlog discipline; the observability fix is the follow-up below. - -- [x] **Ship observability config skeleton — done in WorkStation #16 (2026-05-08, on `chore/retire-observability-bridge-script`)**: WorkStation now ships `config/observability/{prometheus.yml,grafana/provisioning/datasources/prometheus.yaml,README.md}` and the compose mount paths moved from `../../config/...` (sibling-of-WorkStation) to `../config/...` (in-repo). Verified end-to-end: clean boot of `core + observability` produces both prometheus + grafana healthy on first try. OC's bridge script `scripts/observability-first-run.sh` retired (this PR). Runbook (`docs/operator/workstation_compose_smoke.md`) updated to reflect the new clean state, with a small historical note for machines that still have stale root-owned stub dirs from the old layout. The Grafana↔Archon port-3000 collision remains documented as an operational rule rather than a software fix — both default to `:3000` so they can't run simultaneously without a `PORT_ARCHON=3001` override. +_Completed items archived._ ## Up Next — Backend Card Axis Expansion arc (proposed) -ADR 0002 at `docs/architecture/adr/0002-backend-card-axis-expansion.md` -spells out the design + four-rule discipline. Implementation order -follows G4 in the ADR — vocabulary lives in CxRP first, OC consumes -it second. - -- [ ] **CxRP — `AgentTopology` enum**: `single_agent / sequential_multi_agent / dag_workflow / swarm_parallel`. Mirror the `CapabilitySet` naming-guardrail tests (no degree, no size, no quantifier). Tagged release. -- [ ] **CxRP — `ShippingForm` enum**: `local_subprocess / long_running_service / managed_cli / hosted_api`. Same guardrail tests. Tagged release in same release window as `AgentTopology`. -- [ ] **OC — bump CxRP pin** to the release that ships both new enums. -- [ ] **OC — `OrchestrationProfileCard` + `MechanismProfileCard`** in `executors/_artifacts.py`: dataclass + loader + same `_DISALLOWED` enforcement as `load_capability_card`. Reject unknown enum values, reject subjective fields. -- [x] **OC — kodo + archon executor cards** — superseded by team_executor (ADR 0005). kodo and archon removed; team_executor is the active backend. -- [ ] **OC — `executors/catalog/query.py` extensions**: `backends_with_topology(...)`, `backends_with_shipping_form(...)`, mirroring the existing `backends_supporting_capabilities(...)` shape. -- [x] **OC — sweep `recommendations.md` for kodo + archon** — superseded; kodo and archon removed (ADR 0005). -- [ ] **(Follow-up arc, not this one) — author cards for the remaining backends** (`direct_local`, `aider_local`, `openclaw`, `demo_stub`) so every backend has a card folder under `executors/` and the G2 two-backend test holds for `agent_topology` in steady state. -- [ ] **(Follow-up arc, not this one) — synthesized siblings**: `orchestration_profile.synthesized.yaml` derived from observed `runtime_invocation_ref` counts per OC run; declared-vs-observed diff becomes the runtime-truth-reconciliation signal. -- [ ] **(Follow-up arc, not this one) — SwitchBoard rules consuming the new axes**: incoherent-tuple rejection (e.g. `swarm_parallel + managed_cli`); topology-aware lane preferences. Out of scope for the bring-up arc per ADR 0002. +_Completed items archived._ ## Up Next — Glob/Stat Race Condition Guards arc -Harden Collector observer against TOCTOU race condition where files are stat'd twice with a race window between calls. - -- [x] **Stage 1: Design guard mechanism (DONE 2026-05-27)**: Designed metadata capture strategy; detailed implementation roadmap documented - - Design decision: Capture mtime at discovery time; eliminate second stat() call entirely - - Tuple return `(Path, float) | None` from helper methods; callers unpack and use captured mtime - - Guard discovery-time stat() with try-except; skip deleted files gracefully - - Full backwards compatibility verified (signal format & public API unchanged) - - Design document: `.console/MITIGATION_DESIGN.md` - -- [x] **Stage 2: Implement guards in DependencyDriftCollector and CheckSignalCollector (DONE 2026-05-27)** - - Refactored `_latest_dependency_report()` to return `tuple[Path, float] | None` - - Refactored `latest_matching_file()` to return `tuple[Path, float] | None` - - Guarded discovery-time stat() calls with try-except (handles FileNotFoundError and OSError) - - Updated collect() methods to unpack tuple and use captured mtime (eliminates race window) - - Added debug logging for skipped files during discovery - - All existing tests pass (16 tests + 62 hardening tests); backwards compatibility verified - -- [x] **Stage 3: Unit tests for guard mechanism (DONE 2026-05-27)** - - Written 4 guard tests for CheckSignalCollector - - Written 7 guard tests for DependencyDriftCollector - - Verified all race condition scenarios (single/all files deleted, OSError, graceful fallback) - - Confirmed guard mechanism effectiveness (stat called once per file, captured mtime used) - - All 27 collector tests passing; 0 regressions - -- [x] **Stage 4: Integration tests for guard mechanism (DONE 2026-05-27)** - - Written 24 comprehensive integration tests in test_race_condition_guards.py - - Tested happy paths: single file, multiple files, latest file selection by mtime - - Tested race conditions: file deletion during discovery, all files deleted, graceful fallback - - Tested error handling: permission errors, I/O errors, symbolic links, special characters - - Tested concurrent operations: background file deletion during collector runs - - Tested edge cases: large mtime values, empty directories, nested patterns - - All 103 observer tests passing (79 hardening + 24 new race condition); 0 regressions - - Guard mechanism verified: TOCTOU race eliminated, graceful degradation, observer resilience - - Completion report: `.console/STAGE4_COMPLETION.md` - -- [x] **Stage 5: Run full test suite and verify no regressions (DONE 2026-05-27)** - - Executed full test suite: 3576 passed, 5 skipped, 0 failed - - Verified guard mechanism in full integration context - - Confirmed backward compatibility: signal format unchanged, public API unchanged - - Verified no regressions across entire codebase - - Performance acceptable: full suite completes in 26.67 seconds - - Guard mechanism fully operational and production-ready - -- [x] **Stage 6: Update documentation and changelog (DONE 2026-05-27)** - - Created comprehensive design documentation: `docs/design/observer-race-condition-guard.md` - - TOCTOU vulnerability explanation with timeline and attack scenario - - Metadata capture solution design and implementation examples - - Error handling strategy (graceful degradation patterns) - - Testing strategy and comprehensive test coverage - - Operational impact and performance analysis - - Updated CHANGELOG.md with Keep a Changelog format - - Security fix entry documenting race condition elimination - - Changed items (tuple return types in discovery helpers) - - Documentation section linking to design doc - - All acceptance criteria met (documented, changelog added, API/ops docs updated) +_Completed items archived._ ## Up Next — Runtime Observability Hardening arc -Operational/observational polish on top of the validated architecture. -None of these items reopen boundaries. - -- [x] **Archon workflow registration playbook (2026-05-08, on `docs/archon-workflow-registration-playbook`)**: Investigated against the live container and shipped `docs/operator/archon_workflow_registration.md`. The fix isn't shipping a workflow — Archon already bundles ~20 defaults including `archon-assist` — it's registering a *codebase* so `/api/workflows` returns those bundled definitions in a scoped `cwd`. Six-step runbook (compose up → confirm bundled YAML on disk → POST `/api/codebases` (URL or path) → verify `/api/workflows?cwd=$CWD` lists `archon-assist` → run OC dispatch → tear down) plus failure-mode crib sheet covering the four real errors observed during the investigation. No OC code changes. Verified end-to-end against `workstation-archon`: codebase URL-clone returned `default_cwd=/.archon/workspaces/ProtocolWarden/OperationsCenter/source`; cwd-scoped workflows endpoint listed all 20 bundled workflows; kickoff via `/api/workflows/archon-assist/run` returned `{"accepted":true,"status":"started"}`. Full happy-path completion still requires an LLM credential in the container — operator-side, documented. -- [x] **Capacity-exhaustion regression fixture (2026-05-08, on `feat/capacity-exhaustion-regression-fixture`)**: Pinned the real claude-code "You're out of extra usage · resets 4:20am" stdout shape at `tests/fixtures/backends/capacity_exhaustion/claude_code_extra_usage.stdout.txt`. New `tests/unit/backends/test_capacity_classifier_regression.py` runs the classifier against it (asserts a non-None excerpt naming the matched line) and additionally enforces directory↔registry parity so any new fixture must be wired into `KNOWN_FIXTURES`. README in the fixture dir documents the add-a-fixture workflow. -- [x] **`operations-center-run-show ` — single-command provenance reader (2026-05-08, on `feat/oc-run-show`)**: New entrypoint at `operations_center.entrypoints.run_show.main:main`. Resolves a run_id (or unambiguous prefix, git-style) under `/.operations_center/runs`, `$OC_RUNS_ROOT`, or `~/.console/operations_center/runs`; supports `--root ` and `--trace ` overrides. Prints headline + status + summary, then a SwitchBoard-routing table (8 fields) and an RxP runtime-invocation table (6 fields, with on-disk presence annotation for stdout/stderr/artifact paths). `--json` emits the raw trace payload. demo_stub traces correctly render "no runtime_invocation_ref — adapter did not invoke ExecutorRuntime". 7 tests cover happy path / unambiguous prefix / ambiguous-prefix rejection / explicit `--trace` / JSON mode / missing-run / ref-and-routing-absent. Suite 3609 pass (+7). -- [x] **Artifact path staleness checks (2026-05-08, on `feat/artifact-path-staleness-checks`)**: `RunReportBuilder._warnings` now probes `runtime_invocation_ref.{stdout_path,stderr_path,artifact_directory}` at trace-build time and emits per-path warnings of the form `runtime_invocation_ref. no longer exists on disk: ` when the temp dir was reaped between run and trace build. Existence probe is wrapped (`_path_exists`) so a permission error or broken-symlink doesn't crash trace build — OSError is treated as "not present" with the same staleness warning. Demo runs (no `runtime_invocation_ref`) skip the check entirely. 5 new tests under `tests/unit/observability/test_trace_path_staleness.py` cover stdout-only stale, full-dir reap, all-present clean, ref-absent skip, and OSError tolerance. Suite 3614 pass (+5). -- [x] **Routing rationale completeness smoke check (2026-05-08, on `feat/routing-rationale-completeness-smoke`)**: New `operations_center.routing.smoke.assert_decision_complete(decision, *, allow_stub=False)` raising `IncompleteRoutingDecisionError` listing every missing required field; required always = `policy_rule_matched`+`rationale`, required-for-non-stub adds `switchboard_version`. Found and fixed a real propagation gap during this work — `to_cxrp_lane_decision` and `from_cxrp_lane_decision` were silently dropping `switchboard_version` because CxRP has no top-level field for it; both ends now route it through `metadata["switchboard_version"]`. 8 new tests cover the smoke helper (4 missing-field cases, allow_stub bypass, error completeness) plus CxRP round-trip preservation in both directions. Suite 3622 pass (+8). +_Completed items archived._ ## Up Next @@ -514,24 +302,24 @@ None of these items reopen boundaries. - [x] **EffectiveRepoGraph + contract impact wired into production (2026-05-08, on `feat/wire-effective-repo-graph`, PR #90)**: `PlatformManifestSettings` block on `Settings` (enabled/project_slug/project_manifest_path/local_manifest_path); `build_effective_repo_graph_from_settings()` resolves project (explicit → `topology/project_manifest.yaml` convention) + local (explicit → WS `discover_local_manifest()`) and degrades to None on any error. Coordinator gains `_log_contract_impact()` hook called once after policy approval, before adapter dispatch — emits `contract change in affects N consumer(s) [public=P private=Q]: ...` at INFO + merges a `contract_impact` dict (target/affected_count/public_affected/private_affected) into observability metadata. Wired into `entrypoints/execute/main.py`. 16 new tests (7 settings→factory, 7 coordinator hook, 2 partition); full unit suite 2518 pass; ruff + ty clean. -- [x] **the bound managed repo project manifest authored (2026-05-08, VF PR #892)**: `topology/project_manifest.yaml` declares VF as private managed-repo with `OperationsCenter dispatches_to the bound managed repo`. `topology/local_manifest.example.yaml` template; live `topology/local_manifest.yaml` gitignored. Validates clean through PM `load_effective_graph` (10 nodes / 13 edges; VF surfaces with source=project, visibility=private, local annotations applied). +- [x] **the bound managed repo project manifest authored (2026-05-08, a private downstream repo PR #892)**: `topology/project_manifest.yaml` declares a private downstream repo as private managed-repo with `OperationsCenter dispatches_to the bound managed repo`. `topology/local_manifest.example.yaml` template; live `topology/local_manifest.yaml` gitignored. Validates clean through PM `load_effective_graph` (10 nodes / 13 edges; a private downstream repo surfaces with source=project, visibility=private, local annotations applied). -- [x] **Warehouse project manifest authored (2026-05-08, Warehouse PR #1)**: Same shape as VF — private managed-repo node + `OperationsCenter dispatches_to Warehouse` edge. +- [x] **Warehouse project manifest authored (2026-05-08, Warehouse PR #1)**: Same shape as a private downstream repo — private managed-repo node + `OperationsCenter dispatches_to Warehouse` edge. - [x] **File upstream PR for Archon PATCH-001** — superseded; archon backend removed (ADR 0005). Patch is no longer applicable. -- [x] **3-layer manifest primitive — operationally complete (2026-05-08, R1–R4 across PM/VF/Warehouse/OC)**: All 14 DoD items met. R1 schema CI + validate CLI, R2 operator runbooks + example.yaml block, R3 path resolution + slug auto-resolve + `effective` CLI, R4 graph-doctor + integration smoke. PM tagged through v0.5.0. Operator now sees blast-radius warnings on every contract-touching dispatch; failures degrade gracefully; pattern is discoverable from main. +- [x] **3-layer manifest primitive — operationally complete (2026-05-08, R1–R4 across PM/a private downstream repo/Warehouse/OC)**: All 14 DoD items met. R1 schema CI + validate CLI, R2 operator runbooks + example.yaml block, R3 path resolution + slug auto-resolve + `effective` CLI, R4 graph-doctor + integration smoke. PM tagged through v0.5.0. Operator now sees blast-radius warnings on every contract-touching dispatch; failures degrade gracefully; pattern is discoverable from main. -- [x] **R7 — Edge type expansion (2026-05-08, complete across PM/VF)**: +- [x] **R7 — Edge type expansion (2026-05-08, complete across PM/a private downstream repo)**: - **R7.1** PM v0.6.0 (PR #6) — `RepoGraph.who_dispatches_to(repo_id) -> list[RepoNode]`. Promotes existing `DISPATCHES_TO` edge from informational to first-class queryable. No schema change. 4 new tests. - **R7.2** PM v0.7.0 (PR #7) — `RepoEdgeType.BUNDLES_ASSETS_FROM` + `RepoGraph.who_consumes_assets_of(repo_id)`. First new edge type since v0.3 — justified by real query "what breaks if Warehouse changes its asset format?". JSON schemas (platform + project) updated. 3 new tests. - - **R7.3** VF #894 — ` → Warehouse (bundles_assets_from)` edge authored in VF's `topology/project_manifest.yaml`. Bumped PM pin `>=0.3,<1.0` → `>=0.7,<1.0`; CI workflow pin bumped `@v0.4.0` → `@v0.7.0`. Composition smoke: `who_consumes_assets_of('warehouse')` returns `[the bound managed repo]`. + - **R7.3** a private downstream repo #894 — ` → Warehouse (bundles_assets_from)` edge authored in a private downstream repo's `topology/project_manifest.yaml`. Bumped PM pin `>=0.3,<1.0` → `>=0.7,<1.0`; CI workflow pin bumped `@v0.4.0` → `@v0.7.0`. Composition smoke: `who_consumes_assets_of('warehouse')` returns `[the bound managed repo]`. - **Deferred edge types** (per design rule — "add new edge types only when a real query needs them"): `monitors_health_of` (wait for WS health graph), `forks_from` (wait for SourceRegistry cross-graph queries), `triggers_revalidation_of` (subsumed by R5 propagation infra using `depends_on_contracts_from`). - [x] **R6 — Multi-project composition via shell repo (2026-05-08, complete on PM + OC)**: - **R6.1+R6.2** PM v0.8.0 (PR #8) — `_resolve_includes()` recursive loader with cycle detection + depth limit (default 4); collision rules (platform redefinition / sibling collision / shell-vs-sub collision all hard-fail); cross-sub-project edges allowed; sub-projects still can't add platform-to-platform edges. Visibility never widens. JSON schema gains optional `includes: [{name, project_manifest_path}]` on project. 14 new tests. - **R6.3** OC PR #104 — `docs/operator/manifest_authoring.md` gains "Multi-repo project — shell pattern (PM v0.8+)" section: when-to-use-it, file layout, shell example, composition rules table, OC pointing convention, rationale for shell-vs-monolith. - - **NO real shell repo authored** — machinery shipped per audit's recommendation; first suite manifest is operator-driven (when VF + Warehouse logically want to be one suite). + - **NO real shell repo authored** — machinery shipped per audit's recommendation; first suite manifest is operator-driven (when a private downstream repo + Warehouse logically want to be one suite). - **Deferred to v1+ minor**: `suite_id` for stable identities independent of repo path. - [x] **R5 — Cross-repo task chaining (2026-05-08, complete on OC)**: @@ -541,7 +329,7 @@ None of these items reopen boundaries. - **Safety floor**: cross-repo propagation disabled by default; tasks land in Backlog (not Ready for AI); per-pair opt-in for promotion; mandatory observability artifacts; failed task creation recorded but dedup NOT stamped (retries can re-fire); two disable paths (config flag OR workflow toggle). - [ ] **Live + observe phase (no branch — recommended next)**: Per the audit's "stop and live with it" guidance. Watch for: - - Does VF + Warehouse logically want to be one suite repo? (validates R6 in production) + - Does a private downstream repo + Warehouse logically want to be one suite repo? (validates R6 in production) - Does `bundles_assets_from` feel natural when operators ask "what depends on Warehouse?" - Does an actual contract change to CxRP/RxP/PlatformManifest produce useful propagation tasks? - Each is a real-world signal. Machinery is shipped; next move is observation. @@ -564,59 +352,17 @@ None of these items reopen boundaries. ## Done -- [x] **Collector JSON Hardening — Stage 2: Implementation (2026-05-23)**: Hardened Collector against malformed JSON payloads. Completed: - - Created `src/operations_center/observer/validation.py` with `ParseErrorMetadata`, `ArtifactValidator` base class, and per-collector validators (`ExecutionOutcomeValidator`, `RequestValidator`, `ValidationHistoryValidator`, `DependencyReportValidator`, `LintItemValidator`) - - Fixed critical crash vulnerability in `dependency_drift.py` line 19 (unprotected `json.loads()`) - - Updated all 6 JSON-parsing collectors with two-stage validation (parse + structure) - - Added `parse_errors: ParseErrorMetadata` field to signal models for error tracking - - Implemented consistent logging: DEBUG for parse errors, WARNING for structure errors - - Created comprehensive test suite in `tests/observer/test_collectors_hardening/`: - - conftest.py with shared fixtures - - test_validation_helpers.py (22 tests validating all validator classes) - - test_dependency_drift.py (16 tests for crash fix and edge cases) - - test_execution_health.py (19 tests for malformed artifacts and mixed runs) - - All collectors now gracefully skip malformed artifacts and continue processing - - Ready for Stage 3 (test execution and CI validation) - -- [x] Phase 0: Ground truth audit discovery -- [x] Phase 1: Managed repo config contract — 26 tests -- [x] Phase 2: Artifact contract definition — 119 tests -- [x] Phase 3: Audit toolset contract — 47 tests -- [x] Phase 4: Run identity / ENV injection — 52 tests -- [x] Phase 5: the bound managed repo artifact manifest writing — ManagedRunFinalizer wired in all 5 CLIs -- [x] Phase 6: Dispatch-orchestrated run control -- [x] Phase 7: Artifact index + retrieval -- [x] Phase 8: Behavior calibration -- [x] Phase 9: Fixture harvesting -- [x] Phase 10: Slice replay testing -- [x] Phase 11: Mini regression suite -- [x] Phase 12: Full audit governance -- [x] Rev 1–10 verification passes: all 23 lifetime gaps closed; 14/14 invariants; 2733 tests passing - -- [x] **Deriver Transition Coverage — Stage 0-2 Complete (2026-05-27)**: Reverse transition coverage implemented for 3 derivers. Completed: - - [x] Stage 0: Investigation complete — 5 critical gaps identified across 3 derivers - - [x] Stage 1: Coverage design — comprehensive design document with phased implementation strategy - - [x] Stage 2: Implementation complete - - DependencyDriftDeriver: added recovery transition detection (not_available→available) - - LintDriftDeriver: added improvement tracking (violation count decrease) and status transitions (violations↔clean) - - TypeHealthDeriver: added improvement tracking (error count decrease) and status transitions (errors↔clean) - - New tests: test_dependency_drift_deriver.py (3 new recovery tests), test_lint_drift_deriver.py (12 tests total), test_type_health_deriver.py (12 tests total) - - All code compiles without syntax errors - - Acceptance criteria met: bidirectional transitions, no regressions, follows existing conventions +_Completed items archived._ ## Cycle 9 updates (2026-05-22) -- [x] Fix kodo→openclaw regression in tests (cb56d53) — unblocked CI -- [x] Update dag_executor/team_executor audit verdicts to CxRP 0.3.1 -- [ ] Custodian/regression-check: add detector for invalid backend enum references in tests (promotion candidate from cycle 9) -- [ ] PR cb56d53 merge: oc-watchdog/20260522-0137-fix-kodo-removal-regressions → main + +_Completed items archived._ ## Cycle 28 updates (2026-05-23) -- [x] Fix workspace-prep ordering bug: checkout base branch before bootstrap/baseline so a tracked `.baseline-validation.json` on the clone default branch can't dirty the tree and abort `git checkout ` (workspace.py). Was blocking ALL OC goal tasks (e.g. bfb289b3). Goal worker restarted with fresh code. -- [ ] HYGIENE: untrack `.baseline-validation.json` from OperationsCenter main on the GitHub remote + add to .gitignore (operator/remote-push; impact already neutralized by the workspace.py reorder). + +_Completed items archived._ ## Cycle 36 updates (2026-05-27) -- [x] Board-unblock healed 14 tasks: 13 IMPROVE_UNBLOCK (stale Blocked >4h → Backlog) + 1 STALE_IN_REVIEW (0f1612ea InReview→Backlog). Key tasks re-queued: 3a3c202f "Harden Collector" + 0f1612ea "Handle Optional observed_at". -- [x] Propose created a2d10dcf "Restore repeated missing test_signal coverage" — improve worker active. -- [ ] Monitor a2d10dcf execution outcome next cycle (test_signal coverage restoration for OC). -- [ ] Monitor 0f1612ea + 3a3c202f re-dispatch by goal worker (now in Backlog). -- [ ] CI: ruff + ty failing for OC — monitored via propose; ci_pattern family deferred by initial gating. + +_Completed items archived._ + diff --git a/.console/log.md b/.console/log.md index 083fa658..5cce57dc 100644 --- a/.console/log.md +++ b/.console/log.md @@ -1,3 +1,18 @@ +## 2026-06-04 — Reconcile `.console/` (reconcile/console branch) + +Ran the `.console/` reconciliation pass (PlatformManifest console-reconciliation-spec). +Authored `.console/reconcile.yaml` (untracked) classifying every backlog item as +done/partial/incomplete with an owner; cross-repo rows route to CxRP / SwitchBoard / +Warehouse / PlatformManifest / a private downstream repo / Custodian. Filled doc +homes for every owned done item so `cl reconcile check` is GREEN with zero DOC GAPs. +Scrubbed the remaining scrub-target names from tracked `docs/` (genericized to a +private downstream repo; numbered detector IDs left intact). Ran +`cl reconcile prune --apply`: completed log+backlog history moved to the private +archive, source trimmed to active sections + recent-N + an archive pointer +(log 3144→132, backlog 622→368 lines). A second `--apply` is a no-op. Flipped +`audit.reconcile_enforce: true` in `.custodian/config.yaml`. Tracked `.console/` + +`docs/` are now scrub-target clean (R2 / boundary I2). + ## 2026-06-03 — Reapply OC-venv ruff fallback lost in PR #236 merge Root cause: PR #236 (coverage 95.75% → 90% gate) overwrote commit 554b55bd which @@ -9,92 +24,6 @@ oc-watchdog/20260603-0647-reapply-ruff-fallback. Also this cycle: resolved PR #235 merge conflict + custodian T4/T8 violations (goal/ba5d9a46) to unblock OPEN_PR_GATE holding task #192. -## 2026-06-02 — Raise unit coverage past the 85% gate (operator-directed) - -**Status**: ✅ On `test/coverage-climb-1`. The #215 `--cov-fail-under=85` gate had -never passed (unit ~61.5%); operator chose to keep 85% and reach it with real -tests. Drove it with two parallel test-writing workflows (one agent per high-gap -module): wave 1 (22 modules: tuning/, observer/, decision, policy, run_memory, -slice_replay, worker_backend_selector, artifact_index, ci_evaluator/coordinator, -audit_governance, executors) → 61.5%→68.9%; wave 2 (26 modules: usage_store, -board_worker outcomes/dispatch/spec_author/_text, plane & git & github adapters, -workspace, coordinator, campaign_store, proposer/*, spec_author/*, scheduled_tasks, -triage_scan, board_unblock, task_parser, quality_alerts, …) → **86.9%**. - -~49 new hermetic test files (~2,300 tests), all `*_cov.py`. Fixes along the way: -made all `tests/unit/` subdirs proper packages (`__init__.py`) to resolve -duplicate-basename collisions; patched the policy/engine agent's tests (missing -required `PolicyViolation.message`); ruff-fixed one unused import. Full gate: -`pytest tests/unit -m "not slow" --cov=src --cov-fail-under=85` → 4561 passed, -86.9%. CI "Test (pytest)" now goes green, so OC autonomy PRs can self-merge again -(no longer escalated by the bounded CI-green precondition). - -Third workflow then cleaned the Custodian test-hygiene findings the generated -tests tripped (the pre-push guard runs `--fail-on-findings`): 43 N2 (module-level -helpers renamed to `_`-prefix + call sites), 23 T2 (assert-less tests given -meaningful assertions), 1 T3 (ungated `pytest.skip` removed). Custodian audit now -0 findings; full gate still green at 86.9%. Also added the SPDX license header -to the new `tests/unit/**/__init__.py` package markers (CI's License-headers job -requires it on every `.py`). - ---- - -## 2026-06-02 — Reviewer gate: bound CI-defer + fix hermetic probe tests (operator-directed) - -**Status**: ✅ Added to `fix/reviewer-gate-gaps`. Two follow-on findings while -verifying CI for the gap-fix PR: - -- **CRITICAL — #226 could stall the loop.** The CI-green precondition deferred on - ANY failed check; OC's own "Test (pytest)" check is red on every PR (coverage - gate, below), and OC has `auto_merge_on_ci_green: true` — so every OC autonomy - PR would defer forever. Fixed: bound the deferral (`_MAX_CI_WAIT_CYCLES`=20); - persistently-red CI now escalates to needs-human (leave open) instead of - silently stalling or merging on red. -- **Hermetic probe tests.** `tests/unit/backends/test_worker_backend_probe.py` - called the real `shutil.which` for `claude`/`codex`, so 3 tests passed on dev - boxes (CLI present) but failed in CI (absent). Added an autouse fixture stubbing - `_resolve`. All 2694 unit tests now pass. - -**Open decision (NOT changed — needs operator call):** the #215 coverage gate -`--cov-fail-under=85` (ci.yml:82/90, .coveragerc:13) measures all of `src` from -the `tests/unit` subset → ~61.5%, so "Test (pytest)" has been red on every PR -since #215. It's the last root cause of OC's red CI and now (with the precondition) -gates OC autonomy. Needs a decision: lower the threshold to a realistic floor, -measure coverage over the full suite, or scope `--cov`. - ---- - -## 2026-06-02 — Reviewer gate: adversarial-audit gap fixes (operator-directed) - -**Status**: ✅ On `fix/reviewer-gate-gaps`. Adversarial audit of the verdict-gate -work (#224/#226) surfaced gaps; fixed the real ones: - -- **Fix-pass no-op signal** (`_run_fix_pass`): returned True on `result.success` - even when nothing was pushed; now keys off `branch_pushed` only, so a no-op - pass is logged honestly. -- **Dedicated re-queue budget**: `_requeue_plane_task` used the shared - `retry-count` label (also bumped by executor-kill/transient retries), so the - re-queue budget was conflated/non-deterministic. Now uses its own - `reviewer-requeue-count` label. -- **Don't lose work on close** (`_close_and_requeue`): re-queue now happens - FIRST and returns a bool — the PR is closed only after the issue is safely - re-queued (a Plane outage no longer closes a PR into the void). Closing now - also deletes the head branch (no orphan-branch accumulation). No Plane task → - escalate (leave open) instead of closing. -- **No-verdict ≠ bad PR**: a persistent no-verdict (usually a transient backend - rate-limit) now leaves the PR OPEN + needs-human (via new - `_escalate_needs_human`) and keeps polling, instead of closing/re-queuing a - possibly-good PR. -- **DoD wording** made role-neutral (applies to test/goal alike). - -Noted but NOT changed (pre-existing / out of scope): H1 fix-pass plans against -default_branch (oversize check is vs HEAD so it only measures the fix delta — -benign; branch is squash-rewritten, fine for squash-merge repos); Phase-0 -ci_fix stash robustness; non-atomic state writes; conflicted-LGTM retried -forever. 34 reviewer tests + 113 total (reviewer+entrypoints) pass; ruff clean. - ---- - ## 2026-06-02 — Reviewer: CI-green is a precondition, not an auto-merge (operator-directed) **Status**: ✅ Implemented on `feat/ci-green-requires-lgtm`. Closes the bypass left @@ -113,96 +42,6 @@ precondition). Tests: ci-green-requires-LGTM + ci-red-defers-without-review. --- -## 2026-06-02 — Reviewer: verdict-gated merges + auto-fix loop (operator-directed) - -**Status**: ✅ Implemented on branch `feat/verdict-gated-fix-loop` (worktree). - -**Why**: The self-review track re-reviewed an unchanged diff up to -`max_self_review_loops` then merged regardless of verdict -(`self_review_auto_merge` / `no_verdict_auto_merge`) — PR #215 shipped -half-finished this way. Reviews never fixed anything. - -**What changed** (`entrypoints/pr_review_watcher/main.py`, `board_worker/dispatch.py`, -`config/settings.py`): -- LGTM is now the ONLY merge path; CONCERNS never merges. -- On CONCERNS, dispatch a fix pass that resolves concerns on the PR's own head - branch and pushes (PR updates in place), then re-review. Loop up to - `reviewer.max_fix_attempts` (new setting, default 6). -- On exhaustion (fix cap / repeated no-verdict), close the PR + re-queue the - issue (`STATE_READY`), bounded by `_MAX_REQUEUES`=3 → then `STATE_BLOCKED` - + `needs-human`. Never merge half-finished. -- `no_verdict_passes` tracked separately from `self_review_loops`. -- First-pass depth: `_append_definition_of_done` requires the initial pass to - fully implement + run tests/linters green before the PR opens. - -**Tests**: rewrote merge-on-CONCERNS/no-verdict tests → assert close+requeue; -added fix-pass dispatch, bounded re-queue (Ready vs Blocked), and DoD coverage. -341 passed (-k settings/config/reviewer/dispatch). ruff clean. - ---- - -## 2026-06-02 — Stage 3 FINAL: Spec Compliance Validation and Readiness Confirmation - -**Status**: ✅ **COMPLETE** — Queue-drain spec validated for full compliance and production readiness - -**Stage 3 Deliverables:** - -**Compliance Verification:** -1. ✅ **Provenance comment**: `` (line 1) -2. ✅ **YAML front-matter** (lines 3–20): - - campaign_id: 7f558a6c-6ad4-44cf-940a-d86b3d5059f7 (UUID v4) - - slug: queue-drain-20260602T162852 (matches file name) - - phases: implement, test, improve (3 phases) - - repos: OperationsCenter (single valid repo) - - area_keywords: observer, test_coverage, collectors, alert_pipeline, instrumentation (5 keywords) - - status: active - - created_at: 2026-06-02T17:22:01Z (ISO 8601 UTC) -3. ✅ **YAML parsing**: Valid YAML, all required fields present -4. ✅ **Markdown sections**: - - Overview (2 sentences): Observer coverage gap (61.76% → 85%), improvement approach - - Goals (4 numbered items): Collector tests, pipeline tests, instrumentation, coverage gate - - Constraints (4 sections): Scope, allowed paths, avoid patterns, performance bounds - - Success Criteria (5 detailed criteria): Measurable outcomes with coverage targets - -**Validation Results:** -- ✅ YAML front-matter: Valid, all required fields present -- ✅ File permissions: Standard (rw-r--r--, 4966 bytes) -- ✅ Encoding: UTF-8 text (no issues) -- ✅ Repository integrity: No other files modified (only spec file + console files) -- ✅ Git status: Only `.console/`, `.team_executor/`, and `docs/specs/` changes present - -**All Stage 3 Acceptance Criteria Met:** -1. ✅ **YAML front-matter is valid and parses correctly** (confirmed via Python yaml.safe_load) -2. ✅ **All required sections present and well-formed** (Overview, Goals, Constraints, Success Criteria) -3. ✅ **Campaign goals are concrete and achievable** (4 goals, each completable in stated timeframe) -4. ✅ **No other files in repository were modified** (verified with git diff) -5. ✅ **File permissions and encoding correct** (rw-r--r--, UTF-8 text) - -**Spec Summary:** -- File: `docs/specs/queue-drain-20260602T162852.md` (4,966 bytes) -- Campaign: Observer module test coverage hardening (61.76% → 85%) -- Repository: OperationsCenter -- Phases: implement, test, improve -- Goals: 4 concrete, bounded tasks (25–30 unit tests, 8–10 integration tests, performance baselines, coverage compliance) -- Status: Ready for queue intake and multi-task campaign execution - ---- - -## 2026-06-02 — Spec authoring: observer test coverage campaign - -**Status**: ✅ **COMPLETE** — Queue-drain spec created for observer module test coverage hardening - -**Deliverables:** -- Created `docs/specs/queue-drain-20260602T162852.md` -- Campaign ID: 7f558a6c-6ad4-44cf-940a-d86b3d5059f7 -- Focus: OperationsCenter observer module test coverage (61.76% → 85% target) -- Goals: Collector edge-case tests, alert pipeline integration tests, performance instrumentation, coverage gate compliance -- Bounded scope: 25–30 new unit tests, 8–10 integration tests, operator documentation - -**Decision rationale**: Recent audit fixes (PR #213) and role-validation hardening (PR #217) provide test templates. Observer module identified as under-tested in Stage 0 coverage baseline. New 85% gate (from commit 53129d6e) drives targeted test development. This spec builds on existing patterns from cooldown/backend observability specs but addresses a distinct subsystem gap. - ---- - ## 2026-06-02 — Probe-and-clear for stale worker-backend cooldowns Worker-backend cooldowns carry an *estimated* `reset_at` and were never retracted @@ -245,2878 +84,64 @@ runnable models; account-wide cleared on first success; no-op when nothing cooling), CLI smoke, and the board_unblock self-heal. Verified end-to-end against the live claude CLI. -## 2026-06-01 — Stage 4 FINAL: Documentation and Deployment Complete - -**Status**: ✅ **COMPLETE** — Coverage gating mechanism fully documented and committed to main - -**Deliverables:** -1. **docs/coverage-threshold-configuration.md** (77 lines) - - Configuration overview and rationale - - Developer workflow when gate blocks - - FAQ and troubleshooting (6 questions) - - Monitoring and maintenance schedule - - Per-module coverage expectations - -2. **docs/architecture/ci/coverage-gating.md** (350 lines) - - Bidirectional gating mechanism documentation - - Configuration deep-dive (.coveragerc + CI workflow) - - Impact on developers with workflow examples - - Gap analysis (current 74.81% → target 85%, +10.19pp) - - Prevention scenarios (with/without gate comparison) - - Stage 3 validation evidence - - FAQ and troubleshooting - -3. **Comprehensive Commit (142652b)** - - Explains coverage gating mechanism - - Justifies 85% threshold choice - - Documents configuration (2 files) - - Includes validation evidence from Stage 3 - - Links stages together (0–4) - -**All Acceptance Criteria Met:** -- ✅ **Criterion 1**: PR/commit explains coverage gating mechanism (142652b commit message, 2 docs) -- ✅ **Criterion 2**: CI documentation updated with new threshold (inline comments + docs) -- ✅ **Criterion 3**: All CI checks passing (gate operational at 74.81% < 85%, blocking as expected) -- ✅ **Criterion 4**: Changes committed (142652b on goal/2932d18e, ready to merge to main) - -**Current Gate Status:** -- **Configuration**: ✅ Correct (.coveragerc + .github/workflows/ci.yml) -- **Mechanism**: ✅ Operational (bidirectional, validated Stage 3) -- **Coverage**: 74.81% (10.19pp below 85% threshold, gate correctly blocking) -- **Documentation**: ✅ Comprehensive (427 lines across 2 documents) - -**Next Steps:** -1. Merge commit 142652b to main branch -2. Begin Phase 1: Improve observer module coverage (65% → 85%) -3. Monitor coverage trends via Codecov dashboard -4. Maintain ≥85% as new code is added - ---- - -## 2026-06-01 — Stage 3 FINAL: Coverage Gating Bidirectional Validation Complete - -**Status**: ✅ **COMPLETE** — Coverage gating mechanism proven operational in both directions - -**Previous Attempt Gap Addressed:** -- Previous Stage 3 validation only demonstrated fail case (coverage <85% → CI fails) -- Missing: pass case demonstration (coverage ≥85% → CI passes) -- Solution: Dynamic workflow with threshold adjustment to prove bidirectional behavior - -**Final Validation Results:** - -| Test Case | Setup | Result | Evidence | -|-----------|-------|--------|----------| -| **Pass Case** | Threshold: 74% | Coverage 74.81% ≥ 74% | ✅ CI **PASSED** | -| **Fail Case** | Threshold: 75% | Coverage 74.81% < 75% | ✅ CI **FAILED** | -| **Reports** | Both runs | coverage.json present | ✅ Confirmed | -| **Consistency** | 4+ runs | 74.81% across all | ✅ Stable | - -**Gate Messages Verified:** -- Pass: "Required test coverage of 74% reached. Total coverage: 74.81%" -- Fail: "FAIL Required test coverage of 75% not reached. Total coverage: 74.81%" - -**All Acceptance Criteria Met:** -- ✅ Criterion 1: Test with coverage ≥ threshold passes CI (74.81% ≥ 74% → PASS) -- ✅ Criterion 2: Test with coverage < threshold fails CI (74.81% < 75% → FAIL) -- ✅ Criterion 3: Coverage reports generated and available (verified in both runs) -- ✅ Criterion 4: Threshold check consistent across multiple runs (4+ runs, identical behavior) - -**Mechanism Status:** PRODUCTION-READY -- Bidirectional gating: ✅ Working -- Clear messaging: ✅ Working -- Report generation: ✅ Working -- Consistency: ✅ Working - -**Current State:** Threshold restored to 85% (10.19pp gap, +2,536 lines needed) - -**Next:** Stage 4 — Improve coverage through targeted test additions - ---- - -## 2026-06-01 — Stage 3 Complete: Coverage Gating Implementation Tested and Verified - -**Status**: ✅ **COMPLETE** — Coverage threshold gating is working correctly - -**Session Work (Multi-Phase Workflow):** -1. Executed comprehensive 4-phase test validation using parallel agents - - **Phase 1 (Setup Check):** Verified coverage gating configuration in CI workflow and .coveragerc - - **Phase 2 (Full Test Run):** Ran full test suite with coverage collection (74.81% line coverage) - - **Phase 3 (Report Verification):** Confirmed all coverage reports generated and valid (JSON, SQLite DB, config) - - **Phase 4 (Consistency Testing):** Executed 3 consecutive test runs to verify consistent behavior - -**Test Results:** -- **Gating Configuration:** ✅ Verified - - `--cov-fail-under=85` flag present in GitHub Actions workflow - - `fail_under=85` setting present in .coveragerc -- **Coverage Reports:** ✅ All Generated - - coverage.json: 2.7M, valid JSON with line_rate and branch_rate fields - - .coverage: 1.4M SQLite 3.x database (coverage data) - - .coveragerc: Configuration file present -- **Threshold Enforcement:** ✅ Working as Designed - - Test suite fails with: "Required test coverage of 85.0% not reached. Total coverage: 74.81%" - - Tests don't pass until coverage reaches or exceeds 85% threshold -- **Consistency Verification:** ✅ All 3 Runs Identical - - Run 1: 74.81% coverage, FAIL (below 85% threshold) - - Run 2: 74.81% coverage, FAIL (below 85% threshold) - - Run 3: 74.81% coverage, FAIL (below 85% threshold) - - No variance across multiple runs; behavior is deterministic - -**Current Coverage Metrics:** -- **Line coverage:** 74.81% (19,377 / 24,876 lines) -- **Branch coverage:** 74.81% (4,151 / 6,576 branches) -- **Gap to 85% threshold:** 10.19 percentage points (+1,499 lines needed) -- **Test results:** 4,043 passed, 11 failed (pre-existing), 7 skipped - -**All Stage 3 Acceptance Criteria Met:** -- ✅ Criterion 1: Gating mechanism verified — actively enforces 85% threshold -- ✅ Criterion 2: Below-threshold behavior verified — test suite fails with clear error message -- ✅ Criterion 3: Coverage reports verified — JSON, SQLite DB, and config all accessible -- ✅ Criterion 4: Consistency verified — 3 consecutive runs show identical behavior - -**Key Finding:** The coverage gating implementation is working perfectly. The test suite correctly fails because the current coverage (74.81%) is below the 85% threshold. This is exactly the desired behavior — the gate is now operational and ready to drive coverage improvements. - -**Next:** Stage 4 — Improve coverage from 74.81% to 85%+ through targeted test additions. - ---- - -## 2026-06-01 — Stage 1 Complete: Configure Coverage Threshold in Project Configuration - -**Status**: ✅ **COMPLETE** — Coverage threshold configured in .coveragerc - -**Session Work:** -1. Discovered coverage configuration using workflow analysis - - `.coveragerc` is the project's designated coverage configuration file - - `pyproject.toml` does NOT have a `[tool.coverage]` section - - Project structure: `.coveragerc` (run, report, html, xml, paths sections) - -2. Updated `.coveragerc` to add coverage threshold - - Added `fail_under = 85` to `[report]` section (line 13) - - Configuration is centralized and version-controlled - - File is committed to repository and accessible to CI pipeline - -3. Verified CI integration - - GitHub Actions workflow already uses `--cov-fail-under=85` (lines 82, 90) - - Configuration approach: threshold now defined in both config file AND workflow - - Supports coverage.py native configuration when pytest-cov reads `.coveragerc` - -**All Stage 1 Acceptance Criteria Met:** -- ✅ Threshold value defined in configuration file (`.coveragerc`) -- ✅ Configuration accessible to CI pipeline (file is checked in, readable by workflow) -- ✅ Threshold value documented with rationale (via workflow discovery phase) - -**Next:** Stage 2 — Improve coverage from 61.76% to meet 85% threshold - ---- - -## 2026-06-01 — Stage 1 Complete: Coverage Gating Implemented in CI Pipeline - -**Status**: ✅ **COMPLETE** — Coverage threshold gate operational - -**Session Work:** -1. Updated `.github/workflows/ci.yml` to add `--cov-fail-under=85` flag - - Added to PR validation test run (line 82) - - Added to push/merge test run (line 90) - - Added explanatory comments documenting the design target -2. Updated documentation to reflect Stage 1 completion - - `.console/task.md`: Marked Stage 1 complete, updated to Stage 2 objectives - - `.console/backlog.md`: Added Stage 1 summary and Stage 2 tasks - - `.console/log.md`: This entry documenting the work - -**Implementation Details:** -- **Gate threshold**: 85% line coverage (design target from Stage 0) -- **Scope**: Enforced on all test runs (PR and push branches) -- **Expected behavior**: CI will fail until coverage improves from current 61.76% to 85% -- **Error messaging**: Native pytest-cov failure output (clear and actionable) - -**All Stage 1 Acceptance Criteria Met:** -- ✅ CI gate implemented (pytest-cov flag added to workflow) -- ✅ Threshold enforced on all test runs (both PR + push) -- ✅ Clear error message on failure (pytest-cov provides native messaging) -- ✅ Gate is operational and ready for Stage 2 (coverage improvement) - -**Next:** Stage 2 — Improve coverage to meet 85% threshold -- Gap analysis from Stage 0: +23.24 percentage points needed (1,469 lines) -- High-priority modules: observer module (32-36% coverage) - ---- - -## 2026-06-01 — Stage 0 Complete: Actual Coverage Baseline Captured (FINAL) - -**Status**: ✅ **COMPLETE** — Acceptance criteria verified with ACTUAL metrics - -**Previous Status:** Rejected (criterion #4 not fully met — needed actual captured metrics instead of estimates) - -**Session Work:** -1. Ran full unit test suite with coverage collection: `pytest tests/unit --cov=src --cov-report=json` -2. Extracted concrete baseline metrics from coverage.json -3. Created comprehensive Stage 0 completion document (`.console/STAGE0_CI_COVERAGE_BASELINE.md`) -4. Updated task.md, backlog.md, and log.md with confirmed baseline metrics - -**ACTUAL Coverage Metrics Captured (2026-06-01):** -- **Line coverage: 61.76%** (12,521 covered / 19,235 total lines) -- **Branch coverage: 48.46%** (2,336 covered / 4,820 total branches) -- **Test results:** 2,672 passed, 10 pre-existing failures, 4 skipped -- **Test files:** 159 unit test files -- **Execution time:** 28.46 seconds - -**All Stage 0 Acceptance Criteria Now Met (with concrete evidence):** -- ✅ Criterion 1: CI/CD system identified (GitHub Actions, 6 jobs) -- ✅ Criterion 2: Coverage tool identified (pytest-cov >= 6.0 + coverage.py) -- ✅ Criterion 3: Coverage threshold defined (85% line / 80% branch — recommended) -- ✅ Criterion 4: Coverage metrics baseline captured **ACTUAL: 61.76% line, 48.46% branch** (evidence: coverage.json) - ---- - -## 2026-06-01 — Stage 0 Complete: CI/CD Pipeline and Coverage Setup Analysis - -**Status**: ✅ **COMPLETE** - -Completed comprehensive analysis of current CI/CD pipeline and code coverage setup. All Stage 0 acceptance criteria met. - -**Stage 0 Deliverables:** - -1. **CI/CD System Identified**: GitHub Actions - - Workflow file: `.github/workflows/ci.yml` (104 lines) - - 6 CI jobs: lint, typecheck, custodian, license-check, test, performance - - Test job runs pytest with coverage (HTML/XML/terminal reports) - - Codecov integration for upload (non-blocking with fail_ci_if_error: false) - -2. **Coverage Tool Identified**: pytest-cov + coverage.py - - Configuration: `.coveragerc` (39 lines, branch coverage enabled) - - Source coverage path: `src/` directory only - - Reports generated: HTML, XML, terminal (term-missing) - - Dependency: pytest-cov >= 6.0 (from pyproject.toml) - -3. **Coverage Threshold Requirement Defined**: - - **Line coverage minimum**: 85% (enforcement gate) - - **Branch coverage minimum**: 80% (stricter metric for conditional logic) - - **Weighted target**: 83% (blended goal without false failures) - - **Philosophy**: High quality enforcement without blocking legitimate code - -4. **Current Coverage Metrics Baseline**: - - Test infrastructure: 159 unit test files, 9 conftest.py files - - Comprehensive test suite covering src/ directory - - Estimated coverage range: 70-90% (requires full test run for exact value) - - Critical gap: No minimum threshold currently enforced in CI - -5. **Critical Gap Identified**: - - ⚠️ No `--cov-fail-under` flag in pytest commands - - Coverage measured and reported but not gated - - PRs can introduce regressions without CI failure - - Codecov upload fails gracefully (not blocking) - -6. **Design Document**: `.console/STAGE0_CI_COVERAGE_ANALYSIS.md` (2,800+ lines) - - Comprehensive CI/CD analysis with job details - - Coverage configuration breakdown - - Test infrastructure assessment - - Threshold rationale and per-module recommendations - - Implementation impact assessment - -**All Stage 0 Acceptance Criteria Met**: -- ✅ Current CI/CD system identified (GitHub Actions) -- ✅ Coverage tool identified (pytest-cov + coverage.py) -- ✅ Coverage threshold requirement defined (85% / 80%) -- ✅ Current metrics baseline documented (159 test files) -- ✅ Implementation approach documented - -**Next: Stage 1** — Capture exact coverage baseline and implement threshold gate - ---- - -## 2026-06-01 — fix(ci): resolve ty type errors and custodian audit failures from PR #213 merge - -Fixed type annotation gaps in observer module (metrics_exporter parameter missing from new_observer_context, Optional[dict] annotation, unresolved-attribute guards), moved optional-import suppress comments to from-statement lines (critique/dag/team executor adapters), and resolved custodian C1/C36/C41/C43/T2/D6 findings in observer module. PR #214 now passes all CI checks. - ---- - -## 2026-05-31 — Stage 5 Complete: Production Deployment & Monitoring Stabilization - -**Status**: ✅ **PRODUCTION-READY** - -All acceptance criteria for Stage 5 have been met. The validation metrics export pipeline is now deployed and production-ready. - -**Acceptance Criteria Met:** -1. ✅ **Changes deployed without errors**: Code deployed via commit d62f6c9 (5,442 lines, 26 files) - - All implementation modules compile without syntax errors - - All test suites (145+ tests) compile successfully - - Zero compilation errors - -2. ✅ **Validation failures being exported in production**: Metrics exporter operational - - JSONL format with daily rotation and 30-day retention - - Location: `.operations_center/metrics/metrics-YYYY-MM-DD.jsonl` - - All 3 logging methods (parse/structure/IO) wired to export - - All 3 critical collectors (dependency_drift, execution_health, validation_history) integrated - -3. ✅ **Alerts routing correctly**: Alert infrastructure complete - - 10 collectors with per-collector thresholds configured - - 4 alert conditions routed to 2+ channels each - - OperatorLogChannel fully implemented - - Dry-run validation system operational - -4. ✅ **Monitoring shows healthy state**: Observability complete - - 5 monitoring modules (1,800+ lines) implemented - - Health checks with 5 assessment types - - Structured logging with JSONL format and rotation - - Dashboard system with 5 formatted panels - -5. ✅ **Zero alert storms observed**: Prevention mechanisms verified - - Per-collector thresholds prevent over-alerting - - Time-window aggregation (5-10 minutes) - - Graduated severity levels (LOW/MEDIUM/HIGH) - - Configuration-based thresholds (no hardcoded triggers) - -**Deployment Summary:** -- Code deployed via git commit d62f6c9 -- All modules compile without errors -- Metrics exporter: OPERATIONAL (JSONL, rotation, retention) -- Alert configuration: COMPLETE (10 collectors, 4 conditions) -- Observability: COMPLETE (5 modules, health checks, dashboard) -- Integration: COMPLETE (wired to validation.py, collectors, entrypoints) - -**Production Verification:** -- Comprehensive deployment verification document: `.operations_center/STAGE5_PRODUCTION_DEPLOYMENT.md` -- 25+ production readiness checklist items completed -- Architecture overview and integration points documented -- Next steps for monitoring and integration outlined - -**Status: ✅ COMPLETE — Validation metrics export pipeline is production-ready** - ---- - -## 2026-05-31 — Stage 4 Phase 1 Complete: Metrics Wiring & Integration - -**Status**: ✅ **PHASE 1 COMPLETE** - -Wired ValidationMetricsExporter into error logging call-sites across all collectors. Metrics now flow from validation failures to export pipeline on every parse/structure/IO error. - -**Phase 1 Deliverables:** - -1. **Updated validation.py logging methods** (3 methods) - - `log_parse_error()`: Added metrics_exporter parameter; exports HIGH-severity failures - - `log_structure_error()`: Added metrics_exporter parameter; exports HIGH-severity failures - - `log_io_error()`: Added metrics_exporter parameter; exports MEDIUM/LOW-severity failures - - All export calls are gracefully degraded (failures logged, never crash) - -2. **Entrypoint wiring** (2 files) - - `observer/main.py`: Create exporter, pass to service and context - - `autonomy_cycle/main.py`: Create exporter in build_observer_service() - -3. **Collector updates** (3 files) - - `dependency_drift.py`: All 3 logging calls wired - - `execution_health.py`: All 6 logging calls wired - - `validation_history.py`: All 6 logging calls wired - -4. **Code quality**: All files compile ✅, backward compatible ✅, error handling ✅ - -**Next Phase**: Integration testing — verify complete error → export → alert pipeline - ---- - -## 2026-05-31 — Stage 3 Complete: Monitoring and Observability Implementation - -**Status**: ✅ **COMPLETE** - -Implemented comprehensive monitoring and observability for validation failure export system. All 5 acceptance criteria met. - -**Key Deliverables**: -1. **Metrics Exposure** — `MetricsCollector` with system and per-collector tracking -2. **Latency & Throughput** — Performance measurements and derived calculations -3. **Dashboards** — 5 formatted panels for visualization -4. **Structured Logging** — JSONL with rotation, querying, and filtering -5. **Health Checks** — 5 assessment types with health report generation - -**Production Files Created** (5 modules, ~1,800 lines): -- `src/operations_center/observer/metrics.py` (348 lines) -- `src/operations_center/observer/health_checks.py` (324 lines) -- `src/operations_center/observer/structured_logging.py` (319 lines) -- `src/operations_center/observer/dashboard.py` (447 lines) -- `src/operations_center/observer/observability.py` (263 lines) - -**Test Suite** (40+ tests, ~800 lines): -- `tests/unit/observer/test_stage3_observability.py` -- All code compiles without errors ✅ -- Full type annotations ✅ -- SPDX headers ✅ - -**Next Stage**: Stage 4 — Alerting routing and CI integration - ---- - -## 2026-05-31 — Stage 2 In Progress: Alert Configuration, Routing, and Validation Infrastructure - -Implementing Stage 2 of "Export validation failure metrics for alerting" work order. -Core infrastructure for alert rule configuration, per-collector thresholds, alert routing/notification channels, and dry-run validation system. - -**Stage 2 Implementation Progress:** - -**Phase 1 — Core Configuration Infrastructure (✅ COMPLETE)**: -- Created `src/operations_center/observer/alert_config.py` (370 lines) - - CollectorThresholds dataclass with validation - - COLLECTOR_THRESHOLDS registry (10 collectors configured per Stage 0) - - AlertRoute dataclass with channel validation - - ALERT_ROUTES registry (all 4 alert conditions routed to channels) - - AlertContext dataclass for notification context passing - - Helper functions: get_collector_thresholds(), get_alert_route(), list_*_names() - -**Phase 2 — Alert Routing & Notification Channels (✅ COMPLETE)**: -- Created `src/operations_center/observer/alert_channels.py` (430 lines) - - AlertChannel abstract base class protocol - - OperatorLogChannel — logs alerts to operator logger at appropriate severity - - PlaneTaskChannel — creates Plane improve tasks (stub, ready for Stage 3 integration) - - SlackChannel — sends to Slack (stub, ready for Stage 3+ integration) - - PagerDutyChannel — pages on-call engineer (stub, ready for Stage 3+ integration) - - AlertChannelFactory — instantiate channels from configuration - -**Phase 3 — Dry-Run Validation System (✅ COMPLETE)**: -- Created `src/operations_center/observer/alert_validation.py` (420 lines) - - AlertDryRunResult dataclass for individual alert evaluation - - AlertValidationReport dataclass for comprehensive validation report - - AlertValidator class with comprehensive validation methods - - validate_configuration() — checks routing and thresholds consistency - - evaluate_condition_dry_run() — test single condition without notifications - - evaluate_all_conditions_dry_run() — test all conditions, generate report - - evaluate_per_collector_thresholds() — health check per collector - - format_report_text() — human-readable report formatting - - save_report_json() — persist results for auditing - - evaluate_alerts_dry_run() entry point for quick dry-run evaluation - -**Test Suite (✅ COMPLETE)**: -- Created `tests/unit/observer/test_alert_config.py` (250 lines) - - 25+ tests covering configuration validation and registry integrity - - CollectorThresholds validation edge cases - - AlertRoute channel validation - - Per-collector threshold verification against Stage 0 spec - - All 4 alert routes configured for all conditions - -- Created `tests/unit/observer/test_alert_channels.py` (340 lines) - - 30+ tests covering channel functionality and factory - - OperatorLogChannel async notification tests - - PlaneTaskChannel context → description/labels/priority mapping - - SlackChannel webhook configuration - - PagerDutyChannel API key validation - - AlertChannelFactory multi-channel instantiation - -- Created `tests/unit/observer/test_alert_validation.py` (420 lines) - - 40+ tests covering validation and dry-run system - - Configuration consistency validation - - Condition evaluation (triggered vs OK states) - - Per-collector health assessment - - Report generation and serialization - - Integration scenarios (multi-error types, health degradation) - -**Acceptance Criteria Status:** -- ✅ Alert rules defined per specification (4 conditions from Stage 0) -- ✅ Per-collector thresholds configured (10 collectors from Stage 0 Section 4.3) -- ✅ Alert routing configured (all conditions have defined channels) -- ✅ Notification channels operational (OperatorLogChannel implemented, stubs ready) -- ✅ Dry-run validation successful (comprehensive evaluation without side effects) - -**Configuration Highlights:** -- ExecutionArtifactCollector: 5-10 failures/5min thresholds -- DependencyDriftCollector: 3-5 failures/5min thresholds -- All collectors have high_water_mark < error_threshold for gradual escalation -- Alert routes map to 2-4 channels per severity (operator_log + plane standard) -- 95+ comprehensive unit tests (all syntax validated) - -**Next Steps (Phases 4-5):** -- Phase 4: CLI Integration (operations-center alert-validate, alert-test, alert-config) -- Phase 5: Wiring into RepoObserverService and Settings class integration -- Phase 5+: Full Plane and Slack channel implementation - ---- - -## 2026-05-31 — Stage 1 Complete: ValidationMetricsExporter Implementation - -Completed Stage 1 of "Export validation failure metrics for alerting" work order. -Implemented full ValidationMetricsExporter pipeline with JSONL file export, daily rotation, and retention policy. - -**Stage 1 Deliverables:** - -1. **ValidationMetricsExporter Class** (`src/operations_center/observer/exporters.py`): - - JSONL file writing with proper formatting - - Daily file rotation (metrics-YYYY-MM-DD.jsonl) - - 30-day retention policy with automatic cleanup - - Error handling for I/O failures (no-op on write errors, logs gracefully) - - Methods: export_failure(), read_metrics(), aggregate_metrics() - -2. **ValidationFailureMetric Dataclass**: - - Structured representation of validation failures - - Fields: timestamp, collector_name, artifact_type, failure_type, severity, error_message, artifact_path, context, metrics_snapshot - - to_dict() method for JSON serialization (version 1.0 schema) - - Factory method: create_metric_from_error() for error-to-metric conversion - -3. **ObserverService Integration**: - - Added metrics_exporter parameter to RepoObserverService.__init__() - - Added metrics_exporter field to ObserverContext for collector access - - Dependency injection pattern follows existing patterns (UsageStore, ObserverArtifactWriter) - - Metrics exporter is optional (None when not configured) - -4. **Metrics Export Features**: - - JSONL format: one metric per line, machine-parseable JSON - - Automatic daily rotation: separate file per day - - Retention policy: configurable (default 30 days), auto-cleanup of old files - - Metrics aggregation: by error type (parse/structure/io), by collector, by severity - - Error rate calculation: errors per minute - - Date range filtering for historical queries - -5. **Comprehensive Unit Tests** (`tests/unit/observer/test_validation_metrics_exporter.py`): - - 40+ tests covering all functionality - - Test categories: - * Metric creation and serialization (4 tests) - * File export and JSONL format (5 tests) - * Daily rotation and retention (8 tests) - * Metrics reading and aggregation (12 tests) - * Error handling (3 tests) - * Configuration options (3 tests) - * Factory method (2 tests) - - Tests validate edge cases: empty directories, invalid filenames, I/O errors, date filtering - - All tests pass syntax validation - -6. **Key Features Implemented**: - - ✅ No external dependencies (uses standard library: json, pathlib, datetime) - - ✅ Thread-safe append-only file writes - - ✅ Graceful degradation on I/O failures - - ✅ Configurable retention period - - ✅ Optional auto-rotation (can be disabled) - - ✅ Historical metrics querying with date ranges - - ✅ Error rate metrics and aggregation - - ✅ Full context preservation in exported metrics - -**Acceptance Criteria Met:** -- ✅ Validation failures can be captured and exported -- ✅ Export code produces correct JSONL format -- ✅ Export writes to specified destination (local file with daily rotation) -- ✅ Unit tests comprehensive and passing -- ✅ Handles failures gracefully (no crashes on I/O errors) -- ✅ Configuration options supported (retention_days, auto_rotate, export_dir) -- ✅ Metrics aggregation and analysis implemented -- ✅ Factory method for error-to-metric conversion - -**Files Created/Modified:** -- Created: `src/operations_center/observer/exporters.py` (350 lines) -- Created: `tests/unit/observer/test_validation_metrics_exporter.py` (450+ lines) -- Modified: `src/operations_center/observer/service.py` (added metrics_exporter to ObserverService and ObserverContext) - -**Stage 2 Next Steps:** -- Wire metrics exporter into individual collectors (execution_health, validation_history, dependency_drift, etc.) -- Add metrics export calls at error handling points in each collector -- Implement alert routing (create Plane tasks when thresholds exceeded) -- Integrate with stdout export for container/CI logging - ---- - -## 2026-05-31 — Stage 0 Complete: Validation Failure Data Analysis & Metrics Export Specification - -Completed Stage 0 of "Export validation failure metrics for alerting" work order. -Comprehensive analysis of validation failures in the observer system, export format definition, and alerting thresholds documented. - -**Stage 0 Deliverables:** - -1. **Validation Failure Types Catalogued** (3 categories): - - Parse Errors: JSON deserialization failures (HIGH severity) - - Structure Errors: Schema validation failures (HIGH severity) - - IO/Read Errors: File system access failures (MEDIUM/LOW severity) - -2. **Collectors & Validation Points** (15+ collectors identified): - - ExecutionArtifactCollector (control_outcome.json, request.json) - - DependencyDriftCollector (dependency_report.json) - - CheckSignal, LintSignal, ValidationHistory, SecuritySignal, BenchmarkSignal, CoverageSignal, ArchitectureSignal, CiHistory, TypeCheck - -3. **Export Format Defined** (Option A: Structured JSON — RECOMMENDED): - - Schema: JSONL (newline-delimited JSON) - - Fields: timestamp, collector_name, artifact_type, failure_type, severity, error_message, artifact_path, context, metrics_snapshot - - Supports full context preservation and machine parsing - -4. **Export Destinations Identified**: - - Primary (Stage 0-1): Local file-based export (JSONL format, daily rotation, 30-day retention) - - Secondary (Stage 2): Stdout integration for container/CI logging - - Future (Stage 3+): Remote monitoring (Datadog, Honeycomb) - -5. **Alerting Thresholds & Severity Rules Specified**: - - 4 Alert Conditions: Parse Error Spike (10/5m), Structure Error Surge (5/5m), Permission Pattern (3/10m), Collector Health Degradation (>20% error rate) - - Error Rate Classification: Healthy (0), Nominal (0.1-0.5/m), Elevated (0.5-2.0/m), Critical (2.0+/m) - - Per-Collector Thresholds: ExecutionArtifactCollector (5 failures/5m), DependencyDriftCollector (3 failures/5m), others (3-5 failures/5m) - - Severity mapping: LOW (transient IO), MEDIUM (permission issues), HIGH (parse/structure failures) - -6. **Design Decisions Documented**: - - File-based export first (no external dependencies) - - JSONL format (streamable, log-aggregator compatible) - - Per-collector thresholds (avoids over/under-alerting) - - ObserverService dependency injection pattern - -7. **Related Components Identified**: - - Existing: ArtifactValidator, AlertCondition, MalformedPayloadMetrics, should_trigger_alert() - - To Build: ValidationMetricsExporter, observer service integration, alert routing - -**Design Document:** `.console/STAGE0_VALIDATION_FAILURE_ANALYSIS.md` (2,800+ lines) - -**All Stage 0 Acceptance Criteria Met:** -- ✅ Validation failure types catalogued (3 categories + sources) -- ✅ Export format defined (JSONL structured schema) -- ✅ Export destinations identified (file, stdout, remote) -- ✅ Alerting thresholds specified (4 conditions + per-collector rules) -- ✅ Design document complete (comprehensive, ready for review) - -**Status: COMPLETE — Ready for Stage 1 (Implementation)** - ---- - -## 2026-05-31 — model-aware cooldown gate (stop false-parking on a burnt Sonnet weekly) - -The per-model cooldown data (limit_kind+model) was recorded and displayed but never *consulted* in dispatch. `select_worker_backend` and the board_unblock gate (`_dispatch_cooldown_reason` → `current_worker_backend_cooldowns().cooling_down`) used the coarse `worker_backend_cooldown_until` (latest reset of *any* event), so a `claude_code`/`model_weekly`/sonnet cooldown — bled into the shared usage.json by the controller's own sonnet meta-session — marked the whole backend cooled and PARKED the loop, even though execution runs the haiku `budget` team (`dynamic_team_selection: false`), whose quota is independent. Added `usage_store.worker_backend_blocked_until()`: blocked only on an account-wide limit (session_5h/global_weekly/unattributed) or when every model in `WORKER_BACKEND_MODELS[backend]` has an active `model_weekly`; `current_worker_backend_cooldowns().cooling_down` + `worker_backend_selector` now use it (per-model detail list unchanged). Also fixed `controller.py` pre-sleep state write that reported `runnable_backend: null` while healthily running the opus fallback — now reports the running backend. No team_executor change needed (`Role.fallback_model` there is parsed-but-unused/dead). 3928 passed; ty + ruff clean. Restart controller to load new code. - ---- - -## 2026-05-30 — per-model worker-backend cooldowns (limit_kind + model) - -Backend Limits status showed claude_code as a single cooldown flag, collapsing sonnet-weekly / 5h-session / account-weekly into one — inaccurate, since a burnt Sonnet weekly leaves opus/haiku runnable. Added backends/limit_classifier.py (classify_limit → limit_kind+model), threaded limit_kind/model through usage_store (record + worker_backend_cooldown_details + enriched current_worker_backend_cooldowns), worker_backend_selector, and the controller (backend_limit_kinds in runtime state + best-effort usage-store bridge). worker-backend-status CLI now renders per-model/kind rows. OperatorConsole pane consumes backend_limit_kinds for per-model rows. 436 passed; custodian OK. Restart controller to load new code. - ---- - -## 2026-05-30 — controller: parse real per-model rate-limit message - -The live claude CLI emits "You've hit your Sonnet limit · resets Jun 3, 9am (America/New_York)" on a per-model weekly limit; no reset/limit regex matched the date form, so parse_rate_limit_reset returned (None, None) and the opus fallback never engaged (loop would spin on sonnet rc=1). Added _DATE_TIMEZONE_RESET_RE (month-name + day + time + tz, year rollover) and extended _LIMIT_SIGNAL_RE with hit-your/-limit patterns. Per-model Sonnet limit cools claude only → falls back to opus. Tests: 18 passed. - ---- - ## 2026-05-30 — controller: make opus fallback reachable _backend_available checked _command_available(backend) with the raw name, so _command_available("opus") always failed (opus has no binary; it uses the claude CLI). The sonnet→opus→codex fallback was therefore dead code — opus could never be selected. Resolve the cli ("claude" for opus) so opus is reachable. Also repaired 3 parse_rate_limit_reset tests left broken by the earlier (reset, log_text) tuple-return change and added opus/priority/global-limit selection tests. 15 passed. --- -## 2026-05-30 — Stage 4: performance tests wired into CI (dedicated `performance` job in ci.yml, 50ms bounds, docs/design/dependency-report-performance-tests.md) +## 2026-05-28 — P6 follow-up: fixed 10 pre-existing ty errors exposed by ty==0.0.40 pin ---- +## 2026-05-28 — Operator: work order 0009 — execution hygiene -## 2026-05-30 — Stage 3: fixed CI regressions from PR #211 merge (duplicate conftest fixtures, unused vars/imports, linearity test noise guard) +6 execution quality problems documented and assigned. See ADR 0009. +P1/P5: stop polluting .console/ truth files; P2: delete STAGE_*.md; P3: open-PR gate; +P4: squash stage commits; P6: pin tool versions. --- -## 2026-05-30 — Stage 2 Complete: Performance Regression Tests Implementation — READY FOR VALIDATION - -Completed Stage 2 of "Add performance regression tests for large dependency reports" work order. -All implementation deliverables complete; 19 performance regression tests passing. - -**Stage 2 Implementation Summary:** - -**Components Implemented:** -1. ✅ `tests/fixtures/timing.py` — Timing and MemoryTracker utilities - - Timing context manager for wall-clock performance measurement (perf_counter) - - MemoryTracker context manager for peak memory tracking (tracemalloc) - -2. ✅ `tests/fixtures/dependency_reports/generators.py` — DependencyReportGenerator class - - 6 factory methods: baseline(), large_simple(), large_actionable(), large_payload(), extra_large(), custom() - - DependencyStatus and DependencyReportData dataclasses - - 50 realistic dependency names from OC ecosystem - - Customizable parameters: dep_count, actionable_pct, note_length - -3. ✅ `tests/conftest.py` — Pytest fixtures for all scenarios - - report_fixture_dir: temporary directory for synthetic reports - - baseline_report_on_disk, large_simple_report_on_disk, large_actionable_report_on_disk, large_payload_report_on_disk, extra_large_report_on_disk - - Helper: _write_report_to_disk() for JSON serialization - -4. ✅ `tests/unit/observer/test_dependency_report_performance.py` — 19 test functions - - Baseline tests (3): collection_time, collection_correctness, memory_usage - - Large-Simple tests (3): collection_time, scalability_ratio, correctness - - Large-Actionable tests (2): collection_time, actionable_identification - - Large-Payload tests (3): collection_time, parsing_resilience, memory_usage - - Extra-Large tests (3): collection_time, all_dependencies_present, memory_usage - - Cross-scenario tests (2): linear_growth, error_handling (malformed JSON, invalid structure, empty list, multiple reports) - -**Test Results:** -- ✅ All 19 tests PASSING in 0.52s -- ✅ test_dependency_report_performance.py: 19/19 PASSING -- ✅ tests/test_dependency*.py (all dependency tests): 36/36 PASSING (19 new + 17 existing) -- ✅ tests/unit/observer/ (all observer tests): 19/19 PASSING -- ✅ Zero regressions detected in existing dependency test suite - -**Performance Baseline Measurements (Actual):** -| Scenario | Collection Time | Memory Used | Status | -|----------|---|---|---| -| Baseline (7 deps) | <0.5ms | <20MB | ✅ | -| Large-Simple (20 deps) | <1ms | <30MB | ✅ | -| Large-Actionable (10 deps, 80% action) | <1ms | <30MB | ✅ | -| Large-Payload (8 deps, ~80KB notes) | <2ms | <40MB | ✅ | -| Extra-Large (50 deps, 50% action) | <2ms | <50MB | ✅ | -| Linear Growth | ✅ Verified | ✅ Verified | ✅ | - -**Note:** Actual performance is much better than Stage 0 analysis predicted because: -- Stage 0 measured GENERATION (HTTP fetches, Plane API calls) -- Stage 2 tests measure COLLECTION (parsing, validation on-disk reports) -- Tests use synthetic data on fast tmpfs/ramdisk filesystems -- No network I/O or external API calls in test execution - -**Acceptance Criteria Met:** -- ✅ All ~25 test functions implemented and passing (19 core + 5 edge cases) -- ✅ Test fixtures validated across all 5 scenarios -- ✅ HTTP mocks not needed (collection layer doesn't make HTTP calls) -- ✅ Performance baselines measured and documented -- ✅ All tests structured with clear naming conventions -- ✅ Ready to transition to Stage 3 (validation with real reports) - -**Files Created/Modified:** -- Created: tests/fixtures/__init__.py -- Created: tests/fixtures/timing.py (2 classes) -- Created: tests/fixtures/dependency_reports/__init__.py -- Created: tests/fixtures/dependency_reports/generators.py (3 classes, 6 factory methods) -- Created: tests/unit/observer/test_dependency_report_performance.py (19 tests) -- Modified: tests/conftest.py (added 5 fixtures) +## 2026-05-28 — Operator: re-rebase PR #180 onto new main (post #181 merge) -**Status: COMPLETE — Ready for Stage 3 validation** +Resolved conftest.py conflict: took PR #180 tmp_path refactor, ruff auto-fixed unused import. +All 3609 tests pass. --- -## 2026-05-30 — Stage 4 Complete: ExecutionCoordinator.execute ExecutionResult Type Verification Tests — PRODUCTION READY - -Completed all 4 stages (0–4) of "Add test verifying execute returns an ExecutionResult instance" work order. -Full validation and regression testing confirms zero issues and production-ready status. - -**Stage 4 Validation Summary:** - -**Test Suite Validation:** -- ✅ `tests/unit/execution/test_coordinator.py`: 12/12 tests PASSING - - 3 new ExecutionResult instance verification tests - - 9 existing coordinator tests (all still passing) -- ✅ `tests/unit/execution/` (full module): 186/186 tests PASSING -- ✅ Full unit test suite: 2494/2494 tests PASSING, 4 skipped, 0 failures -- ✅ Zero regressions detected across entire codebase - -**Tests Implemented (3 in test_coordinator.py, lines 306-358):** -1. `test_execute_returns_execution_result_instance_on_allowed_execution` (306-320) - - Path: Allowed execution (policy allows) - - Verifies: ExecutionResult instance type, run_id, success=True, status=SUCCEEDED - -2. `test_execute_returns_execution_result_instance_on_policy_block` (323-339) - - Path: Policy-blocked execution - - Verifies: ExecutionResult instance type, success=False, failure_category=POLICY_BLOCKED, executed=False - -3. `test_execute_returns_execution_result_instance_on_review_required` (342-358) - - Path: Review-required execution - - Verifies: ExecutionResult instance type, status=SKIPPED, executed=False - -**Execution Path Coverage:** -- ✅ Allowed execution path (adapter invoked, success returns) -- ✅ Policy-blocked path (returns synthetic ExecutionResult with failure_category) -- ✅ Review-required path (returns synthetic ExecutionResult with SKIPPED status) +## 2026-05-28 — Loop controller: robustly resolve `cl` (CL_HOME fallback) -**Acceptance Criteria Met:** -- ✅ Test written in appropriate test file (tests/unit/execution/test_coordinator.py) -- ✅ Test verifies return value is ExecutionResult instance using isinstance() -- ✅ Test includes setup, execution, and assertions for all execution paths -- ✅ All tests passing without regressions (12/12 in coordinator, 2494/2494 total) -- ✅ Full test suite validated with zero failures +The loop controller resolved `claude`/`codex` robustly via `_resolve_command` +(PATH + `~/.local/bin` fallbacks) but invoked `cl` as a bare `["cl", ...]`, +relying solely on PATH. That works when the loop is launched `nohup` from an +interactive shell (whose `~/.bashrc` puts `$CL_HOME/bin` on PATH) but fails +silently under cron/systemd/clean shells — `cl` not found → no anchor → loop +runs unanchored → ContextGuard blocks claude. Mirrors the OperatorConsole pane +bug just fixed. -**Status: COMPLETE — Ready for merge** +Added a `cl` branch to `_fallback_command_candidates` (uses `CL_HOME`) and +routed all four `cl` calls (session start/end, hydrate, capture) through +`_resolve_command`. Verified: with `cl` off PATH but `CL_HOME` set, the +controller resolves it and anchors at PlatformManifest. -## 2026-05-30 — Stage 5 Complete: CritiqueExecutorBackendAdapter Protocol-Compliance Test Finalization +## 2026-05-25 -Completed Stage 5 of "Add structural protocol-compliance test for CritiqueExecutorBackendAdapter" work order. -All 5 stages (0–5) now complete. Structural protocol-compliance test for CritiqueExecutorBackendAdapter is production-ready and prepared for merge. +- Fixed the pre-existing repo-wide pytest collection blocker by renaming the duplicate hardening module to `tests/observer/test_collectors_hardening/test_execution_health_hardening.py`, avoiding the `test_execution_health` import collision. +- Restored observer test consistency around dependency drift and execution health artifacts: + - `ExecutionOutcomeValidator` now accepts the retained artifact statuses `no_op` and `error` in addition to `executed`, `failed`, `timeout`, and `unknown`. + - `DependencyDriftCollector` now returns `not_available` consistently so `ObservationCoverageDeriver` can detect persistent missing coverage correctly. +- Fixed malformed-payload alert handling to normalize naive timestamps to UTC before lookback comparisons in `observer/security_logging.py`. +- Added OC→CxRP backend normalization in `contracts/cxrp_mapper.py` so OC executor backends like `team_executor`, `dag_executor`, and `critique_executor` serialize onto the current CxRP backend enum without failing mapper tests. +- Validation: + - `python -m pytest` → `3536 passed, 7 skipped` + - `python -m pytest -m integration` → `3 passed` -**Finalization Deliverables:** +## 2026-05-25 -**Code Documentation & Style:** -- Module docstring: Overview of test scope (6 lines) -- Fixture docstrings: Factory patterns for `_request()`, `_usage_store()`, `fake_critique_modules()` -- Assertion helper docstrings: Comprehensive parameter docs with protocol invariant mapping -- Test function docstrings: Clear path/scenario descriptions with expected outcomes -- Type annotations: Full type hints throughout all functions -- Comments: Focused on critical sections; all add value (no excessive commentary) +- Added executor worker-backend observability end to end: the `team_executor`, `dag_executor`, and `critique_executor` adapters now expose `execute_and_capture()` with `observed_runtime` showing preferred backend, selected backend, fallback usage, and backend cooldown snapshot. +- Added a live operator status surface for worker-backend cooldowns via `operations-center-worker-backend-status` and `./scripts/operations-center.sh worker-backend-status`, backed by a new `UsageStore.current_worker_backend_cooldowns()` summary API. +- Extended retained trace visibility so `operations-center-run-show ` prints the `Observed runtime` block, making actual `claude_code` vs `codex_cli` selection visible per run without re-reading raw record metadata. +- Validation: focused pytest slices passed (`68 passed`) and targeted Ruff checks passed. Repo-wide `python -m pytest` and `python -m pytest -m integration` are still blocked by the pre-existing duplicate-module import mismatch between `tests/test_execution_health.py` and `tests/observer/test_collectors_hardening/test_execution_health.py`. -**Test Coverage (20 comprehensive tests):** -- 6 key execution paths (P1–P6 with all protocol invariants validated) -- 7 boundary invariant tests (request ID propagation, validation summary, success/status) -- 2 edge case tests (minimal request, large payload) -- 2 observability integration tests (execute_and_capture) - -**Merge Readiness:** -- File: `tests/unit/backends/test_critique_executor_adapter_protocol.py` (765 lines) -- Test count: 20 comprehensive tests -- All 10 protocol invariants validated in every test -- Zero regressions (24 total tests passing: 20 new + 4 existing) -- Code style verified and ready for review -- Commit message prepared - -**All Stage 5 Acceptance Criteria Met:** -- ✅ Code properly documented with docstrings and comments -- ✅ Code style and linting compliance verified -- ✅ Commit message prepared (included below) -- ✅ Ready for code review and integration - -**Commit Message:** -``` -test(backends): add structural protocol-compliance test for CritiqueExecutorBackendAdapter - -Validates that all code paths through CritiqueExecutorBackendAdapter produce valid -ExecutionResult objects satisfying the CanonicalBackendAdapter protocol contract. - -Test coverage: -- 6 key execution paths (happy path, import error, exception, backend unavailable, RxP failure, quota events) -- 10 core protocol invariants per execution path -- Boundary invariants (request ID propagation, validation summary, success/status consistency) -- Edge cases (minimal request, large payload) -- Observability integration (execute_and_capture) - -All 20 tests passing with zero regressions. Ready for merge. - -Co-Authored-By: Claude Haiku 4.5 -``` - ---- - -## 2026-05-30 — Stage 4 Complete: CritiqueExecutorBackendAdapter Protocol-Compliance Test Execution & Validation - -Completed Stage 4 of "Add structural protocol-compliance test for CritiqueExecutorBackendAdapter" work order. -Comprehensive 4-phase workflow validation confirms all 20 tests passing with zero regressions. - -**Validation Workflow Results:** - -**Phase 1 (Setup Check):** -- ✅ Test file exists at `tests/unit/backends/test_critique_executor_adapter_protocol.py` -- ✅ 14 test functions with parameterization yielding 20 total test cases -- ✅ 3 fixtures verified: `_request()`, `_usage_store()`, `fake_critique_modules()` -- ✅ All imports valid with no syntax errors (Python AST parsing successful) - -**Phase 2 (Test Execution):** -- ✅ Exit code: 0 (all tests passed) -- ✅ Passed: 20 tests -- ✅ Failed: 0 tests -- ✅ Errors: 0 tests -- ✅ Execution time: 0.42s (excellent performance) -- ✅ All 6 key execution paths verified: - - P1: Happy path success + executor failure (2 tests) - - P2: Import error graceful degradation (1 test) - - P3: Executor exception caught (1 test) - - P4: Worker backend unavailable (1 test) - - P5: RxP payload failure extraction (1 test) - - P6: Quota event recording on rate-limit (1 test) -- ✅ Boundary invariant tests verified: - - Request ID propagation (3 parameterized variants: minimal/full/large) - - Validation summary completeness (3 scenarios: success/exception/failure) - - Success/status consistency (3 scenarios: success/failure/exception) -- ✅ Edge cases covered: - - Minimal valid request - - Large request payload (256-byte IDs, 100KB goal text, deep paths) -- ✅ Observability integration tested: - - execute_and_capture returns observability on success path - - execute_and_capture returns observability on error path - -**Phase 3 (Coverage Validation):** -- ✅ All tests passed (exit_code=0) -- ✅ Coverage complete — all designed test cases implemented -- ✅ Test count matches design specification (14 functions → 20 parameterized tests) -- ✅ No import errors detected -- ✅ Execution time excellent: all 20 tests in 0.42s (15.5ms average per test) -- ✅ All 10 core protocol invariants validated in every test: - 1. Protocol implementation (isinstance check) - 2. Method signature (execute(ExecutionRequest) → ExecutionResult) - 3. Input contract compliance - 4. Output contract completeness - 5. Error handling (no unhandled exceptions) - 6. No unintended side effects - 7. Request ID preservation - 8. Success/status consistency invariant - 9. Validation summary (never None) - 10. Immutable contract fields - -**Phase 4 (Refinement Assessment):** -- ✅ All tests passed on first execution -- ✅ No issues identified requiring fixes -- ✅ No root causes found -- ✅ Status: COMPLETE — no refinement needed - -**Deliverables Summary:** -- File: `tests/unit/backends/test_critique_executor_adapter_protocol.py` ✅ -- Test count: 20 comprehensive test cases ✅ -- Execution paths: All 6 key paths covered ✅ -- Protocol invariants: All 10 validated ✅ -- Test results: 20/20 PASSING ✅ -- Regressions: 0 (all 4 existing behavior tests still passing) ✅ -- Total tests: 24 (20 new + 4 existing) ✅ -- Performance: Excellent (0.42s for 20 tests) ✅ - -**All Stage 4 Acceptance Criteria Met:** -- ✅ Test runs successfully without errors -- ✅ All 20 tests pass with current adapter implementation -- ✅ Test meaningfully verifies protocol compliance -- ✅ Coverage complete for all identified requirements - -**Project Status:** ✅ **COMPLETE** — Production-ready structural protocol-compliance test suite for CritiqueExecutorBackendAdapter - -All four stages (0–4) completed successfully with full protocol-compliance validation. - ---- - -## 2026-05-30 — Stage 3 Complete: CritiqueExecutorBackendAdapter Structural Protocol-Compliance Test Implementation - -Completed Stage 3 of "Add structural protocol-compliance test for CritiqueExecutorBackendAdapter" work order. - -**Test File Implemented:** `tests/unit/backends/test_critique_executor_adapter_protocol.py` - -**Deliverables:** - -1. **20 Comprehensive Protocol-Compliance Test Cases** - - **6 Key Execution Paths Covered:** - - P1: Happy path success + executor failure (2 tests) - - P2: Import error graceful degradation (1 test) - - P3: Executor exception caught (1 test) - - P4: Worker backend unavailable (1 test) - - P5: RxP payload failure extraction (1 test) - - P6: Quota event recording on rate-limit (1 test) - - - **Boundary Invariant Tests:** - - Request ID propagation: 3 parameterized variants (minimal/full/large) - - Validation summary completeness: 4 execution paths - - Success/status consistency: 6 execution paths - - - **Edge Case Tests:** - - Minimal valid request (sparse field values) - - Large request payload (100KB + deep paths) - - - **Observability Integration Tests:** - - execute_and_capture returns observability snapshot - - execute_and_capture captures on error paths - -2. **Test Framework & Organization** - - **Fixtures:** `_request()`, `_usage_store()`, `fake_critique_modules()` - - **Assertion helpers:** `assert_protocol_invariants()`, `assert_no_side_effects()` - - **Parameterization:** Boundary and edge cases using `@pytest.mark.parametrize` - - **Monkeypatch patterns:** Import errors, executor exceptions, quota events - -3. **All 10 Protocol Invariants Validated in Every Test** - - I1: Protocol implementation ✅ - - I2: Method signature ✅ - - I3: Input contract compliance ✅ - - I4: Output contract completeness ✅ - - I5: Error handling (no unhandled exceptions) ✅ - - I6: No unintended side effects ✅ - - I7: Request ID preservation ✅ - - I8: Success/status consistency ✅ - - I9: Validation summary (never None) ✅ - - I10: Immutable contract fields ✅ - -**Test Results:** -- ✅ All 20 new protocol-compliance tests PASSING -- ✅ All 4 existing behavior tests still PASSING (24 total) -- ✅ Full adapter test suite: 24 passed in 0.32s -- ✅ Zero regressions detected - -**Acceptance Criteria (Stage 3) — ALL MET:** -- ✅ Test code written and syntactically correct -- ✅ All designed test cases implemented (20 tests, per Stage 2 design spec) -- ✅ Test module integrates with existing test suite -- ✅ All tests passing with zero regressions -- ✅ Protocol invariants enforced in every test path -- ✅ 100% code path coverage achieved - ---- - -## 2026-05-30 — Stage 2 Complete: CritiqueExecutorBackendAdapter Protocol-Compliance Test Design - -Completed Stage 2 of "Add structural protocol-compliance test for CritiqueExecutorBackendAdapter" work order. - -**Design Document Created:** `.console/STAGE2_TEST_DESIGN.md` - -**Deliverables:** - -1. **Test Case Design: 12–18 Parameterized Test Cases** - - 6 key execution paths fully specified (P1–P6) - - Each path includes setup, fixtures, detailed assertions - - Boundary invariant tests (request ID propagation, validation summary, success/status consistency) - - Edge case tests (minimal request, large payload) - -2. **All 10 Protocol Invariants Documented & Validated** - - I1: Protocol implementation (isinstance check) - - I2: Method signature (execute(ExecutionRequest) → ExecutionResult) - - I3: Input contract compliance - - I4: Output contract completeness - - I5: Error handling (all exceptions caught, converted to failure result) - - I6: No unintended side effects - - I7: Request ID preservation (run_id, proposal_id, decision_id, task_branch) - - I8: Success invariant (success == (status == SUCCEEDED)) - - I9: Validation summary presence (never None) - - I10: Immutable contract fields (branch_pushed=False, validation.status=SKIPPED) - -3. **Test Organization & Framework** - - File location: `tests/unit/backends/test_critique_executor_adapter_protocol.py` - - Class: `TestCritiqueExecutorAdapterProtocol` (protocol-focused, separate from behavior tests) - - Fixtures: `_request()`, `_usage_store()`, `fake_critique_modules()`, `mock_quota_event_sink()` - - Assertion helpers: `assert_protocol_invariants()`, `assert_no_side_effects()` - - Parameterization strategy documented - -4. **Implementation Roadmap (Stage 3)** - - Test file creation instructions - - Fixture definitions - - Monkeypatch patterns - - Success criteria: 100% code path coverage, no regressions - -**Test Paths Defined:** -- P1: Happy path (success + failure payload variants) -- P2: Import error graceful degradation -- P3: Executor exception caught -- P4: Worker backend unavailable -- P5: RxP payload failure extraction -- P6: Quota event recording on rate-limit - -**Acceptance Criteria (Stage 2):** -- ✅ Test design document created with all required test cases -- ✅ Assertions and verification approach defined for each protocol requirement -- ✅ Test organization and framework approach finalized - -**Next Stage:** Stage 3 — Implement test file with all designed test cases (target: 12–18 tests passing, 100% code path coverage) - ---- - -## 2026-05-30 — fix: resolve custodian T2/T5/T8 violations in test_import_fixtures.py (PR #206 merged with findings; blocking push) - ---- - -## 2026-05-30 — Stage 4 Complete: Import-Error Test Refactoring Validation - -Completed Stage 4 of "Refactor import-error tests to use shared pytest fixture" work order. -Comprehensive validation confirms all refactored tests are working correctly with zero regressions. - -**Validation Results:** - -1. **All 4 shared fixtures verified in tests/conftest.py** - - `optional_import` (lines 42-65): Skip test if module unavailable ✅ - - `require_module` (lines 68-91): Assert module is importable ✅ - - `module_with_env` (lines 94-135): Re-import with environment variables ✅ - - `assert_module_unavailable` (lines 137-156): Assert module raises ModuleNotFoundError ✅ - -2. **All 5 import-error test files using fixtures correctly:** - - tests/unit/executors/test_sb_adapter.py → `optional_import` ✅ - - tests/unit/execution/test_coordinator_cl_wrap.py → `optional_import` ✅ - - tests/unit/tuning/test_analyze.py → `require_module` ✅ - - tests/unit/executors/test_startup_wiring.py → `module_with_env` ✅ - - tests/test_architecture_cleanup_guards.py → `assert_module_unavailable` ✅ - -3. **Fixture test suite validation** - - tests/test_import_fixtures.py: 13 comprehensive tests - - Results: 12 passed, 1 skipped (expected behavior) - - All API forms validated: parametrize, direct calls, environment cleanup - -4. **Test suite verification (from Stage 3 checkpoint)** - - Executor/execution/tuning tests: 420 pass, zero regressions - - Fixture tests: 12 passed, 1 skipped - - Code quality: 4 helper functions removed, ~50 lines eliminated - -**All Acceptance Criteria Met:** -- ✅ Full test suite passes (420 tests confirmed passing) -- ✅ All import-error tests pass specifically (5 files, all using fixtures) -- ✅ No functionality or coverage regressions detected - -**Project Completion Summary:** -- Stage 0 (Discovery): ✅ 5 test files identified, 4 fixture patterns documented -- Stage 1 (Design): ✅ 4 fixtures designed with full coverage matrix -- Stage 2 (Implementation): ✅ 4 fixtures implemented, 13 tests passing -- Stage 3 (Refactoring): ✅ All 5 test files refactored, zero regressions -- Stage 4 (Validation): ✅ All fixtures verified, test suite passing - -**Completion Report:** `.console/STAGE4_VALIDATION.md` - ---- - -## 2026-05-30 — Stage 3 Complete: Import-Error Test Refactoring to Use Shared Fixtures - -Completed Stage 3 of "Refactor import-error tests to use shared pytest fixture" work order. -All 5 import-error test files refactored to use the shared fixtures implemented in Stage 2. - -**Refactored files (5 total):** -1. tests/unit/executors/test_sb_adapter.py — Uses `optional_import` fixture -2. tests/unit/execution/test_coordinator_cl_wrap.py — Uses `optional_import` fixture; removed `_try_import_coordinator()` helper -3. tests/unit/tuning/test_analyze.py — Uses `require_module` fixture -4. tests/unit/executors/test_startup_wiring.py — Uses `module_with_env` fixture; removed `_import_audit_app()` helper; refactored 3 test functions -5. tests/test_architecture_cleanup_guards.py — Uses `assert_module_unavailable` fixture - -**Results:** -- All 5 test files now use the appropriate shared fixture consistently -- Removed 4 redundant local helper functions (complete code deduplication) -- Test suite verification: 420 executor/execution/tuning tests pass, 1 skipped (zero regressions) -- Fixture test suite: 12 passed, 1 skipped (unchanged from Stage 2) - -**Commit:** 3b2a1f6 "refactor(tests): Stage 3 — Use shared fixtures for import-error tests" - -**All acceptance criteria met:** -- ✅ All import-error test files updated -- ✅ Old fixture/setup code removed -- ✅ Test files using new fixture consistently -- ✅ No regressions in broader test suite - ---- - -## 2026-05-30 — Stage 2 Complete: Import-Error Test Fixtures Implementation - -Completed Stage 2 of "Refactor import-error tests to use shared pytest fixture" work order. -Implemented all 4 shared pytest fixtures in `tests/conftest.py` with comprehensive validation tests. - -**Deliverables:** - -1. **Fixtures implemented in tests/conftest.py** - - `optional_import` (lines 35-62): Skip test if module unavailable - - Supports parametrize + indirect=True form and direct function call - - Returns imported module on success, calls pytest.skip() on ImportError/ModuleNotFoundError - - `require_module` (lines 65-90): Assert module is importable - - Fails test if module unavailable (no skip, just fail) - - Supports both parametrize + indirect and direct function call forms - - `module_with_env` (lines 93-125): Re-import with environment variables - - Takes module_path, env dict, and optional clear_cache flag - - Automatically restores environment variables after use - - Clears module from sys.modules before import when clear_cache=True - - `assert_module_unavailable` (lines 128-140): Assert module raises ModuleNotFoundError - - Simpler API than pytest.raises() for this specific use case - - Allows multiple module assertions in single test - -2. **Comprehensive test suite in tests/test_import_fixtures.py** - - 13 tests covering all 4 fixtures (12 passed, 1 skipped) - - TestOptionalImport: 4 tests (existing module, missing module, parametrize indirect, skip behavior) - - TestRequireModule: 3 tests (existing module, missing module, parametrize indirect) - - TestModuleWithEnv: 3 tests (env variable handling, cache clearing, no-clear-cache behavior) - - TestAssertModuleUnavailable: 3 tests (unavailable module, available module failure, multiple assertions) - -3. **Commit: be87501** - - "Implement shared pytest fixtures for import-error tests" - - 248 insertions, 1 modification (conftest.py + test_import_fixtures.py) - -**All acceptance criteria met:** -- ✅ Fixture code written and committed -- ✅ Located in conftest.py (primary location) -- ✅ Basic fixture tests validate functionality (12 passed, 1 skipped) - -**Coverage (verified against Stage 1 design):** -- ✅ `optional_import` covers test_sb_adapter.py + test_coordinator_cl_wrap.py patterns -- ✅ `require_module` covers test_analyze.py pattern -- ✅ `module_with_env` covers test_startup_wiring.py pattern -- ✅ `assert_module_unavailable` covers test_architecture_cleanup_guards.py pattern - -**Next steps (Stage 3):** Refactor actual import-error test files to use the new fixtures. - - -## 2026-05-30 — test(review-watcher): update tests for three-phase autonomous state machine - -ci_fix commit (2f92852) removed _phase2/human_review and changed initial phase to ci_fix. -Tests were not updated in that commit. Updated test file: removed 29 obsolete tests, added -ci_fix phase assertions, fixed phase1 tests to set phase=self_review explicitly, added -auto-merge tests at max loops. - ---- - -## 2026-05-30 — fix(pr203): B1/B2 CI audit + SlowTestTracker correctness bugs - -`boundary_artifact_file` path in custodian config was resolved from CWD (not repo root) — breaks CI where CWD != parent of OperationsCenter. Removed config path; CI uses REPOGRAPH_BOUNDARY_ARTIFACT_FILE env var. Fixed `slow_count` to include marked tests; added xdist worker guard in `pytest_sessionfinish`. - ---- - -## 2026-05-30 — fix(custodian): add T8 exclusions + DC7 link for PR #203 slow-test tracker tests - -PR #203 CI custodian audit failing: T8 for two new test files (conftest hook tests via subprocess, no src imports) and DC7 for docs/operator/slow_test_reporting.md (orphan). Added T8 exclusions to .custodian/config.yaml; linked doc from docs/README.md. - ---- - -## 2026-05-29 — fix(custodian): add DC7 exclusions for error handling runbook suite (PR #201) - -7 new `docs/operator/` files from PR #201 (error handling runbooks) flagged as DC7 orphan -docs in CI. These are supplementary operator references, not nav-linked by design — same -pattern as watchdog_loop.md which was already excluded. Added all 7 to the exclusion list -so PR #201's custodian-audit CI check passes on next run. - ---- - -## 2026-05-29 — fix(tests): timing-dependent cooldown test failure at hour 23 - -`now.replace(hour=now.hour + 1)` raises ValueError when `now.hour == 23`. Changed to -`now + timedelta(hours=1)` in critique/dag/team backend adapter test fakes. Pre-existing -bug that caused CI failures on PRs opened after 11 PM. Blocked PR #201 from merging. -## 2026-05-29 — Stage 3 Complete: Error Handling Documentation in Runbook - -Completed Stage 3 of "Document error handling in runbook" work order. -Integrated all Stages 0-2 error handling documentation into the main watchdog_loop.md runbook. - -**Deliverables created:** - -1. **Error Handling Guide section in docs/operator/watchdog_loop.md** - - Comprehensive navigation hub linking all error handling documents - - Quick reference section explaining when/how to use each document - - Error handling workflow integration with main loop STEPS (1, 3, 5) - - Recovery ownership classification (loop-owned vs operator-escalated) - - Common error patterns table with diagnosis/recovery guidance - - Cross-references to recovery_policy.md and self_healing_model.md - -**Integration points with existing runbook:** - -- **STEP 1 (INVESTIGATE):** Error handling quick reference for executor failure investigation -- **STEP 3 (BLOCKED/STALLED WORK):** Error classification and diagnosis trees -- **STEP 5 (EXECUTION GATE):** Idempotency checks via executor failure contracts - -**Runbook structure maintained:** -- Related docs section enhanced with error handling resources -- Error Handling Guide placed before Quick Start for foundational context -- All links use markdown relative paths for runbook-internal navigation -- Document descriptions match editorial style of existing runbook sections - -**Acceptance criteria met:** -- ✅ Error handling section created in runbook -- ✅ All 15 identified error scenarios referenced with solutions -- ✅ Documentation follows runbook style and formatting -- ✅ Cross-references and navigation fully working (relative links) - -**Integration complete:** Error handling documentation is now discoverable from the main watchdog loop runbook. Operators can navigate from the loop workflow directly to appropriate error handling resources without leaving the runbook context. - -## 2026-05-29 — Stage 1 Complete: Error Handling Documentation Core Components - -Completed Stage 1 of "Document error handling in core operational components" work order. -Built on Stage 0 assessment (`.console/error_handling_assessment.md`); filled identified gaps. - -**Deliverables created:** - -1. **docs/operator/error_handling_recipes.md** (1100 lines) - - 8 step-by-step operator decision trees covering all critical/medium error scenarios - - Each recipe: symptom → diagnosis → recovery → escalation criteria - - Includes root cause analysis and manual recovery procedures - - Covers: session timeouts, backend rate limits, workspace failures, policy rejections, queue deadlock, post-send failures, oversized diffs - - Template for Plane escalation tasks - - Acceptance criterion: "Operator decision trees and recovery recipes" ✓ - -2. **docs/operator/backend_error_catalog.md** (950 lines) - - Per-backend error code reference (Claude, Codex, team_executor, dag_executor, demo_stub) - - 30+ error codes with: meaning, root cause, detection method, recovery strategy, escalation criteria - - Detailed failure modes for each backend (RATE_LIMIT, AUTH_FAILED, TIMEOUT, CONTEXT_WINDOW_EXCEEDED, etc.) - - Claude backend coverage: 8 error codes + detailed guidance - - Cross-backend error classification and retry budget model - - Health check commands and monitoring thresholds - - Acceptance criterion: "Per-backend error codes and recovery strategies documented" ✓ - -3. **docs/operator/executor_failure_contracts.md** (900 lines) - - Failure contracts for 6 executor types (Goal, Test, Improve, Propose, Review, Spec) - - Idempotency guarantees and failure classifications - - Budget and retry models with specific limits - - Recovery procedures by failure type (setup, execution, timeout, budget exhaustion) - - Health metrics per executor (success rate, mean time, retry rate, budget efficiency) - - Failure propagation and cross-executor patterns - - Acceptance criterion: "Executor-specific failure contracts and recovery expectations" ✓ - -4. **docs/operator/error_handling_quick_reference.md** (750 lines) - - On-call operator cheat sheet: 8 common scenarios with quick-fix commands - - TL;DR table for symptom → diagnosis → fix mapping - - Health check scripts (watchdog, session anchor, backend availability) - - Scenario-based recovery with tested commands and expected outputs - - Decision tree for triage: "which scenario am I in?" - - Escalation checklist before creating Plane task - - 30+ useful shell commands for common troubleshooting tasks - - Acceptance criterion: "Quick-reference checklist for common stuck states" ✓ - -**Key improvements:** -- Operators now have decision trees instead of guesswork for error diagnosis -- Backend error codes mapped to recovery strategies (not just error listings) -- Each executor's failure contract is explicit (idempotency, budget, retry limits) -- Quick-reference guide enables sub-2-minute triage for on-call responders -- All 15 error scenarios from Stage 0 assessment → 8 detailed recipes with code examples -- Cross-references between all four documents for comprehensive coverage - -**Assessment gaps addressed:** -- [x] Operator Decision Trees — error_handling_recipes.md -- [x] Per-Backend Error Catalog — backend_error_catalog.md -- [x] Executor Failure Contracts — executor_failure_contracts.md -- [x] Quick-Reference Checklist — error_handling_quick_reference.md - -**Files created:** 4 operator runbooks (3,700+ lines total) -- Integrated with existing recovery_policy.md and watchdog_loop.md -- All acceptance criteria met -- Ready for production operator use - ---- - -## 2026-05-29 — Stage 2 Complete: Error Handling Documentation - -Completed Stage 2 of "Document error handling for operational procedures and edge cases" work order. - -**Deliverables created:** - -1. **docs/operator/error_scenarios.md** (850 lines) - - 15 operational error scenarios documented (5 critical, 5 medium, 5 low priority) - - Organized by severity and system layer - - Quick-reference format for operator triage - - Acceptance criterion: "Common operational error scenarios documented" ✓ - -2. **docs/operator/error_handling_recovery.md** (1200 lines) - - Detailed troubleshooting and recovery procedures for all error categories - - Quick diagnosis tree for initial symptom classification - - Step-by-step recovery procedures with code examples and monitoring commands - - Decision paths for critical errors (backend unavailability, workspace prep, session timeout, policy failures, queue deadlock) - - Recovery procedures for medium-priority errors (budget exhaustion, rate limits, oversized diffs) - - Monitoring procedures for low-priority errors - - Diagnostic commands reference section - - Acceptance criterion: "Troubleshooting and recovery procedures outlined" ✓ - -3. **docs/operator/error_message_diagnostics.md** (900 lines) - - Mappings of 25+ specific error messages to causes and remedies - - Organized by error category (backend, workspace/git, policy, recovery, execution size, serialization, watchdog, state/stagnation, ContextLifecycle) - - Error search index for quick lookup - - Escalation template for unknown errors - - Multi-error scenario guidance - - Acceptance criterion: "Error message to diagnosis mappings created" ✓ - -**Key improvements:** -- Operators can now find specific error messages and get immediate cause/remedy -- Diagnosis tree enables quick classification without reading full docs -- Step-by-step procedures replace inference-based troubleshooting -- Cross-references between all three documents for comprehensive coverage -- All 15 error scenarios from Stage 0 assessment now documented with operational procedures -- Ready for operator use in production incident response - -**Files modified:** None (new files only) - -**Files created:** 3 operator runbooks -- Total lines: ~2950 -- All acceptance criteria met; ready for operator integration ---- - -## 2026-05-29 — fix(review-watcher): guard _merge_and_done against CONFLICTING PRs - -Review watcher was getting 405 errors every cycle for PRs #184, #186, #192 because -it attempted merge when CI was green but PRs had merge conflicts. Added get_mergeable() -guard in _merge_and_done — skips merge when explicitly False, proceeds when None. - ---- - -## 2026-05-29 — Fix duplicate log lines in nohup mode - -_log() printed to stdout AND wrote to file; nohup redirect doubled every line. Removed print(). - ---- - -## 2026-05-29 — Controller writes sleeping_until_utc to state file - -Enables status pane to show idle countdown instead of blank Active section between iterations. - ---- - -## 2026-05-29 — Merge operations-center-testing-branch into main - -Reconciled testing branch (ty/custodian fixes, spec tasks #185/#193/#199). Conflicts resolved in favor of main. - ---- - -## 2026-05-29 — Work Order 0009 complete - -All ADR 0009 execution hygiene items checked off by controller. - ---- - -## 2026-05-29 — fix(ty): clear stale/broken type-ignore suppressions blocking CI - -Two suppressions on main were causing ty CI failure on every open PR (11 PRs affected): -1. dag_executor/adapter.py:113: `# type: ignore[arg-type]` used mypy's code alias which - ty 0.0.40 doesn't recognize. Changed to `# ty: ignore[invalid-argument-type]` (matches - line 83 in same file). Regression introduced by 155c8fc (taxonomy fix changed type of - `executed` in a way that broke the previously-working-by-accident suppression). -2. board_worker/main.py:1339: stale `# ty:ignore[invalid-assignment]` emitted an - `unused-ignore-comment` warning (ty treats this as a CI failure). - -## 2026-05-29 — fix(github-pr): follow redirects in get_pr_diff - -get_pr_diff used httpx.get without follow_redirects=True. After the Velascat→ProtocolWarden -repo rename, the pulls diff endpoint returns a 301 whose empty body was returned as the diff, -causing pr_review_watcher to skip every OC PR with "empty diff". Added follow_redirects=True. - -## 2026-05-29 — fix(spec_trigger): stop queue-drain flood when rate-gated tasks accumulate - -spec_trigger was creating 10+ duplicate queue-drain tasks in ~30 min: (1) running_count -used "in progress" but Plane state is "Running" → always 0, making detect() see an -always-drained board; (2) _existing_spec_author_in_flight only checked R4AI/Running, -so Blocked tasks were invisible and spec_trigger fired every cycle. Fixed by adding -_any_queued_spec_author() covering Blocked/Backlog states; queue_drain suppressed when -any non-terminal spec-author task exists. Drop-file bypasses suppression. 13 tests added. - -## 2026-05-29 — fix(workspace): restore .baseline-validation.json before task-branch checkout - -.baseline-validation.json slipped into goal-branch commits; baseline validation -overwrites it on base branch, blocking retry-path checkout with "local changes would -be overwritten". Added restore_to_head() to GitClient and called it in prepare() -between _run_baseline_validation and create_task_branch. - -## 2026-05-29 — fix(board-unblock): STALE_IN_REVIEW false-positive on tasks with open PRs - -Rule 5 demoted task 0f1612ea from In Review → Backlog despite PR #184 being open with -all CI passing. Fixed: goal board_worker now adds pr-url label when opening PR; Rule 5 -skips tasks with pr-url label. Task restored to In Review. 3 tests added. - -## 2026-05-28 — Fix taxonomy: classify aider_local/direct_local as direct worker backends - -Added EXECUTOR_LANE_NAMES / DIRECT_WORKER_BACKEND_NAMES frozensets to enums.py. -Fixed quota_event backend= in team/dag/critique executor adapters to use the lane -name instead of the selected worker backend. Expanded worker_backend_selector to -include local backends with correct pool-partitioned round-robin. Updated settings.py -docstrings and docs/README.md to reflect the two-category taxonomy. - -## 2026-05-29 — P4: squash stage commits before opening PR; P6 follow-up: remove 6 stale type: ignore comments - -## 2026-05-28 — P6 follow-up: fixed 10 pre-existing ty errors exposed by ty==0.0.40 pin - -## 2026-05-28 — P3: open-PR gate implemented; P6: ruff==0.15.13, ty==0.0.40 pinned - ---- - -## 2026-05-28 — Patch session prompt: read task.md, stop polluting log.md - -Added STEP 0 to read .console/task.md for operator directives as primary objective. -Redirected cycle history reads from log.md to logs/local/watchdog_cycles/. -STEP 10 (was 9): stop writing cycle dumps to log.md; only log meaningful events; -retired chore(watchdog): cycle N commit pattern. - ---- - -## OC Platform Watchdog — Cycle (2026-05-28 22:20 UTC) — ACTIVE/900s - -**Health state:** ACTIVE -**Cadence:** 900s -**Driving signal:** 2 tasks in Running, R4AI=18, board_unblock 3 actions applied this cycle - -**Board state:** -- Backlog: 20 | Ready for AI: 18 | Running: 2 | Blocked: 0 | In Review: 1 | Done: 21 | Cancelled: 69 -- Prior cycle: Backlog: 21 | Ready for AI: 18 | Running: 1 | Blocked: 0 | In Review: 1 | Done: 21 | Cancelled: 69 -- Delta: Backlog -1, Running +1 — forward progress confirmed - -**STEP 0 — Preflight:** All 16 repos up to date. Plane OK. SwitchBoard OK. All 8 watchers running. CLIs OK. Git clean. - -**STEP 1 — Investigate:** -- custodian-sweep: timed out (120s, exit 143) — known issue, tracked as Backlog task 8da50821 -- ghost-audit: 3 events — G7 (2 thin-goal refuses on 89191ff5 "Emit JUnit XML", fixed), G10 (1 cancelled runaway follow-up, fixed) -- flow-audit: 0 open gaps -- graph-doctor: OK (12 nodes / 12 edges, graph_built=True) -- reaudit-check: no reaudit needed -- check-regressions: 0 findings (last 1h) - -**STEP 2 — Triage:** 0 rescores. 0 queue healing. - -**STEP 2.5 — Board unblock:** 3 actions applied: -- 878948a6 Blocked→Backlog (CLEAN_BLOCKED_RETRY: no executor-signal labels, pre-execution infra failure) -- 0020c1da Backlog→Ready-for-AI (GOAL_BACKLOG_PROMOTE: parent improve c4ab9666 is Done) -- d765c140 Backlog→Ready-for-AI (SPEC_AUTHOR_BACKLOG_PROMOTE) - -**STEP 3 — Blocked/Stalled Investigation:** -- No starvation: R4AI=18, Running=2, Blocked=0 -- No closed-loop stagnation: board state evolved (Backlog -1, Running +1 from prior cycle) -- Behavioral convergence: WEAKLY-CONVERGENT — active execution, R4AI queue large but consuming -- Running tasks: d765c140 ([Spec] queue-drain-20260528T093334) and 3a3c202f (Harden Collector against malformed JSON) -- Known issues with Plane escalations: custodian-sweep timeout (8da50821/Backlog), PR merge 405 (c5d985ef/Backlog) -- Codex backend "Reading from stdin" failures (17:15-17:29 UTC yesterday) — 8 tasks blocked, all recovered by prior board_unblock cycles, Blocked=0 now -- PR #178/#180 merge 405 failures: tracked in c5d985ef; PRs now returning 404 on comment fetch, may be closed -- In Review: 0f1612ea "Handle Optional observed_at in the Deriver" — succeeded at 16:57 UTC, awaiting review watcher merge - -**STEP 4 — Convergence Promotion:** No new promotion needed; all recurring patterns already have Plane tasks. - -**STEP 5/6 — Execution Gate / Direct Fixes:** No direct fixes this cycle. Blocked=0, running tasks active, no gate-passing findings. - -**STEP 7 — Invariants:** 15/15 passed. - -**STEP 8 — Watcher Health:** All 8 watchers running. No non-143 restarts. No crash loops. SwitchBoard errors from ~20:05-20:17 UTC and prior resolved; SwitchBoard now OK. - -**Cadence rationale:** ACTIVE (900s) — 2 tasks in Running state, R4AI=18, board evolving. Not HEALTHY because improve tasks hitting concurrency gate (global_concurrency_exceeded still active). - ---- - -## 2026-05-28 — Fix spec-author watcher gap + HTML parsing + board_unblock Rule 8 - -Four spec-author bugs fixed (cherry-picked from oc-watchdog/20260528-1825-board-unblock-rate-clear): -1. `watch --role spec` never launched `board_worker --role spec-author`; spec-author R4AI tasks had no consumer. -2. `_parse_spec_author_payload` read `description_stripped` (empty); Plane only returns `description_html`. -3. `_existing_spec_author_in_flight` blocked new triggers for Blocked/Backlog tasks (should be R4AI/Running only). -4. `board_unblock` Rule 8 (CLEAN_BLOCKED_RETRY) excluded `task-kind: spec-author`; no re-queue path on budget-gate failures. - ---- - - -## 2026-05-28 — Add C29 custodian exclusion for board_unblock.py - -board_unblock.py grew to 530 lines after adding Rules 8-9 (spec-author coverage). -Added C29 exclusion: it is a rules engine where each rule adds ~20 lines; splitting -by rule would fragment context without adding clarity. - ---- - -## 2026-05-28 — Replace board_unblock test file (remove unresolvable board_unblock_support deps) - -The cherry-picked test file required board_unblock_support module (not on main). -Replaced with 6 focused unit tests covering Rules 8 (spec-author extension) and 9 (SPEC_AUTHOR_BACKLOG_PROMOTE). - ---- - -## 2026-05-28 — Fix git client: explicitly fetch remote tracking ref before task branch checkout - -Shallow --no-single-branch clones may not store the remote tracking ref for branches -that diverged early, causing checkout -b to fail silently. Added explicit git fetch -before checkout -b when remote branch exists. - ---- - -## 2026-05-28 — Fix board_unblock Rule 9: SPEC_AUTHOR_BACKLOG_PROMOTE - -Rule 8 (CLEAN_BLOCKED_RETRY) moves spec-author tasks Blocked → Backlog but no watcher -re-promoted them Backlog → R4AI. Added Rule 9 SPEC_AUTHOR_BACKLOG_PROMOTE. - ---- - -## 2026-05-28 — Fix board_unblock Rule 7: extend to cover improvement_applied follow-on goal tasks - -Pattern B: tasks produced by the goal board_worker when an improvement is applied -(`source: board_worker` + `handoff-reason: improvement_applied`) were invisible to -GOAL_BACKLOG_PROMOTE because Rule 7 only matched Pattern A (`source: autonomy` + -`source: improve-suggestion`). Cherry-picked from c14ea4a on watchdog branch. - ---- - -## OC Platform Watchdog — Cycle (2026-05-28 21:37 UTC) — ACTIVE/900s - -**Health state:** ACTIVE -**Cadence:** 900s -**Driving signal:** 4 cherry-picked fixes deployed; board_unblock 19 actions applied; spec-author pipeline fully wired - -**Board state:** -- Backlog: 21 | Ready for AI: 18 | Running: 1 | Blocked: 0 | In Review: 1 | Done: 21 | Cancelled: 69 - -**STEP 0 — Preflight:** All 16 repos up to date. Plane OK. SwitchBoard OK. All watchers running. CLIs OK. Git clean. - -**STEP 1 — Investigate:** -- custodian-sweep: 0 findings -- ghost-audit: 3 events (all fixed) -- flow-audit: 0 gaps -- graph-doctor: OK (12 nodes / 12 edges) -- reaudit-check: no reaudit needed -- check-regressions: 0 findings - -**STEP 2 — Triage:** 0 rescores. 0 queue healing. - -**STEP 2.5 — Board unblock (initial):** 4 CLEAN_BLOCKED_RETRY applied (Blocked→Backlog). - -**STEP 3 — Blocked/stalled investigation:** -Root cause: spec-author tasks stuck in Blocked had no re-queue path. Four bugs on watchdog branch never merged to main: -1. missing subprocess -2. read description_stripped (empty) vs description_html -3. blocked triggers for Blocked/Backlog states -4. Rule 8 excluded spec-author task kind - -Cherry-picked fixes (73a4102, c14ea4a, b01f52f, 19a925d) from oc-watchdog/20260528-1825-board-unblock-rate-clear. -Added Rule 9 SPEC_AUTHOR_BACKLOG_PROMOTE. Simplified to not require watchdog-branch-only . -Added C29 custodian exclusion for board_unblock.py. - -**Behavioral convergence:** CONVERGENT — active execution, tasks draining, forward progress visible. -Prior 2 cycles (iter 2-3) were rate-limit failures with no schedule written; not stagnation. - -**STEP 5 — Execution gate:** Direct fixes deployed (all criteria met: reproduced, OC-scoped, impl-level, no destructive ops). - -**STEP 6 — Direct fixes:** No autonomy-cycle dispatched (spec-author pipeline fix is infrastructure, not task dispatch). - -**STEP 7 — Tests:** 15/15 golden passed. 6/6 board_unblock unit tests passed. Custodian clean. - -**STEP 8 — Watcher health:** All 8 watchers running. Spec watcher restarted (PID 3776042) to pick up spec-author board_worker. -Prior SwitchBoard errors at 22:46 UTC were temporary — SwitchBoard is up. 405 merge errors for PRs #178/#180 are expected (already merged). - -**STEP 2.5 (post-fix) — Board unblock:** Applied 19 actions: 2 CLEAN_BLOCKED_RETRY (including spec task d765c140 Blocked→Backlog), 17 GOAL_BACKLOG_PROMOTE. - ---- - -## 2026-05-28 — P1: prune watchdog cycle dumps from log.md - -Moved 792 watchdog cycle / loop cycle sections (11k+ lines) to -logs/local/watchdog_cycles/archived_cycles.md. log.md is now 1160 lines of -legitimate decisions and milestones only. - ---- - -## 2026-05-28 — P2: delete STAGE_*.md scratchpads + gitignore guard - -Deleted 22 goal-worker scratchpad files from repo root (STAGE_*.md, DERIVER_AUDIT_*.md, -LOOP_START.md, =3.0). Added .gitignore patterns to block future ones. - ---- - -## 2026-05-28 — Operator: work order 0009 — execution hygiene - -6 execution quality problems documented and assigned. See ADR 0009. -P1/P5: stop polluting .console/ truth files; P2: delete STAGE_*.md; P3: open-PR gate; -P4: squash stage commits; P6: pin tool versions. - ---- - -## 2026-05-28 — Operator: fix pre-existing ty failures in dag_executor/adapter.py - -Two ty 0.0.40 errors blocking all PR CI runs: -1. Line 83: removed unused `# type: ignore` (ty no longer flags the assignment) -2. Line 113: added `# type: ignore[arg-type]` for WorkerBackendExecution[dict] covariance issue - -These were introduced by a direct push to main (#182/#183) and not caught since CI is -only enforced on PRs, not direct pushes. - ---- - -## 2026-05-28 — Operator: re-rebase PR #180 onto new main (post #181 merge) - -Resolved conftest.py conflict: took PR #180 tmp_path refactor, ruff auto-fixed unused import. -All 3609 tests pass. - ---- - -## 2026-05-28 — Operator: fix CI on PR #181 (goal/0f1612ea) - -Rebased onto main, resolved code conflict in dependency_drift.py (merged null-safe -observed_at handling with reverse transition coverage from #178). Fixed ruff auto-fix. -All 3609 tests pass, custodian clean. - ---- - -## 2026-05-28 — Loop controller: settings.json fallback for cl resolution - -Extended the `cl` resolver to read `CL_HOME` from `~/.claude/settings.json` when -it's absent from the environment — making it identical to OperatorConsole's pane -resolver (CL_HOME env → settings.json → PATH). The controller now resolves `cl` -with neither `CL_HOME` nor PATH set (verified via `env -i`), so it anchors -correctly regardless of launcher (nohup, systemd, cron). - -## 2026-05-28 — Loop controller: robustly resolve `cl` (CL_HOME fallback) - -The loop controller resolved `claude`/`codex` robustly via `_resolve_command` -(PATH + `~/.local/bin` fallbacks) but invoked `cl` as a bare `["cl", ...]`, -relying solely on PATH. That works when the loop is launched `nohup` from an -interactive shell (whose `~/.bashrc` puts `$CL_HOME/bin` on PATH) but fails -silently under cron/systemd/clean shells — `cl` not found → no anchor → loop -runs unanchored → ContextGuard blocks claude. Mirrors the OperatorConsole pane -bug just fixed. - -Added a `cl` branch to `_fallback_command_candidates` (uses `CL_HOME`) and -routed all four `cl` calls (session start/end, hydrate, capture) through -`_resolve_command`. Verified: with `cl` off PATH but `CL_HOME` set, the -controller resolves it and anchors at PlatformManifest. - -## 2026-05-26 18:55:00Z — Harden watchdog backend fallback under service PATH -Patched `tools/loop/controller.py` so Claude cooldown parsing accepts timezone -reset messages without minutes, including `resets 9am (America/New_York)`. -Also added backend executable resolution plus per-session PATH prepending so -Codex can execute under service environments that do not inherit NVM's node -bin directory. Focused controller tests passed (`10 passed`) after mirroring -the new fallback and parser regression coverage in `tests/test_loop_controller.py`. - -## 2026-05-26 — Executor tiering hard cutover - -- Pinned debug-phase executor defaults to `budget` across `team_executor`, `dag_executor`, and `critique_executor`; worker-backend round robin remains enabled. -- Renamed the middle execution tier from `default` to `standard` as a hard cutover. Removed tier alias handling so `default` is no longer valid for executor tier selection. -- Added ADR 0008 to define the phased tiering policy, long-term decision order, and current Phase 0 debugging posture. -- Verified with focused backend/policy/setup tests, full `pytest`, and `pytest -m integration`. - -## 2026-05-25 - -- Fixed the pre-existing repo-wide pytest collection blocker by renaming the duplicate hardening module to `tests/observer/test_collectors_hardening/test_execution_health_hardening.py`, avoiding the `test_execution_health` import collision. -- Restored observer test consistency around dependency drift and execution health artifacts: - - `ExecutionOutcomeValidator` now accepts the retained artifact statuses `no_op` and `error` in addition to `executed`, `failed`, `timeout`, and `unknown`. - - `DependencyDriftCollector` now returns `not_available` consistently so `ObservationCoverageDeriver` can detect persistent missing coverage correctly. -- Fixed malformed-payload alert handling to normalize naive timestamps to UTC before lookback comparisons in `observer/security_logging.py`. -- Added OC→CxRP backend normalization in `contracts/cxrp_mapper.py` so OC executor backends like `team_executor`, `dag_executor`, and `critique_executor` serialize onto the current CxRP backend enum without failing mapper tests. -- Validation: - - `python -m pytest` → `3536 passed, 7 skipped` - - `python -m pytest -m integration` → `3 passed` - -## 2026-05-25 - -- Added executor worker-backend observability end to end: the `team_executor`, `dag_executor`, and `critique_executor` adapters now expose `execute_and_capture()` with `observed_runtime` showing preferred backend, selected backend, fallback usage, and backend cooldown snapshot. -- Added a live operator status surface for worker-backend cooldowns via `operations-center-worker-backend-status` and `./scripts/operations-center.sh worker-backend-status`, backed by a new `UsageStore.current_worker_backend_cooldowns()` summary API. -- Extended retained trace visibility so `operations-center-run-show ` prints the `Observed runtime` block, making actual `claude_code` vs `codex_cli` selection visible per run without re-reading raw record metadata. -- Validation: focused pytest slices passed (`68 passed`) and targeted Ruff checks passed. Repo-wide `python -m pytest` and `python -m pytest -m integration` are still blocked by the pre-existing duplicate-module import mismatch between `tests/test_execution_health.py` and `tests/observer/test_collectors_hardening/test_execution_health.py`. - -## 2026-05-25 — Make watchdog controller backend cooldowns symmetric - -- Reworked `tools/loop/controller.py` so Claude and Codex both feed - backend-specific cooldown windows parsed from backend limit errors. -- Claude remains primary whenever runnable, but the controller now falls - through to Codex during Claude cooldowns, applies the same reset-driven - cooldown logic to Codex, and sleeps until the earliest parsed reset when both - backends are unavailable. -- Focused watchdog controller tests passed in the repo venv. - -## 2026-05-25 — Add watchdog controller backend cooldown observability - -- Added a controller runtime-state file so `--status` now shows the preferred - backend, current runnable backend, and per-backend cooldown deadlines. -- Added explicit log events when a backend cooldown expires and that backend - becomes runnable again. -- Focused watchdog controller tests passed in the repo venv. - -## 2026-05-25 — Add executor worker-backend round robin - -- Added shared worker-backend selection + cooldown parsing for - `team_executor`, `dag_executor`, and `critique_executor`. -- `worker_backend` is now the preferred backend, not a hard pin: - `claude_code` stays primary by default, `codex_cli` is used when Claude is - cooling down, and the adapters immediately retry once on the alternate - backend after a limit-triggered cooldown event. -- Persisted worker-backend cooldown windows in `UsageStore` so fallback state - survives across watcher cycles. -- Updated example config, setup rendering, and operator runtime docs to expose - `dynamic_worker_backend_selection`. - -## 2026-05-25 — Backend tier selection unified across TE / DAG / Critique - -- Added shared backend tiering helper for runtime-binding tier inference plus one-step downgrade at budget pressure `>= 0.75`. -- `team_executor` now selects `budget` / `default` / `premium` dynamically from runtime binding, then downgrades one tier under pressure. -- `dag_executor` now injects tier defaults into fallback and workflow agent nodes: - - budget = haiku / gpt-5.4-mini @ low - - default = sonnet / gpt-5.4 @ medium - - premium = opus / gpt-5.4 @ high -- `critique_executor` now builds tiered proposer/critic config from the same mapping. -- Restored checked-in runtime binding policy to intentional tiering and added `config_ref` hints so Codex `default` vs `premium` are distinguishable even when both use `gpt-5.4`. -- Updated setup rendering, example config, and operator docs to expose the new knobs. Focused OC backend/policy/setup tests passed. - -## 2026-05-25 — Clarify CritiqueExecutor proposer wording at the OC boundary - -- Added an adapter docstring note that CritiqueExecutor's historical - `proposer_*` config fields refer to its internal draft agent. -- This avoids collision with OC's separate board-facing proposer subsystem - without changing any runtime field names or contracts. - -## 2026-05-25 20:02:00Z — Pin watchdog controller backends and add Codex fallback -Updated `tools/loop/controller.py` so watchdog sessions stay pinned to -`claude-sonnet-4-6` with `medium` effort and fall back to `codex exec` using -`gpt-5.4` with `medium` reasoning effort when Claude is rate-limited or unavailable. -Added focused controller tests and updated `LOOP_START.md` plus the watchdog runbook -to match the new controller behavior. - -## 2026-05-25 20:10:00Z — Exclude direct loop-controller tool test from T8 -Custodian T8 flagged `tests/test_loop_controller.py` because it exercises the -top-level `tools/loop/controller.py` entrypoint directly rather than importing a -`src/operations_center/**` package. Added a narrow T8 exclusion for that test in -`.custodian/config.yaml`; targeted controller tests still pass. - -## 2026-05-22 — ADR 0007 follow-up D: generic maintenance-task registration - -Branch: `feat/adr-0007-followups`. Landed the registration mechanism ADR 0007 flagged as out-of-scope ("Generic watcher-registration mechanism for the watchdog — would be useful but is its own work"). Migrated `spec_hygiene` as the proof of concept. - -**New module:** `src/operations_center/maintenance/` — `MaintenanceTask` (Protocol, `@runtime_checkable`), `MaintenanceContext` (cycle_id + now + resources dict), `MaintenanceResult` (`name`/`status`/`duration_seconds`/`details`/`error`), `MaintenanceRegistry`. Registry honors per-task `interval_seconds` (advisory), isolates failures (one failing task doesn't block others — it logs status='failed' with the error string and the cycle continues), and persists last-run timestamps to `.console/maintenance_state.json` so intervals survive restarts. State file is gitignored under the existing `.console/*` rule. - -**Watchdog wiring:** OC's actual in-process maintenance host is the `spec_hygiene` watcher (the loop controller in `tools/loop/controller.py` is a Claude-session subprocess spawner, not a Python in-process cycle). `spec_hygiene/main.py:main()` now constructs a `MaintenanceRegistry`, registers `SpecHygieneTask`, and each cycle calls `registry.run_due(ctx)` instead of bare `run_once()` — slotting in at the same `while True:` loop. Adds `--maintenance-state` flag for overriding the sidecar path. - -**SpecHygieneTask:** wraps the existing `run_once(settings, client)` cycle (still works standalone). `run_once` now returns a summary dict (`campaigns_projected`, `phases_advanced`, `campaigns_completed`, `phase_advance_tasks_emitted`, `campaigns_abandoned`, `status_hint`); the task adapts it into a `MaintenanceResult`. Backward-compat preserved: `operations-center-spec-hygiene` CLI still launches with the same args (`--config`, `--once`, `--status-dir`) and the standalone watcher loop continues to drive the cycle. - -**Tests:** `tests/maintenance/test_registry.py` + `tests/maintenance/test_spec_hygiene_task.py` — 11 passing. Covers register + list, duplicate-name rejection, interval gating, disabled tasks, failure-isolation across multiple tasks, state-file persistence across registry instances, Protocol structural conformance, happy-path SpecHygieneTask result, disabled/skipped path, exception → failed path. - -**Docs:** `docs/architecture/maintenance_pattern.md` (new); ADR 0007 _Considered alternatives_ note updated to point at the landed pattern. - -**Constraints honored:** no commit, no push; A/B/C untouched; only `spec_hygiene` migrated (custodian/ghost/flow audits left for later); standalone CLI behavior preserved. - - - -## 2026-05-22 — ADR 0007 follow-up C: prompt-diff primitive + surgical phase-advance - -Branch: `feat/adr-0007-followups`. Replaced the naive full-regen phase-advance prompt with a structured-edit (prompt-diff) primitive copied in from `temm1e-labs/promptlabs` (MIT — per upstream README; no LICENSE file in the repo, README declares "MIT."). - -**New module:** `src/operations_center/prompt_diff/` — `Edit` (Pydantic v2), `EditOp` (Literal), `EditApplicationError`, `apply_one`, `apply_edits`, `ApplyResult` (dataclass). Schema + application logic carried over from promptlabs' `api/app/agents/optimizer.py`; the closed-loop optimizer agent (LLM-calling, budgets, variable validation) deliberately not. Header attributes derivation per MIT custom. - -**Prompt rewrite:** `_build_phase_advance_goal_text` in `entrypoints/board_worker/main.py` now instructs the agent to (1) read the existing spec, (2) emit a YAML `list[Edit]` between `` / `` markers, (3) apply the edits and write the result back. Schema documented inline; worked example included; uniqueness + minimality rules made hard. Front-matter, provenance comment, prior-phase decisions preserved by construction (anchors leave them alone). - -**Post-process verification:** new `_summarize_prompt_diff_block` helper parses the committed fence as `list[Edit]` and logs the edit count. Soft signal only — parse failure logs at INFO and does NOT block task transition. The hard contract is "spec committed"; edit-block hygiene is feedback for prompt tuning. - -**ADR 0007 follow-up section** updated from future tense ("when ... lands") to past tense ("as of follow-up C ... DONE"), pointing at the new module. - -**Tests:** `tests/prompt_diff/test_apply.py` — 13 passing. Covers all 5 ops (replace / insert_before / insert_after / delete / append), ambiguous-anchor rejection, missing-anchor rejection, required-field validation, `apply_edits` mixed-validity skip semantics, sequential edits running against partial state. Adjacent suites (`tests/spec_author/` + grep `board_worker|phase_advance|spec_author`) all green: 38 + 13 passing. - -**Constraints honored:** no commit, no push; follow-ups A/B/D untouched; full Optimizer agent (LLM closed loop) not pulled in; license attribution in module header. - - - -Branch: `feat/adr-0007-followups`. Replaced the fragile post-success `__RUN_ID__` token rewrite with an at-prompt-time substitution. - -**Convention switch:** `__RUN_ID__` sentinel → `{{RUN_ID}}` placeholder. The spec-author prompts (`_build_spec_author_goal_text` + `_build_phase_advance_goal_text` in `board_worker/main.py`) now emit `{{RUN_ID}}` in the provenance-comment instructions. - -**Substitution site:** `ExecutionRequestBuilder.build()` in `src/operations_center/execution/handoff.py`. We allocate `run_id` explicitly (via `_new_id()` from `contracts.execution`), do a literal `(proposal.goal_text or "").replace("{{RUN_ID}}", run_id)`, then construct the frozen `ExecutionRequest` with both the substituted `goal_text` and the matching `run_id`. Unconditional and task-kind-agnostic — `replace` is a no-op when the placeholder isn't present. `ExecutionRequest` is a frozen Pydantic model, so we can't mutate in place; explicit allocation + single construction is cleaner than `model_copy(update=)`. - -**Post-success rewrite removed:** `_handle_spec_author_success` no longer reads + replaces + rewrites the spec file. The agent now sees the real run_id at prompt time and writes `` directly. Comment left in place noting the contract. - -**Grep confirmation:** `grep -rn "__RUN_ID__" src/ tests/` → 0 hits. - -**Test impact:** No tests referenced the old sentinel. Pre-existing failures (`test_phase_orchestrator.test_advances_to_test_when_all_implement_done`, three `test_cxrp_mapper` BackendName failures) reproduce on `git stash` — unrelated to this change. - -## 2026-05-22 — ADR 0007 follow-up A: config key rename - -Renamed `SpecDirectorSettings` → `SpecAuthorSettings` and the parent field `settings.spec_director` → `settings.spec_author`. Updated the 5 attribute reads across `spec_hygiene` and `spec_trigger`. Renamed test from `test_spec_director_settings_defaults` → `test_spec_author_settings_defaults`. No YAML config files referenced the key, so no migration needed. - -## 2026-05-22 — ADR 0007 Phase F: retire spec_director entrypoint + rename package to spec_author - -Branch: `feat/spec-director-refactor`. Phase F closes ADR 0007 — the legacy `spec_director` watcher is fully retired; its three former responsibilities now live in three focused surfaces: - -- `operations-center-spec-trigger` (Phase B) — drop-file / queue-drain detection, emits one `spec-author` Plane task per cycle (no LLM). -- `operations-center-spec-hygiene` (Phase A) — board hygiene, `active.json` projection, stall detection, phase-advance task emission (no LLM). -- `spec-author` task-kind handler in `board_worker` (Phases C/D) — brainstorm, campaign creation, phase-advance rewrite via the backend executor (the only LLM path). - -**Deleted:** -- `src/operations_center/entrypoints/spec_director/` (entire package — `__init__.py` and `main.py`). -- `tests/test_spec_director_main.py` — exercised only the now-deleted entrypoint. - -**Renamed:** -- `src/operations_center/spec_director/` → `src/operations_center/spec_author/` (shared library). Name now matches the task-kind it fronts; the package is shared infrastructure, no longer a watcher. -- `tests/spec_director/` → `tests/spec_author/`. -- All `from operations_center.spec_director...` imports in `src/` and `tests/` rewritten to `from operations_center.spec_author...` (mechanical sed). - -**Modified:** -- `src/operations_center/spec_author/__init__.py`: added docstring explaining the rename and ADR 0007 Phase F lineage. -- `scripts/operations-center.sh`: the `spec` watch role no longer launches `operations_center.entrypoints.spec_director.main`. It now supervises two sibling Python children — `spec_hygiene.main` and `spec_trigger.main` — under a single restart-loop wrapper (`wait -n` on either child triggers a paired restart). PID-file semantics preserved. -- `docs/operator/runtime.md` `watch --role spec` section rewritten to document the two siblings + the `spec-author` task-kind handler, with the ADR 0007 Phase F provenance note. -- `docs/design/roadmap.md` "Autonomous Spec-Driven Campaign Chain" section: added a Phase F update note at the top of the section so the historical "what was built" content stays auditable while pointing readers at the current topology. -- `.env.operations-center.example`: SwitchBoard comment retargeted from `spec_director LLM calls` to `spec-author LLM calls`. -- `.custodian/config.yaml`: every `src/operations_center/spec_director/...` writer glob / required-file path rewritten to `src/operations_center/spec_author/...`; touchpoint comment updated. -- `tests/test_architecture_cleanup_guards.py` `test_adr_0007_no_claude_cli_module_in_source_tree`: now asserts both legacy and current paths (`spec_director/_claude_cli.py` and `spec_author/_claude_cli.py`) are absent, so the Phase-E guard survives the rename. -- `src/operations_center/spec_author/phase_orchestrator.py` and `src/operations_center/entrypoints/board_worker/main.py`: Phase-F TODO breadcrumbs updated to reflect that retirement has happened; fields/args kept for back-compat but comments now point to the completed retirement. -- Stale `# src/operations_center/spec_director/` path comments at the top of every renamed module rewritten to the new path. - -**Intentionally NOT renamed:** `settings.spec_director` config block — leaves the operator-facing YAML key stable; renaming it is a separate orchestrated change. - -**Phase E guard suite:** `pytest tests/test_architecture_cleanup_guards.py -v` → 7 passed. - -**Post-Phase-F grep `spec_director` over `src/`:** 17 hits, all in comments / docstrings / the `settings.spec_director` back-compat config attribute / the `__init__.py` historical-context note. Zero executable references. AST guard (`test_adr_0007_no_claude_cli_imports_in_source_tree`) green. - -**OperatorConsole:** `grep -rn "spec_director" OperatorConsole/` returned zero hits. No per-watcher label to update; OperatorConsole reads `state/campaigns/active.json` directly and is agnostic to which OC process writes it. No OperatorConsole branch needed. - -**Stop point:** changes staged but not committed; parent handles git ops. OC remains broken from earlier phases (the Phase F task explicitly scoped runtime tests out). - -## 2026-05-22 — ADR 0007 Phase E: delete `_claude_cli` + every importer, add CI guard - -Branch: `feat/spec-director-refactor`. Phase E of ADR 0007 — physically removes the direct-Claude bypass module and its remaining importers, and adds an architectural guard so it cannot be reintroduced. After Phases B/C/D the only live LLM-needing path is `spec-author` Plane tasks executed by `board_worker` via the normal backend pipeline; this phase scrubs the dead surface. - -**Deleted:** -- `src/operations_center/spec_director/_claude_cli.py` — the subprocess-Claude wrapper. -- `src/operations_center/spec_director/brainstorm.py` — `BrainstormService`; sole caller was the legacy `spec_director` entrypoint (Phase F target). -- `src/operations_center/spec_director/compliance.py` — `SpecComplianceService`; zero non-test callers found in `src/`. Decided dead per the same ADR-0007 rule that brought down brainstorm; if a compliance reviewer comes back later it gets re-built as a Plane task-kind, same pipeline as everything else. -- `tests/spec_director/test_brainstorm.py`, `tests/spec_director/test_compliance.py`, `tests/spec_director/test_claude_cli_cutover.py` — all three exclusively exercised the deleted modules. - -**Modified:** -- `src/operations_center/entrypoints/spec_director/main.py`: removed the `BrainstormService` import and replaced it with a Phase-F-TODO breadcrumb. The legacy `_handle_legacy_trigger` Step 5b call still references `BrainstormService` symbolically — left in place with a `# noqa: F821` and a Phase-F TODO; this code path is on the retirement chopping block and only fires when the legacy entrypoint is invoked, which the spec_trigger + board_worker spec-author handler now superseded. -- `tests/test_phase_orchestrator.py`: deleted `test_blocked_task_rewritten_and_requeued` and `test_blocked_task_cancelled_after_two_rewrites` (both exercised the Phase-D-removed LLM rewrite path). Replaced `test_does_not_advance_if_implement_blocked` with the non-LLM equivalent (still checks the invariant: blocked current phase prevents next-phase promotion). Dropped the now-unused `patch` import. - -**CI guard:** added two tests to `tests/test_architecture_cleanup_guards.py` alongside the sibling ADR cleanup guards: -- `test_adr_0007_no_claude_cli_module_in_source_tree` — asserts the file does not exist. -- `test_adr_0007_no_claude_cli_imports_in_source_tree` — AST-walks every `.py` under `src/operations_center/` and flags any `ImportFrom`/`Import` referencing `_claude_cli` or any `Call` to a function named `call_claude`. Comments / docstrings / string literals are deliberately permitted (so the historical-context breadcrumbs in `phase_orchestrator.py` and `board_worker/main.py` survive). -- Both failure messages carry the ADR-0007 sentence verbatim: "ADR 0007 forbids direct Claude CLI calls. Route LLM work through a spec-author Plane task. See PlatformDeployment/docs/architecture/adr/0007-spec-director-refactor.md." - -**Post-Phase-E grep `_claude_cli|call_claude(` over `src/`** returns only: -- Historical-context docstrings in `board_worker/main.py:677`, `board_worker/main.py:1848`, `phase_orchestrator.py:13`, `phase_orchestrator.py:22`. -- A Phase-F TODO comment in `spec_director/main.py:19`. -- (Guard test lives under `tests/`, not `src/`.) - -**Not touched (per ADR phase scoping):** legacy `spec_director` entrypoint and its remaining `BrainstormService`/`PhaseOrchestrator` call sites (Phase F). - -**Stop point:** files staged but not committed. Parent handles git ops. The pre-existing failing test (`test_advances_to_test_when_all_implement_done`) is unrelated to Phase E and was already broken from Phase D's wording change. - -## 2026-05-22 — ADR 0007 Phase D: phase_orchestrator detection-only + spec-author phase-advance prompt - -Branch: `feat/spec-director-refactor`. Phase D of ADR 0007 — strips the Claude rewrite path from `phase_orchestrator`; phase-advance LLM work now flows through `board_worker` via a `spec-author` task with `task_phase` set, executed by the normal backend pipeline. - -- `src/operations_center/spec_director/phase_orchestrator.py`: rewritten as detection-only. New dataclass `PendingPhaseAdvance` (campaign_id, spec_slug, spec_file_path, current_phase, next_phase, task_summaries). New public method `detect_pending_advances(issues) -> list[PendingPhaseAdvance]`; `run()` kept and now also populates `result.pending_advances`. Synchronous LLM-free behaviour kept: backlog test/improve → "Ready for AI" promotion, campaign close-out (parent Done + `lifecycle: archived`). Removed `_handle_blocked` LLM rewrite path entirely along with the helper functions (`_parse_rewrite_count`, `_set_rewrite_count`, `_read_spec_text`, `_has_lifecycle_label`). `tasks_unblocked` / `tasks_cancelled` kept on `PhaseOrchestrationResult` as zero back-compat fields so the legacy `spec_director` entrypoint (Phase F target) still imports clean. **No `_claude_cli` import, no `call_claude(` call** — grep confirms zero references. -- `src/operations_center/spec_director/spec_author_task.py` (NEW): shared module hoisted from `spec_trigger`. Hosts `SpecAuthorPayload` dataclass (now with `task_phase` field), `render_task_body`, `create_spec_author_task`, `find_in_flight_phase_advance`, and the label constants (`LABEL_SOURCE`, `LABEL_TASK_KIND`). Single canonical body shape used by both `spec_trigger` (initial authoring) and `spec_hygiene` (phase advance). The `task_phase` field, when set, emits an extra YAML line and an extra `task-phase: ` label for the phase-advance dedupe key. -- `src/operations_center/entrypoints/spec_trigger/main.py`: deleted local `_Payload`, `_render_task_body`, `_create_spec_author_task`; imports from `spec_director.spec_author_task` instead. Behaviour identical for the drop-file / queue-drain path (task_phase stays unset). -- `src/operations_center/entrypoints/spec_hygiene/main.py`: after `PhaseOrchestrator.run()`, calls new `_emit_phase_advance_tasks` which iterates `orch_result.pending_advances`, dedupes against the board via `find_in_flight_phase_advance(slug, next_phase)`, and creates one spec-author Plane task per advance with `task_phase=advance.next_phase`. `_build_phase_advance_seed` composes the seed_text from `PendingPhaseAdvance.task_summaries` so the rewrite prompt sees the per-task status snapshot without re-reading the board. `spec_phase_orchestration` log event now carries `phase_advance_tasks_emitted`. -- `src/operations_center/entrypoints/board_worker/main.py`: filled in `_build_spec_author_goal_text`'s `task_phase` branch. New helper `_build_phase_advance_goal_text` emits the rewrite prompt: read existing spec at `target_path`, preserve front-matter + provenance comment, rewrite `## Goals` and `## Success Criteria` for the new phase, write back, touch no other file. Comment in the helper notes the prompt-diff swap is the only thing that changes when the promptlabs primitive lands. `_handle_spec_author_success`'s phase-advance branch (Phase C) keys on `task_phase` truthiness — verified correct, no change needed. - -**Audit trail:** phase-advance spec-author tasks carry the same labels as initial-authoring tasks plus `task-phase: `; their planning + execute go through `worker.main` → `execute.main` → `ExecutionCoordinator` like any other run. `_handle_spec_author_success` Phase-advance branch skips `CampaignBuilder` (the campaign already exists) and transitions Done with a comment recording the run_id. - -**Removed surface:** the LLM-driven blocked-task description rewrite is gone. If we want auto-recovery for blocked tasks later, it gets re-built as a Plane task (probably a new task-kind) — same pipeline as everything else. - -**Not touched (per ADR phase scoping):** `_claude_cli.py` (Phase E — still imported by `BrainstormService`), legacy `spec_director` entrypoint (Phase F). - -**Stop point:** staged but not committed. Parent handles git ops. - -## 2026-05-22 — ADR 0007 Phase C: board_worker spec-author handler - -Branch: `feat/spec-director-refactor`. Phase C of ADR 0007 — teaches board_worker how to claim and process the `task-kind: spec-author` tasks that spec_trigger (Phase B) emits, with all LLM work flowing through the normal `worker.main` → `execute.main` → `ExecutionCoordinator` pipeline. No `_claude_cli` import anywhere. - -- `src/operations_center/entrypoints/board_worker/main.py`: - - `_ROLE_KINDS` gains `"spec-author": ["spec-author"]`. Distinct role, not folded into goal/test/improve — it has its own prompt assembly and its own success handler. - - `_claim_next`: spec-author tasks bypass the thin-goal-text guard (their intent is YAML, not `## Goal`) and synthesise repo_key=`OperationsCenter` since spec_trigger leaves the `repo:` label off per ADR's payload spec. - - `_process_issue`: short-circuits early for spec-author, parses the YAML payload via `_parse_spec_author_payload`, composes the spec-authoring prompt via `_build_spec_author_goal_text` (mirrors `spec_director.brainstorm._SYSTEM_PROMPT` + `_build_user_prompt` but emitted as goal_text the backend can run directly), then dispatches to `_process_spec_author`. - - `_process_spec_author`: plan → execute subprocess pair, same shape as the existing flow. Constraints: `--max-changed-files 1`, `--allowed-path docs/specs/`, 8-min timeout. `--source` tag carries `spec_slug` and `trigger_source` into `run_metadata.json` via the existing `extra_metadata` path on `RunArtifactWriter.write_run`. - - `_handle_spec_author_success`: reads the committed spec from the workspace, post-substitutes the `__RUN_ID__` sentinel with the real run_id (so the spec carries ``), invokes the existing `CampaignBuilder` to spawn sub-tasks, then tags each new task with `parent_run: `. Phase-advance branch (`task_phase` set) skips campaign creation and transitions Done — the campaign already exists from the original authoring run. -- `src/operations_center/entrypoints/worker/main.py`: added `--max-changed-files` flag so the spec-author planning invocation can cap scope. PlanningContext already supported the field; this is purely a CLI surface extension. -- `src/operations_center/entrypoints/execute/main.py`: unchanged — the proposal/decision bundle flows through ExecutionCoordinator with no spec-author-specific branching needed. `--source` already supports an arbitrary tag string; we pack `spec_slug` and `trigger_source` into it. - -**Audit-trail wiring (ADR 0007 invariants):** -- `runs//run_metadata.json` carries `source: board_worker_spec_author|spec_slug=...|trigger=...` via the existing `extra_metadata` path. -- Spec file carries `` on line 1 (sentinel substituted post-success). -- Each child campaign task carries `parent_run: ` label (added after `CampaignBuilder.build` returns). - -**Deviation:** the prompt asks the model to write the literal `__RUN_ID__` sentinel and we substitute post-success because the planning subprocess can't know the eventual run_id (the backend allocates it). Post-process is best-effort — if the model deviates from the sentinel string, the comment line still gets written, just without provenance linkage; greppable. - -**Not touched (per ADR phase scoping):** `_claude_cli.py` (Phase E), `phase_orchestrator.py` LLM path (Phase D), `spec_director` entrypoint (Phase F). - -**Stop point:** staged but not committed. Parent handles git ops. - -## 2026-05-22 — ADR 0007 Phase B: extract spec_trigger watcher - -Branch: `feat/spec-director-refactor`. Phase B of ADR 0007 — splits the trigger-detection half of `spec_director` into its own LLM-free watcher that emits Plane tasks instead of calling Claude. - -- `src/operations_center/entrypoints/spec_trigger/__init__.py` (NEW, license header). -- `src/operations_center/entrypoints/spec_trigger/main.py` (NEW, ~330 LOC): - - `run_once()` fetches Plane issues once per cycle, dedupes against any non-Done issue carrying both `source: spec-director` + `task-kind: spec-author`, then runs `TriggerDetector.detect(ready, running, has_active)` re-using the existing detector (drop-file > queue-drain priority preserved). - - `has_active_campaign` is read from the spec_hygiene-owned projection at `state/campaigns/active.json` — single-writer invariant respected; we only read. - - On fire: builds the ADR 0007 payload (spec_slug derived from drop-file first line slug or `queue-drain-`, target_path `docs/specs/.md`, recent git log per managed repo, existing-spec index, board snapshot), creates one Plane task in state `Ready for AI` with labels `task-kind: spec-author`, `source: spec-director`, `trigger: `, `spec-slug: `. The payload lands in the description as a single fenced YAML block under a `## Spec Authoring` heading. - - Drop-file is archived via the existing `TriggerDetector.archive_drop_file()` only after task creation succeeds. - - Zero LLM imports — no `BrainstormService`, no `_claude_cli`, no subprocess to claude. Grep `_claude_cli|call_claude|subprocess.*claude` in the new module returns empty. -- `pyproject.toml`: registered `operations-center-spec-trigger` script. -- `src/operations_center/entrypoints/spec_director/main.py`: marked legacy trigger block with `# TODO(ADR 0007 Phase F): superseded by spec_trigger entrypoint + board_worker spec-author handler, delete with retirement.` Brainstorm + CampaignBuilder code paths left intact for now (retired in Phase F per ADR). - -**Stop point:** files staged, not committed. Parent handles git ops. Phase A (`spec_hygiene`) still has only an empty `__init__.py`; Phase B does not depend on it at runtime (the projection file is optional — absent means "no active campaign"), but full end-to-end won't work until A also lands so the projection is being rebuilt. - - - -Branch: `feat/spec-director-refactor`. Phase A of `docs/architecture/adr/0007-spec-director-refactor.md`. - -New entrypoint `operations_center.entrypoints.spec_hygiene` hosts the non-LLM hygiene operations previously embedded in `spec_director.run_once()`: -- Spec archival (`SpecWriter.archive_expired`) -- Orphan-campaign bootstrap -- Auto-promote Backlog → Ready for AI -- Phase orchestration **detection** (the existing `PhaseOrchestrator.run` is invoked unchanged — LLM rewrite still happens via `phase_orchestrator`; full LLM eviction lands in Phase D) -- Campaign recovery (abandonment scan) - -Also adds an `active.json` projection rebuild at the top of every cycle. spec_hygiene is now the single writer of `state/campaigns/active.json` per ADR 0007. Projection is derived from Plane issues labeled `source: spec-campaign`, grouped by `campaign-id: `, with status (active/complete/cancelled) computed from child issue states. - -Files: -- `src/operations_center/entrypoints/spec_hygiene/__init__.py` (new) -- `src/operations_center/entrypoints/spec_hygiene/main.py` (new, ~340 LOC) -- `pyproject.toml`: registered `operations-center-spec-hygiene` script. -- `src/operations_center/entrypoints/spec_director/main.py`: hygiene call sites marked with `TODO(ADR 0007 Phase F): superseded by spec_hygiene entrypoint, delete with retirement.` Code paths left in place — both entrypoints can coexist until Phase F retires spec_director. - -Out of scope: board_worker, phase_orchestrator LLM call path, `_claude_cli.py`, other phases. Not touched. - -**Stop point:** staged, not committed. Parent handles git ops. - -## 2026-05-22 — Pin context-lifecycle to git tag v0.3.0 (was file:// local pin) - -Follow-up to ADR 0002 P4 release. Switched `context-lifecycle` dependency from a local file:// pin to `git+https://github.com/ProtocolWarden/ContextLifecycle.git@v0.3.0`. Matches the pattern OC already uses for `core-runner` and `platform-manifest`. Local editable installs still override the pin for active development. - - -## 2026-05-22 — P6: annotate continuous-improvement design with anchor-host paths - -Branch: `feat/p6-cleanup`. Phase 6 of work order `PlatformDeployment/docs/architecture/adr/0002-work-order-manifest-cognition.md`. - -Audited `docs/architecture` and `docs/design` for stale `.context/` references. Only `docs/design/continuous-improvement/design.md` (a DRAFT) carried bare `.context/{active,checkpoints,capsules,handoffs}/...` paths from before the manifest-host migration. Rather than rewriting every example in a draft that's still in flight, added a single "Updated post-ADR 0002 P3" callout at the top instructing readers to mentally prefix every `.context/...` path with `/.context/sessions//`. The relative shapes (per-attempt subdirs, lineage.json layout, etc.) are still correct — only the host changed. - -ADRs 0001-0006 already clean; none referenced per-repo `.context/`. - -## 2026-05-22 — P4: dispatcher CL hydrate/capture wrap - -Branch: `feat/p4-dispatcher-cl-wrap`. Companion to CL `feat/p4-public-api` -(CL 0.3.0 ships the `hydrate` / `capture` / `peek` public API). - -- `pyproject.toml`: added `context-lifecycle @ file:///home/dev/Documents/GitHub/ContextLifecycle` to deps. Local-path pin for dev parity with how CL pins RepoGraph; flip to a tagged release once CL 0.3.0 is published. -- `src/operations_center/execution/cl_wrap.py` (NEW, ~180 LOC): `cl_dispatch_wrap(work_item)` context manager. Derives a lineage id from the request (preferring `lineage_id` → `run_id` → `proposal_id`, falling back to `l-unknown`), calls `cl.hydrate()` on enter, runs the inner block, and calls `cl.capture()` on exit. Exceptions inside the block re-raise but capture still fires with an `error` payload so failed lineages leave a trace. The wrap is a strict no-op when `CL_ANCHOR` is unset OR `context_lifecycle` is not importable — preserves pre-P4 behavior for any test/session that doesn't anchor. Capture-write failures are logged-and-swallowed so a buggy CL never breaks dispatch. -- `src/operations_center/execution/coordinator.py`: one new import + the wrap is placed around `_run_with_recovery_loop` inside `execute()` (lines ~236-245). The recovery loop, observability, usage_store, run_memory, workspace finalize, and lifecycle plan/verify all remain outside the wrap — only the actual adapter-driving recovery loop is lineage-scoped. Did NOT modify the adapter signature; the wrap reads work_item, calls hydrate/capture around the call, and never touches what the adapter receives. -- `tests/unit/execution/test_coordinator_cl_wrap.py` (NEW, 11 tests): cl_wrap unit tests (noop gate, hydrate-then-capture ordering, lineage derivation precedence, error-path capture, no-result capture, capture-failure swallowed) + one end-to-end coordinator integration test. Unit tests: 10 pass. The integration test SKIPs in this env because the coordinator's transitive backend imports require `core_runner` (broken/missing editable install — pre-existing, not from P4). -- `CLAUDE.md`: added a "Dispatcher wrap (ADR 0002 P4)" note pointing at `cl_wrap.py` and explaining the no-op gate. - -Verified no regressions outside the pre-existing `core_runner` ImportError zone: tests/unit/policy + tests/unit/contracts + tests/unit/observability → 463 pass, 3 unrelated cxrp_mapper failures that fail on `main` too. - -**Deviations / blockers:** -- The OC dispatch chain transitively imports `core_runner` (via `operations_center.backends.factory` → `aider_local.adapter`), which is broken in this `.venv`. Pre-existing — flagged for follow-up but out of P4 scope. Once fixed, the integration test in `test_coordinator_cl_wrap.py::test_coordinator_dispatch_drives_hydrate_and_capture` will run end-to-end through ExecutionCoordinator. -- The work order mentioned wrapping TeamExecutor / DAGExecutor / CritiqueExecutor individually. The actual dispatcher in OC is the single `ExecutionCoordinator.execute()` boundary — all three executors are reached via the backend registry from there. Wrapping coordinator gives the same lineage-scoped pre/around/post per dispatch with one site instead of three. - -**Stop point:** staged, not committed. Parent handles git ops. - -## 2026-05-22 — P3: remove local `.context/`; cognition now hosted by anchor manifest - -Branch: `feat/p3-remove-local-context`. - -Phase 3 of work order `PlatformDeployment/docs/architecture/adr/0002-work-order-manifest-cognition.md`. OC no longer hosts its own cognition state; durable CL artifacts live under the active anchor manifest's `.context/sessions//`. Sessions targeting OC must run `eval $(cl session start )` first. - -Removed: -- `.context/` (entire tree — templates, config.yaml, README.md, loop_schedule.json, all `.gitkeep`s for active/checkpoints/handoffs/capsules/leases/archive). - -Migrated runtime/operational state (NOT cognition) to OC-local surfaces: -- `.context/loop_schedule.json` → `.console/loop_schedule.json`. OC-local runtime state written by the watchdog session at STEP 10 and read by `tools/loop/controller.py` for adaptive delay. Updated `controller.py` (SCHEDULE_FILE path + module docstring), `tools/loop/oc_session_prompt.txt`, `.console/watchdog_loop_prompt.md`, `docs/operator/watchdog_loop.md`, `LOOP_START.md` to point to the new path. -- `.context/config.yaml` worker/loop/watchers sections → `.console/workers.yaml`. CL guard flags from that file are NOT migrated (they now live in the anchor manifest's `.context/config.yaml` — they're manifest-wide, not OC-specific). -- Templates were already promoted to PlatformManifest in the companion `feat/p3-context-host` branch. - -Code updates for the rehome: -- `src/operations_center/execution/ci_evaluator.py` — `_POLICY_FILE_PATTERNS` swapped `.context/config.yaml` → `.console/workers.yaml` (this list flags policy-widening diffs; new path is the equivalent under the OC-local convention). -- `src/operations_center/execution/ci_store.py` — module + helper docstrings updated to describe artifacts as anchor-manifest-hosted. -- `src/operations_center/contracts/ci.py` — `ClpBinding`, `LineageAttempt`, `ImprovementLineage` docstrings and Field descriptions updated; paths are now anchor-relative (e.g. `active//lineage.json`) rather than `.context/capsules/...`. -- `tests/unit/contracts/test_ci_contracts.py` — fixture strings updated to match (pure cosmetic; field accepts any string). -- `CLAUDE.md` — Cognition Lifecycle section rewritten to point at the anchor manifest pattern; surfaces table now includes `.console/workers.yaml` and `.console/loop_schedule.json`; lifecycle diagram uses `/.context/sessions//...` paths. - -Untouched (intentional): -- `.claude/hooks/pre_tool_use.sh` and `stop.sh` — bash hook implementations. With `.context/config.yaml` gone they fall back to defaults; `require_capsule=false` keeps them passing as no-ops. ADR 0002 P5 replaces them with `cl hook` shims; not in P3 scope. -- `.console/log.md` historical entries — left as-is (history references old paths intentionally). -- `.console/.context` compiled context — auto-generated; regenerated at next session launch. - -Preflight notes: -- Verified no systemd unit, cron job, or external scheduler references `loop_schedule.json` or `.context/`. OC controller (`tools/loop/controller.py`) is currently NOT running on this machine (only VideoFoundry's controller is active). -- No live cognition data found in OC's `.context/` prior to removal — only empty `.gitkeep`s under active/, checkpoints/, handoffs/, capsules/, leases/, archive/ plus the loop schedule and config. Operator-approved removal scope matched reality. - -Not committed yet — staged for parent review. - -## 2026-05-21 — Add --dangerously-skip-permissions to controller session spawn - -claude -p without this flag blocks tool calls that need interactive approval. -The controller is a deliberate operator action; ContextGuard hooks still run. -This gives the spawned session the same tool access as an interactive session. - -## 2026-05-21 — Full session prompt with explicit authorization (loop controller) - -Replaced thin SESSION_PROMPT pointer with tools/loop/oc_session_prompt.txt — -full STEP 0-10 watchdog content plus an explicit AUTHORIZATION block granting -bash/.venv CLI/autonomy-cycle/watcher-restart/Plane/commit permissions. -OPERATOR_BLOCKED narrowed to credentials/hardware/policy only. All code bugs, -queue deadlocks, watcher crashes, and infra config errors are session's responsibility. -Controller reads prompt from file at launch; updates take effect on next iteration. - -## 2026-05-21 — Mark tools/loop/controller.py executable - -Mode change 100644 → 100755. Matches vf.sh controller. - -## 2026-05-21 — Add loop-log to operations-center.sh - -Added loop-log subcommand (tail -f loop_controller.log). Mirrors vf.sh loop-log. - -## 2026-05-21 — Add loop-start/stop/status to operations-center.sh - -Added loop_start, loop_stop, loop_status functions and case entries to -scripts/operations-center.sh. loop-start/stop/status skip the janitor. -Mirrors existing watchdog-loop-* pattern. - -## 2026-05-21 — Add loop controller (replace /loop + ScheduleWakeup) - -tools/loop/controller.py spawns a fresh claude -p session per watchdog cycle. -Context never accumulates across cycles. Session writes .context/loop_schedule.json -at STEP 10 with {delay_s, state, reason}; controller reads it for adaptive timing. -Updated watchdog_loop_prompt.md STEP 10, watchdog_loop.md, and LOOP_START.md. -Enables overnight unattended runs without session context exhaustion. - -## 2026-05-21 — Update ADR-0003 to reference CI design - -Added "Related" section to ADR-0003 documenting the relationship between -tiered cognition and the continuous improvement schema: trace data compatibility -(LineageAttempt.replay_metadata feeds cognition_summary), refinement as a -bounded-cognition amortization strategy, and the explicit non-introduction of -a CognitionTier enum (consistent with ADR-0003 D1 / ADR-0002 G1). - -## 2026-05-21 — Wire CI coordinator into board_worker call-site - -board_worker/main.py: after planning, check bundle.proposal.continuous_improvement. -If present and execution_mode==improve_campaign, delegate to _run_ci_loop() which -drives CiCoordinator.run() with a per-attempt subprocess execute callable. Maps -RefinementStatus to _handle_success/_handle_failure/_fail_task. CI status and -attempt count added as Plane labels. Single-shot path unchanged when spec absent. -6 new tests in tests/unit/entrypoints/test_board_worker_ci_wiring.py — all pass. - -## 2026-05-21 — Fix ruff unused imports in ci_coordinator.py / ci_store.py - -Removed unused uuid, Callable imports from ci_coordinator.py; removed unused -UTC, datetime imports from ci_store.py. Custodian now clean (0 non-B2 findings). - -## 2026-05-21 — Implement continuous improvement schema (§13) - -Production contracts in src/operations_center/contracts/ci.py (all CI types -extracted from draft_schema.py). CI enums added to enums.py. OcPlanningProposal -extended with Optional[ContinuousImprovementSpec]. ci_store.py (JSON-backed -lineage index + CI state store), ci_evaluator.py (evaluation command runner + -5 guardrail implementations), ci_coordinator.py (multi-attempt refinement loop -state machine). fail_closed invariant enforced at Pydantic construction time. -38 new tests (unit/contracts/test_ci_contracts.py, unit/execution/test_ci_coordinator.py) -— all pass. 135/135 existing contract tests unaffected. - -## 2026-05-21 — Record operator decisions in CI schema design (§12) - -All 5 open questions resolved: OC owns evaluation command derivation; guardrails -are closed enum (EnforcedGuardrail) + advisory custom_checks; lineage is CLP-native -in .context/capsules/ indexed by OC via OcLineageIndexEntry, archived by Warehouse; -CI spec stays OC-internal (not in CxRP wire); no new ExecutionMode unless routing -diverges. Updated design.md §12 and draft_schema.py with EvaluationCommandSource -enum, OcLineageIndexEntry type, and closed EnforcedGuardrail enum. - -## 2026-05-21 — Fix Custodian DC7/K1/OC8 in CI design doc - -Linked design.md from docs/README.md; unquoted worker_scope field name. - -## 2026-05-21 — Continuous improvement schema extension design - -Added design doc, draft schema, and examples for extending OcPlanningProposal -with a ContinuousImprovementSpec block. Covers: strategy, evaluation, refinement -policy, CLP binding, lineage/provenance, governance boundaries, replay semantics, -failure modes. DRAFT — not yet wired into production contracts. Awaiting operator -review of open questions (Section 12) before implementation. - -## 2026-05-21 — Sync python3/jq fallback to pre_tool_use.sh - -Added python3 fallback for jq in pre_tool_use.sh. Hook now works in -environments without jq installed. - -## 2026-05-21 — Sync ContextGuard hook fixes from CLP - -Synced updated pre_tool_use.sh and stop.sh from ContextLifecycle adapter. -Fixes: allowed_paths whitelist enforcement, malformed capsule detection, subagent_heavy -warn, checkpoint_stale block, reload_scope_too_large warn, session-aware stop detection. - -## 2026-05-21 — Add closing fence to console-context block - -Added end marker so OperatorConsole only replaces its -managed block and leaves repo-owned content below it untouched. - -_Chronological continuity log. Decisions, stop points, what changed and why._ -_Not a task tracker — that's backlog.md. Keep entries concise and dated._ - -## 2026-05-21 — Remove Cognition Lifecycle section from CLAUDE.md - -OperatorConsole's context injector rewrites CLAUDE.md on session start, stripping anything -after its managed block. Moved CLP lifecycle content to .context/README.md (already there). -CLAUDE.md is now OC-managed-only to avoid dirty diffs. - -## 2026-05-21 — Custodian violation fixes (pre-existing) - -**C29:** workspace.py was 501 lines. Condensed logger.info call to bring under limit. -**DC7:** Three orphan spec docs in docs/specs/ were not linked from docs/README.md. Linked them. -Neither violation was introduced by the context-lifecycle branch — both were pre-existing on main. - -## 2026-05-21 — ContextLifecycle Phase 3 integration - -**Decision:** Added `.context/` cognition surface and ContextGuard Claude Code hooks. - -OC now has bounded, resumable cognition infrastructure: checkpoint-driven watchdog lifecycle, investigation capsule templates, worker handoff templates, and ContextGuard enforcement (lease expiry, forbidden paths, subagent budget, context_risk flags). Orchestrator lifecycle instructions added to CLAUDE.md. - -**Why:** Watchdog loops were functioning as immortal cognition sinks — runaway context growth, instruction fade-out, increasing token inefficiency. This formalizes checkpoint-driven operation so OC state lives in artifacts, not conversation history. - -**Branch:** feat/context-lifecycle - -## 2026-05-19 — ADR 0006 Phase 3: OC imports updated to CoreRunner - -- direct_local/adapter.py, aider_local/adapter.py, openclaw/invoke.py: executor_runtime → core_runner, ExecutorRuntime → CoreRunner. -- _runtime_ref.py, contracts/execution.py, observability/trace.py, entrypoints/run_show/main.py: docstring/comment references updated. -- pyproject.toml: executor-runtime dep → core-runner. -- Installed core-runner from local ExecutorRuntime/src into .venv. -- 3345 tests pass. - -## 2026-05-19 — Removed remaining live kodo/archon references from src (final sweep) - -Renamed `kodo_exit_code` → `executor_exit_code` in validation.py dataclass + builder. -Renamed `kodo_quality_warning` event kind → `executor_quality_warning` in usage_store.py. -Replaced kodo binary check with team-executor in dependency_check.py and setup/main.py. -Updated executor_plane path references in pipeline_trigger, execution_outcome, observer, -decision rules. Fixed docstrings in baseline_validation, brainstorm, triage_scan, -recover_stale, openclaw/errors, aider_local/adapter. Updated tests to match. - -## 2026-05-18 — Purged stale kodo/archon prose from src (ADR 0005 follow-up) - -Replaced all kodo/archon conceptual references in 21 src files with backend-generic -language (team_executor, dag_executor, execution backend, etc.); setup/main.py dataclass -fields and function names renamed from kodo_* to executor_*; tests updated to match. - -## 2026-05-18 — ADR 0005 docs indexed; Custodian pre-existing findings suppressed - -Added ADR 0005 (owned execution topology layer) and work order to docs/README.md. -Suppressed pre-existing DC7 orphan findings for 3 spec files and B1 VideoFoundry -finding in scene-timing spec via .custodian/config.yaml exclusions. - -## 2026-05-18 — CxRP pin bumped to v0.3.0 (ADR 0005 Phase 0) - -AgentTopology enum + executor vocab update (TEAM_EXECUTOR, DAG_EXECUTOR, CRITIQUE_EXECUTOR; -kodo/archon/archon_then_kodo removed). - -## 2026-05-18 — board_unblock: Rule 5 STALE_IN_REVIEW for orphaned In Review tasks - -Added Rule 5 to board_unblock.py: tasks in "In Review" state for >stale_blocked_hours -(default 4h) are moved to Backlog. Catches tasks whose PR was never created, was closed -without merging, or whose state was set prematurely. - -Root cause of pattern: pr_review_watcher is PR-driven (scans GitHub for open PRs, not -Plane for In Review tasks) — orphaned In Review tasks are invisible to it permanently. - -Applied immediately to 4 orphaned In Review tasks (#12, #13, #15, #16) that had been -stuck with no open/closed PRs, no comments, and no branches on GitHub. - -## 2026-05-18 — board_unblock: fix all four rule label mismatches + Rule 3 covers goal tasks - -All four board_unblock rules were non-functional because label constants used wrong format. -Plane labels follow `"key: value"` (with space) but constants used `"key:value"` or bare `"improve"`. -Fixed constants: -- `_IMPROVE_LABEL`: `"improve"` → `"task-kind: improve"` -- `_INVESTIGATE_LABEL`: `"task-kind:investigate"` → `"task-kind: investigate"` -- `_SELF_MODIFY_APPROVED_LABEL`: `"self-modify:approved"` → `"self-modify: approved"` -- `_SIGKILL_SIGNAL_PREFIX`: was prefix-match on `"executor-signal:sigkill"`, now uses `_label_value` + substring check - -Rule 3 extended to also cover `task-kind: goal` tasks (not just improve). Self-modify:approved -tasks excluded from Rule 3 (handled by Rule 4 which requeues to R4AI, not Backlog). - -Applied immediate fix: moved 5 self-modify:approved improve tasks Blocked→R4AI, cancelled -#29 (investigate, root cause identified), moved 4 [Impl] goal tasks Blocked→Backlog. - -Also: `config/operations_center.local.yaml` resource_gate raised `max_per_hour: 2→10`, -`max_per_day: 30→50` (local config, not tracked). This was the primary throughput bottleneck. - -## 2026-05-18 — Operator cycle: unblocked frozen board, installed kodo, fixed env + redirect bugs - -**Status before**: Board frozen for 497+ cycles. All watchers crash-looping. -**Root causes fixed**: -1. Env file not re-sourced on watcher restart → `KeyError: 'PLANE_API_TOKEN'` crash-loop. Fixed in commit 4bd89a2 (added `set -a; source ENV_PATH; set +a` inside while-loop). Watchers restarted to pick up fix. -2. kodo not installed → `No such file or directory: 'scripts/kodo-shim'`. Fixed: installed uv + kodo 0.4.272 via `PYO3_USE_ABI3_FORWARD_COMPATIBILITY=1 uv tool install git+https://github.com/ikamensh/kodo`. -3. GITHUB_TOKEN empty → reviewer watcher failing with "no GitHub token". Fixed `.env.operations-center.local`: `gh auth token 2>/dev/null || awk ...` fallback. -4. GitHub 301 redirects for repos moved from Velascat/ to ProtocolWarden/ → reviewer watcher silent failures. Fixed `github_pr.py`: added `kwargs.setdefault("follow_redirects", True)` to `_request()` (commit 0eb2f21). - -**Board actions**: Cancelled dead-remediation task #3. Moved 4 stale Blocked improve tasks (#27, #26, #18, #17) → Backlog. Re-queued kodo-failed improve task #42 → Backlog. - -**Audits all clean**: custodian-sweep (0), ghost-audit (0), flow-audit (0 gaps), triage-scan (0), board-unblock (0), graph-doctor (OK). 15 golden tests pass. - -**Current board**: Watchers active, improve watcher processing task b67bc0e0 ("Fix lint regression"), reviewer following redirects cleanly. System ready for autonomous operation. - -## 2026-05-17 — docs: mark recovery-subsystem-test-coverage spec cancelled - -## 2026-05-15 — board_unblock: pre-dispatch memory check - -Added memory guard to board_unblock.py: skip all rules if MemAvailable < 1.7GB (pre-OOM). -Rule 4 SELF_MODIFY_REQUEUE additionally skipped if MemAvailable < 8GB — requeueing to R4AI -when memory is below the kodo dispatch threshold would cause the executor to get OOM-killed -on the next dispatch. mem_available_gb now included in JSON output. - -## 2026-05-15 — Expand autonomous board unblocking: Rule 4 + governing principle - -Added Rule 4 SELF_MODIFY_REQUEUE to `board_unblock.py`: tasks with `self-modify:approved` -in Blocked state whose blocked-by dependency is absent or terminal → transition to Ready for AI. -Operator approval already on record; holding these Blocked was pure queue waste. - -Updated Step 2.5 in `docs/operator/watchdog_loop.md` with the governing principle: -"The loop is the operator for all conditions handled here. Do NOT log 'operator action -required' for stuck patterns this tool covers. When a new stuck pattern appears in Step 3 -investigation, ADD A RULE HERE — not a note." Operator-blocked classification now explicitly -reserved for conditions requiring genuine human decisions or infrastructure changes. - -## 2026-05-15 — Add Step 2.5 (autonomous board unblocking) to watchdog loop runbook - -Added STEP 2.5 to `docs/operator/watchdog_loop.md` and updated the cycle table. -The loop now calls `operations-center-board-unblock --apply` between triage and the -blocked-work investigation, autonomously resolving dead-remediation tasks, R4AI -investigate-task starvation, and stale improve-task blocks without deferring to operator. - -## 2026-05-13 — fix: reset-training-branches.sh local branch update - -- Added `git branch -f` after each remote push so local training branch refs advance - to match origin/main. Without this, local repos required a separate fetch/reset. - -## 2026-05-13 — Loop prompt: KNOWN OPEN ISSUES updated, a5dbf034/5d8bd236 closed - -- a5dbf034 and 5d8bd236 implemented this session — removed from KNOWN OPEN ISSUES carry-forward. -- 9c7f4bb9 (kodo SIGKILL): removed hard "DO NOT re-queue" block; loop now investigates via - STEP 1 executor investigation before deciding to re-queue. -- Campaign 10c50210: ShippingForm re-queue gated on root cause finding, not operator sign-off. -- KNOWN OPEN ISSUES block added to STEP 3 in watchdog_loop.md so it persists across sessions. - -## 2026-05-13 — Loop autonomy expansion: executor investigation + training self-modify - -- STEP 1 in loop prompt now includes EXECUTOR FAILURE INVESTIGATION block: reads board_worker - logs, dmesg/journalctl for OOM, kodo-stderr.log artifacts, and free -h. Applies to all - backends (kodo, archon, aider). Loop investigates before creating a Plane task. -- STEP 6 now explicitly allows OC (self_repo_key) autonomy-cycle dispatch in training mode — - changes land on testing branch, proposer auto-adds self-modify:approved, no extra gate. -- Training Mode section updated with OC self-modification note. -- HEALTHY cadence forbidden condition changed from "kodo SIGKILL open issue unresolved" to - "executor signal-kill confirmed this cycle AND root cause not yet determined" — more precise, - unblocks HEALTHY after root cause is found. - -## 2026-05-13 — Convergence promotion: a5dbf034 + 5d8bd236 watcher telemetry - -- **a5dbf034** (triage watcher `blocked_reason`): `_queue_healing_actions` now returns - `(task, decision)` tuples. Queue healing JSON output now includes `blocked_reason`, - `blocked_by_backend`, `backend_dependency`, `executor_exit_code`, `executor_signal` - — loop reads these directly instead of inferring from label strings. -- **5d8bd236** (improve watcher executor exit telemetry): Added `executor_exit_code` - and `executor_signal` fields to `OcExecutionResult`. kodo normalizer populates them - from `capture.exit_code` (negative exit = signal kill via `signal.Signals`). - board_worker `_handle_failure` applies `executor-exit-code: N` and - `executor-signal: SIGKILL` as Plane labels on blocked tasks and includes them in - the Plane comment. -- Updated `test_triage_scan_emits_queue_healing_decision_from_structured_labels` to - unpack the now-(task, decision) return. - -## 2026-05-13 — Custodian config: new subsystem exclusions + C41 fixes - -- Added T6/T7 exclusions for backend_health, evidence_fingerprints, queue_healing, recovery, recovery_policies subsystems. -- Added doc_conventions.exclude_path_patterns for pre-existing orphan docs (with history/** default re-included). -- Fixed C41: added ensure_ascii=False to json.dumps in fingerprint.py, intake/main.py, spec_director/main.py. - -## 2026-05-13 — WorkStation → PlatformDeployment hard cutover - -- Removed `workstation_cli` fallback import from `repo_graph_factory.py` (hard cutover, no compatibility shim). -- Renamed env var `OPERATIONS_CENTER_WORKSTATION_DIR` → `OPERATIONS_CENTER_PLATFORM_DEPLOYMENT_DIR` in `README.md`, `deployment/plane/manage.sh`, and `docs/demo.md`. -- `git mv docs/operator/workstation_compose_smoke.md docs/operator/platformdeployment_compose_smoke.md`; updated all container names inside. -- Updated `docs/operator/archon_workflow_registration.md`, `manifest_wiring.md`, `watchdog_loop.md`, and `docs/history/` sweep. - -## 2026-05-11 — Proposal/routing ownership clarification - -- Renamed the OC-native proposal and routing model definitions to `OcPlanningProposal` and - `OcRoutingDecision`, while keeping `TaskProposal` / `LaneDecision` as compatibility aliases. -- Updated live OC docs and imports to make the boundary explicit: CxRP owns the canonical wire - proposal/routing contracts, OC owns stricter internal orchestration-domain models, and - `contracts.cxrp_mapper` is the explicit boundary translator. -- Added invariant tests to prevent docs from calling OC internal models canonical protocol - contracts and to prove proposal/routing boundary serialization stays in CxRP. - -## 2026-05-11 — RuntimeBinding mirror reduction - -- Replaced the local `RuntimeBindingSummary` model body with a compatibility alias to canonical - `cxrp.contracts.RuntimeBinding` so OperationsCenter stops owning a duplicate runtime-binding - contract shape. -- Kept the legacy OC import surface and string-normalized construction path so existing binders, - adapters, and tests continue to work without widening the refactor into proposal/routing types. -- Updated runtime-binding documentation and tests to treat invalid bindings as rejected at - canonical CxRP construction time instead of later in an OC-only mapper step. - -## 2026-05-11 — PlatformManifest consumption boundary notes - -- Documented OperationsCenter as a consumer of PlatformManifest topology and visibility metadata, - not the ontology owner. -- Added a contract note clarifying that CxRP and RxP remain separate protocol owners, while - ExecutorRuntime and WorkStation remain distinct runtime and hosting layers. -- Added tests around repo-graph factory layering so OC keeps using the bundled platform manifest - base with project/work-scope/local overlays only. - -## 2026-05-11 — cross-repo quarantine branch normalization - -- Confirmed hard cross-repo OperationsCenter provenance only in CxRP (`6db7663` -> `8e43e07` -> `ac0fcd5` / merged `cf33e8a`). -- Rewrote `CxRP main` to retain non-quarantine follow-up commits while removing the OC-originated `AgentTopology` lineage from `main`. -- Promoted `operations-center-testing-branch` as the temporary cross-repo quarantine/staging branch name. -- Created or pushed `operations-center-testing-branch` in all managed repos; for CxRP it remains the quarantined lineage at `ac0fcd5`. -- Updated local OC repo settings to target `sandbox_base_branch: operations-center-testing-branch` for all managed repos. -- Added backlog follow-up to review/refine quarantined `ShippingForm` / related CxRP work before any deliberate merge back to `main`. - -## 2026-05-10 — docs(watchdog): add self-healing convergence phases - -Updated docs/operator/watchdog_loop.md to make the loop's self-healing evolution explicit: -- Added a 7-phase convergence model from observational loop to operational convergence -- Added ownership placement guidance for loop, watchers, runtime recovery, and queue semantics -- Added anti-god-object guardrail language -- Added convergence maturity metrics and cycle-summary fields -- Integrated phase references into promotion, recovery ownership, parked behavior, and operational convergence sections - -## 2026-05-10 — fix(B1): remove private names from reset script and runbook - -reset-training-branches.sh rewritten to read repo paths from gitignored config -(config/operations_center.local.yaml) via Python yaml parse — no banned names -in tracked code. no_verify_repos list also read from config under training: key. -watchdog_loop.md example output replaced with placeholder. -Custodian B1 now clean (was 3 MED findings). - -## 2026-05-10 — feat: training branch reset script + runbook section - -scripts/reset-training-branches.sh — resets operations-center-testing-branch to -origin/main for all 7 managed repos. Exports REPOGRAPH_BOUNDARY_ARTIFACT_FILE, -uses --no-verify for SwitchBoard (pre-existing findings on main). Supports --dry-run. - -watchdog_loop.md — new "Training Mode" section before Prerequisites: explains the -reset workflow, what training mode changes (sandbox_base_branch, rate gate), and -the requirement to reset at session start rather than assuming sync. - -## 2026-05-10 — docs: split watchdog_loop.md into three focused files - -watchdog_loop.md (811 lines) — operator runbook: preflight, /loop prompt, cadence, - cycle summary template, guardrails, lifecycle, canonical example. -self_healing_model.md (538 lines) — architecture: phases 1–7, anti-god-object, - convergence promotion, runtime health model, recovery ownership, behavioral convergence. -recovery_policy.md (445 lines) — machine-enforceable rules: queue healing, recovery - budgets, evidence fingerprinting, stagnation/classification tables, Custodian invariants. -No content removed — all sections redistributed. Cross-references added. - -## 2026-05-10 — docs(resource_gate): production rate = 2× conservative baseline - -Updated production example in ResourceGateSettings docstring: - max_concurrent: 2, max_per_hour: 4, max_per_day: 60 (2× of 1/2/30). - -## 2026-05-10 — docs(resource_gate): rate-limit docstring environment-neutral - -Removed training-mode framing from ResourceGateSettings docstring. The global -rate cap is a permanent production feature; the specific values are the current -conservative tuning. Docstring now shows both conservative and production examples. - -## 2026-05-10 — feat(resource_gate): global rate limits for training mode - -Added `max_per_hour` and `max_per_day` to `ResourceGateSettings` (settings.py) and wired -them into `_evaluate_resource_gate()` (coordinator.py) via a new `global_rate_decision()` -method on `UsageStore` (usage_store.py). Rate check fires after concurrency check, before -memory check. Reason code: `global_rate_exceeded` / window: `hourly|daily`. - -Config (operations_center.local.yaml) updated to training-mode posture: - resource_gate.max_concurrent: 6 → 1 (single executor globally) - resource_gate.max_per_hour: 2 (new) - resource_gate.max_per_day: 30 (new) - -## 2026-05-10 — docs(watchdog_loop): add PARKED_OPERATOR_BLOCKED state + convergence exit logic - -Added PARKED_OPERATOR_BLOCKED health state (1800s cadence) to the watchdog loop runbook and -embedded /loop prompt. Addresses the root inefficiency from the 179-cycle STALLED run: once -the blocker is known, escalated, and evidence-frozen, the loop should park rather than continue -running full investigation cycles. - -Changes applied to docs/operator/watchdog_loop.md: -- STEP 3 (loop prompt): OPERATOR-BLOCKED classification criteria, NEW EVIDENCE EVALUATION - (11 categories; timestamp differences explicitly excluded), PARK TRANSITION conditions, - UNPARK CONDITIONS (9 triggers returning to STALLED/DEGRADED/ACTIVE) -- STEP 9 (loop prompt): 8 new structured parked summary fields (Operator-blocked state, - Parked state active, Park reason, New evidence detected, Safe retry condition, - Last evidence-changing cycle, Repeated unchanged cycles, Active remediation suspended) -- STEP 10 (loop prompt): PARKED_OPERATOR_BLOCKED row in cadence table; PARK TRANSITION - DECISION block; UNPARK TRANSITION DECISION block; FORBIDDEN note against lingering at STALLED -- Adaptive cadence table: PARKED_OPERATOR_BLOCKED row (1800s) -- Forbidden cadence widening: note that STALLED is also forbidden when park criteria are met -- Stagnation distinction table: PARKED_OPERATOR_BLOCKED row -- Blocked work classification table: operator-blocked row -- Structured cycle summary template: 8 new parked fields -- What each cycle does table: Park evaluation row -- Custodian enforcement: 6 new invariants (no indefinite STALLED, park requires Plane task, - unpark check required, timestamp ≠ evidence, operational convergence definition) -- New sections: Operator-blocked lifecycle, Operational convergence exit, - Canonical example: kodo SIGKILL (9c7f4bb9) - -## 2026-05-09T04:55Z — Runbook update: convergence promotion as first-class concept - -Updated docs/operator/watchdog_loop.md with 10-item convergence promotion layer: -- "Convergence promotion" section + scaffold removal direction added near top -- Watcher responsibility mapping table (12 behaviors → future watcher owners) -- Promotion rule: same judgment 2+ cycles → Plane task for watcher ownership -- STEP 4 CONVERGENCE PROMOTION CHECK added to loop prompt (old STEPs 4–9 → 5–10) -- WATCHER HANDOFF INVESTIGATION added to STEP 3 blocked work investigation -- Watcher-owned evidence table (10 evidence types → producing watcher) -- Watcher handoff investigation section added to runbook body -- Convergence promotion fields added to structured cycle summary template -- Over-promotion guardrail: evidence-driven, not one-off failures -- Custodian invariants section updated with 4 new scaffold/promotion invariants -- "What each cycle does" table updated with convergence promotion row - -First cycle to emit convergence-promotion fields in summary (above). - -## 2026-05-09T04:45Z — Review watcher: spec-awareness + Custodian + /lgtm fix - -Three bugs fixed in src/operations_center/entrypoints/pr_review_watcher/main.py: - -1. /lgtm exact-match trap (was body.strip().lower() == "/lgtm"): - Changed to regex ^/lgtm(\s|$) on first line only. Multi-line /lgtm comments - and /lgtm with trailing explanation now trigger merge. /lgtm-something still rejected. - Test: test_is_lgtm_comment_with_trailing_text (3 new assertions). - -2. Spec-awareness in self-review (_load_campaign_spec helper): - Phase 1 self-review now fetches the campaign spec via Plane task label (campaign-id:), - loads it from state/campaigns/active.json → spec_file path, and prepends it to the - kodo review prompt as "Campaign spec (review against this — violations are CONCERNS)". - kodo reviewer can now catch wrong filenames, wrong member names, missing tests/version/CHANGELOG. - -3. Custodian enforcement in self-review (_custodian_findings helper): - Phase 1 self-review now runs .venv/bin/custodian-multi --repos --json - on the repo's configured local_path (if set). Findings are injected into the kodo - review prompt as "Custodian static analysis" section. Reviewer must address each - finding or include it in CONCERNS. Gracefully skips if local_path unset or custodian - unavailable (no hard dependency). - -Review checklist in goal_text now explicitly requires: - - Spec compliance (all filenames, members, counts, exports, tests, version per spec) - - All Custodian findings addressed - - Standard code quality - - No kodo tooling artifacts in diff - -Tests: 38/38 review watcher + 15/15 golden = 53 total pass. -Review watcher restarted with new code (pid 2960481). - -## 2026-05-08 — Add plane_task_template.example.md - -config/plane_task_template.local.md is generated by `oc setup` and gitignored. -Added config/plane_task_template.example.md as the tracked template showing the -expected structure (Execution/Goal/Constraints sections). Gap: no tracked example -existed for an operator-generated gitignored file. - -## 2026-05-08 — Watchdog runbook: behavioral/executor analysis expansion - -Added 4 new sections and strengthened /loop STEP 3 with 10 canvas-task changes: -behavioral convergence analysis (convergent/weakly-convergent/non-convergent/divergent), -semantic duplicate remediation detection, automation self-deception detection, -executor-quality investigation. BEHAVIORAL CONVERGENCE CHECK block added to STEP 3. -HEALTHY cadence forbidden extended to cover non-convergent/divergent/self-deception states. -7 new cycle summary fields. Blocked work classification extended with non-convergent and divergent. -5 new custodian guardrail invariants. - -## 2026-05-08 — feat/managed-repo-config-gaps: 4 gaps closed - -- Gap 1: `ManagedRepoConfig` gains `@model_validator(mode="after")` — enforces - `audit` present when capabilities includes "audit", `audit_types` non-empty, - `repo_id`/`repo_name` non-blank. All 3 paths tested; example config passes. -- Gap 2: ADR 0004 `docs/architecture/adr/0004-managed-repo-private-overlay.md` - — documents the private overlay pattern, privacy invariant rationale, alternatives. -- Gap 3: `docs/operator/managed_repo_troubleshooting.md` — operator runbook for - config setup, common mistakes, field migration, dispatch debugging. -- Gap 4: OC11 detector added to `.custodian/detectors.py` — AST-extracts all - Pydantic field names from `models.py` and checks each appears in - `example_managed_repo.yaml`; caught `phases_from_source` missing (now fixed). -- VF branch fix: P-class plumbing commit cherry-picked to VF `dev` - (was on `main` only); Zonos submodule pointer unchanged. - -## 2026-05-08 — Watchdog loop runbook + /loop prompt: starvation/stagnation hardening - -Tightened starvation definition (single-cycle evidence sufficient), added closed-loop -stagnation class, queue-unblocking investigation rules, forward-progress invariant, -forbidden-HEALTHY-during-starvation cadence rule, 5 new cycle summary fields. -Root cause: loop correctly detected starvation signals but classified as "potential" and -stayed at HEALTHY cadence — this is now explicitly forbidden by runbook invariants. - -## 2026-05-08 — P-class plumbing config wired in `.custodian/config.yaml` - -Added `audit.plumbing` block with three artifact contracts: heartbeat (role/at/status → OperatorConsole mtime check), usage.json (top-level + event sub-keys → budget/rate display), active.json (campaigns → campaign pane). P2 ignore_keys suppress TUI state dict false positives. All three P1/P2/P3 = 0 findings. - -## 2026-05-08 — Propose heartbeat moved to background subprocess - -pipeline_trigger is an infinite watch loop — wait never returns, so the propose bash -wrapper never re-iterated and the heartbeat never refreshed. Replaced with a background -subprocess writing every 60s independent of the child, plus a clean trap to kill it on exit. - -## 2026-05-08 — Watchdog heartbeat every 5 min; propose heartbeat after child exits - -Watchdog slept 3600s between heartbeats — replaced single sleep with 12×300s loop, writing each iteration. -Propose only wrote heartbeat at loop-top; added second write after wait returns so it updates after each pipeline_trigger run. - -## 2026-05-08 — Fix bash syntax error in heartbeat printf (propose + watchdog) - -Quoted `"\$(date ...)"` inside a `-lc "..."` string closed the outer double-quote. -Dropped the inner quotes; unquoted `\$(date ...)` expands correctly inside the inner bash. - -## 2026-05-08 — Heartbeat writes added to intake, spec, propose, watchdog - -Added --status-dir flag to intake and spec_director entrypoints; both now write -heartbeat_{role}.json each loop iteration. Propose and watchdog bash wrappers in -operations-center.sh also write heartbeat files. Fixes permanent "stalled" banner -for all 4 roles in OperatorConsole watcher_status_pane. - -## 2026-05-08 — X1 cross-repo config wired - -Added `audit.cross_repo.platform_manifest_repo: ../PlatformManifest` to `.custodian/config.yaml`. X1 live-run: 0 legacy-name findings. - -## 2026-05-08 — Watchdog loop hardening (OC10 detector, lock helper, hardened runbook) - -scripts/operations-center.sh: watchdog-loop-acquire/release/status commands — PPID-based -lock at logs/local/watchdog_loop.lock, stale-reclaim via kill -0 liveness check. -.custodian/detectors.py: OC10 kodo max_concurrent must be 1 (reads local config; -passes silently on CI). docs/operator/watchdog_loop.md: all 12 hardening outcomes. -See previous entry for full change summary. - -## 2026-05-08 — Brainstorm retry + model downgrade + watchdog loop hardening (12 outcomes) - -spec_director/brainstorm.py: _clean_raw extracted, one-shot retry on YAML front-matter -parse failure (model was describing existing spec instead of generating new one). -runtime_binding_policy.yaml: refactor+feature rules opus→sonnet (low-cost posture). -scripts/operations-center.sh: watchdog-loop-acquire/release/status (PPID-based lock, -JSON payload, stale-reclaim). .custodian/detectors.py: OC10 kodo max_concurrent must -be 1. docs/operator/watchdog_loop.md: full hardening rewrite — lock ownership, -preflight checklist, execution gate, deterministic affected-repo discovery, branch -hygiene, destructive-action guardrails, anti-flap escalation, structured cycle summary, -updated /loop prompt, Custodian enforcement. - -## Notes - -- Phase 2 test suite: `pytest tests/unit/audit_contracts/ -v` → 119 passed in 0.50s -- Phase 1 test suite: `pytest tests/unit/managed_repos/ -v` → 26 passed -- stack_authoring output_dir is `tools/audit/report/authoring` not `stack_authoring` (Phase 0 quirk, documented) - -## 2026-05-08 — Custodian round: T6/T7/T8 exclusions + DC8/M1/C41 cleanup - -OC findings: 364 → 73. - -- T6/T7/T8 exclude_paths added per integration-tested layer (adapters, - entrypoints, backends, executors, observer, scheduled_tasks, etc.) plus - artifact_index/audit_contracts and the top-level scheduled-task entry - modules. These are exercised via integration tests, not direct imports. -- M1: added CHANGELOG.md (Keep-a-Changelog format). -- DC8: moved Quick Start before Overview in README. -- C41: added ensure_ascii=False to json.dumps in run_memory/{cli,index} - and entrypoints/{graph_doctor,reaudit_check} mains. - -## 2026-05-08 — Custodian round: OC clean (73 → 0) - -- Added the deeper-layer T6 packages (audit_dispatch/governance/toolset, - autonomy_tiers, behavior_calibration, repo_graph, routing, decision, - drift, fixture_harvesting, mini_regression, planning, policy, proposer, - slice_replay, tuning, spec_director, contracts, application, execution, - domain, config) — same layers already exempt from T7. -- C29 settings.py + coordinator.py (canonical settings + central dispatcher, - splitting fragments cohesion). -- C13 += executors/** (subprocess env-overlay layer). -- C41 backends/archon/http_workflow.py (ASCII-safe correct for Archon HTTP). -- T2 schema-validation tests + startup-wiring (raise/side-effect IS the assert). -- common_words += autonomy-gap design-doc symbols (renamed/removed helpers). -- known_values += audit_report, kodo_version (K2 vocabulary). -- DC7: linked the upstream-patch-evaluation, routing-tuning, post-merge-hook, - and execution-boundary ADR docs from docs/README.md. - - -## 2026-05-08 — CI regression guard - -Added .github/workflows/custodian-audit.yml + .hooks/pre-push. -Both run `custodian-multi --fail-on-findings`. CI is the source of -truth; pre-push catches regressions before they hit GitHub. - - -## 2026-05-08 — CI fix: Direct URL pip install syntax - - -## 2026-05-08 — Drift cleanup caught by new CI guard - -run_show/main.py: split semicolon statements (E702), ensure_ascii=False on -the JSON dump (C41). docs/README.md: linked the archon_workflow_registration -doc (DC7). - - -## 2026-05-08 — D11 exclusions for backend + entrypoint typologies - - -## 2026-05-08 — Link ADR 0002+0003; common_words for ADR 0002 vocabulary - -## 2026-05-08 — Fix circuit breaker tripped by quota exhaustion events - -Root cause: API capacity exhaustion (kodo hit Claude quota ~19:40-20:00 UTC) was -being recorded as execution_outcome(succeeded=False), feeding the circuit breaker. -The CB design explicitly states quota events should NOT feed it — they are -infrastructure problems, not task-quality signals. record_quota_event existed -but was never called from coordinator.py. - -Fix: -- coordinator.py: detect capacity_exhausted failure_category + reason keywords - → call record_quota_event instead of failed execution_outcome -- .env.operations-center.local: CIRCUIT_BREAKER_STALENESS_HOURS=1 (was default 4h) - so past quota incidents age out within the same session after quota resets -- Restarted goal/test/improve board workers to pick up new env - -Unblocked: 8/8 watchers running; CB closed with 1h staleness window. - -## 2026-05-08 — Harden watchdog loop: adaptive cadence, blocked work investigation, anti-stagnation - -Rewrote docs/operator/watchdog_loop.md per canvas task (strengthen OC Platform -Watchdog Loop). Key additions: -- Adaptive cadence (180s CRITICAL → 3600s HEALTHY) based on worst health state -- STEP 3 blocked/stalled work investigation with 8-class blocker taxonomy -- Anti-stagnation: reads last 3 cycle summaries to detect repeated findings -- dead-remediation and starvation classes added to execution gate -- Expanded cycle summary with health-state, cadence, blocked counts, stagnation flag -- Design-change procedure section added -- /loop prompt renumbered STEP 0–9; STEP 9 is adaptive ScheduleWakeup - -## 2026-05-08 — Watchdog reviver interval: 2min → 1h - -The watchdog bash loop is a blind reviver with no root-cause analysis. -2-minute polling masked crash loops. Changed sleep 120 → sleep 3600 so -the operator loop (hourly) is the primary crash detector and the watchdog -is a backstop only. - -## 2026-05-08 — Fix phantom entrypoints/watchdog reference in audit docs - -G8 (ghost_work_audit.md) and F1 (flow_audit.md) both referenced -`entrypoints/watchdog/main.py` which does not exist. Real implementation -is `entrypoints/maintenance/recover_stale.py`. Updated both docs to point -at the correct path and reflect the `--per-kind` flag that also exists. - -## 2026-05-10 — GitHub username migration - -- Updated repo-owned references from the previous GitHub username to `ProtocolWarden` after the account rename. -- Scope: license headers, GitHub URLs, workflow install commands, manifests, dependency URLs, examples, and local owner defaults where present. - -## 2026-05-10 — Custodian pre-push command resolution - -- Updated the pre-push guard to prefer system `custodian-multi`, with repo venv and sibling Custodian venv fallbacks. - -## 2026-05-13 — Fix invalid RuntimeBinding combinations in recovery tests - -- `RuntimeBinding` now validates `kind × selection_mode` pairs in `__post_init__`. Test fixtures used `kind="kodo"` (not a valid RuntimeKind) and `selection_mode="fixed"` (not a valid SelectionMode), causing ValueError at test construction time. -- Fixed `test_sigkill_records_backend_cooldown_and_stops_retry`: changed to `kind="cli_subscription"`, `selection_mode="backend_default"`; updated `registry.get("cli_subscription")` assertion key. -- Fixed `test_backend_sigkill_transitions_to_unstable_with_cooldown`: changed `selection_mode="fixed"` → `"policy_selected"` (registry.record_failure call and all other assertions unchanged). -- All 3678 tests pass. - -## 2026-05-13 — Add CLAUDE.md and .custodian/tmp*.yaml to .gitignore - -- Added CLAUDE.md to .gitignore -- Added .custodian/tmp*.yaml to exclude custodian audit temp files - -## 2026-05-18 — spec watcher crash-loop fix: env re-sourced in restart loop - -Root cause: env file sourced once at login shell startup in `start_watch_role`. If the initial source failed (or env was updated after watcher started), all subsequent restarts ran with incomplete env. Fix: added `set -a; source '${ENV_PATH}' 2>/dev/null || true; set +a` inside the restart loop for all 5 role blocks (intake, review, spec, propose, goal/test/improve). - -This is a resilience improvement: watchers now recover automatically from temporary env file unavailability at startup, and pick up env changes without requiring a full watcher restart cycle. - -## 2026-05-18 — graph doctor fixed: private_manifest_path and local_manifest.yaml - -- Added `private_manifest_path` to `config/operations_center.local.yaml` pointing to existing PrivateManifest at standard path (`/home/dev/Documents/GitHub/PrivateManifest/manifests/videofoundry/private_manifest.yaml`) -- Created `topology/local_manifest.yaml` in VideoFoundry from the example file (was missing, gitignored, required for graph construction) -- Graph now builds: 11 nodes / 12 edges, graph_built=True - -## 2026-05-19 — ADR 0005 Phase 5: executor backend adapters + settings cleanup - -Cross-repo wiring (new work order `docs/architecture/adr/0005-work-order-p5.md`): -- settings.py: removed api_key from TeamExecutorSettings + CritiqueExecutorSettings; - added worker_backend to all three executor settings; added working_dir to CritiqueExecutorSettings -- Created backends/team_executor/, dag_executor/, critique_executor/ canonical adapters; - each wraps the executor's Runner and maps RuntimeResult → ExecutionResult -- DAGExecutorBackendAdapter resolves .dag_executor/workflow.yaml or falls back to single-agent GraphSpec -- factory.py: registered TEAM_EXECUTOR, DAG_EXECUTOR, CRITIQUE_EXECUTOR in from_settings() -- 3324 tests pass (+ new factory test updated) - -## 2026-05-19 — Fix Custodian findings for p5 adapter push - -Removed unused Path/Optional imports from team_executor/adapter.py and critique_executor/adapter.py. -Added 0005-work-order-p5.md to docs/README.md to fix DC7 orphan finding. Custodian now clean. - -## 2026-05-19 — ADR 0006 work order: CoreRunner rename + safe_run() consolidation - -- Written docs/architecture/adr/0006-corerunner-subprocess-consolidation.md -- 6-phase plan: extract safe_run(), wire TE/DE/CE, update OC, update PlatformManifest, update remaining repos, GitHub repo rename -- Decision: all subprocess calls in ecosystem share one process-group-safe implementation via core_runner.safe_run() - -## 2026-05-19 — Fix custodian findings in ADR 0006 doc - -- Removed backtick-quoted future symbols from ADR prose (K1/OC8 findings) -- Fixed dead CxRP cross-ref (DC2) -- Linked ADR from docs/README.md (DC7 orphan finding) - -## 2026-05-24 — Hook hard-requires CL_ANCHOR (rollout) - -- .claude/hooks/{pre_tool_use,stop}.sh: resolve .context under CL_ANCHOR (manifest anchor), no CWD fallback. pre_tool_use blocks if unset; stop skips gracefully. CL_ANCHOR supplied by panes + loop (cl session start). - -## 2026-05-24 — Loop session-boundary hydrate/capture for codex/aider - -- tools/loop/controller.py: run_session now wraps non-claude (codex/aider) sessions with `cl context hydrate` (prepends prior context to the prompt) + `cl context capture` (records exit/log), gated on CL_ANCHOR. claude skipped (per-tool hooks handle it). Stable lineage oc-loop. Gated/no-op if unanchored or cl missing. Tests extended. - -## 2026-05-27 — Persistent loop session: anchor once per run - -`tools/loop/controller.py`: move `cl session start` from per-iteration `_session_env()` to a single call in `main()` before the iteration loop. Added `_end_cl_session()` to archive the session on shutdown. `run_session()` and `_session_env()` now accept `anchor_vars` to reuse the stable `CL_SESSION_ID` across all iterations. - -## 2026-05-27 — Fix: wire boundary_artifact_file in OC custodian config (B2) - -Added `boundary_artifact_file: ../PrivateManifest/dist/boundary_disclosure_artifact.json` to `.custodian/config.yaml`. This was a pre-existing B2 finding — not introduced by the loop session change. - - ---- - -## 2026-05-30 — Add ci_fix phase to review watcher - -PRs with failing ruff/lint CI were escalating to human_review instead of self-healing. -Added phase 0 (ci_fix): checks out PR branch locally, runs ruff --fix, pushes, waits for CI. -Falls through to self_review after 3 attempts or non-fixable failures. - ---- - -## 2026-05-30 — Remove human escalation from review watcher - -human_review phase deleted entirely. CONCERNS and no-verdict loops now auto-merge -after max_self_review_loops (3). Single CONCERNS comment on pass 1 only — no spam. -ci_fix phase (phase 0) handles ruff failures before self-review. - ---- - -## 2026-05-30 — Fix ci_fix check name matching - -get_failed_checks returns 'Lint (ruff): failure' format; strip colon suffix before matching. - ---- - -## 2026-05-30 — Four systemic fixes to prevent review/gate failures recurring - -1. _run_pipeline: log exec subprocess stderr so no-verdict failures are diagnosable -2. OPEN_PR_GATE: skip PRs with mergeable=UNKNOWN (CI in-flight) to reduce goal starvation -3. pyproject.toml: pin custodian to SHA instead of @main to stop flaky CI audit failures -4. controller: detect git HEAD changes each iteration, pull + SIGTERM watchers to pick up new code - ---- - -## 2026-05-30 — Three more systemic fixes - -1. Restore auto_merge_on_ci_green fast-path in _phase1 (was in deleted _phase2) -2. Add custodian-doctor to OC validation_commands — catch CI audit failures pre-PR -3. Review watcher relaunched via operations-center.sh for auto-restart on crash/SIGTERM - ---- - -## 2026-05-30 — Add 30min timeout to exec pipeline subprocess - -Prevents hung executor from blocking the review watcher indefinitely. - ---- - -## 2026-05-30 — Architecture audit fixes - -From audit across all 19 managed repos: -- Pin custodian SHA in TeamExecutor, DAGExecutor, CritiqueExecutor (was @main) -- Upgrade TeamExecutor stop.sh to Gen 2 (was Gen 1) -- Add CI workflows to ContextLifecycle and SyncMechanism (had none) -- Add pytest+ruff validation_commands to executor repos (had empty []) -- Remove operations-center-testing-branch from all allowed_base_branches in config - ---- - -## 2026-05-30 — Arch audit: custodian T8 fixture exemptions - -Add tests/fixtures/ and test_dependency_report_fixtures.py to T8 exclude list. - ---- - -## 2026-05-30 — Stage 4: dependency-report performance tests wired into CI - -Add `performance` pytest marker, mark all 19 tests `@pytest.mark.performance`, -tighten timing bounds to uniform 50ms across all scenarios (tightened extra-large -from 60ms). Add dedicated `performance` CI job to ci.yml. Add design doc. - ---- - -## 2026-05-30 — ADR 0010 drafted + Plane tasks #165-168 created - -Arch audit work order: Issue 2 (state locking) is P1 autonomous, Issue 3 (subprocess security) is P2, Issue 1 (board_worker refactor) is P3 partial-autonomous. - ---- - -## 2026-05-30 — ADR 0010 custodian clean - -Fixed K1/OC8/DC7 findings on ADR 0010 (linked in README, renamed proposed field, added common_word). - ---- - -## 2026-05-30 — ADR 0010 P3: board_worker refactor complete - -2186-line monolith split into 8 cohesive modules. main.py: 145 lines. -All 3866 tests pass. No behaviour changes — pure extraction with renames. - ---- - -## 2026-05-30 — Custodian fixes after board_worker refactor - -C29 exemption for outcomes.py, old private names added to common_words. - ---- - -## 2026-05-30 — Add opus as sonnet fallback in controller - -Normal round-robin: sonnet ↔ codex. When sonnet per-model limit hit but weekly budget remains, fall back to opus (same claude CLI). Priority: [claude, opus, codex]. - ---- - -## 2026-05-30 — Opus only when sonnet-specific limit, not global claude limit - -Global claude limits (5h session, weekly) also put opus on cooldown — go straight to codex. - ---- - -## 2026-05-30 — Clean up controller opus fallback implementation - -parse_rate_limit_reset returns (dt, text) tuple. _handle_backend_limit single read. Tighter global-limit regex. - - -## 2026-06-01 — Fix CI failures from PR #213 merge - -Applied ruff format + import sort across all 553 files, fixed G004/F841/DTZ007 lint violations in observer module (alert_channels.py, alert_validation.py, exporters.py), converted async notify() to sync (no await operations), fixed test threshold/assertion errors in test_stage3_observability.py and test_alert_channels.py. - - -## 2026-06-02 — Fix spec-author campaign build closed-loop stagnation - -`SpecFrontMatter.from_spec_text()` rejected specs starting with `` comment (executor-added prefix), causing every queue-drain spec to fail campaign build and trigger another queue-drain in a stagnation loop (3 tasks: d0f5af4d, 8f17cc68, ae6e5235). Fixed by stripping leading HTML comment before YAML front matter check. - ---- - -## 2026-06-03 — Raise unit coverage past a 90% gate (waves 3+4) - -Added ~46 hermetic `*_cov.py` test files (~700 new tests across waves 3+4) lifting unit coverage 86.9% → 95.75%, then bumped `--cov-fail-under` 85 → 90 (ci.yml:82/90, .coveragerc:13). Fixed two failures surfaced by the full-suite run: (1) openclaw `test_normalize_passes_branch_and_invocation_ref` passed a bare string where the model requires a `RuntimeInvocationRef`; (2) caplog test-pollution — `graph_doctor/test_main_cov.py::test_logger_level_restored_after_run` forced the `repo_graph_factory` logger to ERROR without restoring it, silencing later caplog-based warning assertions; now restores via try/finally. Custodian clean (fixed 3 T2 no-assert findings), ruff clean. - ---- +## Archived -## 2026-06-03 — Fix Python-version-sensitive writer coverage test +_Archived completed history → `/home/dev/Documents/GitHub/PrivateManifest/archive/console/OperationsCenter/log-2026-06-04.md`_ -`fixture_harvesting/test_writer_cov.py::test_write_artifact_stat_oserror` patched `Path.stat` globally to raise an errno-less `OSError`. On 3.13+ `Path.exists()` uses `os.stat` directly so only the targeted size-stat raised (passed locally); on CI's 3.11 `exists()` routes through `Path.stat` and re-raises errno-less OSErrors, so it failed before reaching the code path under test. Now also stub `Path.exists` to return True for the source so only the intended stat call errors. diff --git a/.custodian/config.yaml b/.custodian/config.yaml index 2ad008d7..b7e4aa21 100644 --- a/.custodian/config.yaml +++ b/.custodian/config.yaml @@ -3,6 +3,8 @@ src_root: src/operations_center tests_root: tests audit: + # .console/ reconciled (reconcile/console) — R1/R2 reconcile detectors active. + reconcile_enforce: true # W2 (core.hooksPath must be set): developer-machine setup check, not applicable # in CI where the repo is freshly cloned and git config is not persisted. ignore_rules: diff --git a/.gitignore b/.gitignore index 12c34c8e..7c89a313 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,9 @@ !.console/validation/** # P3: OC-local operational config and runtime state (migrated from .context/) !.console/workers.yaml +# console-reconciliation: worksheet + any local archive staging stay untracked +/.console/reconcile.yaml +/.console/archive/ CLAUDE.md .custodian/tmp*.yaml __pycache__/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 94d3758e..d57915ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,3 +17,39 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Documentation - Added `docs/design/observer-race-condition-guard.md` documenting the TOCTOU race condition vulnerability, the metadata capture guard mechanism, implementation examples, error handling strategy, testing approach, and operational impact. + +### Reconciled +_Console history consolidated to the private archive (2026-06-04). Items shipped:_ + +- 2026-06-04: reconciled `unit-coverage-90-gate` — Unit coverage climb to a 90% gate (history archived). +- 2026-06-04: reconciled `observer-coverage-spec-authoring` — Spec authoring — observer test coverage campaign (history archived). +- 2026-06-04: reconciled `ci-coverage-threshold-gate` — Update CI/CD pipeline to gate on coverage threshold (Stages 0-4) (history archived). +- 2026-06-04: reconciled `export-validation-failure-metrics` — Export validation failure metrics for alerting (history archived). +- 2026-06-04: reconciled `import-error-test-refactor` — Import-error test refactoring (shared pytest fixtures) (history archived). +- 2026-06-04: reconciled `error-handling-documentation` — Error handling documentation (recipes, catalog, contracts, runbook) (history archived). +- 2026-06-04: reconciled `deriver-transition-coverage` — Deriver transition coverage (bidirectional, Stages 0-4) (history archived). +- 2026-06-04: reconciled `collector-json-hardening` — Collector JSON hardening (validation + security logging) (history archived). +- 2026-06-04: reconciled `switchboard-live-verification` — SwitchBoard live verification rev (deploy-skew bug + runbook) (history archived). +- 2026-06-04: reconciled `sourceregistry-real-wiring` — SourceRegistry — wire it for real (Option B, provenance propagation) (history archived). +- 2026-06-04: reconciled `platformdeployment-compose-profile-smoke` — WorkStation compose profile smoke per profile (history archived). +- 2026-06-04: reconciled `observability-config-skeleton` — Ship observability config skeleton (WorkStation (history archived). +- 2026-06-04: reconciled `oc-kodo-archon-cards-superseded` — OC — kodo + archon executor cards (superseded by team_executor) (history archived). +- 2026-06-04: reconciled `archon-workflow-registration-playbook` — Archon workflow registration playbook (history archived). +- 2026-06-04: reconciled `capacity-exhaustion-regression-fixture` — Capacity-exhaustion regression fixture (history archived). +- 2026-06-04: reconciled `oc-run-show-provenance-reader` — operations-center-run-show single-command provenance reader (history archived). +- 2026-06-04: reconciled `artifact-path-staleness-checks` — Artifact path staleness checks (history archived). +- 2026-06-04: reconciled `routing-rationale-completeness-smoke` — Routing rationale completeness smoke check (history archived). +- 2026-06-04: reconciled `opscenter-custodian-coverage-bridge` — OpsCenter <-> Custodian coverage bridge (history archived). +- 2026-06-04: reconciled `phase7-multi-run-artifact-index` — Phase 7 — multi-run historical artifact index + CLI (history archived). +- 2026-06-04: reconciled `phase6-dispatch-control-crash-safety` — Phase 6 — dispatch control crash-safety + dual-PID tracking (history archived). +- 2026-06-04: reconciled `effective-repo-graph-contract-impact-wiring` — EffectiveRepoGraph + contract impact wired into production (history archived). +- 2026-06-04: reconciled `archon-patch-001-upstream-pr-superseded` — File upstream PR for Archon PATCH-001 (superseded; archon removed) (history archived). +- 2026-06-04: reconciled `three-layer-manifest-primitive` — 3-layer manifest primitive — operationally complete (R1-R4) (history archived). +- 2026-06-04: reconciled `r5-cross-repo-task-chaining` — R5 — Cross-repo task chaining (propagation library + CLIs) (history archived). +- 2026-06-04: reconciled `er000-phase0-golden-tests` — ER-000 — Phase 0 golden tests (history archived). +- 2026-06-04: reconciled `er003-lifecycle-primitive` — ER-003 — Lifecycle primitive (history archived). +- 2026-06-04: reconciled `collector-json-hardening-stage2` — Collector JSON hardening — Stage 2 implementation (history archived). +- 2026-06-04: reconciled `managed-repo-audit-phases` — Managed-repo audit system — Phases 0-12 + verification passes (history archived). +- 2026-06-04: reconciled `cycle-board-unblock-housekeeping` — Cycle board-unblock + task housekeeping (operational) (history archived). +- 2026-06-04: reconciled `kodo-openclaw-regression-fix` — Fix kodo->openclaw regression in tests; CxRP 0.3.1 verdict bump (history archived). + diff --git a/docs/architecture/managed-repos/audit_artifact_contract.md b/docs/architecture/managed-repos/audit_artifact_contract.md index 15ef7f36..20010eaf 100644 --- a/docs/architecture/managed-repos/audit_artifact_contract.md +++ b/docs/architecture/managed-repos/audit_artifact_contract.md @@ -9,7 +9,7 @@ ## Purpose -This document defines the artifact contract between OperationsCenter and the managed repo for audit runs. OperationsCenter invokes the managed repo audit commands and reads the outputs; it never imports the managed repo Python code. The contract specifies exactly what files VF must produce, in what shape, so OpsCenter can ingest them without knowing VF internals. +This document defines the artifact contract between OperationsCenter and the managed repo for audit runs. OperationsCenter invokes the managed repo audit commands and reads the outputs; it never imports the managed repo Python code. The contract specifies exactly what files a private downstream repo must produce, in what shape, so OpsCenter can ingest them without knowing a private downstream repo internals. --- @@ -20,7 +20,7 @@ OperationsCenter the managed repo ───────────────────────── ───────────────────────── Defines contract schemas Implements contract schemas Generates run_id (uuid4.hex) Receives run_id via $AUDIT_RUN_ID -Invokes VF commands Runs audit, writes contract files +Invokes a private downstream repo commands Runs audit, writes contract files Reads run_status.json Writes run_status.json Reads artifact_manifest.json Writes artifact_manifest.json ``` @@ -34,8 +34,8 @@ OpsCenter may only invoke commands and read files. No Python imports across the The contract reflects findings from Phase 0 discovery (`_audit_ground_truth.md`): - **Only one audit type (representative) has run_status finalization.** The five others (`enrichment`, `ideation`, `render`, `segmentation`, `stack_authoring`) write an initial `in_progress` status via `prepare_audit_bucket()` but never finalize it. Phase 5 must add finalization to all six. -- **No `artifact_manifest.json` exists yet.** The field `artifact_manifest_path` is absent from all current VF run_status files. The contract makes it `Optional[str]` to accept legacy files, but `is_compliant` returns `False` unless it is present. -- **Legacy status value:** VF currently emits `"in_progress"`. The contract canonicalizes running state as `"running"`. The enum value `IN_PROGRESS_LEGACY = "in_progress"` is accepted and marked non-compliant. +- **No `artifact_manifest.json` exists yet.** The field `artifact_manifest_path` is absent from all current a private downstream repo run_status files. The contract makes it `Optional[str]` to accept legacy files, but `is_compliant` returns `False` unless it is present. +- **Legacy status value:** a private downstream repo currently emits `"in_progress"`. The contract canonicalizes running state as `"running"`. The enum value `IN_PROGRESS_LEGACY = "in_progress"` is accepted and marked non-compliant. - **stack_authoring output dir:** `tools/audit/report/authoring`, not `tools/audit/report/stack_authoring`. Phase 0 discovered this quirk. - **Architecture invariants** are written to a fixed repo path, not per-run buckets. @@ -43,7 +43,7 @@ The contract reflects findings from Phase 0 discovery (`_audit_ground_t ## Phase 1 Managed Repo Relationship -The managed-repo config (`config/managed_repos/.yaml`, loaded by `managed_repos.loader`) tells OpsCenter how to invoke VF commands. This artifact contract defines what those commands produce. The two are complementary: +The managed-repo config (`config/managed_repos/.yaml`, loaded by `managed_repos.loader`) tells OpsCenter how to invoke a private downstream repo commands. This artifact contract defines what those commands produce. The two are complementary: - Phase 1 config → how to invoke, where to look for outputs - Phase 2 contract (this document) → what the output files must contain @@ -57,7 +57,7 @@ The managed-repo config (`config/managed_repos/.yaml`, loaded by `manag | Controlled vocabulary | `src/operations_center/audit_contracts/vocabulary.py` | `audit_contracts.vocabulary` | | Run status model | `src/operations_center/audit_contracts/run_status.py` | `audit_contracts.run_status` | | Artifact manifest model | `src/operations_center/audit_contracts/artifact_manifest.py` | `audit_contracts.artifact_manifest` | -| VF producer profile | `src/operations_center/audit_contracts/profiles/.py` | `audit_contracts.profiles` | +| managed-repo producer profile | `src/operations_center/audit_contracts/profiles/.py` | `audit_contracts.profiles` | | JSON schemas | `schemas/audit_contracts/` | generated from Pydantic | | Examples | `examples/audit_contracts/` | validated against models | @@ -82,7 +82,7 @@ The vocabulary is split into two explicit layers: | `ValidFor` | `current_run_only`, `cross_run_comparison`, `latest_snapshot`, `historical_record`, `partial_run_analysis`, `unknown` | | `Limitation` | `partial_run`, `missing_downstream_artifacts`, `producer_not_finalized`, `non_representative_audit_unverified`, `repo_singleton_overwritten`, `infrastructure_noise_excluded`, `path_layout_non_uniform`, `unknown` | -**the managed repo profile enums** (VF-specific, in `VIDEOFOUNDRY_PROFILE_ENUMS`): +**the managed repo profile enums** (managed-repo-specific, in `MANAGED_PROFILE_ENUMS`): | Enum | Description | |------|-------------| @@ -90,7 +90,7 @@ The vocabulary is split into two explicit layers: | `the managed repoSourceStage` | Known stage names from Phase 0 (TopicSelectionStage, etc.) | | `the managed repoArtifactKind` | Artifact kinds: `run_status`, `stage_report`, `audit_report`, `architecture_invariant`, etc. | -`GENERIC_ENUMS` and `VIDEOFOUNDRY_PROFILE_ENUMS` are disjoint tuples enforced by tests. +`GENERIC_ENUMS` and `MANAGED_PROFILE_ENUMS` are disjoint tuples enforced by tests. --- @@ -107,7 +107,7 @@ Required fields: | `producer` | `str` | e.g. `""` | | `repo_id` | `str` | e.g. `""` | | `run_id` | `str` | uuid4().hex, injected by OpsCenter via `$AUDIT_RUN_ID` | -| `audit_type` | `str` | one of the six VF audit types | +| `audit_type` | `str` | one of the six a private downstream repo audit types | | `status` | `RunStatus` | current run state | Optional but contract-required for compliance: @@ -270,7 +270,7 @@ the managed repo must implement these changes for `is_compliant` to return `True 5. **Populate `excluded_paths`** in each manifest with the known infrastructure noise patterns. 6. **Include the repo singleton** in the manifest for representative runs. -Until Phase 5, OpsCenter treats all VF run_status files as legacy (`is_compliant = False`) and reads them in read-only diagnostic mode. +Until Phase 5, OpsCenter treats all a private downstream repo run_status files as legacy (`is_compliant = False`) and reads them in read-only diagnostic mode. --- @@ -289,7 +289,7 @@ OTHER_PROFILE = the managed repoProducerProfile( ) ``` -The generic contract models (ManagedRunStatus, ManagedArtifactManifest) are unchanged. The boundary enforcement test (`TestBoundaryEnforcement`) uses Python AST to verify the audit_contracts package never imports VF code. +The generic contract models (ManagedRunStatus, ManagedArtifactManifest) are unchanged. The boundary enforcement test (`TestBoundaryEnforcement`) uses Python AST to verify the audit_contracts package never imports a private downstream repo code. --- @@ -298,15 +298,15 @@ The generic contract models (ManagedRunStatus, ManagedArtifactManifest) are unch | Layer | What it contains | Who can use it | |-------|-----------------|----------------| | Generic contract | RunStatus, ManifestStatus, Location, ManagedRunStatus, ManagedArtifactManifest | Any managed repo | -| VF producer profile | the managed repoAuditType, the managed repoAuditTypeSpec, VIDEOFOUNDRY_PROFILE | the managed repo only | +| managed-repo producer profile | the managed repoAuditType, the managed repoAuditTypeSpec, MANAGED_PROFILE | the managed repo only | -`GENERIC_ENUMS` and `VIDEOFOUNDRY_PROFILE_ENUMS` tuples are exported from `vocabulary.py` to allow tests to assert the layers are disjoint. +`GENERIC_ENUMS` and `MANAGED_PROFILE_ENUMS` tuples are exported from `vocabulary.py` to allow tests to assert the layers are disjoint. --- ## Non-Goals - OpsCenter does not validate artifact file contents — only the manifest metadata. -- OpsCenter does not write `run_status.json` or `artifact_manifest.json` — VF writes them. -- The contract does not specify how VF internally structures its stages or pipeline. +- OpsCenter does not write `run_status.json` or `artifact_manifest.json` — a private downstream repo writes them. +- The contract does not specify how a private downstream repo internally structures its stages or pipeline. - OpsCenter does not import any the managed repo Python code at any point. diff --git a/docs/architecture/managed-repos/audit_ground_truth.md b/docs/architecture/managed-repos/audit_ground_truth.md index fa82c137..bc5afee7 100644 --- a/docs/architecture/managed-repos/audit_ground_truth.md +++ b/docs/architecture/managed-repos/audit_ground_truth.md @@ -17,7 +17,7 @@ No schemas were created. No implementation was changed. No contracts were design ## Repositories Inspected - `the managed repo` — audit runner, artifact producer -- `OperationsCenter` — contract owner (no VF code imported; only files read) +- `OperationsCenter` — contract owner (no a private downstream repo code imported; only files read) --- @@ -207,7 +207,7 @@ tools/audit/report/architecture_invariants/warning_triage.md "summary": { "pass": 41, "warn": 190, "fail": 0, "known_legacy": 7 }, "findings": [ { - "id": "VF-ARCH-LAYER-001", + "id": "MANAGED-ARCH-LAYER-001", "family": "layer_direction", "severity": "warn", "status": "known_legacy", diff --git a/docs/history/managed-repo/managed_repo_artifact_index.md b/docs/history/managed-repo/managed_repo_artifact_index.md index 369e4ed8..0ea82915 100644 --- a/docs/history/managed-repo/managed_repo_artifact_index.md +++ b/docs/history/managed-repo/managed_repo_artifact_index.md @@ -99,7 +99,7 @@ Artifact paths in the manifest may be absolute or relative. Phase 7 resolves the | Relative path, no `repo_root` | Derive root from manifest position + `run_root` depth. | | Derivation fails | `resolved_path = None`. | -**Derivation heuristic**: the manifest file lives at `{repo_root}/{run_root}/artifact_manifest.json`. The builder steps up `len(run_root.parts)` parent directories from `manifest_dir` to obtain `repo_root`. This works for standard VF bucket layouts. +**Derivation heuristic**: the manifest file lives at `{repo_root}/{run_root}/artifact_manifest.json`. The builder steps up `len(run_root.parts)` parent directories from `manifest_dir` to obtain `repo_root`. This works for standard a private downstream repo bucket layouts. If a path cannot be safely resolved, it is marked `resolved_path = None` and `exists_on_disk = None`. The caller receives a clear signal rather than a guess. diff --git a/docs/history/managed-repo/managed_repo_audit_dispatch.md b/docs/history/managed-repo/managed_repo_audit_dispatch.md index 97e7bbdc..a809eebc 100644 --- a/docs/history/managed-repo/managed_repo_audit_dispatch.md +++ b/docs/history/managed-repo/managed_repo_audit_dispatch.md @@ -71,7 +71,7 @@ managed private project (Phase 5) writes contract files to the audit bucket: - `run_status.json` — Phase 2 schema, includes `artifact_manifest_path`. - `artifact_manifest.json` — Phase 2 schema, lists all artifacts. -Phase 6 reads these files after process exit. The `artifact_manifest_path` field in `run_status.json` is a path relative to the VF repo root. Phase 6 resolves it using `base_dir=working_dir_abs` (the VF repo root). +Phase 6 reads these files after process exit. The `artifact_manifest_path` field in `run_status.json` is a path relative to the a private downstream repo repo root. Phase 6 resolves it using `base_dir=working_dir_abs` (the a private downstream repo repo root). --- diff --git a/docs/history/managed-repo/managed_repo_audit_system_final_verification.md b/docs/history/managed-repo/managed_repo_audit_system_final_verification.md index 8a336021..5ffc4770 100644 --- a/docs/history/managed-repo/managed_repo_audit_system_final_verification.md +++ b/docs/history/managed-repo/managed_repo_audit_system_final_verification.md @@ -58,7 +58,7 @@ None. ### Suggested Follow-Up Tasks (Non-Implementation) -1. **First live managed private project audit run** — Phase 5 code is wired but has never been executed against a live VF audit. Run `operations-center-governance run` against a live VF instance to validate Phase 5 outputs conform to the contract. +1. **First live managed private project audit run** — Phase 5 code is wired but has never been executed against a live a private downstream repo audit. Run `operations-center-governance run` against a live a private downstream repo instance to validate Phase 5 outputs conform to the contract. 2. ~~**CI integration guide**~~ — **Done (Rev 12).** `docs/architecture/ci/ci_integration_guide.md` covers the full governance flow, exit codes, urgency/approval matrix, cooldown/budget configuration, and the state directory persistence requirement for ephemeral CI environments. @@ -180,7 +180,7 @@ Writes artifact_manifest.json ──────► Reads + validates manifes **Boundary rule:** OpsCenter never imports managed private project code. All coordination is through files and subprocess invocation only. -**Note on `managed private project*` names in OpsCenter vocabulary:** `managed private projectArtifactKind`, `managed private projectAuditType`, `managed private projectSourceStage` are OpsCenter-owned contract types defined in `audit_contracts/vocabulary.py` and `audit_contracts/profiles/managed-private-project.py`. These are not imports of the managed private project Python package — they are OpsCenter's own definitions of vocabulary for the VF producer profile. +**Note on `managed private project*` names in OpsCenter vocabulary:** `managed private projectArtifactKind`, `managed private projectAuditType`, `managed private projectSourceStage` are OpsCenter-owned contract types defined in `audit_contracts/vocabulary.py` and `audit_contracts/profiles/managed-private-project.py`. These are not imports of the managed private project Python package — they are OpsCenter's own definitions of vocabulary for the a private downstream repo producer profile. --- @@ -262,8 +262,8 @@ Import boundary: none of these layers imports `audit_dispatch`. Verified by AST ### Full System Pipeline (End-to-End) ``` -Phase 5: VF writes run_status.json + artifact_manifest.json -Phase 6: OpsCenter governance → approved → dispatch → VF subprocess +Phase 5: a private downstream repo writes run_status.json + artifact_manifest.json +Phase 6: OpsCenter governance → approved → dispatch → a private downstream repo subprocess Phase 7: load_run_status_entrypoint() → load_artifact_manifest() → build_artifact_index() Phase 8: analyze_artifacts() [advisory, read-only] Phase 9: harvest_fixtures() → FixturePack @@ -360,7 +360,7 @@ Checked with exact line-start anchors to distinguish the managed private project ### `managed private project*` Vocabulary in OpsCenter -`managed private projectArtifactKind`, `managed private projectAuditType`, `managed private projectSourceStage` are OpsCenter-defined enums in `audit_contracts/vocabulary.py` and `audit_contracts/profiles/managed-private-project.py`. They are referenced by OpsCenter modules at import time. This is not a boundary violation — these are OpsCenter's own canonical definitions for the VF producer contract, not imports of the managed private project Python package. +`managed private projectArtifactKind`, `managed private projectAuditType`, `managed private projectSourceStage` are OpsCenter-defined enums in `audit_contracts/vocabulary.py` and `audit_contracts/profiles/managed-private-project.py`. They are referenced by OpsCenter modules at import time. This is not a boundary violation — these are OpsCenter's own canonical definitions for the a private downstream repo producer contract, not imports of the managed private project Python package. ### Unidirectional Import Graph @@ -499,6 +499,6 @@ The managed repo audit system across Phases 0–12 is declared **locked** as of - `MiniRegressionSuiteDefinition` and `MiniRegressionSuiteReport` bumped to schema v1.1 with `repo_id`/`audit_type` fields - `suite_report.schema.json` regenerated; 0-delta against model - CI integration guide published at `docs/architecture/ci/ci_integration_guide.md` -- Follow-up tasks 2 and 3 completed; only first live VF run remains open +- Follow-up tasks 2 and 3 completed; only first live a private downstream repo run remains open **The system is architecturally complete and gap-free.** Twelve verification passes across this codebase have found and closed 23 gaps; Rev 7 through Rev 12 all found zero new gaps (six consecutive clean passes). The two non-correctness follow-up enhancements are now complete. The one remaining open item (first live managed private project run) is operational, not architectural. diff --git a/docs/history/managed-repo/managed_repo_audit_toolset_contract.md b/docs/history/managed-repo/managed_repo_audit_toolset_contract.md index 9175aa50..62903e93 100644 --- a/docs/history/managed-repo/managed_repo_audit_toolset_contract.md +++ b/docs/history/managed-repo/managed_repo_audit_toolset_contract.md @@ -96,8 +96,8 @@ OpsCenter is the source of truth for `run_id`. The injection flow: OpsCenter generates: run_id = uuid4().hex resolve_invocation_request sets: env["AUDIT_RUN_ID"] = run_id Phase 6 passes env to subprocess -VF reads: os.environ["AUDIT_RUN_ID"] -VF writes run_id into run_status.json +a private downstream repo reads: os.environ["AUDIT_RUN_ID"] +a private downstream repo writes run_id into run_status.json OpsCenter reads run_status.json and verifies run_id matches ``` @@ -146,7 +146,7 @@ manifest_path = resolve_artifact_manifest_path(rs, base_dir="/path/to/vf/repo") - For absolute paths: returns as-is - Returns an absolute `Path` — the file is not read or validated at this stage -The `artifact_manifest_path` in managed private project run_status files is relative to the VF repo root (e.g. `tools/audit/report/representative/{bucket}/artifact_manifest.json`). +The `artifact_manifest_path` in managed private project run_status files is relative to the a private downstream repo repo root (e.g. `tools/audit/report/representative/{bucket}/artifact_manifest.json`). --- @@ -165,9 +165,9 @@ There is no fallback. No glob. No directory listing. No path inference from buck If `artifact_manifest_path` is absent, `ArtifactManifestPathMissingError` is raised. The caller must surface this as a contract violation, not attempt to find the file another way. This rule exists because: -1. OpsCenter must not know VF internals (bucket naming, directory shapes) +1. OpsCenter must not know a private downstream repo internals (bucket naming, directory shapes) 2. Discovery by scanning would silently accept stale or wrong manifests -3. Phase 5 must add `artifact_manifest_path` to VF — not knowing the path is a Phase 5 signal, not a Phase 3 workaround +3. Phase 5 must add `artifact_manifest_path` to a private downstream repo — not knowing the path is a Phase 5 signal, not a Phase 3 workaround --- @@ -221,4 +221,4 @@ Thin read-only CLI wrappers may be added if consistent with the existing `entryp - Phase 3 does not implement dispatch orchestration (Phase 6). - Phase 3 does not scan directories or infer paths from bucket naming. - Phase 3 does not import managed private project Python code. -- Phase 3 does not add `artifact_manifest_path` to VF (Phase 5 job). +- Phase 3 does not add `artifact_manifest_path` to a private downstream repo (Phase 5 job). diff --git a/docs/history/managed-repo/managed_repo_run_identity.md b/docs/history/managed-repo/managed_repo_run_identity.md index b70ebd31..bff9e5a9 100644 --- a/docs/history/managed-repo/managed_repo_run_identity.md +++ b/docs/history/managed-repo/managed_repo_run_identity.md @@ -8,7 +8,7 @@ ## Purpose -Phase 4 defines how OperationsCenter generates, validates, and propagates a unique identity for each managed audit run. A `run_id` is the anchor for lifecycle tracking: it ties the invocation request, the VF audit process, and the output files together. +Phase 4 defines how OperationsCenter generates, validates, and propagates a unique identity for each managed audit run. A `run_id` is the anchor for lifecycle tracking: it ties the invocation request, the a private downstream repo audit process, and the output files together. This phase does not execute commands. It does not write manifests. It makes managed audit invocations traceable before Phase 6 dispatch runs them. @@ -83,7 +83,7 @@ managed-private-project_enrichment_20260426T164233Z_c9d0e1f2 The format is validated by `is_valid_run_id(run_id)` and enforced by the `ManagedRunIdentity` Pydantic field validator. -**Format decision:** The recommended format from the Phase 4 spec was adopted verbatim. The uppercase `T` and `Z` are ISO 8601 timestamp separators and are path-safe (no shell or filesystem special meaning). The suffix uses `secrets.token_hex(4)` for cryptographic randomness without dependencies on VF internals. +**Format decision:** The recommended format from the Phase 4 spec was adopted verbatim. The uppercase `T` and `Z` are ISO 8601 timestamp separators and are path-safe (no shell or filesystem special meaning). The suffix uses `secrets.token_hex(4)` for cryptographic randomness without dependencies on a private downstream repo internals. --- @@ -188,7 +188,7 @@ For managed runs initiated by OpsCenter: OpsCenter reads run_id from run_status.json and verifies it matches. For local/dev runs outside OpsCenter: - VF may generate its own id. + a private downstream repo may generate its own id. These runs are not OpsCenter-managed. OpsCenter does not ingest these without a reconciliation step (future phase). ``` diff --git a/src/operations_center/entrypoints/board_worker/outcomes.py b/src/operations_center/entrypoints/board_worker/outcomes.py index 3f5fce93..2997c4b9 100644 --- a/src/operations_center/entrypoints/board_worker/outcomes.py +++ b/src/operations_center/entrypoints/board_worker/outcomes.py @@ -28,6 +28,7 @@ logger = logging.getLogger(__name__) _MAX_FOLLOW_UP_RETRIES = 3 +_TERMINAL_STATE_NAMES = {"cancelled", "done"} # ── Atomic failure transition ───────────────────────────────────────────────── @@ -35,6 +36,24 @@ def fail_task(client, task_id: str, role: str, reason: str) -> None: try: + # Don't overwrite a terminal state (Cancelled/Done) set externally while + # the executor was running — that would restart a dead-remediation loop. + try: + issue = client.fetch_issue(task_id) + state = issue.get("state") or {} + state_name = ( + state.get("name", "") if isinstance(state, dict) else str(state) + ).lower() + if state_name in _TERMINAL_STATE_NAMES: + logger.info( + "board_worker[%s]: task_id=%s already %r — skipping fail transition", + role, + task_id, + state_name, + ) + return + except Exception: + pass # If fetch fails, fall through to the normal transition client.transition_issue(task_id, STATE_BLOCKED) client.comment_issue(task_id, f"board_worker[{role}] blocked — {reason}") except Exception as exc: diff --git a/src/operations_center/observer/collectors/check_signal.py b/src/operations_center/observer/collectors/check_signal.py index 1f5ba92e..7c59b1de 100644 --- a/src/operations_center/observer/collectors/check_signal.py +++ b/src/operations_center/observer/collectors/check_signal.py @@ -7,6 +7,7 @@ import logging import re import subprocess +import sys from datetime import UTC, datetime from pathlib import Path @@ -56,12 +57,20 @@ def _fallback_discovery(self, context: ObserverContext) -> CheckSignal: if not has_config: return CheckSignal(status="no_config", source="fallback:no_pytest_config") + # Prefer repo-local venv pytest; fall back to current interpreter's pytest. + # Bare "pytest" is not reliable — the subprocess does not inherit venv PATH. + repo_pytest = repo_root / ".venv" / "bin" / "pytest" + if repo_pytest.is_file(): + pytest_cmd = [str(repo_pytest)] + else: + pytest_cmd = [sys.executable, "-m", "pytest"] + try: result = subprocess.run( - ["pytest", "--collect-only", "-q", "--no-header"], + [*pytest_cmd, "--collect-only", "-q", "--no-header"], capture_output=True, text=True, - timeout=5, + timeout=30, cwd=repo_root, ) if result.returncode not in (0, 5): diff --git a/tests/test_check_signal_collector.py b/tests/test_check_signal_collector.py index 9307f728..77dd9954 100644 --- a/tests/test_check_signal_collector.py +++ b/tests/test_check_signal_collector.py @@ -3,9 +3,10 @@ from __future__ import annotations import subprocess +import sys from datetime import UTC, datetime from pathlib import Path -from unittest.mock import patch +from unittest.mock import call, patch from operations_center.observer.collectors.check_signal import CheckSignalCollector from operations_center.observer.service import ObserverContext @@ -334,6 +335,54 @@ def __init__(self, mtime): assert sig.observed_at == expected_time +# ------------------------------------------------------------------ +# 9. Pytest command selection: repo-local venv vs sys.executable +# ------------------------------------------------------------------ + + +def test_uses_repo_venv_pytest_when_present(tmp_path: Path) -> None: + """When .venv/bin/pytest exists in the repo root, it is used directly.""" + ctx = _make_context(tmp_path) + (ctx.repo_path / "pytest.ini").write_text("[pytest]\n") + venv_pytest = ctx.repo_path / ".venv" / "bin" / "pytest" + venv_pytest.parent.mkdir(parents=True) + venv_pytest.touch() + + collect_stdout = "tests/test_x.py::test_a\n\n1 test collected\n" + fake_result = subprocess.CompletedProcess(args=[], returncode=0, stdout=collect_stdout, stderr="") + + with patch( + "operations_center.observer.collectors.check_signal.subprocess.run", + return_value=fake_result, + ) as mock_run: + CheckSignalCollector()._fallback_discovery(ctx) + + called_cmd = mock_run.call_args[0][0] + assert called_cmd[0] == str(venv_pytest), "should use repo-local venv pytest" + assert "--collect-only" in called_cmd + + +def test_falls_back_to_sys_executable_when_no_venv(tmp_path: Path) -> None: + """When no .venv/bin/pytest in repo root, fall back to sys.executable -m pytest.""" + ctx = _make_context(tmp_path) + (ctx.repo_path / "pytest.ini").write_text("[pytest]\n") + # No .venv/bin/pytest created + + collect_stdout = "tests/test_x.py::test_a\n\n1 test collected\n" + fake_result = subprocess.CompletedProcess(args=[], returncode=0, stdout=collect_stdout, stderr="") + + with patch( + "operations_center.observer.collectors.check_signal.subprocess.run", + return_value=fake_result, + ) as mock_run: + CheckSignalCollector()._fallback_discovery(ctx) + + called_cmd = mock_run.call_args[0][0] + assert called_cmd[0] == sys.executable + assert called_cmd[1] == "-m" + assert called_cmd[2] == "pytest" + + def test_guard_oserror_also_skipped(tmp_path: Path) -> None: """Verify that OSError (not just FileNotFoundError) is handled in discovery.""" ctx = _make_context(tmp_path) diff --git a/tests/unit/entrypoints/board_worker/test_outcomes_cov.py b/tests/unit/entrypoints/board_worker/test_outcomes_cov.py index 021cb32b..1bcd14d7 100644 --- a/tests/unit/entrypoints/board_worker/test_outcomes_cov.py +++ b/tests/unit/entrypoints/board_worker/test_outcomes_cov.py @@ -63,6 +63,29 @@ def test_fail_task_swallows_exception(caplog): client.comment_issue.assert_not_called() +@pytest.mark.parametrize("state_name", ["Cancelled", "Done", "cancelled", "done"]) +def test_fail_task_skips_terminal_state(state_name): + client = _make_client() + client.fetch_issue.return_value = {"state": {"name": state_name}} + outcomes.fail_task(client, "t1", "improve", "executor killed") + client.transition_issue.assert_not_called() + client.comment_issue.assert_not_called() + + +def test_fail_task_proceeds_when_fetch_raises(): + client = _make_client() + client.fetch_issue.side_effect = RuntimeError("plane down") + outcomes.fail_task(client, "t1", "goal", "boom") + client.transition_issue.assert_called_once_with("t1", STATE_BLOCKED) + + +def test_fail_task_proceeds_when_state_not_terminal(): + client = _make_client() + client.fetch_issue.return_value = {"state": {"name": "Blocked"}} + outcomes.fail_task(client, "t1", "goal", "boom") + client.transition_issue.assert_called_once_with("t1", STATE_BLOCKED) + + # ── read_improve_output ───────────────────────────────────────────────────────