diff --git a/.claude/commands/generate-brief.md b/.claude/commands/generate-brief.md index 800f4ee6..672d69c3 100644 --- a/.claude/commands/generate-brief.md +++ b/.claude/commands/generate-brief.md @@ -104,7 +104,7 @@ your editorial judgment. - Read approved workspace sources, evidence inputs, and cached packages. - Extract candidate reportable items. - With `role_topology=default`, Scout writes both `candidate_claims.json` and `screened_candidates.json` before `stage-complete --stage scout`. - - Do not delegate Screener in default topology. The Screener stage is satisfied by topology after `stage-complete --stage scout` succeeds with both artifacts present. + - Do not delegate Screener in default topology and do not call `state stage-complete --stage screener`. The Screener stage is satisfied by topology after `stage-complete --stage scout` succeeds with both artifacts present. - With `role_topology=strict`, Scout writes only `candidate_claims.json`; strict topology delegates Screener separately after Scout completion. - Optional chunk parallelism is parent-side only: chunk outputs are scratch/intermediate runtime material, not workflow artifacts. - If Scout work is split across chunks or child agents, the parent must join chunks deterministically before writing `candidate_claims.json`, using source identity, source path or URL, source date, topic, and evidence text rather than completion order. @@ -113,6 +113,7 @@ your editorial judgment. - Do not append to `candidate_claims.json` from chunk workers, and do not silently drop duplicate or near-duplicate chunk outputs. - Write `$ARGUMENTS/output/intermediate/candidate_claims.json`. - In default topology, also screen candidates and write `$ARGUMENTS/output/intermediate/screened_candidates.json` before recording `stage-complete --stage scout`. + - Do not replay Screener delegation or `stage-complete --stage screener` in default topology. - Check the expected artifact. - Run `multi-agent-brief state stage-complete --workspace $ARGUMENTS --stage scout --reason "Candidate claims were extracted."`. - If the transaction fails, stop and report the failure. Do not invoke the next specialist. diff --git a/.opencode/commands/generate-brief.md b/.opencode/commands/generate-brief.md index 659f21c1..f268732f 100644 --- a/.opencode/commands/generate-brief.md +++ b/.opencode/commands/generate-brief.md @@ -54,6 +54,7 @@ Stage sequence: 6. Read `configs/policy_packs/default.yaml` and apply role topology: - `default`: Scout performs discovery + screening and writes both `candidate_claims.json` and `screened_candidates.json`. + Do not delegate Screener and do not call `state stage-complete --stage screener` in default topology. - `strict`: Scout writes only `candidate_claims.json`; then Screener writes `screened_candidates.json`. - In all modes both artifacts are required before Claim Ledger. - Optional chunk parallelism is parent-side only: chunk outputs are scratch/intermediate runtime material, not workflow artifacts. @@ -67,6 +68,7 @@ Stage sequence: - Extract candidate reportable items. - Write `$ARGUMENTS/output/intermediate/candidate_claims.json`. - In default topology, screen candidates and write `$ARGUMENTS/output/intermediate/screened_candidates.json` before recording `stage-complete --stage scout`. + - Do not replay Screener delegation or `stage-complete --stage screener` in default topology. 8. Strict topology only: check `candidate_claims.json`, then delegate the **brief-screener** subagent: - Dedupe, rank, freshness-check, and cap candidates. diff --git a/scripts/ci/smoke_packaged_topology_handoff.py b/scripts/ci/smoke_packaged_topology_handoff.py index 23e428f8..fff3f40b 100644 --- a/scripts/ci/smoke_packaged_topology_handoff.py +++ b/scripts/ci/smoke_packaged_topology_handoff.py @@ -106,6 +106,7 @@ def _protocol_stage(handoff: dict[str, object], stage_id: str) -> dict[str, obje def _assert_default_handoff(path: Path) -> None: data = _handoff(path) prompt = str(data.get("prompt") or "") + prompt_plain = prompt.replace("`", "") screener = _protocol_stage(data, "screener") satisfaction = screener.get("topology_satisfaction") if not isinstance(satisfaction, dict): @@ -121,8 +122,15 @@ def _assert_default_handoff(path: Path) -> None: } if required_ids != {"candidate_claims", "screened_candidates"}: raise SystemExit(f"default topology required artifacts wrong: {required_ids}") + forbidden = default.get("forbidden_replay_actions") + if forbidden != ["delegate screener", "state stage-complete --stage screener"]: + raise SystemExit(f"default topology forbidden replay actions wrong: {forbidden}") if "default: satisfied by scout" not in prompt: raise SystemExit("handoff prompt does not describe default topology satisfaction") + if "do not call state stage-complete --stage screener" not in prompt_plain: + raise SystemExit("handoff prompt does not forbid default screener stage-complete replay") + if "do not replay delegate screener, state stage-complete --stage screener" not in prompt: + raise SystemExit("handoff prompt does not describe forbidden screener replay actions") if "independent MUST produce (strict)" not in prompt: raise SystemExit("handoff prompt does not reserve independent screener for strict") diff --git a/scripts/generate_agent_configs.py b/scripts/generate_agent_configs.py index 1a9b2218..699d3beb 100644 --- a/scripts/generate_agent_configs.py +++ b/scripts/generate_agent_configs.py @@ -852,6 +852,7 @@ def render_opencode_command_generate_brief(manifest: dict) -> str: "\n" "6. Read `configs/policy_packs/default.yaml` and apply role topology:\n" " - `default`: Scout performs discovery + screening and writes both `candidate_claims.json` and `screened_candidates.json`.\n" + " Do not delegate Screener and do not call `state stage-complete --stage screener` in default topology.\n" " - `strict`: Scout writes only `candidate_claims.json`; then Screener writes `screened_candidates.json`.\n" " - In all modes both artifacts are required before Claim Ledger.\n" " - Optional chunk parallelism is parent-side only: chunk outputs are scratch/intermediate runtime material, not workflow artifacts.\n" @@ -865,6 +866,7 @@ def render_opencode_command_generate_brief(manifest: dict) -> str: " - Extract candidate reportable items.\n" " - Write `$ARGUMENTS/output/intermediate/candidate_claims.json`.\n" " - In default topology, screen candidates and write `$ARGUMENTS/output/intermediate/screened_candidates.json` before recording `stage-complete --stage scout`.\n" + " - Do not replay Screener delegation or `stage-complete --stage screener` in default topology.\n" "\n" "8. Strict topology only: check `candidate_claims.json`, then delegate the **brief-screener** subagent:\n" " - Dedupe, rank, freshness-check, and cap candidates.\n" diff --git a/src/multi_agent_brief/cli/start_commands.py b/src/multi_agent_brief/cli/start_commands.py index 57c5a678..b72d175e 100644 --- a/src/multi_agent_brief/cli/start_commands.py +++ b/src/multi_agent_brief/cli/start_commands.py @@ -113,11 +113,13 @@ "write output/intermediate/candidate_claims.json first, then " "output/intermediate/screened_candidates.json, and record " "`state stage-complete --stage scout`; the screener stage is satisfied by topology " - "and must not be run as a separate specialist. With role_topology=strict, Scout " - "writes only candidate_claims.json, then delegate Screener to write " - "screened_candidates.json. In all modes both artifacts remain required evidence " - "for Claim Ledger. Runtime may split Scout work internally, but chunk outputs " - "are scratch material only; only deterministic joined artifacts count." + "and must not be run as a separate specialist. Do not delegate Screener and " + "do not call `state stage-complete --stage screener` in default topology. " + "With role_topology=strict, Scout writes only candidate_claims.json, then " + "delegate Screener to write screened_candidates.json. In all modes both " + "artifacts remain required evidence for Claim Ledger. Runtime may split " + "Scout work internally, but chunk outputs are scratch material only; only " + "deterministic joined artifacts count." ) RUNTIME_WEBSEARCH_ZERO_RESULT_NOTE = ( "Runtime WebSearch zero-result guard: if runtime WebSearch reports `Did 0 searches`, " @@ -506,7 +508,7 @@ def _manual_handoff(workspace: Path, repo: Path, venv: str) -> AgentHandoff: f"3. multi-agent-brief inputs extract --config {ws_path}/config.yaml (if PDF/DOCX/image inputs exist)\n" f"4. multi-agent-brief inputs classify --config {ws_path}/config.yaml\n" "5. Use the 'scout' subagent. Runtime may split Scout work internally, but chunk outputs are scratch only; write the deterministic joined output/intermediate/candidate_claims.json once. Default topology: also write output/intermediate/screened_candidates.json from the joined candidate universe, then stage-complete scout. Strict topology: write only candidate_claims.json.\n" - "6. Strict topology only: use the 'screener' subagent to write output/intermediate/screened_candidates.json.\n" + "6. Default topology: do not delegate Screener and do not call state stage-complete --stage screener; topology satisfaction records Screener after Scout completes. Strict topology only: use the 'screener' subagent to write output/intermediate/screened_candidates.json.\n" "7. Use the 'claim-ledger' subagent to write output/intermediate/claim_drafts.json, then run " f"multi-agent-brief state freeze-claim-ledger --workspace {ws_path} to create output/intermediate/claim_ledger.json, " f"then run multi-agent-brief state stage-complete --workspace {ws_path} --stage claim-ledger --reason \"Claim Ledger was frozen from claim drafts.\"\n" @@ -790,6 +792,11 @@ def _build_stage_completion_protocol(repo: Path) -> dict[str, Any]: stages = load_stage_specs(repo) artifacts = load_artifact_contracts(repo) artifact_by_id = {str(item.get("artifact_id")): item for item in artifacts if item.get("artifact_id")} + stage_ids = { + str(stage.get("stage_id") or "") + for stage in stages + if stage.get("stage_id") + } stage_protocol: list[dict[str, Any]] = [] for stage in stages: @@ -820,8 +827,10 @@ def _build_stage_completion_protocol(repo: Path) -> dict[str, Any]: }) topology_satisfaction = _protocol_topology_satisfaction( + stage_id, stage.get("topology_satisfaction") or {}, artifact_by_id, + stage_ids=stage_ids, ) freeze_input_artifacts: list[dict[str, Any]] = [] pre_completion_transactions: list[dict[str, Any]] = [] @@ -925,13 +934,17 @@ def _ordered_role_topologies() -> list[str]: def _protocol_topology_satisfaction( + stage_id: str, rules: dict[str, Any], artifact_by_id: dict[str, dict[str, Any]], + *, + stage_ids: set[str], ) -> dict[str, dict[str, Any]]: result: dict[str, dict[str, Any]] = {} for topology, rule in sorted(rules.items()): if not isinstance(rule, dict): continue + satisfied_by = str(rule.get("satisfied_by") or "") required_artifacts: list[dict[str, Any]] = [] for artifact_id in rule.get("required_artifacts") or []: artifact_key = str(artifact_id) @@ -945,10 +958,16 @@ def _protocol_topology_satisfaction( "required": True, "format": "", }) - result[str(topology)] = { - "satisfied_by": str(rule.get("satisfied_by") or ""), + projected_rule = { + "satisfied_by": satisfied_by, "required_artifacts": required_artifacts, } + if satisfied_by in stage_ids: + projected_rule["forbidden_replay_actions"] = [ + f"delegate {stage_id}", + f"state stage-complete --stage {stage_id}", + ] + result[str(topology)] = projected_rule return result @@ -1023,9 +1042,11 @@ def _render_topology_satisfaction(rules: dict[str, Any]) -> str: if not isinstance(rule, dict): continue artifacts = _protocol_paths(rule.get("required_artifacts") or []) + forbidden = ", ".join(str(item) for item in (rule.get("forbidden_replay_actions") or [])) + replay_text = f"; do not replay {forbidden}" if forbidden else "" parts.append( f"{topology}: satisfied by {rule.get('satisfied_by') or 'unknown'} " - f"when {artifacts} exist" + f"when {artifacts} exist{replay_text}" ) return "; ".join(parts) if parts else "none" @@ -1565,6 +1586,11 @@ def write_handoff_artifacts(handoff: AgentHandoff, workspace: Path) -> tuple[Pat md_content.append( f" - `{topology}`: satisfied by `{rule.get('satisfied_by')}` when {artifacts} exist" ) + forbidden = rule.get("forbidden_replay_actions") or [] + if forbidden: + md_content.append( + " - Do not replay: " + ", ".join(str(item) for item in forbidden) + ) md_content.append( "- Independent completion topologies: " + (", ".join(stage.get("independent_completion_topologies") or []) or "none") diff --git a/src/multi_agent_brief/hermes/adapter.py b/src/multi_agent_brief/hermes/adapter.py index d46f3499..d772ed1e 100644 --- a/src/multi_agent_brief/hermes/adapter.py +++ b/src/multi_agent_brief/hermes/adapter.py @@ -649,6 +649,10 @@ def render_hermes_cron_markdown(plan: HermesCronPlan) -> str: #### 2. Screener child (strict topology or explicit repair/review) +In default topology, do not delegate Screener and do not call +`state stage-complete --stage screener`; Scout writes `screened_candidates.json` +and the Screener stage is satisfied by topology after Scout completion. + ```python delegate_task( goal="Screen and rank MABW candidate claims", @@ -977,6 +981,7 @@ def render_hermes_prompt( toolsets: ["file", "terminal", "web"] 13. If role_topology is `strict`, after candidate_claims.json exists and is non-empty, delegate screener child. If role_topology is `default`, Scout must already have written screened_candidates.json and the screener stage is satisfied by topology: + Do not delegate Screener and do not call `state stage-complete --stage screener` in default topology. Goal: "Screen and rank MABW candidate claims" Input: output/intermediate/candidate_claims.json Write: output/intermediate/screened_candidates.json diff --git a/tests/test_hermes_adapter.py b/tests/test_hermes_adapter.py index 58e07e43..c397e3c4 100644 --- a/tests/test_hermes_adapter.py +++ b/tests/test_hermes_adapter.py @@ -228,6 +228,8 @@ def test_hermes_skill_has_delegation_sequence(): assert "child completion" in skill assert "do not silently drop chunk-level outputs" in skill assert "Only the final joined" in skill + assert "do not delegate Screener and do not call" in skill + assert "state stage-complete --stage screener" in skill def test_hermes_skill_contains_atomic_graph_boundary(): diff --git a/tests/test_start_commands.py b/tests/test_start_commands.py index 8e6a920e..0a5fc7a2 100644 --- a/tests/test_start_commands.py +++ b/tests/test_start_commands.py @@ -307,6 +307,10 @@ def _assert_orchestrator_contract_handoff(data: dict[str, object]) -> None: assert "drop duplicates silently during chunk join" in scout_chunk_contract["forbidden"] screener_protocol = protocol_stages["screener"] assert screener_protocol["topology_satisfaction"]["default"]["satisfied_by"] == "scout" + assert screener_protocol["topology_satisfaction"]["default"]["forbidden_replay_actions"] == [ + "delegate screener", + "state stage-complete --stage screener", + ] assert screener_protocol["topology_satisfaction"]["default"]["required_artifacts"] == [ { "artifact_id": "candidate_claims", @@ -322,7 +326,17 @@ def _assert_orchestrator_contract_handoff(data: dict[str, object]) -> None: }, ] assert screener_protocol["topology_satisfaction"]["human_assisted"]["satisfied_by"] == "scout" + assert screener_protocol["topology_satisfaction"]["human_assisted"]["forbidden_replay_actions"] == [ + "delegate screener", + "state stage-complete --stage screener", + ] assert screener_protocol["independent_completion_topologies"] == ["strict"] + analyst_protocol = protocol_stages["analyst"] + assert analyst_protocol["topology_satisfaction"]["human_assisted"]["satisfied_by"] == "writer" + assert "forbidden_replay_actions" not in analyst_protocol["topology_satisfaction"]["human_assisted"] + editor_protocol = protocol_stages["editor"] + assert editor_protocol["topology_satisfaction"]["human_assisted"]["satisfied_by"] == "writer" + assert "forbidden_replay_actions" not in editor_protocol["topology_satisfaction"]["human_assisted"] claim_ledger_protocol = protocol_stages["claim-ledger"] assert claim_ledger_protocol["freeze_input_artifacts"] == [ { @@ -417,6 +431,7 @@ def _assert_orchestrator_contract_handoff(data: dict[str, object]) -> None: assert "runtime-internal scratch only" in text assert "append to candidate_claims.json from chunk workers" in text assert "topology satisfaction: default: satisfied by scout" in text + assert "do not replay delegate screener, state stage-complete --stage screener" in text assert "independent MUST produce (strict): screened_candidates at output/intermediate/screened_candidates.json" in text assert "role MUST produce freeze input: audited_brief at output/intermediate/audited_brief.md" in text assert ( @@ -1068,6 +1083,7 @@ def test_start_codex_handoff_uses_root_session_orchestrator(tmp_path): assert "state stage-complete" in prompt assert "state finalize-complete" in prompt assert "With role_topology=default, Scout performs discovery and screening in one role" in prompt + assert "do not call `state stage-complete --stage screener`" in prompt assert "workspace is trusted in Codex" in prompt assert "install Codex runtime assets" in prompt assert "Codex writer flow protocol" in prompt @@ -1242,6 +1258,7 @@ def test_handoff_with_config_generates_artifacts(tmp_path): assert "scratch only: `True`" in handoff_md assert "Join chunk outputs deterministically before writing workflow artifacts" in handoff_md assert "append to candidate_claims.json from chunk workers" in handoff_md + assert "Do not replay: delegate screener, state stage-complete --stage screener" in handoff_md def test_handoff_no_config_fails(tmp_path): @@ -1283,6 +1300,7 @@ def test_build_handoff_claude_has_generate_brief(tmp_path): ) assert "/generate-brief" in handoff.prompt assert "With role_topology=default, Scout performs discovery and screening in one role" in handoff.prompt + assert "do not call `state stage-complete --stage screener` in default topology" in handoff.prompt assert "strict: scout → screener" in handoff.prompt _assert_orchestrator_contract_handoff(handoff.to_dict()) @@ -1302,6 +1320,7 @@ def test_build_handoff_codex_maps_specialists_to_custom_agents(tmp_path): assert "Spawn the named Codex custom agent" in handoff.prompt assert ".codex/agents/scout.toml" in handoff.prompt assert "default: discovery + screening" in handoff.prompt + assert "do not call `state stage-complete --stage screener` in default topology" in handoff.prompt assert "strict topology or explicit repair/review only" in handoff.prompt assert ".codex/agents/claim-ledger.toml" in handoff.prompt assert "Do not call the next specialist until" in handoff.prompt diff --git a/tests/test_status_commands.py b/tests/test_status_commands.py index bf3fe640..ccdb89c2 100644 --- a/tests/test_status_commands.py +++ b/tests/test_status_commands.py @@ -489,6 +489,21 @@ def test_status_command_human_output_reports_topology_satisfied_stage(tmp_path, "(default; required=candidate_claims,screened_candidates)" ) in out + rc = main(["status", "--workspace", str(ws), "--json"]) + + assert rc == 0 + payload = json.loads(capsys.readouterr().out) + screener = next( + stage + for stage in payload["timing"]["stages"] + if stage.get("stage_id") == "screener" + ) + assert screener["status"] == "satisfied_by_topology" + assert screener["completion_event_type"] == "stage_satisfied_by_topology" + assert screener["topology"] == "default" + assert screener["satisfied_by"] == "scout" + assert screener["required_artifacts"] == ["candidate_claims", "screened_candidates"] + def test_status_command_reports_auditable_target_complete(tmp_path, capsys): ws = _minimal_workspace(tmp_path / "ws") diff --git a/tests/test_subagent_first_contract.py b/tests/test_subagent_first_contract.py index 1015487f..a7bebcfd 100644 --- a/tests/test_subagent_first_contract.py +++ b/tests/test_subagent_first_contract.py @@ -175,7 +175,9 @@ def test_claude_generate_brief_is_topology_aware_for_scout_and_screener(): text = _read(".claude/commands/generate-brief.md") assert "With `role_topology=default`, Scout writes both `candidate_claims.json`" in text assert "`screened_candidates.json` before `stage-complete --stage scout`" in text - assert "Do not delegate Screener in default topology." in text + assert "Do not delegate Screener in default topology" in text + assert "do not call `state stage-complete --stage screener`" in text + assert "Do not replay Screener delegation or `stage-complete --stage screener`" in text assert "Strict topology only: invoke the **screener** subagent" in text assert "With `role_topology=strict`, Scout writes only `candidate_claims.json`" in text assert "strict topology delegates Screener separately after Scout completion" in text