Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .claude/commands/generate-brief.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ your editorial judgment.
- Read approved workspace sources, evidence inputs, and cached packages.
- Extract candidate reportable items.
- With `role_topology=default`, Scout writes both `candidate_claims.json` and `screened_candidates.json` before `stage-complete --stage scout`.
- Do not delegate Screener in default topology. The Screener stage is satisfied by topology after `stage-complete --stage scout` succeeds with both artifacts present.
- Do not delegate Screener in default topology and do not call `state stage-complete --stage screener`. The Screener stage is satisfied by topology after `stage-complete --stage scout` succeeds with both artifacts present.
- With `role_topology=strict`, Scout writes only `candidate_claims.json`; strict topology delegates Screener separately after Scout completion.
- Optional chunk parallelism is parent-side only: chunk outputs are scratch/intermediate runtime material, not workflow artifacts.
- If Scout work is split across chunks or child agents, the parent must join chunks deterministically before writing `candidate_claims.json`, using source identity, source path or URL, source date, topic, and evidence text rather than completion order.
Expand All @@ -113,6 +113,7 @@ your editorial judgment.
- Do not append to `candidate_claims.json` from chunk workers, and do not silently drop duplicate or near-duplicate chunk outputs.
- Write `$ARGUMENTS/output/intermediate/candidate_claims.json`.
- In default topology, also screen candidates and write `$ARGUMENTS/output/intermediate/screened_candidates.json` before recording `stage-complete --stage scout`.
- Do not replay Screener delegation or `stage-complete --stage screener` in default topology.
- Check the expected artifact.
- Run `multi-agent-brief state stage-complete --workspace $ARGUMENTS --stage scout --reason "Candidate claims were extracted."`.
- If the transaction fails, stop and report the failure. Do not invoke the next specialist.
Expand Down
2 changes: 2 additions & 0 deletions .opencode/commands/generate-brief.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ Stage sequence:

6. Read `configs/policy_packs/default.yaml` and apply role topology:
- `default`: Scout performs discovery + screening and writes both `candidate_claims.json` and `screened_candidates.json`.
Do not delegate Screener and do not call `state stage-complete --stage screener` in default topology.
- `strict`: Scout writes only `candidate_claims.json`; then Screener writes `screened_candidates.json`.
- In all modes both artifacts are required before Claim Ledger.
- Optional chunk parallelism is parent-side only: chunk outputs are scratch/intermediate runtime material, not workflow artifacts.
Expand All @@ -67,6 +68,7 @@ Stage sequence:
- Extract candidate reportable items.
- Write `$ARGUMENTS/output/intermediate/candidate_claims.json`.
- In default topology, screen candidates and write `$ARGUMENTS/output/intermediate/screened_candidates.json` before recording `stage-complete --stage scout`.
- Do not replay Screener delegation or `stage-complete --stage screener` in default topology.

8. Strict topology only: check `candidate_claims.json`, then delegate the **brief-screener** subagent:
- Dedupe, rank, freshness-check, and cap candidates.
Expand Down
8 changes: 8 additions & 0 deletions scripts/ci/smoke_packaged_topology_handoff.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def _protocol_stage(handoff: dict[str, object], stage_id: str) -> dict[str, obje
def _assert_default_handoff(path: Path) -> None:
data = _handoff(path)
prompt = str(data.get("prompt") or "")
prompt_plain = prompt.replace("`", "")
screener = _protocol_stage(data, "screener")
satisfaction = screener.get("topology_satisfaction")
if not isinstance(satisfaction, dict):
Expand All @@ -121,8 +122,15 @@ def _assert_default_handoff(path: Path) -> None:
}
if required_ids != {"candidate_claims", "screened_candidates"}:
raise SystemExit(f"default topology required artifacts wrong: {required_ids}")
forbidden = default.get("forbidden_replay_actions")
if forbidden != ["delegate screener", "state stage-complete --stage screener"]:
raise SystemExit(f"default topology forbidden replay actions wrong: {forbidden}")
if "default: satisfied by scout" not in prompt:
raise SystemExit("handoff prompt does not describe default topology satisfaction")
if "do not call state stage-complete --stage screener" not in prompt_plain:
raise SystemExit("handoff prompt does not forbid default screener stage-complete replay")
if "do not replay delegate screener, state stage-complete --stage screener" not in prompt:
raise SystemExit("handoff prompt does not describe forbidden screener replay actions")
if "independent MUST produce (strict)" not in prompt:
raise SystemExit("handoff prompt does not reserve independent screener for strict")

Expand Down
2 changes: 2 additions & 0 deletions scripts/generate_agent_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,6 +852,7 @@ def render_opencode_command_generate_brief(manifest: dict) -> str:
"\n"
"6. Read `configs/policy_packs/default.yaml` and apply role topology:\n"
" - `default`: Scout performs discovery + screening and writes both `candidate_claims.json` and `screened_candidates.json`.\n"
" Do not delegate Screener and do not call `state stage-complete --stage screener` in default topology.\n"
" - `strict`: Scout writes only `candidate_claims.json`; then Screener writes `screened_candidates.json`.\n"
" - In all modes both artifacts are required before Claim Ledger.\n"
" - Optional chunk parallelism is parent-side only: chunk outputs are scratch/intermediate runtime material, not workflow artifacts.\n"
Expand All @@ -865,6 +866,7 @@ def render_opencode_command_generate_brief(manifest: dict) -> str:
" - Extract candidate reportable items.\n"
" - Write `$ARGUMENTS/output/intermediate/candidate_claims.json`.\n"
" - In default topology, screen candidates and write `$ARGUMENTS/output/intermediate/screened_candidates.json` before recording `stage-complete --stage scout`.\n"
" - Do not replay Screener delegation or `stage-complete --stage screener` in default topology.\n"
"\n"
"8. Strict topology only: check `candidate_claims.json`, then delegate the **brief-screener** subagent:\n"
" - Dedupe, rank, freshness-check, and cap candidates.\n"
Expand Down
44 changes: 35 additions & 9 deletions src/multi_agent_brief/cli/start_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,13 @@
"write output/intermediate/candidate_claims.json first, then "
"output/intermediate/screened_candidates.json, and record "
"`state stage-complete --stage scout`; the screener stage is satisfied by topology "
"and must not be run as a separate specialist. With role_topology=strict, Scout "
"writes only candidate_claims.json, then delegate Screener to write "
"screened_candidates.json. In all modes both artifacts remain required evidence "
"for Claim Ledger. Runtime may split Scout work internally, but chunk outputs "
"are scratch material only; only deterministic joined artifacts count."
"and must not be run as a separate specialist. Do not delegate Screener and "
"do not call `state stage-complete --stage screener` in default topology. "
"With role_topology=strict, Scout writes only candidate_claims.json, then "
"delegate Screener to write screened_candidates.json. In all modes both "
"artifacts remain required evidence for Claim Ledger. Runtime may split "
"Scout work internally, but chunk outputs are scratch material only; only "
"deterministic joined artifacts count."
)
RUNTIME_WEBSEARCH_ZERO_RESULT_NOTE = (
"Runtime WebSearch zero-result guard: if runtime WebSearch reports `Did 0 searches`, "
Expand Down Expand Up @@ -506,7 +508,7 @@ def _manual_handoff(workspace: Path, repo: Path, venv: str) -> AgentHandoff:
f"3. multi-agent-brief inputs extract --config {ws_path}/config.yaml (if PDF/DOCX/image inputs exist)\n"
f"4. multi-agent-brief inputs classify --config {ws_path}/config.yaml\n"
"5. Use the 'scout' subagent. Runtime may split Scout work internally, but chunk outputs are scratch only; write the deterministic joined output/intermediate/candidate_claims.json once. Default topology: also write output/intermediate/screened_candidates.json from the joined candidate universe, then stage-complete scout. Strict topology: write only candidate_claims.json.\n"
"6. Strict topology only: use the 'screener' subagent to write output/intermediate/screened_candidates.json.\n"
"6. Default topology: do not delegate Screener and do not call state stage-complete --stage screener; topology satisfaction records Screener after Scout completes. Strict topology only: use the 'screener' subagent to write output/intermediate/screened_candidates.json.\n"
"7. Use the 'claim-ledger' subagent to write output/intermediate/claim_drafts.json, then run "
f"multi-agent-brief state freeze-claim-ledger --workspace {ws_path} to create output/intermediate/claim_ledger.json, "
f"then run multi-agent-brief state stage-complete --workspace {ws_path} --stage claim-ledger --reason \"Claim Ledger was frozen from claim drafts.\"\n"
Expand Down Expand Up @@ -790,6 +792,11 @@ def _build_stage_completion_protocol(repo: Path) -> dict[str, Any]:
stages = load_stage_specs(repo)
artifacts = load_artifact_contracts(repo)
artifact_by_id = {str(item.get("artifact_id")): item for item in artifacts if item.get("artifact_id")}
stage_ids = {
str(stage.get("stage_id") or "")
for stage in stages
if stage.get("stage_id")
}
stage_protocol: list[dict[str, Any]] = []

for stage in stages:
Expand Down Expand Up @@ -820,8 +827,10 @@ def _build_stage_completion_protocol(repo: Path) -> dict[str, Any]:
})

topology_satisfaction = _protocol_topology_satisfaction(
stage_id,
stage.get("topology_satisfaction") or {},
artifact_by_id,
stage_ids=stage_ids,
)
freeze_input_artifacts: list[dict[str, Any]] = []
pre_completion_transactions: list[dict[str, Any]] = []
Expand Down Expand Up @@ -925,13 +934,17 @@ def _ordered_role_topologies() -> list[str]:


def _protocol_topology_satisfaction(
stage_id: str,
rules: dict[str, Any],
artifact_by_id: dict[str, dict[str, Any]],
*,
stage_ids: set[str],
) -> dict[str, dict[str, Any]]:
result: dict[str, dict[str, Any]] = {}
for topology, rule in sorted(rules.items()):
if not isinstance(rule, dict):
continue
satisfied_by = str(rule.get("satisfied_by") or "")
required_artifacts: list[dict[str, Any]] = []
for artifact_id in rule.get("required_artifacts") or []:
artifact_key = str(artifact_id)
Expand All @@ -945,10 +958,16 @@ def _protocol_topology_satisfaction(
"required": True,
"format": "",
})
result[str(topology)] = {
"satisfied_by": str(rule.get("satisfied_by") or ""),
projected_rule = {
"satisfied_by": satisfied_by,
"required_artifacts": required_artifacts,
}
if satisfied_by in stage_ids:
projected_rule["forbidden_replay_actions"] = [
f"delegate {stage_id}",
f"state stage-complete --stage {stage_id}",
]
result[str(topology)] = projected_rule
return result


Expand Down Expand Up @@ -1023,9 +1042,11 @@ def _render_topology_satisfaction(rules: dict[str, Any]) -> str:
if not isinstance(rule, dict):
continue
artifacts = _protocol_paths(rule.get("required_artifacts") or [])
forbidden = ", ".join(str(item) for item in (rule.get("forbidden_replay_actions") or []))
replay_text = f"; do not replay {forbidden}" if forbidden else ""
parts.append(
f"{topology}: satisfied by {rule.get('satisfied_by') or 'unknown'} "
f"when {artifacts} exist"
f"when {artifacts} exist{replay_text}"
)
return "; ".join(parts) if parts else "none"

Expand Down Expand Up @@ -1565,6 +1586,11 @@ def write_handoff_artifacts(handoff: AgentHandoff, workspace: Path) -> tuple[Pat
md_content.append(
f" - `{topology}`: satisfied by `{rule.get('satisfied_by')}` when {artifacts} exist"
)
forbidden = rule.get("forbidden_replay_actions") or []
if forbidden:
md_content.append(
" - Do not replay: " + ", ".join(str(item) for item in forbidden)
)
md_content.append(
"- Independent completion topologies: "
+ (", ".join(stage.get("independent_completion_topologies") or []) or "none")
Expand Down
5 changes: 5 additions & 0 deletions src/multi_agent_brief/hermes/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,10 @@ def render_hermes_cron_markdown(plan: HermesCronPlan) -> str:

#### 2. Screener child (strict topology or explicit repair/review)

In default topology, do not delegate Screener and do not call
`state stage-complete --stage screener`; Scout writes `screened_candidates.json`
and the Screener stage is satisfied by topology after Scout completion.
Comment on lines +652 to +654

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Synchronize the checked-in Hermes skill

This change adds an explicit default-topology replay ban to `render_hermes_skill()`, but the repo-shipped Hermes skill assets were left unchanged: `.agents/hermes-skills/multi-agent-brief-hermes/SKILL.md` and its `references/delegate-task-sequence.md` still do not mention the new `state stage-complete --stage screener` prohibition. Source-clone Hermes users can load those checked-in skill files directly instead of reinstalling a freshly rendered skill, so they would continue to receive the stale guidance that this change is meant to harden. Please update the checked-in Hermes skill and reference file (and add a contract test) as well.

Useful? React with 👍 / 👎.


```python
delegate_task(
goal="Screen and rank MABW candidate claims",
Expand Down Expand Up @@ -977,6 +981,7 @@ def render_hermes_prompt(
toolsets: ["file", "terminal", "web"]

13. If role_topology is `strict`, after candidate_claims.json exists and is non-empty, delegate screener child. If role_topology is `default`, Scout must already have written screened_candidates.json and the screener stage is satisfied by topology:
Do not delegate Screener and do not call `state stage-complete --stage screener` in default topology.
Goal: "Screen and rank MABW candidate claims"
Input: output/intermediate/candidate_claims.json
Write: output/intermediate/screened_candidates.json
Expand Down
2 changes: 2 additions & 0 deletions tests/test_hermes_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ def test_hermes_skill_has_delegation_sequence():
assert "child completion" in skill
assert "do not silently drop chunk-level outputs" in skill
assert "Only the final joined" in skill
assert "do not delegate Screener and do not call" in skill
assert "state stage-complete --stage screener" in skill


def test_hermes_skill_contains_atomic_graph_boundary():
Expand Down
19 changes: 19 additions & 0 deletions tests/test_start_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,10 @@ def _assert_orchestrator_contract_handoff(data: dict[str, object]) -> None:
assert "drop duplicates silently during chunk join" in scout_chunk_contract["forbidden"]
screener_protocol = protocol_stages["screener"]
assert screener_protocol["topology_satisfaction"]["default"]["satisfied_by"] == "scout"
assert screener_protocol["topology_satisfaction"]["default"]["forbidden_replay_actions"] == [
"delegate screener",
"state stage-complete --stage screener",
]
assert screener_protocol["topology_satisfaction"]["default"]["required_artifacts"] == [
{
"artifact_id": "candidate_claims",
Expand All @@ -322,7 +326,17 @@ def _assert_orchestrator_contract_handoff(data: dict[str, object]) -> None:
},
]
assert screener_protocol["topology_satisfaction"]["human_assisted"]["satisfied_by"] == "scout"
assert screener_protocol["topology_satisfaction"]["human_assisted"]["forbidden_replay_actions"] == [
"delegate screener",
"state stage-complete --stage screener",
]
assert screener_protocol["independent_completion_topologies"] == ["strict"]
analyst_protocol = protocol_stages["analyst"]
assert analyst_protocol["topology_satisfaction"]["human_assisted"]["satisfied_by"] == "writer"
assert "forbidden_replay_actions" not in analyst_protocol["topology_satisfaction"]["human_assisted"]
editor_protocol = protocol_stages["editor"]
assert editor_protocol["topology_satisfaction"]["human_assisted"]["satisfied_by"] == "writer"
assert "forbidden_replay_actions" not in editor_protocol["topology_satisfaction"]["human_assisted"]
claim_ledger_protocol = protocol_stages["claim-ledger"]
assert claim_ledger_protocol["freeze_input_artifacts"] == [
{
Expand Down Expand Up @@ -417,6 +431,7 @@ def _assert_orchestrator_contract_handoff(data: dict[str, object]) -> None:
assert "runtime-internal scratch only" in text
assert "append to candidate_claims.json from chunk workers" in text
assert "topology satisfaction: default: satisfied by scout" in text
assert "do not replay delegate screener, state stage-complete --stage screener" in text
assert "independent MUST produce (strict): screened_candidates at output/intermediate/screened_candidates.json" in text
assert "role MUST produce freeze input: audited_brief at output/intermediate/audited_brief.md" in text
assert (
Expand Down Expand Up @@ -1068,6 +1083,7 @@ def test_start_codex_handoff_uses_root_session_orchestrator(tmp_path):
assert "state stage-complete" in prompt
assert "state finalize-complete" in prompt
assert "With role_topology=default, Scout performs discovery and screening in one role" in prompt
assert "do not call `state stage-complete --stage screener`" in prompt
assert "workspace is trusted in Codex" in prompt
assert "install Codex runtime assets" in prompt
assert "Codex writer flow protocol" in prompt
Expand Down Expand Up @@ -1242,6 +1258,7 @@ def test_handoff_with_config_generates_artifacts(tmp_path):
assert "scratch only: `True`" in handoff_md
assert "Join chunk outputs deterministically before writing workflow artifacts" in handoff_md
assert "append to candidate_claims.json from chunk workers" in handoff_md
assert "Do not replay: delegate screener, state stage-complete --stage screener" in handoff_md


def test_handoff_no_config_fails(tmp_path):
Expand Down Expand Up @@ -1283,6 +1300,7 @@ def test_build_handoff_claude_has_generate_brief(tmp_path):
)
assert "/generate-brief" in handoff.prompt
assert "With role_topology=default, Scout performs discovery and screening in one role" in handoff.prompt
assert "do not call `state stage-complete --stage screener` in default topology" in handoff.prompt
assert "strict: scout → screener" in handoff.prompt
_assert_orchestrator_contract_handoff(handoff.to_dict())

Expand All @@ -1302,6 +1320,7 @@ def test_build_handoff_codex_maps_specialists_to_custom_agents(tmp_path):
assert "Spawn the named Codex custom agent" in handoff.prompt
assert ".codex/agents/scout.toml" in handoff.prompt
assert "default: discovery + screening" in handoff.prompt
assert "do not call `state stage-complete --stage screener` in default topology" in handoff.prompt
assert "strict topology or explicit repair/review only" in handoff.prompt
assert ".codex/agents/claim-ledger.toml" in handoff.prompt
assert "Do not call the next specialist until" in handoff.prompt
Expand Down
15 changes: 15 additions & 0 deletions tests/test_status_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,21 @@ def test_status_command_human_output_reports_topology_satisfied_stage(tmp_path,
"(default; required=candidate_claims,screened_candidates)"
) in out

rc = main(["status", "--workspace", str(ws), "--json"])

assert rc == 0
payload = json.loads(capsys.readouterr().out)
screener = next(
stage
for stage in payload["timing"]["stages"]
if stage.get("stage_id") == "screener"
)
assert screener["status"] == "satisfied_by_topology"
assert screener["completion_event_type"] == "stage_satisfied_by_topology"
assert screener["topology"] == "default"
assert screener["satisfied_by"] == "scout"
assert screener["required_artifacts"] == ["candidate_claims", "screened_candidates"]


def test_status_command_reports_auditable_target_complete(tmp_path, capsys):
ws = _minimal_workspace(tmp_path / "ws")
Expand Down
Loading
Loading