From 98bff3fc6fbafd5483fb5db84861d2fcd836b779 Mon Sep 17 00:00:00 2001
From: Wild Wind <wwind123@gmail.com>
Date: Sun, 17 May 2026 02:03:20 -0700
Subject: [PATCH] Add plan-first issue review stage

---
 README.md                                    |   9 ++
 docs/local_agent_loop.md                     |  14 +++
 src/coding_review_agent_loop/cli.py          |  10 +-
 src/coding_review_agent_loop/config.py       |   2 +
 src/coding_review_agent_loop/orchestrator.py | 110 ++++++++++++++++-
 src/coding_review_agent_loop/prompts.py      | 122 +++++++++++++++++++
 src/coding_review_agent_loop/protocol.py     |  10 ++
 tests/test_agent_loop.py                     | 117 ++++++++++++++++++
 8 files changed, 391 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index f48334e..6cbf52a 100644
--- a/README.md
+++ b/README.md
@@ -97,6 +97,15 @@ and body as the implementation task:
 agent-loop issue 123 --repo OWNER/REPO
 ```
 
+For larger or ambiguous issues, add `--plan-first` to make the coder write an
+implementation plan first. Reviewers approve or block that plan before the coder
+edits files or opens a PR; after plan approval, the normal implementation and PR
+review loop runs.
+
+```bash
+agent-loop issue 123 --repo OWNER/REPO --plan-first
+```
+
 Provide a one-off task directly when there is no issue yet:
 
 ```bash
diff --git a/docs/local_agent_loop.md b/docs/local_agent_loop.md
index 03159a8..536afc2 100644
--- a/docs/local_agent_loop.md
+++ b/docs/local_agent_loop.md
@@ -136,6 +136,20 @@ Fix a GitHub issue:
 agent-loop issue 56 --repo OWNER/REPO
 ```
 
+For issue work that needs design agreement before implementation, enable the
+optional planning gate:
+
+```bash
+agent-loop issue 56 --repo OWNER/REPO --plan-first
+```
+
+With `--plan-first`, the coder first writes a concise implementation plan and
+must not edit files, push a branch, or open a PR. Reviewers inspect that plan
+for correctness, architecture fit, edge cases, test strategy, and ambiguity. If
+any reviewer blocks, the coder revises the plan; once all reviewers approve in
+the same planning round, the approved plan is passed into the normal
+implementation prompt and the regular PR review loop begins.
+
 Implement a free-form task:
 
 ```bash
diff --git a/src/coding_review_agent_loop/cli.py b/src/coding_review_agent_loop/cli.py
index a6c1418..6a5920f 100644
--- a/src/coding_review_agent_loop/cli.py
+++ b/src/coding_review_agent_loop/cli.py
@@ -50,7 +50,7 @@
     build_task_prompt,
     format_agent_list,
 )
-from .protocol import is_clarification_request, parse_agent_state, parse_pr_number
+from .protocol import is_clarification_request, parse_agent_state, parse_plan_state, parse_pr_number
 from .runner import CommandResult, Runner, ensure_log_dir_ignored, tail_text
 
 
@@ -226,6 +226,14 @@ def add_common(subparser: argparse.ArgumentParser) -> None:
 
     issue = subparsers.add_parser("issue", help="Ask the coder to fix an issue, then review it.")
     issue.add_argument("issue_number", type=int)
+    issue.add_argument(
+        "--plan-first",
+        action="store_true",
+        help=(
+            "Run an opt-in implementation planning/review loop before the coder "
+            "edits code or opens a PR."
+        ),
+    )
     add_common(issue)
 
     pr = subparsers.add_parser("pr", help="Run the reviewer/coder loop on an existing PR.")
diff --git a/src/coding_review_agent_loop/config.py b/src/coding_review_agent_loop/config.py
index b4d1ad2..144483c 100644
--- a/src/coding_review_agent_loop/config.py
+++ b/src/coding_review_agent_loop/config.py
@@ -50,6 +50,7 @@ class AgentLoopConfig:
     agent_memory_dir: Path
     refresh_test_profile: bool
     approved_followups: str = "ignore"
+    plan_first: bool = False
     auto_agent_dirs: tuple[AgentName, ...] = ()
 
     def __post_init__(self) -> None:
@@ -316,5 +317,6 @@ def config_from_args(args: argparse.Namespace, runner: Runner) -> AgentLoopConfi
         ),
         refresh_test_profile=args.refresh_test_profile,
         approved_followups=args.approved_followups,
+        plan_first=getattr(args, "plan_first", False),
         auto_agent_dirs=auto_agent_dirs,
     )
diff --git a/src/coding_review_agent_loop/orchestrator.py b/src/coding_review_agent_loop/orchestrator.py
index 8963bb3..feda609 100644
--- a/src/coding_review_agent_loop/orchestrator.py
+++ b/src/coding_review_agent_loop/orchestrator.py
@@ -25,13 +25,16 @@
 from .prompts import (
     build_followup_prompt,
     build_issue_prompt,
+    build_issue_plan_prompt,
+    build_plan_followup_prompt,
+    build_plan_review_prompt,
     build_review_prompt,
     build_same_pr_followup_prompt,
     build_task_clarification_prompt,
     build_task_prompt,
     format_agent_list,
 )
-from .protocol import is_clarification_request, parse_agent_state, parse_pr_number
+from .protocol import is_clarification_request, parse_agent_state, parse_plan_state, parse_pr_number
 from .protocol import ApprovedFollowup, parse_approved_followups
 from .runner import Runner
 from .workdirs import active_workdir
@@ -180,6 +183,99 @@ def _create_approved_followup_issues(
     return issue_urls, skipped_count
 
 
+def _run_issue_plan_loop(
+    runner: Runner,
+    *,
+    issue_number: int,
+    config: AgentLoopConfig,
+    memory,
+) -> tuple[str, str | None]:
+    coder_name = agent_display_name(config.coder)
+    configured_reviewers = reviewers(config)
+    reviewer_session_ids: dict[AgentName, str | None] = {}
+
+    log(config, f"Planning issue #{issue_number}: invoking {coder_name}")
+    plan, coder_session_id = run_agent(
+        runner,
+        agent=config.coder,
+        config=config,
+        prompt=build_issue_plan_prompt(issue_number, config, memory),
+    )
+    if not plan.strip():
+        raise AgentLoopError(f"{coder_name} produced an empty implementation plan.")
+    if parse_plan_state(plan) != "blocking":
+        raise AgentLoopError(
+            f"{coder_name} planning output must use <!-- AGENT_PLAN_STATE: blocking -->."
+        )
+
+    for round_number in range(1, config.max_rounds + 1):
+        blocking_reviews: list[tuple[str, str]] = []
+        for reviewer in configured_reviewers:
+            reviewer_name = agent_display_name(reviewer)
+            log(config, f"Planning round {round_number}: {reviewer_name} reviewing issue #{issue_number} plan")
+            review_output, new_session_id = run_agent(
+                runner,
+                agent=reviewer,
+                config=config,
+                prompt=build_plan_review_prompt(
+                    issue_number,
+                    round_number,
+                    plan,
+                    config,
+                    reviewer=reviewer,
+                    memory=memory,
+                ),
+                session_id=reviewer_session_ids.get(reviewer),
+            )
+            reviewer_session_ids[reviewer] = new_session_id
+            if not review_output.strip():
+                raise AgentLoopError(f"{reviewer_name} produced an empty planning review.")
+            review_state = parse_plan_state(review_output)
+            log(config, f"Planning round {round_number}: {reviewer_name} state is {review_state}")
+            if review_state == "blocking":
+                blocking_reviews.append((reviewer_name, review_output))
+
+        if not blocking_reviews:
+            log(config, f"Issue #{issue_number} implementation plan approved")
+            return plan, coder_session_id
+
+        if round_number == config.max_rounds:
+            raise AgentLoopError(
+                f"One or more reviewers still reported blocking plan issues after "
+                f"planning round {round_number}; human review required."
+            )
+
+        combined_review = "\n\n".join(
+            f"{name} planning review:\n\n{review}" for name, review in blocking_reviews
+        )
+        log(config, f"Planning round {round_number}: {coder_name} revising the issue plan")
+        plan, coder_session_id = run_agent(
+            runner,
+            agent=config.coder,
+            config=config,
+            prompt=build_plan_followup_prompt(
+                issue_number,
+                round_number,
+                plan,
+                combined_review,
+                config,
+                memory,
+            ),
+            session_id=coder_session_id,
+        )
+        if not plan.strip():
+            raise AgentLoopError(f"{coder_name} produced an empty revised implementation plan.")
+        if parse_plan_state(plan) != "blocking":
+            raise AgentLoopError(
+                f"{coder_name} revised planning output must use <!-- AGENT_PLAN_STATE: blocking -->."
+            )
+
+    raise AgentLoopError(
+        f"Reached max planning rounds ({config.max_rounds}) for issue #{issue_number}; "
+        "human review required."
+    )
+
+
 def _format_created_followup_issue_summary(
     pr_number: int,
     issue_urls: list[str],
@@ -226,12 +322,22 @@ def run_issue_loop(runner: Runner, *, issue_number: int, config: AgentLoopConfig
     log(config, f"Validating issue #{issue_number}")
     validate_open_issue(runner, config=config, issue_number=issue_number)
     memory = prepare_agent_memory(runner, config)
+    approved_plan: str | None = None
+    coder_session_id: str | None = None
+    if config.plan_first:
+        approved_plan, coder_session_id = _run_issue_plan_loop(
+            runner,
+            issue_number=issue_number,
+            config=config,
+            memory=memory,
+        )
 
     coder_output, coder_session_id = run_agent(
         runner,
         agent=config.coder,
         config=config,
-        prompt=build_issue_prompt(issue_number, config, memory),
+        prompt=build_issue_prompt(issue_number, config, memory, approved_plan),
+        session_id=coder_session_id,
     )
     pr_number = parse_pr_number(coder_output)
     if pr_number is None:
diff --git a/src/coding_review_agent_loop/prompts.py b/src/coding_review_agent_loop/prompts.py
index 6619ee4..06f1bfb 100644
--- a/src/coding_review_agent_loop/prompts.py
+++ b/src/coding_review_agent_loop/prompts.py
@@ -40,15 +40,27 @@ def build_issue_prompt(
     issue_number: int,
     config: AgentLoopConfig,
     memory: AgentMemoryContext | None = None,
+    approved_plan: str | None = None,
 ) -> str:
     reviewer_name = format_agent_list(reviewers(config))
     coder_signature = agent_signature(config.coder)
+    plan_block = ""
+    if approved_plan:
+        plan_block = f"""
+Approved implementation plan:
+{approved_plan}
+
+Implement this approved plan. If you discover that the plan is materially wrong
+or incomplete, document the discrepancy in the PR description and keep the
+implementation as close to the approved plan as safely possible.
+"""
     return f"""Fix GitHub issue #{issue_number} in {config.repo}.
 
 Use this local checkout as your workspace. Create a branch, implement the fix,
 run relevant tests, commit, push, and open a pull request against {config.base}.
 {_scratch_file_guidance()}
 {_memory_block(memory)}
+{plan_block}
 
 Do not wait for {reviewer_name} yourself; this local orchestrator will run {reviewer_name} after
 you create the PR. In your final response, include the PR number using exactly
@@ -65,6 +77,116 @@ def build_issue_prompt(
 """
 
 
+def build_issue_plan_prompt(
+    issue_number: int,
+    config: AgentLoopConfig,
+    memory: AgentMemoryContext | None = None,
+) -> str:
+    reviewer_name = format_agent_list(reviewers(config))
+    coder_signature = agent_signature(config.coder)
+    return f"""Plan the implementation for GitHub issue #{issue_number} in {config.repo}.
+
+Use this local checkout only for inspection. Do not edit files, create a branch,
+commit, push, or open a pull request during this planning stage.
+{_scratch_file_guidance()}
+{_memory_block(memory)}
+
+Write a concise implementation plan for {reviewer_name} to review. Cover the
+intended code changes, behavior/API decisions, risks or ambiguities, and test
+strategy. If the issue is materially ambiguous, call out the ambiguity in the
+plan instead of implementing.
+
+End your final response with exactly one marker:
+
+<!-- AGENT_PLAN_STATE: blocking -->
+
+Use blocking here to hand the plan to {reviewer_name} for review. Sign the
+response as:
+-- {coder_signature}
+"""
+
+
+def build_plan_review_prompt(
+    issue_number: int,
+    round_number: int,
+    plan: str,
+    config: AgentLoopConfig,
+    *,
+    reviewer: AgentName,
+    memory: AgentMemoryContext | None = None,
+) -> str:
+    coder_name = agent_display_name(config.coder)
+    reviewer_signature = agent_signature(reviewer)
+    reviewer_group = format_agent_list(reviewers(config))
+    return f"""Review the implementation plan for GitHub issue #{issue_number} in {config.repo} (planning round {round_number}).
+
+Use this local checkout only for inspection. Do not edit files, create a branch,
+commit, push, or open a pull request during this planning review.
+{_scratch_file_guidance()}
+{_memory_block(memory)}
+
+Plan from {coder_name}:
+
+{plan}
+
+Review the plan for correctness, architecture fit, missing edge cases, test
+strategy, and ambiguity. Use blocking only for plan issues that should be
+resolved before implementation starts.
+
+All configured reviewers ({reviewer_group}) must approve in the same planning
+round before {coder_name} starts implementation.
+
+End your final response with exactly one marker:
+
+<!-- AGENT_PLAN_STATE: approved -->
+
+or:
+
+<!-- AGENT_PLAN_STATE: blocking -->
+
+Always sign your response:
+-- {reviewer_signature}
+"""
+
+
+def build_plan_followup_prompt(
+    issue_number: int,
+    round_number: int,
+    plan: str,
+    review: str,
+    config: AgentLoopConfig,
+    memory: AgentMemoryContext | None = None,
+) -> str:
+    reviewer_name = format_agent_list(reviewers(config))
+    coder_signature = agent_signature(config.coder)
+    return f"""{reviewer_name} reviewed your implementation plan for GitHub issue #{issue_number} in {config.repo} and found blocking issues.
+
+Use this local checkout only for inspection. Do not edit files, create a branch,
+commit, push, or open a pull request during this planning stage.
+{_scratch_file_guidance()}
+{_memory_block(memory)}
+
+Previous plan:
+
+{plan}
+
+{reviewer_name} planning review:
+
+{review}
+
+Revise the plan to address the blocking feedback. This is planning round
+{round_number}; do not implement yet.
+
+End your final response with exactly one marker:
+
+<!-- AGENT_PLAN_STATE: blocking -->
+
+Use blocking to hand the revised plan back to {reviewer_name}. Sign the
+response as:
+-- {coder_signature}
+"""
+
+
 def build_task_prompt(
     task_text: str,
     config: AgentLoopConfig,
diff --git a/src/coding_review_agent_loop/protocol.py b/src/coding_review_agent_loop/protocol.py
index 27f9688..705fc9c 100644
--- a/src/coding_review_agent_loop/protocol.py
+++ b/src/coding_review_agent_loop/protocol.py
@@ -8,6 +8,7 @@
 from .errors import AgentLoopError
 
 STATE_RE = re.compile(r"<!--\s*AGENT_STATE:\s*(approved|blocking)\s*-->", re.I)
+PLAN_STATE_RE = re.compile(r"<!--\s*AGENT_PLAN_STATE:\s*(approved|blocking)\s*-->", re.I)
 PR_RE = re.compile(r"<!--\s*AGENT_PR:\s*(\d+)\s*-->", re.I)
 GH_PR_URL_RE = re.compile(r"/pull/(\d+)(?:\b|$)")
 CLARIFY_RE = re.compile(r"<!--\s*AGENT_CLARIFY\s*-->", re.I)
@@ -44,6 +45,15 @@ def parse_agent_state(text: str) -> str:
     return matches[-1].lower()
 
 
+def parse_plan_state(text: str) -> str:
+    matches = PLAN_STATE_RE.findall(text)
+    if not matches:
+        raise AgentLoopError(
+            "Agent response did not include <!-- AGENT_PLAN_STATE: approved|blocking -->"
+        )
+    return matches[-1].lower()
+
+
 def parse_pr_number(text: str) -> int | None:
     marker = PR_RE.search(text)
     if marker:
diff --git a/tests/test_agent_loop.py b/tests/test_agent_loop.py
index 1c92b3c..3a2e096 100644
--- a/tests/test_agent_loop.py
+++ b/tests/test_agent_loop.py
@@ -16,6 +16,7 @@
     ensure_log_dir_ignored,
     is_clarification_request,
     parse_agent_state,
+    parse_plan_state,
     parse_pr_number,
     run_issue_loop,
     run_pr_loop,
@@ -446,6 +447,13 @@ def test_parse_agent_state_requires_marker():
         parse_agent_state("LGTM")
 
 
+def test_parse_plan_state_accepts_separate_planning_marker():
+    assert parse_plan_state("plan looks viable\n<!-- AGENT_PLAN_STATE: approved -->") == "approved"
+    assert parse_plan_state("needs detail\n<!-- agent_plan_state: BLOCKING -->") == "blocking"
+    with pytest.raises(AgentLoopError):
+        parse_plan_state("<!-- AGENT_STATE: approved -->")
+
+
 def test_parse_non_blocking_followups_extracts_bullets_only_from_section():
     review = """
     Looks good.
@@ -616,6 +624,94 @@ def test_issue_loop_can_use_codex_as_coder_and_claude_as_reviewer(tmp_path):
     assert runner.comments[-1].startswith("LGTM.")
 
 
+def test_issue_loop_plan_first_gets_plan_approval_before_implementation(tmp_path):
+    runner = FakeRunner(
+        claude_outputs=[
+            "Plan: update CLI, protocol, and orchestration tests.\n"
+            "<!-- AGENT_PLAN_STATE: blocking -->\n-- Anthropic Claude",
+            "Implemented approved plan.\n<!-- AGENT_PR: 77 -->\n"
+            "<!-- AGENT_STATE: blocking -->\n-- Anthropic Claude",
+        ],
+        codex_outputs=[
+            "Plan looks sound.\n<!-- AGENT_PLAN_STATE: approved -->\n-- OpenAI Codex",
+            "LGTM.\n<!-- AGENT_STATE: approved -->\n-- OpenAI Codex",
+        ],
+    )
+    config = make_config(tmp_path, plan_first=True)
+
+    assert run_issue_loop(runner, issue_number=56, config=config) == 0
+
+    agent_commands = [cmd for cmd, _cwd in runner.commands if cmd[:1] in (["claude"], ["codex"])]
+    assert [cmd[:2] for cmd in agent_commands] == [
+        ["claude", "--print"],
+        ["codex", "exec"],
+        ["claude", "--print"],
+        ["codex", "exec"],
+    ]
+    planning_prompt = agent_commands[0][-1]
+    assert "Do not edit files, create a branch" in planning_prompt
+    assert "<!-- AGENT_PLAN_STATE: blocking -->" in planning_prompt
+
+    plan_review_prompt = agent_commands[1][-1]
+    assert "Review the implementation plan for GitHub issue #56" in plan_review_prompt
+    assert "Plan: update CLI, protocol, and orchestration tests." in plan_review_prompt
+    assert "<!-- AGENT_PLAN_STATE: approved -->" in plan_review_prompt
+
+    implementation_prompt = agent_commands[2][-1]
+    assert "Approved implementation plan:" in implementation_prompt
+    assert "Plan: update CLI, protocol, and orchestration tests." in implementation_prompt
+    assert "<!-- AGENT_PR: <number> -->" in implementation_prompt
+
+    assert len(runner.comments) == 2
+    assert runner.comments[0].startswith("Implemented approved plan.")
+    assert runner.comments[1].startswith("LGTM.")
+
+
+def test_issue_loop_plan_first_revises_plan_until_all_reviewers_approve(tmp_path):
+    runner = FakeRunner(
+        claude_outputs=[
+            "Plan v1.\n<!-- AGENT_PLAN_STATE: blocking -->\n-- Anthropic Claude",
+            "Plan v2 adds tests.\n<!-- AGENT_PLAN_STATE: blocking -->\n-- Anthropic Claude",
+            "Implemented revised plan.\n<!-- AGENT_PR: 77 -->\n"
+            "<!-- AGENT_STATE: blocking -->\n-- Anthropic Claude",
+        ],
+        codex_outputs=[
+            "Missing regression test plan.\n<!-- AGENT_PLAN_STATE: blocking -->\n-- OpenAI Codex",
+            "Plan approved.\n<!-- AGENT_PLAN_STATE: approved -->\n-- OpenAI Codex",
+            "LGTM.\n<!-- AGENT_STATE: approved -->\n-- OpenAI Codex",
+        ],
+    )
+    config = make_config(tmp_path, plan_first=True)
+
+    assert run_issue_loop(runner, issue_number=56, config=config) == 0
+
+    claude_prompts = [cmd[-1] for cmd, _cwd in runner.commands if cmd[:1] == ["claude"]]
+    assert "Previous plan:\n\nPlan v1." in claude_prompts[1]
+    assert "Missing regression test plan." in claude_prompts[1]
+    assert "Approved implementation plan:" in claude_prompts[2]
+    assert "Plan v2 adds tests." in claude_prompts[2]
+
+
+def test_issue_loop_plan_first_fails_after_max_planning_rounds(tmp_path):
+    runner = FakeRunner(
+        claude_outputs=[
+            "Plan v1.\n<!-- AGENT_PLAN_STATE: blocking -->",
+        ],
+        codex_outputs=[
+            "Still ambiguous.\n<!-- AGENT_PLAN_STATE: blocking -->\n-- OpenAI Codex",
+        ],
+    )
+    config = make_config(tmp_path, plan_first=True, max_rounds=1)
+
+    with pytest.raises(AgentLoopError, match="blocking plan issues"):
+        run_issue_loop(runner, issue_number=56, config=config)
+
+    assert runner.comments == []
+    claude_calls = [cmd for cmd, _cwd in runner.commands if cmd[:1] == ["claude"]]
+    assert len(claude_calls) == 1
+    assert "Plan the implementation" in claude_calls[0][-1]
+
+
 def test_ensure_log_dir_ignored_does_not_overwrite_existing_file(tmp_path):
     log_dir = tmp_path / "logs"
     log_dir.mkdir()
@@ -1423,6 +1519,27 @@ def test_approved_followups_cli_mode_is_configurable(tmp_path, mode):
     assert config.approved_followups == mode
 
 
+def test_issue_plan_first_cli_flag_is_configurable(tmp_path):
+    parser = build_parser()
+    args = parser.parse_args([
+        "issue",
+        "56",
+        "--repo",
+        "OWNER/REPO",
+        "--plan-first",
+        "--claude-dir",
+        str(tmp_path / "claude"),
+        "--codex-dir",
+        str(tmp_path / "codex"),
+        "--gemini-dir",
+        str(tmp_path / "gemini"),
+    ])
+
+    config = config_from_args(args, FakeRunner())
+
+    assert config.plan_first is True
+
+
 def test_explicit_agent_dirs_are_preserved_when_others_default(tmp_path):
     parser = build_parser()
     codex_dir = tmp_path / "codex"