From 98bff3fc6fbafd5483fb5db84861d2fcd836b779 Mon Sep 17 00:00:00 2001 From: Wild Wind Date: Sun, 17 May 2026 02:03:20 -0700 Subject: [PATCH] Add plan-first issue review stage --- README.md | 9 ++ docs/local_agent_loop.md | 14 +++ src/coding_review_agent_loop/cli.py | 10 +- src/coding_review_agent_loop/config.py | 2 + src/coding_review_agent_loop/orchestrator.py | 110 ++++++++++++++++- src/coding_review_agent_loop/prompts.py | 122 +++++++++++++++++++ src/coding_review_agent_loop/protocol.py | 10 ++ tests/test_agent_loop.py | 117 ++++++++++++++++++ 8 files changed, 391 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f48334e..6cbf52a 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,15 @@ and body as the implementation task: agent-loop issue 123 --repo OWNER/REPO ``` +For larger or ambiguous issues, add `--plan-first` to make the coder write an +implementation plan first. Reviewers approve or block that plan before the coder +edits files or opens a PR; after plan approval, the normal implementation and PR +review loop runs. + +```bash +agent-loop issue 123 --repo OWNER/REPO --plan-first +``` + Provide a one-off task directly when there is no issue yet: ```bash diff --git a/docs/local_agent_loop.md b/docs/local_agent_loop.md index 03159a8..536afc2 100644 --- a/docs/local_agent_loop.md +++ b/docs/local_agent_loop.md @@ -136,6 +136,20 @@ Fix a GitHub issue: agent-loop issue 56 --repo OWNER/REPO ``` +For issue work that needs design agreement before implementation, enable the +optional planning gate: + +```bash +agent-loop issue 56 --repo OWNER/REPO --plan-first +``` + +With `--plan-first`, the coder first writes a concise implementation plan and +must not edit files, push a branch, or open a PR. Reviewers inspect that plan +for correctness, architecture fit, edge cases, test strategy, and ambiguity. If +any reviewer blocks, the coder revises the plan; once all reviewers approve in +the same planning round, the approved plan is passed into the normal +implementation prompt and the regular PR review loop begins. + Implement a free-form task: ```bash diff --git a/src/coding_review_agent_loop/cli.py b/src/coding_review_agent_loop/cli.py index a6c1418..6a5920f 100644 --- a/src/coding_review_agent_loop/cli.py +++ b/src/coding_review_agent_loop/cli.py @@ -50,7 +50,7 @@ build_task_prompt, format_agent_list, ) -from .protocol import is_clarification_request, parse_agent_state, parse_pr_number +from .protocol import is_clarification_request, parse_agent_state, parse_plan_state, parse_pr_number from .runner import CommandResult, Runner, ensure_log_dir_ignored, tail_text @@ -226,6 +226,14 @@ def add_common(subparser: argparse.ArgumentParser) -> None: issue = subparsers.add_parser("issue", help="Ask the coder to fix an issue, then review it.") issue.add_argument("issue_number", type=int) + issue.add_argument( + "--plan-first", + action="store_true", + help=( + "Run an opt-in implementation planning/review loop before the coder " + "edits code or opens a PR." + ), + ) add_common(issue) pr = subparsers.add_parser("pr", help="Run the reviewer/coder loop on an existing PR.") diff --git a/src/coding_review_agent_loop/config.py b/src/coding_review_agent_loop/config.py index b4d1ad2..144483c 100644 --- a/src/coding_review_agent_loop/config.py +++ b/src/coding_review_agent_loop/config.py @@ -50,6 +50,7 @@ class AgentLoopConfig: agent_memory_dir: Path refresh_test_profile: bool approved_followups: str = "ignore" + plan_first: bool = False auto_agent_dirs: tuple[AgentName, ...] = () def __post_init__(self) -> None: @@ -316,5 +317,6 @@ def config_from_args(args: argparse.Namespace, runner: Runner) -> AgentLoopConfi ), refresh_test_profile=args.refresh_test_profile, approved_followups=args.approved_followups, + plan_first=getattr(args, "plan_first", False), auto_agent_dirs=auto_agent_dirs, ) diff --git a/src/coding_review_agent_loop/orchestrator.py b/src/coding_review_agent_loop/orchestrator.py index 8963bb3..feda609 100644 --- a/src/coding_review_agent_loop/orchestrator.py +++ b/src/coding_review_agent_loop/orchestrator.py @@ -25,13 +25,16 @@ from .prompts import ( build_followup_prompt, build_issue_prompt, + build_issue_plan_prompt, + build_plan_followup_prompt, + build_plan_review_prompt, build_review_prompt, build_same_pr_followup_prompt, build_task_clarification_prompt, build_task_prompt, format_agent_list, ) -from .protocol import is_clarification_request, parse_agent_state, parse_pr_number +from .protocol import is_clarification_request, parse_agent_state, parse_plan_state, parse_pr_number from .protocol import ApprovedFollowup, parse_approved_followups from .runner import Runner from .workdirs import active_workdir @@ -180,6 +183,99 @@ def _create_approved_followup_issues( return issue_urls, skipped_count +def _run_issue_plan_loop( + runner: Runner, + *, + issue_number: int, + config: AgentLoopConfig, + memory, +) -> tuple[str, str | None]: + coder_name = agent_display_name(config.coder) + configured_reviewers = reviewers(config) + reviewer_session_ids: dict[AgentName, str | None] = {} + + log(config, f"Planning issue #{issue_number}: invoking {coder_name}") + plan, coder_session_id = run_agent( + runner, + agent=config.coder, + config=config, + prompt=build_issue_plan_prompt(issue_number, config, memory), + ) + if not plan.strip(): + raise AgentLoopError(f"{coder_name} produced an empty implementation plan.") + if parse_plan_state(plan) != "blocking": + raise AgentLoopError( + f"{coder_name} planning output must use ." + ) + + for round_number in range(1, config.max_rounds + 1): + blocking_reviews: list[tuple[str, str]] = [] + for reviewer in configured_reviewers: + reviewer_name = agent_display_name(reviewer) + log(config, f"Planning round {round_number}: {reviewer_name} reviewing issue #{issue_number} plan") + review_output, new_session_id = run_agent( + runner, + agent=reviewer, + config=config, + prompt=build_plan_review_prompt( + issue_number, + round_number, + plan, + config, + reviewer=reviewer, + memory=memory, + ), + session_id=reviewer_session_ids.get(reviewer), + ) + reviewer_session_ids[reviewer] = new_session_id + if not review_output.strip(): + raise AgentLoopError(f"{reviewer_name} produced an empty planning review.") + review_state = parse_plan_state(review_output) + log(config, f"Planning round {round_number}: {reviewer_name} state is {review_state}") + if review_state == "blocking": + blocking_reviews.append((reviewer_name, review_output)) + + if not blocking_reviews: + log(config, f"Issue #{issue_number} implementation plan approved") + return plan, coder_session_id + + if round_number == config.max_rounds: + raise AgentLoopError( + f"One or more reviewers still reported blocking plan issues after " + f"planning round {round_number}; human review required." + ) + + combined_review = "\n\n".join( + f"{name} planning review:\n\n{review}" for name, review in blocking_reviews + ) + log(config, f"Planning round {round_number}: {coder_name} revising the issue plan") + plan, coder_session_id = run_agent( + runner, + agent=config.coder, + config=config, + prompt=build_plan_followup_prompt( + issue_number, + round_number, + plan, + combined_review, + config, + memory, + ), + session_id=coder_session_id, + ) + if not plan.strip(): + raise AgentLoopError(f"{coder_name} produced an empty revised implementation plan.") + if parse_plan_state(plan) != "blocking": + raise AgentLoopError( + f"{coder_name} revised planning output must use ." + ) + + raise AgentLoopError( + f"Reached max planning rounds ({config.max_rounds}) for issue #{issue_number}; " + "human review required." + ) + + def _format_created_followup_issue_summary( pr_number: int, issue_urls: list[str], @@ -226,12 +322,22 @@ def run_issue_loop(runner: Runner, *, issue_number: int, config: AgentLoopConfig log(config, f"Validating issue #{issue_number}") validate_open_issue(runner, config=config, issue_number=issue_number) memory = prepare_agent_memory(runner, config) + approved_plan: str | None = None + coder_session_id: str | None = None + if config.plan_first: + approved_plan, coder_session_id = _run_issue_plan_loop( + runner, + issue_number=issue_number, + config=config, + memory=memory, + ) coder_output, coder_session_id = run_agent( runner, agent=config.coder, config=config, - prompt=build_issue_prompt(issue_number, config, memory), + prompt=build_issue_prompt(issue_number, config, memory, approved_plan), + session_id=coder_session_id, ) pr_number = parse_pr_number(coder_output) if pr_number is None: diff --git a/src/coding_review_agent_loop/prompts.py b/src/coding_review_agent_loop/prompts.py index 6619ee4..06f1bfb 100644 --- a/src/coding_review_agent_loop/prompts.py +++ b/src/coding_review_agent_loop/prompts.py @@ -40,15 +40,27 @@ def build_issue_prompt( issue_number: int, config: AgentLoopConfig, memory: AgentMemoryContext | None = None, + approved_plan: str | None = None, ) -> str: reviewer_name = format_agent_list(reviewers(config)) coder_signature = agent_signature(config.coder) + plan_block = "" + if approved_plan: + plan_block = f""" +Approved implementation plan: +{approved_plan} + +Implement this approved plan. If you discover that the plan is materially wrong +or incomplete, document the discrepancy in the PR description and keep the +implementation as close to the approved plan as safely possible. +""" return f"""Fix GitHub issue #{issue_number} in {config.repo}. Use this local checkout as your workspace. Create a branch, implement the fix, run relevant tests, commit, push, and open a pull request against {config.base}. {_scratch_file_guidance()} {_memory_block(memory)} +{plan_block} Do not wait for {reviewer_name} yourself; this local orchestrator will run {reviewer_name} after you create the PR. In your final response, include the PR number using exactly @@ -65,6 +77,116 @@ def build_issue_prompt( """ +def build_issue_plan_prompt( + issue_number: int, + config: AgentLoopConfig, + memory: AgentMemoryContext | None = None, +) -> str: + reviewer_name = format_agent_list(reviewers(config)) + coder_signature = agent_signature(config.coder) + return f"""Plan the implementation for GitHub issue #{issue_number} in {config.repo}. + +Use this local checkout only for inspection. Do not edit files, create a branch, +commit, push, or open a pull request during this planning stage. +{_scratch_file_guidance()} +{_memory_block(memory)} + +Write a concise implementation plan for {reviewer_name} to review. Cover the +intended code changes, behavior/API decisions, risks or ambiguities, and test +strategy. If the issue is materially ambiguous, call out the ambiguity in the +plan instead of implementing. + +End your final response with exactly one marker: + + + +Use blocking here to hand the plan to {reviewer_name} for review. Sign the +response as: +-- {coder_signature} +""" + + +def build_plan_review_prompt( + issue_number: int, + round_number: int, + plan: str, + config: AgentLoopConfig, + *, + reviewer: AgentName, + memory: AgentMemoryContext | None = None, +) -> str: + coder_name = agent_display_name(config.coder) + reviewer_signature = agent_signature(reviewer) + reviewer_group = format_agent_list(reviewers(config)) + return f"""Review the implementation plan for GitHub issue #{issue_number} in {config.repo} (planning round {round_number}). + +Use this local checkout only for inspection. Do not edit files, create a branch, +commit, push, or open a pull request during this planning review. +{_scratch_file_guidance()} +{_memory_block(memory)} + +Plan from {coder_name}: + +{plan} + +Review the plan for correctness, architecture fit, missing edge cases, test +strategy, and ambiguity. Use blocking only for plan issues that should be +resolved before implementation starts. + +All configured reviewers ({reviewer_group}) must approve in the same planning +round before {coder_name} starts implementation. + +End your final response with exactly one marker: + + + +or: + + + +Always sign your response: +-- {reviewer_signature} +""" + + +def build_plan_followup_prompt( + issue_number: int, + round_number: int, + plan: str, + review: str, + config: AgentLoopConfig, + memory: AgentMemoryContext | None = None, +) -> str: + reviewer_name = format_agent_list(reviewers(config)) + coder_signature = agent_signature(config.coder) + return f"""{reviewer_name} reviewed your implementation plan for GitHub issue #{issue_number} in {config.repo} and found blocking issues. + +Use this local checkout only for inspection. Do not edit files, create a branch, +commit, push, or open a pull request during this planning stage. +{_scratch_file_guidance()} +{_memory_block(memory)} + +Previous plan: + +{plan} + +{reviewer_name} planning review: + +{review} + +Revise the plan to address the blocking feedback. This is planning round +{round_number}; do not implement yet. + +End your final response with exactly one marker: + + + +Use blocking to hand the revised plan back to {reviewer_name}. Sign the +response as: +-- {coder_signature} +""" + + def build_task_prompt( task_text: str, config: AgentLoopConfig, diff --git a/src/coding_review_agent_loop/protocol.py b/src/coding_review_agent_loop/protocol.py index 27f9688..705fc9c 100644 --- a/src/coding_review_agent_loop/protocol.py +++ b/src/coding_review_agent_loop/protocol.py @@ -8,6 +8,7 @@ from .errors import AgentLoopError STATE_RE = re.compile(r"", re.I) +PLAN_STATE_RE = re.compile(r"", re.I) PR_RE = re.compile(r"", re.I) GH_PR_URL_RE = re.compile(r"/pull/(\d+)(?:\b|$)") CLARIFY_RE = re.compile(r"", re.I) @@ -44,6 +45,15 @@ def parse_agent_state(text: str) -> str: return matches[-1].lower() +def parse_plan_state(text: str) -> str: + matches = PLAN_STATE_RE.findall(text) + if not matches: + raise AgentLoopError( + "Agent response did not include " + ) + return matches[-1].lower() + + def parse_pr_number(text: str) -> int | None: marker = PR_RE.search(text) if marker: diff --git a/tests/test_agent_loop.py b/tests/test_agent_loop.py index 1c92b3c..3a2e096 100644 --- a/tests/test_agent_loop.py +++ b/tests/test_agent_loop.py @@ -16,6 +16,7 @@ ensure_log_dir_ignored, is_clarification_request, parse_agent_state, + parse_plan_state, parse_pr_number, run_issue_loop, run_pr_loop, @@ -446,6 +447,13 @@ def test_parse_agent_state_requires_marker(): parse_agent_state("LGTM") +def test_parse_plan_state_accepts_separate_planning_marker(): + assert parse_plan_state("plan looks viable\n") == "approved" + assert parse_plan_state("needs detail\n") == "blocking" + with pytest.raises(AgentLoopError): + parse_plan_state("") + + def test_parse_non_blocking_followups_extracts_bullets_only_from_section(): review = """ Looks good. @@ -616,6 +624,94 @@ def test_issue_loop_can_use_codex_as_coder_and_claude_as_reviewer(tmp_path): assert runner.comments[-1].startswith("LGTM.") +def test_issue_loop_plan_first_gets_plan_approval_before_implementation(tmp_path): + runner = FakeRunner( + claude_outputs=[ + "Plan: update CLI, protocol, and orchestration tests.\n" + "\n-- Anthropic Claude", + "Implemented approved plan.\n\n" + "\n-- Anthropic Claude", + ], + codex_outputs=[ + "Plan looks sound.\n\n-- OpenAI Codex", + "LGTM.\n\n-- OpenAI Codex", + ], + ) + config = make_config(tmp_path, plan_first=True) + + assert run_issue_loop(runner, issue_number=56, config=config) == 0 + + agent_commands = [cmd for cmd, _cwd in runner.commands if cmd[:1] in (["claude"], ["codex"])] + assert [cmd[:2] for cmd in agent_commands] == [ + ["claude", "--print"], + ["codex", "exec"], + ["claude", "--print"], + ["codex", "exec"], + ] + planning_prompt = agent_commands[0][-1] + assert "Do not edit files, create a branch" in planning_prompt + assert "" in planning_prompt + + plan_review_prompt = agent_commands[1][-1] + assert "Review the implementation plan for GitHub issue #56" in plan_review_prompt + assert "Plan: update CLI, protocol, and orchestration tests." in plan_review_prompt + assert "" in plan_review_prompt + + implementation_prompt = agent_commands[2][-1] + assert "Approved implementation plan:" in implementation_prompt + assert "Plan: update CLI, protocol, and orchestration tests." in implementation_prompt + assert "" in implementation_prompt + + assert len(runner.comments) == 2 + assert runner.comments[0].startswith("Implemented approved plan.") + assert runner.comments[1].startswith("LGTM.") + + +def test_issue_loop_plan_first_revises_plan_until_all_reviewers_approve(tmp_path): + runner = FakeRunner( + claude_outputs=[ + "Plan v1.\n\n-- Anthropic Claude", + "Plan v2 adds tests.\n\n-- Anthropic Claude", + "Implemented revised plan.\n\n" + "\n-- Anthropic Claude", + ], + codex_outputs=[ + "Missing regression test plan.\n\n-- OpenAI Codex", + "Plan approved.\n\n-- OpenAI Codex", + "LGTM.\n\n-- OpenAI Codex", + ], + ) + config = make_config(tmp_path, plan_first=True) + + assert run_issue_loop(runner, issue_number=56, config=config) == 0 + + claude_prompts = [cmd[-1] for cmd, _cwd in runner.commands if cmd[:1] == ["claude"]] + assert "Previous plan:\n\nPlan v1." in claude_prompts[1] + assert "Missing regression test plan." in claude_prompts[1] + assert "Approved implementation plan:" in claude_prompts[2] + assert "Plan v2 adds tests." in claude_prompts[2] + + +def test_issue_loop_plan_first_fails_after_max_planning_rounds(tmp_path): + runner = FakeRunner( + claude_outputs=[ + "Plan v1.\n", + ], + codex_outputs=[ + "Still ambiguous.\n\n-- OpenAI Codex", + ], + ) + config = make_config(tmp_path, plan_first=True, max_rounds=1) + + with pytest.raises(AgentLoopError, match="blocking plan issues"): + run_issue_loop(runner, issue_number=56, config=config) + + assert runner.comments == [] + claude_calls = [cmd for cmd, _cwd in runner.commands if cmd[:1] == ["claude"]] + assert len(claude_calls) == 1 + assert "Plan the implementation" in claude_calls[0][-1] + + def test_ensure_log_dir_ignored_does_not_overwrite_existing_file(tmp_path): log_dir = tmp_path / "logs" log_dir.mkdir() @@ -1423,6 +1519,27 @@ def test_approved_followups_cli_mode_is_configurable(tmp_path, mode): assert config.approved_followups == mode +def test_issue_plan_first_cli_flag_is_configurable(tmp_path): + parser = build_parser() + args = parser.parse_args([ + "issue", + "56", + "--repo", + "OWNER/REPO", + "--plan-first", + "--claude-dir", + str(tmp_path / "claude"), + "--codex-dir", + str(tmp_path / "codex"), + "--gemini-dir", + str(tmp_path / "gemini"), + ]) + + config = config_from_args(args, FakeRunner()) + + assert config.plan_first is True + + def test_explicit_agent_dirs_are_preserved_when_others_default(tmp_path): parser = build_parser() codex_dir = tmp_path / "codex"