diff --git a/README.md b/README.md index e579367..d2b178e 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,16 @@ original issue body: agent-loop issue 123 --repo OWNER/REPO ``` +For larger or ambiguous issues, add `--plan-first` to run a plan review on the +issue before code is written. The coder may inspect the checkout but must not +edit files, push, or open a PR during planning. Reviewers approve or block with +`AGENT_PLAN_STATE` markers. By default the loop posts the approved plan summary +and stops; add `--implement-after-approval` to continue into the normal PR flow: + +```bash +agent-loop issue 123 --repo OWNER/REPO --plan-first --implement-after-approval +``` + Provide a one-off task directly when there is no issue yet: ```bash diff --git a/docs/local_agent_loop.md b/docs/local_agent_loop.md index 33f6fa9..3c3c84c 100644 --- a/docs/local_agent_loop.md +++ b/docs/local_agent_loop.md @@ -140,6 +140,23 @@ Issue mode includes the issue title, body, and comments in the coder prompt and issue-origin review prompts. Comments are ordered oldest to newest so later discussion can refine or supersede the original body. +Run issue mode as plan-first discussion before implementation: + +```bash +agent-loop issue 56 --repo OWNER/REPO --plan-first +``` + +With `--plan-first`, the coder writes an implementation plan without editing +code, pushing a branch, or opening a PR. Reviewers critique that plan on the +issue using `AGENT_PLAN_STATE` markers until every reviewer approves in the +same planning round. By default the loop posts an approved consensus summary to +the issue and stops. Add `--implement-after-approval` to continue into the +normal implementation and PR review loop using the approved plan: + +```bash +agent-loop issue 56 --repo OWNER/REPO --plan-first --implement-after-approval +``` + Implement a free-form task: ```bash @@ -360,10 +377,15 @@ Agent responses are parsed using HTML comment markers: + + ``` -`AGENT_PR` is required after a coder creates a PR. Review/fix responses must include a final `AGENT_STATE` marker. If a response quotes older markers, the final marker is treated as authoritative. +`AGENT_PR` is required after a coder creates a PR. Review/fix responses must +include a final `AGENT_STATE` marker. Plan-first coder/reviewer responses use +`AGENT_PLAN_STATE` instead. If a response quotes older markers, the final +matching marker is treated as authoritative. When `--approved-followups` is set to `summarize`, `issue`, or a `fix-and-*` mode, approved reviewer responses may also include optional future-work items diff --git a/src/coding_review_agent_loop/cli.py b/src/coding_review_agent_loop/cli.py index a6c1418..e659dd0 100644 --- a/src/coding_review_agent_loop/cli.py +++ b/src/coding_review_agent_loop/cli.py @@ -226,6 +226,19 @@ def add_common(subparser: argparse.ArgumentParser) -> None: issue = subparsers.add_parser("issue", help="Ask the coder to fix an issue, then review it.") issue.add_argument("issue_number", type=int) + issue.add_argument( + "--plan-first", + action="store_true", + help=( + "Run an issue planning/review stage before implementation. By default, " + "stop after the plan is approved and post the outcome to the issue." + ), + ) + issue.add_argument( + "--implement-after-approval", + action="store_true", + help="With --plan-first, continue into implementation after reviewers approve the plan.", + ) add_common(issue) pr = subparsers.add_parser("pr", help="Run the reviewer/coder loop on an existing PR.") @@ -288,7 +301,15 @@ def main(argv: Sequence[str] | None = None) -> int: try: config = config_from_args(args, runner) if args.command == "issue": - return run_issue_loop(runner, issue_number=args.issue_number, config=config) + if args.implement_after_approval and not args.plan_first: + raise AgentLoopError("--implement-after-approval requires --plan-first.") + return run_issue_loop( + runner, + issue_number=args.issue_number, + config=config, + plan_first=args.plan_first, + implement_after_approval=args.implement_after_approval, + ) if args.command == "pr": return run_pr_loop(runner, pr_number=args.pr_number, config=config) if args.command == "task": diff --git a/src/coding_review_agent_loop/github.py b/src/coding_review_agent_loop/github.py index b8327e3..19956fe 100644 --- a/src/coding_review_agent_loop/github.py +++ b/src/coding_review_agent_loop/github.py @@ -234,6 +234,54 @@ def post_pr_comment( pass +def post_issue_comment( + runner: Runner, + *, + config: AgentLoopConfig, + issue_number: int, + body: str, +) -> None: + log(config, f"Posting agent output to issue #{issue_number}") + if config.dry_run: + runner.run( + [ + config.gh_cmd, + "issue", + "comment", + str(issue_number), + "--repo", + config.repo, + "--body", + body, + ], + cwd=active_workdir(config), + ) + return + + with tempfile.NamedTemporaryFile("w", encoding="utf-8", delete=False) as handle: + handle.write(body) + path = handle.name + try: + runner.run( + [ + config.gh_cmd, + "issue", + "comment", + str(issue_number), + "--repo", + config.repo, + "--body-file", + path, + ], + cwd=active_workdir(config), + ) + finally: + try: + os.unlink(path) + except FileNotFoundError: + pass + + def create_issue( runner: Runner, *, diff --git a/src/coding_review_agent_loop/orchestrator.py b/src/coding_review_agent_loop/orchestrator.py index 9e02924..e52367b 100644 --- a/src/coding_review_agent_loop/orchestrator.py +++ b/src/coding_review_agent_loop/orchestrator.py @@ -17,6 +17,7 @@ get_issue_context, get_pr_metadata, merge_pr, + post_issue_comment, post_pr_comment, validate_open_issue, validate_open_pr, @@ -26,14 +27,18 @@ from .memory import prepare_agent_memory from .prompts import ( build_followup_prompt, + build_issue_implementation_prompt, + build_issue_plan_prompt, build_issue_prompt, + build_plan_review_prompt, + build_plan_revision_prompt, build_review_prompt, build_same_pr_followup_prompt, build_task_clarification_prompt, build_task_prompt, format_agent_list, ) -from .protocol import is_clarification_request, parse_agent_state, parse_pr_number +from .protocol import is_clarification_request, parse_agent_state, parse_plan_state, parse_pr_number from .protocol import ApprovedFollowup, parse_approved_followups from .runner import Runner from .workdirs import active_workdir @@ -223,12 +228,185 @@ def _format_same_pr_followups(followups: Sequence[ApprovedFollowup]) -> str: return "\n".join(lines).strip() -def run_issue_loop(runner: Runner, *, issue_number: int, config: AgentLoopConfig) -> int: +def _format_plan_approval_summary(issue_number: int, approved_plan: str) -> str: + return "\n".join( + [ + f"Planning complete for issue #{issue_number}.", + "", + "Outcome: implement", + "", + "Approved plan:", + "", + approved_plan, + "", + "-- coding-review-agent-loop", + ] + ) + + +def _run_plan_first_loop( + runner: Runner, + *, + issue_number: int, + config: AgentLoopConfig, + memory, + issue_context: IssueContext, + implement_after_approval: bool, +) -> int: + coder_name = agent_display_name(config.coder) + configured_reviewers = reviewers(config) + coder_session_id: str | None = None + reviewer_session_ids: dict[AgentName, str | None] = {} + + log(config, f"Planning issue #{issue_number}: invoking {coder_name}") + plan_output, coder_session_id = run_agent( + runner, + agent=config.coder, + config=config, + prompt=build_issue_plan_prompt(issue_number, config, memory, issue_context=issue_context), + ) + if not plan_output.strip(): + raise AgentLoopError(f"{coder_name} produced an empty response.") + if is_clarification_request(plan_output): + raise AgentLoopError( + f"{coder_name} requested clarification during planning; human intervention required.\n\n" + f"{coder_name}'s questions:\n{plan_output}" + ) + parse_plan_state(plan_output) + post_issue_comment(runner, config=config, issue_number=issue_number, body=plan_output) + current_plan = plan_output + + for round_number in range(1, config.max_rounds + 1): + blocking_reviews: list[tuple[str, str]] = [] + for reviewer in configured_reviewers: + reviewer_name = agent_display_name(reviewer) + log(config, f"Planning round {round_number}: {reviewer_name} reviewing issue #{issue_number}") + review_output, new_session_id = run_agent( + runner, + agent=reviewer, + config=config, + prompt=build_plan_review_prompt( + issue_number, + round_number, + current_plan, + config, + reviewer=reviewer, + memory=memory, + issue_context=issue_context, + ), + session_id=reviewer_session_ids.get(reviewer), + ) + reviewer_session_ids[reviewer] = new_session_id + if not review_output.strip(): + raise AgentLoopError(f"{reviewer_name} produced an empty response.") + post_issue_comment(runner, config=config, issue_number=issue_number, body=review_output) + review_state = parse_plan_state(review_output) + log(config, f"Planning round {round_number}: {reviewer_name} state is {review_state}") + if review_state == "blocking": + blocking_reviews.append((reviewer_name, review_output)) + + if not blocking_reviews: + post_issue_comment( + runner, + config=config, + issue_number=issue_number, + body=_format_plan_approval_summary(issue_number, current_plan), + ) + if not implement_after_approval: + print( + f"Issue #{issue_number} plan approved by {format_agent_list(configured_reviewers)}." + ) + return 0 + + log(config, f"Planning approved; invoking {coder_name} to implement issue #{issue_number}") + coder_output, coder_session_id = run_agent( + runner, + agent=config.coder, + config=config, + prompt=build_issue_implementation_prompt( + issue_number, + current_plan, + config, + memory, + issue_context=issue_context, + ), + session_id=coder_session_id, + ) + pr_number = parse_pr_number(coder_output) + if pr_number is None: + raise AgentLoopError( + f"{coder_name} output did not include a PR marker or PR URL." + ) + log(config, f"{coder_name} reported PR #{pr_number}; validating it is open") + validate_open_pr(runner, config=config, pr_number=pr_number) + post_pr_comment(runner, config=config, pr_number=pr_number, body=coder_output) + return run_pr_loop( + runner, + pr_number=pr_number, + config=config, + coder_session_id=coder_session_id, + issue_context=issue_context, + workdirs_ready=True, + ) + + if round_number == config.max_rounds: + raise AgentLoopError( + f"One or more reviewers still reported blocking plan issues after " + f"round {round_number}; human review required." + ) + + combined_review = "\n\n".join( + f"{name} plan review:\n\n{review}" for name, review in blocking_reviews + ) + log(config, f"Planning round {round_number}: {coder_name} revising the plan") + current_plan, coder_session_id = run_agent( + runner, + agent=config.coder, + config=config, + prompt=build_plan_revision_prompt( + issue_number, + round_number, + current_plan, + combined_review, + config, + memory, + issue_context=issue_context, + ), + session_id=coder_session_id, + ) + if not current_plan.strip(): + raise AgentLoopError(f"{coder_name} produced an empty response.") + parse_plan_state(current_plan) + post_issue_comment(runner, config=config, issue_number=issue_number, body=current_plan) + + raise AgentLoopError( + f"Reached max planning rounds ({config.max_rounds}) for issue #{issue_number}; " + "human review required." + ) + + +def run_issue_loop( + runner: Runner, + *, + issue_number: int, + config: AgentLoopConfig, + plan_first: bool = False, + implement_after_approval: bool = False, +) -> int: ensure_agent_workdirs(config, runner) log(config, f"Validating issue #{issue_number}") validate_open_issue(runner, config=config, issue_number=issue_number) issue_context = get_issue_context(runner, config=config, issue_number=issue_number) memory = prepare_agent_memory(runner, config) + if plan_first: + return _run_plan_first_loop( + runner, + issue_number=issue_number, + config=config, + memory=memory, + issue_context=issue_context, + implement_after_approval=implement_after_approval, + ) coder_output, coder_session_id = run_agent( runner, diff --git a/src/coding_review_agent_loop/prompts.py b/src/coding_review_agent_loop/prompts.py index 9aaf96a..5051b86 100644 --- a/src/coding_review_agent_loop/prompts.py +++ b/src/coding_review_agent_loop/prompts.py @@ -170,6 +170,158 @@ def build_issue_prompt( """ +def build_issue_plan_prompt( + issue_number: int, + config: AgentLoopConfig, + memory: AgentMemoryContext | None = None, + issue_context: IssueContext | None = None, +) -> str: + reviewer_name = format_agent_list(reviewers(config)) + coder_signature = agent_signature(config.coder) + return f"""Plan GitHub issue #{issue_number} in {config.repo}. + +Use this local checkout only to inspect context. Do not edit files, create a +branch, commit, push, or open a pull request during this planning stage. +Write a concise implementation plan covering the intended approach, key files or +areas to change, edge cases, and test strategy. +{_scratch_file_guidance()} +{_issue_context_block(issue_context)} +{_memory_block(memory)} + +Do not wait for {reviewer_name} yourself; this local orchestrator will run +{reviewer_name} to review the plan. End your final response with exactly one +planning marker: + + + +Use blocking here to hand the plan to {reviewer_name} for review. If the issue +is materially ambiguous before a useful plan can be written, ask focused +clarifying questions and end with exactly this marker: + + + +Sign the response as: +-- {coder_signature} +""" + + +def build_plan_review_prompt( + issue_number: int, + round_number: int, + plan: str, + config: AgentLoopConfig, + *, + reviewer: AgentName, + memory: AgentMemoryContext | None = None, + issue_context: IssueContext | None = None, +) -> str: + coder_name = agent_display_name(config.coder) + reviewer_signature = agent_signature(reviewer) + reviewer_group = format_agent_list(reviewers(config)) + return f"""Review the implementation plan for GitHub issue #{issue_number} in {config.repo} (planning round {round_number}). + +Use this local checkout only to inspect context. Do not edit files, create a +branch, commit, push, or open a pull request during this planning review. +{_scratch_file_guidance()} +{_issue_context_block(issue_context)} +{_memory_block(memory)} + +Plan from {coder_name}: + +{plan} + +Review the plan for correctness, architecture fit, missing edge cases, test +strategy, and ambiguity. Use blocking only for issues that should be resolved +before implementation starts. All configured reviewers ({reviewer_group}) must +approve in the same planning round before implementation can proceed. + +End your final response with exactly one planning marker: + + + +or: + + + +Use approved only if there are no blocking plan issues. Always sign your response: +-- {reviewer_signature} +""" + + +def build_plan_revision_prompt( + issue_number: int, + round_number: int, + previous_plan: str, + review: str, + config: AgentLoopConfig, + memory: AgentMemoryContext | None = None, + issue_context: IssueContext | None = None, +) -> str: + reviewer_name = format_agent_list(reviewers(config)) + coder_signature = agent_signature(config.coder) + return f"""{reviewer_name} reviewed the implementation plan for GitHub issue #{issue_number} in {config.repo} and found blocking issues. + +Revise the plan in this local checkout without editing code. Do not create a +branch, commit, push, or open a pull request during this planning stage. +{_scratch_file_guidance()} +{_issue_context_block(issue_context)} +{_memory_block(memory)} + +Previous plan: + +{previous_plan} + +{reviewer_name} plan review: + +{review} + +This is planning round {round_number}. End your final response with exactly one +planning marker: + + + +Use blocking to hand the revised plan back to {reviewer_name}. Sign the response as: +-- {coder_signature} +""" + + +def build_issue_implementation_prompt( + issue_number: int, + approved_plan: str, + config: AgentLoopConfig, + memory: AgentMemoryContext | None = None, + issue_context: IssueContext | None = None, +) -> str: + reviewer_name = format_agent_list(reviewers(config)) + coder_signature = agent_signature(config.coder) + return f"""Implement the approved plan for GitHub issue #{issue_number} in {config.repo}. + +Use this local checkout as your workspace. Create a branch, implement the +approved plan, run relevant tests, commit, push, and open a pull request against +{config.base}. +{_scratch_file_guidance()} +{_issue_context_block(issue_context)} +{_memory_block(memory)} + +Approved implementation plan: + +{approved_plan} + +Do not wait for {reviewer_name} yourself; this local orchestrator will run +{reviewer_name} after you create the PR. In your final response, include the PR +number using exactly this marker: + + + +Also include exactly one state marker: + + + +Use blocking here to hand the PR to {reviewer_name} for review. Sign the response as: +-- {coder_signature} +""" + + def build_task_prompt( task_text: str, config: AgentLoopConfig, diff --git a/src/coding_review_agent_loop/protocol.py b/src/coding_review_agent_loop/protocol.py index 27f9688..705fc9c 100644 --- a/src/coding_review_agent_loop/protocol.py +++ b/src/coding_review_agent_loop/protocol.py @@ -8,6 +8,7 @@ from .errors import AgentLoopError STATE_RE = re.compile(r"", re.I) +PLAN_STATE_RE = re.compile(r"", re.I) PR_RE = re.compile(r"", re.I) GH_PR_URL_RE = re.compile(r"/pull/(\d+)(?:\b|$)") CLARIFY_RE = re.compile(r"", re.I) @@ -44,6 +45,15 @@ def parse_agent_state(text: str) -> str: return matches[-1].lower() +def parse_plan_state(text: str) -> str: + matches = PLAN_STATE_RE.findall(text) + if not matches: + raise AgentLoopError( + "Agent response did not include " + ) + return matches[-1].lower() + + def parse_pr_number(text: str) -> int | None: marker = PR_RE.search(text) if marker: diff --git a/tests/test_agent_loop.py b/tests/test_agent_loop.py index cf3d9a7..ae02558 100644 --- a/tests/test_agent_loop.py +++ b/tests/test_agent_loop.py @@ -169,6 +169,14 @@ def run(self, args, *, cwd, input_text=None, check=True): self.comments.append(cmd[cmd.index("--body") + 1]) return CommandResult(cmd, cwd_path, "", "", 0) + if cmd[:3] == ["gh", "issue", "comment"]: + if "--body-file" in cmd: + body_path = Path(cmd[cmd.index("--body-file") + 1]) + self.comments.append(body_path.read_text(encoding="utf-8")) + elif "--body" in cmd: + self.comments.append(cmd[cmd.index("--body") + 1]) + return CommandResult(cmd, cwd_path, "", "", 0) + if cmd[:3] == ["gh", "issue", "create"]: title = cmd[cmd.index("--title") + 1] if "--body-file" in cmd: @@ -2097,6 +2105,82 @@ def test_issue_loop_rejects_pr_number_before_running_claude(tmp_path): assert not any(cmd[:1] == ["claude"] for cmd, _cwd in runner.commands) +def test_issue_loop_plan_first_stops_after_approved_plan(tmp_path): + runner = FakeRunner( + claude_outputs=[ + "Plan:\n- Update the CLI.\n- Add tests.\n\n-- Anthropic Claude", + ], + codex_outputs=[ + "Plan looks sound.\n\n-- OpenAI Codex", + ], + ) + config = make_config(tmp_path) + + assert run_issue_loop(runner, issue_number=56, config=config, plan_first=True) == 0 + + assert any(cmd[:3] == ["claude", "--print", "--output-format"] for cmd, _cwd in runner.commands) + assert any(cmd[:2] == ["codex", "exec"] for cmd, _cwd in runner.commands) + assert not any(cmd[:3] == ["gh", "pr", "view"] for cmd, _cwd in runner.commands) + assert len(runner.comments) == 3 + assert runner.comments[0].startswith("Plan:") + assert runner.comments[1].startswith("Plan looks sound.") + assert "Outcome: implement" in runner.comments[2] + + +def test_issue_loop_plan_first_revises_until_all_reviewers_approve(tmp_path): + runner = FakeRunner( + claude_outputs=[ + "Initial plan.\n\n-- Anthropic Claude", + "Revised plan with tests.\n\n-- Anthropic Claude", + ], + codex_outputs=[ + "Missing test strategy.\n\n-- OpenAI Codex", + "Plan looks sound.\n\n-- OpenAI Codex", + ], + ) + config = make_config(tmp_path) + + assert run_issue_loop(runner, issue_number=56, config=config, plan_first=True) == 0 + + claude_calls = [cmd for cmd, _cwd in runner.commands if cmd[:1] == ["claude"]] + assert len(claude_calls) == 2 + assert "Missing test strategy" in claude_calls[1][-1] + assert len(runner.comments) == 5 + assert runner.comments[2].startswith("Revised plan") + + +def test_issue_loop_plan_first_can_implement_after_approval(tmp_path): + runner = FakeRunner( + claude_outputs=[ + "Plan:\n- Make the change.\n\n-- Anthropic Claude", + "Implemented approved plan.\n\n\n-- Anthropic Claude", + ], + codex_outputs=[ + "Plan looks sound.\n\n-- OpenAI Codex", + "LGTM.\n\n-- OpenAI Codex", + ], + ) + config = make_config(tmp_path) + + assert ( + run_issue_loop( + runner, + issue_number=56, + config=config, + plan_first=True, + implement_after_approval=True, + ) + == 0 + ) + + claude_calls = [cmd for cmd, _cwd in runner.commands if cmd[:1] == ["claude"]] + assert len(claude_calls) == 2 + assert "Approved implementation plan" in claude_calls[1][-1] + assert len(runner.comments) == 5 + assert runner.comments[3].startswith("Implemented approved plan.") + assert runner.comments[4].startswith("LGTM.") + + def test_is_clarification_request_detects_marker(): assert is_clarification_request("need more info\n") assert is_clarification_request("")