diff --git a/docs/local_agent_loop.md b/docs/local_agent_loop.md index 2920870..81a5973 100644 --- a/docs/local_agent_loop.md +++ b/docs/local_agent_loop.md @@ -185,9 +185,15 @@ by repo and agent: The tool prints the selected default workdirs. If a default checkout does not exist, it runs `gh repo clone OWNER/REPO `. If it already exists and is a clean checkout for the requested repo, it fetches origin and fast-forwards the -configured base branch. If the checkout is dirty, points at another repo, or is -not a git checkout, the command fails clearly instead of overwriting local -work. +configured base branch. Default checkouts are disposable tool-owned temp +workdirs: if one is dirty, the tool logs the cleanup, runs `git reset --hard` +and `git clean -fd` inside that generated repo checkout, then syncs it to the +configured base branch. A default path that points at another repo or is not a +git checkout still fails clearly. + +Explicit `--claude-dir`, `--codex-dir`, and `--gemini-dir` workdirs are treated +conservatively. If an explicit git checkout is dirty, the command fails and asks +you to commit, stash, or clean it yourself. These temporary checkouts may disappear after reboot or `/tmp` cleanup. Large projects and long-lived agent setups should use explicit persistent workdirs to diff --git a/src/coding_review_agent_loop/config.py b/src/coding_review_agent_loop/config.py index 568abe2..a1ff0f6 100644 --- a/src/coding_review_agent_loop/config.py +++ b/src/coding_review_agent_loop/config.py @@ -132,6 +132,29 @@ def _run_git(runner: Runner, path: Path, args: tuple[str, ...], *, check: bool = return runner.run(("git", *args), cwd=path, check=check) +def ensure_explicit_workdir( + path: Path, + *, + option_name: str, + agent: AgentName, + runner: Runner, +) -> None: + ensure_workdir(path, option_name) + if not (path / ".git").exists(): + return + + git_check = _run_git(runner, path, ("rev-parse", "--is-inside-work-tree"), check=False) + if git_check.returncode != 0 or git_check.stdout.strip() != "true": + return + + status = _run_git(runner, path, ("status", "--porcelain")).stdout.strip() + if status: + raise AgentLoopError( + f"Explicit {agent} workdir is dirty: {path}. " + "Commit, stash, or clean it before rerunning." + ) + + def ensure_temp_checkout(path: Path, *, agent: AgentName, config: AgentLoopConfig, runner: Runner) -> None: if not path.exists(): try: @@ -162,10 +185,9 @@ def ensure_temp_checkout(path: Path, *, agent: AgentName, config: AgentLoopConfi status = _run_git(runner, path, ("status", "--porcelain")).stdout.strip() if status: - raise AgentLoopError( - f"Default {agent} workdir is dirty: {path}. " - "Commit, stash, or clean it before rerunning, or pass an explicit agent directory." - ) + log(config, f"Cleaning dirty default {agent} workdir before reuse: {path}") + _run_git(runner, path, ("reset", "--hard")) + _run_git(runner, path, ("clean", "-fd")) _run_git(runner, path, ("fetch", "origin")) checkout = _run_git(runner, path, ("checkout", config.base), check=False) @@ -188,7 +210,7 @@ def ensure_agent_workdirs(config: AgentLoopConfig, runner: Runner) -> None: log(config, f"Using default {agent} workdir: {path}") ensure_temp_checkout(path, agent=agent, config=config, runner=runner) else: - ensure_workdir(path, option) + ensure_explicit_workdir(path, option_name=option, agent=agent, runner=runner) ensure_distinct_workdirs(config) diff --git a/src/coding_review_agent_loop/prompts.py b/src/coding_review_agent_loop/prompts.py index 4c3fb69..84c458b 100644 --- a/src/coding_review_agent_loop/prompts.py +++ b/src/coding_review_agent_loop/prompts.py @@ -27,6 +27,14 @@ def _memory_block(memory: AgentMemoryContext | None) -> str: return f"Agent memory context:\n{text}\n" +SCRATCH_FILE_GUIDANCE = ( + "If you need temporary scratch files while inspecting diffs or command output, " + "write them outside the repository checkout, for example under " + "/tmp/coding-review-agent-loop/scratch/. Do not create temporary files in the " + "repo worktree unless they are intended project changes." +) + + def build_issue_prompt( issue_number: int, config: AgentLoopConfig, @@ -38,6 +46,7 @@ def build_issue_prompt( Use this local checkout as your workspace. Create a branch, implement the fix, run relevant tests, commit, push, and open a pull request against {config.base}. +{SCRATCH_FILE_GUIDANCE} {_memory_block(memory)} Do not wait for {reviewer_name} yourself; this local orchestrator will run {reviewer_name} after @@ -69,6 +78,7 @@ def build_task_prompt( {_memory_block(memory)} Use this local checkout as your workspace. Decide between two paths: +{SCRATCH_FILE_GUIDANCE} (a) If the task is clear enough to implement, create a branch, implement the change, run relevant tests, commit, push, and open a pull request against @@ -116,6 +126,7 @@ def build_task_clarification_prompt( Now proceed. Strongly prefer to implement the task and open a PR. Only ask again if a critical detail is still missing. Use the same response markers as before: +{SCRATCH_FILE_GUIDANCE} - For implementation: include both and at the end of your final response. @@ -206,6 +217,7 @@ def build_review_prompt( Focus on correctness, security, test coverage, and maintainability. Review the full diff and any existing PR discussion. Do not make code changes in this review step; report blocking findings if {coder_name} needs to fix anything. +{SCRATCH_FILE_GUIDANCE} {followup_guidance} Use blocking only for issues that should prevent merge. All configured reviewers ({reviewer_group}) must approve in the same round for @@ -238,6 +250,7 @@ def build_followup_prompt( Address the review below in this local checkout. Pull/sync the PR branch if needed, implement fixes, run relevant tests, commit, and push to the same PR. Do not create a new PR. +{SCRATCH_FILE_GUIDANCE} {_memory_block(memory)} {reviewer_name} review: @@ -269,6 +282,7 @@ def build_same_pr_followup_prompt( Address the follow-up items below in this local checkout. Pull/sync the PR branch if needed, implement fixes, run relevant tests, commit, and push to the same PR. Do not create a new PR. +{SCRATCH_FILE_GUIDANCE} {_memory_block(memory)} Same-PR follow-ups: diff --git a/tests/test_agent_loop.py b/tests/test_agent_loop.py index 54bb0c3..b6a5371 100644 --- a/tests/test_agent_loop.py +++ b/tests/test_agent_loop.py @@ -25,6 +25,15 @@ default_agent_workdir, default_cache_root, ) +from coding_review_agent_loop.prompts import ( + SCRATCH_FILE_GUIDANCE, + build_followup_prompt, + build_issue_prompt, + build_review_prompt, + build_same_pr_followup_prompt, + build_task_clarification_prompt, + build_task_prompt, +) from coding_review_agent_loop.protocol import parse_approved_followups, parse_non_blocking_followups @@ -625,6 +634,28 @@ def test_review_prompt_includes_pr_metadata_and_suggested_commands(tmp_path): assert "Use blocking only for issues that should prevent merge." in prompt +def test_agent_prompts_keep_scratch_files_outside_repo_worktree(tmp_path): + config = make_config(tmp_path) + + prompts = [ + build_issue_prompt(56, config), + build_task_prompt("Fix the bug", config), + build_task_clarification_prompt( + "Fix the bug", + [("Which bug?", "The failing parser bug.")], + config, + ), + build_review_prompt(77, 1, config, reviewer="codex"), + build_followup_prompt(77, 2, "Please add a regression test.", config), + build_same_pr_followup_prompt(77, 2, "Please rename the helper.", config), + ] + + for prompt in prompts: + assert SCRATCH_FILE_GUIDANCE in prompt + assert "/tmp/coding-review-agent-loop/scratch/" in prompt + assert "Do not create temporary files in the repo worktree" in prompt + + def test_review_prompt_allows_same_pr_followups_for_fix_modes(tmp_path): runner = FakeRunner(codex_outputs=["LGTM.\n\n-- OpenAI Codex"]) config = make_config(tmp_path, approved_followups="fix-and-summarize") @@ -1448,8 +1479,11 @@ def test_clean_existing_auto_agent_dir_is_synced(tmp_path): assert ["git", "pull", "--ff-only", "origin", "main"] in commands -def test_dirty_existing_auto_agent_dir_fails_clearly(tmp_path): - runner = FakeRunner(git_status=" M file.py\n") +def test_dirty_existing_auto_agent_dir_is_cleaned_before_reuse(tmp_path, capsys): + runner = FakeRunner( + codex_outputs=["LGTM.\n\n-- OpenAI Codex"], + git_status=" M file.py\n?? scratch.txt\n", + ) codex_dir = tmp_path / "codex" codex_dir.mkdir() config = make_config( @@ -1457,14 +1491,39 @@ def test_dirty_existing_auto_agent_dir_fails_clearly(tmp_path): codex_dir=codex_dir, reviewer="codex", auto_agent_dirs=("codex",), + quiet=False, create_dirs=False, ) config.claude_dir.mkdir(parents=True) config.gemini_dir.mkdir(parents=True) - with pytest.raises(AgentLoopError, match="dirty"): + assert run_pr_loop(runner, pr_number=77, config=config) == 0 + + commands = [cmd for cmd, _cwd in runner.commands] + assert ["git", "reset", "--hard"] in commands + assert ["git", "clean", "-fd"] in commands + assert ["git", "fetch", "origin"] in commands + assert any(cmd[:2] == ["codex", "exec"] for cmd, _cwd in runner.commands) + captured = capsys.readouterr() + assert f"Cleaning dirty default codex workdir before reuse: {codex_dir}" in captured.err + + +def test_dirty_explicit_agent_dir_still_fails_clearly(tmp_path): + runner = FakeRunner(git_status=" M file.py\n") + codex_dir = tmp_path / "codex" + config = make_config( + tmp_path, + codex_dir=codex_dir, + reviewer="codex", + ) + (codex_dir / ".git").mkdir() + + with pytest.raises(AgentLoopError, match="Explicit codex workdir is dirty"): run_pr_loop(runner, pr_number=77, config=config) + commands = [cmd for cmd, _cwd in runner.commands] + assert ["git", "reset", "--hard"] not in commands + assert ["git", "clean", "-fd"] not in commands assert not any(cmd[:2] == ["codex", "exec"] for cmd, _cwd in runner.commands)