Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions docs/local_agent_loop.md
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,15 @@ by repo and agent:
The tool prints the selected default workdirs. If a default checkout does not
exist, it runs `gh repo clone OWNER/REPO <path>`. If it already exists and is a
clean checkout for the requested repo, it fetches origin and fast-forwards the
configured base branch. If the checkout is dirty, points at another repo, or is
not a git checkout, the command fails clearly instead of overwriting local
work.
configured base branch. Default checkouts are disposable tool-owned temp
workdirs: if one is dirty, the tool logs the cleanup, runs `git reset --hard`
and `git clean -fd` inside that generated repo checkout, then syncs it to the
configured base branch. A default path that points at another repo or is not a
git checkout still fails clearly.

Explicit `--claude-dir`, `--codex-dir`, and `--gemini-dir` workdirs are treated
conservatively. If an explicit git checkout is dirty, the command fails and asks
you to commit, stash, or clean it yourself.

These temporary checkouts may disappear after reboot or `/tmp` cleanup. Large
projects and long-lived agent setups should use explicit persistent workdirs to
Expand Down
32 changes: 27 additions & 5 deletions src/coding_review_agent_loop/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,29 @@ def _run_git(runner: Runner, path: Path, args: tuple[str, ...], *, check: bool =
return runner.run(("git", *args), cwd=path, check=check)


def ensure_explicit_workdir(
path: Path,
*,
option_name: str,
agent: AgentName,
runner: Runner,
) -> None:
ensure_workdir(path, option_name)
if not (path / ".git").exists():
return

git_check = _run_git(runner, path, ("rev-parse", "--is-inside-work-tree"), check=False)
if git_check.returncode != 0 or git_check.stdout.strip() != "true":
return

status = _run_git(runner, path, ("status", "--porcelain")).stdout.strip()
if status:
raise AgentLoopError(
f"Explicit {agent} workdir is dirty: {path}. "
"Commit, stash, or clean it before rerunning."
)


def ensure_temp_checkout(path: Path, *, agent: AgentName, config: AgentLoopConfig, runner: Runner) -> None:
if not path.exists():
try:
Expand Down Expand Up @@ -162,10 +185,9 @@ def ensure_temp_checkout(path: Path, *, agent: AgentName, config: AgentLoopConfi

status = _run_git(runner, path, ("status", "--porcelain")).stdout.strip()
if status:
raise AgentLoopError(
f"Default {agent} workdir is dirty: {path}. "
"Commit, stash, or clean it before rerunning, or pass an explicit agent directory."
)
log(config, f"Cleaning dirty default {agent} workdir before reuse: {path}")
_run_git(runner, path, ("reset", "--hard"))
_run_git(runner, path, ("clean", "-fd"))

_run_git(runner, path, ("fetch", "origin"))
checkout = _run_git(runner, path, ("checkout", config.base), check=False)
Expand All @@ -188,7 +210,7 @@ def ensure_agent_workdirs(config: AgentLoopConfig, runner: Runner) -> None:
log(config, f"Using default {agent} workdir: {path}")
ensure_temp_checkout(path, agent=agent, config=config, runner=runner)
else:
ensure_workdir(path, option)
ensure_explicit_workdir(path, option_name=option, agent=agent, runner=runner)
ensure_distinct_workdirs(config)


Expand Down
14 changes: 14 additions & 0 deletions src/coding_review_agent_loop/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ def _memory_block(memory: AgentMemoryContext | None) -> str:
return f"Agent memory context:\n{text}\n"


SCRATCH_FILE_GUIDANCE = (
"If you need temporary scratch files while inspecting diffs or command output, "
"write them outside the repository checkout, for example under "
"/tmp/coding-review-agent-loop/scratch/. Do not create temporary files in the "
"repo worktree unless they are intended project changes."
)


def build_issue_prompt(
issue_number: int,
config: AgentLoopConfig,
Expand All @@ -38,6 +46,7 @@ def build_issue_prompt(

Use this local checkout as your workspace. Create a branch, implement the fix,
run relevant tests, commit, push, and open a pull request against {config.base}.
{SCRATCH_FILE_GUIDANCE}
{_memory_block(memory)}

Do not wait for {reviewer_name} yourself; this local orchestrator will run {reviewer_name} after
Expand Down Expand Up @@ -69,6 +78,7 @@ def build_task_prompt(
{_memory_block(memory)}

Use this local checkout as your workspace. Decide between two paths:
{SCRATCH_FILE_GUIDANCE}

(a) If the task is clear enough to implement, create a branch, implement the
change, run relevant tests, commit, push, and open a pull request against
Expand Down Expand Up @@ -116,6 +126,7 @@ def build_task_clarification_prompt(
Now proceed. Strongly prefer to implement the task and open a PR. Only ask
again if a critical detail is still missing. Use the same response markers as
before:
{SCRATCH_FILE_GUIDANCE}

- For implementation: include both <!-- AGENT_PR: <number> --> and
<!-- AGENT_STATE: blocking --> at the end of your final response.
Expand Down Expand Up @@ -206,6 +217,7 @@ def build_review_prompt(
Focus on correctness, security, test coverage, and maintainability. Review the
full diff and any existing PR discussion. Do not make code changes in this
review step; report blocking findings if {coder_name} needs to fix anything.
{SCRATCH_FILE_GUIDANCE}
{followup_guidance}
Use blocking only for issues that should prevent merge.
All configured reviewers ({reviewer_group}) must approve in the same round for
Expand Down Expand Up @@ -238,6 +250,7 @@ def build_followup_prompt(
Address the review below in this local checkout. Pull/sync the PR branch if
needed, implement fixes, run relevant tests, commit, and push to the same PR.
Do not create a new PR.
{SCRATCH_FILE_GUIDANCE}
{_memory_block(memory)}

{reviewer_name} review:
Expand Down Expand Up @@ -269,6 +282,7 @@ def build_same_pr_followup_prompt(
Address the follow-up items below in this local checkout. Pull/sync the PR
branch if needed, implement fixes, run relevant tests, commit, and push to the
same PR. Do not create a new PR.
{SCRATCH_FILE_GUIDANCE}
{_memory_block(memory)}

Same-PR follow-ups:
Expand Down
65 changes: 62 additions & 3 deletions tests/test_agent_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@
default_agent_workdir,
default_cache_root,
)
from coding_review_agent_loop.prompts import (
SCRATCH_FILE_GUIDANCE,
build_followup_prompt,
build_issue_prompt,
build_review_prompt,
build_same_pr_followup_prompt,
build_task_clarification_prompt,
build_task_prompt,
)
from coding_review_agent_loop.protocol import parse_approved_followups, parse_non_blocking_followups


Expand Down Expand Up @@ -625,6 +634,28 @@ def test_review_prompt_includes_pr_metadata_and_suggested_commands(tmp_path):
assert "Use blocking only for issues that should prevent merge." in prompt


def test_agent_prompts_keep_scratch_files_outside_repo_worktree(tmp_path):
config = make_config(tmp_path)

prompts = [
build_issue_prompt(56, config),
build_task_prompt("Fix the bug", config),
build_task_clarification_prompt(
"Fix the bug",
[("Which bug?", "The failing parser bug.")],
config,
),
build_review_prompt(77, 1, config, reviewer="codex"),
build_followup_prompt(77, 2, "Please add a regression test.", config),
build_same_pr_followup_prompt(77, 2, "Please rename the helper.", config),
]

for prompt in prompts:
assert SCRATCH_FILE_GUIDANCE in prompt
assert "/tmp/coding-review-agent-loop/scratch/" in prompt
assert "Do not create temporary files in the repo worktree" in prompt


def test_review_prompt_allows_same_pr_followups_for_fix_modes(tmp_path):
runner = FakeRunner(codex_outputs=["LGTM.\n<!-- AGENT_STATE: approved -->\n-- OpenAI Codex"])
config = make_config(tmp_path, approved_followups="fix-and-summarize")
Expand Down Expand Up @@ -1448,23 +1479,51 @@ def test_clean_existing_auto_agent_dir_is_synced(tmp_path):
assert ["git", "pull", "--ff-only", "origin", "main"] in commands


def test_dirty_existing_auto_agent_dir_fails_clearly(tmp_path):
runner = FakeRunner(git_status=" M file.py\n")
def test_dirty_existing_auto_agent_dir_is_cleaned_before_reuse(tmp_path, capsys):
runner = FakeRunner(
codex_outputs=["LGTM.\n<!-- AGENT_STATE: approved -->\n-- OpenAI Codex"],
git_status=" M file.py\n?? scratch.txt\n",
)
codex_dir = tmp_path / "codex"
codex_dir.mkdir()
config = make_config(
tmp_path,
codex_dir=codex_dir,
reviewer="codex",
auto_agent_dirs=("codex",),
quiet=False,
create_dirs=False,
)
config.claude_dir.mkdir(parents=True)
config.gemini_dir.mkdir(parents=True)

with pytest.raises(AgentLoopError, match="dirty"):
assert run_pr_loop(runner, pr_number=77, config=config) == 0

commands = [cmd for cmd, _cwd in runner.commands]
assert ["git", "reset", "--hard"] in commands
assert ["git", "clean", "-fd"] in commands
assert ["git", "fetch", "origin"] in commands
assert any(cmd[:2] == ["codex", "exec"] for cmd, _cwd in runner.commands)
captured = capsys.readouterr()
assert f"Cleaning dirty default codex workdir before reuse: {codex_dir}" in captured.err


def test_dirty_explicit_agent_dir_still_fails_clearly(tmp_path):
runner = FakeRunner(git_status=" M file.py\n")
codex_dir = tmp_path / "codex"
config = make_config(
tmp_path,
codex_dir=codex_dir,
reviewer="codex",
)
(codex_dir / ".git").mkdir()

with pytest.raises(AgentLoopError, match="Explicit codex workdir is dirty"):
run_pr_loop(runner, pr_number=77, config=config)

commands = [cmd for cmd, _cwd in runner.commands]
assert ["git", "reset", "--hard"] not in commands
assert ["git", "clean", "-fd"] not in commands
assert not any(cmd[:2] == ["codex", "exec"] for cmd, _cwd in runner.commands)


Expand Down
Loading