Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/local_agent_loop.md
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ agent-loop issue 56 \
--gemini-arg=--approval-mode --gemini-arg=auto_edit
```

Providing any `--claude-arg`, `--codex-arg`, or `--gemini-arg` replaces that agent's default entirely. Gemini's text output is used directly. If you pass `--gemini-arg=--output-format --gemini-arg=json`, the loop extracts the JSON `response` field before parsing markers.
Providing any `--claude-arg`, `--codex-arg`, or `--gemini-arg` replaces that agent's default arguments entirely. Claude and Gemini prompts include a tool-owned response-file path under `coding-review-agent-loop/responses/` in the system temporary directory (typically `/tmp/coding-review-agent-loop/responses/`); when the file exists and is non-empty, the loop posts that file instead of stdout so CLI diagnostics and tool narration do not leak into GitHub comments. Gemini still supports stdout marker filtering as a fallback. If you pass `--gemini-arg=--output-format --gemini-arg=json`, the loop extracts the JSON `response` field before parsing markers when no response file was written.

## Protocol

Expand Down
44 changes: 44 additions & 0 deletions src/coding_review_agent_loop/agents/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from dataclasses import dataclass
from pathlib import Path
import tempfile
import uuid
from typing import TYPE_CHECKING, Literal, Protocol

from ..runner import Runner
Expand Down Expand Up @@ -36,3 +38,45 @@ def run(
prompt: str,
session_id: str | None = None,
) -> AgentResult: ...


def _safe_repo_slug(repo: str) -> str:
    """Return *repo* with every ``/`` and ``:`` replaced by ``-``.

    The result is used as a single directory name, so the separators that
    would otherwise split or qualify a path component are flattened.
    """
    separators_to_dash = str.maketrans("/:", "--")
    return repo.translate(separators_to_dash)


def public_response_path(config: AgentLoopConfig, agent: AgentName) -> Path:
    """Return a fresh ``.md`` path for one agent run's public response.

    The path has the shape
    ``<tempdir>/coding-review-agent-loop/responses/<repo-slug>/<agent>/<uuid4-hex>.md``.
    The parent directory is created if needed; the file itself is not
    created here.
    """
    response_dir = Path(
        tempfile.gettempdir(),
        "coding-review-agent-loop",
        "responses",
        _safe_repo_slug(config.repo),
        agent,
    )
    response_dir.mkdir(parents=True, exist_ok=True)
    # A random filename keeps successive runs from reusing the same file.
    return response_dir / f"{uuid.uuid4().hex}.md"


def with_public_response_file_instruction(prompt: str, response_path: Path) -> str:
    """Append the "PUBLIC RESPONSE FILE" instruction block to *prompt*.

    The returned text is the original prompt, a blank line, and a fixed
    instruction telling the agent to write its final GitHub-facing response
    (including the protocol markers) to *response_path*.
    """
    instruction_lines = (
        "PUBLIC RESPONSE FILE:",
        "",
        "Write the final public response that should be posted to GitHub to this file:",
        "",
        str(response_path),
        "",
        "The orchestrator will post only that file's contents when it exists and is",
        "non-empty. Keep internal tool narration, planning notes, diagnostics, and",
        "scratch output out of that file. Include the required AGENT_STATE / AGENT_PR /",
        "AGENT_CLARIFY markers in the file, as requested above.",
    )
    return f"{prompt}\n\n" + "\n".join(instruction_lines) + "\n"


def read_public_response_file(response_path: Path) -> str | None:
    """Return the agent-written public response, or ``None`` if unusable.

    Args:
        response_path: File the agent was asked to write its final response to.

    Returns:
        The file's text with surrounding whitespace stripped, or ``None`` when
        the file is missing, unreadable, not valid UTF-8, or blank. ``None``
        lets callers fall back to another source with ``... or text``.
    """
    try:
        text = response_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # The file is produced by an external agent CLI, so it may be absent,
        # a directory, unreadable, or badly encoded. None of those should
        # abort the caller after the agent has already finished its work;
        # report "no usable file" instead.
        return None
    return text.strip() or None
15 changes: 11 additions & 4 deletions src/coding_review_agent_loop/agents/claude.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@
from pathlib import Path
from typing import TYPE_CHECKING

from .base import AgentName, AgentResult
from .base import (
AgentName,
AgentResult,
public_response_path,
read_public_response_file,
with_public_response_file_instruction,
)
from ..logging import agent_log_path, log
from ..runner import Runner

Expand Down Expand Up @@ -46,12 +52,13 @@ def run(
prompt: str,
session_id: str | None = None,
) -> AgentResult:
response_path = public_response_path(config, "claude")
args = [config.claude_cmd, "--print", "--output-format", "json", *config.claude_args]
if session_id:
args += ["--resume", session_id]
args.append(prompt)
args.append(with_public_response_file_instruction(prompt, response_path))
log_path = agent_log_path(config, "claude")
log(config, f"Starting Claude in {config.claude_dir}; log: {log_path}")
log(config, f"Starting Claude in {config.claude_dir}; log: {log_path}; response: {response_path}")
result = runner.run_with_log(
args,
cwd=config.claude_dir,
Expand All @@ -61,7 +68,7 @@ def run(
)
log(config, f"Claude finished; log: {log_path}")
text, new_session_id = _parse_claude_output(result.stdout)
return AgentResult(text=text, session_id=new_session_id)
return AgentResult(text=read_public_response_file(response_path) or text, session_id=new_session_id)


BACKEND = ClaudeBackend()
17 changes: 13 additions & 4 deletions src/coding_review_agent_loop/agents/gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@
from pathlib import Path
from typing import TYPE_CHECKING

from .base import AgentName, AgentResult
from .base import (
AgentName,
AgentResult,
public_response_path,
read_public_response_file,
with_public_response_file_instruction,
)
from ..logging import agent_log_path, log
from ..protocol import CLARIFY_RE, STATE_RE
from ..runner import Runner
Expand Down Expand Up @@ -94,12 +100,15 @@ def run(
prompt: str,
session_id: str | None = None,
) -> AgentResult:
response_path = public_response_path(config, "gemini")
log_path = agent_log_path(config, "gemini")
log(config, f"Starting Gemini in {config.gemini_dir}; log: {log_path}")
log(config, f"Starting Gemini in {config.gemini_dir}; log: {log_path}; response: {response_path}")
args = [
config.gemini_cmd,
"--prompt",
_with_public_response_marker_instruction(prompt),
_with_public_response_marker_instruction(
with_public_response_file_instruction(prompt, response_path)
),
*config.gemini_args,
]
if session_id:
Expand All @@ -113,7 +122,7 @@ def run(
)
log(config, f"Gemini finished; log: {log_path}")
text, new_session_id = _parse_gemini_output(result.stdout)
return AgentResult(text=text, session_id=new_session_id)
return AgentResult(text=read_public_response_file(response_path) or text, session_id=new_session_id)


BACKEND = GeminiBackend()
66 changes: 66 additions & 0 deletions tests/test_agent_loop.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import re
from pathlib import Path

import pytest
Expand Down Expand Up @@ -46,6 +47,7 @@ def __init__(
diff_returncode=0,
diff_stderr="",
issue_urls=None,
public_response_outputs=None,
):
super().__init__(dry_run=False)
self.claude_outputs = list(claude_outputs or [])
Expand Down Expand Up @@ -83,6 +85,7 @@ def __init__(
self.diff_returncode = diff_returncode
self.diff_stderr = diff_stderr
self.issue_urls = list(issue_urls) if issue_urls is not None else None
self.public_response_outputs = list(public_response_outputs or [])

def _record_command(self, args, cwd):
cmd = [str(arg) for arg in args]
Expand All @@ -92,6 +95,17 @@ def _record_command(self, args, cwd):
self.commands.append((cmd, cwd_path))
return cmd, cwd_path

def _maybe_write_public_response_file(self, cmd):
    """Simulate an agent writing its public response file.

    Does nothing when no canned responses are queued or when the command
    text contains no response-file path. Otherwise the next queued response
    is consumed and written to the path found in the prompt.
    """
    if self.public_response_outputs:
        found = re.search(
            r"Write the final public response.*?\n\n([^\n]+/responses/[^\n]+\.md)",
            "\n".join(cmd),
            re.S,
        )
        if found is not None:
            target = Path(found.group(1))
            target.parent.mkdir(parents=True, exist_ok=True)
            # Only pop once a path was located, so unmatched calls leave the queue intact.
            target.write_text(self.public_response_outputs.pop(0), encoding="utf-8")

def run_with_log(
self,
args,
Expand All @@ -108,6 +122,7 @@ def run_with_log(

if cmd[:1] == ["claude"]:
output = self.claude_outputs.pop(0)
self._maybe_write_public_response_file(cmd)
log_path.write_text(f"$ {' '.join(cmd)}\n\n{output}", encoding="utf-8")
return CommandResult(cmd, cwd_path, output, "", 0)

Expand All @@ -121,6 +136,7 @@ def run_with_log(

if cmd[:1] == ["gemini"]:
output = self.gemini_outputs.pop(0)
self._maybe_write_public_response_file(cmd)
log_path.write_text(f"$ {' '.join(cmd)}\n\n{output}", encoding="utf-8")
return CommandResult(cmd, cwd_path, output, "", 0)

Expand Down Expand Up @@ -2230,6 +2246,56 @@ def test_gemini_review_loop_uses_prompt_and_extra_args(tmp_path):
assert runner.comments == ["LGTM.\n<!-- AGENT_STATE: approved -->\n-- Google Gemini"]


def test_gemini_review_loop_prefers_public_response_file_over_stdout(tmp_path):
    """The posted comment comes from the response file, not Gemini's noisy stdout."""
    noisy_stdout = (
        "Warning: True color (24-bit) support not detected.\n"
        "YOLO mode is enabled. All tool calls will be automatically approved.\n"
        "I will fetch the PR and inspect the diff.\n"
        "Error executing tool run_shell_command: confirmation required.\n"
        "This stdout chatter should not be posted.\n"
    )
    file_response = "LGTM from response file.\n<!-- AGENT_STATE: approved -->\n-- Google Gemini"
    runner = FakeRunner(
        gemini_outputs=[noisy_stdout],
        public_response_outputs=[file_response],
    )
    config = make_config(tmp_path, reviewer="gemini")

    exit_code = run_pr_loop(runner, pr_number=77, config=config)
    assert exit_code == 0

    gemini_call = next(cmd for cmd, _cwd in runner.commands if cmd[:1] == ["gemini"])
    # The prompt is the argument right after "--prompt".
    prompt_arg = gemini_call[2]
    assert "PUBLIC RESPONSE FILE:" in prompt_arg
    assert "/coding-review-agent-loop/responses/OWNER-REPO/gemini/" in prompt_arg
    assert runner.comments == ["LGTM from response file.\n<!-- AGENT_STATE: approved -->\n-- Google Gemini"]


def test_claude_review_loop_prefers_public_response_file_over_stdout(tmp_path):
    """The posted comment comes from the response file, not Claude's JSON ``result``."""
    chatty_payload = {
        "result": (
            "I will inspect the PR diff.\n"
            "Tool output chatter should not be posted.\n"
        ),
        "session_id": "claude-session-1",
    }
    file_response = "LGTM from response file.\n<!-- AGENT_STATE: approved -->\n-- Anthropic Claude"
    runner = FakeRunner(
        claude_outputs=[json.dumps(chatty_payload)],
        public_response_outputs=[file_response],
    )
    config = make_config(tmp_path, reviewer="claude")

    exit_code = run_pr_loop(runner, pr_number=77, config=config)
    assert exit_code == 0

    claude_call = next(cmd for cmd, _cwd in runner.commands if cmd[:1] == ["claude"])
    # The prompt is the last command-line argument.
    prompt_arg = claude_call[-1]
    assert "PUBLIC RESPONSE FILE:" in prompt_arg
    assert "/coding-review-agent-loop/responses/OWNER-REPO/claude/" in prompt_arg
    assert runner.comments == ["LGTM from response file.\n<!-- AGENT_STATE: approved -->\n-- Anthropic Claude"]


def test_codex_task_loop_rejects_empty_task_text(tmp_path):
runner = FakeRunner()
config = make_config(tmp_path, coder="codex", reviewer="claude")
Expand Down
Loading