From 79938d3bd12708640e0af6ad4b4ed098b42136d8 Mon Sep 17 00:00:00 2001 From: Wild Wind Date: Sat, 2 May 2026 19:08:59 -0700 Subject: [PATCH 1/2] Use response files for public agent output --- docs/local_agent_loop.md | 2 +- src/coding_review_agent_loop/agents/base.py | 44 +++++++++++++++++++ src/coding_review_agent_loop/agents/claude.py | 15 +++++-- src/coding_review_agent_loop/agents/gemini.py | 17 +++++-- tests/test_agent_loop.py | 39 ++++++++++++++++ 5 files changed, 108 insertions(+), 9 deletions(-) diff --git a/docs/local_agent_loop.md b/docs/local_agent_loop.md index 8b8f7f3..a8c7e5a 100644 --- a/docs/local_agent_loop.md +++ b/docs/local_agent_loop.md @@ -339,7 +339,7 @@ agent-loop issue 56 \ --gemini-arg=--approval-mode --gemini-arg=auto_edit ``` -Providing any `--claude-arg`, `--codex-arg`, or `--gemini-arg` replaces that agent's default entirely. Gemini's text output is used directly. If you pass `--gemini-arg=--output-format --gemini-arg=json`, the loop extracts the JSON `response` field before parsing markers. +Providing any `--claude-arg`, `--codex-arg`, or `--gemini-arg` replaces that agent's default entirely. Claude and Gemini prompts include a tool-owned response-file path under the system temporary directory (typically `/tmp/coding-review-agent-loop/responses/<owner>-<repo>/<agent>/`); when that file exists and is non-empty, the loop posts its contents instead of stdout so CLI diagnostics and tool narration do not leak into GitHub comments. Gemini still supports stdout marker filtering as a fallback. If you pass `--gemini-arg=--output-format --gemini-arg=json`, the loop extracts the JSON `response` field before parsing markers when no non-empty response file was written. 
## Protocol diff --git a/src/coding_review_agent_loop/agents/base.py b/src/coding_review_agent_loop/agents/base.py index 0292e4a..f711afd 100644 --- a/src/coding_review_agent_loop/agents/base.py +++ b/src/coding_review_agent_loop/agents/base.py @@ -4,6 +4,8 @@ from dataclasses import dataclass from pathlib import Path +import tempfile +import uuid from typing import TYPE_CHECKING, Literal, Protocol from ..runner import Runner @@ -36,3 +38,45 @@ def run( prompt: str, session_id: str | None = None, ) -> AgentResult: ... + + +def _safe_repo_slug(repo: str) -> str: + return repo.replace("/", "-").replace(":", "-") + + +def public_response_path(config: AgentLoopConfig, agent: AgentName) -> Path: + path = ( + Path(tempfile.gettempdir()) + / "coding-review-agent-loop" + / "responses" + / _safe_repo_slug(config.repo) + / agent + / f"{uuid.uuid4().hex}.md" + ) + path.parent.mkdir(parents=True, exist_ok=True) + return path + + +def with_public_response_file_instruction(prompt: str, response_path: Path) -> str: + return f"""{prompt} + +PUBLIC RESPONSE FILE: + +Write the final public response that should be posted to GitHub to this file: + +{response_path} + +The orchestrator will post only that file's contents when it exists and is +non-empty. Keep internal tool narration, planning notes, diagnostics, and +scratch output out of that file. Include the required AGENT_STATE / AGENT_PR / +AGENT_CLARIFY markers in the file, as requested above. 
+""" + + +def read_public_response_file(response_path: Path) -> str | None: + try: + text = response_path.read_text(encoding="utf-8") + except FileNotFoundError: + return None + text = text.strip() + return text or None diff --git a/src/coding_review_agent_loop/agents/claude.py b/src/coding_review_agent_loop/agents/claude.py index dad2318..888c39d 100644 --- a/src/coding_review_agent_loop/agents/claude.py +++ b/src/coding_review_agent_loop/agents/claude.py @@ -6,7 +6,13 @@ from pathlib import Path from typing import TYPE_CHECKING -from .base import AgentName, AgentResult +from .base import ( + AgentName, + AgentResult, + public_response_path, + read_public_response_file, + with_public_response_file_instruction, +) from ..logging import agent_log_path, log from ..runner import Runner @@ -46,12 +52,13 @@ def run( prompt: str, session_id: str | None = None, ) -> AgentResult: + response_path = public_response_path(config, "claude") args = [config.claude_cmd, "--print", "--output-format", "json", *config.claude_args] if session_id: args += ["--resume", session_id] - args.append(prompt) + args.append(with_public_response_file_instruction(prompt, response_path)) log_path = agent_log_path(config, "claude") - log(config, f"Starting Claude in {config.claude_dir}; log: {log_path}") + log(config, f"Starting Claude in {config.claude_dir}; log: {log_path}; response: {response_path}") result = runner.run_with_log( args, cwd=config.claude_dir, @@ -61,7 +68,7 @@ def run( ) log(config, f"Claude finished; log: {log_path}") text, new_session_id = _parse_claude_output(result.stdout) - return AgentResult(text=text, session_id=new_session_id) + return AgentResult(text=read_public_response_file(response_path) or text, session_id=new_session_id) BACKEND = ClaudeBackend() diff --git a/src/coding_review_agent_loop/agents/gemini.py b/src/coding_review_agent_loop/agents/gemini.py index 9d9627f..9174e21 100644 --- a/src/coding_review_agent_loop/agents/gemini.py +++ 
b/src/coding_review_agent_loop/agents/gemini.py @@ -6,7 +6,13 @@ from pathlib import Path from typing import TYPE_CHECKING -from .base import AgentName, AgentResult +from .base import ( + AgentName, + AgentResult, + public_response_path, + read_public_response_file, + with_public_response_file_instruction, +) from ..logging import agent_log_path, log from ..protocol import CLARIFY_RE, STATE_RE from ..runner import Runner @@ -94,12 +100,15 @@ def run( prompt: str, session_id: str | None = None, ) -> AgentResult: + response_path = public_response_path(config, "gemini") log_path = agent_log_path(config, "gemini") - log(config, f"Starting Gemini in {config.gemini_dir}; log: {log_path}") + log(config, f"Starting Gemini in {config.gemini_dir}; log: {log_path}; response: {response_path}") args = [ config.gemini_cmd, "--prompt", - _with_public_response_marker_instruction(prompt), + _with_public_response_marker_instruction( + with_public_response_file_instruction(prompt, response_path) + ), *config.gemini_args, ] if session_id: @@ -113,7 +122,7 @@ def run( ) log(config, f"Gemini finished; log: {log_path}") text, new_session_id = _parse_gemini_output(result.stdout) - return AgentResult(text=text, session_id=new_session_id) + return AgentResult(text=read_public_response_file(response_path) or text, session_id=new_session_id) BACKEND = GeminiBackend() diff --git a/tests/test_agent_loop.py b/tests/test_agent_loop.py index 89567f1..8019311 100644 --- a/tests/test_agent_loop.py +++ b/tests/test_agent_loop.py @@ -1,4 +1,5 @@ import json +import re from pathlib import Path import pytest @@ -46,6 +47,7 @@ def __init__( diff_returncode=0, diff_stderr="", issue_urls=None, + public_response_outputs=None, ): super().__init__(dry_run=False) self.claude_outputs = list(claude_outputs or []) @@ -83,6 +85,7 @@ def __init__( self.diff_returncode = diff_returncode self.diff_stderr = diff_stderr self.issue_urls = list(issue_urls) if issue_urls is not None else None + 
self.public_response_outputs = list(public_response_outputs or []) def _record_command(self, args, cwd): cmd = [str(arg) for arg in args] @@ -92,6 +95,17 @@ def _record_command(self, args, cwd): self.commands.append((cmd, cwd_path)) return cmd, cwd_path + def _maybe_write_public_response_file(self, cmd): + if not self.public_response_outputs: + return + prompt = "\n".join(cmd) + match = re.search(r"Write the final public response.*?\n\n([^\n]+/responses/[^\n]+\.md)", prompt, re.S) + if not match: + return + response_path = Path(match.group(1)) + response_path.parent.mkdir(parents=True, exist_ok=True) + response_path.write_text(self.public_response_outputs.pop(0), encoding="utf-8") + def run_with_log( self, args, @@ -108,6 +122,7 @@ def run_with_log( if cmd[:1] == ["claude"]: output = self.claude_outputs.pop(0) + self._maybe_write_public_response_file(cmd) log_path.write_text(f"$ {' '.join(cmd)}\n\n{output}", encoding="utf-8") return CommandResult(cmd, cwd_path, output, "", 0) @@ -121,6 +136,7 @@ def run_with_log( if cmd[:1] == ["gemini"]: output = self.gemini_outputs.pop(0) + self._maybe_write_public_response_file(cmd) log_path.write_text(f"$ {' '.join(cmd)}\n\n{output}", encoding="utf-8") return CommandResult(cmd, cwd_path, output, "", 0) @@ -2230,6 +2246,29 @@ def test_gemini_review_loop_uses_prompt_and_extra_args(tmp_path): assert runner.comments == ["LGTM.\n\n-- Google Gemini"] +def test_gemini_review_loop_prefers_public_response_file_over_stdout(tmp_path): + runner = FakeRunner( + gemini_outputs=[ + "Warning: True color (24-bit) support not detected.\n" + "YOLO mode is enabled. 
All tool calls will be automatically approved.\n" + "I will fetch the PR and inspect the diff.\n" + "Error executing tool run_shell_command: confirmation required.\n" + "This stdout chatter should not be posted.\n", + ], + public_response_outputs=[ + "LGTM from response file.\n\n-- Google Gemini", + ], + ) + config = make_config(tmp_path, reviewer="gemini") + + assert run_pr_loop(runner, pr_number=77, config=config) == 0 + + gemini_call = next(cmd for cmd, _cwd in runner.commands if cmd[:1] == ["gemini"]) + assert "PUBLIC RESPONSE FILE:" in gemini_call[2] + assert "/coding-review-agent-loop/responses/OWNER-REPO/gemini/" in gemini_call[2] + assert runner.comments == ["LGTM from response file.\n\n-- Google Gemini"] + + def test_codex_task_loop_rejects_empty_task_text(tmp_path): runner = FakeRunner() config = make_config(tmp_path, coder="codex", reviewer="claude") From 6f5c67abd018b5c80080f3df595315fe2ce5627f Mon Sep 17 00:00:00 2001 From: Wild Wind Date: Sat, 2 May 2026 19:18:36 -0700 Subject: [PATCH 2/2] Add Claude public response regression test --- tests/test_agent_loop.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/test_agent_loop.py b/tests/test_agent_loop.py index 8019311..1c92b3c 100644 --- a/tests/test_agent_loop.py +++ b/tests/test_agent_loop.py @@ -2269,6 +2269,33 @@ def test_gemini_review_loop_prefers_public_response_file_over_stdout(tmp_path): assert runner.comments == ["LGTM from response file.\n\n-- Google Gemini"] +def test_claude_review_loop_prefers_public_response_file_over_stdout(tmp_path): + runner = FakeRunner( + claude_outputs=[ + json.dumps( + { + "result": ( + "I will inspect the PR diff.\n" + "Tool output chatter should not be posted.\n" + ), + "session_id": "claude-session-1", + } + ), + ], + public_response_outputs=[ + "LGTM from response file.\n\n-- Anthropic Claude", + ], + ) + config = make_config(tmp_path, reviewer="claude") + + assert run_pr_loop(runner, pr_number=77, config=config) == 0 + + 
claude_call = next(cmd for cmd, _cwd in runner.commands if cmd[:1] == ["claude"]) + assert "PUBLIC RESPONSE FILE:" in claude_call[-1] + assert "/coding-review-agent-loop/responses/OWNER-REPO/claude/" in claude_call[-1] + assert runner.comments == ["LGTM from response file.\n\n-- Anthropic Claude"] + + def test_codex_task_loop_rejects_empty_task_text(tmp_path): runner = FakeRunner() config = make_config(tmp_path, coder="codex", reviewer="claude")