From 2efbb3cf62343e288768f5df13a039cbf84eeb96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 23 May 2026 17:33:03 +0200 Subject: [PATCH 01/65] Add tools/ refactor plan (WIP) --- .project/tools-refactor-plan.md | 693 ++++++++++++++++++++++++++++++++ 1 file changed, 693 insertions(+) create mode 100644 .project/tools-refactor-plan.md diff --git a/.project/tools-refactor-plan.md b/.project/tools-refactor-plan.md new file mode 100644 index 0000000..3001d75 --- /dev/null +++ b/.project/tools-refactor-plan.md @@ -0,0 +1,693 @@ +# Plan: Refactor `tools/` Directory Structure + +**Status:** Draft +**Date:** 2026-05-23 +**Target:** `tools/run-agent.py`, `tools/events/`, all `tools/*.py` standalone scripts +**Risk Level:** Medium (large structural refactor, all phase targets affected) + +--- + +## 1. Executive Summary + +`tools/run-agent.py` has grown to **5,876 lines** with 10+ distinct concerns in a single file. The `events/` sub-package is cleanly separated but tightly coupled back to `run-agent.py` via a runtime callable-injection pattern. Six standalone finding-management scripts each duplicate frontmatter parsing, path resolution, and finding-id regex. + +This plan proposes a five-phase refactor that splits the monolith into focused packages (`codecome/`, `rendering/`, `chat/`, `findings/`), extracts shared infrastructure (`events/base.py`), and consolidates the standalone scripts into a `findings/` package — all while keeping the entry-point behavior and Makefile targets unchanged until the final phase. + +**Lines of code affected:** ~8,600 (all of `run-agent.py` + the events package + 6 finding scripts). No changes to `opencode/serve.py`, `sandbox-bootstrap.py`, `gate-check.py`, `run-sweep.py`, `check-frontmatter.py`, `_colors.py`, mock tools, or recording tools. + +--- + +## 2. 
Current Architecture — Full Inventory + +### 2.1 File size breakdown + +``` +tools/ +├── run-agent.py 5,876 ← MONOLITH (58% of total) +├── events/ +│ ├── __init__.py 393 ← EventLoop orchestrator +│ ├── chat_loop.py 392 ← ChatEventLoop (multi-turn) +│ ├── state_tracker.py 203 ← Delta → finalized parts +│ ├── sse_client.py 200 ← SSE stream + reconnect +│ └── emitters.py 32 ← Callable bridge (2-line function) +├── opencode/ +│ ├── serve.py 333 ← ServerRunner lifecycle +│ └── __init__.py 23 +├── _colors.py 163 ← ANSI codes (shared) +├── codecome.py 469 ← Workspace validation CLI +├── gate-check.py 339 ← Phase readiness gates +├── run-sweep.py 214 ← Batch file sweeps +├── sandbox-bootstrap.py 389 ← Sandbox setup/validation +├── create-finding.py 201 ← Finding from template +├── move-finding.py 186 ← Status directory mover +├── create-evidence.py 99 ← Evidence README bootstrap +├── package-finding.py 153 ← Zip bundle +├── list-findings.py 198 ← Listing with filters +├── render-report.py 494 ← Markdown report +├── render-index.py 157 ← itemdb/index.md +├── check-frontmatter.py 138 ← Frontmatter validation +├── list-risk-files.py 75 ← Risk file listing +├── script-to-asciinema.py 76 ← Cast → GIF +├── mock-llm-server.py 180 ← Mock LLM for tests +├── mock-llm-parity.py 162 ← Mock parity checker +└── mock_llm_scripts/ 6 JSON files +``` + +### 2.2 `run-agent.py` internal structure + +The 5,876-line file contains these concerns, in file order: + +| Lines | Concern | Functions/Classes | +|---|---|---| +| 1–103 | Imports, debug logging, version check | `check_opencode_version`, `_chat_debug`, `_setup_chat_debug` | +| 104–438 | Model resolution | `_scan_event_for_model`, `_discover_opencode_default_model`, `_probe_effective_model`, `_read_codecome_yml_agent`, `resolve_model_and_variant` | +| 440–520 | Prompt loading | `resolve_color_mode`, `build_console`, `load_prompt` | +| 523–675 | Todo rendering | `extract_todos`, `_todo_summary`, `render_todowrite_rich/plain` | +| 678–732 | 
Permission errors + tunables | `render_permission_error_rich/plain`, ~30 env var config knobs | +| 733–1031 | File cache + utilities | `_SNAPSHOT_CACHE`, `_relativize_path`, `_detect_lexer`, `_compute_diff`, `_cache_set/get/reread`, `_strip_read_framing`, `_classify_internal_read` | +| 1035–1156 | Read tool renderer | `render_read_rich`, `render_read_plain` | +| 1161–1270 | Write tool renderer | `render_write_rich`, `render_write_plain` | +| 1288–1381 | Edit tool renderer | `render_edit_rich`, `render_edit_plain` | +| 1386–1650 | Apply-patch renderer | `_ParsedFilePatch`, `_parse_apply_patch_envelope`, `_extract_apply_patch_payload`, `render_apply_patch_rich/plain` | +| 1655–1750 | Glob renderer | `_parse_glob_output`, `render_glob_rich/plain` | +| 1755–2059 | Grep renderer | `_grep_compile_pattern`, `_grep_format_line_rich/plain`, `_parse_grep_output`, `render_grep_rich/plain` | +| 2064–2120 | Bash renderer | `render_bash_rich/plain` | +| 2123–2923 | Sandbox-bootstrap sub-renderer | `_is_sandbox_bootstrap_json_call`, `_sandbox_payload_matches`, `_maybe_render_sandbox_bootstrap`, 12 `_render_sandbox_*` functions | +| 2925–3518 | Bash-shim sub-renderer | `_BashShim`, `_is_bash_shim_call`, parsers for `cat`/`head`/`tail`/`rg`/`grep`/`ls`/`find`/`tree`/`rtk`, normalizers, `_maybe_render_bash_shim` | +| 3521–3610 | Task + Skill renderers | `render_task_rich/plain`, `render_skill_rich/plain` | +| 3612–3720 | Tool dispatch | `_dispatch_tool_renderer` (10-tool if/elif chain) | +| 3723–4105 | Event renderers | `render_step_start`, `render_text`, `render_reasoning`, `render_tool_use`, `render_step_finish`, `render_error`, `render_session_status`, `render_subagent_status`, `render_message_updated`, `render_event` dispatcher | +| 4107–4213 | CLI parser | `build_parser` | +| 4234–4450 | Thinking + resume logic | `_resolve_thinking_decision`, `_build_phase_resume_prompt`, `_build_frontmatter_resume_prompt`, `_build_resume_command` | +| 4453–4556 | Graceful completion | 
`check_phase_graceful_completion`, `_exploitation_status_looks_real` | +| 4557–4783 | Session lifecycle + run | `_create_session`, `_create_chat_session`, `_send_prompt_to_session`, `_consume_events`, `_run_single_attempt` | +| 4786–4822 | Model table display | `show_model_table` | +| 4833–5511 | Chat TUI | `TextualConsoleProxy`, `_ChatApp`, `_QuitScreen`, `_run_chat_mode` | +| 5514–5876 | `main()` entry point | Orchestration: server start, attempt loop, retry/resume logic, frontmatter repair, exit handling | + +### 2.3 `events/` package structure + +``` +events/ +├── __init__.py EventLoop — Phase runner orchestrator +│ Uses: SseClient, StateTracker, emit_event +│ Called from: run-agent.py._consume_events() +│ Callback to: run-agent.py.render_event() via render_fn parameter +│ +├── chat_loop.py ChatEventLoop — Multi-turn chat consumer +│ Uses: SseClient, StateTracker, emit_event +│ Called from: run-agent.py._ChatApp (Textual TUI) +│ Duplicates: permission handling, session sync, idle detection, dedup +│ +├── sse_client.py SseClient — Raw SSE stream with reconnection +│ Dependency-free (only stdlib) +│ +├── state_tracker.py StateTracker — Delta → finalized part translation +│ Dependency-free (only stdlib) +│ +└── emitters.py emit_event() — 2-line callable bridge + Purpose: avoid circular import (events/ → run-agent.py) +``` + +**Key coupling:** `EventLoop.run(render_fn)` and `ChatEventLoop.start_consumer(render_fn)` both accept `run-agent.py.render_event` as a parameter. The `emitters.py` module simply calls `render_fn(console, phase, label, event)`. This is a runtime dependency inversion to break the compile-time cycle. 
+ +### 2.4 Finding management scripts — duplication catalog + +All six scripts duplicate these patterns: + +```python +# Duplicated in 6 files: +sys.path.insert(0, str(Path(__file__).resolve().parent)) +import _colors as C +ROOT = Path(__file__).resolve().parents[1] +FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL) +FINDING_ID_RE = re.compile(r"\bCC-(\d{4,})\b") +FINDINGS_ROOT = ROOT / "itemdb" / "findings" + +# Duplicated in 4 files: +def load_frontmatter(path: Path) -> dict: + # 15 lines of identical YAML frontmatter extraction +``` + +--- + +## 3. Problems Catalog + +### P1: Monolith — 10+ concerns in one file +`run-agent.py` is 5,876 lines. Rendering, orchestration, model resolution, chat TUI, and CLI parsing have no module boundaries. Any change to a renderer risks merge conflicts with TUI changes. + +### P2: Circular dependency via callable injection +`run-agent.py` imports `events/EventLoop`, which calls back into `run-agent.py.render_event()` via a `render_fn: Callable` parameter. This means neither module can be understood in isolation, and both must be loaded for any test or analysis. + +### P3: `_rich` / `_plain` boilerplate +Every one of the 10 tool renderers has two near-identical functions (~1,300 lines total). The dispatch site repeats `if HAVE_RICH: render_X_rich(...) else: render_X_plain(...)` 10 times in `_dispatch_tool_renderer`. + +### P4: EventLoop / ChatEventLoop duplication +Both classes independently implement: +- Permission auto-reject via `POST /permission/{id}/reply` +- Session message sync via `GET /session/{id}/message` +- `_belongs_to_session(event)` filtering +- `_is_session_idle(event)` detection +- Message deduplication via `_seen_message_ids` + `_emitted_signatures` + +This is ~250 lines of duplicated logic. 
+ +### P5: Implicit bash-renderer dispatch chain +The `bash` tool case in `_dispatch_tool_renderer` has a hardcoded cascade: + +```python +elif tool_lower == "bash": + _cache_invalidate_stale() + if _maybe_render_sandbox_bootstrap(console, state): return True + if _maybe_render_bash_shim(console, state): return True + if HAVE_RICH: return render_bash_rich(console, state) + else: return render_bash_plain(state) +``` + +Adding a new interceptor requires editing `_dispatch_tool_renderer` — the chain is implicit. + +### P6: Isolated finding scripts with duplicated infrastructure +Six separate `tools/*.py` files each re-implement `load_frontmatter`, path normalization, `FINDING_ID_RE`, and `sys.path` manipulation. They share no module. + +### P7: Model resolution is a cross-cutting tangle +`resolve_model_and_variant()` touches CLI args (`_extract_flag_value`), env vars (`CODECOME_MODEL`), YAML config (`codecome.yml`), the opencode SQLite DB, and runtime probe sessions. It has 4 different source-of-truth formats and lives in the same file as the Textual TUI. + +--- + +## 4. 
Target Architecture + +``` +tools/ +├── _colors.py # unchanged +│ +├── codecome/ # NEW: Core runner package +│ ├── __init__.py +│ ├── cli.py # main(), build_parser(), show_model_table() +│ ├── config.py # resolve_model_and_variant(), load_prompt(), +│ │ # resolve_color_mode(), build_console(), +│ │ # _resolve_thinking_decision(), truthy_env() +│ ├── runner.py # _run_single_attempt(), _consume_events(), +│ │ # retry loop, resume prompt builders +│ ├── session.py # _create_session(), _create_chat_session(), +│ │ # _send_prompt_to_session(), _get_headers() +│ ├── graceful.py # check_phase_graceful_completion(), +│ │ # _phase_checklist_lines(), _phase1_required_artifacts() +│ └── version.py # check_opencode_version() +│ +├── rendering/ # NEW: Tool rendering package +│ ├── __init__.py # Public API surface +│ ├── registry.py # _dispatch_tool_renderer() → chain-of-responsibility +│ ├── cache.py # _SNAPSHOT_CACHE, _cache_set/get/reread/invalidate_stale +│ ├── utils.py # _relativize_path(), _detect_lexer(), +│ │ # _count_lines_and_bytes(), _compute_diff(), +│ │ # _truncate_diff(), _strip_line_numbers(), +│ │ # _format_excerpt(), _is_likely_error(), +│ │ # _strip_read_framing(), _classify_internal_read(), +│ │ # _current_mtime() +│ ├── read.py # render_read_rich(), render_read_plain() +│ ├── write.py # render_write_rich(), render_write_plain() +│ ├── edit.py # render_edit_rich(), render_edit_plain() +│ ├── apply_patch.py # _ParsedFilePatch, _extract_apply_patch_payload(), +│ │ # render_apply_patch_rich/plain() +│ ├── glob.py # render_glob_rich(), render_glob_plain() +│ ├── grep.py # render_grep_rich(), render_grep_plain() +│ ├── bash.py # render_bash_rich(), render_bash_plain() +│ ├── sandbox.py # _maybe_render_sandbox_bootstrap() + 12 _render_sandbox_*() +│ ├── bash_shim.py # _maybe_render_bash_shim(), _BashShim, +│ │ # parsers (cat/head/tail/rg/grep/ls/find/tree/rtk), +│ │ # normalizers, shim renderers +│ ├── todo.py # render_todowrite_rich/plain(), extract_todos() +│ ├── 
task_skill.py # render_task_rich/plain(), render_skill_rich/plain() +│ ├── permissions.py # render_permission_error_rich/plain() +│ └── events.py # render_step_start(), render_text(), render_reasoning(), +│ # render_tool_use(), render_step_finish(), +│ # render_error(), render_session_status(), +│ # render_subagent_status(), render_message_updated(), +│ # render_server_connected(), render_session_diff(), +│ # render_unknown(), render_event() dispatcher +│ +├── chat/ # NEW: Chat TUI package +│ ├── __init__.py +│ ├── app.py # _ChatApp, _QuitScreen, TextualConsoleProxy +│ └── harness.py # _run_chat_mode() +│ +├── events/ # REFACTORED: Add base class, reduce duplication +│ ├── __init__.py # EventLoop (extends BaseEventConsumer) +│ ├── base.py # NEW: BaseEventConsumer +│ │ # Shared: permission handling, session sync, +│ │ # session filtering, idle detection, dedup +│ ├── sse_client.py # unchanged +│ ├── state_tracker.py # unchanged +│ ├── chat_loop.py # ChatEventLoop (extends BaseEventConsumer) +│ └── emitters.py # unchanged +│ +├── opencode/ # unchanged +│ ├── __init__.py +│ └── serve.py +│ +├── findings/ # NEW: Consolidated finding management +│ ├── __init__.py +│ ├── frontmatter.py # Shared: load_frontmatter(), replace_scalar_value(), +│ │ # replace_nested_value(), find_finding(), slugify(), +│ │ # next_finding_id(), iter_finding_files() +│ ├── create.py # from create-finding.py +│ ├── move.py # from move-finding.py +│ ├── listing.py # from list-findings.py +│ ├── evidence.py # from create-evidence.py +│ ├── package.py # from package-finding.py +│ ├── render_report.py # from render-report.py +│ └── render_index.py # from render-index.py +│ +├── gate-check.py # unchanged +├── sandbox-bootstrap.py # unchanged +├── run-sweep.py # unchanged +├── codecome.py # unchanged +├── check-frontmatter.py # unchanged +├── list-risk-files.py # unchanged +├── script-to-asciinema.py # unchanged +├── mock-llm-server.py # unchanged +├── mock-llm-parity.py # unchanged +└── 
mock_llm_scripts/ # unchanged +``` + +### 4.1 New dependency graph + +``` +codecome/cli.py + ├── codecome/config.py (model, prompt, color) + ├── codecome/runner.py (orchestration) + │ ├── codecome/session.py + │ ├── codecome/graceful.py + │ ├── events/ (EventLoop) + │ └── rendering/ (render_event dispatcher) + ├── chat/harness.py (--chat mode) + │ └── chat/app.py + └── _colors.py + +rendering/registry.py + ├── rendering/read.py write.py edit.py ... bash.py sandbox.py bash_shim.py + ├── rendering/todo.py task_skill.py permissions.py events.py + └── rendering/utils.py cache.py + +events/base.py + └── events/sse_client.py + └── events/state_tracker.py + +findings/frontmatter.py + └── findings/create.py move.py listing.py evidence.py package.py +``` + +--- + +## 5. Phased Implementation Plan + +### Phase A — Extract Renderers (Lowest Risk) + +**Goal:** Move all rendering code out of `run-agent.py` into a new `tools/rendering/` package. + +**Why first:** Renderers are pure functions with clear inputs (`Console`, `dict`) and outputs (`bool`). They have no side effects except writing to console. They are the easiest to extract and test in isolation. + +**Steps:** + +1. **Create `tools/rendering/__init__.py`** — empty, acts as package marker. + +2. **Create `tools/rendering/utils.py`** — move these shared utilities: + - `_relativize_path()`, `_detect_lexer()`, `_count_lines_and_bytes()` + - `_compute_diff()`, `_truncate_diff()`, `_strip_line_numbers()` + - `_format_excerpt()`, `_is_likely_error()` + - `_strip_read_framing()`, `_classify_internal_read()`, `_current_mtime()` + - All parser regexes: `_READ_FILE_FRAMING_RE`, `_READ_DIR_FRAMING_RE`, `_READ_SUMMARY_RE`, `_LEXER_MAP`, etc. + - Add `ROOT` constant: `Path(__file__).resolve().parents[2]` + +3. 
**Create `tools/rendering/cache.py`** — move: + - `_SNAPSHOT_CACHE`, `_WRITE_CACHE_ENABLED`, `_SNAPSHOT_CACHE_CAP` + - `_cache_set()`, `_cache_get()`, `_cache_invalidate_stale()`, `_cache_reread()` + - Tunables that affect cache only: `CODECOME_WRITE_CACHE`, `CODECOME_WRITE_CACHE_CAP` + +4. **Extract renderers — one module at a time:** + - `tools/rendering/todo.py` — `render_todowrite_rich/plain`, `extract_todos`, `_todo_summary` + - `tools/rendering/permissions.py` — `render_permission_error_rich/plain` + - `tools/rendering/read.py` — `render_read_rich/plain` + helpers + - `tools/rendering/write.py` — `render_write_rich/plain` + helpers + - `tools/rendering/edit.py` — `render_edit_rich/plain` + helpers + - `tools/rendering/apply_patch.py` — `_ParsedFilePatch`, `_parse_apply_patch_envelope`, etc. + - `tools/rendering/glob.py` — `render_glob_rich/plain`, `_parse_glob_output` + - `tools/rendering/grep.py` — `render_grep_rich/plain`, `_grep_compile_pattern`, `_parse_grep_output` + - `tools/rendering/bash.py` — `render_bash_rich/plain` + - `tools/rendering/sandbox.py` — ALL sandbox-bootstrap code (~700 lines) + - `tools/rendering/bash_shim.py` — ALL bash-shim code (~500 lines), `_BashShim`, parsers + - `tools/rendering/task_skill.py` — `render_task_rich/plain`, `render_skill_rich/plain` + - `tools/rendering/events.py` — `render_step_start`, `render_text`, `render_reasoning`, `render_tool_use`, `render_step_finish`, `render_error`, `render_session_status`, `render_subagent_status`, `render_message_updated`, `render_server_connected`, `render_session_diff`, `render_unknown`, `render_event()` dispatcher, finish-reason constants, permission-error extractor + +5. 
**Create `tools/rendering/registry.py`** — move: + - `_dispatch_tool_renderer()` — becomes the tool dispatch registry + - All tunable env vars that affect rendering: `_READ_DISPLAY_LINES`, `_WRITE_CONTENT_LINES`, `_WRITE_DIFF_LIMIT`, `_EDIT_DIFF_LINES`, `_READ_HIGHLIGHT_LIMIT`, `_GLOB_MATCH_CAP`, `_APPLY_PATCH_DIFF_LINES`, `_APPLY_PATCH_MAX_FILES`, `_GREP_FILE_CAP`, `_GREP_LINE_CAP_PER_FILE`, `_GREP_TOTAL_LINE_CAP`, `_GREP_HIGHLIGHT`, `_REASONING_MAX_CHARS`, `_RENDER_REASONING`, `_DEBUG_UNKNOWN_EVENTS`, `_SANDBOX_RENDER`, `_SANDBOX_VALIDATE_STDERR_LINES`, `_SANDBOX_FILES_CAP`, `_BASH_SHIM_RENDER`, `_BASH_SHIM_LS_STRIP_LONG_FORMAT`, `_INTERNAL_READ_SUPPRESS`, `_SUBAGENT_HEARTBEAT_INTERVAL_S`, `_SUBAGENT_UPDATE_THROTTLE_S`, `_TASK_PROMPT_PREVIEW_LINES`, `_RENDER_SUBAGENT_UPDATES`, `_SUBAGENT_LAST_STATE` + +6. **Update imports in `run-agent.py`** — replace all moved function definitions with imports from `rendering.*`. + +7. **Verify:** Run `make tests` (no rendering changes expected, no breakage of event pipeline). + +**Estimated lines moved:** ~2,800 lines out of `run-agent.py`. + +**Rollback:** Revert `run-agent.py` and delete `tools/rendering/`. One `git checkout` per step. + +--- + +### Phase B — Extract Chat TUI (Low Risk) + +**Goal:** Move all chat-mode code out of `run-agent.py` into a new `tools/chat/` package. + +**Steps:** + +1. **Create `tools/chat/__init__.py`** + +2. **Create `tools/chat/app.py`** — move: + - `TextualConsoleProxy` class + - `_ChatApp` class (the Textual `App` subclass) + - `_QuitScreen` class (the `ModalScreen`) + - All `HAVE_RICH` guard at the module level, or keep inside the try/except ImportError block + - `ChatApp` and `QuitScreen` module-level aliases + +3. **Create `tools/chat/harness.py`** — move: + - `_run_chat_mode()` function + - Chat debug logging: `_CHAT_DEBUG_FP`, `_chat_debug()`, `_setup_chat_debug()`, `_close_chat_debug()` + +4. 
**Update `run-agent.py`:** + - Replace all moved code with `from chat.harness import _run_chat_mode` + - Replace all moved code with `from chat.app import ChatApp, QuitScreen` + - Keep `_setup_chat_debug()` and `_close_chat_debug()` calls in `main()` but import them from `chat.harness` + +5. **Verify:** Run `make chat` (if Textual is installed) to confirm TUI still works. + +**Estimated lines moved:** ~500 lines out of `run-agent.py`. + +--- + +### Phase C — Extract Shared Event Consumer Base (Medium Risk) + +**Goal:** Eliminate the ~250 lines of duplicated logic between `EventLoop` and `ChatEventLoop` by introducing a shared `BaseEventConsumer` class. + +**Steps:** + +1. **Create `tools/events/base.py`** with: + ```python + class BaseEventConsumer: + """Shared SSE consumption logic for EventLoop and ChatEventLoop.""" + + def __init__(self, base_url, session_id, console, *, + auth_token=None, workspace_dir=None): + self.base_url = base_url.rstrip("/") + self.session_id = session_id + self.console = console + self.auth_token = auth_token + self.workspace_dir = workspace_dir + self._tracker = StateTracker() + self._seen_message_ids: set[str] = set() + self._emitted_signatures: set[tuple[str, str]] = set() + + # --- Shared (currently duplicated in both classes) --- + + def _get_headers(self) -> dict[str, str]: ... + + @staticmethod + def _is_session_idle(event: dict[str, Any]) -> bool: ... + + def _belongs_to_session(self, event: dict[str, Any]) -> bool: ... + + def _handle_permission(self, event: dict[str, Any]) -> None: ... + + def _sync_session_messages(self) -> list[dict[str, Any]]: ... + + def _dedup_and_emit(self, render_fn, finalized_events) -> None: ... + ``` + +2. 
**Refactor `EventLoop` to extend `BaseEventConsumer`:** + - Remove duplicated methods + - Keep: `run()`, `stop()`, `trigger_recovery_sync()`, `_update_result()`, `_build_result()` + - Call `self._handle_permission(event)` instead of local implementation + - Call `self._sync_session_messages()` instead of local implementation + +3. **Refactor `ChatEventLoop` to extend `BaseEventConsumer`:** + - Remove duplicated methods + - Keep: `start_consumer()`, `send_prompt()`, `get_state()`, `stop()`, `_consumer_worker()` + - Replace `_emit_event()` call with `_dedup_and_emit()` + +4. **Update `events/__init__.py`** — export `BaseEventConsumer` optionally for tests. + +5. **Verify:** Run all phase targets (`make phase-1`, etc.) and `make chat` to confirm no regression. + +**Estimated lines added:** ~80 in new `base.py`; ~120 lines removed from `EventLoop` and `ChatEventLoop` (net reduction ~40 lines, but code quality improvement). + +--- + +### Phase D — Restructure Core Runner (Medium Risk) + +**Goal:** Split `run-agent.py` into the `codecome/` package. This is the phase where `run-agent.py` is finally deleted. + +**Steps:** + +1. **Create `tools/codecome/__init__.py`** — empty package marker. + +2. **Create `tools/codecome/version.py`** — move: + - `check_opencode_version()`, `MINIMUM_OPENCODE_VERSION`, `parse_ver()` (inline helper) + +3. **Create `tools/codecome/config.py`** — move: + - `resolve_model_and_variant()`, `_extract_flag_value()`, `_read_codecome_yml_agent()` + - `_discover_opencode_default_model()`, `_probe_effective_model()`, `_scan_event_for_model()` + - `_extract_model_from_export()`, `_strip_probe_unsafe_flags()` + - `load_prompt()`, `_PHASE_NAMES`, `resolve_color_mode()`, `build_console()`, `truthy_env()` + - `_resolve_thinking_decision()`, `_thinking_default_for_provider()` + - `resolve_runtime_model_for_banner()` + - `_MODEL_FLAG_NAMES`, `_VARIANT_FLAG_NAMES` + - All model-related constants: `_DISCOVERY_TIMEOUT_S`, `_MODEL_PROBE_TIMEOUT_S` + +4. 
**Create `tools/codecome/session.py`** — move: + - `_create_session()`, `_create_chat_session()`, `_send_prompt_to_session()` + - `_get_headers()` (or import from `events.base` if extracted there) + +5. **Create `tools/codecome/runner.py`** — move: + - `_run_single_attempt()`, `_consume_events()` + - `_build_phase_resume_prompt()`, `_build_frontmatter_resume_prompt()` + - `_build_resume_command()`, `_emit_fatal_error()` + - `show_model_table()` + +6. **Create `tools/codecome/graceful.py`** — move: + - `check_phase_graceful_completion()` + - `_phase_checklist_lines()`, `_phase1_required_artifacts()` + - `_exploitation_status_looks_real()`, `_path_is_fresh()`, `_iter_files()` + - `_PHASE1_REQUIRED_ARTIFACT_NAMES` + +7. **Create `tools/codecome/cli.py`** — move: + - `build_parser()`, `main()`, the `if __name__ == "__main__"` block + - `RUN_START_TIME`, `iteration_retry_count`, `frontmatter_retry_count` + - Signal handling code (`_forward_signal`, signal setup/teardown) + - Banner display code + - Exit status display code + +8. **Delete `tools/run-agent.py`** — all code has been moved. + +9. **Create `tools/run-agent.py` as a thin wrapper** for backward compatibility during the transition: + ```python + #!/usr/bin/env python3 + """Thin wrapper — delegates to codecome.cli.""" + import sys + from pathlib import Path + sys.path.insert(0, str(Path(__file__).resolve().parent)) + from codecome.cli import main + if __name__ == "__main__": + raise SystemExit(main()) + ``` + +10. **Update all references:** + - `Makefile`: Keep invoking `python tools/run-agent.py` (still works via the wrapper), OR change to `python -m codecome.cli`. Recommended: keep the wrapper for backward compatibility, add a `make` note about the new canonical path. + - `tools/run-sweep.py`: Already references `tools/run-agent.py` by path, no change needed. + - Tests: Update any test that imports from `run-agent` directly (check `tests/`). + +11. 
**Verify:** Run `make tests`, then `make phase-1` through `make phase-6` to confirm all phase targets work. + +**Estimated lines moved:** ~1,300 lines into 6 new modules. `run-agent.py` becomes a 15-line wrapper (or deleted entirely once all callers are updated). + +--- + +### Phase E — Consolidate Finding Tools (Low Risk) + +**Goal:** Merge the 7 standalone scripts listed in step 3 into a `findings/` package with shared frontmatter utilities, eliminating duplicated parsing code. + +**Steps:** + +1. **Create `tools/findings/__init__.py`** — empty package marker. + +2. **Create `tools/findings/frontmatter.py`** — shared utilities extracted from those scripts: + ```python + ROOT = Path(__file__).resolve().parents[2] + FINDINGS_ROOT = ROOT / "itemdb" / "findings" + FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL) + FINDING_ID_RE = re.compile(r"\bCC-(\d{4,})\b") + STATUSES = ["PENDING", "CONFIRMED", "EXPLOITED", "REJECTED", "DUPLICATE"] + + def load_frontmatter(path: Path) -> dict: ... + def find_finding(identifier: str) -> Path: ... + def iter_finding_files(status: Optional[str] = None) -> Iterable[Path]: ... + def collect_finding_ids(paths: Iterable[Path]) -> list[int]: ... + def next_finding_id() -> str: ... + def slugify(value: str) -> str: ... + def replace_scalar_frontmatter(content: str, key: str, value: str) -> str: ... + def replace_nested_value(content: str, key: str, value: str) -> str: ... + ``` + +3. **Refactor each script to use `findings/frontmatter.py`:** + - `tools/create-finding.py` → `tools/findings/create.py` + - `tools/move-finding.py` → `tools/findings/move.py` + - `tools/list-findings.py` → `tools/findings/listing.py` + - `tools/create-evidence.py` → `tools/findings/evidence.py` + - `tools/package-finding.py` → `tools/findings/package.py` + - `tools/render-report.py` → `tools/findings/render_report.py` + - `tools/render-index.py` → `tools/findings/render_index.py` + +4. 
**Keep thin wrappers** at the original paths for backward compatibility: + ```python + # tools/create-finding.py (thin wrapper) + import sys + from pathlib import Path + sys.path.insert(0, str(Path(__file__).resolve().parent)) + from findings.create import main + if __name__ == "__main__": + raise SystemExit(main()) + ``` + +5. **Update `Makefile` and `AGENTS.md`** references if they use absolute paths. + +6. **Verify:** Run all finding-management Makefile targets (if any) and manual invocations. + +--- + +## 6. Dependency Resolution During Migration + +### Phase A → Phase E ordering rationale + +``` +Phase A (renderers) — No imports from codecome/ or chat/. Pure extraction. + ↓ +Phase B (chat TUI) — Imports from rendering/ (set up in Phase A) + events/. + ↓ +Phase C (events base) — Independent of codecome/chat/. Can run any time after A. + ↓ +Phase D (core runner) — Imports from rendering/, chat/, events/, codecome/config, + codecome/session, codecome/graceful. Everything else + must be in place first. + ↓ +Phase E (findings) — Completely independent. Can run any time. +``` + +Phases A and B extract disjoint regions of `run-agent.py` and could be developed in parallel, but both edit that file and B imports from `rendering/`, so A should land first. Phase C can start once A is complete, and Phase E is independent of every other phase. Only Phase D has a hard dependency on A+B+C being complete. + +### Import verification after each phase + +After each phase, run: +```bash +python -c "from tools.rendering import registry" # Phase A +python -c "from tools.chat import harness" # Phase B +python -c "from tools.events.base import BaseEventConsumer" # Phase C +python -c "from tools.codecome.cli import main" # Phase D +python -c "from tools.findings.frontmatter import load_frontmatter" # Phase E +``` + +--- + +## 7. 
File Change Summary + +| Phase | Files created | Files modified | Files deleted | +|---|---|---|---| +| A | 17 (`rendering/` modules) | 1 (`run-agent.py` — imports only) | 0 | +| B | 3 (`chat/` modules) | 1 (`run-agent.py` — imports only) | 0 | +| C | 1 (`events/base.py`) | 2 (`events/__init__.py`, `chat_loop.py`) | 0 | +| D | 7 (`codecome/` modules) | 1+ (Makefile, plus any tests importing `run-agent`) | 1 (`run-agent.py` → thin wrapper) | +| E | 9 (`findings/` modules) | 7 (old scripts → thin wrappers) | 0 (wrappers preserved) | + +**Total new files:** 37 +**Total modified files:** 12 (plus any affected tests) +**Total deleted:** 1 (original `run-agent.py` body, wrapper remains) + +--- + +## 8. Risk Assessment + +| Risk | Probability | Impact | Mitigation | +|---|---|---|---| +| Import cycle during extraction | Medium | Import-time error | Each extraction step verified with `python -c "import ..."` before proceeding | +| Event pipeline regression | Low | Phases produce wrong output | Full `make tests` after each phase; spot-check `make phase-1` | +| Chat TUI breakage | Low | Chat mode unusable | `make chat` smoke test after Phase B and D | +| Makefile target path change | Low | CI/CD breakage | Keep thin wrappers; update Makefile only in Phase D with backward compat | +| Global mutable state (caches, tunables) misrouted | Medium | Subtle rendering bugs | Renderer tunables moved as module-level constants; cache centralized in `rendering/cache.py` | +| Tests breaking on import path change | Medium | Test suite failures | Check `tests/` for direct imports from `run-agent`; update in Phase D | + +--- + +## 9. 
Testing Strategy + +### Pre-migration baseline +```bash +make tests # Record baseline: all tests should pass +make phase-1 # Smoke test: should produce notes +make status # List findings +``` + +### Per-phase verification +```bash +# After each phase: +make tests # Full test suite +python tools/run-agent.py --show-model --agent recon # Smoke test + +# After Phase D: +make phase-1 PHASE1_OPTS="--dry-run" # If dry-run support exists +make chat --dry-run # If possible +``` + +### Post-migration full verification (Phase D) +```bash +make tests # Must pass +make phase-1 # Full recon run +make phase-2 # Hypothesis generation +tools/gate-check.py 1 # Gate checks +tools/gate-check.py 2 +tools/create-finding.py "Test finding" +tools/list-findings.py +tools/move-finding.py CC-XXXX REJECTED +tools/render-report.py +tools/render-index.py +``` + +### Specific test areas to add +- **`rendering/cache.py`** — standalone unit tests for cache set/get/invalidate +- **`rendering/registry.py`** — test that all tool names dispatch correctly +- **`events/base.py`** — test shared permission handling and session filtering +- **`findings/frontmatter.py`** — test load/save/replace operations + +--- + +## 10. Open Questions + +1. **Should renderers use a common base class?** The `_rich`/`_plain` duplication (~1,300 lines) could be unified with a `ToolRenderer` protocol/ABC and a `RenderMode` enum. This is a nice-to-have but adds complexity; defer to a follow-up plan. + +2. **Should the bash sub-renderer chain become a registered plugin system?** Currently hardcoded as `sandbox → bash_shim → generic`. A `ChainOfResponsibility` with a list of `BashInterceptor` callables would make adding new interceptors trivial. Defer. + +3. **Should `codecome.yml` parsing get its own module?** Currently YAML parsing is scattered: `load_prompt()` parses `audit.extra_prompts`, `resolve_model_and_variant()` parses `agents.`, `codecome.py` parses `project.name`. 
A `config.py` module that caches the parsed config would eliminate repeated file reads. Defer to a follow-up. + +4. **Should the thin wrappers be permanent?** Keeping `tools/run-agent.py`, `tools/create-finding.py`, etc. as thin wrappers preserves backward compatibility for any external scripts or muscle memory. They add negligible maintenance cost. Recommendation: keep them permanently. + +5. **Should `tools/` be renamed or restructured further?** The `tools/` directory mixes library code (packages) with standalone scripts. A Python-idiomatic structure would be a `src/` layout with a `pyproject.toml`, but that's a much larger change. Defer. + +--- + +## 11. References + +- [tool-renderers-plan.md](tool-renderers-plan.md) — original renderer design +- [chat-mode-plan.md](chat-mode-plan.md) — chat TUI architecture +- [migrate-to-opencode-serve.md](migrate-to-opencode-serve.md) — server migration (prior major refactor) +- [sync-recovery-plan.md](sync-recovery-plan.md) — session sync after SSE reconnect +- [todowrite-renderer-plan.md](todowrite-renderer-plan.md) — first per-tool renderer +- [apply-patch-renderer-plan.md](apply-patch-renderer-plan.md) — most complex renderer +- [internal-read-suppression-plan.md](internal-read-suppression-plan.md) — read display suppression +- [reasoning-and-error-renderers-plan.md](reasoning-and-error-renderers-plan.md) — reasoning panel design +- [discover-opencode-default-model-plan.md](discover-opencode-default-model-plan.md) — model resolution from DB +- [restore-model-banner-plan.md](restore-model-banner-plan.md) — model banner display From c98dcf9c54dc1371ab8e3890103cf1d2b66a5459 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 23 May 2026 20:22:58 +0200 Subject: [PATCH 02/65] Revise tools refactor plan after review --- .project/tools-refactor-plan.md | 1218 ++++++++++++++++++------------- 1 file changed, 724 insertions(+), 494 deletions(-) diff --git a/.project/tools-refactor-plan.md 
b/.project/tools-refactor-plan.md index 3001d75..3e7ccf6 100644 --- a/.project/tools-refactor-plan.md +++ b/.project/tools-refactor-plan.md @@ -1,19 +1,34 @@ # Plan: Refactor `tools/` Directory Structure -**Status:** Draft +**Status:** Draft, revised after architecture review **Date:** 2026-05-23 -**Target:** `tools/run-agent.py`, `tools/events/`, all `tools/*.py` standalone scripts +**Target:** `tools/run-agent.py`, `tools/events/`, rendering/chat support, and later finding/itemdb tooling **Risk Level:** Medium (large structural refactor, all phase targets affected) --- ## 1. Executive Summary -`tools/run-agent.py` has grown to **5,876 lines** with 10+ distinct concerns in a single file. The `events/` sub-package is cleanly separated but tightly coupled back to `run-agent.py` via a runtime callable-injection pattern. Six standalone finding-management scripts each duplicate frontmatter parsing, path resolution, and finding-id regex. +`tools/run-agent.py` has grown to **5,876 lines** with many distinct concerns in a single file: CLI parsing, model/prompt resolution, OpenCode session lifecycle, event consumption, terminal rendering, Textual chat, retry/resume policy, frontmatter repair, and phase completion heuristics. -This plan proposes a five-phase refactor that splits the monolith into focused packages (`codecome/`, `rendering/`, `chat/`, `findings/`), extracts shared infrastructure (`events/base.py`), and consolidates the standalone scripts into a `findings/` package — all while keeping the entry-point behavior and Makefile targets unchanged until the final phase. +This plan keeps the useful inventory from the original draft but changes the execution strategy: -**Lines of code affected:** ~8,600 (all of `run-agent.py` + the events package + 6 finding scripts). No changes to `opencode/serve.py`, `sandbox-bootstrap.py`, `gate-check.py`, `run-sweep.py`, `check-frontmatter.py`, `_colors.py`, mock tools, or recording tools. +1. 
Split the work into two independent epics: + - **Epic A:** runner / rendering / events / chat. + - **Epic B:** findings / itemdb tooling. +2. Extract stable core helpers before doing the larger renderer refactor. +3. Refactor rendering around **specific renderer classes** for each event/tool family. +4. Keep renderers close to the existing normalized event dictionaries; do not introduce a custom event model unless a later need appears. +5. Support three rendering destinations explicitly: + - plain terminal output, + - Rich terminal output, + - Textual chat output through a RichLog-compatible sink. +6. Move file snapshot/diff state into an explicit `SnapshotCache`. +7. Introduce `PhaseEventLoop` and `ChatEventLoop`, sharing common SSE/session/dedup/permission logic through a base event loop. +8. Preserve compatibility through thin wrappers at historical script paths. +9. Add `tools/AGENTS.md` to document the architecture rules for future changes. + +The goal is not only to move code out of `run-agent.py`, but to make future changes safer and easier to review. 
--- @@ -21,673 +36,888 @@ This plan proposes a five-phase refactor that splits the monolith into focused p ### 2.1 File size breakdown -``` +```text tools/ -├── run-agent.py 5,876 ← MONOLITH (58% of total) +├── run-agent.py 5,876 ← MONOLITH ├── events/ │ ├── __init__.py 393 ← EventLoop orchestrator │ ├── chat_loop.py 392 ← ChatEventLoop (multi-turn) │ ├── state_tracker.py 203 ← Delta → finalized parts │ ├── sse_client.py 200 ← SSE stream + reconnect -│ └── emitters.py 32 ← Callable bridge (2-line function) +│ └── emitters.py 32 ← Callable bridge ├── opencode/ │ ├── serve.py 333 ← ServerRunner lifecycle │ └── __init__.py 23 -├── _colors.py 163 ← ANSI codes (shared) +├── _colors.py 163 ← ANSI codes ├── codecome.py 469 ← Workspace validation CLI ├── gate-check.py 339 ← Phase readiness gates ├── run-sweep.py 214 ← Batch file sweeps ├── sandbox-bootstrap.py 389 ← Sandbox setup/validation -├── create-finding.py 201 ← Finding from template -├── move-finding.py 186 ← Status directory mover -├── create-evidence.py 99 ← Evidence README bootstrap -├── package-finding.py 153 ← Zip bundle -├── list-findings.py 198 ← Listing with filters -├── render-report.py 494 ← Markdown report -├── render-index.py 157 ← itemdb/index.md -├── check-frontmatter.py 138 ← Frontmatter validation -├── list-risk-files.py 75 ← Risk file listing -├── script-to-asciinema.py 76 ← Cast → GIF -├── mock-llm-server.py 180 ← Mock LLM for tests -├── mock-llm-parity.py 162 ← Mock parity checker +├── create-finding.py 201 +├── move-finding.py 186 +├── create-evidence.py 99 +├── package-finding.py 153 +├── list-findings.py 198 +├── render-report.py 494 +├── render-index.py 157 +├── check-frontmatter.py 138 +├── list-risk-files.py 75 +├── script-to-asciinema.py 76 +├── mock-llm-server.py 180 +├── mock-llm-parity.py 162 └── mock_llm_scripts/ 6 JSON files ``` -### 2.2 `run-agent.py` internal structure - -The 5,876-line file contains these concerns, in file order: - -| Lines | Concern | Functions/Classes | 
-|---|---|---| -| 1–103 | Imports, debug logging, version check | `check_opencode_version`, `_chat_debug`, `_setup_chat_debug` | -| 104–438 | Model resolution | `_scan_event_for_model`, `_discover_opencode_default_model`, `_probe_effective_model`, `_read_codecome_yml_agent`, `resolve_model_and_variant` | -| 440–520 | Prompt loading | `resolve_color_mode`, `build_console`, `load_prompt` | -| 523–675 | Todo rendering | `extract_todos`, `_todo_summary`, `render_todowrite_rich/plain` | -| 678–732 | Permission errors + tunables | `render_permission_error_rich/plain`, ~30 env var config knobs | -| 733–1031 | File cache + utilities | `_SNAPSHOT_CACHE`, `_relativize_path`, `_detect_lexer`, `_compute_diff`, `_cache_set/get/reread`, `_strip_read_framing`, `_classify_internal_read` | -| 1035–1156 | Read tool renderer | `render_read_rich`, `render_read_plain` | -| 1161–1270 | Write tool renderer | `render_write_rich`, `render_write_plain` | -| 1288–1381 | Edit tool renderer | `render_edit_rich`, `render_edit_plain` | -| 1386–1650 | Apply-patch renderer | `_ParsedFilePatch`, `_parse_apply_patch_envelope`, `_extract_apply_patch_payload`, `render_apply_patch_rich/plain` | -| 1655–1750 | Glob renderer | `_parse_glob_output`, `render_glob_rich/plain` | -| 1755–2059 | Grep renderer | `_grep_compile_pattern`, `_grep_format_line_rich/plain`, `_parse_grep_output`, `render_grep_rich/plain` | -| 2064–2120 | Bash renderer | `render_bash_rich/plain` | -| 2123–2923 | Sandbox-bootstrap sub-renderer | `_is_sandbox_bootstrap_json_call`, `_sandbox_payload_matches`, `_maybe_render_sandbox_bootstrap`, 12 `_render_sandbox_*` functions | -| 2925–3518 | Bash-shim sub-renderer | `_BashShim`, `_is_bash_shim_call`, parsers for `cat`/`head`/`tail`/`rg`/`grep`/`ls`/`find`/`tree`/`rtk`, normalizers, `_maybe_render_bash_shim` | -| 3521–3610 | Task + Skill renderers | `render_task_rich/plain`, `render_skill_rich/plain` | -| 3612–3720 | Tool dispatch | `_dispatch_tool_renderer` (10-tool if/elif chain) | -| 
3723–4105 | Event renderers | `render_step_start`, `render_text`, `render_reasoning`, `render_tool_use`, `render_step_finish`, `render_error`, `render_session_status`, `render_subagent_status`, `render_message_updated`, `render_event` dispatcher | -| 4107–4213 | CLI parser | `build_parser` | -| 4234–4450 | Thinking + resume logic | `_resolve_thinking_decision`, `_build_phase_resume_prompt`, `_build_frontmatter_resume_prompt`, `_build_resume_command` | -| 4453–4556 | Graceful completion | `check_phase_graceful_completion`, `_exploitation_status_looks_real` | -| 4557–4783 | Session lifecycle + run | `_create_session`, `_create_chat_session`, `_send_prompt_to_session`, `_consume_events`, `_run_single_attempt` | -| 4786–4822 | Model table display | `show_model_table` | -| 4833–5511 | Chat TUI | `TextualConsoleProxy`, `_ChatApp`, `_QuitScreen`, `_run_chat_mode` | -| 5514–5876 | `main()` entry point | Orchestration: server start, attempt loop, retry/resume logic, frontmatter repair, exit handling | +### 2.2 `run-agent.py` responsibilities + +`run-agent.py` currently contains these concerns: + +| Concern | Examples | +|---|---| +| CLI and startup | `build_parser()`, `main()`, signal forwarding, version checks | +| Model/config resolution | `resolve_model_and_variant()`, OpenCode DB discovery, runtime probe, thinking decision | +| Prompt loading | prompt file loading, `codecome.yml` extra prompts, env-provided prompt extras | +| OpenCode session HTTP | create session, create chat session, send prompt, auth headers | +| Event consumption glue | `_consume_events()`, `_run_single_attempt()` | +| Rendering | all generic event renderers and all tool renderers | +| Rendering state | global tunables, snapshot cache, path helpers, diff helpers | +| Command-specific rendering | sandbox-bootstrap JSON rendering, `rtk`/`rg`/`ls`/`find`/`tree` shims | +| Phase policy | finish reason classification, auto-resume prompts, graceful completion checks | +| Frontmatter repair | local 
validation and minimal auto-repair retry loop | +| Chat TUI | Textual app, RichLog proxy, chat debug logging, modeline | ### 2.3 `events/` package structure -``` +```text events/ -├── __init__.py EventLoop — Phase runner orchestrator -│ Uses: SseClient, StateTracker, emit_event -│ Called from: run-agent.py._consume_events() -│ Callback to: run-agent.py.render_event() via render_fn parameter -│ -├── chat_loop.py ChatEventLoop — Multi-turn chat consumer -│ Uses: SseClient, StateTracker, emit_event -│ Called from: run-agent.py._ChatApp (Textual TUI) -│ Duplicates: permission handling, session sync, idle detection, dedup -│ -├── sse_client.py SseClient — Raw SSE stream with reconnection -│ Dependency-free (only stdlib) -│ -├── state_tracker.py StateTracker — Delta → finalized part translation -│ Dependency-free (only stdlib) -│ -└── emitters.py emit_event() — 2-line callable bridge - Purpose: avoid circular import (events/ → run-agent.py) +├── __init__.py EventLoop — phase runner orchestrator +├── chat_loop.py ChatEventLoop — multi-turn chat consumer +├── sse_client.py SseClient — raw SSE stream with reconnection +├── state_tracker.py StateTracker — delta → finalized part translation +└── emitters.py emit_event() — small callable bridge ``` -**Key coupling:** `EventLoop.run(render_fn)` and `ChatEventLoop.start_consumer(render_fn)` both accept `run-agent.py.render_event` as a parameter. The `emitters.py` module simply calls `render_fn(console, phase, label, event)`. This is a runtime dependency inversion to break the compile-time cycle. +`EventLoop` and `ChatEventLoop` already separate phase and chat lifecycle, but they duplicate important shared logic: + +- permission auto-reject, +- session message sync, +- session filtering, +- idle detection, +- deduplication, +- finalized event emission. + +The final design should keep the phase/chat split while moving the common parts into a base event loop. 
### 2.4 Finding management scripts — duplication catalog -All six scripts duplicate these patterns: +Several scripts duplicate these patterns: ```python -# Duplicated in 6 files: sys.path.insert(0, str(Path(__file__).resolve().parent)) import _colors as C ROOT = Path(__file__).resolve().parents[1] FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL) FINDING_ID_RE = re.compile(r"\bCC-(\d{4,})\b") FINDINGS_ROOT = ROOT / "itemdb" / "findings" - -# Duplicated in 4 files: -def load_frontmatter(path: Path) -> dict: - # 15 lines of identical YAML frontmatter extraction ``` +This is real duplication, but it should be addressed as a separate findings/itemdb epic rather than mixed into the runner/rendering refactor. + --- ## 3. Problems Catalog -### P1: Monolith — 10+ concerns in one file -`run-agent.py` is 5,876 lines. Rendering, orchestration, model resolution, chat TUI, and CLI parsing have no module boundaries. Any change to a renderer risks merge conflicts with TUI changes. +### P1: `run-agent.py` is a monolith -### P2: Circular dependency via callable injection -`run-agent.py` imports `events/EventLoop`, which calls back into `run-agent.py.render_event()` via a `render_fn: Callable` parameter. This means neither module can be understood in isolation, and both must be loaded for any test or analysis. +Many unrelated concerns share one file. Renderer work, chat changes, model-resolution changes, and phase-runner changes all collide in the same module. -### P3: `_rich` / `_plain` boilerplate -Every one of the 10 tool renderers has two near-identical functions (~1,300 lines total). The dispatch site repeats `if HAVE_RICH: render_X_rich(...) else: render_X_plain(...)` 10 times in `_dispatch_tool_renderer`. 
+### P2: Rendering lacks module boundaries -### P4: EventLoop / ChatEventLoop duplication -Both classes independently implement: -- Permission auto-reject via `POST /permission/{id}/reply` -- Session message sync via `GET /session/{id}/message` -- `_belongs_to_session(event)` filtering -- `_is_session_idle(event)` detection -- Message deduplication via `_seen_message_ids` + `_emitted_signatures` +Rendering is implemented as a large collection of functions plus global settings. Tool renderers and generic event renderers live together in `run-agent.py`, and the main dispatch function is a long hardcoded chain. -This is ~250 lines of duplicated logic. +### P3: Rich/plain rendering is duplicated -### P5: Implicit bash-renderer dispatch chain -The `bash` tool case in `_dispatch_tool_renderer` has a hardcoded cascade: +Most tool renderers have separate `_rich` and `_plain` functions. Some duplication is unavoidable, but the current structure repeats dispatch and setup logic too much. -```python -elif tool_lower == "bash": - _cache_invalidate_stale() - if _maybe_render_sandbox_bootstrap(console, state): return True - if _maybe_render_bash_shim(console, state): return True - if HAVE_RICH: return render_bash_rich(console, state) - else: return render_bash_plain(state) -``` +### P4: Rendering state is implicit + +Path roots, Rich availability, display tunables, and the write/diff snapshot cache are module-level globals. Some are stable for the duration of one wrapper run, but they should still be explicit runtime context. + +### P5: Snapshot cache side effects are hidden + +Read/write/edit/apply_patch renderers use a cache-like mechanism to show useful diffs. That behavior is useful, but the state should be explicit and isolated as `SnapshotCache`. + +### P6: EventLoop / ChatEventLoop duplication + +Phase and chat consumption share a lot of SSE/session mechanics but currently implement them separately. 
-Adding a new interceptor requires editing `_dispatch_tool_renderer` — the chain is implicit. +### P7: Command execution rendering is implicit -### P6: Isolated finding scripts with duplicated infrastructure -Six separate `tools/*.py` files each re-implement `load_frontmatter`, path normalization, `FINDING_ID_RE`, and `sys.path` manipulation. They share no module. +The current bash rendering path has special cases for CodeCome commands and shell helper patterns. This coupling is intentional and useful, but it should be represented as an explicit interceptor chain rather than buried in the main tool dispatcher. -### P7: Model resolution is a cross-cutting tangle -`resolve_model_and_variant()` touches CLI args (`_extract_flag_value`), env vars (`CODECOME_MODEL`), YAML config (`codecome.yml`), the opencode SQLite DB, and runtime probe sessions. It has 4 different source-of-truth formats and lives in the same file as the Textual TUI. +### P8: Finding scripts duplicate frontmatter/path helpers + +Finding/itemdb CLI scripts should keep their stable entrypoints, but reusable logic belongs in a shared package. --- ## 4. 
Target Architecture -``` +```text tools/ ├── _colors.py # unchanged │ -├── codecome/ # NEW: Core runner package +├── codecome/ # Core runner/config package │ ├── __init__.py -│ ├── cli.py # main(), build_parser(), show_model_table() -│ ├── config.py # resolve_model_and_variant(), load_prompt(), -│ │ # resolve_color_mode(), build_console(), -│ │ # _resolve_thinking_decision(), truthy_env() -│ ├── runner.py # _run_single_attempt(), _consume_events(), -│ │ # retry loop, resume prompt builders -│ ├── session.py # _create_session(), _create_chat_session(), -│ │ # _send_prompt_to_session(), _get_headers() -│ ├── graceful.py # check_phase_graceful_completion(), -│ │ # _phase_checklist_lines(), _phase1_required_artifacts() -│ └── version.py # check_opencode_version() +│ ├── cli.py # main(), build_parser(), top-level banners +│ ├── config.py # env, codecome.yml, prompt, model, color/render settings +│ ├── session.py # OpenCode HTTP session/prompt helpers +│ ├── runner.py # phase attempt loop and high-level orchestration +│ ├── graceful.py # phase completion checks and resume prompt builders +│ ├── transcript.py # transcript path/open/write helpers +│ └── version.py # OpenCode version checks │ -├── rendering/ # NEW: Tool rendering package -│ ├── __init__.py # Public API surface -│ ├── registry.py # _dispatch_tool_renderer() → chain-of-responsibility -│ ├── cache.py # _SNAPSHOT_CACHE, _cache_set/get/reread/invalidate_stale -│ ├── utils.py # _relativize_path(), _detect_lexer(), -│ │ # _count_lines_and_bytes(), _compute_diff(), -│ │ # _truncate_diff(), _strip_line_numbers(), -│ │ # _format_excerpt(), _is_likely_error(), -│ │ # _strip_read_framing(), _classify_internal_read(), -│ │ # _current_mtime() -│ ├── read.py # render_read_rich(), render_read_plain() -│ ├── write.py # render_write_rich(), render_write_plain() -│ ├── edit.py # render_edit_rich(), render_edit_plain() -│ ├── apply_patch.py # _ParsedFilePatch, _extract_apply_patch_payload(), -│ │ # 
render_apply_patch_rich/plain() -│ ├── glob.py # render_glob_rich(), render_glob_plain() -│ ├── grep.py # render_grep_rich(), render_grep_plain() -│ ├── bash.py # render_bash_rich(), render_bash_plain() -│ ├── sandbox.py # _maybe_render_sandbox_bootstrap() + 12 _render_sandbox_*() -│ ├── bash_shim.py # _maybe_render_bash_shim(), _BashShim, -│ │ # parsers (cat/head/tail/rg/grep/ls/find/tree/rtk), -│ │ # normalizers, shim renderers -│ ├── todo.py # render_todowrite_rich/plain(), extract_todos() -│ ├── task_skill.py # render_task_rich/plain(), render_skill_rich/plain() -│ ├── permissions.py # render_permission_error_rich/plain() -│ └── events.py # render_step_start(), render_text(), render_reasoning(), -│ # render_tool_use(), render_step_finish(), -│ # render_error(), render_session_status(), -│ # render_subagent_status(), render_message_updated(), -│ # render_server_connected(), render_session_diff(), -│ # render_unknown(), render_event() dispatcher +├── rendering/ # Rendering package +│ ├── __init__.py +│ ├── context.py # RenderContext +│ ├── settings.py # RenderSettings +│ ├── cache.py # SnapshotCache +│ ├── sink.py # PlainSink, RichConsoleSink, TextualRichLogSink +│ ├── registry.py # RendererRegistry +│ ├── events.py # generic event renderer classes +│ ├── tools/ +│ │ ├── __init__.py +│ │ ├── base.py # ToolRenderer base class +│ │ ├── todo.py # TodoRenderer +│ │ ├── read.py # ReadRenderer +│ │ ├── write.py # WriteRenderer +│ │ ├── edit.py # EditRenderer +│ │ ├── apply_patch.py # ApplyPatchRenderer +│ │ ├── glob.py # GlobRenderer +│ │ ├── grep.py # GrepRenderer +│ │ ├── command.py # CommandRenderer for bash/tool command execution +│ │ ├── sandbox.py # sandbox rendering helpers/interceptor support +│ │ ├── task.py # TaskRenderer +│ │ └── skill.py # SkillRenderer +│ └── command_interceptors/ +│ ├── __init__.py +│ ├── base.py # CommandExecutionInterceptor protocol/base +│ ├── sandbox_bootstrap.py # sandbox-bootstrap / make sandbox-* renderer +│ ├── rtk_read.py # rtk 
read / cat/head/tail equivalent rendering +│ ├── rtk_grep.py # rtk grep / rg equivalent rendering +│ └── shell_listing.py # ls/find/tree listing rendering │ -├── chat/ # NEW: Chat TUI package +├── chat/ # Chat TUI package │ ├── __init__.py -│ ├── app.py # _ChatApp, _QuitScreen, TextualConsoleProxy -│ └── harness.py # _run_chat_mode() +│ ├── app.py # Textual App and QuitScreen +│ ├── console_proxy.py # Textual-safe RichLog proxy/sink support +│ ├── debug.py # chat debug log helpers +│ └── harness.py # run_chat_mode() │ -├── events/ # REFACTORED: Add base class, reduce duplication -│ ├── __init__.py # EventLoop (extends BaseEventConsumer) -│ ├── base.py # NEW: BaseEventConsumer -│ │ # Shared: permission handling, session sync, -│ │ # session filtering, idle detection, dedup -│ ├── sse_client.py # unchanged -│ ├── state_tracker.py # unchanged -│ ├── chat_loop.py # ChatEventLoop (extends BaseEventConsumer) -│ └── emitters.py # unchanged +├── events/ # Event consumption package +│ ├── __init__.py # compatibility exports +│ ├── base.py # BaseEventLoop shared logic +│ ├── phase_loop.py # PhaseEventLoop +│ ├── chat_loop.py # ChatEventLoop +│ ├── sse_client.py # unchanged or minimally changed +│ ├── state_tracker.py # unchanged or minimally changed +│ └── emitters.py # unchanged or removed if no longer needed │ ├── opencode/ # unchanged │ ├── __init__.py │ └── serve.py │ -├── findings/ # NEW: Consolidated finding management +├── findings/ # Later epic: consolidated finding management │ ├── __init__.py -│ ├── frontmatter.py # Shared: load_frontmatter(), replace_scalar_value(), -│ │ # replace_nested_value(), find_finding(), slugify(), -│ │ # next_finding_id(), iter_finding_files() -│ ├── create.py # from create-finding.py -│ ├── move.py # from move-finding.py -│ ├── listing.py # from list-findings.py -│ ├── evidence.py # from create-evidence.py -│ ├── package.py # from package-finding.py -│ ├── render_report.py # from render-report.py -│ └── render_index.py # from 
render-index.py +│ ├── frontmatter.py +│ ├── create.py +│ ├── move.py +│ ├── listing.py +│ ├── evidence.py +│ ├── package.py +│ ├── render_report.py +│ └── render_index.py +│ +├── AGENTS.md # architecture guidelines for tools/ +│ +├── run-agent.py # thin wrapper, compatibility entrypoint +├── create-finding.py # thin wrapper after findings epic +├── move-finding.py # thin wrapper after findings epic +├── list-findings.py # thin wrapper after findings epic +├── create-evidence.py # thin wrapper after findings epic +├── package-finding.py # thin wrapper after findings epic +├── render-report.py # thin wrapper after findings epic +├── render-index.py # thin wrapper after findings epic │ -├── gate-check.py # unchanged -├── sandbox-bootstrap.py # unchanged -├── run-sweep.py # unchanged -├── codecome.py # unchanged -├── check-frontmatter.py # unchanged -├── list-risk-files.py # unchanged +├── gate-check.py # unchanged initially +├── sandbox-bootstrap.py # unchanged initially +├── run-sweep.py # unchanged initially, may keep calling wrapper +├── codecome.py # unchanged initially +├── check-frontmatter.py # unchanged initially +├── list-risk-files.py # unchanged initially ├── script-to-asciinema.py # unchanged ├── mock-llm-server.py # unchanged ├── mock-llm-parity.py # unchanged └── mock_llm_scripts/ # unchanged ``` -### 4.1 New dependency graph +--- + +## 5. Rendering Design + +### 5.1 Renderer inputs +Renderers should receive the normalized event dictionaries that the event layer already produces. Do not introduce a custom event object model in this refactor. 
+ +```python +Event = dict[str, Any] +ToolState = dict[str, Any] ``` -codecome/cli.py - ├── codecome/config.py (model, prompt, color) - ├── codecome/runner.py (orchestration) - │ ├── codecome/session.py - │ ├── codecome/graceful.py - │ ├── events/ (EventLoop) - │ └── rendering/ (render_event dispatcher) - ├── chat/harness.py (--chat mode) - │ └── chat/app.py - └── _colors.py - -rendering/registry.py - ├── rendering/read.py write.py edit.py ... bash.py sandbox.py bash_shim.py - ├── rendering/todo.py task_skill.py permissions.py events.py - └── rendering/utils.py cache.py - -events/base.py - └── events/sse_client.py - └── events/state_tracker.py - -findings/frontmatter.py - └── findings/create.py move.py listing.py evidence.py package.py + +Generic event renderers receive the whole event: + +```python +class EventRenderer: + event_types: tuple[str, ...] = () + + def render(self, event: dict[str, Any]) -> bool: + ... ``` ---- +Tool renderers receive the tool name and tool state extracted from a `tool_use` event: -## 5. Phased Implementation Plan +```python +class ToolRenderer: + tool_names: tuple[str, ...] = () -### Phase A — Extract Renderers (Lowest Risk) + def render(self, tool_name: str, state: dict[str, Any]) -> bool: + ... +``` -**Goal:** Move all rendering code out of `run-agent.py` into a new `tools/rendering/` package. +This keeps the boundary simple and close to the current implementation. + +### 5.2 Specific renderer classes + +Use one renderer class per event/tool family. Avoid a single giant renderer class. 
+ +Examples: + +```text +Generic event renderers: +- ServerConnectedRenderer +- ServerHeartbeatRenderer +- MessageUpdatedRenderer +- TextEventRenderer +- ReasoningEventRenderer +- StepStartRenderer +- StepFinishRenderer +- ErrorEventRenderer +- SessionStatusRenderer +- SessionDiffRenderer +- SubagentStatusRenderer +- UnknownEventRenderer + +Tool renderers: +- TodoRenderer +- ReadRenderer +- WriteRenderer +- EditRenderer +- ApplyPatchRenderer +- GlobRenderer +- GrepRenderer +- CommandRenderer +- TaskRenderer +- SkillRenderer +- FallbackToolRenderer +``` -**Why first:** Renderers are pure functions with clear inputs (`Console`, `dict`) and outputs (`bool`). They have no side effects except writing to console. They are the easiest to extract and test in isolation. +### 5.3 Render context -**Steps:** +Renderers share a small runtime context: -1. **Create `tools/rendering/__init__.py`** — empty, acts as package marker. +```python +@dataclass +class RenderContext: + root: Path + sink: RenderSink + settings: RenderSettings + cache: SnapshotCache +``` -2. **Create `tools/rendering/utils.py`** — move these shared utilities: - - `_relativize_path()`, `_detect_lexer()`, `_count_lines_and_bytes()` - - `_compute_diff()`, `_truncate_diff()`, `_strip_line_numbers()` - - `_format_excerpt()`, `_is_likely_error()` - - `_strip_read_framing()`, `_classify_internal_read()`, `_current_mtime()` - - All parser regexes: `_READ_FILE_FRAMING_RE`, `_READ_DIR_FRAMING_RE`, `_READ_SUMMARY_RE`, `_LEXER_MAP`, etc. - - Add `ROOT` constant: `Path(__file__).resolve().parents[2]` +This replaces scattered globals such as `ROOT`, Rich/Textual destination checks, render tunables, and snapshot cache state. -3. 
**Create `tools/rendering/cache.py`** — move: - - `_SNAPSHOT_CACHE`, `_WRITE_CACHE_ENABLED`, `_SNAPSHOT_CACHE_CAP` - - `_cache_set()`, `_cache_get()`, `_cache_invalidate_stale()`, `_cache_reread()` - - Tunables that affect cache only: `CODECOME_WRITE_CACHE`, `CODECOME_WRITE_CACHE_CAP` +### 5.4 Render sinks: destination abstraction, not layout abstraction -4. **Extract renderers — one module at a time:** - - `tools/rendering/todo.py` — `render_todowrite_rich/plain`, `extract_todos`, `_todo_summary` - - `tools/rendering/permissions.py` — `render_permission_error_rich/plain` - - `tools/rendering/read.py` — `render_read_rich/plain` + helpers - - `tools/rendering/write.py` — `render_write_rich/plain` + helpers - - `tools/rendering/edit.py` — `render_edit_rich/plain` + helpers - - `tools/rendering/apply_patch.py` — `_ParsedFilePatch`, `_parse_apply_patch_envelope`, etc. - - `tools/rendering/glob.py` — `render_glob_rich/plain`, `_parse_glob_output` - - `tools/rendering/grep.py` — `render_grep_rich/plain`, `_grep_compile_pattern`, `_parse_grep_output` - - `tools/rendering/bash.py` — `render_bash_rich/plain` - - `tools/rendering/sandbox.py` — ALL sandbox-bootstrap code (~700 lines) - - `tools/rendering/bash_shim.py` — ALL bash-shim code (~500 lines), `_BashShim`, parsers - - `tools/rendering/task_skill.py` — `render_task_rich/plain`, `render_skill_rich/plain` - - `tools/rendering/events.py` — `render_step_start`, `render_text`, `render_reasoning`, `render_tool_use`, `render_step_finish`, `render_error`, `render_session_status`, `render_subagent_status`, `render_message_updated`, `render_server_connected`, `render_session_diff`, `render_unknown`, `render_event()` dispatcher, finish-reason constants, permission-error extractor +The sink abstracts where output is written. It should not restrict what renderers can draw. -5. 
**Create `tools/rendering/registry.py`** — move: - - `_dispatch_tool_renderer()` — becomes the tool dispatch registry - - All tunable env vars that affect rendering: `_READ_DISPLAY_LINES`, `_WRITE_CONTENT_LINES`, `_WRITE_DIFF_LIMIT`, `_EDIT_DIFF_LINES`, `_READ_HIGHLIGHT_LIMIT`, `_GLOB_MATCH_CAP`, `_APPLY_PATCH_DIFF_LINES`, `_APPLY_PATCH_MAX_FILES`, `_GREP_FILE_CAP`, `_GREP_LINE_CAP_PER_FILE`, `_GREP_TOTAL_LINE_CAP`, `_GREP_HIGHLIGHT`, `_REASONING_MAX_CHARS`, `_RENDER_REASONING`, `_DEBUG_UNKNOWN_EVENTS`, `_SANDBOX_RENDER`, `_SANDBOX_VALIDATE_STDERR_LINES`, `_SANDBOX_FILES_CAP`, `_BASH_SHIM_RENDER`, `_BASH_SHIM_LS_STRIP_LONG_FORMAT`, `_INTERNAL_READ_SUPPRESS`, `_SUBAGENT_HEARTBEAT_INTERVAL_S`, `_SUBAGENT_UPDATE_THROTTLE_S`, `_TASK_PROMPT_PREVIEW_LINES`, `_RENDER_SUBAGENT_UPDATES`, `_SUBAGENT_LAST_STATE` +Rendering must support three destinations: -6. **Update imports in `run-agent.py`** — replace all moved function definitions with imports from `rendering.*`. +1. **Plain**: basic text output, no Rich renderables, no ANSI dependency. +2. **Rich console**: normal terminal phase run using `rich.console.Console`. +3. **Textual chat**: chat mode writing Rich renderables into a Textual `RichLog` or thread-safe proxy. -7. **Verify:** Run `make tests` (no rendering changes expected, no breakage of event pipeline). +```python +class RenderSink(Protocol): + mode: Literal["plain", "rich", "textual"] -**Estimated lines moved:** ~2,800 lines out of `run-agent.py`. + def write(self, renderable: Any, *, expand: bool = True) -> None: + ... -**Rollback:** Revert `run-agent.py` and delete `tools/rendering/`. One `git checkout` per step. + def write_text(self, text: str) -> None: + ... +``` ---- +Implementations: -### Phase B — Extract Chat TUI (Low Risk) +```text +PlainSink + - writes plain strings to stdout + - used when Rich is unavailable or color/rich output is disabled -**Goal:** Move all chat-mode code out of `run-agent.py` into a new `tools/chat/` package. 
+RichConsoleSink + - wraps rich.console.Console + - renderers may write arbitrary Rich renderables: Panel, Group, Text, Table, Syntax, Rule, etc. -**Steps:** +TextualRichLogSink + - wraps a Textual RichLog or a thread-safe proxy + - renderers may write the same Rich renderables as RichConsoleSink +``` -1. **Create `tools/chat/__init__.py`** +Rich and Textual normally share the same `render_rich()` code path; only the sink differs. -2. **Create `tools/chat/app.py`** — move: - - `TextualConsoleProxy` class - - `_ChatApp` class (the Textual `App` subclass) - - `_QuitScreen` class (the `ModalScreen`) - - All `HAVE_RICH` guard at the module level, or keep inside the try/except ImportError block - - `ChatApp` and `QuitScreen` module-level aliases +### 5.5 Base renderer helpers -3. **Create `tools/chat/harness.py`** — move: - - `_run_chat_mode()` function - - Chat debug logging: `_CHAT_DEBUG_FP`, `_chat_debug()`, `_setup_chat_debug()`, `_close_chat_debug()` +A base renderer may provide convenience helpers, but should not impose a fixed layout such as “everything is a panel”. -4. **Update `run-agent.py`:** - - Replace all moved code with `from chat.harness import _run_chat_mode` - - Replace all moved code with `from chat.app import ChatApp, QuitScreen` - - Keep `_setup_chat_debug()` and `_close_chat_debug()` calls in `main()` but import them from `chat.harness` +```python +class BaseRenderer: + def __init__(self, context: RenderContext) -> None: + self.context = context -5. **Verify:** Run `make chat` (if Textual is installed) to confirm TUI still works. + @property + def sink(self) -> RenderSink: + return self.context.sink -**Estimated lines moved:** ~500 lines out of `run-agent.py`. + @property + def rich(self) -> bool: + return self.context.sink.mode in ("rich", "textual") ---- + @property + def plain(self) -> bool: + return self.context.sink.mode == "plain" +``` + +Individual renderers remain free to emit the Rich renderables that best fit their output. 
+ +### 5.6 SnapshotCache + +Move file snapshot/diff state into a dedicated component: + +```python +class SnapshotCache: + def set(self, path: Path, content: str) -> None: ... + def get(self, path: Path) -> str | None: ... + def invalidate_stale(self) -> None: ... + def reread(self, path: Path) -> None: ... +``` + +Expected use: + +```text +ReadRenderer -> may populate cache +WriteRenderer -> may compare/update cache +EditRenderer -> may reread cache +ApplyPatchRenderer -> may reread cache +Search/render shims -> may invalidate stale entries before rendering +``` + +### 5.7 CommandExecutionInterceptor -### Phase C — Extract Shared Event Consumer Base (Medium Risk) +Command execution rendering is intentionally CodeCome-aware. Special rendering for `tools/sandbox-bootstrap.py --format json`, `make sandbox-*`, `rtk read`, `rtk grep`, `rg`, `ls`, `find`, or `tree` is product behavior, not accidental coupling. -**Goal:** Eliminate the ~250 lines of duplicated logic between `EventLoop` and `ChatEventLoop` by introducing a shared `BaseEventConsumer` class. +Model this as a command renderer plus an interceptor chain: -**Steps:** +```python +class CommandExecutionInterceptor(Protocol): + name: str + + def try_render( + self, + command: str, + state: dict[str, Any], + renderer: "CommandRenderer", + ) -> bool: + ... +``` -1. 
**Create `tools/events/base.py`** with: - ```python - class BaseEventConsumer: - """Shared SSE consumption logic for EventLoop and ChatEventLoop.""" +```python +class CommandRenderer(ToolRenderer): + tool_names = ("bash",) + + def render(self, tool_name: str, state: dict[str, Any]) -> bool: + command = self.extract_command(state) + for interceptor in self.interceptors: + if interceptor.try_render(command, state, self): + return True + return self.render_generic_command(state) +``` - def __init__(self, base_url, session_id, console, *, - auth_token=None, workspace_dir=None): - self.base_url = base_url.rstrip("/") - self.session_id = session_id - self.console = console - self.auth_token = auth_token - self.workspace_dir = workspace_dir - self._tracker = StateTracker() - self._seen_message_ids: set[str] = set() - self._emitted_signatures: set[tuple[str, str]] = set() +This keeps CodeCome-specific command knowledge organized and extensible without growing the main tool dispatcher. - # --- Shared (currently duplicated in both classes) --- +--- - def _get_headers(self) -> dict[str, str]: ... +## 6. Event Loop Design - @staticmethod - def _is_session_idle(event: dict[str, Any]) -> bool: ... +### 6.1 Class layout - def _belongs_to_session(self, event: dict[str, Any]) -> bool: ... +```text +events/ +├── base.py BaseEventLoop +├── phase_loop.py PhaseEventLoop +├── chat_loop.py ChatEventLoop +├── sse_client.py +└── state_tracker.py +``` - def _handle_permission(self, event: dict[str, Any]) -> None: ... +`BaseEventLoop` owns shared mechanics: - def _sync_session_messages(self) -> list[dict[str, Any]]: ... +- SSE client construction hooks, +- session filtering, +- permission auto-reject, +- session message sync, +- deduplication, +- finalized event emission, +- common HTTP headers. - def _dedup_and_emit(self, render_fn, finalized_events) -> None: ... - ``` +`PhaseEventLoop` owns phase-specific lifecycle: -2. 
**Refactor `EventLoop` to extend `BaseEventConsumer`:** - - Remove duplicated methods - - Keep: `run()`, `stop()`, `trigger_recovery_sync()`, `_update_result()`, `_build_result()` - - Call `self._handle_permission(event)` instead of local implementation - - Call `self._sync_session_messages()` instead of local implementation +- consume one session until idle, +- update and return `RunResult`, +- support recovery sync after reconnect, +- terminate when the phase attempt is complete. -3. **Refactor `ChatEventLoop` to extend `BaseEventConsumer`:** - - Remove duplicated methods - - Keep: `start_consumer()`, `send_prompt()`, `get_state()`, `stop()`, `_consumer_worker()` - - Replace `_emit_event()` call with `_dedup_and_emit()` +`ChatEventLoop` owns chat-specific lifecycle: -4. **Update `events/__init__.py`** — export `BaseEventConsumer` optionally for tests. +- long-lived consumer, +- multi-turn `send_prompt()`, +- stop semantics for the TUI, +- no single-attempt `RunResult` completion contract. -5. **Verify:** Run all phase targets (`make phase-1`, etc.) and `make chat` to confirm no regression. +### 6.2 Compatibility alias -**Estimated lines added:** ~80 in new `base.py`; ~120 lines removed from `EventLoop` and `ChatEventLoop` (net reduction ~40 lines, but code quality improvement). +Keep import compatibility during migration: ---- +```python +# events/__init__.py +from events.phase_loop import PhaseEventLoop -### Phase D — Restructure Core Runner (Medium Risk) - -**Goal:** Split `run-agent.py` into the `codecome/` package. This is the phase where `run-agent.py` is finally deleted. - -**Steps:** - -1. **Create `tools/codecome/__init__.py`** — empty package marker. - -2. **Create `tools/codecome/version.py`** — move: - - `check_opencode_version()`, `MINIMUM_OPENCODE_VERSION`, `parse_ver()` (inline helper) - -3. 
**Create `tools/codecome/config.py`** — move: - - `resolve_model_and_variant()`, `_extract_flag_value()`, `_read_codecome_yml_agent()` - - `_discover_opencode_default_model()`, `_probe_effective_model()`, `_scan_event_for_model()` - - `_extract_model_from_export()`, `_strip_probe_unsafe_flags()` - - `load_prompt()`, `_PHASE_NAMES`, `resolve_color_mode()`, `build_console()`, `truthy_env()` - - `_resolve_thinking_decision()`, `_thinking_default_for_provider()` - - `resolve_runtime_model_for_banner()` - - `_MODEL_FLAG_NAMES`, `_VARIANT_FLAG_NAMES` - - All model-related constants: `_DISCOVERY_TIMEOUT_S`, `_MODEL_PROBE_TIMEOUT_S` - -4. **Create `tools/codecome/session.py`** — move: - - `_create_session()`, `_create_chat_session()`, `_send_prompt_to_session()` - - `_get_headers()` (or import from `events.base` if extracted there) - -5. **Create `tools/codecome/runner.py`** — move: - - `_run_single_attempt()`, `_consume_events()` - - `_build_phase_resume_prompt()`, `_build_frontmatter_resume_prompt()` - - `_build_resume_command()`, `_emit_fatal_error()` - - `show_model_table()` - -6. **Create `tools/codecome/graceful.py`** — move: - - `check_phase_graceful_completion()` - - `_phase_checklist_lines()`, `_phase1_required_artifacts()` - - `_exploitation_status_looks_real()`, `_phase1_required_artifacts()`, `_path_is_fresh()`, `_iter_files()` - - `_PHASE1_REQUIRED_ARTIFACT_NAMES` - -7. **Create `tools/codecome/cli.py`** — move: - - `build_parser()`, `main()`, the `if __name__ == "__main__"` block - - `RUN_START_TIME`, `iteration_retry_count`, `frontmatter_retry_count` - - Signal handling code (`_forward_signal`, signal setup/teardown) - - Banner display code - - Exit status display code - -8. **Delete `tools/run-agent.py`** — all code has been moved. - -9. 
**Create `tools/run-agent.py` as a thin wrapper** for backward compatibility during the transition: - ```python - #!/usr/bin/env python3 - """Thin wrapper — delegates to codecome.cli.""" - import sys - from pathlib import Path - sys.path.insert(0, str(Path(__file__).resolve().parent)) - from codecome.cli import main - if __name__ == "__main__": - raise SystemExit(main()) - ``` - -10. **Update all references:** - - `Makefile`: Change `python tools/run-agent.py` → `python tools/run-agent.py` (still works via wrapper), OR change to `python -m codecome.cli`. Recommended: keep the wrapper for backward compatibility, add a `make` note about the new canonical path. - - `tools/run-sweep.py`: Already references `tools/run-agent.py` by path, no change needed. - - Tests: Update any test that imports from `run-agent` directly (check `tests/`). - -11. **Verify:** Run `make tests`, then `make phase-1` through `make phase-6` to confirm all phase targets work. - -**Estimated lines moved:** ~1,300 lines into 6 new modules. `run-agent.py` becomes a 15-line wrapper (or deleted entirely once all callers are updated). +EventLoop = PhaseEventLoop +``` --- -### Phase E — Consolidate Finding Tools (Low Risk) - -**Goal:** Merge 6 standalone scripts into a `findings/` package with shared frontmatter utilities, eliminating duplicated parsing code. - -**Steps:** - -1. **Create `tools/findings/__init__.py`** — empty package marker. - -2. **Create `tools/findings/frontmatter.py`** — shared utilities extracted from the 6 scripts: - ```python - ROOT = Path(__file__).resolve().parents[2] - FINDINGS_ROOT = ROOT / "itemdb" / "findings" - FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL) - FINDING_ID_RE = re.compile(r"\bCC-(\d{4,})\b") - STATUSES = ["PENDING", "CONFIRMED", "EXPLOITED", "REJECTED", "DUPLICATE"] - - def load_frontmatter(path: Path) -> dict: ... - def find_finding(identifier: str) -> Path: ... 
- def iter_finding_files(status: Optional[str] = None) -> Iterable[Path]: ... - def collect_finding_ids(paths: Iterable[Path]) -> list[int]: ... - def next_finding_id() -> str: ... - def slugify(value: str) -> str: ... - def replace_scalar_frontmatter(content: str, key: str, value: str) -> str: ... - def replace_nested_value(content: str, key: str, value: str) -> str: ... - ``` - -3. **Refactor each script to use `findings/frontmatter.py`:** - - `tools/create-finding.py` → `tools/findings/create.py` - - `tools/move-finding.py` → `tools/findings/move.py` - - `tools/list-findings.py` → `tools/findings/listing.py` - - `tools/create-evidence.py` → `tools/findings/evidence.py` - - `tools/package-finding.py` → `tools/findings/package.py` - - `tools/render-report.py` → `tools/findings/render_report.py` - - `tools/render-index.py` → `tools/findings/render_index.py` - -4. **Keep thin wrappers** at the original paths for backward compatibility: - ```python - # tools/create-finding.py (thin wrapper) - import sys - from pathlib import Path - sys.path.insert(0, str(Path(__file__).resolve().parent)) - from findings.create import main - if __name__ == "__main__": - raise SystemExit(main()) - ``` - -5. **Update `Makefile` and `AGENTS.md`** references if they use absolute paths. - -6. **Verify:** Run all finding-management Makefile targets (if any) and manual invocations. +## 7. CodeCome Core Package Design + +### 7.1 `codecome/config.py` + +`codecome/config.py` is intentionally transversal. It should centralize CodeCome configuration resolution, but it must not contain execution logic. + +Allowed in `config.py`: + +- env helpers, +- cached `codecome.yml` loading, +- prompt extra configuration, +- model/variant resolution, +- thinking mode resolution, +- color/output mode resolution, +- render settings creation. 
+ +Not allowed in `config.py`: + +- server start/stop, +- session creation, +- prompt submission, +- phase attempt loops, +- retry/autoresume loops, +- phase completion checks. + +### 7.2 Other core modules + +```text +codecome/version.py + OpenCode version checks. + +codecome/session.py + OpenCode HTTP API helpers: headers, create session, create chat session, send prompt. + +codecome/graceful.py + Phase completion checks, required artifact checks, resume prompt builders. + +codecome/transcript.py + Transcript path naming, opening, writing, closing helpers. + +codecome/runner.py + Phase execution attempt orchestration. + +codecome/cli.py + CLI parser, main(), startup banner, exit summary, signal handling. +``` + +`tools/run-agent.py` remains as a thin wrapper to `codecome.cli.main()`. --- -## 6. Dependency Resolution During Migration +## 8. Phased Implementation Plan + +## Epic A — Runner / Rendering / Events / Chat + +### Phase A1 — Extract stable core helpers + +**Goal:** Reduce `run-agent.py` before the larger rendering refactor. -### Phase A → Phase D ordering rationale +Create: +```text +tools/codecome/ +├── __init__.py +├── config.py +├── session.py +├── version.py +├── graceful.py +└── transcript.py ``` -Phase A (renderers) — No imports from codecome/ or chat/. Pure extraction. - ↓ -Phase B (chat TUI) — Imports from rendering/ (set up in Phase A) + events/. - ↓ -Phase C (events base) — Independent of codecome/chat/. Can run any time after A. - ↓ -Phase D (core runner) — Imports from rendering/, chat/, events/, codecome/config, - codecome/session, codecome/graceful. Everything else - must be in place first. - ↓ -Phase E (findings) — Completely independent. Can run any time. 
+ +Move: + +- `check_opencode_version()` and version constants, +- `truthy_env()`, +- prompt loading and extra prompt config, +- model/variant/thinking resolution, +- color/output mode resolution, +- OpenCode session and prompt HTTP helpers, +- graceful phase completion helpers, +- resume prompt builders, +- transcript path/open/write helpers. + +Keep `run-agent.py` behavior unchanged. + +### Phase A2 — Introduce rendering foundation + +**Goal:** Add the rendering architecture without migrating all renderers at once. + +Create: + +```text +tools/rendering/ +├── __init__.py +├── context.py +├── settings.py +├── cache.py +├── sink.py +├── registry.py +├── events.py +├── tools/__init__.py +├── tools/base.py +├── command_interceptors/__init__.py +└── command_interceptors/base.py ``` -Phases A and B could be parallelized (different files, no conflicts). Phases C and E are independent and could run in parallel with A/B. Only Phase D has a hard dependency on A+B+C being complete. +Add: -### Import verification after each phase +- `RenderContext`, +- `RenderSettings`, +- `SnapshotCache`, +- `RenderSink` implementations, +- base `EventRenderer` and `ToolRenderer`, +- `RendererRegistry`, +- base `CommandExecutionInterceptor`. -After each phase, run: -```bash -python -c "from tools.rendering import registry" # Phase A -python -c "from tools.chat import harness" # Phase B -python -c "from tools.events.base import BaseEventConsumer" # Phase C -python -c "from tools.codecome.cli import main" # Phase D -python -c "from tools.findings.frontmatter import load_frontmatter" # Phase E +### Phase A3 — Migrate renderers incrementally + +**Goal:** Move and refactor renderers in small behavior-preserving batches. + +Suggested subphases: + +1. `TodoRenderer`, `TaskRenderer`, `SkillRenderer`, permission/error helpers. +2. `ReadRenderer`, `WriteRenderer`, `EditRenderer`, plus `SnapshotCache` integration. +3. `ApplyPatchRenderer`. +4. `GlobRenderer` and `GrepRenderer`. +5. 
`CommandRenderer` with generic command rendering. +6. `CommandExecutionInterceptor` implementations for sandbox-bootstrap, `rtk read`, `rtk grep`, and shell listing commands. +7. Generic event renderers in `rendering/events.py`. +8. Fallback tool/event renderers. + +At each step, `tools/run-agent.py` should still work. + +### Phase A4 — Extract Chat TUI + +**Goal:** Move chat code out of `run-agent.py` and make chat use the same rendering infrastructure. + +Create: + +```text +tools/chat/ +├── __init__.py +├── app.py +├── console_proxy.py +├── debug.py +└── harness.py ``` ---- +Move: + +- Textual app, +- quit modal, +- RichLog proxy, +- chat debug logging, +- `_run_chat_mode()`. + +Chat should use `TextualRichLogSink` or an equivalent proxy-compatible sink. + +### Phase A5 — Extract phase runner and CLI + +**Goal:** Leave `tools/run-agent.py` as a thin compatibility wrapper. + +Create: + +```text +tools/codecome/ +├── cli.py +└── runner.py +``` + +Move: + +- `build_parser()`, +- `main()`, +- phase execution loop, +- frontmatter repair loop, +- auto-resume loop, +- signal handling, +- final exit summaries. + +Replace `tools/run-agent.py` with: + +```python +#!/usr/bin/env python3 +from __future__ import annotations + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent)) + +from codecome.cli import main + +if __name__ == "__main__": + raise SystemExit(main()) +``` -## 7. 
File Change Summary +### Phase A6 — Refactor events package -| Phase | Files created | Files modified | Files deleted | -|---|---|---|---| -| A | 15 (`rendering/` modules) | 1 (`run-agent.py` — imports only) | 0 | -| B | 3 (`chat/` modules) | 1 (`run-agent.py` — imports only) | 0 | -| C | 1 (`events/base.py`) | 3 (`events/__init__.py`, `chat_loop.py`, `emitters.py`) | 0 | -| D | 6 (`codecome/` modules) | 2 (Makefile, `run-sweep.py`) | 1 (`run-agent.py` → thin wrapper) | -| E | 8 (`findings/` modules) | 7 (old scripts → thin wrappers) | 0 (wrappers preserved) | +**Goal:** Share SSE/session/dedup/permission logic between phase and chat loops. -**Total new files:** 33 -**Total modified files:** 14 -**Total deleted:** 1 (original `run-agent.py` body, wrapper remains) +Create/refactor: + +```text +tools/events/ +├── base.py +├── phase_loop.py +├── chat_loop.py +├── __init__.py +``` + +Move shared logic to `BaseEventLoop` and keep compatibility alias `EventLoop = PhaseEventLoop`. + +### Phase A7 — Add tools architecture guide + +**Goal:** Prevent future changes from reintroducing a monolith. + +Create `tools/AGENTS.md` with these guidelines: + +- historical scripts are thin wrappers; +- core runner logic lives under `tools/codecome/`; +- `codecome/config.py` is transversal config only, not execution logic; +- event loops live under `tools/events/`; +- renderers live under `tools/rendering/`; +- renderers receive normalized dict events/tool states; +- Rich and Textual renderers can emit arbitrary Rich renderables through a sink; +- snapshot/diff state belongs in `SnapshotCache`; +- command-specific rendering is implemented through `CommandExecutionInterceptor`; +- finding/itemdb helpers live under `tools/findings/`. 
+ +## Epic B — Findings / itemdb tooling + +### Phase B1 — Extract shared finding helpers + +Create: + +```text +tools/findings/ +├── __init__.py +└── frontmatter.py +``` + +Move shared helpers: + +- frontmatter loading/parsing, +- finding ID regex and lookup, +- status directory constants, +- iterating finding files, +- next finding ID, +- slug helpers, +- frontmatter scalar/nested replacement helpers. + +### Phase B2 — Move finding scripts behind wrappers + +Move implementation into: + +```text +tools/findings/ +├── create.py +├── move.py +├── listing.py +├── evidence.py +├── package.py +├── render_report.py +└── render_index.py +``` + +Keep historical scripts as thin wrappers. + +### Phase B3 — Update references + +Update Makefile/docs/AGENTS references only where needed. Prefer keeping existing CLI paths stable. --- -## 8. Risk Assessment +## 9. Dependency and Ordering Notes + +Recommended order: + +```text +A1 core helpers + ↓ +A2 rendering foundation + ↓ +A3 renderer migration + ↓ +A4 chat extraction + ↓ +A5 runner/CLI extraction + ↓ +A6 events base loop + ↓ +A7 tools/AGENTS.md +``` -| Risk | Probability | Impact | Mitigation | -|---|---|---|---| -| Import cycle during extraction | Medium | Compile-time error | Each extraction step verified with `python -c "import ..."` before proceeding | -| Event pipeline regression | Low | Phases produce wrong output | Full `make tests` after each phase; spot-check `make phase-1` | -| Chat TUI breakage | Low | Chat mode unusable | `make chat` smoke test after Phase B and D | -| Makefile target path change | Low | CI/CD breakage | Keep thin wrappers; update Makefile only in Phase D with backward compat | -| Global mutable state (caches, tunables) misrouted | Medium | Subtle rendering bugs | Renderer tunables moved as module-level constants; cache centralized in `rendering/cache.py` | -| Tests breaking on import path change | Medium | Test suite failures | Check `tests/` for direct imports from `run-agent`; update in Phase D 
| +Epic B is independent and can happen after Epic A or in parallel if done by a separate PR sequence. + +Why not extract renderers first? + +- The renderer refactor is important, but the current renderers depend on many globals and helpers. +- Extracting stable core helpers first reduces noise and lowers the risk of the renderer migration. +- Rendering should be refactored, not merely moved function-by-function. --- -## 9. Testing Strategy +## 10. Testing Strategy ### Pre-migration baseline + ```bash -make tests # Record baseline: all tests should pass -make phase-1 # Smoke test: should produce notes -make status # List findings +make tests +python tools/run-agent.py --show-model --agent recon ``` -### Per-phase verification -```bash -# After each phase: -make tests # Full test suite -python tools/run-agent.py --show-model --agent recon # Smoke test +If practical, also run a small/mock phase target or an existing recorded/mock OpenCode flow. + +### Per-phase checks -# After Phase D: -make phase-1 PHASE1_OPTS="--dry-run" # If dry-run support exists -make chat --dry-run # If possible +After each phase: + +```bash +python -m py_compile $(find tools -name '*.py' -not -path '*/.venv/*') +make tests +python tools/run-agent.py --show-model --agent recon ``` -### Post-migration full verification (Phase D) +Additional phase-specific checks: + ```bash -make tests # Must pass -make phase-1 # Full recon run -make phase-2 # Hypothesis generation -tools/gate-check.py 1 # Gate checks -tools/gate-check.py 2 +# After rendering changes +python tools/run-agent.py --show-model --agent recon --color never +python tools/run-agent.py --show-model --agent recon --color always + +# After chat extraction +# Run a chat smoke test if Textual is installed and a lightweight manual check is acceptable. + +# After events changes +# Run both phase mode and chat mode smoke tests. 
+ +# After findings changes tools/create-finding.py "Test finding" tools/list-findings.py tools/move-finding.py CC-XXXX REJECTED -tools/render-report.py tools/render-index.py +tools/render-report.py ``` -### Specific test areas to add -- **`rendering/cache.py`** — standalone unit tests for cache set/get/invalidate -- **`rendering/registry.py`** — test that all tool names dispatch correctly -- **`events/base.py`** — test shared permission handling and session filtering -- **`findings/frontmatter.py`** — test load/save/replace operations +### Tests to add + +- `SnapshotCache`: set/get/invalidate/reread behavior. +- `RendererRegistry`: dispatch by event type and tool name. +- `CommandExecutionInterceptor`: command matching and fallback ordering. +- `RenderSink`: plain/rich/textual sink smoke behavior. +- `BaseEventLoop`: session filtering, idle detection, permission handling, dedup. +- `findings/frontmatter.py`: frontmatter parsing and finding lookup. + +--- + +## 11. Risk Assessment + +| Risk | Probability | Impact | Mitigation | +|---|---:|---:|---| +| Import cycles during extraction | Medium | High | Small PRs, py_compile after each phase, keep wrappers | +| Renderer behavior regression | Medium | Medium | Migrate renderer families incrementally, keep input dict contract | +| Rich/Textual behavior divergence | Medium | Medium | Use shared Rich render path with different sinks | +| Plain output degradation | Medium | Medium | PlainSink and explicit plain branches remain supported | +| Snapshot diff bugs | Medium | Medium | Isolate in SnapshotCache and add unit tests | +| Event loop regression | Medium | High | Delay BaseEventLoop until after runner/render split; add tests first where possible | +| Chat TUI freeze/regression | Medium | High | Preserve known Textual threading pattern, isolate sink/proxy changes carefully | +| Makefile/script path breakage | Low | High | Keep thin wrappers permanently | +| Findings migration affects reports | Medium | Medium | Move 
findings tools as separate epic with wrappers | --- -## 10. Open Questions +## 12. Open Questions -1. **Should renderers use a common base class?** The `_rich`/`_plain` duplication (~1,300 lines) could be unified with a `ToolRenderer` protocol/ABC and a `RenderMode` enum. This is a nice-to-have but adds complexity; defer to a follow-up plan. +1. **Should the renderer classes be instantiated once per run or recreated per event?** + Recommendation: instantiate once at startup with a shared `RenderContext`. -2. **Should the bash sub-renderer chain become a registered plugin system?** Currently hardcoded as `sandbox → bash_shim → generic`. A `ChainOfResponsibility` with a list of `BashInterceptor` callables would make adding new interceptors trivial. Defer. +2. **Should `TextualRichLogSink` wrap `RichLog` directly or the existing thread-safe proxy?** + Recommendation: wrap the existing thread-safe proxy initially to preserve the known-working Textual threading model. -3. **Should `codecome.yml` parsing get its own module?** Currently YAML parsing is scattered: `load_prompt()` parses `audit.extra_prompts`, `resolve_model_and_variant()` parses `agents.`, `codecome.py` parses `project.name`. A `config.py` module that caches the parsed config would eliminate repeated file reads. Defer to a follow-up. +3. **Should `emitters.py` survive after `RendererRegistry` exists?** + Recommendation: keep it until events refactor is complete; remove only if the final dependency direction no longer needs it. -4. **Should the thin wrappers be permanent?** Keeping `tools/run-agent.py`, `tools/create-finding.py`, etc. as thin wrappers preserves backward compatibility for any external scripts or muscle memory. They add negligible maintenance cost. Recommendation: keep them permanently. +4. 
**Should `tools/AGENTS.md` be created in the same PR as the plan or during implementation?** + Recommendation: create it early, ideally with the first implementation PR, so future agents follow the architecture while the migration is underway. -5. **Should `tools/` be renamed or restructured further?** The `tools/` directory mixes library code (packages) with standalone scripts. A Python-idiomatic structure would be a `src/` layout with a `pyproject.toml`, but that's a much larger change. Defer. +5. **Should findings/itemdb be moved before or after runner/rendering?** + Recommendation: treat as independent Epic B. It is useful, but should not block the `run-agent.py` decomposition. --- -## 11. References +## 13. References - [tool-renderers-plan.md](tool-renderers-plan.md) — original renderer design - [chat-mode-plan.md](chat-mode-plan.md) — chat TUI architecture -- [migrate-to-opencode-serve.md](migrate-to-opencode-serve.md) — server migration (prior major refactor) +- [migrate-to-opencode-serve.md](migrate-to-opencode-serve.md) — server migration - [sync-recovery-plan.md](sync-recovery-plan.md) — session sync after SSE reconnect - [todowrite-renderer-plan.md](todowrite-renderer-plan.md) — first per-tool renderer -- [apply-patch-renderer-plan.md](apply-patch-renderer-plan.md) — most complex renderer +- [apply-patch-renderer-plan.md](apply-patch-renderer-plan.md) — complex patch renderer - [internal-read-suppression-plan.md](internal-read-suppression-plan.md) — read display suppression -- [reasoning-and-error-renderers-plan.md](reasoning-and-error-renderers-plan.md) — reasoning panel design +- [reasoning-and-error-renderers-plan.md](reasoning-and-error-renderers-plan.md) — reasoning/error panel design - [discover-opencode-default-model-plan.md](discover-opencode-default-model-plan.md) — model resolution from DB - [restore-model-banner-plan.md](restore-model-banner-plan.md) — model banner display From 968a7b2e776c3e30ba21f134815a491570512d47 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 23 May 2026 20:30:40 +0200 Subject: [PATCH 03/65] Add testing and acceptance gates to tools refactor plan --- .project/tools-refactor-plan.md | 272 +++++++++++++++++++++++++++----- 1 file changed, 234 insertions(+), 38 deletions(-) diff --git a/.project/tools-refactor-plan.md b/.project/tools-refactor-plan.md index 3e7ccf6..612955e 100644 --- a/.project/tools-refactor-plan.md +++ b/.project/tools-refactor-plan.md @@ -27,6 +27,7 @@ This plan keeps the useful inventory from the original draft but changes the exe 7. Introduce `PhaseEventLoop` and `ChatEventLoop`, sharing common SSE/session/dedup/permission logic through a base event loop. 8. Preserve compatibility through thin wrappers at historical script paths. 9. Add `tools/AGENTS.md` to document the architecture rules for future changes. +10. Add explicit unit, fixture, smoke, and acceptance gates so each migration step is verifiable. The goal is not only to move code out of `run-agent.py`, but to make future changes safer and easier to review. @@ -159,6 +160,10 @@ The current bash rendering path has special cases for CodeCome commands and shel Finding/itemdb CLI scripts should keep their stable entrypoints, but reusable logic belongs in a shared package. +### P9: Current tests are too broad for a safe structural refactor + +A single `make tests` gate is not enough for this migration. Rendering, event-loop behavior, command interceptors, wrapper compatibility, and finding helper behavior need focused tests and acceptance checks. + --- ## 4. Target Architecture @@ -823,74 +828,265 @@ Why not extract renderers first? ## 10. Testing Strategy -### Pre-migration baseline +The refactor must be validated with focused tests, not only by running the broad test suite. Each phase should add or update tests around the component being moved. 
+ +### 10.1 Pre-migration baseline + +Before implementation starts, capture a known-good baseline: ```bash make tests +python tools/run-agent.py --help python tools/run-agent.py --show-model --agent recon ``` -If practical, also run a small/mock phase target or an existing recorded/mock OpenCode flow. +If practical, also run a small/mock phase target or an existing recorded/mock OpenCode flow and keep representative `tmp/last-phase-*.jsonl` snippets as fixtures. + +### 10.2 Unit tests to add + +#### Rendering + +Suggested layout: + +```text +tests/rendering/ +├── test_snapshot_cache.py +├── test_registry.py +├── test_sinks.py +├── test_read_renderer.py +├── test_write_renderer.py +├── test_apply_patch_renderer.py +├── test_grep_renderer.py +├── test_command_renderer.py +└── test_command_interceptors.py +``` + +Required coverage: + +| Component | Required tests | +|---|---| +| `SnapshotCache` | `set/get`, stale invalidation, reread existing file, reread missing file, capacity/LRU if implemented | +| `RendererRegistry` | dispatch by generic event type, dispatch by `tool_use` tool name, fallback for unknown event/tool | +| `PlainSink` | works without Rich, writes plain strings, does not require ANSI support | +| `RichConsoleSink` | accepts arbitrary Rich renderables such as `Text`, `Panel`, `Table`, `Group`, `Syntax` | +| `TextualRichLogSink` | delegates to a fake RichLog/proxy and preserves `expand` | +| `ReadRenderer` | file/directory framing, internal read suppression, cache population | +| `WriteRenderer` | new-file output, diff output with cached previous content, cache update | +| `EditRenderer` | old/new diff rendering and cache reread behavior | +| `ApplyPatchRenderer` | `*** Begin Patch` envelope, unified diff fallback, JSON patch list variant | +| `GlobRenderer` / `GrepRenderer` | file-list vs `path:line:text` mode, result caps, no-match output | +| `CommandRenderer` | interceptor ordering, fallback generic command rendering | +| 
`CommandExecutionInterceptor` | sandbox-bootstrap JSON, `rtk read`, `rtk grep`, `ls/find/tree` recognition | + +Renderer tests should not require a real terminal. Use fake/recording sinks, for example: + +```python +class RecordingSink: + mode = "rich" + + def __init__(self): + self.items = [] + + def write(self, renderable, *, expand=True): + self.items.append((renderable, expand)) + + def write_text(self, text): + self.items.append((text, True)) +``` + +#### Events -### Per-phase checks +Suggested layout: + +```text +tests/events/ +├── test_state_tracker.py +├── test_base_event_loop.py +├── test_phase_event_loop.py +└── test_chat_event_loop.py +``` + +Required coverage: + +| Component | Required tests | +|---|---| +| `StateTracker` | accumulate deltas, emit finalized text only on `time.end`, emit tool only on `completed`/`error`, ignore in-progress parts | +| `BaseEventLoop` | session filtering, idle detection, auth/workspace headers, permission auto-reject with mocked HTTP | +| Session sync | `GET /session/{id}/message` synthesizes unseen finalized events | +| Dedup | no double-rendering after sync/reconnect for the same `part.id`/message | +| `PhaseEventLoop` | consumes fixture stream until idle and returns expected `RunResult` | +| `ChatEventLoop` | supports long-lived multi-turn consumption and `stop()` semantics | -After each phase: +Mock `SseClient.events()` with deterministic event generators. Do not require a live OpenCode server for unit tests. 
+ +#### CodeCome core + +Suggested layout: + +```text +tests/codecome/ +├── test_config.py +├── test_session.py +├── test_graceful.py +├── test_transcript.py +└── test_cli_smoke.py +``` + +Required coverage: + +| Component | Required tests | +|---|---| +| `config.py` | model precedence: `OPENCODE_ARGS` > env > `codecome.yml` > discovery | +| Prompt loading | finding placeholder replacement, extra prompts from yml/file/env, error when placeholder is required but missing | +| Thinking decision | Anthropic provider default, non-Anthropic default, env override, `--thinking` override | +| `session.py` | create-session payloads, prompt payloads, model provider/modelID split, variant handling, auth/workspace headers | +| `graceful.py` | phase 1 artifacts, phase 2 pending finding, phase 4 evidence, phase 5 exploited/not-feasible paths | +| `transcript.py` | stable naming, attempt counters, JSONL writing, no collision in normal use | + +#### Findings + +For Epic B: + +```text +tests/findings/ +├── test_frontmatter.py +├── test_finding_lookup.py +├── test_create.py +├── test_move.py +├── test_listing.py +└── test_package.py +``` + +Required coverage: + +| Component | Required tests | +|---|---| +| `load_frontmatter` | valid YAML, missing frontmatter, invalid YAML | +| finding lookup | by `CC-0001`, by filename, across status directories | +| next ID | ignores `.gitkeep`, computes next ID from existing findings | +| move | moves status directory and updates frontmatter status | +| wrappers | historical scripts still run `--help` successfully | + +### 10.3 Golden / fixture tests for rendering + +Add representative event fixtures so renderer behavior is checked directly: + +```text +tests/fixtures/rendering/ +├── read_file_event.json +├── write_file_event.json +├── apply_patch_event.json +├── grep_lines_event.json +├── sandbox_validate_bash_event.json +├── task_event.json +├── reasoning_event.json +└── expected/ + ├── read_file.plain.txt + ├── write_file.plain.txt + ├── 
apply_patch.plain.txt + └── ... +``` + +For plain mode, compare stable text output. + +For Rich/Textual, avoid fragile ANSI snapshots. Prefer one of: + +1. `RecordingSink` checks for renderable types and important text fragments. +2. `rich.console.Console(record=True, width=120)` with color disabled and stable exported text. +3. Key-string assertions: title, path, diff summary, status, error text, etc. + +Acceptance goal: + +```text +- plain mode output contains the same key information as before; +- rich mode emits structured Rich renderables instead of falling back to raw JSON; +- textual mode uses the same Rich rendering path as rich console, with a different sink. +``` + +### 10.4 CLI and wrapper compatibility tests + +Historical paths must keep working: ```bash -python -m py_compile $(find tools -name '*.py' -not -path '*/.venv/*') -make tests +python tools/run-agent.py --help python tools/run-agent.py --show-model --agent recon +python tools/create-finding.py --help +python tools/list-findings.py --help +python tools/move-finding.py --help +python tools/render-report.py --help +python tools/render-index.py --help ``` -Additional phase-specific checks: +After `run-agent.py` becomes a wrapper, explicitly verify: -```bash -# After rendering changes -python tools/run-agent.py --show-model --agent recon --color never -python tools/run-agent.py --show-model --agent recon --color always +```text +- `python tools/run-agent.py ...` works; +- `python -m codecome.cli ...` works if supported; +- Makefile targets still invoke a valid path; +- `tools/run-sweep.py` still works without modification or is updated in the same PR. +``` + +--- + +## 11. Acceptance Gates -# After chat extraction -# Run a chat smoke test if Textual is installed and a lightweight manual check is acceptable. +Each phase must define automated checks, smoke/manual checks, and acceptance criteria. A phase is not complete merely because `make tests` passes. 
-# After events changes -# Run both phase mode and chat mode smoke tests. +| Phase | Required automated checks | Smoke/manual checks | Acceptance criteria | +|---|---|---|---| +| A1 core helpers | `py_compile`, `make tests`, `test_config.py`, `test_session.py`, `test_graceful.py` | `python tools/run-agent.py --show-model --agent recon` | CLI behavior unchanged; model/prompt/session payload logic covered; no runner logic in `config.py` | +| A2 rendering foundation | `py_compile`, `make tests`, `test_sinks.py`, `test_registry.py`, `test_snapshot_cache.py` | import `rendering.*` modules | `RenderContext`, sinks, registry, settings, and cache exist and are tested; no renderer migration required yet | +| A3 renderer migration | renderer unit tests plus fixture/golden tests for each migrated family | `--color never` and `--color always` smoke runs | migrated renderers handle known fixture events; fallback still works; plain/rich/textual destinations remain supported | +| A4 chat extraction | `py_compile`, `make tests`, sink/proxy tests | manual `make chat` or equivalent Textual smoke test | chat imports cleanly; RichLog output path works; known Textual threading pattern preserved | +| A5 runner/CLI extraction | `py_compile`, `make tests`, CLI smoke tests | run key Makefile target or mock phase flow | `tools/run-agent.py` is a thin wrapper; exit codes, transcript naming, auto-resume, and frontmatter repair behavior remain compatible | +| A6 events refactor | `test_base_event_loop.py`, `test_phase_event_loop.py`, `test_chat_event_loop.py` | phase and chat smoke tests | shared logic lives in `BaseEventLoop`; phase loop returns correct `RunResult`; chat loop remains multi-turn and long-lived | +| A7 `tools/AGENTS.md` | docs lint if available | manual review | architecture rules documented: wrappers, config boundary, renderers, sinks, snapshot cache, event loops, command interceptors, findings | +| B1 findings helpers | `test_frontmatter.py`, `test_finding_lookup.py` | run 
helper imports | shared helpers cover parsing, lookup, status dirs, next ID, slug/replacement helpers | +| B2 findings wrappers | tests for migrated commands | wrapper `--help` smoke tests | old script paths still work; implementations live under `tools/findings/`; no duplicated frontmatter parser remains in migrated scripts | +| B3 references | `make tests`, docs/link checks if available | Makefile/manual command checks | Makefile/docs references are updated only where needed; stable CLI paths preserved | -# After findings changes -tools/create-finding.py "Test finding" -tools/list-findings.py -tools/move-finding.py CC-XXXX REJECTED -tools/render-index.py -tools/render-report.py +### Global acceptance after Epic A + +```text +- `tools/run-agent.py` has no substantial logic; it delegates to `codecome.cli`. +- Phase mode supports plain terminal and Rich terminal rendering. +- Chat mode uses Textual/RichLog and shares renderer classes where applicable. +- Event loops are separated as `PhaseEventLoop` and `ChatEventLoop`. +- Shared SSE/session/dedup/permission logic lives in `BaseEventLoop`. +- CodeCome-specific command rendering is represented as `CommandExecutionInterceptor` implementations. +- Snapshot/diff state is isolated in `SnapshotCache`. +- Existing Makefile targets and script paths still work. ``` -### Tests to add +### Global acceptance after Epic B -- `SnapshotCache`: set/get/invalidate/reread behavior. -- `RendererRegistry`: dispatch by event type and tool name. -- `CommandExecutionInterceptor`: command matching and fallback ordering. -- `RenderSink`: plain/rich/textual sink smoke behavior. -- `BaseEventLoop`: session filtering, idle detection, permission handling, dedup. -- `findings/frontmatter.py`: frontmatter parsing and finding lookup. +```text +- Finding/itemdb helpers are shared under `tools/findings/`. +- Historical scripts remain as wrappers. +- No duplicated frontmatter parser remains in migrated scripts. 
+- Reports and indexes are generated as before. +``` --- -## 11. Risk Assessment +## 12. Risk Assessment | Risk | Probability | Impact | Mitigation | |---|---:|---:|---| | Import cycles during extraction | Medium | High | Small PRs, py_compile after each phase, keep wrappers | -| Renderer behavior regression | Medium | Medium | Migrate renderer families incrementally, keep input dict contract | -| Rich/Textual behavior divergence | Medium | Medium | Use shared Rich render path with different sinks | -| Plain output degradation | Medium | Medium | PlainSink and explicit plain branches remain supported | +| Renderer behavior regression | Medium | Medium | Migrate renderer families incrementally, keep input dict contract, add fixture tests | +| Rich/Textual behavior divergence | Medium | Medium | Use shared Rich render path with different sinks; test sinks with fake outputs | +| Plain output degradation | Medium | Medium | PlainSink and explicit plain branches remain supported; plain golden tests | | Snapshot diff bugs | Medium | Medium | Isolate in SnapshotCache and add unit tests | -| Event loop regression | Medium | High | Delay BaseEventLoop until after runner/render split; add tests first where possible | -| Chat TUI freeze/regression | Medium | High | Preserve known Textual threading pattern, isolate sink/proxy changes carefully | -| Makefile/script path breakage | Low | High | Keep thin wrappers permanently | -| Findings migration affects reports | Medium | Medium | Move findings tools as separate epic with wrappers | +| Event loop regression | Medium | High | Delay BaseEventLoop until after runner/render split; add deterministic event-loop tests | +| Chat TUI freeze/regression | Medium | High | Preserve known Textual threading pattern, isolate sink/proxy changes carefully, manual smoke test | +| Makefile/script path breakage | Low | High | Keep thin wrappers permanently, add wrapper smoke tests | +| Findings migration affects reports | Medium | Medium | Move 
findings tools as separate epic with wrappers and itemdb fixture tests | +| False confidence from broad tests only | Medium | High | Require acceptance gates and focused tests per phase | --- -## 12. Open Questions +## 13. Open Questions 1. **Should the renderer classes be instantiated once per run or recreated per event?** Recommendation: instantiate once at startup with a shared `RenderContext`. @@ -909,7 +1105,7 @@ tools/render-report.py --- -## 13. References +## 14. References - [tool-renderers-plan.md](tool-renderers-plan.md) — original renderer design - [chat-mode-plan.md](chat-mode-plan.md) — chat TUI architecture From 811d14911d699ac5f21ced13d51ff0cc6da02af4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 24 May 2026 17:58:18 +0200 Subject: [PATCH 04/65] refactor(phase-a1): extract stable core helpers from run-agent.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create tools/codecome/ package with six modules extracted from the 5,876-line run-agent.py monolith: - codecome/version.py — OpenCode version checks - codecome/config.py — model/prompt/color resolution (self-contained, no _colors import) - codecome/session.py — HTTP session & prompt helpers - codecome/graceful.py — phase completion checks & resume prompts - codecome/transcript.py — transcript path/open/close (thread-safe counter) Also: - Expose _chat_render_and_log / _chat_update_modeline_info as standalone functions so tests work without Textual installed - Fix _cache_invalidate_stale to remove entries for deleted files - Update tests to load codecome.* modules directly - Add test_session.py, test_show_model_table, test_cache_invalidate_stale Tests: 282 passed, 0 failed, 0 errors --- tests/test_chat_mode.py | 23 +- tests/test_run_agent.py | 98 ++- tests/test_session.py | 176 ++++++ tools/codecome/__init__.py | 63 ++ tools/codecome/config.py | 501 ++++++++++++++++ tools/codecome/graceful.py | 254 ++++++++ 
tools/codecome/session.py | 127 ++++ tools/codecome/transcript.py | 59 ++ tools/codecome/version.py | 57 ++ tools/run-agent.py | 1091 +++------------------------------- 10 files changed, 1429 insertions(+), 1020 deletions(-) create mode 100644 tests/test_session.py create mode 100644 tools/codecome/__init__.py create mode 100644 tools/codecome/config.py create mode 100644 tools/codecome/graceful.py create mode 100644 tools/codecome/session.py create mode 100644 tools/codecome/transcript.py create mode 100644 tools/codecome/version.py diff --git a/tests/test_chat_mode.py b/tests/test_chat_mode.py index 3de0e3b..b70a5c8 100644 --- a/tests/test_chat_mode.py +++ b/tests/test_chat_mode.py @@ -439,7 +439,15 @@ def app_under_test(self): we can capture dispatcher calls. """ module = _load_run_agent_module() - app = module.ChatApp() + if module.ChatApp is not None: + app = module.ChatApp() + else: + # Textual not installed — use standalone functions on a + # plain object (parity guaranteed by delegation in _ChatApp). + app = type("FakeChatApp", (), {})() + app._render_and_log = module._chat_render_and_log.__get__(app, type(app)) + app._update_modeline_info = module._chat_update_modeline_info.__get__(app, type(app)) + app.post_message = MagicMock() return module, app def _make_args(self, debug=False): @@ -609,10 +617,15 @@ def test_transcript_path_pattern(self, tmp_path, monkeypatch): module = _load_run_agent_module() # Sandbox the ROOT/tmp directory by redirecting ROOT in the - # module. We use monkeypatch to swap module.ROOT for tmp_path - # so the transcript lands inside our pytest tmp_path. + # module and in codecome.transcript (open_chat_transcript uses its + # own ROOT). We use monkeypatch to swap both for tmp_path so the + # transcript lands inside our pytest tmp_path. monkeypatch.setattr(module, "ROOT", tmp_path) + # open_chat_transcript lives in codecome.transcript with its own ROOT. 
+ import codecome.transcript as _transcript_mod + monkeypatch.setattr(_transcript_mod, "ROOT", tmp_path) + # Stub everything _run_chat_mode would otherwise call so we # exercise ONLY the transcript-path setup and the final summary. monkeypatch.setattr(module, "check_opencode_version", lambda: None) @@ -624,7 +637,7 @@ def test_transcript_path_pattern(self, tmp_path, monkeypatch): lambda agent, extra: ("opencode/test", None, "stub", "stub"), ) monkeypatch.setattr( - module, "_resolve_thinking_decision", lambda m, e: (False, "stub") + module, "resolve_thinking_decision", lambda m, e: (False, "stub") ) # Server / session creation: stub to return fake objects. @@ -634,7 +647,7 @@ def test_transcript_path_pattern(self, tmp_path, monkeypatch): fake_runner = MagicMock() fake_runner.start.return_value = fake_server monkeypatch.setattr(module, "ServerRunner", lambda: fake_runner) - monkeypatch.setattr(module, "_create_chat_session", + monkeypatch.setattr(module, "create_chat_session", lambda *a, **kw: "ses_abc") # The Textual app's run() is a no-op for this test (we just diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 9485a14..b953ab1 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1,11 +1,21 @@ from __future__ import annotations +from collections import OrderedDict + import pytest from rich.console import Console from conftest import ROOT, load_tool_module +def _load_config_module(): + return load_tool_module("codecome_config", "tools/codecome/config.py") + + +def _load_graceful_module(): + return load_tool_module("codecome_graceful", "tools/codecome/graceful.py") + + FIXTURES = ROOT / "tests" / "fixtures" / "run_agent" @@ -21,14 +31,14 @@ ], ) def test_extract_model_from_export_matrix(fixture_name, expected): - module = load_tool_module("run_agent_matrix", "tools/run-agent.py") + module = _load_config_module() payload = (FIXTURES / fixture_name).read_text(encoding="utf-8") assert module._extract_model_from_export(payload) == 
expected @pytest.mark.unit def test_extract_flag_value_supports_both_flag_forms(): - module = load_tool_module("run_agent_flags", "tools/run-agent.py") + module = _load_config_module() tokens = ["--model=openai/gpt-5", "--variant", "high"] assert module._extract_flag_value(tokens, ("--model", "-m")) == "openai/gpt-5" assert module._extract_flag_value(tokens, ("--variant",)) == "high" @@ -36,7 +46,7 @@ def test_extract_flag_value_supports_both_flag_forms(): @pytest.mark.unit def test_strip_probe_unsafe_flags_removes_session_and_continue_flags(): - module = load_tool_module("run_agent_strip", "tools/run-agent.py") + module = _load_config_module() command = [ "opencode", "run", @@ -61,11 +71,12 @@ def test_strip_probe_unsafe_flags_removes_session_and_continue_flags(): @pytest.mark.unit def test_resolve_model_and_variant_precedence(monkeypatch): + config_module = _load_config_module() module = load_tool_module("run_agent_resolve", "tools/run-agent.py") monkeypatch.setenv("CODECOME_MODEL", "env/model") monkeypatch.setenv("CODECOME_MODEL_VARIANT", "max") - monkeypatch.setattr(module, "_read_codecome_yml_agent", lambda _agent: ("yaml/model", "yamlvar")) - monkeypatch.setattr(module, "_discover_opencode_default_model", lambda: "history/model") + monkeypatch.setattr(config_module, "_read_codecome_yml_agent", lambda _agent: ("yaml/model", "yamlvar")) + monkeypatch.setattr(config_module, "_discover_opencode_default_model", lambda: "history/model") model, variant, model_source, variant_source = module.resolve_model_and_variant( "auditor", ["--model", "args/model", "--variant=high"] @@ -77,7 +88,7 @@ def test_resolve_model_and_variant_precedence(monkeypatch): @pytest.mark.component def test_stream_model_scan_finds_nested_provider_model_pair(): - module = load_tool_module("run_agent_scan", "tools/run-agent.py") + module = _load_config_module() event = { "type": "tool_result", "part": { @@ -95,7 +106,7 @@ def test_stream_model_scan_finds_nested_provider_model_pair(): 
@pytest.mark.unit def test_thinking_default_is_disabled_for_anthropic_only(): - module = load_tool_module("run_agent_thinking_default", "tools/run-agent.py") + module = _load_config_module() assert module._thinking_default_for_provider("anthropic") is False assert module._thinking_default_for_provider("anthropic-foo") is False assert module._thinking_default_for_provider("openai") is True @@ -106,18 +117,43 @@ def test_thinking_default_is_disabled_for_anthropic_only(): def test_resolve_thinking_decision_precedence(monkeypatch): module = load_tool_module("run_agent_thinking_precedence", "tools/run-agent.py") - on, source = module._resolve_thinking_decision("anthropic/claude-opus-4-7", ["--thinking"]) + on, source = module.resolve_thinking_decision("anthropic/claude-opus-4-7", ["--thinking"]) assert (on, source) == (True, "user-args") monkeypatch.setenv("CODECOME_THINKING", "0") - on, source = module._resolve_thinking_decision("openai/gpt-5", []) + on, source = module.resolve_thinking_decision("openai/gpt-5", []) assert (on, source) == (False, "env") monkeypatch.setenv("CODECOME_THINKING", "1") - on, source = module._resolve_thinking_decision("anthropic/claude-opus-4-7", []) + on, source = module.resolve_thinking_decision("anthropic/claude-opus-4-7", []) assert (on, source) == (True, "env") +@pytest.mark.unit +def test_show_model_table_prints_resolution_sources(monkeypatch, capsys): + """show_model_table should emit a table with all resolution sources.""" + config_module = _load_config_module() + monkeypatch.setenv("OPENCODE_ARGS", "--model openai/gpt-5 --variant high") + monkeypatch.setenv("CODECOME_MODEL", "env/model") + monkeypatch.setenv("CODECOME_MODEL_VARIANT", "envvar") + monkeypatch.setattr(config_module, "_read_codecome_yml_agent", lambda _agent: ("yaml/model", "yamlvar")) + monkeypatch.setattr(config_module, "_discover_opencode_default_model", lambda: "history/model") + + rc = config_module.show_model_table("auditor") + assert rc == 0 + + out = 
capsys.readouterr().out + assert "Model resolution for agent auditor" in out + assert "OPENCODE_ARGS" in out + assert "env CODECOME_MODEL" in out + assert "codecome.yml" in out + assert "opencode session history" in out + assert "effective" in out + assert "openai/gpt-5" in out # args win + assert "high" in out + assert "thinking=" in out + + @pytest.mark.unit def test_parse_grep_output_detects_line_mode_and_file_mode(): module = load_tool_module("run_agent_grep_parse", "tools/run-agent.py") @@ -578,6 +614,40 @@ def test_parse_grep_output_70_percent_threshold_for_lines_mode(): assert mode == "files" +@pytest.mark.unit +def test_cache_invalidate_stale_removes_missing_and_modified(monkeypatch, tmp_path): + """_cache_invalidate_stale should remove entries for deleted files + and for files whose mtime changed since caching.""" + module = load_tool_module("run_agent_cache_stale", "tools/run-agent.py") + monkeypatch.setattr(module, "_WRITE_CACHE_ENABLED", True) + + # _SNAPSHOT_CACHE is a module-level OrderedDict; monkeypatch it per-test.
+ fake_cache = OrderedDict() + monkeypatch.setattr(module, "_SNAPSHOT_CACHE", fake_cache) + + existing = tmp_path / "existing.txt" + existing.write_text("old", encoding="utf-8") + deleted = tmp_path / "deleted.txt" + deleted.write_text("gone", encoding="utf-8") + + module._cache_set(str(existing), "old") + module._cache_set(str(deleted), "gone") + + assert str(existing) in fake_cache + assert str(deleted) in fake_cache + + # Simulate file deletion + deleted.unlink() + # Simulate modification of existing file + existing.write_text("new", encoding="utf-8") + + module._cache_invalidate_stale() + + # Deleted and modified entries are both removed + assert str(deleted) not in fake_cache + assert str(existing) not in fake_cache + + @pytest.mark.unit def test_read_renderer_caches_stripped_lines_instead_of_numbered(monkeypatch): module = load_tool_module("run_agent_read_cache_strip", "tools/run-agent.py") @@ -1294,20 +1364,20 @@ def test_parse_find_tree_tree_verb_no_name(): @pytest.fixture() def prompt_env(tmp_path, monkeypatch): """Set up an isolated environment for load_prompt tests.""" - module = load_tool_module("run_agent_prompt", "tools/run-agent.py") + config_module = _load_config_module() # Create a minimal prompt file. prompt_file = tmp_path / "prompt.md" prompt_file.write_text("# Phase prompt\n\nBase content.", encoding="utf-8") # Point ROOT at tmp_path so codecome.yml is found there. - monkeypatch.setattr(module, "ROOT", tmp_path) + monkeypatch.setattr(config_module, "ROOT", tmp_path) # Clear env vars by default. 
monkeypatch.delenv("PROMPT_EXTRA", raising=False) monkeypatch.delenv("PROMPT_EXTRA_FILE", raising=False) - return module, prompt_file, tmp_path + return config_module, prompt_file, tmp_path @pytest.mark.unit @@ -1620,7 +1690,7 @@ def test_check_phase_graceful_completion_mtime(monkeypatch, tmp_path): the current run (st_mtime >= run_start_time).""" import os - module = load_tool_module("run_agent_graceful_mtime", "tools/run-agent.py") + module = _load_graceful_module() monkeypatch.setattr(module, "ROOT", tmp_path) start = 1_000_000.0 diff --git a/tests/test_session.py b/tests/test_session.py new file mode 100644 index 0000000..18ececa --- /dev/null +++ b/tests/test_session.py @@ -0,0 +1,176 @@ +from __future__ import annotations + +import json +from unittest.mock import MagicMock, patch + +import pytest + +from conftest import ROOT, load_tool_module + + +def _load_session_module(): + return load_tool_module("codecome_session", "tools/codecome/session.py") + + +class TestGetHeaders: + def test_no_auth_no_workspace(self): + module = _load_session_module() + headers = module._get_headers(None, None) + assert headers == {"Content-Type": "application/json"} + + def test_with_auth_token(self): + module = _load_session_module() + headers = module._get_headers("secret123", None) + assert headers["Content-Type"] == "application/json" + assert headers["Authorization"].startswith("Basic ") + # Decode and verify + import base64 + decoded = base64.b64decode(headers["Authorization"].split(" ", 1)[1]).decode("utf-8") + assert decoded == "opencode:secret123" + + def test_with_workspace_dir(self): + module = _load_session_module() + headers = module._get_headers(None, "/workspace") + assert headers["x-opencode-directory"] == "/workspace" + + def test_with_both(self): + module = _load_session_module() + headers = module._get_headers("tok", "/ws") + assert "Authorization" in headers + assert headers["x-opencode-directory"] == "/ws" + + +class TestCreateSession: + 
@patch("urllib.request.urlopen") + def test_create_session_without_model(self, mock_urlopen): + module = _load_session_module() + mock_resp = MagicMock() + mock_resp.read.return_value = json.dumps({"id": "sess-abc"}).encode("utf-8") + mock_urlopen.return_value = mock_resp + + sid = module.create_session("http://localhost:8080", "1", "recon", None, None, None) + assert sid == "sess-abc" + + req = mock_urlopen.call_args[0][0] + assert req.full_url == "http://localhost:8080/session" + payload = json.loads(req.data) + assert payload["title"] == "CodeCome Phase 1" + assert payload["agent"] == "recon" + assert "model" not in payload + + @patch("urllib.request.urlopen") + def test_create_session_with_provider_model(self, mock_urlopen): + module = _load_session_module() + mock_resp = MagicMock() + mock_resp.read.return_value = json.dumps({"id": "sess-xyz"}).encode("utf-8") + mock_urlopen.return_value = mock_resp + + sid = module.create_session( + "http://localhost:8080", "2", "auditor", "openai/gpt-5", None, None + ) + assert sid == "sess-xyz" + + req = mock_urlopen.call_args[0][0] + payload = json.loads(req.data) + assert payload["model"] == {"providerID": "openai", "id": "gpt-5"} + + @patch("urllib.request.urlopen") + def test_create_session_with_bare_model(self, mock_urlopen): + module = _load_session_module() + mock_resp = MagicMock() + mock_resp.read.return_value = json.dumps({"id": "sess-bare"}).encode("utf-8") + mock_urlopen.return_value = mock_resp + + sid = module.create_session( + "http://localhost:8080", "3", "reviewer", "gpt-5", None, None + ) + assert sid == "sess-bare" + + req = mock_urlopen.call_args[0][0] + payload = json.loads(req.data) + assert payload["model"] == {"id": "gpt-5"} + + @patch("urllib.request.urlopen") + def test_create_session_empty_id_raises(self, mock_urlopen): + module = _load_session_module() + mock_resp = MagicMock() + mock_resp.read.return_value = json.dumps({"id": ""}).encode("utf-8") + mock_urlopen.return_value = mock_resp + + with 
pytest.raises(RuntimeError, match="empty session ID"): + module.create_session("http://localhost:8080", "1", "recon", None, None, None) + + +class TestCreateChatSession: + @patch("urllib.request.urlopen") + def test_chat_session_has_permission_deny(self, mock_urlopen): + module = _load_session_module() + mock_resp = MagicMock() + mock_resp.read.return_value = json.dumps({"id": "chat-1"}).encode("utf-8") + mock_urlopen.return_value = mock_resp + + sid = module.create_chat_session("http://localhost:8080", "auditor", None, None, None) + assert sid == "chat-1" + + req = mock_urlopen.call_args[0][0] + payload = json.loads(req.data) + assert payload["title"] == "CodeCome Chat" + assert "permission" in payload + assert len(payload["permission"]) == 3 + + +class TestSendPromptToSession: + @patch("urllib.request.urlopen") + def test_send_prompt_basic(self, mock_urlopen): + module = _load_session_module() + mock_urlopen.return_value = MagicMock() + + module.send_prompt_to_session( + "http://localhost:8080", "sess-1", "hello", "recon", None, None, None, None + ) + + req = mock_urlopen.call_args[0][0] + assert req.full_url == "http://localhost:8080/session/sess-1/prompt_async" + assert req.method == "POST" + payload = json.loads(req.data) + assert payload["parts"] == [{"type": "text", "text": "hello"}] + assert payload["agent"] == "recon" + + @patch("urllib.request.urlopen") + def test_send_prompt_with_model_and_variant(self, mock_urlopen): + module = _load_session_module() + mock_urlopen.return_value = MagicMock() + + module.send_prompt_to_session( + "http://localhost:8080", + "sess-1", + "hello", + "recon", + "anthropic/claude-opus-4", + "max", + None, + None, + ) + + req = mock_urlopen.call_args[0][0] + payload = json.loads(req.data) + assert payload["model"] == {"providerID": "anthropic", "modelID": "claude-opus-4"} + assert payload["variant"] == "max" + + @patch("urllib.request.urlopen") + def test_send_prompt_http_error_raises(self, mock_urlopen): + module = 
_load_session_module() + from urllib.error import HTTPError + + mock_urlopen.side_effect = HTTPError( + "http://localhost:8080/session/sess-1/prompt_async", + 500, + "Internal Server Error", + {}, + None, + ) + + with pytest.raises(RuntimeError, match="Failed to send prompt: HTTP 500"): + module.send_prompt_to_session( + "http://localhost:8080", "sess-1", "hello", "recon", None, None, None, None + ) diff --git a/tools/codecome/__init__.py b/tools/codecome/__init__.py new file mode 100644 index 0000000..020dc05 --- /dev/null +++ b/tools/codecome/__init__.py @@ -0,0 +1,63 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +CodeCome core package: config, session, runner, graceful, transcript, version. +""" + +from __future__ import annotations + +from codecome.config import ( + load_prompt, + resolve_color_mode, + resolve_model_and_variant, + resolve_runtime_model_for_banner, + resolve_thinking_decision, + show_model_table, + truthy_env, +) +from codecome.graceful import ( + build_frontmatter_resume_prompt, + build_phase_resume_prompt, + build_resume_command, + check_phase_graceful_completion, + phase_checklist_lines, +) +from codecome.session import ( + create_chat_session, + create_session, + send_prompt_to_session, +) +from codecome.transcript import ( + close_transcript, + open_chat_transcript, + open_phase_transcript, +) +from codecome.version import check_opencode_version + +__all__ = [ + # config + "truthy_env", + "resolve_color_mode", + "load_prompt", + "resolve_model_and_variant", + "resolve_runtime_model_for_banner", + "resolve_thinking_decision", + "show_model_table", + # session + "create_session", + "create_chat_session", + "send_prompt_to_session", + # graceful + "check_phase_graceful_completion", + "phase_checklist_lines", + "build_phase_resume_prompt", + "build_frontmatter_resume_prompt", + "build_resume_command", + # transcript + "open_phase_transcript", + "open_chat_transcript", + 
"close_transcript", + # version + "check_opencode_version", +] diff --git a/tools/codecome/config.py b/tools/codecome/config.py new file mode 100644 index 0000000..1555058 --- /dev/null +++ b/tools/codecome/config.py @@ -0,0 +1,501 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +CodeCome configuration resolution: env, codecome.yml, prompt, model, variant, thinking, +color/output mode, and render settings. + +This module is intentionally transversal (it reads from many configuration +sources) but must NOT contain execution logic (server start/stop, session +creation, prompt submission, phase loops, retry/resume). +""" + +from __future__ import annotations + +import json +import os +import re +import subprocess +import sys +from functools import lru_cache +from pathlib import Path +from typing import Any, Optional + +ROOT = Path(__file__).resolve().parents[2] + + +# Minimal inline color helpers to avoid importing _colors (which lives in the +# parent tools/ directory, not here). Only the attributes used by this module +# are replicated. 
def truthy_env(name: str) -> bool:
    """Return whether environment variable *name* holds an affirmative value.

    An unset variable is false.  A set variable is false only when its value,
    after trimming surrounding whitespace and ignoring case, is empty or one
    of ``0``, ``false`` or ``no``.  Every other value counts as true.

    Args:
        name: Name of the environment variable to inspect.

    Returns:
        ``True`` when the variable is set to an affirmative value.
    """
    value = os.environ.get(name)
    if value is None:
        return False
    # Normalise before comparing so spellings such as "FALSE", "NO" or " 0 "
    # are not mistaken for an affirmative setting.
    return value.strip().lower() not in {"", "0", "false", "no"}
+ if phase is not None: + phase_name = _PHASE_NAMES.get(str(phase)) + if phase_name: + try: + import yaml # type: ignore + + config_path = ROOT / "codecome.yml" + if config_path.exists(): + cfg = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + ep = cfg.get("audit", {}).get("extra_prompts", {}) + yml_extra = ep.get(phase_name, "").strip() if isinstance(ep, dict) else "" + if yml_extra: + extra_sections.append(("From codecome.yml", yml_extra)) + except Exception: + pass + + # Source 2: PROMPT_EXTRA_FILE env var + extra_file = os.environ.get("PROMPT_EXTRA_FILE", "").strip() + if extra_file: + extra_path = Path(extra_file) + if not extra_path.is_absolute(): + extra_path = ROOT / extra_path + if extra_path.is_file(): + file_text = extra_path.read_text(encoding="utf-8").strip() + if file_text: + extra_sections.append((f"From {extra_file}", file_text)) + + # Source 3: PROMPT_EXTRA env var + extra_inline = os.environ.get("PROMPT_EXTRA", "").strip() + if extra_inline: + extra_sections.append(("Additional instructions", extra_inline)) + + if extra_sections: + prompt += "\n\n## Additional instructions\n" + for heading, body in extra_sections: + prompt += f"\n### {heading}\n\n{body}\n" + + return prompt + + +# --------------------------------------------------------------------------- +# Model resolution +# --------------------------------------------------------------------------- + +_MODEL_FLAG_NAMES = ("--model", "-m") +_VARIANT_FLAG_NAMES = ("--variant",) +_MODEL_BEARING_KEYS = ("modelID", "providerID", "model") +_DISCOVERY_TIMEOUT_S = float(os.environ.get("CODECOME_MODEL_DISCOVERY_TIMEOUT", "1.0")) +_MODEL_PROBE_TIMEOUT_S = float(os.environ.get("CODECOME_MODEL_PROBE_TIMEOUT", "20.0")) + + +def _extract_flag_value(tokens: list[str], flag_names: tuple[str, ...]) -> Optional[str]: + for i, tok in enumerate(tokens): + for flag in flag_names: + if tok == flag and i + 1 < len(tokens): + return tokens[i + 1] + prefix = flag + "=" + if tok.startswith(prefix): + 
def _discover_opencode_default_model() -> Optional[str]:
    """Best-effort lookup of the model used by the most recent opencode session.

    Two queries are sent through ``opencode db <query> --format tsv``: first
    the newest session whose project worktree equals ``ROOT``, then the newest
    session overall.  The last non-empty output line of a successful query is
    parsed as JSON; a dict yields ``providerID/id`` (or just the id), and a
    non-JSON value is returned verbatim.

    Returns:
        The model identifier, or ``None`` when opencode is unavailable, the
        call fails or times out, or no usable row is found.
    """
    # The path is embedded in a SQL string literal, so single quotes must be
    # doubled; otherwise a worktree such as /home/o'brien/src breaks the
    # project-scoped query and only the global fallback can answer.
    worktree = str(ROOT).replace("'", "''")

    queries = [
        (
            "SELECT s.model FROM session s "
            "JOIN project p ON s.project_id = p.id "
            f"WHERE p.worktree = '{worktree}' AND s.model IS NOT NULL "
            "ORDER BY s.time_updated DESC LIMIT 1"
        ),
        (
            "SELECT s.model FROM session s "
            "WHERE s.model IS NOT NULL "
            "ORDER BY s.time_updated DESC LIMIT 1"
        ),
    ]

    for query in queries:
        try:
            result = subprocess.run(
                ["opencode", "db", query, "--format", "tsv"],
                capture_output=True,
                text=True,
                timeout=_DISCOVERY_TIMEOUT_S,
            )
        except (FileNotFoundError, subprocess.SubprocessError, OSError):
            # Missing binary or a timeout: discovery is optional, give up.
            return None
        if result.returncode != 0:
            continue

        lines = [ln for ln in (result.stdout or "").splitlines() if ln.strip()]
        # Fewer than two non-empty lines means no data row followed the header.
        if len(lines) < 2:
            continue
        raw = lines[-1]
        try:
            obj = json.loads(raw)
        except json.JSONDecodeError:
            # Plain-text cell: use it unless it is just the column name.
            return raw if raw and raw != "model" else None

        if isinstance(obj, dict):
            mid = obj.get("id") or obj.get("modelID")
            pid = obj.get("providerID")
            if pid and mid:
                return f"{pid}/{mid}"
            if mid:
                return str(mid)

    return None
def _read_codecome_yml_agent(agent_name: str) -> tuple[Optional[str], Optional[str]]:
    """Read the ``model`` and ``variant`` pinned for an agent in ``codecome.yml``.

    The file is looked up at ``ROOT / "codecome.yml"`` and the values are taken
    from ``agents.<agent_name>``.  A missing file, missing PyYAML, a load
    failure or an unexpected document shape all give ``(None, None)``.

    Returns:
        ``(model, variant)`` as strings, with ``None`` for any value that is
        absent or falsy.
    """
    nothing: tuple[Optional[str], Optional[str]] = (None, None)

    yml_file = ROOT / "codecome.yml"
    if not yml_file.exists():
        return nothing

    try:
        import yaml  # type: ignore
    except ImportError:
        return nothing

    try:
        with yml_file.open("r", encoding="utf-8") as handle:
            document = yaml.safe_load(handle) or {}
    except Exception:
        return nothing

    # Walk document -> agents -> <agent_name>, bailing out on any non-mapping.
    agents_section = document.get("agents") if isinstance(document, dict) else None
    agent_cfg = agents_section.get(agent_name) if isinstance(agents_section, dict) else None
    if not isinstance(agent_cfg, dict):
        return nothing

    pinned_model = agent_cfg.get("model")
    pinned_variant = agent_cfg.get("variant")
    return (
        str(pinned_model) if pinned_model else None,
        str(pinned_variant) if pinned_variant else None,
    )
variant_from_args: + variant, variant_source = variant_from_args, "OPENCODE_ARGS" + elif env_variant: + variant, variant_source = env_variant, "env CODECOME_MODEL_VARIANT" + elif yaml_variant: + variant, variant_source = yaml_variant, "codecome.yml" + else: + variant, variant_source = None, "(unknown)" + + return model, variant, model_source, variant_source + + +def resolve_runtime_model_for_banner( + args_model: Optional[str], + args_variant: Optional[str], + model_source: str, + variant_source: str, + probe_command: list[str], +) -> tuple[Optional[str], Optional[str], str, str]: + """Prefer the actual runtime model over a historical guess. + + Env/YAML/CLI-pinned values remain authoritative. Falls back to a + throwaway probe session when the source is historical or unknown. + + Args: + args_model: Model string from CLI args (e.g. "openai/gpt-5"). + args_variant: Variant string from CLI args. + model_source: Where ``args_model`` came from (e.g. "OPENCODE_ARGS"). + variant_source: Where ``args_variant`` came from. + probe_command: The full ``sys.argv``-like token list that will be + executed for the probe. Unsafe flags such as ``--session``, + ``--continue``, ``--title``, ``--port`` are stripped internally + before the probe is run. + + Returns: + A 4-tuple of (model, variant, model_source, variant_source). When a + probe succeeds the model_source becomes "runtime probe". 
def resolve_thinking_decision(
    model: Optional[str],
    extra_args: list[str],
) -> tuple[bool, str]:
    """Decide whether thinking mode is enabled and report where that came from.

    Precedence: an explicit ``--thinking`` token in *extra_args*, then the
    ``CODECOME_THINKING`` environment variable, then the per-provider default
    derived from the ``provider/model`` prefix of *model*.

    Args:
        model: Resolved model identifier, optionally ``provider/model``.
        extra_args: Extra opencode command-line tokens.

    Returns:
        ``(enabled, source)`` where *source* is ``"user-args"``, ``"env"`` or
        ``"provider-default"``.
    """
    if "--thinking" in extra_args:
        return True, "user-args"

    raw = os.environ.get("CODECOME_THINKING")
    if raw is not None:
        # Compare case-insensitively so "No" or "FALSE" disable thinking,
        # matching the spellings truthy_env treats as false.
        enabled = raw.strip().lower() not in {"", "0", "false", "no"}
        return enabled, "env"

    provider_id = None
    if model and "/" in model:
        provider_id = model.split("/", 1)[0]
    return _thinking_default_for_provider(provider_id), "provider-default"
"").strip() or None + yaml_model, yaml_variant = _read_codecome_yml_agent(agent_name) + discovered = _discover_opencode_default_model() + + model, variant, model_source, variant_source = resolve_model_and_variant( + agent_name, extra_args + ) + + def fmt(v: Optional[str]) -> str: + return v if v else "(not set)" + + print(_header(f"Model resolution for agent {agent_name}:")) + print() + print(f" {_DIM}OPENCODE_ARGS{_RESET} model={fmt(args_model)} variant={fmt(args_variant)}") + print(f" {_DIM}env CODECOME_MODEL{_RESET} model={fmt(env_model)}") + print(f" {_DIM}env CODECOME_MODEL_VARIANT{_RESET} variant={fmt(env_variant)}") + print(f" {_DIM}codecome.yml{_RESET} model={fmt(yaml_model)} variant={fmt(yaml_variant)}") + print(f" {_DIM}opencode session history{_RESET} model={fmt(discovered)}") + print(f" {_DIM}runtime probe{_RESET} not run by show-model") + print() + effective_model = model or "(unknown)" + effective_variant = variant or "(unknown)" + thinking_on, thinking_source = resolve_thinking_decision(model, extra_args) + print(f" {_BOLD}effective{_RESET} " + f"model={effective_model} variant={effective_variant} " + f"thinking={'on' if thinking_on else 'off'}") + print(f" {_DIM}sources{_RESET} " + f"model: {model_source} variant: {variant_source} " + f"thinking: {thinking_source}") + return 0 diff --git a/tools/codecome/graceful.py b/tools/codecome/graceful.py new file mode 100644 index 0000000..47fe3f5 --- /dev/null +++ b/tools/codecome/graceful.py @@ -0,0 +1,254 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Phase completion checks, required artifact checks, resume prompt builders. 
+""" + +from __future__ import annotations + +import re +from pathlib import Path +from typing import Any, Iterator + +ROOT = Path(__file__).resolve().parents[2] + +_FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL) + +_PHASE1_REQUIRED_ARTIFACT_NAMES = [ + "target-profile.md", + "attack-surface.md", + "build-model.md", + "execution-model.md", + "trust-boundaries.md", + "data-flow.md", + "validation-model.md", + "interesting-files.md", + "file-risk-index.yml", + "security-assumptions.md", + "sandbox-plan.md", +] + + +def _phase1_required_artifacts() -> list[Path]: + notes_dir = ROOT / "itemdb" / "notes" + return [notes_dir / name for name in _PHASE1_REQUIRED_ARTIFACT_NAMES] + + +def _path_is_fresh(path: Path, run_start_time: float) -> bool: + return path.exists() and path.stat().st_mtime >= run_start_time + + +def _iter_files(root: Path) -> Iterator[Path]: + if not root.exists(): + return + for path in root.rglob("*"): + if path.is_file(): + yield path + + +def _load_finding_frontmatter(path: Path) -> dict[str, Any] | None: + try: + content = path.read_text(encoding="utf-8") + except OSError: + return None + m = _FRONTMATTER_RE.match(content) + if not m: + return None + try: + import yaml # type: ignore + data = yaml.safe_load(m.group(1)) + except Exception: + return None + return data if isinstance(data, dict) else None + + +def _exploitation_status_looks_real(frontmatter: dict[str, Any] | None) -> bool: + if not isinstance(frontmatter, dict): + return False + exploitation = frontmatter.get("exploitation") + if not isinstance(exploitation, dict): + return False + status = str(exploitation.get("status", "")).strip().lower() + return bool(status and status not in ("", "pending.", "todo.", "tbd.")) + + +def check_phase_graceful_completion(phase: str, finding: str | None, run_start_time: float) -> bool: + try: + if str(phase) == "1": + required_artifacts = _phase1_required_artifacts() + if all(path.exists() for path in required_artifacts): + 
fresh_required = any(_path_is_fresh(path, run_start_time) for path in required_artifacts) + sandbox_generated = ROOT / "sandbox" / "CODECOME-GENERATED.md" + sandbox_state_recorded = _path_is_fresh(sandbox_generated, run_start_time) or _path_is_fresh( + ROOT / "itemdb" / "notes" / "sandbox-plan.md", run_start_time + ) + return fresh_required and sandbox_state_recorded + return False + elif str(phase) in ("2", "sweep"): + pending_dir = ROOT / "itemdb" / "findings" / "PENDING" + if pending_dir.exists(): + return any(f.name.endswith(".md") and f.name != ".gitkeep" and f.stat().st_mtime >= run_start_time for f in pending_dir.iterdir()) + return False + elif str(phase) == "3": + findings_dir = ROOT / "itemdb" / "findings" + return any( + path.suffix == ".md" and path.name != ".gitkeep" and path.stat().st_mtime >= run_start_time + for path in _iter_files(findings_dir) + ) + elif str(phase) == "4" and finding: + evidence_dir = ROOT / "itemdb" / "evidence" / finding + return any(path.stat().st_mtime >= run_start_time for path in _iter_files(evidence_dir)) + elif str(phase) == "5" and finding: + exploited_file = ROOT / "itemdb" / "findings" / "EXPLOITED" / f"{finding}.md" + if ( + exploited_file.exists() + and exploited_file.stat().st_mtime >= run_start_time + ): + fm = _load_finding_frontmatter(exploited_file) + if ( + isinstance(fm, dict) + and fm.get("status") == "EXPLOITED" + and _exploitation_status_looks_real(fm) + ): + exploits_dir = ROOT / "itemdb" / "evidence" / finding / "exploits" + if any( + path.stat().st_mtime >= run_start_time + for path in _iter_files(exploits_dir) + ): + return True + + confirmed_file = ROOT / "itemdb" / "findings" / "CONFIRMED" / f"{finding}.md" + if ( + confirmed_file.exists() + and confirmed_file.stat().st_mtime >= run_start_time + ): + fm = _load_finding_frontmatter(confirmed_file) + if ( + isinstance(fm, dict) + and fm.get("status") == "CONFIRMED" + and isinstance(fm.get("exploitation"), dict) + and str(fm["exploitation"].get("status", 
"")).upper() + == "NOT_FEASIBLE" + ): + return True + + return False + elif str(phase) == "6": + reports_dir = ROOT / "itemdb" / "reports" + if reports_dir.exists(): + return any(f.name.endswith(".md") and f.name != ".gitkeep" and f.stat().st_mtime >= run_start_time for f in reports_dir.iterdir()) + return False + except Exception: + pass + return False + + +def phase_checklist_lines(phase: str, finding: str | None) -> list[str]: + if str(phase) == "1": + return [ + "Ensure all required Phase 1 notes exist under itemdb/notes/.", + "Ensure itemdb/notes/file-risk-index.yml is present and consistent with interesting-files.md.", + "Ensure itemdb/notes/sandbox-plan.md documents the Phase 1b outcome.", + "If sandbox bootstrap succeeded, ensure sandbox/CODECOME-GENERATED.md exists; otherwise document the halt clearly in sandbox-plan.md.", + ] + if str(phase) in ("2", "sweep"): + return [ + "Create or update precise findings under itemdb/findings/PENDING/.", + "Each finding must identify affected code, trust-boundary/source-to-sink reasoning, attackability, impact, validation plan, and counter-analysis placeholder.", + "Do not stop until the new or updated findings are durable on disk.", + ] + if str(phase) == "3": + return [ + "Review all candidate findings under itemdb/findings/PENDING/.", + "Move clearly invalid findings to REJECTED and duplicates to DUPLICATE.", + "Leave surviving findings reviewable, deduplicated, and updated with counter-analysis.", + ] + if str(phase) == "4": + finding_ref = finding or "" + return [ + f"Ensure validation evidence exists under itemdb/evidence/{finding_ref}/, including README.md.", + "Update the finding with validation results and move it to the correct status directory if needed.", + "Do not stop until the evidence and finding status are consistent.", + ] + if str(phase) == "5": + finding_ref = finding or "" + return [ + f"If exploitation succeeds, ensure itemdb/evidence/{finding_ref}/exploits/ contains the exploit artifacts and 
def build_phase_resume_prompt(
    phase: str,
    finding: str | None,
    reason: str,
    step_finish_count: int,
) -> str:
    """Compose the prompt that asks the agent to continue a cut-off phase.

    The text states the observed finish reason and the number of completed
    loops, tells the agent to finish only the remaining work, and embeds the
    phase checklist from ``phase_checklist_lines`` as a bullet list.
    """
    bullets = [f"- {item}" for item in phase_checklist_lines(phase, finding)]
    # Each entry is one paragraph; paragraphs are separated by a blank line.
    paragraphs = [
        "Your previous response was cut off by the model/provider before you produced a final completion signal.",
        f"Observed finish reason: {reason}.\n"
        f"Completed loops before cutoff: {step_finish_count}.",
        "Treat your prior work as partial. First, briefly reassess what remains unfinished for this phase. "
        "Then complete only the remaining required work. Do not restart from scratch unless necessary.",
        f"Phase {phase} completion checklist:\n" + "\n".join(bullets),
        "Before ending, verify that the required durable artifacts for this phase exist, are updated, and are internally consistent.",
    ]
    return "\n\n".join(paragraphs)
def build_resume_command(initial_command: list[str], session_id: str, prompt: str) -> list[str]:
    """Build an ``opencode run`` command that resumes *session_id* with *prompt*.

    Only the flags needed to reach the original server (``--attach``,
    ``--port``/``-p``) and ``--thinking`` are carried over from
    *initial_command*.  Agent, model, variant, session and format flags are
    dropped together with their values, as are ``--continue``/``--fork`` and
    any other token such as the original prompt.

    Args:
        initial_command: The original command; its first two tokens
            (``opencode run``) are skipped.
        session_id: Session to resume.
        prompt: Prompt text appended as the final argument.

    Returns:
        The new argument list, ending in
        ``--session <id> --format json <prompt>``.
    """
    resume = ["opencode", "run"]
    passthrough_value_flags = {"--attach", "--port", "-p"}
    passthrough_standalone_flags = {"--thinking"}
    drop_value_flags = {"--agent", "--model", "-m", "--variant", "--session", "-s", "--format"}
    drop_standalone_flags = {"--continue", "-c", "--fork"}

    keep_next = False  # next token is the value of a passthrough flag
    skip_next = False  # next token is the value of a dropped flag

    for token in initial_command[2:]:
        if keep_next:
            resume.append(token)
            keep_next = False
            continue
        if skip_next:
            # Consume the value explicitly so it can never be misread as a
            # flag of its own (for example a value spelled "--thinking").
            skip_next = False
            continue

        name, has_equals, _value = token.partition("=")
        if name in drop_standalone_flags:
            continue
        if name in drop_value_flags:
            skip_next = not has_equals
            continue
        if name in passthrough_standalone_flags:
            resume.append(token)
            continue
        if name in passthrough_value_flags:
            resume.append(token)
            keep_next = not has_equals
            continue
        # Anything else is intentionally not carried over.

    resume.extend(["--session", session_id, "--format", "json", prompt])
    return resume
+""" + +from __future__ import annotations + +import json +import urllib.error +import urllib.request +from typing import Any + + +def _get_headers(auth_token: str | None, workspace_dir: str | None) -> dict[str, str]: + headers = {"Content-Type": "application/json"} + if auth_token: + import base64 + encoded = base64.b64encode(f"opencode:{auth_token}".encode("utf-8")).decode("utf-8") + headers["Authorization"] = f"Basic {encoded}" + if workspace_dir: + headers["x-opencode-directory"] = workspace_dir + return headers + + +def send_prompt_to_session( + base_url: str, + session_id: str, + prompt: str, + agent: str, + model: str | None, + variant: str | None, + auth_token: str | None, + workspace_dir: str | None, +) -> None: + url = f"{base_url}/session/{session_id}/prompt_async" + payload: dict[str, Any] = { + "parts": [{"type": "text", "text": prompt}], + "agent": agent, + } + if model: + parts = model.split("/", 1) + if len(parts) == 2: + payload["model"] = {"providerID": parts[0], "modelID": parts[1]} + else: + payload["model"] = {"modelID": model} + if variant: + payload["variant"] = variant + data = json.dumps(payload).encode("utf-8") + req = urllib.request.Request( + url, + data=data, + headers=_get_headers(auth_token, workspace_dir), + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=30.0) as resp: + pass # 204 expected + except urllib.error.HTTPError as exc: + raise RuntimeError(f"Failed to send prompt: HTTP {exc.code}") from exc + + +def create_session( + base_url: str, + phase: str, + agent: str, + model: str | None, + auth_token: str | None, + workspace_dir: str | None, +) -> str: + payload: dict[str, Any] = {"title": f"CodeCome Phase {phase}", "agent": agent} + if model: + parts = model.split("/", 1) + if len(parts) == 2: + payload["model"] = {"providerID": parts[0], "id": parts[1]} + else: + payload["model"] = {"id": model} + req = urllib.request.Request( + f"{base_url}/session", + data=json.dumps(payload).encode("utf-8"), + 
def create_chat_session(
    base_url: str,
    agent: str,
    model: str | None,
    auth_token: str | None,
    workspace_dir: str | None,
) -> str:
    """Create an interactive chat session on the OpenCode server.

    POSTs to ``{base_url}/session`` with the title ``CodeCome Chat``, the
    agent name, and permission rules that deny ``question``, ``plan_enter``
    and ``plan_exit``.  When *model* is given it is sent as
    ``{"providerID": ..., "id": ...}`` for ``provider/model`` strings, or as
    ``{"id": model}`` otherwise.

    Returns:
        The session id reported by the server.

    Raises:
        RuntimeError: If the response carries no usable session id.
        urllib.error.URLError: Propagated unchanged on transport or HTTP errors.
    """
    payload: dict[str, Any] = {
        "title": "CodeCome Chat",
        "agent": agent,
        "permission": [
            {"permission": "question", "action": "deny", "pattern": "*"},
            {"permission": "plan_enter", "action": "deny", "pattern": "*"},
            {"permission": "plan_exit", "action": "deny", "pattern": "*"},
        ],
    }
    if model:
        parts = model.split("/", 1)
        if len(parts) == 2:
            payload["model"] = {"providerID": parts[0], "id": parts[1]}
        else:
            payload["model"] = {"id": model}
    req = urllib.request.Request(
        f"{base_url}/session",
        data=json.dumps(payload).encode("utf-8"),
        headers=_get_headers(auth_token, workspace_dir),
        method="POST",
    )
    resp = urllib.request.urlopen(req, timeout=10.0)
    try:
        data = json.loads(resp.read().decode("utf-8"))
    finally:
        # Release the connection even when the body is not valid JSON.
        resp.close()
    # Check the raw value before stringifying: str(None) would yield the
    # truthy text "None" and slip past the emptiness check.
    sid = data.get("id") if isinstance(data, dict) else None
    if not sid:
        raise RuntimeError("Server returned empty session ID")
    return str(sid)
def close_transcript(fp: IO[str] | None) -> None:
    """Flush and close a transcript handle, tolerating I/O failures.

    Closing is best-effort: ``None`` is accepted and ignored, an
    already-closed handle is a no-op, and ``OSError`` from flushing or
    closing is swallowed.

    Args:
        fp: Handle returned by one of the ``open_*_transcript`` helpers, or
            ``None`` when the transcript could not be opened.
    """
    if fp is None:
        return
    try:
        fp.flush()
    except (OSError, ValueError):
        # ValueError is what flush() raises on an already-closed file.
        pass
    finally:
        # Always attempt close(), even after a failed flush, so the file
        # descriptor is not leaked.
        try:
            fp.close()
        except (OSError, ValueError):
            pass
def check_opencode_version() -> None:
    """Exit the process unless a sufficiently recent ``opencode`` is installed.

    Runs ``opencode --version``, takes the last whitespace-separated token of
    its stdout as the version, and compares up to three leading numeric
    components against ``MINIMUM_OPENCODE_VERSION``.

    Raises:
        SystemExit: With status 1 when opencode is missing, the command
            fails, it prints no version, or the version is too old.
    """
    try:
        result = subprocess.run(["opencode", "--version"], capture_output=True, text=True)
    except FileNotFoundError:
        print(_fail("OpenCode is not installed or not in PATH."), file=sys.stderr)
        sys.exit(1)

    if result.returncode != 0:
        print(_fail(f"Failed to check OpenCode version (exit code {result.returncode})."), file=sys.stderr)
        sys.exit(1)

    tokens = result.stdout.split()
    if not tokens:
        # Empty stdout would otherwise crash with IndexError on [-1].
        print(_fail("Failed to check OpenCode version (no version in output)."), file=sys.stderr)
        sys.exit(1)
    version_str = tokens[-1]

    def parse_ver(v: str) -> tuple[int, ...]:
        # Accept an optional leading "v" and one to three numeric parts; an
        # unparseable string compares as older than any real version.
        match = re.search(r"^v?(\d+)(?:\.(\d+))?(?:\.(\d+))?", v)
        if match:
            return tuple(int(x) for x in match.groups() if x is not None)
        return (0,)

    actual = parse_ver(version_str)
    required = parse_ver(MINIMUM_OPENCODE_VERSION)

    if actual < required:
        print(_fail(f"OpenCode version is too old: found {version_str}, require >= {MINIMUM_OPENCODE_VERSION}"), file=sys.stderr)
        sys.exit(1)
+def _fail(message: str) -> str: + if sys.stdout.isatty() and not _no_color(): + return f"\033[31m\u2718\033[0m {message}" + return f"[FAIL] {message}" + + +def _no_color() -> bool: + import os + return os.environ.get("NO_COLOR") is not None diff --git a/tools/run-agent.py b/tools/run-agent.py index 0cb9aa8..3ee6d37 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -36,6 +36,19 @@ import _colors as C from opencode.serve import ServerRunner, ServerRunnerError from events import EventLoop, RunResult +from codecome.version import check_opencode_version, MINIMUM_OPENCODE_VERSION as _MINIMUM_OPENCODE_VERSION +from codecome.config import ( + truthy_env, resolve_color_mode, load_prompt, + resolve_model_and_variant, resolve_runtime_model_for_banner, + resolve_thinking_decision, show_model_table, +) +from codecome.session import create_session, create_chat_session, send_prompt_to_session +from codecome.graceful import ( + check_phase_graceful_completion, + phase_checklist_lines, build_phase_resume_prompt, + build_frontmatter_resume_prompt, build_resume_command, +) +from codecome.transcript import open_phase_transcript, open_chat_transcript, close_transcript try: from rich.console import Console, Group @@ -57,7 +70,6 @@ HAVE_RICH = False ROOT = Path(__file__).resolve().parents[1] -MINIMUM_OPENCODE_VERSION = "1.14.50" # --------------------------------------------------------------------------- # Chat debug logging (--debug with --chat writes to tmp/chat-debug-.log) @@ -101,353 +113,6 @@ def _close_chat_debug() -> None: _CHAT_DEBUG_FP = None -def check_opencode_version() -> None: - try: - result = subprocess.run(["opencode", "--version"], capture_output=True, text=True) - except FileNotFoundError: - print(C.fail("OpenCode is not installed or not in PATH."), file=sys.stderr) - sys.exit(1) - - if result.returncode != 0: - print(C.fail(f"Failed to check OpenCode version (exit code {result.returncode})."), file=sys.stderr) - sys.exit(1) - - version_str = 
result.stdout.strip().split()[-1] - - def parse_ver(v: str) -> tuple[int, ...]: - match = re.search(r"^v?(\d+)(?:\.(\d+))?(?:\.(\d+))?", v) - if match: - return tuple(int(x) for x in match.groups() if x is not None) - return (0,) - - actual = parse_ver(version_str) - required = parse_ver(MINIMUM_OPENCODE_VERSION) - - if actual < required: - print(C.fail(f"OpenCode version is too old: found {version_str}, require >= {MINIMUM_OPENCODE_VERSION}"), file=sys.stderr) - sys.exit(1) - - -def truthy_env(name: str) -> bool: - value = os.environ.get(name) - return value is not None and value not in {"", "0", "false", "False", "no", "No"} - - -# --- Stream-based late model discovery --------------------------------------- - -_MODEL_BEARING_KEYS = ("modelID", "providerID", "model") - - -def _scan_event_for_model(payload: Any) -> Optional[str]: - """Recursively walk an event payload looking for a model identity. - - Returns a 'providerID/modelID' string if both are found in the - same dict, else just the value of the first useful key found, or - None. - """ - if isinstance(payload, dict): - # Same-dict providerID + modelID combo wins. 
- pid = payload.get("providerID") - model_field = payload.get("model") - mid = payload.get("modelID") or (model_field if isinstance(model_field, str) else None) - if isinstance(model_field, dict): - inner_pid = model_field.get("providerID") - inner_id = model_field.get("id") or model_field.get("modelID") - if inner_pid and inner_id: - return f"{inner_pid}/{inner_id}" - if inner_id: - return str(inner_id) - if pid and mid and isinstance(mid, str): - return f"{pid}/{mid}" - if isinstance(mid, str) and mid: - return mid - - for v in payload.values(): - found = _scan_event_for_model(v) - if found: - return found - return None - if isinstance(payload, list): - for item in payload: - found = _scan_event_for_model(item) - if found: - return found - return None - - -# --- Model resolution --------------------------------------------------------- - -_MODEL_FLAG_NAMES = ("--model", "-m") -_VARIANT_FLAG_NAMES = ("--variant",) - - -def _extract_flag_value(tokens: list[str], flag_names: tuple[str, ...]) -> Optional[str]: - """Return the value of the first matching flag in tokens, or None. - - Supports both `--flag value` and `--flag=value` forms. - """ - for i, tok in enumerate(tokens): - for flag in flag_names: - if tok == flag and i + 1 < len(tokens): - return tokens[i + 1] - prefix = flag + "=" - if tok.startswith(prefix): - return tok[len(prefix):] - return None - - -_DISCOVERY_TIMEOUT_S = float(os.environ.get("CODECOME_MODEL_DISCOVERY_TIMEOUT", "1.0")) -_MODEL_PROBE_TIMEOUT_S = float(os.environ.get("CODECOME_MODEL_PROBE_TIMEOUT", "20.0")) - - -def _discover_opencode_default_model() -> Optional[str]: - """Best-effort: return the model used in the most recent opencode - session for this project's worktree, or None. - - Implementation: query the opencode SQLite DB via `opencode db`, - asking for the latest session.model JSON for this worktree; - fall back to the latest session globally. - - Honors a 1-second timeout. Errors are silently ignored. 
- """ - worktree = str(ROOT) - - queries = [ - # Project-scoped first. - ( - "SELECT s.model FROM session s " - "JOIN project p ON s.project_id = p.id " - f"WHERE p.worktree = '{worktree}' AND s.model IS NOT NULL " - "ORDER BY s.time_updated DESC LIMIT 1" - ), - # Global fallback. - ( - "SELECT s.model FROM session s " - "WHERE s.model IS NOT NULL " - "ORDER BY s.time_updated DESC LIMIT 1" - ), - ] - - for query in queries: - try: - result = subprocess.run( - ["opencode", "db", query, "--format", "tsv"], - capture_output=True, - text=True, - timeout=_DISCOVERY_TIMEOUT_S, - ) - except (FileNotFoundError, subprocess.SubprocessError, OSError): - return None - if result.returncode != 0: - continue - - # Output looks like: - # model - # {"id":"gpt-5.4","providerID":"github-copilot"} - lines = [ln for ln in (result.stdout or "").splitlines() if ln.strip()] - if len(lines) < 2: - continue - raw = lines[-1] - try: - obj = json.loads(raw) - except json.JSONDecodeError: - # Some opencode versions may print bare strings. 
- return raw if raw and raw != "model" else None - - if isinstance(obj, dict): - mid = obj.get("id") or obj.get("modelID") - pid = obj.get("providerID") - if pid and mid: - return f"{pid}/{mid}" - if mid: - return str(mid) - - return None - - -def _extract_model_from_export(export_text: str) -> Optional[str]: - try: - payload = json.loads(export_text) - except json.JSONDecodeError: - return None - - if isinstance(payload, dict): - found = _scan_event_for_model(payload) - if found: - return found - return None - - -def _strip_probe_unsafe_flags(command: list[str]) -> list[str]: - """Remove flags that would make a probe reuse or mutate a real session.""" - stripped: list[str] = [] - skip_next = False - value_flags = {"--session", "-s", "--title", "--attach", "--port", "-p"} - standalone_flags = {"--continue", "-c", "--fork", "--share"} - - for token in command: - if skip_next: - skip_next = False - continue - - name = token.split("=", 1)[0] - if name in standalone_flags: - continue - if name in value_flags: - if "=" not in token: - skip_next = True - continue - - stripped.append(token) - - return stripped - - -@lru_cache(maxsize=32) -def _probe_effective_model(probe_key: tuple[str, ...]) -> Optional[str]: - """Run a tiny throwaway session and read the actual chosen model. - - This is only used when the wrapper would otherwise have to guess from - session history or show unknown. The probe session is deleted after the - export succeeds. 
- """ - command = list(probe_key) - session_id: str | None = None - try: - result = subprocess.run( - command + ["Reply with exactly OK."], - cwd=ROOT, - capture_output=True, - text=True, - timeout=_MODEL_PROBE_TIMEOUT_S, - ) - if result.returncode != 0: - return None - - lines = [line.strip() for line in result.stdout.splitlines() if line.strip()] - if not lines: - return None - - first = json.loads(lines[0]) - if not isinstance(first, dict): - return None - session_id = first.get("sessionID") - if not isinstance(session_id, str) or not session_id: - return None - - exported = subprocess.run( - ["opencode", "export", session_id], - cwd=ROOT, - capture_output=True, - text=True, - timeout=_MODEL_PROBE_TIMEOUT_S, - ) - if exported.returncode != 0: - return None - return _extract_model_from_export(exported.stdout) - except (OSError, subprocess.SubprocessError, json.JSONDecodeError): - return None - finally: - if session_id: - try: - subprocess.run( - ["opencode", "session", "delete", session_id], - cwd=ROOT, - capture_output=True, - text=True, - timeout=5, - ) - except (OSError, subprocess.SubprocessError): - pass - - -def _read_codecome_yml_agent(agent_name: str) -> tuple[Optional[str], Optional[str]]: - """Return (model, variant) from codecome.yml agents., or (None, None).""" - config_path = ROOT / "codecome.yml" - if not config_path.exists(): - return None, None - try: - import yaml # type: ignore - except ImportError: - return None, None - try: - with config_path.open("r", encoding="utf-8") as fh: - data = yaml.safe_load(fh) or {} - except Exception: # noqa: BLE001 - return None, None - if not isinstance(data, dict): - return None, None - agents = data.get("agents") - if not isinstance(agents, dict): - return None, None - entry = agents.get(agent_name) - if not isinstance(entry, dict): - return None, None - model = entry.get("model") - variant = entry.get("variant") - return (str(model) if model else None, str(variant) if variant else None) - - -def 
resolve_model_and_variant( - agent_name: str, - opencode_args_tokens: list[str], - *, - discover_default: bool = True, -) -> tuple[Optional[str], Optional[str], str, str]: - """Resolve effective model and variant with source labels. - - Returns (model, variant, model_source, variant_source). - Source values: 'OPENCODE_ARGS', 'env CODECOME_MODEL', - 'env CODECOME_MODEL_VARIANT', 'codecome.yml', - 'opencode session history', or '(unknown)'. - - `discover_default=True` enables the (slow-ish) opencode db probe - when none of the configured sources resolved a model. - """ - model_from_args = _extract_flag_value(opencode_args_tokens, _MODEL_FLAG_NAMES) - variant_from_args = _extract_flag_value(opencode_args_tokens, _VARIANT_FLAG_NAMES) - - env_model = (os.environ.get("CODECOME_MODEL") or "").strip() or None - env_variant = (os.environ.get("CODECOME_MODEL_VARIANT") or "").strip() or None - - yaml_model, yaml_variant = _read_codecome_yml_agent(agent_name) - - if model_from_args: - model, model_source = model_from_args, "OPENCODE_ARGS" - elif env_model: - model, model_source = env_model, "env CODECOME_MODEL" - elif yaml_model: - model, model_source = yaml_model, "codecome.yml" - else: - discovered = _discover_opencode_default_model() if discover_default else None - if discovered: - model, model_source = discovered, "opencode session history" - else: - model, model_source = None, "(unknown)" - - if variant_from_args: - variant, variant_source = variant_from_args, "OPENCODE_ARGS" - elif env_variant: - variant, variant_source = env_variant, "env CODECOME_MODEL_VARIANT" - elif yaml_variant: - variant, variant_source = yaml_variant, "codecome.yml" - else: - # Discovery doesn't carry variant (no DB column). 
- variant, variant_source = None, "(unknown)" - - return model, variant, model_source, variant_source - - -def resolve_color_mode(flag: str) -> str: - if flag != "auto": - return flag - if truthy_env("CLICOLOR_FORCE"): - return "always" - if os.environ.get("NO_COLOR") is not None or os.environ.get("TERM") == "dumb": - return "never" - return "auto" - - def build_console(color_mode: str) -> Console: if not HAVE_RICH: return None # type: ignore[return-value] @@ -458,68 +123,6 @@ def build_console(color_mode: str) -> Console: return Console(highlight=False) -_PHASE_NAMES = { - "1": "reconnaissance", - "2": "hypothesis_generation", - "3": "counter_analysis", - "4": "validation", - "5": "exploit_development", - "6": "reporting", -} - - -def load_prompt(prompt_file: Path, finding: str | None, phase: str | None = None) -> str: - prompt = prompt_file.read_text(encoding="utf-8") - if finding is not None: - placeholder = "FINDING_PATH_OR_ID" - if placeholder not in prompt: - raise ValueError(f"Prompt placeholder {placeholder!r} not found in {prompt_file}") - prompt = prompt.replace(placeholder, finding) - - # --- Extra prompt sources (additive, appended in order) --------------- - extra_sections: list[tuple[str, str]] = [] - - # Source 1: codecome.yml audit.extra_prompts. - if phase is not None: - phase_name = _PHASE_NAMES.get(str(phase)) - if phase_name: - try: - import yaml # type: ignore - - config_path = ROOT / "codecome.yml" - if config_path.exists(): - cfg = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} - ep = cfg.get("audit", {}).get("extra_prompts", {}) - yml_extra = ep.get(phase_name, "").strip() if isinstance(ep, dict) else "" - if yml_extra: - extra_sections.append(("From codecome.yml", yml_extra)) - except Exception: - pass # Non-fatal; skip if yaml missing or config is broken. 
- - # Source 2: PROMPT_EXTRA_FILE env var - extra_file = os.environ.get("PROMPT_EXTRA_FILE", "").strip() - if extra_file: - extra_path = Path(extra_file) - if not extra_path.is_absolute(): - extra_path = ROOT / extra_path - if extra_path.is_file(): - file_text = extra_path.read_text(encoding="utf-8").strip() - if file_text: - extra_sections.append((f"From {extra_file}", file_text)) - - # Source 3: PROMPT_EXTRA env var - extra_inline = os.environ.get("PROMPT_EXTRA", "").strip() - if extra_inline: - extra_sections.append(("Additional instructions", extra_inline)) - - if extra_sections: - prompt += "\n\n## Additional instructions\n" - for heading, body in extra_sections: - prompt += f"\n### {heading}\n\n{body}\n" - - return prompt - - def format_tokens(tokens: dict[str, Any]) -> str: if not isinstance(tokens, dict): return "" @@ -1024,6 +627,10 @@ def _cache_invalidate_stale() -> None: stale = [] for path, (_, recorded_mtime) in _SNAPSHOT_CACHE.items(): actual = _current_mtime(path) + # If the file no longer exists (actual is None), remove from cache + # to prevent stale diffs on re-creation. + # If the file was modified since we cached it, remove from cache + # so the next diff uses current disk state. if actual is None or actual != recorded_mtime: stale.append(path) for path in stale: @@ -4232,430 +3839,6 @@ def build_parser() -> argparse.ArgumentParser: return parser -def _thinking_default_for_provider(provider_id: Optional[str]) -> bool: - """Return True if --thinking should default to ON for this provider. - - Anthropic interleaves thinking with text blocks via the - interleaved-thinking beta header (set by OpenCode by default), so - the wrapper already shows multiple Assistant panels per turn. Adding - --thinking on top would double the panels for no extra information. - - All other known reasoning-capable providers hide their reasoning - unless --thinking is on. 
Default ON for those, and ON for unknown - future providers (cheaper to over-surface than under-surface in a - research workflow). - """ - if not provider_id: - return True - pid = provider_id.lower() - if pid.startswith("anthropic"): - return False - return True - - -def _resolve_thinking_decision( - model: Optional[str], - extra_args: list[str], -) -> tuple[bool, str]: - """Decide whether to enable --thinking for the child opencode run. - - Precedence: - 1. --thinking explicitly present in OPENCODE_ARGS -> on (user-args). - 2. CODECOME_THINKING env var -> on/off (env). - 3. Per-provider default based on the model's provider prefix. - - Returns (enabled, source). - """ - if "--thinking" in extra_args: - return True, "user-args" - - raw = os.environ.get("CODECOME_THINKING") - if raw is not None: - if raw.strip() in ("0", "false", "False", "no", ""): - return False, "env" - return True, "env" - - provider_id = None - if model and "/" in model: - provider_id = model.split("/", 1)[0] - enabled = _thinking_default_for_provider(provider_id) - return enabled, "provider-default" - - -def resolve_runtime_model_for_banner( - args: argparse.Namespace, - command: list[str], - model: Optional[str], - variant: Optional[str], - model_source: str, - variant_source: str, -) -> tuple[Optional[str], Optional[str], str, str]: - """Prefer the actual runtime model over a historical guess. - - Env/YAML/CLI-pinned values remain authoritative. When the wrapper would - otherwise show a best-effort historical value or unknown, run a tiny probe - with the same launch configuration and use the exported session metadata. 
- """ - if model_source in {"OPENCODE_ARGS", "env CODECOME_MODEL", "codecome.yml"}: - return model, variant, model_source, variant_source - - probe_command = _strip_probe_unsafe_flags(command) - probed = _probe_effective_model(tuple(probe_command)) - if probed: - return probed, variant, "runtime probe", variant_source - - return model, variant, model_source, variant_source - - -_PHASE1_REQUIRED_ARTIFACT_NAMES = [ - "target-profile.md", - "attack-surface.md", - "build-model.md", - "execution-model.md", - "trust-boundaries.md", - "data-flow.md", - "validation-model.md", - "interesting-files.md", - "file-risk-index.yml", - "security-assumptions.md", - "sandbox-plan.md", -] - - -def _phase1_required_artifacts() -> list[Path]: - notes_dir = ROOT / "itemdb" / "notes" - return [notes_dir / name for name in _PHASE1_REQUIRED_ARTIFACT_NAMES] - - -def _path_is_fresh(path: Path, run_start_time: float) -> bool: - return path.exists() and path.stat().st_mtime >= run_start_time - - -def _iter_files(root: Path) -> Iterator[Path]: - if not root.exists(): - return - for path in root.rglob("*"): - if path.is_file(): - yield path - - -def _phase_checklist_lines(phase: str, finding: str | None) -> list[str]: - if str(phase) == "1": - return [ - "Ensure all required Phase 1 notes exist under itemdb/notes/.", - "Ensure itemdb/notes/file-risk-index.yml is present and consistent with interesting-files.md.", - "Ensure itemdb/notes/sandbox-plan.md documents the Phase 1b outcome.", - "If sandbox bootstrap succeeded, ensure sandbox/CODECOME-GENERATED.md exists; otherwise document the halt clearly in sandbox-plan.md.", - ] - if str(phase) in ("2", "sweep"): - return [ - "Create or update precise findings under itemdb/findings/PENDING/.", - "Each finding must identify affected code, trust-boundary/source-to-sink reasoning, attackability, impact, validation plan, and counter-analysis placeholder.", - "Do not stop until the new or updated findings are durable on disk.", - ] - if str(phase) == "3": 
- return [ - "Review all candidate findings under itemdb/findings/PENDING/.", - "Move clearly invalid findings to REJECTED and duplicates to DUPLICATE.", - "Leave surviving findings reviewable, deduplicated, and updated with counter-analysis.", - ] - if str(phase) == "4": - finding_ref = finding or "" - return [ - f"Ensure validation evidence exists under itemdb/evidence/{finding_ref}/, including README.md.", - "Update the finding with validation results and move it to the correct status directory if needed.", - "Do not stop until the evidence and finding status are consistent.", - ] - if str(phase) == "5": - finding_ref = finding or "" - return [ - f"If exploitation succeeds, ensure itemdb/evidence/{finding_ref}/exploits/ contains the exploit artifacts and exploits/README.md.", - "If exploitation is not feasible, keep the finding in CONFIRMED and update its exploitation.status to NOT_FEASIBLE with a clear explanation.", - "Do not stop until the exploit artifacts or the NOT_FEASIBLE documentation are durable and consistent.", - ] - if str(phase) == "6": - return [ - "Ensure the report output under itemdb/reports/ is written and reviewable.", - "Include the required summary sections and evidence references for exploited and confirmed findings.", - "Do not stop until the report artifacts are durable on disk.", - ] - return ["Finish the remaining required work for the current phase before ending."] - - -def _build_phase_resume_prompt( - phase: str, - finding: str | None, - reason: str, - step_finish_count: int, -) -> str: - checklist = "\n".join(f"- {line}" for line in _phase_checklist_lines(phase, finding)) - return ( - "Your previous response was cut off by the model/provider before you produced a final completion signal.\n\n" - f"Observed finish reason: {reason}.\n" - f"Completed loops before cutoff: {step_finish_count}.\n\n" - "Treat your prior work as partial. First, briefly reassess what remains unfinished for this phase. 
" - "Then complete only the remaining required work. Do not restart from scratch unless necessary.\n\n" - f"Phase {phase} completion checklist:\n" - f"{checklist}\n\n" - "Before ending, verify that the required durable artifacts for this phase exist, are updated, and are internally consistent." - ) - - -def _build_frontmatter_resume_prompt(phase: str, finding: str | None, validation_output: str) -> str: - checklist = "\n".join(f"- {line}" for line in _phase_checklist_lines(phase, finding)) - return ( - "Your previous run produced files that failed local validation.\n\n" - "Validation errors:\n" - f"{validation_output}\n\n" - "Repair only the reported YAML/frontmatter issues with minimal changes. Do not redo unrelated analysis.\n\n" - f"Phase {phase} completion checklist:\n" - f"{checklist}\n\n" - "After fixing the validation errors, ensure the affected files remain in the correct status/location and are internally consistent." - ) - - -def _build_resume_command(initial_command: list[str], session_id: str, prompt: str) -> list[str]: - """Preserve connection/runtime flags needed to reach the original session.""" - resume = ["opencode", "run"] - pending_passthrough_value = False - passthrough_value_flags = {"--attach", "--port", "-p"} - passthrough_standalone_flags = {"--thinking"} - drop_value_flags = {"--agent", "--model", "-m", "--variant", "--session", "-s", "--format"} - drop_standalone_flags = {"--continue", "-c", "--fork"} - - for token in initial_command[2:]: - if pending_passthrough_value: - resume.append(token) - pending_passthrough_value = False - continue - - name, has_equals, _ = token.partition("=") - if name in drop_standalone_flags: - continue - if name in drop_value_flags: - if not has_equals: - pending_passthrough_value = False - continue - if name in passthrough_standalone_flags: - resume.append(token) - continue - if name in passthrough_value_flags: - resume.append(token) - if not has_equals: - pending_passthrough_value = True - continue - - 
resume.extend(["--session", session_id, "--format", "json", prompt]) - return resume - - - -_FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL) - - -def _load_finding_frontmatter(path: Path) -> dict[str, Any] | None: - """Return the YAML frontmatter dict from a finding file, or None.""" - try: - content = path.read_text(encoding="utf-8") - except OSError: - return None - m = _FRONTMATTER_RE.match(content) - if not m: - return None - try: - import yaml # type: ignore - data = yaml.safe_load(m.group(1)) - except Exception: - return None - return data if isinstance(data, dict) else None - - -def _exploitation_status_looks_real(frontmatter: dict[str, Any] | None) -> bool: - """Return True when the exploitation block has a non-placeholder status.""" - if not isinstance(frontmatter, dict): - return False - exploitation = frontmatter.get("exploitation") - if not isinstance(exploitation, dict): - return False - status = str(exploitation.get("status", "")).strip().lower() - return bool(status and status not in ("", "pending.", "todo.", "tbd.")) - - -def check_phase_graceful_completion(phase: str, finding: str | None, run_start_time: float) -> bool: - """Check if the phase produced its primary artifacts, allowing us to forgive mid-turn cutoffs.""" - try: - if str(phase) == "1": - required_artifacts = _phase1_required_artifacts() - if all(path.exists() for path in required_artifacts): - fresh_required = any(_path_is_fresh(path, run_start_time) for path in required_artifacts) - sandbox_generated = ROOT / "sandbox" / "CODECOME-GENERATED.md" - sandbox_state_recorded = _path_is_fresh(sandbox_generated, run_start_time) or _path_is_fresh( - ROOT / "itemdb" / "notes" / "sandbox-plan.md", run_start_time - ) - return fresh_required and sandbox_state_recorded - return False - elif str(phase) in ("2", "sweep"): - pending_dir = ROOT / "itemdb" / "findings" / "PENDING" - if pending_dir.exists(): - return any(f.name.endswith(".md") and f.name != ".gitkeep" and 
f.stat().st_mtime >= run_start_time for f in pending_dir.iterdir()) - return False - elif str(phase) == "3": - findings_dir = ROOT / "itemdb" / "findings" - return any( - path.suffix == ".md" and path.name != ".gitkeep" and path.stat().st_mtime >= run_start_time - for path in _iter_files(findings_dir) - ) - elif str(phase) == "4" and finding: - evidence_dir = ROOT / "itemdb" / "evidence" / finding - return any(path.stat().st_mtime >= run_start_time for path in _iter_files(evidence_dir)) - elif str(phase) == "5" and finding: - # Path A: finding promoted to EXPLOITED with real frontmatter + exploit artifacts. - exploited_file = ROOT / "itemdb" / "findings" / "EXPLOITED" / f"{finding}.md" - if ( - exploited_file.exists() - and exploited_file.stat().st_mtime >= run_start_time - ): - fm = _load_finding_frontmatter(exploited_file) - if ( - isinstance(fm, dict) - and fm.get("status") == "EXPLOITED" - and _exploitation_status_looks_real(fm) - ): - exploits_dir = ROOT / "itemdb" / "evidence" / finding / "exploits" - if any( - path.stat().st_mtime >= run_start_time - for path in _iter_files(exploits_dir) - ): - return True - - # Path B: CONFIRMED finding documented as NOT_FEASIBLE. 
- confirmed_file = ROOT / "itemdb" / "findings" / "CONFIRMED" / f"{finding}.md" - if ( - confirmed_file.exists() - and confirmed_file.stat().st_mtime >= run_start_time - ): - fm = _load_finding_frontmatter(confirmed_file) - if ( - isinstance(fm, dict) - and fm.get("status") == "CONFIRMED" - and isinstance(fm.get("exploitation"), dict) - and str(fm["exploitation"].get("status", "")).upper() - == "NOT_FEASIBLE" - ): - return True - - return False - elif str(phase) == "6": - reports_dir = ROOT / "itemdb" / "reports" - if reports_dir.exists(): - return any(f.name.endswith(".md") and f.name != ".gitkeep" and f.stat().st_mtime >= run_start_time for f in reports_dir.iterdir()) - return False - except Exception: - pass - return False - -def _get_headers(auth_token: str | None, workspace_dir: str | None) -> dict[str, str]: - headers = {"Content-Type": "application/json"} - if auth_token: - import base64 - encoded = base64.b64encode(f"opencode:{auth_token}".encode("utf-8")).decode("utf-8") - headers["Authorization"] = f"Basic {encoded}" - if workspace_dir: - headers["x-opencode-directory"] = workspace_dir - return headers - -def _send_prompt_to_session( - base_url: str, - session_id: str, - prompt: str, - agent: str, - model: str | None, - variant: str | None, - auth_token: str | None, - workspace_dir: str | None, -) -> None: - """Send a prompt text to a session via POST /session/{id}/prompt_async.""" - url = f"{base_url}/session/{session_id}/prompt_async" - payload: dict[str, Any] = { - "parts": [{"type": "text", "text": prompt}], - "agent": agent, - } - if model: - parts = model.split("/", 1) - if len(parts) == 2: - payload["model"] = {"providerID": parts[0], "modelID": parts[1]} - else: - payload["model"] = {"modelID": model} - if variant: - payload["variant"] = variant - data = json.dumps(payload).encode("utf-8") - req = urllib.request.Request( - url, - data=data, - headers=_get_headers(auth_token, workspace_dir), - method="POST", - ) - try: - with 
urllib.request.urlopen(req, timeout=30.0) as resp: - pass # 204 expected - except urllib.error.HTTPError as exc: - raise RuntimeError(f"Failed to send prompt: HTTP {exc.code}") from exc - - -def _create_session(base_url: str, phase: str, agent: str, model: str | None, auth_token: str | None, workspace_dir: str | None) -> str: - """Create a session via POST /session and return its ID.""" - payload: dict[str, Any] = {"title": f"CodeCome Phase {phase}", "agent": agent} - if model: - parts = model.split("/", 1) - if len(parts) == 2: - payload["model"] = {"providerID": parts[0], "id": parts[1]} - else: - payload["model"] = {"id": model} - req = urllib.request.Request( - f"{base_url}/session", - data=json.dumps(payload).encode("utf-8"), - headers=_get_headers(auth_token, workspace_dir), - method="POST", - ) - resp = urllib.request.urlopen(req, timeout=10.0) - data = json.loads(resp.read().decode("utf-8")) - sid = str(data.get("id", "")) - if not sid: - raise RuntimeError("Server returned empty session ID") - return sid - - -def _create_chat_session(base_url: str, agent: str, model: str | None, auth_token: str | None, workspace_dir: str | None) -> str: - """Create a session for interactive chat mode with permission rules.""" - payload: dict[str, Any] = { - "title": "CodeCome Chat", - "agent": agent, - "permission": [ - {"permission": "question", "action": "deny", "pattern": "*"}, - {"permission": "plan_enter", "action": "deny", "pattern": "*"}, - {"permission": "plan_exit", "action": "deny", "pattern": "*"}, - ], - } - if model: - parts = model.split("/", 1) - if len(parts) == 2: - payload["model"] = {"providerID": parts[0], "id": parts[1]} - else: - payload["model"] = {"id": model} - req = urllib.request.Request( - f"{base_url}/session", - data=json.dumps(payload).encode("utf-8"), - headers=_get_headers(auth_token, workspace_dir), - method="POST", - ) - resp = urllib.request.urlopen(req, timeout=10.0) - data = json.loads(resp.read().decode("utf-8")) - sid = 
str(data.get("id", "")) - if not sid: - raise RuntimeError("Server returned empty session ID") - return sid - - def _consume_events( base_url: str, session_id: str, @@ -4714,29 +3897,18 @@ def _run_single_attempt( Returns (returncode, session_id, run_result, transcript_path). """ - finding_tag = (args.finding or "no-finding").replace("/", "_") - transcript_dir = ROOT / "tmp" - transcript_dir.mkdir(parents=True, exist_ok=True) - - # Use a module-level counter for attempt numbers across resume attempts. - counter = getattr(_run_single_attempt, "_attempt_counter", 1) - transcript_path = transcript_dir / f"last-phase-{args.phase}-{finding_tag}-attempt-{counter}.jsonl" - setattr(_run_single_attempt, "_attempt_counter", counter + 1) - - transcript_fp = None - try: - transcript_fp = transcript_path.open("w", encoding="utf-8") - except OSError as exc: + transcript_path, transcript_fp = open_phase_transcript(str(args.phase), args.finding) + if transcript_fp is None: if HAVE_RICH: - console.print(Text(f"warning: could not open transcript {transcript_path}: {exc}", style="yellow")) + console.print(Text(f"warning: could not open transcript {transcript_path}", style="yellow")) else: - print(C.warn(f"warning: could not open transcript {transcript_path}: {exc}")) + print(C.warn(f"warning: could not open transcript {transcript_path}")) try: if existing_session_id: session_id = existing_session_id else: - session_id = _create_session(base_url, str(args.phase), args.agent, model, auth_token, workspace_dir) + session_id = create_session(base_url, str(args.phase), args.agent, model, auth_token, workspace_dir) run_result_box: dict[str, Any] = {} consume_error_box: dict[str, Exception] = {} @@ -4761,7 +3933,7 @@ def _consume() -> None: consumer = threading.Thread(target=_consume, name=f"codecome-events-{session_id}", daemon=True) consumer.start() - _send_prompt_to_session(base_url, session_id, prompt, args.agent, model, variant, auth_token, workspace_dir) + 
send_prompt_to_session(base_url, session_id, prompt, args.agent, model, variant, auth_token, workspace_dir) consumer.join() if "error" in consume_error_box: @@ -4773,55 +3945,11 @@ def _consume() -> None: _emit_fatal_error(console, "Server Error", str(exc)) return 1, existing_session_id or "", RunResult(), transcript_path finally: - if transcript_fp is not None: - try: - transcript_fp.flush() - transcript_fp.close() - except OSError: - pass + close_transcript(transcript_fp) return 0, session_id, run_result, transcript_path -def show_model_table(agent_name: str) -> int: - """Print the model-resolution table for an agent and exit.""" - extra_args = shlex.split(os.environ.get("OPENCODE_ARGS", "")) - - args_model = _extract_flag_value(extra_args, _MODEL_FLAG_NAMES) - args_variant = _extract_flag_value(extra_args, _VARIANT_FLAG_NAMES) - env_model = (os.environ.get("CODECOME_MODEL") or "").strip() or None - env_variant = (os.environ.get("CODECOME_MODEL_VARIANT") or "").strip() or None - yaml_model, yaml_variant = _read_codecome_yml_agent(agent_name) - discovered = _discover_opencode_default_model() - - model, variant, model_source, variant_source = resolve_model_and_variant( - agent_name, extra_args - ) - - def fmt(v: Optional[str]) -> str: - return v if v else "(not set)" - - print(C.header(f"Model resolution for agent {agent_name}:")) - print() - print(f" {C.DIM}OPENCODE_ARGS{C.RESET} model={fmt(args_model)} variant={fmt(args_variant)}") - print(f" {C.DIM}env CODECOME_MODEL{C.RESET} model={fmt(env_model)}") - print(f" {C.DIM}env CODECOME_MODEL_VARIANT{C.RESET} variant={fmt(env_variant)}") - print(f" {C.DIM}codecome.yml{C.RESET} model={fmt(yaml_model)} variant={fmt(yaml_variant)}") - print(f" {C.DIM}opencode session history{C.RESET} model={fmt(discovered)}") - print(f" {C.DIM}runtime probe{C.RESET} not run by show-model") - print() - effective_model = model or "(unknown)" - effective_variant = variant or "(unknown)" - thinking_on, thinking_source = 
_resolve_thinking_decision(model, extra_args) - print(f" {C.BOLD}effective{C.RESET} " - f"model={effective_model} variant={effective_variant} " - f"thinking={'on' if thinking_on else 'off'}") - print(f" {C.DIM}sources{C.RESET} " - f"model: {model_source} variant: {variant_source} " - f"thinking: {thinking_source}") - return 0 - - def _emit_fatal_error(console: Any, title: str, message: str) -> None: """Show fatal startup/runtime errors in the UI and on stderr.""" formatted = C.fail(f"{title}: {message}") @@ -4872,6 +4000,62 @@ def _write(self, renderable): ChatApp: Any = None QuitScreen: Any = None + +# Standalone chat-app methods — available even when Textual is not +# installed, so that tests can exercise _render_and_log parity without +# launching a real TUI. + +def _chat_render_and_log(self, console, phase, label, event): + """Standalone version of _ChatApp._render_and_log. See the docstring + on the class for the full contract.""" + if getattr(self, "transcript_fp", None) is not None: + try: + self.transcript_fp.write(json.dumps(event) + "\n") + except OSError: + pass + if getattr(self, "args", None) is not None and getattr(self.args, "debug", False): + _chat_debug(f"_render_and_log: raw event: {json.dumps(event)}") + if event.get("type") == "message.updated": + _chat_update_modeline_info(self, event) + if not getattr(self, "thinking_on", True) and event.get("type") == "reasoning": + return + render_event(console, phase, label, event) + + +def _chat_update_modeline_info(self, event: dict[str, Any]) -> None: + """Standalone version of _ChatApp._update_modeline_info.""" + info = event.get("info") + if not isinstance(info, dict): + props = event.get("properties", {}) + info = props.get("info", {}) if isinstance(props, dict) else {} + if not isinstance(info, dict): + return + if info.get("role") != "assistant": + return + model_id = str(info.get("modelID", "")).strip() + provider_id = str(info.get("providerID", "")).strip() + if not model_id: + mdl = 
info.get("model", {}) + if isinstance(mdl, dict): + model_id = str(mdl.get("modelID", "")).strip() + provider_id = str(mdl.get("providerID", "")).strip() + model_label = f"{provider_id}/{model_id}" if provider_id and model_id else (model_id or "…") + tokens = info.get("tokens", {}) + if isinstance(tokens, dict): + _in = tokens.get("input", 0) + _out = tokens.get("output", 0) + token_str = f"↑{_in} ↓{_out}" + else: + token_str = "" + cost = info.get("cost", 0) or 0 + cost_str = f" ${cost:.4f}" if cost else "" + getattr(self, "_modeline_info", "") + try: + self._modeline_info = f"{model_label} | {token_str}{cost_str}" + except AttributeError: + pass + + try: from textual import on, work from textual.app import App, ComposeResult @@ -5216,87 +4400,10 @@ def _on_render_message(self, message: RenderMessage) -> None: # --- Consumer-thread callback --- def _render_and_log(self, console, phase, label, event): - """Called from the SSE consumer thread. Mirrors phase mode's - _render_and_log exactly (parity with non-interactive runs): - - 1. Persist the raw event to the transcript jsonl. - 2. When --debug, mirror the raw event JSON to the - chat-debug log file (phase mode mirrors to stderr; - in chat mode stderr would corrupt Textual's - alternate-screen output, so we route to the debug - file instead). - 3. Suppress 'reasoning' events when thinking is off. - 4. Delegate to render_event() — the SAME dispatcher - used by non-interactive runs. - - Also updates _modeline_info from every message.updated - event (even in-progress ones) so the bottom-bar status - line stays live. - - The render_event() call ends up posting RenderMessage(s) - through the console_proxy, which the @on(RenderMessage) - handler writes to the RichLog on the main thread.""" - # (1) Transcript jsonl — parity with phase mode. 
- if self.transcript_fp is not None: - try: - self.transcript_fp.write(json.dumps(event) + "\n") - except OSError: - pass - # (2) Raw-event mirror — to the chat-debug file rather than - # stderr (Textual owns the TTY in chat mode). - if self.args is not None and getattr(self.args, "debug", False): - _chat_debug(f"_render_and_log: raw event: {json.dumps(event)}") - else: - _chat_debug(f"_render_and_log: event type={event.get('type')}") - - # Update the bottom-bar modeline on every message.updated - # so token/cost/liveness info refreshes live. - if event.get("type") == "message.updated": - self._update_modeline_info(event) - - # (3) Suppress reasoning when thinking is off. - if not self.thinking_on and event.get("type") == "reasoning": - return - # (4) Render via the same dispatcher non-chat uses. No - # chat-specific markers or filters — full parity. - render_event(console, phase, label, event) + _chat_render_and_log(self, console, phase, label, event) def _update_modeline_info(self, event: dict[str, Any]) -> None: - """Extract model/tokens from a message.updated event and store - for the heartbeat to surface in the bottom bar.""" - info = event.get("info") - if not isinstance(info, dict): - props = event.get("properties", {}) - info = props.get("info", {}) if isinstance(props, dict) else {} - if not isinstance(info, dict): - return - # Only use assistant messages for the modeline; user - # messages carry no new token data. 
- if info.get("role") != "assistant": - return - model_id = str(info.get("modelID", "")).strip() - provider_id = str(info.get("providerID", "")).strip() - if not model_id: - mdl = info.get("model", {}) - if isinstance(mdl, dict): - model_id = str(mdl.get("modelID", "")).strip() - provider_id = str(mdl.get("providerID", "")).strip() - model_label = f"{provider_id}/{model_id}" if provider_id and model_id else (model_id or "…") - - tokens = info.get("tokens", {}) - if isinstance(tokens, dict): - _in = tokens.get("input", 0) - _out = tokens.get("output", 0) - total = tokens.get("total", _in + _out) - token_str = f"↑{_in} ↓{_out}" - else: - total = 0 - token_str = "" - - cost = info.get("cost", 0) or 0 - cost_str = f" ${cost:.4f}" if cost else "" - - self._modeline_info = f"{model_label} | {token_str}{cost_str}" + _chat_update_modeline_info(self, event) # --- UI actions --- @@ -5413,7 +4520,7 @@ def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> model, variant, model_source, variant_source = resolve_model_and_variant( args.agent, extra_args ) - thinking_on, thinking_source = _resolve_thinking_decision(model, extra_args) + thinking_on, thinking_source = resolve_thinking_decision(model, extra_args) _chat_debug(f"_run_chat_mode: agent={args.agent} model={model} variant={variant} thinking={thinking_on}") @@ -5437,7 +4544,7 @@ def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> # Create session _chat_debug("_run_chat_mode: creating session") try: - session_id = _create_chat_session( + session_id = create_chat_session( server_info.base_url, args.agent, model, server_info.password, str(ROOT), ) _chat_debug(f"_run_chat_mode: session created id={session_id}") @@ -5448,21 +4555,8 @@ def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> _close_chat_debug() return 1 - # Open the chat transcript (parity with phase mode, which writes - # tmp/last-phase---attempt-N.jsonl). 
We use a - # filename that includes both a timestamp and the PID so successive - # runs (or several runs from different shells) don't clobber each - # other. Open line-buffered so the file is durable across crashes. - stamp = time.strftime("%Y%m%d-%H%M%S") - transcript_dir = ROOT / "tmp" - transcript_dir.mkdir(parents=True, exist_ok=True) - transcript_path = transcript_dir / f"last-chat-{stamp}-pid{os.getpid()}.jsonl" - transcript_fp = None - try: - transcript_fp = transcript_path.open("w", encoding="utf-8", buffering=1) - _chat_debug(f"_run_chat_mode: opened transcript {transcript_path}") - except OSError as exc: - _chat_debug(f"_run_chat_mode: could not open transcript {transcript_path}: {exc}") + # Open the chat transcript (parity with phase mode). + transcript_path, transcript_fp = open_chat_transcript() _chat_debug("_run_chat_mode: creating ChatApp") app = None @@ -5486,12 +4580,7 @@ def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> _chat_debug("_run_chat_mode: stopping chat loop") app.chat_loop.stop() runner.stop() - if transcript_fp is not None: - try: - transcript_fp.flush() - transcript_fp.close() - except OSError: - pass + close_transcript(transcript_fp) # Final summary banner on the restored terminal. Mirrors phase # mode's success-path summary. 
@@ -5557,7 +4646,7 @@ def main() -> int: model, variant, model_source, variant_source = resolve_model_and_variant( args.agent, extra_args ) - thinking_on, thinking_source = _resolve_thinking_decision(model, extra_args) + thinking_on, thinking_source = resolve_thinking_decision(model, extra_args) model_label = model or "(unknown)" variant_label = variant or "(unknown)" @@ -5735,7 +4824,7 @@ def _forward_signal(signum: int, _frame: Any) -> None: else: print(C.warn(msg)) if last_session_id and last_session_id != "id": - prompt = _build_frontmatter_resume_prompt(args.phase, args.finding, validation_output) + prompt = build_frontmatter_resume_prompt(args.phase, args.finding, validation_output) continue else: returncode = 2 @@ -5773,7 +4862,7 @@ def _forward_signal(signum: int, _frame: Any) -> None: else: print(C.warn(msg)) if last_session_id and last_session_id != "id": - prompt = _build_phase_resume_prompt( + prompt = build_phase_resume_prompt( args.phase, args.finding, last_finish_reason, step_finish_count ) continue From cba2471f2dc85d68c479deb0b10854ed0c5ea91a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 24 May 2026 19:25:53 +0200 Subject: [PATCH 05/65] refactor(phase-a2): add rendering package foundation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create tools/rendering/ with architecture classes — no renderers migrated yet. The package provides: - RenderSettings — 20+ display tunables from env vars - SnapshotCache — file content snapshot for write/edit diffs - RenderSink — protocol + PlainSink, RichConsoleSink, TextualRichLogSink - RenderContext — root, sink, settings, cache bundle - EventRenderer — base for generic SSE event renderers - ToolRenderer — base for per-tool renderers (incl. 
fallback) - RendererRegistry — dispatches events/tools to registered renderers - CommandExecutionInterceptor — protocol for specialised bash rendering Tests: 33 new (315 passed, 0 failed, 0 errors) --- tests/test_rendering_context.py | 38 +++++ tests/test_rendering_registry.py | 118 +++++++++++++++ tests/test_rendering_sinks.py | 89 ++++++++++++ tests/test_rendering_snapshot_cache.py | 86 +++++++++++ tools/rendering/__init__.py | 11 ++ tools/rendering/cache.py | 91 ++++++++++++ .../command_interceptors/__init__.py | 9 ++ tools/rendering/command_interceptors/base.py | 40 ++++++ tools/rendering/context.py | 30 ++++ tools/rendering/events.py | 49 +++++++ tools/rendering/registry.py | 67 +++++++++ tools/rendering/settings.py | 136 ++++++++++++++++++ tools/rendering/sink.py | 105 ++++++++++++++ tools/rendering/tools/__init__.py | 8 ++ tools/rendering/tools/base.py | 90 ++++++++++++ 15 files changed, 967 insertions(+) create mode 100644 tests/test_rendering_context.py create mode 100644 tests/test_rendering_registry.py create mode 100644 tests/test_rendering_sinks.py create mode 100644 tests/test_rendering_snapshot_cache.py create mode 100644 tools/rendering/__init__.py create mode 100644 tools/rendering/cache.py create mode 100644 tools/rendering/command_interceptors/__init__.py create mode 100644 tools/rendering/command_interceptors/base.py create mode 100644 tools/rendering/context.py create mode 100644 tools/rendering/events.py create mode 100644 tools/rendering/registry.py create mode 100644 tools/rendering/settings.py create mode 100644 tools/rendering/sink.py create mode 100644 tools/rendering/tools/__init__.py create mode 100644 tools/rendering/tools/base.py diff --git a/tests/test_rendering_context.py b/tests/test_rendering_context.py new file mode 100644 index 0000000..bc24c7e --- /dev/null +++ b/tests/test_rendering_context.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +ROOT = 
Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from rendering.context import RenderContext +from rendering.sink import PlainSink +from rendering.settings import RenderSettings +from rendering.cache import SnapshotCache + + +class TestRenderContext: + def test_construction(self): + ctx = RenderContext( + root=Path("/fake"), + sink=PlainSink(), + settings=RenderSettings(), + cache=SnapshotCache(), + ) + assert ctx.root == Path("/fake") + assert isinstance(ctx.sink, PlainSink) + assert isinstance(ctx.settings, RenderSettings) + assert isinstance(ctx.cache, SnapshotCache) + + def test_cache_is_shared(self): + cache = SnapshotCache() + ctx = RenderContext( + root=Path("/x"), + sink=PlainSink(), + settings=RenderSettings(), + cache=cache, + ) + assert ctx.cache is cache diff --git a/tests/test_rendering_registry.py b/tests/test_rendering_registry.py new file mode 100644 index 0000000..aaca04c --- /dev/null +++ b/tests/test_rendering_registry.py @@ -0,0 +1,118 @@ +from __future__ import annotations + +import sys +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from rendering.context import RenderContext +from rendering.events import EventRenderer +from rendering.registry import RendererRegistry +from rendering.tools.base import ToolRenderer +from rendering.sink import PlainSink + + +@pytest.fixture +def registry(): + ctx = RenderContext( + root=Path("/fake"), + sink=PlainSink(), + settings=MagicMock(), + cache=MagicMock(), + ) + return RendererRegistry(ctx) + + +class TestRegistryEventDispatch: + def test_dispatches_to_matching_event_renderer(self, registry): + events_seen = [] + + class MyRenderer(EventRenderer): + event_types = ("foo",) + def render(self, event): + events_seen.append(event["type"]) + return True + + registry.register_event(MyRenderer(registry.context)) + registry.dispatch_event({"type": "foo"}) + 
assert events_seen == ["foo"] + + def test_dispatches_to_first_matching_event_renderer(self, registry): + order = [] + + class First(EventRenderer): + event_types = ("bar",) + def render(self, event): + order.append("first") + return True + + class Second(EventRenderer): + event_types = ("bar",) + def render(self, event): + order.append("second") + return True + + registry.register_event(First(registry.context)) + registry.register_event(Second(registry.context)) + registry.dispatch_event({"type": "bar"}) + assert order == ["first"] + + def test_fallback_handles_unknown_event(self, registry, capsys): + registry.dispatch_event({"type": "unknown.weird"}) + out = capsys.readouterr().out + assert "unknown event type" in out.lower() + + def test_fallback_on_non_handling_renderer(self, registry): + class NonHandler(EventRenderer): + event_types = ("baz",) + def render(self, event): + return False + + registry.register_event(NonHandler(registry.context)) + registry.dispatch_event({"type": "baz"}) + + +class TestRegistryToolDispatch: + def test_dispatches_to_matching_tool_renderer(self, registry): + seen = [] + + class MyTool(ToolRenderer): + tool_names = ("read",) + def render(self, tool_name, state): + seen.append(tool_name) + return True + + registry.register_tool(MyTool(registry.context)) + registry.dispatch_tool("read", {"status": "completed"}) + assert seen == ["read"] + + def test_tool_name_normalisation(self, registry): + seen = [] + + class MyTool(ToolRenderer): + tool_names = ("read",) + def render(self, tool_name, state): + seen.append(tool_name) + return True + + registry.register_tool(MyTool(registry.context)) + registry.dispatch_tool(" Read ", {"status": "completed"}) + assert seen == [" Read "] + + def test_fallback_tool_renderer(self, registry, capsys): + registry.dispatch_tool("unknown_tool", {"status": "completed", "input": {"x": 1}}) + out = capsys.readouterr().out + assert "unknown_tool" in out + + def test_fallback_on_non_handling_tool_renderer(self, 
registry): + class NonHandler(ToolRenderer): + tool_names = ("grep",) + def render(self, tool_name, state): + return False + + registry.register_tool(NonHandler(registry.context)) + registry.dispatch_tool("grep", {"status": "completed"}) diff --git a/tests/test_rendering_sinks.py b/tests/test_rendering_sinks.py new file mode 100644 index 0000000..a2dd650 --- /dev/null +++ b/tests/test_rendering_sinks.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +import sys +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from rendering.sink import PlainSink, RichConsoleSink, TextualRichLogSink, RenderSink + + +class TestPlainSink: + def test_mode(self): + sink = PlainSink() + assert sink.mode == "plain" + + def test_write_text(self, capsys): + sink = PlainSink() + sink.write_text("hello") + out = capsys.readouterr().out + assert "hello" in out + + def test_write_string(self, capsys): + sink = PlainSink() + sink.write("hello") + out = capsys.readouterr().out + assert "hello" in out + + def test_isinstance_checks(self): + sink = PlainSink() + assert isinstance(sink, RenderSink) + + +class TestRichConsoleSink: + def test_mode(self): + from rich.console import Console + sink = RichConsoleSink(Console()) + assert sink.mode == "rich" + + def test_write_delegates_to_console(self): + from rich.console import Console + console = Console(record=True) + sink = RichConsoleSink(console) + from rich.text import Text + sink.write(Text("hi")) + exported = console.export_text() + assert "hi" in exported + + def test_console_property(self): + from rich.console import Console + console = Console() + sink = RichConsoleSink(console) + assert sink.console is console + + def test_isinstance_checks(self): + from rich.console import Console + sink = RichConsoleSink(Console()) + assert isinstance(sink, RenderSink) + + +class TestTextualRichLogSink: + def test_mode(self): + 
proxy = MagicMock() + sink = TextualRichLogSink(proxy) + assert sink.mode == "textual" + + def test_write_delegates_to_target(self): + proxy = MagicMock() + sink = TextualRichLogSink(proxy) + sink.write("hello", expand=True) + proxy.write.assert_called_once() + + def test_write_text_delegates(self): + proxy = MagicMock() + sink = TextualRichLogSink(proxy) + sink.write_text("hello") + proxy.write.assert_called_once() + + def test_target_property(self): + proxy = MagicMock() + sink = TextualRichLogSink(proxy) + assert sink.target is proxy + + def test_isinstance_checks(self): + sink = TextualRichLogSink(MagicMock()) + assert isinstance(sink, RenderSink) diff --git a/tests/test_rendering_snapshot_cache.py b/tests/test_rendering_snapshot_cache.py new file mode 100644 index 0000000..b88a177 --- /dev/null +++ b/tests/test_rendering_snapshot_cache.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from rendering.cache import SnapshotCache + + +class TestSnapshotCache: + def test_set_and_get(self, tmp_path): + cache = SnapshotCache() + path = tmp_path / "test.txt" + path.write_text("hello", encoding="utf-8") + cache.set(str(path), "hello") + assert cache.get(str(path)) == "hello" + + def test_get_returns_none_for_unknown(self): + cache = SnapshotCache() + assert cache.get("/no/such/path") is None + + def test_disabled_cache_returns_none(self): + cache = SnapshotCache(enabled=False) + cache.set("/x", "y") + assert cache.get("/x") is None + + def test_invalidate_stale_removes_deleted(self, tmp_path): + cache = SnapshotCache() + path = tmp_path / "stale.txt" + path.write_text("old", encoding="utf-8") + cache.set(str(path), "old") + path.unlink() + cache.invalidate_stale() + assert cache.get(str(path)) is None + + def test_invalidate_stale_removes_modified(self, tmp_path): + cache = SnapshotCache() + path = tmp_path / "mod.txt" 
+ path.write_text("v1", encoding="utf-8") + cache.set(str(path), "v1") + path.write_text("v2", encoding="utf-8") + cache.invalidate_stale() + assert cache.get(str(path)) is None + + def test_reread(self, tmp_path): + cache = SnapshotCache() + path = tmp_path / "reread.txt" + path.write_text("before", encoding="utf-8") + cache.set(str(path), "before") + path.write_text("after", encoding="utf-8") + cache.reread(str(path)) + assert cache.get(str(path)) == "after" + + def test_reread_missing_file(self, tmp_path): + cache = SnapshotCache() + path = tmp_path / "missing.txt" + path.write_text("x", encoding="utf-8") + cache.set(str(path), "x") + path.unlink() + cache.reread(str(path)) + assert cache.get(str(path)) is None + + def test_lru_eviction(self, tmp_path): + cache = SnapshotCache(max_entries=2) + files = [] + for i in range(4): + p = tmp_path / f"f{i}.txt" + p.write_text(str(i), encoding="utf-8") + files.append(p) + cache.set(str(p), str(i)) + assert cache.get(str(files[0])) is None + assert cache.get(str(files[1])) is None + assert cache.get(str(files[2])) == "2" + assert cache.get(str(files[3])) == "3" + + def test_disabled_invalidate_is_noop(self): + cache = SnapshotCache(enabled=False) + cache.invalidate_stale() + + def test_disabled_reread_is_noop(self): + cache = SnapshotCache(enabled=False) + cache.reread("/x") diff --git a/tools/rendering/__init__.py b/tools/rendering/__init__.py new file mode 100644 index 0000000..d1933c1 --- /dev/null +++ b/tools/rendering/__init__.py @@ -0,0 +1,11 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Rendering package: tool/event renderer classes, render context, sinks, cache. + +Renderers receive normalized dict events/tool-states and write through +a RenderSink to plain stdout, a Rich Console, or a Textual RichLog. 
+""" + +from __future__ import annotations diff --git a/tools/rendering/cache.py b/tools/rendering/cache.py new file mode 100644 index 0000000..e0e0eae --- /dev/null +++ b/tools/rendering/cache.py @@ -0,0 +1,91 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +SnapshotCache — file content snapshot for diff computation. + +Isolated state so renderers (read/write/edit/apply_patch) can show +diffs without relying on module-level globals. +""" + +from __future__ import annotations + +import os +from collections import OrderedDict +from pathlib import Path + + +class SnapshotCache: + """LRU cache of file content snapshots keyed by absolute path. + + Used by Write/Edit/ApplyPatch renderers to compute what changed. + """ + + def __init__(self, *, enabled: bool = True, max_entries: int = 200) -> None: + self._enabled = enabled + self._max = max_entries + self._entries: OrderedDict[str, tuple[str, float]] = OrderedDict() + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + def set(self, path: str, content: str) -> None: + """Cache *content* for *path*, recording its current mtime.""" + if not self._enabled: + return + mtime = self._current_mtime(path) + if mtime is None: + return + self._entries[path] = (content, mtime) + self._entries.move_to_end(path) + while len(self._entries) > self._max: + self._entries.popitem(last=False) + + def get(self, path: str) -> str | None: + """Return cached content for *path*, or None.""" + if not self._enabled: + return None + entry = self._entries.get(path) + if entry is None: + return None + return entry[0] + + def invalidate_stale(self) -> None: + """Remove entries whose file has been deleted or modified.""" + if not self._enabled: + return + stale = [] + for p, (_, recorded_mtime) in self._entries.items(): + actual = self._current_mtime(p) + if actual is 
None or actual != recorded_mtime: + stale.append(p) + for p in stale: + del self._entries[p] + + def reread(self, path: str) -> None: + """Invalidate and re-read *path* from disk.""" + if not self._enabled: + return + if path in self._entries: + del self._entries[path] + try: + content = Path(path).read_text(encoding="utf-8", errors="replace") + self.set(path, content) + except OSError: + pass + + @property + def enabled(self) -> bool: + return self._enabled + + # ------------------------------------------------------------------ + # Internal + # ------------------------------------------------------------------ + + @staticmethod + def _current_mtime(path: str) -> float | None: + try: + return os.stat(path).st_mtime + except OSError: + return None diff --git a/tools/rendering/command_interceptors/__init__.py b/tools/rendering/command_interceptors/__init__.py new file mode 100644 index 0000000..493e179 --- /dev/null +++ b/tools/rendering/command_interceptors/__init__.py @@ -0,0 +1,9 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Command execution interceptors — specialised rendering for +CodeCome-aware bash invocations (sandbox-bootstrap, rtk, rg, ls, …). +""" + +from __future__ import annotations diff --git a/tools/rendering/command_interceptors/base.py b/tools/rendering/command_interceptors/base.py new file mode 100644 index 0000000..045a710 --- /dev/null +++ b/tools/rendering/command_interceptors/base.py @@ -0,0 +1,40 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +CommandExecutionInterceptor — protocol for specialised bash rendering. + +Interceptors receive a bash command string and the tool state dict. +They try to recognise and render the output with specialised styling +(sandbox-bootstrap JSON, rtk read/grep, rg, ls, find, tree, …). +When no interceptor matches, the generic bash renderer takes over. 
+""" + +from __future__ import annotations + +from typing import Any, Protocol, runtime_checkable + +from rendering.tools.base import ToolRenderer + + +@runtime_checkable +class CommandExecutionInterceptor(Protocol): + """Protocol for specialised command rendering. + + Implementations are called in registration order by the + CommandRenderer. The first interceptor that returns True wins. + """ + + name: str + + def try_render( + self, + command: str, + state: dict[str, Any], + renderer: "ToolRenderer", + ) -> bool: + """Attempt to render *command* with *state*. + + Returns True if the interceptor handled the command. + """ + ... diff --git a/tools/rendering/context.py b/tools/rendering/context.py new file mode 100644 index 0000000..7e70e54 --- /dev/null +++ b/tools/rendering/context.py @@ -0,0 +1,30 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +RenderContext — shared runtime state for the render pipeline. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +from rendering.cache import SnapshotCache +from rendering.settings import RenderSettings +from rendering.sink import RenderSink + + +@dataclass +class RenderContext: + """Shared runtime context for all renderers in a single run. + + Created once at startup and passed to every renderer. Carries the + workspace root, the configured sink, display tunables, and the + snapshot cache used by write/edit/apply_patch renderers. + """ + + root: Path + sink: RenderSink + settings: RenderSettings + cache: SnapshotCache diff --git a/tools/rendering/events.py b/tools/rendering/events.py new file mode 100644 index 0000000..bbe4b6e --- /dev/null +++ b/tools/rendering/events.py @@ -0,0 +1,49 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Base classes for generic (non-tool) event renderers. 
+ +Event renderers receive the full normalized event dict and write +output through the render context's sink. +""" + +from __future__ import annotations + +from typing import Any + +from rendering.context import RenderContext + + +class EventRenderer: + """Base class for renderers that handle generic SSE events. + + Subclasses declare which event types they handle via ``event_types``. + The registry will dispatch each event to the first matching renderer. + """ + + event_types: tuple[str, ...] = () + + def __init__(self, context: RenderContext) -> None: + self.context = context + + def render(self, event: dict[str, Any]) -> bool: + """Render *event*. Return True if handled, False to fall through.""" + raise NotImplementedError + + +class UnknownEventRenderer(EventRenderer): + """Fallback renderer for unrecognised event types.""" + + def render(self, event: dict[str, Any]) -> bool: + event_type = event.get("type", "") + if event_type == "message.part.updated": + part_type = event.get("part", {}).get("type", "") + message = f"unknown part type: {part_type}" + else: + message = f"unknown event type: {event_type}" + self.context.sink.write_text(message) + if self.context.settings.debug_unknown_events: + import json + self.context.sink.write_text(json.dumps(event, indent=2, default=str)) + return True diff --git a/tools/rendering/registry.py b/tools/rendering/registry.py new file mode 100644 index 0000000..52ba302 --- /dev/null +++ b/tools/rendering/registry.py @@ -0,0 +1,67 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +RendererRegistry — dispatches events to event/tool renderers. + +Matches each event by type, then delegates to the first registered +renderer that declares a matching ``event_types`` or ``tool_names``. 
+""" + +from __future__ import annotations + +from typing import Any + +from rendering.context import RenderContext +from rendering.events import EventRenderer, UnknownEventRenderer +from rendering.tools.base import FallbackToolRenderer, ToolRenderer + + +class RendererRegistry: + """Dispatch events and tool calls to registered renderers. + + Registration order matters: the first matching renderer wins. + The fallback renderers are registered last and catch anything + that no specific renderer handled. + """ + + def __init__(self, context: RenderContext) -> None: + self.context = context + self._event_renderers: list[EventRenderer] = [] + self._tool_renderers: list[ToolRenderer] = [] + + # Register fallbacks last (lowest priority). + self._unknown = UnknownEventRenderer(context) + self._fallback_tool = FallbackToolRenderer(context) + + # ------------------------------------------------------------------ + # Registration + # ------------------------------------------------------------------ + + def register_event(self, renderer: EventRenderer) -> None: + self._event_renderers.append(renderer) + + def register_tool(self, renderer: ToolRenderer) -> None: + self._tool_renderers.append(renderer) + + # ------------------------------------------------------------------ + # Dispatch + # ------------------------------------------------------------------ + + def dispatch_event(self, event: dict[str, Any]) -> None: + """Render a generic event through the matching renderer.""" + event_type = event.get("type", "") + for renderer in self._event_renderers: + if not renderer.event_types or event_type in renderer.event_types: + if renderer.render(event): + return + self._unknown.render(event) + + def dispatch_tool(self, tool_name: str, state: dict[str, Any]) -> None: + """Render a tool call through the matching renderer.""" + tool_lower = tool_name.strip().lower() + for renderer in self._tool_renderers: + if not renderer.tool_names or tool_lower in renderer.tool_names: + if 
renderer.render(tool_name, state): + return + self._fallback_tool.render(tool_name, state) diff --git a/tools/rendering/settings.py b/tools/rendering/settings.py new file mode 100644 index 0000000..4f930ce --- /dev/null +++ b/tools/rendering/settings.py @@ -0,0 +1,136 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +RenderSettings — all display tunables, initialised from env vars. + +Replaces the ~30 module-level globals currently in run-agent.py. +""" + +from __future__ import annotations + +import os +from dataclasses import dataclass, field + + +def _truthy_env(name: str) -> bool: + value = os.environ.get(name) + return value is not None and value not in {"", "0", "false", "False", "no", "No"} + + +@dataclass +class RenderSettings: + """Immutable display tunables for the render pipeline. + + All values are resolved at creation time from environment variables. + Individual renderers may also accept CLI-override values. + + Create with ``RenderSettings(override_kwargs...)`` for ad-hoc test configs. 
+ """ + + # --- Read ------------------------------------------------------------ + read_display_lines: int = 10 + read_highlight_limit: int = 200 * 1024 + + # --- Write ----------------------------------------------------------- + write_content_lines: int = 25 + write_diff_limit: int = 50 + + # --- Edit ------------------------------------------------------------ + edit_diff_lines: int = 25 + + # --- Apply-patch ----------------------------------------------------- + apply_patch_diff_lines: int = 25 + apply_patch_max_files: int = 10 + + # --- Glob ------------------------------------------------------------ + glob_match_cap: int = 10 + + # --- Grep ------------------------------------------------------------ + grep_file_cap: int = 50 + grep_line_cap_per_file: int = 5 + grep_total_line_cap: int = 200 + grep_highlight: bool = True + + # --- Reasoning ------------------------------------------------------- + reasoning_max_chars: int = 4000 + render_reasoning: bool = True + + # --- Debug ----------------------------------------------------------- + debug_unknown_events: bool = False + + # --- Sandbox --------------------------------------------------------- + sandbox_render: bool = True + sandbox_validate_stderr_lines: int = 20 + sandbox_files_cap: int = 15 + + # --- Bash-shim ------------------------------------------------------- + bash_shim_render: bool = True + bash_shim_ls_strip_long_format: bool = True + + # --- Internal read suppression --------------------------------------- + internal_read_suppress: bool = True + + # --- Subagent -------------------------------------------------------- + subagent_heartbeat_interval_s: int = 30 + subagent_update_throttle_s: int = 5 + task_prompt_preview_lines: int = 5 + render_subagent_updates: bool = True + + # --- Snapshot cache -------------------------------------------------- + write_cache_enabled: bool = True + write_cache_cap: int = 200 + + @classmethod + def from_env(cls) -> "RenderSettings": + """Create settings from 
environment variables.""" + return cls( + read_display_lines=int(os.environ.get("CODECOME_READ_DISPLAY_LINES", "10")), + read_highlight_limit=int(os.environ.get("CODECOME_READ_HIGHLIGHT_LIMIT", str(200 * 1024))), + write_content_lines=int(os.environ.get("CODECOME_WRITE_CONTENT_LINES", "25")), + write_diff_limit=int(os.environ.get("CODECOME_WRITE_DIFF_LIMIT", "50")), + edit_diff_lines=int(os.environ.get("CODECOME_EDIT_DIFF_LINES", "25")), + apply_patch_diff_lines=int(os.environ.get( + "CODECOME_APPLY_PATCH_DIFF_LINES", + str(int(os.environ.get("CODECOME_EDIT_DIFF_LINES", "25"))), + )), + apply_patch_max_files=int(os.environ.get("CODECOME_APPLY_PATCH_MAX_FILES", "10")), + glob_match_cap=int(os.environ.get("CODECOME_GLOB_MATCH_CAP", "10")), + grep_file_cap=int(os.environ.get("CODECOME_GREP_FILE_CAP", "50")), + grep_line_cap_per_file=int(os.environ.get("CODECOME_GREP_LINE_CAP_PER_FILE", "5")), + grep_total_line_cap=int(os.environ.get("CODECOME_GREP_TOTAL_LINE_CAP", "200")), + grep_highlight=_truthy_env("CODECOME_GREP_HIGHLIGHT") + if "CODECOME_GREP_HIGHLIGHT" in os.environ + else True, + reasoning_max_chars=int(os.environ.get("CODECOME_REASONING_MAX_CHARS", "4000")), + render_reasoning=_truthy_env("CODECOME_RENDER_REASONING") + if "CODECOME_RENDER_REASONING" in os.environ + else True, + debug_unknown_events=_truthy_env("CODECOME_DEBUG_UNKNOWN_EVENTS") + if "CODECOME_DEBUG_UNKNOWN_EVENTS" in os.environ + else False, + sandbox_render=_truthy_env("CODECOME_SANDBOX_RENDER") + if "CODECOME_SANDBOX_RENDER" in os.environ + else True, + sandbox_validate_stderr_lines=int(os.environ.get("CODECOME_SANDBOX_VALIDATE_STDERR_LINES", "20")), + sandbox_files_cap=int(os.environ.get("CODECOME_SANDBOX_FILES_CAP", "15")), + bash_shim_render=_truthy_env("CODECOME_BASH_SHIM_RENDER") + if "CODECOME_BASH_SHIM_RENDER" in os.environ + else True, + bash_shim_ls_strip_long_format=_truthy_env("CODECOME_BASH_SHIM_LS_STRIP_LONG_FORMAT") + if "CODECOME_BASH_SHIM_LS_STRIP_LONG_FORMAT" in os.environ + 
else True, + internal_read_suppress=_truthy_env("CODECOME_INTERNAL_READ_SUPPRESS") + if "CODECOME_INTERNAL_READ_SUPPRESS" in os.environ + else True, + subagent_heartbeat_interval_s=int(os.environ.get("CODECOME_SUBAGENT_HEARTBEAT_INTERVAL_S", "30")), + subagent_update_throttle_s=int(os.environ.get("CODECOME_SUBAGENT_UPDATE_THROTTLE_S", "5")), + task_prompt_preview_lines=int(os.environ.get("CODECOME_TASK_PROMPT_PREVIEW_LINES", "5")), + render_subagent_updates=_truthy_env("CODECOME_RENDER_SUBAGENT_UPDATES") + if "CODECOME_RENDER_SUBAGENT_UPDATES" in os.environ + else True, + write_cache_enabled=_truthy_env("CODECOME_WRITE_CACHE") + if "CODECOME_WRITE_CACHE" in os.environ + else True, + write_cache_cap=int(os.environ.get("CODECOME_WRITE_CACHE_CAP", "200")), + ) diff --git a/tools/rendering/sink.py b/tools/rendering/sink.py new file mode 100644 index 0000000..62416f4 --- /dev/null +++ b/tools/rendering/sink.py @@ -0,0 +1,105 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +RenderSink — destination abstraction for renderer output. + +Three implementations: + - PlainSink — writes plain strings to stdout (no Rich dependency) + - RichConsoleSink — delegates to a rich.console.Console + - TextualRichLogSink — delegates to a Textual RichLog or thread-safe proxy +""" + +from __future__ import annotations + +from typing import Any, Literal, Protocol, runtime_checkable + +__all__ = [ + "RenderSink", + "PlainSink", + "RichConsoleSink", + "TextualRichLogSink", +] + + +@runtime_checkable +class RenderSink(Protocol): + """Destination for rendered output. + + The sink abstracts *where* output goes but does not restrict *what* + renderers can emit. Rich and Textual renderers may emit any Rich + renderable (Panel, Group, Text, Table, Syntax, Rule, Markdown, …); + the Plain branch emits only plain strings. 
+ """ + + mode: Literal["plain", "rich", "textual"] + + def write(self, renderable: Any, *, expand: bool = True) -> None: + """Write a Rich renderable or plain string.""" + ... + + def write_text(self, text: str) -> None: + """Write a plain string (always safe, any mode).""" + ... + + +class PlainSink: + """Writes plain strings to stdout. No ANSI or Rich dependency.""" + + mode: Literal["plain"] = "plain" + + def write(self, renderable: Any, *, expand: bool = True) -> None: + import sys + if isinstance(renderable, str): + self.write_text(renderable) + else: + # Minimal fallback: str() the renderable. + self.write_text(str(renderable)) + + def write_text(self, text: str) -> None: + import sys + sys.stdout.write(text) + sys.stdout.write("\n") + + +class RichConsoleSink: + """Delegates to a rich.console.Console.""" + + mode: Literal["rich"] = "rich" + + def __init__(self, console: Any) -> None: + self._console = console + + @property + def console(self) -> Any: + return self._console + + def write(self, renderable: Any, *, expand: bool = True) -> None: + self._console.print(renderable, overflow="ignore", crop=False) + + def write_text(self, text: str) -> None: + self._console.print(text, overflow="ignore", crop=False) + + +class TextualRichLogSink: + """Delegates to a Textual RichLog or a thread-safe proxy. + + In chat mode, the entry point wires a ``TextualConsoleProxy`` or + similar object that implements ``write(renderable)``. + """ + + mode: Literal["textual"] = "textual" + + def __init__(self, rich_log_or_proxy: Any) -> None: + self._target = rich_log_or_proxy + + @property + def target(self) -> Any: + return self._target + + def write(self, renderable: Any, *, expand: bool = True) -> None: + # The proxy's .write() is thread-safe (post_message in Textual). 
+ self._target.write(renderable, expand=expand) # type: ignore[call-arg] + + def write_text(self, text: str) -> None: + self.write(text) diff --git a/tools/rendering/tools/__init__.py b/tools/rendering/tools/__init__.py new file mode 100644 index 0000000..402988e --- /dev/null +++ b/tools/rendering/tools/__init__.py @@ -0,0 +1,8 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Tool renderer classes — one per OpenCode tool family. +""" + +from __future__ import annotations diff --git a/tools/rendering/tools/base.py b/tools/rendering/tools/base.py new file mode 100644 index 0000000..4be3bce --- /dev/null +++ b/tools/rendering/tools/base.py @@ -0,0 +1,90 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Base class for tool-specific renderers. + +Tool renderers receive a tool name and a tool state dict extracted from +a ``tool_use`` event. Renderers are instantiated once with the shared +``RenderContext``. +""" + +from __future__ import annotations + +from typing import Any + +from rendering.context import RenderContext + + +class ToolRenderer: + """Base class for per-tool renderers. + + Subclasses declare which tool names they handle via ``tool_names`` + (e.g. ``("read",)``, ``("bash",)``, ``("write", "edit")``). + """ + + tool_names: tuple[str, ...] = () + + def __init__(self, context: RenderContext) -> None: + self.context = context + + def render(self, tool_name: str, state: dict[str, Any]) -> bool: + """Render a tool call. Return True if handled, False to fall through.""" + raise NotImplementedError + + +class FallbackToolRenderer(ToolRenderer): + """Fallback renderer for tools that have no specific renderer. + + Emits the raw tool state as JSON for Rich mode, or a simple header + for plain mode. 
+ """ + + tool_names = () # catches all unhandled tools + + def render(self, tool_name: str, state: dict[str, Any]) -> bool: + import json + + status = str(state.get("status", "unknown")) + input_data = state.get("input") + output_data = state.get("output") + + if self.context.sink.mode in ("rich", "textual"): + from rich.console import Group + from rich.json import JSON + from rich.panel import Panel + from rich.text import Text + + sections: list[Any] = [] + if input_data is not None: + sections.append(Text("Input", style="bold cyan")) + try: + sections.append(JSON.from_data(input_data)) + except Exception: + sections.append(Text(str(input_data))) + if output_data is not None: + if sections: + sections.append(Text()) + sections.append(Text("Output", style="bold green")) + if isinstance(output_data, (dict, list)): + try: + sections.append(JSON.from_data(output_data)) + except Exception: + sections.append(Text(str(output_data))) + else: + sections.append(Text(str(output_data))) + + body = Group(*sections) if sections else Text("No tool payload", style="dim") + title = f"Tool: {tool_name} [{status}]" + border = "green" if status == "completed" else "yellow" + self.context.sink.write(Panel(body, title=title, border_style=border, expand=True)) + else: + import _colors as C + print(C.header(f"Tool: {tool_name} [{status}]")) + if input_data is not None: + print(C.info("Input")) + print(json.dumps(input_data, indent=2) if isinstance(input_data, (dict, list)) else str(input_data)) + if output_data is not None: + print(C.info("Output")) + print(json.dumps(output_data, indent=2) if isinstance(output_data, (dict, list)) else str(output_data)) + return True From 51814802c37aeb305f9fe07d9586b701930183a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 24 May 2026 21:04:11 +0200 Subject: [PATCH 06/65] fix(phase-a2): add BaseRenderer, de-duplicate kimi artefacts - Create rendering/base.py with shared sink/rich/plain properties - EventRenderer and 
ToolRenderer now inherit from BaseRenderer - Remove 4x/6x duplicated import/__all__ blocks from 4 files - Keep kimi improvements: str|PathLike in cache, end param in sink, truthy_env import, _int_env/_bool_env helpers --- tools/rendering/__init__.py | 25 +++++ tools/rendering/base.py | 37 ++++++++ .../command_interceptors/__init__.py | 4 + tools/rendering/events.py | 11 +-- tools/rendering/settings.py | 91 ++++++++----------- tools/rendering/tools/__init__.py | 4 + 6 files changed, 114 insertions(+), 58 deletions(-) create mode 100644 tools/rendering/base.py diff --git a/tools/rendering/__init__.py b/tools/rendering/__init__.py index d1933c1..9e95649 100644 --- a/tools/rendering/__init__.py +++ b/tools/rendering/__init__.py @@ -9,3 +9,28 @@ """ from __future__ import annotations + +from rendering.base import BaseRenderer +from rendering.cache import SnapshotCache +from rendering.context import RenderContext +from rendering.events import EventRenderer, UnknownEventRenderer +from rendering.registry import RendererRegistry +from rendering.settings import RenderSettings +from rendering.sink import PlainSink, RichConsoleSink, RenderSink, TextualRichLogSink +from rendering.tools.base import FallbackToolRenderer, ToolRenderer + +__all__ = [ + "BaseRenderer", + "RenderContext", + "RenderSettings", + "SnapshotCache", + "RenderSink", + "PlainSink", + "RichConsoleSink", + "TextualRichLogSink", + "RendererRegistry", + "EventRenderer", + "UnknownEventRenderer", + "ToolRenderer", + "FallbackToolRenderer", +] diff --git a/tools/rendering/base.py b/tools/rendering/base.py new file mode 100644 index 0000000..9c99c4d --- /dev/null +++ b/tools/rendering/base.py @@ -0,0 +1,37 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +BaseRenderer — shared helpers for all renderers. + +Every renderer receives a ``RenderContext`` at construction time and +inherits the ``sink``, ``rich``, and ``plain`` properties. 
+""" + +from __future__ import annotations + +from rendering.context import RenderContext +from rendering.sink import RenderSink + + +class BaseRenderer: + """Shared base for EventRenderer and ToolRenderer. + + Provides convenience accessors so individual renderers never need to + inspect the sink mode manually. + """ + + def __init__(self, context: RenderContext) -> None: + self.context = context + + @property + def sink(self) -> RenderSink: + return self.context.sink + + @property + def rich(self) -> bool: + return self.context.sink.mode in ("rich", "textual") + + @property + def plain(self) -> bool: + return self.context.sink.mode == "plain" diff --git a/tools/rendering/command_interceptors/__init__.py b/tools/rendering/command_interceptors/__init__.py index 493e179..a99b47b 100644 --- a/tools/rendering/command_interceptors/__init__.py +++ b/tools/rendering/command_interceptors/__init__.py @@ -7,3 +7,7 @@ """ from __future__ import annotations + +from rendering.command_interceptors.base import CommandExecutionInterceptor + +__all__ = ["CommandExecutionInterceptor"] diff --git a/tools/rendering/events.py b/tools/rendering/events.py index bbe4b6e..9c038cc 100644 --- a/tools/rendering/events.py +++ b/tools/rendering/events.py @@ -12,10 +12,10 @@ from typing import Any -from rendering.context import RenderContext +from rendering.base import BaseRenderer -class EventRenderer: +class EventRenderer(BaseRenderer): """Base class for renderers that handle generic SSE events. Subclasses declare which event types they handle via ``event_types``. @@ -24,9 +24,6 @@ class EventRenderer: event_types: tuple[str, ...] = () - def __init__(self, context: RenderContext) -> None: - self.context = context - def render(self, event: dict[str, Any]) -> bool: """Render *event*. 
Return True if handled, False to fall through.""" raise NotImplementedError @@ -42,8 +39,8 @@ def render(self, event: dict[str, Any]) -> bool: message = f"unknown part type: {part_type}" else: message = f"unknown event type: {event_type}" - self.context.sink.write_text(message) + self.sink.write_text(message) if self.context.settings.debug_unknown_events: import json - self.context.sink.write_text(json.dumps(event, indent=2, default=str)) + self.sink.write_text(json.dumps(event, indent=2, default=str)) return True diff --git a/tools/rendering/settings.py b/tools/rendering/settings.py index 4f930ce..5698ae0 100644 --- a/tools/rendering/settings.py +++ b/tools/rendering/settings.py @@ -10,12 +10,19 @@ from __future__ import annotations import os -from dataclasses import dataclass, field +from dataclasses import dataclass +from codecome.config import truthy_env -def _truthy_env(name: str) -> bool: - value = os.environ.get(name) - return value is not None and value not in {"", "0", "false", "False", "no", "No"} + +def _int_env(name: str, default: int) -> int: + return int(os.environ.get(name, str(default))) + + +def _bool_env(name: str, default: bool) -> bool: + if name in os.environ: + return truthy_env(name) + return default @dataclass @@ -85,52 +92,34 @@ class RenderSettings: def from_env(cls) -> "RenderSettings": """Create settings from environment variables.""" return cls( - read_display_lines=int(os.environ.get("CODECOME_READ_DISPLAY_LINES", "10")), - read_highlight_limit=int(os.environ.get("CODECOME_READ_HIGHLIGHT_LIMIT", str(200 * 1024))), - write_content_lines=int(os.environ.get("CODECOME_WRITE_CONTENT_LINES", "25")), - write_diff_limit=int(os.environ.get("CODECOME_WRITE_DIFF_LIMIT", "50")), - edit_diff_lines=int(os.environ.get("CODECOME_EDIT_DIFF_LINES", "25")), - apply_patch_diff_lines=int(os.environ.get( + read_display_lines=_int_env("CODECOME_READ_DISPLAY_LINES", 10), + read_highlight_limit=_int_env("CODECOME_READ_HIGHLIGHT_LIMIT", 200 * 1024), + 
write_content_lines=_int_env("CODECOME_WRITE_CONTENT_LINES", 25), + write_diff_limit=_int_env("CODECOME_WRITE_DIFF_LIMIT", 50), + edit_diff_lines=_int_env("CODECOME_EDIT_DIFF_LINES", 25), + apply_patch_diff_lines=_int_env( "CODECOME_APPLY_PATCH_DIFF_LINES", - str(int(os.environ.get("CODECOME_EDIT_DIFF_LINES", "25"))), - )), - apply_patch_max_files=int(os.environ.get("CODECOME_APPLY_PATCH_MAX_FILES", "10")), - glob_match_cap=int(os.environ.get("CODECOME_GLOB_MATCH_CAP", "10")), - grep_file_cap=int(os.environ.get("CODECOME_GREP_FILE_CAP", "50")), - grep_line_cap_per_file=int(os.environ.get("CODECOME_GREP_LINE_CAP_PER_FILE", "5")), - grep_total_line_cap=int(os.environ.get("CODECOME_GREP_TOTAL_LINE_CAP", "200")), - grep_highlight=_truthy_env("CODECOME_GREP_HIGHLIGHT") - if "CODECOME_GREP_HIGHLIGHT" in os.environ - else True, - reasoning_max_chars=int(os.environ.get("CODECOME_REASONING_MAX_CHARS", "4000")), - render_reasoning=_truthy_env("CODECOME_RENDER_REASONING") - if "CODECOME_RENDER_REASONING" in os.environ - else True, - debug_unknown_events=_truthy_env("CODECOME_DEBUG_UNKNOWN_EVENTS") - if "CODECOME_DEBUG_UNKNOWN_EVENTS" in os.environ - else False, - sandbox_render=_truthy_env("CODECOME_SANDBOX_RENDER") - if "CODECOME_SANDBOX_RENDER" in os.environ - else True, - sandbox_validate_stderr_lines=int(os.environ.get("CODECOME_SANDBOX_VALIDATE_STDERR_LINES", "20")), - sandbox_files_cap=int(os.environ.get("CODECOME_SANDBOX_FILES_CAP", "15")), - bash_shim_render=_truthy_env("CODECOME_BASH_SHIM_RENDER") - if "CODECOME_BASH_SHIM_RENDER" in os.environ - else True, - bash_shim_ls_strip_long_format=_truthy_env("CODECOME_BASH_SHIM_LS_STRIP_LONG_FORMAT") - if "CODECOME_BASH_SHIM_LS_STRIP_LONG_FORMAT" in os.environ - else True, - internal_read_suppress=_truthy_env("CODECOME_INTERNAL_READ_SUPPRESS") - if "CODECOME_INTERNAL_READ_SUPPRESS" in os.environ - else True, - subagent_heartbeat_interval_s=int(os.environ.get("CODECOME_SUBAGENT_HEARTBEAT_INTERVAL_S", "30")), - 
subagent_update_throttle_s=int(os.environ.get("CODECOME_SUBAGENT_UPDATE_THROTTLE_S", "5")), - task_prompt_preview_lines=int(os.environ.get("CODECOME_TASK_PROMPT_PREVIEW_LINES", "5")), - render_subagent_updates=_truthy_env("CODECOME_RENDER_SUBAGENT_UPDATES") - if "CODECOME_RENDER_SUBAGENT_UPDATES" in os.environ - else True, - write_cache_enabled=_truthy_env("CODECOME_WRITE_CACHE") - if "CODECOME_WRITE_CACHE" in os.environ - else True, - write_cache_cap=int(os.environ.get("CODECOME_WRITE_CACHE_CAP", "200")), + _int_env("CODECOME_EDIT_DIFF_LINES", 25), + ), + apply_patch_max_files=_int_env("CODECOME_APPLY_PATCH_MAX_FILES", 10), + glob_match_cap=_int_env("CODECOME_GLOB_MATCH_CAP", 10), + grep_file_cap=_int_env("CODECOME_GREP_FILE_CAP", 50), + grep_line_cap_per_file=_int_env("CODECOME_GREP_LINE_CAP_PER_FILE", 5), + grep_total_line_cap=_int_env("CODECOME_GREP_TOTAL_LINE_CAP", 200), + grep_highlight=_bool_env("CODECOME_GREP_HIGHLIGHT", True), + reasoning_max_chars=_int_env("CODECOME_REASONING_MAX_CHARS", 4000), + render_reasoning=_bool_env("CODECOME_RENDER_REASONING", True), + debug_unknown_events=_bool_env("CODECOME_DEBUG_UNKNOWN_EVENTS", False), + sandbox_render=_bool_env("CODECOME_SANDBOX_RENDER", True), + sandbox_validate_stderr_lines=_int_env("CODECOME_SANDBOX_VALIDATE_STDERR_LINES", 20), + sandbox_files_cap=_int_env("CODECOME_SANDBOX_FILES_CAP", 15), + bash_shim_render=_bool_env("CODECOME_BASH_SHIM_RENDER", True), + bash_shim_ls_strip_long_format=_bool_env("CODECOME_BASH_SHIM_LS_STRIP_LONG_FORMAT", True), + internal_read_suppress=_bool_env("CODECOME_INTERNAL_READ_SUPPRESS", True), + subagent_heartbeat_interval_s=_int_env("CODECOME_SUBAGENT_HEARTBEAT_INTERVAL_S", 30), + subagent_update_throttle_s=_int_env("CODECOME_SUBAGENT_UPDATE_THROTTLE_S", 5), + task_prompt_preview_lines=_int_env("CODECOME_TASK_PROMPT_PREVIEW_LINES", 5), + render_subagent_updates=_bool_env("CODECOME_RENDER_SUBAGENT_UPDATES", True), + write_cache_enabled=_bool_env("CODECOME_WRITE_CACHE", True), + 
write_cache_cap=_int_env("CODECOME_WRITE_CACHE_CAP", 200), ) diff --git a/tools/rendering/tools/__init__.py b/tools/rendering/tools/__init__.py index 402988e..d7166b0 100644 --- a/tools/rendering/tools/__init__.py +++ b/tools/rendering/tools/__init__.py @@ -6,3 +6,7 @@ """ from __future__ import annotations + +from rendering.tools.base import FallbackToolRenderer, ToolRenderer + +__all__ = ["FallbackToolRenderer", "ToolRenderer"] From ded9f59effa4bece41fbb67a1173874e4172427b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 24 May 2026 21:23:13 +0200 Subject: [PATCH 07/65] fix: address Phase A1/A2 PR review comments + kimi improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Move command_interceptors/ → tools/interceptors/ (per plan) 2. Propagate OSError from open_*_transcript to callers for diagnostic text preservation 3. Use real codecome.config import in monkeypatch tests instead of separately loaded module Includes kimi's Phase A2 refinements: - cache.py: str|PathLike support via os.fspath() - sink.py: end param on write_text, PlainSink flush, TextualRichLogSink TypeError fallback - tools/base.py: inherit from BaseRenderer, use sink.write_text() - Expanded sink/registry/snapshot_cache tests --- tests/test_rendering_registry.py | 10 +- tests/test_rendering_sinks.py | 208 +++++++++++++++++- tests/test_rendering_snapshot_cache.py | 7 + tests/test_run_agent.py | 17 +- tools/codecome/transcript.py | 14 +- tools/rendering/cache.py | 25 ++- tools/rendering/sink.py | 20 +- tools/rendering/tools/base.py | 23 +- .../interceptors}/__init__.py | 2 +- .../interceptors}/base.py | 2 +- tools/run-agent.py | 22 +- 11 files changed, 291 insertions(+), 59 deletions(-) rename tools/rendering/{command_interceptors => tools/interceptors}/__init__.py (82%) rename tools/rendering/{command_interceptors => tools/interceptors}/base.py (94%) diff --git a/tests/test_rendering_registry.py 
b/tests/test_rendering_registry.py index aaca04c..bac8b37 100644 --- a/tests/test_rendering_registry.py +++ b/tests/test_rendering_registry.py @@ -66,7 +66,7 @@ def test_fallback_handles_unknown_event(self, registry, capsys): out = capsys.readouterr().out assert "unknown event type" in out.lower() - def test_fallback_on_non_handling_renderer(self, registry): + def test_fallback_on_non_handling_renderer(self, registry, capsys): class NonHandler(EventRenderer): event_types = ("baz",) def render(self, event): @@ -74,6 +74,9 @@ def render(self, event): registry.register_event(NonHandler(registry.context)) registry.dispatch_event({"type": "baz"}) + # Should fall through to UnknownEventRenderer + out = capsys.readouterr().out + assert "unknown event type" in out.lower() class TestRegistryToolDispatch: @@ -108,7 +111,7 @@ def test_fallback_tool_renderer(self, registry, capsys): out = capsys.readouterr().out assert "unknown_tool" in out - def test_fallback_on_non_handling_tool_renderer(self, registry): + def test_fallback_on_non_handling_tool_renderer(self, registry, capsys): class NonHandler(ToolRenderer): tool_names = ("grep",) def render(self, tool_name, state): @@ -116,3 +119,6 @@ def render(self, tool_name, state): registry.register_tool(NonHandler(registry.context)) registry.dispatch_tool("grep", {"status": "completed"}) + # Should fall through to FallbackToolRenderer + out = capsys.readouterr().out + assert "grep" in out.lower() diff --git a/tests/test_rendering_sinks.py b/tests/test_rendering_sinks.py index a2dd650..b7a21b3 100644 --- a/tests/test_rendering_sinks.py +++ b/tests/test_rendering_sinks.py @@ -21,13 +21,20 @@ def test_write_text(self, capsys): sink = PlainSink() sink.write_text("hello") out = capsys.readouterr().out - assert "hello" in out + assert out == "hello\n" + + def test_write_text_with_custom_end(self, capsys): + sink = PlainSink() + sink.write_text("hello", end="") + sink.write_text("world", end="\n") + out = capsys.readouterr().out + assert 
out == "helloworld\n" def test_write_string(self, capsys): sink = PlainSink() sink.write("hello") out = capsys.readouterr().out - assert "hello" in out + assert out == "hello\n" def test_isinstance_checks(self): sink = PlainSink() @@ -73,6 +80,203 @@ def test_write_delegates_to_target(self): sink.write("hello", expand=True) proxy.write.assert_called_once() + def test_write_with_expand_not_supported_falls_back(self): + proxy = MagicMock() + proxy.write.side_effect = [TypeError("unexpected keyword"), None] + sink = TextualRichLogSink(proxy) + sink.write("hello", expand=True) + assert proxy.write.call_count == 2 + proxy.write.assert_called_with("hello") + + def test_write_text_delegates(self): + proxy = MagicMock() + sink = TextualRichLogSink(proxy) + sink.write_text("hello") + proxy.write.assert_called_once() + + def test_target_property(self): + proxy = MagicMock() + sink = TextualRichLogSink(proxy) + assert sink.target is proxy + + def test_isinstance_checks(self): + sink = TextualRichLogSink(MagicMock()) + assert isinstance(sink, RenderSink) + + +class TestRichConsoleSink: + def test_mode(self): + from rich.console import Console + sink = RichConsoleSink(Console()) + assert sink.mode == "rich" + + def test_write_delegates_to_console(self): + from rich.console import Console + console = Console(record=True) + sink = RichConsoleSink(console) + from rich.text import Text + sink.write(Text("hi")) + exported = console.export_text() + assert "hi" in exported + + def test_console_property(self): + from rich.console import Console + console = Console() + sink = RichConsoleSink(console) + assert sink.console is console + + def test_isinstance_checks(self): + from rich.console import Console + sink = RichConsoleSink(Console()) + assert isinstance(sink, RenderSink) + + +class TestTextualRichLogSink: + def test_mode(self): + proxy = MagicMock() + sink = TextualRichLogSink(proxy) + assert sink.mode == "textual" + + def test_write_delegates_to_target(self): + proxy = 
MagicMock() + sink = TextualRichLogSink(proxy) + sink.write("hello", expand=True) + proxy.write.assert_called_once() + + def test_write_with_expand_not_supported_falls_back(self): + proxy = MagicMock() + proxy.write.side_effect = [TypeError("unexpected keyword"), None] + sink = TextualRichLogSink(proxy) + sink.write("hello", expand=True) + assert proxy.write.call_count == 2 + proxy.write.assert_called_with("hello") + + def test_write_text_delegates(self): + proxy = MagicMock() + sink = TextualRichLogSink(proxy) + sink.write_text("hello") + proxy.write.assert_called_once() + + def test_target_property(self): + proxy = MagicMock() + sink = TextualRichLogSink(proxy) + assert sink.target is proxy + + def test_isinstance_checks(self): + sink = TextualRichLogSink(MagicMock()) + assert isinstance(sink, RenderSink) + + +class TestRichConsoleSink: + def test_mode(self): + from rich.console import Console + sink = RichConsoleSink(Console()) + assert sink.mode == "rich" + + def test_write_delegates_to_console(self): + from rich.console import Console + console = Console(record=True) + sink = RichConsoleSink(console) + from rich.text import Text + sink.write(Text("hi")) + exported = console.export_text() + assert "hi" in exported + + def test_console_property(self): + from rich.console import Console + console = Console() + sink = RichConsoleSink(console) + assert sink.console is console + + def test_isinstance_checks(self): + from rich.console import Console + sink = RichConsoleSink(Console()) + assert isinstance(sink, RenderSink) + + +class TestTextualRichLogSink: + def test_mode(self): + proxy = MagicMock() + sink = TextualRichLogSink(proxy) + assert sink.mode == "textual" + + def test_write_delegates_to_target(self): + proxy = MagicMock() + sink = TextualRichLogSink(proxy) + sink.write("hello", expand=True) + proxy.write.assert_called_once() + + def test_write_with_expand_not_supported_falls_back(self): + proxy = MagicMock() + proxy.write.side_effect = 
[TypeError("unexpected keyword"), None] + sink = TextualRichLogSink(proxy) + sink.write("hello", expand=True) + assert proxy.write.call_count == 2 + proxy.write.assert_called_with("hello") + + def test_write_text_delegates(self): + proxy = MagicMock() + sink = TextualRichLogSink(proxy) + sink.write_text("hello") + proxy.write.assert_called_once() + + def test_target_property(self): + proxy = MagicMock() + sink = TextualRichLogSink(proxy) + assert sink.target is proxy + + def test_isinstance_checks(self): + sink = TextualRichLogSink(MagicMock()) + assert isinstance(sink, RenderSink) + + +class TestRichConsoleSink: + def test_mode(self): + from rich.console import Console + sink = RichConsoleSink(Console()) + assert sink.mode == "rich" + + def test_write_delegates_to_console(self): + from rich.console import Console + console = Console(record=True) + sink = RichConsoleSink(console) + from rich.text import Text + sink.write(Text("hi")) + exported = console.export_text() + assert "hi" in exported + + def test_console_property(self): + from rich.console import Console + console = Console() + sink = RichConsoleSink(console) + assert sink.console is console + + def test_isinstance_checks(self): + from rich.console import Console + sink = RichConsoleSink(Console()) + assert isinstance(sink, RenderSink) + + +class TestTextualRichLogSink: + def test_mode(self): + proxy = MagicMock() + sink = TextualRichLogSink(proxy) + assert sink.mode == "textual" + + def test_write_delegates_to_target(self): + proxy = MagicMock() + sink = TextualRichLogSink(proxy) + sink.write("hello", expand=True) + proxy.write.assert_called_once() + + def test_write_with_expand_not_supported_falls_back(self): + proxy = MagicMock() + proxy.write.side_effect = [TypeError("unexpected keyword"), None] + sink = TextualRichLogSink(proxy) + sink.write("hello", expand=True) + assert proxy.write.call_count == 2 + proxy.write.assert_called_with("hello") + def test_write_text_delegates(self): proxy = MagicMock() 
sink = TextualRichLogSink(proxy) diff --git a/tests/test_rendering_snapshot_cache.py b/tests/test_rendering_snapshot_cache.py index b88a177..b5a740b 100644 --- a/tests/test_rendering_snapshot_cache.py +++ b/tests/test_rendering_snapshot_cache.py @@ -80,7 +80,14 @@ def test_lru_eviction(self, tmp_path): def test_disabled_invalidate_is_noop(self): cache = SnapshotCache(enabled=False) cache.invalidate_stale() + assert len(cache._entries) == 0 def test_disabled_reread_is_noop(self): cache = SnapshotCache(enabled=False) cache.reread("/x") + assert len(cache._entries) == 0 + assert len(cache._entries) == 0 + assert len(cache._entries) == 0 + assert len(cache._entries) == 0 + assert len(cache._entries) == 0 + assert len(cache._entries) == 0 diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index b953ab1..9c046ba 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -71,14 +71,13 @@ def test_strip_probe_unsafe_flags_removes_session_and_continue_flags(): @pytest.mark.unit def test_resolve_model_and_variant_precedence(monkeypatch): - config_module = _load_config_module() - module = load_tool_module("run_agent_resolve", "tools/run-agent.py") + import codecome.config as _cfg monkeypatch.setenv("CODECOME_MODEL", "env/model") monkeypatch.setenv("CODECOME_MODEL_VARIANT", "max") - monkeypatch.setattr(config_module, "_read_codecome_yml_agent", lambda _agent: ("yaml/model", "yamlvar")) - monkeypatch.setattr(config_module, "_discover_opencode_default_model", lambda: "history/model") + monkeypatch.setattr(_cfg, "_read_codecome_yml_agent", lambda _agent: ("yaml/model", "yamlvar")) + monkeypatch.setattr(_cfg, "_discover_opencode_default_model", lambda: "history/model") - model, variant, model_source, variant_source = module.resolve_model_and_variant( + model, variant, model_source, variant_source = _cfg.resolve_model_and_variant( "auditor", ["--model", "args/model", "--variant=high"] ) assert (model, variant) == ("args/model", "high") @@ -132,14 +131,14 @@ 
def test_resolve_thinking_decision_precedence(monkeypatch): @pytest.mark.unit def test_show_model_table_prints_resolution_sources(monkeypatch, capsys): """show_model_table should emit a table with all resolution sources.""" - config_module = _load_config_module() + import codecome.config as _cfg monkeypatch.setenv("OPENCODE_ARGS", "--model openai/gpt-5 --variant high") monkeypatch.setenv("CODECOME_MODEL", "env/model") monkeypatch.setenv("CODECOME_MODEL_VARIANT", "envvar") - monkeypatch.setattr(config_module, "_read_codecome_yml_agent", lambda _agent: ("yaml/model", "yamlvar")) - monkeypatch.setattr(config_module, "_discover_opencode_default_model", lambda: "history/model") + monkeypatch.setattr(_cfg, "_read_codecome_yml_agent", lambda _agent: ("yaml/model", "yamlvar")) + monkeypatch.setattr(_cfg, "_discover_opencode_default_model", lambda: "history/model") - rc = config_module.show_model_table("auditor") + rc = _cfg.show_model_table("auditor") assert rc == 0 out = capsys.readouterr().out diff --git a/tools/codecome/transcript.py b/tools/codecome/transcript.py index e2fc4f4..7afcece 100644 --- a/tools/codecome/transcript.py +++ b/tools/codecome/transcript.py @@ -25,7 +25,7 @@ def _transcript_dir() -> Path: return d -def open_phase_transcript(phase: str, finding: str | None) -> tuple[Path, IO[str] | None]: +def open_phase_transcript(phase: str, finding: str | None) -> tuple[Path, IO[str]]: finding_tag = (finding or "no-finding").replace("/", "_") key = f"{phase}-{finding_tag}" @@ -34,19 +34,13 @@ def open_phase_transcript(phase: str, finding: str | None) -> tuple[Path, IO[str _ATTEMPT_COUNTER[key] = counter + 1 path = _transcript_dir() / f"last-phase-{phase}-{finding_tag}-attempt-{counter}.jsonl" - try: - return path, path.open("w", encoding="utf-8") - except OSError: - return path, None + return path, path.open("w", encoding="utf-8") -def open_chat_transcript() -> tuple[Path, IO[str] | None]: +def open_chat_transcript() -> tuple[Path, IO[str]]: stamp = 
time.strftime("%Y%m%d-%H%M%S") path = _transcript_dir() / f"last-chat-{stamp}-pid{os.getpid()}.jsonl" - try: - return path, path.open("w", encoding="utf-8", buffering=1) - except OSError: - return path, None + return path, path.open("w", encoding="utf-8", buffering=1) def close_transcript(fp: IO[str] | None) -> None: diff --git a/tools/rendering/cache.py b/tools/rendering/cache.py index e0e0eae..abe407e 100644 --- a/tools/rendering/cache.py +++ b/tools/rendering/cache.py @@ -30,23 +30,25 @@ def __init__(self, *, enabled: bool = True, max_entries: int = 200) -> None: # Public API # ------------------------------------------------------------------ - def set(self, path: str, content: str) -> None: + def set(self, path: str | os.PathLike[str], content: str) -> None: """Cache *content* for *path*, recording its current mtime.""" if not self._enabled: return - mtime = self._current_mtime(path) + p = os.fspath(path) + mtime = self._current_mtime(p) if mtime is None: return - self._entries[path] = (content, mtime) - self._entries.move_to_end(path) + self._entries[p] = (content, mtime) + self._entries.move_to_end(p) while len(self._entries) > self._max: self._entries.popitem(last=False) - def get(self, path: str) -> str | None: + def get(self, path: str | os.PathLike[str]) -> str | None: """Return cached content for *path*, or None.""" if not self._enabled: return None - entry = self._entries.get(path) + p = os.fspath(path) + entry = self._entries.get(p) if entry is None: return None return entry[0] @@ -63,15 +65,16 @@ def invalidate_stale(self) -> None: for p in stale: del self._entries[p] - def reread(self, path: str) -> None: + def reread(self, path: str | os.PathLike[str]) -> None: """Invalidate and re-read *path* from disk.""" if not self._enabled: return - if path in self._entries: - del self._entries[path] + p = os.fspath(path) + if p in self._entries: + del self._entries[p] try: - content = Path(path).read_text(encoding="utf-8", errors="replace") - self.set(path, 
content) + content = Path(p).read_text(encoding="utf-8", errors="replace") + self.set(p, content) except OSError: pass diff --git a/tools/rendering/sink.py b/tools/rendering/sink.py index 62416f4..fd5dc49 100644 --- a/tools/rendering/sink.py +++ b/tools/rendering/sink.py @@ -38,7 +38,7 @@ def write(self, renderable: Any, *, expand: bool = True) -> None: """Write a Rich renderable or plain string.""" ... - def write_text(self, text: str) -> None: + def write_text(self, text: str, *, end: str = "\n") -> None: """Write a plain string (always safe, any mode).""" ... @@ -56,10 +56,11 @@ def write(self, renderable: Any, *, expand: bool = True) -> None: # Minimal fallback: str() the renderable. self.write_text(str(renderable)) - def write_text(self, text: str) -> None: + def write_text(self, text: str, *, end: str = "\n") -> None: import sys sys.stdout.write(text) - sys.stdout.write("\n") + sys.stdout.write(end) + sys.stdout.flush() class RichConsoleSink: @@ -77,7 +78,7 @@ def console(self) -> Any: def write(self, renderable: Any, *, expand: bool = True) -> None: self._console.print(renderable, overflow="ignore", crop=False) - def write_text(self, text: str) -> None: + def write_text(self, text: str, *, end: str = "\n") -> None: self._console.print(text, overflow="ignore", crop=False) @@ -99,7 +100,12 @@ def target(self) -> Any: def write(self, renderable: Any, *, expand: bool = True) -> None: # The proxy's .write() is thread-safe (post_message in Textual). - self._target.write(renderable, expand=expand) # type: ignore[call-arg] - - def write_text(self, text: str) -> None: + # Some targets (e.g. the legacy TextualConsoleProxy) do not accept + # an expand keyword; fall back gracefully. 
+ try: + self._target.write(renderable, expand=expand) # type: ignore[call-arg] + except TypeError: + self._target.write(renderable) + + def write_text(self, text: str, *, end: str = "\n") -> None: self.write(text) diff --git a/tools/rendering/tools/base.py b/tools/rendering/tools/base.py index 4be3bce..cec62aa 100644 --- a/tools/rendering/tools/base.py +++ b/tools/rendering/tools/base.py @@ -13,10 +13,10 @@ from typing import Any -from rendering.context import RenderContext +from rendering.base import BaseRenderer -class ToolRenderer: +class ToolRenderer(BaseRenderer): """Base class for per-tool renderers. Subclasses declare which tool names they handle via ``tool_names`` @@ -25,9 +25,6 @@ class ToolRenderer: tool_names: tuple[str, ...] = () - def __init__(self, context: RenderContext) -> None: - self.context = context - def render(self, tool_name: str, state: dict[str, Any]) -> bool: """Render a tool call. Return True if handled, False to fall through.""" raise NotImplementedError @@ -49,7 +46,7 @@ def render(self, tool_name: str, state: dict[str, Any]) -> bool: input_data = state.get("input") output_data = state.get("output") - if self.context.sink.mode in ("rich", "textual"): + if self.rich: from rich.console import Group from rich.json import JSON from rich.panel import Panel @@ -80,11 +77,15 @@ def render(self, tool_name: str, state: dict[str, Any]) -> bool: self.context.sink.write(Panel(body, title=title, border_style=border, expand=True)) else: import _colors as C - print(C.header(f"Tool: {tool_name} [{status}]")) + self.context.sink.write_text(C.header(f"Tool: {tool_name} [{status}]")) if input_data is not None: - print(C.info("Input")) - print(json.dumps(input_data, indent=2) if isinstance(input_data, (dict, list)) else str(input_data)) + self.context.sink.write_text(C.info("Input")) + self.context.sink.write_text( + json.dumps(input_data, indent=2) if isinstance(input_data, (dict, list)) else str(input_data) + ) if output_data is not None: - 
print(C.info("Output")) - print(json.dumps(output_data, indent=2) if isinstance(output_data, (dict, list)) else str(output_data)) + self.context.sink.write_text(C.info("Output")) + self.context.sink.write_text( + json.dumps(output_data, indent=2) if isinstance(output_data, (dict, list)) else str(output_data) + ) return True diff --git a/tools/rendering/command_interceptors/__init__.py b/tools/rendering/tools/interceptors/__init__.py similarity index 82% rename from tools/rendering/command_interceptors/__init__.py rename to tools/rendering/tools/interceptors/__init__.py index a99b47b..1aa7c65 100644 --- a/tools/rendering/command_interceptors/__init__.py +++ b/tools/rendering/tools/interceptors/__init__.py @@ -8,6 +8,6 @@ from __future__ import annotations -from rendering.command_interceptors.base import CommandExecutionInterceptor +from rendering.tools.interceptors.base import CommandExecutionInterceptor __all__ = ["CommandExecutionInterceptor"] diff --git a/tools/rendering/command_interceptors/base.py b/tools/rendering/tools/interceptors/base.py similarity index 94% rename from tools/rendering/command_interceptors/base.py rename to tools/rendering/tools/interceptors/base.py index 045a710..022d184 100644 --- a/tools/rendering/command_interceptors/base.py +++ b/tools/rendering/tools/interceptors/base.py @@ -14,7 +14,7 @@ from typing import Any, Protocol, runtime_checkable -from rendering.tools.base import ToolRenderer +from rendering.tools.base import ToolRenderer # noqa: E402 @runtime_checkable diff --git a/tools/run-agent.py b/tools/run-agent.py index 3ee6d37..ad80a0a 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -3897,12 +3897,18 @@ def _run_single_attempt( Returns (returncode, session_id, run_result, transcript_path). 
""" - transcript_path, transcript_fp = open_phase_transcript(str(args.phase), args.finding) - if transcript_fp is None: + transcript_fp = None + try: + transcript_path, transcript_fp = open_phase_transcript(str(args.phase), args.finding) + except OSError as exc: + # Reconstruct the path the helper would have produced so the + # warning still names the right file. + finding_tag = (args.finding or "no-finding").replace("/", "_") + transcript_path = ROOT / "tmp" / f"last-phase-{args.phase}-{finding_tag}-attempt-N.jsonl" if HAVE_RICH: - console.print(Text(f"warning: could not open transcript {transcript_path}", style="yellow")) + console.print(Text(f"warning: could not open transcript {transcript_path}: {exc}", style="yellow")) else: - print(C.warn(f"warning: could not open transcript {transcript_path}")) + print(C.warn(f"warning: could not open transcript {transcript_path}: {exc}")) try: if existing_session_id: @@ -4556,7 +4562,13 @@ def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> return 1 # Open the chat transcript (parity with phase mode). - transcript_path, transcript_fp = open_chat_transcript() + transcript_path: Path = Path() + transcript_fp = None + try: + transcript_path, transcript_fp = open_chat_transcript() + _chat_debug(f"_run_chat_mode: opened transcript {transcript_path}") + except OSError as exc: + _chat_debug(f"_run_chat_mode: could not open transcript: {exc}") _chat_debug("_run_chat_mode: creating ChatApp") app = None From 4a892cf571648187303434b582c770f6612209f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 24 May 2026 21:42:28 +0200 Subject: [PATCH 08/65] =?UTF-8?q?fix:=20address=20PR=20review=20comments?= =?UTF-8?q?=20=E2=80=94=20de-duplication=20+=20explicit=20registry?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. tests/test_rendering_sinks.py: remove 4x duplicated sink test classes (293 lines → 105 lines) 2. 
tools/rendering/registry.py: require explicit event_types/tool_names on registered renderers; empty tuple no longer acts as catch-all. Fallback renderers are held separately and unaffected. 3. tests/test_rendering_snapshot_cache.py: remove 5x duplicated assert --- tests/test_rendering_sinks.py | 189 ------------------------- tests/test_rendering_snapshot_cache.py | 5 - tools/rendering/registry.py | 16 ++- 3 files changed, 12 insertions(+), 198 deletions(-) diff --git a/tests/test_rendering_sinks.py b/tests/test_rendering_sinks.py index b7a21b3..f2d622b 100644 --- a/tests/test_rendering_sinks.py +++ b/tests/test_rendering_sinks.py @@ -102,192 +102,3 @@ def test_target_property(self): def test_isinstance_checks(self): sink = TextualRichLogSink(MagicMock()) assert isinstance(sink, RenderSink) - - -class TestRichConsoleSink: - def test_mode(self): - from rich.console import Console - sink = RichConsoleSink(Console()) - assert sink.mode == "rich" - - def test_write_delegates_to_console(self): - from rich.console import Console - console = Console(record=True) - sink = RichConsoleSink(console) - from rich.text import Text - sink.write(Text("hi")) - exported = console.export_text() - assert "hi" in exported - - def test_console_property(self): - from rich.console import Console - console = Console() - sink = RichConsoleSink(console) - assert sink.console is console - - def test_isinstance_checks(self): - from rich.console import Console - sink = RichConsoleSink(Console()) - assert isinstance(sink, RenderSink) - - -class TestTextualRichLogSink: - def test_mode(self): - proxy = MagicMock() - sink = TextualRichLogSink(proxy) - assert sink.mode == "textual" - - def test_write_delegates_to_target(self): - proxy = MagicMock() - sink = TextualRichLogSink(proxy) - sink.write("hello", expand=True) - proxy.write.assert_called_once() - - def test_write_with_expand_not_supported_falls_back(self): - proxy = MagicMock() - proxy.write.side_effect = [TypeError("unexpected keyword"), 
None] - sink = TextualRichLogSink(proxy) - sink.write("hello", expand=True) - assert proxy.write.call_count == 2 - proxy.write.assert_called_with("hello") - - def test_write_text_delegates(self): - proxy = MagicMock() - sink = TextualRichLogSink(proxy) - sink.write_text("hello") - proxy.write.assert_called_once() - - def test_target_property(self): - proxy = MagicMock() - sink = TextualRichLogSink(proxy) - assert sink.target is proxy - - def test_isinstance_checks(self): - sink = TextualRichLogSink(MagicMock()) - assert isinstance(sink, RenderSink) - - -class TestRichConsoleSink: - def test_mode(self): - from rich.console import Console - sink = RichConsoleSink(Console()) - assert sink.mode == "rich" - - def test_write_delegates_to_console(self): - from rich.console import Console - console = Console(record=True) - sink = RichConsoleSink(console) - from rich.text import Text - sink.write(Text("hi")) - exported = console.export_text() - assert "hi" in exported - - def test_console_property(self): - from rich.console import Console - console = Console() - sink = RichConsoleSink(console) - assert sink.console is console - - def test_isinstance_checks(self): - from rich.console import Console - sink = RichConsoleSink(Console()) - assert isinstance(sink, RenderSink) - - -class TestTextualRichLogSink: - def test_mode(self): - proxy = MagicMock() - sink = TextualRichLogSink(proxy) - assert sink.mode == "textual" - - def test_write_delegates_to_target(self): - proxy = MagicMock() - sink = TextualRichLogSink(proxy) - sink.write("hello", expand=True) - proxy.write.assert_called_once() - - def test_write_with_expand_not_supported_falls_back(self): - proxy = MagicMock() - proxy.write.side_effect = [TypeError("unexpected keyword"), None] - sink = TextualRichLogSink(proxy) - sink.write("hello", expand=True) - assert proxy.write.call_count == 2 - proxy.write.assert_called_with("hello") - - def test_write_text_delegates(self): - proxy = MagicMock() - sink = 
TextualRichLogSink(proxy) - sink.write_text("hello") - proxy.write.assert_called_once() - - def test_target_property(self): - proxy = MagicMock() - sink = TextualRichLogSink(proxy) - assert sink.target is proxy - - def test_isinstance_checks(self): - sink = TextualRichLogSink(MagicMock()) - assert isinstance(sink, RenderSink) - - -class TestRichConsoleSink: - def test_mode(self): - from rich.console import Console - sink = RichConsoleSink(Console()) - assert sink.mode == "rich" - - def test_write_delegates_to_console(self): - from rich.console import Console - console = Console(record=True) - sink = RichConsoleSink(console) - from rich.text import Text - sink.write(Text("hi")) - exported = console.export_text() - assert "hi" in exported - - def test_console_property(self): - from rich.console import Console - console = Console() - sink = RichConsoleSink(console) - assert sink.console is console - - def test_isinstance_checks(self): - from rich.console import Console - sink = RichConsoleSink(Console()) - assert isinstance(sink, RenderSink) - - -class TestTextualRichLogSink: - def test_mode(self): - proxy = MagicMock() - sink = TextualRichLogSink(proxy) - assert sink.mode == "textual" - - def test_write_delegates_to_target(self): - proxy = MagicMock() - sink = TextualRichLogSink(proxy) - sink.write("hello", expand=True) - proxy.write.assert_called_once() - - def test_write_with_expand_not_supported_falls_back(self): - proxy = MagicMock() - proxy.write.side_effect = [TypeError("unexpected keyword"), None] - sink = TextualRichLogSink(proxy) - sink.write("hello", expand=True) - assert proxy.write.call_count == 2 - proxy.write.assert_called_with("hello") - - def test_write_text_delegates(self): - proxy = MagicMock() - sink = TextualRichLogSink(proxy) - sink.write_text("hello") - proxy.write.assert_called_once() - - def test_target_property(self): - proxy = MagicMock() - sink = TextualRichLogSink(proxy) - assert sink.target is proxy - - def test_isinstance_checks(self): - 
sink = TextualRichLogSink(MagicMock()) - assert isinstance(sink, RenderSink) diff --git a/tests/test_rendering_snapshot_cache.py b/tests/test_rendering_snapshot_cache.py index b5a740b..3c92fcc 100644 --- a/tests/test_rendering_snapshot_cache.py +++ b/tests/test_rendering_snapshot_cache.py @@ -86,8 +86,3 @@ def test_disabled_reread_is_noop(self): cache = SnapshotCache(enabled=False) cache.reread("/x") assert len(cache._entries) == 0 - assert len(cache._entries) == 0 - assert len(cache._entries) == 0 - assert len(cache._entries) == 0 - assert len(cache._entries) == 0 - assert len(cache._entries) == 0 diff --git a/tools/rendering/registry.py b/tools/rendering/registry.py index 52ba302..aa2ba9d 100644 --- a/tools/rendering/registry.py +++ b/tools/rendering/registry.py @@ -49,19 +49,27 @@ def register_tool(self, renderer: ToolRenderer) -> None: # ------------------------------------------------------------------ def dispatch_event(self, event: dict[str, Any]) -> None: - """Render a generic event through the matching renderer.""" + """Render a generic event through the matching renderer. + + Registered renderers must declare explicit ``event_types``; + the ``UnknownEventRenderer`` fallback catches everything else. + """ event_type = event.get("type", "") for renderer in self._event_renderers: - if not renderer.event_types or event_type in renderer.event_types: + if event_type in renderer.event_types: if renderer.render(event): return self._unknown.render(event) def dispatch_tool(self, tool_name: str, state: dict[str, Any]) -> None: - """Render a tool call through the matching renderer.""" + """Render a tool call through the matching renderer. + + Registered renderers must declare explicit ``tool_names``; + the ``FallbackToolRenderer`` catches everything else. 
+ """ tool_lower = tool_name.strip().lower() for renderer in self._tool_renderers: - if not renderer.tool_names or tool_lower in renderer.tool_names: + if tool_lower in renderer.tool_names: if renderer.render(tool_name, state): return self._fallback_tool.render(tool_name, state) From 3f8cf8ccc90ec12a50d2c63f8efa915fe647bccd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 24 May 2026 21:53:15 +0200 Subject: [PATCH 09/65] refactor(phase-a3-batch1): migrate todo/task/skill/permission renderers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the simplest renderers from run-agent.py into rendering/tools/: - rendering/tools/todo.py — TodoRenderer (todowrite tool) - rendering/tools/task.py — TaskRenderer (task tool) - rendering/tools/skill.py — SkillRenderer (skill tool) - rendering/tools/permissions.py — PermissionErrorRenderer Each inherits from ToolRenderer/BaseRenderer, uses self.rich/self.plain from the sink abstraction, and delegates to _render_rich/_render_plain. Dispatch in run-agent.py now routes todo/task/skill through the new classes via a lazy RenderContext. Old render_* functions kept for now. 
Tests: 17 new (334 passed, 0 failed, 0 errors) --- tests/test_rendering_tools.py | 216 +++++++++++++++++++++++++++ tests/test_run_agent.py | 23 ++- tools/rendering/tools/__init__.py | 13 +- tools/rendering/tools/permissions.py | 36 +++++ tools/rendering/tools/skill.py | 51 +++++++ tools/rendering/tools/task.py | 92 ++++++++++++ tools/rendering/tools/todo.py | 157 +++++++++++++++++++ tools/run-agent.py | 45 ++++-- 8 files changed, 607 insertions(+), 26 deletions(-) create mode 100644 tests/test_rendering_tools.py create mode 100644 tools/rendering/tools/permissions.py create mode 100644 tools/rendering/tools/skill.py create mode 100644 tools/rendering/tools/task.py create mode 100644 tools/rendering/tools/todo.py diff --git a/tests/test_rendering_tools.py b/tests/test_rendering_tools.py new file mode 100644 index 0000000..928e587 --- /dev/null +++ b/tests/test_rendering_tools.py @@ -0,0 +1,216 @@ +from __future__ import annotations + +import sys +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from rendering.context import RenderContext +from rendering.sink import PlainSink, RichConsoleSink +from rendering.settings import RenderSettings +from rendering.cache import SnapshotCache +from rendering.tools.todo import TodoRenderer +from rendering.tools.task import TaskRenderer +from rendering.tools.skill import SkillRenderer +from rendering.tools.permissions import PermissionErrorRenderer + + +def _ctx(sink_mode="plain"): + if sink_mode == "rich": + from rich.console import Console + sink = RichConsoleSink(Console(record=True)) + else: + sink = PlainSink() + return RenderContext( + root=Path("/fake"), + sink=sink, + settings=RenderSettings(), + cache=SnapshotCache(), + ) + + +# —————————————————————————————————————————————— +# TodoRenderer +# —————————————————————————————————————————————— + +class TestTodoRenderer: + def test_renders_empty_todos(self, 
capsys): + r = TodoRenderer(_ctx("plain")) + state = {"input": {"todos": []}} + assert r.render("todowrite", state) is True + out = capsys.readouterr().out + assert "todos" in out.lower() + assert "No todos" in out + + def test_renders_todos_plain(self, capsys): + r = TodoRenderer(_ctx("plain")) + state = { + "input": { + "todos": [ + {"content": "Fix bug", "status": "completed", "priority": "high"}, + {"content": "Write test", "status": "in_progress", "priority": "medium"}, + {"content": "Deploy", "status": "pending", "priority": "low"}, + ] + } + } + assert r.render("todowrite", state) is True + out = capsys.readouterr().out + assert "3 tasks" in out + assert "Fix bug" in out + assert "Write test" in out + assert "Deploy" in out + + def test_renders_todos_rich(self): + r = TodoRenderer(_ctx("rich")) + state = { + "input": { + "todos": [ + {"content": "Fix bug", "status": "completed", "priority": "high"}, + ] + } + } + assert r.render("todowrite", state) is True + + def test_returns_false_when_state_not_recognized(self): + r = TodoRenderer(_ctx("plain")) + assert r.render("todowrite", {"input": {}}) is False + + def test_extracts_todos_from_output_list(self, capsys): + r = TodoRenderer(_ctx("plain")) + state = { + "output": [ + {"content": "From output", "status": "pending", "priority": "low"}, + ] + } + assert r.render("todowrite", state) is True + out = capsys.readouterr().out + assert "From output" in out + + +# —————————————————————————————————————————————— +# TaskRenderer +# —————————————————————————————————————————————— + +class TestTaskRenderer: + def test_renders_task_plain(self, capsys): + r = TaskRenderer(_ctx("plain")) + state = { + "input": { + "description": "Look for bugs", + "subagent_type": "auditor", + "prompt": "Find security issues", + }, + "status": "completed", + } + assert r.render("task", state) is True + out = capsys.readouterr().out + assert "Look for bugs" in out + assert "auditor" in out + assert "completed" in out + + def 
test_renders_task_rich(self): + r = TaskRenderer(_ctx("rich")) + state = { + "input": { + "description": "Look for bugs", + "prompt": "line1\nline2\nline3", + }, + "status": "completed", + } + assert r.render("task", state) is True + + def test_task_prompt_preview_truncates(self, capsys): + r = TaskRenderer(_ctx("plain")) + settings = RenderSettings(task_prompt_preview_lines=2) + r.context.settings = settings + state = { + "input": { + "description": "Audit", + "prompt": "line1\nline2\nline3\nline4", + }, + "status": "in_progress", + } + assert r.render("task", state) is True + out = capsys.readouterr().out + assert "line1" in out + assert "line2" in out + assert "more lines" in out + + def test_returns_false_for_non_dict_input(self): + r = TaskRenderer(_ctx("plain")) + assert r.render("task", {"input": "not a dict"}) is False + + def test_renders_output_when_present(self, capsys): + r = TaskRenderer(_ctx("plain")) + state = { + "input": {"description": "X", "prompt": "Y"}, + "output": "Task result", + "status": "completed", + } + assert r.render("task", state) is True + out = capsys.readouterr().out + assert "Task result" in out + + def test_output_truncated_over_200_chars(self, capsys): + r = TaskRenderer(_ctx("plain")) + long = "x" * 300 + state = { + "input": {"description": "X", "prompt": "Y"}, + "output": long, + "status": "completed", + } + assert r.render("task", state) is True + out = capsys.readouterr().out + assert "..." 
in out + assert long not in out + + +# —————————————————————————————————————————————— +# SkillRenderer +# —————————————————————————————————————————————— + +class TestSkillRenderer: + def test_renders_known_skill_plain(self, capsys): + r = SkillRenderer(_ctx("plain")) + state = {"input": {"name": "web-security"}} + assert r.render("skill", state) is True + out = capsys.readouterr().out + assert "web-security" in out + + def test_renders_unknown_skill_plain(self, capsys): + r = SkillRenderer(_ctx("plain")) + state = {"input": {"name": ""}} + assert r.render("skill", state) is True + out = capsys.readouterr().out + assert "unknown" in out.lower() + + def test_renders_skill_rich(self): + r = SkillRenderer(_ctx("rich")) + state = {"input": {"name": "web-security"}} + assert r.render("skill", state) is True + + def test_returns_false_for_non_dict_input(self): + r = SkillRenderer(_ctx("plain")) + assert r.render("skill", {"input": "nope"}) is False + + +# —————————————————————————————————————————————— +# PermissionErrorRenderer +# —————————————————————————————————————————————— + +class TestPermissionErrorRenderer: + def test_renders_permission_error_plain(self, capsys): + r = PermissionErrorRenderer(_ctx("plain")) + r.render_message("tool permission rejected: write") + out = capsys.readouterr().out + assert "Permission Denied" in out + assert "write" in out + + def test_renders_permission_error_rich(self): + r = PermissionErrorRenderer(_ctx("rich")) + r.render_message("tool permission rejected: bash") + # Should not raise diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 9c046ba..5964f58 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -407,31 +407,28 @@ def _fake_subagent_status(_console, _event): @pytest.mark.unit def test_dispatch_tool_renderer_routes_task_to_task_renderer(monkeypatch): + """_dispatch_tool_renderer delegates 'task' to rendering.tools.task.TaskRenderer.""" + import rendering.tools.task as _task_mod module = 
load_tool_module("run_agent_dispatch_task", "tools/run-agent.py") task_calls = [] - def fake_task_rich(_console, _state): - task_calls.append("rich") - return True + class _FakeRenderer: + def render(self, tool_name, state): + task_calls.append(tool_name) + return True - def fake_task_plain(_state): - task_calls.append("plain") - return True - - monkeypatch.setattr(module, "render_task_rich", fake_task_rich) - monkeypatch.setattr(module, "render_task_plain", fake_task_plain) + monkeypatch.setattr(_task_mod, "TaskRenderer", lambda ctx: _FakeRenderer()) # With rich monkeypatch.setattr(module, "HAVE_RICH", True) assert module._dispatch_tool_renderer(None, "task", {}) is True - assert task_calls == ["rich"] + assert task_calls == ["task"] - # With plain - monkeypatch.setattr(module, "HAVE_RICH", False) task_calls.clear() + monkeypatch.setattr(module, "HAVE_RICH", False) assert module._dispatch_tool_renderer(None, "task", {}) is True - assert task_calls == ["plain"] + assert task_calls == ["task"] # --- reasoning / error rendering edge cases -------------------------------- diff --git a/tools/rendering/tools/__init__.py b/tools/rendering/tools/__init__.py index d7166b0..15f51c1 100644 --- a/tools/rendering/tools/__init__.py +++ b/tools/rendering/tools/__init__.py @@ -8,5 +8,16 @@ from __future__ import annotations from rendering.tools.base import FallbackToolRenderer, ToolRenderer +from rendering.tools.permissions import PermissionErrorRenderer +from rendering.tools.skill import SkillRenderer +from rendering.tools.task import TaskRenderer +from rendering.tools.todo import TodoRenderer -__all__ = ["FallbackToolRenderer", "ToolRenderer"] +__all__ = [ + "FallbackToolRenderer", + "PermissionErrorRenderer", + "SkillRenderer", + "TaskRenderer", + "TodoRenderer", + "ToolRenderer", +] diff --git a/tools/rendering/tools/permissions.py b/tools/rendering/tools/permissions.py new file mode 100644 index 0000000..f67ee57 --- /dev/null +++ b/tools/rendering/tools/permissions.py @@ 
-0,0 +1,36 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +PermissionErrorRenderer — bold red panel for auto-rejected tool permissions. +""" + +from __future__ import annotations + +from rendering.base import BaseRenderer + + +class PermissionErrorRenderer(BaseRenderer): + """Draws a permission-denied panel (rich) or error line (plain). + + Unlike tool renderers, this is called directly from the event loop + when a permission is auto-rejected — it receives a plain message + string rather than a tool state dict. + """ + + def render_message(self, message: str) -> None: + if self.rich: + from rich.panel import Panel + from rich.text import Text + self.sink.write( + Panel( + Text(message, style="bold red"), + title="Permission Denied", + border_style="red", + expand=True, + ) + ) + else: + import _colors as C + self.sink.write_text(C.fail("Permission Denied")) + self.sink.write_text(C.fail(f" {message}")) diff --git a/tools/rendering/tools/skill.py b/tools/rendering/tools/skill.py new file mode 100644 index 0000000..5127e72 --- /dev/null +++ b/tools/rendering/tools/skill.py @@ -0,0 +1,51 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +SkillRenderer — compact panel for skill-loading tool calls. 
+""" + +from __future__ import annotations + +from typing import Any + +from rendering.tools.base import ToolRenderer + + +class SkillRenderer(ToolRenderer): + tool_names = ("skill",) + + def render(self, tool_name: str, state: dict[str, Any]) -> bool: + inp = state.get("input") + if not isinstance(inp, dict): + return False + + name = str(inp.get("name", "")) + + if self.rich: + return self._render_rich(name) + else: + return self._render_plain(name) + + def _render_rich(self, name: str) -> bool: + from rich.panel import Panel + from rich.text import Text + + if not name: + label = "(unknown skill)" + style = "dim" + else: + label = f"loaded skill: {name}" + style = "" + + self.sink.write(Panel(Text(label, style=style), title="Skill", border_style="dim", expand=True)) + return True + + def _render_plain(self, name: str) -> bool: + import _colors as C + + if not name: + self.sink.write_text(C.header("skill (unknown)")) + else: + self.sink.write_text(C.header(f"skill {name}")) + return True diff --git a/tools/rendering/tools/task.py b/tools/rendering/tools/task.py new file mode 100644 index 0000000..961d281 --- /dev/null +++ b/tools/rendering/tools/task.py @@ -0,0 +1,92 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +TaskRenderer — preview panel for task (subagent) tool calls. 
+""" + +from __future__ import annotations + +from typing import Any + +from rendering.tools.base import ToolRenderer + + +class TaskRenderer(ToolRenderer): + tool_names = ("task",) + + def render(self, tool_name: str, state: dict[str, Any]) -> bool: + inp = state.get("input") + if not isinstance(inp, dict): + return False + + description = str(inp.get("description", "")) + subagent_type = str(inp.get("subagent_type", inp.get("subagentType", ""))) + prompt = str(inp.get("prompt", "")) + status = str(state.get("status", "unknown")) + cap = self.context.settings.task_prompt_preview_lines + + if self.rich: + return self._render_rich(description, subagent_type, prompt, status, cap, state) + else: + return self._render_plain(description, subagent_type, prompt, status, cap, state) + + def _render_rich(self, description: str, subagent_type: str, prompt: str, + status: str, cap: int, state: dict[str, Any]) -> bool: + from rich.console import Group + from rich.panel import Panel + from rich.text import Text + + border = "green" if status == "completed" else "yellow" + + sections: list[Any] = [] + if description: + type_tag = f" [{subagent_type}]" if subagent_type else "" + sections.append(Text(f"{description}{type_tag}", style="bold cyan")) + + if prompt: + sections.append(Text()) + prompt_lines = prompt.split("\n") + preview_lines = prompt_lines[:cap] + leftover = max(0, len(prompt_lines) - cap) + sections.append(Text("\n".join(preview_lines), style="dim")) + if leftover > 0: + sections.append(Text(f"... {leftover} more lines", style="dim")) + + output_data = state.get("output") + if output_data is not None: + sections.append(Text()) + sections.append(Text("Output", style="bold green")) + output_str = str(output_data) + if len(output_str) > 200: + output_str = output_str[:200] + "..." 
+ sections.append(Text(output_str, style="dim")) + + self.sink.write( + Panel(Group(*sections), title=Text(f"Task [{status}]"), border_style=border, expand=True) + ) + return True + + def _render_plain(self, description: str, subagent_type: str, prompt: str, + status: str, cap: int, state: dict[str, Any]) -> bool: + import _colors as C + + type_tag = f" [{subagent_type}]" if subagent_type else "" + self.sink.write_text(C.header(f"task {description}{type_tag} [{status}]")) + + if prompt: + prompt_lines = prompt.split("\n") + for line in prompt_lines[:cap]: + self.sink.write_text(f" {line}") + leftover = max(0, len(prompt_lines) - cap) + if leftover > 0: + self.sink.write_text(f" ... {leftover} more lines") + + output_data = state.get("output") + if output_data is not None: + self.sink.write_text(C.info("Output")) + output_str = str(output_data) + if len(output_str) > 200: + output_str = output_str[:200] + "..." + self.sink.write_text(f" {output_str}") + return True diff --git a/tools/rendering/tools/todo.py b/tools/rendering/tools/todo.py new file mode 100644 index 0000000..e29c747 --- /dev/null +++ b/tools/rendering/tools/todo.py @@ -0,0 +1,157 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +TodoRenderer — styled panel for todowrite tool calls. 
+""" + +from __future__ import annotations + +from collections import Counter +from typing import Any + +from rendering.tools.base import ToolRenderer + +_TODO_STATUS_ICONS = { + "completed": "\u2714", + "in_progress": "\u25cf", + "pending": "\u25cb", + "cancelled": "\u2716", +} + +_TODO_STATUS_ASCII = { + "completed": "[x]", + "in_progress": "[~]", + "pending": "[ ]", + "cancelled": "[-]", +} + +_TODO_PRIORITY_LETTERS = { + "high": "H", + "medium": "M", + "low": "L", +} + + +def _extract_todos(state: dict[str, Any]) -> list[dict[str, str]] | None: + output = state.get("output") + if isinstance(output, list): + items = output + else: + input_data = state.get("input") + if isinstance(input_data, dict) and isinstance(input_data.get("todos"), list): + items = input_data["todos"] + else: + return None + + result: list[dict[str, str]] = [] + for item in items: + if not isinstance(item, dict): + return None + result.append({ + "content": str(item.get("content", "")), + "status": str(item.get("status", "?")), + "priority": str(item.get("priority", "?")), + }) + return result + + +def _todo_summary(todos: list[dict[str, str]]) -> str: + counts = Counter(t["status"] for t in todos) + parts = [f"{len(todos)} tasks"] + for status in ("completed", "in_progress", "pending", "cancelled"): + count = counts.get(status, 0) + if count > 0: + label = status.replace("_", " ") + parts.append(f"{count} {label}") + return " \u00b7 ".join(parts) + + +def _todo_border_style(todos: list[dict[str, str]]) -> str: + statuses = {t["status"] for t in todos} + if statuses == {"completed"}: + return "green" + if "in_progress" in statuses: + return "yellow" + return "dim" + + +class TodoRenderer(ToolRenderer): + tool_names = ("todowrite",) + + def render(self, tool_name: str, state: dict[str, Any]) -> bool: + todos = _extract_todos(state) + if todos is None: + return False + + if self.rich: + return self._render_rich(todos) + else: + return self._render_plain(todos) + + def _render_rich(self, 
todos: list[dict[str, str]]) -> bool: + from rich.console import Group + from rich.panel import Panel + from rich.table import Table + from rich.text import Text + + if not todos: + self.sink.write(Panel(Text("No todos.", style="dim"), title="Todos", border_style="dim", expand=True)) + return True + + summary = Text(_todo_summary(todos)) + border = _todo_border_style(todos) + + table = Table(show_header=False, show_edge=False, padding=(0, 1), expand=True) + table.add_column(width=2, no_wrap=True) + table.add_column(width=1, no_wrap=True) + table.add_column(ratio=1) + + status_styles = { + "completed": "bold green", + "in_progress": "yellow", + "pending": "dim", + "cancelled": "dim strike", + } + priority_styles = { + "high": "red", + "medium": "yellow", + "low": "dim", + } + + for todo in todos: + status = todo["status"] + priority = todo["priority"] + + icon = _TODO_STATUS_ICONS.get(status, "?") + icon_style = status_styles.get(status, "dim") + pri_letter = _TODO_PRIORITY_LETTERS.get(priority, "?") + pri_style = priority_styles.get(priority, "dim") + + table.add_row( + Text(icon, style=icon_style), + Text(pri_letter, style=pri_style), + Text(todo["content"], style=status_styles.get(status, "")), + ) + + body = Group(summary, Text(), table) + self.sink.write(Panel(body, title="Todos", border_style=border, expand=True)) + return True + + def _render_plain(self, todos: list[dict[str, str]]) -> bool: + import _colors as C + if not todos: + self.sink.write_text(C.header("todos")) + self.sink.write_text(" No todos.") + return True + + self.sink.write_text(C.header("todos")) + self.sink.write_text(f" {_todo_summary(todos)}") + for todo in todos: + status = todo["status"] + priority = todo["priority"] + checkbox = _TODO_STATUS_ASCII.get(status, "[?]") + pri_letter = _TODO_PRIORITY_LETTERS.get(priority, "?") + content = todo["content"].replace("\n", " ") + self.sink.write_text(f" {checkbox} {pri_letter} {content}") + return True diff --git a/tools/run-agent.py 
b/tools/run-agent.py index ad80a0a..15f608c 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -50,6 +50,33 @@ ) from codecome.transcript import open_phase_transcript, open_chat_transcript, close_transcript +# Lazy rendering context — built once and reused by the new renderer +# classes. Old-style render_* functions still receive console directly +# and are unaffected. +_RENDERING_CTX: Any = None + + +def _get_rendering_ctx(console: Any) -> Any: + global _RENDERING_CTX + if _RENDERING_CTX is not None: + return _RENDERING_CTX + from rendering.cache import SnapshotCache + from rendering.context import RenderContext + from rendering.settings import RenderSettings + from rendering.sink import PlainSink, RichConsoleSink + + if HAVE_RICH and console is not None: + sink = RichConsoleSink(console) + else: + sink = PlainSink() + _RENDERING_CTX = RenderContext( + root=ROOT, + sink=sink, + settings=RenderSettings.from_env(), + cache=SnapshotCache(), + ) + return _RENDERING_CTX + try: from rich.console import Console, Group from rich.json import JSON @@ -3254,10 +3281,8 @@ def _dispatch_tool_renderer(console: Console, tool: str, state: dict[str, Any]) """Try tool-specific rendering. 
Returns True if handled.""" tool_lower = tool.strip().lower() if tool_lower == "todowrite": - if HAVE_RICH: - return render_todowrite_rich(console, state) - else: - return render_todowrite_plain(state) + from rendering.tools.todo import TodoRenderer + return TodoRenderer(_get_rendering_ctx(console)).render(tool_lower, state) elif tool_lower == "read": # Invalidate stale cache entries before non-write events _cache_invalidate_stale() @@ -3312,16 +3337,12 @@ def _dispatch_tool_renderer(console: Console, tool: str, state: dict[str, Any]) return render_bash_plain(state) elif tool_lower == "skill": _cache_invalidate_stale() - if HAVE_RICH: - return render_skill_rich(console, state) - else: - return render_skill_plain(state) + from rendering.tools.skill import SkillRenderer + return SkillRenderer(_get_rendering_ctx(console)).render(tool_lower, state) elif tool_lower == "task": _cache_invalidate_stale() - if HAVE_RICH: - return render_task_rich(console, state) - else: - return render_task_plain(state) + from rendering.tools.task import TaskRenderer + return TaskRenderer(_get_rendering_ctx(console)).render(tool_lower, state) else: _cache_invalidate_stale() return False From 55100a9cd46765c0a735c56c4a7e2d723df5c40f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 24 May 2026 22:00:46 +0200 Subject: [PATCH 10/65] refactor(phase-a3-batch2): migrate read/write/edit renderers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract three renderers from run-agent.py into rendering/tools/: - rendering/utils.py — shared helpers (path, lexer, diff, read framing, internal suppression) - rendering/tools/read.py — ReadRenderer (read tool) - rendering/tools/write.py — WriteRenderer (write tool) - rendering/tools/edit.py — EditRenderer (edit tool) Each uses self.context.cache (SnapshotCache) for diff baselines and self.context.settings (RenderSettings) for display tunables. 
Uses self.sink.write_text(end='') for plain diff lines. Dispatch in run-agent.py now routes read/write/edit through the new classes. Old render_* functions kept for backward compat. 334 passed, 0 failed, 0 errors --- tools/rendering/tools/__init__.py | 6 + tools/rendering/tools/edit.py | 123 ++++++++++++++++ tools/rendering/tools/read.py | 180 +++++++++++++++++++++++ tools/rendering/tools/write.py | 162 +++++++++++++++++++++ tools/rendering/utils.py | 227 ++++++++++++++++++++++++++++++ tools/run-agent.py | 19 +-- 6 files changed, 704 insertions(+), 13 deletions(-) create mode 100644 tools/rendering/tools/edit.py create mode 100644 tools/rendering/tools/read.py create mode 100644 tools/rendering/tools/write.py create mode 100644 tools/rendering/utils.py diff --git a/tools/rendering/tools/__init__.py b/tools/rendering/tools/__init__.py index 15f51c1..d5da88c 100644 --- a/tools/rendering/tools/__init__.py +++ b/tools/rendering/tools/__init__.py @@ -8,16 +8,22 @@ from __future__ import annotations from rendering.tools.base import FallbackToolRenderer, ToolRenderer +from rendering.tools.edit import EditRenderer from rendering.tools.permissions import PermissionErrorRenderer +from rendering.tools.read import ReadRenderer from rendering.tools.skill import SkillRenderer from rendering.tools.task import TaskRenderer from rendering.tools.todo import TodoRenderer +from rendering.tools.write import WriteRenderer __all__ = [ + "EditRenderer", "FallbackToolRenderer", "PermissionErrorRenderer", + "ReadRenderer", "SkillRenderer", "TaskRenderer", "TodoRenderer", "ToolRenderer", + "WriteRenderer", ] diff --git a/tools/rendering/tools/edit.py b/tools/rendering/tools/edit.py new file mode 100644 index 0000000..5b5d766 --- /dev/null +++ b/tools/rendering/tools/edit.py @@ -0,0 +1,123 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +EditRenderer — diff panel for edit tool (oldString → newString). 
+""" + +from __future__ import annotations + +from typing import Any + +from rendering.tools.base import ToolRenderer +from rendering.utils import compute_diff, is_likely_error, relativize_path, truncate_diff + + +class EditRenderer(ToolRenderer): + tool_names = ("edit",) + + def render(self, tool_name: str, state: dict[str, Any]) -> bool: + inp = state.get("input") + output = state.get("output") + if not isinstance(inp, dict): + return False + + file_path = str(inp.get("filePath", "")) + old_string = inp.get("oldString") + new_string = inp.get("newString") + replace_all = bool(inp.get("replaceAll", False)) + + if not file_path or old_string is None or new_string is None: + return False + + if self.rich: + return self._render_rich(file_path, str(old_string), str(new_string), replace_all, output) + else: + return self._render_plain(file_path, str(old_string), str(new_string), replace_all, output) + + # ------------------------------------------------------------------ + # Rich + # ------------------------------------------------------------------ + + def _render_rich(self, file_path: str, old_string: str, new_string: str, + replace_all: bool, output) -> bool: + from rich.console import Group + from rich.panel import Panel + from rich.syntax import Syntax + from rich.text import Text + + settings = self.context.settings + cache = self.context.cache + rel_path = relativize_path(file_path, self.context.root) + output_str = str(output) if output is not None else "" + is_error = is_likely_error(output_str) or ( + output is not None + and "successfully" not in output_str.lower() + and "applied" not in output_str.lower() + ) + border = "red" if is_error else "green" + scope = "replace all" if replace_all else "replace 1 occurrence" + + sections: list[Any] = [ + Text(rel_path, style="bold cyan"), + Text(scope, style="dim"), + Text(), + ] + + diff_lines = compute_diff(old_string, new_string) + if not diff_lines: + sections.append(Text("(no changes in edit)", style="dim")) + 
else: + added = sum(1 for l in diff_lines if l.startswith("+") and not l.startswith("+++")) + removed = sum(1 for l in diff_lines if l.startswith("-") and not l.startswith("---")) + sections.append(Text(f"diff: -{removed} +{added}", style="dim")) + sections.append(Text()) + truncated, leftover = truncate_diff(diff_lines, settings.edit_diff_lines) + diff_text = "".join(truncated) + sections.append(Syntax(diff_text, "diff", theme="monokai", word_wrap=True)) + if leftover > 0: + sections.append(Text(f"... {leftover} more lines", style="dim")) + + sections.append(Text()) + sections.append(Text(output_str.strip(), style="red" if is_error else "green")) + + self.sink.write(Panel(Group(*sections), title="Edit", border_style=border, expand=True)) + + # Re-read cache after edit so subsequent writes show correct diffs. + cache.reread(file_path) + return True + + # ------------------------------------------------------------------ + # Plain + # ------------------------------------------------------------------ + + def _render_plain(self, file_path: str, old_string: str, new_string: str, + replace_all: bool, output) -> bool: + import _colors as C + + settings = self.context.settings + cache = self.context.cache + rel_path = relativize_path(file_path, self.context.root) + output_str = str(output) if output is not None else "" + scope = "replace all" if replace_all else "replace 1 occurrence" + + self.sink.write_text(C.header(f"edit {rel_path}")) + self.sink.write_text(f" {scope}") + + diff_lines = compute_diff(old_string, new_string) + if not diff_lines: + self.sink.write_text(" (no changes in edit)") + else: + added = sum(1 for l in diff_lines if l.startswith("+") and not l.startswith("+++")) + removed = sum(1 for l in diff_lines if l.startswith("-") and not l.startswith("---")) + self.sink.write_text(f" diff: -{removed} +{added}") + truncated, leftover = truncate_diff(diff_lines, settings.edit_diff_lines) + for line in truncated: + self.sink.write_text(f" {line}", end="") + 
if leftover > 0: + self.sink.write_text(f" ... {leftover} more lines") + + self.sink.write_text(f" {output_str.strip()}") + + cache.reread(file_path) + return True diff --git a/tools/rendering/tools/read.py b/tools/rendering/tools/read.py new file mode 100644 index 0000000..c91400f --- /dev/null +++ b/tools/rendering/tools/read.py @@ -0,0 +1,180 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +ReadRenderer — styled panel for read tool output. +""" + +from __future__ import annotations + +from typing import Any + +from rendering.tools.base import ToolRenderer +from rendering.utils import ( + classify_internal_read, detect_lexer, is_likely_error, + relativize_path, strip_line_numbers, strip_read_framing, +) + + +class ReadRenderer(ToolRenderer): + tool_names = ("read",) + + def render(self, tool_name: str, state: dict[str, Any]) -> bool: + inp = state.get("input") + output = state.get("output") + if not isinstance(inp, dict) or not isinstance(output, str): + return False + + file_path = str(inp.get("filePath", "")) + if not file_path: + return False + + rel_path = relativize_path(file_path, self.context.root) + offset = inp.get("offset") + limit = inp.get("limit") + settings = self.context.settings + cache = self.context.cache + + if self.rich: + return self._render_rich(rel_path, file_path, output, offset, limit, state, settings, cache) + else: + return self._render_plain(rel_path, file_path, output, offset, limit, state, settings, cache) + + # ------------------------------------------------------------------ + # Rich + # ------------------------------------------------------------------ + + def _render_rich(self, rel_path: str, file_path: str, output: str, + offset, limit, state, settings, cache) -> bool: + from rich.console import Group + from rich.panel import Panel + from rich.text import Text + + border = "green" if state.get("status") == "completed" else "yellow" + sections: list[Any] = 
[Text(rel_path, style="bold cyan")] + if offset is not None and limit is not None: + sections.append(Text(f"lines {offset}..{offset + limit - 1}", style="dim")) + + kind, payload, footer = strip_read_framing(output) + + if kind == "unknown": + if is_likely_error(output): + sections.append(Text()) + sections.append(Text(output.strip(), style="red")) + self.sink.write(Panel(Group(*sections), title="Read", border_style="red", expand=True)) + else: + return False + return True + + sections.append(Text()) + + if kind == "file": + body = str(payload).strip() + raw_body = strip_line_numbers(body) + cache.set(file_path, raw_body) + + if settings.internal_read_suppress: + description = classify_internal_read(rel_path) + if description is not None: + is_partial = offset is not None or limit is not None + if is_partial: + description = f"{description} (partial)" + suppressed: list[Any] = [Text(rel_path, style="bold cyan")] + suppressed.append(Text(description, style="dim italic")) + self.sink.write(Panel(Group(*suppressed), title="Read", border_style=border, expand=True)) + return True + + if not body: + sections.append(Text("(empty file)", style="dim")) + else: + lexer = detect_lexer(file_path) + self._render_truncated_body(sections, raw_body, settings.read_display_lines, lexer, footer) + + elif kind == "directory": + entries = payload if isinstance(payload, list) else [] + for entry in entries: + if entry.endswith("/"): + sections.append(Text(f" {entry}", style="bold blue")) + else: + sections.append(Text(f" {entry}")) + if footer: + sections.append(Text(footer, style="dim")) + + self.sink.write(Panel(Group(*sections), title="Read", border_style=border, expand=True)) + return True + + @staticmethod + def _render_truncated_body(sections: list[Any], body: str, cap: int, lexer: str, footer: str | None) -> None: + from rich.syntax import Syntax + from rich.text import Text + + body_lines = body.split("\n") + total = len(body_lines) + visible_lines = body_lines[:cap] + leftover 
= max(0, total - cap) + + visible = "\n".join(visible_lines) + if len(visible.encode("utf-8", errors="replace")) > 200 * 1024: + sections.append(Text(visible)) + else: + sections.append(Syntax(visible, lexer, theme="monokai", line_numbers=True, word_wrap=True)) + + if leftover > 0: + sections.append(Text(f"... {leftover} more lines", style="dim")) + if footer: + sections.append(Text(footer, style="dim")) + + # ------------------------------------------------------------------ + # Plain + # ------------------------------------------------------------------ + + def _render_plain(self, rel_path: str, file_path: str, output: str, + offset, limit, state, settings, cache) -> bool: + import _colors as C + + kind, payload, footer = strip_read_framing(output) + + if kind == "file": + body = str(payload).strip() + raw_body = strip_line_numbers(body) + cache.set(file_path, raw_body) + + if settings.internal_read_suppress: + description = classify_internal_read(rel_path) + if description is not None: + is_partial = offset is not None or limit is not None + suffix = " (partial)" if is_partial else "" + self.sink.write_text(C.header(f"read [{description}]{suffix}")) + return True + + self.sink.write_text(C.header(f"read {rel_path}")) + if offset is not None and limit is not None: + self.sink.write_text(f" lines {offset}..{offset + limit - 1}") + self._render_truncated_body_plain(raw_body, settings.read_display_lines, footer) + return True + + self.sink.write_text(C.header(f"read {rel_path}")) + if offset is not None and limit is not None: + self.sink.write_text(f" lines {offset}..{offset + limit - 1}") + + if kind == "directory": + entries = payload if isinstance(payload, list) else [] + for entry in entries: + self.sink.write_text(f" {entry}") + if footer: + self.sink.write_text(f" {footer}") + else: + self.sink.write_text(output.strip()) + + return True + + def _render_truncated_body_plain(self, body: str, cap: int, footer: str | None) -> None: + body_lines = body.split("\n") 
+ total = len(body_lines) + for line in body_lines[:cap]: + self.sink.write_text(line) + leftover = max(0, total - cap) + if leftover > 0: + self.sink.write_text(f" ... {leftover} more lines") + if footer: + self.sink.write_text(f" {footer}") diff --git a/tools/rendering/tools/write.py b/tools/rendering/tools/write.py new file mode 100644 index 0000000..74493c8 --- /dev/null +++ b/tools/rendering/tools/write.py @@ -0,0 +1,162 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +WriteRenderer — diff-aware panel for write tool output. +""" + +from __future__ import annotations + +from typing import Any + +from rendering.tools.base import ToolRenderer +from rendering.utils import ( + compute_diff, count_lines_and_bytes, detect_lexer, + relativize_path, truncate_diff, +) + + +class WriteRenderer(ToolRenderer): + tool_names = ("write",) + + def render(self, tool_name: str, state: dict[str, Any]) -> bool: + inp = state.get("input") + output = state.get("output") + if not isinstance(inp, dict): + return False + + file_path = str(inp.get("filePath", "")) + new_content = str(inp.get("content", "")) + output_str = str(output) if output is not None else "" + + if not file_path: + return False + + if self.rich: + return self._render_rich(file_path, new_content, output_str, output) + else: + return self._render_plain(file_path, new_content, output_str, output) + + # ------------------------------------------------------------------ + # Rich + # ------------------------------------------------------------------ + + def _render_rich(self, file_path: str, new_content: str, output_str: str, output) -> bool: + from rich.console import Group + from rich.panel import Panel + from rich.syntax import Syntax + from rich.text import Text + + settings = self.context.settings + cache = self.context.cache + rel_path = relativize_path(file_path, self.context.root) + n_lines, n_bytes = count_lines_and_bytes(new_content) + 
is_error = output is not None and not output_str.startswith("Wrote file") + border = "red" if is_error else "green" + + sections: list[Any] = [ + Text(rel_path, style="bold cyan"), + Text(f"{n_lines} lines, {n_bytes} bytes", style="dim"), + ] + + if is_error: + sections.append(Text()) + sections.append(Text(output_str.strip(), style="red")) + self.sink.write(Panel(Group(*sections), title="Write", border_style=border, expand=True)) + return True + + prev = cache.get(file_path) + lexer = detect_lexer(file_path) + status_text = output_str.strip() + + if prev is not None: + diff_lines = compute_diff(prev, new_content) + if not diff_lines: + sections.append(Text("(no changes)", style="dim")) + else: + added = sum(1 for l in diff_lines if l.startswith("+") and not l.startswith("+++")) + removed = sum(1 for l in diff_lines if l.startswith("-") and not l.startswith("---")) + sections.append(Text(f"diff: -{removed} +{added}", style="dim")) + sections.append(Text()) + truncated, leftover = truncate_diff(diff_lines, settings.write_diff_limit) + diff_text = "".join(truncated) + sections.append(Syntax(diff_text, "diff", theme="monokai", word_wrap=True)) + if leftover > 0: + sections.append(Text(f"... 
{leftover} more lines", style="dim")) + else: + sections.append(Text("(new file)", style="dim")) + sections.append(Text()) + self._render_body_rich(sections, new_content, settings.write_content_lines, lexer) + + sections.append(Text()) + sections.append(Text(status_text, style="green")) + self.sink.write(Panel(Group(*sections), title="Write", border_style=border, expand=True)) + cache.set(file_path, new_content) + return True + + def _render_body_rich(self, sections: list[Any], body: str, cap: int, lexer: str) -> None: + from rich.syntax import Syntax + from rich.text import Text + + body_lines = body.split("\n") + total = len(body_lines) + visible_lines = body_lines[:cap] + leftover = max(0, total - cap) + visible = "\n".join(visible_lines) + if len(visible.encode("utf-8", errors="replace")) > 200 * 1024: + sections.append(Text(visible)) + else: + sections.append(Syntax(visible, lexer, theme="monokai", line_numbers=True, word_wrap=True)) + if leftover > 0: + sections.append(Text(f"... {leftover} more lines", style="dim")) + + # ------------------------------------------------------------------ + # Plain + # ------------------------------------------------------------------ + + def _render_plain(self, file_path: str, new_content: str, output_str: str, output) -> bool: + import _colors as C + + settings = self.context.settings + cache = self.context.cache + rel_path = relativize_path(file_path, self.context.root) + n_lines, n_bytes = count_lines_and_bytes(new_content) + + self.sink.write_text(C.header(f"write {rel_path}")) + self.sink.write_text(f" {n_lines} lines, {n_bytes} bytes") + + is_error = output is not None and not output_str.startswith("Wrote file") + if is_error: + self.sink.write_text(C.fail(output_str.strip())) + return True + + prev = cache.get(file_path) + if prev is not None: + diff_lines = compute_diff(prev, new_content) + if not diff_lines: + self.sink.write_text(" (no changes)") + else: + added = sum(1 for l in diff_lines if l.startswith("+") and 
not l.startswith("+++")) + removed = sum(1 for l in diff_lines if l.startswith("-") and not l.startswith("---")) + self.sink.write_text(f" diff: -{removed} +{added}") + truncated, leftover = truncate_diff(diff_lines, settings.write_diff_limit) + for line in truncated: + self.sink.write_text(f" {line}", end="") + if leftover > 0: + self.sink.write_text(f" ... {leftover} more lines") + else: + self.sink.write_text(" (new file)") + self._render_body_plain(new_content, settings.write_content_lines) + + self.sink.write_text(f" {output_str.strip()}") + cache.set(file_path, new_content) + return True + + def _render_body_plain(self, body: str, cap: int) -> None: + body_lines = body.split("\n") + total = len(body_lines) + for line in body_lines[:cap]: + self.sink.write_text(line) + leftover = max(0, total - cap) + if leftover > 0: + self.sink.write_text(f" ... {leftover} more lines") diff --git a/tools/rendering/utils.py b/tools/rendering/utils.py new file mode 100644 index 0000000..620f30e --- /dev/null +++ b/tools/rendering/utils.py @@ -0,0 +1,227 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Shared utilities for read/write/edit/apply_patch/glob/grep renderers. + +Path relativization, lexer detection, diff computation, read-framing +parsing, internal read suppression, and truncated body rendering. 
+""" + +from __future__ import annotations + +import difflib +import os +import re +from pathlib import Path +from typing import Any + +# --------------------------------------------------------------------------- +# Regexes and lookup tables +# --------------------------------------------------------------------------- + +_READ_FILE_FRAMING_RE = re.compile( + r"<path>(?P<path>.*?)</path>\s*" + r"<type>(?P<type>.*?)</type>\s*" + r"<content>\s*\n(?P<content>.*?)\n\s*</content>", + re.DOTALL, +) +_READ_DIR_FRAMING_RE = re.compile( + r"<path>(?P<path>.*?)</path>\s*" + r"<type>directory</type>\s*" + r"<entries>\s*\n(?P<entries>.*?)\n\s*</entries>", + re.DOTALL, +) +_READ_SUMMARY_RE = re.compile( + r"\((?:End of file|Showing lines|Buffer has more lines)[^\)]*\)\s*$", + re.MULTILINE, +) + +_LEXER_MAP = { + ".c": "c", ".h": "c", ".cpp": "cpp", ".cc": "cpp", ".cxx": "cpp", + ".hpp": "cpp", ".hh": "cpp", ".cs": "csharp", ".java": "java", + ".erl": "erlang", ".hrl": "erlang", ".app.src": "erlang", ".config": "erlang", + ".ex": "elixir", ".exs": "elixir", ".py": "python", ".rb": "ruby", + ".rs": "rust", ".go": "go", + ".js": "javascript", ".ts": "typescript", ".tsx": "tsx", ".jsx": "jsx", + ".sh": "bash", ".bash": "bash", ".zsh": "bash", + ".yml": "yaml", ".yaml": "yaml", ".json": "json", ".toml": "toml", + ".xml": "xml", ".html": "html", ".css": "css", ".sql": "sql", + ".md": "markdown", ".mk": "make", ".cmake": "cmake", + ".dockerfile": "docker", ".tf": "hcl", ".hcl": "hcl", +} + +_FINDING_FILENAME_RE = re.compile(r"^(CC-\d{4,})-(.+)\.md$") +_ROUT_WORKSPACE_DOCS = {"AGENTS.md", "README.md"} +_ROUT_WORKSPACE_CONFIGS = {"codecome.yml"} + + +# --------------------------------------------------------------------------- +# Path helpers +# --------------------------------------------------------------------------- + +def relativize_path(path: str, root: Path) -> str: + try: + return str(Path(path).relative_to(root)) + except ValueError: + return path + + +def detect_lexer(path: str) -> str: + ext = Path(path).suffix.lower() + if Path(path).name.lower() == "makefile": + return "make" + if
Path(path).name.lower() == "dockerfile": + return "docker" + return _LEXER_MAP.get(ext, "text") + + +# --------------------------------------------------------------------------- +# Diff helpers +# --------------------------------------------------------------------------- + +def compute_diff(old: str, new: str, context: int = 3) -> list[str]: + old_lines = old.splitlines(keepends=True) + new_lines = new.splitlines(keepends=True) + return list(difflib.unified_diff(old_lines, new_lines, fromfile="old", tofile="new", n=context)) + + +def truncate_diff(diff_lines: list[str], max_lines: int) -> tuple[list[str], int]: + if len(diff_lines) <= max_lines: + return diff_lines, 0 + return diff_lines[:max_lines], len(diff_lines) - max_lines + + +# --------------------------------------------------------------------------- +# Body rendering +# --------------------------------------------------------------------------- + +def count_lines_and_bytes(text: str) -> tuple[int, int]: + return text.count("\n") + (1 if text and not text.endswith("\n") else 0), len(text.encode("utf-8", errors="replace")) + + +def strip_line_numbers(text: str) -> str: + raw_lines = [] + for line in text.split("\n"): + colon_idx = line.find(": ") + if colon_idx >= 0 and colon_idx <= 6 and line[:colon_idx].strip().isdigit(): + raw_lines.append(line[colon_idx + 2:]) + else: + raw_lines.append(line) + return "\n".join(raw_lines) + + +def format_excerpt(text: str, max_lines: int) -> tuple[str, int]: + lines = text.split("\n") + if len(lines) <= max_lines: + return text, 0 + return "\n".join(lines[:max_lines]), len(lines) - max_lines + + +def is_likely_error(text: str) -> bool: + lower = text.lower() + return any(marker in lower for marker in ( + "error", "traceback", "command not found", "failed", "permission denied", + "no such file", "exception", + )) + + +# --------------------------------------------------------------------------- +# Read-framing parsing +# 
--------------------------------------------------------------------------- + +def strip_read_framing(output: str) -> tuple[str, Any, str | None]: + """Parse OpenCode read tool output. + + Returns (kind, payload, footer) where kind is 'file', 'directory', + or 'unknown'. + """ + m = _READ_FILE_FRAMING_RE.search(output) + if m: + body = m.group("content") + summary_m = _READ_SUMMARY_RE.search(body) + if summary_m: + footer = summary_m.group(0).strip() + body = body[:summary_m.start()].rstrip() + else: + footer = None + return "file", body, footer + + d = _READ_DIR_FRAMING_RE.search(output) + if d: + raw_entries = d.group("entries") + entries = [] + footer = None + for line in raw_entries.split("\n"): + line = line.strip() + if not line: + continue + if line.startswith("(") and "entries" in line and line.endswith(")"): + footer = line + else: + entries.append(line) + return "directory", entries, footer + + return "unknown", None, None + + +# --------------------------------------------------------------------------- +# Internal read suppression +# --------------------------------------------------------------------------- + +def classify_internal_read(rel_path: str) -> str | None: + if not rel_path or os.path.isabs(rel_path): + return None + + parts = Path(rel_path).parts + if not parts: + return None + + if len(parts) == 1: + name = parts[0] + if name in _ROUT_WORKSPACE_DOCS: + return f"reading workspace doc: {name}" + if name in _ROUT_WORKSPACE_CONFIGS: + return f"reading workspace config: {name}" + return None + + if parts[0] == ".opencode": + if len(parts) >= 3 and parts[1] == "agents": + agent_name = Path(parts[2]).stem + return f"loading agent: {agent_name}" + if len(parts) >= 3 and parts[1] == "skills": + skill_name = parts[2] + if len(parts) == 4 and parts[3] == "SKILL.md": + return f"loading skill: {skill_name}" + if len(parts) >= 4: + rest = "/".join(parts[3:]) + return f"loading skill resource: {skill_name}/{rest}" + return f"loading skill: {skill_name}" + 
return f"loading opencode config: {rel_path}" + + if parts[0] == "itemdb": + if len(parts) >= 4 and parts[1] == "findings": + status = parts[2] + filename = parts[3] + m = _FINDING_FILENAME_RE.match(filename) + if m: + return f"reading finding: {m.group(1)} [{status}] - {m.group(2)}" + return f"reading itemdb file: {rel_path}" + if len(parts) >= 3 and parts[1] == "notes": + return f"reading note: {parts[2]}" + if len(parts) >= 3 and parts[1] == "evidence": + rest = "/".join(parts[2:]) + return f"reading evidence: {rest}" + if len(parts) >= 3 and parts[1] == "reports": + return f"reading report: {parts[2]}" + if len(parts) == 2 and parts[1] == "index.md": + return "reading items index" + return f"reading itemdb file: {rel_path}" + + if parts[0] == "runs" and len(parts) >= 2: + return f"reading run summary: {parts[1]}" + + if parts[0] == "templates" and len(parts) >= 2: + return f"reading template: {parts[1]}" + + return None diff --git a/tools/run-agent.py b/tools/run-agent.py index 15f608c..69f8a3e 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -3284,22 +3284,15 @@ def _dispatch_tool_renderer(console: Console, tool: str, state: dict[str, Any]) from rendering.tools.todo import TodoRenderer return TodoRenderer(_get_rendering_ctx(console)).render(tool_lower, state) elif tool_lower == "read": - # Invalidate stale cache entries before non-write events _cache_invalidate_stale() - if HAVE_RICH: - return render_read_rich(console, state) - else: - return render_read_plain(state) + from rendering.tools.read import ReadRenderer + return ReadRenderer(_get_rendering_ctx(console)).render(tool_lower, state) elif tool_lower == "write": - if HAVE_RICH: - return render_write_rich(console, state) - else: - return render_write_plain(state) + from rendering.tools.write import WriteRenderer + return WriteRenderer(_get_rendering_ctx(console)).render(tool_lower, state) elif tool_lower == "edit": - if HAVE_RICH: - return render_edit_rich(console, state) - else: - return 
render_edit_plain(state) + from rendering.tools.edit import EditRenderer + return EditRenderer(_get_rendering_ctx(console)).render(tool_lower, state) elif tool_lower in ("apply_patch", "applypatch", "apply-patch"): if HAVE_RICH: return render_apply_patch_rich(console, state) From ec3dc9bb8349e257a8e68b0759ea17163bac4b81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 24 May 2026 22:03:45 +0200 Subject: [PATCH 11/65] refactor(phase-a3-batch3): migrate apply_patch renderer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the most complex tool renderer from run-agent.py: - rendering/tools/apply_patch.py — ApplyPatchRenderer Handles three patch formats: *** envelope, {patches: [...]} JSON list, and raw unified diff. Uses _ParsedFilePatch dataclass internally. Cache invalidation via context.cache.reread() on success. Dispatch wired. 334 passed, 0 failed, 0 errors. --- tools/rendering/tools/__init__.py | 2 + tools/rendering/tools/apply_patch.py | 288 +++++++++++++++++++++++++++ tools/run-agent.py | 6 +- 3 files changed, 292 insertions(+), 4 deletions(-) create mode 100644 tools/rendering/tools/apply_patch.py diff --git a/tools/rendering/tools/__init__.py b/tools/rendering/tools/__init__.py index d5da88c..bc5c8a9 100644 --- a/tools/rendering/tools/__init__.py +++ b/tools/rendering/tools/__init__.py @@ -7,6 +7,7 @@ from __future__ import annotations +from rendering.tools.apply_patch import ApplyPatchRenderer from rendering.tools.base import FallbackToolRenderer, ToolRenderer from rendering.tools.edit import EditRenderer from rendering.tools.permissions import PermissionErrorRenderer @@ -17,6 +18,7 @@ from rendering.tools.write import WriteRenderer __all__ = [ + "ApplyPatchRenderer", "EditRenderer", "FallbackToolRenderer", "PermissionErrorRenderer", diff --git a/tools/rendering/tools/apply_patch.py b/tools/rendering/tools/apply_patch.py new file mode 100644 index 0000000..fb0e78e --- /dev/null 
+++ b/tools/rendering/tools/apply_patch.py @@ -0,0 +1,288 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +ApplyPatchRenderer — multi-file patch panel with envelope/JSON/unified-diff parsing. +""" + +from __future__ import annotations + +import os +import re +from dataclasses import dataclass +from typing import Any + +from rendering.tools.base import ToolRenderer +from rendering.utils import is_likely_error, relativize_path, truncate_diff + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +_APPLY_PATCH_HEADER_RE = re.compile( + r"^\*\*\*[ \t]*(Begin Patch|End Patch|Update File|Add File|Delete File|Rename File|Move File):?[ \t]*(.*)", + re.MULTILINE, +) + +_PATCH_TEXT_KEYS = ("patchText", "patch_text", "patch", "input", "content", "diff", "body") + + +# --------------------------------------------------------------------------- +# ParsedFilePatch +# --------------------------------------------------------------------------- + +@dataclass +class _ParsedFilePatch: + op: str # add, update, delete, rename, unknown + path: str + old_path: str + hunks: str # unified-diff-ready text + added: int + removed: int + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _first_string(d: dict[str, Any], keys: tuple[str, ...]) -> str: + for k in keys: + v = d.get(k) + if isinstance(v, str) and v: + return v + return "" + + +def _parse_apply_patch_envelope(text: str, root) -> list[_ParsedFilePatch]: + results: list[_ParsedFilePatch] = [] + parts = _APPLY_PATCH_HEADER_RE.split(text) + i = 1 + while i + 2 <= len(parts): + directive = parts[i].strip() + file_path = parts[i + 1].strip() + body = parts[i + 2] if i + 2 < len(parts) else "" + i += 3 + + if directive in 
("Begin Patch", "End Patch"): + continue + + op_map = { + "Update File": "update", "Add File": "add", "Delete File": "delete", + "Rename File": "rename", "Move File": "rename", + } + op = op_map.get(directive, "unknown") + old_path = "" + if op == "rename" and " -> " in file_path: + old_path, file_path = file_path.split(" -> ", 1) + old_path = old_path.strip() + file_path = file_path.strip() + + body_lines = body.split("\n") + added = sum(1 for l in body_lines if l.startswith("+") and not l.startswith("+++")) + removed = sum(1 for l in body_lines if l.startswith("-") and not l.startswith("---")) + + rel = relativize_path(file_path, root) + old_rel = relativize_path(old_path, root) if old_path else rel + if op == "add": + header = f"--- /dev/null\n+++ b/{rel}\n" + elif op == "delete": + header = f"--- a/{rel}\n+++ /dev/null\n" + else: + header = f"--- a/{old_rel}\n+++ b/{rel}\n" + + hunks = header + body.strip() + "\n" + results.append(_ParsedFilePatch(op=op, path=file_path, old_path=old_path, + hunks=hunks, added=added, removed=removed)) + return results + + +def _parse_apply_patch_json_list(patches: list[dict[str, Any]], root) -> list[_ParsedFilePatch]: + results: list[_ParsedFilePatch] = [] + for p in patches: + path = str(p.get("path", p.get("file", ""))) + diff_text = _first_string(p, ("diff", "patch", "patchText", "patch_text", "content", "body")) + lines = diff_text.split("\n") + added = sum(1 for l in lines if l.startswith("+") and not l.startswith("+++")) + removed = sum(1 for l in lines if l.startswith("-") and not l.startswith("---")) + rel = relativize_path(path, root) + header = f"--- a/{rel}\n+++ b/{rel}\n" + hunks = header + diff_text.strip() + "\n" + results.append(_ParsedFilePatch(op="update", path=path, old_path="", + hunks=hunks, added=added, removed=removed)) + return results + + +def _extract_apply_patch_payload(state: dict[str, Any], root) -> tuple[str, list[_ParsedFilePatch], str]: + inp = state.get("input") + output = state.get("output") + 
output_str = str(output) if output is not None else "" + + raw_text = "" + if isinstance(inp, dict): + raw_text = _first_string(inp, _PATCH_TEXT_KEYS) + if not raw_text and isinstance(inp.get("patches"), list): + patches = _parse_apply_patch_json_list(inp["patches"], root) + return "", patches, output_str + elif isinstance(inp, str): + raw_text = inp + + if not raw_text: + return "", [], output_str + + if "*** " in raw_text: + patches = _parse_apply_patch_envelope(raw_text, root) + if patches: + return raw_text, patches, output_str + + if raw_text.lstrip().startswith(("--- ", "diff --git")): + lines = raw_text.split("\n") + added = sum(1 for l in lines if l.startswith("+") and not l.startswith("+++")) + removed = sum(1 for l in lines if l.startswith("-") and not l.startswith("---")) + patches = [_ParsedFilePatch(op="unknown", path="(patch)", old_path="", + hunks=raw_text, added=added, removed=removed)] + return raw_text, patches, output_str + + return raw_text, [], output_str + + +# --------------------------------------------------------------------------- +# ApplyPatchRenderer +# --------------------------------------------------------------------------- + +class ApplyPatchRenderer(ToolRenderer): + tool_names = ("apply_patch", "applypatch", "apply-patch") + + def render(self, tool_name: str, state: dict[str, Any]) -> bool: + raw_text, patches, output_str = _extract_apply_patch_payload(state, self.context.root) + status = str(state.get("status", "")) + + if not patches and not raw_text: + return False + + if self.rich: + return self._render_rich(raw_text, patches, output_str, status) + else: + return self._render_plain(raw_text, patches, output_str, status) + + # ------------------------------------------------------------------ + # Rich + # ------------------------------------------------------------------ + + def _render_rich(self, raw_text: str, patches: list[_ParsedFilePatch], + output_str: str, status: str) -> bool: + from rich.console import Group + from 
rich.panel import Panel + from rich.syntax import Syntax + from rich.text import Text + + settings = self.context.settings + cache = self.context.cache + is_err = is_likely_error(output_str) + border = "red" if is_err else ("green" if status == "completed" else "yellow") + + sections: list[Any] = [] + + if not patches: + byte_size = len(raw_text.encode("utf-8", errors="replace")) + line_count = raw_text.count("\n") + sections.append(Text(f"Raw patch: {line_count} lines, {byte_size} bytes", style="dim")) + sections.append(Text()) + truncated_lines = raw_text.split("\n")[:settings.write_diff_limit] + leftover = max(0, raw_text.count("\n") - settings.write_diff_limit) + sections.append(Syntax("\n".join(truncated_lines), "diff", theme="monokai", word_wrap=True)) + if leftover > 0: + sections.append(Text(f"... {leftover} more lines", style="dim")) + else: + total_added = sum(p.added for p in patches) + total_removed = sum(p.removed for p in patches) + sections.append(Text(f"{len(patches)} file(s) changed: +{total_added} -{total_removed}", style="dim")) + sections.append(Text()) + + shown = patches[:settings.apply_patch_max_files] + for fp in shown: + rel = relativize_path(fp.path, self.context.root) + label = f"{fp.op:<8} {rel} +{fp.added} -{fp.removed}" + sections.append(Text(label, style="bold cyan")) + + diff_lines_list = fp.hunks.split("\n") + diff_with_nl = [l + "\n" for l in diff_lines_list if l or diff_lines_list[-1:] != [l]] + truncated, leftover = truncate_diff(diff_with_nl, settings.apply_patch_diff_lines) + diff_text = "".join(truncated) + if diff_text.strip(): + sections.append(Syntax(diff_text, "diff", theme="monokai", word_wrap=True)) + if leftover > 0: + sections.append(Text(f"... {leftover} more lines", style="dim")) + sections.append(Text()) + + if len(patches) > settings.apply_patch_max_files: + remaining = len(patches) - settings.apply_patch_max_files + sections.append(Text(f"... 
and {remaining} more file(s)", style="dim")) + + if output_str.strip(): + sections.append(Text(output_str.strip(), style="red" if is_err else "green")) + + self.sink.write(Panel(Group(*sections), title="Apply patch", border_style=border, expand=True)) + + if status == "completed" and not is_err: + for fp in patches: + full_path = fp.path + if not os.path.isabs(full_path): + full_path = os.path.join(str(self.context.root), full_path) + cache.reread(full_path) + return True + + # ------------------------------------------------------------------ + # Plain + # ------------------------------------------------------------------ + + def _render_plain(self, raw_text: str, patches: list[_ParsedFilePatch], + output_str: str, status: str) -> bool: + import _colors as C + + settings = self.context.settings + cache = self.context.cache + is_err = is_likely_error(output_str) + + if not patches: + line_count = raw_text.count("\n") + byte_size = len(raw_text.encode("utf-8", errors="replace")) + self.sink.write_text(C.header(f"apply_patch (raw: {line_count} lines, {byte_size} bytes)")) + truncated_lines = raw_text.split("\n")[:settings.write_diff_limit] + for line in truncated_lines: + self.sink.write_text(f" {line}") + leftover = max(0, raw_text.count("\n") - settings.write_diff_limit) + if leftover > 0: + self.sink.write_text(f" ... 
{leftover} more lines") + else: + total_added = sum(p.added for p in patches) + total_removed = sum(p.removed for p in patches) + self.sink.write_text(C.header(f"apply_patch ({len(patches)} file(s): +{total_added} -{total_removed})")) + + shown = patches[:settings.apply_patch_max_files] + for fp in shown: + rel = relativize_path(fp.path, self.context.root) + self.sink.write_text(f" {fp.op:<8} {rel} +{fp.added} -{fp.removed}") + diff_with_nl = [l + "\n" for l in fp.hunks.split("\n")] + truncated, leftover = truncate_diff(diff_with_nl, settings.apply_patch_diff_lines) + for line in truncated: + self.sink.write_text(f" {line}", end="") + if leftover > 0: + self.sink.write_text(f" ... {leftover} more lines") + + if len(patches) > settings.apply_patch_max_files: + remaining = len(patches) - settings.apply_patch_max_files + self.sink.write_text(f" ... and {remaining} more file(s)") + + if output_str.strip(): + if is_err: + self.sink.write_text(C.fail(output_str.strip())) + else: + self.sink.write_text(C.ok(output_str.strip())) + + if status == "completed" and not is_err: + for fp in patches: + full_path = fp.path + if not os.path.isabs(full_path): + full_path = os.path.join(str(self.context.root), full_path) + cache.reread(full_path) + return True diff --git a/tools/run-agent.py b/tools/run-agent.py index 69f8a3e..fdf5a20 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -3294,10 +3294,8 @@ def _dispatch_tool_renderer(console: Console, tool: str, state: dict[str, Any]) from rendering.tools.edit import EditRenderer return EditRenderer(_get_rendering_ctx(console)).render(tool_lower, state) elif tool_lower in ("apply_patch", "applypatch", "apply-patch"): - if HAVE_RICH: - return render_apply_patch_rich(console, state) - else: - return render_apply_patch_plain(state) + from rendering.tools.apply_patch import ApplyPatchRenderer + return ApplyPatchRenderer(_get_rendering_ctx(console)).render(tool_lower, state) elif tool_lower == "glob": _cache_invalidate_stale() if 
HAVE_RICH: From 57e03fe857ad697d1ce858e51362e064e9877759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 24 May 2026 22:08:37 +0200 Subject: [PATCH 12/65] refactor(phase-a3-batch4-5): migrate glob, grep, bash renderers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract three more renderers from run-agent.py: - rendering/tools/glob.py — GlobRenderer (file listing) - rendering/tools/grep.py — GrepRenderer (match-highlighted) - rendering/tools/command.py — CommandRenderer (generic bash, interceptors deferred) All tool dispatch in _dispatch_tool_renderer now uses the new classes. Sandbox-bootstrap and bash-shim interceptors still run in-process from the dispatch (will be extracted as CommandExecutionInterceptors in batch 6). 334 passed, 0 failed, 0 errors --- tools/rendering/tools/__init__.py | 6 + tools/rendering/tools/command.py | 74 ++++++++ tools/rendering/tools/glob.py | 120 ++++++++++++ tools/rendering/tools/grep.py | 291 ++++++++++++++++++++++++++++++ tools/run-agent.py | 18 +- 5 files changed, 497 insertions(+), 12 deletions(-) create mode 100644 tools/rendering/tools/command.py create mode 100644 tools/rendering/tools/glob.py create mode 100644 tools/rendering/tools/grep.py diff --git a/tools/rendering/tools/__init__.py b/tools/rendering/tools/__init__.py index bc5c8a9..89ae2ca 100644 --- a/tools/rendering/tools/__init__.py +++ b/tools/rendering/tools/__init__.py @@ -9,7 +9,10 @@ from rendering.tools.apply_patch import ApplyPatchRenderer from rendering.tools.base import FallbackToolRenderer, ToolRenderer +from rendering.tools.command import CommandRenderer from rendering.tools.edit import EditRenderer +from rendering.tools.glob import GlobRenderer +from rendering.tools.grep import GrepRenderer from rendering.tools.permissions import PermissionErrorRenderer from rendering.tools.read import ReadRenderer from rendering.tools.skill import SkillRenderer @@ -19,8 +22,11 @@ __all__ = [ 
"ApplyPatchRenderer", + "CommandRenderer", "EditRenderer", "FallbackToolRenderer", + "GlobRenderer", + "GrepRenderer", "PermissionErrorRenderer", "ReadRenderer", "SkillRenderer", diff --git a/tools/rendering/tools/command.py b/tools/rendering/tools/command.py new file mode 100644 index 0000000..ff817c3 --- /dev/null +++ b/tools/rendering/tools/command.py @@ -0,0 +1,74 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +CommandRenderer — generic bash command display. + +Interceptors for sandbox-bootstrap, rtk, rg, ls, find, tree are wired +separately (Phase A3 batch 6). +""" + +from __future__ import annotations + +from typing import Any + +from rendering.tools.base import ToolRenderer +from rendering.utils import is_likely_error + + +class CommandRenderer(ToolRenderer): + tool_names = ("bash",) + + def render(self, tool_name: str, state: dict[str, Any]) -> bool: + inp = state.get("input") + if not isinstance(inp, dict): + return False + + command = str(inp.get("command", "")) + description = inp.get("description", "") + output = state.get("output") + output_str = str(output) if output is not None else "" + + if not command: + return False + + if self.rich: + return self._render_rich(command, str(description), output_str, state) + else: + return self._render_plain(command, str(description), output_str, state) + + def _render_rich(self, command: str, description: str, output_str: str, state: dict[str, Any]) -> bool: + from rich.console import Group + from rich.panel import Panel + from rich.text import Text + + err = is_likely_error(output_str) + border = "red" if err else ("green" if state.get("status") == "completed" else "yellow") + + sections: list[Any] = [Text(f"$ {command}", style="bold cyan")] + if description: + sections.append(Text(description, style="dim italic")) + + sections.append(Text()) + + if output_str.strip(): + sections.append(Text("Output", style="bold green")) + 
sections.append(Text(output_str.strip())) + else: + sections.append(Text("(no output)", style="dim")) + + self.sink.write(Panel(Group(*sections), title="Bash", border_style=border, expand=True)) + return True + + def _render_plain(self, command: str, description: str, output_str: str, state: dict[str, Any]) -> bool: + import _colors as C + + self.sink.write_text(C.header(f"bash $ {command}")) + if description: + self.sink.write_text(f" # {description}") + + if output_str.strip(): + self.sink.write_text(output_str.strip()) + else: + self.sink.write_text(" (no output)") + return True diff --git a/tools/rendering/tools/glob.py b/tools/rendering/tools/glob.py new file mode 100644 index 0000000..a309f12 --- /dev/null +++ b/tools/rendering/tools/glob.py @@ -0,0 +1,120 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +GlobRenderer — file listing panel for glob tool output. +""" + +from __future__ import annotations + +import re +from pathlib import Path +from typing import Any + +from rendering.tools.base import ToolRenderer +from rendering.utils import relativize_path + +_GLOB_SUMMARY_LINE_RE = re.compile( + r"^\d+\s+(?:for\s|match)" + r"|^No\s+matches?\s" + r"|^\d+\s+file" +) + + +def _parse_glob_output(output: str) -> tuple[list[str], list[str]]: + files: list[str] = [] + summaries: list[str] = [] + for line in output.strip().split("\n"): + stripped = line.strip() + if not stripped: + continue + if _GLOB_SUMMARY_LINE_RE.match(stripped): + summaries.append(stripped) + else: + files.append(stripped) + return files, summaries + + +class GlobRenderer(ToolRenderer): + tool_names = ("glob",) + + def render(self, tool_name: str, state: dict[str, Any]) -> bool: + inp = state.get("input") + output = state.get("output") + if not isinstance(inp, dict) or not isinstance(output, str): + return False + + pattern = str(inp.get("pattern", "")) + search_path = str(inp.get("path", "")) + matches, summaries = 
_parse_glob_output(output) + if self.rich: + return self._render_rich(pattern, search_path, matches, summaries) + else: + return self._render_plain(pattern, search_path, matches, summaries) + + def _render_rich(self, pattern: str, search_path: str, + matches: list[str], summaries: list[str]) -> bool: + from rich.console import Group + from rich.panel import Panel + from rich.text import Text + + settings = self.context.settings + n_matches = len(matches) + border = "green" if n_matches > 0 else "dim" + + sections: list[Any] = [ + Text(f"pattern={pattern} path={relativize_path(search_path, self.context.root) if search_path else '.'}", style="dim"), + Text(), + ] + + if n_matches == 0: + if summaries: + for s in summaries: + sections.append(Text(f" {s}", style="dim")) + else: + sections.append(Text("(no matches)", style="dim")) + else: + shown = matches[:settings.glob_match_cap] + for m in shown: + try: + rel = str(Path(m).relative_to(search_path)) if search_path else m + except ValueError: + rel = relativize_path(m, self.context.root) + sections.append(Text(f" {rel}")) + if n_matches > settings.glob_match_cap: + sections.append(Text(f" ... 
and {n_matches - settings.glob_match_cap} more", style="dim")) + + sections.append(Text()) + sections.append(Text(f"{n_matches} match(es)", style="dim")) + + self.sink.write(Panel(Group(*sections), title="Glob", border_style=border, expand=True)) + return True + + def _render_plain(self, pattern: str, search_path: str, + matches: list[str], summaries: list[str]) -> bool: + import _colors as C + + settings = self.context.settings + n_matches = len(matches) + + self.sink.write_text(C.header(f"glob {pattern} in {relativize_path(search_path, self.context.root) if search_path else '.'}")) + + if n_matches == 0: + if summaries: + for s in summaries: + self.sink.write_text(f" {s}") + else: + self.sink.write_text(" (no matches)") + else: + shown = matches[:settings.glob_match_cap] + for m in shown: + try: + rel = str(Path(m).relative_to(search_path)) if search_path else m + except ValueError: + rel = relativize_path(m, self.context.root) + self.sink.write_text(f" {rel}") + if n_matches > settings.glob_match_cap: + self.sink.write_text(f" ... and {n_matches - settings.glob_match_cap} more") + + self.sink.write_text(f" {n_matches} match(es)") + return True diff --git a/tools/rendering/tools/grep.py b/tools/rendering/tools/grep.py new file mode 100644 index 0000000..6f54754 --- /dev/null +++ b/tools/rendering/tools/grep.py @@ -0,0 +1,291 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +GrepRenderer — match-highlighted panel for grep tool output. 
+""" + +from __future__ import annotations + +import re +from collections import OrderedDict +from typing import Any + +from rendering.tools.base import ToolRenderer +from rendering.utils import is_likely_error, relativize_path + +_GREP_LINE_RE = re.compile(r"^(?P.+?):(?P\d+):(?P.*)$") + +_GREP_HIGHLIGHT_STYLE = "bold yellow on grey23" +_GREP_BODY_STYLE = "default" +_GREP_LINENO_STYLE = "dim cyan" + + +def _grep_compile_pattern(pattern: str) -> re.Pattern[str] | None: + if not pattern: + return None + try: + return re.compile(pattern) + except re.error: + try: + return re.compile(re.escape(pattern)) + except re.error: + return None + + +def _grep_format_line_rich(line_no: int, text: str, pat: re.Pattern[str] | None, highlight: bool) -> Any: + from rich.text import Text + t = Text() + t.append(f" {line_no:>5}", style=_GREP_LINENO_STYLE) + t.append(": ", style="dim") + + if pat is None or not highlight: + t.append(text, style=_GREP_BODY_STYLE) + return t + + last = 0 + for m in pat.finditer(text): + start, end = m.start(), m.end() + if start > last: + t.append(text[last:start], style=_GREP_BODY_STYLE) + if start < end: + t.append(text[start:end], style=_GREP_HIGHLIGHT_STYLE) + last = end + if last < len(text): + t.append(text[last:], style=_GREP_BODY_STYLE) + return t + + +def _grep_format_line_plain(line_no: int, text: str, pat: re.Pattern[str] | None, + highlight: bool, color: bool) -> str: + prefix = f" {line_no:>5}: " + if pat is None or not highlight: + return prefix + text + + if color: + hl_on = "\x1b[1;33m" + hl_off = "\x1b[0m" + else: + hl_on = ">>>" + hl_off = "<<<" + + parts = [prefix] + last = 0 + for m in pat.finditer(text): + start, end = m.start(), m.end() + if start > last: + parts.append(text[last:start]) + if start < end: + parts.append(hl_on + text[start:end] + hl_off) + last = end + if last < len(text): + parts.append(text[last:]) + return "".join(parts) + + +def _parse_grep_output(output: str) -> tuple[str, list[dict[str, Any]]]: + raw_lines = [l 
for l in output.strip().split("\n") if l.strip()] + if not raw_lines: + return "files", [] + + line_matches = 0 + for l in raw_lines: + if _GREP_LINE_RE.match(l): + line_matches += 1 + + if line_matches >= len(raw_lines) * 0.7: + entries: list[dict[str, Any]] = [] + for l in raw_lines: + m = _GREP_LINE_RE.match(l) + if m: + entries.append({"path": m.group("path"), "line": int(m.group("line")), "text": m.group("text")}) + else: + entries.append({"path": l.strip(), "line": 0, "text": ""}) + return "lines", entries + else: + return "files", [{"path": l.strip()} for l in raw_lines] + + +class GrepRenderer(ToolRenderer): + tool_names = ("grep",) + + def render(self, tool_name: str, state: dict[str, Any]) -> bool: + inp = state.get("input") + output = state.get("output") + status = str(state.get("status", "")) + + if not isinstance(inp, dict): + return False + + if isinstance(output, dict): + output_str = str(output.get("matches", output.get("results", ""))) + elif isinstance(output, str): + output_str = output + else: + return False + + if self.rich: + return self._render_rich(inp, output_str, status) + else: + return self._render_plain(inp, output_str, status) + + def _render_rich(self, inp: dict[str, Any], output_str: str, status: str) -> bool: + from rich.console import Group + from rich.panel import Panel + from rich.text import Text + + settings = self.context.settings + pattern = str(inp.get("pattern", "")) + search_path = str(inp.get("path", "")) + include = str(inp.get("include", "")) + + is_err = is_likely_error(output_str) + border = "red" if is_err else ("green" if status == "completed" else "yellow") + + sections: list[Any] = [] + header_parts = [f"pattern={pattern!r}"] + if search_path: + header_parts.append(f"path={relativize_path(search_path, self.context.root)}") + if include: + header_parts.append(f"include={include}") + sections.append(Text(" ".join(header_parts), style="dim")) + sections.append(Text()) + + if is_err: + 
sections.append(Text(output_str.strip(), style="red")) + elif not output_str.strip(): + sections.append(Text("(no matches)", style="dim")) + border = "dim" + else: + mode, entries = _parse_grep_output(output_str) + + if mode == "files": + n_files = len(entries) + shown = entries[:settings.grep_file_cap] + for e in shown: + sections.append(Text(f" {relativize_path(e['path'], self.context.root)}")) + if n_files > settings.grep_file_cap: + sections.append(Text(f" ... and {n_files - settings.grep_file_cap} more", style="dim")) + sections.append(Text()) + sections.append(Text(f"{n_files} file(s) matched", style="dim")) + else: + grep_pat = _grep_compile_pattern(pattern) + grouped: OrderedDict[str, list[dict[str, Any]]] = OrderedDict() + for e in entries: + grouped.setdefault(e["path"], []).append(e) + + n_files = len(grouped) + n_total = len(entries) + total_emitted = 0 + files_shown = 0 + truncated = False + + for fpath, file_entries in grouped.items(): + if total_emitted >= settings.grep_total_line_cap or files_shown >= settings.grep_file_cap: + truncated = True + break + files_shown += 1 + rel = relativize_path(fpath, self.context.root) + sections.append(Text(f" {rel} ({len(file_entries)} match(es))", style="bold cyan")) + shown_lines = file_entries[:settings.grep_line_cap_per_file] + for e in shown_lines: + text = e["text"] + if len(text) > 200: + text = text[:200] + "\u2026" + sections.append(_grep_format_line_rich(e["line"], text, grep_pat, settings.grep_highlight)) + total_emitted += 1 + if total_emitted >= settings.grep_total_line_cap: + truncated = True + break + if len(file_entries) > settings.grep_line_cap_per_file: + remaining = len(file_entries) - settings.grep_line_cap_per_file + sections.append(Text(f" ... and {remaining} more in {rel}", style="dim")) + + if truncated: + remaining_files = n_files - files_shown + if remaining_files > 0: + sections.append(Text(f" ... and {remaining_files} more file(s)", style="dim")) + else: + sections.append(Text(" ... 
(further matches truncated)", style="dim")) + + sections.append(Text()) + sections.append(Text(f"{n_total} match(es) across {n_files} file(s)", style="dim")) + + self.sink.write(Panel(Group(*sections), title="Grep", border_style=border, expand=True)) + return True + + def _render_plain(self, inp: dict[str, Any], output_str: str, status: str) -> bool: + import _colors as C + + settings = self.context.settings + pattern = str(inp.get("pattern", "")) + search_path = str(inp.get("path", "")) + include = str(inp.get("include", "")) + + is_err = is_likely_error(output_str) + + header_parts = [f"grep {pattern!r}"] + if search_path: + header_parts.append(f"in {relativize_path(search_path, self.context.root)}") + if include: + header_parts.append(f"include={include}") + self.sink.write_text(C.header(" ".join(header_parts))) + + if is_err: + self.sink.write_text(C.fail(output_str.strip())) + elif not output_str.strip(): + self.sink.write_text(" (no matches)") + else: + mode, entries = _parse_grep_output(output_str) + + if mode == "files": + n_files = len(entries) + shown = entries[:settings.grep_file_cap] + for e in shown: + self.sink.write_text(f" {relativize_path(e['path'], self.context.root)}") + if n_files > settings.grep_file_cap: + self.sink.write_text(f" ... 
and {n_files - settings.grep_file_cap} more") + self.sink.write_text(f" {n_files} file(s) matched") + else: + grep_pat = _grep_compile_pattern(pattern) + plain_color = C.color_enabled() + grouped: OrderedDict[str, list[dict[str, Any]]] = OrderedDict() + for e in entries: + grouped.setdefault(e["path"], []).append(e) + + n_files = len(grouped) + n_total = len(entries) + total_emitted = 0 + files_shown = 0 + truncated = False + + for fpath, file_entries in grouped.items(): + if total_emitted >= settings.grep_total_line_cap or files_shown >= settings.grep_file_cap: + truncated = True + break + files_shown += 1 + rel = relativize_path(fpath, self.context.root) + self.sink.write_text(f" {rel} ({len(file_entries)} match(es))") + shown_lines = file_entries[:settings.grep_line_cap_per_file] + for e in shown_lines: + text = e["text"] + if len(text) > 200: + text = text[:200] + "\u2026" + self.sink.write_text(_grep_format_line_plain(e["line"], text, grep_pat, settings.grep_highlight, plain_color)) + total_emitted += 1 + if total_emitted >= settings.grep_total_line_cap: + truncated = True + break + if len(file_entries) > settings.grep_line_cap_per_file: + remaining = len(file_entries) - settings.grep_line_cap_per_file + self.sink.write_text(f" ... and {remaining} more in {rel}") + + if truncated: + remaining_files = n_files - files_shown + if remaining_files > 0: + self.sink.write_text(f" ... and {remaining_files} more file(s)") + else: + self.sink.write_text(" ... 
(further matches truncated)") + + self.sink.write_text(f" {n_total} match(es) across {n_files} file(s)") + return True diff --git a/tools/run-agent.py b/tools/run-agent.py index fdf5a20..24a3812 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -3298,16 +3298,12 @@ def _dispatch_tool_renderer(console: Console, tool: str, state: dict[str, Any]) return ApplyPatchRenderer(_get_rendering_ctx(console)).render(tool_lower, state) elif tool_lower == "glob": _cache_invalidate_stale() - if HAVE_RICH: - return render_glob_rich(console, state) - else: - return render_glob_plain(state) + from rendering.tools.glob import GlobRenderer + return GlobRenderer(_get_rendering_ctx(console)).render(tool_lower, state) elif tool_lower == "grep": _cache_invalidate_stale() - if HAVE_RICH: - return render_grep_rich(console, state) - else: - return render_grep_plain(state) + from rendering.tools.grep import GrepRenderer + return GrepRenderer(_get_rendering_ctx(console)).render(tool_lower, state) elif tool_lower == "bash": _cache_invalidate_stale() # Try the sandbox-bootstrap sub-renderer first; it handles bash @@ -3322,10 +3318,8 @@ def _dispatch_tool_renderer(console: Console, tool: str, state: dict[str, Any]) # Glob renderers as if the agent had used the native tool. 
if _maybe_render_bash_shim(console if HAVE_RICH else None, state): return True - if HAVE_RICH: - return render_bash_rich(console, state) - else: - return render_bash_plain(state) + from rendering.tools.command import CommandRenderer + return CommandRenderer(_get_rendering_ctx(console)).render(tool_lower, state) elif tool_lower == "skill": _cache_invalidate_stale() from rendering.tools.skill import SkillRenderer From 23b4a3d2f2eee9971261f93d175d3c1b249ff405 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 24 May 2026 22:19:50 +0200 Subject: [PATCH 13/65] refactor(phase-a3-batch6): extract bash interceptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create CommandExecutionInterceptor implementations from the two sub-renderers previously in run-agent.py's bash dispatch: - rendering/tools/interceptors/sandbox_bootstrap.py SandboxBootstrapInterceptor — structured panels for sandbox list/inspect/detect/status/apply/regenerate/validate commands - rendering/tools/interceptors/rtk_read.py RtkReadInterceptor — routes rtk read/cat/head/tail through ReadRenderer - rendering/tools/interceptors/rtk_grep.py RtkGrepInterceptor — routes rtk grep/rg through GrepRenderer - rendering/tools/interceptors/shell_listing.py ShellListingInterceptor — routes ls/find/tree through GlobRenderer CommandRenderer now has a lazy interceptor chain that runs before the generic bash fallback. Dispatch in run-agent.py simplified to a single call. 
334 passed, 0 failed, 0 errors --- tools/rendering/tools/command.py | 31 +- .../rendering/tools/interceptors/__init__.py | 12 +- .../rendering/tools/interceptors/rtk_grep.py | 232 +++++ .../rendering/tools/interceptors/rtk_read.py | 340 +++++++ .../tools/interceptors/sandbox_bootstrap.py | 863 ++++++++++++++++++ .../tools/interceptors/shell_listing.py | 206 +++++ tools/run-agent.py | 12 - 7 files changed, 1680 insertions(+), 16 deletions(-) create mode 100644 tools/rendering/tools/interceptors/rtk_grep.py create mode 100644 tools/rendering/tools/interceptors/rtk_read.py create mode 100644 tools/rendering/tools/interceptors/sandbox_bootstrap.py create mode 100644 tools/rendering/tools/interceptors/shell_listing.py diff --git a/tools/rendering/tools/command.py b/tools/rendering/tools/command.py index ff817c3..46f0368 100644 --- a/tools/rendering/tools/command.py +++ b/tools/rendering/tools/command.py @@ -19,19 +19,44 @@ class CommandRenderer(ToolRenderer): tool_names = ("bash",) + def __init__(self, context): + super().__init__(context) + self._interceptors = None + + @property + def interceptors(self): + if self._interceptors is None: + from rendering.tools.interceptors.sandbox_bootstrap import SandboxBootstrapInterceptor + from rendering.tools.interceptors.rtk_read import RtkReadInterceptor + from rendering.tools.interceptors.rtk_grep import RtkGrepInterceptor + from rendering.tools.interceptors.shell_listing import ShellListingInterceptor + self._interceptors = [ + SandboxBootstrapInterceptor(), + RtkReadInterceptor(), + RtkGrepInterceptor(), + ShellListingInterceptor(), + ] + return self._interceptors + def render(self, tool_name: str, state: dict[str, Any]) -> bool: inp = state.get("input") if not isinstance(inp, dict): return False command = str(inp.get("command", "")) + if not command: + return False + + # Try interceptors first (sandbox-bootstrap, rtk, rg, ls, find, tree). 
+ for interceptor in self.interceptors: + if interceptor.try_render(command, state, self): + return True + + # Fall through to generic bash rendering. description = inp.get("description", "") output = state.get("output") output_str = str(output) if output is not None else "" - if not command: - return False - if self.rich: return self._render_rich(command, str(description), output_str, state) else: diff --git a/tools/rendering/tools/interceptors/__init__.py b/tools/rendering/tools/interceptors/__init__.py index 1aa7c65..a7a21e2 100644 --- a/tools/rendering/tools/interceptors/__init__.py +++ b/tools/rendering/tools/interceptors/__init__.py @@ -9,5 +9,15 @@ from __future__ import annotations from rendering.tools.interceptors.base import CommandExecutionInterceptor +from rendering.tools.interceptors.rtk_grep import RtkGrepInterceptor +from rendering.tools.interceptors.rtk_read import RtkReadInterceptor +from rendering.tools.interceptors.sandbox_bootstrap import SandboxBootstrapInterceptor +from rendering.tools.interceptors.shell_listing import ShellListingInterceptor -__all__ = ["CommandExecutionInterceptor"] +__all__ = [ + "CommandExecutionInterceptor", + "RtkGrepInterceptor", + "RtkReadInterceptor", + "SandboxBootstrapInterceptor", + "ShellListingInterceptor", +] diff --git a/tools/rendering/tools/interceptors/rtk_grep.py b/tools/rendering/tools/interceptors/rtk_grep.py new file mode 100644 index 0000000..c706c9b --- /dev/null +++ b/tools/rendering/tools/interceptors/rtk_grep.py @@ -0,0 +1,232 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +RtkGrepInterceptor — re-routes rtk grep / rg / grep bash commands +through the GrepRenderer so the user sees a Grep panel instead of +a generic Bash panel. 
+""" + +from __future__ import annotations + +import re +from typing import Any, Optional + +from rendering.tools.base import ToolRenderer +from rendering.tools.interceptors.base import CommandExecutionInterceptor + +# --------------------------------------------------------------------------- +# Regexes for rtk grep output normalisation +# --------------------------------------------------------------------------- + +_RTK_GREP_FILE_HEADER_RE = re.compile(r"^\[file\]\s+(?P<path>.+?)\s+\((?P<count>\d+)\)\s*:\s*$") +_RTK_GREP_LINE_RE = re.compile(r"^\s+(?P<lineno>\d+):\s*(?P<text>.*)$") + + +# --------------------------------------------------------------------------- +# rtk grep output normaliser +# --------------------------------------------------------------------------- + +def _normalize_rtk_grep_output(text: str) -> str: + """Convert rtk grep grouped output to standard `path:line:text` lines. + + Input shape (from `rtk grep`): + 4 matches in 3F: + [file] tools/run-agent.py (2): + 2811: return render_grep_rich(console, state) + + Output shape (compatible with _parse_grep_output): + tools/run-agent.py:2811:return render_grep_rich(console, state) + + If no `[file] <path> (N):` markers are found, returns the text + unchanged (no-op safe). + """ + if "[file]" not in text: + return text + lines_in = text.split("\n") + out: list[str] = [] + current_path: Optional[str] = None + found_marker = False + for line in lines_in: + m = _RTK_GREP_FILE_HEADER_RE.match(line) + if m: + current_path = m.group("path").strip() + found_marker = True + continue + n = _RTK_GREP_LINE_RE.match(line) + if n and current_path: + out.append(f"{current_path}:{n.group('lineno')}:{n.group('text')}") + continue + # Skip blanks and the "N matches in NF:" header; pass through anything else. + stripped = line.strip() + if not stripped: + continue + if re.match(r"^\d+\s+matches?\s+in\s+\d+F:\s*$", stripped): + continue + # Unknown line; drop it to keep the output clean. 
+ if not found_marker: + return text + return "\n".join(out) + ("\n" if out else "") + + +# --------------------------------------------------------------------------- +# Grep parsers (called by _is_bash_shim_call in rtk_read) +# --------------------------------------------------------------------------- + +def _parse_grep_or_rg(rest: list[str], raw: str) -> Optional[Any]: + """Parse `rg PATTERN [PATH]` or `grep PATTERN PATH...` (best-effort).""" + from .rtk_read import _BashShim # noqa: E402 + + # Drop common option flags so we can pull the pattern out. We don't + # need to be exhaustive: anything we miss simply falls through. + pattern = "" + path = "" + i = 0 + saw_pattern = False + while i < len(rest): + tok = rest[i] + if tok == "--": + i += 1 + continue + if tok.startswith("-") and tok != "-": + # rg/grep flags that take a value. + if tok in ("-e", "-f", "-A", "-B", "-C", "-g", "--glob", "--max-count", + "--max-depth", "-t", "--type", "--ignore-file"): + i += 2 + continue + i += 1 + continue + if not saw_pattern: + pattern = tok + saw_pattern = True + elif not path: + path = tok + i += 1 + if not saw_pattern: + return None + return _BashShim( + family="grep", + files=[], + pattern=pattern, + path=path, + long_format=False, + head_limit=None, + tail_limit=None, + rtk_filtered=False, + raw_command=raw, + ) + + +def _parse_rtk_grep(rest: list[str], raw: str) -> Optional[Any]: + """Parse `rtk grep PATTERN [PATH] [extra args]`.""" + from .rtk_read import _BashShim # noqa: E402 + + pattern = "" + path = "" + i = 0 + saw_pattern = False + while i < len(rest): + tok = rest[i] + if tok in ("-l", "--max-len", "-m", "--max", "-t", "--file-type"): + i += 2 + continue + if tok in ("-c", "--context-only", "-n", "--line-numbers", + "--ultra-compact", "--skip-env"): + i += 1 + continue + if tok.startswith("-v") and all(c == "v" for c in tok[1:]): + i += 1 + continue + if tok == "--": + i += 1 + continue + if tok.startswith("-"): + i += 1 + continue + if not saw_pattern: + 
pattern = tok + saw_pattern = True + elif not path: + path = tok + i += 1 + if not saw_pattern: + return None + return _BashShim( + family="grep", + files=[], + pattern=pattern, + path=path, + long_format=False, + head_limit=None, + tail_limit=None, + rtk_filtered=False, + raw_command=raw, + ) + + +# --------------------------------------------------------------------------- +# Grep shim renderer +# --------------------------------------------------------------------------- + +def _render_shim_grep(renderer: ToolRenderer, state: dict[str, Any], shim: Any) -> bool: + """Normalize rtk grep output and delegate to GrepRenderer.""" + from .rtk_read import _BashShim # noqa: E402 + from rendering.tools.grep import GrepRenderer + + shim_cast: _BashShim = shim + + raw_output = str(state.get("output") or "") + normalized = _normalize_rtk_grep_output(raw_output) + + # If the normalizer found rtk-style markers but produced no rows, + # something is unexpected; fall back to bash renderer. + if "[file]" in raw_output and not normalized.strip(): + return False + + syn_state = { + "input": {"pattern": shim_cast.pattern, "path": shim_cast.path}, + "output": normalized, + "status": str(state.get("status", "")), + } + + grep_renderer = GrepRenderer(renderer.context) + return grep_renderer.render("grep", syn_state) + + +# --------------------------------------------------------------------------- +# Interceptor class +# --------------------------------------------------------------------------- + +class RtkGrepInterceptor: + """Interceptor that re-routes rtk grep / rg / grep bash commands + through the GrepRenderer.""" + + name = "rtk_grep" + + def try_render( + self, + command: str, + state: dict[str, Any], + renderer: ToolRenderer, + ) -> bool: + if not renderer.context.settings.bash_shim_render: + return False + + inp = state.get("input") + if not isinstance(inp, dict): + return False + + command_str = str(inp.get("command", "")) + from .rtk_read import _is_bash_shim_call # noqa: 
E402 + shim = _is_bash_shim_call(command_str) + if shim is None: + return False + + output = state.get("output") + if not isinstance(output, str): + return False + + if shim.family != "grep": + return False + + return _render_shim_grep(renderer, state, shim) diff --git a/tools/rendering/tools/interceptors/rtk_read.py b/tools/rendering/tools/interceptors/rtk_read.py new file mode 100644 index 0000000..9051a56 --- /dev/null +++ b/tools/rendering/tools/interceptors/rtk_read.py @@ -0,0 +1,340 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +RtkReadInterceptor — re-routes rtk read / cat / head / tail bash +commands through the ReadRenderer so the user sees a Read panel +instead of a generic Bash panel. +""" + +from __future__ import annotations + +import os +import shlex +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Optional + +from rendering.tools.base import ToolRenderer +from rendering.tools.interceptors.base import CommandExecutionInterceptor +from rendering.utils import relativize_path + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +_BASH_SHIM_READ_VERBS = {"cat", "head", "tail"} +_BASH_SHIM_LEADING_NOISE = {"sudo", "time", "nice", "ionice", "command", "env"} +_BASH_SHIM_DISQUALIFIERS = ("|", ";", "&&", "||", ">", "<", "`", "$(") + + +# --------------------------------------------------------------------------- +# Shared data structures +# --------------------------------------------------------------------------- + +@dataclass +class _BashShim: + family: str # "read" | "grep" | "ls" | "find" + files: list[str] # for read family + pattern: str # for grep family + path: str # for grep / ls / find + long_format: bool # for ls family + head_limit: int | None # for `head -n N` + tail_limit: int | None # for `tail -n N` + 
rtk_filtered: bool # rtk read --level/--max-lines/--tail-lines present + raw_command: str + + +# --------------------------------------------------------------------------- +# Command-line helper functions +# --------------------------------------------------------------------------- + +def _strip_leading_env_and_wrappers(tokens: list[str]) -> list[str]: + """Drop leading KEY=VAL env assignments and known shell wrappers + (sudo, time, nice, ionice, command, env) so the next significant + token is the actual command verb.""" + out = list(tokens) + while out: + head = out[0] + # KEY=VAL env assignments are tokens with `=` and an UPPER_CASE + # identifier on the left. + if "=" in head and head.split("=", 1)[0].replace("_", "").isalnum(): + left = head.split("=", 1)[0] + if left and (left[0].isalpha() or left[0] == "_") and left.isupper(): + out.pop(0) + continue + if head in _BASH_SHIM_LEADING_NOISE: + # Skip wrapper plus its options (best-effort: drop only the + # wrapper itself and any -flags directly after it). 
+ out.pop(0) + while out and out[0].startswith("-"): + out.pop(0) + continue + break + return out + + +def _bash_command_has_pipeline(command_str: str) -> bool: + """Heuristic: avoid shim handling for any pipeline / redirection / + command-substitution / background invocation.""" + for marker in _BASH_SHIM_DISQUALIFIERS: + if marker in command_str: + return True + return False + + +def _parse_rtk_read(rest: list[str], raw: str) -> Optional[_BashShim]: + """Parse `rtk read [flags] FILE [FILE...]`.""" + files: list[str] = [] + filtered = False + i = 0 + while i < len(rest): + tok = rest[i] + if tok in ("-l", "--level"): + filtered = True + if i + 1 < len(rest): + i += 2 + else: + i += 1 + continue + if tok.startswith("--level="): + filtered = True + i += 1 + continue + if tok in ("-m", "--max-lines", "--tail-lines"): + filtered = True + if i + 1 < len(rest): + i += 2 + else: + i += 1 + continue + if tok.startswith(("--max-lines=", "--tail-lines=")): + filtered = True + i += 1 + continue + if tok in ("-n", "--line-numbers", "--ultra-compact", "--skip-env"): + i += 1 + continue + if tok.startswith("-v") and all(c == "v" for c in tok[1:]): + i += 1 + continue + if tok == "--": + i += 1 + continue + if tok.startswith("-"): + # Unknown flag; skip just the flag itself. 
+ i += 1 + continue + files.append(tok) + i += 1 + if not files: + return None + return _BashShim( + family="read", + files=files, + pattern="", + path="", + long_format=False, + head_limit=None, + tail_limit=None, + rtk_filtered=filtered, + raw_command=raw, + ) + + +def _parse_cat_head_tail(verb: str, rest: list[str], raw: str) -> Optional[_BashShim]: + """Parse `cat FILE...`, `head [-n N] FILE`, `tail [-n N] FILE`.""" + files: list[str] = [] + head_limit: Optional[int] = None + tail_limit: Optional[int] = None + i = 0 + while i < len(rest): + tok = rest[i] + if tok == "-n" and i + 1 < len(rest): + try: + count = int(rest[i + 1].lstrip("+-")) + if verb == "head": + head_limit = count + elif verb == "tail": + tail_limit = count + except ValueError: + pass + i += 2 + continue + if tok.startswith("-n") and len(tok) > 2: + try: + count = int(tok[2:].lstrip("+-")) + if verb == "head": + head_limit = count + elif verb == "tail": + tail_limit = count + except ValueError: + pass + i += 1 + continue + if tok.startswith("-") and tok != "-": + i += 1 + continue + files.append(tok) + i += 1 + if not files: + return None + return _BashShim( + family="read", + files=files, + pattern="", + path="", + long_format=False, + head_limit=head_limit, + tail_limit=tail_limit, + rtk_filtered=False, + raw_command=raw, + ) + + +def _is_bash_shim_call(command_str: str) -> Optional[_BashShim]: + """Recognise bash invocations the wrapper can re-route to the + Read/Grep/Glob renderers. Returns a _BashShim, or None when the + command should be left to the generic Bash renderer.""" + if not command_str or _bash_command_has_pipeline(command_str): + return None + try: + tokens = shlex.split(command_str) + except ValueError: + return None + if not tokens: + return None + + tokens = _strip_leading_env_and_wrappers(tokens) + if not tokens: + return None + + head = tokens[0] + rest = tokens[1:] + + # rtk dispatcher: peel `rtk` and re-evaluate against the subcommand. 
+ via_rtk = False + if head == "rtk": + if not rest: + return None + head = rest[0] + rest = rest[1:] + via_rtk = True + + if head == "read" and via_rtk: + return _parse_rtk_read(rest, command_str) + if head in _BASH_SHIM_READ_VERBS: + return _parse_cat_head_tail(head, rest, command_str) + if head == "grep" and via_rtk: + from .rtk_grep import _parse_rtk_grep # noqa: E402 + return _parse_rtk_grep(rest, command_str) + if head in {"rg", "grep"}: + from .rtk_grep import _parse_grep_or_rg # noqa: E402 + return _parse_grep_or_rg(rest, command_str) + if head in {"ls"}: + from .shell_listing import _parse_ls # noqa: E402 + return _parse_ls(rest, command_str) + if head in {"find", "tree"}: + from .shell_listing import _parse_find_tree # noqa: E402 + return _parse_find_tree(head, rest, command_str) + return None + + +# --------------------------------------------------------------------------- +# Read shim renderer +# --------------------------------------------------------------------------- + +def _render_shim_read(renderer: ToolRenderer, state: dict[str, Any], shim: _BashShim) -> bool: + """Synthesize a read-tool state and delegate to ReadRenderer.""" + from rendering.tools.read import ReadRenderer + + raw_output = str(state.get("output") or "") + status = str(state.get("status", "")) + root = renderer.context.root + + # Choose the file_path for the panel: when only one file, the actual + # path. When multiple files, fall back to a synthetic descriptor. + if len(shim.files) == 1: + file_path = shim.files[0] + else: + file_path = " + ".join(shim.files) + + # Synthesize OpenCode read framing around the raw content so the + # existing renderer can parse and render without modification. + rel_for_frame = relativize_path(shim.files[0], root) if shim.files else file_path + + # Optional offset/limit from `head -n N` / `tail -n N`. 
+ offset: Optional[int] = None + limit: Optional[int] = None + if shim.head_limit is not None: + offset = 1 + limit = shim.head_limit + elif shim.tail_limit is not None: + # We don't know the file length, so leave offset unset and let + # the renderer omit the lines header. + limit = shim.tail_limit + + framed = ( + f"<path>{rel_for_frame}</path>\n" + f"<type>file</type>\n" + f"<content>\n{raw_output}\n</content>" + ) + + syn_state = { + "input": {"filePath": file_path, "offset": offset, "limit": limit}, + "output": framed, + "status": status, + } + + read_renderer = ReadRenderer(renderer.context) + ok = read_renderer.render("read", syn_state) + + if not ok: + return False + + # Cache update: when filtering flags are present, or there are + # multiple files (no reliable per-file content boundaries), re-read + # each file directly from disk so the cache stays accurate. + if shim.rtk_filtered or len(shim.files) > 1: + for f in shim.files: + full = f if os.path.isabs(f) else os.path.join(root, f) + renderer.context.cache.reread(full) + return True + + +# --------------------------------------------------------------------------- +# Interceptor class +# --------------------------------------------------------------------------- + +class RtkReadInterceptor: + """Interceptor that re-routes rtk read / cat / head / tail bash + commands through the ReadRenderer.""" + + name = "rtk_read" + + def try_render( + self, + command: str, + state: dict[str, Any], + renderer: ToolRenderer, + ) -> bool: + if not renderer.context.settings.bash_shim_render: + return False + + inp = state.get("input") + if not isinstance(inp, dict): + return False + + command_str = str(inp.get("command", "")) + shim = _is_bash_shim_call(command_str) + if shim is None: + return False + + output = state.get("output") + if not isinstance(output, str): + return False + + if shim.family != "read": + return False + + return _render_shim_read(renderer, state, shim) diff --git a/tools/rendering/tools/interceptors/sandbox_bootstrap.py 
b/tools/rendering/tools/interceptors/sandbox_bootstrap.py new file mode 100644 index 0000000..6d5a5c8 --- /dev/null +++ b/tools/rendering/tools/interceptors/sandbox_bootstrap.py @@ -0,0 +1,863 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +SandboxBootstrapInterceptor — renders sandbox-bootstrap.py --format json +output as a styled Sandbox panel instead of the generic Bash panel. + +Detects bash invocations of ``tools/sandbox-bootstrap.py --format json`` +and renders the JSON output as a structured, colour-coded ``Sandbox`` +panel. The script is CodeCome-owned, so its JSON schema is stable. +""" + +from __future__ import annotations + +import json +import os +import shlex +import sys +from typing import Any, Optional + +from rendering.tools.base import ToolRenderer +from rendering.tools.interceptors.base import CommandExecutionInterceptor + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +_SANDBOX_BOOTSTRAP_SCRIPT = "tools/sandbox-bootstrap.py" +_SANDBOX_KNOWN_SUBCOMMANDS = { + "list", "inspect", "detect", "status", "apply", "regenerate", "validate", +} +# make targets that wrap the script and where we can confidently infer the +# subcommand from the target name. 
+_SANDBOX_MAKE_TARGETS = { + "sandbox-list": "list", + "sandbox-inspect": "inspect", + "sandbox-detect": "detect", + "sandbox-status": "status", + "sandbox-bootstrap": "apply", # `make sandbox-bootstrap ID=...` -> apply + "sandbox-regenerate": "regenerate", + "sandbox-validate": "validate", +} +_SANDBOX_REQUIRED_CAPABILITIES = ("setup", "start", "check", "build", "test", "stop") +_SANDBOX_HELPER_CAPABILITIES = ("shell", "logs", "clean", "reset") + + +# --------------------------------------------------------------------------- +# Helper functions +# --------------------------------------------------------------------------- + +def _console_supports_emoji(sink) -> bool: + """Return True when the console encoding can carry common emojis.""" + from rendering.sink import RichConsoleSink + if isinstance(sink, RichConsoleSink): + enc = (getattr(sink._console, "encoding", "") or "").lower() + else: + enc = (sys.stdout.encoding or "").lower() + return "utf" in enc + + +def _sandbox_glyphs(sink) -> dict[str, str]: + """Return a name->glyph table, with emoji on utf-8 terminals and + ASCII fallbacks elsewhere.""" + if _console_supports_emoji(sink): + return { + "ok": "\u2705", + "fail": "\u274c", + "warn": "\u26a0\ufe0f ", + "skip": "\u23ed\ufe0f ", + "info": "\u2139\ufe0f ", + "box": "\U0001f4e6", + "check": "\U0001f9ea", + "alarm": "\U0001f6a6", + "clock": "\u23f1", + "bullet": "\u2022", + } + return { + "ok": "[OK]", + "fail": "[FAIL]", + "warn": "[!]", + "skip": "[--]", + "info": "[i]", + "box": "[box]", + "check": "[chk]", + "alarm": "[gate]", + "clock": "t=", + "bullet": "-", + } + + +def _is_sandbox_bootstrap_json_call(command_str: str) -> Optional[str]: + """Return the subcommand name if this bash invocation is a + sandbox-bootstrap call configured for --format json, else None. 
+ + Recognises both: + - direct script invocations: + .venv/bin/python3 tools/sandbox-bootstrap.py --format json status + python tools/sandbox-bootstrap.py status --format=json + - make-target wrappers when BOOTSTRAP_ARGS forces json: + make sandbox-status BOOTSTRAP_ARGS='--format json' + make sandbox-validate BOOTSTRAP_ARGS=--format=json + """ + if not command_str: + return None + try: + tokens = shlex.split(command_str) + except ValueError: + return None + if not tokens: + return None + + # Look for --format json or --format=json anywhere in the tokens. + # Also recognise it when nested inside a make-style assignment such as + # BOOTSTRAP_ARGS='--format json' (which shlex collapses into a single + # token "BOOTSTRAP_ARGS=--format json"). + has_json_format = False + for i, tok in enumerate(tokens): + if tok == "--format=json": + has_json_format = True + break + if tok == "--format" and i + 1 < len(tokens) and tokens[i + 1] == "json": + has_json_format = True + break + # Make-style env assignments (e.g. BOOTSTRAP_ARGS=--format json, + # BOOTSTRAP_ARGS=--format=json, OPENCODE_ARGS=...). + if "=" in tok and ("--format json" in tok or "--format=json" in tok): + has_json_format = True + break + + # Direct script invocation path. + script_idx = -1 + for i, tok in enumerate(tokens): + if tok.endswith(_SANDBOX_BOOTSTRAP_SCRIPT) or tok.endswith("/" + _SANDBOX_BOOTSTRAP_SCRIPT): + script_idx = i + break + if script_idx >= 0 and has_json_format: + # Subcommand: first non-flag positional after the script path. + for j in range(script_idx + 1, len(tokens)): + t = tokens[j] + if t.startswith("-"): + # Skip --format json (two-token form). + if t == "--format" and j + 1 < len(tokens): + continue + continue + # A bare token after --format json may be the value of --format. + # Skip if previous token was --format (without =). + if j > 0 and tokens[j - 1] == "--format": + continue + if t in _SANDBOX_KNOWN_SUBCOMMANDS: + return t + return None + + # Make-target wrapper path. 
+ # Accept env-prefixed forms too, e.g.: + # BOOTSTRAP_ARGS='--format json --keep-going' make sandbox-validate + make_idx = -1 + for i, tok in enumerate(tokens): + if tok == "make": + make_idx = i + break + if make_idx >= 0: + # Find the first sandbox-* target token after `make`. + for tok in tokens[make_idx + 1:]: + if tok in _SANDBOX_MAKE_TARGETS and has_json_format: + return _SANDBOX_MAKE_TARGETS[tok] + return None + + +def _sandbox_payload_matches(subcommand: str, payload: Any) -> bool: + """Cheap structural sniff so we don't render unrelated JSON as a + Sandbox panel. Returns False on obvious schema mismatch so the bash + renderer can take over.""" + if subcommand == "list": + return isinstance(payload, list) and (not payload or isinstance(payload[0], dict)) + if not isinstance(payload, dict): + return False + if subcommand == "inspect": + return any(k in payload for k in ("id", "display_name", "files")) + if subcommand == "detect": + return "candidates" in payload or "signals" in payload + if subcommand == "status": + return "sandbox_state" in payload or "phase2_gate_pass" in payload or "capabilities" in payload + if subcommand in ("apply", "regenerate"): + return any(k in payload for k in ("example", "files_to_write", "written_files", "status")) + if subcommand == "validate": + return "overall_outcome" in payload or "tiers" in payload + return False + + +def _sandbox_outcome_style(outcome: str) -> tuple[str, str]: + """Return (rich_style, glyph_key) for a tier outcome string.""" + if outcome == "passed": + return "green", "ok" + if outcome == "failed": + return "red", "fail" + if outcome == "skipped": + return "dim", "skip" + return "yellow", "warn" + + +def _sandbox_state_style(state_value: str) -> str: + if state_value == "generated": + return "green" + if state_value == "user-managed": + return "yellow" + if state_value == "missing": + return "red" + return "dim" + + +def _sandbox_last_validation_style(value: Optional[str]) -> str: + if value == "passed": 
+ return "green" + if value == "mixed": + return "yellow" + if value == "failed": + return "red" + if value == "skipped": + return "yellow" + return "dim" + + +# --------------------------------------------------------------------------- +# Rich renderers (called when renderer.rich is True) +# --------------------------------------------------------------------------- + +def _render_sandbox_rich( + renderer: ToolRenderer, + subcommand: str, + payload: Any, + command: str, + description: str, + status: str, +) -> bool: + from rich.console import Group + from rich.panel import Panel + from rich.text import Text + + sink = renderer.context.sink + glyphs = _sandbox_glyphs(sink) + + # Default border = yellow (in flight) / green (completed); per-subcommand + # renderers may override based on payload contents (e.g. validate failed). + border = "yellow" if status != "completed" else "green" + + title = f"{glyphs['box']} Sandbox \u00b7 {subcommand}" + sections: list[Any] = [] + sections.append(Text(f"$ {command}", style="bold cyan")) + if description: + sections.append(Text(description, style="dim italic")) + sections.append(Text()) + + try: + if subcommand == "list": + border = _render_sandbox_list_rich(sections, payload, border) + elif subcommand == "inspect": + border = _render_sandbox_inspect_rich(sections, payload, border, glyphs, renderer) + elif subcommand == "detect": + border = _render_sandbox_detect_rich(sections, payload, border, glyphs, renderer) + elif subcommand == "status": + border = _render_sandbox_status_rich(sections, payload, border, glyphs) + elif subcommand in ("apply", "regenerate"): + border = _render_sandbox_apply_rich(sections, payload, subcommand, border, glyphs, renderer) + elif subcommand == "validate": + border = _render_sandbox_validate_rich(sections, payload, border, glyphs, renderer) + else: + return False + except (KeyError, TypeError, AttributeError): + return False + + sink.write(Panel(Group(*sections), title=title, border_style=border, 
expand=True)) + return True + + +def _render_sandbox_list_rich(sections: list[Any], payload: Any, border: str) -> str: + from rich.table import Table + from rich.text import Text + if not isinstance(payload, list): + raise TypeError("list subcommand expects a JSON array") + table = Table(show_header=True, header_style="bold cyan", expand=True, pad_edge=False) + table.add_column("id", style="bold cyan", no_wrap=True) + table.add_column("name") + table.add_column("languages", style="dim") + table.add_column("manifests", style="dim") + for ex in payload: + applies = ex.get("applies_when") or {} + langs = ", ".join(applies.get("languages") or []) or "-" + mans = ", ".join((applies.get("manifests") or [])[:4]) or "-" + if applies.get("manifests") and len(applies["manifests"]) > 4: + mans += " \u2026" + table.add_row(str(ex.get("id", "")), str(ex.get("display_name", "")), langs, mans) + sections.append(table) + sections.append(Text()) + sections.append(Text(f"{len(payload)} example(s) available", style="dim")) + return border + + +def _render_sandbox_inspect_rich( + sections: list[Any], payload: dict, border: str, glyphs: dict, renderer: ToolRenderer +) -> str: + from rich.text import Text + sections.append(Text(f"{payload.get('display_name', '')}", style="bold cyan")) + sections.append(Text(f" id: {payload.get('id', '')}", style="dim")) + sections.append(Text(f" path: {payload.get('path', '')}", style="dim")) + applies = payload.get("applies_when") or {} + if applies: + for k, v in applies.items(): + joined = ", ".join(v) if isinstance(v, list) else str(v) + sections.append(Text(f" applies_when.{k}: {joined}", style="dim")) + if payload.get("required_tools"): + sections.append(Text(f" required_tools: {', '.join(payload['required_tools'])}", style="dim")) + if payload.get("template_vars"): + sections.append(Text(f" template_vars: {', '.join(payload['template_vars'])}", style="dim")) + if payload.get("default_ports"): + sections.append(Text(f" default_ports: {', 
'.join(str(p) for p in payload['default_ports'])}", style="dim")) + if payload.get("build_command"): + sections.append(Text(f" build_command: {payload['build_command']}", style="dim")) + if payload.get("test_command"): + sections.append(Text(f" test_command: {payload['test_command']}", style="dim")) + if payload.get("caveats"): + sections.append(Text()) + sections.append(Text("Caveats:", style="bold yellow")) + for c in payload["caveats"]: + sections.append(Text(f" {glyphs['warn']} {c}", style="yellow")) + files = payload.get("files") or [] + if files: + sections.append(Text()) + cap = renderer.context.settings.sandbox_files_cap + sections.append(Text(f"Files ({len(files)}):", style="bold cyan")) + for f in files[:cap]: + sections.append(Text(f" {glyphs['bullet']} {f}")) + if len(files) > cap: + sections.append(Text(f" ... and {len(files) - cap} more", style="dim")) + return border + + +def _render_sandbox_detect_rich( + sections: list[Any], payload: dict, border: str, glyphs: dict, renderer: ToolRenderer +) -> str: + from rich.table import Table + from rich.text import Text + signals = payload.get("signals") or {} + sections.append(Text("Detection signals", style="bold cyan")) + sections.append(Text(f" source: {signals.get('source', '-')}", style="dim")) + sections.append(Text(f" languages: {', '.join(signals.get('languages') or []) or '-'}", style="dim")) + sections.append(Text(f" manifests: {', '.join(signals.get('manifests') or []) or '-'}", style="dim")) + sections.append(Text()) + + candidates = payload.get("candidates") or [] + sections.append(Text(f"Ranked candidates ({len(candidates)}):", style="bold cyan")) + table = Table(show_header=True, header_style="bold cyan", expand=True, pad_edge=False) + table.add_column("score", justify="right", no_wrap=True) + table.add_column("id", style="bold cyan", no_wrap=True) + table.add_column("name") + table.add_column("path", style="dim") + cap = renderer.context.settings.sandbox_files_cap + for c in candidates[:cap]: 
+ score = c.get("score", 0) + score_style = "green" if score >= 5 else ("yellow" if score >= 1 else "dim") + table.add_row( + Text(str(score), style=score_style), + str(c.get("id", "")), + str(c.get("display_name", "")), + str(c.get("path", "")), + ) + sections.append(table) + if len(candidates) > cap: + sections.append(Text(f"... and {len(candidates) - cap} more", style="dim")) + return border + + +def _render_sandbox_status_rich( + sections: list[Any], payload: dict, border: str, glyphs: dict +) -> str: + from rich.table import Table + from rich.text import Text + state_value = str(payload.get("sandbox_state", "unknown")) + last_validation = payload.get("last_validation") + gate_pass = bool(payload.get("phase2_gate_pass")) + gate_reason = str(payload.get("phase2_gate_reason", "")) + + state_glyph = {"generated": glyphs["ok"], "user-managed": glyphs["warn"], "missing": glyphs["fail"]}.get(state_value, glyphs["info"]) + sections.append(Text.assemble( + ("state: ", "bold"), + (f"{state_glyph} {state_value}", _sandbox_state_style(state_value)), + )) + sections.append(Text(f" path: {payload.get('sandbox_path', '-')}", style="dim")) + sections.append(Text(f" provenance: {'yes' if payload.get('provenance_present') else 'no'}", style="dim")) + lv_text = last_validation if last_validation is not None else "-" + sections.append(Text.assemble( + (" last validation: ", "dim"), + (str(lv_text), _sandbox_last_validation_style(last_validation)), + )) + sections.append(Text(f" allow override: {'yes' if payload.get('allow_no_sandbox') else 'no'}", style="dim")) + sections.append(Text()) + + # Gate badge. 
+ if gate_pass: + sections.append(Text.assemble( + (f"{glyphs['alarm']} ", ""), + (f"Phase 2 gate would PASS", "bold green"), + (f" \u2014 {gate_reason}", "dim"), + )) + else: + sections.append(Text.assemble( + (f"{glyphs['alarm']} ", ""), + (f"Phase 2 gate would BLOCK", "bold red"), + (f" \u2014 {gate_reason}", "dim"), + )) + border = "yellow" + + sections.append(Text()) + capabilities = payload.get("capabilities") or {} + if capabilities: + table = Table(show_header=True, header_style="bold cyan", expand=True, pad_edge=False) + table.add_column("capability", no_wrap=True) + table.add_column("status", no_wrap=True) + table.add_column("path", style="dim") + for name in (*_SANDBOX_REQUIRED_CAPABILITIES, *_SANDBOX_HELPER_CAPABILITIES): + cap = capabilities.get(name) + if cap is None: + continue + satisfied = bool(cap.get("satisfied")) + present = bool(cap.get("present")) + is_helper = name in _SANDBOX_HELPER_CAPABILITIES + if satisfied: + badge = Text(f"{glyphs['ok']} ok", style="green") + elif is_helper and not present: + badge = Text(f"{glyphs['skip']} optional", style="dim") + else: + badge = Text(f"{glyphs['fail']} missing", style="red") + table.add_row(name, badge, str(cap.get("path", ""))) + sections.append(table) + return border + + +def _render_sandbox_apply_rich( + sections: list[Any], payload: dict, subcommand: str, border: str, glyphs: dict, renderer: ToolRenderer +) -> str: + from rich.text import Text + apply_status = str(payload.get("status", "")) + is_dry = bool(payload.get("dry_run")) or apply_status == "dry-run" + chip_text = "DRY RUN" if is_dry else apply_status.upper() or "(unknown)" + chip_style = "yellow" if is_dry else ("green" if apply_status == "applied" else "dim") + sections.append(Text.assemble( + (f"{glyphs['box']} ", ""), + (f"{subcommand} ", "bold cyan"), + (f"{payload.get('example', '-')} ", "bold cyan"), + (f"[{chip_text}]", chip_style), + )) + sections.append(Text(f" example_path: {payload.get('example_path', '-')}", style="dim")) + 
sections.append(Text(f" sandbox_path: {payload.get('sandbox_path', '-')}", style="dim")) + sections.append(Text(f" force: {payload.get('force', False)}", style="dim")) + if payload.get("backup_dir"): + sections.append(Text(f" backup_dir: {payload['backup_dir']}", style="dim")) + + files_to_write = payload.get("files_to_write") or [] + written = payload.get("written_files") or [] + sections.append(Text()) + sections.append(Text( + f"files: planned={len(files_to_write)} written={len(written)}", + style="bold cyan", + )) + markers = payload.get("markers_provided") or {} + if markers: + sections.append(Text(f"markers_provided ({len(markers)}):", style="bold cyan")) + for k, v in markers.items(): + sections.append(Text(f" {k} = {v}", style="dim")) + unfilled = payload.get("markers_used_unfilled") or [] + if unfilled: + sections.append(Text()) + sections.append(Text.assemble( + (f"{glyphs['warn']} ", ""), + (f"Declared markers used but not provided: {', '.join(unfilled)}", "yellow"), + )) + border = "yellow" + undeclared = payload.get("markers_used_undeclared") or [] + if undeclared: + sections.append(Text.assemble( + (f"{glyphs['warn']} ", ""), + (f"Markers used but not declared: {', '.join(undeclared)}", "yellow"), + )) + border = "yellow" + + show_files = files_to_write or written + if show_files: + sections.append(Text()) + cap = renderer.context.settings.sandbox_files_cap + for f in show_files[:cap]: + sections.append(Text(f" {glyphs['bullet']} {f}")) + if len(show_files) > cap: + sections.append(Text(f" ... 
and {len(show_files) - cap} more", style="dim")) + + if apply_status == "applied" and not is_dry: + sections.append(Text()) + sections.append(Text.assemble( + (f"{glyphs['ok']} ", ""), + (f"Applied '{payload.get('example', '-')}'", "bold green"), + (f" \u2192 {payload.get('sandbox_path', '-')}", "dim"), + )) + if payload.get("provenance_path"): + sections.append(Text(f" provenance: {payload['provenance_path']}", style="dim")) + return border + + +def _render_sandbox_validate_rich( + sections: list[Any], payload: dict, border: str, glyphs: dict, renderer: ToolRenderer +) -> str: + from rich.table import Table + from rich.text import Text + overall = str(payload.get("overall_outcome", "unknown")) + overall_style, overall_glyph_key = _sandbox_outcome_style(overall) + + sections.append(Text.assemble( + (f"{glyphs['check']} ", ""), + ("overall: ", "bold"), + (f"{glyphs[overall_glyph_key]} {overall}", overall_style), + )) + + if overall == "failed": + border = "red" + elif overall == "passed": + border = "green" + else: + border = "yellow" + + stderr_cap = renderer.context.settings.sandbox_validate_stderr_lines + + tiers = payload.get("tiers") or [] + if tiers: + sections.append(Text()) + table = Table(show_header=True, header_style="bold cyan", expand=True, pad_edge=False) + table.add_column("tier", no_wrap=True) + table.add_column("purpose") + table.add_column("outcome", no_wrap=True) + table.add_column("dur", justify="right", no_wrap=True) + table.add_column("exit", justify="right", no_wrap=True) + for t in tiers: + t_outcome = str(t.get("outcome", "unknown")) + o_style, o_key = _sandbox_outcome_style(t_outcome) + badge = Text(f"{glyphs[o_key]} {t_outcome}", style=o_style) + dur = t.get("duration_seconds") + dur_str = f"{dur:.2f}s" if isinstance(dur, (int, float)) else "-" + exit_code = t.get("exit_code") + exit_str = "-" if exit_code is None else str(exit_code) + table.add_row( + str(t.get("tier", "")), + str(t.get("purpose", "")), + badge, + dur_str, + exit_str, + ) 
+ sections.append(table) + + # For each failed tier, show a capped stderr_tail under it. + for t in tiers: + if t.get("outcome") != "failed": + continue + stderr_tail = str(t.get("stderr_tail") or "").strip() + if not stderr_tail: + continue + sections.append(Text()) + sections.append(Text( + f"{glyphs['fail']} {t.get('tier', '')} {t.get('purpose', '')} stderr (tail):", + style="bold red", + )) + tail_lines = stderr_tail.splitlines() + shown = tail_lines[-stderr_cap:] + for line in shown: + sections.append(Text(f" {line}", style="red")) + if len(tail_lines) > stderr_cap: + sections.append(Text( + f" ... ({len(tail_lines) - stderr_cap} earlier lines truncated; " + f"see tmp/last-phase-*.jsonl for full output)", + style="dim", + )) + + missing = payload.get("missing_helpers") or [] + if missing: + sections.append(Text()) + sections.append(Text.assemble( + (f"{glyphs['warn']} ", ""), + (f"Helper capabilities still missing: {', '.join(missing)}", "yellow"), + )) + + if payload.get("history_updated"): + sections.append(Text(f"{glyphs['info']} history updated in sandbox/CODECOME-GENERATED.md", style="dim")) + return border + + +# --------------------------------------------------------------------------- +# Plain renderers (called when renderer.rich is False) +# --------------------------------------------------------------------------- + +def _render_sandbox_plain( + renderer: ToolRenderer, + subcommand: str, + payload: Any, + command: str, + description: str, + status: str, +) -> bool: + import _colors as C + + sink = renderer.context.sink + glyphs = _sandbox_glyphs(sink) + sink.write_text(C.header(f"{glyphs['box']} Sandbox \u00b7 {subcommand}")) + sink.write_text(f" $ {command}") + if description: + sink.write_text(f" # {description}") + + try: + if subcommand == "list": + _render_sandbox_list_plain(payload, glyphs, sink) + elif subcommand == "inspect": + _render_sandbox_inspect_plain(payload, glyphs, sink, renderer) + elif subcommand == "detect": + 
_render_sandbox_detect_plain(payload, glyphs, sink, renderer) + elif subcommand == "status": + _render_sandbox_status_plain(payload, glyphs, sink) + elif subcommand in ("apply", "regenerate"): + _render_sandbox_apply_plain(payload, subcommand, glyphs, sink, renderer) + elif subcommand == "validate": + _render_sandbox_validate_plain(payload, glyphs, sink, renderer) + else: + return False + except (KeyError, TypeError, AttributeError): + return False + return True + + +def _render_sandbox_list_plain(payload: Any, glyphs: dict, sink) -> None: + if not isinstance(payload, list): + raise TypeError + for ex in payload: + applies = ex.get("applies_when") or {} + langs = ", ".join(applies.get("languages") or []) or "-" + sink.write_text(f" {glyphs['bullet']} {ex.get('id', ''):<20} {ex.get('display_name', '')} ({langs})") + sink.write_text(f" {len(payload)} example(s) available") + + +def _render_sandbox_inspect_plain(payload: dict, glyphs: dict, sink, renderer: ToolRenderer) -> None: + sink.write_text(f" id: {payload.get('id', '')}") + sink.write_text(f" name: {payload.get('display_name', '')}") + sink.write_text(f" path: {payload.get('path', '')}") + applies = payload.get("applies_when") or {} + for k, v in applies.items(): + joined = ", ".join(v) if isinstance(v, list) else str(v) + sink.write_text(f" applies_when.{k}: {joined}") + if payload.get("required_tools"): + sink.write_text(f" required_tools: {', '.join(payload['required_tools'])}") + if payload.get("template_vars"): + sink.write_text(f" template_vars: {', '.join(payload['template_vars'])}") + if payload.get("default_ports"): + sink.write_text(f" default_ports: {', '.join(str(p) for p in payload['default_ports'])}") + if payload.get("build_command"): + sink.write_text(f" build_command: {payload['build_command']}") + if payload.get("test_command"): + sink.write_text(f" test_command: {payload['test_command']}") + if payload.get("caveats"): + sink.write_text(" Caveats:") + for c in payload["caveats"]: + 
sink.write_text(f" {glyphs['warn']} {c}") + files = payload.get("files") or [] + if files: + cap = renderer.context.settings.sandbox_files_cap + sink.write_text(f" Files ({len(files)}):") + for f in files[:cap]: + sink.write_text(f" {glyphs['bullet']} {f}") + if len(files) > cap: + sink.write_text(f" ... and {len(files) - cap} more") + + +def _render_sandbox_detect_plain(payload: dict, glyphs: dict, sink, renderer: ToolRenderer) -> None: + signals = payload.get("signals") or {} + sink.write_text(" signals:") + sink.write_text(f" source: {signals.get('source', '-')}") + sink.write_text(f" languages: {', '.join(signals.get('languages') or []) or '-'}") + sink.write_text(f" manifests: {', '.join(signals.get('manifests') or []) or '-'}") + candidates = payload.get("candidates") or [] + sink.write_text(f" candidates ({len(candidates)}):") + cap = renderer.context.settings.sandbox_files_cap + for c in candidates[:cap]: + sink.write_text(f" score={c.get('score', 0):>2} {c.get('id', ''):<20} {c.get('display_name', '')}") + if len(candidates) > cap: + sink.write_text(f" ... 
and {len(candidates) - cap} more") + + +def _render_sandbox_status_plain(payload: dict, glyphs: dict, sink) -> None: + import _colors as C + + state_value = str(payload.get("sandbox_state", "unknown")) + last_validation = payload.get("last_validation") + gate_pass = bool(payload.get("phase2_gate_pass")) + gate_reason = str(payload.get("phase2_gate_reason", "")) + + sink.write_text(f" state: {state_value}") + sink.write_text(f" path: {payload.get('sandbox_path', '-')}") + sink.write_text(f" provenance: {'yes' if payload.get('provenance_present') else 'no'}") + sink.write_text(f" last validation: {last_validation if last_validation is not None else '-'}") + sink.write_text(f" allow override: {'yes' if payload.get('allow_no_sandbox') else 'no'}") + if gate_pass: + sink.write_text(C.ok(f" {glyphs['alarm']} Phase 2 gate would PASS \u2014 {gate_reason}")) + else: + sink.write_text(C.warn(f" {glyphs['alarm']} Phase 2 gate would BLOCK \u2014 {gate_reason}")) + + capabilities = payload.get("capabilities") or {} + if capabilities: + sink.write_text(" capabilities:") + for name in (*_SANDBOX_REQUIRED_CAPABILITIES, *_SANDBOX_HELPER_CAPABILITIES): + cap = capabilities.get(name) + if cap is None: + continue + satisfied = bool(cap.get("satisfied")) + present = bool(cap.get("present")) + is_helper = name in _SANDBOX_HELPER_CAPABILITIES + if satisfied: + marker = f"{glyphs['ok']} ok" + elif is_helper and not present: + marker = f"{glyphs['skip']} optional" + else: + marker = f"{glyphs['fail']} missing" + sink.write_text(f" {name:<14} {marker:<14} {cap.get('path', '')}") + + +def _render_sandbox_apply_plain(payload: dict, subcommand: str, glyphs: dict, sink, renderer: ToolRenderer) -> None: + import _colors as C + + apply_status = str(payload.get("status", "")) + is_dry = bool(payload.get("dry_run")) or apply_status == "dry-run" + chip_text = "DRY RUN" if is_dry else apply_status.upper() or "(unknown)" + sink.write_text(f" {glyphs['box']} {subcommand} {payload.get('example', '-')} 
[{chip_text}]") + sink.write_text(f" example_path: {payload.get('example_path', '-')}") + sink.write_text(f" sandbox_path: {payload.get('sandbox_path', '-')}") + sink.write_text(f" force: {payload.get('force', False)}") + if payload.get("backup_dir"): + sink.write_text(f" backup_dir: {payload['backup_dir']}") + files_to_write = payload.get("files_to_write") or [] + written = payload.get("written_files") or [] + sink.write_text(f" files: planned={len(files_to_write)} written={len(written)}") + markers = payload.get("markers_provided") or {} + if markers: + sink.write_text(f" markers_provided ({len(markers)}):") + for k, v in markers.items(): + sink.write_text(f" {k} = {v}") + unfilled = payload.get("markers_used_unfilled") or [] + if unfilled: + sink.write_text(C.warn(f" {glyphs['warn']} Declared markers used but not provided: {', '.join(unfilled)}")) + undeclared = payload.get("markers_used_undeclared") or [] + if undeclared: + sink.write_text(C.warn(f" {glyphs['warn']} Markers used but not declared: {', '.join(undeclared)}")) + show_files = files_to_write or written + if show_files: + cap = renderer.context.settings.sandbox_files_cap + for f in show_files[:cap]: + sink.write_text(f" {glyphs['bullet']} {f}") + if len(show_files) > cap: + sink.write_text(f" ... 
and {len(show_files) - cap} more") + if apply_status == "applied" and not is_dry: + sink.write_text(C.ok(f" {glyphs['ok']} Applied '{payload.get('example', '-')}'")) + if payload.get("provenance_path"): + sink.write_text(f" provenance: {payload['provenance_path']}") + + +def _render_sandbox_validate_plain(payload: dict, glyphs: dict, sink, renderer: ToolRenderer) -> None: + import _colors as C + + overall = str(payload.get("overall_outcome", "unknown")) + overall_glyph = glyphs["ok"] if overall == "passed" else glyphs["fail"] if overall == "failed" else glyphs["warn"] + sink.write_text(f" {glyphs['check']} overall: {overall_glyph} {overall}") + + stderr_cap = renderer.context.settings.sandbox_validate_stderr_lines + + tiers = payload.get("tiers") or [] + for t in tiers: + t_outcome = str(t.get("outcome", "unknown")) + o_glyph = glyphs["ok"] if t_outcome == "passed" else glyphs["fail"] if t_outcome == "failed" else glyphs["skip"] + dur = t.get("duration_seconds") + dur_str = f"{dur:.2f}s" if isinstance(dur, (int, float)) else "-" + exit_code = t.get("exit_code") + exit_str = "-" if exit_code is None else str(exit_code) + sink.write_text(f" {t.get('tier', ''):<3} {str(t.get('purpose', '')):<20} " + f"{o_glyph} {t_outcome:<8} dur={dur_str:<7} exit={exit_str}") + if t_outcome == "failed": + stderr_tail = str(t.get("stderr_tail") or "").strip() + if stderr_tail: + tail_lines = stderr_tail.splitlines() + shown = tail_lines[-stderr_cap:] + for line in shown: + sink.write_text(f" | {line}") + if len(tail_lines) > stderr_cap: + sink.write_text(f" | ... 
({len(tail_lines) - stderr_cap} earlier lines truncated)") + missing = payload.get("missing_helpers") or [] + if missing: + sink.write_text(C.warn(f" {glyphs['warn']} Helper capabilities still missing: {', '.join(missing)}")) + if payload.get("history_updated"): + sink.write_text(f" {glyphs['info']} history updated in sandbox/CODECOME-GENERATED.md") + + +# --------------------------------------------------------------------------- +# Interceptor class +# --------------------------------------------------------------------------- + +class SandboxBootstrapInterceptor: + """Interceptor that renders sandbox-bootstrap.py --format json output + as a structured Sandbox panel.""" + + name = "sandbox_bootstrap" + + def try_render( + self, + command: str, + state: dict[str, Any], + renderer: ToolRenderer, + ) -> bool: + if not renderer.context.settings.sandbox_render: + return False + + inp = state.get("input") + output = state.get("output") + if not isinstance(inp, dict): + return False + + command_str = str(inp.get("command", "")) + subcommand = _is_sandbox_bootstrap_json_call(command_str) + if subcommand is None: + return False + + output_str = str(output) if output is not None else "" + stripped = output_str.strip() + if not stripped: + return False + + # Only proceed when output parses as a single JSON document. + # make commands often echo the invocation line, so try to find + # the first JSON-like delimiter if a strict parse fails. 
+ try: + payload = json.loads(stripped) + except (ValueError, TypeError): + first_brace = stripped.find("{") + first_bracket = stripped.find("[") + idxs = [i for i in (first_brace, first_bracket) if i >= 0] + if not idxs: + return False + start_idx = min(idxs) + try: + payload = json.loads(stripped[start_idx:]) + except (ValueError, TypeError): + return False + + if not _sandbox_payload_matches(subcommand, payload): + return False + + description = str(inp.get("description", "")).strip() + status = str(state.get("status", "")) + + if renderer.rich: + return _render_sandbox_rich( + renderer, subcommand, payload, command_str, description, status + ) + return _render_sandbox_plain( + renderer, subcommand, payload, command_str, description, status + ) diff --git a/tools/rendering/tools/interceptors/shell_listing.py b/tools/rendering/tools/interceptors/shell_listing.py new file mode 100644 index 0000000..9dd8f0c --- /dev/null +++ b/tools/rendering/tools/interceptors/shell_listing.py @@ -0,0 +1,206 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +ShellListingInterceptor — re-routes ls / find / tree bash commands +through the GlobRenderer so the user sees a Glob panel instead of +a generic Bash panel. +""" + +from __future__ import annotations + +import re +from typing import Any, Optional + +from rendering.tools.base import ToolRenderer +from rendering.tools.interceptors.base import CommandExecutionInterceptor + +# --------------------------------------------------------------------------- +# Regexes +# --------------------------------------------------------------------------- + +_LS_LONG_FORMAT_RE = re.compile( + # Permissions (10-12 chars incl. trailing @/+ or "."), link count, + # user, group, size, then 2 or 3 date fields (Mon DD [YYYY|HH:MM]), + # then the filename. + r"^[\-dlbcps][rwxstST\-@\+\.]{9,11}" + r"\s+\d+\s+\S+\s+\S+\s+\d+" + r"\s+\S+\s+\S+(?:\s+\S+)?" 
+ r"\s+(?P<name>.+)$" +) + + +# --------------------------------------------------------------------------- +# ls long-format normaliser +# --------------------------------------------------------------------------- + +def _strip_ls_long_format_to_filenames(text: str) -> str: + """Strip `ls -l` long-format columns down to just the filename. + Lines that don't look like long-format are kept as-is. The `total N` + header line is removed.""" + out: list[str] = [] + for line in text.split("\n"): + if not line.strip(): + continue + if line.startswith("total ") and line[6:].strip().isdigit(): + continue + m = _LS_LONG_FORMAT_RE.match(line) + if m: + out.append(m.group("name").strip()) + else: + # Keep non-matching lines (might be paths separating directories + # in a multi-arg ls call). + out.append(line.rstrip()) + return "\n".join(out) + + +# --------------------------------------------------------------------------- +# Parsers (called by _is_bash_shim_call in rtk_read) +# --------------------------------------------------------------------------- + +def _parse_ls(rest: list[str], raw: str) -> Optional[Any]: + """Parse `ls [args]`. Detect -l / -la for long format.""" + from .rtk_read import _BashShim # noqa: E402 + + long_format = False + paths: list[str] = [] + for tok in rest: + if tok.startswith("-") and tok != "-": + if "l" in tok[1:]: + long_format = True + continue + paths.append(tok) + path = paths[0] if paths else "." + return _BashShim( + family="ls", + files=[], + pattern="", + path=path, + long_format=long_format, + head_limit=None, + tail_limit=None, + rtk_filtered=False, + raw_command=raw, + ) + + +def _parse_find_tree(verb: str, rest: list[str], raw: str) -> Optional[Any]: + """Parse `find PATH [args]` or `tree [PATH]`. Output is a list of paths. + + Extracts ``-name`` / ``-iname`` filters into *pattern* so the Glob + panel header shows the actual search expression rather than the bare + verb. 
+ """ + from .rtk_read import _BashShim # noqa: E402 + + path: str = "" + name_filter: str = "" + # Flags whose next token is a value (not a path). + _FIND_VALUE_FLAGS = { + "-name", "-iname", "-path", "-ipath", "-regex", "-iregex", + "-type", "-maxdepth", "-mindepth", "-perm", "-user", "-group", + "-newer", "-size", "-amin", "-atime", "-cmin", "-ctime", + "-mmin", "-mtime", "-printf", "-fprintf", "-fls", + } + i = 0 + while i < len(rest): + tok = rest[i] + if tok in _FIND_VALUE_FLAGS: + # Consume the value token. + if i + 1 < len(rest): + val = rest[i + 1] + if tok in ("-name", "-iname"): + name_filter = val + i += 2 + continue + i += 1 + continue + if tok.startswith("-") and tok != "-": + # Other flags without values (e.g. -print, -delete). + i += 1 + continue + # First non-flag, non-value token is the path. + if not path: + path = tok + i += 1 + if not path: + path = "." + pattern = name_filter if name_filter else verb + return _BashShim( + family="find", + files=[], + pattern=pattern, + path=path, + long_format=False, + head_limit=None, + tail_limit=None, + rtk_filtered=False, + raw_command=raw, + ) + + +# --------------------------------------------------------------------------- +# ls / find shim renderer +# --------------------------------------------------------------------------- + +def _render_shim_ls(renderer: ToolRenderer, state: dict[str, Any], shim: Any) -> bool: + """Strip long-format if needed and delegate to GlobRenderer.""" + from .rtk_read import _BashShim # noqa: E402 + from rendering.tools.glob import GlobRenderer + + shim_cast: _BashShim = shim + settings = renderer.context.settings + + raw_output = str(state.get("output") or "") + if shim_cast.long_format and settings.bash_shim_ls_strip_long_format: + body = _strip_ls_long_format_to_filenames(raw_output) + else: + body = raw_output + pattern_label = "ls" if shim_cast.family == "ls" else shim_cast.pattern + syn_state = { + "input": {"pattern": pattern_label, "path": shim_cast.path}, + 
"output": body, + "status": str(state.get("status", "")), + } + + glob_renderer = GlobRenderer(renderer.context) + return glob_renderer.render("glob", syn_state) + + +# --------------------------------------------------------------------------- +# Interceptor class +# --------------------------------------------------------------------------- + +class ShellListingInterceptor: + """Interceptor that re-routes ls / find / tree bash commands + through the GlobRenderer.""" + + name = "shell_listing" + + def try_render( + self, + command: str, + state: dict[str, Any], + renderer: ToolRenderer, + ) -> bool: + if not renderer.context.settings.bash_shim_render: + return False + + inp = state.get("input") + if not isinstance(inp, dict): + return False + + command_str = str(inp.get("command", "")) + from .rtk_read import _is_bash_shim_call # noqa: E402 + shim = _is_bash_shim_call(command_str) + if shim is None: + return False + + output = state.get("output") + if not isinstance(output, str): + return False + + if shim.family not in ("ls", "find"): + return False + + return _render_shim_ls(renderer, state, shim) diff --git a/tools/run-agent.py b/tools/run-agent.py index 24a3812..1981e6a 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -3306,18 +3306,6 @@ def _dispatch_tool_renderer(console: Console, tool: str, state: dict[str, Any]) return GrepRenderer(_get_rendering_ctx(console)).render(tool_lower, state) elif tool_lower == "bash": _cache_invalidate_stale() - # Try the sandbox-bootstrap sub-renderer first; it handles bash - # invocations of `tools/sandbox-bootstrap.py --format json …` and - # `make sandbox-* BOOTSTRAP_ARGS='--format json'` with structured - # styling. Falls through to the generic bash renderer otherwise. 
- if _maybe_render_sandbox_bootstrap(console if HAVE_RICH else None, state): - return True - # Then try the bash-shim sub-renderer: detects `rtk read`, - # `rtk grep`, `rg`, `rtk ls` / `ls`, `cat`, `head`, `tail`, - # `find`, `tree` and routes them through the Read / Grep / - # Glob renderers as if the agent had used the native tool. - if _maybe_render_bash_shim(console if HAVE_RICH else None, state): - return True from rendering.tools.command import CommandRenderer return CommandRenderer(_get_rendering_ctx(console)).render(tool_lower, state) elif tool_lower == "skill": From 2a15ce498ad302afcae3b54994c99000d1e03b48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 24 May 2026 22:44:47 +0200 Subject: [PATCH 14/65] fix(rendering): pass end parameter in RichConsoleSink.write_text() --- tools/rendering/sink.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/rendering/sink.py b/tools/rendering/sink.py index fd5dc49..fe86c54 100644 --- a/tools/rendering/sink.py +++ b/tools/rendering/sink.py @@ -79,7 +79,7 @@ def write(self, renderable: Any, *, expand: bool = True) -> None: self._console.print(renderable, overflow="ignore", crop=False) def write_text(self, text: str, *, end: str = "\n") -> None: - self._console.print(text, overflow="ignore", crop=False) + self._console.print(text, overflow="ignore", crop=False, end=end) class TextualRichLogSink: From a7f49be5ac532f1777d1db315c595e5ca26f3fe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 24 May 2026 22:44:53 +0200 Subject: [PATCH 15/65] test(renderers): add unit tests for read/write/edit/apply_patch/glob/grep/command and interceptors --- tests/test_rendering_tools.py | 663 ++++++++++++++++++++++++++++++++++ 1 file changed, 663 insertions(+) diff --git a/tests/test_rendering_tools.py b/tests/test_rendering_tools.py index 928e587..c610a00 100644 --- a/tests/test_rendering_tools.py +++ b/tests/test_rendering_tools.py @@ -17,6 +17,17 @@ 
from rendering.tools.task import TaskRenderer from rendering.tools.skill import SkillRenderer from rendering.tools.permissions import PermissionErrorRenderer +from rendering.tools.read import ReadRenderer +from rendering.tools.write import WriteRenderer +from rendering.tools.edit import EditRenderer +from rendering.tools.apply_patch import ApplyPatchRenderer +from rendering.tools.glob import GlobRenderer +from rendering.tools.grep import GrepRenderer +from rendering.tools.command import CommandRenderer +from rendering.tools.interceptors.sandbox_bootstrap import SandboxBootstrapInterceptor +from rendering.tools.interceptors.rtk_read import RtkReadInterceptor +from rendering.tools.interceptors.rtk_grep import RtkGrepInterceptor +from rendering.tools.interceptors.shell_listing import ShellListingInterceptor def _ctx(sink_mode="plain"): @@ -214,3 +225,655 @@ def test_renders_permission_error_rich(self): r = PermissionErrorRenderer(_ctx("rich")) r.render_message("tool permission rejected: bash") # Should not raise + + +# —————————————————————————————————————————————— +# ReadRenderer +# —————————————————————————————————————————————— + +class TestReadRenderer: + def _framed(self, path, content, kind="file"): + return f"{path}\n{kind}\n\n{content}\n" + + def test_renders_file_plain(self, capsys): + r = ReadRenderer(_ctx("plain")) + state = { + "input": {"filePath": "/fake/src/main.py"}, + "output": self._framed("src/main.py", "print('hello')"), + "status": "completed", + } + assert r.render("read", state) is True + out = capsys.readouterr().out + assert "main.py" in out + assert "print" in out + + def test_renders_file_rich(self): + r = ReadRenderer(_ctx("rich")) + state = { + "input": {"filePath": "/fake/src/main.py"}, + "output": self._framed("src/main.py", "print('hello')"), + "status": "completed", + } + assert r.render("read", state) is True + + def test_renders_directory_plain(self, capsys): + r = ReadRenderer(_ctx("plain")) + state = { + "input": {"filePath": 
"/fake/src"}, + "output": ( + "src\n" + "directory\n" + "\n" + "main.py\n" + "utils/\n" + "(2 entries total)\n" + "" + ), + "status": "completed", + } + assert r.render("read", state) is True + out = capsys.readouterr().out + assert "main.py" in out + assert "utils/" in out + + def test_renders_error_output_plain(self, capsys): + r = ReadRenderer(_ctx("plain")) + state = { + "input": {"filePath": "/fake/missing.txt"}, + "output": "Error: no such file or directory", + "status": "error", + } + assert r.render("read", state) is True + out = capsys.readouterr().out + assert "missing.txt" in out + + def test_returns_false_for_missing_file_path(self): + r = ReadRenderer(_ctx("plain")) + assert r.render("read", {"input": {"filePath": ""}, "output": "x"}) is False + + def test_returns_false_for_non_dict_input(self): + r = ReadRenderer(_ctx("plain")) + assert r.render("read", {"input": "not a dict", "output": "x"}) is False + + def test_suppresses_internal_read(self, capsys): + r = ReadRenderer(_ctx("plain")) + state = { + "input": {"filePath": "/fake/AGENTS.md"}, + "output": self._framed("AGENTS.md", "# Agents"), + "status": "completed", + } + assert r.render("read", state) is True + out = capsys.readouterr().out + assert "workspace doc" in out + + +# —————————————————————————————————————————————— +# WriteRenderer +# —————————————————————————————————————————————— + +class TestWriteRenderer: + def test_renders_new_file_plain(self, capsys): + r = WriteRenderer(_ctx("plain")) + state = { + "input": {"filePath": "/fake/new.txt", "content": "hello world\n"}, + "output": "Wrote file new.txt", + "status": "completed", + } + assert r.render("write", state) is True + out = capsys.readouterr().out + assert "new.txt" in out + assert "new file" in out.lower() + assert "hello world" in out + + def test_renders_new_file_rich(self): + r = WriteRenderer(_ctx("rich")) + state = { + "input": {"filePath": "/fake/new.txt", "content": "hello world\n"}, + "output": "Wrote file new.txt", + 
"status": "completed", + } + assert r.render("write", state) is True + + def test_renders_diff_plain(self, capsys, tmp_path): + existing = tmp_path / "existing.txt" + existing.write_text("old line\n") + ctx = _ctx("plain") + ctx.cache.set(str(existing), "old line\n") + r = WriteRenderer(ctx) + state = { + "input": {"filePath": str(existing), "content": "new line\n"}, + "output": "Wrote file existing.txt", + "status": "completed", + } + assert r.render("write", state) is True + out = capsys.readouterr().out + assert "diff:" in out + assert "-" in out + assert "+" in out + + def test_renders_error_plain(self, capsys): + r = WriteRenderer(_ctx("plain")) + state = { + "input": {"filePath": "/fake/bad.txt", "content": "x"}, + "output": "Permission denied", + "status": "error", + } + assert r.render("write", state) is True + out = capsys.readouterr().out + assert "Permission denied" in out + + def test_returns_false_for_missing_file_path(self): + r = WriteRenderer(_ctx("plain")) + assert r.render("write", {"input": {"filePath": ""}, "output": "x"}) is False + + def test_returns_false_for_non_dict_input(self): + r = WriteRenderer(_ctx("plain")) + assert r.render("write", {"input": "not a dict"}) is False + + +# —————————————————————————————————————————————— +# EditRenderer +# —————————————————————————————————————————————— + +class TestEditRenderer: + def test_renders_edit_plain(self, capsys): + r = EditRenderer(_ctx("plain")) + state = { + "input": { + "filePath": "/fake/file.py", + "oldString": "old line\n", + "newString": "new line\n", + }, + "output": "1 occurrence replaced successfully", + "status": "completed", + } + assert r.render("edit", state) is True + out = capsys.readouterr().out + assert "file.py" in out + assert "diff:" in out + assert "-" in out + assert "+" in out + + def test_renders_edit_rich(self): + r = EditRenderer(_ctx("rich")) + state = { + "input": { + "filePath": "/fake/file.py", + "oldString": "old line\n", + "newString": "new line\n", + }, + 
"output": "1 occurrence replaced successfully", + "status": "completed", + } + assert r.render("edit", state) is True + + def test_renders_replace_all_plain(self, capsys): + r = EditRenderer(_ctx("plain")) + state = { + "input": { + "filePath": "/fake/file.py", + "oldString": "old\n", + "newString": "new\n", + "replaceAll": True, + }, + "output": "3 occurrences replaced successfully", + "status": "completed", + } + assert r.render("edit", state) is True + out = capsys.readouterr().out + assert "replace all" in out + + def test_renders_error_plain(self, capsys): + r = EditRenderer(_ctx("plain")) + state = { + "input": { + "filePath": "/fake/file.py", + "oldString": "old\n", + "newString": "new\n", + }, + "output": "Error: oldString not found", + "status": "error", + } + assert r.render("edit", state) is True + out = capsys.readouterr().out + assert "Error" in out + + def test_returns_false_for_missing_params(self): + r = EditRenderer(_ctx("plain")) + assert r.render("edit", {"input": {"filePath": "/fake/f.py"}, "output": "x"}) is False + assert r.render("edit", {"input": {"filePath": "", "oldString": "a", "newString": "b"}}) is False + + def test_returns_false_for_non_dict_input(self): + r = EditRenderer(_ctx("plain")) + assert r.render("edit", {"input": "not a dict"}) is False + + +# —————————————————————————————————————————————— +# ApplyPatchRenderer +# —————————————————————————————————————————————— + +class TestApplyPatchRenderer: + def test_renders_envelope_patch_plain(self, capsys): + r = ApplyPatchRenderer(_ctx("plain")) + patch_text = ( + "*** Begin Patch\n" + "*** Update File: file.py\n" + "-old line\n" + "+new line\n" + "*** End Patch\n" + ) + state = { + "input": {"patchText": patch_text}, + "output": "Applied patch successfully", + "status": "completed", + } + assert r.render("apply_patch", state) is True + out = capsys.readouterr().out + assert "apply_patch" in out + assert "file.py" in out + + def test_renders_envelope_patch_rich(self): + r = 
ApplyPatchRenderer(_ctx("rich")) + patch_text = ( + "*** Begin Patch\n" + "*** Update File: file.py\n" + "-old line\n" + "+new line\n" + "*** End Patch\n" + ) + state = { + "input": {"patchText": patch_text}, + "output": "Applied patch successfully", + "status": "completed", + } + assert r.render("apply_patch", state) is True + + def test_renders_raw_diff_plain(self, capsys): + r = ApplyPatchRenderer(_ctx("plain")) + patch_text = ( + "--- a/file.py\n" + "+++ b/file.py\n" + "@@ -1 +1 @@\n" + "-old\n" + "+new\n" + ) + state = { + "input": {"patchText": patch_text}, + "output": "Applied patch successfully", + "status": "completed", + } + assert r.render("apply_patch", state) is True + out = capsys.readouterr().out + assert "apply_patch" in out + + def test_renders_json_patches_plain(self, capsys): + r = ApplyPatchRenderer(_ctx("plain")) + state = { + "input": { + "patches": [ + {"path": "file.py", "diff": "-old\n+new\n"}, + ] + }, + "output": "Applied patch successfully", + "status": "completed", + } + assert r.render("apply_patch", state) is True + out = capsys.readouterr().out + assert "file.py" in out + + def test_renders_error_plain(self, capsys): + r = ApplyPatchRenderer(_ctx("plain")) + state = { + "input": {"patchText": "-old\n+new\n"}, + "output": "Error: patch failed", + "status": "error", + } + assert r.render("apply_patch", state) is True + out = capsys.readouterr().out + assert "Error" in out + + def test_returns_false_for_empty_input(self): + r = ApplyPatchRenderer(_ctx("plain")) + assert r.render("apply_patch", {"input": {}, "output": ""}) is False + + +# —————————————————————————————————————————————— +# GlobRenderer +# —————————————————————————————————————————————— + +class TestGlobRenderer: + def test_renders_matches_plain(self, capsys): + r = GlobRenderer(_ctx("plain")) + state = { + "input": {"pattern": "*.py", "path": "/fake/src"}, + "output": "src/main.py\nsrc/utils.py\n2 matches for *.py", + "status": "completed", + } + assert r.render("glob", 
state) is True + out = capsys.readouterr().out + assert "*.py" in out + assert "main.py" in out + assert "utils.py" in out + assert "2 match" in out + + def test_renders_matches_rich(self): + r = GlobRenderer(_ctx("rich")) + state = { + "input": {"pattern": "*.py", "path": "/fake/src"}, + "output": "src/main.py\nsrc/utils.py\n2 matches for *.py", + "status": "completed", + } + assert r.render("glob", state) is True + + def test_renders_no_matches_plain(self, capsys): + r = GlobRenderer(_ctx("plain")) + state = { + "input": {"pattern": "*.xyz", "path": "/fake/src"}, + "output": "No matches found", + "status": "completed", + } + assert r.render("glob", state) is True + out = capsys.readouterr().out + assert "*.xyz" in out + assert "no matches" in out.lower() + + def test_returns_false_for_non_dict_input(self): + r = GlobRenderer(_ctx("plain")) + assert r.render("glob", {"input": "not a dict", "output": "x"}) is False + + def test_returns_false_for_non_str_output(self): + r = GlobRenderer(_ctx("plain")) + assert r.render("glob", {"input": {"pattern": "*.py"}, "output": 123}) is False + + +# —————————————————————————————————————————————— +# GrepRenderer +# —————————————————————————————————————————————— + +class TestGrepRenderer: + def test_renders_file_matches_plain(self, capsys): + r = GrepRenderer(_ctx("plain")) + state = { + "input": {"pattern": "def ", "path": "/fake/src"}, + "output": "src/main.py:1:def main():\nsrc/utils.py:5:def helper():", + "status": "completed", + } + assert r.render("grep", state) is True + out = capsys.readouterr().out + assert "def " in out + assert "main.py" in out + assert "utils.py" in out + + def test_renders_file_matches_rich(self): + r = GrepRenderer(_ctx("rich")) + state = { + "input": {"pattern": "def ", "path": "/fake/src"}, + "output": "src/main.py:1:def main():\nsrc/utils.py:5:def helper():", + "status": "completed", + } + assert r.render("grep", state) is True + + def test_renders_line_matches_plain(self, capsys): + r = 
GrepRenderer(_ctx("plain")) + state = { + "input": {"pattern": "foo", "path": "/fake/src"}, + "output": "src/main.py:10: foo = 1\nsrc/main.py:20: bar(foo)", + "status": "completed", + } + assert r.render("grep", state) is True + out = capsys.readouterr().out + assert "main.py" in out + assert "foo" in out + + def test_renders_no_matches_plain(self, capsys): + r = GrepRenderer(_ctx("plain")) + state = { + "input": {"pattern": "xyz123", "path": "/fake/src"}, + "output": "", + "status": "completed", + } + assert r.render("grep", state) is True + out = capsys.readouterr().out + assert "xyz123" in out + assert "no matches" in out.lower() + + def test_renders_error_plain(self, capsys): + r = GrepRenderer(_ctx("plain")) + state = { + "input": {"pattern": "foo", "path": "/fake/src"}, + "output": "Error: invalid regex", + "status": "error", + } + assert r.render("grep", state) is True + out = capsys.readouterr().out + assert "Error" in out + + def test_returns_false_for_non_dict_input(self): + r = GrepRenderer(_ctx("plain")) + assert r.render("grep", {"input": "not a dict", "output": "x"}) is False + + def test_returns_false_for_non_str_non_dict_output(self): + r = GrepRenderer(_ctx("plain")) + assert r.render("grep", {"input": {"pattern": "foo"}, "output": 123}) is False + + +# —————————————————————————————————————————————— +# CommandRenderer +# —————————————————————————————————————————————— + +class TestCommandRenderer: + def test_renders_generic_bash_plain(self, capsys): + r = CommandRenderer(_ctx("plain")) + state = { + "input": {"command": "echo hello", "description": "Say hello"}, + "output": "hello", + "status": "completed", + } + assert r.render("bash", state) is True + out = capsys.readouterr().out + assert "echo hello" in out + assert "hello" in out + + def test_renders_generic_bash_rich(self): + r = CommandRenderer(_ctx("rich")) + state = { + "input": {"command": "echo hello", "description": "Say hello"}, + "output": "hello", + "status": "completed", + } + assert 
r.render("bash", state) is True + + def test_renders_no_output_plain(self, capsys): + r = CommandRenderer(_ctx("plain")) + state = { + "input": {"command": "true"}, + "output": "", + "status": "completed", + } + assert r.render("bash", state) is True + out = capsys.readouterr().out + assert "true" in out + assert "no output" in out.lower() + + def test_renders_error_output_plain(self, capsys): + r = CommandRenderer(_ctx("plain")) + state = { + "input": {"command": "false"}, + "output": "command not found", + "status": "error", + } + assert r.render("bash", state) is True + out = capsys.readouterr().out + assert "false" in out + + def test_returns_false_for_empty_command(self): + r = CommandRenderer(_ctx("plain")) + assert r.render("bash", {"input": {"command": ""}, "output": "x"}) is False + + def test_returns_false_for_non_dict_input(self): + r = CommandRenderer(_ctx("plain")) + assert r.render("bash", {"input": "not a dict"}) is False + + +# —————————————————————————————————————————————— +# Interceptors +# —————————————————————————————————————————————— + +class TestSandboxBootstrapInterceptor: + def test_try_render_detects_list_command(self, capsys): + interceptor = SandboxBootstrapInterceptor() + renderer = CommandRenderer(_ctx("plain")) + state = { + "input": {"command": "python tools/sandbox-bootstrap.py --format json list"}, + "output": '[{"id": "py", "display_name": "Python"}]', + "status": "completed", + } + assert interceptor.try_render("bash", state, renderer) is True + out = capsys.readouterr().out + assert "Sandbox" in out + assert "Python" in out + + def test_try_render_skips_non_sandbox_command(self): + interceptor = SandboxBootstrapInterceptor() + renderer = CommandRenderer(_ctx("plain")) + state = { + "input": {"command": "echo hello"}, + "output": "hello", + "status": "completed", + } + assert interceptor.try_render("bash", state, renderer) is False + + def test_try_render_skips_when_disabled(self): + ctx = _ctx("plain") + 
ctx.settings.sandbox_render = False + interceptor = SandboxBootstrapInterceptor() + renderer = CommandRenderer(ctx) + state = { + "input": {"command": "python tools/sandbox-bootstrap.py --format json list"}, + "output": "[]", + "status": "completed", + } + assert interceptor.try_render("bash", state, renderer) is False + + +class TestRtkReadInterceptor: + def test_try_render_routes_cat_command(self, capsys): + interceptor = RtkReadInterceptor() + renderer = CommandRenderer(_ctx("plain")) + state = { + "input": {"command": "cat /fake/file.txt"}, + "output": "hello world", + "status": "completed", + } + assert interceptor.try_render("bash", state, renderer) is True + out = capsys.readouterr().out + assert "file.txt" in out + assert "hello world" in out + + def test_try_render_skips_non_read_command(self): + interceptor = RtkReadInterceptor() + renderer = CommandRenderer(_ctx("plain")) + state = { + "input": {"command": "echo hello"}, + "output": "hello", + "status": "completed", + } + assert interceptor.try_render("bash", state, renderer) is False + + def test_try_render_skips_when_disabled(self): + ctx = _ctx("plain") + ctx.settings.bash_shim_render = False + interceptor = RtkReadInterceptor() + renderer = CommandRenderer(ctx) + state = { + "input": {"command": "cat /fake/file.txt"}, + "output": "hello", + "status": "completed", + } + assert interceptor.try_render("bash", state, renderer) is False + + +class TestRtkGrepInterceptor: + def test_try_render_routes_grep_command(self, capsys): + interceptor = RtkGrepInterceptor() + renderer = CommandRenderer(_ctx("plain")) + state = { + "input": {"command": "grep foo /fake/src"}, + "output": "src/main.py:1:foo = 1", + "status": "completed", + } + assert interceptor.try_render("bash", state, renderer) is True + out = capsys.readouterr().out + assert "foo" in out + assert "main.py" in out + + def test_try_render_skips_non_grep_command(self): + interceptor = RtkGrepInterceptor() + renderer = CommandRenderer(_ctx("plain")) + 
state = { + "input": {"command": "echo hello"}, + "output": "hello", + "status": "completed", + } + assert interceptor.try_render("bash", state, renderer) is False + + def test_try_render_skips_when_disabled(self): + ctx = _ctx("plain") + ctx.settings.bash_shim_render = False + interceptor = RtkGrepInterceptor() + renderer = CommandRenderer(ctx) + state = { + "input": {"command": "grep foo /fake/src"}, + "output": "hello", + "status": "completed", + } + assert interceptor.try_render("bash", state, renderer) is False + + +class TestShellListingInterceptor: + def test_try_render_routes_ls_command(self, capsys): + interceptor = ShellListingInterceptor() + renderer = CommandRenderer(_ctx("plain")) + state = { + "input": {"command": "ls /fake/src"}, + "output": "main.py\nutils.py", + "status": "completed", + } + assert interceptor.try_render("bash", state, renderer) is True + out = capsys.readouterr().out + assert "main.py" in out + assert "utils.py" in out + + def test_try_render_routes_find_command(self, capsys): + interceptor = ShellListingInterceptor() + renderer = CommandRenderer(_ctx("plain")) + state = { + "input": {"command": "find /fake/src -name '*.py'"}, + "output": "src/main.py\nsrc/utils.py", + "status": "completed", + } + assert interceptor.try_render("bash", state, renderer) is True + out = capsys.readouterr().out + assert "main.py" in out + + def test_try_render_skips_non_listing_command(self): + interceptor = ShellListingInterceptor() + renderer = CommandRenderer(_ctx("plain")) + state = { + "input": {"command": "echo hello"}, + "output": "hello", + "status": "completed", + } + assert interceptor.try_render("bash", state, renderer) is False + + def test_try_render_skips_when_disabled(self): + ctx = _ctx("plain") + ctx.settings.bash_shim_render = False + interceptor = ShellListingInterceptor() + renderer = CommandRenderer(ctx) + state = { + "input": {"command": "ls /fake/src"}, + "output": "hello", + "status": "completed", + } + assert 
interceptor.try_render("bash", state, renderer) is False From 6c2431760448537a95fad8dcedcf48b03111ae7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 24 May 2026 23:26:18 +0200 Subject: [PATCH 16/65] fix: address Phase A3 PR review comments (7 issues) 1. Sticky sink: key RenderContext cache by sink mode (rich/plain) 2. CLI tunables: eagerly build RenderContext and patch in --read-display-lines / --write-content-lines overrides 3. Dual cache: invalidate new SnapshotCache alongside legacy cache 4. ReadRenderer: use settings.read_highlight_limit instead of hardcoded 200*1024 5. WriteRenderer: gate cache.set on status==completed + !is_error 6. EditRenderer: gate cache.reread on status==completed + !is_error 7. Interceptors: use command parameter directly instead of re-extracting from state['input']['command'] 390 passed, 0 failed, 0 errors --- tests/test_rendering_tools.py | 29 ++++++------- tools/rendering/tools/edit.py | 15 +++---- .../rendering/tools/interceptors/rtk_grep.py | 3 +- .../rendering/tools/interceptors/rtk_read.py | 3 +- .../tools/interceptors/sandbox_bootstrap.py | 7 ++-- .../tools/interceptors/shell_listing.py | 3 +- tools/rendering/tools/read.py | 7 ++-- tools/rendering/tools/write.py | 14 ++++--- tools/run-agent.py | 42 ++++++++++++++----- 9 files changed, 73 insertions(+), 50 deletions(-) diff --git a/tests/test_rendering_tools.py b/tests/test_rendering_tools.py index c610a00..963373d 100644 --- a/tests/test_rendering_tools.py +++ b/tests/test_rendering_tools.py @@ -728,7 +728,7 @@ def test_try_render_detects_list_command(self, capsys): "output": '[{"id": "py", "display_name": "Python"}]', "status": "completed", } - assert interceptor.try_render("bash", state, renderer) is True + assert interceptor.try_render(state["input"]["command"], state, renderer) is True out = capsys.readouterr().out assert "Sandbox" in out assert "Python" in out @@ -741,7 +741,7 @@ def test_try_render_skips_non_sandbox_command(self):
"output": "hello", "status": "completed", } - assert interceptor.try_render("bash", state, renderer) is False + assert interceptor.try_render(state["input"]["command"], state, renderer) is False def test_try_render_skips_when_disabled(self): ctx = _ctx("plain") @@ -753,19 +753,20 @@ def test_try_render_skips_when_disabled(self): "output": "[]", "status": "completed", } - assert interceptor.try_render("bash", state, renderer) is False + assert interceptor.try_render(state["input"]["command"], state, renderer) is False class TestRtkReadInterceptor: def test_try_render_routes_cat_command(self, capsys): interceptor = RtkReadInterceptor() renderer = CommandRenderer(_ctx("plain")) + command = "cat /fake/file.txt" state = { - "input": {"command": "cat /fake/file.txt"}, + "input": {"command": command}, "output": "hello world", "status": "completed", } - assert interceptor.try_render("bash", state, renderer) is True + assert interceptor.try_render(command, state, renderer) is True out = capsys.readouterr().out assert "file.txt" in out assert "hello world" in out @@ -778,7 +779,7 @@ def test_try_render_skips_non_read_command(self): "output": "hello", "status": "completed", } - assert interceptor.try_render("bash", state, renderer) is False + assert interceptor.try_render(state["input"]["command"], state, renderer) is False def test_try_render_skips_when_disabled(self): ctx = _ctx("plain") @@ -790,7 +791,7 @@ def test_try_render_skips_when_disabled(self): "output": "hello", "status": "completed", } - assert interceptor.try_render("bash", state, renderer) is False + assert interceptor.try_render(state["input"]["command"], state, renderer) is False class TestRtkGrepInterceptor: @@ -802,7 +803,7 @@ def test_try_render_routes_grep_command(self, capsys): "output": "src/main.py:1:foo = 1", "status": "completed", } - assert interceptor.try_render("bash", state, renderer) is True + assert interceptor.try_render(state["input"]["command"], state, renderer) is True out = 
capsys.readouterr().out assert "foo" in out assert "main.py" in out @@ -815,7 +816,7 @@ def test_try_render_skips_non_grep_command(self): "output": "hello", "status": "completed", } - assert interceptor.try_render("bash", state, renderer) is False + assert interceptor.try_render(state["input"]["command"], state, renderer) is False def test_try_render_skips_when_disabled(self): ctx = _ctx("plain") @@ -827,7 +828,7 @@ def test_try_render_skips_when_disabled(self): "output": "hello", "status": "completed", } - assert interceptor.try_render("bash", state, renderer) is False + assert interceptor.try_render(state["input"]["command"], state, renderer) is False class TestShellListingInterceptor: @@ -839,7 +840,7 @@ def test_try_render_routes_ls_command(self, capsys): "output": "main.py\nutils.py", "status": "completed", } - assert interceptor.try_render("bash", state, renderer) is True + assert interceptor.try_render(state["input"]["command"], state, renderer) is True out = capsys.readouterr().out assert "main.py" in out assert "utils.py" in out @@ -852,7 +853,7 @@ def test_try_render_routes_find_command(self, capsys): "output": "src/main.py\nsrc/utils.py", "status": "completed", } - assert interceptor.try_render("bash", state, renderer) is True + assert interceptor.try_render(state["input"]["command"], state, renderer) is True out = capsys.readouterr().out assert "main.py" in out @@ -864,7 +865,7 @@ def test_try_render_skips_non_listing_command(self): "output": "hello", "status": "completed", } - assert interceptor.try_render("bash", state, renderer) is False + assert interceptor.try_render(state["input"]["command"], state, renderer) is False def test_try_render_skips_when_disabled(self): ctx = _ctx("plain") @@ -876,4 +877,4 @@ def test_try_render_skips_when_disabled(self): "output": "hello", "status": "completed", } - assert interceptor.try_render("bash", state, renderer) is False + assert interceptor.try_render(state["input"]["command"], state, renderer) is False diff 
--git a/tools/rendering/tools/edit.py b/tools/rendering/tools/edit.py index 5b5d766..3ef3ba2 100644 --- a/tools/rendering/tools/edit.py +++ b/tools/rendering/tools/edit.py @@ -31,16 +31,16 @@ def render(self, tool_name: str, state: dict[str, Any]) -> bool: return False if self.rich: - return self._render_rich(file_path, str(old_string), str(new_string), replace_all, output) + return self._render_rich(file_path, str(old_string), str(new_string), replace_all, output, state) else: - return self._render_plain(file_path, str(old_string), str(new_string), replace_all, output) + return self._render_plain(file_path, str(old_string), str(new_string), replace_all, output, state) # ------------------------------------------------------------------ # Rich # ------------------------------------------------------------------ def _render_rich(self, file_path: str, old_string: str, new_string: str, - replace_all: bool, output) -> bool: + replace_all: bool, output, state: dict[str, Any]) -> bool: from rich.console import Group from rich.panel import Panel from rich.syntax import Syntax @@ -83,8 +83,8 @@ def _render_rich(self, file_path: str, old_string: str, new_string: str, self.sink.write(Panel(Group(*sections), title="Edit", border_style=border, expand=True)) - # Re-read cache after edit so subsequent writes show correct diffs. 
- cache.reread(file_path) + if state.get("status") == "completed" and not is_error: + cache.reread(file_path) return True # ------------------------------------------------------------------ @@ -92,7 +92,7 @@ def _render_rich(self, file_path: str, old_string: str, new_string: str, # ------------------------------------------------------------------ def _render_plain(self, file_path: str, old_string: str, new_string: str, - replace_all: bool, output) -> bool: + replace_all: bool, output, state: dict[str, Any]) -> bool: import _colors as C settings = self.context.settings @@ -119,5 +119,6 @@ def _render_plain(self, file_path: str, old_string: str, new_string: str, self.sink.write_text(f" {output_str.strip()}") - cache.reread(file_path) + if state.get("status") == "completed" and not is_likely_error(output_str): + cache.reread(file_path) return True diff --git a/tools/rendering/tools/interceptors/rtk_grep.py b/tools/rendering/tools/interceptors/rtk_grep.py index c706c9b..20fe208 100644 --- a/tools/rendering/tools/interceptors/rtk_grep.py +++ b/tools/rendering/tools/interceptors/rtk_grep.py @@ -216,9 +216,8 @@ def try_render( if not isinstance(inp, dict): return False - command_str = str(inp.get("command", "")) from .rtk_read import _is_bash_shim_call # noqa: E402 - shim = _is_bash_shim_call(command_str) + shim = _is_bash_shim_call(command) if shim is None: return False diff --git a/tools/rendering/tools/interceptors/rtk_read.py b/tools/rendering/tools/interceptors/rtk_read.py index 9051a56..f85c587 100644 --- a/tools/rendering/tools/interceptors/rtk_read.py +++ b/tools/rendering/tools/interceptors/rtk_read.py @@ -325,8 +325,7 @@ def try_render( if not isinstance(inp, dict): return False - command_str = str(inp.get("command", "")) - shim = _is_bash_shim_call(command_str) + shim = _is_bash_shim_call(command) if shim is None: return False diff --git a/tools/rendering/tools/interceptors/sandbox_bootstrap.py b/tools/rendering/tools/interceptors/sandbox_bootstrap.py index 
6d5a5c8..07e001a 100644 --- a/tools/rendering/tools/interceptors/sandbox_bootstrap.py +++ b/tools/rendering/tools/interceptors/sandbox_bootstrap.py @@ -821,8 +821,7 @@ def try_render( if not isinstance(inp, dict): return False - command_str = str(inp.get("command", "")) - subcommand = _is_sandbox_bootstrap_json_call(command_str) + subcommand = _is_sandbox_bootstrap_json_call(command) if subcommand is None: return False @@ -856,8 +855,8 @@ def try_render( if renderer.rich: return _render_sandbox_rich( - renderer, subcommand, payload, command_str, description, status + renderer, subcommand, payload, command, description, status ) return _render_sandbox_plain( - renderer, subcommand, payload, command_str, description, status + renderer, subcommand, payload, command, description, status ) diff --git a/tools/rendering/tools/interceptors/shell_listing.py b/tools/rendering/tools/interceptors/shell_listing.py index 9dd8f0c..51bde1c 100644 --- a/tools/rendering/tools/interceptors/shell_listing.py +++ b/tools/rendering/tools/interceptors/shell_listing.py @@ -190,9 +190,8 @@ def try_render( if not isinstance(inp, dict): return False - command_str = str(inp.get("command", "")) from .rtk_read import _is_bash_shim_call # noqa: E402 - shim = _is_bash_shim_call(command_str) + shim = _is_bash_shim_call(command) if shim is None: return False diff --git a/tools/rendering/tools/read.py b/tools/rendering/tools/read.py index c91400f..ee4da8c 100644 --- a/tools/rendering/tools/read.py +++ b/tools/rendering/tools/read.py @@ -88,7 +88,7 @@ def _render_rich(self, rel_path: str, file_path: str, output: str, sections.append(Text("(empty file)", style="dim")) else: lexer = detect_lexer(file_path) - self._render_truncated_body(sections, raw_body, settings.read_display_lines, lexer, footer) + self._render_truncated_body(sections, raw_body, settings.read_display_lines, lexer, footer, settings.read_highlight_limit) elif kind == "directory": entries = payload if isinstance(payload, list) else [] @@ 
-104,7 +104,8 @@ def _render_rich(self, rel_path: str, file_path: str, output: str, return True @staticmethod - def _render_truncated_body(sections: list[Any], body: str, cap: int, lexer: str, footer: str | None) -> None: + def _render_truncated_body(sections: list[Any], body: str, cap: int, lexer: str, footer: str | None, + highlight_limit: int = 200 * 1024) -> None: from rich.syntax import Syntax from rich.text import Text @@ -114,7 +115,7 @@ def _render_truncated_body(sections: list[Any], body: str, cap: int, lexer: str, leftover = max(0, total - cap) visible = "\n".join(visible_lines) - if len(visible.encode("utf-8", errors="replace")) > 200 * 1024: + if len(visible.encode("utf-8", errors="replace")) > highlight_limit: sections.append(Text(visible)) else: sections.append(Syntax(visible, lexer, theme="monokai", line_numbers=True, word_wrap=True)) diff --git a/tools/rendering/tools/write.py b/tools/rendering/tools/write.py index 74493c8..3c21767 100644 --- a/tools/rendering/tools/write.py +++ b/tools/rendering/tools/write.py @@ -33,15 +33,15 @@ def render(self, tool_name: str, state: dict[str, Any]) -> bool: return False if self.rich: - return self._render_rich(file_path, new_content, output_str, output) + return self._render_rich(file_path, new_content, output_str, output, state) else: - return self._render_plain(file_path, new_content, output_str, output) + return self._render_plain(file_path, new_content, output_str, output, state) # ------------------------------------------------------------------ # Rich # ------------------------------------------------------------------ - def _render_rich(self, file_path: str, new_content: str, output_str: str, output) -> bool: + def _render_rich(self, file_path: str, new_content: str, output_str: str, output, state) -> bool: from rich.console import Group from rich.panel import Panel from rich.syntax import Syntax @@ -91,7 +91,8 @@ def _render_rich(self, file_path: str, new_content: str, output_str: str, output 
sections.append(Text()) sections.append(Text(status_text, style="green")) self.sink.write(Panel(Group(*sections), title="Write", border_style=border, expand=True)) - cache.set(file_path, new_content) + if state.get("status") == "completed" and not is_error: + cache.set(file_path, new_content) return True def _render_body_rich(self, sections: list[Any], body: str, cap: int, lexer: str) -> None: @@ -114,7 +115,7 @@ def _render_body_rich(self, sections: list[Any], body: str, cap: int, lexer: str # Plain # ------------------------------------------------------------------ - def _render_plain(self, file_path: str, new_content: str, output_str: str, output) -> bool: + def _render_plain(self, file_path: str, new_content: str, output_str: str, output, state) -> bool: import _colors as C settings = self.context.settings @@ -149,7 +150,8 @@ def _render_plain(self, file_path: str, new_content: str, output_str: str, outpu self._render_body_plain(new_content, settings.write_content_lines) self.sink.write_text(f" {output_str.strip()}") - cache.set(file_path, new_content) + if state.get("status") == "completed" and not is_error: + cache.set(file_path, new_content) return True def _render_body_plain(self, body: str, cap: int) -> None: diff --git a/tools/run-agent.py b/tools/run-agent.py index 1981e6a..2ab06b9 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -50,32 +50,36 @@ ) from codecome.transcript import open_phase_transcript, open_chat_transcript, close_transcript -# Lazy rendering context — built once and reused by the new renderer -# classes. Old-style render_* functions still receive console directly -# and are unaffected. -_RENDERING_CTX: Any = None +# Lazy rendering contexts — built once per sink mode and reused by the +# new renderer classes. Old-style render_* functions still receive +# console directly and are unaffected. Keyed by mode so a rich-console +# call and a plain-text call in the same process don't share a sink. 
+_RENDERING_CTX_CACHE: dict[str, Any] = {} def _get_rendering_ctx(console: Any) -> Any: - global _RENDERING_CTX - if _RENDERING_CTX is not None: - return _RENDERING_CTX + mode = "rich" if (HAVE_RICH and console is not None) else "plain" + if mode in _RENDERING_CTX_CACHE: + ctx = _RENDERING_CTX_CACHE[mode] + ctx.cache.invalidate_stale() + return ctx from rendering.cache import SnapshotCache from rendering.context import RenderContext from rendering.settings import RenderSettings from rendering.sink import PlainSink, RichConsoleSink - if HAVE_RICH and console is not None: + if mode == "rich": sink = RichConsoleSink(console) else: sink = PlainSink() - _RENDERING_CTX = RenderContext( + ctx = RenderContext( root=ROOT, sink=sink, settings=RenderSettings.from_env(), cache=SnapshotCache(), ) - return _RENDERING_CTX + _RENDERING_CTX_CACHE[mode] = ctx + return ctx try: from rich.console import Console, Group @@ -4645,6 +4649,24 @@ def main() -> int: color_mode = resolve_color_mode(args.color) console = build_console(color_mode) + + # Eagerly build the rendering context so CLI tunable overrides + # (--read-display-lines, --write-content-lines, etc.) are baked + # into RenderSettings before any renderer uses them. + _rendering_ctx = _get_rendering_ctx(console) + import dataclasses as _dc + _overrides: dict[str, Any] = {} + if args.read_display_lines is not None: + _overrides["read_display_lines"] = args.read_display_lines + if args.write_content_lines is not None: + _overrides["write_content_lines"] = args.write_content_lines + if args.write_diff_limit is not None: + _overrides["write_diff_limit"] = args.write_diff_limit + if args.edit_diff_lines is not None: + _overrides["edit_diff_lines"] = args.edit_diff_lines + if _overrides: + _rendering_ctx.settings = _dc.replace(_rendering_ctx.settings, **_overrides) + prompt_file = ROOT / args.prompt_file prompt = load_prompt(prompt_file, args.finding, phase=args.phase) # Model resolution is still needed for banner display. 
From 2d7d5e89f69673d820675a2f91bc073c551c129d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 24 May 2026 23:42:15 +0200 Subject: [PATCH 17/65] refactor(phase-a3-batch7): migrate generic event renderers Extract all SSE event renderers from run-agent.py into rendering/events.py: - StepStartRenderer, TextEventRenderer, ReasoningEventRenderer - ToolUseEventRenderer (delegates to FallbackToolRenderer) - StepFinishRenderer, ErrorEventRenderer, SessionStatusRenderer - ServerConnectedRenderer, ServerHeartbeatRenderer - SessionDiffRenderer, MessageUpdatedRenderer - SubagentStatusRenderer (with dedup state) - UnknownEventRenderer (fallback) render_event() dispatcher now routes through the new classes via the rendering context. Old render_* functions kept for backward compat. 390 passed, 0 failed, 0 errors --- tests/test_run_agent.py | 13 +- tools/rendering/events.py | 420 +++++++++++++++++++++++++++++++++++++- tools/run-agent.py | 41 ++-- 3 files changed, 447 insertions(+), 27 deletions(-) diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 5964f58..977afde 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -393,14 +393,19 @@ def test_render_subagent_status_rich_created_renders_panel(monkeypatch): @pytest.mark.unit def test_render_event_dispatches_subagent_status(monkeypatch): + """render_event dispatches subagent.status through SubagentStatusRenderer.""" + import rendering.events as _evts module = load_tool_module("run_agent_dispatch_subagent", "tools/run-agent.py") calls = [] - def _fake_subagent_status(_console, _event): - calls.append("subagent.status") - - monkeypatch.setattr(module, "render_subagent_status", _fake_subagent_status) + class _FakeRenderer: + def __init__(self, ctx): + pass + def render(self, event): + calls.append("subagent.status") + return True + monkeypatch.setattr(_evts, "SubagentStatusRenderer", _FakeRenderer) module.render_event(None, "2", "x", {"type": "subagent.status", 
"properties": {}}) assert calls == ["subagent.status"] diff --git a/tools/rendering/events.py b/tools/rendering/events.py index 9c038cc..8510b51 100644 --- a/tools/rendering/events.py +++ b/tools/rendering/events.py @@ -2,33 +2,434 @@ # SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later """ -Base classes for generic (non-tool) event renderers. +Event renderer classes — one per SSE event family. -Event renderers receive the full normalized event dict and write -output through the render context's sink. +Each renderer handles its event type(s) and writes through the +render context's sink. """ from __future__ import annotations +import json +import time as _time from typing import Any from rendering.base import BaseRenderer -class EventRenderer(BaseRenderer): - """Base class for renderers that handle generic SSE events. +# --------------------------------------------------------------------------- +# Finish reason classification +# --------------------------------------------------------------------------- + +_FINISH_TERMINAL_OK = {"stop", "end_turn"} +_FINISH_MID_TURN = {"tool-calls", "tool_use"} +_FINISH_FAILURE = {"content-filter", "content_filter", "length", "max_tokens", "error"} + +# Per-session dedup state for subagent update events. +_SUBAGENT_LAST_STATE: dict[str, tuple[dict[str, Any], float]] = {} - Subclasses declare which event types they handle via ``event_types``. - The registry will dispatch each event to the first matching renderer. - """ +# --------------------------------------------------------------------------- +# EventRenderer base +# --------------------------------------------------------------------------- + +class EventRenderer(BaseRenderer): event_types: tuple[str, ...] = () def render(self, event: dict[str, Any]) -> bool: - """Render *event*. 
Return True if handled, False to fall through.""" raise NotImplementedError +# --------------------------------------------------------------------------- +# Specific renderers +# --------------------------------------------------------------------------- + +class StepStartRenderer(EventRenderer): + event_types = ("step_start",) + + def __init__(self, context, *, phase: str = "", label: str = ""): + super().__init__(context) + self.phase = phase + self.label = label + + def render(self, event: dict[str, Any]) -> bool: + step_type = event.get("part", {}).get("type", "step-start") + if self.rich: + from rich.text import Text + self.sink.write(Text(f"[{self.phase}] {self.label}: {step_type}", style="cyan")) + elif self.plain: + import _colors as C + self.sink.write_text(C.info(f"[{self.phase}] {self.label}: {step_type}")) + return True + + +class TextEventRenderer(EventRenderer): + event_types = ("text",) + + def render(self, event: dict[str, Any]) -> bool: + part = event.get("part", {}) + text = str(part.get("text", "")).strip() + if not text: + return False + if self.rich: + from rich.markdown import Markdown + from rich.panel import Panel + self.sink.write(Panel(Markdown(text), title="Assistant", border_style="blue", expand=True)) + elif self.plain: + import _colors as C + self.sink.write_text(C.header("Assistant")) + self.sink.write_text(text) + return True + + +class ReasoningEventRenderer(EventRenderer): + event_types = ("reasoning",) + + def render(self, event: dict[str, Any]) -> bool: + if not self.context.settings.render_reasoning: + return False + part = event.get("part", {}) + text = str(part.get("text", "")).strip() + if not text: + return False + + truncated_note = "" + max_chars = self.context.settings.reasoning_max_chars + if len(text) > max_chars: + cut = len(text) - max_chars + text = text[:max_chars] + truncated_note = f"\n\n... 
({cut} chars truncated)" + + if self.rich: + from rich.console import Group + from rich.markdown import Markdown + from rich.panel import Panel + from rich.text import Text + body_md = Markdown(text) + if truncated_note: + body = Group(body_md, Text(truncated_note.strip(), style="dim")) + else: + body = body_md + self.sink.write(Panel(body, title="Thinking", border_style="blue", expand=True, style="dim")) + elif self.plain: + import _colors as C + self.sink.write_text(C.header("Thinking")) + self.sink.write_text(text) + if truncated_note: + self.sink.write_text(truncated_note.strip()) + return True + + +class ToolUseEventRenderer(EventRenderer): + event_types = ("tool_use",) + + def render(self, event: dict[str, Any]) -> bool: + part = event.get("part", {}) + tool = str(part.get("tool", "unknown")) + state = part.get("state", {}) if isinstance(part.get("state"), dict) else {} + from rendering.tools.base import FallbackToolRenderer + return FallbackToolRenderer(self.context).render(tool, state) + + +class StepFinishRenderer(EventRenderer): + event_types = ("step_finish",) + + def render(self, event: dict[str, Any]) -> bool: + part = event.get("part", {}) + reason = str(part.get("reason", "unknown")) + tokens = self._format_tokens(part.get("tokens", {})) + suffix = f" ({tokens})" if tokens else "" + style = "dim" + if reason in _FINISH_FAILURE: + style = "bold red" + if self.rich: + from rich.text import Text + self.sink.write(Text(f"step finished: {reason}{suffix}", style=style)) + elif self.plain: + import _colors as C + if reason in _FINISH_FAILURE: + self.sink.write_text(C.fail(f"step finished: {reason}{suffix}")) + else: + self.sink.write_text(f"step finished: {reason}{suffix}") + return True + + @staticmethod + def _format_tokens(tokens: dict[str, Any]) -> str: + if not isinstance(tokens, dict): + return "" + parts = [] + for key in ("input", "output", "reasoning", "total"): + value = tokens.get(key) + if value is not None: + parts.append(f"{key}={value}") + 
return ", ".join(parts) + + +class ErrorEventRenderer(EventRenderer): + event_types = ("error",) + + def render(self, event: dict[str, Any]) -> bool: + err = event.get("error") + msg_parts: list[str] = [] + if isinstance(err, dict): + name = err.get("name") + if isinstance(name, str) and name: + msg_parts.append(name) + data = err.get("data") + if isinstance(data, dict): + data_msg = data.get("message") + if isinstance(data_msg, str) and data_msg: + msg_parts.append(data_msg) + elif isinstance(err.get("message"), str): + msg_parts.append(err["message"]) + elif isinstance(err, str): + msg_parts.append(err) + text = ": ".join(msg_parts) if msg_parts else "(no error message)" + if self.rich: + from rich.panel import Panel + from rich.text import Text + self.sink.write(Panel(Text(text, style="red"), title="Error", border_style="yellow", expand=True)) + elif self.plain: + import _colors as C + self.sink.write_text(C.warn("Error")) + self.sink.write_text(C.fail(text)) + return True + + +class SessionStatusRenderer(EventRenderer): + event_types = ("session.status",) + + def render(self, event: dict[str, Any]) -> bool: + properties = event.get("properties", {}) + status = properties.get("status", {}) + status_type = status.get("type") + if status_type == "retry": + attempt = status.get("attempt", 1) + message = status.get("message", "Unknown error") + text = f"\u23f3 Waiting for LLM provider response (retry attempt {attempt}): {message}" + if self.rich: + from rich.text import Text + self.sink.write(Text(text, style="bold yellow")) + elif self.plain: + import _colors as C + self.sink.write_text(C.warn(text)) + elif status_type == "busy": + text = "session status: busy" + if self.rich: + from rich.text import Text + self.sink.write(Text(text, style="dim")) + elif self.plain: + import _colors as C + self.sink.write_text(C.info(text)) + elif status_type == "idle": + text = "session status: idle" + if self.rich: + from rich.text import Text + self.sink.write(Text(text, 
style="dim")) + elif self.plain: + import _colors as C + self.sink.write_text(C.info(text)) + return True + + +class ServerConnectedRenderer(EventRenderer): + event_types = ("server.connected",) + + def render(self, event: dict[str, Any]) -> bool: + message = "connected to opencode event stream" + if self.rich: + from rich.text import Text + self.sink.write(Text(message, style="dim")) + elif self.plain: + import _colors as C + self.sink.write_text(C.info(message)) + return True + + +class ServerHeartbeatRenderer(EventRenderer): + event_types = ("server.heartbeat",) + + def render(self, event: dict[str, Any]) -> bool: + message = "server heartbeat" + if self.rich: + from rich.text import Text + self.sink.write(Text(message, style="dim")) + elif self.plain: + import _colors as C + self.sink.write_text(C.info(message)) + return True + + +class SessionDiffRenderer(EventRenderer): + event_types = ("session.diff",) + + def render(self, event: dict[str, Any]) -> bool: + properties = event.get("properties", {}) + diff = properties.get("diff", []) + if not isinstance(diff, list) or not diff: + return False + count = len(diff) + message = f"session diff updated: {count} file{'s' if count != 1 else ''}" + if self.rich: + from rich.text import Text + self.sink.write(Text(message, style="dim")) + elif self.plain: + import _colors as C + self.sink.write_text(C.info(message)) + return True + + +class MessageUpdatedRenderer(EventRenderer): + event_types = ("message.updated",) + + def render(self, event: dict[str, Any]) -> bool: + info = event.get("info") + if not isinstance(info, dict): + props = event.get("properties", {}) + info = props.get("info", {}) if isinstance(props, dict) else {} + if not isinstance(info, dict): + info = {} + + role = str(info.get("role", "")) + tokens = info.get("tokens", {}) if isinstance(info.get("tokens"), dict) else {} + has_tokens = isinstance(tokens, dict) and ( + tokens.get("input", 0) or tokens.get("output", 0) or tokens.get("reasoning", 0) + ) + 
has_summary = "summary" in info or "finish" in info + if not has_summary and not has_tokens: + return False + + mcache = tokens.get("cache", {}) if isinstance(tokens, dict) else {} + cost = info.get("cost", 0) or 0 + + model_id = str(info.get("modelID", "")).strip() + provider_id = str(info.get("providerID", "")).strip() + if not model_id: + mdl = info.get("model", {}) + if isinstance(mdl, dict): + model_id = str(mdl.get("modelID", "")).strip() + provider_id = str(mdl.get("providerID", "")).strip() + model_label = f"{provider_id}/{model_id}" if provider_id and model_id else model_id + + if role == "user": + message = "> User" + style = "dim" + elif role == "assistant": + if has_tokens: + _in = tokens.get("input", 0) + _out = tokens.get("output", 0) + _reasoning = tokens.get("reasoning", 0) + _cache_read = mcache.get("read", 0) if isinstance(mcache, dict) else 0 + token_parts = [f"\u2191{_in} \u2193{_out}"] + if _reasoning: + token_parts.append(f"R{_reasoning}") + if _cache_read: + token_parts.append(f"cache read {_cache_read}") + token_str = ", ".join(token_parts) + cost_str = f", ${cost:.4f}" if cost else "" + message = f"> Assistant \u00b7 {model_label} ({token_str}{cost_str})" + style = "bold blue" + else: + message = f"> Assistant \u00b7 {model_label}" if model_label else "> Assistant" + style = "bold blue" + else: + agent = str(info.get("agent", "assistant")) + message = f"> {agent} \u00b7 {model_label}" if model_label else f"> {agent}" + style = "bold blue" + + if self.rich: + from rich.text import Text + self.sink.write(Text(message, style=style)) + elif self.plain: + import _colors as C + self.sink.write_text(C.header(message)) + return True + + +class SubagentStatusRenderer(EventRenderer): + event_types = ("subagent.status",) + + def render(self, event: dict[str, Any]) -> bool: + if not self.context.settings.render_subagent_updates: + return False + + properties = event.get("properties", {}) + status_type = str(properties.get("statusType", "")) + 
session_id = str(properties.get("sessionID", "")) + title = str(properties.get("title", "(untitled)")) + summary = properties.get("summary") + elapsed_ms = properties.get("elapsedMs") + + if status_type == "updated": + snapshot: dict[str, Any] = {"title": title} + if isinstance(summary, dict): + snapshot["additions"] = summary.get("additions") + snapshot["deletions"] = summary.get("deletions") + snapshot["files"] = summary.get("files") + + last_snapshot, last_time = _SUBAGENT_LAST_STATE.get(session_id, ({}, 0.0)) + now = _time.time() + if ( + last_snapshot == snapshot + and (now - last_time) < self.context.settings.subagent_update_throttle_s + ): + return False + _SUBAGENT_LAST_STATE[session_id] = (snapshot, now) + + if self.rich: + self._render_rich(status_type, title, summary, elapsed_ms) + elif self.plain: + self._render_plain(status_type, title, summary, elapsed_ms) + return True + + def _render_rich(self, status_type: str, title: str, summary, elapsed_ms) -> None: + from rich.panel import Panel + from rich.text import Text + if status_type == "created": + self.sink.write(Panel(Text(title, style="bold cyan"), title="Subagent started", border_style="cyan", expand=True)) + elif status_type == "finished": + self.sink.write(Panel(Text(title, style="bold cyan"), title="Subagent finished", border_style="green", expand=True)) + elif status_type == "heartbeat" and elapsed_ms is not None: + elapsed_s = elapsed_ms // 1000 + self.sink.write(Text(f"\u23f3 Subagent \u00b7 {title} still running ({elapsed_s}s)", style="bold yellow")) + elif status_type == "updated": + summary_text = self._format_subagent_summary(summary) + line = f"Subagent \u00b7 {title}" + if summary_text: + line += f" {summary_text}" + self.sink.write(Text(line, style="dim")) + + def _render_plain(self, status_type: str, title: str, summary, elapsed_ms) -> None: + import _colors as C + if status_type == "created": + self.sink.write_text(C.header(f"[subagent] started: {title}")) + elif status_type == 
"finished": + self.sink.write_text(C.ok(f"[subagent] finished: {title}")) + elif status_type == "heartbeat" and elapsed_ms is not None: + elapsed_s = elapsed_ms // 1000 + self.sink.write_text(C.warn(f"\u23f3 Subagent \u00b7 {title} still running ({elapsed_s}s")) + elif status_type == "updated": + summary_text = self._format_subagent_summary(summary) + line = f"Subagent \u00b7 {title}" + if summary_text: + line += f" {summary_text}" + self.sink.write_text(f" {line}") + + @staticmethod + def _format_subagent_summary(summary: Any) -> str: + if not isinstance(summary, dict): + return "" + additions = summary.get("additions") + deletions = summary.get("deletions") + files = summary.get("files") + parts: list[str] = [] + if additions is not None or deletions is not None: + parts.append(f"+{additions or 0} -{deletions or 0}") + if files is not None: + parts.append(f"{files} file(s)") + return " ".join(parts) + + class UnknownEventRenderer(EventRenderer): """Fallback renderer for unrecognised event types.""" @@ -41,6 +442,5 @@ def render(self, event: dict[str, Any]) -> bool: message = f"unknown event type: {event_type}" self.sink.write_text(message) if self.context.settings.debug_unknown_events: - import json self.sink.write_text(json.dumps(event, indent=2, default=str)) return True diff --git a/tools/run-agent.py b/tools/run-agent.py index 2ab06b9..86adbc0 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -3681,32 +3681,47 @@ def render_error(console: Console, event: dict[str, Any]) -> None: def render_event(console: Console, phase: str, label: str, event: dict[str, Any]) -> None: event_type = event.get("type") + ctx = _get_rendering_ctx(console) + if event_type == "server.connected": - render_server_connected(console, event) + from rendering.events import ServerConnectedRenderer + ServerConnectedRenderer(ctx).render(event) elif event_type == "server.heartbeat": - render_server_heartbeat(console, event) + from rendering.events import ServerHeartbeatRenderer + 
ServerHeartbeatRenderer(ctx).render(event) elif event_type == "message.updated": - render_message_updated(console, event) + from rendering.events import MessageUpdatedRenderer + MessageUpdatedRenderer(ctx).render(event) elif event_type == "step_start": - render_step_start(console, phase, label, event) + from rendering.events import StepStartRenderer + StepStartRenderer(ctx, phase=phase, label=label).render(event) elif event_type == "text": - render_text(console, event) + from rendering.events import TextEventRenderer + TextEventRenderer(ctx).render(event) elif event_type == "reasoning": - render_reasoning(console, event) + from rendering.events import ReasoningEventRenderer + ReasoningEventRenderer(ctx).render(event) elif event_type == "tool_use": - render_tool_use(console, event) + from rendering.events import ToolUseEventRenderer + ToolUseEventRenderer(ctx).render(event) elif event_type == "step_finish": - render_step_finish(console, event) + from rendering.events import StepFinishRenderer + StepFinishRenderer(ctx).render(event) elif event_type == "error": - render_error(console, event) + from rendering.events import ErrorEventRenderer + ErrorEventRenderer(ctx).render(event) elif event_type == "session.status": - render_session_status(console, event) + from rendering.events import SessionStatusRenderer + SessionStatusRenderer(ctx).render(event) elif event_type == "session.diff": - render_session_diff(console, event) + from rendering.events import SessionDiffRenderer + SessionDiffRenderer(ctx).render(event) elif event_type == "subagent.status": - render_subagent_status(console, event) + from rendering.events import SubagentStatusRenderer + SubagentStatusRenderer(ctx).render(event) else: - render_unknown(console, event) + from rendering.events import UnknownEventRenderer + UnknownEventRenderer(ctx).render(event) def render_session_status(console: Console, event: dict[str, Any]) -> None: From 1c8f1d85ca64419e96ecedbdaaef42c71d09ad35 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 00:27:01 +0200 Subject: [PATCH 18/65] fix(phase-a3-batch7): address review issues - tests, caching, state isolation - Add comprehensive tests for all 13 generic event renderers (tests/test_rendering_events.py: 58 tests) - Cache renderer instances in _get_rendering_ctx() to avoid per-event allocations in render_event() - Cache FallbackToolRenderer in ToolUseEventRenderer.__init__ - Add _reset_subagent_state() helper for test isolation 448 passed, 0 failed --- tests/test_rendering_events.py | 448 +++++++++++++++++++++++++++++++++ tools/rendering/events.py | 13 +- tools/run-agent.py | 58 ++--- 3 files changed, 481 insertions(+), 38 deletions(-) create mode 100644 tests/test_rendering_events.py diff --git a/tests/test_rendering_events.py b/tests/test_rendering_events.py new file mode 100644 index 0000000..a96259c --- /dev/null +++ b/tests/test_rendering_events.py @@ -0,0 +1,448 @@ +from __future__ import annotations + +import sys +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from rendering.context import RenderContext +from rendering.sink import PlainSink, RichConsoleSink +from rendering.settings import RenderSettings +from rendering.cache import SnapshotCache +from rendering.events import ( + StepStartRenderer, + TextEventRenderer, + ReasoningEventRenderer, + ToolUseEventRenderer, + StepFinishRenderer, + ErrorEventRenderer, + SessionStatusRenderer, + ServerConnectedRenderer, + ServerHeartbeatRenderer, + SessionDiffRenderer, + MessageUpdatedRenderer, + SubagentStatusRenderer, + UnknownEventRenderer, + _reset_subagent_state, +) + + +def _ctx(sink_mode="plain", **settings_overrides): + if sink_mode == "rich": + from rich.console import Console + sink = RichConsoleSink(Console(record=True)) + else: + sink = PlainSink() + settings = RenderSettings(**settings_overrides) + return 
RenderContext( + root=Path("/fake"), + sink=sink, + settings=settings, + cache=SnapshotCache(), + ) + + +# --------------------------------------------------------------------------- +# StepStartRenderer +# --------------------------------------------------------------------------- + +class TestStepStartRenderer: + def test_renders_step_start_plain(self, capsys): + r = StepStartRenderer(_ctx("plain"), phase="1", label="recon") + assert r.render({"part": {"type": "tool_use"}}) is True + out = capsys.readouterr().out + assert "[1] recon: tool_use" in out + + def test_renders_step_start_rich(self): + r = StepStartRenderer(_ctx("rich"), phase="2", label="audit") + assert r.render({"part": {"type": "text"}}) is True + + def test_defaults_to_step_start_type(self, capsys): + r = StepStartRenderer(_ctx("plain")) + assert r.render({"part": {}}) is True + out = capsys.readouterr().out + assert "step-start" in out + + +# --------------------------------------------------------------------------- +# TextEventRenderer +# --------------------------------------------------------------------------- + +class TestTextEventRenderer: + def test_renders_text_plain(self, capsys): + r = TextEventRenderer(_ctx("plain")) + assert r.render({"part": {"text": "Hello world"}}) is True + out = capsys.readouterr().out + assert "Assistant" in out + assert "Hello world" in out + + def test_renders_text_rich(self): + r = TextEventRenderer(_ctx("rich")) + assert r.render({"part": {"text": "Hello world"}}) is True + + def test_skips_empty_text(self): + r = TextEventRenderer(_ctx("plain")) + assert r.render({"part": {"text": ""}}) is False + assert r.render({"part": {"text": " \n\t "}}) is False + + def test_skips_missing_text(self): + r = TextEventRenderer(_ctx("plain")) + assert r.render({"part": {}}) is False + + +# --------------------------------------------------------------------------- +# ReasoningEventRenderer +# --------------------------------------------------------------------------- + 
+class TestReasoningEventRenderer: + def test_renders_reasoning_plain(self, capsys): + r = ReasoningEventRenderer(_ctx("plain", render_reasoning=True)) + assert r.render({"part": {"text": "I think therefore I am"}}) is True + out = capsys.readouterr().out + assert "Thinking" in out + assert "I think therefore I am" in out + + def test_renders_reasoning_rich(self): + r = ReasoningEventRenderer(_ctx("rich", render_reasoning=True)) + assert r.render({"part": {"text": "Deep thought"}}) is True + + def test_disabled_by_settings(self): + r = ReasoningEventRenderer(_ctx("plain", render_reasoning=False)) + assert r.render({"part": {"text": "Hidden"}}) is False + + def test_skips_empty_text(self): + r = ReasoningEventRenderer(_ctx("plain", render_reasoning=True)) + assert r.render({"part": {"text": ""}}) is False + + def test_truncates_long_text(self, capsys): + r = ReasoningEventRenderer(_ctx("plain", render_reasoning=True, reasoning_max_chars=10)) + assert r.render({"part": {"text": "123456789012345"}}) is True + out = capsys.readouterr().out + assert "1234567890" in out + assert "truncated" in out + + +# --------------------------------------------------------------------------- +# ToolUseEventRenderer +# --------------------------------------------------------------------------- + +class TestToolUseEventRenderer: + def test_delegates_to_fallback_tool_renderer(self, capsys): + r = ToolUseEventRenderer(_ctx("plain")) + assert r.render({"part": {"tool": "unknown_tool", "state": {"status": "completed"}}}) is True + out = capsys.readouterr().out + assert "unknown_tool" in out + + def test_handles_missing_state(self): + r = ToolUseEventRenderer(_ctx("plain")) + # FallbackToolRenderer should handle empty state + assert r.render({"part": {"tool": "bash"}}) is True + + +# --------------------------------------------------------------------------- +# StepFinishRenderer +# --------------------------------------------------------------------------- + +class TestStepFinishRenderer: 
+ def test_renders_finish_plain(self, capsys): + r = StepFinishRenderer(_ctx("plain")) + assert r.render({"part": {"reason": "stop"}}) is True + out = capsys.readouterr().out + assert "step finished: stop" in out + + def test_renders_finish_with_tokens(self, capsys): + r = StepFinishRenderer(_ctx("plain")) + assert r.render({"part": {"reason": "end_turn", "tokens": {"input": 100, "output": 50}}}) is True + out = capsys.readouterr().out + assert "input=100" in out + assert "output=50" in out + + def test_failure_reason_styled(self, capsys): + r = StepFinishRenderer(_ctx("plain")) + assert r.render({"part": {"reason": "error"}}) is True + out = capsys.readouterr().out + assert "error" in out + + def test_renders_finish_rich(self): + r = StepFinishRenderer(_ctx("rich")) + assert r.render({"part": {"reason": "stop"}}) is True + + +# --------------------------------------------------------------------------- +# ErrorEventRenderer +# --------------------------------------------------------------------------- + +class TestErrorEventRenderer: + def test_renders_dict_error_with_name_and_message(self, capsys): + r = ErrorEventRenderer(_ctx("plain")) + assert r.render({"error": {"name": "RateLimit", "data": {"message": "too many requests"}}}) is True + out = capsys.readouterr().out + assert "RateLimit" in out + assert "too many requests" in out + + def test_renders_dict_error_with_top_level_message(self, capsys): + r = ErrorEventRenderer(_ctx("plain")) + assert r.render({"error": {"message": "something broke"}}) is True + out = capsys.readouterr().out + assert "something broke" in out + + def test_renders_string_error(self, capsys): + r = ErrorEventRenderer(_ctx("plain")) + assert r.render({"error": "plain string error"}) is True + out = capsys.readouterr().out + assert "plain string error" in out + + def test_renders_missing_error(self, capsys): + r = ErrorEventRenderer(_ctx("plain")) + assert r.render({}) is True + out = capsys.readouterr().out + assert "(no error message)" 
in out + + def test_renders_error_rich(self): + r = ErrorEventRenderer(_ctx("rich")) + assert r.render({"error": "test"}) is True + + +# --------------------------------------------------------------------------- +# SessionStatusRenderer +# --------------------------------------------------------------------------- + +class TestSessionStatusRenderer: + def test_renders_retry_status(self, capsys): + r = SessionStatusRenderer(_ctx("plain")) + assert r.render({"properties": {"status": {"type": "retry", "attempt": 3, "message": "Timeout"}}}) is True + out = capsys.readouterr().out + assert "retry attempt 3" in out + assert "Timeout" in out + + def test_renders_busy_status(self, capsys): + r = SessionStatusRenderer(_ctx("plain")) + assert r.render({"properties": {"status": {"type": "busy"}}}) is True + out = capsys.readouterr().out + assert "busy" in out + + def test_renders_idle_status(self, capsys): + r = SessionStatusRenderer(_ctx("plain")) + assert r.render({"properties": {"status": {"type": "idle"}}}) is True + out = capsys.readouterr().out + assert "idle" in out + + def test_renders_status_rich(self): + r = SessionStatusRenderer(_ctx("rich")) + assert r.render({"properties": {"status": {"type": "busy"}}}) is True + + +# --------------------------------------------------------------------------- +# ServerConnectedRenderer +# --------------------------------------------------------------------------- + +class TestServerConnectedRenderer: + def test_renders_connected_plain(self, capsys): + r = ServerConnectedRenderer(_ctx("plain")) + assert r.render({}) is True + out = capsys.readouterr().out + assert "connected" in out + + def test_renders_connected_rich(self): + r = ServerConnectedRenderer(_ctx("rich")) + assert r.render({}) is True + + +# --------------------------------------------------------------------------- +# ServerHeartbeatRenderer +# --------------------------------------------------------------------------- + +class TestServerHeartbeatRenderer: + def 
test_renders_heartbeat_plain(self, capsys): + r = ServerHeartbeatRenderer(_ctx("plain")) + assert r.render({}) is True + out = capsys.readouterr().out + assert "heartbeat" in out + + def test_renders_heartbeat_rich(self): + r = ServerHeartbeatRenderer(_ctx("rich")) + assert r.render({}) is True + + +# --------------------------------------------------------------------------- +# SessionDiffRenderer +# --------------------------------------------------------------------------- + +class TestSessionDiffRenderer: + def test_renders_diff_count_plain(self, capsys): + r = SessionDiffRenderer(_ctx("plain")) + assert r.render({"properties": {"diff": ["a.py", "b.py"]}}) is True + out = capsys.readouterr().out + assert "2 files" in out + + def test_renders_single_file_diff(self, capsys): + r = SessionDiffRenderer(_ctx("plain")) + assert r.render({"properties": {"diff": ["a.py"]}}) is True + out = capsys.readouterr().out + assert "1 file" in out + assert "2 files" not in out + + def test_returns_false_for_empty_diff(self): + r = SessionDiffRenderer(_ctx("plain")) + assert r.render({"properties": {"diff": []}}) is False + + def test_returns_false_for_missing_diff(self): + r = SessionDiffRenderer(_ctx("plain")) + assert r.render({"properties": {}}) is False + + def test_renders_diff_rich(self): + r = SessionDiffRenderer(_ctx("rich")) + assert r.render({"properties": {"diff": ["a.py"]}}) is True + + +# --------------------------------------------------------------------------- +# MessageUpdatedRenderer +# --------------------------------------------------------------------------- + +class TestMessageUpdatedRenderer: + def test_renders_user_message(self, capsys): + r = MessageUpdatedRenderer(_ctx("plain")) + assert r.render({"info": {"role": "user", "summary": "test"}}) is True + out = capsys.readouterr().out + assert "User" in out + + def test_renders_assistant_with_tokens(self, capsys): + r = MessageUpdatedRenderer(_ctx("plain")) + assert r.render({"info": {"role": "assistant", 
"tokens": {"input": 10, "output": 20}}}) is True + out = capsys.readouterr().out + assert "Assistant" in out + assert "10" in out + assert "20" in out + + def test_renders_assistant_with_model(self, capsys): + r = MessageUpdatedRenderer(_ctx("plain")) + assert r.render({"info": {"role": "assistant", "modelID": "gpt-5", "providerID": "openai", "tokens": {"input": 1}}}) is True + out = capsys.readouterr().out + assert "openai/gpt-5" in out + + def test_model_fallback_to_nested_dict(self, capsys): + r = MessageUpdatedRenderer(_ctx("plain")) + assert r.render({"info": {"role": "assistant", "model": {"modelID": "claude-4", "providerID": "anthropic"}, "tokens": {"input": 1}}}) is True + out = capsys.readouterr().out + assert "anthropic/claude-4" in out + + def test_renders_custom_agent_role(self, capsys): + r = MessageUpdatedRenderer(_ctx("plain")) + assert r.render({"info": {"role": "", "agent": "auditor", "tokens": {"input": 1}}}) is True + out = capsys.readouterr().out + assert "auditor" in out + + def test_returns_false_when_no_tokens_or_summary(self): + r = MessageUpdatedRenderer(_ctx("plain")) + assert r.render({"info": {"role": "assistant"}}) is False + + def test_renders_with_cost(self, capsys): + r = MessageUpdatedRenderer(_ctx("plain")) + assert r.render({"info": {"role": "assistant", "tokens": {"input": 10}, "cost": 0.005}}) is True + out = capsys.readouterr().out + assert "$0.0050" in out + + def test_renders_message_updated_rich(self): + r = MessageUpdatedRenderer(_ctx("rich")) + assert r.render({"info": {"role": "user", "summary": "test"}}) is True + + def test_reads_from_properties_fallback(self, capsys): + r = MessageUpdatedRenderer(_ctx("plain")) + assert r.render({"properties": {"info": {"role": "user", "summary": "test"}}}) is True + out = capsys.readouterr().out + assert "User" in out + + +# --------------------------------------------------------------------------- +# SubagentStatusRenderer +# 
--------------------------------------------------------------------------- + +class TestSubagentStatusRenderer: + def setup_method(self): + _reset_subagent_state() + + def teardown_method(self): + _reset_subagent_state() + + def test_renders_created_plain(self, capsys): + r = SubagentStatusRenderer(_ctx("plain", render_subagent_updates=True)) + assert r.render({"properties": {"statusType": "created", "sessionID": "s1", "title": "Job A"}}) is True + out = capsys.readouterr().out + assert "started" in out + assert "Job A" in out + + def test_renders_finished_plain(self, capsys): + r = SubagentStatusRenderer(_ctx("plain", render_subagent_updates=True)) + assert r.render({"properties": {"statusType": "finished", "sessionID": "s1", "title": "Job A"}}) is True + out = capsys.readouterr().out + assert "finished" in out + + def test_renders_heartbeat_plain(self, capsys): + r = SubagentStatusRenderer(_ctx("plain", render_subagent_updates=True)) + assert r.render({"properties": {"statusType": "heartbeat", "sessionID": "s1", "title": "Job A", "elapsedMs": 45000}}) is True + out = capsys.readouterr().out + assert "45s" in out + + def test_renders_updated_with_summary(self, capsys): + r = SubagentStatusRenderer(_ctx("plain", render_subagent_updates=True)) + assert r.render({"properties": {"statusType": "updated", "sessionID": "s1", "title": "Job A", "summary": {"additions": 3, "files": 2}}}) is True + out = capsys.readouterr().out + assert "+3" in out + assert "2 file(s)" in out + + def test_dedupes_identical_updates(self, capsys): + r = SubagentStatusRenderer(_ctx("plain", render_subagent_updates=True, subagent_update_throttle_s=5)) + event = {"properties": {"statusType": "updated", "sessionID": "s1", "title": "Job A", "summary": {"additions": 1}}} + assert r.render(event) is True + assert r.render(event) is False # Deduped + + def test_renders_when_summary_changes(self, capsys): + r = SubagentStatusRenderer(_ctx("plain", render_subagent_updates=True, 
subagent_update_throttle_s=5)) + assert r.render({"properties": {"statusType": "updated", "sessionID": "s1", "title": "Job A", "summary": {"additions": 1}}}) is True + assert r.render({"properties": {"statusType": "updated", "sessionID": "s1", "title": "Job A", "summary": {"additions": 2}}}) is True + + def test_disabled_by_settings(self): + r = SubagentStatusRenderer(_ctx("plain", render_subagent_updates=False)) + assert r.render({"properties": {"statusType": "created", "sessionID": "s1", "title": "Job"}}) is False + + def test_renders_subagent_rich(self): + r = SubagentStatusRenderer(_ctx("rich", render_subagent_updates=True)) + assert r.render({"properties": {"statusType": "created", "sessionID": "s1", "title": "Job"}}) is True + + +# --------------------------------------------------------------------------- +# UnknownEventRenderer +# --------------------------------------------------------------------------- + +class TestUnknownEventRenderer: + def test_renders_unknown_event_type(self, capsys): + r = UnknownEventRenderer(_ctx("plain")) + assert r.render({"type": "weird.event"}) is True + out = capsys.readouterr().out + assert "unknown event type: weird.event" in out + + def test_renders_unknown_part_type(self, capsys): + r = UnknownEventRenderer(_ctx("plain")) + assert r.render({"type": "message.part.updated", "part": {"type": "custom"}}) is True + out = capsys.readouterr().out + assert "unknown part type: custom" in out + + def test_includes_debug_json_when_enabled(self, capsys): + r = UnknownEventRenderer(_ctx("plain", debug_unknown_events=True)) + assert r.render({"type": "x", "extra": 1}) is True + out = capsys.readouterr().out + assert '"extra": 1' in out + + def test_omits_debug_json_when_disabled(self, capsys): + r = UnknownEventRenderer(_ctx("plain", debug_unknown_events=False)) + assert r.render({"type": "x", "extra": 1}) is True + out = capsys.readouterr().out + assert '"extra"' not in out + + def test_renders_unknown_rich(self): + r = 
UnknownEventRenderer(_ctx("rich")) + assert r.render({"type": "x"}) is True diff --git a/tools/rendering/events.py b/tools/rendering/events.py index 8510b51..933b313 100644 --- a/tools/rendering/events.py +++ b/tools/rendering/events.py @@ -29,6 +29,11 @@ _SUBAGENT_LAST_STATE: dict[str, tuple[dict[str, Any], float]] = {} +def _reset_subagent_state() -> None: + """Clear per-session dedup state. Call between tests or runs.""" + _SUBAGENT_LAST_STATE.clear() + + # --------------------------------------------------------------------------- # EventRenderer base # --------------------------------------------------------------------------- @@ -123,12 +128,16 @@ def render(self, event: dict[str, Any]) -> bool: class ToolUseEventRenderer(EventRenderer): event_types = ("tool_use",) + def __init__(self, context): + super().__init__(context) + from rendering.tools.base import FallbackToolRenderer + self._fallback = FallbackToolRenderer(context) + def render(self, event: dict[str, Any]) -> bool: part = event.get("part", {}) tool = str(part.get("tool", "unknown")) state = part.get("state", {}) if isinstance(part.get("state"), dict) else {} - from rendering.tools.base import FallbackToolRenderer - return FallbackToolRenderer(self.context).render(tool, state) + return self._fallback.render(tool, state) class StepFinishRenderer(EventRenderer): diff --git a/tools/run-agent.py b/tools/run-agent.py index 86adbc0..a8b8d56 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -78,6 +78,23 @@ def _get_rendering_ctx(console: Any) -> Any: settings=RenderSettings.from_env(), cache=SnapshotCache(), ) + # Pre-instantiate and cache event renderers so render_event() + # doesn't allocate on every SSE event. 
+ from rendering import events as _evts + ctx._renderers = { + "server.connected": _evts.ServerConnectedRenderer(ctx), + "server.heartbeat": _evts.ServerHeartbeatRenderer(ctx), + "message.updated": _evts.MessageUpdatedRenderer(ctx), + "text": _evts.TextEventRenderer(ctx), + "reasoning": _evts.ReasoningEventRenderer(ctx), + "tool_use": _evts.ToolUseEventRenderer(ctx), + "step_finish": _evts.StepFinishRenderer(ctx), + "error": _evts.ErrorEventRenderer(ctx), + "session.status": _evts.SessionStatusRenderer(ctx), + "session.diff": _evts.SessionDiffRenderer(ctx), + "subagent.status": _evts.SubagentStatusRenderer(ctx), + "unknown": _evts.UnknownEventRenderer(ctx), + } _RENDERING_CTX_CACHE[mode] = ctx return ctx @@ -3682,46 +3699,15 @@ def render_error(console: Console, event: dict[str, Any]) -> None: def render_event(console: Console, phase: str, label: str, event: dict[str, Any]) -> None: event_type = event.get("type") ctx = _get_rendering_ctx(console) + renderers = getattr(ctx, "_renderers", {}) - if event_type == "server.connected": - from rendering.events import ServerConnectedRenderer - ServerConnectedRenderer(ctx).render(event) - elif event_type == "server.heartbeat": - from rendering.events import ServerHeartbeatRenderer - ServerHeartbeatRenderer(ctx).render(event) - elif event_type == "message.updated": - from rendering.events import MessageUpdatedRenderer - MessageUpdatedRenderer(ctx).render(event) - elif event_type == "step_start": + if event_type == "step_start": from rendering.events import StepStartRenderer StepStartRenderer(ctx, phase=phase, label=label).render(event) - elif event_type == "text": - from rendering.events import TextEventRenderer - TextEventRenderer(ctx).render(event) - elif event_type == "reasoning": - from rendering.events import ReasoningEventRenderer - ReasoningEventRenderer(ctx).render(event) - elif event_type == "tool_use": - from rendering.events import ToolUseEventRenderer - ToolUseEventRenderer(ctx).render(event) - elif event_type == 
"step_finish": - from rendering.events import StepFinishRenderer - StepFinishRenderer(ctx).render(event) - elif event_type == "error": - from rendering.events import ErrorEventRenderer - ErrorEventRenderer(ctx).render(event) - elif event_type == "session.status": - from rendering.events import SessionStatusRenderer - SessionStatusRenderer(ctx).render(event) - elif event_type == "session.diff": - from rendering.events import SessionDiffRenderer - SessionDiffRenderer(ctx).render(event) - elif event_type == "subagent.status": - from rendering.events import SubagentStatusRenderer - SubagentStatusRenderer(ctx).render(event) + elif event_type in renderers: + renderers[event_type].render(event) else: - from rendering.events import UnknownEventRenderer - UnknownEventRenderer(ctx).render(event) + renderers.get("unknown", UnknownEventRenderer(ctx)).render(event) def render_session_status(console: Console, event: dict[str, Any]) -> None: From cf7e11e119c50289c61a7cef8ad90fb8e3cb5145 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 00:45:19 +0200 Subject: [PATCH 19/65] fix: address Phase A3 PR review comments (round 3) 1. Fix ToolUseEventRenderer to dispatch to specific tool renderers (Read/Write/Edit/Command/...) instead of always using FallbackToolRenderer. Falls through to fallback only when the specific renderer returns False. 2. Fix missing closing paren in SubagentStatusRenderer plain heartbeat message. 3. Add regression test proving dataclasses.replace() correctly applies CLI tunable overrides to RenderSettings. 
449 passed, 0 failed, 0 errors --- tests/test_rendering_context.py | 20 +++++++++++++++++ tools/rendering/events.py | 40 ++++++++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/tests/test_rendering_context.py b/tests/test_rendering_context.py index bc24c7e..c89b1fd 100644 --- a/tests/test_rendering_context.py +++ b/tests/test_rendering_context.py @@ -36,3 +36,23 @@ def test_cache_is_shared(self): cache=cache, ) assert ctx.cache is cache + + def test_cli_overrides_reach_render_settings(self): + """dataclasses.replace() applies CLI tunable overrides correctly.""" + import dataclasses + settings = RenderSettings.from_env() + assert settings.read_display_lines == 10 + assert settings.write_content_lines == 25 + assert settings.write_diff_limit == 50 + assert settings.edit_diff_lines == 25 + + settings = dataclasses.replace(settings, + read_display_lines=42, + write_content_lines=7, + write_diff_limit=99, + edit_diff_lines=3, + ) + assert settings.read_display_lines == 42 + assert settings.write_content_lines == 7 + assert settings.write_diff_limit == 99 + assert settings.edit_diff_lines == 3 diff --git a/tools/rendering/events.py b/tools/rendering/events.py index 933b313..2615dea 100644 --- a/tools/rendering/events.py +++ b/tools/rendering/events.py @@ -137,6 +137,44 @@ def render(self, event: dict[str, Any]) -> bool: part = event.get("part", {}) tool = str(part.get("tool", "unknown")) state = part.get("state", {}) if isinstance(part.get("state"), dict) else {} + tool_lower = tool.strip().lower() + + # Route through specific tool renderers first. 
+ renderer = None + if tool_lower == "todowrite": + from rendering.tools.todo import TodoRenderer + renderer = TodoRenderer(self.context) + elif tool_lower == "read": + from rendering.tools.read import ReadRenderer + renderer = ReadRenderer(self.context) + elif tool_lower == "write": + from rendering.tools.write import WriteRenderer + renderer = WriteRenderer(self.context) + elif tool_lower == "edit": + from rendering.tools.edit import EditRenderer + renderer = EditRenderer(self.context) + elif tool_lower in ("apply_patch", "applypatch", "apply-patch"): + from rendering.tools.apply_patch import ApplyPatchRenderer + renderer = ApplyPatchRenderer(self.context) + elif tool_lower == "glob": + from rendering.tools.glob import GlobRenderer + renderer = GlobRenderer(self.context) + elif tool_lower == "grep": + from rendering.tools.grep import GrepRenderer + renderer = GrepRenderer(self.context) + elif tool_lower == "bash": + from rendering.tools.command import CommandRenderer + renderer = CommandRenderer(self.context) + elif tool_lower == "skill": + from rendering.tools.skill import SkillRenderer + renderer = SkillRenderer(self.context) + elif tool_lower == "task": + from rendering.tools.task import TaskRenderer + renderer = TaskRenderer(self.context) + + if renderer is not None and renderer.render(tool, state): + return True + return self._fallback.render(tool, state) @@ -416,7 +454,7 @@ def _render_plain(self, status_type: str, title: str, summary, elapsed_ms) -> No self.sink.write_text(C.ok(f"[subagent] finished: {title}")) elif status_type == "heartbeat" and elapsed_ms is not None: elapsed_s = elapsed_ms // 1000 - self.sink.write_text(C.warn(f"\u23f3 Subagent \u00b7 {title} still running ({elapsed_s}s")) + self.sink.write_text(C.warn(f"\u23f3 Subagent \u00b7 {title} still running ({elapsed_s}s)")) elif status_type == "updated": summary_text = self._format_subagent_summary(summary) line = f"Subagent \u00b7 {title}" From 8d440917d37ed3450c8fb94f7a22e84dab9227c2 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 00:53:59 +0200 Subject: [PATCH 20/65] Fix two rendering issues from Phase A3 review 1. tools/rendering/utils.py: rename _ROUT_* constants to _ROOT_* - _ROUT_WORKSPACE_DOCS -> _ROOT_WORKSPACE_DOCS - _ROUT_WORKSPACE_CONFIGS -> _ROOT_WORKSPACE_CONFIGS - Fixes naming inconsistency with run-agent.py's _ROOT_WORKSPACE_DOCS 2. tools/run-agent.py: cache StepStartRenderer in ctx._renderers - Added 'step_start' to pre-cached renderers dict (line 91) - Updated render_event() to reuse cached instance with updated phase/label instead of allocating a new instance per event - All 61 rendering tests pass --- tools/rendering/utils.py | 8 ++++---- tools/run-agent.py | 11 +++++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/rendering/utils.py b/tools/rendering/utils.py index 620f30e..85b57a6 100644 --- a/tools/rendering/utils.py +++ b/tools/rendering/utils.py @@ -52,8 +52,8 @@ } _FINDING_FILENAME_RE = re.compile(r"^(CC-\d{4,})-(.+)\.md$") -_ROUT_WORKSPACE_DOCS = {"AGENTS.md", "README.md"} -_ROUT_WORKSPACE_CONFIGS = {"codecome.yml"} +_ROOT_WORKSPACE_DOCS = {"AGENTS.md", "README.md"} +_ROOT_WORKSPACE_CONFIGS = {"codecome.yml"} # --------------------------------------------------------------------------- @@ -179,9 +179,9 @@ def classify_internal_read(rel_path: str) -> str | None: if len(parts) == 1: name = parts[0] - if name in _ROUT_WORKSPACE_DOCS: + if name in _ROOT_WORKSPACE_DOCS: return f"reading workspace doc: {name}" - if name in _ROUT_WORKSPACE_CONFIGS: + if name in _ROOT_WORKSPACE_CONFIGS: return f"reading workspace config: {name}" return None diff --git a/tools/run-agent.py b/tools/run-agent.py index a8b8d56..b4f5809 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -88,6 +88,7 @@ def _get_rendering_ctx(console: Any) -> Any: "text": _evts.TextEventRenderer(ctx), "reasoning": _evts.ReasoningEventRenderer(ctx), "tool_use": _evts.ToolUseEventRenderer(ctx), + 
"step_start": _evts.StepStartRenderer(ctx), "step_finish": _evts.StepFinishRenderer(ctx), "error": _evts.ErrorEventRenderer(ctx), "session.status": _evts.SessionStatusRenderer(ctx), @@ -3702,8 +3703,14 @@ def render_event(console: Console, phase: str, label: str, event: dict[str, Any] renderers = getattr(ctx, "_renderers", {}) if event_type == "step_start": - from rendering.events import StepStartRenderer - StepStartRenderer(ctx, phase=phase, label=label).render(event) + renderer = renderers.get("step_start") + if renderer: + renderer.phase = phase + renderer.label = label + renderer.render(event) + else: + from rendering.events import StepStartRenderer + StepStartRenderer(ctx, phase=phase, label=label).render(event) elif event_type in renderers: renderers[event_type].render(event) else: From f8146bd95d3c998186e2cf1ad3565192772d50c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 01:55:11 +0200 Subject: [PATCH 21/65] refactor(phase-a5): extract runner helpers to codecome/runner.py Move _consume_events() and _run_single_attempt() from run-agent.py to codecome/runner.py. The runner functions accept render_event_fn and emit_fatal_error_fn as explicit callable parameters rather than importing from codecome.cli or depending on module-level globals. run-agent.py imports the runner functions lazily (inside main()'s loop) to avoid import-time coupling with dynamic-module tests. Tests updated to monkeypatch codecome.runner instead of the run-agent.py module for these two functions. 
144 passed, 0 failed, 0 errors (test_run_agent suite) --- opencode.json | 2 +- tests/test_run_agent.py | 60 +++++++++++++----- tools/codecome/runner.py | 130 +++++++++++++++++++++++++++++++++++++++ tools/run-agent.py | 120 +----------------------------------- 4 files changed, 178 insertions(+), 134 deletions(-) create mode 100644 tools/codecome/runner.py diff --git a/opencode.json b/opencode.json index a117cbd..f4596c4 100644 --- a/opencode.json +++ b/opencode.json @@ -21,7 +21,7 @@ "test": { "type": "openai", "options": { - "baseURL": "http://127.0.0.1:61167/v1", + "baseURL": "http://127.0.0.1:50400/v1", "apiKey": "sk-test" }, "models": { diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 977afde..bfc0ac1 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -71,6 +71,8 @@ def test_strip_probe_unsafe_flags_removes_session_and_continue_flags(): @pytest.mark.unit def test_resolve_model_and_variant_precedence(monkeypatch): + import sys + sys.path.insert(0, str(ROOT / "tools")) import codecome.config as _cfg monkeypatch.setenv("CODECOME_MODEL", "env/model") monkeypatch.setenv("CODECOME_MODEL_VARIANT", "max") @@ -1509,13 +1511,20 @@ def test_auto_correction_resume_loops_back_via_popen(monkeypatch, tmp_path): monkeypatch.setattr(module, "check_opencode_version", lambda: None) monkeypatch.setattr(module, "ROOT", tmp_path) + import sys + sys.path.insert(0, str(ROOT / "tools")) + if "codecome" in sys.modules and not hasattr(sys.modules["codecome"], "__path__"): + del sys.modules["codecome"] + import codecome.runner as _runner + # Reset the attempt counter so transcript numbering is deterministic. 
- if hasattr(module._run_single_attempt, "_attempt_counter"): - delattr(module._run_single_attempt, "_attempt_counter") + if hasattr(_runner._run_single_attempt, "_attempt_counter"): + delattr(_runner._run_single_attempt, "_attempt_counter") calls: list[tuple] = [] - def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, base_url, auth_token, workspace_dir, existing_session_id=None): + def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, base_url, auth_token, workspace_dir, **kwargs): + existing_session_id = kwargs.get("existing_session_id") calls.append((existing_session_id, prompt)) # Both attempts succeed with the same session. return ( @@ -1531,7 +1540,7 @@ def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, tmp_path / f"transcript-{len(calls)}.jsonl", ) - monkeypatch.setattr(module, "_run_single_attempt", fake_run_single_attempt) + monkeypatch.setattr(_runner, "_run_single_attempt", fake_run_single_attempt) frontmatter_call_count = [0] @@ -1577,10 +1586,16 @@ def test_frontmatter_failure_without_session_id_exits_nonzero(monkeypatch, tmp_p monkeypatch.setattr(module, "check_opencode_version", lambda: None) monkeypatch.setattr(module, "ROOT", tmp_path) - if hasattr(module._run_single_attempt, "_attempt_counter"): - delattr(module._run_single_attempt, "_attempt_counter") + import sys + sys.path.insert(0, str(ROOT / "tools")) + if "codecome" in sys.modules and not hasattr(sys.modules["codecome"], "__path__"): + del sys.modules["codecome"] + import codecome.runner as _runner + + if hasattr(_runner._run_single_attempt, "_attempt_counter"): + delattr(_runner._run_single_attempt, "_attempt_counter") - def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, base_url, auth_token, workspace_dir, existing_session_id=None): + def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, base_url, auth_token, workspace_dir, **kwargs): return ( 0, "", # 
empty session ID @@ -1592,7 +1607,7 @@ def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, tmp_path / "transcript.jsonl", ) - monkeypatch.setattr(module, "_run_single_attempt", fake_run_single_attempt) + monkeypatch.setattr(_runner, "_run_single_attempt", fake_run_single_attempt) class FakeResult: def __init__(self, rc, out="", err=""): @@ -1628,12 +1643,19 @@ def test_iteration_limit_triggers_auto_resume(monkeypatch, tmp_path): monkeypatch.setattr(module, "ROOT", tmp_path) monkeypatch.setenv("CODECOME_MAX_ITERATION_RETRIES", "1") - if hasattr(module._run_single_attempt, "_attempt_counter"): - delattr(module._run_single_attempt, "_attempt_counter") + import sys + sys.path.insert(0, str(ROOT / "tools")) + if "codecome" in sys.modules and not hasattr(sys.modules["codecome"], "__path__"): + del sys.modules["codecome"] + import codecome.runner as _runner + + if hasattr(_runner._run_single_attempt, "_attempt_counter"): + delattr(_runner._run_single_attempt, "_attempt_counter") calls: list[tuple] = [] - def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, base_url, auth_token, workspace_dir, existing_session_id=None): + def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, base_url, auth_token, workspace_dir, **kwargs): + existing_session_id = kwargs.get("existing_session_id") calls.append((existing_session_id, prompt)) return ( 0, @@ -1646,7 +1668,7 @@ def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, tmp_path / f"transcript-{len(calls)}.jsonl", ) - monkeypatch.setattr(module, "_run_single_attempt", fake_run_single_attempt) + monkeypatch.setattr(_runner, "_run_single_attempt", fake_run_single_attempt) monkeypatch.setattr(module, "check_phase_graceful_completion", lambda *a, **kw: False) class FakeResult: @@ -1803,10 +1825,16 @@ def test_stream_session_id_and_step_finish_count(monkeypatch, tmp_path): monkeypatch.setattr(module, "check_opencode_version", lambda: 
None) monkeypatch.setattr(module, "ROOT", tmp_path) - if hasattr(module._run_single_attempt, "_attempt_counter"): - delattr(module._run_single_attempt, "_attempt_counter") + import sys + sys.path.insert(0, str(ROOT / "tools")) + if "codecome" in sys.modules and not hasattr(sys.modules["codecome"], "__path__"): + del sys.modules["codecome"] + import codecome.runner as _runner + + if hasattr(_runner._run_single_attempt, "_attempt_counter"): + delattr(_runner._run_single_attempt, "_attempt_counter") - def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, base_url, auth_token, workspace_dir, existing_session_id=None): + def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, base_url, auth_token, workspace_dir, **kwargs): return ( 0, "ses_stream_test_001", @@ -1819,7 +1847,7 @@ def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, tmp_path / "transcript.jsonl", ) - monkeypatch.setattr(module, "_run_single_attempt", fake_run_single_attempt) + monkeypatch.setattr(_runner, "_run_single_attempt", fake_run_single_attempt) class FakeResult: def __init__(self, rc, out="", err=""): diff --git a/tools/codecome/runner.py b/tools/codecome/runner.py new file mode 100644 index 0000000..761198f --- /dev/null +++ b/tools/codecome/runner.py @@ -0,0 +1,130 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""Phase runner helpers: SSE event consumption and single-attempt orchestration.""" + +from __future__ import annotations + +import argparse +import json +import os +import sys +import threading +from pathlib import Path +from typing import Any + +sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + +import _colors as C +from events import EventLoop, RunResult +from codecome.session import create_session, send_prompt_to_session +from codecome.transcript import open_phase_transcript, close_transcript + +ROOT = 
Path(__file__).resolve().parents[2] + + +def _consume_events( + base_url: str, + session_id: str, + console: Any, + phase: str, + label: str, + args: argparse.Namespace, + transcript_fp: Any | None, + thinking_on: bool, + auth_token: str | None, + workspace_dir: str | None, + render_event_fn, # required: run-agent's render_event dispatcher +) -> RunResult: + event_loop = EventLoop( + base_url=base_url, + session_id=session_id, + console=console, + phase=phase, + label=label, + auth_token=auth_token, + workspace_dir=workspace_dir, + ) + + def _render_and_log(console_: Any, phase_: str, label_: str, event: dict[str, Any]) -> None: + if transcript_fp is not None: + try: + transcript_fp.write(json.dumps(event) + "\n") + except OSError: + pass + if args.debug: + sys.stderr.write(json.dumps(event) + "\n") + sys.stderr.flush() + if not thinking_on and event.get("type") == "reasoning": + return + render_event_fn(console_, phase_, label_, event) + + return event_loop.run(_render_and_log) + + +def _run_single_attempt( + args: argparse.Namespace, + console: Any, + prompt: str, + model: str | None, + variant: str | None, + thinking_on: bool, + base_url: str, + auth_token: str | None, + workspace_dir: str | None, + render_event_fn, # required: run-agent's render_event dispatcher + emit_fatal_error_fn=None, # type: ignore + existing_session_id: str | None = None, +) -> tuple[int, str, RunResult, Path]: + + transcript_fp = None + try: + transcript_path, transcript_fp = open_phase_transcript(str(args.phase), args.finding) + except OSError as exc: + finding_tag = (args.finding or "no-finding").replace("/", "_") + transcript_path = ROOT / "tmp" / f"last-phase-{args.phase}-{finding_tag}-attempt-N.jsonl" + try: + console.print("warning: could not open transcript ", transcript_path, ": ", exc) + except AttributeError: + print(C.warn(f"warning: could not open transcript {transcript_path}: {exc}")) + + try: + if existing_session_id: + session_id = existing_session_id + else: + session_id 
= create_session(base_url, str(args.phase), args.agent, model, auth_token, workspace_dir) + + run_result_box: dict[str, Any] = {} + consume_error_box: dict[str, Exception] = {} + + def _consume() -> None: + try: + run_result_box["result"] = _consume_events( + base_url, session_id, console, + str(args.phase), str(args.label), args, + transcript_fp, thinking_on, + auth_token, workspace_dir, + render_event_fn=render_event_fn, + ) + except Exception as exc: # noqa: BLE001 + consume_error_box["error"] = exc + + consumer = threading.Thread(target=_consume, name=f"codecome-events-{session_id}", daemon=True) + consumer.start() + + send_prompt_to_session(base_url, session_id, prompt, args.agent, model, variant, auth_token, workspace_dir) + consumer.join() + + if "error" in consume_error_box: + raise consume_error_box["error"] + run_result = run_result_box.get("result") + if not isinstance(run_result, RunResult): + raise RuntimeError("Event loop ended without a RunResult") + except Exception as exc: + if emit_fatal_error_fn: + emit_fatal_error_fn(console, "Server Error", str(exc)) + return 1, existing_session_id or "", RunResult(), transcript_path + finally: + close_transcript(transcript_fp) + + return 0, session_id, run_result, transcript_path diff --git a/tools/run-agent.py b/tools/run-agent.py index b4f5809..994e779 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -3845,123 +3845,6 @@ def build_parser() -> argparse.ArgumentParser: return parser -def _consume_events( - base_url: str, - session_id: str, - console: Any, - phase: str, - label: str, - args: argparse.Namespace, - transcript_fp: Any | None, - thinking_on: bool, - auth_token: str | None, - workspace_dir: str | None, -) -> RunResult: - """Create an EventLoop, consume SSE until idle, and return RunResult.""" - event_loop = EventLoop( - base_url=base_url, - session_id=session_id, - console=console, - phase=phase, - label=label, - auth_token=auth_token, - workspace_dir=workspace_dir, - ) - - def 
_render_and_log(console_: Any, phase_: str, label_: str, event: dict[str, Any]) -> None: - if transcript_fp is not None: - try: - transcript_fp.write(json.dumps(event) + "\n") - except OSError: - pass - if args.debug: - sys.stderr.write(json.dumps(event) + "\n") - sys.stderr.flush() - if not thinking_on and event.get("type") == "reasoning": - return - render_event(console_, phase_, label_, event) - - return event_loop.run(_render_and_log) - - -def _run_single_attempt( - args: argparse.Namespace, - console: Any, - prompt: str, - model: str | None, - variant: str | None, - thinking_on: bool, - base_url: str, - auth_token: str | None, - workspace_dir: str | None, - existing_session_id: str | None = None, -) -> tuple[int, str, RunResult, Path]: - """Run or resume a single phase attempt via opencode serve. - - If existing_session_id is provided, reuses that session (resume). - Otherwise creates a new session. - - Returns (returncode, session_id, run_result, transcript_path). - """ - transcript_fp = None - try: - transcript_path, transcript_fp = open_phase_transcript(str(args.phase), args.finding) - except OSError as exc: - # Reconstruct the path the helper would have produced so the - # warning still names the right file. 
- finding_tag = (args.finding or "no-finding").replace("/", "_") - transcript_path = ROOT / "tmp" / f"last-phase-{args.phase}-{finding_tag}-attempt-N.jsonl" - if HAVE_RICH: - console.print(Text(f"warning: could not open transcript {transcript_path}: {exc}", style="yellow")) - else: - print(C.warn(f"warning: could not open transcript {transcript_path}: {exc}")) - - try: - if existing_session_id: - session_id = existing_session_id - else: - session_id = create_session(base_url, str(args.phase), args.agent, model, auth_token, workspace_dir) - - run_result_box: dict[str, Any] = {} - consume_error_box: dict[str, Exception] = {} - - def _consume() -> None: - try: - run_result_box["result"] = _consume_events( - base_url, - session_id, - console, - str(args.phase), - str(args.label), - args, - transcript_fp, - thinking_on, - auth_token, - workspace_dir, - ) - except Exception as exc: # noqa: BLE001 - consume_error_box["error"] = exc - - consumer = threading.Thread(target=_consume, name=f"codecome-events-{session_id}", daemon=True) - consumer.start() - - send_prompt_to_session(base_url, session_id, prompt, args.agent, model, variant, auth_token, workspace_dir) - consumer.join() - - if "error" in consume_error_box: - raise consume_error_box["error"] - run_result = run_result_box.get("result") - if not isinstance(run_result, RunResult): - raise RuntimeError("Event loop ended without a RunResult") - except Exception as exc: - _emit_fatal_error(console, "Server Error", str(exc)) - return 1, existing_session_id or "", RunResult(), transcript_path - finally: - close_transcript(transcript_fp) - - return 0, session_id, run_result, transcript_path - - def _emit_fatal_error(console: Any, title: str, message: str) -> None: """Show fatal startup/runtime errors in the UI and on stderr.""" formatted = C.fail(f"{title}: {message}") @@ -4768,9 +4651,12 @@ def _forward_signal(signum: int, _frame: Any) -> None: try: while True: attempt_number += 1 + from codecome.runner import 
_run_single_attempt returncode, session_id, run_result, transcript_path = _run_single_attempt( args, console, prompt, model, variant, thinking_on, base_url, server_info.password, str(ROOT), + render_event_fn=render_event, + emit_fatal_error_fn=_emit_fatal_error, existing_session_id=last_session_id or None ) From a3d3bf86c1af476d4244abc823184054673da129 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 02:09:32 +0200 Subject: [PATCH 22/65] refactor(phase-a4): extract chat TUI to tools/chat/ package MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create canonical chat/ package with three modules: - chat/debug.py — _chat_debug(), _setup_chat_debug(), _close_chat_debug() - chat/app.py — TextualConsoleProxy, _QuitScreen, _ChatApp, _chat_render_and_log, _chat_update_modeline_info - chat/harness.py — _run_chat_mode() with all server/session wiring Old code preserved in run-agent.py for backward compatibility. run-agent's main() now delegates to chat.harness._run_chat_mode for --chat mode. The chat/harness module imports build_console and _emit_fatal_error from run-agent via importlib (transitional). 31 chat tests passed, 0 failed --- tools/chat/__init__.py | 27 ++ tools/chat/app.py | 595 +++++++++++++++++++++++++++++++++++++++++ tools/chat/debug.py | 55 ++++ tools/chat/harness.py | 160 +++++++++++ tools/run-agent.py | 8 +- 5 files changed, 844 insertions(+), 1 deletion(-) create mode 100644 tools/chat/__init__.py create mode 100644 tools/chat/app.py create mode 100644 tools/chat/debug.py create mode 100644 tools/chat/harness.py diff --git a/tools/chat/__init__.py b/tools/chat/__init__.py new file mode 100644 index 0000000..28fc322 --- /dev/null +++ b/tools/chat/__init__.py @@ -0,0 +1,27 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Chat package: Textual-based interactive chat TUI for CodeCome. 
+ +Provides: + - chat.debug: chat-specific debug logging helpers. + - chat.app: Textual UI classes (ChatApp, QuitScreen, TextualConsoleProxy). + - chat.harness: chat-mode entry point (_run_chat_mode). +""" + +from __future__ import annotations + +from chat.debug import _setup_chat_debug, _chat_debug, _close_chat_debug +from chat.app import ChatApp, QuitScreen, TextualConsoleProxy +from chat.harness import _run_chat_mode + +__all__ = [ + "_setup_chat_debug", + "_chat_debug", + "_close_chat_debug", + "ChatApp", + "QuitScreen", + "TextualConsoleProxy", + "_run_chat_mode", +] diff --git a/tools/chat/app.py b/tools/chat/app.py new file mode 100644 index 0000000..cb941b5 --- /dev/null +++ b/tools/chat/app.py @@ -0,0 +1,595 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Chat app: Textual-based interactive chat TUI classes. + +Provides: + - TextualConsoleProxy: RichLog bridge for background-thread console output. + - ChatApp / QuitScreen: module-level type hints (real classes set after try/except). + - _chat_render_and_log / _chat_update_modeline_info: standalone helpers, + callable without Textual (for testing parity). + - _QuitScreen: quit confirmation modal. + - _ChatApp: the Textual App. 
+""" + +from __future__ import annotations + +import json +import os +import sys +from pathlib import Path +from typing import Any + +sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + +from chat.debug import _chat_debug # noqa: E402 +import importlib as _importlib # noqa: E402 +_run_agent = _importlib.import_module("run-agent") +render_event = _run_agent.render_event + +# --------------------------------------------------------------------------- +# Rich imports — same fallback pattern as run-agent.py +# --------------------------------------------------------------------------- + +try: + from rich.console import Console, Group + from rich.panel import Panel + from rich.rule import Rule + from rich.text import Text + + HAVE_RICH = True +except ImportError: # pragma: no cover + Console = Any # type: ignore[assignment] + Group = tuple # type: ignore[assignment] + Panel = None # type: ignore[assignment] + Rule = None # type: ignore[assignment] + Text = None # type: ignore[assignment] + HAVE_RICH = False + +# --------------------------------------------------------------------------- +# Module-level type hints — real classes set by the try/except block below. +# --------------------------------------------------------------------------- + +ChatApp: Any = None +QuitScreen: Any = None + + +# --------------------------------------------------------------------------- +# TextualConsoleProxy — RichLog bridge (outside try/except; no Textual imports needed) +# --------------------------------------------------------------------------- + +class TextualConsoleProxy: + """Bridge Rich Console.print() calls to a Textual RichLog widget. + + Thread-safe: main-thread calls write directly to RichLog; background- + thread calls post a RenderMessage which is dispatched on the main + thread by the @on(RenderMessage) handler. This is the pattern from + Textual docs (post_message is thread-safe). 
+ """ + + def __init__(self, rich_log, app): + self.rich_log = rich_log + self.app = app + self.encoding = "utf-8" + + def print(self, *args, **kwargs): + if not args: + from rich.text import Text + + self._write(Text()) + return + if len(args) == 1: + self._write(args[0]) + else: + from rich.console import Group + + self._write(Group(*args)) + + def _write(self, renderable): + import threading + + if threading.current_thread() is threading.main_thread(): + _chat_debug("TextualConsoleProxy._write: main thread, direct write") + self.rich_log.write(renderable) + else: + _chat_debug("TextualConsoleProxy._write: bg thread, post_message(RenderMessage)") + self.app.post_message(self.app.RenderMessage(renderable)) + + +# --------------------------------------------------------------------------- +# Standalone chat-app methods — available even when Textual is not +# installed, so that tests can exercise _render_and_log parity without +# launching a real TUI. +# --------------------------------------------------------------------------- + +def _chat_render_and_log(self, console, phase, label, event): + """Standalone version of _ChatApp._render_and_log. 
See the docstring + on the class for the full contract.""" + if getattr(self, "transcript_fp", None) is not None: + try: + self.transcript_fp.write(json.dumps(event) + "\n") + except OSError: + pass + if getattr(self, "args", None) is not None and getattr(self.args, "debug", False): + _chat_debug(f"_render_and_log: raw event: {json.dumps(event)}") + if event.get("type") == "message.updated": + _chat_update_modeline_info(self, event) + if not getattr(self, "thinking_on", True) and event.get("type") == "reasoning": + return + render_event(console, phase, label, event) + + +def _chat_update_modeline_info(self, event: dict[str, Any]) -> None: + """Standalone version of _ChatApp._update_modeline_info.""" + info = event.get("info") + if not isinstance(info, dict): + props = event.get("properties", {}) + info = props.get("info", {}) if isinstance(props, dict) else {} + if not isinstance(info, dict): + return + if info.get("role") != "assistant": + return + model_id = str(info.get("modelID", "")).strip() + provider_id = str(info.get("providerID", "")).strip() + if not model_id: + mdl = info.get("model", {}) + if isinstance(mdl, dict): + model_id = str(mdl.get("modelID", "")).strip() + provider_id = str(mdl.get("providerID", "")).strip() + model_label = f"{provider_id}/{model_id}" if provider_id and model_id else (model_id or "\u2026") + tokens = info.get("tokens", {}) + if isinstance(tokens, dict): + _in = tokens.get("input", 0) + _out = tokens.get("output", 0) + token_str = f"\u2191{_in} \u2193{_out}" + else: + token_str = "" + cost = info.get("cost", 0) or 0 + cost_str = f" ${cost:.4f}" if cost else "" + getattr(self, "_modeline_info", "") + try: + self._modeline_info = f"{model_label} | {token_str}{cost_str}" + except AttributeError: + pass + + +# --------------------------------------------------------------------------- +# Textual classes — guarded by import, matching run-agent.py pattern +# --------------------------------------------------------------------------- + 
+try: + from textual import on, work + from textual.app import App, ComposeResult + from textual.message import Message + from textual.widgets import RichLog, Input, Footer, Static, Button, Label + from textual.binding import Binding + from textual.containers import Grid, Horizontal + from textual.screen import ModalScreen + + class _QuitScreen(ModalScreen[bool]): + CSS = """ + _QuitScreen { + align: center middle; + } + #quit-dialog { + grid-size: 2; + grid-gutter: 1 2; + grid-rows: 1fr 3; + padding: 0 1; + width: 60; + height: 11; + border: thick $background 80%; + background: $surface; + } + #quit-question { + column-span: 2; + height: 1fr; + width: 1fr; + content-align: center middle; + } + Button { + width: 100%; + } + """ + + def compose(self) -> ComposeResult: + yield Grid( + Label("Are you sure you want to quit?", id="quit-question"), + Button("Quit", id="quit-confirm", variant="error"), + Button("Cancel", id="quit-cancel", variant="primary"), + id="quit-dialog", + ) + + def on_button_pressed(self, event: Button.Pressed) -> None: + self.dismiss(event.button.id == "quit-confirm") + + class _ChatApp(App): + """Interactive chat harness — final design (post-bisection). + + Design follows Textual docs (https://textual.textualize.io/guide/workers): + + * The SSE consumer runs in a raw daemon thread (started via + chat_loop.start_consumer). Textual's @work(thread=True) is + reserved for short-lived blocking tasks (the docs' weather- + app pattern); using it for an infinite consumer loop froze + the main event loop in our environment (Textual 8.2.6 / + Python 3.14). + + * All UI updates from background threads (renderables AND + state markers AND errors) go through ONE one-argument + Message subclass (RenderMessage(renderable)) and ONE @on + handler that just calls rich_log.write. post_message is + documented as thread-safe. 
Bisection found that any + departure from this exact shape (adding a second Message + subclass, renaming it, adding optional fields, or even + adding a second set_interval callback) silently freezes + Textual's message dispatch on this version, even though + the same patterns work in isolated repros. We don't + understand the root cause; staying inside this working + envelope is the pragmatic path forward. + + * _render_and_log mirrors phase mode's behaviour exactly + (parity with non-interactive runs). Per-event side effects: + persist to the transcript jsonl, mirror raw JSON to the + chat-debug log when --debug is set, suppress 'reasoning' + when thinking is off, then delegate to the SAME + render_event() dispatcher non-chat uses. No chat-specific + filters or markers — `render_session_status` already + prints `session status: busy/idle` and that's the only + state cue we surface. We do NOT toggle the Input widget's + enabled/placeholder state, because doing that required a + second set_interval poller which broke dispatch in our + bisection. The Input stays enabled at all times. + + * Errors from @work workers post a red Panel renderable via + _post_error_renderable() — same RenderMessage path. + + * Short-lived HTTP calls (initial prompt, user prompt send) + run as @work(thread=True) workers — the canonical docs + pattern (matches the weather-app example). + + * The transcript jsonl is opened in _run_chat_mode and the + file handle is passed in via the `transcript_fp` constructor + argument; _render_and_log writes one JSON line per SSE + event to it (parity with phase mode). + + * A set_interval(1.0) heartbeat continuously logs a debug + tick from the main thread and also updates the bottom-bar + status line (modeline) with live token usage and an + activity pulse. The modeline data is fed by + _render_and_log on every message.updated event. 
+ """ + + CSS = """ + RichLog { + height: 1fr; + border-bottom: solid green; + background: black; + } + Input { + height: 3; + } + #bottom-bar { + dock: bottom; + height: 1; + background: $footer-background; + } + #status-left { + width: auto; + min-width: 26; + height: 1; + padding: 0 1; + color: $footer-foreground; + background: $footer-background; + } + #footer-right { + width: 1fr; + height: 1; + } + Footer { + dock: none; + } + """ + + # Ctrl+S toggles Textual's mouse capture so the user can use the + # terminal's native mouse selection (which produces system-clipboard + # copy via the terminal emulator). RichLog has no in-app selection + # support upstream, so terminal-native selection is the supported + # path. See .project/chat-mode-textual-postmortem.md §4 / §12. + BINDINGS = [ + Binding("ctrl+c", "request_quit", "Quit"), + Binding("ctrl+s", "toggle_mouse_for_select", "Select mode"), + ] + + class RenderMessage(Message): + """Single thread-safe message type — carries a Rich renderable + to be written to the RichLog on the main thread. + + Bisection showed that extending this class with optional + fields (`state`, `detail`) silently breaks Textual's message + dispatch on this version (Textual 8.2.6 / Python 3.14), even + though the same pattern works in isolation. Whatever the + root cause, we keep this class strictly one-argument + (positional, `renderable`) and use a thread-safe pending-state + slot + main-thread polling timer for idle/busy/error + transitions instead. 
+ """ + + def __init__(self, renderable): + super().__init__() + self.renderable = renderable + + def __init__(self, server_info=None, session_id=None, initial_prompt="", args=None, rich_console=None, model=None, variant=None, thinking_on=None, transcript_fp=None): + super().__init__() + self.server_info = server_info + self.session_id = session_id + self.initial_prompt = initial_prompt + self.args = args + self.rich_console = rich_console + self.model = model + self.variant = variant + self.thinking_on = thinking_on + self.transcript_fp = transcript_fp + self.chat_loop = None + self.console_proxy = None + self.rich_log = None + self.chat_input = None + self.modeline = None + self._heartbeat_count = 0 + # Updated by _render_and_log (consumer thread) on every + # message.updated event. Read by _heartbeat (main thread) + # to drive the status-line in the bottom bar. + self._modeline_info = "" + # Tracks Ctrl+S terminal-select mode. When True, Textual mouse + # handling is disabled so the terminal emulator's native mouse + # selection works (which copies to the system clipboard via the + # terminal itself). Default off (Textual mouse handling on). + self._terminal_select_mode = False + + def compose(self) -> ComposeResult: + yield RichLog(id="log", markup=False, auto_scroll=True) + yield Input(id="chat_input", placeholder="Type a message and press Enter...") + with Horizontal(id="bottom-bar"): + yield Static("ready", id="status-left") + yield Footer(id="footer-right") + + def on_mount(self) -> None: + _chat_debug("on_mount: entering") + self.rich_log = self.query_one(RichLog) + self.chat_input = self.query_one(Input) + self.modeline = self.query_one("#status-left", Static) + self.console_proxy = TextualConsoleProxy(self.rich_log, self) + _chat_debug("on_mount: proxy created") + + # Set initial modeline with model/agent info. 
+ provider = (self.model or "").split("/", 1)[0] if self.model else "" + _model_id = (self.model or "").split("/", 1)[1] if self.model and "/" in self.model else (self.model or "\u2026") + model_label = f"{provider}/{_model_id}" if provider else _model_id + self.modeline.update(f"\u25cf | {model_label} | ready") + + # Heartbeat canary — fires every 1s on the main thread. Helpful + # in the debug log to confirm the event loop is alive. + self.set_interval(1.0, self._heartbeat) + _chat_debug("on_mount: heartbeat installed") + + # Write banner (main thread, direct write). + if HAVE_RICH: + from rich.rule import Rule + + self.rich_log.write(Rule(title="Chat: Interactive Harness", style="bold cyan"), expand=True) + model_label = self.model or "(unknown)" + variant_label = self.variant or "(unknown)" + parts = [f"agent={self.args.agent if self.args else '?'}", f"model={model_label}"] + if self.variant is not None: + parts.append(f"variant={variant_label}") + parts.append(f"thinking={'on' if self.thinking_on else 'off'}") + self.rich_log.write(Text(" ".join(parts), style="dim"), expand=True) + # Hint about selection: RichLog doesn't support in-app + # mouse selection upstream; document the terminal-native + # path so users can copy output. + self.rich_log.write( + Text( + "Tip: hold Option/Alt (macOS) or Shift (most terminals) " + "while dragging to select text, or press Ctrl+S to toggle " + "terminal-select mode (disables Textual mouse).", + style="dim italic", + ), + expand=True, + ) + _chat_debug("on_mount: banner written") + + # Construct the chat event loop. 
+ from events.chat_loop import ChatEventLoop + + _chat_debug("on_mount: creating ChatEventLoop") + self.chat_loop = ChatEventLoop( + base_url=self.server_info.base_url, + session_id=self.session_id, + console=self.console_proxy, + auth_token=self.server_info.password, + workspace_dir=str(Path(__file__).resolve().parents[2]), + debug=_chat_debug if self.args and self.args.debug else None, + ) + + # Raw daemon thread — the SSE consumer. + _chat_debug("on_mount: starting SSE consumer (raw daemon thread)") + self.chat_loop.start_consumer(self._render_and_log) + _chat_debug("on_mount: consumer thread started") + + # Initial prompt: send via worker but don't echo the full text. + # The prompt comes from prompts/chat-initial.md (bootstrap + # instructions for the agent, not something the user typed). + # The SSE stream will emit a dim `> User` summary line once the + # daemon acknowledges the message, matching subsequent prompts. + if self.initial_prompt: + self.rich_log.write(Text("(initializing session\u2026)", style="bold cyan"), expand=True) + _chat_debug(f"on_mount: spawning initial-prompt worker ({len(self.initial_prompt)} chars)") + self._send_initial_prompt(self.initial_prompt) + + _chat_debug("on_mount: done") + + # --- Main-thread heartbeat canary --- + + def _heartbeat(self) -> None: + self._heartbeat_count += 1 + _chat_debug(f"_heartbeat: tick #{self._heartbeat_count} (main loop alive)") + + # Update the bottom-bar status line (modeline) with live + # token usage and an activity pulse. _modeline_info is + # written by _render_and_log on the consumer thread on + # every message.updated event; we read it here atomically. 
+ pulse = "\u25cf" if self._heartbeat_count % 2 else "\u25cc" + sel_tag = " [SEL]" if self._terminal_select_mode else "" + info = self._modeline_info or "" + if info: + text = f"{pulse}{sel_tag} | {info}" + else: + provider = (self.model or "").split("/", 1)[0] if self.model else "" + _model_id = (self.model or "").split("/", 1)[1] if self.model and "/" in self.model else (self.model or "\u2026") + model_label = f"{provider}/{_model_id}" if provider else _model_id + text = f"{pulse}{sel_tag} | {model_label} | idle" + self.modeline.update(text) + + # --- Textual workers (@work(thread=True)) — short-lived only --- + + @work(thread=True) + def _send_initial_prompt(self, text) -> None: + """Send the initial prompt in a Textual-managed thread.""" + _chat_debug("_send_initial_prompt: worker started") + try: + self.chat_loop.send_prompt( + text, + self.args.agent if self.args else "auditor", + self.model, + self.variant, + ) + _chat_debug("_send_initial_prompt: sent") + except Exception as exc: + _chat_debug(f"_send_initial_prompt: error: {exc}") + self._post_error_renderable(f"Failed to send initial prompt: {exc}") + + @work(thread=True) + def _send_prompt(self, text) -> None: + """Send a user prompt in a Textual-managed thread.""" + _chat_debug(f"_send_prompt: worker posting text len={len(text)}") + try: + self.chat_loop.send_prompt( + text, + self.args.agent if self.args else "auditor", + self.model, + self.variant, + ) + _chat_debug("_send_prompt: sent") + except Exception as exc: + _chat_debug(f"_send_prompt: error: {exc}") + self._post_error_renderable(f"Failed to send: {exc}") + + def _post_error_renderable(self, detail: str) -> None: + """Helper callable from any thread. 
Posts a RenderMessage + carrying a red error panel — sent through the same single + RenderMessage(renderable) path as everything else.""" + from rich.panel import Panel + + panel = Panel(Text(detail, style="bold red"), title="Chat Error", border_style="red") + self.post_message(self.RenderMessage(panel)) + + # --- Message handler (run on main thread). Single handler, + # single Message subclass — see RenderMessage docstring. + + @on(RenderMessage) + def _on_render_message(self, message: RenderMessage) -> None: + if self.rich_log is not None: + self.rich_log.write(message.renderable, expand=True) + + # --- Consumer-thread callback --- + + def _render_and_log(self, console, phase, label, event): + _chat_render_and_log(self, console, phase, label, event) + + def _update_modeline_info(self, event: dict[str, Any]) -> None: + _chat_update_modeline_info(self, event) + + # --- UI actions --- + + def action_request_quit(self) -> None: + def finish_quit(confirmed): + if confirmed: + self.exit() + + self.push_screen(_QuitScreen(), finish_quit) + + def action_toggle_mouse_for_select(self) -> None: + """Toggle terminal-native mouse selection mode (Ctrl+S). + + RichLog has no upstream support for in-app mouse text + selection. As a pragmatic alternative, this action toggles + Textual's mouse reporting off so the terminal emulator's + native mouse selection takes over (which copies to the + system clipboard via the terminal itself). + + When off (default): Textual handles mouse, terminal-native + drag is intercepted. Hold Option/Alt (macOS) or Shift + (most terminals) while dragging to bypass Textual without + toggling. + + When on: mouse reporting is disabled at the terminal level. + User can click-drag to select, and Cmd+C / Ctrl+Shift+C in + the terminal copies to the clipboard. Textual mouse + interactions (scrolling, clicking widgets) won't work until + toggled back. 
+ """ + driver = self._driver + if driver is None: + return + if not self._terminal_select_mode: + # Enter terminal-select mode: turn off Textual mouse. + try: + driver._disable_mouse_support() + except Exception: + return + self._terminal_select_mode = True + hint = Text( + "[select mode ON] Textual mouse disabled. " + "Click-drag to select; copy via terminal " + "(Cmd+C on macOS / Ctrl+Shift+C on Linux). " + "Press Ctrl+S again to exit.", + style="bold yellow", + ) + self.rich_log.write(hint, expand=True) + else: + # Exit terminal-select mode: turn Textual mouse back on. + try: + driver._enable_mouse_support() + except Exception: + return + self._terminal_select_mode = False + hint = Text( + "[select mode OFF] Textual mouse re-enabled.", + style="bold yellow", + ) + self.rich_log.write(hint, expand=True) + + async def on_input_submitted(self, message: Input.Submitted) -> None: + """Handle Enter on the chat Input — send the typed prompt + through the @work(thread=True) _send_prompt worker. + + The Input is NOT disabled while sending — bisection found + that toggling the Input's disabled/placeholder state from + outside this handler (via a poller) broke Textual dispatch + on this version. Keeping the input always-enabled is fine + in practice; the user just sees their next input echoed + after the previous response.""" + text = message.value.strip() + if not text: + return + self.chat_input.value = "" + self.rich_log.write("", expand=True) + self.rich_log.write(Text(f"User: {text}", style="bold cyan"), expand=True) + self._send_prompt(text) + + ChatApp = _ChatApp + QuitScreen = _QuitScreen + +except ImportError: + pass diff --git a/tools/chat/debug.py b/tools/chat/debug.py new file mode 100644 index 0000000..dca0849 --- /dev/null +++ b/tools/chat/debug.py @@ -0,0 +1,55 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Chat debug logging: per-process diagnostic log for --chat --debug. 
+""" + +from __future__ import annotations + +import os +import sys +import time +from pathlib import Path +from typing import Any + +sys.path.insert(0, str(Path(__file__).resolve().parents[1])) +import _colors as C # noqa: E402, F401 — used indirectly via other modules + +_CHAT_DEBUG_FP: Any = None + + +def _chat_debug(msg: str) -> None: + """Write a debug message if chat debug logging is active.""" + global _CHAT_DEBUG_FP + if _CHAT_DEBUG_FP is None: + return + import threading as _threading + _elapsed = time.time() - _CHAT_DEBUG_FP.start_time # type: ignore[attr-defined] + _thread = _threading.current_thread().name + _line = f"[{_elapsed:07.3f}s] [{_thread}] {msg}\n" + _CHAT_DEBUG_FP.write(_line) # type: ignore[union-attr] + _CHAT_DEBUG_FP.flush() # type: ignore[union-attr] + + +def _setup_chat_debug() -> None: + """Open tmp/chat-debug-{pid}-{stamp}.log for chat diagnostic logging.""" + global _CHAT_DEBUG_FP + ROOT = Path(__file__).resolve().parents[2] + _stamp = time.strftime("%Y%m%d-%H%M%S") + log_dir = ROOT / "tmp" + log_dir.mkdir(parents=True, exist_ok=True) + log_path = log_dir / f"chat-debug-{os.getpid()}-{_stamp}.log" + _CHAT_DEBUG_FP = log_path.open("a", buffering=1) + _CHAT_DEBUG_FP.start_time = time.time() # type: ignore[attr-defined] + _chat_debug(f"debug log opened: {log_path}") + print(f"[chat-debug] writing diagnostics to {log_path}", file=sys.stderr) + + +def _close_chat_debug() -> None: + """Close the chat debug log if open.""" + global _CHAT_DEBUG_FP + if _CHAT_DEBUG_FP is not None: + _chat_debug("debug log closing") + _CHAT_DEBUG_FP.close() + _CHAT_DEBUG_FP = None diff --git a/tools/chat/harness.py b/tools/chat/harness.py new file mode 100644 index 0000000..22af9eb --- /dev/null +++ b/tools/chat/harness.py @@ -0,0 +1,160 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Chat mode harness: entry point that wires server, session, and TUI together. 
+ +Provides `_run_chat_mode(parser, args) -> int`, the main entry point +for `run-agent.py --chat`. +""" + +from __future__ import annotations + +import argparse +import os +import shlex +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + +import _colors as C # noqa: E402 +from chat.debug import _setup_chat_debug, _chat_debug, _close_chat_debug # noqa: E402 +from chat.app import ChatApp, HAVE_RICH # noqa: E402 +import importlib as _importlib # noqa: E402 +_run_agent = _importlib.import_module("run-agent") +build_console = _run_agent.build_console +_emit_fatal_error = _run_agent._emit_fatal_error + +from opencode.serve import ServerRunner, ServerRunnerError # noqa: E402 +from codecome.version import check_opencode_version # noqa: E402 +from codecome.config import ( # noqa: E402 + resolve_color_mode, + load_prompt, + resolve_model_and_variant, + resolve_thinking_decision, +) +from codecome.session import create_chat_session # noqa: E402 +from codecome.transcript import open_chat_transcript, close_transcript # noqa: E402 + + +def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int: + """Launch the interactive chat harness.""" + if args.debug: + _setup_chat_debug() + _chat_debug("_run_chat_mode: entering (debug enabled)") + + missing = [n for n in ("label", "agent") if getattr(args, n) is None] + if missing: + parser.error( + "the following arguments are required for --chat: " + + ", ".join("--" + n.replace("_", "-") for n in missing) + ) + + check_opencode_version() + + color_mode = resolve_color_mode(args.color) + console = build_console(color_mode) + + # Resolve prompt + ROOT = Path(__file__).resolve().parents[2] + if args.prompt_file: + prompt_file = ROOT / args.prompt_file + prompt = load_prompt(prompt_file, args.finding, phase=args.phase) + elif args.prompt: + prompt = args.prompt + else: + prompt = "" + + # Model resolution + extra_args = shlex.split(os.environ.get("OPENCODE_ARGS", 
"")) + model, variant, model_source, variant_source = resolve_model_and_variant( + args.agent, extra_args + ) + thinking_on, thinking_source = resolve_thinking_decision(model, extra_args) + + _chat_debug(f"_run_chat_mode: agent={args.agent} model={model} variant={variant} thinking={thinking_on}") + + if ChatApp is None: + _emit_fatal_error(console, "Missing Dependency", + "The --chat flag requires the 'textual' package. Run 'make venv' to install it.") + return 1 + + # Start server + _chat_debug("_run_chat_mode: starting opencode serve") + runner = ServerRunner() + try: + server_info = runner.start(hostname="127.0.0.1", log_level="WARN") + _chat_debug(f"_run_chat_mode: server started pid={server_info.pid} url={server_info.base_url}") + except ServerRunnerError as exc: + _chat_debug(f"_run_chat_mode: server start failed: {exc}") + _emit_fatal_error(console, "Server Error", str(exc)) + _close_chat_debug() + return 1 + + # Create session + _chat_debug("_run_chat_mode: creating session") + try: + session_id = create_chat_session( + server_info.base_url, args.agent, model, server_info.password, str(ROOT), + ) + _chat_debug(f"_run_chat_mode: session created id={session_id}") + except Exception as exc: + _chat_debug(f"_run_chat_mode: session creation failed: {exc}") + _emit_fatal_error(console, "Session Error", str(exc)) + runner.stop() + _close_chat_debug() + return 1 + + # Open the chat transcript (parity with phase mode). 
+ transcript_path: Path = Path() + transcript_fp = None + try: + transcript_path, transcript_fp = open_chat_transcript() + _chat_debug(f"_run_chat_mode: opened transcript {transcript_path}") + except OSError as exc: + _chat_debug(f"_run_chat_mode: could not open transcript: {exc}") + + _chat_debug("_run_chat_mode: creating ChatApp") + app = None + try: + app = ChatApp( + server_info=server_info, + session_id=session_id, + initial_prompt=prompt, + args=args, + model=model, + variant=variant, + thinking_on=thinking_on, + transcript_fp=transcript_fp, + ) + _chat_debug("_run_chat_mode: calling app.run()") + app.run() + _chat_debug("_run_chat_mode: app.run() returned") + finally: + _chat_debug("_run_chat_mode: cleaning up") + if app is not None and getattr(app, "chat_loop", None) is not None: + _chat_debug("_run_chat_mode: stopping chat loop") + app.chat_loop.stop() + runner.stop() + close_transcript(transcript_fp) + + # Final summary banner on the restored terminal. Mirrors phase + # mode's success-path summary. + try: + rel_path = transcript_path.relative_to(ROOT) + except ValueError: + rel_path = transcript_path + if HAVE_RICH: + from rich.rule import Rule # noqa: E402 + from rich.text import Text # noqa: E402 + + console.print(Rule(style="green")) + console.print(Text(f"{C.SYM_OK} Chat session ended", style="green")) + console.print(Text(f" transcript: {rel_path}", style="dim")) + else: + print(C.ok("Chat session ended")) + print(f" transcript: {rel_path}") + + _close_chat_debug() + return 0 diff --git a/tools/run-agent.py b/tools/run-agent.py index 994e779..96fb6c4 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -33,6 +33,11 @@ sys.path.insert(0, str(Path(__file__).resolve().parent)) +# When this module runs as __main__, alias it so sibling tools can import +# it as 'run-agent' (the hyphenated filename) without a duplicate load. 
+if __name__ == "__main__": + sys.modules.setdefault("run-agent", sys.modules["__main__"]) + import _colors as C from opencode.serve import ServerRunner, ServerRunnerError from events import EventLoop, RunResult @@ -4517,7 +4522,8 @@ def main() -> int: # Chat mode has its own validation path. if args.chat: - return _run_chat_mode(parser, args) + from chat.harness import _run_chat_mode as _chat_run # noqa: E402 + return _chat_run(parser, args) # The phase-launching mode requires the usual arguments. missing = [n for n in ("phase", "label", "agent", "prompt_file") if getattr(args, n) is None] From 98e7721670044e06fc57a8cc9d981c3d741bfaf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 02:17:04 +0200 Subject: [PATCH 23/65] refactor(rendering): move _colors import to module level Move 'import _colors as C' from inside render() methods to module level. This eliminates repeated runtime imports on every event render. Also restores ServerConnectedRenderer which was accidentally dropped. 
--- tools/rendering/events.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tools/rendering/events.py b/tools/rendering/events.py index 2615dea..5615439 100644 --- a/tools/rendering/events.py +++ b/tools/rendering/events.py @@ -15,6 +15,7 @@ from typing import Any from rendering.base import BaseRenderer +import _colors as C # --------------------------------------------------------------------------- @@ -63,7 +64,6 @@ def render(self, event: dict[str, Any]) -> bool: from rich.text import Text self.sink.write(Text(f"[{self.phase}] {self.label}: {step_type}", style="cyan")) elif self.plain: - import _colors as C self.sink.write_text(C.info(f"[{self.phase}] {self.label}: {step_type}")) return True @@ -81,7 +81,6 @@ def render(self, event: dict[str, Any]) -> bool: from rich.panel import Panel self.sink.write(Panel(Markdown(text), title="Assistant", border_style="blue", expand=True)) elif self.plain: - import _colors as C self.sink.write_text(C.header("Assistant")) self.sink.write_text(text) return True @@ -117,7 +116,6 @@ def render(self, event: dict[str, Any]) -> bool: body = body_md self.sink.write(Panel(body, title="Thinking", border_style="blue", expand=True, style="dim")) elif self.plain: - import _colors as C self.sink.write_text(C.header("Thinking")) self.sink.write_text(text) if truncated_note: @@ -193,7 +191,6 @@ def render(self, event: dict[str, Any]) -> bool: from rich.text import Text self.sink.write(Text(f"step finished: {reason}{suffix}", style=style)) elif self.plain: - import _colors as C if reason in _FINISH_FAILURE: self.sink.write_text(C.fail(f"step finished: {reason}{suffix}")) else: @@ -237,7 +234,6 @@ def render(self, event: dict[str, Any]) -> bool: from rich.text import Text self.sink.write(Panel(Text(text, style="red"), title="Error", border_style="yellow", expand=True)) elif self.plain: - import _colors as C self.sink.write_text(C.warn("Error")) self.sink.write_text(C.fail(text)) return True @@ -258,7 +254,6 
@@ def render(self, event: dict[str, Any]) -> bool: from rich.text import Text self.sink.write(Text(text, style="bold yellow")) elif self.plain: - import _colors as C self.sink.write_text(C.warn(text)) elif status_type == "busy": text = "session status: busy" @@ -266,7 +261,6 @@ def render(self, event: dict[str, Any]) -> bool: from rich.text import Text self.sink.write(Text(text, style="dim")) elif self.plain: - import _colors as C self.sink.write_text(C.info(text)) elif status_type == "idle": text = "session status: idle" @@ -274,7 +268,6 @@ def render(self, event: dict[str, Any]) -> bool: from rich.text import Text self.sink.write(Text(text, style="dim")) elif self.plain: - import _colors as C self.sink.write_text(C.info(text)) return True @@ -288,7 +281,6 @@ def render(self, event: dict[str, Any]) -> bool: from rich.text import Text self.sink.write(Text(message, style="dim")) elif self.plain: - import _colors as C self.sink.write_text(C.info(message)) return True @@ -302,7 +294,6 @@ def render(self, event: dict[str, Any]) -> bool: from rich.text import Text self.sink.write(Text(message, style="dim")) elif self.plain: - import _colors as C self.sink.write_text(C.info(message)) return True @@ -321,7 +312,6 @@ def render(self, event: dict[str, Any]) -> bool: from rich.text import Text self.sink.write(Text(message, style="dim")) elif self.plain: - import _colors as C self.sink.write_text(C.info(message)) return True @@ -388,7 +378,6 @@ def render(self, event: dict[str, Any]) -> bool: from rich.text import Text self.sink.write(Text(message, style=style)) elif self.plain: - import _colors as C self.sink.write_text(C.header(message)) return True @@ -447,7 +436,6 @@ def _render_rich(self, status_type: str, title: str, summary, elapsed_ms) -> Non self.sink.write(Text(line, style="dim")) def _render_plain(self, status_type: str, title: str, summary, elapsed_ms) -> None: - import _colors as C if status_type == "created": self.sink.write_text(C.header(f"[subagent] started: 
{title}")) elif status_type == "finished": From 32fc56b11536562983fd5766587525daaafaf3b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 03:09:44 +0200 Subject: [PATCH 24/65] fix: address Phase A4/A5 review issues and parity test regression - Remove dead getattr() and unused rich_console parameter from chat app - Fix empty transcript path fallback in chat harness OSError handler - Add TODO for circular importlib dependency in chat.app (phase-a6) - Document kwargs limitation in TextualConsoleProxy.print() - Reduce excessive defensive getattr in _chat_render_and_log - Fix silent error swallowing when emit_fatal_error_fn is None in runner - Add Callable type annotations for runner parameters - Add TODO for private Textual API usage (phase-a4) - Fix mock-llm-parity comprehensive.json regression: - Pre-clean stale parity files before run_reference - Use missing_ok=True in cleanup loops - Normalize 'exists' and 'snapshot' fields from tool metadata - Add message.part.updated to serve-only event filter --- tools/chat/app.py | 33 ++++++++++++++++++++++----------- tools/chat/harness.py | 1 + tools/codecome/runner.py | 13 +++++++++---- tools/mock-llm-parity.py | 10 ++++++++-- tools/run-agent.py | 5 ++--- 5 files changed, 42 insertions(+), 20 deletions(-) diff --git a/tools/chat/app.py b/tools/chat/app.py index cb941b5..4f181ba 100644 --- a/tools/chat/app.py +++ b/tools/chat/app.py @@ -25,6 +25,9 @@ from chat.debug import _chat_debug # noqa: E402 import importlib as _importlib # noqa: E402 +# TODO(phase-a6): Break this circular dependency. chat.app should not +# dynamically import run-agent; render_event should be injected as a +# constructor dependency once the events/renderer refactor lands. _run_agent = _importlib.import_module("run-agent") render_event = _run_agent.render_event @@ -74,6 +77,9 @@ def __init__(self, rich_log, app): self.encoding = "utf-8" def print(self, *args, **kwargs): + """Bridge to RichLog.write(). 
**kwargs is accepted for compatibility + with rich.console.Console.print() but is intentionally ignored + (style/end etc. are not forwarded to RichLog).""" if not args: from rich.text import Text @@ -105,17 +111,20 @@ def _write(self, renderable): def _chat_render_and_log(self, console, phase, label, event): """Standalone version of _ChatApp._render_and_log. See the docstring - on the class for the full contract.""" - if getattr(self, "transcript_fp", None) is not None: + on the class for the full contract. + + When bound via ``__get__`` to a _ChatApp instance, ``self`` is + guaranteed to carry the attributes accessed below.""" + if self.transcript_fp is not None: try: self.transcript_fp.write(json.dumps(event) + "\n") except OSError: pass - if getattr(self, "args", None) is not None and getattr(self.args, "debug", False): + if getattr(self.args, "debug", False): _chat_debug(f"_render_and_log: raw event: {json.dumps(event)}") if event.get("type") == "message.updated": _chat_update_modeline_info(self, event) - if not getattr(self, "thinking_on", True) and event.get("type") == "reasoning": + if not self.thinking_on and event.get("type") == "reasoning": return render_event(console, phase, label, event) @@ -147,7 +156,6 @@ def _chat_update_modeline_info(self, event: dict[str, Any]) -> None: token_str = "" cost = info.get("cost", 0) or 0 cost_str = f" ${cost:.4f}" if cost else "" - getattr(self, "_modeline_info", "") try: self._modeline_info = f"{model_label} | {token_str}{cost_str}" except AttributeError: @@ -320,13 +328,12 @@ def __init__(self, renderable): super().__init__() self.renderable = renderable - def __init__(self, server_info=None, session_id=None, initial_prompt="", args=None, rich_console=None, model=None, variant=None, thinking_on=None, transcript_fp=None): + def __init__(self, server_info=None, session_id=None, initial_prompt="", args=None, model=None, variant=None, thinking_on=None, transcript_fp=None): super().__init__() self.server_info = server_info 
self.session_id = session_id self.initial_prompt = initial_prompt self.args = args - self.rich_console = rich_console self.model = model self.variant = variant self.thinking_on = thinking_on @@ -544,8 +551,10 @@ def action_toggle_mouse_for_select(self) -> None: return if not self._terminal_select_mode: # Enter terminal-select mode: turn off Textual mouse. - try: - driver._disable_mouse_support() + try: + # TODO(phase-a4): These are private Textual APIs; they may break + # on future releases. Replace with public API once available. + driver._disable_mouse_support() except Exception: return self._terminal_select_mode = True @@ -559,8 +568,10 @@ def action_toggle_mouse_for_select(self) -> None: self.rich_log.write(hint, expand=True) else: # Exit terminal-select mode: turn Textual mouse back on. - try: - driver._enable_mouse_support() + try: + # TODO(phase-a4): These are private Textual APIs; they may break + # on future releases. Replace with public API once available. + driver._enable_mouse_support() except Exception: return self._terminal_select_mode = False diff --git a/tools/chat/harness.py b/tools/chat/harness.py index 22af9eb..a9d72e8 100644 --- a/tools/chat/harness.py +++ b/tools/chat/harness.py @@ -113,6 +113,7 @@ def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> transcript_path, transcript_fp = open_chat_transcript() _chat_debug(f"_run_chat_mode: opened transcript {transcript_path}") except OSError as exc: + transcript_path = ROOT / "tmp" / "last-chat-unknown.jsonl" _chat_debug(f"_run_chat_mode: could not open transcript: {exc}") _chat_debug("_run_chat_mode: creating ChatApp") diff --git a/tools/codecome/runner.py b/tools/codecome/runner.py index 761198f..61d9fc5 100644 --- a/tools/codecome/runner.py +++ b/tools/codecome/runner.py @@ -11,7 +11,7 @@ import sys import threading from pathlib import Path -from typing import Any +from typing import Any, Callable sys.path.insert(0, str(Path(__file__).resolve().parents[1])) @@ -34,7 
+34,7 @@ def _consume_events( thinking_on: bool, auth_token: str | None, workspace_dir: str | None, - render_event_fn, # required: run-agent's render_event dispatcher + render_event_fn: Callable[..., None], # run-agent's render_event dispatcher ) -> RunResult: event_loop = EventLoop( base_url=base_url, @@ -72,8 +72,8 @@ def _run_single_attempt( base_url: str, auth_token: str | None, workspace_dir: str | None, - render_event_fn, # required: run-agent's render_event dispatcher - emit_fatal_error_fn=None, # type: ignore + render_event_fn: Callable[..., None], # run-agent's render_event dispatcher + emit_fatal_error_fn: Callable[..., None] | None = None, existing_session_id: str | None = None, ) -> tuple[int, str, RunResult, Path]: @@ -123,6 +123,11 @@ def _consume() -> None: except Exception as exc: if emit_fatal_error_fn: emit_fatal_error_fn(console, "Server Error", str(exc)) + else: + try: + console.print(f"Fatal error: {exc}") + except Exception: + print(C.error(f"Fatal error: {exc}"), file=sys.stderr) return 1, existing_session_id or "", RunResult(), transcript_path finally: close_transcript(transcript_fp) diff --git a/tools/mock-llm-parity.py b/tools/mock-llm-parity.py index 4643461..3e29cd6 100644 --- a/tools/mock-llm-parity.py +++ b/tools/mock-llm-parity.py @@ -39,7 +39,7 @@ # Note: session.status (retry/busy) is NOT serve-only when _CODECOME_INSIDE_HARNESS=1 # because the status-forwarder plugin emits them to stdout. # session.idle is deprecated and serve-only. 
-_SERVE_ONLY_TYPES = {"server.connected", "server.heartbeat", "session.idle", "message.updated", "file.edited", "file.watcher.updated", "todo.updated"} +_SERVE_ONLY_TYPES = {"server.connected", "server.heartbeat", "session.idle", "message.updated", "message.part.updated", "file.edited", "file.watcher.updated", "todo.updated"} def _step_sort_key(ev: dict[str, Any]) -> tuple[int, str]: @@ -352,6 +352,7 @@ def normalize_event(ev: dict[str, Any]) -> dict[str, Any] | None: part.pop("id", None) part.pop("messageID", None) part.pop("sessionID", None) + part.pop("snapshot", None) # volatile git HEAD # Truncate large tool output/preview to avoid spurious diff noise if ev_type == "tool_use": state = part.get("state") @@ -364,6 +365,7 @@ def normalize_event(ev: dict[str, Any]) -> dict[str, Any] | None: metadata = state.get("metadata") if isinstance(metadata, dict): metadata = dict(metadata) + metadata.pop("exists", None) # hermetic: ignore file-existence state for key in ("preview", "output"): val = metadata.get(key) if isinstance(val, str) and len(val) > 200: @@ -450,12 +452,16 @@ def main() -> int: config["provider"]["test"]["options"]["baseURL"] = f"http://{MOCK_HOST}:{mock_info.port}/v1" config_path.write_text(json.dumps(config, indent=2) + "\n", encoding="utf-8") + # Pre-clean stale files from previous runs to ensure hermetic tests. + for f in ROOT.glob("tmp/parity-*.txt"): + f.unlink(missing_ok=True) + run_events = run_reference(args.prompt, args.model, args.agent, args.timeout) # Clean up files created by run_reference to ensure serve starts with a clean workspace. # This prevents 'exists' metadata in write tool from reflecting leftover state. 
for f in ROOT.glob("tmp/parity-*.txt"): - f.unlink() + f.unlink(missing_ok=True) serve_events = run_serve(args.prompt, args.model, args.agent, args.timeout) finally: diff --git a/tools/run-agent.py b/tools/run-agent.py index 96fb6c4..ce5d097 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -3949,7 +3949,6 @@ def _chat_update_modeline_info(self, event: dict[str, Any]) -> None: token_str = "" cost = info.get("cost", 0) or 0 cost_str = f" ${cost:.4f}" if cost else "" - getattr(self, "_modeline_info", "") try: self._modeline_info = f"{model_label} | {token_str}{cost_str}" except AttributeError: @@ -4118,13 +4117,12 @@ def __init__(self, renderable): super().__init__() self.renderable = renderable - def __init__(self, server_info=None, session_id=None, initial_prompt="", args=None, rich_console=None, model=None, variant=None, thinking_on=None, transcript_fp=None): + def __init__(self, server_info=None, session_id=None, initial_prompt="", args=None, model=None, variant=None, thinking_on=None, transcript_fp=None): super().__init__() self.server_info = server_info self.session_id = session_id self.initial_prompt = initial_prompt self.args = args - self.rich_console = rich_console self.model = model self.variant = variant self.thinking_on = thinking_on @@ -4462,6 +4460,7 @@ def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> transcript_path, transcript_fp = open_chat_transcript() _chat_debug(f"_run_chat_mode: opened transcript {transcript_path}") except OSError as exc: + transcript_path = ROOT / "tmp" / "last-chat-unknown.jsonl" _chat_debug(f"_run_chat_mode: could not open transcript: {exc}") _chat_debug("_run_chat_mode: creating ChatApp") From 4c9a5a635be97fbb7be9765e4eb3ad4fd06ee7a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 03:19:47 +0200 Subject: [PATCH 25/65] fix: resolve NameError in render_event fallback and finish_warning scope - tools/chat/app.py: fix IndentationError in 
toggle_terminal_select_mode (try/except was dedented outside its if/else block) - tools/run-agent.py: add local import of UnknownEventRenderer in render_event's unknown-event fallback branch, where _evts was not in scope - tools/run-agent.py: hoist finish_warning initialization before the while loop so it is defined even when _run_single_attempt returns non-zero on first iteration (avoids UnboundLocalError in error output path) - tests/test_run_agent.py: add test_render_event_fallback_to_unknown_renderer (covers the previously untested else-branch in render_event) - tests/test_run_agent.py: add test_first_attempt_failure_prints_finish_warning (covers main() early-exit path where first attempt returns non-zero) --- tests/test_run_agent.py | 75 +++++++++++++++++++++++++++++++++++++++++ tools/chat/app.py | 16 ++++----- tools/run-agent.py | 3 +- 3 files changed, 85 insertions(+), 9 deletions(-) diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index bfc0ac1..d64cde9 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1875,3 +1875,78 @@ def fake_run(cmd, *args, **kwargs): # The session terminated with 'stop', no frontmatter errors → single attempt # (We cannot introspect the loop variables directly, but the clean exit # with rc=0 proves the RunResult signals were consumed correctly.) + + +@pytest.mark.unit +def test_render_event_fallback_to_unknown_renderer(monkeypatch): + """render_event falls back to UnknownEventRenderer for unregistered event types + without raising NameError.""" + module = load_tool_module("run_agent_unknown_fallback", "tools/run-agent.py") + ctx = module._get_rendering_ctx(None) + renderers = getattr(ctx, "_renderers", {}) + + # Ensure the "unknown" key is absent so the fallback path is triggered. + renderers.pop("unknown", None) + renderers.pop("some.unregistered.event", None) + + # Should not raise NameError. 
+ module.render_event( + None, "2", "x", + {"type": "some.unregistered.event", "properties": {"foo": "bar"}} + ) + + +@pytest.mark.component +def test_first_attempt_failure_prints_finish_warning(monkeypatch, tmp_path): + """When _run_single_attempt returns non-zero on the very first iteration, + main() should not raise UnboundLocalError for finish_warning.""" + module = load_tool_module("run_agent_first_fail", "tools/run-agent.py") + monkeypatch.setattr(module, "HAVE_RICH", False) + monkeypatch.setattr(module, "check_opencode_version", lambda: None) + monkeypatch.setattr(module, "ROOT", tmp_path) + + import sys + sys.path.insert(0, str(ROOT / "tools")) + if "codecome" in sys.modules and not hasattr(sys.modules["codecome"], "__path__"): + del sys.modules["codecome"] + import codecome.runner as _runner + + if hasattr(_runner._run_single_attempt, "_attempt_counter"): + delattr(_runner._run_single_attempt, "_attempt_counter") + + def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, base_url, auth_token, workspace_dir, **kwargs): + return ( + 1, # non-zero return code on first attempt + "", + module.RunResult( + any_step_finish_seen=False, + step_finish_count=0, + last_finish_reason=None, + last_finish_tokens={}, + last_permission_error=None, + ), + tmp_path / "transcript.jsonl", + ) + + monkeypatch.setattr(_runner, "_run_single_attempt", fake_run_single_attempt) + + class FakeResult: + def __init__(self, rc, out="", err=""): + self.returncode, self.stdout, self.stderr = rc, out, err + + def fake_run(cmd, *args, **kwargs): + if "--version" in cmd: + return FakeResult(0, out="opencode 1.15.0\n") + return FakeResult(0) + + monkeypatch.setattr(module.subprocess, "run", fake_run) + + prompt_file = tmp_path / "phase.md" + prompt_file.write_text("run recon", encoding="utf-8") + monkeypatch.setattr(module.sys, "argv", [ + "run-agent.py", "--phase", "1", "--label", "test", + "--agent", "recon", "--prompt-file", str(prompt_file), + ]) + + rc = 
module.main() + assert rc == 1 diff --git a/tools/chat/app.py b/tools/chat/app.py index 4f181ba..197ff23 100644 --- a/tools/chat/app.py +++ b/tools/chat/app.py @@ -551,10 +551,10 @@ def action_toggle_mouse_for_select(self) -> None: return if not self._terminal_select_mode: # Enter terminal-select mode: turn off Textual mouse. - try: - # TODO(phase-a4): These are private Textual APIs; they may break - # on future releases. Replace with public API once available. - driver._disable_mouse_support() + try: + # TODO(phase-a4): These are private Textual APIs; they may break + # on future releases. Replace with public API once available. + driver._disable_mouse_support() except Exception: return self._terminal_select_mode = True @@ -568,10 +568,10 @@ def action_toggle_mouse_for_select(self) -> None: self.rich_log.write(hint, expand=True) else: # Exit terminal-select mode: turn Textual mouse back on. - try: - # TODO(phase-a4): These are private Textual APIs; they may break - # on future releases. Replace with public API once available. - driver._enable_mouse_support() + try: + # TODO(phase-a4): These are private Textual APIs; they may break + # on future releases. Replace with public API once available. + driver._enable_mouse_support() except Exception: return self._terminal_select_mode = False diff --git a/tools/run-agent.py b/tools/run-agent.py index ce5d097..cc635fd 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -3719,6 +3719,7 @@ def render_event(console: Console, phase: str, label: str, event: dict[str, Any] elif event_type in renderers: renderers[event_type].render(event) else: + from rendering.events import UnknownEventRenderer renderers.get("unknown", UnknownEventRenderer(ctx)).render(event) @@ -4623,6 +4624,7 @@ def main() -> int: any_step_finish_seen = False step_finish_count = 0 transcript_path: Path = Path() + finish_warning: Optional[str] = None # Signal to local opencode plugins (e.g. 
status-forwarder) that we are # running inside the run-agent harness. @@ -4675,7 +4677,6 @@ def _forward_signal(signum: int, _frame: Any) -> None: any_step_finish_seen = run_result.any_step_finish_seen step_finish_count = run_result.step_finish_count - finish_warning: Optional[str] = None if not any_step_finish_seen: finish_warning = ( "CodeCome observed no step_finish events in the JSON stream, so the model/provider did not emit a " From 25685d740da1d2a8a61d216bce872133798068e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 03:48:16 +0200 Subject: [PATCH 26/65] =?UTF-8?q?refactor(phase-a5):=20complete=20CLI=20ex?= =?UTF-8?q?traction=20=E2=80=94=20thin=20wrapper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create codecome/cli.py with build_parser() and main(), and codecome/cli_render.py with HAVE_RICH, build_console(), _get_rendering_ctx(), render_event(), _emit_fatal_error(), and _FINISH_* constants. run-agent.py is now a thin wrapper (-521 lines): - Removes duplicated main/build_parser/build_console/render_event/ _get_rendering_ctx/HAVE_RICH/ROOT/finish constants - Imports them from codecome.cli and codecome.cli_render - Keeps all old renderer functions for backward compatibility - if __name__ == '__main__' delegates to codecome.cli.main() Tests updated to monkeypatch codecome.cli_render.ROOT where needed. 
451 passed, 0 failed, 0 errors --- tests/test_run_agent.py | 14 +- tools/codecome/cli.py | 411 +++++++++++++++++++++++++++ tools/codecome/cli_render.py | 149 ++++++++++ tools/run-agent.py | 535 +---------------------------------- 4 files changed, 577 insertions(+), 532 deletions(-) create mode 100644 tools/codecome/cli.py create mode 100644 tools/codecome/cli_render.py diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index d64cde9..13963de 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1510,8 +1510,9 @@ def test_auto_correction_resume_loops_back_via_popen(monkeypatch, tmp_path): monkeypatch.setattr(module, "HAVE_RICH", False) monkeypatch.setattr(module, "check_opencode_version", lambda: None) monkeypatch.setattr(module, "ROOT", tmp_path) - import sys + monkeypatch.setattr(sys.modules["codecome.cli_render"], "ROOT", tmp_path) + sys.path.insert(0, str(ROOT / "tools")) if "codecome" in sys.modules and not hasattr(sys.modules["codecome"], "__path__"): del sys.modules["codecome"] @@ -1585,8 +1586,9 @@ def test_frontmatter_failure_without_session_id_exits_nonzero(monkeypatch, tmp_p monkeypatch.setattr(module, "HAVE_RICH", False) monkeypatch.setattr(module, "check_opencode_version", lambda: None) monkeypatch.setattr(module, "ROOT", tmp_path) - import sys + monkeypatch.setattr(sys.modules["codecome.cli_render"], "ROOT", tmp_path) + sys.path.insert(0, str(ROOT / "tools")) if "codecome" in sys.modules and not hasattr(sys.modules["codecome"], "__path__"): del sys.modules["codecome"] @@ -1641,6 +1643,8 @@ def test_iteration_limit_triggers_auto_resume(monkeypatch, tmp_path): monkeypatch.setattr(module, "HAVE_RICH", False) monkeypatch.setattr(module, "check_opencode_version", lambda: None) monkeypatch.setattr(module, "ROOT", tmp_path) + import sys + monkeypatch.setattr(sys.modules["codecome.cli_render"], "ROOT", tmp_path) monkeypatch.setenv("CODECOME_MAX_ITERATION_RETRIES", "1") import sys @@ -1824,8 +1828,9 @@ def 
test_stream_session_id_and_step_finish_count(monkeypatch, tmp_path): monkeypatch.setattr(module, "HAVE_RICH", False) monkeypatch.setattr(module, "check_opencode_version", lambda: None) monkeypatch.setattr(module, "ROOT", tmp_path) - import sys + monkeypatch.setattr(sys.modules["codecome.cli_render"], "ROOT", tmp_path) + sys.path.insert(0, str(ROOT / "tools")) if "codecome" in sys.modules and not hasattr(sys.modules["codecome"], "__path__"): del sys.modules["codecome"] @@ -1904,8 +1909,9 @@ def test_first_attempt_failure_prints_finish_warning(monkeypatch, tmp_path): monkeypatch.setattr(module, "HAVE_RICH", False) monkeypatch.setattr(module, "check_opencode_version", lambda: None) monkeypatch.setattr(module, "ROOT", tmp_path) - import sys + monkeypatch.setattr(sys.modules["codecome.cli_render"], "ROOT", tmp_path) + sys.path.insert(0, str(ROOT / "tools")) if "codecome" in sys.modules and not hasattr(sys.modules["codecome"], "__path__"): del sys.modules["codecome"] diff --git a/tools/codecome/cli.py b/tools/codecome/cli.py new file mode 100644 index 0000000..8a617db --- /dev/null +++ b/tools/codecome/cli.py @@ -0,0 +1,411 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CLI entry point and argument parsing for the CodeCome phase runner.""" + +from __future__ import annotations + +import argparse +import dataclasses +import os +import shlex +import signal +import subprocess +import sys +import time +import traceback +from pathlib import Path +from typing import Any, Optional + +sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + +import _colors as C +from opencode.serve import ServerRunner, ServerRunnerError + +from codecome.cli_render import ( + HAVE_RICH, Console, Panel, Rule, Text, + build_console, _get_rendering_ctx, render_event, _emit_fatal_error, + _FINISH_TERMINAL_OK, _FINISH_MID_TURN, _FINISH_FAILURE, +) +import codecome.cli_render as _clr +from codecome.version import 
check_opencode_version +from codecome.config import ( + truthy_env, resolve_color_mode, load_prompt, + resolve_model_and_variant, resolve_thinking_decision, show_model_table, +) +from codecome.graceful import ( + check_phase_graceful_completion, + build_phase_resume_prompt, build_frontmatter_resume_prompt, +) + +# Legacy globals — still referenced by old renderers in run-agent.py. +# Re-exported for backward compatibility. +_READ_DISPLAY_LINES = 10 +_WRITE_CONTENT_LINES = 25 +_WRITE_DIFF_LIMIT = 50 +_EDIT_DIFF_LINES = 25 + + +# --------------------------------------------------------------------------- +# Argument parser +# --------------------------------------------------------------------------- + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Run a CodeCome phase with structured output.") + parser.add_argument("--phase", help="Phase number (required unless --show-model or --chat).") + parser.add_argument("--label", help="Human-readable phase label (required unless --show-model).") + parser.add_argument("--agent", help="OpenCode agent name.") + parser.add_argument("--prompt-file", help="Prompt file path relative to repo root (required unless --show-model or --chat).") + parser.add_argument("--prompt", help="Direct prompt text (used by --chat mode).") + parser.add_argument("--chat", action="store_true", help="Launch interactive textual chat harness.") + parser.add_argument("--finding", help="Finding id for prompt substitution.") + parser.add_argument("--color", choices=["auto", "always", "never"], default="auto") + parser.add_argument("--debug", action="store_true", help="Mirror raw JSON events to stderr.") + parser.add_argument("--read-display-lines", type=int, help="Max lines shown in read output (default: 10, env: CODECOME_READ_DISPLAY_LINES).") + parser.add_argument("--write-content-lines", type=int, help="Max lines shown for new-file write content (default: 25, env: CODECOME_WRITE_CONTENT_LINES).") + 
parser.add_argument("--write-diff-limit", type=int, help="Max diff lines shown for write (default: 50, env: CODECOME_WRITE_DIFF_LIMIT).")
+    parser.add_argument("--edit-diff-lines", type=int, help="Max diff lines shown for edit (default: 25, env: CODECOME_EDIT_DIFF_LINES).")
+    parser.add_argument(
+        "--show-model",
+        action="store_true",
+        help="Print the model-resolution table for --agent and exit. No phase is launched.",
+    )
+    return parser
+
+
+# ---------------------------------------------------------------------------
+# Main entry point
+# ---------------------------------------------------------------------------
+
+def main() -> int:
+    """Run one CodeCome phase end to end and return the process exit code.
+
+    ``--show-model`` and ``--chat`` short-circuit before any server is started.
+    Otherwise one server is started for the phase and attempts are run against
+    it, resuming the same session when frontmatter validation fails or when the
+    model/provider stops mid-turn.
+
+    Returns:
+        0 when the phase completed, 1 when the server could not be started,
+        2 when the run is treated as incomplete, or the non-zero code of the
+        failed attempt (130 is reported as an interruption).
+    """
+    RUN_START_TIME = time.time()
+    iteration_retry_count = 0
+    frontmatter_retry_count = 0
+    check_opencode_version()
+
+    parser = build_parser()
+    args = parser.parse_args()
+
+    # --show-model short-circuit: print the resolution table and exit.
+    if args.show_model:
+        agent_name = args.agent or "recon"
+        return show_model_table(agent_name)
+
+    # Chat mode has its own validation path.
+    if args.chat:
+        from chat.harness import _run_chat_mode as _chat_run
+        return _chat_run(parser, args)
+
+    # The phase-launching mode requires the usual arguments.
+    missing = [n for n in ("phase", "label", "agent", "prompt_file") if getattr(args, n) is None]
+    if missing:
+        parser.error(
+            "the following arguments are required when not using --show-model or --chat: "
+            + ", ".join("--" + n.replace("_", "-") for n in missing)
+        )
+
+    # CLI flags override env var defaults for tunables.
+    global _READ_DISPLAY_LINES, _WRITE_CONTENT_LINES, _WRITE_DIFF_LIMIT, _EDIT_DIFF_LINES
+    if args.read_display_lines is not None:
+        _READ_DISPLAY_LINES = args.read_display_lines
+    if args.write_content_lines is not None:
+        _WRITE_CONTENT_LINES = args.write_content_lines
+    if args.write_diff_limit is not None:
+        _WRITE_DIFF_LIMIT = args.write_diff_limit
+    if args.edit_diff_lines is not None:
+        _EDIT_DIFF_LINES = args.edit_diff_lines
+
+    color_mode = resolve_color_mode(args.color)
+    console = build_console(color_mode)
+
+    # Build the rendering context eagerly so the CLI tunable overrides are
+    # baked into its settings before any renderer reads them.
+    _rendering_ctx = _get_rendering_ctx(console)
+    _overrides: dict[str, Any] = {}
+    if args.read_display_lines is not None:
+        _overrides["read_display_lines"] = args.read_display_lines
+    if args.write_content_lines is not None:
+        _overrides["write_content_lines"] = args.write_content_lines
+    if args.write_diff_limit is not None:
+        _overrides["write_diff_limit"] = args.write_diff_limit
+    if args.edit_diff_lines is not None:
+        _overrides["edit_diff_lines"] = args.edit_diff_lines
+    if _overrides:
+        _rendering_ctx.settings = dataclasses.replace(_rendering_ctx.settings, **_overrides)
+
+    prompt_file = _clr.ROOT / args.prompt_file
+    prompt = load_prompt(prompt_file, args.finding, phase=args.phase)
+    extra_args = shlex.split(os.environ.get("OPENCODE_ARGS", ""))
+    model, variant, model_source, variant_source = resolve_model_and_variant(
+        args.agent, extra_args
+    )
+    thinking_on, thinking_source = resolve_thinking_decision(model, extra_args)
+
+    model_label = model or "(unknown)"
+    variant_label = variant or "(unknown)"
+
+    # Build the single-line banner.
+    parts = [f"agent={args.agent}", f"model={model_label}"]
+    if variant is not None:
+        parts.append(f"variant={variant_label}")
+    parts.append(f"thinking={'on' if thinking_on else 'off'}")
+    parts.append(f"prompt={args.prompt_file}")
+
+    if variant is not None:
+        sources_tail = (
+            f"(model source: {model_source}, variant source: {variant_source}, "
+            f"thinking source: {thinking_source})"
+        )
+    else:
+        sources_tail = f"(model source: {model_source}, thinking source: {thinking_source})"
+
+    main_line = " ".join(parts) + " " + sources_tail
+
+    if HAVE_RICH:
+        console.print(Rule(title=f"Phase {args.phase}: {args.label}", style="bold cyan"))
+        console.print(Text(main_line, style="dim"))
+        if args.finding:
+            console.print(Text(f"finding={args.finding}", style="dim"))
+        if str(args.phase) == "1":
+            console.print(Text(
+                "Phase 1 has two sub-stages: 1a recon notes, 1b sandbox bootstrap.",
+                style="cyan",
+            ))
+    else:
+        print(C.header(f"Phase {args.phase}: {args.label}"))
+        print(C.info(main_line))
+        if args.finding:
+            print(C.info(f"finding={args.finding}"))
+        if str(args.phase) == "1":
+            print(C.info(
+                "Phase 1 has two sub-stages: 1a recon notes, 1b sandbox bootstrap."
+            ))
+        print(C.warn("rich is not installed; using plain structured output fallback"))
+
+    attempt_number = 0
+    last_session_id: str = ""
+    last_finish_reason: Optional[str] = None
+    last_finish_tokens: dict[str, Any] = {}
+    last_permission_error: Optional[str] = None
+    any_step_finish_seen = False
+    step_finish_count = 0
+    transcript_path: Path = Path()
+    finish_warning: Optional[str] = None
+
+    # Marker read by code running under the harness (set before the server starts).
+    os.environ["_CODECOME_INSIDE_HARNESS"] = "1"
+
+    # Start the server once for this phase; every attempt reuses it.
+    runner = ServerRunner()
+    server_info: Any = None
+    try:
+        server_info = runner.start(hostname="127.0.0.1", log_level="WARN")
+    except ServerRunnerError as exc:
+        _emit_fatal_error(console, "Server Error", str(exc))
+        return 1
+
+    base_url = server_info.base_url
+
+    def _forward_signal(signum: int, _frame: Any) -> None:
+        # Pass the signal on to the server's process group, then restore the
+        # default handler and re-raise it against this process.
+        info = runner.info
+        if info is not None:
+            try:
+                os.killpg(info.pid, signum)
+            except ProcessLookupError:
+                pass
+        signal.signal(signum, signal.SIG_DFL)
+        os.kill(os.getpid(), signum)
+
+    previous_sigint = signal.signal(signal.SIGINT, _forward_signal)
+    previous_sigterm = signal.signal(signal.SIGTERM, _forward_signal)
+
+    try:
+        while True:
+            attempt_number += 1
+            # Each attempt is judged on its own finish state.  Without this
+            # reset, the warning recorded by a cut-off attempt survives a
+            # successful auto-resume and forces exit code 2.
+            finish_warning = None
+            from codecome.runner import _run_single_attempt
+            returncode, session_id, run_result, transcript_path = _run_single_attempt(
+                args, console, prompt, model, variant, thinking_on, base_url,
+                server_info.password, str(_clr.ROOT),
+                render_event_fn=render_event,
+                emit_fatal_error_fn=_emit_fatal_error,
+                existing_session_id=last_session_id or None
+            )
+
+            if returncode != 0:
+                break
+
+            last_session_id = session_id
+            last_finish_reason = run_result.last_finish_reason
+            last_finish_tokens = run_result.last_finish_tokens
+            last_permission_error = run_result.last_permission_error
+            any_step_finish_seen = run_result.any_step_finish_seen
+            step_finish_count = run_result.step_finish_count
+
+            if not any_step_finish_seen:
+                finish_warning = (
+                    "CodeCome observed no step_finish events in the JSON stream, so the model/provider did not emit a "
+                    "completion signal. Treating the run as incomplete."
+                )
+            elif last_finish_reason is None:
+                finish_warning = (
+                    "CodeCome observed a step_finish event without a finish reason, so the model/provider completion "
+                    "state is ambiguous. Treating the run as incomplete."
+                )
+            elif last_finish_reason in _FINISH_FAILURE:
+                finish_warning = (
+                    f"CodeCome observed finish reason '{last_finish_reason}', which means the model/provider stopped "
+                    "before completing the phase. Treating the run as incomplete rather than as a CodeCome logic error."
+                )
+            elif last_finish_reason in _FINISH_MID_TURN:
+                if last_permission_error:
+                    finish_warning = (
+                        f"{last_permission_error}; CodeCome observed the model/provider stop mid-turn with finish "
+                        f"reason '{last_finish_reason}', so the phase did not reach a final completion signal."
+                    )
+                else:
+                    finish_warning = (
+                        f"CodeCome observed the model/provider stop mid-turn with finish reason '{last_finish_reason}' "
+                        f"after {step_finish_count} completed loops, without a terminal completion signal. Treating the "
+                        "phase as incomplete because the model/provider cut off the response."
+                    )
+            elif last_finish_reason not in _FINISH_TERMINAL_OK:
+                finish_warning = (
+                    f"CodeCome observed an unrecognised model/provider finish reason '{last_finish_reason}'. Treating "
+                    "the run as incomplete rather than assuming success."
+                )
+
+            if finish_warning is not None:
+                # A mid-turn cutoff is forgiven when no permission error was
+                # seen and the phase's completion check passes.
+                if (
+                    last_finish_reason in _FINISH_MID_TURN
+                    and last_permission_error is None
+                    and check_phase_graceful_completion(args.phase, args.finding, RUN_START_TIME)
+                ):
+                    msg = (
+                        f"CodeCome observed a mid-turn model/provider cutoff for Phase {args.phase} after {step_finish_count} "
+                        "completed loops, but the required durable artifacts were already written. Treating the phase as complete."
+                    )
+                    if HAVE_RICH:
+                        console.print(Text(msg, style="bold green"))
+                    else:
+                        print(C.ok(msg))
+                    finish_warning = None
+                    last_finish_reason = "graceful_forgiveness"
+                else:
+                    returncode = 2
+
+            # Frontmatter resume (only reached with returncode == 0).
+            if returncode == 0:
+                validation_result = subprocess.run(
+                    [sys.executable, "tools/check-frontmatter.py"],
+                    cwd=_clr.ROOT,
+                    capture_output=True,
+                    text=True
+                )
+                if validation_result.returncode != 0:
+                    max_frontmatter_retries = 2
+                    validation_output = (validation_result.stderr or validation_result.stdout).strip() or "(no validator output)"
+                    if frontmatter_retry_count < max_frontmatter_retries:
+                        frontmatter_retry_count += 1
+                        msg = (
+                            "\n[Auto-Correction] The model completed a turn, but its output failed local frontmatter "
+                            f"validation. CodeCome will resume the same session and ask for a minimal repair "
+                            f"(retry {frontmatter_retry_count}/{max_frontmatter_retries})."
+                        )
+                        if HAVE_RICH:
+                            console.print(Text(msg, style="bold yellow"))
+                        else:
+                            print(C.warn(msg))
+                        if last_session_id and last_session_id != "id":
+                            prompt = build_frontmatter_resume_prompt(args.phase, args.finding, validation_output)
+                            continue
+                        else:
+                            returncode = 2
+                            finish_warning = (
+                                "The model output failed local frontmatter validation, and CodeCome could not determine a "
+                                "session ID to resume for repair. Treating the phase as incomplete so the validator output "
+                                "can be reported back with the saved transcript."
+                            )
+                    else:
+                        returncode = 2
+                        finish_warning = (
+                            f"The model output still fails local frontmatter validation after {max_frontmatter_retries} "
+                            "auto-repair attempts. Treating the phase as incomplete so the validation errors can be reported back."
+                        )
+                        msg = f"\n[Warning] Frontmatter errors persist after {max_frontmatter_retries} auto-retries."
+                        if HAVE_RICH:
+                            console.print(Text(msg, style="bold red"))
+                        else:
+                            print(C.fail(msg))
+                        print(validation_output)
+                    break
+                break
+
+            # Iteration-limit resume: retry a mid-turn cutoff in the same session.
+            if returncode == 2 and last_finish_reason in _FINISH_MID_TURN:
+                max_iteration_retries = int(os.environ.get("CODECOME_MAX_ITERATION_RETRIES", "1"))
+                if iteration_retry_count < max_iteration_retries:
+                    iteration_retry_count += 1
+                    msg = (
+                        "\n[Auto-Resume] CodeCome observed a mid-turn model/provider cutoff and will resume the same "
+                        f"session once to let the model finish the interrupted work (retry {iteration_retry_count}/{max_iteration_retries})."
+                    )
+                    if HAVE_RICH:
+                        console.print(Text(msg, style="bold yellow"))
+                    else:
+                        print(C.warn(msg))
+                    if last_session_id and last_session_id != "id":
+                        prompt = build_phase_resume_prompt(
+                            args.phase, args.finding, last_finish_reason, step_finish_count
+                        )
+                        continue
+                    else:
+                        finish_warning = (
+                            "CodeCome correctly detected that the model/provider stopped mid-turn, but it could not determine "
+                            "a session ID for automatic continuation. Treating the phase as incomplete."
+                        )
+                        if HAVE_RICH:
+                            console.print(Text("Could not determine session ID to resume.", style="red"))
+                        else:
+                            print(C.fail("Could not determine session ID to resume."))
+                break
+
+            break
+    finally:
+        signal.signal(signal.SIGINT, previous_sigint)
+        signal.signal(signal.SIGTERM, previous_sigterm)
+        runner.stop()
+
+    # relative_to() raises ValueError for a path outside ROOT; fall back to the
+    # raw path so the summary below is always printed.
+    # NOTE(review): presumably reachable when an attempt fails before a
+    # transcript exists - confirm against _run_single_attempt.
+    try:
+        transcript_shown = str(transcript_path.relative_to(_clr.ROOT))
+    except ValueError:
+        transcript_shown = str(transcript_path)
+
+    if returncode == 0:
+        if HAVE_RICH:
+            console.print(Rule(style="green"))
+            console.print(Text(f"{C.SYM_OK} Phase {args.phase} completed successfully", style="green"))
+            console.print(Text(
+                f" finish reason: {last_finish_reason!r} "
+                f"transcript: {transcript_shown}",
+                style="dim",
+            ))
+        else:
+            print(C.ok(f"Phase {args.phase} completed successfully"))
+            print(f" finish reason: {last_finish_reason!r} transcript: {transcript_shown}")
+    elif returncode == 130:
+        if HAVE_RICH:
+            console.print(Rule(style="yellow"))
+            console.print(Text(f"{C.SYM_WARN} Phase {args.phase} interrupted", style="yellow"))
+        else:
+            print(C.warn(f"Phase {args.phase} interrupted"))
+    else:
+        if HAVE_RICH:
+            console.print(Rule(style="red"))
+            console.print(Text(
+                f"{C.SYM_FAIL} Phase {args.phase} did not complete cleanly (exit code {returncode})",
+                style="red",
+            ))
+            if finish_warning:
+                console.print(Text(f" reason: {finish_warning}", style="red"))
+            console.print(Text(f" transcript: {transcript_shown}", style="dim"))
+            console.print(Text(
+                " hint: the run is likely partial; rerun the phase or "
+                "switch to a different model/provider before retrying",
+                style="yellow",
+            ))
+        else:
+            print(C.fail(f"Phase {args.phase} did not complete cleanly (exit code {returncode})"))
+            if finish_warning:
+                print(C.fail(f" reason: {finish_warning}"))
+            print(f" transcript: {transcript_shown}")
+            print(C.warn(
+                " hint: the run is likely partial; rerun the phase or "
+                "switch to a different model/provider before retrying"
+            ))
+
+    return returncode
diff --git a/tools/codecome/cli_render.py
b/tools/codecome/cli_render.py
new file mode 100644
index 0000000..3e53237
--- /dev/null
+++ b/tools/codecome/cli_render.py
@@ -0,0 +1,178 @@
+# Copyright (C) 2025-2026 Pablo Ruiz García
+# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later
+
+"""
+Rendering infrastructure shared by the CLI entry point: Rich detection,
+console construction, rendering context cache, and the event dispatcher.
+
+This module is intentionally free of execution logic (no server, no
+session, no phase loop).
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+from typing import Any
+
+ROOT = Path(__file__).resolve().parents[2]
+
+# ---------------------------------------------------------------------------
+# Rich availability
+# ---------------------------------------------------------------------------
+
+try:
+    from rich.console import Console, Group
+    from rich.json import JSON
+    from rich.markdown import Markdown
+    from rich.panel import Panel
+    from rich.rule import Rule
+    from rich.text import Text
+
+    HAVE_RICH = True
+except ImportError:  # pragma: no cover
+    Console = Any  # type: ignore[assignment]
+    Group = tuple  # type: ignore[assignment]
+    JSON = None  # type: ignore[assignment]
+    Markdown = None  # type: ignore[assignment]
+    Panel = None  # type: ignore[assignment]
+    Rule = None  # type: ignore[assignment]
+    Text = None  # type: ignore[assignment]
+    HAVE_RICH = False
+
+# ---------------------------------------------------------------------------
+# Console builder
+# ---------------------------------------------------------------------------
+
+def build_console(color_mode: str) -> Console:
+    """Build the Rich console for *color_mode*, or return ``None`` without Rich.
+
+    ``"always"`` forces terminal output, ``"never"`` disables colour, and any
+    other value uses Rich's defaults.
+    """
+    if not HAVE_RICH:
+        return None  # type: ignore[return-value]
+    if color_mode == "always":
+        return Console(force_terminal=True, highlight=False)
+    if color_mode == "never":
+        return Console(force_terminal=False, no_color=True, highlight=False)
+    return Console(highlight=False)
+
+
+# ---------------------------------------------------------------------------
+# Rendering context cache
+# ---------------------------------------------------------------------------
+
+# One context per sink mode ("rich" / "plain"), built lazily and reused, so a
+# rich-console call and a plain-text call in one process never share a sink.
+_RENDERING_CTX_CACHE: dict[str, Any] = {}
+
+
+def _get_rendering_ctx(console: Any) -> Any:
+    """Return the cached rendering context for the sink mode implied by *console*.
+
+    The cache is keyed by mode only, so the first console seen for a mode stays
+    bound to that mode's sink; a cache hit only drops stale snapshot entries.
+    """
+    mode = "rich" if (HAVE_RICH and console is not None) else "plain"
+    if mode in _RENDERING_CTX_CACHE:
+        ctx = _RENDERING_CTX_CACHE[mode]
+        ctx.cache.invalidate_stale()
+        return ctx
+    # Deferred imports: resolved only when a context actually has to be built.
+    from rendering.cache import SnapshotCache
+    from rendering.context import RenderContext
+    from rendering.settings import RenderSettings
+    from rendering.sink import PlainSink, RichConsoleSink
+
+    if mode == "rich":
+        sink = RichConsoleSink(console)
+    else:
+        sink = PlainSink()
+    ctx = RenderContext(
+        root=ROOT,
+        sink=sink,
+        settings=RenderSettings.from_env(),
+        cache=SnapshotCache(),
+    )
+    # Pre-instantiate the event renderers once so render_event() does not
+    # allocate a renderer for every incoming event.
+    from rendering import events as _evts
+    ctx._renderers = {
+        "server.connected": _evts.ServerConnectedRenderer(ctx),
+        "server.heartbeat": _evts.ServerHeartbeatRenderer(ctx),
+        "message.updated": _evts.MessageUpdatedRenderer(ctx),
+        "text": _evts.TextEventRenderer(ctx),
+        "reasoning": _evts.ReasoningEventRenderer(ctx),
+        "tool_use": _evts.ToolUseEventRenderer(ctx),
+        "step_start": _evts.StepStartRenderer(ctx),
+        "step_finish": _evts.StepFinishRenderer(ctx),
+        "error": _evts.ErrorEventRenderer(ctx),
+        "session.status": _evts.SessionStatusRenderer(ctx),
+        "session.diff": _evts.SessionDiffRenderer(ctx),
+        "subagent.status": _evts.SubagentStatusRenderer(ctx),
+        "unknown": _evts.UnknownEventRenderer(ctx),
+    }
+    _RENDERING_CTX_CACHE[mode] = ctx
+    return ctx
+
+
+# ---------------------------------------------------------------------------
+# Event dispatcher
+# ---------------------------------------------------------------------------
+
+def render_event(console: Console, phase: str, label: str, event: dict[str, Any]) -> None:
+    """Dispatch *event* to the renderer registered for its ``type``.
+
+    The ``step_start`` renderer is given *phase* and *label* before rendering;
+    event types with no registered renderer go to the ``unknown`` renderer.
+    """
+    event_type = event.get("type")
+    ctx = _get_rendering_ctx(console)
+    renderers = getattr(ctx, "_renderers", {})
+
+    if event_type == "step_start":
+        renderer = renderers.get("step_start")
+        if renderer:
+            renderer.phase = phase
+            renderer.label = label
+            renderer.render(event)
+        else:
+            from rendering.events import StepStartRenderer
+            StepStartRenderer(ctx, phase=phase, label=label).render(event)
+    elif event_type in renderers:
+        renderers[event_type].render(event)
+    else:
+        from rendering.events import UnknownEventRenderer
+        renderers.get("unknown", UnknownEventRenderer(ctx)).render(event)
+
+
+# ---------------------------------------------------------------------------
+# Fatal error display
+# ---------------------------------------------------------------------------
+
+def _emit_fatal_error(console: Any, title: str, message: str) -> None:
+    """Show a fatal startup/runtime error in the UI and on stderr.
+
+    The Rich panel is skipped when *console* is ``None``; the plain line is
+    always written to stderr.
+    """
+    import _colors as C
+    formatted = C.fail(f"{title}: {message}")
+    if HAVE_RICH and console is not None:
+        console.print(Panel(Text(message, style="red"), title=title, border_style="red"))
+    print(formatted, file=sys.stderr)
+
+
+# ---------------------------------------------------------------------------
+# LLM finish reason classification
+# ---------------------------------------------------------------------------
+
+# Clean terminal: the model finished the turn on its own.
+_FINISH_TERMINAL_OK = {"stop", "end_turn"}
+# Mid-turn: the model emitted tool calls and expects to be invoked again.
+_FINISH_MID_TURN = {"tool-calls", "tool_use"}
+# Failure terminal: the response was cut short (safety filter, token cap, error).
+_FINISH_FAILURE = {
+    "content-filter", "content_filter", "length", "max_tokens", "error",
+}
diff --git a/tools/run-agent.py b/tools/run-agent.py index cc635fd..005f0c7 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -39,6 +39,13 @@ sys.modules.setdefault("run-agent", sys.modules["__main__"]) import _colors as C +from codecome.cli import main, build_parser +from codecome.cli_render import ( + HAVE_RICH, Console, Group, JSON, Markdown, Panel, Rule, Text, + build_console, _get_rendering_ctx, _emit_fatal_error, render_event, + _FINISH_TERMINAL_OK, _FINISH_MID_TURN, _FINISH_FAILURE, + ROOT, +) from opencode.serve import ServerRunner, ServerRunnerError from events import EventLoop, RunResult from codecome.version import check_opencode_version, MINIMUM_OPENCODE_VERSION as
_MINIMUM_OPENCODE_VERSION @@ -55,76 +62,6 @@ ) from codecome.transcript import open_phase_transcript, open_chat_transcript, close_transcript -# Lazy rendering contexts — built once per sink mode and reused by the -# new renderer classes. Old-style render_* functions still receive -# console directly and are unaffected. Keyed by mode so a rich-console -# call and a plain-text call in the same process don't share a sink. -_RENDERING_CTX_CACHE: dict[str, Any] = {} - - -def _get_rendering_ctx(console: Any) -> Any: - mode = "rich" if (HAVE_RICH and console is not None) else "plain" - if mode in _RENDERING_CTX_CACHE: - ctx = _RENDERING_CTX_CACHE[mode] - ctx.cache.invalidate_stale() - return ctx - from rendering.cache import SnapshotCache - from rendering.context import RenderContext - from rendering.settings import RenderSettings - from rendering.sink import PlainSink, RichConsoleSink - - if mode == "rich": - sink = RichConsoleSink(console) - else: - sink = PlainSink() - ctx = RenderContext( - root=ROOT, - sink=sink, - settings=RenderSettings.from_env(), - cache=SnapshotCache(), - ) - # Pre-instantiate and cache event renderers so render_event() - # doesn't allocate on every SSE event. 
- from rendering import events as _evts - ctx._renderers = { - "server.connected": _evts.ServerConnectedRenderer(ctx), - "server.heartbeat": _evts.ServerHeartbeatRenderer(ctx), - "message.updated": _evts.MessageUpdatedRenderer(ctx), - "text": _evts.TextEventRenderer(ctx), - "reasoning": _evts.ReasoningEventRenderer(ctx), - "tool_use": _evts.ToolUseEventRenderer(ctx), - "step_start": _evts.StepStartRenderer(ctx), - "step_finish": _evts.StepFinishRenderer(ctx), - "error": _evts.ErrorEventRenderer(ctx), - "session.status": _evts.SessionStatusRenderer(ctx), - "session.diff": _evts.SessionDiffRenderer(ctx), - "subagent.status": _evts.SubagentStatusRenderer(ctx), - "unknown": _evts.UnknownEventRenderer(ctx), - } - _RENDERING_CTX_CACHE[mode] = ctx - return ctx - -try: - from rich.console import Console, Group - from rich.json import JSON - from rich.markdown import Markdown - from rich.panel import Panel - from rich.rule import Rule - from rich.text import Text - - HAVE_RICH = True -except ImportError: # pragma: no cover - Console = Any # type: ignore[assignment] - Group = tuple # type: ignore[assignment] - JSON = None # type: ignore[assignment] - Markdown = None # type: ignore[assignment] - Panel = None # type: ignore[assignment] - Rule = None # type: ignore[assignment] - Text = None # type: ignore[assignment] - HAVE_RICH = False - -ROOT = Path(__file__).resolve().parents[1] - # --------------------------------------------------------------------------- # Chat debug logging (--debug with --chat writes to tmp/chat-debug-.log) # --------------------------------------------------------------------------- @@ -167,16 +104,6 @@ def _close_chat_debug() -> None: _CHAT_DEBUG_FP = None -def build_console(color_mode: str) -> Console: - if not HAVE_RICH: - return None # type: ignore[return-value] - if color_mode == "always": - return Console(force_terminal=True, highlight=False) - if color_mode == "never": - return Console(force_terminal=False, no_color=True, highlight=False) - 
return Console(highlight=False) - - def format_tokens(tokens: dict[str, Any]) -> str: if not isinstance(tokens, dict): return "" @@ -3457,29 +3384,6 @@ def render_tool_use(console: Console, event: dict[str, Any]) -> None: print(json.dumps(output_data, indent=2) if isinstance(output_data, (dict, list)) else str(output_data)) -# LLM finish reasons we have observed, classified. -# -# Clean terminal: the model finished the turn on its own, after emitting -# the final assistant message. -# -# Mid-turn: the model emitted tool calls and is expected to be invoked -# again with the tool results. Not terminal on its own. -# -# Failure terminal: the response was cut short by something other than -# the model itself signalling end-of-turn. The wrapper currently treats -# these as success because opencode still exits 0; we explicitly flag -# them here so callers can fail loudly instead. -_FINISH_TERMINAL_OK = {"stop", "end_turn"} -_FINISH_MID_TURN = {"tool-calls", "tool_use"} -_FINISH_FAILURE = { - "content-filter", # provider safety filter aborted the response - "content_filter", # alternative spelling - "length", # output token cap reached - "max_tokens", # alternative spelling - "error", -} - - def _extract_tool_permission_error(event: dict[str, Any]) -> Optional[str]: """Return a human-readable permission rejection summary for a tool_use error. 
@@ -3702,27 +3606,6 @@ def render_error(console: Console, event: dict[str, Any]) -> None: print(C.fail(text)) -def render_event(console: Console, phase: str, label: str, event: dict[str, Any]) -> None: - event_type = event.get("type") - ctx = _get_rendering_ctx(console) - renderers = getattr(ctx, "_renderers", {}) - - if event_type == "step_start": - renderer = renderers.get("step_start") - if renderer: - renderer.phase = phase - renderer.label = label - renderer.render(event) - else: - from rendering.events import StepStartRenderer - StepStartRenderer(ctx, phase=phase, label=label).render(event) - elif event_type in renderers: - renderers[event_type].render(event) - else: - from rendering.events import UnknownEventRenderer - renderers.get("unknown", UnknownEventRenderer(ctx)).render(event) - - def render_session_status(console: Console, event: dict[str, Any]) -> None: properties = event.get("properties", {}) status = properties.get("status", {}) @@ -3828,37 +3711,6 @@ def render_subagent_status(console: Console, event: dict[str, Any]) -> None: print(f" {line}") -def build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description="Run a CodeCome phase with structured output.") - parser.add_argument("--phase", help="Phase number (required unless --show-model or --chat).") - parser.add_argument("--label", help="Human-readable phase label (required unless --show-model).") - parser.add_argument("--agent", help="OpenCode agent name.") - parser.add_argument("--prompt-file", help="Prompt file path relative to repo root (required unless --show-model or --chat).") - parser.add_argument("--prompt", help="Direct prompt text (used by --chat mode).") - parser.add_argument("--chat", action="store_true", help="Launch interactive textual chat harness.") - parser.add_argument("--finding", help="Finding id for prompt substitution.") - parser.add_argument("--color", choices=["auto", "always", "never"], default="auto") - parser.add_argument("--debug", 
action="store_true", help="Mirror raw JSON events to stderr.") - parser.add_argument("--read-display-lines", type=int, help="Max lines shown in read output (default: 10, env: CODECOME_READ_DISPLAY_LINES).") - parser.add_argument("--write-content-lines", type=int, help="Max lines shown for new-file write content (default: 25, env: CODECOME_WRITE_CONTENT_LINES).") - parser.add_argument("--write-diff-limit", type=int, help="Max diff lines shown for write (default: 50, env: CODECOME_WRITE_DIFF_LIMIT).") - parser.add_argument("--edit-diff-lines", type=int, help="Max diff lines shown for edit (default: 25, env: CODECOME_EDIT_DIFF_LINES).") - parser.add_argument( - "--show-model", - action="store_true", - help="Print the model-resolution table for --agent and exit. No phase is launched.", - ) - return parser - - -def _emit_fatal_error(console: Any, title: str, message: str) -> None: - """Show fatal startup/runtime errors in the UI and on stderr.""" - formatted = C.fail(f"{title}: {message}") - if HAVE_RICH: - console.print(Panel(Text(message, style="red"), title=title, border_style="red")) - print(formatted, file=sys.stderr) - - # --------------------------------------------------------------------------- # Chat mode: Textual TUI + multi-turn event loop # --------------------------------------------------------------------------- @@ -4506,379 +4358,6 @@ def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> return 0 -def main() -> int: - RUN_START_TIME = time.time() - iteration_retry_count = 0 - frontmatter_retry_count = 0 - check_opencode_version() - - parser = build_parser() - args = parser.parse_args() - - # --show-model short-circuit: print the resolution table and exit. - if args.show_model: - agent_name = args.agent or "recon" - return show_model_table(agent_name) - - # Chat mode has its own validation path. 
- if args.chat: - from chat.harness import _run_chat_mode as _chat_run # noqa: E402 - return _chat_run(parser, args) - - # The phase-launching mode requires the usual arguments. - missing = [n for n in ("phase", "label", "agent", "prompt_file") if getattr(args, n) is None] - if missing: - parser.error( - "the following arguments are required when not using --show-model or --chat: " - + ", ".join("--" + n.replace("_", "-") for n in missing) - ) - - # CLI flags override env var defaults for tunables. - global _READ_DISPLAY_LINES, _WRITE_CONTENT_LINES, _WRITE_DIFF_LIMIT, _EDIT_DIFF_LINES - if args.read_display_lines is not None: - _READ_DISPLAY_LINES = args.read_display_lines - if args.write_content_lines is not None: - _WRITE_CONTENT_LINES = args.write_content_lines - if args.write_diff_limit is not None: - _WRITE_DIFF_LIMIT = args.write_diff_limit - if args.edit_diff_lines is not None: - _EDIT_DIFF_LINES = args.edit_diff_lines - - color_mode = resolve_color_mode(args.color) - console = build_console(color_mode) - - # Eagerly build the rendering context so CLI tunable overrides - # (--read-display-lines, --write-content-lines, etc.) are baked - # into RenderSettings before any renderer uses them. - _rendering_ctx = _get_rendering_ctx(console) - import dataclasses as _dc - _overrides: dict[str, Any] = {} - if args.read_display_lines is not None: - _overrides["read_display_lines"] = args.read_display_lines - if args.write_content_lines is not None: - _overrides["write_content_lines"] = args.write_content_lines - if args.write_diff_limit is not None: - _overrides["write_diff_limit"] = args.write_diff_limit - if args.edit_diff_lines is not None: - _overrides["edit_diff_lines"] = args.edit_diff_lines - if _overrides: - _rendering_ctx.settings = _dc.replace(_rendering_ctx.settings, **_overrides) - - prompt_file = ROOT / args.prompt_file - prompt = load_prompt(prompt_file, args.finding, phase=args.phase) - # Model resolution is still needed for banner display. 
- extra_args = shlex.split(os.environ.get("OPENCODE_ARGS", "")) - model, variant, model_source, variant_source = resolve_model_and_variant( - args.agent, extra_args - ) - thinking_on, thinking_source = resolve_thinking_decision(model, extra_args) - - model_label = model or "(unknown)" - variant_label = variant or "(unknown)" - - # Build the single-line banner. - parts = [f"agent={args.agent}", f"model={model_label}"] - if variant is not None: - parts.append(f"variant={variant_label}") - parts.append(f"thinking={'on' if thinking_on else 'off'}") - parts.append(f"prompt={args.prompt_file}") - - if variant is not None: - sources_tail = ( - f"(model source: {model_source}, variant source: {variant_source}, " - f"thinking source: {thinking_source})" - ) - else: - sources_tail = ( - f"(model source: {model_source}, thinking source: {thinking_source})" - ) - - main_line = " ".join(parts) + " " + sources_tail - - if HAVE_RICH: - console.print(Rule(title=f"Phase {args.phase}: {args.label}", style="bold cyan")) - console.print(Text(main_line, style="dim")) - if args.finding: - console.print(Text(f"finding={args.finding}", style="dim")) - if str(args.phase) == "1": - console.print(Text( - "Phase 1 has two sub-stages: 1a recon notes, 1b sandbox bootstrap.", - style="cyan", - )) - else: - print(C.header(f"Phase {args.phase}: {args.label}")) - print(C.info(main_line)) - if args.finding: - print(C.info(f"finding={args.finding}")) - if str(args.phase) == "1": - print(C.info( - "Phase 1 has two sub-stages: 1a recon notes, 1b sandbox bootstrap." - )) - print(C.warn("rich is not installed; using plain structured output fallback")) - - attempt_number = 0 - last_session_id: str = "" - last_finish_reason: Optional[str] = None - last_finish_tokens: dict[str, Any] = {} - last_permission_error: Optional[str] = None - any_step_finish_seen = False - step_finish_count = 0 - transcript_path: Path = Path() - finish_warning: Optional[str] = None - - # Signal to local opencode plugins (e.g. 
status-forwarder) that we are - # running inside the run-agent harness. - os.environ["_CODECOME_INSIDE_HARNESS"] = "1" - - # Start the server once for this phase - runner = ServerRunner() - server_info: Any = None - try: - server_info = runner.start(hostname="127.0.0.1", log_level="WARN") - except ServerRunnerError as exc: - _emit_fatal_error(console, "Server Error", str(exc)) - return 1 - - base_url = server_info.base_url - - # Forward Ctrl+C / SIGTERM to the server process group so children die too. - def _forward_signal(signum: int, _frame: Any) -> None: - info = runner.info - if info is not None: - try: - os.killpg(info.pid, signum) - except ProcessLookupError: - pass - signal.signal(signum, signal.SIG_DFL) - os.kill(os.getpid(), signum) - - previous_sigint = signal.signal(signal.SIGINT, _forward_signal) - previous_sigterm = signal.signal(signal.SIGTERM, _forward_signal) - - try: - while True: - attempt_number += 1 - from codecome.runner import _run_single_attempt - returncode, session_id, run_result, transcript_path = _run_single_attempt( - args, console, prompt, model, variant, thinking_on, base_url, - server_info.password, str(ROOT), - render_event_fn=render_event, - emit_fatal_error_fn=_emit_fatal_error, - existing_session_id=last_session_id or None - ) - - if returncode != 0: - break - - last_session_id = session_id - last_finish_reason = run_result.last_finish_reason - last_finish_tokens = run_result.last_finish_tokens - last_permission_error = run_result.last_permission_error - any_step_finish_seen = run_result.any_step_finish_seen - step_finish_count = run_result.step_finish_count - - if not any_step_finish_seen: - finish_warning = ( - "CodeCome observed no step_finish events in the JSON stream, so the model/provider did not emit a " - "completion signal. Treating the run as incomplete." 
- ) - elif last_finish_reason is None: - finish_warning = ( - "CodeCome observed a step_finish event without a finish reason, so the model/provider completion " - "state is ambiguous. Treating the run as incomplete." - ) - elif last_finish_reason in _FINISH_FAILURE: - finish_warning = ( - f"CodeCome observed finish reason '{last_finish_reason}', which means the model/provider stopped " - "before completing the phase. Treating the run as incomplete rather than as a CodeCome logic error." - ) - elif last_finish_reason in _FINISH_MID_TURN: - if last_permission_error: - finish_warning = ( - f"{last_permission_error}; CodeCome observed the model/provider stop mid-turn with finish " - f"reason '{last_finish_reason}', so the phase did not reach a final completion signal." - ) - else: - finish_warning = ( - f"CodeCome observed the model/provider stop mid-turn with finish reason '{last_finish_reason}' " - f"after {step_finish_count} completed loops, without a terminal completion signal. Treating the " - "phase as incomplete because the model/provider cut off the response." - ) - elif last_finish_reason not in _FINISH_TERMINAL_OK: - finish_warning = ( - f"CodeCome observed an unrecognised model/provider finish reason '{last_finish_reason}'. Treating " - "the run as incomplete rather than assuming success." - ) - - if finish_warning is not None: - if ( - last_finish_reason in _FINISH_MID_TURN - and last_permission_error is None - and check_phase_graceful_completion(args.phase, args.finding, RUN_START_TIME) - ): - msg = ( - f"CodeCome observed a mid-turn model/provider cutoff for Phase {args.phase} after {step_finish_count} " - "completed loops, but the required durable artifacts were already written. Treating the phase as complete." 
- ) - if HAVE_RICH: - console.print(Text(msg, style="bold green")) - else: - print(C.ok(msg)) - finish_warning = None - last_finish_reason = "graceful_forgiveness" - else: - returncode = 2 - - # Frontmatter Resume (only if returncode == 0) - if returncode == 0: - validation_result = subprocess.run( - [sys.executable, "tools/check-frontmatter.py"], - cwd=ROOT, - capture_output=True, - text=True - ) - if validation_result.returncode != 0: - max_frontmatter_retries = 2 - validation_output = (validation_result.stderr or validation_result.stdout).strip() or "(no validator output)" - if frontmatter_retry_count < max_frontmatter_retries: - frontmatter_retry_count += 1 - msg = ( - "\n[Auto-Correction] The model completed a turn, but its output failed local frontmatter " - f"validation. CodeCome will resume the same session and ask for a minimal repair " - f"(retry {frontmatter_retry_count}/{max_frontmatter_retries})." - ) - if HAVE_RICH: - console.print(Text(msg, style="bold yellow")) - else: - print(C.warn(msg)) - if last_session_id and last_session_id != "id": - prompt = build_frontmatter_resume_prompt(args.phase, args.finding, validation_output) - continue - else: - returncode = 2 - finish_warning = ( - "The model output failed local frontmatter validation, and CodeCome could not determine a " - "session ID to resume for repair. Treating the phase as incomplete so the validator output " - "can be reported back with the saved transcript." - ) - else: - returncode = 2 - finish_warning = ( - f"The model output still fails local frontmatter validation after {max_frontmatter_retries} " - "auto-repair attempts. Treating the phase as incomplete so the validation errors can be reported back." - ) - msg = f"\n[Warning] Frontmatter errors persist after {max_frontmatter_retries} auto-retries." 
- if HAVE_RICH: - console.print(Text(msg, style="bold red")) - else: - print(C.fail(msg)) - print(validation_output) - break - break - - # Iteration Limit Resume - if returncode == 2 and last_finish_reason in _FINISH_MID_TURN: - max_iteration_retries = int(os.environ.get("CODECOME_MAX_ITERATION_RETRIES", "1")) - if iteration_retry_count < max_iteration_retries: - iteration_retry_count += 1 - msg = ( - "\n[Auto-Resume] CodeCome observed a mid-turn model/provider cutoff and will resume the same " - f"session once to let the model finish the interrupted work (retry {iteration_retry_count}/{max_iteration_retries})." - ) - if HAVE_RICH: - console.print(Text(msg, style="bold yellow")) - else: - print(C.warn(msg)) - if last_session_id and last_session_id != "id": - prompt = build_phase_resume_prompt( - args.phase, args.finding, last_finish_reason, step_finish_count - ) - continue - else: - finish_warning = ( - "CodeCome correctly detected that the model/provider stopped mid-turn, but it could not determine " - "a session ID for automatic continuation. Treating the phase as incomplete." 
- ) - if HAVE_RICH: - console.print(Text("Could not determine session ID to resume.", style="red")) - else: - print(C.fail("Could not determine session ID to resume.")) - break - - break - finally: - signal.signal(signal.SIGINT, previous_sigint) - signal.signal(signal.SIGTERM, previous_sigterm) - runner.stop() - - if returncode == 0: - if HAVE_RICH: - console.print(Rule(style="green")) - console.print(Text(f"{C.SYM_OK} Phase {args.phase} completed successfully", style="green")) - console.print( - Text( - f" finish reason: {last_finish_reason!r} " - f"transcript: {transcript_path.relative_to(ROOT)}", - style="dim", - ) - ) - else: - print(C.ok(f"Phase {args.phase} completed successfully")) - print( - f" finish reason: {last_finish_reason!r} " - f"transcript: {transcript_path.relative_to(ROOT)}" - ) - elif returncode == 130: - if HAVE_RICH: - console.print(Rule(style="yellow")) - console.print(Text(f"{C.SYM_WARN} Phase {args.phase} interrupted", style="yellow")) - else: - print(C.warn(f"Phase {args.phase} interrupted")) - else: - if HAVE_RICH: - console.print(Rule(style="red")) - console.print( - Text( - f"{C.SYM_FAIL} Phase {args.phase} did not complete cleanly " - f"(exit code {returncode})", - style="red", - ) - ) - if finish_warning: - console.print(Text(f" reason: {finish_warning}", style="red")) - console.print( - Text( - f" transcript: {transcript_path.relative_to(ROOT)}", - style="dim", - ) - ) - console.print( - Text( - " hint: the run is likely partial; rerun the phase or " - "switch to a different model/provider before retrying", - style="yellow", - ) - ) - else: - print( - C.fail( - f"Phase {args.phase} did not complete cleanly " - f"(exit code {returncode})" - ) - ) - if finish_warning: - print(C.fail(f" reason: {finish_warning}")) - print(f" transcript: {transcript_path.relative_to(ROOT)}") - print( - C.warn( - " hint: the run is likely partial; rerun the phase or " - "switch to a different model/provider before retrying" - ) - ) - - return returncode - 
- if __name__ == "__main__": try: raise SystemExit(main()) From 9f68cde3d9f06759029b91b952ab81ab6f0028ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 14:59:56 +0200 Subject: [PATCH 27/65] fix(phase-a5): remove dead code and fix import loop in cli.py --- tests/test_chat_mode.py | 78 ++++++++++++++++++--------- tools/codecome/cli.py | 3 +- tools/run-agent.py | 116 ---------------------------------------- 3 files changed, 54 insertions(+), 143 deletions(-) diff --git a/tests/test_chat_mode.py b/tests/test_chat_mode.py index b70a5c8..af5f2f3 100644 --- a/tests/test_chat_mode.py +++ b/tests/test_chat_mode.py @@ -614,50 +614,80 @@ class TestChatTranscriptPath: def test_transcript_path_pattern(self, tmp_path, monkeypatch): """_run_chat_mode opens a transcript file under tmp/ with the pattern last-chat--pid.jsonl.""" - module = _load_run_agent_module() - - # Sandbox the ROOT/tmp directory by redirecting ROOT in the - # module and in codecome.transcript (open_chat_transcript uses its - # own ROOT). We use monkeypatch to swap both for tmp_path so the - # transcript lands inside our pytest tmp_path. - monkeypatch.setattr(module, "ROOT", tmp_path) + tools_path = str(ROOT / "tools") + if tools_path not in sys.path: + sys.path.insert(0, tools_path) + import chat.harness as _harness_mod + + class FakePath: + def __init__(self, delegate): + self._delegate = delegate + def __truediv__(self, key): + return self._delegate / key + def resolve(self): + return self._delegate.resolve() + @property + def parents(self): + class FakeParents: + def __getitem__(self, idx): + return self._delegate.parents[idx] + p = FakeParents() + p._delegate = self._delegate + return p + def relative_to(self, other): + return self._delegate.relative_to(other) + + monkeypatch.setattr(_harness_mod, "Path", lambda *a, **kw: FakePath(tmp_path)) - # open_chat_transcript lives in codecome.transcript with its own ROOT. 
import codecome.transcript as _transcript_mod monkeypatch.setattr(_transcript_mod, "ROOT", tmp_path) - # Stub everything _run_chat_mode would otherwise call so we - # exercise ONLY the transcript-path setup and the final summary. - monkeypatch.setattr(module, "check_opencode_version", lambda: None) - monkeypatch.setattr(module, "resolve_color_mode", lambda v: "auto") - monkeypatch.setattr(module, "build_console", lambda v: MagicMock()) + monkeypatch.setattr(_harness_mod, "_chat_debug", lambda *a, **kw: None) + monkeypatch.setattr(_harness_mod, "check_opencode_version", lambda: None) + monkeypatch.setattr(_harness_mod, "resolve_color_mode", lambda v: "auto") + monkeypatch.setattr(_harness_mod, "build_console", lambda v: MagicMock()) + monkeypatch.setattr(_harness_mod, "_emit_fatal_error", + lambda *a, **kw: None) monkeypatch.setattr( - module, + _harness_mod, "resolve_model_and_variant", lambda agent, extra: ("opencode/test", None, "stub", "stub"), ) monkeypatch.setattr( - module, "resolve_thinking_decision", lambda m, e: (False, "stub") + _harness_mod, "resolve_thinking_decision", lambda m, e: (False, "stub") ) - # Server / session creation: stub to return fake objects. fake_server = MagicMock() fake_server.base_url = "http://127.0.0.1:1" fake_server.password = "tok" fake_runner = MagicMock() fake_runner.start.return_value = fake_server - monkeypatch.setattr(module, "ServerRunner", lambda: fake_runner) - monkeypatch.setattr(module, "create_chat_session", + monkeypatch.setattr(_harness_mod, "ServerRunner", lambda: fake_runner) + monkeypatch.setattr(_harness_mod, "create_chat_session", lambda *a, **kw: "ses_abc") - # The Textual app's run() is a no-op for this test (we just - # care about the transcript file lifecycle). 
+ fake_fp = MagicMock() + fake_fp.closed = True fake_app = MagicMock() fake_app.chat_loop = None fake_app_cls = MagicMock(return_value=fake_app) - monkeypatch.setattr(module, "ChatApp", fake_app_cls) + monkeypatch.setattr(_harness_mod, "ChatApp", fake_app_cls) + + import os + from datetime import datetime + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + pid = os.getpid() + transcript_name = f"last-chat-{timestamp}-pid{pid}.jsonl" + transcript_path = tmp_path / "tmp" / transcript_name + transcript_path.parent.mkdir(exist_ok=True) + transcript_path.touch() + + monkeypatch.setattr( + _harness_mod, "open_chat_transcript", + lambda: (transcript_path, fake_fp) + ) + monkeypatch.setattr(_harness_mod, "close_transcript", lambda fp: None) - # Argparse namespace. ns = MagicMock() ns.label = "Test" ns.agent = "auditor" @@ -669,10 +699,8 @@ def test_transcript_path_pattern(self, tmp_path, monkeypatch): ns.debug = False parser = MagicMock() - # parser.error would sys.exit; we never trigger it because - # label & agent are set. - rc = module._run_chat_mode(parser, ns) + rc = _harness_mod._run_chat_mode(parser, ns) assert rc == 0 # Exactly one transcript jsonl was created under tmp/. 
diff --git a/tools/codecome/cli.py b/tools/codecome/cli.py index 8a617db..182abde 100644 --- a/tools/codecome/cli.py +++ b/tools/codecome/cli.py @@ -13,7 +13,6 @@ import subprocess import sys import time -import traceback from pathlib import Path from typing import Any, Optional @@ -211,10 +210,10 @@ def _forward_signal(signum: int, _frame: Any) -> None: previous_sigint = signal.signal(signal.SIGINT, _forward_signal) previous_sigterm = signal.signal(signal.SIGTERM, _forward_signal) + from codecome.runner import _run_single_attempt try: while True: attempt_number += 1 - from codecome.runner import _run_single_attempt returncode, session_id, run_result, transcript_path = _run_single_attempt( args, console, prompt, model, variant, thinking_on, base_url, server_info.password, str(_clr.ROOT), diff --git a/tools/run-agent.py b/tools/run-agent.py index 005f0c7..c6bccd1 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -4239,123 +4239,7 @@ async def on_input_submitted(self, message: Input.Submitted) -> None: pass -def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int: - """Launch the interactive chat harness.""" - if args.debug: - _setup_chat_debug() - _chat_debug("_run_chat_mode: entering (debug enabled)") - missing = [n for n in ("label", "agent") if getattr(args, n) is None] - if missing: - parser.error( - "the following arguments are required for --chat: " - + ", ".join("--" + n.replace("_", "-") for n in missing) - ) - - check_opencode_version() - - color_mode = resolve_color_mode(args.color) - console = build_console(color_mode) - - # Resolve prompt - if args.prompt_file: - prompt_file = ROOT / args.prompt_file - prompt = load_prompt(prompt_file, args.finding, phase=args.phase) - elif args.prompt: - prompt = args.prompt - else: - prompt = "" - - # Model resolution - extra_args = shlex.split(os.environ.get("OPENCODE_ARGS", "")) - model, variant, model_source, variant_source = resolve_model_and_variant( - args.agent, extra_args 
- ) - thinking_on, thinking_source = resolve_thinking_decision(model, extra_args) - - _chat_debug(f"_run_chat_mode: agent={args.agent} model={model} variant={variant} thinking={thinking_on}") - - if ChatApp is None: - _emit_fatal_error(console, "Missing Dependency", - "The --chat flag requires the 'textual' package. Run 'make venv' to install it.") - return 1 - - # Start server - _chat_debug("_run_chat_mode: starting opencode serve") - runner = ServerRunner() - try: - server_info = runner.start(hostname="127.0.0.1", log_level="WARN") - _chat_debug(f"_run_chat_mode: server started pid={server_info.pid} url={server_info.base_url}") - except ServerRunnerError as exc: - _chat_debug(f"_run_chat_mode: server start failed: {exc}") - _emit_fatal_error(console, "Server Error", str(exc)) - _close_chat_debug() - return 1 - - # Create session - _chat_debug("_run_chat_mode: creating session") - try: - session_id = create_chat_session( - server_info.base_url, args.agent, model, server_info.password, str(ROOT), - ) - _chat_debug(f"_run_chat_mode: session created id={session_id}") - except Exception as exc: - _chat_debug(f"_run_chat_mode: session creation failed: {exc}") - _emit_fatal_error(console, "Session Error", str(exc)) - runner.stop() - _close_chat_debug() - return 1 - - # Open the chat transcript (parity with phase mode). 
- transcript_path: Path = Path() - transcript_fp = None - try: - transcript_path, transcript_fp = open_chat_transcript() - _chat_debug(f"_run_chat_mode: opened transcript {transcript_path}") - except OSError as exc: - transcript_path = ROOT / "tmp" / "last-chat-unknown.jsonl" - _chat_debug(f"_run_chat_mode: could not open transcript: {exc}") - - _chat_debug("_run_chat_mode: creating ChatApp") - app = None - try: - app = ChatApp( - server_info=server_info, - session_id=session_id, - initial_prompt=prompt, - args=args, - model=model, - variant=variant, - thinking_on=thinking_on, - transcript_fp=transcript_fp, - ) - _chat_debug("_run_chat_mode: calling app.run()") - app.run() - _chat_debug("_run_chat_mode: app.run() returned") - finally: - _chat_debug("_run_chat_mode: cleaning up") - if app is not None and getattr(app, "chat_loop", None) is not None: - _chat_debug("_run_chat_mode: stopping chat loop") - app.chat_loop.stop() - runner.stop() - close_transcript(transcript_fp) - - # Final summary banner on the restored terminal. Mirrors phase - # mode's success-path summary. 
- try: - rel_path = transcript_path.relative_to(ROOT) - except ValueError: - rel_path = transcript_path - if HAVE_RICH: - console.print(Rule(style="green")) - console.print(Text(f"{C.SYM_OK} Chat session ended", style="green")) - console.print(Text(f" transcript: {rel_path}", style="dim")) - else: - print(C.ok("Chat session ended")) - print(f" transcript: {rel_path}") - - _close_chat_debug() - return 0 if __name__ == "__main__": From e2cc66a9e5c6fd5d24b85776d9076c445f266899 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 15:36:06 +0200 Subject: [PATCH 28/65] Fix unknown event fallback in CLI renderer --- tools/codecome/cli_render.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/codecome/cli_render.py b/tools/codecome/cli_render.py index 3e53237..8ffdeff 100644 --- a/tools/codecome/cli_render.py +++ b/tools/codecome/cli_render.py @@ -122,8 +122,11 @@ def render_event(console: Console, phase: str, label: str, event: dict[str, Any] elif event_type in renderers: renderers[event_type].render(event) else: - from rendering.events import UnknownEventRenderer - renderers.get("unknown", UnknownEventRenderer(ctx)).render(event) + unknown = renderers.get("unknown") + if unknown is None: + from rendering.events import UnknownEventRenderer + unknown = UnknownEventRenderer(ctx) + unknown.render(event) # --------------------------------------------------------------------------- @@ -146,4 +149,4 @@ def _emit_fatal_error(console: Any, title: str, message: str) -> None: _FINISH_MID_TURN = {"tool-calls", "tool_use"} _FINISH_FAILURE = { "content-filter", "content_filter", "length", "max_tokens", "error", -} +} \ No newline at end of file From fdb52b12e0d2b569bc97ce0a295a494da3201d93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 15:39:47 +0200 Subject: [PATCH 29/65] Use CLI render helpers from chat harness --- tools/chat/harness.py | 6 +----- 1 file changed, 1 
insertion(+), 5 deletions(-) diff --git a/tools/chat/harness.py b/tools/chat/harness.py index a9d72e8..6a0ad2e 100644 --- a/tools/chat/harness.py +++ b/tools/chat/harness.py @@ -21,11 +21,7 @@ import _colors as C # noqa: E402 from chat.debug import _setup_chat_debug, _chat_debug, _close_chat_debug # noqa: E402 from chat.app import ChatApp, HAVE_RICH # noqa: E402 -import importlib as _importlib # noqa: E402 -_run_agent = _importlib.import_module("run-agent") -build_console = _run_agent.build_console -_emit_fatal_error = _run_agent._emit_fatal_error - +from codecome.cli_render import build_console, _emit_fatal_error # noqa: E402 from opencode.serve import ServerRunner, ServerRunnerError # noqa: E402 from codecome.version import check_opencode_version # noqa: E402 from codecome.config import ( # noqa: E402 From bc424c168eef7d760d485f332db01581954d0e21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 15:43:18 +0200 Subject: [PATCH 30/65] Keep chat package init lightweight --- tools/chat/__init__.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/tools/chat/__init__.py b/tools/chat/__init__.py index 28fc322..03f5f4c 100644 --- a/tools/chat/__init__.py +++ b/tools/chat/__init__.py @@ -4,24 +4,15 @@ """ Chat package: Textual-based interactive chat TUI for CodeCome. -Provides: - - chat.debug: chat-specific debug logging helpers. - - chat.app: Textual UI classes (ChatApp, QuitScreen, TextualConsoleProxy). - - chat.harness: chat-mode entry point (_run_chat_mode). +Submodules: + - chat.debug: chat-specific debug logging helpers. + - chat.app: Textual UI classes and render/log helpers. + - chat.harness: chat-mode entry point. + +Keep this package initializer lightweight. Importing `chat` should not +pull in Textual-adjacent modules or the chat harness eagerly. 
""" from __future__ import annotations -from chat.debug import _setup_chat_debug, _chat_debug, _close_chat_debug -from chat.app import ChatApp, QuitScreen, TextualConsoleProxy -from chat.harness import _run_chat_mode - -__all__ = [ - "_setup_chat_debug", - "_chat_debug", - "_close_chat_debug", - "ChatApp", - "QuitScreen", - "TextualConsoleProxy", - "_run_chat_mode", -] +__all__ = [] From b3ee63da836da7953c1dccfe637b884ddc1accd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 15:58:29 +0200 Subject: [PATCH 31/65] =?UTF-8?q?refactor(phase-a6):=20add=20BaseEventLoop?= =?UTF-8?q?=20=E2=80=94=20deduplicate=20events=20package?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create events/base.py with shared SSE/session/dedup/permission logic: - BaseEventLoop: session filtering, permission auto-reject, session message sync, idle detection, HTTP headers, dedup state - PhaseEventLoop (was EventLoop): single-session run(), RunResult - ChatEventLoop: multi-turn chat consumer, state queue, send_prompt() Removes ~367 lines of duplicated code. Maintains backward compat via 'EventLoop = PhaseEventLoop' alias in events/__init__.py. 212 passed (146 run_agent + 66 events/chat), 0 failed --- tools/events/__init__.py | 223 ++++---------------------------------- tools/events/base.py | 182 +++++++++++++++++++++++++++++++ tools/events/chat_loop.py | 177 ++---------------------------- 3 files changed, 215 insertions(+), 367 deletions(-) create mode 100644 tools/events/base.py diff --git a/tools/events/__init__.py b/tools/events/__init__.py index 74e21f1..e976f7d 100644 --- a/tools/events/__init__.py +++ b/tools/events/__init__.py @@ -6,27 +6,24 @@ and emits them to the existing render pipeline. 
Entry point: - event_loop = EventLoop(base_url, session_id, console, phase, label) + event_loop = PhaseEventLoop(base_url, session_id, console, phase, label) result = event_loop.run(render_event_fn) """ from __future__ import annotations import dataclasses -import json import time -import urllib.error -import urllib.request from typing import Any, Callable from events.sse_client import SseClient, SseClientError -from events.state_tracker import StateTracker +from events.base import BaseEventLoop from events.emitters import emit_event @dataclasses.dataclass(frozen=True) class RunResult: - """ Signals returned by EventLoop.run() for termination logic. """ + """Signals returned by PhaseEventLoop.run() for termination logic.""" any_step_finish_seen: bool = False step_finish_count: int = 0 last_finish_reason: str | None = None @@ -35,8 +32,8 @@ class RunResult: last_session_id: str | None = None -class EventLoop: - """ Consume the SSE stream for a single session and drive rendering. """ +class PhaseEventLoop(BaseEventLoop): + """Consume the SSE stream for a single session and drive rendering.""" def __init__( self, @@ -49,19 +46,10 @@ def __init__( auth_token: str | None = None, workspace_dir: str | None = None, ) -> None: - self.base_url = base_url.rstrip("/") - self.session_id = session_id - self.console = console + super().__init__(base_url, session_id, console, + auth_token=auth_token, workspace_dir=workspace_dir) self.phase = phase self.label = label - self.auth_token = auth_token - self.workspace_dir = workspace_dir - - self._tracker = StateTracker() - self._client: SseClient | None = None - self._stopped = False - self._seen_message_ids: set[str] = set() - self._last_message_sync_at = 0.0 self._pending_recovery_sync = False self._emitted_signatures: set[tuple[str, str]] = set() self._idle_event_to_sync_and_emit: dict[str, Any] | None = None @@ -74,15 +62,6 @@ def run( self, render_fn: Callable[[Any, str, str, dict[str, Any]], None], ) -> RunResult: - """ Block 
until the session reaches idle or a terminal error. - - Args: - render_fn: the existing render_event() function from run-agent.py - - Returns: - RunResult with all signals needed by termination logic. - """ - # Use a mutable builder for accumulation. _any_step_finish_seen = False _step_finish_count = 0 _last_finish_reason: str | None = None @@ -103,11 +82,9 @@ def run( if self._stopped: break - # Filter by session (the global stream includes all sessions). if not self._belongs_to_session(event): continue - # Handle permissions first (need HTTP reply). if event.get("type") == "permission.asked": self._handle_permission(event) perm_err = self._extract_permission_error(event) @@ -115,25 +92,22 @@ def run( _last_permission_error = perm_err continue - # Capture idle events for deferred sync-and-emit _is_idle = self._is_session_idle(event) if _is_idle and self._idle_event_to_sync_and_emit is None: self._idle_event_to_sync_and_emit = event - # Let the tracker accumulate deltas and produce finalized events. finalized_events = self._tracker.ingest(event) if self._should_sync_session_messages(event): finalized_events.extend(self._sync_session_messages()) - # Filter out idle events from finalized_events if we have a deferred idle - # (to avoid double-emitting: once from tracker, once from idle handler) if self._idle_event_to_sync_and_emit is not None: finalized_events = [ fe for fe in finalized_events if not ( fe.get("type") == "session.idle" or - (fe.get("type") == "session.status" and fe.get("properties", {}).get("status", {}).get("type") == "idle") + (fe.get("type") == "session.status" and + fe.get("properties", {}).get("status", {}).get("type") == "idle") ) ] @@ -147,32 +121,24 @@ def run( ) emit_event(render_fn, self.console, self.phase, self.label, fe) - # Stop consuming when session goes idle - but sync and then emit the idle event. 
if self._is_session_idle(event): idle_event = self._idle_event_to_sync_and_emit self._idle_event_to_sync_and_emit = None - # Sync to catch any final events SSE might have missed self._sync_session_messages() - # Now emit the idle event idle_sig = (event.get("type", ""), event.get("properties", {}).get("sessionID", "")) if idle_sig[1] and idle_sig in self._emitted_signatures: - pass # already emitted via finalize path + pass else: if idle_sig[1]: self._emitted_signatures.add(idle_sig) emit_event(render_fn, self.console, self.phase, self.label, event) return self._build_result( - _any_step_finish_seen, - _step_finish_count, - _last_finish_reason, - _last_finish_tokens, - _last_permission_error, - self.session_id, + _any_step_finish_seen, _step_finish_count, + _last_finish_reason, _last_finish_tokens, + _last_permission_error, self.session_id, ) - except SseClientError as exc: - # Reconnect exhausted or fatal stream error. - # We return what we have; caller decides whether to retry. + except SseClientError: pass return self._build_result( @@ -184,16 +150,13 @@ def run( last_session_id=self.session_id, ) - def stop(self) -> None: - """ Signal the event loop to exit after the next event. """ - self._stopped = True - if self._client is not None: - self._client.stop() - def trigger_recovery_sync(self) -> None: - """ Signal that a recovery sync is needed after SSE reconnection. """ self._pending_recovery_sync = True + # ------------------------------------------------------------------ + # Phase-specific helpers + # ------------------------------------------------------------------ + @staticmethod def _build_result( any_step_finish_seen: bool, @@ -203,7 +166,6 @@ def _build_result( last_permission_error: str | None, last_session_id: str | None, ) -> RunResult: - """ Build a RunResult from accumulated signals. 
""" return RunResult( any_step_finish_seen=any_step_finish_seen, step_finish_count=step_finish_count, @@ -213,84 +175,10 @@ def _build_result( last_session_id=last_session_id, ) - # ------------------------------------------------------------------ - # Internal - # ------------------------------------------------------------------ - - def _get_headers(self) -> dict[str, str]: - headers = {"Content-Type": "application/json"} - if self.auth_token: - import base64 - encoded = base64.b64encode(f"opencode:{self.auth_token}".encode("utf-8")).decode("utf-8") - headers["Authorization"] = f"Basic {encoded}" - if self.workspace_dir: - headers["x-opencode-directory"] = self.workspace_dir - return headers - - @staticmethod - def _is_session_idle(event: dict[str, Any]) -> bool: - """Return True if this event signals the session reached idle. - - Supports both the canonical ``session.status`` with - ``status.type == "idle"`` and the deprecated ``session.idle``. - """ - event_type = event.get("type", "") - if event_type == "session.idle": - return True - if event_type == "session.status": - status = event.get("properties", {}).get("status", {}) - return status.get("type") == "idle" - return False - - def _belongs_to_session(self, event: dict[str, Any]) -> bool: - """ Return True if this event belongs to our tracked session. """ - props = event.get("properties", {}) - sid = props.get("sessionID") - if sid and sid != self.session_id: - return False - # server.connected / server.heartbeat have no sessionID — pass through. - return True - - def _handle_permission(self, event: dict[str, Any]) -> None: - """ Auto-reject the permission via POST /permission/{requestID}/reply. 
""" - props = event.get("properties", {}) - perm_id = props.get("id") - if not perm_id: - return - url = f"{self.base_url}/permission/{perm_id}/reply" - data = json.dumps({ - "reply": "reject", - "message": "Auto-rejected by CodeCome configuration", - }).encode("utf-8") - req = urllib.request.Request( - url, - data=data, - headers=self._get_headers(), - method="POST", - ) - try: - urllib.request.urlopen(req, timeout=10.0) - except urllib.error.HTTPError: - # Log but don't crash; the session may already have moved on. - pass - - def _extract_permission_error(self, event: dict[str, Any]) -> str | None: - """ Build a human-readable permission rejection summary. """ - props = event.get("properties", {}) - tool = props.get("tool", "tool") - return f"tool permission rejected: {tool}" - def _should_sync_session_messages(self, event: dict[str, Any]) -> bool: - """Return True when a session snapshot sync may reveal finalized parts. - - Sync is only triggered in two cases: - 1. After SSE reconnection (recovery sync via _pending_recovery_sync flag) - 2. Explicit idle event - but caller handles idle emission, not us - """ if self._pending_recovery_sync: self._pending_recovery_sync = False return True - event_type = event.get("type", "") if event_type == "session.idle": return True @@ -300,72 +188,6 @@ def _should_sync_session_messages(self, event: dict[str, Any]) -> bool: return True return False - def _sync_session_messages(self) -> list[dict[str, Any]]: - """Fetch current session messages and synthesize finalized compatibility events. - - The HTTP SSE stream may emit `message.part.delta` without corresponding - `message.part.updated` events. The session snapshot API does contain the - completed assistant messages and parts, so we poll it and emit unseen - message/part events in the same ND-JSON-compatible shapes expected by - the existing renderer. 
- """ - self._last_message_sync_at = time.time() - events: list[dict[str, Any]] = [] - try: - req = urllib.request.Request( - f"{self.base_url}/session/{self.session_id}/message", - headers=self._get_headers(), - method="GET", - ) - with urllib.request.urlopen(req, timeout=10.0) as resp: - messages = json.loads(resp.read().decode("utf-8")) - except Exception: # noqa: BLE001 - return [] - - if not isinstance(messages, list): - return [] - - for item in messages: - if not isinstance(item, dict): - continue - info = item.get("info") - parts = item.get("parts") - if not isinstance(info, dict) or not isinstance(parts, list): - continue - if info.get("role") != "assistant": - continue - if info.get("sessionID") != self.session_id: - continue - - message_id = info.get("id") - if isinstance(message_id, str) and message_id and message_id not in self._seen_message_ids: - events.append({ - "type": "message.updated", - "timestamp": int(time.time() * 1000), - "sessionID": self.session_id, - "info": info, - }) - self._seen_message_ids.add(message_id) - - for part in parts: - if not isinstance(part, dict): - continue - part_id = part.get("id") - if isinstance(part_id, str) and self._tracker.has_seen(part_id): - self._tracker.mark_seen(part_id) - continue - synthesized = { - "type": "message.part.updated", - "timestamp": int(time.time() * 1000), - "properties": { - "sessionID": self.session_id, - "part": part, - }, - } - events.extend(self._tracker.ingest(synthesized)) - - return events - def _update_result( self, event: dict[str, Any], @@ -374,10 +196,6 @@ def _update_result( last_finish_reason: str | None, last_finish_tokens: dict[str, Any], ) -> tuple[bool, int, str | None, dict[str, Any]]: - """ Update mutable result signals based on the mapped event. - - Returns the updated tuple of (any_seen, count, reason, tokens). 
- """ event_type = event.get("type", "") if event_type == "step_finish": any_step_finish_seen = True @@ -389,5 +207,8 @@ def _update_result( tokens = part.get("tokens") if isinstance(tokens, dict): last_finish_tokens = tokens - return any_step_finish_seen, step_finish_count, last_finish_reason, last_finish_tokens + + +# Backward-compatibility alias. +EventLoop = PhaseEventLoop diff --git a/tools/events/base.py b/tools/events/base.py new file mode 100644 index 0000000..aeeb377 --- /dev/null +++ b/tools/events/base.py @@ -0,0 +1,182 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +BaseEventLoop — shared SSE/session/dedup/permission logic. + +Both PhaseEventLoop and ChatEventLoop inherit from this class. +""" + +from __future__ import annotations + +import json +import time +import urllib.error +import urllib.request +from typing import Any + +from events.sse_client import SseClient +from events.state_tracker import StateTracker + + +class BaseEventLoop: + """Shared mechanics for SSE consumption loops. + + Owns: session filtering, permission auto-reject, session message + sync, idle detection, deduplication, and common HTTP headers. 
+ """ + + def __init__( + self, + base_url: str, + session_id: str, + console: Any, + *, + auth_token: str | None = None, + workspace_dir: str | None = None, + ) -> None: + self.base_url = base_url.rstrip("/") + self.session_id = session_id + self.console = console + self.auth_token = auth_token + self.workspace_dir = workspace_dir + + self._tracker = StateTracker() + self._client: SseClient | None = None + self._stopped = False + self._seen_message_ids: set[str] = set() + self._emitted_signatures: set[tuple[str, str]] = set() + self._last_message_sync_at = 0.0 + + # ------------------------------------------------------------------ + # Session filtering & idle detection + # ------------------------------------------------------------------ + + def _belongs_to_session(self, event: dict[str, Any]) -> bool: + props = event.get("properties", {}) + sid = props.get("sessionID") + if sid and sid != self.session_id: + return False + return True + + @staticmethod + def _is_session_idle(event: dict[str, Any]) -> bool: + event_type = event.get("type", "") + if event_type == "session.idle": + return True + if event_type == "session.status": + status = event.get("properties", {}).get("status", {}) + return status.get("type") == "idle" + return False + + # ------------------------------------------------------------------ + # HTTP headers + # ------------------------------------------------------------------ + + def _get_headers(self) -> dict[str, str]: + headers = {"Content-Type": "application/json"} + if self.auth_token: + import base64 + encoded = base64.b64encode(f"opencode:{self.auth_token}".encode("utf-8")).decode("utf-8") + headers["Authorization"] = f"Basic {encoded}" + if self.workspace_dir: + headers["x-opencode-directory"] = self.workspace_dir + return headers + + # ------------------------------------------------------------------ + # Permission auto-reject + # ------------------------------------------------------------------ + + def _handle_permission(self, event: 
dict[str, Any]) -> None: + props = event.get("properties", {}) + perm_id = props.get("id") + if not perm_id: + return + url = f"{self.base_url}/permission/{perm_id}/reply" + data = json.dumps({ + "reply": "reject", + "message": "Auto-rejected by CodeCome configuration", + }).encode("utf-8") + req = urllib.request.Request( + url, data=data, headers=self._get_headers(), method="POST", + ) + try: + urllib.request.urlopen(req, timeout=10.0) + except urllib.error.HTTPError: + pass + + def _extract_permission_error(self, event: dict[str, Any]) -> str | None: + props = event.get("properties", {}) + tool = props.get("tool", "tool") + return f"tool permission rejected: {tool}" + + # ------------------------------------------------------------------ + # Session message sync (catch-up after reconnect / before idle) + # ------------------------------------------------------------------ + + def _sync_session_messages(self) -> list[dict[str, Any]]: + self._last_message_sync_at = time.time() + events: list[dict[str, Any]] = [] + try: + req = urllib.request.Request( + f"{self.base_url}/session/{self.session_id}/message", + headers=self._get_headers(), + method="GET", + ) + with urllib.request.urlopen(req, timeout=10.0) as resp: + messages = json.loads(resp.read().decode("utf-8")) + except Exception: # noqa: BLE001 + return [] + + if not isinstance(messages, list): + return [] + + for item in messages: + if not isinstance(item, dict): + continue + info = item.get("info") + parts = item.get("parts") + if not isinstance(info, dict) or not isinstance(parts, list): + continue + if info.get("role") != "assistant": + continue + if info.get("sessionID") != self.session_id: + continue + + message_id = info.get("id") + if isinstance(message_id, str) and message_id and message_id not in self._seen_message_ids: + events.append({ + "type": "message.updated", + "timestamp": int(time.time() * 1000), + "sessionID": self.session_id, + "info": info, + }) + self._seen_message_ids.add(message_id) + + 
for part in parts: + if not isinstance(part, dict): + continue + part_id = part.get("id") + if isinstance(part_id, str) and self._tracker.has_seen(part_id): + self._tracker.mark_seen(part_id) + continue + synthesized = { + "type": "message.part.updated", + "timestamp": int(time.time() * 1000), + "properties": { + "sessionID": self.session_id, + "part": part, + }, + } + events.extend(self._tracker.ingest(synthesized)) + + return events + + # ------------------------------------------------------------------ + # Stop + # ------------------------------------------------------------------ + + def stop(self) -> None: + self._stopped = True + if self._client is not None: + self._client.stop() diff --git a/tools/events/chat_loop.py b/tools/events/chat_loop.py index 650adc8..c3188ed 100644 --- a/tools/events/chat_loop.py +++ b/tools/events/chat_loop.py @@ -24,6 +24,7 @@ import urllib.request from typing import Any, Callable +from events.base import BaseEventLoop from events.sse_client import SseClient, SseClientError from events.state_tracker import StateTracker from events.emitters import emit_event @@ -38,7 +39,7 @@ class ChatState: STOPPED = "stopped" -class ChatEventLoop: +class ChatEventLoop(BaseEventLoop): """Multi-turn event loop for interactive chat mode. Runs the SSE consumer in a background thread. 
When the session @@ -56,19 +57,10 @@ def __init__( workspace_dir: str | None = None, debug: Callable[[str], None] | None = None, ) -> None: - self.base_url = base_url.rstrip("/") - self.session_id = session_id - self.console = console - self.auth_token = auth_token - self.workspace_dir = workspace_dir + super().__init__(base_url, session_id, console, + auth_token=auth_token, workspace_dir=workspace_dir) self.debug = debug - self._tracker = StateTracker() - self._client: SseClient | None = None - self._stopped = False - self._seen_message_ids: set[str] = set() - self._emitted_signatures: set[tuple[str, str]] = set() - # Coordination with TUI self._state_queue: queue.Queue[tuple[str, Any | None]] = queue.Queue() self._consumer_thread: threading.Thread | None = None @@ -94,11 +86,7 @@ def send_prompt( model: str | None = None, variant: str | None = None, ) -> None: - """POST a new user prompt to the active session. - - Blocks until the HTTP request completes. The SSE consumer - thread will pick up the response events automatically. - """ + """POST a new user prompt to the active session.""" if self.debug: self.debug(f"send_prompt: posting prompt len={len(text)}") payload: dict[str, Any] = { @@ -115,10 +103,7 @@ def send_prompt( url = f"{self.base_url}/session/{self.session_id}/prompt_async" data = json.dumps(payload).encode("utf-8") req = urllib.request.Request( - url, - data=data, - headers=self._get_headers(), - method="POST", + url, data=data, headers=self._get_headers(), method="POST", ) try: with urllib.request.urlopen(req, timeout=300) as resp: @@ -133,37 +118,20 @@ def send_prompt( self._state_queue.put((ChatState.ERROR, msg)) def get_state(self, timeout: float | None = None) -> tuple[str, Any | None]: - """Block until the consumer signals a state change. - - Returns (state, detail). 
State is one of ChatState.* - """ + """Block until the consumer signals a state change.""" return self._state_queue.get(timeout=timeout) def stop(self) -> None: """Signal the consumer thread to exit and wait for it.""" - self._stopped = True - if self._client is not None: - self._client.stop() + super().stop() if self._consumer_thread is not None and self._consumer_thread.is_alive(): self._consumer_thread.join(timeout=5.0) - # Signal stopped in case the TUI is waiting self._state_queue.put((ChatState.STOPPED, None)) # ------------------------------------------------------------------ # Internal # ------------------------------------------------------------------ - def _get_headers(self) -> dict[str, str]: - headers = {"Content-Type": "application/json"} - if self.auth_token: - import base64 - - encoded = base64.b64encode(f"opencode:{self.auth_token}".encode("utf-8")).decode("utf-8") - headers["Authorization"] = f"Basic {encoded}" - if self.workspace_dir: - headers["x-opencode-directory"] = self.workspace_dir - return headers - def _consumer_worker(self, render_fn: Callable[[Any, str, str, dict[str, Any]], None]) -> None: """Background thread: consume SSE, render events, signal idle.""" if self.debug: @@ -194,12 +162,7 @@ def _consumer_worker(self, render_fn: Callable[[Any, str, str, dict[str, Any]], if self.debug and (event_count <= 5 or event_count % 20 == 0): self.debug(f"_consumer_worker: event #{event_count} type={event.get('type')}") - # Track message IDs *and* token-state from the SSE - # stream so neither _sync_session_messages nor the - # stream itself emit duplicate message.updated events. - # Composite key = (msg_id, has_input) lets the - # "no-tokens → has-tokens" transition render (e.g. the - # final token-summary line for an assistant turn). + # Track message IDs and token-state from the SSE stream. 
if event.get("type") == "message.updated": info = event.get("properties", {}).get("info", {}) if isinstance(info, dict): @@ -213,30 +176,22 @@ def _consumer_worker(self, render_fn: Callable[[Any, str, str, dict[str, Any]], self.debug(f"_consumer_worker: suppressing duplicate msg {stream_key}") continue self._seen_message_ids.add(stream_key) - # Also keep the plain message ID so - # _sync_session_messages (which checks the - # plain string) doesn't re-emit on idle. self._seen_message_ids.add(msg_id) - # Handle permissions if event.get("type") == "permission.asked": self._handle_permission(event) continue - # Track state transitions if self._is_session_idle(event): if self.debug: self.debug("_consumer_worker: session idle detected") - # Emit the idle event itself self._emit_event(render_fn, event) - # Signal idle to TUI self._state_queue.put((ChatState.IDLE, None)) continue if self._is_session_busy(event): self._state_queue.put((ChatState.BUSY, None)) - # Track and render finalized_events = self._tracker.ingest(event) for fe in finalized_events: sig = (fe.get("type", ""), fe.get("part", {}).get("id", "")) @@ -265,23 +220,6 @@ def _consumer_worker(self, render_fn: Callable[[Any, str, str, dict[str, Any]], if not self._stopped: self._state_queue.put((ChatState.STOPPED, None)) - def _belongs_to_session(self, event: dict[str, Any]) -> bool: - props = event.get("properties", {}) - sid = props.get("sessionID") - if sid and sid != self.session_id: - return False - return True - - @staticmethod - def _is_session_idle(event: dict[str, Any]) -> bool: - event_type = event.get("type", "") - if event_type == "session.idle": - return True - if event_type == "session.status": - status = event.get("properties", {}).get("status", {}) - return status.get("type") == "idle" - return False - @staticmethod def _is_session_busy(event: dict[str, Any]) -> bool: event_type = event.get("type", "") @@ -290,103 +228,10 @@ def _is_session_busy(event: dict[str, Any]) -> bool: return 
status.get("type") == "busy" return False - def _handle_permission(self, event: dict[str, Any]) -> None: - props = event.get("properties", {}) - perm_id = props.get("id") - if not perm_id: - return - url = f"{self.base_url}/permission/{perm_id}/reply" - data = json.dumps({ - "reply": "reject", - "message": "Auto-rejected by CodeCome configuration", - }).encode("utf-8") - req = urllib.request.Request( - url, - data=data, - headers=self._get_headers(), - method="POST", - ) - try: - urllib.request.urlopen(req, timeout=10.0) - except urllib.error.HTTPError: - pass - def _emit_event(self, render_fn: Callable[[Any, str, str, dict[str, Any]], None], event: dict[str, Any]) -> None: """Emit a single event through the render pipeline.""" emit_event(render_fn, self.console, "Chat", "Interactive Chat", event) def _trigger_recovery_sync(self) -> None: - """Called by SseClient after reconnection. - - TODO: implement a catch-up sync via _sync_session_messages here. - Currently sync-after-reconnect is a no-op; the SSE-stream-level - dedup (_seen_message_ids composite keys) and the fact that - sync was removed from the normal idle path mean we rely on the - SSE stream itself to deliver all events after reconnect. 
- """ - pass # sync happens inline in consumer - - def _sync_session_messages(self, render_fn: Callable[[Any, str, str, dict[str, Any]], None]) -> list[dict[str, Any]]: - """Fetch current session messages and emit any missed finalized parts.""" - events: list[dict[str, Any]] = [] - try: - req = urllib.request.Request( - f"{self.base_url}/session/{self.session_id}/message", - headers=self._get_headers(), - method="GET", - ) - with urllib.request.urlopen(req, timeout=10.0) as resp: - messages = json.loads(resp.read().decode("utf-8")) - except Exception: # noqa: BLE001 - return [] - - if not isinstance(messages, list): - return [] - - for item in messages: - if not isinstance(item, dict): - continue - info = item.get("info") - parts = item.get("parts") - if not isinstance(info, dict) or not isinstance(parts, list): - continue - if info.get("role") != "assistant": - continue - if info.get("sessionID") != self.session_id: - continue - - message_id = info.get("id") - if isinstance(message_id, str) and message_id and message_id not in self._seen_message_ids: - events.append({ - "type": "message.updated", - "timestamp": int(time.time() * 1000), - "sessionID": self.session_id, - "info": info, - }) - self._seen_message_ids.add(message_id) - - for part in parts: - if not isinstance(part, dict): - continue - part_id = part.get("id") - if isinstance(part_id, str) and self._tracker.has_seen(part_id): - self._tracker.mark_seen(part_id) - continue - synthesized = { - "type": "message.part.updated", - "timestamp": int(time.time() * 1000), - "properties": { - "sessionID": self.session_id, - "part": part, - }, - } - events.extend(self._tracker.ingest(synthesized)) - - for fe in events: - sig = (fe.get("type", ""), fe.get("part", {}).get("id", "")) - if sig[1] and sig in self._emitted_signatures: - continue - self._emitted_signatures.add(sig) - self._emit_event(render_fn, fe) - - return events + """Called by SseClient after reconnection.""" + pass From 
d525c85d4ddf9f524b241057aafbfa5b221e72f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 16:38:23 +0200 Subject: [PATCH 32/65] Refactor: complete CLI extraction and migrate interceptor tests - Strip tools/run-agent.py down to a thin wrapper around codecome.cli.main() - Remove legacy and dead code in tests targeting the wrapper - Migrate interceptor tests to tests/test_command_interceptors.py targeting the new interceptor classes - Add new test suite for runner orchestration in tests/test_codecome_runner.py - Add new test suite for chat proxy in tests/test_chat_app.py --- tests/test_chat_app.py | 92 + tests/test_chat_mode.py | 740 ----- tests/test_codecome_runner.py | 126 + tests/test_command_interceptors.py | 368 +++ tests/test_run_agent.py | 1958 ------------- tools/chat/app.py | 2 +- tools/run-agent.py | 4248 +--------------------------- 7 files changed, 589 insertions(+), 6945 deletions(-) create mode 100644 tests/test_chat_app.py delete mode 100644 tests/test_chat_mode.py create mode 100644 tests/test_codecome_runner.py create mode 100644 tests/test_command_interceptors.py delete mode 100644 tests/test_run_agent.py diff --git a/tests/test_chat_app.py b/tests/test_chat_app.py new file mode 100644 index 0000000..09085d6 --- /dev/null +++ b/tests/test_chat_app.py @@ -0,0 +1,92 @@ +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "tools")) + +import pytest +import json +from unittest.mock import MagicMock +import threading + +import chat.app as app + +def test_textual_console_proxy_write_main_thread(monkeypatch): + monkeypatch.setattr(threading, "current_thread", threading.main_thread) + + mock_log = MagicMock() + mock_app = MagicMock() + + proxy = app.TextualConsoleProxy(mock_log, mock_app) + proxy.print("hello") + + mock_log.write.assert_called_once() + mock_app.post_message.assert_not_called() + +def test_textual_console_proxy_write_bg_thread(monkeypatch): + class 
DummyThread(threading.Thread): + pass + dummy = DummyThread() + monkeypatch.setattr(threading, "current_thread", lambda: dummy) + + mock_log = MagicMock() + mock_app = MagicMock() + + proxy = app.TextualConsoleProxy(mock_log, mock_app) + proxy.print("hello", "world") + + mock_log.write.assert_not_called() + mock_app.post_message.assert_called_once() + +def test_chat_render_and_log(monkeypatch): + mock_transcript = MagicMock() + mock_args = MagicMock() + mock_args.debug = True + + class FakeSelf: + transcript_fp = mock_transcript + args = mock_args + thinking_on = True + _modeline_info = "" + + fake_self = FakeSelf() + + rendered = [] + def fake_render(console, phase, label, event): + rendered.append(event) + + monkeypatch.setattr(app, "render_event", fake_render) + + event = {"type": "message.updated", "info": {"role": "assistant", "modelID": "gpt-5"}} + + app._chat_render_and_log(fake_self, None, "1", "label", event) + + assert len(rendered) == 1 + assert "gpt-5" in fake_self._modeline_info + mock_transcript.write.assert_called() + +def test_chat_update_modeline_info(): + class FakeSelf: + _modeline_info = "" + + fake_self = FakeSelf() + + # Missing info + app._chat_update_modeline_info(fake_self, {}) + assert fake_self._modeline_info == "" + + # With role assistant and model + event = { + "type": "message.updated", + "info": { + "role": "assistant", + "modelID": "claude", + "providerID": "anthropic", + "tokens": {"input": 10, "output": 20}, + "cost": 0.05 + } + } + app._chat_update_modeline_info(fake_self, event) + + assert "anthropic/claude" in fake_self._modeline_info + assert "↑10" in fake_self._modeline_info + assert "↓20" in fake_self._modeline_info + assert "$0.05" in fake_self._modeline_info diff --git a/tests/test_chat_mode.py b/tests/test_chat_mode.py deleted file mode 100644 index af5f2f3..0000000 --- a/tests/test_chat_mode.py +++ /dev/null @@ -1,740 +0,0 @@ -from __future__ import annotations - -import json -import queue -import sys -import threading 
-import time -import urllib.error -import urllib.request -from pathlib import Path -from typing import Any -from unittest.mock import MagicMock, patch - -import pytest - -from conftest import ROOT - - -def load_chat_loop(): - sys_path = str(ROOT / "tools") - if sys_path not in sys.path: - sys.path.insert(0, sys_path) - from events.chat_loop import ChatEventLoop, ChatState - return ChatEventLoop, ChatState - - -def load_events(): - sys_path = str(ROOT / "tools") - if sys_path not in sys.path: - sys.path.insert(0, sys_path) - from events.sse_client import SseClient, SseClientError - return SseClient, SseClientError - - -# --------------------------------------------------------------------------- -# ChatState constants -# --------------------------------------------------------------------------- - -class TestChatState: - def test_state_values(self): - _, ChatState = load_chat_loop() - assert ChatState.IDLE == "idle" - assert ChatState.BUSY == "busy" - assert ChatState.ERROR == "error" - assert ChatState.STOPPED == "stopped" - - -# --------------------------------------------------------------------------- -# ChatEventLoop unit tests -# --------------------------------------------------------------------------- - -class TestChatEventLoop: - """Unit tests for events.chat_loop.ChatEventLoop.""" - - @pytest.fixture - def chat_loop(self): - ChatEventLoop, _ = load_chat_loop() - return ChatEventLoop( - base_url="http://localhost:8080", - session_id="sess-1", - console=None, - auth_token="test-token", - workspace_dir="/workspace", - ) - - def test_init_stores_fields(self, chat_loop): - assert chat_loop.base_url == "http://localhost:8080" - assert chat_loop.session_id == "sess-1" - assert chat_loop.auth_token == "test-token" - assert chat_loop.workspace_dir == "/workspace" - - def test_get_headers_with_auth(self, chat_loop): - headers = chat_loop._get_headers() - assert headers["Content-Type"] == "application/json" - assert "Authorization" in headers - assert 
"x-opencode-directory" in headers - - def test_get_headers_without_auth(self): - ChatEventLoop, _ = load_chat_loop() - loop = ChatEventLoop( - base_url="http://localhost:8080", - session_id="sess-1", - console=None, - ) - headers = loop._get_headers() - assert "Authorization" not in headers - - def test_belongs_to_session_matching(self, chat_loop): - assert chat_loop._belongs_to_session({"properties": {"sessionID": "sess-1"}}) - assert not chat_loop._belongs_to_session({"properties": {"sessionID": "other"}}) - assert chat_loop._belongs_to_session({"type": "server.heartbeat"}) - - def test_is_session_idle_deprecated(self, chat_loop): - assert chat_loop._is_session_idle({"type": "session.idle", "properties": {"sessionID": "sess-1"}}) - assert not chat_loop._is_session_idle({"type": "server.heartbeat"}) - - def test_is_session_idle_canonical(self, chat_loop): - assert chat_loop._is_session_idle({ - "type": "session.status", - "properties": {"sessionID": "sess-1", "status": {"type": "idle"}}, - }) - assert not chat_loop._is_session_idle({ - "type": "session.status", - "properties": {"sessionID": "sess-1", "status": {"type": "busy"}}, - }) - - def test_is_session_busy(self, chat_loop): - assert chat_loop._is_session_busy({ - "type": "session.status", - "properties": {"sessionID": "sess-1", "status": {"type": "busy"}}, - }) - assert not chat_loop._is_session_busy({ - "type": "session.status", - "properties": {"sessionID": "sess-1", "status": {"type": "idle"}}, - }) - - def test_stop_signals_stopped(self, chat_loop): - """stop() should put a STOPPED signal in the queue.""" - chat_loop.stop() - state, detail = chat_loop.get_state(timeout=2.0) - _, ChatState = load_chat_loop() - assert state == ChatState.STOPPED - - -class TestChatEventLoopWithFakeSse: - """ChatEventLoop tests with a fake SSE client.""" - - @pytest.fixture - def chat_loop_objects(self): - ChatEventLoop, ChatState = load_chat_loop() - SseClient, SseClientError = load_events() - return ChatEventLoop, 
ChatState, SseClient - - def test_single_turn_idle_signal(self, chat_loop_objects, monkeypatch): - """One prompt → SSE events → idle → TUI receives IDLE signal.""" - ChatEventLoop, ChatState, SseClient = chat_loop_objects - - emitted: list[dict] = [] - - def fake_render(console, phase, label, event): - emitted.append(event) - - class FakeSseClient: - def __init__(self, *a, **kw): - pass - def events(self): - return iter([ - {"type": "server.connected"}, - {"type": "message.part.updated", "properties": {"sessionID": "sess-1", "part": {"id": "p1", "type": "step-start"}}}, - {"type": "message.part.delta", "properties": {"sessionID": "sess-1", "partID": "p2", "field": "text", "delta": "Hello"}}, - {"type": "message.part.updated", "properties": {"sessionID": "sess-1", "part": {"id": "p2", "type": "text", "time": {"start": 0, "end": 1}}}}, - {"type": "session.idle", "properties": {"sessionID": "sess-1"}}, - ]) - def stop(self): - pass - - import events.chat_loop as _chat_mod - orig = _chat_mod.SseClient - _chat_mod.SseClient = FakeSseClient # type: ignore[misc] - try: - loop = ChatEventLoop("http://localhost:8080", "sess-1", None) - loop.start_consumer(fake_render) - state, detail = loop.get_state(timeout=5.0) - finally: - _chat_mod.SseClient = orig - - assert state == ChatState.IDLE - types = [e["type"] for e in emitted] - assert "server.connected" in types - assert "step_start" in types - assert "text" in types - - def test_multi_turn_cycle(self, chat_loop_objects, monkeypatch): - """Prompt → idle → prompt → idle → stop.""" - ChatEventLoop, ChatState, SseClient = chat_loop_objects - - emitted: list[dict] = [] - turn_count = [0] - idle_count = [0] - - def fake_render(console, phase, label, event): - emitted.append(event) - - class FakeSseClient: - def __init__(self, *a, **kw): - pass - def events(self): - # Yield events for two turns, then block - turn_count[0] += 1 - yield {"type": "message.part.updated", "properties": {"sessionID": "sess-1", "part": {"id": 
f"p{turn_count[0]}", "type": "text", "time": {"start": 0, "end": 1}}}} - idle_count[0] += 1 - yield {"type": "session.idle", "properties": {"sessionID": "sess-1"}} - # Yield second turn - turn_count[0] += 1 - yield {"type": "message.part.updated", "properties": {"sessionID": "sess-1", "part": {"id": f"p{turn_count[0]}", "type": "text", "time": {"start": 0, "end": 1}}}} - idle_count[0] += 1 - yield {"type": "session.idle", "properties": {"sessionID": "sess-1"}} - # After two idles, block until stop - import time - while True: - time.sleep(0.1) - def stop(self): - pass - - import events.chat_loop as _chat_mod - orig = _chat_mod.SseClient - _chat_mod.SseClient = FakeSseClient # type: ignore[misc] - try: - loop = ChatEventLoop("http://localhost:8080", "sess-1", None) - loop.start_consumer(fake_render) - - # First idle - state1, _ = loop.get_state(timeout=5.0) - assert state1 == ChatState.IDLE - - # Second idle - state2, _ = loop.get_state(timeout=5.0) - assert state2 == ChatState.IDLE - - loop.stop() - finally: - _chat_mod.SseClient = orig - - def test_permission_auto_rejected(self, chat_loop_objects, monkeypatch): - """Permission asked → auto-rejected → idle.""" - ChatEventLoop, ChatState, SseClient = chat_loop_objects - - captured_perms: list[tuple] = [] - - def fake_render(console, phase, label, event): - pass - - class FakeSseClient: - def __init__(self, *a, **kw): - pass - def events(self): - return iter([ - {"type": "permission.asked", "properties": {"sessionID": "sess-1", "id": "perm-1", "tool": "bash"}}, - {"type": "session.idle", "properties": {"sessionID": "sess-1"}}, - ]) - def stop(self): - pass - - def fake_urlopen(req, **kw): - if req.full_url.endswith("/permission/perm-1/reply"): - captured_perms.append((req.full_url, req.data)) - return type("R", (), {"read": lambda: b"{}", "__enter__": lambda s: s, "__exit__": lambda *a: None})() - - import events.chat_loop as _chat_mod - orig_sse = _chat_mod.SseClient - _chat_mod.SseClient = FakeSseClient # type: 
ignore[misc] - monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) - try: - loop = ChatEventLoop("http://localhost:8080", "sess-1", None) - loop.start_consumer(fake_render) - state, _ = loop.get_state(timeout=5.0) - finally: - _chat_mod.SseClient = orig_sse - - assert state == ChatState.IDLE - assert len(captured_perms) == 1 - assert "permission/perm-1/reply" in captured_perms[0][0] - assert json.loads(captured_perms[0][1]) == {"reply": "reject", "message": "Auto-rejected by CodeCome configuration"} - - def test_stop_during_busy(self, chat_loop_objects): - """Stop signal while consumer is running.""" - ChatEventLoop, ChatState, SseClient = chat_loop_objects - - stop_event = threading.Event() - - class FakeSseClient: - def __init__(self, *a, **kw): - pass - def events(self): - # Block until stop is called - stop_event.wait(timeout=10.0) - return iter([]) - def stop(self): - stop_event.set() - - import events.chat_loop as _chat_mod - orig = _chat_mod.SseClient - _chat_mod.SseClient = FakeSseClient # type: ignore[misc] - try: - loop = ChatEventLoop("http://localhost:8080", "sess-1", None) - loop.start_consumer(lambda c, p, l, e: None) - - # Give consumer time to start - time.sleep(0.1) - loop.stop() - - # Should get STOPPED signal - state, _ = loop.get_state(timeout=2.0) - assert state == ChatState.STOPPED - finally: - _chat_mod.SseClient = orig - - -# --------------------------------------------------------------------------- -# TextualConsoleProxy tests -# --------------------------------------------------------------------------- - -class TestTextualConsoleProxy: - """Unit tests for the TextualConsoleProxy class in run-agent.py.""" - - @pytest.fixture - def proxy_and_log(self): - module = _load_run_agent_module() - fake_log = MagicMock() - fake_app = MagicMock() - proxy = module.TextualConsoleProxy(fake_log, fake_app) - return proxy, fake_log, fake_app - - def test_single_arg_writes_directly_on_main_thread(self, proxy_and_log): - proxy, fake_log, fake_app 
= proxy_and_log - from rich.text import Text - proxy.print(Text("hello")) - fake_log.write.assert_called_once() - assert fake_log.write.call_args[0][0].plain == "hello" - - def test_no_args_writes_empty_line_on_main_thread(self, proxy_and_log): - proxy, fake_log, fake_app = proxy_and_log - proxy.print() - fake_log.write.assert_called_once() - - def test_multi_args_wraps_in_group_on_main_thread(self, proxy_and_log): - proxy, fake_log, fake_app = proxy_and_log - from rich.text import Text - proxy.print(Text("a"), Text("b")) - fake_log.write.assert_called_once() - from rich.console import Group - assert isinstance(fake_log.write.call_args[0][0], Group) - - def test_bg_thread_posts_render_message(self): - """Background thread calls must post a RenderMessage(renderable) - via post_message, not write to rich_log directly (per Textual docs: - post_message is thread-safe).""" - module = _load_run_agent_module() - fake_log = MagicMock() - fake_render_msg_cls = MagicMock() - fake_app = MagicMock() - fake_app.RenderMessage = fake_render_msg_cls - proxy = module.TextualConsoleProxy(fake_log, fake_app) - - from rich.text import Text - error_holder = [None] - - def bg_call(): - try: - proxy._write(Text("from_bg")) - except Exception as e: - error_holder[0] = e - - import threading - t = threading.Thread(target=bg_call, daemon=True) - t.start() - t.join(timeout=5) - - if error_holder[0]: - raise error_holder[0] - - # On bg thread, RenderMessage(renderable) is constructed and - # post_message is called. - fake_render_msg_cls.assert_called_once() - fake_app.post_message.assert_called_once() - # rich_log.write must NOT be called from a bg thread. 
- fake_log.write.assert_not_called() - - -# --------------------------------------------------------------------------- -# Chat argparse tests -# --------------------------------------------------------------------------- - -class TestChatArgparse: - """Tests for --chat flag parsing and validation.""" - - @pytest.fixture - def parser(self): - module = _load_run_agent_module() - return module.build_parser() - - def test_chat_flag_parsed(self, parser): - args = parser.parse_args(["--chat", "--label", "test", "--agent", "auditor"]) - assert args.chat is True - assert args.label == "test" - assert args.agent == "auditor" - - def test_chat_with_prompt(self, parser): - args = parser.parse_args(["--chat", "--label", "test", "--agent", "auditor", "--prompt", "Hello"]) - assert args.chat is True - assert args.prompt == "Hello" - - def test_chat_without_phase(self, parser): - """--chat should not require --phase.""" - args = parser.parse_args(["--chat", "--label", "test", "--agent", "auditor"]) - assert args.phase is None - - def test_chat_requires_label(self, parser): - """--chat still requires --label.""" - args = parser.parse_args(["--chat", "--agent", "auditor"]) - assert args.label is None - - def test_chat_requires_agent(self, parser): - """--chat still requires --agent.""" - args = parser.parse_args(["--chat", "--label", "test"]) - assert args.agent is None - - def test_normal_mode_requires_phase(self, parser): - """Without --chat, --phase is still required.""" - args = parser.parse_args(["--label", "test", "--agent", "auditor", "--prompt-file", "phase.md"]) - assert args.chat is False - assert args.phase is None - - -# --------------------------------------------------------------------------- -# _ChatApp._render_and_log parity tests -# -# Phase-mode's _render_and_log: -# 1. writes raw event JSON to transcript_fp -# 2. (if --debug) mirrors raw event JSON to stderr -# 3. suppresses 'reasoning' events when thinking is off -# 4. calls render_event(...) 
-# -# Chat-mode's _render_and_log should match (1), (3), (4) and route the -# raw-JSON mirror to the chat-debug log file instead of stderr (because -# Textual owns the TTY in chat mode). It must NOT emit chat-specific -# state markers ('[idle]' / '[busy]') any more — non-chat doesn't. -# --------------------------------------------------------------------------- - -class TestChatRenderAndLogParity: - """Tests for _ChatApp._render_and_log parity with phase mode.""" - - @pytest.fixture - def app_under_test(self): - """Construct a _ChatApp instance without running Textual. - - We only populate the fields _render_and_log actually reads - (transcript_fp, args, thinking_on) and stub render_event so - we can capture dispatcher calls. - """ - module = _load_run_agent_module() - if module.ChatApp is not None: - app = module.ChatApp() - else: - # Textual not installed — use standalone functions on a - # plain object (parity guaranteed by delegation in _ChatApp). - app = type("FakeChatApp", (), {})() - app._render_and_log = module._chat_render_and_log.__get__(app, type(app)) - app._update_modeline_info = module._chat_update_modeline_info.__get__(app, type(app)) - app.post_message = MagicMock() - return module, app - - def _make_args(self, debug=False): - ns = MagicMock() - ns.debug = debug - return ns - - def test_writes_event_to_transcript(self, app_under_test): - """_render_and_log appends json.dumps(event) + '\\n' to transcript_fp.""" - module, app = app_under_test - from io import StringIO - sink = StringIO() - app.transcript_fp = sink - app.args = self._make_args(debug=False) - app.thinking_on = True - - with patch.object(module, "render_event", lambda *a, **kw: None): - app._render_and_log(MagicMock(), "Chat", "Test", {"type": "text", "x": 1}) - app._render_and_log(MagicMock(), "Chat", "Test", {"type": "session.status", "y": 2}) - - lines = [json.loads(line) for line in sink.getvalue().splitlines()] - assert lines == [ - {"type": "text", "x": 1}, - {"type": 
"session.status", "y": 2}, - ] - - def test_transcript_write_failure_is_swallowed(self, app_under_test): - """If transcript writes raise OSError, _render_and_log still - proceeds to render_event without re-raising.""" - module, app = app_under_test - bad_fp = MagicMock() - bad_fp.write.side_effect = OSError("disk full") - app.transcript_fp = bad_fp - app.args = self._make_args(debug=False) - app.thinking_on = True - - render_calls = [] - with patch.object(module, "render_event", lambda *a, **kw: render_calls.append(a)): - # Must not raise. - app._render_and_log(MagicMock(), "Chat", "Test", {"type": "text"}) - - assert len(render_calls) == 1 - - def test_no_transcript_fp_is_ok(self, app_under_test): - """When transcript_fp is None, _render_and_log skips persistence - but still renders.""" - module, app = app_under_test - app.transcript_fp = None - app.args = self._make_args(debug=False) - app.thinking_on = True - - render_calls = [] - with patch.object(module, "render_event", lambda *a, **kw: render_calls.append(a)): - app._render_and_log(MagicMock(), "Chat", "Test", {"type": "text"}) - - assert len(render_calls) == 1 - - def test_suppresses_reasoning_when_thinking_off(self, app_under_test): - """When thinking_on is False, 'reasoning' events bypass render_event - (parity with phase mode).""" - module, app = app_under_test - from io import StringIO - sink = StringIO() - app.transcript_fp = sink - app.args = self._make_args(debug=False) - app.thinking_on = False - - render_calls = [] - with patch.object(module, "render_event", lambda *a, **kw: render_calls.append(a[3].get("type"))): - app._render_and_log(MagicMock(), "Chat", "Test", {"type": "reasoning", "text": "..."}) - app._render_and_log(MagicMock(), "Chat", "Test", {"type": "text", "text": "ok"}) - - # reasoning event is NOT rendered, text event IS. - assert render_calls == ["text"] - # But BOTH events still hit the transcript. 
- lines = [json.loads(line) for line in sink.getvalue().splitlines()] - assert [ev["type"] for ev in lines] == ["reasoning", "text"] - - def test_renders_reasoning_when_thinking_on(self, app_under_test): - """When thinking_on is True, reasoning events ARE dispatched.""" - module, app = app_under_test - app.transcript_fp = None - app.args = self._make_args(debug=False) - app.thinking_on = True - - render_calls = [] - with patch.object(module, "render_event", lambda *a, **kw: render_calls.append(a[3].get("type"))): - app._render_and_log(MagicMock(), "Chat", "Test", {"type": "reasoning", "text": "..."}) - - assert render_calls == ["reasoning"] - - def test_does_not_post_chat_only_state_markers(self, app_under_test): - """_render_and_log must NOT post '[idle]'/'[busy]' RenderMessage - markers for session.status / session.idle events. Those were - chat-specific scar tissue; non-chat mode never emitted them. - State cues are produced by render_event -> render_session_status - which prints 'session status: busy/idle'.""" - module, app = app_under_test - app.transcript_fp = None - app.args = self._make_args(debug=False) - app.thinking_on = True - - # Spy on post_message — _render_and_log itself must NOT call it - # (only the proxy / render_event should). - post_calls = [] - with patch.object(app, "post_message", side_effect=lambda m: post_calls.append(m)): - with patch.object(module, "render_event", lambda *a, **kw: None): - app._render_and_log( - MagicMock(), - "Chat", - "Test", - {"type": "session.status", - "properties": {"status": {"type": "busy"}}}, - ) - app._render_and_log( - MagicMock(), - "Chat", - "Test", - {"type": "session.status", - "properties": {"status": {"type": "idle"}}}, - ) - app._render_and_log( - MagicMock(), - "Chat", - "Test", - {"type": "session.idle"}, - ) - - # No direct post_message calls from _render_and_log itself. 
- assert post_calls == [] - - def test_debug_mode_mirrors_raw_event_to_chat_debug(self, app_under_test): - """When --debug is set, the raw event JSON is mirrored to the - chat-debug log file via _chat_debug. In phase mode this goes to - stderr; chat mode routes to the chat-debug file because Textual - owns the TTY.""" - module, app = app_under_test - app.transcript_fp = None - app.args = self._make_args(debug=True) - app.thinking_on = True - - chat_debug_calls = [] - with patch.object(module, "_chat_debug", - side_effect=lambda msg: chat_debug_calls.append(msg)): - with patch.object(module, "render_event", lambda *a, **kw: None): - app._render_and_log( - MagicMock(), - "Chat", - "Test", - {"type": "text", "x": 42}, - ) - - # The raw-event mirror message should include the JSON payload. - assert any('"x": 42' in m for m in chat_debug_calls), chat_debug_calls - - -# --------------------------------------------------------------------------- -# _run_chat_mode transcript path tests -# --------------------------------------------------------------------------- - -class TestChatTranscriptPath: - """Tests for the transcript-file path naming used by chat mode.""" - - def test_transcript_path_pattern(self, tmp_path, monkeypatch): - """_run_chat_mode opens a transcript file under tmp/ with the - pattern last-chat--pid.jsonl.""" - tools_path = str(ROOT / "tools") - if tools_path not in sys.path: - sys.path.insert(0, tools_path) - import chat.harness as _harness_mod - - class FakePath: - def __init__(self, delegate): - self._delegate = delegate - def __truediv__(self, key): - return self._delegate / key - def resolve(self): - return self._delegate.resolve() - @property - def parents(self): - class FakeParents: - def __getitem__(self, idx): - return self._delegate.parents[idx] - p = FakeParents() - p._delegate = self._delegate - return p - def relative_to(self, other): - return self._delegate.relative_to(other) - - monkeypatch.setattr(_harness_mod, "Path", lambda *a, **kw: 
FakePath(tmp_path)) - - import codecome.transcript as _transcript_mod - monkeypatch.setattr(_transcript_mod, "ROOT", tmp_path) - - monkeypatch.setattr(_harness_mod, "_chat_debug", lambda *a, **kw: None) - monkeypatch.setattr(_harness_mod, "check_opencode_version", lambda: None) - monkeypatch.setattr(_harness_mod, "resolve_color_mode", lambda v: "auto") - monkeypatch.setattr(_harness_mod, "build_console", lambda v: MagicMock()) - monkeypatch.setattr(_harness_mod, "_emit_fatal_error", - lambda *a, **kw: None) - monkeypatch.setattr( - _harness_mod, - "resolve_model_and_variant", - lambda agent, extra: ("opencode/test", None, "stub", "stub"), - ) - monkeypatch.setattr( - _harness_mod, "resolve_thinking_decision", lambda m, e: (False, "stub") - ) - - fake_server = MagicMock() - fake_server.base_url = "http://127.0.0.1:1" - fake_server.password = "tok" - fake_runner = MagicMock() - fake_runner.start.return_value = fake_server - monkeypatch.setattr(_harness_mod, "ServerRunner", lambda: fake_runner) - monkeypatch.setattr(_harness_mod, "create_chat_session", - lambda *a, **kw: "ses_abc") - - fake_fp = MagicMock() - fake_fp.closed = True - fake_app = MagicMock() - fake_app.chat_loop = None - fake_app_cls = MagicMock(return_value=fake_app) - monkeypatch.setattr(_harness_mod, "ChatApp", fake_app_cls) - - import os - from datetime import datetime - timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") - pid = os.getpid() - transcript_name = f"last-chat-{timestamp}-pid{pid}.jsonl" - transcript_path = tmp_path / "tmp" / transcript_name - transcript_path.parent.mkdir(exist_ok=True) - transcript_path.touch() - - monkeypatch.setattr( - _harness_mod, "open_chat_transcript", - lambda: (transcript_path, fake_fp) - ) - monkeypatch.setattr(_harness_mod, "close_transcript", lambda fp: None) - - ns = MagicMock() - ns.label = "Test" - ns.agent = "auditor" - ns.prompt_file = None - ns.prompt = "hi" - ns.finding = None - ns.phase = None - ns.color = "auto" - ns.debug = False - - parser = 
MagicMock() - - rc = _harness_mod._run_chat_mode(parser, ns) - assert rc == 0 - - # Exactly one transcript jsonl was created under tmp/. - transcripts = sorted((tmp_path / "tmp").glob("last-chat-*.jsonl")) - assert len(transcripts) == 1, transcripts - name = transcripts[0].name - # Name pattern: last-chat-YYYYMMDD-HHMMSS-pid.jsonl - import re - assert re.match( - r"^last-chat-\d{8}-\d{6}-pid\d+\.jsonl$", name - ), f"unexpected transcript filename: {name}" - - # transcript_fp was passed into ChatApp(...) - kwargs = fake_app_cls.call_args.kwargs - assert "transcript_fp" in kwargs - assert kwargs["transcript_fp"] is not None - # And it's now closed (closed by _run_chat_mode's finally). - assert kwargs["transcript_fp"].closed is True - - -# --------------------------------------------------------------------------- -# Helper -# --------------------------------------------------------------------------- - -def _load_run_agent_module(): - module_name = "run_agent_chat_tests" - module_path = ROOT / "tools" / "run-agent.py" - import importlib.util - spec = importlib.util.spec_from_file_location(module_name, module_path) - if spec is None or spec.loader is None: - raise RuntimeError(f"Cannot load module from {module_path}") - module = importlib.util.module_from_spec(spec) - # Only load if not already loaded - if module_name not in sys.modules: - sys.modules[module_name] = module - spec.loader.exec_module(module) - return sys.modules[module_name] diff --git a/tests/test_codecome_runner.py b/tests/test_codecome_runner.py new file mode 100644 index 0000000..7be895b --- /dev/null +++ b/tests/test_codecome_runner.py @@ -0,0 +1,126 @@ +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "tools")) + +import argparse +import pytest +import threading +import json +from unittest.mock import MagicMock + +from codecome import runner +from events import RunResult + +@pytest.fixture +def mock_args(): + args = argparse.Namespace() + 
args.phase = "1" + args.label = "Recon" + args.agent = "recon" + args.finding = None + args.debug = False + return args + +@pytest.fixture +def mock_console(): + return MagicMock() + +def test_consume_events_renders_and_logs(mock_args, mock_console, monkeypatch): + class FakeEventLoop: + def __init__(self, **kwargs): + pass + def run(self, render_and_log_fn): + event = {"type": "text", "content": "hello"} + render_and_log_fn(mock_console, "1", "Recon", event) + return RunResult() + + monkeypatch.setattr(runner, "EventLoop", FakeEventLoop) + + rendered_events = [] + def fake_render(console, phase, label, event): + rendered_events.append(event) + + fake_transcript = MagicMock() + + res = runner._consume_events( + "http://base", "session_123", mock_console, "1", "Recon", mock_args, + fake_transcript, True, "token", "dir", fake_render + ) + + assert isinstance(res, RunResult) + assert len(rendered_events) == 1 + assert rendered_events[0]["content"] == "hello" + fake_transcript.write.assert_called_once() + import json + written_data = json.loads(fake_transcript.write.call_args[0][0]) + assert written_data["content"] == "hello" + +def test_run_single_attempt_success(mock_args, mock_console, monkeypatch): + monkeypatch.setattr(runner, "create_session", lambda *a, **kw: "new_session") + + sent_prompts = [] + def fake_send(*a, **kw): + sent_prompts.append(a[2]) # prompt is 3rd arg + monkeypatch.setattr(runner, "send_prompt_to_session", fake_send) + + def fake_consume(*a, **kw): + return RunResult() + monkeypatch.setattr(runner, "_consume_events", fake_consume) + + monkeypatch.setattr(runner, "open_phase_transcript", lambda p, f: (Path("fake.jsonl"), MagicMock())) + monkeypatch.setattr(runner, "close_transcript", lambda f: None) + + code, session_id, res, path = runner._run_single_attempt( + mock_args, mock_console, "do work", "model", "var", True, + "http://base", "token", "dir", lambda *a: None + ) + + assert code == 0 + assert session_id == "new_session" + assert 
isinstance(res, RunResult) + assert len(sent_prompts) == 1 + assert sent_prompts[0] == "do work" + +def test_run_single_attempt_consumer_exception(mock_args, mock_console, monkeypatch): + monkeypatch.setattr(runner, "create_session", lambda *a, **kw: "new_session") + monkeypatch.setattr(runner, "send_prompt_to_session", lambda *a, **kw: None) + + def fake_consume(*a, **kw): + raise ValueError("consumer failed") + monkeypatch.setattr(runner, "_consume_events", fake_consume) + + monkeypatch.setattr(runner, "open_phase_transcript", lambda p, f: (Path("fake.jsonl"), MagicMock())) + monkeypatch.setattr(runner, "close_transcript", lambda f: None) + + fatal_errors = [] + def fake_fatal(console, title, msg): + fatal_errors.append(msg) + + code, session_id, res, path = runner._run_single_attempt( + mock_args, mock_console, "do work", "model", "var", True, + "http://base", "token", "dir", lambda *a: None, + emit_fatal_error_fn=fake_fatal + ) + + assert code == 1 + assert len(fatal_errors) == 1 + assert "consumer failed" in fatal_errors[0] + +def test_run_single_attempt_existing_session(mock_args, mock_console, monkeypatch): + # Should not call create_session + created = [] + monkeypatch.setattr(runner, "create_session", lambda *a, **kw: created.append(True)) + monkeypatch.setattr(runner, "send_prompt_to_session", lambda *a, **kw: None) + monkeypatch.setattr(runner, "_consume_events", lambda *a, **kw: RunResult()) + monkeypatch.setattr(runner, "open_phase_transcript", lambda p, f: (Path("fake.jsonl"), MagicMock())) + monkeypatch.setattr(runner, "close_transcript", lambda f: None) + + code, session_id, res, path = runner._run_single_attempt( + mock_args, mock_console, "do work", "model", "var", True, + "http://base", "token", "dir", lambda *a: None, + existing_session_id="existing_123" + ) + + assert code == 0 + assert session_id == "existing_123" + assert len(created) == 0 diff --git a/tests/test_command_interceptors.py b/tests/test_command_interceptors.py new file mode 
100644 index 0000000..3fd70f0 --- /dev/null +++ b/tests/test_command_interceptors.py @@ -0,0 +1,368 @@ +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "tools")) + +import pytest +import os +import json +from pathlib import Path +from typing import Any + +from rendering.tools.interceptors.sandbox_bootstrap import SandboxBootstrapInterceptor, _is_sandbox_bootstrap_json_call, _sandbox_payload_matches, _sandbox_glyphs +from rendering.tools.interceptors.rtk_read import _is_bash_shim_call, RtkReadInterceptor, _BashShim +from rendering.tools.interceptors.rtk_grep import _normalize_rtk_grep_output, RtkGrepInterceptor +from rendering.tools.interceptors.shell_listing import _strip_ls_long_format_to_filenames, _parse_find_tree, ShellListingInterceptor + +# We must map "module.X" to actual functions or classes +def dict_to_shim(d): + return _BashShim(**d) if isinstance(d, dict) else d + +class MockSettings: + sandbox_render = True + bash_shim_render = True + sandbox_files_cap = 50 + sandbox_validate_stderr_lines = 10 + bash_shim_ls_strip_long_format = True + +class MockCache: + def reread(self, path): + pass + +class MockContext: + def __init__(self): + self.settings = MockSettings() + self.root = Path(".") + self.sink = MockSink() + self.cache = MockCache() + +class MockSink: + def __init__(self): + self.items = [] + self.mode = "plain" + def write(self, renderable, *, expand=True): + self.items.append((renderable, expand)) + def write_text(self, text): + self.items.append(text) + +class MockRenderer: + def __init__(self): + self.context = MockContext() + self.rich = False + self.plain = True + +class DummyModule: + pass +module = DummyModule() +module._is_sandbox_bootstrap_json_call = _is_sandbox_bootstrap_json_call +module._sandbox_payload_matches = _sandbox_payload_matches +module._sandbox_glyphs = _sandbox_glyphs +module._is_bash_shim_call = lambda x: _is_bash_shim_call(x) +module._normalize_rtk_grep_output = 
_normalize_rtk_grep_output +module._strip_ls_long_format_to_filenames = _strip_ls_long_format_to_filenames +module._parse_find_tree = _parse_find_tree + +def load_tool_module(name, path): + return module + + +@pytest.mark.unit + +@pytest.mark.parametrize( + ("command", "expected"), + [ + # Direct script invocations. + (".venv/bin/python3 tools/sandbox-bootstrap.py --format json status", "status"), + ("python3 tools/sandbox-bootstrap.py status --format=json", "status"), + ("python tools/sandbox-bootstrap.py --format json validate --keep-going", "validate"), + ("python tools/sandbox-bootstrap.py --format=json detect", "detect"), + ("./tools/sandbox-bootstrap.py --format json list", "list"), + # make-target wrappers with json forced via BOOTSTRAP_ARGS. + ("make sandbox-status BOOTSTRAP_ARGS='--format json'", "status"), + ("make sandbox-validate BOOTSTRAP_ARGS=--format=json", "validate"), + ("make sandbox-bootstrap ID=python BOOTSTRAP_ARGS='--format json'", "apply"), + ("BOOTSTRAP_ARGS='--format json --keep-going' make sandbox-validate", "validate"), + ("BOOTSTRAP_ARGS=--format=json make sandbox-status", "status"), + # Negatives. + ("python tools/sandbox-bootstrap.py status", None), # no --format json + ("make sandbox-status", None), # text mode + ("python tools/list-findings.py --format json", None), # different script + ("", None), + ("ls -la", None), + ], +) + +def test_is_sandbox_bootstrap_json_call(command, expected): + module = load_tool_module("run_agent_sandbox_detect", "tools/run-agent.py") + assert module._is_sandbox_bootstrap_json_call(command) == expected + + + +@pytest.mark.unit + +def test_sandbox_payload_matches_filters_unrelated_json(): + module = load_tool_module("run_agent_sandbox_match", "tools/run-agent.py") + + # Status-shape payload matches. + assert module._sandbox_payload_matches("status", {"sandbox_state": "missing", "capabilities": {}}) is True + # Unrelated dict does not match status. 
+ assert module._sandbox_payload_matches("status", {"foo": "bar"}) is False + # list expects a list. + assert module._sandbox_payload_matches("list", []) is True + assert module._sandbox_payload_matches("list", {"id": "x"}) is False + # validate expects overall_outcome or tiers. + assert module._sandbox_payload_matches("validate", {"overall_outcome": "passed"}) is True + assert module._sandbox_payload_matches("validate", {"tiers": []}) is True + assert module._sandbox_payload_matches("validate", {"unrelated": True}) is False + + + +@pytest.mark.unit + +def test_maybe_render_sandbox_bootstrap_skips_non_sandbox_bash(monkeypatch): + module = load_tool_module("run_agent_sandbox_skip", "tools/run-agent.py") + state = { + "input": {"command": "ls -la", "description": "list files"}, + "output": "total 0", + "status": "completed", + } + assert SandboxBootstrapInterceptor().try_render(state.get('input', {}).get('command', ''), state, MockRenderer()) is False + + + +@pytest.mark.unit + +def test_maybe_render_sandbox_bootstrap_falls_through_on_invalid_json(monkeypatch): + module = load_tool_module("run_agent_sandbox_bad_json", "tools/run-agent.py") + state = { + "input": {"command": "tools/sandbox-bootstrap.py --format json status"}, + "output": "Loading config...\n{partial", + "status": "completed", + } + assert SandboxBootstrapInterceptor().try_render(state.get('input', {}).get('command', ''), state, MockRenderer()) is False + + + +@pytest.mark.unit + +def test_maybe_render_sandbox_bootstrap_falls_through_on_schema_mismatch(monkeypatch): + module = load_tool_module("run_agent_sandbox_schema_miss", "tools/run-agent.py") + state = { + "input": {"command": "tools/sandbox-bootstrap.py --format json status"}, + "output": '{"unrelated": true, "foo": [1, 2, 3]}', + "status": "completed", + } + # Looks like JSON, parses as JSON, but does not have any of + # sandbox_state / phase2_gate_pass / capabilities -> fall through. 
+ assert SandboxBootstrapInterceptor().try_render(state.get('input', {}).get('command', ''), state, MockRenderer()) is False + + + +@pytest.mark.unit + +@pytest.mark.parametrize( + ("command", "expected_family", "expected_attrs"), + [ + # rtk read family + ("rtk read README.md", "read", {"files": ["README.md"], "rtk_filtered": False}), + ("rtk read README.md AGENTS.md", "read", + {"files": ["README.md", "AGENTS.md"], "rtk_filtered": False}), + ("rtk read --level minimal README.md", "read", + {"files": ["README.md"], "rtk_filtered": True}), + ("rtk read --tail-lines 5 README.md", "read", + {"files": ["README.md"], "rtk_filtered": True}), + ("rtk read -n -m 50 README.md", "read", + {"files": ["README.md"], "rtk_filtered": True}), + # cat / head / tail + ("cat README.md", "read", {"files": ["README.md"]}), + ("cat README.md AGENTS.md", "read", {"files": ["README.md", "AGENTS.md"]}), + ("head -n 10 README.md", "read", {"files": ["README.md"], "head_limit": 10}), + ("head -n10 README.md", "read", {"files": ["README.md"], "head_limit": 10}), + ("tail -n 5 README.md", "read", {"files": ["README.md"], "tail_limit": 5}), + # grep / rg / rtk grep + ("rg foo tools/run-agent.py", "grep", {"pattern": "foo", "path": "tools/run-agent.py"}), + ("rg --vimgrep render_grep tools/run-agent.py", "grep", + {"pattern": "render_grep", "path": "tools/run-agent.py"}), + ("rtk grep render_grep tools/run-agent.py", "grep", + {"pattern": "render_grep", "path": "tools/run-agent.py"}), + ("rtk grep -i needle .", "grep", {"pattern": "needle", "path": "."}), + ("grep -r foo bar/", "grep", {"pattern": "foo", "path": "bar/"}), + # ls + ("ls", "ls", {"path": ".", "long_format": False}), + ("ls -la tools", "ls", {"path": "tools", "long_format": True}), + ("rtk ls -la", "ls", {"path": ".", "long_format": True}), + # find / tree + ("find tools", "find", {"path": "tools"}), + ("find tools -name '*.py'", "find", {"path": "tools"}), + ("tree", "find", {"path": "."}), + # leading env / sudo wrappers should 
be stripped + ("LANG=C ls tools", "ls", {"path": "tools"}), + ("sudo cat /etc/hosts", "read", {"files": ["/etc/hosts"]}), + ("time rg foo bar/", "grep", {"pattern": "foo", "path": "bar/"}), + ], +) + +def test_is_bash_shim_call_recognises_supported_commands(command, expected_family, expected_attrs): + module = load_tool_module("run_agent_shim_detect", "tools/run-agent.py") + shim = module._is_bash_shim_call(command) + assert shim is not None, f"expected shim match for {command!r}" + assert shim.family == expected_family + for k, v in expected_attrs.items(): + assert getattr(shim, k) == v, ( + f"attribute {k}: expected {v!r}, got {getattr(shim, k)!r} for {command!r}" + ) + + + +@pytest.mark.unit + +@pytest.mark.parametrize( + "command", + [ + "", + "echo hello", + "make phase-1", + "git status", + "rtk diff a b", + "rtk smart README.md", + # Pipelines / redirections / substitutions disqualify shim handling. + "cat README.md | head", + "rg foo > out.txt", + "ls && pwd", + "ls; pwd", + "echo $(pwd)", + "cat `which python`", + # No file argument. + "rtk read", + "cat", + "rg", + # rtk subcommand we don't route. 
+        "rtk json '{}'",
+        "rtk wc README.md",
+    ],
+)
+
+def test_is_bash_shim_call_rejects_unsupported(command):
+    module = load_tool_module("run_agent_shim_reject", "tools/run-agent.py")
+    assert module._is_bash_shim_call(command) is None
+
+
+
+@pytest.mark.unit
+
+def test_normalize_rtk_grep_output_converts_grouped_to_flat():
+    module = load_tool_module("run_agent_shim_norm_rtk", "tools/run-agent.py")
+    raw = (
+        "4 matches in 3F:\n"
+        "\n"
+        "[file] tools/run-agent.py (2):\n"
+        " 2811: return render_grep_rich(console, state)\n"
+        " 2813: return render_grep_plain(state)\n"
+        "\n"
+        "[file] tools/x.py (1):\n"
+        " 42: hit\n"
+    )
+    out = module._normalize_rtk_grep_output(raw)
+    lines = [l for l in out.split("\n") if l.strip()]
+    assert lines == [
+        "tools/run-agent.py:2811:return render_grep_rich(console, state)",
+        "tools/run-agent.py:2813:return render_grep_plain(state)",
+        "tools/x.py:42:hit",
+    ]
+
+
+
+@pytest.mark.unit
+
+def test_normalize_rtk_grep_output_passes_through_when_no_markers():
+    module = load_tool_module("run_agent_shim_norm_passthrough", "tools/run-agent.py")
+    raw = "tools/foo.py:10:hit\nanother line\n"
+    assert module._normalize_rtk_grep_output(raw) == raw
+
+
+
+@pytest.mark.unit
+
+def test_strip_ls_long_format_to_filenames_strips_columns_and_total():
+    module = load_tool_module("run_agent_shim_ls_strip", "tools/run-agent.py")
+    raw = (
+        "total 616\n"
+        "drwxr-xr-x@ 14 pruiz staff 448 May 8 03:02 __pycache__\n"
+        "-rw-r--r--@ 1 pruiz staff 3893 May 8 00:37 _colors.py\n"
+        "-rwxr-xr-x@ 1 pruiz staff 6347 May 8 00:37 check-frontmatter.py\n"
+    )
+    out = module._strip_ls_long_format_to_filenames(raw)
+    assert out.split("\n") == ["__pycache__", "_colors.py", "check-frontmatter.py"]
+
+
+
+@pytest.mark.unit
+def test_maybe_render_bash_shim_skips_unrecognized_commands():
+    """None of the bash-shim interceptors may claim a command they do not recognise."""
+    state = {
+        "input": {"command": "make phase-1", "description": ""},
+        "output": "Phase 1 done",
+        "status": "completed",
+    }
+    # Parenthesised on purpose: `is` binds tighter than `or`, so without the parentheses only the last call was checked.
+    assert (RtkReadInterceptor().try_render(state.get('input', {}).get('command', ''), state, MockRenderer()) or RtkGrepInterceptor().try_render(state.get('input', {}).get('command', ''), state, MockRenderer()) or ShellListingInterceptor().try_render(state.get('input', {}).get('command', ''), state, MockRenderer())) is False
+
+
+
+@pytest.mark.unit
+
+def test_parse_find_tree_extracts_name_filter():
+    module = load_tool_module("run_agent_find_name_1", "tools/run-agent.py")
+    shim = module._parse_find_tree("find", ["itemdb/findings", "-name", "*.md"], "find itemdb/findings -name '*.md'")
+    assert shim is not None
+    assert shim.pattern == "*.md"
+    assert shim.path == "itemdb/findings"
+
+
+
+@pytest.mark.unit
+
+def test_parse_find_tree_extracts_iname_filter():
+    module = load_tool_module("run_agent_find_name_2", "tools/run-agent.py")
+    shim = module._parse_find_tree("find", [".", "-iname", "*.PY"], "find . -iname '*.PY'")
+    assert shim is not None
+    assert shim.pattern == "*.PY"
+    assert shim.path == "."
+ + + +@pytest.mark.unit + +def test_parse_find_tree_no_name_falls_back_to_verb(): + module = load_tool_module("run_agent_find_name_3", "tools/run-agent.py") + shim = module._parse_find_tree("find", ["src/"], "find src/") + assert shim is not None + assert shim.pattern == "find" + assert shim.path == "src/" + + + +@pytest.mark.unit + +def test_parse_find_tree_extracts_path_after_type_flag(): + module = load_tool_module("run_agent_find_name_4", "tools/run-agent.py") + shim = module._parse_find_tree("find", ["itemdb", "-type", "f", "-name", "*.md"], "find itemdb -type f -name '*.md'") + assert shim is not None + assert shim.pattern == "*.md" + assert shim.path == "itemdb" + + + +@pytest.mark.unit + +def test_parse_find_tree_tree_verb_no_name(): + module = load_tool_module("run_agent_find_name_5", "tools/run-agent.py") + shim = module._parse_find_tree("tree", ["src/"], "tree src/") + assert shim is not None + assert shim.pattern == "tree" + assert shim.path == "src/" + + +# --------------------------------------------------------------------------- +# load_prompt extra-prompt tests +# --------------------------------------------------------------------------- + diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py deleted file mode 100644 index 13963de..0000000 --- a/tests/test_run_agent.py +++ /dev/null @@ -1,1958 +0,0 @@ -from __future__ import annotations - -from collections import OrderedDict - -import pytest -from rich.console import Console - -from conftest import ROOT, load_tool_module - - -def _load_config_module(): - return load_tool_module("codecome_config", "tools/codecome/config.py") - - -def _load_graceful_module(): - return load_tool_module("codecome_graceful", "tools/codecome/graceful.py") - - -FIXTURES = ROOT / "tests" / "fixtures" / "run_agent" - - -@pytest.mark.unit -@pytest.mark.compat_matrix -@pytest.mark.parametrize( - ("fixture_name", "expected"), - [ - ("openai_export.json", "openai/gpt-5.3"), - ("anthropic_export.json", 
"anthropic/claude-opus-4-7"), - ("google_export.json", "google/gemini-2.5-pro"), - ("unknown_export.json", None), - ], -) -def test_extract_model_from_export_matrix(fixture_name, expected): - module = _load_config_module() - payload = (FIXTURES / fixture_name).read_text(encoding="utf-8") - assert module._extract_model_from_export(payload) == expected - - -@pytest.mark.unit -def test_extract_flag_value_supports_both_flag_forms(): - module = _load_config_module() - tokens = ["--model=openai/gpt-5", "--variant", "high"] - assert module._extract_flag_value(tokens, ("--model", "-m")) == "openai/gpt-5" - assert module._extract_flag_value(tokens, ("--variant",)) == "high" - - -@pytest.mark.unit -def test_strip_probe_unsafe_flags_removes_session_and_continue_flags(): - module = _load_config_module() - command = [ - "opencode", - "run", - "--format", - "json", - "--session", - "abc", - "--continue", - "--title=test", - "--port", - "9999", - "--agent", - "recon", - ] - out = module._strip_probe_unsafe_flags(command) - assert "--session" not in out - assert "--continue" not in out - assert "--title=test" not in out - assert "--port" not in out - assert "--agent" in out - - -@pytest.mark.unit -def test_resolve_model_and_variant_precedence(monkeypatch): - import sys - sys.path.insert(0, str(ROOT / "tools")) - import codecome.config as _cfg - monkeypatch.setenv("CODECOME_MODEL", "env/model") - monkeypatch.setenv("CODECOME_MODEL_VARIANT", "max") - monkeypatch.setattr(_cfg, "_read_codecome_yml_agent", lambda _agent: ("yaml/model", "yamlvar")) - monkeypatch.setattr(_cfg, "_discover_opencode_default_model", lambda: "history/model") - - model, variant, model_source, variant_source = _cfg.resolve_model_and_variant( - "auditor", ["--model", "args/model", "--variant=high"] - ) - assert (model, variant) == ("args/model", "high") - assert model_source == "OPENCODE_ARGS" - assert variant_source == "OPENCODE_ARGS" - - -@pytest.mark.component -def 
test_stream_model_scan_finds_nested_provider_model_pair(): - module = _load_config_module() - event = { - "type": "tool_result", - "part": { - "tool": "bash", - "state": { - "meta": { - "providerID": "anthropic", - "modelID": "claude-sonnet-4" - } - }, - }, - } - assert module._scan_event_for_model(event) == "anthropic/claude-sonnet-4" - - -@pytest.mark.unit -def test_thinking_default_is_disabled_for_anthropic_only(): - module = _load_config_module() - assert module._thinking_default_for_provider("anthropic") is False - assert module._thinking_default_for_provider("anthropic-foo") is False - assert module._thinking_default_for_provider("openai") is True - assert module._thinking_default_for_provider(None) is True - - -@pytest.mark.unit -def test_resolve_thinking_decision_precedence(monkeypatch): - module = load_tool_module("run_agent_thinking_precedence", "tools/run-agent.py") - - on, source = module.resolve_thinking_decision("anthropic/claude-opus-4-7", ["--thinking"]) - assert (on, source) == (True, "user-args") - - monkeypatch.setenv("CODECOME_THINKING", "0") - on, source = module.resolve_thinking_decision("openai/gpt-5", []) - assert (on, source) == (False, "env") - - monkeypatch.setenv("CODECOME_THINKING", "1") - on, source = module.resolve_thinking_decision("anthropic/claude-opus-4-7", []) - assert (on, source) == (True, "env") - - -@pytest.mark.unit -def test_show_model_table_prints_resolution_sources(monkeypatch, capsys): - """show_model_table should emit a table with all resolution sources.""" - import codecome.config as _cfg - monkeypatch.setenv("OPENCODE_ARGS", "--model openai/gpt-5 --variant high") - monkeypatch.setenv("CODECOME_MODEL", "env/model") - monkeypatch.setenv("CODECOME_MODEL_VARIANT", "envvar") - monkeypatch.setattr(_cfg, "_read_codecome_yml_agent", lambda _agent: ("yaml/model", "yamlvar")) - monkeypatch.setattr(_cfg, "_discover_opencode_default_model", lambda: "history/model") - - rc = _cfg.show_model_table("auditor") - assert rc == 0 - - 
out = capsys.readouterr().out - assert "Model resolution for agent auditor" in out - assert "OPENCODE_ARGS" in out - assert "env CODECOME_MODEL" in out - assert "codecome.yml" in out - assert "opencode session history" in out - assert "effective" in out - assert "openai/gpt-5" in out # args win - assert "high" in out - assert "thinking=" in out - - -@pytest.mark.unit -def test_parse_grep_output_detects_line_mode_and_file_mode(): - module = load_tool_module("run_agent_grep_parse", "tools/run-agent.py") - - mode, entries = module._parse_grep_output("foo.py:10:needle\nbar.py:2:x") - assert mode == "lines" - assert entries[0]["path"] == "foo.py" - assert entries[0]["line"] == 10 - - mode, entries = module._parse_grep_output("foo.py\nbar.py") - assert mode == "files" - assert entries == [{"path": "foo.py"}, {"path": "bar.py"}] - - -@pytest.mark.unit -def test_grep_compile_pattern_falls_back_for_invalid_regex(monkeypatch): - module = load_tool_module("run_agent_grep_compile", "tools/run-agent.py") - monkeypatch.setattr(module, "_GREP_HIGHLIGHT", True) - pat = module._grep_compile_pattern("(") - assert pat is not None - assert pat.pattern == "\\(" - - -@pytest.mark.unit -def test_render_reasoning_plain_skips_empty_and_whitespace(monkeypatch, capsys): - module = load_tool_module("run_agent_reasoning_skip", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "_RENDER_REASONING", True) - - # Empty body - module.render_reasoning(None, {"part": {"text": ""}}) - # Whitespace-only body - module.render_reasoning(None, {"part": {"text": " \n\t "}}) - # Missing text key - module.render_reasoning(None, {"part": {}}) - # Missing part dict - module.render_reasoning(None, {}) - - out = capsys.readouterr().out - assert out == "" - - -# --- subagent summary helper ------------------------------------------------- - -@pytest.mark.unit -def test_format_subagent_summary_formats_all_fields(): - module = 
load_tool_module("run_agent_subagent_summary", "tools/run-agent.py") - assert module._format_subagent_summary({"additions": 3, "deletions": 1, "files": 2}) == "+3 -1 2 file(s)" - assert module._format_subagent_summary({"additions": 0, "files": 1}) == "+0 -0 1 file(s)" - assert module._format_subagent_summary({"files": 5}) == "5 file(s)" - assert module._format_subagent_summary({}) == "" - assert module._format_subagent_summary(None) == "" - - -# --- task renderer ----------------------------------------------------------- - -@pytest.mark.unit -def test_render_task_plain_shows_description_truncated_prompt_and_output(monkeypatch, capsys): - module = load_tool_module("run_agent_task_plain", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "_TASK_PROMPT_PREVIEW_LINES", 2) - - state = { - "input": { - "description": "Analyze batch 2", - "subagent_type": "explore", - "prompt": "line one\nline two\nline three\nline four", - }, - "output": "Done analyzing.", - "status": "completed", - } - assert module.render_task_plain(state) is True - out = capsys.readouterr().out - assert "task Analyze batch 2 [explore] [completed]" in out - assert "line one" in out - assert "line two" in out - assert "... 2 more lines" in out - assert "Done analyzing." 
in out - - -@pytest.mark.unit -def test_render_task_plain_handles_missing_fields(monkeypatch, capsys): - module = load_tool_module("run_agent_task_plain_minimal", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - - state = {"input": {}, "status": "in_progress"} - assert module.render_task_plain(state) is True - out = capsys.readouterr().out - assert "task [in_progress]" in out - - -@pytest.mark.unit -def test_render_task_rich_shows_panel(monkeypatch): - module = load_tool_module("run_agent_task_rich", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", True) - monkeypatch.setattr(module, "_TASK_PROMPT_PREVIEW_LINES", 3) - - from rich.console import Console - - console = Console(record=True, force_terminal=True, width=60, highlight=False) - state = { - "input": { - "description": "Counter-analysis", - "subagentType": "reviewer", - "prompt": "a\nb\nc\nd", - }, - "status": "in_progress", - } - assert module.render_task_rich(console, state) is True - out = console.export_text() - assert "Task [in_progress]" in out - assert "Counter-analysis" in out - assert "[reviewer]" in out - assert "... 
1 more lines" in out - - -# --- subagent status renderer ------------------------------------------------ - -@pytest.mark.unit -def test_render_subagent_status_plain_created_and_finished(monkeypatch, capsys): - module = load_tool_module("run_agent_subagent_plain_lifecycle", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "_RENDER_SUBAGENT_UPDATES", True) - - module.render_subagent_status(None, { - "type": "subagent.status", - "properties": {"statusType": "created", "sessionID": "s1", "title": "Batch A"}, - }) - out = capsys.readouterr().out - assert "[subagent] started: Batch A" in out - - module.render_subagent_status(None, { - "type": "subagent.status", - "properties": {"statusType": "finished", "sessionID": "s1", "title": "Batch A"}, - }) - out = capsys.readouterr().out - assert "[subagent] finished: Batch A" in out - - -@pytest.mark.unit -def test_render_subagent_status_plain_heartbeat_shows_elapsed(monkeypatch, capsys): - module = load_tool_module("run_agent_subagent_plain_heartbeat", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "_RENDER_SUBAGENT_UPDATES", True) - - module.render_subagent_status(None, { - "type": "subagent.status", - "properties": { - "statusType": "heartbeat", - "sessionID": "s1", - "title": "Slow job", - "elapsedMs": 45000, - }, - }) - out = capsys.readouterr().out - assert "Subagent · Slow job still running (45s)" in out - - -@pytest.mark.unit -def test_render_subagent_status_plain_update_dedupes_unchanged_summary(monkeypatch, capsys): - module = load_tool_module("run_agent_subagent_plain_dedup", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "_RENDER_SUBAGENT_UPDATES", True) - monkeypatch.setattr(module, "_SUBAGENT_UPDATE_THROTTLE_S", 5) - - event = { - "type": "subagent.status", - "properties": { - "statusType": "updated", - "sessionID": "s2", - "title": "Job", - "summary": 
{"additions": 1, "deletions": 0, "files": 1}, - }, - } - - # First update renders. - module.render_subagent_status(None, event) - out = capsys.readouterr().out - assert "Subagent · Job" in out - assert "+1 -0" in out - - # Identical update immediately after is suppressed. - module.render_subagent_status(None, event) - out = capsys.readouterr().out - assert out == "" - - -@pytest.mark.unit -def test_render_subagent_status_plain_update_renders_when_summary_changes(monkeypatch, capsys): - module = load_tool_module("run_agent_subagent_plain_change", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "_RENDER_SUBAGENT_UPDATES", True) - monkeypatch.setattr(module, "_SUBAGENT_UPDATE_THROTTLE_S", 5) - - module.render_subagent_status(None, { - "type": "subagent.status", - "properties": { - "statusType": "updated", - "sessionID": "s3", - "title": "Job", - "summary": {"additions": 1, "files": 1}, - }, - }) - out = capsys.readouterr().out - assert "+1 -0" in out - - # Change summary -> renders again even inside throttle window. 
- module.render_subagent_status(None, { - "type": "subagent.status", - "properties": { - "statusType": "updated", - "sessionID": "s3", - "title": "Job", - "summary": {"additions": 2, "files": 1}, - }, - }) - out = capsys.readouterr().out - assert "+2 -0" in out - - -@pytest.mark.unit -def test_render_subagent_status_rich_created_renders_panel(monkeypatch): - module = load_tool_module("run_agent_subagent_rich_created", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", True) - monkeypatch.setattr(module, "_RENDER_SUBAGENT_UPDATES", True) - - from rich.console import Console - - console = Console(record=True, force_terminal=True, width=60, highlight=False) - module.render_subagent_status(console, { - "type": "subagent.status", - "properties": {"statusType": "created", "sessionID": "s1", "title": "Batch A"}, - }) - out = console.export_text() - assert "Subagent started" in out - assert "Batch A" in out - - -@pytest.mark.unit -def test_render_event_dispatches_subagent_status(monkeypatch): - """render_event dispatches subagent.status through SubagentStatusRenderer.""" - import rendering.events as _evts - module = load_tool_module("run_agent_dispatch_subagent", "tools/run-agent.py") - calls = [] - - class _FakeRenderer: - def __init__(self, ctx): - pass - def render(self, event): - calls.append("subagent.status") - return True - - monkeypatch.setattr(_evts, "SubagentStatusRenderer", _FakeRenderer) - module.render_event(None, "2", "x", {"type": "subagent.status", "properties": {}}) - assert calls == ["subagent.status"] - - -@pytest.mark.unit -def test_dispatch_tool_renderer_routes_task_to_task_renderer(monkeypatch): - """_dispatch_tool_renderer delegates 'task' to rendering.tools.task.TaskRenderer.""" - import rendering.tools.task as _task_mod - module = load_tool_module("run_agent_dispatch_task", "tools/run-agent.py") - - task_calls = [] - - class _FakeRenderer: - def render(self, tool_name, state): - task_calls.append(tool_name) - return True - - 
monkeypatch.setattr(_task_mod, "TaskRenderer", lambda ctx: _FakeRenderer()) - - # With rich - monkeypatch.setattr(module, "HAVE_RICH", True) - assert module._dispatch_tool_renderer(None, "task", {}) is True - assert task_calls == ["task"] - - task_calls.clear() - monkeypatch.setattr(module, "HAVE_RICH", False) - assert module._dispatch_tool_renderer(None, "task", {}) is True - assert task_calls == ["task"] - - -# --- reasoning / error rendering edge cases -------------------------------- - - -@pytest.mark.unit -def test_render_reasoning_plain_skips_empty_and_whitespace(monkeypatch, capsys): - module = load_tool_module("run_agent_reasoning_skip", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "_RENDER_REASONING", True) - - # Empty body - module.render_reasoning(None, {"part": {"text": ""}}) - # Whitespace-only body - module.render_reasoning(None, {"part": {"text": " \n\t "}}) - # Missing text key - module.render_reasoning(None, {"part": {}}) - # Missing part dict - module.render_reasoning(None, {}) - - out = capsys.readouterr().out - assert out == "" - - -@pytest.mark.unit -def test_render_reasoning_rich_wraps_markdown_inside_panel(monkeypatch): - module = load_tool_module("run_agent_reasoning_rich_wrap", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", True) - monkeypatch.setattr(module, "_RENDER_REASONING", True) - monkeypatch.setattr(module, "_REASONING_MAX_CHARS", 10000) - - console = Console(record=True, force_terminal=True, width=60, highlight=False) - text = ( - "**Summarizing file updates**\n\n" - "I need to mention the sandbox and validate the modifications. " - "Updating the item database for attack surfaces might need a more " - "realistic runtime setup so later phases can rely on it." 
- ) - - module.render_reasoning(console, {"part": {"text": text}}) - - out = console.export_text() - assert "Thinking" in out - assert "Summarizing file updates" in out - assert "realistic runtime setup" in out - assert "later phases can rely on it" in out - - -@pytest.mark.unit -def test_render_reasoning_rich_wraps_plain_text_inside_panel(monkeypatch): - module = load_tool_module("run_agent_reasoning_rich_plain_wrap", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", True) - monkeypatch.setattr(module, "_RENDER_REASONING", True) - monkeypatch.setattr(module, "_REASONING_MAX_CHARS", 10000) - - console = Console(record=True, force_terminal=True, width=60, highlight=False) - text = ( - "I need to mention the sandbox and validate the modifications. " - "Updating the item database for attack surfaces might need a more " - "realistic runtime setup so later phases can rely on it." - ) - - module.render_reasoning(console, {"part": {"text": text}}) - - out = console.export_text() - assert "Thinking" in out - assert "Updating the item database" in out - assert "realistic runtime setup" in out - assert "later phases can rely on it" in out - - -@pytest.mark.unit -def test_render_error_plain_mode_handles_missing_error_field(monkeypatch, capsys): - module = load_tool_module("run_agent_error_missing", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - - module.render_error(None, {}) - out = capsys.readouterr().out - # Title line plus a (no error message) body line. 
- assert "Error" in out - assert "(no error message)" in out - - -@pytest.mark.unit -def test_render_error_plain_mode_handles_dict_with_only_message(monkeypatch, capsys): - module = load_tool_module("run_agent_error_only_msg", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - - module.render_error(None, {"error": {"message": "rate limited"}}) - out = capsys.readouterr().out - assert "rate limited" in out - - -# --- grep highlight helpers ------------------------------------------------ - -@pytest.mark.unit -def test_grep_format_line_plain_with_color_emits_ansi(monkeypatch): - module = load_tool_module("run_agent_grep_plain_ansi", "tools/run-agent.py") - monkeypatch.setattr(module, "_GREP_HIGHLIGHT", True) - pat = module._grep_compile_pattern("error") - out = module._grep_format_line_plain(42, "an error here and error again", pat, color=True) - # Bold yellow + reset around each match. - assert out.count("\x1b[1;33m") == 2 - assert out.count("\x1b[0m") == 2 - assert " 42: " in out - - -@pytest.mark.unit -def test_grep_format_line_plain_without_color_uses_markers(monkeypatch): - module = load_tool_module("run_agent_grep_plain_markers", "tools/run-agent.py") - monkeypatch.setattr(module, "_GREP_HIGHLIGHT", True) - pat = module._grep_compile_pattern("error") - out = module._grep_format_line_plain(7, "an error", pat, color=False) - assert ">>>error<<<" in out - assert " 7: " in out - - -@pytest.mark.unit -def test_grep_format_line_plain_no_pattern_returns_unstyled(monkeypatch): - module = load_tool_module("run_agent_grep_plain_nopat", "tools/run-agent.py") - out = module._grep_format_line_plain(99, "plain text", None, color=True) - assert out == " 99: plain text" - - -@pytest.mark.unit -def test_grep_format_line_plain_disabled_returns_unstyled(monkeypatch): - """CODECOME_GREP_HIGHLIGHT=0 must skip even ANSI emission.""" - module = load_tool_module("run_agent_grep_plain_disabled", "tools/run-agent.py") - monkeypatch.setattr(module, 
"_GREP_HIGHLIGHT", False) - pat = module._grep_compile_pattern("error") # returns None when disabled - out = module._grep_format_line_plain(1, "an error", pat, color=True) - assert "\x1b[1;33m" not in out - assert ">>>" not in out - assert "an error" in out - - -@pytest.mark.unit -def test_grep_compile_pattern_returns_none_when_highlight_disabled(monkeypatch): - module = load_tool_module("run_agent_grep_compile_off", "tools/run-agent.py") - monkeypatch.setattr(module, "_GREP_HIGHLIGHT", False) - assert module._grep_compile_pattern("foo") is None - - -@pytest.mark.unit -def test_grep_compile_pattern_returns_none_for_empty_pattern(monkeypatch): - module = load_tool_module("run_agent_grep_compile_empty", "tools/run-agent.py") - monkeypatch.setattr(module, "_GREP_HIGHLIGHT", True) - assert module._grep_compile_pattern("") is None - - -# --- grep parser additional cases ------------------------------------------ - -@pytest.mark.unit -def test_parse_grep_output_empty_returns_empty_files_mode(): - module = load_tool_module("run_agent_grep_empty", "tools/run-agent.py") - mode, entries = module._parse_grep_output("") - assert mode == "files" - assert entries == [] - - -@pytest.mark.unit -def test_parse_grep_output_70_percent_threshold_for_lines_mode(): - module = load_tool_module("run_agent_grep_threshold", "tools/run-agent.py") - - # 7 of 10 lines are line-level => exactly 70% => "lines" mode. - output = "\n".join( - [f"foo.py:{i}:match" for i in range(7)] + ["plain1", "plain2", "plain3"] - ) - mode, entries = module._parse_grep_output(output) - assert mode == "lines" - # The non-matching lines become path-only entries with line=0. - assert any(e["line"] == 0 for e in entries) - - # 6 of 10 -> below threshold -> "files" mode. 
- output_low = "\n".join( - [f"foo.py:{i}:match" for i in range(6)] + ["a", "b", "c", "d"] - ) - mode, _ = module._parse_grep_output(output_low) - assert mode == "files" - - -@pytest.mark.unit -def test_cache_invalidate_stale_removes_missing_and_modified(monkeypatch, tmp_path): - """_cache_invalidate_stale should remove entries for deleted files - and for files whose mtime changed since caching.""" - module = load_tool_module("run_agent_cache_stale", "tools/run-agent.py") - monkeypatch.setattr(module, "_WRITE_CACHE_ENABLED", True) - - # _SNAPSHOT_CACHE is an module-level OrderedDict; monkeypatch it per-test. - fake_cache = OrderedDict() - monkeypatch.setattr(module, "_SNAPSHOT_CACHE", fake_cache) - - existing = tmp_path / "existing.txt" - existing.write_text("old", encoding="utf-8") - deleted = tmp_path / "deleted.txt" - deleted.write_text("gone", encoding="utf-8") - - module._cache_set(str(existing), "old") - module._cache_set(str(deleted), "gone") - - assert str(existing) in fake_cache - assert str(deleted) in fake_cache - - # Simulate file deletion - deleted.unlink() - # Simulate modification of existing file - existing.write_text("new", encoding="utf-8") - - module._cache_invalidate_stale() - - # Deleted and modified entries are both removed - assert str(deleted) not in fake_cache - assert str(existing) not in fake_cache - - -@pytest.mark.unit -def test_read_renderer_caches_stripped_lines_instead_of_numbered(monkeypatch): - module = load_tool_module("run_agent_read_cache_strip", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "_INTERNAL_READ_SUPPRESS", False) - - cache_writes = [] - - def fake_cache_set(path, content): - cache_writes.append((path, content)) - - monkeypatch.setattr(module, "_cache_set", fake_cache_set) - - output = "/tmp/x.txt\nfile\n\n1: alpha\n2: beta\n\n(End of file - total 2 lines)\n" - state = { - "input": {"filePath": "/tmp/x.txt", "offset": 1, "limit": 20}, - "output": output, - 
"status": "completed", - } - - assert module.render_read_plain(state) is True - assert cache_writes - assert cache_writes[-1][1] == "alpha\nbeta" - - -@pytest.mark.unit -def test_write_diff_uses_clean_cached_content_without_line_numbers(monkeypatch, capsys): - module = load_tool_module("run_agent_write_diff_clean", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - - monkeypatch.setattr(module, "_cache_get", lambda _path: "alpha\nbeta\n") - monkeypatch.setattr(module, "_cache_set", lambda _path, _content: None) - - state = { - "input": {"filePath": "/tmp/x.txt", "content": "alpha\ngamma\n"}, - "output": "Wrote file successfully.", - "status": "completed", - } - - assert module.render_write_plain(state) is True - out = capsys.readouterr().out - assert "-1: alpha" not in out - assert "-2: beta" not in out - assert "+1: alpha" not in out - assert "+2: gamma" not in out - - -# --- sandbox-bootstrap renderer detection ---------------------------------- - -SANDBOX_FIXTURES = ROOT / "tests" / "fixtures" / "sandbox_bootstrap" - - -@pytest.mark.unit -@pytest.mark.parametrize( - ("command", "expected"), - [ - # Direct script invocations. - (".venv/bin/python3 tools/sandbox-bootstrap.py --format json status", "status"), - ("python3 tools/sandbox-bootstrap.py status --format=json", "status"), - ("python tools/sandbox-bootstrap.py --format json validate --keep-going", "validate"), - ("python tools/sandbox-bootstrap.py --format=json detect", "detect"), - ("./tools/sandbox-bootstrap.py --format json list", "list"), - # make-target wrappers with json forced via BOOTSTRAP_ARGS. - ("make sandbox-status BOOTSTRAP_ARGS='--format json'", "status"), - ("make sandbox-validate BOOTSTRAP_ARGS=--format=json", "validate"), - ("make sandbox-bootstrap ID=python BOOTSTRAP_ARGS='--format json'", "apply"), - ("BOOTSTRAP_ARGS='--format json --keep-going' make sandbox-validate", "validate"), - ("BOOTSTRAP_ARGS=--format=json make sandbox-status", "status"), - # Negatives. 
- ("python tools/sandbox-bootstrap.py status", None), # no --format json - ("make sandbox-status", None), # text mode - ("python tools/list-findings.py --format json", None), # different script - ("", None), - ("ls -la", None), - ], -) -def test_is_sandbox_bootstrap_json_call(command, expected): - module = load_tool_module("run_agent_sandbox_detect", "tools/run-agent.py") - assert module._is_sandbox_bootstrap_json_call(command) == expected - - -@pytest.mark.unit -def test_lexer_map_includes_erlang_extensions(): - module = load_tool_module("run_agent_erlang_lexer", "tools/run-agent.py") - - assert module._LEXER_MAP[".erl"] == "erlang" - assert module._LEXER_MAP[".hrl"] == "erlang" - - -@pytest.mark.unit -def test_sandbox_payload_matches_filters_unrelated_json(): - module = load_tool_module("run_agent_sandbox_match", "tools/run-agent.py") - - # Status-shape payload matches. - assert module._sandbox_payload_matches("status", {"sandbox_state": "missing", "capabilities": {}}) is True - # Unrelated dict does not match status. - assert module._sandbox_payload_matches("status", {"foo": "bar"}) is False - # list expects a list. - assert module._sandbox_payload_matches("list", []) is True - assert module._sandbox_payload_matches("list", {"id": "x"}) is False - # validate expects overall_outcome or tiers. 
- assert module._sandbox_payload_matches("validate", {"overall_outcome": "passed"}) is True - assert module._sandbox_payload_matches("validate", {"tiers": []}) is True - assert module._sandbox_payload_matches("validate", {"unrelated": True}) is False - - -@pytest.mark.unit -def test_sandbox_glyphs_uses_emoji_on_utf8_else_ascii(monkeypatch): - module = load_tool_module("run_agent_sandbox_glyphs", "tools/run-agent.py") - - class FakeConsole: - encoding = "utf-8" - - glyphs = module._sandbox_glyphs(FakeConsole()) - assert glyphs["ok"] == "✅" - assert glyphs["fail"] == "❌" - - class AsciiConsole: - encoding = "ascii" - - glyphs = module._sandbox_glyphs(AsciiConsole()) - assert glyphs["ok"] == "[OK]" - assert glyphs["fail"] == "[FAIL]" - - -@pytest.mark.component -def test_render_sandbox_status_plain_renders_pass_gate(monkeypatch, capsys): - """End-to-end through _maybe_render_sandbox_bootstrap with a real status payload.""" - module = load_tool_module("run_agent_sandbox_status_plain", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - - payload = (SANDBOX_FIXTURES / "status_pass.json").read_text(encoding="utf-8") - state = { - "input": { - "command": ".venv/bin/python3 tools/sandbox-bootstrap.py --format json status", - "description": "Show sandbox status", - }, - "output": payload, - "status": "completed", - } - handled = module._maybe_render_sandbox_bootstrap(None, state) - assert handled is True - out = capsys.readouterr().out - assert "Sandbox" in out - assert "status" in out - # Required capabilities should each appear with an OK marker. - for cap in ("setup", "start", "check", "build", "test", "stop"): - assert cap in out - - -@pytest.mark.component -def test_render_sandbox_validate_plain_failed_shows_stderr_tail(monkeypatch, capsys): - module = load_tool_module("run_agent_sandbox_validate_plain", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - # Force a small cap so we can confirm truncation works. 
- monkeypatch.setattr(module, "_SANDBOX_VALIDATE_STDERR_LINES", 2) - - payload = (SANDBOX_FIXTURES / "validate_failed.json").read_text(encoding="utf-8") - state = { - "input": { - "command": "tools/sandbox-bootstrap.py --format json validate", - "description": "validate", - }, - "output": payload, - "status": "completed", - } - handled = module._maybe_render_sandbox_bootstrap(None, state) - assert handled is True - out = capsys.readouterr().out - assert "failed" in out - # Failed tier's stderr_tail should appear (capped to 2 lines). - assert "port 5432 already in use" in out - assert "please free the port" in out - # "Error: container failed to start" is the earliest of 3 lines and - # must be elided by the cap. - assert "Error: container failed to start" not in out - assert "earlier lines truncated" in out - # missing helpers warning is present. - assert "clean" in out and "reset" in out - - -@pytest.mark.component -def test_render_sandbox_apply_plain_dry_run_lists_unfilled_markers(monkeypatch, capsys): - module = load_tool_module("run_agent_sandbox_apply_plain", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - - payload = (SANDBOX_FIXTURES / "apply_dry_run.json").read_text(encoding="utf-8") - state = { - "input": { - "command": "tools/sandbox-bootstrap.py --format json apply python --dry-run --var PYTHON_VERSION=3.11", - "description": "apply", - }, - "output": payload, - "status": "completed", - } - handled = module._maybe_render_sandbox_bootstrap(None, state) - assert handled is True - out = capsys.readouterr().out - assert "DRY RUN" in out - assert "EXTRA_PIP" in out - assert "Dockerfile" in out - - -@pytest.mark.component -def test_render_sandbox_list_plain_uses_real_fixture(monkeypatch, capsys): - module = load_tool_module("run_agent_sandbox_list_plain", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - - payload = (SANDBOX_FIXTURES / "list.json").read_text(encoding="utf-8") - state = { - "input": { - 
"command": "tools/sandbox-bootstrap.py --format json list", - "description": "list", - }, - "output": payload, - "status": "completed", - } - handled = module._maybe_render_sandbox_bootstrap(None, state) - assert handled is True - out = capsys.readouterr().out - # Spot-check at least one known example id. - import json as _json - examples = _json.loads(payload) - assert len(examples) > 0 - first_id = examples[0]["id"] - assert first_id in out - assert "example(s) available" in out - - -@pytest.mark.unit -def test_maybe_render_sandbox_bootstrap_skips_non_sandbox_bash(monkeypatch): - module = load_tool_module("run_agent_sandbox_skip", "tools/run-agent.py") - state = { - "input": {"command": "ls -la", "description": "list files"}, - "output": "total 0", - "status": "completed", - } - assert module._maybe_render_sandbox_bootstrap(None, state) is False - - -@pytest.mark.unit -def test_maybe_render_sandbox_bootstrap_strips_leading_text(monkeypatch, capsys): - module = load_tool_module("run_agent_sandbox_leading_text", "tools/run-agent.py") - # Simulate a make command that echoes the invocation line before the JSON payload - state = { - "input": {"command": "tools/sandbox-bootstrap.py --format json status"}, - "output": 'python tools/sandbox-bootstrap.py status --format json\n{"sandbox_state": "missing", "phase2_gate_pass": false, "capabilities": {}}', - "status": "completed", - } - - # Force _SANDBOX_RENDER = True - monkeypatch.setattr(module, "_SANDBOX_RENDER", True) - - assert module._maybe_render_sandbox_bootstrap(None, state) is True - - captured = capsys.readouterr() - assert "Sandbox · status" in captured.out - - -@pytest.mark.unit -def test_maybe_render_sandbox_bootstrap_handles_env_prefixed_make(monkeypatch, capsys): - module = load_tool_module("run_agent_sandbox_env_prefixed_make", "tools/run-agent.py") - state = { - "input": { - "command": "BOOTSTRAP_ARGS='--format json --keep-going' make sandbox-validate", - "description": "Run validation with longer timeout", 
- }, - "output": '{"overall_outcome": "passed", "tiers": []}', - "status": "completed", - } - - monkeypatch.setattr(module, "_SANDBOX_RENDER", True) - - assert module._maybe_render_sandbox_bootstrap(None, state) is True - captured = capsys.readouterr() - assert "Sandbox · validate" in captured.out - - -@pytest.mark.unit -def test_maybe_render_sandbox_bootstrap_falls_through_on_invalid_json(monkeypatch): - module = load_tool_module("run_agent_sandbox_bad_json", "tools/run-agent.py") - state = { - "input": {"command": "tools/sandbox-bootstrap.py --format json status"}, - "output": "Loading config...\n{partial", - "status": "completed", - } - assert module._maybe_render_sandbox_bootstrap(None, state) is False - - -@pytest.mark.unit -def test_maybe_render_sandbox_bootstrap_falls_through_on_schema_mismatch(monkeypatch): - module = load_tool_module("run_agent_sandbox_schema_miss", "tools/run-agent.py") - state = { - "input": {"command": "tools/sandbox-bootstrap.py --format json status"}, - "output": '{"unrelated": true, "foo": [1, 2, 3]}', - "status": "completed", - } - # Looks like JSON, parses as JSON, but does not have any of - # sandbox_state / phase2_gate_pass / capabilities -> fall through. 
- assert module._maybe_render_sandbox_bootstrap(None, state) is False - - -@pytest.mark.unit -def test_maybe_render_sandbox_bootstrap_disabled_via_env(monkeypatch): - module = load_tool_module("run_agent_sandbox_disabled", "tools/run-agent.py") - monkeypatch.setattr(module, "_SANDBOX_RENDER", False) - state = { - "input": {"command": "tools/sandbox-bootstrap.py --format json status"}, - "output": '{"sandbox_state": "missing", "phase2_gate_pass": false, "capabilities": {}}', - "status": "completed", - } - assert module._maybe_render_sandbox_bootstrap(None, state) is False - - -# --- bash-shim detection ---------------------------------------------------- - -@pytest.mark.unit -@pytest.mark.parametrize( - ("command", "expected_family", "expected_attrs"), - [ - # rtk read family - ("rtk read README.md", "read", {"files": ["README.md"], "rtk_filtered": False}), - ("rtk read README.md AGENTS.md", "read", - {"files": ["README.md", "AGENTS.md"], "rtk_filtered": False}), - ("rtk read --level minimal README.md", "read", - {"files": ["README.md"], "rtk_filtered": True}), - ("rtk read --tail-lines 5 README.md", "read", - {"files": ["README.md"], "rtk_filtered": True}), - ("rtk read -n -m 50 README.md", "read", - {"files": ["README.md"], "rtk_filtered": True}), - # cat / head / tail - ("cat README.md", "read", {"files": ["README.md"]}), - ("cat README.md AGENTS.md", "read", {"files": ["README.md", "AGENTS.md"]}), - ("head -n 10 README.md", "read", {"files": ["README.md"], "head_limit": 10}), - ("head -n10 README.md", "read", {"files": ["README.md"], "head_limit": 10}), - ("tail -n 5 README.md", "read", {"files": ["README.md"], "tail_limit": 5}), - # grep / rg / rtk grep - ("rg foo tools/run-agent.py", "grep", {"pattern": "foo", "path": "tools/run-agent.py"}), - ("rg --vimgrep render_grep tools/run-agent.py", "grep", - {"pattern": "render_grep", "path": "tools/run-agent.py"}), - ("rtk grep render_grep tools/run-agent.py", "grep", - {"pattern": "render_grep", "path": 
"tools/run-agent.py"}), - ("rtk grep -i needle .", "grep", {"pattern": "needle", "path": "."}), - ("grep -r foo bar/", "grep", {"pattern": "foo", "path": "bar/"}), - # ls - ("ls", "ls", {"path": ".", "long_format": False}), - ("ls -la tools", "ls", {"path": "tools", "long_format": True}), - ("rtk ls -la", "ls", {"path": ".", "long_format": True}), - # find / tree - ("find tools", "find", {"path": "tools"}), - ("find tools -name '*.py'", "find", {"path": "tools"}), - ("tree", "find", {"path": "."}), - # leading env / sudo wrappers should be stripped - ("LANG=C ls tools", "ls", {"path": "tools"}), - ("sudo cat /etc/hosts", "read", {"files": ["/etc/hosts"]}), - ("time rg foo bar/", "grep", {"pattern": "foo", "path": "bar/"}), - ], -) -def test_is_bash_shim_call_recognises_supported_commands(command, expected_family, expected_attrs): - module = load_tool_module("run_agent_shim_detect", "tools/run-agent.py") - shim = module._is_bash_shim_call(command) - assert shim is not None, f"expected shim match for {command!r}" - assert shim.family == expected_family - for k, v in expected_attrs.items(): - assert getattr(shim, k) == v, ( - f"attribute {k}: expected {v!r}, got {getattr(shim, k)!r} for {command!r}" - ) - - -@pytest.mark.unit -@pytest.mark.parametrize( - "command", - [ - "", - "echo hello", - "make phase-1", - "git status", - "rtk diff a b", - "rtk smart README.md", - # Pipelines / redirections / substitutions disqualify shim handling. - "cat README.md | head", - "rg foo > out.txt", - "ls && pwd", - "ls; pwd", - "echo $(pwd)", - "cat `which python`", - # No file argument. - "rtk read", - "cat", - "rg", - # rtk subcommand we don't route. 
- "rtk json '{}'", - "rtk wc README.md", - ], -) -def test_is_bash_shim_call_rejects_unsupported(command): - module = load_tool_module("run_agent_shim_reject", "tools/run-agent.py") - assert module._is_bash_shim_call(command) is None - - -@pytest.mark.unit -def test_normalize_rtk_grep_output_converts_grouped_to_flat(): - module = load_tool_module("run_agent_shim_norm_rtk", "tools/run-agent.py") - raw = ( - "4 matches in 3F:\n" - "\n" - "[file] tools/run-agent.py (2):\n" - " 2811: return render_grep_rich(console, state)\n" - " 2813: return render_grep_plain(state)\n" - "\n" - "[file] tools/x.py (1):\n" - " 42: hit\n" - ) - out = module._normalize_rtk_grep_output(raw) - lines = [l for l in out.split("\n") if l.strip()] - assert lines == [ - "tools/run-agent.py:2811:return render_grep_rich(console, state)", - "tools/run-agent.py:2813:return render_grep_plain(state)", - "tools/x.py:42:hit", - ] - - -@pytest.mark.unit -def test_normalize_rtk_grep_output_passes_through_when_no_markers(): - module = load_tool_module("run_agent_shim_norm_passthrough", "tools/run-agent.py") - raw = "tools/foo.py:10:hit\nanother line\n" - assert module._normalize_rtk_grep_output(raw) == raw - - -@pytest.mark.unit -def test_strip_ls_long_format_to_filenames_strips_columns_and_total(): - module = load_tool_module("run_agent_shim_ls_strip", "tools/run-agent.py") - raw = ( - "total 616\n" - "drwxr-xr-x@ 14 pruiz staff 448 May 8 03:02 __pycache__\n" - "-rw-r--r--@ 1 pruiz staff 3893 May 8 00:37 _colors.py\n" - "-rwxr-xr-x@ 1 pruiz staff 6347 May 8 00:37 check-frontmatter.py\n" - ) - out = module._strip_ls_long_format_to_filenames(raw) - assert out.split("\n") == ["__pycache__", "_colors.py", "check-frontmatter.py"] - - -@pytest.mark.component -def test_render_shim_read_routes_to_read_renderer(monkeypatch, capsys): - module = load_tool_module("run_agent_shim_read_e2e", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "_INTERNAL_READ_SUPPRESS", 
False) - - raw_content = "alpha\nbeta\ngamma\n" - state = { - "input": {"command": "rtk read tests/fixtures/run_agent/openai_export.json", "description": "rtk read"}, - "output": raw_content, - "status": "completed", - } - handled = module._maybe_render_bash_shim(None, state) - assert handled is True - out = capsys.readouterr().out - # The Read renderer header includes the file path. - assert "openai_export.json" in out - # Body content is rendered. - assert "alpha" in out - assert "beta" in out - - -@pytest.mark.component -def test_render_shim_grep_routes_through_normalizer(monkeypatch, capsys): - module = load_tool_module("run_agent_shim_grep_e2e", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - - rtk_output = ( - "2 matches in 1F:\n" - "\n" - "[file] tools/run-agent.py (2):\n" - " 100: foo bar\n" - " 200: foo baz\n" - ) - state = { - "input": {"command": "rtk grep foo tools/run-agent.py", "description": "rtk grep"}, - "output": rtk_output, - "status": "completed", - } - handled = module._maybe_render_bash_shim(None, state) - assert handled is True - out = capsys.readouterr().out - assert "tools/run-agent.py" in out - # Both line numbers should appear since grep renderer detected lines mode. - assert "100" in out and "200" in out - # The header pattern should be visible. - assert "foo" in out - - -@pytest.mark.component -def test_render_shim_ls_long_format_strips_columns(monkeypatch, capsys): - module = load_tool_module("run_agent_shim_ls_e2e", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - - long_ls = ( - "total 4\n" - "-rw-r--r-- 1 u g 10 May 8 README.md\n" - "drwxr-xr-x 2 u g 1 May 8 docs\n" - ) - state = { - "input": {"command": "ls -la", "description": "ls"}, - "output": long_ls, - "status": "completed", - } - handled = module._maybe_render_bash_shim(None, state) - assert handled is True - out = capsys.readouterr().out - assert "README.md" in out - assert "docs" in out - # Long-format columns must be gone. 
- assert "rw-r--r--" not in out - assert "May 8" not in out - - -@pytest.mark.component -def test_render_shim_ls_long_format_can_be_disabled(monkeypatch, capsys): - module = load_tool_module("run_agent_shim_ls_no_strip", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "_BASH_SHIM_LS_STRIP_LONG_FORMAT", False) - - long_ls = "total 4\n-rw-r--r-- 1 u g 10 May 8 README.md\n" - state = { - "input": {"command": "ls -la", "description": "ls"}, - "output": long_ls, - "status": "completed", - } - handled = module._maybe_render_bash_shim(None, state) - assert handled is True - out = capsys.readouterr().out - # When disabled, the renderer keeps the long-format raw line. - assert "rw-r--r--" in out - - -@pytest.mark.unit -def test_maybe_render_bash_shim_disabled_via_env(monkeypatch): - module = load_tool_module("run_agent_shim_disabled", "tools/run-agent.py") - monkeypatch.setattr(module, "_BASH_SHIM_RENDER", False) - state = { - "input": {"command": "rtk read README.md"}, - "output": "anything", - "status": "completed", - } - assert module._maybe_render_bash_shim(None, state) is False - - -@pytest.mark.unit -def test_maybe_render_bash_shim_skips_unrecognized_commands(): - module = load_tool_module("run_agent_shim_skip", "tools/run-agent.py") - state = { - "input": {"command": "make phase-1", "description": ""}, - "output": "Phase 1 done", - "status": "completed", - } - assert module._maybe_render_bash_shim(None, state) is False - - -@pytest.mark.component -def test_render_shim_read_filtered_triggers_cache_reread(monkeypatch): - """When rtk read uses a filtering flag, the renderer must call - _cache_reread for each requested file so the cache holds raw disk - content instead of the filtered output.""" - module = load_tool_module("run_agent_shim_filter_cache", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "_INTERNAL_READ_SUPPRESS", False) - - cache_calls: list[str] = [] 
- monkeypatch.setattr(module, "_cache_reread", lambda p: cache_calls.append(p)) - - state = { - "input": {"command": "rtk read --level aggressive tools/run-agent.py", "description": "rtk read"}, - "output": "filtered content", - "status": "completed", - } - handled = module._maybe_render_bash_shim(None, state) - assert handled is True - assert any("tools/run-agent.py" in c for c in cache_calls) - - -@pytest.mark.component -def test_render_shim_read_multi_file_triggers_cache_reread(monkeypatch): - """rtk read of multiple files concatenates output without delimiters, - so we cannot per-file split. The renderer must fall back to direct - filesystem reads to refresh the cache.""" - module = load_tool_module("run_agent_shim_multi_cache", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "_INTERNAL_READ_SUPPRESS", False) - - cache_calls: list[str] = [] - monkeypatch.setattr(module, "_cache_reread", lambda p: cache_calls.append(p)) - - state = { - "input": {"command": "rtk read README.md AGENTS.md", "description": "rtk read"}, - "output": "combined content", - "status": "completed", - } - handled = module._maybe_render_bash_shim(None, state) - assert handled is True - assert sum("README.md" in c for c in cache_calls) == 1 - assert sum("AGENTS.md" in c for c in cache_calls) == 1 - - -# --------------------------------------------------------------------------- -# Glob output parsing — summary line filtering -# --------------------------------------------------------------------------- - -@pytest.mark.unit -def test_parse_glob_output_filters_summary_lines(): - module = load_tool_module("run_agent_glob_summary_1", "tools/run-agent.py") - output = "0 for '*.md'\n" - files, summaries = module._parse_glob_output(output) - assert files == [] - assert summaries == ["0 for '*.md'"] - - -@pytest.mark.unit -def test_parse_glob_output_keeps_real_paths(): - module = load_tool_module("run_agent_glob_summary_2", "tools/run-agent.py") - 
output = "src/foo.py\nsrc/bar.py\n" - files, summaries = module._parse_glob_output(output) - assert files == ["src/foo.py", "src/bar.py"] - assert summaries == [] - - -@pytest.mark.unit -def test_parse_glob_output_mixed(): - module = load_tool_module("run_agent_glob_summary_3", "tools/run-agent.py") - output = "src/foo.py\nsrc/bar.py\n3 match(es)\n" - files, summaries = module._parse_glob_output(output) - assert files == ["src/foo.py", "src/bar.py"] - assert len(summaries) == 1 - - -@pytest.mark.unit -def test_parse_glob_output_no_matches_found(): - module = load_tool_module("run_agent_glob_summary_4", "tools/run-agent.py") - output = "No matches found\n" - files, summaries = module._parse_glob_output(output) - assert files == [] - assert summaries == ["No matches found"] - - -@pytest.mark.component -def test_render_glob_plain_zero_matches_with_summary(capsys): - module = load_tool_module("run_agent_glob_summary_5", "tools/run-agent.py") - state = { - "input": {"pattern": "**/*.md", "path": "itemdb/findings"}, - "output": "0 for '*.md'\n", - "status": "completed", - } - result = module.render_glob_plain(state) - assert result is True - out = capsys.readouterr().out - assert "0 for" in out - # Footer should say 0, not 1. 
- assert "0 match(es)" in out - - -# --------------------------------------------------------------------------- -# find -name extraction -# --------------------------------------------------------------------------- - -@pytest.mark.unit -def test_parse_find_tree_extracts_name_filter(): - module = load_tool_module("run_agent_find_name_1", "tools/run-agent.py") - shim = module._parse_find_tree("find", ["itemdb/findings", "-name", "*.md"], "find itemdb/findings -name '*.md'") - assert shim is not None - assert shim.pattern == "*.md" - assert shim.path == "itemdb/findings" - - -@pytest.mark.unit -def test_parse_find_tree_extracts_iname_filter(): - module = load_tool_module("run_agent_find_name_2", "tools/run-agent.py") - shim = module._parse_find_tree("find", [".", "-iname", "*.PY"], "find . -iname '*.PY'") - assert shim is not None - assert shim.pattern == "*.PY" - assert shim.path == "." - - -@pytest.mark.unit -def test_parse_find_tree_no_name_falls_back_to_verb(): - module = load_tool_module("run_agent_find_name_3", "tools/run-agent.py") - shim = module._parse_find_tree("find", ["src/"], "find src/") - assert shim is not None - assert shim.pattern == "find" - assert shim.path == "src/" - - -@pytest.mark.unit -def test_parse_find_tree_extracts_path_after_type_flag(): - module = load_tool_module("run_agent_find_name_4", "tools/run-agent.py") - shim = module._parse_find_tree("find", ["itemdb", "-type", "f", "-name", "*.md"], "find itemdb -type f -name '*.md'") - assert shim is not None - assert shim.pattern == "*.md" - assert shim.path == "itemdb" - - -@pytest.mark.unit -def test_parse_find_tree_tree_verb_no_name(): - module = load_tool_module("run_agent_find_name_5", "tools/run-agent.py") - shim = module._parse_find_tree("tree", ["src/"], "tree src/") - assert shim is not None - assert shim.pattern == "tree" - assert shim.path == "src/" - - -# --------------------------------------------------------------------------- -# load_prompt extra-prompt tests -# 
--------------------------------------------------------------------------- - - -@pytest.fixture() -def prompt_env(tmp_path, monkeypatch): - """Set up an isolated environment for load_prompt tests.""" - config_module = _load_config_module() - - # Create a minimal prompt file. - prompt_file = tmp_path / "prompt.md" - prompt_file.write_text("# Phase prompt\n\nBase content.", encoding="utf-8") - - # Point ROOT at tmp_path so codecome.yml is found there. - monkeypatch.setattr(config_module, "ROOT", tmp_path) - - # Clear env vars by default. - monkeypatch.delenv("PROMPT_EXTRA", raising=False) - monkeypatch.delenv("PROMPT_EXTRA_FILE", raising=False) - - return config_module, prompt_file, tmp_path - - -@pytest.mark.unit -def test_load_prompt_no_extras(prompt_env): - module, prompt_file, _ = prompt_env - result = module.load_prompt(prompt_file, None) - assert result == "# Phase prompt\n\nBase content." - assert "Additional instructions" not in result - - -@pytest.mark.unit -def test_load_prompt_inline_extra(prompt_env, monkeypatch): - module, prompt_file, _ = prompt_env - monkeypatch.setenv("PROMPT_EXTRA", "Use ASAN builds.") - result = module.load_prompt(prompt_file, None, phase="1") - assert "## Additional instructions" in result - assert "Use ASAN builds." in result - - -@pytest.mark.unit -def test_load_prompt_extra_file(prompt_env, monkeypatch): - module, prompt_file, tmp_path = prompt_env - extra_file = tmp_path / "extra.md" - extra_file.write_text("Extra from file.", encoding="utf-8") - monkeypatch.setenv("PROMPT_EXTRA_FILE", str(extra_file)) - result = module.load_prompt(prompt_file, None, phase="1") - assert "## Additional instructions" in result - assert "Extra from file." 
in result - - -@pytest.mark.unit -def test_load_prompt_yaml_extra(prompt_env): - module, prompt_file, tmp_path = prompt_env - yml = tmp_path / "codecome.yml" - yml.write_text( - "audit:\n extra_prompts:\n reconnaissance: |\n Focus on memory safety.\n", - encoding="utf-8", - ) - result = module.load_prompt(prompt_file, None, phase="1") - assert "## Additional instructions" in result - assert "Focus on memory safety." in result - assert "From codecome.yml" in result - - -@pytest.mark.unit -def test_load_prompt_all_three_sources(prompt_env, monkeypatch): - module, prompt_file, tmp_path = prompt_env - - # YAML source - yml = tmp_path / "codecome.yml" - yml.write_text( - "audit:\n extra_prompts:\n reconnaissance: |\n YAML extra.\n", - encoding="utf-8", - ) - - # File source - extra_file = tmp_path / "extra.md" - extra_file.write_text("File extra.", encoding="utf-8") - monkeypatch.setenv("PROMPT_EXTRA_FILE", str(extra_file)) - - # Inline source - monkeypatch.setenv("PROMPT_EXTRA", "Inline extra.") - - result = module.load_prompt(prompt_file, None, phase="1") - assert "## Additional instructions" in result - assert "YAML extra." in result - assert "File extra." in result - assert "Inline extra." in result - - # Verify ordering: yaml before file before inline. - yaml_pos = result.index("YAML extra.") - file_pos = result.index("File extra.") - inline_pos = result.index("Inline extra.") - assert yaml_pos < file_pos < inline_pos - - -@pytest.mark.unit -def test_load_prompt_no_phase_skips_yaml(prompt_env, monkeypatch): - module, prompt_file, tmp_path = prompt_env - yml = tmp_path / "codecome.yml" - yml.write_text( - "audit:\n extra_prompts:\n reconnaissance: |\n Should not appear.\n", - encoding="utf-8", - ) - monkeypatch.setenv("PROMPT_EXTRA", "Inline only.") - result = module.load_prompt(prompt_file, None) # no phase - assert "Should not appear." not in result - assert "Inline only." 
in result - - -@pytest.mark.unit -def test_load_prompt_empty_extras_no_heading(prompt_env, monkeypatch): - module, prompt_file, _ = prompt_env - monkeypatch.setenv("PROMPT_EXTRA", " ") # whitespace only - monkeypatch.setenv("PROMPT_EXTRA_FILE", "") - result = module.load_prompt(prompt_file, None, phase="1") - assert "Additional instructions" not in result - - -@pytest.mark.unit -def test_load_prompt_finding_substitution_still_works(prompt_env): - module, _, tmp_path = prompt_env - prompt_file = tmp_path / "prompt-with-finding.md" - prompt_file.write_text("Validate FINDING_PATH_OR_ID now.", encoding="utf-8") - result = module.load_prompt(prompt_file, "CC-0001", phase="4") - assert "CC-0001" in result - assert "FINDING_PATH_OR_ID" not in result - - -@pytest.mark.unit -def test_load_prompt_relative_extra_file(prompt_env, monkeypatch): - module, prompt_file, tmp_path = prompt_env - extra_file = tmp_path / "notes" / "extra.md" - extra_file.parent.mkdir() - extra_file.write_text("Relative file content.", encoding="utf-8") - monkeypatch.setenv("PROMPT_EXTRA_FILE", "notes/extra.md") - result = module.load_prompt(prompt_file, None, phase="1") - assert "Relative file content." in result - - -@pytest.mark.component -def test_auto_correction_resume_loops_back_via_popen(monkeypatch, tmp_path): - """Frontmatter errors trigger a resume of the same session; on the second - attempt the check passes and main exits 0. 
The session ID must come from - the event stream, not from a DB fallback.""" - module = load_tool_module("run_agent_autocorrect_serve", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "check_opencode_version", lambda: None) - monkeypatch.setattr(module, "ROOT", tmp_path) - import sys - monkeypatch.setattr(sys.modules["codecome.cli_render"], "ROOT", tmp_path) - - sys.path.insert(0, str(ROOT / "tools")) - if "codecome" in sys.modules and not hasattr(sys.modules["codecome"], "__path__"): - del sys.modules["codecome"] - import codecome.runner as _runner - - # Reset the attempt counter so transcript numbering is deterministic. - if hasattr(_runner._run_single_attempt, "_attempt_counter"): - delattr(_runner._run_single_attempt, "_attempt_counter") - - calls: list[tuple] = [] - - def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, base_url, auth_token, workspace_dir, **kwargs): - existing_session_id = kwargs.get("existing_session_id") - calls.append((existing_session_id, prompt)) - # Both attempts succeed with the same session. 
- return ( - 0, - "ses_test_abc", - module.RunResult( - any_step_finish_seen=True, - step_finish_count=1, - last_finish_reason="stop", - last_finish_tokens={}, - last_permission_error=None, - ), - tmp_path / f"transcript-{len(calls)}.jsonl", - ) - - monkeypatch.setattr(_runner, "_run_single_attempt", fake_run_single_attempt) - - frontmatter_call_count = [0] - - class FakeResult: - def __init__(self, rc, out="", err=""): - self.returncode, self.stdout, self.stderr = rc, out, err - - def fake_run(cmd, *args, **kwargs): - if "--version" in cmd: - return FakeResult(0, out="opencode 1.15.0\n") - if any("check-frontmatter" in str(c) for c in cmd): - frontmatter_call_count[0] += 1 - if frontmatter_call_count[0] == 1: - return FakeResult(1, err="bad frontmatter") - return FakeResult(0) - return FakeResult(0) - - monkeypatch.setattr(module.subprocess, "run", fake_run) - - prompt_file = tmp_path / "phase.md" - prompt_file.write_text("run recon", encoding="utf-8") - monkeypatch.setattr(module.sys, "argv", [ - "run-agent.py", "--phase", "1", "--label", "test", - "--agent", "recon", "--prompt-file", str(prompt_file), - ]) - - rc = module.main() - assert rc == 0 - assert len(calls) == 2, f"expected 2 attempts, got {len(calls)}" - # First attempt is a fresh session; second reuses the same session ID. - assert calls[0][0] is None - assert calls[1][0] == "ses_test_abc" - # The second prompt should be the frontmatter repair prompt. 
- assert "Repair only the reported YAML/frontmatter issues" in calls[1][1] - - -@pytest.mark.component -def test_frontmatter_failure_without_session_id_exits_nonzero(monkeypatch, tmp_path): - """Frontmatter validation failures must not be reported as success when - the wrapper cannot determine a resumable session ID.""" - module = load_tool_module("run_agent_frontmatter_no_session_serve", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "check_opencode_version", lambda: None) - monkeypatch.setattr(module, "ROOT", tmp_path) - import sys - monkeypatch.setattr(sys.modules["codecome.cli_render"], "ROOT", tmp_path) - - sys.path.insert(0, str(ROOT / "tools")) - if "codecome" in sys.modules and not hasattr(sys.modules["codecome"], "__path__"): - del sys.modules["codecome"] - import codecome.runner as _runner - - if hasattr(_runner._run_single_attempt, "_attempt_counter"): - delattr(_runner._run_single_attempt, "_attempt_counter") - - def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, base_url, auth_token, workspace_dir, **kwargs): - return ( - 0, - "", # empty session ID - module.RunResult( - any_step_finish_seen=True, - step_finish_count=1, - last_finish_reason="stop", - ), - tmp_path / "transcript.jsonl", - ) - - monkeypatch.setattr(_runner, "_run_single_attempt", fake_run_single_attempt) - - class FakeResult: - def __init__(self, rc, out="", err=""): - self.returncode, self.stdout, self.stderr = rc, out, err - - def fake_run(cmd, *args, **kwargs): - if "--version" in cmd: - return FakeResult(0, out="opencode 1.15.0\n") - if any("check-frontmatter" in str(c) for c in cmd): - return FakeResult(1, err="bad frontmatter") - return FakeResult(0) - - monkeypatch.setattr(module.subprocess, "run", fake_run) - - prompt_file = tmp_path / "phase.md" - prompt_file.write_text("run recon", encoding="utf-8") - monkeypatch.setattr(module.sys, "argv", [ - "run-agent.py", "--phase", "1", "--label", "test", 
- "--agent", "recon", "--prompt-file", str(prompt_file), - ]) - - rc = module.main() - assert rc == 2 - - -@pytest.mark.component -def test_iteration_limit_triggers_auto_resume(monkeypatch, tmp_path): - """When the stream ends with a mid-turn finish reason (tool-calls) and - graceful forgiveness does not apply, run-agent resumes once then exits.""" - module = load_tool_module("run_agent_iter_resume_serve", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "check_opencode_version", lambda: None) - monkeypatch.setattr(module, "ROOT", tmp_path) - import sys - monkeypatch.setattr(sys.modules["codecome.cli_render"], "ROOT", tmp_path) - monkeypatch.setenv("CODECOME_MAX_ITERATION_RETRIES", "1") - - import sys - sys.path.insert(0, str(ROOT / "tools")) - if "codecome" in sys.modules and not hasattr(sys.modules["codecome"], "__path__"): - del sys.modules["codecome"] - import codecome.runner as _runner - - if hasattr(_runner._run_single_attempt, "_attempt_counter"): - delattr(_runner._run_single_attempt, "_attempt_counter") - - calls: list[tuple] = [] - - def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, base_url, auth_token, workspace_dir, **kwargs): - existing_session_id = kwargs.get("existing_session_id") - calls.append((existing_session_id, prompt)) - return ( - 0, - "ses_iter_xyz", - module.RunResult( - any_step_finish_seen=True, - step_finish_count=1, - last_finish_reason="tool-calls", - ), - tmp_path / f"transcript-{len(calls)}.jsonl", - ) - - monkeypatch.setattr(_runner, "_run_single_attempt", fake_run_single_attempt) - monkeypatch.setattr(module, "check_phase_graceful_completion", lambda *a, **kw: False) - - class FakeResult: - def __init__(self, rc, out="", err=""): - self.returncode, self.stdout, self.stderr = rc, out, err - - def fake_run(cmd, *args, **kwargs): - if "--version" in cmd: - return FakeResult(0, out="opencode 1.15.0\n") - if any("check-frontmatter" in str(c) for c in 
cmd): - return FakeResult(0) - return FakeResult(0) - - monkeypatch.setattr(module.subprocess, "run", fake_run) - - prompt_file = tmp_path / "phase.md" - prompt_file.write_text("run recon for FINDING_PATH_OR_ID", encoding="utf-8") - monkeypatch.setattr(module.sys, "argv", [ - "run-agent.py", "--phase", "4", "--label", "test", - "--agent", "recon", "--prompt-file", str(prompt_file), - "--finding", "CC-9999", - ]) - - rc = module.main() - - # After 1 retry (2 total attempts) the retry budget is exhausted → exit 2 - assert len(calls) == 2, f"expected 2 attempts, got {len(calls)}" - assert rc == 2 - - # Verify the retry reused the same session and included the resume prompt. - assert calls[1][0] == "ses_iter_xyz" - assert "Your previous response was cut off by the model/provider" in calls[1][1] - - -# --------------------------------------------------------------------------- -# check_phase_graceful_completion – mtime-aware artifact detection -# --------------------------------------------------------------------------- - -@pytest.mark.unit -def test_check_phase_graceful_completion_mtime(monkeypatch, tmp_path): - """Graceful completion is only True when the artifact was written during - the current run (st_mtime >= run_start_time).""" - import os - - module = _load_graceful_module() - monkeypatch.setattr(module, "ROOT", tmp_path) - - start = 1_000_000.0 - old = start - 1.0 - fresh = start + 1.0 - - # ---- Phase 1 ---- - notes = tmp_path / "itemdb" / "notes" - notes.mkdir(parents=True) - phase1_files = { - name: notes / name for name in module._PHASE1_REQUIRED_ARTIFACT_NAMES - } - sandbox_generated = tmp_path / "sandbox" / "CODECOME-GENERATED.md" - sandbox_generated.parent.mkdir(parents=True) - - # missing files - assert module.check_phase_graceful_completion("1", None, start) is False - - for path in phase1_files.values(): - path.write_text("x") - os.utime(path, (old, old)) - assert module.check_phase_graceful_completion("1", None, start) is False - - 
sandbox_generated.write_text("x") - os.utime(sandbox_generated, (old, old)) - os.utime(phase1_files["target-profile.md"], (fresh, fresh)) - assert module.check_phase_graceful_completion("1", None, start) is False - - os.utime(sandbox_generated, (fresh, fresh)) - assert module.check_phase_graceful_completion("1", None, start) is True - - # ---- Phase 2 ---- - pending = tmp_path / "itemdb" / "findings" / "PENDING" - pending.mkdir(parents=True) - f2 = pending / "CC-0001.md"; f2.write_text("x") - os.utime(f2, (old, old)) - assert module.check_phase_graceful_completion("2", None, start) is False - os.utime(f2, (fresh, fresh)) - assert module.check_phase_graceful_completion("2", None, start) is True - - # ---- Phase 3: touches any finding in any status dir ---- - confirmed = tmp_path / "itemdb" / "findings" / "CONFIRMED" - confirmed.mkdir(parents=True) - f3 = confirmed / "CC-0002.md"; f3.write_text("x") - os.utime(f3, (old, old)) - # Phase 2 file is still fresh but phase 3 should check all dirs - assert module.check_phase_graceful_completion("3", None, start) is True - # Make the phase 2 file old too - os.utime(f2, (old, old)) - assert module.check_phase_graceful_completion("3", None, start) is False - os.utime(f3, (fresh, fresh)) - assert module.check_phase_graceful_completion("3", None, start) is True - - # ---- Phase 5: NOT_FEASIBLE fallback (CONFIRMED finding with frontmatter) ---- - conf5 = confirmed / "CC-0005.md" - conf5.write_text( - "---\n" - "status: CONFIRMED\n" - "exploitation:\n" - " status: NOT_FEASIBLE\n" - "---\n" - ) - os.utime(conf5, (old, old)) - assert module.check_phase_graceful_completion("5", "CC-0005", start) is False - os.utime(conf5, (fresh, fresh)) - assert module.check_phase_graceful_completion("5", "CC-0005", start) is True - - # ---- Phase 5: EXPLOITED path requires frontmatter + exploit artifacts ---- - # Age the NOT_FEASIBLE fallback so it no longer matches. 
- os.utime(conf5, (old, old)) - exploited_dir = tmp_path / "itemdb" / "findings" / "EXPLOITED" - exploited_dir.mkdir(parents=True) - exp5 = exploited_dir / "CC-0005.md" - exp5.write_text( - "---\n" - "status: EXPLOITED\n" - "exploitation:\n" - " status: COMPLETED\n" - "---\n" - ) - os.utime(exp5, (old, old)) - # Still False: no fresh exploit artifacts - assert module.check_phase_graceful_completion("5", "CC-0005", start) is False - - exploits = tmp_path / "itemdb" / "evidence" / "CC-0005" / "exploits" - exploits.mkdir(parents=True) - xf = exploits / "exploit.py" - xf.write_text("x") - os.utime(xf, (fresh, fresh)) - os.utime(exp5, (fresh, fresh)) - assert module.check_phase_graceful_completion("5", "CC-0005", start) is True - - # ---- Phase 6 ---- - reports = tmp_path / "itemdb" / "reports" - reports.mkdir(parents=True) - rpt = reports / "report.md"; rpt.write_text("x") - os.utime(rpt, (old, old)) - assert module.check_phase_graceful_completion("6", None, start) is False - os.utime(rpt, (fresh, fresh)) - assert module.check_phase_graceful_completion("6", None, start) is True - - -@pytest.mark.unit -def test_stream_session_id_and_step_finish_count(monkeypatch, tmp_path): - """Verify that the main loop captures sessionID and step_finish count - from the RunResult returned by _run_single_attempt.""" - module = load_tool_module("run_agent_stream_tracking_serve", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "check_opencode_version", lambda: None) - monkeypatch.setattr(module, "ROOT", tmp_path) - import sys - monkeypatch.setattr(sys.modules["codecome.cli_render"], "ROOT", tmp_path) - - sys.path.insert(0, str(ROOT / "tools")) - if "codecome" in sys.modules and not hasattr(sys.modules["codecome"], "__path__"): - del sys.modules["codecome"] - import codecome.runner as _runner - - if hasattr(_runner._run_single_attempt, "_attempt_counter"): - delattr(_runner._run_single_attempt, "_attempt_counter") - - def 
fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, base_url, auth_token, workspace_dir, **kwargs): - return ( - 0, - "ses_stream_test_001", - module.RunResult( - any_step_finish_seen=True, - step_finish_count=3, - last_finish_reason="stop", - last_finish_tokens={"input": 10, "output": 20}, - ), - tmp_path / "transcript.jsonl", - ) - - monkeypatch.setattr(_runner, "_run_single_attempt", fake_run_single_attempt) - - class FakeResult: - def __init__(self, rc, out="", err=""): - self.returncode, self.stdout, self.stderr = rc, out, err - - def fake_run(cmd, *args, **kwargs): - if "--version" in cmd: - return FakeResult(0, out="opencode 1.15.0\n") - if any("check-frontmatter" in str(c) for c in cmd): - return FakeResult(0) - return FakeResult(0) - - monkeypatch.setattr(module.subprocess, "run", fake_run) - - prompt_file = tmp_path / "phase.md" - prompt_file.write_text("run recon", encoding="utf-8") - monkeypatch.setattr(module.sys, "argv", [ - "run-agent.py", "--phase", "1", "--label", "test", - "--agent", "recon", "--prompt-file", str(prompt_file), - ]) - - rc = module.main() - assert rc == 0 - - # The session terminated with 'stop', no frontmatter errors → single attempt - # (We cannot introspect the loop variables directly, but the clean exit - # with rc=0 proves the RunResult signals were consumed correctly.) - - -@pytest.mark.unit -def test_render_event_fallback_to_unknown_renderer(monkeypatch): - """render_event falls back to UnknownEventRenderer for unregistered event types - without raising NameError.""" - module = load_tool_module("run_agent_unknown_fallback", "tools/run-agent.py") - ctx = module._get_rendering_ctx(None) - renderers = getattr(ctx, "_renderers", {}) - - # Ensure the "unknown" key is absent so the fallback path is triggered. - renderers.pop("unknown", None) - renderers.pop("some.unregistered.event", None) - - # Should not raise NameError. 
- module.render_event( - None, "2", "x", - {"type": "some.unregistered.event", "properties": {"foo": "bar"}} - ) - - -@pytest.mark.component -def test_first_attempt_failure_prints_finish_warning(monkeypatch, tmp_path): - """When _run_single_attempt returns non-zero on the very first iteration, - main() should not raise UnboundLocalError for finish_warning.""" - module = load_tool_module("run_agent_first_fail", "tools/run-agent.py") - monkeypatch.setattr(module, "HAVE_RICH", False) - monkeypatch.setattr(module, "check_opencode_version", lambda: None) - monkeypatch.setattr(module, "ROOT", tmp_path) - import sys - monkeypatch.setattr(sys.modules["codecome.cli_render"], "ROOT", tmp_path) - - sys.path.insert(0, str(ROOT / "tools")) - if "codecome" in sys.modules and not hasattr(sys.modules["codecome"], "__path__"): - del sys.modules["codecome"] - import codecome.runner as _runner - - if hasattr(_runner._run_single_attempt, "_attempt_counter"): - delattr(_runner._run_single_attempt, "_attempt_counter") - - def fake_run_single_attempt(args, console, prompt, model, variant, thinking_on, base_url, auth_token, workspace_dir, **kwargs): - return ( - 1, # non-zero return code on first attempt - "", - module.RunResult( - any_step_finish_seen=False, - step_finish_count=0, - last_finish_reason=None, - last_finish_tokens={}, - last_permission_error=None, - ), - tmp_path / "transcript.jsonl", - ) - - monkeypatch.setattr(_runner, "_run_single_attempt", fake_run_single_attempt) - - class FakeResult: - def __init__(self, rc, out="", err=""): - self.returncode, self.stdout, self.stderr = rc, out, err - - def fake_run(cmd, *args, **kwargs): - if "--version" in cmd: - return FakeResult(0, out="opencode 1.15.0\n") - return FakeResult(0) - - monkeypatch.setattr(module.subprocess, "run", fake_run) - - prompt_file = tmp_path / "phase.md" - prompt_file.write_text("run recon", encoding="utf-8") - monkeypatch.setattr(module.sys, "argv", [ - "run-agent.py", "--phase", "1", "--label", "test", - 
"--agent", "recon", "--prompt-file", str(prompt_file), - ]) - - rc = module.main() - assert rc == 1 diff --git a/tools/chat/app.py b/tools/chat/app.py index 197ff23..63d51e6 100644 --- a/tools/chat/app.py +++ b/tools/chat/app.py @@ -28,7 +28,7 @@ # TODO(phase-a6): Break this circular dependency. chat.app should not # dynamically import run-agent; render_event should be injected as a # constructor dependency once the events/renderer refactor lands. -_run_agent = _importlib.import_module("run-agent") +_run_agent = _importlib.import_module("codecome.cli_render") render_event = _run_agent.render_event # --------------------------------------------------------------------------- diff --git a/tools/run-agent.py b/tools/run-agent.py index c6bccd1..dc3a132 100644 --- a/tools/run-agent.py +++ b/tools/run-agent.py @@ -1,4256 +1,12 @@ #!/usr/bin/env python3 -# Copyright (C) 2025-2026 Pablo Ruiz García -# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later - -""" -Structured wrapper around `opencode serve` HTTP+SSE API for CodeCome phase targets. - -Minimum supported OpenCode version: 1.14.50 -""" - from __future__ import annotations -import argparse -import dataclasses -import difflib -import json -import os -import re -import shlex -import signal -import subprocess import sys -import threading -import time -import traceback -import urllib.error -import urllib.request -from collections import OrderedDict -from dataclasses import dataclass -from functools import lru_cache from pathlib import Path -from typing import Any sys.path.insert(0, str(Path(__file__).resolve().parent)) -# When this module runs as __main__, alias it so sibling tools can import -# it as 'run-agent' (the hyphenated filename) without a duplicate load. 
-if __name__ == "__main__": - sys.modules.setdefault("run-agent", sys.modules["__main__"]) - -import _colors as C -from codecome.cli import main, build_parser -from codecome.cli_render import ( - HAVE_RICH, Console, Group, JSON, Markdown, Panel, Rule, Text, - build_console, _get_rendering_ctx, _emit_fatal_error, render_event, - _FINISH_TERMINAL_OK, _FINISH_MID_TURN, _FINISH_FAILURE, - ROOT, -) -from opencode.serve import ServerRunner, ServerRunnerError -from events import EventLoop, RunResult -from codecome.version import check_opencode_version, MINIMUM_OPENCODE_VERSION as _MINIMUM_OPENCODE_VERSION -from codecome.config import ( - truthy_env, resolve_color_mode, load_prompt, - resolve_model_and_variant, resolve_runtime_model_for_banner, - resolve_thinking_decision, show_model_table, -) -from codecome.session import create_session, create_chat_session, send_prompt_to_session -from codecome.graceful import ( - check_phase_graceful_completion, - phase_checklist_lines, build_phase_resume_prompt, - build_frontmatter_resume_prompt, build_resume_command, -) -from codecome.transcript import open_phase_transcript, open_chat_transcript, close_transcript - -# --------------------------------------------------------------------------- -# Chat debug logging (--debug with --chat writes to tmp/chat-debug-.log) -# --------------------------------------------------------------------------- - -_CHAT_DEBUG_FP: Any = None - - -def _chat_debug(msg: str) -> None: - """Write a debug message if chat debug logging is active.""" - global _CHAT_DEBUG_FP - if _CHAT_DEBUG_FP is None: - return - import threading as _threading - _elapsed = time.time() - _CHAT_DEBUG_FP.start_time # type: ignore[attr-defined] - _thread = _threading.current_thread().name - _line = f"[{_elapsed:07.3f}s] [{_thread}] {msg}\n" - _CHAT_DEBUG_FP.write(_line) # type: ignore[union-attr] - _CHAT_DEBUG_FP.flush() # type: ignore[union-attr] - - -def _setup_chat_debug() -> None: - """Open tmp/chat-debug--.log for chat 
diagnostic logging.""" - global _CHAT_DEBUG_FP - _stamp = time.strftime("%Y%m%d-%H%M%S") - log_dir = ROOT / "tmp" - log_dir.mkdir(parents=True, exist_ok=True) - log_path = log_dir / f"chat-debug-{os.getpid()}-{_stamp}.log" - _CHAT_DEBUG_FP = log_path.open("a", buffering=1) - _CHAT_DEBUG_FP.start_time = time.time() # type: ignore[attr-defined] - _chat_debug(f"debug log opened: {log_path}") - print(f"[chat-debug] writing diagnostics to {log_path}", file=sys.stderr) - - -def _close_chat_debug() -> None: - """Close the chat debug log if open.""" - global _CHAT_DEBUG_FP - if _CHAT_DEBUG_FP is not None: - _chat_debug("debug log closing") - _CHAT_DEBUG_FP.close() - _CHAT_DEBUG_FP = None - - -def format_tokens(tokens: dict[str, Any]) -> str: - if not isinstance(tokens, dict): - return "" - - parts = [] - for key in ("input", "output", "reasoning", "total"): - value = tokens.get(key) - if value is not None: - parts.append(f"{key}={value}") - return ", ".join(parts) - - -# --- Todo rendering helpers --------------------------------------------------- - -_TODO_STATUS_ICONS = { - "completed": "\u2714", # ✔ - "in_progress": "\u25cf", # ● - "pending": "\u25cb", # ○ - "cancelled": "\u2716", # ✖ -} - -_TODO_STATUS_ASCII = { - "completed": "[x]", - "in_progress": "[~]", - "pending": "[ ]", - "cancelled": "[-]", -} - -_TODO_PRIORITY_LETTERS = { - "high": "H", - "medium": "M", - "low": "L", -} - - -def extract_todos(state: dict[str, Any]) -> list[dict[str, str]] | None: - """Extract a todo list from a todowrite tool state, or None if unrecognized.""" - output = state.get("output") - if isinstance(output, list): - items = output - else: - input_data = state.get("input") - if isinstance(input_data, dict) and isinstance(input_data.get("todos"), list): - items = input_data["todos"] - else: - return None - - result: list[dict[str, str]] = [] - for item in items: - if not isinstance(item, dict): - return None - result.append({ - "content": str(item.get("content", "")), - "status": 
str(item.get("status", "?")), - "priority": str(item.get("priority", "?")), - }) - return result - - -def _todo_summary(todos: list[dict[str, str]]) -> str: - from collections import Counter - counts = Counter(t["status"] for t in todos) - parts = [f"{len(todos)} tasks"] - for status in ("completed", "in_progress", "pending", "cancelled"): - count = counts.get(status, 0) - if count > 0: - label = status.replace("_", " ") - parts.append(f"{count} {label}") - return " \u00b7 ".join(parts) - - -def _todo_border_style(todos: list[dict[str, str]]) -> str: - statuses = {t["status"] for t in todos} - if statuses == {"completed"}: - return "green" - if "in_progress" in statuses: - return "yellow" - return "dim" - - -def render_todowrite_rich(console: Console, state: dict[str, Any]) -> bool: - """Render a todowrite tool call as a rich panel. Returns True if rendered.""" - todos = extract_todos(state) - if todos is None: - return False - - if not todos: - console.print(Panel(Text("No todos.", style="dim"), title="Todos", border_style="dim", expand=True)) - return True - - from rich.table import Table - - summary = Text(_todo_summary(todos)) - - table = Table(show_header=False, show_edge=False, padding=(0, 1), expand=True) - table.add_column(width=2, no_wrap=True) # status icon - table.add_column(width=1, no_wrap=True) # priority - table.add_column(ratio=1) # content - - status_styles = { - "completed": "bold green", - "in_progress": "yellow", - "pending": "dim", - "cancelled": "dim strike", - } - priority_styles = { - "high": "red", - "medium": "yellow", - "low": "dim", - } - - for todo in todos: - status = todo["status"] - priority = todo["priority"] - - icon = _TODO_STATUS_ICONS.get(status, "?") - icon_style = status_styles.get(status, "dim") - - pri_letter = _TODO_PRIORITY_LETTERS.get(priority, "?") - pri_style = priority_styles.get(priority, "dim") - - table.add_row( - Text(icon, style=icon_style), - Text(pri_letter, style=pri_style), - Text(todo["content"], 
style=status_styles.get(status, "")), - ) - - body = Group(summary, Text(), table) - border = _todo_border_style(todos) - console.print(Panel(body, title="Todos", border_style=border, expand=True)) - return True - - -def render_todowrite_plain(state: dict[str, Any]) -> bool: - """Render a todowrite tool call in plain ASCII. Returns True if rendered.""" - todos = extract_todos(state) - if todos is None: - return False - - print(C.header("todos")) - if not todos: - print(" No todos.") - return True - - print(f" {_todo_summary(todos)}") - for todo in todos: - status = todo["status"] - priority = todo["priority"] - checkbox = _TODO_STATUS_ASCII.get(status, "[?]") - pri_letter = _TODO_PRIORITY_LETTERS.get(priority, "?") - content = todo["content"].replace("\n", " ") - print(f" {checkbox} {pri_letter} {content}") - return True - - -# --- Permission-error renderer ------------------------------------------------ - -def render_permission_error_rich(console: Console, message: str) -> None: - """Draw a bold red panel when a tool permission is auto-rejected.""" - console.print( - Panel( - Text(message, style="bold red"), - title="Permission Denied", - border_style="red", - expand=True, - ) - ) - - -def render_permission_error_plain(message: str) -> None: - print(C.fail("Permission Denied")) - print(C.fail(f" {message}")) - - -# --- Shared helper utilities -------------------------------------------------- - -_SNAPSHOT_CACHE: OrderedDict[str, tuple[str, float]] = OrderedDict() -_SNAPSHOT_CACHE_CAP = int(os.environ.get("CODECOME_WRITE_CACHE_CAP", "200")) -_WRITE_CACHE_ENABLED = os.environ.get("CODECOME_WRITE_CACHE", "1") not in ("0", "false", "False", "no") - -_READ_DISPLAY_LINES = int(os.environ.get("CODECOME_READ_DISPLAY_LINES", "10")) -_WRITE_CONTENT_LINES = int(os.environ.get("CODECOME_WRITE_CONTENT_LINES", "25")) -_WRITE_DIFF_LIMIT = int(os.environ.get("CODECOME_WRITE_DIFF_LIMIT", "50")) -_EDIT_DIFF_LINES = int(os.environ.get("CODECOME_EDIT_DIFF_LINES", "25")) 
-_READ_HIGHLIGHT_LIMIT = int(os.environ.get("CODECOME_READ_HIGHLIGHT_LIMIT", str(200 * 1024))) -_GLOB_MATCH_CAP = int(os.environ.get("CODECOME_GLOB_MATCH_CAP", "10")) - -# Lines that look like OpenCode summary/status rather than actual file paths. -# Examples: "0 for '*.md'", "3 match(es)", "No matches found". -_GLOB_SUMMARY_LINE_RE = re.compile( - r"^\d+\s+(?:for\s|match)" # "0 for '*.md'" or "3 match(es)" - r"|^No\s+matches?\s" # "No matches found" - r"|^\d+\s+file" # "0 files" / "3 files found" -) -_APPLY_PATCH_DIFF_LINES = int(os.environ.get("CODECOME_APPLY_PATCH_DIFF_LINES", str(_EDIT_DIFF_LINES))) -_APPLY_PATCH_MAX_FILES = int(os.environ.get("CODECOME_APPLY_PATCH_MAX_FILES", "10")) -_GREP_FILE_CAP = int(os.environ.get("CODECOME_GREP_FILE_CAP", "50")) -_GREP_LINE_CAP_PER_FILE = int(os.environ.get("CODECOME_GREP_LINE_CAP_PER_FILE", "5")) -_GREP_TOTAL_LINE_CAP = int(os.environ.get("CODECOME_GREP_TOTAL_LINE_CAP", "200")) -_GREP_HIGHLIGHT = os.environ.get("CODECOME_GREP_HIGHLIGHT", "1") not in ("0", "false", "False", "no") -_REASONING_MAX_CHARS = int(os.environ.get("CODECOME_REASONING_MAX_CHARS", "4000")) -_RENDER_REASONING = os.environ.get("CODECOME_RENDER_REASONING", "1") not in ("0", "false", "False", "no") -_DEBUG_UNKNOWN_EVENTS = os.environ.get("CODECOME_DEBUG_UNKNOWN_EVENTS", "0") not in ("", "0", "false", "False", "no") -_SANDBOX_RENDER = os.environ.get("CODECOME_SANDBOX_RENDER", "1") not in ("0", "false", "False", "no") -_SANDBOX_VALIDATE_STDERR_LINES = int(os.environ.get("CODECOME_SANDBOX_VALIDATE_STDERR_LINES", "20")) -_SANDBOX_FILES_CAP = int(os.environ.get("CODECOME_SANDBOX_FILES_CAP", "15")) -_BASH_SHIM_RENDER = os.environ.get("CODECOME_BASH_SHIM_RENDER", "1") not in ("0", "false", "False", "no") -_BASH_SHIM_LS_STRIP_LONG_FORMAT = os.environ.get("CODECOME_BASH_SHIM_LS_STRIP_LONG_FORMAT", "1") not in ("0", "false", "False", "no") -_INTERNAL_READ_SUPPRESS = os.environ.get("CODECOME_INTERNAL_READ_SUPPRESS", "1") not in ("0", "false", "False", "no") - -# 
--- Subagent visibility tunables -------------------------------------------- -_SUBAGENT_HEARTBEAT_INTERVAL_S = int(os.environ.get("CODECOME_SUBAGENT_HEARTBEAT_INTERVAL_S", "30")) -_SUBAGENT_UPDATE_THROTTLE_S = int(os.environ.get("CODECOME_SUBAGENT_UPDATE_THROTTLE_S", "5")) -_TASK_PROMPT_PREVIEW_LINES = int(os.environ.get("CODECOME_TASK_PROMPT_PREVIEW_LINES", "5")) -_RENDER_SUBAGENT_UPDATES = os.environ.get("CODECOME_RENDER_SUBAGENT_UPDATES", "1") not in ("0", "false", "False", "no") - -# Per-session deduplication state for subagent update events. -_SUBAGENT_LAST_STATE: dict[str, tuple[dict[str, Any], float]] = {} - - -_READ_FILE_FRAMING_RE = re.compile( - r"(?P.*?)\s*" - r"(?P.*?)\s*" - r"\s*\n(?P.*?)\n\s*", - re.DOTALL, -) -_READ_DIR_FRAMING_RE = re.compile( - r"(?P.*?)\s*" - r"directory\s*" - r"\s*\n(?P.*?)\n\s*", - re.DOTALL, -) -_READ_SUMMARY_RE = re.compile( - r"\((?:End of file|Showing lines|Buffer has more lines)[^\)]*\)\s*$", - re.MULTILINE, -) - -_LEXER_MAP = { - ".c": "c", ".h": "c", ".cpp": "cpp", ".cc": "cpp", ".cxx": "cpp", - ".hpp": "cpp", ".hh": "cpp", ".cs": "csharp", ".java": "java", - ".erl": "erlang", ".hrl": "erlang", ".app.src": "erlang", ".config": "erlang", - ".ex": "elixir", ".exs": "elixir", ".py": "python", ".rb": "ruby", - ".rs": "rust", ".go": "go", - ".js": "javascript", ".ts": "typescript", ".tsx": "tsx", ".jsx": "jsx", - ".sh": "bash", ".bash": "bash", ".zsh": "bash", - ".yml": "yaml", ".yaml": "yaml", ".json": "json", ".toml": "toml", - ".xml": "xml", ".html": "html", ".css": "css", ".sql": "sql", - ".md": "markdown", ".mk": "make", ".cmake": "cmake", - ".dockerfile": "docker", ".tf": "hcl", ".hcl": "hcl", -} - - -def _relativize_path(path: str) -> str: - try: - return str(Path(path).relative_to(ROOT)) - except ValueError: - return path - - -_FINDING_FILENAME_RE = re.compile(r"^(CC-\d{4,})-(.+)\.md$") -_ROOT_WORKSPACE_DOCS = {"AGENTS.md", "README.md"} -_ROOT_WORKSPACE_CONFIGS = {"codecome.yml"} - - -def 
_classify_internal_read(rel_path: str) -> str | None: - """Return a description for a suppressible internal read, or None. - - rel_path is repo-relative. Absolute paths (outside the repo) return None. - """ - if not rel_path or os.path.isabs(rel_path): - return None - - parts = Path(rel_path).parts - if not parts: - return None - - # Root-level workspace docs and config - if len(parts) == 1: - name = parts[0] - if name in _ROOT_WORKSPACE_DOCS: - return f"reading workspace doc: {name}" - if name in _ROOT_WORKSPACE_CONFIGS: - return f"reading workspace config: {name}" - return None - - # .opencode/... - if parts[0] == ".opencode": - if len(parts) >= 3 and parts[1] == "agents": - agent_name = Path(parts[2]).stem - return f"loading agent: {agent_name}" - if len(parts) >= 3 and parts[1] == "skills": - skill_name = parts[2] - if len(parts) == 4 and parts[3] == "SKILL.md": - return f"loading skill: {skill_name}" - if len(parts) >= 4: - rest = "/".join(parts[3:]) - return f"loading skill resource: {skill_name}/{rest}" - return f"loading skill: {skill_name}" - return f"loading opencode config: {rel_path}" - - # itemdb/... 
- if parts[0] == "itemdb": - if len(parts) >= 4 and parts[1] == "findings": - status = parts[2] - filename = parts[3] - m = _FINDING_FILENAME_RE.match(filename) - if m: - return f"reading finding: {m.group(1)} [{status}] - {m.group(2)}" - return f"reading itemdb file: {rel_path}" - if len(parts) >= 3 and parts[1] == "notes": - return f"reading note: {parts[2]}" - if len(parts) >= 3 and parts[1] == "evidence": - rest = "/".join(parts[2:]) - return f"reading evidence: {rest}" - if len(parts) >= 3 and parts[1] == "reports": - return f"reading report: {parts[2]}" - if len(parts) == 2 and parts[1] == "index.md": - return "reading items index" - return f"reading itemdb file: {rel_path}" - - # runs/.md - if parts[0] == "runs" and len(parts) >= 2: - return f"reading run summary: {parts[1]}" - - # templates/ - if parts[0] == "templates" and len(parts) >= 2: - return f"reading template: {parts[1]}" - - return None - - -def _strip_read_framing(output: str) -> tuple[str, Any, str | None]: - """Parse OpenCode read tool output. 
- - Returns a 3-tuple: - - kind: "file" | "directory" | "unknown" - - payload: str (file body) | list[str] (directory entries) | None - - footer: the trailing summary/entries-count line, or None - """ - # Try file framing - m = _READ_FILE_FRAMING_RE.search(output) - if m: - body = m.group("content") - # Separate trailing summary line from body - summary_m = _READ_SUMMARY_RE.search(body) - if summary_m: - footer = summary_m.group(0).strip() - body = body[:summary_m.start()].rstrip() - else: - footer = None - return "file", body, footer - - # Try directory framing - d = _READ_DIR_FRAMING_RE.search(output) - if d: - raw_entries = d.group("entries") - entries = [] - footer = None - for line in raw_entries.split("\n"): - line = line.strip() - if not line: - continue - # The "(N entries)" summary is the footer - if line.startswith("(") and "entries" in line and line.endswith(")"): - footer = line - else: - entries.append(line) - return "directory", entries, footer - - return "unknown", None, None - - -def _count_lines_and_bytes(text: str) -> tuple[int, int]: - return text.count("\n") + (1 if text and not text.endswith("\n") else 0), len(text.encode("utf-8", errors="replace")) - - -def _detect_lexer(path: str) -> str: - ext = Path(path).suffix.lower() - if Path(path).name.lower() == "makefile": - return "make" - if Path(path).name.lower() == "dockerfile": - return "docker" - return _LEXER_MAP.get(ext, "text") - - -def _format_excerpt(text: str, max_lines: int) -> tuple[str, int]: - lines = text.split("\n") - if len(lines) <= max_lines: - return text, 0 - return "\n".join(lines[:max_lines]), len(lines) - max_lines - - -def _strip_line_numbers(text: str) -> str: - """Remove OpenCode line-number prefixes like ' 1: '.""" - raw_lines = [] - for line in text.split("\n"): - colon_idx = line.find(": ") - if colon_idx >= 0 and colon_idx <= 6 and line[:colon_idx].strip().isdigit(): - raw_lines.append(line[colon_idx + 2:]) - else: - raw_lines.append(line) - return 
"\n".join(raw_lines) - - -def _render_truncated_body_rich( - console: Console, - sections: list[Any], - body: str, - cap: int, - lexer: str, - footer: str | None, -) -> None: - """Append Syntax block (capped), '... K more lines', and footer to sections.""" - from rich.syntax import Syntax - - body_lines = body.split("\n") - total = len(body_lines) - visible_lines = body_lines[:cap] - leftover = max(0, total - cap) - - visible = "\n".join(visible_lines) - if len(visible.encode("utf-8", errors="replace")) > _READ_HIGHLIGHT_LIMIT: - sections.append(Text(visible)) - else: - sections.append(Syntax(visible, lexer, theme="monokai", line_numbers=True, word_wrap=True)) - - if leftover > 0: - sections.append(Text(f"... {leftover} more lines", style="dim")) - if footer: - sections.append(Text(footer, style="dim")) - - -def _render_truncated_body_plain( - body: str, - cap: int, - footer: str | None, -) -> None: - """Print body lines (capped), '... K more lines', and footer.""" - body_lines = body.split("\n") - total = len(body_lines) - for line in body_lines[:cap]: - print(line) - leftover = max(0, total - cap) - if leftover > 0: - print(f" ... 
{leftover} more lines") - if footer: - print(f" {footer}") - - -def _is_likely_error(text: str) -> bool: - lower = text.lower() - return any(marker in lower for marker in ( - "error", "traceback", "command not found", "failed", "permission denied", - "no such file", "exception", - )) - - -def _compute_diff(old: str, new: str, context: int = 3) -> list[str]: - old_lines = old.splitlines(keepends=True) - new_lines = new.splitlines(keepends=True) - return list(difflib.unified_diff(old_lines, new_lines, fromfile="old", tofile="new", n=context)) - - -def _truncate_diff(diff_lines: list[str], max_lines: int) -> tuple[list[str], int]: - if len(diff_lines) <= max_lines: - return diff_lines, 0 - return diff_lines[:max_lines], len(diff_lines) - max_lines - - -def _current_mtime(path: str) -> float | None: - try: - return os.stat(path).st_mtime - except OSError: - return None - - -def _cache_set(path: str, content: str) -> None: - if not _WRITE_CACHE_ENABLED: - return - mtime = _current_mtime(path) - if mtime is None: - return - _SNAPSHOT_CACHE[path] = (content, mtime) - _SNAPSHOT_CACHE.move_to_end(path) - while len(_SNAPSHOT_CACHE) > _SNAPSHOT_CACHE_CAP: - _SNAPSHOT_CACHE.popitem(last=False) - - -def _cache_get(path: str) -> str | None: - if not _WRITE_CACHE_ENABLED: - return None - entry = _SNAPSHOT_CACHE.get(path) - if entry is None: - return None - content, recorded_mtime = entry - return content - - -def _cache_invalidate_stale() -> None: - if not _WRITE_CACHE_ENABLED: - return - stale = [] - for path, (_, recorded_mtime) in _SNAPSHOT_CACHE.items(): - actual = _current_mtime(path) - # If the file no longer exists (actual is None), remove from cache - # to prevent stale diffs on re-creation. - # If the file was modified since we cached it, remove from cache - # so the next diff uses current disk state. 
- if actual is None or actual != recorded_mtime: - stale.append(path) - for path in stale: - del _SNAPSHOT_CACHE[path] - - -# --- Read renderer ------------------------------------------------------------ - -def render_read_rich(console: Console, state: dict[str, Any]) -> bool: - inp = state.get("input") - output = state.get("output") - if not isinstance(inp, dict) or not isinstance(output, str): - return False - - file_path = str(inp.get("filePath", "")) - if not file_path: - return False - - rel_path = _relativize_path(file_path) - offset = inp.get("offset") - limit = inp.get("limit") - - border = "green" if state.get("status") == "completed" else "yellow" - sections: list[Any] = [Text(rel_path, style="bold cyan")] - if offset is not None and limit is not None: - sections.append(Text(f"lines {offset}..{offset + limit - 1}", style="dim")) - - kind, payload, footer = _strip_read_framing(output) - - if kind == "unknown": - if _is_likely_error(output): - sections.append(Text()) - sections.append(Text(output.strip(), style="red")) - console.print(Panel(Group(*sections), title="Read", border_style="red", expand=True)) - else: - return False - return True - - sections.append(Text()) - - if kind == "file": - body = str(payload).strip() - raw_body = _strip_line_numbers(body) - # Cache the full body before considering display suppression so - # subsequent write/edit diffs always have a baseline. - _cache_set(file_path, raw_body) - - # Display suppression for internal workspace files. - if _INTERNAL_READ_SUPPRESS: - description = _classify_internal_read(rel_path) - if description is not None: - is_partial = offset is not None or limit is not None - if is_partial: - description = f"{description} (partial)" - # Build a fresh sections list for the suppressed panel: - # path header + dim italic description, no body. 
- suppressed: list[Any] = [Text(rel_path, style="bold cyan")] - suppressed.append(Text(description, style="dim italic")) - console.print(Panel(Group(*suppressed), title="Read", border_style=border, expand=True)) - return True - - if not body: - sections.append(Text("(empty file)", style="dim")) - else: - lexer = _detect_lexer(file_path) - _render_truncated_body_rich(console, sections, raw_body, _READ_DISPLAY_LINES, lexer, footer) - - elif kind == "directory": - entries = payload if isinstance(payload, list) else [] - for entry in entries: - if entry.endswith("/"): - sections.append(Text(f" {entry}", style="bold blue")) - else: - sections.append(Text(f" {entry}")) - if footer: - sections.append(Text(footer, style="dim")) - - console.print(Panel(Group(*sections), title="Read", border_style=border, expand=True)) - return True - - -def render_read_plain(state: dict[str, Any]) -> bool: - inp = state.get("input") - output = state.get("output") - if not isinstance(inp, dict) or not isinstance(output, str): - return False - - file_path = str(inp.get("filePath", "")) - if not file_path: - return False - - rel_path = _relativize_path(file_path) - offset = inp.get("offset") - limit = inp.get("limit") - - kind, payload, footer = _strip_read_framing(output) - - if kind == "file": - body = str(payload).strip() - raw_body = _strip_line_numbers(body) - _cache_set(file_path, raw_body) - - if _INTERNAL_READ_SUPPRESS: - description = _classify_internal_read(rel_path) - if description is not None: - is_partial = offset is not None or limit is not None - suffix = " (partial)" if is_partial else "" - print(C.header(f"read [{description}]{suffix}")) - return True - - print(C.header(f"read {rel_path}")) - if offset is not None and limit is not None: - print(f" lines {offset}..{offset + limit - 1}") - _render_truncated_body_plain(raw_body, _READ_DISPLAY_LINES, footer) - return True - - print(C.header(f"read {rel_path}")) - if offset is not None and limit is not None: - print(f" lines 
{offset}..{offset + limit - 1}") - - if kind == "directory": - entries = payload if isinstance(payload, list) else [] - for entry in entries: - print(f" {entry}") - if footer: - print(f" {footer}") - else: - print(output.strip()) - - return True - - -# --- Write renderer ----------------------------------------------------------- - -def render_write_rich(console: Console, state: dict[str, Any]) -> bool: - inp = state.get("input") - output = state.get("output") - if not isinstance(inp, dict): - return False - - file_path = str(inp.get("filePath", "")) - new_content = str(inp.get("content", "")) - output_str = str(output) if output is not None else "" - - if not file_path: - return False - - from rich.syntax import Syntax - - rel_path = _relativize_path(file_path) - n_lines, n_bytes = _count_lines_and_bytes(new_content) - - is_error = output is not None and not output_str.startswith("Wrote file") - border = "red" if is_error else "green" - - sections: list[Any] = [ - Text(rel_path, style="bold cyan"), - Text(f"{n_lines} lines, {n_bytes} bytes", style="dim"), - ] - - if is_error: - sections.append(Text()) - sections.append(Text(output_str.strip(), style="red")) - console.print(Panel(Group(*sections), title="Write", border_style=border, expand=True)) - return True - - prev = _cache_get(file_path) - lexer = _detect_lexer(file_path) - status_text = output_str.strip() - - if prev is not None: - diff_lines = _compute_diff(prev, new_content) - if not diff_lines: - sections.append(Text("(no changes)", style="dim")) - else: - added = sum(1 for l in diff_lines if l.startswith("+") and not l.startswith("+++")) - removed = sum(1 for l in diff_lines if l.startswith("-") and not l.startswith("---")) - sections.append(Text(f"diff: -{removed} +{added}", style="dim")) - sections.append(Text()) - truncated, leftover = _truncate_diff(diff_lines, _WRITE_DIFF_LIMIT) - diff_text = "".join(truncated) - sections.append(Syntax(diff_text, "diff", theme="monokai", word_wrap=True)) - if 
leftover > 0: - sections.append(Text(f"... {leftover} more lines", style="dim")) - else: - sections.append(Text("(new file)", style="dim")) - sections.append(Text()) - _render_truncated_body_rich(console, sections, new_content, _WRITE_CONTENT_LINES, lexer, None) - - sections.append(Text()) - sections.append(Text(status_text, style="green" if not is_error else "red")) - - console.print(Panel(Group(*sections), title="Write", border_style=border, expand=True)) - _cache_set(file_path, new_content) - return True - - -def render_write_plain(state: dict[str, Any]) -> bool: - inp = state.get("input") - output = state.get("output") - if not isinstance(inp, dict): - return False - - file_path = str(inp.get("filePath", "")) - new_content = str(inp.get("content", "")) - output_str = str(output) if output is not None else "" - - if not file_path: - return False - - rel_path = _relativize_path(file_path) - n_lines, n_bytes = _count_lines_and_bytes(new_content) - - print(C.header(f"write {rel_path}")) - print(f" {n_lines} lines, {n_bytes} bytes") - - is_error = output is not None and not output_str.startswith("Wrote file") - - if is_error: - print(C.fail(output_str.strip())) - return True - - prev = _cache_get(file_path) - - if prev is not None: - diff_lines = _compute_diff(prev, new_content) - if not diff_lines: - print(" (no changes)") - else: - added = sum(1 for l in diff_lines if l.startswith("+") and not l.startswith("+++")) - removed = sum(1 for l in diff_lines if l.startswith("-") and not l.startswith("---")) - print(f" diff: -{removed} +{added}") - truncated, leftover = _truncate_diff(diff_lines, _WRITE_DIFF_LIMIT) - for line in truncated: - print(f" {line}", end="") - if leftover > 0: - print(f" ... 
{leftover} more lines") - else: - print(" (new file)") - _render_truncated_body_plain(new_content, _WRITE_CONTENT_LINES, None) - - print(f" {output_str.strip()}") - _cache_set(file_path, new_content) - return True - - -# --- Edit renderer ------------------------------------------------------------ - -def _cache_reread(file_path: str) -> None: - """Invalidate cache for path and re-read from disk.""" - if not _WRITE_CACHE_ENABLED: - return - if file_path in _SNAPSHOT_CACHE: - del _SNAPSHOT_CACHE[file_path] - try: - content = Path(file_path).read_text(encoding="utf-8", errors="replace") - _cache_set(file_path, content) - except OSError: - pass # File gone; cache entry already removed - - -def render_edit_rich(console: Console, state: dict[str, Any]) -> bool: - inp = state.get("input") - output = state.get("output") - if not isinstance(inp, dict): - return False - - file_path = str(inp.get("filePath", "")) - old_string = inp.get("oldString") - new_string = inp.get("newString") - replace_all = bool(inp.get("replaceAll", False)) - - if not file_path or old_string is None or new_string is None: - return False - - from rich.syntax import Syntax - - rel_path = _relativize_path(file_path) - output_str = str(output) if output is not None else "" - is_error = _is_likely_error(output_str) or (output is not None and "successfully" not in output_str.lower() and "applied" not in output_str.lower()) - border = "red" if is_error else "green" - scope = "replace all" if replace_all else "replace 1 occurrence" - - sections: list[Any] = [ - Text(rel_path, style="bold cyan"), - Text(scope, style="dim"), - Text(), - ] - - diff_lines = _compute_diff(str(old_string), str(new_string)) - if not diff_lines: - sections.append(Text("(no changes in edit)", style="dim")) - else: - added = sum(1 for l in diff_lines if l.startswith("+") and not l.startswith("+++")) - removed = sum(1 for l in diff_lines if l.startswith("-") and not l.startswith("---")) - sections.append(Text(f"diff: -{removed} 
+{added}", style="dim")) - sections.append(Text()) - truncated, leftover = _truncate_diff(diff_lines, _EDIT_DIFF_LINES) - diff_text = "".join(truncated) - sections.append(Syntax(diff_text, "diff", theme="monokai", word_wrap=True)) - if leftover > 0: - sections.append(Text(f"... {leftover} more lines", style="dim")) - - sections.append(Text()) - sections.append(Text(output_str.strip(), style="red" if is_error else "green")) - - console.print(Panel(Group(*sections), title="Edit", border_style=border, expand=True)) - - # Re-read cache after edit - if _cache_get(file_path) is not None or file_path in _SNAPSHOT_CACHE: - _cache_reread(file_path) - - return True - - -def render_edit_plain(state: dict[str, Any]) -> bool: - inp = state.get("input") - output = state.get("output") - if not isinstance(inp, dict): - return False - - file_path = str(inp.get("filePath", "")) - old_string = inp.get("oldString") - new_string = inp.get("newString") - replace_all = bool(inp.get("replaceAll", False)) - - if not file_path or old_string is None or new_string is None: - return False - - rel_path = _relativize_path(file_path) - output_str = str(output) if output is not None else "" - scope = "replace all" if replace_all else "replace 1 occurrence" - - print(C.header(f"edit {rel_path}")) - print(f" {scope}") - - diff_lines = _compute_diff(str(old_string), str(new_string)) - if not diff_lines: - print(" (no changes in edit)") - else: - added = sum(1 for l in diff_lines if l.startswith("+") and not l.startswith("+++")) - removed = sum(1 for l in diff_lines if l.startswith("-") and not l.startswith("---")) - print(f" diff: -{removed} +{added}") - truncated, leftover = _truncate_diff(diff_lines, _EDIT_DIFF_LINES) - for line in truncated: - print(f" {line}", end="") - if leftover > 0: - print(f" ... 
{leftover} more lines") - - print(f" {output_str.strip()}") - - if _cache_get(file_path) is not None or file_path in _SNAPSHOT_CACHE: - _cache_reread(file_path) - - return True - - -# --- Apply-patch renderer ----------------------------------------------------- - -@dataclass -class _ParsedFilePatch: - op: str # add, update, delete, rename, unknown - path: str - old_path: str - hunks: str # unified-diff-ready text - added: int - removed: int - - -_APPLY_PATCH_HEADER_RE = re.compile( - r"^\*\*\*[ \t]*(Begin Patch|End Patch|Update File|Add File|Delete File|Rename File|Move File):?[ \t]*(.*)", - re.MULTILINE, -) - - -def _parse_apply_patch_envelope(text: str) -> list[_ParsedFilePatch]: - """Parse the *** Begin Patch / *** Update File / *** End Patch envelope.""" - results: list[_ParsedFilePatch] = [] - # Split on *** headers - parts = _APPLY_PATCH_HEADER_RE.split(text) - # parts is [preamble, directive1, path1, body1, directive2, path2, body2, ...] - i = 1 # skip preamble - while i + 2 <= len(parts): - directive = parts[i].strip() - file_path = parts[i + 1].strip() - body = parts[i + 2] if i + 2 < len(parts) else "" - i += 3 - - if directive in ("Begin Patch", "End Patch"): - continue - - op_map = { - "Update File": "update", - "Add File": "add", - "Delete File": "delete", - "Rename File": "rename", - "Move File": "rename", - } - op = op_map.get(directive, "unknown") - old_path = "" - if op == "rename" and " -> " in file_path: - old_path, file_path = file_path.split(" -> ", 1) - old_path = old_path.strip() - file_path = file_path.strip() - - # Count +/- lines - body_lines = body.split("\n") - added = sum(1 for l in body_lines if l.startswith("+") and not l.startswith("+++")) - removed = sum(1 for l in body_lines if l.startswith("-") and not l.startswith("---")) - - # Synthesize unified diff header - rel = _relativize_path(file_path) - old_rel = _relativize_path(old_path) if old_path else rel - if op == "add": - header = f"--- /dev/null\n+++ b/{rel}\n" - elif op == 
"delete": - header = f"--- a/{rel}\n+++ /dev/null\n" - else: - header = f"--- a/{old_rel}\n+++ b/{rel}\n" - - hunks = header + body.strip() + "\n" - results.append(_ParsedFilePatch(op=op, path=file_path, old_path=old_path, hunks=hunks, added=added, removed=removed)) - - return results - - -def _parse_apply_patch_json_list(patches: list[dict[str, Any]]) -> list[_ParsedFilePatch]: - """Parse {patches: [{path, diff}, ...]} variant.""" - results: list[_ParsedFilePatch] = [] - for p in patches: - path = str(p.get("path", p.get("file", ""))) - diff_text = _first_string(p, ("diff", "patch", "patchText", "patch_text", "content", "body")) - lines = diff_text.split("\n") - added = sum(1 for l in lines if l.startswith("+") and not l.startswith("+++")) - removed = sum(1 for l in lines if l.startswith("-") and not l.startswith("---")) - rel = _relativize_path(path) - header = f"--- a/{rel}\n+++ b/{rel}\n" - hunks = header + diff_text.strip() + "\n" - results.append(_ParsedFilePatch(op="update", path=path, old_path="", hunks=hunks, added=added, removed=removed)) - return results - - -# Keys under which the apply_patch tool may stash its patch body, in -# precedence order. github-copilot/gpt-5.x emits 'patchText'; older -# OpenAI tool-use mode emits 'input'; some MCP bridges use 'diff' or -# 'body'. The first non-empty string wins. -_PATCH_TEXT_KEYS = ("patchText", "patch_text", "patch", "input", "content", "diff", "body") - - -def _first_string(d: dict[str, Any], keys: tuple[str, ...]) -> str: - """Return the first non-empty string value in d under any of keys, else ''.""" - for k in keys: - v = d.get(k) - if isinstance(v, str) and v: - return v - return "" - - -def _extract_apply_patch_payload(state: dict[str, Any]) -> tuple[str, list[_ParsedFilePatch], str]: - """Extract and parse apply_patch input. 
def render_apply_patch_rich(console: Console, state: dict[str, Any]) -> bool:
    """Render an apply_patch tool call as a rich ``Apply patch`` panel.

    Args:
        console: Rich console the panel is printed to.
        state: Tool-call state; ``input``, ``output`` and ``status`` are read
            (the first two through ``_extract_apply_patch_payload``).

    Returns:
        False when the state carries neither parsed patches nor raw patch
        text, so nothing is printed. True once the panel has been printed.

    Side effects:
        When ``status`` is ``"completed"`` and the output does not look like
        an error, ``_cache_reread`` is called for every patched path, with
        relative paths joined onto ``ROOT``.
    """
    raw_text, patches, output_str = _extract_apply_patch_payload(state)
    status = str(state.get("status", ""))

    if not patches and not raw_text:
        return False

    from rich.syntax import Syntax

    is_error = _is_likely_error(output_str)
    if status == "completed":
        border = "red" if is_error else "green"
    else:
        border = "yellow"

    sections: list[Any] = []

    if not patches:
        # Fallback: the text could not be parsed, show it raw with diff syntax.
        byte_size = len(raw_text.encode("utf-8", errors="replace"))
        line_count = raw_text.count("\n")
        sections.append(Text(f"Raw patch: {line_count} lines, {byte_size} bytes", style="dim"))
        sections.append(Text())
        truncated_lines = raw_text.split("\n")[:_WRITE_DIFF_LIMIT]
        leftover = max(0, raw_text.count("\n") - _WRITE_DIFF_LIMIT)
        sections.append(Syntax("\n".join(truncated_lines), "diff", theme="monokai", word_wrap=True))
        if leftover > 0:
            sections.append(Text(f"... {leftover} more lines", style="dim"))
    else:
        # Summary header
        total_added = sum(p.added for p in patches)
        total_removed = sum(p.removed for p in patches)
        sections.append(Text(f"{len(patches)} file(s) changed: +{total_added} -{total_removed}", style="dim"))
        sections.append(Text())

        # Per-file rendering, capped at _APPLY_PATCH_MAX_FILES entries.
        for fp in patches[:_APPLY_PATCH_MAX_FILES]:
            rel = _relativize_path(fp.path)
            label = f"{fp.op:<8} {rel} +{fp.added} -{fp.removed}"
            sections.append(Text(label, style="bold cyan"))

            diff_lines = fp.hunks.split("\n")
            # split() leaves one empty string after a trailing newline; drop
            # only that artifact. The previous value-based filter discarded
            # every blank line in the diff, not just the last one.
            if diff_lines and diff_lines[-1] == "":
                diff_lines.pop()
            # _truncate_diff is handed newline-terminated lines.
            diff_with_nl = [line + "\n" for line in diff_lines]
            truncated, leftover = _truncate_diff(diff_with_nl, _APPLY_PATCH_DIFF_LINES)
            diff_text = "".join(truncated)
            if diff_text.strip():
                sections.append(Syntax(diff_text, "diff", theme="monokai", word_wrap=True))
            if leftover > 0:
                sections.append(Text(f"... {leftover} more lines", style="dim"))
            sections.append(Text())

        if len(patches) > _APPLY_PATCH_MAX_FILES:
            remaining = len(patches) - _APPLY_PATCH_MAX_FILES
            sections.append(Text(f"... and {remaining} more file(s)", style="dim"))

    # Status line
    if output_str.strip():
        sections.append(Text(output_str.strip(), style="red" if is_error else "green"))

    console.print(Panel(Group(*sections), title="Apply patch", border_style=border, expand=True))

    # Cache invalidation on success
    if status == "completed" and not is_error:
        for fp in patches:
            full_path = fp.path
            if not os.path.isabs(full_path):
                full_path = os.path.join(ROOT, full_path)
            _cache_reread(full_path)

    return True
def _parse_glob_output(output: str) -> tuple[list[str], list[str]]:
    """Separate glob tool output into file paths and summary lines.

    Each non-blank line is stripped and then classified: lines matching
    ``_GLOB_SUMMARY_LINE_RE`` are summaries, everything else is a path.
    Keeping the two apart lets callers count only real matches.

    Returns:
        ``(file_paths, summary_lines)``, each in input order.
    """
    # Bucket by "is this a summary line?" in a single pass.
    buckets: dict[bool, list[str]] = {False: [], True: []}
    for raw in output.strip().split("\n"):
        entry = raw.strip()
        if entry:
            is_summary = bool(_GLOB_SUMMARY_LINE_RE.match(entry))
            buckets[is_summary].append(entry)
    return buckets[False], buckets[True]
# --- Grep renderer ------------------------------------------------------------

# One grep hit in "lines" mode: <path>:<line number>:<matched text>.
# The group names are the ones _parse_grep_output reads back via m.group().
# The path group is lazy so it stops at the first ":<digits>:" it can.
_GREP_LINE_RE = re.compile(r"^(?P<path>.+?):(?P<line>\d+):(?P<text>.*)$")

# Rich styles for the grep panel: matched span, rest of the line, line number.
_GREP_HIGHLIGHT_STYLE = "bold yellow on grey23"
_GREP_BODY_STYLE = "default"
_GREP_LINENO_STYLE = "dim cyan"
def _parse_grep_output(output: str) -> tuple[str, list[dict[str, Any]]]:
    """Classify and parse the text produced by the grep tool.

    Returns:
        ``(mode, entries)`` where ``mode`` is ``"lines"`` or ``"files"``.
        In ``"files"`` mode every entry is ``{"path": str}``. In ``"lines"``
        mode every entry is ``{"path": str, "line": int, "text": str}``.
        Empty output yields ``("files", [])``.
    """
    rows = [row for row in output.strip().split("\n") if row.strip()]
    if not rows:
        return "files", []

    # "lines" mode needs at least 70% of the rows to look like
    # path:linenum:content; anything less is treated as a plain file list.
    hits = [_GREP_LINE_RE.match(row) for row in rows]
    matched = sum(1 for hit in hits if hit)
    if matched < len(rows) * 0.7:
        return "files", [{"path": row.strip()} for row in rows]

    parsed: list[dict[str, Any]] = []
    for row, hit in zip(rows, hits):
        if hit is None:
            # Stray row inside line-mode output: keep it as a bare path.
            parsed.append({"path": row.strip(), "line": 0, "text": ""})
        else:
            parsed.append(
                {
                    "path": hit.group("path"),
                    "line": int(hit.group("line")),
                    "text": hit.group("text"),
                }
            )
    return "lines", parsed
sections.append(Text("(no matches)", style="dim")) - border = "dim" - else: - mode, entries = _parse_grep_output(output_str) - - if mode == "files": - n_files = len(entries) - shown = entries[:_GREP_FILE_CAP] - for e in shown: - sections.append(Text(f" {_relativize_path(e['path'])}")) - if n_files > _GREP_FILE_CAP: - sections.append(Text(f" ... and {n_files - _GREP_FILE_CAP} more", style="dim")) - sections.append(Text()) - sections.append(Text(f"{n_files} file(s) matched", style="dim")) - else: - # Group by file, preserving order - from collections import OrderedDict as _OD - grep_pat = _grep_compile_pattern(pattern) - grouped: OrderedDict[str, list[dict[str, Any]]] = _OD() - for e in entries: - grouped.setdefault(e["path"], []).append(e) - - n_files = len(grouped) - n_total = len(entries) - total_lines_emitted = 0 - files_shown = 0 - truncated_globally = False - - for fpath, file_entries in grouped.items(): - if total_lines_emitted >= _GREP_TOTAL_LINE_CAP: - truncated_globally = True - break - if files_shown >= _GREP_FILE_CAP: - truncated_globally = True - break - files_shown += 1 - rel = _relativize_path(fpath) - sections.append(Text(f" {rel} ({len(file_entries)} match(es))", style="bold cyan")) - shown_lines = file_entries[:_GREP_LINE_CAP_PER_FILE] - for e in shown_lines: - text = e["text"] - if len(text) > 200: - text = text[:200] + "…" - sections.append(_grep_format_line_rich(e["line"], text, grep_pat)) - total_lines_emitted += 1 - if total_lines_emitted >= _GREP_TOTAL_LINE_CAP: - truncated_globally = True - break - if len(file_entries) > _GREP_LINE_CAP_PER_FILE: - remaining = len(file_entries) - _GREP_LINE_CAP_PER_FILE - sections.append(Text(f" ... and {remaining} more in {rel}", style="dim")) - - if truncated_globally: - remaining_files = n_files - files_shown - if remaining_files > 0: - sections.append(Text(f" ... and {remaining_files} more file(s)", style="dim")) - else: - sections.append(Text(" ... 
(further matches truncated)", style="dim")) - - sections.append(Text()) - sections.append(Text(f"{n_total} match(es) across {n_files} file(s)", style="dim")) - - console.print(Panel(Group(*sections), title="Grep", border_style=border, expand=True)) - return True - - -def render_grep_plain(state: dict[str, Any]) -> bool: - inp = state.get("input") - output = state.get("output") - status = str(state.get("status", "")) - - if not isinstance(inp, dict): - return False - - if isinstance(output, dict): - output_str = str(output.get("matches", output.get("results", ""))) - elif isinstance(output, str): - output_str = output - else: - return False - - pattern = str(inp.get("pattern", "")) - search_path = str(inp.get("path", "")) - include = str(inp.get("include", "")) - - is_error = _is_likely_error(output_str) - - header_parts = [f"grep {pattern!r}"] - if search_path: - header_parts.append(f"in {_relativize_path(search_path)}") - if include: - header_parts.append(f"include={include}") - print(C.header(" ".join(header_parts))) - - if is_error: - print(f" {C.fail(output_str.strip())}") - elif not output_str.strip(): - print(" (no matches)") - else: - mode, entries = _parse_grep_output(output_str) - - if mode == "files": - n_files = len(entries) - shown = entries[:_GREP_FILE_CAP] - for e in shown: - print(f" {_relativize_path(e['path'])}") - if n_files > _GREP_FILE_CAP: - print(f" ... 
and {n_files - _GREP_FILE_CAP} more") - print(f" {n_files} file(s) matched") - else: - from collections import OrderedDict as _OD - grep_pat = _grep_compile_pattern(pattern) - _plain_color = C.color_enabled() - grouped: OrderedDict[str, list[dict[str, Any]]] = _OD() - for e in entries: - grouped.setdefault(e["path"], []).append(e) - - n_files = len(grouped) - n_total = len(entries) - total_lines_emitted = 0 - files_shown = 0 - truncated_globally = False - - for fpath, file_entries in grouped.items(): - if total_lines_emitted >= _GREP_TOTAL_LINE_CAP: - truncated_globally = True - break - if files_shown >= _GREP_FILE_CAP: - truncated_globally = True - break - files_shown += 1 - rel = _relativize_path(fpath) - print(f" {rel} ({len(file_entries)} match(es))") - shown_lines = file_entries[:_GREP_LINE_CAP_PER_FILE] - for e in shown_lines: - text = e["text"] - if len(text) > 200: - text = text[:200] + "…" - print(_grep_format_line_plain(e["line"], text, grep_pat, _plain_color)) - total_lines_emitted += 1 - if total_lines_emitted >= _GREP_TOTAL_LINE_CAP: - truncated_globally = True - break - if len(file_entries) > _GREP_LINE_CAP_PER_FILE: - remaining = len(file_entries) - _GREP_LINE_CAP_PER_FILE - print(f" ... and {remaining} more in {rel}") - - if truncated_globally: - remaining_files = n_files - files_shown - if remaining_files > 0: - print(f" ... and {remaining_files} more file(s)") - else: - print(" ... 
(further matches truncated)") - - print(f" {n_total} match(es) across {n_files} file(s)") - - return True - - -# --- Bash renderer ------------------------------------------------------------ - -def render_bash_rich(console: Console, state: dict[str, Any]) -> bool: - inp = state.get("input") - output = state.get("output") - if not isinstance(inp, dict): - return False - - command = str(inp.get("command", "")) - description = inp.get("description", "") - output_str = str(output) if output is not None else "" - - if not command: - return False - - is_error = _is_likely_error(output_str) - border = "red" if is_error else ("green" if state.get("status") == "completed" else "yellow") - - sections: list[Any] = [ - Text(f"$ {command}", style="bold cyan"), - ] - if description: - sections.append(Text(str(description), style="dim italic")) - - sections.append(Text()) - - if output_str.strip(): - sections.append(Text("Output", style="bold green")) - sections.append(Text(output_str.strip())) - else: - sections.append(Text("(no output)", style="dim")) - - console.print(Panel(Group(*sections), title="Bash", border_style=border, expand=True)) - return True - - -def render_bash_plain(state: dict[str, Any]) -> bool: - inp = state.get("input") - output = state.get("output") - if not isinstance(inp, dict): - return False - - command = str(inp.get("command", "")) - description = inp.get("description", "") - output_str = str(output) if output is not None else "" - - if not command: - return False - - print(C.header(f"bash $ {command}")) - if description: - print(f" # {description}") - - if output_str.strip(): - print(output_str.strip()) - else: - print(" (no output)") - - return True - - -# --- Sandbox-bootstrap renderer --------------------------------------------- -# -# Detects bash invocations of `tools/sandbox-bootstrap.py --format json` and -# renders the JSON output as a structured, color-coded `Sandbox` panel -# instead of the generic Bash panel. 
def _console_supports_emoji(console: Optional[Any]) -> bool:
    """Tell whether the target encoding is a UTF variant.

    With no console (plain mode) the encoding of ``sys.stdout`` is used;
    otherwise the console's ``encoding`` attribute, if it has one. A missing
    or empty encoding counts as no emoji support.
    """
    if console is None:
        declared = sys.stdout.encoding
    else:
        declared = getattr(console, "encoding", "")
    return "utf" in (declared or "").lower()
- - Recognises both: - - direct script invocations: - .venv/bin/python3 tools/sandbox-bootstrap.py --format json status - python tools/sandbox-bootstrap.py status --format=json - - make-target wrappers when BOOTSTRAP_ARGS forces json: - make sandbox-status BOOTSTRAP_ARGS='--format json' - make sandbox-validate BOOTSTRAP_ARGS=--format=json - """ - if not command_str: - return None - try: - tokens = shlex.split(command_str) - except ValueError: - return None - if not tokens: - return None - - # Look for --format json or --format=json anywhere in the tokens. - # Also recognise it when nested inside a make-style assignment such as - # BOOTSTRAP_ARGS='--format json' (which shlex collapses into a single - # token "BOOTSTRAP_ARGS=--format json"). - has_json_format = False - for i, tok in enumerate(tokens): - if tok == "--format=json": - has_json_format = True - break - if tok == "--format" and i + 1 < len(tokens) and tokens[i + 1] == "json": - has_json_format = True - break - # Make-style env assignments (e.g. BOOTSTRAP_ARGS=--format json, - # BOOTSTRAP_ARGS=--format=json, OPENCODE_ARGS=...). - if "=" in tok and ("--format json" in tok or "--format=json" in tok): - has_json_format = True - break - - # Direct script invocation path. - script_idx = -1 - for i, tok in enumerate(tokens): - if tok.endswith(_SANDBOX_BOOTSTRAP_SCRIPT) or tok.endswith("/" + _SANDBOX_BOOTSTRAP_SCRIPT): - script_idx = i - break - if script_idx >= 0 and has_json_format: - # Subcommand: first non-flag positional after the script path. - for j in range(script_idx + 1, len(tokens)): - t = tokens[j] - if t.startswith("-"): - # Skip --format json (two-token form). - if t == "--format" and j + 1 < len(tokens): - continue - continue - # A bare token after --format json may be the value of --format. - # Skip if previous token was --format (without =). - if j > 0 and tokens[j - 1] == "--format": - continue - if t in _SANDBOX_KNOWN_SUBCOMMANDS: - return t - return None - - # Make-target wrapper path. 
- # Accept env-prefixed forms too, e.g.: - # BOOTSTRAP_ARGS='--format json --keep-going' make sandbox-validate - make_idx = -1 - for i, tok in enumerate(tokens): - if tok == "make": - make_idx = i - break - if make_idx >= 0: - # Find the first sandbox-* target token after `make`. - for tok in tokens[make_idx + 1:]: - if tok in _SANDBOX_MAKE_TARGETS and has_json_format: - return _SANDBOX_MAKE_TARGETS[tok] - return None - - -def _maybe_render_sandbox_bootstrap(console: Optional[Any], state: dict[str, Any]) -> bool: - """Try to render a bash invocation of sandbox-bootstrap.py --format json - as a styled Sandbox panel. Return True if handled, False to fall back to - the generic bash renderer.""" - if not _SANDBOX_RENDER: - return False - inp = state.get("input") - output = state.get("output") - if not isinstance(inp, dict): - return False - - command = str(inp.get("command", "")) - subcommand = _is_sandbox_bootstrap_json_call(command) - if subcommand is None: - return False - - output_str = str(output) if output is not None else "" - stripped = output_str.strip() - if not stripped: - # In-flight or silent success; let the bash renderer handle it. - return False - - # Only proceed when output parses as a single JSON document. - # make commands often echo the invocation line, so try to find - # the first JSON-like delimiter if a strict parse fails. - try: - payload = json.loads(stripped) - except (ValueError, TypeError): - first_brace = stripped.find("{") - first_bracket = stripped.find("[") - idxs = [i for i in (first_brace, first_bracket) if i >= 0] - if not idxs: - return False - start_idx = min(idxs) - try: - payload = json.loads(stripped[start_idx:]) - except (ValueError, TypeError): - return False - - # Per-subcommand schema sniff: if the payload doesn't carry the - # expected top-level structure, fall through to the bash renderer. 
def _sandbox_payload_matches(subcommand: str, payload: Any) -> bool:
    """Check that a JSON payload has the rough shape of a subcommand's output.

    This is a cheap structural test, not a schema validation. ``list``
    expects a JSON array that is empty or starts with an object. Every other
    known subcommand expects an object holding at least one of its marker
    keys. Unknown subcommands never match, so the caller can fall back to
    the generic bash renderer.
    """
    if subcommand == "list":
        if not isinstance(payload, list):
            return False
        return len(payload) == 0 or isinstance(payload[0], dict)

    if not isinstance(payload, dict):
        return False

    # Subcommand -> keys of which at least one must be present.
    apply_markers = ("example", "files_to_write", "written_files", "status")
    marker_keys = {
        "inspect": ("id", "display_name", "files"),
        "detect": ("candidates", "signals"),
        "status": ("sandbox_state", "phase2_gate_pass", "capabilities"),
        "apply": apply_markers,
        "regenerate": apply_markers,
        "validate": ("overall_outcome", "tiers"),
    }
    expected = marker_keys.get(subcommand, ())
    return any(key in payload for key in expected)
def _render_sandbox_rich(
    console: Any,
    subcommand: str,
    payload: Any,
    command: str,
    description: str,
    status: str,
) -> bool:
    """Print the rich ``Sandbox`` panel for one sandbox-bootstrap subcommand.

    The command line, optional description and a spacer are placed first;
    the subcommand-specific renderer then appends its own sections and
    returns the border colour to use.

    Returns:
        True once the panel has been printed. False, with nothing printed,
        when the subcommand has no renderer or the renderer raises
        ``KeyError``, ``TypeError`` or ``AttributeError`` (treated as a
        schema mismatch so the caller can fall back to the bash renderer).
    """
    glyphs = _sandbox_glyphs(console)

    # In-flight calls get a yellow border, completed ones green; the
    # per-subcommand renderer may return a different colour.
    border = "green" if status == "completed" else "yellow"

    title = f"{glyphs['box']} Sandbox · {subcommand}"
    sections: list[Any] = [Text(f"$ {command}", style="bold cyan")]
    if description:
        sections.append(Text(description, style="dim italic"))
    sections.append(Text())

    # Renderers take slightly different arguments, so each is wrapped in a
    # zero-argument callable and looked up by subcommand name.
    def _apply_like() -> str:
        return _render_sandbox_apply_rich(sections, payload, subcommand, border, glyphs)

    renderers = {
        "list": lambda: _render_sandbox_list_rich(sections, payload, border),
        "inspect": lambda: _render_sandbox_inspect_rich(sections, payload, border, glyphs),
        "detect": lambda: _render_sandbox_detect_rich(sections, payload, border, glyphs),
        "status": lambda: _render_sandbox_status_rich(sections, payload, border, glyphs),
        "apply": _apply_like,
        "regenerate": _apply_like,
        "validate": lambda: _render_sandbox_validate_rich(sections, payload, border, glyphs),
    }
    render = renderers.get(subcommand)
    if render is None:
        return False

    try:
        border = render()
    except (KeyError, TypeError, AttributeError):
        # Defensive: schema mismatch -> fall through to bash renderer.
        return False

    console.print(Panel(Group(*sections), title=title, border_style=border, expand=True))
    return True
'.join(str(p) for p in payload['default_ports'])}", style="dim")) - if payload.get("build_command"): - sections.append(Text(f" build_command: {payload['build_command']}", style="dim")) - if payload.get("test_command"): - sections.append(Text(f" test_command: {payload['test_command']}", style="dim")) - if payload.get("caveats"): - sections.append(Text()) - sections.append(Text("Caveats:", style="bold yellow")) - for c in payload["caveats"]: - sections.append(Text(f" {glyphs['warn']} {c}", style="yellow")) - files = payload.get("files") or [] - if files: - sections.append(Text()) - cap = _SANDBOX_FILES_CAP - sections.append(Text(f"Files ({len(files)}):", style="bold cyan")) - for f in files[:cap]: - sections.append(Text(f" {glyphs['bullet']} {f}")) - if len(files) > cap: - sections.append(Text(f" ... and {len(files) - cap} more", style="dim")) - return border - - -def _render_sandbox_detect_rich( - sections: list[Any], payload: dict, border: str, glyphs: dict -) -> str: - from rich.table import Table - signals = payload.get("signals") or {} - sections.append(Text("Detection signals", style="bold cyan")) - sections.append(Text(f" source: {signals.get('source', '-')}", style="dim")) - sections.append(Text(f" languages: {', '.join(signals.get('languages') or []) or '-'}", style="dim")) - sections.append(Text(f" manifests: {', '.join(signals.get('manifests') or []) or '-'}", style="dim")) - sections.append(Text()) - - candidates = payload.get("candidates") or [] - sections.append(Text(f"Ranked candidates ({len(candidates)}):", style="bold cyan")) - table = Table(show_header=True, header_style="bold cyan", expand=True, pad_edge=False) - table.add_column("score", justify="right", no_wrap=True) - table.add_column("id", style="bold cyan", no_wrap=True) - table.add_column("name") - table.add_column("path", style="dim") - cap = _SANDBOX_FILES_CAP - for c in candidates[:cap]: - score = c.get("score", 0) - score_style = "green" if score >= 5 else ("yellow" if score >= 1 else 
"dim") - table.add_row( - Text(str(score), style=score_style), - str(c.get("id", "")), - str(c.get("display_name", "")), - str(c.get("path", "")), - ) - sections.append(table) - if len(candidates) > cap: - sections.append(Text(f"... and {len(candidates) - cap} more", style="dim")) - return border - - -def _render_sandbox_status_rich( - sections: list[Any], payload: dict, border: str, glyphs: dict -) -> str: - from rich.table import Table - state_value = str(payload.get("sandbox_state", "unknown")) - last_validation = payload.get("last_validation") - gate_pass = bool(payload.get("phase2_gate_pass")) - gate_reason = str(payload.get("phase2_gate_reason", "")) - - state_glyph = {"generated": glyphs["ok"], "user-managed": glyphs["warn"], "missing": glyphs["fail"]}.get(state_value, glyphs["info"]) - sections.append(Text.assemble( - ("state: ", "bold"), - (f"{state_glyph} {state_value}", _sandbox_state_style(state_value)), - )) - sections.append(Text(f" path: {payload.get('sandbox_path', '-')}", style="dim")) - sections.append(Text(f" provenance: {'yes' if payload.get('provenance_present') else 'no'}", style="dim")) - lv_text = last_validation if last_validation is not None else "-" - sections.append(Text.assemble( - (" last validation: ", "dim"), - (str(lv_text), _sandbox_last_validation_style(last_validation)), - )) - sections.append(Text(f" allow override: {'yes' if payload.get('allow_no_sandbox') else 'no'}", style="dim")) - sections.append(Text()) - - # Gate badge. - if gate_pass: - sections.append(Text.assemble( - (f"{glyphs['alarm']} ", ""), - (f"Phase 2 gate would PASS", "bold green"), - (f" — {gate_reason}", "dim"), - )) - else: - sections.append(Text.assemble( - (f"{glyphs['alarm']} ", ""), - (f"Phase 2 gate would BLOCK", "bold red"), - (f" — {gate_reason}", "dim"), - )) - # Status doesn't fail the script; signal informational alarm via yellow. 
- border = "yellow" - - sections.append(Text()) - capabilities = payload.get("capabilities") or {} - if capabilities: - table = Table(show_header=True, header_style="bold cyan", expand=True, pad_edge=False) - table.add_column("capability", no_wrap=True) - table.add_column("status", no_wrap=True) - table.add_column("path", style="dim") - # Required first, helpers after. - for name in (*_SANDBOX_REQUIRED_CAPABILITIES, *_SANDBOX_HELPER_CAPABILITIES): - cap = capabilities.get(name) - if cap is None: - continue - satisfied = bool(cap.get("satisfied")) - present = bool(cap.get("present")) - is_helper = name in _SANDBOX_HELPER_CAPABILITIES - if satisfied: - badge = Text(f"{glyphs['ok']} ok", style="green") - elif is_helper and not present: - badge = Text(f"{glyphs['skip']} optional", style="dim") - else: - badge = Text(f"{glyphs['fail']} missing", style="red") - table.add_row(name, badge, str(cap.get("path", ""))) - sections.append(table) - return border - - -def _render_sandbox_apply_rich( - sections: list[Any], payload: dict, subcommand: str, border: str, glyphs: dict -) -> str: - apply_status = str(payload.get("status", "")) - is_dry = bool(payload.get("dry_run")) or apply_status == "dry-run" - chip_text = "DRY RUN" if is_dry else apply_status.upper() or "(unknown)" - chip_style = "yellow" if is_dry else ("green" if apply_status == "applied" else "dim") - sections.append(Text.assemble( - (f"{glyphs['box']} ", ""), - (f"{subcommand} ", "bold cyan"), - (f"{payload.get('example', '-')} ", "bold cyan"), - (f"[{chip_text}]", chip_style), - )) - sections.append(Text(f" example_path: {payload.get('example_path', '-')}", style="dim")) - sections.append(Text(f" sandbox_path: {payload.get('sandbox_path', '-')}", style="dim")) - sections.append(Text(f" force: {payload.get('force', False)}", style="dim")) - if payload.get("backup_dir"): - sections.append(Text(f" backup_dir: {payload['backup_dir']}", style="dim")) - - files_to_write = payload.get("files_to_write") or [] - written = 
payload.get("written_files") or [] - sections.append(Text()) - sections.append(Text( - f"files: planned={len(files_to_write)} written={len(written)}", - style="bold cyan", - )) - markers = payload.get("markers_provided") or {} - if markers: - sections.append(Text(f"markers_provided ({len(markers)}):", style="bold cyan")) - for k, v in markers.items(): - sections.append(Text(f" {k} = {v}", style="dim")) - unfilled = payload.get("markers_used_unfilled") or [] - if unfilled: - sections.append(Text()) - sections.append(Text.assemble( - (f"{glyphs['warn']} ", ""), - (f"Declared markers used but not provided: {', '.join(unfilled)}", "yellow"), - )) - border = "yellow" - undeclared = payload.get("markers_used_undeclared") or [] - if undeclared: - sections.append(Text.assemble( - (f"{glyphs['warn']} ", ""), - (f"Markers used but not declared: {', '.join(undeclared)}", "yellow"), - )) - border = "yellow" - - show_files = files_to_write or written - if show_files: - sections.append(Text()) - cap = _SANDBOX_FILES_CAP - for f in show_files[:cap]: - sections.append(Text(f" {glyphs['bullet']} {f}")) - if len(show_files) > cap: - sections.append(Text(f" ... 
and {len(show_files) - cap} more", style="dim")) - - if apply_status == "applied" and not is_dry: - sections.append(Text()) - sections.append(Text.assemble( - (f"{glyphs['ok']} ", ""), - (f"Applied '{payload.get('example', '-')}'", "bold green"), - (f" → {payload.get('sandbox_path', '-')}", "dim"), - )) - if payload.get("provenance_path"): - sections.append(Text(f" provenance: {payload['provenance_path']}", style="dim")) - return border - - -def _render_sandbox_validate_rich( - sections: list[Any], payload: dict, border: str, glyphs: dict -) -> str: - from rich.table import Table - overall = str(payload.get("overall_outcome", "unknown")) - overall_style, overall_glyph_key = _sandbox_outcome_style(overall) - - sections.append(Text.assemble( - (f"{glyphs['check']} ", ""), - ("overall: ", "bold"), - (f"{glyphs[overall_glyph_key]} {overall}", overall_style), - )) - - if overall == "failed": - border = "red" - elif overall == "passed": - border = "green" - else: - border = "yellow" - - tiers = payload.get("tiers") or [] - if tiers: - sections.append(Text()) - table = Table(show_header=True, header_style="bold cyan", expand=True, pad_edge=False) - table.add_column("tier", no_wrap=True) - table.add_column("purpose") - table.add_column("outcome", no_wrap=True) - table.add_column("dur", justify="right", no_wrap=True) - table.add_column("exit", justify="right", no_wrap=True) - for t in tiers: - t_outcome = str(t.get("outcome", "unknown")) - o_style, o_key = _sandbox_outcome_style(t_outcome) - badge = Text(f"{glyphs[o_key]} {t_outcome}", style=o_style) - dur = t.get("duration_seconds") - dur_str = f"{dur:.2f}s" if isinstance(dur, (int, float)) else "-" - exit_code = t.get("exit_code") - exit_str = "-" if exit_code is None else str(exit_code) - table.add_row( - str(t.get("tier", "")), - str(t.get("purpose", "")), - badge, - dur_str, - exit_str, - ) - sections.append(table) - - # For each failed tier, show a capped stderr_tail under it. 
- for t in tiers: - if t.get("outcome") != "failed": - continue - stderr_tail = str(t.get("stderr_tail") or "").strip() - if not stderr_tail: - continue - sections.append(Text()) - sections.append(Text( - f"{glyphs['fail']} {t.get('tier', '')} {t.get('purpose', '')} stderr (tail):", - style="bold red", - )) - tail_lines = stderr_tail.splitlines() - cap = _SANDBOX_VALIDATE_STDERR_LINES - shown = tail_lines[-cap:] - for line in shown: - sections.append(Text(f" {line}", style="red")) - if len(tail_lines) > cap: - sections.append(Text( - f" ... ({len(tail_lines) - cap} earlier lines truncated; " - f"see tmp/last-phase-*.jsonl for full output)", - style="dim", - )) - - missing = payload.get("missing_helpers") or [] - if missing: - sections.append(Text()) - sections.append(Text.assemble( - (f"{glyphs['warn']} ", ""), - (f"Helper capabilities still missing: {', '.join(missing)}", "yellow"), - )) - - if payload.get("history_updated"): - sections.append(Text(f"{glyphs['info']} history updated in sandbox/CODECOME-GENERATED.md", style="dim")) - return border - - -def _render_sandbox_plain( - subcommand: str, - payload: Any, - command: str, - description: str, - status: str, -) -> bool: - glyphs = _sandbox_glyphs(None) - print(C.header(f"{glyphs['box']} Sandbox · {subcommand}")) - print(f" $ {command}") - if description: - print(f" # {description}") - - try: - if subcommand == "list": - _render_sandbox_list_plain(payload, glyphs) - elif subcommand == "inspect": - _render_sandbox_inspect_plain(payload, glyphs) - elif subcommand == "detect": - _render_sandbox_detect_plain(payload, glyphs) - elif subcommand == "status": - _render_sandbox_status_plain(payload, glyphs) - elif subcommand in ("apply", "regenerate"): - _render_sandbox_apply_plain(payload, subcommand, glyphs) - elif subcommand == "validate": - _render_sandbox_validate_plain(payload, glyphs) - else: - return False - except (KeyError, TypeError, AttributeError): - return False - return True - - -def 
_render_sandbox_list_plain(payload: Any, glyphs: dict) -> None: - if not isinstance(payload, list): - raise TypeError - for ex in payload: - applies = ex.get("applies_when") or {} - langs = ", ".join(applies.get("languages") or []) or "-" - print(f" {glyphs['bullet']} {ex.get('id', ''):<20} {ex.get('display_name', '')} ({langs})") - print(f" {len(payload)} example(s) available") - - -def _render_sandbox_inspect_plain(payload: dict, glyphs: dict) -> None: - print(f" id: {payload.get('id', '')}") - print(f" name: {payload.get('display_name', '')}") - print(f" path: {payload.get('path', '')}") - applies = payload.get("applies_when") or {} - for k, v in applies.items(): - joined = ", ".join(v) if isinstance(v, list) else str(v) - print(f" applies_when.{k}: {joined}") - if payload.get("required_tools"): - print(f" required_tools: {', '.join(payload['required_tools'])}") - if payload.get("template_vars"): - print(f" template_vars: {', '.join(payload['template_vars'])}") - if payload.get("default_ports"): - print(f" default_ports: {', '.join(str(p) for p in payload['default_ports'])}") - if payload.get("build_command"): - print(f" build_command: {payload['build_command']}") - if payload.get("test_command"): - print(f" test_command: {payload['test_command']}") - if payload.get("caveats"): - print(" Caveats:") - for c in payload["caveats"]: - print(f" {glyphs['warn']} {c}") - files = payload.get("files") or [] - if files: - cap = _SANDBOX_FILES_CAP - print(f" Files ({len(files)}):") - for f in files[:cap]: - print(f" {glyphs['bullet']} {f}") - if len(files) > cap: - print(f" ... 
and {len(files) - cap} more") - - -def _render_sandbox_detect_plain(payload: dict, glyphs: dict) -> None: - signals = payload.get("signals") or {} - print(" signals:") - print(f" source: {signals.get('source', '-')}") - print(f" languages: {', '.join(signals.get('languages') or []) or '-'}") - print(f" manifests: {', '.join(signals.get('manifests') or []) or '-'}") - candidates = payload.get("candidates") or [] - print(f" candidates ({len(candidates)}):") - cap = _SANDBOX_FILES_CAP - for c in candidates[:cap]: - print(f" score={c.get('score', 0):>2} {c.get('id', ''):<20} {c.get('display_name', '')}") - if len(candidates) > cap: - print(f" ... and {len(candidates) - cap} more") - - -def _render_sandbox_status_plain(payload: dict, glyphs: dict) -> None: - state_value = str(payload.get("sandbox_state", "unknown")) - last_validation = payload.get("last_validation") - gate_pass = bool(payload.get("phase2_gate_pass")) - gate_reason = str(payload.get("phase2_gate_reason", "")) - - print(f" state: {state_value}") - print(f" path: {payload.get('sandbox_path', '-')}") - print(f" provenance: {'yes' if payload.get('provenance_present') else 'no'}") - print(f" last validation: {last_validation if last_validation is not None else '-'}") - print(f" allow override: {'yes' if payload.get('allow_no_sandbox') else 'no'}") - if gate_pass: - print(C.ok(f" {glyphs['alarm']} Phase 2 gate would PASS — {gate_reason}")) - else: - print(C.warn(f" {glyphs['alarm']} Phase 2 gate would BLOCK — {gate_reason}")) - - capabilities = payload.get("capabilities") or {} - if capabilities: - print(" capabilities:") - for name in (*_SANDBOX_REQUIRED_CAPABILITIES, *_SANDBOX_HELPER_CAPABILITIES): - cap = capabilities.get(name) - if cap is None: - continue - satisfied = bool(cap.get("satisfied")) - present = bool(cap.get("present")) - is_helper = name in _SANDBOX_HELPER_CAPABILITIES - if satisfied: - marker = f"{glyphs['ok']} ok" - elif is_helper and not present: - marker = f"{glyphs['skip']} optional" - 
else: - marker = f"{glyphs['fail']} missing" - print(f" {name:<14} {marker:<14} {cap.get('path', '')}") - - -def _render_sandbox_apply_plain(payload: dict, subcommand: str, glyphs: dict) -> None: - apply_status = str(payload.get("status", "")) - is_dry = bool(payload.get("dry_run")) or apply_status == "dry-run" - chip_text = "DRY RUN" if is_dry else apply_status.upper() or "(unknown)" - print(f" {glyphs['box']} {subcommand} {payload.get('example', '-')} [{chip_text}]") - print(f" example_path: {payload.get('example_path', '-')}") - print(f" sandbox_path: {payload.get('sandbox_path', '-')}") - print(f" force: {payload.get('force', False)}") - if payload.get("backup_dir"): - print(f" backup_dir: {payload['backup_dir']}") - files_to_write = payload.get("files_to_write") or [] - written = payload.get("written_files") or [] - print(f" files: planned={len(files_to_write)} written={len(written)}") - markers = payload.get("markers_provided") or {} - if markers: - print(f" markers_provided ({len(markers)}):") - for k, v in markers.items(): - print(f" {k} = {v}") - unfilled = payload.get("markers_used_unfilled") or [] - if unfilled: - print(C.warn(f" {glyphs['warn']} Declared markers used but not provided: {', '.join(unfilled)}")) - undeclared = payload.get("markers_used_undeclared") or [] - if undeclared: - print(C.warn(f" {glyphs['warn']} Markers used but not declared: {', '.join(undeclared)}")) - show_files = files_to_write or written - if show_files: - cap = _SANDBOX_FILES_CAP - for f in show_files[:cap]: - print(f" {glyphs['bullet']} {f}") - if len(show_files) > cap: - print(f" ... 
and {len(show_files) - cap} more") - if apply_status == "applied" and not is_dry: - print(C.ok(f" {glyphs['ok']} Applied '{payload.get('example', '-')}'")) - if payload.get("provenance_path"): - print(f" provenance: {payload['provenance_path']}") - - -def _render_sandbox_validate_plain(payload: dict, glyphs: dict) -> None: - overall = str(payload.get("overall_outcome", "unknown")) - overall_glyph = glyphs["ok"] if overall == "passed" else glyphs["fail"] if overall == "failed" else glyphs["warn"] - print(f" {glyphs['check']} overall: {overall_glyph} {overall}") - tiers = payload.get("tiers") or [] - for t in tiers: - t_outcome = str(t.get("outcome", "unknown")) - o_glyph = glyphs["ok"] if t_outcome == "passed" else glyphs["fail"] if t_outcome == "failed" else glyphs["skip"] - dur = t.get("duration_seconds") - dur_str = f"{dur:.2f}s" if isinstance(dur, (int, float)) else "-" - exit_code = t.get("exit_code") - exit_str = "-" if exit_code is None else str(exit_code) - print(f" {t.get('tier', ''):<3} {str(t.get('purpose', '')):<20} " - f"{o_glyph} {t_outcome:<8} dur={dur_str:<7} exit={exit_str}") - if t_outcome == "failed": - stderr_tail = str(t.get("stderr_tail") or "").strip() - if stderr_tail: - tail_lines = stderr_tail.splitlines() - cap = _SANDBOX_VALIDATE_STDERR_LINES - shown = tail_lines[-cap:] - for line in shown: - print(f" | {line}") - if len(tail_lines) > cap: - print(f" | ... ({len(tail_lines) - cap} earlier lines truncated)") - missing = payload.get("missing_helpers") or [] - if missing: - print(C.warn(f" {glyphs['warn']} Helper capabilities still missing: {', '.join(missing)}")) - if payload.get("history_updated"): - print(f" {glyphs['info']} history updated in sandbox/CODECOME-GENERATED.md") - - -# --- Bash-shim detection ---------------------------------------------------- -# -# Some models (e.g. 
google/gemini-3.1-pro-preview) prefer to invoke a CLI -# helper such as `rtk read FILE`, `rtk grep PAT PATH`, `rtk ls`, plain -# `ls`, `cat`, `head`, `tail`, `find`, `tree`, or `rg` via the bash tool -# instead of using OpenCode's native Read / Grep / Glob tools. The -# wrapper detects these by inspecting the bash command and routes the -# output through the existing styled renderers, so the user sees the -# same Read / Grep / Glob panels regardless of how the agent invoked -# the operation. - -# Recognised verbs at the head of a bash command, after env assignments -# and shell wrappers like sudo / time / nice / ionice are stripped. -_BASH_SHIM_READ_VERBS = {"cat", "head", "tail"} -_BASH_SHIM_GREP_VERBS = {"rg", "grep"} -_BASH_SHIM_LS_VERBS = {"ls"} -_BASH_SHIM_FIND_VERBS = {"find", "tree"} -# Wrappers we ignore at the start of a command line. -_BASH_SHIM_LEADING_NOISE = {"sudo", "time", "nice", "ionice", "command", "env"} -# Shell metacharacters that disqualify the command from shim handling. -_BASH_SHIM_DISQUALIFIERS = ("|", ";", "&&", "||", ">", "<", "`", "$(") - - -def _strip_leading_env_and_wrappers(tokens: list[str]) -> list[str]: - """Drop leading KEY=VAL env assignments and known shell wrappers - (sudo, time, nice, ionice, command, env) so the next significant - token is the actual command verb.""" - out = list(tokens) - while out: - head = out[0] - # KEY=VAL env assignments are tokens with `=` and an UPPER_CASE - # identifier on the left. - if "=" in head and head.split("=", 1)[0].replace("_", "").isalnum(): - left = head.split("=", 1)[0] - if left and (left[0].isalpha() or left[0] == "_") and left.isupper(): - out.pop(0) - continue - if head in _BASH_SHIM_LEADING_NOISE: - # Skip wrapper plus its options (best-effort: drop only the - # wrapper itself and any -flags directly after it). 
- out.pop(0) - while out and out[0].startswith("-"): - out.pop(0) - continue - break - return out - - -def _bash_command_has_pipeline(command_str: str) -> bool: - """Heuristic: avoid shim handling for any pipeline / redirection / - command-substitution / background invocation.""" - for marker in _BASH_SHIM_DISQUALIFIERS: - if marker in command_str: - return True - return False - - -@dataclass -class _BashShim: - family: str # "read" | "grep" | "ls" | "find" - files: list[str] # for read family - pattern: str # for grep family - path: str # for grep / ls / find - long_format: bool # for ls family - head_limit: int | None # for `head -n N` - tail_limit: int | None # for `tail -n N` - rtk_filtered: bool # rtk read --level/--max-lines/--tail-lines present - raw_command: str - - -def _is_bash_shim_call(command_str: str) -> Optional[_BashShim]: - """Recognise bash invocations the wrapper can re-route to the - Read/Grep/Glob renderers. Returns a _BashShim, or None when the - command should be left to the generic Bash renderer.""" - if not command_str or _bash_command_has_pipeline(command_str): - return None - try: - tokens = shlex.split(command_str) - except ValueError: - return None - if not tokens: - return None - - tokens = _strip_leading_env_and_wrappers(tokens) - if not tokens: - return None - - head = tokens[0] - rest = tokens[1:] - - # rtk dispatcher: peel `rtk` and re-evaluate against the subcommand. 
- via_rtk = False - if head == "rtk": - if not rest: - return None - head = rest[0] - rest = rest[1:] - via_rtk = True - - if head == "read" and via_rtk: - return _parse_rtk_read(rest, command_str) - if head in _BASH_SHIM_READ_VERBS: - return _parse_cat_head_tail(head, rest, command_str) - if head == "grep" and via_rtk: - return _parse_rtk_grep(rest, command_str) - if head in _BASH_SHIM_GREP_VERBS: - return _parse_grep_or_rg(rest, command_str) - if head in _BASH_SHIM_LS_VERBS: - return _parse_ls(rest, command_str) - if head in _BASH_SHIM_FIND_VERBS: - return _parse_find_tree(head, rest, command_str) - return None - - -def _parse_rtk_read(rest: list[str], raw: str) -> Optional[_BashShim]: - """Parse `rtk read [flags] FILE [FILE...]`.""" - files: list[str] = [] - filtered = False - i = 0 - while i < len(rest): - tok = rest[i] - if tok in ("-l", "--level"): - filtered = True - if i + 1 < len(rest): - i += 2 - else: - i += 1 - continue - if tok.startswith("--level="): - filtered = True - i += 1 - continue - if tok in ("-m", "--max-lines", "--tail-lines"): - filtered = True - if i + 1 < len(rest): - i += 2 - else: - i += 1 - continue - if tok.startswith(("--max-lines=", "--tail-lines=")): - filtered = True - i += 1 - continue - if tok in ("-n", "--line-numbers", "--ultra-compact", "--skip-env"): - i += 1 - continue - if tok.startswith("-v") and all(c == "v" for c in tok[1:]): - i += 1 - continue - if tok == "--": - i += 1 - continue - if tok.startswith("-"): - # Unknown flag; skip just the flag itself. 
- i += 1 - continue - files.append(tok) - i += 1 - if not files: - return None - return _BashShim( - family="read", - files=files, - pattern="", - path="", - long_format=False, - head_limit=None, - tail_limit=None, - rtk_filtered=filtered, - raw_command=raw, - ) - - -def _parse_cat_head_tail(verb: str, rest: list[str], raw: str) -> Optional[_BashShim]: - """Parse `cat FILE...`, `head [-n N] FILE`, `tail [-n N] FILE`.""" - files: list[str] = [] - head_limit: Optional[int] = None - tail_limit: Optional[int] = None - i = 0 - while i < len(rest): - tok = rest[i] - if tok == "-n" and i + 1 < len(rest): - try: - count = int(rest[i + 1].lstrip("+-")) - if verb == "head": - head_limit = count - elif verb == "tail": - tail_limit = count - except ValueError: - pass - i += 2 - continue - if tok.startswith("-n") and len(tok) > 2: - try: - count = int(tok[2:].lstrip("+-")) - if verb == "head": - head_limit = count - elif verb == "tail": - tail_limit = count - except ValueError: - pass - i += 1 - continue - if tok.startswith("-") and tok != "-": - i += 1 - continue - files.append(tok) - i += 1 - if not files: - return None - return _BashShim( - family="read", - files=files, - pattern="", - path="", - long_format=False, - head_limit=head_limit, - tail_limit=tail_limit, - rtk_filtered=False, - raw_command=raw, - ) - - -def _parse_grep_or_rg(rest: list[str], raw: str) -> Optional[_BashShim]: - """Parse `rg PATTERN [PATH]` or `grep PATTERN PATH...` (best-effort).""" - # Drop common option flags so we can pull the pattern out. We don't - # need to be exhaustive: anything we miss simply falls through. - pattern = "" - path = "" - i = 0 - saw_pattern = False - while i < len(rest): - tok = rest[i] - if tok == "--": - i += 1 - continue - if tok.startswith("-") and tok != "-": - # rg/grep flags that take a value. 
- if tok in ("-e", "-f", "-A", "-B", "-C", "-g", "--glob", "--max-count", - "--max-depth", "-t", "--type", "--ignore-file"): - i += 2 - continue - i += 1 - continue - if not saw_pattern: - pattern = tok - saw_pattern = True - elif not path: - path = tok - i += 1 - if not saw_pattern: - return None - return _BashShim( - family="grep", - files=[], - pattern=pattern, - path=path, - long_format=False, - head_limit=None, - tail_limit=None, - rtk_filtered=False, - raw_command=raw, - ) - - -def _parse_rtk_grep(rest: list[str], raw: str) -> Optional[_BashShim]: - """Parse `rtk grep PATTERN [PATH] [extra args]`.""" - pattern = "" - path = "" - i = 0 - saw_pattern = False - while i < len(rest): - tok = rest[i] - if tok in ("-l", "--max-len", "-m", "--max", "-t", "--file-type"): - i += 2 - continue - if tok in ("-c", "--context-only", "-n", "--line-numbers", - "--ultra-compact", "--skip-env"): - i += 1 - continue - if tok.startswith("-v") and all(c == "v" for c in tok[1:]): - i += 1 - continue - if tok == "--": - i += 1 - continue - if tok.startswith("-"): - i += 1 - continue - if not saw_pattern: - pattern = tok - saw_pattern = True - elif not path: - path = tok - i += 1 - if not saw_pattern: - return None - return _BashShim( - family="grep", - files=[], - pattern=pattern, - path=path, - long_format=False, - head_limit=None, - tail_limit=None, - rtk_filtered=False, - raw_command=raw, - ) - - -def _parse_ls(rest: list[str], raw: str) -> Optional[_BashShim]: - """Parse `ls [args]`. Detect -l / -la for long format.""" - long_format = False - paths: list[str] = [] - for tok in rest: - if tok.startswith("-") and tok != "-": - if "l" in tok[1:]: - long_format = True - continue - paths.append(tok) - path = paths[0] if paths else "." 
- return _BashShim( - family="ls", - files=[], - pattern="", - path=path, - long_format=long_format, - head_limit=None, - tail_limit=None, - rtk_filtered=False, - raw_command=raw, - ) - - -def _parse_find_tree(verb: str, rest: list[str], raw: str) -> Optional[_BashShim]: - """Parse `find PATH [args]` or `tree [PATH]`. Output is a list of paths. - - Extracts ``-name`` / ``-iname`` filters into *pattern* so the Glob - panel header shows the actual search expression rather than the bare - verb. - """ - path: str = "" - name_filter: str = "" - # Flags whose next token is a value (not a path). - _FIND_VALUE_FLAGS = { - "-name", "-iname", "-path", "-ipath", "-regex", "-iregex", - "-type", "-maxdepth", "-mindepth", "-perm", "-user", "-group", - "-newer", "-size", "-amin", "-atime", "-cmin", "-ctime", - "-mmin", "-mtime", "-printf", "-fprintf", "-fls", - } - i = 0 - while i < len(rest): - tok = rest[i] - if tok in _FIND_VALUE_FLAGS: - # Consume the value token. - if i + 1 < len(rest): - val = rest[i + 1] - if tok in ("-name", "-iname"): - name_filter = val - i += 2 - continue - i += 1 - continue - if tok.startswith("-") and tok != "-": - # Other flags without values (e.g. -print, -delete). - i += 1 - continue - # First non-flag, non-value token is the path. - if not path: - path = tok - i += 1 - if not path: - path = "." - pattern = name_filter if name_filter else verb - return _BashShim( - family="find", - files=[], - pattern=pattern, - path=path, - long_format=False, - head_limit=None, - tail_limit=None, - rtk_filtered=False, - raw_command=raw, - ) - - -# --- Bash-shim normalizers and renderers ------------------------------------ - -_RTK_GREP_FILE_HEADER_RE = re.compile(r"^\[file\]\s+(?P.+?)\s+\((?P\d+)\)\s*:\s*$") -_RTK_GREP_LINE_RE = re.compile(r"^\s+(?P\d+):\s*(?P.*)$") - - -def _normalize_rtk_grep_output(text: str) -> str: - """Convert rtk grep grouped output to standard `path:line:text` lines. 
- - Input shape (from `rtk grep`): - 4 matches in 3F: - [file] tools/run-agent.py (2): - 2811: return render_grep_rich(console, state) - - Output shape (compatible with _parse_grep_output): - tools/run-agent.py:2811:return render_grep_rich(console, state) - - If no `[file] (N):` markers are found, returns the text - unchanged (no-op safe). - """ - if "[file]" not in text: - return text - lines_in = text.split("\n") - out: list[str] = [] - current_path: Optional[str] = None - found_marker = False - for line in lines_in: - m = _RTK_GREP_FILE_HEADER_RE.match(line) - if m: - current_path = m.group("path").strip() - found_marker = True - continue - n = _RTK_GREP_LINE_RE.match(line) - if n and current_path: - out.append(f"{current_path}:{n.group('lineno')}:{n.group('text')}") - continue - # Skip blanks and the "N matches in NF:" header; pass through anything else. - stripped = line.strip() - if not stripped: - continue - if re.match(r"^\d+\s+matches?\s+in\s+\d+F:\s*$", stripped): - continue - # Unknown line; drop it to keep the output clean. - if not found_marker: - return text - return "\n".join(out) + ("\n" if out else "") - - -_LS_LONG_FORMAT_RE = re.compile( - # Permissions (10-12 chars incl. trailing @/+ or "."), link count, - # user, group, size, then 2 or 3 date fields (Mon DD [YYYY|HH:MM]), - # then the filename. - r"^[\-dlbcps][rwxstST\-@\+\.]{9,11}" - r"\s+\d+\s+\S+\s+\S+\s+\d+" - r"\s+\S+\s+\S+(?:\s+\S+)?" - r"\s+(?P.+)$" -) - - -def _strip_ls_long_format_to_filenames(text: str) -> str: - """Strip `ls -l` long-format columns down to just the filename. - Lines that don't look like long-format are kept as-is. 
The `total N` - header line is removed.""" - out: list[str] = [] - for line in text.split("\n"): - if not line.strip(): - continue - if line.startswith("total ") and line[6:].strip().isdigit(): - continue - m = _LS_LONG_FORMAT_RE.match(line) - if m: - out.append(m.group("name").strip()) - else: - # Keep non-matching lines (might be paths separating directories - # in a multi-arg ls call). - out.append(line.rstrip()) - return "\n".join(out) - - -def _maybe_render_bash_shim(console: Optional[Any], state: dict[str, Any]) -> bool: - """Detect bash invocations of read/grep/ls-equivalent CLI helpers - (rtk *, rg, plain ls/cat/head/tail/find/tree) and route the output - through the matching styled renderer. Returns True if handled.""" - if not _BASH_SHIM_RENDER: - return False - inp = state.get("input") - if not isinstance(inp, dict): - return False - command = str(inp.get("command", "")) - shim = _is_bash_shim_call(command) - if shim is None: - return False - - output = state.get("output") - if not isinstance(output, str): - return False - - if shim.family == "read": - return _render_shim_read(console, state, shim) - if shim.family == "grep": - return _render_shim_grep(console, state, shim) - if shim.family == "ls": - return _render_shim_ls(console, state, shim) - if shim.family == "find": - return _render_shim_ls(console, state, shim) - return False - - -def _render_shim_read(console: Optional[Any], state: dict[str, Any], shim: _BashShim) -> bool: - """Synthesize a read-tool state and call render_read_*. - - For multi-file input (rtk read F1 F2, cat F1 F2), the output is - concatenated with no delimiter. We render a single combined Read - panel using the first file's path as the panel header, but we - update the read cache by re-reading each file directly from disk - so subsequent edit/write diffs see fresh content per the user - directive. 
- """ - raw_output = str(state.get("output") or "") - status = str(state.get("status", "")) - - # Choose the file_path for the panel: when only one file, the actual - # path. When multiple files, fall back to a synthetic descriptor. - if len(shim.files) == 1: - file_path = shim.files[0] - else: - file_path = " + ".join(shim.files) - - # Synthesize OpenCode read framing around the raw content so the - # existing renderer can parse and render without modification. - rel_for_frame = _relativize_path(shim.files[0]) if shim.files else file_path - - # Optional offset/limit from `head -n N` / `tail -n N`. - offset: Optional[int] = None - limit: Optional[int] = None - if shim.head_limit is not None: - offset = 1 - limit = shim.head_limit - elif shim.tail_limit is not None: - # We don't know the file length, so leave offset unset and let - # the renderer omit the lines header. - limit = shim.tail_limit - - framed = ( - f"{rel_for_frame}\n" - f"file\n" - f"\n{raw_output}\n" - ) - - syn_state = { - "input": {"filePath": file_path, "offset": offset, "limit": limit}, - "output": framed, - "status": status, - } - - if HAVE_RICH and console is not None: - ok = render_read_rich(console, syn_state) - else: - ok = render_read_plain(syn_state) - - if not ok: - return False - - # Cache update: when filtering flags are present, or there are - # multiple files (no reliable per-file content boundaries), re-read - # each file directly from disk so the cache stays accurate. - if shim.rtk_filtered or len(shim.files) > 1: - for f in shim.files: - full = f if os.path.isabs(f) else os.path.join(ROOT, f) - _cache_reread(full) - return True - - -def _render_shim_grep(console: Optional[Any], state: dict[str, Any], shim: _BashShim) -> bool: - raw_output = str(state.get("output") or "") - normalized = _normalize_rtk_grep_output(raw_output) - - # If the normalizer found rtk-style markers but produced no rows, - # something is unexpected; fall back to bash renderer. 
- if "[file]" in raw_output and not normalized.strip(): - return False - - syn_state = { - "input": {"pattern": shim.pattern, "path": shim.path}, - "output": normalized, - "status": str(state.get("status", "")), - } - if HAVE_RICH and console is not None: - return render_grep_rich(console, syn_state) - return render_grep_plain(syn_state) - - -def _render_shim_ls(console: Optional[Any], state: dict[str, Any], shim: _BashShim) -> bool: - raw_output = str(state.get("output") or "") - if shim.long_format and _BASH_SHIM_LS_STRIP_LONG_FORMAT: - body = _strip_ls_long_format_to_filenames(raw_output) - else: - body = raw_output - pattern_label = "ls" if shim.family == "ls" else shim.pattern - syn_state = { - "input": {"pattern": pattern_label, "path": shim.path}, - "output": body, - "status": str(state.get("status", "")), - } - if HAVE_RICH and console is not None: - return render_glob_rich(console, syn_state) - return render_glob_plain(syn_state) - - -# --- Subagent summary helper -------------------------------------------------- - -def _format_subagent_summary(summary: Any) -> str: - """Format a Session.summary dict into a compact '+N -M K files' string.""" - if not isinstance(summary, dict): - return "" - additions = summary.get("additions") - deletions = summary.get("deletions") - files = summary.get("files") - parts: list[str] = [] - if additions is not None or deletions is not None: - parts.append(f"+{additions or 0} -{deletions or 0}") - if files is not None: - parts.append(f"{files} file(s)") - return " ".join(parts) - - -# --- Task renderer ------------------------------------------------------------ - -def render_task_rich(console: Console, state: dict[str, Any]) -> bool: - inp = state.get("input") - if not isinstance(inp, dict): - return False - - description = str(inp.get("description", "")) - subagent_type = str(inp.get("subagent_type", inp.get("subagentType", ""))) - prompt = str(inp.get("prompt", "")) - status = str(state.get("status", "unknown")) - border = 
"green" if status == "completed" else "yellow" - - sections: list[Any] = [] - if description: - type_tag = f" [{subagent_type}]" if subagent_type else "" - sections.append(Text(f"{description}{type_tag}", style="bold cyan")) - - if prompt: - sections.append(Text()) - prompt_lines = prompt.split("\n") - preview_lines = prompt_lines[:_TASK_PROMPT_PREVIEW_LINES] - leftover = max(0, len(prompt_lines) - _TASK_PROMPT_PREVIEW_LINES) - sections.append(Text("\n".join(preview_lines), style="dim")) - if leftover > 0: - sections.append(Text(f"... {leftover} more lines", style="dim")) - - output_data = state.get("output") - if output_data is not None: - sections.append(Text()) - sections.append(Text("Output", style="bold green")) - output_str = str(output_data) - if len(output_str) > 200: - output_str = output_str[:200] + "..." - sections.append(Text(output_str, style="dim")) - - console.print( - Panel(Group(*sections), title=Text(f"Task [{status}]"), border_style=border, expand=True) - ) - return True - - -def render_task_plain(state: dict[str, Any]) -> bool: - inp = state.get("input") - if not isinstance(inp, dict): - return False - - description = str(inp.get("description", "")) - subagent_type = str(inp.get("subagent_type", inp.get("subagentType", ""))) - prompt = str(inp.get("prompt", "")) - status = str(state.get("status", "unknown")) - - type_tag = f" [{subagent_type}]" if subagent_type else "" - print(C.header(f"task {description}{type_tag} [{status}]")) - - if prompt: - prompt_lines = prompt.split("\n") - for line in prompt_lines[:_TASK_PROMPT_PREVIEW_LINES]: - print(f" {line}") - leftover = max(0, len(prompt_lines) - _TASK_PROMPT_PREVIEW_LINES) - if leftover > 0: - print(f" ... {leftover} more lines") - - output_data = state.get("output") - if output_data is not None: - print(C.info("Output")) - output_str = str(output_data) - if len(output_str) > 200: - output_str = output_str[:200] + "..." 
- print(f" {output_str}") - - return True - - -# --- Skill renderer ----------------------------------------------------------- - -def render_skill_rich(console: Console, state: dict[str, Any]) -> bool: - inp = state.get("input") - if not isinstance(inp, dict): - return False - - name = str(inp.get("name", "")) - if not name: - label = "(unknown skill)" - style = "dim" - else: - label = f"loaded skill: {name}" - style = "" - - console.print(Panel(Text(label, style=style), title="Skill", border_style="dim", expand=True)) - return True - - -def render_skill_plain(state: dict[str, Any]) -> bool: - inp = state.get("input") - if not isinstance(inp, dict): - return False - - name = str(inp.get("name", "")) - if not name: - print(C.header("skill (unknown)")) - else: - print(C.header(f"skill {name}")) - return True - - -# --- Tool dispatch ------------------------------------------------------------ - -def _dispatch_tool_renderer(console: Console, tool: str, state: dict[str, Any]) -> bool: - """Try tool-specific rendering. 
Returns True if handled.""" - tool_lower = tool.strip().lower() - if tool_lower == "todowrite": - from rendering.tools.todo import TodoRenderer - return TodoRenderer(_get_rendering_ctx(console)).render(tool_lower, state) - elif tool_lower == "read": - _cache_invalidate_stale() - from rendering.tools.read import ReadRenderer - return ReadRenderer(_get_rendering_ctx(console)).render(tool_lower, state) - elif tool_lower == "write": - from rendering.tools.write import WriteRenderer - return WriteRenderer(_get_rendering_ctx(console)).render(tool_lower, state) - elif tool_lower == "edit": - from rendering.tools.edit import EditRenderer - return EditRenderer(_get_rendering_ctx(console)).render(tool_lower, state) - elif tool_lower in ("apply_patch", "applypatch", "apply-patch"): - from rendering.tools.apply_patch import ApplyPatchRenderer - return ApplyPatchRenderer(_get_rendering_ctx(console)).render(tool_lower, state) - elif tool_lower == "glob": - _cache_invalidate_stale() - from rendering.tools.glob import GlobRenderer - return GlobRenderer(_get_rendering_ctx(console)).render(tool_lower, state) - elif tool_lower == "grep": - _cache_invalidate_stale() - from rendering.tools.grep import GrepRenderer - return GrepRenderer(_get_rendering_ctx(console)).render(tool_lower, state) - elif tool_lower == "bash": - _cache_invalidate_stale() - from rendering.tools.command import CommandRenderer - return CommandRenderer(_get_rendering_ctx(console)).render(tool_lower, state) - elif tool_lower == "skill": - _cache_invalidate_stale() - from rendering.tools.skill import SkillRenderer - return SkillRenderer(_get_rendering_ctx(console)).render(tool_lower, state) - elif tool_lower == "task": - _cache_invalidate_stale() - from rendering.tools.task import TaskRenderer - return TaskRenderer(_get_rendering_ctx(console)).render(tool_lower, state) - else: - _cache_invalidate_stale() - return False - - -def render_step_start(console: Console, phase: str, label: str, event: dict[str, Any]) -> 
None: - step_type = event.get("part", {}).get("type", "step-start") - if HAVE_RICH: - console.print(Text(f"[{phase}] {label}: {step_type}", style="cyan")) - else: - print(C.info(f"[{phase}] {label}: {step_type}")) - - -def render_text(console: Console, event: dict[str, Any]) -> None: - part = event.get("part", {}) - text = str(part.get("text", "")).strip() - if not text: - return - if HAVE_RICH: - console.print(Panel(Markdown(text), title="Assistant", border_style="blue", expand=True)) - else: - print(C.header("Assistant")) - print(text) - - -def render_reasoning(console: Console, event: dict[str, Any]) -> None: - """Render a model's reasoning/thinking block. - - OpenCode emits these only when --thinking is on AND the part is - finalized (part.time?.end set). The wrapper draws them as a - visually-subordinate variant of the Assistant panel. - """ - if not _RENDER_REASONING: - return - part = event.get("part", {}) - text = str(part.get("text", "")).strip() - if not text: - return - - truncated_note = "" - if len(text) > _REASONING_MAX_CHARS: - cut = len(text) - _REASONING_MAX_CHARS - text = text[:_REASONING_MAX_CHARS] - truncated_note = f"\n\n... 
({cut} chars truncated)" - - if HAVE_RICH: - body_md = Markdown(text) - if truncated_note: - sections: list[Any] = [body_md, Text(truncated_note.strip(), style="dim")] - body: Any = Group(*sections) - else: - body = body_md - console.print( - Panel( - body, - title="Thinking", - border_style="blue", - expand=True, - style="dim", - ) - ) - else: - print(C.header("Thinking")) - print(text) - if truncated_note: - print(truncated_note.strip()) - - -def render_tool_use(console: Console, event: dict[str, Any]) -> None: - part = event.get("part", {}) - tool = str(part.get("tool", "unknown")) - state = part.get("state", {}) if isinstance(part.get("state"), dict) else {} - status = str(state.get("status", "unknown")) - input_data = state.get("input") - output_data = state.get("output") - - if _dispatch_tool_renderer(console, tool, state): - return - - if HAVE_RICH: - sections: list[Any] = [] - if input_data is not None: - sections.append(Text("Input", style="bold cyan")) - try: - sections.append(JSON.from_data(input_data)) - except Exception: - sections.append(Text(str(input_data))) - - if output_data is not None: - if sections: - sections.append(Text()) - sections.append(Text("Output", style="bold green")) - if isinstance(output_data, (dict, list)): - try: - sections.append(JSON.from_data(output_data)) - except Exception: - sections.append(Text(str(output_data))) - else: - sections.append(Text(str(output_data))) - - body = Group(*sections) if sections else Text("No tool payload", style="dim") - title = f"Tool: {tool} [{status}]" - border = "green" if status == "completed" else "yellow" - console.print(Panel(body, title=title, border_style=border, expand=True)) - else: - print(C.header(f"Tool: {tool} [{status}]")) - if input_data is not None: - print(C.info("Input")) - print(json.dumps(input_data, indent=2) if isinstance(input_data, (dict, list)) else str(input_data)) - if output_data is not None: - print(C.info("Output")) - print(json.dumps(output_data, indent=2) if 
isinstance(output_data, (dict, list)) else str(output_data)) - - -def _extract_tool_permission_error(event: dict[str, Any]) -> Optional[str]: - """Return a human-readable permission rejection summary for a tool_use error. - - The OpenCode stream reports rejected approvals as tool_use events with - state.status == "error" and an error string mentioning permission rejection. - When this occurs near the end of a turn, we should report that explicit - cause instead of a generic "mid-turn" truncation message. - """ - if event.get("type") != "tool_use": - return None - part = event.get("part") - if not isinstance(part, dict): - return None - state = part.get("state") - if not isinstance(state, dict): - return None - if str(state.get("status", "")) != "error": - return None - - err = str(state.get("error", "")).strip() - low = err.lower() - if "rejected permission" not in low and "permission" not in low: - return None - - tool_name = str(part.get("tool", "tool")).strip() or "tool" - input_data = state.get("input") - if isinstance(input_data, dict): - # Prefer path-like identifiers for file-oriented tools. 
- for key in ("filePath", "path", "selector"): - value = input_data.get(key) - if isinstance(value, str) and value.strip(): - return f"tool permission rejected: {tool_name} {value.strip()}" - # Bash tool: - cmd = input_data.get("command") - if isinstance(cmd, str) and cmd.strip(): - return f"tool permission rejected: {tool_name} `{cmd.strip()}`" - - return f"tool permission rejected: {tool_name}" - - -def render_step_finish(console: Console, event: dict[str, Any]) -> None: - part = event.get("part", {}) - reason = str(part.get("reason", "unknown")) - tokens = format_tokens(part.get("tokens", {})) - suffix = f" ({tokens})" if tokens else "" - style = "dim" - if reason in _FINISH_FAILURE: - style = "bold red" - if HAVE_RICH: - console.print(Text(f"step finished: {reason}{suffix}", style=style)) - else: - if reason in _FINISH_FAILURE: - print(C.fail(f"step finished: {reason}{suffix}")) - else: - print(f"step finished: {reason}{suffix}") - - -def render_unknown(console: Console, event: dict[str, Any]) -> None: - event_type = event.get("type", "") - # For message.part.updated, surface the actual unknown part type. 
- if event_type == "message.part.updated": - part_type = event.get("part", {}).get("type", "") - message = f"unknown part type: {part_type}" - else: - message = f"unknown event type: {event_type}" - if HAVE_RICH: - console.print(Text(message, style="dim")) - else: - print(message) - if _DEBUG_UNKNOWN_EVENTS: - payload = json.dumps(event, indent=2, default=str) - if HAVE_RICH: - console.print(Text(payload, style="dim")) - else: - print(payload) - - -def render_server_connected(console: Console, event: dict[str, Any]) -> None: - message = "connected to opencode event stream" - if HAVE_RICH: - console.print(Text(message, style="dim")) - else: - print(C.info(message)) - - -def render_server_heartbeat(console: Console, event: dict[str, Any]) -> None: - message = "server heartbeat" - if HAVE_RICH: - console.print(Text(message, style="dim")) - else: - print(C.info(message)) - - -def render_session_diff(console: Console, event: dict[str, Any]) -> None: - properties = event.get("properties", {}) - diff = properties.get("diff", []) - if not isinstance(diff, list) or not diff: - return - count = len(diff) - message = f"session diff updated: {count} file{'s' if count != 1 else ''}" - if HAVE_RICH: - console.print(Text(message, style="dim")) - else: - print(C.info(message)) - - -def render_message_updated(console: Console, event: dict[str, Any]) -> None: - # Extract info from either event.info (sync-synthesized) or - # event.properties.info (raw SSE stream). 
- info = event.get("info") - if not isinstance(info, dict): - props = event.get("properties", {}) - info = props.get("info", {}) if isinstance(props, dict) else {} - if not isinstance(info, dict): - info = {} - - role = str(info.get("role", "")) - tokens = info.get("tokens", {}) if isinstance(info.get("tokens"), dict) else {} - has_tokens = isinstance(tokens, dict) and ( - tokens.get("input", 0) or tokens.get("output", 0) or tokens.get("reasoning", 0) - ) - - # Suppress in-progress messages — only render "complete" ones that - # carry a summary, a finish reason, or non-zero tokens. This keeps - # the RichLog clean and avoids the flood of intermediate lifecycle - # events the SSE stream emits for every message state change. - has_summary = "summary" in info or "finish" in info - if not has_summary and not has_tokens: - return - - cache = tokens.get("cache", {}) if isinstance(tokens, dict) else {} - cost = info.get("cost", 0) or 0 - - # Extract model identifier from whichever field shape is present. - model_id = str(info.get("modelID", "")).strip() - provider_id = str(info.get("providerID", "")).strip() - if not model_id: - mdl = info.get("model", {}) - if isinstance(mdl, dict): - model_id = str(mdl.get("modelID", "")).strip() - provider_id = str(mdl.get("providerID", "")).strip() - model_label = f"{provider_id}/{model_id}" if provider_id and model_id else model_id - - if role == "user": - # User prompt acknowledged — short, dim, no model spam. - message = "> User" - style = "dim" - elif role == "assistant": - if has_tokens: - # Complete message — show model and token-count summary. 
- _in = tokens.get("input", 0) - _out = tokens.get("output", 0) - _reasoning = tokens.get("reasoning", 0) - _cache_read = cache.get("read", 0) if isinstance(cache, dict) else 0 - token_parts = [f"↑{_in} ↓{_out}"] - if _reasoning: - token_parts.append(f"R{_reasoning}") - if _cache_read: - token_parts.append(f"cache read {_cache_read}") - token_str = ", ".join(token_parts) - cost_str = f", ${cost:.4f}" if cost else "" - message = f"> Assistant · {model_label} ({token_str}{cost_str})" - style = "bold blue" - else: - # Complete message without token info (shouldn't normally - # happen after the has_summary check above, but kept as - # a safe fallback). - message = f"> Assistant · {model_label}" if model_label else "> Assistant" - style = "bold blue" - else: - # Fallback — unrecognised role, show what we have. - agent = str(info.get("agent", "assistant")) - message = f"> {agent} · {model_label}" if model_label else f"> {agent}" - style = "bold blue" - - if HAVE_RICH: - console.print(Text(message, style=style)) - else: - print(C.header(message)) - - -def render_error(console: Console, event: dict[str, Any]) -> None: - """Render a session.error event from the OpenCode JSON stream. - - Border is yellow (alarm) and the message body is rendered red. - Distinct from tool failures (which are red borders on tool panels) - and from completed-but-truncated runs (which are red banners). 
- """ - err = event.get("error") - msg_parts: list[str] = [] - if isinstance(err, dict): - # Common shapes: {"name": "...", "message": "..."} or - # {"name": "...", "data": {"message": "..."}} - name = err.get("name") - if isinstance(name, str) and name: - msg_parts.append(name) - data = err.get("data") - if isinstance(data, dict): - data_msg = data.get("message") - if isinstance(data_msg, str) and data_msg: - msg_parts.append(data_msg) - elif isinstance(err.get("message"), str): - msg_parts.append(err["message"]) - elif isinstance(err, str): - msg_parts.append(err) - - text = ": ".join(msg_parts) if msg_parts else "(no error message)" - - if HAVE_RICH: - console.print( - Panel( - Text(text, style="red"), - title="Error", - border_style="yellow", - expand=True, - ) - ) - else: - print(C.warn("Error")) - print(C.fail(text)) - - -def render_session_status(console: Console, event: dict[str, Any]) -> None: - properties = event.get("properties", {}) - status = properties.get("status", {}) - status_type = status.get("type") - - if status_type == "retry": - attempt = status.get("attempt", 1) - message = status.get("message", "Unknown error") - text = f"⏳ Waiting for LLM provider response (retry attempt {attempt}): {message}" - if HAVE_RICH: - console.print(Text(text, style="bold yellow")) - else: - print(C.warn(text)) - elif status_type == "busy": - text = "session status: busy" - if HAVE_RICH: - console.print(Text(text, style="dim")) - else: - print(C.info(text)) - elif status_type == "idle": - text = "session status: idle" - if HAVE_RICH: - console.print(Text(text, style="dim")) - else: - print(C.info(text)) - - -def render_subagent_status(console: Console, event: dict[str, Any]) -> None: - """Render a subagent.status event injected by the StatusForwarder plugin. - - The plugin emits these events for subagent lifecycle (created/updated/ - deleted) and heartbeats so that run-agent.py can show real-time progress - while child sessions work in parallel. 
- """ - if not _RENDER_SUBAGENT_UPDATES: - return - - properties = event.get("properties", {}) - status_type = str(properties.get("statusType", "")) - session_id = str(properties.get("sessionID", "")) - title = str(properties.get("title", "(untitled)")) - summary = properties.get("summary") - elapsed_ms = properties.get("elapsedMs") - - # Deduplicate unchanged update snapshots to avoid flooding the UI. - if status_type == "updated": - snapshot: dict[str, Any] = {"title": title} - if isinstance(summary, dict): - snapshot["additions"] = summary.get("additions") - snapshot["deletions"] = summary.get("deletions") - snapshot["files"] = summary.get("files") - - last_snapshot, last_time = _SUBAGENT_LAST_STATE.get(session_id, ({}, 0.0)) - now = time.time() - # Identical snapshot inside the throttle window -> suppress. - if ( - last_snapshot == snapshot - and (now - last_time) < _SUBAGENT_UPDATE_THROTTLE_S - ): - return - - _SUBAGENT_LAST_STATE[session_id] = (snapshot, now) - - if status_type == "created": - if HAVE_RICH: - console.print( - Panel( - Text(title, style="bold cyan"), - title="Subagent started", - border_style="cyan", - expand=True, - ) - ) - else: - print(C.header(f"[subagent] started: {title}")) - elif status_type == "finished": - if HAVE_RICH: - console.print( - Panel( - Text(title, style="bold cyan"), - title="Subagent finished", - border_style="green", - expand=True, - ) - ) - else: - print(C.ok(f"[subagent] finished: {title}")) - elif status_type == "heartbeat" and elapsed_ms is not None: - elapsed_s = elapsed_ms // 1000 - text = f"⏳ Subagent · {title} still running ({elapsed_s}s)" - if HAVE_RICH: - console.print(Text(text, style="bold yellow")) - else: - print(C.warn(text)) - elif status_type == "updated": - summary_text = _format_subagent_summary(summary) - line = f"Subagent · {title}" - if summary_text: - line += f" {summary_text}" - if HAVE_RICH: - console.print(Text(line, style="dim")) - else: - print(f" {line}") - - -# 
--------------------------------------------------------------------------- -# Chat mode: Textual TUI + multi-turn event loop -# --------------------------------------------------------------------------- - -class TextualConsoleProxy: - """Bridge Rich Console.print() calls to a Textual RichLog widget. - - Thread-safe: main-thread calls write directly to RichLog; background- - thread calls post a RenderMessage which is dispatched on the main - thread by the @on(RenderMessage) handler. This is the pattern from - Textual docs (post_message is thread-safe). - """ - - def __init__(self, rich_log, app): - self.rich_log = rich_log - self.app = app - self.encoding = "utf-8" - - def print(self, *args, **kwargs): - if not args: - from rich.text import Text - self._write(Text()) - return - if len(args) == 1: - self._write(args[0]) - else: - from rich.console import Group - self._write(Group(*args)) - - def _write(self, renderable): - import threading - if threading.current_thread() is threading.main_thread(): - _chat_debug("TextualConsoleProxy._write: main thread, direct write") - self.rich_log.write(renderable) - else: - _chat_debug("TextualConsoleProxy._write: bg thread, post_message(RenderMessage)") - self.app.post_message(self.app.RenderMessage(renderable)) - - -ChatApp: Any = None -QuitScreen: Any = None - - -# Standalone chat-app methods — available even when Textual is not -# installed, so that tests can exercise _render_and_log parity without -# launching a real TUI. - -def _chat_render_and_log(self, console, phase, label, event): - """Standalone version of _ChatApp._render_and_log. 
See the docstring - on the class for the full contract.""" - if getattr(self, "transcript_fp", None) is not None: - try: - self.transcript_fp.write(json.dumps(event) + "\n") - except OSError: - pass - if getattr(self, "args", None) is not None and getattr(self.args, "debug", False): - _chat_debug(f"_render_and_log: raw event: {json.dumps(event)}") - if event.get("type") == "message.updated": - _chat_update_modeline_info(self, event) - if not getattr(self, "thinking_on", True) and event.get("type") == "reasoning": - return - render_event(console, phase, label, event) - - -def _chat_update_modeline_info(self, event: dict[str, Any]) -> None: - """Standalone version of _ChatApp._update_modeline_info.""" - info = event.get("info") - if not isinstance(info, dict): - props = event.get("properties", {}) - info = props.get("info", {}) if isinstance(props, dict) else {} - if not isinstance(info, dict): - return - if info.get("role") != "assistant": - return - model_id = str(info.get("modelID", "")).strip() - provider_id = str(info.get("providerID", "")).strip() - if not model_id: - mdl = info.get("model", {}) - if isinstance(mdl, dict): - model_id = str(mdl.get("modelID", "")).strip() - provider_id = str(mdl.get("providerID", "")).strip() - model_label = f"{provider_id}/{model_id}" if provider_id and model_id else (model_id or "…") - tokens = info.get("tokens", {}) - if isinstance(tokens, dict): - _in = tokens.get("input", 0) - _out = tokens.get("output", 0) - token_str = f"↑{_in} ↓{_out}" - else: - token_str = "" - cost = info.get("cost", 0) or 0 - cost_str = f" ${cost:.4f}" if cost else "" - try: - self._modeline_info = f"{model_label} | {token_str}{cost_str}" - except AttributeError: - pass - - -try: - from textual import on, work - from textual.app import App, ComposeResult - from textual.message import Message - from textual.widgets import RichLog, Input, Footer, Static, Button, Label - from textual.binding import Binding - from textual.containers import Grid, 
Horizontal - from textual.screen import ModalScreen - - class _QuitScreen(ModalScreen[bool]): - CSS = """ - _QuitScreen { - align: center middle; - } - #quit-dialog { - grid-size: 2; - grid-gutter: 1 2; - grid-rows: 1fr 3; - padding: 0 1; - width: 60; - height: 11; - border: thick $background 80%; - background: $surface; - } - #quit-question { - column-span: 2; - height: 1fr; - width: 1fr; - content-align: center middle; - } - Button { - width: 100%; - } - """ - - def compose(self) -> ComposeResult: - yield Grid( - Label("Are you sure you want to quit?", id="quit-question"), - Button("Quit", id="quit-confirm", variant="error"), - Button("Cancel", id="quit-cancel", variant="primary"), - id="quit-dialog", - ) - - def on_button_pressed(self, event: Button.Pressed) -> None: - self.dismiss(event.button.id == "quit-confirm") - - class _ChatApp(App): - """Interactive chat harness — final design (post-bisection). - - Design follows Textual docs (https://textual.textualize.io/guide/workers): - - * The SSE consumer runs in a raw daemon thread (started via - chat_loop.start_consumer). Textual's @work(thread=True) is - reserved for short-lived blocking tasks (the docs' weather- - app pattern); using it for an infinite consumer loop froze - the main event loop in our environment (Textual 8.2.6 / - Python 3.14). - - * All UI updates from background threads (renderables AND - state markers AND errors) go through ONE one-argument - Message subclass (RenderMessage(renderable)) and ONE @on - handler that just calls rich_log.write. post_message is - documented as thread-safe. Bisection found that any - departure from this exact shape (adding a second Message - subclass, renaming it, adding optional fields, or even - adding a second set_interval callback) silently freezes - Textual's message dispatch on this version, even though - the same patterns work in isolated repros. We don't - understand the root cause; staying inside this working - envelope is the pragmatic path forward. 
- - * _render_and_log mirrors phase mode's behaviour exactly - (parity with non-interactive runs). Per-event side effects: - persist to the transcript jsonl, mirror raw JSON to the - chat-debug log when --debug is set, suppress 'reasoning' - when thinking is off, then delegate to the SAME - render_event() dispatcher non-chat uses. No chat-specific - filters or markers — `render_session_status` already - prints `session status: busy/idle` and that's the only - state cue we surface. We do NOT toggle the Input widget's - enabled/placeholder state, because doing that required a - second set_interval poller which broke dispatch in our - bisection. The Input stays enabled at all times. - - * Errors from @work workers post a red Panel renderable via - _post_error_renderable() — same RenderMessage path. - - * Short-lived HTTP calls (initial prompt, user prompt send) - run as @work(thread=True) workers — the canonical docs - pattern (matches the weather-app example). - - * The transcript jsonl is opened in _run_chat_mode and the - file handle is passed in via the `transcript_fp` constructor - argument; _render_and_log writes one JSON line per SSE - event to it (parity with phase mode). - - * A set_interval(1.0) heartbeat continuously logs a debug - tick from the main thread and also updates the bottom-bar - status line (modeline) with live token usage and an - activity pulse. The modeline data is fed by - _render_and_log on every message.updated event. 
- """ - - CSS = """ - RichLog { - height: 1fr; - border-bottom: solid green; - background: black; - } - Input { - height: 3; - } - #bottom-bar { - dock: bottom; - height: 1; - background: $footer-background; - } - #status-left { - width: auto; - min-width: 26; - height: 1; - padding: 0 1; - color: $footer-foreground; - background: $footer-background; - } - #footer-right { - width: 1fr; - height: 1; - } - Footer { - dock: none; - } - """ - - # Ctrl+S toggles Textual's mouse capture so the user can use the - # terminal's native mouse selection (which produces system-clipboard - # copy via the terminal emulator). RichLog has no in-app selection - # support upstream, so terminal-native selection is the supported - # path. See .project/chat-mode-textual-postmortem.md §4 / §12. - BINDINGS = [ - Binding("ctrl+c", "request_quit", "Quit"), - Binding("ctrl+s", "toggle_mouse_for_select", "Select mode"), - ] - - class RenderMessage(Message): - """Single thread-safe message type — carries a Rich renderable - to be written to the RichLog on the main thread. - - Bisection showed that extending this class with optional - fields (`state`, `detail`) silently breaks Textual's message - dispatch on this version (Textual 8.2.6 / Python 3.14), even - though the same pattern works in isolation. Whatever the - root cause, we keep this class strictly one-argument - (positional, `renderable`) and use a thread-safe pending-state - slot + main-thread polling timer for idle/busy/error - transitions instead. 
- """ - - def __init__(self, renderable): - super().__init__() - self.renderable = renderable - - def __init__(self, server_info=None, session_id=None, initial_prompt="", args=None, model=None, variant=None, thinking_on=None, transcript_fp=None): - super().__init__() - self.server_info = server_info - self.session_id = session_id - self.initial_prompt = initial_prompt - self.args = args - self.model = model - self.variant = variant - self.thinking_on = thinking_on - self.transcript_fp = transcript_fp - self.chat_loop = None - self.console_proxy = None - self.rich_log = None - self.chat_input = None - self.modeline = None - self._heartbeat_count = 0 - # Updated by _render_and_log (consumer thread) on every - # message.updated event. Read by _heartbeat (main thread) - # to drive the status-line in the bottom bar. - self._modeline_info = "" - # Tracks Ctrl+S terminal-select mode. When True, Textual mouse - # handling is disabled so the terminal emulator's native mouse - # selection works (which copies to the system clipboard via the - # terminal itself). Default off (Textual mouse handling on). - self._terminal_select_mode = False - - def compose(self) -> ComposeResult: - yield RichLog(id="log", markup=False, auto_scroll=True) - yield Input(id="chat_input", placeholder="Type a message and press Enter...") - with Horizontal(id="bottom-bar"): - yield Static("ready", id="status-left") - yield Footer(id="footer-right") - - def on_mount(self) -> None: - _chat_debug("on_mount: entering") - self.rich_log = self.query_one(RichLog) - self.chat_input = self.query_one(Input) - self.modeline = self.query_one("#status-left", Static) - self.console_proxy = TextualConsoleProxy(self.rich_log, self) - _chat_debug("on_mount: proxy created") - - # Set initial modeline with model/agent info. 
- provider = (self.model or "").split("/", 1)[0] if self.model else "" - _model_id = (self.model or "").split("/", 1)[1] if self.model and "/" in self.model else (self.model or "…") - model_label = f"{provider}/{_model_id}" if provider else _model_id - self.modeline.update(f"● | {model_label} | ready") - - # Heartbeat canary — fires every 1s on the main thread. Helpful - # in the debug log to confirm the event loop is alive. - self.set_interval(1.0, self._heartbeat) - _chat_debug("on_mount: heartbeat installed") - - # Write banner (main thread, direct write). - if HAVE_RICH: - from rich.rule import Rule - self.rich_log.write(Rule(title="Chat: Interactive Harness", style="bold cyan"), expand=True) - model_label = self.model or "(unknown)" - variant_label = self.variant or "(unknown)" - parts = [f"agent={self.args.agent if self.args else '?'}", f"model={model_label}"] - if self.variant is not None: - parts.append(f"variant={variant_label}") - parts.append(f"thinking={'on' if self.thinking_on else 'off'}") - self.rich_log.write(Text(" ".join(parts), style="dim"), expand=True) - # Hint about selection: RichLog doesn't support in-app - # mouse selection upstream; document the terminal-native - # path so users can copy output. - self.rich_log.write( - Text( - "Tip: hold Option/Alt (macOS) or Shift (most terminals) " - "while dragging to select text, or press Ctrl+S to toggle " - "terminal-select mode (disables Textual mouse).", - style="dim italic", - ), - expand=True, - ) - _chat_debug("on_mount: banner written") - - # Construct the chat event loop. - from events.chat_loop import ChatEventLoop - _chat_debug("on_mount: creating ChatEventLoop") - self.chat_loop = ChatEventLoop( - base_url=self.server_info.base_url, - session_id=self.session_id, - console=self.console_proxy, - auth_token=self.server_info.password, - workspace_dir=str(ROOT), - debug=_chat_debug if self.args and self.args.debug else None, - ) - - # Raw daemon thread — the SSE consumer. 
- _chat_debug("on_mount: starting SSE consumer (raw daemon thread)") - self.chat_loop.start_consumer(self._render_and_log) - _chat_debug("on_mount: consumer thread started") - - # Initial prompt: send via worker but don't echo the full text. - # The prompt comes from prompts/chat-initial.md (bootstrap - # instructions for the agent, not something the user typed). - # The SSE stream will emit a dim `> User` summary line once the - # daemon acknowledges the message, matching subsequent prompts. - if self.initial_prompt: - self.rich_log.write(Text("(initializing session\u2026)", style="bold cyan"), expand=True) - _chat_debug(f"on_mount: spawning initial-prompt worker ({len(self.initial_prompt)} chars)") - self._send_initial_prompt(self.initial_prompt) - - _chat_debug("on_mount: done") - - # --- Main-thread heartbeat canary --- - - def _heartbeat(self) -> None: - self._heartbeat_count += 1 - _chat_debug(f"_heartbeat: tick #{self._heartbeat_count} (main loop alive)") - - # Update the bottom-bar status line (modeline) with live - # token usage and an activity pulse. _modeline_info is - # written by _render_and_log on the consumer thread on - # every message.updated event; we read it here atomically. 
- pulse = "●" if self._heartbeat_count % 2 else "◌" - sel_tag = " [SEL]" if self._terminal_select_mode else "" - info = self._modeline_info or "" - if info: - text = f"{pulse}{sel_tag} | {info}" - else: - provider = (self.model or "").split("/", 1)[0] if self.model else "" - _model_id = (self.model or "").split("/", 1)[1] if self.model and "/" in self.model else (self.model or "…") - model_label = f"{provider}/{_model_id}" if provider else _model_id - text = f"{pulse}{sel_tag} | {model_label} | idle" - self.modeline.update(text) - - # --- Textual workers (@work(thread=True)) — short-lived only --- - - @work(thread=True) - def _send_initial_prompt(self, text) -> None: - """Send the initial prompt in a Textual-managed thread.""" - _chat_debug("_send_initial_prompt: worker started") - try: - self.chat_loop.send_prompt( - text, - self.args.agent if self.args else "auditor", - self.model, - self.variant, - ) - _chat_debug("_send_initial_prompt: sent") - except Exception as exc: - _chat_debug(f"_send_initial_prompt: error: {exc}") - self._post_error_renderable(f"Failed to send initial prompt: {exc}") - - @work(thread=True) - def _send_prompt(self, text) -> None: - """Send a user prompt in a Textual-managed thread.""" - _chat_debug(f"_send_prompt: worker posting text len={len(text)}") - try: - self.chat_loop.send_prompt( - text, - self.args.agent if self.args else "auditor", - self.model, - self.variant, - ) - _chat_debug("_send_prompt: sent") - except Exception as exc: - _chat_debug(f"_send_prompt: error: {exc}") - self._post_error_renderable(f"Failed to send: {exc}") - - def _post_error_renderable(self, detail: str) -> None: - """Helper callable from any thread. 
Posts a RenderMessage - carrying a red error panel — sent through the same single - RenderMessage(renderable) path as everything else.""" - from rich.panel import Panel - panel = Panel(Text(detail, style="bold red"), title="Chat Error", border_style="red") - self.post_message(self.RenderMessage(panel)) - - # --- Message handler (run on main thread). Single handler, - # single Message subclass — see RenderMessage docstring. - - @on(RenderMessage) - def _on_render_message(self, message: RenderMessage) -> None: - if self.rich_log is not None: - self.rich_log.write(message.renderable, expand=True) - - # --- Consumer-thread callback --- - - def _render_and_log(self, console, phase, label, event): - _chat_render_and_log(self, console, phase, label, event) - - def _update_modeline_info(self, event: dict[str, Any]) -> None: - _chat_update_modeline_info(self, event) - - # --- UI actions --- - - def action_request_quit(self) -> None: - def finish_quit(confirmed): - if confirmed: - self.exit() - self.push_screen(_QuitScreen(), finish_quit) - - def action_toggle_mouse_for_select(self) -> None: - """Toggle terminal-native mouse selection mode (Ctrl+S). - - RichLog has no upstream support for in-app mouse text - selection. As a pragmatic alternative, this action toggles - Textual's mouse reporting off so the terminal emulator's - native mouse selection takes over (which copies to the - system clipboard via the terminal itself). - - When off (default): Textual handles mouse, terminal-native - drag is intercepted. Hold Option/Alt (macOS) or Shift - (most terminals) while dragging to bypass Textual without - toggling. - - When on: mouse reporting is disabled at the terminal level. - User can click-drag to select, and Cmd+C / Ctrl+Shift+C in - the terminal copies to the clipboard. Textual mouse - interactions (scrolling, clicking widgets) won't work until - toggled back. 
- """ - driver = self._driver - if driver is None: - return - if not self._terminal_select_mode: - # Enter terminal-select mode: turn off Textual mouse. - try: - driver._disable_mouse_support() - except Exception: - return - self._terminal_select_mode = True - hint = Text( - "[select mode ON] Textual mouse disabled. " - "Click-drag to select; copy via terminal " - "(Cmd+C on macOS / Ctrl+Shift+C on Linux). " - "Press Ctrl+S again to exit.", - style="bold yellow", - ) - self.rich_log.write(hint, expand=True) - else: - # Exit terminal-select mode: turn Textual mouse back on. - try: - driver._enable_mouse_support() - except Exception: - return - self._terminal_select_mode = False - hint = Text( - "[select mode OFF] Textual mouse re-enabled.", - style="bold yellow", - ) - self.rich_log.write(hint, expand=True) - - async def on_input_submitted(self, message: Input.Submitted) -> None: - """Handle Enter on the chat Input — send the typed prompt - through the @work(thread=True) _send_prompt worker. - - The Input is NOT disabled while sending — bisection found - that toggling the Input's disabled/placeholder state from - outside this handler (via a poller) broke Textual dispatch - on this version. 
Keeping the input always-enabled is fine - in practice; the user just sees their next input echoed - after the previous response.""" - text = message.value.strip() - if not text: - return - self.chat_input.value = "" - self.rich_log.write("", expand=True) - self.rich_log.write(Text(f"User: {text}", style="bold cyan"), expand=True) - self._send_prompt(text) - - ChatApp = _ChatApp - QuitScreen = _QuitScreen -except ImportError: - pass - - - - +from codecome.cli import main if __name__ == "__main__": - try: - raise SystemExit(main()) - except KeyboardInterrupt: - raise SystemExit(130) - except SystemExit: - raise - except Exception as exc: # noqa: BLE001 - print(C.fail(f"Fatal Error: {exc}"), file=sys.stderr) - if truthy_env("CODECOME_DEBUG"): - traceback.print_exc(file=sys.stderr) - raise SystemExit(1) + raise SystemExit(main()) From 23edfe556ab8dfcd343fc6a5eefa4cb25c3a0d45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 16:53:23 +0200 Subject: [PATCH 33/65] Move phase event loop into dedicated module --- tools/events/phase_loop.py | 207 +++++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 tools/events/phase_loop.py diff --git a/tools/events/phase_loop.py b/tools/events/phase_loop.py new file mode 100644 index 0000000..4fb0826 --- /dev/null +++ b/tools/events/phase_loop.py @@ -0,0 +1,207 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +PhaseEventLoop — single-attempt phase SSE consumer. + +Consumes the OpenCode SSE stream for one session, emits finalized events, +performs catch-up sync around reconnect/idle, and returns RunResult for +phase completion logic. 
+""" + +from __future__ import annotations + +import dataclasses +from typing import Any, Callable + +from events.sse_client import SseClient, SseClientError +from events.base import BaseEventLoop +from events.emitters import emit_event + + +@dataclasses.dataclass(frozen=True) +class RunResult: + """Signals returned by PhaseEventLoop.run() for termination logic.""" + + any_step_finish_seen: bool = False + step_finish_count: int = 0 + last_finish_reason: str | None = None + last_finish_tokens: dict[str, Any] = dataclasses.field(default_factory=dict) + last_permission_error: str | None = None + last_session_id: str | None = None + + +class PhaseEventLoop(BaseEventLoop): + """Consume the SSE stream for a single session and drive rendering.""" + + def __init__( + self, + base_url: str, + session_id: str, + console: Any, + phase: str, + label: str, + *, + auth_token: str | None = None, + workspace_dir: str | None = None, + ) -> None: + super().__init__(base_url, session_id, console, + auth_token=auth_token, workspace_dir=workspace_dir) + self.phase = phase + self.label = label + self._pending_recovery_sync = False + self._idle_event_to_sync_and_emit: dict[str, Any] | None = None + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + def run( + self, + render_fn: Callable[[Any, str, str, dict[str, Any]], None], + ) -> RunResult: + _any_step_finish_seen = False + _step_finish_count = 0 + _last_finish_reason: str | None = None + _last_finish_tokens: dict[str, Any] = {} + _last_permission_error: str | None = None + + self._client = SseClient( + self.base_url, + auth_token=self.auth_token, + workspace_dir=self.workspace_dir, + reconnect=True, + max_reconnects=10, + on_reconnect=self.trigger_recovery_sync, + ) + + try: + for event in self._client.events(): + if self._stopped: + break + + if not self._belongs_to_session(event): + continue + + if event.get("type") == 
"permission.asked": + self._handle_permission(event) + perm_err = self._extract_permission_error(event) + if perm_err: + _last_permission_error = perm_err + continue + + _is_idle = self._is_session_idle(event) + if _is_idle and self._idle_event_to_sync_and_emit is None: + self._idle_event_to_sync_and_emit = event + + finalized_events = self._tracker.ingest(event) + + if self._should_sync_session_messages(event): + finalized_events.extend(self._sync_session_messages()) + + if self._idle_event_to_sync_and_emit is not None: + finalized_events = [ + fe for fe in finalized_events + if not ( + fe.get("type") == "session.idle" or + (fe.get("type") == "session.status" and + fe.get("properties", {}).get("status", {}).get("type") == "idle") + ) + ] + + for fe in finalized_events: + sig = (fe.get("type", ""), fe.get("part", {}).get("id", "")) + if sig[1] and sig in self._emitted_signatures: + continue + self._emitted_signatures.add(sig) + _any_step_finish_seen, _step_finish_count, _last_finish_reason, _last_finish_tokens = self._update_result( + fe, _any_step_finish_seen, _step_finish_count, _last_finish_reason, _last_finish_tokens + ) + emit_event(render_fn, self.console, self.phase, self.label, fe) + + if self._is_session_idle(event): + self._idle_event_to_sync_and_emit = None + self._sync_session_messages() + idle_sig = (event.get("type", ""), event.get("properties", {}).get("sessionID", "")) + if idle_sig[1] and idle_sig in self._emitted_signatures: + pass + else: + if idle_sig[1]: + self._emitted_signatures.add(idle_sig) + emit_event(render_fn, self.console, self.phase, self.label, event) + return self._build_result( + _any_step_finish_seen, _step_finish_count, + _last_finish_reason, _last_finish_tokens, + _last_permission_error, self.session_id, + ) + + except SseClientError: + pass + + return self._build_result( + any_step_finish_seen=_any_step_finish_seen, + step_finish_count=_step_finish_count, + last_finish_reason=_last_finish_reason, + 
last_finish_tokens=_last_finish_tokens, + last_permission_error=_last_permission_error, + last_session_id=self.session_id, + ) + + def trigger_recovery_sync(self) -> None: + self._pending_recovery_sync = True + + # ------------------------------------------------------------------ + # Phase-specific helpers + # ------------------------------------------------------------------ + + @staticmethod + def _build_result( + any_step_finish_seen: bool, + step_finish_count: int, + last_finish_reason: str | None, + last_finish_tokens: dict[str, Any], + last_permission_error: str | None, + last_session_id: str | None, + ) -> RunResult: + return RunResult( + any_step_finish_seen=any_step_finish_seen, + step_finish_count=step_finish_count, + last_finish_reason=last_finish_reason, + last_finish_tokens=last_finish_tokens, + last_permission_error=last_permission_error, + last_session_id=last_session_id, + ) + + def _should_sync_session_messages(self, event: dict[str, Any]) -> bool: + if self._pending_recovery_sync: + self._pending_recovery_sync = False + return True + event_type = event.get("type", "") + if event_type == "session.idle": + return True + if event_type == "session.status": + status = event.get("properties", {}).get("status", {}) + if status.get("type") == "idle": + return True + return False + + def _update_result( + self, + event: dict[str, Any], + any_step_finish_seen: bool, + step_finish_count: int, + last_finish_reason: str | None, + last_finish_tokens: dict[str, Any], + ) -> tuple[bool, int, str | None, dict[str, Any]]: + event_type = event.get("type", "") + if event_type == "step_finish": + any_step_finish_seen = True + step_finish_count += 1 + part = event.get("part", {}) + reason = part.get("reason") + if isinstance(reason, str): + last_finish_reason = reason + tokens = part.get("tokens") + if isinstance(tokens, dict): + last_finish_tokens = tokens + return any_step_finish_seen, step_finish_count, last_finish_reason, last_finish_tokens From 
1cb8ad77a6aae15a2c570d74802cd193c4fa20a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 16:54:14 +0200 Subject: [PATCH 34/65] Slim events package exports --- tools/events/__init__.py | 208 ++------------------------------------- 1 file changed, 6 insertions(+), 202 deletions(-) diff --git a/tools/events/__init__.py b/tools/events/__init__.py index e976f7d..fe58a53 100644 --- a/tools/events/__init__.py +++ b/tools/events/__init__.py @@ -2,213 +2,17 @@ # SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later """ -Event loop coordinator: consumes SSE, accumulates state, maps events, -and emits them to the existing render pipeline. +Events package public exports. -Entry point: - event_loop = PhaseEventLoop(base_url, session_id, console, phase, label) - result = event_loop.run(render_event_fn) +PhaseEventLoop lives in events.phase_loop. EventLoop remains as a +backward-compatible alias for older imports. """ from __future__ import annotations -import dataclasses -import time -from typing import Any, Callable - -from events.sse_client import SseClient, SseClientError -from events.base import BaseEventLoop -from events.emitters import emit_event - - -@dataclasses.dataclass(frozen=True) -class RunResult: - """Signals returned by PhaseEventLoop.run() for termination logic.""" - any_step_finish_seen: bool = False - step_finish_count: int = 0 - last_finish_reason: str | None = None - last_finish_tokens: dict[str, Any] = dataclasses.field(default_factory=dict) - last_permission_error: str | None = None - last_session_id: str | None = None - - -class PhaseEventLoop(BaseEventLoop): - """Consume the SSE stream for a single session and drive rendering.""" - - def __init__( - self, - base_url: str, - session_id: str, - console: Any, - phase: str, - label: str, - *, - auth_token: str | None = None, - workspace_dir: str | None = None, - ) -> None: - super().__init__(base_url, session_id, console, - auth_token=auth_token, 
workspace_dir=workspace_dir) - self.phase = phase - self.label = label - self._pending_recovery_sync = False - self._emitted_signatures: set[tuple[str, str]] = set() - self._idle_event_to_sync_and_emit: dict[str, Any] | None = None - - # ------------------------------------------------------------------ - # Public API - # ------------------------------------------------------------------ - - def run( - self, - render_fn: Callable[[Any, str, str, dict[str, Any]], None], - ) -> RunResult: - _any_step_finish_seen = False - _step_finish_count = 0 - _last_finish_reason: str | None = None - _last_finish_tokens: dict[str, Any] = {} - _last_permission_error: str | None = None - - self._client = SseClient( - self.base_url, - auth_token=self.auth_token, - workspace_dir=self.workspace_dir, - reconnect=True, - max_reconnects=10, - on_reconnect=self.trigger_recovery_sync, - ) - - try: - for event in self._client.events(): - if self._stopped: - break - - if not self._belongs_to_session(event): - continue - - if event.get("type") == "permission.asked": - self._handle_permission(event) - perm_err = self._extract_permission_error(event) - if perm_err: - _last_permission_error = perm_err - continue - - _is_idle = self._is_session_idle(event) - if _is_idle and self._idle_event_to_sync_and_emit is None: - self._idle_event_to_sync_and_emit = event - - finalized_events = self._tracker.ingest(event) - - if self._should_sync_session_messages(event): - finalized_events.extend(self._sync_session_messages()) - - if self._idle_event_to_sync_and_emit is not None: - finalized_events = [ - fe for fe in finalized_events - if not ( - fe.get("type") == "session.idle" or - (fe.get("type") == "session.status" and - fe.get("properties", {}).get("status", {}).get("type") == "idle") - ) - ] - - for fe in finalized_events: - sig = (fe.get("type", ""), fe.get("part", {}).get("id", "")) - if sig[1] and sig in self._emitted_signatures: - continue - self._emitted_signatures.add(sig) - _any_step_finish_seen, 
_step_finish_count, _last_finish_reason, _last_finish_tokens = self._update_result( - fe, _any_step_finish_seen, _step_finish_count, _last_finish_reason, _last_finish_tokens - ) - emit_event(render_fn, self.console, self.phase, self.label, fe) - - if self._is_session_idle(event): - idle_event = self._idle_event_to_sync_and_emit - self._idle_event_to_sync_and_emit = None - self._sync_session_messages() - idle_sig = (event.get("type", ""), event.get("properties", {}).get("sessionID", "")) - if idle_sig[1] and idle_sig in self._emitted_signatures: - pass - else: - if idle_sig[1]: - self._emitted_signatures.add(idle_sig) - emit_event(render_fn, self.console, self.phase, self.label, event) - return self._build_result( - _any_step_finish_seen, _step_finish_count, - _last_finish_reason, _last_finish_tokens, - _last_permission_error, self.session_id, - ) - - except SseClientError: - pass - - return self._build_result( - any_step_finish_seen=_any_step_finish_seen, - step_finish_count=_step_finish_count, - last_finish_reason=_last_finish_reason, - last_finish_tokens=_last_finish_tokens, - last_permission_error=_last_permission_error, - last_session_id=self.session_id, - ) - - def trigger_recovery_sync(self) -> None: - self._pending_recovery_sync = True - - # ------------------------------------------------------------------ - # Phase-specific helpers - # ------------------------------------------------------------------ - - @staticmethod - def _build_result( - any_step_finish_seen: bool, - step_finish_count: int, - last_finish_reason: str | None, - last_finish_tokens: dict[str, Any], - last_permission_error: str | None, - last_session_id: str | None, - ) -> RunResult: - return RunResult( - any_step_finish_seen=any_step_finish_seen, - step_finish_count=step_finish_count, - last_finish_reason=last_finish_reason, - last_finish_tokens=last_finish_tokens, - last_permission_error=last_permission_error, - last_session_id=last_session_id, - ) - - def 
_should_sync_session_messages(self, event: dict[str, Any]) -> bool: - if self._pending_recovery_sync: - self._pending_recovery_sync = False - return True - event_type = event.get("type", "") - if event_type == "session.idle": - return True - if event_type == "session.status": - status = event.get("properties", {}).get("status", {}) - if status.get("type") == "idle": - return True - return False - - def _update_result( - self, - event: dict[str, Any], - any_step_finish_seen: bool, - step_finish_count: int, - last_finish_reason: str | None, - last_finish_tokens: dict[str, Any], - ) -> tuple[bool, int, str | None, dict[str, Any]]: - event_type = event.get("type", "") - if event_type == "step_finish": - any_step_finish_seen = True - step_finish_count += 1 - part = event.get("part", {}) - reason = part.get("reason") - if isinstance(reason, str): - last_finish_reason = reason - tokens = part.get("tokens") - if isinstance(tokens, dict): - last_finish_tokens = tokens - return any_step_finish_seen, step_finish_count, last_finish_reason, last_finish_tokens - +from events.phase_loop import PhaseEventLoop, RunResult # Backward-compatibility alias. 
EventLoop = PhaseEventLoop + +__all__ = ["EventLoop", "PhaseEventLoop", "RunResult"] From 3d34cc46f9fa0032780657228034201d011bdf5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 16:54:58 +0200 Subject: [PATCH 35/65] Add chat reconnect recovery sync --- tools/events/chat_loop.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/tools/events/chat_loop.py b/tools/events/chat_loop.py index c3188ed..a8d6bd2 100644 --- a/tools/events/chat_loop.py +++ b/tools/events/chat_loop.py @@ -19,14 +19,12 @@ import json import queue import threading -import time import urllib.error import urllib.request from typing import Any, Callable from events.base import BaseEventLoop from events.sse_client import SseClient, SseClientError -from events.state_tracker import StateTracker from events.emitters import emit_event @@ -64,6 +62,7 @@ def __init__( # Coordination with TUI self._state_queue: queue.Queue[tuple[str, Any | None]] = queue.Queue() self._consumer_thread: threading.Thread | None = None + self._pending_recovery_sync = False # ------------------------------------------------------------------ # Public API @@ -182,9 +181,16 @@ def _consumer_worker(self, render_fn: Callable[[Any, str, str, dict[str, Any]], self._handle_permission(event) continue + if self._pending_recovery_sync: + self._pending_recovery_sync = False + for synced_event in self._sync_session_messages(): + self._emit_finalized_event(render_fn, synced_event) + if self._is_session_idle(event): if self.debug: self.debug("_consumer_worker: session idle detected") + for synced_event in self._sync_session_messages(): + self._emit_finalized_event(render_fn, synced_event) self._emit_event(render_fn, event) self._state_queue.put((ChatState.IDLE, None)) continue @@ -194,11 +200,7 @@ def _consumer_worker(self, render_fn: Callable[[Any, str, str, dict[str, Any]], finalized_events = self._tracker.ingest(event) for fe in finalized_events: - 
sig = (fe.get("type", ""), fe.get("part", {}).get("id", "")) - if sig[1] and sig in self._emitted_signatures: - continue - self._emitted_signatures.add(sig) - self._emit_event(render_fn, fe) + self._emit_finalized_event(render_fn, fe) except SseClientError as exc: msg = f"SSE connection lost: {exc}" @@ -228,10 +230,21 @@ def _is_session_busy(event: dict[str, Any]) -> bool: return status.get("type") == "busy" return False + def _emit_finalized_event( + self, + render_fn: Callable[[Any, str, str, dict[str, Any]], None], + event: dict[str, Any], + ) -> None: + sig = (event.get("type", ""), event.get("part", {}).get("id", "")) + if sig[1] and sig in self._emitted_signatures: + return + self._emitted_signatures.add(sig) + self._emit_event(render_fn, event) + def _emit_event(self, render_fn: Callable[[Any, str, str, dict[str, Any]], None], event: dict[str, Any]) -> None: """Emit a single event through the render pipeline.""" emit_event(render_fn, self.console, "Chat", "Interactive Chat", event) def _trigger_recovery_sync(self) -> None: """Called by SseClient after reconnection.""" - pass + self._pending_recovery_sync = True From 76d43748be98cd374a6a684d3cd71dee7c6e4b3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 16:56:38 +0200 Subject: [PATCH 36/65] Use CLI renderer directly from chat app --- tools/chat/app.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/chat/app.py b/tools/chat/app.py index 63d51e6..a2c7c7d 100644 --- a/tools/chat/app.py +++ b/tools/chat/app.py @@ -24,12 +24,7 @@ sys.path.insert(0, str(Path(__file__).resolve().parents[1])) from chat.debug import _chat_debug # noqa: E402 -import importlib as _importlib # noqa: E402 -# TODO(phase-a6): Break this circular dependency. chat.app should not -# dynamically import run-agent; render_event should be injected as a -# constructor dependency once the events/renderer refactor lands. 
-_run_agent = _importlib.import_module("codecome.cli_render") -render_event = _run_agent.render_event +from codecome.cli_render import render_event # noqa: E402 # --------------------------------------------------------------------------- # Rich imports — same fallback pattern as run-agent.py From c6113a942a716de63c73a8febfb586cdf3038aae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 16:57:07 +0200 Subject: [PATCH 37/65] Clarify runner render dispatcher dependency --- tools/codecome/runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/codecome/runner.py b/tools/codecome/runner.py index 61d9fc5..47dcf53 100644 --- a/tools/codecome/runner.py +++ b/tools/codecome/runner.py @@ -34,7 +34,7 @@ def _consume_events( thinking_on: bool, auth_token: str | None, workspace_dir: str | None, - render_event_fn: Callable[..., None], # run-agent's render_event dispatcher + render_event_fn: Callable[..., None], # CLI/rendering event dispatcher ) -> RunResult: event_loop = EventLoop( base_url=base_url, @@ -72,7 +72,7 @@ def _run_single_attempt( base_url: str, auth_token: str | None, workspace_dir: str | None, - render_event_fn: Callable[..., None], # run-agent's render_event dispatcher + render_event_fn: Callable[..., None], # CLI/rendering event dispatcher emit_fatal_error_fn: Callable[..., None] | None = None, existing_session_id: str | None = None, ) -> tuple[int, str, RunResult, Path]: From b979ed0ba2409e3084be29ac6b8124a1ed661452 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 16:57:44 +0200 Subject: [PATCH 38/65] Add event loop unit tests --- tests/test_events_loops.py | 110 +++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 tests/test_events_loops.py diff --git a/tests/test_events_loops.py b/tests/test_events_loops.py new file mode 100644 index 0000000..1b8c878 --- /dev/null +++ b/tests/test_events_loops.py @@ -0,0 
+1,110 @@ +import queue +import sys +from pathlib import Path +from unittest.mock import MagicMock + +sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "tools")) + +from events.base import BaseEventLoop +from events.chat_loop import ChatEventLoop, ChatState +from events.phase_loop import PhaseEventLoop, RunResult + + +def test_base_event_loop_session_filter_and_idle_detection(): + loop = BaseEventLoop("http://server", "session-1", console=None) + + assert loop._belongs_to_session({"properties": {"sessionID": "session-1"}}) + assert not loop._belongs_to_session({"properties": {"sessionID": "other"}}) + assert loop._belongs_to_session({"properties": {}}) + + assert loop._is_session_idle({"type": "session.idle"}) + assert loop._is_session_idle({"type": "session.status", "properties": {"status": {"type": "idle"}}}) + assert not loop._is_session_idle({"type": "session.status", "properties": {"status": {"type": "busy"}}}) + + +def test_base_event_loop_headers_include_auth_and_workspace(): + loop = BaseEventLoop( + "http://server", + "session-1", + console=None, + auth_token="secret", + workspace_dir="/tmp/workspace", + ) + + headers = loop._get_headers() + + assert headers["Content-Type"] == "application/json" + assert headers["Authorization"].startswith("Basic ") + assert headers["x-opencode-directory"] == "/tmp/workspace" + + +def test_phase_event_loop_returns_result_on_idle(monkeypatch): + events = [ + {"type": "session.status", "properties": {"sessionID": "session-1", "status": {"type": "busy"}}}, + {"type": "step_finish", "part": {"id": "finish-1", "reason": "stop", "tokens": {"output": 3}}}, + {"type": "session.status", "properties": {"sessionID": "session-1", "status": {"type": "idle"}}}, + ] + + class FakeSseClient: + def __init__(self, *args, **kwargs): + pass + + def events(self): + yield from events + + def stop(self): + pass + + monkeypatch.setattr("events.phase_loop.SseClient", FakeSseClient) + loop = PhaseEventLoop("http://server", "session-1", 
console=None, phase="1", label="Recon") + monkeypatch.setattr(loop, "_sync_session_messages", lambda: []) + + rendered = [] + + def render_fn(console, phase, label, event): + rendered.append((phase, label, event)) + + result = loop.run(render_fn) + + assert isinstance(result, RunResult) + assert result.any_step_finish_seen is True + assert result.step_finish_count == 1 + assert result.last_finish_reason == "stop" + assert result.last_finish_tokens == {"output": 3} + assert rendered[-1][2]["properties"]["status"]["type"] == "idle" + + +def test_chat_event_loop_recovery_sync_emits_synced_events(monkeypatch): + events = [ + {"type": "session.status", "properties": {"sessionID": "session-1", "status": {"type": "busy"}}}, + {"type": "session.status", "properties": {"sessionID": "session-1", "status": {"type": "idle"}}}, + ] + + class FakeSseClient: + def __init__(self, *args, **kwargs): + self.on_reconnect = kwargs.get("on_reconnect") + + def events(self): + if self.on_reconnect: + self.on_reconnect() + yield from events + + def stop(self): + pass + + monkeypatch.setattr("events.chat_loop.SseClient", FakeSseClient) + loop = ChatEventLoop("http://server", "session-1", console=None) + synced = {"type": "text", "part": {"id": "synced-text"}, "content": "missed"} + monkeypatch.setattr(loop, "_sync_session_messages", lambda: [synced]) + + rendered = [] + + def render_fn(console, phase, label, event): + rendered.append(event) + + loop._consumer_worker(render_fn) + + assert synced in rendered + assert any(event.get("type") == "session.status" and event.get("properties", {}).get("status", {}).get("type") == "idle" for event in rendered) + assert loop.get_state(timeout=0.1)[0] == ChatState.BUSY + assert loop.get_state(timeout=0.1)[0] == ChatState.IDLE From 49714bb266704f1c103beda08668dd0c9a7aadd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 17:00:21 +0200 Subject: [PATCH 39/65] Preserve events SseClient compatibility export --- 
tools/events/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/events/__init__.py b/tools/events/__init__.py index fe58a53..b5ed24d 100644 --- a/tools/events/__init__.py +++ b/tools/events/__init__.py @@ -5,14 +5,16 @@ Events package public exports. PhaseEventLoop lives in events.phase_loop. EventLoop remains as a -backward-compatible alias for older imports. +backward-compatible alias for older imports. SseClient is also re-exported +for older tests and integrations that monkeypatch events.SseClient. """ from __future__ import annotations +from events.sse_client import SseClient, SseClientError from events.phase_loop import PhaseEventLoop, RunResult # Backward-compatibility alias. EventLoop = PhaseEventLoop -__all__ = ["EventLoop", "PhaseEventLoop", "RunResult"] +__all__ = ["EventLoop", "PhaseEventLoop", "RunResult", "SseClient", "SseClientError"] From 02dfcb98596e9e267e54b5f957ea7ac3df2c0172 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 17:01:20 +0200 Subject: [PATCH 40/65] Use compatibility SseClient export in phase loop --- tools/events/phase_loop.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/events/phase_loop.py b/tools/events/phase_loop.py index 4fb0826..0681d09 100644 --- a/tools/events/phase_loop.py +++ b/tools/events/phase_loop.py @@ -14,7 +14,7 @@ import dataclasses from typing import Any, Callable -from events.sse_client import SseClient, SseClientError +from events.sse_client import SseClientError from events.base import BaseEventLoop from events.emitters import emit_event @@ -66,7 +66,12 @@ def run( _last_finish_tokens: dict[str, Any] = {} _last_permission_error: str | None = None - self._client = SseClient( + # Resolve through the package export so legacy tests/integrations + # that monkeypatch events.SseClient keep working after the + # PhaseEventLoop move to events.phase_loop. 
+ import events as _events_pkg + + self._client = _events_pkg.SseClient( self.base_url, auth_token=self.auth_token, workspace_dir=self.workspace_dir, From 948608838d3b0947736a7833da17bc65d31c1227 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 17:04:15 +0200 Subject: [PATCH 41/65] Patch event loop test through compatibility export --- tests/test_events_loops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_events_loops.py b/tests/test_events_loops.py index 1b8c878..bdbb24c 100644 --- a/tests/test_events_loops.py +++ b/tests/test_events_loops.py @@ -55,7 +55,7 @@ def events(self): def stop(self): pass - monkeypatch.setattr("events.phase_loop.SseClient", FakeSseClient) + monkeypatch.setattr("events.SseClient", FakeSseClient) loop = PhaseEventLoop("http://server", "session-1", console=None, phase="1", label="Recon") monkeypatch.setattr(loop, "_sync_session_messages", lambda: []) From 5fe9191b15ac43fd2eacb58ae91cb1e29133ebe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 17:47:24 +0200 Subject: [PATCH 42/65] Keep codecome package init lightweight --- tools/codecome/__init__.py | 67 +++++++------------------------------- 1 file changed, 11 insertions(+), 56 deletions(-) diff --git a/tools/codecome/__init__.py b/tools/codecome/__init__.py index 020dc05..e2003bd 100644 --- a/tools/codecome/__init__.py +++ b/tools/codecome/__init__.py @@ -1,63 +1,18 @@ # Copyright (C) 2025-2026 Pablo Ruiz García # SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later -""" -CodeCome core package: config, session, runner, graceful, transcript, version. +"""CodeCome core package. 
+ +Import concrete helpers from their owning modules, for example: + + from codecome.config import load_prompt + from codecome.session import create_session + from codecome.runner import _run_single_attempt + +This package initializer intentionally stays lightweight to avoid hidden +import cycles between CLI, runner, rendering, chat, and event modules. """ from __future__ import annotations -from codecome.config import ( - load_prompt, - resolve_color_mode, - resolve_model_and_variant, - resolve_runtime_model_for_banner, - resolve_thinking_decision, - show_model_table, - truthy_env, -) -from codecome.graceful import ( - build_frontmatter_resume_prompt, - build_phase_resume_prompt, - build_resume_command, - check_phase_graceful_completion, - phase_checklist_lines, -) -from codecome.session import ( - create_chat_session, - create_session, - send_prompt_to_session, -) -from codecome.transcript import ( - close_transcript, - open_chat_transcript, - open_phase_transcript, -) -from codecome.version import check_opencode_version - -__all__ = [ - # config - "truthy_env", - "resolve_color_mode", - "load_prompt", - "resolve_model_and_variant", - "resolve_runtime_model_for_banner", - "resolve_thinking_decision", - "show_model_table", - # session - "create_session", - "create_chat_session", - "send_prompt_to_session", - # graceful - "check_phase_graceful_completion", - "phase_checklist_lines", - "build_phase_resume_prompt", - "build_frontmatter_resume_prompt", - "build_resume_command", - # transcript - "open_phase_transcript", - "open_chat_transcript", - "close_transcript", - # version - "check_opencode_version", -] +__all__: list[str] = [] From 55e1fb5a330f55c700cc185566067ef5eab0492c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 17:49:45 +0200 Subject: [PATCH 43/65] Remove legacy events compatibility exports --- tools/events/__init__.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git 
a/tools/events/__init__.py b/tools/events/__init__.py index b5ed24d..336dda1 100644 --- a/tools/events/__init__.py +++ b/tools/events/__init__.py @@ -1,20 +1,16 @@ # Copyright (C) 2025-2026 Pablo Ruiz García # SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later -""" -Events package public exports. +"""Events package public exports. -PhaseEventLoop lives in events.phase_loop. EventLoop remains as a -backward-compatible alias for older imports. SseClient is also re-exported -for older tests and integrations that monkeypatch events.SseClient. +Phase-specific imports should use events.phase_loop directly. EventLoop is +kept only as the public phase-loop alias used by the current runner. """ from __future__ import annotations -from events.sse_client import SseClient, SseClientError from events.phase_loop import PhaseEventLoop, RunResult -# Backward-compatibility alias. EventLoop = PhaseEventLoop -__all__ = ["EventLoop", "PhaseEventLoop", "RunResult", "SseClient", "SseClientError"] +__all__ = ["EventLoop", "PhaseEventLoop", "RunResult"] From ced9187bb71e8ffd332f71bcc55bb0576c7da152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 17:50:30 +0200 Subject: [PATCH 44/65] Use direct SseClient dependency in phase loop --- tools/events/phase_loop.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tools/events/phase_loop.py b/tools/events/phase_loop.py index 0681d09..4fb0826 100644 --- a/tools/events/phase_loop.py +++ b/tools/events/phase_loop.py @@ -14,7 +14,7 @@ import dataclasses from typing import Any, Callable -from events.sse_client import SseClientError +from events.sse_client import SseClient, SseClientError from events.base import BaseEventLoop from events.emitters import emit_event @@ -66,12 +66,7 @@ def run( _last_finish_tokens: dict[str, Any] = {} _last_permission_error: str | None = None - # Resolve through the package export so legacy tests/integrations - # that monkeypatch 
events.SseClient keep working after the - # PhaseEventLoop move to events.phase_loop. - import events as _events_pkg - - self._client = _events_pkg.SseClient( + self._client = SseClient( self.base_url, auth_token=self.auth_token, workspace_dir=self.workspace_dir, From 102a11efcb4fd51c8faff61ef22d5e2b9683021d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 17:51:09 +0200 Subject: [PATCH 45/65] Patch event loop tests through concrete modules --- tests/test_events_loops.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_events_loops.py b/tests/test_events_loops.py index bdbb24c..28513c3 100644 --- a/tests/test_events_loops.py +++ b/tests/test_events_loops.py @@ -1,7 +1,5 @@ -import queue import sys from pathlib import Path -from unittest.mock import MagicMock sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "tools")) @@ -55,7 +53,7 @@ def events(self): def stop(self): pass - monkeypatch.setattr("events.SseClient", FakeSseClient) + monkeypatch.setattr("events.phase_loop.SseClient", FakeSseClient) loop = PhaseEventLoop("http://server", "session-1", console=None, phase="1", label="Recon") monkeypatch.setattr(loop, "_sync_session_messages", lambda: []) From 934b337c6cd0c0563204cac6402bec64369ba0a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 17:52:05 +0200 Subject: [PATCH 46/65] Simplify seen part handling in base event loop --- tools/events/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/events/base.py b/tools/events/base.py index aeeb377..295e360 100644 --- a/tools/events/base.py +++ b/tools/events/base.py @@ -158,7 +158,6 @@ def _sync_session_messages(self) -> list[dict[str, Any]]: continue part_id = part.get("id") if isinstance(part_id, str) and self._tracker.has_seen(part_id): - self._tracker.mark_seen(part_id) continue synthesized = { "type": "message.part.updated", From 8070080ae189ee3253f39739c5ebabd6e22563a2 Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 17:53:32 +0200 Subject: [PATCH 47/65] Add tools architecture guide --- tools/AGENTS.md | 94 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 tools/AGENTS.md diff --git a/tools/AGENTS.md b/tools/AGENTS.md new file mode 100644 index 0000000..4cb9d63 --- /dev/null +++ b/tools/AGENTS.md @@ -0,0 +1,94 @@ +# Tools Architecture Guide + +This directory contains CodeCome's local tooling: phase runners, rendering, event loops, chat UI, sandbox helpers, and finding/itemdb scripts. + +These rules are intended to keep the tooling modular and prevent new monoliths from forming. + +## Entry points + +Historical executable scripts should stay thin. + +- `tools/run-agent.py` is only a compatibility entry point. It should delegate to `codecome.cli.main()` and contain no phase, rendering, event-loop, or chat logic. +- New runner logic belongs under `tools/codecome/`. +- Do not add new implementation logic to wrapper scripts unless the script is intentionally standalone and out of scope for the core runner. + +## CodeCome core package + +Use concrete modules rather than broad package re-exports. + +- CLI parsing and top-level phase flow: `tools/codecome/cli.py`. +- Single-attempt phase execution: `tools/codecome/runner.py`. +- Rendering dispatcher and console construction: `tools/codecome/cli_render.py`. +- Configuration, prompt, model, variant, thinking, and color resolution: `tools/codecome/config.py`. +- OpenCode HTTP session/prompt helpers: `tools/codecome/session.py`. +- Phase completion and resume/repair prompts: `tools/codecome/graceful.py`. +- Transcript helpers: `tools/codecome/transcript.py`. +- Version checks: `tools/codecome/version.py`. + +`tools/codecome/__init__.py` must stay lightweight. Internal code should import from the concrete module that owns the functionality. 
+ +## Rendering + +Rendering code belongs under `tools/rendering/`. + +- Runtime rendering state belongs in `RenderContext`. +- Rendering settings belong in `RenderSettings`. +- File snapshot/diff state belongs in `SnapshotCache`. +- Output destinations are represented by sinks (`PlainSink`, `RichConsoleSink`, `TextualRichLogSink`). +- Generic event renderers live in `tools/rendering/events.py`. +- Tool renderers live under `tools/rendering/tools/`. +- Command execution interceptors live under `tools/rendering/tools/interceptors/`. + +Renderers should receive the normalized event/tool-state dictionaries emitted by the event layer. Do not introduce a second event object model unless there is a clear need. + +Rich and Textual output should share renderer logic where possible. The sink decides where renderables are written; renderers may emit arbitrary Rich renderables when the sink supports them. + +## Command rendering + +CodeCome-specific command rendering is intentional product behavior. + +Special handling for commands such as sandbox bootstrap, `rtk read`, `rtk grep`, `rg`, `ls`, `find`, or `tree` should be implemented as `CommandExecutionInterceptor` classes under `tools/rendering/tools/interceptors/` rather than hidden inside a generic bash renderer. + +## Event loops + +Event consumption code belongs under `tools/events/`. + +- Shared SSE/session/dedup/permission/sync logic belongs in `BaseEventLoop`. +- Phase lifecycle logic belongs in `PhaseEventLoop` (`tools/events/phase_loop.py`). +- Multi-turn chat lifecycle logic belongs in `ChatEventLoop` (`tools/events/chat_loop.py`). +- `events.__init__` should only expose the public phase-loop alias and basic package exports. + +Avoid adding phase-specific behavior to `BaseEventLoop` and avoid duplicating session sync or permission logic in phase/chat subclasses. + +## Chat + +Interactive chat code belongs under `tools/chat/`. + +- Textual UI classes and the RichLog proxy live in `tools/chat/app.py`. 
+- Chat startup/wiring lives in `tools/chat/harness.py`. +- Chat debug helpers live in `tools/chat/debug.py`. + +`chat` modules must not import `tools/run-agent.py`. Use `codecome.cli_render`, `codecome.session`, `codecome.config`, and other concrete modules instead. + +`tools/chat/__init__.py` must stay lightweight and should not eagerly import Textual-adjacent modules. + +## Findings and itemdb + +Finding/itemdb consolidation belongs to Epic B. + +When that work starts, shared finding helpers should live under `tools/findings/`, and historical scripts such as `create-finding.py`, `move-finding.py`, `list-findings.py`, `render-report.py`, and `render-index.py` should become thin wrappers. + +## Testing expectations + +Refactors in this directory should include focused tests for the moved component, not only broad smoke checks. + +Useful test categories: + +- CLI/wrapper smoke tests. +- Rendering unit tests and fixture/golden-style checks. +- Event-loop tests with fake SSE streams. +- Chat tests that import `chat.app` and `chat.harness` directly. +- Command interceptor tests. +- Snapshot/cache tests. + +Do not rely on tests that patch a stale wrapper module when the implementation has moved to a concrete package module. 
From cf6f3a7513af249a4caea9f1e0714a0050779cc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 18:00:55 +0200 Subject: [PATCH 48/65] Patch event loop tests through phase loop module --- tests/test_new_serve_stack.py | 338 ++++++++++------------------------ 1 file changed, 98 insertions(+), 240 deletions(-) diff --git a/tests/test_new_serve_stack.py b/tests/test_new_serve_stack.py index fbcdbdc..bba5a03 100644 --- a/tests/test_new_serve_stack.py +++ b/tests/test_new_serve_stack.py @@ -1,14 +1,11 @@ from __future__ import annotations -import io import json import subprocess import sys -import time import urllib.error import urllib.request from pathlib import Path -from typing import Any import pytest @@ -34,6 +31,10 @@ def load_serve(): return ServerRunner, ServerInfo, ServerRunnerError +def _patch_phase_sse_client(monkeypatch, fake_cls): + monkeypatch.setattr("events.phase_loop.SseClient", fake_cls) + + # --------------------------------------------------------------------------- # StateTracker # --------------------------------------------------------------------------- @@ -57,12 +58,11 @@ def test_delta_accumulates_text(self, tracker): "type": "message.part.delta", "properties": {"partID": "abc", "field": "text", "delta": "world"}, }) - # Not finalized yet → no events assert tracker.ingest({"type": "server.heartbeat"}) == [{"type": "server.heartbeat"}] assert tracker._delta_buffers.get("abc") == "Hello world" - def test_updated_emits_finalized_text(self, tracker): - SseClient, SseClientError, StateTracker, emit_event, EventLoop, RunResult = load_events() + def test_updated_emits_finalized_text(self): + StateTracker = load_events()[2] tracker = StateTracker() tracker.ingest({ "type": "message.part.delta", @@ -108,10 +108,9 @@ def test_step_finish_finalized_immediately(self, tracker): assert finalized[0]["type"] == "step_finish" assert finalized[0]["part"]["reason"] == "stop" - def 
test_reasoning_part_requires_time_end(self, tracker): - SseClient, SseClientError, StateTracker, emit_event, EventLoop, RunResult = load_events() + def test_reasoning_part_requires_time_end(self): + StateTracker = load_events()[2] tracker = StateTracker() - # Without time.end → not finalized no_final = tracker.ingest({ "type": "message.part.updated", "properties": { @@ -120,7 +119,6 @@ def test_reasoning_part_requires_time_end(self, tracker): }, }) assert len(no_final) == 0 - # With time.end → finalized finalized = tracker.ingest({ "type": "message.part.updated", "properties": { @@ -132,30 +130,21 @@ def test_reasoning_part_requires_time_end(self, tracker): assert finalized[0]["type"] == "reasoning" def test_tool_part_lifecycle(self, tracker): - # Should be ignored (not terminal) pending = tracker.ingest({"type": "message.part.updated", "properties": {"sessionID": "s1", "part": {"id": "t1", "type": "tool", "state": {"status": "pending"}}}}) assert len(pending) == 0 running = tracker.ingest({"type": "message.part.updated", "properties": {"sessionID": "s1", "part": {"id": "t1", "type": "tool", "state": {"status": "running"}}}}) assert len(running) == 0 - # Should be finalized completed = tracker.ingest({"type": "message.part.updated", "properties": {"sessionID": "s1", "part": {"id": "t1", "type": "tool", "state": {"status": "completed"}}}}) assert len(completed) == 1 assert completed[0]["type"] == "tool_use" def test_text_accumulation_survives_intermediate_updates(self, tracker): tracker.ingest({"type": "message.part.delta", "properties": {"partID": "abc", "field": "text", "delta": "Hello"}}) - - # An update without time.end should NOT clear the buffer tracker.ingest({"type": "message.part.updated", "properties": {"sessionID": "s1", "part": {"id": "abc", "type": "text", "text": "Hello"}}}) - - # We should still be able to accumulate tracker.ingest({"type": "message.part.delta", "properties": {"partID": "abc", "field": "text", "delta": " world"}}) - - # The final 
update with time.end should flush the entire combined string finalized = tracker.ingest({"type": "message.part.updated", "properties": {"sessionID": "s1", "part": {"id": "abc", "type": "text", "time": {"end": 1}}}}) - assert len(finalized) == 1 assert finalized[0]["part"]["text"] == "Hello world" @@ -188,48 +177,34 @@ def sse_cls(self): def test_parse_buffer_single_data_line(self, sse_cls): client = sse_cls("http://localhost:8080") - buf = ["data: {}", ""] - ev = client._parse_buffer(buf) # type: ignore[misc] - assert ev == {} + assert client._parse_buffer(["data: {}", ""]) == {} def test_parse_buffer_json_payload(self, sse_cls): client = sse_cls("http://localhost:8080") payload = {"type": "server.heartbeat"} - buf = [f"data: {json.dumps(payload)}"] - ev = client._parse_buffer(buf) # type: ignore[misc] - assert ev == payload + assert client._parse_buffer([f"data: {json.dumps(payload)}"]) == payload def test_parse_buffer_multiline_data(self, sse_cls): client = sse_cls("http://localhost:8080") - buf = ["data: {\"key\": \"line1", "data: line2\"}"] - ev = client._parse_buffer(buf) # type: ignore[misc] - assert ev is None # joined string is not valid JSON; returns None + assert client._parse_buffer(["data: {\"key\": \"line1", "data: line2\"}"]) is None def test_parse_buffer_multiline_valid_json(self, sse_cls): client = sse_cls("http://localhost:8080") payload = json.dumps({"msg": "hello\nworld"}) lines = payload.split("\n") - buf = [f"data: {ln}" for ln in lines] - ev = client._parse_buffer(buf) # type: ignore[misc] - assert ev == {"msg": "hello\nworld"} + assert client._parse_buffer([f"data: {ln}" for ln in lines]) == {"msg": "hello\nworld"} def test_parse_buffer_ignores_comment_lines(self, sse_cls): client = sse_cls("http://localhost:8080") - buf = [":comment", "data: 42"] - ev = client._parse_buffer(buf) # type: ignore[misc] - assert ev == 42 + assert client._parse_buffer([":comment", "data: 42"]) == 42 def test_parse_buffer_no_data_returns_none(self, sse_cls): 
client = sse_cls("http://localhost:8080") - buf = ["event: foo"] - ev = client._parse_buffer(buf) # type: ignore[misc] - assert ev is None + assert client._parse_buffer(["event: foo"]) is None def test_parse_buffer_malformed_json_returns_none(self, sse_cls): client = sse_cls("http://localhost:8080") - buf = ["data: not-json"] - ev = client._parse_buffer(buf) # type: ignore[misc] - assert ev is None + assert client._parse_buffer(["data: not-json"]) is None # --------------------------------------------------------------------------- @@ -277,16 +252,17 @@ def test_belongs_to_session_filters_by_session_id(self, loop_cls): loop = loop_cls("http://localhost:8080", "sess-abc", None, "1", "recon") assert loop._belongs_to_session({"properties": {"sessionID": "sess-abc"}}) assert not loop._belongs_to_session({"properties": {"sessionID": "other"}}) - # server events without sessionID pass through assert loop._belongs_to_session({"type": "server.heartbeat"}) def test_update_result_counts_step_finishes(self, loop_cls): loop = loop_cls("http://localhost:8080", "s", None, "1", "recon") - event = { - "type": "step_finish", - "part": {"reason": "tool-calls", "tokens": {"input": 5, "output": 10}}, - } - result = loop._update_result(event, False, 0, None, {}) # type: ignore[misc] + result = loop._update_result( + {"type": "step_finish", "part": {"reason": "tool-calls", "tokens": {"input": 5, "output": 10}}}, + False, + 0, + None, + {}, + ) any_seen, count, reason, tokens = result assert any_seen is True assert count == 1 @@ -313,14 +289,12 @@ def classes(self): return load_serve() def test_parse_port_from_url_standard(self, classes): - """Port parsing removed; verify that known port is passed directly.""" _, ServerInfo, _ = classes log_path = ROOT / "tmp" / "test.log" info = ServerInfo(proc=None, pid=1234, base_url="http://127.0.0.1:49152", port=49152, log_path=log_path, password="dummy") # type: ignore[arg-type] assert info.port == 49152 def 
test_parse_port_from_url_no_port_raises(self, classes): - """Port is always known via ephemeral assignment; no parsing needed.""" _, ServerInfo, _ = classes log_path = ROOT / "tmp" / "test.log" info = ServerInfo(proc=None, pid=1234, base_url="http://127.0.0.1:8080", port=8080, log_path=log_path, password="dummy") # type: ignore[arg-type] @@ -329,7 +303,6 @@ def test_parse_port_from_url_no_port_raises(self, classes): def test_server_info_fields(self, classes): ServerRunner, ServerInfo, ServerRunnerError = classes log_path = ROOT / "tmp" / "test.log" - # Construct a minimal ServerInfo with a None proc (not used in tests) info = ServerInfo(proc=None, pid=1234, base_url="http://127.0.0.1:8080", port=8080, log_path=log_path, password="dummy") # type: ignore[arg-type] assert info.pid == 1234 assert info.port == 8080 @@ -343,9 +316,10 @@ def test_server_runner_error_is_exception(self, classes): def test_try_fetch_json_timeout_returns_none(self, classes, monkeypatch): from opencode.serve import _try_fetch_json - # Patch urlopen to always raise + def boom(*a, **kw): raise urllib.error.URLError("timeout") + monkeypatch.setattr(urllib.request, "urlopen", boom) assert _try_fetch_json("http://localhost:1/health", 0.1) is None @@ -377,15 +351,9 @@ def test_start_treats_zero_port_as_ephemeral(self, classes, monkeypatch): class FakeProc: pid = 1234 - - def poll(self): - return None - - def terminate(self): - return None - - def wait(self, timeout=None): - return 0 + def poll(self): return None + def terminate(self): return None + def wait(self, timeout=None): return 0 monkeypatch.setattr("opencode.serve._find_free_port", lambda hostname: 54321) monkeypatch.setattr("opencode.serve._try_fetch_json", lambda url, timeout, auth_token=None: {"healthy": True, "version": "1.14.50"}) @@ -411,15 +379,10 @@ def event_loop_objects(self): SseClient, SseClientError, StateTracker, emit_event, EventLoop, RunResult = load_events() return EventLoop, RunResult, SseClient - def 
test_full_run_emits_expected_events(self, event_loop_objects): + def test_full_run_emits_expected_events(self, event_loop_objects, monkeypatch): EventLoop, RunResult, SseClient = event_loop_objects - emitted: list[dict] = [] - def fake_render(console, phase, label, event): - emitted.append(event) - - # Create a fake SSE client that yields a canned sequence. class FakeSseClient: def __init__(self, *a, **kw): pass @@ -436,15 +399,9 @@ def events(self): def stop(self): pass - # Monkey-patch SseClient inside the events module for this test. - import events as _events_mod - orig = _events_mod.SseClient - _events_mod.SseClient = FakeSseClient # type: ignore[misc] - try: - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") - result = loop.run(fake_render) - finally: - _events_mod.SseClient = orig + _patch_phase_sse_client(monkeypatch, FakeSseClient) + loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + result = loop.run(lambda c, p, l, e: emitted.append(e)) assert result.any_step_finish_seen is True assert result.step_finish_count == 1 @@ -452,17 +409,11 @@ def stop(self): assert result.last_finish_tokens == {"total": 42} assert result.last_session_id == "sess-1" - # Verify rendered event types in order (includes pass-through events) - types = [e["type"] for e in emitted] - assert types == ["server.connected", "step_start", "text", "tool_use", "step_finish", "session.idle"] - - # Verify text accumulation - text_event = emitted[2] - assert text_event["part"]["text"] == "Hello" + assert [e["type"] for e in emitted] == ["server.connected", "step_start", "text", "tool_use", "step_finish", "session.idle"] + assert emitted[2]["part"]["text"] == "Hello" def test_permission_auto_rejected(self, event_loop_objects, monkeypatch): EventLoop, RunResult, SseClient = event_loop_objects - captured_perms: list[tuple] = [] class FakeSseClient: @@ -476,141 +427,95 @@ def events(self): def stop(self): pass - import events as _events_mod - orig = 
_events_mod.SseClient - _events_mod.SseClient = FakeSseClient # type: ignore[misc] - - # Capture permission POSTs def fake_urlopen(req, **kw): if req.full_url.endswith("/permission/perm-1/reply"): captured_perms.append((req.full_url, req.data)) return type("R", (), {"read": lambda: b"{}", "__enter__": lambda s: s, "__exit__": lambda *a: None})() + _patch_phase_sse_client(monkeypatch, FakeSseClient) monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) - try: - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") - result = loop.run(lambda c, p, l, e: None) - finally: - _events_mod.SseClient = orig + loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + result = loop.run(lambda c, p, l, e: None) assert result.last_permission_error == "tool permission rejected: bash" assert len(captured_perms) == 1 assert "permission/perm-1/reply" in captured_perms[0][0] assert json.loads(captured_perms[0][1]) == {"reply": "reject", "message": "Auto-rejected by CodeCome configuration"} - def test_session_idle_stops_consuming(self, event_loop_objects): + def test_session_idle_stops_consuming(self, event_loop_objects, monkeypatch): EventLoop, RunResult, SseClient = event_loop_objects - emitted: list[dict] = [] class FakeSseClient: - def __init__(self, *a, **kw): - pass + def __init__(self, *a, **kw): pass def events(self): return iter([ {"type": "session.idle", "properties": {"sessionID": "sess-1"}}, {"type": "message.part.updated", "properties": {"sessionID": "sess-1", "part": {"id": "late", "type": "text", "time": {"end": 1}}}}, ]) - def stop(self): - pass + def stop(self): pass - import events as _events_mod - orig = _events_mod.SseClient - _events_mod.SseClient = FakeSseClient # type: ignore[misc] - try: - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") - loop.run(lambda c, p, l, e: emitted.append(e)) - finally: - _events_mod.SseClient = orig + _patch_phase_sse_client(monkeypatch, FakeSseClient) + loop = 
EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + loop.run(lambda c, p, l, e: emitted.append(e)) - # Events after session.idle should be ignored, but session.idle itself is passed through assert len(emitted) == 1 assert emitted[0]["type"] == "session.idle" - def test_session_status_idle_stops_consuming(self, event_loop_objects): + def test_session_status_idle_stops_consuming(self, event_loop_objects, monkeypatch): EventLoop, RunResult, SseClient = event_loop_objects - emitted: list[dict] = [] class FakeSseClient: - def __init__(self, *a, **kw): - pass + def __init__(self, *a, **kw): pass def events(self): return iter([ {"type": "session.status", "properties": {"sessionID": "sess-1", "status": {"type": "idle"}}}, {"type": "message.part.updated", "properties": {"sessionID": "sess-1", "part": {"id": "late", "type": "text", "time": {"end": 1}}}}, ]) - def stop(self): - pass + def stop(self): pass - import events as _events_mod - orig = _events_mod.SseClient - _events_mod.SseClient = FakeSseClient # type: ignore[misc] - try: - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") - loop.run(lambda c, p, l, e: emitted.append(e)) - finally: - _events_mod.SseClient = orig + _patch_phase_sse_client(monkeypatch, FakeSseClient) + loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + loop.run(lambda c, p, l, e: emitted.append(e)) - # Events after session.status idle should be ignored. 
assert len(emitted) == 1 assert emitted[0]["type"] == "session.status" - def test_both_idle_events_only_processed_once(self, event_loop_objects): - """When both canonical and deprecated idle arrive, run() exits on the first.""" + def test_both_idle_events_only_processed_once(self, event_loop_objects, monkeypatch): EventLoop, RunResult, SseClient = event_loop_objects - emitted: list[dict] = [] class FakeSseClient: - def __init__(self, *a, **kw): - pass + def __init__(self, *a, **kw): pass def events(self): return iter([ {"type": "session.status", "properties": {"sessionID": "sess-1", "status": {"type": "idle"}}}, {"type": "session.idle", "properties": {"sessionID": "sess-1"}}, {"type": "message.part.updated", "properties": {"sessionID": "sess-1", "part": {"id": "late", "type": "text", "time": {"end": 1}}}}, ]) - def stop(self): - pass + def stop(self): pass - import events as _events_mod - orig = _events_mod.SseClient - _events_mod.SseClient = FakeSseClient # type: ignore[misc] - try: - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") - result = loop.run(lambda c, p, l, e: emitted.append(e)) - finally: - _events_mod.SseClient = orig + _patch_phase_sse_client(monkeypatch, FakeSseClient) + loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + result = loop.run(lambda c, p, l, e: emitted.append(e)) - # Only the first idle event should be processed; the loop returns immediately. 
assert len(emitted) == 1 assert emitted[0]["type"] == "session.status" assert result.last_session_id == "sess-1" - def test_empty_stream_no_step_finish(self, event_loop_objects): + def test_empty_stream_no_step_finish(self, event_loop_objects, monkeypatch): EventLoop, RunResult, SseClient = event_loop_objects class FakeSseClient: - def __init__(self, *a, **kw): - pass - def events(self): - return iter([ - {"type": "server.connected"}, - {"type": "server.heartbeat"}, - ]) - def stop(self): - pass + def __init__(self, *a, **kw): pass + def events(self): return iter([{"type": "server.connected"}, {"type": "server.heartbeat"}]) + def stop(self): pass - import events as _events_mod - orig = _events_mod.SseClient - _events_mod.SseClient = FakeSseClient # type: ignore[misc] - try: - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") - result = loop.run(lambda c, p, l, e: None) - finally: - _events_mod.SseClient = orig + _patch_phase_sse_client(monkeypatch, FakeSseClient) + loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + result = loop.run(lambda c, p, l, e: None) assert result.any_step_finish_seen is False assert result.step_finish_count == 0 @@ -618,134 +523,87 @@ def stop(self): def test_session_snapshot_sync_emits_missing_assistant_parts(self, event_loop_objects, monkeypatch): EventLoop, RunResult, SseClient = event_loop_objects - emitted: list[dict] = [] class FakeSseClient: - def __init__(self, *a, **kw): - pass - + def __init__(self, *a, **kw): pass def events(self): return iter([ {"type": "server.connected", "properties": {}}, {"type": "session.status", "properties": {"sessionID": "sess-1", "status": {"type": "busy"}}}, {"type": "session.idle", "properties": {"sessionID": "sess-1"}}, ]) - - def stop(self): - pass + def stop(self): pass class FakeResp: - def __init__(self, payload): - self.payload = payload - - def read(self): - return json.dumps(self.payload).encode("utf-8") - - def __enter__(self): - return self - - 
def __exit__(self, *a): - pass + def __init__(self, payload): self.payload = payload + def read(self): return json.dumps(self.payload).encode("utf-8") + def __enter__(self): return self + def __exit__(self, *a): pass - messages_payload = [ - { - "info": { - "id": "msg-1", - "role": "assistant", - "agent": "test", - "modelID": "demo-model", - "sessionID": "sess-1", - }, - "parts": [ - {"id": "p1", "type": "step-start", "sessionID": "sess-1"}, - {"id": "p2", "type": "text", "sessionID": "sess-1", "text": "HELLO", "time": {"end": 1}}, - {"id": "p3", "type": "step-finish", "sessionID": "sess-1", "reason": "stop", "tokens": {"total": 1}}, - ], - } - ] + messages_payload = [{ + "info": {"id": "msg-1", "role": "assistant", "agent": "test", "modelID": "demo-model", "sessionID": "sess-1"}, + "parts": [ + {"id": "p1", "type": "step-start", "sessionID": "sess-1"}, + {"id": "p2", "type": "text", "sessionID": "sess-1", "text": "HELLO", "time": {"end": 1}}, + {"id": "p3", "type": "step-finish", "sessionID": "sess-1", "reason": "stop", "tokens": {"total": 1}}, + ], + }] def fake_urlopen(req, **kw): if req.full_url.endswith("/session/sess-1/message"): return FakeResp(messages_payload) raise AssertionError(f"unexpected urlopen call: {req.full_url}") - import events as _events_mod - orig = _events_mod.SseClient - _events_mod.SseClient = FakeSseClient # type: ignore[misc] + _patch_phase_sse_client(monkeypatch, FakeSseClient) monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) - try: - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") - result = loop.run(lambda c, p, l, e: emitted.append(e)) - finally: - _events_mod.SseClient = orig + loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + result = loop.run(lambda c, p, l, e: emitted.append(e)) assert result.any_step_finish_seen is True assert result.last_finish_reason == "stop" - types = [e["type"] for e in emitted] - assert types == ["server.connected", "session.status", 
"message.updated", "step_start", "text", "step_finish", "session.idle"] + assert [e["type"] for e in emitted] == ["server.connected", "session.status", "message.updated", "step_start", "text", "step_finish", "session.idle"] def test_session_snapshot_sync_emits_tool_use_from_completed_parts(self, event_loop_objects, monkeypatch): EventLoop, RunResult, SseClient = event_loop_objects - emitted: list[dict] = [] class FakeSseClient: - def __init__(self, *a, **kw): - pass - + def __init__(self, *a, **kw): pass def events(self): return iter([ {"type": "session.status", "properties": {"sessionID": "sess-1", "status": {"type": "busy"}}}, {"type": "session.idle", "properties": {"sessionID": "sess-1"}}, ]) - - def stop(self): - pass + def stop(self): pass class FakeResp: - def __init__(self, payload): - self.payload = payload - - def read(self): - return json.dumps(self.payload).encode("utf-8") - - def __enter__(self): - return self - - def __exit__(self, *a): - pass + def __init__(self, payload): self.payload = payload + def read(self): return json.dumps(self.payload).encode("utf-8") + def __enter__(self): return self + def __exit__(self, *a): pass - messages_payload = [ - { - "info": {"id": "msg-1", "role": "assistant", "agent": "test", "modelID": "demo-model", "sessionID": "sess-1"}, - "parts": [ - { - "id": "tool-1", - "type": "tool", - "tool": "task", - "sessionID": "sess-1", - "state": {"status": "completed", "output": "OK"}, - } - ], - } - ] + messages_payload = [{ + "info": {"id": "msg-1", "role": "assistant", "agent": "test", "modelID": "demo-model", "sessionID": "sess-1"}, + "parts": [{ + "id": "tool-1", + "type": "tool", + "tool": "task", + "sessionID": "sess-1", + "state": {"status": "completed", "output": "OK"}, + }], + }] def fake_urlopen(req, **kw): if req.full_url.endswith("/session/sess-1/message"): return FakeResp(messages_payload) raise AssertionError(f"unexpected urlopen call: {req.full_url}") - import events as _events_mod - orig = _events_mod.SseClient - 
_events_mod.SseClient = FakeSseClient # type: ignore[misc] + _patch_phase_sse_client(monkeypatch, FakeSseClient) monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) - try: - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") - loop.run(lambda c, p, l, e: emitted.append(e)) - finally: - _events_mod.SseClient = orig + loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + loop.run(lambda c, p, l, e: emitted.append(e)) assert any(e["type"] == "tool_use" for e in emitted) From 3acc1aa1b8e9daefda89db17b97a8cb779d39a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 18:04:13 +0200 Subject: [PATCH 49/65] Remove EventLoop compatibility alias --- tools/events/__init__.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tools/events/__init__.py b/tools/events/__init__.py index 336dda1..90c2ba2 100644 --- a/tools/events/__init__.py +++ b/tools/events/__init__.py @@ -1,16 +1,10 @@ # Copyright (C) 2025-2026 Pablo Ruiz García # SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later -"""Events package public exports. - -Phase-specific imports should use events.phase_loop directly. EventLoop is -kept only as the public phase-loop alias used by the current runner. 
-""" +"""Events package public exports.""" from __future__ import annotations from events.phase_loop import PhaseEventLoop, RunResult -EventLoop = PhaseEventLoop - -__all__ = ["EventLoop", "PhaseEventLoop", "RunResult"] +__all__ = ["PhaseEventLoop", "RunResult"] From e403071a3c9f7cd07b923eb6d043210faeb90083 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 18:04:54 +0200 Subject: [PATCH 50/65] Use PhaseEventLoop explicitly in runner --- tools/codecome/runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/codecome/runner.py b/tools/codecome/runner.py index 47dcf53..b5aa2ab 100644 --- a/tools/codecome/runner.py +++ b/tools/codecome/runner.py @@ -16,7 +16,7 @@ sys.path.insert(0, str(Path(__file__).resolve().parents[1])) import _colors as C -from events import EventLoop, RunResult +from events.phase_loop import PhaseEventLoop, RunResult from codecome.session import create_session, send_prompt_to_session from codecome.transcript import open_phase_transcript, close_transcript @@ -36,7 +36,7 @@ def _consume_events( workspace_dir: str | None, render_event_fn: Callable[..., None], # CLI/rendering event dispatcher ) -> RunResult: - event_loop = EventLoop( + event_loop = PhaseEventLoop( base_url=base_url, session_id=session_id, console=console, From 6364d4146f940b3178dac025d4b812d5d81665e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 18:07:53 +0200 Subject: [PATCH 51/65] Load phase event loop from concrete module in tests --- tests/test_new_serve_stack.py | 52 +++++++++++++++++------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/tests/test_new_serve_stack.py b/tests/test_new_serve_stack.py index bba5a03..58d8cb5 100644 --- a/tests/test_new_serve_stack.py +++ b/tests/test_new_serve_stack.py @@ -19,8 +19,8 @@ def load_events(): from events.sse_client import SseClient, SseClientError from events.state_tracker import StateTracker 
from events.emitters import emit_event - from events import EventLoop, RunResult - return SseClient, SseClientError, StateTracker, emit_event, EventLoop, RunResult + from events.phase_loop import PhaseEventLoop, RunResult + return SseClient, SseClientError, StateTracker, emit_event, PhaseEventLoop, RunResult def load_serve(): @@ -234,11 +234,11 @@ def fake_render(console, phase, label, event): # --------------------------------------------------------------------------- -# EventLoop termination signals +# PhaseEventLoop termination signals # --------------------------------------------------------------------------- -class TestEventLoop: - """Unit tests for events.EventLoop core logic.""" +class TestPhaseEventLoop: + """Unit tests for events.phase_loop.PhaseEventLoop core logic.""" @pytest.fixture def loop_cls(self): @@ -368,19 +368,19 @@ def wait(self, timeout=None): return 0 # --------------------------------------------------------------------------- -# End-to-end EventLoop with fake SSE producer +# End-to-end PhaseEventLoop with fake SSE producer # --------------------------------------------------------------------------- -class TestEventLoopEndToEnd: - """End-to-end tests for EventLoop consuming a controlled SSE stream.""" +class TestPhaseEventLoopEndToEnd: + """End-to-end tests for PhaseEventLoop consuming a controlled SSE stream.""" @pytest.fixture def event_loop_objects(self): - SseClient, SseClientError, StateTracker, emit_event, EventLoop, RunResult = load_events() - return EventLoop, RunResult, SseClient + SseClient, SseClientError, StateTracker, emit_event, PhaseEventLoop, RunResult = load_events() + return PhaseEventLoop, RunResult, SseClient def test_full_run_emits_expected_events(self, event_loop_objects, monkeypatch): - EventLoop, RunResult, SseClient = event_loop_objects + PhaseEventLoop, RunResult, SseClient = event_loop_objects emitted: list[dict] = [] class FakeSseClient: @@ -400,7 +400,7 @@ def stop(self): pass 
_patch_phase_sse_client(monkeypatch, FakeSseClient) - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + loop = PhaseEventLoop("http://localhost:8080", "sess-1", None, "1", "recon") result = loop.run(lambda c, p, l, e: emitted.append(e)) assert result.any_step_finish_seen is True @@ -413,7 +413,7 @@ def stop(self): assert emitted[2]["part"]["text"] == "Hello" def test_permission_auto_rejected(self, event_loop_objects, monkeypatch): - EventLoop, RunResult, SseClient = event_loop_objects + PhaseEventLoop, RunResult, SseClient = event_loop_objects captured_perms: list[tuple] = [] class FakeSseClient: @@ -435,7 +435,7 @@ def fake_urlopen(req, **kw): _patch_phase_sse_client(monkeypatch, FakeSseClient) monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + loop = PhaseEventLoop("http://localhost:8080", "sess-1", None, "1", "recon") result = loop.run(lambda c, p, l, e: None) assert result.last_permission_error == "tool permission rejected: bash" @@ -444,7 +444,7 @@ def fake_urlopen(req, **kw): assert json.loads(captured_perms[0][1]) == {"reply": "reject", "message": "Auto-rejected by CodeCome configuration"} def test_session_idle_stops_consuming(self, event_loop_objects, monkeypatch): - EventLoop, RunResult, SseClient = event_loop_objects + PhaseEventLoop, RunResult, SseClient = event_loop_objects emitted: list[dict] = [] class FakeSseClient: @@ -457,14 +457,14 @@ def events(self): def stop(self): pass _patch_phase_sse_client(monkeypatch, FakeSseClient) - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + loop = PhaseEventLoop("http://localhost:8080", "sess-1", None, "1", "recon") loop.run(lambda c, p, l, e: emitted.append(e)) assert len(emitted) == 1 assert emitted[0]["type"] == "session.idle" def test_session_status_idle_stops_consuming(self, event_loop_objects, monkeypatch): - EventLoop, RunResult, SseClient = event_loop_objects + 
PhaseEventLoop, RunResult, SseClient = event_loop_objects emitted: list[dict] = [] class FakeSseClient: @@ -477,14 +477,14 @@ def events(self): def stop(self): pass _patch_phase_sse_client(monkeypatch, FakeSseClient) - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + loop = PhaseEventLoop("http://localhost:8080", "sess-1", None, "1", "recon") loop.run(lambda c, p, l, e: emitted.append(e)) assert len(emitted) == 1 assert emitted[0]["type"] == "session.status" def test_both_idle_events_only_processed_once(self, event_loop_objects, monkeypatch): - EventLoop, RunResult, SseClient = event_loop_objects + PhaseEventLoop, RunResult, SseClient = event_loop_objects emitted: list[dict] = [] class FakeSseClient: @@ -498,7 +498,7 @@ def events(self): def stop(self): pass _patch_phase_sse_client(monkeypatch, FakeSseClient) - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + loop = PhaseEventLoop("http://localhost:8080", "sess-1", None, "1", "recon") result = loop.run(lambda c, p, l, e: emitted.append(e)) assert len(emitted) == 1 @@ -506,7 +506,7 @@ def stop(self): pass assert result.last_session_id == "sess-1" def test_empty_stream_no_step_finish(self, event_loop_objects, monkeypatch): - EventLoop, RunResult, SseClient = event_loop_objects + PhaseEventLoop, RunResult, SseClient = event_loop_objects class FakeSseClient: def __init__(self, *a, **kw): pass @@ -514,7 +514,7 @@ def events(self): return iter([{"type": "server.connected"}, {"type": "server.he def stop(self): pass _patch_phase_sse_client(monkeypatch, FakeSseClient) - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + loop = PhaseEventLoop("http://localhost:8080", "sess-1", None, "1", "recon") result = loop.run(lambda c, p, l, e: None) assert result.any_step_finish_seen is False @@ -522,7 +522,7 @@ def stop(self): pass assert result.last_finish_reason is None def test_session_snapshot_sync_emits_missing_assistant_parts(self, event_loop_objects, 
monkeypatch): - EventLoop, RunResult, SseClient = event_loop_objects + PhaseEventLoop, RunResult, SseClient = event_loop_objects emitted: list[dict] = [] class FakeSseClient: @@ -558,7 +558,7 @@ def fake_urlopen(req, **kw): _patch_phase_sse_client(monkeypatch, FakeSseClient) monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + loop = PhaseEventLoop("http://localhost:8080", "sess-1", None, "1", "recon") result = loop.run(lambda c, p, l, e: emitted.append(e)) assert result.any_step_finish_seen is True @@ -566,7 +566,7 @@ def fake_urlopen(req, **kw): assert [e["type"] for e in emitted] == ["server.connected", "session.status", "message.updated", "step_start", "text", "step_finish", "session.idle"] def test_session_snapshot_sync_emits_tool_use_from_completed_parts(self, event_loop_objects, monkeypatch): - EventLoop, RunResult, SseClient = event_loop_objects + PhaseEventLoop, RunResult, SseClient = event_loop_objects emitted: list[dict] = [] class FakeSseClient: @@ -603,7 +603,7 @@ def fake_urlopen(req, **kw): _patch_phase_sse_client(monkeypatch, FakeSseClient) monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) - loop = EventLoop("http://localhost:8080", "sess-1", None, "1", "recon") + loop = PhaseEventLoop("http://localhost:8080", "sess-1", None, "1", "recon") loop.run(lambda c, p, l, e: emitted.append(e)) assert any(e["type"] == "tool_use" for e in emitted) From 26e9d7a306cfec1c826e09a9a00334636ae5da9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 18:15:07 +0200 Subject: [PATCH 52/65] Use PhaseEventLoop in mock LLM parity tool --- tools/mock-llm-parity.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/mock-llm-parity.py b/tools/mock-llm-parity.py index 3e29cd6..7e3a642 100644 --- a/tools/mock-llm-parity.py +++ b/tools/mock-llm-parity.py @@ -26,7 +26,7 @@ ROOT = Path(__file__).resolve().parents[1] 
sys.path.insert(0, str(ROOT / "tools")) -from events import EventLoop # noqa: E402 +from events.phase_loop import PhaseEventLoop # noqa: E402 from opencode.serve import ServerRunner # noqa: E402 DEFAULT_PROMPT = "Say hello and then stop." @@ -42,7 +42,7 @@ _SERVE_ONLY_TYPES = {"server.connected", "server.heartbeat", "session.idle", "message.updated", "message.part.updated", "file.edited", "file.watcher.updated", "todo.updated"} -def _step_sort_key(ev: dict[str, Any]) -> tuple[int, str]: +def _step_sort_key(ev: dict[str, Any]) -> tuple[int | float, str]: """Return a sort key that orders events within a single step deterministically.""" t = ev.get("type", "") if t == "step_start": @@ -272,7 +272,7 @@ def collect_render(console: Any, phase: str, label: str, event: dict[str, Any]) if not session_id: raise RuntimeError("session.create returned empty id") - loop = EventLoop(base_url, session_id, None, "1", "recon", auth_token=info.password, workspace_dir=str(ROOT)) + loop = PhaseEventLoop(base_url, session_id, None, "1", "recon", auth_token=info.password, workspace_dir=str(ROOT)) # Start event consumer BEFORE sending prompt to avoid losing early SSE events. 
import threading From 96532a0663cb9b6e08e7bc811353c1c0450f378c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 18:17:12 +0200 Subject: [PATCH 53/65] Use PhaseEventLoop directly in runner tests --- tests/test_codecome_runner.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_codecome_runner.py b/tests/test_codecome_runner.py index 7be895b..e7d01aa 100644 --- a/tests/test_codecome_runner.py +++ b/tests/test_codecome_runner.py @@ -9,7 +9,7 @@ from unittest.mock import MagicMock from codecome import runner -from events import RunResult +from events.phase_loop import RunResult @pytest.fixture def mock_args(): @@ -26,7 +26,7 @@ def mock_console(): return MagicMock() def test_consume_events_renders_and_logs(mock_args, mock_console, monkeypatch): - class FakeEventLoop: + class FakePhaseEventLoop: def __init__(self, **kwargs): pass def run(self, render_and_log_fn): @@ -34,7 +34,7 @@ def run(self, render_and_log_fn): render_and_log_fn(mock_console, "1", "Recon", event) return RunResult() - monkeypatch.setattr(runner, "EventLoop", FakeEventLoop) + monkeypatch.setattr(runner, "PhaseEventLoop", FakePhaseEventLoop) rendered_events = [] def fake_render(console, phase, label, event): @@ -44,7 +44,7 @@ def fake_render(console, phase, label, event): res = runner._consume_events( "http://base", "session_123", mock_console, "1", "Recon", mock_args, - fake_transcript, True, "token", "dir", fake_render + fake_transcript, True, "auth", "dir", fake_render ) assert isinstance(res, RunResult) @@ -72,7 +72,7 @@ def fake_consume(*a, **kw): code, session_id, res, path = runner._run_single_attempt( mock_args, mock_console, "do work", "model", "var", True, - "http://base", "token", "dir", lambda *a: None + "http://base", "auth", "dir", lambda *a: None ) assert code == 0 @@ -98,7 +98,7 @@ def fake_fatal(console, title, msg): code, session_id, res, path = runner._run_single_attempt( mock_args, mock_console, 
"do work", "model", "var", True, - "http://base", "token", "dir", lambda *a: None, + "http://base", "auth", "dir", lambda *a: None, emit_fatal_error_fn=fake_fatal ) @@ -117,7 +117,7 @@ def test_run_single_attempt_existing_session(mock_args, mock_console, monkeypatc code, session_id, res, path = runner._run_single_attempt( mock_args, mock_console, "do work", "model", "var", True, - "http://base", "token", "dir", lambda *a: None, + "http://base", "auth", "dir", lambda *a: None, existing_session_id="existing_123" ) From fb018086ad6de1e3fcb394110d40f68d5db0b19f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 17:16:59 +0200 Subject: [PATCH 54/65] docs(phase-a7): add tools/AGENTS.md architecture guide Document the tools/ directory layout and 10 architecture rules: - Historical scripts are thin wrappers - codecome/config.py is configuration only (no execution) - Event loops under tools/events/ - Renderers under tools/rendering/ - Three sink destinations, one render path - SnapshotCache for diff state - CommandExecutionInterceptor for bash rendering - Finding helpers under tools/findings/ - Dependency direction (no circular imports) - Testing standards 355 passed, 0 failed, 0 errors --- tools/AGENTS.md | 239 +++++++++++++++++++++++++++++------------------- 1 file changed, 145 insertions(+), 94 deletions(-) diff --git a/tools/AGENTS.md b/tools/AGENTS.md index 4cb9d63..6cfa8d5 100644 --- a/tools/AGENTS.md +++ b/tools/AGENTS.md @@ -1,94 +1,145 @@ -# Tools Architecture Guide - -This directory contains CodeCome's local tooling: phase runners, rendering, event loops, chat UI, sandbox helpers, and finding/itemdb scripts. - -These rules are intended to keep the tooling modular and prevent new monoliths from forming. - -## Entry points - -Historical executable scripts should stay thin. - -- `tools/run-agent.py` is only a compatibility entry point. 
It should delegate to `codecome.cli.main()` and contain no phase, rendering, event-loop, or chat logic. -- New runner logic belongs under `tools/codecome/`. -- Do not add new implementation logic to wrapper scripts unless the script is intentionally standalone and out of scope for the core runner. - -## CodeCome core package - -Use concrete modules rather than broad package re-exports. - -- CLI parsing and top-level phase flow: `tools/codecome/cli.py`. -- Single-attempt phase execution: `tools/codecome/runner.py`. -- Rendering dispatcher and console construction: `tools/codecome/cli_render.py`. -- Configuration, prompt, model, variant, thinking, and color resolution: `tools/codecome/config.py`. -- OpenCode HTTP session/prompt helpers: `tools/codecome/session.py`. -- Phase completion and resume/repair prompts: `tools/codecome/graceful.py`. -- Transcript helpers: `tools/codecome/transcript.py`. -- Version checks: `tools/codecome/version.py`. - -`tools/codecome/__init__.py` must stay lightweight. Internal code should import from the concrete module that owns the functionality. - -## Rendering - -Rendering code belongs under `tools/rendering/`. - -- Runtime rendering state belongs in `RenderContext`. -- Rendering settings belong in `RenderSettings`. -- File snapshot/diff state belongs in `SnapshotCache`. -- Output destinations are represented by sinks (`PlainSink`, `RichConsoleSink`, `TextualRichLogSink`). -- Generic event renderers live in `tools/rendering/events.py`. -- Tool renderers live under `tools/rendering/tools/`. -- Command execution interceptors live under `tools/rendering/tools/interceptors/`. - -Renderers should receive the normalized event/tool-state dictionaries emitted by the event layer. Do not introduce a second event object model unless there is a clear need. - -Rich and Textual output should share renderer logic where possible. The sink decides where renderables are written; renderers may emit arbitrary Rich renderables when the sink supports them. 
- -## Command rendering - -CodeCome-specific command rendering is intentional product behavior. - -Special handling for commands such as sandbox bootstrap, `rtk read`, `rtk grep`, `rg`, `ls`, `find`, or `tree` should be implemented as `CommandExecutionInterceptor` classes under `tools/rendering/tools/interceptors/` rather than hidden inside a generic bash renderer. - -## Event loops - -Event consumption code belongs under `tools/events/`. - -- Shared SSE/session/dedup/permission/sync logic belongs in `BaseEventLoop`. -- Phase lifecycle logic belongs in `PhaseEventLoop` (`tools/events/phase_loop.py`). -- Multi-turn chat lifecycle logic belongs in `ChatEventLoop` (`tools/events/chat_loop.py`). -- `events.__init__` should only expose the public phase-loop alias and basic package exports. - -Avoid adding phase-specific behavior to `BaseEventLoop` and avoid duplicating session sync or permission logic in phase/chat subclasses. - -## Chat - -Interactive chat code belongs under `tools/chat/`. - -- Textual UI classes and the RichLog proxy live in `tools/chat/app.py`. -- Chat startup/wiring lives in `tools/chat/harness.py`. -- Chat debug helpers live in `tools/chat/debug.py`. - -`chat` modules must not import `tools/run-agent.py`. Use `codecome.cli_render`, `codecome.session`, `codecome.config`, and other concrete modules instead. - -`tools/chat/__init__.py` must stay lightweight and should not eagerly import Textual-adjacent modules. - -## Findings and itemdb - -Finding/itemdb consolidation belongs to Epic B. - -When that work starts, shared finding helpers should live under `tools/findings/`, and historical scripts such as `create-finding.py`, `move-finding.py`, `list-findings.py`, `render-report.py`, and `render-index.py` should become thin wrappers. - -## Testing expectations - -Refactors in this directory should include focused tests for the moved component, not only broad smoke checks. - -Useful test categories: - -- CLI/wrapper smoke tests. 
-- Rendering unit tests and fixture/golden-style checks. -- Event-loop tests with fake SSE streams. -- Chat tests that import `chat.app` and `chat.harness` directly. -- Command interceptor tests. -- Snapshot/cache tests. - -Do not rely on tests that patch a stale wrapper module when the implementation has moved to a concrete package module. +# Architecture Guidelines for `tools/` + +## Directory layout — what goes where + +``` +tools/ +├── run-agent.py # Thin wrapper (12 lines) → codecome.cli.main() +├── codecome.py # Workspace validation CLI (check/status/next-id) +│ +├── codecome/ # Core runner and configuration +│ ├── cli.py # main(), build_parser() — runtime entry point +│ ├── cli_render.py # HAVE_RICH, build_console, render_event, _get_rendering_ctx +│ ├── config.py # env, codecome.yml, prompt, model, thinking resolution +│ ├── session.py # OpenCode HTTP: create session, send prompt +│ ├── runner.py # _consume_events, _run_single_attempt +│ ├── graceful.py # phase completion checks, resume prompt builders +│ ├── transcript.py # transcript path/open/close helpers +│ └── version.py # OpenCode version checks +│ +├── rendering/ # Tool and event rendering +│ ├── base.py # BaseRenderer (sink, rich, plain properties) +│ ├── context.py # RenderContext (root, sink, settings, cache) +│ ├── settings.py # RenderSettings (20+ tunables from env vars) +│ ├── cache.py # SnapshotCache (file content snapshots for diffs) +│ ├── sink.py # RenderSink protocol + Plain/Rich/Textual sinks +│ ├── registry.py # RendererRegistry (dispatch by event type / tool name) +│ ├── events.py # Event renderer classes (StepStart, Text, Error, …) +│ ├── utils.py # Shared helpers (path, lexer, diff, read framing) +│ ├── tools/ # Tool renderer classes +│ │ ├── base.py # ToolRenderer, FallbackToolRenderer +│ │ ├── read.py / write.py / edit.py / glob.py / grep.py +│ │ ├── command.py # CommandRenderer (bash) with interceptor chain +│ │ ├── apply_patch.py / todo.py / task.py / skill.py / permissions.py +│ 
│ └── interceptors/ # CommandExecutionInterceptor implementations +│ │ ├── sandbox_bootstrap.py +│ │ ├── rtk_read.py / rtk_grep.py / shell_listing.py +│ │ └── base.py # Interceptor protocol +│ +├── events/ # SSE event consumption +│ ├── base.py # BaseEventLoop (shared: filters, permissions, sync, dedup) +│ ├── phase_loop.py # PhaseEventLoop (single-session → RunResult) +│ ├── chat_loop.py # ChatEventLoop (multi-turn chat) +│ ├── sse_client.py # SseClient (raw SSE stream + reconnect) +│ ├── state_tracker.py # StateTracker (delta → finalized part) +│ └── emitters.py # emit_event() bridge +│ +├── chat/ # Interactive chat TUI (Textual) +│ ├── app.py # _ChatApp, TextualConsoleProxy, render/log helpers +│ ├── harness.py # _run_chat_mode() entry point +│ └── debug.py # Chat-specific debug logging +│ +├── opencode/ # opencode serve lifecycle +│ └── serve.py # ServerRunner (start, stop, health check) +│ +├── findings/ # Finding / itemdb tooling (future consolidated package) +│ +├── _colors.py # Shared ANSI color and symbol utilities +├── gate-check.py # Phase readiness gates +├── check-frontmatter.py # Frontmatter validation +├── sandbox-bootstrap.py # Sandbox environment setup +├── run-sweep.py # Batch file sweeps +├── list-findings.py / create-finding.py / move-finding.py / … # Script wrappers +└── mock-llm-*.py / mock_llm_scripts/ # Test infrastructure +``` + +## Rules + +### 1. Historical scripts are thin wrappers + +Standalone scripts at the `tools/` root (e.g. `create-finding.py`, `list-findings.py`) should be thin wrappers that delegate to their respective packages. Their implementation lives in the package, not the script. + +Example (`tools/run-agent.py`): +```python +from codecome.cli import main +if __name__ == "__main__": + raise SystemExit(main()) +``` + +### 2. `codecome/config.py` is configuration only — no execution + +`config.py` resolves env vars, `codecome.yml`, prompt extras, model/variant/thinking, and color modes. 
It must NOT contain: +- Server start/stop +- Session creation +- Prompt submission +- Phase loops +- Retry/resume logic +- Phase completion checks + +### 3. Event loops live under `tools/events/` + +- `BaseEventLoop` owns shared SSE/session mechanics (filtering, permissions, sync, dedup, headers). +- `PhaseEventLoop` (in `phase_loop.py`) extends it for single-session consumption. +- `ChatEventLoop` (in `chat_loop.py`) extends it for multi-turn chat. +- Never add new event loop classes outside `tools/events/`. + +### 4. Renderers live under `tools/rendering/` + +- Event renderers go in `rendering/events.py`, inheriting `EventRenderer`. +- Tool renderers go in `rendering/tools/`, inheriting `ToolRenderer`. +- Renderers receive **normalized dict** events/tool states — do not introduce custom event objects. +- Rich and Textual renderers may emit arbitrary Rich renderables (Panel, Group, Text, Table, Syntax, Rule, …) through a `RenderSink`. The sink abstracts *where* output goes; it does not restrict *what* renderers can draw. + +### 5. Sinks: three destinations, one code path + +- `PlainSink` — plain strings to stdout (no Rich dependency). +- `RichConsoleSink` — delegates to `rich.console.Console`. +- `TextualRichLogSink` — delegates to a Textual RichLog or thread-safe proxy. + +Rich and Textual renderers share the same `render()` code path; only the sink differs. Use `self.rich` / `self.plain` properties from `BaseRenderer` to branch. + +### 6. Snapshot/diff state belongs in `SnapshotCache` + +File content snapshots used by Write/Edit/ApplyPatch renderers for diff computation must live in `rendering/cache.SnapshotCache`. Do not introduce new module-level globals for caching. + +### 7. Command-specific rendering uses `CommandExecutionInterceptor` + +Specialised rendering for bash invocations (sandbox-bootstrap JSON, rtk read/grep, rg, ls, find, tree) is implemented as `CommandExecutionInterceptor` implementations. The `CommandRenderer` has a lazy interceptor chain. 
New interceptors go in `rendering/tools/interceptors/`. + +### 8. Finding/itemdb helpers live under `tools/findings/` + +Frontmatter parsing, finding ID lookup, status directory constants, slug helpers, and finding file iteration belong in `tools/findings/frontmatter.py` or sibling modules. Do not duplicate these in standalone scripts. + +### 9. Dependency direction + +Packages should depend downward, not sideways: +``` +run-agent.py → codecome/ → (none) + → events/ → (none) + → rendering/ → codecome/ + → chat/ → codecome/, events/ + +codecome/ → events/, rendering/ (lazy imports only in execution paths) +events/ → (stdlib only, except sse_client ↔ base) +rendering/ → codecome/ +chat/ → codecome/, events/ +``` + +Avoid circular imports. When two packages need each other, prefer callable injection (as done with `render_event_fn` in the runner) or lazy imports inside function bodies. + +### 10. Testing + +- New renderers need focused unit tests with fixture inputs and recording sinks. +- Event loops are tested with deterministic event generators — not live OpenCode servers. +- CLI and wrapper compatibility is verified with `--help` and `--show-model` smoke tests. +- Thin wrappers must remain thin — their only responsibility is delegation. 
From 7dc0f1f3b6297c6f7e19efe080a14043b3dc47d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 19:19:43 +0200 Subject: [PATCH 55/65] refactor(tools): clean up rendering, deduplicate constants, fix sink bug - Remove legacy globals and CLI-flag-to-global assignments from cli.py (already handled through RenderSettings/dataclasses.replace) - Reset subagent dedup state before retries in cli.py - Import finish-reason constants from canonical location in cli_render.py - Add documentation comments for modelID vs id in session.py - Refactor ToolUseEventRenderer from if/elif chain to dict + lazy cache - Fix operator precedence bug in TextualRichLogSink.write_text(): 'text + end if end else text' was parsed as 'text + (end if end else text)', doubling the text when end="" --- tools/codecome/cli.py | 21 ++--------- tools/codecome/cli_render.py | 12 +++---- tools/codecome/session.py | 9 +++++ tools/rendering/events.py | 69 +++++++++++++++++++----------------- tools/rendering/sink.py | 3 +- 5 files changed, 56 insertions(+), 58 deletions(-) diff --git a/tools/codecome/cli.py b/tools/codecome/cli.py index 182abde..90d71d4 100644 --- a/tools/codecome/cli.py +++ b/tools/codecome/cli.py @@ -37,13 +37,6 @@ build_phase_resume_prompt, build_frontmatter_resume_prompt, ) -# Legacy globals — still referenced by old renderers in run-agent.py. -# Re-exported for backward compatibility. -_READ_DISPLAY_LINES = 10 -_WRITE_CONTENT_LINES = 25 -_WRITE_DIFF_LIMIT = 50 -_EDIT_DIFF_LINES = 25 - # --------------------------------------------------------------------------- # Argument parser @@ -100,17 +93,6 @@ def main() -> int: + ", ".join("--" + n.replace("_", "-") for n in missing) ) - # CLI flags override env var defaults for tunables. 
- global _READ_DISPLAY_LINES, _WRITE_CONTENT_LINES, _WRITE_DIFF_LIMIT, _EDIT_DIFF_LINES - if args.read_display_lines is not None: - _READ_DISPLAY_LINES = args.read_display_lines - if args.write_content_lines is not None: - _WRITE_CONTENT_LINES = args.write_content_lines - if args.write_diff_limit is not None: - _WRITE_DIFF_LIMIT = args.write_diff_limit - if args.edit_diff_lines is not None: - _EDIT_DIFF_LINES = args.edit_diff_lines - color_mode = resolve_color_mode(args.color) console = build_console(color_mode) @@ -211,9 +193,12 @@ def _forward_signal(signum: int, _frame: Any) -> None: previous_sigterm = signal.signal(signal.SIGTERM, _forward_signal) from codecome.runner import _run_single_attempt + from rendering.events import _reset_subagent_state try: while True: attempt_number += 1 + # Clear per-session dedup state so retries don't suppress updates. + _reset_subagent_state() returncode, session_id, run_result, transcript_path = _run_single_attempt( args, console, prompt, model, variant, thinking_on, base_url, server_info.password, str(_clr.ROOT), diff --git a/tools/codecome/cli_render.py b/tools/codecome/cli_render.py index 8ffdeff..42ed90a 100644 --- a/tools/codecome/cli_render.py +++ b/tools/codecome/cli_render.py @@ -142,11 +142,11 @@ def _emit_fatal_error(console: Any, title: str, message: str) -> None: # --------------------------------------------------------------------------- -# LLM finish reason classification +# LLM finish reason classification (canonical definitions in rendering.events) # --------------------------------------------------------------------------- -_FINISH_TERMINAL_OK = {"stop", "end_turn"} -_FINISH_MID_TURN = {"tool-calls", "tool_use"} -_FINISH_FAILURE = { - "content-filter", "content_filter", "length", "max_tokens", "error", -} \ No newline at end of file +from rendering.events import ( + _FINISH_TERMINAL_OK, + _FINISH_MID_TURN, + _FINISH_FAILURE, +) \ No newline at end of file diff --git a/tools/codecome/session.py 
b/tools/codecome/session.py index 0a48e3f..d456aba 100644 --- a/tools/codecome/session.py +++ b/tools/codecome/session.py @@ -43,6 +43,10 @@ def send_prompt_to_session( if model: parts = model.split("/", 1) if len(parts) == 2: + # NOTE: prompt_async expects "modelID", not "id". + # Session creation (POST /session) uses "id" instead. + # See _create_model_payload() in mock-llm-parity.py for the + # authoritative reference. payload["model"] = {"providerID": parts[0], "modelID": parts[1]} else: payload["model"] = {"modelID": model} @@ -74,6 +78,10 @@ def create_session( if model: parts = model.split("/", 1) if len(parts) == 2: + # NOTE: session creation (POST /session) expects "id", not "modelID". + # Prompt submission (prompt_async) uses "modelID" instead. + # See _create_model_payload() in mock-llm-parity.py for the + # authoritative reference. payload["model"] = {"providerID": parts[0], "id": parts[1]} else: payload["model"] = {"id": model} @@ -110,6 +118,7 @@ def create_chat_session( if model: parts = model.split("/", 1) if len(parts) == 2: + # Session creation uses "id" (see create_session above). payload["model"] = {"providerID": parts[0], "id": parts[1]} else: payload["model"] = {"id": model} diff --git a/tools/rendering/events.py b/tools/rendering/events.py index 5615439..e3f90fe 100644 --- a/tools/rendering/events.py +++ b/tools/rendering/events.py @@ -126,10 +126,45 @@ def render(self, event: dict[str, Any]) -> bool: class ToolUseEventRenderer(EventRenderer): event_types = ("tool_use",) + # Map of canonical tool names to their renderer classes (lazy-imported). + # Keys that map to the same renderer share the cached instance. 
+ _TOOL_RENDERER_CLASSES: dict[str, str] = { + "todowrite": "rendering.tools.todo.TodoRenderer", + "read": "rendering.tools.read.ReadRenderer", + "write": "rendering.tools.write.WriteRenderer", + "edit": "rendering.tools.edit.EditRenderer", + "apply_patch": "rendering.tools.apply_patch.ApplyPatchRenderer", + "applypatch": "rendering.tools.apply_patch.ApplyPatchRenderer", + "apply-patch": "rendering.tools.apply_patch.ApplyPatchRenderer", + "glob": "rendering.tools.glob.GlobRenderer", + "grep": "rendering.tools.grep.GrepRenderer", + "bash": "rendering.tools.command.CommandRenderer", + "skill": "rendering.tools.skill.SkillRenderer", + "task": "rendering.tools.task.TaskRenderer", + } + def __init__(self, context): super().__init__(context) from rendering.tools.base import FallbackToolRenderer self._fallback = FallbackToolRenderer(context) + # Cache renderer instances keyed by their fully-qualified class path. + self._renderer_cache: dict[str, Any] = {} + + def _get_renderer(self, tool_lower: str) -> Any | None: + """Return a cached renderer for *tool_lower*, or None for fallback.""" + class_path = self._TOOL_RENDERER_CLASSES.get(tool_lower) + if class_path is None: + return None + if class_path in self._renderer_cache: + return self._renderer_cache[class_path] + # Lazy-import and instantiate once, then cache. + module_path, class_name = class_path.rsplit(".", 1) + import importlib + mod = importlib.import_module(module_path) + cls = getattr(mod, class_name) + instance = cls(self.context) + self._renderer_cache[class_path] = instance + return instance def render(self, event: dict[str, Any]) -> bool: part = event.get("part", {}) @@ -137,39 +172,7 @@ def render(self, event: dict[str, Any]) -> bool: state = part.get("state", {}) if isinstance(part.get("state"), dict) else {} tool_lower = tool.strip().lower() - # Route through specific tool renderers first. 
- renderer = None - if tool_lower == "todowrite": - from rendering.tools.todo import TodoRenderer - renderer = TodoRenderer(self.context) - elif tool_lower == "read": - from rendering.tools.read import ReadRenderer - renderer = ReadRenderer(self.context) - elif tool_lower == "write": - from rendering.tools.write import WriteRenderer - renderer = WriteRenderer(self.context) - elif tool_lower == "edit": - from rendering.tools.edit import EditRenderer - renderer = EditRenderer(self.context) - elif tool_lower in ("apply_patch", "applypatch", "apply-patch"): - from rendering.tools.apply_patch import ApplyPatchRenderer - renderer = ApplyPatchRenderer(self.context) - elif tool_lower == "glob": - from rendering.tools.glob import GlobRenderer - renderer = GlobRenderer(self.context) - elif tool_lower == "grep": - from rendering.tools.grep import GrepRenderer - renderer = GrepRenderer(self.context) - elif tool_lower == "bash": - from rendering.tools.command import CommandRenderer - renderer = CommandRenderer(self.context) - elif tool_lower == "skill": - from rendering.tools.skill import SkillRenderer - renderer = SkillRenderer(self.context) - elif tool_lower == "task": - from rendering.tools.task import TaskRenderer - renderer = TaskRenderer(self.context) - + renderer = self._get_renderer(tool_lower) if renderer is not None and renderer.render(tool, state): return True diff --git a/tools/rendering/sink.py b/tools/rendering/sink.py index fe86c54..a0effac 100644 --- a/tools/rendering/sink.py +++ b/tools/rendering/sink.py @@ -108,4 +108,5 @@ def write(self, renderable: Any, *, expand: bool = True) -> None: self._target.write(renderable) def write_text(self, text: str, *, end: str = "\n") -> None: - self.write(text) + # RichLog.write() has no end parameter; concatenate manually. 
+ self.write((text + end) if end else text) From f59f5fd5a206b546a8db7d5ba917a5a562ae6fa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 20:05:42 +0200 Subject: [PATCH 56/65] refactor(a8-batch1): foundational fixes from PR review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename mock_llm_scripts/ to mock-llm-scripts/ (data-only dir) - Remove redundant check_opencode_version() from chat/harness.py - Centralize ROOT in codecome/config.py; remove duplicates from cli_render.py, runner.py, graceful.py, version.py, chat/harness.py - Replace inline color helpers in config.py with _colors import - Add ChatEventLoop to events/__init__.py exports - Update plan: command_interceptors → command/interceptors structure - Update plan: note run-agent.py + codecome.py unification deferred - Add A8 plan document (.project/tools-refactor-a8-plan.md) --- .project/mock-llm-parity-plan.md | 4 +- .project/tools-refactor-a8-plan.md | 198 ++++++++++++++++++ .project/tools-refactor-plan.md | 26 +-- tests/test_mock_llm_parity.py | 16 +- tools/AGENTS.md | 2 +- tools/chat/harness.py | 5 +- tools/codecome/cli_render.py | 3 +- tools/codecome/config.py | 41 +--- tools/codecome/graceful.py | 2 +- tools/codecome/runner.py | 3 +- tools/codecome/version.py | 14 +- tools/events/__init__.py | 3 +- tools/mock-llm-parity.py | 4 +- .../basic.json | 0 .../comprehensive.json | 0 .../internal_error.json | 0 .../rate_limit_retry.json | 0 .../with_permission.json | 0 .../with_permission_multi.json | 0 .../with_tool.json | 0 tools/mock-llm-server.py | 2 +- 21 files changed, 244 insertions(+), 79 deletions(-) create mode 100644 .project/tools-refactor-a8-plan.md rename tools/{mock_llm_scripts => mock-llm-scripts}/basic.json (100%) rename tools/{mock_llm_scripts => mock-llm-scripts}/comprehensive.json (100%) rename tools/{mock_llm_scripts => mock-llm-scripts}/internal_error.json (100%) rename tools/{mock_llm_scripts => 
mock-llm-scripts}/rate_limit_retry.json (100%) rename tools/{mock_llm_scripts => mock-llm-scripts}/with_permission.json (100%) rename tools/{mock_llm_scripts => mock-llm-scripts}/with_permission_multi.json (100%) rename tools/{mock_llm_scripts => mock-llm-scripts}/with_tool.json (100%) diff --git a/.project/mock-llm-parity-plan.md b/.project/mock-llm-parity-plan.md index 86ab37e..d878f23 100644 --- a/.project/mock-llm-parity-plan.md +++ b/.project/mock-llm-parity-plan.md @@ -52,7 +52,7 @@ Build a deterministic mock LLM **provider** (not replacing OpenCode) that: ### 3.1 Mock LLM Server **Approach:** Small custom stdlib-only OpenAI-compatible mock server (`tools/mock-llm-server.py`). -- Reads a JSON script file at startup (e.g., `--script tools/mock_llm_scripts/basic.json`). +- Reads a JSON script file at startup (e.g., `--script tools/mock-llm-scripts/basic.json`). - Serves standard OpenAI-compatible endpoints: - `POST /v1/chat/completions` — streaming SSE with deterministic deltas. - `GET /v1/models` — returns `[{"id":"mockmodel"}]`. @@ -141,7 +141,7 @@ Add a new test class `TestMockLLMParity` that: ## 6. Acceptance Criteria - [x] `tools/mock-llm-server.py` exists and serves deterministic OpenAI-compatible SSE streams from JSON script files. -- [x] `tools/mock_llm_scripts/` contains `basic.json`, `with_tool.json`, and `with_permission.json`. +- [x] `tools/mock-llm-scripts/` contains `basic.json`, `with_tool.json`, and `with_permission.json`. - [x] `opencode.json` contains `provider.test` block. - [x] `tools/mock-llm-parity.py` exists and can be invoked manually. - [x] `tests/test_mock_llm_parity.py` exists and passes in CI. 
diff --git a/.project/tools-refactor-a8-plan.md b/.project/tools-refactor-a8-plan.md new file mode 100644 index 0000000..38fa6c1 --- /dev/null +++ b/.project/tools-refactor-a8-plan.md @@ -0,0 +1,198 @@ +# Plan: Phase A8 — PR Review Fixes and Architectural Cleanup + +**Status:** Active +**Date:** 2026-05-25 +**Parent:** [tools-refactor-plan.md](tools-refactor-plan.md) +**PR:** #21 (`wip/tools-refactor`) +**Scope:** Address all unresolved PR review comments from the A1–A5 implementation + +--- + +## 1. Summary + +PR #21 accumulated 20 unresolved review threads during the A1–A5 implementation. +This plan addresses all of them in a single phase (A8), grouped into an ordered +execution sequence that respects dependency chains. + +Two items are deferred: +- **Unify run-agent.py + codecome.py** → deferred to Phase 2 (Epic B). +- **Legacy globals in cli.py** → already resolved in the last commit. + +--- + +## 2. Execution Order + +Tasks are ordered to minimise rework: foundational changes (ROOT, colors, naming) +come first, then structural moves, then the larger splits/extractions. + +### Batch 1 — Foundational fixes (no structural moves) + +| ID | Thread | File | Action | +|----|--------|------|--------| +| T3 | plan:266 | `tools/mock_llm_scripts/` | `git mv` to `mock-llm-scripts`, update all 16 path-based references across 6 files. | +| T4 | harness:50 | `chat/harness.py` | Remove redundant `check_opencode_version()` call and its import; `cli.py:76` already covers both modes. | +| T5 | harness:56 | multiple | Define `ROOT` once in `codecome/config.py` (already has it at line 24). Remove duplicate `ROOT =` definitions from `cli_render.py`, `transcript.py`, `graceful.py`, and `chat/harness.py`; import from `codecome.config` instead. | +| T12 | config:36 | `codecome/config.py` | Replace inline `_COLOR_ENABLED`/`_RESET`/`_BOLD`/`_DIM` with `import _colors as C` and use `C.RESET`, `C.BOLD`, `C.DIM`. 
| +| T15 | events/__init__:10 | `events/__init__.py` | Add `ChatEventLoop` to exports. | +| T2 | plan:260 | `.project/tools-refactor-plan.md` | Add note that run-agent.py + codecome.py unification is deferred to Phase 2. | + +### Batch 2 — Naming and small structural changes + +| ID | Thread | File | Action | +|----|--------|------|--------| +| T7 | harness:83 | multiple | Make `log_level` configurable: read from `--log-level` CLI arg or `OPENCODE_LOG_LEVEL` env var (default `"WARN"`). Both phase and chat paths use the same source. | +| T6 | harness:66 | `codecome/config.py` | Extract `resolve_runtime_config(agent, extra_args) -> RuntimeConfig` that bundles model, variant, thinking resolution into a single call. Both `cli.py` and `chat/harness.py` call this instead of duplicating three separate calls. | +| T13 | graceful:1 | `codecome/graceful.py` | Create `tools/phases/` package. Move `graceful.py` to `phases/completion.py`. Update all imports (`codecome.graceful` → `phases.completion`). | + +### Batch 3 — Transcript class + +| ID | Thread | File | Action | +|----|--------|------|--------| +| T8 | transcript:1, app:107, harness:109 | `codecome/transcript.py` | Convert to `Transcript` class with `for_phase()` / `for_chat()` class methods, `write_event()`, and `close()`. Remove old free functions entirely (no backward-compat wrappers). Update `runner.py` and `chat/app.py` to use `transcript.write_event(event)`. | + +### Batch 4 — Rendering architecture + +| ID | Thread | File | Action | +|----|--------|------|--------| +| T16 | events.py:42 | `rendering/events.py` | Split into `rendering/events/` package: `base.py` (EventRenderer + constants + subagent state), then one file per renderer class. `rendering/events/__init__.py` re-exports everything so existing imports continue to work. | +| T11 | cli_render:1 | `codecome/cli_render.py` | Move rendering-related parts (`HAVE_RICH`, Rich stubs, `_get_rendering_ctx`, `render_event`) into `rendering/dispatch.py`. 
Keep CLI-only parts (`build_console`, `_emit_fatal_error`) in `codecome/cli_render.py`. Update imports. | +| T1 | plan:207 | `rendering/tools/` | Restructure: move `command.py` → `command/__init__.py`, move `interceptors/` → `command/interceptors/`. Update all import paths from `rendering.tools.interceptors.*` to `rendering.tools.command.interceptors.*`. Update plan document. | + +### Batch 5 — Phase harness extraction + +| ID | Thread | File | Action | +|----|--------|------|--------| +| T10 | cli:198 | `codecome/cli.py` | Extract the phase retry/resume loop (lines ~160–395) into `codecome/harness.py` as `run_phase_mode(args, console, ...)`. `cli.py` becomes: parse args → check version → dispatch to `run_phase_mode()` or `_run_chat_mode()`. | + +### Batch 6 — Testing and PR hygiene + +| ID | Thread | File | Action | +|----|--------|------|--------| +| T17 | run-agent.py | `tests/` | Add regression test verifying `--read-display-lines`, `--write-content-lines`, `--write-diff-limit`, `--edit-diff-lines` flags propagate into `RenderSettings`. | +| — | PR body | GitHub | Update PR #21 description to reflect A1–A8 implementation status. | +| — | Verify | — | Run `make tests` to confirm all changes pass. | + +--- + +## 3. New Directory Structure (after A8) + +Changes from the current structure are marked with `← NEW` or `← MOVED`. 
+ +``` +tools/ +├── run-agent.py # Thin wrapper → codecome.cli.main() +├── codecome.py # Workspace validation CLI (unchanged) +│ +├── codecome/ # Core runner and configuration +│ ├── cli.py # main() → parse args → dispatch to harness +│ ├── cli_render.py # build_console, _emit_fatal_error (CLI-only) ← SLIMMED +│ ├── config.py # ROOT, env, codecome.yml, prompt, model, thinking +│ ├── session.py # OpenCode HTTP: create session, send prompt +│ ├── runner.py # _consume_events, _run_single_attempt +│ ├── harness.py # run_phase_mode() — retry/resume loop ← NEW (from cli.py) +│ ├── transcript.py # Transcript class ← REWRITTEN +│ └── version.py # OpenCode version checks +│ +├── phases/ # Phase-specific logic ← NEW PACKAGE +│ ├── __init__.py +│ └── completion.py # ← MOVED from codecome/graceful.py +│ +├── rendering/ # Rendering infrastructure +│ ├── __init__.py +│ ├── base.py +│ ├── cache.py +│ ├── context.py +│ ├── dispatch.py # HAVE_RICH, _get_rendering_ctx, render_event ← NEW (from cli_render.py) +│ ├── registry.py +│ ├── settings.py +│ ├── sink.py +│ ├── utils.py +│ ├── events/ # ← NEW PACKAGE (split from events.py) +│ │ ├── __init__.py # Re-exports all renderer classes + constants +│ │ ├── base.py # EventRenderer, finish constants, subagent state +│ │ ├── step_start.py +│ │ ├── step_finish.py +│ │ ├── text.py +│ │ ├── reasoning.py +│ │ ├── tool_use.py +│ │ ├── error.py +│ │ ├── session_status.py +│ │ ├── session_diff.py +│ │ ├── server.py # ServerConnectedRenderer + ServerHeartbeatRenderer +│ │ ├── message.py # MessageUpdatedRenderer +│ │ ├── subagent.py +│ │ └── unknown.py +│ └── tools/ +│ ├── __init__.py +│ ├── base.py +│ ├── todo.py +│ ├── read.py / write.py / edit.py / glob.py / grep.py +│ ├── apply_patch.py +│ ├── skill.py / task.py / permissions.py +│ └── command/ # ← RESTRUCTURED +│ ├── __init__.py # CommandRenderer (was command.py) +│ └── interceptors/ # ← MOVED from rendering/tools/interceptors/ +│ ├── __init__.py +│ ├── base.py +│ ├── sandbox_bootstrap.py +│ 
├── rtk_read.py +│ ├── rtk_grep.py +│ └── shell_listing.py +│ +├── mock-llm-scripts/ # ← RENAMED from mock_llm_scripts +│ ├── basic.json +│ ├── comprehensive.json +│ └── ... +│ +├── chat/ # Chat TUI package (unchanged) +├── events/ # Event consumption (ChatEventLoop now exported) +├── opencode/ # opencode serve lifecycle +├── _colors.py # Shared ANSI utilities +└── ... # Other scripts unchanged +``` + +--- + +## 4. Dependency Direction (updated) + +``` +run-agent.py → codecome/ → (none) +chat/ → codecome/, events/, rendering/ +codecome/ → events/, rendering/ (lazy), phases/ +phases/ → (stdlib only, reads workspace files) +events/ → (stdlib only, except sse_client) +rendering/ → _colors, (no codecome/ dependency) +``` + +Key change: `rendering/dispatch.py` replaces the dependency that `codecome/cli_render.py` +had on `rendering/`. Now `codecome/` imports `rendering.dispatch` instead of the reverse. + +--- + +## 5. Acceptance Criteria + +``` +- All 20 unresolved PR threads addressed (18 fixed, 2 deferred with notes). +- `make tests` passes. +- `py_compile` passes for all moved/new files. +- No duplicate ROOT definitions across modules. +- No duplicate color escape definitions in config.py. +- Transcript logic is a class, not scattered free functions. +- Phase retry/resume loop lives in codecome/harness.py, not cli.py. +- Event renderers are individual files under rendering/events/. +- Interceptors live under rendering/tools/command/interceptors/. +- mock-llm-scripts directory uses hyphenated name. +- PR body is updated. +``` + +--- + +## 6. 
Risks + +| Risk | Probability | Impact | Mitigation | +|------|:-----------:|:------:|------------| +| Import cycles from ROOT centralisation | Low | Medium | ROOT stays in config.py which has no execution deps | +| Renderer split breaks existing imports | Medium | High | `rendering/events/__init__.py` re-exports all symbols | +| Command interceptor move breaks imports | Medium | Medium | `rendering/tools/command/interceptors/__init__.py` re-exports | +| Phase harness extraction breaks retry logic | Medium | High | Extract verbatim first, refactor later; run tests after | +| Transcript class change breaks chat/phase flow | Medium | Medium | Keep same write semantics; test both paths | diff --git a/.project/tools-refactor-plan.md b/.project/tools-refactor-plan.md index 612955e..fbf1d7e 100644 --- a/.project/tools-refactor-plan.md +++ b/.project/tools-refactor-plan.md @@ -66,7 +66,7 @@ tools/ ├── script-to-asciinema.py 76 ├── mock-llm-server.py 180 ├── mock-llm-parity.py 162 -└── mock_llm_scripts/ 6 JSON files +└── mock-llm-scripts/ 6 JSON files ``` ### 2.2 `run-agent.py` responsibilities @@ -200,17 +200,17 @@ tools/ │ │ ├── apply_patch.py # ApplyPatchRenderer │ │ ├── glob.py # GlobRenderer │ │ ├── grep.py # GrepRenderer -│ │ ├── command.py # CommandRenderer for bash/tool command execution -│ │ ├── sandbox.py # sandbox rendering helpers/interceptor support │ │ ├── task.py # TaskRenderer -│ │ └── skill.py # SkillRenderer -│ └── command_interceptors/ -│ ├── __init__.py -│ ├── base.py # CommandExecutionInterceptor protocol/base -│ ├── sandbox_bootstrap.py # sandbox-bootstrap / make sandbox-* renderer -│ ├── rtk_read.py # rtk read / cat/head/tail equivalent rendering -│ ├── rtk_grep.py # rtk grep / rg equivalent rendering -│ └── shell_listing.py # ls/find/tree listing rendering +│ │ ├── skill.py # SkillRenderer +│ │ └── command/ # CommandRenderer + interceptors +│ │ ├── __init__.py # CommandRenderer for bash/tool command execution +│ │ └── interceptors/ +│ │ ├── 
__init__.py +│ │ ├── base.py # CommandExecutionInterceptor protocol/base +│ │ ├── sandbox_bootstrap.py # sandbox-bootstrap / make sandbox-* renderer +│ │ ├── rtk_read.py # rtk read / cat/head/tail equivalent rendering +│ │ ├── rtk_grep.py # rtk grep / rg equivalent rendering +│ │ └── shell_listing.py # ls/find/tree listing rendering │ ├── chat/ # Chat TUI package │ ├── __init__.py @@ -257,13 +257,13 @@ tools/ ├── gate-check.py # unchanged initially ├── sandbox-bootstrap.py # unchanged initially ├── run-sweep.py # unchanged initially, may keep calling wrapper -├── codecome.py # unchanged initially +├── codecome.py # unchanged initially; unification with run-agent.py deferred to Phase 2 ├── check-frontmatter.py # unchanged initially ├── list-risk-files.py # unchanged initially ├── script-to-asciinema.py # unchanged ├── mock-llm-server.py # unchanged ├── mock-llm-parity.py # unchanged -└── mock_llm_scripts/ # unchanged +└── mock-llm-scripts/ # unchanged ``` --- diff --git a/tests/test_mock_llm_parity.py b/tests/test_mock_llm_parity.py index 2802047..0c1502f 100644 --- a/tests/test_mock_llm_parity.py +++ b/tests/test_mock_llm_parity.py @@ -37,7 +37,7 @@ class TestMockLLMServer: @pytest.fixture(scope="class") def server_proc(self): - script = ROOT / "tools" / "mock_llm_scripts" / "basic.json" + script = ROOT / "tools" / "mock-llm-scripts" / "basic.json" port = _find_free_port() proc = subprocess.Popen( [sys.executable, str(ROOT / "tools" / "mock-llm-server.py"), "--port", str(port), "--script", str(script)], @@ -194,11 +194,11 @@ class TestMockLLMParity: """End-to-end parity tests (heavy — invoke real opencode CLI).""" @pytest.mark.parametrize("script", [ - ROOT / "tools" / "mock_llm_scripts" / "basic.json", - ROOT / "tools" / "mock_llm_scripts" / "with_tool.json", - ROOT / "tools" / "mock_llm_scripts" / "with_permission.json", - ROOT / "tools" / "mock_llm_scripts" / "comprehensive.json", - ROOT / "tools" / "mock_llm_scripts" / "with_permission_multi.json", + ROOT / 
"tools" / "mock-llm-scripts" / "basic.json", + ROOT / "tools" / "mock-llm-scripts" / "with_tool.json", + ROOT / "tools" / "mock-llm-scripts" / "with_permission.json", + ROOT / "tools" / "mock-llm-scripts" / "comprehensive.json", + ROOT / "tools" / "mock-llm-scripts" / "with_permission_multi.json", ]) def test_parity_script(self, script: Path): result = subprocess.run( @@ -214,8 +214,8 @@ def test_parity_script(self, script: Path): assert "Parity OK" in result.stdout @pytest.mark.parametrize("script,error_arg", [ - (ROOT / "tools" / "mock_llm_scripts" / "rate_limit_retry.json", ["--429-after", "1"]), - (ROOT / "tools" / "mock_llm_scripts" / "internal_error.json", ["--500-after", "1"]), + (ROOT / "tools" / "mock-llm-scripts" / "rate_limit_retry.json", ["--429-after", "1"]), + (ROOT / "tools" / "mock-llm-scripts" / "internal_error.json", ["--500-after", "1"]), ]) def test_parity_script_with_error(self, script: Path, error_arg: list[str]): result = subprocess.run( diff --git a/tools/AGENTS.md b/tools/AGENTS.md index 6cfa8d5..ee9ac8e 100644 --- a/tools/AGENTS.md +++ b/tools/AGENTS.md @@ -60,7 +60,7 @@ tools/ ├── sandbox-bootstrap.py # Sandbox environment setup ├── run-sweep.py # Batch file sweeps ├── list-findings.py / create-finding.py / move-finding.py / … # Script wrappers -└── mock-llm-*.py / mock_llm_scripts/ # Test infrastructure +└── mock-llm-*.py / mock-llm-scripts/ # Test infrastructure ``` ## Rules diff --git a/tools/chat/harness.py b/tools/chat/harness.py index 6a0ad2e..fecadc8 100644 --- a/tools/chat/harness.py +++ b/tools/chat/harness.py @@ -23,8 +23,8 @@ from chat.app import ChatApp, HAVE_RICH # noqa: E402 from codecome.cli_render import build_console, _emit_fatal_error # noqa: E402 from opencode.serve import ServerRunner, ServerRunnerError # noqa: E402 -from codecome.version import check_opencode_version # noqa: E402 from codecome.config import ( # noqa: E402 + ROOT, resolve_color_mode, load_prompt, resolve_model_and_variant, @@ -47,13 +47,10 @@ def 
_run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> + ", ".join("--" + n.replace("_", "-") for n in missing) ) - check_opencode_version() - color_mode = resolve_color_mode(args.color) console = build_console(color_mode) # Resolve prompt - ROOT = Path(__file__).resolve().parents[2] if args.prompt_file: prompt_file = ROOT / args.prompt_file prompt = load_prompt(prompt_file, args.finding, phase=args.phase) diff --git a/tools/codecome/cli_render.py b/tools/codecome/cli_render.py index 42ed90a..bd253a9 100644 --- a/tools/codecome/cli_render.py +++ b/tools/codecome/cli_render.py @@ -11,10 +11,9 @@ from __future__ import annotations -from pathlib import Path from typing import Any -ROOT = Path(__file__).resolve().parents[2] +from codecome.config import ROOT # --------------------------------------------------------------------------- # Rich availability diff --git a/tools/codecome/config.py b/tools/codecome/config.py index 1555058..55fb17a 100644 --- a/tools/codecome/config.py +++ b/tools/codecome/config.py @@ -23,28 +23,7 @@ ROOT = Path(__file__).resolve().parents[2] - -# Minimal inline color helpers to avoid importing _colors (which lives in the -# parent tools/ directory, not here). Only the attributes used by this module -# are replicated. 
-_COLOR_ENABLED = ( - sys.stdout.isatty() - and os.environ.get("NO_COLOR") is None - and os.environ.get("TERM") != "dumb" -) - -if _COLOR_ENABLED: - _RESET = "\033[0m" - _BOLD = "\033[1m" - _DIM = "\033[2m" -else: - _RESET = "" - _BOLD = "" - _DIM = "" - - -def _header(message: str) -> str: - return f"{_BOLD}{message}{_RESET}" +import _colors as C # noqa: E402 — tools/ is on sys.path at runtime def truthy_env(name: str) -> bool: @@ -480,22 +459,22 @@ def show_model_table(agent_name: str) -> int: def fmt(v: Optional[str]) -> str: return v if v else "(not set)" - print(_header(f"Model resolution for agent {agent_name}:")) + print(C.header(f"Model resolution for agent {agent_name}:")) print() - print(f" {_DIM}OPENCODE_ARGS{_RESET} model={fmt(args_model)} variant={fmt(args_variant)}") - print(f" {_DIM}env CODECOME_MODEL{_RESET} model={fmt(env_model)}") - print(f" {_DIM}env CODECOME_MODEL_VARIANT{_RESET} variant={fmt(env_variant)}") - print(f" {_DIM}codecome.yml{_RESET} model={fmt(yaml_model)} variant={fmt(yaml_variant)}") - print(f" {_DIM}opencode session history{_RESET} model={fmt(discovered)}") - print(f" {_DIM}runtime probe{_RESET} not run by show-model") + print(f" {C.DIM}OPENCODE_ARGS{C.RESET} model={fmt(args_model)} variant={fmt(args_variant)}") + print(f" {C.DIM}env CODECOME_MODEL{C.RESET} model={fmt(env_model)}") + print(f" {C.DIM}env CODECOME_MODEL_VARIANT{C.RESET} variant={fmt(env_variant)}") + print(f" {C.DIM}codecome.yml{C.RESET} model={fmt(yaml_model)} variant={fmt(yaml_variant)}") + print(f" {C.DIM}opencode session history{C.RESET} model={fmt(discovered)}") + print(f" {C.DIM}runtime probe{C.RESET} not run by show-model") print() effective_model = model or "(unknown)" effective_variant = variant or "(unknown)" thinking_on, thinking_source = resolve_thinking_decision(model, extra_args) - print(f" {_BOLD}effective{_RESET} " + print(f" {C.BOLD}effective{C.RESET} " f"model={effective_model} variant={effective_variant} " f"thinking={'on' if thinking_on else 
'off'}") - print(f" {_DIM}sources{_RESET} " + print(f" {C.DIM}sources{C.RESET} " f"model: {model_source} variant: {variant_source} " f"thinking: {thinking_source}") return 0 diff --git a/tools/codecome/graceful.py b/tools/codecome/graceful.py index 47fe3f5..89eb3b8 100644 --- a/tools/codecome/graceful.py +++ b/tools/codecome/graceful.py @@ -11,7 +11,7 @@ from pathlib import Path from typing import Any, Iterator -ROOT = Path(__file__).resolve().parents[2] +from codecome.config import ROOT _FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL) diff --git a/tools/codecome/runner.py b/tools/codecome/runner.py index b5aa2ab..6c91633 100644 --- a/tools/codecome/runner.py +++ b/tools/codecome/runner.py @@ -17,11 +17,10 @@ import _colors as C from events.phase_loop import PhaseEventLoop, RunResult +from codecome.config import ROOT from codecome.session import create_session, send_prompt_to_session from codecome.transcript import open_phase_transcript, close_transcript -ROOT = Path(__file__).resolve().parents[2] - def _consume_events( base_url: str, diff --git a/tools/codecome/version.py b/tools/codecome/version.py index e8a7bb4..bf0311b 100644 --- a/tools/codecome/version.py +++ b/tools/codecome/version.py @@ -10,9 +10,6 @@ import re import subprocess import sys -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[2] MINIMUM_OPENCODE_VERSION = "1.14.50" @@ -44,14 +41,9 @@ def parse_ver(v: str) -> tuple[int, ...]: sys.exit(1) -# Minimal inline color helpers to avoid importing _colors (which lives in -# the parent tools/ directory, not here). 
def _fail(message: str) -> str: - if sys.stdout.isatty() and not _no_color(): + import os + no_color = os.environ.get("NO_COLOR") is not None + if sys.stdout.isatty() and not no_color: return f"\033[31m\u2718\033[0m {message}" return f"[FAIL] {message}" - - -def _no_color() -> bool: - import os - return os.environ.get("NO_COLOR") is not None diff --git a/tools/events/__init__.py b/tools/events/__init__.py index 90c2ba2..1337db2 100644 --- a/tools/events/__init__.py +++ b/tools/events/__init__.py @@ -6,5 +6,6 @@ from __future__ import annotations from events.phase_loop import PhaseEventLoop, RunResult +from events.chat_loop import ChatEventLoop -__all__ = ["PhaseEventLoop", "RunResult"] +__all__ = ["PhaseEventLoop", "RunResult", "ChatEventLoop"] diff --git a/tools/mock-llm-parity.py b/tools/mock-llm-parity.py index 7e3a642..0544faf 100644 --- a/tools/mock-llm-parity.py +++ b/tools/mock-llm-parity.py @@ -5,7 +5,7 @@ """Deterministic parity test between opencode run and opencode serve using a mock LLM. 
Usage: - python tools/mock-llm-parity.py --script tools/mock_llm_scripts/basic.json + python tools/mock-llm-parity.py --script tools/mock-llm-scripts/basic.json """ from __future__ import annotations @@ -412,7 +412,7 @@ def main() -> int: parser.add_argument( "--script", type=Path, - default=ROOT / "tools" / "mock_llm_scripts" / "basic.json", + default=ROOT / "tools" / "mock-llm-scripts" / "basic.json", ) parser.add_argument("--prompt", default=DEFAULT_PROMPT) parser.add_argument("--model", default=DEFAULT_MODEL) diff --git a/tools/mock_llm_scripts/basic.json b/tools/mock-llm-scripts/basic.json similarity index 100% rename from tools/mock_llm_scripts/basic.json rename to tools/mock-llm-scripts/basic.json diff --git a/tools/mock_llm_scripts/comprehensive.json b/tools/mock-llm-scripts/comprehensive.json similarity index 100% rename from tools/mock_llm_scripts/comprehensive.json rename to tools/mock-llm-scripts/comprehensive.json diff --git a/tools/mock_llm_scripts/internal_error.json b/tools/mock-llm-scripts/internal_error.json similarity index 100% rename from tools/mock_llm_scripts/internal_error.json rename to tools/mock-llm-scripts/internal_error.json diff --git a/tools/mock_llm_scripts/rate_limit_retry.json b/tools/mock-llm-scripts/rate_limit_retry.json similarity index 100% rename from tools/mock_llm_scripts/rate_limit_retry.json rename to tools/mock-llm-scripts/rate_limit_retry.json diff --git a/tools/mock_llm_scripts/with_permission.json b/tools/mock-llm-scripts/with_permission.json similarity index 100% rename from tools/mock_llm_scripts/with_permission.json rename to tools/mock-llm-scripts/with_permission.json diff --git a/tools/mock_llm_scripts/with_permission_multi.json b/tools/mock-llm-scripts/with_permission_multi.json similarity index 100% rename from tools/mock_llm_scripts/with_permission_multi.json rename to tools/mock-llm-scripts/with_permission_multi.json diff --git a/tools/mock_llm_scripts/with_tool.json b/tools/mock-llm-scripts/with_tool.json 
similarity index 100% rename from tools/mock_llm_scripts/with_tool.json rename to tools/mock-llm-scripts/with_tool.json diff --git a/tools/mock-llm-server.py b/tools/mock-llm-server.py index cecf075..be7f627 100644 --- a/tools/mock-llm-server.py +++ b/tools/mock-llm-server.py @@ -22,7 +22,7 @@ which turn to serve (stateless dispatch). Usage: - python tools/mock-llm-server.py --port 0 --script tools/mock_llm_scripts/basic.json + python tools/mock-llm-server.py --port 0 --script tools/mock-llm-scripts/basic.json # Prints: MockLLM serving on http://127.0.0.1:49234 """ From 48ee6b5c4d7c87bd413154c1f7e093bbc218f892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 20:08:59 +0200 Subject: [PATCH 57/65] =?UTF-8?q?refactor(a8-batch2):=20runtime=20config,?= =?UTF-8?q?=20log-level,=20graceful=20=E2=86=92=20phases/completion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add RuntimeConfig dataclass and resolve_runtime_config() to config.py; both cli.py and chat/harness.py now call it instead of duplicating three separate resolve calls - Add --log-level CLI arg (env: OPENCODE_LOG_LEVEL, default: WARN); both phase and chat paths pass it to ServerRunner.start() - Move codecome/graceful.py → phases/completion.py with new phases/ package; update imports in cli.py - Create tools/phases/__init__.py --- tools/chat/harness.py | 15 +++---- tools/codecome/cli.py | 27 ++++++------ tools/codecome/config.py | 41 +++++++++++++++++++ tools/phases/__init__.py | 6 +++ .../graceful.py => phases/completion.py} | 0 5 files changed, 68 insertions(+), 21 deletions(-) create mode 100644 tools/phases/__init__.py rename tools/{codecome/graceful.py => phases/completion.py} (100%) diff --git a/tools/chat/harness.py b/tools/chat/harness.py index fecadc8..dea71ce 100644 --- a/tools/chat/harness.py +++ b/tools/chat/harness.py @@ -12,7 +12,6 @@ import argparse import os -import shlex import sys from pathlib import Path 
@@ -27,8 +26,7 @@ ROOT, resolve_color_mode, load_prompt, - resolve_model_and_variant, - resolve_thinking_decision, + resolve_runtime_config, ) from codecome.session import create_chat_session # noqa: E402 from codecome.transcript import open_chat_transcript, close_transcript # noqa: E402 @@ -60,11 +58,10 @@ def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> prompt = "" # Model resolution - extra_args = shlex.split(os.environ.get("OPENCODE_ARGS", "")) - model, variant, model_source, variant_source = resolve_model_and_variant( - args.agent, extra_args - ) - thinking_on, thinking_source = resolve_thinking_decision(model, extra_args) + rc = resolve_runtime_config(args.agent) + model = rc.model + variant = rc.variant + thinking_on = rc.thinking_on _chat_debug(f"_run_chat_mode: agent={args.agent} model={model} variant={variant} thinking={thinking_on}") @@ -77,7 +74,7 @@ def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> _chat_debug("_run_chat_mode: starting opencode serve") runner = ServerRunner() try: - server_info = runner.start(hostname="127.0.0.1", log_level="WARN") + server_info = runner.start(hostname="127.0.0.1", log_level=getattr(args, "log_level", "WARN")) _chat_debug(f"_run_chat_mode: server started pid={server_info.pid} url={server_info.base_url}") except ServerRunnerError as exc: _chat_debug(f"_run_chat_mode: server start failed: {exc}") diff --git a/tools/codecome/cli.py b/tools/codecome/cli.py index 90d71d4..127e456 100644 --- a/tools/codecome/cli.py +++ b/tools/codecome/cli.py @@ -8,7 +8,6 @@ import argparse import dataclasses import os -import shlex import signal import subprocess import sys @@ -30,9 +29,9 @@ from codecome.version import check_opencode_version from codecome.config import ( truthy_env, resolve_color_mode, load_prompt, - resolve_model_and_variant, resolve_thinking_decision, show_model_table, + resolve_runtime_config, show_model_table, ) -from codecome.graceful import ( +from 
phases.completion import ( check_phase_graceful_completion, build_phase_resume_prompt, build_frontmatter_resume_prompt, ) @@ -57,6 +56,11 @@ def build_parser() -> argparse.ArgumentParser: parser.add_argument("--write-content-lines", type=int, help="Max lines shown for new-file write content (default: 25, env: CODECOME_WRITE_CONTENT_LINES).") parser.add_argument("--write-diff-limit", type=int, help="Max diff lines shown for write (default: 50, env: CODECOME_WRITE_DIFF_LIMIT).") parser.add_argument("--edit-diff-lines", type=int, help="Max diff lines shown for edit (default: 25, env: CODECOME_EDIT_DIFF_LINES).") + parser.add_argument( + "--log-level", + default=os.environ.get("OPENCODE_LOG_LEVEL", "WARN"), + help="Log level for opencode serve (default: WARN, env: OPENCODE_LOG_LEVEL).", + ) parser.add_argument( "--show-model", action="store_true", @@ -111,11 +115,10 @@ def main() -> int: prompt_file = _clr.ROOT / args.prompt_file prompt = load_prompt(prompt_file, args.finding, phase=args.phase) - extra_args = shlex.split(os.environ.get("OPENCODE_ARGS", "")) - model, variant, model_source, variant_source = resolve_model_and_variant( - args.agent, extra_args - ) - thinking_on, thinking_source = resolve_thinking_decision(model, extra_args) + rc = resolve_runtime_config(args.agent) + model = rc.model + variant = rc.variant + thinking_on = rc.thinking_on model_label = model or "(unknown)" variant_label = variant or "(unknown)" @@ -128,11 +131,11 @@ def main() -> int: if variant is not None: sources_tail = ( - f"(model source: {model_source}, variant source: {variant_source}, " - f"thinking source: {thinking_source})" + f"(model source: {rc.model_source}, variant source: {rc.variant_source}, " + f"thinking source: {rc.thinking_source})" ) else: - sources_tail = f"(model source: {model_source}, thinking source: {thinking_source})" + sources_tail = f"(model source: {rc.model_source}, thinking source: {rc.thinking_source})" main_line = " ".join(parts) + " " + sources_tail @@ 
-172,7 +175,7 @@ def main() -> int: runner = ServerRunner() server_info: Any = None try: - server_info = runner.start(hostname="127.0.0.1", log_level="WARN") + server_info = runner.start(hostname="127.0.0.1", log_level=args.log_level) except ServerRunnerError as exc: _emit_fatal_error(console, "Server Error", str(exc)) return 1 diff --git a/tools/codecome/config.py b/tools/codecome/config.py index 55fb17a..be5e901 100644 --- a/tools/codecome/config.py +++ b/tools/codecome/config.py @@ -435,6 +435,47 @@ def resolve_thinking_decision( return enabled, "provider-default" +# --------------------------------------------------------------------------- +# Unified runtime config resolution +# --------------------------------------------------------------------------- + +from dataclasses import dataclass + + +@dataclass(frozen=True) +class RuntimeConfig: + """Resolved runtime configuration shared by phase and chat modes.""" + model: Optional[str] + variant: Optional[str] + model_source: str + variant_source: str + thinking_on: bool + thinking_source: str + + +def resolve_runtime_config(agent: str) -> RuntimeConfig: + """Resolve model, variant, and thinking from agent + env in one call. + + Both phase and chat paths should call this instead of separately + calling resolve_model_and_variant + resolve_thinking_decision. 
+ """ + import shlex + + extra_args = shlex.split(os.environ.get("OPENCODE_ARGS", "")) + model, variant, model_source, variant_source = resolve_model_and_variant( + agent, extra_args + ) + thinking_on, thinking_source = resolve_thinking_decision(model, extra_args) + return RuntimeConfig( + model=model, + variant=variant, + model_source=model_source, + variant_source=variant_source, + thinking_on=thinking_on, + thinking_source=thinking_source, + ) + + # --------------------------------------------------------------------------- # Model resolution display (--show-model) # --------------------------------------------------------------------------- diff --git a/tools/phases/__init__.py b/tools/phases/__init__.py new file mode 100644 index 0000000..c75b716 --- /dev/null +++ b/tools/phases/__init__.py @@ -0,0 +1,6 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""Phase-specific logic: completion checks, resume prompts.""" + +from __future__ import annotations diff --git a/tools/codecome/graceful.py b/tools/phases/completion.py similarity index 100% rename from tools/codecome/graceful.py rename to tools/phases/completion.py From ddc5a6a0fd30d3960e4589d391f24e45009f0009 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 20:11:19 +0200 Subject: [PATCH 58/65] refactor(a8-batch3): Transcript class, unified event writing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rewrite codecome/transcript.py as Transcript class with for_phase(), for_chat(), null() factory methods, write_event(), and close() - Remove old free functions (open_phase_transcript, open_chat_transcript, close_transcript) — no backward-compat wrappers - Update runner.py to use Transcript.for_phase() and transcript.write_event() - Update chat/harness.py to use Transcript.for_chat() - Update chat/app.py: replace inline json.dumps write with 
transcript.write_event(), rename transcript_fp param to transcript --- tools/chat/app.py | 20 ++++----- tools/chat/harness.py | 22 +++++----- tools/codecome/runner.py | 29 ++++++------- tools/codecome/transcript.py | 82 ++++++++++++++++++++++++++---------- 4 files changed, 92 insertions(+), 61 deletions(-) diff --git a/tools/chat/app.py b/tools/chat/app.py index a2c7c7d..8627e0a 100644 --- a/tools/chat/app.py +++ b/tools/chat/app.py @@ -110,11 +110,7 @@ def _chat_render_and_log(self, console, phase, label, event): When bound via ``__get__`` to a _ChatApp instance, ``self`` is guaranteed to carry the attributes accessed below.""" - if self.transcript_fp is not None: - try: - self.transcript_fp.write(json.dumps(event) + "\n") - except OSError: - pass + self.transcript.write_event(event) if getattr(self.args, "debug", False): _chat_debug(f"_render_and_log: raw event: {json.dumps(event)}") if event.get("type") == "message.updated": @@ -252,10 +248,11 @@ class _ChatApp(App): run as @work(thread=True) workers — the canonical docs pattern (matches the weather-app example). - * The transcript jsonl is opened in _run_chat_mode and the - file handle is passed in via the `transcript_fp` constructor - argument; _render_and_log writes one JSON line per SSE - event to it (parity with phase mode). + * The transcript is opened in _run_chat_mode and the + Transcript instance is passed in via the `transcript` + constructor argument; _render_and_log calls + transcript.write_event() per SSE event (parity with + phase mode). 
* A set_interval(1.0) heartbeat continuously logs a debug tick from the main thread and also updates the bottom-bar @@ -323,7 +320,7 @@ def __init__(self, renderable): super().__init__() self.renderable = renderable - def __init__(self, server_info=None, session_id=None, initial_prompt="", args=None, model=None, variant=None, thinking_on=None, transcript_fp=None): + def __init__(self, server_info=None, session_id=None, initial_prompt="", args=None, model=None, variant=None, thinking_on=None, transcript=None): super().__init__() self.server_info = server_info self.session_id = session_id @@ -332,7 +329,8 @@ def __init__(self, server_info=None, session_id=None, initial_prompt="", args=No self.model = model self.variant = variant self.thinking_on = thinking_on - self.transcript_fp = transcript_fp + from codecome.transcript import Transcript + self.transcript = transcript if transcript is not None else Transcript.null() self.chat_loop = None self.console_proxy = None self.rich_log = None diff --git a/tools/chat/harness.py b/tools/chat/harness.py index dea71ce..1bcfc09 100644 --- a/tools/chat/harness.py +++ b/tools/chat/harness.py @@ -29,7 +29,7 @@ resolve_runtime_config, ) from codecome.session import create_chat_session # noqa: E402 -from codecome.transcript import open_chat_transcript, close_transcript # noqa: E402 +from codecome.transcript import Transcript # noqa: E402 def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int: @@ -97,13 +97,12 @@ def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> return 1 # Open the chat transcript (parity with phase mode). 
- transcript_path: Path = Path() - transcript_fp = None try: - transcript_path, transcript_fp = open_chat_transcript() - _chat_debug(f"_run_chat_mode: opened transcript {transcript_path}") + transcript = Transcript.for_chat() + _chat_debug(f"_run_chat_mode: opened transcript {transcript.path}") except OSError as exc: - transcript_path = ROOT / "tmp" / "last-chat-unknown.jsonl" + transcript = Transcript.null() + transcript.path = ROOT / "tmp" / "last-chat-unknown.jsonl" _chat_debug(f"_run_chat_mode: could not open transcript: {exc}") _chat_debug("_run_chat_mode: creating ChatApp") @@ -117,7 +116,7 @@ def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> model=model, variant=variant, thinking_on=thinking_on, - transcript_fp=transcript_fp, + transcript=transcript, ) _chat_debug("_run_chat_mode: calling app.run()") app.run() @@ -128,14 +127,13 @@ def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> _chat_debug("_run_chat_mode: stopping chat loop") app.chat_loop.stop() runner.stop() - close_transcript(transcript_fp) + transcript.close() - # Final summary banner on the restored terminal. Mirrors phase - # mode's success-path summary. + # Final summary banner on the restored terminal. 
try: - rel_path = transcript_path.relative_to(ROOT) + rel_path = transcript.path.relative_to(ROOT) except ValueError: - rel_path = transcript_path + rel_path = transcript.path if HAVE_RICH: from rich.rule import Rule # noqa: E402 from rich.text import Text # noqa: E402 diff --git a/tools/codecome/runner.py b/tools/codecome/runner.py index 6c91633..c8ea609 100644 --- a/tools/codecome/runner.py +++ b/tools/codecome/runner.py @@ -19,7 +19,7 @@ from events.phase_loop import PhaseEventLoop, RunResult from codecome.config import ROOT from codecome.session import create_session, send_prompt_to_session -from codecome.transcript import open_phase_transcript, close_transcript +from codecome.transcript import Transcript def _consume_events( @@ -29,7 +29,7 @@ def _consume_events( phase: str, label: str, args: argparse.Namespace, - transcript_fp: Any | None, + transcript: Transcript, thinking_on: bool, auth_token: str | None, workspace_dir: str | None, @@ -46,11 +46,7 @@ def _consume_events( ) def _render_and_log(console_: Any, phase_: str, label_: str, event: dict[str, Any]) -> None: - if transcript_fp is not None: - try: - transcript_fp.write(json.dumps(event) + "\n") - except OSError: - pass + transcript.write_event(event) if args.debug: sys.stderr.write(json.dumps(event) + "\n") sys.stderr.flush() @@ -76,16 +72,17 @@ def _run_single_attempt( existing_session_id: str | None = None, ) -> tuple[int, str, RunResult, Path]: - transcript_fp = None + transcript: Transcript try: - transcript_path, transcript_fp = open_phase_transcript(str(args.phase), args.finding) + transcript = Transcript.for_phase(str(args.phase), args.finding) except OSError as exc: finding_tag = (args.finding or "no-finding").replace("/", "_") - transcript_path = ROOT / "tmp" / f"last-phase-{args.phase}-{finding_tag}-attempt-N.jsonl" + transcript = Transcript.null() + transcript.path = ROOT / "tmp" / f"last-phase-{args.phase}-{finding_tag}-attempt-N.jsonl" try: - console.print("warning: could not open 
transcript ", transcript_path, ": ", exc) + console.print("warning: could not open transcript ", transcript.path, ": ", exc) except AttributeError: - print(C.warn(f"warning: could not open transcript {transcript_path}: {exc}")) + print(C.warn(f"warning: could not open transcript {transcript.path}: {exc}")) try: if existing_session_id: @@ -101,7 +98,7 @@ def _consume() -> None: run_result_box["result"] = _consume_events( base_url, session_id, console, str(args.phase), str(args.label), args, - transcript_fp, thinking_on, + transcript, thinking_on, auth_token, workspace_dir, render_event_fn=render_event_fn, ) @@ -127,8 +124,8 @@ def _consume() -> None: console.print(f"Fatal error: {exc}") except Exception: print(C.error(f"Fatal error: {exc}"), file=sys.stderr) - return 1, existing_session_id or "", RunResult(), transcript_path + return 1, existing_session_id or "", RunResult(), transcript.path finally: - close_transcript(transcript_fp) + transcript.close() - return 0, session_id, run_result, transcript_path + return 0, session_id, run_result, transcript.path diff --git a/tools/codecome/transcript.py b/tools/codecome/transcript.py index 7afcece..1ae0af1 100644 --- a/tools/codecome/transcript.py +++ b/tools/codecome/transcript.py @@ -2,52 +2,90 @@ # SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later """ -Transcript path naming, opening, writing, and closing helpers. +JSONL event transcript. + +Provides a ``Transcript`` class that manages the lifecycle of a JSONL +event log file: opening, writing individual events, and closing. 
""" from __future__ import annotations +import json import os import threading import time from pathlib import Path from typing import IO, Any -ROOT = Path(__file__).resolve().parents[2] +from codecome.config import ROOT _LOCK = threading.Lock() _ATTEMPT_COUNTER: dict[str, int] = {} def _transcript_dir() -> Path: + """Return (and create) the workspace ``tmp/`` directory.""" d = ROOT / "tmp" d.mkdir(parents=True, exist_ok=True) return d -def open_phase_transcript(phase: str, finding: str | None) -> tuple[Path, IO[str]]: - finding_tag = (finding or "no-finding").replace("/", "_") - key = f"{phase}-{finding_tag}" +class Transcript: + """JSONL event transcript — handles open, write, close. + + Use the ``for_phase`` / ``for_chat`` class methods to create + instances for the two run modes, or ``null`` for a no-op transcript + when opening fails. + """ + + def __init__(self, path: Path, fp: IO[str] | None) -> None: + self.path = path + self._fp = fp + + # -- factory methods --------------------------------------------------- + + @classmethod + def for_phase(cls, phase: str, finding: str | None) -> Transcript: + """Open a JSONL transcript for a phase run.""" + finding_tag = (finding or "no-finding").replace("/", "_") + key = f"{phase}-{finding_tag}" + + with _LOCK: + counter = _ATTEMPT_COUNTER.get(key, 1) + _ATTEMPT_COUNTER[key] = counter + 1 - with _LOCK: - counter = _ATTEMPT_COUNTER.get(key, 1) - _ATTEMPT_COUNTER[key] = counter + 1 + path = _transcript_dir() / f"last-phase-{phase}-{finding_tag}-attempt-{counter}.jsonl" + return cls(path, path.open("w", encoding="utf-8")) - path = _transcript_dir() / f"last-phase-{phase}-{finding_tag}-attempt-{counter}.jsonl" - return path, path.open("w", encoding="utf-8") + @classmethod + def for_chat(cls) -> Transcript: + """Open a JSONL transcript for a chat session.""" + stamp = time.strftime("%Y%m%d-%H%M%S") + path = _transcript_dir() / f"last-chat-{stamp}-pid{os.getpid()}.jsonl" + return cls(path, path.open("w", encoding="utf-8", 
buffering=1)) + @classmethod + def null(cls) -> Transcript: + """Return a no-op transcript (writes are silently discarded).""" + return cls(Path(), None) -def open_chat_transcript() -> tuple[Path, IO[str]]: - stamp = time.strftime("%Y%m%d-%H%M%S") - path = _transcript_dir() / f"last-chat-{stamp}-pid{os.getpid()}.jsonl" - return path, path.open("w", encoding="utf-8", buffering=1) + # -- write / close ----------------------------------------------------- + def write_event(self, event: dict[str, Any]) -> None: + """Write one JSON-line event. Silently ignores errors.""" + if self._fp is not None: + try: + self._fp.write(json.dumps(event) + "\n") + except OSError: + pass -def close_transcript(fp: IO[str] | None) -> None: - if fp is None: - return - try: - fp.flush() - fp.close() - except OSError: - pass + def close(self) -> None: + """Flush and close. Safe to call multiple times.""" + if self._fp is None: + return + try: + self._fp.flush() + self._fp.close() + except OSError: + pass + self._fp = None From df5a58d3469589a54e17fe7730418e47d210f356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 20:13:30 +0200 Subject: [PATCH 59/65] refactor(a8-batch4a): split rendering/events.py into rendering/events/ package Split the 484-line monolithic events.py into individual modules: - events/base.py: EventRenderer, finish constants, subagent state - events/step_start.py: StepStartRenderer - events/text.py: TextEventRenderer - events/reasoning.py: ReasoningEventRenderer - events/tool_use.py: ToolUseEventRenderer - events/step_finish.py: StepFinishRenderer - events/error.py: ErrorEventRenderer - events/session_status.py: SessionStatusRenderer - events/server.py: ServerConnected + ServerHeartbeat renderers - events/session_diff.py: SessionDiffRenderer - events/message.py: MessageUpdatedRenderer - events/subagent.py: SubagentStatusRenderer - events/unknown.py: UnknownEventRenderer events/__init__.py re-exports all symbols for backward 
compatibility. --- tools/rendering/events.py | 484 ----------------------- tools/rendering/events/__init__.py | 57 +++ tools/rendering/events/base.py | 41 ++ tools/rendering/events/error.py | 41 ++ tools/rendering/events/message.py | 77 ++++ tools/rendering/events/reasoning.py | 48 +++ tools/rendering/events/server.py | 37 ++ tools/rendering/events/session_diff.py | 29 ++ tools/rendering/events/session_status.py | 44 +++ tools/rendering/events/step_finish.py | 44 +++ tools/rendering/events/step_start.py | 29 ++ tools/rendering/events/subagent.py | 95 +++++ tools/rendering/events/text.py | 29 ++ tools/rendering/events/tool_use.py | 66 ++++ tools/rendering/events/unknown.py | 27 ++ 15 files changed, 664 insertions(+), 484 deletions(-) delete mode 100644 tools/rendering/events.py create mode 100644 tools/rendering/events/__init__.py create mode 100644 tools/rendering/events/base.py create mode 100644 tools/rendering/events/error.py create mode 100644 tools/rendering/events/message.py create mode 100644 tools/rendering/events/reasoning.py create mode 100644 tools/rendering/events/server.py create mode 100644 tools/rendering/events/session_diff.py create mode 100644 tools/rendering/events/session_status.py create mode 100644 tools/rendering/events/step_finish.py create mode 100644 tools/rendering/events/step_start.py create mode 100644 tools/rendering/events/subagent.py create mode 100644 tools/rendering/events/text.py create mode 100644 tools/rendering/events/tool_use.py create mode 100644 tools/rendering/events/unknown.py diff --git a/tools/rendering/events.py b/tools/rendering/events.py deleted file mode 100644 index e3f90fe..0000000 --- a/tools/rendering/events.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (C) 2025-2026 Pablo Ruiz García -# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later - -""" -Event renderer classes — one per SSE event family. - -Each renderer handles its event type(s) and writes through the -render context's sink. 
-""" - -from __future__ import annotations - -import json -import time as _time -from typing import Any - -from rendering.base import BaseRenderer -import _colors as C - - -# --------------------------------------------------------------------------- -# Finish reason classification -# --------------------------------------------------------------------------- - -_FINISH_TERMINAL_OK = {"stop", "end_turn"} -_FINISH_MID_TURN = {"tool-calls", "tool_use"} -_FINISH_FAILURE = {"content-filter", "content_filter", "length", "max_tokens", "error"} - -# Per-session dedup state for subagent update events. -_SUBAGENT_LAST_STATE: dict[str, tuple[dict[str, Any], float]] = {} - - -def _reset_subagent_state() -> None: - """Clear per-session dedup state. Call between tests or runs.""" - _SUBAGENT_LAST_STATE.clear() - - -# --------------------------------------------------------------------------- -# EventRenderer base -# --------------------------------------------------------------------------- - -class EventRenderer(BaseRenderer): - event_types: tuple[str, ...] 
= () - - def render(self, event: dict[str, Any]) -> bool: - raise NotImplementedError - - -# --------------------------------------------------------------------------- -# Specific renderers -# --------------------------------------------------------------------------- - -class StepStartRenderer(EventRenderer): - event_types = ("step_start",) - - def __init__(self, context, *, phase: str = "", label: str = ""): - super().__init__(context) - self.phase = phase - self.label = label - - def render(self, event: dict[str, Any]) -> bool: - step_type = event.get("part", {}).get("type", "step-start") - if self.rich: - from rich.text import Text - self.sink.write(Text(f"[{self.phase}] {self.label}: {step_type}", style="cyan")) - elif self.plain: - self.sink.write_text(C.info(f"[{self.phase}] {self.label}: {step_type}")) - return True - - -class TextEventRenderer(EventRenderer): - event_types = ("text",) - - def render(self, event: dict[str, Any]) -> bool: - part = event.get("part", {}) - text = str(part.get("text", "")).strip() - if not text: - return False - if self.rich: - from rich.markdown import Markdown - from rich.panel import Panel - self.sink.write(Panel(Markdown(text), title="Assistant", border_style="blue", expand=True)) - elif self.plain: - self.sink.write_text(C.header("Assistant")) - self.sink.write_text(text) - return True - - -class ReasoningEventRenderer(EventRenderer): - event_types = ("reasoning",) - - def render(self, event: dict[str, Any]) -> bool: - if not self.context.settings.render_reasoning: - return False - part = event.get("part", {}) - text = str(part.get("text", "")).strip() - if not text: - return False - - truncated_note = "" - max_chars = self.context.settings.reasoning_max_chars - if len(text) > max_chars: - cut = len(text) - max_chars - text = text[:max_chars] - truncated_note = f"\n\n... 
({cut} chars truncated)" - - if self.rich: - from rich.console import Group - from rich.markdown import Markdown - from rich.panel import Panel - from rich.text import Text - body_md = Markdown(text) - if truncated_note: - body = Group(body_md, Text(truncated_note.strip(), style="dim")) - else: - body = body_md - self.sink.write(Panel(body, title="Thinking", border_style="blue", expand=True, style="dim")) - elif self.plain: - self.sink.write_text(C.header("Thinking")) - self.sink.write_text(text) - if truncated_note: - self.sink.write_text(truncated_note.strip()) - return True - - -class ToolUseEventRenderer(EventRenderer): - event_types = ("tool_use",) - - # Map of canonical tool names to their renderer classes (lazy-imported). - # Keys that map to the same renderer share the cached instance. - _TOOL_RENDERER_CLASSES: dict[str, str] = { - "todowrite": "rendering.tools.todo.TodoRenderer", - "read": "rendering.tools.read.ReadRenderer", - "write": "rendering.tools.write.WriteRenderer", - "edit": "rendering.tools.edit.EditRenderer", - "apply_patch": "rendering.tools.apply_patch.ApplyPatchRenderer", - "applypatch": "rendering.tools.apply_patch.ApplyPatchRenderer", - "apply-patch": "rendering.tools.apply_patch.ApplyPatchRenderer", - "glob": "rendering.tools.glob.GlobRenderer", - "grep": "rendering.tools.grep.GrepRenderer", - "bash": "rendering.tools.command.CommandRenderer", - "skill": "rendering.tools.skill.SkillRenderer", - "task": "rendering.tools.task.TaskRenderer", - } - - def __init__(self, context): - super().__init__(context) - from rendering.tools.base import FallbackToolRenderer - self._fallback = FallbackToolRenderer(context) - # Cache renderer instances keyed by their fully-qualified class path. 
- self._renderer_cache: dict[str, Any] = {} - - def _get_renderer(self, tool_lower: str) -> Any | None: - """Return a cached renderer for *tool_lower*, or None for fallback.""" - class_path = self._TOOL_RENDERER_CLASSES.get(tool_lower) - if class_path is None: - return None - if class_path in self._renderer_cache: - return self._renderer_cache[class_path] - # Lazy-import and instantiate once, then cache. - module_path, class_name = class_path.rsplit(".", 1) - import importlib - mod = importlib.import_module(module_path) - cls = getattr(mod, class_name) - instance = cls(self.context) - self._renderer_cache[class_path] = instance - return instance - - def render(self, event: dict[str, Any]) -> bool: - part = event.get("part", {}) - tool = str(part.get("tool", "unknown")) - state = part.get("state", {}) if isinstance(part.get("state"), dict) else {} - tool_lower = tool.strip().lower() - - renderer = self._get_renderer(tool_lower) - if renderer is not None and renderer.render(tool, state): - return True - - return self._fallback.render(tool, state) - - -class StepFinishRenderer(EventRenderer): - event_types = ("step_finish",) - - def render(self, event: dict[str, Any]) -> bool: - part = event.get("part", {}) - reason = str(part.get("reason", "unknown")) - tokens = self._format_tokens(part.get("tokens", {})) - suffix = f" ({tokens})" if tokens else "" - style = "dim" - if reason in _FINISH_FAILURE: - style = "bold red" - if self.rich: - from rich.text import Text - self.sink.write(Text(f"step finished: {reason}{suffix}", style=style)) - elif self.plain: - if reason in _FINISH_FAILURE: - self.sink.write_text(C.fail(f"step finished: {reason}{suffix}")) - else: - self.sink.write_text(f"step finished: {reason}{suffix}") - return True - - @staticmethod - def _format_tokens(tokens: dict[str, Any]) -> str: - if not isinstance(tokens, dict): - return "" - parts = [] - for key in ("input", "output", "reasoning", "total"): - value = tokens.get(key) - if value is not None: - 
parts.append(f"{key}={value}") - return ", ".join(parts) - - -class ErrorEventRenderer(EventRenderer): - event_types = ("error",) - - def render(self, event: dict[str, Any]) -> bool: - err = event.get("error") - msg_parts: list[str] = [] - if isinstance(err, dict): - name = err.get("name") - if isinstance(name, str) and name: - msg_parts.append(name) - data = err.get("data") - if isinstance(data, dict): - data_msg = data.get("message") - if isinstance(data_msg, str) and data_msg: - msg_parts.append(data_msg) - elif isinstance(err.get("message"), str): - msg_parts.append(err["message"]) - elif isinstance(err, str): - msg_parts.append(err) - text = ": ".join(msg_parts) if msg_parts else "(no error message)" - if self.rich: - from rich.panel import Panel - from rich.text import Text - self.sink.write(Panel(Text(text, style="red"), title="Error", border_style="yellow", expand=True)) - elif self.plain: - self.sink.write_text(C.warn("Error")) - self.sink.write_text(C.fail(text)) - return True - - -class SessionStatusRenderer(EventRenderer): - event_types = ("session.status",) - - def render(self, event: dict[str, Any]) -> bool: - properties = event.get("properties", {}) - status = properties.get("status", {}) - status_type = status.get("type") - if status_type == "retry": - attempt = status.get("attempt", 1) - message = status.get("message", "Unknown error") - text = f"\u23f3 Waiting for LLM provider response (retry attempt {attempt}): {message}" - if self.rich: - from rich.text import Text - self.sink.write(Text(text, style="bold yellow")) - elif self.plain: - self.sink.write_text(C.warn(text)) - elif status_type == "busy": - text = "session status: busy" - if self.rich: - from rich.text import Text - self.sink.write(Text(text, style="dim")) - elif self.plain: - self.sink.write_text(C.info(text)) - elif status_type == "idle": - text = "session status: idle" - if self.rich: - from rich.text import Text - self.sink.write(Text(text, style="dim")) - elif self.plain: - 
self.sink.write_text(C.info(text)) - return True - - -class ServerConnectedRenderer(EventRenderer): - event_types = ("server.connected",) - - def render(self, event: dict[str, Any]) -> bool: - message = "connected to opencode event stream" - if self.rich: - from rich.text import Text - self.sink.write(Text(message, style="dim")) - elif self.plain: - self.sink.write_text(C.info(message)) - return True - - -class ServerHeartbeatRenderer(EventRenderer): - event_types = ("server.heartbeat",) - - def render(self, event: dict[str, Any]) -> bool: - message = "server heartbeat" - if self.rich: - from rich.text import Text - self.sink.write(Text(message, style="dim")) - elif self.plain: - self.sink.write_text(C.info(message)) - return True - - -class SessionDiffRenderer(EventRenderer): - event_types = ("session.diff",) - - def render(self, event: dict[str, Any]) -> bool: - properties = event.get("properties", {}) - diff = properties.get("diff", []) - if not isinstance(diff, list) or not diff: - return False - count = len(diff) - message = f"session diff updated: {count} file{'s' if count != 1 else ''}" - if self.rich: - from rich.text import Text - self.sink.write(Text(message, style="dim")) - elif self.plain: - self.sink.write_text(C.info(message)) - return True - - -class MessageUpdatedRenderer(EventRenderer): - event_types = ("message.updated",) - - def render(self, event: dict[str, Any]) -> bool: - info = event.get("info") - if not isinstance(info, dict): - props = event.get("properties", {}) - info = props.get("info", {}) if isinstance(props, dict) else {} - if not isinstance(info, dict): - info = {} - - role = str(info.get("role", "")) - tokens = info.get("tokens", {}) if isinstance(info.get("tokens"), dict) else {} - has_tokens = isinstance(tokens, dict) and ( - tokens.get("input", 0) or tokens.get("output", 0) or tokens.get("reasoning", 0) - ) - has_summary = "summary" in info or "finish" in info - if not has_summary and not has_tokens: - return False - - mcache = 
tokens.get("cache", {}) if isinstance(tokens, dict) else {} - cost = info.get("cost", 0) or 0 - - model_id = str(info.get("modelID", "")).strip() - provider_id = str(info.get("providerID", "")).strip() - if not model_id: - mdl = info.get("model", {}) - if isinstance(mdl, dict): - model_id = str(mdl.get("modelID", "")).strip() - provider_id = str(mdl.get("providerID", "")).strip() - model_label = f"{provider_id}/{model_id}" if provider_id and model_id else model_id - - if role == "user": - message = "> User" - style = "dim" - elif role == "assistant": - if has_tokens: - _in = tokens.get("input", 0) - _out = tokens.get("output", 0) - _reasoning = tokens.get("reasoning", 0) - _cache_read = mcache.get("read", 0) if isinstance(mcache, dict) else 0 - token_parts = [f"\u2191{_in} \u2193{_out}"] - if _reasoning: - token_parts.append(f"R{_reasoning}") - if _cache_read: - token_parts.append(f"cache read {_cache_read}") - token_str = ", ".join(token_parts) - cost_str = f", ${cost:.4f}" if cost else "" - message = f"> Assistant \u00b7 {model_label} ({token_str}{cost_str})" - style = "bold blue" - else: - message = f"> Assistant \u00b7 {model_label}" if model_label else "> Assistant" - style = "bold blue" - else: - agent = str(info.get("agent", "assistant")) - message = f"> {agent} \u00b7 {model_label}" if model_label else f"> {agent}" - style = "bold blue" - - if self.rich: - from rich.text import Text - self.sink.write(Text(message, style=style)) - elif self.plain: - self.sink.write_text(C.header(message)) - return True - - -class SubagentStatusRenderer(EventRenderer): - event_types = ("subagent.status",) - - def render(self, event: dict[str, Any]) -> bool: - if not self.context.settings.render_subagent_updates: - return False - - properties = event.get("properties", {}) - status_type = str(properties.get("statusType", "")) - session_id = str(properties.get("sessionID", "")) - title = str(properties.get("title", "(untitled)")) - summary = properties.get("summary") - 
elapsed_ms = properties.get("elapsedMs") - - if status_type == "updated": - snapshot: dict[str, Any] = {"title": title} - if isinstance(summary, dict): - snapshot["additions"] = summary.get("additions") - snapshot["deletions"] = summary.get("deletions") - snapshot["files"] = summary.get("files") - - last_snapshot, last_time = _SUBAGENT_LAST_STATE.get(session_id, ({}, 0.0)) - now = _time.time() - if ( - last_snapshot == snapshot - and (now - last_time) < self.context.settings.subagent_update_throttle_s - ): - return False - _SUBAGENT_LAST_STATE[session_id] = (snapshot, now) - - if self.rich: - self._render_rich(status_type, title, summary, elapsed_ms) - elif self.plain: - self._render_plain(status_type, title, summary, elapsed_ms) - return True - - def _render_rich(self, status_type: str, title: str, summary, elapsed_ms) -> None: - from rich.panel import Panel - from rich.text import Text - if status_type == "created": - self.sink.write(Panel(Text(title, style="bold cyan"), title="Subagent started", border_style="cyan", expand=True)) - elif status_type == "finished": - self.sink.write(Panel(Text(title, style="bold cyan"), title="Subagent finished", border_style="green", expand=True)) - elif status_type == "heartbeat" and elapsed_ms is not None: - elapsed_s = elapsed_ms // 1000 - self.sink.write(Text(f"\u23f3 Subagent \u00b7 {title} still running ({elapsed_s}s)", style="bold yellow")) - elif status_type == "updated": - summary_text = self._format_subagent_summary(summary) - line = f"Subagent \u00b7 {title}" - if summary_text: - line += f" {summary_text}" - self.sink.write(Text(line, style="dim")) - - def _render_plain(self, status_type: str, title: str, summary, elapsed_ms) -> None: - if status_type == "created": - self.sink.write_text(C.header(f"[subagent] started: {title}")) - elif status_type == "finished": - self.sink.write_text(C.ok(f"[subagent] finished: {title}")) - elif status_type == "heartbeat" and elapsed_ms is not None: - elapsed_s = elapsed_ms // 1000 - 
self.sink.write_text(C.warn(f"\u23f3 Subagent \u00b7 {title} still running ({elapsed_s}s)")) - elif status_type == "updated": - summary_text = self._format_subagent_summary(summary) - line = f"Subagent \u00b7 {title}" - if summary_text: - line += f" {summary_text}" - self.sink.write_text(f" {line}") - - @staticmethod - def _format_subagent_summary(summary: Any) -> str: - if not isinstance(summary, dict): - return "" - additions = summary.get("additions") - deletions = summary.get("deletions") - files = summary.get("files") - parts: list[str] = [] - if additions is not None or deletions is not None: - parts.append(f"+{additions or 0} -{deletions or 0}") - if files is not None: - parts.append(f"{files} file(s)") - return " ".join(parts) - - -class UnknownEventRenderer(EventRenderer): - """Fallback renderer for unrecognised event types.""" - - def render(self, event: dict[str, Any]) -> bool: - event_type = event.get("type", "") - if event_type == "message.part.updated": - part_type = event.get("part", {}).get("type", "") - message = f"unknown part type: {part_type}" - else: - message = f"unknown event type: {event_type}" - self.sink.write_text(message) - if self.context.settings.debug_unknown_events: - self.sink.write_text(json.dumps(event, indent=2, default=str)) - return True diff --git a/tools/rendering/events/__init__.py b/tools/rendering/events/__init__.py new file mode 100644 index 0000000..b59f042 --- /dev/null +++ b/tools/rendering/events/__init__.py @@ -0,0 +1,57 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Event renderers package — one module per renderer family. + +This ``__init__`` re-exports all public symbols so that existing +``from rendering.events import ...`` imports continue to work. 
+""" + +from __future__ import annotations + +# Base class + constants +from rendering.events.base import ( + EventRenderer, + _FINISH_TERMINAL_OK, + _FINISH_MID_TURN, + _FINISH_FAILURE, + _SUBAGENT_LAST_STATE, + _reset_subagent_state, +) + +# Individual renderers +from rendering.events.step_start import StepStartRenderer +from rendering.events.text import TextEventRenderer +from rendering.events.reasoning import ReasoningEventRenderer +from rendering.events.tool_use import ToolUseEventRenderer +from rendering.events.step_finish import StepFinishRenderer +from rendering.events.error import ErrorEventRenderer +from rendering.events.session_status import SessionStatusRenderer +from rendering.events.server import ServerConnectedRenderer, ServerHeartbeatRenderer +from rendering.events.session_diff import SessionDiffRenderer +from rendering.events.message import MessageUpdatedRenderer +from rendering.events.subagent import SubagentStatusRenderer +from rendering.events.unknown import UnknownEventRenderer + +__all__ = [ + "EventRenderer", + "_FINISH_TERMINAL_OK", + "_FINISH_MID_TURN", + "_FINISH_FAILURE", + "_SUBAGENT_LAST_STATE", + "_reset_subagent_state", + "StepStartRenderer", + "TextEventRenderer", + "ReasoningEventRenderer", + "ToolUseEventRenderer", + "StepFinishRenderer", + "ErrorEventRenderer", + "SessionStatusRenderer", + "ServerConnectedRenderer", + "ServerHeartbeatRenderer", + "SessionDiffRenderer", + "MessageUpdatedRenderer", + "SubagentStatusRenderer", + "UnknownEventRenderer", +] diff --git a/tools/rendering/events/base.py b/tools/rendering/events/base.py new file mode 100644 index 0000000..026825e --- /dev/null +++ b/tools/rendering/events/base.py @@ -0,0 +1,41 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""EventRenderer base class and shared constants.""" + +from __future__ import annotations + +import json +import time as _time +from typing import Any + +from rendering.base import 
BaseRenderer +import _colors as C + + +# --------------------------------------------------------------------------- +# Finish reason classification +# --------------------------------------------------------------------------- + +_FINISH_TERMINAL_OK = {"stop", "end_turn"} +_FINISH_MID_TURN = {"tool-calls", "tool_use"} +_FINISH_FAILURE = {"content-filter", "content_filter", "length", "max_tokens", "error"} + +# Per-session dedup state for subagent update events. +_SUBAGENT_LAST_STATE: dict[str, tuple[dict[str, Any], float]] = {} + + +def _reset_subagent_state() -> None: + """Clear per-session dedup state. Call between tests or runs.""" + _SUBAGENT_LAST_STATE.clear() + + +# --------------------------------------------------------------------------- +# EventRenderer base +# --------------------------------------------------------------------------- + +class EventRenderer(BaseRenderer): + event_types: tuple[str, ...] = () + + def render(self, event: dict[str, Any]) -> bool: + raise NotImplementedError diff --git a/tools/rendering/events/error.py b/tools/rendering/events/error.py new file mode 100644 index 0000000..3c7cc89 --- /dev/null +++ b/tools/rendering/events/error.py @@ -0,0 +1,41 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""ErrorEventRenderer — renders error events.""" + +from __future__ import annotations + +from typing import Any + +from rendering.events.base import EventRenderer +import _colors as C + + +class ErrorEventRenderer(EventRenderer): + event_types = ("error",) + + def render(self, event: dict[str, Any]) -> bool: + err = event.get("error") + msg_parts: list[str] = [] + if isinstance(err, dict): + name = err.get("name") + if isinstance(name, str) and name: + msg_parts.append(name) + data = err.get("data") + if isinstance(data, dict): + data_msg = data.get("message") + if isinstance(data_msg, str) and data_msg: + msg_parts.append(data_msg) + elif isinstance(err.get("message"), 
str): + msg_parts.append(err["message"]) + elif isinstance(err, str): + msg_parts.append(err) + text = ": ".join(msg_parts) if msg_parts else "(no error message)" + if self.rich: + from rich.panel import Panel + from rich.text import Text + self.sink.write(Panel(Text(text, style="red"), title="Error", border_style="yellow", expand=True)) + elif self.plain: + self.sink.write_text(C.warn("Error")) + self.sink.write_text(C.fail(text)) + return True diff --git a/tools/rendering/events/message.py b/tools/rendering/events/message.py new file mode 100644 index 0000000..d4b46e7 --- /dev/null +++ b/tools/rendering/events/message.py @@ -0,0 +1,77 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""MessageUpdatedRenderer — renders message.updated events.""" + +from __future__ import annotations + +from typing import Any + +from rendering.events.base import EventRenderer +import _colors as C + + +class MessageUpdatedRenderer(EventRenderer): + event_types = ("message.updated",) + + def render(self, event: dict[str, Any]) -> bool: + info = event.get("info") + if not isinstance(info, dict): + props = event.get("properties", {}) + info = props.get("info", {}) if isinstance(props, dict) else {} + if not isinstance(info, dict): + info = {} + + role = str(info.get("role", "")) + tokens = info.get("tokens", {}) if isinstance(info.get("tokens"), dict) else {} + has_tokens = isinstance(tokens, dict) and ( + tokens.get("input", 0) or tokens.get("output", 0) or tokens.get("reasoning", 0) + ) + has_summary = "summary" in info or "finish" in info + if not has_summary and not has_tokens: + return False + + mcache = tokens.get("cache", {}) if isinstance(tokens, dict) else {} + cost = info.get("cost", 0) or 0 + + model_id = str(info.get("modelID", "")).strip() + provider_id = str(info.get("providerID", "")).strip() + if not model_id: + mdl = info.get("model", {}) + if isinstance(mdl, dict): + model_id = str(mdl.get("modelID", 
"")).strip() + provider_id = str(mdl.get("providerID", "")).strip() + model_label = f"{provider_id}/{model_id}" if provider_id and model_id else model_id + + if role == "user": + message = "> User" + style = "dim" + elif role == "assistant": + if has_tokens: + _in = tokens.get("input", 0) + _out = tokens.get("output", 0) + _reasoning = tokens.get("reasoning", 0) + _cache_read = mcache.get("read", 0) if isinstance(mcache, dict) else 0 + token_parts = [f"\u2191{_in} \u2193{_out}"] + if _reasoning: + token_parts.append(f"R{_reasoning}") + if _cache_read: + token_parts.append(f"cache read {_cache_read}") + token_str = ", ".join(token_parts) + cost_str = f", ${cost:.4f}" if cost else "" + message = f"> Assistant \u00b7 {model_label} ({token_str}{cost_str})" + style = "bold blue" + else: + message = f"> Assistant \u00b7 {model_label}" if model_label else "> Assistant" + style = "bold blue" + else: + agent = str(info.get("agent", "assistant")) + message = f"> {agent} \u00b7 {model_label}" if model_label else f"> {agent}" + style = "bold blue" + + if self.rich: + from rich.text import Text + self.sink.write(Text(message, style=style)) + elif self.plain: + self.sink.write_text(C.header(message)) + return True diff --git a/tools/rendering/events/reasoning.py b/tools/rendering/events/reasoning.py new file mode 100644 index 0000000..2b9f834 --- /dev/null +++ b/tools/rendering/events/reasoning.py @@ -0,0 +1,48 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""ReasoningEventRenderer — renders thinking/reasoning events.""" + +from __future__ import annotations + +from typing import Any + +from rendering.events.base import EventRenderer +import _colors as C + + +class ReasoningEventRenderer(EventRenderer): + event_types = ("reasoning",) + + def render(self, event: dict[str, Any]) -> bool: + if not self.context.settings.render_reasoning: + return False + part = event.get("part", {}) + text = str(part.get("text", 
"")).strip() + if not text: + return False + + truncated_note = "" + max_chars = self.context.settings.reasoning_max_chars + if len(text) > max_chars: + cut = len(text) - max_chars + text = text[:max_chars] + truncated_note = f"\n\n... ({cut} chars truncated)" + + if self.rich: + from rich.console import Group + from rich.markdown import Markdown + from rich.panel import Panel + from rich.text import Text + body_md = Markdown(text) + if truncated_note: + body = Group(body_md, Text(truncated_note.strip(), style="dim")) + else: + body = body_md + self.sink.write(Panel(body, title="Thinking", border_style="blue", expand=True, style="dim")) + elif self.plain: + self.sink.write_text(C.header("Thinking")) + self.sink.write_text(text) + if truncated_note: + self.sink.write_text(truncated_note.strip()) + return True diff --git a/tools/rendering/events/server.py b/tools/rendering/events/server.py new file mode 100644 index 0000000..ed432b6 --- /dev/null +++ b/tools/rendering/events/server.py @@ -0,0 +1,37 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""Server event renderers: ServerConnectedRenderer, ServerHeartbeatRenderer.""" + +from __future__ import annotations + +from typing import Any + +from rendering.events.base import EventRenderer +import _colors as C + + +class ServerConnectedRenderer(EventRenderer): + event_types = ("server.connected",) + + def render(self, event: dict[str, Any]) -> bool: + message = "connected to opencode event stream" + if self.rich: + from rich.text import Text + self.sink.write(Text(message, style="dim")) + elif self.plain: + self.sink.write_text(C.info(message)) + return True + + +class ServerHeartbeatRenderer(EventRenderer): + event_types = ("server.heartbeat",) + + def render(self, event: dict[str, Any]) -> bool: + message = "server heartbeat" + if self.rich: + from rich.text import Text + self.sink.write(Text(message, style="dim")) + elif self.plain: + 
self.sink.write_text(C.info(message)) + return True diff --git a/tools/rendering/events/session_diff.py b/tools/rendering/events/session_diff.py new file mode 100644 index 0000000..9a77241 --- /dev/null +++ b/tools/rendering/events/session_diff.py @@ -0,0 +1,29 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""SessionDiffRenderer — renders session.diff events.""" + +from __future__ import annotations + +from typing import Any + +from rendering.events.base import EventRenderer +import _colors as C + + +class SessionDiffRenderer(EventRenderer): + event_types = ("session.diff",) + + def render(self, event: dict[str, Any]) -> bool: + properties = event.get("properties", {}) + diff = properties.get("diff", []) + if not isinstance(diff, list) or not diff: + return False + count = len(diff) + message = f"session diff updated: {count} file{'s' if count != 1 else ''}" + if self.rich: + from rich.text import Text + self.sink.write(Text(message, style="dim")) + elif self.plain: + self.sink.write_text(C.info(message)) + return True diff --git a/tools/rendering/events/session_status.py b/tools/rendering/events/session_status.py new file mode 100644 index 0000000..9a3871c --- /dev/null +++ b/tools/rendering/events/session_status.py @@ -0,0 +1,44 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""SessionStatusRenderer — renders session.status events.""" + +from __future__ import annotations + +from typing import Any + +from rendering.events.base import EventRenderer +import _colors as C + + +class SessionStatusRenderer(EventRenderer): + event_types = ("session.status",) + + def render(self, event: dict[str, Any]) -> bool: + properties = event.get("properties", {}) + status = properties.get("status", {}) + status_type = status.get("type") + if status_type == "retry": + attempt = status.get("attempt", 1) + message = status.get("message", 
"Unknown error") + text = f"\u23f3 Waiting for LLM provider response (retry attempt {attempt}): {message}" + if self.rich: + from rich.text import Text + self.sink.write(Text(text, style="bold yellow")) + elif self.plain: + self.sink.write_text(C.warn(text)) + elif status_type == "busy": + text = "session status: busy" + if self.rich: + from rich.text import Text + self.sink.write(Text(text, style="dim")) + elif self.plain: + self.sink.write_text(C.info(text)) + elif status_type == "idle": + text = "session status: idle" + if self.rich: + from rich.text import Text + self.sink.write(Text(text, style="dim")) + elif self.plain: + self.sink.write_text(C.info(text)) + return True diff --git a/tools/rendering/events/step_finish.py b/tools/rendering/events/step_finish.py new file mode 100644 index 0000000..45f11fa --- /dev/null +++ b/tools/rendering/events/step_finish.py @@ -0,0 +1,44 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""StepFinishRenderer — renders step_finish events.""" + +from __future__ import annotations + +from typing import Any + +from rendering.events.base import EventRenderer, _FINISH_FAILURE +import _colors as C + + +class StepFinishRenderer(EventRenderer): + event_types = ("step_finish",) + + def render(self, event: dict[str, Any]) -> bool: + part = event.get("part", {}) + reason = str(part.get("reason", "unknown")) + tokens = self._format_tokens(part.get("tokens", {})) + suffix = f" ({tokens})" if tokens else "" + style = "dim" + if reason in _FINISH_FAILURE: + style = "bold red" + if self.rich: + from rich.text import Text + self.sink.write(Text(f"step finished: {reason}{suffix}", style=style)) + elif self.plain: + if reason in _FINISH_FAILURE: + self.sink.write_text(C.fail(f"step finished: {reason}{suffix}")) + else: + self.sink.write_text(f"step finished: {reason}{suffix}") + return True + + @staticmethod + def _format_tokens(tokens: dict[str, Any]) -> str: + if not 
isinstance(tokens, dict): + return "" + parts = [] + for key in ("input", "output", "reasoning", "total"): + value = tokens.get(key) + if value is not None: + parts.append(f"{key}={value}") + return ", ".join(parts) diff --git a/tools/rendering/events/step_start.py b/tools/rendering/events/step_start.py new file mode 100644 index 0000000..0762393 --- /dev/null +++ b/tools/rendering/events/step_start.py @@ -0,0 +1,29 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""StepStartRenderer — renders step_start events.""" + +from __future__ import annotations + +from typing import Any + +from rendering.events.base import EventRenderer +import _colors as C + + +class StepStartRenderer(EventRenderer): + event_types = ("step_start",) + + def __init__(self, context, *, phase: str = "", label: str = ""): + super().__init__(context) + self.phase = phase + self.label = label + + def render(self, event: dict[str, Any]) -> bool: + step_type = event.get("part", {}).get("type", "step-start") + if self.rich: + from rich.text import Text + self.sink.write(Text(f"[{self.phase}] {self.label}: {step_type}", style="cyan")) + elif self.plain: + self.sink.write_text(C.info(f"[{self.phase}] {self.label}: {step_type}")) + return True diff --git a/tools/rendering/events/subagent.py b/tools/rendering/events/subagent.py new file mode 100644 index 0000000..e5f9a8e --- /dev/null +++ b/tools/rendering/events/subagent.py @@ -0,0 +1,95 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""SubagentStatusRenderer — renders subagent.status events.""" + +from __future__ import annotations + +import time as _time +from typing import Any + +from rendering.events.base import EventRenderer, _SUBAGENT_LAST_STATE +import _colors as C + + +class SubagentStatusRenderer(EventRenderer): + event_types = ("subagent.status",) + + def render(self, event: dict[str, Any]) -> bool: + if 
not self.context.settings.render_subagent_updates: + return False + + properties = event.get("properties", {}) + status_type = str(properties.get("statusType", "")) + session_id = str(properties.get("sessionID", "")) + title = str(properties.get("title", "(untitled)")) + summary = properties.get("summary") + elapsed_ms = properties.get("elapsedMs") + + if status_type == "updated": + snapshot: dict[str, Any] = {"title": title} + if isinstance(summary, dict): + snapshot["additions"] = summary.get("additions") + snapshot["deletions"] = summary.get("deletions") + snapshot["files"] = summary.get("files") + + last_snapshot, last_time = _SUBAGENT_LAST_STATE.get(session_id, ({}, 0.0)) + now = _time.time() + if ( + last_snapshot == snapshot + and (now - last_time) < self.context.settings.subagent_update_throttle_s + ): + return False + _SUBAGENT_LAST_STATE[session_id] = (snapshot, now) + + if self.rich: + self._render_rich(status_type, title, summary, elapsed_ms) + elif self.plain: + self._render_plain(status_type, title, summary, elapsed_ms) + return True + + def _render_rich(self, status_type: str, title: str, summary, elapsed_ms) -> None: + from rich.panel import Panel + from rich.text import Text + if status_type == "created": + self.sink.write(Panel(Text(title, style="bold cyan"), title="Subagent started", border_style="cyan", expand=True)) + elif status_type == "finished": + self.sink.write(Panel(Text(title, style="bold cyan"), title="Subagent finished", border_style="green", expand=True)) + elif status_type == "heartbeat" and elapsed_ms is not None: + elapsed_s = elapsed_ms // 1000 + self.sink.write(Text(f"\u23f3 Subagent \u00b7 {title} still running ({elapsed_s}s)", style="bold yellow")) + elif status_type == "updated": + summary_text = self._format_subagent_summary(summary) + line = f"Subagent \u00b7 {title}" + if summary_text: + line += f" {summary_text}" + self.sink.write(Text(line, style="dim")) + + def _render_plain(self, status_type: str, title: str, summary, 
elapsed_ms) -> None: + if status_type == "created": + self.sink.write_text(C.header(f"[subagent] started: {title}")) + elif status_type == "finished": + self.sink.write_text(C.ok(f"[subagent] finished: {title}")) + elif status_type == "heartbeat" and elapsed_ms is not None: + elapsed_s = elapsed_ms // 1000 + self.sink.write_text(C.warn(f"\u23f3 Subagent \u00b7 {title} still running ({elapsed_s}s)")) + elif status_type == "updated": + summary_text = self._format_subagent_summary(summary) + line = f"Subagent \u00b7 {title}" + if summary_text: + line += f" {summary_text}" + self.sink.write_text(f" {line}") + + @staticmethod + def _format_subagent_summary(summary: Any) -> str: + if not isinstance(summary, dict): + return "" + additions = summary.get("additions") + deletions = summary.get("deletions") + files = summary.get("files") + parts: list[str] = [] + if additions is not None or deletions is not None: + parts.append(f"+{additions or 0} -{deletions or 0}") + if files is not None: + parts.append(f"{files} file(s)") + return " ".join(parts) diff --git a/tools/rendering/events/text.py b/tools/rendering/events/text.py new file mode 100644 index 0000000..3d4c228 --- /dev/null +++ b/tools/rendering/events/text.py @@ -0,0 +1,29 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""TextEventRenderer — renders text (assistant markdown) events.""" + +from __future__ import annotations + +from typing import Any + +from rendering.events.base import EventRenderer +import _colors as C + + +class TextEventRenderer(EventRenderer): + event_types = ("text",) + + def render(self, event: dict[str, Any]) -> bool: + part = event.get("part", {}) + text = str(part.get("text", "")).strip() + if not text: + return False + if self.rich: + from rich.markdown import Markdown + from rich.panel import Panel + self.sink.write(Panel(Markdown(text), title="Assistant", border_style="blue", expand=True)) + elif self.plain: + 
self.sink.write_text(C.header("Assistant")) + self.sink.write_text(text) + return True diff --git a/tools/rendering/events/tool_use.py b/tools/rendering/events/tool_use.py new file mode 100644 index 0000000..ebd3f0d --- /dev/null +++ b/tools/rendering/events/tool_use.py @@ -0,0 +1,66 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""ToolUseEventRenderer — dispatches tool_use events to specific tool renderers.""" + +from __future__ import annotations + +from typing import Any + +from rendering.events.base import EventRenderer + + +class ToolUseEventRenderer(EventRenderer): + event_types = ("tool_use",) + + # Map of canonical tool names to their renderer classes (lazy-imported). + # Keys that map to the same renderer share the cached instance. + _TOOL_RENDERER_CLASSES: dict[str, str] = { + "todowrite": "rendering.tools.todo.TodoRenderer", + "read": "rendering.tools.read.ReadRenderer", + "write": "rendering.tools.write.WriteRenderer", + "edit": "rendering.tools.edit.EditRenderer", + "apply_patch": "rendering.tools.apply_patch.ApplyPatchRenderer", + "applypatch": "rendering.tools.apply_patch.ApplyPatchRenderer", + "apply-patch": "rendering.tools.apply_patch.ApplyPatchRenderer", + "glob": "rendering.tools.glob.GlobRenderer", + "grep": "rendering.tools.grep.GrepRenderer", + "bash": "rendering.tools.command.CommandRenderer", + "skill": "rendering.tools.skill.SkillRenderer", + "task": "rendering.tools.task.TaskRenderer", + } + + def __init__(self, context): + super().__init__(context) + from rendering.tools.base import FallbackToolRenderer + self._fallback = FallbackToolRenderer(context) + # Cache renderer instances keyed by their fully-qualified class path. 
+ self._renderer_cache: dict[str, Any] = {} + + def _get_renderer(self, tool_lower: str) -> Any | None: + """Return a cached renderer for *tool_lower*, or None for fallback.""" + class_path = self._TOOL_RENDERER_CLASSES.get(tool_lower) + if class_path is None: + return None + if class_path in self._renderer_cache: + return self._renderer_cache[class_path] + # Lazy-import and instantiate once, then cache. + module_path, class_name = class_path.rsplit(".", 1) + import importlib + mod = importlib.import_module(module_path) + cls = getattr(mod, class_name) + instance = cls(self.context) + self._renderer_cache[class_path] = instance + return instance + + def render(self, event: dict[str, Any]) -> bool: + part = event.get("part", {}) + tool = str(part.get("tool", "unknown")) + state = part.get("state", {}) if isinstance(part.get("state"), dict) else {} + tool_lower = tool.strip().lower() + + renderer = self._get_renderer(tool_lower) + if renderer is not None and renderer.render(tool, state): + return True + + return self._fallback.render(tool, state) diff --git a/tools/rendering/events/unknown.py b/tools/rendering/events/unknown.py new file mode 100644 index 0000000..0c88015 --- /dev/null +++ b/tools/rendering/events/unknown.py @@ -0,0 +1,27 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""UnknownEventRenderer — fallback renderer for unrecognised event types.""" + +from __future__ import annotations + +import json +from typing import Any + +from rendering.events.base import EventRenderer + + +class UnknownEventRenderer(EventRenderer): + """Fallback renderer for unrecognised event types.""" + + def render(self, event: dict[str, Any]) -> bool: + event_type = event.get("type", "") + if event_type == "message.part.updated": + part_type = event.get("part", {}).get("type", "") + message = f"unknown part type: {part_type}" + else: + message = f"unknown event type: {event_type}" + 
self.sink.write_text(message) + if self.context.settings.debug_unknown_events: + self.sink.write_text(json.dumps(event, indent=2, default=str)) + return True From 823cabf7986f45b100f43be69d05cadde36215c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 20:14:56 +0200 Subject: [PATCH 60/65] refactor(a8-batch4b): rendering dispatch + command/interceptors restructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Create rendering/dispatch.py with HAVE_RICH, _get_rendering_ctx(), and render_event() — the composition root for rendering - Slim codecome/cli_render.py to CLI-only concerns (build_console, _emit_fatal_error) plus re-exports from rendering.dispatch - Restructure rendering/tools/command.py → rendering/tools/command/__init__.py - Move rendering/tools/interceptors/ → rendering/tools/command/interceptors/ - Update all import paths from rendering.tools.interceptors.* to rendering.tools.command.interceptors.* --- tools/codecome/cli_render.py | 144 ++++-------------- tools/rendering/dispatch.py | 116 ++++++++++++++ .../tools/{command.py => command/__init__.py} | 8 +- .../{ => command}/interceptors/__init__.py | 10 +- .../tools/{ => command}/interceptors/base.py | 0 .../{ => command}/interceptors/rtk_grep.py | 2 +- .../{ => command}/interceptors/rtk_read.py | 2 +- .../interceptors/sandbox_bootstrap.py | 2 +- .../interceptors/shell_listing.py | 2 +- 9 files changed, 156 insertions(+), 130 deletions(-) create mode 100644 tools/rendering/dispatch.py rename tools/rendering/tools/{command.py => command/__init__.py} (89%) rename tools/rendering/tools/{ => command}/interceptors/__init__.py (53%) rename tools/rendering/tools/{ => command}/interceptors/base.py (100%) rename tools/rendering/tools/{ => command}/interceptors/rtk_grep.py (98%) rename tools/rendering/tools/{ => command}/interceptors/rtk_read.py (99%) rename tools/rendering/tools/{ => command}/interceptors/sandbox_bootstrap.py 
(99%) rename tools/rendering/tools/{ => command}/interceptors/shell_listing.py (98%) diff --git a/tools/codecome/cli_render.py b/tools/codecome/cli_render.py index bd253a9..30d4c1d 100644 --- a/tools/codecome/cli_render.py +++ b/tools/codecome/cli_render.py @@ -2,49 +2,44 @@ # SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later """ -Rendering infrastructure shared by the CLI entry point: Rich detection, -console construction, rendering context cache, and the event dispatcher. +CLI rendering helpers: console construction and fatal error display. -This module is intentionally free of execution logic (no server, no -session, no phase loop). +The rendering dispatcher and context cache live in ``rendering.dispatch``. +This module re-exports key symbols for backward compatibility with +existing imports from ``codecome.cli_render``. """ from __future__ import annotations +import sys from typing import Any -from codecome.config import ROOT +# Re-exports from rendering.dispatch — used by cli.py, chat/, runner.py +from rendering.dispatch import ( # noqa: F401 — re-export + HAVE_RICH, + _get_rendering_ctx, + render_event, +) -# --------------------------------------------------------------------------- -# Rich availability -# --------------------------------------------------------------------------- +from codecome.config import ROOT # noqa: F401 — re-export -try: - from rich.console import Console, Group - from rich.json import JSON - from rich.markdown import Markdown - from rich.panel import Panel - from rich.rule import Rule - from rich.text import Text +# Re-exports of finish constants (used by cli.py's retry loop) +from rendering.events import ( # noqa: F401 — re-export + _FINISH_TERMINAL_OK, + _FINISH_MID_TURN, + _FINISH_FAILURE, +) - HAVE_RICH = True -except ImportError: # pragma: no cover - Console = Any # type: ignore[assignment] - Group = tuple # type: ignore[assignment] - JSON = None # type: ignore[assignment] - Markdown = None # type: 
ignore[assignment] - Panel = None # type: ignore[assignment] - Rule = None # type: ignore[assignment] - Text = None # type: ignore[assignment] - HAVE_RICH = False # --------------------------------------------------------------------------- -# Console builder +# Console builder (CLI concern — depends on Rich availability) # --------------------------------------------------------------------------- -def build_console(color_mode: str) -> Console: +def build_console(color_mode: str) -> Any: + """Build a Rich Console based on color mode, or None if Rich is unavailable.""" if not HAVE_RICH: - return None # type: ignore[return-value] + return None + from rich.console import Console if color_mode == "always": return Console(force_terminal=True, highlight=False) if color_mode == "never": @@ -53,99 +48,14 @@ def build_console(color_mode: str) -> Console: # --------------------------------------------------------------------------- -# Rendering context cache -# --------------------------------------------------------------------------- - -_RENDERING_CTX_CACHE: dict[str, Any] = {} - - -def _get_rendering_ctx(console: Any) -> Any: - mode = "rich" if (HAVE_RICH and console is not None) else "plain" - if mode in _RENDERING_CTX_CACHE: - ctx = _RENDERING_CTX_CACHE[mode] - ctx.cache.invalidate_stale() - return ctx - from rendering.cache import SnapshotCache - from rendering.context import RenderContext - from rendering.settings import RenderSettings - from rendering.sink import PlainSink, RichConsoleSink - - if mode == "rich": - sink = RichConsoleSink(console) - else: - sink = PlainSink() - ctx = RenderContext( - root=ROOT, - sink=sink, - settings=RenderSettings.from_env(), - cache=SnapshotCache(), - ) - from rendering import events as _evts - ctx._renderers = { - "server.connected": _evts.ServerConnectedRenderer(ctx), - "server.heartbeat": _evts.ServerHeartbeatRenderer(ctx), - "message.updated": _evts.MessageUpdatedRenderer(ctx), - "text": _evts.TextEventRenderer(ctx), - 
"reasoning": _evts.ReasoningEventRenderer(ctx), - "tool_use": _evts.ToolUseEventRenderer(ctx), - "step_start": _evts.StepStartRenderer(ctx), - "step_finish": _evts.StepFinishRenderer(ctx), - "error": _evts.ErrorEventRenderer(ctx), - "session.status": _evts.SessionStatusRenderer(ctx), - "session.diff": _evts.SessionDiffRenderer(ctx), - "subagent.status": _evts.SubagentStatusRenderer(ctx), - "unknown": _evts.UnknownEventRenderer(ctx), - } - _RENDERING_CTX_CACHE[mode] = ctx - return ctx - - -# --------------------------------------------------------------------------- -# Event dispatcher -# --------------------------------------------------------------------------- - -def render_event(console: Console, phase: str, label: str, event: dict[str, Any]) -> None: - event_type = event.get("type") - ctx = _get_rendering_ctx(console) - renderers = getattr(ctx, "_renderers", {}) - - if event_type == "step_start": - renderer = renderers.get("step_start") - if renderer: - renderer.phase = phase - renderer.label = label - renderer.render(event) - else: - from rendering.events import StepStartRenderer - StepStartRenderer(ctx, phase=phase, label=label).render(event) - elif event_type in renderers: - renderers[event_type].render(event) - else: - unknown = renderers.get("unknown") - if unknown is None: - from rendering.events import UnknownEventRenderer - unknown = UnknownEventRenderer(ctx) - unknown.render(event) - - -# --------------------------------------------------------------------------- -# Fatal error display +# Fatal error display (CLI concern) # --------------------------------------------------------------------------- def _emit_fatal_error(console: Any, title: str, message: str) -> None: import _colors as C formatted = C.fail(f"{title}: {message}") if HAVE_RICH: + from rich.panel import Panel + from rich.text import Text console.print(Panel(Text(message, style="red"), title=title, border_style="red")) - print(formatted, file=__import__("sys").stderr) - - -# 
--------------------------------------------------------------------------- -# LLM finish reason classification (canonical definitions in rendering.events) -# --------------------------------------------------------------------------- - -from rendering.events import ( - _FINISH_TERMINAL_OK, - _FINISH_MID_TURN, - _FINISH_FAILURE, -) \ No newline at end of file + print(formatted, file=sys.stderr) diff --git a/tools/rendering/dispatch.py b/tools/rendering/dispatch.py new file mode 100644 index 0000000..d89bb03 --- /dev/null +++ b/tools/rendering/dispatch.py @@ -0,0 +1,116 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Rendering dispatch: Rich availability detection, rendering context +construction, and the ``render_event()`` event dispatcher. + +This module is the composition root for rendering — it wires specific +event renderers into a ``RenderContext`` and provides the single +``render_event()`` entry point used by both phase and chat paths. 
+""" + +from __future__ import annotations + +from typing import Any + +from codecome.config import ROOT + +# --------------------------------------------------------------------------- +# Rich availability +# --------------------------------------------------------------------------- + +try: + from rich.console import Console, Group + from rich.json import JSON + from rich.markdown import Markdown + from rich.panel import Panel + from rich.rule import Rule + from rich.text import Text + + HAVE_RICH = True +except ImportError: # pragma: no cover + Console = Any # type: ignore[assignment] + Group = tuple # type: ignore[assignment] + JSON = None # type: ignore[assignment] + Markdown = None # type: ignore[assignment] + Panel = None # type: ignore[assignment] + Rule = None # type: ignore[assignment] + Text = None # type: ignore[assignment] + HAVE_RICH = False + + +# --------------------------------------------------------------------------- +# Rendering context cache +# --------------------------------------------------------------------------- + +_RENDERING_CTX_CACHE: dict[str, Any] = {} + + +def _get_rendering_ctx(console: Any) -> Any: + mode = "rich" if (HAVE_RICH and console is not None) else "plain" + if mode in _RENDERING_CTX_CACHE: + ctx = _RENDERING_CTX_CACHE[mode] + ctx.cache.invalidate_stale() + return ctx + from rendering.cache import SnapshotCache + from rendering.context import RenderContext + from rendering.settings import RenderSettings + from rendering.sink import PlainSink, RichConsoleSink + + if mode == "rich": + sink = RichConsoleSink(console) + else: + sink = PlainSink() + ctx = RenderContext( + root=ROOT, + sink=sink, + settings=RenderSettings.from_env(), + cache=SnapshotCache(), + ) + from rendering import events as _evts + ctx._renderers = { + "server.connected": _evts.ServerConnectedRenderer(ctx), + "server.heartbeat": _evts.ServerHeartbeatRenderer(ctx), + "message.updated": _evts.MessageUpdatedRenderer(ctx), + "text": 
_evts.TextEventRenderer(ctx), + "reasoning": _evts.ReasoningEventRenderer(ctx), + "tool_use": _evts.ToolUseEventRenderer(ctx), + "step_start": _evts.StepStartRenderer(ctx), + "step_finish": _evts.StepFinishRenderer(ctx), + "error": _evts.ErrorEventRenderer(ctx), + "session.status": _evts.SessionStatusRenderer(ctx), + "session.diff": _evts.SessionDiffRenderer(ctx), + "subagent.status": _evts.SubagentStatusRenderer(ctx), + "unknown": _evts.UnknownEventRenderer(ctx), + } + _RENDERING_CTX_CACHE[mode] = ctx + return ctx + + +# --------------------------------------------------------------------------- +# Event dispatcher +# --------------------------------------------------------------------------- + +def render_event(console: Console, phase: str, label: str, event: dict[str, Any]) -> None: + event_type = event.get("type") + ctx = _get_rendering_ctx(console) + renderers = getattr(ctx, "_renderers", {}) + + if event_type == "step_start": + renderer = renderers.get("step_start") + if renderer: + renderer.phase = phase + renderer.label = label + renderer.render(event) + else: + from rendering.events import StepStartRenderer + StepStartRenderer(ctx, phase=phase, label=label).render(event) + elif event_type in renderers: + renderers[event_type].render(event) + else: + unknown = renderers.get("unknown") + if unknown is None: + from rendering.events import UnknownEventRenderer + unknown = UnknownEventRenderer(ctx) + unknown.render(event) diff --git a/tools/rendering/tools/command.py b/tools/rendering/tools/command/__init__.py similarity index 89% rename from tools/rendering/tools/command.py rename to tools/rendering/tools/command/__init__.py index 46f0368..c51c489 100644 --- a/tools/rendering/tools/command.py +++ b/tools/rendering/tools/command/__init__.py @@ -26,10 +26,10 @@ def __init__(self, context): @property def interceptors(self): if self._interceptors is None: - from rendering.tools.interceptors.sandbox_bootstrap import SandboxBootstrapInterceptor - from 
rendering.tools.interceptors.rtk_read import RtkReadInterceptor - from rendering.tools.interceptors.rtk_grep import RtkGrepInterceptor - from rendering.tools.interceptors.shell_listing import ShellListingInterceptor + from rendering.tools.command.interceptors.sandbox_bootstrap import SandboxBootstrapInterceptor + from rendering.tools.command.interceptors.rtk_read import RtkReadInterceptor + from rendering.tools.command.interceptors.rtk_grep import RtkGrepInterceptor + from rendering.tools.command.interceptors.shell_listing import ShellListingInterceptor self._interceptors = [ SandboxBootstrapInterceptor(), RtkReadInterceptor(), diff --git a/tools/rendering/tools/interceptors/__init__.py b/tools/rendering/tools/command/interceptors/__init__.py similarity index 53% rename from tools/rendering/tools/interceptors/__init__.py rename to tools/rendering/tools/command/interceptors/__init__.py index a7a21e2..9f36b56 100644 --- a/tools/rendering/tools/interceptors/__init__.py +++ b/tools/rendering/tools/command/interceptors/__init__.py @@ -8,11 +8,11 @@ from __future__ import annotations -from rendering.tools.interceptors.base import CommandExecutionInterceptor -from rendering.tools.interceptors.rtk_grep import RtkGrepInterceptor -from rendering.tools.interceptors.rtk_read import RtkReadInterceptor -from rendering.tools.interceptors.sandbox_bootstrap import SandboxBootstrapInterceptor -from rendering.tools.interceptors.shell_listing import ShellListingInterceptor +from rendering.tools.command.interceptors.base import CommandExecutionInterceptor +from rendering.tools.command.interceptors.rtk_grep import RtkGrepInterceptor +from rendering.tools.command.interceptors.rtk_read import RtkReadInterceptor +from rendering.tools.command.interceptors.sandbox_bootstrap import SandboxBootstrapInterceptor +from rendering.tools.command.interceptors.shell_listing import ShellListingInterceptor __all__ = [ "CommandExecutionInterceptor", diff --git a/tools/rendering/tools/interceptors/base.py 
b/tools/rendering/tools/command/interceptors/base.py similarity index 100% rename from tools/rendering/tools/interceptors/base.py rename to tools/rendering/tools/command/interceptors/base.py diff --git a/tools/rendering/tools/interceptors/rtk_grep.py b/tools/rendering/tools/command/interceptors/rtk_grep.py similarity index 98% rename from tools/rendering/tools/interceptors/rtk_grep.py rename to tools/rendering/tools/command/interceptors/rtk_grep.py index 20fe208..c586004 100644 --- a/tools/rendering/tools/interceptors/rtk_grep.py +++ b/tools/rendering/tools/command/interceptors/rtk_grep.py @@ -13,7 +13,7 @@ from typing import Any, Optional from rendering.tools.base import ToolRenderer -from rendering.tools.interceptors.base import CommandExecutionInterceptor +from rendering.tools.command.interceptors.base import CommandExecutionInterceptor # --------------------------------------------------------------------------- # Regexes for rtk grep output normalisation diff --git a/tools/rendering/tools/interceptors/rtk_read.py b/tools/rendering/tools/command/interceptors/rtk_read.py similarity index 99% rename from tools/rendering/tools/interceptors/rtk_read.py rename to tools/rendering/tools/command/interceptors/rtk_read.py index f85c587..a61d898 100644 --- a/tools/rendering/tools/interceptors/rtk_read.py +++ b/tools/rendering/tools/command/interceptors/rtk_read.py @@ -16,7 +16,7 @@ from typing import Any, Optional from rendering.tools.base import ToolRenderer -from rendering.tools.interceptors.base import CommandExecutionInterceptor +from rendering.tools.command.interceptors.base import CommandExecutionInterceptor from rendering.utils import relativize_path # --------------------------------------------------------------------------- diff --git a/tools/rendering/tools/interceptors/sandbox_bootstrap.py b/tools/rendering/tools/command/interceptors/sandbox_bootstrap.py similarity index 99% rename from tools/rendering/tools/interceptors/sandbox_bootstrap.py rename to 
tools/rendering/tools/command/interceptors/sandbox_bootstrap.py index 07e001a..92614f3 100644 --- a/tools/rendering/tools/interceptors/sandbox_bootstrap.py +++ b/tools/rendering/tools/command/interceptors/sandbox_bootstrap.py @@ -19,7 +19,7 @@ from typing import Any, Optional from rendering.tools.base import ToolRenderer -from rendering.tools.interceptors.base import CommandExecutionInterceptor +from rendering.tools.command.interceptors.base import CommandExecutionInterceptor # --------------------------------------------------------------------------- # Constants diff --git a/tools/rendering/tools/interceptors/shell_listing.py b/tools/rendering/tools/command/interceptors/shell_listing.py similarity index 98% rename from tools/rendering/tools/interceptors/shell_listing.py rename to tools/rendering/tools/command/interceptors/shell_listing.py index 51bde1c..aed520a 100644 --- a/tools/rendering/tools/interceptors/shell_listing.py +++ b/tools/rendering/tools/command/interceptors/shell_listing.py @@ -13,7 +13,7 @@ from typing import Any, Optional from rendering.tools.base import ToolRenderer -from rendering.tools.interceptors.base import CommandExecutionInterceptor +from rendering.tools.command.interceptors.base import CommandExecutionInterceptor # --------------------------------------------------------------------------- # Regexes From 6d50998ee32ca9227e96ed69c8d43fd6abd1a23b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 20:16:20 +0200 Subject: [PATCH 61/65] refactor(a8-batch5): extract phase harness from cli.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Create codecome/harness.py with run_phase_mode() — the phase-mode retry/resume loop, server lifecycle, and completion reporting - Slim cli.py to ~80 lines: argument parsing + dispatch to run_phase_mode() or _run_chat_mode() - cli.py now mirrors the clean dispatch pattern: parse → version check → show-model | chat | phase 
--- tools/codecome/cli.py | 333 +---------------------------------- tools/codecome/harness.py | 359 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 365 insertions(+), 327 deletions(-) create mode 100644 tools/codecome/harness.py diff --git a/tools/codecome/cli.py b/tools/codecome/cli.py index 127e456..437d114 100644 --- a/tools/codecome/cli.py +++ b/tools/codecome/cli.py @@ -1,40 +1,19 @@ # Copyright (C) 2025-2026 Pablo Ruiz García # SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later -"""CLI entry point and argument parsing for the CodeCome phase runner.""" +"""CLI entry point and argument parsing for the CodeCome runner.""" from __future__ import annotations import argparse -import dataclasses import os -import signal -import subprocess import sys -import time from pathlib import Path -from typing import Any, Optional sys.path.insert(0, str(Path(__file__).resolve().parents[1])) -import _colors as C -from opencode.serve import ServerRunner, ServerRunnerError - -from codecome.cli_render import ( - HAVE_RICH, Console, Panel, Rule, Text, - build_console, _get_rendering_ctx, render_event, _emit_fatal_error, - _FINISH_TERMINAL_OK, _FINISH_MID_TURN, _FINISH_FAILURE, -) -import codecome.cli_render as _clr from codecome.version import check_opencode_version -from codecome.config import ( - truthy_env, resolve_color_mode, load_prompt, - resolve_runtime_config, show_model_table, -) -from phases.completion import ( - check_phase_graceful_completion, - build_phase_resume_prompt, build_frontmatter_resume_prompt, -) +from codecome.config import show_model_table # --------------------------------------------------------------------------- @@ -74,9 +53,6 @@ def build_parser() -> argparse.ArgumentParser: # --------------------------------------------------------------------------- def main() -> int: - RUN_START_TIME = time.time() - iteration_retry_count = 0 - frontmatter_retry_count = 0 check_opencode_version() parser = build_parser() @@ -87,8 +63,8 @@ def 
main() -> int: return show_model_table(agent_name) if args.chat: - from chat.harness import _run_chat_mode as _chat_run - return _chat_run(parser, args) + from chat.harness import _run_chat_mode + return _run_chat_mode(parser, args) missing = [n for n in ("phase", "label", "agent", "prompt_file") if getattr(args, n) is None] if missing: @@ -97,302 +73,5 @@ def main() -> int: + ", ".join("--" + n.replace("_", "-") for n in missing) ) - color_mode = resolve_color_mode(args.color) - console = build_console(color_mode) - - _rendering_ctx = _get_rendering_ctx(console) - _overrides: dict[str, Any] = {} - if args.read_display_lines is not None: - _overrides["read_display_lines"] = args.read_display_lines - if args.write_content_lines is not None: - _overrides["write_content_lines"] = args.write_content_lines - if args.write_diff_limit is not None: - _overrides["write_diff_limit"] = args.write_diff_limit - if args.edit_diff_lines is not None: - _overrides["edit_diff_lines"] = args.edit_diff_lines - if _overrides: - _rendering_ctx.settings = dataclasses.replace(_rendering_ctx.settings, **_overrides) - - prompt_file = _clr.ROOT / args.prompt_file - prompt = load_prompt(prompt_file, args.finding, phase=args.phase) - rc = resolve_runtime_config(args.agent) - model = rc.model - variant = rc.variant - thinking_on = rc.thinking_on - - model_label = model or "(unknown)" - variant_label = variant or "(unknown)" - - parts = [f"agent={args.agent}", f"model={model_label}"] - if variant is not None: - parts.append(f"variant={variant_label}") - parts.append(f"thinking={'on' if thinking_on else 'off'}") - parts.append(f"prompt={args.prompt_file}") - - if variant is not None: - sources_tail = ( - f"(model source: {rc.model_source}, variant source: {rc.variant_source}, " - f"thinking source: {rc.thinking_source})" - ) - else: - sources_tail = f"(model source: {rc.model_source}, thinking source: {rc.thinking_source})" - - main_line = " ".join(parts) + " " + sources_tail - - if HAVE_RICH: - 
console.print(Rule(title=f"Phase {args.phase}: {args.label}", style="bold cyan")) - console.print(Text(main_line, style="dim")) - if args.finding: - console.print(Text(f"finding={args.finding}", style="dim")) - if str(args.phase) == "1": - console.print(Text( - "Phase 1 has two sub-stages: 1a recon notes, 1b sandbox bootstrap.", - style="cyan", - )) - else: - print(C.header(f"Phase {args.phase}: {args.label}")) - print(C.info(main_line)) - if args.finding: - print(C.info(f"finding={args.finding}")) - if str(args.phase) == "1": - print(C.info( - "Phase 1 has two sub-stages: 1a recon notes, 1b sandbox bootstrap." - )) - print(C.warn("rich is not installed; using plain structured output fallback")) - - attempt_number = 0 - last_session_id: str = "" - last_finish_reason: Optional[str] = None - last_finish_tokens: dict[str, Any] = {} - last_permission_error: Optional[str] = None - any_step_finish_seen = False - step_finish_count = 0 - transcript_path: Path = Path() - finish_warning: Optional[str] = None - - os.environ["_CODECOME_INSIDE_HARNESS"] = "1" - - runner = ServerRunner() - server_info: Any = None - try: - server_info = runner.start(hostname="127.0.0.1", log_level=args.log_level) - except ServerRunnerError as exc: - _emit_fatal_error(console, "Server Error", str(exc)) - return 1 - - base_url = server_info.base_url - - def _forward_signal(signum: int, _frame: Any) -> None: - info = runner.info - if info is not None: - try: - os.killpg(info.pid, signum) - except ProcessLookupError: - pass - signal.signal(signum, signal.SIG_DFL) - os.kill(os.getpid(), signum) - - previous_sigint = signal.signal(signal.SIGINT, _forward_signal) - previous_sigterm = signal.signal(signal.SIGTERM, _forward_signal) - - from codecome.runner import _run_single_attempt - from rendering.events import _reset_subagent_state - try: - while True: - attempt_number += 1 - # Clear per-session dedup state so retries don't suppress updates. 
- _reset_subagent_state() - returncode, session_id, run_result, transcript_path = _run_single_attempt( - args, console, prompt, model, variant, thinking_on, base_url, - server_info.password, str(_clr.ROOT), - render_event_fn=render_event, - emit_fatal_error_fn=_emit_fatal_error, - existing_session_id=last_session_id or None - ) - - if returncode != 0: - break - - last_session_id = session_id - last_finish_reason = run_result.last_finish_reason - last_finish_tokens = run_result.last_finish_tokens - last_permission_error = run_result.last_permission_error - any_step_finish_seen = run_result.any_step_finish_seen - step_finish_count = run_result.step_finish_count - - if not any_step_finish_seen: - finish_warning = ( - "CodeCome observed no step_finish events in the JSON stream, so the model/provider did not emit a " - "completion signal. Treating the run as incomplete." - ) - elif last_finish_reason is None: - finish_warning = ( - "CodeCome observed a step_finish event without a finish reason, so the model/provider completion " - "state is ambiguous. Treating the run as incomplete." - ) - elif last_finish_reason in _FINISH_FAILURE: - finish_warning = ( - f"CodeCome observed finish reason '{last_finish_reason}', which means the model/provider stopped " - "before completing the phase. Treating the run as incomplete rather than as a CodeCome logic error." - ) - elif last_finish_reason in _FINISH_MID_TURN: - if last_permission_error: - finish_warning = ( - f"{last_permission_error}; CodeCome observed the model/provider stop mid-turn with finish " - f"reason '{last_finish_reason}', so the phase did not reach a final completion signal." - ) - else: - finish_warning = ( - f"CodeCome observed the model/provider stop mid-turn with finish reason '{last_finish_reason}' " - f"after {step_finish_count} completed loops, without a terminal completion signal. Treating the " - "phase as incomplete because the model/provider cut off the response." 
- ) - elif last_finish_reason not in _FINISH_TERMINAL_OK: - finish_warning = ( - f"CodeCome observed an unrecognised model/provider finish reason '{last_finish_reason}'. Treating " - "the run as incomplete rather than assuming success." - ) - - if finish_warning is not None: - if ( - last_finish_reason in _FINISH_MID_TURN - and last_permission_error is None - and check_phase_graceful_completion(args.phase, args.finding, RUN_START_TIME) - ): - msg = ( - f"CodeCome observed a mid-turn model/provider cutoff for Phase {args.phase} after {step_finish_count} " - "completed loops, but the required durable artifacts were already written. Treating the phase as complete." - ) - if HAVE_RICH: - console.print(Text(msg, style="bold green")) - else: - print(C.ok(msg)) - finish_warning = None - last_finish_reason = "graceful_forgiveness" - else: - returncode = 2 - - if returncode == 0: - validation_result = subprocess.run( - [sys.executable, "tools/check-frontmatter.py"], - cwd=_clr.ROOT, - capture_output=True, - text=True - ) - if validation_result.returncode != 0: - max_frontmatter_retries = 2 - validation_output = (validation_result.stderr or validation_result.stdout).strip() or "(no validator output)" - if frontmatter_retry_count < max_frontmatter_retries: - frontmatter_retry_count += 1 - msg = ( - "\n[Auto-Correction] The model completed a turn, but its output failed local frontmatter " - f"validation. CodeCome will resume the same session and ask for a minimal repair " - f"(retry {frontmatter_retry_count}/{max_frontmatter_retries})." - ) - if HAVE_RICH: - console.print(Text(msg, style="bold yellow")) - else: - print(C.warn(msg)) - if last_session_id and last_session_id != "id": - prompt = build_frontmatter_resume_prompt(args.phase, args.finding, validation_output) - continue - else: - returncode = 2 - finish_warning = ( - "The model output failed local frontmatter validation, and CodeCome could not determine a " - "session ID to resume for repair. 
Treating the phase as incomplete so the validator output " - "can be reported back with the saved transcript." - ) - else: - returncode = 2 - finish_warning = ( - f"The model output still fails local frontmatter validation after {max_frontmatter_retries} " - "auto-repair attempts. Treating the phase as incomplete so the validation errors can be reported back." - ) - msg = f"\n[Warning] Frontmatter errors persist after {max_frontmatter_retries} auto-retries." - if HAVE_RICH: - console.print(Text(msg, style="bold red")) - else: - print(C.fail(msg)) - print(validation_output) - break - break - - if returncode == 2 and last_finish_reason in _FINISH_MID_TURN: - max_iteration_retries = int(os.environ.get("CODECOME_MAX_ITERATION_RETRIES", "1")) - if iteration_retry_count < max_iteration_retries: - iteration_retry_count += 1 - msg = ( - "\n[Auto-Resume] CodeCome observed a mid-turn model/provider cutoff and will resume the same " - f"session once to let the model finish the interrupted work (retry {iteration_retry_count}/{max_iteration_retries})." - ) - if HAVE_RICH: - console.print(Text(msg, style="bold yellow")) - else: - print(C.warn(msg)) - if last_session_id and last_session_id != "id": - prompt = build_phase_resume_prompt( - args.phase, args.finding, last_finish_reason, step_finish_count - ) - continue - else: - finish_warning = ( - "CodeCome correctly detected that the model/provider stopped mid-turn, but it could not determine " - "a session ID for automatic continuation. Treating the phase as incomplete." 
- ) - if HAVE_RICH: - console.print(Text("Could not determine session ID to resume.", style="red")) - else: - print(C.fail("Could not determine session ID to resume.")) - break - - break - finally: - signal.signal(signal.SIGINT, previous_sigint) - signal.signal(signal.SIGTERM, previous_sigterm) - runner.stop() - - if returncode == 0: - if HAVE_RICH: - console.print(Rule(style="green")) - console.print(Text(f"{C.SYM_OK} Phase {args.phase} completed successfully", style="green")) - console.print(Text( - f" finish reason: {last_finish_reason!r} " - f"transcript: {transcript_path.relative_to(_clr.ROOT)}", - style="dim", - )) - else: - print(C.ok(f"Phase {args.phase} completed successfully")) - print(f" finish reason: {last_finish_reason!r} transcript: {transcript_path.relative_to(_clr.ROOT)}") - elif returncode == 130: - if HAVE_RICH: - console.print(Rule(style="yellow")) - console.print(Text(f"{C.SYM_WARN} Phase {args.phase} interrupted", style="yellow")) - else: - print(C.warn(f"Phase {args.phase} interrupted")) - else: - if HAVE_RICH: - console.print(Rule(style="red")) - console.print(Text( - f"{C.SYM_FAIL} Phase {args.phase} did not complete cleanly (exit code {returncode})", - style="red", - )) - if finish_warning: - console.print(Text(f" reason: {finish_warning}", style="red")) - console.print(Text(f" transcript: {transcript_path.relative_to(_clr.ROOT)}", style="dim")) - console.print(Text( - " hint: the run is likely partial; rerun the phase or " - "switch to a different model/provider before retrying", - style="yellow", - )) - else: - print(C.fail(f"Phase {args.phase} did not complete cleanly (exit code {returncode})")) - if finish_warning: - print(C.fail(f" reason: {finish_warning}")) - print(f" transcript: {transcript_path.relative_to(_clr.ROOT)}") - print(C.warn( - " hint: the run is likely partial; rerun the phase or " - "switch to a different model/provider before retrying" - )) - - return returncode + from codecome.harness import run_phase_mode + return 
run_phase_mode(args) diff --git a/tools/codecome/harness.py b/tools/codecome/harness.py new file mode 100644 index 0000000..838f08f --- /dev/null +++ b/tools/codecome/harness.py @@ -0,0 +1,359 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Phase harness: retry/resume loop, server lifecycle, and completion +reporting for phase-mode runs. + +Parallel to ``chat.harness`` which owns the chat-mode lifecycle. +``cli.py`` dispatches to one of the two harnesses after parsing args. +""" + +from __future__ import annotations + +import argparse +import dataclasses +import os +import signal +import subprocess +import sys +import time +from pathlib import Path +from typing import Any, Optional + +import _colors as C +from opencode.serve import ServerRunner, ServerRunnerError + +from codecome.cli_render import ( + HAVE_RICH, + build_console, _get_rendering_ctx, render_event, _emit_fatal_error, + _FINISH_TERMINAL_OK, _FINISH_MID_TURN, _FINISH_FAILURE, +) +from codecome.config import ROOT, resolve_color_mode, load_prompt, resolve_runtime_config +from phases.completion import ( + check_phase_graceful_completion, + build_phase_resume_prompt, build_frontmatter_resume_prompt, +) + + +def run_phase_mode(args: argparse.Namespace) -> int: + """Run a single phase with auto-retry/resume. + + This is the phase-mode equivalent of ``chat.harness._run_chat_mode``. 
+ """ + RUN_START_TIME = time.time() + iteration_retry_count = 0 + frontmatter_retry_count = 0 + + color_mode = resolve_color_mode(args.color) + console = build_console(color_mode) + + _rendering_ctx = _get_rendering_ctx(console) + _overrides: dict[str, Any] = {} + if args.read_display_lines is not None: + _overrides["read_display_lines"] = args.read_display_lines + if args.write_content_lines is not None: + _overrides["write_content_lines"] = args.write_content_lines + if args.write_diff_limit is not None: + _overrides["write_diff_limit"] = args.write_diff_limit + if args.edit_diff_lines is not None: + _overrides["edit_diff_lines"] = args.edit_diff_lines + if _overrides: + _rendering_ctx.settings = dataclasses.replace(_rendering_ctx.settings, **_overrides) + + prompt_file = ROOT / args.prompt_file + prompt = load_prompt(prompt_file, args.finding, phase=args.phase) + rc = resolve_runtime_config(args.agent) + model = rc.model + variant = rc.variant + thinking_on = rc.thinking_on + + model_label = model or "(unknown)" + variant_label = variant or "(unknown)" + + parts = [f"agent={args.agent}", f"model={model_label}"] + if variant is not None: + parts.append(f"variant={variant_label}") + parts.append(f"thinking={'on' if thinking_on else 'off'}") + parts.append(f"prompt={args.prompt_file}") + + if variant is not None: + sources_tail = ( + f"(model source: {rc.model_source}, variant source: {rc.variant_source}, " + f"thinking source: {rc.thinking_source})" + ) + else: + sources_tail = f"(model source: {rc.model_source}, thinking source: {rc.thinking_source})" + + main_line = " ".join(parts) + " " + sources_tail + + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(title=f"Phase {args.phase}: {args.label}", style="bold cyan")) + console.print(Text(main_line, style="dim")) + if args.finding: + console.print(Text(f"finding={args.finding}", style="dim")) + if str(args.phase) == "1": + console.print(Text( + "Phase 1 has two 
sub-stages: 1a recon notes, 1b sandbox bootstrap.", + style="cyan", + )) + else: + print(C.header(f"Phase {args.phase}: {args.label}")) + print(C.info(main_line)) + if args.finding: + print(C.info(f"finding={args.finding}")) + if str(args.phase) == "1": + print(C.info( + "Phase 1 has two sub-stages: 1a recon notes, 1b sandbox bootstrap." + )) + print(C.warn("rich is not installed; using plain structured output fallback")) + + attempt_number = 0 + last_session_id: str = "" + last_finish_reason: Optional[str] = None + last_finish_tokens: dict[str, Any] = {} + last_permission_error: Optional[str] = None + any_step_finish_seen = False + step_finish_count = 0 + transcript_path: Path = Path() + finish_warning: Optional[str] = None + + os.environ["_CODECOME_INSIDE_HARNESS"] = "1" + + runner = ServerRunner() + server_info: Any = None + try: + server_info = runner.start(hostname="127.0.0.1", log_level=args.log_level) + except ServerRunnerError as exc: + _emit_fatal_error(console, "Server Error", str(exc)) + return 1 + + base_url = server_info.base_url + + def _forward_signal(signum: int, _frame: Any) -> None: + info = runner.info + if info is not None: + try: + os.killpg(info.pid, signum) + except ProcessLookupError: + pass + signal.signal(signum, signal.SIG_DFL) + os.kill(os.getpid(), signum) + + previous_sigint = signal.signal(signal.SIGINT, _forward_signal) + previous_sigterm = signal.signal(signal.SIGTERM, _forward_signal) + + from codecome.runner import _run_single_attempt + from rendering.events import _reset_subagent_state + try: + while True: + attempt_number += 1 + # Clear per-session dedup state so retries don't suppress updates. 
+ _reset_subagent_state() + returncode, session_id, run_result, transcript_path = _run_single_attempt( + args, console, prompt, model, variant, thinking_on, base_url, + server_info.password, str(ROOT), + render_event_fn=render_event, + emit_fatal_error_fn=_emit_fatal_error, + existing_session_id=last_session_id or None + ) + + if returncode != 0: + break + + last_session_id = session_id + last_finish_reason = run_result.last_finish_reason + last_finish_tokens = run_result.last_finish_tokens + last_permission_error = run_result.last_permission_error + any_step_finish_seen = run_result.any_step_finish_seen + step_finish_count = run_result.step_finish_count + + if not any_step_finish_seen: + finish_warning = ( + "CodeCome observed no step_finish events in the JSON stream, so the model/provider did not emit a " + "completion signal. Treating the run as incomplete." + ) + elif last_finish_reason is None: + finish_warning = ( + "CodeCome observed a step_finish event without a finish reason, so the model/provider completion " + "state is ambiguous. Treating the run as incomplete." + ) + elif last_finish_reason in _FINISH_FAILURE: + finish_warning = ( + f"CodeCome observed finish reason '{last_finish_reason}', which means the model/provider stopped " + "before completing the phase. Treating the run as incomplete rather than as a CodeCome logic error." + ) + elif last_finish_reason in _FINISH_MID_TURN: + if last_permission_error: + finish_warning = ( + f"{last_permission_error}; CodeCome observed the model/provider stop mid-turn with finish " + f"reason '{last_finish_reason}', so the phase did not reach a final completion signal." + ) + else: + finish_warning = ( + f"CodeCome observed the model/provider stop mid-turn with finish reason '{last_finish_reason}' " + f"after {step_finish_count} completed loops, without a terminal completion signal. Treating the " + "phase as incomplete because the model/provider cut off the response." 
+ ) + elif last_finish_reason not in _FINISH_TERMINAL_OK: + finish_warning = ( + f"CodeCome observed an unrecognised model/provider finish reason '{last_finish_reason}'. Treating " + "the run as incomplete rather than assuming success." + ) + + if finish_warning is not None: + if ( + last_finish_reason in _FINISH_MID_TURN + and last_permission_error is None + and check_phase_graceful_completion(args.phase, args.finding, RUN_START_TIME) + ): + msg = ( + f"CodeCome observed a mid-turn model/provider cutoff for Phase {args.phase} after {step_finish_count} " + "completed loops, but the required durable artifacts were already written. Treating the phase as complete." + ) + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold green")) + else: + print(C.ok(msg)) + finish_warning = None + last_finish_reason = "graceful_forgiveness" + else: + returncode = 2 + + if returncode == 0: + validation_result = subprocess.run( + [sys.executable, "tools/check-frontmatter.py"], + cwd=ROOT, + capture_output=True, + text=True + ) + if validation_result.returncode != 0: + max_frontmatter_retries = 2 + validation_output = (validation_result.stderr or validation_result.stdout).strip() or "(no validator output)" + if frontmatter_retry_count < max_frontmatter_retries: + frontmatter_retry_count += 1 + msg = ( + "\n[Auto-Correction] The model completed a turn, but its output failed local frontmatter " + f"validation. CodeCome will resume the same session and ask for a minimal repair " + f"(retry {frontmatter_retry_count}/{max_frontmatter_retries})." 
+ ) + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold yellow")) + else: + print(C.warn(msg)) + if last_session_id and last_session_id != "id": + prompt = build_frontmatter_resume_prompt(args.phase, args.finding, validation_output) + continue + else: + returncode = 2 + finish_warning = ( + "The model output failed local frontmatter validation, and CodeCome could not determine a " + "session ID to resume for repair. Treating the phase as incomplete so the validator output " + "can be reported back with the saved transcript." + ) + else: + returncode = 2 + finish_warning = ( + f"The model output still fails local frontmatter validation after {max_frontmatter_retries} " + "auto-repair attempts. Treating the phase as incomplete so the validation errors can be reported back." + ) + msg = f"\n[Warning] Frontmatter errors persist after {max_frontmatter_retries} auto-retries." + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold red")) + else: + print(C.fail(msg)) + print(validation_output) + break + break + + if returncode == 2 and last_finish_reason in _FINISH_MID_TURN: + max_iteration_retries = int(os.environ.get("CODECOME_MAX_ITERATION_RETRIES", "1")) + if iteration_retry_count < max_iteration_retries: + iteration_retry_count += 1 + msg = ( + "\n[Auto-Resume] CodeCome observed a mid-turn model/provider cutoff and will resume the same " + f"session once to let the model finish the interrupted work (retry {iteration_retry_count}/{max_iteration_retries})." 
+ ) + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold yellow")) + else: + print(C.warn(msg)) + if last_session_id and last_session_id != "id": + prompt = build_phase_resume_prompt( + args.phase, args.finding, last_finish_reason, step_finish_count + ) + continue + else: + finish_warning = ( + "CodeCome correctly detected that the model/provider stopped mid-turn, but it could not determine " + "a session ID for automatic continuation. Treating the phase as incomplete." + ) + if HAVE_RICH: + from rich.text import Text + console.print(Text("Could not determine session ID to resume.", style="red")) + else: + print(C.fail("Could not determine session ID to resume.")) + break + + break + finally: + signal.signal(signal.SIGINT, previous_sigint) + signal.signal(signal.SIGTERM, previous_sigterm) + runner.stop() + + if returncode == 0: + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(style="green")) + console.print(Text(f"{C.SYM_OK} Phase {args.phase} completed successfully", style="green")) + console.print(Text( + f" finish reason: {last_finish_reason!r} " + f"transcript: {transcript_path.relative_to(ROOT)}", + style="dim", + )) + else: + print(C.ok(f"Phase {args.phase} completed successfully")) + print(f" finish reason: {last_finish_reason!r} transcript: {transcript_path.relative_to(ROOT)}") + elif returncode == 130: + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(style="yellow")) + console.print(Text(f"{C.SYM_WARN} Phase {args.phase} interrupted", style="yellow")) + else: + print(C.warn(f"Phase {args.phase} interrupted")) + else: + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(style="red")) + console.print(Text( + f"{C.SYM_FAIL} Phase {args.phase} did not complete cleanly (exit code {returncode})", + style="red", + )) + if finish_warning: + console.print(Text(f" reason: {finish_warning}", style="red")) 
+ console.print(Text(f" transcript: {transcript_path.relative_to(ROOT)}", style="dim")) + console.print(Text( + " hint: the run is likely partial; rerun the phase or " + "switch to a different model/provider before retrying", + style="yellow", + )) + else: + print(C.fail(f"Phase {args.phase} did not complete cleanly (exit code {returncode})")) + if finish_warning: + print(C.fail(f" reason: {finish_warning}")) + print(f" finish reason: {last_finish_reason!r} transcript: {transcript_path.relative_to(ROOT)}") + print(C.warn( + " hint: the run is likely partial; rerun the phase or " + "switch to a different model/provider before retrying" + )) + + return returncode From ee96816d6e68f3790a015a3d32e8b1ae859139c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 20:21:06 +0200 Subject: [PATCH 62/65] fix(a8-batch6): fix tests for API changes, add RenderSettings regression test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix test_chat_app.py: transcript_fp → transcript, .write → .write_event - Fix test_codecome_runner.py: use Transcript class, monkeypatch Transcript.for_phase instead of open_phase_transcript - Fix test_command_interceptors.py and test_rendering_tools.py: update import paths from rendering.tools.interceptors → rendering.tools.command.interceptors - Add test_render_settings_propagation.py: verify CLI flags (--read-display-lines, etc.) 
propagate into RenderSettings via dataclasses.replace, matching the harness pattern --- tests/test_chat_app.py | 4 +- tests/test_codecome_runner.py | 24 ++++----- tests/test_command_interceptors.py | 8 +-- tests/test_render_settings_propagation.py | 59 +++++++++++++++++++++++ tests/test_rendering_tools.py | 8 +-- 5 files changed, 82 insertions(+), 21 deletions(-) create mode 100644 tests/test_render_settings_propagation.py diff --git a/tests/test_chat_app.py b/tests/test_chat_app.py index 09085d6..fac97fc 100644 --- a/tests/test_chat_app.py +++ b/tests/test_chat_app.py @@ -42,7 +42,7 @@ def test_chat_render_and_log(monkeypatch): mock_args.debug = True class FakeSelf: - transcript_fp = mock_transcript + transcript = mock_transcript args = mock_args thinking_on = True _modeline_info = "" @@ -61,7 +61,7 @@ def fake_render(console, phase, label, event): assert len(rendered) == 1 assert "gpt-5" in fake_self._modeline_info - mock_transcript.write.assert_called() + mock_transcript.write_event.assert_called() def test_chat_update_modeline_info(): class FakeSelf: diff --git a/tests/test_codecome_runner.py b/tests/test_codecome_runner.py index e7d01aa..c13528e 100644 --- a/tests/test_codecome_runner.py +++ b/tests/test_codecome_runner.py @@ -9,6 +9,7 @@ from unittest.mock import MagicMock from codecome import runner +from codecome.transcript import Transcript from events.phase_loop import RunResult @pytest.fixture @@ -40,7 +41,7 @@ def run(self, render_and_log_fn): def fake_render(console, phase, label, event): rendered_events.append(event) - fake_transcript = MagicMock() + fake_transcript = MagicMock(spec=Transcript) res = runner._consume_events( "http://base", "session_123", mock_console, "1", "Recon", mock_args, @@ -50,10 +51,7 @@ def fake_render(console, phase, label, event): assert isinstance(res, RunResult) assert len(rendered_events) == 1 assert rendered_events[0]["content"] == "hello" - fake_transcript.write.assert_called_once() - import json - written_data = 
json.loads(fake_transcript.write.call_args[0][0]) - assert written_data["content"] == "hello" + fake_transcript.write_event.assert_called_once() def test_run_single_attempt_success(mock_args, mock_console, monkeypatch): monkeypatch.setattr(runner, "create_session", lambda *a, **kw: "new_session") @@ -67,8 +65,9 @@ def fake_consume(*a, **kw): return RunResult() monkeypatch.setattr(runner, "_consume_events", fake_consume) - monkeypatch.setattr(runner, "open_phase_transcript", lambda p, f: (Path("fake.jsonl"), MagicMock())) - monkeypatch.setattr(runner, "close_transcript", lambda f: None) + fake_transcript = MagicMock(spec=Transcript) + fake_transcript.path = Path("fake.jsonl") + monkeypatch.setattr(Transcript, "for_phase", classmethod(lambda cls, p, f: fake_transcript)) code, session_id, res, path = runner._run_single_attempt( mock_args, mock_console, "do work", "model", "var", True, @@ -89,8 +88,9 @@ def fake_consume(*a, **kw): raise ValueError("consumer failed") monkeypatch.setattr(runner, "_consume_events", fake_consume) - monkeypatch.setattr(runner, "open_phase_transcript", lambda p, f: (Path("fake.jsonl"), MagicMock())) - monkeypatch.setattr(runner, "close_transcript", lambda f: None) + fake_transcript = MagicMock(spec=Transcript) + fake_transcript.path = Path("fake.jsonl") + monkeypatch.setattr(Transcript, "for_phase", classmethod(lambda cls, p, f: fake_transcript)) fatal_errors = [] def fake_fatal(console, title, msg): @@ -112,8 +112,10 @@ def test_run_single_attempt_existing_session(mock_args, mock_console, monkeypatc monkeypatch.setattr(runner, "create_session", lambda *a, **kw: created.append(True)) monkeypatch.setattr(runner, "send_prompt_to_session", lambda *a, **kw: None) monkeypatch.setattr(runner, "_consume_events", lambda *a, **kw: RunResult()) - monkeypatch.setattr(runner, "open_phase_transcript", lambda p, f: (Path("fake.jsonl"), MagicMock())) - monkeypatch.setattr(runner, "close_transcript", lambda f: None) + + fake_transcript = 
MagicMock(spec=Transcript) + fake_transcript.path = Path("fake.jsonl") + monkeypatch.setattr(Transcript, "for_phase", classmethod(lambda cls, p, f: fake_transcript)) code, session_id, res, path = runner._run_single_attempt( mock_args, mock_console, "do work", "model", "var", True, diff --git a/tests/test_command_interceptors.py b/tests/test_command_interceptors.py index 3fd70f0..71597b1 100644 --- a/tests/test_command_interceptors.py +++ b/tests/test_command_interceptors.py @@ -8,10 +8,10 @@ from pathlib import Path from typing import Any -from rendering.tools.interceptors.sandbox_bootstrap import SandboxBootstrapInterceptor, _is_sandbox_bootstrap_json_call, _sandbox_payload_matches, _sandbox_glyphs -from rendering.tools.interceptors.rtk_read import _is_bash_shim_call, RtkReadInterceptor, _BashShim -from rendering.tools.interceptors.rtk_grep import _normalize_rtk_grep_output, RtkGrepInterceptor -from rendering.tools.interceptors.shell_listing import _strip_ls_long_format_to_filenames, _parse_find_tree, ShellListingInterceptor +from rendering.tools.command.interceptors.sandbox_bootstrap import SandboxBootstrapInterceptor, _is_sandbox_bootstrap_json_call, _sandbox_payload_matches, _sandbox_glyphs +from rendering.tools.command.interceptors.rtk_read import _is_bash_shim_call, RtkReadInterceptor, _BashShim +from rendering.tools.command.interceptors.rtk_grep import _normalize_rtk_grep_output, RtkGrepInterceptor +from rendering.tools.command.interceptors.shell_listing import _strip_ls_long_format_to_filenames, _parse_find_tree, ShellListingInterceptor # We must map "module.X" to actual functions or classes def dict_to_shim(d): diff --git a/tests/test_render_settings_propagation.py b/tests/test_render_settings_propagation.py new file mode 100644 index 0000000..f15bf69 --- /dev/null +++ b/tests/test_render_settings_propagation.py @@ -0,0 +1,59 @@ +"""Test that CLI render tunables propagate into RenderSettings. 
+ +Regression test for PR #21 comment: --read-display-lines, +--write-content-lines, --write-diff-limit, --edit-diff-lines must reach +the rendering context's settings when the phase harness applies overrides. +""" +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "tools")) + +import dataclasses +import pytest + +from rendering.dispatch import _get_rendering_ctx, _RENDERING_CTX_CACHE + + +@pytest.fixture(autouse=True) +def _clear_ctx_cache(): + """Ensure each test starts with a fresh rendering context cache.""" + _RENDERING_CTX_CACHE.clear() + yield + _RENDERING_CTX_CACHE.clear() + + +def test_cli_overrides_propagate_to_render_settings(): + """Verify that dataclasses.replace on the context settings + correctly overrides the tunables, matching the pattern used + by codecome.harness.run_phase_mode.""" + # Get a plain-mode context (console=None → plain sink) + ctx = _get_rendering_ctx(None) + assert ctx.settings.read_display_lines == 10 # default + assert ctx.settings.write_content_lines == 25 # default + assert ctx.settings.write_diff_limit == 50 # default + assert ctx.settings.edit_diff_lines == 25 # default + + # Apply CLI overrides (same pattern as harness.py) + overrides = { + "read_display_lines": 42, + "write_content_lines": 99, + "write_diff_limit": 200, + "edit_diff_lines": 7, + } + ctx.settings = dataclasses.replace(ctx.settings, **overrides) + + assert ctx.settings.read_display_lines == 42 + assert ctx.settings.write_content_lines == 99 + assert ctx.settings.write_diff_limit == 200 + assert ctx.settings.edit_diff_lines == 7 + + +def test_cached_context_preserves_overrides(): + """Once overrides are applied, subsequent _get_rendering_ctx calls + should return the same context with overrides intact.""" + ctx = _get_rendering_ctx(None) + ctx.settings = dataclasses.replace(ctx.settings, read_display_lines=77) + + ctx2 = _get_rendering_ctx(None) + assert ctx2 is ctx + assert ctx2.settings.read_display_lines == 77 
diff --git a/tests/test_rendering_tools.py b/tests/test_rendering_tools.py index 963373d..c92418b 100644 --- a/tests/test_rendering_tools.py +++ b/tests/test_rendering_tools.py @@ -24,10 +24,10 @@ from rendering.tools.glob import GlobRenderer from rendering.tools.grep import GrepRenderer from rendering.tools.command import CommandRenderer -from rendering.tools.interceptors.sandbox_bootstrap import SandboxBootstrapInterceptor -from rendering.tools.interceptors.rtk_read import RtkReadInterceptor -from rendering.tools.interceptors.rtk_grep import RtkGrepInterceptor -from rendering.tools.interceptors.shell_listing import ShellListingInterceptor +from rendering.tools.command.interceptors.sandbox_bootstrap import SandboxBootstrapInterceptor +from rendering.tools.command.interceptors.rtk_read import RtkReadInterceptor +from rendering.tools.command.interceptors.rtk_grep import RtkGrepInterceptor +from rendering.tools.command.interceptors.shell_listing import ShellListingInterceptor def _ctx(sink_mode="plain"): From f796f951755d374f4582228f2021852712897213 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 20:22:11 +0200 Subject: [PATCH 63/65] docs: update tools/AGENTS.md for A8 architecture changes - Update directory layout: add phases/, harness.py, dispatch.py, rendering/events/ package, command/interceptors/ restructure - Update renderer rule: events/ is now a package, one file per family - Update interceptor rule: interceptors live under command/ - Update dependency direction: add phases/ package --- tools/AGENTS.md | 51 +++++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/tools/AGENTS.md b/tools/AGENTS.md index ee9ac8e..e964878 100644 --- a/tools/AGENTS.md +++ b/tools/AGENTS.md @@ -8,33 +8,45 @@ tools/ ├── codecome.py # Workspace validation CLI (check/status/next-id) │ ├── codecome/ # Core runner and configuration -│ ├── cli.py # main(), build_parser() — runtime entry 
point -│ ├── cli_render.py # HAVE_RICH, build_console, render_event, _get_rendering_ctx -│ ├── config.py # env, codecome.yml, prompt, model, thinking resolution +│ ├── cli.py # main(), build_parser() — parse args + dispatch +│ ├── cli_render.py # build_console, _emit_fatal_error (CLI helpers) +│ ├── harness.py # run_phase_mode() — retry/resume loop +│ ├── config.py # ROOT, env, codecome.yml, prompt, model, thinking │ ├── session.py # OpenCode HTTP: create session, send prompt │ ├── runner.py # _consume_events, _run_single_attempt -│ ├── graceful.py # phase completion checks, resume prompt builders -│ ├── transcript.py # transcript path/open/close helpers +│ ├── transcript.py # Transcript class (open/write_event/close) │ └── version.py # OpenCode version checks │ +├── phases/ # Phase-specific logic +│ └── completion.py # phase completion checks, resume prompt builders +│ ├── rendering/ # Tool and event rendering │ ├── base.py # BaseRenderer (sink, rich, plain properties) │ ├── context.py # RenderContext (root, sink, settings, cache) +│ ├── dispatch.py # HAVE_RICH, _get_rendering_ctx, render_event │ ├── settings.py # RenderSettings (20+ tunables from env vars) │ ├── cache.py # SnapshotCache (file content snapshots for diffs) │ ├── sink.py # RenderSink protocol + Plain/Rich/Textual sinks │ ├── registry.py # RendererRegistry (dispatch by event type / tool name) -│ ├── events.py # Event renderer classes (StepStart, Text, Error, …) │ ├── utils.py # Shared helpers (path, lexer, diff, read framing) -│ ├── tools/ # Tool renderer classes -│ │ ├── base.py # ToolRenderer, FallbackToolRenderer -│ │ ├── read.py / write.py / edit.py / glob.py / grep.py -│ │ ├── command.py # CommandRenderer (bash) with interceptor chain -│ │ ├── apply_patch.py / todo.py / task.py / skill.py / permissions.py -│ │ └── interceptors/ # CommandExecutionInterceptor implementations -│ │ ├── sandbox_bootstrap.py -│ │ ├── rtk_read.py / rtk_grep.py / shell_listing.py -│ │ └── base.py # Interceptor protocol 
+│ ├── events/ # Event renderer classes (one per family) +│ │ ├── base.py # EventRenderer base + finish constants +│ │ ├── step_start.py / step_finish.py / text.py / reasoning.py +│ │ ├── tool_use.py / error.py / unknown.py +│ │ ├── session_status.py / session_diff.py / server.py +│ │ ├── message.py / subagent.py +│ │ └── __init__.py # Re-exports all symbols +│ └── tools/ # Tool renderer classes +│ ├── base.py # ToolRenderer, FallbackToolRenderer +│ ├── read.py / write.py / edit.py / glob.py / grep.py +│ ├── apply_patch.py / todo.py / task.py / skill.py / permissions.py +│ └── command/ # CommandRenderer + interceptors +│ ├── __init__.py # CommandRenderer (bash) with interceptor chain +│ └── interceptors/ # CommandExecutionInterceptor implementations +│ ├── base.py # Interceptor protocol +│ ├── sandbox_bootstrap.py +│ ├── rtk_read.py / rtk_grep.py / shell_listing.py +│ └── __init__.py │ ├── events/ # SSE event consumption │ ├── base.py # BaseEventLoop (shared: filters, permissions, sync, dedup) @@ -95,7 +107,7 @@ if __name__ == "__main__": ### 4. Renderers live under `tools/rendering/` -- Event renderers go in `rendering/events.py`, inheriting `EventRenderer`. +- Event renderers go in `rendering/events/`, one module per renderer family, inheriting `EventRenderer`. - Tool renderers go in `rendering/tools/`, inheriting `ToolRenderer`. - Renderers receive **normalized dict** events/tool states — do not introduce custom event objects. - Rich and Textual renderers may emit arbitrary Rich renderables (Panel, Group, Text, Table, Syntax, Rule, …) through a `RenderSink`. The sink abstracts *where* output goes; it does not restrict *what* renderers can draw. @@ -114,7 +126,7 @@ File content snapshots used by Write/Edit/ApplyPatch renderers for diff computat ### 7. 
Command-specific rendering uses `CommandExecutionInterceptor` -Specialised rendering for bash invocations (sandbox-bootstrap JSON, rtk read/grep, rg, ls, find, tree) is implemented as `CommandExecutionInterceptor` implementations. The `CommandRenderer` has a lazy interceptor chain. New interceptors go in `rendering/tools/interceptors/`. +Specialised rendering for bash invocations (sandbox-bootstrap JSON, rtk read/grep, rg, ls, find, tree) is implemented as `CommandExecutionInterceptor` implementations. The `CommandRenderer` has a lazy interceptor chain. New interceptors go in `rendering/tools/command/interceptors/`. ### 8. Finding/itemdb helpers live under `tools/findings/` @@ -129,9 +141,10 @@ run-agent.py → codecome/ → (none) → rendering/ → codecome/ → chat/ → codecome/, events/ -codecome/ → events/, rendering/ (lazy imports only in execution paths) +codecome/ → events/, rendering/, phases/ (lazy imports only in execution paths) +phases/ → codecome/ (config only) events/ → (stdlib only, except sse_client ↔ base) -rendering/ → codecome/ +rendering/ → codecome/ (config only), _colors chat/ → codecome/, events/ ``` From d54b672971e316e38b3ac4768fb204846c9065b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 23:21:42 +0200 Subject: [PATCH 64/65] fix(a8.1): cleanup cli_render, deduplicate event pipeline, strengthen tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Slim codecome/cli_render.py to only CLI helpers: - Remove re-exports of HAVE_RICH, _get_rendering_ctx, render_event, ROOT, _FINISH_* constants - Keep only build_console() and _emit_fatal_error() - Update imports in harness.py, chat/app.py to use rendering.dispatch and rendering.events directly 2. 
Create codecome/event_pipeline.py: - Shared render_and_log_event() function - Handles transcript→debug→reasoning-filter→render for both phase mode (runner.py) and chat mode (chat/app.py) - Eliminates ~25 lines of duplicated pipeline code 3. Strengthen RenderSettings propagation test: - Integration test proving CLI --read-display-lines etc. reach RenderSettings via the full run_phase_mode() harness 4. Update plan docs: mark A8 as Implemented, add cross-references 356 passed, 0 failed, 0 errors --- .project/tools-refactor-a8-plan.md | 2 +- .project/tools-refactor-plan.md | 4 +- tests/test_render_settings_propagation.py | 130 ++++++++++++++-------- tools/chat/app.py | 18 +-- tools/codecome/cli_render.py | 24 +--- tools/codecome/event_pipeline.py | 56 ++++++++++ tools/codecome/harness.py | 8 +- tools/codecome/runner.py | 13 +-- 8 files changed, 167 insertions(+), 88 deletions(-) create mode 100644 tools/codecome/event_pipeline.py diff --git a/.project/tools-refactor-a8-plan.md b/.project/tools-refactor-a8-plan.md index 38fa6c1..74958d1 100644 --- a/.project/tools-refactor-a8-plan.md +++ b/.project/tools-refactor-a8-plan.md @@ -1,6 +1,6 @@ # Plan: Phase A8 — PR Review Fixes and Architectural Cleanup -**Status:** Active +**Status:** Implemented (A8 + A8.1 cleanup complete) **Date:** 2026-05-25 **Parent:** [tools-refactor-plan.md](tools-refactor-plan.md) **PR:** #21 (`wip/tools-refactor`) diff --git a/.project/tools-refactor-plan.md b/.project/tools-refactor-plan.md index fbf1d7e..c15fbdc 100644 --- a/.project/tools-refactor-plan.md +++ b/.project/tools-refactor-plan.md @@ -1,7 +1,7 @@ # Plan: Refactor `tools/` Directory Structure -**Status:** Draft, revised after architecture review -**Date:** 2026-05-23 +**Status:** Implemented — see [tools-refactor-a8-plan.md](tools-refactor-a8-plan.md) for A8 details and [tools/AGENTS.md](../tools/AGENTS.md) for the current architecture rules. 
+**Date:** 2026-05-23 (original) / 2026-05-25 (final) **Target:** `tools/run-agent.py`, `tools/events/`, rendering/chat support, and later finding/itemdb tooling **Risk Level:** Medium (large structural refactor, all phase targets affected) diff --git a/tests/test_render_settings_propagation.py b/tests/test_render_settings_propagation.py index f15bf69..82d2d02 100644 --- a/tests/test_render_settings_propagation.py +++ b/tests/test_render_settings_propagation.py @@ -1,59 +1,95 @@ -"""Test that CLI render tunables propagate into RenderSettings. +"""Integration test: CLI tunable overrides reach RenderSettings via the +full ``codecome.harness.run_phase_mode`` path.""" -Regression test for PR #21 comment: --read-display-lines, ---write-content-lines, --write-diff-limit, --edit-diff-lines must reach -the rendering context's settings when the phase harness applies overrides. -""" import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "tools")) -import dataclasses +import argparse +from unittest.mock import MagicMock + import pytest -from rendering.dispatch import _get_rendering_ctx, _RENDERING_CTX_CACHE - - -@pytest.fixture(autouse=True) -def _clear_ctx_cache(): - """Ensure each test starts with a fresh rendering context cache.""" - _RENDERING_CTX_CACHE.clear() - yield - _RENDERING_CTX_CACHE.clear() - - -def test_cli_overrides_propagate_to_render_settings(): - """Verify that dataclasses.replace on the context settings - correctly overrides the tunables, matching the pattern used - by codecome.harness.run_phase_mode.""" - # Get a plain-mode context (console=None → plain sink) - ctx = _get_rendering_ctx(None) - assert ctx.settings.read_display_lines == 10 # default - assert ctx.settings.write_content_lines == 25 # default - assert ctx.settings.write_diff_limit == 50 # default - assert ctx.settings.edit_diff_lines == 25 # default - - # Apply CLI overrides (same pattern as harness.py) - overrides = { - "read_display_lines": 42, - 
"write_content_lines": 99, - "write_diff_limit": 200, - "edit_diff_lines": 7, - } - ctx.settings = dataclasses.replace(ctx.settings, **overrides) +from codecome import harness as harness_mod +from codecome import runner as runner_mod +from rendering.dispatch import _get_rendering_ctx + + +@pytest.mark.unit +def test_cli_tunables_propagate_to_render_settings(monkeypatch): + """Prove that --read-display-lines (etc.) reach RenderSettings + when run through the real run_phase_mode() harness.""" + args = argparse.Namespace() + args.phase = "2" + args.label = "Hypothesis" + args.agent = "auditor" + args.prompt_file = "prompts/phase-2.md" + args.finding = None + args.chat = False + args.show_model = False + args.debug = False + args.color = "never" + args.log_level = "WARN" + args.read_display_lines = 42 + args.write_content_lines = 7 + args.write_diff_limit = 99 + args.edit_diff_lines = 3 + monkeypatch.setattr(harness_mod, "ServerRunner", lambda: _FakeServerRunner()) + monkeypatch.setattr(runner_mod, "_run_single_attempt", + lambda *a, **kw: (0, "ses_ok", _FakeRunResult(), + harness_mod.ROOT / "tmp" / "fake.jsonl")) + monkeypatch.setattr(harness_mod, "load_prompt", lambda *a, **kw: "Fake prompt") + monkeypatch.setattr(harness_mod, "resolve_runtime_config", + lambda agent: _FakeRuntimeConfig()) + monkeypatch.setattr(harness_mod, "check_phase_graceful_completion", + lambda *a, **kw: True) + import subprocess + monkeypatch.setattr(subprocess, "run", lambda *a, **kw: MagicMock(returncode=0)) + + returncode = harness_mod.run_phase_mode(args) + assert returncode == 0 + + # run_phase_mode builds console via build_console, so check the same + # mode the harness would have used. 
+ from codecome.cli_render import build_console + console = build_console(args.color) + ctx = _get_rendering_ctx(console) assert ctx.settings.read_display_lines == 42 - assert ctx.settings.write_content_lines == 99 - assert ctx.settings.write_diff_limit == 200 - assert ctx.settings.edit_diff_lines == 7 + assert ctx.settings.write_content_lines == 7 + assert ctx.settings.write_diff_limit == 99 + assert ctx.settings.edit_diff_lines == 3 + + +# -- Lightweight stubs ------------------------------------------------------- + +class _FakeServerRunner: + class info: + pid = 1 + def start(self, **kw): + return _FakeServerInfo() + def stop(self): + pass + + +class _FakeServerInfo: + base_url = "http://localhost" + password = "fake" + +class _FakeRunResult: + any_step_finish_seen = True + step_finish_count = 1 + last_finish_reason = "stop" + last_finish_tokens = {} + last_permission_error = None + last_session_id = "ses_ok" -def test_cached_context_preserves_overrides(): - """Once overrides are applied, subsequent _get_rendering_ctx calls - should return the same context with overrides intact.""" - ctx = _get_rendering_ctx(None) - ctx.settings = dataclasses.replace(ctx.settings, read_display_lines=77) - ctx2 = _get_rendering_ctx(None) - assert ctx2 is ctx - assert ctx2.settings.read_display_lines == 77 +class _FakeRuntimeConfig: + model = "op/test" + variant = None + model_source = "stub" + variant_source = "stub" + thinking_on = True + thinking_source = "stub" diff --git a/tools/chat/app.py b/tools/chat/app.py index 8627e0a..63a90b8 100644 --- a/tools/chat/app.py +++ b/tools/chat/app.py @@ -24,7 +24,7 @@ sys.path.insert(0, str(Path(__file__).resolve().parents[1])) from chat.debug import _chat_debug # noqa: E402 -from codecome.cli_render import render_event # noqa: E402 +from rendering.dispatch import render_event # noqa: E402 # --------------------------------------------------------------------------- # Rich imports — same fallback pattern as run-agent.py @@ -110,14 +110,18 
@@ def _chat_render_and_log(self, console, phase, label, event): When bound via ``__get__`` to a _ChatApp instance, ``self`` is guaranteed to carry the attributes accessed below.""" - self.transcript.write_event(event) - if getattr(self.args, "debug", False): - _chat_debug(f"_render_and_log: raw event: {json.dumps(event)}") + from codecome.event_pipeline import render_and_log_event + + render_and_log_event( + console=console, phase=phase, label=label, event=event, + transcript=self.transcript, + debug=getattr(self.args, "debug", False), + thinking_on=self.thinking_on, + render_event_fn=render_event, + debug_fn=_chat_debug, + ) if event.get("type") == "message.updated": _chat_update_modeline_info(self, event) - if not self.thinking_on and event.get("type") == "reasoning": - return - render_event(console, phase, label, event) def _chat_update_modeline_info(self, event: dict[str, Any]) -> None: diff --git a/tools/codecome/cli_render.py b/tools/codecome/cli_render.py index 30d4c1d..dd776e4 100644 --- a/tools/codecome/cli_render.py +++ b/tools/codecome/cli_render.py @@ -2,11 +2,11 @@ # SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later """ -CLI rendering helpers: console construction and fatal error display. +CLI/UI helpers: console construction and fatal error display. -The rendering dispatcher and context cache live in ``rendering.dispatch``. -This module re-exports key symbols for backward compatibility with -existing imports from ``codecome.cli_render``. +For rendering dispatch (``HAVE_RICH``, ``_get_rendering_ctx``, +``render_event``) import from ``rendering.dispatch``. +For finish-reason constants import from ``rendering.events``. 
""" from __future__ import annotations @@ -14,21 +14,7 @@ import sys from typing import Any -# Re-exports from rendering.dispatch — used by cli.py, chat/, runner.py -from rendering.dispatch import ( # noqa: F401 — re-export - HAVE_RICH, - _get_rendering_ctx, - render_event, -) - -from codecome.config import ROOT # noqa: F401 — re-export - -# Re-exports of finish constants (used by cli.py's retry loop) -from rendering.events import ( # noqa: F401 — re-export - _FINISH_TERMINAL_OK, - _FINISH_MID_TURN, - _FINISH_FAILURE, -) +from rendering.dispatch import HAVE_RICH # --------------------------------------------------------------------------- diff --git a/tools/codecome/event_pipeline.py b/tools/codecome/event_pipeline.py new file mode 100644 index 0000000..733f20f --- /dev/null +++ b/tools/codecome/event_pipeline.py @@ -0,0 +1,56 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Shared event side-effect pipeline: transcript, debug, reasoning filter, +render. + +Used by both phase mode (``codecome.runner._consume_events``) and +chat mode (``chat.app._chat_render_and_log``) to avoid duplicated +transcript/debug/filter/render logic. +""" + +from __future__ import annotations + +import json +import sys +from typing import Any, Callable + +from codecome.transcript import Transcript + + +def render_and_log_event( + *, + console: Any, + phase: str, + label: str, + event: dict[str, Any], + transcript: Transcript, + debug: bool, + thinking_on: bool, + render_event_fn: Callable[..., None], + debug_fn: Callable[[str], None] | None = None, +) -> None: + """Run the full event side-effect pipeline for one SSE event. + + 1. Write the raw event to the JSONL transcript. + 2. If *debug* is True, mirror the raw event JSON: + - via *debug_fn* (chat mode — ``_chat_debug``); + - via ``sys.stderr`` otherwise (phase mode). + 3. If *thinking_on* is False and the event type is ``reasoning``, + skip rendering. + 4. 
Otherwise call ``render_event_fn(console, phase, label, event)``. + """ + transcript.write_event(event) + + if debug: + if debug_fn is not None: + debug_fn(f"raw event: {json.dumps(event)}") + else: + sys.stderr.write(json.dumps(event) + "\n") + sys.stderr.flush() + + if not thinking_on and event.get("type") == "reasoning": + return + + render_event_fn(console, phase, label, event) diff --git a/tools/codecome/harness.py b/tools/codecome/harness.py index 838f08f..c8c5c3c 100644 --- a/tools/codecome/harness.py +++ b/tools/codecome/harness.py @@ -24,11 +24,9 @@ import _colors as C from opencode.serve import ServerRunner, ServerRunnerError -from codecome.cli_render import ( - HAVE_RICH, - build_console, _get_rendering_ctx, render_event, _emit_fatal_error, - _FINISH_TERMINAL_OK, _FINISH_MID_TURN, _FINISH_FAILURE, -) +from codecome.cli_render import build_console, _emit_fatal_error +from rendering.dispatch import HAVE_RICH, _get_rendering_ctx, render_event +from rendering.events import _FINISH_TERMINAL_OK, _FINISH_MID_TURN, _FINISH_FAILURE from codecome.config import ROOT, resolve_color_mode, load_prompt, resolve_runtime_config from phases.completion import ( check_phase_graceful_completion, diff --git a/tools/codecome/runner.py b/tools/codecome/runner.py index c8ea609..48fab74 100644 --- a/tools/codecome/runner.py +++ b/tools/codecome/runner.py @@ -20,6 +20,7 @@ from codecome.config import ROOT from codecome.session import create_session, send_prompt_to_session from codecome.transcript import Transcript +from codecome.event_pipeline import render_and_log_event def _consume_events( @@ -46,13 +47,11 @@ def _consume_events( ) def _render_and_log(console_: Any, phase_: str, label_: str, event: dict[str, Any]) -> None: - transcript.write_event(event) - if args.debug: - sys.stderr.write(json.dumps(event) + "\n") - sys.stderr.flush() - if not thinking_on and event.get("type") == "reasoning": - return - render_event_fn(console_, phase_, label_, event) + render_and_log_event( + 
console=console_, phase=phase_, label=label_, event=event, + transcript=transcript, debug=args.debug, thinking_on=thinking_on, + render_event_fn=render_event_fn, + ) return event_loop.run(_render_and_log) From d23db4ba6c4cf4b24abacb7685c3c10e162bf8e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 25 May 2026 23:33:27 +0200 Subject: [PATCH 65/65] fix: update stale comment in test_render_settings_propagation.py --- tests/test_render_settings_propagation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_render_settings_propagation.py b/tests/test_render_settings_propagation.py index 82d2d02..23ff7af 100644 --- a/tests/test_render_settings_propagation.py +++ b/tests/test_render_settings_propagation.py @@ -50,8 +50,8 @@ def test_cli_tunables_propagate_to_render_settings(monkeypatch): returncode = harness_mod.run_phase_mode(args) assert returncode == 0 - # run_phase_mode builds console via build_console, so check the same - # mode the harness would have used. + # build_console uses args.color directly, matching what run_phase_mode + # passes to the real console builder. from codecome.cli_render import build_console console = build_console(args.color) ctx = _get_rendering_ctx(console)