diff --git a/.opencode/agents/chat.md b/.opencode/agents/chat.md new file mode 100644 index 0000000..9b1058c --- /dev/null +++ b/.opencode/agents/chat.md @@ -0,0 +1,96 @@ +# CodeCome Chat Agent + +You are the CodeCome Chat Agent, an interactive assistant for the CodeCome vulnerability research workflow. + +Your role is to help the user interactively: answer questions about the target, the findings, the project status, and assist with any CodeCome task the user requests. + +**You must NEVER modify `codecome.yml`, `AGENTS.md`, Makefile, or any other project orchestration or configuration file unless explicitly instructed by the user.** + +## Lazy loading principle + +**This is an interactive chat session. Speed matters.** + +Do NOT read large batches of files upfront. Instead: + +1. **Read on demand.** Only read a file when the user asks about it, or when you need its content to answer a question or perform a task. +2. **Start light.** On startup, read only what the initial prompt tells you to (typically `codecome.yml` and a directory listing of `itemdb/findings/`). Do NOT read `AGENTS.md`, reconnaissance notes, skills, templates, or source code unless the user asks or a specific task requires them. +3. **Announce what you're reading.** When you do read a file, briefly mention it so the user knows what's happening (e.g., "Reading `itemdb/notes/target-profile.md`..."). +4. **Cache mentally.** Once you've read a file in this session, don't re-read it unless the user says it changed. + +## What you know (without reading files) + +You are aware of the following CodeCome structure from your training: + +### Workspace layout + +- `codecome.yml` — project configuration and audit settings. +- `src/` — target source code to audit. +- `sandbox/` — sandboxed execution and validation environment. +- `itemdb/` — file-based finding database, notes, reports, and evidence. + - `itemdb/notes/` — reconnaissance notes and target model. 
+ - `itemdb/findings/PENDING/` — candidate findings requiring validation. + - `itemdb/findings/CONFIRMED/` — validated findings with evidence. + - `itemdb/findings/EXPLOITED/` — confirmed findings with demonstrated impact. + - `itemdb/findings/REJECTED/` — disproven or non-actionable findings. + - `itemdb/findings/DUPLICATE/` — duplicate findings. + - `itemdb/evidence/` — validation evidence, grouped by finding id. + - `itemdb/reports/` — generated Markdown reports. +- `templates/` — Markdown templates for findings, reports, etc. +- `prompts/` — phase prompts used by the harness. +- `.opencode/agents/` — agent definitions (you are `chat.md`). +- `.opencode/skills/` — reusable skills for specific domains. + +### Available agents + +| Agent | Role | +|-------|------| +| `recon` | Target reconnaissance and attack surface mapping (Phase 1) | +| `auditor` | Vulnerability hypothesis generation (Phase 2) | +| `reviewer` | Counter-analysis of pending findings (Phase 3) | +| `validator` | Validation of individual findings (Phase 4) | +| `exploiter` | Exploit development for confirmed findings (Phase 5) | +| `reporter` | Report generation (Phase 6) | +| `chat` | Interactive assistant (this agent) | + +### Available skills (load on demand only) + +Skills live under `.opencode/skills/`. Do NOT read them at startup. Read a skill only when you need its guidance for a specific task. 
+ +- `source-recon/` — source tree reconnaissance patterns +- `finding-format/` — finding template and frontmatter rules +- `counter-analysis/` — counter-analysis methodology +- `sandbox-bootstrap/` — sandbox setup and configuration +- `sandbox-validation/` — validation inside sandboxes +- `exploit-development/` — exploit PoC development +- `exploit-recording/` — recording exploit sessions +- `exploit-validation/` — validating exploit impact +- `report-writing/` — report generation +- `c-cpp-security/`, `dotnet-security/`, `erlang-security/`, `php-security/`, `web-security/`, `sql-injection/`, `iac-security/`, `rabbitmq-security/` — target-specific security patterns +- `juliet-benchmark/` — Juliet test suite specifics + +## Capabilities + +In chat mode you can: + +- **Answer questions** about the project, target, findings, evidence, or workflow. +- **Read files on demand** when the user asks about specific code, findings, or notes. +- **Create or edit findings** if the user requests (follow `templates/finding.md` format; read the finding-format skill first). +- **Run commands** in the sandbox if the user asks for validation or testing. +- **Summarize status** — list findings by status, show recon progress, etc. +- **Assist with any phase** — if the user says "do recon on file X" or "validate finding CC-0005", read the relevant agent definition and skill on demand, then proceed. + +## Interaction style + +- Be concise. This is a chat, not a report. +- Use short answers for simple questions. +- For complex tasks, outline what you'll do before starting. +- If a task will require reading many files, warn the user and ask if they want to proceed. +- If you're unsure what the user wants, ask for clarification. + +## Safety rules + +- Do not modify target source code under `src/` unless explicitly instructed. +- Do not attack third-party systems. +- Do not exfiltrate secrets. +- Experimental work goes in `sandbox/`. 
+- Temporary files go in `tmp/` (workspace-relative, NOT `/tmp/`). diff --git a/.project/chat-mode-plan.md b/.project/chat-mode-plan.md new file mode 100644 index 0000000..db802e6 --- /dev/null +++ b/.project/chat-mode-plan.md @@ -0,0 +1,181 @@ +# Chat Mode Implementation Plan + +**Status:** Draft +**Date:** 2026-05-21 +**Target:** `tools/run-agent.py`, `tools/events/`, `Makefile` +**Risk Level:** Medium (adds new mode to existing harness) + +--- + +## 1. Executive Summary + +Add an interactive `--chat` mode to `run-agent.py` that reuses the existing `opencode serve` infrastructure (`ServerRunner`, `EventLoop`, `SseClient`, `StateTracker`) but runs in a multi-turn loop: idle → wait for user input → send prompt → consume SSE → idle again. + +The Textual TUI provides the user-facing interface: a `RichLog` upper panel (driven by the existing render pipeline) and an `Input` lower panel for typing messages. + +--- + +## 2. Architecture + +``` +make chat + └─ run-agent.py --chat + ├─ ServerRunner.start() # reuse tools/opencode/serve.py + ├─ POST /session # reuse _create_session() + ├─ ChatApp.run() # Textual TUI (new) + │ ├─ RichLog (upper panel) # receives rendered events + │ ├─ Input (lower panel) # user types messages + │ └─ QuitScreen (Ctrl+C modal) # confirm quit + └─ ChatEventLoop # new: idle→prompt→idle loop + ├─ SseClient # reuse tools/events/sse_client.py + ├─ StateTracker # reuse tools/events/state_tracker.py + ├─ emit_event() # reuse tools/events/emitters.py + └─ POST /session/{id}/message # reuse _send_prompt_to_session() +``` + +### Key Design Decisions + +1. **Reuse `ServerRunner`** — no need to spawn `opencode serve` manually; `ServerRunner.start()` handles health checks, ephemeral ports, and auth tokens. + +2. 
**New `ChatEventLoop` class** — a thin wrapper around `EventLoop` that: + - Does NOT exit on session idle + - Instead, signals the TUI that the session is ready for the next prompt + - Uses `asyncio`-compatible event signaling (or `queue.Queue`) to coordinate between the SSE consumer thread and the TUI main thread + +3. **Single session, multi-turn** — the session is created once. Each user message is sent as a new `POST /session/{id}/message` with a single text part. + +4. **Rendering stays the same** — all events flow through the existing `render_event()` → `render_text()` / `render_tool_use()` / etc. pipeline. The `TextualConsoleProxy` bridges Rich `Console.print()` to `RichLog.write()`. + +--- + +## 3. New Files / Changes + +### 3.1 `tools/events/chat_loop.py` (NEW) + +```python +class ChatEventLoop: + """Multi-turn event loop: idle → signal ready → send prompt → consume → idle.""" + + def __init__(self, base_url, session_id, console, auth_token=None, workspace_dir=None): + ... + + def start_consumer(self, render_fn): + """Start SSE consumer in background thread. Signals via queue.""" + ... + + def send_prompt(self, text, agent=None, model=None, variant=None): + """POST /session/{id}/message with text part.""" + ... + + def stop(self): + """Signal consumer thread to exit.""" + ... +``` + +**Test strategy:** +- Unit test with `FakeSseClient` (same pattern as `test_new_serve_stack.py`) +- Test prompt→idle→prompt→idle cycle +- Test stop() cleanly terminates consumer +- Test error handling (bad prompt, server down) + +### 3.2 `tools/run-agent.py` (MODIFY) + +Changes: +1. **argparse**: Add `--chat` flag, `--prompt` arg (for initial greeting) +2. **Validation**: When `--chat`, `--phase` is not required +3. **Chat path**: After server start + session creation, launch `ChatApp` instead of the phase loop +4. 
**Textual TUI**: `ChatApp`, `QuitScreen`, `TextualConsoleProxy` classes (conditionally imported) + +### 3.3 `Makefile` (MODIFY) + +```makefile +CHAT ?= 0 +ifeq ($(CHAT),1) +WRAPPER_ARGS += --chat +endif + +chat: venv-check + @$(PYTHON) tools/run-agent.py --chat --label "Interactive Chat" --agent $(or $(AGENT),auditor) --prompt "Please introduce yourself and wait for my instructions." +``` + +Also add `$(WRAPPER_ARGS)` to all phase targets (phases 1-6). + +### 3.4 `requirements.txt` (MODIFY) + +``` +textual>=0.80.0 +``` + +### 3.5 `tests/test_chat_mode.py` (NEW) + +Tests: +1. `TestChatEventLoop` — unit tests with fake SSE client +2. `TestChatArgparse` — `--chat` flag parsing, validation rules +3. `TestTextualConsoleProxy` — Rich → RichLog bridging +4. `TestChatMainEntry` — integration test with mocked server (monkeypatch `ServerRunner`, `_create_session`, `ChatEventLoop`) + +--- + +## 4. Test Plan + +### 4.1 Unit Tests (fast, no opencode binary) + +| Test | What | How | +|------|------|-----| +| `test_chat_event_loop_single_turn` | One prompt → SSE events → idle → ready | `FakeSseClient` yields canned events | +| `test_chat_event_loop_multi_turn` | Prompt → idle → prompt → idle → stop | Two canned event sequences | +| `test_chat_event_loop_stop_during_busy` | Stop signal while processing | `queue.Queue` + thread sync | +| `test_chat_event_loop_permission_rejected` | Permission auto-reject in chat mode | `FakeSseClient` with `permission.asked` | +| `test_chat_event_loop_error_recovery` | SSE disconnect → reconnect → continue | `FakeSseClient` with reconnect | +| `test_chat_argparse_requires_label_and_agent` | Missing required args | `parser.parse_args()` | +| `test_chat_argparse_chat_skips_phase` | `--chat` without `--phase` | `parser.parse_args()` | +| `test_textual_console_proxy_single_arg` | Proxy forwards single renderable | Mock `RichLog.write` | +| `test_textual_console_proxy_no_args` | Proxy writes empty line | Mock `RichLog.write` | +| 
`test_textual_console_proxy_multi_args` | Proxy wraps in `Group` | Mock `RichLog.write` | + +### 4.2 Integration Tests (requires opencode binary, marked `@pytest.mark.component`) + +| Test | What | How | +|------|------|-----| +| `test_chat_main_starts_server` | `main()` with `--chat` starts server | Monkeypatch `ChatApp` to capture args | +| `test_chat_main_missing_textual` | `--chat` without textual → error | Monkeypatch import to fail | + +### 4.3 Parity Tests (using mock-llm-server.py) + +Future: extend `mock-llm-parity.py` to test chat mode parity with a multi-turn script. + +--- + +## 5. Implementation Order + +1. ✅ Write this plan +2. Add `--chat` flag + argparse changes to `run-agent.py` +3. Implement `ChatEventLoop` in `tools/events/chat_loop.py` +4. Write unit tests for `ChatEventLoop` +5. Implement `TextualConsoleProxy` + `ChatApp` + `QuitScreen` in `run-agent.py` +6. Wire chat path in `main()` +7. Add `chat:` target + `WRAPPER_ARGS` to `Makefile` +8. Add `textual` to `requirements.txt` +9. Write integration tests +10. Run full test suite (`make tests`) +11. Rebase on master + +--- + +## 6. Obsolete Artifacts + +The following are no longer needed and should be removed: + +- `.project/chat-bridge-plan.md` — proposed a plugin bridge approach; superseded by direct serve usage +- `test_tui.py` — standalone prototype; superseded by integrated `ChatApp` + +--- + +## 7. 
Risks & Mitigations + +| Risk | Mitigation | +|------|-----------| +| Textual TUI blocks SSE consumer | SSE runs in daemon thread; TUI uses `call_from_thread()` | +| Server outlives TUI quit | `ServerRunner.stop()` called in cleanup; signal handler forwards SIGTERM | +| Race: prompt sent before session ready | `ChatEventLoop` uses a ready queue; prompt blocks until consumer signals idle | +| Textual not installed | Early `ImportError` check with helpful message | diff --git a/.project/chat-mode-textual-postmortem.md b/.project/chat-mode-textual-postmortem.md new file mode 100644 index 0000000..7d8b5ec --- /dev/null +++ b/.project/chat-mode-textual-postmortem.md @@ -0,0 +1,489 @@ +# Chat Mode + Textual: Postmortem and Maintenance Guide + +**Status:** Final +**Date:** 2026-05-22 +**Scope:** `tools/run-agent.py` (`_ChatApp`, `TextualConsoleProxy`), `tools/events/chat_loop.py` +**Audience:** Anyone touching the `--chat` mode TUI code, or upgrading Textual / Python + +--- + +## TL;DR + +The interactive `--chat` mode (the Textual TUI launched by `make chat`) sits inside a **narrow, empirically-verified safe envelope** for Textual cross-thread message dispatch on the versions we ship with (**Textual 8.2.6 + Python 3.14.5**). Patterns that the Textual docs imply should work — multiple `Message` subclasses, multiple `@on(...)` handlers, messages with optional fields, multiple `set_interval` callbacks — were observed to **silently freeze Textual's main event loop** when combined inside the real `_ChatApp`, even though the same patterns pass in isolated minimal repros. + +**Before changing anything in this area, read [§5 Forbidden patterns](#5-forbidden-patterns-and-why) and [§7 Safe extension recipes](#7-safe-extension-recipes).** + +--- + +## 1. Symptoms we saw + +The repeated failure mode was always the same: + +1. `make chat` (or `make chat DEBUG=1`) starts the process. +2. Pre-TUI output appears: opencode serve starts, session is created. +3. 
Textual enters its alternate-screen buffer (terminal "goes black"). +4. **The screen stays black forever.** No banner, no widgets visible. +5. The Python process is alive (background threads keep running, the SSE consumer keeps receiving events). The user has to kill the process from another terminal. + +When `--debug` is enabled, the debug log under `tmp/chat-debug-<pid>-<timestamp>.log` shows the **decisive evidence**: + +- Background threads (`asyncio_0`, `asyncio_1`, `codecome-chat-consumer`) keep producing log lines indefinitely. +- `post_message(...)` calls from background threads keep firing. +- **Zero** `_heartbeat: tick #N` lines (the `set_interval(1.0, _heartbeat)` canary never fires). +- **Zero** `_on_render_message: ...` lines (the `@on(RenderMessage)` handler never fires). + +In short: **Textual's main asyncio event loop on `MainThread` stops processing scheduled callbacks** after `on_mount` returns. Background concurrency is fine; the main loop is dead. + +The fact that the heartbeat (a plain `set_interval` callback, nothing to do with Messages) does NOT fire is the smoking gun: this isn't "Messages aren't dispatched", it's "the main loop is not making forward progress on any scheduled callback". Yet the loop is not deadlocked in any visible way — no traceback, no error. + +--- + +## 2. Environment + +- Textual `8.2.6` +- Rich `14.x` +- Python `3.14.5` (installed via Homebrew on macOS arm64) +- Terminal: macOS Terminal.app (the symptom was reproducible across different LLM provider/model combos, ruling out anything related to opencode response payloads) + +We were unable to fully isolate the root cause to a Textual or CPython 3.14 bug. Minimal `App.run_test()` Pilot repros of every "broken" pattern PASSED outside the real `_ChatApp`. Inside the real app, the same patterns reliably froze the main loop. 
The most plausible explanation we have is a subtle interaction between Textual's startup ordering, the asyncio thread pool, and one or more of our app-side concurrency choices — but we did not find the precise trigger and stopped chasing it once we had a working envelope. + +--- + +## 3. The bisection that produced the working architecture + +We rebuilt `_ChatApp` from scratch as a 5-step ladder, each step adding one piece of complexity and tested via `make chat DEBUG=1`. The matrix below is the gold record of what is safe and what is not on this Textual/Python combination. + +| Step | Δ from previous | Result | +|------|-----------------|--------| +| 1 | Bare TUI: `compose()` yielding `RichLog`/`Input`/`Footer`, `on_mount` writing one line | ✅ works | +| 2 | + start `opencode serve` and create a session in `_run_chat_mode` (still bare ChatApp) | ✅ works | +| 3 | + `TextualConsoleProxy` + inner `RenderMessage(Message)` + `@on(RenderMessage)` handler + a one-shot `set_timer` test post | ✅ works (handler fires) | +| 4 | + raw daemon SSE consumer thread (`chat_loop.start_consumer`) + **synchronous** `chat_loop.send_prompt(...)` blocking inside `on_mount` | ✅ works (model output streams in) | +| 4a | + `set_interval(1.0, _heartbeat)` canary | ✅ works (ticks fire) | +| 4b | sync `send_prompt` → `@work(thread=True) _send_initial_prompt` | ✅ works | +| **4c** | + second Message subclass `StateMessage(Message)` + `@on(StateMessage)` handler | ❌ **freezes** | +| 4c-fix | merge `RenderMessage` + `StateMessage` into a single `ChatMessage` class with a `kind` discriminator, single `@on(ChatMessage)` handler | ❌ **freezes** | +| 4c-min | revert class structure to Step 4b's single one-arg `RenderMessage`, but post one extra `RenderMessage(Text("[probe:idle]"))` directly from the consumer's `_render_and_log` for state events | ✅ works | +| 4c-alt | extend `RenderMessage.__init__` to `(renderable=None, state=None, detail=None)` with state branching in the handler | ❌ **freezes** | 
+| 4c-poll | revert message class to one-arg; add a second `set_interval(0.1, _poll_state)` to poll a `threading.Lock`-protected pending-state slot | ❌ **freezes** | +| **Final** | revert all of the above, then add `@work(thread=True) _send_prompt` + restored `on_input_submitted` (these short-lived workers only fire on user actions; they are isomorphic in shape to the proven-good `_send_initial_prompt`) | ✅ works | + +The "Final" row is what now lives in `tools/run-agent.py`. Every ❌ row above is a **forbidden pattern** for this code path until we either upgrade Textual/Python or find the actual root cause. + +--- + +## 4. The architecture that works + +Documented in code under the `_ChatApp` class docstring; reproduced here for visibility. + +``` +make chat + └─ run-agent.py --chat ... (main thread) + ├─ ServerRunner.start() start opencode serve + ├─ _create_chat_session() POST /session + └─ ChatApp(...).run() Textual main loop + + (main thread / asyncio loop) + │ + on_mount ─────────┤ + │ query widgets │ + │ banner write │ + │ set_interval(1.0, _heartbeat) + │ + │ chat_loop.start_consumer(_render_and_log) + │ │ + │ ▼ (raw daemon thread) + │ _consumer_worker + │ │ for event in SseClient.events(): + │ │ _render_and_log(...) + │ │ ├─ post_message(RenderMessage(Text("[idle/busy]"))) ─┐ + │ │ └─ render_event(console_proxy, ...) │ + │ │ └─ console_proxy.print(...) │ + │ │ └─ post_message(RenderMessage(renderable))──┤ + │ │ + │ _send_initial_prompt(text) ── @work(thread=True) ── (Textual-managed thread) │ + │ HTTP POST /session/{id}/prompt_async (~150 ms) │ + │ └─ on failure: _post_error_renderable() ──────────┤ + │ │ + │ ... user types and submits Input ... 
│ + │ │ + └─ on_input_submitted ── _send_prompt(text) ── @work(thread=True) ── │ + HTTP POST /session/{id}/prompt_async │ + └─ on failure: _post_error_renderable │ + │ + @on(RenderMessage) _on_render_message ◄─────────────────────────────────────────────────┘ + └─ self.rich_log.write(message.renderable) +``` + +### Architectural rules + +The class docstring of `_ChatApp` lists these in code: + +1. **Long-lived consumer = raw daemon thread.** Textual's `@work(thread=True)` is documented for short-lived blocking tasks (the weather-app pattern in [Workers guide](https://textual.textualize.io/guide/workers/)). Using it for an infinite SSE consumer loop froze the main loop. Stick to `chat_loop.start_consumer(...)` (which is also what non-interactive phase mode uses). + +2. **Short-lived blocking HTTP = `@work(thread=True)`.** This is the docs-canonical pattern. Two workers in the chat app: `_send_initial_prompt` and `_send_prompt`. Each fires once, makes one HTTP POST, exits. + +3. **All cross-thread UI writes go through `RenderMessage(renderable)`** — single one-argument inner-`Message` subclass, single `@on(RenderMessage)` handler, single one-liner body (`self.rich_log.write(message.renderable)`). The proxy posts these from the consumer thread; worker errors post a `RenderMessage(Panel(...))` via `_post_error_renderable`. Everything funnels through one path. + +4. **`_render_and_log` mirrors phase mode (parity).** Per-event: write JSON to `transcript_fp` → if `--debug` mirror raw JSON to the chat-debug log file (NOT stderr, which Textual owns) → suppress `reasoning` when thinking is off → call `render_event(...)` (the same dispatcher non-chat uses). No chat-specific filters or markers. State cues come from `render_session_status` printing `session status: busy/idle` through the normal proxy path. + +5. 
**No Input enable/disable toggling from outside `on_input_submitted`.** Doing so required a second `set_interval` polling timer (poller for state from bg threads), which broke dispatch. The Input stays enabled at all times. The "Thinking…" UX is sacrificed; idle/busy is communicated by `render_session_status` printing `session status: busy/idle` in the normal render pipeline (parity with phase mode). + +6. **Transcript jsonl is mandatory.** `_run_chat_mode` opens `tmp/last-chat--pid.jsonl` line-buffered before constructing `ChatApp`, passes the file handle in via the `transcript_fp` constructor argument, and closes it in the `finally` block. After `app.run()` returns, the outer console prints a `Chat session ended` summary plus the `transcript: tmp/last-chat-...` path (parity with phase mode's per-attempt jsonl + final summary). + +7. **Bottom-bar modeline + heartbeat.** The heartbeat (`set_interval(1.0, _heartbeat)`) updates a `Static` widget (id `modeline`) passed as the leftmost child of `Footer` in `compose`. The widget displays `● | provider/model | ↑in ↓out | $cost` with a pulse icon alternating `●`/`◌` each tick. Data comes from `_modeline_info` (atomically refreshed by `_render_and_log` on every `message.updated` event). The heartbeat also writes `_heartbeat: tick #N` to the debug log when `--debug` is set. No second timer, no new handlers — single set_interval doing double duty. + +--- + +## 5. Forbidden patterns (and why) + +Each of these was bisected to a black screen + dead main loop in the real `_ChatApp`. Do not (re-)introduce them without a fresh bisection — and if you do find one of them works, **please update this document**. + +### 5.1 Multiple `Message` subclasses with multiple `@on(...)` handlers + +```python +# DO NOT DO THIS: +class RenderMessage(Message): ... +class StateMessage(Message): ... + +@on(RenderMessage) +def _on_render(self, m): ... + +@on(StateMessage) +def _on_state(self, m): ... +``` + +Observed in Step 4c. 
Main loop dies after `on_mount`. + +**Safe alternative:** keep one Message class. Use marker-renderables for sub-categories of events (see [§7.1](#71-add-a-new-cross-thread-render-channel)). + +### 5.2 Renaming the Message subclass or changing its `__init__` signature + +```python +# DO NOT DO THIS: +class ChatMessage(Message): # rename + def __init__(self, kind, renderable=None, ...): ... # multi-field + +# OR THIS: +class RenderMessage(Message): + def __init__(self, renderable=None, state=None, ...): # optional fields + ... +``` + +Observed in Steps 4c-fix and 4c-alt. Both froze. + +**Safe alternative:** `RenderMessage(renderable)` — strictly one positional argument. If you need to attach metadata, encode it inside the renderable (e.g. a tagged `Text` or a custom Rich renderable that carries extra info). + +### 5.3 Adding a second `set_interval` callback + +```python +# DO NOT DO THIS: +self.set_interval(1.0, self._heartbeat) +self.set_interval(0.1, self._poll_state) # second timer => freeze +``` + +Observed in Step 4c-poll. Main loop never fires either timer. + +**Safe alternative:** one `set_interval` (the heartbeat). If you need periodic main-thread work, fold it into `_heartbeat` (which runs once per second). For sub-second responsiveness, find a different mechanism — e.g. send a `RenderMessage` from the bg thread to wake up the dispatcher. + +### 5.4 Toggling `Input.disabled` / `Input.placeholder` from outside the input handler + +Required a `_poll_state` second `set_interval` timer to dispatch idle/busy state from the bg thread to the main thread. See §5.3. + +**Safe alternative:** leave the Input always enabled. `render_session_status` already prints `session status: busy/idle` through the normal render pipeline, which is the same cue phase mode emits, so users still get the signal — just not as widget state. 
+ +### 5.5 Long-lived `@work(thread=True)` workers (infinite loops) + +```python +# DO NOT DO THIS: +@work(thread=True) +def _run_sse_consumer(self): + while True: + # consume SSE forever + ... +``` + +Observed in an early Step 5 attempt. The infinite worker froze main-loop progress somehow (we never pinned down whether Textual awaits worker completion in a place that blocks the main loop, but the symptom is reliable). + +**Safe alternative:** raw `threading.Thread(daemon=True)` for long-lived consumers. Reserve `@work(thread=True)` for tasks that start, do one HTTP/IO call, and exit (the docs' weather-app shape). + +### 5.6 `call_from_thread()` for cross-thread UI updates + +The Textual docs recommend `call_from_thread()` as the canonical cross-thread UI update path. In our environment, calls to `self.app.call_from_thread(self.rich_log.write, renderable)` from the consumer thread caused the consumer to stop producing events (silent crash) — and the screen stayed blank. + +**Safe alternative:** `post_message(RenderMessage(renderable))` — `post_message` is also documented as thread-safe and IS working reliably for us. + +### 5.7 Setting an instance attribute named `self.console` + +Textual's `App` exposes `self.console` (a Rich Console managed by the driver). Setting `self.console = None` in `_ChatApp.__init__` shadowed it and Textual's `_init_mode → screen._screen_resized(self.size)` path raised `AttributeError: 'NoneType' object has no attribute 'size'`. + +**Safe alternative:** name our own attribute `self.rich_console` (or similar). Anything but `self.console`. + +### 5.8 Installing custom SIGINT/SIGTERM handlers around `app.run()` + +The original implementation forwarded SIGTERM to the opencode server process group via `os.killpg(info.pid, signum)`. 
Because `ServerRunner.start()` puts the server in a new session (`start_new_session=True`), `os.killpg` from our process raised `PermissionError: [Errno 1] Operation not permitted`, which crashed mid-render and left the terminal in alternate-screen mode. + +**Safe alternative:** install no custom signal handlers. Textual handles SIGINT via its own `action_quit` binding. Server cleanup goes in `_run_chat_mode`'s `finally` block via `runner.stop()` (which uses `os.killpg` correctly within `ServerRunner`). + +--- + +## 6. The diagnostic toolkit + +Built into the code so we never have to reverse-engineer a freeze again. + +### 6.1 The `--debug` flag and `tmp/chat-debug-<pid>-<timestamp>.log` + +`make chat DEBUG=1` passes `--debug` to `run-agent.py`. When set: + +- `_setup_chat_debug()` opens a per-run, line-buffered log file under `tmp/chat-debug-<pid>-<timestamp>.log`. +- `_chat_debug(msg)` writes `[NNN.NNNs] [thread-name] msg` to that file. Safe to call from any thread. +- `ChatEventLoop` accepts a `debug` callback and uses it for consumer-side instrumentation (`_consumer_worker: starting SSE client`, event-number checkpoints, `session idle detected`, exception tracebacks). + +The log filename includes the PID and a timestamp so successive runs don't overwrite earlier evidence. + +### 6.2 The heartbeat canary + +`self.set_interval(1.0, self._heartbeat)` schedules `_heartbeat()` to fire once per second on the main thread. `_heartbeat()` writes `_heartbeat: tick #N (main loop alive)` to the debug log. + +If the chat mode appears to freeze, the FIRST thing to do is read the newest `tmp/chat-debug-<pid>-<timestamp>.log` and check whether heartbeat ticks are present: + +- **Ticks present, but no `_on_render_message` lines:** message dispatch is broken. Look at recent diffs that touched `RenderMessage`, `@on(...)`, or added a second Message class. +- **No ticks at all:** the main asyncio loop is dead/starved. 
Look for a recent change that touched scheduling (a second `set_interval`, a long-lived `@work` worker, a sync blocking call from a Textual callback). +- **Ticks present AND `_on_render_message` lines AND model events visible but TUI looks wrong:** likely a Rich rendering issue (CSS, widget sizing), not a Textual-dispatch issue. + +### 6.3 What "working" looks like + +For reference, a healthy run looks like this in the debug log (excerpt from `tmp/chat-debug-9247-20260522-042343.log`): + +``` +[002.351s] [MainThread] on_mount: entering +[002.352s] [MainThread] on_mount: starting SSE consumer (raw daemon thread) +[002.352s] [codecome-chat-consumer] _consumer_worker: entering event loop +[002.353s] [asyncio_0] _send_initial_prompt: worker started +[002.520s] [codecome-chat-consumer] _consumer_worker: event #1 type=server.connected +[002.521s] [codecome-chat-consumer] TextualConsoleProxy._write: bg thread, post_message(RenderMessage) +[003.351s] [MainThread] _heartbeat: tick #1 (main loop alive) +[004.351s] [MainThread] _heartbeat: tick #2 (main loop alive) +[008.116s] [asyncio_0] _send_prompt: worker posting text len=5 +[008.312s] [asyncio_0] _send_prompt: sent +[011.646s] [codecome-chat-consumer] _render_and_log: event type=reasoning +... +``` + +Notable: regular heartbeat ticks, consumer events flowing, the user-input `_send_prompt` worker firing in response to typed input, and the main loop alive through to clean shutdown. + +--- + +## 7. Safe extension recipes + +If you need to add a feature, follow one of these recipes verbatim. If your feature doesn't fit any of these, **add a heartbeat-canary-bisection step to your work plan** before touching the code. + +### 7.1 Add a new cross-thread render channel + +You want to show some new kind of output in the RichLog from a background thread. 
+ +✅ Build the renderable on the bg thread and post it through the existing path: + +```python +# On a bg thread (consumer or worker): +self.app.post_message(self.app.RenderMessage(my_renderable)) +# OR via the proxy if you already have a Rich Console-like interface: +self.console_proxy.print(my_renderable) +``` + +No new `Message` subclass. No new handler. No new field on `RenderMessage`. + +### 7.2 Add a new "kind" of event that needs main-thread state (not just rendering) + +You want main-thread side effects (e.g. update a reactive variable, change focus, push a screen) triggered by a bg-thread event. + +⚠️ This is exactly the path that produced multiple freezes. Acceptable approaches: + +1. **Encode the state in a renderable.** Have the bg thread post a `RenderMessage(MyMarker(...))` where `MyMarker` is a custom Rich renderable that carries the metadata. Have the `_on_render_message` handler check `isinstance(message.renderable, MyMarker)` and dispatch to a main-thread routine when appropriate. (Single Message class, single handler — safe.) + +2. **Reuse the heartbeat.** The main-thread `_heartbeat` method runs every second. Have the bg thread set a `threading.Event` or a thread-safe attribute, and have `_heartbeat` (which already runs on the main thread) read it. This adds NO new `set_interval` and stays within the proven envelope. Sub-second latency is sacrificed. + +3. **If you absolutely need a second timer:** wrap the new periodic work as additional behaviour inside the existing `_heartbeat`. If you need higher frequency than 1Hz, change `_heartbeat`'s interval (rather than adding a second `set_interval`). + +### 7.3 Add a new short-lived `@work(thread=True)` worker + +✅ Pattern follows the docs' weather example and our existing `_send_prompt`: + +```python +@work(thread=True) +def _do_some_short_blocking_call(self, arg): + _chat_debug(f"_do_some_short_blocking_call: started arg={arg}") + try: + result = some_blocking_io(arg) # e.g. 
HTTP POST + # On success, surface result via the SAME RenderMessage path: + self.post_message(self.RenderMessage(Text(f"Done: {result}"))) + except Exception as exc: + _chat_debug(f"_do_some_short_blocking_call: error: {exc}") + self._post_error_renderable(f"Failed: {exc}") +``` + +The worker MUST exit. No infinite loops. If you need a long-lived loop, use a raw `threading.Thread(daemon=True)`. + +### 7.4 Upgrade Textual or Python + +When bumping Textual or Python versions: + +1. Run `make chat DEBUG=1` and verify a healthy log (per §6.3). +2. **Optionally** try lifting one of the forbidden patterns from §5 (e.g. add a second Message subclass). If it works in the new versions, document the new minimum versions here and relax the corresponding rule. + +The hope is that future Textual/Python releases fix whatever quirk we hit. The forbidden patterns are not desirable per se — they're scar tissue from this version pair. + +--- + +## 8. Why the original chat-mode design didn't survive contact + +For historical context, the original plan ([.project/chat-mode-plan.md](./chat-mode-plan.md)) called for: + +- A `ChatEventLoop` that wraps `SseClient` (kept ✅). +- Multi-message types for state vs render (dropped ❌ — see §5.1). +- A `_watch_chat_state` asyncio task that polls a thread-safe queue (dropped ❌ — multiple `asyncio.create_task` + `asyncio.to_thread` blocking calls inside `on_mount` were among the early freeze patterns; the resulting design uses no `asyncio.create_task` at all). +- Input enable/disable on idle/busy (dropped ❌ — see §5.4). + +The current design preserves the **outcome** the original plan was after (interactive chat over `opencode serve`) but takes a less ambitious path through Textual to stay within what actually works. + +--- + +## 9. Operational checklist when this code breaks again + +When (not if) `make chat` mysteriously goes black: + +1. Re-run with `make chat DEBUG=1` and grab the newest `tmp/chat-debug-*.log`. +2. 
Look for `_heartbeat: tick #N` lines. + - **None:** main loop is dead. Recent change probably introduced a forbidden pattern from §5. + - **Present but no `_on_render_message`:** message dispatch is dead. Check Message subclasses, `@on(...)` handlers, recent renames. + - **Both present, model output still missing:** look at the consumer thread side (`_consumer_worker` and `_render_and_log` lines) and the SseClient — the issue is upstream of the TUI. +3. `git log --oneline` since the last known-good run; bisect the diff against the rules in §5. +4. If bisection produces a new "this should work per docs but doesn't" pattern, **update §5** with the new finding (and the matching log evidence) before merging the fix. + +--- + +## 10. Related files + +| File | Role | +|------|------| +| `tools/run-agent.py` | Houses `_ChatApp`, `TextualConsoleProxy`, `_run_chat_mode`, debug logging helpers (`_chat_debug`, `_setup_chat_debug`, `_close_chat_debug`). The `_ChatApp` class docstring summarises the rules. | +| `tools/events/chat_loop.py` | `ChatEventLoop` — owns the SSE consumer daemon thread and the `send_prompt` HTTP helper. Used by chat mode AND by other (potentially) interactive code paths. Has an optional `debug` callback. | +| `tools/events/sse_client.py`, `state_tracker.py`, `emitters.py` | Reused from non-interactive phase mode. Not chat-specific. | +| `tests/test_chat_mode.py` | Unit tests for `ChatEventLoop`, `TextualConsoleProxy`, `_ChatApp._render_and_log` parity, and the `_run_chat_mode` transcript-file lifecycle. Pure Python (no Textual app instance); won't catch the freezes documented here, but does catch parity regressions vs phase mode. | +| `Makefile` (`chat:` target) | Entry point. Accepts `DEBUG=1` to forward `--debug` (which enables the diagnostic log file and the raw-event mirror to it). | +| `.project/chat-mode-plan.md` | Original design plan (pre-bisection). Kept for historical context; this postmortem supersedes it on architecture details. 
| +| `.project/chat-mode-textual-postmortem.md` | This document. | +| `tmp/chat-debug-*.log` | Per-run diagnostic logs (only when `--debug`). The bisection logs (May 22, 2026) are still around and are the evidence base for §3 and §5. | +| `tmp/last-chat-<timestamp>-pid<pid>.jsonl` | Per-run transcript jsonl, ALWAYS written. One JSON line per SSE event. Mirrors phase mode's `tmp/last-phase-...jsonl`. The transcript path is printed after the chat session ends. | + +--- + +## 11. Changelog + +### 2026-05-23 — Mouse selection: terminal-native via Ctrl+S (Option 1) + +- **Removed: `_SelectableRichLog` subclass.** Our manual `style._meta["offset"]` annotation pipeline did not produce a usable selection experience. Deep analysis (documented in [§12](#12-rationale-richlog-mouse-selection-is-not-supported-upstream)) shows RichLog lacks **all four** pieces needed for in-app selection — offset metadata, selection-style rendering inside `render_line`, cache invalidation on `selection_updated`, and `get_selection` text extraction — and reimplementing all four would mean either rendering each renderable twice (once for display, once for plain-text extraction) or replacing RichLog with `Log` (which loses all Rich markup, panels, and colors). Both options were rejected as poor value for the risk. +- **Added: `Ctrl+S` action `action_toggle_mouse_for_select`.** Toggles `App._driver._disable_mouse_support()` / `_enable_mouse_support()` (the canonical driver-level API used by Textual at startup/shutdown). When ON, Textual turns off the terminal's mouse-reporting modes, so the terminal stops forwarding mouse events to the app and the terminal emulator's native click-drag selection takes over, with Cmd+C / Ctrl+Shift+C copying to the system clipboard. A `[SEL]` indicator appears in the modeline. A status hint with the current mode is written to the RichLog on each toggle. +- **Added: startup tip.** `on_mount` now writes a dim italic line after the banner explaining the Option/Alt-drag (no-toggle) and Ctrl+S (toggle) selection paths. 
+- **Removed imports:** `rich.segment.Segment`, `rich.style.Style`, `textual.strip.Strip` — no longer used. +- **CSS update:** `_SelectableRichLog { ... }` rule renamed to `RichLog { ... }`. `compose()` yields stock `RichLog` and `query_one(RichLog)` replaces the subclass query. +- **Files:** `tools/run-agent.py` (`_ChatApp` BINDINGS adds `ctrl+s`, `__init__` adds `_terminal_select_mode`, new `action_toggle_mouse_for_select`, `_heartbeat` adds `[SEL]` modeline tag, `compose` reverts to stock RichLog, startup hint), `.project/chat-mode-textual-postmortem.md` (new §12 + this entry). +- **Tests:** 268 pass unchanged. `make tests` quality gate clean. + +### 2026-05-22 — RichLog suppression, modeline, dedup, sync cleanup + +- **Changed: `render_message_updated`** now suppresses in-progress messages. Only "complete" messages (those with a `summary`, `finish` reason, or non-zero tokens) produce output in the RichLog. This eliminates the flood of `> User` / `> Assistant (processing...)` lines that used to appear on every lifecycle event from the SSE stream. +- **Added: bottom-bar modeline.** A `Static` widget is passed as the leftmost child of `Footer` and updated by `_heartbeat` (1 Hz, main thread) from `_modeline_info` (atomically refreshed by `_render_and_log` on every `message.updated` event from the consumer thread). Displays `● | provider/model | ↑in ↓out | $cost` with a pulsing activity indicator that alternates `●` / `◌` each heartbeat tick. No new timers, no new handlers, no bg-thread UI writes — stays within the proven safe envelope. +- **Fixed: composite-key dedup in `_consumer_worker`.** SSE-stream-level `message.updated` duplicates (same message ID, same token-state) are now suppressed via a `(msg_id, has_input)` composite key in `_seen_message_ids`. The transition from "no tokens" to "has tokens" is allowed through so the final token-summary line renders. Plain message IDs are also stored for sync-path dedup. 
+- **Fixed: `_sync_session_messages` removed from idle path.** Previously called on every `session.idle` / `session.status:idle`, causing a bulk re-fetch of all session messages and emitting duplicate `message.updated` events for every message. The method is retained for future reconnect-catch-up use; for normal operation the SSE stream itself carries all events. +- **Updated: `_trigger_recovery_sync` docstring** notes that sync-after-reconnect is a TODO. +- **Files:** `tools/run-agent.py` (`_ChatApp` modeline, `_update_modeline_info`, `_heartbeat`, `render_message_updated` suppression), `tools/events/chat_loop.py` (composite-key dedup, idle-sync removal). + +### 2026-05-22 — Message deduplication + enriched render + +- **Fixed: `message.updated` deduplication.** `_consumer_worker` now tracks message IDs from the raw SSE stream in `_seen_message_ids`, so `_sync_session_messages` on idle no longer re-emits duplicate `message.updated` events for messages already seen and rendered. This eliminates the flood of duplicate `> User`/`> Assistant` lines that used to appear on every idle cycle (previously up to 7 duplicates per user message). +- **Improved: `render_message_updated`** (both chat and phase modes): + - Uses `info.role` (not `info.agent`) to determine the label: `> User` for user messages, `> Assistant` for assistant messages. + - User messages are rendered dim (no model spam — just `> User`). + - Assistant messages with tokens populated (complete) show the model and token/cost summary: `> Assistant · provider/model (↑444 ↓57, R28, cache read 24448, $0.0123)` (bold blue). + - Assistant messages without tokens (in-progress) render as `> Assistant (processing...)` (dim). + - Handles both SSE-stream shape (`event.properties.info`) and sync-synthesized shape (`event.info`). + - Cost is shown only when non-zero. +- **Files:** `tools/events/chat_loop.py` (5 lines), `tools/run-agent.py` (`render_message_updated` rewrite). 
+ +### 2026-05-22 — Parity pass + +- **Added: transcript jsonl.** `_run_chat_mode` now opens `tmp/last-chat-<timestamp>-pid<pid>.jsonl` before starting the TUI and closes it in `finally`. Every SSE event seen by `_render_and_log` is persisted as a JSON line. The file handle is passed into `_ChatApp(transcript_fp=...)`. +- **Added: end-of-session summary.** After `app.run()` returns, the restored terminal prints a green `Chat session ended` rule plus the `transcript: tmp/last-chat-...jsonl` path. Mirrors phase mode's success summary. +- **Added: initial-prompt echo.** Before spawning the `_send_initial_prompt` worker, `on_mount` writes `User: <prompt>` to the RichLog so the user can see what they sent. +- **Added: `--debug` raw-event mirror.** With `--debug`, `_render_and_log` now writes `_render_and_log: raw event: <event>` to the chat-debug log file (rather than stderr, which Textual owns). Phase mode mirrors to stderr; chat mode routes to the same per-run diagnostic file the heartbeat already writes to. +- **Removed: `[idle]` / `[busy]` chat-only state markers.** `_render_and_log` no longer posts these. The non-chat `render_session_status` renderer (already invoked via `render_event`) prints `session status: busy/idle` through the normal proxy path, which is the same signal phase mode emits. This achieves full event parity between chat and phase modes. +- **Tests:** added `TestChatRenderAndLogParity` (7 cases covering transcript writes, OSError swallowing, reasoning suppression, debug-mode mirror, no state-marker emission) and `TestChatTranscriptPath` (1 case verifying the `tmp/last-chat-<timestamp>-pid<pid>.jsonl` filename pattern is opened and closed by `_run_chat_mode`). Suite size: 23 → 31 chat tests (260 → 268 project total). +- **Docstring of `_ChatApp` updated.** Reflects the parity changes and the new transcript responsibility. + +### 2026-05-22 — Initial working build (post-bisection) + +- Established the working architecture documented in §4. 
+- Documented the failure-mode bisection (§3) and forbidden patterns (§5). +- Added the heartbeat canary + `--debug` chat-debug log file. + +--- + +## 12. Rationale: RichLog mouse selection is not supported upstream + +### 12.1 What Textual requires for a widget to be selectable + +In-app text selection in Textual is implemented at the `Screen` level (`screen.py` lines 1820-1916) but only works when a widget provides **all four** of the following cooperating pieces: + +| Piece | What the widget must do | Why | +|---|---|---| +| **A. Offset metadata** | Each segment in the strip returned by `render_line(y)` must carry `style._meta["offset"] = (char_x, content_y)` | The compositor's `get_widget_and_offset_at(x, y)` (`_compositor.py:944-967`) reads this to translate mouse pixel coordinates into per-character content offsets. Without it, the compositor returns `Offset(0, y)` and selection ranges collapse. | +| **B. Selection rendering** | `render_line` must read `self.text_selection`, fetch `screen--selection` component style, and stylize the selected range with the highlight background | Without this, the user sees no visual change when dragging — the selection exists in `screen.selections` but is invisible. | +| **C. Cache invalidation** | `selection_updated(selection)` must clear the per-line render cache and call `refresh()` | Without this, the freshly-rendered line is taken from cache (which lacks the selection style) on the next paint. | +| **D. Selection extraction** | `get_selection(selection)` must return the plain text under the selection as `(text, "\n")` | `Screen.action_copy_text` iterates `screen.selections` and calls each widget's `get_selection`. Without it, copy-to-clipboard returns nothing. | + +The reference implementation is `textual.widgets.Log` (`_log.py:265-362`). 
`Log` stores plain `str` lines, so it can cheaply build a Rich `Text` per render with selection style applied via `Text.stylize(selection_style, start, end)`, then apply `Strip.apply_offsets(scroll_x, content_y)` — the canonical Textual API for piece A. + +### 12.2 What `RichLog` provides (out of the box) + +`textual.widgets.RichLog` (`_rich_log.py`) provides **none of the four pieces**. Its `write()` calls `console.render(renderable)` and stores the resulting `Strip` list, with no plain-text representation kept anywhere. Its `render_line` just slices and styles via `apply_style(self.rich_style)`. Selection support has never been added upstream. + +### 12.3 What we tried first (and why it didn't work) + +The `_SelectableRichLog` subclass attempted to implement only piece A by mutating `self.lines[content_y]` to inject per-segment offset metadata before calling `super().render_line(y)`. This was insufficient because: + +1. **Pieces B/C/D were absent.** Even if piece A worked perfectly, the user would see no highlight on drag and nothing would copy to clipboard. From the user's perspective, this is indistinguishable from "selection is broken." +2. **`RichLog._line_cache` is content-blind.** It is keyed on `(y, scroll_x, width, widest)` — if a strip was cached before annotation, the cached (unannotated) version is returned on subsequent paints; our `render_line` override would have had to also invalidate the line cache on every mutation. +3. **`char_offset = len(segment.text)` is correct for ASCII but drifts for wide CJK/emoji** because the compositor mixes `cell_length` (for boundary checks) with `len(text)` (for offsets). Stock `Strip.apply_offsets` has the same limitation, so this is more of an upstream caveat than a bug, but it's worth noting. 
+ +### 12.4 Why we did not fully implement A+B+C+D + +A complete in-app selection implementation would require: + +- Storing each renderable's plain-text representation alongside its strips (involves a second `console.render(...)` pass per renderable with a styles-stripped console, or maintaining a parallel `list[str]` synchronised with `self.lines`). +- A custom `render_line` / `_render_line` pair that consults `text_selection`, gets the `screen--selection` style, restyles segments in the selected range, and calls `Strip.apply_offsets(scroll_x, content_y)`. +- A custom `selection_updated` that clears `_line_cache` and `self._render_line_cache`. +- A custom `get_selection` that extracts from the plain-text store. + +That is ~150 lines of new code added inside the very widget the compositor calls on every paint. The bisection postmortem (§5) repeatedly shows that seemingly innocuous additions to this code path silently freeze Textual's main loop on this Textual 8.2.6 + Python 3.14.5 combo. The risk-to-value ratio is poor. + +### 12.5 The terminal-native escape hatch (chosen path) + +Every modern terminal emulator (iTerm2, macOS Terminal, gnome-terminal, Alacritty, kitty, Windows Terminal, …) provides mouse-driven text selection over its display buffer, with system-clipboard integration. Textual normally captures mouse events, which prevents the terminal from seeing the drag. There are two well-known escape hatches: + +1. **Modifier-key bypass.** Hold Option/Alt on macOS terminals (or Shift on most Linux/Windows terminals) while dragging. The terminal sees the drag as "not for the application" and performs its native selection. This works without any application support. +2. **Driver-level mouse toggle.** Textual's driver has `_enable_mouse_support()` / `_disable_mouse_support()` (`drivers/linux_driver.py:121, 169`), which write `\x1b[?1000h`/`\x1b[?1000l` and friends to enable/disable the terminal's mouse-reporting modes. 
When disabled, mouse events are no longer intercepted by Textual, and the terminal performs native selection. + +We use **(2)** behind the `Ctrl+S` action and **document (1)** in the startup tip so users have both paths available. A `[SEL]` indicator appears in the modeline when terminal-select mode is on, and toggling either way prints a status hint into the RichLog. + +### 12.6 Limitations of the chosen approach + +- **No in-app visual feedback.** When terminal-select mode is on (toggled with `Ctrl+S`), the terminal draws the selection; the Textual app is unaware. This is by design. +- **Mouse-driven Textual interactions are disabled while `[SEL]` is on.** Scrolling via mouse wheel, clicking the input, etc. require toggling back. Keyboard interactions remain unaffected. +- **Selection is not stable across redraws.** Because the terminal (not Textual) owns the selection, new output from `_render_and_log` may scroll the buffer and visually disrupt the selection. The user is expected to copy, then toggle back. + +These trade-offs are acceptable: terminal-native selection is the standard pattern for every other interactive CLI (`less`, `vim`, `tmux` copy-mode bypass, etc.), and users already know how their terminal handles it. + +### 12.7 If future Textual adds RichLog selection support + +If a future Textual version implements pieces A-D on `RichLog` itself, the canonical path is to drop the `Ctrl+S` action, remove the startup tip, and let upstream selection work. The forbidden-patterns list in §5 still applies regardless. 
diff --git a/Makefile b/Makefile index 1fb3f3d..ed821eb 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,11 @@ export PATH := $(CURDIR)/.venv/bin:$(PATH) export PROMPT_EXTRA export PROMPT_EXTRA_FILE +CHAT ?= 0 +ifeq ($(CHAT),1) +WRAPPER_ARGS += --chat +endif + # Pass --thinking to raw opencode run when CODECOME_THINKING=1 OPENCODE_THINKING_FLAG := $(if $(filter 1,$(CODECOME_THINKING)),--thinking,) @@ -137,7 +142,7 @@ phase-1: venv-check @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ opencode run --agent recon $(OPENCODE_THINKING_FLAG) "$$(cat prompts/phase-1-recon.md)"; \ else \ - $(PYTHON) tools/run-agent.py --phase 1 --label "Target Reconnaissance + Sandbox Bootstrap" --agent recon --prompt-file prompts/phase-1-recon.md; \ + $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 1 --label "Target Reconnaissance + Sandbox Bootstrap" --agent recon --prompt-file prompts/phase-1-recon.md; \ fi phase-2: venv-check @@ -150,7 +155,7 @@ phase-2: venv-check @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ opencode run --agent auditor $(OPENCODE_THINKING_FLAG) "$$(cat prompts/phase-2-audit.md)"; \ else \ - $(PYTHON) tools/run-agent.py --phase 2 --label "Hypothesis Generation" --agent auditor --prompt-file prompts/phase-2-audit.md; \ + $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 2 --label "Hypothesis Generation" --agent auditor --prompt-file prompts/phase-2-audit.md; \ fi phase-3: venv-check @@ -158,7 +163,7 @@ phase-3: venv-check @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ opencode run --agent reviewer $(OPENCODE_THINKING_FLAG) "$$(cat prompts/phase-3-review.md)"; \ else \ - $(PYTHON) tools/run-agent.py --phase 3 --label "Counter-analysis" --agent reviewer --prompt-file prompts/phase-3-review.md; \ + $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 3 --label "Counter-analysis" --agent reviewer --prompt-file prompts/phase-3-review.md; \ fi phase-4: venv-check @@ -167,7 +172,7 @@ phase-4: venv-check @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ opencode run 
--agent validator $(OPENCODE_THINKING_FLAG) "$$(sed 's#FINDING_PATH_OR_ID#$(FINDING)#g' prompts/phase-4-validate.md)"; \ else \ - $(PYTHON) tools/run-agent.py --phase 4 --label "Validation" --agent validator --prompt-file prompts/phase-4-validate.md --finding "$(FINDING)"; \ + $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 4 --label "Validation" --agent validator --prompt-file prompts/phase-4-validate.md --finding "$(FINDING)"; \ fi phase-5: venv-check @@ -176,7 +181,7 @@ phase-5: venv-check @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ opencode run --agent exploiter $(OPENCODE_THINKING_FLAG) "$$(sed 's#FINDING_PATH_OR_ID#$(FINDING)#g' prompts/phase-5-exploit.md)"; \ else \ - $(PYTHON) tools/run-agent.py --phase 5 --label "Exploit Development" --agent exploiter --prompt-file prompts/phase-5-exploit.md --finding "$(FINDING)"; \ + $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 5 --label "Exploit Development" --agent exploiter --prompt-file prompts/phase-5-exploit.md --finding "$(FINDING)"; \ fi phase-6: venv-check @@ -184,9 +189,12 @@ phase-6: venv-check @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ opencode run --agent reporter $(OPENCODE_THINKING_FLAG) "$$(cat prompts/phase-6-report.md)"; \ else \ - $(PYTHON) tools/run-agent.py --phase 6 --label "Reporting" --agent reporter --prompt-file prompts/phase-6-report.md; \ + $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 6 --label "Reporting" --agent reporter --prompt-file prompts/phase-6-report.md; \ fi +chat: venv-check + @$(PYTHON) tools/run-agent.py --chat --label "Interactive Chat" --agent $(or $(AGENT),chat) --prompt-file prompts/chat-initial.md $(if $(DEBUG),--debug,) + list-risk-files: venv-check @$(PYTHON) tools/list-risk-files.py diff --git a/prompts/chat-initial.md b/prompts/chat-initial.md new file mode 100644 index 0000000..070ab30 --- /dev/null +++ b/prompts/chat-initial.md @@ -0,0 +1,23 @@ +# Chat Mode: Initial Prompt + +You are starting an interactive chat session with the user. 
+ +## Startup instructions + +1. Read `codecome.yml` to learn the project name and configuration. +2. List the contents of `itemdb/findings/` (all status subdirectories) to get a quick overview of the current finding statuses. +3. Respond with a brief greeting that includes: + - The project name (from `codecome.yml`). + - A one-line summary of finding counts by status (e.g., "2 PENDING, 1 CONFIRMED, 1 EXPLOITED, 1 REJECTED"). + - A note that you're ready for instructions. + +## What NOT to do at startup + +- Do NOT read `AGENTS.md`, reconnaissance notes (`itemdb/notes/*`), skills, templates, or source code. +- Do NOT perform reconnaissance or any analysis. +- Do NOT read large files or directory trees. +- Keep the startup response under 5 lines. + +## After startup + +Wait for the user's instructions. Read files only when the user asks or when a specific task requires them. If a task needs context from reconnaissance notes, skills, or source code, read those on demand. diff --git a/requirements.txt b/requirements.txt index e0e8a1d..f2f9da9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ PyYAML>=6.0.2 rich>=13.7.1 +textual>=0.80.0 pytest>=8.3.0 pytest-cov>=5.0.0 diff --git a/tests/test_chat_mode.py b/tests/test_chat_mode.py new file mode 100644 index 0000000..3de0e3b --- /dev/null +++ b/tests/test_chat_mode.py @@ -0,0 +1,699 @@ +from __future__ import annotations + +import json +import queue +import sys +import threading +import time +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest + +from conftest import ROOT + + +def load_chat_loop(): + sys_path = str(ROOT / "tools") + if sys_path not in sys.path: + sys.path.insert(0, sys_path) + from events.chat_loop import ChatEventLoop, ChatState + return ChatEventLoop, ChatState + + +def load_events(): + sys_path = str(ROOT / "tools") + if sys_path not in sys.path: + sys.path.insert(0, sys_path) + from 
events.sse_client import SseClient, SseClientError + return SseClient, SseClientError + + +# --------------------------------------------------------------------------- +# ChatState constants +# --------------------------------------------------------------------------- + +class TestChatState: + def test_state_values(self): + _, ChatState = load_chat_loop() + assert ChatState.IDLE == "idle" + assert ChatState.BUSY == "busy" + assert ChatState.ERROR == "error" + assert ChatState.STOPPED == "stopped" + + +# --------------------------------------------------------------------------- +# ChatEventLoop unit tests +# --------------------------------------------------------------------------- + +class TestChatEventLoop: + """Unit tests for events.chat_loop.ChatEventLoop.""" + + @pytest.fixture + def chat_loop(self): + ChatEventLoop, _ = load_chat_loop() + return ChatEventLoop( + base_url="http://localhost:8080", + session_id="sess-1", + console=None, + auth_token="test-token", + workspace_dir="/workspace", + ) + + def test_init_stores_fields(self, chat_loop): + assert chat_loop.base_url == "http://localhost:8080" + assert chat_loop.session_id == "sess-1" + assert chat_loop.auth_token == "test-token" + assert chat_loop.workspace_dir == "/workspace" + + def test_get_headers_with_auth(self, chat_loop): + headers = chat_loop._get_headers() + assert headers["Content-Type"] == "application/json" + assert "Authorization" in headers + assert "x-opencode-directory" in headers + + def test_get_headers_without_auth(self): + ChatEventLoop, _ = load_chat_loop() + loop = ChatEventLoop( + base_url="http://localhost:8080", + session_id="sess-1", + console=None, + ) + headers = loop._get_headers() + assert "Authorization" not in headers + + def test_belongs_to_session_matching(self, chat_loop): + assert chat_loop._belongs_to_session({"properties": {"sessionID": "sess-1"}}) + assert not chat_loop._belongs_to_session({"properties": {"sessionID": "other"}}) + assert 
chat_loop._belongs_to_session({"type": "server.heartbeat"}) + + def test_is_session_idle_deprecated(self, chat_loop): + assert chat_loop._is_session_idle({"type": "session.idle", "properties": {"sessionID": "sess-1"}}) + assert not chat_loop._is_session_idle({"type": "server.heartbeat"}) + + def test_is_session_idle_canonical(self, chat_loop): + assert chat_loop._is_session_idle({ + "type": "session.status", + "properties": {"sessionID": "sess-1", "status": {"type": "idle"}}, + }) + assert not chat_loop._is_session_idle({ + "type": "session.status", + "properties": {"sessionID": "sess-1", "status": {"type": "busy"}}, + }) + + def test_is_session_busy(self, chat_loop): + assert chat_loop._is_session_busy({ + "type": "session.status", + "properties": {"sessionID": "sess-1", "status": {"type": "busy"}}, + }) + assert not chat_loop._is_session_busy({ + "type": "session.status", + "properties": {"sessionID": "sess-1", "status": {"type": "idle"}}, + }) + + def test_stop_signals_stopped(self, chat_loop): + """stop() should put a STOPPED signal in the queue.""" + chat_loop.stop() + state, detail = chat_loop.get_state(timeout=2.0) + _, ChatState = load_chat_loop() + assert state == ChatState.STOPPED + + +class TestChatEventLoopWithFakeSse: + """ChatEventLoop tests with a fake SSE client.""" + + @pytest.fixture + def chat_loop_objects(self): + ChatEventLoop, ChatState = load_chat_loop() + SseClient, SseClientError = load_events() + return ChatEventLoop, ChatState, SseClient + + def test_single_turn_idle_signal(self, chat_loop_objects, monkeypatch): + """One prompt → SSE events → idle → TUI receives IDLE signal.""" + ChatEventLoop, ChatState, SseClient = chat_loop_objects + + emitted: list[dict] = [] + + def fake_render(console, phase, label, event): + emitted.append(event) + + class FakeSseClient: + def __init__(self, *a, **kw): + pass + def events(self): + return iter([ + {"type": "server.connected"}, + {"type": "message.part.updated", "properties": {"sessionID": "sess-1", 
"part": {"id": "p1", "type": "step-start"}}}, + {"type": "message.part.delta", "properties": {"sessionID": "sess-1", "partID": "p2", "field": "text", "delta": "Hello"}}, + {"type": "message.part.updated", "properties": {"sessionID": "sess-1", "part": {"id": "p2", "type": "text", "time": {"start": 0, "end": 1}}}}, + {"type": "session.idle", "properties": {"sessionID": "sess-1"}}, + ]) + def stop(self): + pass + + import events.chat_loop as _chat_mod + orig = _chat_mod.SseClient + _chat_mod.SseClient = FakeSseClient # type: ignore[misc] + try: + loop = ChatEventLoop("http://localhost:8080", "sess-1", None) + loop.start_consumer(fake_render) + state, detail = loop.get_state(timeout=5.0) + finally: + _chat_mod.SseClient = orig + + assert state == ChatState.IDLE + types = [e["type"] for e in emitted] + assert "server.connected" in types + assert "step_start" in types + assert "text" in types + + def test_multi_turn_cycle(self, chat_loop_objects, monkeypatch): + """Prompt → idle → prompt → idle → stop.""" + ChatEventLoop, ChatState, SseClient = chat_loop_objects + + emitted: list[dict] = [] + turn_count = [0] + idle_count = [0] + + def fake_render(console, phase, label, event): + emitted.append(event) + + class FakeSseClient: + def __init__(self, *a, **kw): + pass + def events(self): + # Yield events for two turns, then block + turn_count[0] += 1 + yield {"type": "message.part.updated", "properties": {"sessionID": "sess-1", "part": {"id": f"p{turn_count[0]}", "type": "text", "time": {"start": 0, "end": 1}}}} + idle_count[0] += 1 + yield {"type": "session.idle", "properties": {"sessionID": "sess-1"}} + # Yield second turn + turn_count[0] += 1 + yield {"type": "message.part.updated", "properties": {"sessionID": "sess-1", "part": {"id": f"p{turn_count[0]}", "type": "text", "time": {"start": 0, "end": 1}}}} + idle_count[0] += 1 + yield {"type": "session.idle", "properties": {"sessionID": "sess-1"}} + # After two idles, block until stop + import time + while True: + 
time.sleep(0.1) + def stop(self): + pass + + import events.chat_loop as _chat_mod + orig = _chat_mod.SseClient + _chat_mod.SseClient = FakeSseClient # type: ignore[misc] + try: + loop = ChatEventLoop("http://localhost:8080", "sess-1", None) + loop.start_consumer(fake_render) + + # First idle + state1, _ = loop.get_state(timeout=5.0) + assert state1 == ChatState.IDLE + + # Second idle + state2, _ = loop.get_state(timeout=5.0) + assert state2 == ChatState.IDLE + + loop.stop() + finally: + _chat_mod.SseClient = orig + + def test_permission_auto_rejected(self, chat_loop_objects, monkeypatch): + """Permission asked → auto-rejected → idle.""" + ChatEventLoop, ChatState, SseClient = chat_loop_objects + + captured_perms: list[tuple] = [] + + def fake_render(console, phase, label, event): + pass + + class FakeSseClient: + def __init__(self, *a, **kw): + pass + def events(self): + return iter([ + {"type": "permission.asked", "properties": {"sessionID": "sess-1", "id": "perm-1", "tool": "bash"}}, + {"type": "session.idle", "properties": {"sessionID": "sess-1"}}, + ]) + def stop(self): + pass + + def fake_urlopen(req, **kw): + if req.full_url.endswith("/permission/perm-1/reply"): + captured_perms.append((req.full_url, req.data)) + return type("R", (), {"read": lambda: b"{}", "__enter__": lambda s: s, "__exit__": lambda *a: None})() + + import events.chat_loop as _chat_mod + orig_sse = _chat_mod.SseClient + _chat_mod.SseClient = FakeSseClient # type: ignore[misc] + monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) + try: + loop = ChatEventLoop("http://localhost:8080", "sess-1", None) + loop.start_consumer(fake_render) + state, _ = loop.get_state(timeout=5.0) + finally: + _chat_mod.SseClient = orig_sse + + assert state == ChatState.IDLE + assert len(captured_perms) == 1 + assert "permission/perm-1/reply" in captured_perms[0][0] + assert json.loads(captured_perms[0][1]) == {"reply": "reject", "message": "Auto-rejected by CodeCome configuration"} + + def 
test_stop_during_busy(self, chat_loop_objects): + """Stop signal while consumer is running.""" + ChatEventLoop, ChatState, SseClient = chat_loop_objects + + stop_event = threading.Event() + + class FakeSseClient: + def __init__(self, *a, **kw): + pass + def events(self): + # Block until stop is called + stop_event.wait(timeout=10.0) + return iter([]) + def stop(self): + stop_event.set() + + import events.chat_loop as _chat_mod + orig = _chat_mod.SseClient + _chat_mod.SseClient = FakeSseClient # type: ignore[misc] + try: + loop = ChatEventLoop("http://localhost:8080", "sess-1", None) + loop.start_consumer(lambda c, p, l, e: None) + + # Give consumer time to start + time.sleep(0.1) + loop.stop() + + # Should get STOPPED signal + state, _ = loop.get_state(timeout=2.0) + assert state == ChatState.STOPPED + finally: + _chat_mod.SseClient = orig + + +# --------------------------------------------------------------------------- +# TextualConsoleProxy tests +# --------------------------------------------------------------------------- + +class TestTextualConsoleProxy: + """Unit tests for the TextualConsoleProxy class in run-agent.py.""" + + @pytest.fixture + def proxy_and_log(self): + module = _load_run_agent_module() + fake_log = MagicMock() + fake_app = MagicMock() + proxy = module.TextualConsoleProxy(fake_log, fake_app) + return proxy, fake_log, fake_app + + def test_single_arg_writes_directly_on_main_thread(self, proxy_and_log): + proxy, fake_log, fake_app = proxy_and_log + from rich.text import Text + proxy.print(Text("hello")) + fake_log.write.assert_called_once() + assert fake_log.write.call_args[0][0].plain == "hello" + + def test_no_args_writes_empty_line_on_main_thread(self, proxy_and_log): + proxy, fake_log, fake_app = proxy_and_log + proxy.print() + fake_log.write.assert_called_once() + + def test_multi_args_wraps_in_group_on_main_thread(self, proxy_and_log): + proxy, fake_log, fake_app = proxy_and_log + from rich.text import Text + proxy.print(Text("a"), 
Text("b")) + fake_log.write.assert_called_once() + from rich.console import Group + assert isinstance(fake_log.write.call_args[0][0], Group) + + def test_bg_thread_posts_render_message(self): + """Background thread calls must post a RenderMessage(renderable) + via post_message, not write to rich_log directly (per Textual docs: + post_message is thread-safe).""" + module = _load_run_agent_module() + fake_log = MagicMock() + fake_render_msg_cls = MagicMock() + fake_app = MagicMock() + fake_app.RenderMessage = fake_render_msg_cls + proxy = module.TextualConsoleProxy(fake_log, fake_app) + + from rich.text import Text + error_holder = [None] + + def bg_call(): + try: + proxy._write(Text("from_bg")) + except Exception as e: + error_holder[0] = e + + import threading + t = threading.Thread(target=bg_call, daemon=True) + t.start() + t.join(timeout=5) + + if error_holder[0]: + raise error_holder[0] + + # On bg thread, RenderMessage(renderable) is constructed and + # post_message is called. + fake_render_msg_cls.assert_called_once() + fake_app.post_message.assert_called_once() + # rich_log.write must NOT be called from a bg thread. 
class TestChatArgparse:
    """Tests for --chat flag parsing.

    These tests exercise only ``build_parser()``.  They pin that the parser
    accepts each argument combination below and reports omitted options as
    ``None``; they do not cover any validation applied after parsing.
    """

    # Smallest argv that selects chat mode with a label and an agent.
    CHAT_ARGV = ("--chat", "--label", "test", "--agent", "auditor")

    @pytest.fixture
    def parser(self):
        """Return a fresh argument parser built by run-agent.py."""
        module = _load_run_agent_module()
        return module.build_parser()

    def test_chat_flag_parsed(self, parser):
        """--chat is a boolean flag and coexists with --label/--agent."""
        args = parser.parse_args(list(self.CHAT_ARGV))
        assert args.chat is True
        assert args.label == "test"
        assert args.agent == "auditor"

    def test_chat_with_prompt(self, parser):
        """--prompt carries the direct prompt text in chat mode."""
        args = parser.parse_args([*self.CHAT_ARGV, "--prompt", "Hello"])
        assert args.chat is True
        assert args.prompt == "Hello"

    def test_chat_without_phase(self, parser):
        """--chat should not require --phase."""
        args = parser.parse_args(list(self.CHAT_ARGV))
        assert args.phase is None

    def test_chat_requires_label(self, parser):
        """The parser does not enforce --label: omitting it parses and
        yields ``None``."""
        args = parser.parse_args(["--chat", "--agent", "auditor"])
        assert args.label is None

    def test_chat_requires_agent(self, parser):
        """The parser does not enforce --agent: omitting it parses and
        yields ``None``."""
        args = parser.parse_args(["--chat", "--label", "test"])
        assert args.agent is None

    def test_normal_mode_requires_phase(self, parser):
        """Without --chat the parser still accepts a missing --phase and
        reports it as ``None``; ``chat`` defaults to ``False``."""
        args = parser.parse_args(
            ["--label", "test", "--agent", "auditor", "--prompt-file", "phase.md"]
        )
        assert args.chat is False
        assert args.phase is None
+# +# Chat-mode's _render_and_log should match (1), (3), (4) and route the +# raw-JSON mirror to the chat-debug log file instead of stderr (because +# Textual owns the TTY in chat mode). It must NOT emit chat-specific +# state markers ('[idle]' / '[busy]') any more — non-chat doesn't. +# --------------------------------------------------------------------------- + +class TestChatRenderAndLogParity: + """Tests for _ChatApp._render_and_log parity with phase mode.""" + + @pytest.fixture + def app_under_test(self): + """Construct a _ChatApp instance without running Textual. + + We only populate the fields _render_and_log actually reads + (transcript_fp, args, thinking_on) and stub render_event so + we can capture dispatcher calls. + """ + module = _load_run_agent_module() + app = module.ChatApp() + return module, app + + def _make_args(self, debug=False): + ns = MagicMock() + ns.debug = debug + return ns + + def test_writes_event_to_transcript(self, app_under_test): + """_render_and_log appends json.dumps(event) + '\\n' to transcript_fp.""" + module, app = app_under_test + from io import StringIO + sink = StringIO() + app.transcript_fp = sink + app.args = self._make_args(debug=False) + app.thinking_on = True + + with patch.object(module, "render_event", lambda *a, **kw: None): + app._render_and_log(MagicMock(), "Chat", "Test", {"type": "text", "x": 1}) + app._render_and_log(MagicMock(), "Chat", "Test", {"type": "session.status", "y": 2}) + + lines = [json.loads(line) for line in sink.getvalue().splitlines()] + assert lines == [ + {"type": "text", "x": 1}, + {"type": "session.status", "y": 2}, + ] + + def test_transcript_write_failure_is_swallowed(self, app_under_test): + """If transcript writes raise OSError, _render_and_log still + proceeds to render_event without re-raising.""" + module, app = app_under_test + bad_fp = MagicMock() + bad_fp.write.side_effect = OSError("disk full") + app.transcript_fp = bad_fp + app.args = self._make_args(debug=False) + app.thinking_on 
= True + + render_calls = [] + with patch.object(module, "render_event", lambda *a, **kw: render_calls.append(a)): + # Must not raise. + app._render_and_log(MagicMock(), "Chat", "Test", {"type": "text"}) + + assert len(render_calls) == 1 + + def test_no_transcript_fp_is_ok(self, app_under_test): + """When transcript_fp is None, _render_and_log skips persistence + but still renders.""" + module, app = app_under_test + app.transcript_fp = None + app.args = self._make_args(debug=False) + app.thinking_on = True + + render_calls = [] + with patch.object(module, "render_event", lambda *a, **kw: render_calls.append(a)): + app._render_and_log(MagicMock(), "Chat", "Test", {"type": "text"}) + + assert len(render_calls) == 1 + + def test_suppresses_reasoning_when_thinking_off(self, app_under_test): + """When thinking_on is False, 'reasoning' events bypass render_event + (parity with phase mode).""" + module, app = app_under_test + from io import StringIO + sink = StringIO() + app.transcript_fp = sink + app.args = self._make_args(debug=False) + app.thinking_on = False + + render_calls = [] + with patch.object(module, "render_event", lambda *a, **kw: render_calls.append(a[3].get("type"))): + app._render_and_log(MagicMock(), "Chat", "Test", {"type": "reasoning", "text": "..."}) + app._render_and_log(MagicMock(), "Chat", "Test", {"type": "text", "text": "ok"}) + + # reasoning event is NOT rendered, text event IS. + assert render_calls == ["text"] + # But BOTH events still hit the transcript. 
+ lines = [json.loads(line) for line in sink.getvalue().splitlines()] + assert [ev["type"] for ev in lines] == ["reasoning", "text"] + + def test_renders_reasoning_when_thinking_on(self, app_under_test): + """When thinking_on is True, reasoning events ARE dispatched.""" + module, app = app_under_test + app.transcript_fp = None + app.args = self._make_args(debug=False) + app.thinking_on = True + + render_calls = [] + with patch.object(module, "render_event", lambda *a, **kw: render_calls.append(a[3].get("type"))): + app._render_and_log(MagicMock(), "Chat", "Test", {"type": "reasoning", "text": "..."}) + + assert render_calls == ["reasoning"] + + def test_does_not_post_chat_only_state_markers(self, app_under_test): + """_render_and_log must NOT post '[idle]'/'[busy]' RenderMessage + markers for session.status / session.idle events. Those were + chat-specific scar tissue; non-chat mode never emitted them. + State cues are produced by render_event -> render_session_status + which prints 'session status: busy/idle'.""" + module, app = app_under_test + app.transcript_fp = None + app.args = self._make_args(debug=False) + app.thinking_on = True + + # Spy on post_message — _render_and_log itself must NOT call it + # (only the proxy / render_event should). + post_calls = [] + with patch.object(app, "post_message", side_effect=lambda m: post_calls.append(m)): + with patch.object(module, "render_event", lambda *a, **kw: None): + app._render_and_log( + MagicMock(), + "Chat", + "Test", + {"type": "session.status", + "properties": {"status": {"type": "busy"}}}, + ) + app._render_and_log( + MagicMock(), + "Chat", + "Test", + {"type": "session.status", + "properties": {"status": {"type": "idle"}}}, + ) + app._render_and_log( + MagicMock(), + "Chat", + "Test", + {"type": "session.idle"}, + ) + + # No direct post_message calls from _render_and_log itself. 
+ assert post_calls == [] + + def test_debug_mode_mirrors_raw_event_to_chat_debug(self, app_under_test): + """When --debug is set, the raw event JSON is mirrored to the + chat-debug log file via _chat_debug. In phase mode this goes to + stderr; chat mode routes to the chat-debug file because Textual + owns the TTY.""" + module, app = app_under_test + app.transcript_fp = None + app.args = self._make_args(debug=True) + app.thinking_on = True + + chat_debug_calls = [] + with patch.object(module, "_chat_debug", + side_effect=lambda msg: chat_debug_calls.append(msg)): + with patch.object(module, "render_event", lambda *a, **kw: None): + app._render_and_log( + MagicMock(), + "Chat", + "Test", + {"type": "text", "x": 42}, + ) + + # The raw-event mirror message should include the JSON payload. + assert any('"x": 42' in m for m in chat_debug_calls), chat_debug_calls + + +# --------------------------------------------------------------------------- +# _run_chat_mode transcript path tests +# --------------------------------------------------------------------------- + +class TestChatTranscriptPath: + """Tests for the transcript-file path naming used by chat mode.""" + + def test_transcript_path_pattern(self, tmp_path, monkeypatch): + """_run_chat_mode opens a transcript file under tmp/ with the + pattern last-chat--pid.jsonl.""" + module = _load_run_agent_module() + + # Sandbox the ROOT/tmp directory by redirecting ROOT in the + # module. We use monkeypatch to swap module.ROOT for tmp_path + # so the transcript lands inside our pytest tmp_path. + monkeypatch.setattr(module, "ROOT", tmp_path) + + # Stub everything _run_chat_mode would otherwise call so we + # exercise ONLY the transcript-path setup and the final summary. 
+ monkeypatch.setattr(module, "check_opencode_version", lambda: None) + monkeypatch.setattr(module, "resolve_color_mode", lambda v: "auto") + monkeypatch.setattr(module, "build_console", lambda v: MagicMock()) + monkeypatch.setattr( + module, + "resolve_model_and_variant", + lambda agent, extra: ("opencode/test", None, "stub", "stub"), + ) + monkeypatch.setattr( + module, "_resolve_thinking_decision", lambda m, e: (False, "stub") + ) + + # Server / session creation: stub to return fake objects. + fake_server = MagicMock() + fake_server.base_url = "http://127.0.0.1:1" + fake_server.password = "tok" + fake_runner = MagicMock() + fake_runner.start.return_value = fake_server + monkeypatch.setattr(module, "ServerRunner", lambda: fake_runner) + monkeypatch.setattr(module, "_create_chat_session", + lambda *a, **kw: "ses_abc") + + # The Textual app's run() is a no-op for this test (we just + # care about the transcript file lifecycle). + fake_app = MagicMock() + fake_app.chat_loop = None + fake_app_cls = MagicMock(return_value=fake_app) + monkeypatch.setattr(module, "ChatApp", fake_app_cls) + + # Argparse namespace. + ns = MagicMock() + ns.label = "Test" + ns.agent = "auditor" + ns.prompt_file = None + ns.prompt = "hi" + ns.finding = None + ns.phase = None + ns.color = "auto" + ns.debug = False + + parser = MagicMock() + # parser.error would sys.exit; we never trigger it because + # label & agent are set. + + rc = module._run_chat_mode(parser, ns) + assert rc == 0 + + # Exactly one transcript jsonl was created under tmp/. + transcripts = sorted((tmp_path / "tmp").glob("last-chat-*.jsonl")) + assert len(transcripts) == 1, transcripts + name = transcripts[0].name + # Name pattern: last-chat-YYYYMMDD-HHMMSS-pid.jsonl + import re + assert re.match( + r"^last-chat-\d{8}-\d{6}-pid\d+\.jsonl$", name + ), f"unexpected transcript filename: {name}" + + # transcript_fp was passed into ChatApp(...) 
def _load_run_agent_module():
    """Load ``tools/run-agent.py`` as a module and cache it in ``sys.modules``.

    The script name contains a hyphen, so it cannot be imported with a normal
    ``import`` statement; it is loaded from its path under the private name
    ``run_agent_chat_tests`` instead.  Later calls return the cached module.

    Returns:
        The loaded module object.

    Raises:
        RuntimeError: if no import spec/loader can be created for the path.
        Exception: whatever the script raises while executing.  In that case
            the cache entry is removed again so a later call retries instead
            of handing out a half-initialised module.
    """
    module_name = "run_agent_chat_tests"

    # Fast path: already loaded by an earlier test.
    cached = sys.modules.get(module_name)
    if cached is not None:
        return cached

    import importlib.util

    module_path = ROOT / "tools" / "run-agent.py"
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    if spec is None or spec.loader is None:
        raise RuntimeError(f"Cannot load module from {module_path}")

    module = importlib.util.module_from_spec(spec)
    # Register before executing so the module is resolvable by name while
    # its own top-level code runs.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Never leave a partially executed module in the cache.
        sys.modules.pop(module_name, None)
        raise
    return module
class ChatEventLoop:
    """Multi-turn event loop for interactive chat mode.

    Runs the SSE consumer in a background thread.  When the session reaches
    idle, ``(ChatState.IDLE, None)`` is pushed onto an internal queue so the
    TUI can re-enable its input; the TUI reads that queue with get_state()
    and submits new messages with send_prompt().
    """

    def __init__(
        self,
        base_url: str,
        session_id: str,
        console: Any,
        *,
        auth_token: str | None = None,
        workspace_dir: str | None = None,
        debug: Callable[[str], None] | None = None,
    ) -> None:
        """Store connection settings; no network activity happens here.

        Args:
            base_url: Server base URL; a trailing slash is stripped.
            session_id: Session whose events are consumed and prompted.
            console: Opaque console object handed through to the renderer.
            auth_token: Optional token sent as HTTP Basic auth (user
                ``opencode``).
            workspace_dir: Optional value for the ``x-opencode-directory``
                header.
            debug: Optional callable receiving diagnostic strings.
        """
        self.base_url = base_url.rstrip("/")
        self.session_id = session_id
        self.console = console
        self.auth_token = auth_token
        self.workspace_dir = workspace_dir
        self.debug = debug

        self._tracker = StateTracker()
        self._client: SseClient | None = None
        self._stopped = False
        # Holds both plain message ids and "<id>:tok=<0|1>" composite keys.
        self._seen_message_ids: set[str] = set()
        # (event type, part id) pairs that were already rendered.
        self._emitted_signatures: set[tuple[str, str]] = set()

        # Coordination with TUI
        self._state_queue: queue.Queue[tuple[str, Any | None]] = queue.Queue()
        self._consumer_thread: threading.Thread | None = None

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def start_consumer(self, render_fn: Callable[[Any, str, str, dict[str, Any]], None]) -> None:
        """Start the SSE consumer in a background daemon thread."""
        self._consumer_thread = threading.Thread(
            target=self._consumer_worker,
            args=(render_fn,),
            name="codecome-chat-consumer",
            daemon=True,
        )
        self._consumer_thread.start()

    def send_prompt(
        self,
        text: str,
        agent: str | None = None,
        model: str | None = None,
        variant: str | None = None,
    ) -> None:
        """POST a new user prompt to the active session.

        Blocks until the HTTP request completes.  The SSE consumer thread
        picks up the response events.  A failed request, whether an HTTP
        error status or a transport problem such as a refused connection or
        a timeout, is reported as ``(ChatState.ERROR, message)`` on the
        state queue rather than raised.

        Args:
            text: Prompt text, sent as a single text part.
            agent: Optional agent name to include in the payload.
            model: Optional ``provider/model`` string; ignored when it
                contains no ``/``.
            variant: Optional variant to include in the payload.
        """
        if self.debug:
            self.debug(f"send_prompt: posting prompt len={len(text)}")
        payload: dict[str, Any] = {
            "parts": [{"type": "text", "text": text}],
        }
        if agent:
            payload["agent"] = agent
        if model and "/" in model:
            provider_id, model_id = model.split("/", 1)
            payload["model"] = {"providerID": provider_id, "modelID": model_id}
        if variant:
            payload["variant"] = variant

        url = f"{self.base_url}/session/{self.session_id}/prompt_async"
        req = urllib.request.Request(
            url,
            data=json.dumps(payload).encode("utf-8"),
            headers=self._get_headers(),
            method="POST",
        )
        try:
            with urllib.request.urlopen(req, timeout=300) as resp:
                resp.read()
        except urllib.error.HTTPError as exc:
            body = exc.read().decode("utf-8", errors="replace")
            msg = f"HTTP {exc.code}: {body}"
            if self.debug:
                self.debug(f"send_prompt: HTTP error: {msg}")
            self._state_queue.put((ChatState.ERROR, msg))
        except OSError as exc:
            # URLError and socket timeouts derive from OSError.  Report them
            # the same way as HTTP errors so the TUI is not left waiting.
            msg = f"Prompt request failed: {exc}"
            if self.debug:
                self.debug(f"send_prompt: transport error: {msg}")
            self._state_queue.put((ChatState.ERROR, msg))
        else:
            if self.debug:
                self.debug("send_prompt: HTTP POST completed")

    def get_state(self, timeout: float | None = None) -> tuple[str, Any | None]:
        """Block until the consumer signals a state change.

        Returns:
            ``(state, detail)`` where state is one of ``ChatState.*``.

        Raises:
            queue.Empty: if *timeout* elapses before a signal arrives.
        """
        return self._state_queue.get(timeout=timeout)

    def stop(self) -> None:
        """Signal the consumer thread to exit and wait for it (up to 5 s)."""
        self._stopped = True
        if self._client is not None:
            self._client.stop()
        if self._consumer_thread is not None and self._consumer_thread.is_alive():
            self._consumer_thread.join(timeout=5.0)
        # Signal stopped in case the TUI is waiting
        self._state_queue.put((ChatState.STOPPED, None))

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    def _get_headers(self) -> dict[str, str]:
        """Build the request headers (JSON content type, auth, workspace)."""
        headers = {"Content-Type": "application/json"}
        if self.auth_token:
            import base64

            encoded = base64.b64encode(f"opencode:{self.auth_token}".encode("utf-8")).decode("utf-8")
            headers["Authorization"] = f"Basic {encoded}"
        if self.workspace_dir:
            headers["x-opencode-directory"] = self.workspace_dir
        return headers

    @staticmethod
    def _event_properties(event: dict[str, Any]) -> dict[str, Any]:
        """Return ``event["properties"]`` if it is a dict, else ``{}``."""
        props = event.get("properties", {})
        return props if isinstance(props, dict) else {}

    def _is_duplicate_message_update(self, event: dict[str, Any]) -> bool:
        """Record a ``message.updated`` event; return True if it was seen before.

        The dedup key combines the message id with whether the message
        already reports input tokens, so the "no tokens → has tokens"
        transition of one message is still rendered once (for example, the
        final token-summary line of an assistant turn).  The plain message
        id is recorded too, because _sync_session_messages checks that form.
        """
        info = self._event_properties(event).get("info", {})
        if not isinstance(info, dict):
            return False
        msg_id = info.get("id")
        if not (isinstance(msg_id, str) and msg_id):
            return False
        tokens = info.get("tokens", {})
        has_input = bool(tokens.get("input", 0)) if isinstance(tokens, dict) else False
        stream_key = f"{msg_id}:tok={1 if has_input else 0}"
        if stream_key in self._seen_message_ids:
            if self.debug:
                self.debug(f"_consumer_worker: suppressing duplicate msg {stream_key}")
            return True
        self._seen_message_ids.add(stream_key)
        self._seen_message_ids.add(msg_id)
        return False

    def _emit_once(self, render_fn: Callable[[Any, str, str, dict[str, Any]], None], event: dict[str, Any]) -> None:
        """Emit a finalized event unless the same (type, part id) was emitted."""
        part = event.get("part", {})
        part_id = part.get("id", "") if isinstance(part, dict) else ""
        signature = (event.get("type", ""), part_id)
        # Events without a part id cannot be deduplicated; always emit them.
        if part_id and signature in self._emitted_signatures:
            return
        self._emitted_signatures.add(signature)
        self._emit_event(render_fn, event)

    def _consumer_worker(self, render_fn: Callable[[Any, str, str, dict[str, Any]], None]) -> None:
        """Background thread: consume SSE, render events, signal idle."""
        if self.debug:
            self.debug("_consumer_worker: starting SSE client")
        self._client = SseClient(
            self.base_url,
            auth_token=self.auth_token,
            workspace_dir=self.workspace_dir,
            reconnect=True,
            max_reconnects=10,
            on_reconnect=self._trigger_recovery_sync,
        )

        try:
            event_count = 0
            if self.debug:
                self.debug("_consumer_worker: entering event loop")
            for event in self._client.events():
                if self._stopped:
                    if self.debug:
                        self.debug("_consumer_worker: stopped flag set, breaking")
                    break

                if not self._belongs_to_session(event):
                    continue

                event_count += 1
                # Log the first few events, then every 20th, to keep the
                # debug log readable.
                if self.debug and (event_count <= 5 or event_count % 20 == 0):
                    self.debug(f"_consumer_worker: event #{event_count} type={event.get('type')}")

                if event.get("type") == "message.updated" and self._is_duplicate_message_update(event):
                    continue

                # Handle permissions
                if event.get("type") == "permission.asked":
                    self._handle_permission(event)
                    continue

                # Track state transitions
                if self._is_session_idle(event):
                    if self.debug:
                        self.debug("_consumer_worker: session idle detected")
                    # Emit the idle event itself, then signal idle to the TUI.
                    self._emit_event(render_fn, event)
                    self._state_queue.put((ChatState.IDLE, None))
                    continue

                if self._is_session_busy(event):
                    self._state_queue.put((ChatState.BUSY, None))

                # Track and render
                for finalized in self._tracker.ingest(event):
                    self._emit_once(render_fn, finalized)

        except SseClientError as exc:
            msg = f"SSE connection lost: {exc}"
            if self.debug:
                self.debug(f"_consumer_worker: SseClientError: {exc}")
            self._state_queue.put((ChatState.ERROR, msg))
        except Exception as exc:
            # Top-level boundary of the thread: report instead of dying silently.
            msg = f"Chat consumer error: {exc}"
            if self.debug:
                import traceback

                self.debug(f"_consumer_worker: unexpected exception: {traceback.format_exc()}")
            self._state_queue.put((ChatState.ERROR, msg))
        else:
            if self.debug:
                self.debug("_consumer_worker: event loop ended normally")
        finally:
            if self.debug:
                self.debug("_consumer_worker: exiting")
            # stop() enqueues STOPPED itself; only signal here when the
            # stream ended without stop() having been requested.
            if not self._stopped:
                self._state_queue.put((ChatState.STOPPED, None))

    def _belongs_to_session(self, event: dict[str, Any]) -> bool:
        """Return False only for events tagged with a different session id.

        Events that carry no ``sessionID`` are accepted.
        """
        sid = self._event_properties(event).get("sessionID")
        return not (sid and sid != self.session_id)

    @staticmethod
    def _status_type(event: dict[str, Any]) -> Any:
        """Return ``properties.status.type`` of an event, or None if absent."""
        props = event.get("properties", {})
        status = props.get("status", {}) if isinstance(props, dict) else {}
        return status.get("type") if isinstance(status, dict) else None

    @staticmethod
    def _is_session_idle(event: dict[str, Any]) -> bool:
        """True for ``session.idle`` or a ``session.status`` of type idle."""
        event_type = event.get("type", "")
        if event_type == "session.idle":
            return True
        if event_type == "session.status":
            return ChatEventLoop._status_type(event) == "idle"
        return False

    @staticmethod
    def _is_session_busy(event: dict[str, Any]) -> bool:
        """True for a ``session.status`` event of type busy."""
        if event.get("type", "") == "session.status":
            return ChatEventLoop._status_type(event) == "busy"
        return False

    def _handle_permission(self, event: dict[str, Any]) -> None:
        """Auto-reject a permission request (best effort).

        A failed reply is logged through the debug hook and otherwise
        ignored, so a transient network problem cannot end the consumer.
        """
        perm_id = self._event_properties(event).get("id")
        if not perm_id:
            return
        url = f"{self.base_url}/permission/{perm_id}/reply"
        data = json.dumps({
            "reply": "reject",
            "message": "Auto-rejected by CodeCome configuration",
        }).encode("utf-8")
        req = urllib.request.Request(
            url,
            data=data,
            headers=self._get_headers(),
            method="POST",
        )
        try:
            # The reply body is not needed; the context manager only makes
            # sure the connection is released.
            with urllib.request.urlopen(req, timeout=10.0):
                pass
        except OSError as exc:
            # HTTPError, URLError and socket timeouts all derive from OSError.
            if self.debug:
                self.debug(f"_handle_permission: reply for {perm_id} failed: {exc}")

    def _emit_event(self, render_fn: Callable[[Any, str, str, dict[str, Any]], None], event: dict[str, Any]) -> None:
        """Emit a single event through the render pipeline."""
        emit_event(render_fn, self.console, "Chat", "Interactive Chat", event)

    def _trigger_recovery_sync(self) -> None:
        """Called by SseClient after reconnection.

        TODO: implement a catch-up sync via _sync_session_messages here.
        Currently sync-after-reconnect is a no-op; the SSE-stream-level
        dedup (_seen_message_ids composite keys) and the fact that
        sync was removed from the normal idle path mean we rely on the
        SSE stream itself to deliver all events after reconnect.
        """
        # Intentionally empty until the catch-up sync above is implemented.

    def _sync_session_messages(self, render_fn: Callable[[Any, str, str, dict[str, Any]], None]) -> list[dict[str, Any]]:
        """Fetch current session messages and emit any missed finalized parts.

        Not invoked anywhere in this class at present (see
        _trigger_recovery_sync).  Any failure to fetch or decode the message
        list yields an empty result.

        Returns:
            The synthesized events, including any whose rendering was
            skipped as a duplicate.
        """
        events: list[dict[str, Any]] = []
        try:
            req = urllib.request.Request(
                f"{self.base_url}/session/{self.session_id}/message",
                headers=self._get_headers(),
                method="GET",
            )
            with urllib.request.urlopen(req, timeout=10.0) as resp:
                messages = json.loads(resp.read().decode("utf-8"))
        except Exception:  # noqa: BLE001
            return []

        if not isinstance(messages, list):
            return []

        for item in messages:
            if not isinstance(item, dict):
                continue
            info = item.get("info")
            parts = item.get("parts")
            if not isinstance(info, dict) or not isinstance(parts, list):
                continue
            # Only assistant messages of this session are replayed.
            if info.get("role") != "assistant":
                continue
            if info.get("sessionID") != self.session_id:
                continue

            message_id = info.get("id")
            if isinstance(message_id, str) and message_id and message_id not in self._seen_message_ids:
                events.append({
                    "type": "message.updated",
                    "timestamp": int(time.time() * 1000),
                    "sessionID": self.session_id,
                    "info": info,
                })
                self._seen_message_ids.add(message_id)

            for part in parts:
                if not isinstance(part, dict):
                    continue
                part_id = part.get("id")
                if isinstance(part_id, str) and self._tracker.has_seen(part_id):
                    self._tracker.mark_seen(part_id)
                    continue
                synthesized = {
                    "type": "message.part.updated",
                    "timestamp": int(time.time() * 1000),
                    "properties": {
                        "sessionID": self.session_id,
                        "part": part,
                    },
                }
                events.extend(self._tracker.ingest(synthesized))

        for finalized in events:
            self._emit_once(render_fn, finalized)

        return events
check_opencode_version() -> None: try: @@ -3918,12 +3959,76 @@ def render_session_diff(console: Console, event: dict[str, Any]) -> None: def render_message_updated(console: Console, event: dict[str, Any]) -> None: - info = event.get("info", {}) if isinstance(event.get("info"), dict) else {} - agent = str(info.get("agent", "assistant")) - model_id = str(info.get("modelID", info.get("model", ""))) - message = f"> {agent} · {model_id}" if model_id else f"> {agent}" + # Extract info from either event.info (sync-synthesized) or + # event.properties.info (raw SSE stream). + info = event.get("info") + if not isinstance(info, dict): + props = event.get("properties", {}) + info = props.get("info", {}) if isinstance(props, dict) else {} + if not isinstance(info, dict): + info = {} + + role = str(info.get("role", "")) + tokens = info.get("tokens", {}) if isinstance(info.get("tokens"), dict) else {} + has_tokens = isinstance(tokens, dict) and ( + tokens.get("input", 0) or tokens.get("output", 0) or tokens.get("reasoning", 0) + ) + + # Suppress in-progress messages — only render "complete" ones that + # carry a summary, a finish reason, or non-zero tokens. This keeps + # the RichLog clean and avoids the flood of intermediate lifecycle + # events the SSE stream emits for every message state change. + has_summary = "summary" in info or "finish" in info + if not has_summary and not has_tokens: + return + + cache = tokens.get("cache", {}) if isinstance(tokens, dict) else {} + cost = info.get("cost", 0) or 0 + + # Extract model identifier from whichever field shape is present. 
+ model_id = str(info.get("modelID", "")).strip() + provider_id = str(info.get("providerID", "")).strip() + if not model_id: + mdl = info.get("model", {}) + if isinstance(mdl, dict): + model_id = str(mdl.get("modelID", "")).strip() + provider_id = str(mdl.get("providerID", "")).strip() + model_label = f"{provider_id}/{model_id}" if provider_id and model_id else model_id + + if role == "user": + # User prompt acknowledged — short, dim, no model spam. + message = "> User" + style = "dim" + elif role == "assistant": + if has_tokens: + # Complete message — show model and token-count summary. + _in = tokens.get("input", 0) + _out = tokens.get("output", 0) + _reasoning = tokens.get("reasoning", 0) + _cache_read = cache.get("read", 0) if isinstance(cache, dict) else 0 + token_parts = [f"↑{_in} ↓{_out}"] + if _reasoning: + token_parts.append(f"R{_reasoning}") + if _cache_read: + token_parts.append(f"cache read {_cache_read}") + token_str = ", ".join(token_parts) + cost_str = f", ${cost:.4f}" if cost else "" + message = f"> Assistant · {model_label} ({token_str}{cost_str})" + style = "bold blue" + else: + # Complete message without token info (shouldn't normally + # happen after the has_summary check above, but kept as + # a safe fallback). + message = f"> Assistant · {model_label}" if model_label else "> Assistant" + style = "bold blue" + else: + # Fallback — unrecognised role, show what we have. 
def _create_chat_session(base_url: str, agent: str, model: str | None, auth_token: str | None, workspace_dir: str | None) -> str:
    """Create a session for interactive chat mode with permission rules.

    POSTs to ``{base_url}/session`` with a payload that denies the
    ``question``, ``plan_enter`` and ``plan_exit`` permissions.

    Args:
        base_url: Base URL of the server.
        agent: Agent name placed in the session payload.
        model: Optional ``provider/model`` (or bare model) identifier.
        auth_token: Passed through to ``_get_headers``.
        workspace_dir: Passed through to ``_get_headers``.

    Returns:
        The non-empty session id reported by the server.

    Raises:
        RuntimeError: If the response carries no usable session id.
        urllib.error.URLError: If the request fails (raised by ``urlopen``).
    """
    payload: dict[str, Any] = {
        "title": "CodeCome Chat",
        "agent": agent,
        "permission": [
            {"permission": "question", "action": "deny", "pattern": "*"},
            {"permission": "plan_enter", "action": "deny", "pattern": "*"},
            {"permission": "plan_exit", "action": "deny", "pattern": "*"},
        ],
    }
    if model:
        provider_id, sep, model_id = model.partition("/")
        if sep:
            payload["model"] = {"providerID": provider_id, "id": model_id}
        else:
            payload["model"] = {"id": model}

    req = urllib.request.Request(
        f"{base_url}/session",
        data=json.dumps(payload).encode("utf-8"),
        headers=_get_headers(auth_token, workspace_dir),
        method="POST",
    )
    # The response object is a context manager; closing it releases the
    # underlying socket instead of leaving that to the garbage collector.
    with urllib.request.urlopen(req, timeout=10.0) as resp:
        data = json.loads(resp.read().decode("utf-8"))

    # Check the raw value before stringifying: str(None) would be the
    # truthy string "None" and slip past the emptiness check.
    sid = data.get("id") if isinstance(data, dict) else None
    if not sid:
        raise RuntimeError("Server returned empty session ID")
    return str(sid)
class TextualConsoleProxy:
    """Bridge Rich ``Console.print()`` calls to a Textual RichLog widget.

    Main-thread calls write directly to the RichLog; calls from any other
    thread post a ``RenderMessage`` to the app, which writes it on the main
    thread (Textual documents ``post_message`` as thread-safe).

    Keyword arguments to ``print()`` (``style=``, ``end=``, ...) are accepted
    for signature compatibility but ignored; pass pre-styled renderables.
    """

    def __init__(self, rich_log, app):
        self.rich_log = rich_log
        self.app = app
        self.encoding = "utf-8"

    def print(self, *args, **kwargs):
        """Write the positional renderables (none -> an empty line)."""
        if not args:
            from rich.text import Text
            self._write(Text())
            return
        if len(args) == 1:
            self._write(args[0])
        else:
            from rich.console import Group
            self._write(Group(*args))

    def _write(self, renderable):
        """Route *renderable* to the RichLog from whichever thread we are on."""
        import threading
        if threading.current_thread() is threading.main_thread():
            _chat_debug("TextualConsoleProxy._write: main thread, direct write")
            self.rich_log.write(renderable)
        else:
            _chat_debug("TextualConsoleProxy._write: bg thread, post_message(RenderMessage)")
            self.app.post_message(self.app.RenderMessage(renderable))


# Placeholders; replaced below when the optional ``textual`` package imports.
ChatApp: Any = None
QuitScreen: Any = None

try:
    from textual import on, work
    from textual.app import App, ComposeResult
    from textual.message import Message
    from textual.widgets import RichLog, Input, Footer, Static, Button, Label
    from textual.binding import Binding
    from textual.containers import Grid, Horizontal
    from textual.screen import ModalScreen

    class _QuitScreen(ModalScreen[bool]):
        """Modal confirmation dialog; dismisses with True when Quit is pressed."""

        CSS = """
        _QuitScreen {
            align: center middle;
        }
        #quit-dialog {
            grid-size: 2;
            grid-gutter: 1 2;
            grid-rows: 1fr 3;
            padding: 0 1;
            width: 60;
            height: 11;
            border: thick $background 80%;
            background: $surface;
        }
        #quit-question {
            column-span: 2;
            height: 1fr;
            width: 1fr;
            content-align: center middle;
        }
        Button {
            width: 100%;
        }
        """

        def compose(self) -> ComposeResult:
            yield Grid(
                Label("Are you sure you want to quit?", id="quit-question"),
                Button("Quit", id="quit-confirm", variant="error"),
                Button("Cancel", id="quit-cancel", variant="primary"),
                id="quit-dialog",
            )

        def on_button_pressed(self, event: Button.Pressed) -> None:
            self.dismiss(event.button.id == "quit-confirm")

    class _ChatApp(App):
        """Interactive chat harness -- final design (post-bisection).

        Design follows the Textual workers guide
        (https://textual.textualize.io/guide/workers):

        * The SSE consumer runs in a raw daemon thread (started via
          ``chat_loop.start_consumer``).  ``@work(thread=True)`` is reserved
          for short-lived blocking tasks; using it for the infinite consumer
          loop froze the main event loop in our environment
          (Textual 8.2.6 / Python 3.14).

        * All UI updates from background threads (renderables and errors)
          go through ONE one-argument Message subclass
          (``RenderMessage(renderable)``) and ONE ``@on`` handler that just
          calls ``rich_log.write``.  Bisection found that departing from this
          exact shape (a second Message subclass, optional fields, a second
          ``set_interval`` callback) silently froze Textual's message
          dispatch on this version.  The root cause is not understood;
          staying inside this working envelope is the pragmatic choice.

        * ``_render_and_log`` mirrors phase mode: persist the event to the
          transcript jsonl, mirror raw JSON to the chat-debug log under
          ``--debug``, suppress ``reasoning`` when thinking is off, then
          delegate to the same ``render_event()`` dispatcher.  The Input
          widget is never disabled, because toggling it required a second
          poller which broke dispatch.

        * Errors from ``@work`` workers post a red Panel through
          ``_post_error_renderable()`` -- the same RenderMessage path.

        * The transcript file handle is opened by the caller and passed in
          as ``transcript_fp``.

        * A single ``set_interval(1.0)`` heartbeat logs a debug tick and
          refreshes the bottom-bar status line (modeline) from
          ``_modeline_info``, which ``_render_and_log`` updates on every
          ``message.updated`` event.
        """

        CSS = """
        RichLog {
            height: 1fr;
            border-bottom: solid green;
            background: black;
        }
        Input {
            height: 3;
        }
        #bottom-bar {
            dock: bottom;
            height: 1;
            background: $footer-background;
        }
        #status-left {
            width: auto;
            min-width: 26;
            height: 1;
            padding: 0 1;
            color: $footer-foreground;
            background: $footer-background;
        }
        #footer-right {
            width: 1fr;
            height: 1;
        }
        Footer {
            dock: none;
        }
        """

        # Ctrl+S toggles Textual's mouse capture so the user can use the
        # terminal's native mouse selection (which copies to the system
        # clipboard via the terminal emulator).  RichLog has no in-app
        # selection support upstream, so terminal-native selection is the
        # supported path.  See .project/chat-mode-textual-postmortem.md §4 / §12.
        BINDINGS = [
            Binding("ctrl+c", "request_quit", "Quit"),
            Binding("ctrl+s", "toggle_mouse_for_select", "Select mode"),
        ]

        class RenderMessage(Message):
            """Single thread-safe message type: carries one Rich renderable
            to be written to the RichLog on the main thread.

            Bisection showed that extending this class with optional fields
            (``state``, ``detail``) silently breaks Textual's message
            dispatch on this version (Textual 8.2.6 / Python 3.14), even
            though the same pattern works in isolation.  Keep it strictly
            one-argument (positional, ``renderable``).
            """

            def __init__(self, renderable):
                super().__init__()
                self.renderable = renderable

        def __init__(self, server_info=None, session_id=None, initial_prompt="", args=None, rich_console=None, model=None, variant=None, thinking_on=None, transcript_fp=None):
            super().__init__()
            self.server_info = server_info
            self.session_id = session_id
            self.initial_prompt = initial_prompt
            self.args = args
            self.rich_console = rich_console
            self.model = model
            self.variant = variant
            self.thinking_on = thinking_on
            self.transcript_fp = transcript_fp
            self.chat_loop = None
            self.console_proxy = None
            self.rich_log = None
            self.chat_input = None
            self.modeline = None
            self._heartbeat_count = 0
            # Written by _render_and_log (consumer thread) on every
            # message.updated event; read by _heartbeat (main thread) to
            # drive the status line in the bottom bar.
            self._modeline_info = ""
            # True while Ctrl+S terminal-select mode is active, i.e. Textual
            # mouse handling is disabled so the terminal emulator's native
            # selection works.  Default off (Textual mouse handling on).
            self._terminal_select_mode = False

        def _configured_model_label(self) -> str:
            """Return the configured model for the modeline, or "…" if unset.

            ``self.model`` is already in ``provider/model`` (or bare model)
            form, so it is shown verbatim.  Splitting and re-joining it, as
            was done previously, duplicated models without a provider prefix
            ("gpt-4" became "gpt-4/gpt-4").
            """
            return self.model or "…"

        def compose(self) -> ComposeResult:
            yield RichLog(id="log", markup=False, auto_scroll=True)
            yield Input(id="chat_input", placeholder="Type a message and press Enter...")
            with Horizontal(id="bottom-bar"):
                yield Static("ready", id="status-left")
                yield Footer(id="footer-right")

        def on_mount(self) -> None:
            """Wire up widgets, banner, heartbeat, SSE consumer and initial prompt."""
            _chat_debug("on_mount: entering")
            self.rich_log = self.query_one(RichLog)
            self.chat_input = self.query_one(Input)
            self.modeline = self.query_one("#status-left", Static)
            self.console_proxy = TextualConsoleProxy(self.rich_log, self)
            _chat_debug("on_mount: proxy created")

            # Initial modeline with the configured model.
            self.modeline.update(f"● | {self._configured_model_label()} | ready")

            # Heartbeat canary: fires every 1s on the main thread.  Helpful
            # in the debug log to confirm the event loop is alive.
            self.set_interval(1.0, self._heartbeat)
            _chat_debug("on_mount: heartbeat installed")

            # Banner (main thread, direct write).
            if HAVE_RICH:
                from rich.rule import Rule
                self.rich_log.write(Rule(title="Chat: Interactive Harness", style="bold cyan"), expand=True)
                banner_model = self.model or "(unknown)"
                parts = [f"agent={self.args.agent if self.args else '?'}", f"model={banner_model}"]
                if self.variant is not None:
                    parts.append(f"variant={self.variant or '(unknown)'}")
                parts.append(f"thinking={'on' if self.thinking_on else 'off'}")
                self.rich_log.write(Text(" ".join(parts), style="dim"), expand=True)
                # RichLog doesn't support in-app mouse selection upstream;
                # document the terminal-native path so users can copy output.
                self.rich_log.write(
                    Text(
                        "Tip: hold Option/Alt (macOS) or Shift (most terminals) "
                        "while dragging to select text, or press Ctrl+S to toggle "
                        "terminal-select mode (disables Textual mouse).",
                        style="dim italic",
                    ),
                    expand=True,
                )
                _chat_debug("on_mount: banner written")

            # Construct the chat event loop.
            from events.chat_loop import ChatEventLoop
            _chat_debug("on_mount: creating ChatEventLoop")
            self.chat_loop = ChatEventLoop(
                base_url=self.server_info.base_url,
                session_id=self.session_id,
                console=self.console_proxy,
                auth_token=self.server_info.password,
                workspace_dir=str(ROOT),
                debug=_chat_debug if self.args and self.args.debug else None,
            )

            # Raw daemon thread: the SSE consumer.
            _chat_debug("on_mount: starting SSE consumer (raw daemon thread)")
            self.chat_loop.start_consumer(self._render_and_log)
            _chat_debug("on_mount: consumer thread started")

            # The initial prompt is bootstrap text for the agent, not
            # something the user typed, so it is sent without being echoed.
            if self.initial_prompt:
                self.rich_log.write(Text("(initializing session\u2026)", style="bold cyan"), expand=True)
                _chat_debug(f"on_mount: spawning initial-prompt worker ({len(self.initial_prompt)} chars)")
                self._send_initial_prompt(self.initial_prompt)

            _chat_debug("on_mount: done")

        # --- Main-thread heartbeat canary ---

        def _heartbeat(self) -> None:
            """Log a liveness tick and refresh the bottom-bar status line."""
            self._heartbeat_count += 1
            _chat_debug(f"_heartbeat: tick #{self._heartbeat_count} (main loop alive)")

            pulse = "●" if self._heartbeat_count % 2 else "◌"
            sel_tag = " [SEL]" if self._terminal_select_mode else ""
            # Read once: the consumer thread may rebind the attribute.
            info = self._modeline_info or ""
            if info:
                text = f"{pulse}{sel_tag} | {info}"
            else:
                text = f"{pulse}{sel_tag} | {self._configured_model_label()} | idle"
            self.modeline.update(text)

        # --- Textual workers (@work(thread=True)): short-lived only ---

        @work(thread=True)
        def _send_initial_prompt(self, text) -> None:
            """Send the initial prompt in a Textual-managed thread."""
            _chat_debug("_send_initial_prompt: worker started")
            try:
                self.chat_loop.send_prompt(
                    text,
                    self.args.agent if self.args else "auditor",
                    self.model,
                    self.variant,
                )
                _chat_debug("_send_initial_prompt: sent")
            except Exception as exc:
                # Worker boundary: surface any failure to the user.
                _chat_debug(f"_send_initial_prompt: error: {exc}")
                self._post_error_renderable(f"Failed to send initial prompt: {exc}")

        @work(thread=True)
        def _send_prompt(self, text) -> None:
            """Send a user prompt in a Textual-managed thread."""
            _chat_debug(f"_send_prompt: worker posting text len={len(text)}")
            try:
                self.chat_loop.send_prompt(
                    text,
                    self.args.agent if self.args else "auditor",
                    self.model,
                    self.variant,
                )
                _chat_debug("_send_prompt: sent")
            except Exception as exc:
                # Worker boundary: surface any failure to the user.
                _chat_debug(f"_send_prompt: error: {exc}")
                self._post_error_renderable(f"Failed to send: {exc}")

        def _post_error_renderable(self, detail: str) -> None:
            """Post a red error panel through the single RenderMessage path.

            Callable from any thread.
            """
            from rich.panel import Panel
            panel = Panel(Text(detail, style="bold red"), title="Chat Error", border_style="red")
            self.post_message(self.RenderMessage(panel))

        # --- Message handler (runs on the main thread).  Single handler,
        # single Message subclass: see the RenderMessage docstring.

        @on(RenderMessage)
        def _on_render_message(self, message: RenderMessage) -> None:
            if self.rich_log is not None:
                self.rich_log.write(message.renderable, expand=True)

        # --- Consumer-thread callback ---

        def _render_and_log(self, console, phase, label, event):
            """Handle one SSE event on the consumer thread.

            1. Persist the raw event to the transcript jsonl (best effort).
            2. Under ``--debug``, mirror the raw JSON to the chat-debug log
               (stderr would corrupt Textual's alternate screen).
            3. Refresh ``_modeline_info`` on every ``message.updated``.
            4. Drop ``reasoning`` events when thinking is off.
            5. Delegate to ``render_event()``, the same dispatcher used by
               non-interactive runs; its output reaches the RichLog via the
               console proxy.
            """
            event_type = event.get("type")
            debug_raw = self.args is not None and getattr(self.args, "debug", False)

            # Serialise once; both the transcript and the debug mirror use it.
            serialized = None
            if self.transcript_fp is not None or debug_raw:
                try:
                    serialized = json.dumps(event)
                except (TypeError, ValueError) as exc:
                    _chat_debug(f"_render_and_log: could not serialise event: {exc}")

            if serialized is not None and self.transcript_fp is not None:
                try:
                    self.transcript_fp.write(serialized + "\n")
                except (OSError, ValueError):
                    # Best effort.  ValueError covers a write to a handle
                    # already closed during shutdown.
                    pass

            if debug_raw and serialized is not None:
                _chat_debug(f"_render_and_log: raw event: {serialized}")
            else:
                _chat_debug(f"_render_and_log: event type={event_type}")

            if event_type == "message.updated":
                self._update_modeline_info(event)

            if not self.thinking_on and event_type == "reasoning":
                return
            render_event(console, phase, label, event)

        def _update_modeline_info(self, event: dict[str, Any]) -> None:
            """Store model/token/cost text from an assistant ``message.updated``
            event for the heartbeat to show in the bottom bar."""
            info = event.get("info")
            if not isinstance(info, dict):
                props = event.get("properties", {})
                info = props.get("info", {}) if isinstance(props, dict) else {}
            if not isinstance(info, dict):
                return
            # Only assistant messages feed the modeline.
            if info.get("role") != "assistant":
                return

            model_id = str(info.get("modelID") or "").strip()
            provider_id = str(info.get("providerID") or "").strip()
            if not model_id:
                mdl = info.get("model", {})
                if isinstance(mdl, dict):
                    model_id = str(mdl.get("modelID") or "").strip()
                    provider_id = str(mdl.get("providerID") or "").strip()
            model_label = f"{provider_id}/{model_id}" if provider_id and model_id else (model_id or "…")

            tokens = info.get("tokens")
            token_str = ""
            if isinstance(tokens, dict):
                # ``or 0`` normalises explicit nulls.
                token_str = f"↑{tokens.get('input') or 0} ↓{tokens.get('output') or 0}"

            cost = info.get("cost") or 0
            cost_str = f"${cost:.4f}" if isinstance(cost, (int, float)) and cost else ""

            detail = " ".join(part for part in (token_str, cost_str) if part)
            # Avoid a dangling " | " when there is nothing to show after it.
            self._modeline_info = f"{model_label} | {detail}" if detail else model_label

        # --- UI actions ---

        def action_request_quit(self) -> None:
            """Ask for confirmation, then exit the app if confirmed."""
            def finish_quit(confirmed):
                if confirmed:
                    self.exit()
            self.push_screen(_QuitScreen(), finish_quit)

        def action_toggle_mouse_for_select(self) -> None:
            """Toggle terminal-native mouse selection mode (Ctrl+S).

            RichLog has no upstream support for in-app text selection, so
            this turns Textual's mouse reporting off, letting the terminal
            emulator's own selection (and clipboard copy) take over.

            When off (default): Textual handles the mouse.  Holding
            Option/Alt (macOS) or Shift (most terminals) while dragging
            bypasses Textual without toggling.

            When on: mouse reporting is disabled; Textual mouse interactions
            (scrolling, clicking widgets) won't work until toggled back.

            NOTE(review): relies on the private driver methods
            ``_disable_mouse_support`` / ``_enable_mouse_support``; a failure
            leaves the mode unchanged.
            """
            driver = self._driver
            if driver is None:
                return
            entering = not self._terminal_select_mode
            try:
                if entering:
                    driver._disable_mouse_support()
                else:
                    driver._enable_mouse_support()
            except Exception as exc:
                # Private API boundary: keep the current mode rather than crash.
                _chat_debug(f"action_toggle_mouse_for_select: driver call failed: {exc}")
                return
            self._terminal_select_mode = entering
            if entering:
                hint = Text(
                    "[select mode ON] Textual mouse disabled. "
                    "Click-drag to select; copy via terminal "
                    "(Cmd+C on macOS / Ctrl+Shift+C on Linux). "
                    "Press Ctrl+S again to exit.",
                    style="bold yellow",
                )
            else:
                hint = Text(
                    "[select mode OFF] Textual mouse re-enabled.",
                    style="bold yellow",
                )
            self.rich_log.write(hint, expand=True)

        async def on_input_submitted(self, message: Input.Submitted) -> None:
            """Echo the typed prompt and send it via the ``_send_prompt`` worker.

            The Input is deliberately NOT disabled while sending: toggling
            its state from a poller broke Textual dispatch on this version.
            """
            text = message.value.strip()
            if not text:
                return
            self.chat_input.value = ""
            self.rich_log.write("", expand=True)
            self.rich_log.write(Text(f"User: {text}", style="bold cyan"), expand=True)
            self._send_prompt(text)

    ChatApp = _ChatApp
    QuitScreen = _QuitScreen
except ImportError:
    # textual is optional; ChatApp stays None and chat mode reports it.
    pass
def _run_chat_mode(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
    """Launch the interactive chat harness.

    Validates the chat-specific arguments, resolves the prompt and model,
    starts the server, creates a session, runs the Textual app, and tears
    everything down again.

    Args:
        parser: Used to report missing required arguments.
        args: Parsed command-line arguments.

    Returns:
        0 after a chat session ran; 1 when textual is missing, the server
        fails to start, or the session cannot be created.
    """
    if args.debug:
        _setup_chat_debug()
        _chat_debug("_run_chat_mode: entering (debug enabled)")

    missing = [n for n in ("label", "agent") if getattr(args, n) is None]
    if missing:
        parser.error(
            "the following arguments are required for --chat: "
            + ", ".join("--" + n.replace("_", "-") for n in missing)
        )

    check_opencode_version()

    color_mode = resolve_color_mode(args.color)
    console = build_console(color_mode)

    # Resolve prompt: a prompt file wins over --prompt; both are optional.
    if args.prompt_file:
        prompt_file = ROOT / args.prompt_file
        prompt = load_prompt(prompt_file, args.finding, phase=args.phase)
    elif args.prompt:
        prompt = args.prompt
    else:
        prompt = ""

    # Model resolution (the *_source values are not needed here).
    extra_args = shlex.split(os.environ.get("OPENCODE_ARGS", ""))
    model, variant, _, _ = resolve_model_and_variant(args.agent, extra_args)
    thinking_on, _ = _resolve_thinking_decision(model, extra_args)

    _chat_debug(f"_run_chat_mode: agent={args.agent} model={model} variant={variant} thinking={thinking_on}")

    if ChatApp is None:
        _emit_fatal_error(console, "Missing Dependency",
                          "The --chat flag requires the 'textual' package. Run 'make venv' to install it.")
        _close_chat_debug()
        return 1

    # Start server
    _chat_debug("_run_chat_mode: starting opencode serve")
    runner = ServerRunner()
    try:
        server_info = runner.start(hostname="127.0.0.1", log_level="WARN")
        _chat_debug(f"_run_chat_mode: server started pid={server_info.pid} url={server_info.base_url}")
    except ServerRunnerError as exc:
        _chat_debug(f"_run_chat_mode: server start failed: {exc}")
        _emit_fatal_error(console, "Server Error", str(exc))
        _close_chat_debug()
        return 1

    # Create session
    _chat_debug("_run_chat_mode: creating session")
    try:
        session_id = _create_chat_session(
            server_info.base_url, args.agent, model, server_info.password, str(ROOT),
        )
        _chat_debug(f"_run_chat_mode: session created id={session_id}")
    except Exception as exc:
        # Boundary: any failure here must still stop the server.
        _chat_debug(f"_run_chat_mode: session creation failed: {exc}")
        _emit_fatal_error(console, "Session Error", str(exc))
        runner.stop()
        _close_chat_debug()
        return 1

    # Open the chat transcript.  The filename carries a timestamp and the
    # PID so successive or concurrent runs don't clobber each other; the
    # file is line-buffered so it survives crashes.  The transcript is best
    # effort: a failure to create the directory or file must not abort the
    # session (or leak the already-running server).
    stamp = time.strftime("%Y%m%d-%H%M%S")
    transcript_dir = ROOT / "tmp"
    transcript_path = transcript_dir / f"last-chat-{stamp}-pid{os.getpid()}.jsonl"
    transcript_fp = None
    try:
        transcript_dir.mkdir(parents=True, exist_ok=True)
        transcript_fp = transcript_path.open("w", encoding="utf-8", buffering=1)
        _chat_debug(f"_run_chat_mode: opened transcript {transcript_path}")
    except OSError as exc:
        _chat_debug(f"_run_chat_mode: could not open transcript {transcript_path}: {exc}")
    transcript_written = transcript_fp is not None

    _chat_debug("_run_chat_mode: creating ChatApp")
    app = None
    try:
        app = ChatApp(
            server_info=server_info,
            session_id=session_id,
            initial_prompt=prompt,
            args=args,
            model=model,
            variant=variant,
            thinking_on=thinking_on,
            transcript_fp=transcript_fp,
        )
        _chat_debug("_run_chat_mode: calling app.run()")
        app.run()
        _chat_debug("_run_chat_mode: app.run() returned")
    finally:
        _chat_debug("_run_chat_mode: cleaning up")
        if app is not None and getattr(app, "chat_loop", None) is not None:
            _chat_debug("_run_chat_mode: stopping chat loop")
            app.chat_loop.stop()
        runner.stop()
        if transcript_fp is not None:
            try:
                transcript_fp.flush()
                transcript_fp.close()
            except OSError:
                pass
        # Close the debug log here so it is released even if app.run() raised.
        _close_chat_debug()

    # Final summary banner on the restored terminal.
    try:
        rel_path = transcript_path.relative_to(ROOT)
    except ValueError:
        rel_path = transcript_path
    if HAVE_RICH:
        from rich.rule import Rule
        console.print(Rule(style="green"))
        console.print(Text(f"{C.SYM_OK} Chat session ended", style="green"))
        if transcript_written:
            console.print(Text(f" transcript: {rel_path}", style="dim"))
    else:
        print(C.ok("Chat session ended"))
        if transcript_written:
            print(f" transcript: {rel_path}")

    return 0
+ if args.chat: + return _run_chat_mode(parser, args) + # The phase-launching mode requires the usual arguments. missing = [n for n in ("phase", "label", "agent", "prompt_file") if getattr(args, n) is None] if missing: parser.error( - "the following arguments are required when not using --show-model: " + "the following arguments are required when not using --show-model or --chat: " + ", ".join("--" + n.replace("_", "-") for n in missing) )