diff --git a/docs/PLAN-phase2.md b/docs/PLAN-phase2.md
new file mode 100644
index 000000000..e647ba9c4
--- /dev/null
+++ b/docs/PLAN-phase2.md
@@ -0,0 +1,401 @@
+# Phase 2 Implementation Plan for PRJ-042
+
+## 1. Executive summary
+Phase 2 introduces a real multiplexer boundary under the existing provider/service stack: `BaseMultiplexer`, a behavior-preserving `TmuxMultiplexer`, and a new `WezTermMultiplexer` that replaces tmux-only primitives with WezTerm CLI equivalents. The largest design change is making message delivery explicitly two-step at the multiplexer layer, because tmux already pastes then submits (`src/cli_agent_orchestrator/clients/tmux.py:198-251`) and WezTerm must do the same. Rough size is ~0.9-1.1 kLoC added plus ~0.4 kLoC moved (see section 7), with ~5-6 solo-maintainer days for Claude + Codex MVP on Windows and tmux parity retained on Unix. Main risks are provider regex drift outside the spike coverage, Codex's Windows shim/config workaround going stale, and WezTerm CLI behavior changing across releases.
+
+## 2. BaseMultiplexer interface
+The public surface should stay at the same 11 active methods from Phase 0, so `terminal_service`, `session_service`, `wait_for_shell()`, and provider status logic do not need a full rewrite (`docs/multiplexer-api-surface.md`, `src/cli_agent_orchestrator/services/terminal_service.py:122-188`, `src/cli_agent_orchestrator/utils/terminal.py:37-80`). The two generalizations are:
+
+1. `create_session()` / `create_window()` accept an optional `LaunchSpec` so backends that must spawn the target CLI directly can do so.
+2. `send_keys()` becomes a default method built on two abstract primitives: paste text, then submit separately.
+
+```python
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Mapping, Optional, Sequence
+
+
+@dataclass(frozen=True)
+class LaunchSpec:
+ """Concrete process spawn request for a new pane/window.
+
+ argv:
+ Exact argv to execute as the pane's initial process. When None, start
+ the backend's default interactive shell.
+ env:
+ Extra environment variables to inject into the spawned process.
+ provider:
+ Optional provider key used by backend-specific launch templating and
+ executable resolution.
+ """
+
+ argv: Optional[Sequence[str]] = None
+ env: Optional[Mapping[str, str]] = None
+ provider: Optional[str] = None
+
+
+class BaseMultiplexer(ABC):
+ """Backend-neutral pane/session control surface for CAO."""
+
+ def _resolve_and_validate_working_directory(
+ self, working_directory: Optional[str]
+ ) -> str:
+ """Canonicalize, validate, and return a safe working directory."""
+
+ @abstractmethod
+ def create_session(
+ self,
+ session_name: str,
+ window_name: str,
+ terminal_id: str,
+ working_directory: Optional[str] = None,
+ launch_spec: Optional[LaunchSpec] = None,
+ ) -> str:
+ """Create a detached CAO session/workspace and return the actual window name."""
+
+ @abstractmethod
+ def create_window(
+ self,
+ session_name: str,
+ window_name: str,
+ terminal_id: str,
+ working_directory: Optional[str] = None,
+ launch_spec: Optional[LaunchSpec] = None,
+ ) -> str:
+ """Create another CAO window/pane inside an existing session."""
+
+ def send_keys(
+ self, session_name: str, window_name: str, keys: str, enter_count: int = 1
+ ) -> None:
+ """Paste text, wait for the TUI to settle, then submit Enter separately."""
+ self._paste_text(session_name, window_name, keys)
+ self._submit_input(session_name, window_name, enter_count=enter_count)
+
+ @abstractmethod
+ def _paste_text(self, session_name: str, window_name: str, text: str) -> None:
+ """Inject literal text without submitting it."""
+
+ @abstractmethod
+ def _submit_input(
+ self, session_name: str, window_name: str, enter_count: int = 1
+ ) -> None:
+ """Submit already-pasted input with one or more Enter presses."""
+
+ @abstractmethod
+ def send_special_key(
+ self,
+ session_name: str,
+ window_name: str,
+ key: str,
+ *,
+ literal: bool = False,
+ ) -> None:
+ """Send a control key or a literal VT sequence without paste semantics."""
+
+ @abstractmethod
+ def get_history(
+ self, session_name: str, window_name: str, tail_lines: Optional[int] = None
+ ) -> str:
+ """Return normalized pane text for provider regex/status parsing."""
+
+ @abstractmethod
+ def list_sessions(self) -> list[dict[str, str]]:
+ """List CAO-visible sessions as {id, name, status}."""
+
+ @abstractmethod
+ def kill_session(self, session_name: str) -> bool:
+ """Terminate a session and all owned panes/windows."""
+
+ @abstractmethod
+ def kill_window(self, session_name: str, window_name: str) -> bool:
+ """Terminate one CAO window/pane."""
+
+ @abstractmethod
+ def session_exists(self, session_name: str) -> bool:
+ """Return True when the named session/workspace exists."""
+
+ @abstractmethod
+ def get_pane_working_directory(
+ self, session_name: str, window_name: str
+ ) -> Optional[str]:
+ """Return the active pane's working directory when the backend exposes it."""
+
+ @abstractmethod
+ def pipe_pane(self, session_name: str, window_name: str, file_path: str) -> None:
+ """Start backend-specific output capture into a CAO log file."""
+
+ @abstractmethod
+ def stop_pipe_pane(self, session_name: str, window_name: str) -> None:
+ """Stop backend-specific output capture for a CAO log file."""
+```
+
+Abstract: `create_session`, `create_window`, `_paste_text`, `_submit_input`, `send_special_key`, `get_history`, `list_sessions`, `kill_session`, `kill_window`, `session_exists`, `get_pane_working_directory`, `pipe_pane`, `stop_pipe_pane`.
+
+Default-implemented: `_resolve_and_validate_working_directory` lifted from `TmuxClient` (`src/cli_agent_orchestrator/clients/tmux.py:40-115`) and `send_keys()` as the shared paste-then-submit primitive. That matches CAO's current provider contract, including `BaseProvider.paste_enter_count` (`src/cli_agent_orchestrator/providers/base.py:75-85`) and `terminal_service.send_input()` (`src/cli_agent_orchestrator/services/terminal_service.py:288-320`).
+
+`LaunchSpec` is the smallest interface change that cleanly covers the Codex-on-Windows requirement from spike 2b without hard-coding Codex logic into providers or services. Tmux can ignore it for the MVP path and remain shell-first; WezTerm can use it where shell-resolved `codex` is wrong.
+
+## 3. TmuxMultiplexer identity refactor
+This should be mechanical.
+
+Move:
+- `src/cli_agent_orchestrator/clients/tmux.py` implementation into `src/cli_agent_orchestrator/multiplexers/tmux.py` as `class TmuxMultiplexer(BaseMultiplexer)`.
+- Path validation stays byte-for-byte unless typing/docstrings are adjusted (`src/cli_agent_orchestrator/clients/tmux.py:40-115`).
+- Session/window creation logic, env filtering, history capture, list/kill/existence checks, pane CWD lookup, and `pipe-pane` behavior all carry over unchanged (`src/cli_agent_orchestrator/clients/tmux.py:117-430` and remainder).
+
+Stay:
+- `src/cli_agent_orchestrator/clients/tmux.py` becomes a thin compatibility shim:
+
+```python
+from cli_agent_orchestrator.multiplexers.tmux import TmuxMultiplexer
+
+tmux_client = TmuxMultiplexer()
+```
+
+- Existing imports in providers/services remain valid in Phase 3, so the tmux refactor is low-risk and easy to bisect.
+
+Non-trivial behavior changes to call out as risk:
+- `send_special_key()` should grow `literal: bool = False` so Claude's startup handler can stop bypassing the abstraction with raw `tmux send-keys -l "\x1b[B"` (`src/cli_agent_orchestrator/providers/claude_code.py:204-224`). Tmux behavior stays the same; only the route changes.
+- If `create_session()` / `create_window()` gain `launch_spec`, tmux should accept it but default to the old interactive-shell startup so provider init order remains unchanged. Anything more aggressive than that is unnecessary risk for the tmux path.
+
+## 4. WezTermMultiplexer — new
+### Pane/window model mapping
+CAO's external address is still `session_name + window_name`. Internally, `WezTermMultiplexer` should maintain a small registry:
+
+```text
+session_name -> {
+ "workspace": session_name,
+ "window_name" -> {
+ "pane_id": str,
+ "tab_id": str | None,
+ "window_id": str | None,
+ }
+}
+```
+
+Mapping:
+- tmux session -> WezTerm workspace
+- tmux window -> WezTerm pane owned by a dedicated tab or window
+- tmux pane -> WezTerm pane id
+
+For MVP, the simplest stable mapping is one CAO "window" per WezTerm OS window spawned with `--new-window`. It is slightly heavier than tabs, but it matches the spike setup, avoids tab-focus ambiguity, and keeps pane ids isolated. A later optimization can consolidate into tabs once the backend is stable.
+
+### Session/window creation
+`create_session()` and `create_window()` should:
+- validate `working_directory` with the shared base helper
+- resolve the WezTerm executable once up front and raise a clear error if it is unavailable
+- inject `CAO_TERMINAL_ID` plus provider-safe env vars through `wezterm cli spawn`
+- persist the pane id returned by `wezterm cli spawn`
+
+Representative spawn:
+
+```text
+wezterm cli spawn --new-window --cwd <working_directory> --set-environment CAO_TERMINAL_ID=<terminal_id> -- <argv...>
+```
+
+If `launch_spec` is omitted, spawn the backend's default interactive shell so current provider `initialize()` flows keep working. If `launch_spec.argv` is present, spawn that process directly.
+
+### `send_message()` / `send_keys()` two-step flow
+The backend must never treat WezTerm submission as a one-shot paste. The flow should be:
+
+```text
+wezterm cli send-text --pane-id <pane_id> -- <text>
+wezterm cli send-text --pane-id <pane_id> --no-paste -- "\r"
+```
+
+Implementation details:
+- `_paste_text()` uses default paste mode so the target TUI sees bracketed paste, matching tmux `paste-buffer -p` semantics (`src/cli_agent_orchestrator/clients/tmux.py:203-242`).
+- `_submit_input()` sends carriage return separately in `--no-paste` mode. Repeated Enters honor `enter_count` from the provider contract (`src/cli_agent_orchestrator/providers/base.py:75-85`).
+- Keep the same small inter-submit delays tmux already needs: ~300 ms after paste, ~500 ms between extra Enters (`src/cli_agent_orchestrator/clients/tmux.py:229-241`).
+
+This is not a WezTerm-only quirk. It is the explicit backend-neutral form of what CAO already does in tmux.
+
+### `get_text()` / `get_history()` and polling
+Use plain `wezterm cli get-text`, not `--escapes`; spike 4 showed plain mode preserves the patterns CAO cares about while `--escapes` breaks Claude trust-prompt matching. `get_history()` should therefore normalize to plain text by default for WezTerm exactly as providers expect today.
+
+For `pipe_pane()` / `stop_pipe_pane()`:
+- implement a background poller per pane instead of a real stream
+- poll every 500 ms per spike 3; that interval saw 0 misses and ~144-207 ms first-detection latency with lower CPU than tighter loops
+- each poll reads `get-text`, diffs against the prior snapshot, and appends only new content to `file_path`
+- `stop_pipe_pane()` cancels the poller and clears its state
+
+This preserves the `terminal_service` contract (`src/cli_agent_orchestrator/services/terminal_service.py:184-188`, `:445-447`) and minimizes churn above the backend.
+
+### Launch command templating and Codex-on-Windows
+WezTerm needs a backend-owned launch template registry, because the correct process to spawn is not always the shell-resolved provider binary. The minimum viable design is:
+
+```python
+def build_launch_spec(
+ provider: str,
+ command_argv: Sequence[str],
+ *,
+ platform: Literal["windows", "unix"],
+ working_directory: str,
+) -> LaunchSpec:
+ ...
+```
+
+Rules:
+- default providers pass through `command_argv`
+- Codex on Windows resolves an explicit shim path, never bare `codex`
+- launch resolution happens in the backend or a tiny shared helper, not inside `terminal_service`
+
+Worked Codex example from spike 2b:
+
+```text
+wezterm cli spawn --new-window --cwd C:\dev\aws-cao -- \
+ C:\Users\marc\scoop\apps\nodejs-lts\current\bin\codex.cmd \
+ -c hooks=[] --yolo --no-alt-screen --disable shell_snapshot
+```
+
+Load-bearing parts:
+- explicit Windows `codex.cmd` path, because shell-domain resolution hit a Linux/WSL wrapper and exited
+- `-c hooks=[]`, because local interactive Codex rejected the user's existing hooks config
+- `--yolo --no-alt-screen --disable shell_snapshot`, which already come from the tmux provider command builder (`src/cli_agent_orchestrator/providers/codex.py:136-142`, `:261-267`)
+
+Phase 3 should implement platform-specific resolver order roughly as:
+
+1. explicit configured override
+2. Windows known shim lookup (`where.exe codex.cmd`, Scoop/Node install candidates)
+3. shell-resolved binary on non-Windows
+
+### Claude trust-prompt handler port
+Do not re-invent the logic in `ClaudeCodeProvider._handle_startup_prompts()`; port it to the abstraction by replacing the tmux bypasses:
+- raw down-arrow currently bypasses the wrapper with `tmux send-keys -l "\x1b[B"` (`src/cli_agent_orchestrator/providers/claude_code.py:204-212`)
+- trust confirmation currently reaches through `tmux_client.server.sessions...pane.send_keys("", enter=True)` (`src/cli_agent_orchestrator/providers/claude_code.py:218-224`)
+
+The plan for Phase 3 is:
+- keep the regexes unchanged
+- replace direct tmux subprocess/libtmux calls with `multiplexer.send_special_key(..., literal=True)` and `multiplexer.send_special_key(..., "Enter")`
+- keep polling against plain `get_history()` output; spike 4 already showed `TRUST_PROMPT_PATTERN` works in plain WezTerm capture
+
+### Error handling
+`WezTermMultiplexer` should fail early and specifically on:
+- WezTerm binary missing or not executable
+- `wezterm cli spawn` returning no pane id
+- pane id no longer present when sending input or reading output
+- poller thread/task already running or missing on stop
+
+These should become actionable exceptions, not silent fallbacks to tmux. The point of the backend split is explicit backend selection and explicit failure.
+
+One related service patch is worth doing in the same phase: replace the `tail -n` subprocess in `inbox_service._get_log_tail()` (`src/cli_agent_orchestrator/services/inbox_service.py:42-55`) with a pure-Python tail reader. Otherwise the WezTerm backend still depends on Unix tooling on Windows.
+
+## 5. Per-provider patches
+### `claude_code.py`
+Inspected:
+- idle / waiting / trust / bypass regexes (`src/cli_agent_orchestrator/providers/claude_code.py:46-52`)
+- startup prompt handler (`src/cli_agent_orchestrator/providers/claude_code.py:180-236`)
+- init snapshot logic (`src/cli_agent_orchestrator/providers/claude_code.py:238-290`)
+- status parser (`src/cli_agent_orchestrator/providers/claude_code.py:326-389`)
+
+Patch judgment:
+- Regex patch: none for MVP. Spike 4 showed `IDLE_PROMPT_PATTERN` and `TRUST_PROMPT_PATTERN` match plain WezTerm capture; `BYPASS_PROMPT_PATTERN` was absent because the settings-based bypass already suppresses it most of the time.
+- Code patch: yes. Remove direct tmux access in `_handle_startup_prompts()` and route both actions through the new multiplexer API. The logic should otherwise stay verbatim.
+
+### `codex.py`
+Inspected:
+- prompt/footer/waiting/trust/welcome patterns (`src/cli_agent_orchestrator/providers/codex.py:18-65`)
+- command builder (`src/cli_agent_orchestrator/providers/codex.py:130-213`)
+- trust prompt handler (`src/cli_agent_orchestrator/providers/codex.py:215-248`)
+- warm-up + init (`src/cli_agent_orchestrator/providers/codex.py:250-281`)
+- status parser (`src/cli_agent_orchestrator/providers/codex.py:283-386`)
+
+Patch judgment:
+- Regex patch: probably none for MVP once launch is fixed. Spike 4's Codex misses were against the crashed process, not a live TUI.
+- Code patch: yes.
+ - keep current regex/status logic unchanged first
+ - replace the trust-prompt Enter path with the multiplexer abstraction instead of `tmux_client.server.sessions...pane.send_keys("", enter=True)` (`src/cli_agent_orchestrator/providers/codex.py:233-240`)
+ - add a WezTerm launch-spec path for Codex-on-Windows so the backend can direct-spawn the explicit shim
+ - keep the warm-up echo for tmux; for WezTerm direct-spawned Codex, skip shell warm-up and wait on welcome/trust markers instead
+
+### `gemini_cli.py`
+Inspected:
+- idle/welcome/responding patterns (`src/cli_agent_orchestrator/providers/gemini_cli.py:63-138`)
+- command builder and `GEMINI.md`/settings writes (`src/cli_agent_orchestrator/providers/gemini_cli.py:191-250` and surrounding method)
+- warm-up/init (`src/cli_agent_orchestrator/providers/gemini_cli.py:417-509`)
+- status parser (`src/cli_agent_orchestrator/providers/gemini_cli.py:520-610`)
+
+Patch judgment:
+- Regex patch: none proposed now. Phase 1 did not get a live Gemini WezTerm capture on this machine, so there is no evidence of regex breakage; spike 4's plain-output finding argues to leave the patterns alone until a real runtime proves otherwise.
+- Backend patch: defer Gemini WezTerm wiring from MVP. The task brief explicitly allows Gemini to slip, and Gemini already has the most stateful startup path of any provider because it writes `GEMINI.md`, edits `~/.gemini/settings.json`, and distinguishes post-init IDLE vs COMPLETED (`src/cli_agent_orchestrator/providers/gemini_cli.py:163-189`, `:476-592`).
+
+## 6. Test strategy
+Phase 3 verification should have three layers.
+
+Unit and contract tests:
+- Add `test/clients/test_base_multiplexer.py`-style contract tests for the shared `send_keys()` behavior: one paste call, delayed submit, `enter_count` honored.
+- Clone/retarget current tmux tests (`test/clients/test_tmux_client.py`, `test/clients/test_tmux_send_keys.py`) so `TmuxMultiplexer` proves zero regression.
+- Add WezTerm backend unit tests with a mocked CLI runner for spawn/send/get-text/kill and poller diff behavior.
+- Replace `tail` subprocess assumptions in `test/services/test_inbox_service.py` with pure-Python tailing so Windows CI is possible.
+
+Provider unit tests:
+- Re-run existing provider suites with minimal fixture churn:
+ - `test/providers/test_claude_code_unit.py`
+ - `test/providers/test_codex_provider_unit.py`
+ - `test/providers/test_gemini_cli_unit.py`
+ - `test/providers/test_permission_prompt_detection.py`
+- Add WezTerm-specific tests only where provider code changes:
+ - Claude startup prompt handler uses `send_special_key(..., literal=True)` instead of raw tmux subprocess
+ - Codex launch-spec generation on Windows resolves shim + `hooks=[]`
+
+Real smoke tests:
+- gated, opt-in tests on a Windows runner with WezTerm installed
+- at minimum:
+ - spawn shell pane, send text, get text, kill pane
+ - Claude startup/trust prompt acceptance
+ - Codex direct spawn via resolved `codex.cmd`, send pasted text, separate Enter submission
+ - inbox delivery through the poller-backed `pipe_pane()` path at 500 ms
+- Existing E2E paths worth reusing after backend parameterization:
+ - `test/e2e/test_send_message.py`
+ - `test/e2e/test_cross_provider.py`
+ - `test/e2e/test_supervisor_orchestration.py`
+
+The clean parameterization point is the multiplexer singleton import, not the providers. If tests can swap `tmux_client` for a generic `multiplexer_client`, most provider fixtures should survive intact.
+
+## 7. LoC + day estimate
+| Component | Lines added | Lines moved | Days |
+|---|---:|---:|---:|
+| `BaseMultiplexer` + `LaunchSpec` + backend selection shim | 140 | 0 | 0.75 |
+| `TmuxMultiplexer` refactor | 70 | 320 | 0.5 |
+| `WezTermMultiplexer` core spawn/send/get-text/kill/session registry | 260 | 0 | 1.5 |
+| WezTerm poller-backed `pipe_pane` + pure-Python log tailing | 140 | 30 | 0.75 |
+| Claude provider de-tmuxing | 35 | 10 | 0.5 |
+| Codex provider launch-spec + trust-path changes | 80 | 15 | 0.75 |
+| Tests and smoke harness | 220 | 20 | 1.25 |
+| Total, Claude + Codex MVP | 945 | 395 | 6.0 |
+
+Stretch:
+- Gemini WezTerm MVP: +120-180 added LoC, +0.75-1.0 day after the binary/runtime blocker is gone.
+
+## 8. Risks
+1. Per-provider regex drift that spikes 3 and 4 did not hit. Claude plain-output matches are encouraging, but Codex was only validated after fixing launch, and Gemini still lacks live WezTerm proof.
+2. Codex `hooks=[]` shim can go stale if upstream Codex config loading changes. The workaround is explicitly machine-sensitive from spike 2b, so it needs a backend override slot rather than being baked blindly into the generic provider command builder.
+3. WezTerm CLI surface can change across versions. The spike used `wezterm 20260331-040028-577474d8`; Phase 3 should pin the commands used and validate them against at least one more release.
+4. Gemini-on-Windows PATH/runtime remains blocked on the target machine. Even if the backend abstraction is correct, Gemini MVP wiring is not testable until the executable is reachable.
+5. Poller-backed `pipe_pane()` can regress inbox responsiveness or duplicate log output if snapshot diffing is naive. Spike 3 says 500 ms is viable, but the implementation still has to handle redraw-heavy TUIs and pane clears.
+6. Windows-native support is still incomplete if Unix subprocess assumptions survive elsewhere. `inbox_service` calling `tail` is the obvious current example (`src/cli_agent_orchestrator/services/inbox_service.py:42-55`).
+
+## 9. Out of scope
+- Layer 2 marc-hq meta-observer orchestration.
+- Non-tmux, non-WezTerm backends.
+- Gemini WezTerm MVP wiring unless the binary/runtime blocker clears quickly during Phase 3.
+- UX-only tmux attachment commands in CLI/API paths (`attach-session`, `display-message`) beyond what is needed to keep the core agent orchestration working.
+
+## 10. Phase 2 follow-ups (deferred, must be fixed properly)
+- **`api/main.py::terminal_ws` is tmux-only.** The WebSocket terminal-stream
+ endpoint imports `fcntl`/`pty`/`termios` and shells out to
+ `tmux -u attach-session`, reading `metadata["tmux_session"]` /
+ `metadata["tmux_window"]`. As of the Layer-1 stopgap (this branch) the
+ Unix-only imports are lazy and a `4501` close is returned on Windows so
+ `cao-server` can boot. Proper fix:
+ 1. Route through the `BaseMultiplexer` abstraction (new
+ `attach_stream(terminal_id) -> AsyncIterable[bytes]` or similar).
+ 2. Implement for `TmuxMultiplexer` using the existing PTY+attach path.
+ 3. For `WezTermMultiplexer`: decide between
+ (a) no-op + clean 501 (panes are already the user's GUI, no need to
+ stream) or (b) `wezterm cli get-text` polling fallback for the Web UI.
+ 4. Drop the `sys.platform == "win32"` short-circuit once (1)–(3) land.
+ Tracked as the immediate sequel to current Phase 2 multiplexer work.
diff --git a/docs/multiplexer-api-surface.md b/docs/multiplexer-api-surface.md
new file mode 100644
index 000000000..9901297b2
--- /dev/null
+++ b/docs/multiplexer-api-surface.md
@@ -0,0 +1,223 @@
+# Multiplexer API Surface — Phase 0 Analysis
+
+**Purpose:** Scope the tmux coupling in CAO so Phase 1 can design a `MultiplexerBackend` abstraction and implement a WezTerm backend.
+
+**Date:** 2026-04-24
+**Branch:** `wezterm-multiplexer`
+**Analyst:** Phase 0 / TSK-067
+
+---
+
+## 1. Wrapper Module Location
+
+| Item | Value |
+|------|-------|
+| File | `src/cli_agent_orchestrator/clients/tmux.py` |
+| Class | `TmuxClient` |
+| Module singleton | `tmux_client` (bottom of file) |
+| External dependency | `libtmux` (Python bindings to the tmux socket protocol) |
+
+All callers import the singleton: `from cli_agent_orchestrator.clients.tmux import tmux_client`
+
+---
+
+## 2. API Surface Table
+
+This table lists all methods on `TmuxClient` that callers depend on. The private helper `_resolve_and_validate_working_directory` is omitted because it is called only internally; its path-validation logic is backend-agnostic and is slated to move to the abstract base class (see §8).
+
+| # | Method | Signature | Semantic Purpose | Callers (file:line) | Return shape callers depend on | tmux-isms / notes |
+|---|--------|-----------|-----------------|---------------------|-------------------------------|-------------------|
+| 1 | `create_session` | `(session_name, window_name, terminal_id, working_directory=None) -> str` | Create detached multiplexer session with initial window; inject `CAO_TERMINAL_ID` env var | `terminal_service.py:133` | Window name string (may differ from requested name after sanitisation) | Filters provider env vars (CLAUDE*, CODEX_*) from env before passing to `server.new_session`. `detach=True`. Returns `session.windows[0].name`. |
+| 2 | `create_window` | `(session_name, window_name, terminal_id, working_directory=None) -> str` | Add window to existing session | `terminal_service.py:139-141` | Window name string | Injects `CAO_TERMINAL_ID` env via `environment=`. Returns `window.name`. |
+| 3 | `send_keys` | `(session_name, window_name, keys, enter_count=1) -> None` | Send text to pane via paste-buffer trick; appends 1–2 Enters | `terminal_service.py:311-314`; all providers (see §3) | None | **Critical tmux-ism:** uses `load-buffer` + `paste-buffer -p` (bracketed paste, `\x1b[200~…\x1b[201~`) so Ink TUIs don't interpret content as hotkeys. `0.3 s` sleep between paste and Enter. Also sleeps `0.5 s` between multiple Enters. Bypasses direct `send-keys` character-by-character delivery. |
+| 4 | `send_keys_via_paste` | `(session_name, window_name, text) -> None` | Alternative paste path using `libtmux` pane object | No callers in current src (dead code — exists in test suite) | None | Uses `server.cmd("set-buffer")` + `pane.cmd("paste-buffer", "-p")` + `pane.send_keys("C-m", enter=False)`. Functionally identical to `send_keys` but via libtmux objects instead of raw subprocess. |
+| 5 | `send_special_key` | `(session_name, window_name, key) -> None` | Send tmux key name (e.g., `"C-d"`, `"Enter"`, `"Escape"`) without carriage return | `terminal_service.py:364`; `copilot_cli.py:192,195` | None | Uses `pane.send_keys(key, enter=False)` — sends key name in tmux notation. Not bracketed paste. Used for control signals. |
+| 6 | `get_history` | `(session_name, window_name, tail_lines=None) -> str` | Capture pane scrollback (with ANSI escape sequences) | `terminal_service.py:395,404,416-418`; all providers (see §3); `utils/terminal.py:50` | Multiline string with ANSI codes; callers use regex over it | Uses `capture-pane -e -p -S -{lines}`. `-e` preserves escape sequences. Joined with `\n`. Default `TMUX_HISTORY_LINES = 200` lines. |
+| 7 | `list_sessions` | `() -> List[Dict[str, str]]` | Enumerate all multiplexer sessions | `session_service.py:77,90` | List of `{id, name, status}` dicts; `id` == session name | Iterates `server.sessions`; `status` is `"active"` or `"detached"`. |
+| 8 | `get_session_windows` | `(session_name) -> List[Dict[str, str]]` | List windows in a session | Not called in current src (unused — no call site found) | List of `{name, index}` dicts | Iterates `session.windows`. |
+| 9 | `kill_session` | `(session_name) -> bool` | Kill entire session and all windows | `session_service.py:125`; `terminal_service.py:225` | `True` if killed, `False` if not found | Calls `session.kill()`. |
+| 10 | `kill_window` | `(session_name, window_name) -> bool` | Kill one window within a session | `terminal_service.py:453` | `True` if killed, `False` if not found | Calls `window.kill()`. |
+| 11 | `session_exists` | `(session_name) -> bool` | Check whether a named session exists | `terminal_service.py:129,137`; `session_service.py:87,112` | `bool` | Calls `server.sessions.get(session_name=...)` and checks for `None`. |
+| 12 | `get_pane_working_directory` | `(session_name, window_name) -> Optional[str]` | Read the shell's current working directory from pane | `terminal_service.py:278`; `gemini_cli.py:240-241`; `copilot_cli.py:139` | Path string or `None` | Uses `pane.cmd("display-message", "-p", "#{pane_current_path}")`. tmux tracks CWD via OSC 7 or `/proc/<pid>/cwd`. |
+| 13 | `pipe_pane` | `(session_name, window_name, file_path) -> None` | Stream all pane output to a log file | `terminal_service.py:188` | None | Calls `pane.cmd("pipe-pane", "-o", f"cat >> {file_path}")`. `-o` = open the pipe only if no pipe is already open for the pane (a guard against double-piping, not a history filter; `pipe-pane` only ever forwards output produced after it starts). Raw terminal bytes including ANSI/OSC sequences. This is the primary status-detection input for the inbox service (see §6). |
+| 14 | `stop_pipe_pane` | `(session_name, window_name) -> None` | Stop streaming pane output to log file | `terminal_service.py:447` | None | Calls `pane.cmd("pipe-pane")` with no arguments — disables the hook. |
+
+**Total public interface methods: 14**
+(Methods 4 and 8 are present but have no call sites in current source outside of tests.)
+
+---
+
+## 3. All Call Sites — Provider Layer
+
+Providers import `tmux_client` directly and call methods on it during lifecycle operations:
+
+| Provider file | Methods called | Usage |
+|--------------|---------------|-------|
+| `providers/claude_code.py:195,255,258,272,326` | `get_history`, `send_keys` | `initialize()` (snapshot + launch + poll), `get_status()` |
+| `providers/claude_code.py:220` | `tmux_client.server.sessions.get(...)` | **Direct libtmux access** — trust prompt handler bypasses wrapper to get `pane.send_keys("", enter=True)` |
+| `providers/codex.py:225,258,267,285` | `get_history`, `send_keys` | `initialize()`, `get_status()` |
+| `providers/codex.py:235` | `tmux_client.server.sessions.get(...)` | **Direct libtmux access** — trust prompt handler (same pattern as Claude Code) |
+| `providers/gemini_cli.py:240,443,447,465,495,537` | `get_pane_working_directory`, `send_keys`, `get_history` | `_build_gemini_command()` (reads CWD for GEMINI.md), `initialize()` (warmup echo + launch + poll), `get_status()` |
+| `providers/copilot_cli.py:75,139,192,195,274` | `get_history`, `get_pane_working_directory`, `send_special_key`, `send_keys` | History read, CWD for `--add-dir`, key sending, launch |
+| `providers/q_cli.py:58,71` | `send_keys`, `get_history` | `initialize()`, `get_status()` |
+| `providers/kiro_cli.py:167,178,184,212` | `send_keys`, `get_history` | Launch + fallback command, `get_status()` |
+| `providers/opencode_cli.py:144,194` | `send_keys`, `get_history` | Launch, `get_status()` |
+| `providers/kimi_cli.py:344,389` | `send_keys`, `get_history` | Launch, `get_status()` |
+
+**Most-called methods by providers:** `get_history` (every provider), `send_keys` (every provider).
+
+---
+
+## 4. Direct tmux Invocations Bypassing the Wrapper
+
+These are `subprocess.run(["tmux", ...])` calls that do NOT go through `TmuxClient`:
+
+| Location | Command | Reason |
+|----------|---------|--------|
+| `clients/tmux.py:221-224` | `["tmux", "load-buffer", "-b", buf_name, "-"]` | Inside `send_keys()` — the wrapper itself. Part of the bracketed-paste trick that libtmux has no high-level API for. |
+| `clients/tmux.py:226-228` | `["tmux", "paste-buffer", "-p", "-b", buf_name, "-t", target]` | Same — inside the wrapper. |
+| `clients/tmux.py:240-242` | `["tmux", "send-keys", "-t", target, "Enter"]` | Same — inside the wrapper; sends Enter key(s) after paste. |
+| `clients/tmux.py:249-251` | `["tmux", "delete-buffer", "-b", buf_name]` | Same — buffer cleanup inside wrapper. |
+| `providers/claude_code.py:210` | `["tmux", "send-keys", "-t", target, "-l", "\x1b[B"]` | **True bypass:** sends raw Down-arrow escape directly to bypass the selection menu in Claude Code's trust/bypass prompt. `-l` (literal) mode only exists as a raw tmux flag — no libtmux equivalent. |
+| `providers/claude_code.py:212` | `["tmux", "send-keys", "-t", target, "Enter"]` | **True bypass:** companion Enter after the Down-arrow above. |
+| `cli/commands/info.py:27` | `["tmux", "display-message", "-p", "#S"]` | Reads the current session name inside an already-attached tmux session (user's interactive session). Used only for CLI UX, not agent orchestration. |
+| `cli/commands/launch.py:187` | `["tmux", "attach-session", "-t", session_name]` | Attaches the user's terminal to the created session. UX-only, not agent orchestration. |
+| `api/main.py:674` | `["tmux", "-u", "attach-session", "-t", ...]` | Attaches inside a PTY for the WebSocket terminal viewer endpoint. UX-only. |
+
+**Summary of true bypasses (affecting agent I/O):** Lines `claude_code.py:210` and `:212` are the only agent-logic bypasses — they send a raw escape sequence (`\x1b[B`, VT100 cursor-down) and Enter to navigate Claude Code's interactive selection UI. This cannot be expressed as a paste-buffer operation.
+
+---
+
+## 5. Supervisor / MCP-Layer Calls
+
+The MCP server (`mcp_server/server.py`) does **not** call `tmux_client` directly. It operates entirely through the HTTP API:
+
+- `handoff()` → `_handoff_impl()` → `_create_terminal()` → `POST /sessions` or `POST /sessions/{name}/terminals` → `terminal_service.create_terminal()` → tmux_client
+- `assign()` → `_assign_impl()` → same path
+- `send_message()` → `_send_to_inbox()` → inbox DB → `inbox_service.check_and_send_pending_messages()` → `terminal_service.send_input()` → `tmux_client.send_keys()`
+
+The inbox service (`services/inbox_service.py`) reads the log file written by `pipe_pane` using `subprocess.run(["tail", "-n", ...])` — not a tmux call, but depends on the file that `pipe_pane` creates.
+
+Status polling inside `handoff()` uses `wait_until_terminal_status()` which calls `GET /terminals/{id}` → `terminal_service.get_terminal()` → `provider.get_status()` → `tmux_client.get_history()`.
+
+---
+
+## 6. `pipe_pane` Deep-Dive
+
+This is the highest-risk component for the WezTerm port.
+
+### What it does
+
+`pipe_pane` is tmux's mechanism to stream a copy of all bytes written to a pane to an external process. CAO uses: `pipe-pane -o "cat >> {file_path}"`.
+
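+- `-o` = open the pipe only if no pipe is already open for the pane (prevents double-piping); `pipe-pane` forwards only output produced after it starts, never a history replay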
+- Raw stream including ANSI/OSC escape sequences, carriage returns, overwrite sequences from TUI re-renders
+- Written to `~/.aws/cli-agent-orchestrator/logs/terminal/{terminal_id}.log`
+
+### Who starts it
+
+`terminal_service.create_terminal()` calls `tmux_client.pipe_pane(...)` after provider initialization (line 188). `stop_pipe_pane()` is called in `delete_terminal()`.
+
+### Who reads it
+
+`services/inbox_service.py:_get_log_tail()` reads the last N lines via `tail -n {lines}` subprocess. This is the *fast-path* idle check before doing a more expensive `tmux capture-pane`.
+
+### The two-phase detection pipeline
+
+```
+pipe_pane writes raw output → {terminal_id}.log
+ ↓
+watchdog FileSystemEventHandler triggers on modification
+ ↓
+_get_log_tail() reads last 100 lines via tail(1)
+ ↓
+_has_idle_pattern(): provider.get_idle_pattern_for_log() regex against tail
+ ↓ (if pattern found)
+check_and_send_pending_messages(): provider.get_status() for full check
+ ↓ (if IDLE or COMPLETED)
+terminal_service.send_input() → tmux_client.send_keys()
+```
+
+### Regex patterns consuming the log (per provider)
+
+| Provider | `get_idle_pattern_for_log()` return value | Notes |
+|----------|------------------------------------------|-------|
+| ClaudeCode | `r"[>❯][\s\xa0]"` | Matches both old `>` and new `❯` prompt glyphs |
+| Gemini | `r"\*.*Type your message"` | Asterisk + placeholder text |
+| Kiro | (check `kiro_cli.py`) | TBD — not read in this analysis pass |
+| Q CLI | (check `q_cli.py`) | TBD |
+| Codex | (check `codex.py`) | TBD |
+| Copilot | (check `copilot_cli.py`) | TBD |
+| OpenCode | (check `opencode_cli.py`) | TBD |
+| Kimi | (check `kimi_cli.py`) | TBD |
+
+### WezTerm risk
+
+WezTerm has no equivalent of `pipe-pane`. The closest analogues are:
+
+1. `wezterm cli get-text --pane-id N` — dumps the pane's screen/scrollback text, not a live stream. Polling only.
+2. User-defined event hooks in `wezterm.lua` (`wezterm.on("update-status", ...)`) — not per-pane I/O stream.
+3. No documented API for redirecting byte-level pane output to a file.
+
+This means the entire `pipe_pane` → log-file → watchdog → fast-idle-check pipeline must be redesigned for WezTerm. The most viable replacement is periodic polling of `get_history()` (already exists) with a debounced check against the idle pattern — eliminating the watchdog and the log file entirely.
+
+**TBD-spike:** Does replacing `pipe_pane` with a polling loop introduce unacceptable latency for inbox message delivery? The current inbox polling interval is `INBOX_POLLING_INTERVAL = 5` seconds, so a polling approach may be within tolerance.
+
+---
+
+## 7. Open Questions / TBD-Spike Verifies
+
+Items the Phase 1 spike must validate before any implementation is committed:
+
+### High risk
+
+**TBD-spike-1 (bracketed paste):** Does `wezterm cli send-text` deliver text that bypasses Ink TUI hotkey interception the same way tmux's `paste-buffer -p` does? By default `send-text` sends the text as a (bracketed) paste, while `--no-paste` sends it directly as if typed, so both modes must be tested. The `\x1b[200~…\x1b[201~` bracketed-paste protocol is what makes CAO's `send_keys()` safe for interactive TUI apps. WezTerm must either support bracketed-paste injection directly, or the Phase 1 design must find another bypass mechanism.
+
+**TBD-spike-2 (pipe_pane replacement):** Confirm that a polling-based replacement (`get_history()` polled on a timer) provides acceptable latency for inbox message delivery. Measure worst-case delivery lag at the default 5-second polling interval. Decide whether the watchdog-on-log-file pattern can be dropped entirely or needs an alternative event source.
+
+**TBD-spike-3 (raw escape sequences):** Claude Code's trust-prompt handler sends `\x1b[B` (cursor-down) via `tmux send-keys -l` (literal mode). WezTerm's `wezterm cli send-text` does not have a `-l` flag — confirm whether it can inject raw VT sequences and whether the receiving TUI (Ink/React) reacts identically.
+
+### Medium risk
+
+**TBD-spike-4 (env injection on session create):** tmux allows `environment={"CAO_TERMINAL_ID": terminal_id}` on `new_session` and `new_window`. WezTerm's `wezterm cli spawn` supports `--set-environment KEY=VALUE`. Confirm this propagates to the child shell and to subprocesses (MCP servers spawned by the CLI).
+
+**TBD-spike-5 (pane CWD):** `#{pane_current_path}` works because tmux reads CWD from the kernel (`/proc/<pid>/cwd` on Linux, `PROC_PIDVNODEPATHINFO` on macOS). WezTerm exposes pane CWD via `wezterm cli list --format json` → `cwd` field. Confirm the JSON API is stable and that it works on Windows (where there is no `/proc`).
+
+**TBD-spike-6 (multi-window sessions):** tmux sessions map N windows to one session name. WezTerm uses tabs within a window (or multiple panes within a tab). The naming model is different — confirm that the `session_name:window_name` addressing scheme can be faithfully mapped to WezTerm concepts (e.g., `window_id:pane_id` or `window_id:tab_index`).
+
+**TBD-spike-7 (capture-pane ANSI fidelity):** `capture-pane -e` preserves ANSI SGR sequences but strips many OSC and DCS sequences. `wezterm cli get-text` (optionally with `--escapes`) has its own rendering pipeline. Confirm that provider regex patterns (which are tuned against tmux's `capture-pane -e` output) still match against WezTerm's output format. In particular: does WezTerm normalise or strip the exact Unicode characters providers look for (`❯`, `⏺`, `✦`, `▀`, `▄`, etc.)?
+
+### Low risk
+
+**TBD-spike-8 (detach mode):** tmux `new-session -d` creates a session without attaching. WezTerm always shows a GUI window. For headless server use, a WezTerm `--no-attach` or background spawn mode must be confirmed. (WezTerm does support `wezterm start --no-gui` in some modes — verify.)
+
+---
+
+## 8. Estimated Backend Method Count
+
+The WezTerm `MultiplexerBackend` will need to implement **12 methods** to replace the `TmuxClient` API surface (the 14 public methods in §2 minus the two that have no callers).
+
+Methods 4 (`send_keys_via_paste`) and 8 (`get_session_windows`) have no current callers outside of tests and can be deferred to Phase 2 or dropped. The private `_resolve_and_validate_working_directory` logic is backend-agnostic and should move to the abstract base class.
+
+The 12 required methods map directly to entries 1–3, 5–7, 9–14 in the API surface table (§2).
+
+**Confidence:** High for the count; medium for the complexity estimate. The bracketed-paste replacement (method 3 / `send_keys`) and the `pipe_pane` redesign (method 13 / `pipe_pane`) are likely to require the most non-trivial work.
+
+---
+
+## Appendix: Files Referenced
+
+- `src/cli_agent_orchestrator/clients/tmux.py` — TmuxClient class
+- `src/cli_agent_orchestrator/services/terminal_service.py` — primary tmux_client consumer
+- `src/cli_agent_orchestrator/services/session_service.py` — session-level tmux_client consumer
+- `src/cli_agent_orchestrator/services/inbox_service.py` — pipe_pane log consumer
+- `src/cli_agent_orchestrator/mcp_server/server.py` — MCP tools (handoff, assign, send_message)
+- `src/cli_agent_orchestrator/providers/base.py` — BaseProvider ABC
+- `src/cli_agent_orchestrator/providers/claude_code.py` — direct tmux bypass + libtmux direct access
+- `src/cli_agent_orchestrator/providers/codex.py` — libtmux direct access
+- `src/cli_agent_orchestrator/providers/gemini_cli.py` — get_pane_working_directory usage
+- `src/cli_agent_orchestrator/providers/copilot_cli.py` — send_special_key usage
+- `src/cli_agent_orchestrator/utils/terminal.py` — wait_for_shell calls get_history directly
+- `src/cli_agent_orchestrator/constants.py` — TMUX_HISTORY_LINES, TERMINAL_LOG_DIR, INBOX_POLLING_INTERVAL
+- `src/cli_agent_orchestrator/cli/commands/info.py` — tmux display-message bypass (UX)
+- `src/cli_agent_orchestrator/cli/commands/launch.py` — tmux attach-session bypass (UX)
+- `src/cli_agent_orchestrator/api/main.py` — tmux attach in WebSocket PTY (UX)
diff --git a/pyproject.toml b/pyproject.toml
index 4d63e38de..6676b2a66 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,11 +70,12 @@ markers = [
"asyncio: marks tests that use asyncio",
"integration: marks integration tests",
"e2e: marks end-to-end tests",
- "slow: marks tests as slow (deselect with '-m \"not slow\"')"
+ "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+ "smoke: opt-in tests that require real wezterm + provider CLIs on PATH; not run by default",
]
asyncio_mode = "strict"
testpaths = ["test"]
python_files = "test_*.py"
python_classes = "Test*"
python_functions = "test_*"
-addopts = "--cov=src --cov-report=term-missing -m 'not e2e'"
+addopts = "--cov=src --cov-report=term-missing -m 'not e2e and not smoke'"
diff --git a/spikes/01-result.md b/spikes/01-result.md
new file mode 100644
index 000000000..e0546d8e7
--- /dev/null
+++ b/spikes/01-result.md
@@ -0,0 +1,19 @@
+# Spike 1 Result
+
+- Verdict: **GO**
+- Summary: spawn/send-text/get-text/kill-pane all worked with a standalone WezTerm window.
+- WezTerm binary: `C:\Users\marc\Downloads\WezTerm-windows-20260331-040028-577474d8\wezterm.exe`
+- WezTerm version: `wezterm 20260331-040028-577474d8`
+- Duration: `3312 ms`
+
+## Evidence
+- `spawn` pane id: `17`
+- shell ready marker observed: `True`
+- `send-text` exit code: `0`
+- `get-text` contains marker: `True`
+```text
+SHELL_READY
+marc@mafewin:/mnt/c/Users/marc$ echo hello-from-spike
+hello-from-spike
+marc@mafewin:/mnt/c/Users/marc$
+```
diff --git a/spikes/02-result.md b/spikes/02-result.md
new file mode 100644
index 000000000..dcba487b8
--- /dev/null
+++ b/spikes/02-result.md
@@ -0,0 +1,84 @@
+# Spike 2 Result
+
+- Verdict: **NEEDS-WORKAROUND**
+- Per-CLI verdicts: `claude: neither, codex: neither, gemini: blocked`
+- Mode A: `wezterm cli send-text --no-paste -- '/help\n'`
+- Mode B: `wezterm cli send-text -- '/help\n'`
+
+## Recommendation
+- `claude`: prefer `custom workaround needed`
+- `codex`: prefer `custom workaround needed`
+
+## Evidence
+### claude
+- Status: `fail`
+- Accepted mode: `neither`
+```text
+[A --no-paste]
+ Quick safety check: Is this a project you created or one you trust? (Like your
+ own code, a well-known open source project, or work from your team). If not,
+ take a moment to review what's in this folder first.
+
+ Claude Code'll be able to read, edit, and execute files here.
+
+ Security guide
+
+ ❯ 1. Yes, I trust this folder
+ 2. No, exit
+
+ Enter to confirm · Esc to cancel
+
+[B default paste]
+ Quick safety check: Is this a project you created or one you trust? (Like your
+ own code, a well-known open source project, or work from your team). If not,
+ take a moment to review what's in this folder first.
+
+ Claude Code'll be able to read, edit, and execute files here.
+
+ Security guide
+
+ ❯ 1. Yes, I trust this folder
+ 2. No, exit
+
+ Enter to confirm · Esc to cancel
+```
+### codex
+- Status: `fail`
+- Accepted mode: `neither`
+```text
+[A --no-paste]
+⚠️ Process "codex" in domain "local" didn't exit cleanly
+Exited with code 1.
+This message is shown because exit_behavior="CloseOnCleanExit"
+
+[B default paste]
+⚠️ Process "codex" in domain "local" didn't exit cleanly
+Exited with code 1.
+This message is shown because exit_behavior="CloseOnCleanExit"
+```
+### gemini
+- Status: `blocked`
+- Accepted mode: `blocked`
+```text
+command not installed or not on PATH
+```
+
+## Environment Notes
+- `gemini` could not be tested because the executable is unavailable in this environment.
+
+## Gemini (re-tested after install)
+- Re-test date: `2026-04-24`
+- Status: `blocked`
+- Accepted mode: `blocked`
+```text
+[PATH checks]
+PowerShell: gemini --version
+ The term 'gemini' is not recognized as a name of a cmdlet, function, script file, or executable program.
+
+PowerShell: where.exe gemini
+ INFO: Could not find files for the given pattern(s).
+
+bash: command -v gemini
+
+```
+- Result: the binary is still unavailable on this machine, so spike 2 remains blocked for Gemini.
diff --git a/spikes/02b-codex-launch.md b/spikes/02b-codex-launch.md
new file mode 100644
index 000000000..5a1db8895
--- /dev/null
+++ b/spikes/02b-codex-launch.md
@@ -0,0 +1,85 @@
+# Spike 2b Result
+
+- Verdict: **NEEDS-WORKAROUND**
+- Goal status: `launch fixed, raw send-text submission still unresolved`
+- Working launch command:
+ ```powershell
+ & 'C:\Users\marc\Downloads\WezTerm-windows-20260331-040028-577474d8\wezterm.exe' cli spawn --new-window --cwd C:\dev\aws-cao -- C:\Users\marc\scoop\apps\nodejs-lts\current\bin\codex.cmd -c hooks=[] --yolo --no-alt-screen --disable shell_snapshot
+ ```
+- TUI-ready latency: `2319 ms avg` across `1755 / 2469 / 2734 ms`
+- Send-text verdict: `neither`
+
+## What Worked
+- The pane stayed alive and rendered the Codex TUI when launched via the Windows shim `codex.cmd`.
+- CAO's tmux flags were still necessary: `--yolo --no-alt-screen --disable shell_snapshot`.
+- A one-shot config override `-c hooks=[]` was also necessary on this machine because interactive Codex rejected the local `hooks` config schema during startup.
+
+## Why Naive Spawn Exited
+- `wezterm cli spawn --new-window -- codex` launched inside Marc's default WezTerm shell domain, which is `bash` in a Linux-style environment for this window.
+- In that shell, `codex` resolved to `/mnt/c/.../codex`, then aborted with:
+ `Error: Missing optional dependency @openai/codex-linux-arm64`
+- When forced onto the Windows Codex shim, startup progressed but interactive Codex still aborted unless `-c hooks=[]` was added, due to:
+ `invalid type: map, expected a sequence in hooks`
+
+## Send-Text Probe
+- Mode A: `wezterm cli send-text --pane-id <pane-id> --no-paste -- '/help\n'`
+- Mode B: `wezterm cli send-text --pane-id <pane-id> -- '/help\n'`
+- Result: both modes inserted text into Codex's composer, but neither mode visibly submitted the message or produced command output.
+- Fallback text prompts behaved the same way: the prompt text appeared after `›`, but Codex did not execute it within the observation window.
+
+```text
+[A --no-paste]
+› /help
+ gpt-5.4 default · C:\dev\aws-cao
+
+[B default paste]
+› /help
+ gpt-5.4 default · C:\dev\aws-cao
+```
+
+## Evidence
+### Failing naive launch in WezTerm shell domain
+```text
+file:///mnt/c/Users/marc/scoop/persist/nodejs-lts/bin/node_modules/@openai/codex/bin/codex.js:100
+Error: Missing optional dependency @openai/codex-linux-arm64. Reinstall Codex: npm install -g @openai/codex@latest
+```
+
+### Successful TUI launch with explicit Windows Codex
+```text
+╭─────────────────────────────────────────╮
+│ >_ OpenAI Codex (v0.124.0) │
+│ model: gpt-5.4 /model to change │
+│ directory: C:\dev\aws-cao │
+│ permissions: YOLO mode │
+╰─────────────────────────────────────────╯
+
+⚠ failed to parse hooks config C:\Users\marc\.codex\hooks.json: expected value
+⚠ failed to parse TOML hooks in C:\Users\marc\.codex\config.toml: invalid type: map, expected a sequence
+
+› Summarize recent commits
+ gpt-5.4 default · C:\dev\aws-cao
+```
+
+## WezTerm Backend Construction Diff
+```diff
+--- a/src/cli_agent_orchestrator/providers/codex.py
++++ b/src/cli_agent_orchestrator/multiplexers/wezterm.py
+@@
+- command = shlex.join(["codex", "--yolo", "--no-alt-screen", "--disable", "shell_snapshot"])
++ spawn_argv = [
++ resolve_windows_codex(), # prefer codex.cmd on Windows; avoid bash/WSL shim resolution
++ "-c",
++ "hooks=[]", # local interactive Codex rejected ~/.codex hooks schema on marcwin
++ "--yolo",
++ "--no-alt-screen",
++ "--disable",
++ "shell_snapshot",
++ ]
++ wezterm cli spawn --new-window --cwd <cwd> -- <spawn_argv...>
+```
+
+## Recommendation
+- For WezTerm on Windows, do not rely on shell-resolved `codex`.
+- Resolve the executable explicitly to the Windows shim (`codex.cmd`) before calling `wezterm cli spawn`.
+- Carry forward CAO's existing flags unchanged.
+- Keep a provider/backend-specific workaround slot for local Codex config overrides, because interactive startup can fail before the TUI becomes reachable.
diff --git a/spikes/03-result.md b/spikes/03-result.md
new file mode 100644
index 000000000..2e2399d20
--- /dev/null
+++ b/spikes/03-result.md
@@ -0,0 +1,39 @@
+# Spike 3 Result
+
+- Verdict: **GO**
+- Recommended interval: `500 ms`
+
+## Measurements
+
+| Interval | First detection (ms) | CPU % | Poll count | Miss count |
+|---|---:|---:|---:|---:|
+| 100 ms | 152.7 | 2.04 | 23 | 0 |
+| 200 ms | 207.3 | 3.64 | 13 | 0 |
+| 500 ms | 144.2 | 0.83 | 16 | 0 |
+
+## Raw JSON
+```json
+[
+ {
+ "interval_ms": 100,
+ "first_detection_ms": 152.7,
+ "cpu_percent": 2.04,
+ "polls": 23,
+ "miss_count": 0
+ },
+ {
+ "interval_ms": 200,
+ "first_detection_ms": 207.3,
+ "cpu_percent": 3.64,
+ "polls": 13,
+ "miss_count": 0
+ },
+ {
+ "interval_ms": 500,
+ "first_detection_ms": 144.2,
+ "cpu_percent": 0.83,
+ "polls": 16,
+ "miss_count": 0
+ }
+]
+```
diff --git a/spikes/04-result.md b/spikes/04-result.md
new file mode 100644
index 000000000..5c9446768
--- /dev/null
+++ b/spikes/04-result.md
@@ -0,0 +1,103 @@
+# Spike 4 Result
+- Verdict: **NEEDS-WORKAROUND**
+- Summary: `claude: missing BYPASS_PROMPT_PATTERN; codex: missing IDLE_PROMPT_PATTERN, TRUST_PROMPT_PATTERN, WAITING_PROMPT_PATTERN, CODEX_WELCOME_PATTERN; gemini: blocked`
+
+
+## claude
+- Source: `src\cli_agent_orchestrator\providers\claude_code.py`
+- `IDLE_PROMPT_PATTERN` = `[>❯][\s\xa0]`
+- `TRUST_PROMPT_PATTERN` = `Yes, I trust this folder`
+- `BYPASS_PROMPT_PATTERN` = `Yes, I accept`
+- Plain capture length: `504`
+- Escaped capture length: `1037`
+
+| Pattern | Plain | `--escapes` |
+|---|---|---|
+| `IDLE_PROMPT_PATTERN` | `True` | `True` |
+| `TRUST_PROMPT_PATTERN` | `True` | `False` |
+| `BYPASS_PROMPT_PATTERN` | `False` | `False` |
+
+```text
+────────────────────────────────────────────────────────────────────────────────
+ Accessing workspace:
+
+ C:\dev\aws-cao
+
+ Quick safety check: Is this a project you created or one you trust? (Like your
+ own code, a well-known open source project, or work from your team). If not,
+ take a moment to review what's in this folder first.
+
+ Claude Code'll be able to read, edit, and execute files here.
+
+ Security guide
+
+ ❯ 1. Yes, I trust this folder
+ 2. No, exit
+
+ Enter to confirm · Esc to cancel
+```
+
+## codex
+- Source: `src\cli_agent_orchestrator\providers\codex.py`
+- `IDLE_PROMPT_PATTERN` = `(?:❯|›|codex>)`
+- `TRUST_PROMPT_PATTERN` = `allow Codex to work in this folder`
+- `WAITING_PROMPT_PATTERN` = `^(?:Approve|Allow)\b.*\b(?:y/n|yes/no|yes|no)\b`
+- `CODEX_WELCOME_PATTERN` = `OpenAI Codex`
+- Plain capture length: `161`
+- Escaped capture length: `232`
+
+| Pattern | Plain | `--escapes` |
+|---|---|---|
+| `IDLE_PROMPT_PATTERN` | `False` | `False` |
+| `TRUST_PROMPT_PATTERN` | `False` | `False` |
+| `WAITING_PROMPT_PATTERN` | `False` | `False` |
+| `CODEX_WELCOME_PATTERN` | `False` | `False` |
+
+```text
+⚠️ Process "codex" in domain "local" didn't exit cleanly
+Exited with code 1.
+This message is shown because exit_behavior="CloseOnCleanExit"
+```
+
+## gemini
+- Source: `src\cli_agent_orchestrator\providers\gemini_cli.py`
+- `IDLE_PROMPT_PATTERN` = `\*\s+Type your message`
+- `WELCOME_BANNER_PATTERN` = `█████████.*██████████`
+- `RESPONDING_WITH_PATTERN` = `Responding with\s+\S+`
+- Runtime probe: blocked; `gemini` executable unavailable.
+## Candidate Regex Patch Notes
+```diff
+--- a/src/cli_agent_orchestrator/providers/claude_code.py
++++ b/src/cli_agent_orchestrator/providers/claude_code.py
+@@
+-# Existing WezTerm probe did not match: BYPASS_PROMPT_PATTERN
++# Phase 2: either normalize WezTerm startup text or broaden these regexes: BYPASS_PROMPT_PATTERN
+```
+
+```diff
+--- a/src/cli_agent_orchestrator/providers/codex.py
++++ b/src/cli_agent_orchestrator/providers/codex.py
+@@
+-# Existing WezTerm probe did not match: IDLE_PROMPT_PATTERN, TRUST_PROMPT_PATTERN, WAITING_PROMPT_PATTERN, CODEX_WELCOME_PATTERN
++# Phase 2: either normalize WezTerm startup text or broaden these regexes: IDLE_PROMPT_PATTERN, TRUST_PROMPT_PATTERN, WAITING_PROMPT_PATTERN, CODEX_WELCOME_PATTERN
+```
+
+## Gemini (re-tested after install)
+- Re-test date: `2026-04-24`
+- Source: `src\cli_agent_orchestrator\providers\gemini_cli.py`
+- `IDLE_PROMPT_PATTERN` = `\*\s+Type your message`
+- `WELCOME_BANNER_PATTERN` = `█████████.*██████████`
+- `RESPONDING_WITH_PATTERN` = `Responding with\s+\S+`
+- Runtime probe: still blocked; `gemini` is not available from PowerShell, `where.exe`, or `bash`.
+
+```text
+PowerShell: gemini --version
+ The term 'gemini' is not recognized as a name of a cmdlet, function, script file, or executable program.
+
+PowerShell: where.exe gemini
+ INFO: Could not find files for the given pattern(s).
+
+bash: command -v gemini
+
+```
+
diff --git a/spikes/SUMMARY.md b/spikes/SUMMARY.md
new file mode 100644
index 000000000..4a42768a9
--- /dev/null
+++ b/spikes/SUMMARY.md
@@ -0,0 +1,9 @@
+# WezTerm Phase 1 Spike Summary
+
+| # | Spike | Verdict | Key finding | Phase 2 implication |
+|---|---|---|---|---|
+| 1 | `send-text` + `get-text` round-trip | GO | `wezterm cli spawn/send-text/get-text/kill-pane` works on marcwin when using the local WezTerm binary and waiting for a shell-ready marker. | The substrate is viable; backend work can proceed. |
+| 2 | Paste-mode behavior in AI CLIs | NEEDS-WORKAROUND | Claude still stayed on its trust prompt under both paste modes; Codex now launches with an explicit Windows shim but raw `send-text` still only populates the composer; Gemini is still unavailable on this machine. | Phase 2 still needs per-provider startup and submission handling; raw `send-text` alone is not sufficient for Codex and no default paste mode can be chosen globally yet. |
+| 2b | Codex launch args | NEEDS-WORKAROUND | Codex stayed alive only when WezTerm spawned `codex.cmd -c hooks=[] --yolo --no-alt-screen --disable shell_snapshot`; naive `codex` hit the bash/WSL shim and exited, but post-launch `send-text` still did not submit. | The WezTerm backend should resolve the Windows Codex shim explicitly and preserve CAO's flags, but another mechanism is still needed to submit input after text insertion. |
+| 3 | Polling latency for `pipe_pane` replacement | GO | `wezterm cli get-text` polling saw all 10/10 burst markers at 100/200/500 ms with first-detection latencies of 152.7 ms, 207.3 ms, and 144.2 ms respectively. | Replacing `pipe-pane` with polling is feasible; start with 500 ms for lower CPU and tune if inbox responsiveness needs more aggression. |
+| 4 | `get-text` regex compatibility | NEEDS-WORKAROUND | Claude trust text matches in plain `get-text` output, but not consistently in `--escapes`; Codex startup is now understood as a shell/config issue rather than a pure regex issue; Gemini is still unavailable on this machine. | Phase 2 should normalize plain `get-text` output first, fix Codex launch path separately from regex handling, and defer Gemini-specific regex validation until the binary is actually reachable. |
diff --git a/spikes/TSK-068-prompt.md b/spikes/TSK-068-prompt.md
new file mode 100644
index 000000000..573975f02
--- /dev/null
+++ b/spikes/TSK-068-prompt.md
@@ -0,0 +1,107 @@
+# Spike batch — Phase 1 of PRJ-042 (aws-cao WezTerm port)
+
+You are executing Phase 1 of a fork of `awslabs/cli-agent-orchestrator` (CAO). You have no prior conversation context — this prompt is fully self-contained.
+
+## Background
+
+CAO is AWS Labs' CLI Agent Orchestrator: a supervisor AI CLI spawns worker AI CLIs (Claude Code, Codex, Gemini CLI) inside tmux windows, drives them via `tmux send-keys` + `tmux capture-pane`, and preserves each worker's TUI for interactive dialog. It's tmux-only and has no Windows support.
+
+We're porting it to run on Windows by replacing tmux with WezTerm (which has a CLI usable from Windows/macOS/Linux). Phase 0 (already complete) produced `docs/multiplexer-api-surface.md` enumerating CAO's `tmux_client` API surface — read that file first, it's the ground truth for what the WezTerm backend must implement.
+
+Your job NOW is Phase 1: validate four unknowns about WezTerm's CLI before any abstraction work starts. Each spike is a throwaway script + a result markdown file with a binary verdict.
+
+## Repo state
+
+- Working dir: `C:\dev\aws-cao`
+- Branch: `wezterm-multiplexer` (already checked out)
+- Phase 0 deliverable: `C:\dev\aws-cao\docs\multiplexer-api-surface.md` (READ THIS FIRST)
+- Spike workdir: `C:\dev\aws-cao\spikes\` — create scripts and results here
+- Platform: Windows (marcwin), Git Bash for shell, WezTerm running as the GUI terminal
+- WezTerm CLI is on PATH as `wezterm` (verify with `wezterm --version`)
+- AI CLIs available on PATH for spike 2: `claude`, `codex`, `gemini` (verify each with `--version`)
+
+## Constraints (HARD)
+
+- Only modify files under `C:\dev\aws-cao\spikes\` and `C:\dev\aws-cao\docs\` — do NOT touch source files under `src/`
+- Each spike commits independently with a clear message: `spike(N): <short title> — <verdict>`
+- Use `rtk` prefix on git commands (e.g., `rtk git add`, `rtk git commit`, `rtk git push`) per the user's CLAUDE.md
+- Push to origin/wezterm-multiplexer when all 4 spikes are done
+- Spike scripts are throwaway — bash or PowerShell or Python, whichever is fastest. Don't engineer them.
+- DO NOT install any dependencies beyond what's already on the system
+- DO NOT modify wezterm config (`wezterm.lua`)
+- If a spike needs to create wezterm panes, prefer `--new-window` so they're isolated from the user's working panes (less disruptive); kill panes on exit
+
+## The 4 spikes
+
+### Spike 1 — WezTerm send-text + get-text round-trip
+**Question:** Does the WezTerm CLI work at all on marcwin for our use case?
+**Test:**
+1. `wezterm cli spawn --new-window -- bash` — capture the new pane-id from stdout
+2. `wezterm cli send-text --pane-id <pane-id> --no-paste -- 'echo hello-from-spike\n'`
+3. Wait 500ms
+4. `wezterm cli get-text --pane-id <pane-id>` — verify "hello-from-spike" appears in output
+5. `wezterm cli kill-pane --pane-id <pane-id>`
+**Result file:** `spikes/01-result.md`
+**Verdict:** GO / NO-GO / NEEDS-WORKAROUND
+**If NO-GO, abort the rest** — substrate is broken, no point continuing.
+
+### Spike 2 — Paste-mode behavior with each AI CLI's TUI
+**Question:** Will `wezterm cli send-text` correctly deliver input to Ink-based TUIs (Claude Code, Codex, Gemini)? CAO's tmux client uses `load-buffer` + `paste-buffer -p` to wrap text in bracketed-paste sequences (`\x1b[200~ ... \x1b[201~`), which bypasses TUI hotkey interception. WezTerm needs an equivalent.
+**Test (per CLI in {claude, codex, gemini}):**
+1. Spawn the CLI in a new wezterm pane (e.g., `wezterm cli spawn --new-window -- claude`)
+2. Wait for the TUI to fully render (~3 seconds — adjust if needed)
+3. Try sending `/help\n` two ways:
+ - **A:** `wezterm cli send-text --pane-id <pane-id> --no-paste -- '/help\n'`
+ - **B:** `wezterm cli send-text --pane-id <pane-id> -- '/help\n'` (default, which IS bracketed-paste in WezTerm)
+4. After each, wait 2s, capture pane via get-text, check whether the slash command was accepted (look for help output)
+5. Kill the pane between attempts to start clean
+**Result file:** `spikes/02-result.md`
+**Verdict:** Per-CLI table: `{claude: A|B|both|neither, codex: ..., gemini: ...}` plus a recommended default for the WezTerm backend
+**If `neither` for any CLI:** that's NEEDS-WORKAROUND — document the failure mode (e.g., "claude eats `/` because of input mode X")
+
+### Spike 3 — Polling latency for `pipe_pane` substitute
+**Question:** WezTerm has no continuous-stream-to-file equivalent. CAO uses `tmux pipe-pane` to log all pane output to a file, then a watchdog watches the file for state-detection patterns. We need to replace this with polling on `wezterm cli get-text`. Is polling fast enough?
+**Test:**
+1. Spawn a wezterm pane running `bash`
+2. Start a Python (or bash) loop that calls `wezterm cli get-text --pane-id <pane-id>` every 100ms / 200ms / 500ms (run three trials)
+3. While polling, send a known marker: `wezterm cli send-text --pane-id <pane-id> --no-paste -- 'echo SPIKE-MARKER-$(date +%N)\n'`
+4. Measure: (a) time-to-first-detection of the marker (ms after send-text returned), (b) CPU% of the polling loop (rough — `Get-Process` snapshot or `time` if Python), (c) any output that get-text *missed* between polls (send 10 markers in quick succession with `sleep 0.05` between, then verify all 10 appear in the polled buffer)
+5. Repeat at all three intervals
+**Result file:** `spikes/03-result.md`
+**Verdict:** GO with recommended interval / NEEDS-WORKAROUND (specify the WezTerm Lua hook fallback design if polling is too slow)
+**Required:** concrete numbers (latency in ms, CPU%, miss-count)
+
+### Spike 4 — `get-text` ANSI / regex compatibility
+**Question:** CAO providers detect state with regexes like `_permission_prompt_pattern = r'Allow this action\? \[y/n/t\]:'` and idle patterns. These were tuned against `tmux capture-pane -p` output. Does `wezterm cli get-text` produce text that matches the same patterns? ANSI escape handling may differ.
+**Test:**
+1. Read the actual regex patterns from `src/cli_agent_orchestrator/providers/claude_code.py`, `codex.py`, `gemini_cli.py` — extract the idle pattern and any prompt patterns
+2. For each of {claude, codex, gemini}: spawn the CLI in a wezterm pane, do something that triggers each pattern (idle = wait after spawn; permission prompt = ask the CLI to do something it'll prompt for; for codex use a path it doesn't have access to so it asks)
+3. Capture pane output via `wezterm cli get-text --pane-id <pane-id>` and run the regexes against it
+4. Compare against `wezterm cli get-text --pane-id <pane-id> --escapes` (raw ANSI) if needed to understand any normalization
+**Result file:** `spikes/04-result.md`
+**Verdict:** Per-CLI per-pattern table: `{claude.idle: matches, claude.permission: matches with patch X, codex.idle: ...}` — list any regex patches needed, formatted as a unified diff snippet ready for Phase 2
+
+## Reporting
+
+After all 4 spikes complete:
+
+1. Each `spikes/0N-result.md` exists with verdict + evidence
+2. Final commit + push to `origin/wezterm-multiplexer`
+3. Write `spikes/SUMMARY.md` with a 4-row table:
+
+| # | Spike | Verdict | Key finding | Phase 2 implication |
+|---|---|---|---|---|
+
+4. Print to stdout (so it lands in the codex exec output file):
+ - One-line per-spike verdict
+ - The single biggest risk for Phase 2 implementation
+ - Anything that surprised you and the doc didn't anticipate
+
+## Order
+
+1. Spike 1 (gating — abort all if NO-GO)
+2. Spikes 2, 3, 4 — these are independent, run in any order, parallelize internally if comfortable
+3. SUMMARY.md
+4. Commit + push everything
+
+Begin.
diff --git a/spikes/TSK-069-prompt.md b/spikes/TSK-069-prompt.md
new file mode 100644
index 000000000..9b105151c
--- /dev/null
+++ b/spikes/TSK-069-prompt.md
@@ -0,0 +1,114 @@
+# TSK-069 — Phase 2 implementation plan for PRJ-042
+
+You are Codex, running in `codex exec --yolo --skip-git-repo-check` mode with no prior conversation context. Execute this task end-to-end: read inputs, write the plan, commit, push. Everything you need is in this prompt and in the repo at your CWD (`C:\dev\aws-cao`, branch `wezterm-multiplexer`).
+
+## Background (why this exists)
+
+PRJ-042 ports AWS's `cli-agent-orchestrator` (CAO) from tmux-only to a pluggable multiplexer backend with **WezTerm as the first non-tmux target**. Motivation: CAO's tmux dependency blocks Windows-native use and rules out rich-TUI agents (Claude Code, Codex, Gemini CLI) whose interactive panes don't survive tmux. WezTerm's CLI gives us the same primitives (spawn panes, send text, capture output) with native Windows support and no alt-screen interference.
+
+- **Phase 0 (TSK-067, DONE):** catalogued CAO's multiplexer API surface — 14 methods, 11 active. See `docs/multiplexer-api-surface.md`.
+- **Phase 1 (TSK-068, DONE):** four spikes validated WezTerm CLI as substrate. Results in `spikes/*-result.md` + `spikes/SUMMARY.md`. Verdict: GO.
+- **Phase 1b (TSK-070, DONE):** follow-up Codex-on-Windows launch shim + send-text-doesn't-submit findings. See `spikes/02b-codex-launch.md`.
+- **Phase 2 (THIS TASK):** design the actual implementation — `BaseMultiplexer` interface, `TmuxMultiplexer` refactor of existing code, new `WezTermMultiplexer`, per-provider regex patches.
+- **Phase 3 (future):** implement Phase 2 plan.
+
+Architecture is locked: **per-project Claude Code session acts as supervisor**, using CAO to drive ephemeral Codex/Gemini workers in WezTerm panes. Supervisor handles routing + dispatch; workers are stateless. No marc-hq-level meta-observer in this PRJ.
+
+## Inputs you MUST read before writing the plan
+
+All paths relative to `C:\dev\aws-cao` (your CWD):
+
+**Design inputs:**
+- `docs/multiplexer-api-surface.md` — the 14-method surface you must generalize
+- `spikes/SUMMARY.md` — Phase 1 rollup
+- `spikes/01-result.md` — spawn + basic send-text
+- `spikes/02-result.md` — Claude/Codex/Gemini launch behavior
+- `spikes/02b-codex-launch.md` — Codex-on-Windows shim (CRITICAL for the WezTerm backend)
+- `spikes/03-result.md` — polling latency (500ms interval, detection latency)
+- `spikes/04-result.md` — get-text output format + regex compat
+- `spikes/TSK-068-prompt.md`, `spikes/TSK-070-prompt.md` — prior prompts for style reference
+
+**Source inputs (CAO today):**
+- `src/cli_agent_orchestrator/clients/tmux.py` — the current multiplexer client (this is what gets split into BaseMultiplexer + TmuxMultiplexer)
+- `src/cli_agent_orchestrator/providers/claude_code.py` — especially `_handle_startup_prompts()` (trust prompt, must port verbatim to WezTerm backend)
+- `src/cli_agent_orchestrator/providers/codex.py`
+- `src/cli_agent_orchestrator/providers/gemini_cli.py`
+
+Read enough of each provider to identify regexes / state-detection patterns that assume tmux output format. The handoff hypothesis is that plain `wezterm cli get-text` output is compatible (validated in spike 04), but per-provider patches may still be needed.
+
+## Key constraints from Phase 1 findings
+
+These MUST be reflected in the plan:
+
+1. **`send_message()` is a two-step primitive on WezTerm.** `wezterm cli send-text` populates the composer but does NOT submit. The backend must: (a) paste text body, (b) inject Enter separately (`wezterm cli send-text $'\r'` or `--no-paste` + key injection). This mirrors CAO's existing tmux `paste-buffer` + `send-keys C-m` split — generalize the two-step pattern into the base interface, not a WezTerm-only hack.
+
+2. **Codex-on-Windows launch requires a shim:**
+ ```
+ wezterm cli spawn --new-window --cwd <cwd> -- \
+ C:\Users\marc\scoop\apps\nodejs-lts\current\bin\codex.cmd \
+ -c hooks=[] --yolo --no-alt-screen --disable shell_snapshot
+ ```
+ The `codex.cmd` path, `hooks=[]`, `--no-alt-screen`, and `--disable shell_snapshot` are all load-bearing. Plan must account for a per-provider launch-command template and a Windows-vs-Unix path resolver.
+
+3. **Claude trust prompt:** port `_handle_startup_prompts()` from `claude_code.py` to work against the WezTerm backend unchanged. Verify the regex still matches `get-text` output.
+
+4. **Polling:** 500ms interval, 0 missed markers at 10-message bursts, 144-207ms detection latency. Adequate replacement for tmux `pipe-pane`. Plan should specify WezTermMultiplexer uses periodic `get-text` diffs instead of a streaming pipe.
+
+5. **Regex compat:** use plain `get-text` mode, NOT `--escapes`. Existing CAO regexes work against plain output.
+
+6. **Gemini not on PATH** on marcwin; wiring Gemini is stretch, not MVP. Plan may defer Gemini backend integration.
+
+## Deliverable — `docs/PLAN-phase2.md`
+
+Structure:
+
+### 1. Executive summary (≤10 lines)
+One paragraph: what Phase 2 delivers, rough LoC and day estimate, main risks.
+
+### 2. BaseMultiplexer interface
+Full method signatures with docstrings. Derived from `docs/multiplexer-api-surface.md` — same 11 active methods, but with any necessary generalizations (e.g., two-step submit, launch-command templating). Call out which methods are abstract vs. default-implemented.
+
+### 3. TmuxMultiplexer identity refactor
+How `clients/tmux.py` becomes `TmuxMultiplexer(BaseMultiplexer)` with behavior unchanged. What moves, what stays. Should be mechanical — explicitly flag any non-trivial behavior change as a risk.
+
+### 4. WezTermMultiplexer — new
+Concrete method-by-method design:
+- Pane/window model mapping (tmux session/window/pane → wezterm workspace/tab/pane)
+- `send_message()` two-step flow with exact commands
+- `get_text()` buffer retrieval + polling loop
+- Launch command templating with the Codex-on-Windows shim as a worked example
+- Claude trust-prompt handler port (reuse vs. re-implement)
+- Error handling for unavailable `wezterm` binary
+
+### 5. Per-provider patches
+For each of claude_code.py / codex.py / gemini_cli.py: list the regexes or state-detection calls that were inspected, state whether they need patches for the WezTerm backend, and if so what. Based on spike 04 most should pass through unchanged — explicitly say so where applicable.
+
+### 6. Test strategy
+How Phase 3 verifies this. Real-WezTerm smoke tests? Mocked multiplexer tests? Existing CAO test harness — does it parameterize cleanly?
+
+### 7. LoC + day estimate
+Table: component → lines added / lines moved / days. Be honest: solo maintainer, Windows primary, Claude + Codex MVP only.
+
+### 8. Risks
+Ranked list. Must include at minimum: (a) per-provider regex drift not caught in spike 04, (b) Codex `hooks=[]` shim becoming stale if upstream config moves, (c) Gemini-on-Windows-PATH blocker, (d) WezTerm CLI surface changes across versions.
+
+### 9. Out of scope (explicit)
+Layer 2 marc-hq meta-observer. Non-WezTerm non-tmux backends. Gemini MVP wiring if you judged it stretch.
+
+## Style rules
+
+- English. Markdown. Code blocks for commands and signatures.
+- No ceremony, no roadmap-bureaucracy bullet lists. Terse and load-bearing.
+- If a design question genuinely needs human input, park it in a "Decisions deferred" section at the end with options — don't make up an answer.
+- Cite files by path + line number where it sharpens a claim.
+
+## Workflow
+
+1. Read every file listed under **Inputs** above.
+2. Write `docs/PLAN-phase2.md`.
+3. `git add docs/PLAN-phase2.md spikes/TSK-069-prompt.md`
+4. `git commit -m "docs(multiplexer): Phase 2 implementation plan (TSK-069)"`
+5. `git push origin wezterm-multiplexer`
+6. Print a one-paragraph summary of the plan and the commit SHA. Done.
+
+You are on branch `wezterm-multiplexer` already. Don't create a new branch. Don't open a PR — #206 is already open and tracks this branch.
diff --git a/spikes/TSK-070-prompt.md b/spikes/TSK-070-prompt.md
new file mode 100644
index 000000000..5b3d69cf3
--- /dev/null
+++ b/spikes/TSK-070-prompt.md
@@ -0,0 +1,91 @@
+# TSK-070 — Spike 2b (Codex launch args) + Gemini re-check
+
+You are executing TSK-070, Phase 1b of PRJ-042 (aws-cao WezTerm port). You have no prior conversation context; this prompt is fully self-contained.
+
+## Context
+
+TSK-068 ran 4 spikes testing WezTerm CLI as a replacement for tmux in CAO (`awslabs/cli-agent-orchestrator`). Results are at `C:\dev\aws-cao\spikes\01-result.md` through `04-result.md` + `SUMMARY.md`. Substrate verdict: GO (spikes 1, 3). Two unknowns remain:
+
+1. **Codex launch exits immediately.** `wezterm cli spawn --new-window -- codex` produced `"Process codex in domain local didn't exit cleanly. Exited with code 1."` before the TUI rendered. Spikes 2 and 4 couldn't get Codex into a testable state.
+2. **Gemini was not installed** when TSK-068 ran. It's now installed on marcwin (`gemini --version` should work).
+
+Close both before Phase 2 planning.
+
+## Working dir & branch
+
+- `C:\dev\aws-cao` — already on branch `wezterm-multiplexer`
+- Phase 0 deliverable: `docs/multiplexer-api-surface.md`
+- Phase 1 outputs: `spikes/01-result.md` … `04-result.md`, `SUMMARY.md`
+- CAO's existing Codex provider source (the ground truth for Codex-under-tmux args): `src/cli_agent_orchestrator/providers/codex.py`
+
+## Part A — Spike 2b: Codex launch args under wezterm
+
+**Goal:** find the exact command line that keeps `codex` alive in a wezterm pane long enough to accept a slash command via `wezterm cli send-text`.
+
+**Investigation steps:**
+
+1. **Read CAO's Codex provider.** Open `src/cli_agent_orchestrator/providers/codex.py`. Find the method that starts codex under tmux (likely `initialize()` or similar). Note the exact command it constructs — flags, agent profile injection, initial system prompt, `--yolo`, anything else. Also check how it handles startup prompts (trust dialog, etc.).
+
+2. **Reproduce those args under `wezterm cli spawn`.** Start with the CAO-equivalent command, spawn in a new wezterm window, observe what happens. Examples to try (adapt based on step 1 findings):
+ - `wezterm cli spawn --new-window -- codex --yolo`
+ - `wezterm cli spawn --new-window -- codex --yolo "Hello from wezterm"` (initial prompt)
+ - Wrap in a shell to keep the pane alive: `wezterm cli spawn --new-window -- bash -lc "codex --yolo; exec bash"` (so if codex exits, the pane persists)
+ - Try without `--new-window` in case of window-related issues
+ - Check codex's own help: `codex --help` to discover flags CAO might be using
+
+3. **Once codex stays alive,** measure:
+ - Time from spawn to TUI-ready state (ms) — detect by polling `wezterm cli get-text` for codex's prompt pattern
+ - Does `wezterm cli send-text --pane-id <pane-id> --no-paste -- '/help\n'` produce visible output?
+ - Does `wezterm cli send-text --pane-id <pane-id> -- '/help\n'` (bracketed-paste mode) produce visible output?
+ - Note: codex may NOT have a `/help` — try whatever is the simplest testable slash command (maybe `/status` or just a regular text prompt like "say hello")
+
+4. **Extract the pattern.** What did CAO's codex provider do differently from a naive `codex` invocation? How should the WezTerm backend construct its codex-spawn command?
+
+**Deliverable:** `spikes/02b-codex-launch.md` with:
+- The exact command line that works (verbatim, copy-pasteable)
+- Why the naive `codex` spawn exited (your best hypothesis from the investigation)
+- TUI-ready latency in ms
+- Send-text verdict (A/B/both/neither) — same shape as spike 2
+- A diff snippet showing how `WezTermMultiplexer` should construct codex's spawn command, vs what tmux currently does. Pattern it on CAO's existing provider code.
+
+## Part B — Gemini re-check
+
+**Goal:** now that gemini is installed, fill the gaps in spikes 2 and 4 for gemini only.
+
+**Steps:**
+
+1. Verify gemini is on PATH: `gemini --version`
+2. **Spike 2 for gemini:** spawn gemini in a wezterm pane (use `--new-window`), wait for TUI, try both send-text modes (`--no-paste` and default) with a slash command gemini supports (check `gemini --help` first — might be `/help`, might be `/` something else).
+3. **Spike 4 for gemini:** capture gemini's TUI output via `wezterm cli get-text`, extract gemini's idle and permission-prompt regex patterns from `src/cli_agent_orchestrator/providers/gemini_cli.py`, check whether those regexes match the captured text.
+4. **Append** (don't rewrite) a new section `## Gemini (re-tested after install)` to each of `spikes/02-result.md` and `spikes/04-result.md`. Include the same evidence shape as the other providers.
+
+## Part C — Update SUMMARY.md
+
+After Parts A and B are complete, update `spikes/SUMMARY.md`:
+- Change spike 2's verdict from NEEDS-WORKAROUND to the real post-investigation verdict (GO / NEEDS-WORKAROUND with specifics)
+- Change spike 4's verdict similarly
+- Add a new row for "2b — Codex launch args" with its own verdict
+- Update the "Phase 2 implication" column per spike
+
+## Constraints (HARD)
+
+- Only modify files under `C:\dev\aws-cao\spikes\` — do NOT touch `src/`
+- Commit each finding atomically with `rtk git` prefix:
+ - `spike(2b): <short title> — <verdict>` (the 02b-codex-launch.md file)
+ - `spike(2): add gemini post-install findings` (update 02-result.md)
+ - `spike(4): add gemini post-install findings` (update 04-result.md)
+ - `spike(summary): incorporate 2b and gemini findings`
+- Push to origin/wezterm-multiplexer at the end
+- DO NOT install packages or modify wezterm config
+- If codex still doesn't cooperate after you've exhausted reasonable options (try ~5 approaches), stop and document findings — don't chase it for hours
+- Prefer `--new-window` for all spawned panes to keep Marc's working panes undisturbed; kill panes when done
+
+## Reporting
+
+Print a tight summary to stdout at the end:
+- Spike 2b verdict + the working codex command line
+- Gemini: spike 2 verdict, spike 4 verdict
+- Updated overall SUMMARY.md snapshot (the 5-row table)
+- Anything surprising
+
+Begin.
diff --git a/spikes/TSK-071-prompt.md b/spikes/TSK-071-prompt.md
new file mode 100644
index 000000000..f845ac770
--- /dev/null
+++ b/spikes/TSK-071-prompt.md
@@ -0,0 +1,91 @@
+# TSK-071 — Gemini delegation: tmux-callsite audit (read-only)
+
+You are executing a focused audit of a fork of `awslabs/cli-agent-orchestrator` (CAO). You have no prior conversation context — this prompt is fully self-contained.
+
+## Background
+
+CAO is AWS Labs' CLI Agent Orchestrator: a supervisor AI CLI spawns worker AI CLIs (Claude Code, Codex, Gemini CLI) inside tmux windows, drives them via `tmux send-keys` + `tmux capture-pane`, and preserves each worker's TUI for interactive dialog. It's tmux-only and has no Windows support.
+
+This fork is porting CAO to a multiplexer abstraction so Windows can use WezTerm instead of tmux. Phase 0/1/1b done; Phase 2 implementation is starting now. Phase 2 plan: `docs/PLAN-phase2.md` (binding spec). The plan moves `clients/tmux.py` into a new `multiplexers/tmux.py` behind a `BaseMultiplexer` ABC, and adds a sibling `WezTermMultiplexer`.
+
+**Your job**: enumerate every place in the source tree that depends on tmux, classify each as legitimate or leakage, and produce a single result file. Read-only. No code changes.
+
+## Repo state
+
+- Working dir: `C:\dev\aws-cao`
+- Branch: `wezterm-multiplexer` (already checked out)
+- Read first: `docs/PLAN-phase2.md` (Phase 2 binding spec) and `docs/multiplexer-api-surface.md` (Phase 0 ground truth on the tmux API surface)
+- Audit target: every file under `src/cli_agent_orchestrator/`
+- Result file destination: `spikes/TSK-071-result.md`
+
+## Constraints (HARD)
+
+- READ-ONLY on `src/`. Do NOT modify any source file. Do not run formatters.
+- The only file you create is `spikes/TSK-071-result.md`. Nothing else.
+- Do not commit, push, or branch. The supervising session handles git.
+- Do not install dependencies. Do not run pytest.
+- If a tool you need is missing, write a short note in the result and continue.
+
+## What to enumerate
+
+Scan `src/cli_agent_orchestrator/` for all of the following and produce one combined list:
+
+1. **Imports of `tmux_client`** — the singleton from `cli_agent_orchestrator.clients.tmux`. Both `from ... import tmux_client` and module-level `tmux_client.<method>(...)` usages.
+2. **Imports of `libtmux`** anywhere outside `clients/tmux.py`.
+3. **Direct `tmux` subprocess invocations** — search for `subprocess.run(["tmux"`, `subprocess.call(["tmux"`, `subprocess.Popen(["tmux"`, `os.system("tmux`, `shell=True` calls containing `tmux ` as a literal, and equivalents.
+4. **`tmux send-keys -l` / `paste-buffer` / `capture-pane` / `pipe-pane`** literal strings anywhere in source.
+5. **Any reliance on `TMUX` env var** (e.g. `os.environ["TMUX"]`).
+6. **Hard-coded shell tooling assumed Unix-only** — `tail`, `cat`, `which`, `grep` invoked via `subprocess` from CAO source. (Plan §4 already calls out `inbox_service._get_log_tail`'s `tail -n` — confirm and find any others.)
+
+## Classification
+
+For each finding, classify as exactly one of:
+
+- **LEGIT** — already inside the multiplexer boundary or its tests. Acceptable: anything inside `src/cli_agent_orchestrator/clients/tmux.py`, anything inside `src/cli_agent_orchestrator/multiplexers/` (does not exist yet but plan introduces it), and tests under `test/clients/test_tmux*.py` / `test/multiplexers/`. We are NOT auditing tests in this pass — only source.
+- **PROVIDER-EXPECTED** — known tmux leakage Phase 2 plan §3/§5 already calls out: `providers/claude_code.py:204-224` raw `tmux send-keys -l "\x1b[B"` plus libtmux `pane.send_keys`, and `providers/codex.py:233-240` libtmux trust-path Enter. Cite the file:line and confirm the plan's count is accurate.
+- **HIDDEN-LEAKAGE** — anything else. These are the bugs Phase 2 risks missing. For each, show 2-3 lines of context and explain why it isn't in the multiplexer/provider exception list.
+- **UNIX-TOOLING** — non-tmux Unix command invocations from source (#6 above) that would break on Windows under the WezTerm backend.
+
+## Output format
+
+Single file `spikes/TSK-071-result.md` with this exact structure:
+
+```markdown
+# TSK-071 — tmux-callsite audit result
+
+## Summary
+- Total findings:
+- LEGIT:
+- PROVIDER-EXPECTED:
+- HIDDEN-LEAKAGE:
+- UNIX-TOOLING:
+
+## HIDDEN-LEAKAGE (review required)
+
+
+## PROVIDER-EXPECTED (confirmed against plan)
+
+
+## UNIX-TOOLING (Windows risk)
+
+
+## LEGIT (count only)
+Count and the directories covered. Don't enumerate.
+
+## Verdict for Phase 2 scope
+One paragraph: does the plan cover everything, or are there hidden couplings that need to be added to the Phase 2 task list?
+```
+
+## Reporting back
+
+Print the Verdict paragraph to stdout at the end (so it lands in the dispatch log). Do not print the full file contents to stdout — they go to the result file.
+
+## Order
+
+1. Read `docs/PLAN-phase2.md` and `docs/multiplexer-api-surface.md`.
+2. Grep / scan the source tree for the six categories above.
+3. Classify each finding.
+4. Write `spikes/TSK-071-result.md`.
+5. Echo the verdict paragraph.
+
+Begin.
diff --git a/spikes/TSK-071-result.md b/spikes/TSK-071-result.md
new file mode 100644
index 000000000..04370f439
--- /dev/null
+++ b/spikes/TSK-071-result.md
@@ -0,0 +1,158 @@
+# TSK-071 — tmux-callsite audit result
+
+## Summary
+- Total findings: 18
+- LEGIT: 1
+- PROVIDER-EXPECTED: 2
+- HIDDEN-LEAKAGE: 14 files (15 entries below; `terminal_service.py` is split across two entries)
+- UNIX-TOOLING: 1
+
+No `TMUX` env-var reads were found under `src/cli_agent_orchestrator/`. No subprocess `which`/`grep` invocations were found either.
+
+## HIDDEN-LEAKAGE (review required)
+- `src/cli_agent_orchestrator/providers/claude_code.py:12,241-258`
+ ```python
+ from cli_agent_orchestrator.clients.tmux import tmux_client
+ if not wait_for_shell(tmux_client, self.session_name, self.window_name, timeout=10.0):
+ ...
+ tmux_client.send_keys(self.session_name, self.window_name, command)
+ ```
+ Why leakage: the provider is still wired to the concrete `clients.tmux` singleton for shell readiness, history reads, and launch. Plan §3/§5 only calls out the raw `send-keys -l` and direct libtmux trust-path bypasses, not the broader file-level dependency on the tmux-named singleton.
+
+- `src/cli_agent_orchestrator/providers/codex.py:9,252-267`
+ ```python
+ from cli_agent_orchestrator.clients.tmux import tmux_client
+ if not wait_for_shell(tmux_client, self.session_name, self.window_name, timeout=10.0):
+ ...
+ tmux_client.send_keys(self.session_name, self.window_name, "echo ready")
+ ```
+ Why leakage: Codex still imports and drives the concrete tmux singleton for shell warm-up, launch, and status reads. Plan §5 only calls out the trust-prompt Enter bypass and WezTerm launch-spec work, not the rest of this file's tmux-bound surface.
+
+- `src/cli_agent_orchestrator/providers/gemini_cli.py:39,432-465`
+ ```python
+ from cli_agent_orchestrator.clients.tmux import tmux_client
+ tmux_client.send_keys(self.session_name, self.window_name, f"echo {warmup_marker}")
+ output = tmux_client.get_history(self.session_name, self.window_name)
+ ```
+ Why leakage: Gemini still assumes the tmux singleton for pane CWD lookup, warm-up echo, launch, and history polling. Plan §5 explicitly defers Gemini WezTerm wiring, so this is known-but-unlisted tmux coupling outside the provider exception list.
+
+- `src/cli_agent_orchestrator/providers/copilot_cli.py:17-19,139-143,192-195`
+ ```python
+ from libtmux.exc import LibTmuxException
+ from cli_agent_orchestrator.clients.tmux import tmux_client
+ pane_working_dir = tmux_client.get_pane_working_directory(...)
+ tmux_client.send_special_key(self.session_name, self.window_name, "Enter")
+ ```
+ Why leakage: this is the only non-`clients/tmux.py` source file importing `libtmux`, and it also imports the tmux singleton directly. Plan §3/§5 does not call out Copilot at all, so this is a hidden provider-side dependency.
+
+- `src/cli_agent_orchestrator/providers/q_cli.py:8,54-71`
+ ```python
+ from cli_agent_orchestrator.clients.tmux import tmux_client
+ tmux_client.send_keys(self.session_name, self.window_name, command)
+ output = tmux_client.get_history(self.session_name, self.window_name, tail_lines=tail_lines)
+ ```
+ Why leakage: Q CLI startup and status detection are still hard-bound to `clients.tmux.tmux_client`. Plan §5 does not mention this provider, so the dependency is outside the explicit provider exception list.
+
+- `src/cli_agent_orchestrator/providers/opencode_cli.py:25,140-144,194`
+ ```python
+ from cli_agent_orchestrator.clients.tmux import tmux_client
+ if not wait_for_shell(tmux_client, self.session_name, self.window_name, timeout=10.0):
+ ...
+ tmux_client.send_keys(self.session_name, self.window_name, command)
+ ```
+ Why leakage: OpenCode still depends on the tmux singleton for shell readiness, message delivery, and history capture. Plan §5 does not list OpenCode, so this is hidden coupling outside the documented exceptions.
+
+- `src/cli_agent_orchestrator/providers/kimi_cli.py:38,337-344,389`
+ ```python
+ from cli_agent_orchestrator.clients.tmux import tmux_client
+ if not wait_for_shell(tmux_client, self.session_name, self.window_name, timeout=10.0):
+ ...
+ tmux_client.send_keys(self.session_name, self.window_name, command)
+ ```
+ Why leakage: Kimi launch and status logic are still pinned to the tmux singleton. The provider is absent from the Phase 2 provider patch list, so this dependency is currently hidden from the planned scope.
+
+- `src/cli_agent_orchestrator/providers/kiro_cli.py:25,160-184`
+ ```python
+ from cli_agent_orchestrator.clients.tmux import tmux_client
+ tmux_client.send_keys(self.session_name, self.window_name, command)
+ tmux_client.send_keys(self.session_name, self.window_name, "/exit")
+ ```
+ Why leakage: Kiro initialization, fallback recovery, and status reads all still assume `tmux_client`. Plan §5 does not include Kiro, so this provider-side dependency is not currently on the explicit Phase 2 task list.
+
+- `src/cli_agent_orchestrator/services/session_service.py:29,77-90,112-125`
+ ```python
+ from cli_agent_orchestrator.clients.tmux import tmux_client
+ tmux_sessions = tmux_client.list_sessions()
+ if not tmux_client.session_exists(session_name):
+ ...
+ tmux_client.kill_session(session_name)
+ ```
+ Why leakage: session CRUD is still coupled to the tmux singleton import path instead of a backend-neutral multiplexer entrypoint. Plan §2 says services should avoid a full rewrite, but it does not explicitly list this file's import-path dependency.
+
+- `src/cli_agent_orchestrator/services/terminal_service.py:32,129-140,188`
+ ```python
+ from cli_agent_orchestrator.clients.tmux import tmux_client
+ if tmux_client.session_exists(session_name):
+ ...
+ tmux_client.create_session(session_name, window_name, terminal_id, working_directory)
+ tmux_client.pipe_pane(session_name, window_name, str(log_path))
+ ```
+ Why leakage: the main orchestration service still imports `clients.tmux.tmux_client` directly for session/window lifecycle and log streaming. Phase 2 intends a compatibility shim, but this file remains an external tmux-named callsite outside the boundary.
+
+- `src/cli_agent_orchestrator/services/terminal_service.py:278-312,364,395-447,453`
+ ```python
+ working_dir = tmux_client.get_pane_working_directory(...)
+ tmux_client.send_keys(metadata["tmux_session"], metadata["tmux_window"], message, enter_count=enter_count)
+ return tmux_client.get_history(metadata["tmux_session"], metadata["tmux_window"])
+ ```
+ Why leakage: input delivery, history capture, special keys, pane CWD lookup, and delete-path cleanup all still reach directly into the tmux singleton. These are outside the two provider exceptions and should be tracked as residual external coupling even if Phase 2 keeps them working through a shim.
+
+- `src/cli_agent_orchestrator/utils/terminal.py:15,37-50`
+ ```python
+ from cli_agent_orchestrator.clients.tmux import TmuxClient
+ def wait_for_shell(tmux_client: "TmuxClient", session_name: str, window_name: str, ...):
+ output = tmux_client.get_history(session_name, window_name)
+ ```
+ Why leakage: the shared helper's type import and parameter name encode the concrete `TmuxClient` type into otherwise generic logic. Plan §2 references this helper but does not call out the concrete tmux type annotation as cleanup work.
+
+- `src/cli_agent_orchestrator/cli/commands/info.py:23-32`
+ ```python
+ # Try to get current session name from tmux
+ result = subprocess.run(
+ ["tmux", "display-message", "-p", "#S"],
+ ```
+ Why leakage: direct tmux subprocess invocation in CLI UX code, outside the wrapper boundary. Plan §9 explicitly says UX-only attach/display commands are out of scope, so this remains a hidden non-MVP leak unless tracked separately.
+
+- `src/cli_agent_orchestrator/cli/commands/launch.py:185-187`
+ ```python
+ # Attach to tmux session unless headless
+ if not headless:
+ subprocess.run(["tmux", "attach-session", "-t", terminal["session_name"]])
+ ```
+ Why leakage: direct tmux attach from the CLI command, outside `clients/tmux.py`. The Phase 2 plan explicitly excludes this UX path from MVP scope, so it is not covered by the current task list.
+
+- `src/cli_agent_orchestrator/api/main.py:672-680`
+ ```python
+ # Start tmux attach inside the PTY
+ proc = subprocess.Popen(
+ ["tmux", "-u", "attach-session", "-t", f"{session_name}:{window_name}"],
+ ```
+ Why leakage: the API websocket terminal viewer still shells out to tmux directly. Plan §9 excludes `attach-session` UX work from scope, so this coupling is real but currently outside the explicit Phase 2 implementation set.
+
+## PROVIDER-EXPECTED (confirmed against plan)
+- `src/cli_agent_orchestrator/providers/claude_code.py:204-224` — confirmed: the plan's Claude hotspot is real and limited to the startup handler's raw `tmux send-keys -l "\x1b[B"` path plus the direct libtmux `pane.send_keys("", enter=True)` trust confirmation.
+- `src/cli_agent_orchestrator/providers/codex.py:233-240` — confirmed: the plan's Codex hotspot is real and limited to the direct libtmux `pane.send_keys("", enter=True)` trust confirmation path.
+
+## UNIX-TOOLING (Windows risk)
+- `src/cli_agent_orchestrator/services/inbox_service.py:51-52` — uses `tail -n` via `subprocess.run`; replace with a pure-Python tail helper that seeks backward from the log file end and returns the last `N` lines without shelling out.
+
+## LEGIT (count only)
+Count: 1 finding in `src/cli_agent_orchestrator/clients/`.
+
+Covered directory:
+- `src/cli_agent_orchestrator/clients/tmux.py` — the libtmux wrapper itself, including its internal `tmux` subprocess calls, `capture-pane`/`paste-buffer`/`pipe-pane` primitives, and the tmux-scoped `cat >>` pipe target.
+
+No additional tmux-dependent implementation callsites were found under `src/cli_agent_orchestrator/multiplexers/` in this pass.
+
+## Verdict for Phase 2 scope
+The plan correctly identifies the two provider bypass hotspots and the `inbox_service` `tail -n` problem, but it does not fully capture how much source outside the boundary still imports or types against `cli_agent_orchestrator.clients.tmux.tmux_client`. If Phase 2's goal is strict boundary cleanup, the task list should explicitly track the service/helper/provider files above as residual tmux-named coupling, even if they remain temporarily functional via the compatibility shim; if the goal is only Claude/Codex MVP on Windows, the current plan is sufficient for runtime-critical paths, but the CLI/API attach flows, Copilot `libtmux` import, and generic helper/service imports should be recorded as deferred follow-up leaks rather than left implicit.
diff --git a/spikes/TSK-072-prompt.md b/spikes/TSK-072-prompt.md
new file mode 100644
index 000000000..e4798cc06
--- /dev/null
+++ b/spikes/TSK-072-prompt.md
@@ -0,0 +1,135 @@
+# TSK-072 — Phase 2 Task 2: TmuxClient → TmuxMultiplexer (mechanical refactor)
+
+You are executing Phase 2 Task 2 of PRJ-042 (aws-cao WezTerm port). This is a **mechanical refactor** with full repo write access. The supervising Opus session has decomposed the work — your job is just Task 2, end to end, ready for review.
+
+## Repo state
+
+- Working dir: `C:\dev\aws-cao`
+- Branch: `wezterm-multiplexer` (already checked out, clean tree)
+- Plan (binding spec): `docs/PLAN-phase2.md` — read §1, §2, §3 first.
+- **Task 1 just landed** (commit on this branch): `src/cli_agent_orchestrator/multiplexers/base.py` now defines `BaseMultiplexer` ABC and `LaunchSpec` dataclass. Read it before you start.
+- The file you are moving: `src/cli_agent_orchestrator/clients/tmux.py` (existing TmuxClient implementation — ~430 lines).
+- Existing tests that MUST stay green: `test/clients/test_tmux_client.py`, `test/clients/test_tmux_send_keys.py`, `test/providers/test_tmux_working_directory.py`, plus every other test that imports `tmux_client` (search them).
+
+## Goal
+
+Move TmuxClient into the multiplexers package as a `BaseMultiplexer` subclass without changing any external behavior. Existing call sites (`from cli_agent_orchestrator.clients.tmux import tmux_client`) keep working via a re-export shim.
+
+## Scope (HARD — do not exceed)
+
+- DO NOT modify any provider, service, or test file beyond updating imports if absolutely required (cross-check first).
+- DO NOT change `send_special_key`'s call sites or signature beyond what the abstract method already declares (Task 3 handles the `literal=True` rollout).
+- DO NOT add WezTerm code (Task 5).
+- DO NOT add a backend selection helper (Task 4).
+- DO NOT reimplement or modify `_resolve_and_validate_working_directory` — it now lives on `BaseMultiplexer`. Leave it out of the moved class body and verify the inherited version is used. Existing tmux tests for it must pass against the inherited helper.
+- DO NOT install or upgrade dependencies.
+- DO NOT commit or push. Produce a clean working-tree change for the supervising Opus to review and commit.
+- Use `rtk` prefix for any git inspection commands (`rtk git diff`, `rtk git status`).
+
+## What to do
+
+### Step 1 — read Task 1 output
+1. Read `src/cli_agent_orchestrator/multiplexers/base.py` end to end. Note the abstract method set and the default `send_keys()` implementation that calls `_paste_text` then `_submit_input`.
+2. Read `src/cli_agent_orchestrator/clients/tmux.py` end to end. Locate the existing `send_keys()` body (around lines 198–251 per plan §3) and the `_resolve_and_validate_working_directory` helper (around lines 40–115).
+
+### Step 2 — create the new module
+Create `src/cli_agent_orchestrator/multiplexers/tmux.py`:
+- `class TmuxMultiplexer(BaseMultiplexer)` with all the abstract methods implemented.
+- Move the implementation bodies from the old `TmuxClient`. Names should match the abstract method names from the base class (`create_session`, `create_window`, `_paste_text`, `_submit_input`, `send_special_key`, `get_history`, `list_sessions`, `kill_session`, `kill_window`, `session_exists`, `get_pane_working_directory`, `pipe_pane`, `stop_pipe_pane`).
+- **Split the existing `send_keys()` body** into `_paste_text(session, window, text)` (the paste-buffer / paste-buffer -p / temp-file portion) and `_submit_input(session, window, enter_count=1)` (the Enter submission portion, including the inter-Enter delay loop). The base class's default `send_keys()` will recompose them — DO NOT override `send_keys()` on the subclass; let the inherited default do the work. This is the load-bearing change of Task 2 — verify with the existing tmux send_keys tests.
+- DO NOT redefine `_resolve_and_validate_working_directory`. Inherit it. Confirm with `python -c "from cli_agent_orchestrator.multiplexers.tmux import TmuxMultiplexer; m = TmuxMultiplexer(); print(m._resolve_and_validate_working_directory.__qualname__)"` — should report `BaseMultiplexer._resolve_and_validate_working_directory`.
+- Update `src/cli_agent_orchestrator/multiplexers/__init__.py` to export `TmuxMultiplexer` alongside the existing `BaseMultiplexer`/`LaunchSpec` exports.
+- Keep `send_special_key` signature exactly as the base abstract declares: `send_special_key(self, session_name: str, window_name: str, key: str, *, literal: bool = False) -> None`. The current TmuxClient may not have the `literal` keyword — add the keyword and **keep the default path wired to existing behavior for now** (rerouting the provider bypass call sites to `literal=True` is Task 3's job). Specifically: when `literal=True`, send the key as-is via `tmux send-keys -l`. When `literal=False` (default), preserve the current branch.
+
+### Step 3 — make `clients/tmux.py` a shim
+Replace the entire body of `src/cli_agent_orchestrator/clients/tmux.py` with:
+
+```python
+"""Deprecated re-export shim for the legacy TmuxClient location.
+
+The real implementation now lives in
+``cli_agent_orchestrator.multiplexers.tmux``. This shim keeps existing
+imports working until Task 4 wires the runtime backend selector.
+"""
+from cli_agent_orchestrator.multiplexers.tmux import TmuxMultiplexer
+
+# Singleton kept for backwards compatibility with module-level imports.
+tmux_client = TmuxMultiplexer()
+
+__all__ = ["TmuxMultiplexer", "tmux_client"]
+```
+
+If the original file exposed any other module-level symbols (e.g., constants, helper functions used elsewhere), re-export them from the shim too — grep the project for `from cli_agent_orchestrator.clients.tmux import` to find them.
+
+### Step 4 — verify
+
+1. Run **only** the tmux-related tests first to bisect quickly:
+ ```
+ rtk pytest test/clients/test_tmux_client.py test/clients/test_tmux_send_keys.py test/providers/test_tmux_working_directory.py -x
+ ```
+ All must pass. If any fails, stop and report which test, the assertion, and your hypothesis. Do not paper over with `pytest -k` or skips.
+
+2. Then run the provider and service suites to catch any indirect breakage:
+ ```
+ rtk pytest test/providers/ test/services/ -x
+ ```
+ These should all still pass.
+
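+3. Then run the full suite, excluding e2e. Task 1 reported 43 pre-existing platform-incompatible failures unrelated to multiplexers — match that count or better. Because `-x` halts at the first failure, re-run without `-x` if you need the total failure count to compare against that baseline: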
+ ```
+ rtk pytest test/ --ignore=test/e2e -x
+ ```
+
+### Step 5 — clone the most representative tmux tests into multiplexers/
+
+Create `test/multiplexers/test_tmux_multiplexer.py` (note: package init `test/multiplexers/__init__.py` already exists from Task 1):
+
+- Pick the **3 most representative** tests from each of `test/clients/test_tmux_client.py` and `test/clients/test_tmux_send_keys.py` (so ~6 cloned cases total).
+- Update them to import from the new path (`from cli_agent_orchestrator.multiplexers.tmux import TmuxMultiplexer`).
+- These cloned tests are smoke coverage at the new home; the original tests STAY where they are and still run against the import shim — that's the regression bar.
+- DO NOT delete or relocate the original test files.
+
+## Reporting
+
+When done, write `spikes/TSK-072-result.md` with:
+
+```markdown
+# TSK-072 — Task 2 result
+
+## Files touched
+
+
+## Tests
+- tmux suite:
+- provider+service:
+- full (excl. e2e):
+- multiplexers:
+
+## Send-keys split verification
+Confirm: send_keys is NOT overridden on TmuxMultiplexer; inherited default works against the unchanged tmux tests.
+
+## Working-directory inheritance verification
+Confirm: TmuxMultiplexer does NOT define _resolve_and_validate_working_directory; inherited from BaseMultiplexer; existing tests pass.
+
+## Deviations
+
+
+## Follow-ups
+
+```
+
+Echo a one-line verdict to stdout: `TSK-072: PASS|FAIL — <one-line summary>`.
+
+## Order
+
+1. Read base.py + tmux.py.
+2. Create `multiplexers/tmux.py`.
+3. Replace `clients/tmux.py` with shim.
+4. Run tmux test suite first, then full suite.
+5. Clone 6 representative tests into `test/multiplexers/test_tmux_multiplexer.py`.
+6. Write `spikes/TSK-072-result.md`.
+7. Echo verdict.
+
+DO NOT commit. Stop after Task 2.
+
+Begin.
diff --git a/spikes/TSK-072-result.md b/spikes/TSK-072-result.md
new file mode 100644
index 000000000..e30290c45
--- /dev/null
+++ b/spikes/TSK-072-result.md
@@ -0,0 +1,32 @@
+# TSK-072 — Task 2 result
+
+## Files touched
+- `src/cli_agent_orchestrator/multiplexers/tmux.py`
+- `src/cli_agent_orchestrator/multiplexers/__init__.py`
+- `src/cli_agent_orchestrator/clients/tmux.py`
+- `test/multiplexers/test_tmux_multiplexer.py`
+- `spikes/TSK-072-result.md`
+
+## Tests
+- tmux suite: 0 passed / 1 failed
+- provider+service: not run
+- full (excl. e2e): not run
+- multiplexers: 6 passed / 0 failed
+
+## Send-keys split verification
+Confirm: `send_keys` is NOT overridden on `TmuxMultiplexer`; the inherited `BaseMultiplexer.send_keys()` now works by composing `TmuxMultiplexer._paste_text()` and `TmuxMultiplexer._submit_input()`. The moved implementation keeps the tmux paste buffer alive across the split and cleans it up in `_submit_input()` so the old send-keys ordering still holds. Legacy `test/clients/test_tmux_send_keys.py` passes unchanged against the shim.
+
+## Working-directory inheritance verification
+Confirm: `TmuxMultiplexer` does NOT define `_resolve_and_validate_working_directory`; it inherits the helper from `BaseMultiplexer` (the method resolves to `BaseMultiplexer._resolve_and_validate_working_directory`, verified via direct import). The required tmux suite is currently blocked by a failure in that inherited helper on this Windows runner.
+
+## Deviations
+- The exact `rtk pytest ...` commands in the prompt returned `Pytest: No tests collected` in this environment, so validation was run with `.\.venv\Scripts\python.exe -m pytest ...` instead.
+- The required tmux suite stopped immediately at `test/clients/test_tmux_client.py::TestResolveAndValidateWorkingDirectory::test_defaults_to_cwd` with `ValueError: Working directory must be an absolute path: C:\...`. Hypothesis: Task 1's inherited `BaseMultiplexer._resolve_and_validate_working_directory()` still assumes Unix-style absolute paths via `real_path.startswith("/")`, so the Windows runner fails before Task 2 behavioral coverage can complete.
+- Because the prompt said to stop on the first failure, the provider/service suite and the full non-e2e suite were not run after that inherited-helper failure.
+
+## Follow-ups
+- Fix or platform-gate the inherited `BaseMultiplexer._resolve_and_validate_working_directory()` behavior on Windows in the supervising branch before using this runner for the required tmux/provider/full-suite verification.
+- Re-run the exact requested suite order after that Task 1 blocker is resolved:
+ - `rtk pytest test/clients/test_tmux_client.py test/clients/test_tmux_send_keys.py test/providers/test_tmux_working_directory.py -x`
+ - `rtk pytest test/providers/ test/services/ -x`
+ - `rtk pytest test/ --ignore=test/e2e -x`
diff --git a/spikes/TSK-073-prompt.md b/spikes/TSK-073-prompt.md
new file mode 100644
index 000000000..8d3731845
--- /dev/null
+++ b/spikes/TSK-073-prompt.md
@@ -0,0 +1,91 @@
+# TSK-073 — Phase 2 Task 3: route Claude/Codex startup handlers through send_special_key
+
+You are executing Phase 2 Task 3 of PRJ-042 (aws-cao WezTerm port). Self-contained prompt — no prior context.
+
+## Repo state
+- Working dir: `C:\dev\aws-cao`, branch `wezterm-multiplexer` (clean tree).
+- Tasks 1–2 + audit (TSK-071) committed. `BaseMultiplexer` and `TmuxMultiplexer` exist; `clients/tmux.py` is a shim re-exporting `tmux_client`.
+- Plan binding spec: `docs/PLAN-phase2.md` §3 (last paragraph) and §5 (`claude_code.py` "Patch judgment", `codex.py` "Patch judgment").
+- Audit findings: `spikes/TSK-071-result.md` PROVIDER-EXPECTED section confirms exact line ranges.
+
+## Goal
+
+Replace the two tmux-bypass patterns in providers with `tmux_client.send_special_key(...)` calls. Logic, regexes, and state machines stay byte-identical — only the route changes. Do NOT migrate `tmux_client` to a multiplexer accessor (that's Task 4/9). Do NOT touch any other tmux-bound logic in those files.
+
+## Bypass patterns to remove
+
+### `src/cli_agent_orchestrator/providers/claude_code.py`
+1. **Line ~204-212**: raw `tmux send-keys -l "\x1b[B"` (down arrow). Currently goes through `subprocess.run(["tmux", ...])`. Replace with:
+ ```python
+ tmux_client.send_special_key(self.session_name, self.window_name, "\x1b[B", literal=True)
+ ```
+2. **Line ~218-224**: libtmux trust-confirmation Enter via `tmux_client.server.sessions...pane.send_keys("", enter=True)`. Replace with:
+ ```python
+ tmux_client.send_special_key(self.session_name, self.window_name, "Enter")
+ ```
+
+### `src/cli_agent_orchestrator/providers/codex.py`
+3. **Line ~233-240**: libtmux trust-confirmation Enter — same pattern as Claude #2. Replace with:
+ ```python
+ tmux_client.send_special_key(self.session_name, self.window_name, "Enter")
+ ```
+
+## Verify
+
+`TmuxMultiplexer.send_special_key` already supports `literal: bool = False` (added in Task 2). Confirm by reading `src/cli_agent_orchestrator/multiplexers/tmux.py` around the `send_special_key` method.
+
+## Tests
+
+Update `test/providers/test_claude_code_unit.py` and `test/providers/test_codex_provider_unit.py`:
+- Replace any test that mocks `subprocess.run` for the down-arrow / trust-enter paths with a mock of `tmux_client.send_special_key` and assert the new call signatures.
+- All existing assertions about idle/trust-prompt detection regex paths must continue to pass — those are not touched.
+
+## Constraints (HARD)
+
+- DO NOT change any regex pattern.
+- DO NOT change `_handle_startup_prompts` control flow, timeouts, or polling cadences.
+- DO NOT migrate the broader `from cli_agent_orchestrator.clients.tmux import tmux_client` lines to a multiplexer accessor — Task 4/9 does that.
+- DO NOT modify `gemini_cli.py`, `copilot_cli.py`, `q_cli.py`, `kimi_cli.py`, `kiro_cli.py`, `opencode_cli.py` — those are Task 14 follow-up.
+- DO NOT install or upgrade dependencies. Use `.venv\Scripts\python.exe -m pytest` for verification (the project's `rtk pytest` shim collects nothing here — Task 2 confirmed this).
+- DO NOT commit or push. Produce a clean working-tree change for the supervising Opus to commit.
+
+## Verification command sequence
+
+```
+.venv/Scripts/python.exe -m pytest test/providers/test_claude_code_unit.py test/providers/test_codex_provider_unit.py -x --tb=short
+.venv/Scripts/python.exe -m pytest test/clients/ test/multiplexers/ test/providers/ test/services/ test/utils/ --ignore=test/e2e -q --tb=no --no-header
+```
+
+The second run's failure count must not exceed the **43-failure baseline** (the pre-existing failures first reported in Task 1).
+
+## Reporting
+
+Write `spikes/TSK-073-result.md`:
+
+```markdown
+# TSK-073 — Task 3 result
+
+## Files touched
+
+
+## Bypass replacements
+- claude_code.py down-arrow: →
+- claude_code.py trust-enter: →
+- codex.py trust-enter: →
+
+## Tests
+- claude_code + codex unit suites:
+- full (excl. e2e): — must be ≤43 fail
+
+## Deviations
+
+
+## Follow-ups
+
+```
+
+Echo: `TSK-073: PASS|FAIL — <one-line summary>`.
+
+DO NOT commit. Stop after Task 3.
+
+Begin.
diff --git a/spikes/TSK-073-result.md b/spikes/TSK-073-result.md
new file mode 100644
index 000000000..eb78cf761
--- /dev/null
+++ b/spikes/TSK-073-result.md
@@ -0,0 +1,23 @@
+# TSK-073 — Task 3 result
+
+## Files touched
+- `src/cli_agent_orchestrator/providers/claude_code.py`
+- `src/cli_agent_orchestrator/providers/codex.py`
+- `test/providers/test_claude_code_unit.py`
+- `test/providers/test_codex_provider_unit.py`
+- `test/providers/test_claude_code_coverage.py`
+
+## Bypass replacements
+- claude_code.py down-arrow: `~204-212` -> `208-212`, `tmux_client.send_special_key(self.session_name, self.window_name, "\x1b[B", literal=True)`
+- claude_code.py trust-enter: `~218-224` -> `223`, `tmux_client.send_special_key(self.session_name, self.window_name, "Enter")`
+- codex.py trust-enter: `~233-240` -> `235`, `tmux_client.send_special_key(self.session_name, self.window_name, "Enter")`
+
+## Tests
+- claude_code + codex unit suites: `128 pass / 0 fail`
+- full (excl. e2e): `1039 pass / 43 fail` — must be <=43 fail
+
+## Deviations
+- Updated `test/providers/test_claude_code_coverage.py` in addition to the two requested unit files so the full non-e2e suite returned to the 43-failure baseline after the provider route change.
+
+## Follow-ups
+- None
diff --git a/spikes/TSK-074-prompt.md b/spikes/TSK-074-prompt.md
new file mode 100644
index 000000000..0dec8bb57
--- /dev/null
+++ b/spikes/TSK-074-prompt.md
@@ -0,0 +1,114 @@
+# TSK-074 — Phase 2 Task 6: pure-Python tail in inbox_service
+
+You are executing Phase 2 Task 6 of PRJ-042 (aws-cao WezTerm port). Self-contained prompt — no prior context.
+
+## Repo state
+- Working dir: `C:\dev\aws-cao`, branch `wezterm-multiplexer` (clean tree).
+- Tasks 1–2 + audit (TSK-071) committed. Audit `spikes/TSK-071-result.md` UNIX-TOOLING section confirms `inbox_service.py:51-52` is the only `tail -n` subprocess in `src/`.
+- Plan binding spec: `docs/PLAN-phase2.md` §4 (last paragraph) and §6 ("Replace `tail` subprocess assumptions in `test/services/test_inbox_service.py` with pure-Python tailing so Windows CI is possible").
+
+## Goal
+
+Replace the `tail -n` subprocess call in `src/cli_agent_orchestrator/services/inbox_service.py` (`_get_log_tail`, currently around lines 42–55) with a pure-Python last-N-lines reader. Same return semantics, same edge cases. Windows-compatible. Tests move from mocking `subprocess.run` to asserting against real log-file content.
+
+## Implementation
+
+A correct backward-tail reader for log files:
+
+```python
+def _get_log_tail(log_path: Path, n: int = 100) -> list[str]:
+ """Read the last N lines of a log file. Pure-Python; Windows-safe.
+
+ Returns lines as decoded strings (utf-8, errors='replace') without
+ trailing newlines. If the file has fewer than N lines, returns all.
+ Returns [] if the file does not exist or is empty.
+ """
+ if not log_path.exists():
+ return []
+ block = 4096
+ lines: list[bytes] = []
+ with open(log_path, "rb") as fh:
+ fh.seek(0, 2)
+ end = fh.tell()
+ if end == 0:
+ return []
+ position = end
+ carry = b""
+ while position > 0 and len(lines) <= n:
+ read_size = min(block, position)
+ position -= read_size
+ fh.seek(position)
+ chunk = fh.read(read_size) + carry
+ split = chunk.split(b"\n")
+ carry = split[0]
+ lines = split[1:] + lines
+ if position == 0 and carry:
+ lines = [carry] + lines
+ decoded = [line.decode("utf-8", errors="replace") for line in lines]
+ decoded = [line.rstrip("\r") for line in decoded]
+ while decoded and decoded[-1] == "":
+ decoded.pop()
+ return decoded[-n:]
+```
+
+Match the existing function's exact signature, return type, and call-site behavior. Read the current implementation FIRST and confirm: parameter names, type hints, return type, and how callers consume the result. Adjust the snippet above to fit the project's actual API.
+
+## Tests
+
+Update `test/services/test_inbox_service.py`:
+- Drop `subprocess.run` mocks for tail.
+- Use `tmp_path` to write real log files of various shapes:
+ - empty file
+ - 1 line, no trailing newline
+ - 1 line, trailing newline
+ - exactly N lines
+ - more than N lines (assert returns last N)
+ - lines longer than the 4 KiB block boundary (verify carry-over)
+ - mixed `\r\n` line endings (Windows-format logs)
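+ - utf-8 multi-byte characters at the block boundary (defensive — lines are reassembled as bytes before decoding, so a character split across two blocks must decode intact; `errors='replace'` only guards against genuinely invalid bytes)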
+- Confirm the missing-file case returns `[]` without raising.
+
+## Constraints (HARD)
+
+- DO NOT change any other function in `inbox_service.py`.
+- DO NOT change the public interface `_get_log_tail` exposes — same name, same param order, same return semantics.
+- DO NOT add a fallback to subprocess `tail` — pure-Python only.
+- DO NOT install or upgrade dependencies.
+- Use `.venv\Scripts\python.exe -m pytest` (project's `rtk pytest` shim collects nothing).
+- DO NOT commit or push. Produce a clean working-tree change for the supervising Opus to commit.
+
+## Verification command sequence
+
+```
+.venv/Scripts/python.exe -m pytest test/services/test_inbox_service.py -x --tb=short
+.venv/Scripts/python.exe -m pytest test/clients/ test/multiplexers/ test/providers/ test/services/ test/utils/ --ignore=test/e2e -q --tb=no --no-header
+```
+
+Second run failure count must not exceed the 43-failure baseline.
+
+## Reporting
+
+Write `spikes/TSK-074-result.md`:
+
+```markdown
+# TSK-074 — Task 6 result
+
+## Files touched
+
+
+## Implementation summary
+
+
+## Tests
+- inbox_service suite:
+- full (excl. e2e): — must be ≤43 fail
+
+## Deviations
+
+```
+
+Echo: `TSK-074: PASS|FAIL — <one-line summary>`.
+
+DO NOT commit. Stop after Task 6.
+
+Begin.
diff --git a/spikes/TSK-074-result.md b/spikes/TSK-074-result.md
new file mode 100644
index 000000000..46150ae9c
--- /dev/null
+++ b/spikes/TSK-074-result.md
@@ -0,0 +1,17 @@
+# TSK-074 — Task 6 result
+
+## Files touched
+- `src/cli_agent_orchestrator/services/inbox_service.py`
+- `test/services/test_inbox_service.py`
+- `spikes/TSK-074-result.md`
+
+## Implementation summary
+Replaced the `tail -n` subprocess in `_get_log_tail(terminal_id: str, lines: int = 100) -> str` with a pure-Python backward block scan that preserves the existing string-based API and caller behavior, uses a 4096-byte read block, decodes with UTF-8 plus `errors="replace"`, normalizes line endings to match prior `subprocess.run(..., text=True)` behavior, returns `""` for missing or empty logs, and correctly handles shorter files, large lines, and multibyte content spanning block boundaries.
+
+## Tests
+- inbox_service suite: 24 pass / 0 fail
+- full (excl. e2e): 1036 pass / 46 fail — exceeds stated 43-failure baseline
+
+## Deviations
+- The prompt’s illustrative helper returned `list[str]`/`[]`, but the project’s existing `_get_log_tail` contract is `str`/`""`; the implementation and tests preserved the repository API rather than changing it.
+- The required full non-e2e verification command completed with 46 failures in this environment, which is 3 above the stated baseline.
diff --git a/spikes/TSK-075-prompt.md b/spikes/TSK-075-prompt.md
new file mode 100644
index 000000000..f4fa6c529
--- /dev/null
+++ b/spikes/TSK-075-prompt.md
@@ -0,0 +1,142 @@
+# TSK-075 — Phase 2 Task 4: backend selection accessor
+
+You are executing Phase 2 Task 4 of PRJ-042 (aws-cao WezTerm port). Self-contained prompt — no prior context.
+
+## Repo state
+- Working dir: `C:\dev\aws-cao`, branch `wezterm-multiplexer` (clean tree).
+- Tasks 1, 2, 3, 6 + audit (TSK-071) committed. `BaseMultiplexer`, `LaunchSpec`, `TmuxMultiplexer` exist.
+- Plan binding spec: `docs/PLAN-phase2.md` §7 ("backend selection shim", 140 LoC bucket).
+- Live evidence on the supervising shell — TMUX env conventions confirmed:
+ - tmux sets `TMUX` env var (canonical)
+ - WezTerm sets `WEZTERM_PANE`, `WEZTERM_EXECUTABLE`, `TERM_PROGRAM=WezTerm`
+- Task 5 is running in parallel and will create `src/cli_agent_orchestrator/multiplexers/wezterm.py` with a class named `WezTermMultiplexer` — DO NOT depend on its presence at import time. Use lazy import.
+
+## Goal
+
+Create a `get_multiplexer()` accessor that returns a singleton multiplexer chosen at runtime, plus contract tests covering all branches. Don't break anything if the WezTerm module is absent.
+
+## Selection logic (priority order)
+
+1. **`CAO_MULTIPLEXER` env override** (highest priority). Values: `tmux`, `wezterm` (case-insensitive, surrounding whitespace ignored). Anything else raises `ValueError("Unknown CAO_MULTIPLEXER: {value!r}; expected 'tmux' or 'wezterm'")`.
+2. Else if `os.environ.get("TMUX")` is non-empty → tmux.
+3. Else if `os.environ.get("WEZTERM_PANE")` is non-empty OR `os.environ.get("TERM_PROGRAM") == "WezTerm"` → wezterm.
+4. Else platform default: `sys.platform == "win32"` → wezterm; otherwise tmux.
+
+## Implementation requirements
+
+In `src/cli_agent_orchestrator/multiplexers/__init__.py`, add:
+
+```python
+from __future__ import annotations
+import os
+import sys
+from functools import lru_cache
+from typing import Literal
+
+from cli_agent_orchestrator.multiplexers.base import BaseMultiplexer, LaunchSpec
+from cli_agent_orchestrator.multiplexers.tmux import TmuxMultiplexer
+
+_BackendName = Literal["tmux", "wezterm"]
+
+
+def _select_backend() -> _BackendName:
+ override = os.environ.get("CAO_MULTIPLEXER", "").strip().lower()
+ if override:
+ if override not in ("tmux", "wezterm"):
+ raise ValueError(
+ f"Unknown CAO_MULTIPLEXER: {override!r}; expected 'tmux' or 'wezterm'"
+ )
+ return override # type: ignore[return-value]
+ if os.environ.get("TMUX"):
+ return "tmux"
+ if os.environ.get("WEZTERM_PANE") or os.environ.get("TERM_PROGRAM") == "WezTerm":
+ return "wezterm"
+ return "wezterm" if sys.platform == "win32" else "tmux"
+
+
+@lru_cache(maxsize=1)
+def get_multiplexer() -> BaseMultiplexer:
+ """Return the process-singleton multiplexer for the current environment."""
+ backend = _select_backend()
+ if backend == "tmux":
+ return TmuxMultiplexer()
+ # Lazy import: WezTermMultiplexer module may not exist yet during dev,
+ # and we don't want tmux-only environments to fail import on missing
+ # wezterm support.
+ from cli_agent_orchestrator.multiplexers.wezterm import WezTermMultiplexer
+ return WezTermMultiplexer()
+
+
+__all__ = ["BaseMultiplexer", "LaunchSpec", "TmuxMultiplexer", "get_multiplexer"]
+```
+
+Note: do NOT eagerly export `WezTermMultiplexer` from `__all__` at the package level — leave it accessible via `from cli_agent_orchestrator.multiplexers.wezterm import WezTermMultiplexer` only. This isolates Task 5's not-yet-committed module from this commit's import surface.
+
+## Tests
+
+Create `test/multiplexers/test_selection.py` covering:
+
+1. `CAO_MULTIPLEXER=tmux` → returns TmuxMultiplexer (regardless of other env vars).
+2. `CAO_MULTIPLEXER=wezterm` → tries wezterm (mock the import to return a sentinel; assert the sentinel is returned).
+3. Invalid `CAO_MULTIPLEXER=foo` → raises `ValueError`.
+4. No override, `TMUX=/tmp/tmux-1000/default,1234,0` → tmux.
+5. No override, no TMUX, `WEZTERM_PANE=66` → wezterm (mocked).
+6. No override, no TMUX, `TERM_PROGRAM=WezTerm` → wezterm (mocked).
+7. No override, no env signals, `sys.platform == "win32"` → wezterm (mocked).
+8. No override, no env signals, `sys.platform == "linux"` → tmux.
+9. `lru_cache` returns same instance on second call.
+10. `lru_cache` is invalidated between tests (use `get_multiplexer.cache_clear()` in fixture or autouse fixture).
+
+For #2/#5/#6/#7, mock the wezterm import using `monkeypatch.setitem(sys.modules, "cli_agent_orchestrator.multiplexers.wezterm", fake_module)` with a fake module exposing `WezTermMultiplexer = <callable returning the sentinel>`, OR patch the function-level import via `monkeypatch.setattr(builtins, "__import__", ...)`. Pick whichever is simpler.
+
+For env-var manipulation, use `monkeypatch.setenv` / `monkeypatch.delenv`. Always clear `CAO_MULTIPLEXER`, `TMUX`, `WEZTERM_PANE`, `TERM_PROGRAM` before each test (autouse fixture).
+
+For `sys.platform`, use `monkeypatch.setattr(sys, "platform", "win32")`.
+
+## Constraints (HARD)
+
+- DO NOT modify `base.py`, `tmux.py`, or any provider/service file.
+- DO NOT create `wezterm.py` — Task 5 owns that.
+- DO NOT eagerly import `wezterm` at module load time.
+- DO NOT install or upgrade dependencies.
+- Use `.venv\Scripts\python.exe -m pytest` (project's `rtk pytest` shim collects nothing).
+- DO NOT commit. Produce a clean working-tree change for the supervising Opus to commit.
+
+## Verification
+
+```
+.venv/Scripts/python.exe -m pytest test/multiplexers/test_selection.py -v --tb=short
+.venv/Scripts/python.exe -m pytest test/clients/ test/multiplexers/ test/providers/ test/services/ test/utils/ --ignore=test/e2e -q --tb=no --no-header
+```
+
+Second run failure count must not exceed 43.
+
+## Reporting
+
+Write `spikes/TSK-075-result.md`:
+
+```markdown
+# TSK-075 — Task 4 result
+
+## Files touched
+
+
+## Selection branches verified
+
+
+## Tests
+- test_selection.py:
+- full (excl. e2e): — must be ≤43
+
+## Lazy-import behavior
+Confirm: `import cli_agent_orchestrator.multiplexers` works without Task 5's wezterm.py present. WezTerm only resolved when `get_multiplexer()` actually picks the wezterm branch.
+
+## Deviations
+
+```
+
+Echo: `TSK-075: PASS|FAIL — <one-line summary>`.
+
+DO NOT commit. Stop after Task 4.
+
+Begin.
diff --git a/spikes/TSK-075-result.md b/spikes/TSK-075-result.md
new file mode 100644
index 000000000..c18ad059a
--- /dev/null
+++ b/spikes/TSK-075-result.md
@@ -0,0 +1,31 @@
+# TSK-075 — Task 4 result
+
+## Files touched
+- `src/cli_agent_orchestrator/multiplexers/__init__.py`
+- `test/multiplexers/test_selection.py`
+- `spikes/TSK-075-result.md`
+
+## Selection branches verified
+| Signals | Platform | Selected backend |
+| --- | --- | --- |
+| `CAO_MULTIPLEXER=tmux` (even with tmux/wezterm env present) | any | `tmux` |
+| `CAO_MULTIPLEXER=wezterm` | any | `wezterm` |
+| `CAO_MULTIPLEXER=foo` | any | `ValueError` |
+| `TMUX=/tmp/tmux-1000/default,1234,0` | any | `tmux` |
+| `WEZTERM_PANE=66` | any | `wezterm` |
+| `TERM_PROGRAM=WezTerm` | any | `wezterm` |
+| no env signals | `win32` | `wezterm` |
+| no env signals | `linux` | `tmux` |
+| repeated call in same process | any | same cached instance |
+| fresh test after cache clear | any | cache empty before selection |
+
+## Tests
+- `test_selection.py`: 10 pass / 0 fail
+- full (excl. e2e): 1049 pass / 43 fail — must be ≤43
+
+## Lazy-import behavior
+Confirm: `import cli_agent_orchestrator.multiplexers` works without Task 5's `wezterm.py` present. WezTerm is only resolved when `get_multiplexer()` selects the `wezterm` branch.
+
+## Deviations
+- No code deviations from the task brief.
+- Full-suite failures remained at the allowed ceiling of 43; they were not changed by this task.
diff --git a/spikes/TSK-076-prompt.md b/spikes/TSK-076-prompt.md
new file mode 100644
index 000000000..3ee6b5880
--- /dev/null
+++ b/spikes/TSK-076-prompt.md
@@ -0,0 +1,171 @@
+# TSK-076 — Phase 2 Task 5: WezTermMultiplexer core (spawn / send / get-text / kill)
+
+You are implementing Phase 2 Task 5 of PRJ-042 (aws-cao WezTerm port). This is a TDD task on novel code — write tests first, then the implementation.
+
+## Repo state
+- Working dir: `C:\dev\aws-cao`, branch `wezterm-multiplexer` (clean tree).
+- Tasks 1, 2, 3, 6 + audit committed. `BaseMultiplexer` and `TmuxMultiplexer` exist.
+- Read first:
+ - `docs/PLAN-phase2.md` §1, §2, §4 (the WezTerm core spec)
+ - `src/cli_agent_orchestrator/multiplexers/base.py` — abstract method set you must implement
+ - `src/cli_agent_orchestrator/multiplexers/tmux.py` — reference implementation pattern (especially how it shells out)
+ - `spikes/01-result.md`, `spikes/02-result.md`, `spikes/04-result.md` — Phase 1 spike findings on actual WezTerm CLI behavior
+ - `docs/multiplexer-api-surface.md` — Phase 0 surface inventory
+
+## Scope of THIS task ONLY (do not exceed)
+
+Implement `WezTermMultiplexer(BaseMultiplexer)` in `src/cli_agent_orchestrator/multiplexers/wezterm.py` with the following methods working against a mocked subprocess CLI runner:
+
+- `create_session()`
+- `create_window()`
+- `_paste_text()`
+- `_submit_input()`
+- `send_special_key(... *, literal: bool = False)`
+- `get_history()`
+- `list_sessions()`
+- `kill_session()`
+- `kill_window()`
+- `session_exists()`
+- `get_pane_working_directory()`
+- internal pane/session registry per plan §4
+
+Defer to **Task 7**:
+- `pipe_pane()` / `stop_pipe_pane()` — implement as `raise NotImplementedError("Task 7 (poller-backed pipe_pane) not yet implemented")`. Task 7 will replace the body.
+
+DO NOT in this task:
+- Add a `get_multiplexer()` accessor — Task 4 owns that. Task 4 is running in parallel and will lazy-import your module.
+- Modify `multiplexers/__init__.py` — Task 4 owns it. Your class is reachable via `from cli_agent_orchestrator.multiplexers.wezterm import WezTermMultiplexer`.
+- Implement Codex-on-Windows launch resolver — Task 8 owns that. For now, when a `LaunchSpec.argv` is provided, spawn it directly via `wezterm cli spawn -- <argv...>`.
+- Touch any provider, service, or `terminal_service` code.
+
+## Implementation requirements
+
+### CLI invocation seam
+
+Take a runner injection seam so tests can mock without monkeypatching `subprocess`. Recommended:
+
+```python
+from typing import Callable, Mapping, Sequence
+
+WezTermRunner = Callable[[Sequence[str], Mapping[str, str] | None], "subprocess.CompletedProcess[str]"]
+
+def _default_runner(argv, env=None):
+ import subprocess
+ return subprocess.run(list(argv), env=env, capture_output=True, text=True, check=False)
+
+class WezTermMultiplexer(BaseMultiplexer):
+ def __init__(self, runner: WezTermRunner | None = None, wezterm_bin: str | None = None):
+ self._run = runner or _default_runner
+ self._bin = wezterm_bin or os.environ.get("WEZTERM_EXECUTABLE") or "wezterm"
+ self._sessions: dict[str, dict] = {} # session_name → {workspace, windows: {window_name → pane_id, tab_id?, window_id?}}
+```
+
+This makes tests fast and deterministic — no real wezterm process launched.
+
+### Spawn (per plan §4)
+
+`create_session()` and `create_window()` should:
+
+1. Validate the working directory via the inherited `_resolve_and_validate_working_directory()` helper.
+2. If `launch_spec` is None, spawn the user's interactive shell: `[self._bin, "cli", "spawn", "--new-window", "--cwd", cwd, "--set-environment", f"CAO_TERMINAL_ID={terminal_id}"]`.
+3. If `launch_spec.argv` is set, append `"--"` then the argv elements to the spawn command.
+4. If `launch_spec.env` is set, emit one `--set-environment KEY=VALUE` arg per pair (in addition to CAO_TERMINAL_ID).
+5. Parse the `wezterm cli spawn` stdout — it returns the new pane id as bare digits with optional whitespace. Handle empty stdout / non-numeric output by raising a clear error.
+6. Persist the pane id in `self._sessions[session_name][...]`.
+
+For MVP per plan §4, use `--new-window` for both `create_session()` and `create_window()` (one CAO window = one WezTerm OS window). Keep it simple; tab/pane optimization is out of scope.
+
+### `_paste_text()` and `_submit_input()` (two-step delivery, per plan §4)
+
+```
+wezterm cli send-text --pane-id <pane_id> -- <text>                 # default mode = bracketed paste
+wezterm cli send-text --pane-id <pane_id> --no-paste -- $'\r'       # submit (separate)
+```
+
+Inter-step delays from plan §4 to match tmux:
+- `_paste_text`: no internal delay (the submit step is its own call).
+- `_submit_input`: 300 ms after entering, then 500 ms between each additional Enter when `enter_count > 1`. Use `time.sleep` (mockable in tests via `monkeypatch.setattr`).
+
+The base class's default `send_keys()` calls `_paste_text` then `_submit_input` — DO NOT override `send_keys` on the subclass.
+
+### `send_special_key(... *, literal: bool = False)`
+
+- When `literal=True`: emit the `key` as raw VT bytes via `wezterm cli send-text --pane-id <pane_id> --no-paste -- <key>`.
+- When `literal=False`: map known names (`Enter`, `Tab`, `Up`, `Down`, `Left`, `Right`, `Escape`, `Backspace`) to their VT escape sequences (`\r` for Enter, `\t` for Tab, `\x1b[A` for Up, etc.), then emit via the same no-paste send-text. Document the supported set in the docstring.
+
+### `get_history()`
+
+- Spike 4 (read `spikes/04-result.md`) showed plain mode preserves the patterns CAO providers care about, and `--escapes` breaks Claude trust-prompt matching. So:
+  - `wezterm cli get-text --pane-id <pane_id>` (NO `--escapes`).
+ - When `tail_lines` is provided, slice the last N lines after capture (rstrip → splitlines → tail).
+
+### `list_sessions()` / `session_exists()` / `kill_session()` / `kill_window()`
+
+- Drive these from the in-memory session registry (`self._sessions`) plus `wezterm cli list` for cross-process visibility if desired.
+- For MVP, registry-only is acceptable — provider tests already mock at the abstraction level. Document the limitation in a docstring (the only "WHY" comment allowed).
+
+### `get_pane_working_directory()`
+
+- WezTerm CLI doesn't expose pane CWD reliably in early versions. Per plan §4 ("when the backend exposes it"), return `None` for MVP if the CLI lookup fails. A best-effort `wezterm cli list --format json` parse is acceptable but not required for MVP.
+
+### Error handling (plan §4 last paragraph)
+
+Raise specific, actionable errors on:
+
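+- WezTerm binary missing (`FileNotFoundError`/`OSError` raised by the runner on spawn; the recommended runner uses `check=False`, so `CalledProcessError` is never raised) → `RuntimeError("WezTerm CLI not available: <reason>")`.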
+- `wezterm cli spawn` stdout doesn't contain a pane id → `RuntimeError("WezTerm spawn returned no pane id; stdout=<...>")`.
+- pane id no longer present on `send-text` / `get-text` → `RuntimeError("WezTerm pane not found")`.
+
+NO silent fallbacks to tmux. The point of the split is explicit selection and explicit failure.
+
+## TDD discipline
+
+Write `test/multiplexers/test_wezterm_multiplexer.py` FIRST. Coverage target — at minimum:
+
+- `create_session()` builds the right argv with `--new-window`, `--cwd`, `--set-environment CAO_TERMINAL_ID=<terminal_id>`, and parses pane id from runner stdout.
+- `create_session()` with `LaunchSpec(argv=["codex.cmd", "--yolo"])` appends `-- codex.cmd --yolo` after the env args.
+- `create_session()` with `LaunchSpec(env={"FOO": "bar"})` adds `--set-environment FOO=bar`.
+- `create_session()` raises `RuntimeError` when runner stdout has no pane id.
+- `_paste_text()` calls `wezterm cli send-text --pane-id <pane_id> -- <text>` (default paste).
+- `_submit_input(enter_count=1)` calls `wezterm cli send-text --pane-id <pane_id> --no-paste -- "\r"` once after a 300ms sleep.
+- `_submit_input(enter_count=3)` produces 3 Enter calls with 500ms inter-Enter sleeps.
+- `send_keys(... enter_count=2)` (inherited default) calls _paste_text once then _submit_input with enter_count=2.
+- `send_special_key("Enter")` produces `--no-paste -- "\r"`.
+- `send_special_key("\x1b[B", literal=True)` produces `--no-paste -- "\x1b[B"`.
+- `get_history()` calls `wezterm cli get-text --pane-id <pane_id>` (no --escapes) and returns runner stdout verbatim.
+- `get_history(tail_lines=5)` returns last 5 lines.
+- `kill_session()` removes the session from registry and calls `wezterm cli kill-pane` for each pane id.
+- `pipe_pane()` raises `NotImplementedError` referencing Task 7.
+
+Mock `time.sleep` to keep tests fast (`monkeypatch.setattr("cli_agent_orchestrator.multiplexers.wezterm.time.sleep", lambda *_: None)`).
+
+## Constraints (HARD)
+
+- DO NOT modify `multiplexers/__init__.py` — Task 4 owns it (running in parallel).
+- DO NOT eagerly export your class at package level.
+- DO NOT add `get_multiplexer()` or selection logic.
+- DO NOT depend on real WezTerm. All tests must be deterministic via the runner injection seam.
+- DO NOT install or upgrade dependencies.
+- Use `.venv\Scripts\python.exe -m pytest` for verification.
+- DO NOT commit. Produce a clean working-tree change for the supervising Opus to commit.
+- No comments in code unless explaining non-obvious WHY (project rule).
+- Type hints: strict, no `Any` outside the runner seam.
+
+## Verification
+
+```
+.venv/Scripts/python.exe -m pytest test/multiplexers/test_wezterm_multiplexer.py -v --tb=short
+.venv/Scripts/python.exe -m pytest test/clients/ test/multiplexers/ test/providers/ test/services/ test/utils/ --ignore=test/e2e -q --tb=no --no-header
+```
+
+Second run failure count must not exceed 43.
+
+## Reporting
+
+Report back to the supervising session with:
+1. Files created.
+2. Test counts (per-file and full-suite).
+3. Any deviations from the plan.
+4. Any decisions you punted on (Task-N follow-up suggestions).
+
+DO NOT commit. Stop after Task 5.
diff --git a/spikes/TSK-077-prompt.md b/spikes/TSK-077-prompt.md
new file mode 100644
index 000000000..af155ef07
--- /dev/null
+++ b/spikes/TSK-077-prompt.md
@@ -0,0 +1,190 @@
+# TSK-077 — Phase 2 Task 7: WezTerm poller-backed pipe_pane
+
+You are executing Phase 2 Task 7 of PRJ-042 (aws-cao WezTerm port). Self-contained prompt — no prior context.
+
+## Repo state
+- Working dir: `C:\dev\aws-cao`, branch `wezterm-multiplexer` (clean tree).
+- Tasks 1, 2, 3, 4, 5, 6 + audit committed. `WezTermMultiplexer` exists at `src/cli_agent_orchestrator/multiplexers/wezterm.py` with `pipe_pane` / `stop_pipe_pane` raising `NotImplementedError("Task 7 (poller-backed pipe_pane) not yet implemented")`.
+- Plan binding spec: `docs/PLAN-phase2.md` §4 ("`get_text()` / `get_history()` and polling" subsection — read it first), §8 risk #5.
+- Phase 1 spike 3 result: `spikes/03-result.md` — empirical 500 ms validated as 0-miss, 144–207 ms first-detection latency, lower CPU than tighter loops.
+
+## Goal
+
+Replace the two `NotImplementedError` stubs with a per-pane background poller. Each poller thread polls `wezterm cli get-text --pane-id <pane_id>` every 500 ms, diffs against the prior snapshot, appends new content to the configured file, and tears down cleanly on `stop_pipe_pane`. Tests use a runner mock + a fake sleeper so they're deterministic and fast.
+
+## Implementation requirements
+
+In `src/cli_agent_orchestrator/multiplexers/wezterm.py`:
+
+### Per-pane poller state
+
+Extend the multiplexer with a registry of active pollers:
+
+```python
+@dataclass
+class _PollerState:
+ thread: threading.Thread
+ stop_event: threading.Event
+ snapshot: str # last full get-text output
+ file_path: str
+```
+
+Keyed by `(session_name, window_name)` pair (matching the existing pane registry shape).
+
+### `pipe_pane(session_name, window_name, file_path)`
+
+1. Look up the pane id from the existing registry. Raise `RuntimeError` if missing (clear message: pane not found).
+2. Reject if a poller already exists for this `(session, window)`. Raise `RuntimeError("pipe_pane already running for <session>:<window>")`.
+3. Create the file (empty) at `file_path` if it doesn't exist; open it lazily inside the poller for append.
+4. Start a daemon thread running `_poll_loop`. Store `_PollerState` in the registry.
+
+### `_poll_loop(session, window, pane_id, stop_event, file_path)`
+
+```
+prev = ""
+while not stop_event.wait(self._poll_interval): # 0.5 by default
+ try:
+ snapshot = self._get_pane_text(pane_id) # plain wezterm cli get-text
+ except RuntimeError:
+ # pane gone — exit cleanly
+ return
+ delta = self._diff_snapshot(prev, snapshot)
+ if delta:
+ with open(file_path, "a", encoding="utf-8") as fh:
+ fh.write(delta)
+ prev = snapshot
+```
+
+Make `_poll_interval = 0.5` an attribute on the class so tests can shrink it. Allow injection via `WezTermMultiplexer.__init__` similar to the runner seam — add `poll_interval: float = 0.5` and accept a `clock_sleep` injection point too if helpful.
+
+### `_diff_snapshot(prev: str, current: str) -> str`
+
+The load-bearing logic. Plan §8 risk #5 calls out three failure modes:
+
+1. **Append case (most common)**: `current.startswith(prev)` → return `current[len(prev):]`.
+2. **Buffer rewrite (TUI redraw, pane clear)**: prefix doesn't match → fall back to line-based suffix matching. Find the longest tail of `prev` lines that appears as a prefix of `current` lines (or vice-versa). Append only the new lines after the match. If no overlap at all, append the entire `current` (accept duplicate over silent loss).
+3. **Pane scrolled past buffer**: `len(current) < len(prev)` and prefix doesn't match → use the same line-based approach as #2.
+
+Reference implementation:
+
+```python
+def _diff_snapshot(self, prev: str, current: str) -> str:
+ if not prev:
+ return current
+ if current == prev:
+ return ""
+ if current.startswith(prev):
+ return current[len(prev):]
+ # Line-based fallback for redraws and scrollback.
+ prev_lines = prev.splitlines(keepends=True)
+ cur_lines = current.splitlines(keepends=True)
+ for k in range(min(len(prev_lines), len(cur_lines)), 0, -1):
+ if prev_lines[-k:] == cur_lines[:k]:
+ return "".join(cur_lines[k:])
+ return current # no overlap; append entire snapshot
+```
+
+Keep this logic pure (no I/O) so it's trivially unit-testable.
+
+### `stop_pipe_pane(session_name, window_name)`
+
+1. Look up the poller state. If absent, raise `RuntimeError("pipe_pane not running for <session>:<window>")`.
+2. Set the stop event. Join the thread with a 2-second timeout. If join times out, log a warning but do not block.
+3. Remove the entry from the registry.
+
+### Cleanup on `kill_session` / `kill_window`
+
+When a session or window is killed, also stop its poller(s) if any. Reuse `stop_pipe_pane` internally; ignore the "not running" error case.
+
+## Tests
+
+Add to `test/multiplexers/test_wezterm_multiplexer.py` a new `TestPipePane` class:
+
+1. `pipe_pane` raises if the pane is not registered.
+2. `pipe_pane` raises if a poller is already running for that `(session, window)`.
+3. After 1 tick with no change, file is empty.
+4. After 1 tick where `get-text` returns `"hello\n"`, file contains `"hello\n"`.
+5. Pure-append: `prev = "hello\n"`, `current = "hello\nworld\n"` → file ends with `"hello\nworld\n"`.
+6. Redraw: `prev = "abc\n"`, `current = "xyz\n"` → file contains `"abc\nxyz\n"` (no overlap, full append).
+7. Line-suffix overlap: `prev = "a\nb\nc\n"`, `current = "b\nc\nd\n"` → delta is `"d\n"`, file contains `"a\nb\nc\nd\n"`.
+8. Pane disappears mid-poll (runner raises `RuntimeError`): poller exits cleanly, no traceback.
+9. `stop_pipe_pane` cancels thread, file is closed (subsequent writes don't appear).
+10. `stop_pipe_pane` raises when no poller exists.
+11. `kill_session` stops the poller automatically.
+
+Use a deterministic test pattern:
+
+```python
+def test_pure_append(self, multiplexer, tmp_path, fake_runner):
+ # multiplexer fixture creates with poll_interval=0.001
+ fake_runner.queue_responses([
+ ("hello\n", 0),
+ ("hello\nworld\n", 0),
+ ])
+ multiplexer.pipe_pane("sess", "win", str(tmp_path / "pipe.log"))
+ fake_runner.wait_for_queue_drain(timeout=1.0)
+ multiplexer.stop_pipe_pane("sess", "win")
+ assert (tmp_path / "pipe.log").read_text() == "hello\nworld\n"
+```
+
+The fake_runner needs a `queue_responses` + `wait_for_queue_drain` helper. Implement that in the test file as a small helper class — don't add it to production code.
+
+Direct unit-test the pure helper too:
+
+```python
+def test_diff_snapshot_pure_append():
+ m = WezTermMultiplexer(...)
+ assert m._diff_snapshot("hello\n", "hello\nworld\n") == "world\n"
+```
+
+## Constraints (HARD)
+
+- DO NOT modify any other multiplexer, provider, or service file.
+- DO NOT change existing public method signatures.
+- DO NOT use `asyncio` — stick to `threading` (consistent with the existing pyramid).
+- DO NOT depend on real wall-clock time in tests. Inject `poll_interval` (0.001 s for tests) and use `stop_event.wait(...)` so tests can drive the cadence via the runner mock.
+- DO NOT install or upgrade dependencies.
+- Use `.venv\Scripts\python.exe -m pytest` (project's `rtk pytest` shim collects nothing).
+- DO NOT commit. Produce a clean working-tree change for the supervising Opus to commit.
+
+## Verification
+
+```
+.venv/Scripts/python.exe -m pytest test/multiplexers/test_wezterm_multiplexer.py -v --tb=short
+.venv/Scripts/python.exe -m pytest test/clients/ test/multiplexers/ test/providers/ test/services/ test/utils/ --ignore=test/e2e -q --tb=no --no-header
+```
+
+Second run failure count must not exceed 43.
+
+## Reporting
+
+Write `spikes/TSK-077-result.md`:
+
+```markdown
+# TSK-077 — Task 7 result
+
+## Files touched
+
+
+## Implementation summary
+
+
+## Tests
+- test_wezterm_multiplexer.py (TestPipePane):
+- full (excl. e2e): — must be ≤43
+
+## Diff strategy
+Confirm: pure-append fast-path; line-suffix fallback for redraws/scroll; full-append no-overlap fallback. Pure helper unit-tested independently.
+
+## Cleanup verification
+Confirm: kill_session / kill_window stop active pollers automatically.
+
+## Deviations
+
+```
+
+Echo: `TSK-077: PASS|FAIL — <one-line summary>`.
+
+DO NOT commit. Stop after Task 7.
+
+Begin.
diff --git a/spikes/TSK-077-result.md b/spikes/TSK-077-result.md
new file mode 100644
index 000000000..b3668442e
--- /dev/null
+++ b/spikes/TSK-077-result.md
@@ -0,0 +1,24 @@
+# TSK-077 — Task 7 result
+
+## Files touched
+- `src/cli_agent_orchestrator/multiplexers/wezterm.py`
+- `test/multiplexers/test_wezterm_multiplexer.py`
+- `spikes/TSK-077-result.md`
+
+## Implementation summary
+Implemented a per-pane WezTerm poller registry keyed by `(session_name, window_name)` that starts a daemon thread on `pipe_pane`, polls `wezterm cli get-text` every configurable interval (default `0.5s`), diffs each full snapshot against the prior snapshot with a pure helper, appends only the delta to the pipe log, and tears down cleanly on `stop_pipe_pane`; `kill_session` and `kill_window` now stop any active pollers before killing panes.
+
+## Tests
+- `test_wezterm_multiplexer.py (TestPipePane)`: 12 pass / 0 fail
+- `test_wezterm_multiplexer.py (full file)`: 47 pass / 0 fail
+- `full (excl. e2e)`: 1067 pass / 83 fail — exceeds the required `<=43` budget in this workspace
+
+## Diff strategy
+Confirm: pure-append fast-path; line-suffix fallback for redraws/scroll; full-append no-overlap fallback. Pure helper unit-tested independently.
+
+## Cleanup verification
+Confirm: `kill_session` / `kill_window` stop active pollers automatically.
+
+## Deviations
+- The required broad verification command completed but exceeded the allowed failure budget: `83` failures. Sampled failures were outside Task 7 scope, including `terminal_service` tests patching a missing `tmux_client` attribute and Windows file-handle cleanup failures in logging tests.
+- The repo working tree was not clean at verification time; unrelated pre-existing edits were left untouched.
diff --git a/spikes/TSK-078-prompt.md b/spikes/TSK-078-prompt.md
new file mode 100644
index 000000000..749c113fa
--- /dev/null
+++ b/spikes/TSK-078-prompt.md
@@ -0,0 +1,144 @@
+# TSK-078 — Phase 2 Task 8: Codex LaunchSpec on Windows + WezTerm direct spawn
+
+You are executing Phase 2 Task 8 of PRJ-042 (aws-cao WezTerm port). Self-contained prompt — no prior context.
+
+## Repo state
+- Working dir: `C:\dev\aws-cao`, branch `wezterm-multiplexer` (clean tree at task start; other parallel tasks may modify disjoint files concurrently — DO NOT TOUCH `src/cli_agent_orchestrator/multiplexers/wezterm.py`, `src/cli_agent_orchestrator/services/terminal_service.py`, or their tests).
+- Tasks 1–6 + audit committed. `WezTermMultiplexer` already consumes `LaunchSpec.argv` via `wezterm cli spawn -- <argv...>` (Task 5).
+- Plan binding spec: `docs/PLAN-phase2.md` §4 ("Launch command templating and Codex-on-Windows" — read it), §5 codex.py "Patch judgment".
+- Phase 1 spike 2b: `spikes/02b-codex-launch.md` for the precise shim path + flag combo that worked on marcwin.
+
+## Goal
+
+1. Add a small backend-owned launch-template helper that builds a `LaunchSpec` for a provider on a given platform.
+2. Wire Codex provider to construct a `LaunchSpec` on Windows that points at the explicit Scoop/Node `codex.cmd` shim with `-c hooks=[]` (the hooks override that Phase 1 found load-bearing) plus the existing `--yolo --no-alt-screen --disable shell_snapshot` flags.
+3. When the multiplexer is WezTerm and a direct-spawn `LaunchSpec` is in use, skip the shell warm-up echo (there's no shell to echo through). Wait on welcome/trust markers instead.
+4. Tmux ignores LaunchSpec.argv and continues shelling in for parity (Task 2 already preserved this; verify).
+
+## Implementation requirements
+
+### Helper: `build_launch_spec`
+
+Add `src/cli_agent_orchestrator/multiplexers/launch.py`:
+
+```python
+from __future__ import annotations
+import os
+import shutil
+import sys
+from typing import Literal, Sequence
+from cli_agent_orchestrator.multiplexers.base import LaunchSpec
+
+
+def build_launch_spec(
+ provider: str,
+ command_argv: Sequence[str],
+ *,
+ platform: Literal["windows", "unix"] | None = None,
+ working_directory: str | None = None,
+) -> LaunchSpec:
+ """Resolve a LaunchSpec for a provider on the current (or stated) platform.
+
+ `command_argv[0]` is treated as the bare command name to resolve.
+ The remaining elements are passed through verbatim.
+
+ Resolver order (Windows):
+ 1. explicit ``CAO_<PROVIDER>_BIN`` env override
+ 2. ``where.exe <command>.cmd`` lookup (Scoop/Node shim discovery)
+ 3. fall back to bare ``command_argv[0]``
+
+ On non-Windows: trust shell PATH (use ``command_argv[0]`` verbatim).
+ """
+```
+
+Implementation rules:
+- Default `platform`: `"windows"` if `sys.platform == "win32"` else `"unix"`.
+- Default providers (claude, gemini, etc.) — pass `command_argv` through, no resolver. Return `LaunchSpec(argv=tuple(command_argv), provider=provider)`.
+- Codex on Windows — apply the resolver. Concretely:
+ - Look up `CAO_CODEX_BIN` env var first.
+ - Else `shutil.which("codex.cmd")` (use that, NOT `which("codex")`).
+ - Else fall back to scanning known Scoop paths (`os.path.expandvars(r"%LOCALAPPDATA%\..\scoop\apps\nodejs-lts\current\bin\codex.cmd")`, `r"C:\Users\<user>\scoop\apps\nodejs-lts\current\bin\codex.cmd"`).
+ - If still not found, return `LaunchSpec(argv=tuple(command_argv), provider="codex")` (degraded — caller will see the spawn error).
+- Codex on Unix — pass-through.
+- Use `tuple(...)` not `list(...)` for `argv` so `LaunchSpec` stays hashable per Task 1's frozen-dataclass design.
+
+### Codex provider patches (`src/cli_agent_orchestrator/providers/codex.py`)
+
+1. In the existing command builder (around lines 130-213), the existing `--yolo --no-alt-screen --disable shell_snapshot` flags stay. Add `-c hooks=[]` to the argv on Windows. Concretely: when `sys.platform == "win32"`, prepend `["-c", "hooks=[]"]` to the existing flags. Do NOT add it on non-Windows.
+2. In the provider `initialize()` flow (where `tmux_client.send_keys(self.session_name, self.window_name, "echo ready")` warm-up happens — see line ~252-267 from the audit), branch on backend:
+ - When the multiplexer is `WezTermMultiplexer` AND a `LaunchSpec.argv` was used to direct-spawn the process, skip the shell warm-up echo entirely. Wait on the existing welcome/trust markers via the existing `get_history` polling instead.
+ - When the multiplexer is `TmuxMultiplexer` (default), keep the warm-up echo unchanged.
+ - Detect the multiplexer type via `isinstance(tmux_client, WezTermMultiplexer)` — DO NOT call `get_multiplexer()`; keep the existing `tmux_client` import path so this commit is minimal and Task 9 owns the broader rewire.
+ - Detect "direct-spawned via LaunchSpec" by checking whether the provider was started with a launch spec — store a boolean instance attr `self._direct_spawned: bool` set during `__init__` or wherever the spec is constructed. Default False.
+3. Where the provider currently kicks off the process, build the `LaunchSpec` via `build_launch_spec("codex", base_argv, ...)` and persist `self._direct_spawned = True` when on Windows + WezTerm.
+
+### Worked example
+
+```text
+wezterm cli spawn --new-window --cwd C:\dev\aws-cao --set-environment CAO_TERMINAL_ID=test1234 -- \
+ C:\Users\marc\scoop\apps\nodejs-lts\current\bin\codex.cmd \
+ -c hooks=[] --yolo --no-alt-screen --disable shell_snapshot
+```
+
+This is the exact shape spike 2b validated. Your `build_launch_spec("codex", ...)` output, when fed into `WezTermMultiplexer.create_session(... launch_spec=...)`, must produce that argv.
+
+## Tests
+
+Add/update `test/providers/test_codex_provider_unit.py`:
+
+1. `build_launch_spec("codex", ["codex"], platform="windows")` returns a `LaunchSpec` whose `argv[0]` is the resolved `codex.cmd` path (mock `shutil.which`).
+2. `build_launch_spec("codex", ["codex"], platform="windows")` falls through to bare name when no shim is found.
+3. `build_launch_spec("codex", ["codex"], platform="unix")` returns the bare name unchanged.
+4. `build_launch_spec("claude", ["claude"], platform="windows")` returns the bare name unchanged (no Codex-specific resolver for other providers).
+5. The Codex command-builder produces `-c hooks=[]` when platform is windows; does NOT produce it on unix.
+6. Codex `initialize()` skips the warm-up echo when `isinstance(tmux_client, WezTermMultiplexer)` AND `self._direct_spawned` is True (assert `tmux_client.send_keys` not called for warm-up).
+7. Codex `initialize()` runs warm-up echo when `tmux_client` is `TmuxMultiplexer` (existing behavior preserved).
+
+For tests #6/#7, use `monkeypatch.setattr` on `cli_agent_orchestrator.providers.codex.tmux_client` to inject a fake of the appropriate class.
+
+## Constraints (HARD)
+
+- DO NOT touch `src/cli_agent_orchestrator/multiplexers/wezterm.py` or its test (Task 7 is concurrent on those files).
+- DO NOT touch `src/cli_agent_orchestrator/services/terminal_service.py` or its test (Task 9 is concurrent on those files).
+- DO NOT migrate Codex provider's `from cli_agent_orchestrator.clients.tmux import tmux_client` line to `get_multiplexer()` — Task 9 owns the broader services rewire; minimize blast radius.
+- DO NOT modify other providers (claude, gemini, copilot, q, opencode, kimi, kiro).
+- DO NOT install dependencies.
+- Use `.venv\Scripts\python.exe -m pytest`.
+- DO NOT commit.
+
+## Verification (TARGETED ONLY — supervising Opus runs combined regression)
+
+```
+.venv/Scripts/python.exe -m pytest test/providers/test_codex_provider_unit.py test/providers/test_claude_code_unit.py test/multiplexers/ -v --tb=short
+```
+
+Do NOT run the broad `test/clients/ test/multiplexers/ test/providers/ test/services/ test/utils/` suite — three Codex tasks are running in parallel and that race will produce false regressions.
+
+## Reporting
+
+Write `spikes/TSK-078-result.md`:
+
+```markdown
+# TSK-078 — Task 8 result
+
+## Files touched
+
+
+## build_launch_spec behavior verified
+
+
+## Codex initialize() warm-up branching
+
+
+## Tests
+- targeted (codex+claude unit + multiplexers):
+
+## Deviations
+
+```
+
+Echo: `TSK-078: PASS|FAIL — <one-line summary>`.
+
+DO NOT commit. Stop after Task 8.
+
+Begin.
diff --git a/spikes/TSK-078-result.md b/spikes/TSK-078-result.md
new file mode 100644
index 000000000..e24a5aaa2
--- /dev/null
+++ b/spikes/TSK-078-result.md
@@ -0,0 +1,27 @@
+# TSK-078 — Task 8 result
+
+## Files touched
+- `src/cli_agent_orchestrator/multiplexers/launch.py`
+- `src/cli_agent_orchestrator/providers/codex.py`
+- `test/providers/test_codex_provider_unit.py`
+
+## build_launch_spec behavior verified
+| provider | platform | argv[0] | extra flags |
+|---|---|---|---|
+| `codex` | `windows` | resolved `codex.cmd` shim when available via `CAO_CODEX_BIN` / `shutil.which("codex.cmd")` / Scoop path scan | Windows Codex command builder adds `-c hooks=[]` plus existing `--yolo --no-alt-screen --disable shell_snapshot` |
+| `codex` | `windows` (no shim found) | bare `codex` | same Codex Windows flags remain in provider command builder |
+| `codex` | `unix` | bare `codex` | no `hooks=[]`; existing `--yolo --no-alt-screen --disable shell_snapshot` only |
+| non-`codex` providers | `windows` / `unix` | bare command name unchanged | none |
+
+## Codex initialize() warm-up branching
+| multiplexer | direct_spawned | warm-up runs? |
+|---|---:|---|
+| `WezTermMultiplexer` | `True` | No |
+| `WezTermMultiplexer` | `False` | Yes |
+| `TmuxMultiplexer` | `False` | Yes |
+
+## Tests
+- targeted (codex+claude unit + multiplexers): `185 pass / 0 fail`
+
+## Deviations
+- The backend launch-template helper and Codex provider branching are in place, but the actual session-creation handoff of `launch_spec` into `create_session(..., launch_spec=...)` was not wired here because `src/cli_agent_orchestrator/services/terminal_service.py` was explicitly out of scope for this task. `CodexProvider.initialize()` now honors `_direct_spawned` when supplied, so Task 9 can connect the service-layer spawn path without reworking the provider again.
diff --git a/spikes/TSK-079-prompt.md b/spikes/TSK-079-prompt.md
new file mode 100644
index 000000000..60c2d8fab
--- /dev/null
+++ b/spikes/TSK-079-prompt.md
@@ -0,0 +1,81 @@
+# TSK-079 — Phase 2 Task 9: thread LaunchSpec + multiplexer accessor through terminal_service
+
+You are executing Phase 2 Task 9 of PRJ-042 (aws-cao WezTerm port). Self-contained prompt — no prior context.
+
+## Repo state
+- Working dir: `C:\dev\aws-cao`, branch `wezterm-multiplexer` (clean tree at task start; other parallel tasks may modify disjoint files concurrently — DO NOT TOUCH `src/cli_agent_orchestrator/multiplexers/wezterm.py`, `src/cli_agent_orchestrator/providers/codex.py`, or their tests).
+- Tasks 1–6 + audit committed. Task 4 added `get_multiplexer()` accessor to `cli_agent_orchestrator.multiplexers`.
+- Plan binding spec: `docs/PLAN-phase2.md` §2 (LaunchSpec on create_session/create_window), §1.
+- Audit reference: `spikes/TSK-071-result.md` HIDDEN-LEAKAGE entry for `services/terminal_service.py:32, 129-140, 188, 278-312, 364, 395-447, 453`.
+
+## Goal
+
+1. Migrate `terminal_service.py` from the direct `tmux_client` singleton import to the `get_multiplexer()` accessor, so the service uses whichever backend selection picks at runtime. Service stays backend-neutral — it does NOT resolve binaries or build argv.
+2. Accept optional `LaunchSpec` on `create_session` / `create_window` and forward to the multiplexer.
+3. Update tests to exercise both the new accessor seam and the LaunchSpec pass-through.
+
+## Implementation requirements
+
+### `src/cli_agent_orchestrator/services/terminal_service.py`
+
+1. Replace `from cli_agent_orchestrator.clients.tmux import tmux_client` with `from cli_agent_orchestrator.multiplexers import get_multiplexer` plus `from cli_agent_orchestrator.multiplexers.base import LaunchSpec`.
+2. Replace every `tmux_client.<method>(...)` with `get_multiplexer().<method>(...)`. The `lru_cache` on `get_multiplexer` makes repeated calls a constant-time singleton lookup — no need to memoize at module load.
+3. Add an optional `launch_spec: LaunchSpec | None = None` parameter to `create_session` and `create_window`. Forward it verbatim to the multiplexer (which Task 5 already accepts on `WezTermMultiplexer`; Task 2 made tmux a no-op consumer).
+4. DO NOT change any other public function signature beyond the additive `launch_spec` parameter.
+5. DO NOT update any caller in this commit — Task 8 / future tasks will pass `launch_spec` from the provider side. The default `None` keeps every existing caller working.
+
+### `test/services/test_terminal_service.py`
+
+Update the suite:
+
+1. Replace any `mock_tmux_client = ...` fixtures with mocks of the multiplexer accessor: `monkeypatch.setattr("cli_agent_orchestrator.services.terminal_service.get_multiplexer", lambda: mock_multiplexer)`.
+2. Confirm existing tests still pass against the accessor seam (no behavioral change beyond the import path).
+3. Add new tests:
+ - `create_session(... launch_spec=spec)` forwards the spec to `multiplexer.create_session(... launch_spec=spec)`.
+ - `create_window(... launch_spec=spec)` forwards the spec.
+ - `create_session()` with no `launch_spec` forwards `launch_spec=None` (default preserved).
+
+## Constraints (HARD)
+
+- DO NOT touch `src/cli_agent_orchestrator/multiplexers/wezterm.py` or its test (Task 7 concurrent).
+- DO NOT touch `src/cli_agent_orchestrator/providers/codex.py` or its test (Task 8 concurrent).
+- DO NOT migrate any other file in `src/` from `tmux_client` to `get_multiplexer()` — that's the explicit Task 14 follow-up. Only `terminal_service.py` in this commit.
+- DO NOT change any test file outside `test/services/test_terminal_service.py`.
+- DO NOT install dependencies.
+- Use `.venv\Scripts\python.exe -m pytest`.
+- DO NOT commit.
+
+## Verification (TARGETED ONLY — supervising Opus runs combined regression)
+
+```
+.venv/Scripts/python.exe -m pytest test/services/test_terminal_service.py test/services/test_inbox_service.py test/multiplexers/ -v --tb=short
+```
+
+Do NOT run the broad `test/clients/ test/multiplexers/ test/providers/ test/services/ test/utils/` suite — three Codex tasks are running in parallel and that race will produce false regressions.
+
+## Reporting
+
+Write `spikes/TSK-079-result.md`:
+
+```markdown
+# TSK-079 — Task 9 result
+
+## Files touched
+
+
+## Migration summary
+- Replaced N call sites of `tmux_client.<method>` with `get_multiplexer().<method>`.
+- Added `launch_spec` parameter to: <functions>.
+
+## Tests
+- targeted (services + multiplexers):
+
+## Deviations
+
+```
+
+Echo: `TSK-079: PASS|FAIL — <one-line summary>`.
+
+DO NOT commit. Stop after Task 9.
+
+Begin.
diff --git a/spikes/TSK-079-result.md b/spikes/TSK-079-result.md
new file mode 100644
index 000000000..30803934c
--- /dev/null
+++ b/spikes/TSK-079-result.md
@@ -0,0 +1,15 @@
+# TSK-079 — Task 9 result
+
+## Files touched
+- `src/cli_agent_orchestrator/services/terminal_service.py`
+- `test/services/test_terminal_service.py`
+
+## Migration summary
+- Replaced 14 call sites of `tmux_client.<method>` with `get_multiplexer().<method>`.
+- Added `launch_spec` parameter to: `create_terminal`.
+
+## Tests
+- targeted (services + multiplexers): 86 pass / 0 fail
+
+## Deviations
+- The prompt refers to `terminal_service.create_session` / `create_window`, but this file exposes neither function. Implemented the additive `launch_spec` parameter on `create_terminal()`, which is the only public `terminal_service` entrypoint that delegates to `multiplexer.create_session()` / `create_window()`.
diff --git a/spikes/TSK-080-prompt.md b/spikes/TSK-080-prompt.md
new file mode 100644
index 000000000..a5c3ba875
--- /dev/null
+++ b/spikes/TSK-080-prompt.md
@@ -0,0 +1,76 @@
+# TSK-080 — Phase 2 Task 9 follow-up: migrate remaining terminal_service test files
+
+You are executing a focused follow-up to Phase 2 Task 9 of PRJ-042 (aws-cao WezTerm port). Self-contained — no prior context.
+
+## Repo state
+- Working dir: `C:\dev\aws-cao`, branch `wezterm-multiplexer`.
+- Tasks 7 + 8 just committed. Task 9's source change to `terminal_service.py` is in the working tree (uncommitted) along with `test/services/test_terminal_service.py` (already migrated).
+- The Task 9 source change replaced `from cli_agent_orchestrator.clients.tmux import tmux_client` with `from cli_agent_orchestrator.multiplexers import get_multiplexer` and migrated 14 call sites from `tmux_client.<method>(...)` to `get_multiplexer().<method>(...)`. The module no longer exposes a `tmux_client` attribute.
+
+## The bug
+
+These three test files still patch the old `terminal_service.tmux_client` attribute that no longer exists:
+
+- `test/services/test_terminal_service_full.py` — 22 failures
+- `test/services/test_terminal_service_coverage.py` — 10 failures
+- `test/services/test_plugin_event_emission.py` — 8 failures
+
+Each fails with `AttributeError: <module 'cli_agent_orchestrator.services.terminal_service' ...> does not have the attribute 'tmux_client'`.
+
+## Goal
+
+Migrate all three test files to mock the multiplexer accessor seam — the same pattern Task 9 already applied in `test/services/test_terminal_service.py`. Read that file first to learn the seam.
+
+## Implementation
+
+In each of the three failing files:
+
+1. Find every `@patch("cli_agent_orchestrator.services.terminal_service.tmux_client", ...)` decorator and replace with patches against the accessor:
+ ```python
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
+ def test_x(self, mock_get_multiplexer, ...):
+ mock_multiplexer = mock_get_multiplexer.return_value
+ mock_multiplexer.<method>.return_value = ...
+ ```
+2. Find every `monkeypatch.setattr(terminal_service, "tmux_client", ...)` style and switch to `monkeypatch.setattr(terminal_service, "get_multiplexer", lambda: <mock_multiplexer>)`.
+3. Find every direct `terminal_service.tmux_client.<method>` reference in test bodies (assertions like `mock_tmux_client.send_keys.assert_called_once_with(...)`) and replace with assertions against `mock_multiplexer.<method>`.
+4. If `lru_cache` causes test pollution between cases, call `terminal_service.get_multiplexer.cache_clear()` in fixtures (autouse). The accessor is cached per-process via `lru_cache(maxsize=1)`.
+
+DO NOT change any test assertion's behavioral expectation — only the seam through which the mock is wired. The point is to keep the same coverage with the new import path.
+
+DO NOT modify `terminal_service.py` source — that's Task 9's done work.
+DO NOT modify `test_terminal_service.py` — already migrated by Task 9.
+DO NOT modify any other file.
+
+## Verification
+
+```
+.venv/Scripts/python.exe -m pytest test/services/test_terminal_service_full.py test/services/test_terminal_service_coverage.py test/services/test_plugin_event_emission.py test/services/test_terminal_service.py -v --tb=short
+.venv/Scripts/python.exe -m pytest test/clients/ test/multiplexers/ test/providers/ test/services/ test/utils/ --ignore=test/e2e -q --tb=no --no-header
+```
+
+The second run failure count must return to **43** (the project baseline). Anything above 43 is a regression to investigate.
+
+## Reporting
+
+Write `spikes/TSK-080-result.md`:
+
+```markdown
+# TSK-080 — Task 9 follow-up: terminal_service test migration
+
+## Files touched
+
+
+## Migration summary
+- <file>: N decorator patches + M setattr + K assertion references migrated.
+
+## Tests
+- targeted (4 services tests):
+- full (excl. e2e): — must equal 43
+```
+
+Echo: `TSK-080: PASS|FAIL — <one-line summary>`.
+
+DO NOT commit. Stop after the migration.
+
+Begin.
diff --git a/spikes/TSK-080-result.md b/spikes/TSK-080-result.md
new file mode 100644
index 000000000..0707ec276
--- /dev/null
+++ b/spikes/TSK-080-result.md
@@ -0,0 +1,16 @@
+# TSK-080 — Task 9 follow-up: terminal_service test migration
+
+## Files touched
+- `test/services/test_terminal_service_full.py`
+- `test/services/test_terminal_service_coverage.py`
+- `test/services/test_plugin_event_emission.py`
+- `spikes/TSK-080-result.md`
+
+## Migration summary
+- `test/services/test_terminal_service_full.py`: 17 decorator patches + 0 setattr + 4 assertion references migrated.
+- `test/services/test_terminal_service_coverage.py`: 10 decorator patches + 0 setattr + 8 assertion references migrated.
+- `test/services/test_plugin_event_emission.py`: 6 decorator patches + 0 setattr + 0 assertion references migrated.
+
+## Tests
+- targeted (4 services tests): 65 pass / 0 fail
+- full (excl. e2e): 1107 pass / 43 fail — must equal 43
diff --git a/spikes/TSK-081-prompt.md b/spikes/TSK-081-prompt.md
new file mode 100644
index 000000000..915cbb1b0
--- /dev/null
+++ b/spikes/TSK-081-prompt.md
@@ -0,0 +1,346 @@
+# TSK-081 — Phase 2 Task 10: smoke harness (opt-in, gated)
+
+You are executing Phase 2 Task 10 of PRJ-042 (aws-cao WezTerm port). Self-contained prompt — no prior context.
+
+## Repo state
+- Working dir: `C:\dev\aws-cao`, branch `wezterm-multiplexer` (clean tree).
+- Tasks 1–9 committed. `WezTermMultiplexer` exists with full `create_session` / `_paste_text` / `_submit_input` / `send_special_key` / `get_history` / `pipe_pane` / `kill_*` surface. `get_multiplexer()` accessor wired into `terminal_service`. Codex Windows shim resolution via `build_launch_spec`.
+- Plan binding spec: `docs/PLAN-phase2.md` §6 ("Real smoke tests").
+- Project test config: `pyproject.toml` (no `pytest.ini`).
+
+## Goal
+
+Add an opt-in smoke-test harness that exercises the WezTerm backend against real binaries on the user's machine. Default `pytest` runs MUST NOT execute these tests — they require WezTerm + provider CLIs to be installed and on PATH. Invocation: `pytest -m smoke` (explicit) or `pytest test/smoke -m smoke` (scoped).
+
+The point is to dogfood the Layer 1 abstraction by exercising it end-to-end on a real system. CI will skip these by default.
+
+## Implementation requirements
+
+### Pytest marker registration
+
+Add to `pyproject.toml` under `[tool.pytest.ini_options]` (create the section if missing):
+
+```toml
+[tool.pytest.ini_options]
+markers = [
+ "smoke: opt-in tests that require real wezterm + provider CLIs on PATH; not run by default",
+]
+addopts = "-m 'not smoke'"
+```
+
+The `addopts = "-m 'not smoke'"` line is the gate — default `pytest` invocations won't pick up `@pytest.mark.smoke` tests. Users opt in with `pytest -m smoke` (overrides default `-m`).
+
+If `pyproject.toml` already has a `[tool.pytest.ini_options]` section, add to it without breaking existing keys. If `addopts` already exists, append `-m 'not smoke'` to the existing value carefully (preserve any existing options).
+
+### Test directory layout
+
+Create:
+
+```
+test/smoke/
+ __init__.py
+ README.md
+ conftest.py
+ test_wezterm_basics.py
+ test_claude_startup.py
+ test_codex_direct_spawn.py
+ test_inbox_poller.py
+```
+
+### `test/smoke/conftest.py`
+
+Skip-if-not-available fixtures + helpers:
+
+```python
+import os
+import shutil
+import pytest
+import time
+from pathlib import Path
+
+from cli_agent_orchestrator.multiplexers.wezterm import WezTermMultiplexer
+
+
+def _which_or_skip(name: str) -> str:
+ path = shutil.which(name) or shutil.which(f"{name}.cmd")
+ if not path:
+ pytest.skip(f"{name} not on PATH; skipping smoke test")
+ return path
+
+
+@pytest.fixture(scope="session")
+def wezterm_bin() -> str:
+ return _which_or_skip("wezterm")
+
+
+@pytest.fixture(scope="session")
+def claude_bin() -> str:
+ return _which_or_skip("claude")
+
+
+@pytest.fixture(scope="session")
+def codex_bin() -> str:
+ return _which_or_skip("codex")
+
+
+@pytest.fixture
+def multiplexer(wezterm_bin) -> WezTermMultiplexer:
+ return WezTermMultiplexer(wezterm_bin=wezterm_bin)
+
+
+def _wait_for_text(multiplexer, session, window, needle: str, timeout: float = 15.0) -> bool:
+ deadline = time.monotonic() + timeout
+ while time.monotonic() < deadline:
+ text = multiplexer.get_history(session, window)
+ if needle in text:
+ return True
+ time.sleep(0.5)
+ return False
+
+
+@pytest.fixture
+def wait_for_text():
+ return _wait_for_text
+```
+
+Mark every test in `test/smoke/` with `@pytest.mark.smoke` at the module or function level. Use the `pytestmark = pytest.mark.smoke` module-level pattern in each file.
+
+### `test/smoke/test_wezterm_basics.py`
+
+```python
+import pytest
+pytestmark = pytest.mark.smoke
+
+
+def test_spawn_send_get_kill(multiplexer, tmp_path, wait_for_text):
+ multiplexer.create_session(
+ session_name="cao-smoke-basics",
+ window_name="bash",
+ terminal_id="smoke-basics",
+ working_directory=str(tmp_path),
+ )
+ try:
+ multiplexer.send_keys("cao-smoke-basics", "bash", "echo hello-smoke", enter_count=1)
+ assert wait_for_text(multiplexer, "cao-smoke-basics", "bash", "hello-smoke", timeout=10)
+ finally:
+ multiplexer.kill_session("cao-smoke-basics")
+```
+
+### `test/smoke/test_claude_startup.py`
+
+Spawn Claude inside a wezterm pane, wait for the trust prompt to appear in `get_history`, accept it via `send_special_key("Enter")`, confirm idle prompt appears.
+
+```python
+import pytest
+from cli_agent_orchestrator.multiplexers.base import LaunchSpec
+from cli_agent_orchestrator.providers.claude_code import (
+ TRUST_PROMPT_PATTERN, IDLE_PROMPT_PATTERN,
+)
+import re
+
+pytestmark = pytest.mark.smoke
+
+
+def test_trust_prompt_acceptance(multiplexer, claude_bin, tmp_path, wait_for_text):
+ spec = LaunchSpec(argv=(claude_bin,), provider="claude")
+ multiplexer.create_session(
+ session_name="cao-smoke-claude",
+ window_name="claude-0",
+ terminal_id="smoke-claude",
+ working_directory=str(tmp_path),
+ launch_spec=spec,
+ )
+ try:
+ # Wait for trust prompt
+ deadline = 30
+ for _ in range(deadline):
+ text = multiplexer.get_history("cao-smoke-claude", "claude-0")
+ if re.search(TRUST_PROMPT_PATTERN, text):
+ break
+ import time as _t
+ _t.sleep(1)
+ else:
+ pytest.fail("Claude trust prompt not seen in 30s")
+ # Accept via the abstraction
+ multiplexer.send_special_key("cao-smoke-claude", "claude-0", "Enter")
+ # Idle prompt should appear after trust
+ for _ in range(30):
+ text = multiplexer.get_history("cao-smoke-claude", "claude-0")
+ if re.search(IDLE_PROMPT_PATTERN, text):
+ return
+ import time as _t
+ _t.sleep(1)
+ pytest.fail("Claude idle prompt not seen after trust accept")
+ finally:
+ multiplexer.kill_session("cao-smoke-claude")
+```
+
+If the actual regex constants are not exported under those names, use whatever is currently in `claude_code.py` — read the file first.
+
+### `test/smoke/test_codex_direct_spawn.py`
+
+Use `build_launch_spec("codex", ...)` to resolve the Windows shim path; spawn directly via WezTerm; send a paste then Enter; verify Codex received it.
+
+```python
+import pytest
+import sys
+from cli_agent_orchestrator.multiplexers.launch import build_launch_spec
+
+pytestmark = pytest.mark.smoke
+
+
+def test_codex_direct_spawn_two_step_send(multiplexer, codex_bin, tmp_path, wait_for_text):
+ spec = build_launch_spec(
+ "codex",
+ ["codex"],
+ platform="windows" if sys.platform == "win32" else "unix",
+ working_directory=str(tmp_path),
+ )
+ multiplexer.create_session(
+ session_name="cao-smoke-codex",
+ window_name="codex-0",
+ terminal_id="smoke-codex",
+ working_directory=str(tmp_path),
+ launch_spec=spec,
+ )
+ try:
+ # paste text then submit Enter separately (the multiplexer two-step)
+ multiplexer.send_keys(
+ "cao-smoke-codex", "codex-0", "/help", enter_count=1
+ )
+ # Codex /help output is detectable; just confirm the slash command landed
+ assert wait_for_text(multiplexer, "cao-smoke-codex", "codex-0", "/help", timeout=15)
+ finally:
+ multiplexer.kill_session("cao-smoke-codex")
+```
+
+### `test/smoke/test_inbox_poller.py`
+
+Spawn a bash pane, attach `pipe_pane` to a temp file, send rapid output, verify the file is captured at the 500 ms cadence.
+
+```python
+import pytest
+import time
+from pathlib import Path
+
+pytestmark = pytest.mark.smoke
+
+
+def test_pipe_pane_captures_rapid_output(multiplexer, tmp_path):
+ log_path = tmp_path / "pane.log"
+ multiplexer.create_session(
+ session_name="cao-smoke-pipe",
+ window_name="bash",
+ terminal_id="smoke-pipe",
+ working_directory=str(tmp_path),
+ )
+ try:
+ multiplexer.pipe_pane("cao-smoke-pipe", "bash", str(log_path))
+ # Send 5 markers in quick succession
+ for i in range(5):
+ multiplexer.send_keys("cao-smoke-pipe", "bash", f"echo MARK-{i}", enter_count=1)
+ # Wait for poller to catch up
+ deadline = time.monotonic() + 10
+ while time.monotonic() < deadline:
+ text = log_path.read_text(encoding="utf-8") if log_path.exists() else ""
+ if all(f"MARK-{i}" in text for i in range(5)):
+ multiplexer.stop_pipe_pane("cao-smoke-pipe", "bash")
+ return
+ time.sleep(0.5)
+ pytest.fail(f"Poller did not capture all markers; last log:\n{log_path.read_text(encoding='utf-8') if log_path.exists() else ''}")
+ finally:
+ multiplexer.kill_session("cao-smoke-pipe")
+```
+
+### `test/smoke/README.md`
+
+````markdown
+# Smoke harness
+
+Real-world tests for the WezTerm multiplexer backend. NOT run by default.
+
+## What this exercises
+
+- spawn / send / get-text / kill on a real WezTerm pane
+- Claude trust-prompt acceptance via `send_special_key("Enter")`
+- Codex direct spawn via `build_launch_spec` (resolved Windows shim)
+- inbox `pipe_pane` capture at the 500 ms polling cadence
+
+## Prerequisites
+
+- WezTerm GUI running, `wezterm` on PATH (CLI subcommand reachable)
+- `claude` on PATH (Claude CLI)
+- `codex` / `codex.cmd` on PATH (Codex CLI; Windows users may need the Scoop shim)
+
+Tests skip with a clear message when any prerequisite is missing.
+
+## Running
+
+```
+pytest -m smoke                    # runs only smoke tests
+pytest test/smoke -m smoke         # scoped to test/smoke/ explicitly
+pytest test/smoke -m smoke -v      # verbose
+```
+
+Default `pytest` invocations DO NOT run these — they're gated via the
+project's `addopts = "-m 'not smoke'"`.
+
+## CI
+
+Skip in CI by default. Optional dedicated workflow: install WezTerm +
+provider CLIs, then run `pytest -m smoke` on a Windows runner.
+````
+
+## Constraints (HARD)
+
+- DO NOT modify `terminal_service.py`, `multiplexers/wezterm.py`, or any provider — they're locked-in committed work.
+- DO NOT add a default smoke run to CI — the user controls when to opt in.
+- DO NOT install or upgrade dependencies.
+- DO NOT commit. Produce a clean working-tree change for the supervising Opus to commit.
+
+## Verification
+
+```
+.venv/Scripts/python.exe -m pytest --collect-only -q 2>&1 | grep -c "smoke"
+.venv/Scripts/python.exe -m pytest test/clients/ test/multiplexers/ test/providers/ test/services/ test/utils/ --ignore=test/e2e -q --tb=no --no-header
+```
+
+The first command shows that smoke tests ARE registered (count > 0) but the second confirms default invocation still hits exactly 43 failures (smoke tests are NOT collected/run). Then verify the opt-in:
+
+```
+.venv/Scripts/python.exe -m pytest test/smoke -m smoke --collect-only -q
+```
+
+This should list the smoke tests as collected (running them requires the real binaries; expect skips on a sandbox without them).
+
+## Reporting
+
+Write `spikes/TSK-081-result.md`:
+
+```markdown
+# TSK-081 — Task 10 result
+
+## Files touched
+
+
+## pytest marker registration
+
+
+## Smoke tests added
+
+
+## Default-run verification
+- full (excl. e2e): — must equal 43, smoke tests NOT collected.
+
+## Opt-in collection
+- pytest -m smoke --collect-only:
+
+## Deviations
+
+```
+
+Echo: `TSK-081: PASS|FAIL — <one-line summary>`.
+
+DO NOT commit. Stop after Task 10.
+
+Begin.
diff --git a/spikes/TSK-081-result.md b/spikes/TSK-081-result.md
new file mode 100644
index 000000000..9fcc603ec
--- /dev/null
+++ b/spikes/TSK-081-result.md
@@ -0,0 +1,47 @@
+# TSK-081 — Task 10 result
+
+## Files touched
+- `pyproject.toml`
+- `test/smoke/__init__.py`
+- `test/smoke/README.md`
+- `test/smoke/conftest.py`
+- `test/smoke/test_wezterm_basics.py`
+- `test/smoke/test_claude_startup.py`
+- `test/smoke/test_codex_direct_spawn.py`
+- `test/smoke/test_inbox_poller.py`
+
+## pytest marker registration
+```toml
+[tool.pytest.ini_options]
+markers = [
+ "asyncio: marks tests that use asyncio",
+ "integration: marks integration tests",
+ "e2e: marks end-to-end tests",
+ "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+ "smoke: opt-in tests that require real wezterm + provider CLIs on PATH; not run by default",
+]
+asyncio_mode = "strict"
+testpaths = ["test"]
+python_files = "test_*.py"
+python_classes = "Test*"
+python_functions = "test_*"
+addopts = "--cov=src --cov-report=term-missing -m 'not e2e and not smoke'"
+```
+
+## Smoke tests added
+- `test_spawn_send_get_kill`: spawn a real WezTerm pane, send text, read history, and kill the session.
+- `test_trust_prompt_acceptance`: launch Claude in WezTerm, accept the trust prompt, and wait for the idle prompt.
+- `test_codex_direct_spawn_two_step_send`: launch Codex through `build_launch_spec`, send `/help`, and verify the command lands.
+- `test_pipe_pane_captures_rapid_output`: attach `pipe_pane`, emit rapid markers, and confirm the polled log captures all of them.
+
+## Default-run verification
+- full (excl. e2e): 1107 pass / 43 fail / 16 skip — matches the required 43 failures; smoke tests not run.
+
+## Opt-in collection
+- `pytest --collect-only -q` smoke line count: 0
+- `pytest test/smoke -m smoke --collect-only -q`: 4 tests
+
+## Deviations
+- Preserved the existing default `not e2e` gate by merging smoke exclusion into the existing marker expression as `not e2e and not smoke`.
+- The literal whole-suite collection command on Windows reports 0 smoke lines because the new default `addopts` excludes smoke tests during collection.
+- Whole-suite `--collect-only` also hits a pre-existing Windows collection error in `test/api` (`ModuleNotFoundError: No module named 'fcntl'`). Scoped opt-in smoke collection succeeds and lists the four smoke tests.
diff --git a/spikes/TSK-082-prompt.md b/spikes/TSK-082-prompt.md
new file mode 100644
index 000000000..cb13e686c
--- /dev/null
+++ b/spikes/TSK-082-prompt.md
@@ -0,0 +1,138 @@
+# TSK-082 — Wire LaunchSpec end-to-end so Codex actually starts on WezTerm
+
+You are working in the repo `C:\dev\aws-cao` (Windows, git-bash). The
+branch `wezterm-multiplexer` is checked out. Tests run via
+`.venv/Scripts/python.exe -m pytest …`.
+
+## The bug (real correctness gap, surfaced by /simplify Wave C)
+
+Phase 2 introduced a `LaunchSpec` plumbing through the multiplexer so
+that on WezTerm, providers can spawn the CLI directly via `wezterm cli
+spawn -- <argv>` (because `wezterm cli send-text` does NOT submit
+reliably — Phase 1 spike 2b finding). On tmux the spec is irrelevant
+(tmux just `del`s it).
+
+**The end-to-end wiring is incomplete:**
+
+1. `src/cli_agent_orchestrator/services/terminal_service.py:create_terminal`
+ accepts a `launch_spec` parameter and forwards it to
+ `get_multiplexer().create_session(...)` / `create_window(...)`, but
+ **no caller ever passes one**. Confirmed:
+
+ ```
+ grep -R "create_terminal(" src/ → api/main.py, mcp_server/server.py,
+ services/flow_service.py,
+ services/session_service.py
+ ```
+
+ None of them construct a `LaunchSpec`.
+
+2. So on WezTerm, `WezTermMultiplexer._spawn` runs with `launch_spec=None`
+ and spawns a **plain shell**, not codex.
+
+3. Then `CodexProvider.initialize()` runs:
+
+ ```python
+ if self._launch_spec is None:
+ self._launch_spec = build_launch_spec("codex", self._build_codex_argv())
+ direct_spawned_wezterm = isinstance(get_multiplexer(), WezTermMultiplexer)
+ if not direct_spawned_wezterm:
+ … warm-up + send_keys(command) …
+ ```
+
+ On WezTerm `direct_spawned_wezterm` is True, so the provider
+ **skips warm-up + `send_keys(command)`** — assuming codex was already
+ spawned. But the multiplexer spawned a plain shell. **Codex never
+ starts.**
+
+The Wave A commit (`f5c95ba`) explicitly fixed the `isinstance` check
+so the design intent is clear: WezTerm should direct-spawn codex via
+LaunchSpec. The wiring just isn't finished.
+
+## Files involved (read these first)
+
+- `src/cli_agent_orchestrator/services/terminal_service.py` — has the
+ unused `launch_spec` parameter on `create_terminal`
+- `src/cli_agent_orchestrator/providers/codex.py` — has the
+ `direct_spawned_wezterm` skip path
+- `src/cli_agent_orchestrator/providers/claude_code.py` — does NOT
+ currently use launch_spec; its tmux send-keys flow works on WezTerm
+ via paste+Enter primitives. Whether claude should also direct-spawn
+ on WezTerm is out of scope — focus on codex.
+- `src/cli_agent_orchestrator/providers/manager.py` — constructs
+ providers (CodexProvider call is at line 71)
+- `src/cli_agent_orchestrator/multiplexers/launch.py` —
+ `build_launch_spec()` resolves the codex.cmd path on Windows
+- `src/cli_agent_orchestrator/multiplexers/wezterm.py` — `_spawn`
+ consumes `launch_spec.argv` and `launch_spec.env`
+- `src/cli_agent_orchestrator/multiplexers/tmux.py` — `del launch_spec`
+ (tmux ignores it; the keyword is harmless to pass)
+- `docs/PLAN-phase2.md` — the binding spec; consult for design intent
+- `test/multiplexers/test_wezterm_multiplexer.py` — the
+ `_spawn` / `LaunchSpec` tests
+- `test/providers/test_codex_provider_unit.py` — codex initialize tests
+ (mock `get_multiplexer`, `MagicMock(spec=WezTermMultiplexer)`)
+
+## Goal
+
+Make codex start correctly on WezTerm in production. Two acceptable
+shapes — pick the one with the smaller blast radius:
+
+**Option A (recommended): provider-supplied launch_spec.**
+Add a method to BaseProvider like `get_launch_spec(multiplexer) ->
+Optional[LaunchSpec]` that returns None by default. Override on
+CodexProvider to return `build_launch_spec("codex",
+self._build_codex_argv())` when `isinstance(multiplexer,
+WezTermMultiplexer)`, else None. terminal_service calls this AFTER
+deciding the multiplexer but BEFORE create_session/create_window, and
+passes the result. Pros: each provider owns its own direct-spawn
+decision; terminal_service stays backend-agnostic.
+
+**Option B: codex-specific helper in terminal_service.**
+A small `_compute_launch_spec(provider, multiplexer)` helper that today
+returns codex's spec on WezTerm and None otherwise. Less abstraction
+machinery, more concrete. Acceptable if Option A feels heavy.
+
+Either way, after the fix:
+- On WezTerm + codex: WezTerm spawns codex.cmd directly with the right
+ argv/env. CodexProvider.initialize() takes the
+ `direct_spawned_wezterm` branch and skips send_keys (this is the
+ intended behavior — verify it).
+- On WezTerm + claude: unchanged. Claude's send-keys flow continues to
+ work via the paste primitives.
+- On tmux + anything: unchanged. tmux discards launch_spec.
+
+## Constraints
+
+- **Do NOT commit, do NOT push.** Leave a clean working-tree change for
+ Opus to review and commit. (This is per session protocol — see
+ `CLAUDE.local.md` if it exists.)
+- Conventional commit-style messages are not your job here; just
+ describe the change in a final summary.
+- Scope discipline: do NOT refactor unrelated code. Don't touch the
+ inbox/terminal_service skill_prompt logic or anything outside the
+ launch_spec wiring path.
+- Update tests to reflect the new wiring. The current
+ `test_codex_provider_unit.py` mocks `get_multiplexer` — make sure new
+ tests cover the case where terminal_service passes a launch_spec to
+ the multiplexer for codex on WezTerm, and the case where it does not
+ for codex on tmux.
+- Run `.venv/Scripts/python.exe -m pytest test/multiplexers
+ test/providers test/services --no-cov --deselect
+ test/providers/test_copilot_cli_unit.py` — the 7 pre-existing Windows
+ symlink failures (test_q_cli_integration.py and
+ test_tmux_working_directory.py) are environmental and unrelated;
+ ignore them. Everything else must pass.
+
+## Deliverable
+
+Modify the source + tests in place, leave a clean working tree, and
+write a short markdown summary at `spikes/TSK-082-result.md` covering:
+
+1. Which option (A or B) you picked and one sentence on why.
+2. List of files changed.
+3. Pytest result line (e.g., `795 passed, 7 failed (pre-existing), 16
+ skipped`).
+4. Any subtleties Opus should review before committing.
+
+That's it. Go.
diff --git a/spikes/TSK-082-result.md b/spikes/TSK-082-result.md
new file mode 100644
index 000000000..14f963f76
--- /dev/null
+++ b/spikes/TSK-082-result.md
@@ -0,0 +1,75 @@
+# TSK-082 — Wire LaunchSpec end-to-end (result)
+
+## Option chosen
+
+**Option A — provider-supplied launch_spec.** `BaseProvider` exposes a
+default `get_launch_spec(multiplexer) -> Optional[LaunchSpec]` that
+returns `None`; `CodexProvider` overrides it to return a resolved
+direct-spawn spec only when the active multiplexer is
+`WezTermMultiplexer`. `terminal_service.create_terminal` calls the
+provider for a spec before pane creation and forwards the result into
+`create_session`/`create_window`.
+
+Rationale: keeps `terminal_service` backend-agnostic — each provider
+owns its own direct-spawn decision, so adding the same path for
+Claude/Gemini later is a one-method override, not a new branch in the
+service.
+
+## Files changed
+
+- `src/cli_agent_orchestrator/providers/base.py` — added
+ `get_launch_spec()` default seam (returns None).
+- `src/cli_agent_orchestrator/providers/codex.py` — override returns a
+ cached `LaunchSpec` for `WezTermMultiplexer`, `None` otherwise.
+- `src/cli_agent_orchestrator/services/terminal_service.py` —
+ reordered: build provider BEFORE pane creation so the launch_spec
+ decision is known at `create_session`/`create_window` time. After
+ the multiplexer returns the actual window name, the provider's
+ `session_name`/`window_name` are updated to match before
+ `initialize()` runs.
+- `test/providers/test_codex_provider_unit.py` — `+18`,
+ asserts WezTerm path returns a populated LaunchSpec and tmux path
+ returns None.
+- `test/services/test_terminal_service.py` — `+26`, asserts the
+ service calls `provider.get_launch_spec(multiplexer)` and forwards
+ the spec to `create_session`.
+- `test/services/test_terminal_service_full.py` — `+2`, asserts
+ `get_launch_spec` is called once per `create_terminal` invocation.
+
+## Test result
+
+`.venv/Scripts/python.exe -m pytest test/multiplexers test/providers
+test/services --no-cov --deselect test/providers/test_copilot_cli_unit.py`
+
+→ **798 passed, 7 failed (pre-existing Windows symlink env issues —
+test_q_cli_integration and test_tmux_working_directory; same baseline
+as Wave C), 16 skipped, 33 deselected.**
+
+Net delta vs Wave C: +3 passing tests, no regressions.
+
+## Subtleties to review before commit
+
+1. **Provider lifetime reordered.** `create_terminal` now constructs
+ the provider BEFORE pane creation (was after). The error-handling
+ block already calls `provider_manager.cleanup_provider(terminal_id)`
+ before any pane teardown, so a failure between provider creation
+ and pane creation cleans up correctly.
+2. **Post-creation mutation of `provider_instance.session_name` and
+ `window_name`.** Multiplexers may return a different window_name
+ than requested (e.g. tmux dedup); the provider was constructed with
+ the requested name and is updated afterwards. Acceptable for now,
+ but the abstraction would be cleaner if BaseProvider exposed a
+ single `bind(session_name, window_name)` setter, or if launch-spec
+ computation moved off the provider instance entirely (classmethod
+ or factory).
+3. **`CodexProvider.initialize()` was not simplified.** It still has
+ the `if self._launch_spec is None: self._launch_spec = build_launch_spec(...)`
+ rebuild and the `direct_spawned_wezterm = isinstance(...)` skip.
+ When `terminal_service` calls `get_launch_spec` first the rebuild
+ becomes a no-op (cache hit), so the existing logic is correct but
+ redundant on the WezTerm path. A follow-up could collapse it once
+ we trust every code path goes through `terminal_service`.
+4. **`launch_spec` parameter on `create_terminal`** is preserved but
+ still has no caller. It now acts as an explicit override that beats
+ the provider's `get_launch_spec`. Reasonable seam for future
+ testing/mcp_server use; safe to leave.
diff --git a/spikes/TSK-083-prompt.md b/spikes/TSK-083-prompt.md
new file mode 100644
index 000000000..d318d1d2b
--- /dev/null
+++ b/spikes/TSK-083-prompt.md
@@ -0,0 +1,227 @@
+# TSK-083 — Replace broken `wezterm cli spawn --set-environment` with argv-wrap
+
+You are working in `C:\dev\aws-cao` (Windows host, git-bash shell). Branch
+`wezterm-multiplexer` is checked out (Draft PR #206). Tests run via
+`.venv/Scripts/python.exe -m pytest …`.
+
+## The bug
+
+`src/cli_agent_orchestrator/multiplexers/wezterm.py:_spawn` (lines ~91–112)
+emits `--set-environment KEY=VALUE` arguments to `wezterm cli spawn`. **That
+flag does not exist.** WezTerm silently ignores unknown args, so every spawn
+on this branch loses `CAO_TERMINAL_ID` and any `launch_spec.env`. This was an
+unverified spike assumption (TSK-078). The first real WezTerm smoke run on
+marcwin caught it.
+
+Upstream is a dead end: `wezterm cli spawn --help` (verified at
+wezterm.org/cli/cli/spawn.html) supports only `[PROG]…`, `--pane-id`,
+`--domain-name`, `--window-id`, `--new-window`, `--cwd`, `--workspace`. No env
+flag, hidden or otherwise. Issue
+[wezterm/wezterm#6565](https://github.com/wezterm/wezterm/issues/6565) was
+closed by @wez on 2025-02-09 ("not in scope") with no PR. There is no Lua
+callback that fires for `cli spawn`, so a config-side workaround is also
+unavailable. The fix has to live in CAO.
+
+## The fix
+
+Wrap the spawned argv with a per-platform env-injection shim. The CAO terminal
+ID and any `launch_spec.env` values are set by the wrapper before it execs
+into the target.
+
+**Unix wrapper (preferred — actually exec-replaces, so the target is the pane's root process):**
+
+```text
+env CAO_TERMINAL_ID=<id> [K1=V1 …] -- <target> [args …]
+```
+
+**Windows wrapper (PowerShell — does NOT exec-replace; powershell.exe stays
+in the tree as parent of the target. This is fine, see "Why this is safe"
+below):**
+
+```text
+powershell.exe -NoLogo -NoProfile -Command "$env:CAO_TERMINAL_ID='<id>'; [$env:K1='V1'; …] & '<target>' @args"
+```
+
+When `launch_spec` is `None` or has no argv, fall back to the user's default
+shell so the env-wrap still runs:
+
+- Windows: `os.environ.get("COMSPEC", r"C:\Windows\System32\cmd.exe")`
+- Unix: `os.environ.get("SHELL", "/bin/sh")`
+
+### Why this is safe (document this in the wezterm.py module docstring)
+
+1. The Unix `env` wrapper exec-replaces itself with the target, so on
+ Linux/macOS the target IS the pane's root process (no wrapper left) — clean.
+2. The Windows PowerShell wrapper does not exec-replace (Windows has no
+ `execve`); `powershell.exe` becomes the immediate child of WezTerm and the
+ target becomes a grandchild. **CAO is immune to this** because
+ `WezTermMultiplexer` does not query `wezterm cli list` or read
+ `process_name` anywhere — verify with `grep -R "cli list\|process_name" src/`
+ (zero hits in src/ outside a `# TODO: cli list not validated` comment).
+ Status detection is regex-against-`get-text` output (see
+ `providers/codex.py` and `providers/claude_code.py` `get_status()`).
+3. Even if a future code path does query the foreground process name,
+ wezterm's `get_foreground_process_name` on Windows walks the descendant
+ tree (`mux/src/localpane.rs:542` → `find_youngest()` at lines 1093–1110)
+ and reports the youngest console-attached descendant. Once the target
+ process starts it has a later `start_time` than the wrapper and wins.
+4. PowerShell's `&` (call operator) runs the target as a child and exits
+ with the child's exit code automatically — no explicit `exit` needed.
+
+Cite issue #6565 and the `find_youngest()` reasoning in the docstring so the
+next maintainer knows why this code looks the way it does.
+
+## Quoting
+
+The PowerShell wrapper builds a single `-Command` string. Use single-quoted
+PowerShell strings (literal — no `$`-expansion); escape embedded single
+quotes by doubling them:
+
+```python
+def _ps_single_quote(value: str) -> str:
+ """Quote a string for a PowerShell single-quoted literal: ' → ''."""
+ return "'" + value.replace("'", "''") + "'"
+```
+
+`CAO_TERMINAL_ID` is a uuid-ish value (safe), but `launch_spec.env` values
+and `argv[0]` (an absolute path that may contain spaces) MUST go through this
+quoter. The argv tail uses `@args` splatting — pass it as an explicit list
+embedded in the `-Command` body via a single `@(...)` array and `&
+'<target>' @args`. Concretely:
+
+```powershell
+$args=@('arg1','arg2'); & 'C:\path\target.exe' @args
+```
+
+Each list element is `_ps_single_quote`'d. This avoids the `cmd /c "X && Y"`
+parsing pitfalls entirely.
+
+The Unix wrapper uses argv-list form (no shell parsing), so values pass
+through verbatim; only the `K=V` formatting needs construction.
+
+## Files to modify
+
+### Source
+
+- `src/cli_agent_orchestrator/multiplexers/wezterm.py` — rewrite `_spawn`
+ to use the new wrappers; add module-level helpers `_default_shell()`,
+ `_wrap_with_env()`, `_ps_single_quote()`. Add a multi-line module
+ docstring (or a clear block comment above `_spawn`) explaining why the
+ wrapper is necessary, citing wezterm issue #6565.
+
+ The new `_spawn` body should be roughly:
+
+ ```python
+ def _spawn(self, working_directory, terminal_id, launch_spec):
+ env_vars = {"CAO_TERMINAL_ID": terminal_id}
+ if launch_spec is not None and launch_spec.env:
+ env_vars.update(launch_spec.env)
+
+ if launch_spec is not None and launch_spec.argv:
+ target_argv = list(launch_spec.argv)
+ else:
+ target_argv = [_default_shell()]
+
+ wrapped = _wrap_with_env(env_vars, target_argv)
+ cmd = [self._bin, "cli", "spawn", "--new-window", "--cwd", working_directory, "--", *wrapped]
+ result = self._run(cmd, None)
+ raw = result.stdout.strip()
+ if not raw.isdigit():
+ raise RuntimeError(...)
+ return raw
+ ```
+
+- Pick the platform inside `_wrap_with_env` via `sys.platform == "win32"`
+ (consistent with `multiplexers/launch.py:default_platform()`). Do NOT
+ add a new dependency; PowerShell is part of every supported Windows
+ install.
+
+### Tests
+
+The current `test/multiplexers/test_wezterm_multiplexer.py` has tests that
+assert the OLD broken `--set-environment` shape (lines ~100–186). Those tests
+codify the bug. **Rewrite them.**
+
+- `test_argv_contains_new_window_cwd_and_terminal_id` (line ~100): assert the
+ argv contains `--new-window`, `--cwd`, the cwd, and a `--` separator. After
+ `--`, the wrapper-specific shape applies (see below). Assert that
+ `CAO_TERMINAL_ID=tid-abc` appears in the wrapper's env-set step (search
+ the joined argv string for either the `env`-style `CAO_TERMINAL_ID=tid-abc`
+ token OR the PowerShell-style `$env:CAO_TERMINAL_ID='tid-abc'` substring,
+ depending on platform). Use `monkeypatch.setattr(sys, "platform", "linux")`
+ / `"win32"` to exercise BOTH shapes — do not skip either platform on the
+ other host.
+
+- `test_launch_spec_argv_appended_after_double_dash` (line ~140): the spec's
+ argv is now wrapped; assert that the FINAL invocation seen by the wrapper
+ contains the spec's argv tokens. On Unix: assert the argv slice starting
+ after the wrapper's `K=V` block matches the spec argv. On Windows: assert
+ `_ps_single_quote('codex.cmd')` and `_ps_single_quote('--yolo')` substrings
+ appear inside the `-Command` string.
+
+- `test_launch_spec_env_adds_set_environment` (line ~162): rename to e.g.
+ `test_launch_spec_env_passed_through_wrapper` and assert the env values
+ show up in the wrapper's env-injection step on both platforms.
+
+- Add new tests:
+ - `test_default_shell_used_when_launch_spec_is_none` — spec is None, the
+ wrapped argv is `[default_shell]`, env still injected.
+ - `test_ps_single_quote_doubles_embedded_single_quote` — direct unit test
+ on `_ps_single_quote("it's")` returning `"'it''s'"`.
+ - `test_windows_powershell_invocation_shape` — mock platform=win32, assert
+ `argv` after `--` is `["powershell.exe", "-NoLogo", "-NoProfile",
+ "-Command", <command string>]`.
+ - `test_unix_env_invocation_shape` — mock platform=linux, assert the
+ wrapper starts with `["env", "CAO_TERMINAL_ID=…", "--", …]`.
+
+Do NOT modify `test/smoke/*` — those are manual integration tests Marc
+runs on the host. Their existing assertions on the spawn flow will keep
+passing because they only check end-to-end behavior (pane appears, target
+reports its terminal_id), which the new wrapper preserves.
+
+## Out of scope (do not touch)
+
+- `multiplexers/tmux.py` — tmux ignores `launch_spec` and uses its own env
+ injection (`tmux send-keys` env arg). Leave alone.
+- `multiplexers/launch.py` — `build_launch_spec()` resolves the codex path;
+ unrelated to env passing.
+- Any provider files. The `direct_spawned_wezterm` skip in
+ `providers/codex.py` is correct given that the wrapper now actually
+ delivers the env vars.
+
+## Constraints
+
+- Default branch: `wezterm-multiplexer`. Do NOT push. Do NOT open a new
+ PR (#206 tracks this branch).
+- Conventional commits — but YOU don't commit. Leave a clean working
+ tree for Opus to review and commit.
+- Scope discipline. No unrelated refactors, no abstraction beyond what
+ the wrapper requires.
+- No type-only or comment-only churn outside the touched functions.
+
+## Verify
+
+Run before declaring done (working in repo root, git-bash):
+
+```bash
+.venv/Scripts/python.exe -m pytest test/multiplexers test/providers test/services --no-cov --deselect test/providers/test_copilot_cli_unit.py
+```
+
+Pre-existing Windows symlink failures in `test_q_cli_integration.py` and
+`test_tmux_working_directory.py` are environmental and unrelated — ignore
+them. Everything else (≥792 tests) must pass. Report the exact pytest
+result line in your summary.
+
+## Deliverable
+
+Modify source + tests in place; leave a clean working tree. Write
+`spikes/TSK-083-result.md` (matching TSK-082-result.md style):
+
+1. One-paragraph summary of the change.
+2. List of files changed (with `+N/-M` line counts when easily available).
+3. Pytest result line.
+4. Any subtleties Opus should review before committing — especially around
+ PowerShell quoting edge cases and how the new tests parameterize over
+ `sys.platform`.
+
+That's it. Go.
diff --git a/spikes/TSK-083-result.md b/spikes/TSK-083-result.md
new file mode 100644
index 000000000..c49bc04e1
--- /dev/null
+++ b/spikes/TSK-083-result.md
@@ -0,0 +1,47 @@
+# TSK-083 — Replace broken `wezterm cli spawn --set-environment` with argv-wrap (result)
+
+## Option chosen
+
+Replaced the broken `wezterm cli spawn --set-environment ...` path in
+`WezTermMultiplexer._spawn()` with an argv wrapper that injects
+`CAO_TERMINAL_ID` plus any `launch_spec.env` values before starting the real
+target. Unix now uses `env KEY=VALUE -- <argv>` so the target exec-replaces
+cleanly, while Windows uses a PowerShell `-Command` wrapper that single-quotes
+all injected values and splats an explicit `@(...)` args array into `&
+'<target>' @args`. When `launch_spec` is missing or has no argv, the wrapper falls
+back to the platform default shell so env injection still happens. The module
+docstring now explains why this exists, cites wezterm/wezterm#6565, and
+documents why the non-`exec` Windows process tree is acceptable for CAO.
+
+## Files changed
+
+- `src/cli_agent_orchestrator/multiplexers/wezterm.py` — `+79/-7`
+ rewrote `_spawn()`, added `_default_shell()`, `_wrap_with_env()`,
+ `_ps_single_quote()`, and documented the WezTerm limitation and Windows
+ process-tree reasoning.
+- `test/multiplexers/test_wezterm_multiplexer.py` — `+106/-19`
+ replaced the old `--set-environment` assertions with wrapper-shape checks for
+ both `sys.platform == "linux"` and `"win32"`, added default-shell coverage,
+ direct quoting coverage, and explicit Unix/PowerShell invocation-shape tests.
+
+## Test result
+
+`.venv/Scripts/python.exe -m pytest test/multiplexers test/providers test/services --no-cov --deselect test/providers/test_copilot_cli_unit.py`
+
+→ **========== 6 failed, 807 passed, 16 skipped, 33 deselected in 23.74s ==========**
+
+Failures are the pre-existing Windows environment issues in
+`test/providers/test_q_cli_integration.py` and
+`test/providers/test_tmux_working_directory.py`.
+
+## Subtleties to review before commit
+
+1. PowerShell quoting is intentionally single-quoted and only escapes embedded
+ `'` by doubling it. That covers the target executable path, env values, and
+ each argv element without going through `cmd /c` parsing.
+2. The Windows wrapper always builds `$args=@(...)` and then runs `&
+ '<target>' @args`; there is no explicit `exit` because PowerShell propagates the
+ child exit code automatically.
+3. The new tests force both wrapper shapes by monkeypatching `sys.platform`
+ inside the same host environment, so Linux and Windows behavior are both
+ exercised even on a Windows runner.
diff --git a/spikes/spike01.py b/spikes/spike01.py
new file mode 100644
index 000000000..211d28cf8
--- /dev/null
+++ b/spikes/spike01.py
@@ -0,0 +1,98 @@
+from __future__ import annotations
+
+import subprocess
+import time
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parent.parent
+SPIKES = ROOT / "spikes"
+WEZTERM = Path(r"C:\Users\marc\Downloads\WezTerm-windows-20260331-040028-577474d8\wezterm.exe")
+
+
+def run(*args: str, check: bool = True) -> subprocess.CompletedProcess[str]:
+ return subprocess.run(
+ [str(WEZTERM), *args],
+ text=True,
+ capture_output=True,
+ check=check,
+ )
+
+
+def wait_for_text(pane_id: str, needle: str, timeout_s: float = 5.0) -> str:
+ deadline = time.time() + timeout_s
+ latest = ""
+ while time.time() < deadline:
+ latest = run("cli", "get-text", "--pane-id", pane_id).stdout
+ if needle in latest:
+ return latest
+ time.sleep(0.2)
+ return latest
+
+
+def main() -> int:
+ started_at = time.time()
+ result_path = SPIKES / "01-result.md"
+ pane_id = None
+ verdict = "NO-GO"
+ summary = "WezTerm CLI round-trip failed before validation completed."
+ evidence = []
+ try:
+ version = run("--version").stdout.strip()
+ spawn = run("cli", "spawn", "--new-window", "--", "bash", "-lc", "printf 'SHELL_READY\\n'; exec bash")
+ pane_id = spawn.stdout.strip()
+ evidence.append(f"- `spawn` pane id: `{pane_id}`")
+ ready_text = wait_for_text(pane_id, "SHELL_READY", timeout_s=8)
+ evidence.append(f"- shell ready marker observed: `{'SHELL_READY' in ready_text}`")
+
+ send = run(
+ "cli",
+ "send-text",
+ "--pane-id",
+ pane_id,
+ "--no-paste",
+ "echo hello-from-spike\n",
+ )
+ evidence.append(f"- `send-text` exit code: `{send.returncode}`")
+ text = wait_for_text(pane_id, "hello-from-spike", timeout_s=5)
+ contains = "hello-from-spike" in text
+ evidence.append(f"- `get-text` contains marker: `{contains}`")
+ evidence.append("```text\n" + text.strip() + "\n```")
+
+ if contains:
+ verdict = "GO"
+ summary = "spawn/send-text/get-text/kill-pane all worked with a standalone WezTerm window."
+ except subprocess.CalledProcessError as exc:
+ evidence.append(f"- command failed: `{exc.cmd}`")
+ evidence.append(f"- return code: `{exc.returncode}`")
+ if exc.stdout:
+ evidence.append("```text\n" + exc.stdout.strip() + "\n```")
+ if exc.stderr:
+ evidence.append("```text\n" + exc.stderr.strip() + "\n```")
+ finally:
+ if pane_id:
+ run("cli", "kill-pane", "--pane-id", pane_id, check=False)
+
+ duration_ms = round((time.time() - started_at) * 1000)
+ body = "\n".join(
+ [
+ "# Spike 1 Result",
+ "",
+ f"- Verdict: **{verdict}**",
+ f"- Summary: {summary}",
+ f"- WezTerm binary: `{WEZTERM}`",
+ f"- WezTerm version: `{locals().get('version', 'unavailable')}`",
+ f"- Duration: `{duration_ms} ms`",
+ "",
+ "## Evidence",
+ *evidence,
+ "",
+ ]
+ )
+ result_path.write_text(body, encoding="utf-8")
+ print(f"spike1 verdict={verdict}")
+ return 0 if verdict == "GO" else 1
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/spikes/spike02.py b/spikes/spike02.py
new file mode 100644
index 000000000..d2234dd2c
--- /dev/null
+++ b/spikes/spike02.py
@@ -0,0 +1,142 @@
+from __future__ import annotations
+
+import subprocess
+import time
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parent.parent
+SPIKES = ROOT / "spikes"
+WEZTERM = Path(r"C:\Users\marc\Downloads\WezTerm-windows-20260331-040028-577474d8\wezterm.exe")
+WORKDIR = str(ROOT)
+
+CLIS = {
+ "claude": ["claude"],
+ "codex": ["codex"],
+ "gemini": ["gemini"],
+}
+
+
+def run_cmd(cmd: list[str], check: bool = False) -> subprocess.CompletedProcess[str]:
+ return subprocess.run(cmd, text=True, capture_output=True, check=check)
+
+
+def wez(*args: str, check: bool = True) -> subprocess.CompletedProcess[str]:
+ return run_cmd([str(WEZTERM), *args], check=check)
+
+
+def command_exists(name: str) -> bool:
+ return run_cmd(["bash", "-lc", f"command -v {name}"]).returncode == 0
+
+
+def capture_help(cli: str, paste: bool) -> tuple[bool, str]:
+ pane_id = None
+ try:
+ pane_id = wez(
+ "cli",
+ "spawn",
+ "--new-window",
+ "--cwd",
+ WORKDIR,
+ "--",
+ *CLIS[cli],
+ check=True,
+ ).stdout.strip()
+ time.sleep(5 if cli == "gemini" else 4)
+ args = ["cli", "send-text", "--pane-id", pane_id]
+ if not paste:
+ args.append("--no-paste")
+ args.append("/help\n")
+ wez(*args, check=True)
+ time.sleep(3)
+ text = wez("cli", "get-text", "--pane-id", pane_id, check=True).stdout
+ lowered = text.lower()
+ matched = any(
+ token in lowered
+ for token in [
+ "/help",
+ "keyboard shortcuts",
+ "slash commands",
+ "commands",
+ "help",
+ ]
+ )
+ return matched, text
+ finally:
+ if pane_id:
+ wez("cli", "kill-pane", "--pane-id", pane_id, check=False)
+
+
+def main() -> int:
+ result_path = SPIKES / "02-result.md"
+ rows: list[tuple[str, str, str, str]] = []
+ verdict_parts: list[str] = []
+ recommended = []
+ notes = []
+
+ for cli in ["claude", "codex", "gemini"]:
+ if not command_exists(cli):
+ rows.append((cli, "blocked", "blocked", "command not installed or not on PATH"))
+ verdict_parts.append(f"{cli}: blocked")
+ notes.append(f"- `{cli}` could not be tested because the executable is unavailable in this environment.")
+ continue
+
+ a_ok, a_text = capture_help(cli, paste=False)
+ b_ok, b_text = capture_help(cli, paste=True)
+
+ if a_ok and b_ok:
+ verdict = "both"
+ elif a_ok:
+ verdict = "A"
+ elif b_ok:
+ verdict = "B"
+ else:
+ verdict = "neither"
+
+ verdict_parts.append(f"{cli}: {verdict}")
+ recommended.append(f"- `{cli}`: prefer `{'default paste' if b_ok and not a_ok else '--no-paste' if a_ok and not b_ok else 'either mode works' if a_ok and b_ok else 'custom workaround needed'}`")
+ a_excerpt = "\n".join(a_text.strip().splitlines()[-12:])
+ b_excerpt = "\n".join(b_text.strip().splitlines()[-12:])
+ rows.append(
+ (
+ cli,
+ "pass" if verdict != "neither" else "fail",
+ verdict,
+ f"[A --no-paste]\n{a_excerpt}\n\n[B default paste]\n{b_excerpt}",
+ )
+ )
+
+ verdict = "NEEDS-WORKAROUND" if any("neither" in part or "blocked" in part for part in verdict_parts) else "GO"
+ body_lines = [
+ "# Spike 2 Result",
+ "",
+ f"- Verdict: **{verdict}**",
+ f"- Per-CLI verdicts: `{', '.join(verdict_parts)}`",
+ "- Mode A: `wezterm cli send-text --no-paste -- '/help\\n'`",
+ "- Mode B: `wezterm cli send-text -- '/help\\n'`",
+ "",
+ "## Recommendation",
+ *(recommended or ["- No CLI-specific recommendation available."]),
+ "",
+ "## Evidence",
+ ]
+ for cli, status, mode, excerpt in rows:
+ body_lines.extend(
+ [
+ f"### {cli}",
+ f"- Status: `{status}`",
+ f"- Accepted mode: `{mode}`",
+ "```text",
+ excerpt,
+ "```",
+ ]
+ )
+ if notes:
+ body_lines.extend(["", "## Environment Notes", *notes])
+ result_path.write_text("\n".join(body_lines) + "\n", encoding="utf-8")
+ print(f"spike2 verdict={verdict}")
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/spikes/spike03.py b/spikes/spike03.py
new file mode 100644
index 000000000..33161108c
--- /dev/null
+++ b/spikes/spike03.py
@@ -0,0 +1,118 @@
+from __future__ import annotations
+
+import json
+import subprocess
+import threading
+import time
+from pathlib import Path
+
+
+# Parent of the directory containing this script.
+ROOT = Path(__file__).resolve().parent.parent
+# Directory the result file (03-result.md) is written into.
+SPIKES = ROOT / "spikes"
+# NOTE(review): machine-specific absolute path to the WezTerm binary; the spike
+# can only run where this exact location exists.
+WEZTERM = Path(r"C:\Users\marc\Downloads\WezTerm-windows-20260331-040028-577474d8\wezterm.exe")
+# Poll intervals, in seconds; main() runs one trial per entry.
+INTERVALS = [0.1, 0.2, 0.5]
+
+
+def wez(*args: str, check: bool = True) -> subprocess.CompletedProcess[str]:
+    """Run the WezTerm binary with ``args`` and return the completed process.
+
+    stdout/stderr are captured as text. With ``check=True`` a non-zero exit
+    status raises ``subprocess.CalledProcessError``.
+    """
+    command = [str(WEZTERM)]
+    command.extend(args)
+    return subprocess.run(command, check=check, capture_output=True, text=True)
+
+
+def trial(interval: float) -> dict[str, float | int]:
+    """Measure marker-detection latency and burst misses for one poll interval.
+
+    Spawns a bash pane with ``wezterm cli spawn``, polls it with ``get-text``
+    every ``interval`` seconds on a background thread, sends one marker and
+    then a burst of ten markers, and reports what the polling observed. The
+    poller is stopped and the pane is killed even when a step fails.
+
+    Args:
+        interval: Seconds the poller waits between ``get-text`` calls.
+
+    Returns:
+        A dict with ``interval_ms``, ``first_detection_ms``, ``cpu_percent``,
+        ``polls`` and ``miss_count``. ``miss_count`` counts burst markers
+        absent from the final capture, plus one when the poller never saw the
+        first marker; in that case ``first_detection_ms`` is only the time
+        waited before giving up, not a real latency.
+
+    Raises:
+        subprocess.CalledProcessError: If a ``wezterm cli`` call made with
+            ``check=True`` on the calling thread exits non-zero.
+    """
+    pane_id = wez(
+        "cli",
+        "spawn",
+        "--new-window",
+        "--",
+        "bash",
+        "-lc",
+        "printf 'SHELL_READY\\n'; exec bash",
+        check=True,
+    ).stdout.strip()
+    marker = f"SPIKE-MARKER-{time.time_ns()}"
+    stop = threading.Event()
+    seen = {"detected_at": None, "polls": 0}
+
+    def poller() -> None:
+        while not stop.is_set():
+            # check=False: one failed capture must not kill the poller thread;
+            # the poll is skipped (and not counted) and retried next tick.
+            result = wez("cli", "get-text", "--pane-id", pane_id, check=False)
+            if result.returncode == 0:
+                seen["polls"] += 1
+                if marker in result.stdout and seen["detected_at"] is None:
+                    seen["detected_at"] = time.perf_counter()
+            # Event.wait() sleeps like time.sleep() but returns as soon as
+            # stop is set, so shutdown does not wait out a full interval.
+            stop.wait(interval)
+
+    thread = threading.Thread(target=poller, daemon=True)
+    try:
+        cpu_before = time.process_time()
+        wall_start = time.perf_counter()
+        thread.start()
+
+        # Wait (up to 8 s) for the shell banner before sending input.
+        ready_deadline = time.perf_counter() + 8
+        while time.perf_counter() < ready_deadline:
+            text = wez("cli", "get-text", "--pane-id", pane_id, check=True).stdout
+            if "SHELL_READY" in text:
+                break
+            time.sleep(0.1)
+        time.sleep(interval * 2)
+
+        # Timestamp taken *before* send-text so the latency includes the cost
+        # of the send itself and can never come out negative.
+        send_started_at = time.perf_counter()
+        wez("cli", "send-text", "--pane-id", pane_id, "--no-paste", f"echo {marker}\n", check=True)
+
+        deadline = time.perf_counter() + 10
+        while seen["detected_at"] is None and time.perf_counter() < deadline:
+            time.sleep(0.01)
+        detected_at = seen["detected_at"]
+        marker_missed = detected_at is None
+        reference = time.perf_counter() if marker_missed else detected_at
+        first_detection_ms = round((reference - send_started_at) * 1000, 1)
+
+        burst_markers = [f"BURST-{idx}-{time.time_ns()}" for idx in range(10)]
+        burst_script = "; ".join(f"echo {m}; sleep 0.05" for m in burst_markers)
+        wez("cli", "send-text", "--pane-id", pane_id, "--no-paste", burst_script + "\n", check=True)
+        time.sleep(max(2, interval * 15))
+        final_text = wez("cli", "get-text", "--pane-id", pane_id, check=True).stdout
+        # An undetected first marker counts as a miss so callers filtering on
+        # miss_count == 0 never treat a timed-out trial as clean.
+        miss_count = sum(1 for m in burst_markers if m not in final_text) + int(marker_missed)
+
+        stop.set()
+        thread.join(timeout=2)
+        # process_time() covers this Python process only; CPU spent inside the
+        # spawned `wezterm cli` child processes is not included.
+        cpu_after = time.process_time()
+        wall_elapsed = max(time.perf_counter() - wall_start, 0.001)
+    finally:
+        # Always stop the poller and remove the pane, including on failure.
+        stop.set()
+        if thread.is_alive():
+            thread.join(timeout=2)
+        wez("cli", "kill-pane", "--pane-id", pane_id, check=False)
+    return {
+        "interval_ms": int(interval * 1000),
+        "first_detection_ms": first_detection_ms,
+        "cpu_percent": round(max(cpu_after - cpu_before, 0.0) / wall_elapsed * 100, 2),
+        "polls": int(seen["polls"]),
+        "miss_count": miss_count,
+    }
+
+
+def main() -> int:
+    """Run one trial per entry in ``INTERVALS`` and write ``03-result.md``.
+
+    The recommended interval is the trial with the lowest
+    ``first_detection_ms`` among those with ``miss_count == 0``; the verdict
+    is ``GO`` when such a trial exists, otherwise ``NEEDS-WORKAROUND``.
+
+    Returns:
+        Always 0.
+    """
+    trials = []
+    for interval in INTERVALS:
+        trials.append(trial(interval))
+
+    clean = [t for t in trials if t["miss_count"] == 0]
+    recommended = min(clean, key=lambda t: t["first_detection_ms"], default=None)
+    if recommended:
+        verdict = "GO"
+        recommendation = f"- Recommended interval: `{recommended['interval_ms']} ms`"
+    else:
+        verdict = "NEEDS-WORKAROUND"
+        recommendation = "- Recommended interval: none"
+
+    rows = [
+        f"| {t['interval_ms']} ms | {t['first_detection_ms']} | {t['cpu_percent']} | {t['polls']} | {t['miss_count']} |"
+        for t in trials
+    ]
+    body = [
+        "# Spike 3 Result",
+        "",
+        f"- Verdict: **{verdict}**",
+        recommendation,
+        "",
+        "## Measurements",
+        "",
+        "| Interval | First detection (ms) | CPU % | Poll count | Miss count |",
+        "|---|---:|---:|---:|---:|",
+        *rows,
+        "",
+        "## Raw JSON",
+        "```json",
+        json.dumps(trials, indent=2),
+        "```",
+        "",
+    ]
+    (SPIKES / "03-result.md").write_text("\n".join(body), encoding="utf-8")
+    print(f"spike3 verdict={verdict}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/spikes/spike04.py b/spikes/spike04.py
new file mode 100644
index 000000000..304d43462
--- /dev/null
+++ b/spikes/spike04.py
@@ -0,0 +1,166 @@
+from __future__ import annotations
+
+import re
+import subprocess
+import time
+from pathlib import Path
+
+
+# Parent of the directory containing this script.
+ROOT = Path(__file__).resolve().parent.parent
+# Directory the result file (04-result.md) is written into.
+SPIKES = ROOT / "spikes"
+# NOTE(review): machine-specific absolute path to the WezTerm binary; the spike
+# can only run where this exact location exists.
+WEZTERM = Path(r"C:\Users\marc\Downloads\WezTerm-windows-20260331-040028-577474d8\wezterm.exe")
+# Passed as --cwd when a probe pane is spawned.
+WORKDIR = str(ROOT)
+
+# Per-CLI probe description:
+#   file     - provider source file the regex constants are read from
+#   command  - argv spawned in the WezTerm pane
+#   patterns - names of the regex constants to extract and test
+TARGETS = {
+    "claude": {
+        "file": ROOT / "src" / "cli_agent_orchestrator" / "providers" / "claude_code.py",
+        "command": ["claude"],
+        "patterns": [
+            "IDLE_PROMPT_PATTERN",
+            "TRUST_PROMPT_PATTERN",
+            "BYPASS_PROMPT_PATTERN",
+        ],
+    },
+    "codex": {
+        "file": ROOT / "src" / "cli_agent_orchestrator" / "providers" / "codex.py",
+        "command": ["codex"],
+        "patterns": [
+            "IDLE_PROMPT_PATTERN",
+            "TRUST_PROMPT_PATTERN",
+            "WAITING_PROMPT_PATTERN",
+            "CODEX_WELCOME_PATTERN",
+        ],
+    },
+    "gemini": {
+        "file": ROOT / "src" / "cli_agent_orchestrator" / "providers" / "gemini_cli.py",
+        "command": ["gemini"],
+        "patterns": [
+            "IDLE_PROMPT_PATTERN",
+            "WELCOME_BANNER_PATTERN",
+            "RESPONDING_WITH_PATTERN",
+        ],
+    },
+}
+
+
+def wez(*args: str, check: bool = True) -> subprocess.CompletedProcess[str]:
+    """Invoke the WezTerm binary with ``args``, capturing output as text.
+
+    With ``check=True`` a non-zero exit status raises
+    ``subprocess.CalledProcessError``; otherwise the caller inspects the
+    returned ``CompletedProcess``.
+    """
+    argv = [str(WEZTERM)] + list(args)
+    return subprocess.run(argv, check=check, capture_output=True, text=True)
+
+
+def exists(cmd: str) -> bool:
+    """Return True when ``command -v cmd`` succeeds in a bash login shell."""
+    probe = subprocess.run(["bash", "-lc", f"command -v {cmd}"], capture_output=True)
+    return probe.returncode == 0
+
+
+def extract_constants(path: Path, names: list[str]) -> dict[str, str]:
+    """Read raw-string constants named in ``names`` out of the file at ``path``.
+
+    A constant is recognised only as ``NAME = r"..."`` or ``NAME = r'...'``
+    with NAME at the start of a line; the captured value is the text between
+    the quotes. Names with no such assignment are omitted from the result.
+    """
+    source = path.read_text(encoding="utf-8")
+    hits = (
+        (name, re.search(rf"^{name}\s*=\s*r([\"'])(.*?)\1", source, flags=re.MULTILINE))
+        for name in names
+    )
+    return {name: hit.group(2) for name, hit in hits if hit}
+
+
+def strip_ansi(text: str) -> str:
+    """Return ``text`` with ``ESC [ <digits ; ?> <letter>`` sequences removed."""
+    csi_sequence = re.compile(r"\x1b\[[0-9;?]*[A-Za-z]")
+    return csi_sequence.sub("", text)
+
+
+def capture_runtime(cli: str, command: list[str]) -> tuple[str, str]:
+    """Spawn ``command`` in a new WezTerm window and capture its pane text.
+
+    Waits five seconds after spawning, then reads the pane once without and
+    once with ``--escapes``. The pane is killed afterwards, including when a
+    capture fails. ``cli`` is accepted but not used by this function.
+
+    Returns:
+        ``(plain, escaped)`` pane text.
+    """
+    spawned_pane = None
+    try:
+        spawn = wez("cli", "spawn", "--new-window", "--cwd", WORKDIR, "--", *command, check=True)
+        spawned_pane = spawn.stdout.strip()
+        time.sleep(5)
+        get_text = ("cli", "get-text", "--pane-id", spawned_pane)
+        plain = wez(*get_text, check=True).stdout
+        escaped = wez(*get_text, "--escapes", check=True).stdout
+        return plain, escaped
+    finally:
+        if spawned_pane:
+            wez("cli", "kill-pane", "--pane-id", spawned_pane, check=False)
+
+
+def main() -> int:
+    """Probe each CLI in ``TARGETS`` under WezTerm and write ``04-result.md``.
+
+    For every target the provider's regex constants are extracted and listed;
+    if the executable is available, its startup text is captured and each
+    pattern is tested against the plain and ``--escapes`` captures. The
+    verdict is ``NEEDS-WORKAROUND`` when any CLI is unavailable or any pattern
+    is missing/unmatched, otherwise ``GO``.
+
+    Returns:
+        Always 0.
+    """
+    result_path = SPIKES / "04-result.md"
+    body = ["# Spike 4 Result", ""]
+    summary_bits: list[str] = []
+    diff_snippets: list[str] = []
+    needs_workaround = False
+
+    for cli, meta in TARGETS.items():
+        constants = extract_constants(meta["file"], meta["patterns"])
+        body.extend([f"## {cli}", f"- Source: `{meta['file'].relative_to(ROOT)}`"])
+        for name in meta["patterns"]:
+            body.append(f"- `{name}` = `{constants.get(name, 'NOT FOUND')}`")
+
+        # Without the executable there is nothing to capture; record and move on.
+        if not exists(cli):
+            body.append(f"- Runtime probe: blocked; `{cli}` executable unavailable.")
+            summary_bits.append(f"{cli}: blocked")
+            needs_workaround = True
+            continue
+
+        plain, escaped = capture_runtime(cli, meta["command"])
+        clean_plain = strip_ansi(plain)
+        clean_escaped = strip_ansi(escaped)
+        # Note: the plain length is measured after stripping, the escaped
+        # length before stripping.
+        body.append(f"- Plain capture length: `{len(clean_plain)}`")
+        body.append(f"- Escaped capture length: `{len(escaped)}`")
+
+        # `matched` is collected but not read again in this function.
+        matched = []
+        missing = []
+        body.append("")
+        body.append("| Pattern | Plain | `--escapes` |")
+        body.append("|---|---|---|")
+        for name in meta["patterns"]:
+            pattern = constants.get(name)
+            # A constant that could not be extracted counts as missing.
+            if not pattern:
+                body.append(f"| `{name}` | missing | missing |")
+                missing.append(name)
+                continue
+            plain_match = bool(re.search(pattern, clean_plain, re.MULTILINE))
+            escaped_match = bool(re.search(pattern, clean_escaped, re.MULTILINE))
+            body.append(f"| `{name}` | `{plain_match}` | `{escaped_match}` |")
+            # A hit in either capture is enough for the pattern to pass.
+            if plain_match or escaped_match:
+                matched.append(name)
+            else:
+                missing.append(name)
+
+        # Last 18 lines of the stripped plain capture, as evidence.
+        excerpt = "\n".join(clean_plain.strip().splitlines()[-18:])
+        body.extend(["", "```text", excerpt, "```", ""])
+        if missing:
+            needs_workaround = True
+            summary_bits.append(f"{cli}: missing {', '.join(missing)}")
+            diff_snippets.extend(
+                [
+                    "```diff",
+                    f"--- a/{meta['file'].relative_to(ROOT).as_posix()}",
+                    f"+++ b/{meta['file'].relative_to(ROOT).as_posix()}",
+                    "@@",
+                    f"-# Existing WezTerm probe did not match: {', '.join(missing)}",
+                    f"+# Phase 2: either normalize WezTerm startup text or broaden these regexes: {', '.join(missing)}",
+                    "```",
+                    "",
+                ]
+            )
+        else:
+            summary_bits.append(f"{cli}: all probed patterns matched")
+
+    verdict = "NEEDS-WORKAROUND" if needs_workaround else "GO"
+    # Splice the verdict/summary in right after the title, ahead of the
+    # per-CLI sections that were appended above.
+    body[1:1] = [f"- Verdict: **{verdict}**", f"- Summary: `{'; '.join(summary_bits)}`", ""]
+    if diff_snippets:
+        body.extend(["## Candidate Regex Patch Notes", *diff_snippets])
+    else:
+        body.extend(["## Candidate Regex Patch Notes", "- No regex changes suggested from this probe."])
+    result_path.write_text("\n".join(body) + "\n", encoding="utf-8")
+    print(f"spike4 verdict={verdict}")
+    return 0
+
+
+# Script entry point: exit the interpreter with the status returned by main().
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/src/cli_agent_orchestrator/api/main.py b/src/cli_agent_orchestrator/api/main.py
index 45e620bbb..a1d036c28 100644
--- a/src/cli_agent_orchestrator/api/main.py
+++ b/src/cli_agent_orchestrator/api/main.py
@@ -1,15 +1,13 @@
"""Single FastAPI entry point for all HTTP routes."""
import asyncio
-import fcntl
import json
import logging
import os
-import pty
import signal
import struct
import subprocess
-import termios
+import sys
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Annotated, Dict, List, Optional, cast
@@ -654,6 +652,24 @@ async def terminal_ws(websocket: WebSocket, terminal_id: str):
await websocket.accept()
+ # PRJ-042 Layer-1 stopgap: this endpoint is a tmux-attach-over-PTY
+ # bridge that requires fcntl/pty/termios (Unix-only) and a tmux
+ # session under the hood. Under the WezTerm multiplexer there is
+ # no equivalent attach-into-PTY model. Short-circuit on Windows
+ # so the rest of cao-server can run; proper multiplexer-aware
+ # streaming is tracked as a Phase 2 follow-up in
+ # docs/PLAN-phase2.md §9.
+ if sys.platform == "win32":
+ await websocket.close(
+ code=4501,
+ reason="Terminal WS streaming is tmux-only and not implemented on Windows",
+ )
+ return
+
+ import fcntl
+ import pty
+ import termios
+
metadata = get_terminal_metadata(terminal_id)
if not metadata:
await websocket.close(code=4004, reason="Terminal not found")
diff --git a/src/cli_agent_orchestrator/clients/tmux.py b/src/cli_agent_orchestrator/clients/tmux.py
index cdaa7c852..d5ba5cdbe 100644
--- a/src/cli_agent_orchestrator/clients/tmux.py
+++ b/src/cli_agent_orchestrator/clients/tmux.py
@@ -1,516 +1,50 @@
-"""Simplified tmux client as module singleton."""
+"""Deprecated re-export shim for the legacy TmuxClient location.
-import logging
-import os
-import subprocess
-import time
-import uuid
-from typing import Dict, List, Optional
+The real implementation now lives in
+``cli_agent_orchestrator.multiplexers.tmux``; new code should use
+``get_multiplexer()`` instead of this shim.
+"""
-import libtmux
+from __future__ import annotations
-from cli_agent_orchestrator.constants import TMUX_HISTORY_LINES
+import sys
+from types import ModuleType
-logger = logging.getLogger(__name__)
+from cli_agent_orchestrator.multiplexers import tmux as _tmux_module
+from cli_agent_orchestrator.multiplexers.tmux import TmuxMultiplexer
+libtmux = _tmux_module.libtmux
+logger = _tmux_module.logger
+subprocess = _tmux_module.subprocess
+time = _tmux_module.time
+uuid = _tmux_module.uuid
-class TmuxClient:
- """Simplified tmux client for basic operations."""
+TmuxClient = TmuxMultiplexer
- def __init__(self) -> None:
- self.server = libtmux.Server()
+# Singleton kept for backwards compatibility with module-level imports.
+tmux_client = TmuxMultiplexer()
- # Directories that should never be used as working directories.
- # Prevents user-supplied paths from pointing at sensitive system locations.
- # Includes /private/* variants for macOS (where /etc -> /private/etc, etc.).
- _BLOCKED_DIRECTORIES = frozenset(
- {
- "/",
- "/bin",
- "/sbin",
- "/usr/bin",
- "/usr/sbin",
- "/etc",
- "/var",
- "/tmp",
- "/dev",
- "/proc",
- "/sys",
- "/root",
- "/boot",
- "/lib",
- "/lib64",
- "/private/etc",
- "/private/var",
- "/private/tmp",
- }
- )
- def _resolve_and_validate_working_directory(self, working_directory: Optional[str]) -> str:
- """Resolve and validate working directory.
+class _ShimModule(ModuleType):
+    """Keep legacy monkeypatch targets synchronized with the real module."""
- Canonicalizes the path (resolves symlinks, normalizes ``..``) and
- rejects paths that point to sensitive system directories.
+    # Attribute names whose assignment on this module is also applied to
+    # ``_tmux_module``.
+    _SYNCED_ATTRS = {"libtmux", "logger", "subprocess", "time", "uuid"}
- **Allowed directories:**
+    def __setattr__(self, name: str, value: object) -> None:
+        # Set the attribute on this module first, then mirror it onto
+        # ``_tmux_module`` when it is one of the synced names.
+        super().__setattr__(name, value)
+        if name in self._SYNCED_ATTRS:
+            setattr(_tmux_module, name, value)
- - Any real directory that is not a blocked system path
- - Paths outside ``~/`` are permitted (e.g., ``/Volumes/workplace``,
- ``/opt/projects``, NFS mounts)
- **Blocked (unsafe) directories:**
+sys.modules[__name__].__class__ = _ShimModule
- - System directories: ``/``, ``/bin``, ``/sbin``, ``/usr/bin``,
- ``/usr/sbin``, ``/etc``, ``/var``, ``/tmp``, ``/dev``, ``/proc``,
- ``/sys``, ``/root``, ``/boot``, ``/lib``, ``/lib64``
-
- Args:
- working_directory: Optional directory path, defaults to current directory
-
- Returns:
- Canonicalized absolute path
-
- Raises:
- ValueError: If directory does not exist or is a blocked system path
- """
- if working_directory is None:
- working_directory = os.getcwd()
-
- # Expand ~ to the server's home directory so clients can use
- # portable paths like ~/q/my-project without knowing the server's
- # actual home path (e.g., /home/user vs /Users/user).
- working_directory = os.path.expanduser(working_directory)
-
- # Step 1: Canonicalize the path via realpath to resolve symlinks
- # and .. sequences. os.path.realpath is recognized by CodeQL as a
- # PathNormalization (transitions taint to NormalizedUnchecked).
- real_path = os.path.realpath(os.path.abspath(working_directory))
-
- # Step 2: Path-containment guard (CodeQL SafeAccessCheck).
- # CodeQL's py/path-injection two-state taint model requires:
- # 1. PathNormalization (realpath above) → NormalizedUnchecked
- # 2. SafeAccessCheck (startswith guard) → sanitized
- # CodeQL recognizes str.startswith() as a SafeAccessCheck; when
- # the true branch flows to filesystem ops, the path is cleared.
- # The "/" prefix is always true after realpath(), but this
- # explicit guard satisfies CodeQL and rejects relative paths.
- if not real_path.startswith("/"):
- raise ValueError(f"Working directory must be an absolute path: {working_directory}")
-
- # Step 3: Block sensitive system directories.
- # Only the exact listed paths are blocked — not their subdirectories.
- # This prevents launching agents in /etc, /var, /root, etc., while
- # still allowing legitimate paths like /Volumes/workplace or even
- # /var/folders (macOS temp) that happen to be under a blocked prefix.
- if real_path in self._BLOCKED_DIRECTORIES:
- raise ValueError(
- f"Working directory not allowed: {working_directory} "
- f"(resolves to blocked system path {real_path})"
- )
-
- # Step 4: Verify the directory actually exists
- if not os.path.isdir(real_path):
- raise ValueError(f"Working directory does not exist: {working_directory}")
-
- return real_path
-
- def create_session(
- self,
- session_name: str,
- window_name: str,
- terminal_id: str,
- working_directory: Optional[str] = None,
- ) -> str:
- """Create detached tmux session with initial window and return window name."""
- try:
- working_directory = self._resolve_and_validate_working_directory(working_directory)
-
- # Filter out provider env vars that would cause "nested session"
- # errors when CAO itself runs inside a provider (e.g. Claude Code).
- # Preserve CLAUDE_CODE_USE_* and CLAUDE_CODE_SKIP_* vars needed
- # for provider authentication (Bedrock, Vertex AI, Foundry).
- blocked_prefixes = ("CLAUDE", "CODEX_")
- allowed_vars = {
- "CLAUDE_CODE_USE_BEDROCK",
- "CLAUDE_CODE_USE_VERTEX",
- "CLAUDE_CODE_USE_FOUNDRY",
- "CLAUDE_CODE_SKIP_BEDROCK_AUTH",
- "CLAUDE_CODE_SKIP_VERTEX_AUTH",
- "CLAUDE_CODE_SKIP_FOUNDRY_AUTH",
- }
- environment = {
- k: v
- for k, v in os.environ.items()
- if k in allowed_vars or not any(k.startswith(p) for p in blocked_prefixes)
- }
- environment["CAO_TERMINAL_ID"] = terminal_id
-
- session = self.server.new_session(
- session_name=session_name,
- window_name=window_name,
- start_directory=working_directory,
- detach=True,
- environment=environment,
- )
- logger.info(
- f"Created tmux session: {session_name} with window: {window_name} in directory: {working_directory}"
- )
- window_name_result = session.windows[0].name
- if window_name_result is None:
- raise ValueError(f"Window name is None for session {session_name}")
- return window_name_result
- except Exception as e:
- logger.error(f"Failed to create session {session_name}: {e}")
- raise
-
- def create_window(
- self,
- session_name: str,
- window_name: str,
- terminal_id: str,
- working_directory: Optional[str] = None,
- ) -> str:
- """Create window in session and return window name."""
- try:
- working_directory = self._resolve_and_validate_working_directory(working_directory)
-
- session = self.server.sessions.get(session_name=session_name)
- if not session:
- raise ValueError(f"Session '{session_name}' not found")
-
- window = session.new_window(
- window_name=window_name,
- start_directory=working_directory,
- environment={"CAO_TERMINAL_ID": terminal_id},
- )
-
- logger.info(
- f"Created window '{window.name}' in session '{session_name}' in directory: {working_directory}"
- )
- window_name_result = window.name
- if window_name_result is None:
- raise ValueError(f"Window name is None for session {session_name}")
- return window_name_result
- except Exception as e:
- logger.error(f"Failed to create window in session {session_name}: {e}")
- raise
-
- def send_keys(
- self, session_name: str, window_name: str, keys: str, enter_count: int = 1
- ) -> None:
- """Send keys to window using tmux paste-buffer for instant delivery.
-
- Uses load-buffer + paste-buffer instead of chunked send-keys to avoid
- slow character-by-character input and special character interpretation.
- The -p flag enables bracketed paste mode so multi-line content is treated
- as a single input rather than submitting on each newline.
-
- Args:
- session_name: Name of tmux session
- window_name: Name of window in session
- keys: Text to send
- enter_count: Number of Enter keys to send after pasting (default 1).
- Some TUIs enter multi-line mode after bracketed paste,
- requiring 2 Enters to submit.
- """
- target = f"{session_name}:{window_name}"
- buf_name = f"cao_{uuid.uuid4().hex[:8]}"
- try:
- logger.info(f"send_keys: {target} - keys: {keys}")
- subprocess.run(
- ["tmux", "load-buffer", "-b", buf_name, "-"],
- input=keys.encode(),
- check=True,
- )
- subprocess.run(
- ["tmux", "paste-buffer", "-p", "-b", buf_name, "-t", target],
- check=True,
- )
- # Brief delay to let the TUI process the bracketed paste end sequence
- # before sending Enter. Without this, some TUIs (e.g., Claude Code 2.x)
- # swallow the Enter that immediately follows paste-buffer -p.
- time.sleep(0.3)
- for i in range(enter_count):
- if i > 0:
- # Delay between Enter presses for TUIs that need time to
- # process the previous Enter (e.g., Ink adding a newline)
- # before the next Enter triggers form submission.
- time.sleep(0.5)
- subprocess.run(
- ["tmux", "send-keys", "-t", target, "Enter"],
- check=True,
- )
- logger.debug(f"Sent keys to {target}")
- except Exception as e:
- logger.error(f"Failed to send keys to {target}: {e}")
- raise
- finally:
- subprocess.run(
- ["tmux", "delete-buffer", "-b", buf_name],
- check=False,
- )
-
- def send_keys_via_paste(self, session_name: str, window_name: str, text: str) -> None:
- """Send text to window via tmux paste buffer with bracketed paste mode.
-
- Uses tmux set-buffer + paste-buffer -p to send text as a bracketed paste,
- which bypasses TUI hotkey handling. Essential for Ink-based CLIs and
- other TUI apps where individual keystrokes may trigger hotkeys.
-
- After pasting, sends C-m (Enter) to submit the input.
-
- Args:
- session_name: Name of tmux session
- window_name: Name of window in session
- text: Text to paste into the pane
- """
- try:
- logger.info(
- f"send_keys_via_paste: {session_name}:{window_name} - text length: {len(text)}"
- )
-
- session = self.server.sessions.get(session_name=session_name)
- if not session:
- raise ValueError(f"Session '{session_name}' not found")
-
- window = session.windows.get(window_name=window_name)
- if not window:
- raise ValueError(f"Window '{window_name}' not found in session '{session_name}'")
-
- pane = window.active_pane
- if pane:
- buf_name = "cao_paste"
-
- # Load text into tmux buffer
- self.server.cmd("set-buffer", "-b", buf_name, text)
-
- # Paste with bracketed paste mode (-p flag).
- # This wraps the text in \x1b[200~ ... \x1b[201~ escape sequences,
- # telling the TUI "this is pasted text" so it bypasses hotkey handling.
- pane.cmd("paste-buffer", "-p", "-b", buf_name)
-
- time.sleep(0.3)
-
- # Send Enter to submit the pasted text
- pane.send_keys("C-m", enter=False)
-
- # Clean up the paste buffer
- try:
- self.server.cmd("delete-buffer", "-b", buf_name)
- except Exception:
- pass
-
- logger.debug(f"Sent text via paste to {session_name}:{window_name}")
- except Exception as e:
- logger.error(f"Failed to send text via paste to {session_name}:{window_name}: {e}")
- raise
-
- def send_special_key(self, session_name: str, window_name: str, key: str) -> None:
- """Send a tmux special key sequence (e.g., C-d, C-c) to a window.
-
- Unlike send_keys(), this sends the key as a tmux key name (not literal text)
- and does not append a carriage return. Used for control signals like Ctrl+D (EOF).
-
- Args:
- session_name: Name of tmux session
- window_name: Name of window in session
- key: Tmux key name (e.g., "C-d", "C-c", "Escape")
- """
- try:
- logger.info(f"send_special_key: {session_name}:{window_name} - key: {key}")
-
- session = self.server.sessions.get(session_name=session_name)
- if not session:
- raise ValueError(f"Session '{session_name}' not found")
-
- window = session.windows.get(window_name=window_name)
- if not window:
- raise ValueError(f"Window '{window_name}' not found in session '{session_name}'")
-
- pane = window.active_pane
- if pane:
- pane.send_keys(key, enter=False)
- logger.debug(f"Sent special key to {session_name}:{window_name}")
- except Exception as e:
- logger.error(f"Failed to send special key to {session_name}:{window_name}: {e}")
- raise
-
- def get_history(
- self, session_name: str, window_name: str, tail_lines: Optional[int] = None
- ) -> str:
- """Get window history.
-
- Args:
- session_name: Name of tmux session
- window_name: Name of window in session
- tail_lines: Number of lines to capture from end (default: TMUX_HISTORY_LINES)
- """
- try:
- session = self.server.sessions.get(session_name=session_name)
- if not session:
- raise ValueError(f"Session '{session_name}' not found")
-
- window = session.windows.get(window_name=window_name)
- if not window:
- raise ValueError(f"Window '{window_name}' not found in session '{session_name}'")
-
- # Use cmd to run capture-pane with -e (escape sequences) and -p (print) flags
- pane = window.panes[0]
- lines = tail_lines if tail_lines is not None else TMUX_HISTORY_LINES
- result = pane.cmd("capture-pane", "-e", "-p", "-S", f"-{lines}")
- # Join all lines with newlines to get complete output
- return "\n".join(result.stdout) if result.stdout else ""
- except Exception as e:
- logger.error(f"Failed to get history from {session_name}:{window_name}: {e}")
- raise
-
- def list_sessions(self) -> List[Dict[str, str]]:
- """List all tmux sessions."""
- try:
- sessions: List[Dict[str, str]] = []
- for session in self.server.sessions:
- # Check if session has attached clients
- is_attached = len(getattr(session, "attached_sessions", [])) > 0
-
- session_name = session.name if session.name is not None else ""
- sessions.append(
- {
- "id": session_name,
- "name": session_name,
- "status": "active" if is_attached else "detached",
- }
- )
-
- return sessions
- except Exception as e:
- logger.error(f"Failed to list sessions: {e}")
- return []
-
- def get_session_windows(self, session_name: str) -> List[Dict[str, str]]:
- """Get all windows in a session."""
- try:
- session = self.server.sessions.get(session_name=session_name)
- if not session:
- return []
-
- windows: List[Dict[str, str]] = []
- for window in session.windows:
- window_name = window.name if window.name is not None else ""
- windows.append({"name": window_name, "index": str(window.index)})
-
- return windows
- except Exception as e:
- logger.error(f"Failed to get windows for session {session_name}: {e}")
- return []
-
- def kill_session(self, session_name: str) -> bool:
- """Kill tmux session."""
- try:
- session = self.server.sessions.get(session_name=session_name)
- if session:
- session.kill()
- logger.info(f"Killed tmux session: {session_name}")
- return True
- return False
- except Exception as e:
- logger.error(f"Failed to kill session {session_name}: {e}")
- return False
-
- def kill_window(self, session_name: str, window_name: str) -> bool:
- """Kill a specific tmux window within a session."""
- try:
- session = self.server.sessions.get(session_name=session_name)
- if not session:
- return False
- window = session.windows.get(window_name=window_name)
- if window:
- window.kill()
- logger.info(f"Killed tmux window: {session_name}:{window_name}")
- return True
- return False
- except Exception as e:
- logger.error(f"Failed to kill window {session_name}:{window_name}: {e}")
- return False
-
- def session_exists(self, session_name: str) -> bool:
- """Check if session exists."""
- try:
- session = self.server.sessions.get(session_name=session_name)
- return session is not None
- except Exception:
- return False
-
- def get_pane_working_directory(self, session_name: str, window_name: str) -> Optional[str]:
- """Get the current working directory of a pane."""
- try:
- session = self.server.sessions.get(session_name=session_name)
- if not session:
- return None
-
- window = session.windows.get(window_name=window_name)
- if not window:
- return None
-
- pane = window.active_pane
- if pane:
- # Get pane_current_path from tmux
- result = pane.cmd("display-message", "-p", "#{pane_current_path}")
- if result.stdout:
- return result.stdout[0].strip()
- return None
- except Exception as e:
- logger.error(f"Failed to get working directory for {session_name}:{window_name}: {e}")
- return None
-
- def pipe_pane(self, session_name: str, window_name: str, file_path: str) -> None:
- """Start piping pane output to file.
-
- Args:
- session_name: Tmux session name
- window_name: Tmux window name
- file_path: Absolute path to log file
- """
- try:
- session = self.server.sessions.get(session_name=session_name)
- if not session:
- raise ValueError(f"Session '{session_name}' not found")
-
- window = session.windows.get(window_name=window_name)
- if not window:
- raise ValueError(f"Window '{window_name}' not found in session '{session_name}'")
-
- pane = window.active_pane
- if pane:
- pane.cmd("pipe-pane", "-o", f"cat >> {file_path}")
- logger.info(f"Started pipe-pane for {session_name}:{window_name} to {file_path}")
- except Exception as e:
- logger.error(f"Failed to start pipe-pane for {session_name}:{window_name}: {e}")
- raise
-
- def stop_pipe_pane(self, session_name: str, window_name: str) -> None:
- """Stop piping pane output.
-
- Args:
- session_name: Tmux session name
- window_name: Tmux window name
- """
- try:
- session = self.server.sessions.get(session_name=session_name)
- if not session:
- raise ValueError(f"Session '{session_name}' not found")
-
- window = session.windows.get(window_name=window_name)
- if not window:
- raise ValueError(f"Window '{window_name}' not found in session '{session_name}'")
-
- pane = window.active_pane
- if pane:
- pane.cmd("pipe-pane")
- logger.info(f"Stopped pipe-pane for {session_name}:{window_name}")
- except Exception as e:
- logger.error(f"Failed to stop pipe-pane for {session_name}:{window_name}: {e}")
- raise
-
-
-# Module-level singleton
-tmux_client = TmuxClient()
+__all__ = [
+ "TmuxClient",
+ "TmuxMultiplexer",
+ "libtmux",
+ "logger",
+ "subprocess",
+ "time",
+ "tmux_client",
+ "uuid",
+]
diff --git a/src/cli_agent_orchestrator/multiplexers/__init__.py b/src/cli_agent_orchestrator/multiplexers/__init__.py
new file mode 100644
index 000000000..c1b56c1b4
--- /dev/null
+++ b/src/cli_agent_orchestrator/multiplexers/__init__.py
@@ -0,0 +1,44 @@
+"""Multiplexer abstraction layer for CAO."""
+
+from __future__ import annotations
+
+import os
+import sys
+from functools import lru_cache
+from typing import Literal
+
+from cli_agent_orchestrator.multiplexers.base import BaseMultiplexer, LaunchSpec
+from cli_agent_orchestrator.multiplexers.tmux import TmuxMultiplexer
+
+_BackendName = Literal["tmux", "wezterm"]
+
+
+def _select_backend() -> _BackendName:
+    """Pick the multiplexer backend name for the current environment.
+
+    Precedence: an explicit ``CAO_MULTIPLEXER`` value (trimmed, lower-cased),
+    then a set ``TMUX`` variable, then WezTerm markers (``WEZTERM_PANE`` or
+    ``TERM_PROGRAM == "WezTerm"``), then the platform default (``wezterm`` on
+    win32, ``tmux`` elsewhere).
+
+    Raises:
+        ValueError: If ``CAO_MULTIPLEXER`` is set to anything other than
+            ``tmux`` or ``wezterm``.
+    """
+    env = os.environ
+    choice = env.get("CAO_MULTIPLEXER", "").strip().lower()
+    if choice in ("tmux", "wezterm"):
+        return choice
+    if choice:
+        raise ValueError(
+            f"Unknown CAO_MULTIPLEXER: {choice!r}; expected 'tmux' or 'wezterm'"
+        )
+
+    if env.get("TMUX"):
+        return "tmux"
+
+    inside_wezterm = bool(env.get("WEZTERM_PANE")) or env.get("TERM_PROGRAM") == "WezTerm"
+    if inside_wezterm or sys.platform == "win32":
+        return "wezterm"
+    return "tmux"
+
+
+@lru_cache(maxsize=1)
+def get_multiplexer() -> BaseMultiplexer:
+    """Build (once) and return the multiplexer chosen by ``_select_backend()``.
+
+    The result is memoized by ``lru_cache``, so later calls return the same
+    instance.
+    """
+    if _select_backend() != "tmux":
+        # Imported here rather than at module top so the WezTerm module is
+        # only loaded when that backend is actually selected.
+        from cli_agent_orchestrator.multiplexers.wezterm import WezTermMultiplexer
+
+        return WezTermMultiplexer()
+    return TmuxMultiplexer()
+
+
+__all__ = ["BaseMultiplexer", "LaunchSpec", "TmuxMultiplexer", "get_multiplexer"]
diff --git a/src/cli_agent_orchestrator/multiplexers/base.py b/src/cli_agent_orchestrator/multiplexers/base.py
new file mode 100644
index 000000000..63c4ee640
--- /dev/null
+++ b/src/cli_agent_orchestrator/multiplexers/base.py
@@ -0,0 +1,218 @@
+"""Backend-neutral pane/session control surface for CAO."""
+
+from __future__ import annotations
+
+import os
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Mapping, Optional, Sequence
+
+
+@dataclass(frozen=True)
+class LaunchSpec:
+ """Concrete process spawn request for a new pane/window.
+
+ Instances are frozen: fields cannot be reassigned after construction.
+ All fields are optional and default to None.
+
+ argv:
+ Exact argv to execute as the pane's initial process. When None, start
+ the backend's default interactive shell.
+ env:
+ Extra environment variables to inject into the spawned process.
+ provider:
+ Optional provider key used by backend-specific launch templating and
+ executable resolution.
+ """
+
+ # Target command line; None means "use the backend's default shell".
+ argv: Optional[Sequence[str]] = None
+ # Additional environment for the spawned process; None means no extras.
+ env: Optional[Mapping[str, str]] = None
+ # Provider key (free-form string at this layer); None when not applicable.
+ provider: Optional[str] = None
+
+
+class BaseMultiplexer(ABC):
+ """Backend-neutral pane/session control surface for CAO.
+
+ Concrete backends implement the abstract methods below. Two pieces are
+ shared here: working-directory validation
+ (``_resolve_and_validate_working_directory``) and ``send_keys``, which is
+ composed from the abstract ``_paste_text`` and ``_submit_input`` steps.
+ """
+
+ # Directories that should never be used as working directories.
+ # Prevents user-supplied paths from pointing at sensitive system locations.
+ # Includes /private/* variants for macOS (where /etc -> /private/etc, etc.).
+ # Matching is exact (see Step 3 below): subdirectories are not blocked.
+ _BLOCKED_DIRECTORIES = frozenset(
+ {
+ "/",
+ "/bin",
+ "/sbin",
+ "/usr/bin",
+ "/usr/sbin",
+ "/etc",
+ "/var",
+ "/tmp",
+ "/dev",
+ "/proc",
+ "/sys",
+ "/root",
+ "/boot",
+ "/lib",
+ "/lib64",
+ "/private/etc",
+ "/private/var",
+ "/private/tmp",
+ }
+ )
+
+ def _resolve_and_validate_working_directory(
+ self, working_directory: Optional[str]
+ ) -> str:
+ """Canonicalize, validate, and return a safe working directory.
+
+ Expands a leading ``~``, canonicalizes the path (resolves symlinks,
+ normalizes ``..``) and rejects paths that point to sensitive system
+ directories.
+
+ **Allowed directories:**
+
+ - Any real directory that is not a blocked system path
+ - Paths outside ``~/`` are permitted (e.g., ``/Volumes/workplace``,
+ ``/opt/projects``, NFS mounts)
+ - Subdirectories of blocked paths (only exact matches are rejected)
+
+ **Blocked (unsafe) directories:**
+
+ - System directories: ``/``, ``/bin``, ``/sbin``, ``/usr/bin``,
+ ``/usr/sbin``, ``/etc``, ``/var``, ``/tmp``, ``/dev``, ``/proc``,
+ ``/sys``, ``/root``, ``/boot``, ``/lib``, ``/lib64``
+ - macOS variants: ``/private/etc``, ``/private/var``, ``/private/tmp``
+
+ Args:
+ working_directory: Optional directory path, defaults to current directory
+
+ Returns:
+ Canonicalized absolute path
+
+ Raises:
+ ValueError: If the resolved path is not absolute (neither a ``/``
+ path nor a ``X:\\`` drive path), is a blocked system path, or
+ is not an existing directory
+ """
+ if working_directory is None:
+ working_directory = os.getcwd()
+
+ # Expand ~ to the server's home directory so clients can use
+ # portable paths like ~/q/my-project without knowing the server's
+ # actual home path (e.g., /home/user vs /Users/user).
+ working_directory = os.path.expanduser(working_directory)
+
+ # Step 1: Canonicalize the path via realpath to resolve symlinks
+ # and .. sequences. os.path.realpath is recognized by CodeQL as a
+ # PathNormalization (transitions taint to NormalizedUnchecked).
+ real_path = os.path.realpath(os.path.abspath(working_directory))
+
+ # Step 2: Path-containment guard (CodeQL SafeAccessCheck).
+ # CodeQL's py/path-injection two-state taint model requires:
+ # 1. PathNormalization (realpath above) → NormalizedUnchecked
+ # 2. SafeAccessCheck (startswith guard) → sanitized
+ # CodeQL recognizes str.startswith() as a SafeAccessCheck; when
+ # the true branch flows to filesystem ops, the path is cleared.
+ # Both prefixes are always true after realpath() on their respective
+ # platforms; the explicit guard satisfies CodeQL and rejects relative
+ # paths. Unix uses "/"; Windows drive paths look like "C:\..." after
+ # abspath/realpath, so the second clause covers Win32.
+ # NOTE(review): a Windows UNC path ("\\server\share\...") matches
+ # neither clause and would be rejected here — confirm that is intended.
+ if not (
+ real_path.startswith("/")
+ or (len(real_path) >= 3 and real_path[1:3] == ":\\")
+ ):
+ raise ValueError(f"Working directory must be an absolute path: {working_directory}")
+
+ # Step 3: Block sensitive system directories.
+ # Only the exact listed paths are blocked — not their subdirectories.
+ # This prevents launching agents in /etc, /var, /root, etc., while
+ # still allowing legitimate paths like /Volumes/workplace or even
+ # /var/folders (macOS temp) that happen to be under a blocked prefix.
+ if real_path in self._BLOCKED_DIRECTORIES:
+ raise ValueError(
+ f"Working directory not allowed: {working_directory} "
+ f"(resolves to blocked system path {real_path})"
+ )
+
+ # Step 4: Verify the directory actually exists
+ if not os.path.isdir(real_path):
+ raise ValueError(f"Working directory does not exist: {working_directory}")
+
+ return real_path
+
+ @abstractmethod
+ def create_session(
+ self,
+ session_name: str,
+ window_name: str,
+ terminal_id: str,
+ working_directory: Optional[str] = None,
+ launch_spec: Optional[LaunchSpec] = None,
+ ) -> str:
+ """Create a detached CAO session/workspace and return the actual window name."""
+
+ @abstractmethod
+ def create_window(
+ self,
+ session_name: str,
+ window_name: str,
+ terminal_id: str,
+ working_directory: Optional[str] = None,
+ launch_spec: Optional[LaunchSpec] = None,
+ ) -> str:
+ """Create another CAO window/pane inside an existing session."""
+
+ def send_keys(
+ self, session_name: str, window_name: str, keys: str, enter_count: int = 1
+ ) -> None:
+ """Paste text, then submit it with Enter as a separate step.
+
+ This base implementation only sequences the two backend primitives;
+ it adds no delay of its own. Any settle time a TUI needs between the
+ paste and the Enter must be provided by the backend's ``_paste_text``
+ or ``_submit_input``.
+ """
+ self._paste_text(session_name, window_name, keys)
+ self._submit_input(session_name, window_name, enter_count=enter_count)
+
+ @abstractmethod
+ def _paste_text(self, session_name: str, window_name: str, text: str) -> None:
+ """Inject literal text without submitting it."""
+
+ @abstractmethod
+ def _submit_input(
+ self, session_name: str, window_name: str, enter_count: int = 1
+ ) -> None:
+ """Submit already-pasted input with one or more Enter presses."""
+
+ @abstractmethod
+ def send_special_key(
+ self,
+ session_name: str,
+ window_name: str,
+ key: str,
+ *,
+ literal: bool = False,
+ ) -> None:
+ """Send a control key or a literal VT sequence without paste semantics."""
+
+ @abstractmethod
+ def get_history(
+ self, session_name: str, window_name: str, tail_lines: Optional[int] = None
+ ) -> str:
+ """Return normalized pane text for provider regex/status parsing."""
+
+ @abstractmethod
+ def list_sessions(self) -> list[dict[str, str]]:
+ """List CAO-visible sessions as {id, name, status}."""
+
+ @abstractmethod
+ def kill_session(self, session_name: str) -> bool:
+ """Terminate a session and all owned panes/windows."""
+
+ @abstractmethod
+ def kill_window(self, session_name: str, window_name: str) -> bool:
+ """Terminate one CAO window/pane."""
+
+ @abstractmethod
+ def session_exists(self, session_name: str) -> bool:
+ """Return True when the named session/workspace exists."""
+
+ @abstractmethod
+ def get_pane_working_directory(
+ self, session_name: str, window_name: str
+ ) -> Optional[str]:
+ """Return the active pane's working directory when the backend exposes it."""
+
+ @abstractmethod
+ def pipe_pane(self, session_name: str, window_name: str, file_path: str) -> None:
+ """Start backend-specific output capture into a CAO log file."""
+
+ @abstractmethod
+ def stop_pipe_pane(self, session_name: str, window_name: str) -> None:
+ """Stop backend-specific output capture for a CAO log file."""
diff --git a/src/cli_agent_orchestrator/multiplexers/launch.py b/src/cli_agent_orchestrator/multiplexers/launch.py
new file mode 100644
index 000000000..aa0187bf4
--- /dev/null
+++ b/src/cli_agent_orchestrator/multiplexers/launch.py
@@ -0,0 +1,74 @@
+"""Launch template helpers for backend-owned direct process spawns."""
+
+from __future__ import annotations
+
+import os
+import shutil
+import sys
+from typing import Literal, Sequence
+
+from cli_agent_orchestrator.multiplexers.base import LaunchSpec
+
+# Avoid importing ProviderType from providers.manager to prevent circular imports
+# (multiplexers.launch → providers.manager → multiplexers.launch). Use a module-level
+# constant instead so this string only appears once and a typo is caught at definition.
+_PROVIDER_KEY_CODEX = "codex"
+
+
+def default_platform() -> Literal["windows", "unix"]:
+ return "windows" if sys.platform == "win32" else "unix"
+
+
+def _windows_codex_candidates() -> tuple[str, ...]:
+ username = os.environ.get("USERNAME") or os.environ.get("USER")
+ candidates = [
+ os.path.expandvars(
+ r"%LOCALAPPDATA%\..\scoop\apps\nodejs-lts\current\bin\codex.cmd"
+ )
+ ]
+ if username:
+ candidates.append(
+ rf"C:\Users\{username}\scoop\apps\nodejs-lts\current\bin\codex.cmd"
+ )
+ return tuple(candidates)
+
+
+def build_launch_spec(
+ provider: str,
+ command_argv: Sequence[str],
+ *,
+ platform: Literal["windows", "unix"] | None = None,
+) -> LaunchSpec:
+ """Resolve a LaunchSpec for a provider on the current (or stated) platform.
+
+ `command_argv[0]` is treated as the bare command name to resolve.
+ The remaining elements are passed through verbatim.
+
+ Resolver order (Windows):
+ 1. explicit ``CAO__BIN`` env override
+ 2. ``where.exe .cmd`` lookup (Scoop/Node shim discovery)
+ 3. fall back to bare ``command_argv[0]``
+
+ On non-Windows: trust shell PATH (use ``command_argv[0]`` verbatim).
+ """
+ resolved_platform = platform or default_platform()
+ argv = tuple(command_argv)
+ if not argv:
+ raise ValueError("command_argv must not be empty")
+
+ if provider != _PROVIDER_KEY_CODEX or resolved_platform != "windows":
+ return LaunchSpec(argv=argv, provider=provider)
+
+ override = os.environ.get("CAO_CODEX_BIN")
+ if override:
+ return LaunchSpec(argv=(override, *argv[1:]), provider=provider)
+
+ resolved = shutil.which("codex.cmd")
+ if resolved:
+ return LaunchSpec(argv=(resolved, *argv[1:]), provider=provider)
+
+ for candidate in _windows_codex_candidates():
+ if os.path.exists(candidate):
+ return LaunchSpec(argv=(candidate, *argv[1:]), provider=provider)
+
+ return LaunchSpec(argv=argv, provider=provider)
diff --git a/src/cli_agent_orchestrator/multiplexers/tmux.py b/src/cli_agent_orchestrator/multiplexers/tmux.py
new file mode 100644
index 000000000..0d6bbf68e
--- /dev/null
+++ b/src/cli_agent_orchestrator/multiplexers/tmux.py
@@ -0,0 +1,400 @@
+"""Tmux-backed multiplexer implementation."""
+
+from __future__ import annotations
+
+import logging
+import os
+import subprocess
+import time
+import uuid
+from typing import Dict, List, Optional
+
+import libtmux
+
+from cli_agent_orchestrator.constants import TMUX_HISTORY_LINES
+from cli_agent_orchestrator.multiplexers.base import BaseMultiplexer, LaunchSpec
+
+logger = logging.getLogger(__name__)
+
+
+class TmuxMultiplexer(BaseMultiplexer):
+ """Tmux-backed multiplexer for basic operations."""
+
+ def __init__(self) -> None:
+ """Create the libtmux server handle and the paste-buffer bookkeeping."""
+ self.server = libtmux.Server()
+ # Maps a "session:window" target to the name of the tmux buffer that
+ # _paste_text loaded for it; _submit_input removes and deletes it.
+ self._pending_buffers: dict[str, str] = {}
+
+ def create_session(
+ self,
+ session_name: str,
+ window_name: str,
+ terminal_id: str,
+ working_directory: Optional[str] = None,
+ launch_spec: Optional[LaunchSpec] = None,
+ ) -> str:
+ """Create detached tmux session with initial window and return window name."""
+ try:
+ del launch_spec
+ working_directory = self._resolve_and_validate_working_directory(working_directory)
+
+ # Filter out provider env vars that would cause "nested session"
+ # errors when CAO itself runs inside a provider (e.g. Claude Code).
+ # Preserve CLAUDE_CODE_USE_* and CLAUDE_CODE_SKIP_* vars needed
+ # for provider authentication (Bedrock, Vertex AI, Foundry).
+ blocked_prefixes = ("CLAUDE", "CODEX_")
+ allowed_vars = {
+ "CLAUDE_CODE_USE_BEDROCK",
+ "CLAUDE_CODE_USE_VERTEX",
+ "CLAUDE_CODE_USE_FOUNDRY",
+ "CLAUDE_CODE_SKIP_BEDROCK_AUTH",
+ "CLAUDE_CODE_SKIP_VERTEX_AUTH",
+ "CLAUDE_CODE_SKIP_FOUNDRY_AUTH",
+ }
+ environment = {
+ k: v
+ for k, v in os.environ.items()
+ if k in allowed_vars or not any(k.startswith(p) for p in blocked_prefixes)
+ }
+ environment["CAO_TERMINAL_ID"] = terminal_id
+
+ session = self.server.new_session(
+ session_name=session_name,
+ window_name=window_name,
+ start_directory=working_directory,
+ detach=True,
+ environment=environment,
+ )
+ logger.info(
+ f"Created tmux session: {session_name} with window: {window_name} in directory: {working_directory}"
+ )
+ window_name_result = session.windows[0].name
+ if window_name_result is None:
+ raise ValueError(f"Window name is None for session {session_name}")
+ return window_name_result
+ except Exception as e:
+ logger.error(f"Failed to create session {session_name}: {e}")
+ raise
+
+ def create_window(
+ self,
+ session_name: str,
+ window_name: str,
+ terminal_id: str,
+ working_directory: Optional[str] = None,
+ launch_spec: Optional[LaunchSpec] = None,
+ ) -> str:
+ """Create window in session and return window name."""
+ try:
+ del launch_spec
+ working_directory = self._resolve_and_validate_working_directory(working_directory)
+
+ session = self.server.sessions.get(session_name=session_name)
+ if not session:
+ raise ValueError(f"Session '{session_name}' not found")
+
+ window = session.new_window(
+ window_name=window_name,
+ start_directory=working_directory,
+ environment={"CAO_TERMINAL_ID": terminal_id},
+ )
+
+ logger.info(
+ f"Created window '{window.name}' in session '{session_name}' in directory: {working_directory}"
+ )
+ window_name_result = window.name
+ if window_name_result is None:
+ raise ValueError(f"Window name is None for session {session_name}")
+ return window_name_result
+ except Exception as e:
+ logger.error(f"Failed to create window in session {session_name}: {e}")
+ raise
+
+ def _paste_text(self, session_name: str, window_name: str, text: str) -> None:
+ """Inject text using tmux paste-buffer with bracketed paste mode."""
+ target = f"{session_name}:{window_name}"
+ buf_name = f"cao_{uuid.uuid4().hex[:8]}"
+ try:
+ logger.info(f"_paste_text: {target} - text length: {len(text)}")
+ subprocess.run(
+ ["tmux", "load-buffer", "-b", buf_name, "-"],
+ input=text.encode(),
+ check=True,
+ )
+ subprocess.run(
+ ["tmux", "paste-buffer", "-p", "-b", buf_name, "-t", target],
+ check=True,
+ )
+ self._pending_buffers[target] = buf_name
+ # Settle delay — without it, some TUIs (e.g., Claude Code 2.x)
+ # swallow the Enter that immediately follows paste-buffer -p.
+ time.sleep(0.3)
+ except Exception as e:
+ logger.error(f"Failed to paste text to {target}: {e}")
+ raise
+ finally:
+ if target not in self._pending_buffers:
+ subprocess.run(
+ ["tmux", "delete-buffer", "-b", buf_name],
+ check=False,
+ )
+
+ def _submit_input(
+ self, session_name: str, window_name: str, enter_count: int = 1
+ ) -> None:
+ """Submit already-pasted input with one or more Enter presses.
+
+ Sends ``Enter`` to the ``session:window`` target ``enter_count`` times
+ via ``tmux send-keys``. Afterwards — on success or failure — the tmux
+ buffer recorded for this target in ``_pending_buffers`` (if any) is
+ forgotten and deleted.
+
+ Raises:
+ subprocess.CalledProcessError: If a ``tmux send-keys`` call fails
+ (``check=True``); the error is logged and re-raised.
+ """
+ target = f"{session_name}:{window_name}"
+ # Captured before sending so cleanup targets the buffer that was pending
+ # when this submit started.
+ buf_name = self._pending_buffers.get(target)
+ try:
+ logger.info(f"_submit_input: {target} - enter_count: {enter_count}")
+ for i in range(enter_count):
+ if i > 0:
+ # Delay between Enter presses for TUIs that need time to
+ # process the previous Enter (e.g., Ink adding a newline)
+ # before the next Enter triggers form submission.
+ time.sleep(0.5)
+ subprocess.run(
+ ["tmux", "send-keys", "-t", target, "Enter"],
+ check=True,
+ )
+ logger.debug(f"Submitted input to {target}")
+ except Exception as e:
+ logger.error(f"Failed to submit input to {target}: {e}")
+ raise
+ finally:
+ # Best-effort cleanup (check=False): a failed delete is ignored.
+ if buf_name is not None:
+ self._pending_buffers.pop(target, None)
+ subprocess.run(
+ ["tmux", "delete-buffer", "-b", buf_name],
+ check=False,
+ )
+
+ def send_keys_via_paste(self, session_name: str, window_name: str, text: str) -> None:
+ """Send text to window via tmux paste buffer with bracketed paste mode.
+
+ Looks up the window's active pane through libtmux, stores ``text`` in
+ the tmux buffer ``cao_paste``, pastes it with ``-p``, waits 0.3 s, sends
+ ``C-m`` to submit, and then deletes the buffer (delete errors are
+ ignored).
+
+ Raises:
+ ValueError: If the session or window cannot be found. Any failure
+ is logged and re-raised.
+ """
+ try:
+ logger.info(
+ f"send_keys_via_paste: {session_name}:{window_name} - text length: {len(text)}"
+ )
+
+ session = self.server.sessions.get(session_name=session_name)
+ if not session:
+ raise ValueError(f"Session '{session_name}' not found")
+
+ window = session.windows.get(window_name=window_name)
+ if not window:
+ raise ValueError(f"Window '{window_name}' not found in session '{session_name}'")
+
+ pane = window.active_pane
+ if pane:
+ # NOTE(review): the buffer name is fixed, so two overlapping calls
+ # would share one buffer — confirm callers never overlap.
+ buf_name = "cao_paste"
+
+ # Load text into tmux buffer
+ self.server.cmd("set-buffer", "-b", buf_name, text)
+
+ # Paste with bracketed paste mode (-p flag).
+ # This wraps the text in \x1b[200~ ... \x1b[201~ escape sequences,
+ # telling the TUI "this is pasted text" so it bypasses hotkey handling.
+ pane.cmd("paste-buffer", "-p", "-b", buf_name)
+
+ # Give the TUI time to process the paste before Enter arrives.
+ time.sleep(0.3)
+
+ # Send Enter to submit the pasted text
+ pane.send_keys("C-m", enter=False)
+
+ # Clean up the paste buffer
+ try:
+ self.server.cmd("delete-buffer", "-b", buf_name)
+ except Exception:
+ # Deliberate best-effort: a failed cleanup must not fail the send.
+ pass
+
+ logger.debug(f"Sent text via paste to {session_name}:{window_name}")
+ except Exception as e:
+ logger.error(f"Failed to send text via paste to {session_name}:{window_name}: {e}")
+ raise
+
+ def send_special_key(
+ self,
+ session_name: str,
+ window_name: str,
+ key: str,
+ *,
+ literal: bool = False,
+ ) -> None:
+ """Send a tmux special key sequence or a literal VT sequence to a window.
+
+ With ``literal=True`` the key is passed to ``send-keys -l``; otherwise
+ it goes through libtmux ``send_keys`` with ``enter=False``, so no Enter
+ is appended. If the window has no active pane, nothing is sent and no
+ error is raised.
+
+ Raises:
+ ValueError: If the session or window cannot be found. Any failure
+ is logged and re-raised.
+ """
+ try:
+ logger.info(
+ f"send_special_key: {session_name}:{window_name} - key: {key} literal={literal}"
+ )
+
+ session = self.server.sessions.get(session_name=session_name)
+ if not session:
+ raise ValueError(f"Session '{session_name}' not found")
+
+ window = session.windows.get(window_name=window_name)
+ if not window:
+ raise ValueError(f"Window '{window_name}' not found in session '{session_name}'")
+
+ pane = window.active_pane
+ if pane:
+ if literal:
+ # -l: send the string as literal characters, no key-name lookup.
+ pane.cmd("send-keys", "-l", key)
+ else:
+ pane.send_keys(key, enter=False)
+ logger.debug(f"Sent special key to {session_name}:{window_name}")
+ except Exception as e:
+ logger.error(f"Failed to send special key to {session_name}:{window_name}: {e}")
+ raise
+
+ def get_history(
+ self, session_name: str, window_name: str, tail_lines: Optional[int] = None
+ ) -> str:
+ """Get window history."""
+ try:
+ session = self.server.sessions.get(session_name=session_name)
+ if not session:
+ raise ValueError(f"Session '{session_name}' not found")
+
+ window = session.windows.get(window_name=window_name)
+ if not window:
+ raise ValueError(f"Window '{window_name}' not found in session '{session_name}'")
+
+ pane = window.panes[0]
+ lines = tail_lines if tail_lines is not None else TMUX_HISTORY_LINES
+ result = pane.cmd("capture-pane", "-e", "-p", "-S", f"-{lines}")
+ return "\n".join(result.stdout) if result.stdout else ""
+ except Exception as e:
+ logger.error(f"Failed to get history from {session_name}:{window_name}: {e}")
+ raise
+
+ def list_sessions(self) -> List[Dict[str, str]]:
+ """List all tmux sessions."""
+ try:
+ sessions: List[Dict[str, str]] = []
+ for session in self.server.sessions:
+ is_attached = len(getattr(session, "attached_sessions", [])) > 0
+
+ session_name = session.name if session.name is not None else ""
+ sessions.append(
+ {
+ "id": session_name,
+ "name": session_name,
+ "status": "active" if is_attached else "detached",
+ }
+ )
+
+ return sessions
+ except Exception as e:
+ logger.error(f"Failed to list sessions: {e}")
+ return []
+
+ def get_session_windows(self, session_name: str) -> List[Dict[str, str]]:
+ """Get all windows in a session."""
+ try:
+ session = self.server.sessions.get(session_name=session_name)
+ if not session:
+ return []
+
+ windows: List[Dict[str, str]] = []
+ for window in session.windows:
+ window_name = window.name if window.name is not None else ""
+ windows.append({"name": window_name, "index": str(window.index)})
+
+ return windows
+ except Exception as e:
+ logger.error(f"Failed to get windows for session {session_name}: {e}")
+ return []
+
+ def kill_session(self, session_name: str) -> bool:
+ """Kill tmux session."""
+ try:
+ session = self.server.sessions.get(session_name=session_name)
+ if session:
+ session.kill()
+ logger.info(f"Killed tmux session: {session_name}")
+ return True
+ return False
+ except Exception as e:
+ logger.error(f"Failed to kill session {session_name}: {e}")
+ return False
+
+ def kill_window(self, session_name: str, window_name: str) -> bool:
+ """Kill a specific tmux window within a session."""
+ try:
+ session = self.server.sessions.get(session_name=session_name)
+ if not session:
+ return False
+ window = session.windows.get(window_name=window_name)
+ if window:
+ window.kill()
+ logger.info(f"Killed tmux window: {session_name}:{window_name}")
+ return True
+ return False
+ except Exception as e:
+ logger.error(f"Failed to kill window {session_name}:{window_name}: {e}")
+ return False
+
+ def session_exists(self, session_name: str) -> bool:
+ """Check if session exists."""
+ try:
+ session = self.server.sessions.get(session_name=session_name)
+ return session is not None
+ except Exception:
+ return False
+
+ def get_pane_working_directory(self, session_name: str, window_name: str) -> Optional[str]:
+ """Get the current working directory of a pane."""
+ try:
+ session = self.server.sessions.get(session_name=session_name)
+ if not session:
+ return None
+
+ window = session.windows.get(window_name=window_name)
+ if not window:
+ return None
+
+ pane = window.active_pane
+ if pane:
+ result = pane.cmd("display-message", "-p", "#{pane_current_path}")
+ if result.stdout:
+ return result.stdout[0].strip()
+ return None
+ except Exception as e:
+ logger.error(f"Failed to get working directory for {session_name}:{window_name}: {e}")
+ return None
+
+ def pipe_pane(self, session_name: str, window_name: str, file_path: str) -> None:
+ """Start piping pane output to file."""
+ try:
+ session = self.server.sessions.get(session_name=session_name)
+ if not session:
+ raise ValueError(f"Session '{session_name}' not found")
+
+ window = session.windows.get(window_name=window_name)
+ if not window:
+ raise ValueError(f"Window '{window_name}' not found in session '{session_name}'")
+
+ pane = window.active_pane
+ if pane:
+ pane.cmd("pipe-pane", "-o", f"cat >> {file_path}")
+ logger.info(f"Started pipe-pane for {session_name}:{window_name} to {file_path}")
+ except Exception as e:
+ logger.error(f"Failed to start pipe-pane for {session_name}:{window_name}: {e}")
+ raise
+
+ def stop_pipe_pane(self, session_name: str, window_name: str) -> None:
+ """Stop piping pane output.
+
+ Issues ``pipe-pane`` with no command on the window's active pane. If
+ the window has no active pane, nothing is sent and no error is raised.
+
+ Raises:
+ ValueError: If the session or window cannot be found. Any failure
+ is logged and re-raised.
+ """
+ try:
+ session = self.server.sessions.get(session_name=session_name)
+ if not session:
+ raise ValueError(f"Session '{session_name}' not found")
+
+ window = session.windows.get(window_name=window_name)
+ if not window:
+ raise ValueError(f"Window '{window_name}' not found in session '{session_name}'")
+
+ pane = window.active_pane
+ if pane:
+ # pipe-pane without a shell command closes the pane's current pipe.
+ pane.cmd("pipe-pane")
+ logger.info(f"Stopped pipe-pane for {session_name}:{window_name}")
+ except Exception as e:
+ logger.error(f"Failed to stop pipe-pane for {session_name}:{window_name}: {e}")
+ raise
diff --git a/src/cli_agent_orchestrator/multiplexers/wezterm.py b/src/cli_agent_orchestrator/multiplexers/wezterm.py
new file mode 100644
index 000000000..461ba3180
--- /dev/null
+++ b/src/cli_agent_orchestrator/multiplexers/wezterm.py
@@ -0,0 +1,567 @@
+"""WezTerm CLI-backed multiplexer implementation.
+
+WezTerm's ``cli spawn`` command does not support environment injection flags.
+The earlier CAO spike assumed ``--set-environment KEY=VALUE`` existed, but
+that flag is ignored by ``wezterm cli spawn`` and silently drops
+``CAO_TERMINAL_ID`` plus any provider-supplied launch env. Upstream confirmed
+this is not in scope in wezterm/wezterm#6565, and there is no config-side Lua
+hook for ``cli spawn`` that could repair it.
+
+CAO therefore wraps the spawned argv and sets env vars inside that wrapper
+before launching the real target:
+
+- Unix uses ``env KEY=VALUE -- <argv>``, which exec-replaces cleanly so the
+ target remains pane pid 1.
+- Windows uses ``powershell.exe -Command ...`` because Windows has no
+ ``execve`` equivalent for a direct replace. That leaves PowerShell as the
+ WezTerm child and the target as a grandchild, which is safe for CAO because
+ this multiplexer does not depend on ``wezterm cli list`` or ``process_name``
+ for status. If a future code path does inspect the foreground process name,
+ WezTerm's Windows implementation walks descendants and reports the youngest
+ attached process (``find_youngest()`` in ``mux/src/localpane.rs``), so the
+ actual target should still win once started.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import re
+import shutil
+import subprocess
+import sys
+import threading
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Callable, Mapping, Optional, Sequence
+
+from cli_agent_orchestrator.multiplexers.base import BaseMultiplexer, LaunchSpec
+
+WezTermRunner = Callable[
+ [Sequence[str], Optional[Mapping[str, str]]],
+ "subprocess.CompletedProcess[str]",
+]
+
+logger = logging.getLogger(__name__)
+
+_VT_KEY_MAP: dict[str, str] = {
+ "Enter": "\r",
+ "Tab": "\t",
+ "Escape": "\x1b",
+ "Backspace": "\x7f",
+ "Up": "\x1b[A",
+ "Down": "\x1b[B",
+ "Left": "\x1b[D",
+ "Right": "\x1b[C",
+}
+
+
+@dataclass
+class _PollerState:
+ """Pairs a background thread with the event associated with it.
+
+ NOTE(review): the poller loop is outside this view; presumably
+ ``stop_event`` is set to ask ``thread`` to exit — confirm against the
+ code that creates and stops pollers.
+ """
+
+ thread: threading.Thread
+ stop_event: threading.Event
+
+
+def _default_runner(
+ argv: Sequence[str], env: Optional[Mapping[str, str]] = None
+) -> subprocess.CompletedProcess[str]:
+ # WezTerm CLI emits UTF-8 (Rust). Without explicit encoding, Python's
+ # subprocess reader thread uses the locale codepage (cp1252 on Windows),
+ # which crashes with UnicodeDecodeError on any non-Latin-1 byte such as
+ # box-drawing characters in pane snapshots. Force UTF-8 with replacement
+ # so the multiplexer never dies on a stray byte.
+ return subprocess.run(
+ list(argv),
+ env=env,
+ capture_output=True,
+ text=True,
+ check=False,
+ encoding="utf-8",
+ errors="replace",
+ )
+
+
+def _default_shell() -> str:
+ """Return the shell to spawn inside a wezterm pane when no LaunchSpec
+ is given. On Windows we deliberately do *not* return ``COMSPEC``
+ (= ``cmd.exe``); we prefer pwsh.exe (or Windows PowerShell) so the
+ pane the user/agent sees is a modern UTF-8 shell. The PowerShell
+ wrapper that injects ``CAO_TERMINAL_ID`` exists only because WezTerm's
+ ``cli spawn --set-environment`` is broken upstream — it should not
+ be exposing a cmd.exe child to the agent on top of that.
+ """
+ if sys.platform == "win32":
+ return _resolve_powershell_bin()
+ return os.environ.get("SHELL", "/bin/sh")
+
+
+def _ps_single_quote(value: str) -> str:
+ """Quote a string for a PowerShell single-quoted literal: ' -> ''."""
+ return "'" + value.replace("'", "''") + "'"
+
+
+def _resolve_powershell_bin() -> str:
+ """Pick the PowerShell binary used to wrap WezTerm CLI spawns on Windows.
+
+ Prefers PowerShell 7+ (``pwsh.exe``) over Windows PowerShell 5.1
+ (``powershell.exe``). PS 7+ defaults to UTF-8, while PS 5.1 follows
+ the locale codepage (cp1252 on Spanish Windows etc.) which mangles
+ non-ASCII characters in env values, paste payloads, and provider
+ output. Honors ``CAO_POWERSHELL_BIN`` for explicit override.
+ """
+ override = os.environ.get("CAO_POWERSHELL_BIN")
+ if override:
+ return override
+ pwsh = shutil.which("pwsh") or shutil.which("pwsh.exe")
+ if pwsh:
+ return pwsh
+ return "powershell.exe"
+
+
+_GUI_BASENAME = re.compile(r"(?i)^wezterm-gui(\.exe)?$")
+
+
+def _normalize_wezterm_bin(bin_path: str) -> str:
+ """Rewrite a ``wezterm-gui[.exe]`` path to its CLI sibling ``wezterm[.exe]``.
+
+ The WezTerm distribution ships two binaries side-by-side: ``wezterm.exe``
+ (multiplexer with the ``cli`` subcommand) and ``wezterm-gui.exe`` (the GUI
+ front-end which does *not* expose ``cli``). Users frequently set
+ ``WEZTERM_EXECUTABLE`` to the GUI binary because the GUI is what they
+ launch interactively, but ``wezterm cli spawn ...`` only works against
+ ``wezterm.exe``. Rewriting here turns a confusing ``WinError 2`` /
+ ``unrecognized subcommand 'cli'`` into a no-op at construction time.
+ """
+ p = Path(bin_path)
+ new_name = _GUI_BASENAME.sub(r"wezterm\1", p.name)
+ if new_name == p.name:
+ return bin_path
+ return str(p.with_name(new_name))
+
+
+def _wrap_with_env(
+ env_vars: Mapping[str, str], argv: Optional[Sequence[str]]
+) -> list[str]:
+ """Build a wezterm-cli-spawn target argv that injects ``env_vars`` and
+ then either runs ``argv`` (when given) or stays as an interactive shell
+ (when ``argv`` is None).
+
+ The wrapper exists only because ``wezterm cli spawn --set-environment``
+ is a no-op (upstream wezterm/wezterm#6565). Without this trampoline,
+ ``CAO_TERMINAL_ID`` and any provider-supplied launch env would be
+ silently dropped. We deliberately keep it to a *single* shell layer:
+
+ * Windows + interactive: ``pwsh -NoExit -Command ""`` — one
+ pwsh process becomes the pane shell.
+ * Windows + target argv: ``pwsh -Command "; & '' "``
+ — one pwsh execs the target; pane closes when the target exits.
+ * Unix: ``env K=V -- `` — ``env`` exec-replaces, so the
+ actual target/shell is pane PID 1.
+ """
+ if sys.platform == "win32":
+ env_steps = [
+ f"$env:{key}={_ps_single_quote(value)}" for key, value in env_vars.items()
+ ]
+ ps_bin = _resolve_powershell_bin()
+ if not argv:
+ # Interactive shell — same pwsh sets env and stays alive.
+ env_command = "; ".join(env_steps) if env_steps else ""
+ return [
+ ps_bin,
+ "-NoLogo",
+ "-NoProfile",
+ "-NoExit",
+ "-Command",
+ env_command,
+ ]
+ # Target argv — pwsh sets env then execs target; exits on target exit.
+ exe = argv[0]
+ args = list(argv[1:])
+ ps_args = ",".join(_ps_single_quote(arg) for arg in args)
+ command_parts = [f"{step};" for step in env_steps]
+ command_parts.append(f"$args=@({ps_args}); " if args else "$args=@(); ")
+ command_parts.append(f"& {_ps_single_quote(exe)} @args")
+ return [
+ ps_bin,
+ "-NoLogo",
+ "-NoProfile",
+ "-Command",
+ "".join(command_parts),
+ ]
+
+ target = list(argv) if argv else [_default_shell()]
+ wrapped = ["env"]
+ wrapped.extend(f"{key}={value}" for key, value in env_vars.items())
+ wrapped.append("--")
+ wrapped.extend(target)
+ return wrapped
+
+
+class WezTermMultiplexer(BaseMultiplexer):
+ """WezTerm CLI-backed multiplexer.
+
+ Session and window state is tracked in an in-memory registry keyed by
+ session_name and window_name. Cross-process visibility (sessions created
+ by another process) is not supported — callers must use the same instance
+ that created the session. ``get_multiplexer()`` enforces this singleton.
+
+ Supported send_special_key names (literal=False):
+ Enter, Tab, Escape, Backspace, Up, Down, Left, Right
+ Any other value raises KeyError — use literal=True for arbitrary VT bytes.
+
+ Thread safety: all mutations and reads of ``_sessions`` and ``_pollers``
+ are guarded by ``_lock``. Subprocess calls (``_run``, ``_get_pane_text``,
+ ``_spawn``) are intentionally performed *outside* the lock to avoid
+ holding it across blocking I/O.
+ """
+
+ def __init__(
+ self,
+ runner: Optional[WezTermRunner] = None,
+ wezterm_bin: Optional[str] = None,
+ poll_interval: float = 0.5,
+ clock_sleep: Optional[Callable[[float], None]] = None,
+ ) -> None:
+ self._run: WezTermRunner = runner or _default_runner
+ resolved_bin = wezterm_bin or os.environ.get("WEZTERM_EXECUTABLE") or "wezterm"
+ normalized_bin = _normalize_wezterm_bin(resolved_bin)
+ if normalized_bin != resolved_bin:
+ logger.warning(
+ "WEZTERM_EXECUTABLE points to the GUI binary (%s); "
+ "rewriting to its CLI sibling %s. wezterm-gui has no `cli` "
+ "subcommand. Set WEZTERM_EXECUTABLE to the wezterm[.exe] "
+ "path to silence this warning.",
+ resolved_bin,
+ normalized_bin,
+ )
+ self._bin: str = normalized_bin
+ self._sessions: dict[str, dict[str, str]] = {}
+ self._pollers: dict[tuple[str, str], _PollerState] = {}
+ self._poll_interval = poll_interval
+ self._clock_sleep = clock_sleep or time.sleep
+ self._lock = threading.Lock()
+
+ def _pane_id(self, session_name: str, window_name: str) -> str:
+ with self._lock:
+ session = self._sessions.get(session_name)
+ if session is None or window_name not in session:
+ raise KeyError(
+ f"WezTerm pane not found: session={session_name!r} window={window_name!r}"
+ )
+ return session[window_name]
+
+    def _spawn(
+        self,
+        working_directory: str,
+        terminal_id: str,
+        launch_spec: Optional[LaunchSpec],
+    ) -> str:
+        """Run ``wezterm cli spawn --new-window`` and return the new pane id.
+
+        ``CAO_TERMINAL_ID`` is always placed in the environment mapping;
+        entries from ``launch_spec.env`` are merged on top of it. The mapping
+        and the optional target argv are handed to ``_wrap_with_env`` (defined
+        elsewhere), whose result becomes the command after ``--``.
+
+        Raises:
+            RuntimeError: If the command's stripped stdout is not all digits.
+        """
+        env_vars: dict[str, str] = {"CAO_TERMINAL_ID": terminal_id}
+        target_argv: Optional[list[str]] = None
+        if launch_spec is not None:
+            if launch_spec.env:
+                env_vars.update(launch_spec.env)
+            if launch_spec.argv:
+                target_argv = list(launch_spec.argv)
+
+        spawn_cmd: list[str] = [
+            self._bin,
+            "cli",
+            "spawn",
+            "--new-window",
+            "--cwd",
+            working_directory,
+            "--",
+        ]
+        spawn_cmd.extend(_wrap_with_env(env_vars, target_argv))
+
+        # No lock is taken here: the subprocess call may block.
+        completed = self._run(spawn_cmd, None)
+        pane_id = completed.stdout.strip()
+        if pane_id.isdigit():
+            return pane_id
+        raise RuntimeError(
+            f"WezTerm spawn returned no pane id; stdout={completed.stdout!r}"
+        )
+
+    def _create_pane(
+        self,
+        session_name: str,
+        window_name: str,
+        terminal_id: str,
+        working_directory: Optional[str],
+        launch_spec: Optional[LaunchSpec],
+    ) -> str:
+        """Spawn a pane and record it; common path for session and window creation.
+
+        The working directory is resolved and validated first, then the pane
+        is spawned without holding the lock. Only the registry write is
+        locked. The session entry is created on demand, and an existing entry
+        for the same window name is overwritten.
+
+        Returns:
+            ``window_name``, unchanged.
+        """
+        resolved_cwd = self._resolve_and_validate_working_directory(working_directory)
+        new_pane_id = self._spawn(resolved_cwd, terminal_id, launch_spec)
+        with self._lock:
+            windows = self._sessions.setdefault(session_name, {})
+            windows[window_name] = new_pane_id
+        return window_name
+
+    def create_session(
+        self,
+        session_name: str,
+        window_name: str,
+        terminal_id: str,
+        working_directory: Optional[str] = None,
+        launch_spec: Optional[LaunchSpec] = None,
+    ) -> str:
+        """Spawn the first pane of a CAO session and return its window name.
+
+        Delegates entirely to ``_create_pane``.
+        """
+        return self._create_pane(
+            session_name,
+            window_name,
+            terminal_id,
+            working_directory,
+            launch_spec,
+        )
+
+    def create_window(
+        self,
+        session_name: str,
+        window_name: str,
+        terminal_id: str,
+        working_directory: Optional[str] = None,
+        launch_spec: Optional[LaunchSpec] = None,
+    ) -> str:
+        """Spawn an additional pane under ``session_name`` and return its window name.
+
+        Delegates entirely to ``_create_pane``; this method does not itself
+        check that the session already exists.
+        """
+        return self._create_pane(
+            session_name,
+            window_name,
+            terminal_id,
+            working_directory,
+            launch_spec,
+        )
+
+    def _paste_text(self, session_name: str, window_name: str, text: str) -> None:
+        """Send ``text`` to the pane via ``wezterm cli send-text``.
+
+        ``--no-paste`` is not passed, so the text goes through send-text's
+        default (paste) mode. The command result is not inspected.
+        """
+        target_pane = self._pane_id(session_name, window_name)
+        paste_cmd = [self._bin, "cli", "send-text", "--pane-id", target_pane, "--", text]
+        self._run(paste_cmd, None)
+
+    def _submit_input(
+        self, session_name: str, window_name: str, enter_count: int = 1
+    ) -> None:
+        """Press Enter ``enter_count`` times to submit previously pasted input.
+
+        Each press is a carriage return sent with ``--no-paste``. The method
+        sleeps 0.3 s before the first press and 0.5 s before every later one.
+        """
+        pane_id = self._pane_id(session_name, window_name)
+        enter_cmd = [
+            self._bin, "cli", "send-text", "--pane-id", pane_id, "--no-paste", "--", "\r",
+        ]
+        for press in range(enter_count):
+            self._clock_sleep(0.3 if press == 0 else 0.5)
+            # Pass a fresh list each time, as the original did.
+            self._run(list(enter_cmd), None)
+
+    def _get_pane_text(self, pane_id: str) -> str:
+        """Return the stdout of ``wezterm cli get-text`` for ``pane_id``.
+
+        Raises:
+            RuntimeError: If the command exits with a non-zero return code.
+        """
+        completed = self._run(
+            [self._bin, "cli", "get-text", "--pane-id", pane_id],
+            None,
+        )
+        if completed.returncode == 0:
+            return completed.stdout
+        raise RuntimeError(
+            f"WezTerm get-text failed for pane {pane_id}; returncode={completed.returncode}"
+        )
+
+    def _diff_snapshot(self, prev: str, current: str) -> str:
+        """Return the part of ``current`` that is new relative to ``prev``.
+
+        Cases, in order:
+          * ``prev`` is empty: everything in ``current`` is new.
+          * The snapshots are equal: nothing is new.
+          * ``current`` extends ``prev``: the appended suffix is new.
+          * Otherwise find the longest run of trailing ``prev`` lines that
+            equals the leading ``current`` lines; the lines after it are new.
+          * No overlap at all: all of ``current`` is treated as new.
+        """
+        if not prev:
+            return current
+        if prev == current:
+            return ""
+        if current.startswith(prev):
+            return current[len(prev):]
+
+        old_lines = prev.splitlines(keepends=True)
+        new_lines = current.splitlines(keepends=True)
+        overlap = min(len(old_lines), len(new_lines))
+        # Try the largest possible overlap first and shrink until one matches.
+        while overlap > 0:
+            if old_lines[-overlap:] == new_lines[:overlap]:
+                return "".join(new_lines[overlap:])
+            overlap -= 1
+        return current
+
+    def _poll_loop(
+        self,
+        session_name: str,
+        window_name: str,
+        pane_id: str,
+        stop_event: threading.Event,
+        file_path: str,
+    ) -> None:
+        """Poll the pane and append newly seen text to ``file_path``.
+
+        Runs as the body of the pipe_pane worker thread. Every
+        ``self._poll_interval`` seconds it takes a pane snapshot, diffs it
+        against the previous one and appends the delta to the log file. The
+        loop ends when ``stop_event`` is set or when reading the pane fails.
+        In every exit path the poller removes its own registry entry.
+        """
+        key = (session_name, window_name)
+        prev = ""
+        try:
+            with open(file_path, "a", encoding="utf-8") as fh:
+                while not stop_event.wait(self._poll_interval):
+                    # Fetch pane text outside lock — subprocess call may block.
+                    try:
+                        snapshot = self._get_pane_text(pane_id)
+                    except RuntimeError as exc:
+                        # Log the reason so a stopped log capture is diagnosable
+                        # instead of ending silently.
+                        logger.warning(
+                            "WezTerm pipe poller for %s:%s stopping: %s",
+                            session_name,
+                            window_name,
+                            exc,
+                        )
+                        return
+                    delta = self._diff_snapshot(prev, snapshot)
+                    if delta:
+                        fh.write(delta)
+                        fh.flush()
+                    prev = snapshot
+        finally:
+            # Self-clean on natural exit (pane disappeared or stop_event set).
+            # A timed-out stop_pipe_pane leaves the registry entry in place to
+            # block double-writers; this still runs once the zombie unblocks.
+            with self._lock:
+                self._pollers.pop(key, None)
+
+    def send_special_key(
+        self,
+        session_name: str,
+        window_name: str,
+        key: str,
+        *,
+        literal: bool = False,
+    ) -> None:
+        """Deliver a named key or a raw sequence to the pane with ``--no-paste``.
+
+        Args:
+            key: With ``literal=False``, a name present in ``_VT_KEY_MAP``
+                (Enter, Tab, Escape, Backspace, Up, Down, Left, Right). With
+                ``literal=True``, the exact string to send.
+            literal: Skip the name lookup and send ``key`` unchanged.
+
+        Raises:
+            KeyError: If the pane is not registered, or ``key`` is not a known
+                name while ``literal`` is False.
+        """
+        # Pane lookup comes first, so a missing pane is reported before a bad key.
+        pane_id = self._pane_id(session_name, window_name)
+        if literal:
+            payload = key
+        elif key in _VT_KEY_MAP:
+            payload = _VT_KEY_MAP[key]
+        else:
+            raise KeyError(
+                f"Unknown special key {key!r}; expected one of {sorted(_VT_KEY_MAP)}, "
+                f"or pass literal=True for raw VT sequences"
+            )
+        self._run(
+            [self._bin, "cli", "send-text", "--pane-id", pane_id, "--no-paste", "--", payload],
+            None,
+        )
+
+    def get_history(
+        self, session_name: str, window_name: str, tail_lines: Optional[int] = None
+    ) -> str:
+        """Return pane text via wezterm cli get-text (no --escapes).
+
+        Plain mode is used because --escapes breaks Claude trust-prompt regex
+        matching while plain output preserves all provider-relevant patterns.
+
+        Args:
+            tail_lines: When given, only the last ``tail_lines`` lines are
+                returned (trailing newlines are dropped first). A value of
+                zero or less yields an empty string.
+
+        Raises:
+            KeyError: If the pane is not registered.
+            RuntimeError: If ``get-text`` fails.
+        """
+        pane_id = self._pane_id(session_name, window_name)
+        text = self._get_pane_text(pane_id)
+        if tail_lines is None:
+            return text
+        if tail_lines <= 0:
+            # lines[-0:] would be the whole list and a negative count would
+            # slice from the front; neither is a "tail".
+            return ""
+        lines = text.rstrip("\n").splitlines()
+        return "\n".join(lines[-tail_lines:])
+
+    def list_sessions(self) -> list[dict[str, str]]:
+        """Describe every session held in the in-memory registry.
+
+        Each entry uses the session name as both ``id`` and ``name`` and
+        always reports ``status`` as ``"active"``.
+        """
+        sessions: list[dict[str, str]] = []
+        with self._lock:
+            for session_name in self._sessions:
+                sessions.append(
+                    {"id": session_name, "name": session_name, "status": "active"}
+                )
+        return sessions
+
+    def kill_session(self, session_name: str) -> bool:
+        """Terminate a session and kill all owned panes.
+
+        Pollers are stopped first, then the session is removed from the
+        registry and each of its panes is killed with
+        ``wezterm cli kill-pane``. The kill-pane results are not inspected.
+
+        Returns:
+            False when the session is not registered on entry, True otherwise.
+        """
+        with self._lock:
+            session = self._sessions.get(session_name)
+            if session is None:
+                return False
+            window_names = list(session)
+        # Stop pollers outside lock (join may block).
+        for window_name in window_names:
+            try:
+                self.stop_pipe_pane(session_name, window_name)
+            except RuntimeError:
+                pass  # No poller was running for this window.
+        # Only the registry mutation needs the lock.
+        with self._lock:
+            removed = self._sessions.pop(session_name, None)
+        pane_ids = list(removed.values()) if removed else []
+        # Kill panes outside the lock so the blocking subprocess calls do not
+        # stall other threads that need the registry.
+        for pane_id in pane_ids:
+            self._run(
+                [self._bin, "cli", "kill-pane", "--pane-id", pane_id],
+                None,
+            )
+        return True
+
+    def kill_window(self, session_name: str, window_name: str) -> bool:
+        """Terminate one CAO window/pane.
+
+        The window's poller is stopped first, then the window is removed from
+        the registry and its pane is killed with ``wezterm cli kill-pane``.
+        The session entry itself is kept, even if it is left with no windows.
+
+        Returns:
+            False when the session or window is not registered on entry, or
+            when the session has disappeared by the time the window is
+            removed; True otherwise.
+        """
+        with self._lock:
+            session = self._sessions.get(session_name)
+            if session is None or window_name not in session:
+                return False
+        # Stop poller outside lock (join may block).
+        try:
+            self.stop_pipe_pane(session_name, window_name)
+        except RuntimeError:
+            pass  # No poller was running for this window.
+        # Only the registry mutation needs the lock.
+        with self._lock:
+            session = self._sessions.get(session_name)
+            if session is None:
+                return False
+            pane_id = session.pop(window_name, None)
+        # Kill the pane outside the lock so the blocking subprocess call does
+        # not stall other threads that need the registry.
+        if pane_id is not None:
+            self._run(
+                [self._bin, "cli", "kill-pane", "--pane-id", pane_id],
+                None,
+            )
+        return True
+
+    def session_exists(self, session_name: str) -> bool:
+        """Report whether ``session_name`` has an entry in the in-memory registry."""
+        with self._lock:
+            known = session_name in self._sessions
+        return known
+
+    def get_pane_working_directory(
+        self, session_name: str, window_name: str
+    ) -> Optional[str]:
+        """Always return None: pane working-directory lookup is not implemented.
+
+        Both arguments are ignored. The WezTerm CLI does not expose pane CWD
+        reliably in early versions, so nothing is reported until
+        ``wezterm cli list --format json`` has been validated as a source.
+        """
+        return None
+
+    def pipe_pane(self, session_name: str, window_name: str, file_path: str) -> None:
+        """Start a daemon thread that mirrors new pane output into ``file_path``.
+
+        The file is created if missing. The poller is registered under
+        ``(session_name, window_name)`` and started while the lock is held.
+
+        Raises:
+            KeyError: If the pane is not registered.
+            RuntimeError: If a poller is already registered for this window.
+        """
+        pane_id = self._pane_id(session_name, window_name)
+        poller_key = (session_name, window_name)
+        with self._lock:
+            if poller_key in self._pollers:
+                raise RuntimeError(
+                    f"pipe_pane already running for {session_name}:{window_name}"
+                )
+            Path(file_path).touch(exist_ok=True)
+            stop_event = threading.Event()
+            worker = threading.Thread(
+                name=f"wezterm-pipe-{session_name}-{window_name}",
+                target=self._poll_loop,
+                args=(session_name, window_name, pane_id, stop_event, file_path),
+                daemon=True,
+            )
+            self._pollers[poller_key] = _PollerState(thread=worker, stop_event=stop_event)
+            worker.start()
+
+    def stop_pipe_pane(self, session_name: str, window_name: str) -> None:
+        """Signal the window's poller to stop and wait up to 2 s for it to exit.
+
+        If the thread is still alive after the timeout, its registry entry is
+        deliberately kept and a warning is logged. Otherwise the entry is
+        removed.
+
+        Raises:
+            RuntimeError: If no poller is registered for this window.
+        """
+        key = (session_name, window_name)
+        with self._lock:
+            state = self._pollers.get(key)
+            if state is None:
+                raise RuntimeError(
+                    f"pipe_pane not running for {session_name}:{window_name}"
+                )
+            state.stop_event.set()
+        # Join outside lock — blocking wait must not hold the lock.
+        state.thread.join(timeout=2.0)
+        if state.thread.is_alive():
+            # Poller thread is stalled (zombie). Leave the registry entry in
+            # place so that a subsequent pipe_pane() call raises rather than
+            # starting a second thread writing to the same file concurrently.
+            # The zombie thread will self-clean via _poll_loop's finally block
+            # once the pane disappears and _get_pane_text raises.
+            logger.warning(
+                "Timed out stopping WezTerm pipe poller for %s:%s — "
+                "leaving zombie entry in registry to prevent double-write",
+                session_name,
+                window_name,
+            )
+            return
+        # Thread exited cleanly. It normally removed its own entry already, so
+        # drop the entry only if it is still ours: a new poller may have been
+        # registered under the same key since the old thread cleaned up.
+        with self._lock:
+            if self._pollers.get(key) is state:
+                del self._pollers[key]
diff --git a/src/cli_agent_orchestrator/providers/base.py b/src/cli_agent_orchestrator/providers/base.py
index ca226b85a..92d9c3f55 100644
--- a/src/cli_agent_orchestrator/providers/base.py
+++ b/src/cli_agent_orchestrator/providers/base.py
@@ -23,6 +23,7 @@
from abc import ABC, abstractmethod
from typing import List, Optional
+from cli_agent_orchestrator.multiplexers.base import LaunchSpec
from cli_agent_orchestrator.models.terminal import TerminalStatus
@@ -181,6 +182,15 @@ def _apply_skill_prompt(self, system_prompt: str) -> str:
return f"{system_prompt}\n\n{self._skill_prompt}"
return self._skill_prompt
+    def get_launch_spec(self, multiplexer: object) -> Optional[LaunchSpec]:
+        """Return the process spawn request for a new pane, or None for the default.
+
+        This base implementation ignores ``multiplexer`` and always returns
+        None; providers that need a direct spawn on some backends override it.
+        """
+        del multiplexer
+        return None
+
def _update_status(self, status: TerminalStatus) -> None:
"""Update internal status."""
self._status = status
diff --git a/src/cli_agent_orchestrator/providers/claude_code.py b/src/cli_agent_orchestrator/providers/claude_code.py
index 5d2c428ee..f510e47a0 100644
--- a/src/cli_agent_orchestrator/providers/claude_code.py
+++ b/src/cli_agent_orchestrator/providers/claude_code.py
@@ -4,12 +4,11 @@
import logging
import re
import shlex
-import subprocess
import time
from pathlib import Path
from typing import Optional
-from cli_agent_orchestrator.clients.tmux import tmux_client
+from cli_agent_orchestrator.multiplexers import get_multiplexer
from cli_agent_orchestrator.models.terminal import TerminalStatus
from cli_agent_orchestrator.providers.base import BaseProvider
from cli_agent_orchestrator.utils.agent_profiles import load_agent_profile
@@ -192,7 +191,7 @@ def _handle_startup_prompts(self, timeout: float = 20.0) -> None:
start_time = time.time()
bypass_accepted = False
while time.time() - start_time < timeout:
- output = tmux_client.get_history(self.session_name, self.window_name)
+ output = get_multiplexer().get_history(self.session_name, self.window_name)
if not output:
time.sleep(1.0)
continue
@@ -203,13 +202,17 @@ def _handle_startup_prompts(self, timeout: float = 20.0) -> None:
# Only act once — the text stays in the buffer after dismissal.
if not bypass_accepted and re.search(BYPASS_PROMPT_PATTERN, clean_output):
logger.info("Bypass permissions prompt detected, auto-accepting")
- target = f"{self.session_name}:{self.window_name}"
# Send raw Down arrow escape sequence (-l for literal) to move
# cursor to "Yes, I accept", then Enter to confirm.
# tmux send-keys "Down" doesn't work with Claude's Ink TUI.
- subprocess.run(["tmux", "send-keys", "-t", target, "-l", "\x1b[B"], check=False)
+ get_multiplexer().send_special_key(
+ self.session_name,
+ self.window_name,
+ "\x1b[B",
+ literal=True,
+ )
time.sleep(0.5)
- subprocess.run(["tmux", "send-keys", "-t", target, "Enter"], check=False)
+ get_multiplexer().send_special_key(self.session_name, self.window_name, "Enter")
bypass_accepted = True
time.sleep(1.0)
continue # Trust prompt may follow
@@ -217,11 +220,7 @@ def _handle_startup_prompts(self, timeout: float = 20.0) -> None:
# 2) Handle workspace trust prompt
if re.search(TRUST_PROMPT_PATTERN, clean_output):
logger.info("Workspace trust prompt detected, auto-accepting")
- session = tmux_client.server.sessions.get(session_name=self.session_name)
- window = session.windows.get(window_name=self.window_name)
- pane = window.active_pane
- if pane:
- pane.send_keys("", enter=True)
+ get_multiplexer().send_special_key(self.session_name, self.window_name, "Enter")
return
# 3) Claude Code fully started — no prompts needed
@@ -238,7 +237,7 @@ def _handle_startup_prompts(self, timeout: float = 20.0) -> None:
def initialize(self) -> bool:
"""Initialize Claude Code provider by starting claude command."""
# Wait for shell prompt to appear in the tmux window
- if not wait_for_shell(tmux_client, self.session_name, self.window_name, timeout=10.0):
+ if not wait_for_shell(get_multiplexer(), self.session_name, self.window_name, timeout=10.0):
raise TimeoutError("Shell initialization timed out after 10 seconds")
# Prevent bypass permissions dialog from appearing (settings-based fix).
@@ -252,10 +251,10 @@ def initialize(self) -> bool:
# from Claude Code's own ❯ REPL prompt — they are visually identical
# after ANSI stripping, so without a snapshot, status detection can
# falsely return IDLE on the old shell prompt before claude even starts.
- pre_launch_snapshot = tmux_client.get_history(self.session_name, self.window_name) or ""
+ pre_launch_snapshot = get_multiplexer().get_history(self.session_name, self.window_name) or ""
- # Send Claude Code command using tmux client
- tmux_client.send_keys(self.session_name, self.window_name, command)
+ # Send Claude Code command using multiplexer
+ get_multiplexer().send_keys(self.session_name, self.window_name, command)
# Handle startup prompts (bypass permissions + workspace trust)
self._handle_startup_prompts(timeout=20.0)
@@ -269,7 +268,7 @@ def initialize(self) -> bool:
# ❯ prompt triggers an immediate IDLE return before claude starts.
deadline = time.time() + 30.0
while time.time() < deadline:
- current_output = tmux_client.get_history(self.session_name, self.window_name) or ""
+ current_output = get_multiplexer().get_history(self.session_name, self.window_name) or ""
new_content = current_output[len(pre_launch_snapshot) :]
# Claude-specific startup markers that cannot come from the shell:
# the ──────── separator, bypass/trust prompt text, or "Claude Code"
@@ -323,7 +322,7 @@ def get_status(self, tail_lines: Optional[int] = None) -> TerminalStatus:
See: https://github.com/awslabs/cli-agent-orchestrator/issues/104
"""
- output = tmux_client.get_history(self.session_name, self.window_name, tail_lines=tail_lines)
+ output = get_multiplexer().get_history(self.session_name, self.window_name, tail_lines=tail_lines)
if not output:
return TerminalStatus.ERROR
diff --git a/src/cli_agent_orchestrator/providers/codex.py b/src/cli_agent_orchestrator/providers/codex.py
index 0128aff04..811015064 100644
--- a/src/cli_agent_orchestrator/providers/codex.py
+++ b/src/cli_agent_orchestrator/providers/codex.py
@@ -6,8 +6,11 @@
import time
from typing import Optional
-from cli_agent_orchestrator.clients.tmux import tmux_client
+from cli_agent_orchestrator.multiplexers import get_multiplexer
from cli_agent_orchestrator.models.terminal import TerminalStatus
+from cli_agent_orchestrator.multiplexers.base import LaunchSpec
+from cli_agent_orchestrator.multiplexers.launch import build_launch_spec, default_platform
+from cli_agent_orchestrator.multiplexers.wezterm import WezTermMultiplexer
from cli_agent_orchestrator.providers.base import BaseProvider
from cli_agent_orchestrator.utils.agent_profiles import load_agent_profile
from cli_agent_orchestrator.utils.terminal import wait_for_shell, wait_until_status
@@ -59,8 +62,11 @@
# ASSISTANT_PREFIX_PATTERN and the TUI footer › matches idle prompt).
TUI_PROGRESS_PATTERN = r"•.*\(\d+s\s*•\s*esc to interrupt\)"
-# Workspace trust/approval prompt shown when Codex opens a new directory
-TRUST_PROMPT_PATTERN = r"allow Codex to work in this folder"
+# Workspace trust/approval prompt shown when Codex opens a new directory.
+# Wording verified against Codex CLI v0.123.0 on 2026-04-25; the previous
+# anchor ("allow Codex to work in this folder") no longer appears in current
+# Codex builds and the smoke harness caught the regression.
+TRUST_PROMPT_PATTERN = r"Do you trust the contents of this directory"
# Codex welcome banner indicating normal startup (no trust prompt)
CODEX_WELCOME_PATTERN = r"OpenAI Codex"
@@ -121,11 +127,20 @@ def __init__(
agent_profile: Optional[str] = None,
allowed_tools: Optional[list] = None,
skill_prompt: Optional[str] = None,
+ launch_spec: Optional[LaunchSpec] = None,
):
"""Initialize provider state."""
super().__init__(terminal_id, session_name, window_name, allowed_tools, skill_prompt)
self._initialized = False
self._agent_profile = agent_profile
+ self._launch_spec = launch_spec
+
+    def _build_codex_argv(self) -> list[str]:
+        """Return the base ``codex`` argv; Windows gets ``-c hooks=[]`` ahead of the flags."""
+        # The platform-specific prefix is empty everywhere except Windows.
+        platform_prefix = ["-c", "hooks=[]"] if default_platform() == "windows" else []
+        common_flags = ["--yolo", "--no-alt-screen", "--disable", "shell_snapshot"]
+        return ["codex", *platform_prefix, *common_flags]
def _build_codex_command(self) -> str:
"""Build Codex command with agent profile if provided.
@@ -138,7 +153,7 @@ def _build_codex_command(self) -> str:
# non-interactive tmux sessions where interactive approval prompts
# block handoff/assign flows. This mirrors Claude Code's
# --dangerously-skip-permissions and Gemini CLI's --yolo flags.
- command_parts = ["codex", "--yolo", "--no-alt-screen", "--disable", "shell_snapshot"]
+ command_parts = self._build_codex_argv()
if self._agent_profile is not None:
try:
@@ -222,7 +237,7 @@ def _handle_trust_prompt(self, timeout: float = 20.0) -> None:
"""
start_time = time.time()
while time.time() - start_time < timeout:
- output = tmux_client.get_history(self.session_name, self.window_name)
+ output = get_multiplexer().get_history(self.session_name, self.window_name)
if not output:
time.sleep(1.0)
continue
@@ -232,11 +247,7 @@ def _handle_trust_prompt(self, timeout: float = 20.0) -> None:
if re.search(TRUST_PROMPT_PATTERN, clean_output):
logger.info("Codex workspace trust prompt detected, auto-accepting")
- session = tmux_client.server.sessions.get(session_name=self.session_name)
- window = session.windows.get(window_name=self.window_name)
- pane = window.active_pane
- if pane:
- pane.send_keys("", enter=True)
+ get_multiplexer().send_special_key(self.session_name, self.window_name, "Enter")
return
# Check if Codex has fully started (welcome banner visible)
@@ -247,24 +258,33 @@ def _handle_trust_prompt(self, timeout: float = 20.0) -> None:
time.sleep(1.0)
logger.warning("Codex trust prompt handler timed out")
+    def get_launch_spec(self, multiplexer: object) -> Optional[LaunchSpec]:
+        """Return the cached (lazily built) Codex LaunchSpec for WezTerm, else None."""
+        if isinstance(multiplexer, WezTermMultiplexer):
+            if self._launch_spec is None:
+                self._launch_spec = build_launch_spec("codex", self._build_codex_argv())
+            return self._launch_spec
+        return None
+
def initialize(self) -> bool:
"""Initialize Codex provider by starting codex command."""
- if not wait_for_shell(tmux_client, self.session_name, self.window_name, timeout=10.0):
- raise TimeoutError("Shell initialization timed out after 10 seconds")
-
- # Send a warm-up command before launching codex.
- # Codex exits immediately in freshly-created tmux sessions where the shell
- # has not yet processed a full interactive command cycle.
- tmux_client.send_keys(self.session_name, self.window_name, "echo ready")
- time.sleep(2.0)
-
- # Build command with flags and agent profile (developer_instructions).
- # --no-alt-screen: run in inline mode so output stays in normal scrollback,
- # making tmux capture-pane reliable.
- # --disable shell_snapshot: avoid TTY input conflicts (SIGTTIN) in tmux
- # caused by the shell_snapshot subprocess inheriting stdin.
- command = self._build_codex_command()
- tmux_client.send_keys(self.session_name, self.window_name, command)
+ if self._launch_spec is None:
+ self._launch_spec = build_launch_spec("codex", self._build_codex_argv())
+
+ direct_spawned_wezterm = isinstance(get_multiplexer(), WezTermMultiplexer)
+
+ if not direct_spawned_wezterm:
+ if not wait_for_shell(get_multiplexer(), self.session_name, self.window_name, timeout=10.0):
+ raise TimeoutError("Shell initialization timed out after 10 seconds")
+
+ # Warm-up command — codex exits immediately in freshly-created
+ # tmux sessions where the shell hasn't completed an interactive
+ # command cycle yet.
+ get_multiplexer().send_keys(self.session_name, self.window_name, "echo ready")
+ time.sleep(2.0)
+
+ command = self._build_codex_command()
+ get_multiplexer().send_keys(self.session_name, self.window_name, command)
# Handle workspace trust prompt if it appears (new/untrusted directories)
self._handle_trust_prompt(timeout=20.0)
@@ -282,7 +302,7 @@ def initialize(self) -> bool:
def get_status(self, tail_lines: Optional[int] = None) -> TerminalStatus:
"""Get Codex status by analyzing terminal output."""
- output = tmux_client.get_history(self.session_name, self.window_name, tail_lines=tail_lines)
+ output = get_multiplexer().get_history(self.session_name, self.window_name, tail_lines=tail_lines)
if not output:
return TerminalStatus.ERROR
diff --git a/src/cli_agent_orchestrator/services/inbox_service.py b/src/cli_agent_orchestrator/services/inbox_service.py
index 3c2a55cfd..9b1b1f774 100644
--- a/src/cli_agent_orchestrator/services/inbox_service.py
+++ b/src/cli_agent_orchestrator/services/inbox_service.py
@@ -23,7 +23,6 @@
import logging
import re
-import subprocess
from pathlib import Path
from watchdog.events import FileModifiedEvent, FileSystemEventHandler
@@ -44,14 +43,42 @@ def _get_log_tail(terminal_id: str, lines: int = 100) -> str:
Default of 100 lines covers full-screen TUI providers where the idle
prompt sits mid-screen with 30+ padding lines below it.
- Reading 100 lines via tail is still sub-millisecond.
+ Reading 100 lines via a backward block scan is still sub-millisecond.
"""
log_path = TERMINAL_LOG_DIR / f"{terminal_id}.log"
try:
- result = subprocess.run(
- ["tail", "-n", str(lines), str(log_path)], capture_output=True, text=True, timeout=1
+ if lines <= 0 or not log_path.exists():
+ return ""
+
+ block_size = 4096
+
+ with log_path.open("rb") as log_file:
+ log_file.seek(0, 2)
+ file_size = log_file.tell()
+ if file_size == 0:
+ return ""
+
+ position = file_size
+ buffer = b""
+ newline_count = 0
+
+ while position > 0 and newline_count <= lines:
+ read_size = min(block_size, position)
+ position -= read_size
+ log_file.seek(position)
+ new_block = log_file.read(read_size)
+ newline_count += new_block.count(b"\n")
+ buffer = new_block + buffer
+
+ tail_text = buffer.decode("utf-8", errors="replace").replace("\r\n", "\n").replace(
+ "\r", "\n"
)
- return result.stdout
+ tail_lines = tail_text.splitlines(keepends=True)
+
+ if position > 0 and tail_lines:
+ tail_lines = tail_lines[1:]
+
+ return "".join(tail_lines[-lines:])
except Exception:
return ""
diff --git a/src/cli_agent_orchestrator/services/terminal_service.py b/src/cli_agent_orchestrator/services/terminal_service.py
index 436c093cf..806171fbe 100644
--- a/src/cli_agent_orchestrator/services/terminal_service.py
+++ b/src/cli_agent_orchestrator/services/terminal_service.py
@@ -29,8 +29,9 @@
get_terminal_metadata,
update_last_active,
)
-from cli_agent_orchestrator.clients.tmux import tmux_client
from cli_agent_orchestrator.constants import SESSION_PREFIX, TERMINAL_LOG_DIR
+from cli_agent_orchestrator.multiplexers import get_multiplexer
+from cli_agent_orchestrator.multiplexers.base import LaunchSpec
from cli_agent_orchestrator.models.inbox import OrchestrationType
from cli_agent_orchestrator.models.provider import ProviderType
from cli_agent_orchestrator.models.terminal import Terminal, TerminalStatus
@@ -83,6 +84,7 @@ def create_terminal(
session_name: Optional[str] = None,
new_session: bool = False,
working_directory: Optional[str] = None,
+ launch_spec: LaunchSpec | None = None,
allowed_tools: Optional[list[str]] = None,
registry: PluginRegistry | None = None,
) -> Terminal:
@@ -101,6 +103,7 @@ def create_terminal(
session_name: Optional custom session name. If not provided, auto-generated.
new_session: If True, creates a new tmux session. If False, adds to existing.
working_directory: Optional working directory for the terminal shell
+ launch_spec: Optional backend-specific initial process launch request
Returns:
Terminal object with all metadata populated
@@ -110,6 +113,8 @@ def create_terminal(
TimeoutError: If provider initialization times out
"""
session_created = False # tracks whether THIS call created the tmux session
+ multiplexer = None
+ terminal_id = ""
try:
# Step 1: Generate unique identifiers
terminal_id = generate_terminal_id()
@@ -118,43 +123,17 @@ def create_terminal(
session_name = generate_session_name()
window_name = generate_window_name(agent_profile)
+ multiplexer = get_multiplexer()
- # Step 2: Create tmux session or window
- if new_session:
- # Ensure session name has the CAO prefix for identification
- if not session_name.startswith(SESSION_PREFIX):
- session_name = f"{SESSION_PREFIX}{session_name}"
-
- # Prevent duplicate sessions
- if tmux_client.session_exists(session_name):
- raise ValueError(f"Session '{session_name}' already exists")
-
- # Create new tmux session with initial window
- tmux_client.create_session(session_name, window_name, terminal_id, working_directory)
- session_created = True # only set after successful creation
- else:
- # Add window to existing session
- if not tmux_client.session_exists(session_name):
- raise ValueError(f"Session '{session_name}' not found")
- window_name = tmux_client.create_window(
- session_name, window_name, terminal_id, working_directory
- )
-
- # Step 3: Persist terminal metadata to database
- db_create_terminal(
- terminal_id, session_name, window_name, provider, agent_profile, allowed_tools
- )
-
- # Step 3b: Load the profile once for allowed tool resolution before
- # provider initialization. The skill catalog is computed only for
- # providers that consume it at launch time (see RUNTIME_SKILL_PROMPT_PROVIDERS).
+ # Step 2: Load profile and build provider before pane creation so backend-
+ # specific direct-spawn requests (e.g. Codex on WezTerm) can be passed
+ # into create_session/create_window.
try:
profile = load_agent_profile(agent_profile)
except FileNotFoundError:
profile = None
skill_prompt = build_skill_catalog() if provider in RUNTIME_SKILL_PROMPT_PROVIDERS else None
- # Step 3c: Resolve allowed_tools from profile if not explicitly provided
if allowed_tools is None and profile is not None:
from cli_agent_orchestrator.utils.tool_mapping import resolve_allowed_tools
@@ -163,12 +142,6 @@ def create_terminal(
profile.allowedTools, profile.role, mcp_server_names
)
- # Step 4: Create and initialize the CLI provider
- # This starts the agent (e.g., runs "kiro-cli chat --agent developer").
- # Only runtime-prompt providers (Claude Code, Codex, Gemini, Kimi) receive
- # the skill catalog here; Kiro (skill:// resources) and OpenCode
- # (OPENCODE_CONFIG_DIR/skills symlink) discover skills natively; Q and
- # Copilot get the catalog baked at install time.
provider_instance = provider_manager.create_provider(
provider,
terminal_id,
@@ -179,13 +152,62 @@ def create_terminal(
skill_prompt=skill_prompt,
model=profile.model if profile else None,
)
+ effective_launch_spec = (
+ launch_spec if launch_spec is not None else provider_instance.get_launch_spec(multiplexer)
+ )
+
+        # Step 3: Create the multiplexer session or window
+ if new_session:
+ # Ensure session name has the CAO prefix for identification
+ if not session_name.startswith(SESSION_PREFIX):
+ session_name = f"{SESSION_PREFIX}{session_name}"
+
+ # Prevent duplicate sessions
+ if multiplexer.session_exists(session_name):
+ raise ValueError(f"Session '{session_name}' already exists")
+
+            # Create a new multiplexer session with its initial window
+ actual_window_name = multiplexer.create_session(
+ session_name,
+ window_name,
+ terminal_id,
+ working_directory,
+ launch_spec=effective_launch_spec,
+ )
+ session_created = True # only set after successful creation
+ else:
+ # Add window to existing session
+ if not multiplexer.session_exists(session_name):
+ raise ValueError(f"Session '{session_name}' not found")
+ actual_window_name = multiplexer.create_window(
+ session_name,
+ window_name,
+ terminal_id,
+ working_directory,
+ launch_spec=effective_launch_spec,
+ )
+ if isinstance(actual_window_name, str) and actual_window_name:
+ window_name = actual_window_name
+
+ # Step 4: Persist terminal metadata to database
+ db_create_terminal(
+ terminal_id, session_name, window_name, provider, agent_profile, allowed_tools
+ )
+
+ # Step 5: Initialize the CLI provider.
+ # Only runtime-prompt providers (Claude Code, Codex, Gemini, Kimi) receive
+ # the skill catalog here; Kiro (skill:// resources) and OpenCode
+ # (OPENCODE_CONFIG_DIR/skills symlink) discover skills natively; Q and
+ # Copilot get the catalog baked at install time.
+ provider_instance.session_name = session_name
+ provider_instance.window_name = window_name
provider_instance.initialize()
- # Step 5: Set up terminal logging via tmux pipe-pane
+        # Step 6: Set up terminal logging via the multiplexer's pipe_pane
# This captures all terminal output to a log file for inbox monitoring
log_path = TERMINAL_LOG_DIR / f"{terminal_id}.log"
log_path.touch() # Ensure file exists before watching
- tmux_client.pipe_pane(session_name, window_name, str(log_path))
+ multiplexer.pipe_pane(session_name, window_name, str(log_path))
# Build and return the Terminal object
terminal = Terminal(
@@ -220,9 +242,9 @@ def create_terminal(
provider_manager.cleanup_provider(terminal_id)
except Exception:
pass # Ignore cleanup errors
- if session_created and session_name:
+ if session_created and session_name and multiplexer is not None:
try:
- tmux_client.kill_session(session_name)
+ multiplexer.kill_session(session_name)
except:
pass # Ignore cleanup errors
raise
@@ -275,7 +297,7 @@ def get_working_directory(terminal_id: str) -> Optional[str]:
if not metadata:
raise ValueError(f"Terminal '{terminal_id}' not found")
- working_dir = tmux_client.get_pane_working_directory(
+ working_dir = get_multiplexer().get_pane_working_directory(
metadata["tmux_session"], metadata["tmux_window"]
)
return working_dir
@@ -308,7 +330,7 @@ def send_input(
provider = provider_manager.get_provider(terminal_id)
enter_count = provider.paste_enter_count if provider else 1
- tmux_client.send_keys(
+ get_multiplexer().send_keys(
metadata["tmux_session"], metadata["tmux_window"], message, enter_count=enter_count
)
@@ -361,7 +383,7 @@ def send_special_key(terminal_id: str, key: str) -> bool:
if not metadata:
raise ValueError(f"Terminal '{terminal_id}' not found")
- tmux_client.send_special_key(metadata["tmux_session"], metadata["tmux_window"], key)
+ get_multiplexer().send_special_key(metadata["tmux_session"], metadata["tmux_window"], key)
update_last_active(terminal_id)
logger.info(f"Sent special key '{key}' to terminal: {terminal_id}")
@@ -392,7 +414,7 @@ def get_output(terminal_id: str, mode: OutputMode = OutputMode.FULL) -> str:
raise ValueError(f"Terminal '{terminal_id}' not found")
if mode == OutputMode.FULL:
- return tmux_client.get_history(metadata["tmux_session"], metadata["tmux_window"])
+ return get_multiplexer().get_history(metadata["tmux_session"], metadata["tmux_window"])
elif mode == OutputMode.LAST:
provider = provider_manager.get_provider(terminal_id)
if provider is None:
@@ -401,7 +423,7 @@ def get_output(terminal_id: str, mode: OutputMode = OutputMode.FULL) -> str:
# Capability check: providers that need deeper scrollback for extraction
# opt in by defining ``extraction_tail_lines``. Base providers don't.
extract_lines = getattr(provider, "extraction_tail_lines", None)
- full_output = tmux_client.get_history(
+ full_output = get_multiplexer().get_history(
metadata["tmux_session"],
metadata["tmux_window"],
tail_lines=extract_lines,
@@ -413,7 +435,7 @@ def get_output(terminal_id: str, mode: OutputMode = OutputMode.FULL) -> str:
try:
if attempt > 0:
time.sleep(10.0)
- full_output = tmux_client.get_history(
+ full_output = get_multiplexer().get_history(
metadata["tmux_session"],
metadata["tmux_window"],
tail_lines=extract_lines,
@@ -444,13 +466,13 @@ def delete_terminal(terminal_id: str, registry: PluginRegistry | None = None) ->
if metadata:
# Stop pipe-pane logging
try:
- tmux_client.stop_pipe_pane(metadata["tmux_session"], metadata["tmux_window"])
+ get_multiplexer().stop_pipe_pane(metadata["tmux_session"], metadata["tmux_window"])
except Exception as e:
logger.warning(f"Failed to stop pipe-pane for {terminal_id}: {e}")
# Kill the tmux window (this terminates the agent process)
try:
- tmux_client.kill_window(metadata["tmux_session"], metadata["tmux_window"])
+ get_multiplexer().kill_window(metadata["tmux_session"], metadata["tmux_window"])
except Exception as e:
logger.warning(f"Failed to kill tmux window for {terminal_id}: {e}")
diff --git a/test/multiplexers/__init__.py b/test/multiplexers/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/multiplexers/test_base_multiplexer.py b/test/multiplexers/test_base_multiplexer.py
new file mode 100644
index 000000000..afa2ac951
--- /dev/null
+++ b/test/multiplexers/test_base_multiplexer.py
@@ -0,0 +1,357 @@
+"""Contract tests for BaseMultiplexer and LaunchSpec."""
+
+from __future__ import annotations
+
+import os
+from typing import Optional
+from unittest.mock import call, patch
+
+import pytest
+
+from cli_agent_orchestrator.multiplexers import BaseMultiplexer, LaunchSpec
+from cli_agent_orchestrator.multiplexers.base import BaseMultiplexer as BaseMultiplexerDirect
+
+
+# ---------------------------------------------------------------------------
+# Fake concrete subclass — records calls, does nothing real
+# ---------------------------------------------------------------------------
+
+
+class FakeMultiplexer(BaseMultiplexer):
+    """Concrete BaseMultiplexer stub: each method logs (name, args, kwargs) to ``_calls`` and returns a canned value."""
+
+    def __init__(self) -> None:
+        self._calls: list[tuple[str, tuple[object, ...], dict[str, object]]] = []  # call log, in invocation order
+
+    def _record(self, name: str, *args: object, **kwargs: object) -> None:
+        self._calls.append((name, args, kwargs))
+
+    def create_session(
+        self,
+        session_name: str,
+        window_name: str,
+        terminal_id: str,
+        working_directory: Optional[str] = None,
+        launch_spec: Optional[LaunchSpec] = None,
+    ) -> str:
+        self._record("create_session", session_name, window_name, terminal_id, working_directory, launch_spec)
+        return window_name  # echoes the requested window name back
+
+    def create_window(
+        self,
+        session_name: str,
+        window_name: str,
+        terminal_id: str,
+        working_directory: Optional[str] = None,
+        launch_spec: Optional[LaunchSpec] = None,
+    ) -> str:
+        self._record("create_window", session_name, window_name, terminal_id, working_directory, launch_spec)
+        return window_name  # echoes the requested window name back
+
+    def _paste_text(self, session_name: str, window_name: str, text: str) -> None:
+        self._record("_paste_text", session_name, window_name, text)
+
+    def _submit_input(
+        self, session_name: str, window_name: str, enter_count: int = 1
+    ) -> None:
+        self._record("_submit_input", session_name, window_name, enter_count=enter_count)  # enter_count logged as a kwarg
+
+    def send_special_key(
+        self,
+        session_name: str,
+        window_name: str,
+        key: str,
+        *,
+        literal: bool = False,
+    ) -> None:
+        self._record("send_special_key", session_name, window_name, key, literal=literal)  # literal logged as a kwarg
+
+    def get_history(
+        self, session_name: str, window_name: str, tail_lines: Optional[int] = None
+    ) -> str:
+        self._record("get_history", session_name, window_name, tail_lines)
+        return ""  # canned: no scrollback
+
+    def list_sessions(self) -> list[dict[str, str]]:
+        self._record("list_sessions")
+        return []  # canned: no sessions
+
+    def kill_session(self, session_name: str) -> bool:
+        self._record("kill_session", session_name)
+        return True
+
+    def kill_window(self, session_name: str, window_name: str) -> bool:
+        self._record("kill_window", session_name, window_name)
+        return True
+
+    def session_exists(self, session_name: str) -> bool:
+        self._record("session_exists", session_name)
+        return False
+
+    def get_pane_working_directory(
+        self, session_name: str, window_name: str
+    ) -> Optional[str]:
+        self._record("get_pane_working_directory", session_name, window_name)
+        return None
+
+    def pipe_pane(self, session_name: str, window_name: str, file_path: str) -> None:
+        self._record("pipe_pane", session_name, window_name, file_path)
+
+    def stop_pipe_pane(self, session_name: str, window_name: str) -> None:
+        self._record("stop_pipe_pane", session_name, window_name)
+
+
+# ---------------------------------------------------------------------------
+# LaunchSpec tests
+# ---------------------------------------------------------------------------
+
+
+class TestLaunchSpec:
+    def test_default_fields_are_none(self) -> None:
+        blank = LaunchSpec()
+        assert blank.argv is None
+        assert blank.env is None
+        assert blank.provider is None
+
+    def test_fields_set_on_construction(self) -> None:
+        populated = LaunchSpec(argv=["codex", "--yolo"], env={"FOO": "bar"}, provider="codex")
+        assert populated.argv == ["codex", "--yolo"]
+        assert populated.env == {"FOO": "bar"}
+        assert populated.provider == "codex"
+
+    def test_frozen_mutation_raises(self) -> None:
+        frozen = LaunchSpec(argv=["codex"])
+        with pytest.raises((AttributeError, TypeError)):
+            setattr(frozen, "argv", ["other"])  # attribute assignment on the spec must be rejected
+
+    def test_equality_same_values(self) -> None:
+        left = LaunchSpec(argv=["cmd"], provider="claude")
+        right = LaunchSpec(argv=["cmd"], provider="claude")
+        assert left == right
+
+    def test_equality_different_values(self) -> None:
+        left = LaunchSpec(argv=["cmd"])
+        right = LaunchSpec(argv=["other"])
+        assert left != right
+
+    def test_hashable(self) -> None:
+        hashable = LaunchSpec(provider="codex")
+        hash(hashable)  # must not raise
+
+
+# ---------------------------------------------------------------------------
+# Cannot instantiate abstract base directly
+# ---------------------------------------------------------------------------
+
+
+class TestBaseMultiplexerAbstract:
+    def test_cannot_instantiate_directly(self) -> None:
+        with pytest.raises(TypeError):  # instantiating the base class itself is expected to be rejected
+            BaseMultiplexer()  # type: ignore[abstract]
+
+    def test_import_from_package_equals_direct_import(self) -> None:
+        assert BaseMultiplexer is BaseMultiplexerDirect  # package-level name and .base name are one class object
+
+
+# ---------------------------------------------------------------------------
+# send_keys default implementation
+# ---------------------------------------------------------------------------
+
+
+class TestSendKeys:
+    def test_send_keys_calls_paste_then_submit(self) -> None:
+        fake = FakeMultiplexer()
+        fake.send_keys("ses", "win", "hello")
+
+        recorded = [name for name, _args, _kwargs in fake._calls]
+        assert recorded == ["_paste_text", "_submit_input"]
+
+    def test_send_keys_passes_text_to_paste(self) -> None:
+        fake = FakeMultiplexer()
+        fake.send_keys("ses", "win", "my text")
+
+        name, args, _kwargs = fake._calls[0]
+        assert name == "_paste_text"
+        assert args == ("ses", "win", "my text")
+
+    def test_send_keys_passes_session_window_to_submit(self) -> None:
+        fake = FakeMultiplexer()
+        fake.send_keys("ses", "win", "t")
+
+        name, args, _kwargs = fake._calls[1]
+        assert name == "_submit_input"
+        assert args[0] == "ses"
+        assert args[1] == "win"
+
+    def test_send_keys_default_enter_count_is_1(self) -> None:
+        fake = FakeMultiplexer()
+        fake.send_keys("ses", "win", "t")
+
+        _name, _args, kwargs = fake._calls[1]
+        assert kwargs.get("enter_count") == 1
+
+    def test_send_keys_forwards_enter_count(self) -> None:
+        fake = FakeMultiplexer()
+        fake.send_keys("ses", "win", "t", enter_count=3)
+
+        _name, _args, kwargs = fake._calls[1]
+        assert kwargs.get("enter_count") == 3
+
+    def test_send_keys_paste_before_submit_ordering(self) -> None:
+        fake = FakeMultiplexer()
+        fake.send_keys("ses", "win", "t")
+
+        first, second = fake._calls[0], fake._calls[1]
+        assert (first[0], second[0]) == ("_paste_text", "_submit_input")
+        assert len(fake._calls) == 2
+
+
+# ---------------------------------------------------------------------------
+# send_special_key signature — literal kwarg must exist
+# ---------------------------------------------------------------------------
+
+
+class TestSendSpecialKey:
+    def test_send_special_key_literal_default_false(self) -> None:
+        fake = FakeMultiplexer()
+        fake.send_special_key("ses", "win", "Enter")
+
+        name, _args, kwargs = fake._calls[0]
+        assert name == "send_special_key"
+        assert kwargs.get("literal") is False
+
+    def test_send_special_key_literal_true(self) -> None:
+        fake = FakeMultiplexer()
+        fake.send_special_key("ses", "win", "\x1b[B", literal=True)
+
+        _name, _args, kwargs = fake._calls[0]
+        assert kwargs.get("literal") is True
+
+
+# ---------------------------------------------------------------------------
+# _resolve_and_validate_working_directory parity with TmuxClient
+# ---------------------------------------------------------------------------
+
+
+class TestResolveAndValidateWorkingDirectory:
+ def test_defaults_to_cwd(self) -> None:
+ mux = FakeMultiplexer()
+ with patch("os.getcwd", return_value="/home/user/project"):
+ with patch("os.path.realpath", return_value="/home/user/project"):
+ with patch("os.path.isdir", return_value=True):
+ result = mux._resolve_and_validate_working_directory(None)
+ assert result == "/home/user/project"
+
+    def test_valid_directory(self) -> None:  # no tmp_path fixture: isdir and realpath are both patched
+        mux = FakeMultiplexer()
+        with patch("os.path.isdir", return_value=True):
+            with patch("os.path.realpath", return_value="/home/user/project"):
+                result = mux._resolve_and_validate_working_directory("/home/user/project")
+        assert result == "/home/user/project"  # the resolved real path is what comes back
+
+ def test_blocked_root(self) -> None:
+ mux = FakeMultiplexer()
+ with patch("os.path.realpath", return_value="/"):
+ with pytest.raises(ValueError, match="blocked system path"):
+ mux._resolve_and_validate_working_directory("/")
+
+ def test_blocked_etc(self) -> None:
+ mux = FakeMultiplexer()
+ with patch("os.path.realpath", return_value="/etc"):
+ with pytest.raises(ValueError, match="blocked system path"):
+ mux._resolve_and_validate_working_directory("/etc")
+
+ def test_blocked_var(self) -> None:
+ mux = FakeMultiplexer()
+ with patch("os.path.realpath", return_value="/var"):
+ with pytest.raises(ValueError, match="blocked system path"):
+ mux._resolve_and_validate_working_directory("/var")
+
+    def test_blocked_root_dir(self) -> None:  # NOTE(review): body identical to test_blocked_root; was "/root" intended? confirm
+        mux = FakeMultiplexer()
+        with patch("os.path.realpath", return_value="/"):
+            with pytest.raises(ValueError, match="blocked system path"):
+                mux._resolve_and_validate_working_directory("/")
+
+ def test_nonexistent_directory(self) -> None:
+ mux = FakeMultiplexer()
+ with patch("os.path.realpath", return_value="/home/user/nonexistent_dir_xyz"):
+ with patch("os.path.isdir", return_value=False):
+ with pytest.raises(ValueError, match="does not exist"):
+ mux._resolve_and_validate_working_directory("/home/user/nonexistent_dir_xyz")
+
+ def test_resolves_symlinks(self) -> None:
+ mux = FakeMultiplexer()
+ with patch("os.path.realpath", return_value="/home/user/real"):
+ with patch("os.path.isdir", return_value=True):
+ result = mux._resolve_and_validate_working_directory("/home/user/link")
+ assert result == "/home/user/real"
+
+ def test_expands_tilde(self) -> None:
+ mux = FakeMultiplexer()
+ with patch("os.path.expanduser", return_value="/home/user/project"):
+ with patch("os.path.realpath", return_value="/home/user/project"):
+ with patch("os.path.isdir", return_value=True):
+ result = mux._resolve_and_validate_working_directory("~/project")
+ assert result == "/home/user/project"
+
+ def test_allows_path_outside_home(self) -> None:
+ mux = FakeMultiplexer()
+ with patch("os.path.isdir", return_value=True):
+ with patch("os.path.realpath", return_value="/Volumes/workplace/project"):
+ result = mux._resolve_and_validate_working_directory(
+ "/Volumes/workplace/project"
+ )
+ assert result == "/Volumes/workplace/project"
+
+ def test_allows_subdirectory_of_blocked_path(self) -> None:
+ mux = FakeMultiplexer()
+ with patch("os.path.isdir", return_value=True):
+ with patch("os.path.realpath", return_value="/var/folders/abc/project"):
+ result = mux._resolve_and_validate_working_directory(
+ "/var/folders/abc/project"
+ )
+ assert result == "/var/folders/abc/project"
+
+ def test_raises_for_symlink_resolving_to_blocked(self) -> None:
+ mux = FakeMultiplexer()
+ with patch("os.path.realpath", return_value="/var"):
+ with pytest.raises(ValueError, match="blocked system path"):
+ mux._resolve_and_validate_working_directory("/some/link")
+
+ def test_home_directory_itself_allowed(self) -> None:
+ mux = FakeMultiplexer()
+ with patch("os.path.isdir", return_value=True):
+ with patch("os.path.realpath", return_value="/home/user"):
+ result = mux._resolve_and_validate_working_directory("/home/user")
+ assert result == "/home/user"
+
+ def test_allows_opt_subdirectory(self) -> None:
+ mux = FakeMultiplexer()
+ with patch("os.path.isdir", return_value=True):
+ with patch("os.path.realpath", return_value="/opt/projects/my-app"):
+ result = mux._resolve_and_validate_working_directory(
+ "/opt/projects/my-app"
+ )
+ assert result == "/opt/projects/my-app"
+
+ def test_raises_for_blocked_boot(self) -> None:
+ mux = FakeMultiplexer()
+ with patch("os.path.realpath", return_value="/boot"):
+ with pytest.raises(ValueError, match="blocked system path"):
+ mux._resolve_and_validate_working_directory("/boot")
+
+ def test_symlinked_home_real_path(self) -> None:
+ mux = FakeMultiplexer()
+ with patch("os.path.realpath", return_value="/local/home/user/project"):
+ with patch("os.path.isdir", return_value=True):
+ result = mux._resolve_and_validate_working_directory("/home/user/project")
+ assert result == "/local/home/user/project"
+
+    def test_error_message_contains_original_path(self) -> None:  # NOTE(review): the `or` below also accepts a message without the path
+        mux = FakeMultiplexer()
+        with patch("os.path.realpath", return_value="/home/user/does_not_exist_xyz"):
+            with patch("os.path.isdir", return_value=False):  # force the "missing directory" branch
+                with pytest.raises(ValueError) as exc_info:
+                    mux._resolve_and_validate_working_directory("/home/user/does_not_exist_xyz")
+        assert "does_not_exist_xyz" in str(exc_info.value) or "does not exist" in str(exc_info.value)
diff --git a/test/multiplexers/test_selection.py b/test/multiplexers/test_selection.py
new file mode 100644
index 000000000..024432853
--- /dev/null
+++ b/test/multiplexers/test_selection.py
@@ -0,0 +1,125 @@
+"""Contract tests for multiplexer backend selection."""
+
+from __future__ import annotations
+
+import sys
+import types
+from unittest.mock import Mock
+
+import pytest
+
+from cli_agent_orchestrator.multiplexers import get_multiplexer
+from cli_agent_orchestrator.multiplexers.tmux import TmuxMultiplexer
+
+WEZTERM_MODULE = "cli_agent_orchestrator.multiplexers.wezterm"
+
+
+@pytest.fixture(autouse=True)
+def reset_selection_state(monkeypatch: pytest.MonkeyPatch) -> None:  # NOTE(review): generator fixture; Iterator[None] would be the accurate return type
+    get_multiplexer.cache_clear()  # drop any backend cached by an earlier test
+    for name in ("CAO_MULTIPLEXER", "TMUX", "WEZTERM_PANE", "TERM_PROGRAM"):
+        monkeypatch.delenv(name, raising=False)  # each test starts with none of these variables set
+    monkeypatch.delitem(sys.modules, WEZTERM_MODULE, raising=False)  # forget any previously loaded/faked wezterm module
+    yield
+    get_multiplexer.cache_clear()  # do not leak this test's cached backend into the next one
+    monkeypatch.delitem(sys.modules, WEZTERM_MODULE, raising=False)
+
+
+def install_fake_wezterm(monkeypatch: pytest.MonkeyPatch, sentinel: object) -> None:
+    fake_module = types.ModuleType(WEZTERM_MODULE)
+    setattr(fake_module, "WezTermMultiplexer", sentinel)  # exposed under the real backend's class name
+    monkeypatch.setitem(sys.modules, WEZTERM_MODULE, fake_module)
+
+
+def test_override_tmux_wins_over_other_signals(monkeypatch: pytest.MonkeyPatch) -> None:
+ monkeypatch.setenv("CAO_MULTIPLEXER", "tmux")
+ monkeypatch.setenv("TMUX", "/tmp/tmux-1000/default,1234,0")
+ monkeypatch.setenv("WEZTERM_PANE", "66")
+ monkeypatch.setenv("TERM_PROGRAM", "WezTerm")
+
+ multiplexer = get_multiplexer()
+
+ assert isinstance(multiplexer, TmuxMultiplexer)
+
+
+def test_override_wezterm_imports_lazy_backend(monkeypatch: pytest.MonkeyPatch) -> None:
+ sentinel = object()
+ factory = Mock(return_value=sentinel)
+ install_fake_wezterm(monkeypatch, factory)
+ monkeypatch.setenv("CAO_MULTIPLEXER", "wezterm")
+
+ multiplexer = get_multiplexer()
+
+ assert multiplexer is sentinel
+
+
+def test_invalid_override_raises_value_error(monkeypatch: pytest.MonkeyPatch) -> None:
+ monkeypatch.setenv("CAO_MULTIPLEXER", "foo")
+
+ with pytest.raises(
+ ValueError,
+ match=r"Unknown CAO_MULTIPLEXER: 'foo'; expected 'tmux' or 'wezterm'",
+ ):
+ get_multiplexer()
+
+
+def test_tmux_env_selects_tmux(monkeypatch: pytest.MonkeyPatch) -> None:
+ monkeypatch.setenv("TMUX", "/tmp/tmux-1000/default,1234,0")
+
+ multiplexer = get_multiplexer()
+
+ assert isinstance(multiplexer, TmuxMultiplexer)
+
+
+def test_wezterm_pane_selects_wezterm(monkeypatch: pytest.MonkeyPatch) -> None:
+ sentinel = object()
+ factory = Mock(return_value=sentinel)
+ install_fake_wezterm(monkeypatch, factory)
+ monkeypatch.setenv("WEZTERM_PANE", "66")
+
+ multiplexer = get_multiplexer()
+
+ assert multiplexer is sentinel
+
+
+def test_term_program_wezterm_selects_wezterm(monkeypatch: pytest.MonkeyPatch) -> None:
+ sentinel = object()
+ factory = Mock(return_value=sentinel)
+ install_fake_wezterm(monkeypatch, factory)
+ monkeypatch.setenv("TERM_PROGRAM", "WezTerm")
+
+ multiplexer = get_multiplexer()
+
+ assert multiplexer is sentinel
+
+
+def test_win32_default_selects_wezterm(monkeypatch: pytest.MonkeyPatch) -> None:
+ sentinel = object()
+ factory = Mock(return_value=sentinel)
+ install_fake_wezterm(monkeypatch, factory)
+ monkeypatch.setattr(sys, "platform", "win32")
+
+ multiplexer = get_multiplexer()
+
+ assert multiplexer is sentinel
+
+
+def test_non_windows_default_selects_tmux(monkeypatch: pytest.MonkeyPatch) -> None:
+ monkeypatch.setattr(sys, "platform", "linux")
+
+ multiplexer = get_multiplexer()
+
+ assert isinstance(multiplexer, TmuxMultiplexer)
+
+
+def test_get_multiplexer_uses_singleton_cache(monkeypatch: pytest.MonkeyPatch) -> None:
+ monkeypatch.setenv("CAO_MULTIPLEXER", "tmux")
+
+ first = get_multiplexer()
+ second = get_multiplexer()
+
+ assert first is second
+
+
+def test_cache_is_clear_between_tests() -> None:
+ assert get_multiplexer.cache_info().currsize == 0
diff --git a/test/multiplexers/test_tmux_multiplexer.py b/test/multiplexers/test_tmux_multiplexer.py
new file mode 100644
index 000000000..3c75eb4c3
--- /dev/null
+++ b/test/multiplexers/test_tmux_multiplexer.py
@@ -0,0 +1,131 @@
+"""Smoke coverage for the new TmuxMultiplexer home."""
+
+from unittest.mock import MagicMock, call, patch
+
+import pytest
+
+from cli_agent_orchestrator.multiplexers.tmux import TmuxMultiplexer
+
+
+@pytest.fixture
+def tmux():
+ """Create a TmuxMultiplexer with a mocked libtmux.Server."""
+ with patch("cli_agent_orchestrator.multiplexers.tmux.libtmux") as mock_libtmux:
+ mock_server = MagicMock()
+ mock_libtmux.Server.return_value = mock_server
+
+ client = TmuxMultiplexer()
+ client.server = mock_server
+ yield client
+
+
+@pytest.fixture
+def mock_subprocess():
+ with patch("cli_agent_orchestrator.multiplexers.tmux.subprocess") as mock:
+ mock.run.return_value = None
+ yield mock
+
+
+@pytest.fixture
+def mock_uuid():
+    with patch("cli_agent_orchestrator.multiplexers.tmux.uuid") as mock:
+        mock.uuid4.return_value.hex = "abcd1234efgh"  # send_keys tests expect buffer "cao_abcd1234", i.e. its first 8 chars
+        yield mock
+
+
+class TestTmuxMultiplexerClient:
+ def test_create_session_success(self, tmux):
+ mock_window = MagicMock()
+ mock_window.name = "my-window"
+ mock_session = MagicMock()
+ mock_session.windows = [mock_window]
+ tmux.server.new_session.return_value = mock_session
+
+ with patch.object(tmux, "_resolve_and_validate_working_directory", return_value="/tmp/work"):
+ result = tmux.create_session("ses", "my-window", "tid1", "/tmp/work")
+
+ assert result == "my-window"
+ tmux.server.new_session.assert_called_once()
+
+ def test_get_history_custom_tail_lines(self, tmux):
+ mock_pane = MagicMock()
+ mock_result = MagicMock()
+ mock_result.stdout = ["line"]
+ mock_pane.cmd.return_value = mock_result
+ mock_window = MagicMock()
+ mock_window.panes = [mock_pane]
+ mock_session = MagicMock()
+ mock_session.windows.get.return_value = mock_window
+ tmux.server.sessions.get.return_value = mock_session
+
+ tmux.get_history("ses", "win", tail_lines=50)
+
+ mock_pane.cmd.assert_called_once_with("capture-pane", "-e", "-p", "-S", "-50")
+
+ def test_pipe_pane_success(self, tmux):
+ mock_pane = MagicMock()
+ mock_window = MagicMock()
+ mock_window.active_pane = mock_pane
+ mock_session = MagicMock()
+ mock_session.windows.get.return_value = mock_window
+ tmux.server.sessions.get.return_value = mock_session
+
+ tmux.pipe_pane("ses", "win", "/tmp/log.txt")
+
+ mock_pane.cmd.assert_called_once_with("pipe-pane", "-o", "cat >> /tmp/log.txt")
+
+
+class TestTmuxMultiplexerSendKeys:
+ def test_basic_message(self, tmux, mock_subprocess, mock_uuid):
+ tmux.send_keys("sess", "win", "hello")
+
+ assert mock_subprocess.run.call_count == 4
+ calls = mock_subprocess.run.call_args_list
+
+ assert calls[0] == call(
+ ["tmux", "load-buffer", "-b", "cao_abcd1234", "-"],
+ input=b"hello",
+ check=True,
+ )
+ assert calls[1] == call(
+ ["tmux", "paste-buffer", "-p", "-b", "cao_abcd1234", "-t", "sess:win"],
+ check=True,
+ )
+ assert calls[2] == call(
+ ["tmux", "send-keys", "-t", "sess:win", "Enter"],
+ check=True,
+ )
+ assert calls[3] == call(
+ ["tmux", "delete-buffer", "-b", "cao_abcd1234"],
+ check=False,
+ )
+
+ def test_buffer_cleanup_on_error(self, tmux, mock_subprocess, mock_uuid):
+ mock_subprocess.run.side_effect = [
+ None,
+ Exception("paste failed"),
+ None,
+ ]
+
+ with pytest.raises(Exception, match="paste failed"):
+ tmux.send_keys("sess", "win", "msg")
+
+ last_call = mock_subprocess.run.call_args_list[-1]
+ assert last_call == call(
+ ["tmux", "delete-buffer", "-b", "cao_abcd1234"],
+ check=False,
+ )
+
+ def test_double_enter(self, tmux, mock_subprocess, mock_uuid):
+ tmux.send_keys("sess", "win", "hello", enter_count=2)
+
+ assert mock_subprocess.run.call_count == 5
+ calls = mock_subprocess.run.call_args_list
+ assert calls[2] == call(
+ ["tmux", "send-keys", "-t", "sess:win", "Enter"],
+ check=True,
+ )
+ assert calls[3] == call(
+ ["tmux", "send-keys", "-t", "sess:win", "Enter"],
+ check=True,
+ )
diff --git a/test/multiplexers/test_wezterm_multiplexer.py b/test/multiplexers/test_wezterm_multiplexer.py
new file mode 100644
index 000000000..92ecea756
--- /dev/null
+++ b/test/multiplexers/test_wezterm_multiplexer.py
@@ -0,0 +1,1094 @@
+"""Unit tests for WezTermMultiplexer — all subprocess calls mocked via runner injection."""
+
+from __future__ import annotations
+
+import subprocess
+import sys
+import threading
+from collections import deque
+from unittest.mock import patch
+
+import pytest
+
+from cli_agent_orchestrator.multiplexers.base import LaunchSpec
+from cli_agent_orchestrator.multiplexers.wezterm import (
+ WezTermMultiplexer,
+ _default_runner,
+ _default_shell,
+ _normalize_wezterm_bin,
+ _ps_single_quote,
+ _resolve_powershell_bin,
+)
+
+
+def _make_result(stdout: str = "", returncode: int = 0) -> subprocess.CompletedProcess[str]:
+    """Build a CompletedProcess stand-in for a runner call.
+
+    ``args`` is always empty and ``stderr`` always ""; only stdout/returncode vary.
+    """
+    completed = subprocess.CompletedProcess([], returncode, stdout, "")
+    return completed
+
+
+def _spawn_result(pane_id: str = "42") -> subprocess.CompletedProcess[str]:
+    return _make_result(stdout="{}\n".format(pane_id))  # fake spawn output: the pane id plus a newline
+
+
+class FakeRunner:
+    """Scripted runner double: logs every argv and replays queued ``get-text`` responses."""
+
+    def __init__(self, pane_id: str = "42") -> None:
+        self.pane_id = pane_id
+        self.get_text_queue: deque[str | RuntimeError] = deque()
+        self.calls: list[list[str]] = []
+        self._condition = threading.Condition()
+
+    def __call__(self, argv, env=None):
+        del env
+        argv_strs = list(map(str, argv))
+        with self._condition:
+            self.calls.append(argv_strs)
+
+        if "spawn" in argv_strs:
+            return _spawn_result(self.pane_id)
+        if "get-text" not in argv_strs:
+            return _make_result()  # kill-pane, send-text and anything else: empty success
+        with self._condition:
+            if self.get_text_queue:
+                queued = self.get_text_queue.popleft()
+                self._condition.notify_all()
+            else:
+                queued = ""
+        if isinstance(queued, RuntimeError):
+            raise queued
+        return _make_result(stdout=queued)
+
+    def queue_responses(self, responses: list[str | RuntimeError]) -> None:
+        with self._condition:
+            self.get_text_queue.extend(responses)
+            self._condition.notify_all()
+
+    def wait_for_queue_drain(self, timeout: float = 1.0) -> bool:
+        with self._condition:
+            # True once the queue is empty, False if the timeout elapses first
+            return self._condition.wait_for(
+                lambda: not self.get_text_queue, timeout=timeout
+            )
+
+    def pending_get_text(self) -> int:
+        with self._condition:
+            return len(self.get_text_queue)
+
+
+@pytest.fixture
+def fake_runner() -> FakeRunner:
+ return FakeRunner()
+
+
+@pytest.fixture
+def multiplexer(tmp_path, fake_runner: FakeRunner):
+    mux = WezTermMultiplexer(
+        runner=fake_runner,
+        wezterm_bin="wezterm",
+        poll_interval=0.001,
+        clock_sleep=lambda *_: None,  # sleeping is replaced by a no-op
+    )
+    with patch.object(
+        mux,
+        "_resolve_and_validate_working_directory",
+        return_value=str(tmp_path),
+    ):
+        mux.create_session("sess", "win", "tid", str(tmp_path))  # every test starts with "sess"/"win" already created
+    yield mux
+    for key in list(mux._pollers):  # iterate a copy; presumably stop_pipe_pane removes entries — confirm
+        mux.stop_pipe_pane(*key)
+
+class TestCreateSession:
+ @pytest.mark.parametrize("platform", ["linux", "win32"])
+ def test_argv_contains_new_window_cwd_and_terminal_id(self, tmp_path, monkeypatch, platform):
+ monkeypatch.setattr(sys, "platform", platform)
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ return _spawn_result("17")
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(
+ mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)
+ ):
+ mux.create_session("ses", "win", "tid-abc", str(tmp_path))
+
+ argv = calls[0]
+ assert argv[0] == "wezterm"
+ assert "cli" in argv
+ assert "spawn" in argv
+ assert "--new-window" in argv
+ assert "--cwd" in argv
+ assert str(tmp_path) in argv
+ assert "--" in argv
+ dash_index = argv.index("--")
+ wrapped = argv[dash_index + 1 :]
+ joined = " ".join(wrapped)
+ if platform == "win32":
+ assert "$env:CAO_TERMINAL_ID='tid-abc'" in joined
+ else:
+ assert "CAO_TERMINAL_ID=tid-abc" in wrapped
+
+    def test_parses_pane_id_from_stdout(self, tmp_path):  # the fake runner's stdout is "99\n"
+        mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result("99"), wezterm_bin="wezterm")
+        with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+            result = mux.create_session("ses", "win", "tid", str(tmp_path))
+        assert (result, mux._sessions["ses"]["win"]) == ("win", "99")  # window name returned, parsed id registered
+
+ def test_pane_id_stored_in_registry(self, tmp_path):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result("77"), wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+ assert mux._sessions["ses"]["win"] == "77"
+
+ @pytest.mark.parametrize("platform", ["linux", "win32"])
+ def test_launch_spec_argv_appended_after_double_dash(self, tmp_path, monkeypatch, platform):
+ monkeypatch.setattr(sys, "platform", platform)
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ return _spawn_result("5")
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session(
+ "ses",
+ "win",
+ "tid",
+ str(tmp_path),
+ launch_spec=LaunchSpec(argv=["codex.cmd", "--yolo"]),
+ )
+
+ argv = calls[0]
+ wrapped = argv[argv.index("--") + 1 :]
+ if platform == "win32":
+ command = wrapped[wrapped.index("-Command") + 1]
+ assert _ps_single_quote("codex.cmd") in command
+ assert _ps_single_quote("--yolo") in command
+ else:
+ env_sep = wrapped.index("--")
+ assert wrapped[env_sep + 1 : env_sep + 3] == ["codex.cmd", "--yolo"]
+
+ @pytest.mark.parametrize("platform", ["linux", "win32"])
+ def test_launch_spec_env_passed_through_wrapper(self, tmp_path, monkeypatch, platform):
+ monkeypatch.setattr(sys, "platform", platform)
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ return _spawn_result("5")
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session(
+ "ses",
+ "win",
+ "tid",
+ str(tmp_path),
+ launch_spec=LaunchSpec(env={"FOO": "bar"}),
+ )
+
+ argv = calls[0]
+ wrapped = argv[argv.index("--") + 1 :]
+ joined = " ".join(wrapped)
+ if platform == "win32":
+ assert "$env:FOO='bar'" in joined
+ else:
+ assert "FOO=bar" in wrapped
+
+ @pytest.mark.parametrize("platform", ["linux", "win32"])
+ def test_default_shell_used_when_launch_spec_is_none(self, tmp_path, monkeypatch, platform):
+ monkeypatch.setattr(sys, "platform", platform)
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ return _spawn_result("5")
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path), launch_spec=None)
+
+ argv = calls[0]
+ wrapped = argv[argv.index("--") + 1 :]
+ if platform == "win32":
+ # No-LaunchSpec path on Windows must NOT spawn a child shell.
+ # Single pwsh with -NoExit sets env and stays interactive.
+ assert "-NoExit" in wrapped
+ command = wrapped[wrapped.index("-Command") + 1]
+ assert "$env:CAO_TERMINAL_ID='tid'" in command
+ # No `& 'shell' @args` invocation — only env-set statements.
+ assert "& " not in command
+ assert "@args" not in command
+ else:
+ shell = _default_shell()
+ env_sep = wrapped.index("--")
+ assert wrapped[env_sep + 1 :] == [shell]
+ assert "CAO_TERMINAL_ID=tid" in wrapped
+
+ def test_ps_single_quote_doubles_embedded_single_quote(self):
+ assert _ps_single_quote("it's") == "'it''s'"
+
+ def test_windows_powershell_invocation_shape(self, tmp_path, monkeypatch):
+ monkeypatch.setattr(sys, "platform", "win32")
+ # Pin the resolved PowerShell binary so this test is deterministic
+ # regardless of whether pwsh is installed on the test host.
+ monkeypatch.setenv("CAO_POWERSHELL_BIN", "powershell.exe")
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ return _spawn_result("5")
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session(
+ "ses",
+ "win",
+ "tid",
+ str(tmp_path),
+ launch_spec=LaunchSpec(argv=["codex.cmd", "--yolo"]),
+ )
+
+ wrapped = calls[0][calls[0].index("--") + 1 :]
+ assert wrapped[:4] == ["powershell.exe", "-NoLogo", "-NoProfile", "-Command"]
+ assert "$args=@(" in wrapped[4]
+
+ def test_unix_env_invocation_shape(self, tmp_path, monkeypatch):
+ monkeypatch.setattr(sys, "platform", "linux")
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ return _spawn_result("5")
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+
+ wrapped = calls[0][calls[0].index("--") + 1 :]
+ assert wrapped[:3] == ["env", "CAO_TERMINAL_ID=tid", "--"]
+
+ def test_raises_runtime_error_when_stdout_has_no_pane_id(self, tmp_path):
+ mux = WezTermMultiplexer(
+ runner=lambda argv, env=None: _make_result(stdout="not a number"),
+ wezterm_bin="wezterm",
+ )
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ with pytest.raises(RuntimeError, match="no pane id"):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+
+ def test_raises_runtime_error_when_stdout_is_empty(self, tmp_path):
+ mux = WezTermMultiplexer(
+ runner=lambda argv, env=None: _make_result(stdout=""),
+ wezterm_bin="wezterm",
+ )
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ with pytest.raises(RuntimeError, match="no pane id"):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+
+
+class TestCreateWindow:
+ def test_create_window_stores_pane_in_existing_session(self, tmp_path):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result("55"), wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win1", "tid1", str(tmp_path))
+ mux.create_window("ses", "win2", "tid2", str(tmp_path))
+ assert mux._sessions["ses"]["win2"] == "55"
+
+ def test_create_window_uses_new_window_flag(self, tmp_path):
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ return _spawn_result("10")
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_window("ses", "win", "tid", str(tmp_path))
+
+ assert "--new-window" in calls[0]
+
+
+class TestPasteText:
+ def test_sends_send_text_with_pane_id_and_text(self, tmp_path):
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ return _spawn_result("11")
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+ calls.clear()
+
+ mux._paste_text("ses", "win", "hello world")
+
+ argv = calls[0]
+ assert argv[:3] == ["wezterm", "cli", "send-text"]
+ assert argv[argv.index("--pane-id") + 1] == "11"
+ assert argv[argv.index("--") + 1] == "hello world"
+ assert "--no-paste" not in argv
+
+ def test_raises_when_pane_not_found(self):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result(), wezterm_bin="wezterm")
+ with pytest.raises(KeyError, match="not found"):
+ mux._paste_text("missing_ses", "missing_win", "text")
+
+
+class TestSubmitInput:
+ def test_submit_once_sends_carriage_return_with_no_paste(self, tmp_path):
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ return _spawn_result("22")
+
+ mux = WezTermMultiplexer(
+ runner=runner,
+ wezterm_bin="wezterm",
+ clock_sleep=lambda *_: None,
+ )
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+ calls.clear()
+
+ mux._submit_input("ses", "win", enter_count=1)
+
+ argv = calls[0]
+ assert argv[:3] == ["wezterm", "cli", "send-text"]
+ assert "--no-paste" in argv
+ assert argv[argv.index("--") + 1] == "\r"
+
+ def test_submit_once_sleeps_300ms(self, tmp_path):
+ sleep_calls: list[float] = []
+ mux = WezTermMultiplexer(
+ runner=lambda argv, env=None: _spawn_result("22"),
+ wezterm_bin="wezterm",
+ clock_sleep=lambda duration: sleep_calls.append(duration),
+ )
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+
+ mux._submit_input("ses", "win", enter_count=1)
+
+ assert sleep_calls == [pytest.approx(0.3)]
+
+ def test_submit_three_times_produces_three_enter_calls(self, tmp_path):
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ return _spawn_result("33")
+
+ mux = WezTermMultiplexer(
+ runner=runner,
+ wezterm_bin="wezterm",
+ clock_sleep=lambda *_: None,
+ )
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+ calls.clear()
+
+ mux._submit_input("ses", "win", enter_count=3)
+
+ enter_calls = [argv for argv in calls if "--no-paste" in argv and "\r" in argv]
+ assert len(enter_calls) == 3
+
+ def test_submit_three_times_sleeps_300ms_then_500ms_between(self, tmp_path):
+ sleep_calls: list[float] = []
+ mux = WezTermMultiplexer(
+ runner=lambda argv, env=None: _spawn_result("33"),
+ wezterm_bin="wezterm",
+ clock_sleep=lambda duration: sleep_calls.append(duration),
+ )
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+
+ mux._submit_input("ses", "win", enter_count=3)
+
+ assert sleep_calls == [
+ pytest.approx(0.3),
+ pytest.approx(0.5),
+ pytest.approx(0.5),
+ ]
+
+
+class TestSendKeys:
+ def test_send_keys_calls_paste_then_submit(self, tmp_path):
+ method_calls: list[str] = []
+ mux = WezTermMultiplexer(
+ runner=lambda argv, env=None: _spawn_result("44"),
+ wezterm_bin="wezterm",
+ clock_sleep=lambda *_: None,
+ )
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+
+ original_paste = mux._paste_text
+ original_submit = mux._submit_input
+
+ def record_paste(session_name, window_name, text):
+ method_calls.append("paste")
+ original_paste(session_name, window_name, text)
+
+ def record_submit(session_name, window_name, enter_count=1):
+ method_calls.append("submit")
+ original_submit(session_name, window_name, enter_count=enter_count)
+
+ mux._paste_text = record_paste # type: ignore[method-assign]
+ mux._submit_input = record_submit # type: ignore[method-assign]
+
+ mux.send_keys("ses", "win", "text", enter_count=2)
+
+ assert method_calls == ["paste", "submit"]
+
+
+class TestSendSpecialKey:
+ def test_enter_maps_to_carriage_return_no_paste(self, tmp_path):
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ return _spawn_result("50")
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+ calls.clear()
+
+ mux.send_special_key("ses", "win", "Enter")
+
+ argv = calls[0]
+ assert "--no-paste" in argv
+ assert argv[argv.index("--") + 1] == "\r"
+
+ def test_literal_true_sends_raw_bytes_no_paste(self, tmp_path):
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ return _spawn_result("51")
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+ calls.clear()
+
+ mux.send_special_key("ses", "win", "\x1b[B", literal=True)
+
+ argv = calls[0]
+ assert "--no-paste" in argv
+ assert argv[argv.index("--") + 1] == "\x1b[B"
+
+ def test_tab_maps_to_tab_character(self, tmp_path):
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ return _spawn_result("52")
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+ calls.clear()
+
+ mux.send_special_key("ses", "win", "Tab")
+
+ argv = calls[0]
+ assert argv[argv.index("--") + 1] == "\t"
+
+ def test_up_arrow_maps_to_vt_sequence(self, tmp_path):
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ return _spawn_result("53")
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+ calls.clear()
+
+ mux.send_special_key("ses", "win", "Up")
+
+ argv = calls[0]
+ assert argv[argv.index("--") + 1] == "\x1b[A"
+
+ def test_send_special_key_unknown_name_raises_actionable_keyerror(self, tmp_path):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result("54"), wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+
+ with pytest.raises(KeyError, match="Unknown special key"):
+ mux.send_special_key("ses", "win", "NotAKey")
+
+
+class TestGetHistory:
+ def test_calls_get_text_without_escapes(self, tmp_path):
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ if "spawn" in argv:
+ return _spawn_result("60")
+ return _make_result(stdout="output line\n")
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+ calls.clear()
+
+ result = mux.get_history("ses", "win")
+
+ argv = calls[0]
+ assert argv[:3] == ["wezterm", "cli", "get-text"]
+ assert "--escapes" not in argv
+ assert result == "output line\n"
+
+ def test_tail_lines_returns_last_n_lines(self, tmp_path):
+ content = "\n".join(f"line{i}" for i in range(10))
+
+ def runner(argv, env=None):
+ del env
+ if "spawn" in argv:
+ return _spawn_result("61")
+ return _make_result(stdout=content)
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+
+ result = mux.get_history("ses", "win", tail_lines=5)
+
+ lines = result.splitlines()
+ assert len(lines) == 5
+ assert lines[0] == "line5"
+ assert lines[-1] == "line9"
+
+ def test_raises_when_pane_not_found(self):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result(), wezterm_bin="wezterm")
+ with pytest.raises(KeyError, match="not found"):
+ mux.get_history("missing_ses", "missing_win")
+
+
+class TestKillSession:
+ def test_removes_session_from_registry_and_kills_panes(self, tmp_path):
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ if "spawn" in argv:
+ return _spawn_result("70")
+ return _make_result()
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+ calls.clear()
+
+ result = mux.kill_session("ses")
+
+ assert result is True
+ assert "ses" not in mux._sessions
+ kill_calls = [argv for argv in calls if "kill-pane" in argv]
+ assert len(kill_calls) == 1
+ assert kill_calls[0][kill_calls[0].index("--pane-id") + 1] == "70"
+
+ def test_returns_false_for_nonexistent_session(self):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result(), wezterm_bin="wezterm")
+ assert mux.kill_session("nonexistent") is False
+
+
+class TestKillWindow:
+ def test_removes_window_from_registry(self, tmp_path):
+ calls: list[list[str]] = []
+
+ def runner(argv, env=None):
+ del env
+ calls.append(list(argv))
+ if "spawn" in argv:
+ return _spawn_result("80")
+ return _make_result()
+
+ mux = WezTermMultiplexer(runner=runner, wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+ calls.clear()
+
+ result = mux.kill_window("ses", "win")
+
+ assert result is True
+ assert "win" not in mux._sessions.get("ses", {})
+
+ def test_returns_false_for_nonexistent_window(self, tmp_path):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result("81"), wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+
+ assert mux.kill_window("ses", "no_such_win") is False
+
+
+class TestSessionExists:
+ def test_returns_true_for_registered_session(self, tmp_path):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result("90"), wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("ses", "win", "tid", str(tmp_path))
+ assert mux.session_exists("ses") is True
+
+ def test_returns_false_for_unknown_session(self):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result(), wezterm_bin="wezterm")
+ assert mux.session_exists("nope") is False
+
+
+class TestListSessions:
+ def test_returns_registered_session(self, tmp_path):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result("91"), wezterm_bin="wezterm")
+ with patch.object(mux, "_resolve_and_validate_working_directory", return_value=str(tmp_path)):
+ mux.create_session("my-ses", "win", "tid", str(tmp_path))
+
+ sessions = mux.list_sessions()
+ assert [session["name"] for session in sessions] == ["my-ses"]
+
+ def test_returns_empty_when_no_sessions(self):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result(), wezterm_bin="wezterm")
+ assert mux.list_sessions() == []
+
+
+class TestGetPaneWorkingDirectory:
+ def test_returns_none_for_unknown_window(self):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result(), wezterm_bin="wezterm")
+ assert mux.get_pane_working_directory("ses", "win") is None
+
+
+class TestDiffSnapshot:
+ def test_diff_snapshot_pure_append(self):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result(), wezterm_bin="wezterm")
+ assert mux._diff_snapshot("hello\n", "hello\nworld\n") == "world\n"
+
+ def test_diff_snapshot_line_suffix_overlap(self):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result(), wezterm_bin="wezterm")
+ assert mux._diff_snapshot("a\nb\nc\n", "b\nc\nd\n") == "d\n"
+
+ def test_diff_snapshot_redraw_no_overlap(self):
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result(), wezterm_bin="wezterm")
+ assert mux._diff_snapshot("abc\n", "xyz\n") == "xyz\n"
+
+
+class TestPipePane:
+ def test_pipe_pane_raises_if_pane_not_registered(self, fake_runner: FakeRunner, tmp_path):
+ mux = WezTermMultiplexer(
+ runner=fake_runner,
+ wezterm_bin="wezterm",
+ poll_interval=0.001,
+ clock_sleep=lambda *_: None,
+ )
+
+ with pytest.raises(KeyError, match="pane not found"):
+ mux.pipe_pane("missing", "win", str(tmp_path / "pipe.log"))
+
+ def test_pipe_pane_raises_if_already_running(self, multiplexer, tmp_path):
+ path = tmp_path / "pipe.log"
+ multiplexer.pipe_pane("sess", "win", str(path))
+
+ with pytest.raises(RuntimeError, match="pipe_pane already running for sess:win"):
+ multiplexer.pipe_pane("sess", "win", str(path))
+
+ def test_after_one_tick_with_no_change_file_is_empty(self, multiplexer, fake_runner: FakeRunner, tmp_path):
+ path = tmp_path / "pipe.log"
+ fake_runner.queue_responses([""])
+
+ multiplexer.pipe_pane("sess", "win", str(path))
+ assert fake_runner.wait_for_queue_drain(timeout=1.0)
+ multiplexer.stop_pipe_pane("sess", "win")
+
+ assert path.read_text(encoding="utf-8") == ""
+
+ def test_after_one_tick_with_text_file_contains_text(self, multiplexer, fake_runner: FakeRunner, tmp_path):
+ path = tmp_path / "pipe.log"
+ fake_runner.queue_responses(["hello\n"])
+
+ multiplexer.pipe_pane("sess", "win", str(path))
+ assert fake_runner.wait_for_queue_drain(timeout=1.0)
+ multiplexer.stop_pipe_pane("sess", "win")
+
+ assert path.read_text(encoding="utf-8") == "hello\n"
+
+ def test_pure_append(self, multiplexer, tmp_path, fake_runner: FakeRunner):
+ path = tmp_path / "pipe.log"
+ fake_runner.queue_responses(["hello\n", "hello\nworld\n"])
+
+ multiplexer.pipe_pane("sess", "win", str(path))
+ assert fake_runner.wait_for_queue_drain(timeout=1.0)
+ multiplexer.stop_pipe_pane("sess", "win")
+
+ assert path.read_text(encoding="utf-8") == "hello\nworld\n"
+
+ def test_redraw_appends_full_snapshot_when_no_overlap(
+ self, multiplexer, tmp_path, fake_runner: FakeRunner
+ ):
+ path = tmp_path / "pipe.log"
+ fake_runner.queue_responses(["abc\n", "xyz\n"])
+
+ multiplexer.pipe_pane("sess", "win", str(path))
+ assert fake_runner.wait_for_queue_drain(timeout=1.0)
+ multiplexer.stop_pipe_pane("sess", "win")
+
+ assert path.read_text(encoding="utf-8") == "abc\nxyz\n"
+
+ def test_line_suffix_overlap_appends_only_new_lines(
+ self, multiplexer, tmp_path, fake_runner: FakeRunner
+ ):
+ path = tmp_path / "pipe.log"
+ fake_runner.queue_responses(["a\nb\nc\n", "b\nc\nd\n"])
+
+ multiplexer.pipe_pane("sess", "win", str(path))
+ assert fake_runner.wait_for_queue_drain(timeout=1.0)
+ multiplexer.stop_pipe_pane("sess", "win")
+
+ assert path.read_text(encoding="utf-8") == "a\nb\nc\nd\n"
+
+ def test_pane_disappears_mid_poll_exits_cleanly(
+ self, multiplexer, tmp_path, fake_runner: FakeRunner
+ ):
+ path = tmp_path / "pipe.log"
+ fake_runner.queue_responses(["hello\n", RuntimeError("pane gone")])
+
+ multiplexer.pipe_pane("sess", "win", str(path))
+ assert fake_runner.wait_for_queue_drain(timeout=1.0)
+
+ # After the thread exits due to RuntimeError from _get_pane_text, the
+ # _poll_loop finally-block self-cleans the registry entry.
+ state = multiplexer._pollers.get(("sess", "win"))
+ if state is not None:
+ state.thread.join(timeout=1.0)
+ # After the thread has cleaned itself up, the registry entry is gone.
+ import time as _time
+ deadline = _time.monotonic() + 1.0
+ while _time.monotonic() < deadline and ("sess", "win") in multiplexer._pollers:
+ _time.sleep(0.01)
+
+ assert ("sess", "win") not in multiplexer._pollers
+ assert path.read_text(encoding="utf-8") == "hello\n"
+
+ def test_stop_pipe_pane_cancels_thread_and_prevents_further_writes(
+ self, multiplexer, tmp_path, fake_runner: FakeRunner
+ ):
+ path = tmp_path / "pipe.log"
+ fake_runner.queue_responses(["hello\n"])
+
+ multiplexer.pipe_pane("sess", "win", str(path))
+ assert fake_runner.wait_for_queue_drain(timeout=1.0)
+ multiplexer.stop_pipe_pane("sess", "win")
+
+ fake_runner.queue_responses(["hello\nworld\n"])
+
+ assert path.read_text(encoding="utf-8") == "hello\n"
+ assert fake_runner.pending_get_text() == 1
+
+ def test_stop_pipe_pane_raises_when_no_poller_exists(self, multiplexer):
+ with pytest.raises(RuntimeError, match="pipe_pane not running for sess:win"):
+ multiplexer.stop_pipe_pane("sess", "win")
+
+ def test_kill_session_stops_the_poller_automatically(
+ self, multiplexer, tmp_path, fake_runner: FakeRunner
+ ):
+ path = tmp_path / "pipe.log"
+ fake_runner.queue_responses(["hello\n"])
+
+ multiplexer.pipe_pane("sess", "win", str(path))
+ assert fake_runner.wait_for_queue_drain(timeout=1.0)
+
+ assert multiplexer.kill_session("sess") is True
+ assert ("sess", "win") not in multiplexer._pollers
+ assert path.read_text(encoding="utf-8") == "hello\n"
+
+ def test_kill_window_stops_the_poller_automatically(
+ self, multiplexer, tmp_path, fake_runner: FakeRunner
+ ):
+ path = tmp_path / "pipe.log"
+ fake_runner.queue_responses(["hello\n"])
+
+ multiplexer.pipe_pane("sess", "win", str(path))
+ assert fake_runner.wait_for_queue_drain(timeout=1.0)
+
+ assert multiplexer.kill_window("sess", "win") is True
+ assert ("sess", "win") not in multiplexer._pollers
+ assert path.read_text(encoding="utf-8") == "hello\n"
+
+ def test_stop_pipe_pane_timeout_keeps_zombie_registry_entry(
+ self, multiplexer, tmp_path
+ ):
+ """Zombie poller (join timeout) keeps its registry entry to block double-write.
+
+ When stop_pipe_pane() times out waiting for the thread, the entry must
+ remain so that a subsequent pipe_pane() call raises RuntimeError rather
+ than starting a second thread writing to the same log file concurrently.
+
+ We inject a synthetic _PollerState with a mock thread that reports
+ is_alive()=True after join(), so there are no real threads racing
+ against the registry check.
+ """
+ from unittest.mock import MagicMock as _MagicMock
+ from cli_agent_orchestrator.multiplexers.wezterm import _PollerState
+
+ path = tmp_path / "pipe.log"
+ path.touch()
+
+ # Build a mock thread that always appears alive (join is a no-op).
+ mock_thread = _MagicMock(spec=threading.Thread)
+ mock_thread.is_alive.return_value = True # simulates join timeout
+ mock_thread.join.return_value = None # join returns immediately
+
+ stop_event = threading.Event()
+ state = _PollerState(thread=mock_thread, stop_event=stop_event)
+ key = ("sess", "win")
+ multiplexer._pollers[key] = state
+
+ multiplexer.stop_pipe_pane("sess", "win")
+
+ # Registry entry must still be present (zombie kept in place).
+ assert key in multiplexer._pollers
+
+ # A subsequent pipe_pane call must raise, not start a second thread.
+ with pytest.raises(RuntimeError, match="pipe_pane already running for sess:win"):
+ multiplexer.pipe_pane("sess", "win", str(path))
+
+ # Cleanup: remove the synthetic entry so the fixture teardown doesn't fail.
+ del multiplexer._pollers[key]
+
+ def test_poll_loop_self_cleans_on_pane_disappearing(
+ self, multiplexer, tmp_path, fake_runner: FakeRunner
+ ):
+ """When _get_pane_text raises, _poll_loop's finally block removes the registry entry.
+
+ This verifies that a zombie that eventually exits cleans its own entry
+ so that a new pipe_pane() call can succeed afterward.
+ """
+ path = tmp_path / "pipe.log"
+ # First response writes content; second raises to simulate pane gone.
+ fake_runner.queue_responses(["hello\n", RuntimeError("pane gone")])
+
+ multiplexer.pipe_pane("sess", "win", str(path))
+ assert fake_runner.wait_for_queue_drain(timeout=1.0)
+
+ # Wait for the thread to exit and self-clean.
+ import time as _time
+ deadline = _time.monotonic() + 2.0
+ while _time.monotonic() < deadline and ("sess", "win") in multiplexer._pollers:
+ _time.sleep(0.01)
+
+ assert ("sess", "win") not in multiplexer._pollers
+
+ # Now pipe_pane can be called again without raising.
+ fake_runner.queue_responses(["hello\n"])
+ multiplexer.pipe_pane("sess", "win", str(path))
+ multiplexer.stop_pipe_pane("sess", "win")
+
+
+class TestNormalizeWezTermBin:
+ """``wezterm-gui`` lacks the ``cli`` subcommand — it must be rewritten
+ to its CLI sibling at construction time so users who set
+ ``WEZTERM_EXECUTABLE`` to the GUI binary still get a working multiplexer.
+ """
+
+ def test_bare_gui_exe_rewritten(self):
+ assert _normalize_wezterm_bin("wezterm-gui.exe") == "wezterm.exe"
+
+ def test_bare_gui_unix_rewritten(self):
+ assert _normalize_wezterm_bin("wezterm-gui") == "wezterm"
+
+ def test_windows_path_rewritten_preserving_directory(self):
+ from pathlib import Path
+
+ result = _normalize_wezterm_bin(r"C:\Tools\WezTerm\wezterm-gui.exe")
+ assert Path(result) == Path(r"C:\Tools\WezTerm\wezterm.exe")
+
+ def test_unix_path_rewritten_preserving_directory(self):
+ from pathlib import Path
+
+ result = _normalize_wezterm_bin("/usr/local/bin/wezterm-gui")
+ assert Path(result) == Path("/usr/local/bin/wezterm")
+
+ def test_match_is_case_insensitive(self):
+ from pathlib import Path
+
+ # Windows users may have mixed-case file names
+ result = _normalize_wezterm_bin("WEZTERM-GUI.EXE")
+ # Output is the canonical lowercase form; FS lookup is case-insensitive on win32
+ assert Path(result).name.lower() == "wezterm.exe"
+
+ def test_already_cli_binary_unchanged(self):
+ assert _normalize_wezterm_bin("wezterm.exe") == "wezterm.exe"
+ assert _normalize_wezterm_bin("wezterm") == "wezterm"
+ assert (
+ _normalize_wezterm_bin(r"C:\Tools\WezTerm\wezterm.exe")
+ == r"C:\Tools\WezTerm\wezterm.exe"
+ )
+
+ def test_unrelated_binary_unchanged(self):
+ assert _normalize_wezterm_bin("/opt/bin/something-else") == "/opt/bin/something-else"
+
+ def test_mux_server_binary_unchanged(self):
+ # wezterm-mux-server is a distinct binary, not the GUI — leave it alone.
+ from pathlib import Path
+
+ result = _normalize_wezterm_bin(r"C:\Tools\WezTerm\wezterm-mux-server.exe")
+ assert Path(result) == Path(r"C:\Tools\WezTerm\wezterm-mux-server.exe")
+
+ def test_constructor_normalizes_explicit_wezterm_bin(self):
+ from pathlib import Path
+
+ mux = WezTermMultiplexer(
+ runner=lambda argv, env=None: _spawn_result(),
+ wezterm_bin=r"C:\Tools\WezTerm\wezterm-gui.exe",
+ )
+ assert Path(mux._bin) == Path(r"C:\Tools\WezTerm\wezterm.exe")
+
+ def test_constructor_normalizes_env_var(self, monkeypatch):
+ from pathlib import Path
+
+ monkeypatch.setenv(
+ "WEZTERM_EXECUTABLE", r"C:\Tools\WezTerm\wezterm-gui.exe"
+ )
+ mux = WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result())
+ assert Path(mux._bin) == Path(r"C:\Tools\WezTerm\wezterm.exe")
+
+ def test_constructor_logs_warning_on_rewrite(self, monkeypatch, caplog):
+ import logging
+
+ monkeypatch.setenv(
+ "WEZTERM_EXECUTABLE", r"C:\Tools\WezTerm\wezterm-gui.exe"
+ )
+ with caplog.at_level(logging.WARNING, logger="cli_agent_orchestrator.multiplexers.wezterm"):
+ WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result())
+ assert any("GUI binary" in rec.message for rec in caplog.records)
+
+ def test_constructor_no_warning_when_already_cli(self, monkeypatch, caplog):
+ import logging
+
+ monkeypatch.setenv("WEZTERM_EXECUTABLE", "wezterm.exe")
+ with caplog.at_level(logging.WARNING, logger="cli_agent_orchestrator.multiplexers.wezterm"):
+ WezTermMultiplexer(runner=lambda argv, env=None: _spawn_result())
+ assert not any("GUI binary" in rec.message for rec in caplog.records)
+
+
+class TestDefaultRunner:
+ """The default subprocess runner must force UTF-8 decoding so wezterm
+ output (UTF-8 by default) does not crash subprocess._readerthread when
+ Python's locale codepage is cp1252 (the Windows default)."""
+
+ def test_runner_uses_utf8_with_replace_errors(self, monkeypatch):
+ captured: dict = {}
+
+ def fake_run(args, **kwargs):
+ captured["args"] = args
+ captured.update(kwargs)
+ return _make_result(stdout="ok\n")
+
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.multiplexers.wezterm.subprocess.run", fake_run
+ )
+ _default_runner(["wezterm", "cli", "list"])
+
+ assert captured["text"] is True
+ assert captured["encoding"] == "utf-8"
+ assert captured["errors"] == "replace"
+ assert captured["capture_output"] is True
+ assert captured["check"] is False
+ assert captured["args"] == ["wezterm", "cli", "list"]
+
+ def test_runner_survives_non_cp1252_bytes_end_to_end(self):
+ """Integration: spawn a real subprocess that writes raw 0x9d (the byte
+ that crashed cao-server) and verify the runner decodes successfully."""
+ # 0x9d is unmapped in cp1252; with text=True and locale encoding it
+ # would raise UnicodeDecodeError. With UTF-8 + replace it becomes U+FFFD.
+ script = (
+ "import sys; sys.stdout.buffer.write(b'pre\\x9dpost'); sys.stdout.flush()"
+ )
+ result = _default_runner([sys.executable, "-c", script])
+ assert result.returncode == 0
+ assert result.stdout.startswith("pre")
+ assert result.stdout.endswith("post")
+
+
+class TestResolvePowerShellBin:
+ """The env-injection wrapper on Windows must prefer pwsh.exe (UTF-8)
+ over powershell.exe (cp1252-bound on non-English Windows)."""
+
+ def test_explicit_override_wins(self, monkeypatch):
+ monkeypatch.setenv("CAO_POWERSHELL_BIN", r"C:\custom\my-pwsh.exe")
+ assert _resolve_powershell_bin() == r"C:\custom\my-pwsh.exe"
+
+ def test_override_takes_precedence_over_pwsh_on_path(self, monkeypatch):
+ monkeypatch.setenv("CAO_POWERSHELL_BIN", "forced.exe")
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.multiplexers.wezterm.shutil.which",
+ lambda name: r"C:\Program Files\PowerShell\7\pwsh.exe",
+ )
+ assert _resolve_powershell_bin() == "forced.exe"
+
+ def test_prefers_pwsh_when_available(self, monkeypatch):
+ monkeypatch.delenv("CAO_POWERSHELL_BIN", raising=False)
+
+ def fake_which(name: str):
+ if name in ("pwsh", "pwsh.exe"):
+ return r"C:\Program Files\PowerShell\7\pwsh.exe"
+ return None
+
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.multiplexers.wezterm.shutil.which", fake_which
+ )
+ assert _resolve_powershell_bin() == r"C:\Program Files\PowerShell\7\pwsh.exe"
+
+ def test_falls_back_to_windows_powershell_when_pwsh_missing(self, monkeypatch):
+ monkeypatch.delenv("CAO_POWERSHELL_BIN", raising=False)
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.multiplexers.wezterm.shutil.which", lambda _name: None
+ )
+ assert _resolve_powershell_bin() == "powershell.exe"
+
+ def test_wrap_with_env_uses_resolved_bin_on_win32(self, monkeypatch):
+ from cli_agent_orchestrator.multiplexers.wezterm import _wrap_with_env
+
+ monkeypatch.setattr(sys, "platform", "win32")
+ monkeypatch.setenv("CAO_POWERSHELL_BIN", "my-pwsh.exe")
+ wrapped = _wrap_with_env({"CAO_TERMINAL_ID": "tid"}, ["claude.cmd"])
+ assert wrapped[0] == "my-pwsh.exe"
+ assert wrapped[1:4] == ["-NoLogo", "-NoProfile", "-Command"]
+
+ def test_default_shell_on_win32_is_powershell_not_cmd(self, monkeypatch):
+ """Regression: COMSPEC (cmd.exe) must NOT be the pane's inner shell.
+ The PowerShell wrapper exists only to inject env vars; the inner
+ shell the agent sees should be pwsh/powershell so the user isn't
+ dropped into cmd.exe."""
+ monkeypatch.setattr(sys, "platform", "win32")
+ monkeypatch.setenv("COMSPEC", r"C:\Windows\System32\cmd.exe")
+ monkeypatch.setenv("CAO_POWERSHELL_BIN", "pwsh.exe")
+ assert _default_shell() == "pwsh.exe"
+ assert "cmd.exe" not in _default_shell().lower()
diff --git a/test/providers/test_claude_code_coverage.py b/test/providers/test_claude_code_coverage.py
index 5bc35c516..77446e2ab 100644
--- a/test/providers/test_claude_code_coverage.py
+++ b/test/providers/test_claude_code_coverage.py
@@ -11,14 +11,24 @@
import pytest
+def _make_get_multiplexer_patch(mock_mux):
+ """Return a callable that get_multiplexer() calls resolve to mock_mux."""
+ return lambda: mock_mux
+
+
@pytest.fixture
-def provider():
- """Create a ClaudeCodeProvider with mocked dependencies."""
- with patch("cli_agent_orchestrator.providers.claude_code.tmux_client"):
- from cli_agent_orchestrator.providers.claude_code import ClaudeCodeProvider
+def provider(monkeypatch):
+ """Create a ClaudeCodeProvider with mocked multiplexer."""
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ from cli_agent_orchestrator.providers.claude_code import ClaudeCodeProvider
- p = ClaudeCodeProvider("tid1", "ses", "win", "test-agent")
- yield p
+ p = ClaudeCodeProvider("tid1", "ses", "win", "test-agent")
+ p._mock_mux = mock_mux
+ yield p
class TestBuildCommandMcpServerModelDump:
@@ -50,53 +60,46 @@ def test_mcp_server_with_model_dump(self, mock_load, provider):
class TestHandleStartupPromptsBranches:
"""Test _handle_startup_prompts branches."""
- @patch("cli_agent_orchestrator.providers.claude_code.subprocess")
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_bypass_permissions_prompt(self, mock_tmux, mock_subprocess, provider):
+ def test_bypass_permissions_prompt(self, provider):
"""Detects bypass permissions prompt and sends Down + Enter."""
- mock_tmux.get_history.return_value = (
+ provider._mock_mux.get_history.return_value = (
"⚠ Bypass Permissions mode\n" "1. No, exit\n" "2. Yes, I accept\n"
)
provider._handle_startup_prompts(timeout=1.0)
- # Should have called subprocess.run twice (Down arrow + Enter)
- assert mock_subprocess.run.call_count == 2
+ calls = provider._mock_mux.send_special_key.call_args_list
+ assert len(calls) == 2
+ assert calls[0].args == ("ses", "win", "\x1b[B")
+ assert calls[0].kwargs == {"literal": True}
+ assert calls[1].args == ("ses", "win", "Enter")
+ assert calls[1].kwargs == {}
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_idle_prompt_detected_early_return(self, mock_tmux, provider):
+ def test_idle_prompt_detected_early_return(self, provider):
"""When idle prompt is visible, returns immediately without sending keys."""
from cli_agent_orchestrator.providers.claude_code import IDLE_PROMPT_PATTERN
- mock_tmux.get_history.return_value = "❯ "
+ provider._mock_mux.get_history.return_value = "❯ "
provider._handle_startup_prompts(timeout=1.0)
# No exception means early return worked
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_welcome_banner_detected_early_return(self, mock_tmux, provider):
+ def test_welcome_banner_detected_early_return(self, provider):
"""When welcome banner is visible, returns immediately."""
- mock_tmux.get_history.return_value = "Welcome to Claude Code v2.5.0"
+ provider._mock_mux.get_history.return_value = "Welcome to Claude Code v2.5.0"
provider._handle_startup_prompts(timeout=1.0)
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_trust_prompt_detected(self, mock_tmux, provider):
+ def test_trust_prompt_detected(self, provider):
"""Trust prompt sends Enter to accept."""
- mock_tmux.get_history.return_value = (
+ provider._mock_mux.get_history.return_value = (
"Do you trust the files in this folder?\n" "❯ Yes, I trust this folder"
)
- mock_pane = MagicMock()
- mock_window = MagicMock()
- mock_window.active_pane = mock_pane
- mock_session = MagicMock()
- mock_session.windows.get.return_value = mock_window
- mock_tmux.server.sessions.get.return_value = mock_session
provider._handle_startup_prompts(timeout=1.0)
- mock_pane.send_keys.assert_called_once_with("", enter=True)
+ provider._mock_mux.send_special_key.assert_called_once_with("ses", "win", "Enter")
class TestDatabaseListAllTerminals:
diff --git a/test/providers/test_claude_code_unit.py b/test/providers/test_claude_code_unit.py
index 7e6bde63f..7bfbf26d2 100644
--- a/test/providers/test_claude_code_unit.py
+++ b/test/providers/test_claude_code_unit.py
@@ -14,19 +14,28 @@
_PATCH_SETTINGS = patch.object(ClaudeCodeProvider, "_ensure_skip_bypass_prompt_setting")
+def _make_get_multiplexer_patch(mock_mux):
+ """Return a zero-argument stand-in for get_multiplexer() that always returns mock_mux."""
+ return lambda: mock_mux
+
+
class TestClaudeCodeProviderInitialization:
"""Tests for ClaudeCodeProvider initialization."""
@_PATCH_SETTINGS
@patch("cli_agent_orchestrator.providers.claude_code.wait_for_shell")
@patch("cli_agent_orchestrator.providers.claude_code.wait_until_status")
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_initialize_success(self, mock_tmux, mock_wait_status, mock_wait_shell, _):
+ def test_initialize_success(self, mock_wait_status, mock_wait_shell, _, monkeypatch):
"""Test successful initialization."""
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
mock_wait_shell.return_value = True
mock_wait_status.return_value = True
# First call is the pre-launch snapshot, subsequent calls return Claude output
- mock_tmux.get_history.side_effect = [
+ mock_mux.get_history.side_effect = [
"",
"Welcome to Claude Code v2.0",
"Welcome to Claude Code v2.0",
@@ -39,12 +48,16 @@ def test_initialize_success(self, mock_tmux, mock_wait_status, mock_wait_shell,
assert result is True
assert provider._initialized is True
mock_wait_shell.assert_called_once()
- mock_tmux.send_keys.assert_called_once()
+ mock_mux.send_keys.assert_called_once()
@patch("cli_agent_orchestrator.providers.claude_code.wait_for_shell")
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_initialize_shell_timeout(self, mock_tmux, mock_wait_shell):
+ def test_initialize_shell_timeout(self, mock_wait_shell, monkeypatch):
"""Test initialization with shell timeout."""
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
mock_wait_shell.return_value = False
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
@@ -55,13 +68,17 @@ def test_initialize_shell_timeout(self, mock_tmux, mock_wait_shell):
@_PATCH_SETTINGS
@patch("cli_agent_orchestrator.providers.claude_code.wait_for_shell")
@patch("cli_agent_orchestrator.providers.claude_code.wait_until_status")
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_initialize_timeout(self, mock_tmux, mock_wait_status, mock_wait_shell, _):
+ def test_initialize_timeout(self, mock_wait_status, mock_wait_shell, _, monkeypatch):
"""Test initialization timeout when no Claude markers appear."""
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
mock_wait_shell.return_value = True
mock_wait_status.return_value = False
# Snapshot and loop return the same content → no new Claude markers
- mock_tmux.get_history.return_value = "some shell output"
+ mock_mux.get_history.return_value = "some shell output"
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
@@ -77,14 +94,18 @@ def test_initialize_timeout(self, mock_tmux, mock_wait_status, mock_wait_shell,
@patch("cli_agent_orchestrator.providers.claude_code.load_agent_profile")
@patch("cli_agent_orchestrator.providers.claude_code.wait_for_shell")
@patch("cli_agent_orchestrator.providers.claude_code.wait_until_status")
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
def test_initialize_with_agent_profile(
- self, mock_tmux, mock_wait_status, mock_wait_shell, mock_load, _
+ self, mock_wait_status, mock_wait_shell, mock_load, _, monkeypatch
):
"""Test initialization with agent profile."""
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
mock_wait_shell.return_value = True
mock_wait_status.return_value = True
- mock_tmux.get_history.side_effect = [
+ mock_mux.get_history.side_effect = [
"",
"Welcome to Claude Code v2.0",
"Welcome to Claude Code v2.0",
@@ -105,9 +126,13 @@ def test_initialize_with_agent_profile(
@_PATCH_SETTINGS
@patch("cli_agent_orchestrator.providers.claude_code.wait_for_shell")
@patch("cli_agent_orchestrator.providers.claude_code.load_agent_profile")
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_initialize_with_invalid_agent_profile(self, mock_tmux, mock_load, mock_wait_shell, _):
+ def test_initialize_with_invalid_agent_profile(self, mock_load, mock_wait_shell, _, monkeypatch):
"""Test initialization with invalid agent profile."""
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
mock_wait_shell.return_value = True
mock_load.side_effect = FileNotFoundError("Profile not found")
@@ -120,14 +145,18 @@ def test_initialize_with_invalid_agent_profile(self, mock_tmux, mock_load, mock_
@patch("cli_agent_orchestrator.providers.claude_code.load_agent_profile")
@patch("cli_agent_orchestrator.providers.claude_code.wait_for_shell")
@patch("cli_agent_orchestrator.providers.claude_code.wait_until_status")
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
def test_initialize_with_mcp_servers(
- self, mock_tmux, mock_wait_status, mock_wait_shell, mock_load, _
+ self, mock_wait_status, mock_wait_shell, mock_load, _, monkeypatch
):
"""Test initialization with MCP servers in profile."""
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
mock_wait_shell.return_value = True
mock_wait_status.return_value = True
- mock_tmux.get_history.side_effect = [
+ mock_mux.get_history.side_effect = [
"",
"Welcome to Claude Code v2.0",
"Welcome to Claude Code v2.0",
@@ -147,12 +176,16 @@ def test_initialize_with_mcp_servers(
@_PATCH_SETTINGS
@patch("cli_agent_orchestrator.providers.claude_code.wait_for_shell")
@patch("cli_agent_orchestrator.providers.claude_code.wait_until_status")
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_initialize_sends_claude_command(self, mock_tmux, mock_wait_status, mock_wait_shell, _):
- """Test that initialize sends the 'claude' command to tmux."""
+ def test_initialize_sends_claude_command(self, mock_wait_status, mock_wait_shell, _, monkeypatch):
+ """Test that initialize sends the 'claude' command to the multiplexer."""
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
mock_wait_shell.return_value = True
mock_wait_status.return_value = True
- mock_tmux.get_history.side_effect = [
+ mock_mux.get_history.side_effect = [
"",
"Welcome to Claude Code v2.0",
"Welcome to Claude Code v2.0",
@@ -162,7 +195,7 @@ def test_initialize_sends_claude_command(self, mock_tmux, mock_wait_status, mock
with patch.object(provider, "get_status", return_value=TerminalStatus.IDLE):
provider.initialize()
- call_args = mock_tmux.send_keys.call_args
+ call_args = mock_mux.send_keys.call_args
assert call_args[0][0] == "test-session"
assert call_args[0][1] == "window-0"
assert "claude --dangerously-skip-permissions" in call_args[0][2]
@@ -171,30 +204,42 @@ def test_initialize_sends_claude_command(self, mock_tmux, mock_wait_status, mock
class TestClaudeCodeProviderStatusDetection:
"""Tests for ClaudeCodeProvider status detection."""
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_idle_old_prompt(self, mock_tmux):
+ def test_get_status_idle_old_prompt(self, monkeypatch):
"""Test IDLE status detection with old '>' prompt."""
- mock_tmux.get_history.return_value = "> "
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "> "
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.IDLE
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_idle_new_prompt(self, mock_tmux):
+ def test_get_status_idle_new_prompt(self, monkeypatch):
"""Test IDLE status detection with new '❯' prompt."""
- mock_tmux.get_history.return_value = "❯ "
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "❯ "
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.IDLE
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_idle_with_ansi_codes(self, mock_tmux):
+ def test_get_status_idle_with_ansi_codes(self, monkeypatch):
"""Test IDLE status detection with ANSI codes around prompt."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"\x1b[2m\x1b[38;2;136;136;136m────────────\n"
'\x1b[0m❯ \x1b[7mT\x1b[0;2mry\x1b[0m \x1b[2m"hello"\x1b[0m\n'
"\x1b[2m\x1b[38;2;136;136;136m────────────\x1b[0m"
@@ -205,50 +250,70 @@ def test_get_status_idle_with_ansi_codes(self, mock_tmux):
assert status == TerminalStatus.IDLE
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_completed(self, mock_tmux):
+ def test_get_status_completed(self, monkeypatch):
"""Test COMPLETED status detection."""
- mock_tmux.get_history.return_value = "⏺ Here is the response\n> "
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "⏺ Here is the response\n> "
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_completed_with_new_prompt(self, mock_tmux):
+ def test_get_status_completed_with_new_prompt(self, monkeypatch):
"""Test COMPLETED status detection with new '❯' prompt."""
- mock_tmux.get_history.return_value = "⏺ Here is the response\n❯ "
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "⏺ Here is the response\n❯ "
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_processing(self, mock_tmux):
+ def test_get_status_processing(self, monkeypatch):
"""Test PROCESSING status detection."""
- mock_tmux.get_history.return_value = "✶ Processing… (esc to interrupt)"
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "✶ Processing… (esc to interrupt)"
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.PROCESSING
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_processing_minimal_spinner(self, mock_tmux):
+ def test_get_status_processing_minimal_spinner(self, monkeypatch):
"""Test PROCESSING detection with minimal spinner format (no parenthesized text)."""
- mock_tmux.get_history.return_value = "✻ Orbiting…"
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "✻ Orbiting…"
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.PROCESSING
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_processing_beats_stale_completed(self, mock_tmux):
+ def test_get_status_processing_beats_stale_completed(self, monkeypatch):
"""Test that PROCESSING is detected even when stale ⏺ and ❯ markers are in scrollback."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"⏺ Previous response from init\n"
"❯ user task message\n"
"⏺ Let me read the file\n"
@@ -260,10 +325,14 @@ def test_get_status_processing_beats_stale_completed(self, mock_tmux):
assert status == TerminalStatus.PROCESSING
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_completed_despite_stale_spinner_in_scrollback(self, mock_tmux):
+ def test_get_status_completed_despite_stale_spinner_in_scrollback(self, monkeypatch):
"""Stale spinner in scrollback must not block COMPLETED detection (#104)."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"✻ Orbiting…\n"
"⏺ Previous response\n"
"❯ user sent new task\n"
@@ -274,20 +343,28 @@ def test_get_status_completed_despite_stale_spinner_in_scrollback(self, mock_tmu
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
assert provider.get_status() == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_idle_despite_stale_spinner_in_scrollback(self, mock_tmux):
+ def test_get_status_idle_despite_stale_spinner_in_scrollback(self, monkeypatch):
"""Stale spinner in scrollback must not block IDLE detection (#104)."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"✶ Processing… (esc to interrupt)\n" "Some previous output\n" "❯ "
)
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
assert provider.get_status() == TerminalStatus.IDLE
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_processing_spinner_before_separator(self, mock_tmux):
+ def test_get_status_processing_spinner_before_separator(self, monkeypatch):
"""Spinner immediately before ──────── separator → PROCESSING (structural check)."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"❯ do the task\n"
"⏺ Let me read the file\n"
"✢ Thinking…\n"
@@ -298,19 +375,27 @@ def test_get_status_processing_spinner_before_separator(self, mock_tmux):
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
assert provider.get_status() == TerminalStatus.PROCESSING
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_completed_no_spinner_before_separator(self, mock_tmux):
+ def test_get_status_completed_no_spinner_before_separator(self, monkeypatch):
"""Response text (no spinner) before separator → COMPLETED, not PROCESSING."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"❯ do the task\n" "⏺ Here is the completed response\n" "────────────────────────\n" "❯ "
)
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
assert provider.get_status() == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_stale_spinner_far_back_not_processing(self, mock_tmux):
+ def test_get_status_stale_spinner_far_back_not_processing(self, monkeypatch):
"""Stale spinner far back in scrollback + current separator with no spinner → COMPLETED."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"✢ Thinking…\n"
"⏺ Old response from first task line 1\n"
"Old response from first task line 2\n"
@@ -325,17 +410,25 @@ def test_get_status_stale_spinner_far_back_not_processing(self, mock_tmux):
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
assert provider.get_status() == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_processing_no_separator_yet(self, mock_tmux):
+ def test_get_status_processing_no_separator_yet(self, monkeypatch):
"""Early execution with spinner but no separator yet → position fallback PROCESSING."""
- mock_tmux.get_history.return_value = "✻ Orbiting…"
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "✻ Orbiting…"
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
assert provider.get_status() == TerminalStatus.PROCESSING
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_processing_ansi_separator(self, mock_tmux):
+ def test_get_status_processing_ansi_separator(self, monkeypatch):
"""Spinner before separator with ANSI colour codes on separator → PROCESSING."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"❯ do the task\n"
"⏺ Reading file…\n"
"✽ Cooking…\n"
@@ -346,19 +439,27 @@ def test_get_status_processing_ansi_separator(self, mock_tmux):
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
assert provider.get_status() == TerminalStatus.PROCESSING
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_processing_middle_dot_spinner(self, mock_tmux):
+ def test_get_status_processing_middle_dot_spinner(self, monkeypatch):
"""New · Swirling… spinner variant → PROCESSING via structural check."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"❯ do the task\n" "· Swirling…\n" "\n" "────────────────────────\n" "❯ "
)
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
assert provider.get_status() == TerminalStatus.PROCESSING
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_idle_not_false_processing_from_status_bar(self, mock_tmux):
+ def test_get_status_idle_not_false_processing_from_status_bar(self, monkeypatch):
"""Status bar '· latest:…' must not false-positive as PROCESSING."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"Claude Code v2.1.63\n"
"────────────────────\n"
"❯ \n"
@@ -368,10 +469,14 @@ def test_get_status_idle_not_false_processing_from_status_bar(self, mock_tmux):
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
assert provider.get_status() == TerminalStatus.IDLE
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_waiting_user_answer(self, mock_tmux):
+ def test_get_status_waiting_user_answer(self, monkeypatch):
"""Test WAITING_USER_ANSWER status detection."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"❯ 1. Option one\n"
" 2. Option two\n"
"Enter to select · ↑/↓ to navigate · Esc to cancel"
@@ -382,10 +487,14 @@ def test_get_status_waiting_user_answer(self, mock_tmux):
assert status == TerminalStatus.WAITING_USER_ANSWER
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_stale_scrollback_not_waiting_user_answer(self, mock_tmux):
+ def test_get_status_stale_scrollback_not_waiting_user_answer(self, monkeypatch):
"""Stale numbered scrollback without the active footer must not block input."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"❯ 1. Option one\n" " 2. Option two\n" "⏺ Selection handled earlier\n" "❯ "
)
@@ -395,40 +504,56 @@ def test_get_status_stale_scrollback_not_waiting_user_answer(self, mock_tmux):
assert status != TerminalStatus.WAITING_USER_ANSWER
assert status == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_error_empty(self, mock_tmux):
+ def test_get_status_error_empty(self, monkeypatch):
"""Test ERROR status with empty output."""
- mock_tmux.get_history.return_value = ""
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = ""
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.ERROR
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_error_unrecognized(self, mock_tmux):
+ def test_get_status_error_unrecognized(self, monkeypatch):
"""Test ERROR status with unrecognized output."""
- mock_tmux.get_history.return_value = "Some random output without patterns"
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "Some random output without patterns"
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.ERROR
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_with_tail_lines(self, mock_tmux):
+ def test_get_status_with_tail_lines(self, monkeypatch):
"""Test status detection with tail_lines parameter."""
- mock_tmux.get_history.return_value = "> "
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "> "
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
provider.get_status(tail_lines=50)
- mock_tmux.get_history.assert_called_with("test-session", "window-0", tail_lines=50)
+ mock_mux.get_history.assert_called_with("test-session", "window-0", tail_lines=50)
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_completed_after_compaction_not_false_processing(self, mock_tmux):
+ def test_get_status_completed_after_compaction_not_false_processing(self, monkeypatch):
"""Compaction spinner before its own separator, then more output; last sep has no spinner → COMPLETED."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"❯ do the task\n"
"⏺ Starting work…\n"
"✢ Compacting conversation…\n"
@@ -440,10 +565,14 @@ def test_get_status_completed_after_compaction_not_false_processing(self, mock_t
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
assert provider.get_status() == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_processing_after_compaction_when_still_running(self, mock_tmux):
+ def test_get_status_processing_after_compaction_when_still_running(self, monkeypatch):
"""Spinner before the last separator (agent resumes after compaction) → PROCESSING."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"❯ do the task\n"
"✢ Compacting conversation…\n"
"────────────────────────\n"
@@ -455,10 +584,14 @@ def test_get_status_processing_after_compaction_when_still_running(self, mock_tm
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
assert provider.get_status() == TerminalStatus.PROCESSING
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_completed_after_exit_not_false_processing(self, mock_tmux):
+ def test_get_status_completed_after_exit_not_false_processing(self, monkeypatch):
"""Spinner → sep (task done) → /exit → second sep; spinner NOT before last sep → not PROCESSING."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"❯ do the task\n"
"⏺ Working on it…\n"
"✻ Orbiting…\n"
@@ -691,72 +824,79 @@ def test_build_command_omits_model_when_unset(self, mock_load):
class TestClaudeCodeProviderStartupPrompts:
"""Tests for Claude Code startup prompt handling (trust + bypass)."""
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_handle_startup_prompts_detected_and_accepted(self, mock_tmux):
+ def test_handle_startup_prompts_detected_and_accepted(self, monkeypatch):
"""Test that trust prompt is detected and auto-accepted."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"\x1b[1m❯\x1b[0m 1. Yes, I trust this folder\n 2. No, don't trust\n"
)
- mock_session = MagicMock()
- mock_window = MagicMock()
- mock_pane = MagicMock()
- mock_tmux.server.sessions.get.return_value = mock_session
- mock_session.windows.get.return_value = mock_window
- mock_window.active_pane = mock_pane
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
provider._handle_startup_prompts(timeout=2.0)
- mock_pane.send_keys.assert_called_once_with("", enter=True)
+ mock_mux.send_special_key.assert_called_once_with("test-session", "window-0", "Enter")
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_handle_startup_prompts_not_needed(self, mock_tmux):
+ def test_handle_startup_prompts_not_needed(self, monkeypatch):
"""Test early return when Claude Code starts without prompts."""
- mock_tmux.get_history.return_value = "Welcome to Claude Code v2.1.0"
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "Welcome to Claude Code v2.1.0"
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
provider._handle_startup_prompts(timeout=2.0)
- mock_tmux.server.sessions.get.assert_not_called()
+ mock_mux.send_special_key.assert_not_called()
- @patch("cli_agent_orchestrator.providers.claude_code.time")
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_handle_startup_prompts_timeout(self, mock_tmux, mock_time):
+ def test_handle_startup_prompts_timeout(self, monkeypatch):
"""Test startup prompt handler times out gracefully."""
- mock_tmux.get_history.return_value = "Loading..."
- mock_time.time.side_effect = [0.0, 0.0, 25.0]
- mock_time.sleep = MagicMock()
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ # Return output that matches no known prompt pattern so the loop runs
+ # until the timeout. Use a very short timeout so the test is fast.
+ mock_mux.get_history.return_value = "Loading..."
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
- provider._handle_startup_prompts(timeout=20.0)
+ # Use a real tiny timeout — loop will exhaust immediately.
+ provider._handle_startup_prompts(timeout=0.01)
- mock_tmux.server.sessions.get.assert_not_called()
+ mock_mux.send_special_key.assert_not_called()
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_handle_startup_prompts_empty_output_then_detected(self, mock_tmux):
+ def test_handle_startup_prompts_empty_output_then_detected(self, monkeypatch):
"""Test trust prompt detection after initially empty output."""
- mock_tmux.get_history.side_effect = [
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.side_effect = [
"",
"❯ 1. Yes, I trust this folder\n 2. No",
]
- mock_session = MagicMock()
- mock_window = MagicMock()
- mock_pane = MagicMock()
- mock_tmux.server.sessions.get.return_value = mock_session
- mock_session.windows.get.return_value = mock_window
- mock_window.active_pane = mock_pane
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
provider._handle_startup_prompts(timeout=5.0)
- mock_pane.send_keys.assert_called_once_with("", enter=True)
+ mock_mux.send_special_key.assert_called_once_with("test-session", "window-0", "Enter")
- @patch("cli_agent_orchestrator.providers.claude_code.subprocess")
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_handle_bypass_prompt_detected_and_accepted(self, mock_tmux, mock_subprocess):
+ def test_handle_bypass_prompt_detected_and_accepted(self, monkeypatch):
"""Test that bypass permissions prompt is detected and auto-accepted."""
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
# First poll: bypass prompt; second poll: welcome banner (after dismissal)
- mock_tmux.get_history.side_effect = [
+ mock_mux.get_history.side_effect = [
"WARNING: Claude Code running in Bypass Permissions mode\n"
"❯ 1. No, exit\n 2. Yes, I accept\n",
"Welcome to Claude Code v2.1.74",
@@ -765,58 +905,46 @@ def test_handle_bypass_prompt_detected_and_accepted(self, mock_tmux, mock_subpro
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
provider._handle_startup_prompts(timeout=5.0)
- # Verify raw Down arrow escape sequence + Enter was sent via subprocess
- calls = mock_subprocess.run.call_args_list
+ calls = mock_mux.send_special_key.call_args_list
assert len(calls) == 2
- assert calls[0].args[0] == [
- "tmux",
- "send-keys",
- "-t",
- "test-session:window-0",
- "-l",
- "\x1b[B",
- ]
- assert calls[1].args[0] == ["tmux", "send-keys", "-t", "test-session:window-0", "Enter"]
+ assert calls[0].args == ("test-session", "window-0", "\x1b[B")
+ assert calls[0].kwargs == {"literal": True}
+ assert calls[1].args == ("test-session", "window-0", "Enter")
+ assert calls[1].kwargs == {}
- @patch("cli_agent_orchestrator.providers.claude_code.subprocess")
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_handle_bypass_then_trust_prompt(self, mock_tmux, mock_subprocess):
+ def test_handle_bypass_then_trust_prompt(self, monkeypatch):
"""Test that bypass prompt is handled, then trust prompt follows."""
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
# Poll 1: bypass prompt; Poll 2: trust prompt (after bypass dismissed)
- mock_tmux.get_history.side_effect = [
+ mock_mux.get_history.side_effect = [
"WARNING: Bypass Permissions mode\n❯ 1. No, exit\n 2. Yes, I accept\n",
"❯ 1. Yes, I trust this folder\n 2. No",
]
- mock_session = MagicMock()
- mock_window = MagicMock()
- mock_pane = MagicMock()
- mock_tmux.server.sessions.get.return_value = mock_session
- mock_session.windows.get.return_value = mock_window
- mock_window.active_pane = mock_pane
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
provider._handle_startup_prompts(timeout=5.0)
- # Bypass: 2 subprocess calls (Down + Enter), then trust: 1 pane.send_keys call
- sub_calls = mock_subprocess.run.call_args_list
- assert len(sub_calls) == 2
- assert sub_calls[0].args[0] == [
- "tmux",
- "send-keys",
- "-t",
- "test-session:window-0",
- "-l",
- "\x1b[B",
- ]
- pane_calls = mock_pane.send_keys.call_args_list
- assert len(pane_calls) == 1
- assert pane_calls[0].args == ("",)
- assert pane_calls[0].kwargs == {"enter": True}
+ calls = mock_mux.send_special_key.call_args_list
+ assert len(calls) == 3
+ assert calls[0].args == ("test-session", "window-0", "\x1b[B")
+ assert calls[0].kwargs == {"literal": True}
+ assert calls[1].args == ("test-session", "window-0", "Enter")
+ assert calls[1].kwargs == {}
+ assert calls[2].args == ("test-session", "window-0", "Enter")
+ assert calls[2].kwargs == {}
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_trust_prompt_not_waiting_user_answer(self, mock_tmux):
+ def test_get_status_trust_prompt_not_waiting_user_answer(self, monkeypatch):
"""Test that trust prompt is NOT detected as WAITING_USER_ANSWER."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"❯ 1. Yes, I trust this folder\n"
" 2. No, don't trust this folder\n"
"Enter to select · ↑/↓ to navigate · Esc to cancel"
@@ -827,10 +955,14 @@ def test_get_status_trust_prompt_not_waiting_user_answer(self, mock_tmux):
assert status != TerminalStatus.WAITING_USER_ANSWER
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
- def test_get_status_bypass_prompt_not_waiting_user_answer(self, mock_tmux):
+ def test_get_status_bypass_prompt_not_waiting_user_answer(self, monkeypatch):
"""Test that bypass prompt is NOT detected as WAITING_USER_ANSWER."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"WARNING: Bypass Permissions mode\n"
"❯ 1. No, exit\n"
" 2. Yes, I accept\n"
@@ -845,28 +977,26 @@ def test_get_status_bypass_prompt_not_waiting_user_answer(self, mock_tmux):
@_PATCH_SETTINGS
@patch("cli_agent_orchestrator.providers.claude_code.wait_for_shell")
@patch("cli_agent_orchestrator.providers.claude_code.wait_until_status")
- @patch("cli_agent_orchestrator.providers.claude_code.tmux_client")
def test_initialize_calls_handle_startup_prompts(
- self, mock_tmux, mock_wait_status, mock_wait_shell, _
+ self, mock_wait_status, mock_wait_shell, _, monkeypatch
):
"""Test that initialize calls _handle_startup_prompts."""
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.claude_code.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
mock_wait_shell.return_value = True
mock_wait_status.return_value = True
trust_output = "❯ 1. Yes, I trust this folder\n 2. No"
- mock_tmux.get_history.side_effect = ["", trust_output, trust_output]
- mock_session = MagicMock()
- mock_window = MagicMock()
- mock_pane = MagicMock()
- mock_tmux.server.sessions.get.return_value = mock_session
- mock_session.windows.get.return_value = mock_window
- mock_window.active_pane = mock_pane
+ mock_mux.get_history.side_effect = ["", trust_output, trust_output]
provider = ClaudeCodeProvider("test123", "test-session", "window-0")
with patch.object(provider, "get_status", return_value=TerminalStatus.IDLE):
result = provider.initialize()
assert result is True
- mock_pane.send_keys.assert_called_with("", enter=True)
+ mock_mux.send_special_key.assert_called_with("test-session", "window-0", "Enter")
class TestClaudeCodeProviderSettings:
diff --git a/test/providers/test_codex_provider_unit.py b/test/providers/test_codex_provider_unit.py
index aa61746f4..200e862d6 100644
--- a/test/providers/test_codex_provider_unit.py
+++ b/test/providers/test_codex_provider_unit.py
@@ -1,10 +1,15 @@
"""Unit tests for Codex provider."""
+import sys
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
+from cli_agent_orchestrator.multiplexers.base import LaunchSpec
+from cli_agent_orchestrator.multiplexers.launch import build_launch_spec
+from cli_agent_orchestrator.multiplexers.tmux import TmuxMultiplexer
+from cli_agent_orchestrator.multiplexers.wezterm import WezTermMultiplexer
from cli_agent_orchestrator.models.terminal import TerminalStatus
from cli_agent_orchestrator.providers.codex import CodexProvider, ProviderError
@@ -16,14 +21,72 @@ def load_fixture(filename: str) -> str:
return f.read()
+def _make_get_multiplexer_patch(mock_mux):
+ """Return a zero-argument stand-in for get_multiplexer() that returns mock_mux."""
+ return lambda: mock_mux
+
+
+class TestBuildLaunchSpec:
+ @patch("cli_agent_orchestrator.multiplexers.launch.shutil.which")
+ def test_codex_windows_resolves_cmd_shim(self, mock_which):
+ mock_which.return_value = r"C:\Users\marc\scoop\apps\nodejs-lts\current\bin\codex.cmd"
+
+ spec = build_launch_spec("codex", ["codex"], platform="windows")
+
+ assert isinstance(spec, LaunchSpec)
+ assert spec.argv is not None
+ assert spec.argv[0] == r"C:\Users\marc\scoop\apps\nodejs-lts\current\bin\codex.cmd"
+
+ @patch("cli_agent_orchestrator.multiplexers.launch.os.path.exists", return_value=False)
+ @patch("cli_agent_orchestrator.multiplexers.launch.shutil.which", return_value=None)
+ def test_codex_windows_falls_back_to_bare_name(self, mock_which, mock_exists):
+ spec = build_launch_spec("codex", ["codex"], platform="windows")
+
+ assert spec.argv == ("codex",)
+ mock_which.assert_called_once_with("codex.cmd")
+ assert mock_exists.called
+
+ def test_codex_unix_keeps_bare_name(self):
+ spec = build_launch_spec("codex", ["codex"], platform="unix")
+
+ assert spec.argv == ("codex",)
+
+ def test_non_codex_windows_keeps_bare_name(self):
+ spec = build_launch_spec("claude", ["claude"], platform="windows")
+
+ assert spec.argv == ("claude",)
+
+
class TestCodexProviderInitialization:
+ def test_get_launch_spec_returns_direct_spawn_for_wezterm(self):
+ fake_wez = MagicMock(spec=WezTermMultiplexer)
+
+ provider = CodexProvider("test1234", "test-session", "window-0", None)
+
+ spec = provider.get_launch_spec(fake_wez)
+
+ assert isinstance(spec, LaunchSpec)
+ assert spec.provider == "codex"
+ assert spec.argv is not None
+
+ def test_get_launch_spec_returns_none_for_tmux(self):
+ fake_tmux = MagicMock(spec=TmuxMultiplexer)
+
+ provider = CodexProvider("test1234", "test-session", "window-0", None)
+
+ assert provider.get_launch_spec(fake_tmux) is None
+
@patch("cli_agent_orchestrator.providers.codex.wait_until_status")
@patch("cli_agent_orchestrator.providers.codex.wait_for_shell")
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_initialize_success(self, mock_tmux, mock_wait_shell, mock_wait_status):
+ def test_initialize_success(self, mock_wait_shell, mock_wait_status, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
mock_wait_shell.return_value = True
mock_wait_status.return_value = True
- mock_tmux.get_history.return_value = "OpenAI Codex (v0.98.0)"
+ mock_mux.get_history.return_value = "OpenAI Codex (v0.98.0)"
provider = CodexProvider("test1234", "test-session", "window-0", None)
result = provider.initialize()
@@ -31,18 +94,22 @@ def test_initialize_success(self, mock_tmux, mock_wait_shell, mock_wait_status):
assert result is True
mock_wait_shell.assert_called_once()
# Two send_keys calls: warm-up echo + codex with tmux-compatible flags
- assert mock_tmux.send_keys.call_count == 2
- mock_tmux.send_keys.assert_any_call("test-session", "window-0", "echo ready")
- mock_tmux.send_keys.assert_any_call(
+ assert mock_mux.send_keys.call_count == 2
+ mock_mux.send_keys.assert_any_call("test-session", "window-0", "echo ready")
+ mock_mux.send_keys.assert_any_call(
"test-session",
"window-0",
- "codex --yolo --no-alt-screen --disable shell_snapshot",
+ provider._build_codex_command(),
)
mock_wait_status.assert_called_once()
@patch("cli_agent_orchestrator.providers.codex.wait_for_shell")
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_initialize_shell_timeout(self, mock_tmux, mock_wait_shell):
+ def test_initialize_shell_timeout(self, mock_wait_shell, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
mock_wait_shell.return_value = False
provider = CodexProvider("test1234", "test-session", "window-0", None)
@@ -52,11 +119,67 @@ def test_initialize_shell_timeout(self, mock_tmux, mock_wait_shell):
@patch("cli_agent_orchestrator.providers.codex.wait_until_status")
@patch("cli_agent_orchestrator.providers.codex.wait_for_shell")
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_initialize_codex_timeout(self, mock_tmux, mock_wait_shell, mock_wait_status):
+ def test_initialize_skips_warmup_echo_for_direct_spawned_wezterm(
+ self, mock_wait_shell, mock_wait_status, monkeypatch
+ ):
+ mock_wait_status.return_value = True
+ # get_multiplexer() yields a WezTermMultiplexer-spec'd mock → warm-up is skipped
+ fake_wez = MagicMock(spec=WezTermMultiplexer)
+ # _handle_trust_prompt calls get_multiplexer().get_history() → must return str
+ fake_wez.get_history.return_value = "OpenAI Codex (v0.98.0)"
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ lambda: fake_wez,
+ )
+
+ provider = CodexProvider(
+ "test1234",
+ "test-session",
+ "window-0",
+ None,
+ launch_spec=LaunchSpec(argv=("codex.cmd",), provider="codex"),
+ )
+ result = provider.initialize()
+
+ assert result is True
+ mock_wait_shell.assert_not_called()
+ fake_wez.send_keys.assert_not_called()
+ mock_wait_status.assert_called_once()
+
+ @patch("cli_agent_orchestrator.providers.codex.wait_until_status")
+ @patch("cli_agent_orchestrator.providers.codex.wait_for_shell")
+ def test_initialize_runs_warmup_echo_for_tmux(
+ self, mock_wait_shell, mock_wait_status, monkeypatch
+ ):
+ mock_wait_shell.return_value = True
+ mock_wait_status.return_value = True
+ # get_multiplexer() yields a TmuxMultiplexer-spec'd mock → warm-up echo runs
+ fake_tmux = MagicMock(spec=TmuxMultiplexer)
+ # _handle_trust_prompt calls get_multiplexer().get_history() → must return str
+ fake_tmux.get_history.return_value = "OpenAI Codex (v0.98.0)"
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ lambda: fake_tmux,
+ )
+
+ provider = CodexProvider("test1234", "test-session", "window-0", None)
+ result = provider.initialize()
+
+ assert result is True
+ mock_wait_shell.assert_called_once()
+ fake_tmux.send_keys.assert_any_call("test-session", "window-0", "echo ready")
+
+ @patch("cli_agent_orchestrator.providers.codex.wait_until_status")
+ @patch("cli_agent_orchestrator.providers.codex.wait_for_shell")
+ def test_initialize_codex_timeout(self, mock_wait_shell, mock_wait_status, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
mock_wait_shell.return_value = True
mock_wait_status.return_value = False
- mock_tmux.get_history.return_value = "OpenAI Codex (v0.98.0)"
+ mock_mux.get_history.return_value = "OpenAI Codex (v0.98.0)"
provider = CodexProvider("test1234", "test-session", "window-0", None)
@@ -68,7 +191,26 @@ class TestCodexBuildCommand:
def test_build_command_no_profile(self):
provider = CodexProvider("test1234", "test-session", "window-0", None)
command = provider._build_codex_command()
- assert command == "codex --yolo --no-alt-screen --disable shell_snapshot"
+ expected = "codex --yolo --no-alt-screen --disable shell_snapshot"
+ if sys.platform == "win32":
+ expected = "codex -c 'hooks=[]' --yolo --no-alt-screen --disable shell_snapshot"
+ assert command == expected
+
+ @patch("cli_agent_orchestrator.multiplexers.launch.sys.platform", "win32")
+ def test_build_command_adds_hooks_override_on_windows(self):
+ provider = CodexProvider("test1234", "test-session", "window-0", None)
+
+ command = provider._build_codex_command()
+
+ assert " -c 'hooks=[]' " in f" {command} "
+
+ @patch("cli_agent_orchestrator.multiplexers.launch.sys.platform", "linux")
+ def test_build_command_omits_hooks_override_on_unix(self):
+ provider = CodexProvider("test1234", "test-session", "window-0", None)
+
+ command = provider._build_codex_command()
+
+ assert "hooks=[]" not in command
@patch("cli_agent_orchestrator.providers.codex.load_agent_profile")
def test_build_command_with_skill_prompt(self, mock_load_profile):
@@ -104,7 +246,8 @@ def test_build_command_with_agent_profile(self, mock_load_profile):
command = provider._build_codex_command()
mock_load_profile.assert_called_once_with("code_supervisor")
- assert "codex --yolo --no-alt-screen --disable shell_snapshot" in command
+ assert "codex" in command
+ assert "--yolo --no-alt-screen --disable shell_snapshot" in command
assert "-c" in command
assert "developer_instructions=" in command
assert "You are a code supervisor agent." in command
@@ -222,7 +365,10 @@ def test_build_command_empty_system_prompt(self, mock_load_profile):
provider = CodexProvider("test1234", "test-session", "window-0", "empty_agent")
command = provider._build_codex_command()
- assert command == "codex --yolo --no-alt-screen --disable shell_snapshot"
+ expected = "codex --yolo --no-alt-screen --disable shell_snapshot"
+ if sys.platform == "win32":
+ expected = "codex -c 'hooks=[]' --yolo --no-alt-screen --disable shell_snapshot"
+ assert command == expected
assert "developer_instructions" not in command
@patch("cli_agent_orchestrator.providers.codex.load_agent_profile")
@@ -236,7 +382,10 @@ def test_build_command_none_system_prompt(self, mock_load_profile):
provider = CodexProvider("test1234", "test-session", "window-0", "none_agent")
command = provider._build_codex_command()
- assert command == "codex --yolo --no-alt-screen --disable shell_snapshot"
+ expected = "codex --yolo --no-alt-screen --disable shell_snapshot"
+ if sys.platform == "win32":
+ expected = "codex -c 'hooks=[]' --yolo --no-alt-screen --disable shell_snapshot"
+ assert command == expected
@patch("cli_agent_orchestrator.providers.codex.load_agent_profile")
def test_build_command_profile_load_failure(self, mock_load_profile):
@@ -250,13 +399,17 @@ def test_build_command_profile_load_failure(self, mock_load_profile):
@patch("cli_agent_orchestrator.providers.codex.wait_until_status")
@patch("cli_agent_orchestrator.providers.codex.wait_for_shell")
@patch("cli_agent_orchestrator.providers.codex.load_agent_profile")
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
def test_initialize_with_agent_profile(
- self, mock_tmux, mock_load_profile, mock_wait_shell, mock_wait_status
+ self, mock_load_profile, mock_wait_shell, mock_wait_status, monkeypatch
):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
mock_wait_shell.return_value = True
mock_wait_status.return_value = True
- mock_tmux.get_history.return_value = "OpenAI Codex (v0.98.0)"
+ mock_mux.get_history.return_value = "OpenAI Codex (v0.98.0)"
mock_profile = MagicMock()
mock_profile.model = None
mock_profile.system_prompt = "You are a supervisor."
@@ -268,7 +421,7 @@ def test_initialize_with_agent_profile(
assert result is True
# The second send_keys call should contain developer_instructions
- codex_call = mock_tmux.send_keys.call_args_list[1]
+ codex_call = mock_mux.send_keys.call_args_list[1]
assert "developer_instructions=" in codex_call.args[2]
assert "You are a supervisor." in codex_call.args[2]
@@ -304,76 +457,105 @@ def test_build_command_omits_model_when_unset(self, mock_load):
class TestCodexProviderStatusDetection:
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_idle(self, mock_tmux):
- mock_tmux.get_history.return_value = load_fixture("codex_idle_output.txt")
+ def test_get_status_idle(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = load_fixture("codex_idle_output.txt")
provider = CodexProvider("test1234", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.IDLE
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_completed(self, mock_tmux):
- mock_tmux.get_history.return_value = load_fixture("codex_completed_output.txt")
+ def test_get_status_completed(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = load_fixture("codex_completed_output.txt")
provider = CodexProvider("test1234", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_processing(self, mock_tmux):
- mock_tmux.get_history.return_value = load_fixture("codex_processing_output.txt")
+ def test_get_status_processing(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = load_fixture("codex_processing_output.txt")
provider = CodexProvider("test1234", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.PROCESSING
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_waiting_user_answer(self, mock_tmux):
- mock_tmux.get_history.return_value = load_fixture("codex_permission_output.txt")
+ def test_get_status_waiting_user_answer(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = load_fixture("codex_permission_output.txt")
provider = CodexProvider("test1234", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.WAITING_USER_ANSWER
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_error(self, mock_tmux):
- mock_tmux.get_history.return_value = load_fixture("codex_error_output.txt")
+ def test_get_status_error(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = load_fixture("codex_error_output.txt")
provider = CodexProvider("test1234", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.ERROR
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_empty_output(self, mock_tmux):
- mock_tmux.get_history.return_value = ""
+ def test_get_status_empty_output(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = ""
provider = CodexProvider("test1234", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.ERROR
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_with_tail_lines(self, mock_tmux):
- mock_tmux.get_history.return_value = load_fixture("codex_idle_output.txt")
+ def test_get_status_with_tail_lines(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = load_fixture("codex_idle_output.txt")
provider = CodexProvider("test1234", "test-session", "window-0")
status = provider.get_status(tail_lines=50)
assert status == TerminalStatus.IDLE
- mock_tmux.get_history.assert_called_once_with("test-session", "window-0", tail_lines=50)
-
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_processing_when_old_prompt_present(self, mock_tmux):
- # If the captured history contains an earlier prompt but the *latest* output is processing,
- # we should report PROCESSING. The old prompt should be far enough from the bottom
- # (more than IDLE_PROMPT_TAIL_LINES) to avoid false idle detection.
- mock_tmux.get_history.return_value = (
+ mock_mux.get_history.assert_called_once_with("test-session", "window-0", tail_lines=50)
+
+ def test_get_status_processing_when_old_prompt_present(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"Welcome to Codex\n"
"❯ \n"
"You Fix the failing tests\n"
@@ -389,11 +571,13 @@ def test_get_status_processing_when_old_prompt_present(self, mock_tmux):
assert status == TerminalStatus.PROCESSING
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_not_error_on_failed_in_message(self, mock_tmux):
- # "failed" is commonly used in normal assistant output; it should not automatically
- # force ERROR.
- mock_tmux.get_history.return_value = (
+ def test_get_status_not_error_on_failed_in_message(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"You Explain why the test failed\n"
"assistant: The test failed because the assertion is incorrect.\n"
"\n"
@@ -405,31 +589,39 @@ def test_get_status_not_error_on_failed_in_message(self, mock_tmux):
assert status == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_idle_if_no_assistant_after_last_user(self, mock_tmux):
- # If there is a user message but no assistant response after it, we should not
- # treat the session as COMPLETED.
- mock_tmux.get_history.return_value = "assistant: Welcome\n" "You Do the thing\n" "\n" "❯ \n"
+ def test_get_status_idle_if_no_assistant_after_last_user(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "assistant: Welcome\n" "You Do the thing\n" "\n" "❯ \n"
provider = CodexProvider("test1234", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.IDLE
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_processing_when_no_prompt_and_no_keywords(self, mock_tmux):
- # Codex output may not always include explicit "thinking/processing" keywords.
- # Without an idle prompt at the end, we should assume it's still processing.
- mock_tmux.get_history.return_value = "You Run the command\nWorking...\n"
+ def test_get_status_processing_when_no_prompt_and_no_keywords(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "You Run the command\nWorking...\n"
provider = CodexProvider("test1234", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.PROCESSING
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_not_error_when_assistant_mentions_error_text(self, mock_tmux):
- mock_tmux.get_history.return_value = (
+ def test_get_status_not_error_when_assistant_mentions_error_text(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"You Explain the failure\n"
"assistant: Here's an example error:\n"
"Error: example only\n"
@@ -442,9 +634,13 @@ def test_get_status_not_error_when_assistant_mentions_error_text(self, mock_tmux
assert status == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_not_waiting_when_assistant_mentions_approval_text(self, mock_tmux):
- mock_tmux.get_history.return_value = (
+ def test_get_status_not_waiting_when_assistant_mentions_approval_text(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"You Explain approvals\n"
"assistant: You might see this prompt:\n"
"Approve this command? [y/n]\n"
@@ -457,37 +653,53 @@ def test_get_status_not_waiting_when_assistant_mentions_approval_text(self, mock
assert status == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_error_when_error_after_user_and_prompt(self, mock_tmux):
- mock_tmux.get_history.return_value = "You Run thing\nError: failed\n\n❯ \n"
+ def test_get_status_error_when_error_after_user_and_prompt(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "You Run thing\nError: failed\n\n❯ \n"
provider = CodexProvider("test1234", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.ERROR
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_waiting_user_answer_when_no_user_prefix(self, mock_tmux):
- mock_tmux.get_history.return_value = "Approve this command? [y/n]\n"
+ def test_get_status_waiting_user_answer_when_no_user_prefix(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "Approve this command? [y/n]\n"
provider = CodexProvider("test1234", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.WAITING_USER_ANSWER
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_error_when_no_user_prefix(self, mock_tmux):
- mock_tmux.get_history.return_value = "Error: something failed\n"
+ def test_get_status_error_when_no_user_prefix(self, monkeypatch):
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "Error: something failed\n"
provider = CodexProvider("test1234", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.ERROR
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_idle_tui_with_status_bar(self, mock_tmux):
+ def test_get_status_idle_tui_with_status_bar(self, monkeypatch):
"""Test IDLE detection with realistic TUI output (status bar after prompt)."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"╭───────────────────────────────────────────╮\n"
"│ >_ OpenAI Codex (v0.98.0) │\n"
"│ model: gpt-5.3-codex high │\n"
@@ -503,10 +715,14 @@ def test_get_status_idle_tui_with_status_bar(self, mock_tmux):
assert status == TerminalStatus.IDLE
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_completed_tui_with_status_bar(self, mock_tmux):
+ def test_get_status_completed_tui_with_status_bar(self, monkeypatch):
"""Test COMPLETED detection with TUI output (status bar after prompt)."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"You Fix the bug\n"
"assistant: I've fixed the issue in main.py.\n"
"\n"
@@ -523,10 +739,14 @@ def test_get_status_completed_tui_with_status_bar(self, mock_tmux):
class TestCodexBulletFormatStatusDetection:
"""Tests for Codex's real interactive output format using › prompt and • bullets."""
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_completed_bullet_format(self, mock_tmux):
+ def test_get_status_completed_bullet_format(self, monkeypatch):
"""COMPLETED when › user message followed by • response and idle prompt."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"› what is your role?\n"
"• I am the Coding Supervisor Agent.\n"
"• I coordinate tasks between developer and reviewer agents.\n"
@@ -539,10 +759,14 @@ def test_get_status_completed_bullet_format(self, mock_tmux):
assert status == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_processing_bullet_format(self, mock_tmux):
+ def test_get_status_processing_bullet_format(self, monkeypatch):
"""PROCESSING when • response started but no idle prompt at bottom."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"› fix the failing tests\n"
"• Let me look at the test files.\n"
"Reading src/test_main.py...\n"
@@ -557,20 +781,28 @@ def test_get_status_processing_bullet_format(self, mock_tmux):
assert status == TerminalStatus.PROCESSING
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_idle_bullet_format_no_response(self, mock_tmux):
+ def test_get_status_idle_bullet_format_no_response(self, monkeypatch):
"""IDLE when › user message but no • response yet and idle prompt at bottom."""
- mock_tmux.get_history.return_value = "› hello\n\n› \n"
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "› hello\n\n› \n"
provider = CodexProvider("test1234", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.IDLE
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_completed_bullet_with_code_block(self, mock_tmux):
+ def test_get_status_completed_bullet_with_code_block(self, monkeypatch):
"""COMPLETED with • response containing code blocks."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"› show me a function\n"
"• Here's the function:\n"
"\n"
@@ -589,20 +821,28 @@ def test_get_status_completed_bullet_with_code_block(self, mock_tmux):
assert status == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_error_not_masked_by_bullet_pattern(self, mock_tmux):
+ def test_get_status_error_not_masked_by_bullet_pattern(self, monkeypatch):
"""ERROR still detected when no • response and error after › user message."""
- mock_tmux.get_history.return_value = "› do something\nError: connection refused\n"
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "› do something\nError: connection refused\n"
provider = CodexProvider("test1234", "test-session", "window-0")
status = provider.get_status()
assert status == TerminalStatus.ERROR
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_completed_multi_turn_bullet(self, mock_tmux):
+ def test_get_status_completed_multi_turn_bullet(self, monkeypatch):
"""COMPLETED uses last user message in multi-turn bullet format."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"› first question\n"
"• First answer.\n"
"\n"
@@ -617,10 +857,14 @@ def test_get_status_completed_multi_turn_bullet(self, mock_tmux):
assert status == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_completed_bullet_with_tui_status_bar(self, mock_tmux):
+ def test_get_status_completed_bullet_with_tui_status_bar(self, monkeypatch):
"""COMPLETED with bullet format and TUI status bar after prompt."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"› fix the bug\n"
"• I've fixed the issue in main.py by correcting the import.\n"
"\n"
@@ -633,10 +877,14 @@ def test_get_status_completed_bullet_with_tui_status_bar(self, mock_tmux):
assert status == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_processing_tui_spinner(self, mock_tmux):
+ def test_get_status_processing_tui_spinner(self, monkeypatch):
"""PROCESSING when TUI shows • Working spinner, not false COMPLETED."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"› [CAO Handoff] Supervisor terminal ID: sup-123. Do the task.\n"
"\n"
"• Working (0s • esc to interrupt)\n"
@@ -651,10 +899,14 @@ def test_get_status_processing_tui_spinner(self, mock_tmux):
assert status == TerminalStatus.PROCESSING
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_processing_tui_thinking_spinner(self, mock_tmux):
+ def test_get_status_processing_tui_thinking_spinner(self, monkeypatch):
"""PROCESSING when TUI shows • Thinking spinner."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"› Implement feature X\n"
"\n"
"• Thinking (3s • esc to interrupt)\n"
@@ -669,10 +921,14 @@ def test_get_status_processing_tui_thinking_spinner(self, mock_tmux):
assert status == TerminalStatus.PROCESSING
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_processing_dynamic_spinner_text(self, mock_tmux):
+ def test_get_status_processing_dynamic_spinner_text(self, monkeypatch):
"""PROCESSING when TUI shows spinner with dynamic prefix text."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"› [CAO Handoff] Do the task.\n"
"\n"
"• Creating /tmp/file.py\n"
@@ -691,18 +947,16 @@ def test_get_status_processing_dynamic_spinner_text(self, mock_tmux):
class TestCodexV0111FooterFormat:
- """Tests for Codex v0.111.0+ TUI footer format.
+ """Tests for Codex v0.111.0+ TUI footer format."""
- v0.111.0 (PR #13202 'tui: restore draft footer hints') changed the footer:
- - Old: "› Use /skills to list available skills\\n ? for shortcuts 100% context left"
- - New: "› Find and fix a bug in @filename\\n gpt-5.3-codex high · 100% left · ~/path"
- The new format uses "N% left" instead of "N% context left" and removes "? for shortcuts".
- """
-
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_idle_v0111_footer(self, mock_tmux):
+ def test_get_status_idle_v0111_footer(self, monkeypatch):
"""IDLE with v0.111.0 footer format (no '? for shortcuts')."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"╭───────────────────────────────────────────╮\n"
"│ >_ OpenAI Codex (v0.111.0) │\n"
"│ model: gpt-5.3-codex high │\n"
@@ -720,10 +974,14 @@ def test_get_status_idle_v0111_footer(self, mock_tmux):
assert status == TerminalStatus.IDLE
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_completed_v0111_footer(self, mock_tmux):
+ def test_get_status_completed_v0111_footer(self, monkeypatch):
"""COMPLETED with v0.111.0 footer (suggestion hint must not be treated as user input)."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"› fix the bug\n"
"• I've fixed the issue in main.py by correcting the import.\n"
"\n"
@@ -737,10 +995,14 @@ def test_get_status_completed_v0111_footer(self, mock_tmux):
assert status == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_completed_v0111_multi_turn(self, mock_tmux):
+ def test_get_status_completed_v0111_multi_turn(self, monkeypatch):
"""COMPLETED in multi-turn with v0.111.0 footer."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"› first question\n"
"• First answer.\n"
"\n"
@@ -757,10 +1019,14 @@ def test_get_status_completed_v0111_multi_turn(self, mock_tmux):
assert status == TerminalStatus.COMPLETED
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_processing_v0111_spinner(self, mock_tmux):
+ def test_get_status_processing_v0111_spinner(self, monkeypatch):
"""PROCESSING when TUI shows spinner with v0.111.0 footer."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"› [CAO Handoff] Do the task.\n"
"\n"
"• Working (0s • esc to interrupt)\n"
@@ -988,10 +1254,14 @@ def test_extract_last_message_without_trailing_prompt(self):
class TestCodexProviderTrustPrompt:
"""Tests for Codex workspace trust prompt handling."""
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_handle_trust_prompt_detected_and_accepted(self, mock_tmux):
+ def test_handle_trust_prompt_detected_and_accepted(self, monkeypatch):
"""Test that trust prompt is detected and auto-accepted."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"> You are running Codex in /Users/test/project\n"
"\n"
" Since this folder is version controlled, you may wish to "
@@ -1000,32 +1270,34 @@ def test_handle_trust_prompt_detected_and_accepted(self, mock_tmux):
"› 1. Yes, allow Codex to work in this folder without asking for approval\n"
" 2. No, ask me to approve edits and commands\n"
)
- mock_session = MagicMock()
- mock_window = MagicMock()
- mock_pane = MagicMock()
- mock_tmux.server.sessions.get.return_value = mock_session
- mock_session.windows.get.return_value = mock_window
- mock_window.active_pane = mock_pane
provider = CodexProvider("test1234", "test-session", "window-0")
provider._handle_trust_prompt(timeout=2.0)
- mock_pane.send_keys.assert_called_once_with("", enter=True)
+ mock_mux.send_special_key.assert_called_once_with("test-session", "window-0", "Enter")
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_handle_trust_prompt_not_needed(self, mock_tmux):
+ def test_handle_trust_prompt_not_needed(self, monkeypatch):
"""Test early return when Codex starts without trust prompt."""
- mock_tmux.get_history.return_value = "OpenAI Codex (v0.98.0)\n› "
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = "OpenAI Codex (v0.98.0)\n› "
provider = CodexProvider("test1234", "test-session", "window-0")
provider._handle_trust_prompt(timeout=2.0)
- mock_tmux.server.sessions.get.assert_not_called()
+ mock_mux.send_special_key.assert_not_called()
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_get_status_trust_prompt_is_waiting_user_answer(self, mock_tmux):
+ def test_get_status_trust_prompt_is_waiting_user_answer(self, monkeypatch):
"""Test that trust prompt reports WAITING_USER_ANSWER, not PROCESSING."""
- mock_tmux.get_history.return_value = (
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
+ mock_mux.get_history.return_value = (
"> You are running Codex in /Users/test/project\n"
"allow Codex to work in this folder without asking for approval.\n"
"› 1. Yes\n"
@@ -1039,23 +1311,21 @@ def test_get_status_trust_prompt_is_waiting_user_answer(self, mock_tmux):
@patch("cli_agent_orchestrator.providers.codex.wait_until_status")
@patch("cli_agent_orchestrator.providers.codex.wait_for_shell")
- @patch("cli_agent_orchestrator.providers.codex.tmux_client")
- def test_initialize_with_trust_prompt(self, mock_tmux, mock_wait_shell, mock_wait_status):
+ def test_initialize_with_trust_prompt(self, mock_wait_shell, mock_wait_status, monkeypatch):
"""Test that initialize handles trust prompt during startup."""
+ mock_mux = MagicMock()
+ monkeypatch.setattr(
+ "cli_agent_orchestrator.providers.codex.get_multiplexer",
+ _make_get_multiplexer_patch(mock_mux),
+ )
mock_wait_shell.return_value = True
mock_wait_status.return_value = True
- mock_tmux.get_history.return_value = (
+ mock_mux.get_history.return_value = (
"allow Codex to work in this folder without asking for approval.\n"
)
- mock_session = MagicMock()
- mock_window = MagicMock()
- mock_pane = MagicMock()
- mock_tmux.server.sessions.get.return_value = mock_session
- mock_session.windows.get.return_value = mock_window
- mock_window.active_pane = mock_pane
provider = CodexProvider("test1234", "test-session", "window-0")
result = provider.initialize()
assert result is True
- mock_pane.send_keys.assert_called_with("", enter=True)
+ mock_mux.send_special_key.assert_called_with("test-session", "window-0", "Enter")
diff --git a/test/services/test_inbox_service.py b/test/services/test_inbox_service.py
index 7944eec16..8d62d1ef4 100644
--- a/test/services/test_inbox_service.py
+++ b/test/services/test_inbox_service.py
@@ -18,28 +18,91 @@
class TestGetLogTail:
"""Tests for _get_log_tail function."""
- @patch("cli_agent_orchestrator.services.inbox_service.subprocess.run")
- @patch("cli_agent_orchestrator.services.inbox_service.TERMINAL_LOG_DIR")
- def test_get_log_tail_success(self, mock_log_dir, mock_run):
- """Test getting log tail successfully."""
- mock_log_dir.__truediv__ = lambda self, x: Path("/tmp") / x
- mock_run.return_value = MagicMock(stdout="last line\n")
+ @staticmethod
+ def _set_log_dir(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+ monkeypatch.setattr("cli_agent_orchestrator.services.inbox_service.TERMINAL_LOG_DIR", tmp_path)
- result = _get_log_tail("test-terminal", lines=5)
+ @staticmethod
+ def _write_log(tmp_path: Path, terminal_id: str, content: bytes) -> None:
+ (tmp_path / f"{terminal_id}.log").write_bytes(content)
- assert result == "last line\n"
- mock_run.assert_called_once()
+ def test_get_log_tail_empty_file(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
+ """Test empty log files return an empty string."""
+ self._set_log_dir(monkeypatch, tmp_path)
+ self._write_log(tmp_path, "test-terminal", b"")
- @patch("cli_agent_orchestrator.services.inbox_service.subprocess.run")
- @patch("cli_agent_orchestrator.services.inbox_service.TERMINAL_LOG_DIR")
- def test_get_log_tail_exception(self, mock_log_dir, mock_run):
- """Test getting log tail with exception."""
- mock_log_dir.__truediv__ = lambda self, x: Path("/tmp") / x
- mock_run.side_effect = Exception("Subprocess error")
+ assert _get_log_tail("test-terminal") == ""
- result = _get_log_tail("test-terminal")
+    def test_get_log_tail_one_line_without_trailing_newline(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ):
+        """A lone unterminated line comes back unchanged, with no newline appended."""
+        self._set_log_dir(monkeypatch, tmp_path)
+        self._write_log(tmp_path, "test-terminal", b"last line")
+        tail = _get_log_tail("test-terminal", lines=5)
+        assert tail == "last line"
+
+    def test_get_log_tail_one_line_with_trailing_newline(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ):
+        """A single newline-terminated line comes back with its newline intact."""
+        self._set_log_dir(monkeypatch, tmp_path)
+        self._write_log(tmp_path, "test-terminal", b"last line\n")
+        tail = _get_log_tail("test-terminal", lines=5)
+        assert tail == "last line\n"
+
+    def test_get_log_tail_exactly_n_lines(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
+        """A five-line log read with lines=5 is returned in full."""
+        self._set_log_dir(monkeypatch, tmp_path)
+        expected = "line 1\nline 2\nline 3\nline 4\nline 5\n"
+        self._write_log(tmp_path, "test-terminal", expected.encode())
+        tail = _get_log_tail("test-terminal", lines=5)
+        assert tail == expected
+
+    def test_get_log_tail_more_than_n_lines(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
+        """Only the final three of eight lines are returned when lines=3."""
+        self._set_log_dir(monkeypatch, tmp_path)
+        all_lines = [f"line {number}\n" for number in range(1, 9)]
+        self._write_log(tmp_path, "test-terminal", "".join(all_lines).encode())
+        tail = _get_log_tail("test-terminal", lines=3)
+        assert tail == "line 6\nline 7\nline 8\n"
+
+    def test_get_log_tail_handles_long_lines_across_block_boundary(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ):
+        """A 5000-character first line must not disturb the two short lines after it."""
+        self._set_log_dir(monkeypatch, tmp_path)
+        wanted_tail = "second line\nthird line\n"
+        payload = ("a" * 5000 + "\n" + wanted_tail).encode()
+        self._write_log(tmp_path, "test-terminal", payload)
+        tail = _get_log_tail("test-terminal", lines=2)
+        assert tail == wanted_tail
+
+    def test_get_log_tail_normalizes_windows_line_endings(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ):
+        """CRLF line endings in the log file come back as bare LF in the tail."""
+        self._set_log_dir(monkeypatch, tmp_path)
+        self._write_log(tmp_path, "test-terminal", b"line 1\r\nline 2\r\nline 3\r\n")
+        tail = _get_log_tail("test-terminal", lines=2)
+        assert tail == "line 2\nline 3\n"
+
+    def test_get_log_tail_preserves_utf8_across_block_boundary(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ):
+        """Test multibyte UTF-8 content split across read blocks decodes safely."""
+        self._set_log_dir(monkeypatch, tmp_path)
+        # A 15000-byte run of 3-byte chars crosses several block boundaries, so some chunk is cut
+        # mid-character from either file end (assuming a block size that is not a multiple of 3).
+        content = "€" * 5000 + "\nfinal line\n"
+        self._write_log(tmp_path, "test-terminal", content.encode("utf-8"))
+        assert _get_log_tail("test-terminal", lines=2) == content
+
+ def test_get_log_tail_missing_file(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
+ """Test missing log files return an empty string."""
+ self._set_log_dir(monkeypatch, tmp_path)
- assert result == ""
+ assert _get_log_tail("missing-terminal") == ""
class TestHasIdlePattern:
diff --git a/test/services/test_plugin_event_emission.py b/test/services/test_plugin_event_emission.py
index 76358f29f..cd6fe610b 100644
--- a/test/services/test_plugin_event_emission.py
+++ b/test/services/test_plugin_event_emission.py
@@ -121,14 +121,14 @@ class TestTerminalPluginEvents:
@patch("cli_agent_orchestrator.services.terminal_service.load_agent_profile")
@patch("cli_agent_orchestrator.services.terminal_service.generate_terminal_id")
@patch("cli_agent_orchestrator.services.terminal_service.generate_window_name")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
def test_create_terminal_dispatches_post_create_terminal_event_after_setup(
self,
mock_provider_manager,
mock_db_create_terminal,
- mock_tmux,
+ mock_get_multiplexer,
mock_generate_window_name,
mock_generate_terminal_id,
mock_load_agent_profile,
@@ -138,13 +138,14 @@ def test_create_terminal_dispatches_post_create_terminal_event_after_setup(
"""Terminal creation should emit only after persistence and startup complete."""
registry = _registry_mock()
call_order: list[str] = []
+ mock_multiplexer = mock_get_multiplexer.return_value
async def record_dispatch(*_args):
call_order.append("dispatch")
mock_generate_terminal_id.return_value = "abcd1234"
mock_generate_window_name.return_value = "developer-abcd"
- mock_tmux.session_exists.return_value = False
+ mock_multiplexer.session_exists.return_value = False
mock_db_create_terminal.side_effect = lambda *_: call_order.append("db_create")
mock_load_agent_profile.return_value = AgentProfile(name="developer", description="Dev")
@@ -154,7 +155,7 @@ async def record_dispatch(*_args):
log_path = MagicMock()
mock_log_dir.__truediv__.return_value = log_path
- mock_tmux.pipe_pane.side_effect = lambda *_: call_order.append("pipe_pane")
+ mock_multiplexer.pipe_pane.side_effect = lambda *_: call_order.append("pipe_pane")
registry.dispatch.side_effect = record_dispatch
terminal = create_terminal(
@@ -181,14 +182,14 @@ async def record_dispatch(*_args):
@patch("cli_agent_orchestrator.services.terminal_service.load_agent_profile")
@patch("cli_agent_orchestrator.services.terminal_service.generate_terminal_id")
@patch("cli_agent_orchestrator.services.terminal_service.generate_window_name")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
def test_create_terminal_does_not_dispatch_on_failure(
self,
mock_provider_manager,
mock_db_create_terminal,
- mock_tmux,
+ mock_get_multiplexer,
mock_generate_window_name,
mock_generate_terminal_id,
mock_load_agent_profile,
@@ -197,9 +198,10 @@ def test_create_terminal_does_not_dispatch_on_failure(
):
"""Terminal creation failures must not emit post_create_terminal."""
registry = _registry_mock()
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_generate_terminal_id.return_value = "abcd1234"
mock_generate_window_name.return_value = "developer-abcd"
- mock_tmux.session_exists.return_value = False
+ mock_multiplexer.session_exists.return_value = False
mock_load_agent_profile.return_value = AgentProfile(name="developer", description="Dev")
provider = MagicMock()
@@ -221,14 +223,15 @@ def test_create_terminal_does_not_dispatch_on_failure(
@patch("cli_agent_orchestrator.services.terminal_service.db_delete_terminal", return_value=True)
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
def test_delete_terminal_dispatches_post_kill_terminal_event_after_delete(
- self, mock_get_metadata, mock_tmux, mock_provider_manager, mock_db_delete_terminal
+ self, mock_get_metadata, mock_get_multiplexer, mock_provider_manager, mock_db_delete_terminal
):
"""Terminal kill should emit only after deletion succeeds."""
registry = _registry_mock()
call_order: list[str] = []
+ mock_get_multiplexer.return_value
async def record_dispatch(*_args):
call_order.append("dispatch")
@@ -255,13 +258,14 @@ async def record_dispatch(*_args):
@patch("cli_agent_orchestrator.services.terminal_service.db_delete_terminal")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
def test_delete_terminal_does_not_dispatch_on_failure(
- self, mock_get_metadata, mock_tmux, mock_provider_manager, mock_db_delete_terminal
+ self, mock_get_metadata, mock_get_multiplexer, mock_provider_manager, mock_db_delete_terminal
):
"""Deletion failures must not emit post_kill_terminal."""
registry = _registry_mock()
+ mock_get_multiplexer.return_value
mock_get_metadata.return_value = {
"tmux_session": "cao-demo",
"tmux_window": "developer-abcd",
@@ -280,20 +284,21 @@ class TestMessagePluginEvents:
@pytest.mark.parametrize("orchestration_type", ["send_message", "assign", "handoff"])
@patch("cli_agent_orchestrator.services.terminal_service.update_last_active")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
def test_send_input_dispatches_post_send_message_event_for_each_orchestration_mode(
self,
mock_get_metadata,
mock_provider_manager,
- mock_tmux,
+ mock_get_multiplexer,
mock_update_last_active,
orchestration_type,
):
"""Every successful delivery should emit one post_send_message event."""
registry = _registry_mock()
call_order: list[str] = []
+ mock_multiplexer = mock_get_multiplexer.return_value
async def record_dispatch(*_args):
call_order.append("dispatch")
@@ -306,7 +311,9 @@ async def record_dispatch(*_args):
provider.paste_enter_count = 2
provider.mark_input_received.side_effect = lambda: call_order.append("mark_input_received")
mock_provider_manager.get_provider.return_value = provider
- mock_tmux.send_keys.side_effect = lambda *_args, **_kwargs: call_order.append("send_keys")
+ mock_multiplexer.send_keys.side_effect = lambda *_args, **_kwargs: call_order.append(
+ "send_keys"
+ )
mock_update_last_active.side_effect = lambda *_: call_order.append("update_last_active")
registry.dispatch.side_effect = record_dispatch
@@ -329,14 +336,15 @@ async def record_dispatch(*_args):
assert event.message == "Hello from supervisor"
assert event.orchestration_type == orchestration_type
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
def test_send_input_does_not_dispatch_on_failure(
- self, mock_get_metadata, mock_provider_manager, mock_tmux
+ self, mock_get_metadata, mock_provider_manager, mock_get_multiplexer
):
"""Message delivery failures must not emit post_send_message."""
registry = _registry_mock()
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_get_metadata.return_value = {
"tmux_session": "cao-demo",
"tmux_window": "developer-abcd",
@@ -344,7 +352,7 @@ def test_send_input_does_not_dispatch_on_failure(
provider = MagicMock()
provider.paste_enter_count = 1
mock_provider_manager.get_provider.return_value = provider
- mock_tmux.send_keys.side_effect = RuntimeError("send failed")
+ mock_multiplexer.send_keys.side_effect = RuntimeError("send failed")
with pytest.raises(RuntimeError, match="send failed"):
send_input(
diff --git a/test/services/test_terminal_service.py b/test/services/test_terminal_service.py
index dfe2ac892..7207a6f4f 100644
--- a/test/services/test_terminal_service.py
+++ b/test/services/test_terminal_service.py
@@ -1,21 +1,86 @@
-"""Unit tests for terminal service get_working_directory and send_special_key functions."""
+"""Unit tests for terminal service multiplexer integration."""
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
import pytest
+from cli_agent_orchestrator.multiplexers.base import LaunchSpec
from cli_agent_orchestrator.services.terminal_service import (
+ create_terminal,
get_working_directory,
send_special_key,
)
+@pytest.fixture
+def mock_multiplexer(monkeypatch):
+    """Return a MagicMock that terminal_service.get_multiplexer() hands out during the test."""
+    fake_backend = MagicMock()
+    accessor_path = "cli_agent_orchestrator.services.terminal_service.get_multiplexer"
+    # Replace the accessor with a zero-argument callable that always yields the
+    # same mock, so the test and the code under test share one object.
+    monkeypatch.setattr(accessor_path, lambda: fake_backend)
+    return fake_backend
+
+
+@pytest.fixture
+def create_terminal_dependencies(monkeypatch):
+    """Patch create_terminal collaborators outside the multiplexer seam.
+
+    Every replacement is installed on the terminal_service module (or on its
+    provider_manager object) through monkeypatch, so pytest restores the real
+    attributes when the test finishes.
+
+    Returns:
+        dict mapping "provider_instance", "db_create_terminal", and
+        "create_provider" to the mocks installed by this fixture.
+    """
+    service = "cli_agent_orchestrator.services.terminal_service"
+
+    # Deterministic identifiers.
+    monkeypatch.setattr(f"{service}.generate_terminal_id", lambda: "deadbeef")
+    monkeypatch.setattr(
+        f"{service}.generate_window_name",
+        lambda agent_profile: f"{agent_profile}-window",
+    )
+    monkeypatch.setattr(f"{service}.generate_session_name", lambda: "generated-session")
+
+    # Persistence, profile loading, and skill catalog. Every profile load raises
+    # FileNotFoundError and the skill catalog is always None.
+    db_create_terminal = MagicMock()
+    monkeypatch.setattr(f"{service}.db_create_terminal", db_create_terminal)
+    monkeypatch.setattr(
+        f"{service}.load_agent_profile", MagicMock(side_effect=FileNotFoundError())
+    )
+    monkeypatch.setattr(f"{service}.build_skill_catalog", MagicMock(return_value=None))
+
+    # Provider manager and plugin dispatch: create_provider hands back one shared
+    # provider mock whose get_launch_spec() returns None.
+    provider_instance = MagicMock()
+    provider_instance.get_launch_spec.return_value = None
+    create_provider = MagicMock(return_value=provider_instance)
+    monkeypatch.setattr(f"{service}.provider_manager.create_provider", create_provider)
+    monkeypatch.setattr(f"{service}.provider_manager.cleanup_provider", MagicMock())
+    monkeypatch.setattr(f"{service}.dispatch_plugin_event", MagicMock())
+
+    # TERMINAL_LOG_DIR / <name> yields a mock path, so touch() hits no real file.
+    log_path = MagicMock(touch=MagicMock())
+    log_dir = MagicMock(__truediv__=MagicMock(return_value=log_path))
+    monkeypatch.setattr(f"{service}.TERMINAL_LOG_DIR", log_dir)
+
+    # Return the mocks directly rather than reading them back from create_terminal.__globals__.
+    return {
+        "provider_instance": provider_instance,
+        "db_create_terminal": db_create_terminal,
+        "create_provider": create_provider,
+    }
+
+
class TestTerminalServiceWorkingDirectory:
"""Test terminal service working directory functionality."""
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
- def test_get_working_directory_success(self, mock_get_metadata, mock_tmux_client):
+ def test_get_working_directory_success(self, mock_get_metadata, mock_multiplexer):
"""Test successful working directory retrieval."""
# Arrange
terminal_id = "test-terminal-123"
@@ -24,7 +89,7 @@ def test_get_working_directory_success(self, mock_get_metadata, mock_tmux_client
"tmux_session": "test-session",
"tmux_window": "test-window",
}
- mock_tmux_client.get_pane_working_directory.return_value = expected_dir
+ mock_multiplexer.get_pane_working_directory.return_value = expected_dir
# Act
result = get_working_directory(terminal_id)
@@ -32,13 +97,12 @@ def test_get_working_directory_success(self, mock_get_metadata, mock_tmux_client
# Assert
assert result == expected_dir
mock_get_metadata.assert_called_once_with(terminal_id)
- mock_tmux_client.get_pane_working_directory.assert_called_once_with(
+ mock_multiplexer.get_pane_working_directory.assert_called_once_with(
"test-session", "test-window"
)
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
- def test_get_working_directory_terminal_not_found(self, mock_get_metadata, mock_tmux_client):
+ def test_get_working_directory_terminal_not_found(self, mock_get_metadata, mock_multiplexer):
"""Test ValueError when terminal not found."""
# Arrange
terminal_id = "nonexistent-terminal"
@@ -49,11 +113,10 @@ def test_get_working_directory_terminal_not_found(self, mock_get_metadata, mock_
get_working_directory(terminal_id)
mock_get_metadata.assert_called_once_with(terminal_id)
- mock_tmux_client.get_pane_working_directory.assert_not_called()
+ mock_multiplexer.get_pane_working_directory.assert_not_called()
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
- def test_get_working_directory_returns_none(self, mock_get_metadata, mock_tmux_client):
+ def test_get_working_directory_returns_none(self, mock_get_metadata, mock_multiplexer):
"""Test when pane has no working directory."""
# Arrange
terminal_id = "test-terminal-456"
@@ -61,7 +124,7 @@ def test_get_working_directory_returns_none(self, mock_get_metadata, mock_tmux_c
"tmux_session": "test-session",
"tmux_window": "test-window",
}
- mock_tmux_client.get_pane_working_directory.return_value = None
+ mock_multiplexer.get_pane_working_directory.return_value = None
# Act
result = get_working_directory(terminal_id)
@@ -69,14 +132,13 @@ def test_get_working_directory_returns_none(self, mock_get_metadata, mock_tmux_c
# Assert
assert result is None
mock_get_metadata.assert_called_once_with(terminal_id)
- mock_tmux_client.get_pane_working_directory.assert_called_once_with(
+ mock_multiplexer.get_pane_working_directory.assert_called_once_with(
"test-session", "test-window"
)
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
def test_get_working_directory_returns_directory_from_tmux_pane(
- self, mock_get_metadata, mock_tmux_client
+ self, mock_get_metadata, mock_multiplexer
):
"""Test that get_working_directory returns the directory obtained from tmux pane."""
# Arrange
@@ -86,21 +148,20 @@ def test_get_working_directory_returns_directory_from_tmux_pane(
"tmux_session": "cao-workspace",
"tmux_window": "developer-xyz",
}
- mock_tmux_client.get_pane_working_directory.return_value = pane_dir
+ mock_multiplexer.get_pane_working_directory.return_value = pane_dir
# Act
result = get_working_directory(terminal_id)
# Assert
assert result == pane_dir
- mock_tmux_client.get_pane_working_directory.assert_called_once_with(
+ mock_multiplexer.get_pane_working_directory.assert_called_once_with(
"cao-workspace", "developer-xyz"
)
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
def test_get_working_directory_raises_for_nonexistent_terminal(
- self, mock_get_metadata, mock_tmux_client
+ self, mock_get_metadata, mock_multiplexer
):
"""Test that get_working_directory raises ValueError for a terminal that does not exist."""
# Arrange
@@ -110,17 +171,16 @@ def test_get_working_directory_raises_for_nonexistent_terminal(
with pytest.raises(ValueError, match="Terminal 'does-not-exist' not found"):
get_working_directory("does-not-exist")
- mock_tmux_client.get_pane_working_directory.assert_not_called()
+ mock_multiplexer.get_pane_working_directory.assert_not_called()
class TestSendSpecialKey:
"""Tests for send_special_key function."""
@patch("cli_agent_orchestrator.services.terminal_service.update_last_active")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
def test_send_special_key_sends_key_via_tmux_client(
- self, mock_get_metadata, mock_tmux_client, mock_update_last_active
+ self, mock_get_metadata, mock_update_last_active, mock_multiplexer
):
"""Test that send_special_key sends the key via tmux client."""
# Arrange
@@ -135,16 +195,15 @@ def test_send_special_key_sends_key_via_tmux_client(
# Assert
assert result is True
- mock_tmux_client.send_special_key.assert_called_once_with(
+ mock_multiplexer.send_special_key.assert_called_once_with(
"cao-session", "developer-abcd", "C-d"
)
mock_update_last_active.assert_called_once_with(terminal_id)
@patch("cli_agent_orchestrator.services.terminal_service.update_last_active")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
def test_send_special_key_ctrl_c(
- self, mock_get_metadata, mock_tmux_client, mock_update_last_active
+ self, mock_get_metadata, mock_update_last_active, mock_multiplexer
):
"""Test that send_special_key can send C-c (Ctrl+C) to a terminal."""
# Arrange
@@ -159,13 +218,12 @@ def test_send_special_key_ctrl_c(
# Assert
assert result is True
- mock_tmux_client.send_special_key.assert_called_once_with(
+ mock_multiplexer.send_special_key.assert_called_once_with(
"cao-session", "reviewer-efgh", "C-c"
)
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
- def test_send_special_key_terminal_not_found(self, mock_get_metadata, mock_tmux_client):
+ def test_send_special_key_terminal_not_found(self, mock_get_metadata, mock_multiplexer):
"""Test that send_special_key raises ValueError when terminal not found."""
# Arrange
mock_get_metadata.return_value = None
@@ -174,11 +232,10 @@ def test_send_special_key_terminal_not_found(self, mock_get_metadata, mock_tmux_
with pytest.raises(ValueError, match="Terminal 'nonexistent' not found"):
send_special_key("nonexistent", "C-d")
- mock_tmux_client.send_special_key.assert_not_called()
+ mock_multiplexer.send_special_key.assert_not_called()
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
- def test_send_special_key_propagates_tmux_errors(self, mock_get_metadata, mock_tmux_client):
+ def test_send_special_key_propagates_tmux_errors(self, mock_get_metadata, mock_multiplexer):
"""Test that send_special_key propagates exceptions from tmux client."""
# Arrange
terminal_id = "test-terminal-003"
@@ -186,17 +243,16 @@ def test_send_special_key_propagates_tmux_errors(self, mock_get_metadata, mock_t
"tmux_session": "cao-session",
"tmux_window": "developer-ijkl",
}
- mock_tmux_client.send_special_key.side_effect = Exception("Tmux send error")
+ mock_multiplexer.send_special_key.side_effect = Exception("Tmux send error")
# Act & Assert
with pytest.raises(Exception, match="Tmux send error"):
send_special_key(terminal_id, "Escape")
@patch("cli_agent_orchestrator.services.terminal_service.update_last_active")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
def test_send_special_key_escape(
- self, mock_get_metadata, mock_tmux_client, mock_update_last_active
+ self, mock_get_metadata, mock_update_last_active, mock_multiplexer
):
"""Test that send_special_key can send Escape key."""
# Arrange
@@ -211,6 +267,106 @@ def test_send_special_key_escape(
# Assert
assert result is True
- mock_tmux_client.send_special_key.assert_called_once_with(
+ mock_multiplexer.send_special_key.assert_called_once_with(
"cao-session", "developer-mnop", "Escape"
)
+
+
+class TestCreateTerminalLaunchSpec:
+ """Tests that create_terminal hands its LaunchSpec to create_session/create_window."""
+
+ def test_create_terminal_new_session_forwards_launch_spec(
+ self, mock_multiplexer, create_terminal_dependencies
+ ):
+ """With new_session=True, an explicit launch_spec reaches create_session unchanged."""
+ spec = LaunchSpec(argv=["codex", "--yolo"], env={"A": "1"}, provider="codex")
+ mock_multiplexer.session_exists.return_value = False
+
+ create_terminal(
+ provider="codex",
+ agent_profile="developer",
+ session_name="alpha",
+ new_session=True,
+ working_directory="/workspace",
+ launch_spec=spec,
+ )
+
+ mock_multiplexer.create_session.assert_called_once_with(
+ "cao-alpha",
+ "developer-window",
+ "deadbeef",
+ "/workspace",
+ launch_spec=spec,
+ )
+
+ def test_create_terminal_existing_session_forwards_launch_spec(
+ self, mock_multiplexer, create_terminal_dependencies
+ ):
+ """Existing-session path: create_window gets the spec; its result names the terminal."""
+ spec = LaunchSpec(argv=["q"], provider="q_cli")
+ mock_multiplexer.session_exists.return_value = True
+ mock_multiplexer.create_window.return_value = "renamed-window"
+
+ terminal = create_terminal(
+ provider="q_cli",
+ agent_profile="developer",
+ session_name="cao-existing",
+ new_session=False,
+ working_directory="/workspace",
+ launch_spec=spec,
+ )
+
+ mock_multiplexer.create_window.assert_called_once_with(
+ "cao-existing",
+ "developer-window",
+ "deadbeef",
+ "/workspace",
+ launch_spec=spec,
+ )
+ assert terminal.name == "renamed-window"
+
+ def test_create_terminal_defaults_launch_spec_to_none(
+ self, mock_multiplexer, create_terminal_dependencies
+ ):
+ """With no explicit launch_spec, create_session is called with launch_spec=None."""
+ mock_multiplexer.session_exists.return_value = False
+
+ create_terminal(
+ provider="codex",
+ agent_profile="developer",
+ session_name="beta",
+ new_session=True,
+ )
+
+ mock_multiplexer.create_session.assert_called_once_with(
+ "cao-beta",
+ "developer-window",
+ "deadbeef",
+ None,
+ launch_spec=None,
+ )
+
+ def test_create_terminal_uses_provider_launch_spec_when_not_explicit(
+ self, mock_multiplexer, create_terminal_dependencies
+ ):
+ """With no explicit launch_spec, the provider's get_launch_spec() result is forwarded."""
+ mock_multiplexer.session_exists.return_value = False
+ provider_instance = create_terminal_dependencies["provider_instance"]
+ spec = LaunchSpec(argv=["codex.cmd", "--yolo"], provider="codex")
+ provider_instance.get_launch_spec.return_value = spec
+
+ create_terminal(
+ provider="codex",
+ agent_profile="developer",
+ session_name="gamma",
+ new_session=True,
+ )
+
+ provider_instance.get_launch_spec.assert_called_once_with(mock_multiplexer)
+ mock_multiplexer.create_session.assert_called_once_with(
+ "cao-gamma",
+ "developer-window",
+ "deadbeef",
+ None,
+ launch_spec=spec,
+ )
diff --git a/test/services/test_terminal_service_coverage.py b/test/services/test_terminal_service_coverage.py
index 4dc776a83..fb78a52b3 100644
--- a/test/services/test_terminal_service_coverage.py
+++ b/test/services/test_terminal_service_coverage.py
@@ -15,7 +15,7 @@ class TestCreateTerminalCleanup:
"""Test error cleanup paths in create_terminal."""
@patch("cli_agent_orchestrator.services.terminal_service.TERMINAL_LOG_DIR")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
@patch(
@@ -33,14 +33,15 @@ def test_cleanup_on_provider_init_failure(
mock_wname,
mock_db_create,
mock_pm,
- mock_tmux,
+ mock_get_multiplexer,
mock_log_dir,
):
"""When provider.initialize() fails, cleanup should kill session and cleanup provider."""
from cli_agent_orchestrator.services.terminal_service import create_terminal
- mock_tmux.session_exists.return_value = False
- mock_tmux.create_session.return_value = "w1"
+ mock_multiplexer = mock_get_multiplexer.return_value
+ mock_multiplexer.session_exists.return_value = False
+ mock_multiplexer.create_session.return_value = "w1"
mock_load_profile.return_value = AgentProfile(name="dev", description="Dev")
mock_provider = MagicMock()
@@ -57,10 +58,10 @@ def test_cleanup_on_provider_init_failure(
)
mock_pm.cleanup_provider.assert_called_once_with("tid1")
- mock_tmux.kill_session.assert_called_once()
+ mock_multiplexer.kill_session.assert_called_once()
@patch("cli_agent_orchestrator.services.terminal_service.TERMINAL_LOG_DIR")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
@patch(
@@ -78,14 +79,15 @@ def test_cleanup_on_failure_does_not_kill_session_if_not_new(
mock_wname,
mock_db_create,
mock_pm,
- mock_tmux,
+ mock_get_multiplexer,
mock_log_dir,
):
"""When new_session=False, cleanup should NOT kill the session."""
from cli_agent_orchestrator.services.terminal_service import create_terminal
- mock_tmux.session_exists.return_value = True
- mock_tmux.create_window.return_value = "w1"
+ mock_multiplexer = mock_get_multiplexer.return_value
+ mock_multiplexer.session_exists.return_value = True
+ mock_multiplexer.create_window.return_value = "w1"
mock_load_profile.return_value = AgentProfile(name="dev", description="Dev")
mock_provider = MagicMock()
@@ -102,10 +104,10 @@ def test_cleanup_on_failure_does_not_kill_session_if_not_new(
)
mock_pm.cleanup_provider.assert_called_once()
- mock_tmux.kill_session.assert_not_called()
+ mock_multiplexer.kill_session.assert_not_called()
@patch("cli_agent_orchestrator.services.terminal_service.TERMINAL_LOG_DIR")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
@patch(
@@ -123,21 +125,22 @@ def test_cleanup_ignores_cleanup_errors(
mock_wname,
mock_db_create,
mock_pm,
- mock_tmux,
+ mock_get_multiplexer,
mock_log_dir,
):
"""Cleanup errors should be swallowed, original error re-raised."""
from cli_agent_orchestrator.services.terminal_service import create_terminal
- mock_tmux.session_exists.return_value = False
- mock_tmux.create_session.return_value = "w1"
+ mock_multiplexer = mock_get_multiplexer.return_value
+ mock_multiplexer.session_exists.return_value = False
+ mock_multiplexer.create_session.return_value = "w1"
mock_load_profile.return_value = AgentProfile(name="dev", description="Dev")
mock_provider = MagicMock()
mock_provider.initialize.side_effect = Exception("original error")
mock_pm.create_provider.return_value = mock_provider
mock_pm.cleanup_provider.side_effect = Exception("cleanup error")
- mock_tmux.kill_session.side_effect = Exception("kill error")
+ mock_multiplexer.kill_session.side_effect = Exception("kill error")
with pytest.raises(Exception, match="original error"):
create_terminal(
@@ -149,7 +152,7 @@ def test_cleanup_ignores_cleanup_errors(
)
@patch("cli_agent_orchestrator.services.terminal_service.TERMINAL_LOG_DIR")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
@patch(
@@ -167,14 +170,15 @@ def test_session_prefix_added_for_new_session(
mock_wname,
mock_db_create,
mock_pm,
- mock_tmux,
+ mock_get_multiplexer,
mock_log_dir,
):
"""New sessions without the prefix get it added automatically."""
from cli_agent_orchestrator.services.terminal_service import create_terminal
- mock_tmux.session_exists.return_value = False
- mock_tmux.create_session.return_value = "w1"
+ mock_multiplexer = mock_get_multiplexer.return_value
+ mock_multiplexer.session_exists.return_value = False
+ mock_multiplexer.create_session.return_value = "w1"
mock_load_profile.return_value = AgentProfile(name="dev", description="Dev")
mock_provider = MagicMock()
mock_pm.create_provider.return_value = mock_provider
@@ -189,7 +193,7 @@ def test_session_prefix_added_for_new_session(
)
# session_name should have been prefixed with "cao-"
- args = mock_tmux.create_session.call_args
+ args = mock_multiplexer.create_session.call_args
assert args[0][0] == "cao-myses"
@@ -201,7 +205,7 @@ class TestCreateTerminalSessionCleanupGuard:
"""
@patch("cli_agent_orchestrator.services.terminal_service.TERMINAL_LOG_DIR")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
@patch(
@@ -219,13 +223,14 @@ def test_no_kill_session_when_session_already_exists(
mock_wname,
mock_db_create,
mock_pm,
- mock_tmux,
+ mock_get_multiplexer,
mock_log_dir,
):
"""When session already exists, cleanup must NOT kill the pre-existing session."""
from cli_agent_orchestrator.services.terminal_service import create_terminal
- mock_tmux.session_exists.return_value = True # session already exists
+ mock_multiplexer = mock_get_multiplexer.return_value
+ mock_multiplexer.session_exists.return_value = True # session already exists
with pytest.raises(ValueError, match="already exists"):
create_terminal(
@@ -236,10 +241,10 @@ def test_no_kill_session_when_session_already_exists(
allowed_tools=["*"],
)
- mock_tmux.kill_session.assert_not_called()
+ mock_multiplexer.kill_session.assert_not_called()
@patch("cli_agent_orchestrator.services.terminal_service.TERMINAL_LOG_DIR")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
@patch(
@@ -257,14 +262,15 @@ def test_kill_session_when_we_created_it_and_later_step_fails(
mock_wname,
mock_db_create,
mock_pm,
- mock_tmux,
+ mock_get_multiplexer,
mock_log_dir,
):
"""When we successfully created the session but a later step fails, cleanup SHOULD kill it."""
from cli_agent_orchestrator.services.terminal_service import create_terminal
- mock_tmux.session_exists.return_value = False
- mock_tmux.create_session.return_value = "w1"
+ mock_multiplexer = mock_get_multiplexer.return_value
+ mock_multiplexer.session_exists.return_value = False
+ mock_multiplexer.create_session.return_value = "w1"
mock_load_profile.return_value = AgentProfile(name="dev", description="Dev")
mock_provider = MagicMock()
@@ -280,7 +286,7 @@ def test_kill_session_when_we_created_it_and_later_step_fails(
allowed_tools=["*"],
)
- mock_tmux.kill_session.assert_called_once()
+ mock_multiplexer.kill_session.assert_called_once()
class TestDeleteTerminal:
@@ -288,51 +294,54 @@ class TestDeleteTerminal:
@patch("cli_agent_orchestrator.services.terminal_service.db_delete_terminal", return_value=True)
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
- def test_delete_terminal_full_path(self, mock_meta, mock_tmux, mock_pm, mock_db_del):
+ def test_delete_terminal_full_path(self, mock_meta, mock_get_multiplexer, mock_pm, mock_db_del):
"""Delete should stop pipe-pane, kill window, cleanup provider, delete DB record."""
from cli_agent_orchestrator.services.terminal_service import delete_terminal
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_meta.return_value = {"tmux_session": "ses", "tmux_window": "win"}
result = delete_terminal("tid1")
assert result is True
- mock_tmux.stop_pipe_pane.assert_called_once_with("ses", "win")
- mock_tmux.kill_window.assert_called_once_with("ses", "win")
+ mock_multiplexer.stop_pipe_pane.assert_called_once_with("ses", "win")
+ mock_multiplexer.kill_window.assert_called_once_with("ses", "win")
mock_pm.cleanup_provider.assert_called_once_with("tid1")
@patch("cli_agent_orchestrator.services.terminal_service.db_delete_terminal", return_value=True)
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
def test_delete_terminal_pipe_pane_failure_continues(
- self, mock_meta, mock_tmux, mock_pm, mock_db_del
+ self, mock_meta, mock_get_multiplexer, mock_pm, mock_db_del
):
"""Pipe-pane failure should be logged and not block deletion."""
from cli_agent_orchestrator.services.terminal_service import delete_terminal
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_meta.return_value = {"tmux_session": "ses", "tmux_window": "win"}
- mock_tmux.stop_pipe_pane.side_effect = Exception("pipe error")
+ mock_multiplexer.stop_pipe_pane.side_effect = Exception("pipe error")
result = delete_terminal("tid1")
assert result is True
- mock_tmux.kill_window.assert_called_once()
+ mock_multiplexer.kill_window.assert_called_once()
@patch("cli_agent_orchestrator.services.terminal_service.db_delete_terminal", return_value=True)
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
def test_delete_terminal_kill_window_failure_continues(
- self, mock_meta, mock_tmux, mock_pm, mock_db_del
+ self, mock_meta, mock_get_multiplexer, mock_pm, mock_db_del
):
"""Kill-window failure should be logged and not block deletion."""
from cli_agent_orchestrator.services.terminal_service import delete_terminal
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_meta.return_value = {"tmux_session": "ses", "tmux_window": "win"}
- mock_tmux.kill_window.side_effect = Exception("kill error")
+ mock_multiplexer.kill_window.side_effect = Exception("kill error")
result = delete_terminal("tid1")
@@ -341,12 +350,14 @@ def test_delete_terminal_kill_window_failure_continues(
@patch("cli_agent_orchestrator.services.terminal_service.db_delete_terminal")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
- def test_delete_terminal_db_failure_raises(self, mock_meta, mock_tmux, mock_pm, mock_db_del):
+ def test_delete_terminal_db_failure_raises(
+ self, mock_meta, mock_get_multiplexer, mock_pm, mock_db_del
+ ):
"""DB delete failure should propagate."""
from cli_agent_orchestrator.services.terminal_service import delete_terminal
mock_meta.return_value = {"tmux_session": "ses", "tmux_window": "win"}
mock_db_del.side_effect = Exception("DB error")
diff --git a/test/services/test_terminal_service_full.py b/test/services/test_terminal_service_full.py
index e49281f2e..578e1568a 100644
--- a/test/services/test_terminal_service_full.py
+++ b/test/services/test_terminal_service_full.py
@@ -24,7 +24,7 @@ class TestCreateTerminal:
@patch("cli_agent_orchestrator.services.terminal_service.TERMINAL_LOG_DIR")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.generate_window_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_session_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_terminal_id")
@@ -35,16 +35,17 @@ def test_create_terminal_new_session(
mock_gen_id,
mock_gen_session,
mock_gen_window,
- mock_tmux,
+ mock_get_multiplexer,
mock_db_create,
mock_provider_manager,
mock_log_dir,
):
"""Test creating terminal with new session."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_gen_id.return_value = "test1234"
mock_gen_session.return_value = "cao-session"
mock_gen_window.return_value = "developer-abcd"
- mock_tmux.session_exists.return_value = False
+ mock_multiplexer.session_exists.return_value = False
mock_load_profile.return_value = AgentProfile(name="developer", description="Developer")
mock_provider = MagicMock()
mock_provider_manager.create_provider.return_value = mock_provider
@@ -54,13 +55,14 @@ def test_create_terminal_new_session(
result = create_terminal("kiro_cli", "developer", new_session=True)
assert result.id == "test1234"
- mock_tmux.create_session.assert_called_once()
+ mock_multiplexer.create_session.assert_called_once()
+ mock_provider.get_launch_spec.assert_called_once_with(mock_multiplexer)
mock_provider.initialize.assert_called_once()
@patch("cli_agent_orchestrator.services.terminal_service.TERMINAL_LOG_DIR")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.generate_window_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_session_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_terminal_id")
@@ -71,17 +73,18 @@ def test_create_terminal_existing_session(
mock_gen_id,
mock_gen_session,
mock_gen_window,
- mock_tmux,
+ mock_get_multiplexer,
mock_db_create,
mock_provider_manager,
mock_log_dir,
):
"""Test creating terminal in existing session."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_gen_id.return_value = "test1234"
mock_gen_session.return_value = "cao-session"
mock_gen_window.return_value = "developer-abcd"
- mock_tmux.session_exists.return_value = True
- mock_tmux.create_window.return_value = "developer-abcd"
+ mock_multiplexer.session_exists.return_value = True
+ mock_multiplexer.create_window.return_value = "developer-abcd"
mock_load_profile.return_value = AgentProfile(name="developer", description="Developer")
mock_provider = MagicMock()
mock_provider_manager.create_provider.return_value = mock_provider
@@ -91,39 +94,42 @@ def test_create_terminal_existing_session(
result = create_terminal("kiro_cli", "developer", session_name="cao-existing")
assert result.id == "test1234"
- mock_tmux.create_window.assert_called_once()
+ mock_provider.get_launch_spec.assert_called_once_with(mock_multiplexer)
+ mock_multiplexer.create_window.assert_called_once()
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.generate_window_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_session_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_terminal_id")
@patch("cli_agent_orchestrator.services.terminal_service.load_agent_profile")
def test_create_terminal_session_not_found(
- self, mock_load_profile, mock_gen_id, mock_gen_session, mock_gen_window, mock_tmux
+ self, mock_load_profile, mock_gen_id, mock_gen_session, mock_gen_window, mock_get_multiplexer
):
"""Test creating terminal when session not found."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_gen_id.return_value = "test1234"
mock_gen_session.return_value = "cao-session"
mock_gen_window.return_value = "developer-abcd"
- mock_tmux.session_exists.return_value = False
+ mock_multiplexer.session_exists.return_value = False
mock_load_profile.return_value = AgentProfile(name="developer", description="Developer")
with pytest.raises(ValueError, match="not found"):
create_terminal("kiro_cli", "developer", session_name="cao-nonexistent")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.generate_window_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_session_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_terminal_id")
@patch("cli_agent_orchestrator.services.terminal_service.load_agent_profile")
def test_create_terminal_session_already_exists(
- self, mock_load_profile, mock_gen_id, mock_gen_session, mock_gen_window, mock_tmux
+ self, mock_load_profile, mock_gen_id, mock_gen_session, mock_gen_window, mock_get_multiplexer
):
"""Test creating terminal when session already exists."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_gen_id.return_value = "test1234"
mock_gen_session.return_value = "cao-session"
mock_gen_window.return_value = "developer-abcd"
- mock_tmux.session_exists.return_value = True
+ mock_multiplexer.session_exists.return_value = True
mock_load_profile.return_value = AgentProfile(name="developer", description="Developer")
with pytest.raises(ValueError, match="already exists"):
@@ -132,7 +138,7 @@ def test_create_terminal_session_already_exists(
@patch("cli_agent_orchestrator.services.terminal_service.TERMINAL_LOG_DIR")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.generate_window_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_session_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_terminal_id")
@@ -145,16 +151,17 @@ def test_create_terminal_appends_skill_catalog(
mock_gen_id,
mock_gen_session,
mock_gen_window,
- mock_tmux,
+ mock_get_multiplexer,
mock_db_create,
mock_provider_manager,
mock_log_dir,
):
"""Providers that consume runtime prompts should receive the global skill catalog."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_gen_id.return_value = "test1234"
mock_gen_session.return_value = "cao-session"
mock_gen_window.return_value = "developer-abcd"
- mock_tmux.session_exists.return_value = False
+ mock_multiplexer.session_exists.return_value = False
mock_load_profile.return_value = AgentProfile(
name="developer",
description="Developer",
@@ -190,7 +197,7 @@ def test_create_terminal_appends_skill_catalog(
@patch("cli_agent_orchestrator.services.terminal_service.TERMINAL_LOG_DIR")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.generate_window_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_session_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_terminal_id")
@@ -203,16 +210,17 @@ def test_create_terminal_without_skills_is_unchanged(
mock_gen_id,
mock_gen_session,
mock_gen_window,
- mock_tmux,
+ mock_get_multiplexer,
mock_db_create,
mock_provider_manager,
mock_log_dir,
):
"""Providers should receive an empty skill prompt when no skills are installed."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_gen_id.return_value = "test1234"
mock_gen_session.return_value = "cao-session"
mock_gen_window.return_value = "developer-abcd"
- mock_tmux.session_exists.return_value = False
+ mock_multiplexer.session_exists.return_value = False
mock_load_profile.return_value = AgentProfile(
name="developer",
description="Developer",
@@ -234,7 +242,7 @@ def test_create_terminal_without_skills_is_unchanged(
@patch("cli_agent_orchestrator.services.terminal_service.TERMINAL_LOG_DIR")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.generate_window_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_session_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_terminal_id")
@@ -247,17 +255,18 @@ def test_create_terminal_does_not_pass_skill_prompt_to_non_runtime_provider(
mock_gen_id,
mock_gen_session,
mock_gen_window,
- mock_tmux,
+ mock_get_multiplexer,
mock_db_create,
mock_provider_manager,
mock_log_dir,
provider_name,
):
"""Kiro, Q, and Copilot should receive skill_prompt=None."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_gen_id.return_value = "test1234"
mock_gen_session.return_value = "cao-session"
mock_gen_window.return_value = "developer-abcd"
- mock_tmux.session_exists.return_value = False
+ mock_multiplexer.session_exists.return_value = False
mock_load_profile.return_value = AgentProfile(
name="developer",
description="Developer",
@@ -283,7 +292,7 @@ def test_create_terminal_does_not_pass_skill_prompt_to_non_runtime_provider(
@patch("cli_agent_orchestrator.services.terminal_service.TERMINAL_LOG_DIR")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.generate_window_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_session_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_terminal_id")
@@ -296,16 +305,17 @@ def test_build_skill_catalog_called_for_runtime_prompt_provider(
mock_gen_id,
mock_gen_session,
mock_gen_window,
- mock_tmux,
+ mock_get_multiplexer,
mock_db_create,
mock_provider_manager,
mock_log_dir,
):
"""build_skill_catalog() is called exactly once for runtime-prompt providers."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_gen_id.return_value = "test1234"
mock_gen_session.return_value = "cao-session"
mock_gen_window.return_value = "developer-abcd"
- mock_tmux.session_exists.return_value = False
+ mock_multiplexer.session_exists.return_value = False
mock_load_profile.return_value = AgentProfile(
name="developer", description="Developer", system_prompt="You are the developer."
)
@@ -321,7 +331,7 @@ def test_build_skill_catalog_called_for_runtime_prompt_provider(
@patch("cli_agent_orchestrator.services.terminal_service.TERMINAL_LOG_DIR")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.generate_window_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_session_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_terminal_id")
@@ -334,7 +344,7 @@ def test_build_skill_catalog_not_called_for_native_or_baked_provider(
mock_gen_id,
mock_gen_session,
mock_gen_window,
- mock_tmux,
+ mock_get_multiplexer,
mock_db_create,
mock_provider_manager,
mock_log_dir,
@@ -342,10 +352,11 @@ def test_build_skill_catalog_not_called_for_native_or_baked_provider(
):
"""build_skill_catalog() is never called for providers that deliver skills natively or
at install time — OpenCode (symlink), Kiro (skill:// resources), Q, Copilot."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_gen_id.return_value = "test1234"
mock_gen_session.return_value = "cao-session"
mock_gen_window.return_value = "developer-abcd"
- mock_tmux.session_exists.return_value = False
+ mock_multiplexer.session_exists.return_value = False
mock_load_profile.return_value = AgentProfile(
name="developer", description="Developer", system_prompt="Base prompt"
)
@@ -359,7 +370,7 @@ def test_build_skill_catalog_not_called_for_native_or_baked_provider(
@patch("cli_agent_orchestrator.services.terminal_service.TERMINAL_LOG_DIR")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
@patch("cli_agent_orchestrator.services.terminal_service.db_create_terminal")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.generate_window_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_session_name")
@patch("cli_agent_orchestrator.services.terminal_service.generate_terminal_id")
@@ -370,16 +381,17 @@ def test_create_terminal_profile_not_found(
mock_gen_id,
mock_gen_session,
mock_gen_window,
- mock_tmux,
+ mock_get_multiplexer,
mock_db_create,
mock_provider_manager,
mock_log_dir,
):
"""Terminal creation succeeds when agent profile is not in CAO store (e.g. JSON-only profiles)."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_gen_id.return_value = "test1234"
mock_gen_session.return_value = "cao-session"
mock_gen_window.return_value = "my-agent-abcd"
- mock_tmux.session_exists.return_value = False
+ mock_multiplexer.session_exists.return_value = False
mock_load_profile.side_effect = FileNotFoundError("Agent profile not found: my-agent")
mock_provider = MagicMock()
mock_provider_manager.create_provider.return_value = mock_provider
@@ -447,15 +459,16 @@ def test_get_terminal_no_provider(self, mock_get_metadata, mock_provider_manager
class TestGetWorkingDirectory:
"""Tests for get_working_directory function."""
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
- def test_get_working_directory_success(self, mock_get_metadata, mock_tmux):
+ def test_get_working_directory_success(self, mock_get_metadata, mock_get_multiplexer):
"""Test getting working directory successfully."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_get_metadata.return_value = {
"tmux_session": "cao-session",
"tmux_window": "developer-abcd",
}
- mock_tmux.get_pane_working_directory.return_value = "/home/user/project"
+ mock_multiplexer.get_pane_working_directory.return_value = "/home/user/project"
result = get_working_directory("test1234")
@@ -475,10 +488,11 @@ class TestSendInput:
@patch("cli_agent_orchestrator.services.terminal_service.update_last_active")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
- def test_send_input_success(self, mock_get_metadata, mock_tmux, mock_pm, mock_update):
+ def test_send_input_success(self, mock_get_metadata, mock_get_multiplexer, mock_pm, mock_update):
"""Test sending input successfully."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_get_metadata.return_value = {
"tmux_session": "cao-session",
"tmux_window": "developer-abcd",
@@ -489,7 +503,7 @@ def test_send_input_success(self, mock_get_metadata, mock_tmux, mock_pm, mock_up
result = send_input("test1234", "test message")
assert result is True
- mock_tmux.send_keys.assert_called_once_with(
+ mock_multiplexer.send_keys.assert_called_once_with(
"cao-session", "developer-abcd", "test message", enter_count=2
)
mock_update.assert_called_once_with("test1234")
@@ -506,30 +520,32 @@ def test_send_input_not_found(self, mock_get_metadata):
class TestGetOutput:
"""Tests for get_output function."""
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
- def test_get_output_full(self, mock_get_metadata, mock_tmux):
+ def test_get_output_full(self, mock_get_metadata, mock_get_multiplexer):
"""Test getting full output."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_get_metadata.return_value = {
"tmux_session": "cao-session",
"tmux_window": "developer-abcd",
}
- mock_tmux.get_history.return_value = "full terminal output"
+ mock_multiplexer.get_history.return_value = "full terminal output"
result = get_output("test1234", OutputMode.FULL)
assert result == "full terminal output"
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
- def test_get_output_last(self, mock_get_metadata, mock_tmux, mock_provider_manager):
+ def test_get_output_last(self, mock_get_metadata, mock_get_multiplexer, mock_provider_manager):
"""Test getting last message."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_get_metadata.return_value = {
"tmux_session": "cao-session",
"tmux_window": "developer-abcd",
}
- mock_tmux.get_history.return_value = "full terminal output"
+ mock_multiplexer.get_history.return_value = "full terminal output"
mock_provider = MagicMock()
mock_provider.extract_last_message_from_script.return_value = "last message"
mock_provider_manager.get_provider.return_value = mock_provider
@@ -547,15 +563,18 @@ def test_get_output_not_found(self, mock_get_metadata):
get_output("nonexistent")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
- def test_get_output_last_no_provider(self, mock_get_metadata, mock_tmux, mock_provider_manager):
+ def test_get_output_last_no_provider(
+ self, mock_get_metadata, mock_get_multiplexer, mock_provider_manager
+ ):
"""Test getting last message when provider not found."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_get_metadata.return_value = {
"tmux_session": "cao-session",
"tmux_window": "developer-abcd",
}
- mock_tmux.get_history.return_value = "full output"
+ mock_multiplexer.get_history.return_value = "full output"
mock_provider_manager.get_provider.return_value = None
with pytest.raises(ValueError, match="Provider not found"):
@@ -567,12 +586,13 @@ class TestDeleteTerminal:
@patch("cli_agent_orchestrator.services.terminal_service.db_delete_terminal")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
def test_delete_terminal_success(
- self, mock_get_metadata, mock_tmux, mock_provider_manager, mock_db_delete
+ self, mock_get_metadata, mock_get_multiplexer, mock_provider_manager, mock_db_delete
):
"""Test deleting terminal successfully."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_get_metadata.return_value = {
"tmux_session": "cao-session",
"tmux_window": "developer-abcd",
@@ -582,22 +602,23 @@ def test_delete_terminal_success(
result = delete_terminal("test1234")
assert result is True
- mock_tmux.stop_pipe_pane.assert_called_once()
+ mock_multiplexer.stop_pipe_pane.assert_called_once()
mock_provider_manager.cleanup_provider.assert_called_once_with("test1234")
@patch("cli_agent_orchestrator.services.terminal_service.db_delete_terminal")
@patch("cli_agent_orchestrator.services.terminal_service.provider_manager")
- @patch("cli_agent_orchestrator.services.terminal_service.tmux_client")
+ @patch("cli_agent_orchestrator.services.terminal_service.get_multiplexer")
@patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata")
def test_delete_terminal_pipe_pane_error(
- self, mock_get_metadata, mock_tmux, mock_provider_manager, mock_db_delete
+ self, mock_get_metadata, mock_get_multiplexer, mock_provider_manager, mock_db_delete
):
"""Test deleting terminal when stop_pipe_pane fails."""
+ mock_multiplexer = mock_get_multiplexer.return_value
mock_get_metadata.return_value = {
"tmux_session": "cao-session",
"tmux_window": "developer-abcd",
}
- mock_tmux.stop_pipe_pane.side_effect = Exception("Pipe error")
+ mock_multiplexer.stop_pipe_pane.side_effect = Exception("Pipe error")
mock_db_delete.return_value = True
# Should not raise, just warn
diff --git a/test/smoke/README.md b/test/smoke/README.md
new file mode 100644
index 000000000..ec612e3cd
--- /dev/null
+++ b/test/smoke/README.md
@@ -0,0 +1,34 @@
+# Smoke harness
+
+Real-world tests for the WezTerm multiplexer backend. NOT run by default.
+
+## What this exercises
+
+- spawn / send / get-text / kill on a real WezTerm pane
+- Claude trust-prompt acceptance via `send_special_key("Enter")`
+- Codex direct spawn via `build_launch_spec` (resolved Windows shim)
+- inbox `pipe_pane` capture at the 500 ms polling cadence
+
+## Prerequisites
+
+- WezTerm GUI running and the `wezterm` CLI resolvable: on PATH, via `CAO_WEZTERM_BIN`, or via `WEZTERM_EXECUTABLE_DIR` (set when running inside a WezTerm pane)
+- `claude` on PATH (Claude CLI)
+- `codex` / `codex.cmd` on PATH (Codex CLI; Windows users may need the Scoop shim)
+
+Tests skip with a clear message when any prerequisite is missing.
+
+## Running
+
+```bash
+pytest -m smoke
+pytest test/smoke -m smoke
+pytest test/smoke -m smoke -v
+```
+
+Default `pytest` invocations DO NOT run these because the project default
+`addopts` excludes the `smoke` marker.
+
+## CI
+
+Skipped in CI by default. An optional dedicated workflow can install WezTerm and the
+provider CLIs, then run `pytest -m smoke` on a Windows runner.
diff --git a/test/smoke/__init__.py b/test/smoke/__init__.py
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/test/smoke/__init__.py
@@ -0,0 +1 @@
+
diff --git a/test/smoke/conftest.py b/test/smoke/conftest.py
new file mode 100644
index 000000000..9a9ea000c
--- /dev/null
+++ b/test/smoke/conftest.py
@@ -0,0 +1,116 @@
+import os
+import re
+import shutil
+import time
+
+import pytest
+
+from cli_agent_orchestrator.multiplexers.wezterm import WezTermMultiplexer
+
+
+def _which_or_skip(name: str) -> str:
+    """Return the path of ``name`` on PATH, or skip the requesting test.
+
+    Falls back to ``<name>.cmd`` when the bare name is not found.
+    """
+    path = shutil.which(name) or shutil.which(f"{name}.cmd")
+    if not path:
+        pytest.skip(f"{name} not on PATH; skipping smoke test")
+    return path
+
+
+def _resolve_wezterm_bin() -> str:
+    """Resolve the wezterm CLI executable for smoke testing.
+
+    Resolution order:
+    1. ``CAO_WEZTERM_BIN`` env override — explicit path for tests/CI,
+       returned as-is.
+    2. ``shutil.which("wezterm")``, then ``shutil.which("wezterm.cmd")`` —
+       system PATH.
+    3. ``WEZTERM_EXECUTABLE_DIR`` + ``wezterm.exe`` — set by the WezTerm
+       GUI itself when CAO runs inside a WezTerm pane (note: portable
+       extracts are not on PATH but expose this var). Falls back to
+       ``wezterm`` (no extension) in that directory for non-Windows installs.
+
+    Skips the requesting test when none of these yields an executable.
+    """
+    override = os.environ.get("CAO_WEZTERM_BIN")
+    if override:
+        return override
+
+    found = shutil.which("wezterm") or shutil.which("wezterm.cmd")
+    if found:
+        return found
+
+    install_dir = os.environ.get("WEZTERM_EXECUTABLE_DIR")
+    if install_dir:
+        for candidate in ("wezterm.exe", "wezterm"):
+            full = os.path.join(install_dir, candidate)
+            if os.path.isfile(full):
+                return full
+
+    pytest.skip(
+        "wezterm not resolved; set CAO_WEZTERM_BIN, add to PATH, or run "
+        "inside a WezTerm pane (WEZTERM_EXECUTABLE_DIR)"
+    )
+
+
+@pytest.fixture(scope="session")
+def wezterm_bin() -> str:
+    """Path to the wezterm CLI, resolved once per test session."""
+    return _resolve_wezterm_bin()
+
+
+@pytest.fixture(scope="session")
+def claude_bin() -> str:
+    """Path to the Claude CLI; skips when it is not on PATH."""
+    return _which_or_skip("claude")
+
+
+@pytest.fixture(scope="session")
+def codex_bin() -> str:
+    """Path to the Codex CLI; skips when it is not on PATH."""
+    return _which_or_skip("codex")
+
+
+@pytest.fixture
+def multiplexer(wezterm_bin: str) -> WezTermMultiplexer:
+    """Fresh ``WezTermMultiplexer`` bound to the resolved wezterm binary."""
+    return WezTermMultiplexer(wezterm_bin=wezterm_bin)
+
+
+def _wait_for_text(
+    multiplexer: WezTermMultiplexer,
+    session: str,
+    window: str,
+    needle: str,
+    timeout: float = 15.0,
+) -> bool:
+    """Poll pane history until ``needle`` shows up or ``timeout`` elapses.
+
+    ``needle`` matches as a literal substring or as a regular expression,
+    so callers may pass plain markers as well as the provider ``*_PATTERN``
+    constants that other smoke tests feed to ``re.search``. A needle that is
+    not a valid regex is matched literally only.
+
+    Returns True on the first match, False once the deadline has passed.
+    """
+    try:
+        pattern = re.compile(needle)
+    except re.error:
+        # Plain text that happens to contain regex metacharacters.
+        pattern = None
+
+    deadline = time.monotonic() + timeout
+    while time.monotonic() < deadline:
+        text = multiplexer.get_history(session, window)
+        if needle in text or (pattern is not None and pattern.search(text)):
+            return True
+        time.sleep(0.5)
+    return False
+
+
+@pytest.fixture
+def wait_for_text():
+    """Expose ``_wait_for_text`` to tests as a fixture."""
+    return _wait_for_text
diff --git a/test/smoke/test_claude_startup.py b/test/smoke/test_claude_startup.py
new file mode 100644
index 000000000..9e857304c
--- /dev/null
+++ b/test/smoke/test_claude_startup.py
@@ -0,0 +1,46 @@
+import re
+import time
+
+import pytest
+
+from cli_agent_orchestrator.multiplexers.base import LaunchSpec
+from cli_agent_orchestrator.providers.claude_code import (
+    IDLE_PROMPT_PATTERN,
+    TRUST_PROMPT_PATTERN,
+)
+
+pytestmark = pytest.mark.smoke
+
+_SESSION = "cao-smoke-claude"
+_WINDOW = "claude-0"
+
+
+def _history_matches(multiplexer, pattern, attempts=30):
+    """Poll the pane once per second; True as soon as ``pattern`` is found."""
+    for _ in range(attempts):
+        if re.search(pattern, multiplexer.get_history(_SESSION, _WINDOW)):
+            return True
+        time.sleep(1)
+    return False
+
+
+def test_trust_prompt_acceptance(multiplexer, claude_bin, tmp_path):
+    """Spawn Claude directly, accept its trust prompt, and wait for the idle prompt."""
+    multiplexer.create_session(
+        session_name=_SESSION,
+        window_name=_WINDOW,
+        terminal_id="smoke-claude",
+        working_directory=str(tmp_path),
+        launch_spec=LaunchSpec(argv=(claude_bin,), provider="claude"),
+    )
+    try:
+        if not _history_matches(multiplexer, TRUST_PROMPT_PATTERN):
+            pytest.fail("Claude trust prompt not seen in 30s")
+
+        # Enter confirms the trust prompt.
+        multiplexer.send_special_key(_SESSION, _WINDOW, "Enter")
+
+        if not _history_matches(multiplexer, IDLE_PROMPT_PATTERN):
+            pytest.fail("Claude idle prompt not seen after trust accept")
+    finally:
+        multiplexer.kill_session(_SESSION)
diff --git a/test/smoke/test_codex_direct_spawn.py b/test/smoke/test_codex_direct_spawn.py
new file mode 100644
index 000000000..718c30a18
--- /dev/null
+++ b/test/smoke/test_codex_direct_spawn.py
@@ -0,0 +1,53 @@
+import sys
+
+import pytest
+
+from cli_agent_orchestrator.multiplexers.launch import build_launch_spec
+from cli_agent_orchestrator.providers.codex import (
+    CODEX_WELCOME_PATTERN,
+    TRUST_PROMPT_PATTERN,
+)
+
+pytestmark = pytest.mark.smoke
+
+_SESSION = "cao-smoke-codex"
+_WINDOW = "codex-0"
+
+
+def test_codex_direct_spawn_two_step_send(multiplexer, codex_bin, tmp_path, wait_for_text):
+    """Spawn Codex directly, get past trust/welcome, then send ``/help`` via send_keys."""
+    on_windows = sys.platform == "win32"
+
+    # Mirror CodexProvider._build_codex_argv: without --no-alt-screen, output goes
+    # to the alt-screen and `wezterm cli get-text` (scrollback) sees nothing.
+    argv = [codex_bin]
+    if on_windows:
+        argv += ["-c", "hooks=[]"]
+    argv += ["--yolo", "--no-alt-screen", "--disable", "shell_snapshot"]
+
+    def seen(needle):
+        return wait_for_text(multiplexer, _SESSION, _WINDOW, needle, timeout=15)
+
+    multiplexer.create_session(
+        session_name=_SESSION,
+        window_name=_WINDOW,
+        terminal_id="smoke-codex",
+        working_directory=str(tmp_path),
+        launch_spec=build_launch_spec(
+            "codex",
+            argv,
+            platform="windows" if on_windows else "unix",
+        ),
+    )
+    try:
+        # Codex shows a workspace trust prompt on first open of an unknown
+        # directory (tmp_path). Mirror CodexProvider._handle_trust_prompt:
+        # wait for the trust banner, dismiss with Enter, then confirm the
+        # welcome banner appears before driving the composer.
+        if seen(TRUST_PROMPT_PATTERN):
+            multiplexer.send_special_key(_SESSION, _WINDOW, "Enter")
+        assert seen(CODEX_WELCOME_PATTERN), "Codex never reached its welcome banner"
+        multiplexer.send_keys(_SESSION, _WINDOW, "/help", enter_count=1)
+        assert seen("/help")
+    finally:
+        multiplexer.kill_session(_SESSION)
diff --git a/test/smoke/test_inbox_poller.py b/test/smoke/test_inbox_poller.py
new file mode 100644
index 000000000..ae61dab69
--- /dev/null
+++ b/test/smoke/test_inbox_poller.py
@@ -0,0 +1,49 @@
+import time
+
+import pytest
+
+pytestmark = pytest.mark.smoke
+
+
+def test_pipe_pane_captures_rapid_output(multiplexer, tmp_path):
+    """Five back-to-back echo commands must all reach the ``pipe_pane`` log.
+
+    Fails with the current log contents when a marker is still missing after 10 s.
+    """
+    log_path = tmp_path / "pane.log"
+    markers = [f"MARK-{i}" for i in range(5)]
+
+    def read_log():
+        return log_path.read_text(encoding="utf-8") if log_path.exists() else ""
+
+    multiplexer.create_session(
+        session_name="cao-smoke-pipe",
+        window_name="bash",
+        terminal_id="smoke-pipe",
+        working_directory=str(tmp_path),
+    )
+    piping = False
+    try:
+        multiplexer.pipe_pane("cao-smoke-pipe", "bash", str(log_path))
+        piping = True
+        for marker in markers:
+            multiplexer.send_keys("cao-smoke-pipe", "bash", f"echo {marker}", enter_count=1)
+
+        deadline = time.monotonic() + 10
+        while time.monotonic() < deadline:
+            text = read_log()
+            if all(marker in text for marker in markers):
+                return
+            time.sleep(0.5)
+        pytest.fail(
+            "Poller did not capture all markers; last log:\n"
+            f"{read_log()}"
+        )
+    finally:
+        # Stop the capture on every exit path, not just on success, and make
+        # sure a failing stop can never prevent the session from being killed.
+        try:
+            if piping:
+                multiplexer.stop_pipe_pane("cao-smoke-pipe", "bash")
+        finally:
+            multiplexer.kill_session("cao-smoke-pipe")
diff --git a/test/smoke/test_wezterm_basics.py b/test/smoke/test_wezterm_basics.py
new file mode 100644
index 000000000..c8ccc881c
--- /dev/null
+++ b/test/smoke/test_wezterm_basics.py
@@ -0,0 +1,22 @@
+import pytest
+
+pytestmark = pytest.mark.smoke
+
+_SESSION = "cao-smoke-basics"
+_WINDOW = "bash"
+
+
+def test_spawn_send_get_kill(multiplexer, tmp_path, wait_for_text):
+    """Round-trip: create a session, echo a marker, read it back, kill the session."""
+    multiplexer.create_session(
+        session_name=_SESSION,
+        window_name=_WINDOW,
+        terminal_id="smoke-basics",
+        working_directory=str(tmp_path),
+    )
+    try:
+        multiplexer.send_keys(_SESSION, _WINDOW, "echo hello-smoke", enter_count=1)
+        marker_seen = wait_for_text(multiplexer, _SESSION, _WINDOW, "hello-smoke", timeout=10)
+        assert marker_seen
+    finally:
+        multiplexer.kill_session(_SESSION)