From 63e0bfebbf7b36fa1da904ed9a85737898d41062 Mon Sep 17 00:00:00 2001 From: Anil Kumar Date: Mon, 8 Jun 2026 23:32:00 -0700 Subject: [PATCH 1/7] fix(herdr): ignore stale pane_closed events for reused compact pane_ids herdr 0.6.8 replays its entire pane_closed history on every fresh events.subscribe and reuses compact pane_ids when a tab is killed and a new tab takes the same index. CAO triggers a fresh subscribe on every terminal registration via _force_reconnect(), so a replayed close for an old incarnation of a pane arrives mapped to the live terminal now occupying that reused index. _handle_lifecycle_event then deleted a live terminal's DB record. Two observed symptoms from this one cause: - terminal deleted before its handoff message was sent -> message never delivered - terminal deleted milliseconds after send -> agent ran fine but the handoff poller hit 404 every second for minutes (log spam) Neither pane was actually closed; both stayed alive in herdr. Fix: before acting on pane_closed, confirm the terminal's tab label (tmux_window, unique per incarnation and already persisted) is genuinely gone from herdr. Live label means the close is a replayed event for an older incarnation -> skip. Missing label, including user-initiated close, means a real close -> delete. If herdr cannot be queried, fall toward delete so a possibly-closed terminal is never stranded. This mirrors the label-based liveness check _reconcile() already uses. Adds regression tests covering the reused-pane_id skip, the genuine-close delete, and the herdr-unreachable fallback. Assisted-by: Amazon Q Developer --- .../services/herdr_inbox_service.py | 57 +++++++++ test/backends/test_herdr_inbox_service.py | 113 ++++++++++++++++++ 2 files changed, 170 insertions(+) diff --git a/src/cli_agent_orchestrator/services/herdr_inbox_service.py b/src/cli_agent_orchestrator/services/herdr_inbox_service.py index cc7c669e..c9cc14be 100644 --- a/src/cli_agent_orchestrator/services/herdr_inbox_service.py +++ b/src/cli_agent_orchestrator/services/herdr_inbox_service.py @@ -522,6 +522,38 @@ async def _event_loop(self) -> None: if terminal_id not in self._working_since: self._working_since[terminal_id] = time.time() + def _label_still_live(self, window_name: str) -> bool: + """Return True if a tab with this label is still live in herdr. + + Used to disambiguate herdr's reused compact pane_ids on replayed + pane_closed events. The tab label is unique per incarnation, so a live + label means the close event refers to an older incarnation and is stale. + + Fails toward False (not live) when herdr can't be queried, so the caller + proceeds with cleanup rather than leaving a possibly-closed terminal. + """ + try: + result = subprocess.run( + ["herdr", "--session", self._herdr_session, "tab", "list"], + capture_output=True, + text=True, + timeout=10, + ) + if result.returncode != 0: + logger.warning( + "_label_still_live: herdr tab list failed (rc=%s): %s", + result.returncode, + result.stderr.strip(), + ) + return False + tab_data = json.loads(result.stdout) + tabs = tab_data.get("result", {}).get("tabs", []) + live_labels = {tab.get("label", "") for tab in tabs} + return window_name in live_labels + except (subprocess.SubprocessError, json.JSONDecodeError, KeyError, OSError) as e: + logger.warning("_label_still_live: could not query herdr (%s)", e) + return False + def _handle_lifecycle_event(self, event_type: str, data: dict) -> None: """Handle pane.closed and workspace.closed events.""" from cli_agent_orchestrator.backends.registry import get_backend @@ -541,6 +573,31 @@ def _handle_lifecycle_event(self, event_type: str, data: dict) -> None: meta = get_terminal_metadata(terminal_id) session_name = meta["tmux_session"] if meta else None + # Guard against herdr's compact pane_id reuse + event replay. + # + # herdr (0.6.8) reuses compact pane_ids when a tab is killed and a + # new tab takes the same index, AND replays the ENTIRE pane_closed + # history on every fresh events.subscribe (which register_terminal + # triggers via _force_reconnect). So a replayed close for an OLD + # incarnation of this pane_id arrives mapped to the terminal that now + # occupies the reused index — deleting a live terminal. + # + # The tab label (tmux_window) is unique per incarnation, so confirm + # the label is genuinely gone from herdr before deleting. If the + # label is still live, this close is stale (replayed) — ignore it. + # If herdr can't be queried, fall toward delete: never leave a + # terminal we think is open when it may actually be closed. + window_name = meta["tmux_window"] if meta else None + if window_name and self._label_still_live(window_name): + logger.info( + "pane.closed: ignoring stale close for %s (pane=%s) — " + "label %s still live in herdr (compact pane_id reused)", + terminal_id, + pane_id, + window_name, + ) + return + # Remove from maps self._pane_to_terminal.pop(pane_id, None) self._terminal_to_pane.pop(terminal_id, None) diff --git a/test/backends/test_herdr_inbox_service.py b/test/backends/test_herdr_inbox_service.py index 46081262..575475f5 100644 --- a/test/backends/test_herdr_inbox_service.py +++ b/test/backends/test_herdr_inbox_service.py @@ -744,6 +744,119 @@ def test_pane_closed_unknown_pane_is_noop(self, mock_meta, mock_delete): mock_delete.assert_not_called() mock_meta.assert_not_called() + @patch("cli_agent_orchestrator.services.herdr_inbox_service.subprocess.run") + @patch("cli_agent_orchestrator.clients.database.delete_terminal") + @patch("cli_agent_orchestrator.clients.database.get_terminal_metadata") + def test_pane_closed_skips_delete_when_label_still_live( + self, mock_meta, mock_delete, mock_run + ): + """Replayed close for a reused compact pane_id must NOT delete a live terminal. + + herdr (0.6.8) replays ALL historical pane_closed events on every fresh + events.subscribe and reuses compact pane_ids when a tab is killed and a + new tab takes the same index. So a replayed close for an OLD incarnation + of pane-3 arrives mapped to the NEW terminal now occupying pane-3. + + The tab label (tmux_window) is unique per incarnation, so when the label + is still live in herdr the close is stale and must be ignored. + """ + service = HerdrInboxService(socket_path="/tmp/test.sock") + # New incarnation: terminal "9d00610c" occupies reused pane-3 with a + # fresh unique label. + service.register_terminal("9d00610c", "pane-3", is_kiro=False) + mock_meta.return_value = { + "tmux_session": "cao-investigation", + "tmux_window": "sherlock-e8dc", + } + + # herdr tab list shows the label is still live (new incarnation alive). + tab_list_response = json.dumps( + {"result": {"tabs": [{"label": "sherlock-e8dc", "workspace_id": "ws-1"}]}} + ) + + def subprocess_side_effect(cmd, **_): + m = MagicMock() + m.returncode = 0 + m.stdout = tab_list_response + return m + + mock_run.side_effect = subprocess_side_effect + + service._handle_lifecycle_event("pane.closed", {"pane_id": "pane-3"}) + + # The live terminal must survive: no delete, maps intact. + mock_delete.assert_not_called() + assert service._pane_to_terminal.get("pane-3") == "9d00610c" + assert service._terminal_to_pane.get("9d00610c") == "pane-3" + + @patch("cli_agent_orchestrator.services.herdr_inbox_service.subprocess.run") + @patch("cli_agent_orchestrator.clients.database.delete_terminal") + @patch("cli_agent_orchestrator.clients.database.get_terminal_metadata") + def test_pane_closed_deletes_when_label_gone(self, mock_meta, mock_delete, mock_run): + """Genuine close (label absent from herdr) still deletes the terminal. + + This is the user-initiated-close path: no kill_window ran, so the + pane_closed event is the only signal. The tab label is genuinely gone + from herdr, so the terminal must be cleaned up. + """ + service = HerdrInboxService(socket_path="/tmp/test.sock") + service.register_terminal("9d00610c", "pane-3", is_kiro=False) + mock_meta.return_value = { + "tmux_session": "cao-investigation", + "tmux_window": "sherlock-e8dc", + } + + # herdr tab list does NOT contain the label — genuinely closed. + tab_list_response = json.dumps( + {"result": {"tabs": [{"label": "other-tab", "workspace_id": "ws-1"}]}} + ) + + def subprocess_side_effect(cmd, **_): + m = MagicMock() + m.returncode = 0 + m.stdout = tab_list_response + return m + + mock_run.side_effect = subprocess_side_effect + + service._handle_lifecycle_event("pane.closed", {"pane_id": "pane-3"}) + + mock_delete.assert_called_once_with("9d00610c") + assert "pane-3" not in service._pane_to_terminal + assert "9d00610c" not in service._terminal_to_pane + + @patch("cli_agent_orchestrator.services.herdr_inbox_service.subprocess.run") + @patch("cli_agent_orchestrator.clients.database.delete_terminal") + @patch("cli_agent_orchestrator.clients.database.get_terminal_metadata") + def test_pane_closed_deletes_when_herdr_query_fails( + self, mock_meta, mock_delete, mock_run + ): + """If herdr cannot be queried, fall back to deleting (fail toward cleanup). + + We must never leave a terminal we believe is open when it may be closed, + so an unreachable herdr makes the liveness check fail toward delete. + """ + service = HerdrInboxService(socket_path="/tmp/test.sock") + service.register_terminal("9d00610c", "pane-3", is_kiro=False) + mock_meta.return_value = { + "tmux_session": "cao-investigation", + "tmux_window": "sherlock-e8dc", + } + + def subprocess_side_effect(cmd, **_): + m = MagicMock() + m.returncode = 1 # herdr query failed + m.stdout = "" + m.stderr = "boom" + return m + + mock_run.side_effect = subprocess_side_effect + + service._handle_lifecycle_event("pane.closed", {"pane_id": "pane-3"}) + + mock_delete.assert_called_once_with("9d00610c") + assert "pane-3" not in service._pane_to_terminal + @patch("cli_agent_orchestrator.clients.database.get_terminal_metadata") @patch("cli_agent_orchestrator.clients.database.delete_terminals_by_session") def test_workspace_closed_removes_all_terminals_for_session( From 54e6100de8fcb2d8228a9ceb5493ffae2faa1224 Mon Sep 17 00:00:00 2001 From: Anil Kumar Date: Mon, 8 Jun 2026 23:32:22 -0700 Subject: [PATCH 2/7] fix(output): distinguish buffer overflow from no-response in get_output When no response marker was found after escalating through all scrollback steps, get_output always returned [PARTIAL RESPONSE], conflating two distinct cases: - overflow: the marker was produced but pushed past the scrollback cap - no response: the agent completed without producing any text (tool-only turn, crash, or silent timeout) Use buffer fullness as the discriminator. If the retrieved buffer is at least 90% of the last escalation cap, treat it as overflow and keep the [PARTIAL RESPONSE] label. Otherwise emit [NO RESPONSE], which lets the caller distinguish "retry/fetch more" from "agent failed to respond, investigate why" instead of mislabeling a silent turn as partial. Updates tests to cover both the sparse no-response buffer and the near-full overflow buffer. Assisted-by: Amazon Q Developer --- .../services/terminal_service.py | 39 ++++++++++++++---- test/services/test_terminal_service_full.py | 41 +++++++++++++++++-- 2 files changed, 69 insertions(+), 11 deletions(-) diff --git a/src/cli_agent_orchestrator/services/terminal_service.py b/src/cli_agent_orchestrator/services/terminal_service.py index d8640566..c6c731be 100644 --- a/src/cli_agent_orchestrator/services/terminal_service.py +++ b/src/cli_agent_orchestrator/services/terminal_service.py @@ -672,14 +672,37 @@ def get_output(terminal_id: str, mode: OutputMode = OutputMode.FULL) -> str: except ValueError: pass - # Full scrollback also failed — return partial. - logger.warning( - "get_output: %s response marker not found in full_history, returning partial", - terminal_id, - ) - return ( - f"[PARTIAL RESPONSE - response marker not found in full scrollback]\n{full_output}" - ) + # Full scrollback also failed — distinguish overflow from no response. + # If the buffer is close to full (>=90% of last escalation cap), the + # response marker was likely produced but pushed past the scrollback + # limit (overflow). If the buffer is mostly empty, the agent never + # produced a text response (e.g. only tool calls, crash, or timeout). + actual_lines = len(full_output.splitlines()) + overflow_threshold = int(_ESCALATION_STEPS[-1] * 0.9) + if actual_lines >= overflow_threshold: + logger.warning( + "get_output: %s response marker not found, buffer near-full " + "(%d lines >= %d threshold) — likely overflow", + terminal_id, + actual_lines, + overflow_threshold, + ) + return ( + f"[PARTIAL RESPONSE - response marker not found, buffer overflow likely " + f"({actual_lines} lines retrieved)]\n{full_output}" + ) + else: + logger.warning( + "get_output: %s response marker not found, buffer sparse " + "(%d lines < %d threshold) — agent likely produced no text response", + terminal_id, + actual_lines, + overflow_threshold, + ) + return ( + f"[NO RESPONSE - agent completed without producing a text response " + f"({actual_lines} lines in buffer)]\n{full_output}" + ) except Exception as e: logger.error(f"Failed to get output from terminal {terminal_id}: {e}") diff --git a/test/services/test_terminal_service_full.py b/test/services/test_terminal_service_full.py index 11fffdd7..7e11923e 100644 --- a/test/services/test_terminal_service_full.py +++ b/test/services/test_terminal_service_full.py @@ -856,15 +856,16 @@ def test_get_output_last_escalates_and_finds_marker( @patch("cli_agent_orchestrator.services.terminal_service.status_monitor") @patch("cli_agent_orchestrator.backends.registry._backend") @patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata") - def test_get_output_last_escalates_all_steps_then_partial( + def test_get_output_last_escalates_all_steps_then_no_response( self, mock_get_metadata, mock_tmux, mock_status_monitor, mock_provider_manager ): - """Escalating fetch: marker never found — returns PARTIAL RESPONSE prefix.""" + """Escalating fetch: marker never found, sparse buffer — returns NO RESPONSE prefix.""" mock_get_metadata.return_value = { "tmux_session": "cao-session", "tmux_window": "developer-abcd", } mock_status_monitor.get_buffer.return_value = "buffered output" + # Short output (few lines) — agent never produced text response mock_tmux.get_history.return_value = "raw tail content" mock_provider = MagicMock( spec=[ @@ -877,7 +878,8 @@ def test_get_output_last_escalates_all_steps_then_partial( result = get_output("test1234", OutputMode.LAST) - assert result.startswith("[PARTIAL RESPONSE") + assert result.startswith("[NO RESPONSE") + assert "agent completed without producing a text response" in result assert "raw tail content" in result # 4 escalation steps + 1 full_history attempt = 5 total assert mock_tmux.get_history.call_count == 5 @@ -885,6 +887,39 @@ def test_get_output_last_escalates_all_steps_then_partial( _, last_kwargs = mock_tmux.get_history.call_args assert last_kwargs.get("full_history") is True + @patch("cli_agent_orchestrator.services.terminal_service.provider_manager") + @patch("cli_agent_orchestrator.services.terminal_service.status_monitor") + @patch("cli_agent_orchestrator.backends.registry._backend") + @patch("cli_agent_orchestrator.services.terminal_service.get_terminal_metadata") + def test_get_output_last_escalates_all_steps_then_partial_overflow( + self, mock_get_metadata, mock_tmux, mock_status_monitor, mock_provider_manager + ): + """Escalating fetch: marker never found, buffer near-full — returns PARTIAL RESPONSE (overflow).""" + mock_get_metadata.return_value = { + "tmux_session": "cao-session", + "tmux_window": "developer-abcd", + } + mock_status_monitor.get_buffer.return_value = "buffered output" + # Simulate near-full buffer (>= 90% of 5000 = 4500 lines) + large_output = "\n".join(f"line {i}" for i in range(4800)) + mock_tmux.get_history.return_value = large_output + mock_provider = MagicMock( + spec=[ + "extract_last_message_from_script", + "extraction_retries", + ] + ) # no extraction_tail_lines attribute → escalation path + mock_provider.extract_last_message_from_script.side_effect = ValueError("no marker") + mock_provider_manager.get_provider.return_value = mock_provider + + result = get_output("test1234", OutputMode.LAST) + + assert result.startswith("[PARTIAL RESPONSE") + assert "buffer overflow likely" in result + assert "4800 lines retrieved" in result + # 4 escalation steps + 1 full_history attempt = 5 total + assert mock_tmux.get_history.call_count == 5 + @patch("cli_agent_orchestrator.services.terminal_service.provider_manager") @patch("cli_agent_orchestrator.services.terminal_service.status_monitor") @patch("cli_agent_orchestrator.backends.registry._backend") From fa7b3143fea48c542a85c183d835f41ddcb3999f Mon Sep 17 00:00:00 2001 From: Anil Kumar Date: Fri, 12 Jun 2026 14:55:02 -0700 Subject: [PATCH 3/7] fix(herdr): re-map renumbered panes instead of deleting live terminals HerdrInboxService._reconcile() treated herdr's ephemeral compact pane_ids as stable terminal identity. herdr renumbers pane_ids when a sibling tab in the workspace closes, so a live terminal's stored pane_id falls out of `herdr pane list`. The stale-pane set-diff then deleted the still-running terminal. Every new-worker register_terminal forces a socket reconnect, which runs reconcile over all siblings, so the bug triggered routinely in multi-pane sessions. Reconcile now treats the durable tab label (tmux_window) as identity: - live label -> re-resolve the current pane_id via get_pane_id and re-map both _pane_to_terminal and _terminal_to_pane (no delete) - absent label (or re-resolve failure) -> prune and delete as before Workspace teardown is likewise gated on workspace-label liveness rather than the pane diff, so a workspace whose panes were merely renumbered is never killed out from under working agents. Secondary fix: the workspace.closed handler looked up _workspace_to_session, which is populated only by _reconcile. A workspace that closed before any reconcile cached it produced a silent no-op and leaked the session's terminals as orphan DB rows. Added _resolve_session_from_herdr() to resolve the session from live herdr state when the map misses, treating the event as unresolvable (no destructive action) only after the live query also fails. Removed now-dead _resolve_pane_id() and invalidate_cache() from HerdrBackend (the reconcile path no longer resolves pane_ids by scanning the list) and their tests. Adds 5 regression tests covering pane re-map, delete-on-absent-label, live-workspace protection, uncached workspace.closed resolution, and the unresolvable safe no-op. Assisted by AI --- .../backends/herdr_backend.py | 46 +--- .../services/herdr_inbox_service.py | 149 ++++++++++-- test/backends/test_herdr_backend.py | 59 ----- test/backends/test_herdr_inbox_service.py | 229 +++++++++++++++++- 4 files changed, 347 insertions(+), 136 deletions(-) diff --git a/src/cli_agent_orchestrator/backends/herdr_backend.py b/src/cli_agent_orchestrator/backends/herdr_backend.py index 0b33288d..bea3f339 100644 --- a/src/cli_agent_orchestrator/backends/herdr_backend.py +++ b/src/cli_agent_orchestrator/backends/herdr_backend.py @@ -109,7 +109,7 @@ def _sanitize_herdr_args(args: List[str]) -> List[str]: # Cache TTL for pane_id resolution (seconds). -# Used by _resolve_pane_id() (inbox service path, herdr-native terminal_ids) and +# Used by get_pane_id() (fast-path, reads the cache populated at create time) and # _resolve_workspace_id(). _resolve_pane_id_from_window() never caches pane_ids — # herdr renumbers panes on deletion, so it resolves the pane fresh every call. _PANE_CACHE_TTL = 5.0 @@ -202,42 +202,6 @@ def _parse_herdr_json(self, stdout: str) -> dict: return cast(dict, data["result"]) return cast(dict, data) - def _resolve_pane_id(self, terminal_id: str) -> str: - """Resolve terminal_id to current compact pane_id. - - Uses a cache with 5s TTL to reduce redundant herdr pane list calls. - - Args: - terminal_id: Stable terminal identifier - - Returns: - Current compact pane_id - - Raises: - TerminalNotFoundError: If terminal_id not found in pane list - """ - # Check cache - if terminal_id in self._pane_cache: - pane_id, cached_at = self._pane_cache[terminal_id] - if time.time() - cached_at < _PANE_CACHE_TTL: - return pane_id - - # Resolve via herdr pane list - result = self._run_herdr(["pane", "list"]) - try: - data = self._parse_herdr_json(result.stdout) - panes = data.get("panes", []) if isinstance(data, dict) else data - except json.JSONDecodeError as e: - raise TerminalBackendError(f"Failed to parse herdr pane list output: {e}") from e - - for pane in panes: - if pane.get("terminal_id") == terminal_id: - pane_id = str(pane["pane_id"]) - self._pane_cache[terminal_id] = (pane_id, time.time()) - return pane_id - - raise TerminalNotFoundError(terminal_id) - def _resolve_workspace_id(self, session_name: str) -> str: """Resolve session_name (workspace label) to workspace ID. @@ -887,11 +851,3 @@ def _resolve_pane_id_from_window(self, session_name: str, window_name: str) -> s return str(pane["pane_id"]) raise TerminalNotFoundError(f"{session_name}:{window_name}") - - def invalidate_cache(self) -> None: - """Invalidate all cached pane_id mappings. - - Called after herdr reconnection when pane_ids may have compacted. - """ - self._pane_cache.clear() - self._workspace_cache.clear() diff --git a/src/cli_agent_orchestrator/services/herdr_inbox_service.py b/src/cli_agent_orchestrator/services/herdr_inbox_service.py index c9cc14be..6c290615 100644 --- a/src/cli_agent_orchestrator/services/herdr_inbox_service.py +++ b/src/cli_agent_orchestrator/services/herdr_inbox_service.py @@ -370,54 +370,109 @@ async def _reconcile(self) -> None: except (json.JSONDecodeError, KeyError) as e: logger.warning(f"Reconcile: failed to parse tab list: {e}") - # Find and prune stale panes + # Find stale panes: stored pane_id no longer in herdr's live pane list. + # + # A stale pane_id does NOT mean the terminal is dead. herdr renumbers + # compact pane_ids when a sibling tab in the workspace closes, so a + # still-running terminal's stored pane_id can fall out of the live list + # while its tab is very much alive. Identity must come from the durable + # tab label (tmux_window), never the ephemeral pane_id. stale_pane_ids = set(self._pane_to_terminal.keys()) - live_pane_ids if not stale_pane_ids: logger.debug("Reconcile: all panes live, nothing to prune") return - # Track which sessions lose terminals - affected_sessions: Dict[str, int] = {} # session_name -> remaining count + # Live workspace labels, used to gate workspace teardown below: never + # kill a workspace whose label is still present in herdr. + live_workspace_labels = set(self._workspace_to_session.values()) + + # Sessions that genuinely lost a terminal (deleted, not re-mapped). + affected_sessions: Set[str] = set() + remapped = 0 + deleted = 0 for pane_id in stale_pane_ids: - terminal_id = self._pane_to_terminal.pop(pane_id, None) + terminal_id = self._pane_to_terminal.get(pane_id) if not terminal_id: + self._pane_to_terminal.pop(pane_id, None) continue - # Get session name before deleting + # Session/window identity before any mutation. meta = get_terminal_metadata(terminal_id) - term_session = meta["tmux_session"] if meta else None + term_session: Optional[str] = meta["tmux_session"] if meta else None + term_window: Optional[str] = meta["tmux_window"] if meta else None + + # Re-map renumbered-but-live panes instead of deleting. A live tab + # label means the pane_id was renumbered, not closed: re-resolve the + # current pane_id and update both maps. Only when re-resolution fails + # do we fall through to the delete path. + if term_window and self._label_still_live(term_window): + try: + new_pane_id = get_backend().get_pane_id( + terminal_id, term_session or "", term_window + ) + except Exception as e: + logger.warning( + "Reconcile: tab %s live but pane re-resolve failed for %s (%s); " + "deleting", + term_window, + terminal_id, + e, + ) + else: + self._pane_to_terminal.pop(pane_id, None) + self._pane_to_terminal[new_pane_id] = terminal_id + self._terminal_to_pane[terminal_id] = new_pane_id + logger.info( + "Reconcile: re-mapped %s %s -> %s (pane renumbered, tab still live)", + terminal_id, + pane_id, + new_pane_id, + ) + remapped += 1 + continue - # Remove from all maps + # Tab label genuinely gone (or re-resolve failed): prune maps and + # delete the orphaned DB record. + self._pane_to_terminal.pop(pane_id, None) self._terminal_to_pane.pop(terminal_id, None) self._kiro_terminals.discard(terminal_id) self._working_since.pop(terminal_id, None) - # Delete orphaned DB record try: delete_terminal(terminal_id) + deleted += 1 except Exception as e: logger.warning(f"Reconcile: failed to delete terminal {terminal_id}: {e}") if term_session: - affected_sessions.setdefault(term_session, 0) - - # Count remaining terminals per affected session - for tid, _ in self._terminal_to_pane.items(): - meta = get_terminal_metadata(tid) - if meta and meta["tmux_session"] in affected_sessions: - affected_sessions[meta["tmux_session"]] += 1 - - # Kill workspaces with zero remaining terminals - for session_name, remaining in affected_sessions.items(): - if remaining == 0: - try: - get_backend().kill_session(session_name) - logger.info(f"Reconcile: killed empty workspace {session_name}") - except Exception as e: - logger.warning(f"Reconcile: failed to kill workspace {session_name}: {e}") + affected_sessions.add(term_session) + + # Kill a workspace only when its label is gone from herdr AND no managed + # terminal remains for the session. A live label means the workspace is + # alive and its panes were merely renumbered — killing it would tear down + # working agents. + if affected_sessions: + remaining_by_session: Dict[str, int] = {s: 0 for s in affected_sessions} + for tid in self._terminal_to_pane: + meta = get_terminal_metadata(tid) + if meta and meta["tmux_session"] in remaining_by_session: + remaining_by_session[meta["tmux_session"]] += 1 + + for session_name, remaining in remaining_by_session.items(): + if remaining == 0 and session_name not in live_workspace_labels: + try: + get_backend().kill_session(session_name) + logger.info(f"Reconcile: killed empty workspace {session_name}") + except Exception as e: + logger.warning(f"Reconcile: failed to kill workspace {session_name}: {e}") - logger.info(f"Reconcile: pruned {len(stale_pane_ids)} stale panes") + logger.info( + "Reconcile: %d stale pane(s) — %d re-mapped, %d deleted", + len(stale_pane_ids), + remapped, + deleted, + ) async def _connect(self) -> None: """Connect to the herdr socket.""" @@ -554,6 +609,40 @@ def _label_still_live(self, window_name: str) -> bool: logger.warning("_label_still_live: could not query herdr (%s)", e) return False + def _resolve_session_from_herdr(self, workspace_id: str) -> Optional[str]: + """Resolve a workspace_id to its session name from live herdr state. + + Used by workspace.closed handling when the in-memory _workspace_to_session + map (populated only by _reconcile) does not contain the closed + workspace_id. Queries herdr workspace list, refreshes the whole map from + the result, and returns the label for workspace_id if found. + + Returns None when herdr cannot be queried or the workspace_id is not in + the live list, so the caller can treat the event as unresolvable and take + no destructive action. + """ + try: + result = subprocess.run( + ["herdr", "--session", self._herdr_session, "workspace", "list"], + capture_output=True, + text=True, + timeout=10, + ) + if result.returncode != 0: + logger.warning( + "_resolve_session_from_herdr: herdr workspace list failed (rc=%s): %s", + result.returncode, + result.stderr.strip(), + ) + return None + ws_data = json.loads(result.stdout) + workspaces = ws_data.get("result", {}).get("workspaces", []) + self._workspace_to_session = {ws["workspace_id"]: ws["label"] for ws in workspaces} + return self._workspace_to_session.get(workspace_id) + except (subprocess.SubprocessError, json.JSONDecodeError, KeyError, OSError) as e: + logger.warning("_resolve_session_from_herdr: could not query herdr (%s)", e) + return None + def _handle_lifecycle_event(self, event_type: str, data: dict) -> None: """Handle pane.closed and workspace.closed events.""" from cli_agent_orchestrator.backends.registry import get_backend @@ -629,7 +718,15 @@ def _handle_lifecycle_event(self, event_type: str, data: dict) -> None: workspace_id = data.get("workspace_id", "") session_name = self._workspace_to_session.get(workspace_id) if not session_name: - return + # The in-memory map is populated only by _reconcile(); a workspace + # that closed before any reconcile cached it would otherwise be a + # silent no-op, leaking the session's terminals as orphan rows. + # Resolve the session identity from live herdr state instead of + # trusting the map. Only treat the event as unresolvable after the + # live query also fails to identify a session. + session_name = self._resolve_session_from_herdr(workspace_id) + if not session_name: + return # Delete all DB terminals for this session try: diff --git a/test/backends/test_herdr_backend.py b/test/backends/test_herdr_backend.py index 537e0706..f6d0f31e 100644 --- a/test/backends/test_herdr_backend.py +++ b/test/backends/test_herdr_backend.py @@ -79,55 +79,6 @@ def test_all_methods_implemented(self, backend): assert callable(getattr(backend, method)) -# --- Pane ID Resolution --- - - -class TestPaneIdResolution: - """Test terminal_id → pane_id resolution with cache.""" - - @patch("subprocess.run") - def test_resolves_pane_id_from_list(self, mock_run, backend): - """Should resolve terminal_id to pane_id via herdr pane list.""" - panes = [ - {"terminal_id": "term_abc", "pane_id": "w1-1", "workspace_id": "w1"}, - {"terminal_id": "term_def", "pane_id": "w1-2", "workspace_id": "w1"}, - ] - mock_run.return_value = _completed(_make_pane_list_response(panes)) - - result = backend._resolve_pane_id("term_abc") - assert result == "w1-1" - - @patch("subprocess.run") - def test_cache_hit_avoids_subprocess(self, mock_run, backend): - """Cached pane_id should be returned without calling herdr.""" - backend._pane_cache["term_cached"] = ("w2-3", time.time()) - - result = backend._resolve_pane_id("term_cached") - assert result == "w2-3" - mock_run.assert_not_called() - - @patch("subprocess.run") - def test_cache_expired_calls_subprocess(self, mock_run, backend): - """Expired cache entry should trigger a fresh resolve.""" - backend._pane_cache["term_old"] = ("w2-3", time.time() - 10.0) # expired - - panes = [{"terminal_id": "term_old", "pane_id": "w2-4", "workspace_id": "w2"}] - mock_run.return_value = _completed(_make_pane_list_response(panes)) - - result = backend._resolve_pane_id("term_old") - assert result == "w2-4" - mock_run.assert_called_once() - - @patch("subprocess.run") - def test_raises_terminal_not_found(self, mock_run, backend): - """Should raise TerminalNotFoundError when terminal_id is not in list.""" - panes = [{"terminal_id": "term_other", "pane_id": "w1-1", "workspace_id": "w1"}] - mock_run.return_value = _completed(_make_pane_list_response(panes)) - - with pytest.raises(TerminalNotFoundError, match="term_missing"): - backend._resolve_pane_id("term_missing") - - # --- Command Construction --- @@ -522,16 +473,6 @@ def test_herdr_not_found_raises_backend_error(self, mock_run, backend): with pytest.raises(TerminalBackendError, match="herdr CLI not found"): backend._run_herdr(["workspace", "list"]) - def test_invalidate_cache_clears_all(self, backend): - """invalidate_cache should empty both caches.""" - backend._pane_cache["tid1"] = ("w1-1", time.time()) - backend._workspace_cache["cao-test"] = ("w1", time.time()) - - backend.invalidate_cache() - - assert backend._pane_cache == {} - assert backend._workspace_cache == {} - # --- Multi-pane resolution (S-008) --- diff --git a/test/backends/test_herdr_inbox_service.py b/test/backends/test_herdr_inbox_service.py index 575475f5..c1be80ab 100644 --- a/test/backends/test_herdr_inbox_service.py +++ b/test/backends/test_herdr_inbox_service.py @@ -747,9 +747,7 @@ def test_pane_closed_unknown_pane_is_noop(self, mock_meta, mock_delete): @patch("cli_agent_orchestrator.services.herdr_inbox_service.subprocess.run") @patch("cli_agent_orchestrator.clients.database.delete_terminal") @patch("cli_agent_orchestrator.clients.database.get_terminal_metadata") - def test_pane_closed_skips_delete_when_label_still_live( - self, mock_meta, mock_delete, mock_run - ): + def test_pane_closed_skips_delete_when_label_still_live(self, mock_meta, mock_delete, mock_run): """Replayed close for a reused compact pane_id must NOT delete a live terminal. herdr (0.6.8) replays ALL historical pane_closed events on every fresh @@ -828,9 +826,7 @@ def subprocess_side_effect(cmd, **_): @patch("cli_agent_orchestrator.services.herdr_inbox_service.subprocess.run") @patch("cli_agent_orchestrator.clients.database.delete_terminal") @patch("cli_agent_orchestrator.clients.database.get_terminal_metadata") - def test_pane_closed_deletes_when_herdr_query_fails( - self, mock_meta, mock_delete, mock_run - ): + def test_pane_closed_deletes_when_herdr_query_fails(self, mock_meta, mock_delete, mock_run): """If herdr cannot be queried, fall back to deleting (fail toward cleanup). We must never leave a terminal we believe is open when it may be closed, @@ -1042,6 +1038,227 @@ async def run(): callback.assert_called_once_with("tid-a") +class TestHerdrInboxServiceReconcileLiveTerminal: + """Reconcile MUST NOT delete a terminal whose tab label is still live. + + herdr renumbers compact pane_ids when a sibling tab closes, so a live + terminal's stored pane_id can fall out of `herdr pane list`. The stale-pane + diff then treats it as dead. Identity must come from the durable tab label + (tmux_window), not the ephemeral pane_id: a live label means the pane was + renumbered (re-map it), an absent label means it was genuinely closed + (delete it). + """ + + @patch("cli_agent_orchestrator.backends.registry.get_backend") + @patch("cli_agent_orchestrator.services.herdr_inbox_service.subprocess.run") + @patch("cli_agent_orchestrator.clients.database.delete_terminal") + @patch("cli_agent_orchestrator.clients.database.get_terminal_metadata") + def test_reconcile_remaps_renumbered_but_live_pane( + self, mock_meta, mock_delete, mock_run, mock_get_backend + ): + """Stored pane_id missing from live list but tab label live -> re-map, never delete.""" + service = HerdrInboxService(socket_path="/tmp/test.sock") + service.register_terminal("tid1", "pane-old") + mock_meta.return_value = {"tmux_session": "sess", "tmux_window": "win-1"} + + # Live pane list no longer contains pane-old (renumbered to pane-new). + pane_list_response = json.dumps({"result": {"panes": [{"pane_id": "pane-new"}]}}) + # Empty workspace list bypasses the DB cross-check; isolates stale-pane logic. + ws_list_response = json.dumps({"result": {"workspaces": []}}) + # _label_still_live() sees win-1 as a live tab -> pane was renumbered, not closed. + tab_list_response = json.dumps( + {"result": {"tabs": [{"label": "win-1", "workspace_id": "ws-1"}]}} + ) + + def subprocess_side_effect(cmd, **_): + m = MagicMock() + m.returncode = 0 + if "pane" in cmd and "list" in cmd: + m.stdout = pane_list_response + elif "tab" in cmd and "list" in cmd: + m.stdout = tab_list_response + else: + m.stdout = ws_list_response + return m + + mock_run.side_effect = subprocess_side_effect + + mock_backend = MagicMock() + mock_backend.get_pane_id.return_value = "pane-new" + mock_get_backend.return_value = mock_backend + + _run_async(service._reconcile()) + + # Live terminal survives: NOT deleted. + mock_delete.assert_not_called() + # Re-mapped to the current pane_id. + assert service._pane_to_terminal.get("pane-new") == "tid1" + assert service._terminal_to_pane.get("tid1") == "pane-new" + # Old (renumbered-away) pane_id is gone from the map. + assert "pane-old" not in service._pane_to_terminal + + @patch("cli_agent_orchestrator.backends.registry.get_backend") + @patch("cli_agent_orchestrator.services.herdr_inbox_service.subprocess.run") + @patch("cli_agent_orchestrator.clients.database.delete_terminal") + @patch("cli_agent_orchestrator.clients.database.get_terminal_metadata") + def test_reconcile_deletes_when_tab_label_gone( + self, mock_meta, mock_delete, mock_run, mock_get_backend + ): + """Stored pane_id missing AND tab label absent from herdr -> prune maps + delete.""" + service = HerdrInboxService(socket_path="/tmp/test.sock") + service.register_terminal("tid1", "pane-old") + service._working_since["tid1"] = time.time() + mock_meta.return_value = {"tmux_session": "sess", "tmux_window": "win-gone"} + + pane_list_response = json.dumps({"result": {"panes": [{"pane_id": "pane-other"}]}}) + ws_list_response = json.dumps({"result": {"workspaces": []}}) + # win-gone is NOT among live tab labels -> genuinely closed. + tab_list_response = json.dumps( + {"result": {"tabs": [{"label": "some-other-window", "workspace_id": "ws-1"}]}} + ) + + def subprocess_side_effect(cmd, **_): + m = MagicMock() + m.returncode = 0 + if "pane" in cmd and "list" in cmd: + m.stdout = pane_list_response + elif "tab" in cmd and "list" in cmd: + m.stdout = tab_list_response + else: + m.stdout = ws_list_response + return m + + mock_run.side_effect = subprocess_side_effect + mock_get_backend.return_value = MagicMock() + + _run_async(service._reconcile()) + + # Genuinely-closed terminal is cleaned up. + mock_delete.assert_called_once_with("tid1") + assert "pane-old" not in service._pane_to_terminal + assert "tid1" not in service._terminal_to_pane + assert "tid1" not in service._working_since + + @patch("cli_agent_orchestrator.backends.registry.get_backend") + @patch("cli_agent_orchestrator.services.herdr_inbox_service.subprocess.run") + @patch("cli_agent_orchestrator.clients.database.delete_terminal") + @patch("cli_agent_orchestrator.clients.database.list_terminals_by_session") + @patch("cli_agent_orchestrator.clients.database.get_terminal_metadata") + def test_reconcile_does_not_kill_live_workspace_on_pane_diff( + self, mock_meta, mock_list, mock_delete, mock_run, mock_get_backend + ): + """A live workspace (label present) must NOT be killed merely because its + pane failed the pane_id set diff. The renumbered pane is re-mapped and the + workspace stays alive.""" + service = HerdrInboxService(socket_path="/tmp/test.sock") + service.register_terminal("tid1", "pane-old") + mock_meta.return_value = {"tmux_session": "sess", "tmux_window": "win-1"} + + pane_list_response = json.dumps({"result": {"panes": [{"pane_id": "pane-new"}]}}) + # Workspace "sess" is LIVE. + ws_list_response = json.dumps( + {"result": {"workspaces": [{"workspace_id": "ws-1", "label": "sess"}]}} + ) + # Tab label win-1 is LIVE. + tab_list_response = json.dumps( + {"result": {"tabs": [{"label": "win-1", "workspace_id": "ws-1"}]}} + ) + + def subprocess_side_effect(cmd, **_): + m = MagicMock() + m.returncode = 0 + if "pane" in cmd and "list" in cmd: + m.stdout = pane_list_response + elif "tab" in cmd and "list" in cmd: + m.stdout = tab_list_response + else: + m.stdout = ws_list_response + return m + + mock_run.side_effect = subprocess_side_effect + # DB cross-check: terminal's window matches a live tab -> not a ghost. + mock_list.return_value = [{"id": "tid1", "tmux_window": "win-1"}] + + mock_backend = MagicMock() + mock_backend.get_pane_id.return_value = "pane-new" + mock_get_backend.return_value = mock_backend + + _run_async(service._reconcile()) + + # The live workspace must NOT be killed. + mock_backend.kill_session.assert_not_called() + # And the live terminal must NOT be deleted. + mock_delete.assert_not_called() + # Terminal re-mapped, still owned by the session. + assert service._terminal_to_pane.get("tid1") == "pane-new" + + +class TestHerdrInboxServiceWorkspaceClosedLiveResolution: + """workspace.closed cleanup must not depend on _workspace_to_session having + been populated by a prior _reconcile(). It must resolve session identity from + live herdr state at event time so a real workspace close always cleans up. + """ + + @patch("cli_agent_orchestrator.services.herdr_inbox_service.subprocess.run") + @patch("cli_agent_orchestrator.clients.database.get_terminal_metadata") + @patch("cli_agent_orchestrator.clients.database.delete_terminals_by_session") + def test_workspace_closed_resolves_uncached_workspace_from_live_herdr( + self, mock_delete_by_session, mock_meta, mock_run + ): + """workspace_id NOT in the in-memory map is resolved via herdr workspace + list, then the session's terminals are deleted.""" + service = HerdrInboxService(socket_path="/tmp/test.sock") + # Map is empty — the workspace closed before any reconcile cached it. + assert service._workspace_to_session == {} + service.register_terminal("tid1", "p-1") + mock_meta.side_effect = lambda tid: {"tmux_session": "sess-new"} if tid == "tid1" else None + + # Live herdr resolves ws-new -> sess-new. + ws_list_response = json.dumps( + {"result": {"workspaces": [{"workspace_id": "ws-new", "label": "sess-new"}]}} + ) + + def subprocess_side_effect(cmd, **_): + m = MagicMock() + m.returncode = 0 + m.stdout = ws_list_response + return m + + mock_run.side_effect = subprocess_side_effect + + service._handle_lifecycle_event("workspace.closed", {"workspace_id": "ws-new"}) + + # Resolved live -> session terminals deleted despite empty in-memory map. + mock_delete_by_session.assert_called_once_with("sess-new") + # Map pruned for the closed session's terminal. + assert "p-1" not in service._pane_to_terminal + assert "tid1" not in service._terminal_to_pane + + @patch("cli_agent_orchestrator.services.herdr_inbox_service.subprocess.run") + @patch("cli_agent_orchestrator.clients.database.delete_terminals_by_session") + def test_workspace_closed_unresolvable_is_safe_noop(self, mock_delete_by_session, mock_run): + """workspace_id absent from both the map and live herdr state -> no deletions.""" + service = HerdrInboxService(socket_path="/tmp/test.sock") + + # Live herdr does not know this workspace either. + ws_list_response = json.dumps( + {"result": {"workspaces": [{"workspace_id": "ws-other", "label": "sess-other"}]}} + ) + + def subprocess_side_effect(cmd, **_): + m = MagicMock() + m.returncode = 0 + m.stdout = ws_list_response + return m + + mock_run.side_effect = subprocess_side_effect + + service._handle_lifecycle_event("workspace.closed", {"workspace_id": "ws-ghost"}) + + # Nothing destructive happens. + mock_delete_by_session.assert_not_called() + + class TestHerdrInboxServiceSocketPath: """Test socket path resolution.""" From a6d4021600865d126c987d113735e63b06d64047 Mon Sep 17 00:00:00 2001 From: Anil Kumar Date: Tue, 16 Jun 2026 11:14:23 -0700 Subject: [PATCH 4/7] fix(cli): auto-detect server backend in CLI commands (closes #308) When cao-server --terminal herdr is used without terminal_backend in config.json, CLI processes (cao launch, cao terminal restore, cao info) defaulted to tmux because they resolve get_backend() independently. - /health endpoint now reports terminal_backend field - New sync_backend_from_server() helper queries /health and aligns the CLI's backend singleton before attach/create_window calls - cao info checks CAO_SESSION_NAME env var (set by herdr) before falling back to tmux display-message --- src/cli_agent_orchestrator/api/main.py | 6 ++ .../cli/commands/info.py | 29 ++++---- .../cli/commands/launch.py | 10 ++- .../cli/commands/terminal.py | 2 + src/cli_agent_orchestrator/utils/terminal.py | 26 +++++++ test/api/test_api_endpoints.py | 18 +++++ test/cli/commands/test_info.py | 43 +++++++++++- test/cli/commands/test_launch.py | 63 +++++++++++++++++ test/utils/test_terminal.py | 70 +++++++++++++++++++ 9 files changed, 251 insertions(+), 16 deletions(-) diff --git a/src/cli_agent_orchestrator/api/main.py b/src/cli_agent_orchestrator/api/main.py index 9e0553d7..65465b64 100644 --- a/src/cli_agent_orchestrator/api/main.py +++ b/src/cli_agent_orchestrator/api/main.py @@ -378,12 +378,18 @@ def _build_pty_env() -> Dict[str, str]: async def health_check(): import shutil + from cli_agent_orchestrator.backends.herdr_backend import HerdrBackend + def _probe(binary: str) -> str: return "ok" if shutil.which(binary) else "unavailable" + backend = get_backend() + backend_name = "herdr" if isinstance(backend, HerdrBackend) else "tmux" + return { "status": "ok", "service": "cli-agent-orchestrator", + "terminal_backend": backend_name, "components": { "cao": "ok", "herdr": _probe("herdr"), diff --git a/src/cli_agent_orchestrator/cli/commands/info.py b/src/cli_agent_orchestrator/cli/commands/info.py index 269927f5..211c0d6f 100644 --- a/src/cli_agent_orchestrator/cli/commands/info.py +++ b/src/cli_agent_orchestrator/cli/commands/info.py @@ -1,5 +1,6 @@ """Info command for CLI Agent Orchestrator CLI.""" +import os import subprocess import click @@ -20,18 +21,22 @@ def info(): # Display database path click.echo(f"Database path: {DATABASE_FILE}") - # Try to get current session name from tmux - session_name = None - try: - result = subprocess.run( - ["tmux", "display-message", "-p", "#S"], - capture_output=True, - text=True, - check=True, - ) - session_name = result.stdout.strip() - except (subprocess.CalledProcessError, FileNotFoundError): - pass + # Try to get current session name: + # 1. Check CAO_SESSION_NAME env var (set by herdr backend) + # 2. Fall back to tmux display-message (works for tmux backend) + session_name = os.environ.get("CAO_SESSION_NAME") + + if not session_name: + try: + result = subprocess.run( + ["tmux", "display-message", "-p", "#S"], + capture_output=True, + text=True, + check=True, + ) + session_name = result.stdout.strip() + except (subprocess.CalledProcessError, FileNotFoundError): + pass if session_name and session_name.startswith(SESSION_PREFIX): try: diff --git a/src/cli_agent_orchestrator/cli/commands/launch.py b/src/cli_agent_orchestrator/cli/commands/launch.py index 30f429a5..95d7f062 100644 --- a/src/cli_agent_orchestrator/cli/commands/launch.py +++ b/src/cli_agent_orchestrator/cli/commands/launch.py @@ -16,7 +16,11 @@ SERVER_PORT, ) from cli_agent_orchestrator.models.terminal import TerminalStatus -from cli_agent_orchestrator.utils.terminal import poll_until_done, wait_until_terminal_status +from cli_agent_orchestrator.utils.terminal import ( + poll_until_done, + sync_backend_from_server, + wait_until_terminal_status, +) # Providers that require workspace folder access PROVIDERS_REQUIRING_WORKSPACE_ACCESS = { @@ -302,6 +306,10 @@ def launch( # if it times out we still attach so the user can inspect the # half-initialized session rather than orphan it in tmux. if not headless: + # Align the CLI's backend singleton with the running server. + # Without this, ``cao-server --terminal herdr`` + no config.json + # entry causes the CLI to default to tmux. See issue #308. + sync_backend_from_server() ready = wait_until_terminal_status( terminal["id"], {TerminalStatus.IDLE, TerminalStatus.COMPLETED}, diff --git a/src/cli_agent_orchestrator/cli/commands/terminal.py b/src/cli_agent_orchestrator/cli/commands/terminal.py index e3bb8e3c..b0560bdb 100644 --- a/src/cli_agent_orchestrator/cli/commands/terminal.py +++ b/src/cli_agent_orchestrator/cli/commands/terminal.py @@ -8,6 +8,7 @@ from cli_agent_orchestrator.backends.registry import get_backend from cli_agent_orchestrator.constants import API_BASE_URL, TERMINAL_LOG_DIR +from cli_agent_orchestrator.utils.terminal import sync_backend_from_server @click.group() @@ -62,6 +63,7 @@ def restore(terminal_id: str): window_shell = f"exec {login_shell} -l" try: + sync_backend_from_server() get_backend().create_window( session_name, window_name, diff --git a/src/cli_agent_orchestrator/utils/terminal.py b/src/cli_agent_orchestrator/utils/terminal.py index 5d8d7a87..4c6130ee 100644 --- a/src/cli_agent_orchestrator/utils/terminal.py +++ b/src/cli_agent_orchestrator/utils/terminal.py @@ -188,6 +188,32 @@ async def wait_until_status( return False +def sync_backend_from_server() -> None: + """Query the running cao-server's /health endpoint and align the local backend singleton. + + When ``cao-server --terminal herdr`` is used without setting ``terminal_backend`` + in config.json, CLI processes that call ``get_backend()`` default to tmux. + This function bridges the gap by reading the server's active backend and + calling ``set_backend()`` so subsequent ``get_backend()`` calls return the + correct backend type. + + Failures (server unreachable, unexpected response) are logged and silently + ignored — the CLI falls back to its normal config-based resolution. + """ + from cli_agent_orchestrator.backends.factory import BackendFactory + from cli_agent_orchestrator.backends.registry import set_backend + + try: + resp = requests.get(f"{API_BASE_URL}/health", timeout=2.0) + resp.raise_for_status() + data = resp.json() + backend_name = data.get("terminal_backend") + if backend_name: + set_backend(BackendFactory.create(backend_override=backend_name)) + except Exception as e: + logger.debug(f"sync_backend_from_server: could not reach server: {e}") + + def poll_until_done(terminal_id: str, timeout: float, polling_interval: float = 1.0) -> None: """Poll terminal status until completed/error or timeout. diff --git a/test/api/test_api_endpoints.py b/test/api/test_api_endpoints.py index 6c080378..ba2afad3 100644 --- a/test/api/test_api_endpoints.py +++ b/test/api/test_api_endpoints.py @@ -39,6 +39,24 @@ def test_health_check_returns_ok(self, client): assert components["herdr"] in ("ok", "unavailable") assert components["claude"] in ("ok", "unavailable") + def test_health_reports_terminal_backend_tmux(self, client): + """GET /health reports terminal_backend matching the active backend (tmux).""" + with patch("cli_agent_orchestrator.api.main.get_backend") as mock_backend: + mock_backend.return_value = MagicMock(spec=[]) # not HerdrBackend + response = client.get("/health") + data = response.json() + assert data["terminal_backend"] == "tmux" + + def test_health_reports_terminal_backend_herdr(self, client): + """GET /health reports terminal_backend='herdr' when server uses HerdrBackend.""" + from cli_agent_orchestrator.backends.herdr_backend import HerdrBackend + + mock_herdr = MagicMock(spec=HerdrBackend) + with patch("cli_agent_orchestrator.api.main.get_backend", return_value=mock_herdr): + response = client.get("/health") + data = response.json() + assert data["terminal_backend"] == "herdr" + # ── Agent profiles endpoint ────────────────────────────────────────── diff --git a/test/cli/commands/test_info.py b/test/cli/commands/test_info.py index 066feab1..247d52b4 100644 --- a/test/cli/commands/test_info.py +++ b/test/cli/commands/test_info.py @@ -11,15 +11,52 @@ class TestInfoCommand: """Test cao info command.""" def test_info_not_in_tmux(self): - """Test output when not running inside tmux.""" + """Test output when not running inside tmux and no env var.""" runner = CliRunner() - with patch("subprocess.run", side_effect=FileNotFoundError): - result = runner.invoke(info) + env_patch = {k: v for k, v in __import__("os").environ.items() if k != "CAO_SESSION_NAME"} + with patch.dict("os.environ", env_patch, clear=True): + with patch("subprocess.run", side_effect=FileNotFoundError): + result = runner.invoke(info) assert result.exit_code == 0 assert "Database path:" in result.output assert "Not currently in a CAO session." in result.output + def test_info_via_cao_session_name_env_var(self): + """Test that CAO_SESSION_NAME env var is used for herdr backend detection.""" + runner = CliRunner() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {"terminals": [{"id": "t1"}]} + + with patch.dict("os.environ", {"CAO_SESSION_NAME": "cao-herdr-session"}): + with patch("requests.get", return_value=mock_response): + result = runner.invoke(info) + + assert result.exit_code == 0 + assert "Session ID: cao-herdr-session" in result.output + assert "Active terminals: 1" in result.output + + def test_info_env_var_takes_precedence_over_tmux(self): + """Test that CAO_SESSION_NAME env var takes precedence over tmux.""" + runner = CliRunner() + mock_subprocess = MagicMock() + mock_subprocess.stdout = "cao-tmux-session\n" + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {"terminals": []} + + with patch.dict("os.environ", {"CAO_SESSION_NAME": "cao-herdr-session"}): + with patch("subprocess.run", return_value=mock_subprocess) as mock_run: + with patch("requests.get", return_value=mock_response): + result = runner.invoke(info) + + assert result.exit_code == 0 + assert "Session ID: cao-herdr-session" in result.output + # tmux should NOT have been called since env var was present + mock_run.assert_not_called() + def test_info_in_tmux_non_cao_session(self): """Test output when in tmux but not a CAO session.""" runner = CliRunner() diff --git a/test/cli/commands/test_launch.py b/test/cli/commands/test_launch.py index 02c80529..53faec91 100644 --- a/test/cli/commands/test_launch.py +++ b/test/cli/commands/test_launch.py @@ -8,6 +8,69 @@ from cli_agent_orchestrator.cli.commands.launch import _parse_env_pairs, launch +# ── Backend auto-detection (issue #308) ────────────────────────────── + + +def test_launch_syncs_backend_from_server_before_attach(): + """Non-headless launch calls sync_backend_from_server() before get_backend(). + + Regression guard for #308: when ``cao-server --terminal herdr`` is used + without config.json, the CLI must auto-detect the server's backend via + /health rather than defaulting to tmux. + """ + runner = CliRunner() + call_order = [] + + with ( + patch("cli_agent_orchestrator.cli.commands.launch.requests.post") as mock_post, + patch("cli_agent_orchestrator.cli.commands.launch.get_backend") as mock_get_backend, + patch("cli_agent_orchestrator.cli.commands.launch.wait_until_terminal_status") as mock_wait, + patch("cli_agent_orchestrator.cli.commands.launch.sync_backend_from_server") as mock_sync, + ): + mock_post.return_value.json.return_value = { + "session_name": "test-session", + "id": "test-terminal-id", + "name": "test-terminal", + } + mock_post.return_value.raise_for_status.return_value = None + mock_wait.return_value = True + + def record_sync(): + call_order.append("sync") + + def record_attach(*a, **kw): + call_order.append("attach") + + mock_sync.side_effect = record_sync + mock_get_backend.return_value.attach_session.side_effect = record_attach + + result = runner.invoke(launch, ["--agents", "test-agent", "--yolo"]) + + assert result.exit_code == 0 + mock_sync.assert_called_once() + assert call_order == ["sync", "attach"] + + +def test_launch_headless_does_not_sync_backend(): + """Headless launch skips sync_backend_from_server (no attach needed).""" + runner = CliRunner() + + with ( + patch("cli_agent_orchestrator.cli.commands.launch.requests.post") as mock_post, + patch("cli_agent_orchestrator.cli.commands.launch.sync_backend_from_server") as mock_sync, + ): + mock_post.return_value.json.return_value = { + "session_name": "test-session", + "id": "test-terminal-id", + "name": "test-terminal", + } + mock_post.return_value.raise_for_status.return_value = None + + result = runner.invoke(launch, ["--agents", "test-agent", "--headless", "--yolo"]) + + assert result.exit_code == 0 + mock_sync.assert_not_called() + def test_launch_passes_cwd_by_default(): """Test that launch command sends current working directory when not explicitly provided.""" diff --git a/test/utils/test_terminal.py b/test/utils/test_terminal.py index 9d92e5ab..a6b85557 100644 --- a/test/utils/test_terminal.py +++ b/test/utils/test_terminal.py @@ -10,6 +10,7 @@ generate_session_name, generate_terminal_id, generate_window_name, + sync_backend_from_server, validate_tmux_name, wait_for_shell, wait_until_status, @@ -389,3 +390,72 @@ def test_wait_until_terminal_status_multi_status_no_match(self, mock_get): ) assert result is False + + +# ── sync_backend_from_server (issue #308) ──────────────────────────── + + +class TestSyncBackendFromServer: + """Tests for sync_backend_from_server() helper.""" + + def test_syncs_herdr_backend_from_health(self): + """When /health reports terminal_backend='herdr', set_backend is called with herdr.""" + mock_resp = MagicMock() + mock_resp.json.return_value = {"terminal_backend": "herdr"} + mock_resp.raise_for_status.return_value = None + + with ( + patch("cli_agent_orchestrator.utils.terminal.requests.get", return_value=mock_resp), + patch("cli_agent_orchestrator.backends.factory.BackendFactory.create") as mock_create, + patch("cli_agent_orchestrator.backends.registry.set_backend") as mock_set, + ): + mock_backend = MagicMock() + mock_create.return_value = mock_backend + + sync_backend_from_server() + + mock_create.assert_called_once_with(backend_override="herdr") + mock_set.assert_called_once_with(mock_backend) + + def test_syncs_tmux_backend_from_health(self): + """When /health reports terminal_backend='tmux', set_backend is called with tmux.""" + mock_resp = MagicMock() + mock_resp.json.return_value = {"terminal_backend": "tmux"} + mock_resp.raise_for_status.return_value = None + + with ( + patch("cli_agent_orchestrator.utils.terminal.requests.get", return_value=mock_resp), + patch("cli_agent_orchestrator.backends.factory.BackendFactory.create") as mock_create, + patch("cli_agent_orchestrator.backends.registry.set_backend") as mock_set, + ): + mock_backend = MagicMock() + mock_create.return_value = mock_backend + + sync_backend_from_server() + + mock_create.assert_called_once_with(backend_override="tmux") + mock_set.assert_called_once_with(mock_backend) + + def test_silently_handles_connection_error(self): + """When server is unreachable, no exception is raised.""" + import requests as _requests + + with patch( + "cli_agent_orchestrator.utils.terminal.requests.get", + side_effect=_requests.exceptions.ConnectionError("refused"), + ): + # Must not raise + sync_backend_from_server() + + def test_silently_handles_missing_field(self): + """When /health response lacks terminal_backend, no set_backend call.""" + mock_resp = MagicMock() + mock_resp.json.return_value = {"status": "ok"} + mock_resp.raise_for_status.return_value = None + + with ( + patch("cli_agent_orchestrator.utils.terminal.requests.get", return_value=mock_resp), + patch("cli_agent_orchestrator.backends.registry.set_backend") as mock_set, + ): + sync_backend_from_server() + mock_set.assert_not_called() From 1361e17989d8beefdfca68ba1b6018ee89381b2a Mon Sep 17 00:00:00 2001 From: Anil Kumar Date: Wed, 17 Jun 2026 18:09:04 -0700 Subject: [PATCH 5/7] refactor: use count('\n') instead of splitlines() for line counting Avoids allocating a full line list on large scrollback buffers. Addresses Copilot review comment on PR #309. --- src/cli_agent_orchestrator/services/terminal_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli_agent_orchestrator/services/terminal_service.py b/src/cli_agent_orchestrator/services/terminal_service.py index c6c731be..5bf57d9c 100644 --- a/src/cli_agent_orchestrator/services/terminal_service.py +++ b/src/cli_agent_orchestrator/services/terminal_service.py @@ -677,7 +677,7 @@ def get_output(terminal_id: str, mode: OutputMode = OutputMode.FULL) -> str: # response marker was likely produced but pushed past the scrollback # limit (overflow). If the buffer is mostly empty, the agent never # produced a text response (e.g. only tool calls, crash, or timeout). - actual_lines = len(full_output.splitlines()) + actual_lines = full_output.count("\n") + 1 overflow_threshold = int(_ESCALATION_STEPS[-1] * 0.9) if actual_lines >= overflow_threshold: logger.warning( From 4b6dcd1203b4ad69d643e4fde17fb46c4f55a07a Mon Sep 17 00:00:00 2001 From: Anil Kumar Date: Thu, 18 Jun 2026 20:46:52 -0700 Subject: [PATCH 6/7] fix: invalidate pane cache before re-resolve in reconcile remap path Prevents get_pane_id() from returning the same stale pane_id that reconcile just proved is no longer live, which could leave event delivery pointed at the wrong pane until the cache TTL expires. Addresses review comment by haofeif on PR #309. --- .../services/herdr_inbox_service.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/cli_agent_orchestrator/services/herdr_inbox_service.py b/src/cli_agent_orchestrator/services/herdr_inbox_service.py index 6c290615..dc54db7f 100644 --- a/src/cli_agent_orchestrator/services/herdr_inbox_service.py +++ b/src/cli_agent_orchestrator/services/herdr_inbox_service.py @@ -408,7 +408,13 @@ async def _reconcile(self) -> None: # do we fall through to the delete path. if term_window and self._label_still_live(term_window): try: - new_pane_id = get_backend().get_pane_id( + # Invalidate pane cache so get_pane_id does a fresh label-based + # lookup instead of returning the stale pane_id we just proved + # is no longer live. See PR #309 review comment. + backend = get_backend() + if hasattr(backend, "_pane_cache"): + backend._pane_cache.pop(terminal_id, None) + new_pane_id = backend.get_pane_id( terminal_id, term_session or "", term_window ) except Exception as e: From b8feb1d8b579a1fc12715da2112e994951a3f18c Mon Sep 17 00:00:00 2001 From: Anil Kumar Date: Thu, 18 Jun 2026 20:51:52 -0700 Subject: [PATCH 7/7] style: black formatting for herdr_inbox_service.py --- src/cli_agent_orchestrator/services/herdr_inbox_service.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/cli_agent_orchestrator/services/herdr_inbox_service.py b/src/cli_agent_orchestrator/services/herdr_inbox_service.py index dc54db7f..a0703991 100644 --- a/src/cli_agent_orchestrator/services/herdr_inbox_service.py +++ b/src/cli_agent_orchestrator/services/herdr_inbox_service.py @@ -414,9 +414,7 @@ async def _reconcile(self) -> None: backend = get_backend() if hasattr(backend, "_pane_cache"): backend._pane_cache.pop(terminal_id, None) - new_pane_id = backend.get_pane_id( - terminal_id, term_session or "", term_window - ) + new_pane_id = backend.get_pane_id(terminal_id, term_session or "", term_window) except Exception as e: logger.warning( "Reconcile: tab %s live but pane re-resolve failed for %s (%s); "