diff --git a/AGENTS.md b/AGENTS.md index d8d4c7112..5786712ed 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -209,6 +209,97 @@ To view the generated SVG snapshots in a browser: - Do not embed API keys or endpoints in code; rely on runtime configuration/env vars when integrating new services. - When packaging, verify no sensitive files are included in `dist/`; adjust `openhands-cli.spec` if new assets are added. +## Headless/JSON Mode for E2E Testing + +The CLI supports a headless JSON output mode useful for automated E2E testing without the TUI. This is particularly helpful for testing event flows (e.g., hook rejections) programmatically. + +### Running Headless Mode + +```bash +# Basic headless run with JSON event output +LLM_API_KEY=$LLM_API_KEY LLM_BASE_URL=https://llm-proxy.eval.all-hands.dev \ + uv run openhands --headless --json --override-with-envs \ + -t "Your task prompt here" > output.log 2>stderr.log & +``` + +- `--headless`: Runs without the TUI (no interactive UI) +- `--json`: Outputs each event as a JSON object separated by `--JSON Event--` markers +- `--override-with-envs`: Uses `LLM_API_KEY`/`LLM_BASE_URL` env vars instead of stored settings +- `-t "..."`: The task/prompt to send to the agent +- Run in background (`&`) so you can monitor logs with `grep`/`tail` + +### Analyzing JSON Output + +```bash +# Count events +grep -c "JSON Event" output.log + +# Search for specific event types +grep '"kind": "HookExecutionEvent"' output.log +grep '"blocked": true' output.log + +# View a specific event with context +grep -B2 -A20 '"kind": "HookExecutionEvent"' output.log +``` + +### Using tmux for Interactive TUI Testing + +When you need to observe the actual TUI (not headless), use tmux: + +```bash +# Start a tmux session +tmux new-session -d -s test-cli -x 120 -y 40 + +# Send the CLI command to tmux +tmux send-keys -t test-cli 'cd /path/to/workspace && uv run openhands' Enter + +# Wait for startup, then send a task +tmux send-keys -t test-cli 'your task prompt' Enter + +# Capture the screen to check output +tmux capture-pane -t test-cli -p + +# Detach: Ctrl+b d (or programmatically) +tmux detach -s test-cli + +# Kill when done +tmux kill-session -t test-cli +``` + +### Testing Against the SDK Repo (Hook Testing) + +The `OpenHands/software-agent-sdk` repo has pre-commit hooks configured, making it a good workspace for testing hook behavior: + +```bash +# Clone the SDK repo +git clone https://github.com/OpenHands/software-agent-sdk.git /path/to/sdk + +# Run CLI in that workspace to trigger hooks +cd /path/to/sdk +LLM_API_KEY=$LLM_API_KEY LLM_BASE_URL=https://llm-proxy.eval.all-hands.dev \ + /path/to/cli/.venv/bin/openhands --headless --json --override-with-envs \ + -t "break the pre-commit and return finish" > /tmp/test.log 2>&1 & +``` + +### LLM Configuration for E2E Tests + +Agent settings are stored in `~/.openhands/agent_settings.json`. For E2E testing: +- Use `--override-with-envs` flag with `LLM_API_KEY` and `LLM_BASE_URL` env vars +- Or modify `~/.openhands/agent_settings.json` directly (remember to restore after) + +## SDK Event Flow: Hooks + +Understanding how the SDK emits events for different hook types is critical for CLI event handling: + +- **Stop hooks** (on finish): Emit `HookExecutionEvent` (with `blocked=true/false`) + `MessageEvent` with feedback. They do **not** produce `UserRejectObservation`. +- **PreToolUse hooks** (before tool calls): Emit `HookExecutionEvent` + `UserRejectObservation(rejection_source="hook")` when blocked. +- **Successful hooks** (not blocked): Emit `HookExecutionEvent` with `blocked=false`, `success=true`. These are typically hidden in the UI. + +Key SDK source locations: +- `openhands-sdk/openhands/sdk/agent/agent.py`: `_ActionBatch` class creates `UserRejectObservation` with `rejection_source="hook"` for PreToolUse blocks (around L168-173) +- `openhands-sdk/openhands/sdk/hook/hook_manager.py`: Hook execution and `HookExecutionEvent` emission +- `openhands-sdk/openhands/sdk/event/event.py`: `HookExecutionEvent` class definition (fields: `hook_event_type`, `blocked`, `success`, `exit_code`, `reason`, `stdout`, `stderr`) + ## TUI State Management Architecture The TUI uses a reactive state management pattern with clear separation of concerns. Key files are in `openhands_cli/tui/core/`. diff --git a/openhands_cli/acp_impl/events/event.py b/openhands_cli/acp_impl/events/event.py index aafe2d690..81d72f285 100644 --- a/openhands_cli/acp_impl/events/event.py +++ b/openhands_cli/acp_impl/events/event.py @@ -14,6 +14,7 @@ CondensationRequest, ConversationStateUpdateEvent, Event, + HookExecutionEvent, MessageEvent, ObservationEvent, PauseEvent, @@ -87,6 +88,8 @@ async def __call__(self, event: Event) -> None: await self.shared_events_handler.handle_system_prompt(self, event) elif isinstance(event, PauseEvent): await self.shared_events_handler.handle_pause(self, event) + elif isinstance(event, HookExecutionEvent): + await self.shared_events_handler.handle_hook_execution(self, event) elif isinstance(event, Condensation): await self.shared_events_handler.handle_condensation(self, event) elif isinstance(event, CondensationRequest): diff --git a/openhands_cli/acp_impl/events/shared_event_handler.py b/openhands_cli/acp_impl/events/shared_event_handler.py index 0fc9f8296..ad0726997 100644 --- a/openhands_cli/acp_impl/events/shared_event_handler.py +++ b/openhands_cli/acp_impl/events/shared_event_handler.py @@ -23,6 +23,7 @@ Condensation, CondensationRequest, Event, + HookExecutionEvent, ObservationEvent, PauseEvent, SystemPromptEvent, @@ -48,12 +49,24 @@ # Formatting constants for consistent headers across streaming and non-streaming modes REASONING_HEADER = "**Reasoning**:\n" THOUGHT_HEADER = "\n**Thought**:\n" +HOOK_BLOCKED_HEADER = "**Hook Blocked Action**:\n" def _event_visualize_to_plain(event: Event) -> str: return str(event.visualize.plain) +def _is_hook_rejection( + event: UserRejectObservation | AgentErrorEvent | HookExecutionEvent, +) -> bool: + """Check if a rejection event originated from a hook.""" + if isinstance(event, HookExecutionEvent): + return event.blocked + if isinstance(event, UserRejectObservation): + return event.rejection_source == "hook" + return False + + class _ACPContext(Protocol): session_id: str conn: Client @@ -106,6 +119,14 @@ async def handle_system_prompt( ) -> None: await self.send_thought(ctx, str(event.visualize.plain)) + async def handle_hook_execution( + self, ctx: _ACPContext, event: HookExecutionEvent + ) -> None: + text = _event_visualize_to_plain(event) + if event.blocked: + text = f"{HOOK_BLOCKED_HEADER}{text}" + await self.send_thought(ctx, text) + async def handle_condensation(self, ctx: _ACPContext, event: Condensation) -> None: await self.send_thought(ctx, _event_visualize_to_plain(event)) @@ -117,11 +138,15 @@ async def handle_condensation_request( async def handle_user_reject_or_agent_error( self, ctx: _ACPContext, event: UserRejectObservation | AgentErrorEvent ) -> None: + text = _event_visualize_to_plain(event) + # Prepend hook blocked header for hook rejections + if _is_hook_rejection(event): + text = f"{HOOK_BLOCKED_HEADER}{text}" await self.send_tool_progress( ctx, tool_call_id=event.tool_call_id, status="failed", - text=_event_visualize_to_plain(event), + text=text, raw_output=event.model_dump(), ) diff --git a/openhands_cli/acp_impl/events/token_streamer.py b/openhands_cli/acp_impl/events/token_streamer.py index 0063d0574..bd4acbf0c 100644 --- a/openhands_cli/acp_impl/events/token_streamer.py +++ b/openhands_cli/acp_impl/events/token_streamer.py @@ -34,6 +34,7 @@ Condensation, CondensationRequest, ConversationStateUpdateEvent, + HookExecutionEvent, ObservationEvent, PauseEvent, SystemPromptEvent, @@ -172,6 +173,8 @@ async def unstreamed_event_handler(self, event: Event) -> None: await self.shared_events_handler.handle_system_prompt(self, event) elif isinstance(event, PauseEvent): await self.shared_events_handler.handle_pause(self, event) + elif isinstance(event, HookExecutionEvent): + await self.shared_events_handler.handle_hook_execution(self, event) elif isinstance(event, Condensation): await self.shared_events_handler.handle_condensation(self, event) elif isinstance(event, CondensationRequest): diff --git a/openhands_cli/tui/widgets/richlog_visualizer.py b/openhands_cli/tui/widgets/richlog_visualizer.py index 594d975df..106858971 100644 --- a/openhands_cli/tui/widgets/richlog_visualizer.py +++ b/openhands_cli/tui/widgets/richlog_visualizer.py @@ -14,6 +14,7 @@ from openhands.sdk.event import ( ActionEvent, AgentErrorEvent, + HookExecutionEvent, MessageEvent, ObservationEvent, PauseEvent, @@ -52,6 +53,26 @@ DEFAULT_AGENT_NAME = "OpenHands Agent" +def _is_hook_rejection( + event: UserRejectObservation | AgentErrorEvent | HookExecutionEvent, +) -> bool: + """Check if a rejection event originated from a hook.""" + if isinstance(event, HookExecutionEvent): + return event.blocked + if isinstance(event, UserRejectObservation): + return event.rejection_source == "hook" + return False + + +def _get_rejection_title( + event: UserRejectObservation | AgentErrorEvent | HookExecutionEvent, +) -> str: + """Get the appropriate title for a rejection event.""" + if _is_hook_rejection(event): + return "Hook Blocked Action" + return "User Rejected Action" + + if TYPE_CHECKING: from textual.containers import VerticalScroll from textual.widget import Widget @@ -75,6 +96,10 @@ def _get_event_symbol_color(event: Event) -> str: return OPENHANDS_THEME.primary else: return OPENHANDS_THEME.accent or DEFAULT_COLOR + elif isinstance(event, HookExecutionEvent): + if event.blocked: + return OPENHANDS_THEME.error or DEFAULT_COLOR + return DEFAULT_COLOR elif isinstance(event, AgentErrorEvent): return OPENHANDS_THEME.error or DEFAULT_COLOR elif isinstance(event, ConversationErrorEvent): @@ -814,9 +839,17 @@ def _create_event_collapsible(self, event: Event) -> Collapsible | None: self._pending_actions[event.tool_call_id] = (event, collapsible) return collapsible + # UserRejectObservation needs dynamic title based on rejection_source + if isinstance(event, UserRejectObservation): + return self._create_titled_collapsible(event, _get_rejection_title(event)) + + # HookExecutionEvent: dynamic title based on blocked status + if isinstance(event, HookExecutionEvent): + title = _get_rejection_title(event) if event.blocked else "Hook Executed" + return self._create_titled_collapsible(event, title) + fallback_titles: list[tuple[type[Event], str]] = [ (ObservationEvent, "Observation"), - (UserRejectObservation, "User Rejected Action"), (AgentErrorEvent, "Agent Error"), (ConversationErrorEvent, "Conversation Error"), (PauseEvent, "User Paused"), diff --git a/tests/acp/events/test_shared_event_handler.py b/tests/acp/events/test_shared_event_handler.py new file mode 100644 index 000000000..127af94b0 --- /dev/null +++ b/tests/acp/events/test_shared_event_handler.py @@ -0,0 +1,84 @@ +"""Tests for ACP shared event handler hook rejection detection.""" + +from openhands.sdk.event import ( + AgentErrorEvent, + HookExecutionEvent, + UserRejectObservation, +) +from openhands_cli.acp_impl.events.shared_event_handler import ( + HOOK_BLOCKED_HEADER, + _is_hook_rejection, +) + + +class TestACPHookRejectionDetection: + """Tests for hook rejection detection in ACP shared event handler.""" + + def test_is_hook_rejection_with_hook_source(self): + """Test _is_hook_rejection returns True for hook rejections.""" + event = UserRejectObservation( + action_id="test_action_id", + tool_name="terminal", + tool_call_id="call_1", + rejection_reason="Blocked by security hook", + rejection_source="hook", + ) + assert _is_hook_rejection(event) is True + + def test_is_hook_rejection_with_user_source(self): + """Test _is_hook_rejection returns False for user rejections.""" + event = UserRejectObservation( + action_id="test_action_id", + tool_name="terminal", + tool_call_id="call_1", + rejection_reason="User rejected the action", + rejection_source="user", + ) + assert _is_hook_rejection(event) is False + + def test_is_hook_rejection_with_default_source(self): + """Test _is_hook_rejection returns False with default source.""" + event = UserRejectObservation( + action_id="test_action_id", + tool_name="terminal", + tool_call_id="call_1", + rejection_reason="User rejected the action", + ) + assert _is_hook_rejection(event) is False + + def test_is_hook_rejection_with_agent_error_event(self): + """Test _is_hook_rejection returns False for AgentErrorEvent.""" + event = AgentErrorEvent( + error="Something went wrong", + tool_name="terminal", + tool_call_id="call_1", + ) + assert _is_hook_rejection(event) is False + + def test_is_hook_rejection_with_blocked_hook_execution_event(self): + """Test _is_hook_rejection returns True for blocked HookExecutionEvent.""" + event = HookExecutionEvent( + hook_event_type="Stop", + hook_command=".openhands/hooks/on_stop.sh", + success=False, + blocked=True, + exit_code=2, + reason="Checks failed", + ) + assert _is_hook_rejection(event) is True + + def test_is_hook_rejection_with_successful_hook_execution_event(self): + """Test _is_hook_rejection returns False for successful HookExecutionEvent.""" + event = HookExecutionEvent( + hook_event_type="Stop", + hook_command=".openhands/hooks/on_stop.sh", + success=True, + blocked=False, + exit_code=0, + ) + assert _is_hook_rejection(event) is False + + def test_hook_blocked_header_format(self): + """Test HOOK_BLOCKED_HEADER has expected format.""" + assert "Hook" in HOOK_BLOCKED_HEADER + assert HOOK_BLOCKED_HEADER.endswith("\n") diff --git a/tests/snapshots/__snapshots__/test_visualizer_snapshots/TestRejectionEventSnapshots.test_hook_rejection_display.svg b/tests/snapshots/__snapshots__/test_visualizer_snapshots/TestRejectionEventSnapshots.test_hook_rejection_display.svg new file mode 100644 index 000000000..a5285f04f --- /dev/null +++ b/tests/snapshots/__snapshots__/test_visualizer_snapshots/TestRejectionEventSnapshots.test_hook_rejection_display.svg @@ -0,0 +1,104 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + VisualizerTestApp + + + + + + + + + + + +> hi how are you + + Hook Blocked Action: Tool: terminal  Rejection Reason: Blocked by security +hook: dangero... + + + + + + + + + diff --git a/tests/snapshots/__snapshots__/test_visualizer_snapshots/TestRejectionEventSnapshots.test_user_and_hook_rejections_comparison.svg b/tests/snapshots/__snapshots__/test_visualizer_snapshots/TestRejectionEventSnapshots.test_user_and_hook_rejections_comparison.svg new file mode 100644 index 000000000..6771bc375 --- /dev/null +++ b/tests/snapshots/__snapshots__/test_visualizer_snapshots/TestRejectionEventSnapshots.test_user_and_hook_rejections_comparison.svg @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + VisualizerTestApp + + + + + + + + + + + +> hi how are you + + User Rejected Action: Tool: terminal  Rejection Reason: User chose not to +proceed + + Hook Blocked Action: Tool: terminal  Rejection Reason: Hook blocked: rm +-rf detected + + + + + + + + + + diff --git a/tests/snapshots/__snapshots__/test_visualizer_snapshots/TestRejectionEventSnapshots.test_user_rejection_display.svg b/tests/snapshots/__snapshots__/test_visualizer_snapshots/TestRejectionEventSnapshots.test_user_rejection_display.svg new file mode 100644 index 000000000..373f86a1d --- /dev/null +++ b/tests/snapshots/__snapshots__/test_visualizer_snapshots/TestRejectionEventSnapshots.test_user_rejection_display.svg @@ -0,0 +1,104 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + VisualizerTestApp + + + + + + + + + + + +> hi how are you + + User Rejected Action: Tool: terminal  Rejection Reason: I don't want to +run this command + + + + + + + + + diff --git a/tests/snapshots/test_visualizer_snapshots.py b/tests/snapshots/test_visualizer_snapshots.py index 6f469267a..319a08010 100644 --- a/tests/snapshots/test_visualizer_snapshots.py +++ b/tests/snapshots/test_visualizer_snapshots.py @@ -4,14 +4,14 @@ proper padding alignment with user messages. """ -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any, Literal, cast from unittest.mock import MagicMock from textual.app import App, ComposeResult from textual.containers import VerticalScroll from textual.widgets import Static -from openhands.sdk.event import ActionEvent +from openhands.sdk.event import ActionEvent, UserRejectObservation from openhands.sdk.llm import MessageToolCall from openhands.sdk.tool.builtins.finish import FinishAction from openhands.sdk.tool.builtins.think import ThinkAction @@ -121,6 +121,20 @@ def _create_think_action_event(thought: str) -> ActionEvent: ) +def _create_user_reject_observation( + rejection_reason: str, + rejection_source: Literal["user", "hook"] = "user", +) -> UserRejectObservation: + """Create a UserRejectObservation event for testing.""" + return UserRejectObservation( + action_id="test_action_id", + tool_name="terminal", + tool_call_id="test_tool_call_id", + rejection_reason=rejection_reason, + rejection_source=rejection_source, + ) + + class TestVisualizerSnapshots: """Snapshot tests for the ConversationVisualizer.""" @@ -141,3 +155,41 @@ def test_multiple_actions_alignment(self, snap_compare): _create_finish_action_event("Done! Here's the result."), ] assert snap_compare(VisualizerTestApp(events), terminal_size=(80, 16)) + + +class TestRejectionEventSnapshots: + """Snapshot tests for rejection events (user and hook rejections).""" + + def test_user_rejection_display(self, snap_compare): + """Verify user rejection shows 'User Rejected Action' title.""" + events = [ + _create_user_reject_observation( + rejection_reason="I don't want to run this command", + rejection_source="user", + ) + ] + assert snap_compare(VisualizerTestApp(events), terminal_size=(80, 12)) + + def test_hook_rejection_display(self, snap_compare): + """Verify hook rejection shows 'Hook Blocked Action' title.""" + events = [ + _create_user_reject_observation( + rejection_reason="Blocked by security hook: dangerous command detected", + rejection_source="hook", + ) + ] + assert snap_compare(VisualizerTestApp(events), terminal_size=(80, 12)) + + def test_user_and_hook_rejections_comparison(self, snap_compare): + """Verify visual difference between user and hook rejections.""" + events = [ + _create_user_reject_observation( + rejection_reason="User chose not to proceed", + rejection_source="user", + ), + _create_user_reject_observation( + rejection_reason="Hook blocked: rm -rf detected", + rejection_source="hook", + ), + ] + assert snap_compare(VisualizerTestApp(events), terminal_size=(80, 16)) diff --git a/tests/tui/widgets/test_richlog_visualizer.py b/tests/tui/widgets/test_richlog_visualizer.py index 9ca787048..02187c2f2 100644 --- a/tests/tui/widgets/test_richlog_visualizer.py +++ b/tests/tui/widgets/test_richlog_visualizer.py @@ -1247,6 +1247,135 @@ def capture_add(func, *args): assert "user-message" in widget.classes +class TestHookRejectionDetection: + """Tests for hook rejection detection using rejection_source field.""" + + def test_is_hook_rejection_with_hook_source(self): + """Test _is_hook_rejection returns True for hook rejections.""" + from openhands.sdk.event import UserRejectObservation + from openhands_cli.tui.widgets.richlog_visualizer import _is_hook_rejection + + event = UserRejectObservation( + action_id="test_action_id", + tool_name="terminal", + tool_call_id="call_1", + rejection_reason="Blocked by security hook", + rejection_source="hook", + ) + assert _is_hook_rejection(event) is True + + def test_is_hook_rejection_with_user_source(self): + """Test _is_hook_rejection returns False for user rejections.""" + from openhands.sdk.event import UserRejectObservation + from openhands_cli.tui.widgets.richlog_visualizer import _is_hook_rejection + + event = UserRejectObservation( + action_id="test_action_id", + tool_name="terminal", + tool_call_id="call_1", + rejection_reason="User rejected the action", + rejection_source="user", + ) + assert _is_hook_rejection(event) is False + + def test_is_hook_rejection_with_default_source(self): + """Test _is_hook_rejection returns False with default source.""" + from openhands.sdk.event import UserRejectObservation + from openhands_cli.tui.widgets.richlog_visualizer import _is_hook_rejection + + event = UserRejectObservation( + action_id="test_action_id", + tool_name="terminal", + tool_call_id="call_1", + rejection_reason="User rejected the action", + ) + assert _is_hook_rejection(event) is False + + def test_is_hook_rejection_with_agent_error_event(self): + """Test _is_hook_rejection returns False for AgentErrorEvent.""" + from openhands.sdk.event import AgentErrorEvent + from openhands_cli.tui.widgets.richlog_visualizer import _is_hook_rejection + + event = AgentErrorEvent( + error="Something went wrong", + tool_name="terminal", + tool_call_id="call_1", + ) + assert _is_hook_rejection(event) is False + + def test_get_rejection_title_for_hook(self): + """Test _get_rejection_title returns correct title for hook rejection.""" + from openhands.sdk.event import UserRejectObservation + from openhands_cli.tui.widgets.richlog_visualizer import _get_rejection_title + + event = UserRejectObservation( + action_id="test_action_id", + tool_name="terminal", + tool_call_id="call_1", + rejection_reason="Blocked by hook", + rejection_source="hook", + ) + assert _get_rejection_title(event) == "Hook Blocked Action" + + def test_get_rejection_title_for_user(self): + """Test _get_rejection_title returns correct title for user rejection.""" + from openhands.sdk.event import UserRejectObservation + from openhands_cli.tui.widgets.richlog_visualizer import _get_rejection_title + + event = UserRejectObservation( + action_id="test_action_id", + tool_name="terminal", + tool_call_id="call_1", + rejection_reason="User rejected", + rejection_source="user", + ) + assert _get_rejection_title(event) == "User Rejected Action" + + def test_is_hook_rejection_with_blocked_hook_execution_event(self): + """Test _is_hook_rejection returns True for blocked HookExecutionEvent.""" + from openhands.sdk.event import HookExecutionEvent + from openhands_cli.tui.widgets.richlog_visualizer import _is_hook_rejection + + event = HookExecutionEvent( + hook_event_type="Stop", + hook_command=".openhands/hooks/on_stop.sh", + success=False, + blocked=True, + exit_code=2, + reason="Checks failed", + ) + assert _is_hook_rejection(event) is True + + def test_is_hook_rejection_with_successful_hook_execution_event(self): + """Test _is_hook_rejection returns False for successful HookExecutionEvent.""" + from openhands.sdk.event import HookExecutionEvent + from openhands_cli.tui.widgets.richlog_visualizer import _is_hook_rejection + + event = HookExecutionEvent( + hook_event_type="Stop", + hook_command=".openhands/hooks/on_stop.sh", + success=True, + blocked=False, + exit_code=0, + ) + assert _is_hook_rejection(event) is False + + def test_get_rejection_title_for_blocked_hook_execution(self): + """Test _get_rejection_title for blocked HookExecutionEvent.""" + from openhands.sdk.event import HookExecutionEvent + from openhands_cli.tui.widgets.richlog_visualizer import _get_rejection_title + + event = HookExecutionEvent( + hook_event_type="Stop", + hook_command=".openhands/hooks/on_stop.sh", + success=False, + blocked=True, + exit_code=2, + reason="CI checks failed", + ) + assert _get_rejection_title(event) == "Hook Blocked Action" + + class TestDefaultAgentPrefixBehavior: """Tests for hiding agent prefix for the default OpenHands Agent.