diff --git a/openhands-tools/openhands/tools/task/manager.py b/openhands-tools/openhands/tools/task/manager.py index 1d48e982b7..9a22543425 100644 --- a/openhands-tools/openhands/tools/task/manager.py +++ b/openhands-tools/openhands/tools/task/manager.py @@ -355,19 +355,17 @@ def _run_task(self, task: Task, prompt: str) -> Task: try: task.conversation.send_message(prompt, sender=parent_name) self._run_until_finished(task.id, task.conversation) - # A run-limit stop (cost budget or iteration cap) ends the - # sub-conversation in ERROR without raising; surface that to the - # parent instead of an empty "completed" result. - if ( - task.conversation.state.execution_status - == ConversationExecutionStatus.ERROR - ): - task.set_error(self._run_error_detail(task.conversation)) - logger.warning(f"Task '{task.id}' ended with an error.") - else: + status = task.conversation.state.execution_status + if status == ConversationExecutionStatus.FINISHED: result = get_agent_final_response(task.conversation.state.events) task.set_result(result) logger.info(f"Task '{task.id}' completed.") + else: + # Any non-FINISHED terminal status (run-limit, stuck, paused, ...) + # is surfaced as an error, not an empty "completed"; the detail + # keeps partial output so the parent can use/retry it. + task.set_error(self._run_stop_detail(task.conversation, status)) + logger.warning(f"Task '{task.id}' stopped: status '{status.value}'.") except Exception as e: task.set_error(str(e)) logger.warning(f"Task {task.id} failed with error: {e}") @@ -378,17 +376,24 @@ def _run_task(self, task: Task, prompt: str) -> Task: return task @staticmethod - def _run_error_detail(conversation: LocalConversation) -> str: - """Best-effort detail for a sub-agent run that ended in ERROR (e.g. the - cost budget or iteration cap), so the parent sees why it stopped.""" + def _run_stop_detail( + conversation: LocalConversation, + status: ConversationExecutionStatus, + ) -> str: + """Why a sub-agent stopped without finishing (run-limit, stuck, paused, ...), + plus any partial output so the parent isn't left with nothing to use.""" errors = [ e for e in conversation.state.events if isinstance(e, ConversationErrorEvent) ] - if errors: - return errors[-1].detail - return "Sub-agent run ended with an error." + reason = ( + errors[-1].detail + if errors + else f"Sub-agent stopped without finishing (status: {status.value})." + ) + partial = get_agent_final_response(conversation.state.events) + return f"{reason}\nPartial result:\n{partial}" if partial else reason def _run_until_finished( self, task_id: str, conversation: LocalConversation diff --git a/tests/tools/task/test_task_manager.py b/tests/tools/task/test_task_manager.py index 1d9c181030..4270ccf5d7 100644 --- a/tests/tools/task/test_task_manager.py +++ b/tests/tools/task/test_task_manager.py @@ -1,6 +1,6 @@ import uuid from pathlib import Path -from typing import cast +from typing import Any, cast from unittest.mock import MagicMock, patch import pytest @@ -8,6 +8,7 @@ from openhands.sdk import LLM, Agent from openhands.sdk.conversation.impl.local_conversation import LocalConversation +from openhands.sdk.conversation.state import ConversationExecutionStatus from openhands.sdk.hooks.config import HookConfig, HookDefinition, HookMatcher from openhands.sdk.subagent.registry import ( _reset_registry_for_tests, @@ -397,6 +398,9 @@ def test_sub_agents_inherit_parent_prompt_cache_key(self, tmp_path): def _make_task_with_mock_conv(task_id: str, **conv_kwargs) -> Task: """Create a Task with a MagicMock conversation, bypassing Pydantic validation.""" mock_conv = MagicMock(**conv_kwargs) + # Default to a FINISHED run; a non-FINISHED status is now surfaced as an error, + # so override state.execution_status to test stuck/paused paths. + mock_conv.state.execution_status = ConversationExecutionStatus.FINISHED return Task.model_construct( id=task_id, conversation_id=uuid.uuid4(), @@ -487,6 +491,24 @@ def test_run_sets_error_on_exception(self, tmp_path): assert "agent exploded" in result.error assert result.result is None + def test_non_finished_status_surfaced_as_error(self, tmp_path): + """A sub-agent that ends non-FINISHED (e.g. STUCK) is surfaced as an error, + not reported as an empty success.""" + manager, _ = _manager_with_parent(tmp_path) + + task = _make_task_with_mock_conv("task_00000001") + assert task.conversation is not None + mock_conv = cast(Any, task.conversation) + mock_conv.state.execution_status = ConversationExecutionStatus.STUCK + mock_conv.state.events = [] + manager._tasks[task.id] = task + + result = manager._run_task(task=task, prompt="do something") + + assert result.status == TaskStatus.ERROR + assert result.result is None + assert "stuck" in (result.error or "").lower() + def test_run_evicts_conversation_after_error(self, tmp_path): """Even on error, the task's conversation should be evicted (finally block).""" manager, _ = _manager_with_parent(tmp_path) @@ -983,27 +1005,30 @@ def test_budget_inherits_from_parent(self, tmp_path): class TestRunErrorSurfacing: - """A sub-agent run that ends in ERROR (cost budget or iteration cap) is - surfaced to the parent task rather than reported as an empty success.""" + """A sub-agent run that ends in a non-FINISHED status (stuck, paused, run-limit, + ...) is surfaced to the parent task rather than reported as an empty success.""" - def test_run_error_detail_returns_last_error(self): + def test_run_stop_detail_returns_last_error(self): from types import SimpleNamespace from openhands.sdk.event.conversation_error import ConversationErrorEvent err = ConversationErrorEvent( source="environment", - code="MaxBudgetReached", - detail="Agent reached maximum budget limit ($0.05).", + code="MaxIterationsReached", + detail="Agent reached the maximum iteration limit.", ) conv = SimpleNamespace(state=SimpleNamespace(events=[err])) - assert ( - TaskManager._run_error_detail(cast(LocalConversation, conv)) == err.detail + detail = TaskManager._run_stop_detail( + cast(LocalConversation, conv), ConversationExecutionStatus.ERROR ) + assert err.detail in detail - def test_run_error_detail_default_when_no_error_event(self): + def test_run_stop_detail_default_mentions_status(self): from types import SimpleNamespace conv = SimpleNamespace(state=SimpleNamespace(events=[])) - detail = TaskManager._run_error_detail(cast(LocalConversation, conv)) - assert "error" in detail.lower() + detail = TaskManager._run_stop_detail( + cast(LocalConversation, conv), ConversationExecutionStatus.STUCK + ) + assert "stuck" in detail.lower()