Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 21 additions & 16 deletions openhands-tools/openhands/tools/task/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,19 +355,17 @@ def _run_task(self, task: Task, prompt: str) -> Task:
try:
task.conversation.send_message(prompt, sender=parent_name)
self._run_until_finished(task.id, task.conversation)
# A run-limit stop (cost budget or iteration cap) ends the
# sub-conversation in ERROR without raising; surface that to the
# parent instead of an empty "completed" result.
if (
task.conversation.state.execution_status
== ConversationExecutionStatus.ERROR
):
task.set_error(self._run_error_detail(task.conversation))
logger.warning(f"Task '{task.id}' ended with an error.")
else:
status = task.conversation.state.execution_status
if status == ConversationExecutionStatus.FINISHED:
result = get_agent_final_response(task.conversation.state.events)
task.set_result(result)
logger.info(f"Task '{task.id}' completed.")
else:
# Any non-FINISHED terminal status (run-limit, stuck, paused, ...)
# is surfaced as an error, not an empty "completed"; the detail
# keeps partial output so the parent can use/retry it.
task.set_error(self._run_stop_detail(task.conversation, status))
logger.warning(f"Task '{task.id}' stopped: status '{status.value}'.")
except Exception as e:
task.set_error(str(e))
logger.warning(f"Task {task.id} failed with error: {e}")
Expand All @@ -378,17 +376,24 @@ def _run_task(self, task: Task, prompt: str) -> Task:
return task

@staticmethod
def _run_error_detail(conversation: LocalConversation) -> str:
"""Best-effort detail for a sub-agent run that ended in ERROR (e.g. the
cost budget or iteration cap), so the parent sees why it stopped."""
def _run_stop_detail(
conversation: LocalConversation,
status: ConversationExecutionStatus,
) -> str:
"""Why a sub-agent stopped without finishing (run-limit, stuck, paused, ...),
plus any partial output so the parent isn't left with nothing to use."""
errors = [
e
for e in conversation.state.events
if isinstance(e, ConversationErrorEvent)
]
if errors:
return errors[-1].detail
return "Sub-agent run ended with an error."
reason = (
errors[-1].detail
if errors
else f"Sub-agent stopped without finishing (status: {status.value})."
)
partial = get_agent_final_response(conversation.state.events)
return f"{reason}\nPartial result:\n{partial}" if partial else reason

def _run_until_finished(
self, task_id: str, conversation: LocalConversation
Expand Down
47 changes: 36 additions & 11 deletions tests/tools/task/test_task_manager.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import uuid
from pathlib import Path
from typing import cast
from typing import Any, cast
from unittest.mock import MagicMock, patch

import pytest
from pydantic import SecretStr

from openhands.sdk import LLM, Agent
from openhands.sdk.conversation.impl.local_conversation import LocalConversation
from openhands.sdk.conversation.state import ConversationExecutionStatus
from openhands.sdk.hooks.config import HookConfig, HookDefinition, HookMatcher
from openhands.sdk.subagent.registry import (
_reset_registry_for_tests,
Expand Down Expand Up @@ -397,6 +398,9 @@ def test_sub_agents_inherit_parent_prompt_cache_key(self, tmp_path):
def _make_task_with_mock_conv(task_id: str, **conv_kwargs) -> Task:
"""Create a Task with a MagicMock conversation, bypassing Pydantic validation."""
mock_conv = MagicMock(**conv_kwargs)
# Default to a FINISHED run; a non-FINISHED status is now surfaced as an error,
# so override state.execution_status to test stuck/paused paths.
mock_conv.state.execution_status = ConversationExecutionStatus.FINISHED
return Task.model_construct(
id=task_id,
conversation_id=uuid.uuid4(),
Expand Down Expand Up @@ -487,6 +491,24 @@ def test_run_sets_error_on_exception(self, tmp_path):
assert "agent exploded" in result.error
assert result.result is None

def test_non_finished_status_surfaced_as_error(self, tmp_path):
"""A sub-agent that ends non-FINISHED (e.g. STUCK) is surfaced as an error,
not reported as an empty success."""
manager, _ = _manager_with_parent(tmp_path)

task = _make_task_with_mock_conv("task_00000001")
assert task.conversation is not None
mock_conv = cast(Any, task.conversation)
mock_conv.state.execution_status = ConversationExecutionStatus.STUCK
mock_conv.state.events = []
manager._tasks[task.id] = task

result = manager._run_task(task=task, prompt="do something")

assert result.status == TaskStatus.ERROR
assert result.result is None
assert "stuck" in (result.error or "").lower()

def test_run_evicts_conversation_after_error(self, tmp_path):
"""Even on error, the task's conversation should be evicted (finally block)."""
manager, _ = _manager_with_parent(tmp_path)
Expand Down Expand Up @@ -983,27 +1005,30 @@ def test_budget_inherits_from_parent(self, tmp_path):


class TestRunErrorSurfacing:
"""A sub-agent run that ends in ERROR (cost budget or iteration cap) is
surfaced to the parent task rather than reported as an empty success."""
"""A sub-agent run that ends in a non-FINISHED status (stuck, paused, run-limit,
...) is surfaced to the parent task rather than reported as an empty success."""

def test_run_error_detail_returns_last_error(self):
def test_run_stop_detail_returns_last_error(self):
from types import SimpleNamespace

from openhands.sdk.event.conversation_error import ConversationErrorEvent

err = ConversationErrorEvent(
source="environment",
code="MaxBudgetReached",
detail="Agent reached maximum budget limit ($0.05).",
code="MaxIterationsReached",
detail="Agent reached the maximum iteration limit.",
)
conv = SimpleNamespace(state=SimpleNamespace(events=[err]))
assert (
TaskManager._run_error_detail(cast(LocalConversation, conv)) == err.detail
detail = TaskManager._run_stop_detail(
cast(LocalConversation, conv), ConversationExecutionStatus.ERROR
)
assert err.detail in detail

def test_run_error_detail_default_when_no_error_event(self):
def test_run_stop_detail_default_mentions_status(self):
from types import SimpleNamespace

conv = SimpleNamespace(state=SimpleNamespace(events=[]))
detail = TaskManager._run_error_detail(cast(LocalConversation, conv))
assert "error" in detail.lower()
detail = TaskManager._run_stop_detail(
cast(LocalConversation, conv), ConversationExecutionStatus.STUCK
)
assert "stuck" in detail.lower()
Loading