From 9c2b2270d77f8acd7e50588dc6253c2c952ba6de Mon Sep 17 00:00:00 2001 From: enyst Date: Thu, 11 Jun 2026 20:44:55 +0000 Subject: [PATCH 1/5] feat(sdk): add ask oracle tool Co-authored-by: openhands --- .pr/ask_oracle_live_validation.json | 30 +++ .pr/ask_oracle_live_validation.py | 125 +++++++++ .pr/ask_oracle_test_results.json | 21 ++ .pr/ask_oracle_validation_summary.md | 65 +++++ .../54_ask_oracle_tool/main.py | 82 ++++++ openhands-sdk/openhands/sdk/settings/model.py | 35 ++- .../openhands/sdk/tool/builtins/__init__.py | 11 + .../openhands/sdk/tool/builtins/ask_oracle.py | 255 ++++++++++++++++++ tests/examples/test_examples.py | 4 + tests/sdk/test_settings.py | 5 + tests/sdk/tool/test_ask_oracle.py | 171 ++++++++++++ 11 files changed, 802 insertions(+), 2 deletions(-) create mode 100644 .pr/ask_oracle_live_validation.json create mode 100644 .pr/ask_oracle_live_validation.py create mode 100644 .pr/ask_oracle_test_results.json create mode 100644 .pr/ask_oracle_validation_summary.md create mode 100644 examples/01_standalone_sdk/54_ask_oracle_tool/main.py create mode 100644 openhands-sdk/openhands/sdk/tool/builtins/ask_oracle.py create mode 100644 tests/sdk/tool/test_ask_oracle.py diff --git a/.pr/ask_oracle_live_validation.json b/.pr/ask_oracle_live_validation.json new file mode 100644 index 0000000000..493a25bbea --- /dev/null +++ b/.pr/ask_oracle_live_validation.json @@ -0,0 +1,30 @@ +{ + "started_at": "2026-06-11T20:40:20.866050+00:00", + "finished_at": "2026-06-11T20:40:29.750538+00:00", + "issue": "https://github.com/OpenHands/software-agent-sdk/issues/3672", + "primary_profile": { + "model": "openai/gpt-5-nano", + "provider": "OpenAI direct", + "usage_id": "ask-oracle-live-primary", + "response_text": "primary profile live check ok", + "succeeded": true + }, + "oracle_profile": { + "profile_name": "oracle", + "model": "litellm_proxy/openai/gpt-5-mini", + "base_url": "https://llm-proxy.eval.all-hands.dev", + "base_url_source": 
def first_text(message: Message) -> str:
    """Return the text of *message* as a single stripped string.

    Despite the name, every text part is used: the ``text`` of each
    ``TextContent`` item in ``message.content`` is concatenated in order
    (no separator), other content items are skipped, and leading/trailing
    whitespace is removed from the joined result.
    """
    pieces: list[str] = []
    for item in message.content:
        if isinstance(item, TextContent):
            pieces.append(item.text)
    joined = "".join(pieces)
    return joined.strip()
model=PRIMARY_MODEL, + api_key=SecretStr(os.environ["OPENAI_API_KEY"]), + usage_id="ask-oracle-live-primary", + max_output_tokens=1000, + reasoning_effort="low", + ) + oracle_llm = LLM( + model=ORACLE_MODEL, + api_key=SecretStr(os.environ["LITELLM_API_KEY"]), + base_url=ORACLE_BASE_URL, + usage_id="ask-oracle-live-oracle", + max_output_tokens=1000, + reasoning_effort="low", + ) + + primary_response = make_llm_completion( + primary_llm, + [ + Message( + role="user", + content=[ + TextContent( + text=("Reply with exactly: primary profile live check ok") + ) + ], + ) + ], + ) + primary_text = first_text(primary_response.message) + + store = LLMProfileStore() + store.save("oracle", oracle_llm, include_secrets=True) + + settings = OpenHandsAgentSettings( + llm=primary_llm, + oracle_llm_profile="oracle", + enable_switch_llm_tool=False, + ) + agent = settings.create_agent() + conversation = LocalConversation(agent=agent, workspace=Path.cwd()) + conversation._ensure_agent_ready() + observation = conversation.execute_tool( + "ask_oracle", + AskOracleAction( + question=( + "Answer in two concise bullets: confirm that you are the Oracle " + "profile and give one practical reason an agent should ask for a " + "second opinion when stuck." + ), + context=( + "The active LLM profile is OpenAI direct gpt-5-nano. The Oracle " + "profile is gpt-5-mini through the eval LiteLLM proxy." 
+ ), + ), + ) + + result = { + "started_at": started_at, + "finished_at": datetime.now(UTC).isoformat(), + "issue": "https://github.com/OpenHands/software-agent-sdk/issues/3672", + "primary_profile": { + "model": PRIMARY_MODEL, + "provider": "OpenAI direct", + "usage_id": primary_llm.usage_id, + "response_text": primary_text, + "succeeded": bool(primary_text), + }, + "oracle_profile": { + "profile_name": "oracle", + "model": ORACLE_MODEL, + "base_url": ORACLE_BASE_URL, + "base_url_source": "openhands-sdk/openhands/sdk/agent/base.py", + "usage_id": "ask-oracle-live-oracle", + }, + "ask_oracle_tool": { + "registered_tool_names": sorted(agent.tools_map), + "observation_is_error": observation.is_error, + "observation_profile_name": observation.profile_name, + "observation_oracle_model": observation.oracle_model, + "observation_text": observation.text, + }, + } +finally: + shutil.rmtree(profile_store_dir.parent, ignore_errors=True) + +RESULT_PATH.write_text(json.dumps(result, indent=2) + "\n") +print(json.dumps(result, indent=2)) diff --git a/.pr/ask_oracle_test_results.json b/.pr/ask_oracle_test_results.json new file mode 100644 index 0000000000..966e1ecf56 --- /dev/null +++ b/.pr/ask_oracle_test_results.json @@ -0,0 +1,21 @@ +{ + "targeted_pytest": { + "command": "uv run pytest tests/sdk/tool/test_ask_oracle.py tests/sdk/tool/test_builtins.py tests/sdk/test_settings.py::test_llm_agent_settings_export_schema_groups_sections tests/examples/test_examples.py::test_directory_example_is_discovered", + "result": "11 passed", + "warnings": 5 + }, + "example_pytest": { + "command": "uv run pytest tests/examples/test_examples.py --run-examples -k 54_ask_oracle_tool", + "result": "1 passed, 64 deselected", + "warnings": 5 + }, + "pre_commit": { + "command": "uv run pre-commit run --files ", + "result": "passed" + }, + "live_validation": { + "command": "OPENHANDS_SUPPRESS_BANNER=1 OPENAI_API_KEY=$OPENAI_API_KEY LITELLM_API_KEY=$LITELLM_API_KEY uv run python 
.pr/ask_oracle_live_validation.py", + "result_file": ".pr/ask_oracle_live_validation.json", + "result": "passed" + } +} diff --git a/.pr/ask_oracle_validation_summary.md b/.pr/ask_oracle_validation_summary.md new file mode 100644 index 0000000000..5ab1a4b0d9 --- /dev/null +++ b/.pr/ask_oracle_validation_summary.md @@ -0,0 +1,65 @@ +# ask_oracle validation evidence + +This directory is temporary PR evidence for the `ask_oracle` tool implementation. + +## What changed + +- Added `ask_oracle`, a read-only built-in SDK tool that loads a saved LLM profile and asks it for stateless second-opinion advice. +- Added `OpenHandsAgentSettings.oracle_llm_profile`; setting this saved profile name makes the tool available on the standard OpenHands agent. +- The active conversation LLM is not switched. The Oracle call uses the profile only for that consultation. + +## Live validation + +Evidence file: `.pr/ask_oracle_live_validation.json` + +Command run: + +```bash +OPENHANDS_SUPPRESS_BANNER=1 \ +OPENAI_API_KEY="$OPENAI_API_KEY" \ +LITELLM_API_KEY="$LITELLM_API_KEY" \ +uv run python .pr/ask_oracle_live_validation.py +``` + +Validated profiles: + +- Regular profile: `openai/gpt-5-nano` with OpenAI direct API key. +- Oracle profile: `litellm_proxy/openai/gpt-5-mini` with the eval LiteLLM key. +- Eval proxy base URL: `https://llm-proxy.eval.all-hands.dev`, found in `openhands-sdk/openhands/sdk/agent/base.py`. + +Result summary: + +- Primary direct OpenAI profile returned: `primary profile live check ok`. +- `ask_oracle` loaded the saved `oracle` profile from an isolated temporary profile store. +- Tool observation was successful (`observation_is_error: false`). +- Oracle response identified itself as the Oracle profile and explained why an agent should ask for a second opinion when stuck. +- The temporary profile store was removed in a `finally` block after the run. + +## Validation commands + +Pre-commit command run on changed files: passed. 
"""Example: configure an Oracle LLM profile for the ask_oracle tool.

Set `OPENAI_API_KEY` for the primary OpenAI profile and `LITELLM_API_KEY` for
the eval proxy Oracle profile before running live. Optional overrides:

    ASK_ORACLE_PRIMARY_MODEL=openai/gpt-5-nano
    ASK_ORACLE_MODEL=litellm_proxy/openai/gpt-5-mini
    ASK_ORACLE_BASE_URL=https://llm-proxy.eval.all-hands.dev

The Oracle profile (including its API key) is saved to a throwaway profile
store inside a fresh temporary directory. That directory is removed, and the
profile-store default is put back, when the script finishes or fails.
"""

import os
import shutil
import tempfile
from pathlib import Path

from pydantic import SecretStr

from openhands.sdk import (
    LLM,
    LLMProfileStore,
    LocalConversation,
    OpenHandsAgentSettings,
)
from openhands.sdk.llm import llm_profile_store
from openhands.sdk.tool.builtins import AskOracleAction


# Either dedicated key may be replaced by the generic LLM_API_KEY.
primary_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("LLM_API_KEY")
oracle_api_key = os.getenv("LITELLM_API_KEY") or os.getenv("LLM_API_KEY")

if not primary_api_key or not oracle_api_key:
    # Without credentials there is nothing to run; exit successfully so the
    # example can still be executed in environments that have no keys.
    print(
        "Set OPENAI_API_KEY (or LLM_API_KEY) and LITELLM_API_KEY "
        "to run the live ask_oracle example."
    )
    print("EXAMPLE_COST: 0")
    raise SystemExit(0)

# Everything below touches process-wide state (a temp directory on disk and a
# module-level default), so it all lives inside one try/finally. Previously
# the directory was created and the default overridden *before* the try, so a
# failure while constructing either LLM leaked the directory and left the
# override in place.
scratch_root = Path(tempfile.mkdtemp())
profile_store_dir = scratch_root / "profiles"
original_profile_dir = getattr(llm_profile_store, "_DEFAULT_PROFILE_DIR")

try:
    # Point the default profile store at the isolated directory so the
    # ask_oracle tool resolves the "oracle" profile from there.
    setattr(llm_profile_store, "_DEFAULT_PROFILE_DIR", profile_store_dir)
    store = LLMProfileStore()

    primary_llm = LLM(
        model=os.getenv("ASK_ORACLE_PRIMARY_MODEL", "openai/gpt-5-nano"),
        api_key=SecretStr(primary_api_key),
        usage_id="ask-oracle-example-primary",
        max_output_tokens=1000,
        reasoning_effort="low",
    )
    oracle_llm = LLM(
        model=os.getenv("ASK_ORACLE_MODEL", "litellm_proxy/openai/gpt-5-mini"),
        api_key=SecretStr(oracle_api_key),
        base_url=os.getenv(
            "ASK_ORACLE_BASE_URL", "https://llm-proxy.eval.all-hands.dev"
        ),
        usage_id="ask-oracle-example-oracle",
        max_output_tokens=1000,
        reasoning_effort="low",
    )

    # include_secrets=True writes the API key to disk; acceptable here only
    # because the store lives in the private temp directory removed below.
    store.save("oracle", oracle_llm, include_secrets=True)

    # Setting oracle_llm_profile is what makes ask_oracle available.
    settings = OpenHandsAgentSettings(llm=primary_llm, oracle_llm_profile="oracle")
    agent = settings.create_agent()
    conversation = LocalConversation(agent=agent, workspace=Path.cwd())
    conversation._ensure_agent_ready()

    print(f"Configured tools: {sorted(agent.tools_map)}")
    observation = conversation.execute_tool(
        "ask_oracle",
        AskOracleAction(
            question=(
                "In one sentence, recommend whether a feature flag should be stored "
                "as one nullable setting or as a separate boolean plus string."
            ),
            context="Prefer the simplest backwards-compatible SDK settings design.",
        ),
    )

    print("Oracle said:")
    print(observation.text)
    print("EXAMPLE_COST: 0")
finally:
    # Undo the module-level override first, then drop the saved profile.
    setattr(llm_profile_store, "_DEFAULT_PROFILE_DIR", original_profile_dir)
    shutil.rmtree(scratch_root, ignore_errors=True)
mcp_config = ( @@ -1112,9 +1132,20 @@ def create_agent(self) -> Agent: if self.enable_switch_llm_tool: include_default_tools.append(SwitchLLMTool.__name__) + tools = list(self.tools) + if self.oracle_llm_profile and not any( + tool.name == AskOracleTool.name for tool in tools + ): + tools.append( + Tool( + name=AskOracleTool.name, + params={"profile_name": self.oracle_llm_profile}, + ) + ) + return Agent( llm=self.llm, - tools=self.tools, + tools=tools, mcp_config=mcp_config, include_default_tools=include_default_tools, agent_context=self.agent_context, diff --git a/openhands-sdk/openhands/sdk/tool/builtins/__init__.py b/openhands-sdk/openhands/sdk/tool/builtins/__init__.py index 15dbf75e67..3a7bcc85b2 100644 --- a/openhands-sdk/openhands/sdk/tool/builtins/__init__.py +++ b/openhands-sdk/openhands/sdk/tool/builtins/__init__.py @@ -5,6 +5,12 @@ For tools that require interacting with the environment, add them to `openhands-tools`. """ +from openhands.sdk.tool.builtins.ask_oracle import ( + AskOracleAction, + AskOracleExecutor, + AskOracleObservation, + AskOracleTool, +) from openhands.sdk.tool.builtins.finish import ( FinishAction, FinishExecutor, @@ -41,6 +47,7 @@ # conditional wiring in `Agent._initialize`. 
BUILT_IN_TOOL_CLASSES = { **{tool.__name__: tool for tool in BUILT_IN_TOOLS}, + AskOracleTool.__name__: AskOracleTool, InvokeSkillTool.__name__: InvokeSkillTool, SwitchLLMTool.__name__: SwitchLLMTool, } @@ -48,6 +55,10 @@ __all__ = [ "BUILT_IN_TOOLS", "BUILT_IN_TOOL_CLASSES", + "AskOracleTool", + "AskOracleAction", + "AskOracleObservation", + "AskOracleExecutor", "FinishTool", "FinishAction", "FinishObservation", diff --git a/openhands-sdk/openhands/sdk/tool/builtins/ask_oracle.py b/openhands-sdk/openhands/sdk/tool/builtins/ask_oracle.py new file mode 100644 index 0000000000..ff9e9527c0 --- /dev/null +++ b/openhands-sdk/openhands/sdk/tool/builtins/ask_oracle.py @@ -0,0 +1,255 @@ +from collections.abc import Sequence +from typing import TYPE_CHECKING, Self + +from pydantic import Field +from rich.text import Text + +from openhands.sdk.llm.llm_profile_store import PROFILE_NAME_REGEX, LLMProfileStore +from openhands.sdk.tool.registry import register_tool +from openhands.sdk.tool.tool import ( + Action, + Observation, + ToolAnnotations, + ToolDefinition, + ToolExecutor, +) + + +if TYPE_CHECKING: + from openhands.sdk.conversation.impl.local_conversation import LocalConversation + from openhands.sdk.conversation.state import ConversationState + + +class AskOracleAction(Action): + """Action for asking a configured Oracle LLM profile for advice.""" + + question: str = Field( + description=( + "The specific question or dilemma to ask the Oracle about. Use this " + "when you are stuck, uncertain, or need a second opinion." + ) + ) + context: str | None = Field( + default=None, + description=( + "Optional extra context, such as approaches already tried, constraints, " + "or the recommendation you are considering." 
class AskOracleObservation(Observation):
    """Observation returned by the Oracle consultation."""

    # Name of the saved profile the tool was configured with; the executor
    # passes an empty string when no profile is configured.
    profile_name: str = Field(description="LLM profile used for the Oracle call.")
    # Only populated once the profile has been loaded successfully.
    oracle_model: str | None = Field(
        default=None,
        description="Model configured by the Oracle profile, when available.",
    )

    @property
    def visualize(self) -> Text:
        """Render a headline (status, profile, optional model) plus the body text."""
        if self.is_error:
            headline, headline_style = "Oracle consultation failed", "bold red"
        else:
            headline, headline_style = "Oracle recommendation", "bold green"

        rendered = Text()
        rendered.append(headline, style=headline_style)
        rendered.append(f": {self.profile_name}")

        model_name = self.oracle_model
        if model_name:
            rendered.append(f" ({model_name})")

        body = self.text
        if body:
            # The body starts on its own line beneath the headline.
            rendered.append("\n")
            rendered.append(body)
        return rendered
class AskOracleExecutor(ToolExecutor[AskOracleAction, AskOracleObservation]):
    """Executor that sends the agent's question to a saved Oracle LLM profile.

    The profile is loaded on every call, queried once with the current
    conversation view plus the Oracle prompt, and the reply text is returned
    as the observation. Load and completion failures are reported as error
    observations rather than raised.
    """

    def __init__(self, profile_name: str | None, profile_store_dir: str | None) -> None:
        self.profile_name = profile_name
        self.profile_store_dir = profile_store_dir

    def __call__(
        self,
        action: AskOracleAction,
        conversation: "LocalConversation | None" = None,
    ) -> AskOracleObservation:
        """Consult the Oracle profile and return its recommendation.

        Args:
            action: The question and optional extra context from the agent.
            conversation: The active conversation; required, since its view,
                cipher and tool map are all used for the consultation.

        Returns:
            An observation carrying the Oracle's text, or an error
            observation when no profile is configured, no conversation is
            supplied, the profile cannot be loaded, the completion fails, or
            the reply contains no text.
        """
        profile = self.profile_name
        if not profile:
            return AskOracleObservation.from_text(
                text="No Oracle LLM profile is configured.",
                is_error=True,
                profile_name="",
            )

        def fail(text: str, **extra: str | None) -> AskOracleObservation:
            # Shared shape of every error observation for a known profile;
            # ``oracle_model`` is only forwarded once the profile is loaded.
            return AskOracleObservation.from_text(
                text=text,
                is_error=True,
                profile_name=profile,
                **extra,
            )

        if conversation is None:
            return fail("Cannot ask Oracle without an active conversation.")

        try:
            # Store construction stays inside the try so that its failures
            # are reported like any other load failure.
            store = LLMProfileStore(self.profile_store_dir)
            oracle_llm = store.load(profile, cipher=conversation._cipher)
        except FileNotFoundError:
            return fail(f"Oracle LLM profile '{profile}' was not found.")
        except ValueError as exc:
            return fail(str(exc))
        except Exception as exc:
            return fail(
                f"Failed to load Oracle LLM profile '{profile}': "
                f"{type(exc).__name__}: {exc}"
            )

        # Lazy import avoids a startup cycle while built-in tools are registered.
        from openhands.sdk.agent.utils import (
            make_llm_completion,
            prepare_llm_messages,
        )
        from openhands.sdk.llm import Message, TextContent

        conversation._ensure_agent_ready()

        if action.context:
            context_section = (
                f"\nAdditional context from the agent:\n{action.context}\n"
            )
        else:
            context_section = ""
        oracle_prompt = _ORACLE_PROMPT_TEMPLATE.format(
            question=action.question,
            context_section=context_section,
        )
        oracle_request = Message(
            role="user",
            content=[TextContent(text=oracle_prompt)],
        )
        # The Oracle sees the conversation view followed by the prompt.
        messages = prepare_llm_messages(
            conversation.state.view, additional_messages=[oracle_request]
        )

        try:
            # Query a deep copy tagged with its own usage_id; the loaded
            # profile object itself is left untouched.
            consulted_llm = oracle_llm.model_copy(
                update={"usage_id": f"oracle-profile:{profile}"},
                deep=True,
            )
            agent_tools = list(conversation.agent.tools_map.values())
            response = make_llm_completion(
                consulted_llm,
                messages,
                tools=agent_tools,
            )
        except Exception as exc:
            return fail(
                f"Oracle LLM profile '{profile}' failed: "
                f"{type(exc).__name__}: {exc}",
                oracle_model=oracle_llm.model,
            )

        # Only text parts of the reply are kept.
        text_parts: list[str] = []
        for part in response.message.content:
            if isinstance(part, TextContent):
                text_parts.append(part.text)
        oracle_text = "".join(text_parts).strip()

        if not oracle_text:
            return fail(
                "Oracle did not return a text recommendation.",
                oracle_model=oracle_llm.model,
            )

        return AskOracleObservation.from_text(
            text=oracle_text,
            profile_name=profile,
            oracle_model=oracle_llm.model,
        )
profile_store_dir" + ) + if profile_name is not None and not PROFILE_NAME_REGEX.match(profile_name): + raise ValueError( + "Invalid Oracle profile name. Profile names must be 1-64 " + "characters, start with a letter or digit, and contain only " + "letters, digits, '.', '_', or '-'." + ) + + profile_display = profile_name or "not configured" + return [ + cls( + description=_DESCRIPTION_TEMPLATE.format(profile_name=profile_display), + action_type=AskOracleAction, + observation_type=AskOracleObservation, + executor=AskOracleExecutor(profile_name, profile_store_dir), + annotations=ToolAnnotations( + readOnlyHint=True, + destructiveHint=False, + idempotentHint=False, + openWorldHint=False, + ), + ) + ] + + +register_tool(AskOracleTool.name, AskOracleTool) diff --git a/tests/examples/test_examples.py b/tests/examples/test_examples.py index 82b94fc46b..9d1e9066d5 100644 --- a/tests/examples/test_examples.py +++ b/tests/examples/test_examples.py @@ -29,6 +29,7 @@ EXAMPLES_ROOT / "01_standalone_sdk" / "33_hooks", EXAMPLES_ROOT / "01_standalone_sdk" / "37_llm_profile_store", EXAMPLES_ROOT / "01_standalone_sdk" / "51_agent_hooks", + EXAMPLES_ROOT / "01_standalone_sdk" / "54_ask_oracle_tool", EXAMPLES_ROOT / "01_standalone_sdk" / "43_mixed_marketplace_skills", EXAMPLES_ROOT / "02_remote_agent_server" / "06_custom_tool", EXAMPLES_ROOT / "05_skills_and_plugins" / "01_loading_agentskills", @@ -101,6 +102,9 @@ def test_directory_example_is_discovered() -> None: assert ( EXAMPLES_ROOT / "01_standalone_sdk" / "51_agent_hooks" / "main.py" ) in EXAMPLES + assert ( + EXAMPLES_ROOT / "01_standalone_sdk" / "54_ask_oracle_tool" / "main.py" + ) in EXAMPLES @pytest.mark.parametrize("example_path", EXAMPLES, ids=_normalize_path) diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py index 8fadbde3c8..60fd75f6b8 100644 --- a/tests/sdk/test_settings.py +++ b/tests/sdk/test_settings.py @@ -69,6 +69,7 @@ def test_llm_agent_settings_export_schema_groups_sections() -> None: "tools", 
"enable_sub_agents", "enable_switch_llm_tool", + "oracle_llm_profile", "tool_concurrency_limit", "mcp_config", } @@ -85,6 +86,9 @@ def test_llm_agent_settings_export_schema_groups_sections() -> None: assert ( general_fields["enable_switch_llm_tool"].prominence is SettingProminence.MINOR ) + assert general_fields["oracle_llm_profile"].value_type == "string" + assert general_fields["oracle_llm_profile"].default is None + assert general_fields["oracle_llm_profile"].prominence is SettingProminence.MINOR assert general_fields["tool_concurrency_limit"].value_type == "integer" assert general_fields["tool_concurrency_limit"].default == 1 assert ( @@ -328,6 +332,7 @@ def test_export_agent_settings_schema_emits_variant_tagged_sections() -> None: "tools", "enable_sub_agents", "enable_switch_llm_tool", + "oracle_llm_profile", "tool_concurrency_limit", "mcp_config", } diff --git a/tests/sdk/tool/test_ask_oracle.py b/tests/sdk/tool/test_ask_oracle.py new file mode 100644 index 0000000000..da39bd0315 --- /dev/null +++ b/tests/sdk/tool/test_ask_oracle.py @@ -0,0 +1,171 @@ +from pathlib import Path + +import pytest +from pydantic import ValidationError + +from openhands.sdk import LLM, LocalConversation, OpenHandsAgentSettings, Tool +from openhands.sdk.agent import Agent +from openhands.sdk.llm import Message, TextContent, llm_profile_store +from openhands.sdk.llm.llm_profile_store import LLMProfileStore +from openhands.sdk.testing import TestLLM +from openhands.sdk.tool.builtins import ( + AskOracleAction, + AskOracleObservation, + AskOracleTool, +) + + +def _make_llm(model: str, usage_id: str) -> LLM: + return TestLLM.from_messages([], model=model, usage_id=usage_id) + + +def _assistant_message(text: str) -> Message: + return Message(role="assistant", content=[TextContent(text=text)]) + + +def _make_conversation(profile_name: str = "oracle") -> LocalConversation: + return LocalConversation( + agent=Agent( + llm=_make_llm("default-model", "default"), + tools=[ + 
def test_agent_settings_adds_ask_oracle_tool_when_profile_is_configured() -> None:
    """Setting ``oracle_llm_profile`` adds the tool spec and registers the tool."""
    settings = OpenHandsAgentSettings(
        llm=_make_llm("default-model", "default"),
        oracle_llm_profile="oracle",
    )
    agent = settings.create_agent()

    # The tool spec must carry the configured profile name as its only param.
    expected_params = {"profile_name": "oracle"}
    matching_specs = [
        spec
        for spec in agent.tools
        if spec.name == AskOracleTool.name and spec.params == expected_params
    ]
    assert any(matching_specs)

    # The tool only shows up in tools_map once the agent has been initialised.
    conversation = LocalConversation(agent=agent, workspace=Path.cwd())
    conversation._ensure_agent_ready()
    assert "ask_oracle" in agent.tools_map
def test_ask_oracle_tool_reports_missing_profile(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A profile name with no saved profile yields an error observation."""
    # Use an empty, isolated profile directory so "missing" cannot exist.
    empty_store = tmp_path / "profiles"
    empty_store.mkdir()
    monkeypatch.setattr(llm_profile_store, "_DEFAULT_PROFILE_DIR", empty_store)

    conversation = _make_conversation(profile_name="missing")
    action = AskOracleAction(question="What should I do next?")
    observation = conversation.execute_tool("ask_oracle", action)

    assert isinstance(observation, AskOracleObservation)
    assert observation.is_error
    assert observation.profile_name == "missing"
    assert "was not found" in observation.text
next?"), + ) + + assert isinstance(observation, AskOracleObservation) + assert observation.is_error + assert observation.oracle_model == "oracle-model" + assert "did not return a text recommendation" in observation.text From 3daa1ce7b896684ed3c9e57f56c71071e24b5fab Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Fri, 12 Jun 2026 01:22:27 +0200 Subject: [PATCH 2/5] Revise Oracle description for clarity and intent Updated the description of the Oracle to clarify its purpose and capabilities. --- openhands-sdk/openhands/sdk/tool/builtins/ask_oracle.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/openhands-sdk/openhands/sdk/tool/builtins/ask_oracle.py b/openhands-sdk/openhands/sdk/tool/builtins/ask_oracle.py index ff9e9527c0..a8012e81a3 100644 --- a/openhands-sdk/openhands/sdk/tool/builtins/ask_oracle.py +++ b/openhands-sdk/openhands/sdk/tool/builtins/ask_oracle.py @@ -74,15 +74,13 @@ def visualize(self) -> Text: _DESCRIPTION_TEMPLATE = ( - "Ask the Oracle for a second opinion. The Oracle is a configured, saved LLM " - "profile intended to be more capable for difficult reasoning.\n\n" + "Ask the Oracle for a second opinion. The Oracle is a smart model " + "intended to be your help. It is capable for difficult reasoning.\n\n" "Use this when you are stuck, uncertain, comparing approaches, or need a " "higher-quality recommendation before proceeding. 
The Oracle receives the " - "current conversation context plus your question, but this consultation does " - "not switch the active LLM profile.\n\n" + "current conversation context plus your question.\n\n" "Treat the Oracle's response as strong guidance and follow its recommendation " "unless you have a clear reason not to.\n\n" - "Configured Oracle profile: {profile_name}" ) _ORACLE_PROMPT_TEMPLATE = """\ From ed274a8d7fa1fdb02d267abfa31b665327ad2d87 Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 12 Jun 2026 00:06:09 +0000 Subject: [PATCH 3/5] refactor(sdk): simplify oracle consultation Co-authored-by: openhands --- .pr/ask_oracle_live_validation.json | 9 +- .pr/ask_oracle_live_validation.py | 3 +- .pr/ask_oracle_test_results.json | 4 +- .pr/ask_oracle_validation_summary.md | 5 +- .../openhands/sdk/tool/builtins/ask_oracle.py | 106 ++++++------------ tests/sdk/tool/test_ask_oracle.py | 79 ++++++++++--- 6 files changed, 113 insertions(+), 93 deletions(-) diff --git a/.pr/ask_oracle_live_validation.json b/.pr/ask_oracle_live_validation.json index 493a25bbea..2ca4af6e8e 100644 --- a/.pr/ask_oracle_live_validation.json +++ b/.pr/ask_oracle_live_validation.json @@ -1,6 +1,6 @@ { - "started_at": "2026-06-11T20:40:20.866050+00:00", - "finished_at": "2026-06-11T20:40:29.750538+00:00", + "started_at": "2026-06-12T00:04:04.460387+00:00", + "finished_at": "2026-06-12T00:04:11.102035+00:00", "issue": "https://github.com/OpenHands/software-agent-sdk/issues/3672", "primary_profile": { "model": "openai/gpt-5-nano", @@ -23,8 +23,7 @@ "think" ], "observation_is_error": false, - "observation_profile_name": "oracle", - "observation_oracle_model": "litellm_proxy/openai/gpt-5-mini", - "observation_text": "- I am the Oracle profile (gpt-5-mini via the eval LiteLLM proxy), available as a second-opinion reviewer. 
\n- Ask for a second opinion to surface alternative approaches and hidden blind spots (e.g., different assumptions, edge cases, or failure modes you overlooked); caveat \u2014 provide clear context and the approaches you\u2019ve tried so the Oracle can give targeted, non-redundant guidance." + "observation_response": "- I am the Oracle profile: gpt-5-mini accessed via the eval LiteLLM proxy. \n- Ask for a second opinion because it can expose model blind spots or hallucinations and suggest alternative strategies; caveat \u2014 validate the second opinion against facts/constraints and avoid sharing sensitive data.", + "observation_text": "- I am the Oracle profile: gpt-5-mini accessed via the eval LiteLLM proxy. \n- Ask for a second opinion because it can expose model blind spots or hallucinations and suggest alternative strategies; caveat \u2014 validate the second opinion against facts/constraints and avoid sharing sensitive data." } } diff --git a/.pr/ask_oracle_live_validation.py b/.pr/ask_oracle_live_validation.py index a54338237f..0d4ee7f14d 100644 --- a/.pr/ask_oracle_live_validation.py +++ b/.pr/ask_oracle_live_validation.py @@ -113,8 +113,7 @@ def first_text(message: Message) -> str: "ask_oracle_tool": { "registered_tool_names": sorted(agent.tools_map), "observation_is_error": observation.is_error, - "observation_profile_name": observation.profile_name, - "observation_oracle_model": observation.oracle_model, + "observation_response": observation.response, "observation_text": observation.text, }, } diff --git a/.pr/ask_oracle_test_results.json b/.pr/ask_oracle_test_results.json index 966e1ecf56..d8be0c5a5e 100644 --- a/.pr/ask_oracle_test_results.json +++ b/.pr/ask_oracle_test_results.json @@ -1,7 +1,7 @@ { "targeted_pytest": { - "command": "uv run pytest tests/sdk/tool/test_ask_oracle.py tests/sdk/tool/test_builtins.py tests/sdk/test_settings.py::test_llm_agent_settings_export_schema_groups_sections 
tests/examples/test_examples.py::test_directory_example_is_discovered", - "result": "11 passed", + "command": "uv run pytest tests/sdk/tool/test_ask_oracle.py tests/sdk/tool/test_builtins.py tests/sdk/test_settings.py::test_llm_agent_settings_export_schema_groups_sections tests/sdk/test_settings.py::test_export_agent_settings_schema_emits_variant_tagged_sections tests/examples/test_examples.py::test_directory_example_is_discovered", + "result": "12 passed", "warnings": 5 }, "example_pytest": { diff --git a/.pr/ask_oracle_validation_summary.md b/.pr/ask_oracle_validation_summary.md index 5ab1a4b0d9..471cdf41cf 100644 --- a/.pr/ask_oracle_validation_summary.md +++ b/.pr/ask_oracle_validation_summary.md @@ -6,7 +6,7 @@ This directory is temporary PR evidence for the `ask_oracle` tool implementation - Added `ask_oracle`, a read-only built-in SDK tool that loads a saved LLM profile and asks it for stateless second-opinion advice. - Added `OpenHandsAgentSettings.oracle_llm_profile`; setting this saved profile name makes the tool available on the standard OpenHands agent. -- The active conversation LLM is not switched. The Oracle call uses the profile only for that consultation. +- The active conversation LLM is not switched. The Oracle call sends only the Oracle system prompt plus the agent's question and optional context, without forwarding conversation history or tools. ## Live validation @@ -48,10 +48,11 @@ uv run pytest \ tests/sdk/tool/test_ask_oracle.py \ tests/sdk/tool/test_builtins.py \ tests/sdk/test_settings.py::test_llm_agent_settings_export_schema_groups_sections \ + tests/sdk/test_settings.py::test_export_agent_settings_schema_emits_variant_tagged_sections \ tests/examples/test_examples.py::test_directory_example_is_discovered ``` -Result: `11 passed`. +Result: `12 passed`. 
### Example execution diff --git a/openhands-sdk/openhands/sdk/tool/builtins/ask_oracle.py b/openhands-sdk/openhands/sdk/tool/builtins/ask_oracle.py index a8012e81a3..669f63c905 100644 --- a/openhands-sdk/openhands/sdk/tool/builtins/ask_oracle.py +++ b/openhands-sdk/openhands/sdk/tool/builtins/ask_oracle.py @@ -51,10 +51,9 @@ def visualize(self) -> Text: class AskOracleObservation(Observation): """Observation returned by the Oracle consultation.""" - profile_name: str = Field(description="LLM profile used for the Oracle call.") - oracle_model: str | None = Field( - default=None, - description="Model configured by the Oracle profile, when available.", + response: str = Field( + default="", + description="Text response returned by the Oracle.", ) @property @@ -64,34 +63,30 @@ def visualize(self) -> Text: content.append("Oracle consultation failed", style="bold red") else: content.append("Oracle recommendation", style="bold green") - content.append(f": {self.profile_name}") - if self.oracle_model: - content.append(f" ({self.oracle_model})") if self.text: content.append("\n") content.append(self.text) return content -_DESCRIPTION_TEMPLATE = ( - "Ask the Oracle for a second opinion. The Oracle is a smart model " - "intended to be your help. It is capable for difficult reasoning.\n\n" +_DESCRIPTION = ( + "Ask the Oracle for a second opinion. The Oracle is a smart model intended " + "to help with difficult reasoning.\n\n" "Use this when you are stuck, uncertain, comparing approaches, or need a " - "higher-quality recommendation before proceeding. The Oracle receives the " - "current conversation context plus your question.\n\n" + "higher-quality recommendation before proceeding.\n\n" "Treat the Oracle's response as strong guidance and follow its recommendation " - "unless you have a clear reason not to.\n\n" + "unless you have a clear reason not to." 
) -_ORACLE_PROMPT_TEMPLATE = """\ +_ORACLE_SYSTEM_PROMPT = """\ You are the Oracle: a highly capable reviewer giving a second opinion to an \ OpenHands agent. -The agent is working in an existing conversation. Use the conversation context you \ -receive to answer the agent's question. Do not call tools. Do not perform work \ +Answer the agent's question directly. Do not call tools. Do not perform work \ directly. Give a concrete recommendation the agent can follow, including important \ -risks or caveats. +risks or caveats.""" +_ORACLE_USER_PROMPT_TEMPLATE = """\ Question: {question} {context_section}""" @@ -109,105 +104,79 @@ def __call__( ) -> AskOracleObservation: if not self.profile_name: return AskOracleObservation.from_text( - text="No Oracle LLM profile is configured.", - is_error=True, - profile_name="", - ) - if conversation is None: - return AskOracleObservation.from_text( - text="Cannot ask Oracle without an active conversation.", + text="The Oracle is not configured.", is_error=True, - profile_name=self.profile_name, ) + cipher = conversation._cipher if conversation is not None else None try: oracle_llm = LLMProfileStore(self.profile_store_dir).load( - self.profile_name, cipher=conversation._cipher + self.profile_name, cipher=cipher ) except FileNotFoundError: return AskOracleObservation.from_text( - text=f"Oracle LLM profile '{self.profile_name}' was not found.", + text=( + "The Oracle is not available because its configured profile " + "was not found." 
+ ), is_error=True, - profile_name=self.profile_name, ) except ValueError as exc: return AskOracleObservation.from_text( - text=str(exc), + text=f"The Oracle is not available: {exc}", is_error=True, - profile_name=self.profile_name, ) except Exception as exc: return AskOracleObservation.from_text( - text=( - f"Failed to load Oracle LLM profile '{self.profile_name}': " - f"{type(exc).__name__}: {exc}" - ), + text=f"The Oracle is not available: {type(exc).__name__}: {exc}", is_error=True, - profile_name=self.profile_name, ) - # Lazy import avoids a startup cycle while built-in tools are registered. - from openhands.sdk.agent.utils import ( - make_llm_completion, - prepare_llm_messages, - ) + from openhands.sdk.agent.utils import make_llm_completion from openhands.sdk.llm import Message, TextContent - conversation._ensure_agent_ready() context_section = ( f"\nAdditional context from the agent:\n{action.context}\n" if action.context else "" ) - oracle_prompt = _ORACLE_PROMPT_TEMPLATE.format( + user_prompt = _ORACLE_USER_PROMPT_TEMPLATE.format( question=action.question, context_section=context_section, ) - user_message = Message( - role="user", - content=[TextContent(text=oracle_prompt)], - ) - messages = prepare_llm_messages( - conversation.state.view, additional_messages=[user_message] - ) + messages = [ + Message( + role="system", + content=[TextContent(text=_ORACLE_SYSTEM_PROMPT)], + ), + Message(role="user", content=[TextContent(text=user_prompt)]), + ] try: - response = make_llm_completion( - oracle_llm.model_copy( - update={"usage_id": f"oracle-profile:{self.profile_name}"}, - deep=True, - ), - messages, - tools=list(conversation.agent.tools_map.values()), - ) + llm_response = make_llm_completion(oracle_llm, messages) except Exception as exc: return AskOracleObservation.from_text( text=( - f"Oracle LLM profile '{self.profile_name}' failed: " - f"{type(exc).__name__}: {exc}" + "The Oracle encountered an error and did not return a " + f"response: 
{type(exc).__name__}: {exc}" ), is_error=True, - profile_name=self.profile_name, - oracle_model=oracle_llm.model, ) oracle_text = "".join( content.text - for content in response.message.content + for content in llm_response.message.content if isinstance(content, TextContent) ).strip() if not oracle_text: return AskOracleObservation.from_text( - text="Oracle did not return a text recommendation.", + text="The Oracle did not return a response.", is_error=True, - profile_name=self.profile_name, - oracle_model=oracle_llm.model, ) return AskOracleObservation.from_text( text=oracle_text, - profile_name=self.profile_name, - oracle_model=oracle_llm.model, + response=oracle_text, ) @@ -233,10 +202,9 @@ def create( "letters, digits, '.', '_', or '-'." ) - profile_display = profile_name or "not configured" return [ cls( - description=_DESCRIPTION_TEMPLATE.format(profile_name=profile_display), + description=_DESCRIPTION, action_type=AskOracleAction, observation_type=AskOracleObservation, executor=AskOracleExecutor(profile_name, profile_store_dir), diff --git a/tests/sdk/tool/test_ask_oracle.py b/tests/sdk/tool/test_ask_oracle.py index da39bd0315..6570912dc1 100644 --- a/tests/sdk/tool/test_ask_oracle.py +++ b/tests/sdk/tool/test_ask_oracle.py @@ -1,13 +1,16 @@ +from collections.abc import Sequence from pathlib import Path +from typing import Any, cast import pytest -from pydantic import ValidationError +from pydantic import PrivateAttr, ValidationError from openhands.sdk import LLM, LocalConversation, OpenHandsAgentSettings, Tool from openhands.sdk.agent import Agent from openhands.sdk.llm import Message, TextContent, llm_profile_store from openhands.sdk.llm.llm_profile_store import LLMProfileStore from openhands.sdk.testing import TestLLM +from openhands.sdk.tool import ToolDefinition from openhands.sdk.tool.builtins import ( AskOracleAction, AskOracleObservation, @@ -15,6 +18,39 @@ ) +class CapturingTestLLM(TestLLM): + _last_messages: list[Message] = 
PrivateAttr(default_factory=list) + _last_tools: Sequence[ToolDefinition] | None = PrivateAttr(default=None) + + @property + def last_messages(self) -> list[Message]: + return self._last_messages + + @property + def last_tools(self) -> Sequence[ToolDefinition] | None: + return self._last_tools + + def completion( + self, + messages: list[Message], + tools: Sequence[ToolDefinition] | None = None, + _return_metrics: bool = False, + add_security_risk_prediction: bool = False, + on_token: Any = None, + **kwargs: Any, + ): + self._last_messages = list(messages) + self._last_tools = tools + return super().completion( + messages=messages, + tools=tools, + _return_metrics=_return_metrics, + add_security_risk_prediction=add_security_risk_prediction, + on_token=on_token, + **kwargs, + ) + + def _make_llm(model: str, usage_id: str) -> LLM: return TestLLM.from_messages([], model=model, usage_id=usage_id) @@ -23,6 +59,12 @@ def _assistant_message(text: str) -> Message: return Message(role="assistant", content=[TextContent(text=text)]) +def _message_text(message: Message) -> str: + return "".join( + content.text for content in message.content if isinstance(content, TextContent) + ) + + def _make_conversation(profile_name: str = "oracle") -> LocalConversation: return LocalConversation( agent=Agent( @@ -36,11 +78,11 @@ def _make_conversation(profile_name: str = "oracle") -> LocalConversation: ) -def test_ask_oracle_tool_description_names_configured_profile() -> None: +def test_ask_oracle_tool_description_guides_second_opinion_usage() -> None: tool = AskOracleTool.create(profile_name="oracle")[0] assert "Ask the Oracle for a second opinion" in tool.description - assert "Configured Oracle profile: oracle" in tool.description + assert "Treat the Oracle's response as strong guidance" in tool.description def test_ask_oracle_tool_validates_profile_name() -> None: @@ -84,10 +126,13 @@ def test_agent_settings_rejects_invalid_oracle_profile_name() -> None: def 
test_ask_oracle_tool_returns_oracle_recommendation( monkeypatch: pytest.MonkeyPatch, ) -> None: - oracle_llm = TestLLM.from_messages( - [_assistant_message("Prefer the smaller, typed settings field.")], - model="oracle-model", - usage_id="oracle", + oracle_llm = cast( + CapturingTestLLM, + CapturingTestLLM.from_messages( + [_assistant_message("Prefer the smaller, typed settings field.")], + model="oracle-model", + usage_id="oracle", + ), ) def load_profile( @@ -112,10 +157,18 @@ def load_profile( assert isinstance(observation, AskOracleObservation) assert not observation.is_error - assert observation.profile_name == "oracle" - assert observation.oracle_model == "oracle-model" + assert observation.response == "Prefer the smaller, typed settings field." assert observation.text == "Prefer the smaller, typed settings field." assert "Prefer the smaller" in observation.visualize.plain + assert [message.role for message in oracle_llm.last_messages] == ["system", "user"] + assert "You are the Oracle" in _message_text(oracle_llm.last_messages[0]) + assert "Should I add one setting or two?" in _message_text( + oracle_llm.last_messages[1] + ) + assert "The tool needs an Oracle profile name." 
in _message_text( + oracle_llm.last_messages[1] + ) + assert oracle_llm.last_tools == [] assert conversation.agent.llm.model == "default-model" assert conversation.state.agent.llm.model == "default-model" @@ -136,8 +189,8 @@ def test_ask_oracle_tool_reports_missing_profile( assert isinstance(observation, AskOracleObservation) assert observation.is_error - assert observation.profile_name == "missing" - assert "was not found" in observation.text + assert observation.response == "" + assert "not available" in observation.text def test_ask_oracle_tool_reports_empty_oracle_response( @@ -167,5 +220,5 @@ def load_profile( assert isinstance(observation, AskOracleObservation) assert observation.is_error - assert observation.oracle_model == "oracle-model" - assert "did not return a text recommendation" in observation.text + assert observation.response == "" + assert "did not return a response" in observation.text From 41f70f7ad3feb2c0af6bf8d4f9487378d5ef10a8 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 13 Jun 2026 03:17:55 +0000 Subject: [PATCH 4/5] Fix ask oracle profile precedence Co-authored-by: openhands --- openhands-sdk/openhands/sdk/settings/model.py | 22 ++++++++++++++---- tests/sdk/tool/test_ask_oracle.py | 23 +++++++++++++++++++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/openhands-sdk/openhands/sdk/settings/model.py b/openhands-sdk/openhands/sdk/settings/model.py index 2b33b58ad9..a4f702d577 100644 --- a/openhands-sdk/openhands/sdk/settings/model.py +++ b/openhands-sdk/openhands/sdk/settings/model.py @@ -1151,10 +1151,24 @@ def create_agent(self) -> Agent: if self.enable_switch_llm_tool: include_default_tools.append(SwitchLLMTool.__name__) - tools = list(self.tools) - if self.oracle_llm_profile and not any( - tool.name == AskOracleTool.name for tool in tools - ): + tools = [] + has_oracle_tool = False + for tool in self.tools: + if self.oracle_llm_profile and tool.name == AskOracleTool.name: + has_oracle_tool = True + tools.append( + 
tool.model_copy( + update={ + "params": { + **tool.params, + "profile_name": self.oracle_llm_profile, + } + } + ) + ) + else: + tools.append(tool) + if self.oracle_llm_profile and not has_oracle_tool: tools.append( Tool( name=AskOracleTool.name, diff --git a/tests/sdk/tool/test_ask_oracle.py b/tests/sdk/tool/test_ask_oracle.py index 6570912dc1..3ae56241da 100644 --- a/tests/sdk/tool/test_ask_oracle.py +++ b/tests/sdk/tool/test_ask_oracle.py @@ -106,6 +106,29 @@ def test_agent_settings_adds_ask_oracle_tool_when_profile_is_configured() -> Non assert "ask_oracle" in agent.tools_map +def test_agent_settings_configured_profile_updates_existing_ask_oracle_tool() -> None: + agent = OpenHandsAgentSettings( + llm=_make_llm("default-model", "default"), + oracle_llm_profile="oracle", + tools=[ + Tool( + name=AskOracleTool.name, + params={ + "profile_name": "stale", + "profile_store_dir": "/tmp/profiles", + }, + ) + ], + ).create_agent() + + oracle_tools = [tool for tool in agent.tools if tool.name == AskOracleTool.name] + assert len(oracle_tools) == 1 + assert oracle_tools[0].params == { + "profile_name": "oracle", + "profile_store_dir": "/tmp/profiles", + } + + def test_agent_settings_omits_ask_oracle_tool_without_profile() -> None: agent = OpenHandsAgentSettings( llm=_make_llm("default-model", "default"), From 994c50802f6ccc0d6395763e6e5bc3554a0518bd Mon Sep 17 00:00:00 2001 From: enyst Date: Mon, 15 Jun 2026 23:35:31 +0000 Subject: [PATCH 5/5] Clarify SDK built-in tool grouping Co-authored-by: openhands --- openhands-sdk/openhands/sdk/tool/builtins/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/openhands-sdk/openhands/sdk/tool/builtins/__init__.py b/openhands-sdk/openhands/sdk/tool/builtins/__init__.py index 3a7bcc85b2..53b18b116c 100644 --- a/openhands-sdk/openhands/sdk/tool/builtins/__init__.py +++ b/openhands-sdk/openhands/sdk/tool/builtins/__init__.py @@ -1,8 +1,9 @@ -"""Implementing essential tools that doesn't interact with the 
environment. +"""SDK-resident tools that do not interact with the environment. -These are built in and are *required* for the agent to work. +`BUILT_IN_TOOLS` contains tools attached to every agent. `BUILT_IN_TOOL_CLASSES` +also includes optional SDK tools that are resolved by name from agent setup. -For tools that require interacting with the environment, add them to `openhands-tools`. +Tools that require interacting with the environment belong in `openhands-tools`. """ from openhands.sdk.tool.builtins.ask_oracle import (