Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .pr/ask_oracle_live_validation.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"started_at": "2026-06-12T00:04:04.460387+00:00",
"finished_at": "2026-06-12T00:04:11.102035+00:00",
"issue": "https://github.com/OpenHands/software-agent-sdk/issues/3672",
"primary_profile": {
"model": "openai/gpt-5-nano",
"provider": "OpenAI direct",
"usage_id": "ask-oracle-live-primary",
"response_text": "primary profile live check ok",
"succeeded": true
},
"oracle_profile": {
"profile_name": "oracle",
"model": "litellm_proxy/openai/gpt-5-mini",
"base_url": "https://llm-proxy.eval.all-hands.dev",
"base_url_source": "openhands-sdk/openhands/sdk/agent/base.py",
"usage_id": "ask-oracle-live-oracle"
},
"ask_oracle_tool": {
"registered_tool_names": [
"ask_oracle",
"finish",
"think"
],
"observation_is_error": false,
"observation_response": "- I am the Oracle profile: gpt-5-mini accessed via the eval LiteLLM proxy. \n- Ask for a second opinion because it can expose model blind spots or hallucinations and suggest alternative strategies; caveat \u2014 validate the second opinion against facts/constraints and avoid sharing sensitive data.",
"observation_text": "- I am the Oracle profile: gpt-5-mini accessed via the eval LiteLLM proxy. \n- Ask for a second opinion because it can expose model blind spots or hallucinations and suggest alternative strategies; caveat \u2014 validate the second opinion against facts/constraints and avoid sharing sensitive data."
}
}
124 changes: 124 additions & 0 deletions .pr/ask_oracle_live_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import json
import os
import shutil
import tempfile
from datetime import UTC, datetime
from pathlib import Path

from pydantic import SecretStr

from openhands.sdk import (
LLM,
LLMProfileStore,
LocalConversation,
OpenHandsAgentSettings,
)
from openhands.sdk.agent.utils import make_llm_completion
from openhands.sdk.llm import Message, TextContent, llm_profile_store
from openhands.sdk.tool.builtins import AskOracleAction


# Models and proxy endpoint exercised by this live validation run.
PRIMARY_MODEL = "openai/gpt-5-nano"
ORACLE_MODEL = "litellm_proxy/openai/gpt-5-mini"
ORACLE_BASE_URL = "https://llm-proxy.eval.all-hands.dev"

# The JSON evidence file is written next to this script.
RESULT_PATH = Path(__file__).parent / "ask_oracle_live_validation.json"


def first_text(message: Message) -> str:
    """Return the text of ``message`` with surrounding whitespace removed.

    Every ``TextContent`` item in ``message.content`` contributes its ``text``;
    the pieces are concatenated in order with no separator. Content items of
    any other type are skipped.
    """
    text_parts = []
    for part in message.content:
        if isinstance(part, TextContent):
            text_parts.append(part.text)
    combined = "".join(text_parts)
    return combined.strip()


# Live validation flow:
#   1. call the primary LLM directly and capture its reply,
#   2. save the Oracle LLM as the "oracle" profile in a throwaway profile store,
#   3. build an agent with ``oracle_llm_profile="oracle"`` and run ``ask_oracle``,
#   4. write the collected evidence to RESULT_PATH and echo it to stdout.
started_at = datetime.now(UTC).isoformat()

# Temporary directory that backs the isolated profile store; the saved profile
# includes secrets, so the whole directory is removed in ``finally``.
profile_store_dir = Path(tempfile.mkdtemp()) / "profiles"

try:
    # Point the profile store module at the temporary directory. Done inside
    # the ``try`` so the directory is cleaned up even if this step fails.
    setattr(llm_profile_store, "_DEFAULT_PROFILE_DIR", profile_store_dir)

    primary_llm = LLM(
        model=PRIMARY_MODEL,
        api_key=SecretStr(os.environ["OPENAI_API_KEY"]),
        usage_id="ask-oracle-live-primary",
        max_output_tokens=1000,
        reasoning_effort="low",
    )
    oracle_llm = LLM(
        model=ORACLE_MODEL,
        api_key=SecretStr(os.environ["LITELLM_API_KEY"]),
        base_url=ORACLE_BASE_URL,
        usage_id="ask-oracle-live-oracle",
        max_output_tokens=1000,
        reasoning_effort="low",
    )

    # Step 1: direct completion against the primary profile.
    primary_response = make_llm_completion(
        primary_llm,
        [
            Message(
                role="user",
                content=[
                    TextContent(
                        text=("Reply with exactly: primary profile live check ok")
                    )
                ],
            )
        ],
    )
    primary_text = first_text(primary_response.message)

    # Step 2: persist the Oracle profile under the name the agent settings use.
    store = LLMProfileStore()
    store.save("oracle", oracle_llm, include_secrets=True)

    # Step 3: create the agent and invoke the tool directly.
    settings = OpenHandsAgentSettings(
        llm=primary_llm,
        oracle_llm_profile="oracle",
        enable_switch_llm_tool=False,
    )
    agent = settings.create_agent()
    conversation = LocalConversation(agent=agent, workspace=Path.cwd())
    conversation._ensure_agent_ready()
    observation = conversation.execute_tool(
        "ask_oracle",
        AskOracleAction(
            question=(
                "Answer in two concise bullets: confirm that you are the Oracle "
                "profile and give one practical reason an agent should ask for a "
                "second opinion when stuck."
            ),
            context=(
                "The active LLM profile is OpenAI direct gpt-5-nano. The Oracle "
                "profile is gpt-5-mini through the eval LiteLLM proxy."
            ),
        ),
    )

    result = {
        "started_at": started_at,
        "finished_at": datetime.now(UTC).isoformat(),
        "issue": "https://github.com/OpenHands/software-agent-sdk/issues/3672",
        "primary_profile": {
            "model": PRIMARY_MODEL,
            "provider": "OpenAI direct",
            "usage_id": primary_llm.usage_id,
            "response_text": primary_text,
            "succeeded": bool(primary_text),
        },
        "oracle_profile": {
            "profile_name": "oracle",
            "model": ORACLE_MODEL,
            "base_url": ORACLE_BASE_URL,
            "base_url_source": "openhands-sdk/openhands/sdk/agent/base.py",
            # Read from the LLM object (as the primary profile does) so the
            # evidence cannot drift from the value actually configured.
            "usage_id": oracle_llm.usage_id,
        },
        "ask_oracle_tool": {
            "registered_tool_names": sorted(agent.tools_map),
            "observation_is_error": observation.is_error,
            "observation_response": observation.response,
            "observation_text": observation.text,
        },
    }
finally:
    shutil.rmtree(profile_store_dir.parent, ignore_errors=True)

# Step 4: serialize once, then persist and echo the same payload.
serialized = json.dumps(result, indent=2)
RESULT_PATH.write_text(serialized + "\n")
print(serialized)

# The evidence is always written, but a failed validation must not look like a
# pass to whoever runs the script: exit non-zero when either check failed.
if observation.is_error or not primary_text:
    raise SystemExit(1)
21 changes: 21 additions & 0 deletions .pr/ask_oracle_test_results.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"targeted_pytest": {
"command": "uv run pytest tests/sdk/tool/test_ask_oracle.py tests/sdk/tool/test_builtins.py tests/sdk/test_settings.py::test_llm_agent_settings_export_schema_groups_sections tests/sdk/test_settings.py::test_export_agent_settings_schema_emits_variant_tagged_sections tests/examples/test_examples.py::test_directory_example_is_discovered",
"result": "12 passed",
"warnings": 5
},
"example_pytest": {
"command": "uv run pytest tests/examples/test_examples.py --run-examples -k 54_ask_oracle_tool",
"result": "1 passed, 64 deselected",
"warnings": 5
},
"pre_commit": {
"command": "uv run pre-commit run --files <changed ask_oracle files>",
"result": "passed"
},
"live_validation": {
"command": "OPENHANDS_SUPPRESS_BANNER=1 OPENAI_API_KEY=$OPENAI_API_KEY LITELLM_API_KEY=$LITELLM_API_KEY uv run python .pr/ask_oracle_live_validation.py",
"result_file": ".pr/ask_oracle_live_validation.json",
"result": "passed"
}
}
66 changes: 66 additions & 0 deletions .pr/ask_oracle_validation_summary.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# ask_oracle validation evidence

This directory is temporary PR evidence for the `ask_oracle` tool implementation.

## What changed

- Added `ask_oracle`, a read-only built-in SDK tool that loads a saved LLM profile and asks it for stateless second-opinion advice.
- Added `OpenHandsAgentSettings.oracle_llm_profile`; setting this saved profile name makes the tool available on the standard OpenHands agent.
- The active conversation LLM is not switched. The Oracle call sends only the Oracle system prompt plus the agent's question and optional context, without forwarding conversation history or tools.

## Live validation

Evidence file: `.pr/ask_oracle_live_validation.json`

Command run:

```bash
OPENHANDS_SUPPRESS_BANNER=1 \
OPENAI_API_KEY="$OPENAI_API_KEY" \
LITELLM_API_KEY="$LITELLM_API_KEY" \
uv run python .pr/ask_oracle_live_validation.py
```

Validated profiles:

- Regular profile: `openai/gpt-5-nano` with OpenAI direct API key.
- Oracle profile: `litellm_proxy/openai/gpt-5-mini` with the eval LiteLLM key.
- Eval proxy base URL: `https://llm-proxy.eval.all-hands.dev`, found in `openhands-sdk/openhands/sdk/agent/base.py`.

Result summary:

- Primary direct OpenAI profile returned: `primary profile live check ok`.
- `ask_oracle` loaded the saved `oracle` profile from an isolated temporary profile store.
- Tool observation was successful (`observation_is_error: false`).
- Oracle response identified itself as the Oracle profile and explained why an agent should ask for a second opinion when stuck.
- The temporary profile store was removed in a `finally` block after the run.

## Validation commands

Pre-commit (`uv run pre-commit run --files <changed ask_oracle files>`) was run on the changed files: passed.

### Targeted tests

Command run:

```bash
uv run pytest \
tests/sdk/tool/test_ask_oracle.py \
tests/sdk/tool/test_builtins.py \
tests/sdk/test_settings.py::test_llm_agent_settings_export_schema_groups_sections \
tests/sdk/test_settings.py::test_export_agent_settings_schema_emits_variant_tagged_sections \
tests/examples/test_examples.py::test_directory_example_is_discovered
```

Result: `12 passed`.

### Example execution

Command run:

```bash
uv run pytest tests/examples/test_examples.py --run-examples -k 54_ask_oracle_tool
```

Result: `1 passed, 64 deselected`.

82 changes: 82 additions & 0 deletions examples/01_standalone_sdk/54_ask_oracle_tool/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
"""Example: configure an Oracle LLM profile for the ask_oracle tool.

Set `OPENAI_API_KEY` for the primary OpenAI profile and `LITELLM_API_KEY` for
the eval proxy Oracle profile before running live. Optional overrides:

ASK_ORACLE_PRIMARY_MODEL=openai/gpt-5-nano
ASK_ORACLE_MODEL=litellm_proxy/openai/gpt-5-mini
ASK_ORACLE_BASE_URL=https://llm-proxy.eval.all-hands.dev
"""

import os
import shutil
import tempfile
from pathlib import Path

from pydantic import SecretStr

from openhands.sdk import (
LLM,
LLMProfileStore,
LocalConversation,
OpenHandsAgentSettings,
)
from openhands.sdk.llm import llm_profile_store
from openhands.sdk.tool.builtins import AskOracleAction


# Resolve credentials. LLM_API_KEY acts as a fallback for either profile.
primary_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("LLM_API_KEY")
oracle_api_key = os.getenv("LITELLM_API_KEY") or os.getenv("LLM_API_KEY")

# Without both keys the example cannot make live calls: report and exit cleanly.
if not primary_api_key or not oracle_api_key:
    print(
        "Set OPENAI_API_KEY (or LLM_API_KEY) and LITELLM_API_KEY "
        "to run the live ask_oracle example."
    )
    print("EXAMPLE_COST: 0")
    raise SystemExit(0)

# Temporary directory backing an isolated profile store. Everything after this
# line runs inside ``try`` so the directory (which will hold a profile saved
# with secrets) is removed even if store or LLM construction fails.
profile_store_dir = Path(tempfile.mkdtemp()) / "profiles"

try:
    # Point the profile store module at the temporary directory before the
    # store is created.
    setattr(llm_profile_store, "_DEFAULT_PROFILE_DIR", profile_store_dir)
    store = LLMProfileStore()

    primary_llm = LLM(
        model=os.getenv("ASK_ORACLE_PRIMARY_MODEL", "openai/gpt-5-nano"),
        api_key=SecretStr(primary_api_key),
        usage_id="ask-oracle-example-primary",
        max_output_tokens=1000,
        reasoning_effort="low",
    )
    oracle_llm = LLM(
        model=os.getenv("ASK_ORACLE_MODEL", "litellm_proxy/openai/gpt-5-mini"),
        api_key=SecretStr(oracle_api_key),
        base_url=os.getenv(
            "ASK_ORACLE_BASE_URL", "https://llm-proxy.eval.all-hands.dev"
        ),
        usage_id="ask-oracle-example-oracle",
        max_output_tokens=1000,
        reasoning_effort="low",
    )

    # Save the Oracle LLM under the profile name referenced by the settings.
    store.save("oracle", oracle_llm, include_secrets=True)
    settings = OpenHandsAgentSettings(llm=primary_llm, oracle_llm_profile="oracle")
    agent = settings.create_agent()
    conversation = LocalConversation(agent=agent, workspace=Path.cwd())
    conversation._ensure_agent_ready()

    print(f"Configured tools: {sorted(agent.tools_map)}")
    observation = conversation.execute_tool(
        "ask_oracle",
        AskOracleAction(
            question=(
                "In one sentence, recommend whether a feature flag should be stored "
                "as one nullable setting or as a separate boolean plus string."
            ),
            context="Prefer the simplest backwards-compatible SDK settings design.",
        ),
    )

    print("Oracle said:")
    print(observation.text)
    # NOTE(review): the cost is reported as a fixed 0 even though a live
    # Oracle call was made above; confirm whether real usage should be
    # reported here.
    print("EXAMPLE_COST: 0")
finally:
    shutil.rmtree(profile_store_dir.parent, ignore_errors=True)
Loading
Loading