Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ def _compose_conversation_info(
# The ``acp_model`` fallback is gated on the agent NOT being a live,
# initialized one. Once ``init_state`` has fired, ``current_model_id`` is the
# authoritative resolved value — including ``None`` when an override couldn't
# be applied (unknown provider, or a resume whose ``set_session_model`` the
# be applied (unknown provider, or a resume whose model-selection call the
# server rejected) — so falling back to ``acp_model`` there would re-assert an
# override the live session isn't actually running. The fallback is only for
# *cold* reads (``init_state`` hasn't fired, PrivateAttrs still empty), where
Expand Down
10 changes: 5 additions & 5 deletions openhands-agent-server/openhands/agent_server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,13 +239,13 @@ class _ConversationInfoBase(BaseModel):
supports_runtime_model_switch: bool = Field(
default=False,
description=(
"Whether a live, mid-conversation model switch (via "
"``session/set_model``) will be attempted for this conversation — "
"Whether a live, mid-conversation model switch will be attempted "
"for this conversation — "
"tells the inline picker whether to offer a live-switch control. "
"Mirrors the SDK's switch gate: ``True`` for known switch-capable "
"providers; ``True`` for unknown/custom ACP servers too, since "
"OpenHands attempts the switch optimistically rather than refusing "
"(a rejection then surfaces as an error). ``False`` for native "
"providers; ``False`` for unknown/custom ACP servers because their "
"generic config writes are not guaranteed live-switch primitives. "
"``False`` for native "
"OpenHands agents, for a known provider that declares no support, "
"and before the conversation has started a session."
),
Expand Down
51 changes: 44 additions & 7 deletions openhands-sdk/openhands/sdk/agent/acp_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,30 @@ def _write_secret_file(path: Path, value: str) -> None:
# (codex-acp 0.16+, claude-agent-acp 0.44+) rather than the UNSTABLE ``models``
# capability + ``session/set_model`` (gemini-cli, older codex/claude).
_MODEL_CONFIG_OPTION_ID = "model"
_CODEX_REASONING_EFFORTS: Final[frozenset[str]] = frozenset(
{"low", "medium", "high", "xhigh"}
)


def _codex_model_config_options(model: str) -> tuple[tuple[str, str], ...]:
"""Map combined Canvas Codex model IDs to codex-acp config options."""
base_model, sep, effort = model.rpartition("/")
if sep and base_model and effort in _CODEX_REASONING_EFFORTS:
return (
(_MODEL_CONFIG_OPTION_ID, base_model),
("reasoning_effort", effort),
)
return ((_MODEL_CONFIG_OPTION_ID, model),)


def _model_config_options(
agent_name: str | None,
model: str,
) -> tuple[tuple[str, str], ...]:
provider = detect_acp_provider_by_agent_name(agent_name or "")
if provider is not None and provider.key == "codex":
return _codex_model_config_options(model)
return ((_MODEL_CONFIG_OPTION_ID, model),)


def _model_config_option(response: Any) -> Any | None:
Expand Down Expand Up @@ -432,19 +456,23 @@ async def _apply_acp_model(
session_id: str,
model: str,
*,
agent_name: str | None = None,
via_config_option: bool,
) -> None:
"""Apply ``model`` to a live ACP session via the mechanism the session
advertised: ``set_config_option(configId="model")`` for configOptions-based
servers (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``.

The model id is a bare preset id the server lists in its ``model`` select /
``models`` capability — applied as-is on either mechanism.
The model id is normally the bare preset id listed by the server. For
Codex, callers may still pass a combined Canvas id such as ``gpt-5.5/high``;
codex-acp exposes reasoning effort as a separate config option, so split it
only on the config-options mechanism.
"""
if via_config_option:
await conn.set_config_option(
config_id=_MODEL_CONFIG_OPTION_ID, value=model, session_id=session_id
)
for config_id, value in _model_config_options(agent_name, model):
await conn.set_config_option(
config_id=config_id, value=value, session_id=session_id
)
else:
await conn.set_session_model(model_id=model, session_id=session_id)

Expand Down Expand Up @@ -665,7 +693,11 @@ async def _maybe_set_session_model(
# session won't accept degrades to the server default rather than failing.
try:
await _apply_acp_model(
conn, session_id, acp_model, via_config_option=via_config_option
conn,
session_id,
acp_model,
agent_name=agent_name,
via_config_option=via_config_option,
)
return True
except ACPRequestError as e:
Expand Down Expand Up @@ -715,7 +747,11 @@ async def _reapply_session_model_on_resume(
return False
try:
await _apply_acp_model(
conn, session_id, acp_model, via_config_option=via_config_option
conn,
session_id,
acp_model,
agent_name=agent_name,
via_config_option=via_config_option,
)
return True
except ACPRequestError as e:
Expand Down Expand Up @@ -3751,6 +3787,7 @@ def set_acp_model(self, model: str) -> None:
self._conn,
self._session_id,
model,
agent_name=self._agent_name,
via_config_option=self._model_via_config_option,
),
timeout=self.acp_prompt_timeout,
Expand Down
2 changes: 1 addition & 1 deletion openhands-sdk/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description = "OpenHands SDK - Core functionality for building AI agents"

requires-python = ">=3.12"
dependencies = [
"agent-client-protocol>=0.8.1",
"agent-client-protocol>=0.10.1",
Comment thread
neubig marked this conversation as resolved.
"deprecation>=2.1.0",
"fakeredis[lua]>=2.32.1", # Explicit dependency for docket/fastmcp background tasks
"fastmcp>=3.0.0",
Expand Down
2 changes: 1 addition & 1 deletion tests/agent_server/test_conversation_info_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def test_cold_read_falls_back_to_acp_model_override():

def test_live_agent_does_not_fall_back_to_unapplied_override():
"""Live initialized agent whose override was NOT applied (e.g. a resume whose
``set_session_model`` the server rejected): ``current_model_id`` is the
model-selection call the server rejected): ``current_model_id`` is the
authoritative ``None`` and must NOT fall back to ``acp_model`` — that would
re-assert an override the live session isn't running.
"""
Expand Down
12 changes: 6 additions & 6 deletions tests/agent_server/test_conversation_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -1211,7 +1211,7 @@ def test_switch_acp_model_inactive_service_returns_400(
def test_switch_acp_model_protocol_error_returns_400(
client, mock_conversation_service, mock_event_service, sample_conversation_id
):
"""A rejected ACP ``session/set_model`` call maps to 400, not 500.
"""A rejected ACP model-selection call maps to 400, not 500.

``ACPAgent.set_acp_model`` translates ``acp.exceptions.RequestError`` (e.g.
method-not-found on a custom server, or an invalid model id) into a
Expand All @@ -1220,7 +1220,7 @@ def test_switch_acp_model_protocol_error_returns_400(
"""
mock_conversation_service.get_event_service.return_value = mock_event_service
mock_event_service.switch_acp_model.side_effect = ValueError(
"ACP server rejected set_session_model(model='bogus'): method not found"
"ACP server rejected model switch to 'bogus': method not found"
)

client.app.dependency_overrides[get_conversation_service] = (
Expand All @@ -1241,13 +1241,13 @@ def test_switch_acp_model_timeout_returns_504(
):
"""A TimeoutError (wedged/slow ACP server) maps to 504, not 500.

``ACPAgent.set_acp_model`` bounds the ``session/set_model`` round-trip with
``acp_prompt_timeout``; an expired call raises ``TimeoutError``, which the
route surfaces as a Gateway Timeout rather than an opaque 500.
``ACPAgent.set_acp_model`` bounds the provider model-selection round-trip
with ``acp_prompt_timeout``; an expired call raises ``TimeoutError``, which
the route surfaces as a Gateway Timeout rather than an opaque 500.
"""
mock_conversation_service.get_event_service.return_value = mock_event_service
mock_event_service.switch_acp_model.side_effect = TimeoutError(
"ACP server did not answer set_session_model within 600s"
"ACP server did not answer model switch within 600s"
)

client.app.dependency_overrides[get_conversation_service] = (
Expand Down
108 changes: 101 additions & 7 deletions tests/sdk/agent/test_acp_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
from unittest.mock import AsyncMock, MagicMock, call, patch

import pytest
from acp.exceptions import RequestError as ACPRequestError
Expand All @@ -27,6 +27,7 @@
_classify_acp_turn_error,
_codex_auth_file,
_codex_base_url_overrides,
_codex_model_config_options,
_estimate_cost_from_tokens,
_extract_session_models,
_extract_token_usage,
Expand Down Expand Up @@ -4727,6 +4728,20 @@ def test_noop_when_caller_already_set_provider(self):
)


class TestCodexModelConfigOptions:
def test_splits_combined_model_and_reasoning_effort(self):
assert _codex_model_config_options("gpt-5.5/high") == (
("model", "gpt-5.5"),
("reasoning_effort", "high"),
)

def test_leaves_base_or_custom_model_id_unchanged(self):
assert _codex_model_config_options("gpt-5.5") == (("model", "gpt-5.5"),)
assert _codex_model_config_options("custom/provider/model") == (
("model", "custom/provider/model"),
)


# ---------------------------------------------------------------------------
# ACP model overrides
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -4769,6 +4784,28 @@ async def test_set_config_option_mechanism(self):
conn.set_session_model.assert_not_called()
assert applied is True

@pytest.mark.asyncio
async def test_codex_config_option_splits_reasoning_effort(self):
# Canvas may persist Codex ids as ``model/effort``; codex-acp 0.16
# exposes effort as its own config option.
conn = AsyncMock()
applied = await _maybe_set_session_model(
conn, "codex-acp", "session-1", "gpt-5.4/low", via_config_option=True
)
conn.set_session_model.assert_not_called()
conn.set_config_option.assert_has_awaits(
[
call(config_id="model", value="gpt-5.4", session_id="session-1"),
call(
config_id="reasoning_effort",
value="low",
session_id="session-1",
),
]
)
assert conn.set_config_option.await_count == 2
assert applied is True

@pytest.mark.asyncio
async def test_missing_model_skips_protocol_override(self):
conn = AsyncMock()
Expand Down Expand Up @@ -4873,6 +4910,26 @@ async def test_reapply_via_set_config_option_mechanism(self):
conn.set_session_model.assert_not_called()
assert applied is True

@pytest.mark.asyncio
async def test_codex_reapply_splits_reasoning_effort(self):
conn = AsyncMock()
applied = await _reapply_session_model_on_resume(
conn, "codex-acp", "sess-1", "gpt-5.4/low", via_config_option=True
)
conn.set_session_model.assert_not_called()
conn.set_config_option.assert_has_awaits(
[
call(config_id="model", value="gpt-5.4", session_id="sess-1"),
call(
config_id="reasoning_effort",
value="low",
session_id="sess-1",
),
]
)
assert conn.set_config_option.await_count == 2
assert applied is True

@pytest.mark.asyncio
async def test_missing_model_skips_reapply(self):
conn = AsyncMock()
Expand Down Expand Up @@ -5045,7 +5102,7 @@ def test_switches_model_on_live_codex_session(self):

def test_switches_codex_via_config_option_single_call(self):
# codex-acp 0.16 configOptions: the bare preset id applies as a single
# `model` selection (reasoning effort is a separate, unmanaged option).
# `model` selection.
agent = self._wire(_make_agent(), "codex-acp", via_config_option=True)
agent.set_acp_model("gpt-5.5")
agent._conn.set_config_option.assert_awaited_once_with(
Expand All @@ -5055,6 +5112,24 @@ def test_switches_codex_via_config_option_single_call(self):
assert agent.llm.model == "gpt-5.5"
assert agent._current_model_id == "gpt-5.5"

def test_switches_codex_via_config_option_splits_reasoning_effort(self):
agent = self._wire(_make_agent(), "codex-acp", via_config_option=True)
agent.set_acp_model("gpt-5.5/high")
agent._conn.set_config_option.assert_has_awaits(
[
call(config_id="model", value="gpt-5.5", session_id="sess-1"),
call(
config_id="reasoning_effort",
value="high",
session_id="sess-1",
),
]
)
assert agent._conn.set_config_option.await_count == 2
agent._conn.set_session_model.assert_not_called()
assert agent.llm.model == "gpt-5.5/high"
assert agent._current_model_id == "gpt-5.5/high"

def test_switches_claude_via_config_option_single_call(self):
# A bare id (no `/`) applies as a single `model` selection — no effort.
agent = self._wire(_make_agent(), "claude-agent-acp", via_config_option=True)
Expand Down Expand Up @@ -7742,11 +7817,7 @@ async def test_set_session_model_rejection_propagates(self):


class TestApplyAcpModel:
"""``_apply_acp_model`` sends the model id verbatim through the mechanism the
session advertised — the ``model`` configOption select or
``set_session_model`` — with no id rewriting. (codex 0.16 exposes reasoning
effort as a separate, unmanaged ``reasoning_effort`` configOption, so the
model ids are bare presets on every provider.)"""
"""``_apply_acp_model`` uses the mechanism the session advertised."""

@pytest.mark.asyncio
async def test_config_option_single_call(self):
Expand All @@ -7768,6 +7839,29 @@ async def test_set_session_model_branch(self):
)
conn.set_config_option.assert_not_called()

@pytest.mark.asyncio
async def test_codex_config_option_splits_reasoning_effort(self):
conn = AsyncMock()
await _apply_acp_model(
conn,
"sess-1",
"gpt-5.5/xhigh",
agent_name="codex-acp",
via_config_option=True,
)
conn.set_config_option.assert_has_awaits(
[
call(config_id="model", value="gpt-5.5", session_id="sess-1"),
call(
config_id="reasoning_effort",
value="xhigh",
session_id="sess-1",
),
]
)
assert conn.set_config_option.await_count == 2
conn.set_session_model.assert_not_called()


class TestACPAgentAvailableModelsProperty:
"""``available_models`` exposes the server's model list verbatim.
Expand Down
8 changes: 4 additions & 4 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading