diff --git a/openhands-agent-server/openhands/agent_server/conversation_service.py b/openhands-agent-server/openhands/agent_server/conversation_service.py index 0e11509da7..223a4475a3 100644 --- a/openhands-agent-server/openhands/agent_server/conversation_service.py +++ b/openhands-agent-server/openhands/agent_server/conversation_service.py @@ -321,7 +321,7 @@ def _compose_conversation_info( # The ``acp_model`` fallback is gated on the agent NOT being a live, # initialized one. Once ``init_state`` has fired, ``current_model_id`` is the # authoritative resolved value — including ``None`` when an override couldn't - # be applied (unknown provider, or a resume whose ``set_session_model`` the + # be applied (unknown provider, or a resume whose model-selection call the # server rejected) — so falling back to ``acp_model`` there would re-assert an # override the live session isn't actually running. The fallback is only for # *cold* reads (``init_state`` hasn't fired, PrivateAttrs still empty), where diff --git a/openhands-agent-server/openhands/agent_server/models.py b/openhands-agent-server/openhands/agent_server/models.py index 132067d114..c94cd89ca1 100644 --- a/openhands-agent-server/openhands/agent_server/models.py +++ b/openhands-agent-server/openhands/agent_server/models.py @@ -239,13 +239,13 @@ class _ConversationInfoBase(BaseModel): supports_runtime_model_switch: bool = Field( default=False, description=( - "Whether a live, mid-conversation model switch (via " - "``session/set_model``) will be attempted for this conversation — " + "Whether a live, mid-conversation model switch will be attempted " + "for this conversation — " "tells the inline picker whether to offer a live-switch control. " "Mirrors the SDK's switch gate: ``True`` for known switch-capable " - "providers; ``True`` for unknown/custom ACP servers too, since " - "OpenHands attempts the switch optimistically rather than refusing " - "(a rejection then surfaces as an error). ``False`` for native " + "providers; ``False`` for unknown/custom ACP servers because their " + "generic config writes are not guaranteed live-switch primitives. " + "``False`` for native " "OpenHands agents, for a known provider that declares no support, " "and before the conversation has started a session." ), diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index adee0aa490..7d44628992 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -400,6 +400,30 @@ def _write_secret_file(path: Path, value: str) -> None: # (codex-acp 0.16+, claude-agent-acp 0.44+) rather than the UNSTABLE ``models`` # capability + ``session/set_model`` (gemini-cli, older codex/claude). _MODEL_CONFIG_OPTION_ID = "model" +_CODEX_REASONING_EFFORTS: Final[frozenset[str]] = frozenset( + {"low", "medium", "high", "xhigh"} +) + + +def _codex_model_config_options(model: str) -> tuple[tuple[str, str], ...]: + """Map combined Canvas Codex model IDs to codex-acp config options.""" + base_model, sep, effort = model.rpartition("/") + if sep and base_model and effort in _CODEX_REASONING_EFFORTS: + return ( + (_MODEL_CONFIG_OPTION_ID, base_model), + ("reasoning_effort", effort), + ) + return ((_MODEL_CONFIG_OPTION_ID, model),) + + +def _model_config_options( + agent_name: str | None, + model: str, +) -> tuple[tuple[str, str], ...]: + provider = detect_acp_provider_by_agent_name(agent_name or "") + if provider is not None and provider.key == "codex": + return _codex_model_config_options(model) + return ((_MODEL_CONFIG_OPTION_ID, model),) def _model_config_option(response: Any) -> Any | None: @@ -432,19 +456,23 @@ async def _apply_acp_model( session_id: str, model: str, *, + agent_name: str | None = None, via_config_option: bool, ) -> None: """Apply ``model`` to a live ACP session via the mechanism the session advertised: ``set_config_option(configId="model")`` for configOptions-based servers (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``. - The model id is a bare preset id the server lists in its ``model`` select / - ``models`` capability — applied as-is on either mechanism. + The model id is normally the bare preset id listed by the server. For + Codex, callers may still pass a combined Canvas id such as ``gpt-5.5/high``; + codex-acp exposes reasoning effort as a separate config option, so split it + only on the config-options mechanism. """ if via_config_option: - await conn.set_config_option( - config_id=_MODEL_CONFIG_OPTION_ID, value=model, session_id=session_id - ) + for config_id, value in _model_config_options(agent_name, model): + await conn.set_config_option( + config_id=config_id, value=value, session_id=session_id + ) else: await conn.set_session_model(model_id=model, session_id=session_id) @@ -665,7 +693,11 @@ async def _maybe_set_session_model( # session won't accept degrades to the server default rather than failing. try: await _apply_acp_model( - conn, session_id, acp_model, via_config_option=via_config_option + conn, + session_id, + acp_model, + agent_name=agent_name, + via_config_option=via_config_option, ) return True except ACPRequestError as e: @@ -715,7 +747,11 @@ async def _reapply_session_model_on_resume( return False try: await _apply_acp_model( - conn, session_id, acp_model, via_config_option=via_config_option + conn, + session_id, + acp_model, + agent_name=agent_name, + via_config_option=via_config_option, ) return True except ACPRequestError as e: @@ -3751,6 +3787,7 @@ def set_acp_model(self, model: str) -> None: self._conn, self._session_id, model, + agent_name=self._agent_name, via_config_option=self._model_via_config_option, ), timeout=self.acp_prompt_timeout, diff --git a/openhands-sdk/pyproject.toml b/openhands-sdk/pyproject.toml index 38713baa44..1e88bf80ee 100644 --- a/openhands-sdk/pyproject.toml +++ b/openhands-sdk/pyproject.toml @@ -5,7 +5,7 @@ description = "OpenHands SDK - Core functionality for building AI agents" requires-python = ">=3.12" dependencies = [ - "agent-client-protocol>=0.8.1", + "agent-client-protocol>=0.10.1", "deprecation>=2.1.0", "fakeredis[lua]>=2.32.1", # Explicit dependency for docket/fastmcp background tasks "fastmcp>=3.0.0", diff --git a/tests/agent_server/test_conversation_info_model.py b/tests/agent_server/test_conversation_info_model.py index 975a655ede..89ae9ed4ec 100644 --- a/tests/agent_server/test_conversation_info_model.py +++ b/tests/agent_server/test_conversation_info_model.py @@ -156,7 +156,7 @@ def test_cold_read_falls_back_to_acp_model_override(): def test_live_agent_does_not_fall_back_to_unapplied_override(): """Live initialized agent whose override was NOT applied (e.g. a resume whose - ``set_session_model`` the server rejected): ``current_model_id`` is the + model-selection call the server rejected): ``current_model_id`` is the authoritative ``None`` and must NOT fall back to ``acp_model`` — that would re-assert an override the live session isn't running. """ diff --git a/tests/agent_server/test_conversation_router.py b/tests/agent_server/test_conversation_router.py index 62355456d7..51a570487c 100644 --- a/tests/agent_server/test_conversation_router.py +++ b/tests/agent_server/test_conversation_router.py @@ -1211,7 +1211,7 @@ def test_switch_acp_model_inactive_service_returns_400( def test_switch_acp_model_protocol_error_returns_400( client, mock_conversation_service, mock_event_service, sample_conversation_id ): - """A rejected ACP ``session/set_model`` call maps to 400, not 500. + """A rejected ACP model-selection call maps to 400, not 500. ``ACPAgent.set_acp_model`` translates ``acp.exceptions.RequestError`` (e.g. method-not-found on a custom server, or an invalid model id) into a @@ -1220,7 +1220,7 @@ def test_switch_acp_model_protocol_error_returns_400( """ mock_conversation_service.get_event_service.return_value = mock_event_service mock_event_service.switch_acp_model.side_effect = ValueError( - "ACP server rejected set_session_model(model='bogus'): method not found" + "ACP server rejected model switch to 'bogus': method not found" ) client.app.dependency_overrides[get_conversation_service] = ( @@ -1241,13 +1241,13 @@ def test_switch_acp_model_timeout_returns_504( ): """A TimeoutError (wedged/slow ACP server) maps to 504, not 500. - ``ACPAgent.set_acp_model`` bounds the ``session/set_model`` round-trip with - ``acp_prompt_timeout``; an expired call raises ``TimeoutError``, which the - route surfaces as a Gateway Timeout rather than an opaque 500. + ``ACPAgent.set_acp_model`` bounds the provider model-selection round-trip + with ``acp_prompt_timeout``; an expired call raises ``TimeoutError``, which + the route surfaces as a Gateway Timeout rather than an opaque 500. """ mock_conversation_service.get_event_service.return_value = mock_event_service mock_event_service.switch_acp_model.side_effect = TimeoutError( - "ACP server did not answer set_session_model within 600s" + "ACP server did not answer model switch within 600s" ) client.app.dependency_overrides[get_conversation_service] = ( diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index aaad4b2ec7..e81399c3a9 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -11,7 +11,7 @@ from pathlib import Path from types import SimpleNamespace from typing import Any -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import AsyncMock, MagicMock, call, patch import pytest from acp.exceptions import RequestError as ACPRequestError @@ -27,6 +27,7 @@ _classify_acp_turn_error, _codex_auth_file, _codex_base_url_overrides, + _codex_model_config_options, _estimate_cost_from_tokens, _extract_session_models, _extract_token_usage, @@ -4727,6 +4728,20 @@ def test_noop_when_caller_already_set_provider(self): ) +class TestCodexModelConfigOptions: + def test_splits_combined_model_and_reasoning_effort(self): + assert _codex_model_config_options("gpt-5.5/high") == ( + ("model", "gpt-5.5"), + ("reasoning_effort", "high"), + ) + + def test_leaves_base_or_custom_model_id_unchanged(self): + assert _codex_model_config_options("gpt-5.5") == (("model", "gpt-5.5"),) + assert _codex_model_config_options("custom/provider/model") == ( + ("model", "custom/provider/model"), + ) + + # --------------------------------------------------------------------------- # ACP model overrides # --------------------------------------------------------------------------- @@ -4769,6 +4784,28 @@ async def test_set_config_option_mechanism(self): conn.set_session_model.assert_not_called() assert applied is True + @pytest.mark.asyncio + async def test_codex_config_option_splits_reasoning_effort(self): + # Canvas may persist Codex ids as ``model/effort``; codex-acp 0.16 + # exposes effort as its own config option. + conn = AsyncMock() + applied = await _maybe_set_session_model( + conn, "codex-acp", "session-1", "gpt-5.4/low", via_config_option=True + ) + conn.set_session_model.assert_not_called() + conn.set_config_option.assert_has_awaits( + [ + call(config_id="model", value="gpt-5.4", session_id="session-1"), + call( + config_id="reasoning_effort", + value="low", + session_id="session-1", + ), + ] + ) + assert conn.set_config_option.await_count == 2 + assert applied is True + @pytest.mark.asyncio async def test_missing_model_skips_protocol_override(self): conn = AsyncMock() @@ -4873,6 +4910,26 @@ async def test_reapply_via_set_config_option_mechanism(self): conn.set_session_model.assert_not_called() assert applied is True + @pytest.mark.asyncio + async def test_codex_reapply_splits_reasoning_effort(self): + conn = AsyncMock() + applied = await _reapply_session_model_on_resume( + conn, "codex-acp", "sess-1", "gpt-5.4/low", via_config_option=True + ) + conn.set_session_model.assert_not_called() + conn.set_config_option.assert_has_awaits( + [ + call(config_id="model", value="gpt-5.4", session_id="sess-1"), + call( + config_id="reasoning_effort", + value="low", + session_id="sess-1", + ), + ] + ) + assert conn.set_config_option.await_count == 2 + assert applied is True + @pytest.mark.asyncio async def test_missing_model_skips_reapply(self): conn = AsyncMock() @@ -5045,7 +5102,7 @@ def test_switches_model_on_live_codex_session(self): def test_switches_codex_via_config_option_single_call(self): # codex-acp 0.16 configOptions: the bare preset id applies as a single - # `model` selection (reasoning effort is a separate, unmanaged option). + # `model` selection. agent = self._wire(_make_agent(), "codex-acp", via_config_option=True) agent.set_acp_model("gpt-5.5") agent._conn.set_config_option.assert_awaited_once_with( @@ -5055,6 +5112,24 @@ def test_switches_codex_via_config_option_single_call(self): assert agent.llm.model == "gpt-5.5" assert agent._current_model_id == "gpt-5.5" + def test_switches_codex_via_config_option_splits_reasoning_effort(self): + agent = self._wire(_make_agent(), "codex-acp", via_config_option=True) + agent.set_acp_model("gpt-5.5/high") + agent._conn.set_config_option.assert_has_awaits( + [ + call(config_id="model", value="gpt-5.5", session_id="sess-1"), + call( + config_id="reasoning_effort", + value="high", + session_id="sess-1", + ), + ] + ) + assert agent._conn.set_config_option.await_count == 2 + agent._conn.set_session_model.assert_not_called() + assert agent.llm.model == "gpt-5.5/high" + assert agent._current_model_id == "gpt-5.5/high" + def test_switches_claude_via_config_option_single_call(self): # A bare id (no `/`) applies as a single `model` selection — no effort. agent = self._wire(_make_agent(), "claude-agent-acp", via_config_option=True) @@ -7742,11 +7817,7 @@ async def test_set_session_model_rejection_propagates(self): class TestApplyAcpModel: - """``_apply_acp_model`` sends the model id verbatim through the mechanism the - session advertised — the ``model`` configOption select or - ``set_session_model`` — with no id rewriting. (codex 0.16 exposes reasoning - effort as a separate, unmanaged ``reasoning_effort`` configOption, so the - model ids are bare presets on every provider.)""" + """``_apply_acp_model`` uses the mechanism the session advertised.""" @pytest.mark.asyncio async def test_config_option_single_call(self): @@ -7768,6 +7839,29 @@ async def test_set_session_model_branch(self): ) conn.set_config_option.assert_not_called() + @pytest.mark.asyncio + async def test_codex_config_option_splits_reasoning_effort(self): + conn = AsyncMock() + await _apply_acp_model( + conn, + "sess-1", + "gpt-5.5/xhigh", + agent_name="codex-acp", + via_config_option=True, + ) + conn.set_config_option.assert_has_awaits( + [ + call(config_id="model", value="gpt-5.5", session_id="sess-1"), + call( + config_id="reasoning_effort", + value="xhigh", + session_id="sess-1", + ), + ] + ) + assert conn.set_config_option.await_count == 2 + conn.set_session_model.assert_not_called() + class TestACPAgentAvailableModelsProperty: """``available_models`` exposes the server's model list verbatim. diff --git a/uv.lock b/uv.lock index 01a3129c0d..cb02664147 100644 --- a/uv.lock +++ b/uv.lock @@ -56,14 +56,14 @@ dev = [ [[package]] name = "agent-client-protocol" -version = "0.8.1" +version = "0.10.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1b/7b/7cdac86db388809d9e3bc58cac88cc7dfa49b7615b98fab304a828cd7f8a/agent_client_protocol-0.8.1.tar.gz", hash = "sha256:1bbf15663bf51f64942597f638e32a6284c5da918055d9672d3510e965143dbd", size = 68866, upload-time = "2026-02-13T15:34:54.567Z" } +sdist = { url = "https://files.pythonhosted.org/packages/88/a0/3b96cd8374725c69bc3dae9fcc2082f3f6cafec1be35d24d7af0f8c3265f/agent_client_protocol-0.10.1.tar.gz", hash = "sha256:355c65ca19f0568344aafc2c1552b7066a8fc491df23ab28e7e253c6c9a85a25", size = 81924, upload-time = "2026-05-24T18:46:44.444Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4b/f3/219eeca0ad4a20843d4b9eaac5532f87018b9d25730a62a16f54f6c52d1a/agent_client_protocol-0.8.1-py3-none-any.whl", hash = "sha256:9421a11fd435b4831660272d169c3812d553bb7247049c138c3ca127e4b8af8e", size = 54529, upload-time = "2026-02-13T15:34:53.344Z" }, + { url = "https://files.pythonhosted.org/packages/7b/18/d8c7ff337cf621ea79a84006a7252ff057bfb5767549bb102cc6649f4ec2/agent_client_protocol-0.10.1-py3-none-any.whl", hash = "sha256:a03d3198f4d772f2e0ec012c00ac1cce131b4710220a3dc9fae3c991d047c750", size = 65401, upload-time = "2026-05-24T18:46:43.202Z" }, ] [[package]] @@ -2520,7 +2520,7 @@ boto3 = [ [package.metadata] requires-dist = [ - { name = "agent-client-protocol", specifier = ">=0.8.1" }, + { name = "agent-client-protocol", specifier = ">=0.10.1" }, { name = "boto3", marker = "extra == 'boto3'", specifier = ">=1.35.0" }, { name = "deprecation", specifier = ">=2.1.0" }, { name = "fakeredis", extras = ["lua"], specifier = ">=2.32.1" },