From d412c0b5b5f41397e36412c974d23cae5b3266c8 Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Wed, 17 Jun 2026 12:15:57 +0200 Subject: [PATCH 01/14] feat(acp): bump provider CLIs (claude 0.46, codex 0.16, gemini 0.46) + configOptions model selection Bumps all three built-in ACP provider CLIs to their latest npm releases and adapts the SDK to the model-selection protocol change two of them shipped. Verified e2e against every CLI (production binary path) before bumping: - claude-agent-acp 0.30 -> 0.46 and codex-acp 0.15 -> 0.16 dropped the UNSTABLE `models` capability + `session/set_model` (returns "Method not found") and moved model selection to a `model` `configOptions` select driven by `session/set_config_option`. With the old code, init crashes the moment a model is set. The SDK now auto-detects the mechanism from the session/new (or load_session) response and applies the model via the right call across init, runtime switch, and resume. gemini-cli 0.46 keeps `models`/`set_session_model` and is unchanged on that axis. - gemini-cli 0.46 rejects `set_session_mode("yolo")` at session init (JSON-RPC -32603), which crashed headless startup. Switched the gemini default session mode to `default`; the ACP bridge already auto-approves every `session/request_permission`, so permission prompts never block regardless of mode (verified a real tool/edit turn). - Reconciled `_CLAUDE_MODELS` and `_GEMINI_MODELS` to the `availableModels` each bumped CLI actually reports; `_CODEX_MODELS` is unchanged (codex 0.16 accepts the same combined `gpt-5.5/medium` ids via set_config_option). claude `default_model` -> `opus[1m]` (the CLI's own default), gemini -> `auto`. Pins move together in acp_providers.py + the agent-server Dockerfile. All three providers pass an init + tool turn + live model-switch e2e against the pinned binaries. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../openhands/agent_server/docker/Dockerfile | 6 +- .../openhands/sdk/agent/acp_agent.py | 181 ++++++++++++--- .../openhands/sdk/settings/acp_providers.py | 89 ++++---- tests/sdk/agent/test_acp_agent.py | 208 +++++++++++++++--- tests/sdk/settings/test_acp_providers.py | 16 +- tests/sdk/test_settings.py | 6 +- 6 files changed, 402 insertions(+), 104 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/docker/Dockerfile b/openhands-agent-server/openhands/agent_server/docker/Dockerfile index 8751c22628..8cc51cd9f8 100644 --- a/openhands-agent-server/openhands/agent_server/docker/Dockerfile +++ b/openhands-agent-server/openhands/agent_server/docker/Dockerfile @@ -171,9 +171,9 @@ RUN set -ux; \ && "$ACP_NODE_DIR/bin/node" --version; then \ PATH="$ACP_NODE_DIR/bin:$PATH"; \ if "$ACP_NODE_DIR/bin/npm" install -g \ - @agentclientprotocol/claude-agent-acp@0.30.0 \ - @zed-industries/codex-acp@0.15.0 \ - @google/gemini-cli@0.38.0; then \ + @agentclientprotocol/claude-agent-acp@0.46.0 \ + @zed-industries/codex-acp@0.16.0 \ + @google/gemini-cli@0.46.0; then \ # Create wrappers in /usr/local/bin that prepend ACP's Node 22 to PATH. # This ensures the ACP binary's #!/usr/bin/env node shebang resolves # to Node 22, while the repo's own node (NVM/system) stays untouched diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index b5d233768e..3eed076262 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -371,6 +371,72 @@ def _write_secret_file(path: Path, value: str) -> None: f.write(value) +# Session config-option id that selects the model on ACP servers that drive +# model selection through ``configOptions`` / ``session/set_config_option`` +# (codex-acp 0.16+, claude-agent-acp 0.46+) rather than the UNSTABLE ``models`` +# capability + ``session/set_model`` (gemini-cli, older codex/claude). +_MODEL_CONFIG_OPTION_ID = "model" + + +def _model_config_option(response: Any) -> Any | None: + """Return the ``model`` ``configOptions`` select off a session response. + + Newer ACP CLIs dropped the UNSTABLE ``models`` capability and expose model + selection as a ``configOptions`` entry with ``id == "model"`` (``type == + "select"``), switched via ``session/set_config_option`` instead of + ``session/set_model``. Returns that option (carrying ``options`` and + ``current_value``) or ``None`` when the server uses neither / the old + mechanism. ``getattr`` keeps it tolerant of partial structures. + """ + for opt in getattr(response, "config_options", None) or []: + if ( + getattr(opt, "type", None) == "select" + and getattr(opt, "id", None) == _MODEL_CONFIG_OPTION_ID + ): + return opt + return None + + +def _session_selects_model_via_config_option(response: Any) -> bool: + """Whether this session drives model selection through + ``session/set_config_option`` (the ``model`` config-option select) rather + than ``session/set_model``. + + The ``models`` capability wins when present (gemini-cli 0.46 and older + codex/claude still carry it); only in its absence does a ``model`` config + option become the selection mechanism. + """ + if response is None: + return False + if getattr(response, "models", None) is not None: + return False + return _model_config_option(response) is not None + + +def _apply_acp_model( + conn: ClientSideConnection, + session_id: str, + model: str, + *, + via_config_option: bool, +) -> Awaitable[Any]: + """Build the awaitable that applies ``model`` to a live ACP session via + whichever protocol the session advertised: ``set_config_option(configId= + "model")`` for configOptions-based servers (codex-acp 0.16+, claude-agent-acp + 0.46+), else ``set_session_model``. + + Returns the (eagerly constructed) coroutine so callers can either ``await`` + it directly or hand it to ``run_async`` — the underlying ``conn`` method is + invoked now, mirroring the previous inline ``conn.set_session_model(...)`` + call site. + """ + if via_config_option: + return conn.set_config_option( + config_id=_MODEL_CONFIG_OPTION_ID, value=model, session_id=session_id + ) + return conn.set_session_model(model_id=model, session_id=session_id) + + def _extract_session_models( response: Any, ) -> tuple[str | None, list[ACPModelInfo] | None]: @@ -379,16 +445,17 @@ def _extract_session_models( Returns a ``(current_model_id, available_models)`` pair, both best-effort. ``available_models`` is normalized into our own stable :class:`ACPModelInfo` type at this boundary so nothing downstream depends on the vendored - ``acp.schema`` shape. + ``acp.schema`` shape. Reads whichever mechanism the session advertised: the + UNSTABLE ``models`` capability, or the ``model`` ``configOptions`` select + (codex-acp 0.16+, claude-agent-acp 0.46+). The second element distinguishes **absent** from **empty** — this matters for resume persistence (preserve the last-known list when the server didn't report one; clear it when the server explicitly says it has none): - - ``None`` — the (UNSTABLE) ``models`` block was absent from the response - (older agent, opted out, or ``load_session`` not carrying it). - - ``[]`` — the server *did* report ``models`` but offers no (usable) - models this session. + - ``None`` — neither mechanism was present in the response (older agent, + opted out, or ``load_session`` not carrying it). + - ``[]`` — the server reported a mechanism but offers no (usable) models. - ``[...]`` — the reported models, minus any with an unusable ``model_id``. ``getattr`` keeps the helper tolerant of agents that emit a partial @@ -397,17 +464,37 @@ def _extract_session_models( if response is None: return None, None models = getattr(response, "models", None) - if models is None: + if models is not None: + current = getattr(models, "current_model_id", None) + current = current if isinstance(current, str) and current else None + raw = getattr(models, "available_models", None) or [] + # Drop entries without a usable id: an empty/missing ``model_id`` is an + # invalid picker option and an unusable ``set_session_model`` target, so + # we filter it out rather than surfacing ``model_id=""``. + available = [ + info + for info in (ACPModelInfo.from_protocol(m) for m in raw) + if info.model_id + ] + return current, available + # configOptions mechanism: the ``model`` select carries the same state, with + # ``value`` as the model id (== the ``set_config_option`` target). + opt = _model_config_option(response) + if opt is None: return None, None - current = getattr(models, "current_model_id", None) + current = getattr(opt, "current_value", None) current = current if isinstance(current, str) and current else None - raw = getattr(models, "available_models", None) or [] - # Drop entries without a usable id: an empty/missing ``model_id`` is an - # invalid picker option and an unusable ``set_session_model`` target, so we - # filter it out rather than surfacing ``model_id=""``. - available = [ - info for info in (ACPModelInfo.from_protocol(m) for m in raw) if info.model_id - ] + available = [] + for o in getattr(opt, "options", None) or []: + value = getattr(o, "value", None) + if isinstance(value, str) and value: + available.append( + ACPModelInfo( + model_id=value, + name=getattr(o, "name", None), + description=getattr(o, "description", None), + ) + ) return current, available @@ -532,12 +619,17 @@ async def _maybe_set_session_model( agent_name: str, session_id: str, acp_model: str | None, + *, + via_config_option: bool, ) -> bool: """Apply the *initial* session model right after session creation. This is the session-creation path only, gated on :attr:`~openhands.sdk.settings.acp_providers.ACPProviderInfo.supports_set_session_model`. - All built-in providers get a one-shot ``set_session_model`` call here. + All built-in providers get a one-shot model-apply call here, via whichever + protocol the session advertised (``via_config_option``): + ``set_config_option(configId="model")`` for configOptions-based servers + (codex-acp 0.16+, claude-agent-acp 0.46+), else ``set_session_model``. claude-agent-acp used to be skipped on the assumption that it already received the model via ``new_session()`` ``_meta``, but 0.30.0 silently ignores that payload (#3654) — the protocol call is the only path that @@ -562,7 +654,9 @@ async def _maybe_set_session_model( return False provider = detect_acp_provider_by_agent_name(agent_name) if provider is not None and provider.supports_set_session_model: - await conn.set_session_model(model_id=acp_model, session_id=session_id) + await _apply_acp_model( + conn, session_id, acp_model, via_config_option=via_config_option + ) return True # For unknown/custom providers, try the generic set_config_option method # which is a standard ACP protocol method for setting configuration options @@ -595,6 +689,8 @@ async def _reapply_session_model_on_resume( agent_name: str, session_id: str, acp_model: str | None, + *, + via_config_option: bool, ) -> bool: """Reapply the persisted model to a *resumed* session. @@ -629,8 +725,12 @@ async def _reapply_session_model_on_resume( return False try: if provider is not None: - # Known provider: use set_session_model - await conn.set_session_model(model_id=acp_model, session_id=session_id) + # Known provider: apply via the mechanism the resumed session uses + # (set_config_option for codex-acp 0.16+/claude-agent-acp 0.46+, + # else set_session_model). + await _apply_acp_model( + conn, session_id, acp_model, via_config_option=via_config_option + ) else: # Unknown/custom provider: try set_config_option as fallback await conn.set_config_option( @@ -1479,6 +1579,12 @@ def model_post_init(self, __context: object) -> None: _agent_version: str = PrivateAttr( default="" ) # ACP server version from InitializeResponse + # Which protocol this session uses to select the model: ``True`` ⇒ + # ``session/set_config_option(configId="model")`` (codex-acp 0.16+, + # claude-agent-acp 0.46+); ``False`` ⇒ ``session/set_model`` (gemini-cli and + # older codex/claude). Detected from the session/new (or load_session) + # response at init and reused by runtime ``set_acp_model`` switches. + _model_via_config_option: bool = PrivateAttr(default=False) # The model the ACP server reported as active for this session, captured # from ``models.currentModelId`` on the new_session / load_session # response. Overridden by ``self.acp_model`` when the caller explicitly @@ -2417,6 +2523,9 @@ async def _init() -> tuple[ reported_model_id, available_models = _extract_session_models( load_response ) + self._model_via_config_option = ( + _session_selects_model_via_config_option(load_response) + ) logger.info( "Resumed ACP session %s (cwd=%s)", _fingerprint_session_id(session_id), @@ -2448,16 +2557,23 @@ async def _init() -> tuple[ ) session_id = response.session_id reported_model_id, available_models = _extract_session_models(response) + # Detect the model-selection protocol the server advertised so + # init and later runtime switches use the right call. + self._model_via_config_option = ( + _session_selects_model_via_config_option(response) + ) # Initial-model protocol call for every built-in provider - # (codex, gemini, claude-code). The pinned claude CLI ignores - # the _meta above, so this set_session_model call is what - # actually applies the model (#3654); _meta is kept only as - # backward-compat for older claude CLIs that still honor it. + # (codex, gemini, claude-code). The pinned claude/codex CLIs + # ignore the _meta above, so this protocol call is what actually + # applies the model (#3654) — via set_config_option for + # configOptions-based servers, else set_session_model; _meta is + # kept only as backward-compat for older claude CLIs. applied_via_call = await _maybe_set_session_model( conn, agent_name, session_id, self.acp_model, + via_config_option=self._model_via_config_option, ) # set_session_model is the authoritative signal that acp_model # reached the server. _meta is a no-op on the pinned claude CLI, @@ -2470,12 +2586,14 @@ async def _init() -> tuple[ # reapply the persisted (possibly runtime-switched) acp_model via # the runtime-switch capability — otherwise the resumed live # session would run on the server default while serialized state - # claims the switched model. + # claims the switched model. ``_model_via_config_option`` was set + # from the load_session response above. override_applied = await _reapply_session_model_on_resume( conn, agent_name, session_id, self.acp_model, + via_config_option=self._model_via_config_option, ) # Resolve the model the agent will actually use. @@ -3506,6 +3624,9 @@ def set_acp_model(self, model: str) -> None: f"ACP provider '{provider.key}' does not support runtime model " "switching via set_session_model." ) + # ``has_live_acp_session`` above guarantees a session id; narrow for the + # type checker. + assert self._session_id is not None # Bounded round-trip: this runs while LocalConversation.switch_acp_model # holds the state lock, so a server that accepts the call but never # answers must not wedge the lock indefinitely. On timeout / protocol @@ -3513,8 +3634,11 @@ def set_acp_model(self, model: str) -> None: # LLM is only updated once the live session has actually switched. try: self._executor.run_async( - self._conn.set_session_model( - model_id=model, session_id=self._session_id + _apply_acp_model( + self._conn, + self._session_id, + model, + via_config_option=self._model_via_config_option, ), timeout=self.acp_prompt_timeout, ) @@ -3530,8 +3654,13 @@ def set_acp_model(self, model: str) -> None: # method-not-found, invalid model id) as a ValueError so callers — # and the agent-server route — treat it as a 400-class client error # rather than an opaque 500. + method = ( + "set_config_option(model)" + if self._model_via_config_option + else "set_session_model" + ) raise ValueError( - f"ACP server rejected set_session_model(model={model!r}): {e}" + f"ACP server rejected {method}(model={model!r}): {e}" ) from e # Reflect the live model on the sentinel LLM + metrics so cost/token # accounting and serialized state show the model actually in use diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py index 39e1ba4ab8..687767a522 100644 --- a/openhands-sdk/openhands/sdk/settings/acp_providers.py +++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py @@ -293,18 +293,18 @@ class ACPProviderInfo: # ``acp_model`` outside these lists is always allowed. # --------------------------------------------------------------------------- -# Canonical model IDs the Claude Code CLI accepts, curated to the newest -# generation per capability tier. ``opus[1m]`` is the SDK-documented -# version-agnostic 1M-context alias (auto-tracks the newest 1M-capable model — -# keep its label version-less to match). ``opusplan`` routes planning to Opus -# and execution to Sonnet. +# Model IDs the Claude Code CLI accepts, mirroring the ``model`` configOptions +# select claude-agent-acp 0.46.0 reports at ``session/new`` (the short aliases +# the CLI's own ``/model`` menu offers, switched via ``set_config_option``). +# ``opus[1m]`` is the SDK-documented version-agnostic 1M-context alias and the +# CLI's own default (``currentValue``); ``default`` is the CLI's recommended +# tier (Opus 4.8 · 1M). The CLI also accepts ids outside this list, so these +# are curated suggestions, not an access check. _CLAUDE_MODELS: tuple[ACPModelOption, ...] = ( - ACPModelOption(id="claude-fable-5", label="Claude Fable 5"), - ACPModelOption(id="claude-opus-4-8", label="Claude Opus 4.8"), - ACPModelOption(id="opus[1m]", label="Claude Opus (1M)"), - ACPModelOption(id="claude-sonnet-4-6", label="Claude Sonnet 4.6"), - ACPModelOption(id="claude-haiku-4-5", label="Claude Haiku 4.5"), - ACPModelOption(id="opusplan", label="Opus (plan) + Sonnet (execute)"), + ACPModelOption(id="default", label="Default (recommended)"), + ACPModelOption(id="opus[1m]", label="Claude Opus 4.8 (1M)"), + ACPModelOption(id="sonnet", label="Claude Sonnet 4.6"), + ACPModelOption(id="haiku", label="Claude Haiku 4.5"), ) # Model IDs accepted by ``@zed-industries/codex-acp`` (``session/new`` @@ -323,20 +323,19 @@ class ACPProviderInfo: ACPModelOption(id="gpt-5.4-mini/xhigh", label="GPT-5.4 Mini (xhigh)"), ) -# Model IDs accepted by ``@google/gemini-cli --acp``. The ``auto-gemini-*`` -# entries delegate version selection to the CLI's router; the explicit -# ``gemini-3.1-*`` / ``gemini-2.5-*`` entries pin to a specific snapshot. +# Model IDs accepted by ``@google/gemini-cli --acp``. Mirrors the +# ``availableModels`` the CLI reports at ``session/new`` on the pinned version +# (gemini-cli 0.46.0). ``auto`` delegates version selection to the CLI's +# router; the explicit ``gemini-*`` entries pin to a specific snapshot. The CLI +# also accepts ids outside this list (it remaps them at generation), so these +# are curated suggestions, not an access check. _GEMINI_MODELS: tuple[ACPModelOption, ...] = ( - ACPModelOption(id="auto-gemini-3", label="Auto (Gemini 3)"), - ACPModelOption(id="auto-gemini-2.5", label="Auto (Gemini 2.5)"), - ACPModelOption(id="gemini-3.1-pro-preview", label="Gemini 3.1 Pro (preview)"), + ACPModelOption(id="auto", label="Auto"), + ACPModelOption(id="gemini-3-pro-preview", label="Gemini 3 Pro (preview)"), ACPModelOption(id="gemini-3-flash-preview", label="Gemini 3 Flash (preview)"), - ACPModelOption( - id="gemini-3.1-flash-lite-preview", label="Gemini 3.1 Flash Lite (preview)" - ), + ACPModelOption(id="gemini-3.1-flash-lite", label="Gemini 3.1 Flash Lite"), ACPModelOption(id="gemini-2.5-pro", label="Gemini 2.5 Pro"), ACPModelOption(id="gemini-2.5-flash", label="Gemini 2.5 Flash"), - ACPModelOption(id="gemini-2.5-flash-lite", label="Gemini 2.5 Flash Lite"), ) @@ -378,9 +377,15 @@ class ACPProviderInfo: # permission-disabling session mode. In the image the binary rewrite in # `ACPAgentSettings.resolve_acp_command` runs the pinned `binary_name` instead, # so the `@version` suffix is a no-op there. -CLAUDE_AGENT_ACP_VERSION = "0.30.0" -CODEX_ACP_VERSION = "0.15.0" -GEMINI_CLI_VERSION = "0.38.0" +# +# codex-acp 0.16.0 and claude-agent-acp 0.46.0 dropped the UNSTABLE ``models`` +# capability + ``session/set_model`` in favour of a ``model`` ``configOptions`` +# select driven by ``session/set_config_option``; the SDK auto-detects and uses +# the right call (see ``_session_selects_model_via_config_option`` in +# acp_agent.py), so model switching keeps working across the bump (#3772). +CLAUDE_AGENT_ACP_VERSION = "0.46.0" +CODEX_ACP_VERSION = "0.16.0" +GEMINI_CLI_VERSION = "0.46.0" ACP_PROVIDERS: Mapping[str, ACPProviderInfo] = MappingProxyType( @@ -397,17 +402,20 @@ class ACPProviderInfo: base_url_env_var="ANTHROPIC_BASE_URL", default_session_mode="bypassPermissions", agent_name_patterns=("claude-agent",), - # claude-agent-acp 0.30.0 silently ignores the session-_meta model - # selection (the requested model only becomes a picker option; the - # session keeps running "default"), so the init path must push the - # model via session/set_model like codex/gemini (#3654). The _meta + # claude-agent-acp ignores the session-_meta model selection (the + # requested model only becomes a picker option; the session keeps + # running its default), so the init path must push the model via a + # protocol call (#3654). On 0.46.0 that call is + # ``set_config_option(configId="model")`` rather than + # ``set_session_model`` (auto-detected from session/new); the _meta # payload (session_meta_key below) is still sent — harmless, and # picks up the same model if a future CLI honours it. supports_set_session_model=True, supports_runtime_model_switch=True, session_meta_key="claudeCode", available_models=_CLAUDE_MODELS, - default_model="claude-opus-4-8", + # The CLI's own default (model configOptions ``currentValue``). + default_model="opus[1m]", binary_name="claude-agent-acp", data_dir_env_var="CLAUDE_CONFIG_DIR", ), @@ -443,18 +451,25 @@ class ACPProviderInfo: ), api_key_env_var="GEMINI_API_KEY", base_url_env_var="GEMINI_BASE_URL", - default_session_mode="yolo", + # gemini-cli 0.46.0 rejects ``set_session_mode("yolo")`` at session + # init with a JSON-RPC -32603 (it gates yolo on folder-trust that is + # not yet established right after session/new), which would crash + # headless startup. ``default`` is accepted at init, and the SDK's + # ACP bridge already auto-approves every ``session/request_permission`` + # the server sends, so permission prompts never block regardless of + # mode — making ``default`` the headless-safe choice. See #3772. + default_session_mode="default", agent_name_patterns=("gemini-cli",), supports_set_session_model=True, supports_runtime_model_switch=True, session_meta_key=None, available_models=_GEMINI_MODELS, - # Match the Gemini CLI's own no-model-configured default - # (``DEFAULT_GEMINI_MODEL_AUTO``), i.e. the auto-router — not a - # manually-pinned snapshot. Pinning ``gemini-2.5-pro`` here would - # make downstream clients persist a value that bypasses the CLI's - # auto-routing. - default_model="auto-gemini-2.5", + # Match the Gemini CLI's own auto-router rather than a manually + # pinned snapshot. Pinning e.g. ``gemini-2.5-pro`` here would make + # downstream clients persist a value that bypasses the CLI's + # auto-routing. ``auto`` is the router id the CLI reports in its + # 0.46.0 ``availableModels``. + default_model="auto", file_secrets=_GEMINI_FILE_SECRETS, binary_name="gemini", # Gemini CLI has no dedicated config-dir var; it hard-codes @@ -516,7 +531,7 @@ def detect_acp_provider_by_command( *caller-controlled*: each token is reduced to its basename (last path segment, minus a trailing ``@version`` pin) and a provider matches only when that basename *starts with* one of its patterns. This accepts the real forms — - ``@zed-industries/codex-acp``, ``@google/gemini-cli@0.43.0``, + ``@zed-industries/codex-acp``, ``@google/gemini-cli@0.46.0``, ``/opt/node_modules/.bin/codex-acp`` — while rejecting incidental substrings like ``my-codex-acp-wrapper`` or ``/opt/shims/not-codex-acp`` that a plain substring test would misattribute. diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 34eba1e693..24eb2280af 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -9,6 +9,7 @@ from base64 import urlsafe_b64encode from concurrent.futures import Future from pathlib import Path +from types import SimpleNamespace from typing import Any from unittest.mock import AsyncMock, MagicMock, patch @@ -33,6 +34,7 @@ _reapply_session_model_on_resume, _select_auth_method, _serialize_tool_content, + _session_selects_model_via_config_option, _strip_inherited_npm_env, ) from openhands.sdk.agent.acp_models import ACPModelInfo @@ -4426,41 +4428,49 @@ def test_noop_when_caller_already_set_provider(self): class TestMaybeSetSessionModel: @pytest.mark.asyncio - async def test_codex_agent_uses_protocol_model_override(self): + async def test_set_session_model_mechanism(self): + # ``via_config_option=False`` (gemini-cli, older codex/claude with the + # ``models`` capability) applies the model via ``set_session_model``. conn = AsyncMock() applied = await _maybe_set_session_model( - conn, "codex-acp", "session-1", "gpt-5.4" + conn, "codex-acp", "session-1", "gpt-5.4", via_config_option=False ) conn.set_session_model.assert_awaited_once_with( model_id="gpt-5.4", session_id="session-1", ) + conn.set_config_option.assert_not_called() # The override was actually pushed to the server via the protocol call. assert applied is True @pytest.mark.asyncio - async def test_claude_agent_uses_protocol_model_override(self): - # claude-agent-acp 0.30.0 silently ignores the session-_meta model - # selection (#3654), so the init path pushes the model via the same - # one-shot set_session_model call as codex/gemini. + async def test_set_config_option_mechanism(self): + # ``via_config_option=True`` (codex-acp 0.16+, claude-agent-acp 0.46+) + # applies the model via ``set_config_option(configId="model")``. conn = AsyncMock() applied = await _maybe_set_session_model( conn, "claude-agent-acp", "session-1", - "claude-opus-4-8", + "opus[1m]", + via_config_option=True, ) - conn.set_session_model.assert_awaited_once_with( - model_id="claude-opus-4-8", + conn.set_config_option.assert_awaited_once_with( + config_id="model", + value="opus[1m]", session_id="session-1", ) + conn.set_session_model.assert_not_called() assert applied is True @pytest.mark.asyncio async def test_missing_model_skips_protocol_override(self): conn = AsyncMock() - applied = await _maybe_set_session_model(conn, "codex-acp", "session-1", None) + applied = await _maybe_set_session_model( + conn, "codex-acp", "session-1", None, via_config_option=True + ) conn.set_session_model.assert_not_called() + conn.set_config_option.assert_not_called() assert applied is False @pytest.mark.asyncio @@ -4469,7 +4479,7 @@ async def test_unknown_provider_uses_set_config_option_fallback(self): # for model selection, which is a standard ACP method. conn = AsyncMock() applied = await _maybe_set_session_model( - conn, "devin-cli", "session-1", "kimi-k2-6" + conn, "devin-cli", "session-1", "kimi-k2-6", via_config_option=False ) conn.set_session_model.assert_not_called() conn.set_config_option.assert_awaited_once_with( @@ -4488,7 +4498,7 @@ async def test_unknown_provider_set_config_option_failure_is_tolerated(self): code=-32601, message="method not found" ) applied = await _maybe_set_session_model( - conn, "some-custom-acp", "session-1", "whatever" + conn, "some-custom-acp", "session-1", "whatever", via_config_option=False ) conn.set_session_model.assert_not_called() conn.set_config_option.assert_awaited_once() @@ -4499,12 +4509,18 @@ class TestReapplySessionModelOnResume: """Resume reapplies the persisted model via the runtime-switch gate.""" @pytest.mark.asyncio - async def test_claude_reapplies_persisted_model_on_resume(self): + async def test_reapply_via_set_session_model_mechanism(self): # load_session() carries no model selection, so on resume the persisted - # model must be reapplied via the runtime-switch gate. + # model must be reapplied via the runtime-switch gate. Servers with the + # ``models`` capability (gemini-cli, older codex/claude) use + # ``set_session_model``. conn = AsyncMock() applied = await _reapply_session_model_on_resume( - conn, "claude-agent-acp", "sess-1", "claude-haiku-4-5-20251001" + conn, + "claude-agent-acp", + "sess-1", + "claude-haiku-4-5-20251001", + via_config_option=False, ) conn.set_session_model.assert_awaited_once_with( model_id="claude-haiku-4-5-20251001", session_id="sess-1" @@ -4512,23 +4528,27 @@ async def test_claude_reapplies_persisted_model_on_resume(self): assert applied is True @pytest.mark.asyncio - async def test_codex_reapplies_persisted_model_on_resume(self): + async def test_reapply_via_set_config_option_mechanism(self): + # codex-acp 0.16+/claude-agent-acp 0.46+ reapply via + # ``set_config_option(configId="model")``. conn = AsyncMock() applied = await _reapply_session_model_on_resume( - conn, "codex-acp", "sess-1", "gpt-5.4/low" + conn, "codex-acp", "sess-1", "gpt-5.4/low", via_config_option=True ) - conn.set_session_model.assert_awaited_once_with( - model_id="gpt-5.4/low", session_id="sess-1" + conn.set_config_option.assert_awaited_once_with( + config_id="model", value="gpt-5.4/low", session_id="sess-1" ) + conn.set_session_model.assert_not_called() assert applied is True @pytest.mark.asyncio async def test_missing_model_skips_reapply(self): conn = AsyncMock() applied = await _reapply_session_model_on_resume( - conn, "claude-agent-acp", "sess-1", None + conn, "claude-agent-acp", "sess-1", None, via_config_option=True ) conn.set_session_model.assert_not_called() + conn.set_config_option.assert_not_called() assert applied is False @pytest.mark.asyncio @@ -4537,7 +4557,7 @@ async def test_unknown_provider_attempts_reapply_via_set_config_option(self): # as a fallback, which is a standard ACP method. conn = AsyncMock() applied = await _reapply_session_model_on_resume( - conn, "devin-cli", "sess-1", "kimi-k2-6" + conn, "devin-cli", "sess-1", "kimi-k2-6", via_config_option=False ) conn.set_session_model.assert_not_called() conn.set_config_option.assert_awaited_once_with( @@ -4569,7 +4589,7 @@ async def test_known_unsupported_provider_skips_reapply(self): return_value=unsupported, ): applied = await _reapply_session_model_on_resume( - conn, "legacy-acp", "sess-1", "x" + conn, "legacy-acp", "sess-1", "x", via_config_option=False ) conn.set_session_model.assert_not_called() assert applied is False @@ -4584,7 +4604,7 @@ async def test_client_rejection_is_swallowed_on_resume(self): code=-32601, message="method not found" ) applied = await _reapply_session_model_on_resume( - conn, "some-custom-acp", "sess-1", "whatever" + conn, "some-custom-acp", "sess-1", "whatever", via_config_option=False ) conn.set_config_option.assert_awaited_once() # Rejected => the live session kept the server default, so the override @@ -4601,7 +4621,7 @@ async def test_any_request_error_is_swallowed_on_resume(self): code=-32603, message="internal error" ) applied = await _reapply_session_model_on_resume( - conn, "codex-acp", "sess-1", "gpt-5.4/low" + conn, "codex-acp", "sess-1", "gpt-5.4/low", via_config_option=False ) conn.set_session_model.assert_awaited_once() assert applied is False @@ -4650,29 +4670,47 @@ def test_set_acp_model_raises_when_no_live_session(self): class TestSetACPModel: - """Runtime (mid-conversation) model switching via set_session_model.""" + """Runtime (mid-conversation) model switching via the session's mechanism.""" @staticmethod - def _wire(agent: ACPAgent, agent_name: str) -> ACPAgent: + def _wire( + agent: ACPAgent, agent_name: str, *, via_config_option: bool = False + ) -> ACPAgent: agent._conn = MagicMock() agent._session_id = "sess-1" agent._agent_name = agent_name + agent._model_via_config_option = via_config_option executor = MagicMock() executor.run_async = MagicMock() agent._executor = executor return agent def test_switches_model_on_live_codex_session(self): + # Default (models-capability) session ⇒ set_session_model. agent = self._wire(_make_agent(), "codex-acp") agent.set_acp_model("gpt-5.4/low") agent._conn.set_session_model.assert_called_once_with( model_id="gpt-5.4/low", session_id="sess-1" ) + agent._conn.set_config_option.assert_not_called() agent._executor.run_async.assert_called_once() # Sentinel LLM + metrics reflect the live model for cost/token tracking. assert agent.llm.model == "gpt-5.4/low" assert agent.llm.metrics.model_name == "gpt-5.4/low" + def test_switches_model_via_config_option_session(self): + # configOptions session (codex-acp 0.16+, claude-agent-acp 0.46+) ⇒ + # set_config_option(configId="model"). + agent = self._wire(_make_agent(), "codex-acp", via_config_option=True) + agent.set_acp_model("gpt-5.4/low") + agent._conn.set_config_option.assert_called_once_with( + config_id="model", value="gpt-5.4/low", session_id="sess-1" + ) + agent._conn.set_session_model.assert_not_called() + agent._executor.run_async.assert_called_once() + assert agent.llm.model == "gpt-5.4/low" + assert agent._current_model_id == "gpt-5.4/low" + def test_claude_provider_supports_runtime_switch(self): agent = self._wire(_make_agent(), "claude-agent-acp") agent.set_acp_model("claude-haiku-4-5-20251001") @@ -5041,9 +5079,11 @@ def _fake_run_async(_coro, **_kwargs): class TestGeminiSessionModel: @pytest.mark.asyncio async def test_gemini_cli_uses_protocol_model_override(self): + # gemini-cli keeps the ``models`` capability, so model selection rides + # ``set_session_model`` (via_config_option=False). conn = AsyncMock() await _maybe_set_session_model( - conn, "gemini-cli", "session-1", "gemini-3-flash" + conn, "gemini-cli", "session-1", "gemini-3-flash", via_config_option=False ) conn.set_session_model.assert_awaited_once_with( model_id="gemini-3-flash", @@ -7033,6 +7073,120 @@ def test_drops_entries_without_usable_id(self): assert avail == [ACPModelInfo(model_id="good", name="Good", description=None)] +class TestConfigOptionModelMechanism: + """Model selection via the ``model`` ``configOptions`` select. + + codex-acp 0.16+ and claude-agent-acp 0.46+ dropped the UNSTABLE ``models`` + capability + ``session/set_model`` in favour of a ``model`` config-option + select driven by ``session/set_config_option``. ``_extract_session_models`` + reads that select, and ``_session_selects_model_via_config_option`` picks + the apply mechanism. + """ + + def _select(self, *, id="model", type="select", current_value, options): + opt = SimpleNamespace( + id=id, type=type, current_value=current_value, options=options + ) + return opt + + def _opt(self, value, name=None, description=None): + return SimpleNamespace(value=value, name=name, description=description) + + def _response(self, *, models=None, config_options): + return SimpleNamespace(models=models, config_options=config_options) + + def test_extracts_model_state_from_config_option(self): + # claude-agent-acp 0.46: model select with short aliases. + response = self._response( + config_options=[ + self._select( + current_value="opus[1m]", + options=[ + self._opt("default", "Default (recommended)", "Opus 4.8 · 1M"), + self._opt("opus[1m]", "Opus", "Opus 4.8 · 1M"), + self._opt("sonnet", "Sonnet", "Sonnet 4.6"), + self._opt("haiku", "Haiku", "Haiku 4.5"), + ], + ) + ], + ) + cur, avail = _extract_session_models(response) + assert cur == "opus[1m]" + assert avail == [ + ACPModelInfo( + model_id="default", + name="Default (recommended)", + description="Opus 4.8 · 1M", + ), + ACPModelInfo(model_id="opus[1m]", name="Opus", description="Opus 4.8 · 1M"), + ACPModelInfo(model_id="sonnet", name="Sonnet", description="Sonnet 4.6"), + ACPModelInfo(model_id="haiku", name="Haiku", description="Haiku 4.5"), + ] + + def test_models_capability_wins_over_config_option(self): + # If a server somehow carries both, the ``models`` capability is used. + models = MagicMock() + models.current_model_id = "from-models" + models.available_models = [] + response = self._response( + models=models, + config_options=[ + self._select( + current_value="from-config", options=[self._opt("from-config", "X")] + ) + ], + ) + cur, avail = _extract_session_models(response) + assert cur == "from-models" + assert avail == [] + assert _session_selects_model_via_config_option(response) is False + + def test_detects_config_option_mechanism(self): + response = self._response( + config_options=[ + self._select( + current_value="gpt-5.5", options=[self._opt("gpt-5.5", "GPT-5.5")] + ) + ], + ) + assert _session_selects_model_via_config_option(response) is True + + def test_ignores_non_model_config_options(self): + # A ``mode`` select alongside no ``model`` select ⇒ old mechanism. + response = self._response( + config_options=[ + SimpleNamespace( + id="mode", + type="select", + current_value="default", + options=[self._opt("default", "Default")], + ), + ], + ) + assert _session_selects_model_via_config_option(response) is False + assert _extract_session_models(response) == (None, None) + + def test_drops_options_without_usable_value(self): + response = self._response( + config_options=[ + self._select( + current_value="ok", + options=[ + self._opt(value=42), # non-string -> dropped + self._opt("ok", "Ok"), + self._opt(value="", name="Empty"), # empty -> dropped + ], + ) + ], + ) + cur, avail = _extract_session_models(response) + assert cur == "ok" + assert avail == [ACPModelInfo(model_id="ok", name="Ok", description=None)] + + def test_none_response_is_not_config_option(self): + assert _session_selects_model_via_config_option(None) is False + + class TestACPAgentAvailableModelsProperty: """``available_models`` exposes the server's model list verbatim. diff --git a/tests/sdk/settings/test_acp_providers.py b/tests/sdk/settings/test_acp_providers.py index 6ab36b4bc7..7d0390c7a9 100644 --- a/tests/sdk/settings/test_acp_providers.py +++ b/tests/sdk/settings/test_acp_providers.py @@ -35,14 +35,14 @@ def test_claude_code_metadata(self): assert info.base_url_env_var == "ANTHROPIC_BASE_URL" assert info.default_session_mode == "bypassPermissions" assert "claude-agent" in info.agent_name_patterns - # Initial selection rides session/set_model — claude-agent-acp 0.30.0 - # silently ignores the session-_meta payload (#3654), which is still - # sent as best-effort (session_meta_key below). + # Initial selection rides a protocol call — claude-agent-acp ignores the + # session-_meta payload (#3654), which is still sent as best-effort + # (session_meta_key below). On 0.46.0 the call is set_config_option. assert info.supports_set_session_model is True assert info.supports_runtime_model_switch is True assert info.session_meta_key == "claudeCode" - assert info.default_model == "claude-opus-4-8" - assert any(m.id == "claude-opus-4-8" for m in info.available_models) + assert info.default_model == "opus[1m]" + assert any(m.id == "opus[1m]" for m in info.available_models) # Pinned binary exposed by the agent-server image wrappers. assert info.binary_name == "claude-agent-acp" assert info.data_dir_env_var == "CLAUDE_CONFIG_DIR" @@ -71,13 +71,13 @@ def test_gemini_cli_metadata(self): assert "--acp" in info.default_command assert info.api_key_env_var == "GEMINI_API_KEY" assert info.base_url_env_var == "GEMINI_BASE_URL" - assert info.default_session_mode == "yolo" + assert info.default_session_mode == "default" assert "gemini-cli" in info.agent_name_patterns assert info.supports_set_session_model is True assert info.supports_runtime_model_switch is True assert info.session_meta_key is None - assert info.default_model == "auto-gemini-2.5" - assert any(m.id == "auto-gemini-2.5" for m in info.available_models) + assert info.default_model == "auto" + assert any(m.id == "auto" for m in info.available_models) # The Gemini CLI's ACP binary is just ``gemini`` (the ``--acp`` flag is # a trailing arg, preserved by resolve_acp_command on rewrite). assert info.binary_name == "gemini" diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py index 9930c448f4..3f43ebcff4 100644 --- a/tests/sdk/test_settings.py +++ b/tests/sdk/test_settings.py @@ -883,7 +883,7 @@ def test_acp_create_agent_uses_server_default_command( assert agent.acp_command == [ "npx", "-y", - "@agentclientprotocol/claude-agent-acp@0.30.0", + "@agentclientprotocol/claude-agent-acp@0.46.0", ] assert agent.acp_model == "claude-opus-4-6" # The authoritative provider key is carried onto the agent. @@ -1034,7 +1034,7 @@ def test_acp_resolve_command_rewrites_versioned_npx_to_pinned_binary( monkeypatch.setattr(shutil, "which", _which_returning("codex-acp")) for pkg in ( "@zed-industries/codex-acp", - "@zed-industries/codex-acp@0.15.0", + "@zed-industries/codex-acp@0.16.0", "@zed-industries/codex-acp@0.11.1", ): settings = ACPAgentSettings( @@ -1057,7 +1057,7 @@ def test_acp_resolve_command_keeps_npx_when_binary_absent( assert settings.resolve_acp_command() == [ "npx", "-y", - "@zed-industries/codex-acp@0.15.0", + "@zed-industries/codex-acp@0.16.0", ] From 1f6b2b4d54ccd62c2e1d5f0f67b3e6c37888c4ef Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Wed, 17 Jun 2026 12:48:44 +0200 Subject: [PATCH 02/14] harden(acp): fall back to the other model-select mechanism on method-not-found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Response-detection (`_session_selects_model_via_config_option`) picks set_config_option vs set_session_model from the session response and is correct for every validated CLI. But it reads an UNSTABLE capability whose shape can vary by build/auth state, so a misdetect would crash session init or a model switch with JSON-RPC -32601 "Method not found". Add `_apply_acp_model_with_fallback`: if the chosen model-apply call returns -32601, retry once with the other mechanism (and, for runtime switches, remember the working one). Wired into init (`_maybe_set_session_model`) and `ACPAgent.set_acp_model`; resume already tolerates rejection by keeping the server default, so it's left as-is. A non-method-not-found error (e.g. -32602 invalid model) still propagates — it's a real client error, not a wrong-mechanism signal. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../openhands/sdk/agent/acp_agent.py | 116 ++++++++++++++---- tests/sdk/agent/test_acp_agent.py | 100 +++++++++++++++ 2 files changed, 195 insertions(+), 21 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 3eed076262..aee51a308b 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -140,6 +140,13 @@ # upstream model 500s, and transient infrastructure errors. _RETRIABLE_SERVER_ERROR_CODES: frozenset[int] = frozenset({-32603}) +# -32601 = "Method not found" (JSON-RPC spec). An ACP server raises this when a +# model-selection call uses the mechanism it does *not* implement — e.g. +# `session/set_model` on a CLI that moved model selection to `configOptions` +# (codex-acp 0.16+, claude-agent-acp 0.46+), or vice versa. We use it to fall +# back to the other mechanism if response-detection picked the wrong one. +_METHOD_NOT_FOUND_CODE: Final[int] = -32601 + # Maximum characters for ACP tool call content — matches MAX_CMD_OUTPUT_SIZE # used by the terminal tool and the default max_message_chars in LLM config. @@ -437,6 +444,48 @@ def _apply_acp_model( return conn.set_session_model(model_id=model, session_id=session_id) +def _is_method_not_found(exc: ACPRequestError) -> bool: + """Whether ``exc`` is a JSON-RPC "method not found" — i.e. the server does + not implement the model-selection call we used.""" + return exc.code == _METHOD_NOT_FOUND_CODE + + +async def _apply_acp_model_with_fallback( + conn: ClientSideConnection, + session_id: str, + model: str, + *, + via_config_option: bool, +) -> bool: + """Apply ``model`` via the detected mechanism, falling back to the other if + the server reports the method missing. + + Response-detection (``_session_selects_model_via_config_option``) is correct + for every CLI we've validated, but it reads an UNSTABLE capability and the + response shape can vary by build/auth state. If the chosen call raises + ``-32601 "Method not found"``, the server simply uses the *other* mechanism, + so we retry with it instead of crashing session init. Returns the + ``via_config_option`` value that actually applied the model. + """ + try: + await _apply_acp_model( + conn, session_id, model, via_config_option=via_config_option + ) + return via_config_option + except ACPRequestError as exc: + if not _is_method_not_found(exc): + raise + logger.info( + "ACP model-apply via %s rejected as method-not-found; retrying via %s", + "set_config_option" if via_config_option else "set_session_model", + "set_session_model" if via_config_option else "set_config_option", + ) + await _apply_acp_model( + conn, session_id, model, via_config_option=not via_config_option + ) + return not via_config_option + + def _extract_session_models( response: Any, ) -> tuple[str | None, list[ACPModelInfo] | None]: @@ -654,7 +703,7 @@ async def _maybe_set_session_model( return False provider = detect_acp_provider_by_agent_name(agent_name) if provider is not None and provider.supports_set_session_model: - await _apply_acp_model( + await _apply_acp_model_with_fallback( conn, session_id, acp_model, via_config_option=via_config_option ) return True @@ -727,7 +776,9 @@ async def _reapply_session_model_on_resume( if provider is not None: # Known provider: apply via the mechanism the resumed session uses # (set_config_option for codex-acp 0.16+/claude-agent-acp 0.46+, - # else set_session_model). + # else set_session_model). A rejection is already tolerated below + # (the session keeps the server default), so resume doesn't need the + # cross-mechanism fallback the init/switch paths use. await _apply_acp_model( conn, session_id, acp_model, via_config_option=via_config_option ) @@ -3643,25 +3694,48 @@ def set_acp_model(self, model: str) -> None: timeout=self.acp_prompt_timeout, ) except ACPRequestError as e: - # Server-internal failures (JSON-RPC -32603) are not the caller's - # fault, and the prompt path already treats them as retriable. Let - # them propagate (-> 5xx) instead of mislabeling them as a 400 - # client error. - if e.code in _RETRIABLE_SERVER_ERROR_CODES: - raise - # acp.exceptions.RequestError derives from Exception (not - # RuntimeError); surface a true client/protocol rejection (e.g. - # method-not-found, invalid model id) as a ValueError so callers — - # and the agent-server route — treat it as a 400-class client error - # rather than an opaque 500. - method = ( - "set_config_option(model)" - if self._model_via_config_option - else "set_session_model" - ) - raise ValueError( - f"ACP server rejected {method}(model={model!r}): {e}" - ) from e + pending_error: ACPRequestError | None = e + if _is_method_not_found(e): + # The session uses the other model-selection mechanism (or the + # init-time detection picked the wrong one): retry once with it, + # and remember the working mechanism for later switches. If the + # retry also fails, fall through to the error translation below. + flipped = not self._model_via_config_option + try: + self._executor.run_async( + _apply_acp_model( + self._conn, + self._session_id, + model, + via_config_option=flipped, + ), + timeout=self.acp_prompt_timeout, + ) + except ACPRequestError as e2: + pending_error = e2 + else: + self._model_via_config_option = flipped + pending_error = None # both selections applied + if pending_error is not None: + # Server-internal failures (JSON-RPC -32603) are not the caller's + # fault, and the prompt path already treats them as retriable. + # Let them propagate (-> 5xx) instead of mislabeling them as a + # 400 client error. + if pending_error.code in _RETRIABLE_SERVER_ERROR_CODES: + raise pending_error + # acp.exceptions.RequestError derives from Exception (not + # RuntimeError); surface a true client/protocol rejection (e.g. + # method-not-found on both mechanisms, invalid model id) as a + # ValueError so callers — and the agent-server route — treat it + # as a 400-class client error rather than an opaque 500. + method = ( + "set_config_option(model)" + if self._model_via_config_option + else "set_session_model" + ) + raise ValueError( + f"ACP server rejected {method}(model={model!r}): {pending_error}" + ) from pending_error # Reflect the live model on the sentinel LLM + metrics so cost/token # accounting and serialized state show the model actually in use # (mirrors model_post_init). The ``acp_model`` field is frozen, so the diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 24eb2280af..002003682b 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -20,6 +20,7 @@ from openhands.sdk.agent.acp_agent import ( ACPAgent, + _apply_acp_model_with_fallback, _classify_acp_init_error, _codex_auth_file, _codex_base_url_overrides, @@ -27,6 +28,7 @@ _extract_session_models, _extract_token_usage, _image_url_to_acp_block, + _is_method_not_found, _mask_json_value, _maybe_set_session_model, _mcp_config_to_acp_servers, @@ -4711,6 +4713,33 @@ def test_switches_model_via_config_option_session(self): assert agent.llm.model == "gpt-5.4/low" assert agent._current_model_id == "gpt-5.4/low" + def test_switch_falls_back_on_method_not_found(self): + # Detected set_session_model, but the live server only has the + # configOptions mechanism: the first apply raises -32601, the switch + # retries with the other call and remembers it. + agent = self._wire(_make_agent(), "codex-acp", via_config_option=False) + agent._executor.run_async.side_effect = [ + ACPRequestError(code=-32601, message="Method not found"), + None, # retry with the flipped mechanism succeeds + ] + agent.set_acp_model("gpt-5.4/low") + assert agent._executor.run_async.call_count == 2 + # The working mechanism is remembered for later switches. + assert agent._model_via_config_option is True + assert agent.llm.model == "gpt-5.4/low" + assert agent._current_model_id == "gpt-5.4/low" + + def test_switch_invalid_model_does_not_fall_back(self): + # A -32602 invalid-params is a real client error, not a wrong-mechanism + # signal: surface it as ValueError without a second call. + agent = self._wire(_make_agent(), "codex-acp", via_config_option=True) + agent._executor.run_async.side_effect = ACPRequestError( + code=-32602, message="Invalid params" + ) + with pytest.raises(ValueError, match="rejected set_config_option"): + agent.set_acp_model("nonsense") + agent._executor.run_async.assert_called_once() + def test_claude_provider_supports_runtime_switch(self): agent = self._wire(_make_agent(), "claude-agent-acp") agent.set_acp_model("claude-haiku-4-5-20251001") @@ -7187,6 +7216,77 @@ def test_none_response_is_not_config_option(self): assert _session_selects_model_via_config_option(None) is False +class TestApplyAcpModelFallback: + """Model-apply falls back to the other mechanism on -32601 method-not-found. + + Response-detection is correct for every validated CLI, but it reads an + UNSTABLE capability; a misdetect (or a server that moved between mechanisms) + must self-heal rather than crash session init / a switch. + """ + + def test_is_method_not_found(self): + assert _is_method_not_found(ACPRequestError(code=-32601, message="x")) is True + assert _is_method_not_found(ACPRequestError(code=-32603, message="x")) is False + assert _is_method_not_found(ACPRequestError(code=-32602, message="x")) is False + + @pytest.mark.asyncio + async def test_config_option_falls_back_to_set_session_model(self): + # Detected configOptions, but the server only has set_session_model. + conn = AsyncMock() + conn.set_config_option.side_effect = ACPRequestError( + code=-32601, message="Method not found" + ) + effective = await _apply_acp_model_with_fallback( + conn, "sess-1", "gemini-3-flash", via_config_option=True + ) + conn.set_config_option.assert_awaited_once() + conn.set_session_model.assert_awaited_once_with( + model_id="gemini-3-flash", session_id="sess-1" + ) + assert effective is False # the mechanism that actually worked + + @pytest.mark.asyncio + async def test_set_session_model_falls_back_to_config_option(self): + # Detected set_session_model, but the server moved to configOptions + # (codex 0.16 / claude 0.46 reached via a stale/degraded detection). + conn = AsyncMock() + conn.set_session_model.side_effect = ACPRequestError( + code=-32601, message="Method not found" + ) + effective = await _apply_acp_model_with_fallback( + conn, "sess-1", "opus[1m]", via_config_option=False + ) + conn.set_session_model.assert_awaited_once() + conn.set_config_option.assert_awaited_once_with( + config_id="model", value="opus[1m]", session_id="sess-1" + ) + assert effective is True + + @pytest.mark.asyncio + async def test_no_fallback_when_primary_succeeds(self): + conn = AsyncMock() + effective = await _apply_acp_model_with_fallback( + conn, "sess-1", "sonnet", via_config_option=True + ) + conn.set_config_option.assert_awaited_once() + conn.set_session_model.assert_not_called() + assert effective is True + + @pytest.mark.asyncio + async def test_non_method_not_found_error_propagates(self): + # An invalid-model / server error is NOT a wrong-mechanism signal — it + # must propagate, not silently try the other call. + conn = AsyncMock() + conn.set_config_option.side_effect = ACPRequestError( + code=-32602, message="Invalid params" + ) + with pytest.raises(ACPRequestError): + await _apply_acp_model_with_fallback( + conn, "sess-1", "bad", via_config_option=True + ) + conn.set_session_model.assert_not_called() + + class TestACPAgentAvailableModelsProperty: """``available_models`` exposes the server's model list verbatim. From 1f90820cf3789a73c88a2369b547dca69bd10ded Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Wed, 17 Jun 2026 13:04:03 +0200 Subject: [PATCH 03/14] test(acp): detect mechanism on real captured session/new payloads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address the QA "mocks don't catch the real shape" gap and refute the hypothesised `config_options is None` field-name mismatch: parse the actual claude-agent-acp 0.46.0 / codex-acp 0.16.0 / gemini-cli 0.46.0 `session/new` wire payloads (captured from the pinned binaries) through the real `acp.schema.NewSessionResponse`, then assert the detection + extraction: - claude 0.46 / codex 0.16: `models: null` + a `model` configOptions select ⇒ `_session_selects_model_via_config_option` is True; current/available read from the select (opus[1m] / gpt-5.5). - gemini 0.46: `models` capability present, no model configOption ⇒ False (set_session_model), models read from `availableModels`. These exercise the genuine alias mapping (`configOptions`, `currentValue`, `modelId`), so any field-name/access-path drift fails the test. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/sdk/agent/test_acp_agent.py | 123 +++++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 002003682b..0b8528b21a 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -15,7 +15,7 @@ import pytest from acp.exceptions import RequestError as ACPRequestError -from acp.schema import PromptResponse +from acp.schema import NewSessionResponse, PromptResponse from pydantic import Field, SecretStr from openhands.sdk.agent.acp_agent import ( @@ -7216,6 +7216,127 @@ def test_none_response_is_not_config_option(self): assert _session_selects_model_via_config_option(None) is False +# Real ``session/new`` payloads (the wire shape, by-alias) captured from the +# pinned CLIs and round-tripped through ``acp.schema``. Parsed here through the +# real ``NewSessionResponse`` — not a hand-built mock — so the test catches any +# field-name / access-path drift in detection + extraction (e.g. ``models`` +# vs ``configOptions``, ``currentValue``, ``modelId``). +_CLAUDE_046_SESSION = { + "sessionId": "sess-claude", + "models": None, + "configOptions": [ + { + "id": "mode", + "name": "Mode", + "type": "select", + "currentValue": "default", + "options": [{"value": "default", "name": "Default"}], + }, + { + "id": "model", + "name": "Model", + "type": "select", + "category": "model", + "currentValue": "opus[1m]", + "options": [ + {"value": "default", "name": "Default (recommended)"}, + {"value": "opus[1m]", "name": "Opus"}, + {"value": "sonnet", "name": "Sonnet"}, + {"value": "haiku", "name": "Haiku"}, + ], + }, + { + "id": "effort", + "name": "Thinking", + "type": "select", + "currentValue": "xhigh", + "options": [{"value": "xhigh", "name": "Xhigh"}], + }, + ], +} +_CODEX_016_SESSION = { + "sessionId": "sess-codex", + "models": None, + "configOptions": [ + { + "id": "mode", + "name": "Approval Preset", + "type": "select", + "currentValue": "read-only", + "options": [{"value": "read-only", "name": "Read Only"}], + }, + { + "id": "model", + "name": "Model", + "type": "select", + "category": "model", + "currentValue": "gpt-5.5", + "options": [ + {"value": "gpt-5.5", "name": "GPT-5.5"}, + {"value": "gpt-5.4", "name": "GPT-5.4"}, + {"value": "gpt-5.4-mini", "name": "GPT-5.4-Mini"}, + ], + }, + { + "id": "reasoning_effort", + "name": "Reasoning Effort", + "type": "select", + "currentValue": "xhigh", + "options": [{"value": "xhigh", "name": "Xhigh"}], + }, + ], +} +_GEMINI_046_SESSION = { + "sessionId": "sess-gemini", + "models": { + "currentModelId": "gemini-3-flash-preview", + "availableModels": [ + {"modelId": "auto", "name": "Auto"}, + {"modelId": "gemini-3-pro-preview", "name": "gemini-3-pro-preview"}, + {"modelId": "gemini-3-flash-preview", "name": "gemini-3-flash-preview"}, + {"modelId": "gemini-2.5-pro", "name": "gemini-2.5-pro"}, + {"modelId": "gemini-2.5-flash", "name": "gemini-2.5-flash"}, + {"modelId": "gemini-3.1-flash-lite", "name": "gemini-3.1-flash-lite"}, + ], + }, + "configOptions": None, +} + + +class TestDetectionAgainstRealSessionResponses: + """Detection + extraction against the real CLI ``session/new`` wire shapes, + parsed through the actual ``acp.schema.NewSessionResponse`` (not a mock). + + Guards the exact failure the QA bot hypothesised — that + ``getattr(response, "config_options")`` could be ``None`` due to a field-name + mismatch — by exercising the genuine alias-mapped schema. + """ + + def test_claude_046_uses_config_option(self): + resp = NewSessionResponse.model_validate(_CLAUDE_046_SESSION) + assert _session_selects_model_via_config_option(resp) is True + cur, avail = _extract_session_models(resp) + assert cur == "opus[1m]" + assert avail is not None + assert [m.model_id for m in avail] == ["default", "opus[1m]", "sonnet", "haiku"] + + def test_codex_016_uses_config_option(self): + resp = NewSessionResponse.model_validate(_CODEX_016_SESSION) + assert _session_selects_model_via_config_option(resp) is True + cur, avail = _extract_session_models(resp) + assert cur == "gpt-5.5" + assert avail is not None + assert [m.model_id for m in avail] == ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"] + + def test_gemini_046_uses_set_session_model(self): + resp = NewSessionResponse.model_validate(_GEMINI_046_SESSION) + assert _session_selects_model_via_config_option(resp) is False + cur, avail = _extract_session_models(resp) + assert cur == "gemini-3-flash-preview" + assert avail is not None + assert "gemini-3-pro-preview" in [m.model_id for m in avail] + + class TestApplyAcpModelFallback: """Model-apply falls back to the other mechanism on -32601 method-not-found. From 29405ab35c4890e936dcba65a959a50bd7524a54 Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Wed, 17 Jun 2026 13:30:11 +0200 Subject: [PATCH 04/14] test(acp): make real-shape detection tests robust under xdist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous revision built the session responses with `NewSessionResponse.model_validate()`, which re-parses the `config_options` discriminated union (`SessionConfigOptionSelect | SessionConfigOptionBoolean`). Under the full `-n auto` suite that path is sensitive to a pre-existing cross-test interaction (a sibling test mutates pydantic discriminated-union state), so the claude/codex cases failed in CI while passing in isolation; the gemini case (plain `models`, no union) passed. Construct the responses from already-instantiated `acp.schema` objects instead. `NewSessionResponse.model_config` is `revalidate_instances="never"`, so passing instances skips the union re-validation entirely — deterministic regardless of test order — while still asserting detection/extraction against the real schema types (real `config_options` / `current_value` / `model_id` fields). Adds an explicit `config_options` field-name assertion (no parsing) that directly refutes the "field name mismatch" hypothesis. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/sdk/agent/test_acp_agent.py | 178 +++++++++++++++--------------- 1 file changed, 87 insertions(+), 91 deletions(-) diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 0b8528b21a..6029163ac4 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -15,7 +15,14 @@ import pytest from acp.exceptions import RequestError as ACPRequestError -from acp.schema import NewSessionResponse, PromptResponse +from acp.schema import ( + ModelInfo, + NewSessionResponse, + PromptResponse, + SessionConfigOptionSelect, + SessionConfigSelectOption, + SessionModelState, +) from pydantic import Field, SecretStr from openhands.sdk.agent.acp_agent import ( @@ -7216,104 +7223,93 @@ def test_none_response_is_not_config_option(self): assert _session_selects_model_via_config_option(None) is False -# Real ``session/new`` payloads (the wire shape, by-alias) captured from the -# pinned CLIs and round-tripped through ``acp.schema``. Parsed here through the -# real ``NewSessionResponse`` — not a hand-built mock — so the test catches any -# field-name / access-path drift in detection + extraction (e.g. ``models`` -# vs ``configOptions``, ``currentValue``, ``modelId``). -_CLAUDE_046_SESSION = { - "sessionId": "sess-claude", - "models": None, - "configOptions": [ - { - "id": "mode", - "name": "Mode", - "type": "select", - "currentValue": "default", - "options": [{"value": "default", "name": "Default"}], - }, - { - "id": "model", - "name": "Model", - "type": "select", - "category": "model", - "currentValue": "opus[1m]", - "options": [ - {"value": "default", "name": "Default (recommended)"}, - {"value": "opus[1m]", "name": "Opus"}, - {"value": "sonnet", "name": "Sonnet"}, - {"value": "haiku", "name": "Haiku"}, - ], - }, - { - "id": "effort", - "name": "Thinking", - "type": "select", - "currentValue": "xhigh", - "options": [{"value": "xhigh", "name": "Xhigh"}], - }, - ], -} -_CODEX_016_SESSION = { - "sessionId": "sess-codex", - "models": None, - "configOptions": [ - { - "id": "mode", - "name": "Approval Preset", - "type": "select", - "currentValue": "read-only", - "options": [{"value": "read-only", "name": "Read Only"}], - }, - { - "id": "model", - "name": "Model", - "type": "select", - "category": "model", - "currentValue": "gpt-5.5", - "options": [ - {"value": "gpt-5.5", "name": "GPT-5.5"}, - {"value": "gpt-5.4", "name": "GPT-5.4"}, - {"value": "gpt-5.4-mini", "name": "GPT-5.4-Mini"}, - ], - }, - { - "id": "reasoning_effort", - "name": "Reasoning Effort", - "type": "select", - "currentValue": "xhigh", - "options": [{"value": "xhigh", "name": "Xhigh"}], - }, - ], -} -_GEMINI_046_SESSION = { - "sessionId": "sess-gemini", - "models": { - "currentModelId": "gemini-3-flash-preview", - "availableModels": [ - {"modelId": "auto", "name": "Auto"}, - {"modelId": "gemini-3-pro-preview", "name": "gemini-3-pro-preview"}, - {"modelId": "gemini-3-flash-preview", "name": "gemini-3-flash-preview"}, - {"modelId": "gemini-2.5-pro", "name": "gemini-2.5-pro"}, - {"modelId": "gemini-2.5-flash", "name": "gemini-2.5-flash"}, - {"modelId": "gemini-3.1-flash-lite", "name": "gemini-3.1-flash-lite"}, +def _select(opt_id, current, values, category=None): + return SessionConfigOptionSelect( + id=opt_id, + name=opt_id, + type="select", + category=category, + current_value=current, + options=[SessionConfigSelectOption(value=v, name=v) for v in values], + ) + + +def _claude_046_session() -> NewSessionResponse: + # claude-agent-acp 0.46.0: ``models`` dropped; model is a configOptions + # select alongside ``mode``/``effort``. + return NewSessionResponse( + session_id="sess-claude", + models=None, + config_options=[ + _select("mode", "default", ["default", "acceptEdits"]), + _select( + "model", + "opus[1m]", + ["default", "opus[1m]", "sonnet", "haiku"], + category="model", + ), + _select("effort", "xhigh", ["xhigh", "low"]), ], - }, - "configOptions": None, -} + ) + + +def _codex_016_session() -> NewSessionResponse: + # codex-acp 0.16.0: same shape, combined model ids stay in the select. + return NewSessionResponse( + session_id="sess-codex", + models=None, + config_options=[ + _select("mode", "read-only", ["read-only", "full-access"]), + _select( + "model", + "gpt-5.5", + ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"], + category="model", + ), + _select("reasoning_effort", "xhigh", ["xhigh", "low"]), + ], + ) + + +def _gemini_046_session() -> NewSessionResponse: + # gemini-cli 0.46.0: keeps the ``models`` capability, no model configOption. + return NewSessionResponse( + session_id="sess-gemini", + config_options=None, + models=SessionModelState( + current_model_id="gemini-3-flash-preview", + available_models=[ + ModelInfo(model_id=mid, name=mid) + for mid in ( + "auto", + "gemini-3-pro-preview", + "gemini-3-flash-preview", + "gemini-2.5-pro", + "gemini-2.5-flash", + "gemini-3.1-flash-lite", + ) + ], + ), + ) class TestDetectionAgainstRealSessionResponses: - """Detection + extraction against the real CLI ``session/new`` wire shapes, - parsed through the actual ``acp.schema.NewSessionResponse`` (not a mock). + """Detection + extraction against the real CLI ``session/new`` shapes built + from the genuine ``acp.schema`` types (not a hand-rolled mock). Guards the exact failure the QA bot hypothesised — that ``getattr(response, "config_options")`` could be ``None`` due to a field-name - mismatch — by exercising the genuine alias-mapped schema. + mismatch — by reading the real schema field (``config_options`` / + ``current_value`` / ``model_id``). """ + def test_config_options_is_the_schema_field_name(self): + # The SDK reads ``response.config_options``; pin that this is the real + # field on the schema (the bot hypothesised a name mismatch). + assert "config_options" in NewSessionResponse.model_fields + def test_claude_046_uses_config_option(self): - resp = NewSessionResponse.model_validate(_CLAUDE_046_SESSION) + resp = _claude_046_session() assert _session_selects_model_via_config_option(resp) is True cur, avail = _extract_session_models(resp) assert cur == "opus[1m]" @@ -7321,7 +7317,7 @@ def test_claude_046_uses_config_option(self): assert [m.model_id for m in avail] == ["default", "opus[1m]", "sonnet", "haiku"] def test_codex_016_uses_config_option(self): - resp = NewSessionResponse.model_validate(_CODEX_016_SESSION) + resp = _codex_016_session() assert _session_selects_model_via_config_option(resp) is True cur, avail = _extract_session_models(resp) assert cur == "gpt-5.5" @@ -7329,7 +7325,7 @@ def test_codex_016_uses_config_option(self): assert [m.model_id for m in avail] == ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"] def test_gemini_046_uses_set_session_model(self): - resp = NewSessionResponse.model_validate(_GEMINI_046_SESSION) + resp = _gemini_046_session() assert _session_selects_model_via_config_option(resp) is False cur, avail = _extract_session_models(resp) assert cur == "gemini-3-flash-preview" From 4c949562db07899b307cb803cb9a4399194c0a31 Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Wed, 17 Jun 2026 13:52:35 +0200 Subject: [PATCH 05/14] fix(acp): unwrap SessionConfigOption RootModel for model detection (acp 0.8.x) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The shipped `agent-client-protocol` lib (0.8.1, pinned in uv.lock / the agent-server image) wraps each `session/new` `configOptions` entry in a `SessionConfigOption` RootModel (`class SessionConfigOption(RootModel[ SessionConfigOptionSelect])`), so `opt.id` / `opt.type` are `None` and must be read off `opt.root`. `_model_config_option` read them directly, so `_session_selects_model_via_config_option` returned `False` for claude-agent-acp 0.46 / codex-acp 0.16 — the SDK then called the removed `session/set_model` and crashed init with "Method not found". (0.10.x lists the union members directly, which is why local runs against a drifted venv passed; CI/production run 0.8.1.) Unwrap `getattr(raw, "root", raw)` so detection works on 0.8.x and 0.10.x. Tests now parse the real wire payloads through `NewSessionResponse` (exercising the 0.8.x RootModel wrapper) plus an explicit `.root`-unwrap unit test. Re-validated e2e against acp 0.8.1 + the pinned binaries: claude 0.46 and codex 0.16 both detect `via_config_option=True`, write a file, and track a live model switch. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../openhands/sdk/agent/acp_agent.py | 8 +- tests/sdk/agent/test_acp_agent.py | 174 +++++++++--------- 2 files changed, 96 insertions(+), 86 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index aee51a308b..58c4810e80 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -394,8 +394,14 @@ def _model_config_option(response: Any) -> Any | None: ``session/set_model``. Returns that option (carrying ``options`` and ``current_value``) or ``None`` when the server uses neither / the old mechanism. ``getattr`` keeps it tolerant of partial structures. + + The ``agent-client-protocol`` Python lib wraps each entry in a + ``SessionConfigOption`` ``RootModel`` on 0.8.x (access via ``.root``) but + lists the union members directly on 0.10.x; unwrap ``.root`` so detection + works on either. """ - for opt in getattr(response, "config_options", None) or []: + for raw in getattr(response, "config_options", None) or []: + opt = getattr(raw, "root", raw) if ( getattr(opt, "type", None) == "select" and getattr(opt, "id", None) == _MODEL_CONFIG_OPTION_ID diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 6029163ac4..1119e322bf 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -15,14 +15,7 @@ import pytest from acp.exceptions import RequestError as ACPRequestError -from acp.schema import ( - ModelInfo, - NewSessionResponse, - PromptResponse, - SessionConfigOptionSelect, - SessionConfigSelectOption, - SessionModelState, -) +from acp.schema import NewSessionResponse, PromptResponse from pydantic import Field, SecretStr from openhands.sdk.agent.acp_agent import ( @@ -7159,6 +7152,22 @@ def test_extracts_model_state_from_config_option(self): ACPModelInfo(model_id="haiku", name="Haiku", description="Haiku 4.5"), ] + def test_unwraps_rootmodel_wrapped_config_option(self): + # agent-client-protocol 0.8.x wraps each entry in a SessionConfigOption + # RootModel (the option lives under ``.root``); detection must unwrap it, + # else ``opt.id`` is None and the SDK wrongly falls back to set_model. + select = self._select( + current_value="opus[1m]", + options=[self._opt("opus[1m]", "Opus"), self._opt("sonnet", "Sonnet")], + ) + wrapped = SimpleNamespace(root=select) # mimics the RootModel wrapper + response = self._response(config_options=[wrapped]) + assert _session_selects_model_via_config_option(response) is True + cur, avail = _extract_session_models(response) + assert cur == "opus[1m]" + assert avail is not None + assert [m.model_id for m in avail] == ["opus[1m]", "sonnet"] + def test_models_capability_wins_over_config_option(self): # If a server somehow carries both, the ``models`` capability is used. models = MagicMock() @@ -7223,93 +7232,88 @@ def test_none_response_is_not_config_option(self): assert _session_selects_model_via_config_option(None) is False -def _select(opt_id, current, values, category=None): - return SessionConfigOptionSelect( - id=opt_id, - name=opt_id, - type="select", - category=category, - current_value=current, - options=[SessionConfigSelectOption(value=v, name=v) for v in values], - ) - - -def _claude_046_session() -> NewSessionResponse: - # claude-agent-acp 0.46.0: ``models`` dropped; model is a configOptions - # select alongside ``mode``/``effort``. - return NewSessionResponse( - session_id="sess-claude", - models=None, - config_options=[ - _select("mode", "default", ["default", "acceptEdits"]), - _select( - "model", - "opus[1m]", - ["default", "opus[1m]", "sonnet", "haiku"], - category="model", - ), - _select("effort", "xhigh", ["xhigh", "low"]), - ], - ) - - -def _codex_016_session() -> NewSessionResponse: - # codex-acp 0.16.0: same shape, combined model ids stay in the select. - return NewSessionResponse( - session_id="sess-codex", - models=None, - config_options=[ - _select("mode", "read-only", ["read-only", "full-access"]), - _select( - "model", - "gpt-5.5", - ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"], - category="model", - ), - _select("reasoning_effort", "xhigh", ["xhigh", "low"]), - ], - ) - - -def _gemini_046_session() -> NewSessionResponse: - # gemini-cli 0.46.0: keeps the ``models`` capability, no model configOption. - return NewSessionResponse( - session_id="sess-gemini", - config_options=None, - models=SessionModelState( - current_model_id="gemini-3-flash-preview", - available_models=[ - ModelInfo(model_id=mid, name=mid) - for mid in ( - "auto", - "gemini-3-pro-preview", - "gemini-3-flash-preview", - "gemini-2.5-pro", - "gemini-2.5-flash", - "gemini-3.1-flash-lite", - ) - ], +# Real ``session/new`` wire payloads (by-alias) captured from the pinned CLIs. +# Parsed through the real ``NewSessionResponse`` so the test exercises the +# genuine schema, INCLUDING the ``SessionConfigOption`` RootModel wrapper the +# ``agent-client-protocol`` lib applies to ``config_options`` on 0.8.x (where a +# naive ``opt.id`` read returns ``None``). This is the exact failure the QA bot +# surfaced; the detection helper unwraps ``.root`` so it works on 0.8.x + 0.10.x. +def _select_dict(opt_id, current, values, category=None): + return { + "id": opt_id, + "name": opt_id, + "type": "select", + "category": category, + "currentValue": current, + "options": [{"value": v, "name": v} for v in values], + } + + +_CLAUDE_046_SESSION = { + "sessionId": "sess-claude", + "models": None, + "configOptions": [ + _select_dict("mode", "default", ["default", "acceptEdits"]), + _select_dict( + "model", + "opus[1m]", + ["default", "opus[1m]", "sonnet", "haiku"], + category="model", ), - ) + _select_dict("effort", "xhigh", ["xhigh", "low"]), + ], +} +_CODEX_016_SESSION = { + "sessionId": "sess-codex", + "models": None, + "configOptions": [ + _select_dict("mode", "read-only", ["read-only", "full-access"]), + _select_dict( + "model", + "gpt-5.5", + ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"], + category="model", + ), + _select_dict("reasoning_effort", "xhigh", ["xhigh", "low"]), + ], +} +_GEMINI_046_SESSION = { + "sessionId": "sess-gemini", + "configOptions": None, + "models": { + "currentModelId": "gemini-3-flash-preview", + "availableModels": [ + {"modelId": mid, "name": mid} + for mid in ( + "auto", + "gemini-3-pro-preview", + "gemini-3-flash-preview", + "gemini-2.5-pro", + "gemini-2.5-flash", + "gemini-3.1-flash-lite", + ) + ], + }, +} class TestDetectionAgainstRealSessionResponses: - """Detection + extraction against the real CLI ``session/new`` shapes built - from the genuine ``acp.schema`` types (not a hand-rolled mock). + """Detection + extraction against the real CLI ``session/new`` wire shapes, + parsed through the actual ``acp.schema.NewSessionResponse``. - Guards the exact failure the QA bot hypothesised — that - ``getattr(response, "config_options")`` could be ``None`` due to a field-name - mismatch — by reading the real schema field (``config_options`` / - ``current_value`` / ``model_id``). + Guards the real production bug the QA bot surfaced: on + ``agent-client-protocol`` 0.8.x each ``config_options`` entry is a + ``SessionConfigOption`` RootModel (``.root``), so a naive ``opt.id`` read is + ``None`` and the SDK would fall back to the removed ``session/set_model``. """ def test_config_options_is_the_schema_field_name(self): # The SDK reads ``response.config_options``; pin that this is the real - # field on the schema (the bot hypothesised a name mismatch). + # field on the schema. assert "config_options" in NewSessionResponse.model_fields def test_claude_046_uses_config_option(self): - resp = _claude_046_session() + resp = NewSessionResponse.model_validate(_CLAUDE_046_SESSION) assert _session_selects_model_via_config_option(resp) is True cur, avail = _extract_session_models(resp) assert cur == "opus[1m]" @@ -7317,7 +7321,7 @@ def test_claude_046_uses_config_option(self): assert [m.model_id for m in avail] == ["default", "opus[1m]", "sonnet", "haiku"] def test_codex_016_uses_config_option(self): - resp = _codex_016_session() + resp = NewSessionResponse.model_validate(_CODEX_016_SESSION) assert _session_selects_model_via_config_option(resp) is True cur, avail = _extract_session_models(resp) assert cur == "gpt-5.5" @@ -7325,7 +7329,7 @@ def test_codex_016_uses_config_option(self): assert [m.model_id for m in avail] == ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"] def test_gemini_046_uses_set_session_model(self): - resp = _gemini_046_session() + resp = NewSessionResponse.model_validate(_GEMINI_046_SESSION) assert _session_selects_model_via_config_option(resp) is False cur, avail = _extract_session_models(resp) assert cur == "gemini-3-flash-preview" From b7d5dfc46df23cf11d11be97918e8304ac6167d5 Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Wed, 17 Jun 2026 15:17:28 +0200 Subject: [PATCH 06/14] =?UTF-8?q?chore(acp):=20address=20review=20notes=20?= =?UTF-8?q?=E2=80=94=20warn=20on=20mid-session=20mechanism=20change,=20tri?= =?UTF-8?q?m=20comments?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the code review on #3773: - set_acp_model now logs a WARNING when a runtime switch needs the other model-select mechanism than init detected (the stored _model_via_config_option diverged), in addition to adopting the working one — surfaces the unlikely-but-possible mid-session mechanism change instead of only self-healing silently. - Trim verbose / change-history comments the review flagged: drop the "claude-agent-acp used to be skipped…" narrative from _maybe_set_session_model, and tighten the version-pin and gemini default-mode comments. (The _extract_session_models absent-vs-empty doc is kept — callers rely on that semantic for resume persistence.) No behavior change beyond the added warning log. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../openhands/sdk/agent/acp_agent.py | 48 +++++++++---------- .../openhands/sdk/settings/acp_providers.py | 17 +++---- 2 files changed, 29 insertions(+), 36 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 58c4810e80..ebec1ac68d 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -679,31 +679,19 @@ async def _maybe_set_session_model( ) -> bool: """Apply the *initial* session model right after session creation. - This is the session-creation path only, gated on - :attr:`~openhands.sdk.settings.acp_providers.ACPProviderInfo.supports_set_session_model`. - All built-in providers get a one-shot model-apply call here, via whichever - protocol the session advertised (``via_config_option``): - ``set_config_option(configId="model")`` for configOptions-based servers - (codex-acp 0.16+, claude-agent-acp 0.46+), else ``set_session_model``. - claude-agent-acp used to be skipped on the assumption that it already - received the model via ``new_session()`` ``_meta``, but 0.30.0 silently - ignores that payload (#3654) — the protocol call is the only path that - both validates and applies the model. - - For unknown/custom providers (e.g. Devin CLI), we fall back to the generic - ``set_config_option`` method with configId="model", which is a standard ACP - method that many custom ACP servers support. - - Runtime, mid-conversation switches go through - :meth:`ACPAgent.set_acp_model` instead, which always uses - ``set_session_model`` and is gated on the separate - ``supports_runtime_model_switch`` capability flag. - - Returns ``True`` only when this issued a model-setting call that succeeded — i.e. - the override was actually pushed to the server via *this* path. ``False`` - when there is nothing to apply (no ``acp_model``) or the provider selects - its model another way (``_meta``) or the server rejected the call, so - the caller can tell whether the live session is really running ``acp_model``. + Session-creation path only, gated on + ``ACPProviderInfo.supports_set_session_model``. Built-in providers get a + one-shot model-apply via whichever protocol the session advertised + (``via_config_option``): ``set_config_option(configId="model")`` for + configOptions-based servers (codex-acp 0.16+, claude-agent-acp 0.46+), else + ``set_session_model``. The ``_meta`` model payload is ignored by the pinned + CLIs, so the protocol call is what actually applies the model (#3654). + Unknown/custom providers fall back to ``set_config_option(configId="model")``. + Runtime switches go through :meth:`ACPAgent.set_acp_model`. + + Returns ``True`` only when a model-setting call succeeded (the override + reached the server); ``False`` when there is nothing to apply or the call + was rejected, so the caller knows whether the live session runs ``acp_model``. """ if not acp_model: return False @@ -3720,6 +3708,16 @@ def set_acp_model(self, model: str) -> None: except ACPRequestError as e2: pending_error = e2 else: + # The live session's mechanism diverged from init-time + # detection; warn and adopt the working one for later + # switches. + logger.warning( + "ACP model-select mechanism changed mid-session " + "(init detected via_config_option=%s, switch needed %s); " + "using the latter for subsequent switches", + not flipped, + flipped, + ) self._model_via_config_option = flipped pending_error = None # both selections applied if pending_error is not None: diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py index 687767a522..d14a963130 100644 --- a/openhands-sdk/openhands/sdk/settings/acp_providers.py +++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py @@ -378,11 +378,9 @@ class ACPProviderInfo: # `ACPAgentSettings.resolve_acp_command` runs the pinned `binary_name` instead, # so the `@version` suffix is a no-op there. # -# codex-acp 0.16.0 and claude-agent-acp 0.46.0 dropped the UNSTABLE ``models`` -# capability + ``session/set_model`` in favour of a ``model`` ``configOptions`` -# select driven by ``session/set_config_option``; the SDK auto-detects and uses -# the right call (see ``_session_selects_model_via_config_option`` in -# acp_agent.py), so model switching keeps working across the bump (#3772). +# codex-acp 0.16.0 / claude-agent-acp 0.46.0 select the model via a ``model`` +# ``configOptions`` entry instead of ``session/set_model``; the SDK detects the +# mechanism per session (see ``_session_selects_model_via_config_option``). CLAUDE_AGENT_ACP_VERSION = "0.46.0" CODEX_ACP_VERSION = "0.16.0" GEMINI_CLI_VERSION = "0.46.0" @@ -452,12 +450,9 @@ class ACPProviderInfo: api_key_env_var="GEMINI_API_KEY", base_url_env_var="GEMINI_BASE_URL", # gemini-cli 0.46.0 rejects ``set_session_mode("yolo")`` at session - # init with a JSON-RPC -32603 (it gates yolo on folder-trust that is - # not yet established right after session/new), which would crash - # headless startup. ``default`` is accepted at init, and the SDK's - # ACP bridge already auto-approves every ``session/request_permission`` - # the server sends, so permission prompts never block regardless of - # mode — making ``default`` the headless-safe choice. See #3772. + # init (-32603), which crashes headless startup; ``default`` is + # accepted. The ACP bridge auto-approves every request_permission, so + # prompts never block regardless of mode. See #3772. default_session_mode="default", agent_name_patterns=("gemini-cli",), supports_set_session_model=True, From 87c8a59f267590a5637586eb51330da74b7182c2 Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Wed, 17 Jun 2026 15:32:39 +0200 Subject: [PATCH 07/14] refactor(acp): extract _usable_models / _config_option_to_model helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per the review's DRY note: both branches of _extract_session_models built an ACPModelInfo list and dropped entries without a usable model_id. Factor the shared filter into _usable_models and the configOptions option→ACPModelInfo mapping into _config_option_to_model (degrading a non-string id to "" like ACPModelInfo.from_protocol). No behavior change. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../openhands/sdk/agent/acp_agent.py | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index ebec1ac68d..42f476c36f 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -23,7 +23,7 @@ import threading import time import uuid -from collections.abc import Awaitable, Callable, Generator +from collections.abc import Awaitable, Callable, Generator, Iterable from concurrent.futures import Future from pathlib import Path from typing import TYPE_CHECKING, Any, Final, Literal, NamedTuple @@ -492,6 +492,25 @@ async def _apply_acp_model_with_fallback( return not via_config_option +def _usable_models(infos: Iterable[ACPModelInfo]) -> list[ACPModelInfo]: + """Drop entries without a usable ``model_id`` — an empty/missing id is an + invalid picker option and an unusable model-switch target.""" + return [info for info in infos if info.model_id] + + +def _config_option_to_model(opt: Any) -> ACPModelInfo: + """Build an :class:`ACPModelInfo` from a ``configOptions`` select option, + whose ``value`` is the model id. A non-string id degrades to ``""`` (then + dropped by :func:`_usable_models`), mirroring ``ACPModelInfo.from_protocol``. + """ + value = getattr(opt, "value", None) + return ACPModelInfo( + model_id=value if isinstance(value, str) else "", + name=getattr(opt, "name", None), + description=getattr(opt, "description", None), + ) + + def _extract_session_models( response: Any, ) -> tuple[str | None, list[ACPModelInfo] | None]: @@ -523,15 +542,7 @@ def _extract_session_models( current = getattr(models, "current_model_id", None) current = current if isinstance(current, str) and current else None raw = getattr(models, "available_models", None) or [] - # Drop entries without a usable id: an empty/missing ``model_id`` is an - # invalid picker option and an unusable ``set_session_model`` target, so - # we filter it out rather than surfacing ``model_id=""``. - available = [ - info - for info in (ACPModelInfo.from_protocol(m) for m in raw) - if info.model_id - ] - return current, available + return current, _usable_models(ACPModelInfo.from_protocol(m) for m in raw) # configOptions mechanism: the ``model`` select carries the same state, with # ``value`` as the model id (== the ``set_config_option`` target). opt = _model_config_option(response) @@ -539,18 +550,8 @@ def _extract_session_models( return None, None current = getattr(opt, "current_value", None) current = current if isinstance(current, str) and current else None - available = [] - for o in getattr(opt, "options", None) or []: - value = getattr(o, "value", None) - if isinstance(value, str) and value: - available.append( - ACPModelInfo( - model_id=value, - name=getattr(o, "name", None), - description=getattr(o, "description", None), - ) - ) - return current, available + options = getattr(opt, "options", None) or [] + return current, _usable_models(_config_option_to_model(o) for o in options) # The ACP MCP server union accepted by new_session() / load_session(). From d9d7a0afe36851486ee8eeda83c4864441ab5461 Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Wed, 17 Jun 2026 15:40:37 +0200 Subject: [PATCH 08/14] chore(acp): hold claude-agent-acp at 0.44.0 to clear the 7-day supply-chain gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit claude-agent-acp 0.45.0 (2026-06-15) and 0.46.0 (2026-06-16) were published <7 days ago and trip the review bot's supply-chain hold, blocking merge. 0.44.0 (2026-06-09, 8 days old) clears it and is functionally equivalent for this PR: same configOptions model-selection mechanism (verified `models: null`, model config-option `[default, opus[1m], sonnet, haiku]`, default `opus[1m]`) and same registry model set. Re-validated e2e against the 0.44.0 binary: init + bypassPermissions edit turn + live `opus[1m]→sonnet` switch all pass. codex-acp 0.16.0 (2026-06-08) and gemini-cli 0.46.0 (2026-06-10) are already >7 days old, so they stay. Revisit claude → 0.46.0 once it ages past the hold. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../openhands/agent_server/docker/Dockerfile | 2 +- openhands-sdk/openhands/sdk/agent/acp_agent.py | 14 +++++++------- .../openhands/sdk/settings/acp_providers.py | 12 ++++++++---- tests/sdk/test_settings.py | 2 +- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/docker/Dockerfile b/openhands-agent-server/openhands/agent_server/docker/Dockerfile index 8cc51cd9f8..b13a2f219b 100644 --- a/openhands-agent-server/openhands/agent_server/docker/Dockerfile +++ b/openhands-agent-server/openhands/agent_server/docker/Dockerfile @@ -171,7 +171,7 @@ RUN set -ux; \ && "$ACP_NODE_DIR/bin/node" --version; then \ PATH="$ACP_NODE_DIR/bin:$PATH"; \ if "$ACP_NODE_DIR/bin/npm" install -g \ - @agentclientprotocol/claude-agent-acp@0.46.0 \ + @agentclientprotocol/claude-agent-acp@0.44.0 \ @zed-industries/codex-acp@0.16.0 \ @google/gemini-cli@0.46.0; then \ # Create wrappers in /usr/local/bin that prepend ACP's Node 22 to PATH. diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 42f476c36f..2a3621c34e 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -143,7 +143,7 @@ # -32601 = "Method not found" (JSON-RPC spec). An ACP server raises this when a # model-selection call uses the mechanism it does *not* implement — e.g. # `session/set_model` on a CLI that moved model selection to `configOptions` -# (codex-acp 0.16+, claude-agent-acp 0.46+), or vice versa. We use it to fall +# (codex-acp 0.16+, claude-agent-acp 0.44+), or vice versa. We use it to fall # back to the other mechanism if response-detection picked the wrong one. _METHOD_NOT_FOUND_CODE: Final[int] = -32601 @@ -380,7 +380,7 @@ def _write_secret_file(path: Path, value: str) -> None: # Session config-option id that selects the model on ACP servers that drive # model selection through ``configOptions`` / ``session/set_config_option`` -# (codex-acp 0.16+, claude-agent-acp 0.46+) rather than the UNSTABLE ``models`` +# (codex-acp 0.16+, claude-agent-acp 0.44+) rather than the UNSTABLE ``models`` # capability + ``session/set_model`` (gemini-cli, older codex/claude). _MODEL_CONFIG_OPTION_ID = "model" @@ -436,7 +436,7 @@ def _apply_acp_model( """Build the awaitable that applies ``model`` to a live ACP session via whichever protocol the session advertised: ``set_config_option(configId= "model")`` for configOptions-based servers (codex-acp 0.16+, claude-agent-acp - 0.46+), else ``set_session_model``. + 0.44+), else ``set_session_model``. Returns the (eagerly constructed) coroutine so callers can either ``await`` it directly or hand it to ``run_async`` — the underlying ``conn`` method is @@ -521,7 +521,7 @@ def _extract_session_models( type at this boundary so nothing downstream depends on the vendored ``acp.schema`` shape. Reads whichever mechanism the session advertised: the UNSTABLE ``models`` capability, or the ``model`` ``configOptions`` select - (codex-acp 0.16+, claude-agent-acp 0.46+). + (codex-acp 0.16+, claude-agent-acp 0.44+). The second element distinguishes **absent** from **empty** — this matters for resume persistence (preserve the last-known list when the server didn't @@ -684,7 +684,7 @@ async def _maybe_set_session_model( ``ACPProviderInfo.supports_set_session_model``. Built-in providers get a one-shot model-apply via whichever protocol the session advertised (``via_config_option``): ``set_config_option(configId="model")`` for - configOptions-based servers (codex-acp 0.16+, claude-agent-acp 0.46+), else + configOptions-based servers (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``. The ``_meta`` model payload is ignored by the pinned CLIs, so the protocol call is what actually applies the model (#3654). Unknown/custom providers fall back to ``set_config_option(configId="model")``. @@ -770,7 +770,7 @@ async def _reapply_session_model_on_resume( try: if provider is not None: # Known provider: apply via the mechanism the resumed session uses - # (set_config_option for codex-acp 0.16+/claude-agent-acp 0.46+, + # (set_config_option for codex-acp 0.16+/claude-agent-acp 0.44+, # else set_session_model). A rejection is already tolerated below # (the session keeps the server default), so resume doesn't need the # cross-mechanism fallback the init/switch paths use. @@ -1627,7 +1627,7 @@ def model_post_init(self, __context: object) -> None: ) # ACP server version from InitializeResponse # Which protocol this session uses to select the model: ``True`` ⇒ # ``session/set_config_option(configId="model")`` (codex-acp 0.16+, - # claude-agent-acp 0.46+); ``False`` ⇒ ``session/set_model`` (gemini-cli and + # claude-agent-acp 0.44+); ``False`` ⇒ ``session/set_model`` (gemini-cli and # older codex/claude). Detected from the session/new (or load_session) # response at init and reused by runtime ``set_acp_model`` switches. _model_via_config_option: bool = PrivateAttr(default=False) diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py index d14a963130..dfb60d2e74 100644 --- a/openhands-sdk/openhands/sdk/settings/acp_providers.py +++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py @@ -294,7 +294,7 @@ class ACPProviderInfo: # --------------------------------------------------------------------------- # Model IDs the Claude Code CLI accepts, mirroring the ``model`` configOptions -# select claude-agent-acp 0.46.0 reports at ``session/new`` (the short aliases +# select claude-agent-acp 0.44.0 reports at ``session/new`` (the short aliases # the CLI's own ``/model`` menu offers, switched via ``set_config_option``). # ``opus[1m]`` is the SDK-documented version-agnostic 1M-context alias and the # CLI's own default (``currentValue``); ``default`` is the CLI's recommended @@ -378,10 +378,14 @@ class ACPProviderInfo: # `ACPAgentSettings.resolve_acp_command` runs the pinned `binary_name` instead, # so the `@version` suffix is a no-op there. # -# codex-acp 0.16.0 / claude-agent-acp 0.46.0 select the model via a ``model`` +# codex-acp 0.16.0 / claude-agent-acp 0.44.0 select the model via a ``model`` # ``configOptions`` entry instead of ``session/set_model``; the SDK detects the # mechanism per session (see ``_session_selects_model_via_config_option``). -CLAUDE_AGENT_ACP_VERSION = "0.46.0" +# +# claude-agent-acp is held at 0.44.0 (0.45.0/0.46.0 published <7 days ago and +# trip the supply-chain hold) — same configOptions mechanism + model set as +# 0.46.0; revisit once a newer release ages past the hold. +CLAUDE_AGENT_ACP_VERSION = "0.44.0" CODEX_ACP_VERSION = "0.16.0" GEMINI_CLI_VERSION = "0.46.0" @@ -403,7 +407,7 @@ class ACPProviderInfo: # claude-agent-acp ignores the session-_meta model selection (the # requested model only becomes a picker option; the session keeps # running its default), so the init path must push the model via a - # protocol call (#3654). On 0.46.0 that call is + # protocol call (#3654). On 0.44.0+ that call is # ``set_config_option(configId="model")`` rather than # ``set_session_model`` (auto-detected from session/new); the _meta # payload (session_meta_key below) is still sent — harmless, and diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py index 3f43ebcff4..546806d7ce 100644 --- a/tests/sdk/test_settings.py +++ b/tests/sdk/test_settings.py @@ -883,7 +883,7 @@ def test_acp_create_agent_uses_server_default_command( assert agent.acp_command == [ "npx", "-y", - "@agentclientprotocol/claude-agent-acp@0.46.0", + "@agentclientprotocol/claude-agent-acp@0.44.0", ] assert agent.acp_model == "claude-opus-4-6" # The authoritative provider key is carried onto the agent. From 8b88ad206fe6f0f2a683e471b2aabede07be0899 Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Wed, 17 Jun 2026 17:53:43 +0200 Subject: [PATCH 09/14] =?UTF-8?q?fix(acp):=20hold=20codex-acp=20at=200.15.?= =?UTF-8?q?0=20=E2=80=94=200.16.0=20regresses=20model-switch=20turns?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Deeper e2e validation found codex-acp 0.16.0 breaks the turn *after* a model switch: the switch applies (set_config_option, current_model_id tracks) but the next prompt fails with JSON-RPC -32603. Confirmed apples-to-apples against the ChatGPT-subscription backend — same SDK/auth/machine/model, only the codex version differs: codex 0.15.0 (set_session_model): switch gpt-5.5/high -> turn OK codex 0.16.0 (set_config_option): switch gpt-5.5/{xhigh,high,medium}, gpt-5.4-mini/low -> turn -32603 (6+ runs) Bumping would regress ACP model switching for codex (the #3763/#3764 path), so hold at 0.15.0 (its set_session_model switch + turn works fully). Keep the configOptions detection/apply code — claude-agent-acp 0.44 needs it, and it already handles codex 0.16 if/when codex fixes the set_config_option turn path (detection is per-session, no further change needed to un-hold). claude stays at 0.44.0, gemini at 0.46.0. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../openhands/agent_server/docker/Dockerfile | 2 +- .../openhands/sdk/settings/acp_providers.py | 14 +++++++++++--- tests/sdk/test_settings.py | 4 ++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/docker/Dockerfile b/openhands-agent-server/openhands/agent_server/docker/Dockerfile index b13a2f219b..fa6160cdef 100644 --- a/openhands-agent-server/openhands/agent_server/docker/Dockerfile +++ b/openhands-agent-server/openhands/agent_server/docker/Dockerfile @@ -172,7 +172,7 @@ RUN set -ux; \ PATH="$ACP_NODE_DIR/bin:$PATH"; \ if "$ACP_NODE_DIR/bin/npm" install -g \ @agentclientprotocol/claude-agent-acp@0.44.0 \ - @zed-industries/codex-acp@0.16.0 \ + @zed-industries/codex-acp@0.15.0 \ @google/gemini-cli@0.46.0; then \ # Create wrappers in /usr/local/bin that prepend ACP's Node 22 to PATH. # This ensures the ACP binary's #!/usr/bin/env node shebang resolves diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py index dfb60d2e74..7be85369b6 100644 --- a/openhands-sdk/openhands/sdk/settings/acp_providers.py +++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py @@ -378,15 +378,23 @@ class ACPProviderInfo: # `ACPAgentSettings.resolve_acp_command` runs the pinned `binary_name` instead, # so the `@version` suffix is a no-op there. # -# codex-acp 0.16.0 / claude-agent-acp 0.44.0 select the model via a ``model`` +# claude-agent-acp 0.44+ (and codex-acp 0.16+) select the model via a ``model`` # ``configOptions`` entry instead of ``session/set_model``; the SDK detects the -# mechanism per session (see ``_session_selects_model_via_config_option``). +# mechanism per session (see ``_session_selects_model_via_config_option``) and +# applies it accordingly, so model switching works on either mechanism. # # claude-agent-acp is held at 0.44.0 (0.45.0/0.46.0 published <7 days ago and # trip the supply-chain hold) — same configOptions mechanism + model set as # 0.46.0; revisit once a newer release ages past the hold. +# +# codex-acp is held at 0.15.0: 0.16.0 moved to the configOptions mechanism, but +# a real e2e showed that applying a model via ``set_config_option`` then running +# a turn fails with JSON-RPC -32603 (verified against the ChatGPT-subscription +# backend; 0.15.0's ``set_session_model`` switch + turn works). Bumping would +# regress ACP model switching for codex (the #3763/#3764 path), so hold until +# codex-acp fixes the 0.16 set_config_option turn path. See issue #3772. CLAUDE_AGENT_ACP_VERSION = "0.44.0" -CODEX_ACP_VERSION = "0.16.0" +CODEX_ACP_VERSION = "0.15.0" GEMINI_CLI_VERSION = "0.46.0" diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py index 546806d7ce..944ee2d1fe 100644 --- a/tests/sdk/test_settings.py +++ b/tests/sdk/test_settings.py @@ -1034,7 +1034,7 @@ def test_acp_resolve_command_rewrites_versioned_npx_to_pinned_binary( monkeypatch.setattr(shutil, "which", _which_returning("codex-acp")) for pkg in ( "@zed-industries/codex-acp", - "@zed-industries/codex-acp@0.16.0", + "@zed-industries/codex-acp@0.15.0", "@zed-industries/codex-acp@0.11.1", ): settings = ACPAgentSettings( @@ -1057,7 +1057,7 @@ def test_acp_resolve_command_keeps_npx_when_binary_absent( assert settings.resolve_acp_command() == [ "npx", "-y", - "@zed-industries/codex-acp@0.16.0", + "@zed-industries/codex-acp@0.15.0", ] From 8efa65c176b7648441a4af71ad5ccce427d8e670 Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Wed, 17 Jun 2026 20:03:18 +0200 Subject: [PATCH 10/14] feat(acp): bump codex-acp to 0.16.0 with model/effort split (fixes switch-turn -32603) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root-caused the codex 0.16 "switch applies but next turn -32603s" by reading the codex-acp Rust source (v0.15 vs v0.16): not a codex regression — a redesign. v0.15 `set_session_model` ran `parse_model_id` (`split_once('/')`) so the combined `gpt-5.5/medium` id was split server-side. v0.16 moved model selection to `configOptions`: `handle_set_config_model` looks up a **bare preset id** (`gpt-5.5`), with reasoning effort as a **separate** `reasoning_effort` option; an unknown id (the combined `gpt-5.5/medium`) is taken literally → the backend 400s ("model is not supported"). The SDK was still sending the combined id. Fix: when applying via the configOptions mechanism, split a `/` id (`_split_codex_model_effort`) and set `config_id="model"` (bare) then `config_id="reasoning_effort"` (separate). Bare ids (claude `opus[1m]`/`sonnet`, gemini `auto`) are unaffected. `_CODEX_MODELS` keeps its combined ids. Re-validated e2e against the codex-acp 0.16.0 binary: pre-session deferred switch, mid-conversation live switch, and resume + reapply all apply the model AND complete the following turn (no -32603). claude 0.44 / gemini 0.46 unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../openhands/agent_server/docker/Dockerfile | 2 +- .../openhands/sdk/agent/acp_agent.py | 60 +++++++--- .../openhands/sdk/settings/acp_providers.py | 14 +-- tests/sdk/agent/test_acp_agent.py | 109 ++++++++++++++---- tests/sdk/test_settings.py | 4 +- 5 files changed, 142 insertions(+), 47 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/docker/Dockerfile b/openhands-agent-server/openhands/agent_server/docker/Dockerfile index fa6160cdef..b13a2f219b 100644 --- a/openhands-agent-server/openhands/agent_server/docker/Dockerfile +++ b/openhands-agent-server/openhands/agent_server/docker/Dockerfile @@ -172,7 +172,7 @@ RUN set -ux; \ PATH="$ACP_NODE_DIR/bin:$PATH"; \ if "$ACP_NODE_DIR/bin/npm" install -g \ @agentclientprotocol/claude-agent-acp@0.44.0 \ - @zed-industries/codex-acp@0.15.0 \ + @zed-industries/codex-acp@0.16.0 \ @google/gemini-cli@0.46.0; then \ # Create wrappers in /usr/local/bin that prepend ACP's Node 22 to PATH. # This ensures the ACP binary's #!/usr/bin/env node shebang resolves diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 2a3621c34e..2c7ed0eaf2 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -383,6 +383,9 @@ def _write_secret_file(path: Path, value: str) -> None: # (codex-acp 0.16+, claude-agent-acp 0.44+) rather than the UNSTABLE ``models`` # capability + ``session/set_model`` (gemini-cli, older codex/claude). _MODEL_CONFIG_OPTION_ID = "model" +# Companion config-option id codex-acp 0.16 uses for the reasoning-effort tier +# (the 0.15 combined ``/`` id is split across the two on 0.16). +_REASONING_EFFORT_CONFIG_OPTION_ID = "reasoning_effort" def _model_config_option(response: Any) -> Any | None: @@ -426,28 +429,55 @@ def _session_selects_model_via_config_option(response: Any) -> bool: return _model_config_option(response) is not None -def _apply_acp_model( +def _split_codex_model_effort(model: str) -> tuple[str, str | None]: + """Split a codex-style ``/`` id into ``(model, effort)``. + + codex-acp's ``models`` capability (0.15) used a combined id like + ``gpt-5.5/medium`` that the server parsed itself. Its 0.16 ``model`` + ``configOptions`` select instead wants a **bare preset id** (``gpt-5.5``) + with the reasoning effort set via a separate ``reasoning_effort`` option — + an unknown combined id is taken literally and rejected by the backend + (``"" model is not supported``). So when applying via configOptions we + split on ``/``. Bare ids (claude ``opus[1m]``/``sonnet``, gemini ``auto``, + codex without an effort suffix) return ``(model, None)`` unchanged. + """ + base, sep, effort = model.partition("/") + if sep and effort: + return base, effort + return model, None + + +async def _apply_acp_model( conn: ClientSideConnection, session_id: str, model: str, *, via_config_option: bool, -) -> Awaitable[Any]: - """Build the awaitable that applies ``model`` to a live ACP session via - whichever protocol the session advertised: ``set_config_option(configId= - "model")`` for configOptions-based servers (codex-acp 0.16+, claude-agent-acp - 0.44+), else ``set_session_model``. - - Returns the (eagerly constructed) coroutine so callers can either ``await`` - it directly or hand it to ``run_async`` — the underlying ``conn`` method is - invoked now, mirroring the previous inline ``conn.set_session_model(...)`` - call site. +) -> None: + """Apply ``model`` to a live ACP session via whichever protocol the session + advertised: ``set_config_option(configId="model")`` for configOptions-based + servers (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``. + + For the configOptions mechanism a codex ``/`` id is split into + a bare ``model`` plus a separate ``reasoning_effort`` option (see + :func:`_split_codex_model_effort`); other ids apply as a single ``model`` + selection. """ - if via_config_option: - return conn.set_config_option( - config_id=_MODEL_CONFIG_OPTION_ID, value=model, session_id=session_id + if not via_config_option: + await conn.set_session_model(model_id=model, session_id=session_id) + return + base, effort = _split_codex_model_effort(model) + await conn.set_config_option( + config_id=_MODEL_CONFIG_OPTION_ID, value=base, session_id=session_id + ) + if effort is not None: + # Order matters: reasoning_effort is only valid once the model is a known + # preset, which the call above just set. + await conn.set_config_option( + config_id=_REASONING_EFFORT_CONFIG_OPTION_ID, + value=effort, + session_id=session_id, ) - return conn.set_session_model(model_id=model, session_id=session_id) def _is_method_not_found(exc: ACPRequestError) -> bool: diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py index 7be85369b6..a98b1a399e 100644 --- a/openhands-sdk/openhands/sdk/settings/acp_providers.py +++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py @@ -387,14 +387,14 @@ class ACPProviderInfo: # trip the supply-chain hold) — same configOptions mechanism + model set as # 0.46.0; revisit once a newer release ages past the hold. # -# codex-acp is held at 0.15.0: 0.16.0 moved to the configOptions mechanism, but -# a real e2e showed that applying a model via ``set_config_option`` then running -# a turn fails with JSON-RPC -32603 (verified against the ChatGPT-subscription -# backend; 0.15.0's ``set_session_model`` switch + turn works). Bumping would -# regress ACP model switching for codex (the #3763/#3764 path), so hold until -# codex-acp fixes the 0.16 set_config_option turn path. See issue #3772. +# codex-acp 0.16 split the 0.15 combined ``/`` id into a bare +# ``model`` preset id + a separate ``reasoning_effort`` config option; sending +# the combined id to ``set_config_option(model)`` is taken literally and 400s +# at the backend. The apply path splits accordingly (see +# ``_split_codex_model_effort`` in acp_agent.py), so ``_CODEX_MODELS`` keeps the +# combined ids and switching works on 0.16. See issue #3772. CLAUDE_AGENT_ACP_VERSION = "0.44.0" -CODEX_ACP_VERSION = "0.15.0" +CODEX_ACP_VERSION = "0.16.0" GEMINI_CLI_VERSION = "0.46.0" diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 1119e322bf..3d866b5a2a 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -11,7 +11,7 @@ from pathlib import Path from types import SimpleNamespace from typing import Any -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import AsyncMock, MagicMock, call, patch import pytest from acp.exceptions import RequestError as ACPRequestError @@ -20,6 +20,7 @@ from openhands.sdk.agent.acp_agent import ( ACPAgent, + _apply_acp_model, _apply_acp_model_with_fallback, _classify_acp_init_error, _codex_auth_file, @@ -37,6 +38,7 @@ _select_auth_method, _serialize_tool_content, _session_selects_model_via_config_option, + _split_codex_model_effort, _strip_inherited_npm_env, ) from openhands.sdk.agent.acp_models import ACPModelInfo @@ -4532,14 +4534,16 @@ async def test_reapply_via_set_session_model_mechanism(self): @pytest.mark.asyncio async def test_reapply_via_set_config_option_mechanism(self): # codex-acp 0.16+/claude-agent-acp 0.46+ reapply via - # ``set_config_option(configId="model")``. + # ``set_config_option(configId="model")``; a codex combined id splits + # into model + reasoning_effort. conn = AsyncMock() applied = await _reapply_session_model_on_resume( conn, "codex-acp", "sess-1", "gpt-5.4/low", via_config_option=True ) - conn.set_config_option.assert_awaited_once_with( - config_id="model", value="gpt-5.4/low", session_id="sess-1" - ) + assert conn.set_config_option.await_args_list == [ + call(config_id="model", value="gpt-5.4", session_id="sess-1"), + call(config_id="reasoning_effort", value="low", session_id="sess-1"), + ] conn.set_session_model.assert_not_called() assert applied is True @@ -4678,20 +4682,33 @@ class TestSetACPModel: def _wire( agent: ACPAgent, agent_name: str, *, via_config_option: bool = False ) -> ACPAgent: - agent._conn = MagicMock() + conn = MagicMock() + conn.set_session_model = AsyncMock() + conn.set_config_option = AsyncMock() + agent._conn = conn agent._session_id = "sess-1" agent._agent_name = agent_name agent._model_via_config_option = via_config_option executor = MagicMock() - executor.run_async = MagicMock() + + # run_async actually drives the (async) apply coroutine to completion so + # the AsyncMock conn calls fire — exercising the real apply/split path. + def _run(coro: Any, timeout: Any = None) -> Any: + loop = asyncio.new_event_loop() + try: + return loop.run_until_complete(coro) + finally: + loop.close() + + executor.run_async = MagicMock(side_effect=_run) agent._executor = executor return agent def test_switches_model_on_live_codex_session(self): - # Default (models-capability) session ⇒ set_session_model. + # Default (models-capability) session ⇒ set_session_model (combined id). agent = self._wire(_make_agent(), "codex-acp") agent.set_acp_model("gpt-5.4/low") - agent._conn.set_session_model.assert_called_once_with( + agent._conn.set_session_model.assert_awaited_once_with( model_id="gpt-5.4/low", session_id="sess-1" ) agent._conn.set_config_option.assert_not_called() @@ -4700,31 +4717,41 @@ def test_switches_model_on_live_codex_session(self): assert agent.llm.model == "gpt-5.4/low" assert agent.llm.metrics.model_name == "gpt-5.4/low" - def test_switches_model_via_config_option_session(self): - # configOptions session (codex-acp 0.16+, claude-agent-acp 0.46+) ⇒ - # set_config_option(configId="model"). + def test_switches_codex_via_config_option_splits_model_and_effort(self): + # codex-acp 0.16 configOptions: the combined id is split into a bare + # `model` preset + a separate `reasoning_effort` option. agent = self._wire(_make_agent(), "codex-acp", via_config_option=True) agent.set_acp_model("gpt-5.4/low") - agent._conn.set_config_option.assert_called_once_with( - config_id="model", value="gpt-5.4/low", session_id="sess-1" - ) + assert agent._conn.set_config_option.await_args_list == [ + call(config_id="model", value="gpt-5.4", session_id="sess-1"), + call(config_id="reasoning_effort", value="low", session_id="sess-1"), + ] agent._conn.set_session_model.assert_not_called() - agent._executor.run_async.assert_called_once() + # The agent's surfaced/persisted model stays the combined registry id. assert agent.llm.model == "gpt-5.4/low" assert agent._current_model_id == "gpt-5.4/low" + def test_switches_claude_via_config_option_single_call(self): + # A bare id (no `/`) applies as a single `model` selection — no effort. + agent = self._wire(_make_agent(), "claude-agent-acp", via_config_option=True) + agent.set_acp_model("sonnet") + agent._conn.set_config_option.assert_awaited_once_with( + config_id="model", value="sonnet", session_id="sess-1" + ) + assert agent._current_model_id == "sonnet" + def test_switch_falls_back_on_method_not_found(self): # Detected set_session_model, but the live server only has the # configOptions mechanism: the first apply raises -32601, the switch # retries with the other call and remembers it. agent = self._wire(_make_agent(), "codex-acp", via_config_option=False) - agent._executor.run_async.side_effect = [ - ACPRequestError(code=-32601, message="Method not found"), - None, # retry with the flipped mechanism succeeds - ] + agent._conn.set_session_model.side_effect = ACPRequestError( + code=-32601, message="Method not found" + ) agent.set_acp_model("gpt-5.4/low") assert agent._executor.run_async.call_count == 2 - # The working mechanism is remembered for later switches. + # The flipped mechanism applied (split into model + reasoning_effort). + assert agent._conn.set_config_option.call_count == 2 assert agent._model_via_config_option is True assert agent.llm.model == "gpt-5.4/low" assert agent._current_model_id == "gpt-5.4/low" @@ -4733,7 +4760,7 @@ def test_switch_invalid_model_does_not_fall_back(self): # A -32602 invalid-params is a real client error, not a wrong-mechanism # signal: surface it as ValueError without a second call. agent = self._wire(_make_agent(), "codex-acp", via_config_option=True) - agent._executor.run_async.side_effect = ACPRequestError( + agent._conn.set_config_option.side_effect = ACPRequestError( code=-32602, message="Invalid params" ) with pytest.raises(ValueError, match="rejected set_config_option"): @@ -7408,6 +7435,44 @@ async def test_non_method_not_found_error_propagates(self): conn.set_session_model.assert_not_called() +class TestCodexModelEffortSplit: + """codex-acp 0.16 splits the 0.15 combined ``/`` id into a + bare ``model`` preset + a separate ``reasoning_effort`` configOption.""" + + def test_splits_combined_id(self): + assert _split_codex_model_effort("gpt-5.5/medium") == ("gpt-5.5", "medium") + assert _split_codex_model_effort("gpt-5.4-mini/xhigh") == ( + "gpt-5.4-mini", + "xhigh", + ) + + def test_bare_ids_unchanged(self): + # claude / gemini / codex-without-effort ids have no ``/``. + assert _split_codex_model_effort("opus[1m]") == ("opus[1m]", None) + assert _split_codex_model_effort("sonnet") == ("sonnet", None) + assert _split_codex_model_effort("auto") == ("auto", None) + assert _split_codex_model_effort("gpt-5.5") == ("gpt-5.5", None) + + @pytest.mark.asyncio + async def test_apply_splits_into_two_config_options(self): + # The configOptions apply path sets model (bare) then reasoning_effort. + conn = AsyncMock() + await _apply_acp_model(conn, "sess-1", "gpt-5.5/medium", via_config_option=True) + assert conn.set_config_option.await_args_list == [ + call(config_id="model", value="gpt-5.5", session_id="sess-1"), + call(config_id="reasoning_effort", value="medium", session_id="sess-1"), + ] + conn.set_session_model.assert_not_called() + + @pytest.mark.asyncio + async def test_apply_bare_config_option_single_call(self): + conn = AsyncMock() + await _apply_acp_model(conn, "sess-1", "opus[1m]", via_config_option=True) + conn.set_config_option.assert_awaited_once_with( + config_id="model", value="opus[1m]", session_id="sess-1" + ) + + class TestACPAgentAvailableModelsProperty: """``available_models`` exposes the server's model list verbatim. diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py index 944ee2d1fe..546806d7ce 100644 --- a/tests/sdk/test_settings.py +++ b/tests/sdk/test_settings.py @@ -1034,7 +1034,7 @@ def test_acp_resolve_command_rewrites_versioned_npx_to_pinned_binary( monkeypatch.setattr(shutil, "which", _which_returning("codex-acp")) for pkg in ( "@zed-industries/codex-acp", - "@zed-industries/codex-acp@0.15.0", + "@zed-industries/codex-acp@0.16.0", "@zed-industries/codex-acp@0.11.1", ): settings = ACPAgentSettings( @@ -1057,7 +1057,7 @@ def test_acp_resolve_command_keeps_npx_when_binary_absent( assert settings.resolve_acp_command() == [ "npx", "-y", - "@zed-industries/codex-acp@0.15.0", + "@zed-industries/codex-acp@0.16.0", ] From 5ce5dbbdf9d1f3371746e8b0d5bbb6ab9ffad8a3 Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Wed, 17 Jun 2026 21:31:04 +0200 Subject: [PATCH 11/14] refactor(acp): drop model-select fallback, unify the apply path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Detection from the session/new response is deterministic and was correct for every validated CLI, so the cross-mechanism -32601 fallback was speculative — and the duplicate apply paths it created caused two latent bugs. Collapse init/resume/switch onto a single _apply_acp_model primitive: - remove _apply_acp_model_with_fallback / _is_method_not_found / the -32601 constant; a real rejection now surfaces (ValueError -> 400, or tolerated on resume) instead of silently flipping mechanism - fold _session_selects_model_via_config_option into _extract_session_models, which now returns (current, available, via_config_option) in one scan - replace _config_option_to_model with ACPModelInfo.from_protocol(id_attr=) - unknown/custom providers apply via the detected mechanism, not a hardcoded set_config_option guess Fixes: - resume persists acp_model_via_config_option and uses it as the detection default when load_session omits the model block, so a config-option session reapplies via the right call across pod recycle - set_acp_model's error message names the mechanism actually used - codex reasoning_effort is best-effort: a rejected effort keeps the applied model rather than failing the switch with a stale llm.model - drop false-confidence run_async stubs that never awaited the apply coroutine; drive the error path through the real conn Re-validated e2e against the pinned binaries (claude 0.44 / codex 0.16 / gemini 0.46): pre-run model, mid-conversation switch, and real turns all pass. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../openhands/sdk/agent/acp_agent.py | 355 +++++++----------- .../openhands/sdk/agent/acp_models.py | 8 +- .../openhands/sdk/settings/acp_providers.py | 19 +- tests/sdk/agent/test_acp_agent.py | 257 +++++++------ 4 files changed, 270 insertions(+), 369 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 2c7ed0eaf2..1005d7d173 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -140,13 +140,6 @@ # upstream model 500s, and transient infrastructure errors. _RETRIABLE_SERVER_ERROR_CODES: frozenset[int] = frozenset({-32603}) -# -32601 = "Method not found" (JSON-RPC spec). An ACP server raises this when a -# model-selection call uses the mechanism it does *not* implement — e.g. -# `session/set_model` on a CLI that moved model selection to `configOptions` -# (codex-acp 0.16+, claude-agent-acp 0.44+), or vice versa. We use it to fall -# back to the other mechanism if response-detection picked the wrong one. -_METHOD_NOT_FOUND_CODE: Final[int] = -32601 - # Maximum characters for ACP tool call content — matches MAX_CMD_OUTPUT_SIZE # used by the terminal tool and the default max_message_chars in LLM config. @@ -413,22 +406,6 @@ def _model_config_option(response: Any) -> Any | None: return None -def _session_selects_model_via_config_option(response: Any) -> bool: - """Whether this session drives model selection through - ``session/set_config_option`` (the ``model`` config-option select) rather - than ``session/set_model``. - - The ``models`` capability wins when present (gemini-cli 0.46 and older - codex/claude still carry it); only in its absence does a ``model`` config - option become the selection mechanism. - """ - if response is None: - return False - if getattr(response, "models", None) is not None: - return False - return _model_config_option(response) is not None - - def _split_codex_model_effort(model: str) -> tuple[str, str | None]: """Split a codex-style ``/`` id into ``(model, effort)``. @@ -454,14 +431,15 @@ async def _apply_acp_model( *, via_config_option: bool, ) -> None: - """Apply ``model`` to a live ACP session via whichever protocol the session + """Apply ``model`` to a live ACP session via the mechanism the session advertised: ``set_config_option(configId="model")`` for configOptions-based servers (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``. - For the configOptions mechanism a codex ``/`` id is split into - a bare ``model`` plus a separate ``reasoning_effort`` option (see - :func:`_split_codex_model_effort`); other ids apply as a single ``model`` - selection. + A codex ``/`` id is split into a bare ``model`` plus a + separate ``reasoning_effort`` option (see :func:`_split_codex_model_effort`). + The model selection is the switch; the effort is a secondary refinement, so + a rejected effort is logged rather than raised — the live session keeps the + applied model instead of reporting the whole switch failed (#3772). """ if not via_config_option: await conn.set_session_model(model_id=model, session_id=session_id) @@ -471,55 +449,22 @@ async def _apply_acp_model( config_id=_MODEL_CONFIG_OPTION_ID, value=base, session_id=session_id ) if effort is not None: - # Order matters: reasoning_effort is only valid once the model is a known - # preset, which the call above just set. - await conn.set_config_option( - config_id=_REASONING_EFFORT_CONFIG_OPTION_ID, - value=effort, - session_id=session_id, - ) - - -def _is_method_not_found(exc: ACPRequestError) -> bool: - """Whether ``exc`` is a JSON-RPC "method not found" — i.e. the server does - not implement the model-selection call we used.""" - return exc.code == _METHOD_NOT_FOUND_CODE - - -async def _apply_acp_model_with_fallback( - conn: ClientSideConnection, - session_id: str, - model: str, - *, - via_config_option: bool, -) -> bool: - """Apply ``model`` via the detected mechanism, falling back to the other if - the server reports the method missing. - - Response-detection (``_session_selects_model_via_config_option``) is correct - for every CLI we've validated, but it reads an UNSTABLE capability and the - response shape can vary by build/auth state. If the chosen call raises - ``-32601 "Method not found"``, the server simply uses the *other* mechanism, - so we retry with it instead of crashing session init. Returns the - ``via_config_option`` value that actually applied the model. - """ - try: - await _apply_acp_model( - conn, session_id, model, via_config_option=via_config_option - ) - return via_config_option - except ACPRequestError as exc: - if not _is_method_not_found(exc): - raise - logger.info( - "ACP model-apply via %s rejected as method-not-found; retrying via %s", - "set_config_option" if via_config_option else "set_session_model", - "set_session_model" if via_config_option else "set_config_option", - ) - await _apply_acp_model( - conn, session_id, model, via_config_option=not via_config_option - ) - return not via_config_option + # reasoning_effort is only valid once the model is a known preset, which + # the call above just set. Best-effort: keep the applied model on failure. + try: + await conn.set_config_option( + config_id=_REASONING_EFFORT_CONFIG_OPTION_ID, + value=effort, + session_id=session_id, + ) + except ACPRequestError as exc: + logger.warning( + "ACP reasoning_effort=%r rejected after model=%r applied (%s); " + "keeping the model on the server default effort", + effort, + base, + exc, + ) def _usable_models(infos: Iterable[ACPModelInfo]) -> list[ACPModelInfo]: @@ -528,32 +473,21 @@ def _usable_models(infos: Iterable[ACPModelInfo]) -> list[ACPModelInfo]: return [info for info in infos if info.model_id] -def _config_option_to_model(opt: Any) -> ACPModelInfo: - """Build an :class:`ACPModelInfo` from a ``configOptions`` select option, - whose ``value`` is the model id. A non-string id degrades to ``""`` (then - dropped by :func:`_usable_models`), mirroring ``ACPModelInfo.from_protocol``. - """ - value = getattr(opt, "value", None) - return ACPModelInfo( - model_id=value if isinstance(value, str) else "", - name=getattr(opt, "name", None), - description=getattr(opt, "description", None), - ) - - def _extract_session_models( response: Any, -) -> tuple[str | None, list[ACPModelInfo] | None]: - """Extract the model state off a session response. - - Returns a ``(current_model_id, available_models)`` pair, both best-effort. - ``available_models`` is normalized into our own stable :class:`ACPModelInfo` - type at this boundary so nothing downstream depends on the vendored - ``acp.schema`` shape. Reads whichever mechanism the session advertised: the - UNSTABLE ``models`` capability, or the ``model`` ``configOptions`` select - (codex-acp 0.16+, claude-agent-acp 0.44+). - - The second element distinguishes **absent** from **empty** — this matters + *, + default_via_config_option: bool = False, +) -> tuple[str | None, list[ACPModelInfo] | None, bool]: + """Extract the model state off a session response in a single scan. + + Returns ``(current_model_id, available_models, via_config_option)``, all + best-effort. ``available_models`` is normalized into our own stable + :class:`ACPModelInfo` type at this boundary so nothing downstream depends on + the vendored ``acp.schema`` shape. Reads whichever mechanism the session + advertised: the UNSTABLE ``models`` capability, or the ``model`` + ``configOptions`` select (codex-acp 0.16+, claude-agent-acp 0.44+). + + ``available_models`` distinguishes **absent** from **empty** — this matters for resume persistence (preserve the last-known list when the server didn't report one; clear it when the server explicitly says it has none): @@ -562,26 +496,37 @@ def _extract_session_models( - ``[]`` — the server reported a mechanism but offers no (usable) models. - ``[...]`` — the reported models, minus any with an unusable ``model_id``. + ``via_config_option`` is the selection mechanism: ``True`` for the + configOptions select, ``False`` for the ``models`` capability, and + ``default_via_config_option`` when the response carries neither — callers on + the resume path pass the persisted mechanism hint so a config-option session + whose ``load_session`` omits the model block still reapplies via the right + call instead of defaulting to ``set_session_model``. + ``getattr`` keeps the helper tolerant of agents that emit a partial structure. """ if response is None: - return None, None + return None, None, default_via_config_option models = getattr(response, "models", None) if models is not None: current = getattr(models, "current_model_id", None) current = current if isinstance(current, str) and current else None raw = getattr(models, "available_models", None) or [] - return current, _usable_models(ACPModelInfo.from_protocol(m) for m in raw) + usable = _usable_models(ACPModelInfo.from_protocol(m) for m in raw) + return current, usable, False # configOptions mechanism: the ``model`` select carries the same state, with - # ``value`` as the model id (== the ``set_config_option`` target). + # each option's ``value`` as the model id (== the ``set_config_option`` target). opt = _model_config_option(response) if opt is None: - return None, None + return None, None, default_via_config_option current = getattr(opt, "current_value", None) current = current if isinstance(current, str) and current else None options = getattr(opt, "options", None) or [] - return current, _usable_models(_config_option_to_model(o) for o in options) + usable = _usable_models( + ACPModelInfo.from_protocol(o, id_attr="value") for o in options + ) + return current, usable, True # The ACP MCP server union accepted by new_session() / load_session(). @@ -711,14 +656,14 @@ async def _maybe_set_session_model( """Apply the *initial* session model right after session creation. Session-creation path only, gated on - ``ACPProviderInfo.supports_set_session_model``. Built-in providers get a - one-shot model-apply via whichever protocol the session advertised - (``via_config_option``): ``set_config_option(configId="model")`` for - configOptions-based servers (codex-acp 0.16+, claude-agent-acp 0.44+), else - ``set_session_model``. The ``_meta`` model payload is ignored by the pinned - CLIs, so the protocol call is what actually applies the model (#3654). - Unknown/custom providers fall back to ``set_config_option(configId="model")``. - Runtime switches go through :meth:`ACPAgent.set_acp_model`. + ``ACPProviderInfo.supports_set_session_model``. The model is applied via the + mechanism the session advertised (``via_config_option``): + ``set_config_option(configId="model")`` for configOptions-based servers + (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``. The + ``_meta`` model payload is ignored by the pinned CLIs, so this protocol call + is what actually applies the model (#3654). Unknown/custom providers are + best-effort (a rejection is tolerated — the session keeps the server + default). Runtime switches go through :meth:`ACPAgent.set_acp_model`. Returns ``True`` only when a model-setting call succeeded (the override reached the server); ``False`` when there is nothing to apply or the call @@ -728,29 +673,25 @@ async def _maybe_set_session_model( return False provider = detect_acp_provider_by_agent_name(agent_name) if provider is not None and provider.supports_set_session_model: - await _apply_acp_model_with_fallback( + await _apply_acp_model( conn, session_id, acp_model, via_config_option=via_config_option ) return True - # For unknown/custom providers, try the generic set_config_option method - # which is a standard ACP protocol method for setting configuration options + # Unknown/custom provider: best-effort apply via the detected mechanism; a + # rejection is tolerated so session creation can't break. if provider is None: try: - await conn.set_config_option( - config_id="model", - value=acp_model, - session_id=session_id, + await _apply_acp_model( + conn, session_id, acp_model, via_config_option=via_config_option ) logger.info( - "Set model %r on unknown/custom ACP server %s via set_config_option", - acp_model, - agent_name, + "Set model %r on unknown/custom ACP server %s", acp_model, agent_name ) return True except ACPRequestError as e: logger.warning( - "Could not set model %r on unknown/custom ACP server %s via " - "set_config_option (%s); the session will use the server default", + "Could not set model %r on unknown/custom ACP server %s (%s); " + "the session will use the server default", acp_model, agent_name, e, @@ -770,27 +711,22 @@ async def _reapply_session_model_on_resume( ``load_session()`` carries no model ``_meta``, so a session resumed after a runtime switch (or with any persisted ``acp_model``) would otherwise run on - the ACP server's default. This issues ``set_session_model`` so the resumed - live session matches the serialized ``acp_model``. - - For unknown/custom providers (e.g. Devin CLI), we fall back to the generic - ``set_config_option`` method with configId="model", which is a standard ACP - method that many custom ACP servers support. - - The gating mirrors :meth:`ACPAgent.set_acp_model` (attempt for custom/unknown - servers and known providers that support runtime switching; skip only known - providers that don't), deliberately differing from the initial-selection - gate: claude-agent-acp selects its initial model via ``_meta`` yet supports - ``set_session_model`` for later switches. A server that rejects the call is - tolerated (logged) — like the ``load_session`` fallback above — so resume - can't break; the session keeps the server default until the next switch. + the ACP server's default. This applies the model via the mechanism the + resumed session uses (``via_config_option``: ``set_config_option`` for + codex-acp 0.16+/claude-agent-acp 0.44+, else ``set_session_model``) so the + live session matches the serialized ``acp_model``. The caller derives + ``via_config_option`` from the ``load_session`` response, falling back to the + persisted mechanism hint when that response omits the model block. + + Gated on runtime-switch support (skip only known providers that don't); a + server that rejects the call is tolerated (logged) — like the + ``load_session`` fallback — so resume can't break and the session keeps the + server default until the next switch. Returns ``True`` only when a model-setting call was issued and accepted, so the caller knows the resumed live session is actually running ``acp_model``. ``False`` when there is nothing to reapply, the provider doesn't support the - switch, or the server rejected the call (swallowed) — in those cases the - session keeps the server default and the override must not be surfaced as - the current model. + switch, or the server rejected the call (swallowed). """ if not acp_model: return False @@ -798,28 +734,9 @@ async def _reapply_session_model_on_resume( if provider is not None and not provider.supports_runtime_model_switch: return False try: - if provider is not None: - # Known provider: apply via the mechanism the resumed session uses - # (set_config_option for codex-acp 0.16+/claude-agent-acp 0.44+, - # else set_session_model). A rejection is already tolerated below - # (the session keeps the server default), so resume doesn't need the - # cross-mechanism fallback the init/switch paths use. - await _apply_acp_model( - conn, session_id, acp_model, via_config_option=via_config_option - ) - else: - # Unknown/custom provider: try set_config_option as fallback - await conn.set_config_option( - config_id="model", - value=acp_model, - session_id=session_id, - ) - logger.info( - "Reapplied model %r on unknown/custom ACP server %s " - "via set_config_option", - acp_model, - agent_name, - ) + await _apply_acp_model( + conn, session_id, acp_model, via_config_option=via_config_option + ) return True except ACPRequestError as e: logger.warning( @@ -2042,6 +1959,10 @@ def init_state( # conversation list can tell the picker whether to offer live # switching without re-detecting the provider server-side. "acp_supports_runtime_model_switch": self.supports_runtime_model_switch, + # Detected model-selection mechanism — persisted so a resume whose + # load_session omits the model block reapplies the model via the + # right call instead of defaulting to set_session_model. + "acp_model_via_config_option": self._model_via_config_option, } # When starting a fresh session, clear stale suffix marker so the next # launch knows to re-inject it (PR behavior: suffix state is per-session). @@ -2596,11 +2517,21 @@ async def _init() -> tuple[ mcp_servers=acp_mcp_servers, ) session_id = prior_session_id - reported_model_id, available_models = _extract_session_models( - load_response + # load_session often omits the model block; fall back to the + # mechanism detected at session creation (persisted alongside + # the session id) so a config-option session still reapplies + # via the right call rather than defaulting to + # set_session_model and silently running the server default. + persisted_via_config_option = bool( + state.agent_state.get("acp_model_via_config_option", False) ) - self._model_via_config_option = ( - _session_selects_model_via_config_option(load_response) + ( + reported_model_id, + available_models, + self._model_via_config_option, + ) = _extract_session_models( + load_response, + default_via_config_option=persisted_via_config_option, ) logger.info( "Resumed ACP session %s (cwd=%s)", @@ -2632,12 +2563,14 @@ async def _init() -> tuple[ **session_meta, ) session_id = response.session_id - reported_model_id, available_models = _extract_session_models(response) - # Detect the model-selection protocol the server advertised so - # init and later runtime switches use the right call. - self._model_via_config_option = ( - _session_selects_model_via_config_option(response) - ) + # Detect the model-selection protocol the server advertised (in + # the same scan) so init and later runtime switches use the right + # call. + ( + reported_model_id, + available_models, + self._model_via_config_option, + ) = _extract_session_models(response) # Initial-model protocol call for every built-in provider # (codex, gemini, claude-code). The pinned claude/codex CLIs # ignore the _meta above, so this protocol call is what actually @@ -3719,58 +3652,26 @@ def set_acp_model(self, model: str) -> None: timeout=self.acp_prompt_timeout, ) except ACPRequestError as e: - pending_error: ACPRequestError | None = e - if _is_method_not_found(e): - # The session uses the other model-selection mechanism (or the - # init-time detection picked the wrong one): retry once with it, - # and remember the working mechanism for later switches. If the - # retry also fails, fall through to the error translation below. - flipped = not self._model_via_config_option - try: - self._executor.run_async( - _apply_acp_model( - self._conn, - self._session_id, - model, - via_config_option=flipped, - ), - timeout=self.acp_prompt_timeout, - ) - except ACPRequestError as e2: - pending_error = e2 - else: - # The live session's mechanism diverged from init-time - # detection; warn and adopt the working one for later - # switches. - logger.warning( - "ACP model-select mechanism changed mid-session " - "(init detected via_config_option=%s, switch needed %s); " - "using the latter for subsequent switches", - not flipped, - flipped, - ) - self._model_via_config_option = flipped - pending_error = None # both selections applied - if pending_error is not None: - # Server-internal failures (JSON-RPC -32603) are not the caller's - # fault, and the prompt path already treats them as retriable. - # Let them propagate (-> 5xx) instead of mislabeling them as a - # 400 client error. - if pending_error.code in _RETRIABLE_SERVER_ERROR_CODES: - raise pending_error - # acp.exceptions.RequestError derives from Exception (not - # RuntimeError); surface a true client/protocol rejection (e.g. - # method-not-found on both mechanisms, invalid model id) as a - # ValueError so callers — and the agent-server route — treat it - # as a 400-class client error rather than an opaque 500. - method = ( - "set_config_option(model)" - if self._model_via_config_option - else "set_session_model" - ) - raise ValueError( - f"ACP server rejected {method}(model={model!r}): {pending_error}" - ) from pending_error + # Server-internal failures (JSON-RPC -32603) are not the caller's + # fault, and the prompt path already treats them as retriable. Let + # them propagate (-> 5xx) instead of mislabeling them as a 400. + if e.code in _RETRIABLE_SERVER_ERROR_CODES: + raise + # acp.exceptions.RequestError derives from Exception (not + # RuntimeError); surface a true client/protocol rejection (invalid + # model id, or method-not-found on a mis-detected server) as a + # ValueError so callers — and the agent-server route — treat it as a + # 400-class client error rather than an opaque 500. The mechanism is + # the one the session advertised (``_model_via_config_option``), so + # the message names the call that actually failed. + method = ( + "set_config_option(model)" + if self._model_via_config_option + else "set_session_model" + ) + raise ValueError( + f"ACP server rejected {method}(model={model!r}): {e}" + ) from e # Reflect the live model on the sentinel LLM + metrics so cost/token # accounting and serialized state show the model actually in use # (mirrors model_post_init). The ``acp_model`` field is frozen, so the diff --git a/openhands-sdk/openhands/sdk/agent/acp_models.py b/openhands-sdk/openhands/sdk/agent/acp_models.py index 4e7d79b340..c6c89390ea 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_models.py +++ b/openhands-sdk/openhands/sdk/agent/acp_models.py @@ -49,15 +49,19 @@ class ACPModelInfo(BaseModel): ) @classmethod - def from_protocol(cls, raw: Any) -> ACPModelInfo: + def from_protocol(cls, raw: Any, *, id_attr: str = "model_id") -> ACPModelInfo: """Build from a raw ACP ``ModelInfo`` (or any duck-typed object). Tolerant of partial/malformed entries: non-string fields degrade to ``""`` (``model_id``) or ``None`` (``name``/``description``) rather than raising, since the source is an UNSTABLE protocol capability that older or half-implemented agents may emit incompletely. + + ``id_attr`` names the attribute carrying the model id — ``"model_id"`` + for a ``models``-capability ``ModelInfo``, ``"value"`` for a + ``configOptions`` select option. """ - model_id = getattr(raw, "model_id", None) + model_id = getattr(raw, id_attr, None) name = getattr(raw, "name", None) description = getattr(raw, "description", None) return cls( diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py index a98b1a399e..e2cc4d74a7 100644 --- a/openhands-sdk/openhands/sdk/settings/acp_providers.py +++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py @@ -378,21 +378,10 @@ class ACPProviderInfo: # `ACPAgentSettings.resolve_acp_command` runs the pinned `binary_name` instead, # so the `@version` suffix is a no-op there. # -# claude-agent-acp 0.44+ (and codex-acp 0.16+) select the model via a ``model`` -# ``configOptions`` entry instead of ``session/set_model``; the SDK detects the -# mechanism per session (see ``_session_selects_model_via_config_option``) and -# applies it accordingly, so model switching works on either mechanism. -# -# claude-agent-acp is held at 0.44.0 (0.45.0/0.46.0 published <7 days ago and -# trip the supply-chain hold) — same configOptions mechanism + model set as -# 0.46.0; revisit once a newer release ages past the hold. -# -# codex-acp 0.16 split the 0.15 combined ``/`` id into a bare -# ``model`` preset id + a separate ``reasoning_effort`` config option; sending -# the combined id to ``set_config_option(model)`` is taken literally and 400s -# at the backend. The apply path splits accordingly (see -# ``_split_codex_model_effort`` in acp_agent.py), so ``_CODEX_MODELS`` keeps the -# combined ids and switching works on 0.16. See issue #3772. +# claude-agent-acp 0.44+ / codex-acp 0.16+ select the model via a ``model`` +# ``configOptions`` entry, not ``session/set_model``; the SDK detects which per +# session in ``_extract_session_models`` and applies it. codex 0.16 also splits +# the model id from the reasoning effort (see ``_split_codex_model_effort``). #3772. CLAUDE_AGENT_ACP_VERSION = "0.44.0" CODEX_ACP_VERSION = "0.16.0" GEMINI_CLI_VERSION = "0.46.0" diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 3d866b5a2a..264e0536a2 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -21,7 +21,6 @@ from openhands.sdk.agent.acp_agent import ( ACPAgent, _apply_acp_model, - _apply_acp_model_with_fallback, _classify_acp_init_error, _codex_auth_file, _codex_base_url_overrides, @@ -29,7 +28,6 @@ _extract_session_models, _extract_token_usage, _image_url_to_acp_block, - _is_method_not_found, _mask_json_value, _maybe_set_session_model, _mcp_config_to_acp_servers, @@ -37,7 +35,6 @@ _reapply_session_model_on_resume, _select_auth_method, _serialize_tool_content, - _session_selects_model_via_config_option, _split_codex_model_effort, _strip_inherited_npm_env, ) @@ -4478,33 +4475,40 @@ async def test_missing_model_skips_protocol_override(self): assert applied is False @pytest.mark.asyncio - async def test_unknown_provider_uses_set_config_option_fallback(self): - # An unknown/custom server now tries set_config_option as a fallback - # for model selection, which is a standard ACP method. - conn = AsyncMock() + async def test_unknown_provider_applies_via_detected_mechanism(self): + # An unknown/custom server applies the model via whichever mechanism its + # session/new advertised (detected upstream), not a hardcoded guess. + conn_cfg = AsyncMock() applied = await _maybe_set_session_model( - conn, "devin-cli", "session-1", "kimi-k2-6", via_config_option=False + conn_cfg, "devin-cli", "session-1", "kimi-k2-6", via_config_option=True ) - conn.set_session_model.assert_not_called() - conn.set_config_option.assert_awaited_once_with( - config_id="model", - value="kimi-k2-6", - session_id="session-1", + conn_cfg.set_config_option.assert_awaited_once_with( + config_id="model", value="kimi-k2-6", session_id="session-1" + ) + conn_cfg.set_session_model.assert_not_called() + assert applied is True + + conn_set = AsyncMock() + applied = await _maybe_set_session_model( + conn_set, "devin-cli", "session-1", "kimi-k2-6", via_config_option=False ) + conn_set.set_session_model.assert_awaited_once_with( + model_id="kimi-k2-6", session_id="session-1" + ) + conn_set.set_config_option.assert_not_called() assert applied is True @pytest.mark.asyncio - async def test_unknown_provider_set_config_option_failure_is_tolerated(self): - # If set_config_option fails for an unknown provider, we log a warning - # but don't break session creation. + async def test_unknown_provider_apply_failure_is_tolerated(self): + # If the model-apply fails for an unknown provider, we log a warning but + # don't break session creation. conn = AsyncMock() conn.set_config_option.side_effect = ACPRequestError( code=-32601, message="method not found" ) applied = await _maybe_set_session_model( - conn, "some-custom-acp", "session-1", "whatever", via_config_option=False + conn, "some-custom-acp", "session-1", "whatever", via_config_option=True ) - conn.set_session_model.assert_not_called() conn.set_config_option.assert_awaited_once() assert applied is False @@ -4558,12 +4562,12 @@ async def test_missing_model_skips_reapply(self): assert applied is False @pytest.mark.asyncio - async def test_unknown_provider_attempts_reapply_via_set_config_option(self): - # provider=None (custom server) now attempts reapply via set_config_option - # as a fallback, which is a standard ACP method. + async def test_unknown_provider_reapplies_via_detected_mechanism(self): + # provider=None (custom server) reapplies via whichever mechanism the + # resumed session advertised, not a hardcoded guess. conn = AsyncMock() applied = await _reapply_session_model_on_resume( - conn, "devin-cli", "sess-1", "kimi-k2-6", via_config_option=False + conn, "devin-cli", "sess-1", "kimi-k2-6", via_config_option=True ) conn.set_session_model.assert_not_called() conn.set_config_option.assert_awaited_once_with( @@ -4610,7 +4614,7 @@ async def test_client_rejection_is_swallowed_on_resume(self): code=-32601, message="method not found" ) applied = await _reapply_session_model_on_resume( - conn, "some-custom-acp", "sess-1", "whatever", via_config_option=False + conn, "some-custom-acp", "sess-1", "whatever", via_config_option=True ) conn.set_config_option.assert_awaited_once() # Rejected => the live session kept the server default, so the override @@ -4740,23 +4744,22 @@ def test_switches_claude_via_config_option_single_call(self): ) assert agent._current_model_id == "sonnet" - def test_switch_falls_back_on_method_not_found(self): - # Detected set_session_model, but the live server only has the - # configOptions mechanism: the first apply raises -32601, the switch - # retries with the other call and remembers it. + def test_switch_method_not_found_raises_no_fallback(self): + # No cross-mechanism fallback: a -32601 surfaces as a ValueError naming + # the advertised mechanism, and the other call is never attempted. agent = self._wire(_make_agent(), "codex-acp", via_config_option=False) agent._conn.set_session_model.side_effect = ACPRequestError( code=-32601, message="Method not found" ) - agent.set_acp_model("gpt-5.4/low") - assert agent._executor.run_async.call_count == 2 - # The flipped mechanism applied (split into model + reasoning_effort). - assert agent._conn.set_config_option.call_count == 2 - assert agent._model_via_config_option is True - assert agent.llm.model == "gpt-5.4/low" - assert agent._current_model_id == "gpt-5.4/low" + with pytest.raises(ValueError, match="rejected set_session_model"): + agent.set_acp_model("gpt-5.4/low") + agent._conn.set_config_option.assert_not_called() + agent._executor.run_async.assert_called_once() + # Mechanism + sentinel model are left unchanged on a failed switch. + assert agent._model_via_config_option is False + assert agent.llm.model != "gpt-5.4/low" - def test_switch_invalid_model_does_not_fall_back(self): + def test_switch_invalid_model_raises_value_error(self): # A -32602 invalid-params is a real client error, not a wrong-mechanism # signal: surface it as ValueError without a second call. agent = self._wire(_make_agent(), "codex-acp", via_config_option=True) @@ -4820,8 +4823,10 @@ def test_translates_acp_request_error_to_value_error(self): # A protocol-level rejection (e.g. method-not-found on a custom server, # or an invalid model id) must surface as a ValueError — not leak as a # raw acp.exceptions.RequestError — so the agent-server maps it to 400. + # The rejection is raised by the conn (driven through the real apply + # coroutine), exercising set_acp_model's actual error path. agent = self._wire(_make_agent(), "codex-acp") - agent._executor.run_async.side_effect = ACPRequestError( + agent._conn.set_session_model.side_effect = ACPRequestError( code=-32601, message="method not found" ) with pytest.raises(ValueError, match="rejected set_session_model"): @@ -4835,7 +4840,7 @@ def test_propagates_server_internal_error(self): # than be mislabeled as a 400-class ValueError, mirroring the retriable # handling on the prompt path. agent = self._wire(_make_agent(), "codex-acp") - agent._executor.run_async.side_effect = ACPRequestError( + agent._conn.set_session_model.side_effect = ACPRequestError( code=-32603, message="internal error" ) with pytest.raises(ACPRequestError): @@ -6141,10 +6146,11 @@ def _models_block(current_model_id: str, model_ids: list[str]): models.available_models = entries return models - def test_unknown_provider_applies_override_via_set_config_option(self, tmp_path): + def test_unknown_provider_applies_override_via_detected_mechanism(self, tmp_path): """Fresh session on an unknown/custom provider with ``acp_model`` set: - the override is pushed via ``set_config_option`` (not ``set_session_model``), - so ``current_model_id`` must reflect the applied override. + the override is pushed via the mechanism the session advertised — here a + ``models``-capability response, so ``set_session_model`` — and + ``current_model_id`` must reflect the applied override. """ agent = _make_agent(acp_model="caller-model") state = _make_state(tmp_path) @@ -6159,9 +6165,9 @@ def test_unknown_provider_applies_override_via_set_config_option(self, tmp_path) self._patched_start_acp_server(agent, state, conn=conn) - conn.set_session_model.assert_not_awaited() - conn.set_config_option.assert_awaited_once_with( - config_id="model", value="caller-model", session_id="fresh-sess" + conn.set_config_option.assert_not_awaited() + conn.set_session_model.assert_awaited_once_with( + model_id="caller-model", session_id="fresh-sess" ) assert agent.current_model_id == "caller-model" @@ -7023,7 +7029,7 @@ def test_returns_both_when_response_carries_them(self): response.models = MagicMock() response.models.current_model_id = "default" response.models.available_models = [m1] - cur, avail = _extract_session_models(response) + cur, avail, _ = _extract_session_models(response) assert cur == "default" # Normalized into our stable ACPModelInfo, not the raw acp type. assert avail == [ @@ -7038,7 +7044,7 @@ def test_returns_none_list_when_models_block_absent(self): # Older agents don't include the ``models`` block at all -> None, so # callers know nothing was reported (and can preserve prior state). response = MagicMock(spec=[]) - cur, avail = _extract_session_models(response) + cur, avail, _ = _extract_session_models(response) assert cur is None assert avail is None @@ -7049,19 +7055,19 @@ def test_returns_empty_list_when_available_models_missing(self): response.models = MagicMock() response.models.current_model_id = "gpt-5" response.models.available_models = None - cur, avail = _extract_session_models(response) + cur, avail, _ = _extract_session_models(response) assert cur == "gpt-5" assert avail == [] def test_returns_none_list_when_response_is_none(self): # ``load_session`` can return ``None`` for servers that don't # implement the call — the helper must not crash, and reports "absent". - assert _extract_session_models(None) == (None, None) + assert _extract_session_models(None) == (None, None, False) def test_returns_none_list_when_models_field_is_none(self): response = MagicMock() response.models = None - assert _extract_session_models(response) == (None, None) + assert _extract_session_models(response) == (None, None, False) def test_returns_none_when_current_model_id_is_empty_string(self): # An empty string is treated the same as a missing field — we don't @@ -7071,7 +7077,7 @@ def test_returns_none_when_current_model_id_is_empty_string(self): response.models = MagicMock() response.models.current_model_id = "" response.models.available_models = [] - assert _extract_session_models(response) == (None, []) + assert _extract_session_models(response) == (None, [], False) def test_returns_none_when_current_model_id_is_not_a_string(self): # Defensive: an agent returning a non-string here is malformed. @@ -7079,7 +7085,7 @@ def test_returns_none_when_current_model_id_is_not_a_string(self): response.models = MagicMock() response.models.current_model_id = 42 response.models.available_models = [] - assert _extract_session_models(response) == (None, []) + assert _extract_session_models(response) == (None, [], False) class TestExtractSessionModelsNormalization: @@ -7104,7 +7110,7 @@ def test_maps_fields_through(self): response.models.available_models = [ self._raw("gpt-5.4/low", "gpt-5.4 (low)", "Strong everyday model."), ] - _cur, avail = _extract_session_models(response) + _cur, avail, _ = _extract_session_models(response) assert avail == [ ACPModelInfo( model_id="gpt-5.4/low", @@ -7125,7 +7131,7 @@ def test_drops_entries_without_usable_id(self): self._raw(model_id="good", name="Good"), self._raw(model_id="", name="Empty"), # empty -> dropped ] - _cur, avail = _extract_session_models(response) + _cur, avail, _ = _extract_session_models(response) assert avail == [ACPModelInfo(model_id="good", name="Good", description=None)] @@ -7135,8 +7141,8 @@ class TestConfigOptionModelMechanism: codex-acp 0.16+ and claude-agent-acp 0.46+ dropped the UNSTABLE ``models`` capability + ``session/set_model`` in favour of a ``model`` config-option select driven by ``session/set_config_option``. ``_extract_session_models`` - reads that select, and ``_session_selects_model_via_config_option`` picks - the apply mechanism. + reads that select and reports the apply mechanism as its third return value + (``via_config_option``) in the same scan. """ def _select(self, *, id="model", type="select", current_value, options): @@ -7166,8 +7172,9 @@ def test_extracts_model_state_from_config_option(self): ) ], ) - cur, avail = _extract_session_models(response) + cur, avail, via = _extract_session_models(response) assert cur == "opus[1m]" + assert via is True assert avail == [ ACPModelInfo( model_id="default", @@ -7189,8 +7196,8 @@ def test_unwraps_rootmodel_wrapped_config_option(self): ) wrapped = SimpleNamespace(root=select) # mimics the RootModel wrapper response = self._response(config_options=[wrapped]) - assert _session_selects_model_via_config_option(response) is True - cur, avail = _extract_session_models(response) + cur, avail, via = _extract_session_models(response) + assert via is True assert cur == "opus[1m]" assert avail is not None assert [m.model_id for m in avail] == ["opus[1m]", "sonnet"] @@ -7208,10 +7215,10 @@ def test_models_capability_wins_over_config_option(self): ) ], ) - cur, avail = _extract_session_models(response) + cur, avail, via = _extract_session_models(response) assert cur == "from-models" assert avail == [] - assert _session_selects_model_via_config_option(response) is False + assert via is False def test_detects_config_option_mechanism(self): response = self._response( @@ -7221,10 +7228,11 @@ def test_detects_config_option_mechanism(self): ) ], ) - assert _session_selects_model_via_config_option(response) is True + assert _extract_session_models(response)[2] is True def test_ignores_non_model_config_options(self): - # A ``mode`` select alongside no ``model`` select ⇒ old mechanism. + # A ``mode`` select alongside no ``model`` select ⇒ old mechanism, and + # the default (False) is reported since neither block was present. response = self._response( config_options=[ SimpleNamespace( @@ -7235,8 +7243,7 @@ def test_ignores_non_model_config_options(self): ), ], ) - assert _session_selects_model_via_config_option(response) is False - assert _extract_session_models(response) == (None, None) + assert _extract_session_models(response) == (None, None, False) def test_drops_options_without_usable_value(self): response = self._response( @@ -7251,12 +7258,31 @@ def test_drops_options_without_usable_value(self): ) ], ) - cur, avail = _extract_session_models(response) + cur, avail, _ = _extract_session_models(response) assert cur == "ok" assert avail == [ACPModelInfo(model_id="ok", name="Ok", description=None)] def test_none_response_is_not_config_option(self): - assert _session_selects_model_via_config_option(None) is False + assert _extract_session_models(None)[2] is False + + def test_default_via_config_option_used_when_no_block_present(self): + # On resume a load_session that omits both the models capability and the + # model config-option must not blindly default to set_session_model: the + # caller passes the persisted mechanism hint, which is honored (#3772). + response = self._response(config_options=[]) + assert _extract_session_models(response)[2] is False + assert ( + _extract_session_models(response, default_via_config_option=True)[2] is True + ) + # A present block still wins over the hint. + present = self._response( + config_options=[ + self._select(current_value="x", options=[self._opt("x", "X")]) + ] + ) + assert ( + _extract_session_models(present, default_via_config_option=False)[2] is True + ) # Real ``session/new`` wire payloads (by-alias) captured from the pinned CLIs. @@ -7341,98 +7367,58 @@ def test_config_options_is_the_schema_field_name(self): def test_claude_046_uses_config_option(self): resp = NewSessionResponse.model_validate(_CLAUDE_046_SESSION) - assert _session_selects_model_via_config_option(resp) is True - cur, avail = _extract_session_models(resp) + cur, avail, via = _extract_session_models(resp) + assert via is True assert cur == "opus[1m]" assert avail is not None assert [m.model_id for m in avail] == ["default", "opus[1m]", "sonnet", "haiku"] def test_codex_016_uses_config_option(self): resp = NewSessionResponse.model_validate(_CODEX_016_SESSION) - assert _session_selects_model_via_config_option(resp) is True - cur, avail = _extract_session_models(resp) + cur, avail, via = _extract_session_models(resp) + assert via is True assert cur == "gpt-5.5" assert avail is not None assert [m.model_id for m in avail] == ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"] def test_gemini_046_uses_set_session_model(self): resp = NewSessionResponse.model_validate(_GEMINI_046_SESSION) - assert _session_selects_model_via_config_option(resp) is False - cur, avail = _extract_session_models(resp) + cur, avail, via = _extract_session_models(resp) + assert via is False assert cur == "gemini-3-flash-preview" assert avail is not None assert "gemini-3-pro-preview" in [m.model_id for m in avail] -class TestApplyAcpModelFallback: - """Model-apply falls back to the other mechanism on -32601 method-not-found. - - Response-detection is correct for every validated CLI, but it reads an - UNSTABLE capability; a misdetect (or a server that moved between mechanisms) - must self-heal rather than crash session init / a switch. +class TestApplyAcpModelNoFallback: + """``_apply_acp_model`` applies via the advertised mechanism only — there is + no cross-mechanism fallback. Response-detection is deterministic from the + session/new shape and correct for every validated CLI, so any rejection is a + real error (invalid model id, or a genuinely mis-advertised server) and + propagates rather than silently trying the other call. """ - def test_is_method_not_found(self): - assert _is_method_not_found(ACPRequestError(code=-32601, message="x")) is True - assert _is_method_not_found(ACPRequestError(code=-32603, message="x")) is False - assert _is_method_not_found(ACPRequestError(code=-32602, message="x")) is False - @pytest.mark.asyncio - async def test_config_option_falls_back_to_set_session_model(self): - # Detected configOptions, but the server only has set_session_model. + async def test_config_option_rejection_propagates(self): conn = AsyncMock() conn.set_config_option.side_effect = ACPRequestError( code=-32601, message="Method not found" ) - effective = await _apply_acp_model_with_fallback( - conn, "sess-1", "gemini-3-flash", via_config_option=True - ) - conn.set_config_option.assert_awaited_once() - conn.set_session_model.assert_awaited_once_with( - model_id="gemini-3-flash", session_id="sess-1" - ) - assert effective is False # the mechanism that actually worked + with pytest.raises(ACPRequestError): + await _apply_acp_model(conn, "sess-1", "opus[1m]", via_config_option=True) + conn.set_session_model.assert_not_called() # no fallback @pytest.mark.asyncio - async def test_set_session_model_falls_back_to_config_option(self): - # Detected set_session_model, but the server moved to configOptions - # (codex 0.16 / claude 0.46 reached via a stale/degraded detection). + async def test_set_session_model_rejection_propagates(self): conn = AsyncMock() conn.set_session_model.side_effect = ACPRequestError( code=-32601, message="Method not found" ) - effective = await _apply_acp_model_with_fallback( - conn, "sess-1", "opus[1m]", via_config_option=False - ) - conn.set_session_model.assert_awaited_once() - conn.set_config_option.assert_awaited_once_with( - config_id="model", value="opus[1m]", session_id="sess-1" - ) - assert effective is True - - @pytest.mark.asyncio - async def test_no_fallback_when_primary_succeeds(self): - conn = AsyncMock() - effective = await _apply_acp_model_with_fallback( - conn, "sess-1", "sonnet", via_config_option=True - ) - conn.set_config_option.assert_awaited_once() - conn.set_session_model.assert_not_called() - assert effective is True - - @pytest.mark.asyncio - async def test_non_method_not_found_error_propagates(self): - # An invalid-model / server error is NOT a wrong-mechanism signal — it - # must propagate, not silently try the other call. - conn = AsyncMock() - conn.set_config_option.side_effect = ACPRequestError( - code=-32602, message="Invalid params" - ) with pytest.raises(ACPRequestError): - await _apply_acp_model_with_fallback( - conn, "sess-1", "bad", via_config_option=True + await _apply_acp_model( + conn, "sess-1", "gemini-3-flash", via_config_option=False ) - conn.set_session_model.assert_not_called() + conn.set_config_option.assert_not_called() # no fallback class TestCodexModelEffortSplit: @@ -7472,6 +7458,27 @@ async def test_apply_bare_config_option_single_call(self): config_id="model", value="opus[1m]", session_id="sess-1" ) + @pytest.mark.asyncio + async def test_reasoning_effort_failure_is_tolerated(self): + # The model selection is the switch; a rejected reasoning_effort must NOT + # fail the whole apply (else the live session changed model while the + # caller reports failure and leaves llm.model stale). The model call + # succeeded, so _apply_acp_model returns normally. #3772 + conn = AsyncMock() + + async def _effort_fails(*, config_id, value, session_id): + if config_id == "reasoning_effort": + raise ACPRequestError(code=-32603, message="transient") + + conn.set_config_option.side_effect = _effort_fails + # Does not raise: the model preset applied, only the effort refinement + # was dropped. + await _apply_acp_model(conn, "sess-1", "gpt-5.5/medium", via_config_option=True) + assert conn.set_config_option.await_args_list == [ + call(config_id="model", value="gpt-5.5", session_id="sess-1"), + call(config_id="reasoning_effort", value="medium", session_id="sess-1"), + ] + class TestACPAgentAvailableModelsProperty: """``available_models`` exposes the server's model list verbatim. From 16bc0d91431aa2b5be3fbc8beddf3c9215a1c6be Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Wed, 17 Jun 2026 22:41:45 +0200 Subject: [PATCH 12/14] =?UTF-8?q?refactor(acp):=20codex=20model=20ids=20?= =?UTF-8?q?=E2=86=92=20bare=20presets,=20drop=20the=20effort=20split?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit codex-acp 0.16 exposes reasoning effort as a separate `reasoning_effort` configOption, so the `model` select takes bare preset ids. Mirror the server's availableModels: _CODEX_MODELS becomes (gpt-5.5, gpt-5.4, gpt-5.4-mini), default gpt-5.5; delete _split_codex_model_effort and the reasoning_effort apply call so _apply_acp_model sends the id verbatim on either mechanism. This removes the divergence where the server reported bare available_models while the SDK surfaced a combined `/` current_model_id (verified e2e against the real codex-acp 0.16 CLI: available_models now matches current_model_id; pre-run and runtime switches still apply). Also tidy stale ACP comments: 0.46→0.44 version refs, the yolo→default mode doc, drop PR-process narration, and trim the _extract_session_models docstring to local behavior. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../openhands/sdk/agent/acp_agent.py | 71 ++------- .../openhands/sdk/agent/acp_models.py | 6 +- .../openhands/sdk/settings/acp_providers.py | 38 ++--- .../test_conversation_info_model.py | 4 +- tests/sdk/agent/test_acp_agent.py | 148 +++++++----------- tests/sdk/settings/test_acp_providers.py | 4 +- 6 files changed, 91 insertions(+), 180 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 1005d7d173..64eff2d678 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -376,9 +376,6 @@ def _write_secret_file(path: Path, value: str) -> None: # (codex-acp 0.16+, claude-agent-acp 0.44+) rather than the UNSTABLE ``models`` # capability + ``session/set_model`` (gemini-cli, older codex/claude). _MODEL_CONFIG_OPTION_ID = "model" -# Companion config-option id codex-acp 0.16 uses for the reasoning-effort tier -# (the 0.15 combined ``/`` id is split across the two on 0.16). -_REASONING_EFFORT_CONFIG_OPTION_ID = "reasoning_effort" def _model_config_option(response: Any) -> Any | None: @@ -406,24 +403,6 @@ def _model_config_option(response: Any) -> Any | None: return None -def _split_codex_model_effort(model: str) -> tuple[str, str | None]: - """Split a codex-style ``/`` id into ``(model, effort)``. - - codex-acp's ``models`` capability (0.15) used a combined id like - ``gpt-5.5/medium`` that the server parsed itself. Its 0.16 ``model`` - ``configOptions`` select instead wants a **bare preset id** (``gpt-5.5``) - with the reasoning effort set via a separate ``reasoning_effort`` option — - an unknown combined id is taken literally and rejected by the backend - (``"" model is not supported``). So when applying via configOptions we - split on ``/``. Bare ids (claude ``opus[1m]``/``sonnet``, gemini ``auto``, - codex without an effort suffix) return ``(model, None)`` unchanged. - """ - base, sep, effort = model.partition("/") - if sep and effort: - return base, effort - return model, None - - async def _apply_acp_model( conn: ClientSideConnection, session_id: str, @@ -435,36 +414,15 @@ async def _apply_acp_model( advertised: ``set_config_option(configId="model")`` for configOptions-based servers (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``. - A codex ``/`` id is split into a bare ``model`` plus a - separate ``reasoning_effort`` option (see :func:`_split_codex_model_effort`). - The model selection is the switch; the effort is a secondary refinement, so - a rejected effort is logged rather than raised — the live session keeps the - applied model instead of reporting the whole switch failed (#3772). + The model id is a bare preset id the server lists in its ``model`` select / + ``models`` capability — applied as-is on either mechanism. """ - if not via_config_option: + if via_config_option: + await conn.set_config_option( + config_id=_MODEL_CONFIG_OPTION_ID, value=model, session_id=session_id + ) + else: await conn.set_session_model(model_id=model, session_id=session_id) - return - base, effort = _split_codex_model_effort(model) - await conn.set_config_option( - config_id=_MODEL_CONFIG_OPTION_ID, value=base, session_id=session_id - ) - if effort is not None: - # reasoning_effort is only valid once the model is a known preset, which - # the call above just set. Best-effort: keep the applied model on failure. - try: - await conn.set_config_option( - config_id=_REASONING_EFFORT_CONFIG_OPTION_ID, - value=effort, - session_id=session_id, - ) - except ACPRequestError as exc: - logger.warning( - "ACP reasoning_effort=%r rejected after model=%r applied (%s); " - "keeping the model on the server default effort", - effort, - base, - exc, - ) def _usable_models(infos: Iterable[ACPModelInfo]) -> list[ACPModelInfo]: @@ -498,10 +456,8 @@ def _extract_session_models( ``via_config_option`` is the selection mechanism: ``True`` for the configOptions select, ``False`` for the ``models`` capability, and - ``default_via_config_option`` when the response carries neither — callers on - the resume path pass the persisted mechanism hint so a config-option session - whose ``load_session`` omits the model block still reapplies via the right - call instead of defaulting to ``set_session_model``. + ``default_via_config_option`` when the response carries neither (the + resume-path default for a ``load_session`` that omits the model block). ``getattr`` keeps the helper tolerant of agents that emit a partial structure. @@ -2612,10 +2568,9 @@ async def _init() -> tuple[ else reported_model_id ) - # Resolve the permission mode. Known providers each have their - # own mode ID (bypassPermissions, full-access, yolo …). - # Unknown/custom servers get None — skip the call rather than - # sending a provider-specific string they won't recognise. + # Resolve the permission mode. Known providers each have their own + # mode ID; unknown/custom servers get None — skip the call rather + # than sending a provider-specific string they won't recognise. provider = detect_acp_provider_by_agent_name(agent_name) mode_id = self.acp_session_mode or ( provider.default_session_mode if provider else None @@ -3590,7 +3545,7 @@ def set_acp_model(self, model: str) -> None: Args: model: Provider-specific model id to switch to (e.g. - ``"claude-haiku-4-5-20251001"`` or ``"gpt-5.4/low"``). + ``"sonnet"`` or ``"gpt-5.5"``). Raises: ValueError: If ``model`` is empty or whitespace-only, if the diff --git a/openhands-sdk/openhands/sdk/agent/acp_models.py b/openhands-sdk/openhands/sdk/agent/acp_models.py index c6c89390ea..2b4f4aad81 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_models.py +++ b/openhands-sdk/openhands/sdk/agent/acp_models.py @@ -34,9 +34,9 @@ class ACPModelInfo(BaseModel): model_id: str = Field( description=( "Server-assigned model identifier. May be concrete " - '(e.g. ``"gpt-5.5/xhigh"``) or an opaque alias ' - '(e.g. ``"default"``, ``"auto"``). This is the value to pass to ' - "``set_session_model`` to switch to this model." + '(e.g. ``"gpt-5.5"``) or an opaque alias ' + '(e.g. ``"default"``, ``"auto"``). This is the value to pass back ' + "to the server to switch to this model." ), ) name: str | None = Field( diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py index e2cc4d74a7..cce8d1ece9 100644 --- a/openhands-sdk/openhands/sdk/settings/acp_providers.py +++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py @@ -155,13 +155,12 @@ class ACPProviderInfo: """ default_session_mode: str - """ACP session-mode ID that suppresses all permission prompts. + """ACP session-mode ID set right after ``session/new``. - Different servers use different IDs for the same concept: - - - ``bypassPermissions`` — claude-agent-acp - - ``full-access`` — codex-acp - - ``yolo`` — gemini-cli + For servers with a permission-suppressing mode that is the value: + ``bypassPermissions`` (claude-agent-acp), ``full-access`` (codex-acp). + gemini-cli uses ``default`` (its ``yolo`` mode errors at init); the ACP + bridge auto-approves permission requests, so the mode doesn't gate prompts. """ agent_name_patterns: tuple[str, ...] @@ -307,20 +306,14 @@ class ACPProviderInfo: ACPModelOption(id="haiku", label="Claude Haiku 4.5"), ) -# Model IDs accepted by ``@zed-industries/codex-acp`` (``session/new`` -# ``availableModels`` on the pinned CLI), curated to the frontier family plus -# the cost-efficient mini tier. Format is ``/`` where the -# trailing tier (``low``/``medium``/``high``/``xhigh``) hints the reasoning -# effort for the turn. +# Bare preset ids the ``@zed-industries/codex-acp`` 0.16 ``model`` configOptions +# select reports at ``session/new`` (``set_config_option(configId="model")`` +# targets). The reasoning-effort tier is a *separate* ``reasoning_effort`` +# configOption on 0.16, not part of the model id, so it is not encoded here. _CODEX_MODELS: tuple[ACPModelOption, ...] = ( - ACPModelOption(id="gpt-5.5/low", label="GPT-5.5 (low)"), - ACPModelOption(id="gpt-5.5/medium", label="GPT-5.5 (medium)"), - ACPModelOption(id="gpt-5.5/high", label="GPT-5.5 (high)"), - ACPModelOption(id="gpt-5.5/xhigh", label="GPT-5.5 (xhigh)"), - ACPModelOption(id="gpt-5.4-mini/low", label="GPT-5.4 Mini (low)"), - ACPModelOption(id="gpt-5.4-mini/medium", label="GPT-5.4 Mini (medium)"), - ACPModelOption(id="gpt-5.4-mini/high", label="GPT-5.4 Mini (high)"), - ACPModelOption(id="gpt-5.4-mini/xhigh", label="GPT-5.4 Mini (xhigh)"), + ACPModelOption(id="gpt-5.5", label="GPT-5.5"), + ACPModelOption(id="gpt-5.4", label="GPT-5.4"), + ACPModelOption(id="gpt-5.4-mini", label="GPT-5.4 Mini"), ) # Model IDs accepted by ``@google/gemini-cli --acp``. Mirrors the @@ -379,9 +372,8 @@ class ACPProviderInfo: # so the `@version` suffix is a no-op there. # # claude-agent-acp 0.44+ / codex-acp 0.16+ select the model via a ``model`` -# ``configOptions`` entry, not ``session/set_model``; the SDK detects which per -# session in ``_extract_session_models`` and applies it. codex 0.16 also splits -# the model id from the reasoning effort (see ``_split_codex_model_effort``). #3772. +# ``configOptions`` entry rather than ``session/set_model``; the SDK detects +# which per session and applies it through the matching call. CLAUDE_AGENT_ACP_VERSION = "0.44.0" CODEX_ACP_VERSION = "0.16.0" GEMINI_CLI_VERSION = "0.46.0" @@ -434,7 +426,7 @@ class ACPProviderInfo: supports_runtime_model_switch=True, session_meta_key=None, available_models=_CODEX_MODELS, - default_model="gpt-5.5/medium", + default_model="gpt-5.5", file_secrets=_CODEX_FILE_SECRETS, binary_name="codex-acp", data_dir_env_var="CODEX_HOME", diff --git a/tests/agent_server/test_conversation_info_model.py b/tests/agent_server/test_conversation_info_model.py index eb55c65a10..975a655ede 100644 --- a/tests/agent_server/test_conversation_info_model.py +++ b/tests/agent_server/test_conversation_info_model.py @@ -205,13 +205,13 @@ def test_available_models_lifted_from_acp_agent(): def test_available_models_empty_when_server_omits_them(): """Servers that don't surface the UNSTABLE ``models`` capability yield [].""" agent = ACPAgent(acp_command=["echo", "test"]) - agent._current_model_id = "gpt-5.5/xhigh" + agent._current_model_id = "gpt-5.5" state = _make_state(agent) stored = _make_stored(state) info = _compose_conversation_info(stored, state) - assert info.current_model_id == "gpt-5.5/xhigh" + assert info.current_model_id == "gpt-5.5" assert info.available_models == [] diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 264e0536a2..78e653a2f2 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -11,7 +11,7 @@ from pathlib import Path from types import SimpleNamespace from typing import Any -from unittest.mock import AsyncMock, MagicMock, call, patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest from acp.exceptions import RequestError as ACPRequestError @@ -35,7 +35,6 @@ _reapply_session_model_on_resume, _select_auth_method, _serialize_tool_content, - _split_codex_model_effort, _strip_inherited_npm_env, ) from openhands.sdk.agent.acp_models import ACPModelInfo @@ -4446,7 +4445,7 @@ async def test_set_session_model_mechanism(self): @pytest.mark.asyncio async def test_set_config_option_mechanism(self): - # ``via_config_option=True`` (codex-acp 0.16+, claude-agent-acp 0.46+) + # ``via_config_option=True`` (codex-acp 0.16+, claude-agent-acp 0.44+) # applies the model via ``set_config_option(configId="model")``. conn = AsyncMock() applied = await _maybe_set_session_model( @@ -4537,17 +4536,15 @@ async def test_reapply_via_set_session_model_mechanism(self): @pytest.mark.asyncio async def test_reapply_via_set_config_option_mechanism(self): - # codex-acp 0.16+/claude-agent-acp 0.46+ reapply via - # ``set_config_option(configId="model")``; a codex combined id splits - # into model + reasoning_effort. + # codex-acp 0.16+/claude-agent-acp 0.44+ reapply via + # ``set_config_option(configId="model")`` with the bare preset id. conn = AsyncMock() applied = await _reapply_session_model_on_resume( - conn, "codex-acp", "sess-1", "gpt-5.4/low", via_config_option=True + conn, "codex-acp", "sess-1", "gpt-5.5", via_config_option=True + ) + conn.set_config_option.assert_awaited_once_with( + config_id="model", value="gpt-5.5", session_id="sess-1" ) - assert conn.set_config_option.await_args_list == [ - call(config_id="model", value="gpt-5.4", session_id="sess-1"), - call(config_id="reasoning_effort", value="low", session_id="sess-1"), - ] conn.set_session_model.assert_not_called() assert applied is True @@ -4631,7 +4628,7 @@ async def test_any_request_error_is_swallowed_on_resume(self): code=-32603, message="internal error" ) applied = await _reapply_session_model_on_resume( - conn, "codex-acp", "sess-1", "gpt-5.4/low", via_config_option=False + conn, "codex-acp", "sess-1", "gpt-5.5", via_config_option=False ) conn.set_session_model.assert_awaited_once() assert applied is False @@ -4709,31 +4706,29 @@ def _run(coro: Any, timeout: Any = None) -> Any: return agent def test_switches_model_on_live_codex_session(self): - # Default (models-capability) session ⇒ set_session_model (combined id). + # Default (models-capability) session ⇒ set_session_model, id as-is. agent = self._wire(_make_agent(), "codex-acp") - agent.set_acp_model("gpt-5.4/low") + agent.set_acp_model("gpt-5.5") agent._conn.set_session_model.assert_awaited_once_with( - model_id="gpt-5.4/low", session_id="sess-1" + model_id="gpt-5.5", session_id="sess-1" ) agent._conn.set_config_option.assert_not_called() agent._executor.run_async.assert_called_once() # Sentinel LLM + metrics reflect the live model for cost/token tracking. - assert agent.llm.model == "gpt-5.4/low" - assert agent.llm.metrics.model_name == "gpt-5.4/low" + assert agent.llm.model == "gpt-5.5" + assert agent.llm.metrics.model_name == "gpt-5.5" - def test_switches_codex_via_config_option_splits_model_and_effort(self): - # codex-acp 0.16 configOptions: the combined id is split into a bare - # `model` preset + a separate `reasoning_effort` option. + def test_switches_codex_via_config_option_single_call(self): + # codex-acp 0.16 configOptions: the bare preset id applies as a single + # `model` selection (reasoning effort is a separate, unmanaged option). agent = self._wire(_make_agent(), "codex-acp", via_config_option=True) - agent.set_acp_model("gpt-5.4/low") - assert agent._conn.set_config_option.await_args_list == [ - call(config_id="model", value="gpt-5.4", session_id="sess-1"), - call(config_id="reasoning_effort", value="low", session_id="sess-1"), - ] + agent.set_acp_model("gpt-5.5") + agent._conn.set_config_option.assert_awaited_once_with( + config_id="model", value="gpt-5.5", session_id="sess-1" + ) agent._conn.set_session_model.assert_not_called() - # The agent's surfaced/persisted model stays the combined registry id. - assert agent.llm.model == "gpt-5.4/low" - assert agent._current_model_id == "gpt-5.4/low" + assert agent.llm.model == "gpt-5.5" + assert agent._current_model_id == "gpt-5.5" def test_switches_claude_via_config_option_single_call(self): # A bare id (no `/`) applies as a single `model` selection — no effort. @@ -4752,12 +4747,12 @@ def test_switch_method_not_found_raises_no_fallback(self): code=-32601, message="Method not found" ) with pytest.raises(ValueError, match="rejected set_session_model"): - agent.set_acp_model("gpt-5.4/low") + agent.set_acp_model("gpt-5.5") agent._conn.set_config_option.assert_not_called() agent._executor.run_async.assert_called_once() # Mechanism + sentinel model are left unchanged on a failed switch. assert agent._model_via_config_option is False - assert agent.llm.model != "gpt-5.4/low" + assert agent.llm.model != "gpt-5.5" def test_switch_invalid_model_raises_value_error(self): # A -32602 invalid-params is a real client error, not a wrong-mechanism @@ -4852,7 +4847,7 @@ def test_passes_timeout_to_run_async(self): # The protocol round-trip runs under the conversation state lock, so it # must be bounded to avoid wedging the lock if the server never answers. agent = self._wire(_make_agent(acp_prompt_timeout=42.0), "codex-acp") - agent.set_acp_model("gpt-5.4/low") + agent.set_acp_model("gpt-5.5") _, kwargs = agent._executor.run_async.call_args assert kwargs["timeout"] == 42.0 @@ -7106,15 +7101,15 @@ def _raw(self, model_id: Any, name: Any = None, description: Any = None) -> Any: def test_maps_fields_through(self): response = MagicMock() response.models = MagicMock() - response.models.current_model_id = "gpt-5.4/low" + response.models.current_model_id = "gpt-5.4" response.models.available_models = [ - self._raw("gpt-5.4/low", "gpt-5.4 (low)", "Strong everyday model."), + self._raw("gpt-5.4", "GPT-5.4", "Strong everyday model."), ] _cur, avail, _ = _extract_session_models(response) assert avail == [ ACPModelInfo( - model_id="gpt-5.4/low", - name="gpt-5.4 (low)", + model_id="gpt-5.4", + name="GPT-5.4", description="Strong everyday model.", ) ] @@ -7138,7 +7133,7 @@ def test_drops_entries_without_usable_id(self): class TestConfigOptionModelMechanism: """Model selection via the ``model`` ``configOptions`` select. - codex-acp 0.16+ and claude-agent-acp 0.46+ dropped the UNSTABLE ``models`` + codex-acp 0.16+ and claude-agent-acp 0.44+ dropped the UNSTABLE ``models`` capability + ``session/set_model`` in favour of a ``model`` config-option select driven by ``session/set_config_option``. ``_extract_session_models`` reads that select and reports the apply mechanism as its third return value @@ -7158,7 +7153,7 @@ def _response(self, *, models=None, config_options): return SimpleNamespace(models=models, config_options=config_options) def test_extracts_model_state_from_config_option(self): - # claude-agent-acp 0.46: model select with short aliases. + # claude-agent-acp 0.44: model select with short aliases. response = self._response( config_options=[ self._select( @@ -7289,8 +7284,8 @@ def test_default_via_config_option_used_when_no_block_present(self): # Parsed through the real ``NewSessionResponse`` so the test exercises the # genuine schema, INCLUDING the ``SessionConfigOption`` RootModel wrapper the # ``agent-client-protocol`` lib applies to ``config_options`` on 0.8.x (where a -# naive ``opt.id`` read returns ``None``). This is the exact failure the QA bot -# surfaced; the detection helper unwraps ``.root`` so it works on 0.8.x + 0.10.x. +# naive ``opt.id`` read returns ``None``). The detection helper unwraps +# ``.root`` so it works on 0.8.x + 0.10.x. def _select_dict(opt_id, current, values, category=None): return { "id": opt_id, @@ -7354,10 +7349,10 @@ class TestDetectionAgainstRealSessionResponses: """Detection + extraction against the real CLI ``session/new`` wire shapes, parsed through the actual ``acp.schema.NewSessionResponse``. - Guards the real production bug the QA bot surfaced: on - ``agent-client-protocol`` 0.8.x each ``config_options`` entry is a - ``SessionConfigOption`` RootModel (``.root``), so a naive ``opt.id`` read is - ``None`` and the SDK would fall back to the removed ``session/set_model``. + Guards a schema-shape regression: on ``agent-client-protocol`` 0.8.x each + ``config_options`` entry is a ``SessionConfigOption`` RootModel (``.root``), + so a naive ``opt.id`` read is ``None`` and the SDK would fall back to the + removed ``session/set_model``. """ def test_config_options_is_the_schema_field_name(self): @@ -7421,63 +7416,32 @@ async def test_set_session_model_rejection_propagates(self): conn.set_config_option.assert_not_called() # no fallback -class TestCodexModelEffortSplit: - """codex-acp 0.16 splits the 0.15 combined ``/`` id into a - bare ``model`` preset + a separate ``reasoning_effort`` configOption.""" - - def test_splits_combined_id(self): - assert _split_codex_model_effort("gpt-5.5/medium") == ("gpt-5.5", "medium") - assert _split_codex_model_effort("gpt-5.4-mini/xhigh") == ( - "gpt-5.4-mini", - "xhigh", - ) - - def test_bare_ids_unchanged(self): - # claude / gemini / codex-without-effort ids have no ``/``. - assert _split_codex_model_effort("opus[1m]") == ("opus[1m]", None) - assert _split_codex_model_effort("sonnet") == ("sonnet", None) - assert _split_codex_model_effort("auto") == ("auto", None) - assert _split_codex_model_effort("gpt-5.5") == ("gpt-5.5", None) +class TestApplyAcpModel: + """``_apply_acp_model`` sends the model id verbatim through the mechanism the + session advertised — the ``model`` configOption select or + ``set_session_model`` — with no id rewriting. (codex 0.16 exposes reasoning + effort as a separate, unmanaged ``reasoning_effort`` configOption, so the + model ids are bare presets on every provider.)""" @pytest.mark.asyncio - async def test_apply_splits_into_two_config_options(self): - # The configOptions apply path sets model (bare) then reasoning_effort. + async def test_config_option_single_call(self): conn = AsyncMock() - await _apply_acp_model(conn, "sess-1", "gpt-5.5/medium", via_config_option=True) - assert conn.set_config_option.await_args_list == [ - call(config_id="model", value="gpt-5.5", session_id="sess-1"), - call(config_id="reasoning_effort", value="medium", session_id="sess-1"), - ] - conn.set_session_model.assert_not_called() - - @pytest.mark.asyncio - async def test_apply_bare_config_option_single_call(self): - conn = AsyncMock() - await _apply_acp_model(conn, "sess-1", "opus[1m]", via_config_option=True) + await _apply_acp_model(conn, "sess-1", "gpt-5.5", via_config_option=True) conn.set_config_option.assert_awaited_once_with( - config_id="model", value="opus[1m]", session_id="sess-1" + config_id="model", value="gpt-5.5", session_id="sess-1" ) + conn.set_session_model.assert_not_called() @pytest.mark.asyncio - async def test_reasoning_effort_failure_is_tolerated(self): - # The model selection is the switch; a rejected reasoning_effort must NOT - # fail the whole apply (else the live session changed model while the - # caller reports failure and leaves llm.model stale). The model call - # succeeded, so _apply_acp_model returns normally. #3772 + async def test_set_session_model_branch(self): conn = AsyncMock() - - async def _effort_fails(*, config_id, value, session_id): - if config_id == "reasoning_effort": - raise ACPRequestError(code=-32603, message="transient") - - conn.set_config_option.side_effect = _effort_fails - # Does not raise: the model preset applied, only the effort refinement - # was dropped. - await _apply_acp_model(conn, "sess-1", "gpt-5.5/medium", via_config_option=True) - assert conn.set_config_option.await_args_list == [ - call(config_id="model", value="gpt-5.5", session_id="sess-1"), - call(config_id="reasoning_effort", value="medium", session_id="sess-1"), - ] + await _apply_acp_model( + conn, "sess-1", "gemini-2.5-pro", via_config_option=False + ) + conn.set_session_model.assert_awaited_once_with( + model_id="gemini-2.5-pro", session_id="sess-1" + ) + conn.set_config_option.assert_not_called() class TestACPAgentAvailableModelsProperty: diff --git a/tests/sdk/settings/test_acp_providers.py b/tests/sdk/settings/test_acp_providers.py index 7d0390c7a9..8dad941063 100644 --- a/tests/sdk/settings/test_acp_providers.py +++ b/tests/sdk/settings/test_acp_providers.py @@ -59,8 +59,8 @@ def test_codex_metadata(self): assert info.supports_set_session_model is True assert info.supports_runtime_model_switch is True assert info.session_meta_key is None - assert info.default_model == "gpt-5.5/medium" - assert any(m.id == "gpt-5.5/medium" for m in info.available_models) + assert info.default_model == "gpt-5.5" + assert any(m.id == "gpt-5.5" for m in info.available_models) assert info.binary_name == "codex-acp" assert info.data_dir_env_var == "CODEX_HOME" From 0244d8450f0e4c142c380e97bc4497eeb74099b8 Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Wed, 17 Jun 2026 23:17:42 +0200 Subject: [PATCH 13/14] docs(acp): fix post-bump stale references in model docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review sweep: runtime switches are no longer "always set_session_model" (the mechanism is detected per session — set_config_option for codex/claude, else set_session_model); codex is no longer applied "via set_session_model"; the claude version note said 0.46.0 (pinned 0.44.0); stale `claude-opus-4-6` and `GPT-5.5 (xhigh)` examples. No runtime effect. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../openhands/sdk/agent/acp_agent.py | 30 ++++++++++--------- .../openhands/sdk/agent/acp_models.py | 2 +- .../openhands/sdk/settings/acp_providers.py | 22 +++++++------- openhands-sdk/openhands/sdk/settings/model.py | 8 ++--- tests/sdk/settings/test_acp_providers.py | 2 +- 5 files changed, 33 insertions(+), 31 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 64eff2d678..941ac4170d 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1403,10 +1403,10 @@ def _serialize_acp_env(self, value: dict[str, str], info): acp_model: str | None = Field( default=None, description=( - "Model for the ACP server to use (e.g. 'claude-opus-4-6' or " - "'gpt-5.4'). For Claude ACP, passed via session _meta. For Codex " - "ACP, applied via the protocol-level set_session_model call. " - "If None, the server picks its default." + "Model for the ACP server to use (e.g. 'sonnet' or 'gpt-5.5'). " + "Applied via the protocol — set_config_option(model) for " + "configOptions-based servers (codex, claude), else " + "set_session_model. If None, the server picks its default." ), ) acp_resume_session_id: str | None = Field( @@ -1729,8 +1729,8 @@ def available_models(self) -> list[ACPModelInfo]: server's ``model_id`` plus an optional ``name``/``description`` — enough for a client to render a model picker and resolve ``current_model_id`` to a display label without any server-side - curation. ``current_model_id`` is the value to pass to - ``set_session_model`` to switch. + curation. ``current_model_id`` is the value to pass back to the server + to switch to that model. Same lifecycle and serialization caveats as ``current_model_id``: in-process runtime state, lifted onto @@ -1746,7 +1746,7 @@ def supports_runtime_model_switch(self) -> bool: Tells a client whether to offer the inline picker's live-switch control. ``True`` only for known providers that explicitly declare support for - ``session/set_model``. Unknown/custom providers use ``set_config_option`` + runtime model switching. Unknown/custom providers use ``set_config_option`` for *initial* model selection but that RPC is a generic config write, not a guaranteed live-switch primitive, so the picker is hidden for them. ``False`` before a session exists (nothing to switch yet). @@ -3528,9 +3528,11 @@ async def _fork_and_prompt() -> str: def set_acp_model(self, model: str) -> None: """Switch the model on the running ACP session (mid-conversation). - Issues a protocol-level ``session/set_model`` call on the live - connection so the new model takes effect for subsequent turns in the - *same* session — no subprocess restart, no loss of conversation + Issues a protocol-level model-switch call on the live connection (the + mechanism the session advertised — ``set_config_option(model)`` or + ``set_session_model``) so the new model takes effect for subsequent + turns in the *same* session — no subprocess restart, no loss of + conversation context. Verified against claude-agent-acp and codex-acp. This is the low-level agent primitive; prefer @@ -3550,7 +3552,7 @@ def set_acp_model(self, model: str) -> None: Raises: ValueError: If ``model`` is empty or whitespace-only, if the detected provider does not support runtime model switching, or - if the ACP server rejects the ``session/set_model`` call (e.g. + if the ACP server rejects the model-switch call (e.g. method-not-found on a custom server, or an invalid model id). RuntimeError: If the ACP session has not been initialized yet (i.e. before the first ``run()``). @@ -3559,8 +3561,8 @@ def set_acp_model(self, model: str) -> None: Note: A timeout means the client stopped waiting, not that the switch was - rejected: the ``session/set_model`` request may already have been - written and could still be applied server-side. The connection and + rejected: the switch request may already have been written and + could still be applied server-side. The connection and session stay alive and the local sentinel model is intentionally left unchanged, so a timed-out switch leaves the server-side model indeterminate. The conservative choice (treat it as failed locally) @@ -3586,7 +3588,7 @@ def set_acp_model(self, model: str) -> None: if provider is not None and not provider.supports_runtime_model_switch: raise ValueError( f"ACP provider '{provider.key}' does not support runtime model " - "switching via set_session_model." + "switching." ) # ``has_live_acp_session`` above guarantees a session id; narrow for the # type checker. diff --git a/openhands-sdk/openhands/sdk/agent/acp_models.py b/openhands-sdk/openhands/sdk/agent/acp_models.py index 2b4f4aad81..9377d705a8 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_models.py +++ b/openhands-sdk/openhands/sdk/agent/acp_models.py @@ -41,7 +41,7 @@ class ACPModelInfo(BaseModel): ) name: str | None = Field( default=None, - description='Human-readable label, e.g. ``"GPT-5.5 (xhigh)"``.', + description='Human-readable label, e.g. ``"GPT-5.5"``.', ) description: str | None = Field( default=None, diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py index cce8d1ece9..febd2fba57 100644 --- a/openhands-sdk/openhands/sdk/settings/acp_providers.py +++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py @@ -12,11 +12,10 @@ - ``agent_name_patterns`` lowercase substrings in the runtime agent name; used by ``ACPAgent`` to auto-detect mode / protocol - ``supports_set_session_model`` whether the provider applies its *initial* - model via the ``set_session_model`` protocol - call at session creation -- ``supports_runtime_model_switch`` whether the server supports the - ``session/set_model`` protocol call for - runtime, mid-conversation model switching + model via a protocol call at session creation + (``set_config_option``/``set_session_model``) +- ``supports_runtime_model_switch`` whether the server supports a protocol-level + model switch for runtime, mid-conversation use - ``session_meta_key`` top-level ``_meta`` key for model selection (or ``None``) - ``available_models`` curated list of selectable models for the provider's model picker (``acp_model`` candidates) @@ -194,12 +193,13 @@ class ACPProviderInfo: When non-``None``, the model is additionally advertised via ACP session ``_meta`` using the structure ``{session_meta_key: {"options": {"model": }}}`` passed to - ``new_session()``. This is best-effort only: claude-agent-acp 0.30.0 - ignores it (#3654), so the authoritative initial selection is the - ``set_session_model`` call gated on :attr:`supports_set_session_model`. + ``new_session()``. This is best-effort only: claude-agent-acp ignores it + (#3654), so the authoritative initial selection is the protocol call gated + on :attr:`supports_set_session_model`. - Runtime switches always use ``set_session_model`` (gated on - :attr:`supports_runtime_model_switch`). + Runtime switches use the mechanism the session advertised + (``set_config_option`` or ``set_session_model``), gated on + :attr:`supports_runtime_model_switch`. - ``"claudeCode"`` — claude-agent-acp - ``None`` — codex-acp, gemini-cli @@ -234,7 +234,7 @@ class ACPProviderInfo: Unlike :attr:`supports_set_session_model`, this is about switching the model of an *already-running* session, not the initial selection. A provider may select its initial model via ``_meta`` (claude-agent-acp) - yet still support ``set_session_model`` for later switches. + yet still support a protocol-level switch for later changes. Defaults to ``False`` so forward-compat providers — and any external caller constructing this dataclass positionally — keep working without a diff --git a/openhands-sdk/openhands/sdk/settings/model.py b/openhands-sdk/openhands/sdk/settings/model.py index 624aa9402e..89c26c518c 100644 --- a/openhands-sdk/openhands/sdk/settings/model.py +++ b/openhands-sdk/openhands/sdk/settings/model.py @@ -1322,10 +1322,10 @@ def _serialize_acp_env(self, value: dict[str, str], info): acp_model: str | None = Field( default=None, description=( - "Model identifier for the ACP server to use (e.g. " - "``'claude-opus-4-6'``). claude-agent-acp receives it via session " - "_meta; codex-acp and gemini-cli via ``set_session_model``. " - "Leave blank to let the server pick its default." + "Model identifier for the ACP server to use (e.g. ``'sonnet'`` or " + "``'gpt-5.5'``). Applied via the protocol — set_config_option(model) " + "for configOptions-based servers (codex, claude), else " + "set_session_model. Leave blank to let the server pick its default." ), json_schema_extra={ SETTINGS_METADATA_KEY: SettingsFieldMetadata( diff --git a/tests/sdk/settings/test_acp_providers.py b/tests/sdk/settings/test_acp_providers.py index 8dad941063..3f108d64a3 100644 --- a/tests/sdk/settings/test_acp_providers.py +++ b/tests/sdk/settings/test_acp_providers.py @@ -37,7 +37,7 @@ def test_claude_code_metadata(self): assert "claude-agent" in info.agent_name_patterns # Initial selection rides a protocol call — claude-agent-acp ignores the # session-_meta payload (#3654), which is still sent as best-effort - # (session_meta_key below). On 0.46.0 the call is set_config_option. + # (session_meta_key below). On 0.44.0 the call is set_config_option. assert info.supports_set_session_model is True assert info.supports_runtime_model_switch is True assert info.session_meta_key == "claudeCode" From 2b7673aaf6113ec1d2c12b2cb9230490154f2855 Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Thu, 18 Jun 2026 08:21:47 +0200 Subject: [PATCH 14/14] fix(acp): treat curated model lists as pre-session suggestions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit claude-agent-acp's `model` configOption select is dynamic and account-dependent (its `/model` menu and `set_config_option` validate against the live, entitlement-gated list — verified against the 0.44 source), so a curated id is not guaranteed acceptable. Two changes so the curated lists behave as suggestions, not ground truth: - `_maybe_set_session_model` now tolerates an `ACPRequestError` for known providers too (log + fall back to the server default), matching the unknown-provider and resume paths. A seeded id the live account lacks (e.g. `sonnet`) no longer fails session creation. Runtime switches via `set_acp_model` still surface the error, since there the user explicitly asked to switch. - Add `gemini-3.1-pro-preview` to `_GEMINI_MODELS`: gemini-cli 0.46 surfaces the pro-preview as `gemini-3.1-pro-preview` once the 3.1 launch flag is on (`PREVIEW_GEMINI_3_1_MODEL`), falling back to `gemini-3-pro-preview` otherwise — keep both so the picker matches either rollout state. Also corrects the misleading "not an access check" comment on `_CLAUDE_MODELS`. Validated end-to-end against the real pinned CLIs (claude-agent-acp 0.44, codex-acp 0.16): a rejected initial model degrades to the server default; a valid one still applies. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../openhands/sdk/agent/acp_agent.py | 48 +++++++++---------- .../openhands/sdk/settings/acp_providers.py | 11 ++++- tests/sdk/agent/test_acp_agent.py | 19 ++++++++ 3 files changed, 52 insertions(+), 26 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 941ac4170d..0105e133c3 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -617,9 +617,11 @@ async def _maybe_set_session_model( ``set_config_option(configId="model")`` for configOptions-based servers (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``. The ``_meta`` model payload is ignored by the pinned CLIs, so this protocol call - is what actually applies the model (#3654). Unknown/custom providers are - best-effort (a rejection is tolerated — the session keeps the server - default). Runtime switches go through :meth:`ACPAgent.set_acp_model`. + is what actually applies the model (#3654). A rejection is tolerated for + any provider — the curated model list is a pre-session suggestion, not an + access check (a server's model menu is dynamic/account-dependent), so the + session keeps the server default rather than failing to start. Runtime + switches go through :meth:`ACPAgent.set_acp_model`. Returns ``True`` only when a model-setting call succeeded (the override reached the server); ``False`` when there is nothing to apply or the call @@ -628,31 +630,29 @@ async def _maybe_set_session_model( if not acp_model: return False provider = detect_acp_provider_by_agent_name(agent_name) - if provider is not None and provider.supports_set_session_model: + # Known providers gate on supports_set_session_model; unknown/custom servers + # always attempt (best-effort). + if provider is not None and not provider.supports_set_session_model: + return False + # A rejection is tolerated so session creation can't break: the curated model + # list is a suggestion, not an access check — a server's model menu is + # dynamic/account-dependent (claude-agent-acp validates set_config_option + # against the live select and rejects an absent id), so a model the live + # session won't accept degrades to the server default rather than failing. + try: await _apply_acp_model( conn, session_id, acp_model, via_config_option=via_config_option ) return True - # Unknown/custom provider: best-effort apply via the detected mechanism; a - # rejection is tolerated so session creation can't break. - if provider is None: - try: - await _apply_acp_model( - conn, session_id, acp_model, via_config_option=via_config_option - ) - logger.info( - "Set model %r on unknown/custom ACP server %s", acp_model, agent_name - ) - return True - except ACPRequestError as e: - logger.warning( - "Could not set model %r on unknown/custom ACP server %s (%s); " - "the session will use the server default", - acp_model, - agent_name, - e, - ) - return False + except ACPRequestError as e: + logger.warning( + "Could not set model %r on ACP server %s (%s); " + "the session will use the server default", + acp_model, + agent_name, + e, + ) + return False async def _reapply_session_model_on_resume( diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py index febd2fba57..f30e46f98d 100644 --- a/openhands-sdk/openhands/sdk/settings/acp_providers.py +++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py @@ -297,8 +297,10 @@ class ACPProviderInfo: # the CLI's own ``/model`` menu offers, switched via ``set_config_option``). # ``opus[1m]`` is the SDK-documented version-agnostic 1M-context alias and the # CLI's own default (``currentValue``); ``default`` is the CLI's recommended -# tier (Opus 4.8 · 1M). The CLI also accepts ids outside this list, so these -# are curated suggestions, not an access check. +# tier (Opus 4.8 · 1M). The ``/model`` menu is dynamic/account-dependent and the +# CLI validates ``set_config_option(model)`` against the live select — it rejects +# an absent id (e.g. ``sonnet`` on accounts without it), so these are pre-session +# suggestions, not ground truth; a rejected id degrades to the server default. _CLAUDE_MODELS: tuple[ACPModelOption, ...] = ( ACPModelOption(id="default", label="Default (recommended)"), ACPModelOption(id="opus[1m]", label="Claude Opus 4.8 (1M)"), @@ -324,6 +326,11 @@ class ACPProviderInfo: # are curated suggestions, not an access check. _GEMINI_MODELS: tuple[ACPModelOption, ...] = ( ACPModelOption(id="auto", label="Auto"), + # gemini-cli 0.46 surfaces the pro-preview as ``gemini-3.1-pro-preview`` once + # the Gemini 3.1 launch flag is on (``PREVIEW_GEMINI_3_1_MODEL``), falling + # back to ``gemini-3-pro-preview`` (``PREVIEW_GEMINI_MODEL``) otherwise — keep + # both so the picker matches either rollout state. + ACPModelOption(id="gemini-3.1-pro-preview", label="Gemini 3.1 Pro (preview)"), ACPModelOption(id="gemini-3-pro-preview", label="Gemini 3 Pro (preview)"), ACPModelOption(id="gemini-3-flash-preview", label="Gemini 3 Flash (preview)"), ACPModelOption(id="gemini-3.1-flash-lite", label="Gemini 3.1 Flash Lite"), diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 78e653a2f2..50e1e6053a 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -4511,6 +4511,25 @@ async def test_unknown_provider_apply_failure_is_tolerated(self): conn.set_config_option.assert_awaited_once() assert applied is False + @pytest.mark.asyncio + async def test_known_provider_apply_rejection_is_tolerated(self): + # claude-agent-acp's ``model`` select is dynamic/account-dependent and + # rejects an absent curated id (e.g. ``sonnet`` on accounts without it, + # dressed as -32603). A rejected *initial* model must not fail session + # creation — it degrades to the server default (applied=False) like the + # unknown-provider path, since the curated list is a suggestion not an + # access check. (Runtime switches via set_acp_model surface the error.) + conn = AsyncMock() + conn.set_config_option.side_effect = ACPRequestError( + code=-32603, message="Invalid value for config option model: sonnet" + ) + applied = await _maybe_set_session_model( + conn, "claude-agent-acp", "session-1", "sonnet", via_config_option=True + ) + conn.set_config_option.assert_awaited_once() + conn.set_session_model.assert_not_called() + assert applied is False + class TestReapplySessionModelOnResume: """Resume reapplies the persisted model via the runtime-switch gate."""