From d412c0b5b5f41397e36412c974d23cae5b3266c8 Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@openhands.dev>
Date: Wed, 17 Jun 2026 12:15:57 +0200
Subject: [PATCH 01/14] feat(acp): bump provider CLIs (claude 0.46, codex 0.16,
 gemini 0.46) + configOptions model selection

Bumps all three built-in ACP provider CLIs to their latest npm releases and
adapts the SDK to the model-selection protocol change two of them shipped.

Verified e2e against every CLI (production binary path) before bumping:

- claude-agent-acp 0.30 -> 0.46 and codex-acp 0.15 -> 0.16 dropped the UNSTABLE
  `models` capability + `session/set_model` (returns "Method not found") and
  moved model selection to a `model` `configOptions` select driven by
  `session/set_config_option`. With the old code, init crashes the moment a
  model is set. The SDK now auto-detects the mechanism from the session/new (or
  load_session) response and applies the model via the right call across init,
  runtime switch, and resume. gemini-cli 0.46 keeps `models`/`set_session_model`
  and is unchanged on that axis.

- gemini-cli 0.46 rejects `set_session_mode("yolo")` at session init
  (JSON-RPC -32603), which crashed headless startup. Switched the gemini default
  session mode to `default`; the ACP bridge already auto-approves every
  `session/request_permission`, so permission prompts never block regardless of
  mode (verified a real tool/edit turn).

- Reconciled `_CLAUDE_MODELS` and `_GEMINI_MODELS` to the `availableModels` each
  bumped CLI actually reports; `_CODEX_MODELS` is unchanged (codex 0.16 accepts
  the same combined `gpt-5.5/medium` ids via set_config_option). claude
  `default_model` -> `opus[1m]` (the CLI's own default), gemini -> `auto`.

Pins move together in acp_providers.py + the agent-server Dockerfile. All three
providers pass an init + tool turn + live model-switch e2e against the pinned
binaries.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../openhands/agent_server/docker/Dockerfile  |   6 +-
 .../openhands/sdk/agent/acp_agent.py          | 181 ++++++++++++---
 .../openhands/sdk/settings/acp_providers.py   |  89 ++++----
 tests/sdk/agent/test_acp_agent.py             | 208 +++++++++++++++---
 tests/sdk/settings/test_acp_providers.py      |  16 +-
 tests/sdk/test_settings.py                    |   6 +-
 6 files changed, 402 insertions(+), 104 deletions(-)

diff --git a/openhands-agent-server/openhands/agent_server/docker/Dockerfile b/openhands-agent-server/openhands/agent_server/docker/Dockerfile
index 8751c22628..8cc51cd9f8 100644
--- a/openhands-agent-server/openhands/agent_server/docker/Dockerfile
+++ b/openhands-agent-server/openhands/agent_server/docker/Dockerfile
@@ -171,9 +171,9 @@ RUN set -ux; \
          && "$ACP_NODE_DIR/bin/node" --version; then \
         PATH="$ACP_NODE_DIR/bin:$PATH"; \
         if "$ACP_NODE_DIR/bin/npm" install -g \
-            @agentclientprotocol/claude-agent-acp@0.30.0 \
-            @zed-industries/codex-acp@0.15.0 \
-            @google/gemini-cli@0.38.0; then \
+            @agentclientprotocol/claude-agent-acp@0.46.0 \
+            @zed-industries/codex-acp@0.16.0 \
+            @google/gemini-cli@0.46.0; then \
           # Create wrappers in /usr/local/bin that prepend ACP's Node 22 to PATH.
           # This ensures the ACP binary's #!/usr/bin/env node shebang resolves
           # to Node 22, while the repo's own node (NVM/system) stays untouched
diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py
index b5d233768e..3eed076262 100644
--- a/openhands-sdk/openhands/sdk/agent/acp_agent.py
+++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py
@@ -371,6 +371,72 @@ def _write_secret_file(path: Path, value: str) -> None:
         f.write(value)
 
 
+# Session config-option id that selects the model on ACP servers that drive
+# model selection through ``configOptions`` / ``session/set_config_option``
+# (codex-acp 0.16+, claude-agent-acp 0.46+) rather than the UNSTABLE ``models``
+# capability + ``session/set_model`` (gemini-cli, older codex/claude).
+_MODEL_CONFIG_OPTION_ID = "model"
+
+
+def _model_config_option(response: Any) -> Any | None:
+    """Return the ``model`` ``configOptions`` select off a session response.
+
+    Newer ACP CLIs dropped the UNSTABLE ``models`` capability and expose model
+    selection as a ``configOptions`` entry with ``id == "model"`` (``type ==
+    "select"``), switched via ``session/set_config_option`` instead of
+    ``session/set_model``. Returns that option (carrying ``options`` and
+    ``current_value``) or ``None`` when the server uses neither / the old
+    mechanism. ``getattr`` keeps it tolerant of partial structures.
+    """
+    for opt in getattr(response, "config_options", None) or []:
+        if (
+            getattr(opt, "type", None) == "select"
+            and getattr(opt, "id", None) == _MODEL_CONFIG_OPTION_ID
+        ):
+            return opt
+    return None
+
+
+def _session_selects_model_via_config_option(response: Any) -> bool:
+    """Whether this session drives model selection through
+    ``session/set_config_option`` (the ``model`` config-option select) rather
+    than ``session/set_model``.
+
+    The ``models`` capability wins when present (gemini-cli 0.46 and older
+    codex/claude still carry it); only in its absence does a ``model`` config
+    option become the selection mechanism.
+    """
+    if response is None:
+        return False
+    if getattr(response, "models", None) is not None:
+        return False
+    return _model_config_option(response) is not None
+
+
+def _apply_acp_model(
+    conn: ClientSideConnection,
+    session_id: str,
+    model: str,
+    *,
+    via_config_option: bool,
+) -> Awaitable[Any]:
+    """Build the awaitable that applies ``model`` to a live ACP session via
+    whichever protocol the session advertised: ``set_config_option(configId=
+    "model")`` for configOptions-based servers (codex-acp 0.16+, claude-agent-acp
+    0.46+), else ``set_session_model``.
+
+    Returns the (eagerly constructed) coroutine so callers can either ``await``
+    it directly or hand it to ``run_async`` — the underlying ``conn`` method is
+    invoked now, mirroring the previous inline ``conn.set_session_model(...)``
+    call site.
+    """
+    if via_config_option:
+        return conn.set_config_option(
+            config_id=_MODEL_CONFIG_OPTION_ID, value=model, session_id=session_id
+        )
+    return conn.set_session_model(model_id=model, session_id=session_id)
+
+
 def _extract_session_models(
     response: Any,
 ) -> tuple[str | None, list[ACPModelInfo] | None]:
@@ -379,16 +445,17 @@ def _extract_session_models(
     Returns a ``(current_model_id, available_models)`` pair, both best-effort.
     ``available_models`` is normalized into our own stable :class:`ACPModelInfo`
     type at this boundary so nothing downstream depends on the vendored
-    ``acp.schema`` shape.
+    ``acp.schema`` shape. Reads whichever mechanism the session advertised: the
+    UNSTABLE ``models`` capability, or the ``model`` ``configOptions`` select
+    (codex-acp 0.16+, claude-agent-acp 0.46+).
 
     The second element distinguishes **absent** from **empty** — this matters
     for resume persistence (preserve the last-known list when the server didn't
     report one; clear it when the server explicitly says it has none):
 
-    - ``None``  — the (UNSTABLE) ``models`` block was absent from the response
-      (older agent, opted out, or ``load_session`` not carrying it).
-    - ``[]``    — the server *did* report ``models`` but offers no (usable)
-      models this session.
+    - ``None``  — neither mechanism was present in the response (older agent,
+      opted out, or ``load_session`` not carrying it).
+    - ``[]``    — the server reported a mechanism but offers no (usable) models.
     - ``[...]`` — the reported models, minus any with an unusable ``model_id``.
 
     ``getattr`` keeps the helper tolerant of agents that emit a partial
@@ -397,17 +464,37 @@ def _extract_session_models(
     if response is None:
         return None, None
     models = getattr(response, "models", None)
-    if models is None:
+    if models is not None:
+        current = getattr(models, "current_model_id", None)
+        current = current if isinstance(current, str) and current else None
+        raw = getattr(models, "available_models", None) or []
+        # Drop entries without a usable id: an empty/missing ``model_id`` is an
+        # invalid picker option and an unusable ``set_session_model`` target, so
+        # we filter it out rather than surfacing ``model_id=""``.
+        available = [
+            info
+            for info in (ACPModelInfo.from_protocol(m) for m in raw)
+            if info.model_id
+        ]
+        return current, available
+    # configOptions mechanism: the ``model`` select carries the same state, with
+    # ``value`` as the model id (== the ``set_config_option`` target).
+    opt = _model_config_option(response)
+    if opt is None:
         return None, None
-    current = getattr(models, "current_model_id", None)
+    current = getattr(opt, "current_value", None)
     current = current if isinstance(current, str) and current else None
-    raw = getattr(models, "available_models", None) or []
-    # Drop entries without a usable id: an empty/missing ``model_id`` is an
-    # invalid picker option and an unusable ``set_session_model`` target, so we
-    # filter it out rather than surfacing ``model_id=""``.
-    available = [
-        info for info in (ACPModelInfo.from_protocol(m) for m in raw) if info.model_id
-    ]
+    available = []
+    for o in getattr(opt, "options", None) or []:
+        value = getattr(o, "value", None)
+        if isinstance(value, str) and value:
+            available.append(
+                ACPModelInfo(
+                    model_id=value,
+                    name=getattr(o, "name", None),
+                    description=getattr(o, "description", None),
+                )
+            )
     return current, available
 
 
@@ -532,12 +619,17 @@ async def _maybe_set_session_model(
     agent_name: str,
     session_id: str,
     acp_model: str | None,
+    *,
+    via_config_option: bool,
 ) -> bool:
     """Apply the *initial* session model right after session creation.
 
     This is the session-creation path only, gated on
     :attr:`~openhands.sdk.settings.acp_providers.ACPProviderInfo.supports_set_session_model`.
-    All built-in providers get a one-shot ``set_session_model`` call here.
+    All built-in providers get a one-shot model-apply call here, via whichever
+    protocol the session advertised (``via_config_option``):
+    ``set_config_option(configId="model")`` for configOptions-based servers
+    (codex-acp 0.16+, claude-agent-acp 0.46+), else ``set_session_model``.
     claude-agent-acp used to be skipped on the assumption that it already
     received the model via ``new_session()`` ``_meta``, but 0.30.0 silently
     ignores that payload (#3654) — the protocol call is the only path that
@@ -562,7 +654,9 @@ async def _maybe_set_session_model(
         return False
     provider = detect_acp_provider_by_agent_name(agent_name)
     if provider is not None and provider.supports_set_session_model:
-        await conn.set_session_model(model_id=acp_model, session_id=session_id)
+        await _apply_acp_model(
+            conn, session_id, acp_model, via_config_option=via_config_option
+        )
         return True
     # For unknown/custom providers, try the generic set_config_option method
     # which is a standard ACP protocol method for setting configuration options
@@ -595,6 +689,8 @@ async def _reapply_session_model_on_resume(
     agent_name: str,
     session_id: str,
     acp_model: str | None,
+    *,
+    via_config_option: bool,
 ) -> bool:
     """Reapply the persisted model to a *resumed* session.
 
@@ -629,8 +725,12 @@ async def _reapply_session_model_on_resume(
         return False
     try:
         if provider is not None:
-            # Known provider: use set_session_model
-            await conn.set_session_model(model_id=acp_model, session_id=session_id)
+            # Known provider: apply via the mechanism the resumed session uses
+            # (set_config_option for codex-acp 0.16+/claude-agent-acp 0.46+,
+            # else set_session_model).
+            await _apply_acp_model(
+                conn, session_id, acp_model, via_config_option=via_config_option
+            )
         else:
             # Unknown/custom provider: try set_config_option as fallback
             await conn.set_config_option(
@@ -1479,6 +1579,12 @@ def model_post_init(self, __context: object) -> None:
     _agent_version: str = PrivateAttr(
         default=""
     )  # ACP server version from InitializeResponse
+    # Which protocol this session uses to select the model: ``True`` ⇒
+    # ``session/set_config_option(configId="model")`` (codex-acp 0.16+,
+    # claude-agent-acp 0.46+); ``False`` ⇒ ``session/set_model`` (gemini-cli and
+    # older codex/claude). Detected from the session/new (or load_session)
+    # response at init and reused by runtime ``set_acp_model`` switches.
+    _model_via_config_option: bool = PrivateAttr(default=False)
     # The model the ACP server reported as active for this session, captured
     # from ``models.currentModelId`` on the new_session / load_session
     # response.  Overridden by ``self.acp_model`` when the caller explicitly
@@ -2417,6 +2523,9 @@ async def _init() -> tuple[
                     reported_model_id, available_models = _extract_session_models(
                         load_response
                     )
+                    self._model_via_config_option = (
+                        _session_selects_model_via_config_option(load_response)
+                    )
                     logger.info(
                         "Resumed ACP session %s (cwd=%s)",
                         _fingerprint_session_id(session_id),
@@ -2448,16 +2557,23 @@ async def _init() -> tuple[
                 )
                 session_id = response.session_id
                 reported_model_id, available_models = _extract_session_models(response)
+                # Detect the model-selection protocol the server advertised so
+                # init and later runtime switches use the right call.
+                self._model_via_config_option = (
+                    _session_selects_model_via_config_option(response)
+                )
                 # Initial-model protocol call for every built-in provider
-                # (codex, gemini, claude-code). The pinned claude CLI ignores
-                # the _meta above, so this set_session_model call is what
-                # actually applies the model (#3654); _meta is kept only as
-                # backward-compat for older claude CLIs that still honor it.
+                # (codex, gemini, claude-code). The pinned claude/codex CLIs
+                # ignore the _meta above, so this protocol call is what actually
+                # applies the model (#3654) — via set_config_option for
+                # configOptions-based servers, else set_session_model; _meta is
+                # kept only as backward-compat for older claude CLIs.
                 applied_via_call = await _maybe_set_session_model(
                     conn,
                     agent_name,
                     session_id,
                     self.acp_model,
+                    via_config_option=self._model_via_config_option,
                 )
                 # set_session_model is the authoritative signal that acp_model
                 # reached the server. _meta is a no-op on the pinned claude CLI,
@@ -2470,12 +2586,14 @@ async def _init() -> tuple[
                 # reapply the persisted (possibly runtime-switched) acp_model via
                 # the runtime-switch capability — otherwise the resumed live
                 # session would run on the server default while serialized state
-                # claims the switched model.
+                # claims the switched model. ``_model_via_config_option`` was set
+                # from the load_session response above.
                 override_applied = await _reapply_session_model_on_resume(
                     conn,
                     agent_name,
                     session_id,
                     self.acp_model,
+                    via_config_option=self._model_via_config_option,
                 )
 
             # Resolve the model the agent will actually use.
@@ -3506,6 +3624,9 @@ def set_acp_model(self, model: str) -> None:
                 f"ACP provider '{provider.key}' does not support runtime model "
                 "switching via set_session_model."
             )
+        # ``has_live_acp_session`` above guarantees a session id; narrow for the
+        # type checker.
+        assert self._session_id is not None
         # Bounded round-trip: this runs while LocalConversation.switch_acp_model
         # holds the state lock, so a server that accepts the call but never
         # answers must not wedge the lock indefinitely. On timeout / protocol
@@ -3513,8 +3634,11 @@ def set_acp_model(self, model: str) -> None:
         # LLM is only updated once the live session has actually switched.
         try:
             self._executor.run_async(
-                self._conn.set_session_model(
-                    model_id=model, session_id=self._session_id
+                _apply_acp_model(
+                    self._conn,
+                    self._session_id,
+                    model,
+                    via_config_option=self._model_via_config_option,
                 ),
                 timeout=self.acp_prompt_timeout,
             )
@@ -3530,8 +3654,13 @@ def set_acp_model(self, model: str) -> None:
             # method-not-found, invalid model id) as a ValueError so callers —
             # and the agent-server route — treat it as a 400-class client error
             # rather than an opaque 500.
+            method = (
+                "set_config_option(model)"
+                if self._model_via_config_option
+                else "set_session_model"
+            )
             raise ValueError(
-                f"ACP server rejected set_session_model(model={model!r}): {e}"
+                f"ACP server rejected {method}(model={model!r}): {e}"
             ) from e
         # Reflect the live model on the sentinel LLM + metrics so cost/token
         # accounting and serialized state show the model actually in use
diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py
index 39e1ba4ab8..687767a522 100644
--- a/openhands-sdk/openhands/sdk/settings/acp_providers.py
+++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py
@@ -293,18 +293,18 @@ class ACPProviderInfo:
 # ``acp_model`` outside these lists is always allowed.
 # ---------------------------------------------------------------------------
 
-# Canonical model IDs the Claude Code CLI accepts, curated to the newest
-# generation per capability tier. ``opus[1m]`` is the SDK-documented
-# version-agnostic 1M-context alias (auto-tracks the newest 1M-capable model —
-# keep its label version-less to match). ``opusplan`` routes planning to Opus
-# and execution to Sonnet.
+# Model IDs the Claude Code CLI accepts, mirroring the ``model`` configOptions
+# select claude-agent-acp 0.46.0 reports at ``session/new`` (the short aliases
+# the CLI's own ``/model`` menu offers, switched via ``set_config_option``).
+# ``opus[1m]`` is the SDK-documented version-agnostic 1M-context alias and the
+# CLI's own default (``currentValue``); ``default`` is the CLI's recommended
+# tier (Opus 4.8 · 1M). The CLI also accepts ids outside this list, so these
+# are curated suggestions, not an access check.
 _CLAUDE_MODELS: tuple[ACPModelOption, ...] = (
-    ACPModelOption(id="claude-fable-5", label="Claude Fable 5"),
-    ACPModelOption(id="claude-opus-4-8", label="Claude Opus 4.8"),
-    ACPModelOption(id="opus[1m]", label="Claude Opus (1M)"),
-    ACPModelOption(id="claude-sonnet-4-6", label="Claude Sonnet 4.6"),
-    ACPModelOption(id="claude-haiku-4-5", label="Claude Haiku 4.5"),
-    ACPModelOption(id="opusplan", label="Opus (plan) + Sonnet (execute)"),
+    ACPModelOption(id="default", label="Default (recommended)"),
+    ACPModelOption(id="opus[1m]", label="Claude Opus 4.8 (1M)"),
+    ACPModelOption(id="sonnet", label="Claude Sonnet 4.6"),
+    ACPModelOption(id="haiku", label="Claude Haiku 4.5"),
 )
 
 # Model IDs accepted by ``@zed-industries/codex-acp`` (``session/new``
@@ -323,20 +323,19 @@ class ACPProviderInfo:
     ACPModelOption(id="gpt-5.4-mini/xhigh", label="GPT-5.4 Mini (xhigh)"),
 )
 
-# Model IDs accepted by ``@google/gemini-cli --acp``. The ``auto-gemini-*``
-# entries delegate version selection to the CLI's router; the explicit
-# ``gemini-3.1-*`` / ``gemini-2.5-*`` entries pin to a specific snapshot.
+# Model IDs accepted by ``@google/gemini-cli --acp``. Mirrors the
+# ``availableModels`` the CLI reports at ``session/new`` on the pinned version
+# (gemini-cli 0.46.0). ``auto`` delegates version selection to the CLI's
+# router; the explicit ``gemini-*`` entries pin to a specific snapshot. The CLI
+# also accepts ids outside this list (it remaps them at generation), so these
+# are curated suggestions, not an access check.
 _GEMINI_MODELS: tuple[ACPModelOption, ...] = (
-    ACPModelOption(id="auto-gemini-3", label="Auto (Gemini 3)"),
-    ACPModelOption(id="auto-gemini-2.5", label="Auto (Gemini 2.5)"),
-    ACPModelOption(id="gemini-3.1-pro-preview", label="Gemini 3.1 Pro (preview)"),
+    ACPModelOption(id="auto", label="Auto"),
+    ACPModelOption(id="gemini-3-pro-preview", label="Gemini 3 Pro (preview)"),
     ACPModelOption(id="gemini-3-flash-preview", label="Gemini 3 Flash (preview)"),
-    ACPModelOption(
-        id="gemini-3.1-flash-lite-preview", label="Gemini 3.1 Flash Lite (preview)"
-    ),
+    ACPModelOption(id="gemini-3.1-flash-lite", label="Gemini 3.1 Flash Lite"),
     ACPModelOption(id="gemini-2.5-pro", label="Gemini 2.5 Pro"),
     ACPModelOption(id="gemini-2.5-flash", label="Gemini 2.5 Flash"),
-    ACPModelOption(id="gemini-2.5-flash-lite", label="Gemini 2.5 Flash Lite"),
 )
 
 
@@ -378,9 +377,15 @@ class ACPProviderInfo:
 # permission-disabling session mode. In the image the binary rewrite in
 # `ACPAgentSettings.resolve_acp_command` runs the pinned `binary_name` instead,
 # so the `@version` suffix is a no-op there.
-CLAUDE_AGENT_ACP_VERSION = "0.30.0"
-CODEX_ACP_VERSION = "0.15.0"
-GEMINI_CLI_VERSION = "0.38.0"
+#
+# codex-acp 0.16.0 and claude-agent-acp 0.46.0 dropped the UNSTABLE ``models``
+# capability + ``session/set_model`` in favour of a ``model`` ``configOptions``
+# select driven by ``session/set_config_option``; the SDK auto-detects and uses
+# the right call (see ``_session_selects_model_via_config_option`` in
+# acp_agent.py), so model switching keeps working across the bump (#3772).
+CLAUDE_AGENT_ACP_VERSION = "0.46.0"
+CODEX_ACP_VERSION = "0.16.0"
+GEMINI_CLI_VERSION = "0.46.0"
 
 
 ACP_PROVIDERS: Mapping[str, ACPProviderInfo] = MappingProxyType(
@@ -397,17 +402,20 @@ class ACPProviderInfo:
             base_url_env_var="ANTHROPIC_BASE_URL",
             default_session_mode="bypassPermissions",
             agent_name_patterns=("claude-agent",),
-            # claude-agent-acp 0.30.0 silently ignores the session-_meta model
-            # selection (the requested model only becomes a picker option; the
-            # session keeps running "default"), so the init path must push the
-            # model via session/set_model like codex/gemini (#3654). The _meta
+            # claude-agent-acp ignores the session-_meta model selection (the
+            # requested model only becomes a picker option; the session keeps
+            # running its default), so the init path must push the model via a
+            # protocol call (#3654). On 0.46.0 that call is
+            # ``set_config_option(configId="model")`` rather than
+            # ``set_session_model`` (auto-detected from session/new); the _meta
             # payload (session_meta_key below) is still sent — harmless, and
             # picks up the same model if a future CLI honours it.
             supports_set_session_model=True,
             supports_runtime_model_switch=True,
             session_meta_key="claudeCode",
             available_models=_CLAUDE_MODELS,
-            default_model="claude-opus-4-8",
+            # The CLI's own default (model configOptions ``currentValue``).
+            default_model="opus[1m]",
             binary_name="claude-agent-acp",
             data_dir_env_var="CLAUDE_CONFIG_DIR",
         ),
@@ -443,18 +451,25 @@ class ACPProviderInfo:
             ),
             api_key_env_var="GEMINI_API_KEY",
             base_url_env_var="GEMINI_BASE_URL",
-            default_session_mode="yolo",
+            # gemini-cli 0.46.0 rejects ``set_session_mode("yolo")`` at session
+            # init with a JSON-RPC -32603 (it gates yolo on folder-trust that is
+            # not yet established right after session/new), which would crash
+            # headless startup. ``default`` is accepted at init, and the SDK's
+            # ACP bridge already auto-approves every ``session/request_permission``
+            # the server sends, so permission prompts never block regardless of
+            # mode — making ``default`` the headless-safe choice. See #3772.
+            default_session_mode="default",
             agent_name_patterns=("gemini-cli",),
             supports_set_session_model=True,
             supports_runtime_model_switch=True,
             session_meta_key=None,
             available_models=_GEMINI_MODELS,
-            # Match the Gemini CLI's own no-model-configured default
-            # (``DEFAULT_GEMINI_MODEL_AUTO``), i.e. the auto-router — not a
-            # manually-pinned snapshot. Pinning ``gemini-2.5-pro`` here would
-            # make downstream clients persist a value that bypasses the CLI's
-            # auto-routing.
-            default_model="auto-gemini-2.5",
+            # Match the Gemini CLI's own auto-router rather than a manually
+            # pinned snapshot. Pinning e.g. ``gemini-2.5-pro`` here would make
+            # downstream clients persist a value that bypasses the CLI's
+            # auto-routing. ``auto`` is the router id the CLI reports in its
+            # 0.46.0 ``availableModels``.
+            default_model="auto",
             file_secrets=_GEMINI_FILE_SECRETS,
             binary_name="gemini",
             # Gemini CLI has no dedicated config-dir var; it hard-codes
@@ -516,7 +531,7 @@ def detect_acp_provider_by_command(
     *caller-controlled*: each token is reduced to its basename (last path segment,
     minus a trailing ``@version`` pin) and a provider matches only when that
     basename *starts with* one of its patterns. This accepts the real forms —
-    ``@zed-industries/codex-acp``, ``@google/gemini-cli@0.43.0``,
+    ``@zed-industries/codex-acp``, ``@google/gemini-cli@0.46.0``,
     ``/opt/node_modules/.bin/codex-acp`` — while rejecting incidental substrings
     like ``my-codex-acp-wrapper`` or ``/opt/shims/not-codex-acp`` that a plain
     substring test would misattribute.
diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py
index 34eba1e693..24eb2280af 100644
--- a/tests/sdk/agent/test_acp_agent.py
+++ b/tests/sdk/agent/test_acp_agent.py
@@ -9,6 +9,7 @@
 from base64 import urlsafe_b64encode
 from concurrent.futures import Future
 from pathlib import Path
+from types import SimpleNamespace
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch
 
@@ -33,6 +34,7 @@
     _reapply_session_model_on_resume,
     _select_auth_method,
     _serialize_tool_content,
+    _session_selects_model_via_config_option,
     _strip_inherited_npm_env,
 )
 from openhands.sdk.agent.acp_models import ACPModelInfo
@@ -4426,41 +4428,49 @@ def test_noop_when_caller_already_set_provider(self):
 
 class TestMaybeSetSessionModel:
     @pytest.mark.asyncio
-    async def test_codex_agent_uses_protocol_model_override(self):
+    async def test_set_session_model_mechanism(self):
+        # ``via_config_option=False`` (gemini-cli, older codex/claude with the
+        # ``models`` capability) applies the model via ``set_session_model``.
         conn = AsyncMock()
         applied = await _maybe_set_session_model(
-            conn, "codex-acp", "session-1", "gpt-5.4"
+            conn, "codex-acp", "session-1", "gpt-5.4", via_config_option=False
         )
         conn.set_session_model.assert_awaited_once_with(
             model_id="gpt-5.4",
             session_id="session-1",
         )
+        conn.set_config_option.assert_not_called()
         # The override was actually pushed to the server via the protocol call.
         assert applied is True
 
     @pytest.mark.asyncio
-    async def test_claude_agent_uses_protocol_model_override(self):
-        # claude-agent-acp 0.30.0 silently ignores the session-_meta model
-        # selection (#3654), so the init path pushes the model via the same
-        # one-shot set_session_model call as codex/gemini.
+    async def test_set_config_option_mechanism(self):
+        # ``via_config_option=True`` (codex-acp 0.16+, claude-agent-acp 0.46+)
+        # applies the model via ``set_config_option(configId="model")``.
         conn = AsyncMock()
         applied = await _maybe_set_session_model(
             conn,
             "claude-agent-acp",
             "session-1",
-            "claude-opus-4-8",
+            "opus[1m]",
+            via_config_option=True,
         )
-        conn.set_session_model.assert_awaited_once_with(
-            model_id="claude-opus-4-8",
+        conn.set_config_option.assert_awaited_once_with(
+            config_id="model",
+            value="opus[1m]",
             session_id="session-1",
         )
+        conn.set_session_model.assert_not_called()
         assert applied is True
 
     @pytest.mark.asyncio
     async def test_missing_model_skips_protocol_override(self):
         conn = AsyncMock()
-        applied = await _maybe_set_session_model(conn, "codex-acp", "session-1", None)
+        applied = await _maybe_set_session_model(
+            conn, "codex-acp", "session-1", None, via_config_option=True
+        )
         conn.set_session_model.assert_not_called()
+        conn.set_config_option.assert_not_called()
         assert applied is False
 
     @pytest.mark.asyncio
@@ -4469,7 +4479,7 @@ async def test_unknown_provider_uses_set_config_option_fallback(self):
         # for model selection, which is a standard ACP method.
         conn = AsyncMock()
         applied = await _maybe_set_session_model(
-            conn, "devin-cli", "session-1", "kimi-k2-6"
+            conn, "devin-cli", "session-1", "kimi-k2-6", via_config_option=False
         )
         conn.set_session_model.assert_not_called()
         conn.set_config_option.assert_awaited_once_with(
@@ -4488,7 +4498,7 @@ async def test_unknown_provider_set_config_option_failure_is_tolerated(self):
             code=-32601, message="method not found"
         )
         applied = await _maybe_set_session_model(
-            conn, "some-custom-acp", "session-1", "whatever"
+            conn, "some-custom-acp", "session-1", "whatever", via_config_option=False
         )
         conn.set_session_model.assert_not_called()
         conn.set_config_option.assert_awaited_once()
@@ -4499,12 +4509,18 @@ class TestReapplySessionModelOnResume:
     """Resume reapplies the persisted model via the runtime-switch gate."""
 
     @pytest.mark.asyncio
-    async def test_claude_reapplies_persisted_model_on_resume(self):
+    async def test_reapply_via_set_session_model_mechanism(self):
         # load_session() carries no model selection, so on resume the persisted
-        # model must be reapplied via the runtime-switch gate.
+        # model must be reapplied via the runtime-switch gate. Servers with the
+        # ``models`` capability (gemini-cli, older codex/claude) use
+        # ``set_session_model``.
         conn = AsyncMock()
         applied = await _reapply_session_model_on_resume(
-            conn, "claude-agent-acp", "sess-1", "claude-haiku-4-5-20251001"
+            conn,
+            "claude-agent-acp",
+            "sess-1",
+            "claude-haiku-4-5-20251001",
+            via_config_option=False,
         )
         conn.set_session_model.assert_awaited_once_with(
             model_id="claude-haiku-4-5-20251001", session_id="sess-1"
@@ -4512,23 +4528,27 @@ async def test_claude_reapplies_persisted_model_on_resume(self):
         assert applied is True
 
     @pytest.mark.asyncio
-    async def test_codex_reapplies_persisted_model_on_resume(self):
+    async def test_reapply_via_set_config_option_mechanism(self):
+        # codex-acp 0.16+/claude-agent-acp 0.46+ reapply via
+        # ``set_config_option(configId="model")``.
         conn = AsyncMock()
         applied = await _reapply_session_model_on_resume(
-            conn, "codex-acp", "sess-1", "gpt-5.4/low"
+            conn, "codex-acp", "sess-1", "gpt-5.4/low", via_config_option=True
         )
-        conn.set_session_model.assert_awaited_once_with(
-            model_id="gpt-5.4/low", session_id="sess-1"
+        conn.set_config_option.assert_awaited_once_with(
+            config_id="model", value="gpt-5.4/low", session_id="sess-1"
         )
+        conn.set_session_model.assert_not_called()
         assert applied is True
 
     @pytest.mark.asyncio
     async def test_missing_model_skips_reapply(self):
         conn = AsyncMock()
         applied = await _reapply_session_model_on_resume(
-            conn, "claude-agent-acp", "sess-1", None
+            conn, "claude-agent-acp", "sess-1", None, via_config_option=True
         )
         conn.set_session_model.assert_not_called()
+        conn.set_config_option.assert_not_called()
         assert applied is False
 
     @pytest.mark.asyncio
@@ -4537,7 +4557,7 @@ async def test_unknown_provider_attempts_reapply_via_set_config_option(self):
         # as a fallback, which is a standard ACP method.
         conn = AsyncMock()
         applied = await _reapply_session_model_on_resume(
-            conn, "devin-cli", "sess-1", "kimi-k2-6"
+            conn, "devin-cli", "sess-1", "kimi-k2-6", via_config_option=False
         )
         conn.set_session_model.assert_not_called()
         conn.set_config_option.assert_awaited_once_with(
@@ -4569,7 +4589,7 @@ async def test_known_unsupported_provider_skips_reapply(self):
             return_value=unsupported,
         ):
             applied = await _reapply_session_model_on_resume(
-                conn, "legacy-acp", "sess-1", "x"
+                conn, "legacy-acp", "sess-1", "x", via_config_option=False
             )
         conn.set_session_model.assert_not_called()
         assert applied is False
@@ -4584,7 +4604,7 @@ async def test_client_rejection_is_swallowed_on_resume(self):
             code=-32601, message="method not found"
         )
         applied = await _reapply_session_model_on_resume(
-            conn, "some-custom-acp", "sess-1", "whatever"
+            conn, "some-custom-acp", "sess-1", "whatever", via_config_option=False
         )
         conn.set_config_option.assert_awaited_once()
         # Rejected => the live session kept the server default, so the override
@@ -4601,7 +4621,7 @@ async def test_any_request_error_is_swallowed_on_resume(self):
             code=-32603, message="internal error"
         )
         applied = await _reapply_session_model_on_resume(
-            conn, "codex-acp", "sess-1", "gpt-5.4/low"
+            conn, "codex-acp", "sess-1", "gpt-5.4/low", via_config_option=False
         )
         conn.set_session_model.assert_awaited_once()
         assert applied is False
@@ -4650,29 +4670,47 @@ def test_set_acp_model_raises_when_no_live_session(self):
 
 
 class TestSetACPModel:
-    """Runtime (mid-conversation) model switching via set_session_model."""
+    """Runtime (mid-conversation) model switching via the session's mechanism."""
 
     @staticmethod
-    def _wire(agent: ACPAgent, agent_name: str) -> ACPAgent:
+    def _wire(
+        agent: ACPAgent, agent_name: str, *, via_config_option: bool = False
+    ) -> ACPAgent:
         agent._conn = MagicMock()
         agent._session_id = "sess-1"
         agent._agent_name = agent_name
+        agent._model_via_config_option = via_config_option
         executor = MagicMock()
         executor.run_async = MagicMock()
         agent._executor = executor
         return agent
 
     def test_switches_model_on_live_codex_session(self):
+        # Default (models-capability) session ⇒ set_session_model.
         agent = self._wire(_make_agent(), "codex-acp")
         agent.set_acp_model("gpt-5.4/low")
         agent._conn.set_session_model.assert_called_once_with(
             model_id="gpt-5.4/low", session_id="sess-1"
         )
+        agent._conn.set_config_option.assert_not_called()
         agent._executor.run_async.assert_called_once()
         # Sentinel LLM + metrics reflect the live model for cost/token tracking.
         assert agent.llm.model == "gpt-5.4/low"
         assert agent.llm.metrics.model_name == "gpt-5.4/low"
 
+    def test_switches_model_via_config_option_session(self):
+        # configOptions session (codex-acp 0.16+, claude-agent-acp 0.46+) ⇒
+        # set_config_option(configId="model").
+        agent = self._wire(_make_agent(), "codex-acp", via_config_option=True)
+        agent.set_acp_model("gpt-5.4/low")
+        agent._conn.set_config_option.assert_called_once_with(
+            config_id="model", value="gpt-5.4/low", session_id="sess-1"
+        )
+        agent._conn.set_session_model.assert_not_called()
+        agent._executor.run_async.assert_called_once()
+        assert agent.llm.model == "gpt-5.4/low"
+        assert agent._current_model_id == "gpt-5.4/low"
+
     def test_claude_provider_supports_runtime_switch(self):
         agent = self._wire(_make_agent(), "claude-agent-acp")
         agent.set_acp_model("claude-haiku-4-5-20251001")
@@ -5041,9 +5079,11 @@ def _fake_run_async(_coro, **_kwargs):
 class TestGeminiSessionModel:
     @pytest.mark.asyncio
     async def test_gemini_cli_uses_protocol_model_override(self):
+        # gemini-cli keeps the ``models`` capability, so model selection rides
+        # ``set_session_model`` (via_config_option=False).
         conn = AsyncMock()
         await _maybe_set_session_model(
-            conn, "gemini-cli", "session-1", "gemini-3-flash"
+            conn, "gemini-cli", "session-1", "gemini-3-flash", via_config_option=False
         )
         conn.set_session_model.assert_awaited_once_with(
             model_id="gemini-3-flash",
@@ -7033,6 +7073,120 @@ def test_drops_entries_without_usable_id(self):
         assert avail == [ACPModelInfo(model_id="good", name="Good", description=None)]
 
 
+class TestConfigOptionModelMechanism:
+    """Model selection via the ``model`` ``configOptions`` select.
+
+    codex-acp 0.16+ and claude-agent-acp 0.46+ dropped the UNSTABLE ``models``
+    capability + ``session/set_model`` in favour of a ``model`` config-option
+    select driven by ``session/set_config_option``. ``_extract_session_models``
+    reads that select, and ``_session_selects_model_via_config_option`` picks
+    the apply mechanism.
+    """
+
+    def _select(self, *, id="model", type="select", current_value, options):
+        opt = SimpleNamespace(
+            id=id, type=type, current_value=current_value, options=options
+        )
+        return opt
+
+    def _opt(self, value, name=None, description=None):
+        return SimpleNamespace(value=value, name=name, description=description)
+
+    def _response(self, *, models=None, config_options):
+        return SimpleNamespace(models=models, config_options=config_options)
+
+    def test_extracts_model_state_from_config_option(self):
+        # claude-agent-acp 0.46: model select with short aliases.
+        response = self._response(
+            config_options=[
+                self._select(
+                    current_value="opus[1m]",
+                    options=[
+                        self._opt("default", "Default (recommended)", "Opus 4.8 · 1M"),
+                        self._opt("opus[1m]", "Opus", "Opus 4.8 · 1M"),
+                        self._opt("sonnet", "Sonnet", "Sonnet 4.6"),
+                        self._opt("haiku", "Haiku", "Haiku 4.5"),
+                    ],
+                )
+            ],
+        )
+        cur, avail = _extract_session_models(response)
+        assert cur == "opus[1m]"
+        assert avail == [
+            ACPModelInfo(
+                model_id="default",
+                name="Default (recommended)",
+                description="Opus 4.8 · 1M",
+            ),
+            ACPModelInfo(model_id="opus[1m]", name="Opus", description="Opus 4.8 · 1M"),
+            ACPModelInfo(model_id="sonnet", name="Sonnet", description="Sonnet 4.6"),
+            ACPModelInfo(model_id="haiku", name="Haiku", description="Haiku 4.5"),
+        ]
+
+    def test_models_capability_wins_over_config_option(self):
+        # If a server somehow carries both, the ``models`` capability is used.
+        models = MagicMock()
+        models.current_model_id = "from-models"
+        models.available_models = []
+        response = self._response(
+            models=models,
+            config_options=[
+                self._select(
+                    current_value="from-config", options=[self._opt("from-config", "X")]
+                )
+            ],
+        )
+        cur, avail = _extract_session_models(response)
+        assert cur == "from-models"
+        assert avail == []
+        assert _session_selects_model_via_config_option(response) is False
+
+    def test_detects_config_option_mechanism(self):
+        response = self._response(
+            config_options=[
+                self._select(
+                    current_value="gpt-5.5", options=[self._opt("gpt-5.5", "GPT-5.5")]
+                )
+            ],
+        )
+        assert _session_selects_model_via_config_option(response) is True
+
+    def test_ignores_non_model_config_options(self):
+        # A ``mode`` select alongside no ``model`` select ⇒ old mechanism.
+        response = self._response(
+            config_options=[
+                SimpleNamespace(
+                    id="mode",
+                    type="select",
+                    current_value="default",
+                    options=[self._opt("default", "Default")],
+                ),
+            ],
+        )
+        assert _session_selects_model_via_config_option(response) is False
+        assert _extract_session_models(response) == (None, None)
+
+    def test_drops_options_without_usable_value(self):
+        response = self._response(
+            config_options=[
+                self._select(
+                    current_value="ok",
+                    options=[
+                        self._opt(value=42),  # non-string -> dropped
+                        self._opt("ok", "Ok"),
+                        self._opt(value="", name="Empty"),  # empty -> dropped
+                    ],
+                )
+            ],
+        )
+        cur, avail = _extract_session_models(response)
+        assert cur == "ok"
+        assert avail == [ACPModelInfo(model_id="ok", name="Ok", description=None)]
+
+    def test_none_response_is_not_config_option(self):
+        assert _session_selects_model_via_config_option(None) is False
+
+
 class TestACPAgentAvailableModelsProperty:
     """``available_models`` exposes the server's model list verbatim.
 
diff --git a/tests/sdk/settings/test_acp_providers.py b/tests/sdk/settings/test_acp_providers.py
index 6ab36b4bc7..7d0390c7a9 100644
--- a/tests/sdk/settings/test_acp_providers.py
+++ b/tests/sdk/settings/test_acp_providers.py
@@ -35,14 +35,14 @@ def test_claude_code_metadata(self):
         assert info.base_url_env_var == "ANTHROPIC_BASE_URL"
         assert info.default_session_mode == "bypassPermissions"
         assert "claude-agent" in info.agent_name_patterns
-        # Initial selection rides session/set_model — claude-agent-acp 0.30.0
-        # silently ignores the session-_meta payload (#3654), which is still
-        # sent as best-effort (session_meta_key below).
+        # Initial selection rides a protocol call — claude-agent-acp ignores the
+        # session-_meta payload (#3654), which is still sent as best-effort
+        # (session_meta_key below). On 0.46.0 the call is set_config_option.
         assert info.supports_set_session_model is True
         assert info.supports_runtime_model_switch is True
         assert info.session_meta_key == "claudeCode"
-        assert info.default_model == "claude-opus-4-8"
-        assert any(m.id == "claude-opus-4-8" for m in info.available_models)
+        assert info.default_model == "opus[1m]"
+        assert any(m.id == "opus[1m]" for m in info.available_models)
         # Pinned binary exposed by the agent-server image wrappers.
         assert info.binary_name == "claude-agent-acp"
         assert info.data_dir_env_var == "CLAUDE_CONFIG_DIR"
@@ -71,13 +71,13 @@ def test_gemini_cli_metadata(self):
         assert "--acp" in info.default_command
         assert info.api_key_env_var == "GEMINI_API_KEY"
         assert info.base_url_env_var == "GEMINI_BASE_URL"
-        assert info.default_session_mode == "yolo"
+        assert info.default_session_mode == "default"
         assert "gemini-cli" in info.agent_name_patterns
         assert info.supports_set_session_model is True
         assert info.supports_runtime_model_switch is True
         assert info.session_meta_key is None
-        assert info.default_model == "auto-gemini-2.5"
-        assert any(m.id == "auto-gemini-2.5" for m in info.available_models)
+        assert info.default_model == "auto"
+        assert any(m.id == "auto" for m in info.available_models)
         # The Gemini CLI's ACP binary is just ``gemini`` (the ``--acp`` flag is
         # a trailing arg, preserved by resolve_acp_command on rewrite).
         assert info.binary_name == "gemini"
diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py
index 9930c448f4..3f43ebcff4 100644
--- a/tests/sdk/test_settings.py
+++ b/tests/sdk/test_settings.py
@@ -883,7 +883,7 @@ def test_acp_create_agent_uses_server_default_command(
     assert agent.acp_command == [
         "npx",
         "-y",
-        "@agentclientprotocol/claude-agent-acp@0.30.0",
+        "@agentclientprotocol/claude-agent-acp@0.46.0",
     ]
     assert agent.acp_model == "claude-opus-4-6"
     # The authoritative provider key is carried onto the agent.
@@ -1034,7 +1034,7 @@ def test_acp_resolve_command_rewrites_versioned_npx_to_pinned_binary(
     monkeypatch.setattr(shutil, "which", _which_returning("codex-acp"))
     for pkg in (
         "@zed-industries/codex-acp",
-        "@zed-industries/codex-acp@0.15.0",
+        "@zed-industries/codex-acp@0.16.0",
         "@zed-industries/codex-acp@0.11.1",
     ):
         settings = ACPAgentSettings(
@@ -1057,7 +1057,7 @@ def test_acp_resolve_command_keeps_npx_when_binary_absent(
     assert settings.resolve_acp_command() == [
         "npx",
         "-y",
-        "@zed-industries/codex-acp@0.15.0",
+        "@zed-industries/codex-acp@0.16.0",
     ]
 
 

From 1f6b2b4d54ccd62c2e1d5f0f67b3e6c37888c4ef Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@openhands.dev>
Date: Wed, 17 Jun 2026 12:48:44 +0200
Subject: [PATCH 02/14] harden(acp): fall back to the other model-select
 mechanism on method-not-found
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Response-detection (`_session_selects_model_via_config_option`) picks
set_config_option vs set_session_model from the session response and is correct
for every validated CLI. But it reads an UNSTABLE capability whose shape can
vary by build/auth state, so a misdetect would crash session init or a model
switch with JSON-RPC -32601 "Method not found".

Add `_apply_acp_model_with_fallback`: if the chosen model-apply call returns
-32601, retry once with the other mechanism (and, for runtime switches, remember
the working one). Wired into init (`_maybe_set_session_model`) and
`ACPAgent.set_acp_model`; resume already tolerates rejection by keeping the
server default, so it's left as-is. A non-method-not-found error (e.g. -32602
invalid model) still propagates — it's a real client error, not a
wrong-mechanism signal.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/agent/acp_agent.py          | 116 ++++++++++++++----
 tests/sdk/agent/test_acp_agent.py             | 100 +++++++++++++++
 2 files changed, 195 insertions(+), 21 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py
index 3eed076262..aee51a308b 100644
--- a/openhands-sdk/openhands/sdk/agent/acp_agent.py
+++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py
@@ -140,6 +140,13 @@
 #          upstream model 500s, and transient infrastructure errors.
 _RETRIABLE_SERVER_ERROR_CODES: frozenset[int] = frozenset({-32603})
 
+# -32601 = "Method not found" (JSON-RPC spec). An ACP server raises this when a
+# model-selection call uses the mechanism it does *not* implement — e.g.
+# `session/set_model` on a CLI that moved model selection to `configOptions`
+# (codex-acp 0.16+, claude-agent-acp 0.46+), or vice versa. We use it to fall
+# back to the other mechanism if response-detection picked the wrong one.
+_METHOD_NOT_FOUND_CODE: Final[int] = -32601
+
 
 # Maximum characters for ACP tool call content — matches MAX_CMD_OUTPUT_SIZE
 # used by the terminal tool and the default max_message_chars in LLM config.
@@ -437,6 +444,48 @@ def _apply_acp_model(
     return conn.set_session_model(model_id=model, session_id=session_id)
 
 
+def _is_method_not_found(exc: ACPRequestError) -> bool:
+    """Whether ``exc`` is a JSON-RPC "method not found" — i.e. the server does
+    not implement the model-selection call we used."""
+    return exc.code == _METHOD_NOT_FOUND_CODE
+
+
+async def _apply_acp_model_with_fallback(
+    conn: ClientSideConnection,
+    session_id: str,
+    model: str,
+    *,
+    via_config_option: bool,
+) -> bool:
+    """Apply ``model`` via the detected mechanism, falling back to the other if
+    the server reports the method missing.
+
+    Response-detection (``_session_selects_model_via_config_option``) is correct
+    for every CLI we've validated, but it reads an UNSTABLE capability and the
+    response shape can vary by build/auth state. If the chosen call raises
+    ``-32601 "Method not found"``, the server simply uses the *other* mechanism,
+    so we retry with it instead of crashing session init. Returns the
+    ``via_config_option`` value that actually applied the model.
+    """
+    try:
+        await _apply_acp_model(
+            conn, session_id, model, via_config_option=via_config_option
+        )
+        return via_config_option
+    except ACPRequestError as exc:
+        if not _is_method_not_found(exc):
+            raise
+        logger.info(
+            "ACP model-apply via %s rejected as method-not-found; retrying via %s",
+            "set_config_option" if via_config_option else "set_session_model",
+            "set_session_model" if via_config_option else "set_config_option",
+        )
+        await _apply_acp_model(
+            conn, session_id, model, via_config_option=not via_config_option
+        )
+        return not via_config_option
+
+
 def _extract_session_models(
     response: Any,
 ) -> tuple[str | None, list[ACPModelInfo] | None]:
@@ -654,7 +703,7 @@ async def _maybe_set_session_model(
         return False
     provider = detect_acp_provider_by_agent_name(agent_name)
     if provider is not None and provider.supports_set_session_model:
-        await _apply_acp_model(
+        await _apply_acp_model_with_fallback(
             conn, session_id, acp_model, via_config_option=via_config_option
         )
         return True
@@ -727,7 +776,9 @@ async def _reapply_session_model_on_resume(
         if provider is not None:
             # Known provider: apply via the mechanism the resumed session uses
             # (set_config_option for codex-acp 0.16+/claude-agent-acp 0.46+,
-            # else set_session_model).
+            # else set_session_model). A rejection is already tolerated below
+            # (the session keeps the server default), so resume doesn't need the
+            # cross-mechanism fallback the init/switch paths use.
             await _apply_acp_model(
                 conn, session_id, acp_model, via_config_option=via_config_option
             )
@@ -3643,25 +3694,48 @@ def set_acp_model(self, model: str) -> None:
                 timeout=self.acp_prompt_timeout,
             )
         except ACPRequestError as e:
-            # Server-internal failures (JSON-RPC -32603) are not the caller's
-            # fault, and the prompt path already treats them as retriable. Let
-            # them propagate (-> 5xx) instead of mislabeling them as a 400
-            # client error.
-            if e.code in _RETRIABLE_SERVER_ERROR_CODES:
-                raise
-            # acp.exceptions.RequestError derives from Exception (not
-            # RuntimeError); surface a true client/protocol rejection (e.g.
-            # method-not-found, invalid model id) as a ValueError so callers —
-            # and the agent-server route — treat it as a 400-class client error
-            # rather than an opaque 500.
-            method = (
-                "set_config_option(model)"
-                if self._model_via_config_option
-                else "set_session_model"
-            )
-            raise ValueError(
-                f"ACP server rejected {method}(model={model!r}): {e}"
-            ) from e
+            pending_error: ACPRequestError | None = e
+            if _is_method_not_found(e):
+                # The session uses the other model-selection mechanism (or the
+                # init-time detection picked the wrong one): retry once with it,
+                # and remember the working mechanism for later switches. If the
+                # retry also fails, fall through to the error translation below.
+                flipped = not self._model_via_config_option
+                try:
+                    self._executor.run_async(
+                        _apply_acp_model(
+                            self._conn,
+                            self._session_id,
+                            model,
+                            via_config_option=flipped,
+                        ),
+                        timeout=self.acp_prompt_timeout,
+                    )
+                except ACPRequestError as e2:
+                    pending_error = e2
+                else:
+                    self._model_via_config_option = flipped
+                    pending_error = None  # both selections applied
+            if pending_error is not None:
+                # Server-internal failures (JSON-RPC -32603) are not the caller's
+                # fault, and the prompt path already treats them as retriable.
+                # Let them propagate (-> 5xx) instead of mislabeling them as a
+                # 400 client error.
+                if pending_error.code in _RETRIABLE_SERVER_ERROR_CODES:
+                    raise pending_error
+                # acp.exceptions.RequestError derives from Exception (not
+                # RuntimeError); surface a true client/protocol rejection (e.g.
+                # method-not-found on both mechanisms, invalid model id) as a
+                # ValueError so callers — and the agent-server route — treat it
+                # as a 400-class client error rather than an opaque 500.
+                method = (
+                    "set_config_option(model)"
+                    if self._model_via_config_option
+                    else "set_session_model"
+                )
+                raise ValueError(
+                    f"ACP server rejected {method}(model={model!r}): {pending_error}"
+                ) from pending_error
         # Reflect the live model on the sentinel LLM + metrics so cost/token
         # accounting and serialized state show the model actually in use
         # (mirrors model_post_init). The ``acp_model`` field is frozen, so the
diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py
index 24eb2280af..002003682b 100644
--- a/tests/sdk/agent/test_acp_agent.py
+++ b/tests/sdk/agent/test_acp_agent.py
@@ -20,6 +20,7 @@
 
 from openhands.sdk.agent.acp_agent import (
     ACPAgent,
+    _apply_acp_model_with_fallback,
     _classify_acp_init_error,
     _codex_auth_file,
     _codex_base_url_overrides,
@@ -27,6 +28,7 @@
     _extract_session_models,
     _extract_token_usage,
     _image_url_to_acp_block,
+    _is_method_not_found,
     _mask_json_value,
     _maybe_set_session_model,
     _mcp_config_to_acp_servers,
@@ -4711,6 +4713,33 @@ def test_switches_model_via_config_option_session(self):
         assert agent.llm.model == "gpt-5.4/low"
         assert agent._current_model_id == "gpt-5.4/low"
 
+    def test_switch_falls_back_on_method_not_found(self):
+        # Detected set_session_model, but the live server only has the
+        # configOptions mechanism: the first apply raises -32601, the switch
+        # retries with the other call and remembers it.
+        agent = self._wire(_make_agent(), "codex-acp", via_config_option=False)
+        agent._executor.run_async.side_effect = [
+            ACPRequestError(code=-32601, message="Method not found"),
+            None,  # retry with the flipped mechanism succeeds
+        ]
+        agent.set_acp_model("gpt-5.4/low")
+        assert agent._executor.run_async.call_count == 2
+        # The working mechanism is remembered for later switches.
+        assert agent._model_via_config_option is True
+        assert agent.llm.model == "gpt-5.4/low"
+        assert agent._current_model_id == "gpt-5.4/low"
+
+    def test_switch_invalid_model_does_not_fall_back(self):
+        # A -32602 invalid-params is a real client error, not a wrong-mechanism
+        # signal: surface it as ValueError without a second call.
+        agent = self._wire(_make_agent(), "codex-acp", via_config_option=True)
+        agent._executor.run_async.side_effect = ACPRequestError(
+            code=-32602, message="Invalid params"
+        )
+        with pytest.raises(ValueError, match="rejected set_config_option"):
+            agent.set_acp_model("nonsense")
+        agent._executor.run_async.assert_called_once()
+
     def test_claude_provider_supports_runtime_switch(self):
         agent = self._wire(_make_agent(), "claude-agent-acp")
         agent.set_acp_model("claude-haiku-4-5-20251001")
@@ -7187,6 +7216,77 @@ def test_none_response_is_not_config_option(self):
         assert _session_selects_model_via_config_option(None) is False
 
 
+class TestApplyAcpModelFallback:
+    """Model-apply falls back to the other mechanism on -32601 method-not-found.
+
+    Response-detection is correct for every validated CLI, but it reads an
+    UNSTABLE capability; a misdetect (or a server that moved between mechanisms)
+    must self-heal rather than crash session init / a switch.
+    """
+
+    def test_is_method_not_found(self):
+        assert _is_method_not_found(ACPRequestError(code=-32601, message="x")) is True
+        assert _is_method_not_found(ACPRequestError(code=-32603, message="x")) is False
+        assert _is_method_not_found(ACPRequestError(code=-32602, message="x")) is False
+
+    @pytest.mark.asyncio
+    async def test_config_option_falls_back_to_set_session_model(self):
+        # Detected configOptions, but the server only has set_session_model.
+        conn = AsyncMock()
+        conn.set_config_option.side_effect = ACPRequestError(
+            code=-32601, message="Method not found"
+        )
+        effective = await _apply_acp_model_with_fallback(
+            conn, "sess-1", "gemini-3-flash", via_config_option=True
+        )
+        conn.set_config_option.assert_awaited_once()
+        conn.set_session_model.assert_awaited_once_with(
+            model_id="gemini-3-flash", session_id="sess-1"
+        )
+        assert effective is False  # the mechanism that actually worked
+
+    @pytest.mark.asyncio
+    async def test_set_session_model_falls_back_to_config_option(self):
+        # Detected set_session_model, but the server moved to configOptions
+        # (codex 0.16 / claude 0.46 reached via a stale/degraded detection).
+        conn = AsyncMock()
+        conn.set_session_model.side_effect = ACPRequestError(
+            code=-32601, message="Method not found"
+        )
+        effective = await _apply_acp_model_with_fallback(
+            conn, "sess-1", "opus[1m]", via_config_option=False
+        )
+        conn.set_session_model.assert_awaited_once()
+        conn.set_config_option.assert_awaited_once_with(
+            config_id="model", value="opus[1m]", session_id="sess-1"
+        )
+        assert effective is True
+
+    @pytest.mark.asyncio
+    async def test_no_fallback_when_primary_succeeds(self):
+        conn = AsyncMock()
+        effective = await _apply_acp_model_with_fallback(
+            conn, "sess-1", "sonnet", via_config_option=True
+        )
+        conn.set_config_option.assert_awaited_once()
+        conn.set_session_model.assert_not_called()
+        assert effective is True
+
+    @pytest.mark.asyncio
+    async def test_non_method_not_found_error_propagates(self):
+        # An invalid-model / server error is NOT a wrong-mechanism signal — it
+        # must propagate, not silently try the other call.
+        conn = AsyncMock()
+        conn.set_config_option.side_effect = ACPRequestError(
+            code=-32602, message="Invalid params"
+        )
+        with pytest.raises(ACPRequestError):
+            await _apply_acp_model_with_fallback(
+                conn, "sess-1", "bad", via_config_option=True
+            )
+        conn.set_session_model.assert_not_called()
+
+
 class TestACPAgentAvailableModelsProperty:
     """``available_models`` exposes the server's model list verbatim.
 

From 1f90820cf3789a73c88a2369b547dca69bd10ded Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@openhands.dev>
Date: Wed, 17 Jun 2026 13:04:03 +0200
Subject: [PATCH 03/14] test(acp): detect mechanism on real captured
 session/new payloads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address the QA "mocks don't catch the real shape" gap and refute the
hypothesised `config_options is None` field-name mismatch: parse the actual
claude-agent-acp 0.46.0 / codex-acp 0.16.0 / gemini-cli 0.46.0 `session/new`
wire payloads (captured from the pinned binaries) through the real
`acp.schema.NewSessionResponse`, then assert the detection + extraction:

- claude 0.46 / codex 0.16: `models: null` + a `model` configOptions select ⇒
  `_session_selects_model_via_config_option` is True; current/available read
  from the select (opus[1m] / gpt-5.5).
- gemini 0.46: `models` capability present, no model configOption ⇒ False
  (set_session_model), models read from `availableModels`.

These exercise the genuine alias mapping (`configOptions`, `currentValue`,
`modelId`), so any field-name/access-path drift fails the test.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 tests/sdk/agent/test_acp_agent.py | 123 +++++++++++++++++++++++++++++-
 1 file changed, 122 insertions(+), 1 deletion(-)

diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py
index 002003682b..0b8528b21a 100644
--- a/tests/sdk/agent/test_acp_agent.py
+++ b/tests/sdk/agent/test_acp_agent.py
@@ -15,7 +15,7 @@
 
 import pytest
 from acp.exceptions import RequestError as ACPRequestError
-from acp.schema import PromptResponse
+from acp.schema import NewSessionResponse, PromptResponse
 from pydantic import Field, SecretStr
 
 from openhands.sdk.agent.acp_agent import (
@@ -7216,6 +7216,127 @@ def test_none_response_is_not_config_option(self):
         assert _session_selects_model_via_config_option(None) is False
 
 
+# Real ``session/new`` payloads (the wire shape, by-alias) captured from the
+# pinned CLIs and round-tripped through ``acp.schema``. Parsed here through the
+# real ``NewSessionResponse`` — not a hand-built mock — so the test catches any
+# field-name / access-path drift in detection + extraction (e.g. ``models``
+# vs ``configOptions``, ``currentValue``, ``modelId``).
+_CLAUDE_046_SESSION = {
+    "sessionId": "sess-claude",
+    "models": None,
+    "configOptions": [
+        {
+            "id": "mode",
+            "name": "Mode",
+            "type": "select",
+            "currentValue": "default",
+            "options": [{"value": "default", "name": "Default"}],
+        },
+        {
+            "id": "model",
+            "name": "Model",
+            "type": "select",
+            "category": "model",
+            "currentValue": "opus[1m]",
+            "options": [
+                {"value": "default", "name": "Default (recommended)"},
+                {"value": "opus[1m]", "name": "Opus"},
+                {"value": "sonnet", "name": "Sonnet"},
+                {"value": "haiku", "name": "Haiku"},
+            ],
+        },
+        {
+            "id": "effort",
+            "name": "Thinking",
+            "type": "select",
+            "currentValue": "xhigh",
+            "options": [{"value": "xhigh", "name": "Xhigh"}],
+        },
+    ],
+}
+_CODEX_016_SESSION = {
+    "sessionId": "sess-codex",
+    "models": None,
+    "configOptions": [
+        {
+            "id": "mode",
+            "name": "Approval Preset",
+            "type": "select",
+            "currentValue": "read-only",
+            "options": [{"value": "read-only", "name": "Read Only"}],
+        },
+        {
+            "id": "model",
+            "name": "Model",
+            "type": "select",
+            "category": "model",
+            "currentValue": "gpt-5.5",
+            "options": [
+                {"value": "gpt-5.5", "name": "GPT-5.5"},
+                {"value": "gpt-5.4", "name": "GPT-5.4"},
+                {"value": "gpt-5.4-mini", "name": "GPT-5.4-Mini"},
+            ],
+        },
+        {
+            "id": "reasoning_effort",
+            "name": "Reasoning Effort",
+            "type": "select",
+            "currentValue": "xhigh",
+            "options": [{"value": "xhigh", "name": "Xhigh"}],
+        },
+    ],
+}
+_GEMINI_046_SESSION = {
+    "sessionId": "sess-gemini",
+    "models": {
+        "currentModelId": "gemini-3-flash-preview",
+        "availableModels": [
+            {"modelId": "auto", "name": "Auto"},
+            {"modelId": "gemini-3-pro-preview", "name": "gemini-3-pro-preview"},
+            {"modelId": "gemini-3-flash-preview", "name": "gemini-3-flash-preview"},
+            {"modelId": "gemini-2.5-pro", "name": "gemini-2.5-pro"},
+            {"modelId": "gemini-2.5-flash", "name": "gemini-2.5-flash"},
+            {"modelId": "gemini-3.1-flash-lite", "name": "gemini-3.1-flash-lite"},
+        ],
+    },
+    "configOptions": None,
+}
+
+
+class TestDetectionAgainstRealSessionResponses:
+    """Detection + extraction against the real CLI ``session/new`` wire shapes,
+    parsed through the actual ``acp.schema.NewSessionResponse`` (not a mock).
+
+    Guards the exact failure the QA bot hypothesised — that
+    ``getattr(response, "config_options")`` could be ``None`` due to a field-name
+    mismatch — by exercising the genuine alias-mapped schema.
+    """
+
+    def test_claude_046_uses_config_option(self):
+        resp = NewSessionResponse.model_validate(_CLAUDE_046_SESSION)
+        assert _session_selects_model_via_config_option(resp) is True
+        cur, avail = _extract_session_models(resp)
+        assert cur == "opus[1m]"
+        assert avail is not None
+        assert [m.model_id for m in avail] == ["default", "opus[1m]", "sonnet", "haiku"]
+
+    def test_codex_016_uses_config_option(self):
+        resp = NewSessionResponse.model_validate(_CODEX_016_SESSION)
+        assert _session_selects_model_via_config_option(resp) is True
+        cur, avail = _extract_session_models(resp)
+        assert cur == "gpt-5.5"
+        assert avail is not None
+        assert [m.model_id for m in avail] == ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"]
+
+    def test_gemini_046_uses_set_session_model(self):
+        resp = NewSessionResponse.model_validate(_GEMINI_046_SESSION)
+        assert _session_selects_model_via_config_option(resp) is False
+        cur, avail = _extract_session_models(resp)
+        assert cur == "gemini-3-flash-preview"
+        assert avail is not None
+        assert "gemini-3-pro-preview" in [m.model_id for m in avail]
+
+
 class TestApplyAcpModelFallback:
     """Model-apply falls back to the other mechanism on -32601 method-not-found.
 

From 29405ab35c4890e936dcba65a959a50bd7524a54 Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@openhands.dev>
Date: Wed, 17 Jun 2026 13:30:11 +0200
Subject: [PATCH 04/14] test(acp): make real-shape detection tests robust under
 xdist
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous revision built the session responses with
`NewSessionResponse.model_validate(<dict>)`, which re-parses the
`config_options` discriminated union (`SessionConfigOptionSelect |
SessionConfigOptionBoolean`). Under the full `-n auto` suite that path is
sensitive to a pre-existing cross-test interaction (a sibling test mutates
pydantic discriminated-union state), so the claude/codex cases failed in CI
while passing in isolation; the gemini case (plain `models`, no union) passed.

Construct the responses from already-instantiated `acp.schema` objects instead.
`NewSessionResponse.model_config` is `revalidate_instances="never"`, so passing
instances skips the union re-validation entirely — deterministic regardless of
test order — while still asserting detection/extraction against the real schema
types (real `config_options` / `current_value` / `model_id` fields). Adds an
explicit `config_options` field-name assertion (no parsing) that directly
refutes the "field name mismatch" hypothesis.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 tests/sdk/agent/test_acp_agent.py | 178 +++++++++++++++---------------
 1 file changed, 87 insertions(+), 91 deletions(-)

diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py
index 0b8528b21a..6029163ac4 100644
--- a/tests/sdk/agent/test_acp_agent.py
+++ b/tests/sdk/agent/test_acp_agent.py
@@ -15,7 +15,14 @@
 
 import pytest
 from acp.exceptions import RequestError as ACPRequestError
-from acp.schema import NewSessionResponse, PromptResponse
+from acp.schema import (
+    ModelInfo,
+    NewSessionResponse,
+    PromptResponse,
+    SessionConfigOptionSelect,
+    SessionConfigSelectOption,
+    SessionModelState,
+)
 from pydantic import Field, SecretStr
 
 from openhands.sdk.agent.acp_agent import (
@@ -7216,104 +7223,93 @@ def test_none_response_is_not_config_option(self):
         assert _session_selects_model_via_config_option(None) is False
 
 
-# Real ``session/new`` payloads (the wire shape, by-alias) captured from the
-# pinned CLIs and round-tripped through ``acp.schema``. Parsed here through the
-# real ``NewSessionResponse`` — not a hand-built mock — so the test catches any
-# field-name / access-path drift in detection + extraction (e.g. ``models``
-# vs ``configOptions``, ``currentValue``, ``modelId``).
-_CLAUDE_046_SESSION = {
-    "sessionId": "sess-claude",
-    "models": None,
-    "configOptions": [
-        {
-            "id": "mode",
-            "name": "Mode",
-            "type": "select",
-            "currentValue": "default",
-            "options": [{"value": "default", "name": "Default"}],
-        },
-        {
-            "id": "model",
-            "name": "Model",
-            "type": "select",
-            "category": "model",
-            "currentValue": "opus[1m]",
-            "options": [
-                {"value": "default", "name": "Default (recommended)"},
-                {"value": "opus[1m]", "name": "Opus"},
-                {"value": "sonnet", "name": "Sonnet"},
-                {"value": "haiku", "name": "Haiku"},
-            ],
-        },
-        {
-            "id": "effort",
-            "name": "Thinking",
-            "type": "select",
-            "currentValue": "xhigh",
-            "options": [{"value": "xhigh", "name": "Xhigh"}],
-        },
-    ],
-}
-_CODEX_016_SESSION = {
-    "sessionId": "sess-codex",
-    "models": None,
-    "configOptions": [
-        {
-            "id": "mode",
-            "name": "Approval Preset",
-            "type": "select",
-            "currentValue": "read-only",
-            "options": [{"value": "read-only", "name": "Read Only"}],
-        },
-        {
-            "id": "model",
-            "name": "Model",
-            "type": "select",
-            "category": "model",
-            "currentValue": "gpt-5.5",
-            "options": [
-                {"value": "gpt-5.5", "name": "GPT-5.5"},
-                {"value": "gpt-5.4", "name": "GPT-5.4"},
-                {"value": "gpt-5.4-mini", "name": "GPT-5.4-Mini"},
-            ],
-        },
-        {
-            "id": "reasoning_effort",
-            "name": "Reasoning Effort",
-            "type": "select",
-            "currentValue": "xhigh",
-            "options": [{"value": "xhigh", "name": "Xhigh"}],
-        },
-    ],
-}
-_GEMINI_046_SESSION = {
-    "sessionId": "sess-gemini",
-    "models": {
-        "currentModelId": "gemini-3-flash-preview",
-        "availableModels": [
-            {"modelId": "auto", "name": "Auto"},
-            {"modelId": "gemini-3-pro-preview", "name": "gemini-3-pro-preview"},
-            {"modelId": "gemini-3-flash-preview", "name": "gemini-3-flash-preview"},
-            {"modelId": "gemini-2.5-pro", "name": "gemini-2.5-pro"},
-            {"modelId": "gemini-2.5-flash", "name": "gemini-2.5-flash"},
-            {"modelId": "gemini-3.1-flash-lite", "name": "gemini-3.1-flash-lite"},
+def _select(opt_id, current, values, category=None):
+    return SessionConfigOptionSelect(
+        id=opt_id,
+        name=opt_id,
+        type="select",
+        category=category,
+        current_value=current,
+        options=[SessionConfigSelectOption(value=v, name=v) for v in values],
+    )
+
+
+def _claude_046_session() -> NewSessionResponse:
+    # claude-agent-acp 0.46.0: ``models`` dropped; model is a configOptions
+    # select alongside ``mode``/``effort``.
+    return NewSessionResponse(
+        session_id="sess-claude",
+        models=None,
+        config_options=[
+            _select("mode", "default", ["default", "acceptEdits"]),
+            _select(
+                "model",
+                "opus[1m]",
+                ["default", "opus[1m]", "sonnet", "haiku"],
+                category="model",
+            ),
+            _select("effort", "xhigh", ["xhigh", "low"]),
         ],
-    },
-    "configOptions": None,
-}
+    )
+
+
+def _codex_016_session() -> NewSessionResponse:
+    # codex-acp 0.16.0: same shape, combined model ids stay in the select.
+    return NewSessionResponse(
+        session_id="sess-codex",
+        models=None,
+        config_options=[
+            _select("mode", "read-only", ["read-only", "full-access"]),
+            _select(
+                "model",
+                "gpt-5.5",
+                ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"],
+                category="model",
+            ),
+            _select("reasoning_effort", "xhigh", ["xhigh", "low"]),
+        ],
+    )
+
+
+def _gemini_046_session() -> NewSessionResponse:
+    # gemini-cli 0.46.0: keeps the ``models`` capability, no model configOption.
+    return NewSessionResponse(
+        session_id="sess-gemini",
+        config_options=None,
+        models=SessionModelState(
+            current_model_id="gemini-3-flash-preview",
+            available_models=[
+                ModelInfo(model_id=mid, name=mid)
+                for mid in (
+                    "auto",
+                    "gemini-3-pro-preview",
+                    "gemini-3-flash-preview",
+                    "gemini-2.5-pro",
+                    "gemini-2.5-flash",
+                    "gemini-3.1-flash-lite",
+                )
+            ],
+        ),
+    )
 
 
 class TestDetectionAgainstRealSessionResponses:
-    """Detection + extraction against the real CLI ``session/new`` wire shapes,
-    parsed through the actual ``acp.schema.NewSessionResponse`` (not a mock).
+    """Detection + extraction against the real CLI ``session/new`` shapes built
+    from the genuine ``acp.schema`` types (not a hand-rolled mock).
 
     Guards the exact failure the QA bot hypothesised — that
     ``getattr(response, "config_options")`` could be ``None`` due to a field-name
-    mismatch — by exercising the genuine alias-mapped schema.
+    mismatch — by reading the real schema field (``config_options`` /
+    ``current_value`` / ``model_id``).
     """
 
+    def test_config_options_is_the_schema_field_name(self):
+        # The SDK reads ``response.config_options``; pin that this is the real
+        # field on the schema (the bot hypothesised a name mismatch).
+        assert "config_options" in NewSessionResponse.model_fields
+
     def test_claude_046_uses_config_option(self):
-        resp = NewSessionResponse.model_validate(_CLAUDE_046_SESSION)
+        resp = _claude_046_session()
         assert _session_selects_model_via_config_option(resp) is True
         cur, avail = _extract_session_models(resp)
         assert cur == "opus[1m]"
@@ -7321,7 +7317,7 @@ def test_claude_046_uses_config_option(self):
         assert [m.model_id for m in avail] == ["default", "opus[1m]", "sonnet", "haiku"]
 
     def test_codex_016_uses_config_option(self):
-        resp = NewSessionResponse.model_validate(_CODEX_016_SESSION)
+        resp = _codex_016_session()
         assert _session_selects_model_via_config_option(resp) is True
         cur, avail = _extract_session_models(resp)
         assert cur == "gpt-5.5"
@@ -7329,7 +7325,7 @@ def test_codex_016_uses_config_option(self):
         assert [m.model_id for m in avail] == ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"]
 
     def test_gemini_046_uses_set_session_model(self):
-        resp = NewSessionResponse.model_validate(_GEMINI_046_SESSION)
+        resp = _gemini_046_session()
         assert _session_selects_model_via_config_option(resp) is False
         cur, avail = _extract_session_models(resp)
         assert cur == "gemini-3-flash-preview"

From 4c949562db07899b307cb803cb9a4399194c0a31 Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@openhands.dev>
Date: Wed, 17 Jun 2026 13:52:35 +0200
Subject: [PATCH 05/14] fix(acp): unwrap SessionConfigOption RootModel for
 model detection (acp 0.8.x)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The shipped `agent-client-protocol` lib (0.8.1, pinned in uv.lock / the
agent-server image) wraps each `session/new` `configOptions` entry in a
`SessionConfigOption` RootModel (`class SessionConfigOption(RootModel[
SessionConfigOptionSelect])`), so `opt.id` / `opt.type` are `None` and must be
read off `opt.root`. `_model_config_option` read them directly, so
`_session_selects_model_via_config_option` returned `False` for claude-agent-acp
0.46 / codex-acp 0.16 — the SDK then called the removed `session/set_model` and
crashed init with "Method not found". (0.10.x lists the union members directly,
which is why local runs against a drifted venv passed; CI/production run 0.8.1.)

Unwrap `getattr(raw, "root", raw)` so detection works on 0.8.x and 0.10.x. Tests
now parse the real wire payloads through `NewSessionResponse` (exercising the
0.8.x RootModel wrapper) plus an explicit `.root`-unwrap unit test. Re-validated
e2e against acp 0.8.1 + the pinned binaries: claude 0.46 and codex 0.16 both
detect `via_config_option=True`, write a file, and track a live model switch.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/agent/acp_agent.py          |   8 +-
 tests/sdk/agent/test_acp_agent.py             | 174 +++++++++---------
 2 files changed, 96 insertions(+), 86 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py
index aee51a308b..58c4810e80 100644
--- a/openhands-sdk/openhands/sdk/agent/acp_agent.py
+++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py
@@ -394,8 +394,14 @@ def _model_config_option(response: Any) -> Any | None:
     ``session/set_model``. Returns that option (carrying ``options`` and
     ``current_value``) or ``None`` when the server uses neither / the old
     mechanism. ``getattr`` keeps it tolerant of partial structures.
+
+    The ``agent-client-protocol`` Python lib wraps each entry in a
+    ``SessionConfigOption`` ``RootModel`` on 0.8.x (access via ``.root``) but
+    lists the union members directly on 0.10.x; unwrap ``.root`` so detection
+    works on either.
     """
-    for opt in getattr(response, "config_options", None) or []:
+    for raw in getattr(response, "config_options", None) or []:
+        opt = getattr(raw, "root", raw)
         if (
             getattr(opt, "type", None) == "select"
             and getattr(opt, "id", None) == _MODEL_CONFIG_OPTION_ID
diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py
index 6029163ac4..1119e322bf 100644
--- a/tests/sdk/agent/test_acp_agent.py
+++ b/tests/sdk/agent/test_acp_agent.py
@@ -15,14 +15,7 @@
 
 import pytest
 from acp.exceptions import RequestError as ACPRequestError
-from acp.schema import (
-    ModelInfo,
-    NewSessionResponse,
-    PromptResponse,
-    SessionConfigOptionSelect,
-    SessionConfigSelectOption,
-    SessionModelState,
-)
+from acp.schema import NewSessionResponse, PromptResponse
 from pydantic import Field, SecretStr
 
 from openhands.sdk.agent.acp_agent import (
@@ -7159,6 +7152,22 @@ def test_extracts_model_state_from_config_option(self):
             ACPModelInfo(model_id="haiku", name="Haiku", description="Haiku 4.5"),
         ]
 
+    def test_unwraps_rootmodel_wrapped_config_option(self):
+        # agent-client-protocol 0.8.x wraps each entry in a SessionConfigOption
+        # RootModel (the option lives under ``.root``); detection must unwrap it,
+        # else ``opt.id`` is None and the SDK wrongly falls back to set_model.
+        select = self._select(
+            current_value="opus[1m]",
+            options=[self._opt("opus[1m]", "Opus"), self._opt("sonnet", "Sonnet")],
+        )
+        wrapped = SimpleNamespace(root=select)  # mimics the RootModel wrapper
+        response = self._response(config_options=[wrapped])
+        assert _session_selects_model_via_config_option(response) is True
+        cur, avail = _extract_session_models(response)
+        assert cur == "opus[1m]"
+        assert avail is not None
+        assert [m.model_id for m in avail] == ["opus[1m]", "sonnet"]
+
     def test_models_capability_wins_over_config_option(self):
         # If a server somehow carries both, the ``models`` capability is used.
         models = MagicMock()
@@ -7223,93 +7232,88 @@ def test_none_response_is_not_config_option(self):
         assert _session_selects_model_via_config_option(None) is False
 
 
-def _select(opt_id, current, values, category=None):
-    return SessionConfigOptionSelect(
-        id=opt_id,
-        name=opt_id,
-        type="select",
-        category=category,
-        current_value=current,
-        options=[SessionConfigSelectOption(value=v, name=v) for v in values],
-    )
-
-
-def _claude_046_session() -> NewSessionResponse:
-    # claude-agent-acp 0.46.0: ``models`` dropped; model is a configOptions
-    # select alongside ``mode``/``effort``.
-    return NewSessionResponse(
-        session_id="sess-claude",
-        models=None,
-        config_options=[
-            _select("mode", "default", ["default", "acceptEdits"]),
-            _select(
-                "model",
-                "opus[1m]",
-                ["default", "opus[1m]", "sonnet", "haiku"],
-                category="model",
-            ),
-            _select("effort", "xhigh", ["xhigh", "low"]),
-        ],
-    )
-
-
-def _codex_016_session() -> NewSessionResponse:
-    # codex-acp 0.16.0: same shape, combined model ids stay in the select.
-    return NewSessionResponse(
-        session_id="sess-codex",
-        models=None,
-        config_options=[
-            _select("mode", "read-only", ["read-only", "full-access"]),
-            _select(
-                "model",
-                "gpt-5.5",
-                ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"],
-                category="model",
-            ),
-            _select("reasoning_effort", "xhigh", ["xhigh", "low"]),
-        ],
-    )
-
-
-def _gemini_046_session() -> NewSessionResponse:
-    # gemini-cli 0.46.0: keeps the ``models`` capability, no model configOption.
-    return NewSessionResponse(
-        session_id="sess-gemini",
-        config_options=None,
-        models=SessionModelState(
-            current_model_id="gemini-3-flash-preview",
-            available_models=[
-                ModelInfo(model_id=mid, name=mid)
-                for mid in (
-                    "auto",
-                    "gemini-3-pro-preview",
-                    "gemini-3-flash-preview",
-                    "gemini-2.5-pro",
-                    "gemini-2.5-flash",
-                    "gemini-3.1-flash-lite",
-                )
-            ],
+# Real ``session/new`` wire payloads (by-alias) captured from the pinned CLIs.
+# Parsed through the real ``NewSessionResponse`` so the test exercises the
+# genuine schema, INCLUDING the ``SessionConfigOption`` RootModel wrapper the
+# ``agent-client-protocol`` lib applies to ``config_options`` on 0.8.x (where a
+# naive ``opt.id`` read returns ``None``). This is the exact failure the QA bot
+# surfaced; the detection helper unwraps ``.root`` so it works on 0.8.x + 0.10.x.
+def _select_dict(opt_id, current, values, category=None):
+    return {
+        "id": opt_id,
+        "name": opt_id,
+        "type": "select",
+        "category": category,
+        "currentValue": current,
+        "options": [{"value": v, "name": v} for v in values],
+    }
+
+
+_CLAUDE_046_SESSION = {
+    "sessionId": "sess-claude",
+    "models": None,
+    "configOptions": [
+        _select_dict("mode", "default", ["default", "acceptEdits"]),
+        _select_dict(
+            "model",
+            "opus[1m]",
+            ["default", "opus[1m]", "sonnet", "haiku"],
+            category="model",
         ),
-    )
+        _select_dict("effort", "xhigh", ["xhigh", "low"]),
+    ],
+}
+_CODEX_016_SESSION = {
+    "sessionId": "sess-codex",
+    "models": None,
+    "configOptions": [
+        _select_dict("mode", "read-only", ["read-only", "full-access"]),
+        _select_dict(
+            "model",
+            "gpt-5.5",
+            ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"],
+            category="model",
+        ),
+        _select_dict("reasoning_effort", "xhigh", ["xhigh", "low"]),
+    ],
+}
+_GEMINI_046_SESSION = {
+    "sessionId": "sess-gemini",
+    "configOptions": None,
+    "models": {
+        "currentModelId": "gemini-3-flash-preview",
+        "availableModels": [
+            {"modelId": mid, "name": mid}
+            for mid in (
+                "auto",
+                "gemini-3-pro-preview",
+                "gemini-3-flash-preview",
+                "gemini-2.5-pro",
+                "gemini-2.5-flash",
+                "gemini-3.1-flash-lite",
+            )
+        ],
+    },
+}
 
 
 class TestDetectionAgainstRealSessionResponses:
-    """Detection + extraction against the real CLI ``session/new`` shapes built
-    from the genuine ``acp.schema`` types (not a hand-rolled mock).
+    """Detection + extraction against the real CLI ``session/new`` wire shapes,
+    parsed through the actual ``acp.schema.NewSessionResponse``.
 
-    Guards the exact failure the QA bot hypothesised — that
-    ``getattr(response, "config_options")`` could be ``None`` due to a field-name
-    mismatch — by reading the real schema field (``config_options`` /
-    ``current_value`` / ``model_id``).
+    Guards the real production bug the QA bot surfaced: on
+    ``agent-client-protocol`` 0.8.x each ``config_options`` entry is a
+    ``SessionConfigOption`` RootModel (``.root``), so a naive ``opt.id`` read is
+    ``None`` and the SDK would fall back to the removed ``session/set_model``.
     """
 
     def test_config_options_is_the_schema_field_name(self):
         # The SDK reads ``response.config_options``; pin that this is the real
-        # field on the schema (the bot hypothesised a name mismatch).
+        # field on the schema.
         assert "config_options" in NewSessionResponse.model_fields
 
     def test_claude_046_uses_config_option(self):
-        resp = _claude_046_session()
+        resp = NewSessionResponse.model_validate(_CLAUDE_046_SESSION)
         assert _session_selects_model_via_config_option(resp) is True
         cur, avail = _extract_session_models(resp)
         assert cur == "opus[1m]"
@@ -7317,7 +7321,7 @@ def test_claude_046_uses_config_option(self):
         assert [m.model_id for m in avail] == ["default", "opus[1m]", "sonnet", "haiku"]
 
     def test_codex_016_uses_config_option(self):
-        resp = _codex_016_session()
+        resp = NewSessionResponse.model_validate(_CODEX_016_SESSION)
         assert _session_selects_model_via_config_option(resp) is True
         cur, avail = _extract_session_models(resp)
         assert cur == "gpt-5.5"
@@ -7325,7 +7329,7 @@ def test_codex_016_uses_config_option(self):
         assert [m.model_id for m in avail] == ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"]
 
     def test_gemini_046_uses_set_session_model(self):
-        resp = _gemini_046_session()
+        resp = NewSessionResponse.model_validate(_GEMINI_046_SESSION)
         assert _session_selects_model_via_config_option(resp) is False
         cur, avail = _extract_session_models(resp)
         assert cur == "gemini-3-flash-preview"

From b7d5dfc46df23cf11d11be97918e8304ac6167d5 Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@openhands.dev>
Date: Wed, 17 Jun 2026 15:17:28 +0200
Subject: [PATCH 06/14] =?UTF-8?q?chore(acp):=20address=20review=20notes=20?=
 =?UTF-8?q?=E2=80=94=20warn=20on=20mid-session=20mechanism=20change,=20tri?=
 =?UTF-8?q?m=20comments?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to the code review on #3773:

- set_acp_model now logs a WARNING when a runtime switch needs the other
  model-select mechanism than init detected (the stored _model_via_config_option
  diverged), in addition to adopting the working one — surfaces the
  unlikely-but-possible mid-session mechanism change instead of only self-healing
  silently.
- Trim verbose / change-history comments the review flagged: drop the
  "claude-agent-acp used to be skipped…" narrative from _maybe_set_session_model,
  and tighten the version-pin and gemini default-mode comments. (The
  _extract_session_models absent-vs-empty doc is kept — callers rely on that
  semantic for resume persistence.)

No behavior change beyond the added warning log.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/agent/acp_agent.py          | 48 +++++++++----------
 .../openhands/sdk/settings/acp_providers.py   | 17 +++----
 2 files changed, 29 insertions(+), 36 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py
index 58c4810e80..ebec1ac68d 100644
--- a/openhands-sdk/openhands/sdk/agent/acp_agent.py
+++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py
@@ -679,31 +679,19 @@ async def _maybe_set_session_model(
 ) -> bool:
     """Apply the *initial* session model right after session creation.
 
-    This is the session-creation path only, gated on
-    :attr:`~openhands.sdk.settings.acp_providers.ACPProviderInfo.supports_set_session_model`.
-    All built-in providers get a one-shot model-apply call here, via whichever
-    protocol the session advertised (``via_config_option``):
-    ``set_config_option(configId="model")`` for configOptions-based servers
-    (codex-acp 0.16+, claude-agent-acp 0.46+), else ``set_session_model``.
-    claude-agent-acp used to be skipped on the assumption that it already
-    received the model via ``new_session()`` ``_meta``, but 0.30.0 silently
-    ignores that payload (#3654) — the protocol call is the only path that
-    both validates and applies the model.
-
-    For unknown/custom providers (e.g. Devin CLI), we fall back to the generic
-    ``set_config_option`` method with configId="model", which is a standard ACP
-    method that many custom ACP servers support.
-
-    Runtime, mid-conversation switches go through
-    :meth:`ACPAgent.set_acp_model` instead, which always uses
-    ``set_session_model`` and is gated on the separate
-    ``supports_runtime_model_switch`` capability flag.
-
-    Returns ``True`` only when this issued a model-setting call that succeeded — i.e.
-    the override was actually pushed to the server via *this* path. ``False``
-    when there is nothing to apply (no ``acp_model``) or the provider selects
-    its model another way (``_meta``) or the server rejected the call, so
-    the caller can tell whether the live session is really running ``acp_model``.
+    Session-creation path only, gated on
+    ``ACPProviderInfo.supports_set_session_model``. Built-in providers get a
+    one-shot model-apply via whichever protocol the session advertised
+    (``via_config_option``): ``set_config_option(configId="model")`` for
+    configOptions-based servers (codex-acp 0.16+, claude-agent-acp 0.46+), else
+    ``set_session_model``. The ``_meta`` model payload is ignored by the pinned
+    CLIs, so the protocol call is what actually applies the model (#3654).
+    Unknown/custom providers fall back to ``set_config_option(configId="model")``.
+    Runtime switches go through :meth:`ACPAgent.set_acp_model`.
+
+    Returns ``True`` only when a model-setting call succeeded (the override
+    reached the server); ``False`` when there is nothing to apply or the call
+    was rejected, so the caller knows whether the live session runs ``acp_model``.
     """
     if not acp_model:
         return False
@@ -3720,6 +3708,16 @@ def set_acp_model(self, model: str) -> None:
                 except ACPRequestError as e2:
                     pending_error = e2
                 else:
+                    # The live session's mechanism diverged from init-time
+                    # detection; warn and adopt the working one for later
+                    # switches.
+                    logger.warning(
+                        "ACP model-select mechanism changed mid-session "
+                        "(init detected via_config_option=%s, switch needed %s); "
+                        "using the latter for subsequent switches",
+                        not flipped,
+                        flipped,
+                    )
                     self._model_via_config_option = flipped
                     pending_error = None  # both selections applied
             if pending_error is not None:
diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py
index 687767a522..d14a963130 100644
--- a/openhands-sdk/openhands/sdk/settings/acp_providers.py
+++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py
@@ -378,11 +378,9 @@ class ACPProviderInfo:
 # `ACPAgentSettings.resolve_acp_command` runs the pinned `binary_name` instead,
 # so the `@version` suffix is a no-op there.
 #
-# codex-acp 0.16.0 and claude-agent-acp 0.46.0 dropped the UNSTABLE ``models``
-# capability + ``session/set_model`` in favour of a ``model`` ``configOptions``
-# select driven by ``session/set_config_option``; the SDK auto-detects and uses
-# the right call (see ``_session_selects_model_via_config_option`` in
-# acp_agent.py), so model switching keeps working across the bump (#3772).
+# codex-acp 0.16.0 / claude-agent-acp 0.46.0 select the model via a ``model``
+# ``configOptions`` entry instead of ``session/set_model``; the SDK detects the
+# mechanism per session (see ``_session_selects_model_via_config_option``).
 CLAUDE_AGENT_ACP_VERSION = "0.46.0"
 CODEX_ACP_VERSION = "0.16.0"
 GEMINI_CLI_VERSION = "0.46.0"
@@ -452,12 +450,9 @@ class ACPProviderInfo:
             api_key_env_var="GEMINI_API_KEY",
             base_url_env_var="GEMINI_BASE_URL",
             # gemini-cli 0.46.0 rejects ``set_session_mode("yolo")`` at session
-            # init with a JSON-RPC -32603 (it gates yolo on folder-trust that is
-            # not yet established right after session/new), which would crash
-            # headless startup. ``default`` is accepted at init, and the SDK's
-            # ACP bridge already auto-approves every ``session/request_permission``
-            # the server sends, so permission prompts never block regardless of
-            # mode — making ``default`` the headless-safe choice. See #3772.
+            # init (-32603), which crashes headless startup; ``default`` is
+            # accepted. The ACP bridge auto-approves every request_permission, so
+            # prompts never block regardless of mode. See #3772.
             default_session_mode="default",
             agent_name_patterns=("gemini-cli",),
             supports_set_session_model=True,

From 87c8a59f267590a5637586eb51330da74b7182c2 Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@openhands.dev>
Date: Wed, 17 Jun 2026 15:32:39 +0200
Subject: [PATCH 07/14] refactor(acp): extract _usable_models /
 _config_option_to_model helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per the review's DRY note: both branches of _extract_session_models built an
ACPModelInfo list and dropped entries without a usable model_id. Factor the
shared filter into _usable_models and the configOptions option→ACPModelInfo
mapping into _config_option_to_model (degrading a non-string id to "" like
ACPModelInfo.from_protocol). No behavior change.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/agent/acp_agent.py          | 45 ++++++++++---------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py
index ebec1ac68d..42f476c36f 100644
--- a/openhands-sdk/openhands/sdk/agent/acp_agent.py
+++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py
@@ -23,7 +23,7 @@
 import threading
 import time
 import uuid
-from collections.abc import Awaitable, Callable, Generator
+from collections.abc import Awaitable, Callable, Generator, Iterable
 from concurrent.futures import Future
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Final, Literal, NamedTuple
@@ -492,6 +492,25 @@ async def _apply_acp_model_with_fallback(
         return not via_config_option
 
 
+def _usable_models(infos: Iterable[ACPModelInfo]) -> list[ACPModelInfo]:
+    """Drop entries without a usable ``model_id`` — an empty/missing id is an
+    invalid picker option and an unusable model-switch target."""
+    return [info for info in infos if info.model_id]
+
+
+def _config_option_to_model(opt: Any) -> ACPModelInfo:
+    """Build an :class:`ACPModelInfo` from a ``configOptions`` select option,
+    whose ``value`` is the model id. A non-string id degrades to ``""`` (then
+    dropped by :func:`_usable_models`), mirroring ``ACPModelInfo.from_protocol``.
+    """
+    value = getattr(opt, "value", None)
+    return ACPModelInfo(
+        model_id=value if isinstance(value, str) else "",
+        name=getattr(opt, "name", None),
+        description=getattr(opt, "description", None),
+    )
+
+
 def _extract_session_models(
     response: Any,
 ) -> tuple[str | None, list[ACPModelInfo] | None]:
@@ -523,15 +542,7 @@ def _extract_session_models(
         current = getattr(models, "current_model_id", None)
         current = current if isinstance(current, str) and current else None
         raw = getattr(models, "available_models", None) or []
-        # Drop entries without a usable id: an empty/missing ``model_id`` is an
-        # invalid picker option and an unusable ``set_session_model`` target, so
-        # we filter it out rather than surfacing ``model_id=""``.
-        available = [
-            info
-            for info in (ACPModelInfo.from_protocol(m) for m in raw)
-            if info.model_id
-        ]
-        return current, available
+        return current, _usable_models(ACPModelInfo.from_protocol(m) for m in raw)
     # configOptions mechanism: the ``model`` select carries the same state, with
     # ``value`` as the model id (== the ``set_config_option`` target).
     opt = _model_config_option(response)
@@ -539,18 +550,8 @@ def _extract_session_models(
         return None, None
     current = getattr(opt, "current_value", None)
     current = current if isinstance(current, str) and current else None
-    available = []
-    for o in getattr(opt, "options", None) or []:
-        value = getattr(o, "value", None)
-        if isinstance(value, str) and value:
-            available.append(
-                ACPModelInfo(
-                    model_id=value,
-                    name=getattr(o, "name", None),
-                    description=getattr(o, "description", None),
-                )
-            )
-    return current, available
+    options = getattr(opt, "options", None) or []
+    return current, _usable_models(_config_option_to_model(o) for o in options)
 
 
 # The ACP MCP server union accepted by new_session() / load_session().

From d9d7a0afe36851486ee8eeda83c4864441ab5461 Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@openhands.dev>
Date: Wed, 17 Jun 2026 15:40:37 +0200
Subject: [PATCH 08/14] chore(acp): hold claude-agent-acp at 0.44.0 to clear
 the 7-day supply-chain gate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

claude-agent-acp 0.45.0 (2026-06-15) and 0.46.0 (2026-06-16) were published <7
days ago and trip the review bot's supply-chain hold, blocking merge. 0.44.0
(2026-06-09, 8 days old) clears it and is functionally equivalent for this PR:
same configOptions model-selection mechanism (verified `models: null`, model
config-option `[default, opus[1m], sonnet, haiku]`, default `opus[1m]`) and same
registry model set. Re-validated e2e against the 0.44.0 binary: init +
bypassPermissions edit turn + live `opus[1m]→sonnet` switch all pass.

codex-acp 0.16.0 (2026-06-08) and gemini-cli 0.46.0 (2026-06-10) are already >7
days old, so they stay. Revisit claude → 0.46.0 once it ages past the hold.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../openhands/agent_server/docker/Dockerfile       |  2 +-
 openhands-sdk/openhands/sdk/agent/acp_agent.py     | 14 +++++++-------
 .../openhands/sdk/settings/acp_providers.py        | 12 ++++++++----
 tests/sdk/test_settings.py                         |  2 +-
 4 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/openhands-agent-server/openhands/agent_server/docker/Dockerfile b/openhands-agent-server/openhands/agent_server/docker/Dockerfile
index 8cc51cd9f8..b13a2f219b 100644
--- a/openhands-agent-server/openhands/agent_server/docker/Dockerfile
+++ b/openhands-agent-server/openhands/agent_server/docker/Dockerfile
@@ -171,7 +171,7 @@ RUN set -ux; \
          && "$ACP_NODE_DIR/bin/node" --version; then \
         PATH="$ACP_NODE_DIR/bin:$PATH"; \
         if "$ACP_NODE_DIR/bin/npm" install -g \
-            @agentclientprotocol/claude-agent-acp@0.46.0 \
+            @agentclientprotocol/claude-agent-acp@0.44.0 \
             @zed-industries/codex-acp@0.16.0 \
             @google/gemini-cli@0.46.0; then \
           # Create wrappers in /usr/local/bin that prepend ACP's Node 22 to PATH.
diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py
index 42f476c36f..2a3621c34e 100644
--- a/openhands-sdk/openhands/sdk/agent/acp_agent.py
+++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py
@@ -143,7 +143,7 @@
 # -32601 = "Method not found" (JSON-RPC spec). An ACP server raises this when a
 # model-selection call uses the mechanism it does *not* implement — e.g.
 # `session/set_model` on a CLI that moved model selection to `configOptions`
-# (codex-acp 0.16+, claude-agent-acp 0.46+), or vice versa. We use it to fall
+# (codex-acp 0.16+, claude-agent-acp 0.44+), or vice versa. We use it to fall
 # back to the other mechanism if response-detection picked the wrong one.
 _METHOD_NOT_FOUND_CODE: Final[int] = -32601
 
@@ -380,7 +380,7 @@ def _write_secret_file(path: Path, value: str) -> None:
 
 # Session config-option id that selects the model on ACP servers that drive
 # model selection through ``configOptions`` / ``session/set_config_option``
-# (codex-acp 0.16+, claude-agent-acp 0.46+) rather than the UNSTABLE ``models``
+# (codex-acp 0.16+, claude-agent-acp 0.44+) rather than the UNSTABLE ``models``
 # capability + ``session/set_model`` (gemini-cli, older codex/claude).
 _MODEL_CONFIG_OPTION_ID = "model"
 
@@ -436,7 +436,7 @@ def _apply_acp_model(
     """Build the awaitable that applies ``model`` to a live ACP session via
     whichever protocol the session advertised: ``set_config_option(configId=
     "model")`` for configOptions-based servers (codex-acp 0.16+, claude-agent-acp
-    0.46+), else ``set_session_model``.
+    0.44+), else ``set_session_model``.
 
     Returns the (eagerly constructed) coroutine so callers can either ``await``
     it directly or hand it to ``run_async`` — the underlying ``conn`` method is
@@ -521,7 +521,7 @@ def _extract_session_models(
     type at this boundary so nothing downstream depends on the vendored
     ``acp.schema`` shape. Reads whichever mechanism the session advertised: the
     UNSTABLE ``models`` capability, or the ``model`` ``configOptions`` select
-    (codex-acp 0.16+, claude-agent-acp 0.46+).
+    (codex-acp 0.16+, claude-agent-acp 0.44+).
 
     The second element distinguishes **absent** from **empty** — this matters
     for resume persistence (preserve the last-known list when the server didn't
@@ -684,7 +684,7 @@ async def _maybe_set_session_model(
     ``ACPProviderInfo.supports_set_session_model``. Built-in providers get a
     one-shot model-apply via whichever protocol the session advertised
     (``via_config_option``): ``set_config_option(configId="model")`` for
-    configOptions-based servers (codex-acp 0.16+, claude-agent-acp 0.46+), else
+    configOptions-based servers (codex-acp 0.16+, claude-agent-acp 0.44+), else
     ``set_session_model``. The ``_meta`` model payload is ignored by the pinned
     CLIs, so the protocol call is what actually applies the model (#3654).
     Unknown/custom providers fall back to ``set_config_option(configId="model")``.
@@ -770,7 +770,7 @@ async def _reapply_session_model_on_resume(
     try:
         if provider is not None:
             # Known provider: apply via the mechanism the resumed session uses
-            # (set_config_option for codex-acp 0.16+/claude-agent-acp 0.46+,
+            # (set_config_option for codex-acp 0.16+/claude-agent-acp 0.44+,
             # else set_session_model). A rejection is already tolerated below
             # (the session keeps the server default), so resume doesn't need the
             # cross-mechanism fallback the init/switch paths use.
@@ -1627,7 +1627,7 @@ def model_post_init(self, __context: object) -> None:
     )  # ACP server version from InitializeResponse
     # Which protocol this session uses to select the model: ``True`` ⇒
     # ``session/set_config_option(configId="model")`` (codex-acp 0.16+,
-    # claude-agent-acp 0.46+); ``False`` ⇒ ``session/set_model`` (gemini-cli and
+    # claude-agent-acp 0.44+); ``False`` ⇒ ``session/set_model`` (gemini-cli and
     # older codex/claude). Detected from the session/new (or load_session)
     # response at init and reused by runtime ``set_acp_model`` switches.
     _model_via_config_option: bool = PrivateAttr(default=False)
diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py
index d14a963130..dfb60d2e74 100644
--- a/openhands-sdk/openhands/sdk/settings/acp_providers.py
+++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py
@@ -294,7 +294,7 @@ class ACPProviderInfo:
 # ---------------------------------------------------------------------------
 
 # Model IDs the Claude Code CLI accepts, mirroring the ``model`` configOptions
-# select claude-agent-acp 0.46.0 reports at ``session/new`` (the short aliases
+# select claude-agent-acp 0.44.0 reports at ``session/new`` (the short aliases
 # the CLI's own ``/model`` menu offers, switched via ``set_config_option``).
 # ``opus[1m]`` is the SDK-documented version-agnostic 1M-context alias and the
 # CLI's own default (``currentValue``); ``default`` is the CLI's recommended
@@ -378,10 +378,14 @@ class ACPProviderInfo:
 # `ACPAgentSettings.resolve_acp_command` runs the pinned `binary_name` instead,
 # so the `@version` suffix is a no-op there.
 #
-# codex-acp 0.16.0 / claude-agent-acp 0.46.0 select the model via a ``model``
+# codex-acp 0.16.0 / claude-agent-acp 0.44.0 select the model via a ``model``
 # ``configOptions`` entry instead of ``session/set_model``; the SDK detects the
 # mechanism per session (see ``_session_selects_model_via_config_option``).
-CLAUDE_AGENT_ACP_VERSION = "0.46.0"
+#
+# claude-agent-acp is held at 0.44.0 (0.45.0/0.46.0 published <7 days ago and
+# trip the supply-chain hold) — same configOptions mechanism + model set as
+# 0.46.0; revisit once a newer release ages past the hold.
+CLAUDE_AGENT_ACP_VERSION = "0.44.0"
 CODEX_ACP_VERSION = "0.16.0"
 GEMINI_CLI_VERSION = "0.46.0"
 
@@ -403,7 +407,7 @@ class ACPProviderInfo:
             # claude-agent-acp ignores the session-_meta model selection (the
             # requested model only becomes a picker option; the session keeps
             # running its default), so the init path must push the model via a
-            # protocol call (#3654). On 0.46.0 that call is
+            # protocol call (#3654). On 0.44.0+ that call is
             # ``set_config_option(configId="model")`` rather than
             # ``set_session_model`` (auto-detected from session/new); the _meta
             # payload (session_meta_key below) is still sent — harmless, and
diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py
index 3f43ebcff4..546806d7ce 100644
--- a/tests/sdk/test_settings.py
+++ b/tests/sdk/test_settings.py
@@ -883,7 +883,7 @@ def test_acp_create_agent_uses_server_default_command(
     assert agent.acp_command == [
         "npx",
         "-y",
-        "@agentclientprotocol/claude-agent-acp@0.46.0",
+        "@agentclientprotocol/claude-agent-acp@0.44.0",
     ]
     assert agent.acp_model == "claude-opus-4-6"
     # The authoritative provider key is carried onto the agent.

From 8b88ad206fe6f0f2a683e471b2aabede07be0899 Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@openhands.dev>
Date: Wed, 17 Jun 2026 17:53:43 +0200
Subject: [PATCH 09/14] =?UTF-8?q?fix(acp):=20hold=20codex-acp=20at=200.15.?=
 =?UTF-8?q?0=20=E2=80=94=200.16.0=20regresses=20model-switch=20turns?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Deeper e2e validation found codex-acp 0.16.0 breaks the turn *after* a model
switch: the switch applies (set_config_option, current_model_id tracks) but the
next prompt fails with JSON-RPC -32603. Confirmed apples-to-apples against the
ChatGPT-subscription backend — same SDK/auth/machine/model, only the codex
version differs:

  codex 0.15.0 (set_session_model): switch gpt-5.5/high -> turn OK
  codex 0.16.0 (set_config_option): switch gpt-5.5/{xhigh,high,medium},
                                     gpt-5.4-mini/low -> turn -32603 (6+ runs)

Bumping would regress ACP model switching for codex (the #3763/#3764 path), so
hold at 0.15.0 (its set_session_model switch + turn works fully). Keep the
configOptions detection/apply code — claude-agent-acp 0.44 needs it, and it
already handles codex 0.16 if/when codex fixes the set_config_option turn path
(detection is per-session, no further change needed to un-hold). claude stays at
0.44.0, gemini at 0.46.0.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../openhands/agent_server/docker/Dockerfile       |  2 +-
 .../openhands/sdk/settings/acp_providers.py        | 14 +++++++++++---
 tests/sdk/test_settings.py                         |  4 ++--
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/openhands-agent-server/openhands/agent_server/docker/Dockerfile b/openhands-agent-server/openhands/agent_server/docker/Dockerfile
index b13a2f219b..fa6160cdef 100644
--- a/openhands-agent-server/openhands/agent_server/docker/Dockerfile
+++ b/openhands-agent-server/openhands/agent_server/docker/Dockerfile
@@ -172,7 +172,7 @@ RUN set -ux; \
         PATH="$ACP_NODE_DIR/bin:$PATH"; \
         if "$ACP_NODE_DIR/bin/npm" install -g \
             @agentclientprotocol/claude-agent-acp@0.44.0 \
-            @zed-industries/codex-acp@0.16.0 \
+            @zed-industries/codex-acp@0.15.0 \
             @google/gemini-cli@0.46.0; then \
           # Create wrappers in /usr/local/bin that prepend ACP's Node 22 to PATH.
           # This ensures the ACP binary's #!/usr/bin/env node shebang resolves
diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py
index dfb60d2e74..7be85369b6 100644
--- a/openhands-sdk/openhands/sdk/settings/acp_providers.py
+++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py
@@ -378,15 +378,23 @@ class ACPProviderInfo:
 # `ACPAgentSettings.resolve_acp_command` runs the pinned `binary_name` instead,
 # so the `@version` suffix is a no-op there.
 #
-# codex-acp 0.16.0 / claude-agent-acp 0.44.0 select the model via a ``model``
+# claude-agent-acp 0.44+ (and codex-acp 0.16+) select the model via a ``model``
 # ``configOptions`` entry instead of ``session/set_model``; the SDK detects the
-# mechanism per session (see ``_session_selects_model_via_config_option``).
+# mechanism per session (see ``_session_selects_model_via_config_option``) and
+# applies it accordingly, so model switching works on either mechanism.
 #
 # claude-agent-acp is held at 0.44.0 (0.45.0/0.46.0 published <7 days ago and
 # trip the supply-chain hold) — same configOptions mechanism + model set as
 # 0.46.0; revisit once a newer release ages past the hold.
+#
+# codex-acp is held at 0.15.0: 0.16.0 moved to the configOptions mechanism, but
+# a real e2e showed that applying a model via ``set_config_option`` then running
+# a turn fails with JSON-RPC -32603 (verified against the ChatGPT-subscription
+# backend; 0.15.0's ``set_session_model`` switch + turn works). Bumping would
+# regress ACP model switching for codex (the #3763/#3764 path), so hold until
+# codex-acp fixes the 0.16 set_config_option turn path. See issue #3772.
 CLAUDE_AGENT_ACP_VERSION = "0.44.0"
-CODEX_ACP_VERSION = "0.16.0"
+CODEX_ACP_VERSION = "0.15.0"
 GEMINI_CLI_VERSION = "0.46.0"
 
 
diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py
index 546806d7ce..944ee2d1fe 100644
--- a/tests/sdk/test_settings.py
+++ b/tests/sdk/test_settings.py
@@ -1034,7 +1034,7 @@ def test_acp_resolve_command_rewrites_versioned_npx_to_pinned_binary(
     monkeypatch.setattr(shutil, "which", _which_returning("codex-acp"))
     for pkg in (
         "@zed-industries/codex-acp",
-        "@zed-industries/codex-acp@0.16.0",
+        "@zed-industries/codex-acp@0.15.0",
         "@zed-industries/codex-acp@0.11.1",
     ):
         settings = ACPAgentSettings(
@@ -1057,7 +1057,7 @@ def test_acp_resolve_command_keeps_npx_when_binary_absent(
     assert settings.resolve_acp_command() == [
         "npx",
         "-y",
-        "@zed-industries/codex-acp@0.16.0",
+        "@zed-industries/codex-acp@0.15.0",
     ]
 
 

From 8efa65c176b7648441a4af71ad5ccce427d8e670 Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@openhands.dev>
Date: Wed, 17 Jun 2026 20:03:18 +0200
Subject: [PATCH 10/14] feat(acp): bump codex-acp to 0.16.0 with model/effort
 split (fixes switch-turn -32603)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root-caused the codex 0.16 "switch applies but next turn -32603s" by reading the
codex-acp Rust source (v0.15 vs v0.16): not a codex regression — a redesign.
v0.15 `set_session_model` ran `parse_model_id` (`split_once('/')`) so the
combined `gpt-5.5/medium` id was split server-side. v0.16 moved model selection
to `configOptions`: `handle_set_config_model` looks up a **bare preset id**
(`gpt-5.5`), with reasoning effort as a **separate** `reasoning_effort` option;
an unknown id (the combined `gpt-5.5/medium`) is taken literally → the backend
400s ("model is not supported"). The SDK was still sending the combined id.

Fix: when applying via the configOptions mechanism, split a `<model>/<effort>`
id (`_split_codex_model_effort`) and set `config_id="model"` (bare) then
`config_id="reasoning_effort"` (separate). Bare ids (claude `opus[1m]`/`sonnet`,
gemini `auto`) are unaffected. `_CODEX_MODELS` keeps its combined ids.

Re-validated e2e against the codex-acp 0.16.0 binary: pre-session deferred
switch, mid-conversation live switch, and resume + reapply all apply the model
AND complete the following turn (no -32603). claude 0.44 / gemini 0.46 unchanged.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../openhands/agent_server/docker/Dockerfile  |   2 +-
 .../openhands/sdk/agent/acp_agent.py          |  60 +++++++---
 .../openhands/sdk/settings/acp_providers.py   |  14 +--
 tests/sdk/agent/test_acp_agent.py             | 109 ++++++++++++++----
 tests/sdk/test_settings.py                    |   4 +-
 5 files changed, 142 insertions(+), 47 deletions(-)

diff --git a/openhands-agent-server/openhands/agent_server/docker/Dockerfile b/openhands-agent-server/openhands/agent_server/docker/Dockerfile
index fa6160cdef..b13a2f219b 100644
--- a/openhands-agent-server/openhands/agent_server/docker/Dockerfile
+++ b/openhands-agent-server/openhands/agent_server/docker/Dockerfile
@@ -172,7 +172,7 @@ RUN set -ux; \
         PATH="$ACP_NODE_DIR/bin:$PATH"; \
         if "$ACP_NODE_DIR/bin/npm" install -g \
             @agentclientprotocol/claude-agent-acp@0.44.0 \
-            @zed-industries/codex-acp@0.15.0 \
+            @zed-industries/codex-acp@0.16.0 \
             @google/gemini-cli@0.46.0; then \
           # Create wrappers in /usr/local/bin that prepend ACP's Node 22 to PATH.
           # This ensures the ACP binary's #!/usr/bin/env node shebang resolves
diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py
index 2a3621c34e..2c7ed0eaf2 100644
--- a/openhands-sdk/openhands/sdk/agent/acp_agent.py
+++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py
@@ -383,6 +383,9 @@ def _write_secret_file(path: Path, value: str) -> None:
 # (codex-acp 0.16+, claude-agent-acp 0.44+) rather than the UNSTABLE ``models``
 # capability + ``session/set_model`` (gemini-cli, older codex/claude).
 _MODEL_CONFIG_OPTION_ID = "model"
+# Companion config-option id codex-acp 0.16 uses for the reasoning-effort tier
+# (the 0.15 combined ``<model>/<effort>`` id is split across the two on 0.16).
+_REASONING_EFFORT_CONFIG_OPTION_ID = "reasoning_effort"
 
 
 def _model_config_option(response: Any) -> Any | None:
@@ -426,28 +429,55 @@ def _session_selects_model_via_config_option(response: Any) -> bool:
     return _model_config_option(response) is not None
 
 
-def _apply_acp_model(
+def _split_codex_model_effort(model: str) -> tuple[str, str | None]:
+    """Split a codex-style ``<model>/<effort>`` id into ``(model, effort)``.
+
+    codex-acp's ``models`` capability (0.15) used a combined id like
+    ``gpt-5.5/medium`` that the server parsed itself. Its 0.16 ``model``
+    ``configOptions`` select instead wants a **bare preset id** (``gpt-5.5``)
+    with the reasoning effort set via a separate ``reasoning_effort`` option —
+    an unknown combined id is taken literally and rejected by the backend
+    (``"<id>" model is not supported``). So when applying via configOptions we
+    split on ``/``. Bare ids (claude ``opus[1m]``/``sonnet``, gemini ``auto``,
+    codex without an effort suffix) return ``(model, None)`` unchanged.
+    """
+    base, sep, effort = model.partition("/")
+    if sep and effort:
+        return base, effort
+    return model, None
+
+
+async def _apply_acp_model(
     conn: ClientSideConnection,
     session_id: str,
     model: str,
     *,
     via_config_option: bool,
-) -> Awaitable[Any]:
-    """Build the awaitable that applies ``model`` to a live ACP session via
-    whichever protocol the session advertised: ``set_config_option(configId=
-    "model")`` for configOptions-based servers (codex-acp 0.16+, claude-agent-acp
-    0.44+), else ``set_session_model``.
-
-    Returns the (eagerly constructed) coroutine so callers can either ``await``
-    it directly or hand it to ``run_async`` — the underlying ``conn`` method is
-    invoked now, mirroring the previous inline ``conn.set_session_model(...)``
-    call site.
+) -> None:
+    """Apply ``model`` to a live ACP session via whichever protocol the session
+    advertised: ``set_config_option(configId="model")`` for configOptions-based
+    servers (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``.
+
+    For the configOptions mechanism a codex ``<model>/<effort>`` id is split into
+    a bare ``model`` plus a separate ``reasoning_effort`` option (see
+    :func:`_split_codex_model_effort`); other ids apply as a single ``model``
+    selection.
     """
-    if via_config_option:
-        return conn.set_config_option(
-            config_id=_MODEL_CONFIG_OPTION_ID, value=model, session_id=session_id
+    if not via_config_option:
+        await conn.set_session_model(model_id=model, session_id=session_id)
+        return
+    base, effort = _split_codex_model_effort(model)
+    await conn.set_config_option(
+        config_id=_MODEL_CONFIG_OPTION_ID, value=base, session_id=session_id
+    )
+    if effort is not None:
+        # Order matters: reasoning_effort is only valid once the model is a known
+        # preset, which the call above just set.
+        await conn.set_config_option(
+            config_id=_REASONING_EFFORT_CONFIG_OPTION_ID,
+            value=effort,
+            session_id=session_id,
         )
-    return conn.set_session_model(model_id=model, session_id=session_id)
 
 
 def _is_method_not_found(exc: ACPRequestError) -> bool:
diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py
index 7be85369b6..a98b1a399e 100644
--- a/openhands-sdk/openhands/sdk/settings/acp_providers.py
+++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py
@@ -387,14 +387,14 @@ class ACPProviderInfo:
 # trip the supply-chain hold) — same configOptions mechanism + model set as
 # 0.46.0; revisit once a newer release ages past the hold.
 #
-# codex-acp is held at 0.15.0: 0.16.0 moved to the configOptions mechanism, but
-# a real e2e showed that applying a model via ``set_config_option`` then running
-# a turn fails with JSON-RPC -32603 (verified against the ChatGPT-subscription
-# backend; 0.15.0's ``set_session_model`` switch + turn works). Bumping would
-# regress ACP model switching for codex (the #3763/#3764 path), so hold until
-# codex-acp fixes the 0.16 set_config_option turn path. See issue #3772.
+# codex-acp 0.16 split the 0.15 combined ``<model>/<effort>`` id into a bare
+# ``model`` preset id + a separate ``reasoning_effort`` config option; sending
+# the combined id to ``set_config_option(model)`` is taken literally and 400s
+# at the backend. The apply path splits accordingly (see
+# ``_split_codex_model_effort`` in acp_agent.py), so ``_CODEX_MODELS`` keeps the
+# combined ids and switching works on 0.16. See issue #3772.
 CLAUDE_AGENT_ACP_VERSION = "0.44.0"
-CODEX_ACP_VERSION = "0.15.0"
+CODEX_ACP_VERSION = "0.16.0"
 GEMINI_CLI_VERSION = "0.46.0"
 
 
diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py
index 1119e322bf..3d866b5a2a 100644
--- a/tests/sdk/agent/test_acp_agent.py
+++ b/tests/sdk/agent/test_acp_agent.py
@@ -11,7 +11,7 @@
 from pathlib import Path
 from types import SimpleNamespace
 from typing import Any
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock, call, patch
 
 import pytest
 from acp.exceptions import RequestError as ACPRequestError
@@ -20,6 +20,7 @@
 
 from openhands.sdk.agent.acp_agent import (
     ACPAgent,
+    _apply_acp_model,
     _apply_acp_model_with_fallback,
     _classify_acp_init_error,
     _codex_auth_file,
@@ -37,6 +38,7 @@
     _select_auth_method,
     _serialize_tool_content,
     _session_selects_model_via_config_option,
+    _split_codex_model_effort,
     _strip_inherited_npm_env,
 )
 from openhands.sdk.agent.acp_models import ACPModelInfo
@@ -4532,14 +4534,16 @@ async def test_reapply_via_set_session_model_mechanism(self):
     @pytest.mark.asyncio
     async def test_reapply_via_set_config_option_mechanism(self):
         # codex-acp 0.16+/claude-agent-acp 0.46+ reapply via
-        # ``set_config_option(configId="model")``.
+        # ``set_config_option(configId="model")``; a codex combined id splits
+        # into model + reasoning_effort.
         conn = AsyncMock()
         applied = await _reapply_session_model_on_resume(
             conn, "codex-acp", "sess-1", "gpt-5.4/low", via_config_option=True
         )
-        conn.set_config_option.assert_awaited_once_with(
-            config_id="model", value="gpt-5.4/low", session_id="sess-1"
-        )
+        assert conn.set_config_option.await_args_list == [
+            call(config_id="model", value="gpt-5.4", session_id="sess-1"),
+            call(config_id="reasoning_effort", value="low", session_id="sess-1"),
+        ]
         conn.set_session_model.assert_not_called()
         assert applied is True
 
@@ -4678,20 +4682,33 @@ class TestSetACPModel:
     def _wire(
         agent: ACPAgent, agent_name: str, *, via_config_option: bool = False
     ) -> ACPAgent:
-        agent._conn = MagicMock()
+        conn = MagicMock()
+        conn.set_session_model = AsyncMock()
+        conn.set_config_option = AsyncMock()
+        agent._conn = conn
         agent._session_id = "sess-1"
         agent._agent_name = agent_name
         agent._model_via_config_option = via_config_option
         executor = MagicMock()
-        executor.run_async = MagicMock()
+
+        # run_async actually drives the (async) apply coroutine to completion so
+        # the AsyncMock conn calls fire — exercising the real apply/split path.
+        def _run(coro: Any, timeout: Any = None) -> Any:
+            loop = asyncio.new_event_loop()
+            try:
+                return loop.run_until_complete(coro)
+            finally:
+                loop.close()
+
+        executor.run_async = MagicMock(side_effect=_run)
         agent._executor = executor
         return agent
 
     def test_switches_model_on_live_codex_session(self):
-        # Default (models-capability) session ⇒ set_session_model.
+        # Default (models-capability) session ⇒ set_session_model (combined id).
         agent = self._wire(_make_agent(), "codex-acp")
         agent.set_acp_model("gpt-5.4/low")
-        agent._conn.set_session_model.assert_called_once_with(
+        agent._conn.set_session_model.assert_awaited_once_with(
             model_id="gpt-5.4/low", session_id="sess-1"
         )
         agent._conn.set_config_option.assert_not_called()
@@ -4700,31 +4717,41 @@ def test_switches_model_on_live_codex_session(self):
         assert agent.llm.model == "gpt-5.4/low"
         assert agent.llm.metrics.model_name == "gpt-5.4/low"
 
-    def test_switches_model_via_config_option_session(self):
-        # configOptions session (codex-acp 0.16+, claude-agent-acp 0.46+) ⇒
-        # set_config_option(configId="model").
+    def test_switches_codex_via_config_option_splits_model_and_effort(self):
+        # codex-acp 0.16 configOptions: the combined id is split into a bare
+        # `model` preset + a separate `reasoning_effort` option.
         agent = self._wire(_make_agent(), "codex-acp", via_config_option=True)
         agent.set_acp_model("gpt-5.4/low")
-        agent._conn.set_config_option.assert_called_once_with(
-            config_id="model", value="gpt-5.4/low", session_id="sess-1"
-        )
+        assert agent._conn.set_config_option.await_args_list == [
+            call(config_id="model", value="gpt-5.4", session_id="sess-1"),
+            call(config_id="reasoning_effort", value="low", session_id="sess-1"),
+        ]
         agent._conn.set_session_model.assert_not_called()
-        agent._executor.run_async.assert_called_once()
+        # The agent's surfaced/persisted model stays the combined registry id.
         assert agent.llm.model == "gpt-5.4/low"
         assert agent._current_model_id == "gpt-5.4/low"
 
+    def test_switches_claude_via_config_option_single_call(self):
+        # A bare id (no `/`) applies as a single `model` selection — no effort.
+        agent = self._wire(_make_agent(), "claude-agent-acp", via_config_option=True)
+        agent.set_acp_model("sonnet")
+        agent._conn.set_config_option.assert_awaited_once_with(
+            config_id="model", value="sonnet", session_id="sess-1"
+        )
+        assert agent._current_model_id == "sonnet"
+
     def test_switch_falls_back_on_method_not_found(self):
         # Detected set_session_model, but the live server only has the
         # configOptions mechanism: the first apply raises -32601, the switch
         # retries with the other call and remembers it.
         agent = self._wire(_make_agent(), "codex-acp", via_config_option=False)
-        agent._executor.run_async.side_effect = [
-            ACPRequestError(code=-32601, message="Method not found"),
-            None,  # retry with the flipped mechanism succeeds
-        ]
+        agent._conn.set_session_model.side_effect = ACPRequestError(
+            code=-32601, message="Method not found"
+        )
         agent.set_acp_model("gpt-5.4/low")
         assert agent._executor.run_async.call_count == 2
-        # The working mechanism is remembered for later switches.
+        # The flipped mechanism applied (split into model + reasoning_effort).
+        assert agent._conn.set_config_option.call_count == 2
         assert agent._model_via_config_option is True
         assert agent.llm.model == "gpt-5.4/low"
         assert agent._current_model_id == "gpt-5.4/low"
@@ -4733,7 +4760,7 @@ def test_switch_invalid_model_does_not_fall_back(self):
         # A -32602 invalid-params is a real client error, not a wrong-mechanism
         # signal: surface it as ValueError without a second call.
         agent = self._wire(_make_agent(), "codex-acp", via_config_option=True)
-        agent._executor.run_async.side_effect = ACPRequestError(
+        agent._conn.set_config_option.side_effect = ACPRequestError(
             code=-32602, message="Invalid params"
         )
         with pytest.raises(ValueError, match="rejected set_config_option"):
@@ -7408,6 +7435,44 @@ async def test_non_method_not_found_error_propagates(self):
         conn.set_session_model.assert_not_called()
 
 
+class TestCodexModelEffortSplit:
+    """codex-acp 0.16 splits the 0.15 combined ``<model>/<effort>`` id into a
+    bare ``model`` preset + a separate ``reasoning_effort`` configOption."""
+
+    def test_splits_combined_id(self):
+        assert _split_codex_model_effort("gpt-5.5/medium") == ("gpt-5.5", "medium")
+        assert _split_codex_model_effort("gpt-5.4-mini/xhigh") == (
+            "gpt-5.4-mini",
+            "xhigh",
+        )
+
+    def test_bare_ids_unchanged(self):
+        # claude / gemini / codex-without-effort ids have no ``/``.
+        assert _split_codex_model_effort("opus[1m]") == ("opus[1m]", None)
+        assert _split_codex_model_effort("sonnet") == ("sonnet", None)
+        assert _split_codex_model_effort("auto") == ("auto", None)
+        assert _split_codex_model_effort("gpt-5.5") == ("gpt-5.5", None)
+
+    @pytest.mark.asyncio
+    async def test_apply_splits_into_two_config_options(self):
+        # The configOptions apply path sets model (bare) then reasoning_effort.
+        conn = AsyncMock()
+        await _apply_acp_model(conn, "sess-1", "gpt-5.5/medium", via_config_option=True)
+        assert conn.set_config_option.await_args_list == [
+            call(config_id="model", value="gpt-5.5", session_id="sess-1"),
+            call(config_id="reasoning_effort", value="medium", session_id="sess-1"),
+        ]
+        conn.set_session_model.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_apply_bare_config_option_single_call(self):
+        conn = AsyncMock()
+        await _apply_acp_model(conn, "sess-1", "opus[1m]", via_config_option=True)
+        conn.set_config_option.assert_awaited_once_with(
+            config_id="model", value="opus[1m]", session_id="sess-1"
+        )
+
+
 class TestACPAgentAvailableModelsProperty:
     """``available_models`` exposes the server's model list verbatim.
 
diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py
index 944ee2d1fe..546806d7ce 100644
--- a/tests/sdk/test_settings.py
+++ b/tests/sdk/test_settings.py
@@ -1034,7 +1034,7 @@ def test_acp_resolve_command_rewrites_versioned_npx_to_pinned_binary(
     monkeypatch.setattr(shutil, "which", _which_returning("codex-acp"))
     for pkg in (
         "@zed-industries/codex-acp",
-        "@zed-industries/codex-acp@0.15.0",
+        "@zed-industries/codex-acp@0.16.0",
         "@zed-industries/codex-acp@0.11.1",
     ):
         settings = ACPAgentSettings(
@@ -1057,7 +1057,7 @@ def test_acp_resolve_command_keeps_npx_when_binary_absent(
     assert settings.resolve_acp_command() == [
         "npx",
         "-y",
-        "@zed-industries/codex-acp@0.15.0",
+        "@zed-industries/codex-acp@0.16.0",
     ]
 
 

From 5ce5dbbdf9d1f3371746e8b0d5bbb6ab9ffad8a3 Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@openhands.dev>
Date: Wed, 17 Jun 2026 21:31:04 +0200
Subject: [PATCH 11/14] refactor(acp): drop model-select fallback, unify the
 apply path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Detection from the session/new response is deterministic and was correct
for every validated CLI, so the cross-mechanism -32601 fallback was
speculative — and the duplicate apply paths it created caused two latent
bugs. Collapse init/resume/switch onto a single _apply_acp_model primitive:

- remove _apply_acp_model_with_fallback / _is_method_not_found / the -32601
  constant; a real rejection now surfaces (ValueError -> 400, or tolerated
  on resume) instead of silently flipping mechanism
- fold _session_selects_model_via_config_option into _extract_session_models,
  which now returns (current, available, via_config_option) in one scan
- replace _config_option_to_model with ACPModelInfo.from_protocol(id_attr=)
- unknown/custom providers apply via the detected mechanism, not a hardcoded
  set_config_option guess

Fixes:
- resume persists acp_model_via_config_option and uses it as the detection
  default when load_session omits the model block, so a config-option
  session reapplies via the right call across pod recycle
- set_acp_model's error message names the mechanism actually used
- codex reasoning_effort is best-effort: a rejected effort keeps the applied
  model rather than failing the switch with a stale llm.model
- drop false-confidence run_async stubs that never awaited the apply
  coroutine; drive the error path through the real conn

Re-validated e2e against the pinned binaries (claude 0.44 / codex 0.16 /
gemini 0.46): pre-run model, mid-conversation switch, and real turns all pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/agent/acp_agent.py          | 355 +++++++-----------
 .../openhands/sdk/agent/acp_models.py         |   8 +-
 .../openhands/sdk/settings/acp_providers.py   |  19 +-
 tests/sdk/agent/test_acp_agent.py             | 257 +++++++------
 4 files changed, 270 insertions(+), 369 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py
index 2c7ed0eaf2..1005d7d173 100644
--- a/openhands-sdk/openhands/sdk/agent/acp_agent.py
+++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py
@@ -140,13 +140,6 @@
 #          upstream model 500s, and transient infrastructure errors.
 _RETRIABLE_SERVER_ERROR_CODES: frozenset[int] = frozenset({-32603})
 
-# -32601 = "Method not found" (JSON-RPC spec). An ACP server raises this when a
-# model-selection call uses the mechanism it does *not* implement — e.g.
-# `session/set_model` on a CLI that moved model selection to `configOptions`
-# (codex-acp 0.16+, claude-agent-acp 0.44+), or vice versa. We use it to fall
-# back to the other mechanism if response-detection picked the wrong one.
-_METHOD_NOT_FOUND_CODE: Final[int] = -32601
-
 
 # Maximum characters for ACP tool call content — matches MAX_CMD_OUTPUT_SIZE
 # used by the terminal tool and the default max_message_chars in LLM config.
@@ -413,22 +406,6 @@ def _model_config_option(response: Any) -> Any | None:
     return None
 
 
-def _session_selects_model_via_config_option(response: Any) -> bool:
-    """Whether this session drives model selection through
-    ``session/set_config_option`` (the ``model`` config-option select) rather
-    than ``session/set_model``.
-
-    The ``models`` capability wins when present (gemini-cli 0.46 and older
-    codex/claude still carry it); only in its absence does a ``model`` config
-    option become the selection mechanism.
-    """
-    if response is None:
-        return False
-    if getattr(response, "models", None) is not None:
-        return False
-    return _model_config_option(response) is not None
-
-
 def _split_codex_model_effort(model: str) -> tuple[str, str | None]:
     """Split a codex-style ``<model>/<effort>`` id into ``(model, effort)``.
 
@@ -454,14 +431,15 @@ async def _apply_acp_model(
     *,
     via_config_option: bool,
 ) -> None:
-    """Apply ``model`` to a live ACP session via whichever protocol the session
+    """Apply ``model`` to a live ACP session via the mechanism the session
     advertised: ``set_config_option(configId="model")`` for configOptions-based
     servers (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``.
 
-    For the configOptions mechanism a codex ``<model>/<effort>`` id is split into
-    a bare ``model`` plus a separate ``reasoning_effort`` option (see
-    :func:`_split_codex_model_effort`); other ids apply as a single ``model``
-    selection.
+    A codex ``<model>/<effort>`` id is split into a bare ``model`` plus a
+    separate ``reasoning_effort`` option (see :func:`_split_codex_model_effort`).
+    The model selection is the switch; the effort is a secondary refinement, so
+    a rejected effort is logged rather than raised — the live session keeps the
+    applied model instead of reporting the whole switch failed (#3772).
     """
     if not via_config_option:
         await conn.set_session_model(model_id=model, session_id=session_id)
@@ -471,55 +449,22 @@ async def _apply_acp_model(
         config_id=_MODEL_CONFIG_OPTION_ID, value=base, session_id=session_id
     )
     if effort is not None:
-        # Order matters: reasoning_effort is only valid once the model is a known
-        # preset, which the call above just set.
-        await conn.set_config_option(
-            config_id=_REASONING_EFFORT_CONFIG_OPTION_ID,
-            value=effort,
-            session_id=session_id,
-        )
-
-
-def _is_method_not_found(exc: ACPRequestError) -> bool:
-    """Whether ``exc`` is a JSON-RPC "method not found" — i.e. the server does
-    not implement the model-selection call we used."""
-    return exc.code == _METHOD_NOT_FOUND_CODE
-
-
-async def _apply_acp_model_with_fallback(
-    conn: ClientSideConnection,
-    session_id: str,
-    model: str,
-    *,
-    via_config_option: bool,
-) -> bool:
-    """Apply ``model`` via the detected mechanism, falling back to the other if
-    the server reports the method missing.
-
-    Response-detection (``_session_selects_model_via_config_option``) is correct
-    for every CLI we've validated, but it reads an UNSTABLE capability and the
-    response shape can vary by build/auth state. If the chosen call raises
-    ``-32601 "Method not found"``, the server simply uses the *other* mechanism,
-    so we retry with it instead of crashing session init. Returns the
-    ``via_config_option`` value that actually applied the model.
-    """
-    try:
-        await _apply_acp_model(
-            conn, session_id, model, via_config_option=via_config_option
-        )
-        return via_config_option
-    except ACPRequestError as exc:
-        if not _is_method_not_found(exc):
-            raise
-        logger.info(
-            "ACP model-apply via %s rejected as method-not-found; retrying via %s",
-            "set_config_option" if via_config_option else "set_session_model",
-            "set_session_model" if via_config_option else "set_config_option",
-        )
-        await _apply_acp_model(
-            conn, session_id, model, via_config_option=not via_config_option
-        )
-        return not via_config_option
+        # reasoning_effort is only valid once the model is a known preset, which
+        # the call above just set. Best-effort: keep the applied model on failure.
+        try:
+            await conn.set_config_option(
+                config_id=_REASONING_EFFORT_CONFIG_OPTION_ID,
+                value=effort,
+                session_id=session_id,
+            )
+        except ACPRequestError as exc:
+            logger.warning(
+                "ACP reasoning_effort=%r rejected after model=%r applied (%s); "
+                "keeping the model on the server default effort",
+                effort,
+                base,
+                exc,
+            )
 
 
 def _usable_models(infos: Iterable[ACPModelInfo]) -> list[ACPModelInfo]:
@@ -528,32 +473,21 @@ def _usable_models(infos: Iterable[ACPModelInfo]) -> list[ACPModelInfo]:
     return [info for info in infos if info.model_id]
 
 
-def _config_option_to_model(opt: Any) -> ACPModelInfo:
-    """Build an :class:`ACPModelInfo` from a ``configOptions`` select option,
-    whose ``value`` is the model id. A non-string id degrades to ``""`` (then
-    dropped by :func:`_usable_models`), mirroring ``ACPModelInfo.from_protocol``.
-    """
-    value = getattr(opt, "value", None)
-    return ACPModelInfo(
-        model_id=value if isinstance(value, str) else "",
-        name=getattr(opt, "name", None),
-        description=getattr(opt, "description", None),
-    )
-
-
 def _extract_session_models(
     response: Any,
-) -> tuple[str | None, list[ACPModelInfo] | None]:
-    """Extract the model state off a session response.
-
-    Returns a ``(current_model_id, available_models)`` pair, both best-effort.
-    ``available_models`` is normalized into our own stable :class:`ACPModelInfo`
-    type at this boundary so nothing downstream depends on the vendored
-    ``acp.schema`` shape. Reads whichever mechanism the session advertised: the
-    UNSTABLE ``models`` capability, or the ``model`` ``configOptions`` select
-    (codex-acp 0.16+, claude-agent-acp 0.44+).
-
-    The second element distinguishes **absent** from **empty** — this matters
+    *,
+    default_via_config_option: bool = False,
+) -> tuple[str | None, list[ACPModelInfo] | None, bool]:
+    """Extract the model state off a session response in a single scan.
+
+    Returns ``(current_model_id, available_models, via_config_option)``, all
+    best-effort. ``available_models`` is normalized into our own stable
+    :class:`ACPModelInfo` type at this boundary so nothing downstream depends on
+    the vendored ``acp.schema`` shape. Reads whichever mechanism the session
+    advertised: the UNSTABLE ``models`` capability, or the ``model``
+    ``configOptions`` select (codex-acp 0.16+, claude-agent-acp 0.44+).
+
+    ``available_models`` distinguishes **absent** from **empty** — this matters
     for resume persistence (preserve the last-known list when the server didn't
     report one; clear it when the server explicitly says it has none):
 
@@ -562,26 +496,37 @@ def _extract_session_models(
     - ``[]``    — the server reported a mechanism but offers no (usable) models.
     - ``[...]`` — the reported models, minus any with an unusable ``model_id``.
 
+    ``via_config_option`` is the selection mechanism: ``True`` for the
+    configOptions select, ``False`` for the ``models`` capability, and
+    ``default_via_config_option`` when the response carries neither — callers on
+    the resume path pass the persisted mechanism hint so a config-option session
+    whose ``load_session`` omits the model block still reapplies via the right
+    call instead of defaulting to ``set_session_model``.
+
     ``getattr`` keeps the helper tolerant of agents that emit a partial
     structure.
     """
     if response is None:
-        return None, None
+        return None, None, default_via_config_option
     models = getattr(response, "models", None)
     if models is not None:
         current = getattr(models, "current_model_id", None)
         current = current if isinstance(current, str) and current else None
         raw = getattr(models, "available_models", None) or []
-        return current, _usable_models(ACPModelInfo.from_protocol(m) for m in raw)
+        usable = _usable_models(ACPModelInfo.from_protocol(m) for m in raw)
+        return current, usable, False
     # configOptions mechanism: the ``model`` select carries the same state, with
-    # ``value`` as the model id (== the ``set_config_option`` target).
+    # each option's ``value`` as the model id (== the ``set_config_option`` target).
     opt = _model_config_option(response)
     if opt is None:
-        return None, None
+        return None, None, default_via_config_option
     current = getattr(opt, "current_value", None)
     current = current if isinstance(current, str) and current else None
     options = getattr(opt, "options", None) or []
-    return current, _usable_models(_config_option_to_model(o) for o in options)
+    usable = _usable_models(
+        ACPModelInfo.from_protocol(o, id_attr="value") for o in options
+    )
+    return current, usable, True
 
 
 # The ACP MCP server union accepted by new_session() / load_session().
@@ -711,14 +656,14 @@ async def _maybe_set_session_model(
     """Apply the *initial* session model right after session creation.
 
     Session-creation path only, gated on
-    ``ACPProviderInfo.supports_set_session_model``. Built-in providers get a
-    one-shot model-apply via whichever protocol the session advertised
-    (``via_config_option``): ``set_config_option(configId="model")`` for
-    configOptions-based servers (codex-acp 0.16+, claude-agent-acp 0.44+), else
-    ``set_session_model``. The ``_meta`` model payload is ignored by the pinned
-    CLIs, so the protocol call is what actually applies the model (#3654).
-    Unknown/custom providers fall back to ``set_config_option(configId="model")``.
-    Runtime switches go through :meth:`ACPAgent.set_acp_model`.
+    ``ACPProviderInfo.supports_set_session_model``. The model is applied via the
+    mechanism the session advertised (``via_config_option``):
+    ``set_config_option(configId="model")`` for configOptions-based servers
+    (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``. The
+    ``_meta`` model payload is ignored by the pinned CLIs, so this protocol call
+    is what actually applies the model (#3654). Unknown/custom providers are
+    best-effort (a rejection is tolerated — the session keeps the server
+    default). Runtime switches go through :meth:`ACPAgent.set_acp_model`.
 
     Returns ``True`` only when a model-setting call succeeded (the override
     reached the server); ``False`` when there is nothing to apply or the call
@@ -728,29 +673,25 @@ async def _maybe_set_session_model(
         return False
     provider = detect_acp_provider_by_agent_name(agent_name)
     if provider is not None and provider.supports_set_session_model:
-        await _apply_acp_model_with_fallback(
+        await _apply_acp_model(
             conn, session_id, acp_model, via_config_option=via_config_option
         )
         return True
-    # For unknown/custom providers, try the generic set_config_option method
-    # which is a standard ACP protocol method for setting configuration options
+    # Unknown/custom provider: best-effort apply via the detected mechanism; a
+    # rejection is tolerated so session creation can't break.
     if provider is None:
         try:
-            await conn.set_config_option(
-                config_id="model",
-                value=acp_model,
-                session_id=session_id,
+            await _apply_acp_model(
+                conn, session_id, acp_model, via_config_option=via_config_option
             )
             logger.info(
-                "Set model %r on unknown/custom ACP server %s via set_config_option",
-                acp_model,
-                agent_name,
+                "Set model %r on unknown/custom ACP server %s", acp_model, agent_name
             )
             return True
         except ACPRequestError as e:
             logger.warning(
-                "Could not set model %r on unknown/custom ACP server %s via "
-                "set_config_option (%s); the session will use the server default",
+                "Could not set model %r on unknown/custom ACP server %s (%s); "
+                "the session will use the server default",
                 acp_model,
                 agent_name,
                 e,
@@ -770,27 +711,22 @@ async def _reapply_session_model_on_resume(
 
     ``load_session()`` carries no model ``_meta``, so a session resumed after a
     runtime switch (or with any persisted ``acp_model``) would otherwise run on
-    the ACP server's default. This issues ``set_session_model`` so the resumed
-    live session matches the serialized ``acp_model``.
-
-    For unknown/custom providers (e.g. Devin CLI), we fall back to the generic
-    ``set_config_option`` method with configId="model", which is a standard ACP
-    method that many custom ACP servers support.
-
-    The gating mirrors :meth:`ACPAgent.set_acp_model` (attempt for custom/unknown
-    servers and known providers that support runtime switching; skip only known
-    providers that don't), deliberately differing from the initial-selection
-    gate: claude-agent-acp selects its initial model via ``_meta`` yet supports
-    ``set_session_model`` for later switches. A server that rejects the call is
-    tolerated (logged) — like the ``load_session`` fallback above — so resume
-    can't break; the session keeps the server default until the next switch.
+    the ACP server's default. This applies the model via the mechanism the
+    resumed session uses (``via_config_option``: ``set_config_option`` for
+    codex-acp 0.16+/claude-agent-acp 0.44+, else ``set_session_model``) so the
+    live session matches the serialized ``acp_model``. The caller derives
+    ``via_config_option`` from the ``load_session`` response, falling back to the
+    persisted mechanism hint when that response omits the model block.
+
+    Gated on runtime-switch support (skip only known providers that don't); a
+    server that rejects the call is tolerated (logged) — like the
+    ``load_session`` fallback — so resume can't break and the session keeps the
+    server default until the next switch.
 
     Returns ``True`` only when a model-setting call was issued and accepted, so
     the caller knows the resumed live session is actually running ``acp_model``.
     ``False`` when there is nothing to reapply, the provider doesn't support the
-    switch, or the server rejected the call (swallowed) — in those cases the
-    session keeps the server default and the override must not be surfaced as
-    the current model.
+    switch, or the server rejected the call (swallowed).
     """
     if not acp_model:
         return False
@@ -798,28 +734,9 @@ async def _reapply_session_model_on_resume(
     if provider is not None and not provider.supports_runtime_model_switch:
         return False
     try:
-        if provider is not None:
-            # Known provider: apply via the mechanism the resumed session uses
-            # (set_config_option for codex-acp 0.16+/claude-agent-acp 0.44+,
-            # else set_session_model). A rejection is already tolerated below
-            # (the session keeps the server default), so resume doesn't need the
-            # cross-mechanism fallback the init/switch paths use.
-            await _apply_acp_model(
-                conn, session_id, acp_model, via_config_option=via_config_option
-            )
-        else:
-            # Unknown/custom provider: try set_config_option as fallback
-            await conn.set_config_option(
-                config_id="model",
-                value=acp_model,
-                session_id=session_id,
-            )
-            logger.info(
-                "Reapplied model %r on unknown/custom ACP server %s "
-                "via set_config_option",
-                acp_model,
-                agent_name,
-            )
+        await _apply_acp_model(
+            conn, session_id, acp_model, via_config_option=via_config_option
+        )
         return True
     except ACPRequestError as e:
         logger.warning(
@@ -2042,6 +1959,10 @@ def init_state(
             # conversation list can tell the picker whether to offer live
             # switching without re-detecting the provider server-side.
             "acp_supports_runtime_model_switch": self.supports_runtime_model_switch,
+            # Detected model-selection mechanism — persisted so a resume whose
+            # load_session omits the model block reapplies the model via the
+            # right call instead of defaulting to set_session_model.
+            "acp_model_via_config_option": self._model_via_config_option,
         }
         # When starting a fresh session, clear stale suffix marker so the next
         # launch knows to re-inject it (PR behavior: suffix state is per-session).
@@ -2596,11 +2517,21 @@ async def _init() -> tuple[
                         mcp_servers=acp_mcp_servers,
                     )
                     session_id = prior_session_id
-                    reported_model_id, available_models = _extract_session_models(
-                        load_response
+                    # load_session often omits the model block; fall back to the
+                    # mechanism detected at session creation (persisted alongside
+                    # the session id) so a config-option session still reapplies
+                    # via the right call rather than defaulting to
+                    # set_session_model and silently running the server default.
+                    persisted_via_config_option = bool(
+                        state.agent_state.get("acp_model_via_config_option", False)
                     )
-                    self._model_via_config_option = (
-                        _session_selects_model_via_config_option(load_response)
+                    (
+                        reported_model_id,
+                        available_models,
+                        self._model_via_config_option,
+                    ) = _extract_session_models(
+                        load_response,
+                        default_via_config_option=persisted_via_config_option,
                     )
                     logger.info(
                         "Resumed ACP session %s (cwd=%s)",
@@ -2632,12 +2563,14 @@ async def _init() -> tuple[
                     **session_meta,
                 )
                 session_id = response.session_id
-                reported_model_id, available_models = _extract_session_models(response)
-                # Detect the model-selection protocol the server advertised so
-                # init and later runtime switches use the right call.
-                self._model_via_config_option = (
-                    _session_selects_model_via_config_option(response)
-                )
+                # Detect the model-selection protocol the server advertised (in
+                # the same scan) so init and later runtime switches use the right
+                # call.
+                (
+                    reported_model_id,
+                    available_models,
+                    self._model_via_config_option,
+                ) = _extract_session_models(response)
                 # Initial-model protocol call for every built-in provider
                 # (codex, gemini, claude-code). The pinned claude/codex CLIs
                 # ignore the _meta above, so this protocol call is what actually
@@ -3719,58 +3652,26 @@ def set_acp_model(self, model: str) -> None:
                 timeout=self.acp_prompt_timeout,
             )
         except ACPRequestError as e:
-            pending_error: ACPRequestError | None = e
-            if _is_method_not_found(e):
-                # The session uses the other model-selection mechanism (or the
-                # init-time detection picked the wrong one): retry once with it,
-                # and remember the working mechanism for later switches. If the
-                # retry also fails, fall through to the error translation below.
-                flipped = not self._model_via_config_option
-                try:
-                    self._executor.run_async(
-                        _apply_acp_model(
-                            self._conn,
-                            self._session_id,
-                            model,
-                            via_config_option=flipped,
-                        ),
-                        timeout=self.acp_prompt_timeout,
-                    )
-                except ACPRequestError as e2:
-                    pending_error = e2
-                else:
-                    # The live session's mechanism diverged from init-time
-                    # detection; warn and adopt the working one for later
-                    # switches.
-                    logger.warning(
-                        "ACP model-select mechanism changed mid-session "
-                        "(init detected via_config_option=%s, switch needed %s); "
-                        "using the latter for subsequent switches",
-                        not flipped,
-                        flipped,
-                    )
-                    self._model_via_config_option = flipped
-                    pending_error = None  # both selections applied
-            if pending_error is not None:
-                # Server-internal failures (JSON-RPC -32603) are not the caller's
-                # fault, and the prompt path already treats them as retriable.
-                # Let them propagate (-> 5xx) instead of mislabeling them as a
-                # 400 client error.
-                if pending_error.code in _RETRIABLE_SERVER_ERROR_CODES:
-                    raise pending_error
-                # acp.exceptions.RequestError derives from Exception (not
-                # RuntimeError); surface a true client/protocol rejection (e.g.
-                # method-not-found on both mechanisms, invalid model id) as a
-                # ValueError so callers — and the agent-server route — treat it
-                # as a 400-class client error rather than an opaque 500.
-                method = (
-                    "set_config_option(model)"
-                    if self._model_via_config_option
-                    else "set_session_model"
-                )
-                raise ValueError(
-                    f"ACP server rejected {method}(model={model!r}): {pending_error}"
-                ) from pending_error
+            # Server-internal failures (JSON-RPC -32603) are not the caller's
+            # fault, and the prompt path already treats them as retriable. Let
+            # them propagate (-> 5xx) instead of mislabeling them as a 400.
+            if e.code in _RETRIABLE_SERVER_ERROR_CODES:
+                raise
+            # acp.exceptions.RequestError derives from Exception (not
+            # RuntimeError); surface a true client/protocol rejection (invalid
+            # model id, or method-not-found on a mis-detected server) as a
+            # ValueError so callers — and the agent-server route — treat it as a
+            # 400-class client error rather than an opaque 500. The mechanism is
+            # the one the session advertised (``_model_via_config_option``), so
+            # the message names the call that actually failed.
+            method = (
+                "set_config_option(model)"
+                if self._model_via_config_option
+                else "set_session_model"
+            )
+            raise ValueError(
+                f"ACP server rejected {method}(model={model!r}): {e}"
+            ) from e
         # Reflect the live model on the sentinel LLM + metrics so cost/token
         # accounting and serialized state show the model actually in use
         # (mirrors model_post_init). The ``acp_model`` field is frozen, so the
diff --git a/openhands-sdk/openhands/sdk/agent/acp_models.py b/openhands-sdk/openhands/sdk/agent/acp_models.py
index 4e7d79b340..c6c89390ea 100644
--- a/openhands-sdk/openhands/sdk/agent/acp_models.py
+++ b/openhands-sdk/openhands/sdk/agent/acp_models.py
@@ -49,15 +49,19 @@ class ACPModelInfo(BaseModel):
     )
 
     @classmethod
-    def from_protocol(cls, raw: Any) -> ACPModelInfo:
+    def from_protocol(cls, raw: Any, *, id_attr: str = "model_id") -> ACPModelInfo:
         """Build from a raw ACP ``ModelInfo`` (or any duck-typed object).
 
         Tolerant of partial/malformed entries: non-string fields degrade to
         ``""`` (``model_id``) or ``None`` (``name``/``description``) rather
         than raising, since the source is an UNSTABLE protocol capability that
         older or half-implemented agents may emit incompletely.
+
+        ``id_attr`` names the attribute carrying the model id — ``"model_id"``
+        for a ``models``-capability ``ModelInfo``, ``"value"`` for a
+        ``configOptions`` select option.
         """
-        model_id = getattr(raw, "model_id", None)
+        model_id = getattr(raw, id_attr, None)
         name = getattr(raw, "name", None)
         description = getattr(raw, "description", None)
         return cls(
diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py
index a98b1a399e..e2cc4d74a7 100644
--- a/openhands-sdk/openhands/sdk/settings/acp_providers.py
+++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py
@@ -378,21 +378,10 @@ class ACPProviderInfo:
 # `ACPAgentSettings.resolve_acp_command` runs the pinned `binary_name` instead,
 # so the `@version` suffix is a no-op there.
 #
-# claude-agent-acp 0.44+ (and codex-acp 0.16+) select the model via a ``model``
-# ``configOptions`` entry instead of ``session/set_model``; the SDK detects the
-# mechanism per session (see ``_session_selects_model_via_config_option``) and
-# applies it accordingly, so model switching works on either mechanism.
-#
-# claude-agent-acp is held at 0.44.0 (0.45.0/0.46.0 published <7 days ago and
-# trip the supply-chain hold) — same configOptions mechanism + model set as
-# 0.46.0; revisit once a newer release ages past the hold.
-#
-# codex-acp 0.16 split the 0.15 combined ``<model>/<effort>`` id into a bare
-# ``model`` preset id + a separate ``reasoning_effort`` config option; sending
-# the combined id to ``set_config_option(model)`` is taken literally and 400s
-# at the backend. The apply path splits accordingly (see
-# ``_split_codex_model_effort`` in acp_agent.py), so ``_CODEX_MODELS`` keeps the
-# combined ids and switching works on 0.16. See issue #3772.
+# claude-agent-acp 0.44+ / codex-acp 0.16+ select the model via a ``model``
+# ``configOptions`` entry, not ``session/set_model``; the SDK detects which per
+# session in ``_extract_session_models`` and applies it. codex 0.16 also splits
+# the model id from the reasoning effort (see ``_split_codex_model_effort``). #3772.
 CLAUDE_AGENT_ACP_VERSION = "0.44.0"
 CODEX_ACP_VERSION = "0.16.0"
 GEMINI_CLI_VERSION = "0.46.0"
diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py
index 3d866b5a2a..264e0536a2 100644
--- a/tests/sdk/agent/test_acp_agent.py
+++ b/tests/sdk/agent/test_acp_agent.py
@@ -21,7 +21,6 @@
 from openhands.sdk.agent.acp_agent import (
     ACPAgent,
     _apply_acp_model,
-    _apply_acp_model_with_fallback,
     _classify_acp_init_error,
     _codex_auth_file,
     _codex_base_url_overrides,
@@ -29,7 +28,6 @@
     _extract_session_models,
     _extract_token_usage,
     _image_url_to_acp_block,
-    _is_method_not_found,
     _mask_json_value,
     _maybe_set_session_model,
     _mcp_config_to_acp_servers,
@@ -37,7 +35,6 @@
     _reapply_session_model_on_resume,
     _select_auth_method,
     _serialize_tool_content,
-    _session_selects_model_via_config_option,
     _split_codex_model_effort,
     _strip_inherited_npm_env,
 )
@@ -4478,33 +4475,40 @@ async def test_missing_model_skips_protocol_override(self):
         assert applied is False
 
     @pytest.mark.asyncio
-    async def test_unknown_provider_uses_set_config_option_fallback(self):
-        # An unknown/custom server now tries set_config_option as a fallback
-        # for model selection, which is a standard ACP method.
-        conn = AsyncMock()
+    async def test_unknown_provider_applies_via_detected_mechanism(self):
+        # An unknown/custom server applies the model via whichever mechanism its
+        # session/new advertised (detected upstream), not a hardcoded guess.
+        conn_cfg = AsyncMock()
         applied = await _maybe_set_session_model(
-            conn, "devin-cli", "session-1", "kimi-k2-6", via_config_option=False
+            conn_cfg, "devin-cli", "session-1", "kimi-k2-6", via_config_option=True
         )
-        conn.set_session_model.assert_not_called()
-        conn.set_config_option.assert_awaited_once_with(
-            config_id="model",
-            value="kimi-k2-6",
-            session_id="session-1",
+        conn_cfg.set_config_option.assert_awaited_once_with(
+            config_id="model", value="kimi-k2-6", session_id="session-1"
+        )
+        conn_cfg.set_session_model.assert_not_called()
+        assert applied is True
+
+        conn_set = AsyncMock()
+        applied = await _maybe_set_session_model(
+            conn_set, "devin-cli", "session-1", "kimi-k2-6", via_config_option=False
         )
+        conn_set.set_session_model.assert_awaited_once_with(
+            model_id="kimi-k2-6", session_id="session-1"
+        )
+        conn_set.set_config_option.assert_not_called()
         assert applied is True
 
     @pytest.mark.asyncio
-    async def test_unknown_provider_set_config_option_failure_is_tolerated(self):
-        # If set_config_option fails for an unknown provider, we log a warning
-        # but don't break session creation.
+    async def test_unknown_provider_apply_failure_is_tolerated(self):
+        # If the model-apply fails for an unknown provider, we log a warning but
+        # don't break session creation.
         conn = AsyncMock()
         conn.set_config_option.side_effect = ACPRequestError(
             code=-32601, message="method not found"
         )
         applied = await _maybe_set_session_model(
-            conn, "some-custom-acp", "session-1", "whatever", via_config_option=False
+            conn, "some-custom-acp", "session-1", "whatever", via_config_option=True
         )
-        conn.set_session_model.assert_not_called()
         conn.set_config_option.assert_awaited_once()
         assert applied is False
 
@@ -4558,12 +4562,12 @@ async def test_missing_model_skips_reapply(self):
         assert applied is False
 
     @pytest.mark.asyncio
-    async def test_unknown_provider_attempts_reapply_via_set_config_option(self):
-        # provider=None (custom server) now attempts reapply via set_config_option
-        # as a fallback, which is a standard ACP method.
+    async def test_unknown_provider_reapplies_via_detected_mechanism(self):
+        # provider=None (custom server) reapplies via whichever mechanism the
+        # resumed session advertised, not a hardcoded guess.
         conn = AsyncMock()
         applied = await _reapply_session_model_on_resume(
-            conn, "devin-cli", "sess-1", "kimi-k2-6", via_config_option=False
+            conn, "devin-cli", "sess-1", "kimi-k2-6", via_config_option=True
         )
         conn.set_session_model.assert_not_called()
         conn.set_config_option.assert_awaited_once_with(
@@ -4610,7 +4614,7 @@ async def test_client_rejection_is_swallowed_on_resume(self):
             code=-32601, message="method not found"
         )
         applied = await _reapply_session_model_on_resume(
-            conn, "some-custom-acp", "sess-1", "whatever", via_config_option=False
+            conn, "some-custom-acp", "sess-1", "whatever", via_config_option=True
         )
         conn.set_config_option.assert_awaited_once()
         # Rejected => the live session kept the server default, so the override
@@ -4740,23 +4744,22 @@ def test_switches_claude_via_config_option_single_call(self):
         )
         assert agent._current_model_id == "sonnet"
 
-    def test_switch_falls_back_on_method_not_found(self):
-        # Detected set_session_model, but the live server only has the
-        # configOptions mechanism: the first apply raises -32601, the switch
-        # retries with the other call and remembers it.
+    def test_switch_method_not_found_raises_no_fallback(self):
+        # No cross-mechanism fallback: a -32601 surfaces as a ValueError naming
+        # the advertised mechanism, and the other call is never attempted.
         agent = self._wire(_make_agent(), "codex-acp", via_config_option=False)
         agent._conn.set_session_model.side_effect = ACPRequestError(
             code=-32601, message="Method not found"
         )
-        agent.set_acp_model("gpt-5.4/low")
-        assert agent._executor.run_async.call_count == 2
-        # The flipped mechanism applied (split into model + reasoning_effort).
-        assert agent._conn.set_config_option.call_count == 2
-        assert agent._model_via_config_option is True
-        assert agent.llm.model == "gpt-5.4/low"
-        assert agent._current_model_id == "gpt-5.4/low"
+        with pytest.raises(ValueError, match="rejected set_session_model"):
+            agent.set_acp_model("gpt-5.4/low")
+        agent._conn.set_config_option.assert_not_called()
+        agent._executor.run_async.assert_called_once()
+        # Mechanism + sentinel model are left unchanged on a failed switch.
+        assert agent._model_via_config_option is False
+        assert agent.llm.model != "gpt-5.4/low"
 
-    def test_switch_invalid_model_does_not_fall_back(self):
+    def test_switch_invalid_model_raises_value_error(self):
         # A -32602 invalid-params is a real client error, not a wrong-mechanism
         # signal: surface it as ValueError without a second call.
         agent = self._wire(_make_agent(), "codex-acp", via_config_option=True)
@@ -4820,8 +4823,10 @@ def test_translates_acp_request_error_to_value_error(self):
         # A protocol-level rejection (e.g. method-not-found on a custom server,
         # or an invalid model id) must surface as a ValueError — not leak as a
         # raw acp.exceptions.RequestError — so the agent-server maps it to 400.
+        # The rejection is raised by the conn (driven through the real apply
+        # coroutine), exercising set_acp_model's actual error path.
         agent = self._wire(_make_agent(), "codex-acp")
-        agent._executor.run_async.side_effect = ACPRequestError(
+        agent._conn.set_session_model.side_effect = ACPRequestError(
             code=-32601, message="method not found"
         )
         with pytest.raises(ValueError, match="rejected set_session_model"):
@@ -4835,7 +4840,7 @@ def test_propagates_server_internal_error(self):
         # than be mislabeled as a 400-class ValueError, mirroring the retriable
         # handling on the prompt path.
         agent = self._wire(_make_agent(), "codex-acp")
-        agent._executor.run_async.side_effect = ACPRequestError(
+        agent._conn.set_session_model.side_effect = ACPRequestError(
             code=-32603, message="internal error"
         )
         with pytest.raises(ACPRequestError):
@@ -6141,10 +6146,11 @@ def _models_block(current_model_id: str, model_ids: list[str]):
         models.available_models = entries
         return models
 
-    def test_unknown_provider_applies_override_via_set_config_option(self, tmp_path):
+    def test_unknown_provider_applies_override_via_detected_mechanism(self, tmp_path):
         """Fresh session on an unknown/custom provider with ``acp_model`` set:
-        the override is pushed via ``set_config_option`` (not ``set_session_model``),
-        so ``current_model_id`` must reflect the applied override.
+        the override is pushed via the mechanism the session advertised — here a
+        ``models``-capability response, so ``set_session_model`` — and
+        ``current_model_id`` must reflect the applied override.
         """
         agent = _make_agent(acp_model="caller-model")
         state = _make_state(tmp_path)
@@ -6159,9 +6165,9 @@ def test_unknown_provider_applies_override_via_set_config_option(self, tmp_path)
 
         self._patched_start_acp_server(agent, state, conn=conn)
 
-        conn.set_session_model.assert_not_awaited()
-        conn.set_config_option.assert_awaited_once_with(
-            config_id="model", value="caller-model", session_id="fresh-sess"
+        conn.set_config_option.assert_not_awaited()
+        conn.set_session_model.assert_awaited_once_with(
+            model_id="caller-model", session_id="fresh-sess"
         )
         assert agent.current_model_id == "caller-model"
 
@@ -7023,7 +7029,7 @@ def test_returns_both_when_response_carries_them(self):
         response.models = MagicMock()
         response.models.current_model_id = "default"
         response.models.available_models = [m1]
-        cur, avail = _extract_session_models(response)
+        cur, avail, _ = _extract_session_models(response)
         assert cur == "default"
         # Normalized into our stable ACPModelInfo, not the raw acp type.
         assert avail == [
@@ -7038,7 +7044,7 @@ def test_returns_none_list_when_models_block_absent(self):
         # Older agents don't include the ``models`` block at all -> None, so
         # callers know nothing was reported (and can preserve prior state).
         response = MagicMock(spec=[])
-        cur, avail = _extract_session_models(response)
+        cur, avail, _ = _extract_session_models(response)
         assert cur is None
         assert avail is None
 
@@ -7049,19 +7055,19 @@ def test_returns_empty_list_when_available_models_missing(self):
         response.models = MagicMock()
         response.models.current_model_id = "gpt-5"
         response.models.available_models = None
-        cur, avail = _extract_session_models(response)
+        cur, avail, _ = _extract_session_models(response)
         assert cur == "gpt-5"
         assert avail == []
 
     def test_returns_none_list_when_response_is_none(self):
         # ``load_session`` can return ``None`` for servers that don't
         # implement the call — the helper must not crash, and reports "absent".
-        assert _extract_session_models(None) == (None, None)
+        assert _extract_session_models(None) == (None, None, False)
 
     def test_returns_none_list_when_models_field_is_none(self):
         response = MagicMock()
         response.models = None
-        assert _extract_session_models(response) == (None, None)
+        assert _extract_session_models(response) == (None, None, False)
 
     def test_returns_none_when_current_model_id_is_empty_string(self):
         # An empty string is treated the same as a missing field — we don't
@@ -7071,7 +7077,7 @@ def test_returns_none_when_current_model_id_is_empty_string(self):
         response.models = MagicMock()
         response.models.current_model_id = ""
         response.models.available_models = []
-        assert _extract_session_models(response) == (None, [])
+        assert _extract_session_models(response) == (None, [], False)
 
     def test_returns_none_when_current_model_id_is_not_a_string(self):
         # Defensive: an agent returning a non-string here is malformed.
@@ -7079,7 +7085,7 @@ def test_returns_none_when_current_model_id_is_not_a_string(self):
         response.models = MagicMock()
         response.models.current_model_id = 42
         response.models.available_models = []
-        assert _extract_session_models(response) == (None, [])
+        assert _extract_session_models(response) == (None, [], False)
 
 
 class TestExtractSessionModelsNormalization:
@@ -7104,7 +7110,7 @@ def test_maps_fields_through(self):
         response.models.available_models = [
             self._raw("gpt-5.4/low", "gpt-5.4 (low)", "Strong everyday model."),
         ]
-        _cur, avail = _extract_session_models(response)
+        _cur, avail, _ = _extract_session_models(response)
         assert avail == [
             ACPModelInfo(
                 model_id="gpt-5.4/low",
@@ -7125,7 +7131,7 @@ def test_drops_entries_without_usable_id(self):
             self._raw(model_id="good", name="Good"),
             self._raw(model_id="", name="Empty"),  # empty -> dropped
         ]
-        _cur, avail = _extract_session_models(response)
+        _cur, avail, _ = _extract_session_models(response)
         assert avail == [ACPModelInfo(model_id="good", name="Good", description=None)]
 
 
@@ -7135,8 +7141,8 @@ class TestConfigOptionModelMechanism:
     codex-acp 0.16+ and claude-agent-acp 0.46+ dropped the UNSTABLE ``models``
     capability + ``session/set_model`` in favour of a ``model`` config-option
     select driven by ``session/set_config_option``. ``_extract_session_models``
-    reads that select, and ``_session_selects_model_via_config_option`` picks
-    the apply mechanism.
+    reads that select and reports the apply mechanism as its third return value
+    (``via_config_option``) in the same scan.
     """
 
     def _select(self, *, id="model", type="select", current_value, options):
@@ -7166,8 +7172,9 @@ def test_extracts_model_state_from_config_option(self):
                 )
             ],
         )
-        cur, avail = _extract_session_models(response)
+        cur, avail, via = _extract_session_models(response)
         assert cur == "opus[1m]"
+        assert via is True
         assert avail == [
             ACPModelInfo(
                 model_id="default",
@@ -7189,8 +7196,8 @@ def test_unwraps_rootmodel_wrapped_config_option(self):
         )
         wrapped = SimpleNamespace(root=select)  # mimics the RootModel wrapper
         response = self._response(config_options=[wrapped])
-        assert _session_selects_model_via_config_option(response) is True
-        cur, avail = _extract_session_models(response)
+        cur, avail, via = _extract_session_models(response)
+        assert via is True
         assert cur == "opus[1m]"
         assert avail is not None
         assert [m.model_id for m in avail] == ["opus[1m]", "sonnet"]
@@ -7208,10 +7215,10 @@ def test_models_capability_wins_over_config_option(self):
                 )
             ],
         )
-        cur, avail = _extract_session_models(response)
+        cur, avail, via = _extract_session_models(response)
         assert cur == "from-models"
         assert avail == []
-        assert _session_selects_model_via_config_option(response) is False
+        assert via is False
 
     def test_detects_config_option_mechanism(self):
         response = self._response(
@@ -7221,10 +7228,11 @@ def test_detects_config_option_mechanism(self):
                 )
             ],
         )
-        assert _session_selects_model_via_config_option(response) is True
+        assert _extract_session_models(response)[2] is True
 
     def test_ignores_non_model_config_options(self):
-        # A ``mode`` select alongside no ``model`` select ⇒ old mechanism.
+        # A ``mode`` select alongside no ``model`` select ⇒ old mechanism, and
+        # the default (False) is reported since neither block was present.
         response = self._response(
             config_options=[
                 SimpleNamespace(
@@ -7235,8 +7243,7 @@ def test_ignores_non_model_config_options(self):
                 ),
             ],
         )
-        assert _session_selects_model_via_config_option(response) is False
-        assert _extract_session_models(response) == (None, None)
+        assert _extract_session_models(response) == (None, None, False)
 
     def test_drops_options_without_usable_value(self):
         response = self._response(
@@ -7251,12 +7258,31 @@ def test_drops_options_without_usable_value(self):
                 )
             ],
         )
-        cur, avail = _extract_session_models(response)
+        cur, avail, _ = _extract_session_models(response)
         assert cur == "ok"
         assert avail == [ACPModelInfo(model_id="ok", name="Ok", description=None)]
 
     def test_none_response_is_not_config_option(self):
-        assert _session_selects_model_via_config_option(None) is False
+        assert _extract_session_models(None)[2] is False
+
+    def test_default_via_config_option_used_when_no_block_present(self):
+        # On resume a load_session that omits both the models capability and the
+        # model config-option must not blindly default to set_session_model: the
+        # caller passes the persisted mechanism hint, which is honored (#3772).
+        response = self._response(config_options=[])
+        assert _extract_session_models(response)[2] is False
+        assert (
+            _extract_session_models(response, default_via_config_option=True)[2] is True
+        )
+        # A present block still wins over the hint.
+        present = self._response(
+            config_options=[
+                self._select(current_value="x", options=[self._opt("x", "X")])
+            ]
+        )
+        assert (
+            _extract_session_models(present, default_via_config_option=False)[2] is True
+        )
 
 
 # Real ``session/new`` wire payloads (by-alias) captured from the pinned CLIs.
@@ -7341,98 +7367,58 @@ def test_config_options_is_the_schema_field_name(self):
 
     def test_claude_046_uses_config_option(self):
         resp = NewSessionResponse.model_validate(_CLAUDE_046_SESSION)
-        assert _session_selects_model_via_config_option(resp) is True
-        cur, avail = _extract_session_models(resp)
+        cur, avail, via = _extract_session_models(resp)
+        assert via is True
         assert cur == "opus[1m]"
         assert avail is not None
         assert [m.model_id for m in avail] == ["default", "opus[1m]", "sonnet", "haiku"]
 
     def test_codex_016_uses_config_option(self):
         resp = NewSessionResponse.model_validate(_CODEX_016_SESSION)
-        assert _session_selects_model_via_config_option(resp) is True
-        cur, avail = _extract_session_models(resp)
+        cur, avail, via = _extract_session_models(resp)
+        assert via is True
         assert cur == "gpt-5.5"
         assert avail is not None
         assert [m.model_id for m in avail] == ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"]
 
     def test_gemini_046_uses_set_session_model(self):
         resp = NewSessionResponse.model_validate(_GEMINI_046_SESSION)
-        assert _session_selects_model_via_config_option(resp) is False
-        cur, avail = _extract_session_models(resp)
+        cur, avail, via = _extract_session_models(resp)
+        assert via is False
         assert cur == "gemini-3-flash-preview"
         assert avail is not None
         assert "gemini-3-pro-preview" in [m.model_id for m in avail]
 
 
-class TestApplyAcpModelFallback:
-    """Model-apply falls back to the other mechanism on -32601 method-not-found.
-
-    Response-detection is correct for every validated CLI, but it reads an
-    UNSTABLE capability; a misdetect (or a server that moved between mechanisms)
-    must self-heal rather than crash session init / a switch.
+class TestApplyAcpModelNoFallback:
+    """``_apply_acp_model`` applies via the advertised mechanism only — there is
+    no cross-mechanism fallback. Response-detection is deterministic from the
+    session/new shape and correct for every validated CLI, so any rejection is a
+    real error (invalid model id, or a genuinely mis-advertised server) and
+    propagates rather than silently trying the other call.
     """
 
-    def test_is_method_not_found(self):
-        assert _is_method_not_found(ACPRequestError(code=-32601, message="x")) is True
-        assert _is_method_not_found(ACPRequestError(code=-32603, message="x")) is False
-        assert _is_method_not_found(ACPRequestError(code=-32602, message="x")) is False
-
     @pytest.mark.asyncio
-    async def test_config_option_falls_back_to_set_session_model(self):
-        # Detected configOptions, but the server only has set_session_model.
+    async def test_config_option_rejection_propagates(self):
         conn = AsyncMock()
         conn.set_config_option.side_effect = ACPRequestError(
             code=-32601, message="Method not found"
         )
-        effective = await _apply_acp_model_with_fallback(
-            conn, "sess-1", "gemini-3-flash", via_config_option=True
-        )
-        conn.set_config_option.assert_awaited_once()
-        conn.set_session_model.assert_awaited_once_with(
-            model_id="gemini-3-flash", session_id="sess-1"
-        )
-        assert effective is False  # the mechanism that actually worked
+        with pytest.raises(ACPRequestError):
+            await _apply_acp_model(conn, "sess-1", "opus[1m]", via_config_option=True)
+        conn.set_session_model.assert_not_called()  # no fallback
 
     @pytest.mark.asyncio
-    async def test_set_session_model_falls_back_to_config_option(self):
-        # Detected set_session_model, but the server moved to configOptions
-        # (codex 0.16 / claude 0.46 reached via a stale/degraded detection).
+    async def test_set_session_model_rejection_propagates(self):
         conn = AsyncMock()
         conn.set_session_model.side_effect = ACPRequestError(
             code=-32601, message="Method not found"
         )
-        effective = await _apply_acp_model_with_fallback(
-            conn, "sess-1", "opus[1m]", via_config_option=False
-        )
-        conn.set_session_model.assert_awaited_once()
-        conn.set_config_option.assert_awaited_once_with(
-            config_id="model", value="opus[1m]", session_id="sess-1"
-        )
-        assert effective is True
-
-    @pytest.mark.asyncio
-    async def test_no_fallback_when_primary_succeeds(self):
-        conn = AsyncMock()
-        effective = await _apply_acp_model_with_fallback(
-            conn, "sess-1", "sonnet", via_config_option=True
-        )
-        conn.set_config_option.assert_awaited_once()
-        conn.set_session_model.assert_not_called()
-        assert effective is True
-
-    @pytest.mark.asyncio
-    async def test_non_method_not_found_error_propagates(self):
-        # An invalid-model / server error is NOT a wrong-mechanism signal — it
-        # must propagate, not silently try the other call.
-        conn = AsyncMock()
-        conn.set_config_option.side_effect = ACPRequestError(
-            code=-32602, message="Invalid params"
-        )
         with pytest.raises(ACPRequestError):
-            await _apply_acp_model_with_fallback(
-                conn, "sess-1", "bad", via_config_option=True
+            await _apply_acp_model(
+                conn, "sess-1", "gemini-3-flash", via_config_option=False
             )
-        conn.set_session_model.assert_not_called()
+        conn.set_config_option.assert_not_called()  # no fallback
 
 
 class TestCodexModelEffortSplit:
@@ -7472,6 +7458,27 @@ async def test_apply_bare_config_option_single_call(self):
             config_id="model", value="opus[1m]", session_id="sess-1"
         )
 
+    @pytest.mark.asyncio
+    async def test_reasoning_effort_failure_is_tolerated(self):
+        # The model selection is the switch; a rejected reasoning_effort must NOT
+        # fail the whole apply (else the live session changed model while the
+        # caller reports failure and leaves llm.model stale). The model call
+        # succeeded, so _apply_acp_model returns normally. #3772
+        conn = AsyncMock()
+
+        async def _effort_fails(*, config_id, value, session_id):
+            if config_id == "reasoning_effort":
+                raise ACPRequestError(code=-32603, message="transient")
+
+        conn.set_config_option.side_effect = _effort_fails
+        # Does not raise: the model preset applied, only the effort refinement
+        # was dropped.
+        await _apply_acp_model(conn, "sess-1", "gpt-5.5/medium", via_config_option=True)
+        assert conn.set_config_option.await_args_list == [
+            call(config_id="model", value="gpt-5.5", session_id="sess-1"),
+            call(config_id="reasoning_effort", value="medium", session_id="sess-1"),
+        ]
+
 
 class TestACPAgentAvailableModelsProperty:
     """``available_models`` exposes the server's model list verbatim.

From 16bc0d91431aa2b5be3fbc8beddf3c9215a1c6be Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@openhands.dev>
Date: Wed, 17 Jun 2026 22:41:45 +0200
Subject: [PATCH 12/14] =?UTF-8?q?refactor(acp):=20codex=20model=20ids=20?=
 =?UTF-8?q?=E2=86=92=20bare=20presets,=20drop=20the=20effort=20split?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

codex-acp 0.16 exposes reasoning effort as a separate `reasoning_effort`
configOption, so the `model` select takes bare preset ids. Mirror the
server's availableModels: _CODEX_MODELS becomes (gpt-5.5, gpt-5.4,
gpt-5.4-mini), default gpt-5.5; delete _split_codex_model_effort and the
reasoning_effort apply call so _apply_acp_model sends the id verbatim on
either mechanism.

This removes the divergence where the server reported bare available_models
while the SDK surfaced a combined `<model>/<effort>` current_model_id
(verified e2e against the real codex-acp 0.16 CLI: available_models now
matches current_model_id; pre-run and runtime switches still apply).

Also tidy stale ACP comments: 0.46→0.44 version refs, the yolo→default mode
doc, drop PR-process narration, and trim the _extract_session_models
docstring to local behavior.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/agent/acp_agent.py          |  71 ++-------
 .../openhands/sdk/agent/acp_models.py         |   6 +-
 .../openhands/sdk/settings/acp_providers.py   |  38 ++---
 .../test_conversation_info_model.py           |   4 +-
 tests/sdk/agent/test_acp_agent.py             | 148 +++++++-----------
 tests/sdk/settings/test_acp_providers.py      |   4 +-
 6 files changed, 91 insertions(+), 180 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py
index 1005d7d173..64eff2d678 100644
--- a/openhands-sdk/openhands/sdk/agent/acp_agent.py
+++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py
@@ -376,9 +376,6 @@ def _write_secret_file(path: Path, value: str) -> None:
 # (codex-acp 0.16+, claude-agent-acp 0.44+) rather than the UNSTABLE ``models``
 # capability + ``session/set_model`` (gemini-cli, older codex/claude).
 _MODEL_CONFIG_OPTION_ID = "model"
-# Companion config-option id codex-acp 0.16 uses for the reasoning-effort tier
-# (the 0.15 combined ``<model>/<effort>`` id is split across the two on 0.16).
-_REASONING_EFFORT_CONFIG_OPTION_ID = "reasoning_effort"
 
 
 def _model_config_option(response: Any) -> Any | None:
@@ -406,24 +403,6 @@ def _model_config_option(response: Any) -> Any | None:
     return None
 
 
-def _split_codex_model_effort(model: str) -> tuple[str, str | None]:
-    """Split a codex-style ``<model>/<effort>`` id into ``(model, effort)``.
-
-    codex-acp's ``models`` capability (0.15) used a combined id like
-    ``gpt-5.5/medium`` that the server parsed itself. Its 0.16 ``model``
-    ``configOptions`` select instead wants a **bare preset id** (``gpt-5.5``)
-    with the reasoning effort set via a separate ``reasoning_effort`` option —
-    an unknown combined id is taken literally and rejected by the backend
-    (``"<id>" model is not supported``). So when applying via configOptions we
-    split on ``/``. Bare ids (claude ``opus[1m]``/``sonnet``, gemini ``auto``,
-    codex without an effort suffix) return ``(model, None)`` unchanged.
-    """
-    base, sep, effort = model.partition("/")
-    if sep and effort:
-        return base, effort
-    return model, None
-
-
 async def _apply_acp_model(
     conn: ClientSideConnection,
     session_id: str,
@@ -435,36 +414,15 @@ async def _apply_acp_model(
     advertised: ``set_config_option(configId="model")`` for configOptions-based
     servers (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``.
 
-    A codex ``<model>/<effort>`` id is split into a bare ``model`` plus a
-    separate ``reasoning_effort`` option (see :func:`_split_codex_model_effort`).
-    The model selection is the switch; the effort is a secondary refinement, so
-    a rejected effort is logged rather than raised — the live session keeps the
-    applied model instead of reporting the whole switch failed (#3772).
+    The model id is a bare preset id the server lists in its ``model`` select /
+    ``models`` capability — applied as-is on either mechanism.
     """
-    if not via_config_option:
+    if via_config_option:
+        await conn.set_config_option(
+            config_id=_MODEL_CONFIG_OPTION_ID, value=model, session_id=session_id
+        )
+    else:
         await conn.set_session_model(model_id=model, session_id=session_id)
-        return
-    base, effort = _split_codex_model_effort(model)
-    await conn.set_config_option(
-        config_id=_MODEL_CONFIG_OPTION_ID, value=base, session_id=session_id
-    )
-    if effort is not None:
-        # reasoning_effort is only valid once the model is a known preset, which
-        # the call above just set. Best-effort: keep the applied model on failure.
-        try:
-            await conn.set_config_option(
-                config_id=_REASONING_EFFORT_CONFIG_OPTION_ID,
-                value=effort,
-                session_id=session_id,
-            )
-        except ACPRequestError as exc:
-            logger.warning(
-                "ACP reasoning_effort=%r rejected after model=%r applied (%s); "
-                "keeping the model on the server default effort",
-                effort,
-                base,
-                exc,
-            )
 
 
 def _usable_models(infos: Iterable[ACPModelInfo]) -> list[ACPModelInfo]:
@@ -498,10 +456,8 @@ def _extract_session_models(
 
     ``via_config_option`` is the selection mechanism: ``True`` for the
     configOptions select, ``False`` for the ``models`` capability, and
-    ``default_via_config_option`` when the response carries neither — callers on
-    the resume path pass the persisted mechanism hint so a config-option session
-    whose ``load_session`` omits the model block still reapplies via the right
-    call instead of defaulting to ``set_session_model``.
+    ``default_via_config_option`` when the response carries neither (the
+    resume-path default for a ``load_session`` that omits the model block).
 
     ``getattr`` keeps the helper tolerant of agents that emit a partial
     structure.
@@ -2612,10 +2568,9 @@ async def _init() -> tuple[
                 else reported_model_id
             )
 
-            # Resolve the permission mode.  Known providers each have their
-            # own mode ID (bypassPermissions, full-access, yolo …).
-            # Unknown/custom servers get None — skip the call rather than
-            # sending a provider-specific string they won't recognise.
+            # Resolve the permission mode. Known providers each have their own
+            # mode ID; unknown/custom servers get None — skip the call rather
+            # than sending a provider-specific string they won't recognise.
             provider = detect_acp_provider_by_agent_name(agent_name)
             mode_id = self.acp_session_mode or (
                 provider.default_session_mode if provider else None
@@ -3590,7 +3545,7 @@ def set_acp_model(self, model: str) -> None:
 
         Args:
             model: Provider-specific model id to switch to (e.g.
-                ``"claude-haiku-4-5-20251001"`` or ``"gpt-5.4/low"``).
+                ``"sonnet"`` or ``"gpt-5.5"``).
 
         Raises:
             ValueError: If ``model`` is empty or whitespace-only, if the
diff --git a/openhands-sdk/openhands/sdk/agent/acp_models.py b/openhands-sdk/openhands/sdk/agent/acp_models.py
index c6c89390ea..2b4f4aad81 100644
--- a/openhands-sdk/openhands/sdk/agent/acp_models.py
+++ b/openhands-sdk/openhands/sdk/agent/acp_models.py
@@ -34,9 +34,9 @@ class ACPModelInfo(BaseModel):
     model_id: str = Field(
         description=(
             "Server-assigned model identifier. May be concrete "
-            '(e.g. ``"gpt-5.5/xhigh"``) or an opaque alias '
-            '(e.g. ``"default"``, ``"auto"``). This is the value to pass to '
-            "``set_session_model`` to switch to this model."
+            '(e.g. ``"gpt-5.5"``) or an opaque alias '
+            '(e.g. ``"default"``, ``"auto"``). This is the value to pass back '
+            "to the server to switch to this model."
         ),
     )
     name: str | None = Field(
diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py
index e2cc4d74a7..cce8d1ece9 100644
--- a/openhands-sdk/openhands/sdk/settings/acp_providers.py
+++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py
@@ -155,13 +155,12 @@ class ACPProviderInfo:
     """
 
     default_session_mode: str
-    """ACP session-mode ID that suppresses all permission prompts.
+    """ACP session-mode ID set right after ``session/new``.
 
-    Different servers use different IDs for the same concept:
-
-    - ``bypassPermissions`` — claude-agent-acp
-    - ``full-access``       — codex-acp
-    - ``yolo``              — gemini-cli
+    For servers with a permission-suppressing mode that is the value:
+    ``bypassPermissions`` (claude-agent-acp), ``full-access`` (codex-acp).
+    gemini-cli uses ``default`` (its ``yolo`` mode errors at init); the ACP
+    bridge auto-approves permission requests, so the mode doesn't gate prompts.
     """
 
     agent_name_patterns: tuple[str, ...]
@@ -307,20 +306,14 @@ class ACPProviderInfo:
     ACPModelOption(id="haiku", label="Claude Haiku 4.5"),
 )
 
-# Model IDs accepted by ``@zed-industries/codex-acp`` (``session/new``
-# ``availableModels`` on the pinned CLI), curated to the frontier family plus
-# the cost-efficient mini tier. Format is ``<base-model>/<effort>`` where the
-# trailing tier (``low``/``medium``/``high``/``xhigh``) hints the reasoning
-# effort for the turn.
+# Bare preset ids the ``@zed-industries/codex-acp`` 0.16 ``model`` configOptions
+# select reports at ``session/new`` (``set_config_option(configId="model")``
+# targets). The reasoning-effort tier is a *separate* ``reasoning_effort``
+# configOption on 0.16, not part of the model id, so it is not encoded here.
 _CODEX_MODELS: tuple[ACPModelOption, ...] = (
-    ACPModelOption(id="gpt-5.5/low", label="GPT-5.5 (low)"),
-    ACPModelOption(id="gpt-5.5/medium", label="GPT-5.5 (medium)"),
-    ACPModelOption(id="gpt-5.5/high", label="GPT-5.5 (high)"),
-    ACPModelOption(id="gpt-5.5/xhigh", label="GPT-5.5 (xhigh)"),
-    ACPModelOption(id="gpt-5.4-mini/low", label="GPT-5.4 Mini (low)"),
-    ACPModelOption(id="gpt-5.4-mini/medium", label="GPT-5.4 Mini (medium)"),
-    ACPModelOption(id="gpt-5.4-mini/high", label="GPT-5.4 Mini (high)"),
-    ACPModelOption(id="gpt-5.4-mini/xhigh", label="GPT-5.4 Mini (xhigh)"),
+    ACPModelOption(id="gpt-5.5", label="GPT-5.5"),
+    ACPModelOption(id="gpt-5.4", label="GPT-5.4"),
+    ACPModelOption(id="gpt-5.4-mini", label="GPT-5.4 Mini"),
 )
 
 # Model IDs accepted by ``@google/gemini-cli --acp``. Mirrors the
@@ -379,9 +372,8 @@ class ACPProviderInfo:
 # so the `@version` suffix is a no-op there.
 #
 # claude-agent-acp 0.44+ / codex-acp 0.16+ select the model via a ``model``
-# ``configOptions`` entry, not ``session/set_model``; the SDK detects which per
-# session in ``_extract_session_models`` and applies it. codex 0.16 also splits
-# the model id from the reasoning effort (see ``_split_codex_model_effort``). #3772.
+# ``configOptions`` entry rather than ``session/set_model``; the SDK detects
+# which per session and applies it through the matching call.
 CLAUDE_AGENT_ACP_VERSION = "0.44.0"
 CODEX_ACP_VERSION = "0.16.0"
 GEMINI_CLI_VERSION = "0.46.0"
@@ -434,7 +426,7 @@ class ACPProviderInfo:
             supports_runtime_model_switch=True,
             session_meta_key=None,
             available_models=_CODEX_MODELS,
-            default_model="gpt-5.5/medium",
+            default_model="gpt-5.5",
             file_secrets=_CODEX_FILE_SECRETS,
             binary_name="codex-acp",
             data_dir_env_var="CODEX_HOME",
diff --git a/tests/agent_server/test_conversation_info_model.py b/tests/agent_server/test_conversation_info_model.py
index eb55c65a10..975a655ede 100644
--- a/tests/agent_server/test_conversation_info_model.py
+++ b/tests/agent_server/test_conversation_info_model.py
@@ -205,13 +205,13 @@ def test_available_models_lifted_from_acp_agent():
 def test_available_models_empty_when_server_omits_them():
     """Servers that don't surface the UNSTABLE ``models`` capability yield []."""
     agent = ACPAgent(acp_command=["echo", "test"])
-    agent._current_model_id = "gpt-5.5/xhigh"
+    agent._current_model_id = "gpt-5.5"
     state = _make_state(agent)
     stored = _make_stored(state)
 
     info = _compose_conversation_info(stored, state)
 
-    assert info.current_model_id == "gpt-5.5/xhigh"
+    assert info.current_model_id == "gpt-5.5"
     assert info.available_models == []
 
 
diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py
index 264e0536a2..78e653a2f2 100644
--- a/tests/sdk/agent/test_acp_agent.py
+++ b/tests/sdk/agent/test_acp_agent.py
@@ -11,7 +11,7 @@
 from pathlib import Path
 from types import SimpleNamespace
 from typing import Any
-from unittest.mock import AsyncMock, MagicMock, call, patch
+from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 from acp.exceptions import RequestError as ACPRequestError
@@ -35,7 +35,6 @@
     _reapply_session_model_on_resume,
     _select_auth_method,
     _serialize_tool_content,
-    _split_codex_model_effort,
     _strip_inherited_npm_env,
 )
 from openhands.sdk.agent.acp_models import ACPModelInfo
@@ -4446,7 +4445,7 @@ async def test_set_session_model_mechanism(self):
 
     @pytest.mark.asyncio
     async def test_set_config_option_mechanism(self):
-        # ``via_config_option=True`` (codex-acp 0.16+, claude-agent-acp 0.46+)
+        # ``via_config_option=True`` (codex-acp 0.16+, claude-agent-acp 0.44+)
         # applies the model via ``set_config_option(configId="model")``.
         conn = AsyncMock()
         applied = await _maybe_set_session_model(
@@ -4537,17 +4536,15 @@ async def test_reapply_via_set_session_model_mechanism(self):
 
     @pytest.mark.asyncio
     async def test_reapply_via_set_config_option_mechanism(self):
-        # codex-acp 0.16+/claude-agent-acp 0.46+ reapply via
-        # ``set_config_option(configId="model")``; a codex combined id splits
-        # into model + reasoning_effort.
+        # codex-acp 0.16+/claude-agent-acp 0.44+ reapply via
+        # ``set_config_option(configId="model")`` with the bare preset id.
         conn = AsyncMock()
         applied = await _reapply_session_model_on_resume(
-            conn, "codex-acp", "sess-1", "gpt-5.4/low", via_config_option=True
+            conn, "codex-acp", "sess-1", "gpt-5.5", via_config_option=True
+        )
+        conn.set_config_option.assert_awaited_once_with(
+            config_id="model", value="gpt-5.5", session_id="sess-1"
         )
-        assert conn.set_config_option.await_args_list == [
-            call(config_id="model", value="gpt-5.4", session_id="sess-1"),
-            call(config_id="reasoning_effort", value="low", session_id="sess-1"),
-        ]
         conn.set_session_model.assert_not_called()
         assert applied is True
 
@@ -4631,7 +4628,7 @@ async def test_any_request_error_is_swallowed_on_resume(self):
             code=-32603, message="internal error"
         )
         applied = await _reapply_session_model_on_resume(
-            conn, "codex-acp", "sess-1", "gpt-5.4/low", via_config_option=False
+            conn, "codex-acp", "sess-1", "gpt-5.5", via_config_option=False
         )
         conn.set_session_model.assert_awaited_once()
         assert applied is False
@@ -4709,31 +4706,29 @@ def _run(coro: Any, timeout: Any = None) -> Any:
         return agent
 
     def test_switches_model_on_live_codex_session(self):
-        # Default (models-capability) session ⇒ set_session_model (combined id).
+        # Default (models-capability) session ⇒ set_session_model, id as-is.
         agent = self._wire(_make_agent(), "codex-acp")
-        agent.set_acp_model("gpt-5.4/low")
+        agent.set_acp_model("gpt-5.5")
         agent._conn.set_session_model.assert_awaited_once_with(
-            model_id="gpt-5.4/low", session_id="sess-1"
+            model_id="gpt-5.5", session_id="sess-1"
         )
         agent._conn.set_config_option.assert_not_called()
         agent._executor.run_async.assert_called_once()
         # Sentinel LLM + metrics reflect the live model for cost/token tracking.
-        assert agent.llm.model == "gpt-5.4/low"
-        assert agent.llm.metrics.model_name == "gpt-5.4/low"
+        assert agent.llm.model == "gpt-5.5"
+        assert agent.llm.metrics.model_name == "gpt-5.5"
 
-    def test_switches_codex_via_config_option_splits_model_and_effort(self):
-        # codex-acp 0.16 configOptions: the combined id is split into a bare
-        # `model` preset + a separate `reasoning_effort` option.
+    def test_switches_codex_via_config_option_single_call(self):
+        # codex-acp 0.16 configOptions: the bare preset id applies as a single
+        # `model` selection (reasoning effort is a separate, unmanaged option).
         agent = self._wire(_make_agent(), "codex-acp", via_config_option=True)
-        agent.set_acp_model("gpt-5.4/low")
-        assert agent._conn.set_config_option.await_args_list == [
-            call(config_id="model", value="gpt-5.4", session_id="sess-1"),
-            call(config_id="reasoning_effort", value="low", session_id="sess-1"),
-        ]
+        agent.set_acp_model("gpt-5.5")
+        agent._conn.set_config_option.assert_awaited_once_with(
+            config_id="model", value="gpt-5.5", session_id="sess-1"
+        )
         agent._conn.set_session_model.assert_not_called()
-        # The agent's surfaced/persisted model stays the combined registry id.
-        assert agent.llm.model == "gpt-5.4/low"
-        assert agent._current_model_id == "gpt-5.4/low"
+        assert agent.llm.model == "gpt-5.5"
+        assert agent._current_model_id == "gpt-5.5"
 
     def test_switches_claude_via_config_option_single_call(self):
         # A bare id (no `/`) applies as a single `model` selection — no effort.
@@ -4752,12 +4747,12 @@ def test_switch_method_not_found_raises_no_fallback(self):
             code=-32601, message="Method not found"
         )
         with pytest.raises(ValueError, match="rejected set_session_model"):
-            agent.set_acp_model("gpt-5.4/low")
+            agent.set_acp_model("gpt-5.5")
         agent._conn.set_config_option.assert_not_called()
         agent._executor.run_async.assert_called_once()
         # Mechanism + sentinel model are left unchanged on a failed switch.
         assert agent._model_via_config_option is False
-        assert agent.llm.model != "gpt-5.4/low"
+        assert agent.llm.model != "gpt-5.5"
 
     def test_switch_invalid_model_raises_value_error(self):
         # A -32602 invalid-params is a real client error, not a wrong-mechanism
@@ -4852,7 +4847,7 @@ def test_passes_timeout_to_run_async(self):
         # The protocol round-trip runs under the conversation state lock, so it
         # must be bounded to avoid wedging the lock if the server never answers.
         agent = self._wire(_make_agent(acp_prompt_timeout=42.0), "codex-acp")
-        agent.set_acp_model("gpt-5.4/low")
+        agent.set_acp_model("gpt-5.5")
         _, kwargs = agent._executor.run_async.call_args
         assert kwargs["timeout"] == 42.0
 
@@ -7106,15 +7101,15 @@ def _raw(self, model_id: Any, name: Any = None, description: Any = None) -> Any:
     def test_maps_fields_through(self):
         response = MagicMock()
         response.models = MagicMock()
-        response.models.current_model_id = "gpt-5.4/low"
+        response.models.current_model_id = "gpt-5.4"
         response.models.available_models = [
-            self._raw("gpt-5.4/low", "gpt-5.4 (low)", "Strong everyday model."),
+            self._raw("gpt-5.4", "GPT-5.4", "Strong everyday model."),
         ]
         _cur, avail, _ = _extract_session_models(response)
         assert avail == [
             ACPModelInfo(
-                model_id="gpt-5.4/low",
-                name="gpt-5.4 (low)",
+                model_id="gpt-5.4",
+                name="GPT-5.4",
                 description="Strong everyday model.",
             )
         ]
@@ -7138,7 +7133,7 @@ def test_drops_entries_without_usable_id(self):
 class TestConfigOptionModelMechanism:
     """Model selection via the ``model`` ``configOptions`` select.
 
-    codex-acp 0.16+ and claude-agent-acp 0.46+ dropped the UNSTABLE ``models``
+    codex-acp 0.16+ and claude-agent-acp 0.44+ dropped the UNSTABLE ``models``
     capability + ``session/set_model`` in favour of a ``model`` config-option
     select driven by ``session/set_config_option``. ``_extract_session_models``
     reads that select and reports the apply mechanism as its third return value
@@ -7158,7 +7153,7 @@ def _response(self, *, models=None, config_options):
         return SimpleNamespace(models=models, config_options=config_options)
 
     def test_extracts_model_state_from_config_option(self):
-        # claude-agent-acp 0.46: model select with short aliases.
+        # claude-agent-acp 0.44: model select with short aliases.
         response = self._response(
             config_options=[
                 self._select(
@@ -7289,8 +7284,8 @@ def test_default_via_config_option_used_when_no_block_present(self):
 # Parsed through the real ``NewSessionResponse`` so the test exercises the
 # genuine schema, INCLUDING the ``SessionConfigOption`` RootModel wrapper the
 # ``agent-client-protocol`` lib applies to ``config_options`` on 0.8.x (where a
-# naive ``opt.id`` read returns ``None``). This is the exact failure the QA bot
-# surfaced; the detection helper unwraps ``.root`` so it works on 0.8.x + 0.10.x.
+# naive ``opt.id`` read returns ``None``). The detection helper unwraps
+# ``.root`` so it works on 0.8.x + 0.10.x.
 def _select_dict(opt_id, current, values, category=None):
     return {
         "id": opt_id,
@@ -7354,10 +7349,10 @@ class TestDetectionAgainstRealSessionResponses:
     """Detection + extraction against the real CLI ``session/new`` wire shapes,
     parsed through the actual ``acp.schema.NewSessionResponse``.
 
-    Guards the real production bug the QA bot surfaced: on
-    ``agent-client-protocol`` 0.8.x each ``config_options`` entry is a
-    ``SessionConfigOption`` RootModel (``.root``), so a naive ``opt.id`` read is
-    ``None`` and the SDK would fall back to the removed ``session/set_model``.
+    Guards a schema-shape regression: on ``agent-client-protocol`` 0.8.x each
+    ``config_options`` entry is a ``SessionConfigOption`` RootModel (``.root``),
+    so a naive ``opt.id`` read is ``None`` and the SDK would fall back to the
+    removed ``session/set_model``.
     """
 
     def test_config_options_is_the_schema_field_name(self):
@@ -7421,63 +7416,32 @@ async def test_set_session_model_rejection_propagates(self):
         conn.set_config_option.assert_not_called()  # no fallback
 
 
-class TestCodexModelEffortSplit:
-    """codex-acp 0.16 splits the 0.15 combined ``<model>/<effort>`` id into a
-    bare ``model`` preset + a separate ``reasoning_effort`` configOption."""
-
-    def test_splits_combined_id(self):
-        assert _split_codex_model_effort("gpt-5.5/medium") == ("gpt-5.5", "medium")
-        assert _split_codex_model_effort("gpt-5.4-mini/xhigh") == (
-            "gpt-5.4-mini",
-            "xhigh",
-        )
-
-    def test_bare_ids_unchanged(self):
-        # claude / gemini / codex-without-effort ids have no ``/``.
-        assert _split_codex_model_effort("opus[1m]") == ("opus[1m]", None)
-        assert _split_codex_model_effort("sonnet") == ("sonnet", None)
-        assert _split_codex_model_effort("auto") == ("auto", None)
-        assert _split_codex_model_effort("gpt-5.5") == ("gpt-5.5", None)
+class TestApplyAcpModel:
+    """``_apply_acp_model`` sends the model id verbatim through the mechanism the
+    session advertised — the ``model`` configOption select or
+    ``set_session_model`` — with no id rewriting. (codex 0.16 exposes reasoning
+    effort as a separate, unmanaged ``reasoning_effort`` configOption, so the
+    model ids are bare presets on every provider.)"""
 
     @pytest.mark.asyncio
-    async def test_apply_splits_into_two_config_options(self):
-        # The configOptions apply path sets model (bare) then reasoning_effort.
+    async def test_config_option_single_call(self):
         conn = AsyncMock()
-        await _apply_acp_model(conn, "sess-1", "gpt-5.5/medium", via_config_option=True)
-        assert conn.set_config_option.await_args_list == [
-            call(config_id="model", value="gpt-5.5", session_id="sess-1"),
-            call(config_id="reasoning_effort", value="medium", session_id="sess-1"),
-        ]
-        conn.set_session_model.assert_not_called()
-
-    @pytest.mark.asyncio
-    async def test_apply_bare_config_option_single_call(self):
-        conn = AsyncMock()
-        await _apply_acp_model(conn, "sess-1", "opus[1m]", via_config_option=True)
+        await _apply_acp_model(conn, "sess-1", "gpt-5.5", via_config_option=True)
         conn.set_config_option.assert_awaited_once_with(
-            config_id="model", value="opus[1m]", session_id="sess-1"
+            config_id="model", value="gpt-5.5", session_id="sess-1"
         )
+        conn.set_session_model.assert_not_called()
 
     @pytest.mark.asyncio
-    async def test_reasoning_effort_failure_is_tolerated(self):
-        # The model selection is the switch; a rejected reasoning_effort must NOT
-        # fail the whole apply (else the live session changed model while the
-        # caller reports failure and leaves llm.model stale). The model call
-        # succeeded, so _apply_acp_model returns normally. #3772
+    async def test_set_session_model_branch(self):
         conn = AsyncMock()
-
-        async def _effort_fails(*, config_id, value, session_id):
-            if config_id == "reasoning_effort":
-                raise ACPRequestError(code=-32603, message="transient")
-
-        conn.set_config_option.side_effect = _effort_fails
-        # Does not raise: the model preset applied, only the effort refinement
-        # was dropped.
-        await _apply_acp_model(conn, "sess-1", "gpt-5.5/medium", via_config_option=True)
-        assert conn.set_config_option.await_args_list == [
-            call(config_id="model", value="gpt-5.5", session_id="sess-1"),
-            call(config_id="reasoning_effort", value="medium", session_id="sess-1"),
-        ]
+        await _apply_acp_model(
+            conn, "sess-1", "gemini-2.5-pro", via_config_option=False
+        )
+        conn.set_session_model.assert_awaited_once_with(
+            model_id="gemini-2.5-pro", session_id="sess-1"
+        )
+        conn.set_config_option.assert_not_called()
 
 
 class TestACPAgentAvailableModelsProperty:
diff --git a/tests/sdk/settings/test_acp_providers.py b/tests/sdk/settings/test_acp_providers.py
index 7d0390c7a9..8dad941063 100644
--- a/tests/sdk/settings/test_acp_providers.py
+++ b/tests/sdk/settings/test_acp_providers.py
@@ -59,8 +59,8 @@ def test_codex_metadata(self):
         assert info.supports_set_session_model is True
         assert info.supports_runtime_model_switch is True
         assert info.session_meta_key is None
-        assert info.default_model == "gpt-5.5/medium"
-        assert any(m.id == "gpt-5.5/medium" for m in info.available_models)
+        assert info.default_model == "gpt-5.5"
+        assert any(m.id == "gpt-5.5" for m in info.available_models)
         assert info.binary_name == "codex-acp"
         assert info.data_dir_env_var == "CODEX_HOME"
 

From 0244d8450f0e4c142c380e97bc4497eeb74099b8 Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@openhands.dev>
Date: Wed, 17 Jun 2026 23:17:42 +0200
Subject: [PATCH 13/14] docs(acp): fix post-bump stale references in model docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Review sweep: runtime switches are no longer "always set_session_model" (the
mechanism is detected per session — set_config_option for codex/claude, else
set_session_model); codex is no longer applied "via set_session_model"; the
claude version note said 0.46.0 (pinned 0.44.0); stale `claude-opus-4-6` and
`GPT-5.5 (xhigh)` examples. No runtime effect.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/agent/acp_agent.py          | 30 ++++++++++---------
 .../openhands/sdk/agent/acp_models.py         |  2 +-
 .../openhands/sdk/settings/acp_providers.py   | 22 +++++++-------
 openhands-sdk/openhands/sdk/settings/model.py |  8 ++---
 tests/sdk/settings/test_acp_providers.py      |  2 +-
 5 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py
index 64eff2d678..941ac4170d 100644
--- a/openhands-sdk/openhands/sdk/agent/acp_agent.py
+++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py
@@ -1403,10 +1403,10 @@ def _serialize_acp_env(self, value: dict[str, str], info):
     acp_model: str | None = Field(
         default=None,
         description=(
-            "Model for the ACP server to use (e.g. 'claude-opus-4-6' or "
-            "'gpt-5.4'). For Claude ACP, passed via session _meta. For Codex "
-            "ACP, applied via the protocol-level set_session_model call. "
-            "If None, the server picks its default."
+            "Model for the ACP server to use (e.g. 'sonnet' or 'gpt-5.5'). "
+            "Applied via the protocol — set_config_option(model) for "
+            "configOptions-based servers (codex, claude), else "
+            "set_session_model. If None, the server picks its default."
         ),
     )
     acp_resume_session_id: str | None = Field(
@@ -1729,8 +1729,8 @@ def available_models(self) -> list[ACPModelInfo]:
         server's ``model_id`` plus an optional ``name``/``description`` —
         enough for a client to render a model picker and resolve
         ``current_model_id`` to a display label without any server-side
-        curation.  ``current_model_id`` is the value to pass to
-        ``set_session_model`` to switch.
+        curation.  ``current_model_id`` is the value to pass back to the server
+        to switch to that model.
 
         Same lifecycle and serialization caveats as ``current_model_id``:
         in-process runtime state, lifted onto
@@ -1746,7 +1746,7 @@ def supports_runtime_model_switch(self) -> bool:
 
         Tells a client whether to offer the inline picker's live-switch control.
         ``True`` only for known providers that explicitly declare support for
-        ``session/set_model``. Unknown/custom providers use ``set_config_option``
+        runtime model switching. Unknown/custom providers use ``set_config_option``
         for *initial* model selection but that RPC is a generic config write, not
         a guaranteed live-switch primitive, so the picker is hidden for them.
         ``False`` before a session exists (nothing to switch yet).
@@ -3528,9 +3528,11 @@ async def _fork_and_prompt() -> str:
     def set_acp_model(self, model: str) -> None:
         """Switch the model on the running ACP session (mid-conversation).
 
-        Issues a protocol-level ``session/set_model`` call on the live
-        connection so the new model takes effect for subsequent turns in the
-        *same* session — no subprocess restart, no loss of conversation
+        Issues a protocol-level model-switch call on the live connection (the
+        mechanism the session advertised — ``set_config_option(model)`` or
+        ``set_session_model``) so the new model takes effect for subsequent
+        turns in the *same* session — no subprocess restart, no loss of
+        conversation
         context. Verified against claude-agent-acp and codex-acp.
 
         This is the low-level agent primitive; prefer
@@ -3550,7 +3552,7 @@ def set_acp_model(self, model: str) -> None:
         Raises:
             ValueError: If ``model`` is empty or whitespace-only, if the
                 detected provider does not support runtime model switching, or
-                if the ACP server rejects the ``session/set_model`` call (e.g.
+                if the ACP server rejects the model-switch call (e.g.
                 method-not-found on a custom server, or an invalid model id).
             RuntimeError: If the ACP session has not been initialized yet
                 (i.e. before the first ``run()``).
@@ -3559,8 +3561,8 @@ def set_acp_model(self, model: str) -> None:
 
         Note:
             A timeout means the client stopped waiting, not that the switch was
-            rejected: the ``session/set_model`` request may already have been
-            written and could still be applied server-side. The connection and
+            rejected: the switch request may already have been written and
+            could still be applied server-side. The connection and
             session stay alive and the local sentinel model is intentionally
             left unchanged, so a timed-out switch leaves the server-side model
             indeterminate. The conservative choice (treat it as failed locally)
@@ -3586,7 +3588,7 @@ def set_acp_model(self, model: str) -> None:
         if provider is not None and not provider.supports_runtime_model_switch:
             raise ValueError(
                 f"ACP provider '{provider.key}' does not support runtime model "
-                "switching via set_session_model."
+                "switching."
             )
         # ``has_live_acp_session`` above guarantees a session id; narrow for the
         # type checker.
diff --git a/openhands-sdk/openhands/sdk/agent/acp_models.py b/openhands-sdk/openhands/sdk/agent/acp_models.py
index 2b4f4aad81..9377d705a8 100644
--- a/openhands-sdk/openhands/sdk/agent/acp_models.py
+++ b/openhands-sdk/openhands/sdk/agent/acp_models.py
@@ -41,7 +41,7 @@ class ACPModelInfo(BaseModel):
     )
     name: str | None = Field(
         default=None,
-        description='Human-readable label, e.g. ``"GPT-5.5 (xhigh)"``.',
+        description='Human-readable label, e.g. ``"GPT-5.5"``.',
     )
     description: str | None = Field(
         default=None,
diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py
index cce8d1ece9..febd2fba57 100644
--- a/openhands-sdk/openhands/sdk/settings/acp_providers.py
+++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py
@@ -12,11 +12,10 @@
 - ``agent_name_patterns``   lowercase substrings in the runtime agent name;
                             used by ``ACPAgent`` to auto-detect mode / protocol
 - ``supports_set_session_model``  whether the provider applies its *initial*
-                                  model via the ``set_session_model`` protocol
-                                  call at session creation
-- ``supports_runtime_model_switch``  whether the server supports the
-                                  ``session/set_model`` protocol call for
-                                  runtime, mid-conversation model switching
+                                  model via a protocol call at session creation
+                                  (``set_config_option``/``set_session_model``)
+- ``supports_runtime_model_switch``  whether the server supports a protocol-level
+                                  model switch for runtime, mid-conversation use
 - ``session_meta_key``      top-level ``_meta`` key for model selection (or ``None``)
 - ``available_models``      curated list of selectable models for the provider's
                             model picker (``acp_model`` candidates)
@@ -194,12 +193,13 @@ class ACPProviderInfo:
     When non-``None``, the model is additionally advertised via ACP session
     ``_meta`` using the structure
     ``{session_meta_key: {"options": {"model": <model>}}}`` passed to
-    ``new_session()``. This is best-effort only: claude-agent-acp 0.30.0
-    ignores it (#3654), so the authoritative initial selection is the
-    ``set_session_model`` call gated on :attr:`supports_set_session_model`.
+    ``new_session()``. This is best-effort only: claude-agent-acp ignores it
+    (#3654), so the authoritative initial selection is the protocol call gated
+    on :attr:`supports_set_session_model`.
 
-    Runtime switches always use ``set_session_model`` (gated on
-    :attr:`supports_runtime_model_switch`).
+    Runtime switches use the mechanism the session advertised
+    (``set_config_option`` or ``set_session_model``), gated on
+    :attr:`supports_runtime_model_switch`.
 
     - ``"claudeCode"`` — claude-agent-acp
     - ``None``         — codex-acp, gemini-cli
@@ -234,7 +234,7 @@ class ACPProviderInfo:
     Unlike :attr:`supports_set_session_model`, this is about switching the
     model of an *already-running* session, not the initial selection. A
     provider may select its initial model via ``_meta`` (claude-agent-acp)
-    yet still support ``set_session_model`` for later switches.
+    yet still support a protocol-level switch for later changes.
 
     Defaults to ``False`` so forward-compat providers — and any external
     caller constructing this dataclass positionally — keep working without a
diff --git a/openhands-sdk/openhands/sdk/settings/model.py b/openhands-sdk/openhands/sdk/settings/model.py
index 624aa9402e..89c26c518c 100644
--- a/openhands-sdk/openhands/sdk/settings/model.py
+++ b/openhands-sdk/openhands/sdk/settings/model.py
@@ -1322,10 +1322,10 @@ def _serialize_acp_env(self, value: dict[str, str], info):
     acp_model: str | None = Field(
         default=None,
         description=(
-            "Model identifier for the ACP server to use (e.g. "
-            "``'claude-opus-4-6'``). claude-agent-acp receives it via session "
-            "_meta; codex-acp and gemini-cli via ``set_session_model``. "
-            "Leave blank to let the server pick its default."
+            "Model identifier for the ACP server to use (e.g. ``'sonnet'`` or "
+            "``'gpt-5.5'``). Applied via the protocol — set_config_option(model) "
+            "for configOptions-based servers (codex, claude), else "
+            "set_session_model. Leave blank to let the server pick its default."
         ),
         json_schema_extra={
             SETTINGS_METADATA_KEY: SettingsFieldMetadata(
diff --git a/tests/sdk/settings/test_acp_providers.py b/tests/sdk/settings/test_acp_providers.py
index 8dad941063..3f108d64a3 100644
--- a/tests/sdk/settings/test_acp_providers.py
+++ b/tests/sdk/settings/test_acp_providers.py
@@ -37,7 +37,7 @@ def test_claude_code_metadata(self):
         assert "claude-agent" in info.agent_name_patterns
         # Initial selection rides a protocol call — claude-agent-acp ignores the
         # session-_meta payload (#3654), which is still sent as best-effort
-        # (session_meta_key below). On 0.46.0 the call is set_config_option.
+        # (session_meta_key below). On 0.44.0 the call is set_config_option.
         assert info.supports_set_session_model is True
         assert info.supports_runtime_model_switch is True
         assert info.session_meta_key == "claudeCode"

From 2b7673aaf6113ec1d2c12b2cb9230490154f2855 Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@openhands.dev>
Date: Thu, 18 Jun 2026 08:21:47 +0200
Subject: [PATCH 14/14] fix(acp): treat curated model lists as pre-session
 suggestions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

claude-agent-acp's `model` configOption select is dynamic and
account-dependent (its `/model` menu and `set_config_option` validate
against the live, entitlement-gated list — verified against the 0.44
source), so a curated id is not guaranteed acceptable. Two changes so the
curated lists behave as suggestions, not ground truth:

- `_maybe_set_session_model` now tolerates an `ACPRequestError` for known
  providers too (log + fall back to the server default), matching the
  unknown-provider and resume paths. A seeded id the live account lacks
  (e.g. `sonnet`) no longer fails session creation. Runtime switches via
  `set_acp_model` still surface the error, since there the user explicitly
  asked to switch.
- Add `gemini-3.1-pro-preview` to `_GEMINI_MODELS`: gemini-cli 0.46
  surfaces the pro-preview as `gemini-3.1-pro-preview` once the 3.1 launch
  flag is on (`PREVIEW_GEMINI_3_1_MODEL`), falling back to
  `gemini-3-pro-preview` otherwise — keep both so the picker matches either
  rollout state.

Also corrects the misleading "not an access check" comment on
`_CLAUDE_MODELS`. Validated end-to-end against the real pinned CLIs
(claude-agent-acp 0.44, codex-acp 0.16): a rejected initial model degrades
to the server default; a valid one still applies.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../openhands/sdk/agent/acp_agent.py          | 48 +++++++++----------
 .../openhands/sdk/settings/acp_providers.py   | 11 ++++-
 tests/sdk/agent/test_acp_agent.py             | 19 ++++++++
 3 files changed, 52 insertions(+), 26 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py
index 941ac4170d..0105e133c3 100644
--- a/openhands-sdk/openhands/sdk/agent/acp_agent.py
+++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py
@@ -617,9 +617,11 @@ async def _maybe_set_session_model(
     ``set_config_option(configId="model")`` for configOptions-based servers
     (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``. The
     ``_meta`` model payload is ignored by the pinned CLIs, so this protocol call
-    is what actually applies the model (#3654). Unknown/custom providers are
-    best-effort (a rejection is tolerated — the session keeps the server
-    default). Runtime switches go through :meth:`ACPAgent.set_acp_model`.
+    is what actually applies the model (#3654). A rejection is tolerated for
+    any provider — the curated model list is a pre-session suggestion, not an
+    access check (a server's model menu is dynamic/account-dependent), so the
+    session keeps the server default rather than failing to start. Runtime
+    switches go through :meth:`ACPAgent.set_acp_model`.
 
     Returns ``True`` only when a model-setting call succeeded (the override
     reached the server); ``False`` when there is nothing to apply or the call
@@ -628,31 +630,29 @@ async def _maybe_set_session_model(
     if not acp_model:
         return False
     provider = detect_acp_provider_by_agent_name(agent_name)
-    if provider is not None and provider.supports_set_session_model:
+    # Known providers gate on supports_set_session_model; unknown/custom servers
+    # always attempt (best-effort).
+    if provider is not None and not provider.supports_set_session_model:
+        return False
+    # A rejection is tolerated so session creation can't break: the curated model
+    # list is a suggestion, not an access check — a server's model menu is
+    # dynamic/account-dependent (claude-agent-acp validates set_config_option
+    # against the live select and rejects an absent id), so a model the live
+    # session won't accept degrades to the server default rather than failing.
+    try:
         await _apply_acp_model(
             conn, session_id, acp_model, via_config_option=via_config_option
         )
         return True
-    # Unknown/custom provider: best-effort apply via the detected mechanism; a
-    # rejection is tolerated so session creation can't break.
-    if provider is None:
-        try:
-            await _apply_acp_model(
-                conn, session_id, acp_model, via_config_option=via_config_option
-            )
-            logger.info(
-                "Set model %r on unknown/custom ACP server %s", acp_model, agent_name
-            )
-            return True
-        except ACPRequestError as e:
-            logger.warning(
-                "Could not set model %r on unknown/custom ACP server %s (%s); "
-                "the session will use the server default",
-                acp_model,
-                agent_name,
-                e,
-            )
-    return False
+    except ACPRequestError as e:
+        logger.warning(
+            "Could not set model %r on ACP server %s (%s); "
+            "the session will use the server default",
+            acp_model,
+            agent_name,
+            e,
+        )
+        return False
 
 
 async def _reapply_session_model_on_resume(
diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py
index febd2fba57..f30e46f98d 100644
--- a/openhands-sdk/openhands/sdk/settings/acp_providers.py
+++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py
@@ -297,8 +297,10 @@ class ACPProviderInfo:
 # the CLI's own ``/model`` menu offers, switched via ``set_config_option``).
 # ``opus[1m]`` is the SDK-documented version-agnostic 1M-context alias and the
 # CLI's own default (``currentValue``); ``default`` is the CLI's recommended
-# tier (Opus 4.8 · 1M). The CLI also accepts ids outside this list, so these
-# are curated suggestions, not an access check.
+# tier (Opus 4.8 · 1M). The ``/model`` menu is dynamic/account-dependent and the
+# CLI validates ``set_config_option(model)`` against the live select — it rejects
+# an absent id (e.g. ``sonnet`` on accounts without it), so these are pre-session
+# suggestions, not ground truth; a rejected id degrades to the server default.
 _CLAUDE_MODELS: tuple[ACPModelOption, ...] = (
     ACPModelOption(id="default", label="Default (recommended)"),
     ACPModelOption(id="opus[1m]", label="Claude Opus 4.8 (1M)"),
@@ -324,6 +326,11 @@ class ACPProviderInfo:
 # are curated suggestions, not an access check.
 _GEMINI_MODELS: tuple[ACPModelOption, ...] = (
     ACPModelOption(id="auto", label="Auto"),
+    # gemini-cli 0.46 surfaces the pro-preview as ``gemini-3.1-pro-preview`` once
+    # the Gemini 3.1 launch flag is on (``PREVIEW_GEMINI_3_1_MODEL``), falling
+    # back to ``gemini-3-pro-preview`` (``PREVIEW_GEMINI_MODEL``) otherwise — keep
+    # both so the picker matches either rollout state.
+    ACPModelOption(id="gemini-3.1-pro-preview", label="Gemini 3.1 Pro (preview)"),
     ACPModelOption(id="gemini-3-pro-preview", label="Gemini 3 Pro (preview)"),
     ACPModelOption(id="gemini-3-flash-preview", label="Gemini 3 Flash (preview)"),
     ACPModelOption(id="gemini-3.1-flash-lite", label="Gemini 3.1 Flash Lite"),
diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py
index 78e653a2f2..50e1e6053a 100644
--- a/tests/sdk/agent/test_acp_agent.py
+++ b/tests/sdk/agent/test_acp_agent.py
@@ -4511,6 +4511,25 @@ async def test_unknown_provider_apply_failure_is_tolerated(self):
         conn.set_config_option.assert_awaited_once()
         assert applied is False
 
+    @pytest.mark.asyncio
+    async def test_known_provider_apply_rejection_is_tolerated(self):
+        # claude-agent-acp's ``model`` select is dynamic/account-dependent and
+        # rejects an absent curated id (e.g. ``sonnet`` on accounts without it,
+        # dressed as -32603). A rejected *initial* model must not fail session
+        # creation — it degrades to the server default (applied=False) like the
+        # unknown-provider path, since the curated list is a suggestion not an
+        # access check. (Runtime switches via set_acp_model surface the error.)
+        conn = AsyncMock()
+        conn.set_config_option.side_effect = ACPRequestError(
+            code=-32603, message="Invalid value for config option model: sonnet"
+        )
+        applied = await _maybe_set_session_model(
+            conn, "claude-agent-acp", "session-1", "sonnet", via_config_option=True
+        )
+        conn.set_config_option.assert_awaited_once()
+        conn.set_session_model.assert_not_called()
+        assert applied is False
+
 
 class TestReapplySessionModelOnResume:
     """Resume reapplies the persisted model via the runtime-switch gate."""