diff --git a/openhands-agent-server/openhands/agent_server/docker/Dockerfile b/openhands-agent-server/openhands/agent_server/docker/Dockerfile index 8751c22628..b13a2f219b 100644 --- a/openhands-agent-server/openhands/agent_server/docker/Dockerfile +++ b/openhands-agent-server/openhands/agent_server/docker/Dockerfile @@ -171,9 +171,9 @@ RUN set -ux; \ && "$ACP_NODE_DIR/bin/node" --version; then \ PATH="$ACP_NODE_DIR/bin:$PATH"; \ if "$ACP_NODE_DIR/bin/npm" install -g \ - @agentclientprotocol/claude-agent-acp@0.30.0 \ - @zed-industries/codex-acp@0.15.0 \ - @google/gemini-cli@0.38.0; then \ + @agentclientprotocol/claude-agent-acp@0.44.0 \ + @zed-industries/codex-acp@0.16.0 \ + @google/gemini-cli@0.46.0; then \ # Create wrappers in /usr/local/bin that prepend ACP's Node 22 to PATH. # This ensures the ACP binary's #!/usr/bin/env node shebang resolves # to Node 22, while the repo's own node (NVM/system) stays untouched diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index b5d233768e..0105e133c3 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -23,7 +23,7 @@ import threading import time import uuid -from collections.abc import Awaitable, Callable, Generator +from collections.abc import Awaitable, Callable, Generator, Iterable from concurrent.futures import Future from pathlib import Path from typing import TYPE_CHECKING, Any, Final, Literal, NamedTuple @@ -371,44 +371,118 @@ def _write_secret_file(path: Path, value: str) -> None: f.write(value) -def _extract_session_models( - response: Any, -) -> tuple[str | None, list[ACPModelInfo] | None]: - """Extract the model state off a session response. +# Session config-option id that selects the model on ACP servers that drive +# model selection through ``configOptions`` / ``session/set_config_option`` +# (codex-acp 0.16+, claude-agent-acp 0.44+) rather than the UNSTABLE ``models`` +# capability + ``session/set_model`` (gemini-cli, older codex/claude). +_MODEL_CONFIG_OPTION_ID = "model" + + +def _model_config_option(response: Any) -> Any | None: + """Return the ``model`` ``configOptions`` select off a session response. - Returns a ``(current_model_id, available_models)`` pair, both best-effort. - ``available_models`` is normalized into our own stable :class:`ACPModelInfo` - type at this boundary so nothing downstream depends on the vendored - ``acp.schema`` shape. + Newer ACP CLIs dropped the UNSTABLE ``models`` capability and expose model + selection as a ``configOptions`` entry with ``id == "model"`` (``type == + "select"``), switched via ``session/set_config_option`` instead of + ``session/set_model``. Returns that option (carrying ``options`` and + ``current_value``) or ``None`` when the server uses neither / the old + mechanism. ``getattr`` keeps it tolerant of partial structures. - The second element distinguishes **absent** from **empty** — this matters + The ``agent-client-protocol`` Python lib wraps each entry in a + ``SessionConfigOption`` ``RootModel`` on 0.8.x (access via ``.root``) but + lists the union members directly on 0.10.x; unwrap ``.root`` so detection + works on either. + """ + for raw in getattr(response, "config_options", None) or []: + opt = getattr(raw, "root", raw) + if ( + getattr(opt, "type", None) == "select" + and getattr(opt, "id", None) == _MODEL_CONFIG_OPTION_ID + ): + return opt + return None + + +async def _apply_acp_model( + conn: ClientSideConnection, + session_id: str, + model: str, + *, + via_config_option: bool, +) -> None: + """Apply ``model`` to a live ACP session via the mechanism the session + advertised: ``set_config_option(configId="model")`` for configOptions-based + servers (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``. + + The model id is a bare preset id the server lists in its ``model`` select / + ``models`` capability — applied as-is on either mechanism. + """ + if via_config_option: + await conn.set_config_option( + config_id=_MODEL_CONFIG_OPTION_ID, value=model, session_id=session_id + ) + else: + await conn.set_session_model(model_id=model, session_id=session_id) + + +def _usable_models(infos: Iterable[ACPModelInfo]) -> list[ACPModelInfo]: + """Drop entries without a usable ``model_id`` — an empty/missing id is an + invalid picker option and an unusable model-switch target.""" + return [info for info in infos if info.model_id] + + +def _extract_session_models( + response: Any, + *, + default_via_config_option: bool = False, +) -> tuple[str | None, list[ACPModelInfo] | None, bool]: + """Extract the model state off a session response in a single scan. + + Returns ``(current_model_id, available_models, via_config_option)``, all + best-effort. ``available_models`` is normalized into our own stable + :class:`ACPModelInfo` type at this boundary so nothing downstream depends on + the vendored ``acp.schema`` shape. Reads whichever mechanism the session + advertised: the UNSTABLE ``models`` capability, or the ``model`` + ``configOptions`` select (codex-acp 0.16+, claude-agent-acp 0.44+). + + ``available_models`` distinguishes **absent** from **empty** — this matters for resume persistence (preserve the last-known list when the server didn't report one; clear it when the server explicitly says it has none): - - ``None`` — the (UNSTABLE) ``models`` block was absent from the response - (older agent, opted out, or ``load_session`` not carrying it). - - ``[]`` — the server *did* report ``models`` but offers no (usable) - models this session. + - ``None`` — neither mechanism was present in the response (older agent, + opted out, or ``load_session`` not carrying it). + - ``[]`` — the server reported a mechanism but offers no (usable) models. - ``[...]`` — the reported models, minus any with an unusable ``model_id``. + ``via_config_option`` is the selection mechanism: ``True`` for the + configOptions select, ``False`` for the ``models`` capability, and + ``default_via_config_option`` when the response carries neither (the + resume-path default for a ``load_session`` that omits the model block). + ``getattr`` keeps the helper tolerant of agents that emit a partial structure. """ if response is None: - return None, None + return None, None, default_via_config_option models = getattr(response, "models", None) - if models is None: - return None, None - current = getattr(models, "current_model_id", None) + if models is not None: + current = getattr(models, "current_model_id", None) + current = current if isinstance(current, str) and current else None + raw = getattr(models, "available_models", None) or [] + usable = _usable_models(ACPModelInfo.from_protocol(m) for m in raw) + return current, usable, False + # configOptions mechanism: the ``model`` select carries the same state, with + # each option's ``value`` as the model id (== the ``set_config_option`` target). + opt = _model_config_option(response) + if opt is None: + return None, None, default_via_config_option + current = getattr(opt, "current_value", None) current = current if isinstance(current, str) and current else None - raw = getattr(models, "available_models", None) or [] - # Drop entries without a usable id: an empty/missing ``model_id`` is an - # invalid picker option and an unusable ``set_session_model`` target, so we - # filter it out rather than surfacing ``model_id=""``. - available = [ - info for info in (ACPModelInfo.from_protocol(m) for m in raw) if info.model_id - ] - return current, available + options = getattr(opt, "options", None) or [] + usable = _usable_models( + ACPModelInfo.from_protocol(o, id_attr="value") for o in options + ) + return current, usable, True # The ACP MCP server union accepted by new_session() / load_session(). @@ -532,62 +606,53 @@ async def _maybe_set_session_model( agent_name: str, session_id: str, acp_model: str | None, + *, + via_config_option: bool, ) -> bool: """Apply the *initial* session model right after session creation. - This is the session-creation path only, gated on - :attr:`~openhands.sdk.settings.acp_providers.ACPProviderInfo.supports_set_session_model`. - All built-in providers get a one-shot ``set_session_model`` call here. - claude-agent-acp used to be skipped on the assumption that it already - received the model via ``new_session()`` ``_meta``, but 0.30.0 silently - ignores that payload (#3654) — the protocol call is the only path that - both validates and applies the model. - - For unknown/custom providers (e.g. Devin CLI), we fall back to the generic - ``set_config_option`` method with configId="model", which is a standard ACP - method that many custom ACP servers support. - - Runtime, mid-conversation switches go through - :meth:`ACPAgent.set_acp_model` instead, which always uses - ``set_session_model`` and is gated on the separate - ``supports_runtime_model_switch`` capability flag. - - Returns ``True`` only when this issued a model-setting call that succeeded — i.e. - the override was actually pushed to the server via *this* path. ``False`` - when there is nothing to apply (no ``acp_model``) or the provider selects - its model another way (``_meta``) or the server rejected the call, so - the caller can tell whether the live session is really running ``acp_model``. + Session-creation path only, gated on + ``ACPProviderInfo.supports_set_session_model``. The model is applied via the + mechanism the session advertised (``via_config_option``): + ``set_config_option(configId="model")`` for configOptions-based servers + (codex-acp 0.16+, claude-agent-acp 0.44+), else ``set_session_model``. The + ``_meta`` model payload is ignored by the pinned CLIs, so this protocol call + is what actually applies the model (#3654). A rejection is tolerated for + any provider — the curated model list is a pre-session suggestion, not an + access check (a server's model menu is dynamic/account-dependent), so the + session keeps the server default rather than failing to start. Runtime + switches go through :meth:`ACPAgent.set_acp_model`. + + Returns ``True`` only when a model-setting call succeeded (the override + reached the server); ``False`` when there is nothing to apply or the call + was rejected, so the caller knows whether the live session runs ``acp_model``. """ if not acp_model: return False provider = detect_acp_provider_by_agent_name(agent_name) - if provider is not None and provider.supports_set_session_model: - await conn.set_session_model(model_id=acp_model, session_id=session_id) + # Known providers gate on supports_set_session_model; unknown/custom servers + # always attempt (best-effort). + if provider is not None and not provider.supports_set_session_model: + return False + # A rejection is tolerated so session creation can't break: the curated model + # list is a suggestion, not an access check — a server's model menu is + # dynamic/account-dependent (claude-agent-acp validates set_config_option + # against the live select and rejects an absent id), so a model the live + # session won't accept degrades to the server default rather than failing. + try: + await _apply_acp_model( + conn, session_id, acp_model, via_config_option=via_config_option + ) return True - # For unknown/custom providers, try the generic set_config_option method - # which is a standard ACP protocol method for setting configuration options - if provider is None: - try: - await conn.set_config_option( - config_id="model", - value=acp_model, - session_id=session_id, - ) - logger.info( - "Set model %r on unknown/custom ACP server %s via set_config_option", - acp_model, - agent_name, - ) - return True - except ACPRequestError as e: - logger.warning( - "Could not set model %r on unknown/custom ACP server %s via " - "set_config_option (%s); the session will use the server default", - acp_model, - agent_name, - e, - ) - return False + except ACPRequestError as e: + logger.warning( + "Could not set model %r on ACP server %s (%s); " + "the session will use the server default", + acp_model, + agent_name, + e, + ) + return False async def _reapply_session_model_on_resume( @@ -595,32 +660,29 @@ async def _reapply_session_model_on_resume( agent_name: str, session_id: str, acp_model: str | None, + *, + via_config_option: bool, ) -> bool: """Reapply the persisted model to a *resumed* session. ``load_session()`` carries no model ``_meta``, so a session resumed after a runtime switch (or with any persisted ``acp_model``) would otherwise run on - the ACP server's default. This issues ``set_session_model`` so the resumed - live session matches the serialized ``acp_model``. - - For unknown/custom providers (e.g. Devin CLI), we fall back to the generic - ``set_config_option`` method with configId="model", which is a standard ACP - method that many custom ACP servers support. - - The gating mirrors :meth:`ACPAgent.set_acp_model` (attempt for custom/unknown - servers and known providers that support runtime switching; skip only known - providers that don't), deliberately differing from the initial-selection - gate: claude-agent-acp selects its initial model via ``_meta`` yet supports - ``set_session_model`` for later switches. A server that rejects the call is - tolerated (logged) — like the ``load_session`` fallback above — so resume - can't break; the session keeps the server default until the next switch. + the ACP server's default. This applies the model via the mechanism the + resumed session uses (``via_config_option``: ``set_config_option`` for + codex-acp 0.16+/claude-agent-acp 0.44+, else ``set_session_model``) so the + live session matches the serialized ``acp_model``. The caller derives + ``via_config_option`` from the ``load_session`` response, falling back to the + persisted mechanism hint when that response omits the model block. + + Gated on runtime-switch support (skip only known providers that don't); a + server that rejects the call is tolerated (logged) — like the + ``load_session`` fallback — so resume can't break and the session keeps the + server default until the next switch. Returns ``True`` only when a model-setting call was issued and accepted, so the caller knows the resumed live session is actually running ``acp_model``. ``False`` when there is nothing to reapply, the provider doesn't support the - switch, or the server rejected the call (swallowed) — in those cases the - session keeps the server default and the override must not be surfaced as - the current model. + switch, or the server rejected the call (swallowed). """ if not acp_model: return False @@ -628,22 +690,9 @@ async def _reapply_session_model_on_resume( if provider is not None and not provider.supports_runtime_model_switch: return False try: - if provider is not None: - # Known provider: use set_session_model - await conn.set_session_model(model_id=acp_model, session_id=session_id) - else: - # Unknown/custom provider: try set_config_option as fallback - await conn.set_config_option( - config_id="model", - value=acp_model, - session_id=session_id, - ) - logger.info( - "Reapplied model %r on unknown/custom ACP server %s " - "via set_config_option", - acp_model, - agent_name, - ) + await _apply_acp_model( + conn, session_id, acp_model, via_config_option=via_config_option + ) return True except ACPRequestError as e: logger.warning( @@ -1354,10 +1403,10 @@ def _serialize_acp_env(self, value: dict[str, str], info): acp_model: str | None = Field( default=None, description=( - "Model for the ACP server to use (e.g. 'claude-opus-4-6' or " - "'gpt-5.4'). For Claude ACP, passed via session _meta. For Codex " - "ACP, applied via the protocol-level set_session_model call. " - "If None, the server picks its default." + "Model for the ACP server to use (e.g. 'sonnet' or 'gpt-5.5'). " + "Applied via the protocol — set_config_option(model) for " + "configOptions-based servers (codex, claude), else " + "set_session_model. If None, the server picks its default." ), ) acp_resume_session_id: str | None = Field( @@ -1479,6 +1528,12 @@ def model_post_init(self, __context: object) -> None: _agent_version: str = PrivateAttr( default="" ) # ACP server version from InitializeResponse + # Which protocol this session uses to select the model: ``True`` ⇒ + # ``session/set_config_option(configId="model")`` (codex-acp 0.16+, + # claude-agent-acp 0.44+); ``False`` ⇒ ``session/set_model`` (gemini-cli and + # older codex/claude). Detected from the session/new (or load_session) + # response at init and reused by runtime ``set_acp_model`` switches. + _model_via_config_option: bool = PrivateAttr(default=False) # The model the ACP server reported as active for this session, captured # from ``models.currentModelId`` on the new_session / load_session # response. Overridden by ``self.acp_model`` when the caller explicitly @@ -1674,8 +1729,8 @@ def available_models(self) -> list[ACPModelInfo]: server's ``model_id`` plus an optional ``name``/``description`` — enough for a client to render a model picker and resolve ``current_model_id`` to a display label without any server-side - curation. ``current_model_id`` is the value to pass to - ``set_session_model`` to switch. + curation. ``current_model_id`` is the value to pass back to the server + to switch to that model. Same lifecycle and serialization caveats as ``current_model_id``: in-process runtime state, lifted onto @@ -1691,7 +1746,7 @@ def supports_runtime_model_switch(self) -> bool: Tells a client whether to offer the inline picker's live-switch control. ``True`` only for known providers that explicitly declare support for - ``session/set_model``. Unknown/custom providers use ``set_config_option`` + runtime model switching. Unknown/custom providers use ``set_config_option`` for *initial* model selection but that RPC is a generic config write, not a guaranteed live-switch primitive, so the picker is hidden for them. ``False`` before a session exists (nothing to switch yet). @@ -1860,6 +1915,10 @@ def init_state( # conversation list can tell the picker whether to offer live # switching without re-detecting the provider server-side. "acp_supports_runtime_model_switch": self.supports_runtime_model_switch, + # Detected model-selection mechanism — persisted so a resume whose + # load_session omits the model block reapplies the model via the + # right call instead of defaulting to set_session_model. + "acp_model_via_config_option": self._model_via_config_option, } # When starting a fresh session, clear stale suffix marker so the next # launch knows to re-inject it (PR behavior: suffix state is per-session). @@ -2414,8 +2473,21 @@ async def _init() -> tuple[ mcp_servers=acp_mcp_servers, ) session_id = prior_session_id - reported_model_id, available_models = _extract_session_models( - load_response + # load_session often omits the model block; fall back to the + # mechanism detected at session creation (persisted alongside + # the session id) so a config-option session still reapplies + # via the right call rather than defaulting to + # set_session_model and silently running the server default. + persisted_via_config_option = bool( + state.agent_state.get("acp_model_via_config_option", False) + ) + ( + reported_model_id, + available_models, + self._model_via_config_option, + ) = _extract_session_models( + load_response, + default_via_config_option=persisted_via_config_option, ) logger.info( "Resumed ACP session %s (cwd=%s)", @@ -2447,17 +2519,26 @@ async def _init() -> tuple[ **session_meta, ) session_id = response.session_id - reported_model_id, available_models = _extract_session_models(response) + # Detect the model-selection protocol the server advertised (in + # the same scan) so init and later runtime switches use the right + # call. + ( + reported_model_id, + available_models, + self._model_via_config_option, + ) = _extract_session_models(response) # Initial-model protocol call for every built-in provider - # (codex, gemini, claude-code). The pinned claude CLI ignores - # the _meta above, so this set_session_model call is what - # actually applies the model (#3654); _meta is kept only as - # backward-compat for older claude CLIs that still honor it. + # (codex, gemini, claude-code). The pinned claude/codex CLIs + # ignore the _meta above, so this protocol call is what actually + # applies the model (#3654) — via set_config_option for + # configOptions-based servers, else set_session_model; _meta is + # kept only as backward-compat for older claude CLIs. applied_via_call = await _maybe_set_session_model( conn, agent_name, session_id, self.acp_model, + via_config_option=self._model_via_config_option, ) # set_session_model is the authoritative signal that acp_model # reached the server. _meta is a no-op on the pinned claude CLI, @@ -2470,12 +2551,14 @@ async def _init() -> tuple[ # reapply the persisted (possibly runtime-switched) acp_model via # the runtime-switch capability — otherwise the resumed live # session would run on the server default while serialized state - # claims the switched model. + # claims the switched model. ``_model_via_config_option`` was set + # from the load_session response above. override_applied = await _reapply_session_model_on_resume( conn, agent_name, session_id, self.acp_model, + via_config_option=self._model_via_config_option, ) # Resolve the model the agent will actually use. @@ -2485,10 +2568,9 @@ async def _init() -> tuple[ else reported_model_id ) - # Resolve the permission mode. Known providers each have their - # own mode ID (bypassPermissions, full-access, yolo …). - # Unknown/custom servers get None — skip the call rather than - # sending a provider-specific string they won't recognise. + # Resolve the permission mode. Known providers each have their own + # mode ID; unknown/custom servers get None — skip the call rather + # than sending a provider-specific string they won't recognise. provider = detect_acp_provider_by_agent_name(agent_name) mode_id = self.acp_session_mode or ( provider.default_session_mode if provider else None @@ -3446,9 +3528,11 @@ async def _fork_and_prompt() -> str: def set_acp_model(self, model: str) -> None: """Switch the model on the running ACP session (mid-conversation). - Issues a protocol-level ``session/set_model`` call on the live - connection so the new model takes effect for subsequent turns in the - *same* session — no subprocess restart, no loss of conversation + Issues a protocol-level model-switch call on the live connection (the + mechanism the session advertised — ``set_config_option(model)`` or + ``set_session_model``) so the new model takes effect for subsequent + turns in the *same* session — no subprocess restart, no loss of + conversation context. Verified against claude-agent-acp and codex-acp. This is the low-level agent primitive; prefer @@ -3463,12 +3547,12 @@ def set_acp_model(self, model: str) -> None: Args: model: Provider-specific model id to switch to (e.g. - ``"claude-haiku-4-5-20251001"`` or ``"gpt-5.4/low"``). + ``"sonnet"`` or ``"gpt-5.5"``). Raises: ValueError: If ``model`` is empty or whitespace-only, if the detected provider does not support runtime model switching, or - if the ACP server rejects the ``session/set_model`` call (e.g. + if the ACP server rejects the model-switch call (e.g. method-not-found on a custom server, or an invalid model id). RuntimeError: If the ACP session has not been initialized yet (i.e. before the first ``run()``). @@ -3477,8 +3561,8 @@ def set_acp_model(self, model: str) -> None: Note: A timeout means the client stopped waiting, not that the switch was - rejected: the ``session/set_model`` request may already have been - written and could still be applied server-side. The connection and + rejected: the switch request may already have been written and + could still be applied server-side. The connection and session stay alive and the local sentinel model is intentionally left unchanged, so a timed-out switch leaves the server-side model indeterminate. The conservative choice (treat it as failed locally) @@ -3504,8 +3588,11 @@ def set_acp_model(self, model: str) -> None: if provider is not None and not provider.supports_runtime_model_switch: raise ValueError( f"ACP provider '{provider.key}' does not support runtime model " - "switching via set_session_model." + "switching." ) + # ``has_live_acp_session`` above guarantees a session id; narrow for the + # type checker. + assert self._session_id is not None # Bounded round-trip: this runs while LocalConversation.switch_acp_model # holds the state lock, so a server that accepts the call but never # answers must not wedge the lock indefinitely. On timeout / protocol @@ -3513,25 +3600,34 @@ def set_acp_model(self, model: str) -> None: # LLM is only updated once the live session has actually switched. try: self._executor.run_async( - self._conn.set_session_model( - model_id=model, session_id=self._session_id + _apply_acp_model( + self._conn, + self._session_id, + model, + via_config_option=self._model_via_config_option, ), timeout=self.acp_prompt_timeout, ) except ACPRequestError as e: # Server-internal failures (JSON-RPC -32603) are not the caller's # fault, and the prompt path already treats them as retriable. Let - # them propagate (-> 5xx) instead of mislabeling them as a 400 - # client error. + # them propagate (-> 5xx) instead of mislabeling them as a 400. if e.code in _RETRIABLE_SERVER_ERROR_CODES: raise # acp.exceptions.RequestError derives from Exception (not - # RuntimeError); surface a true client/protocol rejection (e.g. - # method-not-found, invalid model id) as a ValueError so callers — - # and the agent-server route — treat it as a 400-class client error - # rather than an opaque 500. + # RuntimeError); surface a true client/protocol rejection (invalid + # model id, or method-not-found on a mis-detected server) as a + # ValueError so callers — and the agent-server route — treat it as a + # 400-class client error rather than an opaque 500. The mechanism is + # the one the session advertised (``_model_via_config_option``), so + # the message names the call that actually failed. + method = ( + "set_config_option(model)" + if self._model_via_config_option + else "set_session_model" + ) raise ValueError( - f"ACP server rejected set_session_model(model={model!r}): {e}" + f"ACP server rejected {method}(model={model!r}): {e}" ) from e # Reflect the live model on the sentinel LLM + metrics so cost/token # accounting and serialized state show the model actually in use diff --git a/openhands-sdk/openhands/sdk/agent/acp_models.py b/openhands-sdk/openhands/sdk/agent/acp_models.py index 4e7d79b340..9377d705a8 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_models.py +++ b/openhands-sdk/openhands/sdk/agent/acp_models.py @@ -34,14 +34,14 @@ class ACPModelInfo(BaseModel): model_id: str = Field( description=( "Server-assigned model identifier. May be concrete " - '(e.g. ``"gpt-5.5/xhigh"``) or an opaque alias ' - '(e.g. ``"default"``, ``"auto"``). This is the value to pass to ' - "``set_session_model`` to switch to this model." + '(e.g. ``"gpt-5.5"``) or an opaque alias ' + '(e.g. ``"default"``, ``"auto"``). This is the value to pass back ' + "to the server to switch to this model." ), ) name: str | None = Field( default=None, - description='Human-readable label, e.g. ``"GPT-5.5 (xhigh)"``.', + description='Human-readable label, e.g. ``"GPT-5.5"``.', ) description: str | None = Field( default=None, @@ -49,15 +49,19 @@ class ACPModelInfo(BaseModel): ) @classmethod - def from_protocol(cls, raw: Any) -> ACPModelInfo: + def from_protocol(cls, raw: Any, *, id_attr: str = "model_id") -> ACPModelInfo: """Build from a raw ACP ``ModelInfo`` (or any duck-typed object). Tolerant of partial/malformed entries: non-string fields degrade to ``""`` (``model_id``) or ``None`` (``name``/``description``) rather than raising, since the source is an UNSTABLE protocol capability that older or half-implemented agents may emit incompletely. + + ``id_attr`` names the attribute carrying the model id — ``"model_id"`` + for a ``models``-capability ``ModelInfo``, ``"value"`` for a + ``configOptions`` select option. """ - model_id = getattr(raw, "model_id", None) + model_id = getattr(raw, id_attr, None) name = getattr(raw, "name", None) description = getattr(raw, "description", None) return cls( diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py index 39e1ba4ab8..f30e46f98d 100644 --- a/openhands-sdk/openhands/sdk/settings/acp_providers.py +++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py @@ -12,11 +12,10 @@ - ``agent_name_patterns`` lowercase substrings in the runtime agent name; used by ``ACPAgent`` to auto-detect mode / protocol - ``supports_set_session_model`` whether the provider applies its *initial* - model via the ``set_session_model`` protocol - call at session creation -- ``supports_runtime_model_switch`` whether the server supports the - ``session/set_model`` protocol call for - runtime, mid-conversation model switching + model via a protocol call at session creation + (``set_config_option``/``set_session_model``) +- ``supports_runtime_model_switch`` whether the server supports a protocol-level + model switch for runtime, mid-conversation use - ``session_meta_key`` top-level ``_meta`` key for model selection (or ``None``) - ``available_models`` curated list of selectable models for the provider's model picker (``acp_model`` candidates) @@ -155,13 +154,12 @@ class ACPProviderInfo: """ default_session_mode: str - """ACP session-mode ID that suppresses all permission prompts. + """ACP session-mode ID set right after ``session/new``. - Different servers use different IDs for the same concept: - - - ``bypassPermissions`` — claude-agent-acp - - ``full-access`` — codex-acp - - ``yolo`` — gemini-cli + For servers with a permission-suppressing mode that is the value: + ``bypassPermissions`` (claude-agent-acp), ``full-access`` (codex-acp). + gemini-cli uses ``default`` (its ``yolo`` mode errors at init); the ACP + bridge auto-approves permission requests, so the mode doesn't gate prompts. """ agent_name_patterns: tuple[str, ...] @@ -195,12 +193,13 @@ class ACPProviderInfo: When non-``None``, the model is additionally advertised via ACP session ``_meta`` using the structure ``{session_meta_key: {"options": {"model": }}}`` passed to - ``new_session()``. This is best-effort only: claude-agent-acp 0.30.0 - ignores it (#3654), so the authoritative initial selection is the - ``set_session_model`` call gated on :attr:`supports_set_session_model`. + ``new_session()``. This is best-effort only: claude-agent-acp ignores it + (#3654), so the authoritative initial selection is the protocol call gated + on :attr:`supports_set_session_model`. - Runtime switches always use ``set_session_model`` (gated on - :attr:`supports_runtime_model_switch`). + Runtime switches use the mechanism the session advertised + (``set_config_option`` or ``set_session_model``), gated on + :attr:`supports_runtime_model_switch`. - ``"claudeCode"`` — claude-agent-acp - ``None`` — codex-acp, gemini-cli @@ -235,7 +234,7 @@ class ACPProviderInfo: Unlike :attr:`supports_set_session_model`, this is about switching the model of an *already-running* session, not the initial selection. A provider may select its initial model via ``_meta`` (claude-agent-acp) - yet still support ``set_session_model`` for later switches. + yet still support a protocol-level switch for later changes. Defaults to ``False`` so forward-compat providers — and any external caller constructing this dataclass positionally — keep working without a @@ -293,50 +292,50 @@ class ACPProviderInfo: # ``acp_model`` outside these lists is always allowed. # --------------------------------------------------------------------------- -# Canonical model IDs the Claude Code CLI accepts, curated to the newest -# generation per capability tier. ``opus[1m]`` is the SDK-documented -# version-agnostic 1M-context alias (auto-tracks the newest 1M-capable model — -# keep its label version-less to match). ``opusplan`` routes planning to Opus -# and execution to Sonnet. +# Model IDs the Claude Code CLI accepts, mirroring the ``model`` configOptions +# select claude-agent-acp 0.44.0 reports at ``session/new`` (the short aliases +# the CLI's own ``/model`` menu offers, switched via ``set_config_option``). +# ``opus[1m]`` is the SDK-documented version-agnostic 1M-context alias and the +# CLI's own default (``currentValue``); ``default`` is the CLI's recommended +# tier (Opus 4.8 · 1M). The ``/model`` menu is dynamic/account-dependent and the +# CLI validates ``set_config_option(model)`` against the live select — it rejects +# an absent id (e.g. ``sonnet`` on accounts without it), so these are pre-session +# suggestions, not ground truth; a rejected id degrades to the server default. _CLAUDE_MODELS: tuple[ACPModelOption, ...] = ( - ACPModelOption(id="claude-fable-5", label="Claude Fable 5"), - ACPModelOption(id="claude-opus-4-8", label="Claude Opus 4.8"), - ACPModelOption(id="opus[1m]", label="Claude Opus (1M)"), - ACPModelOption(id="claude-sonnet-4-6", label="Claude Sonnet 4.6"), - ACPModelOption(id="claude-haiku-4-5", label="Claude Haiku 4.5"), - ACPModelOption(id="opusplan", label="Opus (plan) + Sonnet (execute)"), + ACPModelOption(id="default", label="Default (recommended)"), + ACPModelOption(id="opus[1m]", label="Claude Opus 4.8 (1M)"), + ACPModelOption(id="sonnet", label="Claude Sonnet 4.6"), + ACPModelOption(id="haiku", label="Claude Haiku 4.5"), ) -# Model IDs accepted by ``@zed-industries/codex-acp`` (``session/new`` -# ``availableModels`` on the pinned CLI), curated to the frontier family plus -# the cost-efficient mini tier. Format is ``/`` where the -# trailing tier (``low``/``medium``/``high``/``xhigh``) hints the reasoning -# effort for the turn. +# Bare preset ids the ``@zed-industries/codex-acp`` 0.16 ``model`` configOptions +# select reports at ``session/new`` (``set_config_option(configId="model")`` +# targets). The reasoning-effort tier is a *separate* ``reasoning_effort`` +# configOption on 0.16, not part of the model id, so it is not encoded here. _CODEX_MODELS: tuple[ACPModelOption, ...] = ( - ACPModelOption(id="gpt-5.5/low", label="GPT-5.5 (low)"), - ACPModelOption(id="gpt-5.5/medium", label="GPT-5.5 (medium)"), - ACPModelOption(id="gpt-5.5/high", label="GPT-5.5 (high)"), - ACPModelOption(id="gpt-5.5/xhigh", label="GPT-5.5 (xhigh)"), - ACPModelOption(id="gpt-5.4-mini/low", label="GPT-5.4 Mini (low)"), - ACPModelOption(id="gpt-5.4-mini/medium", label="GPT-5.4 Mini (medium)"), - ACPModelOption(id="gpt-5.4-mini/high", label="GPT-5.4 Mini (high)"), - ACPModelOption(id="gpt-5.4-mini/xhigh", label="GPT-5.4 Mini (xhigh)"), + ACPModelOption(id="gpt-5.5", label="GPT-5.5"), + ACPModelOption(id="gpt-5.4", label="GPT-5.4"), + ACPModelOption(id="gpt-5.4-mini", label="GPT-5.4 Mini"), ) -# Model IDs accepted by ``@google/gemini-cli --acp``. The ``auto-gemini-*`` -# entries delegate version selection to the CLI's router; the explicit -# ``gemini-3.1-*`` / ``gemini-2.5-*`` entries pin to a specific snapshot. +# Model IDs accepted by ``@google/gemini-cli --acp``. Mirrors the +# ``availableModels`` the CLI reports at ``session/new`` on the pinned version +# (gemini-cli 0.46.0). ``auto`` delegates version selection to the CLI's +# router; the explicit ``gemini-*`` entries pin to a specific snapshot. The CLI +# also accepts ids outside this list (it remaps them at generation), so these +# are curated suggestions, not an access check. _GEMINI_MODELS: tuple[ACPModelOption, ...] = ( - ACPModelOption(id="auto-gemini-3", label="Auto (Gemini 3)"), - ACPModelOption(id="auto-gemini-2.5", label="Auto (Gemini 2.5)"), + ACPModelOption(id="auto", label="Auto"), + # gemini-cli 0.46 surfaces the pro-preview as ``gemini-3.1-pro-preview`` once + # the Gemini 3.1 launch flag is on (``PREVIEW_GEMINI_3_1_MODEL``), falling + # back to ``gemini-3-pro-preview`` (``PREVIEW_GEMINI_MODEL``) otherwise — keep + # both so the picker matches either rollout state. ACPModelOption(id="gemini-3.1-pro-preview", label="Gemini 3.1 Pro (preview)"), + ACPModelOption(id="gemini-3-pro-preview", label="Gemini 3 Pro (preview)"), ACPModelOption(id="gemini-3-flash-preview", label="Gemini 3 Flash (preview)"), - ACPModelOption( - id="gemini-3.1-flash-lite-preview", label="Gemini 3.1 Flash Lite (preview)" - ), + ACPModelOption(id="gemini-3.1-flash-lite", label="Gemini 3.1 Flash Lite"), ACPModelOption(id="gemini-2.5-pro", label="Gemini 2.5 Pro"), ACPModelOption(id="gemini-2.5-flash", label="Gemini 2.5 Flash"), - ACPModelOption(id="gemini-2.5-flash-lite", label="Gemini 2.5 Flash Lite"), ) @@ -378,9 +377,13 @@ class ACPProviderInfo: # permission-disabling session mode. In the image the binary rewrite in # `ACPAgentSettings.resolve_acp_command` runs the pinned `binary_name` instead, # so the `@version` suffix is a no-op there. -CLAUDE_AGENT_ACP_VERSION = "0.30.0" -CODEX_ACP_VERSION = "0.15.0" -GEMINI_CLI_VERSION = "0.38.0" +# +# claude-agent-acp 0.44+ / codex-acp 0.16+ select the model via a ``model`` +# ``configOptions`` entry rather than ``session/set_model``; the SDK detects +# which per session and applies it through the matching call. +CLAUDE_AGENT_ACP_VERSION = "0.44.0" +CODEX_ACP_VERSION = "0.16.0" +GEMINI_CLI_VERSION = "0.46.0" ACP_PROVIDERS: Mapping[str, ACPProviderInfo] = MappingProxyType( @@ -397,17 +400,20 @@ class ACPProviderInfo: base_url_env_var="ANTHROPIC_BASE_URL", default_session_mode="bypassPermissions", agent_name_patterns=("claude-agent",), - # claude-agent-acp 0.30.0 silently ignores the session-_meta model - # selection (the requested model only becomes a picker option; the - # session keeps running "default"), so the init path must push the - # model via session/set_model like codex/gemini (#3654). The _meta + # claude-agent-acp ignores the session-_meta model selection (the + # requested model only becomes a picker option; the session keeps + # running its default), so the init path must push the model via a + # protocol call (#3654). On 0.44.0+ that call is + # ``set_config_option(configId="model")`` rather than + # ``set_session_model`` (auto-detected from session/new); the _meta # payload (session_meta_key below) is still sent — harmless, and # picks up the same model if a future CLI honours it. supports_set_session_model=True, supports_runtime_model_switch=True, session_meta_key="claudeCode", available_models=_CLAUDE_MODELS, - default_model="claude-opus-4-8", + # The CLI's own default (model configOptions ``currentValue``). + default_model="opus[1m]", binary_name="claude-agent-acp", data_dir_env_var="CLAUDE_CONFIG_DIR", ), @@ -427,7 +433,7 @@ class ACPProviderInfo: supports_runtime_model_switch=True, session_meta_key=None, available_models=_CODEX_MODELS, - default_model="gpt-5.5/medium", + default_model="gpt-5.5", file_secrets=_CODEX_FILE_SECRETS, binary_name="codex-acp", data_dir_env_var="CODEX_HOME", @@ -443,18 +449,22 @@ class ACPProviderInfo: ), api_key_env_var="GEMINI_API_KEY", base_url_env_var="GEMINI_BASE_URL", - default_session_mode="yolo", + # gemini-cli 0.46.0 rejects ``set_session_mode("yolo")`` at session + # init (-32603), which crashes headless startup; ``default`` is + # accepted. The ACP bridge auto-approves every request_permission, so + # prompts never block regardless of mode. See #3772. + default_session_mode="default", agent_name_patterns=("gemini-cli",), supports_set_session_model=True, supports_runtime_model_switch=True, session_meta_key=None, available_models=_GEMINI_MODELS, - # Match the Gemini CLI's own no-model-configured default - # (``DEFAULT_GEMINI_MODEL_AUTO``), i.e. the auto-router — not a - # manually-pinned snapshot. Pinning ``gemini-2.5-pro`` here would - # make downstream clients persist a value that bypasses the CLI's - # auto-routing. - default_model="auto-gemini-2.5", + # Match the Gemini CLI's own auto-router rather than a manually + # pinned snapshot. Pinning e.g. ``gemini-2.5-pro`` here would make + # downstream clients persist a value that bypasses the CLI's + # auto-routing. ``auto`` is the router id the CLI reports in its + # 0.46.0 ``availableModels``. + default_model="auto", file_secrets=_GEMINI_FILE_SECRETS, binary_name="gemini", # Gemini CLI has no dedicated config-dir var; it hard-codes @@ -516,7 +526,7 @@ def detect_acp_provider_by_command( *caller-controlled*: each token is reduced to its basename (last path segment, minus a trailing ``@version`` pin) and a provider matches only when that basename *starts with* one of its patterns. This accepts the real forms — - ``@zed-industries/codex-acp``, ``@google/gemini-cli@0.43.0``, + ``@zed-industries/codex-acp``, ``@google/gemini-cli@0.46.0``, ``/opt/node_modules/.bin/codex-acp`` — while rejecting incidental substrings like ``my-codex-acp-wrapper`` or ``/opt/shims/not-codex-acp`` that a plain substring test would misattribute. diff --git a/openhands-sdk/openhands/sdk/settings/model.py b/openhands-sdk/openhands/sdk/settings/model.py index 624aa9402e..89c26c518c 100644 --- a/openhands-sdk/openhands/sdk/settings/model.py +++ b/openhands-sdk/openhands/sdk/settings/model.py @@ -1322,10 +1322,10 @@ def _serialize_acp_env(self, value: dict[str, str], info): acp_model: str | None = Field( default=None, description=( - "Model identifier for the ACP server to use (e.g. " - "``'claude-opus-4-6'``). claude-agent-acp receives it via session " - "_meta; codex-acp and gemini-cli via ``set_session_model``. " - "Leave blank to let the server pick its default." + "Model identifier for the ACP server to use (e.g. ``'sonnet'`` or " + "``'gpt-5.5'``). Applied via the protocol — set_config_option(model) " + "for configOptions-based servers (codex, claude), else " + "set_session_model. Leave blank to let the server pick its default." ), json_schema_extra={ SETTINGS_METADATA_KEY: SettingsFieldMetadata( diff --git a/tests/agent_server/test_conversation_info_model.py b/tests/agent_server/test_conversation_info_model.py index eb55c65a10..975a655ede 100644 --- a/tests/agent_server/test_conversation_info_model.py +++ b/tests/agent_server/test_conversation_info_model.py @@ -205,13 +205,13 @@ def test_available_models_lifted_from_acp_agent(): def test_available_models_empty_when_server_omits_them(): """Servers that don't surface the UNSTABLE ``models`` capability yield [].""" agent = ACPAgent(acp_command=["echo", "test"]) - agent._current_model_id = "gpt-5.5/xhigh" + agent._current_model_id = "gpt-5.5" state = _make_state(agent) stored = _make_stored(state) info = _compose_conversation_info(stored, state) - assert info.current_model_id == "gpt-5.5/xhigh" + assert info.current_model_id == "gpt-5.5" assert info.available_models == [] diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 34eba1e693..50e1e6053a 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -9,16 +9,18 @@ from base64 import urlsafe_b64encode from concurrent.futures import Future from pathlib import Path +from types import SimpleNamespace from typing import Any from unittest.mock import AsyncMock, MagicMock, patch import pytest from acp.exceptions import RequestError as ACPRequestError -from acp.schema import PromptResponse +from acp.schema import NewSessionResponse, PromptResponse from pydantic import Field, SecretStr from openhands.sdk.agent.acp_agent import ( ACPAgent, + _apply_acp_model, _classify_acp_init_error, _codex_auth_file, _codex_base_url_overrides, @@ -4426,72 +4428,106 @@ def test_noop_when_caller_already_set_provider(self): class TestMaybeSetSessionModel: @pytest.mark.asyncio - async def test_codex_agent_uses_protocol_model_override(self): + async def test_set_session_model_mechanism(self): + # ``via_config_option=False`` (gemini-cli, older codex/claude with the + # ``models`` capability) applies the model via ``set_session_model``. conn = AsyncMock() applied = await _maybe_set_session_model( - conn, "codex-acp", "session-1", "gpt-5.4" + conn, "codex-acp", "session-1", "gpt-5.4", via_config_option=False ) conn.set_session_model.assert_awaited_once_with( model_id="gpt-5.4", session_id="session-1", ) + conn.set_config_option.assert_not_called() # The override was actually pushed to the server via the protocol call. assert applied is True @pytest.mark.asyncio - async def test_claude_agent_uses_protocol_model_override(self): - # claude-agent-acp 0.30.0 silently ignores the session-_meta model - # selection (#3654), so the init path pushes the model via the same - # one-shot set_session_model call as codex/gemini. + async def test_set_config_option_mechanism(self): + # ``via_config_option=True`` (codex-acp 0.16+, claude-agent-acp 0.44+) + # applies the model via ``set_config_option(configId="model")``. conn = AsyncMock() applied = await _maybe_set_session_model( conn, "claude-agent-acp", "session-1", - "claude-opus-4-8", + "opus[1m]", + via_config_option=True, ) - conn.set_session_model.assert_awaited_once_with( - model_id="claude-opus-4-8", + conn.set_config_option.assert_awaited_once_with( + config_id="model", + value="opus[1m]", session_id="session-1", ) + conn.set_session_model.assert_not_called() assert applied is True @pytest.mark.asyncio async def test_missing_model_skips_protocol_override(self): conn = AsyncMock() - applied = await _maybe_set_session_model(conn, "codex-acp", "session-1", None) + applied = await _maybe_set_session_model( + conn, "codex-acp", "session-1", None, via_config_option=True + ) conn.set_session_model.assert_not_called() + conn.set_config_option.assert_not_called() assert applied is False @pytest.mark.asyncio - async def test_unknown_provider_uses_set_config_option_fallback(self): - # An unknown/custom server now tries set_config_option as a fallback - # for model selection, which is a standard ACP method. - conn = AsyncMock() + async def test_unknown_provider_applies_via_detected_mechanism(self): + # An unknown/custom server applies the model via whichever mechanism its + # session/new advertised (detected upstream), not a hardcoded guess. + conn_cfg = AsyncMock() applied = await _maybe_set_session_model( - conn, "devin-cli", "session-1", "kimi-k2-6" + conn_cfg, "devin-cli", "session-1", "kimi-k2-6", via_config_option=True ) - conn.set_session_model.assert_not_called() - conn.set_config_option.assert_awaited_once_with( - config_id="model", - value="kimi-k2-6", - session_id="session-1", + conn_cfg.set_config_option.assert_awaited_once_with( + config_id="model", value="kimi-k2-6", session_id="session-1" + ) + conn_cfg.set_session_model.assert_not_called() + assert applied is True + + conn_set = AsyncMock() + applied = await _maybe_set_session_model( + conn_set, "devin-cli", "session-1", "kimi-k2-6", via_config_option=False ) + conn_set.set_session_model.assert_awaited_once_with( + model_id="kimi-k2-6", session_id="session-1" + ) + conn_set.set_config_option.assert_not_called() assert applied is True @pytest.mark.asyncio - async def test_unknown_provider_set_config_option_failure_is_tolerated(self): - # If set_config_option fails for an unknown provider, we log a warning - # but don't break session creation. + async def test_unknown_provider_apply_failure_is_tolerated(self): + # If the model-apply fails for an unknown provider, we log a warning but + # don't break session creation. conn = AsyncMock() conn.set_config_option.side_effect = ACPRequestError( code=-32601, message="method not found" ) applied = await _maybe_set_session_model( - conn, "some-custom-acp", "session-1", "whatever" + conn, "some-custom-acp", "session-1", "whatever", via_config_option=True + ) + conn.set_config_option.assert_awaited_once() + assert applied is False + + @pytest.mark.asyncio + async def test_known_provider_apply_rejection_is_tolerated(self): + # claude-agent-acp's ``model`` select is dynamic/account-dependent and + # rejects an absent curated id (e.g. ``sonnet`` on accounts without it, + # dressed as -32603). A rejected *initial* model must not fail session + # creation — it degrades to the server default (applied=False) like the + # unknown-provider path, since the curated list is a suggestion not an + # access check. (Runtime switches via set_acp_model surface the error.) + conn = AsyncMock() + conn.set_config_option.side_effect = ACPRequestError( + code=-32603, message="Invalid value for config option model: sonnet" + ) + applied = await _maybe_set_session_model( + conn, "claude-agent-acp", "session-1", "sonnet", via_config_option=True ) - conn.set_session_model.assert_not_called() conn.set_config_option.assert_awaited_once() + conn.set_session_model.assert_not_called() assert applied is False @@ -4499,12 +4535,18 @@ class TestReapplySessionModelOnResume: """Resume reapplies the persisted model via the runtime-switch gate.""" @pytest.mark.asyncio - async def test_claude_reapplies_persisted_model_on_resume(self): + async def test_reapply_via_set_session_model_mechanism(self): # load_session() carries no model selection, so on resume the persisted - # model must be reapplied via the runtime-switch gate. + # model must be reapplied via the runtime-switch gate. Servers with the + # ``models`` capability (gemini-cli, older codex/claude) use + # ``set_session_model``. conn = AsyncMock() applied = await _reapply_session_model_on_resume( - conn, "claude-agent-acp", "sess-1", "claude-haiku-4-5-20251001" + conn, + "claude-agent-acp", + "sess-1", + "claude-haiku-4-5-20251001", + via_config_option=False, ) conn.set_session_model.assert_awaited_once_with( model_id="claude-haiku-4-5-20251001", session_id="sess-1" @@ -4512,32 +4554,36 @@ async def test_claude_reapplies_persisted_model_on_resume(self): assert applied is True @pytest.mark.asyncio - async def test_codex_reapplies_persisted_model_on_resume(self): + async def test_reapply_via_set_config_option_mechanism(self): + # codex-acp 0.16+/claude-agent-acp 0.44+ reapply via + # ``set_config_option(configId="model")`` with the bare preset id. conn = AsyncMock() applied = await _reapply_session_model_on_resume( - conn, "codex-acp", "sess-1", "gpt-5.4/low" + conn, "codex-acp", "sess-1", "gpt-5.5", via_config_option=True ) - conn.set_session_model.assert_awaited_once_with( - model_id="gpt-5.4/low", session_id="sess-1" + conn.set_config_option.assert_awaited_once_with( + config_id="model", value="gpt-5.5", session_id="sess-1" ) + conn.set_session_model.assert_not_called() assert applied is True @pytest.mark.asyncio async def test_missing_model_skips_reapply(self): conn = AsyncMock() applied = await _reapply_session_model_on_resume( - conn, "claude-agent-acp", "sess-1", None + conn, "claude-agent-acp", "sess-1", None, via_config_option=True ) conn.set_session_model.assert_not_called() + conn.set_config_option.assert_not_called() assert applied is False @pytest.mark.asyncio - async def test_unknown_provider_attempts_reapply_via_set_config_option(self): - # provider=None (custom server) now attempts reapply via set_config_option - # as a fallback, which is a standard ACP method. + async def test_unknown_provider_reapplies_via_detected_mechanism(self): + # provider=None (custom server) reapplies via whichever mechanism the + # resumed session advertised, not a hardcoded guess. conn = AsyncMock() applied = await _reapply_session_model_on_resume( - conn, "devin-cli", "sess-1", "kimi-k2-6" + conn, "devin-cli", "sess-1", "kimi-k2-6", via_config_option=True ) conn.set_session_model.assert_not_called() conn.set_config_option.assert_awaited_once_with( @@ -4569,7 +4615,7 @@ async def test_known_unsupported_provider_skips_reapply(self): return_value=unsupported, ): applied = await _reapply_session_model_on_resume( - conn, "legacy-acp", "sess-1", "x" + conn, "legacy-acp", "sess-1", "x", via_config_option=False ) conn.set_session_model.assert_not_called() assert applied is False @@ -4584,7 +4630,7 @@ async def test_client_rejection_is_swallowed_on_resume(self): code=-32601, message="method not found" ) applied = await _reapply_session_model_on_resume( - conn, "some-custom-acp", "sess-1", "whatever" + conn, "some-custom-acp", "sess-1", "whatever", via_config_option=True ) conn.set_config_option.assert_awaited_once() # Rejected => the live session kept the server default, so the override @@ -4601,7 +4647,7 @@ async def test_any_request_error_is_swallowed_on_resume(self): code=-32603, message="internal error" ) applied = await _reapply_session_model_on_resume( - conn, "codex-acp", "sess-1", "gpt-5.4/low" + conn, "codex-acp", "sess-1", "gpt-5.5", via_config_option=False ) conn.set_session_model.assert_awaited_once() assert applied is False @@ -4650,28 +4696,93 @@ def test_set_acp_model_raises_when_no_live_session(self): class TestSetACPModel: - """Runtime (mid-conversation) model switching via set_session_model.""" + """Runtime (mid-conversation) model switching via the session's mechanism.""" @staticmethod - def _wire(agent: ACPAgent, agent_name: str) -> ACPAgent: - agent._conn = MagicMock() + def _wire( + agent: ACPAgent, agent_name: str, *, via_config_option: bool = False + ) -> ACPAgent: + conn = MagicMock() + conn.set_session_model = AsyncMock() + conn.set_config_option = AsyncMock() + agent._conn = conn agent._session_id = "sess-1" agent._agent_name = agent_name + agent._model_via_config_option = via_config_option executor = MagicMock() - executor.run_async = MagicMock() + + # run_async actually drives the (async) apply coroutine to completion so + # the AsyncMock conn calls fire — exercising the real apply/split path. + def _run(coro: Any, timeout: Any = None) -> Any: + loop = asyncio.new_event_loop() + try: + return loop.run_until_complete(coro) + finally: + loop.close() + + executor.run_async = MagicMock(side_effect=_run) agent._executor = executor return agent def test_switches_model_on_live_codex_session(self): + # Default (models-capability) session ⇒ set_session_model, id as-is. agent = self._wire(_make_agent(), "codex-acp") - agent.set_acp_model("gpt-5.4/low") - agent._conn.set_session_model.assert_called_once_with( - model_id="gpt-5.4/low", session_id="sess-1" + agent.set_acp_model("gpt-5.5") + agent._conn.set_session_model.assert_awaited_once_with( + model_id="gpt-5.5", session_id="sess-1" ) + agent._conn.set_config_option.assert_not_called() agent._executor.run_async.assert_called_once() # Sentinel LLM + metrics reflect the live model for cost/token tracking. - assert agent.llm.model == "gpt-5.4/low" - assert agent.llm.metrics.model_name == "gpt-5.4/low" + assert agent.llm.model == "gpt-5.5" + assert agent.llm.metrics.model_name == "gpt-5.5" + + def test_switches_codex_via_config_option_single_call(self): + # codex-acp 0.16 configOptions: the bare preset id applies as a single + # `model` selection (reasoning effort is a separate, unmanaged option). + agent = self._wire(_make_agent(), "codex-acp", via_config_option=True) + agent.set_acp_model("gpt-5.5") + agent._conn.set_config_option.assert_awaited_once_with( + config_id="model", value="gpt-5.5", session_id="sess-1" + ) + agent._conn.set_session_model.assert_not_called() + assert agent.llm.model == "gpt-5.5" + assert agent._current_model_id == "gpt-5.5" + + def test_switches_claude_via_config_option_single_call(self): + # A bare id (no `/`) applies as a single `model` selection — no effort. + agent = self._wire(_make_agent(), "claude-agent-acp", via_config_option=True) + agent.set_acp_model("sonnet") + agent._conn.set_config_option.assert_awaited_once_with( + config_id="model", value="sonnet", session_id="sess-1" + ) + assert agent._current_model_id == "sonnet" + + def test_switch_method_not_found_raises_no_fallback(self): + # No cross-mechanism fallback: a -32601 surfaces as a ValueError naming + # the advertised mechanism, and the other call is never attempted. + agent = self._wire(_make_agent(), "codex-acp", via_config_option=False) + agent._conn.set_session_model.side_effect = ACPRequestError( + code=-32601, message="Method not found" + ) + with pytest.raises(ValueError, match="rejected set_session_model"): + agent.set_acp_model("gpt-5.5") + agent._conn.set_config_option.assert_not_called() + agent._executor.run_async.assert_called_once() + # Mechanism + sentinel model are left unchanged on a failed switch. + assert agent._model_via_config_option is False + assert agent.llm.model != "gpt-5.5" + + def test_switch_invalid_model_raises_value_error(self): + # A -32602 invalid-params is a real client error, not a wrong-mechanism + # signal: surface it as ValueError without a second call. + agent = self._wire(_make_agent(), "codex-acp", via_config_option=True) + agent._conn.set_config_option.side_effect = ACPRequestError( + code=-32602, message="Invalid params" + ) + with pytest.raises(ValueError, match="rejected set_config_option"): + agent.set_acp_model("nonsense") + agent._executor.run_async.assert_called_once() def test_claude_provider_supports_runtime_switch(self): agent = self._wire(_make_agent(), "claude-agent-acp") @@ -4726,8 +4837,10 @@ def test_translates_acp_request_error_to_value_error(self): # A protocol-level rejection (e.g. method-not-found on a custom server, # or an invalid model id) must surface as a ValueError — not leak as a # raw acp.exceptions.RequestError — so the agent-server maps it to 400. + # The rejection is raised by the conn (driven through the real apply + # coroutine), exercising set_acp_model's actual error path. agent = self._wire(_make_agent(), "codex-acp") - agent._executor.run_async.side_effect = ACPRequestError( + agent._conn.set_session_model.side_effect = ACPRequestError( code=-32601, message="method not found" ) with pytest.raises(ValueError, match="rejected set_session_model"): @@ -4741,7 +4854,7 @@ def test_propagates_server_internal_error(self): # than be mislabeled as a 400-class ValueError, mirroring the retriable # handling on the prompt path. agent = self._wire(_make_agent(), "codex-acp") - agent._executor.run_async.side_effect = ACPRequestError( + agent._conn.set_session_model.side_effect = ACPRequestError( code=-32603, message="internal error" ) with pytest.raises(ACPRequestError): @@ -4753,7 +4866,7 @@ def test_passes_timeout_to_run_async(self): # The protocol round-trip runs under the conversation state lock, so it # must be bounded to avoid wedging the lock if the server never answers. agent = self._wire(_make_agent(acp_prompt_timeout=42.0), "codex-acp") - agent.set_acp_model("gpt-5.4/low") + agent.set_acp_model("gpt-5.5") _, kwargs = agent._executor.run_async.call_args assert kwargs["timeout"] == 42.0 @@ -5041,9 +5154,11 @@ def _fake_run_async(_coro, **_kwargs): class TestGeminiSessionModel: @pytest.mark.asyncio async def test_gemini_cli_uses_protocol_model_override(self): + # gemini-cli keeps the ``models`` capability, so model selection rides + # ``set_session_model`` (via_config_option=False). conn = AsyncMock() await _maybe_set_session_model( - conn, "gemini-cli", "session-1", "gemini-3-flash" + conn, "gemini-cli", "session-1", "gemini-3-flash", via_config_option=False ) conn.set_session_model.assert_awaited_once_with( model_id="gemini-3-flash", @@ -6045,10 +6160,11 @@ def _models_block(current_model_id: str, model_ids: list[str]): models.available_models = entries return models - def test_unknown_provider_applies_override_via_set_config_option(self, tmp_path): + def test_unknown_provider_applies_override_via_detected_mechanism(self, tmp_path): """Fresh session on an unknown/custom provider with ``acp_model`` set: - the override is pushed via ``set_config_option`` (not ``set_session_model``), - so ``current_model_id`` must reflect the applied override. + the override is pushed via the mechanism the session advertised — here a + ``models``-capability response, so ``set_session_model`` — and + ``current_model_id`` must reflect the applied override. """ agent = _make_agent(acp_model="caller-model") state = _make_state(tmp_path) @@ -6063,9 +6179,9 @@ def test_unknown_provider_applies_override_via_set_config_option(self, tmp_path) self._patched_start_acp_server(agent, state, conn=conn) - conn.set_session_model.assert_not_awaited() - conn.set_config_option.assert_awaited_once_with( - config_id="model", value="caller-model", session_id="fresh-sess" + conn.set_config_option.assert_not_awaited() + conn.set_session_model.assert_awaited_once_with( + model_id="caller-model", session_id="fresh-sess" ) assert agent.current_model_id == "caller-model" @@ -6927,7 +7043,7 @@ def test_returns_both_when_response_carries_them(self): response.models = MagicMock() response.models.current_model_id = "default" response.models.available_models = [m1] - cur, avail = _extract_session_models(response) + cur, avail, _ = _extract_session_models(response) assert cur == "default" # Normalized into our stable ACPModelInfo, not the raw acp type. assert avail == [ @@ -6942,7 +7058,7 @@ def test_returns_none_list_when_models_block_absent(self): # Older agents don't include the ``models`` block at all -> None, so # callers know nothing was reported (and can preserve prior state). response = MagicMock(spec=[]) - cur, avail = _extract_session_models(response) + cur, avail, _ = _extract_session_models(response) assert cur is None assert avail is None @@ -6953,19 +7069,19 @@ def test_returns_empty_list_when_available_models_missing(self): response.models = MagicMock() response.models.current_model_id = "gpt-5" response.models.available_models = None - cur, avail = _extract_session_models(response) + cur, avail, _ = _extract_session_models(response) assert cur == "gpt-5" assert avail == [] def test_returns_none_list_when_response_is_none(self): # ``load_session`` can return ``None`` for servers that don't # implement the call — the helper must not crash, and reports "absent". - assert _extract_session_models(None) == (None, None) + assert _extract_session_models(None) == (None, None, False) def test_returns_none_list_when_models_field_is_none(self): response = MagicMock() response.models = None - assert _extract_session_models(response) == (None, None) + assert _extract_session_models(response) == (None, None, False) def test_returns_none_when_current_model_id_is_empty_string(self): # An empty string is treated the same as a missing field — we don't @@ -6975,7 +7091,7 @@ def test_returns_none_when_current_model_id_is_empty_string(self): response.models = MagicMock() response.models.current_model_id = "" response.models.available_models = [] - assert _extract_session_models(response) == (None, []) + assert _extract_session_models(response) == (None, [], False) def test_returns_none_when_current_model_id_is_not_a_string(self): # Defensive: an agent returning a non-string here is malformed. @@ -6983,7 +7099,7 @@ def test_returns_none_when_current_model_id_is_not_a_string(self): response.models = MagicMock() response.models.current_model_id = 42 response.models.available_models = [] - assert _extract_session_models(response) == (None, []) + assert _extract_session_models(response) == (None, [], False) class TestExtractSessionModelsNormalization: @@ -7004,15 +7120,15 @@ def _raw(self, model_id: Any, name: Any = None, description: Any = None) -> Any: def test_maps_fields_through(self): response = MagicMock() response.models = MagicMock() - response.models.current_model_id = "gpt-5.4/low" + response.models.current_model_id = "gpt-5.4" response.models.available_models = [ - self._raw("gpt-5.4/low", "gpt-5.4 (low)", "Strong everyday model."), + self._raw("gpt-5.4", "GPT-5.4", "Strong everyday model."), ] - _cur, avail = _extract_session_models(response) + _cur, avail, _ = _extract_session_models(response) assert avail == [ ACPModelInfo( - model_id="gpt-5.4/low", - name="gpt-5.4 (low)", + model_id="gpt-5.4", + name="GPT-5.4", description="Strong everyday model.", ) ] @@ -7029,10 +7145,324 @@ def test_drops_entries_without_usable_id(self): self._raw(model_id="good", name="Good"), self._raw(model_id="", name="Empty"), # empty -> dropped ] - _cur, avail = _extract_session_models(response) + _cur, avail, _ = _extract_session_models(response) assert avail == [ACPModelInfo(model_id="good", name="Good", description=None)] +class TestConfigOptionModelMechanism: + """Model selection via the ``model`` ``configOptions`` select. + + codex-acp 0.16+ and claude-agent-acp 0.44+ dropped the UNSTABLE ``models`` + capability + ``session/set_model`` in favour of a ``model`` config-option + select driven by ``session/set_config_option``. ``_extract_session_models`` + reads that select and reports the apply mechanism as its third return value + (``via_config_option``) in the same scan. + """ + + def _select(self, *, id="model", type="select", current_value, options): + opt = SimpleNamespace( + id=id, type=type, current_value=current_value, options=options + ) + return opt + + def _opt(self, value, name=None, description=None): + return SimpleNamespace(value=value, name=name, description=description) + + def _response(self, *, models=None, config_options): + return SimpleNamespace(models=models, config_options=config_options) + + def test_extracts_model_state_from_config_option(self): + # claude-agent-acp 0.44: model select with short aliases. + response = self._response( + config_options=[ + self._select( + current_value="opus[1m]", + options=[ + self._opt("default", "Default (recommended)", "Opus 4.8 · 1M"), + self._opt("opus[1m]", "Opus", "Opus 4.8 · 1M"), + self._opt("sonnet", "Sonnet", "Sonnet 4.6"), + self._opt("haiku", "Haiku", "Haiku 4.5"), + ], + ) + ], + ) + cur, avail, via = _extract_session_models(response) + assert cur == "opus[1m]" + assert via is True + assert avail == [ + ACPModelInfo( + model_id="default", + name="Default (recommended)", + description="Opus 4.8 · 1M", + ), + ACPModelInfo(model_id="opus[1m]", name="Opus", description="Opus 4.8 · 1M"), + ACPModelInfo(model_id="sonnet", name="Sonnet", description="Sonnet 4.6"), + ACPModelInfo(model_id="haiku", name="Haiku", description="Haiku 4.5"), + ] + + def test_unwraps_rootmodel_wrapped_config_option(self): + # agent-client-protocol 0.8.x wraps each entry in a SessionConfigOption + # RootModel (the option lives under ``.root``); detection must unwrap it, + # else ``opt.id`` is None and the SDK wrongly falls back to set_model. + select = self._select( + current_value="opus[1m]", + options=[self._opt("opus[1m]", "Opus"), self._opt("sonnet", "Sonnet")], + ) + wrapped = SimpleNamespace(root=select) # mimics the RootModel wrapper + response = self._response(config_options=[wrapped]) + cur, avail, via = _extract_session_models(response) + assert via is True + assert cur == "opus[1m]" + assert avail is not None + assert [m.model_id for m in avail] == ["opus[1m]", "sonnet"] + + def test_models_capability_wins_over_config_option(self): + # If a server somehow carries both, the ``models`` capability is used. + models = MagicMock() + models.current_model_id = "from-models" + models.available_models = [] + response = self._response( + models=models, + config_options=[ + self._select( + current_value="from-config", options=[self._opt("from-config", "X")] + ) + ], + ) + cur, avail, via = _extract_session_models(response) + assert cur == "from-models" + assert avail == [] + assert via is False + + def test_detects_config_option_mechanism(self): + response = self._response( + config_options=[ + self._select( + current_value="gpt-5.5", options=[self._opt("gpt-5.5", "GPT-5.5")] + ) + ], + ) + assert _extract_session_models(response)[2] is True + + def test_ignores_non_model_config_options(self): + # A ``mode`` select alongside no ``model`` select ⇒ old mechanism, and + # the default (False) is reported since neither block was present. + response = self._response( + config_options=[ + SimpleNamespace( + id="mode", + type="select", + current_value="default", + options=[self._opt("default", "Default")], + ), + ], + ) + assert _extract_session_models(response) == (None, None, False) + + def test_drops_options_without_usable_value(self): + response = self._response( + config_options=[ + self._select( + current_value="ok", + options=[ + self._opt(value=42), # non-string -> dropped + self._opt("ok", "Ok"), + self._opt(value="", name="Empty"), # empty -> dropped + ], + ) + ], + ) + cur, avail, _ = _extract_session_models(response) + assert cur == "ok" + assert avail == [ACPModelInfo(model_id="ok", name="Ok", description=None)] + + def test_none_response_is_not_config_option(self): + assert _extract_session_models(None)[2] is False + + def test_default_via_config_option_used_when_no_block_present(self): + # On resume a load_session that omits both the models capability and the + # model config-option must not blindly default to set_session_model: the + # caller passes the persisted mechanism hint, which is honored (#3772). + response = self._response(config_options=[]) + assert _extract_session_models(response)[2] is False + assert ( + _extract_session_models(response, default_via_config_option=True)[2] is True + ) + # A present block still wins over the hint. + present = self._response( + config_options=[ + self._select(current_value="x", options=[self._opt("x", "X")]) + ] + ) + assert ( + _extract_session_models(present, default_via_config_option=False)[2] is True + ) + + +# Real ``session/new`` wire payloads (by-alias) captured from the pinned CLIs. +# Parsed through the real ``NewSessionResponse`` so the test exercises the +# genuine schema, INCLUDING the ``SessionConfigOption`` RootModel wrapper the +# ``agent-client-protocol`` lib applies to ``config_options`` on 0.8.x (where a +# naive ``opt.id`` read returns ``None``). The detection helper unwraps +# ``.root`` so it works on 0.8.x + 0.10.x. +def _select_dict(opt_id, current, values, category=None): + return { + "id": opt_id, + "name": opt_id, + "type": "select", + "category": category, + "currentValue": current, + "options": [{"value": v, "name": v} for v in values], + } + + +_CLAUDE_046_SESSION = { + "sessionId": "sess-claude", + "models": None, + "configOptions": [ + _select_dict("mode", "default", ["default", "acceptEdits"]), + _select_dict( + "model", + "opus[1m]", + ["default", "opus[1m]", "sonnet", "haiku"], + category="model", + ), + _select_dict("effort", "xhigh", ["xhigh", "low"]), + ], +} +_CODEX_016_SESSION = { + "sessionId": "sess-codex", + "models": None, + "configOptions": [ + _select_dict("mode", "read-only", ["read-only", "full-access"]), + _select_dict( + "model", + "gpt-5.5", + ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"], + category="model", + ), + _select_dict("reasoning_effort", "xhigh", ["xhigh", "low"]), + ], +} +_GEMINI_046_SESSION = { + "sessionId": "sess-gemini", + "configOptions": None, + "models": { + "currentModelId": "gemini-3-flash-preview", + "availableModels": [ + {"modelId": mid, "name": mid} + for mid in ( + "auto", + "gemini-3-pro-preview", + "gemini-3-flash-preview", + "gemini-2.5-pro", + "gemini-2.5-flash", + "gemini-3.1-flash-lite", + ) + ], + }, +} + + +class TestDetectionAgainstRealSessionResponses: + """Detection + extraction against the real CLI ``session/new`` wire shapes, + parsed through the actual ``acp.schema.NewSessionResponse``. + + Guards a schema-shape regression: on ``agent-client-protocol`` 0.8.x each + ``config_options`` entry is a ``SessionConfigOption`` RootModel (``.root``), + so a naive ``opt.id`` read is ``None`` and the SDK would fall back to the + removed ``session/set_model``. + """ + + def test_config_options_is_the_schema_field_name(self): + # The SDK reads ``response.config_options``; pin that this is the real + # field on the schema. + assert "config_options" in NewSessionResponse.model_fields + + def test_claude_046_uses_config_option(self): + resp = NewSessionResponse.model_validate(_CLAUDE_046_SESSION) + cur, avail, via = _extract_session_models(resp) + assert via is True + assert cur == "opus[1m]" + assert avail is not None + assert [m.model_id for m in avail] == ["default", "opus[1m]", "sonnet", "haiku"] + + def test_codex_016_uses_config_option(self): + resp = NewSessionResponse.model_validate(_CODEX_016_SESSION) + cur, avail, via = _extract_session_models(resp) + assert via is True + assert cur == "gpt-5.5" + assert avail is not None + assert [m.model_id for m in avail] == ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini"] + + def test_gemini_046_uses_set_session_model(self): + resp = NewSessionResponse.model_validate(_GEMINI_046_SESSION) + cur, avail, via = _extract_session_models(resp) + assert via is False + assert cur == "gemini-3-flash-preview" + assert avail is not None + assert "gemini-3-pro-preview" in [m.model_id for m in avail] + + +class TestApplyAcpModelNoFallback: + """``_apply_acp_model`` applies via the advertised mechanism only — there is + no cross-mechanism fallback. Response-detection is deterministic from the + session/new shape and correct for every validated CLI, so any rejection is a + real error (invalid model id, or a genuinely mis-advertised server) and + propagates rather than silently trying the other call. + """ + + @pytest.mark.asyncio + async def test_config_option_rejection_propagates(self): + conn = AsyncMock() + conn.set_config_option.side_effect = ACPRequestError( + code=-32601, message="Method not found" + ) + with pytest.raises(ACPRequestError): + await _apply_acp_model(conn, "sess-1", "opus[1m]", via_config_option=True) + conn.set_session_model.assert_not_called() # no fallback + + @pytest.mark.asyncio + async def test_set_session_model_rejection_propagates(self): + conn = AsyncMock() + conn.set_session_model.side_effect = ACPRequestError( + code=-32601, message="Method not found" + ) + with pytest.raises(ACPRequestError): + await _apply_acp_model( + conn, "sess-1", "gemini-3-flash", via_config_option=False + ) + conn.set_config_option.assert_not_called() # no fallback + + +class TestApplyAcpModel: + """``_apply_acp_model`` sends the model id verbatim through the mechanism the + session advertised — the ``model`` configOption select or + ``set_session_model`` — with no id rewriting. (codex 0.16 exposes reasoning + effort as a separate, unmanaged ``reasoning_effort`` configOption, so the + model ids are bare presets on every provider.)""" + + @pytest.mark.asyncio + async def test_config_option_single_call(self): + conn = AsyncMock() + await _apply_acp_model(conn, "sess-1", "gpt-5.5", via_config_option=True) + conn.set_config_option.assert_awaited_once_with( + config_id="model", value="gpt-5.5", session_id="sess-1" + ) + conn.set_session_model.assert_not_called() + + @pytest.mark.asyncio + async def test_set_session_model_branch(self): + conn = AsyncMock() + await _apply_acp_model( + conn, "sess-1", "gemini-2.5-pro", via_config_option=False + ) + conn.set_session_model.assert_awaited_once_with( + model_id="gemini-2.5-pro", session_id="sess-1" + ) + conn.set_config_option.assert_not_called() + + class TestACPAgentAvailableModelsProperty: """``available_models`` exposes the server's model list verbatim. diff --git a/tests/sdk/settings/test_acp_providers.py b/tests/sdk/settings/test_acp_providers.py index 6ab36b4bc7..3f108d64a3 100644 --- a/tests/sdk/settings/test_acp_providers.py +++ b/tests/sdk/settings/test_acp_providers.py @@ -35,14 +35,14 @@ def test_claude_code_metadata(self): assert info.base_url_env_var == "ANTHROPIC_BASE_URL" assert info.default_session_mode == "bypassPermissions" assert "claude-agent" in info.agent_name_patterns - # Initial selection rides session/set_model — claude-agent-acp 0.30.0 - # silently ignores the session-_meta payload (#3654), which is still - # sent as best-effort (session_meta_key below). + # Initial selection rides a protocol call — claude-agent-acp ignores the + # session-_meta payload (#3654), which is still sent as best-effort + # (session_meta_key below). On 0.44.0 the call is set_config_option. assert info.supports_set_session_model is True assert info.supports_runtime_model_switch is True assert info.session_meta_key == "claudeCode" - assert info.default_model == "claude-opus-4-8" - assert any(m.id == "claude-opus-4-8" for m in info.available_models) + assert info.default_model == "opus[1m]" + assert any(m.id == "opus[1m]" for m in info.available_models) # Pinned binary exposed by the agent-server image wrappers. assert info.binary_name == "claude-agent-acp" assert info.data_dir_env_var == "CLAUDE_CONFIG_DIR" @@ -59,8 +59,8 @@ def test_codex_metadata(self): assert info.supports_set_session_model is True assert info.supports_runtime_model_switch is True assert info.session_meta_key is None - assert info.default_model == "gpt-5.5/medium" - assert any(m.id == "gpt-5.5/medium" for m in info.available_models) + assert info.default_model == "gpt-5.5" + assert any(m.id == "gpt-5.5" for m in info.available_models) assert info.binary_name == "codex-acp" assert info.data_dir_env_var == "CODEX_HOME" @@ -71,13 +71,13 @@ def test_gemini_cli_metadata(self): assert "--acp" in info.default_command assert info.api_key_env_var == "GEMINI_API_KEY" assert info.base_url_env_var == "GEMINI_BASE_URL" - assert info.default_session_mode == "yolo" + assert info.default_session_mode == "default" assert "gemini-cli" in info.agent_name_patterns assert info.supports_set_session_model is True assert info.supports_runtime_model_switch is True assert info.session_meta_key is None - assert info.default_model == "auto-gemini-2.5" - assert any(m.id == "auto-gemini-2.5" for m in info.available_models) + assert info.default_model == "auto" + assert any(m.id == "auto" for m in info.available_models) # The Gemini CLI's ACP binary is just ``gemini`` (the ``--acp`` flag is # a trailing arg, preserved by resolve_acp_command on rewrite). assert info.binary_name == "gemini" diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py index 9930c448f4..546806d7ce 100644 --- a/tests/sdk/test_settings.py +++ b/tests/sdk/test_settings.py @@ -883,7 +883,7 @@ def test_acp_create_agent_uses_server_default_command( assert agent.acp_command == [ "npx", "-y", - "@agentclientprotocol/claude-agent-acp@0.30.0", + "@agentclientprotocol/claude-agent-acp@0.44.0", ] assert agent.acp_model == "claude-opus-4-6" # The authoritative provider key is carried onto the agent. @@ -1034,7 +1034,7 @@ def test_acp_resolve_command_rewrites_versioned_npx_to_pinned_binary( monkeypatch.setattr(shutil, "which", _which_returning("codex-acp")) for pkg in ( "@zed-industries/codex-acp", - "@zed-industries/codex-acp@0.15.0", + "@zed-industries/codex-acp@0.16.0", "@zed-industries/codex-acp@0.11.1", ): settings = ACPAgentSettings( @@ -1057,7 +1057,7 @@ def test_acp_resolve_command_keeps_npx_when_binary_absent( assert settings.resolve_acp_command() == [ "npx", "-y", - "@zed-industries/codex-acp@0.15.0", + "@zed-industries/codex-acp@0.16.0", ]