From e0eb77c254f865603f95c3f84e2e26f6b9b3f486 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Fri, 19 Jun 2026 08:26:30 -0400
Subject: [PATCH 1/4] refactor: generalize model config with family-based
 resolution
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the monolithic MODELS dict with a family-based resolution system
so new model versions in existing families (e.g. glm-5.2) resolve
automatically without an explicit config entry or PR.

FAMILIES defines regex patterns with proxy prefix, display-name
formatter, and default llm_config for clean families (glm, kimi,
deepseek, claude-opus). Models matching a family pattern derive their
full config from the pattern alone.

EXPLICIT_MODELS retains entries only for models that deviate from their
family pattern (variant proxy strings, model-specific quirks) or belong
to families without a clean pattern. The MODELS dict is kept for
backward compatibility as a shallow copy of EXPLICIT_MODELS, so it no
longer contains family-derived models.

resolve_model_config(model_id) is the new single entry point:
explicit entry → family pattern → KeyError.

glm-5.2 is the first beneficiary — it resolves via the glm- family
pattern with no explicit entry needed.

Co-authored-by: openhands <openhands@all-hands.dev>
---
 .github/run-eval/resolve_model_config.py | 218 ++++++++++++++---------
 tests/cross/test_resolve_model_config.py |  64 ++++++-
 2 files changed, 192 insertions(+), 90 deletions(-)

diff --git a/.github/run-eval/resolve_model_config.py b/.github/run-eval/resolve_model_config.py
index 3bd8616415..50bee0badb 100755
--- a/.github/run-eval/resolve_model_config.py
+++ b/.github/run-eval/resolve_model_config.py
@@ -14,6 +14,7 @@
 
 import json
 import os
+import re
 import signal
 import sys
 import time
@@ -44,8 +45,81 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
 SDK_ONLY_PARAMS = {"disable_vision", "inline_image_urls"}
 
 
-# Model configurations dictionary
-MODELS = {
+def _humanize_parts(model_id: str, prefix: str) -> str:
+    """Strip ``prefix`` and space-join the capitalized hyphen-separated parts.
+
+    Example: ``_humanize_parts("kimi-k2-thinking", "kimi-")`` -> ``"K2 Thinking"``.
+    """
+    rest = model_id.removeprefix(prefix)
+    return " ".join(part.capitalize() for part in rest.split("-"))
+
+
+# Family patterns for models whose config is fully derivable from the model ID.
+# A new version in an existing family (e.g. ``glm-5.2``) resolves automatically
+# without an explicit entry. Patterns are tried in list order; first match wins.
+#
+# Each family defines:
+#   proxy_prefix  – LiteLLM proxy path prefix (model string = proxy_prefix + model_id)
+#   display_name  – callable(model_id) -> human-readable name
+#   llm_config    – default llm_config fields (temperature, top_p, disable_vision, …)
+FAMILIES: list[tuple[re.Pattern, dict[str, Any]]] = [
+    (
+        re.compile(r"^glm-"),
+        {
+            "proxy_prefix": "litellm_proxy/openrouter/z-ai/",
+            "display_name": lambda mid: "GLM-" + mid.removeprefix("glm-"),
+            "llm_config": {
+                "temperature": 0.0,
+                # OpenRouter GLM models are text-only despite LiteLLM reporting
+                # vision support. See #2110 (GLM-5), #1898 (GLM-4.7).
+                "disable_vision": True,
+            },
+        },
+    ),
+    (
+        re.compile(r"^kimi-k"),
+        {
+            "proxy_prefix": "litellm_proxy/moonshot/",
+            "display_name": lambda mid: "Kimi " + _humanize_parts(mid, "kimi-"),
+            "llm_config": {"temperature": 1.0},
+        },
+    ),
+    (
+        re.compile(r"^deepseek-"),
+        {
+            "proxy_prefix": "litellm_proxy/deepseek/",
+            "display_name": lambda mid: "DeepSeek " + _humanize_parts(mid, "deepseek-"),
+            "llm_config": {},
+        },
+    ),
+    (
+        re.compile(r"^claude-opus-"),
+        {
+            "proxy_prefix": "litellm_proxy/anthropic/",
+            "display_name": lambda mid: "Claude Opus "
+            + mid.removeprefix("claude-opus-").replace("-", "."),
+            "llm_config": {},
+        },
+    ),
+]
+
+
+def _resolve_family(model_id: str) -> dict[str, Any] | None:
+    """Return resolved defaults for the first matching family, or ``None``."""
+    for pattern, family in FAMILIES:
+        if pattern.match(model_id):
+            return {
+                "proxy_prefix": family["proxy_prefix"],
+                "display_name": family["display_name"](model_id),
+                "llm_config": dict(family["llm_config"]),
+            }
+    return None
+
+
+# Explicit model entries for models that **deviate** from their family pattern
+# (variant proxy strings, model-specific quirks, or families without a clean
+# pattern). Models that match a FAMILIES pattern do NOT need to be listed here.
+EXPLICIT_MODELS: dict[str, dict[str, Any]] = {
     "claude-sonnet-4-5-20250929": {
         "id": "claude-sonnet-4-5-20250929",
         "display_name": "Claude Sonnet 4.5",
@@ -54,14 +128,21 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
             "temperature": 0.0,
         },
     },
-    "kimi-k2-thinking": {
-        "id": "kimi-k2-thinking",
-        "display_name": "Kimi K2 Thinking",
+    # kimi-k2.6: family default + inline_image_urls quirk
+    # https://www.kimi.com/blog/kimi-k2-6
+    "kimi-k2.6": {
+        "id": "kimi-k2.6",
+        "display_name": "Kimi K2.6",
         "llm_config": {
-            "model": "litellm_proxy/moonshot/kimi-k2-thinking",
+            "model": "litellm_proxy/moonshot/kimi-k2.6",
             "temperature": 1.0,
+            # Moonshot's public Kimi API rejects http(s) image URLs and only
+            # accepts base64 ``data:`` URLs. This makes the SDK fetch each
+            # image URL and inline it as base64 before sending. See #3155.
+            "inline_image_urls": True,
         },
     },
+    # kimi-k2.5: family default + top_p override
     # https://www.kimi.com/blog/kimi-k2-5.html
     "kimi-k2.5": {
         "id": "kimi-k2.5",
@@ -72,20 +153,6 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
             "top_p": 0.95,
         },
     },
-    # https://www.kimi.com/blog/kimi-k2-6
-    "kimi-k2.6": {
-        "id": "kimi-k2.6",
-        "display_name": "Kimi K2.6",
-        "llm_config": {
-            "model": "litellm_proxy/moonshot/kimi-k2.6",
-            "temperature": 1.0,
-            # Moonshot's public Kimi API rejects http(s) image URLs and only
-            # accepts base64 ``data:`` URLs. This makes the SDK fetch each
-            # image URL and inline it as base64 before sending. See #3155.
-            "inline_image_urls": True,
-        },
-    },
-    # https://www.alibabacloud.com/help/en/model-studio/deep-thinking
     "qwen3-max-thinking": {
         "id": "qwen3-max-thinking",
         "display_name": "Qwen3 Max Thinking",
@@ -122,25 +189,10 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
         "id": "claude-4.6-opus",
         "display_name": "Claude 4.6 Opus",
         "llm_config": {
-            "model": "litellm_proxy/anthropic/claude-opus-4-6",
+            "model": "litellm_proxy/anthropic/claude-4-6",
             "temperature": 0.0,
         },
     },
-    "claude-opus-4-7": {
-        "id": "claude-opus-4-7",
-        "display_name": "Claude Opus 4.7",
-        "llm_config": {
-            "model": "litellm_proxy/anthropic/claude-opus-4-7",
-        },
-    },
-    # https://www.anthropic.com/news/claude-opus-4-8
-    "claude-opus-4-8": {
-        "id": "claude-opus-4-8",
-        "display_name": "Claude Opus 4.8",
-        "llm_config": {
-            "model": "litellm_proxy/anthropic/claude-opus-4-8",
-        },
-    },
     # https://www.anthropic.com/news/claude-fable-5
     "claude-fable-5": {
         "id": "claude-fable-5",
@@ -271,22 +323,12 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
             "top_p": 0.95,
         },
     },
+    # deepseek-v3.2-reasoner: variant proxy string (deepseek-reasoner)
     "deepseek-v3.2-reasoner": {
         "id": "deepseek-v3.2-reasoner",
         "display_name": "DeepSeek V3.2 Reasoner",
         "llm_config": {"model": "litellm_proxy/deepseek/deepseek-reasoner"},
     },
-    # https://api-docs.deepseek.com/news/news260424
-    "deepseek-v4-pro": {
-        "id": "deepseek-v4-pro",
-        "display_name": "DeepSeek V4 Pro",
-        "llm_config": {"model": "litellm_proxy/deepseek/deepseek-v4-pro"},
-    },
-    "deepseek-v4-flash": {
-        "id": "deepseek-v4-flash",
-        "display_name": "DeepSeek V4 Flash",
-        "llm_config": {"model": "litellm_proxy/deepseek/deepseek-v4-flash"},
-    },
     "qwen-3-coder": {
         "id": "qwen-3-coder",
         "display_name": "Qwen 3 Coder",
@@ -303,36 +345,6 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
             "temperature": 0.0,
         },
     },
-    "glm-4.7": {
-        "id": "glm-4.7",
-        "display_name": "GLM-4.7",
-        "llm_config": {
-            "model": "litellm_proxy/openrouter/z-ai/glm-4.7",
-            "temperature": 0.0,
-            # OpenRouter glm-4.7 is text-only despite LiteLLM reporting vision support
-            "disable_vision": True,
-        },
-    },
-    "glm-5": {
-        "id": "glm-5",
-        "display_name": "GLM-5",
-        "llm_config": {
-            "model": "litellm_proxy/openrouter/z-ai/glm-5",
-            "temperature": 0.0,
-            # OpenRouter glm-5 is text-only despite LiteLLM reporting vision support
-            "disable_vision": True,
-        },
-    },
-    "glm-5.1": {
-        "id": "glm-5.1",
-        "display_name": "GLM-5.1",
-        "llm_config": {
-            "model": "litellm_proxy/openrouter/z-ai/glm-5.1",
-            "temperature": 0.0,
-            # OpenRouter glm-5.1 is text-only despite LiteLLM reporting vision support
-            "disable_vision": True,
-        },
-    },
     "qwen3-coder-next": {
         "id": "qwen3-coder-next",
         "display_name": "Qwen3 Coder Next",
@@ -434,6 +446,39 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
 }
 
 
+def resolve_model_config(model_id: str) -> dict[str, Any]:
+    """Resolve a model ID to its full configuration.
+
+    An explicit entry in ``EXPLICIT_MODELS`` takes precedence and is returned
+    as a copy. Otherwise the first matching ``FAMILIES`` pattern supplies the
+    defaults and ``llm_config["model"]`` is built as ``proxy_prefix + model_id``,
+    so a new version in a clean family needs no explicit entry.
+
+    Raises ``KeyError`` if the model ID matches no family and has no explicit
+    entry.
+    """
+    if model_id in EXPLICIT_MODELS:
+        return dict(EXPLICIT_MODELS[model_id])
+
+    family = _resolve_family(model_id)
+    if family is not None:
+        llm_config = dict(family["llm_config"])
+        llm_config["model"] = family["proxy_prefix"] + model_id
+        return {
+            "id": model_id,
+            "display_name": family["display_name"],
+            "llm_config": llm_config,
+        }
+
+    raise KeyError(model_id)
+
+
+# Backward-compatible snapshot (shallow copy) of the explicitly-registered models.
+# Family-derived models (e.g. glm-5, kimi-k2-thinking) are NOT listed here but
+# still resolve via ``find_models_by_id`` / ``resolve_model_config``.
+MODELS: dict[str, dict[str, Any]] = dict(EXPLICIT_MODELS)
+
+
 def error_exit(msg: str, exit_code: int = 1) -> None:
     """Print error message and exit."""
     print(f"ERROR: {msg}", file=sys.stderr)
@@ -451,6 +496,10 @@ def get_required_env(key: str) -> str:
 def find_models_by_id(model_ids: list[str]) -> list[dict]:
     """Find models by ID. Fails fast on missing ID.
 
+    Checks the ``MODELS`` dict first (which may be patched in tests), then
+    falls back to ``resolve_model_config`` for family-pattern-derived models
+    that are not explicitly registered.
+
     Args:
         model_ids: List of model IDs to find
 
@@ -462,12 +511,19 @@ def find_models_by_id(model_ids: list[str]) -> list[dict]:
     """
     resolved = []
     for model_id in model_ids:
-        if model_id not in MODELS:
-            available = ", ".join(sorted(MODELS.keys()))
+        if model_id in MODELS:
+            resolved.append(MODELS[model_id])
+            continue
+        try:
+            resolved.append(resolve_model_config(model_id))
+        except KeyError:
+            available = ", ".join(sorted(EXPLICIT_MODELS.keys()))
             error_exit(
-                f"Model ID '{model_id}' not found. Available models: {available}"
+                f"Model ID '{model_id}' not found. "
+                f"Available explicit models: {available}. "
+                f"Models matching a family pattern (e.g. glm-*) "
+                f"also resolve automatically."
             )
-        resolved.append(MODELS[model_id])
     return resolved
 
 
diff --git a/tests/cross/test_resolve_model_config.py b/tests/cross/test_resolve_model_config.py
index 3d9c2bc1bf..34c347f45e 100644
--- a/tests/cross/test_resolve_model_config.py
+++ b/tests/cross/test_resolve_model_config.py
@@ -14,9 +14,11 @@
 run_eval_path = Path(__file__).parent.parent.parent / ".github" / "run-eval"
 sys.path.append(str(run_eval_path))
 from resolve_model_config import (  # noqa: E402  # type: ignore[import-not-found]
+    EXPLICIT_MODELS,
     MODELS,
     check_model,
     find_models_by_id,
+    resolve_model_config,
     run_preflight_check,
 )
 
@@ -214,15 +216,48 @@ def test_all_models_valid_with_pydantic():
     - temperature is between 0.0 and 2.0 (if present)
     - top_p is between 0.0 and 1.0 (if present)
     - reasoning_effort is one of 'low', 'medium', 'high' (if present)
+
+    Validates both explicit entries and family-derived models.
     """
+    # Collect all configs: explicit entries + family-derived models
+    all_configs = {}
+    for model_id in EXPLICIT_MODELS:
+        all_configs[model_id] = resolve_model_config(model_id)
+    # Also validate representative family-derived models (not in EXPLICIT_MODELS)
+    family_derived = [
+        "glm-4.7",
+        "glm-5",
+        "glm-5.1",
+        "glm-5.2",
+        "kimi-k2-thinking",
+        "deepseek-v4-pro",
+        "deepseek-v4-flash",
+        "claude-opus-4-7",
+        "claude-opus-4-8",
+    ]
+    for model_id in family_derived:
+        if model_id not in all_configs:
+            all_configs[model_id] = resolve_model_config(model_id)
+
     # This will raise ValidationError if any model is invalid
-    registry = EvalModelsRegistry(models=MODELS)
-    assert len(registry.models) == len(MODELS)
+    registry = EvalModelsRegistry(models=all_configs)
+    assert len(registry.models) == len(all_configs)
 
 
 def test_find_all_models():
     """Test that find_models_by_id works for all models."""
-    all_model_ids = list(MODELS.keys())
+    # All explicit model IDs + representative family-derived model IDs
+    all_model_ids = list(EXPLICIT_MODELS.keys()) + [
+        "glm-4.7",
+        "glm-5",
+        "glm-5.1",
+        "glm-5.2",
+        "kimi-k2-thinking",
+        "deepseek-v4-pro",
+        "deepseek-v4-flash",
+        "claude-opus-4-7",
+        "claude-opus-4-8",
+    ]
     result = find_models_by_id(all_model_ids)
 
     assert len(result) == len(all_model_ids)
@@ -260,7 +295,7 @@ def test_gpt_5_3_codex_config():
 
 def test_glm_5_config():
     """Test that glm-5 has correct configuration."""
-    model = MODELS["glm-5"]
+    model = resolve_model_config("glm-5")
 
     assert model["id"] == "glm-5"
     assert model["display_name"] == "GLM-5"
@@ -270,7 +305,7 @@ def test_glm_5_config():
 
 def test_glm_5_1_config():
     """Test that glm-5.1 has correct configuration."""
-    model = MODELS["glm-5.1"]
+    model = resolve_model_config("glm-5.1")
 
     assert model["id"] == "glm-5.1"
     assert model["display_name"] == "GLM-5.1"
@@ -278,6 +313,17 @@ def test_glm_5_1_config():
     assert model["llm_config"]["disable_vision"] is True
 
 
+def test_glm_5_2_config():
+    """Test that glm-5.2 (no explicit entry) resolves via the glm family pattern."""
+    model = resolve_model_config("glm-5.2")
+
+    assert model["id"] == "glm-5.2"
+    assert model["display_name"] == "GLM-5.2"
+    assert model["llm_config"]["model"] == "litellm_proxy/openrouter/z-ai/glm-5.2"
+    assert model["llm_config"]["temperature"] == 0.0
+    assert model["llm_config"]["disable_vision"] is True
+
+
 # Tests for preflight check functionality
 
 
@@ -617,7 +663,7 @@ def test_trinity_large_thinking_config():
 
 def test_claude_opus_4_7_config():
     """Test that claude-opus-4-7 has correct configuration."""
-    model = MODELS["claude-opus-4-7"]
+    model = resolve_model_config("claude-opus-4-7")
 
     assert model["id"] == "claude-opus-4-7"
     assert model["display_name"] == "Claude Opus 4.7"
@@ -646,7 +692,7 @@ def test_gpt_5_5_config():
 
 def test_deepseek_v4_pro_config():
     """Test that deepseek-v4-pro has correct configuration."""
-    model = MODELS["deepseek-v4-pro"]
+    model = resolve_model_config("deepseek-v4-pro")
 
     assert model["id"] == "deepseek-v4-pro"
     assert model["display_name"] == "DeepSeek V4 Pro"
@@ -655,7 +701,7 @@ def test_deepseek_v4_pro_config():
 
 def test_deepseek_v4_flash_config():
     """Test that deepseek-v4-flash has correct configuration."""
-    model = MODELS["deepseek-v4-flash"]
+    model = resolve_model_config("deepseek-v4-flash")
 
     assert model["id"] == "deepseek-v4-flash"
     assert model["display_name"] == "DeepSeek V4 Flash"
@@ -711,7 +757,7 @@ def test_nemotron_3_ultra_550b_a55b_or_paid_config():
 
 def test_claude_opus_4_8_config():
     """Test that claude-opus-4-8 has correct configuration."""
-    model = MODELS["claude-opus-4-8"]
+    model = resolve_model_config("claude-opus-4-8")
 
     assert model["id"] == "claude-opus-4-8"
     assert model["display_name"] == "Claude Opus 4.8"

From 97451661c2ca53127955915fa6f2bd9a188b3844 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Fri, 19 Jun 2026 08:51:26 -0400
Subject: [PATCH 2/4] fix: preserve claude-4.6-opus proxy string from main
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Regression introduced during the refactor — the proxy string was
accidentally changed from claude-opus-4-6 to claude-4-6. Restored
to match main.

Co-authored-by: openhands <openhands@all-hands.dev>
---
 .github/run-eval/resolve_model_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/run-eval/resolve_model_config.py b/.github/run-eval/resolve_model_config.py
index 50bee0badb..dbeef10add 100755
--- a/.github/run-eval/resolve_model_config.py
+++ b/.github/run-eval/resolve_model_config.py
@@ -189,7 +189,7 @@ def _resolve_family(model_id: str) -> dict[str, Any] | None:
         "id": "claude-4.6-opus",
         "display_name": "Claude 4.6 Opus",
         "llm_config": {
-            "model": "litellm_proxy/anthropic/claude-4-6",
+            "model": "litellm_proxy/anthropic/claude-opus-4-6",
             "temperature": 0.0,
         },
     },

From 933964d7af3190805a1d914713e51057abfc8e3f Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Fri, 19 Jun 2026 09:04:30 -0400
Subject: [PATCH 3/4] fix: use find_models_by_id in workflows for
 family-derived models

The setup-matrix step in integration-runner.yml imported MODELS directly
and checked membership against it, and the run-eval workflow validated
model IDs against the allowed list from models.json. Both checks
excluded family-derived models (e.g. deepseek-v4-flash, glm-5.2) that
are not in EXPLICIT_MODELS but resolve via family patterns. Switched
both to find_models_by_id, which already handles both explicit and
family-derived resolution.

Co-authored-by: openhands <openhands@all-hands.dev>
---
 .github/workflows/integration-runner.yml | 13 ++++++------
 .github/workflows/run-eval.yml           | 25 +++++++++++++++++-------
 2 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/integration-runner.yml b/.github/workflows/integration-runner.yml
index d32ba0fcef..44a02d57e2 100644
--- a/.github/workflows/integration-runner.yml
+++ b/.github/workflows/integration-runner.yml
@@ -95,18 +95,17 @@ jobs:
                   import os
                   import sys
                   sys.path.insert(0, '.github/run-eval')
-                  from resolve_model_config import MODELS
+                  from resolve_model_config import find_models_by_id
 
                   model_ids = os.environ["MODEL_IDS"].split(",")
                   model_ids = [m.strip() for m in model_ids if m.strip()]
 
+                  # find_models_by_id exits with code 1 and prints a helpful
+                  # message if any model ID cannot be resolved.
+                  resolved = find_models_by_id(model_ids)
+
                   matrix = []
-                  for model_id in model_ids:
-                      if model_id not in MODELS:
-                          available = ", ".join(sorted(MODELS.keys()))
-                          print(f"Error: Model ID '{model_id}' not found. Available: {available}", file=sys.stderr)
-                          sys.exit(1)
-                      model = MODELS[model_id]
+                  for model_id, model in zip(model_ids, resolved):
                       # Create run-suffix from model id (replace special chars with underscore)
                       run_suffix = model_id.replace("-", "_").replace(".", "_") + "_run"
                       matrix.append({
diff --git a/.github/workflows/run-eval.yml b/.github/workflows/run-eval.yml
index 273c7e774d..2642da610b 100644
--- a/.github/workflows/run-eval.yml
+++ b/.github/workflows/run-eval.yml
@@ -350,14 +350,25 @@ jobs:
                     MODELS_INPUT="$DEFAULT_MODEL"
                   fi
                   MODELS=$(printf '%s' "$MODELS_INPUT" | tr ', ' '\n' | sed '/^$/d' | paste -sd, -)
+
+                  # Validate model IDs using find_models_by_id (supports family-derived models)
                   ALLOWED_LIST=$(echo "$ALLOWED_MODEL_IDS_JSON" | jq -r '.[]')
-                  for MODEL in ${MODELS//,/ }; do
-                    if ! echo "$ALLOWED_LIST" | grep -Fx "$MODEL" >/dev/null; then
-                      echo "Model ID '$MODEL' not found in models.json" >&2
-                      echo "Available models: $(echo "$ALLOWED_LIST" | paste -sd, -)" >&2
-                      exit 1
-                    fi
-                  done
+                  # "if !" keeps the failure branch reachable when the shell runs with -e.
+                  if ! MODEL_IDS_VALIDATED=$(MODELS="$MODELS" uv run python << 'EOF'
+                  import os, sys
+                  sys.path.insert(0, '.github/run-eval')
+                  from resolve_model_config import find_models_by_id
+                  models = os.environ.get("MODELS", "")
+                  model_ids = [m.strip() for m in models.split(",") if m.strip()]
+                  # find_models_by_id exits with code 1 and prints available models
+                  # if any ID cannot be resolved (including family-pattern matches).
+                  find_models_by_id(model_ids)
+                  print(",".join(model_ids))
+                  EOF
+                  ); then
+                    echo "Available models: $(echo "$ALLOWED_LIST" | paste -sd, -)" >&2
+                    exit 1
+                  fi
 
                   # Sanitize values to avoid GITHUB_OUTPUT parse errors (e.g., raw SHAs)
                   SDK_SHA=$(printf '%s' "$SDK_SHA" | tr -d '\n\r')

From 7e2454b29c344db75f7ce091ed9bacf288d0c952 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Fri, 19 Jun 2026 09:07:38 -0400
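Subject: [PATCH 4/4] fix: copy llm_config in resolve_model_config
 explicit path

Matches the safety of the family-derived path, which already builds a
fresh llm_config dict. Prevents callers from mutating the llm_config of
the global EXPLICIT_MODELS entry through the returned config. This is a
one-level copy; any values nested inside llm_config are still shared.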

Co-authored-by: openhands <openhands@all-hands.dev>
---
 .github/run-eval/resolve_model_config.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/run-eval/resolve_model_config.py b/.github/run-eval/resolve_model_config.py
index dbeef10add..8245ef9314 100755
--- a/.github/run-eval/resolve_model_config.py
+++ b/.github/run-eval/resolve_model_config.py
@@ -458,7 +458,8 @@ def resolve_model_config(model_id: str) -> dict[str, Any]:
     entry.
     """
     if model_id in EXPLICIT_MODELS:
-        return dict(EXPLICIT_MODELS[model_id])
+        entry = EXPLICIT_MODELS[model_id]
+        return {**entry, "llm_config": dict(entry["llm_config"])}
 
     family = _resolve_family(model_id)
     if family is not None: