OpenHands · neubig · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026
diff --git a/.github/run-eval/resolve_model_config.py b/.github/run-eval/resolve_model_config.py
@@ -14,6 +14,7 @@
 
 import json
 import os
+import re
 import signal
 import sys
 import time
@@ -44,8 +45,81 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
 SDK_ONLY_PARAMS = {"disable_vision", "inline_image_urls"}
 
 
-# Model configurations dictionary
-MODELS = {
+def _humanize_parts(model_id: str, prefix: str) -> str:
+    """Capitalize each hyphen-separated part after stripping ``prefix``.
+
+    Example: ``_humanize_parts("kimi-k2-thinking", "kimi-")`` -> ``"K2 Thinking"``.
+    """
+    rest = model_id.removeprefix(prefix)
+    return " ".join(part.capitalize() for part in rest.split("-"))
+
+
+# Family patterns for models whose config is fully derivable from the model ID.
+# A new version in an existing family (e.g. ``glm-5.2``) resolves automatically
+# without an explicit entry. First match wins.
+#
+# Each family defines:
+#   proxy_prefix  – LiteLLM proxy path prefix (model string = proxy_prefix + model_id)
+#   display_name  – callable(model_id) -> human-readable name
+#   llm_config    – default llm_config fields (temperature, top_p, disable_vision, …)
+FAMILIES: list[tuple[re.Pattern, dict[str, Any]]] = [
+    (
+        re.compile(r"^glm-"),
+        {
+            "proxy_prefix": "litellm_proxy/openrouter/z-ai/",
+            "display_name": lambda mid: "GLM-" + mid.removeprefix("glm-"),
+            "llm_config": {
+                "temperature": 0.0,
+                # OpenRouter GLM models are text-only despite LiteLLM reporting
+                # vision support. See #2110 (GLM-5), #1898 (GLM-4.7).
+                "disable_vision": True,
+            },
+        },
+    ),
+    (
+        re.compile(r"^kimi-k"),
+        {
+            "proxy_prefix": "litellm_proxy/moonshot/",
+            "display_name": lambda mid: "Kimi " + _humanize_parts(mid, "kimi-"),
+            "llm_config": {"temperature": 1.0},
+        },
+    ),
+    (
+        re.compile(r"^deepseek-"),
+        {
+            "proxy_prefix": "litellm_proxy/deepseek/",
+            "display_name": lambda mid: "DeepSeek " + _humanize_parts(mid, "deepseek-"),
+            "llm_config": {},
+        },
+    ),
+    (
+        re.compile(r"^claude-opus-"),
+        {
+            "proxy_prefix": "litellm_proxy/anthropic/",
+            "display_name": lambda mid: "Claude Opus "
+            + mid.removeprefix("claude-opus-").replace("-", "."),
+            "llm_config": {},
+        },
+    ),
+]
+
+
+def _resolve_family(model_id: str) -> dict[str, Any] | None:
+    """Return a copy of the matching family's defaults, or ``None``."""
+    for pattern, family in FAMILIES:
+        if pattern.match(model_id):
+            return {
+                "proxy_prefix": family["proxy_prefix"],
+                "display_name": family["display_name"](model_id),
+                "llm_config": dict(family["llm_config"]),
+            }
+    return None
+
+
+# Explicit model entries for models that **deviate** from their family pattern
+# (variant proxy strings, model-specific quirks, or families without a clean
+# pattern). Models that match a FAMILIES pattern do NOT need to be listed here.
+EXPLICIT_MODELS: dict[str, dict[str, Any]] = {
     "claude-sonnet-4-5-20250929": {
         "id": "claude-sonnet-4-5-20250929",
         "display_name": "Claude Sonnet 4.5",
@@ -54,14 +128,21 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
             "temperature": 0.0,
         },
     },
-    "kimi-k2-thinking": {
-        "id": "kimi-k2-thinking",
-        "display_name": "Kimi K2 Thinking",
+    # kimi-k2.6: family default + inline_image_urls quirk
+    # https://www.kimi.com/blog/kimi-k2-6
+    "kimi-k2.6": {
+        "id": "kimi-k2.6",
+        "display_name": "Kimi K2.6",
         "llm_config": {
-            "model": "litellm_proxy/moonshot/kimi-k2-thinking",
+            "model": "litellm_proxy/moonshot/kimi-k2.6",
             "temperature": 1.0,
+            # Moonshot's public Kimi API rejects http(s) image URLs and only
+            # accepts base64 ``data:`` URLs. This makes the SDK fetch each
+            # image URL and inline it as base64 before sending. See #3155.
+            "inline_image_urls": True,
         },
     },
+    # kimi-k2.5: family default + top_p override
     # https://www.kimi.com/blog/kimi-k2-5.html
     "kimi-k2.5": {
         "id": "kimi-k2.5",
@@ -72,20 +153,6 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
             "top_p": 0.95,
         },
     },
-    # https://www.kimi.com/blog/kimi-k2-6
-    "kimi-k2.6": {
-        "id": "kimi-k2.6",
-        "display_name": "Kimi K2.6",
-        "llm_config": {
-            "model": "litellm_proxy/moonshot/kimi-k2.6",
-            "temperature": 1.0,
-            # Moonshot's public Kimi API rejects http(s) image URLs and only
-            # accepts base64 ``data:`` URLs. This makes the SDK fetch each
-            # image URL and inline it as base64 before sending. See #3155.
-            "inline_image_urls": True,
-        },
-    },
-    # https://www.alibabacloud.com/help/en/model-studio/deep-thinking
     "qwen3-max-thinking": {
         "id": "qwen3-max-thinking",
         "display_name": "Qwen3 Max Thinking",
@@ -126,21 +193,6 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
             "temperature": 0.0,
         },
     },
-    "claude-opus-4-7": {
-        "id": "claude-opus-4-7",
-        "display_name": "Claude Opus 4.7",
-        "llm_config": {
-            "model": "litellm_proxy/anthropic/claude-opus-4-7",
-        },
-    },
-    # https://www.anthropic.com/news/claude-opus-4-8
-    "claude-opus-4-8": {
-        "id": "claude-opus-4-8",
-        "display_name": "Claude Opus 4.8",
-        "llm_config": {
-            "model": "litellm_proxy/anthropic/claude-opus-4-8",
-        },
-    },
     # https://www.anthropic.com/news/claude-fable-5
     "claude-fable-5": {
         "id": "claude-fable-5",
@@ -271,22 +323,12 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
             "top_p": 0.95,
         },
     },
+    # deepseek-v3.2-reasoner: variant proxy string (deepseek-reasoner)
     "deepseek-v3.2-reasoner": {
         "id": "deepseek-v3.2-reasoner",
         "display_name": "DeepSeek V3.2 Reasoner",
         "llm_config": {"model": "litellm_proxy/deepseek/deepseek-reasoner"},
     },
-    # https://api-docs.deepseek.com/news/news260424
-    "deepseek-v4-pro": {
-        "id": "deepseek-v4-pro",
-        "display_name": "DeepSeek V4 Pro",
-        "llm_config": {"model": "litellm_proxy/deepseek/deepseek-v4-pro"},
-    },
-    "deepseek-v4-flash": {
-        "id": "deepseek-v4-flash",
-        "display_name": "DeepSeek V4 Flash",
-        "llm_config": {"model": "litellm_proxy/deepseek/deepseek-v4-flash"},
-    },
     "qwen-3-coder": {
         "id": "qwen-3-coder",
         "display_name": "Qwen 3 Coder",
@@ -303,36 +345,6 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
             "temperature": 0.0,
         },
     },
-    "glm-4.7": {
-        "id": "glm-4.7",
-        "display_name": "GLM-4.7",
-        "llm_config": {
-            "model": "litellm_proxy/openrouter/z-ai/glm-4.7",
-            "temperature": 0.0,
-            # OpenRouter glm-4.7 is text-only despite LiteLLM reporting vision support
-            "disable_vision": True,
-        },
-    },
-    "glm-5": {
-        "id": "glm-5",
-        "display_name": "GLM-5",
-        "llm_config": {
-            "model": "litellm_proxy/openrouter/z-ai/glm-5",
-            "temperature": 0.0,
-            # OpenRouter glm-5 is text-only despite LiteLLM reporting vision support
-            "disable_vision": True,
-        },
-    },
-    "glm-5.1": {
-        "id": "glm-5.1",
-        "display_name": "GLM-5.1",
-        "llm_config": {
-            "model": "litellm_proxy/openrouter/z-ai/glm-5.1",
-            "temperature": 0.0,
-            # OpenRouter glm-5.1 is text-only despite LiteLLM reporting vision support
-            "disable_vision": True,
-        },
-    },
     "qwen3-coder-next": {
         "id": "qwen3-coder-next",
         "display_name": "Qwen3 Coder Next",
@@ -434,6 +446,40 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
 }
 
 
+def resolve_model_config(model_id: str) -> dict[str, Any]:
+    """Resolve a model ID to its full configuration.
+
+    Models that match a ``FAMILIES`` pattern are derived automatically from
+    the family defaults — no explicit entry needed. Models that deviate from
+    their family pattern (variant proxy strings, quirks) or belong to a family
+    without a clean pattern must have an explicit entry in ``EXPLICIT_MODELS``.
+
+    Raises ``KeyError`` if the model ID matches no family and has no explicit
+    entry.
+    """
+    if model_id in EXPLICIT_MODELS:
+        entry = EXPLICIT_MODELS[model_id]
+        return {**entry, "llm_config": dict(entry["llm_config"])}
+
+    family = _resolve_family(model_id)
+    if family is not None:
+        llm_config = dict(family["llm_config"])
+        llm_config["model"] = family["proxy_prefix"] + model_id
+        return {
+            "id": model_id,
+            "display_name": family["display_name"],
+            "llm_config": llm_config,
+        }
+
+    raise KeyError(model_id)
+
+
+# Backward-compatible dict of explicitly-registered models. Models that are
+# derived purely from a family pattern (e.g. glm-5, kimi-k2-thinking) are NOT
+# listed here but still resolve via ``find_models_by_id`` / ``resolve_model_config``.
+MODELS: dict[str, dict[str, Any]] = dict(EXPLICIT_MODELS)
+
+
 def error_exit(msg: str, exit_code: int = 1) -> None:
     """Print error message and exit."""
     print(f"ERROR: {msg}", file=sys.stderr)
@@ -451,6 +497,10 @@ def get_required_env(key: str) -> str:
 def find_models_by_id(model_ids: list[str]) -> list[dict]:
     """Find models by ID. Fails fast on missing ID.
 
+    Checks the ``MODELS`` dict first (which may be patched in tests), then
+    falls back to ``resolve_model_config`` for family-pattern-derived models
+    that are not explicitly registered.
+
     Args:
         model_ids: List of model IDs to find
 
@@ -462,12 +512,19 @@ def find_models_by_id(model_ids: list[str]) -> list[dict]:
     """
     resolved = []
     for model_id in model_ids:
-        if model_id not in MODELS:
-            available = ", ".join(sorted(MODELS.keys()))
+        if model_id in MODELS:
+            resolved.append(MODELS[model_id])
+            continue
+        try:
+            resolved.append(resolve_model_config(model_id))
+        except KeyError:
+            available = ", ".join(sorted(EXPLICIT_MODELS.keys()))
             error_exit(
-                f"Model ID '{model_id}' not found. Available models: {available}"
+                f"Model ID '{model_id}' not found. "
+                f"Available explicit models: {available}. "
+                f"Models matching a family pattern (e.g. glm-*) "
+                f"also resolve automatically."
             )
-        resolved.append(MODELS[model_id])
     return resolved
 
 

diff --git a/.github/workflows/integration-runner.yml b/.github/workflows/integration-runner.yml
@@ -95,18 +95,17 @@ jobs:
                   import os
                   import sys
                   sys.path.insert(0, '.github/run-eval')
-                  from resolve_model_config import MODELS
+                  from resolve_model_config import find_models_by_id
 
                   model_ids = os.environ["MODEL_IDS"].split(",")
                   model_ids = [m.strip() for m in model_ids if m.strip()]
 
+                  # find_models_by_id exits with code 1 and prints a helpful
+                  # message if any model ID cannot be resolved.
+                  resolved = find_models_by_id(model_ids)
+
                   matrix = []
-                  for model_id in model_ids:
-                      if model_id not in MODELS:
-                          available = ", ".join(sorted(MODELS.keys()))
-                          print(f"Error: Model ID '{model_id}' not found. Available: {available}", file=sys.stderr)
-                          sys.exit(1)
-                      model = MODELS[model_id]
+                  for model_id, model in zip(model_ids, resolved):
                       # Create run-suffix from model id (replace special chars with underscore)
                       run_suffix = model_id.replace("-", "_").replace(".", "_") + "_run"
                       matrix.append({

diff --git a/.github/workflows/run-eval.yml b/.github/workflows/run-eval.yml
@@ -350,14 +350,25 @@ jobs:
                     MODELS_INPUT="$DEFAULT_MODEL"
                   fi
                   MODELS=$(printf '%s' "$MODELS_INPUT" | tr ', ' '\n' | sed '/^$/d' | paste -sd, -)
+
+                  # Validate model IDs using find_models_by_id (supports family-derived models)
                   ALLOWED_LIST=$(echo "$ALLOWED_MODEL_IDS_JSON" | jq -r '.[]')
-                  for MODEL in ${MODELS//,/ }; do
-                    if ! echo "$ALLOWED_LIST" | grep -Fx "$MODEL" >/dev/null; then
-                      echo "Model ID '$MODEL' not found in models.json" >&2
-                      echo "Available models: $(echo "$ALLOWED_LIST" | paste -sd, -)" >&2
-                      exit 1
-                    fi
-                  done
+                  MODEL_IDS_VALIDATED=$(MODELS="$MODELS" uv run python << 'EOF'
+                  import os, sys
+                  sys.path.insert(0, '.github/run-eval')
+                  from resolve_model_config import find_models_by_id
+                  models = os.environ.get("MODELS", "")
+                  model_ids = [m.strip() for m in models.split(",") if m.strip()]
+                  # find_models_by_id exits with code 1 and prints available models
+                  # if any ID cannot be resolved (including family-pattern matches).
+                  find_models_by_id(model_ids)
+                  print(",".join(model_ids))
+                  EOF
+                  )
+                  if [ $? -ne 0 ]; then
+                    echo "Available models: $(echo "$ALLOWED_LIST" | paste -sd, -)" >&2
+                    exit 1
+                  fi
 
                   # Sanitize values to avoid GITHUB_OUTPUT parse errors (e.g., raw SHAs)
                   SDK_SHA=$(printf '%s' "$SDK_SHA" | tr -d '\n\r')