From ed47a4c54b94f343549c8a481c49388728e8ff35 Mon Sep 17 00:00:00 2001
From: Tonyhuang <129367165+tuofangzhe@users.noreply.github.com>
Date: Sun, 10 May 2026 13:31:23 +0800
Subject: [PATCH] =?UTF-8?q?fix(openai-client):=20=E6=8C=89=E5=AD=90?=
 =?UTF-8?q?=E5=8F=98=E4=BD=93=E7=BB=86=E5=88=86=20reasoning=20=E5=AE=B6?=
 =?UTF-8?q?=E6=97=8F=E7=9A=84=20temperature=20=E5=89=A5=E7=A6=BB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

之前 _sanitize_body 用单一 prefix ("gpt-5.5",) 判定,导致两类问题:
1. 漏剥:gpt-5.4 / gpt-5.4-pro 是 reasoning 模型,也拒绝 temperature,
   用户测这些模型直连 api.openai.com 时拿 HTTP 400 全 0 分
   (实测样本 web_data/jobs/openai/uXQaEDP-.json,Apr 2026)
2. 一旦扩成宽 prefix 又会误剥:gpt-5.4-mini / gpt-5.4-nano 是
   非 reasoning 子变体,实际接受 temperature(mem0#4738);
   gpt-5.1 系列在 reasoning_effort=none 时也接受(litellm#27351)

改法:把元组重构成"家族 prefix + 子变体例外"两层匹配,新增
_rejects_temperature() helper,沿用现有的点/下划线→横杠归一化以保持一致。

覆盖测试 14 个边界(参数化),包括子变体 -mini/-nano 反向豁免、
dated snapshot (gpt-5.5-2026-04-23) 命中、写法变体归一化。
---
 src/relay_detector/protocols/openai/client.py | 42 +++++++++++++++++--
 tests/test_openai_phase2.py                   | 42 +++++++++++++++++--
 2 files changed, 77 insertions(+), 7 deletions(-)

diff --git a/src/relay_detector/protocols/openai/client.py b/src/relay_detector/protocols/openai/client.py
index e8a8d47..38c4228 100644
--- a/src/relay_detector/protocols/openai/client.py
+++ b/src/relay_detector/protocols/openai/client.py
@@ -22,9 +22,25 @@
 RETRYABLE_STATUS = {429, 500, 502, 503, 504}
 MAX_BACKOFF_S = 30.0
 MAX_RETRIES = 3
-DEFAULT_TEMPERATURE_ONLY_PREFIXES = (
-    "gpt-5.5",
+# Reasoning-tier model families that REJECT sampling parameters. Sending
+# temperature (even temperature=0) returns HTTP 400 "Unsupported value:
+# 'temperature' does not support 0 with this model. Only the default (1)
+# value is supported." The safe action is to strip the field entirely.
+#
+# IMPORTANT: -mini / -nano sub-variants of these families are distinct
+# (non-reasoning) models that DO accept temperature — they must NOT be
+# stripped, otherwise consistency / model_consistency detectors lose
+# determinism and start flapping.
+#
+# Sources (May 2026):
+#   - https://community.openai.com/t/temperature-in-gpt-5-models/1337133
+#   - https://github.com/mem0ai/mem0/issues/4738 (gpt-5.4-mini accepts temp)
+#   - https://github.com/BerriAI/litellm/issues/27351 (gpt-5.1 reasoning_effort=none accepts temp)
+_TEMPERATURE_REJECTING_FAMILIES = (
+    "gpt-5.5",  # 5.5 / 5.5-pro / 5.5-2026-04-23 (no -mini/-nano variant exists yet)
+    "gpt-5.4",  # 5.4 / 5.4-pro — but NOT 5.4-mini / 5.4-nano
 )
+_TEMPERATURE_OK_SUB_VARIANTS = ("-mini", "-nano")
 
 
 def normalize_openai_base_url(base_url: str) -> str:
@@ -34,9 +50,29 @@ def normalize_openai_base_url(base_url: str) -> str:
     return normalized + "/v1"
 
 
+def _normalize_openai_model_id(model_id: str) -> str:
+    """Same dot/underscore→hyphen canonicalization used by models_match in
+    config.py, so users typing `gpt-5_4` or `gpt-5-4` map to the same
+    family bucket as `gpt-5.4`."""
+    return model_id.replace(".", "-").replace("_", "-")
+
+
+def _rejects_temperature(model_id: str) -> bool:
+    normalized = _normalize_openai_model_id(model_id)
+    for family in _TEMPERATURE_REJECTING_FAMILIES:
+        nf = _normalize_openai_model_id(family)
+        if not normalized.startswith(nf):
+            continue
+        tail = normalized[len(nf):]
+        if any(tail.startswith(suf) for suf in _TEMPERATURE_OK_SUB_VARIANTS):
+            return False
+        return True
+    return False
+
+
 def _sanitize_body(body: dict[str, Any]) -> dict[str, Any]:
     model = body.get("model")
-    if isinstance(model, str) and model.startswith(DEFAULT_TEMPERATURE_ONLY_PREFIXES):
+    if isinstance(model, str) and _rejects_temperature(model):
         body.pop("temperature", None)
     return body
 
diff --git a/tests/test_openai_phase2.py b/tests/test_openai_phase2.py
index 937f8b8..4c8d248 100644
--- a/tests/test_openai_phase2.py
+++ b/tests/test_openai_phase2.py
@@ -193,14 +193,39 @@ def handler(request: httpx.Request) -> httpx.Response:
     assert "temperature" not in captured["body"]
 
 
+@pytest.mark.parametrize(
+    "model,expect_stripped",
+    [
+        # Reasoning-tier models that reject temperature (HTTP 400 from OpenAI)
+        ("gpt-5.5", True),
+        ("gpt-5.5-pro", True),
+        ("gpt-5.5-2026-04-23", True),
+        ("gpt-5.4", True),
+        ("gpt-5.4-pro", True),
+        # Sub-variants of reasoning families ARE distinct (non-reasoning)
+        # models that accept temperature — must NOT be stripped
+        ("gpt-5.4-mini", False),
+        ("gpt-5.4-nano", False),
+        # Other GPT-5 lines accept temperature (5.1 with reasoning_effort=none)
+        ("gpt-5.1", False),
+        ("gpt-5.1-mini", False),
+        # Legacy / non-reasoning families — never stripped
+        ("gpt-4o", False),
+        ("gpt-4o-mini", False),
+        # Dot/hyphen/underscore canonicalization: same family bucket
+        ("gpt-5-4", True),
+        ("gpt-5_4", True),
+        ("gpt-5-4-mini", False),
+    ],
+)
 @pytest.mark.asyncio
-async def test_openai_client_keeps_temperature_for_other_models():
+async def test_openai_client_temperature_strip_per_model(model: str, expect_stripped: bool):
     captured: dict[str, Any] = {}
 
     def handler(request: httpx.Request) -> httpx.Response:
         import json as _json
         captured["body"] = _json.loads(request.content)
-        return httpx.Response(200, json=_chat_payload(model="gpt-5.4"))
+        return httpx.Response(200, json=_chat_payload(model=model))
 
     transport = httpx.MockTransport(handler)
     client = OpenAIChatClient("https://api.openai.com", "sk-test")
@@ -211,14 +236,23 @@ def handler(request: httpx.Request) -> httpx.Response:
     )
     try:
         await client.chat_completions_create(
-            model="gpt-5.4",
+            model=model,
             temperature=0,
             messages=[{"role": "user", "content": "hi"}],
         )
     finally:
         await client.aclose()
 
-    assert captured["body"]["temperature"] == 0
+    if expect_stripped:
+        assert "temperature" not in captured["body"], (
+            f"{model} is a reasoning-tier model that rejects temperature — "
+            "client must strip it before sending"
+        )
+    else:
+        assert captured["body"].get("temperature") == 0, (
+            f"{model} accepts temperature — client must NOT strip it "
+            "(stripping would lose detector determinism)"
+        )
 
 
 def test_openai_detectors_use_core_base_classes():