From ed47a4c54b94f343549c8a481c49388728e8ff35 Mon Sep 17 00:00:00 2001 From: Tonyhuang <129367165+tuofangzhe@users.noreply.github.com> Date: Sun, 10 May 2026 13:31:23 +0800 Subject: [PATCH] =?UTF-8?q?fix(openai-client):=20=E6=8C=89=E5=AD=90?= =?UTF-8?q?=E5=8F=98=E4=BD=93=E7=BB=86=E5=88=86=20reasoning=20=E5=AE=B6?= =?UTF-8?q?=E6=97=8F=E7=9A=84=20temperature=20=E5=89=A5=E7=A6=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 之前 _sanitize_body 用单一 prefix ("gpt-5.5",) 判定,导致两类问题: 1. 漏剥:gpt-5.4 / gpt-5.4-pro 是 reasoning 模型,也拒绝 temperature, 用户测这些模型直连 api.openai.com 时拿 HTTP 400 全 0 分 (实测样本 web_data/jobs/openai/uXQaEDP-.json,Apr 2026) 2. 一旦扩成宽 prefix 又会误剥:gpt-5.4-mini / gpt-5.4-nano 是 非 reasoning 子变体,实际接受 temperature(mem0#4738); gpt-5.1 系列在 reasoning_effort=none 时也接受(litellm#27351) 改法:把元组重构成"家族 prefix + 子变体例外"两层匹配,新增 _rejects_temperature() helper,沿用现有点-横杠归一化保持一致。 覆盖测试 14 个边界(参数化),包括子变体 -mini/-nano 反向豁免、 dated snapshot (gpt-5.5-2026-04-23) 命中、写法变体归一化。 --- src/relay_detector/protocols/openai/client.py | 42 +++++++++++++++++-- tests/test_openai_phase2.py | 42 +++++++++++++++++-- 2 files changed, 77 insertions(+), 7 deletions(-) diff --git a/src/relay_detector/protocols/openai/client.py b/src/relay_detector/protocols/openai/client.py index e8a8d47..38c4228 100644 --- a/src/relay_detector/protocols/openai/client.py +++ b/src/relay_detector/protocols/openai/client.py @@ -22,9 +22,25 @@ RETRYABLE_STATUS = {429, 500, 502, 503, 504} MAX_BACKOFF_S = 30.0 MAX_RETRIES = 3 -DEFAULT_TEMPERATURE_ONLY_PREFIXES = ( - "gpt-5.5", +# Reasoning-tier model families that REJECT sampling parameters. Sending +# temperature (even temperature=0) returns HTTP 400 "Unsupported value: +# 'temperature' does not support 0 with this model. Only the default (1) +# value is supported." The safe action is to strip the field entirely. +# +# IMPORTANT: -mini / -nano sub-variants of these families are distinct +# (non-reasoning) models that DO accept temperature — they must NOT be +# stripped, otherwise consistency / model_consistency detectors lose +# determinism and start flapping. +# +# Sources (May 2026): +# - https://community.openai.com/t/temperature-in-gpt-5-models/1337133 +# - https://github.com/mem0ai/mem0/issues/4738 (gpt-5.4-mini accepts temp) +# - https://github.com/BerriAI/litellm/issues/27351 (gpt-5.1 reasoning_effort=none accepts temp) +_TEMPERATURE_REJECTING_FAMILIES = ( + "gpt-5.5", # 5.5 / 5.5-pro / 5.5-2026-04-23 (no -mini/-nano variant exists yet) + "gpt-5.4", # 5.4 / 5.4-pro — but NOT 5.4-mini / 5.4-nano ) +_TEMPERATURE_OK_SUB_VARIANTS = ("-mini", "-nano") def normalize_openai_base_url(base_url: str) -> str: @@ -34,9 +50,29 @@ def normalize_openai_base_url(base_url: str) -> str: return normalized + "/v1" +def _normalize_openai_model_id(model_id: str) -> str: + """Same dot/underscore→hyphen canonicalization used by models_match in + config.py, so users typing `gpt-5_4` or `gpt-5-4` map to the same + family bucket as `gpt-5.4`.""" + return model_id.replace(".", "-").replace("_", "-") + + +def _rejects_temperature(model_id: str) -> bool: + normalized = _normalize_openai_model_id(model_id) + for family in _TEMPERATURE_REJECTING_FAMILIES: + nf = _normalize_openai_model_id(family) + if not normalized.startswith(nf): + continue + tail = normalized[len(nf):] + if any(tail.startswith(suf) for suf in _TEMPERATURE_OK_SUB_VARIANTS): + return False + return True + return False + + def _sanitize_body(body: dict[str, Any]) -> dict[str, Any]: model = body.get("model") - if isinstance(model, str) and model.startswith(DEFAULT_TEMPERATURE_ONLY_PREFIXES): + if isinstance(model, str) and _rejects_temperature(model): body.pop("temperature", None) return body diff --git a/tests/test_openai_phase2.py b/tests/test_openai_phase2.py index 937f8b8..4c8d248 100644 --- a/tests/test_openai_phase2.py +++ b/tests/test_openai_phase2.py @@ -193,14 +193,39 @@ def handler(request: httpx.Request) -> httpx.Response: assert "temperature" not in captured["body"] +@pytest.mark.parametrize( + "model,expect_stripped", + [ + # Reasoning-tier models that reject temperature (HTTP 400 from OpenAI) + ("gpt-5.5", True), + ("gpt-5.5-pro", True), + ("gpt-5.5-2026-04-23", True), + ("gpt-5.4", True), + ("gpt-5.4-pro", True), + # Sub-variants of reasoning families ARE distinct (non-reasoning) + # models that accept temperature — must NOT be stripped + ("gpt-5.4-mini", False), + ("gpt-5.4-nano", False), + # Other GPT-5 lines accept temperature (5.1 with reasoning_effort=none) + ("gpt-5.1", False), + ("gpt-5.1-mini", False), + # Legacy / non-reasoning families — never stripped + ("gpt-4o", False), + ("gpt-4o-mini", False), + # Dot/hyphen/underscore canonicalization: same family bucket + ("gpt-5-4", True), + ("gpt-5_4", True), + ("gpt-5-4-mini", False), + ], +) @pytest.mark.asyncio -async def test_openai_client_keeps_temperature_for_other_models(): +async def test_openai_client_temperature_strip_per_model(model: str, expect_stripped: bool): captured: dict[str, Any] = {} def handler(request: httpx.Request) -> httpx.Response: import json as _json captured["body"] = _json.loads(request.content) - return httpx.Response(200, json=_chat_payload(model="gpt-5.4")) + return httpx.Response(200, json=_chat_payload(model=model)) transport = httpx.MockTransport(handler) client = OpenAIChatClient("https://api.openai.com", "sk-test") @@ -211,14 +236,23 @@ def handler(request: httpx.Request) -> httpx.Response: ) try: await client.chat_completions_create( - model="gpt-5.4", + model=model, temperature=0, messages=[{"role": "user", "content": "hi"}], ) finally: await client.aclose() - assert captured["body"]["temperature"] == 0 + if expect_stripped: + assert "temperature" not in captured["body"], ( + f"{model} is a reasoning-tier model that rejects temperature — " + "client must strip it before sending" + ) + else: + assert captured["body"].get("temperature") == 0, ( + f"{model} accepts temperature — client must NOT strip it " + "(stripping would lose detector determinism)" + ) def test_openai_detectors_use_core_base_classes():