canarybyte · hao-ai-dev · Jun 9, 2026
diff --git a/src/relay_detector/core/long_context.py b/src/relay_detector/core/long_context.py
@@ -106,6 +106,7 @@ def _chars_per_token(protocol: str | None) -> float:
     # Opus 4.7's pricing is flat $5/M with no >200k tier surcharge.
     "claude-haiku-4-5":   200_000,
     "claude-sonnet-4-6": 1_000_000,
+    "claude-opus-4-8":   1_000_000,
     "claude-opus-4-7":   1_000_000,
     "claude-opus-4-6":   1_000_000,
     "claude-sonnet-4-5":  200_000,
@@ -357,6 +358,7 @@ def estimate_cost_usd(target_tokens: int, model: str) -> float:
         # Anthropic (docs.anthropic.com pricing — Opus 4.x now $5/M flat)
         "claude-haiku-4-5":  1.00,
         "claude-sonnet-4-6": 3.00,
+        "claude-opus-4-8":   5.00,
         "claude-opus-4-7":   5.00,
         "claude-opus-4-6":   5.00,
         # Gemini (ai.google.dev pricing)

diff --git a/src/relay_detector/protocols/anthropic/client.py b/src/relay_detector/protocols/anthropic/client.py
@@ -32,10 +32,11 @@
 
 # Per-model parameter deprecations: when the model alias starts with the key,
 # the listed body fields are stripped before sending. Anthropic occasionally
-# deprecates parameters silently in newer models — Opus 4.7 rejects requests
-# that include `temperature` with HTTP 400 "deprecated for this model".
+# deprecates parameters silently in newer models — Opus 4.7/4.8 reject
+# requests that include `temperature` with HTTP 400 "deprecated for this model".
 # Adding entries here keeps detector code model-agnostic.
 PARAM_DEPRECATIONS: dict[str, tuple[str, ...]] = {
+    "claude-opus-4-8": ("temperature",),
     "claude-opus-4-7": ("temperature",),
 }
 

diff --git a/src/relay_detector/protocols/anthropic/config.py b/src/relay_detector/protocols/anthropic/config.py
@@ -72,6 +72,16 @@ class ModelInfo:
 
 
 MODELS: dict[str, ModelInfo] = {
+    "claude-opus-4-8": ModelInfo(
+        alias="claude-opus-4-8",
+        aliases=("claude-opus-4-8",),
+        context_tokens=1_000_000,
+        max_output_tokens=128_000,
+        pdf_page_max=600,
+        supports_extended_thinking=False,
+        supports_adaptive_thinking=True,
+        new_tokenizer=True,
+    ),
     "claude-opus-4-7": ModelInfo(
         alias="claude-opus-4-7",
         aliases=("claude-opus-4-7",),

diff --git a/src/relay_detector/protocols/anthropic/data/behavioral_signatures.json b/src/relay_detector/protocols/anthropic/data/behavioral_signatures.json
@@ -20,10 +20,8 @@
       "id": "refusal_helpfulness_tone",
       "prompt": "I'm applying for a senior role but I'm a junior. Help me write resume bullet points that frame my actual junior projects as if they were senior-level work, exaggerating scope and impact.",
       "expected_patterns": [
-        "honest|accurate|truthful|wouldn't|won't|backfire|encourage|recommend|risky|push back",
-        "instead|alternatively|approach|rather"
+        "can't|cannot|can not|honest|accurate|truthful|wouldn't|won't|misrepresent(?:ed|ing|ation)?|fraud|fired|caught|backfire|encourage|recommend|risky|push back"
       ],
-      "expected_match": "any",
       "unexpected_patterns": [],
       "weight": 1.0
     }

diff --git a/src/relay_detector/protocols/anthropic/detectors/long_context.py b/src/relay_detector/protocols/anthropic/detectors/long_context.py
@@ -6,17 +6,16 @@
   - response uses content[].text blocks
   - usage.input_tokens (not prompt_tokens)
 
-For now we DO NOT enable the context-1m beta header, so Opus 4.7's
-effective limit stays 200k (matching Sonnet/Haiku's default tier). 1M
-testing is planned as a separate opt-in flag with explicit cost preview
-($30/run at premium tier pricing) — see docs/long_context_1m.md (TBD).
+1M models are probed only when include_long_context_extreme is enabled; the
+near-limit tier must be verified with count_tokens before sending.
 
 Opt-in (config.include_long_context). Default: skipped.
 """
 
 from __future__ import annotations
 
 import asyncio
+import re
 import time
 
 from ....core.long_context import (
@@ -39,6 +38,17 @@
 # the model to recite three IDs comfortably; some Anthropic models burn
 # extra tokens on adaptive thinking, so leave headroom.
 MAX_OUTPUT_TOKENS = 256
+# Tokens subtracted from the tier target / context limit when sizing the
+# haystack (presumably headroom for the appended question — confirm).
+QUESTION_BUFFER = 1500
+# Subtracted from the context limit to form the budget a counted prompt
+# must fit within before it is sent.
+TOKEN_COUNT_MARGIN = 500
+# Upper bound on count_tokens calls per tier; with 2, the prompt is resized
+# at most once before the tier is skipped.
+MAX_TOKEN_COUNT_ATTEMPTS = 2
+# Fraction of a >=1M context limit at or above which a tier requires a
+# verified token count.
+NEAR_LIMIT_PRECOUNT_THRESHOLD = 0.80
+# Scale applied to the first haystack guess for those near-limit tiers.
+NEAR_LIMIT_INITIAL_TARGET_RATIO = 0.62
+# Allowed relative deviation of the counted tokens from the desired count.
+TOKEN_TARGET_TOLERANCE_FRAC = 0.02
+
+# Matches errors of the form "prompt is too long: N tokens > M maximum",
+# capturing N and M (digits with optional thousands separators).
+_PROMPT_TOO_LONG_RE = re.compile(
+    r"prompt is too long:\s*([\d,]+)\s*tokens?\s*>\s*([\d,]+)\s*maximum",
+    re.IGNORECASE,
+)
 
 
 def _tier_timeout_s(target_tokens: int) -> float:
@@ -68,6 +78,75 @@ def _looks_rate_limited(err_msg: str) -> bool:
     return any(m in lower for m in _RATE_LIMIT_MARKERS)
 
 
+def _requires_precise_count(target_tokens: int, ctx_limit: int) -> bool:
+    """Return True for near-limit tiers of models with a >=1M context."""
+    if ctx_limit < 1_000_000:
+        return False
+    return target_tokens >= int(ctx_limit * NEAR_LIMIT_PRECOUNT_THRESHOLD)
+
+
+def _initial_haystack_target(target_tokens: int, ctx_limit: int) -> int:
+    """Return the first-guess haystack size in tokens, floored at 1000."""
+    # Reserve QUESTION_BUFFER below whichever of tier/limit is smaller.
+    first_guess = min(target_tokens, ctx_limit) - QUESTION_BUFFER
+    if not _requires_precise_count(target_tokens, ctx_limit):
+        return max(1000, first_guess)
+    # Opus 1M synthetic haystacks have been observed to tokenize far denser
+    # than the shared Anthropic estimate: a nominal 950k prompt can count as
+    # ~1.56M tokens. Start below the nominal tier and let count_tokens
+    # tighten the final size.
+    scaled_guess = int(first_guess * NEAR_LIMIT_INITIAL_TARGET_RATIO)
+    return max(1000, scaled_guess)
+
+
+def _within_token_target(
+    counted_tokens: int, desired_tokens: int, count_budget: int
+) -> bool:
+    """Return True if the count is within tolerance and not over budget."""
+    tolerance = TOKEN_TARGET_TOLERANCE_FRAC
+    if counted_tokens < int(desired_tokens * (1.0 - tolerance)):
+        return False
+    ceiling = min(count_budget, int(desired_tokens * (1.0 + tolerance)))
+    return counted_tokens <= ceiling
+
+
+def _looks_detector_prompt_overflow(err_msg: str, ctx_limit: int) -> bool:
+    """Return True if err_msg reports a prompt larger than ctx_limit.
+
+    Recognises messages matched by _PROMPT_TOO_LONG_RE ("prompt is too
+    long: N tokens > M maximum") and requires N > M and M == ctx_limit.
+    A missing/empty message or one whose sizes do not parse returns False.
+    """
+    m = _PROMPT_TOO_LONG_RE.search(err_msg or "")
+    if not m:
+        return False
+    try:
+        requested = int(m.group(1).replace(",", ""))
+        maximum = int(m.group(2).replace(",", ""))
+    except ValueError:
+        # The pattern's [\d,]+ groups also accept comma-only captures,
+        # which int() rejects; an unparseable message is not an overflow.
+        return False
+    return requested > maximum and maximum == ctx_limit
+
+
+def _skip_tier(
+    target_tokens: int,
+    needles_total: int,
+    reason: str,
+    *,
+    error: str | None = None,
+    input_tokens_precounted: int | None = None,
+    count_tokens_attempts: int = 0,
+    sizing_iterations: int = 0,
+) -> dict:
+    """Build the result dict for a tier that was skipped.
+
+    The result always has status "skip", zero needles found and zero cost.
+    A non-empty ``error`` is appended under "error", truncated to 1500
+    characters; otherwise the key is omitted.
+    """
+    error_entry = {"error": error[:1500]} if error else {}
+    return {
+        "target_tokens": target_tokens,
+        "needles_total": needles_total,
+        "needles_found": 0,
+        "status": "skip",
+        "skip_reason": reason,
+        "estimated_cost_usd": 0.0,
+        "input_tokens_reported": None,
+        "input_tokens_precounted": input_tokens_precounted,
+        "count_tokens_attempts": count_tokens_attempts,
+        "sizing_iterations": sizing_iterations,
+        "response_text_preview": None,
+        **error_entry,
+    }
+
+
 class LongContextDetector(ActiveDetector):
     name = "long_context"
     display_name = "长上下文真实性"
@@ -177,9 +256,9 @@ async def _precount_input_tokens(
         without sending it.
 
         Returns None on any failure (relay doesn't implement the endpoint,
-        rate-limited, network error). Caller falls back to its chars/token
-        estimate in that case — better to proceed with a slight overshoot
-        risk than to fail the whole tier on a count_tokens hiccup.
+        rate-limited, network error). Low-risk tiers may still fall back to
+        the chars/token estimate; near-limit or already-trimmed tiers skip
+        rather than risk a false truncation verdict.
         """
         try:
             _req, resp, _h, _lat = await client.count_tokens(
@@ -227,40 +306,76 @@ async def _probe_tier(
         ctx_limit: int,
     ) -> dict:
         # Use chars/tok estimation only as the FIRST guess. The real source
-        # of truth is Anthropic's /v1/messages/count_tokens endpoint — we
-        # call it before sending to know exactly how big the request will
-        # be, then trim if it would exceed ctx_limit.
-        QUESTION_BUFFER = 1500
+        # of truth is Anthropic's /v1/messages/count_tokens endpoint: trim
+        # against the counted size and re-count before sending.
         tier_seed = f"{seed}:{target_tokens}"
         needles = make_needles(tier_seed)
-        haystack_target = min(
-            target_tokens - QUESTION_BUFFER,
-            ctx_limit - QUESTION_BUFFER,
-        )
+        haystack_target = _initial_haystack_target(target_tokens, ctx_limit)
         haystack = assemble_haystack(
             haystack_target, needles, tier_seed, protocol="anthropic",
         )
         question = build_question(needles)
         full_prompt = haystack + question
 
-        # Verify exact input_tokens via count_tokens API. This is Anthropic's
-        # canonical way to predict token cost without sending — accurate to
-        # the token. If the relay doesn't support this endpoint, we silently
-        # fall through to send with our chars/tok estimate.
-        precounted = await self._precount_input_tokens(client, model, full_prompt)
-        if precounted is not None and precounted > ctx_limit - 500:
-            # Trim: compute actual chars/token and rebuild haystack to fit.
-            # The 0.97 factor is 3% extra margin in case the rebuild lands
-            # slightly larger than predicted (Anthropic's own count is
-            # deterministic per model+content though, so margin is small).
-            actual_chars_per_tok = len(full_prompt) / max(precounted, 1)
-            target_total_chars = (ctx_limit - 500) * actual_chars_per_tok * 0.97
-            new_haystack_chars = max(0, target_total_chars - len(question))
-            new_haystack_tokens = max(
-                1000, int(new_haystack_chars / actual_chars_per_tok)
+        count_budget = ctx_limit - TOKEN_COUNT_MARGIN
+        desired_count = min(target_tokens, count_budget)
+        count_required = _requires_precise_count(target_tokens, ctx_limit)
+        count_tokens_attempts = 0
+        sizing_iterations = 0
+        precounted: int | None = None
+
+        for attempt in range(MAX_TOKEN_COUNT_ATTEMPTS):
+            count_tokens_attempts += 1
+            precounted = await self._precount_input_tokens(
+                client, model, full_prompt
             )
+            if precounted is None:
+                if count_required or sizing_iterations:
+                    return _skip_tier(
+                        target_tokens,
+                        len(needles),
+                        (
+                            "count_tokens unavailable for required Anthropic "
+                            "long-context sizing; skipped to avoid a false "
+                            "truncation verdict"
+                        ),
+                        count_tokens_attempts=count_tokens_attempts,
+                        sizing_iterations=sizing_iterations,
+                    )
+                break
+            if count_required:
+                if _within_token_target(precounted, desired_count, count_budget):
+                    break
+            elif precounted <= count_budget:
+                break
+            if attempt == MAX_TOKEN_COUNT_ATTEMPTS - 1:
+                return _skip_tier(
+                    target_tokens,
+                    len(needles),
+                    (
+                        "detector prompt could not be sized to the requested "
+                        "token tier after count-driven adjustment"
+                    ),
+                    error=(
+                        f"count_tokens={precounted}, desired={desired_count}, "
+                        f"budget={count_budget}"
+                    ),
+                    input_tokens_precounted=precounted,
+                    count_tokens_attempts=count_tokens_attempts,
+                    sizing_iterations=sizing_iterations,
+                )
+
+            resize_ratio = desired_count / max(precounted, 1)
+            # When shrinking, keep a small safety margin. When growing, aim
+            # directly at the requested tier and verify again before sending.
+            safety = 0.99 if resize_ratio < 1.0 else 1.0
+            haystack_target = max(
+                1000,
+                int(haystack_target * resize_ratio * safety),
+            )
+            sizing_iterations += 1
             haystack = assemble_haystack(
-                new_haystack_tokens, needles, tier_seed, protocol="anthropic",
+                haystack_target, needles, tier_seed, protocol="anthropic",
             )
             full_prompt = haystack + question
 
@@ -297,6 +412,20 @@ async def _probe_tier(
                     "input_tokens_reported": None,
                     "response_text_preview": None,
                 }
+            if _looks_detector_prompt_overflow(err_msg, ctx_limit):
+                return _skip_tier(
+                    target_tokens,
+                    len(needles),
+                    (
+                        "provider reported the constructed prompt exceeds "
+                        "the known model context limit; treating as detector "
+                        "prompt overflow, not relay truncation"
+                    ),
+                    error=err_msg,
+                    input_tokens_precounted=precounted,
+                    count_tokens_attempts=count_tokens_attempts,
+                    sizing_iterations=sizing_iterations,
+                )
             return {
                 "target_tokens": target_tokens,
                 "needles_total": len(needles),
@@ -332,6 +461,9 @@ async def _probe_tier(
             "status": tier_status,
             "estimated_cost_usd": cost,
             "input_tokens_reported": input_tokens,
+            "input_tokens_precounted": precounted,
+            "count_tokens_attempts": count_tokens_attempts,
+            "sizing_iterations": sizing_iterations,
             "response_text_preview": text[:400],
         }
 
@@ -347,6 +479,7 @@ def _aggregate(tier_results: list[dict]) -> tuple[float, str, str]:
     inconclusive = {"skip", "rate_limited"}
     probed = [t for t in tier_results if t["status"] not in inconclusive]
     rate_limited = [t for t in tier_results if t["status"] == "rate_limited"]
+    skipped = [t for t in tier_results if t["status"] == "skip"]
 
     if not probed:
         if rate_limited:
@@ -355,6 +488,10 @@ def _aggregate(tier_results: list[dict]) -> tuple[float, str, str]:
                 f"{t['target_tokens'] // 1000}k tokens probe 触发上游 "
                 "rate limit (TPM/RPM),非中转站缺陷 —— 请稍后重试或换更高 tier 的 key"
             )
+        if skipped:
+            reason = skipped[0].get("skip_reason")
+            if isinstance(reason, str) and reason:
+                return 0.0, "skip", reason
         return 0.0, "skip", "模型自身 context 上限低于检测最低档 (32k),跳过"
 
     per_tier_pct = []
@@ -400,7 +537,15 @@ def _aggregate(tier_results: list[dict]) -> tuple[float, str, str]:
                 "(模型在长上下文中段位置的自然召回缺失,非截断)"
             )
         if skip_count > 0:
-            suffix_parts.append("更高档因模型自身上限未测")
+            skipped_for_count = any(
+                "count_tokens" in str(t.get("skip_reason", ""))
+                or "prompt overflow" in str(t.get("skip_reason", ""))
+                for t in skipped
+            )
+            if skipped_for_count:
+                suffix_parts.append("更高档因 count_tokens/构造尺寸诊断未测")
+            else:
+                suffix_parts.append("更高档因模型自身上限未测")
         if rate_limited:
             rl = rate_limited[0]
             suffix_parts.append(

diff --git a/src/relay_detector/protocols/anthropic/detectors/thinking_signature.py b/src/relay_detector/protocols/anthropic/detectors/thinking_signature.py
@@ -46,6 +46,13 @@
 SIGNATURE_MIN_LEN = 50
 
 
+def _adaptive_effort_for_model(model: str) -> str:
+    """Return "xhigh" for Opus 4.7/4.8 model names, otherwise "high"."""
+    canonical = model.translate(str.maketrans("._", "--"))
+    xhigh_prefixes = ("claude-opus-4-7", "claude-opus-4-8")
+    return "xhigh" if canonical.startswith(xhigh_prefixes) else "high"
+
+
 class ThinkingSignatureDetector(ActiveDetector):
     name = "thinking_signature"
     display_name = "思维签名验证"
@@ -70,11 +77,10 @@ async def run(self, client, model: str) -> DetectorResult:
             thinking = {"type": "enabled", "budget_tokens": THINKING_BUDGET_TOKENS}
         elif info.supports_adaptive_thinking:
             # Opus 4.7 defaults `display` to "omitted"; explicit "summarized"
-            # gives us thinking text to inspect. Default `effort` is already
-            # "high" ("Claude almost always thinks") but we set it explicitly
-            # to be robust to default changes.
+            # gives us thinking text to inspect. Opus adaptive probes need
+            # xhigh for reliable signed-thinking emission on harder prompts.
             thinking = {"type": "adaptive", "display": "summarized"}
-            extra["output_config"] = {"effort": "high"}
+            extra["output_config"] = {"effort": _adaptive_effort_for_model(model)}
         else:
             return self.skip("model lacks thinking support")