diff --git a/VERSION b/VERSION index fa18a7e81..4dc6dc76b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.8.6-SUPAI +4.8.7-SUPAI diff --git a/docs/release-readiness.md b/docs/release-readiness.md index a332ec559..f8431152a 100644 --- a/docs/release-readiness.md +++ b/docs/release-readiness.md @@ -75,6 +75,7 @@ Decision owner: |---|---|---|---|---|---| | parser_v2 | off/shadow/enforce | off/shadow/enforce | hold/promote/disable | | | | context_engine | off/shadow/enforce | off/shadow/enforce | hold/promote/disable | | | +| tool_calling | off/enforce | off/enforce | hold/promote/disable | | | | auto_continue | off/shadow/enforce | off/shadow/enforce | hold/promote/disable | | | | capability_router | off/shadow/enforce | off/shadow/enforce | hold/promote/disable | | | @@ -131,6 +132,7 @@ Required follow-ups: |---|---|---|---| | `parser_v2` | shadow 入口已就绪;报告能统计 diff、confidence、marker leak;不改变对外响应 | diff 经人工审阅;marker leak 和 false positive 均有 fixtures;Go/Node parity 可解释 | 出现未解释 marker leak、tool call 丢失、跨 chunk 误判 | | `context_engine` | ContextPlan 和 warning 可记录;tool pair 风险可被报告引用;shadow 不改变请求 | 多轮任务 shadow 稳定;tool_call/tool_result 不断链;reasoning summary 不挤压当前请求 | tool pair orphan、当前请求被错误裁剪、current-input hash 异常 | +| `tool_calling` | 全局关闭后能拒绝或剥离 tools;普通对话不受影响;输出侧不解析工具调用 | 默认开启路径单元测试通过;关闭/剥离路径有回归测试;WebUI / API 文档可回滚 | 携带 `tool_choice=auto` 的普通请求被误拒、关闭后仍注入 tools、关闭后仍解析工具调用 | | `auto_continue` | detector shadow 能记录 candidate、skip reason、trace;默认不续写 | non-stream smoke 通过;有 `max_continue_count`、`max_total_ms`、`max_extra_tokens`;失败可回退 | tool call、JSON mode、structured output、stream continuation 缺 live smoke | | `capability_router` | 所有公开 alias 有 profile;shadow 只产生 warning;冲突可被 History Analyzer 归类 | policy 经样本验证;冲突处理不破坏 search/thinking/current-input 语义 | unknown alias、search/thinking 冲突不可解释、vision/current-input 策略不完整 | diff --git a/internal/readiness/baseline.go b/internal/readiness/baseline.go index 6fbf824ff..419474b59 100644 --- a/internal/readiness/baseline.go +++ b/internal/readiness/baseline.go @@ -81,12 +81,28 @@ func defaultFeatureReadiness() []FeatureReadiness { MissingEvidence: []string{"parser shadow report", "manual diff review"}, }, { - Feature: "context_engine", - CurrentMode: ModeOff, - TargetMode: ModeShadow, - Decision: FeatureHold, - Reason: "waiting for context shadow evidence", - MissingEvidence: []string{"context shadow report", "tool pair review"}, + Feature: "context_engine", + CurrentMode: ModeEnforce, + TargetMode: ModeEnforce, + Decision: FeatureHold, + Reason: "current default is enforce; keep rollback path and collect live evidence", + Evidence: []EvidenceRef{ + {Source: "config defaults", Summary: "context_engine defaults to enforce with hybrid_recent"}, + {Source: "offline current-input smoke", Ref: "./tests/scripts/run-offline-current-input-smoke.sh"}, + }, + MissingEvidence: []string{"live multi-turn account smoke", "context shadow report for future strategy changes"}, + }, + { + Feature: "tool_calling", + CurrentMode: ModeEnforce, + TargetMode: ModeEnforce, + Decision: FeatureHold, + Reason: "current default is enabled; settings switch provides reject or ignore_tools rollback", + Evidence: []EvidenceRef{ + {Source: "unit tests", Ref: "./tests/scripts/run-unit-all.sh"}, + {Source: "docs", Ref: "docs/toolcall-semantics.md"}, + }, + MissingEvidence: []string{"live tool-call account smoke"}, }, { Feature: "auto_continue", diff --git a/internal/readiness/baseline_test.go b/internal/readiness/baseline_test.go index 2d53cd4b7..abef49253 100644 --- a/internal/readiness/baseline_test.go +++ b/internal/readiness/baseline_test.go @@ -16,8 +16,26 @@ func TestBuildBaselineReportDefaultsPendingEvidence(t *testing.T) { if report.Decision.Decision != DecisionGoFlagsOff { t.Fatalf("decision = %q, want %q", report.Decision.Decision, DecisionGoFlagsOff) } - if len(report.Features) != 4 { - t.Fatalf("features = %d, want 4", len(report.Features)) + if len(report.Features) != 5 { + t.Fatalf("features = %d, want 5", len(report.Features)) + } + var foundContext, foundToolCalling bool + for _, feature := range report.Features { + switch feature.Feature { + case "context_engine": + foundContext = true + if feature.CurrentMode != ModeEnforce || feature.TargetMode != ModeEnforce { + t.Fatalf("context_engine modes = %q/%q, want enforce/enforce", feature.CurrentMode, feature.TargetMode) + } + case "tool_calling": + foundToolCalling = true + if feature.CurrentMode != ModeEnforce || feature.TargetMode != ModeEnforce { + t.Fatalf("tool_calling modes = %q/%q, want enforce/enforce", feature.CurrentMode, feature.TargetMode) + } + } + } + if !foundContext || !foundToolCalling { + t.Fatalf("expected context_engine and tool_calling readiness rows, got %#v", report.Features) } if len(report.Shadow) != 5 { t.Fatalf("shadow evidence = %d, want 5", len(report.Shadow))