Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
4.8.6-SUPAI
4.8.7-SUPAI
2 changes: 2 additions & 0 deletions docs/release-readiness.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ Decision owner:
|---|---|---|---|---|---|
| parser_v2 | off/shadow/enforce | off/shadow/enforce | hold/promote/disable | | |
| context_engine | off/shadow/enforce | off/shadow/enforce | hold/promote/disable | | |
| tool_calling | off/enforce | off/enforce | hold/promote/disable | | |
| auto_continue | off/shadow/enforce | off/shadow/enforce | hold/promote/disable | | |
| capability_router | off/shadow/enforce | off/shadow/enforce | hold/promote/disable | | |

Expand Down Expand Up @@ -131,6 +132,7 @@ Required follow-ups:
|---|---|---|---|
| `parser_v2` | shadow 入口已就绪;报告能统计 diff、confidence、marker leak;不改变对外响应 | diff 经人工审阅;marker leak 和 false positive 均有 fixtures;Go/Node parity 可解释 | 出现未解释 marker leak、tool call 丢失、跨 chunk 误判 |
| `context_engine` | ContextPlan 和 warning 可记录;tool pair 风险可被报告引用;shadow 不改变请求 | 多轮任务 shadow 稳定;tool_call/tool_result 不断链;reasoning summary 不挤压当前请求 | tool pair orphan、当前请求被错误裁剪、current-input hash 异常 |
| `tool_calling` | 全局关闭后能拒绝或剥离 tools;普通对话不受影响;输出侧不解析工具调用 | 默认开启路径单元测试通过;关闭/剥离路径有回归测试;WebUI / API 文档可回滚 | 携带 `tool_choice=auto` 的普通请求被误拒、关闭后仍注入 tools、关闭后仍解析工具调用 |
| `auto_continue` | detector shadow 能记录 candidate、skip reason、trace;默认不续写 | non-stream smoke 通过;有 `max_continue_count`、`max_total_ms`、`max_extra_tokens`;失败可回退 | tool call、JSON mode、structured output、stream continuation 缺 live smoke |
| `capability_router` | 所有公开 alias 有 profile;shadow 只产生 warning;冲突可被 History Analyzer 归类 | policy 经样本验证;冲突处理不破坏 search/thinking/current-input 语义 | unknown alias、search/thinking 冲突不可解释、vision/current-input 策略不完整 |

Expand Down
28 changes: 22 additions & 6 deletions internal/readiness/baseline.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,28 @@ func defaultFeatureReadiness() []FeatureReadiness {
MissingEvidence: []string{"parser shadow report", "manual diff review"},
},
{
Feature: "context_engine",
CurrentMode: ModeOff,
TargetMode: ModeShadow,
Decision: FeatureHold,
Reason: "waiting for context shadow evidence",
MissingEvidence: []string{"context shadow report", "tool pair review"},
Feature: "context_engine",
CurrentMode: ModeEnforce,
TargetMode: ModeEnforce,
Decision: FeatureHold,
Reason: "current default is enforce; keep rollback path and collect live evidence",
Evidence: []EvidenceRef{
{Source: "config defaults", Summary: "context_engine defaults to enforce with hybrid_recent"},
{Source: "offline current-input smoke", Ref: "./tests/scripts/run-offline-current-input-smoke.sh"},
},
MissingEvidence: []string{"live multi-turn account smoke", "context shadow report for future strategy changes"},
},
{
Feature: "tool_calling",
CurrentMode: ModeEnforce,
TargetMode: ModeEnforce,
Decision: FeatureHold,
Reason: "current default is enabled; settings switch provides reject or ignore_tools rollback",
Evidence: []EvidenceRef{
{Source: "unit tests", Ref: "./tests/scripts/run-unit-all.sh"},
{Source: "docs", Ref: "docs/toolcall-semantics.md"},
},
MissingEvidence: []string{"live tool-call account smoke"},
},
{
Feature: "auto_continue",
Expand Down
22 changes: 20 additions & 2 deletions internal/readiness/baseline_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,26 @@ func TestBuildBaselineReportDefaultsPendingEvidence(t *testing.T) {
if report.Decision.Decision != DecisionGoFlagsOff {
t.Fatalf("decision = %q, want %q", report.Decision.Decision, DecisionGoFlagsOff)
}
if len(report.Features) != 4 {
t.Fatalf("features = %d, want 4", len(report.Features))
if len(report.Features) != 5 {
t.Fatalf("features = %d, want 5", len(report.Features))
}
var foundContext, foundToolCalling bool
for _, feature := range report.Features {
switch feature.Feature {
case "context_engine":
foundContext = true
if feature.CurrentMode != ModeEnforce || feature.TargetMode != ModeEnforce {
t.Fatalf("context_engine modes = %q/%q, want enforce/enforce", feature.CurrentMode, feature.TargetMode)
}
case "tool_calling":
foundToolCalling = true
if feature.CurrentMode != ModeEnforce || feature.TargetMode != ModeEnforce {
t.Fatalf("tool_calling modes = %q/%q, want enforce/enforce", feature.CurrentMode, feature.TargetMode)
}
}
}
if !foundContext || !foundToolCalling {
t.Fatalf("expected context_engine and tool_calling readiness rows, got %#v", report.Features)
}
if len(report.Shadow) != 5 {
t.Fatalf("shadow evidence = %d, want 5", len(report.Shadow))
Expand Down
Loading