From b806fd43cbca0a802a7ea5facc67a94c9bd54fb1 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Tue, 9 Jun 2026 15:13:38 -0300 Subject: [PATCH 01/11] refactor(harness): resolve model/provider once per turn, drop duplicate reads and the steering hop The turn loop re-fetched constant data on every round-trip: models::get ran three times per iteration (orchestrator preflight, provider stream, compaction) and harness::provider::resolve ran once per stream, all returning the same value for the whole turn. finalizeBatch rebuilt the full compaction window only to read the trailing function_result ids for dedup, and a separate turn::steering_check FSM step gated every continue/end decision. - Resolve the full catalog Model once at provisioning, persist it on the turn record, and thread it to preflight, the provider (ProviderStreamInput.model_meta), and compaction (turn_end.model_limit). Every reader falls back to a live fetch when it is absent, so an empty/cold catalog is unchanged. models::get drops from ~3 per round-trip to ~1 per turn. - Cache the provider credential resolution in the provider process, keyed by a per-run id threaded on ProviderStreamInput.resolution_key, and invalidate it on a 401. Collapses the per-stream harness::provider::resolve to ~1 per turn while still picking up a key rotated between turns. - finalizeBatch reads only the raw trailing function_result run via a new TurnStore.loadTrailingResultIds (default leaf, matching the append path), skipping the paired session-tree::compactions read. - Inline the steering_check routing (continue / max_turns / end) into assistant_streaming and function_execute, removing one durable FSM step and its queue wake per round-trip. turn::steering_check stays registered as a compat drain so records persisted in that state at deploy time advance instead of DLQ-wedging the turn; the state, schemas, and registration are removed in a follow-up once that queue is confirmed empty. Continuing batches now resume assistant_streaming with cleared function_results (the results already travel on the turn_end event for consumers). No change to durability, approvals, compaction safety, or mid-turn model/credential changes. --- .../src/context-compaction/handler-async.ts | 38 ++-- .../src/context-compaction/model-resolver.ts | 13 ++ harness/src/models-catalog/types.ts | 49 +++++ harness/src/provider-anthropic/auth.ts | 41 +++- harness/src/provider-anthropic/stream-fn.ts | 8 +- harness/src/provider-anthropic/stream.ts | 4 + .../assistant-streaming/ports.ts | 14 +- .../assistant-streaming/run.ts | 17 +- .../function-execute/ports.ts | 22 ++- .../turn-orchestrator/function-execute/run.ts | 18 +- harness/src/turn-orchestrator/preflight.ts | 10 +- .../src/turn-orchestrator/provider-router.ts | 5 + .../turn-orchestrator/provisioning/ports.ts | 8 + .../turn-orchestrator/provisioning/process.ts | 16 ++ .../turn-orchestrator/state-runtime/ports.ts | 11 +- .../turn-orchestrator/state-runtime/store.ts | 20 ++ .../state-runtime/turn-end.ts | 42 +++- harness/src/turn-orchestrator/state.ts | 10 + .../steering-check/process.ts | 2 +- .../turn-orchestrator/steering-check/run.ts | 40 ++-- harness/src/types/agent-event.ts | 10 + harness/src/types/provider.ts | 15 ++ .../context-compaction/handler-async.test.ts | 53 ++++- .../integration/mode-approval.e2e.test.ts | 23 ++- .../integration/parallel-approval.e2e.test.ts | 6 +- harness/tests/provider-anthropic/auth.test.ts | 184 +++++++++++++++++- .../_helpers/mockTurnStore.ts | 1 + .../assistant-streaming.test.ts | 4 +- .../tests/turn-orchestrator/assistant.test.ts | 10 +- .../function-awaiting-approval.test.ts | 11 +- .../function-execute.test.ts | 133 +++++++++++-- .../tests/turn-orchestrator/functions.test.ts | 42 ++-- .../tests/turn-orchestrator/preflight.test.ts | 39 ++++ .../turn-orchestrator/provider-router.test.ts | 35 ++++ .../provisioning-layer.test.ts | 77 +++++++- .../steering-check-layer.test.ts | 4 +- 36 files changed, 918 insertions(+), 117 deletions(-) diff --git a/harness/src/context-compaction/handler-async.ts b/harness/src/context-compaction/handler-async.ts index 518fafdb..2cd3debe 100644 --- a/harness/src/context-compaction/handler-async.ts +++ b/harness/src/context-compaction/handler-async.ts @@ -7,6 +7,7 @@ import { setCurrentSpanAttribute, withSpan } from '@iii-dev/observability'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; +import type { ModelContextLimit } from '../types/agent-event.js'; import { compactionConfig } from './config.js'; import { isSummarizeOk, @@ -54,24 +55,34 @@ export function turnEndUsage(event: unknown): Record | null { type ResolvedModel = { providerID: string; modelID: string; - modelLimit: { context: number; input: number; output: number }; + modelLimit: ModelContextLimit; } | null; -async function resolveModelFromEvent( +/** + * The fields we read off the orchestrator-produced `turn_end` event. The + * message is always an AssistantMessage, so provider/model are present (empty + * strings on synthetic/error turns); model_limit is threaded when the catalog + * resolved at turn start. + */ +type TurnEndEventView = { + message: { provider: string; model: string }; + model_limit?: ModelContextLimit; +}; + +export async function resolveModelFromEvent( iii: ISdk, session_id: string, event: unknown, ): Promise { - let providerID: string | null = null; - let modelID: string | null = null; - - if (event && typeof event === 'object') { - const ev = event as Record; - const msg = ev.message as Record | undefined; - if (msg) { - if (typeof msg.provider === 'string' && msg.provider) providerID = msg.provider; - if (typeof msg.model === 'string' && msg.model) modelID = msg.model; - } + const ev = event as TurnEndEventView; + let providerID = ev.message.provider || null; + let modelID = ev.message.model || null; + + // Closed system: the orchestrator threads the turn's limit onto turn_end, so + // when ids and the limit are present we trust them and skip the models::get + // round-trip. Empty ids (synthetic/error turns) fall through to the scan. + if (providerID && modelID && ev.model_limit) { + return { providerID, modelID, modelLimit: ev.model_limit }; } if (!providerID || !modelID) { @@ -87,8 +98,7 @@ async function resolveModelFromEvent( const messages = resp?.messages ?? []; for (let i = messages.length - 1; i >= 0; i--) { const m = messages[i]?.message; - if (!m) continue; - if (m.role !== 'assistant') continue; + if (m?.role !== 'assistant') continue; if (!providerID && typeof m.provider === 'string' && m.provider) { providerID = m.provider; } diff --git a/harness/src/context-compaction/model-resolver.ts b/harness/src/context-compaction/model-resolver.ts index 996f1e2b..7dddc1f5 100644 --- a/harness/src/context-compaction/model-resolver.ts +++ b/harness/src/context-compaction/model-resolver.ts @@ -1,3 +1,4 @@ +import type { Model } from '../models-catalog/types.js'; import { RUN_REQUEST_SCOPE } from '../turn-orchestrator/state.js'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; @@ -9,6 +10,18 @@ export type ResolvedModel = { modelLimit: ModelLimit; } | null; +/** + * Derive a {@link ModelLimit} from a catalog {@link Model}. Mirrors the mapping + * {@link fetchModelLimit} applies to a `models::get` result, so a threaded + * `model_meta` yields the exact same limit a fresh fetch would — letting + * callers skip the bus round-trip without changing behavior. + */ +export function limitFromModel(m: Model): ModelLimit { + const context = typeof m.context_window === 'number' ? m.context_window : 0; + const output = typeof m.max_output_tokens === 'number' ? m.max_output_tokens : 0; + return { context, input: context, output }; +} + export async function fetchModelLimit( iii: ISdk, providerID: string, diff --git a/harness/src/models-catalog/types.ts b/harness/src/models-catalog/types.ts index 14bf579f..5ab55d3e 100644 --- a/harness/src/models-catalog/types.ts +++ b/harness/src/models-catalog/types.ts @@ -3,6 +3,8 @@ * `models-catalog/src/lib.rs::{Model, Pricing, …}`. */ +import { z } from 'zod'; + export type ThinkingLevel = 'off' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh'; export type Transport = 'sse' | 'websocket' | 'auto'; @@ -41,6 +43,53 @@ export type Model = { pricing?: Pricing; }; +/** + * Zod mirror of {@link Model}, used to validate model metadata that crosses a + * worker boundary (`ProviderStreamInput.model_meta`). `.passthrough()` keeps + * any future catalog fields intact. Kept field-for-field in sync with the + * `Model` type above; the compile-time guard below fails the build on drift. + */ +export const ModelSchema = z + .object({ + id: z.string(), + provider: z.string(), + api: z.string(), + display_name: z.string(), + context_window: z.number(), + max_output_tokens: z.number().optional(), + supports_thinking: z.boolean().optional(), + supports_xhigh: z.boolean().optional(), + supports_tools: z.boolean().optional(), + supports_vision: z.boolean().optional(), + supports_cache: z.boolean().optional(), + thinking_budgets: z + .object({ + minimal: z.number().optional(), + low: z.number().optional(), + medium: z.number().optional(), + high: z.number().optional(), + }) + .passthrough() + .optional(), + transports: z.array(z.enum(['sse', 'websocket', 'auto'])).optional(), + default_cache_retention: z.enum(['none', 'short', 'long']).optional(), + pricing: z + .object({ + input_per_1m: z.number(), + output_per_1m: z.number(), + cache_read_per_1m: z.number().optional(), + cache_write_per_1m: z.number().optional(), + }) + .passthrough() + .optional(), + }) + .passthrough(); + +// Compile-time guard: every value ModelSchema parses is assignable to Model. +// If a field drifts out of sync, this line fails to typecheck. +const _modelSchemaSatisfiesType = (m: z.infer): Model => m; +void _modelSchemaSatisfiesType; + export const MODELS_SCOPE = 'models'; export type Capability = diff --git a/harness/src/provider-anthropic/auth.ts b/harness/src/provider-anthropic/auth.ts index 64ac30d1..f3bfb24d 100644 --- a/harness/src/provider-anthropic/auth.ts +++ b/harness/src/provider-anthropic/auth.ts @@ -5,20 +5,55 @@ * pair with a single call. */ +import type { Model } from '../models-catalog/types.js'; import type { ISdk } from '../runtime/iii.js'; import { clampOutputTokens, getCatalogModel } from '../runtime/output-tokens.js'; -import { resolveProvider } from '../runtime/provider-resolve.js'; +import { type ProviderResolveResult, resolveProvider } from '../runtime/provider-resolve.js'; import type { WorkerConfig } from './config.js'; import { type AnthropicConfig, configWithCredential } from './types.js'; export const PROVIDER_ID = 'anthropic'; +/** + * Single-slot cache of the resolved provider credential, keyed by the turn's + * stable id. The credential is global, so within a turn its 20+ stream calls + * reuse one resolution; a new turn (new key) re-resolves, picking up a key + * rotated between turns. {@link invalidateProviderResolveCache} drops it on a + * mid-turn 401. With no key threaded, callers always resolve (no caching). + */ +let resolveCache: { key: number; resolved: ProviderResolveResult } | null = null; + +async function resolveProviderForTurn(iii: ISdk, key?: number): Promise { + if (key === undefined) return resolveProvider(iii, PROVIDER_ID); + if (resolveCache?.key === key) return resolveCache.resolved; + const resolved = await resolveProvider(iii, PROVIDER_ID); + resolveCache = { key, resolved }; + return resolved; +} + +/** Drop the cached resolution so the next stream re-resolves (called on a 401). */ +export function invalidateProviderResolveCache(): void { + resolveCache = null; +} + +/** Test seam: clear the cache between cases. */ +export function _resetProviderResolveCacheForTests(): void { + resolveCache = null; +} + export async function buildConfig( iii: ISdk, worker: WorkerConfig, model: string, + // The turn's catalog entry, threaded by the orchestrator. It is always for + // this turn's model (the orchestrator routes to this provider for that same + // model), so we use it directly; absent only when the catalog had no entry, + // where we fetch. + preResolved?: Model, + // The turn's stable id, used to dedupe the per-stream credential resolution. + resolutionKey?: number, ): Promise { - const resolved = await resolveProvider(iii, PROVIDER_ID); + const resolved = await resolveProviderForTurn(iii, resolutionKey); if (!resolved.credential) { throw new Error( 'harness::provider::resolve returned no credential for provider `anthropic` ' + @@ -26,7 +61,7 @@ export async function buildConfig( ); } const apiUrl = resolved.api_url ?? worker.default_api_url; - const catalog = await getCatalogModel(iii, PROVIDER_ID, model); + const catalog = preResolved ?? (await getCatalogModel(iii, PROVIDER_ID, model)); const maxTokens = clampOutputTokens({ modelMaxOutput: catalog?.max_output_tokens, userOverride: resolved.max_tokens, diff --git a/harness/src/provider-anthropic/stream-fn.ts b/harness/src/provider-anthropic/stream-fn.ts index fee9c4e1..50e745f5 100644 --- a/harness/src/provider-anthropic/stream-fn.ts +++ b/harness/src/provider-anthropic/stream-fn.ts @@ -34,7 +34,13 @@ export function register(iii: ISdk, worker: WorkerConfig): void { // use it directly instead of re-instantiating from the wire shape // (which no longer exists by the time we get here). const writer = input.writer_ref as ChannelWriter; - const cfg = await buildConfig(iii, worker, input.model); + const cfg = await buildConfig( + iii, + worker, + input.model, + input.model_meta, + input.resolution_key, + ); const thinking = buildThinkingConfig(input.thinking_level, cfg.max_tokens, cfg.catalog); try { const events = streamAnthropic({ diff --git a/harness/src/provider-anthropic/stream.ts b/harness/src/provider-anthropic/stream.ts index 3d6dd64b..4b62efe8 100644 --- a/harness/src/provider-anthropic/stream.ts +++ b/harness/src/provider-anthropic/stream.ts @@ -8,6 +8,7 @@ import { logger } from '../runtime/otel.js'; import type { AgentMessage, AssistantMessage } from '../types/agent-message.js'; import type { AgentFunction } from '../types/function.js'; import type { AssistantMessageEvent } from '../types/stream-event.js'; +import { invalidateProviderResolveCache } from './auth.js'; import { applyMessagesCacheAnchor, applyToolsCacheControl, buildSystemField } from './cache.js'; import { buildFinal, @@ -79,6 +80,9 @@ export async function* streamAnthropic({ if (!resp.ok) { const text = await resp.text().catch(() => ''); const kind = classifyAnthropicError(text, resp.status); + // A 401/403 means the cached credential is stale (rotated mid-turn); drop it + // so the next stream re-resolves instead of replaying the dead key. + if (kind === 'auth_expired') invalidateProviderResolveCache(); yield syntheticErrorEvent(text || `anthropic http ${resp.status}`, cfg.model, kind); return; } diff --git a/harness/src/turn-orchestrator/assistant-streaming/ports.ts b/harness/src/turn-orchestrator/assistant-streaming/ports.ts index 81831e2a..697b2d37 100644 --- a/harness/src/turn-orchestrator/assistant-streaming/ports.ts +++ b/harness/src/turn-orchestrator/assistant-streaming/ports.ts @@ -3,6 +3,7 @@ */ import { z } from 'zod'; +import type { Model } from '../../models-catalog/types.js'; import { logger } from '../../runtime/otel.js'; import type { ISdk } from '../../runtime/iii.js'; import type { AgentMessage, AssistantMessage } from '../../types/agent-message.js'; @@ -25,6 +26,10 @@ export type StreamContext = { messages: AgentMessage[]; /** Optional reasoning/thinking level from the run request. Absent = off. */ thinking_level?: string; + /** Turn's pre-resolved catalog entry, threaded to the provider. Absent = off. */ + model_meta?: Model; + /** Turn's stable id (run start time), so the provider dedupes credential resolution. */ + resolution_key?: number; }; export type StreamTurnOutcome = { @@ -36,7 +41,7 @@ export type StreamTurnOutcome = { export type AssistantRoute = | { kind: 'stopped'; reason: 'error' | 'aborted' } | { kind: 'function_execute' } - | { kind: 'steering_check' }; + | { kind: 'end_turn' }; export function parseFunctionSchemas(raw: unknown[]): AgentFunction[] { return z.array(AgentFunctionSchema).parse(raw) as AgentFunction[]; @@ -62,6 +67,7 @@ export type AssistantStreamingPorts = TurnStatePorts & { messages: AgentMessage[], provider: string, model: string, + model_meta?: Model, ): Promise<'ok' | 'compacted'>; streamTurn( ctx: StreamContext, @@ -90,8 +96,8 @@ export function createStreamingPorts(iii: ISdk): AssistantStreamingPorts { return { ...base, - async runPreflight(session_id, messages, provider, model) { - return runPreflight(iii, session_id, messages, provider, model); + async runPreflight(session_id, messages, provider, model, model_meta) { + return runPreflight(iii, session_id, messages, provider, model, model_meta); }, async streamTurn(ctx, onDelta) { @@ -106,6 +112,8 @@ export function createStreamingPorts(iii: ISdk): AssistantStreamingPorts { ctx.messages, ctx.tools, ctx.thinking_level, + ctx.model_meta, + ctx.resolution_key, ), onDelta, }); diff --git a/harness/src/turn-orchestrator/assistant-streaming/run.ts b/harness/src/turn-orchestrator/assistant-streaming/run.ts index da8b5185..a11fa6e3 100644 --- a/harness/src/turn-orchestrator/assistant-streaming/run.ts +++ b/harness/src/turn-orchestrator/assistant-streaming/run.ts @@ -34,9 +34,13 @@ export async function prepareStreamContext( const { provider, model, system_prompt, function_schemas, thinking_level } = request; const decision = decide({ provider, model }); const tools = parseFunctionSchemas(function_schemas); + // Resolved once at provisioning; preflight + provider read it instead of + // re-fetching models::get. Absent on pre-existing records → readers fall back. + const model_meta = rec.model_meta; if ( - (await ports.runPreflight(rec.session_id, messages, decision.provider, model)) === 'compacted' + (await ports.runPreflight(rec.session_id, messages, decision.provider, model, model_meta)) === + 'compacted' ) { messages = await ports.loadMessages(rec.session_id); } @@ -48,6 +52,10 @@ export async function prepareStreamContext( tools, messages, ...(thinking_level ? { thinking_level } : {}), + ...(model_meta ? { model_meta } : {}), + // Stable per run, fresh per run: dedupes the provider's credential resolution + // within the turn while still re-resolving on the next user turn. + resolution_key: rec.started_at_ms, }; } @@ -107,7 +115,7 @@ export function routeAssistantTurn(asst: AssistantMessage): AssistantRoute { if (hasFunctionCalls(asst)) { return { kind: 'function_execute' }; } - return { kind: 'steering_check' }; + return { kind: 'end_turn' }; } export async function finalizeAssistantTurn( @@ -135,7 +143,10 @@ export async function finalizeAssistantTurn( return; } - transitionTo(rec, 'steering_check'); + // end_turn: no function calls — finish inline (formerly the + // turn::steering_check hop, which always routed an empty-results turn here). + await emitTurnEndOnce(ports, rec, asst); + await ports.finishSession(rec); } export async function runAssistantStreaming( diff --git a/harness/src/turn-orchestrator/function-execute/ports.ts b/harness/src/turn-orchestrator/function-execute/ports.ts index 7b70b20a..1c4e1554 100644 --- a/harness/src/turn-orchestrator/function-execute/ports.ts +++ b/harness/src/turn-orchestrator/function-execute/ports.ts @@ -7,8 +7,10 @@ import type { DispatchResult } from '../agent-trigger.js'; import { dispatchWithHook, triggerFunctionCall } from '../agent-trigger.js'; import { emit } from '../events.js'; import type { ISdk } from '../../runtime/iii.js'; +import type { AgentEvent } from '../../types/agent-event.js'; import type { FunctionCall, FunctionResult } from '../../types/function.js'; import { createTurnStatePorts, type TurnStatePorts } from '../state-runtime/ports.js'; +import { createTurnStore } from '../state-runtime/store.js'; import type { ExecutedCall } from './types.js'; const RoutingEnvelopeSchema = z @@ -49,8 +51,17 @@ export function withRoutingEnvelope(call: FunctionCall, session_id: string): Fun export type FunctionExecutePorts = TurnStatePorts & { emitStart(session_id: string, call: FunctionCall): Promise; emitEnd(session_id: string, executed: ExecutedCall): Promise; + /** Generic agent-event emit, for the inline max_turns end (message_complete). */ + emit(session_id: string, event: AgentEvent): Promise; dispatch(call: FunctionCall, session_id: string): Promise; triggerPreApproved(call: FunctionCall): Promise; + /** + * Function_call_ids already persisted in the trailing result run, for batch + * dedup on re-entry. Reads the raw message list (not the compaction-rebuilt + * window): the trailing results live in the preserved tail, so this drops the + * paired `session-tree::compactions` read that `loadMessages` would do. + */ + loadTrailingResultIds(session_id: string): Promise>; }; function buildFunctionExecutionEnd(executed: ExecutedCall) { @@ -65,11 +76,16 @@ function buildFunctionExecutionEnd(executed: ExecutedCall) { } export function createPorts(iii: ISdk): FunctionExecutePorts { - const base = createTurnStatePorts(iii); + const store = createTurnStore(iii); + const base = createTurnStatePorts(iii, store); return { ...base, + loadTrailingResultIds(session_id) { + return store.loadTrailingResultIds(session_id); + }, + async emitStart(session_id, call) { await emit(iii, session_id, { type: 'function_execution_start', @@ -83,6 +99,10 @@ export function createPorts(iii: ISdk): FunctionExecutePorts { await emit(iii, session_id, buildFunctionExecutionEnd(executed)); }, + emit(session_id, event) { + return emit(iii, session_id, event); + }, + async dispatch(call, session_id) { // Hook/policy see the routing envelope; the target function gets the // caller's untouched args so a payload `function_id` (e.g. for diff --git a/harness/src/turn-orchestrator/function-execute/run.ts b/harness/src/turn-orchestrator/function-execute/run.ts index 9ab75b54..75df079e 100644 --- a/harness/src/turn-orchestrator/function-execute/run.ts +++ b/harness/src/turn-orchestrator/function-execute/run.ts @@ -12,10 +12,13 @@ import { missingFunctionResult, unwrapAgentTrigger, } from '../agent-trigger.js'; -import { emitTurnEndOnce } from '../state-runtime/turn-end.js'; -import { persistedTrailingResultIds } from '../state-runtime/transcript.js'; import { - transitionTo, + emitTurnEndOnce, + endTurnForMaxTurns, + maxTurnsReached, + resumeToAssistantStreaming, +} from '../state-runtime/turn-end.js'; +import { type AwaitingApprovalEntry, type FunctionBatchTurnRecord, type TurnStateRecord, @@ -205,8 +208,7 @@ export async function finalizeBatch( function_results.push(toFunctionResultMessage(entry, result)); } - const messages = await ports.loadMessages(rec.session_id); - const alreadyPersisted = persistedTrailingResultIds(messages); + const alreadyPersisted = await ports.loadTrailingResultIds(rec.session_id); const fresh = function_results.filter((r) => !alreadyPersisted.has(r.function_call_id)); if (fresh.length < function_results.length) { logger.warn('finalizeBatch: skipped duplicate function_results (re-entry detected)', { @@ -223,10 +225,14 @@ export async function finalizeBatch( await emitTurnEndOnce(ports, rec, lastAssistant, function_results); + // Routing formerly done by the turn::steering_check FSM hop, now inline: + // one durable step (and its queue wake) less per round-trip. if (allTerminate) { await ports.finishSession(rec); + } else if (maxTurnsReached(rec)) { + await endTurnForMaxTurns(ports, rec); } else { - transitionTo(rec, 'steering_check'); + resumeToAssistantStreaming(rec); } (rec as TurnStateRecord).work = undefined; diff --git a/harness/src/turn-orchestrator/preflight.ts b/harness/src/turn-orchestrator/preflight.ts index 9efa3f30..cf3aa84a 100644 --- a/harness/src/turn-orchestrator/preflight.ts +++ b/harness/src/turn-orchestrator/preflight.ts @@ -8,8 +8,9 @@ * the in-flight compaction releases the lease instead of failing the run. */ -import { fetchModelLimit } from '../context-compaction/model-resolver.js'; +import { fetchModelLimit, limitFromModel } from '../context-compaction/model-resolver.js'; import { usable as computeUsable } from '../context-compaction/overflow.js'; +import type { Model } from '../models-catalog/types.js'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import type { AgentMessage } from '../types/agent-message.js'; @@ -39,8 +40,13 @@ export async function runPreflight( messages: AgentMessage[], providerID: string, modelID: string, + modelMeta?: Model, ): Promise<'ok' | 'compacted'> { - const model = await fetchModelLimit(iii, providerID, modelID); + // The turn's model, resolved once at provisioning. Use it directly when + // threaded (same limit a fetch yields); fetch only when it was absent. + const model = modelMeta + ? { providerID, modelID, modelLimit: limitFromModel(modelMeta) } + : await fetchModelLimit(iii, providerID, modelID); if (!model || model.modelLimit.context === 0) { if (model && model.modelLimit.context === 0) { logger.debug('preflight: model has no context_window; skipping pre-flight check', { diff --git a/harness/src/turn-orchestrator/provider-router.ts b/harness/src/turn-orchestrator/provider-router.ts index 2d3fcce6..e5d767e8 100644 --- a/harness/src/turn-orchestrator/provider-router.ts +++ b/harness/src/turn-orchestrator/provider-router.ts @@ -3,6 +3,7 @@ * orchestrator imports it directly; there is no `router::*` bus surface. */ +import type { Model } from '../models-catalog/types.js'; import type { AgentMessage } from '../types/agent-message.js'; import type { AgentFunction } from '../types/function.js'; import type { ProviderStreamInput, StreamChannelRef } from '../types/provider.js'; @@ -62,6 +63,8 @@ export function buildInput( messages: AgentMessage[], tools: AgentFunction[], thinking_level?: string, + model_meta?: Model, + resolution_key?: number, ): ProviderStreamInput { return { writer_ref, @@ -70,5 +73,7 @@ export function buildInput( messages: messages as unknown[], tools, ...(thinking_level ? { thinking_level } : {}), + ...(model_meta ? { model_meta } : {}), + ...(resolution_key !== undefined ? { resolution_key } : {}), }; } diff --git a/harness/src/turn-orchestrator/provisioning/ports.ts b/harness/src/turn-orchestrator/provisioning/ports.ts index 1dc14b0b..7e406f2e 100644 --- a/harness/src/turn-orchestrator/provisioning/ports.ts +++ b/harness/src/turn-orchestrator/provisioning/ports.ts @@ -2,13 +2,17 @@ * Typed dependency ports for provisioning. */ +import type { Model } from '../../models-catalog/types.js'; import type { ISdk } from '../../runtime/iii.js'; +import { getCatalogModel } from '../../runtime/output-tokens.js'; import type { RunRequest } from '../run-request.js'; import { createTurnStore } from '../state-runtime/store.js'; export type ProvisioningPorts = { loadRunRequest(session_id: string): Promise; saveRunRequest(session_id: string, request: RunRequest): Promise; + /** Resolve the full catalog entry for (provider, model); null on miss/error. */ + resolveModel(provider: string, model: string): Promise; }; export function createProvisioningPorts(iii: ISdk): ProvisioningPorts { @@ -22,5 +26,9 @@ export function createProvisioningPorts(iii: ISdk): ProvisioningPorts { saveRunRequest(session_id, request) { return store.saveRunRequest(session_id, request); }, + + resolveModel(provider, model) { + return getCatalogModel(iii, provider, model); + }, }; } diff --git a/harness/src/turn-orchestrator/provisioning/process.ts b/harness/src/turn-orchestrator/provisioning/process.ts index ce39f587..c42c8886 100644 --- a/harness/src/turn-orchestrator/provisioning/process.ts +++ b/harness/src/turn-orchestrator/provisioning/process.ts @@ -2,8 +2,10 @@ * Load run request, build the provisioned RunRequest, and register the FSM step. */ +import type { Model } from '../../models-catalog/types.js'; import type { ISdk } from '../../runtime/iii.js'; import { agentTriggerTool } from '../agent-trigger.js'; +import { decide } from '../provider-router.js'; import { runTransition } from '../run-transition.js'; import type { RunRequest } from '../run-request.js'; import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; @@ -14,6 +16,8 @@ import { createProvisioningPorts, type ProvisioningPorts } from './ports.js'; export type ProvisioningOutcome = { kind: 'ready'; runRequest: RunRequest; + /** Catalog entry for the turn's model; null when the catalog has no match. */ + model_meta: Model | null; }; export async function processProvisioning( @@ -30,6 +34,14 @@ export async function processProvisioning( model: request.model, }); + // Resolve the model once for the whole turn. The decided provider (not the + // raw request.provider, which may be blank) is what every later reader keys + // on, so resolve against it. Best-effort: null is fine — readers fall back. + const decision = decide({ provider: request.provider, model: request.model }); + const model_meta = request.model + ? await ports.resolveModel(decision.provider, request.model) + : null; + return { kind: 'ready', runRequest: { @@ -37,6 +49,7 @@ export async function processProvisioning( system_prompt: prompt, function_schemas: [agentTriggerTool()], }, + model_meta, }; } @@ -46,6 +59,9 @@ export async function applyProvisioningOutcome( outcome: ProvisioningOutcome, ): Promise { await ports.saveRunRequest(rec.session_id, outcome.runRequest); + // Persist the resolved catalog entry on the durable record so preflight, + // provider streaming, and turn-end compaction read it instead of re-fetching. + if (outcome.model_meta) rec.model_meta = outcome.model_meta; transitionTo(rec, 'assistant_streaming'); } diff --git a/harness/src/turn-orchestrator/state-runtime/ports.ts b/harness/src/turn-orchestrator/state-runtime/ports.ts index c7c117ae..72512469 100644 --- a/harness/src/turn-orchestrator/state-runtime/ports.ts +++ b/harness/src/turn-orchestrator/state-runtime/ports.ts @@ -5,6 +5,7 @@ import { emit } from '../events.js'; import type { RunRequest } from '../run-request.js'; import type { ISdk } from '../../runtime/iii.js'; +import type { ModelContextLimit } from '../../types/agent-event.js'; import type { AgentMessage, FunctionResultMessage } from '../../types/agent-message.js'; import { transitionTo, type TurnStateRecord } from '../state.js'; import { createTurnStore, type TurnStore } from './store.js'; @@ -19,6 +20,7 @@ export type TurnStatePorts = { session_id: string, message: AgentMessage, function_results: FunctionResultMessage[], + model_limit?: ModelContextLimit, ): Promise; finishSession(rec: TurnStateRecord): Promise; }; @@ -47,8 +49,13 @@ export function createTurnStatePorts(iii: ISdk, store?: TurnStore): TurnStatePor return s.saveRunRequest(session_id, request); }, - async emitTurnEnd(session_id, message, function_results) { - await emit(iii, session_id, { type: 'turn_end', message, function_results }); + async emitTurnEnd(session_id, message, function_results, model_limit) { + await emit(iii, session_id, { + type: 'turn_end', + message, + function_results, + ...(model_limit ? { model_limit } : {}), + }); }, async finishSession(rec) { diff --git a/harness/src/turn-orchestrator/state-runtime/store.ts b/harness/src/turn-orchestrator/state-runtime/store.ts index c135249d..b30e8036 100644 --- a/harness/src/turn-orchestrator/state-runtime/store.ts +++ b/harness/src/turn-orchestrator/state-runtime/store.ts @@ -13,6 +13,7 @@ import { type RunRequest, parseRunRequest } from '../run-request.js'; import { toView, type TurnStateView } from '../schemas.js'; import { type TurnState, type TurnStateRecord, parseTurnStateRecord } from '../state.js'; import { loadContextView } from './context-view.js'; +import { persistedTrailingResultIds } from './transcript.js'; /** * Turn-step wakes go to the engine's `default` queue. NOTE: engine.config.yaml @@ -50,6 +51,14 @@ export type TurnStore = { writeRecord(rec: TurnStateRecord): Promise; ensureSession(session_id: string): Promise; loadMessages(session_id: string): Promise; + /** + * Function_call_ids in the trailing `function_result` run, for batch dedup. + * Reads the raw message list on the default leaf (not the compaction-rebuilt + * window {@link loadMessages} returns), so it skips the paired + * `session-tree::compactions` read — the trailing results live in the tail + * either way. + */ + loadTrailingResultIds(session_id: string): Promise>; appendMessages(session_id: string, msgs: AgentMessage[]): Promise; loadRunRequest(session_id: string): Promise; saveRunRequest(session_id: string, request: RunRequest): Promise; @@ -147,6 +156,17 @@ export function createTurnStore(iii: ISdk): TurnStore { return loadContextView(iii, session_id); }, + async loadTrailingResultIds(session_id) { + // Same response (and same `?? []` for an empty/new session) as + // loadContextView, the sibling reader of session-tree::messages. + const resp = await iii.trigger }>({ + function_id: 'session-tree::messages', + payload: { session_id }, + }); + const messages = (resp?.messages ?? []).map((e) => e.message); + return persistedTrailingResultIds(messages); + }, + async appendMessages(session_id, msgs) { for (const message of msgs) { await iii.trigger({ diff --git a/harness/src/turn-orchestrator/state-runtime/turn-end.ts b/harness/src/turn-orchestrator/state-runtime/turn-end.ts index d2be3af2..a7e9c763 100644 --- a/harness/src/turn-orchestrator/state-runtime/turn-end.ts +++ b/harness/src/turn-orchestrator/state-runtime/turn-end.ts @@ -2,11 +2,15 @@ * Shared turn-end and FSM resume helpers for step outcome application. */ +import { limitFromModel } from '../../context-compaction/model-resolver.js'; +import type { AgentEvent, ModelContextLimit } from '../../types/agent-event.js'; import { emptyAssistant, + type AgentMessage, type AssistantMessage, type FunctionResultMessage, } from '../../types/agent-message.js'; +import { syntheticAssistant } from '../synthetic-assistant.js'; import { transitionTo, type TurnStateRecord } from '../state.js'; export type TurnEndEmitter = { @@ -14,6 +18,7 @@ export type TurnEndEmitter = { session_id: string, message: AssistantMessage, function_results: FunctionResultMessage[], + model_limit?: ModelContextLimit, ): Promise; }; @@ -25,7 +30,9 @@ export async function emitTurnEndOnce( ): Promise { if (rec.turn_end_emitted) return; const last = message ?? rec.last_assistant ?? emptyAssistant(); - await ports.emitTurnEnd(rec.session_id, last, function_results); + // Thread the turn's resolved limit so compaction skips its own models::get. + const model_limit = rec.model_meta ? limitFromModel(rec.model_meta) : undefined; + await ports.emitTurnEnd(rec.session_id, last, function_results, model_limit); rec.turn_end_emitted = true; } @@ -33,3 +40,36 @@ export function resumeToAssistantStreaming(rec: TurnStateRecord): void { rec.function_results = []; transitionTo(rec, 'assistant_streaming'); } + +export function maxTurnsReached(rec: TurnStateRecord): boolean { + return rec.max_turns !== undefined && rec.turn_count >= rec.max_turns; +} + +export type MaxTurnsEndPorts = TurnEndEmitter & { + appendMessages(session_id: string, msgs: AgentMessage[]): Promise; + emit(session_id: string, event: AgentEvent): Promise; + finishSession(rec: TurnStateRecord): Promise; +}; + +/** + * End the loop at the max_turns cap: persist + surface a synthetic assistant + * notice, emit turn_end (no-op when the step already emitted it), finish. + */ +export async function endTurnForMaxTurns( + ports: MaxTurnsEndPorts, + rec: TurnStateRecord, +): Promise { + const msg = syntheticAssistant({ + stop_reason: 'end', + text: `loop stopped: max_turns (${rec.max_turns ?? 0}) reached`, + }); + rec.last_assistant = msg; + await ports.appendMessages(rec.session_id, [msg]); + await ports.emit(rec.session_id, { + type: 'message_complete', + message: msg, + body_streamed: false, + }); + await emitTurnEndOnce(ports, rec, msg); + await ports.finishSession(rec); +} diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index 32785f87..a2977392 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -7,6 +7,7 @@ */ import { z } from 'zod'; +import type { Model } from '../models-catalog/types.js'; import type { AssistantMessage, FunctionResultMessage } from '../types/agent-message.js'; import type { ExecutedCall, FunctionBatchWork, PreparedCall } from './function-execute/types.js'; @@ -47,6 +48,15 @@ type TurnStateRecordCore = { updated_at_ms: number; /** Set during assistant_streaming when message_update deltas were emitted. */ assistant_body_streamed?: boolean; + /** + * Full catalog entry for this turn's model, resolved once at `provisioning` + * and read by preflight, provider streaming, and turn-end compaction instead + * of each re-fetching `models::get`. Optional: absent on records persisted + * before this field existed (and on a cold catalog) — every reader falls back + * to a live fetch. The model is fixed for a turn, so no invalidation is + * needed. Excluded from {@link toView}, so it never bloats turn_state events. + */ + model_meta?: Model; error?: { kind: string; message: string }; }; diff --git a/harness/src/turn-orchestrator/steering-check/process.ts b/harness/src/turn-orchestrator/steering-check/process.ts index 83c1d133..06d734a6 100644 --- a/harness/src/turn-orchestrator/steering-check/process.ts +++ b/harness/src/turn-orchestrator/steering-check/process.ts @@ -28,7 +28,7 @@ export function register(iii: ISdk): void { }, { description: - 'Run one durable FSM transition for session in state steering_check: continue after tool results or end the turn.', + 'DEPRECATED compat drain (steering_check removal, release A): routing is inlined into assistant_streaming/function_execute; this handler only drains records persisted in state steering_check before the deploy. Removed in release B once the queue is confirmed empty.', }, ); } diff --git a/harness/src/turn-orchestrator/steering-check/run.ts b/harness/src/turn-orchestrator/steering-check/run.ts index 8763ec12..980aec90 100644 --- a/harness/src/turn-orchestrator/steering-check/run.ts +++ b/harness/src/turn-orchestrator/steering-check/run.ts @@ -1,9 +1,20 @@ /** * Route steering_check outcomes and apply transitions. + * + * COMPAT (release A of the steering_check removal): nothing transitions into + * the `steering_check` state anymore — its routing is inlined into + * assistant-streaming finalizeAssistantTurn and function-execute finalizeBatch. + * This module stays registered for one release so queue messages / records + * persisted in `steering_check` at deploy time drain instead of DLQ-wedging + * the turn. Release B removes the state, schemas, and this registration. */ -import { syntheticAssistant } from '../synthetic-assistant.js'; -import { emitTurnEndOnce, resumeToAssistantStreaming } from '../state-runtime/turn-end.js'; +import { + emitTurnEndOnce, + endTurnForMaxTurns, + maxTurnsReached, + resumeToAssistantStreaming, +} from '../state-runtime/turn-end.js'; import type { SteeringCheckTurnRecord } from '../state.js'; import type { SteeringCheckPorts } from './ports.js'; @@ -18,29 +29,6 @@ export function route(has_function_results: boolean): SteeringRoute { return has_function_results ? 'continue_after_function' : 'end_turn'; } -function maxTurnsReached(rec: SteeringCheckTurnRecord): boolean { - return rec.max_turns !== undefined && rec.turn_count >= rec.max_turns; -} - -async function endForMaxTurns( - ports: SteeringCheckPorts, - rec: SteeringCheckTurnRecord, -): Promise { - const msg = syntheticAssistant({ - stop_reason: 'end', - text: `loop stopped: max_turns (${rec.max_turns ?? 0}) reached`, - }); - rec.last_assistant = msg; - await ports.appendMessages(rec.session_id, [msg]); - await ports.emit(rec.session_id, { - type: 'message_complete', - message: msg, - body_streamed: false, - }); - await emitTurnEndOnce(ports, rec, msg); - await ports.finishSession(rec); -} - export async function processSteeringCheck( _ports: SteeringCheckPorts, rec: SteeringCheckTurnRecord, @@ -66,7 +54,7 @@ export async function applySteeringCheckOutcome( ): Promise { switch (outcome.kind) { case 'max_turns_reached': - await endForMaxTurns(ports, rec); + await endTurnForMaxTurns(ports, rec); return; case 'continue_after_function': resumeToAssistantStreaming(rec); diff --git a/harness/src/types/agent-event.ts b/harness/src/types/agent-event.ts index 32705310..b69e8095 100644 --- a/harness/src/types/agent-event.ts +++ b/harness/src/types/agent-event.ts @@ -10,12 +10,22 @@ import type { AgentMessage, FunctionResultMessage } from './agent-message.js'; import type { FunctionResult } from './function.js'; import type { AssistantMessageEvent } from './stream-event.js'; +/** A model's context limits, in tokens. Produced once per turn by the orchestrator. */ +export type ModelContextLimit = { context: number; input: number; output: number }; + export type AgentEvent = | { type: 'agent_end'; messages: AgentMessage[] } | { type: 'turn_end'; message: AgentMessage; function_results: FunctionResultMessage[]; + /** + * Turn's model context limit, threaded from the orchestrator so the + * compaction worker reads it inline instead of re-fetching `models::get`. + * Optional: absent only when the catalog had no entry to resolve at turn + * start, where compaction falls back to resolving the model itself. + */ + model_limit?: ModelContextLimit; } | { type: 'message_update'; diff --git a/harness/src/types/provider.ts b/harness/src/types/provider.ts index b11535df..031b2a67 100644 --- a/harness/src/types/provider.ts +++ b/harness/src/types/provider.ts @@ -6,6 +6,7 @@ import { z } from 'zod'; import { zodToJsonSchema } from 'zod-to-json-schema'; +import { ModelSchema } from '../models-catalog/types.js'; /** * Lax `StreamChannelRef` — the SDK's canonical type lives in `iii-sdk`, @@ -44,6 +45,20 @@ export const ProviderStreamInputSchema = z.object({ * `reasoning_effort`); others ignore it. Absent = off. */ thinking_level: z.string().optional(), + /** + * Optional pre-resolved catalog entry for `model`, threaded from the + * orchestrator so the provider does not re-fetch `models::get` it already + * resolved at turn start. Providers MUST ignore it unless `model_meta.id` + * and `model_meta.provider` match this request, and fall back to a live + * fetch otherwise — the field is an optimization, never a source of truth. + */ + model_meta: ModelSchema.optional(), + /** + * Optional stable id for the turn (the run's start time). Providers may use + * it to dedupe per-stream credential resolution within a turn; a new turn + * carries a new key. Purely an optimization key — never a source of truth. + */ + resolution_key: z.number().optional(), }); export type ProviderStreamInput = z.infer; diff --git a/harness/tests/context-compaction/handler-async.test.ts b/harness/tests/context-compaction/handler-async.test.ts index ef7c9c08..23dd4c6d 100644 --- a/harness/tests/context-compaction/handler-async.test.ts +++ b/harness/tests/context-compaction/handler-async.test.ts @@ -1,5 +1,10 @@ -import { describe, expect, it } from 'vitest'; -import { extractEventPayload, turnEndUsage } from '../../src/context-compaction/handler-async.js'; +import { describe, expect, it, vi } from 'vitest'; +import { + extractEventPayload, + resolveModelFromEvent, + turnEndUsage, +} from '../../src/context-compaction/handler-async.js'; +import type { ISdk } from '../../src/runtime/iii.js'; describe('extractEventPayload', () => { it('handles camelCase envelope (event.data shape)', () => { @@ -54,3 +59,47 @@ describe('turnEndUsage', () => { expect(turnEndUsage({ type: 'TurnEnd' })).toBeNull(); }); }); + +describe('resolveModelFromEvent', () => { + function trackingIii(): { iii: ISdk; calls: string[] } { + const calls: string[] = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string }) => { + calls.push(req.function_id); + // Stand-in for a models::get fallback result. + return { context_window: 200_000, max_output_tokens: 8_096 }; + }), + } as unknown as ISdk; + return { iii, calls }; + } + + it('uses the inline limit and skips models::get', async () => { + const { iii, calls } = trackingIii(); + const event = { + type: 'turn_end', + message: { provider: 'anthropic', model: 'claude-sonnet-4-6' }, + model_limit: { context: 1_000_000, input: 1_000_000, output: 64_000 }, + }; + + const resolved = await resolveModelFromEvent(iii, 'sess-1', event); + + expect(calls).not.toContain('models::get'); + expect(resolved).toEqual({ + providerID: 'anthropic', + modelID: 'claude-sonnet-4-6', + modelLimit: { context: 1_000_000, input: 1_000_000, output: 64_000 }, + }); + }); + + it('falls back to models::get when no inline limit is present', async () => { + const { iii, calls } = trackingIii(); + const event = { + type: 'turn_end', + message: { provider: 'anthropic', model: 'claude-sonnet-4-6' }, + }; + + await resolveModelFromEvent(iii, 'sess-1', event); + + expect(calls).toContain('models::get'); + }); +}); diff --git a/harness/tests/integration/mode-approval.e2e.test.ts b/harness/tests/integration/mode-approval.e2e.test.ts index 3a4801bd..425ffdc6 100644 --- a/harness/tests/integration/mode-approval.e2e.test.ts +++ b/harness/tests/integration/mode-approval.e2e.test.ts @@ -50,7 +50,7 @@ describe('mode-driven approval e2e (real consultBefore)', () => { await h.runExecute('sess-full'); const rec = h.loadTurnRecord('sess-full'); - expect(rec?.state).toBe('steering_check'); + expect(rec?.state).toBe('assistant_streaming'); expect(rec?.awaiting_approval).toEqual([]); expect(rec?.work).toBeUndefined(); expect(h.stateStore.has('approvals/sess-full/fc-1')).toBe(false); @@ -68,7 +68,7 @@ describe('mode-driven approval e2e (real consultBefore)', () => { await h.runExecute('sess-grant'); const rec = h.loadTurnRecord('sess-grant'); - expect(rec?.state).toBe('steering_check'); + expect(rec?.state).toBe('assistant_streaming'); expect(rec?.awaiting_approval).toEqual([]); expect(executionEvents(h.emitted, 'function_execution_end', 'fc-1')).toHaveLength(1); }); @@ -84,7 +84,7 @@ describe('mode-driven approval e2e (real consultBefore)', () => { await h.runExecute('sess-auto'); expect(awaitingIds(h, 'sess-auto')).toEqual([]); - expect(h.loadTurnRecord('sess-auto')?.state).toBe('steering_check'); + expect(h.loadTurnRecord('sess-auto')?.state).toBe('assistant_streaming'); }); it('manual mode with no grant parks the call (falls through to policy → needs_approval)', async () => { @@ -145,10 +145,21 @@ describe('mode-driven approval e2e (real consultBefore)', () => { const rec = h.loadTurnRecord('sess-escalate'); // Denied up front: not parked, not executed, batch finalizes with an - // error result carrying the human_only_function denial. + // error result carrying the human_only_function denial. The result + // travels on turn_end; the inline resume clears the record. expect(rec?.awaiting_approval).toEqual([]); - expect(rec?.state).toBe('steering_check'); - const denied = rec?.function_results?.find((r) => r.function_call_id === 'fc-1'); + expect(rec?.state).toBe('assistant_streaming'); + expect(rec?.function_results).toEqual([]); + const turnEnd = h.emitted.find((e) => e.type === 'turn_end') as + | { + function_results: Array<{ + function_call_id: string; + is_error: boolean; + details: unknown; + }>; + } + | undefined; + const denied = turnEnd?.function_results.find((r) => r.function_call_id === 'fc-1'); expect(denied?.is_error).toBe(true); expect(JSON.stringify(denied?.details)).toContain('human_only_function'); // No real mode change leaked into settings. diff --git a/harness/tests/integration/parallel-approval.e2e.test.ts b/harness/tests/integration/parallel-approval.e2e.test.ts index 395f95b6..f14bc466 100644 --- a/harness/tests/integration/parallel-approval.e2e.test.ts +++ b/harness/tests/integration/parallel-approval.e2e.test.ts @@ -120,7 +120,7 @@ describe('parallel approval e2e', () => { await h.resolveApproval('sess-order', 'fc-1', 'allow'); rec = h.loadTurnRecord('sess-order'); expect(rec?.awaiting_approval).toEqual([]); - expect(rec?.state).toBe('steering_check'); + expect(rec?.state).toBe('assistant_streaming'); expect(rec?.work).toBeUndefined(); }); @@ -254,7 +254,7 @@ describe('parallel approval e2e', () => { expect(executionEvents(h.emitted, 'function_execution_end', 'fc-driver')).toHaveLength(1); expect(executionEvents(h.emitted, 'function_execution_end', 'fc-late')).toHaveLength(1); expect(rec?.awaiting_approval).toEqual([]); - expect(rec?.state).toBe('steering_check'); + expect(rec?.state).toBe('assistant_streaming'); }); it('re-enqueues a follow-up wake when a resolved call leaves siblings pending', async () => { @@ -298,7 +298,7 @@ describe('parallel approval e2e', () => { decision: 'allow', reason: null, }); - expect(h.loadTurnRecord('sess-wake')?.state).toBe('steering_check'); + expect(h.loadTurnRecord('sess-wake')?.state).toBe('assistant_streaming'); expect(h.loadTurnRecord('sess-wake')?.work).toBeUndefined(); }); }); diff --git a/harness/tests/provider-anthropic/auth.test.ts b/harness/tests/provider-anthropic/auth.test.ts index 395efd0b..c9b2119c 100644 --- a/harness/tests/provider-anthropic/auth.test.ts +++ b/harness/tests/provider-anthropic/auth.test.ts @@ -1,6 +1,14 @@ -import { beforeEach, describe, expect, it, vi } from 'vitest'; -import { buildConfig } from '../../src/provider-anthropic/auth.js'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { Model } from '../../src/models-catalog/types.js'; +import { + _resetProviderResolveCacheForTests, + buildConfig, + invalidateProviderResolveCache, +} from '../../src/provider-anthropic/auth.js'; import type { WorkerConfig } from '../../src/provider-anthropic/config.js'; +import { streamAnthropic } from '../../src/provider-anthropic/stream.js'; +import { buildThinkingConfig } from '../../src/provider-anthropic/thinking.js'; +import type { AnthropicConfig } from '../../src/provider-anthropic/types.js'; import type { ISdk } from '../../src/runtime/iii.js'; const WORKER: WorkerConfig = { @@ -38,6 +46,31 @@ function iiiWithCatalog(entry: unknown, opts: { throws?: boolean } = {}): ISdk { return { trigger } as unknown as ISdk; } +/** Count models::get calls so we can prove the pre-resolved path skips the fetch. */ +function iiiCountingCatalog(entry: unknown): { iii: ISdk; modelsGetCalls: () => number } { + let n = 0; + const trigger = vi.fn().mockImplementation(async (req: { function_id: string }) => { + if (req.function_id === 'models::get') { + n++; + return entry; + } + return null; + }); + return { iii: { trigger } as unknown as ISdk, modelsGetCalls: () => n }; +} + +const THINKING_MODEL: Model = { + id: 'claude-sonnet-4-6', + provider: 'anthropic', + api: 'anthropic-messages', + display_name: 'Claude Sonnet 4.6', + context_window: 1_000_000, + max_output_tokens: 64_000, + supports_thinking: true, + supports_xhigh: true, + thinking_budgets: { minimal: 2_000, low: 4_000, medium: 8_000, high: 16_000 }, +}; + describe('buildConfig max_tokens resolution', () => { beforeEach(() => { resolveProviderMock.mockReset(); @@ -98,3 +131,150 @@ describe('buildConfig max_tokens resolution', () => { ); }); }); + +describe('buildConfig pre-resolved model threading', () => { + beforeEach(() => { + resolveProviderMock.mockReset(); + resolveProviderMock.mockResolvedValue(resolved(null)); + }); + + it('uses the pre-resolved model and skips models::get', async () => { + const { iii, modelsGetCalls } = iiiCountingCatalog({ id: 'nope', max_output_tokens: 1 }); + + const cfg = await buildConfig(iii, WORKER, 'claude-sonnet-4-6', THINKING_MODEL); + + expect(modelsGetCalls()).toBe(0); + expect(cfg.catalog).toEqual(THINKING_MODEL); + expect(cfg.max_tokens).toBe(32_000); // min(64k, 32k cap) + }); + + it('fetches models::get when no pre-resolved model is threaded', async () => { + const { iii, modelsGetCalls } = iiiCountingCatalog({ + id: 'claude-sonnet-4-6', + max_output_tokens: 64_000, + }); + + const cfg = await buildConfig(iii, WORKER, 'claude-sonnet-4-6'); + + expect(modelsGetCalls()).toBe(1); + expect(cfg.catalog?.id).toBe('claude-sonnet-4-6'); + }); + + it.each([ + 'high', + 'xhigh', + ])('produces a byte-identical thinking config (%s) with vs without the pre-resolved model', async (level) => { + // Fetch path: models::get returns the same Model. + const fetched = await buildConfig(iiiWithCatalog(THINKING_MODEL), WORKER, 'claude-sonnet-4-6'); + // Threaded path: the same Model passed in, models::get not consulted. + const { iii } = iiiCountingCatalog({ id: 'nope' }); + const threaded = await buildConfig(iii, WORKER, 'claude-sonnet-4-6', THINKING_MODEL); + + const fetchedThinking = buildThinkingConfig(level, fetched.max_tokens, fetched.catalog); + const threadedThinking = buildThinkingConfig(level, threaded.max_tokens, threaded.catalog); + + expect(threadedThinking).toEqual(fetchedThinking); + expect(threadedThinking).toBeDefined(); + }); +}); + +function anthropicCfg(): AnthropicConfig { + return { + credential_value: 'sk-test', + model: 'claude-sonnet-4-6', + max_tokens: 32_000, + api_url: 'https://api.example/v1/messages', + auth_mode: 'api_key', + }; +} + +describe('buildConfig per-turn credential resolution cache', () => { + beforeEach(() => { + resolveProviderMock.mockReset(); + resolveProviderMock.mockResolvedValue(resolved(null)); + _resetProviderResolveCacheForTests(); + }); + + it('resolves once per turn key and reuses it across streams', async () => { + const iii = iiiWithCatalog({ id: 'claude-sonnet-4-6', max_output_tokens: 64_000 }); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + expect(resolveProviderMock).toHaveBeenCalledTimes(1); + }); + + it('re-resolves when the turn key changes (next user turn)', async () => { + const iii = iiiWithCatalog({ id: 'claude-sonnet-4-6', max_output_tokens: 64_000 }); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 200); + expect(resolveProviderMock).toHaveBeenCalledTimes(2); + }); + + it('does not cache when no turn key is threaded', async () => { + const iii = iiiWithCatalog({ id: 'claude-sonnet-4-6', max_output_tokens: 64_000 }); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6'); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6'); + expect(resolveProviderMock).toHaveBeenCalledTimes(2); + }); + + it('invalidateProviderResolveCache forces a re-resolve on the same key', async () => { + const iii = iiiWithCatalog({ id: 'claude-sonnet-4-6', max_output_tokens: 64_000 }); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + invalidateProviderResolveCache(); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + expect(resolveProviderMock).toHaveBeenCalledTimes(2); + }); + + describe('401 invalidation via streamAnthropic', () => { + afterEach(() => { + vi.unstubAllGlobals(); + }); + + it('a 401 stream drops the cache so the next turn re-resolves the credential', async () => { + const iii = iiiWithCatalog({ id: 'claude-sonnet-4-6', max_output_tokens: 64_000 }); + // Prime the cache for turn key 100. + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + expect(resolveProviderMock).toHaveBeenCalledTimes(1); + + // A stream hitting 401 must invalidate the cached resolution. + vi.stubGlobal( + 'fetch', + vi.fn(async () => new Response('unauthorized', { status: 401 })), + ); + for await (const _ev of streamAnthropic({ + cfg: anthropicCfg(), + system_prompt: '', + messages: [], + tools: [], + })) { + // drain + } + + // Same turn key now re-resolves because the 401 cleared the cache. + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + expect(resolveProviderMock).toHaveBeenCalledTimes(2); + }); + + it('a 200 stream leaves the cache intact', async () => { + const iii = iiiWithCatalog({ id: 'claude-sonnet-4-6', max_output_tokens: 64_000 }); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + + vi.stubGlobal( + 'fetch', + vi.fn(async () => new Response('data: {"type":"message_stop"}\n\n', { status: 200 })), + ); + for await (const _ev of streamAnthropic({ + cfg: anthropicCfg(), + system_prompt: '', + messages: [], + tools: [], + })) { + // drain + } + + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + expect(resolveProviderMock).toHaveBeenCalledTimes(1); + }); + }); +}); diff --git a/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts b/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts index 1da608fc..2fd5e54b 100644 --- a/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts +++ b/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts @@ -24,6 +24,7 @@ export function mockTurnStore(overrides: Partial = {}): MockTurnStore writeRecord: vi.fn(async () => {}), ensureSession: vi.fn(async () => {}), loadMessages: vi.fn(async () => []), + loadTrailingResultIds: vi.fn(async () => new Set()), appendMessages: vi.fn(async () => {}), loadRunRequest: vi.fn(async () => defaultRunRequest), saveRunRequest: vi.fn(async () => {}), diff --git a/harness/tests/turn-orchestrator/assistant-streaming.test.ts b/harness/tests/turn-orchestrator/assistant-streaming.test.ts index 36edfab2..7b6c1bec 100644 --- a/harness/tests/turn-orchestrator/assistant-streaming.test.ts +++ b/harness/tests/turn-orchestrator/assistant-streaming.test.ts @@ -125,8 +125,8 @@ describe('routeAssistantTurn', () => { ).toBe('function_execute'); }); - it('routes text-only assistants to steering_check', () => { - expect(routeAssistantTurn(assistant()).kind).toBe('steering_check'); + it('routes text-only assistants to end_turn', () => { + expect(routeAssistantTurn(assistant()).kind).toBe('end_turn'); }); }); diff --git a/harness/tests/turn-orchestrator/assistant.test.ts b/harness/tests/turn-orchestrator/assistant.test.ts index b70e61e6..f8693f10 100644 --- a/harness/tests/turn-orchestrator/assistant.test.ts +++ b/harness/tests/turn-orchestrator/assistant.test.ts @@ -154,7 +154,7 @@ describe('handleStreaming', () => { expect(rec.work?.prepared).toHaveLength(1); }); - it('routes to steering_check when the assistant made no calls', async () => { + it('ends the turn inline when the assistant made no calls', async () => { const finalMsg = assistant({ content: [{ type: 'text', text: 'done reply' }] }); const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const { iii } = fakeIiiWithDone(finalMsg); @@ -164,11 +164,13 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - expect(rec.state).toBe('steering_check'); + // No-function-call turns finish inline (formerly the steering_check hop). + expect(rec.state).toBe('stopped'); + expect(rec.turn_end_emitted).toBe(true); expect(rec.last_assistant).toEqual(finalMsg); }); - it('captures provider done frame and routes correctly (text-only → steering_check)', async () => { + it('captures provider done frame and routes correctly (text-only → end of turn)', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const finalMsg = assistant({ content: [{ type: 'text', text: 'done reply' }] }); let deliver: ((msg: string) => void) | null = null; @@ -199,7 +201,7 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('stopped'); expect(rec.last_assistant).toEqual(finalMsg); }); diff --git a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts index 4368ea46..82085ccd 100644 --- a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts +++ b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts @@ -113,14 +113,19 @@ describe('handleAwaitingApproval', () => { loadMessages: vi.fn(async () => []), appendMessages: vi.fn(async () => {}), }); - vi.spyOn(events, 'emit').mockResolvedValue(undefined); + const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleAwaitingApproval(iii, rec); expect(rec.awaiting_approval).toEqual([]); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('assistant_streaming'); expect(rec.work).toBeUndefined(); - expect(rec.function_results).toHaveLength(1); + // Results travel on turn_end; the inline resume clears the record. + expect(rec.function_results).toEqual([]); + const turnEnd = emitSpy.mock.calls.find((call) => call[2]?.type === 'turn_end')?.[2] as + | { function_results: unknown[] } + | undefined; + expect(turnEnd?.function_results).toHaveLength(1); }); it('leaves state parked when awaiting entries remain undecided', async () => { diff --git a/harness/tests/turn-orchestrator/function-execute.test.ts b/harness/tests/turn-orchestrator/function-execute.test.ts index fa6fc90b..1ff0529d 100644 --- a/harness/tests/turn-orchestrator/function-execute.test.ts +++ b/harness/tests/turn-orchestrator/function-execute.test.ts @@ -8,10 +8,14 @@ import { finalizeBatch, runOneCall, } from '../../src/turn-orchestrator/function-execute/run.js'; -import { withRoutingEnvelope } from '../../src/turn-orchestrator/function-execute/ports.js'; +import { + createPorts, + withRoutingEnvelope, +} from '../../src/turn-orchestrator/function-execute/ports.js'; import type { FunctionExecutePorts } from '../../src/turn-orchestrator/function-execute/ports.js'; import type { ExecutedCall } from '../../src/turn-orchestrator/function-execute/types.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; +import type { ISdk } from '../../src/runtime/iii.js'; import type { AssistantMessage } from '../../src/types/agent-message.js'; function makeAssistant( @@ -39,6 +43,7 @@ function stubPorts(overrides: Partial = {}): FunctionExecu return { emitStart: vi.fn(async () => {}), emitEnd: vi.fn(async () => {}), + emit: vi.fn(async () => {}), checkpoint: vi.fn(async () => {}), dispatch: vi.fn(async () => ({ kind: 'result' as const, @@ -49,6 +54,7 @@ function stubPorts(overrides: Partial = {}): FunctionExecu details: {}, })), loadMessages: vi.fn(async () => []), + loadTrailingResultIds: vi.fn(async () => new Set()), appendMessages: vi.fn(async () => {}), emitTurnEnd: vi.fn(async () => {}), finishSession: vi.fn(async (rec) => { @@ -184,17 +190,8 @@ describe('finalizeBatch', () => { const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; const appendMessages = vi.fn(async () => {}); const ports = stubPorts({ - loadMessages: vi.fn(async () => [ - { - role: 'function_result' as const, - function_call_id: 'fc-1', - function_id: 'shell::run', - content: [{ type: 'text' as const, text: 'existing' }], - details: {}, - is_error: false, - timestamp: 1, - }, - ]), + // fc-1 already lives in the trailing result run → its id is "persisted". + loadTrailingResultIds: vi.fn(async () => new Set(['fc-1'])), appendMessages, }); const rec = newRecord('s1'); @@ -215,6 +212,116 @@ describe('finalizeBatch', () => { await finalizeBatch(ports, rec); expect(appendMessages).not.toHaveBeenCalled(); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('assistant_streaming'); + }); + + // Inline replacements for the removed turn::steering_check hop. + it('resumes to assistant_streaming with cleared results when a result continues', async () => { + const ports = stubPorts(); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + const rec = newRecord('s1'); + enterFunctionExecute(rec, makeAssistant([fc])); + rec.state = 'function_execute'; + rec.work = { + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {} }, + is_error: false, + duration_ms: 1, + }, + }, + }; + + await finalizeBatch(ports, rec); + + expect(rec.state).toBe('assistant_streaming'); + expect(rec.function_results).toEqual([]); + expect(rec.turn_end_emitted).toBe(true); + expect(ports.finishSession).not.toHaveBeenCalled(); + }); + + it('ends the loop inline at the max_turns cap instead of resuming', async () => { + const appendMessages = vi.fn(async () => {}); + const emit = vi.fn(async () => {}); + const ports = stubPorts({ appendMessages, emit }); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + const rec = { ...newRecord('s1', 2), turn_count: 2 }; + enterFunctionExecute(rec, makeAssistant([fc])); + rec.state = 'function_execute'; + rec.work = { + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {} }, + is_error: false, + duration_ms: 1, + }, + }, + }; + + await finalizeBatch(ports, rec); + + expect(rec.state).toBe('stopped'); + expect(ports.finishSession).toHaveBeenCalledOnce(); + // Two appends: the batch's function_results, then the synthetic notice. + expect(appendMessages).toHaveBeenCalledTimes(2); + const appended = appendMessages.mock.calls[1]?.[1] as Array<{ content: unknown[] }>; + expect(JSON.stringify(appended)).toContain('max_turns (2) reached'); + expect( + emit.mock.calls.some((call) => (call[1] as { type: string })?.type === 'message_complete'), + ).toBe(true); + }); +}); + +describe('createPorts().loadTrailingResultIds', () => { + function trackingIii(messages: Array<{ message: unknown }>): { + iii: ISdk; + calls: Array<{ function_id: string; payload: unknown }>; + } { + const calls: Array<{ function_id: string; payload: unknown }> = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { + calls.push({ function_id: req.function_id, payload: req.payload }); + if (req.function_id === 'session-tree::messages') return { messages }; + return null; + }), + } as unknown as ISdk; + return { iii, calls }; + } + + const resultMsg = (id: string) => ({ + message: { + role: 'function_result' as const, + function_call_id: id, + function_id: 'shell::run', + content: [], + details: {}, + is_error: false, + timestamp: 1, + }, + }); + + it('reads only session-tree::messages (no compactions) on the default leaf', async () => { + const { iii, calls } = trackingIii([resultMsg('fc-1'), resultMsg('fc-2')]); + + const ids = await createPorts(iii).loadTrailingResultIds('s1'); + + expect(ids).toEqual(new Set(['fc-1', 'fc-2'])); + expect(calls.map((c) => c.function_id)).toEqual(['session-tree::messages']); + expect(calls.some((c) => c.function_id === 'session-tree::compactions')).toBe(false); + // Default leaf: no branch_leaf in the payload. + expect(calls[0]?.payload).toEqual({ session_id: 's1' }); + }); + + it('returns only the trailing result run (stops at the first non-result)', async () => { + const assistant = { message: { role: 'assistant' as const } }; + const { iii } = trackingIii([resultMsg('old'), assistant, resultMsg('fc-1')]); + + const ids = await createPorts(iii).loadTrailingResultIds('s1'); + + expect(ids).toEqual(new Set(['fc-1'])); }); }); diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index 54a36fe5..0d091862 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -10,7 +10,8 @@ import { parseApprovalDecision } from '../../src/turn-orchestrator/function-awai import { handleExecute } from '../../src/turn-orchestrator/function-execute/process.js'; import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; import type { FunctionBatchWork } from '../../src/turn-orchestrator/function-execute/types.js'; -import type { AssistantMessage } from '../../src/types/agent-message.js'; +import { persistedTrailingResultIds } from '../../src/turn-orchestrator/state-runtime/transcript.js'; +import type { AgentMessage, AssistantMessage } from '../../src/types/agent-message.js'; afterEach(() => { vi.restoreAllMocks(); @@ -108,6 +109,7 @@ describe('handleExecute new flow', () => { terminate: false, }, }); + const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); enterFunctionExecute( @@ -118,9 +120,15 @@ describe('handleExecute new flow', () => { mockFinalizePersistence(); await handleExecute(iii, rec); - expect(rec.state).toBe('steering_check'); - expect(rec.function_results).toHaveLength(1); - expect(rec.function_results[0]?.function_call_id).toBe('fc-1'); + // Resumed inline for the next round-trip (formerly via the steering_check + // hop); the batch results travel on turn_end, the record is cleared. + expect(rec.state).toBe('assistant_streaming'); + expect(rec.function_results).toEqual([]); + const turnEnd = emitSpy.mock.calls.find((call) => call[2]?.type === 'turn_end')?.[2] as + | { function_results: Array<{ function_call_id: string }> } + | undefined; + expect(turnEnd?.function_results).toHaveLength(1); + expect(turnEnd?.function_results[0]?.function_call_id).toBe('fc-1'); }); it('finishes the session when every function result terminates', async () => { @@ -242,13 +250,18 @@ describe('handleExecute new flow', () => { executed: {}, }); mockFinalizePersistence(); + const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); await expect(handleExecute(iii, rec)).resolves.toBeUndefined(); - expect(rec.state).toBe('steering_check'); - expect(rec.function_results).toHaveLength(1); - expect(rec.function_results[0]?.is_error).toBe(true); - const details = rec.function_results[0]?.details as Record; + // Denial result travels on turn_end; the record resumes with cleared results. + expect(rec.state).toBe('assistant_streaming'); + const turnEnd = emitSpy.mock.calls.find((call) => call[2]?.type === 'turn_end')?.[2] as + | { function_results: Array<{ is_error: boolean; details: Record }> } + | undefined; + expect(turnEnd?.function_results).toHaveLength(1); + expect(turnEnd?.function_results[0]?.is_error).toBe(true); + const details = turnEnd?.function_results[0]?.details as Record; expect(details?.status).toBe('denied'); expect(details?.denied_by).toBe('gate_unavailable'); expect(details?.function_id).toBe('shell::fs::write'); @@ -281,7 +294,7 @@ describe('handleExecute new flow', () => { (call) => (call[0] as { function_id: string }).function_id === 'shell::run', ); expect(shellCalls).toHaveLength(0); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('assistant_streaming'); }); it('replays persisted executed calls without re-dispatching (re-entry with pre-populated work.executed)', async () => { @@ -311,10 +324,10 @@ describe('handleExecute new flow', () => { await handleExecute(iii, rec); expect(dispatchSpy).not.toHaveBeenCalled(); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('assistant_streaming'); }); - it('transitions to steering_check after a successful hook dispatch', async () => { + it('resumes to assistant_streaming after a successful hook dispatch', async () => { vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ kind: 'result', result: { @@ -331,7 +344,7 @@ describe('handleExecute new flow', () => { mockFinalizePersistence(); await handleExecute(iii, rec); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('assistant_streaming'); }); it('emits turn lifecycle and sets turn_end_emitted when last_assistant is present', async () => { @@ -376,7 +389,7 @@ describe('handleExecute new flow', () => { await handleExecute(iii, rec); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('assistant_streaming'); expect(rec.turn_end_emitted).toBe(true); expect(emitSpy.mock.calls.some((call) => call[2]?.type === 'turn_end')).toBe(true); }); @@ -400,6 +413,9 @@ describe('handleExecute new flow', () => { let storedMessages: unknown[] = []; installMockTurnStore({ loadMessages: vi.fn(async () => storedMessages as never), + loadTrailingResultIds: vi.fn(async () => + persistedTrailingResultIds(storedMessages as AgentMessage[]), + ), appendMessages: vi.fn(async (_sid, msgs) => { storedMessages = [...storedMessages, ...msgs]; }), diff --git a/harness/tests/turn-orchestrator/preflight.test.ts b/harness/tests/turn-orchestrator/preflight.test.ts index 8ef858d3..03c99680 100644 --- a/harness/tests/turn-orchestrator/preflight.test.ts +++ b/harness/tests/turn-orchestrator/preflight.test.ts @@ -120,6 +120,45 @@ describe('runPreflight', () => { expect(result).toBe('ok'); }); + it('uses the pre-resolved model and skips models::get', async () => { + const { iii, calls } = makeIii({ + // If consulted, this tiny window would force a compact — proving a miss. + modelsGetResult: { context_window: 1, max_output_tokens: 0 }, + }); + const modelMeta = { + id: 'claude-3', + provider: 'anthropic', + api: 'anthropic-messages', + display_name: 'Claude 3', + context_window: 200_000, + max_output_tokens: 8_096, + }; + + const result = await runPreflight( + iii, + 'session-1', + [smallMessage], + 'anthropic', + 'claude-3', + modelMeta, + ); + + expect(result).toBe('ok'); + expect(calls.some((c) => c.function_id === 'models::get')).toBe(false); + expect(calls.some((c) => c.function_id === 'context-compaction::compact_now')).toBe(false); + }); + + it('fetches models::get when no pre-resolved model is threaded', async () => { + const { iii, calls } = makeIii({ + modelsGetResult: { context_window: 200_000, max_output_tokens: 8_096 }, + }); + + const result = await runPreflight(iii, 'session-1', [smallMessage], 'anthropic', 'claude-3'); + + expect(result).toBe('ok'); + expect(calls.some((c) => c.function_id === 'models::get')).toBe(true); + }); + it('passes session_id and model info to compact_now', async () => { const { iii, calls } = makeIii({ modelsGetResult: { context_window: 10, max_output_tokens: 0 }, diff --git a/harness/tests/turn-orchestrator/provider-router.test.ts b/harness/tests/turn-orchestrator/provider-router.test.ts index 59eed034..d69c0ccb 100644 --- a/harness/tests/turn-orchestrator/provider-router.test.ts +++ b/harness/tests/turn-orchestrator/provider-router.test.ts @@ -106,4 +106,39 @@ describe('buildInput', () => { const withoutLevel = buildInput(decision, writer, 'sys', [], []); expect(withoutLevel).not.toHaveProperty('thinking_level'); }); + + it('carries model_meta when provided and omits it when absent', () => { + const decision = { provider: 'anthropic', model: 'claude-sonnet-4-6' } as const; + const writer = { channel_id: 'c', access_key: 'k', direction: 'write' } as const; + const model_meta = { + id: 'claude-sonnet-4-6', + provider: 'anthropic', + api: 'anthropic-messages', + display_name: 'Claude Sonnet 4.6', + context_window: 1_000_000, + max_output_tokens: 64_000, + }; + const withMeta = buildInput(decision, writer, 'sys', [], [], undefined, model_meta); + expect(withMeta.model_meta).toEqual(model_meta); + const withoutMeta = buildInput(decision, writer, 'sys', [], []); + expect(withoutMeta).not.toHaveProperty('model_meta'); + }); + + it('carries resolution_key when provided and omits it when absent', () => { + const decision = { provider: 'anthropic', model: 'claude' } as const; + const writer = { channel_id: 'c', access_key: 'k', direction: 'write' } as const; + const withKey = buildInput( + decision, + writer, + 'sys', + [], + [], + undefined, + undefined, + 1738000000000, + ); + expect(withKey.resolution_key).toBe(1738000000000); + const withoutKey = buildInput(decision, writer, 'sys', [], []); + expect(withoutKey).not.toHaveProperty('resolution_key'); + }); }); diff --git a/harness/tests/turn-orchestrator/provisioning-layer.test.ts b/harness/tests/turn-orchestrator/provisioning-layer.test.ts index 18064dce..291226e5 100644 --- a/harness/tests/turn-orchestrator/provisioning-layer.test.ts +++ b/harness/tests/turn-orchestrator/provisioning-layer.test.ts @@ -1,9 +1,21 @@ import { describe, expect, it, vi } from 'vitest'; -import { applyProvisioningOutcome } from '../../src/turn-orchestrator/provisioning/process.js'; +import type { Model } from '../../src/models-catalog/types.js'; import type { ProvisioningPorts } from '../../src/turn-orchestrator/provisioning/ports.js'; -import { processProvisioning } from '../../src/turn-orchestrator/provisioning/process.js'; +import { + applyProvisioningOutcome, + processProvisioning, +} from '../../src/turn-orchestrator/provisioning/process.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; +const FAKE_MODEL: Model = { + id: 'gpt-4', + provider: 'openai', + api: 'openai-responses', + display_name: 'GPT-4', + context_window: 200_000, + max_output_tokens: 16_000, +}; + function stubPorts(overrides: Partial = {}): ProvisioningPorts { return { loadRunRequest: vi.fn(async () => ({ @@ -14,6 +26,7 @@ function stubPorts(overrides: Partial = {}): ProvisioningPort function_schemas: [], })), saveRunRequest: vi.fn(async () => {}), + resolveModel: vi.fn(async () => null), ...overrides, }; } @@ -57,10 +70,44 @@ describe('processProvisioning', () => { expect(outcome.runRequest.system_prompt).toBe('custom override'); }); + + it('resolves the model once against the DECIDED provider and returns it', async () => { + const resolveModel = vi.fn(async () => FAKE_MODEL); + const ports = stubPorts({ + // provider blank → decide() must resolve it to a concrete provider. + loadRunRequest: vi.fn(async () => ({ + provider: '', + model: 'gpt-4', + mode: null, + system_prompt: '', + function_schemas: [], + })), + resolveModel, + }); + const rec = { ...newRecord('s1'), state: 'provisioning' as const }; + + const outcome = await processProvisioning(ports, rec); + + expect(resolveModel).toHaveBeenCalledTimes(1); + // decide() maps a blank provider + 'gpt-4' → openai. + expect(resolveModel).toHaveBeenCalledWith('openai', 'gpt-4'); + expect(outcome.model_meta).toEqual(FAKE_MODEL); + }); + + it('does not resolve when there is no model id', async () => { + const resolveModel = vi.fn(async () => FAKE_MODEL); + const ports = stubPorts({ resolveModel }); + const rec = { ...newRecord('s1'), state: 'provisioning' as const }; + + const outcome = await processProvisioning(ports, rec); + + expect(resolveModel).not.toHaveBeenCalled(); + expect(outcome.model_meta).toBeNull(); + }); }); describe('applyProvisioningOutcome', () => { - it('saves run request and transitions to assistant_streaming', async () => { + it('saves run request, persists model_meta on the record, and transitions', async () => { const saveRunRequest = vi.fn(async () => {}); const ports = stubPorts({ saveRunRequest }); const rec = { ...newRecord('s1'), state: 'provisioning' as const }; @@ -72,9 +119,31 @@ describe('applyProvisioningOutcome', () => { function_schemas: [], }; - await applyProvisioningOutcome(ports, rec, { kind: 'ready', runRequest }); + await applyProvisioningOutcome(ports, rec, { + kind: 'ready', + runRequest, + model_meta: FAKE_MODEL, + }); expect(saveRunRequest).toHaveBeenCalledWith('s1', runRequest); + expect(rec.model_meta).toEqual(FAKE_MODEL); + expect(rec.state).toBe('assistant_streaming'); + }); + + it('leaves model_meta unset when resolution returned null', async () => { + const ports = stubPorts(); + const rec = { ...newRecord('s1'), state: 'provisioning' as const }; + const runRequest = { + provider: 'openai', + model: 'gpt-4', + mode: 'agent' as const, + system_prompt: 'prompt', + function_schemas: [], + }; + + await applyProvisioningOutcome(ports, rec, { kind: 'ready', runRequest, model_meta: null }); + + expect(rec.model_meta).toBeUndefined(); expect(rec.state).toBe('assistant_streaming'); }); }); diff --git a/harness/tests/turn-orchestrator/steering-check-layer.test.ts b/harness/tests/turn-orchestrator/steering-check-layer.test.ts index ad6f567a..a3fc4dc0 100644 --- a/harness/tests/turn-orchestrator/steering-check-layer.test.ts +++ b/harness/tests/turn-orchestrator/steering-check-layer.test.ts @@ -1,5 +1,4 @@ import { describe, expect, it, vi } from 'vitest'; -import type { AgentMessage } from '../../src/types/agent-message.js'; import { applySteeringCheckOutcome, processSteeringCheck, @@ -110,7 +109,8 @@ describe('applySteeringCheckOutcome', () => { expect(rec.state).toBe('stopped'); expect(rec.turn_end_emitted).toBe(true); - expect(emitTurnEnd).toHaveBeenCalledWith('s1', expect.anything(), []); + // 4th arg is the optional model_limit, undefined here (no model_meta on rec). + expect(emitTurnEnd).toHaveBeenCalledWith('s1', expect.anything(), [], undefined); expect(finishSession).toHaveBeenCalled(); }); }); From 3da73727f87619570837559e6388583e9de25a82 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Tue, 9 Jun 2026 16:41:16 -0300 Subject: [PATCH 02/11] perf(session): batch session-tree appends into one trigger per call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit appendMessages fired one session-tree::append trigger per message, and each append re-read the whole session (state::list) to resolve the active leaf and refreshed session meta — so persisting N messages cost N triggers, N full-session reads, and N meta writes. Add session-tree::append_batch: resolve the active leaf once, chain the messages in memory (each entry's parent is the previous), and write them through a new SessionStore.appendMany that refreshes meta once. The orchestrator's appendMessages now fires a single batch trigger. Behavior is identical to the serial loop (same parent chain, same order); batch entries get strictly increasing timestamps so the last entry stays the resolvable leaf for the next append. The per-message session-tree::append stays registered for other callers (compaction, tests). --- harness/src/session/tree/operations.ts | 40 ++++++++++++ harness/src/session/tree/register.ts | 24 +++++++ harness/src/session/tree/store.ts | 36 +++++++++++ .../turn-orchestrator/state-runtime/store.ts | 16 ++--- harness/tests/session/operations.test.ts | 63 ++++++++++++++++++- .../tests/turn-orchestrator/run-start.test.ts | 4 +- harness/tests/turn-orchestrator/store.test.ts | 22 ++++++- 7 files changed, 194 insertions(+), 11 deletions(-) diff --git a/harness/src/session/tree/operations.ts b/harness/src/session/tree/operations.ts index 69eb7c7e..513f13c1 100644 --- a/harness/src/session/tree/operations.ts +++ b/harness/src/session/tree/operations.ts @@ -86,6 +86,46 @@ export async function appendMessage( return id; } +/** + * Append several messages as one linear chain in a single store round-trip. + * Equivalent to calling {@link appendMessage} for each message in order, but + * resolves the active leaf once (not once per message) and writes through + * {@link SessionStore.appendMany} so meta is refreshed once. Entries get + * strictly increasing timestamps so the batch's last entry remains the + * resolvable leaf for the next append. + */ +export async function appendMessages( + store: SessionStore, + session_id: string, + parent_id: string | null, + messages: AgentMessage[], +): Promise { + if (messages.length === 0) return []; + + let parent = parent_id; + if (parent === null) { + const path = await activePath(store, session_id); + parent = path.at(-1) ?? null; + } + + const base = Date.now(); + const entries: SessionEntry[] = messages.map((message, i) => { + const id = randomUUID(); + const entry: SessionEntry = { + type: 'message', + id, + parent_id: parent, + message, + timestamp: base + i, + }; + parent = id; + return entry; + }); + + await store.appendMany(session_id, entries); + return entries.map((e) => e.id); +} + export async function activePath( store: SessionStore, session_id: string, diff --git a/harness/src/session/tree/register.ts b/harness/src/session/tree/register.ts index 558c960c..e7c62ad4 100644 --- a/harness/src/session/tree/register.ts +++ b/harness/src/session/tree/register.ts @@ -9,6 +9,7 @@ import type { AgentMessage } from '../../types/agent-message.js'; import { type UpdatePartItem, appendMessage, + appendMessages as appendMessagesOp, appendSynthetic as appendSyntheticOp, cloneSession, compact as compactOp, @@ -35,6 +36,7 @@ export const FUNCTION_IDS = { CREATE: 'session-tree::create', ENSURE: 'session-tree::ensure', APPEND: 'session-tree::append', + APPEND_BATCH: 'session-tree::append_batch', MESSAGES: 'session-tree::messages', LIST: 'session-tree::list', COMPACTIONS: 'session-tree::compactions', @@ -148,6 +150,28 @@ export function registerTree(iii: ISdk, store: SessionStore): void { { description: 'Append an AgentMessage entry to a session' }, ); + iii.registerFunction( + FUNCTION_IDS.APPEND_BATCH, + async (payload: unknown) => { + const obj = (payload ?? {}) as Record; + const session_id = requireString(obj, 'session_id'); + const parent_id = typeof obj.parent_id === 'string' ? obj.parent_id : null; + const messages = obj.messages; + if (!Array.isArray(messages)) throw new Error('missing required field: messages (array)'); + const entry_ids = await appendMessagesOp( + store, + session_id, + parent_id, + messages as AgentMessage[], + ); + return { entry_ids }; + }, + { + description: + 'Append a chain of AgentMessage entries to a session in one call (resolves the active leaf once; returns entry_ids in order).', + }, + ); + iii.registerFunction( FUNCTION_IDS.MESSAGES, async (payload: unknown) => { diff --git a/harness/src/session/tree/store.ts b/harness/src/session/tree/store.ts index 9140a71b..0bd16e4f 100644 --- a/harness/src/session/tree/store.ts +++ b/harness/src/session/tree/store.ts @@ -20,6 +20,11 @@ import { type SessionEntry, SessionError, type SessionMeta, entryTimestamp } fro export interface SessionStore { create(meta: SessionMeta): Promise; append(session_id: string, entry: SessionEntry): Promise; + /** + * Append several pre-chained entries, refreshing session meta once for the + * whole batch (vs once per {@link append}). Entries are written in order. + */ + appendMany(session_id: string, entries: SessionEntry[]): Promise; loadEntries(session_id: string): Promise; loadMeta(session_id: string): Promise; list(): Promise; @@ -44,6 +49,14 @@ export class InMemoryStore implements SessionStore { if (m) m.updated_at = Date.now(); } + async appendMany(session_id: string, entries: SessionEntry[]): Promise { + const list = this.entries.get(session_id); + if (!list) throw new SessionError('not_found', `session not found: ${session_id}`); + list.push(...entries); + const m = this.meta.get(session_id); + if (m) m.updated_at = Date.now(); + } + async loadEntries(session_id: string): Promise { return [...(this.entries.get(session_id) ?? [])]; } @@ -111,6 +124,29 @@ export class IiiStateSessionStore implements SessionStore { } } + async appendMany(session_id: string, entries: SessionEntry[]): Promise { + for (const entry of entries) { + try { + await this.state.set({ + scope: entriesScope(session_id), + key: entry.id, + value: entry, + }); + } catch (e) { + throw new SessionError('storage', `state::set entry: ${String(e)}`); + } + } + // One meta refresh for the whole batch (vs once per entry in `append`). + try { + await this.refreshMetaUpdatedAt(session_id); + } catch (e) { + logger.warn('meta updated_at refresh failed', { + session_id, + err: String(e), + }); + } + } + async loadEntries(session_id: string): Promise { let entries: SessionEntry[]; try { diff --git a/harness/src/turn-orchestrator/state-runtime/store.ts b/harness/src/turn-orchestrator/state-runtime/store.ts index b30e8036..a0707251 100644 --- a/harness/src/turn-orchestrator/state-runtime/store.ts +++ b/harness/src/turn-orchestrator/state-runtime/store.ts @@ -168,13 +168,15 @@ export function createTurnStore(iii: ISdk): TurnStore { }, async appendMessages(session_id, msgs) { - for (const message of msgs) { - await iii.trigger({ - function_id: 'session-tree::append', - payload: { session_id, message, parent_id: null }, - timeoutMs: 10_000, - }); - } + if (msgs.length === 0) return; + // One batch trigger chains all messages and resolves the active leaf once, + // instead of a per-message session-tree::append (each of which re-read the + // whole session to find the leaf). + await iii.trigger({ + function_id: 'session-tree::append_batch', + payload: { session_id, messages: msgs, parent_id: null }, + timeoutMs: 10_000, + }); }, async saveRunRequest(session_id, request) { diff --git a/harness/tests/session/operations.test.ts b/harness/tests/session/operations.test.ts index c18e346a..67345d93 100644 --- a/harness/tests/session/operations.test.ts +++ b/harness/tests/session/operations.test.ts @@ -1,7 +1,8 @@ -import { describe, expect, it } from 'vitest'; +import { describe, expect, it, vi } from 'vitest'; import { activePath, appendMessage, + appendMessages, cloneSession, createSession, exportHtml, @@ -48,6 +49,66 @@ describe('session-tree operations', () => { expect(path).toEqual([e1, e2, e3]); }); + it('appendMessages chains a batch and is equivalent to serial appends', async () => { + const store = new InMemoryStore(); + const sid = await createSession(store); + const e0 = await appendMessage(store, sid, null, userMsg('seed')); + + const ids = await appendMessages(store, sid, null, [asstMsg('a'), userMsg('b'), asstMsg('c')]); + + expect(ids).toHaveLength(3); + // The batch chains onto the existing leaf and links each entry to the prior. + const path = await activePath(store, sid); + expect(path).toEqual([e0, ...ids]); + const messages = await loadMessages(store, sid); + expect(messages.map((m) => (m.content[0] as { text: string }).text)).toEqual([ + 'seed', + 'a', + 'b', + 'c', + ]); + }); + + it('appendMessages resolves the active leaf once for the whole batch', async () => { + const store = new InMemoryStore(); + const sid = await createSession(store); + await appendMessage(store, sid, null, userMsg('seed')); + + // loadEntries backs activePath; one call means the leaf was resolved once. + const loadEntriesSpy = vi.spyOn(store, 'loadEntries'); + const appendManySpy = vi.spyOn(store, 'appendMany'); + + await appendMessages(store, sid, null, [asstMsg('a'), userMsg('b'), asstMsg('c')]); + + expect(loadEntriesSpy).toHaveBeenCalledTimes(1); + expect(appendManySpy).toHaveBeenCalledTimes(1); + expect(appendManySpy.mock.calls[0]?.[1]).toHaveLength(3); + }); + + it('appendMessages with an explicit parent skips leaf resolution', async () => { + const store = new InMemoryStore(); + const sid = await createSession(store); + const e0 = await appendMessage(store, sid, null, userMsg('seed')); + const loadEntriesSpy = vi.spyOn(store, 'loadEntries'); + + const ids = await appendMessages(store, sid, e0, [asstMsg('a')]); + + expect(loadEntriesSpy).not.toHaveBeenCalled(); + const path = await activePath(store, sid); + expect(path).toEqual([e0, ...ids]); + }); + + it('appendMessages on an empty list is a no-op', async () => { + const store = new InMemoryStore(); + const sid = await createSession(store); + const appendManySpy = vi.spyOn(store, 'appendMany'); + + const ids = await appendMessages(store, sid, null, []); + + expect(ids).toEqual([]); + expect(appendManySpy).not.toHaveBeenCalled(); + }); + it('loadMessages filters non-message entries from active path', async () => { const store = new InMemoryStore(); const sid = await createSession(store); diff --git a/harness/tests/turn-orchestrator/run-start.test.ts b/harness/tests/turn-orchestrator/run-start.test.ts index 5bddea0e..b819b87a 100644 --- a/harness/tests/turn-orchestrator/run-start.test.ts +++ b/harness/tests/turn-orchestrator/run-start.test.ts @@ -176,9 +176,9 @@ describe('execute', () => { expect(ensureCalls).toHaveLength(1); expect(ensureCalls[0]?.payload).toEqual({ session_id: 'sess-1' }); - // The single ensure must precede the run's first tree write (append). + // The single ensure must precede the run's first tree write (append batch). const ensureIdx = calls.findIndex((c) => c.function_id === 'session-tree::ensure'); - const firstAppendIdx = calls.findIndex((c) => c.function_id === 'session-tree::append'); + const firstAppendIdx = calls.findIndex((c) => c.function_id === 'session-tree::append_batch'); expect(ensureIdx).toBeGreaterThanOrEqual(0); expect(firstAppendIdx).toBeGreaterThan(ensureIdx); }); diff --git a/harness/tests/turn-orchestrator/store.test.ts b/harness/tests/turn-orchestrator/store.test.ts index 5c5f9bd1..79683eba 100644 --- a/harness/tests/turn-orchestrator/store.test.ts +++ b/harness/tests/turn-orchestrator/store.test.ts @@ -139,10 +139,30 @@ describe('session-tree call reduction', () => { timestamp: 1, }; await createTurnStore(iii).appendMessages('sess-a', [msg]); - expect(calls.filter((c) => c === 'session-tree::append')).toHaveLength(1); + // One batch trigger for the whole call, not a per-message append. + expect(calls.filter((c) => c === 'session-tree::append_batch')).toHaveLength(1); + expect(calls).not.toContain('session-tree::append'); expect(calls).not.toContain('session-tree::ensure'); }); + it('appendMessages batches a multi-message call into one trigger', async () => { + const { iii, calls } = recordingIii(); + const mk = (text: string) => ({ + role: 'user' as const, + content: [{ type: 'text' as const, text }], + timestamp: 1, + }); + await createTurnStore(iii).appendMessages('sess-a', [mk('a'), mk('b'), mk('c')]); + expect(calls.filter((c) => c === 'session-tree::append_batch')).toHaveLength(1); + }); + + it('appendMessages on an empty list triggers nothing', async () => { + const { iii, calls } = recordingIii(); + await createTurnStore(iii).appendMessages('sess-a', []); + expect(calls).not.toContain('session-tree::append_batch'); + expect(calls).not.toContain('session-tree::append'); + }); + it('ensureSession is the sole trigger of session-tree::ensure', async () => { const { iii, calls } = recordingIii(); await createTurnStore(iii).ensureSession('sess-a'); From af4e95107d881235d670e672e6d9171a17f6b329 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Tue, 9 Jun 2026 18:42:33 -0300 Subject: [PATCH 03/11] fix(harness): robust session-tree leaf resolution, lenient model_meta, crash-safe result dedup Addresses review findings on the turn-flow compaction PR. - session-tree leaf resolution: activePath now resolves the active leaf as the tip of the parent chain (the entry that is no one's parent) instead of the raw (timestamp, id) sort-max. A batch append shares one timestamp and a later append whose clock ties or steps back no longer sorts before the batch tail and gets orphaned off the active path. Drops the future-dated `base + i` timestamps the batch used to keep itself sort-last. - model_meta wire validation is now lenient: a sparse/partial catalog entry passes through (and the provider reads its fields defensively / falls back to a live fetch) instead of failing ZodError and killing every stream for that model. The field is an optimization, never a source of truth. Removes the hand-synced strict ModelSchema mirror and its compile-time guard. - trailing-result dedup skips a trailing no-call assistant (the synthetic max_turns "loop stopped" notice) instead of treating it as the turn boundary, so a crash-replay of finalizeBatch no longer re-appends the whole result run behind the notice. - session-tree::append_batch rejects falsy/non-object elements at the boundary, matching the singular append guard. Cleanups folded in: fetchModelLimit reuses limitFromModel (one mapping); ModelLimit aliases the single ModelContextLimit shape; loadContextView and loadTrailingResultIds share one loadSessionMessages reader (with a consistent timeout); IiiStateSessionStore append/appendMany/updateEntry share writeEntry + bestEffortMetaRefresh helpers. --- .../src/context-compaction/model-resolver.ts | 18 +++--- harness/src/context-compaction/overflow.ts | 8 +-- harness/src/models-catalog/types.ts | 49 --------------- harness/src/session/tree/operations.ts | 42 ++++++++++--- harness/src/session/tree/register.ts | 6 ++ harness/src/session/tree/store.ts | 63 +++++-------------- .../state-runtime/context-view.ts | 31 ++++++--- .../turn-orchestrator/state-runtime/store.ts | 12 +--- .../state-runtime/transcript.ts | 22 +++++-- harness/src/types/provider.ts | 19 +++--- harness/tests/session/operations.test.ts | 44 +++++++++++++ .../function-execute.test.ts | 47 ++++++++++++-- harness/tests/types/provider.test.ts | 22 +++++++ 13 files changed, 231 insertions(+), 152 deletions(-) diff --git a/harness/src/context-compaction/model-resolver.ts b/harness/src/context-compaction/model-resolver.ts index 7dddc1f5..999f85dd 100644 --- a/harness/src/context-compaction/model-resolver.ts +++ b/harness/src/context-compaction/model-resolver.ts @@ -1,4 +1,3 @@ -import type { Model } from '../models-catalog/types.js'; import { RUN_REQUEST_SCOPE } from '../turn-orchestrator/state.js'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; @@ -11,12 +10,14 @@ export type ResolvedModel = { } | null; /** - * Derive a {@link ModelLimit} from a catalog {@link Model}. Mirrors the mapping - * {@link fetchModelLimit} applies to a `models::get` result, so a threaded - * `model_meta` yields the exact same limit a fresh fetch would — letting - * callers skip the bus round-trip without changing behavior. + * Derive a {@link ModelLimit} from a catalog entry (a full {@link Model} or a + * `models::get` result). The single mapping used by both the threaded + * `model_meta` path and a fresh fetch, so they yield the exact same limit. */ -export function limitFromModel(m: Model): ModelLimit { +export function limitFromModel(m: { + context_window?: number; + max_output_tokens?: number; +}): ModelLimit { const context = typeof m.context_window === 'number' ? m.context_window : 0; const output = typeof m.max_output_tokens === 'number' ? m.max_output_tokens : 0; return { context, input: context, output }; @@ -47,13 +48,10 @@ export async function fetchModelLimit( return null; } - const context = typeof entry.context_window === 'number' ? entry.context_window : 0; - const output = typeof entry.max_output_tokens === 'number' ? entry.max_output_tokens : 0; - return { providerID, modelID, - modelLimit: { context, input: context, output }, + modelLimit: limitFromModel(entry), }; } catch (err) { logger.debug('model-resolver: models::get failed', { diff --git a/harness/src/context-compaction/overflow.ts b/harness/src/context-compaction/overflow.ts index dfc9cca0..3246100e 100644 --- a/harness/src/context-compaction/overflow.ts +++ b/harness/src/context-compaction/overflow.ts @@ -1,14 +1,12 @@ +import type { ModelContextLimit } from '../types/agent-event.js'; import { MAX_PRESERVE_RECENT_TOKENS, MIN_PRESERVE_RECENT_TOKENS, compactionConfig, } from './config.js'; -export type ModelLimit = { - context: number; - input: number; - output: number; -}; +/** A model's token limits. Single shape, defined in types/agent-event.ts. */ +export type ModelLimit = ModelContextLimit; export type ModelLike = { id: string; diff --git a/harness/src/models-catalog/types.ts b/harness/src/models-catalog/types.ts index 5ab55d3e..14bf579f 100644 --- a/harness/src/models-catalog/types.ts +++ b/harness/src/models-catalog/types.ts @@ -3,8 +3,6 @@ * `models-catalog/src/lib.rs::{Model, Pricing, …}`. */ -import { z } from 'zod'; - export type ThinkingLevel = 'off' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh'; export type Transport = 'sse' | 'websocket' | 'auto'; @@ -43,53 +41,6 @@ export type Model = { pricing?: Pricing; }; -/** - * Zod mirror of {@link Model}, used to validate model metadata that crosses a - * worker boundary (`ProviderStreamInput.model_meta`). `.passthrough()` keeps - * any future catalog fields intact. Kept field-for-field in sync with the - * `Model` type above; the compile-time guard below fails the build on drift. - */ -export const ModelSchema = z - .object({ - id: z.string(), - provider: z.string(), - api: z.string(), - display_name: z.string(), - context_window: z.number(), - max_output_tokens: z.number().optional(), - supports_thinking: z.boolean().optional(), - supports_xhigh: z.boolean().optional(), - supports_tools: z.boolean().optional(), - supports_vision: z.boolean().optional(), - supports_cache: z.boolean().optional(), - thinking_budgets: z - .object({ - minimal: z.number().optional(), - low: z.number().optional(), - medium: z.number().optional(), - high: z.number().optional(), - }) - .passthrough() - .optional(), - transports: z.array(z.enum(['sse', 'websocket', 'auto'])).optional(), - default_cache_retention: z.enum(['none', 'short', 'long']).optional(), - pricing: z - .object({ - input_per_1m: z.number(), - output_per_1m: z.number(), - cache_read_per_1m: z.number().optional(), - cache_write_per_1m: z.number().optional(), - }) - .passthrough() - .optional(), - }) - .passthrough(); - -// Compile-time guard: every value ModelSchema parses is assignable to Model. -// If a field drifts out of sync, this line fails to typecheck. -const _modelSchemaSatisfiesType = (m: z.infer): Model => m; -void _modelSchemaSatisfiesType; - export const MODELS_SCOPE = 'models'; export type Capability = diff --git a/harness/src/session/tree/operations.ts b/harness/src/session/tree/operations.ts index 513f13c1..45d50746 100644 --- a/harness/src/session/tree/operations.ts +++ b/harness/src/session/tree/operations.ts @@ -90,9 +90,12 @@ export async function appendMessage( * Append several messages as one linear chain in a single store round-trip. * Equivalent to calling {@link appendMessage} for each message in order, but * resolves the active leaf once (not once per message) and writes through - * {@link SessionStore.appendMany} so meta is refreshed once. Entries get - * strictly increasing timestamps so the batch's last entry remains the - * resolvable leaf for the next append. + * {@link SessionStore.appendMany} so meta is refreshed once. + * + * Entries share a single append timestamp; their order is carried by the + * parent chain, not the clock, and {@link activePath} resolves the leaf by + * chain tip — so a later append with an equal-or-earlier wall-clock timestamp + * still chains onto this batch's tail instead of being orphaned. */ export async function appendMessages( store: SessionStore, @@ -108,15 +111,15 @@ export async function appendMessages( parent = path.at(-1) ?? null; } - const base = Date.now(); - const entries: SessionEntry[] = messages.map((message, i) => { + const timestamp = Date.now(); + const entries: SessionEntry[] = messages.map((message) => { const id = randomUUID(); const entry: SessionEntry = { type: 'message', id, parent_id: parent, message, - timestamp: base + i, + timestamp, }; parent = id; return entry; @@ -126,6 +129,28 @@ export async function appendMessages( return entries.map((e) => e.id); } +/** + * The active leaf is the tip of the newest chain: the most-recent entry that is + * not the parent of any other entry. `entries` is sorted ascending by + * (timestamp, id), so we scan from the end and take the first tip. Resolving by + * chain tip rather than raw sort-max keeps a freshly appended child as the leaf + * even when its wall-clock timestamp ties or precedes a sibling's — the case a + * batch append (shared timestamp) or a clock that did not advance would + * otherwise mis-resolve. Falls back to the most-recent entry only if every + * entry is some entry's parent (a cycle — should not occur). + */ +function resolveActiveLeaf(entries: SessionEntry[]): string | null { + const parentIds = new Set(); + for (const e of entries) { + if (e.parent_id) parentIds.add(e.parent_id); + } + for (let i = entries.length - 1; i >= 0; i--) { + const entry = entries[i]; + if (entry && !parentIds.has(entry.id)) return entry.id; + } + return entries.at(-1)?.id ?? null; +} + export async function activePath( store: SessionStore, session_id: string, @@ -134,9 +159,8 @@ export async function activePath( const entries = await store.loadEntries(session_id); if (entries.length === 0) return []; const byId = new Map(entries.map((e) => [e.id, e] as const)); - const last = entries.at(-1); - if (!last) return []; - const start = leaf ?? last.id; + const start = leaf ?? resolveActiveLeaf(entries); + if (start === null) return []; const path: string[] = []; let cursor: string | null = start; while (cursor !== null) { diff --git a/harness/src/session/tree/register.ts b/harness/src/session/tree/register.ts index e7c62ad4..6c0a0939 100644 --- a/harness/src/session/tree/register.ts +++ b/harness/src/session/tree/register.ts @@ -158,6 +158,12 @@ export function registerTree(iii: ISdk, store: SessionStore): void { const parent_id = typeof obj.parent_id === 'string' ? obj.parent_id : null; const messages = obj.messages; if (!Array.isArray(messages)) throw new Error('missing required field: messages (array)'); + // Mirror the singular APPEND guard: reject falsy/non-object elements at + // the boundary so a caller-side map bug can't persist `message: null` + // entries onto the active path. + if (messages.some((m) => !m || typeof m !== 'object')) { + throw new Error('messages must all be non-null AgentMessage objects'); + } const entry_ids = await appendMessagesOp( store, session_id, diff --git a/harness/src/session/tree/store.ts b/harness/src/session/tree/store.ts index 0bd16e4f..ba9efef9 100644 --- a/harness/src/session/tree/store.ts +++ b/harness/src/session/tree/store.ts @@ -103,48 +103,35 @@ export class IiiStateSessionStore implements SessionStore { } } - async append(session_id: string, entry: SessionEntry): Promise { + /** Write one entry under its own key. Throws on storage failure. */ + private async writeEntry(session_id: string, key: string, entry: SessionEntry): Promise { try { - await this.state.set({ - scope: entriesScope(session_id), - key: entry.id, - value: entry, - }); + await this.state.set({ scope: entriesScope(session_id), key, value: entry }); } catch (e) { throw new SessionError('storage', `state::set entry: ${String(e)}`); } - // Best-effort meta refresh — failures here log only. + } + + /** Bump meta.updated_at; best-effort, failures log only. */ + private async bestEffortMetaRefresh(session_id: string): Promise { try { await this.refreshMetaUpdatedAt(session_id); } catch (e) { - logger.warn('meta updated_at refresh failed', { - session_id, - err: String(e), - }); + logger.warn('meta updated_at refresh failed', { session_id, err: String(e) }); } } + async append(session_id: string, entry: SessionEntry): Promise { + await this.writeEntry(session_id, entry.id, entry); + await this.bestEffortMetaRefresh(session_id); + } + async appendMany(session_id: string, entries: SessionEntry[]): Promise { for (const entry of entries) { - try { - await this.state.set({ - scope: entriesScope(session_id), - key: entry.id, - value: entry, - }); - } catch (e) { - throw new SessionError('storage', `state::set entry: ${String(e)}`); - } + await this.writeEntry(session_id, entry.id, entry); } // One meta refresh for the whole batch (vs once per entry in `append`). - try { - await this.refreshMetaUpdatedAt(session_id); - } catch (e) { - logger.warn('meta updated_at refresh failed', { - session_id, - err: String(e), - }); - } + await this.bestEffortMetaRefresh(session_id); } async loadEntries(session_id: string): Promise { @@ -187,24 +174,8 @@ export class IiiStateSessionStore implements SessionStore { } async updateEntry(session_id: string, entry_id: string, updated: SessionEntry): Promise { - try { - await this.state.set({ - scope: entriesScope(session_id), - key: entry_id, - value: updated, - }); - } catch (e) { - throw new SessionError('storage', `state::set updateEntry: ${String(e)}`); - } - // Best-effort meta refresh — failures here log only. - try { - await this.refreshMetaUpdatedAt(session_id); - } catch (e) { - logger.warn('meta updated_at refresh failed', { - session_id, - err: String(e), - }); - } + await this.writeEntry(session_id, entry_id, updated); + await this.bestEffortMetaRefresh(session_id); } private async refreshMetaUpdatedAt(session_id: string): Promise { diff --git a/harness/src/turn-orchestrator/state-runtime/context-view.ts b/harness/src/turn-orchestrator/state-runtime/context-view.ts index dd50012d..ffc763cd 100644 --- a/harness/src/turn-orchestrator/state-runtime/context-view.ts +++ b/harness/src/turn-orchestrator/state-runtime/context-view.ts @@ -40,24 +40,39 @@ export function buildContextView( ]; } -type MessagesResponse = { - messages?: Array<{ entry_id: string; message: AgentMessage }>; -}; - type CompactionsResponse = { entries?: CompactionEntryRow[]; }; +export type SessionMessageRow = { entry_id: string; message: AgentMessage }; + +/** + * Raw active-path messages (entry_id + message), oldest first. The single + * `session-tree::messages` reader — shared by {@link loadContextView} (which + * also folds in compactions) and the orchestrator's trailing-result dedup — so + * the fetch shape and timeout live in one place. + */ +export async function loadSessionMessages( + iii: ISdk, + session_id: string, +): Promise { + const resp = await iii.trigger({ + function_id: 'session-tree::messages', + payload: { session_id }, + timeoutMs: 10_000, + }); + return resp?.messages ?? []; +} + export async function loadContextView(iii: ISdk, session_id: string): Promise { - const [messagesResp, compactionsResp] = await Promise.all([ - iii.trigger({ function_id: 'session-tree::messages', payload: { session_id } }), + const [messages, compactionsResp] = await Promise.all([ + loadSessionMessages(iii, session_id), iii.trigger({ function_id: 'session-tree::compactions', payload: { session_id } }), ]); - const messagesPayload = messagesResp as MessagesResponse | null; const compactionsPayload = compactionsResp as CompactionsResponse | null; - const entries: MessageWithEntryId[] = (messagesPayload?.messages ?? []).map((e) => ({ + const entries: MessageWithEntryId[] = messages.map((e) => ({ entry_id: e.entry_id, message: e.message, })); diff --git a/harness/src/turn-orchestrator/state-runtime/store.ts b/harness/src/turn-orchestrator/state-runtime/store.ts index a0707251..85b305b1 100644 --- a/harness/src/turn-orchestrator/state-runtime/store.ts +++ b/harness/src/turn-orchestrator/state-runtime/store.ts @@ -12,7 +12,7 @@ import { emit } from '../events.js'; import { type RunRequest, parseRunRequest } from '../run-request.js'; import { toView, type TurnStateView } from '../schemas.js'; import { type TurnState, type TurnStateRecord, parseTurnStateRecord } from '../state.js'; -import { loadContextView } from './context-view.js'; +import { loadContextView, loadSessionMessages } from './context-view.js'; import { persistedTrailingResultIds } from './transcript.js'; /** @@ -157,14 +157,8 @@ export function createTurnStore(iii: ISdk): TurnStore { }, async loadTrailingResultIds(session_id) { - // Same response (and same `?? []` for an empty/new session) as - // loadContextView, the sibling reader of session-tree::messages. - const resp = await iii.trigger }>({ - function_id: 'session-tree::messages', - payload: { session_id }, - }); - const messages = (resp?.messages ?? []).map((e) => e.message); - return persistedTrailingResultIds(messages); + const rows = await loadSessionMessages(iii, session_id); + return persistedTrailingResultIds(rows.map((e) => e.message)); }, async appendMessages(session_id, msgs) { diff --git a/harness/src/turn-orchestrator/state-runtime/transcript.ts b/harness/src/turn-orchestrator/state-runtime/transcript.ts index bd6b6e1b..fda45620 100644 --- a/harness/src/turn-orchestrator/state-runtime/transcript.ts +++ b/harness/src/turn-orchestrator/state-runtime/transcript.ts @@ -4,18 +4,32 @@ import type { AgentMessage, AssistantMessage } from '../../types/agent-message.js'; +function assistantHasFunctionCalls(m: AssistantMessage): boolean { + return m.content.some((b) => b.type === 'function_call'); +} + /** * Function_call_ids already persisted for the current turn. Results are appended * right after the assistant that requested them, so they form the trailing run - * of `function_result` messages; the first non-result from the tail is the turn - * boundary. + * of `function_result` messages; the requesting assistant (which carries the + * function calls) is the turn boundary. + * + * A trailing assistant with NO function calls is skipped, not treated as the + * boundary: on a max_turns crash-replay the synthetic "loop stopped" notice is + * appended after the results in the same step, and the dedup must still see the + * results behind it as persisted — otherwise the whole batch re-appends. */ export function persistedTrailingResultIds(messages: AgentMessage[]): Set { const ids = new Set(); for (let i = messages.length - 1; i >= 0; i--) { const m = messages[i]; - if (m?.role === 'function_result') ids.add(m.function_call_id); - else break; + if (!m) break; + if (m.role === 'function_result') { + ids.add(m.function_call_id); + continue; + } + if (m.role === 'assistant' && !assistantHasFunctionCalls(m)) continue; + break; } return ids; } diff --git a/harness/src/types/provider.ts b/harness/src/types/provider.ts index 031b2a67..d4f362e2 100644 --- a/harness/src/types/provider.ts +++ b/harness/src/types/provider.ts @@ -1,12 +1,11 @@ /** - * Provider streaming contract. Mirrors - * `harness/crates/harness-types/src/provider.rs` and is consumed by both - * the orchestrator (caller) and provider workers (handler). + * Provider streaming contract, consumed by both the orchestrator (caller) and + * provider workers (handler). */ import { z } from 'zod'; import { zodToJsonSchema } from 'zod-to-json-schema'; -import { ModelSchema } from '../models-catalog/types.js'; +import type { Model } from '../models-catalog/types.js'; /** * Lax `StreamChannelRef` — the SDK's canonical type lives in `iii-sdk`, @@ -48,11 +47,15 @@ export const ProviderStreamInputSchema = z.object({ /** * Optional pre-resolved catalog entry for `model`, threaded from the * orchestrator so the provider does not re-fetch `models::get` it already - * resolved at turn start. Providers MUST ignore it unless `model_meta.id` - * and `model_meta.provider` match this request, and fall back to a live - * fetch otherwise — the field is an optimization, never a source of truth. + * resolved at turn start. An optimization, never a source of truth: validated + * leniently (any object passes; anything else coerces to absent) so a sparse + * or partial catalog entry falls back to a live fetch instead of failing the + * whole stream. Providers read its fields defensively. */ - model_meta: ModelSchema.optional(), + model_meta: z + .unknown() + .optional() + .transform((v) => (v && typeof v === 'object' ? (v as Model) : undefined)), /** * Optional stable id for the turn (the run's start time). Providers may use * it to dedupe per-stream credential resolution within a turn; a new turn diff --git a/harness/tests/session/operations.test.ts b/harness/tests/session/operations.test.ts index 67345d93..78ef7aa7 100644 --- a/harness/tests/session/operations.test.ts +++ b/harness/tests/session/operations.test.ts @@ -12,8 +12,25 @@ import { tree, } from '../../src/session/tree/operations.js'; import { InMemoryStore } from '../../src/session/tree/store.js'; +import { entryTimestamp } from '../../src/session/tree/types.js'; import type { AgentMessage } from '../../src/types/agent-message.js'; +/** + * InMemoryStore returns entries in insertion order, but the production + * IiiStateSessionStore sorts loadEntries by (timestamp, id). Leaf-resolution + * bugs only surface under that sort, so this double reproduces it. + */ +class SortingStore extends InMemoryStore { + async loadEntries(session_id: string) { + const entries = await super.loadEntries(session_id); + return entries.sort((a, b) => { + const t = entryTimestamp(a) - entryTimestamp(b); + if (t !== 0) return t; + return a.id < b.id ? -1 : a.id > b.id ? 1 : 0; + }); + } +} + const userMsg = (text: string): AgentMessage => ({ role: 'user', content: [{ type: 'text', text }], @@ -109,6 +126,33 @@ describe('session-tree operations', () => { expect(appendManySpy).not.toHaveBeenCalled(); }); + it('keeps a later append on the active path when its timestamp ties or precedes the batch (sorted store, leaf = chain tip)', async () => { + const store = new SortingStore(); + const sid = await createSession(store); + const now = vi.spyOn(Date, 'now'); + + // Batch lands at t=100 (all entries share the append timestamp). + now.mockReturnValue(100); + const batch = await appendMessages(store, sid, null, [ + asstMsg('a'), + userMsg('b'), + asstMsg('c'), + ]); + + // A later single append whose clock did NOT advance (or stepped back): its + // timestamp sorts before the batch tail. Pre-fix, the sort-max leaf + // heuristic orphaned it; the chain-tip leaf keeps it on the path. + now.mockReturnValue(99); + const tail = await appendMessage(store, sid, null, asstMsg('d')); + now.mockRestore(); + + expect(await activePath(store, sid)).toEqual([...batch, tail]); + const texts = (await loadMessages(store, sid)).map( + (m) => (m.content[0] as { text: string }).text, + ); + expect(texts).toEqual(['a', 'b', 'c', 'd']); + }); + it('loadMessages filters non-message entries from active path', async () => { const store = new InMemoryStore(); const sid = await createSession(store); diff --git a/harness/tests/turn-orchestrator/function-execute.test.ts b/harness/tests/turn-orchestrator/function-execute.test.ts index 1ff0529d..5189e8e4 100644 --- a/harness/tests/turn-orchestrator/function-execute.test.ts +++ b/harness/tests/turn-orchestrator/function-execute.test.ts @@ -303,6 +303,28 @@ describe('createPorts().loadTrailingResultIds', () => { timestamp: 1, }, }); + /** Assistant that requested tool calls — the turn boundary. */ + const requestingAssistant = (id: string) => ({ + message: { + role: 'assistant' as const, + content: [{ type: 'function_call' as const, id, function_id: 'shell::run', arguments: {} }], + stop_reason: 'function_call' as const, + model: 'm', + provider: 'p', + timestamp: 1, + }, + }); + /** Synthetic assistant with no calls (e.g. the max_turns "loop stopped" notice). */ + const noticeAssistant = () => ({ + message: { + role: 'assistant' as const, + content: [{ type: 'text' as const, text: 'loop stopped' }], + stop_reason: 'end' as const, + model: 'm', + provider: 'p', + timestamp: 1, + }, + }); it('reads only session-tree::messages (no compactions) on the default leaf', async () => { const { iii, calls } = trackingIii([resultMsg('fc-1'), resultMsg('fc-2')]); @@ -316,12 +338,29 @@ describe('createPorts().loadTrailingResultIds', () => { expect(calls[0]?.payload).toEqual({ session_id: 's1' }); }); - it('returns only the trailing result run (stops at the first non-result)', async () => { - const assistant = { message: { role: 'assistant' as const } }; - const { iii } = trackingIii([resultMsg('old'), assistant, resultMsg('fc-1')]); + it('stops at the requesting assistant, not collecting a prior turn run', async () => { + const { iii } = trackingIii([ + resultMsg('old'), + requestingAssistant('req'), + resultMsg('fc-1'), + resultMsg('fc-2'), + ]); const ids = await createPorts(iii).loadTrailingResultIds('s1'); - expect(ids).toEqual(new Set(['fc-1'])); + expect(ids).toEqual(new Set(['fc-1', 'fc-2'])); + }); + + it('skips a trailing no-call notice so the results behind it stay deduped (max_turns replay)', async () => { + const { iii } = trackingIii([ + requestingAssistant('req'), + resultMsg('fc-1'), + resultMsg('fc-2'), + noticeAssistant(), + ]); + + const ids = await createPorts(iii).loadTrailingResultIds('s1'); + + expect(ids).toEqual(new Set(['fc-1', 'fc-2'])); }); }); diff --git a/harness/tests/types/provider.test.ts b/harness/tests/types/provider.test.ts index 29c5d572..ed5122fc 100644 --- a/harness/tests/types/provider.test.ts +++ b/harness/tests/types/provider.test.ts @@ -30,6 +30,28 @@ describe('ProviderStreamInputSchema', () => { expect(JSON.stringify(schema)).toContain('writer_ref'); expect(JSON.stringify(schema)).toContain('model'); }); + + it('passes a sparse model_meta through instead of failing the stream', () => { + // model_meta is an optional optimization: a partial catalog entry must fall + // back per-field at the consumer, never reject the whole input. + const sparse = ProviderStreamInputSchema.parse({ + writer_ref: { channel_id: 'c', access_key: 'k', direction: 'write' }, + model: 'm', + messages: [], + model_meta: { id: 'm', provider: 'anthropic' }, + }); + expect(sparse.model_meta).toEqual({ id: 'm', provider: 'anthropic' }); + }); + + it('coerces a non-object model_meta to absent', () => { + const parsed = ProviderStreamInputSchema.parse({ + writer_ref: { channel_id: 'c', access_key: 'k', direction: 'write' }, + model: 'm', + messages: [], + model_meta: 'garbage', + }); + expect(parsed.model_meta).toBeUndefined(); + }); }); describe('ProviderStreamOutputSchema', () => { From 0b8cbd7e6860053243deed75a812d95df69d621a Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Tue, 9 Jun 2026 18:49:13 -0300 Subject: [PATCH 04/11] fix(harness): run::start refuses to clobber an in-flight turn run::start unconditionally reset the session's turn_state record to a fresh `provisioning` and appended the new message. A second run::start for a session with a turn already running (second tab, TUI/ACP client, or a double-submit) therefore raced the in-flight step's last-write-wins saveRecord, corrupting both turns and pairing the new run_request with a stale record. Guard it: read the committed record first and, if the turn is still in flight (any non-terminal state, including a function_awaiting_approval park), ignore the kickoff without mutating anything and report `started: false`. Terminal turns (stopped/failed) and fresh sessions start normally. harness::trigger maps a not-started result to a 409 so clients can wait/retry instead of treating the ignored call as a started turn. --- harness/src/harness/trigger.ts | 4 +- harness/src/turn-orchestrator/run-start.ts | 25 ++++++-- harness/src/turn-orchestrator/schemas.ts | 10 +++- harness/src/turn-orchestrator/state.ts | 7 +++ harness/tests/harness/trigger.test.ts | 23 ++++++- .../tests/turn-orchestrator/run-start.test.ts | 60 ++++++++++++++++++- 6 files changed, 119 insertions(+), 10 deletions(-) diff --git a/harness/src/harness/trigger.ts b/harness/src/harness/trigger.ts index 92534365..ccb4e511 100644 --- a/harness/src/harness/trigger.ts +++ b/harness/src/harness/trigger.ts @@ -47,8 +47,10 @@ export function register(iii: ISdk): void { payload: body.payload, timeoutMs: BRIDGE_TIMEOUT_MS, }); + // A turn was already in flight: surface a 409 so the client can wait/retry + // instead of treating the ignored kickoff as a started turn. return { - status_code: 200, + status_code: result.started ? 200 : 409, headers: { 'content-type': 'application/json' }, body: result, }; diff --git a/harness/src/turn-orchestrator/run-start.ts b/harness/src/turn-orchestrator/run-start.ts index 1746eb98..136e10d3 100644 --- a/harness/src/turn-orchestrator/run-start.ts +++ b/harness/src/turn-orchestrator/run-start.ts @@ -5,19 +5,36 @@ * `HarnessTriggerInputSchema` parse); console/web sends * `{ session_id, message_id?, provider, model, mode?, messages }` and omits * `system_prompt`, `max_turns` (schema defaults). - * **Outgoing**: `{ session_id }` — persists run config, messages, and seeds - * `turn_state` to provisioning via `saveRecord`. + * **Outgoing**: `{ session_id, started }` — persists run config, messages, and + * seeds `turn_state` to provisioning via `saveRecord`. `started` is false when + * a turn was already in flight and this call was ignored. */ import type { ISdk } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; import { RunStartPayloadSchema, type RunStartPayload, type RunStartResult } from './schemas.js'; import { createTurnStore } from './state-runtime/store.js'; -import { newRecord } from './state.js'; +import { isTurnInFlight, newRecord } from './state.js'; export async function execute(iii: ISdk, payload: RunStartPayload): Promise { const store = createTurnStore(iii); const { session_id, messages, max_turns, message_id: _message_id, ...run } = payload; + // Refuse to clobber a turn that is still running for this session. A second + // run::start (second tab, TUI/ACP client, or a double-submit) would otherwise + // reset the live turn_state record to a fresh `provisioning` and race the + // in-flight step's last-write-wins saveRecord, corrupting both turns. Read + // the committed record first; terminal turns (stopped/failed) and fresh + // sessions start normally. No mutation happens on the busy path. + const existing = await store.loadRecord(session_id); + if (existing && isTurnInFlight(existing)) { + logger.warn('run::start ignored: session already has a turn in flight', { + session_id, + state: existing.state, + }); + return { session_id, started: false, reason: 'session_busy' }; + } + // Single ensure for the whole run: this is the gateway that begins the turn // loop, so every later loadMessages/appendMessages is guaranteed a live record // without re-ensuring per call. Must precede the first tree write below. @@ -32,7 +49,7 @@ export async function execute(iii: ISdk, payload: RunStartPayload): Promise; -export type RunStartResult = { session_id: string }; +/** + * `started` is false when the session already had a turn in flight and this + * call was ignored (no record reset, no message appended) — see run-start.ts. + */ +export type RunStartResult = { + session_id: string; + started: boolean; + reason?: 'session_busy'; +}; // --- turn::{state} durable step --- export const TurnStepPayloadSchema = SessionIdPayloadSchema; diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index a2977392..70f23588 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -147,3 +147,10 @@ export function transitionTo(rec: TurnStateRecord, next: TurnState): void { rec.state = next; rec.updated_at_ms = Date.now(); } + +/** A turn is done once it reaches a terminal state; otherwise it is in flight. */ +const TERMINAL_TURN_STATES = new Set(['stopped', 'failed']); + +export function isTurnInFlight(rec: TurnStateRecord): boolean { + return !TERMINAL_TURN_STATES.has(rec.state); +} diff --git a/harness/tests/harness/trigger.test.ts b/harness/tests/harness/trigger.test.ts index 53cdcd40..0416b4fc 100644 --- a/harness/tests/harness/trigger.test.ts +++ b/harness/tests/harness/trigger.test.ts @@ -45,7 +45,7 @@ describe('harness::trigger', () => { }); it('forwards payload to run::start', async () => { - const { sdk, registered, trigger } = makeFakeSdk({ session_id: 'sess' }); + const { sdk, registered, trigger } = makeFakeSdk({ session_id: 'sess', started: true }); register(sdk); const handler = registered.get('harness::trigger')?.handler; if (!handler) throw new Error('handler not registered'); @@ -64,7 +64,26 @@ describe('harness::trigger', () => { system_prompt: '', }); expect(result.status_code).toBe(200); - expect(result.body).toEqual({ session_id: 'sess' }); + expect(result.body).toEqual({ session_id: 'sess', started: true }); + }); + + it('returns 409 when run::start reports the session is busy', async () => { + const { sdk, registered } = makeFakeSdk({ + session_id: 'sess', + started: false, + reason: 'session_busy', + }); + register(sdk); + const handler = registered.get('harness::trigger')?.handler; + if (!handler) throw new Error('handler not registered'); + + const result = (await handler({ + session_id: 'sess-1', + payload: runStartPayload, + })) as Record; + + expect(result.status_code).toBe(409); + expect(result.body).toEqual({ session_id: 'sess', started: false, reason: 'session_busy' }); }); it('rejects invalid run::start payload', async () => { diff --git a/harness/tests/turn-orchestrator/run-start.test.ts b/harness/tests/turn-orchestrator/run-start.test.ts index b819b87a..4d4fd6dd 100644 --- a/harness/tests/turn-orchestrator/run-start.test.ts +++ b/harness/tests/turn-orchestrator/run-start.test.ts @@ -121,7 +121,7 @@ describe('register', () => { expect(handler).toBeDefined(); const result = await handler!(harnessRunStartPayload); - expect(result).toEqual({ session_id: 'sess-1' }); + expect(result).toEqual({ session_id: 'sess-1', started: true }); }); it('rejects invalid payloads at register boundary', async () => { @@ -147,7 +147,7 @@ describe('execute', () => { const result = await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); - expect(result).toEqual({ session_id: 'sess-1' }); + expect(result).toEqual({ session_id: 'sess-1', started: true }); const turnStateSet = calls.find( (c) => @@ -182,4 +182,60 @@ describe('execute', () => { expect(ensureIdx).toBeGreaterThanOrEqual(0); expect(firstAppendIdx).toBeGreaterThan(ensureIdx); }); + + /** iii whose state::get on turn_state returns the given persisted record. */ + function fakeIiiWithRecord(record: unknown): { iii: ISdk; calls: TriggerCall[] } { + const calls: TriggerCall[] = []; + const iii = { + trigger: async (req: { function_id: string; payload: T }): Promise => { + calls.push({ function_id: req.function_id, payload: req.payload }); + if (req.function_id === 'state::get') return record as R; + return null as R; + }, + registerFunction: vi.fn(), + } as unknown as ISdk; + return { iii, calls }; + } + + const inFlightRecord = (state: string) => ({ + session_id: 'sess-1', + state, + turn_count: 1, + function_results: [], + turn_end_emitted: false, + started_at_ms: 1, + updated_at_ms: 2, + }); + + it('refuses to clobber a turn already in flight and makes no writes', async () => { + const { iii, calls } = fakeIiiWithRecord(inFlightRecord('assistant_streaming')); + + const result = await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); + + expect(result).toEqual({ session_id: 'sess-1', started: false, reason: 'session_busy' }); + // Busy path mutates nothing: no ensure, no append, no run_request/turn_state write. + expect(calls.some((c) => c.function_id === 'session-tree::ensure')).toBe(false); + expect(calls.some((c) => c.function_id === 'session-tree::append_batch')).toBe(false); + expect(calls.some((c) => c.function_id === 'state::set')).toBe(false); + }); + + it('treats a parked function_awaiting_approval turn as in flight', async () => { + const { iii } = fakeIiiWithRecord(inFlightRecord('function_awaiting_approval')); + + const result = await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); + + expect(result.started).toBe(false); + }); + + it('starts a fresh turn when the prior turn is terminal (stopped)', async () => { + const { iii, calls } = fakeIiiWithRecord({ + ...inFlightRecord('stopped'), + turn_end_emitted: true, + }); + + const result = await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); + + expect(result).toEqual({ session_id: 'sess-1', started: true }); + expect(calls.some((c) => c.function_id === 'session-tree::append_batch')).toBe(true); + }); }); From da7e277935b5b26d2703762392a7e506846b8e47 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Tue, 9 Jun 2026 19:04:48 -0300 Subject: [PATCH 05/11] fix(harness): defer agent_end to a turn::finishing step (crash-safe terminal) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inlining the steering hop moved the terminal agent_end / finishSession into the same step as the LLM stream and the result append, before runTransition's saveRecord. A crash in that window replayed the whole step — re-running the stream and re-appending — after consumers had already seen the run end. Restore the durable boundary with an outbox: terminating turns persist their work + turn_end inline, then transition to a new `finishing` state instead of emitting agent_end. The turn::finishing step emits agent_end and advances to stopped from a clean replayable boundary. A crash before the work is saved now re-runs it without any premature run-end reaching consumers; a crash in the finishing step just re-emits agent_end (idempotent-tolerated). Unlike the per-round-trip steering hop this fires once per turn, so the continue-path inlining (the perf win) is unchanged. `finishing` is non-terminal, so the run::start in-flight guard treats a finishing turn as busy. --- .../assistant-streaming/run.ts | 10 +-- harness/src/turn-orchestrator/finishing.ts | 36 +++++++++++ .../turn-orchestrator/function-execute/run.ts | 6 +- harness/src/turn-orchestrator/register.ts | 2 + .../state-runtime/turn-end.ts | 18 +++++- harness/src/turn-orchestrator/state.ts | 6 ++ .../turn-orchestrator/steering-check/run.ts | 3 +- .../assistant-streaming.test.ts | 2 +- .../tests/turn-orchestrator/assistant.test.ts | 8 +-- .../tests/turn-orchestrator/finishing.test.ts | 64 +++++++++++++++++++ .../function-execute.test.ts | 11 ++-- .../tests/turn-orchestrator/functions.test.ts | 3 +- .../steering-check-layer.test.ts | 7 +- .../tests/turn-orchestrator/steering.test.ts | 14 ++-- 14 files changed, 161 insertions(+), 29 deletions(-) create mode 100644 harness/src/turn-orchestrator/finishing.ts create mode 100644 harness/tests/turn-orchestrator/finishing.test.ts diff --git a/harness/src/turn-orchestrator/assistant-streaming/run.ts b/harness/src/turn-orchestrator/assistant-streaming/run.ts index a11fa6e3..43d7fa61 100644 --- a/harness/src/turn-orchestrator/assistant-streaming/run.ts +++ b/harness/src/turn-orchestrator/assistant-streaming/run.ts @@ -5,7 +5,7 @@ import type { AgentMessage, AssistantMessage } from '../../types/agent-message.js'; import { decide } from '../provider-router.js'; import { syntheticAssistant } from '../synthetic-assistant.js'; -import { emitTurnEndOnce } from '../state-runtime/turn-end.js'; +import { emitTurnEndOnce, transitionToFinishing } from '../state-runtime/turn-end.js'; import { enterFunctionExecute } from '../function-execute/run.js'; import { transitionTo, type AssistantStreamingTurnRecord } from '../state.js'; import { createDeltaCoalescer } from './coalesce-deltas.js'; @@ -130,7 +130,7 @@ export async function finalizeAssistantTurn( if (route.kind === 'stopped') { await emitTurnEndOnce(ports, rec, asst); - await ports.finishSession(rec); + transitionToFinishing(rec); return; } @@ -143,10 +143,10 @@ export async function finalizeAssistantTurn( return; } - // end_turn: no function calls — finish inline (formerly the - // turn::steering_check hop, which always routed an empty-results turn here). + // end_turn: no function calls. Emit turn_end now, then route to the finishing + // step to emit agent_end after this step's durable save (the agent_end outbox). await emitTurnEndOnce(ports, rec, asst); - await ports.finishSession(rec); + transitionToFinishing(rec); } export async function runAssistantStreaming( diff --git a/harness/src/turn-orchestrator/finishing.ts b/harness/src/turn-orchestrator/finishing.ts new file mode 100644 index 00000000..685bba86 --- /dev/null +++ b/harness/src/turn-orchestrator/finishing.ts @@ -0,0 +1,36 @@ +/** + * `turn::finishing` — the terminal step. + * + * A turn whose work is durably committed transitions to `finishing` (see + * {@link transitionToFinishing}). This step emits the single `agent_end` + * run-end signal and advances to `stopped`. Keeping the terminal signal in its + * own step means a crash BEFORE the work was saved re-runs the work without + * consumers ever having seen a premature run-end; a crash IN this step simply + * re-emits agent_end (idempotent-tolerated) on replay. + */ + +import type { ISdk } from '../runtime/iii.js'; +import { runTransition } from './run-transition.js'; +import { TurnStepPayloadSchema, type TurnStepPayload } from './schemas.js'; +import { createTurnStatePorts } from './state-runtime/ports.js'; +import type { TurnStateRecord } from './state.js'; + +export async function handleFinishing(iii: ISdk, rec: TurnStateRecord): Promise { + const ports = createTurnStatePorts(iii); + // Emits agent_end and transitions the record to `stopped`. + await ports.finishSession(rec); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::finishing', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'finishing', (i, rec) => handleFinishing(i, rec), parsed); + }, + { + description: + 'Run one durable FSM transition for session in state finishing: emit agent_end and advance to stopped.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/function-execute/run.ts b/harness/src/turn-orchestrator/function-execute/run.ts index 75df079e..31f7f238 100644 --- a/harness/src/turn-orchestrator/function-execute/run.ts +++ b/harness/src/turn-orchestrator/function-execute/run.ts @@ -17,6 +17,7 @@ import { endTurnForMaxTurns, maxTurnsReached, resumeToAssistantStreaming, + transitionToFinishing, } from '../state-runtime/turn-end.js'; import { type AwaitingApprovalEntry, @@ -226,9 +227,10 @@ export async function finalizeBatch( await emitTurnEndOnce(ports, rec, lastAssistant, function_results); // Routing formerly done by the turn::steering_check FSM hop, now inline: - // one durable step (and its queue wake) less per round-trip. + // one durable step (and its queue wake) less per round-trip. Terminal routes + // hand off to the finishing step for agent_end (see transitionToFinishing). if (allTerminate) { - await ports.finishSession(rec); + transitionToFinishing(rec); } else if (maxTurnsReached(rec)) { await endTurnForMaxTurns(ports, rec); } else { diff --git a/harness/src/turn-orchestrator/register.ts b/harness/src/turn-orchestrator/register.ts index 683b915f..fc53ff4b 100644 --- a/harness/src/turn-orchestrator/register.ts +++ b/harness/src/turn-orchestrator/register.ts @@ -1,5 +1,6 @@ import type { ISdk } from '../runtime/iii.js'; import { register as registerAssistantStreaming } from './assistant-streaming/process.js'; +import { register as registerFinishing } from './finishing.js'; import { register as registerFunctionAwaitingApproval } from './function-awaiting-approval/process.js'; import { register as registerFunctionExecute } from './function-execute/process.js'; import { register as registerGetState } from './get-state.js'; @@ -14,5 +15,6 @@ export async function register(iii: ISdk, _ctx: { configPath: string }): Promise registerFunctionExecute(iii); registerFunctionAwaitingApproval(iii); registerSteeringCheck(iii); + registerFinishing(iii); registerGetState(iii); } diff --git a/harness/src/turn-orchestrator/state-runtime/turn-end.ts b/harness/src/turn-orchestrator/state-runtime/turn-end.ts index a7e9c763..e37c3bae 100644 --- a/harness/src/turn-orchestrator/state-runtime/turn-end.ts +++ b/harness/src/turn-orchestrator/state-runtime/turn-end.ts @@ -41,6 +41,18 @@ export function resumeToAssistantStreaming(rec: TurnStateRecord): void { transitionTo(rec, 'assistant_streaming'); } +/** + * Route a terminating turn to the `finishing` step instead of emitting agent_end + * inline. The turn's durable work (assistant/results/notice + the turn_end + * stream event) is already committed by the enclosing step's saveRecord; the + * finishing step then emits agent_end and advances to `stopped` from a clean + * replayable boundary, so a crash before the save re-runs the work WITHOUT + * consumers having seen a premature run-end. + */ +export function transitionToFinishing(rec: TurnStateRecord): void { + transitionTo(rec, 'finishing'); +} + export function maxTurnsReached(rec: TurnStateRecord): boolean { return rec.max_turns !== undefined && rec.turn_count >= rec.max_turns; } @@ -48,12 +60,12 @@ export function maxTurnsReached(rec: TurnStateRecord): boolean { export type MaxTurnsEndPorts = TurnEndEmitter & { appendMessages(session_id: string, msgs: AgentMessage[]): Promise; emit(session_id: string, event: AgentEvent): Promise; - finishSession(rec: TurnStateRecord): Promise; }; /** * End the loop at the max_turns cap: persist + surface a synthetic assistant - * notice, emit turn_end (no-op when the step already emitted it), finish. + * notice, emit turn_end (no-op when the step already emitted it), then route to + * the `finishing` step to emit agent_end after the durable commit. */ export async function endTurnForMaxTurns( ports: MaxTurnsEndPorts, @@ -71,5 +83,5 @@ export async function endTurnForMaxTurns( body_streamed: false, }); await emitTurnEndOnce(ports, rec, msg); - await ports.finishSession(rec); + transitionToFinishing(rec); } diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index 70f23588..ce798a49 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -21,6 +21,11 @@ export type TurnState = | 'function_execute' | 'function_awaiting_approval' | 'steering_check' + // Terminal-pending: the turn's work is durably committed; this step only + // emits agent_end and advances to `stopped`. Splitting the terminal signal + // into its own replayable step keeps a crash from re-running the LLM stream + // after consumers already saw the run end (the agent_end "outbox"). + | 'finishing' | 'stopped' | 'failed'; @@ -106,6 +111,7 @@ const TURN_STATES = [ 'function_execute', 'function_awaiting_approval', 'steering_check', + 'finishing', 'stopped', 'failed', ] as const satisfies readonly TurnState[]; diff --git a/harness/src/turn-orchestrator/steering-check/run.ts b/harness/src/turn-orchestrator/steering-check/run.ts index 980aec90..25d97a0e 100644 --- a/harness/src/turn-orchestrator/steering-check/run.ts +++ b/harness/src/turn-orchestrator/steering-check/run.ts @@ -14,6 +14,7 @@ import { endTurnForMaxTurns, maxTurnsReached, resumeToAssistantStreaming, + transitionToFinishing, } from '../state-runtime/turn-end.js'; import type { SteeringCheckTurnRecord } from '../state.js'; import type { SteeringCheckPorts } from './ports.js'; @@ -61,7 +62,7 @@ export async function applySteeringCheckOutcome( return; case 'end_turn': await emitTurnEndOnce(ports, rec); - await ports.finishSession(rec); + transitionToFinishing(rec); return; } } diff --git a/harness/tests/turn-orchestrator/assistant-streaming.test.ts b/harness/tests/turn-orchestrator/assistant-streaming.test.ts index 7b6c1bec..668476a3 100644 --- a/harness/tests/turn-orchestrator/assistant-streaming.test.ts +++ b/harness/tests/turn-orchestrator/assistant-streaming.test.ts @@ -139,7 +139,7 @@ describe('finalizeAssistantTurn', () => { await finalizeAssistantTurn(ports, rec, asst, []); - expect(rec.state).toBe('stopped'); + expect(rec.state).toBe('finishing'); expect(rec.turn_end_emitted).toBe(true); expect(ports.persistAssistantIfNew).not.toHaveBeenCalled(); }); diff --git a/harness/tests/turn-orchestrator/assistant.test.ts b/harness/tests/turn-orchestrator/assistant.test.ts index f8693f10..d2e35516 100644 --- a/harness/tests/turn-orchestrator/assistant.test.ts +++ b/harness/tests/turn-orchestrator/assistant.test.ts @@ -118,7 +118,7 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - expect(rec.state).toBe('stopped'); + expect(rec.state).toBe('finishing'); expect(rec.last_assistant?.stop_reason).toBe('error'); expect(rec.last_assistant?.error_message).toContain('create_channel failed'); }); @@ -165,7 +165,7 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); // No-function-call turns finish inline (formerly the steering_check hop). - expect(rec.state).toBe('stopped'); + expect(rec.state).toBe('finishing'); expect(rec.turn_end_emitted).toBe(true); expect(rec.last_assistant).toEqual(finalMsg); }); @@ -201,7 +201,7 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - expect(rec.state).toBe('stopped'); + expect(rec.state).toBe('finishing'); expect(rec.last_assistant).toEqual(finalMsg); }); @@ -216,7 +216,7 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - expect(rec.state).toBe('stopped'); + expect(rec.state).toBe('finishing'); expect(rec.turn_end_emitted).toBe(true); expect(appendSpy).not.toHaveBeenCalled(); }); diff --git a/harness/tests/turn-orchestrator/finishing.test.ts b/harness/tests/turn-orchestrator/finishing.test.ts new file mode 100644 index 00000000..45d72825 --- /dev/null +++ b/harness/tests/turn-orchestrator/finishing.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { handleFinishing, register } from '../../src/turn-orchestrator/finishing.js'; +import { type TurnStateRecord, newRecord } from '../../src/turn-orchestrator/state.js'; + +type TriggerCall = { function_id: string; payload: unknown }; + +function recordingIii(): { iii: ISdk; calls: TriggerCall[] } { + const calls: TriggerCall[] = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { + calls.push({ function_id: req.function_id, payload: req.payload }); + return null; + }), + } as unknown as ISdk; + return { iii, calls }; +} + +describe('handleFinishing', () => { + it('emits agent_end and transitions to stopped', async () => { + const { iii, calls } = recordingIii(); + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'finishing' }; + + await handleFinishing(iii, rec); + + expect(rec.state).toBe('stopped'); + const agentEnd = calls.find( + (c) => + c.function_id === 'stream::set' && + (c.payload as { data?: { type?: string } }).data?.type === 'agent_end', + ); + expect(agentEnd).toBeDefined(); + }); + + it('is re-runnable: a replay re-emits agent_end and lands on stopped', async () => { + const { iii, calls } = recordingIii(); + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'finishing' }; + + await handleFinishing(iii, rec); + rec.state = 'finishing'; // simulate redelivery against the pre-save record + await handleFinishing(iii, rec); + + expect(rec.state).toBe('stopped'); + const agentEnds = calls.filter( + (c) => + c.function_id === 'stream::set' && + (c.payload as { data?: { type?: string } }).data?.type === 'agent_end', + ); + expect(agentEnds.length).toBe(2); + }); +}); + +describe('register', () => { + it('registers turn::finishing', () => { + const registered = new Map(); + const iii = { + registerFunction: (id: string, handler: unknown) => registered.set(id, handler), + } as unknown as ISdk; + + register(iii); + + expect(registered.has('turn::finishing')).toBe(true); + }); +}); diff --git a/harness/tests/turn-orchestrator/function-execute.test.ts b/harness/tests/turn-orchestrator/function-execute.test.ts index 5189e8e4..68a61bf8 100644 --- a/harness/tests/turn-orchestrator/function-execute.test.ts +++ b/harness/tests/turn-orchestrator/function-execute.test.ts @@ -182,8 +182,10 @@ describe('finalizeBatch', () => { }; await finalizeBatch(ports, rec); - expect(rec.state).toBe('stopped'); - expect(ports.finishSession).toHaveBeenCalledOnce(); + // Terminal routes hand off to the finishing step for agent_end; the step + // itself no longer emits it inline before its durable save. + expect(rec.state).toBe('finishing'); + expect(ports.finishSession).not.toHaveBeenCalled(); }); it('skips duplicate function_result ids on re-entry', async () => { @@ -264,8 +266,9 @@ describe('finalizeBatch', () => { await finalizeBatch(ports, rec); - expect(rec.state).toBe('stopped'); - expect(ports.finishSession).toHaveBeenCalledOnce(); + // max_turns persists the notice + turn_end, then defers agent_end to finishing. + expect(rec.state).toBe('finishing'); + expect(ports.finishSession).not.toHaveBeenCalled(); // Two appends: the batch's function_results, then the synthetic notice. expect(appendMessages).toHaveBeenCalledTimes(2); const appended = appendMessages.mock.calls[1]?.[1] as Array<{ content: unknown[] }>; diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index 0d091862..ad61d684 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -154,7 +154,8 @@ describe('handleExecute new flow', () => { await handleExecute(iii, rec); - expect(rec.state).toBe('stopped'); + // All-terminate routes to the finishing step (which emits agent_end + stops). + expect(rec.state).toBe('finishing'); }); it('does not re-emit function_execution_start for already-executed calls on re-entry', async () => { diff --git a/harness/tests/turn-orchestrator/steering-check-layer.test.ts b/harness/tests/turn-orchestrator/steering-check-layer.test.ts index a3fc4dc0..e2d0c286 100644 --- a/harness/tests/turn-orchestrator/steering-check-layer.test.ts +++ b/harness/tests/turn-orchestrator/steering-check-layer.test.ts @@ -97,7 +97,7 @@ describe('applySteeringCheckOutcome', () => { expect(emitTurnEnd).not.toHaveBeenCalled(); }); - it('end_turn: emits turn_end and finishes session', async () => { + it('end_turn: emits turn_end then routes to the finishing step', async () => { const emitTurnEnd = vi.fn(async () => {}); const finishSession = vi.fn(async (rec) => { rec.state = 'stopped'; @@ -107,10 +107,11 @@ describe('applySteeringCheckOutcome', () => { await applySteeringCheckOutcome(ports, rec, { kind: 'end_turn' }); - expect(rec.state).toBe('stopped'); + // agent_end is deferred to turn::finishing; the drain only emits turn_end here. + expect(rec.state).toBe('finishing'); expect(rec.turn_end_emitted).toBe(true); // 4th arg is the optional model_limit, undefined here (no model_meta on rec). expect(emitTurnEnd).toHaveBeenCalledWith('s1', expect.anything(), [], undefined); - expect(finishSession).toHaveBeenCalled(); + expect(finishSession).not.toHaveBeenCalled(); }); }); diff --git a/harness/tests/turn-orchestrator/steering.test.ts b/harness/tests/turn-orchestrator/steering.test.ts index a5b7603e..2a2431f9 100644 --- a/harness/tests/turn-orchestrator/steering.test.ts +++ b/harness/tests/turn-orchestrator/steering.test.ts @@ -49,7 +49,7 @@ describe('handleSteering', () => { expect(emitSpy).not.toHaveBeenCalled(); }); - it('end_turn: emits turn_end then finishes the session (agent_end + stopped)', async () => { + it('end_turn: emits turn_end then routes to the finishing step', async () => { const { iii } = makeIii(); const rec = steeringRec('s1'); const store = installMockTurnStore(); @@ -57,11 +57,15 @@ describe('handleSteering', () => { await handleSteering(iii, rec); - expect(rec.state).toBe('stopped'); + // turn_end is emitted now; agent_end is deferred to turn::finishing. + expect(rec.state).toBe('finishing'); expect(rec.turn_end_emitted).toBe(true); expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'turn_end' })); - expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'agent_end' })); - // agent_end is a signal: finishSession no longer reloads the transcript. + expect(emitSpy).not.toHaveBeenCalledWith( + iii, + 's1', + expect.objectContaining({ type: 'agent_end' }), + ); expect(store.loadMessages).not.toHaveBeenCalled(); }); @@ -78,7 +82,7 @@ describe('handleSteering', () => { await handleSteering(iii, rec); - expect(rec.state).toBe('stopped'); + expect(rec.state).toBe('finishing'); expect(rec.turn_end_emitted).toBe(true); expect(rec.last_assistant?.content[0]).toEqual( expect.objectContaining({ type: 'text', text: expect.stringContaining('max_turns') }), From 74d760d66874f6fc2832440d5beec71ddcf7096a Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Tue, 9 Jun 2026 20:10:18 -0300 Subject: [PATCH 06/11] =?UTF-8?q?fix(harness):=20harden=20the=20run::start?= =?UTF-8?q?=20guard=20=E2=80=94=20fail=20closed,=20stale=20takeover,=20ret?= =?UTF-8?q?ried=20wakes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three holes in the in-flight guard: - The guard read used the tolerant state wrapper, so a transient state::get failure looked like "no record" and the guard failed OPEN, clobbering a live turn. The check now uses a strict read (loadRecordStrict) that throws, so a read blip fails the kickoff with a retryable error instead. - A record wedged in a non-terminal state (lost wake, DLQ'd step) made the session busy forever — the old clobber was the accidental recovery path and nothing replaced it. run::start now deliberately takes over an in-flight record idle past STALE_TURN_TAKEOVER_MS (30m; updated_at_ms refreshes every transition). function_awaiting_approval is exempt: it parks on the user by design, and run::abort is its escape. - enqueueTurnStep swallowed enqueue failures with a warn, silently stranding the just-saved state. The wake is now retried with backoff; final failure logs at error level and the stale takeover is the recovery valve. Also: harness::trigger maps only an EXPLICIT started:false to 409 — a legacy run::start registration that returns {session_id} without the field is a success, not a conflict. And loadSessionMessages restores the 30s read budget the path always ran under; the 10s introduced with the shared reader could fail whole turns on long sessions. --- harness/src/harness/trigger.ts | 7 ++- harness/src/turn-orchestrator/run-start.ts | 36 ++++++++++--- .../state-runtime/context-view.ts | 5 +- .../turn-orchestrator/state-runtime/store.ts | 54 +++++++++++++++---- .../_helpers/mockTurnStore.ts | 1 + .../tests/turn-orchestrator/run-start.test.ts | 49 +++++++++++++++-- 6 files changed, 131 insertions(+), 21 deletions(-) diff --git a/harness/src/harness/trigger.ts b/harness/src/harness/trigger.ts index ccb4e511..766bbbb0 100644 --- a/harness/src/harness/trigger.ts +++ b/harness/src/harness/trigger.ts @@ -48,9 +48,12 @@ export function register(iii: ISdk): void { timeoutMs: BRIDGE_TIMEOUT_MS, }); // A turn was already in flight: surface a 409 so the client can wait/retry - // instead of treating the ignored kickoff as a started turn. + // instead of treating the ignored kickoff as a started turn. Only an + // EXPLICIT started:false means busy — a legacy run::start registration + // that predates the field returns {session_id} only, and its kickoffs are + // successes, not conflicts. return { - status_code: result.started ? 200 : 409, + status_code: result.started === false ? 409 : 200, headers: { 'content-type': 'application/json' }, body: result, }; diff --git a/harness/src/turn-orchestrator/run-start.ts b/harness/src/turn-orchestrator/run-start.ts index 136e10d3..53725681 100644 --- a/harness/src/turn-orchestrator/run-start.ts +++ b/harness/src/turn-orchestrator/run-start.ts @@ -16,6 +16,16 @@ import { RunStartPayloadSchema, type RunStartPayload, type RunStartResult } from import { createTurnStore } from './state-runtime/store.js'; import { isTurnInFlight, newRecord } from './state.js'; +/** + * Idle window after which an in-flight (non-parked) record is treated as + * abandoned and a new run::start may take it over. `updated_at_ms` bumps on + * every state transition, so a live turn refreshes it each round-trip; a + * single very long provider stream is the one legitimate gap, hence the + * generous window. `function_awaiting_approval` is exempt — it parks on the + * user indefinitely by design; `run::abort` is its escape, not staleness. + */ +export const STALE_TURN_TAKEOVER_MS = 30 * 60 * 1000; + export async function execute(iii: ISdk, payload: RunStartPayload): Promise { const store = createTurnStore(iii); const { session_id, messages, max_turns, message_id: _message_id, ...run } = payload; @@ -23,16 +33,30 @@ export async function execute(iii: ISdk, payload: RunStartPayload): Promise STALE_TURN_TAKEOVER_MS; + if (!takeOver) { + logger.warn('run::start ignored: session already has a turn in flight', { + session_id, + state: existing.state, + }); + return { session_id, started: false, reason: 'session_busy' }; + } + logger.error('run::start taking over a stale in-flight turn', { session_id, state: existing.state, + idle_ms: idleMs, }); - return { session_id, started: false, reason: 'session_busy' }; } // Single ensure for the whole run: this is the gateway that begins the turn diff --git a/harness/src/turn-orchestrator/state-runtime/context-view.ts b/harness/src/turn-orchestrator/state-runtime/context-view.ts index ffc763cd..b94655c5 100644 --- a/harness/src/turn-orchestrator/state-runtime/context-view.ts +++ b/harness/src/turn-orchestrator/state-runtime/context-view.ts @@ -59,7 +59,10 @@ export async function loadSessionMessages( const resp = await iii.trigger({ function_id: 'session-tree::messages', payload: { session_id }, - timeoutMs: 10_000, + // Matches the SDK default this read always ran under: long sessions + // (thousands of entries) can legitimately take >10s to list+sort, and a + // tighter budget here fails the whole turn step. + timeoutMs: 30_000, }); return resp?.messages ?? []; } diff --git a/harness/src/turn-orchestrator/state-runtime/store.ts b/harness/src/turn-orchestrator/state-runtime/store.ts index 85b305b1..6c4ed8dc 100644 --- a/harness/src/turn-orchestrator/state-runtime/store.ts +++ b/harness/src/turn-orchestrator/state-runtime/store.ts @@ -4,7 +4,7 @@ */ import { TriggerAction, type ISdk } from '../../runtime/iii.js'; -import { stateGet, stateSet } from '../../runtime/state.js'; +import { createState, stateGet, stateSet } from '../../runtime/state.js'; import { logger } from '../../runtime/otel.js'; import type { AgentMessage } from '../../types/agent-message.js'; import { RUN_REQUEST_SCOPE, TURN_STATE_SCOPE } from '../state.js'; @@ -33,20 +33,49 @@ export function shouldWakeStep(previousState: TurnState | null, newState: TurnSt return true; } +const WAKE_ENQUEUE_ATTEMPTS = 3; +const WAKE_ENQUEUE_BACKOFF_MS = 250; + +/** + * Enqueue the next FSM step. Retried because a lost wake strands the record in + * a non-terminal state with nothing scheduled to advance it (and run::start + * then reports the session busy until its stale-takeover window). On final + * failure we log at error level and rely on that takeover as the recovery + * valve — re-enqueueing from a later save is suppressed by shouldWakeStep. + */ async function enqueueTurnStep(iii: ISdk, session_id: string, state: TurnState): Promise { - try { - await iii.trigger({ - function_id: `turn::${state}`, - payload: { session_id }, - action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), - }); - } catch (err) { - logger.warn('wakeStep failed', { session_id, state, err: String(err) }); + for (let attempt = 1; attempt <= WAKE_ENQUEUE_ATTEMPTS; attempt++) { + try { + await iii.trigger({ + function_id: `turn::${state}`, + payload: { session_id }, + action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), + }); + return; + } catch (err) { + if (attempt === WAKE_ENQUEUE_ATTEMPTS) { + logger.error('wakeStep failed after retries; turn is stranded until stale takeover', { + session_id, + state, + attempts: attempt, + err: String(err), + }); + return; + } + await new Promise((resolve) => setTimeout(resolve, WAKE_ENQUEUE_BACKOFF_MS * attempt)); + } } } export type TurnStore = { loadRecord(session_id: string): Promise; + /** + * Like {@link loadRecord} but THROWS on a state-read failure instead of + * tolerantly returning null. For guards that must fail closed: run::start's + * in-flight check would clobber a live turn if a transient read error were + * indistinguishable from "no record". + */ + loadRecordStrict(session_id: string): Promise; saveRecord(rec: TurnStateRecord, previous?: TurnStateRecord | null): Promise; writeRecord(rec: TurnStateRecord): Promise; ensureSession(session_id: string): Promise; @@ -132,11 +161,18 @@ async function persistRecord( } export function createTurnStore(iii: ISdk): TurnStore { + const strictState = createState(iii, { tolerant: false }); + return { async loadRecord(session_id) { return parseTurnStateRecord(await scopedGet(iii, TURN_STATE_SCOPE, session_id)); }, + async loadRecordStrict(session_id) { + const raw = await strictState.get({ scope: TURN_STATE_SCOPE, key: session_id }); + return parseTurnStateRecord(raw); + }, + async writeRecord(rec) { await scopedSet(iii, TURN_STATE_SCOPE, rec.session_id, rec); }, diff --git a/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts b/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts index 2fd5e54b..0711ceb1 100644 --- a/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts +++ b/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts @@ -20,6 +20,7 @@ export type MockTurnStore = { export function mockTurnStore(overrides: Partial = {}): MockTurnStore { return { loadRecord: vi.fn(async () => null), + loadRecordStrict: vi.fn(async () => null), saveRecord: vi.fn(async () => {}), writeRecord: vi.fn(async () => {}), ensureSession: vi.fn(async () => {}), diff --git a/harness/tests/turn-orchestrator/run-start.test.ts b/harness/tests/turn-orchestrator/run-start.test.ts index 4d4fd6dd..43ac8305 100644 --- a/harness/tests/turn-orchestrator/run-start.test.ts +++ b/harness/tests/turn-orchestrator/run-start.test.ts @@ -1,6 +1,10 @@ import { describe, expect, it, vi } from 'vitest'; import { TriggerAction, type ISdk } from '../../src/runtime/iii.js'; -import { execute, register } from '../../src/turn-orchestrator/run-start.js'; +import { + STALE_TURN_TAKEOVER_MS, + execute, + register, +} from '../../src/turn-orchestrator/run-start.js'; import { RunStartPayloadSchema } from '../../src/turn-orchestrator/schemas.js'; import { TURN_STEP_QUEUE } from '../../src/turn-orchestrator/state-runtime/store.js'; @@ -197,14 +201,15 @@ describe('execute', () => { return { iii, calls }; } - const inFlightRecord = (state: string) => ({ + /** A record whose last transition just happened (fresh = genuinely running). */ + const inFlightRecord = (state: string, updated_at_ms = Date.now()) => ({ session_id: 'sess-1', state, turn_count: 1, function_results: [], turn_end_emitted: false, started_at_ms: 1, - updated_at_ms: 2, + updated_at_ms, }); it('refuses to clobber a turn already in flight and makes no writes', async () => { @@ -238,4 +243,42 @@ describe('execute', () => { expect(result).toEqual({ session_id: 'sess-1', started: true }); expect(calls.some((c) => c.function_id === 'session-tree::append_batch')).toBe(true); }); + + it('takes over an in-flight record idle past the stale window (wedge recovery)', async () => { + const stale = Date.now() - STALE_TURN_TAKEOVER_MS - 1_000; + const { iii, calls } = fakeIiiWithRecord(inFlightRecord('finishing', stale)); + + const result = await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); + + expect(result).toEqual({ session_id: 'sess-1', started: true }); + expect(calls.some((c) => c.function_id === 'session-tree::append_batch')).toBe(true); + }); + + it('never stale-takes-over a parked approval (user-paced; abort is its escape)', async () => { + const stale = Date.now() - STALE_TURN_TAKEOVER_MS - 1_000; + const { iii } = fakeIiiWithRecord(inFlightRecord('function_awaiting_approval', stale)); + + const result = await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); + + expect(result.started).toBe(false); + }); + + it('fails closed when the guard read fails (no clobber on a state blip)', async () => { + const iii = { + trigger: vi.fn(async (req: { function_id: string }) => { + if (req.function_id === 'state::get') throw new Error('state worker unavailable'); + return null; + }), + registerFunction: vi.fn(), + } as unknown as ISdk; + + await expect(execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload))).rejects.toThrow( + /state worker unavailable/, + ); + // Nothing was written before the guard read resolved. + const calls = (iii.trigger as ReturnType).mock.calls as Array< + [{ function_id: string }] + >; + expect(calls.some(([req]) => req.function_id === 'state::set')).toBe(false); + }); }); From 554a0a8bec353ab7171d1857fef340c88f140e17 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Tue, 9 Jun 2026 20:10:34 -0300 Subject: [PATCH 07/11] =?UTF-8?q?feat(harness):=20run::abort=20=E2=80=94?= =?UTF-8?q?=20user=20interrupt=20for=20an=20in-flight=20turn?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The run::start busy guard means an in-flight session rejects new messages, so users need a deliberate way to end a turn: a runaway agent loop, an approval prompt they walked away from, or a turn they changed their mind about. Previously the only "interrupt" was the accidental clobber the guard removed. run::abort loads the record (strict read — an abort must not silently no-op on a read blip), and for any in-flight non-finishing turn: surfaces a synthetic aborted assistant (message_complete), emits turn_end, clears parked approvals from the record, and routes to the finishing step for agent_end + stopped. No-op when nothing is running or the turn is already finishing. Best-effort against an actively-executing step (record writes are last-write-wins); the abort lands reliably in the gaps between steps, so a retry interrupts even a busy loop. Parked approval turns abort cleanly — no step holds them. Also wires the 'aborted' approval decision end-to-end: approval::resolve now accepts it on the wire (the orchestrator's read side always handled it but no writer existed), and an aborted call's synthetic result terminates the turn instead of resuming the loop for another round-trip. --- harness/src/approval-gate/schemas.ts | 5 +- .../function-awaiting-approval/run.ts | 5 +- harness/src/turn-orchestrator/register.ts | 2 + harness/src/turn-orchestrator/run-abort.ts | 71 ++++++++++ harness/src/turn-orchestrator/schemas.ts | 10 ++ harness/tests/approval-gate/schemas.test.ts | 9 ++ .../function-awaiting-approval.test.ts | 5 + .../tests/turn-orchestrator/run-abort.test.ts | 124 ++++++++++++++++++ 8 files changed, 229 insertions(+), 2 deletions(-) create mode 100644 harness/src/turn-orchestrator/run-abort.ts create mode 100644 harness/tests/turn-orchestrator/run-abort.test.ts diff --git a/harness/src/approval-gate/schemas.ts b/harness/src/approval-gate/schemas.ts index 5e5fabbb..2ffd0fed 100644 --- a/harness/src/approval-gate/schemas.ts +++ b/harness/src/approval-gate/schemas.ts @@ -43,7 +43,10 @@ export const DEFAULT_APPROVAL_SETTINGS: ApprovalSettings = { mode_set_at: 0, }; -const wireDecisionSchema = z.enum(['allow', 'deny']); +// 'aborted' is a user-initiated cancel of the parked call: the orchestrator +// converts it to a terminating synthetic result (see denialResultFromDecision) +// so the turn ends instead of resuming for another round-trip. +const wireDecisionSchema = z.enum(['allow', 'deny', 'aborted']); const deniedBySchema = z.enum(['permissions', 'user', 'gate_unavailable']); diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/run.ts b/harness/src/turn-orchestrator/function-awaiting-approval/run.ts index 9b15024e..f6cb3d49 100644 --- a/harness/src/turn-orchestrator/function-awaiting-approval/run.ts +++ b/harness/src/turn-orchestrator/function-awaiting-approval/run.ts @@ -25,7 +25,10 @@ export function denialResultFromDecision(decision: ApprovalDecision): FunctionRe decision: decision.decision, reason, }, - terminate: false, + // A denial resumes the loop so the model can react; an abort is the user + // saying "stop" — it ends the turn (when every result in the batch + // terminates; mixed batches still resume). + terminate: decision.decision === 'aborted', }; } diff --git a/harness/src/turn-orchestrator/register.ts b/harness/src/turn-orchestrator/register.ts index fc53ff4b..57b9e3e2 100644 --- a/harness/src/turn-orchestrator/register.ts +++ b/harness/src/turn-orchestrator/register.ts @@ -4,12 +4,14 @@ import { register as registerFinishing } from './finishing.js'; import { register as registerFunctionAwaitingApproval } from './function-awaiting-approval/process.js'; import { register as registerFunctionExecute } from './function-execute/process.js'; import { register as registerGetState } from './get-state.js'; +import { register as registerRunAbort } from './run-abort.js'; import { register as registerRunStart } from './run-start.js'; import { register as registerProvisioning } from './provisioning/process.js'; import { register as registerSteeringCheck } from './steering-check/process.js'; export async function register(iii: ISdk, _ctx: { configPath: string }): Promise { registerRunStart(iii); + registerRunAbort(iii); registerProvisioning(iii); registerAssistantStreaming(iii); registerFunctionExecute(iii); diff --git a/harness/src/turn-orchestrator/run-abort.ts b/harness/src/turn-orchestrator/run-abort.ts new file mode 100644 index 00000000..369c5df7 --- /dev/null +++ b/harness/src/turn-orchestrator/run-abort.ts @@ -0,0 +1,71 @@ +/** + * `run::abort` — user-initiated interrupt for a session's in-flight turn. + * + * The run::start busy guard makes an in-flight session reject new messages, so + * users need a deliberate way to end a turn: a runaway agent loop, an + * approval prompt they want to walk away from, or a turn they simply changed + * their mind about. Abort surfaces a synthetic `aborted` assistant, emits + * turn_end, clears any parked approvals from the record, and routes to the + * `finishing` step (agent_end + stopped). + * + * Best-effort against an actively-executing step: record writes are + * last-write-wins, so a step holding the record in memory can overwrite the + * abort when it saves — its NEXT wake then runs and the loop continues; the + * abort lands reliably in the gaps between steps (every round-trip has one), + * so a retry interrupts even a busy loop. Parked states + * (function_awaiting_approval) have no in-flight step and abort cleanly. + */ + +import type { ISdk } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; +import { emit } from './events.js'; +import { RunAbortPayloadSchema, type RunAbortPayload, type RunAbortResult } from './schemas.js'; +import { createTurnStatePorts } from './state-runtime/ports.js'; +import { createTurnStore } from './state-runtime/store.js'; +import { emitTurnEndOnce, transitionToFinishing } from './state-runtime/turn-end.js'; +import { isTurnInFlight, type TurnStateRecord } from './state.js'; +import { syntheticAssistant } from './synthetic-assistant.js'; + +export async function execute(iii: ISdk, payload: RunAbortPayload): Promise { + const { session_id } = payload; + const store = createTurnStore(iii); + // Strict read: aborting based on a tolerant null would silently no-op a + // session the user is actively trying to stop. + const rec = await store.loadRecordStrict(session_id); + + if (!rec || !isTurnInFlight(rec)) { + return { session_id, aborted: false, state: rec?.state ?? null }; + } + if (rec.state === 'finishing') { + // Already on the way out; the finishing wake will emit agent_end + stop. + return { session_id, aborted: false, state: 'finishing' }; + } + + logger.info('run::abort: ending in-flight turn', { session_id, state: rec.state }); + + const ports = createTurnStatePorts(iii, store); + const msg = syntheticAssistant({ stop_reason: 'aborted', text: 'run aborted by user' }); + rec.last_assistant = msg; + // Clear parked approvals so the view stops advertising prompts for a dead + // turn; a late approval::resolve decision-wake will stale-skip. + rec.awaiting_approval = []; + (rec as TurnStateRecord).work = undefined; + + await emit(iii, session_id, { type: 'message_complete', message: msg, body_streamed: false }); + await emitTurnEndOnce(ports, rec, msg); + transitionToFinishing(rec); + await store.saveRecord(rec); + + return { session_id, aborted: true, state: 'finishing' }; +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'run::abort', + async (payload: RunAbortPayload) => execute(iii, RunAbortPayloadSchema.parse(payload)), + { + description: + 'Abort the session\'s in-flight turn: emit an aborted assistant + turn_end, clear parked approvals, and route to finishing. Best-effort while a step is mid-execution (retry lands between steps); no-op when no turn is running.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/schemas.ts b/harness/src/turn-orchestrator/schemas.ts index 5a25cb8f..98d706bf 100644 --- a/harness/src/turn-orchestrator/schemas.ts +++ b/harness/src/turn-orchestrator/schemas.ts @@ -46,6 +46,16 @@ export type RunStartResult = { reason?: 'session_busy'; }; +// --- run::abort --- +export const RunAbortPayloadSchema = SessionIdPayloadSchema; +export type RunAbortPayload = z.infer; +/** `aborted` is false when no turn was running (or it was already finishing). */ +export type RunAbortResult = { + session_id: string; + aborted: boolean; + state: TurnState | null; +}; + // --- turn::{state} durable step --- export const TurnStepPayloadSchema = SessionIdPayloadSchema; export type TurnStepPayload = z.infer; diff --git a/harness/tests/approval-gate/schemas.test.ts b/harness/tests/approval-gate/schemas.test.ts index 1ba24949..3c21d667 100644 --- a/harness/tests/approval-gate/schemas.test.ts +++ b/harness/tests/approval-gate/schemas.test.ts @@ -26,6 +26,15 @@ describe('ResolvePayloadSchema — id normalization & validation', () => { expect(parsed.function_call_id).toBe('fc-1'); }); + it('accepts the aborted decision on the wire (user cancel of a parked call)', () => { + const parsed = ResolvePayloadSchema.parse({ + session_id: 's', + function_call_id: 'fc-1', + decision: 'aborted', + }); + expect(parsed.decision).toBe('aborted'); + }); + it.each([ ['function_call_id missing', { session_id: 's', decision: 'allow' }], ['tool_call_id only (legacy)', { session_id: 's', tool_call_id: 'legacy', decision: 'allow' }], diff --git a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts index 82085ccd..e4876ed3 100644 --- a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts +++ b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts @@ -94,6 +94,11 @@ describe('applyDecisionToPrepared', () => { result: denialResultFromDecision({ decision: 'deny', reason: 'policy' }), }); }); + + it('a denial resumes the loop; an abort terminates the turn', () => { + expect(denialResultFromDecision({ decision: 'deny', reason: null }).terminate).toBe(false); + expect(denialResultFromDecision({ decision: 'aborted', reason: null }).terminate).toBe(true); + }); }); describe('handleAwaitingApproval', () => { diff --git a/harness/tests/turn-orchestrator/run-abort.test.ts b/harness/tests/turn-orchestrator/run-abort.test.ts new file mode 100644 index 00000000..ddd4db92 --- /dev/null +++ b/harness/tests/turn-orchestrator/run-abort.test.ts @@ -0,0 +1,124 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { execute, register } from '../../src/turn-orchestrator/run-abort.js'; +import { newRecord, type TurnStateRecord } from '../../src/turn-orchestrator/state.js'; + +type TriggerCall = { function_id: string; payload: unknown }; + +/** iii whose state::get on turn_state returns the given persisted record. */ +function fakeIiiWithRecord(record: unknown): { iii: ISdk; calls: TriggerCall[] } { + const calls: TriggerCall[] = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { + calls.push({ function_id: req.function_id, payload: req.payload }); + if (req.function_id === 'state::get') return record; + return null; + }), + registerFunction: vi.fn(), + } as unknown as ISdk; + return { iii, calls }; +} + +function parkedRecord(state: TurnStateRecord['state']): TurnStateRecord { + return { + ...newRecord('s1'), + state, + awaiting_approval: [ + { function_call_id: 'fc-1', function_id: 'shell::run', args: {} }, + ], + } as TurnStateRecord; +} + +function streamSets(calls: TriggerCall[]): Array<{ type?: string }> { + return calls + .filter((c) => c.function_id === 'stream::set') + .map((c) => (c.payload as { data?: { type?: string } }).data ?? {}); +} + +describe('run::abort execute', () => { + it('aborts a parked approval turn: aborted message, turn_end, cleared approvals, finishing', async () => { + const { iii, calls } = fakeIiiWithRecord(parkedRecord('function_awaiting_approval')); + + const result = await execute(iii, { session_id: 's1' }); + + expect(result).toEqual({ session_id: 's1', aborted: true, state: 'finishing' }); + + const events = streamSets(calls).map((e) => e.type); + expect(events).toContain('message_complete'); + expect(events).toContain('turn_end'); + // agent_end belongs to the finishing step, not the abort itself. + expect(events).not.toContain('agent_end'); + + // The record persisted to finishing with parked approvals cleared. + const turnStateSet = calls.find( + (c) => + c.function_id === 'state::set' && + (c.payload as { scope?: string }).scope === 'turn_state', + ); + const saved = (turnStateSet?.payload as { value: TurnStateRecord }).value; + expect(saved.state).toBe('finishing'); + expect(saved.awaiting_approval).toEqual([]); + expect(saved.last_assistant?.stop_reason).toBe('aborted'); + }); + + it('aborts a streaming-state record (best-effort interrupt between steps)', async () => { + const rec = { ...newRecord('s1'), state: 'assistant_streaming' as const }; + const { iii } = fakeIiiWithRecord(rec); + + const result = await execute(iii, { session_id: 's1' }); + + expect(result.aborted).toBe(true); + expect(result.state).toBe('finishing'); + }); + + it('no-ops when no turn is running', async () => { + const { iii, calls } = fakeIiiWithRecord({ ...newRecord('s1'), state: 'stopped' }); + + const result = await execute(iii, { session_id: 's1' }); + + expect(result).toEqual({ session_id: 's1', aborted: false, state: 'stopped' }); + expect(calls.some((c) => c.function_id === 'state::set')).toBe(false); + }); + + it('no-ops when the session has no record', async () => { + const { iii } = fakeIiiWithRecord(null); + + const result = await execute(iii, { session_id: 's1' }); + + expect(result).toEqual({ session_id: 's1', aborted: false, state: null }); + }); + + it('no-ops on a turn already finishing (it will stop on its own)', async () => { + const { iii, calls } = fakeIiiWithRecord({ ...newRecord('s1'), state: 'finishing' }); + + const result = await execute(iii, { session_id: 's1' }); + + expect(result).toEqual({ session_id: 's1', aborted: false, state: 'finishing' }); + expect(calls.some((c) => c.function_id === 'state::set')).toBe(false); + }); + + it('fails closed when the record read fails', async () => { + const iii = { + trigger: vi.fn(async (req: { function_id: string }) => { + if (req.function_id === 'state::get') throw new Error('state unavailable'); + return null; + }), + registerFunction: vi.fn(), + } as unknown as ISdk; + + await expect(execute(iii, { session_id: 's1' })).rejects.toThrow(/state unavailable/); + }); +}); + +describe('register', () => { + it('registers run::abort', () => { + const registered = new Map(); + const iii = { + registerFunction: (id: string, handler: unknown) => registered.set(id, handler), + } as unknown as ISdk; + + register(iii); + + expect(registered.has('run::abort')).toBe(true); + }); +}); From 56e38ce566b8be22952ec2210fedc3e55f5ce72d Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Tue, 9 Jun 2026 20:10:53 -0300 Subject: [PATCH 08/11] fix(console): surface session-busy kickoffs and wire Stop to run::abort MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chat backend discarded the harness::trigger result, so a refused run::start (started:false — the session already has a turn in flight) was invisible: the user's bubble rendered, the message was never appended, and the stream generator waited on session events that might never arrive (e.g. a turn parked on an approval) — a silent drop with a stuck spinner. realStream now reads the kickoff envelope: on started:false it yields a stop-reason notice telling the user the message was not sent and to stop the running turn or answer its pending approval, then ends the stream cleanly. The Stop button previously only aborted the client-side generator — the server turn kept running and kept the session busy. It now also calls the new run::abort (best-effort server cancel) so stopping actually frees the session for the next message. --- console/web/src/components/chat/ChatView.tsx | 6 +- console/web/src/lib/backend/real.ts | 66 +++++++++++++++----- console/web/src/lib/backend/types.ts | 6 ++ 3 files changed, 61 insertions(+), 17 deletions(-) diff --git a/console/web/src/components/chat/ChatView.tsx b/console/web/src/components/chat/ChatView.tsx index 16ffcbc0..606325a7 100644 --- a/console/web/src/components/chat/ChatView.tsx +++ b/console/web/src/components/chat/ChatView.tsx @@ -477,7 +477,11 @@ export function ChatView({ const handleStop = useCallback(() => { abortRef.current?.abort() - }, []) + // Server-side cancel: without it the turn keeps running and run::start + // rejects the next message as busy. Best-effort (a retry lands between + // FSM steps); errors are non-fatal — the client stream already stopped. + void backend.abortRun?.(sessionId).catch(() => {}) + }, [backend, sessionId]) // Covers the gap between submit / fcall-end and the next streamed token, // where the assistant/thought shimmer hasn't yet rendered. diff --git a/console/web/src/lib/backend/real.ts b/console/web/src/lib/backend/real.ts index 719b0cb7..e2c8af8d 100644 --- a/console/web/src/lib/backend/real.ts +++ b/console/web/src/lib/backend/real.ts @@ -97,24 +97,37 @@ async function* realStream( const { provider, model: modelId } = resolveRunParams(model) let kickoffError: Error | null = null + let sessionBusy = false client - .call('harness::trigger', { - session_id: sessionId, - message_id: messageId, - payload: { + .call<{ status_code?: number; body?: { started?: boolean } }>( + 'harness::trigger', + { session_id: sessionId, message_id: messageId, - provider, - model: modelId, - mode, - messages: [ - { - role: 'user', - content: [{ type: 'text', text: prompt }], - timestamp: Date.now(), - }, - ], + payload: { + session_id: sessionId, + message_id: messageId, + provider, + model: modelId, + mode, + messages: [ + { + role: 'user', + content: [{ type: 'text', text: prompt }], + timestamp: Date.now(), + }, + ], + }, }, + ) + .then((res) => { + // run::start refused: a turn is still in flight for this session and + // the message was NOT appended. Surface it instead of waiting on + // events that may never come (e.g. a turn parked on an approval). + if (res?.body?.started === false) { + sessionBusy = true + wake() + } }) .catch((err) => { kickoffError = err instanceof Error ? err : new Error(String(err)) @@ -126,6 +139,16 @@ async function* realStream( while (true) { if (signal?.aborted) return + if (sessionBusy) { + yield { + kind: 'stop-reason', + reason: 'error', + message: + 'A turn is still running in this session — your message was not sent. Stop the current turn (or answer its pending approval), then resend.', + } + yield { kind: 'assistant-end' } + return + } if (kickoffError) { const err = kickoffError as Error yield { @@ -135,13 +158,18 @@ async function* realStream( yield { kind: 'assistant-end' } return } - while (queue.length === 0 && !kickoffError && !signal?.aborted) { + while ( + queue.length === 0 && + !kickoffError && + !sessionBusy && + !signal?.aborted + ) { await new Promise((resolve) => { resolveNext = resolve }) } if (signal?.aborted) return - if (kickoffError) continue + if (kickoffError || sessionBusy) continue const event = queue.shift() if (!event) continue const streamEvents = translate(event, sessionId) @@ -169,6 +197,11 @@ async function realResolveApproval( }) } +async function realAbortRun(sessionId: string): Promise { + const client = await getIiiClient() + await client.call('run::abort', { session_id: sessionId }) +} + async function realCompactSession( sessionId: string, model: ModelId, @@ -249,5 +282,6 @@ export const realBackend: ChatBackend = { id: 'real', stream: realStream, resolveApproval: realResolveApproval, + abortRun: realAbortRun, compactSession: realCompactSession, } diff --git a/console/web/src/lib/backend/types.ts b/console/web/src/lib/backend/types.ts index 03f32df7..e7248de5 100644 --- a/console/web/src/lib/backend/types.ts +++ b/console/web/src/lib/backend/types.ts @@ -114,6 +114,12 @@ export interface ChatBackend { functionCallId: string, decision: 'allow' | 'deny', ): Promise + /** + * Server-side cancel of the session's in-flight turn (`run::abort`). + * The client-side AbortSignal only stops rendering; without this the + * server keeps running and `run::start` rejects new messages as busy. + */ + abortRun?(sessionId: string): Promise /** * Powers `/compact`. Compacts the session-tree (the single source of * truth) directly. `contextWindow` skips the server's `models::get` From f8673fb4281c17388e3d402882c112b6014f5acf Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Tue, 9 Jun 2026 20:27:40 -0300 Subject: [PATCH 09/11] fix(console): correlate fcall-end by function_call_id so parallel approvals don't wedge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A turn with parallel tool calls that each need approval got stuck: the user clicked approve and the prompt hung on "approving…" forever, while an already-run call was mislabeled as the pending one. Root cause was the fcall reducer matching function_execution_end to "the most recently started fcall" (a single cursor) instead of by function_call_id. Approval-resolved calls execute with skipStart (no function_execution_start), so the resolved call's end landed on whichever pending card was created last — clearing the WRONG card and leaving the real one stuck pending. The actually- awaiting call then had no live prompt, so the turn never resumed. (The backend resume itself is correct: writing the missing decision via approval::resolve advances the turn immediately.) - translate now threads function_call_id onto fcall-end (it was already on the wire event, just dropped), and emits a new fcall-approval-cleared event for calls that leave awaiting_approval so their prompt clears even when no execution follows (aborted / resolved out-of-band). - ChatView matches fcall-end to its card by function_call_id (cursor fallback only when no id is carried, e.g. the mock backend) and handles fcall-approval-cleared. This is the bug behind the stuck approval; the run::start guard added earlier made it unrecoverable-by-new-message, and the Stop→run::abort wiring is the safety net if a session ever does wedge. --- console/web/src/components/chat/ChatView.tsx | 28 ++++++++++++++--- console/web/src/lib/backend/translate.test.ts | 30 +++++++++++++++++-- console/web/src/lib/backend/translate.ts | 14 +++++++-- console/web/src/lib/backend/types.ts | 23 +++++++++++++- .../web/src/stories/playground/EventLog.tsx | 1 + 5 files changed, 87 insertions(+), 9 deletions(-) diff --git a/console/web/src/components/chat/ChatView.tsx b/console/web/src/components/chat/ChatView.tsx index 606325a7..548786b4 100644 --- a/console/web/src/components/chat/ChatView.tsx +++ b/console/web/src/components/chat/ChatView.tsx @@ -340,15 +340,35 @@ export function ChatView({ break } case 'fcall-end': { - if (!fcallId) break - onPatchMessage(conversationId, fcallId, { + // Correlate by function_call_id: parallel and approval-resolved + // calls finish out of order, so the cursor (last-started) is the + // wrong card. Fall back to the cursor only when no id is carried + // (mock backend / legacy events). + const targetId: string | null = event.functionCallId + ? ([...fcallMap.entries()].find( + ([, fcid]) => fcid === event.functionCallId, + )?.[0] ?? null) + : fcallId + if (!targetId) break + onPatchMessage(conversationId, targetId, { output: event.output, durationMs: event.durationMs, running: false, pendingApproval: false, }) - fcallMap.delete(fcallId) - fcallId = null + fcallMap.delete(targetId) + if (targetId === fcallId) fcallId = null + break + } + case 'fcall-approval-cleared': { + const clearedId = [...fcallMap.entries()].find( + ([, fcid]) => fcid === event.functionCallId, + )?.[0] + if (clearedId) { + onPatchMessage(conversationId, clearedId, { + pendingApproval: false, + }) + } break } case 'assistant-token': { diff --git a/console/web/src/lib/backend/translate.test.ts b/console/web/src/lib/backend/translate.test.ts index 82f2d8c4..3e4bb37f 100644 --- a/console/web/src/lib/backend/translate.test.ts +++ b/console/web/src/lib/backend/translate.test.ts @@ -231,7 +231,7 @@ describe('createAgentEventTranslator — turn_state_changed', () => { ).toEqual([]) }) - it('emits nothing when state leaves function_awaiting_approval (the orchestrator emits the matching function_execution_end)', () => { + it('clears the pending prompt when a call leaves function_awaiting_approval', () => { const { translate } = createAgentEventTranslator() translate( { @@ -246,6 +246,9 @@ describe('createAgentEventTranslator — turn_state_changed', () => { }, 'sess-a', ) + // The resolved call drops out of awaiting_approval — its prompt must be + // cleared explicitly. Relying on the matching function_execution_end is + // unsafe: parallel/approval-resolved ends arrive uncorrelated to the card. expect( translate( { @@ -265,7 +268,30 @@ describe('createAgentEventTranslator — turn_state_changed', () => { }, 'sess-a', ), - ).toEqual([]) + ).toEqual([{ kind: 'fcall-approval-cleared', functionCallId: 'fc-1' }]) + }) + + it('threads function_call_id onto fcall-end so the consumer can correlate', () => { + const { translate } = createAgentEventTranslator() + const out = translate( + { + type: 'function_execution_end', + function_call_id: 'fc-7', + function_id: 'shell::fs::ls', + result: { content: [], details: {} }, + is_error: false, + duration_ms: 30, + }, + 'sess-a', + ) + expect(out).toEqual([ + { + kind: 'fcall-end', + output: { content: [], details: {} }, + durationMs: 30, + functionCallId: 'fc-7', + }, + ]) }) it('partitions mirrors by sessionId so two chats do not interfere', () => { diff --git a/console/web/src/lib/backend/translate.ts b/console/web/src/lib/backend/translate.ts index 54130ee9..2e02b9d9 100644 --- a/console/web/src/lib/backend/translate.ts +++ b/console/web/src/lib/backend/translate.ts @@ -46,8 +46,8 @@ export function createAgentEventTranslator(): { const prev = mirrors.get(sessionId) ?? [] const next = pendingApprovalsFromTurnState(event.new_value) mirrors.set(sessionId, next) - const { added } = diffPending(prev, next) - return added.map((entry) => ({ + const { added, removed } = diffPending(prev, next) + const out: StreamEvent[] = added.map((entry) => ({ kind: 'fcall-start' as const, functionId: entry.function_id, input: entry.args, @@ -55,6 +55,15 @@ export function createAgentEventTranslator(): { functionCallId: entry.function_call_id, sessionId, })) + // A call that left awaiting_approval is no longer pending: clear its prompt. + // (A resolved+executed call also emits fcall-end; double-clearing is fine.) + for (const entry of removed) { + out.push({ + kind: 'fcall-approval-cleared', + functionCallId: entry.function_call_id, + }) + } + return out } function translate(event: AgentEvent, sessionId?: string): StreamEvent[] { @@ -90,6 +99,7 @@ export function createAgentEventTranslator(): { ? wrapErrorOutput(event.result) : event.result, durationMs: event.duration_ms, + functionCallId: event.function_call_id, }, ] diff --git a/console/web/src/lib/backend/types.ts b/console/web/src/lib/backend/types.ts index e7248de5..634f1a56 100644 --- a/console/web/src/lib/backend/types.ts +++ b/console/web/src/lib/backend/types.ts @@ -27,7 +27,28 @@ export type StreamEvent = /** iii session_id owning this call — needed to resolve approval. */ sessionId?: string } - | { kind: 'fcall-end'; output: unknown; durationMs: number } + | { + kind: 'fcall-end' + output: unknown + durationMs: number + /** + * iii function_call_id of the call that ended. Parallel tool calls (and + * approval-resolved calls, which emit no fcall-start) finish out of + * order, so the consumer MUST match the end to its card by this id, not + * by "the most recently started call". + */ + functionCallId?: string + } + | { + /** + * A call left the pending-approval set without executing (aborted, or + * resolved out-of-band): clear its card's approval prompt so it doesn't + * hang on "approving…". A resolved-and-executed call also emits + * `fcall-end`; both patching to non-pending is idempotent. + */ + kind: 'fcall-approval-cleared' + functionCallId: string + } | { kind: 'assistant-token'; token: string } | { kind: 'assistant-end' } | { diff --git a/console/web/src/stories/playground/EventLog.tsx b/console/web/src/stories/playground/EventLog.tsx index b844fdbd..699d70a3 100644 --- a/console/web/src/stories/playground/EventLog.tsx +++ b/console/web/src/stories/playground/EventLog.tsx @@ -175,6 +175,7 @@ function toneFor(kind: StreamEvent['kind']): string { return 'text-ink-ghost' case 'fcall-start': case 'fcall-end': + case 'fcall-approval-cleared': return 'text-accent' case 'assistant-token': return 'text-ink-ghost' From c721d6694f7d93cae9a5448056b1989e30e4a612 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Tue, 9 Jun 2026 21:18:57 -0300 Subject: [PATCH 10/11] chore: strip explanatory code comments from harness and console Remove standalone and inline // comments across the turn-orchestrator, session-tree, provider, compaction, and console chat paths. Lint directives (biome-ignore) are kept. --- console/web/src/components/chat/ChatView.tsx | 26 ------------------- console/web/src/lib/backend/real.ts | 15 ----------- console/web/src/lib/backend/translate.test.ts | 3 --- console/web/src/lib/backend/translate.ts | 2 -- harness/src/approval-gate/schemas.ts | 9 ------- .../src/context-compaction/handler-async.ts | 3 --- .../src/context-compaction/model-resolver.ts | 3 --- harness/src/harness/trigger.ts | 5 ---- harness/src/provider-anthropic/auth.ts | 5 ---- harness/src/provider-anthropic/stream-fn.ts | 4 --- harness/src/provider-anthropic/stream.ts | 4 --- harness/src/session/tree/operations.ts | 11 +------- harness/src/session/tree/register.ts | 3 --- harness/src/session/tree/store.ts | 4 --- .../assistant-streaming/ports.ts | 4 --- .../assistant-streaming/run.ts | 9 ------- harness/src/turn-orchestrator/finishing.ts | 1 - .../function-awaiting-approval/run.ts | 3 --- .../function-execute/ports.ts | 3 --- .../turn-orchestrator/function-execute/run.ts | 3 --- harness/src/turn-orchestrator/preflight.ts | 5 ---- .../src/turn-orchestrator/provider-router.ts | 5 ---- .../turn-orchestrator/provisioning/process.ts | 5 ---- harness/src/turn-orchestrator/run-abort.ts | 7 +---- harness/src/turn-orchestrator/run-start.ts | 13 ---------- harness/src/turn-orchestrator/schemas.ts | 7 ----- .../state-runtime/context-view.ts | 3 --- .../turn-orchestrator/state-runtime/ports.ts | 4 --- .../turn-orchestrator/state-runtime/store.ts | 3 --- .../state-runtime/turn-end.ts | 1 - harness/src/turn-orchestrator/state.ts | 4 --- .../context-compaction/handler-async.test.ts | 1 - .../integration/mode-approval.e2e.test.ts | 4 --- .../integration/parallel-approval.e2e.test.ts | 23 ++-------------- harness/tests/provider-anthropic/auth.test.ts | 11 +------- harness/tests/session/operations.test.ts | 6 ----- .../_helpers/mockTurnStore.ts | 1 - .../tests/turn-orchestrator/assistant.test.ts | 7 ----- .../tests/turn-orchestrator/finishing.test.ts | 2 +- .../function-awaiting-approval.test.ts | 1 - .../function-execute.test.ts | 7 ----- .../tests/turn-orchestrator/functions.test.ts | 4 --- .../tests/turn-orchestrator/preflight.test.ts | 2 -- .../turn-orchestrator/provider-router.test.ts | 8 ------ .../provisioning-layer.test.ts | 2 -- .../tests/turn-orchestrator/run-abort.test.ts | 9 ++----- .../tests/turn-orchestrator/run-start.test.ts | 8 ------ harness/tests/turn-orchestrator/store.test.ts | 2 -- harness/tests/types/provider.test.ts | 2 -- 49 files changed, 8 insertions(+), 269 deletions(-) diff --git a/console/web/src/components/chat/ChatView.tsx b/console/web/src/components/chat/ChatView.tsx index 548786b4..937065e1 100644 --- a/console/web/src/components/chat/ChatView.tsx +++ b/console/web/src/components/chat/ChatView.tsx @@ -109,7 +109,6 @@ export function ChatView({ const harnessBlockedRef = useRef(harnessBlocked) harnessBlockedRef.current = harnessBlocked - // Matches iii.session.id on every span so the traces UI can group by it. const sessionId = useMemo( () => makeSessionId(conversation.id), [conversation.id], @@ -151,10 +150,6 @@ export function ChatView({ const handleAlwaysAllow = useMemo(() => { const resolveFn = backend.resolveApproval if (!resolveFn) return undefined - // "Approve always" is a per-session grant honored in every mode, so the - // button shows on every prompt (full mode never produces prompts, so - // it's moot there). Approves this call and stops asking for the same - // function for the rest of the conversation. return async (sId: string, functionCallId: string, functionId: string) => { await approvalSettings.approveAlways(functionId) await resolveFn(sId, functionCallId, 'allow') @@ -233,7 +228,6 @@ export function ChatView({ } onCompactConversation(conversationId, marker) } else { - // empty | busy | overflow | error: nothing rewritten server-side. onPatchMessage(conversationId, pendingId, { content: formatCompactResult(result), tone: compactResultTone(result), @@ -340,10 +334,6 @@ export function ChatView({ break } case 'fcall-end': { - // Correlate by function_call_id: parallel and approval-resolved - // calls finish out of order, so the cursor (last-started) is the - // wrong card. Fall back to the cursor only when no id is carried - // (mock backend / legacy events). const targetId: string | null = event.functionCallId ? ([...fcallMap.entries()].find( ([, fcid]) => fcid === event.functionCallId, @@ -404,12 +394,6 @@ export function ChatView({ break } case 'compaction': { - // Server-side context-compaction finished rewriting the - // session's flat-state. Append a marker so: - // 1. The user sees that compaction happened. - // 2. estimateConversationTokens stops counting pre-marker - // messages → the CTX bar drops to the real value. - // We append (not replace) so the transcript stays scrollable. const compactionContent = event.mode === 'sync' ? `compacted ${event.tokensBefore.toLocaleString()} tokens before continuing` @@ -429,10 +413,6 @@ export function ChatView({ break } case 'stop-reason': { - // The assistant turn ended abnormally — show the user why - // instead of leaving the response looking like it just - // ran out of words. Pre-fix this event didn't exist and - // the same condition produced a silently truncated reply. const noticeContent = formatStopReason( event.reason, event.message, @@ -463,7 +443,6 @@ export function ChatView({ } } } catch (err) { - // Backends signal semantic failures via fcall-end; thrown = abort or bug. if (!isAbortError(err)) { console.warn('[chat] stream errored', err) } @@ -497,14 +476,9 @@ export function ChatView({ const handleStop = useCallback(() => { abortRef.current?.abort() - // Server-side cancel: without it the turn keeps running and run::start - // rejects the next message as busy. Best-effort (a retry lands between - // FSM steps); errors are non-fatal — the client stream already stopped. void backend.abortRun?.(sessionId).catch(() => {}) }, [backend, sessionId]) - // Covers the gap between submit / fcall-end and the next streamed token, - // where the assistant/thought shimmer hasn't yet rendered. const isThinking = isStreaming && (() => { diff --git a/console/web/src/lib/backend/real.ts b/console/web/src/lib/backend/real.ts index e2c8af8d..026942d4 100644 --- a/console/web/src/lib/backend/real.ts +++ b/console/web/src/lib/backend/real.ts @@ -54,12 +54,6 @@ async function* realStream( r?.() } - // Subscribe directly to this session's `agent::events` stream via a scoped - // engine stream trigger (`group_id = sessionId`), replacing the harness - // fanout hop (`ui::subscribe` → per-browser `ui::session::event` push). - // Registered before the `harness::trigger` kickoff below — both travel the - // same ordered WS connection, so the trigger is in place before the turn's - // first event is written. const stopSubscription = startSessionEventsSubscription( client, sessionId, @@ -121,9 +115,6 @@ async function* realStream( }, ) .then((res) => { - // run::start refused: a turn is still in flight for this session and - // the message was NOT appended. Surface it instead of waiting on - // events that may never come (e.g. a turn parked on an approval). if (res?.body?.started === false) { sessionBusy = true wake() @@ -210,9 +201,6 @@ async function realCompactSession( const { provider, model: modelId } = resolveRunParams(model) const client = await getIiiClient() try { - // Passing limit.context lets the server skip the models::get lookup. - // We don't know max_output here; 4096 is the same conservative default - // the server falls back to when models::get returns nothing. const DEFAULT_MAX_OUTPUT = 4_096 const modelPayload: { id: string @@ -242,10 +230,7 @@ async function realCompactSession( if (resp?.status === 'ok') { const tokensBefore = typeof resp.tokens_before === 'number' ? resp.tokens_before : 0 - // Surface zero-token "ok" as semantic empty. if (tokensBefore === 0) return { status: 'empty' } - // Fallback placeholder for engines that predate summary_text on the - // wire; without it the marker has no to ship. const summaryText = typeof resp.summary_text === 'string' && resp.summary_text.length > 0 ? resp.summary_text diff --git a/console/web/src/lib/backend/translate.test.ts b/console/web/src/lib/backend/translate.test.ts index 3e4bb37f..6bd0bb58 100644 --- a/console/web/src/lib/backend/translate.test.ts +++ b/console/web/src/lib/backend/translate.test.ts @@ -246,9 +246,6 @@ describe('createAgentEventTranslator — turn_state_changed', () => { }, 'sess-a', ) - // The resolved call drops out of awaiting_approval — its prompt must be - // cleared explicitly. Relying on the matching function_execution_end is - // unsafe: parallel/approval-resolved ends arrive uncorrelated to the card. expect( translate( { diff --git a/console/web/src/lib/backend/translate.ts b/console/web/src/lib/backend/translate.ts index 2e02b9d9..0e8b6991 100644 --- a/console/web/src/lib/backend/translate.ts +++ b/console/web/src/lib/backend/translate.ts @@ -55,8 +55,6 @@ export function createAgentEventTranslator(): { functionCallId: entry.function_call_id, sessionId, })) - // A call that left awaiting_approval is no longer pending: clear its prompt. - // (A resolved+executed call also emits fcall-end; double-clearing is fine.) for (const entry of removed) { out.push({ kind: 'fcall-approval-cleared', diff --git a/harness/src/approval-gate/schemas.ts b/harness/src/approval-gate/schemas.ts index 2ffd0fed..2e1d5c5a 100644 --- a/harness/src/approval-gate/schemas.ts +++ b/harness/src/approval-gate/schemas.ts @@ -24,13 +24,7 @@ export type AlwaysAllowEntry = z.infer; export const ApprovalSettingsSchema = z.object({ mode: PermissionModeSchema, - // Auto-mode allowlist: curated on the Configuration screen, consulted - // ONLY in auto mode. always_allow: z.array(allowEntrySchema), - // Per-session "approve always" grants made from an approval prompt. - // Consulted in EVERY mode (it's a remembered human decision, not an - // auto-policy). `.default([])` keeps records written before this field - // existed parseable. approved_always: z.array(allowEntrySchema).default([]), mode_set_at: z.number().int().nonnegative(), }); @@ -43,9 +37,6 @@ export const DEFAULT_APPROVAL_SETTINGS: ApprovalSettings = { mode_set_at: 0, }; -// 'aborted' is a user-initiated cancel of the parked call: the orchestrator -// converts it to a terminating synthetic result (see denialResultFromDecision) -// so the turn ends instead of resuming for another round-trip. const wireDecisionSchema = z.enum(['allow', 'deny', 'aborted']); const deniedBySchema = z.enum(['permissions', 'user', 'gate_unavailable']); diff --git a/harness/src/context-compaction/handler-async.ts b/harness/src/context-compaction/handler-async.ts index 2cd3debe..53e0f818 100644 --- a/harness/src/context-compaction/handler-async.ts +++ b/harness/src/context-compaction/handler-async.ts @@ -78,9 +78,6 @@ export async function resolveModelFromEvent( let providerID = ev.message.provider || null; let modelID = ev.message.model || null; - // Closed system: the orchestrator threads the turn's limit onto turn_end, so - // when ids and the limit are present we trust them and skip the models::get - // round-trip. Empty ids (synthetic/error turns) fall through to the scan. if (providerID && modelID && ev.model_limit) { return { providerID, modelID, modelLimit: ev.model_limit }; } diff --git a/harness/src/context-compaction/model-resolver.ts b/harness/src/context-compaction/model-resolver.ts index 999f85dd..4b03af32 100644 --- a/harness/src/context-compaction/model-resolver.ts +++ b/harness/src/context-compaction/model-resolver.ts @@ -98,9 +98,6 @@ export async function resolveModelFromSession( } } -// Fallback when no assistant message carries provider/model yet (first-turn -// sessions, error-only sessions). The orchestrator writes run_request at -// `run_request` scope during run::start. export async function resolveModelFromRunRequest( iii: ISdk, session_id: string, diff --git a/harness/src/harness/trigger.ts b/harness/src/harness/trigger.ts index 766bbbb0..0c60e6c8 100644 --- a/harness/src/harness/trigger.ts +++ b/harness/src/harness/trigger.ts @@ -47,11 +47,6 @@ export function register(iii: ISdk): void { payload: body.payload, timeoutMs: BRIDGE_TIMEOUT_MS, }); - // A turn was already in flight: surface a 409 so the client can wait/retry - // instead of treating the ignored kickoff as a started turn. Only an - // EXPLICIT started:false means busy — a legacy run::start registration - // that predates the field returns {session_id} only, and its kickoffs are - // successes, not conflicts. return { status_code: result.started === false ? 409 : 200, headers: { 'content-type': 'application/json' }, diff --git a/harness/src/provider-anthropic/auth.ts b/harness/src/provider-anthropic/auth.ts index f3bfb24d..f57d94a7 100644 --- a/harness/src/provider-anthropic/auth.ts +++ b/harness/src/provider-anthropic/auth.ts @@ -45,12 +45,7 @@ export async function buildConfig( iii: ISdk, worker: WorkerConfig, model: string, - // The turn's catalog entry, threaded by the orchestrator. It is always for - // this turn's model (the orchestrator routes to this provider for that same - // model), so we use it directly; absent only when the catalog had no entry, - // where we fetch. preResolved?: Model, - // The turn's stable id, used to dedupe the per-stream credential resolution. resolutionKey?: number, ): Promise { const resolved = await resolveProviderForTurn(iii, resolutionKey); diff --git a/harness/src/provider-anthropic/stream-fn.ts b/harness/src/provider-anthropic/stream-fn.ts index 50e745f5..78137092 100644 --- a/harness/src/provider-anthropic/stream-fn.ts +++ b/harness/src/provider-anthropic/stream-fn.ts @@ -29,10 +29,6 @@ export function register(iii: ISdk, worker: WorkerConfig): void { FUNCTION_ID, async (raw: unknown) => { const input = ProviderStreamRuntimeInputSchema.parse(raw); - // The iii-sdk auto-hydrates `writer_ref` (a StreamChannelRef on the - // wire) into a `ChannelWriter` instance before this handler runs — - // use it directly instead of re-instantiating from the wire shape - // (which no longer exists by the time we get here). const writer = input.writer_ref as ChannelWriter; const cfg = await buildConfig( iii, diff --git a/harness/src/provider-anthropic/stream.ts b/harness/src/provider-anthropic/stream.ts index 4b62efe8..7ad5cc38 100644 --- a/harness/src/provider-anthropic/stream.ts +++ b/harness/src/provider-anthropic/stream.ts @@ -42,7 +42,6 @@ export async function* streamAnthropic({ applyMessagesCacheAnchor(wire_messages); const wire_tools = functionsToWire(tools) as Record[]; applyToolsCacheControl(wire_tools); - // No `temperature`: the API default applies (required when thinking is on). const body = { model: cfg.model, max_tokens: cfg.max_tokens, @@ -59,7 +58,6 @@ export async function* streamAnthropic({ 'content-type': 'application/json', }; if (thinking) { - // Lets thinking interleave with tool calls. headers['anthropic-beta'] = 'interleaved-thinking-2025-05-14'; } @@ -80,8 +78,6 @@ export async function* streamAnthropic({ if (!resp.ok) { const text = await resp.text().catch(() => ''); const kind = classifyAnthropicError(text, resp.status); - // A 401/403 means the cached credential is stale (rotated mid-turn); drop it - // so the next stream re-resolves instead of replaying the dead key. if (kind === 'auth_expired') invalidateProviderResolveCache(); yield syntheticErrorEvent(text || `anthropic http ${resp.status}`, cfg.model, kind); return; diff --git a/harness/src/session/tree/operations.ts b/harness/src/session/tree/operations.ts index 45d50746..248909fb 100644 --- a/harness/src/session/tree/operations.ts +++ b/harness/src/session/tree/operations.ts @@ -233,9 +233,7 @@ export async function listSessions( let entries: SessionEntry[] = []; try { entries = await store.loadEntries(meta.session_id); - } catch { - // ignore - } + } catch {} sessions.push({ session_id: meta.session_id, created_at: meta.created_at, @@ -395,8 +393,6 @@ export async function appendSynthetic( session_id: string, opts: { text: string; - // metadata is not supported on message entries currently; accepted for - // forward-compat but discarded. metadata?: unknown; parent_id?: string | null; }, @@ -435,7 +431,6 @@ export type UpdatePartItem = { compacted_at: unknown; }; -// Loads entries once instead of O(N×M) reloads in the prune loop. export async function updateParts( store: SessionStore, session_id: string, @@ -496,8 +491,6 @@ function buildNode(entry: SessionEntry, byParent: Map buildNode(c, byParent)) }; } -// ── HTML export ──────────────────────────────────────────────────────────── - export async function exportHtml( store: SessionStore, session_id: string, @@ -548,7 +541,6 @@ function renderEntryHtml(entry: SessionEntry): string { if (entry.type === 'branch_summary') { return `
branch summary · from ${htmlEscape(entry.from_id)}
${htmlEscape(entry.summary)}
\n`; } - // compaction const read = (entry.details.read_files ?? []).map(htmlEscape).join(', '); const modified = (entry.details.modified_files ?? []).map(htmlEscape).join(', '); return `
compaction · ${entry.tokens_before} tokens before
${htmlEscape(entry.summary)}\n\nread: ${read}\nmodified: ${modified}
\n`; @@ -561,7 +553,6 @@ function renderMessageHtml(msg: AgentMessage): string { return `
assistant · ${htmlEscape(msg.model)}
${renderBlocksHtml(msg.content)}
\n`; if (msg.role === 'function_result') return `
function result · ${htmlEscape(msg.function_id)}
${renderBlocksHtml(msg.content)}
\n`; - // custom const text = msg.display ?? JSON.stringify(msg.content); return `
custom · ${htmlEscape(msg.custom_type)}
${htmlEscape(text)}
\n`; } diff --git a/harness/src/session/tree/register.ts b/harness/src/session/tree/register.ts index 6c0a0939..6cb4f5f8 100644 --- a/harness/src/session/tree/register.ts +++ b/harness/src/session/tree/register.ts @@ -158,9 +158,6 @@ export function registerTree(iii: ISdk, store: SessionStore): void { const parent_id = typeof obj.parent_id === 'string' ? obj.parent_id : null; const messages = obj.messages; if (!Array.isArray(messages)) throw new Error('missing required field: messages (array)'); - // Mirror the singular APPEND guard: reject falsy/non-object elements at - // the boundary so a caller-side map bug can't persist `message: null` - // entries onto the active path. if (messages.some((m) => !m || typeof m !== 'object')) { throw new Error('messages must all be non-null AgentMessage objects'); } diff --git a/harness/src/session/tree/store.ts b/harness/src/session/tree/store.ts index ba9efef9..58171d7e 100644 --- a/harness/src/session/tree/store.ts +++ b/harness/src/session/tree/store.ts @@ -130,7 +130,6 @@ export class IiiStateSessionStore implements SessionStore { for (const entry of entries) { await this.writeEntry(session_id, entry.id, entry); } - // One meta refresh for the whole batch (vs once per entry in `append`). await this.bestEffortMetaRefresh(session_id); } @@ -141,9 +140,6 @@ export class IiiStateSessionStore implements SessionStore { } catch (e) { throw new SessionError('storage', `state::list entries: ${String(e)}`); } - // PR #150: sort by (timestamp, id) so resumed approval replies that - // arrive after the session paused appear in correct transcript order - // even when their entry ids are non-monotonic. entries.sort((a, b) => { const t = entryTimestamp(a) - entryTimestamp(b); if (t !== 0) return t; diff --git a/harness/src/turn-orchestrator/assistant-streaming/ports.ts b/harness/src/turn-orchestrator/assistant-streaming/ports.ts index 697b2d37..faa92222 100644 --- a/harness/src/turn-orchestrator/assistant-streaming/ports.ts +++ b/harness/src/turn-orchestrator/assistant-streaming/ports.ts @@ -137,10 +137,6 @@ export function createStreamingPorts(iii: ISdk): AssistantStreamingPorts { }, async persistAssistantIfNew(session_id, asst, messages) { - // Dedup against the window already loaded in prepareStreamContext: nothing - // is persisted between that load and here within one invocation, and - // isDuplicateAssistant only inspects the trailing entry — so reusing it is - // identical to a fresh reload and saves a full session round-trip. if (isDuplicateAssistant(messages, asst)) { logger.warn('finalizeAssistant: skipping duplicate assistant push (re-entry detected)', { session_id, diff --git a/harness/src/turn-orchestrator/assistant-streaming/run.ts b/harness/src/turn-orchestrator/assistant-streaming/run.ts index 43d7fa61..69b37147 100644 --- a/harness/src/turn-orchestrator/assistant-streaming/run.ts +++ b/harness/src/turn-orchestrator/assistant-streaming/run.ts @@ -34,8 +34,6 @@ export async function prepareStreamContext( const { provider, model, system_prompt, function_schemas, thinking_level } = request; const decision = decide({ provider, model }); const tools = parseFunctionSchemas(function_schemas); - // Resolved once at provisioning; preflight + provider read it instead of - // re-fetching models::get. Absent on pre-existing records → readers fall back. const model_meta = rec.model_meta; if ( @@ -53,8 +51,6 @@ export async function prepareStreamContext( messages, ...(thinking_level ? { thinking_level } : {}), ...(model_meta ? { model_meta } : {}), - // Stable per run, fresh per run: dedupes the provider's credential resolution - // within the turn while still re-resolving on the next user turn. resolution_key: rec.started_at_ms, }; } @@ -66,9 +62,6 @@ export async function runStreamTurn( ): Promise { let body_streamed = false; - // Coalesce consecutive same-type provider deltas into one message_update to - // cut the per-token span/RPC explosion on the streaming path. Discrete - // events flush through 1:1; the final flush below guarantees the tail. const coalescer = createDeltaCoalescer((partial, event) => ports.emitMessageUpdate(session_id, partial, event), ); @@ -143,8 +136,6 @@ export async function finalizeAssistantTurn( return; } - // end_turn: no function calls. Emit turn_end now, then route to the finishing - // step to emit agent_end after this step's durable save (the agent_end outbox). await emitTurnEndOnce(ports, rec, asst); transitionToFinishing(rec); } diff --git a/harness/src/turn-orchestrator/finishing.ts b/harness/src/turn-orchestrator/finishing.ts index 685bba86..83451460 100644 --- a/harness/src/turn-orchestrator/finishing.ts +++ b/harness/src/turn-orchestrator/finishing.ts @@ -17,7 +17,6 @@ import type { TurnStateRecord } from './state.js'; export async function handleFinishing(iii: ISdk, rec: TurnStateRecord): Promise { const ports = createTurnStatePorts(iii); - // Emits agent_end and transitions the record to `stopped`. await ports.finishSession(rec); } diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/run.ts b/harness/src/turn-orchestrator/function-awaiting-approval/run.ts index f6cb3d49..0db73f92 100644 --- a/harness/src/turn-orchestrator/function-awaiting-approval/run.ts +++ b/harness/src/turn-orchestrator/function-awaiting-approval/run.ts @@ -25,9 +25,6 @@ export function denialResultFromDecision(decision: ApprovalDecision): FunctionRe decision: decision.decision, reason, }, - // A denial resumes the loop so the model can react; an abort is the user - // saying "stop" — it ends the turn (when every result in the batch - // terminates; mixed batches still resume). terminate: decision.decision === 'aborted', }; } diff --git a/harness/src/turn-orchestrator/function-execute/ports.ts b/harness/src/turn-orchestrator/function-execute/ports.ts index 1c4e1554..8777e5be 100644 --- a/harness/src/turn-orchestrator/function-execute/ports.ts +++ b/harness/src/turn-orchestrator/function-execute/ports.ts @@ -104,9 +104,6 @@ export function createPorts(iii: ISdk): FunctionExecutePorts { }, async dispatch(call, session_id) { - // Hook/policy see the routing envelope; the target function gets the - // caller's untouched args so a payload `function_id` (e.g. for - // engine::functions::info) is never clobbered by the routing `function_id`. return dispatchWithHook(iii, withRoutingEnvelope(call, session_id), call); }, diff --git a/harness/src/turn-orchestrator/function-execute/run.ts b/harness/src/turn-orchestrator/function-execute/run.ts index 31f7f238..1ae98f02 100644 --- a/harness/src/turn-orchestrator/function-execute/run.ts +++ b/harness/src/turn-orchestrator/function-execute/run.ts @@ -226,9 +226,6 @@ export async function finalizeBatch( await emitTurnEndOnce(ports, rec, lastAssistant, function_results); - // Routing formerly done by the turn::steering_check FSM hop, now inline: - // one durable step (and its queue wake) less per round-trip. Terminal routes - // hand off to the finishing step for agent_end (see transitionToFinishing). if (allTerminate) { transitionToFinishing(rec); } else if (maxTurnsReached(rec)) { diff --git a/harness/src/turn-orchestrator/preflight.ts b/harness/src/turn-orchestrator/preflight.ts index cf3aa84a..aca8d96f 100644 --- a/harness/src/turn-orchestrator/preflight.ts +++ b/harness/src/turn-orchestrator/preflight.ts @@ -42,8 +42,6 @@ export async function runPreflight( modelID: string, modelMeta?: Model, ): Promise<'ok' | 'compacted'> { - // The turn's model, resolved once at provisioning. Use it directly when - // threaded (same limit a fetch yields); fetch only when it was absent. const model = modelMeta ? { providerID, modelID, modelLimit: limitFromModel(modelMeta) } : await fetchModelLimit(iii, providerID, modelID); @@ -105,9 +103,6 @@ export async function runPreflight( throw new CompactionBusyError('compaction already in progress'); } if (res?.status !== 'ok') { - // 'empty' (nothing eligible to compact) or an unknown/drifted status: - // no compaction ran, so do NOT claim 'compacted' — the caller would - // reload messages and proceed as if the context shrank. logger.warn('preflight: compact_now returned non-ok status; proceeding without reload', { session_id, status: res?.status, diff --git a/harness/src/turn-orchestrator/provider-router.ts b/harness/src/turn-orchestrator/provider-router.ts index e5d767e8..efa71516 100644 --- a/harness/src/turn-orchestrator/provider-router.ts +++ b/harness/src/turn-orchestrator/provider-router.ts @@ -27,11 +27,6 @@ export function decide(req: RouteRequest): RouteDecision { if (p === 'kimi') return { provider: 'kimi', model: req.model }; if (p === 'lmstudio') return { provider: 'lmstudio', model: req.model }; if (p === 'llamacpp') return { provider: 'llamacpp', model: req.model }; - // Heuristic for missing provider: model name disambiguates. - // Local runtimes (LM Studio, llama.cpp) are intentionally excluded — - // their model IDs are user-controlled (e.g. `qwen/qwen3-4b-2507`, - // `Meta-Llama-3.1-8B`) and overlap with HF-style IDs from other - // services; require explicit provider='lmstudio' / 'llamacpp'. if (!p && /^gpt-|^o\d-/i.test(req.model)) { return { provider: 'openai', model: req.model }; } diff --git a/harness/src/turn-orchestrator/provisioning/process.ts b/harness/src/turn-orchestrator/provisioning/process.ts index c42c8886..09fcdc7b 100644 --- a/harness/src/turn-orchestrator/provisioning/process.ts +++ b/harness/src/turn-orchestrator/provisioning/process.ts @@ -34,9 +34,6 @@ export async function processProvisioning( model: request.model, }); - // Resolve the model once for the whole turn. The decided provider (not the - // raw request.provider, which may be blank) is what every later reader keys - // on, so resolve against it. Best-effort: null is fine — readers fall back. const decision = decide({ provider: request.provider, model: request.model }); const model_meta = request.model ? await ports.resolveModel(decision.provider, request.model) @@ -59,8 +56,6 @@ export async function applyProvisioningOutcome( outcome: ProvisioningOutcome, ): Promise { await ports.saveRunRequest(rec.session_id, outcome.runRequest); - // Persist the resolved catalog entry on the durable record so preflight, - // provider streaming, and turn-end compaction read it instead of re-fetching. if (outcome.model_meta) rec.model_meta = outcome.model_meta; transitionTo(rec, 'assistant_streaming'); } diff --git a/harness/src/turn-orchestrator/run-abort.ts b/harness/src/turn-orchestrator/run-abort.ts index 369c5df7..a386b16d 100644 --- a/harness/src/turn-orchestrator/run-abort.ts +++ b/harness/src/turn-orchestrator/run-abort.ts @@ -29,15 +29,12 @@ import { syntheticAssistant } from './synthetic-assistant.js'; export async function execute(iii: ISdk, payload: RunAbortPayload): Promise { const { session_id } = payload; const store = createTurnStore(iii); - // Strict read: aborting based on a tolerant null would silently no-op a - // session the user is actively trying to stop. const rec = await store.loadRecordStrict(session_id); if (!rec || !isTurnInFlight(rec)) { return { session_id, aborted: false, state: rec?.state ?? null }; } if (rec.state === 'finishing') { - // Already on the way out; the finishing wake will emit agent_end + stop. return { session_id, aborted: false, state: 'finishing' }; } @@ -46,8 +43,6 @@ export async function execute(iii: ISdk, payload: RunAbortPayload): Promise execute(iii, RunAbortPayloadSchema.parse(payload)), { description: - 'Abort the session\'s in-flight turn: emit an aborted assistant + turn_end, clear parked approvals, and route to finishing. Best-effort while a step is mid-execution (retry lands between steps); no-op when no turn is running.', + "Abort the session's in-flight turn: emit an aborted assistant + turn_end, clear parked approvals, and route to finishing. Best-effort while a step is mid-execution (retry lands between steps); no-op when no turn is running.", }, ); } diff --git a/harness/src/turn-orchestrator/run-start.ts b/harness/src/turn-orchestrator/run-start.ts index 53725681..460aab6c 100644 --- a/harness/src/turn-orchestrator/run-start.ts +++ b/harness/src/turn-orchestrator/run-start.ts @@ -30,16 +30,6 @@ export async function execute(iii: ISdk, payload: RunStartPayload): Promise; /** `aborted` is false when no turn was running (or it was already finishing). */ @@ -56,14 +54,12 @@ export type RunAbortResult = { state: TurnState | null; }; -// --- turn::{state} durable step --- export const TurnStepPayloadSchema = SessionIdPayloadSchema; export type TurnStepPayload = z.infer; export type TurnStepResult = | { ok: true; from_state: TurnState; to_state: TurnState } | { ok: true; skipped: true; reason: 'stale' }; -// --- function_execute / function_awaiting_approval persisted record --- const AwaitingApprovalEntrySchema = z.object({ function_call_id: z.string().min(1), function_id: z.string().min(1), @@ -113,7 +109,6 @@ export function parseFunctionBatchRecord(rec: TurnStateRecord): FunctionBatchTur `invalid function batch turn record: ${formatZodIssues(result.error)}`, ); } - // Return the same object — handlers mutate turn_state in place before saveRecord. return rec as FunctionBatchTurnRecord; } @@ -165,7 +160,6 @@ export function parseSteeringCheckRecord(rec: TurnStateRecord): SteeringCheckTur return rec as SteeringCheckTurnRecord; } -// --- turn::get_state --- export const GetStatePayloadSchema = SessionIdPayloadSchema; export type GetStatePayload = z.infer; @@ -193,7 +187,6 @@ export function toView(rec: TurnStateRecord): TurnStateView { export type GetStateResult = TurnStateView | null; -// --- turn::on_approval (approvals-scope state event) --- const ApprovalDecisionWriteEventSchema = z.object({ type: z.literal('state').optional(), scope: z.literal('approvals').optional(), diff --git a/harness/src/turn-orchestrator/state-runtime/context-view.ts b/harness/src/turn-orchestrator/state-runtime/context-view.ts index b94655c5..8247efce 100644 --- a/harness/src/turn-orchestrator/state-runtime/context-view.ts +++ b/harness/src/turn-orchestrator/state-runtime/context-view.ts @@ -59,9 +59,6 @@ export async function loadSessionMessages( const resp = await iii.trigger({ function_id: 'session-tree::messages', payload: { session_id }, - // Matches the SDK default this read always ran under: long sessions - // (thousands of entries) can legitimately take >10s to list+sort, and a - // tighter budget here fails the whole turn step. timeoutMs: 30_000, }); return resp?.messages ?? []; diff --git a/harness/src/turn-orchestrator/state-runtime/ports.ts b/harness/src/turn-orchestrator/state-runtime/ports.ts index 72512469..603925de 100644 --- a/harness/src/turn-orchestrator/state-runtime/ports.ts +++ b/harness/src/turn-orchestrator/state-runtime/ports.ts @@ -59,10 +59,6 @@ export function createTurnStatePorts(iii: ISdk, store?: TurnStore): TurnStatePor }, async finishSession(rec) { - // agent_end is a turn-end SIGNAL only. The transcript reaches the UI - // incrementally via message_update/message_complete and is re-read from - // session-tree on reload, so no consumer reads agent_end.messages. Emit it - // empty instead of reloading the whole session to fill an unused field. await emit(iii, rec.session_id, { type: 'agent_end', messages: [] }); transitionTo(rec, 'stopped'); }, diff --git a/harness/src/turn-orchestrator/state-runtime/store.ts b/harness/src/turn-orchestrator/state-runtime/store.ts index 6c4ed8dc..9caa640a 100644 --- a/harness/src/turn-orchestrator/state-runtime/store.ts +++ b/harness/src/turn-orchestrator/state-runtime/store.ts @@ -199,9 +199,6 @@ export function createTurnStore(iii: ISdk): TurnStore { async appendMessages(session_id, msgs) { if (msgs.length === 0) return; - // One batch trigger chains all messages and resolves the active leaf once, - // instead of a per-message session-tree::append (each of which re-read the - // whole session to find the leaf). await iii.trigger({ function_id: 'session-tree::append_batch', payload: { session_id, messages: msgs, parent_id: null }, diff --git a/harness/src/turn-orchestrator/state-runtime/turn-end.ts b/harness/src/turn-orchestrator/state-runtime/turn-end.ts index e37c3bae..88e4fb2f 100644 --- a/harness/src/turn-orchestrator/state-runtime/turn-end.ts +++ b/harness/src/turn-orchestrator/state-runtime/turn-end.ts @@ -30,7 +30,6 @@ export async function emitTurnEndOnce( ): Promise { if (rec.turn_end_emitted) return; const last = message ?? rec.last_assistant ?? emptyAssistant(); - // Thread the turn's resolved limit so compaction skips its own models::get. const model_limit = rec.model_meta ? limitFromModel(rec.model_meta) : undefined; await ports.emitTurnEnd(rec.session_id, last, function_results, model_limit); rec.turn_end_emitted = true; diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index ce798a49..347982a8 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -21,10 +21,6 @@ export type TurnState = | 'function_execute' | 'function_awaiting_approval' | 'steering_check' - // Terminal-pending: the turn's work is durably committed; this step only - // emits agent_end and advances to `stopped`. Splitting the terminal signal - // into its own replayable step keeps a crash from re-running the LLM stream - // after consumers already saw the run end (the agent_end "outbox"). | 'finishing' | 'stopped' | 'failed'; diff --git a/harness/tests/context-compaction/handler-async.test.ts b/harness/tests/context-compaction/handler-async.test.ts index 23dd4c6d..c65751f5 100644 --- a/harness/tests/context-compaction/handler-async.test.ts +++ b/harness/tests/context-compaction/handler-async.test.ts @@ -66,7 +66,6 @@ describe('resolveModelFromEvent', () => { const iii = { trigger: vi.fn(async (req: { function_id: string }) => { calls.push(req.function_id); - // Stand-in for a models::get fallback result. return { context_window: 200_000, max_output_tokens: 8_096 }; }), } as unknown as ISdk; diff --git a/harness/tests/integration/mode-approval.e2e.test.ts b/harness/tests/integration/mode-approval.e2e.test.ts index 425ffdc6..4660013f 100644 --- a/harness/tests/integration/mode-approval.e2e.test.ts +++ b/harness/tests/integration/mode-approval.e2e.test.ts @@ -144,9 +144,6 @@ describe('mode-driven approval e2e (real consultBefore)', () => { await h.runExecute('sess-escalate'); const rec = h.loadTurnRecord('sess-escalate'); - // Denied up front: not parked, not executed, batch finalizes with an - // error result carrying the human_only_function denial. The result - // travels on turn_end; the inline resume clears the record. expect(rec?.awaiting_approval).toEqual([]); expect(rec?.state).toBe('assistant_streaming'); expect(rec?.function_results).toEqual([]); @@ -162,7 +159,6 @@ describe('mode-driven approval e2e (real consultBefore)', () => { const denied = turnEnd?.function_results.find((r) => r.function_call_id === 'fc-1'); expect(denied?.is_error).toBe(true); expect(JSON.stringify(denied?.details)).toContain('human_only_function'); - // No real mode change leaked into settings. const settings = h.stateStore.get(`${SETTINGS_STATE_SCOPE}/sess-escalate`) as { mode: string; }; diff --git a/harness/tests/integration/parallel-approval.e2e.test.ts b/harness/tests/integration/parallel-approval.e2e.test.ts index f14bc466..6536752f 100644 --- a/harness/tests/integration/parallel-approval.e2e.test.ts +++ b/harness/tests/integration/parallel-approval.e2e.test.ts @@ -190,12 +190,6 @@ describe('parallel approval e2e', () => { ['fc-1', 'fc-2'], ); - // Both prompts approved concurrently: two approval::resolve writes whose - // wakes interleave on the shared turn_state record. The sequential tests - // never exercise this. runTransition has no optimistic-concurrency guard - // (saveRecord is an unconditional overwrite), so both wakes load the same - // parked record, each executes EVERY call, and each finalizes the batch — - // duplicating side effects and emitting turn_end multiple times. await Promise.all([ h.resolveApproval('sess-par', 'fc-1', 'allow'), h.resolveApproval('sess-par', 'fc-2', 'allow'), @@ -209,16 +203,9 @@ describe('parallel approval e2e', () => { it('drains a sibling approved mid-execution even when its own wake is dropped', async () => { const h = createParallelApprovalHarness(); - // Both calls park for approval during execute. vi.spyOn(agentTriggerModule, 'dispatchWithHook') - .mockResolvedValueOnce({ kind: 'pending' }) // execute fc-late → parks - .mockResolvedValueOnce({ kind: 'pending' }); // execute fc-driver → parks - // Approved calls execute via triggerPreApproved → triggerFunctionCall. - // Executing fc-driver approves fc-late as a side effect but writes the - // decision WITHOUT firing fc-late's wake — modeling a sibling approval whose - // own turn-step wake was dropped (retries exhausted against the lease the - // driving wake holds). fc-late was read first and skipped, so only a re-scan - // within the same wake can pick it up. + .mockResolvedValueOnce({ kind: 'pending' }) + .mockResolvedValueOnce({ kind: 'pending' }); vi.spyOn(agentTriggerModule, 'triggerFunctionCall').mockImplementation(async (_iii, call) => { if (call.id === 'fc-driver') { h.stateStore.set('approvals/sess-drain/fc-late', { @@ -247,9 +234,6 @@ describe('parallel approval e2e', () => { await h.resolveApproval('sess-drain', 'fc-driver', 'allow'); - // fc-late is drained in the same wake instead of being orphaned: it runs to - // completion and the batch finalizes (work cleared, state advances) rather - // than staying parked on fc-late forever. const rec = h.loadTurnRecord('sess-drain'); expect(executionEvents(h.emitted, 'function_execution_end', 'fc-driver')).toHaveLength(1); expect(executionEvents(h.emitted, 'function_execution_end', 'fc-late')).toHaveLength(1); @@ -277,9 +261,6 @@ describe('parallel approval e2e', () => { const rec = h.loadTurnRecord('sess-reenqueue'); expect(rec?.awaiting_approval?.map((e) => e.function_call_id)).toEqual(['fc-2']); - // The wake that resolved fc-1 must kick a fresh, uncontended wake so a - // dropped fc-2 contender can't orphan it: the approval::resolve write - // accounts for one wake, the follow-up kick for a second. expect(wakeEnqueues(h, 'sess-reenqueue')).toBeGreaterThanOrEqual(before + 2); }); diff --git a/harness/tests/provider-anthropic/auth.test.ts b/harness/tests/provider-anthropic/auth.test.ts index c9b2119c..e155cf55 100644 --- a/harness/tests/provider-anthropic/auth.test.ts +++ b/harness/tests/provider-anthropic/auth.test.ts @@ -34,7 +34,6 @@ function resolved(max_tokens: number | null) { }; } -/** Fake ISdk whose models::get returns the given catalog entry (or throws). */ function iiiWithCatalog(entry: unknown, opts: { throws?: boolean } = {}): ISdk { const trigger = vi.fn().mockImplementation(async (req: { function_id: string }) => { if (req.function_id === 'models::get') { @@ -46,7 +45,6 @@ function iiiWithCatalog(entry: unknown, opts: { throws?: boolean } = {}): ISdk { return { trigger } as unknown as ISdk; } -/** Count models::get calls so we can prove the pre-resolved path skips the fetch. */ function iiiCountingCatalog(entry: unknown): { iii: ISdk; modelsGetCalls: () => number } { let n = 0; const trigger = vi.fn().mockImplementation(async (req: { function_id: string }) => { @@ -145,7 +143,7 @@ describe('buildConfig pre-resolved model threading', () => { expect(modelsGetCalls()).toBe(0); expect(cfg.catalog).toEqual(THINKING_MODEL); - expect(cfg.max_tokens).toBe(32_000); // min(64k, 32k cap) + expect(cfg.max_tokens).toBe(32_000); }); it('fetches models::get when no pre-resolved model is threaded', async () => { @@ -164,9 +162,7 @@ describe('buildConfig pre-resolved model threading', () => { 'high', 'xhigh', ])('produces a byte-identical thinking config (%s) with vs without the pre-resolved model', async (level) => { - // Fetch path: models::get returns the same Model. const fetched = await buildConfig(iiiWithCatalog(THINKING_MODEL), WORKER, 'claude-sonnet-4-6'); - // Threaded path: the same Model passed in, models::get not consulted. const { iii } = iiiCountingCatalog({ id: 'nope' }); const threaded = await buildConfig(iii, WORKER, 'claude-sonnet-4-6', THINKING_MODEL); @@ -232,12 +228,10 @@ describe('buildConfig per-turn credential resolution cache', () => { it('a 401 stream drops the cache so the next turn re-resolves the credential', async () => { const iii = iiiWithCatalog({ id: 'claude-sonnet-4-6', max_output_tokens: 64_000 }); - // Prime the cache for turn key 100. await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); expect(resolveProviderMock).toHaveBeenCalledTimes(1); - // A stream hitting 401 must invalidate the cached resolution. vi.stubGlobal( 'fetch', vi.fn(async () => new Response('unauthorized', { status: 401 })), @@ -248,10 +242,8 @@ describe('buildConfig per-turn credential resolution cache', () => { messages: [], tools: [], })) { - // drain } - // Same turn key now re-resolves because the 401 cleared the cache. await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); expect(resolveProviderMock).toHaveBeenCalledTimes(2); }); @@ -270,7 +262,6 @@ describe('buildConfig per-turn credential resolution cache', () => { messages: [], tools: [], })) { - // drain } await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); diff --git a/harness/tests/session/operations.test.ts b/harness/tests/session/operations.test.ts index 78ef7aa7..4607dfdc 100644 --- a/harness/tests/session/operations.test.ts +++ b/harness/tests/session/operations.test.ts @@ -74,7 +74,6 @@ describe('session-tree operations', () => { const ids = await appendMessages(store, sid, null, [asstMsg('a'), userMsg('b'), asstMsg('c')]); expect(ids).toHaveLength(3); - // The batch chains onto the existing leaf and links each entry to the prior. const path = await activePath(store, sid); expect(path).toEqual([e0, ...ids]); const messages = await loadMessages(store, sid); @@ -91,7 +90,6 @@ describe('session-tree operations', () => { const sid = await createSession(store); await appendMessage(store, sid, null, userMsg('seed')); - // loadEntries backs activePath; one call means the leaf was resolved once. const loadEntriesSpy = vi.spyOn(store, 'loadEntries'); const appendManySpy = vi.spyOn(store, 'appendMany'); @@ -131,7 +129,6 @@ describe('session-tree operations', () => { const sid = await createSession(store); const now = vi.spyOn(Date, 'now'); - // Batch lands at t=100 (all entries share the append timestamp). now.mockReturnValue(100); const batch = await appendMessages(store, sid, null, [ asstMsg('a'), @@ -139,9 +136,6 @@ describe('session-tree operations', () => { asstMsg('c'), ]); - // A later single append whose clock did NOT advance (or stepped back): its - // timestamp sorts before the batch tail. Pre-fix, the sort-max leaf - // heuristic orphaned it; the chain-tip leaf keeps it on the path. now.mockReturnValue(99); const tail = await appendMessage(store, sid, null, asstMsg('d')); now.mockRestore(); diff --git a/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts b/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts index 0711ceb1..5dd04a84 100644 --- a/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts +++ b/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts @@ -33,7 +33,6 @@ export function mockTurnStore(overrides: Partial = {}): MockTurnStore } as MockTurnStore; } -/** Mock `createTurnStore` and return the store instance for assertions. */ export function installMockTurnStore(overrides: Partial = {}): MockTurnStore { const store = mockTurnStore(overrides); vi.spyOn(storeModule, 'createTurnStore').mockReturnValue(store); diff --git a/harness/tests/turn-orchestrator/assistant.test.ts b/harness/tests/turn-orchestrator/assistant.test.ts index d2e35516..3b45b479 100644 --- a/harness/tests/turn-orchestrator/assistant.test.ts +++ b/harness/tests/turn-orchestrator/assistant.test.ts @@ -97,10 +97,8 @@ describe('handleStreaming turn start', () => { await handleStreaming(iii, rec); expect(rec.turn_count).toBe(1); - // createChannel failure → synthetic error → finalizeAssistant sets turn_end_emitted = true expect(rec.turn_end_emitted).toBe(true); expect(calls.some((c) => c.function_id === 'approval::consume')).toBe(false); - // stream::set is called by emit(message_complete) and emit(turn_end) in the error path expect(calls.some((c) => c.function_id === 'stream::set')).toBe(true); }); }); @@ -143,11 +141,8 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - // emitted message_complete via stream::set trigger expect(calls.some((c) => c.function_id === 'stream::set')).toBe(true); - // assistant persisted expect(appendSpy).toHaveBeenCalledOnce(); - // routed to function_execute (NOT assistant_finished) expect(rec.state).toBe('function_execute'); expect(rec.last_assistant).toEqual(finalMsg); expect(rec.function_results).toEqual([]); @@ -164,7 +159,6 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - // No-function-call turns finish inline (formerly the steering_check hop). expect(rec.state).toBe('finishing'); expect(rec.turn_end_emitted).toBe(true); expect(rec.last_assistant).toEqual(finalMsg); @@ -232,7 +226,6 @@ describe('handleStreaming', () => { }, ], }); - // Simulate re-entry: messages already contain the assistant message let storedMessages: unknown[] = [finalMsg]; const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; diff --git a/harness/tests/turn-orchestrator/finishing.test.ts b/harness/tests/turn-orchestrator/finishing.test.ts index 45d72825..fda2fc8e 100644 --- a/harness/tests/turn-orchestrator/finishing.test.ts +++ b/harness/tests/turn-orchestrator/finishing.test.ts @@ -37,7 +37,7 @@ describe('handleFinishing', () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'finishing' }; await handleFinishing(iii, rec); - rec.state = 'finishing'; // simulate redelivery against the pre-save record + rec.state = 'finishing'; await handleFinishing(iii, rec); expect(rec.state).toBe('stopped'); diff --git a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts index e4876ed3..4e7a0653 100644 --- a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts +++ b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts @@ -125,7 +125,6 @@ describe('handleAwaitingApproval', () => { expect(rec.awaiting_approval).toEqual([]); expect(rec.state).toBe('assistant_streaming'); expect(rec.work).toBeUndefined(); - // Results travel on turn_end; the inline resume clears the record. expect(rec.function_results).toEqual([]); const turnEnd = emitSpy.mock.calls.find((call) => call[2]?.type === 'turn_end')?.[2] as | { function_results: unknown[] } diff --git a/harness/tests/turn-orchestrator/function-execute.test.ts b/harness/tests/turn-orchestrator/function-execute.test.ts index 68a61bf8..541dd08c 100644 --- a/harness/tests/turn-orchestrator/function-execute.test.ts +++ b/harness/tests/turn-orchestrator/function-execute.test.ts @@ -182,8 +182,6 @@ describe('finalizeBatch', () => { }; await finalizeBatch(ports, rec); - // Terminal routes hand off to the finishing step for agent_end; the step - // itself no longer emits it inline before its durable save. expect(rec.state).toBe('finishing'); expect(ports.finishSession).not.toHaveBeenCalled(); }); @@ -192,7 +190,6 @@ describe('finalizeBatch', () => { const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; const appendMessages = vi.fn(async () => {}); const ports = stubPorts({ - // fc-1 already lives in the trailing result run → its id is "persisted". loadTrailingResultIds: vi.fn(async () => new Set(['fc-1'])), appendMessages, }); @@ -217,7 +214,6 @@ describe('finalizeBatch', () => { expect(rec.state).toBe('assistant_streaming'); }); - // Inline replacements for the removed turn::steering_check hop. it('resumes to assistant_streaming with cleared results when a result continues', async () => { const ports = stubPorts(); const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; @@ -266,10 +262,8 @@ describe('finalizeBatch', () => { await finalizeBatch(ports, rec); - // max_turns persists the notice + turn_end, then defers agent_end to finishing. expect(rec.state).toBe('finishing'); expect(ports.finishSession).not.toHaveBeenCalled(); - // Two appends: the batch's function_results, then the synthetic notice. expect(appendMessages).toHaveBeenCalledTimes(2); const appended = appendMessages.mock.calls[1]?.[1] as Array<{ content: unknown[] }>; expect(JSON.stringify(appended)).toContain('max_turns (2) reached'); @@ -337,7 +331,6 @@ describe('createPorts().loadTrailingResultIds', () => { expect(ids).toEqual(new Set(['fc-1', 'fc-2'])); expect(calls.map((c) => c.function_id)).toEqual(['session-tree::messages']); expect(calls.some((c) => c.function_id === 'session-tree::compactions')).toBe(false); - // Default leaf: no branch_leaf in the payload. expect(calls[0]?.payload).toEqual({ session_id: 's1' }); }); diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index ad61d684..9b70c1f7 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -120,8 +120,6 @@ describe('handleExecute new flow', () => { mockFinalizePersistence(); await handleExecute(iii, rec); - // Resumed inline for the next round-trip (formerly via the steering_check - // hop); the batch results travel on turn_end, the record is cleared. expect(rec.state).toBe('assistant_streaming'); expect(rec.function_results).toEqual([]); const turnEnd = emitSpy.mock.calls.find((call) => call[2]?.type === 'turn_end')?.[2] as @@ -154,7 +152,6 @@ describe('handleExecute new flow', () => { await handleExecute(iii, rec); - // All-terminate routes to the finishing step (which emits agent_end + stops). expect(rec.state).toBe('finishing'); }); @@ -255,7 +252,6 @@ describe('handleExecute new flow', () => { await expect(handleExecute(iii, rec)).resolves.toBeUndefined(); - // Denial result travels on turn_end; the record resumes with cleared results. expect(rec.state).toBe('assistant_streaming'); const turnEnd = emitSpy.mock.calls.find((call) => call[2]?.type === 'turn_end')?.[2] as | { function_results: Array<{ is_error: boolean; details: Record }> } diff --git a/harness/tests/turn-orchestrator/preflight.test.ts b/harness/tests/turn-orchestrator/preflight.test.ts index 03c99680..f54a09aa 100644 --- a/harness/tests/turn-orchestrator/preflight.test.ts +++ b/harness/tests/turn-orchestrator/preflight.test.ts @@ -64,7 +64,6 @@ describe('runPreflight', () => { }); it('calls compact_now and returns compacted when projected >= usable', async () => { - // Tiny context window so even a small message overflows const { iii, calls } = makeIii({ modelsGetResult: { context_window: 10, max_output_tokens: 0 }, compactNowResult: { status: 'ok' }, @@ -122,7 +121,6 @@ describe('runPreflight', () => { it('uses the pre-resolved model and skips models::get', async () => { const { iii, calls } = makeIii({ - // If consulted, this tiny window would force a compact — proving a miss. modelsGetResult: { context_window: 1, max_output_tokens: 0 }, }); const modelMeta = { diff --git a/harness/tests/turn-orchestrator/provider-router.test.ts b/harness/tests/turn-orchestrator/provider-router.test.ts index d69c0ccb..0f27bb07 100644 --- a/harness/tests/turn-orchestrator/provider-router.test.ts +++ b/harness/tests/turn-orchestrator/provider-router.test.ts @@ -20,9 +20,6 @@ describe('decide', () => { it('routes llamacpp when provider=llamacpp', () => { expect(decide({ provider: 'llamacpp', model: 'Meta-Llama-3.1-8B' }).provider).toBe('llamacpp'); - // No heuristic — bare model id without explicit provider does not - // route to llamacpp (same posture as lmstudio; user-controlled ids - // overlap with HF-style ids from other services). expect(decide({ model: 'Meta-Llama-3.1-8B' }).provider).toBe('anthropic'); }); @@ -40,11 +37,6 @@ describe('decide', () => { model: 'lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF', }).provider, ).toBe('lmstudio'); - // No heuristic — a bare HF-style model id without an explicit - // provider MUST default to anthropic, not be magically routed to - // lmstudio because the namespace looks like a HF org/repo. This - // is the regression that the route's comment warns about - // ("model IDs are user-controlled … overlap with HF-style IDs"). const ambiguousIds = [ 'qwen/qwen3-4b-2507', 'google/gemma-2-9b-it', diff --git a/harness/tests/turn-orchestrator/provisioning-layer.test.ts b/harness/tests/turn-orchestrator/provisioning-layer.test.ts index 291226e5..1e6ba47e 100644 --- a/harness/tests/turn-orchestrator/provisioning-layer.test.ts +++ b/harness/tests/turn-orchestrator/provisioning-layer.test.ts @@ -74,7 +74,6 @@ describe('processProvisioning', () => { it('resolves the model once against the DECIDED provider and returns it', async () => { const resolveModel = vi.fn(async () => FAKE_MODEL); const ports = stubPorts({ - // provider blank → decide() must resolve it to a concrete provider. loadRunRequest: vi.fn(async () => ({ provider: '', model: 'gpt-4', @@ -89,7 +88,6 @@ describe('processProvisioning', () => { const outcome = await processProvisioning(ports, rec); expect(resolveModel).toHaveBeenCalledTimes(1); - // decide() maps a blank provider + 'gpt-4' → openai. expect(resolveModel).toHaveBeenCalledWith('openai', 'gpt-4'); expect(outcome.model_meta).toEqual(FAKE_MODEL); }); diff --git a/harness/tests/turn-orchestrator/run-abort.test.ts b/harness/tests/turn-orchestrator/run-abort.test.ts index ddd4db92..eb8a6e30 100644 --- a/harness/tests/turn-orchestrator/run-abort.test.ts +++ b/harness/tests/turn-orchestrator/run-abort.test.ts @@ -23,9 +23,7 @@ function parkedRecord(state: TurnStateRecord['state']): TurnStateRecord { return { ...newRecord('s1'), state, - awaiting_approval: [ - { function_call_id: 'fc-1', function_id: 'shell::run', args: {} }, - ], + awaiting_approval: [{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], } as TurnStateRecord; } @@ -46,14 +44,11 @@ describe('run::abort execute', () => { const events = streamSets(calls).map((e) => e.type); expect(events).toContain('message_complete'); expect(events).toContain('turn_end'); - // agent_end belongs to the finishing step, not the abort itself. expect(events).not.toContain('agent_end'); - // The record persisted to finishing with parked approvals cleared. const turnStateSet = calls.find( (c) => - c.function_id === 'state::set' && - (c.payload as { scope?: string }).scope === 'turn_state', + c.function_id === 'state::set' && (c.payload as { scope?: string }).scope === 'turn_state', ); const saved = (turnStateSet?.payload as { value: TurnStateRecord }).value; expect(saved.state).toBe('finishing'); diff --git a/harness/tests/turn-orchestrator/run-start.test.ts b/harness/tests/turn-orchestrator/run-start.test.ts index 43ac8305..d1f7b4c7 100644 --- a/harness/tests/turn-orchestrator/run-start.test.ts +++ b/harness/tests/turn-orchestrator/run-start.test.ts @@ -26,7 +26,6 @@ function fakeIii(): { iii: ISdk; calls: TriggerCall[] } { return { iii, calls }; } -/** Shape console/web sends inside harness::trigger payload (real.ts). */ const consoleRunStartPayload = { session_id: 'sess-1', message_id: 'msg-1', @@ -42,7 +41,6 @@ const consoleRunStartPayload = { ], }; -/** Minimal shape harness/trigger.test.ts forwards to run::start. */ const harnessRunStartPayload = { session_id: 'sess-1', provider: 'anthropic', @@ -175,19 +173,16 @@ describe('execute', () => { await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); - // Single ensure per run — later loadMessages/appendMessages no longer re-ensure. const ensureCalls = calls.filter((c) => c.function_id === 'session-tree::ensure'); expect(ensureCalls).toHaveLength(1); expect(ensureCalls[0]?.payload).toEqual({ session_id: 'sess-1' }); - // The single ensure must precede the run's first tree write (append batch). const ensureIdx = calls.findIndex((c) => c.function_id === 'session-tree::ensure'); const firstAppendIdx = calls.findIndex((c) => c.function_id === 'session-tree::append_batch'); expect(ensureIdx).toBeGreaterThanOrEqual(0); expect(firstAppendIdx).toBeGreaterThan(ensureIdx); }); - /** iii whose state::get on turn_state returns the given persisted record. */ function fakeIiiWithRecord(record: unknown): { iii: ISdk; calls: TriggerCall[] } { const calls: TriggerCall[] = []; const iii = { @@ -201,7 +196,6 @@ describe('execute', () => { return { iii, calls }; } - /** A record whose last transition just happened (fresh = genuinely running). */ const inFlightRecord = (state: string, updated_at_ms = Date.now()) => ({ session_id: 'sess-1', state, @@ -218,7 +212,6 @@ describe('execute', () => { const result = await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); expect(result).toEqual({ session_id: 'sess-1', started: false, reason: 'session_busy' }); - // Busy path mutates nothing: no ensure, no append, no run_request/turn_state write. expect(calls.some((c) => c.function_id === 'session-tree::ensure')).toBe(false); expect(calls.some((c) => c.function_id === 'session-tree::append_batch')).toBe(false); expect(calls.some((c) => c.function_id === 'state::set')).toBe(false); @@ -275,7 +268,6 @@ describe('execute', () => { await expect(execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload))).rejects.toThrow( /state worker unavailable/, ); - // Nothing was written before the guard read resolved. const calls = (iii.trigger as ReturnType).mock.calls as Array< [{ function_id: string }] >; diff --git a/harness/tests/turn-orchestrator/store.test.ts b/harness/tests/turn-orchestrator/store.test.ts index 79683eba..7def932c 100644 --- a/harness/tests/turn-orchestrator/store.test.ts +++ b/harness/tests/turn-orchestrator/store.test.ts @@ -79,7 +79,6 @@ describe('saveRecord no-op suppression', () => { const { iii, emits } = fakeIii(); const store = createTurnStore(iii); const rec = newRecord('sess-a'); - // Same view; differs only in updated_at_ms (which toView drops). const previous = { ...rec, updated_at_ms: rec.updated_at_ms + 1000 }; await store.saveRecord(rec, previous); @@ -139,7 +138,6 @@ describe('session-tree call reduction', () => { timestamp: 1, }; await createTurnStore(iii).appendMessages('sess-a', [msg]); - // One batch trigger for the whole call, not a per-message append. expect(calls.filter((c) => c === 'session-tree::append_batch')).toHaveLength(1); expect(calls).not.toContain('session-tree::append'); expect(calls).not.toContain('session-tree::ensure'); diff --git a/harness/tests/types/provider.test.ts b/harness/tests/types/provider.test.ts index ed5122fc..24907203 100644 --- a/harness/tests/types/provider.test.ts +++ b/harness/tests/types/provider.test.ts @@ -32,8 +32,6 @@ describe('ProviderStreamInputSchema', () => { }); it('passes a sparse model_meta through instead of failing the stream', () => { - // model_meta is an optional optimization: a partial catalog entry must fall - // back per-field at the consumer, never reject the whole input. const sparse = ProviderStreamInputSchema.parse({ writer_ref: { channel_id: 'c', access_key: 'k', direction: 'write' }, model: 'm', From 9fa8cd15a3625d9d6ed17b83fe5638a489b9ba51 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Tue, 9 Jun 2026 21:19:22 -0300 Subject: [PATCH 11/11] refactor(harness): remove the turn::steering_check compat drain Routing was inlined into assistant_streaming/function_execute in the previous release; the steering_check step existed only to drain records persisted in that state at deploy time. Remove the steering-check module, its registration, the 'steering_check' TurnState, the SteeringCheckTurnRecord type/schema/parser, and the steering tests. A record still persisted in steering_check no longer parses and is treated by run::start as absent; the queue must be confirmed drained before this ships. --- harness/src/index.ts | 2 +- harness/src/turn-orchestrator/iii.worker.yaml | 2 +- harness/src/turn-orchestrator/main.ts | 2 +- harness/src/turn-orchestrator/register.ts | 2 - harness/src/turn-orchestrator/schemas.ts | 25 ---- harness/src/turn-orchestrator/state.ts | 16 +-- .../turn-orchestrator/steering-check/ports.ts | 24 ---- .../steering-check/process.ts | 34 ----- .../turn-orchestrator/steering-check/run.ts | 76 ----------- .../tests/turn-orchestrator/finish.test.ts | 2 +- .../turn-orchestrator/run-transition.test.ts | 6 +- harness/tests/turn-orchestrator/state.test.ts | 16 --- .../steering-check-layer.test.ts | 117 ----------------- .../tests/turn-orchestrator/steering.test.ts | 123 ------------------ harness/tests/turn-orchestrator/store.test.ts | 2 +- 15 files changed, 10 insertions(+), 439 deletions(-) delete mode 100644 harness/src/turn-orchestrator/steering-check/ports.ts delete mode 100644 harness/src/turn-orchestrator/steering-check/process.ts delete mode 100644 harness/src/turn-orchestrator/steering-check/run.ts delete mode 100644 harness/tests/turn-orchestrator/steering-check-layer.test.ts delete mode 100644 harness/tests/turn-orchestrator/steering.test.ts diff --git a/harness/src/index.ts b/harness/src/index.ts index b62d016a..d7bd6a66 100644 --- a/harness/src/index.ts +++ b/harness/src/index.ts @@ -44,7 +44,7 @@ const WORKERS: readonly WorkerDefinition[] = [ { name: 'turn-orchestrator', description: - 'Durable run::start state machine driving each agent turn through provisioning, assistant, function-execute, and steering.', + 'Durable run::start state machine driving each agent turn through provisioning, assistant, and function-execute.', register: (iii, ctx) => registerTurnOrchestrator(iii, ctx), }, { diff --git a/harness/src/turn-orchestrator/iii.worker.yaml b/harness/src/turn-orchestrator/iii.worker.yaml index 4d9be949..1ec68b75 100644 --- a/harness/src/turn-orchestrator/iii.worker.yaml +++ b/harness/src/turn-orchestrator/iii.worker.yaml @@ -4,7 +4,7 @@ language: node deploy: binary manifest: package.json bin: iii-turn-orchestrator -description: Durable run::start state machine that drives each agent turn through provisioning, assistant, function-execute, and steering. +description: Durable run::start state machine that drives each agent turn through provisioning, assistant, and function-execute. runtime: kind: node diff --git a/harness/src/turn-orchestrator/main.ts b/harness/src/turn-orchestrator/main.ts index 9880794f..a27e780b 100644 --- a/harness/src/turn-orchestrator/main.ts +++ b/harness/src/turn-orchestrator/main.ts @@ -5,6 +5,6 @@ import { register } from './register.js'; await bootstrapWorker({ name: 'turn-orchestrator', description: - 'Durable run::start state machine driving each agent turn through provisioning, assistant, function-execute, and steering.', + 'Durable run::start state machine driving each agent turn through provisioning, assistant, and function-execute.', register: (iii, ctx) => register(iii, ctx), }); diff --git a/harness/src/turn-orchestrator/register.ts b/harness/src/turn-orchestrator/register.ts index 57b9e3e2..d6b096ae 100644 --- a/harness/src/turn-orchestrator/register.ts +++ b/harness/src/turn-orchestrator/register.ts @@ -7,7 +7,6 @@ import { register as registerGetState } from './get-state.js'; import { register as registerRunAbort } from './run-abort.js'; import { register as registerRunStart } from './run-start.js'; import { register as registerProvisioning } from './provisioning/process.js'; -import { register as registerSteeringCheck } from './steering-check/process.js'; export async function register(iii: ISdk, _ctx: { configPath: string }): Promise { registerRunStart(iii); @@ -16,7 +15,6 @@ export async function register(iii: ISdk, _ctx: { configPath: string }): Promise registerAssistantStreaming(iii); registerFunctionExecute(iii); registerFunctionAwaitingApproval(iii); - registerSteeringCheck(iii); registerFinishing(iii); registerGetState(iii); } diff --git a/harness/src/turn-orchestrator/schemas.ts b/harness/src/turn-orchestrator/schemas.ts index bcf2af2f..afe9ee66 100644 --- a/harness/src/turn-orchestrator/schemas.ts +++ b/harness/src/turn-orchestrator/schemas.ts @@ -14,7 +14,6 @@ import { type AssistantStreamingTurnRecord, type AwaitingApprovalEntry, type FunctionBatchTurnRecord, - type SteeringCheckTurnRecord, type TurnState, type TurnStateRecord, } from './state.js'; @@ -136,30 +135,6 @@ export function parseAssistantStreamingRecord(rec: TurnStateRecord): AssistantSt return rec as AssistantStreamingTurnRecord; } -/** Fields required before steering_check handlers run. */ -export const SteeringCheckTurnRecordSchema = z - .object({ - session_id: z.string().min(1), - state: z.literal('steering_check'), - turn_count: z.number(), - function_results: z.array(z.unknown()), - turn_end_emitted: z.boolean(), - started_at_ms: z.number(), - updated_at_ms: z.number(), - }) - .passthrough(); - -/** Validate persisted turn_state for steering_check; throws {@link TurnStateInvariantError}. */ -export function parseSteeringCheckRecord(rec: TurnStateRecord): SteeringCheckTurnRecord { - const result = SteeringCheckTurnRecordSchema.safeParse(rec); - if (!result.success) { - throw new TurnStateInvariantError( - `invalid steering_check turn record: ${formatZodIssues(result.error)}`, - ); - } - return rec as SteeringCheckTurnRecord; -} - export const GetStatePayloadSchema = SessionIdPayloadSchema; export type GetStatePayload = z.infer; diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index 347982a8..5b0435f8 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -20,7 +20,6 @@ export type TurnState = | 'assistant_streaming' | 'function_execute' | 'function_awaiting_approval' - | 'steering_check' | 'finishing' | 'stopped' | 'failed'; @@ -77,23 +76,11 @@ export type AssistantStreamingTurnRecord = TurnStateRecordCore & { awaiting_approval?: AwaitingApprovalEntry[]; }; -/** Persisted shape while in steering_check (work cleared on entry from function batch). */ -export type SteeringCheckTurnRecord = TurnStateRecordCore & { - state: 'steering_check'; - last_assistant?: AssistantMessage | null; - work?: TurnWork; - awaiting_approval?: AwaitingApprovalEntry[]; -}; - -type OtherTurnState = Exclude< - TurnState, - FunctionBatchState | 'assistant_streaming' | 'steering_check' ->; +type OtherTurnState = Exclude; export type TurnStateRecord = | FunctionBatchTurnRecord | AssistantStreamingTurnRecord - | SteeringCheckTurnRecord | (TurnStateRecordCore & { state: OtherTurnState; last_assistant?: AssistantMessage | null; @@ -106,7 +93,6 @@ const TURN_STATES = [ 'assistant_streaming', 'function_execute', 'function_awaiting_approval', - 'steering_check', 'finishing', 'stopped', 'failed', diff --git a/harness/src/turn-orchestrator/steering-check/ports.ts b/harness/src/turn-orchestrator/steering-check/ports.ts deleted file mode 100644 index fe6f4279..00000000 --- a/harness/src/turn-orchestrator/steering-check/ports.ts +++ /dev/null @@ -1,24 +0,0 @@ -/** - * Typed dependency ports for steering_check. - */ - -import type { ISdk } from '../../runtime/iii.js'; -import type { AgentEvent } from '../../types/agent-event.js'; -import { emit } from '../events.js'; -import { createTurnStatePorts, type TurnStatePorts } from '../state-runtime/ports.js'; - -export type SteeringCheckPorts = TurnStatePorts & { - emit(session_id: string, event: AgentEvent): Promise; -}; - -export function createSteeringCheckPorts(iii: ISdk): SteeringCheckPorts { - const base = createTurnStatePorts(iii); - - return { - ...base, - - emit(session_id, event) { - return emit(iii, session_id, event); - }, - }; -} diff --git a/harness/src/turn-orchestrator/steering-check/process.ts b/harness/src/turn-orchestrator/steering-check/process.ts deleted file mode 100644 index 06d734a6..00000000 --- a/harness/src/turn-orchestrator/steering-check/process.ts +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Route steering_check outcomes, apply transitions, and register the FSM step. - */ - -import type { ISdk } from '../../runtime/iii.js'; -import { runTransition } from '../run-transition.js'; -import { - TurnStepPayloadSchema, - parseSteeringCheckRecord, - type TurnStepPayload, -} from '../schemas.js'; -import type { TurnStateRecord } from '../state.js'; -import { createSteeringCheckPorts } from './ports.js'; -import { runSteeringCheck } from './run.js'; - -export async function handleSteering(iii: ISdk, rec: TurnStateRecord): Promise { - const steering = parseSteeringCheckRecord(rec); - const ports = createSteeringCheckPorts(iii); - await runSteeringCheck(ports, steering); -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::steering_check', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition(iii, 'steering_check', handleSteering, parsed); - }, - { - description: - 'DEPRECATED compat drain (steering_check removal, release A): routing is inlined into assistant_streaming/function_execute; this handler only drains records persisted in state steering_check before the deploy. Removed in release B once the queue is confirmed empty.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/steering-check/run.ts b/harness/src/turn-orchestrator/steering-check/run.ts deleted file mode 100644 index 25d97a0e..00000000 --- a/harness/src/turn-orchestrator/steering-check/run.ts +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Route steering_check outcomes and apply transitions. - * - * COMPAT (release A of the steering_check removal): nothing transitions into - * the `steering_check` state anymore — its routing is inlined into - * assistant-streaming finalizeAssistantTurn and function-execute finalizeBatch. - * This module stays registered for one release so queue messages / records - * persisted in `steering_check` at deploy time drain instead of DLQ-wedging - * the turn. Release B removes the state, schemas, and this registration. - */ - -import { - emitTurnEndOnce, - endTurnForMaxTurns, - maxTurnsReached, - resumeToAssistantStreaming, - transitionToFinishing, -} from '../state-runtime/turn-end.js'; -import type { SteeringCheckTurnRecord } from '../state.js'; -import type { SteeringCheckPorts } from './ports.js'; - -export type SteeringRoute = 'continue_after_function' | 'end_turn'; - -export type SteeringCheckOutcome = - | { kind: 'max_turns_reached' } - | { kind: 'continue_after_function' } - | { kind: 'end_turn' }; - -export function route(has_function_results: boolean): SteeringRoute { - return has_function_results ? 'continue_after_function' : 'end_turn'; -} - -export async function processSteeringCheck( - _ports: SteeringCheckPorts, - rec: SteeringCheckTurnRecord, -): Promise { - const decision = route(rec.function_results.length > 0); - - if (decision === 'continue_after_function' && maxTurnsReached(rec)) { - return { kind: 'max_turns_reached' }; - } - - switch (decision) { - case 'continue_after_function': - return { kind: 'continue_after_function' }; - case 'end_turn': - return { kind: 'end_turn' }; - } -} - -export async function applySteeringCheckOutcome( - ports: SteeringCheckPorts, - rec: SteeringCheckTurnRecord, - outcome: SteeringCheckOutcome, -): Promise { - switch (outcome.kind) { - case 'max_turns_reached': - await endTurnForMaxTurns(ports, rec); - return; - case 'continue_after_function': - resumeToAssistantStreaming(rec); - return; - case 'end_turn': - await emitTurnEndOnce(ports, rec); - transitionToFinishing(rec); - return; - } -} - -export async function runSteeringCheck( - ports: SteeringCheckPorts, - rec: SteeringCheckTurnRecord, -): Promise { - const outcome = await processSteeringCheck(ports, rec); - await applySteeringCheckOutcome(ports, rec, outcome); -} diff --git a/harness/tests/turn-orchestrator/finish.test.ts b/harness/tests/turn-orchestrator/finish.test.ts index 5ccde8f0..371104de 100644 --- a/harness/tests/turn-orchestrator/finish.test.ts +++ b/harness/tests/turn-orchestrator/finish.test.ts @@ -18,7 +18,7 @@ describe('TurnStatePorts.finishSession', () => { } as unknown as ISdk; const rec = newRecord('s1'); - rec.state = 'steering_check'; + rec.state = 'finishing'; await createTurnStatePorts(iii).finishSession(rec); expect(rec.state).toBe('stopped'); diff --git a/harness/tests/turn-orchestrator/run-transition.test.ts b/harness/tests/turn-orchestrator/run-transition.test.ts index 0ec81b9f..2a1022d8 100644 --- a/harness/tests/turn-orchestrator/run-transition.test.ts +++ b/harness/tests/turn-orchestrator/run-transition.test.ts @@ -82,7 +82,7 @@ describe('runTransition', () => { }); it('routes an unexpected handler throw to failed without re-throwing', async () => { - const rec: TurnStateRecord = { ...newRecord('s1'), state: 'steering_check' }; + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const store = installMockTurnStore({ loadRecord: vi.fn(async () => rec), }); @@ -90,7 +90,9 @@ describe('runTransition', () => { throw new Error('boom'); }); - const result = await runTransition({} as ISdk, 'steering_check', handle, { session_id: 's1' }); + const result = await runTransition({} as ISdk, 'assistant_streaming', handle, { + session_id: 's1', + }); expect(result).toMatchObject({ ok: true, to_state: 'failed' }); expect(store.saveRecord).toHaveBeenCalled(); }); diff --git a/harness/tests/turn-orchestrator/state.test.ts b/harness/tests/turn-orchestrator/state.test.ts index 6d341a46..f53a4966 100644 --- a/harness/tests/turn-orchestrator/state.test.ts +++ b/harness/tests/turn-orchestrator/state.test.ts @@ -3,7 +3,6 @@ import { TurnStateInvariantError } from '../../src/turn-orchestrator/errors.js'; import { parseAssistantStreamingRecord, parseFunctionBatchRecord, - parseSteeringCheckRecord, } from '../../src/turn-orchestrator/schemas.js'; import { type TurnStateRecord, @@ -85,18 +84,3 @@ describe('parseAssistantStreamingRecord', () => { expect(() => parseAssistantStreamingRecord(rec)).toThrow(TurnStateInvariantError); }); }); - -describe('parseSteeringCheckRecord', () => { - it('returns a validated record for steering_check', () => { - const rec = newRecord('s1'); - rec.state = 'steering_check'; - const steering = parseSteeringCheckRecord(rec); - expect(steering.state).toBe('steering_check'); - expect(steering.function_results).toEqual([]); - }); - - it('throws TurnStateInvariantError when session_id is missing', () => { - const rec = { state: 'steering_check' } as TurnStateRecord; - expect(() => parseSteeringCheckRecord(rec)).toThrow(TurnStateInvariantError); - }); -}); diff --git a/harness/tests/turn-orchestrator/steering-check-layer.test.ts b/harness/tests/turn-orchestrator/steering-check-layer.test.ts deleted file mode 100644 index e2d0c286..00000000 --- a/harness/tests/turn-orchestrator/steering-check-layer.test.ts +++ /dev/null @@ -1,117 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import { - applySteeringCheckOutcome, - processSteeringCheck, - route, -} from '../../src/turn-orchestrator/steering-check/run.js'; -import type { SteeringCheckPorts } from '../../src/turn-orchestrator/steering-check/ports.js'; -import { newRecord } from '../../src/turn-orchestrator/state.js'; - -function stubPorts(overrides: Partial = {}): SteeringCheckPorts { - return { - loadMessages: vi.fn(async () => []), - appendMessages: vi.fn(async () => {}), - checkpoint: vi.fn(async () => {}), - loadRunRequest: vi.fn(async () => ({ - provider: 'openai', - model: 'gpt-4', - mode: null, - system_prompt: '', - function_schemas: [], - })), - saveRunRequest: vi.fn(async () => {}), - emitTurnEnd: vi.fn(async () => {}), - finishSession: vi.fn(async (rec) => { - rec.state = 'stopped'; - }), - emit: vi.fn(async () => {}), - ...overrides, - }; -} - -describe('route', () => { - it.each([ - [true, 'continue_after_function'], - [false, 'end_turn'], - ] as const)('route(%s) -> %s', (has_function_results, expected) => { - expect(route(has_function_results)).toBe(expected); - }); -}); - -describe('processSteeringCheck', () => { - it('returns continue_after_function when function_results present', async () => { - const ports = stubPorts(); - const rec = { - ...newRecord('s1'), - state: 'steering_check' as const, - function_results: [{ role: 'function_result', content: [] }] as never, - }; - - const outcome = await processSteeringCheck(ports, rec); - - expect(outcome).toEqual({ kind: 'continue_after_function' }); - }); - - it('returns max_turns_reached when cap hit on continue path', async () => { - const ports = stubPorts(); - const rec = { - ...newRecord('s1'), - state: 'steering_check' as const, - max_turns: 2, - turn_count: 2, - function_results: [{ role: 'function_result', content: [] }] as never, - }; - - const outcome = await processSteeringCheck(ports, rec); - - expect(outcome).toEqual({ kind: 'max_turns_reached' }); - }); - - it('returns end_turn when no function results', async () => { - const ports = stubPorts(); - const rec = { ...newRecord('s1'), state: 'steering_check' as const }; - - const outcome = await processSteeringCheck(ports, rec); - - expect(outcome).toEqual({ kind: 'end_turn' }); - }); -}); - -describe('applySteeringCheckOutcome', () => { - it('continue_after_function: transitions without reloading messages', async () => { - const loadMessages = vi.fn(async () => []); - const emitTurnEnd = vi.fn(async () => {}); - const ports = stubPorts({ loadMessages, emitTurnEnd }); - const rec = { - ...newRecord('s1'), - state: 'steering_check' as const, - function_results: [{ role: 'function_result', content: [] }] as never, - turn_end_emitted: true, - }; - - await applySteeringCheckOutcome(ports, rec, { kind: 'continue_after_function' }); - - expect(rec.state).toBe('assistant_streaming'); - expect(rec.function_results).toEqual([]); - expect(loadMessages).not.toHaveBeenCalled(); - expect(emitTurnEnd).not.toHaveBeenCalled(); - }); - - it('end_turn: emits turn_end then routes to the finishing step', async () => { - const emitTurnEnd = vi.fn(async () => {}); - const finishSession = vi.fn(async (rec) => { - rec.state = 'stopped'; - }); - const ports = stubPorts({ emitTurnEnd, finishSession }); - const rec = { ...newRecord('s1'), state: 'steering_check' as const }; - - await applySteeringCheckOutcome(ports, rec, { kind: 'end_turn' }); - - // agent_end is deferred to turn::finishing; the drain only emits turn_end here. - expect(rec.state).toBe('finishing'); - expect(rec.turn_end_emitted).toBe(true); - // 4th arg is the optional model_limit, undefined here (no model_meta on rec). - expect(emitTurnEnd).toHaveBeenCalledWith('s1', expect.anything(), [], undefined); - expect(finishSession).not.toHaveBeenCalled(); - }); -}); diff --git a/harness/tests/turn-orchestrator/steering.test.ts b/harness/tests/turn-orchestrator/steering.test.ts deleted file mode 100644 index 2a2431f9..00000000 --- a/harness/tests/turn-orchestrator/steering.test.ts +++ /dev/null @@ -1,123 +0,0 @@ -import { afterEach, describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import * as events from '../../src/turn-orchestrator/events.js'; -import { installMockTurnStore } from './_helpers/mockTurnStore.js'; -import { newRecord, type TurnStateRecord } from '../../src/turn-orchestrator/state.js'; -import { handleSteering } from '../../src/turn-orchestrator/steering-check/process.js'; - -afterEach(() => { - vi.restoreAllMocks(); -}); - -function makeIii() { - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { - if (req.function_id === 'state::get') return null; - if (req.function_id === 'state::update') return { old_value: 0 }; - if (req.function_id === 'stream::set') return null; - return null; - }), - } as unknown as ISdk; - - return { iii }; -} - -function steeringRec( - session_id: string, - overrides: Partial = {}, -): TurnStateRecord { - const rec = newRecord(session_id); - rec.state = 'steering_check'; - return { ...rec, ...overrides }; -} - -describe('handleSteering', () => { - it('continue_after_function: clears function_results without reloading messages', async () => { - const { iii } = makeIii(); - const rec = steeringRec('s1', { - function_results: [{ role: 'function_result', content: [] }] as never, - turn_end_emitted: true, - }); - const store = installMockTurnStore(); - const emitSpy = vi.spyOn(events, 'emit'); - - await handleSteering(iii, rec); - - expect(rec.state).toBe('assistant_streaming'); - expect(rec.function_results).toEqual([]); - expect(store.loadMessages).not.toHaveBeenCalled(); - expect(emitSpy).not.toHaveBeenCalled(); - }); - - it('end_turn: emits turn_end then routes to the finishing step', async () => { - const { iii } = makeIii(); - const rec = steeringRec('s1'); - const store = installMockTurnStore(); - const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleSteering(iii, rec); - - // turn_end is emitted now; agent_end is deferred to turn::finishing. - expect(rec.state).toBe('finishing'); - expect(rec.turn_end_emitted).toBe(true); - expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'turn_end' })); - expect(emitSpy).not.toHaveBeenCalledWith( - iii, - 's1', - expect.objectContaining({ type: 'agent_end' }), - ); - expect(store.loadMessages).not.toHaveBeenCalled(); - }); - - it('caps at max_turns: emits a max_turns assistant + message_complete + turn_end and tears down instead of continuing', async () => { - const { iii } = makeIii(); - const rec = steeringRec('s1', { - max_turns: 2, - turn_count: 2, - function_results: [{ role: 'function_result', content: [] }] as never, - }); - const store = installMockTurnStore(); - const appendSpy = store.appendMessages; - const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleSteering(iii, rec); - - expect(rec.state).toBe('finishing'); - expect(rec.turn_end_emitted).toBe(true); - expect(rec.last_assistant?.content[0]).toEqual( - expect.objectContaining({ type: 'text', text: expect.stringContaining('max_turns') }), - ); - expect(emitSpy).toHaveBeenCalledWith( - iii, - 's1', - expect.objectContaining({ type: 'message_complete' }), - ); - expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'turn_end' })); - // max_turns teardown appends the synthetic notice and finishes without a - // transcript reload (agent_end is a signal). - expect(store.loadMessages).not.toHaveBeenCalled(); - expect(appendSpy).toHaveBeenCalledWith('s1', [ - expect.objectContaining({ - content: expect.arrayContaining([ - expect.objectContaining({ text: expect.stringContaining('max_turns') }), - ]), - }), - ]); - }); - - it('continues to assistant_streaming when under max_turns (continue_after_function route)', async () => { - const { iii } = makeIii(); - const rec = steeringRec('s1', { - max_turns: 5, - turn_count: 2, - function_results: [{ role: 'function_result', content: [] }] as never, - }); - installMockTurnStore(); - vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleSteering(iii, rec); - - expect(rec.state).toBe('assistant_streaming'); - expect(rec.function_results).toEqual([]); - }); -}); diff --git a/harness/tests/turn-orchestrator/store.test.ts b/harness/tests/turn-orchestrator/store.test.ts index 7def932c..ece8f36b 100644 --- a/harness/tests/turn-orchestrator/store.test.ts +++ b/harness/tests/turn-orchestrator/store.test.ts @@ -179,7 +179,7 @@ describe('shouldWakeStep', () => { }); it('rejects terminal state (stopped)', () => { - expect(shouldWakeStep('steering_check', 'stopped')).toBe(false); + expect(shouldWakeStep('function_execute', 'stopped')).toBe(false); }); it('rejects function_awaiting_approval (orchestrator parks here)', () => {