diff --git a/console/web/src/components/chat/ChatView.tsx b/console/web/src/components/chat/ChatView.tsx index 16ffcbc0..937065e1 100644 --- a/console/web/src/components/chat/ChatView.tsx +++ b/console/web/src/components/chat/ChatView.tsx @@ -109,7 +109,6 @@ export function ChatView({ const harnessBlockedRef = useRef(harnessBlocked) harnessBlockedRef.current = harnessBlocked - // Matches iii.session.id on every span so the traces UI can group by it. const sessionId = useMemo( () => makeSessionId(conversation.id), [conversation.id], @@ -151,10 +150,6 @@ export function ChatView({ const handleAlwaysAllow = useMemo(() => { const resolveFn = backend.resolveApproval if (!resolveFn) return undefined - // "Approve always" is a per-session grant honored in every mode, so the - // button shows on every prompt (full mode never produces prompts, so - // it's moot there). Approves this call and stops asking for the same - // function for the rest of the conversation. return async (sId: string, functionCallId: string, functionId: string) => { await approvalSettings.approveAlways(functionId) await resolveFn(sId, functionCallId, 'allow') @@ -233,7 +228,6 @@ export function ChatView({ } onCompactConversation(conversationId, marker) } else { - // empty | busy | overflow | error: nothing rewritten server-side. onPatchMessage(conversationId, pendingId, { content: formatCompactResult(result), tone: compactResultTone(result), @@ -340,15 +334,31 @@ export function ChatView({ break } case 'fcall-end': { - if (!fcallId) break - onPatchMessage(conversationId, fcallId, { + const targetId: string | null = event.functionCallId + ? ([...fcallMap.entries()].find( + ([, fcid]) => fcid === event.functionCallId, + )?.[0] ?? null) + : fcallId + if (!targetId) break + onPatchMessage(conversationId, targetId, { output: event.output, durationMs: event.durationMs, running: false, pendingApproval: false, }) - fcallMap.delete(fcallId) - fcallId = null + fcallMap.delete(targetId) + if (targetId === fcallId) fcallId = null + break + } + case 'fcall-approval-cleared': { + const clearedId = [...fcallMap.entries()].find( + ([, fcid]) => fcid === event.functionCallId, + )?.[0] + if (clearedId) { + onPatchMessage(conversationId, clearedId, { + pendingApproval: false, + }) + } break } case 'assistant-token': { @@ -384,12 +394,6 @@ export function ChatView({ break } case 'compaction': { - // Server-side context-compaction finished rewriting the - // session's flat-state. Append a marker so: - // 1. The user sees that compaction happened. - // 2. estimateConversationTokens stops counting pre-marker - // messages → the CTX bar drops to the real value. - // We append (not replace) so the transcript stays scrollable. const compactionContent = event.mode === 'sync' ? `compacted ${event.tokensBefore.toLocaleString()} tokens before continuing` @@ -409,10 +413,6 @@ export function ChatView({ break } case 'stop-reason': { - // The assistant turn ended abnormally — show the user why - // instead of leaving the response looking like it just - // ran out of words. Pre-fix this event didn't exist and - // the same condition produced a silently truncated reply. const noticeContent = formatStopReason( event.reason, event.message, @@ -443,7 +443,6 @@ export function ChatView({ } } } catch (err) { - // Backends signal semantic failures via fcall-end; thrown = abort or bug. if (!isAbortError(err)) { console.warn('[chat] stream errored', err) } @@ -477,10 +476,9 @@ export function ChatView({ const handleStop = useCallback(() => { abortRef.current?.abort() - }, []) + void backend.abortRun?.(sessionId).catch(() => {}) + }, [backend, sessionId]) - // Covers the gap between submit / fcall-end and the next streamed token, - // where the assistant/thought shimmer hasn't yet rendered. const isThinking = isStreaming && (() => { diff --git a/console/web/src/lib/backend/real.ts b/console/web/src/lib/backend/real.ts index 719b0cb7..026942d4 100644 --- a/console/web/src/lib/backend/real.ts +++ b/console/web/src/lib/backend/real.ts @@ -54,12 +54,6 @@ async function* realStream( r?.() } - // Subscribe directly to this session's `agent::events` stream via a scoped - // engine stream trigger (`group_id = sessionId`), replacing the harness - // fanout hop (`ui::subscribe` → per-browser `ui::session::event` push). - // Registered before the `harness::trigger` kickoff below — both travel the - // same ordered WS connection, so the trigger is in place before the turn's - // first event is written. const stopSubscription = startSessionEventsSubscription( client, sessionId, @@ -97,24 +91,34 @@ async function* realStream( const { provider, model: modelId } = resolveRunParams(model) let kickoffError: Error | null = null + let sessionBusy = false client - .call('harness::trigger', { - session_id: sessionId, - message_id: messageId, - payload: { + .call<{ status_code?: number; body?: { started?: boolean } }>( + 'harness::trigger', + { session_id: sessionId, message_id: messageId, - provider, - model: modelId, - mode, - messages: [ - { - role: 'user', - content: [{ type: 'text', text: prompt }], - timestamp: Date.now(), - }, - ], + payload: { + session_id: sessionId, + message_id: messageId, + provider, + model: modelId, + mode, + messages: [ + { + role: 'user', + content: [{ type: 'text', text: prompt }], + timestamp: Date.now(), + }, + ], + }, }, + ) + .then((res) => { + if (res?.body?.started === false) { + sessionBusy = true + wake() + } }) .catch((err) => { kickoffError = err instanceof Error ? err : new Error(String(err)) @@ -126,6 +130,16 @@ async function* realStream( while (true) { if (signal?.aborted) return + if (sessionBusy) { + yield { + kind: 'stop-reason', + reason: 'error', + message: + 'A turn is still running in this session — your message was not sent. Stop the current turn (or answer its pending approval), then resend.', + } + yield { kind: 'assistant-end' } + return + } if (kickoffError) { const err = kickoffError as Error yield { @@ -135,13 +149,18 @@ async function* realStream( yield { kind: 'assistant-end' } return } - while (queue.length === 0 && !kickoffError && !signal?.aborted) { + while ( + queue.length === 0 && + !kickoffError && + !sessionBusy && + !signal?.aborted + ) { await new Promise((resolve) => { resolveNext = resolve }) } if (signal?.aborted) return - if (kickoffError) continue + if (kickoffError || sessionBusy) continue const event = queue.shift() if (!event) continue const streamEvents = translate(event, sessionId) @@ -169,6 +188,11 @@ async function realResolveApproval( }) } +async function realAbortRun(sessionId: string): Promise { + const client = await getIiiClient() + await client.call('run::abort', { session_id: sessionId }) +} + async function realCompactSession( sessionId: string, model: ModelId, @@ -177,9 +201,6 @@ async function realCompactSession( const { provider, model: modelId } = resolveRunParams(model) const client = await getIiiClient() try { - // Passing limit.context lets the server skip the models::get lookup. - // We don't know max_output here; 4096 is the same conservative default - // the server falls back to when models::get returns nothing. const DEFAULT_MAX_OUTPUT = 4_096 const modelPayload: { id: string @@ -209,10 +230,7 @@ async function realCompactSession( if (resp?.status === 'ok') { const tokensBefore = typeof resp.tokens_before === 'number' ? resp.tokens_before : 0 - // Surface zero-token "ok" as semantic empty. if (tokensBefore === 0) return { status: 'empty' } - // Fallback placeholder for engines that predate summary_text on the - // wire; without it the marker has no to ship. const summaryText = typeof resp.summary_text === 'string' && resp.summary_text.length > 0 ? resp.summary_text @@ -249,5 +267,6 @@ export const realBackend: ChatBackend = { id: 'real', stream: realStream, resolveApproval: realResolveApproval, + abortRun: realAbortRun, compactSession: realCompactSession, } diff --git a/console/web/src/lib/backend/translate.test.ts b/console/web/src/lib/backend/translate.test.ts index 82f2d8c4..6bd0bb58 100644 --- a/console/web/src/lib/backend/translate.test.ts +++ b/console/web/src/lib/backend/translate.test.ts @@ -231,7 +231,7 @@ describe('createAgentEventTranslator — turn_state_changed', () => { ).toEqual([]) }) - it('emits nothing when state leaves function_awaiting_approval (the orchestrator emits the matching function_execution_end)', () => { + it('clears the pending prompt when a call leaves function_awaiting_approval', () => { const { translate } = createAgentEventTranslator() translate( { @@ -265,7 +265,30 @@ describe('createAgentEventTranslator — turn_state_changed', () => { }, 'sess-a', ), - ).toEqual([]) + ).toEqual([{ kind: 'fcall-approval-cleared', functionCallId: 'fc-1' }]) + }) + + it('threads function_call_id onto fcall-end so the consumer can correlate', () => { + const { translate } = createAgentEventTranslator() + const out = translate( + { + type: 'function_execution_end', + function_call_id: 'fc-7', + function_id: 'shell::fs::ls', + result: { content: [], details: {} }, + is_error: false, + duration_ms: 30, + }, + 'sess-a', + ) + expect(out).toEqual([ + { + kind: 'fcall-end', + output: { content: [], details: {} }, + durationMs: 30, + functionCallId: 'fc-7', + }, + ]) }) it('partitions mirrors by sessionId so two chats do not interfere', () => { diff --git a/console/web/src/lib/backend/translate.ts b/console/web/src/lib/backend/translate.ts index 54130ee9..0e8b6991 100644 --- a/console/web/src/lib/backend/translate.ts +++ b/console/web/src/lib/backend/translate.ts @@ -46,8 +46,8 @@ export function createAgentEventTranslator(): { const prev = mirrors.get(sessionId) ?? [] const next = pendingApprovalsFromTurnState(event.new_value) mirrors.set(sessionId, next) - const { added } = diffPending(prev, next) - return added.map((entry) => ({ + const { added, removed } = diffPending(prev, next) + const out: StreamEvent[] = added.map((entry) => ({ kind: 'fcall-start' as const, functionId: entry.function_id, input: entry.args, @@ -55,6 +55,13 @@ export function createAgentEventTranslator(): { functionCallId: entry.function_call_id, sessionId, })) + for (const entry of removed) { + out.push({ + kind: 'fcall-approval-cleared', + functionCallId: entry.function_call_id, + }) + } + return out } function translate(event: AgentEvent, sessionId?: string): StreamEvent[] { @@ -90,6 +97,7 @@ export function createAgentEventTranslator(): { ? wrapErrorOutput(event.result) : event.result, durationMs: event.duration_ms, + functionCallId: event.function_call_id, }, ] diff --git a/console/web/src/lib/backend/types.ts b/console/web/src/lib/backend/types.ts index 03f32df7..634f1a56 100644 --- a/console/web/src/lib/backend/types.ts +++ b/console/web/src/lib/backend/types.ts @@ -27,7 +27,28 @@ export type StreamEvent = /** iii session_id owning this call — needed to resolve approval. */ sessionId?: string } - | { kind: 'fcall-end'; output: unknown; durationMs: number } + | { + kind: 'fcall-end' + output: unknown + durationMs: number + /** + * iii function_call_id of the call that ended. Parallel tool calls (and + * approval-resolved calls, which emit no fcall-start) finish out of + * order, so the consumer MUST match the end to its card by this id, not + * by "the most recently started call". + */ + functionCallId?: string + } + | { + /** + * A call left the pending-approval set without executing (aborted, or + * resolved out-of-band): clear its card's approval prompt so it doesn't + * hang on "approving…". A resolved-and-executed call also emits + * `fcall-end`; both patching to non-pending is idempotent. + */ + kind: 'fcall-approval-cleared' + functionCallId: string + } | { kind: 'assistant-token'; token: string } | { kind: 'assistant-end' } | { @@ -114,6 +135,12 @@ export interface ChatBackend { functionCallId: string, decision: 'allow' | 'deny', ): Promise + /** + * Server-side cancel of the session's in-flight turn (`run::abort`). + * The client-side AbortSignal only stops rendering; without this the + * server keeps running and `run::start` rejects new messages as busy. + */ + abortRun?(sessionId: string): Promise /** * Powers `/compact`. Compacts the session-tree (the single source of * truth) directly. `contextWindow` skips the server's `models::get` diff --git a/console/web/src/stories/playground/EventLog.tsx b/console/web/src/stories/playground/EventLog.tsx index b844fdbd..699d70a3 100644 --- a/console/web/src/stories/playground/EventLog.tsx +++ b/console/web/src/stories/playground/EventLog.tsx @@ -175,6 +175,7 @@ function toneFor(kind: StreamEvent['kind']): string { return 'text-ink-ghost' case 'fcall-start': case 'fcall-end': + case 'fcall-approval-cleared': return 'text-accent' case 'assistant-token': return 'text-ink-ghost' diff --git a/harness/src/approval-gate/schemas.ts b/harness/src/approval-gate/schemas.ts index 5e5fabbb..2e1d5c5a 100644 --- a/harness/src/approval-gate/schemas.ts +++ b/harness/src/approval-gate/schemas.ts @@ -24,13 +24,7 @@ export type AlwaysAllowEntry = z.infer; export const ApprovalSettingsSchema = z.object({ mode: PermissionModeSchema, - // Auto-mode allowlist: curated on the Configuration screen, consulted - // ONLY in auto mode. always_allow: z.array(allowEntrySchema), - // Per-session "approve always" grants made from an approval prompt. - // Consulted in EVERY mode (it's a remembered human decision, not an - // auto-policy). `.default([])` keeps records written before this field - // existed parseable. approved_always: z.array(allowEntrySchema).default([]), mode_set_at: z.number().int().nonnegative(), }); @@ -43,7 +37,7 @@ export const DEFAULT_APPROVAL_SETTINGS: ApprovalSettings = { mode_set_at: 0, }; -const wireDecisionSchema = z.enum(['allow', 'deny']); +const wireDecisionSchema = z.enum(['allow', 'deny', 'aborted']); const deniedBySchema = z.enum(['permissions', 'user', 'gate_unavailable']); diff --git a/harness/src/context-compaction/handler-async.ts b/harness/src/context-compaction/handler-async.ts index 518fafdb..53e0f818 100644 --- a/harness/src/context-compaction/handler-async.ts +++ b/harness/src/context-compaction/handler-async.ts @@ -7,6 +7,7 @@ import { setCurrentSpanAttribute, withSpan } from '@iii-dev/observability'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; +import type { ModelContextLimit } from '../types/agent-event.js'; import { compactionConfig } from './config.js'; import { isSummarizeOk, @@ -54,24 +55,31 @@ export function turnEndUsage(event: unknown): Record | null { type ResolvedModel = { providerID: string; modelID: string; - modelLimit: { context: number; input: number; output: number }; + modelLimit: ModelContextLimit; } | null; -async function resolveModelFromEvent( +/** + * The fields we read off the orchestrator-produced `turn_end` event. The + * message is always an AssistantMessage, so provider/model are present (empty + * strings on synthetic/error turns); model_limit is threaded when the catalog + * resolved at turn start. + */ +type TurnEndEventView = { + message: { provider: string; model: string }; + model_limit?: ModelContextLimit; +}; + +export async function resolveModelFromEvent( iii: ISdk, session_id: string, event: unknown, ): Promise { - let providerID: string | null = null; - let modelID: string | null = null; - - if (event && typeof event === 'object') { - const ev = event as Record; - const msg = ev.message as Record | undefined; - if (msg) { - if (typeof msg.provider === 'string' && msg.provider) providerID = msg.provider; - if (typeof msg.model === 'string' && msg.model) modelID = msg.model; - } + const ev = event as TurnEndEventView; + let providerID = ev.message.provider || null; + let modelID = ev.message.model || null; + + if (providerID && modelID && ev.model_limit) { + return { providerID, modelID, modelLimit: ev.model_limit }; } if (!providerID || !modelID) { @@ -87,8 +95,7 @@ async function resolveModelFromEvent( const messages = resp?.messages ?? []; for (let i = messages.length - 1; i >= 0; i--) { const m = messages[i]?.message; - if (!m) continue; - if (m.role !== 'assistant') continue; + if (m?.role !== 'assistant') continue; if (!providerID && typeof m.provider === 'string' && m.provider) { providerID = m.provider; } diff --git a/harness/src/context-compaction/model-resolver.ts b/harness/src/context-compaction/model-resolver.ts index 996f1e2b..4b03af32 100644 --- a/harness/src/context-compaction/model-resolver.ts +++ b/harness/src/context-compaction/model-resolver.ts @@ -9,6 +9,20 @@ export type ResolvedModel = { modelLimit: ModelLimit; } | null; +/** + * Derive a {@link ModelLimit} from a catalog entry (a full {@link Model} or a + * `models::get` result). The single mapping used by both the threaded + * `model_meta` path and a fresh fetch, so they yield the exact same limit. + */ +export function limitFromModel(m: { + context_window?: number; + max_output_tokens?: number; +}): ModelLimit { + const context = typeof m.context_window === 'number' ? m.context_window : 0; + const output = typeof m.max_output_tokens === 'number' ? m.max_output_tokens : 0; + return { context, input: context, output }; +} + export async function fetchModelLimit( iii: ISdk, providerID: string, @@ -34,13 +48,10 @@ export async function fetchModelLimit( return null; } - const context = typeof entry.context_window === 'number' ? entry.context_window : 0; - const output = typeof entry.max_output_tokens === 'number' ? entry.max_output_tokens : 0; - return { providerID, modelID, - modelLimit: { context, input: context, output }, + modelLimit: limitFromModel(entry), }; } catch (err) { logger.debug('model-resolver: models::get failed', { @@ -87,9 +98,6 @@ export async function resolveModelFromSession( } } -// Fallback when no assistant message carries provider/model yet (first-turn -// sessions, error-only sessions). The orchestrator writes run_request at -// `run_request` scope during run::start. export async function resolveModelFromRunRequest( iii: ISdk, session_id: string, diff --git a/harness/src/context-compaction/overflow.ts b/harness/src/context-compaction/overflow.ts index dfc9cca0..3246100e 100644 --- a/harness/src/context-compaction/overflow.ts +++ b/harness/src/context-compaction/overflow.ts @@ -1,14 +1,12 @@ +import type { ModelContextLimit } from '../types/agent-event.js'; import { MAX_PRESERVE_RECENT_TOKENS, MIN_PRESERVE_RECENT_TOKENS, compactionConfig, } from './config.js'; -export type ModelLimit = { - context: number; - input: number; - output: number; -}; +/** A model's token limits. Single shape, defined in types/agent-event.ts. */ +export type ModelLimit = ModelContextLimit; export type ModelLike = { id: string; diff --git a/harness/src/harness/trigger.ts b/harness/src/harness/trigger.ts index 92534365..0c60e6c8 100644 --- a/harness/src/harness/trigger.ts +++ b/harness/src/harness/trigger.ts @@ -48,7 +48,7 @@ export function register(iii: ISdk): void { timeoutMs: BRIDGE_TIMEOUT_MS, }); return { - status_code: 200, + status_code: result.started === false ? 409 : 200, headers: { 'content-type': 'application/json' }, body: result, }; diff --git a/harness/src/index.ts b/harness/src/index.ts index b62d016a..d7bd6a66 100644 --- a/harness/src/index.ts +++ b/harness/src/index.ts @@ -44,7 +44,7 @@ const WORKERS: readonly WorkerDefinition[] = [ { name: 'turn-orchestrator', description: - 'Durable run::start state machine driving each agent turn through provisioning, assistant, function-execute, and steering.', + 'Durable run::start state machine driving each agent turn through provisioning, assistant, and function-execute.', register: (iii, ctx) => registerTurnOrchestrator(iii, ctx), }, { diff --git a/harness/src/provider-anthropic/auth.ts b/harness/src/provider-anthropic/auth.ts index 64ac30d1..f57d94a7 100644 --- a/harness/src/provider-anthropic/auth.ts +++ b/harness/src/provider-anthropic/auth.ts @@ -5,20 +5,50 @@ * pair with a single call. */ +import type { Model } from '../models-catalog/types.js'; import type { ISdk } from '../runtime/iii.js'; import { clampOutputTokens, getCatalogModel } from '../runtime/output-tokens.js'; -import { resolveProvider } from '../runtime/provider-resolve.js'; +import { type ProviderResolveResult, resolveProvider } from '../runtime/provider-resolve.js'; import type { WorkerConfig } from './config.js'; import { type AnthropicConfig, configWithCredential } from './types.js'; export const PROVIDER_ID = 'anthropic'; +/** + * Single-slot cache of the resolved provider credential, keyed by the turn's + * stable id. The credential is global, so within a turn its 20+ stream calls + * reuse one resolution; a new turn (new key) re-resolves, picking up a key + * rotated between turns. {@link invalidateProviderResolveCache} drops it on a + * mid-turn 401. With no key threaded, callers always resolve (no caching). + */ +let resolveCache: { key: number; resolved: ProviderResolveResult } | null = null; + +async function resolveProviderForTurn(iii: ISdk, key?: number): Promise { + if (key === undefined) return resolveProvider(iii, PROVIDER_ID); + if (resolveCache?.key === key) return resolveCache.resolved; + const resolved = await resolveProvider(iii, PROVIDER_ID); + resolveCache = { key, resolved }; + return resolved; +} + +/** Drop the cached resolution so the next stream re-resolves (called on a 401). */ +export function invalidateProviderResolveCache(): void { + resolveCache = null; +} + +/** Test seam: clear the cache between cases. */ +export function _resetProviderResolveCacheForTests(): void { + resolveCache = null; +} + export async function buildConfig( iii: ISdk, worker: WorkerConfig, model: string, + preResolved?: Model, + resolutionKey?: number, ): Promise { - const resolved = await resolveProvider(iii, PROVIDER_ID); + const resolved = await resolveProviderForTurn(iii, resolutionKey); if (!resolved.credential) { throw new Error( 'harness::provider::resolve returned no credential for provider `anthropic` ' + @@ -26,7 +56,7 @@ export async function buildConfig( ); } const apiUrl = resolved.api_url ?? worker.default_api_url; - const catalog = await getCatalogModel(iii, PROVIDER_ID, model); + const catalog = preResolved ?? (await getCatalogModel(iii, PROVIDER_ID, model)); const maxTokens = clampOutputTokens({ modelMaxOutput: catalog?.max_output_tokens, userOverride: resolved.max_tokens, diff --git a/harness/src/provider-anthropic/stream-fn.ts b/harness/src/provider-anthropic/stream-fn.ts index fee9c4e1..78137092 100644 --- a/harness/src/provider-anthropic/stream-fn.ts +++ b/harness/src/provider-anthropic/stream-fn.ts @@ -29,12 +29,14 @@ export function register(iii: ISdk, worker: WorkerConfig): void { FUNCTION_ID, async (raw: unknown) => { const input = ProviderStreamRuntimeInputSchema.parse(raw); - // The iii-sdk auto-hydrates `writer_ref` (a StreamChannelRef on the - // wire) into a `ChannelWriter` instance before this handler runs — - // use it directly instead of re-instantiating from the wire shape - // (which no longer exists by the time we get here). const writer = input.writer_ref as ChannelWriter; - const cfg = await buildConfig(iii, worker, input.model); + const cfg = await buildConfig( + iii, + worker, + input.model, + input.model_meta, + input.resolution_key, + ); const thinking = buildThinkingConfig(input.thinking_level, cfg.max_tokens, cfg.catalog); try { const events = streamAnthropic({ diff --git a/harness/src/provider-anthropic/stream.ts b/harness/src/provider-anthropic/stream.ts index 3d6dd64b..7ad5cc38 100644 --- a/harness/src/provider-anthropic/stream.ts +++ b/harness/src/provider-anthropic/stream.ts @@ -8,6 +8,7 @@ import { logger } from '../runtime/otel.js'; import type { AgentMessage, AssistantMessage } from '../types/agent-message.js'; import type { AgentFunction } from '../types/function.js'; import type { AssistantMessageEvent } from '../types/stream-event.js'; +import { invalidateProviderResolveCache } from './auth.js'; import { applyMessagesCacheAnchor, applyToolsCacheControl, buildSystemField } from './cache.js'; import { buildFinal, @@ -41,7 +42,6 @@ export async function* streamAnthropic({ applyMessagesCacheAnchor(wire_messages); const wire_tools = functionsToWire(tools) as Record[]; applyToolsCacheControl(wire_tools); - // No `temperature`: the API default applies (required when thinking is on). const body = { model: cfg.model, max_tokens: cfg.max_tokens, @@ -58,7 +58,6 @@ export async function* streamAnthropic({ 'content-type': 'application/json', }; if (thinking) { - // Lets thinking interleave with tool calls. headers['anthropic-beta'] = 'interleaved-thinking-2025-05-14'; } @@ -79,6 +78,7 @@ export async function* streamAnthropic({ if (!resp.ok) { const text = await resp.text().catch(() => ''); const kind = classifyAnthropicError(text, resp.status); + if (kind === 'auth_expired') invalidateProviderResolveCache(); yield syntheticErrorEvent(text || `anthropic http ${resp.status}`, cfg.model, kind); return; } diff --git a/harness/src/session/tree/operations.ts b/harness/src/session/tree/operations.ts index 69eb7c7e..248909fb 100644 --- a/harness/src/session/tree/operations.ts +++ b/harness/src/session/tree/operations.ts @@ -86,6 +86,71 @@ export async function appendMessage( return id; } +/** + * Append several messages as one linear chain in a single store round-trip. + * Equivalent to calling {@link appendMessage} for each message in order, but + * resolves the active leaf once (not once per message) and writes through + * {@link SessionStore.appendMany} so meta is refreshed once. + * + * Entries share a single append timestamp; their order is carried by the + * parent chain, not the clock, and {@link activePath} resolves the leaf by + * chain tip — so a later append with an equal-or-earlier wall-clock timestamp + * still chains onto this batch's tail instead of being orphaned. + */ +export async function appendMessages( + store: SessionStore, + session_id: string, + parent_id: string | null, + messages: AgentMessage[], +): Promise { + if (messages.length === 0) return []; + + let parent = parent_id; + if (parent === null) { + const path = await activePath(store, session_id); + parent = path.at(-1) ?? null; + } + + const timestamp = Date.now(); + const entries: SessionEntry[] = messages.map((message) => { + const id = randomUUID(); + const entry: SessionEntry = { + type: 'message', + id, + parent_id: parent, + message, + timestamp, + }; + parent = id; + return entry; + }); + + await store.appendMany(session_id, entries); + return entries.map((e) => e.id); +} + +/** + * The active leaf is the tip of the newest chain: the most-recent entry that is + * not the parent of any other entry. `entries` is sorted ascending by + * (timestamp, id), so we scan from the end and take the first tip. Resolving by + * chain tip rather than raw sort-max keeps a freshly appended child as the leaf + * even when its wall-clock timestamp ties or precedes a sibling's — the case a + * batch append (shared timestamp) or a clock that did not advance would + * otherwise mis-resolve. Falls back to the most-recent entry only if every + * entry is some entry's parent (a cycle — should not occur). + */ +function resolveActiveLeaf(entries: SessionEntry[]): string | null { + const parentIds = new Set(); + for (const e of entries) { + if (e.parent_id) parentIds.add(e.parent_id); + } + for (let i = entries.length - 1; i >= 0; i--) { + const entry = entries[i]; + if (entry && !parentIds.has(entry.id)) return entry.id; + } + return entries.at(-1)?.id ?? null; +} + export async function activePath( store: SessionStore, session_id: string, @@ -94,9 +159,8 @@ export async function activePath( const entries = await store.loadEntries(session_id); if (entries.length === 0) return []; const byId = new Map(entries.map((e) => [e.id, e] as const)); - const last = entries.at(-1); - if (!last) return []; - const start = leaf ?? last.id; + const start = leaf ?? resolveActiveLeaf(entries); + if (start === null) return []; const path: string[] = []; let cursor: string | null = start; while (cursor !== null) { @@ -169,9 +233,7 @@ export async function listSessions( let entries: SessionEntry[] = []; try { entries = await store.loadEntries(meta.session_id); - } catch { - // ignore - } + } catch {} sessions.push({ session_id: meta.session_id, created_at: meta.created_at, @@ -331,8 +393,6 @@ export async function appendSynthetic( session_id: string, opts: { text: string; - // metadata is not supported on message entries currently; accepted for - // forward-compat but discarded. metadata?: unknown; parent_id?: string | null; }, @@ -371,7 +431,6 @@ export type UpdatePartItem = { compacted_at: unknown; }; -// Loads entries once instead of O(N×M) reloads in the prune loop. export async function updateParts( store: SessionStore, session_id: string, @@ -432,8 +491,6 @@ function buildNode(entry: SessionEntry, byParent: Map buildNode(c, byParent)) }; } -// ── HTML export ──────────────────────────────────────────────────────────── - export async function exportHtml( store: SessionStore, session_id: string, @@ -484,7 +541,6 @@ function renderEntryHtml(entry: SessionEntry): string { if (entry.type === 'branch_summary') { return `
branch summary · from ${htmlEscape(entry.from_id)}
${htmlEscape(entry.summary)}
\n`; } - // compaction const read = (entry.details.read_files ?? []).map(htmlEscape).join(', '); const modified = (entry.details.modified_files ?? []).map(htmlEscape).join(', '); return `
compaction · ${entry.tokens_before} tokens before
${htmlEscape(entry.summary)}\n\nread: ${read}\nmodified: ${modified}
\n`; @@ -497,7 +553,6 @@ function renderMessageHtml(msg: AgentMessage): string { return `
assistant · ${htmlEscape(msg.model)}
${renderBlocksHtml(msg.content)}
\n`; if (msg.role === 'function_result') return `
function result · ${htmlEscape(msg.function_id)}
${renderBlocksHtml(msg.content)}
\n`; - // custom const text = msg.display ?? JSON.stringify(msg.content); return `
custom · ${htmlEscape(msg.custom_type)}
${htmlEscape(text)}
\n`; } diff --git a/harness/src/session/tree/register.ts b/harness/src/session/tree/register.ts index 558c960c..6cb4f5f8 100644 --- a/harness/src/session/tree/register.ts +++ b/harness/src/session/tree/register.ts @@ -9,6 +9,7 @@ import type { AgentMessage } from '../../types/agent-message.js'; import { type UpdatePartItem, appendMessage, + appendMessages as appendMessagesOp, appendSynthetic as appendSyntheticOp, cloneSession, compact as compactOp, @@ -35,6 +36,7 @@ export const FUNCTION_IDS = { CREATE: 'session-tree::create', ENSURE: 'session-tree::ensure', APPEND: 'session-tree::append', + APPEND_BATCH: 'session-tree::append_batch', MESSAGES: 'session-tree::messages', LIST: 'session-tree::list', COMPACTIONS: 'session-tree::compactions', @@ -148,6 +150,31 @@ export function registerTree(iii: ISdk, store: SessionStore): void { { description: 'Append an AgentMessage entry to a session' }, ); + iii.registerFunction( + FUNCTION_IDS.APPEND_BATCH, + async (payload: unknown) => { + const obj = (payload ?? {}) as Record; + const session_id = requireString(obj, 'session_id'); + const parent_id = typeof obj.parent_id === 'string' ? obj.parent_id : null; + const messages = obj.messages; + if (!Array.isArray(messages)) throw new Error('missing required field: messages (array)'); + if (messages.some((m) => !m || typeof m !== 'object')) { + throw new Error('messages must all be non-null AgentMessage objects'); + } + const entry_ids = await appendMessagesOp( + store, + session_id, + parent_id, + messages as AgentMessage[], + ); + return { entry_ids }; + }, + { + description: + 'Append a chain of AgentMessage entries to a session in one call (resolves the active leaf once; returns entry_ids in order).', + }, + ); + iii.registerFunction( FUNCTION_IDS.MESSAGES, async (payload: unknown) => { diff --git a/harness/src/session/tree/store.ts b/harness/src/session/tree/store.ts index 9140a71b..58171d7e 100644 --- a/harness/src/session/tree/store.ts +++ b/harness/src/session/tree/store.ts @@ -20,6 +20,11 @@ import { type SessionEntry, SessionError, type SessionMeta, entryTimestamp } fro export interface SessionStore { create(meta: SessionMeta): Promise; append(session_id: string, entry: SessionEntry): Promise; + /** + * Append several pre-chained entries, refreshing session meta once for the + * whole batch (vs once per {@link append}). Entries are written in order. + */ + appendMany(session_id: string, entries: SessionEntry[]): Promise; loadEntries(session_id: string): Promise; loadMeta(session_id: string): Promise; list(): Promise; @@ -44,6 +49,14 @@ export class InMemoryStore implements SessionStore { if (m) m.updated_at = Date.now(); } + async appendMany(session_id: string, entries: SessionEntry[]): Promise { + const list = this.entries.get(session_id); + if (!list) throw new SessionError('not_found', `session not found: ${session_id}`); + list.push(...entries); + const m = this.meta.get(session_id); + if (m) m.updated_at = Date.now(); + } + async loadEntries(session_id: string): Promise { return [...(this.entries.get(session_id) ?? [])]; } @@ -90,25 +103,34 @@ export class IiiStateSessionStore implements SessionStore { } } - async append(session_id: string, entry: SessionEntry): Promise { + /** Write one entry under its own key. Throws on storage failure. */ + private async writeEntry(session_id: string, key: string, entry: SessionEntry): Promise { try { - await this.state.set({ - scope: entriesScope(session_id), - key: entry.id, - value: entry, - }); + await this.state.set({ scope: entriesScope(session_id), key, value: entry }); } catch (e) { throw new SessionError('storage', `state::set entry: ${String(e)}`); } - // Best-effort meta refresh — failures here log only. + } + + /** Bump meta.updated_at; best-effort, failures log only. */ + private async bestEffortMetaRefresh(session_id: string): Promise { try { await this.refreshMetaUpdatedAt(session_id); } catch (e) { - logger.warn('meta updated_at refresh failed', { - session_id, - err: String(e), - }); + logger.warn('meta updated_at refresh failed', { session_id, err: String(e) }); + } + } + + async append(session_id: string, entry: SessionEntry): Promise { + await this.writeEntry(session_id, entry.id, entry); + await this.bestEffortMetaRefresh(session_id); + } + + async appendMany(session_id: string, entries: SessionEntry[]): Promise { + for (const entry of entries) { + await this.writeEntry(session_id, entry.id, entry); } + await this.bestEffortMetaRefresh(session_id); } async loadEntries(session_id: string): Promise { @@ -118,9 +140,6 @@ export class IiiStateSessionStore implements SessionStore { } catch (e) { throw new SessionError('storage', `state::list entries: ${String(e)}`); } - // PR #150: sort by (timestamp, id) so resumed approval replies that - // arrive after the session paused appear in correct transcript order - // even when their entry ids are non-monotonic. entries.sort((a, b) => { const t = entryTimestamp(a) - entryTimestamp(b); if (t !== 0) return t; @@ -151,24 +170,8 @@ export class IiiStateSessionStore implements SessionStore { } async updateEntry(session_id: string, entry_id: string, updated: SessionEntry): Promise { - try { - await this.state.set({ - scope: entriesScope(session_id), - key: entry_id, - value: updated, - }); - } catch (e) { - throw new SessionError('storage', `state::set updateEntry: ${String(e)}`); - } - // Best-effort meta refresh — failures here log only. - try { - await this.refreshMetaUpdatedAt(session_id); - } catch (e) { - logger.warn('meta updated_at refresh failed', { - session_id, - err: String(e), - }); - } + await this.writeEntry(session_id, entry_id, updated); + await this.bestEffortMetaRefresh(session_id); } private async refreshMetaUpdatedAt(session_id: string): Promise { diff --git a/harness/src/turn-orchestrator/assistant-streaming/ports.ts b/harness/src/turn-orchestrator/assistant-streaming/ports.ts index 81831e2a..faa92222 100644 --- a/harness/src/turn-orchestrator/assistant-streaming/ports.ts +++ b/harness/src/turn-orchestrator/assistant-streaming/ports.ts @@ -3,6 +3,7 @@ */ import { z } from 'zod'; +import type { Model } from '../../models-catalog/types.js'; import { logger } from '../../runtime/otel.js'; import type { ISdk } from '../../runtime/iii.js'; import type { AgentMessage, AssistantMessage } from '../../types/agent-message.js'; @@ -25,6 +26,10 @@ export type StreamContext = { messages: AgentMessage[]; /** Optional reasoning/thinking level from the run request. Absent = off. */ thinking_level?: string; + /** Turn's pre-resolved catalog entry, threaded to the provider. Absent = off. */ + model_meta?: Model; + /** Turn's stable id (run start time), so the provider dedupes credential resolution. */ + resolution_key?: number; }; export type StreamTurnOutcome = { @@ -36,7 +41,7 @@ export type StreamTurnOutcome = { export type AssistantRoute = | { kind: 'stopped'; reason: 'error' | 'aborted' } | { kind: 'function_execute' } - | { kind: 'steering_check' }; + | { kind: 'end_turn' }; export function parseFunctionSchemas(raw: unknown[]): AgentFunction[] { return z.array(AgentFunctionSchema).parse(raw) as AgentFunction[]; @@ -62,6 +67,7 @@ export type AssistantStreamingPorts = TurnStatePorts & { messages: AgentMessage[], provider: string, model: string, + model_meta?: Model, ): Promise<'ok' | 'compacted'>; streamTurn( ctx: StreamContext, @@ -90,8 +96,8 @@ export function createStreamingPorts(iii: ISdk): AssistantStreamingPorts { return { ...base, - async runPreflight(session_id, messages, provider, model) { - return runPreflight(iii, session_id, messages, provider, model); + async runPreflight(session_id, messages, provider, model, model_meta) { + return runPreflight(iii, session_id, messages, provider, model, model_meta); }, async streamTurn(ctx, onDelta) { @@ -106,6 +112,8 @@ export function createStreamingPorts(iii: ISdk): AssistantStreamingPorts { ctx.messages, ctx.tools, ctx.thinking_level, + ctx.model_meta, + ctx.resolution_key, ), onDelta, }); @@ -129,10 +137,6 @@ export function createStreamingPorts(iii: ISdk): AssistantStreamingPorts { }, async persistAssistantIfNew(session_id, asst, messages) { - // Dedup against the window already loaded in prepareStreamContext: nothing - // is persisted between that load and here within one invocation, and - // isDuplicateAssistant only inspects the trailing entry — so reusing it is - // identical to a fresh reload and saves a full session round-trip. if (isDuplicateAssistant(messages, asst)) { logger.warn('finalizeAssistant: skipping duplicate assistant push (re-entry detected)', { session_id, diff --git a/harness/src/turn-orchestrator/assistant-streaming/run.ts b/harness/src/turn-orchestrator/assistant-streaming/run.ts index da8b5185..69b37147 100644 --- a/harness/src/turn-orchestrator/assistant-streaming/run.ts +++ b/harness/src/turn-orchestrator/assistant-streaming/run.ts @@ -5,7 +5,7 @@ import type { AgentMessage, AssistantMessage } from '../../types/agent-message.js'; import { decide } from '../provider-router.js'; import { syntheticAssistant } from '../synthetic-assistant.js'; -import { emitTurnEndOnce } from '../state-runtime/turn-end.js'; +import { emitTurnEndOnce, transitionToFinishing } from '../state-runtime/turn-end.js'; import { enterFunctionExecute } from '../function-execute/run.js'; import { transitionTo, type AssistantStreamingTurnRecord } from '../state.js'; import { createDeltaCoalescer } from './coalesce-deltas.js'; @@ -34,9 +34,11 @@ export async function prepareStreamContext( const { provider, model, system_prompt, function_schemas, thinking_level } = request; const decision = decide({ provider, model }); const tools = parseFunctionSchemas(function_schemas); + const model_meta = rec.model_meta; if ( - (await ports.runPreflight(rec.session_id, messages, decision.provider, model)) === 'compacted' + (await ports.runPreflight(rec.session_id, messages, decision.provider, model, model_meta)) === + 'compacted' ) { messages = await ports.loadMessages(rec.session_id); } @@ -48,6 +50,8 @@ export async function prepareStreamContext( tools, messages, ...(thinking_level ? { thinking_level } : {}), + ...(model_meta ? { model_meta } : {}), + resolution_key: rec.started_at_ms, }; } @@ -58,9 +62,6 @@ export async function runStreamTurn( ): Promise { let body_streamed = false; - // Coalesce consecutive same-type provider deltas into one message_update to - // cut the per-token span/RPC explosion on the streaming path. Discrete - // events flush through 1:1; the final flush below guarantees the tail. const coalescer = createDeltaCoalescer((partial, event) => ports.emitMessageUpdate(session_id, partial, event), ); @@ -107,7 +108,7 @@ export function routeAssistantTurn(asst: AssistantMessage): AssistantRoute { if (hasFunctionCalls(asst)) { return { kind: 'function_execute' }; } - return { kind: 'steering_check' }; + return { kind: 'end_turn' }; } export async function finalizeAssistantTurn( @@ -122,7 +123,7 @@ export async function finalizeAssistantTurn( if (route.kind === 'stopped') { await emitTurnEndOnce(ports, rec, asst); - await ports.finishSession(rec); + transitionToFinishing(rec); return; } @@ -135,7 +136,8 @@ export async function finalizeAssistantTurn( return; } - transitionTo(rec, 'steering_check'); + await emitTurnEndOnce(ports, rec, asst); + transitionToFinishing(rec); } export async function runAssistantStreaming( diff --git a/harness/src/turn-orchestrator/finishing.ts b/harness/src/turn-orchestrator/finishing.ts new file mode 100644 index 00000000..83451460 --- /dev/null +++ b/harness/src/turn-orchestrator/finishing.ts @@ -0,0 +1,35 @@ +/** + * `turn::finishing` — the terminal step. + * + * A turn whose work is durably committed transitions to `finishing` (see + * {@link transitionToFinishing}). This step emits the single `agent_end` + * run-end signal and advances to `stopped`. Keeping the terminal signal in its + * own step means a crash BEFORE the work was saved re-runs the work without + * consumers ever having seen a premature run-end; a crash IN this step simply + * re-emits agent_end (idempotent-tolerated) on replay. + */ + +import type { ISdk } from '../runtime/iii.js'; +import { runTransition } from './run-transition.js'; +import { TurnStepPayloadSchema, type TurnStepPayload } from './schemas.js'; +import { createTurnStatePorts } from './state-runtime/ports.js'; +import type { TurnStateRecord } from './state.js'; + +export async function handleFinishing(iii: ISdk, rec: TurnStateRecord): Promise { + const ports = createTurnStatePorts(iii); + await ports.finishSession(rec); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::finishing', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'finishing', (i, rec) => handleFinishing(i, rec), parsed); + }, + { + description: + 'Run one durable FSM transition for session in state finishing: emit agent_end and advance to stopped.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/run.ts b/harness/src/turn-orchestrator/function-awaiting-approval/run.ts index 9b15024e..0db73f92 100644 --- a/harness/src/turn-orchestrator/function-awaiting-approval/run.ts +++ b/harness/src/turn-orchestrator/function-awaiting-approval/run.ts @@ -25,7 +25,7 @@ export function denialResultFromDecision(decision: ApprovalDecision): FunctionRe decision: decision.decision, reason, }, - terminate: false, + terminate: decision.decision === 'aborted', }; } diff --git a/harness/src/turn-orchestrator/function-execute/ports.ts b/harness/src/turn-orchestrator/function-execute/ports.ts index 7b70b20a..8777e5be 100644 --- a/harness/src/turn-orchestrator/function-execute/ports.ts +++ b/harness/src/turn-orchestrator/function-execute/ports.ts @@ -7,8 +7,10 @@ import type { DispatchResult } from '../agent-trigger.js'; import { dispatchWithHook, triggerFunctionCall } from '../agent-trigger.js'; import { emit } from '../events.js'; import type { ISdk } from '../../runtime/iii.js'; +import type { AgentEvent } from '../../types/agent-event.js'; import type { FunctionCall, FunctionResult } from '../../types/function.js'; import { createTurnStatePorts, type TurnStatePorts } from '../state-runtime/ports.js'; +import { createTurnStore } from '../state-runtime/store.js'; import type { ExecutedCall } from './types.js'; const RoutingEnvelopeSchema = z @@ -49,8 +51,17 @@ export function withRoutingEnvelope(call: FunctionCall, session_id: string): Fun export type FunctionExecutePorts = TurnStatePorts & { emitStart(session_id: string, call: FunctionCall): Promise; emitEnd(session_id: string, executed: ExecutedCall): Promise; + /** Generic agent-event emit, for the inline max_turns end (message_complete). */ + emit(session_id: string, event: AgentEvent): Promise; dispatch(call: FunctionCall, session_id: string): Promise; triggerPreApproved(call: FunctionCall): Promise; + /** + * Function_call_ids already persisted in the trailing result run, for batch + * dedup on re-entry. Reads the raw message list (not the compaction-rebuilt + * window): the trailing results live in the preserved tail, so this drops the + * paired `session-tree::compactions` read that `loadMessages` would do. + */ + loadTrailingResultIds(session_id: string): Promise>; }; function buildFunctionExecutionEnd(executed: ExecutedCall) { @@ -65,11 +76,16 @@ function buildFunctionExecutionEnd(executed: ExecutedCall) { } export function createPorts(iii: ISdk): FunctionExecutePorts { - const base = createTurnStatePorts(iii); + const store = createTurnStore(iii); + const base = createTurnStatePorts(iii, store); return { ...base, + loadTrailingResultIds(session_id) { + return store.loadTrailingResultIds(session_id); + }, + async emitStart(session_id, call) { await emit(iii, session_id, { type: 'function_execution_start', @@ -83,10 +99,11 @@ export function createPorts(iii: ISdk): FunctionExecutePorts { await emit(iii, session_id, buildFunctionExecutionEnd(executed)); }, + emit(session_id, event) { + return emit(iii, session_id, event); + }, + async dispatch(call, session_id) { - // Hook/policy see the routing envelope; the target function gets the - // caller's untouched args so a payload `function_id` (e.g. for - // engine::functions::info) is never clobbered by the routing `function_id`. return dispatchWithHook(iii, withRoutingEnvelope(call, session_id), call); }, diff --git a/harness/src/turn-orchestrator/function-execute/run.ts b/harness/src/turn-orchestrator/function-execute/run.ts index 9ab75b54..1ae98f02 100644 --- a/harness/src/turn-orchestrator/function-execute/run.ts +++ b/harness/src/turn-orchestrator/function-execute/run.ts @@ -12,10 +12,14 @@ import { missingFunctionResult, unwrapAgentTrigger, } from '../agent-trigger.js'; -import { emitTurnEndOnce } from '../state-runtime/turn-end.js'; -import { persistedTrailingResultIds } from '../state-runtime/transcript.js'; import { - transitionTo, + emitTurnEndOnce, + endTurnForMaxTurns, + maxTurnsReached, + resumeToAssistantStreaming, + transitionToFinishing, +} from '../state-runtime/turn-end.js'; +import { type AwaitingApprovalEntry, type FunctionBatchTurnRecord, type TurnStateRecord, @@ -205,8 +209,7 @@ export async function finalizeBatch( function_results.push(toFunctionResultMessage(entry, result)); } - const messages = await ports.loadMessages(rec.session_id); - const alreadyPersisted = persistedTrailingResultIds(messages); + const alreadyPersisted = await ports.loadTrailingResultIds(rec.session_id); const fresh = function_results.filter((r) => !alreadyPersisted.has(r.function_call_id)); if (fresh.length < function_results.length) { logger.warn('finalizeBatch: skipped duplicate function_results (re-entry detected)', { @@ -224,9 +227,11 @@ export async function finalizeBatch( await emitTurnEndOnce(ports, rec, lastAssistant, function_results); if (allTerminate) { - await ports.finishSession(rec); + transitionToFinishing(rec); + } else if (maxTurnsReached(rec)) { + await endTurnForMaxTurns(ports, rec); } else { - transitionTo(rec, 'steering_check'); + resumeToAssistantStreaming(rec); } (rec as TurnStateRecord).work = undefined; diff --git a/harness/src/turn-orchestrator/iii.worker.yaml b/harness/src/turn-orchestrator/iii.worker.yaml index 4d9be949..1ec68b75 100644 --- a/harness/src/turn-orchestrator/iii.worker.yaml +++ b/harness/src/turn-orchestrator/iii.worker.yaml @@ -4,7 +4,7 @@ language: node deploy: binary manifest: package.json bin: iii-turn-orchestrator -description: Durable run::start state machine that drives each agent turn through provisioning, assistant, function-execute, and steering. +description: Durable run::start state machine that drives each agent turn through provisioning, assistant, and function-execute. runtime: kind: node diff --git a/harness/src/turn-orchestrator/main.ts b/harness/src/turn-orchestrator/main.ts index 9880794f..a27e780b 100644 --- a/harness/src/turn-orchestrator/main.ts +++ b/harness/src/turn-orchestrator/main.ts @@ -5,6 +5,6 @@ import { register } from './register.js'; await bootstrapWorker({ name: 'turn-orchestrator', description: - 'Durable run::start state machine driving each agent turn through provisioning, assistant, function-execute, and steering.', + 'Durable run::start state machine driving each agent turn through provisioning, assistant, and function-execute.', register: (iii, ctx) => register(iii, ctx), }); diff --git a/harness/src/turn-orchestrator/preflight.ts b/harness/src/turn-orchestrator/preflight.ts index 9efa3f30..aca8d96f 100644 --- a/harness/src/turn-orchestrator/preflight.ts +++ b/harness/src/turn-orchestrator/preflight.ts @@ -8,8 +8,9 @@ * the in-flight compaction releases the lease instead of failing the run. */ -import { fetchModelLimit } from '../context-compaction/model-resolver.js'; +import { fetchModelLimit, limitFromModel } from '../context-compaction/model-resolver.js'; import { usable as computeUsable } from '../context-compaction/overflow.js'; +import type { Model } from '../models-catalog/types.js'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import type { AgentMessage } from '../types/agent-message.js'; @@ -39,8 +40,11 @@ export async function runPreflight( messages: AgentMessage[], providerID: string, modelID: string, + modelMeta?: Model, ): Promise<'ok' | 'compacted'> { - const model = await fetchModelLimit(iii, providerID, modelID); + const model = modelMeta + ? { providerID, modelID, modelLimit: limitFromModel(modelMeta) } + : await fetchModelLimit(iii, providerID, modelID); if (!model || model.modelLimit.context === 0) { if (model && model.modelLimit.context === 0) { logger.debug('preflight: model has no context_window; skipping pre-flight check', { @@ -99,9 +103,6 @@ export async function runPreflight( throw new CompactionBusyError('compaction already in progress'); } if (res?.status !== 'ok') { - // 'empty' (nothing eligible to compact) or an unknown/drifted status: - // no compaction ran, so do NOT claim 'compacted' — the caller would - // reload messages and proceed as if the context shrank. logger.warn('preflight: compact_now returned non-ok status; proceeding without reload', { session_id, status: res?.status, diff --git a/harness/src/turn-orchestrator/provider-router.ts b/harness/src/turn-orchestrator/provider-router.ts index 2d3fcce6..efa71516 100644 --- a/harness/src/turn-orchestrator/provider-router.ts +++ b/harness/src/turn-orchestrator/provider-router.ts @@ -3,6 +3,7 @@ * orchestrator imports it directly; there is no `router::*` bus surface. */ +import type { Model } from '../models-catalog/types.js'; import type { AgentMessage } from '../types/agent-message.js'; import type { AgentFunction } from '../types/function.js'; import type { ProviderStreamInput, StreamChannelRef } from '../types/provider.js'; @@ -26,11 +27,6 @@ export function decide(req: RouteRequest): RouteDecision { if (p === 'kimi') return { provider: 'kimi', model: req.model }; if (p === 'lmstudio') return { provider: 'lmstudio', model: req.model }; if (p === 'llamacpp') return { provider: 'llamacpp', model: req.model }; - // Heuristic for missing provider: model name disambiguates. - // Local runtimes (LM Studio, llama.cpp) are intentionally excluded — - // their model IDs are user-controlled (e.g. `qwen/qwen3-4b-2507`, - // `Meta-Llama-3.1-8B`) and overlap with HF-style IDs from other - // services; require explicit provider='lmstudio' / 'llamacpp'. if (!p && /^gpt-|^o\d-/i.test(req.model)) { return { provider: 'openai', model: req.model }; } @@ -62,6 +58,8 @@ export function buildInput( messages: AgentMessage[], tools: AgentFunction[], thinking_level?: string, + model_meta?: Model, + resolution_key?: number, ): ProviderStreamInput { return { writer_ref, @@ -70,5 +68,7 @@ export function buildInput( messages: messages as unknown[], tools, ...(thinking_level ? { thinking_level } : {}), + ...(model_meta ? { model_meta } : {}), + ...(resolution_key !== undefined ? { resolution_key } : {}), }; } diff --git a/harness/src/turn-orchestrator/provisioning/ports.ts b/harness/src/turn-orchestrator/provisioning/ports.ts index 1dc14b0b..7e406f2e 100644 --- a/harness/src/turn-orchestrator/provisioning/ports.ts +++ b/harness/src/turn-orchestrator/provisioning/ports.ts @@ -2,13 +2,17 @@ * Typed dependency ports for provisioning. */ +import type { Model } from '../../models-catalog/types.js'; import type { ISdk } from '../../runtime/iii.js'; +import { getCatalogModel } from '../../runtime/output-tokens.js'; import type { RunRequest } from '../run-request.js'; import { createTurnStore } from '../state-runtime/store.js'; export type ProvisioningPorts = { loadRunRequest(session_id: string): Promise; saveRunRequest(session_id: string, request: RunRequest): Promise; + /** Resolve the full catalog entry for (provider, model); null on miss/error. */ + resolveModel(provider: string, model: string): Promise; }; export function createProvisioningPorts(iii: ISdk): ProvisioningPorts { @@ -22,5 +26,9 @@ export function createProvisioningPorts(iii: ISdk): ProvisioningPorts { saveRunRequest(session_id, request) { return store.saveRunRequest(session_id, request); }, + + resolveModel(provider, model) { + return getCatalogModel(iii, provider, model); + }, }; } diff --git a/harness/src/turn-orchestrator/provisioning/process.ts b/harness/src/turn-orchestrator/provisioning/process.ts index ce39f587..09fcdc7b 100644 --- a/harness/src/turn-orchestrator/provisioning/process.ts +++ b/harness/src/turn-orchestrator/provisioning/process.ts @@ -2,8 +2,10 @@ * Load run request, build the provisioned RunRequest, and register the FSM step. */ +import type { Model } from '../../models-catalog/types.js'; import type { ISdk } from '../../runtime/iii.js'; import { agentTriggerTool } from '../agent-trigger.js'; +import { decide } from '../provider-router.js'; import { runTransition } from '../run-transition.js'; import type { RunRequest } from '../run-request.js'; import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; @@ -14,6 +16,8 @@ import { createProvisioningPorts, type ProvisioningPorts } from './ports.js'; export type ProvisioningOutcome = { kind: 'ready'; runRequest: RunRequest; + /** Catalog entry for the turn's model; null when the catalog has no match. */ + model_meta: Model | null; }; export async function processProvisioning( @@ -30,6 +34,11 @@ export async function processProvisioning( model: request.model, }); + const decision = decide({ provider: request.provider, model: request.model }); + const model_meta = request.model + ? await ports.resolveModel(decision.provider, request.model) + : null; + return { kind: 'ready', runRequest: { @@ -37,6 +46,7 @@ export async function processProvisioning( system_prompt: prompt, function_schemas: [agentTriggerTool()], }, + model_meta, }; } @@ -46,6 +56,7 @@ export async function applyProvisioningOutcome( outcome: ProvisioningOutcome, ): Promise { await ports.saveRunRequest(rec.session_id, outcome.runRequest); + if (outcome.model_meta) rec.model_meta = outcome.model_meta; transitionTo(rec, 'assistant_streaming'); } diff --git a/harness/src/turn-orchestrator/register.ts b/harness/src/turn-orchestrator/register.ts index 683b915f..d6b096ae 100644 --- a/harness/src/turn-orchestrator/register.ts +++ b/harness/src/turn-orchestrator/register.ts @@ -1,18 +1,20 @@ import type { ISdk } from '../runtime/iii.js'; import { register as registerAssistantStreaming } from './assistant-streaming/process.js'; +import { register as registerFinishing } from './finishing.js'; import { register as registerFunctionAwaitingApproval } from './function-awaiting-approval/process.js'; import { register as registerFunctionExecute } from './function-execute/process.js'; import { register as registerGetState } from './get-state.js'; +import { register as registerRunAbort } from './run-abort.js'; import { register as registerRunStart } from './run-start.js'; import { register as registerProvisioning } from './provisioning/process.js'; -import { register as registerSteeringCheck } from './steering-check/process.js'; export async function register(iii: ISdk, _ctx: { configPath: string }): Promise { registerRunStart(iii); + registerRunAbort(iii); registerProvisioning(iii); registerAssistantStreaming(iii); registerFunctionExecute(iii); registerFunctionAwaitingApproval(iii); - registerSteeringCheck(iii); + registerFinishing(iii); registerGetState(iii); } diff --git a/harness/src/turn-orchestrator/run-abort.ts b/harness/src/turn-orchestrator/run-abort.ts new file mode 100644 index 00000000..a386b16d --- /dev/null +++ b/harness/src/turn-orchestrator/run-abort.ts @@ -0,0 +1,66 @@ +/** + * `run::abort` — user-initiated interrupt for a session's in-flight turn. + * + * The run::start busy guard makes an in-flight session reject new messages, so + * users need a deliberate way to end a turn: a runaway agent loop, an + * approval prompt they want to walk away from, or a turn they simply changed + * their mind about. Abort surfaces a synthetic `aborted` assistant, emits + * turn_end, clears any parked approvals from the record, and routes to the + * `finishing` step (agent_end + stopped). + * + * Best-effort against an actively-executing step: record writes are + * last-write-wins, so a step holding the record in memory can overwrite the + * abort when it saves — its NEXT wake then runs and the loop continues; the + * abort lands reliably in the gaps between steps (every round-trip has one), + * so a retry interrupts even a busy loop. Parked states + * (function_awaiting_approval) have no in-flight step and abort cleanly. + */ + +import type { ISdk } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; +import { emit } from './events.js'; +import { RunAbortPayloadSchema, type RunAbortPayload, type RunAbortResult } from './schemas.js'; +import { createTurnStatePorts } from './state-runtime/ports.js'; +import { createTurnStore } from './state-runtime/store.js'; +import { emitTurnEndOnce, transitionToFinishing } from './state-runtime/turn-end.js'; +import { isTurnInFlight, type TurnStateRecord } from './state.js'; +import { syntheticAssistant } from './synthetic-assistant.js'; + +export async function execute(iii: ISdk, payload: RunAbortPayload): Promise { + const { session_id } = payload; + const store = createTurnStore(iii); + const rec = await store.loadRecordStrict(session_id); + + if (!rec || !isTurnInFlight(rec)) { + return { session_id, aborted: false, state: rec?.state ?? null }; + } + if (rec.state === 'finishing') { + return { session_id, aborted: false, state: 'finishing' }; + } + + logger.info('run::abort: ending in-flight turn', { session_id, state: rec.state }); + + const ports = createTurnStatePorts(iii, store); + const msg = syntheticAssistant({ stop_reason: 'aborted', text: 'run aborted by user' }); + rec.last_assistant = msg; + rec.awaiting_approval = []; + (rec as TurnStateRecord).work = undefined; + + await emit(iii, session_id, { type: 'message_complete', message: msg, body_streamed: false }); + await emitTurnEndOnce(ports, rec, msg); + transitionToFinishing(rec); + await store.saveRecord(rec); + + return { session_id, aborted: true, state: 'finishing' }; +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'run::abort', + async (payload: RunAbortPayload) => execute(iii, RunAbortPayloadSchema.parse(payload)), + { + description: + "Abort the session's in-flight turn: emit an aborted assistant + turn_end, clear parked approvals, and route to finishing. Best-effort while a step is mid-execution (retry lands between steps); no-op when no turn is running.", + }, + ); +} diff --git a/harness/src/turn-orchestrator/run-start.ts b/harness/src/turn-orchestrator/run-start.ts index 1746eb98..460aab6c 100644 --- a/harness/src/turn-orchestrator/run-start.ts +++ b/harness/src/turn-orchestrator/run-start.ts @@ -5,22 +5,50 @@ * `HarnessTriggerInputSchema` parse); console/web sends * `{ session_id, message_id?, provider, model, mode?, messages }` and omits * `system_prompt`, `max_turns` (schema defaults). - * **Outgoing**: `{ session_id }` — persists run config, messages, and seeds - * `turn_state` to provisioning via `saveRecord`. + * **Outgoing**: `{ session_id, started }` — persists run config, messages, and + * seeds `turn_state` to provisioning via `saveRecord`. `started` is false when + * a turn was already in flight and this call was ignored. */ import type { ISdk } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; import { RunStartPayloadSchema, type RunStartPayload, type RunStartResult } from './schemas.js'; import { createTurnStore } from './state-runtime/store.js'; -import { newRecord } from './state.js'; +import { isTurnInFlight, newRecord } from './state.js'; + +/** + * Idle window after which an in-flight (non-parked) record is treated as + * abandoned and a new run::start may take it over. `updated_at_ms` bumps on + * every state transition, so a live turn refreshes it each round-trip; a + * single very long provider stream is the one legitimate gap, hence the + * generous window. `function_awaiting_approval` is exempt — it parks on the + * user indefinitely by design; `run::abort` is its escape, not staleness. + */ +export const STALE_TURN_TAKEOVER_MS = 30 * 60 * 1000; export async function execute(iii: ISdk, payload: RunStartPayload): Promise { const store = createTurnStore(iii); const { session_id, messages, max_turns, message_id: _message_id, ...run } = payload; - // Single ensure for the whole run: this is the gateway that begins the turn - // loop, so every later loadMessages/appendMessages is guaranteed a live record - // without re-ensuring per call. Must precede the first tree write below. + const existing = await store.loadRecordStrict(session_id); + if (existing && isTurnInFlight(existing)) { + const idleMs = Date.now() - existing.updated_at_ms; + const takeOver = + existing.state !== 'function_awaiting_approval' && idleMs > STALE_TURN_TAKEOVER_MS; + if (!takeOver) { + logger.warn('run::start ignored: session already has a turn in flight', { + session_id, + state: existing.state, + }); + return { session_id, started: false, reason: 'session_busy' }; + } + logger.error('run::start taking over a stale in-flight turn', { + session_id, + state: existing.state, + idle_ms: idleMs, + }); + } + await store.ensureSession(session_id); await store.saveRunRequest(session_id, { @@ -32,7 +60,7 @@ export async function execute(iii: ISdk, payload: RunStartPayload): Promise; -export type RunStartResult = { session_id: string }; +/** + * `started` is false when the session already had a turn in flight and this + * call was ignored (no record reset, no message appended) — see run-start.ts. + */ +export type RunStartResult = { + session_id: string; + started: boolean; + reason?: 'session_busy'; +}; + +export const RunAbortPayloadSchema = SessionIdPayloadSchema; +export type RunAbortPayload = z.infer; +/** `aborted` is false when no turn was running (or it was already finishing). */ +export type RunAbortResult = { + session_id: string; + aborted: boolean; + state: TurnState | null; +}; -// --- turn::{state} durable step --- export const TurnStepPayloadSchema = SessionIdPayloadSchema; export type TurnStepPayload = z.infer; export type TurnStepResult = | { ok: true; from_state: TurnState; to_state: TurnState } | { ok: true; skipped: true; reason: 'stale' }; -// --- function_execute / function_awaiting_approval persisted record --- const AwaitingApprovalEntrySchema = z.object({ function_call_id: z.string().min(1), function_id: z.string().min(1), @@ -95,7 +108,6 @@ export function parseFunctionBatchRecord(rec: TurnStateRecord): FunctionBatchTur `invalid function batch turn record: ${formatZodIssues(result.error)}`, ); } - // Return the same object — handlers mutate turn_state in place before saveRecord. return rec as FunctionBatchTurnRecord; } @@ -123,31 +135,6 @@ export function parseAssistantStreamingRecord(rec: TurnStateRecord): AssistantSt return rec as AssistantStreamingTurnRecord; } -/** Fields required before steering_check handlers run. */ -export const SteeringCheckTurnRecordSchema = z - .object({ - session_id: z.string().min(1), - state: z.literal('steering_check'), - turn_count: z.number(), - function_results: z.array(z.unknown()), - turn_end_emitted: z.boolean(), - started_at_ms: z.number(), - updated_at_ms: z.number(), - }) - .passthrough(); - -/** Validate persisted turn_state for steering_check; throws {@link TurnStateInvariantError}. */ -export function parseSteeringCheckRecord(rec: TurnStateRecord): SteeringCheckTurnRecord { - const result = SteeringCheckTurnRecordSchema.safeParse(rec); - if (!result.success) { - throw new TurnStateInvariantError( - `invalid steering_check turn record: ${formatZodIssues(result.error)}`, - ); - } - return rec as SteeringCheckTurnRecord; -} - -// --- turn::get_state --- export const GetStatePayloadSchema = SessionIdPayloadSchema; export type GetStatePayload = z.infer; @@ -175,7 +162,6 @@ export function toView(rec: TurnStateRecord): TurnStateView { export type GetStateResult = TurnStateView | null; -// --- turn::on_approval (approvals-scope state event) --- const ApprovalDecisionWriteEventSchema = z.object({ type: z.literal('state').optional(), scope: z.literal('approvals').optional(), diff --git a/harness/src/turn-orchestrator/state-runtime/context-view.ts b/harness/src/turn-orchestrator/state-runtime/context-view.ts index dd50012d..8247efce 100644 --- a/harness/src/turn-orchestrator/state-runtime/context-view.ts +++ b/harness/src/turn-orchestrator/state-runtime/context-view.ts @@ -40,24 +40,39 @@ export function buildContextView( ]; } -type MessagesResponse = { - messages?: Array<{ entry_id: string; message: AgentMessage }>; -}; - type CompactionsResponse = { entries?: CompactionEntryRow[]; }; +export type SessionMessageRow = { entry_id: string; message: AgentMessage }; + +/** + * Raw active-path messages (entry_id + message), oldest first. The single + * `session-tree::messages` reader — shared by {@link loadContextView} (which + * also folds in compactions) and the orchestrator's trailing-result dedup — so + * the fetch shape and timeout live in one place. + */ +export async function loadSessionMessages( + iii: ISdk, + session_id: string, +): Promise { + const resp = await iii.trigger({ + function_id: 'session-tree::messages', + payload: { session_id }, + timeoutMs: 30_000, + }); + return resp?.messages ?? []; +} + export async function loadContextView(iii: ISdk, session_id: string): Promise { - const [messagesResp, compactionsResp] = await Promise.all([ - iii.trigger({ function_id: 'session-tree::messages', payload: { session_id } }), + const [messages, compactionsResp] = await Promise.all([ + loadSessionMessages(iii, session_id), iii.trigger({ function_id: 'session-tree::compactions', payload: { session_id } }), ]); - const messagesPayload = messagesResp as MessagesResponse | null; const compactionsPayload = compactionsResp as CompactionsResponse | null; - const entries: MessageWithEntryId[] = (messagesPayload?.messages ?? []).map((e) => ({ + const entries: MessageWithEntryId[] = messages.map((e) => ({ entry_id: e.entry_id, message: e.message, })); diff --git a/harness/src/turn-orchestrator/state-runtime/ports.ts b/harness/src/turn-orchestrator/state-runtime/ports.ts index c7c117ae..603925de 100644 --- a/harness/src/turn-orchestrator/state-runtime/ports.ts +++ b/harness/src/turn-orchestrator/state-runtime/ports.ts @@ -5,6 +5,7 @@ import { emit } from '../events.js'; import type { RunRequest } from '../run-request.js'; import type { ISdk } from '../../runtime/iii.js'; +import type { ModelContextLimit } from '../../types/agent-event.js'; import type { AgentMessage, FunctionResultMessage } from '../../types/agent-message.js'; import { transitionTo, type TurnStateRecord } from '../state.js'; import { createTurnStore, type TurnStore } from './store.js'; @@ -19,6 +20,7 @@ export type TurnStatePorts = { session_id: string, message: AgentMessage, function_results: FunctionResultMessage[], + model_limit?: ModelContextLimit, ): Promise; finishSession(rec: TurnStateRecord): Promise; }; @@ -47,15 +49,16 @@ export function createTurnStatePorts(iii: ISdk, store?: TurnStore): TurnStatePor return s.saveRunRequest(session_id, request); }, - async emitTurnEnd(session_id, message, function_results) { - await emit(iii, session_id, { type: 'turn_end', message, function_results }); + async emitTurnEnd(session_id, message, function_results, model_limit) { + await emit(iii, session_id, { + type: 'turn_end', + message, + function_results, + ...(model_limit ? { model_limit } : {}), + }); }, async finishSession(rec) { - // agent_end is a turn-end SIGNAL only. The transcript reaches the UI - // incrementally via message_update/message_complete and is re-read from - // session-tree on reload, so no consumer reads agent_end.messages. Emit it - // empty instead of reloading the whole session to fill an unused field. await emit(iii, rec.session_id, { type: 'agent_end', messages: [] }); transitionTo(rec, 'stopped'); }, diff --git a/harness/src/turn-orchestrator/state-runtime/store.ts b/harness/src/turn-orchestrator/state-runtime/store.ts index c135249d..9caa640a 100644 --- a/harness/src/turn-orchestrator/state-runtime/store.ts +++ b/harness/src/turn-orchestrator/state-runtime/store.ts @@ -4,7 +4,7 @@ */ import { TriggerAction, type ISdk } from '../../runtime/iii.js'; -import { stateGet, stateSet } from '../../runtime/state.js'; +import { createState, stateGet, stateSet } from '../../runtime/state.js'; import { logger } from '../../runtime/otel.js'; import type { AgentMessage } from '../../types/agent-message.js'; import { RUN_REQUEST_SCOPE, TURN_STATE_SCOPE } from '../state.js'; @@ -12,7 +12,8 @@ import { emit } from '../events.js'; import { type RunRequest, parseRunRequest } from '../run-request.js'; import { toView, type TurnStateView } from '../schemas.js'; import { type TurnState, type TurnStateRecord, parseTurnStateRecord } from '../state.js'; -import { loadContextView } from './context-view.js'; +import { loadContextView, loadSessionMessages } from './context-view.js'; +import { persistedTrailingResultIds } from './transcript.js'; /** * Turn-step wakes go to the engine's `default` queue. NOTE: engine.config.yaml @@ -32,24 +33,61 @@ export function shouldWakeStep(previousState: TurnState | null, newState: TurnSt return true; } +const WAKE_ENQUEUE_ATTEMPTS = 3; +const WAKE_ENQUEUE_BACKOFF_MS = 250; + +/** + * Enqueue the next FSM step. Retried because a lost wake strands the record in + * a non-terminal state with nothing scheduled to advance it (and run::start + * then reports the session busy until its stale-takeover window). On final + * failure we log at error level and rely on that takeover as the recovery + * valve — re-enqueueing from a later save is suppressed by shouldWakeStep. + */ async function enqueueTurnStep(iii: ISdk, session_id: string, state: TurnState): Promise { - try { - await iii.trigger({ - function_id: `turn::${state}`, - payload: { session_id }, - action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), - }); - } catch (err) { - logger.warn('wakeStep failed', { session_id, state, err: String(err) }); + for (let attempt = 1; attempt <= WAKE_ENQUEUE_ATTEMPTS; attempt++) { + try { + await iii.trigger({ + function_id: `turn::${state}`, + payload: { session_id }, + action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), + }); + return; + } catch (err) { + if (attempt === WAKE_ENQUEUE_ATTEMPTS) { + logger.error('wakeStep failed after retries; turn is stranded until stale takeover', { + session_id, + state, + attempts: attempt, + err: String(err), + }); + return; + } + await new Promise((resolve) => setTimeout(resolve, WAKE_ENQUEUE_BACKOFF_MS * attempt)); + } } } export type TurnStore = { loadRecord(session_id: string): Promise; + /** + * Like {@link loadRecord} but THROWS on a state-read failure instead of + * tolerantly returning null. For guards that must fail closed: run::start's + * in-flight check would clobber a live turn if a transient read error were + * indistinguishable from "no record". + */ + loadRecordStrict(session_id: string): Promise; saveRecord(rec: TurnStateRecord, previous?: TurnStateRecord | null): Promise; writeRecord(rec: TurnStateRecord): Promise; ensureSession(session_id: string): Promise; loadMessages(session_id: string): Promise; + /** + * Function_call_ids in the trailing `function_result` run, for batch dedup. + * Reads the raw message list on the default leaf (not the compaction-rebuilt + * window {@link loadMessages} returns), so it skips the paired + * `session-tree::compactions` read — the trailing results live in the tail + * either way. + */ + loadTrailingResultIds(session_id: string): Promise>; appendMessages(session_id: string, msgs: AgentMessage[]): Promise; loadRunRequest(session_id: string): Promise; saveRunRequest(session_id: string, request: RunRequest): Promise; @@ -123,11 +161,18 @@ async function persistRecord( } export function createTurnStore(iii: ISdk): TurnStore { + const strictState = createState(iii, { tolerant: false }); + return { async loadRecord(session_id) { return parseTurnStateRecord(await scopedGet(iii, TURN_STATE_SCOPE, session_id)); }, + async loadRecordStrict(session_id) { + const raw = await strictState.get({ scope: TURN_STATE_SCOPE, key: session_id }); + return parseTurnStateRecord(raw); + }, + async writeRecord(rec) { await scopedSet(iii, TURN_STATE_SCOPE, rec.session_id, rec); }, @@ -147,14 +192,18 @@ export function createTurnStore(iii: ISdk): TurnStore { return loadContextView(iii, session_id); }, + async loadTrailingResultIds(session_id) { + const rows = await loadSessionMessages(iii, session_id); + return persistedTrailingResultIds(rows.map((e) => e.message)); + }, + async appendMessages(session_id, msgs) { - for (const message of msgs) { - await iii.trigger({ - function_id: 'session-tree::append', - payload: { session_id, message, parent_id: null }, - timeoutMs: 10_000, - }); - } + if (msgs.length === 0) return; + await iii.trigger({ + function_id: 'session-tree::append_batch', + payload: { session_id, messages: msgs, parent_id: null }, + timeoutMs: 10_000, + }); }, async saveRunRequest(session_id, request) { diff --git a/harness/src/turn-orchestrator/state-runtime/transcript.ts b/harness/src/turn-orchestrator/state-runtime/transcript.ts index bd6b6e1b..fda45620 100644 --- a/harness/src/turn-orchestrator/state-runtime/transcript.ts +++ b/harness/src/turn-orchestrator/state-runtime/transcript.ts @@ -4,18 +4,32 @@ import type { AgentMessage, AssistantMessage } from '../../types/agent-message.js'; +function assistantHasFunctionCalls(m: AssistantMessage): boolean { + return m.content.some((b) => b.type === 'function_call'); +} + /** * Function_call_ids already persisted for the current turn. Results are appended * right after the assistant that requested them, so they form the trailing run - * of `function_result` messages; the first non-result from the tail is the turn - * boundary. + * of `function_result` messages; the requesting assistant (which carries the + * function calls) is the turn boundary. + * + * A trailing assistant with NO function calls is skipped, not treated as the + * boundary: on a max_turns crash-replay the synthetic "loop stopped" notice is + * appended after the results in the same step, and the dedup must still see the + * results behind it as persisted — otherwise the whole batch re-appends. */ export function persistedTrailingResultIds(messages: AgentMessage[]): Set { const ids = new Set(); for (let i = messages.length - 1; i >= 0; i--) { const m = messages[i]; - if (m?.role === 'function_result') ids.add(m.function_call_id); - else break; + if (!m) break; + if (m.role === 'function_result') { + ids.add(m.function_call_id); + continue; + } + if (m.role === 'assistant' && !assistantHasFunctionCalls(m)) continue; + break; } return ids; } diff --git a/harness/src/turn-orchestrator/state-runtime/turn-end.ts b/harness/src/turn-orchestrator/state-runtime/turn-end.ts index d2be3af2..88e4fb2f 100644 --- a/harness/src/turn-orchestrator/state-runtime/turn-end.ts +++ b/harness/src/turn-orchestrator/state-runtime/turn-end.ts @@ -2,11 +2,15 @@ * Shared turn-end and FSM resume helpers for step outcome application. */ +import { limitFromModel } from '../../context-compaction/model-resolver.js'; +import type { AgentEvent, ModelContextLimit } from '../../types/agent-event.js'; import { emptyAssistant, + type AgentMessage, type AssistantMessage, type FunctionResultMessage, } from '../../types/agent-message.js'; +import { syntheticAssistant } from '../synthetic-assistant.js'; import { transitionTo, type TurnStateRecord } from '../state.js'; export type TurnEndEmitter = { @@ -14,6 +18,7 @@ export type TurnEndEmitter = { session_id: string, message: AssistantMessage, function_results: FunctionResultMessage[], + model_limit?: ModelContextLimit, ): Promise; }; @@ -25,7 +30,8 @@ export async function emitTurnEndOnce( ): Promise { if (rec.turn_end_emitted) return; const last = message ?? rec.last_assistant ?? emptyAssistant(); - await ports.emitTurnEnd(rec.session_id, last, function_results); + const model_limit = rec.model_meta ? limitFromModel(rec.model_meta) : undefined; + await ports.emitTurnEnd(rec.session_id, last, function_results, model_limit); rec.turn_end_emitted = true; } @@ -33,3 +39,48 @@ export function resumeToAssistantStreaming(rec: TurnStateRecord): void { rec.function_results = []; transitionTo(rec, 'assistant_streaming'); } + +/** + * Route a terminating turn to the `finishing` step instead of emitting agent_end + * inline. The turn's durable work (assistant/results/notice + the turn_end + * stream event) is already committed by the enclosing step's saveRecord; the + * finishing step then emits agent_end and advances to `stopped` from a clean + * replayable boundary, so a crash before the save re-runs the work WITHOUT + * consumers having seen a premature run-end. + */ +export function transitionToFinishing(rec: TurnStateRecord): void { + transitionTo(rec, 'finishing'); +} + +export function maxTurnsReached(rec: TurnStateRecord): boolean { + return rec.max_turns !== undefined && rec.turn_count >= rec.max_turns; +} + +export type MaxTurnsEndPorts = TurnEndEmitter & { + appendMessages(session_id: string, msgs: AgentMessage[]): Promise; + emit(session_id: string, event: AgentEvent): Promise; +}; + +/** + * End the loop at the max_turns cap: persist + surface a synthetic assistant + * notice, emit turn_end (no-op when the step already emitted it), then route to + * the `finishing` step to emit agent_end after the durable commit. + */ +export async function endTurnForMaxTurns( + ports: MaxTurnsEndPorts, + rec: TurnStateRecord, +): Promise { + const msg = syntheticAssistant({ + stop_reason: 'end', + text: `loop stopped: max_turns (${rec.max_turns ?? 0}) reached`, + }); + rec.last_assistant = msg; + await ports.appendMessages(rec.session_id, [msg]); + await ports.emit(rec.session_id, { + type: 'message_complete', + message: msg, + body_streamed: false, + }); + await emitTurnEndOnce(ports, rec, msg); + transitionToFinishing(rec); +} diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index 32785f87..5b0435f8 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -7,6 +7,7 @@ */ import { z } from 'zod'; +import type { Model } from '../models-catalog/types.js'; import type { AssistantMessage, FunctionResultMessage } from '../types/agent-message.js'; import type { ExecutedCall, FunctionBatchWork, PreparedCall } from './function-execute/types.js'; @@ -19,7 +20,7 @@ export type TurnState = | 'assistant_streaming' | 'function_execute' | 'function_awaiting_approval' - | 'steering_check' + | 'finishing' | 'stopped' | 'failed'; @@ -47,6 +48,15 @@ type TurnStateRecordCore = { updated_at_ms: number; /** Set during assistant_streaming when message_update deltas were emitted. */ assistant_body_streamed?: boolean; + /** + * Full catalog entry for this turn's model, resolved once at `provisioning` + * and read by preflight, provider streaming, and turn-end compaction instead + * of each re-fetching `models::get`. Optional: absent on records persisted + * before this field existed (and on a cold catalog) — every reader falls back + * to a live fetch. The model is fixed for a turn, so no invalidation is + * needed. Excluded from {@link toView}, so it never bloats turn_state events. + */ + model_meta?: Model; error?: { kind: string; message: string }; }; @@ -66,23 +76,11 @@ export type AssistantStreamingTurnRecord = TurnStateRecordCore & { awaiting_approval?: AwaitingApprovalEntry[]; }; -/** Persisted shape while in steering_check (work cleared on entry from function batch). */ -export type SteeringCheckTurnRecord = TurnStateRecordCore & { - state: 'steering_check'; - last_assistant?: AssistantMessage | null; - work?: TurnWork; - awaiting_approval?: AwaitingApprovalEntry[]; -}; - -type OtherTurnState = Exclude< - TurnState, - FunctionBatchState | 'assistant_streaming' | 'steering_check' ->; +type OtherTurnState = Exclude; export type TurnStateRecord = | FunctionBatchTurnRecord | AssistantStreamingTurnRecord - | SteeringCheckTurnRecord | (TurnStateRecordCore & { state: OtherTurnState; last_assistant?: AssistantMessage | null; @@ -95,7 +93,7 @@ const TURN_STATES = [ 'assistant_streaming', 'function_execute', 'function_awaiting_approval', - 'steering_check', + 'finishing', 'stopped', 'failed', ] as const satisfies readonly TurnState[]; @@ -137,3 +135,10 @@ export function transitionTo(rec: TurnStateRecord, next: TurnState): void { rec.state = next; rec.updated_at_ms = Date.now(); } + +/** A turn is done once it reaches a terminal state; otherwise it is in flight. */ +const TERMINAL_TURN_STATES = new Set(['stopped', 'failed']); + +export function isTurnInFlight(rec: TurnStateRecord): boolean { + return !TERMINAL_TURN_STATES.has(rec.state); +} diff --git a/harness/src/turn-orchestrator/steering-check/ports.ts b/harness/src/turn-orchestrator/steering-check/ports.ts deleted file mode 100644 index fe6f4279..00000000 --- a/harness/src/turn-orchestrator/steering-check/ports.ts +++ /dev/null @@ -1,24 +0,0 @@ -/** - * Typed dependency ports for steering_check. - */ - -import type { ISdk } from '../../runtime/iii.js'; -import type { AgentEvent } from '../../types/agent-event.js'; -import { emit } from '../events.js'; -import { createTurnStatePorts, type TurnStatePorts } from '../state-runtime/ports.js'; - -export type SteeringCheckPorts = TurnStatePorts & { - emit(session_id: string, event: AgentEvent): Promise; -}; - -export function createSteeringCheckPorts(iii: ISdk): SteeringCheckPorts { - const base = createTurnStatePorts(iii); - - return { - ...base, - - emit(session_id, event) { - return emit(iii, session_id, event); - }, - }; -} diff --git a/harness/src/turn-orchestrator/steering-check/process.ts b/harness/src/turn-orchestrator/steering-check/process.ts deleted file mode 100644 index 83c1d133..00000000 --- a/harness/src/turn-orchestrator/steering-check/process.ts +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Route steering_check outcomes, apply transitions, and register the FSM step. - */ - -import type { ISdk } from '../../runtime/iii.js'; -import { runTransition } from '../run-transition.js'; -import { - TurnStepPayloadSchema, - parseSteeringCheckRecord, - type TurnStepPayload, -} from '../schemas.js'; -import type { TurnStateRecord } from '../state.js'; -import { createSteeringCheckPorts } from './ports.js'; -import { runSteeringCheck } from './run.js'; - -export async function handleSteering(iii: ISdk, rec: TurnStateRecord): Promise { - const steering = parseSteeringCheckRecord(rec); - const ports = createSteeringCheckPorts(iii); - await runSteeringCheck(ports, steering); -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::steering_check', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition(iii, 'steering_check', handleSteering, parsed); - }, - { - description: - 'Run one durable FSM transition for session in state steering_check: continue after tool results or end the turn.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/steering-check/run.ts b/harness/src/turn-orchestrator/steering-check/run.ts deleted file mode 100644 index 8763ec12..00000000 --- a/harness/src/turn-orchestrator/steering-check/run.ts +++ /dev/null @@ -1,87 +0,0 @@ -/** - * Route steering_check outcomes and apply transitions. - */ - -import { syntheticAssistant } from '../synthetic-assistant.js'; -import { emitTurnEndOnce, resumeToAssistantStreaming } from '../state-runtime/turn-end.js'; -import type { SteeringCheckTurnRecord } from '../state.js'; -import type { SteeringCheckPorts } from './ports.js'; - -export type SteeringRoute = 'continue_after_function' | 'end_turn'; - -export type SteeringCheckOutcome = - | { kind: 'max_turns_reached' } - | { kind: 'continue_after_function' } - | { kind: 'end_turn' }; - -export function route(has_function_results: boolean): SteeringRoute { - return has_function_results ? 'continue_after_function' : 'end_turn'; -} - -function maxTurnsReached(rec: SteeringCheckTurnRecord): boolean { - return rec.max_turns !== undefined && rec.turn_count >= rec.max_turns; -} - -async function endForMaxTurns( - ports: SteeringCheckPorts, - rec: SteeringCheckTurnRecord, -): Promise { - const msg = syntheticAssistant({ - stop_reason: 'end', - text: `loop stopped: max_turns (${rec.max_turns ?? 0}) reached`, - }); - rec.last_assistant = msg; - await ports.appendMessages(rec.session_id, [msg]); - await ports.emit(rec.session_id, { - type: 'message_complete', - message: msg, - body_streamed: false, - }); - await emitTurnEndOnce(ports, rec, msg); - await ports.finishSession(rec); -} - -export async function processSteeringCheck( - _ports: SteeringCheckPorts, - rec: SteeringCheckTurnRecord, -): Promise { - const decision = route(rec.function_results.length > 0); - - if (decision === 'continue_after_function' && maxTurnsReached(rec)) { - return { kind: 'max_turns_reached' }; - } - - switch (decision) { - case 'continue_after_function': - return { kind: 'continue_after_function' }; - case 'end_turn': - return { kind: 'end_turn' }; - } -} - -export async function applySteeringCheckOutcome( - ports: SteeringCheckPorts, - rec: SteeringCheckTurnRecord, - outcome: SteeringCheckOutcome, -): Promise { - switch (outcome.kind) { - case 'max_turns_reached': - await endForMaxTurns(ports, rec); - return; - case 'continue_after_function': - resumeToAssistantStreaming(rec); - return; - case 'end_turn': - await emitTurnEndOnce(ports, rec); - await ports.finishSession(rec); - return; - } -} - -export async function runSteeringCheck( - ports: SteeringCheckPorts, - rec: SteeringCheckTurnRecord, -): Promise { - const outcome = await processSteeringCheck(ports, rec); - await applySteeringCheckOutcome(ports, rec, outcome); -} diff --git a/harness/src/types/agent-event.ts b/harness/src/types/agent-event.ts index 32705310..b69e8095 100644 --- a/harness/src/types/agent-event.ts +++ b/harness/src/types/agent-event.ts @@ -10,12 +10,22 @@ import type { AgentMessage, FunctionResultMessage } from './agent-message.js'; import type { FunctionResult } from './function.js'; import type { AssistantMessageEvent } from './stream-event.js'; +/** A model's context limits, in tokens. Produced once per turn by the orchestrator. */ +export type ModelContextLimit = { context: number; input: number; output: number }; + export type AgentEvent = | { type: 'agent_end'; messages: AgentMessage[] } | { type: 'turn_end'; message: AgentMessage; function_results: FunctionResultMessage[]; + /** + * Turn's model context limit, threaded from the orchestrator so the + * compaction worker reads it inline instead of re-fetching `models::get`. + * Optional: absent only when the catalog had no entry to resolve at turn + * start, where compaction falls back to resolving the model itself. + */ + model_limit?: ModelContextLimit; } | { type: 'message_update'; diff --git a/harness/src/types/provider.ts b/harness/src/types/provider.ts index b11535df..d4f362e2 100644 --- a/harness/src/types/provider.ts +++ b/harness/src/types/provider.ts @@ -1,11 +1,11 @@ /** - * Provider streaming contract. Mirrors - * `harness/crates/harness-types/src/provider.rs` and is consumed by both - * the orchestrator (caller) and provider workers (handler). + * Provider streaming contract, consumed by both the orchestrator (caller) and + * provider workers (handler). */ import { z } from 'zod'; import { zodToJsonSchema } from 'zod-to-json-schema'; +import type { Model } from '../models-catalog/types.js'; /** * Lax `StreamChannelRef` — the SDK's canonical type lives in `iii-sdk`, @@ -44,6 +44,24 @@ export const ProviderStreamInputSchema = z.object({ * `reasoning_effort`); others ignore it. Absent = off. */ thinking_level: z.string().optional(), + /** + * Optional pre-resolved catalog entry for `model`, threaded from the + * orchestrator so the provider does not re-fetch `models::get` it already + * resolved at turn start. An optimization, never a source of truth: validated + * leniently (any object passes; anything else coerces to absent) so a sparse + * or partial catalog entry falls back to a live fetch instead of failing the + * whole stream. Providers read its fields defensively. + */ + model_meta: z + .unknown() + .optional() + .transform((v) => (v && typeof v === 'object' ? (v as Model) : undefined)), + /** + * Optional stable id for the turn (the run's start time). Providers may use + * it to dedupe per-stream credential resolution within a turn; a new turn + * carries a new key. Purely an optimization key — never a source of truth. + */ + resolution_key: z.number().optional(), }); export type ProviderStreamInput = z.infer; diff --git a/harness/tests/approval-gate/schemas.test.ts b/harness/tests/approval-gate/schemas.test.ts index 1ba24949..3c21d667 100644 --- a/harness/tests/approval-gate/schemas.test.ts +++ b/harness/tests/approval-gate/schemas.test.ts @@ -26,6 +26,15 @@ describe('ResolvePayloadSchema — id normalization & validation', () => { expect(parsed.function_call_id).toBe('fc-1'); }); + it('accepts the aborted decision on the wire (user cancel of a parked call)', () => { + const parsed = ResolvePayloadSchema.parse({ + session_id: 's', + function_call_id: 'fc-1', + decision: 'aborted', + }); + expect(parsed.decision).toBe('aborted'); + }); + it.each([ ['function_call_id missing', { session_id: 's', decision: 'allow' }], ['tool_call_id only (legacy)', { session_id: 's', tool_call_id: 'legacy', decision: 'allow' }], diff --git a/harness/tests/context-compaction/handler-async.test.ts b/harness/tests/context-compaction/handler-async.test.ts index ef7c9c08..c65751f5 100644 --- a/harness/tests/context-compaction/handler-async.test.ts +++ b/harness/tests/context-compaction/handler-async.test.ts @@ -1,5 +1,10 @@ -import { describe, expect, it } from 'vitest'; -import { extractEventPayload, turnEndUsage } from '../../src/context-compaction/handler-async.js'; +import { describe, expect, it, vi } from 'vitest'; +import { + extractEventPayload, + resolveModelFromEvent, + turnEndUsage, +} from '../../src/context-compaction/handler-async.js'; +import type { ISdk } from '../../src/runtime/iii.js'; describe('extractEventPayload', () => { it('handles camelCase envelope (event.data shape)', () => { @@ -54,3 +59,46 @@ describe('turnEndUsage', () => { expect(turnEndUsage({ type: 'TurnEnd' })).toBeNull(); }); }); + +describe('resolveModelFromEvent', () => { + function trackingIii(): { iii: ISdk; calls: string[] } { + const calls: string[] = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string }) => { + calls.push(req.function_id); + return { context_window: 200_000, max_output_tokens: 8_096 }; + }), + } as unknown as ISdk; + return { iii, calls }; + } + + it('uses the inline limit and skips models::get', async () => { + const { iii, calls } = trackingIii(); + const event = { + type: 'turn_end', + message: { provider: 'anthropic', model: 'claude-sonnet-4-6' }, + model_limit: { context: 1_000_000, input: 1_000_000, output: 64_000 }, + }; + + const resolved = await resolveModelFromEvent(iii, 'sess-1', event); + + expect(calls).not.toContain('models::get'); + expect(resolved).toEqual({ + providerID: 'anthropic', + modelID: 'claude-sonnet-4-6', + modelLimit: { context: 1_000_000, input: 1_000_000, output: 64_000 }, + }); + }); + + it('falls back to models::get when no inline limit is present', async () => { + const { iii, calls } = trackingIii(); + const event = { + type: 'turn_end', + message: { provider: 'anthropic', model: 'claude-sonnet-4-6' }, + }; + + await resolveModelFromEvent(iii, 'sess-1', event); + + expect(calls).toContain('models::get'); + }); +}); diff --git a/harness/tests/harness/trigger.test.ts b/harness/tests/harness/trigger.test.ts index 53cdcd40..0416b4fc 100644 --- a/harness/tests/harness/trigger.test.ts +++ b/harness/tests/harness/trigger.test.ts @@ -45,7 +45,7 @@ describe('harness::trigger', () => { }); it('forwards payload to run::start', async () => { - const { sdk, registered, trigger } = makeFakeSdk({ session_id: 'sess' }); + const { sdk, registered, trigger } = makeFakeSdk({ session_id: 'sess', started: true }); register(sdk); const handler = registered.get('harness::trigger')?.handler; if (!handler) throw new Error('handler not registered'); @@ -64,7 +64,26 @@ describe('harness::trigger', () => { system_prompt: '', }); expect(result.status_code).toBe(200); - expect(result.body).toEqual({ session_id: 'sess' }); + expect(result.body).toEqual({ session_id: 'sess', started: true }); + }); + + it('returns 409 when run::start reports the session is busy', async () => { + const { sdk, registered } = makeFakeSdk({ + session_id: 'sess', + started: false, + reason: 'session_busy', + }); + register(sdk); + const handler = registered.get('harness::trigger')?.handler; + if (!handler) throw new Error('handler not registered'); + + const result = (await handler({ + session_id: 'sess-1', + payload: runStartPayload, + })) as Record; + + expect(result.status_code).toBe(409); + expect(result.body).toEqual({ session_id: 'sess', started: false, reason: 'session_busy' }); }); it('rejects invalid run::start payload', async () => { diff --git a/harness/tests/integration/mode-approval.e2e.test.ts b/harness/tests/integration/mode-approval.e2e.test.ts index 3a4801bd..4660013f 100644 --- a/harness/tests/integration/mode-approval.e2e.test.ts +++ b/harness/tests/integration/mode-approval.e2e.test.ts @@ -50,7 +50,7 @@ describe('mode-driven approval e2e (real consultBefore)', () => { await h.runExecute('sess-full'); const rec = h.loadTurnRecord('sess-full'); - expect(rec?.state).toBe('steering_check'); + expect(rec?.state).toBe('assistant_streaming'); expect(rec?.awaiting_approval).toEqual([]); expect(rec?.work).toBeUndefined(); expect(h.stateStore.has('approvals/sess-full/fc-1')).toBe(false); @@ -68,7 +68,7 @@ describe('mode-driven approval e2e (real consultBefore)', () => { await h.runExecute('sess-grant'); const rec = h.loadTurnRecord('sess-grant'); - expect(rec?.state).toBe('steering_check'); + expect(rec?.state).toBe('assistant_streaming'); expect(rec?.awaiting_approval).toEqual([]); expect(executionEvents(h.emitted, 'function_execution_end', 'fc-1')).toHaveLength(1); }); @@ -84,7 +84,7 @@ describe('mode-driven approval e2e (real consultBefore)', () => { await h.runExecute('sess-auto'); expect(awaitingIds(h, 'sess-auto')).toEqual([]); - expect(h.loadTurnRecord('sess-auto')?.state).toBe('steering_check'); + expect(h.loadTurnRecord('sess-auto')?.state).toBe('assistant_streaming'); }); it('manual mode with no grant parks the call (falls through to policy → needs_approval)', async () => { @@ -144,14 +144,21 @@ describe('mode-driven approval e2e (real consultBefore)', () => { await h.runExecute('sess-escalate'); const rec = h.loadTurnRecord('sess-escalate'); - // Denied up front: not parked, not executed, batch finalizes with an - // error result carrying the human_only_function denial. expect(rec?.awaiting_approval).toEqual([]); - expect(rec?.state).toBe('steering_check'); - const denied = rec?.function_results?.find((r) => r.function_call_id === 'fc-1'); + expect(rec?.state).toBe('assistant_streaming'); + expect(rec?.function_results).toEqual([]); + const turnEnd = h.emitted.find((e) => e.type === 'turn_end') as + | { + function_results: Array<{ + function_call_id: string; + is_error: boolean; + details: unknown; + }>; + } + | undefined; + const denied = turnEnd?.function_results.find((r) => r.function_call_id === 'fc-1'); expect(denied?.is_error).toBe(true); expect(JSON.stringify(denied?.details)).toContain('human_only_function'); - // No real mode change leaked into settings. const settings = h.stateStore.get(`${SETTINGS_STATE_SCOPE}/sess-escalate`) as { mode: string; }; diff --git a/harness/tests/integration/parallel-approval.e2e.test.ts b/harness/tests/integration/parallel-approval.e2e.test.ts index 395f95b6..6536752f 100644 --- a/harness/tests/integration/parallel-approval.e2e.test.ts +++ b/harness/tests/integration/parallel-approval.e2e.test.ts @@ -120,7 +120,7 @@ describe('parallel approval e2e', () => { await h.resolveApproval('sess-order', 'fc-1', 'allow'); rec = h.loadTurnRecord('sess-order'); expect(rec?.awaiting_approval).toEqual([]); - expect(rec?.state).toBe('steering_check'); + expect(rec?.state).toBe('assistant_streaming'); expect(rec?.work).toBeUndefined(); }); @@ -190,12 +190,6 @@ describe('parallel approval e2e', () => { ['fc-1', 'fc-2'], ); - // Both prompts approved concurrently: two approval::resolve writes whose - // wakes interleave on the shared turn_state record. The sequential tests - // never exercise this. runTransition has no optimistic-concurrency guard - // (saveRecord is an unconditional overwrite), so both wakes load the same - // parked record, each executes EVERY call, and each finalizes the batch — - // duplicating side effects and emitting turn_end multiple times. await Promise.all([ h.resolveApproval('sess-par', 'fc-1', 'allow'), h.resolveApproval('sess-par', 'fc-2', 'allow'), @@ -209,16 +203,9 @@ describe('parallel approval e2e', () => { it('drains a sibling approved mid-execution even when its own wake is dropped', async () => { const h = createParallelApprovalHarness(); - // Both calls park for approval during execute. vi.spyOn(agentTriggerModule, 'dispatchWithHook') - .mockResolvedValueOnce({ kind: 'pending' }) // execute fc-late → parks - .mockResolvedValueOnce({ kind: 'pending' }); // execute fc-driver → parks - // Approved calls execute via triggerPreApproved → triggerFunctionCall. - // Executing fc-driver approves fc-late as a side effect but writes the - // decision WITHOUT firing fc-late's wake — modeling a sibling approval whose - // own turn-step wake was dropped (retries exhausted against the lease the - // driving wake holds). fc-late was read first and skipped, so only a re-scan - // within the same wake can pick it up. + .mockResolvedValueOnce({ kind: 'pending' }) + .mockResolvedValueOnce({ kind: 'pending' }); vi.spyOn(agentTriggerModule, 'triggerFunctionCall').mockImplementation(async (_iii, call) => { if (call.id === 'fc-driver') { h.stateStore.set('approvals/sess-drain/fc-late', { @@ -247,14 +234,11 @@ describe('parallel approval e2e', () => { await h.resolveApproval('sess-drain', 'fc-driver', 'allow'); - // fc-late is drained in the same wake instead of being orphaned: it runs to - // completion and the batch finalizes (work cleared, state advances) rather - // than staying parked on fc-late forever. const rec = h.loadTurnRecord('sess-drain'); expect(executionEvents(h.emitted, 'function_execution_end', 'fc-driver')).toHaveLength(1); expect(executionEvents(h.emitted, 'function_execution_end', 'fc-late')).toHaveLength(1); expect(rec?.awaiting_approval).toEqual([]); - expect(rec?.state).toBe('steering_check'); + expect(rec?.state).toBe('assistant_streaming'); }); it('re-enqueues a follow-up wake when a resolved call leaves siblings pending', async () => { @@ -277,9 +261,6 @@ describe('parallel approval e2e', () => { const rec = h.loadTurnRecord('sess-reenqueue'); expect(rec?.awaiting_approval?.map((e) => e.function_call_id)).toEqual(['fc-2']); - // The wake that resolved fc-1 must kick a fresh, uncontended wake so a - // dropped fc-2 contender can't orphan it: the approval::resolve write - // accounts for one wake, the follow-up kick for a second. expect(wakeEnqueues(h, 'sess-reenqueue')).toBeGreaterThanOrEqual(before + 2); }); @@ -298,7 +279,7 @@ describe('parallel approval e2e', () => { decision: 'allow', reason: null, }); - expect(h.loadTurnRecord('sess-wake')?.state).toBe('steering_check'); + expect(h.loadTurnRecord('sess-wake')?.state).toBe('assistant_streaming'); expect(h.loadTurnRecord('sess-wake')?.work).toBeUndefined(); }); }); diff --git a/harness/tests/provider-anthropic/auth.test.ts b/harness/tests/provider-anthropic/auth.test.ts index 395efd0b..e155cf55 100644 --- a/harness/tests/provider-anthropic/auth.test.ts +++ b/harness/tests/provider-anthropic/auth.test.ts @@ -1,6 +1,14 @@ -import { beforeEach, describe, expect, it, vi } from 'vitest'; -import { buildConfig } from '../../src/provider-anthropic/auth.js'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { Model } from '../../src/models-catalog/types.js'; +import { + _resetProviderResolveCacheForTests, + buildConfig, + invalidateProviderResolveCache, +} from '../../src/provider-anthropic/auth.js'; import type { WorkerConfig } from '../../src/provider-anthropic/config.js'; +import { streamAnthropic } from '../../src/provider-anthropic/stream.js'; +import { buildThinkingConfig } from '../../src/provider-anthropic/thinking.js'; +import type { AnthropicConfig } from '../../src/provider-anthropic/types.js'; import type { ISdk } from '../../src/runtime/iii.js'; const WORKER: WorkerConfig = { @@ -26,7 +34,6 @@ function resolved(max_tokens: number | null) { }; } -/** Fake ISdk whose models::get returns the given catalog entry (or throws). */ function iiiWithCatalog(entry: unknown, opts: { throws?: boolean } = {}): ISdk { const trigger = vi.fn().mockImplementation(async (req: { function_id: string }) => { if (req.function_id === 'models::get') { @@ -38,6 +45,30 @@ function iiiWithCatalog(entry: unknown, opts: { throws?: boolean } = {}): ISdk { return { trigger } as unknown as ISdk; } +function iiiCountingCatalog(entry: unknown): { iii: ISdk; modelsGetCalls: () => number } { + let n = 0; + const trigger = vi.fn().mockImplementation(async (req: { function_id: string }) => { + if (req.function_id === 'models::get') { + n++; + return entry; + } + return null; + }); + return { iii: { trigger } as unknown as ISdk, modelsGetCalls: () => n }; +} + +const THINKING_MODEL: Model = { + id: 'claude-sonnet-4-6', + provider: 'anthropic', + api: 'anthropic-messages', + display_name: 'Claude Sonnet 4.6', + context_window: 1_000_000, + max_output_tokens: 64_000, + supports_thinking: true, + supports_xhigh: true, + thinking_budgets: { minimal: 2_000, low: 4_000, medium: 8_000, high: 16_000 }, +}; + describe('buildConfig max_tokens resolution', () => { beforeEach(() => { resolveProviderMock.mockReset(); @@ -98,3 +129,143 @@ describe('buildConfig max_tokens resolution', () => { ); }); }); + +describe('buildConfig pre-resolved model threading', () => { + beforeEach(() => { + resolveProviderMock.mockReset(); + resolveProviderMock.mockResolvedValue(resolved(null)); + }); + + it('uses the pre-resolved model and skips models::get', async () => { + const { iii, modelsGetCalls } = iiiCountingCatalog({ id: 'nope', max_output_tokens: 1 }); + + const cfg = await buildConfig(iii, WORKER, 'claude-sonnet-4-6', THINKING_MODEL); + + expect(modelsGetCalls()).toBe(0); + expect(cfg.catalog).toEqual(THINKING_MODEL); + expect(cfg.max_tokens).toBe(32_000); + }); + + it('fetches models::get when no pre-resolved model is threaded', async () => { + const { iii, modelsGetCalls } = iiiCountingCatalog({ + id: 'claude-sonnet-4-6', + max_output_tokens: 64_000, + }); + + const cfg = await buildConfig(iii, WORKER, 'claude-sonnet-4-6'); + + expect(modelsGetCalls()).toBe(1); + expect(cfg.catalog?.id).toBe('claude-sonnet-4-6'); + }); + + it.each([ + 'high', + 'xhigh', + ])('produces a byte-identical thinking config (%s) with vs without the pre-resolved model', async (level) => { + const fetched = await buildConfig(iiiWithCatalog(THINKING_MODEL), WORKER, 'claude-sonnet-4-6'); + const { iii } = iiiCountingCatalog({ id: 'nope' }); + const threaded = await buildConfig(iii, WORKER, 'claude-sonnet-4-6', THINKING_MODEL); + + const fetchedThinking = buildThinkingConfig(level, fetched.max_tokens, fetched.catalog); + const threadedThinking = buildThinkingConfig(level, threaded.max_tokens, threaded.catalog); + + expect(threadedThinking).toEqual(fetchedThinking); + expect(threadedThinking).toBeDefined(); + }); +}); + +function anthropicCfg(): AnthropicConfig { + return { + credential_value: 'sk-test', + model: 'claude-sonnet-4-6', + max_tokens: 32_000, + api_url: 'https://api.example/v1/messages', + auth_mode: 'api_key', + }; +} + +describe('buildConfig per-turn credential resolution cache', () => { + beforeEach(() => { + resolveProviderMock.mockReset(); + resolveProviderMock.mockResolvedValue(resolved(null)); + _resetProviderResolveCacheForTests(); + }); + + it('resolves once per turn key and reuses it across streams', async () => { + const iii = iiiWithCatalog({ id: 'claude-sonnet-4-6', max_output_tokens: 64_000 }); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + expect(resolveProviderMock).toHaveBeenCalledTimes(1); + }); + + it('re-resolves when the turn key changes (next user turn)', async () => { + const iii = iiiWithCatalog({ id: 'claude-sonnet-4-6', max_output_tokens: 64_000 }); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 200); + expect(resolveProviderMock).toHaveBeenCalledTimes(2); + }); + + it('does not cache when no turn key is threaded', async () => { + const iii = iiiWithCatalog({ id: 'claude-sonnet-4-6', max_output_tokens: 64_000 }); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6'); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6'); + expect(resolveProviderMock).toHaveBeenCalledTimes(2); + }); + + it('invalidateProviderResolveCache forces a re-resolve on the same key', async () => { + const iii = iiiWithCatalog({ id: 'claude-sonnet-4-6', max_output_tokens: 64_000 }); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + invalidateProviderResolveCache(); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + expect(resolveProviderMock).toHaveBeenCalledTimes(2); + }); + + describe('401 invalidation via streamAnthropic', () => { + afterEach(() => { + vi.unstubAllGlobals(); + }); + + it('a 401 stream drops the cache so the next turn re-resolves the credential', async () => { + const iii = iiiWithCatalog({ id: 'claude-sonnet-4-6', max_output_tokens: 64_000 }); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + expect(resolveProviderMock).toHaveBeenCalledTimes(1); + + vi.stubGlobal( + 'fetch', + vi.fn(async () => new Response('unauthorized', { status: 401 })), + ); + for await (const _ev of streamAnthropic({ + cfg: anthropicCfg(), + system_prompt: '', + messages: [], + tools: [], + })) { + } + + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + expect(resolveProviderMock).toHaveBeenCalledTimes(2); + }); + + it('a 200 stream leaves the cache intact', async () => { + const iii = iiiWithCatalog({ id: 'claude-sonnet-4-6', max_output_tokens: 64_000 }); + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + + vi.stubGlobal( + 'fetch', + vi.fn(async () => new Response('data: {"type":"message_stop"}\n\n', { status: 200 })), + ); + for await (const _ev of streamAnthropic({ + cfg: anthropicCfg(), + system_prompt: '', + messages: [], + tools: [], + })) { + } + + await buildConfig(iii, WORKER, 'claude-sonnet-4-6', undefined, 100); + expect(resolveProviderMock).toHaveBeenCalledTimes(1); + }); + }); +}); diff --git a/harness/tests/session/operations.test.ts b/harness/tests/session/operations.test.ts index c18e346a..4607dfdc 100644 --- a/harness/tests/session/operations.test.ts +++ b/harness/tests/session/operations.test.ts @@ -1,7 +1,8 @@ -import { describe, expect, it } from 'vitest'; +import { describe, expect, it, vi } from 'vitest'; import { activePath, appendMessage, + appendMessages, cloneSession, createSession, exportHtml, @@ -11,8 +12,25 @@ import { tree, } from '../../src/session/tree/operations.js'; import { InMemoryStore } from '../../src/session/tree/store.js'; +import { entryTimestamp } from '../../src/session/tree/types.js'; import type { AgentMessage } from '../../src/types/agent-message.js'; +/** + * InMemoryStore returns entries in insertion order, but the production + * IiiStateSessionStore sorts loadEntries by (timestamp, id). Leaf-resolution + * bugs only surface under that sort, so this double reproduces it. + */ +class SortingStore extends InMemoryStore { + async loadEntries(session_id: string) { + const entries = await super.loadEntries(session_id); + return entries.sort((a, b) => { + const t = entryTimestamp(a) - entryTimestamp(b); + if (t !== 0) return t; + return a.id < b.id ? -1 : a.id > b.id ? 1 : 0; + }); + } +} + const userMsg = (text: string): AgentMessage => ({ role: 'user', content: [{ type: 'text', text }], @@ -48,6 +66,87 @@ describe('session-tree operations', () => { expect(path).toEqual([e1, e2, e3]); }); + it('appendMessages chains a batch and is equivalent to serial appends', async () => { + const store = new InMemoryStore(); + const sid = await createSession(store); + const e0 = await appendMessage(store, sid, null, userMsg('seed')); + + const ids = await appendMessages(store, sid, null, [asstMsg('a'), userMsg('b'), asstMsg('c')]); + + expect(ids).toHaveLength(3); + const path = await activePath(store, sid); + expect(path).toEqual([e0, ...ids]); + const messages = await loadMessages(store, sid); + expect(messages.map((m) => (m.content[0] as { text: string }).text)).toEqual([ + 'seed', + 'a', + 'b', + 'c', + ]); + }); + + it('appendMessages resolves the active leaf once for the whole batch', async () => { + const store = new InMemoryStore(); + const sid = await createSession(store); + await appendMessage(store, sid, null, userMsg('seed')); + + const loadEntriesSpy = vi.spyOn(store, 'loadEntries'); + const appendManySpy = vi.spyOn(store, 'appendMany'); + + await appendMessages(store, sid, null, [asstMsg('a'), userMsg('b'), asstMsg('c')]); + + expect(loadEntriesSpy).toHaveBeenCalledTimes(1); + expect(appendManySpy).toHaveBeenCalledTimes(1); + expect(appendManySpy.mock.calls[0]?.[1]).toHaveLength(3); + }); + + it('appendMessages with an explicit parent skips leaf resolution', async () => { + const store = new InMemoryStore(); + const sid = await createSession(store); + const e0 = await appendMessage(store, sid, null, userMsg('seed')); + const loadEntriesSpy = vi.spyOn(store, 'loadEntries'); + + const ids = await appendMessages(store, sid, e0, [asstMsg('a')]); + + expect(loadEntriesSpy).not.toHaveBeenCalled(); + const path = await activePath(store, sid); + expect(path).toEqual([e0, ...ids]); + }); + + it('appendMessages on an empty list is a no-op', async () => { + const store = new InMemoryStore(); + const sid = await createSession(store); + const appendManySpy = vi.spyOn(store, 'appendMany'); + + const ids = await appendMessages(store, sid, null, []); + + expect(ids).toEqual([]); + expect(appendManySpy).not.toHaveBeenCalled(); + }); + + it('keeps a later append on the active path when its timestamp ties or precedes the batch (sorted store, leaf = chain tip)', async () => { + const store = new SortingStore(); + const sid = await createSession(store); + const now = vi.spyOn(Date, 'now'); + + now.mockReturnValue(100); + const batch = await appendMessages(store, sid, null, [ + asstMsg('a'), + userMsg('b'), + asstMsg('c'), + ]); + + now.mockReturnValue(99); + const tail = await appendMessage(store, sid, null, asstMsg('d')); + now.mockRestore(); + + expect(await activePath(store, sid)).toEqual([...batch, tail]); + const texts = (await loadMessages(store, sid)).map( + (m) => (m.content[0] as { text: string }).text, + ); + expect(texts).toEqual(['a', 'b', 'c', 'd']); + }); + it('loadMessages filters non-message entries from active path', async () => { const store = new InMemoryStore(); const sid = await createSession(store); diff --git a/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts b/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts index 1da608fc..5dd04a84 100644 --- a/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts +++ b/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts @@ -20,10 +20,12 @@ export type MockTurnStore = { export function mockTurnStore(overrides: Partial = {}): MockTurnStore { return { loadRecord: vi.fn(async () => null), + loadRecordStrict: vi.fn(async () => null), saveRecord: vi.fn(async () => {}), writeRecord: vi.fn(async () => {}), ensureSession: vi.fn(async () => {}), loadMessages: vi.fn(async () => []), + loadTrailingResultIds: vi.fn(async () => new Set()), appendMessages: vi.fn(async () => {}), loadRunRequest: vi.fn(async () => defaultRunRequest), saveRunRequest: vi.fn(async () => {}), @@ -31,7 +33,6 @@ export function mockTurnStore(overrides: Partial = {}): MockTurnStore } as MockTurnStore; } -/** Mock `createTurnStore` and return the store instance for assertions. */ export function installMockTurnStore(overrides: Partial = {}): MockTurnStore { const store = mockTurnStore(overrides); vi.spyOn(storeModule, 'createTurnStore').mockReturnValue(store); diff --git a/harness/tests/turn-orchestrator/assistant-streaming.test.ts b/harness/tests/turn-orchestrator/assistant-streaming.test.ts index 36edfab2..668476a3 100644 --- a/harness/tests/turn-orchestrator/assistant-streaming.test.ts +++ b/harness/tests/turn-orchestrator/assistant-streaming.test.ts @@ -125,8 +125,8 @@ describe('routeAssistantTurn', () => { ).toBe('function_execute'); }); - it('routes text-only assistants to steering_check', () => { - expect(routeAssistantTurn(assistant()).kind).toBe('steering_check'); + it('routes text-only assistants to end_turn', () => { + expect(routeAssistantTurn(assistant()).kind).toBe('end_turn'); }); }); @@ -139,7 +139,7 @@ describe('finalizeAssistantTurn', () => { await finalizeAssistantTurn(ports, rec, asst, []); - expect(rec.state).toBe('stopped'); + expect(rec.state).toBe('finishing'); expect(rec.turn_end_emitted).toBe(true); expect(ports.persistAssistantIfNew).not.toHaveBeenCalled(); }); diff --git a/harness/tests/turn-orchestrator/assistant.test.ts b/harness/tests/turn-orchestrator/assistant.test.ts index b70e61e6..3b45b479 100644 --- a/harness/tests/turn-orchestrator/assistant.test.ts +++ b/harness/tests/turn-orchestrator/assistant.test.ts @@ -97,10 +97,8 @@ describe('handleStreaming turn start', () => { await handleStreaming(iii, rec); expect(rec.turn_count).toBe(1); - // createChannel failure → synthetic error → finalizeAssistant sets turn_end_emitted = true expect(rec.turn_end_emitted).toBe(true); expect(calls.some((c) => c.function_id === 'approval::consume')).toBe(false); - // stream::set is called by emit(message_complete) and emit(turn_end) in the error path expect(calls.some((c) => c.function_id === 'stream::set')).toBe(true); }); }); @@ -118,7 +116,7 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - expect(rec.state).toBe('stopped'); + expect(rec.state).toBe('finishing'); expect(rec.last_assistant?.stop_reason).toBe('error'); expect(rec.last_assistant?.error_message).toContain('create_channel failed'); }); @@ -143,18 +141,15 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - // emitted message_complete via stream::set trigger expect(calls.some((c) => c.function_id === 'stream::set')).toBe(true); - // assistant persisted expect(appendSpy).toHaveBeenCalledOnce(); - // routed to function_execute (NOT assistant_finished) expect(rec.state).toBe('function_execute'); expect(rec.last_assistant).toEqual(finalMsg); expect(rec.function_results).toEqual([]); expect(rec.work?.prepared).toHaveLength(1); }); - it('routes to steering_check when the assistant made no calls', async () => { + it('ends the turn inline when the assistant made no calls', async () => { const finalMsg = assistant({ content: [{ type: 'text', text: 'done reply' }] }); const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const { iii } = fakeIiiWithDone(finalMsg); @@ -164,11 +159,12 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('finishing'); + expect(rec.turn_end_emitted).toBe(true); expect(rec.last_assistant).toEqual(finalMsg); }); - it('captures provider done frame and routes correctly (text-only → steering_check)', async () => { + it('captures provider done frame and routes correctly (text-only → end of turn)', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const finalMsg = assistant({ content: [{ type: 'text', text: 'done reply' }] }); let deliver: ((msg: string) => void) | null = null; @@ -199,7 +195,7 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('finishing'); expect(rec.last_assistant).toEqual(finalMsg); }); @@ -214,7 +210,7 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - expect(rec.state).toBe('stopped'); + expect(rec.state).toBe('finishing'); expect(rec.turn_end_emitted).toBe(true); expect(appendSpy).not.toHaveBeenCalled(); }); @@ -230,7 +226,6 @@ describe('handleStreaming', () => { }, ], }); - // Simulate re-entry: messages already contain the assistant message let storedMessages: unknown[] = [finalMsg]; const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; diff --git a/harness/tests/turn-orchestrator/finish.test.ts b/harness/tests/turn-orchestrator/finish.test.ts index 5ccde8f0..371104de 100644 --- a/harness/tests/turn-orchestrator/finish.test.ts +++ b/harness/tests/turn-orchestrator/finish.test.ts @@ -18,7 +18,7 @@ describe('TurnStatePorts.finishSession', () => { } as unknown as ISdk; const rec = newRecord('s1'); - rec.state = 'steering_check'; + rec.state = 'finishing'; await createTurnStatePorts(iii).finishSession(rec); expect(rec.state).toBe('stopped'); diff --git a/harness/tests/turn-orchestrator/finishing.test.ts b/harness/tests/turn-orchestrator/finishing.test.ts new file mode 100644 index 00000000..fda2fc8e --- /dev/null +++ b/harness/tests/turn-orchestrator/finishing.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { handleFinishing, register } from '../../src/turn-orchestrator/finishing.js'; +import { type TurnStateRecord, newRecord } from '../../src/turn-orchestrator/state.js'; + +type TriggerCall = { function_id: string; payload: unknown }; + +function recordingIii(): { iii: ISdk; calls: TriggerCall[] } { + const calls: TriggerCall[] = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { + calls.push({ function_id: req.function_id, payload: req.payload }); + return null; + }), + } as unknown as ISdk; + return { iii, calls }; +} + +describe('handleFinishing', () => { + it('emits agent_end and transitions to stopped', async () => { + const { iii, calls } = recordingIii(); + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'finishing' }; + + await handleFinishing(iii, rec); + + expect(rec.state).toBe('stopped'); + const agentEnd = calls.find( + (c) => + c.function_id === 'stream::set' && + (c.payload as { data?: { type?: string } }).data?.type === 'agent_end', + ); + expect(agentEnd).toBeDefined(); + }); + + it('is re-runnable: a replay re-emits agent_end and lands on stopped', async () => { + const { iii, calls } = recordingIii(); + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'finishing' }; + + await handleFinishing(iii, rec); + rec.state = 'finishing'; + await handleFinishing(iii, rec); + + expect(rec.state).toBe('stopped'); + const agentEnds = calls.filter( + (c) => + c.function_id === 'stream::set' && + (c.payload as { data?: { type?: string } }).data?.type === 'agent_end', + ); + expect(agentEnds.length).toBe(2); + }); +}); + +describe('register', () => { + it('registers turn::finishing', () => { + const registered = new Map(); + const iii = { + registerFunction: (id: string, handler: unknown) => registered.set(id, handler), + } as unknown as ISdk; + + register(iii); + + expect(registered.has('turn::finishing')).toBe(true); + }); +}); diff --git a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts index 4368ea46..4e7a0653 100644 --- a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts +++ b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts @@ -94,6 +94,11 @@ describe('applyDecisionToPrepared', () => { result: denialResultFromDecision({ decision: 'deny', reason: 'policy' }), }); }); + + it('a denial resumes the loop; an abort terminates the turn', () => { + expect(denialResultFromDecision({ decision: 'deny', reason: null }).terminate).toBe(false); + expect(denialResultFromDecision({ decision: 'aborted', reason: null }).terminate).toBe(true); + }); }); describe('handleAwaitingApproval', () => { @@ -113,14 +118,18 @@ describe('handleAwaitingApproval', () => { loadMessages: vi.fn(async () => []), appendMessages: vi.fn(async () => {}), }); - vi.spyOn(events, 'emit').mockResolvedValue(undefined); + const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleAwaitingApproval(iii, rec); expect(rec.awaiting_approval).toEqual([]); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('assistant_streaming'); expect(rec.work).toBeUndefined(); - expect(rec.function_results).toHaveLength(1); + expect(rec.function_results).toEqual([]); + const turnEnd = emitSpy.mock.calls.find((call) => call[2]?.type === 'turn_end')?.[2] as + | { function_results: unknown[] } + | undefined; + expect(turnEnd?.function_results).toHaveLength(1); }); it('leaves state parked when awaiting entries remain undecided', async () => { diff --git a/harness/tests/turn-orchestrator/function-execute.test.ts b/harness/tests/turn-orchestrator/function-execute.test.ts index fa6fc90b..541dd08c 100644 --- a/harness/tests/turn-orchestrator/function-execute.test.ts +++ b/harness/tests/turn-orchestrator/function-execute.test.ts @@ -8,10 +8,14 @@ import { finalizeBatch, runOneCall, } from '../../src/turn-orchestrator/function-execute/run.js'; -import { withRoutingEnvelope } from '../../src/turn-orchestrator/function-execute/ports.js'; +import { + createPorts, + withRoutingEnvelope, +} from '../../src/turn-orchestrator/function-execute/ports.js'; import type { FunctionExecutePorts } from '../../src/turn-orchestrator/function-execute/ports.js'; import type { ExecutedCall } from '../../src/turn-orchestrator/function-execute/types.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; +import type { ISdk } from '../../src/runtime/iii.js'; import type { AssistantMessage } from '../../src/types/agent-message.js'; function makeAssistant( @@ -39,6 +43,7 @@ function stubPorts(overrides: Partial = {}): FunctionExecu return { emitStart: vi.fn(async () => {}), emitEnd: vi.fn(async () => {}), + emit: vi.fn(async () => {}), checkpoint: vi.fn(async () => {}), dispatch: vi.fn(async () => ({ kind: 'result' as const, @@ -49,6 +54,7 @@ function stubPorts(overrides: Partial = {}): FunctionExecu details: {}, })), loadMessages: vi.fn(async () => []), + loadTrailingResultIds: vi.fn(async () => new Set()), appendMessages: vi.fn(async () => {}), emitTurnEnd: vi.fn(async () => {}), finishSession: vi.fn(async (rec) => { @@ -176,25 +182,15 @@ describe('finalizeBatch', () => { }; await finalizeBatch(ports, rec); - expect(rec.state).toBe('stopped'); - expect(ports.finishSession).toHaveBeenCalledOnce(); + expect(rec.state).toBe('finishing'); + expect(ports.finishSession).not.toHaveBeenCalled(); }); it('skips duplicate function_result ids on re-entry', async () => { const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; const appendMessages = vi.fn(async () => {}); const ports = stubPorts({ - loadMessages: vi.fn(async () => [ - { - role: 'function_result' as const, - function_call_id: 'fc-1', - function_id: 'shell::run', - content: [{ type: 'text' as const, text: 'existing' }], - details: {}, - is_error: false, - timestamp: 1, - }, - ]), + loadTrailingResultIds: vi.fn(async () => new Set(['fc-1'])), appendMessages, }); const rec = newRecord('s1'); @@ -215,6 +211,152 @@ describe('finalizeBatch', () => { await finalizeBatch(ports, rec); expect(appendMessages).not.toHaveBeenCalled(); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('assistant_streaming'); + }); + + it('resumes to assistant_streaming with cleared results when a result continues', async () => { + const ports = stubPorts(); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + const rec = newRecord('s1'); + enterFunctionExecute(rec, makeAssistant([fc])); + rec.state = 'function_execute'; + rec.work = { + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {} }, + is_error: false, + duration_ms: 1, + }, + }, + }; + + await finalizeBatch(ports, rec); + + expect(rec.state).toBe('assistant_streaming'); + expect(rec.function_results).toEqual([]); + expect(rec.turn_end_emitted).toBe(true); + expect(ports.finishSession).not.toHaveBeenCalled(); + }); + + it('ends the loop inline at the max_turns cap instead of resuming', async () => { + const appendMessages = vi.fn(async () => {}); + const emit = vi.fn(async () => {}); + const ports = stubPorts({ appendMessages, emit }); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + const rec = { ...newRecord('s1', 2), turn_count: 2 }; + enterFunctionExecute(rec, makeAssistant([fc])); + rec.state = 'function_execute'; + rec.work = { + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {} }, + is_error: false, + duration_ms: 1, + }, + }, + }; + + await finalizeBatch(ports, rec); + + expect(rec.state).toBe('finishing'); + expect(ports.finishSession).not.toHaveBeenCalled(); + expect(appendMessages).toHaveBeenCalledTimes(2); + const appended = appendMessages.mock.calls[1]?.[1] as Array<{ content: unknown[] }>; + expect(JSON.stringify(appended)).toContain('max_turns (2) reached'); + expect( + emit.mock.calls.some((call) => (call[1] as { type: string })?.type === 'message_complete'), + ).toBe(true); + }); +}); + +describe('createPorts().loadTrailingResultIds', () => { + function trackingIii(messages: Array<{ message: unknown }>): { + iii: ISdk; + calls: Array<{ function_id: string; payload: unknown }>; + } { + const calls: Array<{ function_id: string; payload: unknown }> = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { + calls.push({ function_id: req.function_id, payload: req.payload }); + if (req.function_id === 'session-tree::messages') return { messages }; + return null; + }), + } as unknown as ISdk; + return { iii, calls }; + } + + const resultMsg = (id: string) => ({ + message: { + role: 'function_result' as const, + function_call_id: id, + function_id: 'shell::run', + content: [], + details: {}, + is_error: false, + timestamp: 1, + }, + }); + /** Assistant that requested tool calls — the turn boundary. */ + const requestingAssistant = (id: string) => ({ + message: { + role: 'assistant' as const, + content: [{ type: 'function_call' as const, id, function_id: 'shell::run', arguments: {} }], + stop_reason: 'function_call' as const, + model: 'm', + provider: 'p', + timestamp: 1, + }, + }); + /** Synthetic assistant with no calls (e.g. the max_turns "loop stopped" notice). */ + const noticeAssistant = () => ({ + message: { + role: 'assistant' as const, + content: [{ type: 'text' as const, text: 'loop stopped' }], + stop_reason: 'end' as const, + model: 'm', + provider: 'p', + timestamp: 1, + }, + }); + + it('reads only session-tree::messages (no compactions) on the default leaf', async () => { + const { iii, calls } = trackingIii([resultMsg('fc-1'), resultMsg('fc-2')]); + + const ids = await createPorts(iii).loadTrailingResultIds('s1'); + + expect(ids).toEqual(new Set(['fc-1', 'fc-2'])); + expect(calls.map((c) => c.function_id)).toEqual(['session-tree::messages']); + expect(calls.some((c) => c.function_id === 'session-tree::compactions')).toBe(false); + expect(calls[0]?.payload).toEqual({ session_id: 's1' }); + }); + + it('stops at the requesting assistant, not collecting a prior turn run', async () => { + const { iii } = trackingIii([ + resultMsg('old'), + requestingAssistant('req'), + resultMsg('fc-1'), + resultMsg('fc-2'), + ]); + + const ids = await createPorts(iii).loadTrailingResultIds('s1'); + + expect(ids).toEqual(new Set(['fc-1', 'fc-2'])); + }); + + it('skips a trailing no-call notice so the results behind it stay deduped (max_turns replay)', async () => { + const { iii } = trackingIii([ + requestingAssistant('req'), + resultMsg('fc-1'), + resultMsg('fc-2'), + noticeAssistant(), + ]); + + const ids = await createPorts(iii).loadTrailingResultIds('s1'); + + expect(ids).toEqual(new Set(['fc-1', 'fc-2'])); }); }); diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index 54a36fe5..9b70c1f7 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -10,7 +10,8 @@ import { parseApprovalDecision } from '../../src/turn-orchestrator/function-awai import { handleExecute } from '../../src/turn-orchestrator/function-execute/process.js'; import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; import type { FunctionBatchWork } from '../../src/turn-orchestrator/function-execute/types.js'; -import type { AssistantMessage } from '../../src/types/agent-message.js'; +import { persistedTrailingResultIds } from '../../src/turn-orchestrator/state-runtime/transcript.js'; +import type { AgentMessage, AssistantMessage } from '../../src/types/agent-message.js'; afterEach(() => { vi.restoreAllMocks(); @@ -108,6 +109,7 @@ describe('handleExecute new flow', () => { terminate: false, }, }); + const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); enterFunctionExecute( @@ -118,9 +120,13 @@ describe('handleExecute new flow', () => { mockFinalizePersistence(); await handleExecute(iii, rec); - expect(rec.state).toBe('steering_check'); - expect(rec.function_results).toHaveLength(1); - expect(rec.function_results[0]?.function_call_id).toBe('fc-1'); + expect(rec.state).toBe('assistant_streaming'); + expect(rec.function_results).toEqual([]); + const turnEnd = emitSpy.mock.calls.find((call) => call[2]?.type === 'turn_end')?.[2] as + | { function_results: Array<{ function_call_id: string }> } + | undefined; + expect(turnEnd?.function_results).toHaveLength(1); + expect(turnEnd?.function_results[0]?.function_call_id).toBe('fc-1'); }); it('finishes the session when every function result terminates', async () => { @@ -146,7 +152,7 @@ describe('handleExecute new flow', () => { await handleExecute(iii, rec); - expect(rec.state).toBe('stopped'); + expect(rec.state).toBe('finishing'); }); it('does not re-emit function_execution_start for already-executed calls on re-entry', async () => { @@ -242,13 +248,17 @@ describe('handleExecute new flow', () => { executed: {}, }); mockFinalizePersistence(); + const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); await expect(handleExecute(iii, rec)).resolves.toBeUndefined(); - expect(rec.state).toBe('steering_check'); - expect(rec.function_results).toHaveLength(1); - expect(rec.function_results[0]?.is_error).toBe(true); - const details = rec.function_results[0]?.details as Record; + expect(rec.state).toBe('assistant_streaming'); + const turnEnd = emitSpy.mock.calls.find((call) => call[2]?.type === 'turn_end')?.[2] as + | { function_results: Array<{ is_error: boolean; details: Record }> } + | undefined; + expect(turnEnd?.function_results).toHaveLength(1); + expect(turnEnd?.function_results[0]?.is_error).toBe(true); + const details = turnEnd?.function_results[0]?.details as Record; expect(details?.status).toBe('denied'); expect(details?.denied_by).toBe('gate_unavailable'); expect(details?.function_id).toBe('shell::fs::write'); @@ -281,7 +291,7 @@ describe('handleExecute new flow', () => { (call) => (call[0] as { function_id: string }).function_id === 'shell::run', ); expect(shellCalls).toHaveLength(0); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('assistant_streaming'); }); it('replays persisted executed calls without re-dispatching (re-entry with pre-populated work.executed)', async () => { @@ -311,10 +321,10 @@ describe('handleExecute new flow', () => { await handleExecute(iii, rec); expect(dispatchSpy).not.toHaveBeenCalled(); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('assistant_streaming'); }); - it('transitions to steering_check after a successful hook dispatch', async () => { + it('resumes to assistant_streaming after a successful hook dispatch', async () => { vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ kind: 'result', result: { @@ -331,7 +341,7 @@ describe('handleExecute new flow', () => { mockFinalizePersistence(); await handleExecute(iii, rec); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('assistant_streaming'); }); it('emits turn lifecycle and sets turn_end_emitted when last_assistant is present', async () => { @@ -376,7 +386,7 @@ describe('handleExecute new flow', () => { await handleExecute(iii, rec); - expect(rec.state).toBe('steering_check'); + expect(rec.state).toBe('assistant_streaming'); expect(rec.turn_end_emitted).toBe(true); expect(emitSpy.mock.calls.some((call) => call[2]?.type === 'turn_end')).toBe(true); }); @@ -400,6 +410,9 @@ describe('handleExecute new flow', () => { let storedMessages: unknown[] = []; installMockTurnStore({ loadMessages: vi.fn(async () => storedMessages as never), + loadTrailingResultIds: vi.fn(async () => + persistedTrailingResultIds(storedMessages as AgentMessage[]), + ), appendMessages: vi.fn(async (_sid, msgs) => { storedMessages = [...storedMessages, ...msgs]; }), diff --git a/harness/tests/turn-orchestrator/preflight.test.ts b/harness/tests/turn-orchestrator/preflight.test.ts index 8ef858d3..f54a09aa 100644 --- a/harness/tests/turn-orchestrator/preflight.test.ts +++ b/harness/tests/turn-orchestrator/preflight.test.ts @@ -64,7 +64,6 @@ describe('runPreflight', () => { }); it('calls compact_now and returns compacted when projected >= usable', async () => { - // Tiny context window so even a small message overflows const { iii, calls } = makeIii({ modelsGetResult: { context_window: 10, max_output_tokens: 0 }, compactNowResult: { status: 'ok' }, @@ -120,6 +119,44 @@ describe('runPreflight', () => { expect(result).toBe('ok'); }); + it('uses the pre-resolved model and skips models::get', async () => { + const { iii, calls } = makeIii({ + modelsGetResult: { context_window: 1, max_output_tokens: 0 }, + }); + const modelMeta = { + id: 'claude-3', + provider: 'anthropic', + api: 'anthropic-messages', + display_name: 'Claude 3', + context_window: 200_000, + max_output_tokens: 8_096, + }; + + const result = await runPreflight( + iii, + 'session-1', + [smallMessage], + 'anthropic', + 'claude-3', + modelMeta, + ); + + expect(result).toBe('ok'); + expect(calls.some((c) => c.function_id === 'models::get')).toBe(false); + expect(calls.some((c) => c.function_id === 'context-compaction::compact_now')).toBe(false); + }); + + it('fetches models::get when no pre-resolved model is threaded', async () => { + const { iii, calls } = makeIii({ + modelsGetResult: { context_window: 200_000, max_output_tokens: 8_096 }, + }); + + const result = await runPreflight(iii, 'session-1', [smallMessage], 'anthropic', 'claude-3'); + + expect(result).toBe('ok'); + expect(calls.some((c) => c.function_id === 'models::get')).toBe(true); + }); + it('passes session_id and model info to compact_now', async () => { const { iii, calls } = makeIii({ modelsGetResult: { context_window: 10, max_output_tokens: 0 }, diff --git a/harness/tests/turn-orchestrator/provider-router.test.ts b/harness/tests/turn-orchestrator/provider-router.test.ts index 59eed034..0f27bb07 100644 --- a/harness/tests/turn-orchestrator/provider-router.test.ts +++ b/harness/tests/turn-orchestrator/provider-router.test.ts @@ -20,9 +20,6 @@ describe('decide', () => { it('routes llamacpp when provider=llamacpp', () => { expect(decide({ provider: 'llamacpp', model: 'Meta-Llama-3.1-8B' }).provider).toBe('llamacpp'); - // No heuristic — bare model id without explicit provider does not - // route to llamacpp (same posture as lmstudio; user-controlled ids - // overlap with HF-style ids from other services). expect(decide({ model: 'Meta-Llama-3.1-8B' }).provider).toBe('anthropic'); }); @@ -40,11 +37,6 @@ describe('decide', () => { model: 'lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF', }).provider, ).toBe('lmstudio'); - // No heuristic — a bare HF-style model id without an explicit - // provider MUST default to anthropic, not be magically routed to - // lmstudio because the namespace looks like a HF org/repo. This - // is the regression that the route's comment warns about - // ("model IDs are user-controlled … overlap with HF-style IDs"). const ambiguousIds = [ 'qwen/qwen3-4b-2507', 'google/gemma-2-9b-it', @@ -106,4 +98,39 @@ describe('buildInput', () => { const withoutLevel = buildInput(decision, writer, 'sys', [], []); expect(withoutLevel).not.toHaveProperty('thinking_level'); }); + + it('carries model_meta when provided and omits it when absent', () => { + const decision = { provider: 'anthropic', model: 'claude-sonnet-4-6' } as const; + const writer = { channel_id: 'c', access_key: 'k', direction: 'write' } as const; + const model_meta = { + id: 'claude-sonnet-4-6', + provider: 'anthropic', + api: 'anthropic-messages', + display_name: 'Claude Sonnet 4.6', + context_window: 1_000_000, + max_output_tokens: 64_000, + }; + const withMeta = buildInput(decision, writer, 'sys', [], [], undefined, model_meta); + expect(withMeta.model_meta).toEqual(model_meta); + const withoutMeta = buildInput(decision, writer, 'sys', [], []); + expect(withoutMeta).not.toHaveProperty('model_meta'); + }); + + it('carries resolution_key when provided and omits it when absent', () => { + const decision = { provider: 'anthropic', model: 'claude' } as const; + const writer = { channel_id: 'c', access_key: 'k', direction: 'write' } as const; + const withKey = buildInput( + decision, + writer, + 'sys', + [], + [], + undefined, + undefined, + 1738000000000, + ); + expect(withKey.resolution_key).toBe(1738000000000); + const withoutKey = buildInput(decision, writer, 'sys', [], []); + expect(withoutKey).not.toHaveProperty('resolution_key'); + }); }); diff --git a/harness/tests/turn-orchestrator/provisioning-layer.test.ts b/harness/tests/turn-orchestrator/provisioning-layer.test.ts index 18064dce..1e6ba47e 100644 --- a/harness/tests/turn-orchestrator/provisioning-layer.test.ts +++ b/harness/tests/turn-orchestrator/provisioning-layer.test.ts @@ -1,9 +1,21 @@ import { describe, expect, it, vi } from 'vitest'; -import { applyProvisioningOutcome } from '../../src/turn-orchestrator/provisioning/process.js'; +import type { Model } from '../../src/models-catalog/types.js'; import type { ProvisioningPorts } from '../../src/turn-orchestrator/provisioning/ports.js'; -import { processProvisioning } from '../../src/turn-orchestrator/provisioning/process.js'; +import { + applyProvisioningOutcome, + processProvisioning, +} from '../../src/turn-orchestrator/provisioning/process.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; +const FAKE_MODEL: Model = { + id: 'gpt-4', + provider: 'openai', + api: 'openai-responses', + display_name: 'GPT-4', + context_window: 200_000, + max_output_tokens: 16_000, +}; + function stubPorts(overrides: Partial = {}): ProvisioningPorts { return { loadRunRequest: vi.fn(async () => ({ @@ -14,6 +26,7 @@ function stubPorts(overrides: Partial = {}): ProvisioningPort function_schemas: [], })), saveRunRequest: vi.fn(async () => {}), + resolveModel: vi.fn(async () => null), ...overrides, }; } @@ -57,10 +70,42 @@ describe('processProvisioning', () => { expect(outcome.runRequest.system_prompt).toBe('custom override'); }); + + it('resolves the model once against the DECIDED provider and returns it', async () => { + const resolveModel = vi.fn(async () => FAKE_MODEL); + const ports = stubPorts({ + loadRunRequest: vi.fn(async () => ({ + provider: '', + model: 'gpt-4', + mode: null, + system_prompt: '', + function_schemas: [], + })), + resolveModel, + }); + const rec = { ...newRecord('s1'), state: 'provisioning' as const }; + + const outcome = await processProvisioning(ports, rec); + + expect(resolveModel).toHaveBeenCalledTimes(1); + expect(resolveModel).toHaveBeenCalledWith('openai', 'gpt-4'); + expect(outcome.model_meta).toEqual(FAKE_MODEL); + }); + + it('does not resolve when there is no model id', async () => { + const resolveModel = vi.fn(async () => FAKE_MODEL); + const ports = stubPorts({ resolveModel }); + const rec = { ...newRecord('s1'), state: 'provisioning' as const }; + + const outcome = await processProvisioning(ports, rec); + + expect(resolveModel).not.toHaveBeenCalled(); + expect(outcome.model_meta).toBeNull(); + }); }); describe('applyProvisioningOutcome', () => { - it('saves run request and transitions to assistant_streaming', async () => { + it('saves run request, persists model_meta on the record, and transitions', async () => { const saveRunRequest = vi.fn(async () => {}); const ports = stubPorts({ saveRunRequest }); const rec = { ...newRecord('s1'), state: 'provisioning' as const }; @@ -72,9 +117,31 @@ describe('applyProvisioningOutcome', () => { function_schemas: [], }; - await applyProvisioningOutcome(ports, rec, { kind: 'ready', runRequest }); + await applyProvisioningOutcome(ports, rec, { + kind: 'ready', + runRequest, + model_meta: FAKE_MODEL, + }); expect(saveRunRequest).toHaveBeenCalledWith('s1', runRequest); + expect(rec.model_meta).toEqual(FAKE_MODEL); + expect(rec.state).toBe('assistant_streaming'); + }); + + it('leaves model_meta unset when resolution returned null', async () => { + const ports = stubPorts(); + const rec = { ...newRecord('s1'), state: 'provisioning' as const }; + const runRequest = { + provider: 'openai', + model: 'gpt-4', + mode: 'agent' as const, + system_prompt: 'prompt', + function_schemas: [], + }; + + await applyProvisioningOutcome(ports, rec, { kind: 'ready', runRequest, model_meta: null }); + + expect(rec.model_meta).toBeUndefined(); expect(rec.state).toBe('assistant_streaming'); }); }); diff --git a/harness/tests/turn-orchestrator/run-abort.test.ts b/harness/tests/turn-orchestrator/run-abort.test.ts new file mode 100644 index 00000000..eb8a6e30 --- /dev/null +++ b/harness/tests/turn-orchestrator/run-abort.test.ts @@ -0,0 +1,119 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { execute, register } from '../../src/turn-orchestrator/run-abort.js'; +import { newRecord, type TurnStateRecord } from '../../src/turn-orchestrator/state.js'; + +type TriggerCall = { function_id: string; payload: unknown }; + +/** iii whose state::get on turn_state returns the given persisted record. */ +function fakeIiiWithRecord(record: unknown): { iii: ISdk; calls: TriggerCall[] } { + const calls: TriggerCall[] = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { + calls.push({ function_id: req.function_id, payload: req.payload }); + if (req.function_id === 'state::get') return record; + return null; + }), + registerFunction: vi.fn(), + } as unknown as ISdk; + return { iii, calls }; +} + +function parkedRecord(state: TurnStateRecord['state']): TurnStateRecord { + return { + ...newRecord('s1'), + state, + awaiting_approval: [{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], + } as TurnStateRecord; +} + +function streamSets(calls: TriggerCall[]): Array<{ type?: string }> { + return calls + .filter((c) => c.function_id === 'stream::set') + .map((c) => (c.payload as { data?: { type?: string } }).data ?? {}); +} + +describe('run::abort execute', () => { + it('aborts a parked approval turn: aborted message, turn_end, cleared approvals, finishing', async () => { + const { iii, calls } = fakeIiiWithRecord(parkedRecord('function_awaiting_approval')); + + const result = await execute(iii, { session_id: 's1' }); + + expect(result).toEqual({ session_id: 's1', aborted: true, state: 'finishing' }); + + const events = streamSets(calls).map((e) => e.type); + expect(events).toContain('message_complete'); + expect(events).toContain('turn_end'); + expect(events).not.toContain('agent_end'); + + const turnStateSet = calls.find( + (c) => + c.function_id === 'state::set' && (c.payload as { scope?: string }).scope === 'turn_state', + ); + const saved = (turnStateSet?.payload as { value: TurnStateRecord }).value; + expect(saved.state).toBe('finishing'); + expect(saved.awaiting_approval).toEqual([]); + expect(saved.last_assistant?.stop_reason).toBe('aborted'); + }); + + it('aborts a streaming-state record (best-effort interrupt between steps)', async () => { + const rec = { ...newRecord('s1'), state: 'assistant_streaming' as const }; + const { iii } = fakeIiiWithRecord(rec); + + const result = await execute(iii, { session_id: 's1' }); + + expect(result.aborted).toBe(true); + expect(result.state).toBe('finishing'); + }); + + it('no-ops when no turn is running', async () => { + const { iii, calls } = fakeIiiWithRecord({ ...newRecord('s1'), state: 'stopped' }); + + const result = await execute(iii, { session_id: 's1' }); + + expect(result).toEqual({ session_id: 's1', aborted: false, state: 'stopped' }); + expect(calls.some((c) => c.function_id === 'state::set')).toBe(false); + }); + + it('no-ops when the session has no record', async () => { + const { iii } = fakeIiiWithRecord(null); + + const result = await execute(iii, { session_id: 's1' }); + + expect(result).toEqual({ session_id: 's1', aborted: false, state: null }); + }); + + it('no-ops on a turn already finishing (it will stop on its own)', async () => { + const { iii, calls } = fakeIiiWithRecord({ ...newRecord('s1'), state: 'finishing' }); + + const result = await execute(iii, { session_id: 's1' }); + + expect(result).toEqual({ session_id: 's1', aborted: false, state: 'finishing' }); + expect(calls.some((c) => c.function_id === 'state::set')).toBe(false); + }); + + it('fails closed when the record read fails', async () => { + const iii = { + trigger: vi.fn(async (req: { function_id: string }) => { + if (req.function_id === 'state::get') throw new Error('state unavailable'); + return null; + }), + registerFunction: vi.fn(), + } as unknown as ISdk; + + await expect(execute(iii, { session_id: 's1' })).rejects.toThrow(/state unavailable/); + }); +}); + +describe('register', () => { + it('registers run::abort', () => { + const registered = new Map(); + const iii = { + registerFunction: (id: string, handler: unknown) => registered.set(id, handler), + } as unknown as ISdk; + + register(iii); + + expect(registered.has('run::abort')).toBe(true); + }); +}); diff --git a/harness/tests/turn-orchestrator/run-start.test.ts b/harness/tests/turn-orchestrator/run-start.test.ts index 5bddea0e..d1f7b4c7 100644 --- a/harness/tests/turn-orchestrator/run-start.test.ts +++ b/harness/tests/turn-orchestrator/run-start.test.ts @@ -1,6 +1,10 @@ import { describe, expect, it, vi } from 'vitest'; import { TriggerAction, type ISdk } from '../../src/runtime/iii.js'; -import { execute, register } from '../../src/turn-orchestrator/run-start.js'; +import { + STALE_TURN_TAKEOVER_MS, + execute, + register, +} from '../../src/turn-orchestrator/run-start.js'; import { RunStartPayloadSchema } from '../../src/turn-orchestrator/schemas.js'; import { TURN_STEP_QUEUE } from '../../src/turn-orchestrator/state-runtime/store.js'; @@ -22,7 +26,6 @@ function fakeIii(): { iii: ISdk; calls: TriggerCall[] } { return { iii, calls }; } -/** Shape console/web sends inside harness::trigger payload (real.ts). */ const consoleRunStartPayload = { session_id: 'sess-1', message_id: 'msg-1', @@ -38,7 +41,6 @@ const consoleRunStartPayload = { ], }; -/** Minimal shape harness/trigger.test.ts forwards to run::start. */ const harnessRunStartPayload = { session_id: 'sess-1', provider: 'anthropic', @@ -121,7 +123,7 @@ describe('register', () => { expect(handler).toBeDefined(); const result = await handler!(harnessRunStartPayload); - expect(result).toEqual({ session_id: 'sess-1' }); + expect(result).toEqual({ session_id: 'sess-1', started: true }); }); it('rejects invalid payloads at register boundary', async () => { @@ -147,7 +149,7 @@ describe('execute', () => { const result = await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); - expect(result).toEqual({ session_id: 'sess-1' }); + expect(result).toEqual({ session_id: 'sess-1', started: true }); const turnStateSet = calls.find( (c) => @@ -171,15 +173,104 @@ describe('execute', () => { await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); - // Single ensure per run — later loadMessages/appendMessages no longer re-ensure. const ensureCalls = calls.filter((c) => c.function_id === 'session-tree::ensure'); expect(ensureCalls).toHaveLength(1); expect(ensureCalls[0]?.payload).toEqual({ session_id: 'sess-1' }); - // The single ensure must precede the run's first tree write (append). const ensureIdx = calls.findIndex((c) => c.function_id === 'session-tree::ensure'); - const firstAppendIdx = calls.findIndex((c) => c.function_id === 'session-tree::append'); + const firstAppendIdx = calls.findIndex((c) => c.function_id === 'session-tree::append_batch'); expect(ensureIdx).toBeGreaterThanOrEqual(0); expect(firstAppendIdx).toBeGreaterThan(ensureIdx); }); + + function fakeIiiWithRecord(record: unknown): { iii: ISdk; calls: TriggerCall[] } { + const calls: TriggerCall[] = []; + const iii = { + trigger: async (req: { function_id: string; payload: T }): Promise => { + calls.push({ function_id: req.function_id, payload: req.payload }); + if (req.function_id === 'state::get') return record as R; + return null as R; + }, + registerFunction: vi.fn(), + } as unknown as ISdk; + return { iii, calls }; + } + + const inFlightRecord = (state: string, updated_at_ms = Date.now()) => ({ + session_id: 'sess-1', + state, + turn_count: 1, + function_results: [], + turn_end_emitted: false, + started_at_ms: 1, + updated_at_ms, + }); + + it('refuses to clobber a turn already in flight and makes no writes', async () => { + const { iii, calls } = fakeIiiWithRecord(inFlightRecord('assistant_streaming')); + + const result = await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); + + expect(result).toEqual({ session_id: 'sess-1', started: false, reason: 'session_busy' }); + expect(calls.some((c) => c.function_id === 'session-tree::ensure')).toBe(false); + expect(calls.some((c) => c.function_id === 'session-tree::append_batch')).toBe(false); + expect(calls.some((c) => c.function_id === 'state::set')).toBe(false); + }); + + it('treats a parked function_awaiting_approval turn as in flight', async () => { + const { iii } = fakeIiiWithRecord(inFlightRecord('function_awaiting_approval')); + + const result = await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); + + expect(result.started).toBe(false); + }); + + it('starts a fresh turn when the prior turn is terminal (stopped)', async () => { + const { iii, calls } = fakeIiiWithRecord({ + ...inFlightRecord('stopped'), + turn_end_emitted: true, + }); + + const result = await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); + + expect(result).toEqual({ session_id: 'sess-1', started: true }); + expect(calls.some((c) => c.function_id === 'session-tree::append_batch')).toBe(true); + }); + + it('takes over an in-flight record idle past the stale window (wedge recovery)', async () => { + const stale = Date.now() - STALE_TURN_TAKEOVER_MS - 1_000; + const { iii, calls } = fakeIiiWithRecord(inFlightRecord('finishing', stale)); + + const result = await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); + + expect(result).toEqual({ session_id: 'sess-1', started: true }); + expect(calls.some((c) => c.function_id === 'session-tree::append_batch')).toBe(true); + }); + + it('never stale-takes-over a parked approval (user-paced; abort is its escape)', async () => { + const stale = Date.now() - STALE_TURN_TAKEOVER_MS - 1_000; + const { iii } = fakeIiiWithRecord(inFlightRecord('function_awaiting_approval', stale)); + + const result = await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); + + expect(result.started).toBe(false); + }); + + it('fails closed when the guard read fails (no clobber on a state blip)', async () => { + const iii = { + trigger: vi.fn(async (req: { function_id: string }) => { + if (req.function_id === 'state::get') throw new Error('state worker unavailable'); + return null; + }), + registerFunction: vi.fn(), + } as unknown as ISdk; + + await expect(execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload))).rejects.toThrow( + /state worker unavailable/, + ); + const calls = (iii.trigger as ReturnType).mock.calls as Array< + [{ function_id: string }] + >; + expect(calls.some(([req]) => req.function_id === 'state::set')).toBe(false); + }); }); diff --git a/harness/tests/turn-orchestrator/run-transition.test.ts b/harness/tests/turn-orchestrator/run-transition.test.ts index 0ec81b9f..2a1022d8 100644 --- a/harness/tests/turn-orchestrator/run-transition.test.ts +++ b/harness/tests/turn-orchestrator/run-transition.test.ts @@ -82,7 +82,7 @@ describe('runTransition', () => { }); it('routes an unexpected handler throw to failed without re-throwing', async () => { - const rec: TurnStateRecord = { ...newRecord('s1'), state: 'steering_check' }; + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const store = installMockTurnStore({ loadRecord: vi.fn(async () => rec), }); @@ -90,7 +90,9 @@ describe('runTransition', () => { throw new Error('boom'); }); - const result = await runTransition({} as ISdk, 'steering_check', handle, { session_id: 's1' }); + const result = await runTransition({} as ISdk, 'assistant_streaming', handle, { + session_id: 's1', + }); expect(result).toMatchObject({ ok: true, to_state: 'failed' }); expect(store.saveRecord).toHaveBeenCalled(); }); diff --git a/harness/tests/turn-orchestrator/state.test.ts b/harness/tests/turn-orchestrator/state.test.ts index 6d341a46..f53a4966 100644 --- a/harness/tests/turn-orchestrator/state.test.ts +++ b/harness/tests/turn-orchestrator/state.test.ts @@ -3,7 +3,6 @@ import { TurnStateInvariantError } from '../../src/turn-orchestrator/errors.js'; import { parseAssistantStreamingRecord, parseFunctionBatchRecord, - parseSteeringCheckRecord, } from '../../src/turn-orchestrator/schemas.js'; import { type TurnStateRecord, @@ -85,18 +84,3 @@ describe('parseAssistantStreamingRecord', () => { expect(() => parseAssistantStreamingRecord(rec)).toThrow(TurnStateInvariantError); }); }); - -describe('parseSteeringCheckRecord', () => { - it('returns a validated record for steering_check', () => { - const rec = newRecord('s1'); - rec.state = 'steering_check'; - const steering = parseSteeringCheckRecord(rec); - expect(steering.state).toBe('steering_check'); - expect(steering.function_results).toEqual([]); - }); - - it('throws TurnStateInvariantError when session_id is missing', () => { - const rec = { state: 'steering_check' } as TurnStateRecord; - expect(() => parseSteeringCheckRecord(rec)).toThrow(TurnStateInvariantError); - }); -}); diff --git a/harness/tests/turn-orchestrator/steering-check-layer.test.ts b/harness/tests/turn-orchestrator/steering-check-layer.test.ts deleted file mode 100644 index ad6f567a..00000000 --- a/harness/tests/turn-orchestrator/steering-check-layer.test.ts +++ /dev/null @@ -1,116 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import type { AgentMessage } from '../../src/types/agent-message.js'; -import { - applySteeringCheckOutcome, - processSteeringCheck, - route, -} from '../../src/turn-orchestrator/steering-check/run.js'; -import type { SteeringCheckPorts } from '../../src/turn-orchestrator/steering-check/ports.js'; -import { newRecord } from '../../src/turn-orchestrator/state.js'; - -function stubPorts(overrides: Partial = {}): SteeringCheckPorts { - return { - loadMessages: vi.fn(async () => []), - appendMessages: vi.fn(async () => {}), - checkpoint: vi.fn(async () => {}), - loadRunRequest: vi.fn(async () => ({ - provider: 'openai', - model: 'gpt-4', - mode: null, - system_prompt: '', - function_schemas: [], - })), - saveRunRequest: vi.fn(async () => {}), - emitTurnEnd: vi.fn(async () => {}), - finishSession: vi.fn(async (rec) => { - rec.state = 'stopped'; - }), - emit: vi.fn(async () => {}), - ...overrides, - }; -} - -describe('route', () => { - it.each([ - [true, 'continue_after_function'], - [false, 'end_turn'], - ] as const)('route(%s) -> %s', (has_function_results, expected) => { - expect(route(has_function_results)).toBe(expected); - }); -}); - -describe('processSteeringCheck', () => { - it('returns continue_after_function when function_results present', async () => { - const ports = stubPorts(); - const rec = { - ...newRecord('s1'), - state: 'steering_check' as const, - function_results: [{ role: 'function_result', content: [] }] as never, - }; - - const outcome = await processSteeringCheck(ports, rec); - - expect(outcome).toEqual({ kind: 'continue_after_function' }); - }); - - it('returns max_turns_reached when cap hit on continue path', async () => { - const ports = stubPorts(); - const rec = { - ...newRecord('s1'), - state: 'steering_check' as const, - max_turns: 2, - turn_count: 2, - function_results: [{ role: 'function_result', content: [] }] as never, - }; - - const outcome = await processSteeringCheck(ports, rec); - - expect(outcome).toEqual({ kind: 'max_turns_reached' }); - }); - - it('returns end_turn when no function results', async () => { - const ports = stubPorts(); - const rec = { ...newRecord('s1'), state: 'steering_check' as const }; - - const outcome = await processSteeringCheck(ports, rec); - - expect(outcome).toEqual({ kind: 'end_turn' }); - }); -}); - -describe('applySteeringCheckOutcome', () => { - it('continue_after_function: transitions without reloading messages', async () => { - const loadMessages = vi.fn(async () => []); - const emitTurnEnd = vi.fn(async () => {}); - const ports = stubPorts({ loadMessages, emitTurnEnd }); - const rec = { - ...newRecord('s1'), - state: 'steering_check' as const, - function_results: [{ role: 'function_result', content: [] }] as never, - turn_end_emitted: true, - }; - - await applySteeringCheckOutcome(ports, rec, { kind: 'continue_after_function' }); - - expect(rec.state).toBe('assistant_streaming'); - expect(rec.function_results).toEqual([]); - expect(loadMessages).not.toHaveBeenCalled(); - expect(emitTurnEnd).not.toHaveBeenCalled(); - }); - - it('end_turn: emits turn_end and finishes session', async () => { - const emitTurnEnd = vi.fn(async () => {}); - const finishSession = vi.fn(async (rec) => { - rec.state = 'stopped'; - }); - const ports = stubPorts({ emitTurnEnd, finishSession }); - const rec = { ...newRecord('s1'), state: 'steering_check' as const }; - - await applySteeringCheckOutcome(ports, rec, { kind: 'end_turn' }); - - expect(rec.state).toBe('stopped'); - expect(rec.turn_end_emitted).toBe(true); - expect(emitTurnEnd).toHaveBeenCalledWith('s1', expect.anything(), []); - expect(finishSession).toHaveBeenCalled(); - }); -}); diff --git a/harness/tests/turn-orchestrator/steering.test.ts b/harness/tests/turn-orchestrator/steering.test.ts deleted file mode 100644 index a5b7603e..00000000 --- a/harness/tests/turn-orchestrator/steering.test.ts +++ /dev/null @@ -1,119 +0,0 @@ -import { afterEach, describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import * as events from '../../src/turn-orchestrator/events.js'; -import { installMockTurnStore } from './_helpers/mockTurnStore.js'; -import { newRecord, type TurnStateRecord } from '../../src/turn-orchestrator/state.js'; -import { handleSteering } from '../../src/turn-orchestrator/steering-check/process.js'; - -afterEach(() => { - vi.restoreAllMocks(); -}); - -function makeIii() { - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { - if (req.function_id === 'state::get') return null; - if (req.function_id === 'state::update') return { old_value: 0 }; - if (req.function_id === 'stream::set') return null; - return null; - }), - } as unknown as ISdk; - - return { iii }; -} - -function steeringRec( - session_id: string, - overrides: Partial = {}, -): TurnStateRecord { - const rec = newRecord(session_id); - rec.state = 'steering_check'; - return { ...rec, ...overrides }; -} - -describe('handleSteering', () => { - it('continue_after_function: clears function_results without reloading messages', async () => { - const { iii } = makeIii(); - const rec = steeringRec('s1', { - function_results: [{ role: 'function_result', content: [] }] as never, - turn_end_emitted: true, - }); - const store = installMockTurnStore(); - const emitSpy = vi.spyOn(events, 'emit'); - - await handleSteering(iii, rec); - - expect(rec.state).toBe('assistant_streaming'); - expect(rec.function_results).toEqual([]); - expect(store.loadMessages).not.toHaveBeenCalled(); - expect(emitSpy).not.toHaveBeenCalled(); - }); - - it('end_turn: emits turn_end then finishes the session (agent_end + stopped)', async () => { - const { iii } = makeIii(); - const rec = steeringRec('s1'); - const store = installMockTurnStore(); - const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleSteering(iii, rec); - - expect(rec.state).toBe('stopped'); - expect(rec.turn_end_emitted).toBe(true); - expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'turn_end' })); - expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'agent_end' })); - // agent_end is a signal: finishSession no longer reloads the transcript. - expect(store.loadMessages).not.toHaveBeenCalled(); - }); - - it('caps at max_turns: emits a max_turns assistant + message_complete + turn_end and tears down instead of continuing', async () => { - const { iii } = makeIii(); - const rec = steeringRec('s1', { - max_turns: 2, - turn_count: 2, - function_results: [{ role: 'function_result', content: [] }] as never, - }); - const store = installMockTurnStore(); - const appendSpy = store.appendMessages; - const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleSteering(iii, rec); - - expect(rec.state).toBe('stopped'); - expect(rec.turn_end_emitted).toBe(true); - expect(rec.last_assistant?.content[0]).toEqual( - expect.objectContaining({ type: 'text', text: expect.stringContaining('max_turns') }), - ); - expect(emitSpy).toHaveBeenCalledWith( - iii, - 's1', - expect.objectContaining({ type: 'message_complete' }), - ); - expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'turn_end' })); - // max_turns teardown appends the synthetic notice and finishes without a - // transcript reload (agent_end is a signal). - expect(store.loadMessages).not.toHaveBeenCalled(); - expect(appendSpy).toHaveBeenCalledWith('s1', [ - expect.objectContaining({ - content: expect.arrayContaining([ - expect.objectContaining({ text: expect.stringContaining('max_turns') }), - ]), - }), - ]); - }); - - it('continues to assistant_streaming when under max_turns (continue_after_function route)', async () => { - const { iii } = makeIii(); - const rec = steeringRec('s1', { - max_turns: 5, - turn_count: 2, - function_results: [{ role: 'function_result', content: [] }] as never, - }); - installMockTurnStore(); - vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleSteering(iii, rec); - - expect(rec.state).toBe('assistant_streaming'); - expect(rec.function_results).toEqual([]); - }); -}); diff --git a/harness/tests/turn-orchestrator/store.test.ts b/harness/tests/turn-orchestrator/store.test.ts index 5c5f9bd1..ece8f36b 100644 --- a/harness/tests/turn-orchestrator/store.test.ts +++ b/harness/tests/turn-orchestrator/store.test.ts @@ -79,7 +79,6 @@ describe('saveRecord no-op suppression', () => { const { iii, emits } = fakeIii(); const store = createTurnStore(iii); const rec = newRecord('sess-a'); - // Same view; differs only in updated_at_ms (which toView drops). const previous = { ...rec, updated_at_ms: rec.updated_at_ms + 1000 }; await store.saveRecord(rec, previous); @@ -139,10 +138,29 @@ describe('session-tree call reduction', () => { timestamp: 1, }; await createTurnStore(iii).appendMessages('sess-a', [msg]); - expect(calls.filter((c) => c === 'session-tree::append')).toHaveLength(1); + expect(calls.filter((c) => c === 'session-tree::append_batch')).toHaveLength(1); + expect(calls).not.toContain('session-tree::append'); expect(calls).not.toContain('session-tree::ensure'); }); + it('appendMessages batches a multi-message call into one trigger', async () => { + const { iii, calls } = recordingIii(); + const mk = (text: string) => ({ + role: 'user' as const, + content: [{ type: 'text' as const, text }], + timestamp: 1, + }); + await createTurnStore(iii).appendMessages('sess-a', [mk('a'), mk('b'), mk('c')]); + expect(calls.filter((c) => c === 'session-tree::append_batch')).toHaveLength(1); + }); + + it('appendMessages on an empty list triggers nothing', async () => { + const { iii, calls } = recordingIii(); + await createTurnStore(iii).appendMessages('sess-a', []); + expect(calls).not.toContain('session-tree::append_batch'); + expect(calls).not.toContain('session-tree::append'); + }); + it('ensureSession is the sole trigger of session-tree::ensure', async () => { const { iii, calls } = recordingIii(); await createTurnStore(iii).ensureSession('sess-a'); @@ -161,7 +179,7 @@ describe('shouldWakeStep', () => { }); it('rejects terminal state (stopped)', () => { - expect(shouldWakeStep('steering_check', 'stopped')).toBe(false); + expect(shouldWakeStep('function_execute', 'stopped')).toBe(false); }); it('rejects function_awaiting_approval (orchestrator parks here)', () => { diff --git a/harness/tests/types/provider.test.ts b/harness/tests/types/provider.test.ts index 29c5d572..24907203 100644 --- a/harness/tests/types/provider.test.ts +++ b/harness/tests/types/provider.test.ts @@ -30,6 +30,26 @@ describe('ProviderStreamInputSchema', () => { expect(JSON.stringify(schema)).toContain('writer_ref'); expect(JSON.stringify(schema)).toContain('model'); }); + + it('passes a sparse model_meta through instead of failing the stream', () => { + const sparse = ProviderStreamInputSchema.parse({ + writer_ref: { channel_id: 'c', access_key: 'k', direction: 'write' }, + model: 'm', + messages: [], + model_meta: { id: 'm', provider: 'anthropic' }, + }); + expect(sparse.model_meta).toEqual({ id: 'm', provider: 'anthropic' }); + }); + + it('coerces a non-object model_meta to absent', () => { + const parsed = ProviderStreamInputSchema.parse({ + writer_ref: { channel_id: 'c', access_key: 'k', direction: 'write' }, + model: 'm', + messages: [], + model_meta: 'garbage', + }); + expect(parsed.model_meta).toBeUndefined(); + }); }); describe('ProviderStreamOutputSchema', () => {