diff --git a/harness/src/turn-orchestrator/agent-trigger.ts b/harness/src/turn-orchestrator/agent-trigger.ts index 294ffd56..5ebe7555 100644 --- a/harness/src/turn-orchestrator/agent-trigger.ts +++ b/harness/src/turn-orchestrator/agent-trigger.ts @@ -24,10 +24,37 @@ export function missingFunctionResult(): FunctionResult { }); } +/** + * Some providers (and smaller models) emit a function-call argument that is a + * JSON-*encoded string* rather than a JSON object — e.g. `payload` arrives as + * `'{"sandbox_id":"…","cmd":"bash"}'` instead of `{ sandbox_id: …, cmd: … }`. + * Forwarded verbatim, that string fails server-side deserialization with an + * opaque `serialization error: missing field ` (observed against + * `sandbox::exec`, which then surfaced as a misleading `gate_unavailable`). + * + * Coerce: if `value` is a string whose trimmed form starts with `{`/`[` and + * parses to an object/array, return the parsed value. A non-JSON string (or a + * string that parses to a scalar) is left untouched so genuine string payloads + * pass through unharmed. + */ +export function coercePayload(value: unknown): unknown { + if (typeof value !== 'string') return value; + const trimmed = value.trim(); + if (!trimmed.startsWith('{') && !trimmed.startsWith('[')) return value; + try { + const parsed = JSON.parse(trimmed); + if (parsed !== null && typeof parsed === 'object') return parsed; + } catch { + // Not valid JSON — leave the original string in place. + } + return value; +} + export function unwrapAgentTrigger(fc: FunctionCall): FunctionCall { - const args = (fc.arguments ?? {}) as Record; + const coerced = coercePayload(fc.arguments ?? {}); + const args = (coerced && typeof coerced === 'object' ? coerced : {}) as Record; const fn = typeof args.function === 'string' ? args.function : ''; - const payload = args.payload ?? {}; + const payload = coercePayload(args.payload ?? {}); return { id: fc.id, function_id: fn, arguments: payload }; } @@ -35,7 +62,7 @@ export function agentTriggerTool(): unknown { return { name: TOOL_NAME, description: - 'Call any iii function on the bus. The argument `function` is the function id (use `::` separators, e.g. `shell::fs::ls`). The argument `payload` is the function-specific JSON arguments. Skills loaded into your context tell you which functions exist and what arguments they take. The result is whatever that function returns.', + 'Call any iii function on the bus. The argument `function` is the function id (use `::` separators, e.g. `shell::fs::ls`). The argument `payload` is the function-specific arguments as a JSON object (an object literal — never a JSON-encoded string; do not stringify it). Skills loaded into your context tell you which functions exist and what arguments they take. The result is whatever that function returns.', parameters: { type: 'object', properties: { @@ -100,10 +127,46 @@ function isFunctionNotFound(err: unknown): boolean { return false; } +/** + * Extract the first complete, brace-balanced JSON object substring from `text`, + * or `null` if there is none. Used to recover a structured error envelope that + * the wire wraps behind a human prefix, e.g. + * `invocation_failed: handler error: {"code":"S211","fix":{...},...}`. + * + * The scan is string-aware (braces inside JSON string values, and `\"` / + * `\\` escapes, do not affect depth) so nested objects like `"fix":{...}` and + * messages containing literal `{`/`}` are handled, and any trailing text after + * the closing brace is ignored. Returns `null` on an unbalanced/truncated + * object. + */ +export function extractFirstJsonObject(text: string): string | null { + const start = text.indexOf('{'); + if (start === -1) return null; + let depth = 0; + let inString = false; + let escaped = false; + for (let i = start; i < text.length; i++) { + const ch = text[i]; + if (inString) { + if (escaped) escaped = false; + else if (ch === '\\') escaped = true; + else if (ch === '"') inString = false; + continue; + } + if (ch === '"') inString = true; + else if (ch === '{') depth++; + else if (ch === '}') { + depth--; + if (depth === 0) return text.slice(start, i + 1); + } + } + return null; +} + /** * If `err` is an {@link IIIInvocationError} whose `.message` carries a * structured wire payload like - * `{"code":"S210","type":"...","message":"...","docs_url":"...","retryable":bool,...}`, + * `{"code":"S210","type":"...","message":"...","docs_url":"...","fix":{...},"retryable":bool,...}`, * return that payload. Otherwise return `null`. * * Several workers (notably `iii-worker`'s `sandbox::*`) serialize their @@ -113,26 +176,69 @@ function isFunctionNotFound(err: unknown): boolean { * S-code, the docs URL, the `fix` hint, and any structured retry info from * the calling agent. * + * The wire message is not always pure JSON: the engine prepends a human + * prefix such as `invocation_failed: handler error: ` before the `{`, so a + * naive `JSON.parse` of the whole message fails and the fix hint stays buried. + * We therefore try a direct parse first (fast path for pure-JSON messages), + * then fall back to extracting the first brace-balanced JSON object embedded in + * the message. + * * Only payloads with both a `code` string and a `message` string are * accepted, so generic JSON values (numbers, plain strings, partial * envelopes) still fall through to the `gate_unavailable` path. */ function extractStructuredHandlerError(err: unknown): Record | null { if (!(err instanceof IIIInvocationError)) return null; - // `IIIInvocationError`'s Error.message is `"${code}: ${rawMessage}"`. The + // `IIIInvocationError`'s Error.message may be `"${code}: ${rawMessage}"`. The // raw handler-emitted message lives after that prefix. const prefix = `${err.code}: `; const raw = err.message.startsWith(prefix) ? err.message.slice(prefix.length) : err.message; - let parsed: unknown; - try { - parsed = JSON.parse(raw); - } catch { - return null; + + const candidates: string[] = [raw]; + // Fallback: the structured envelope is embedded after a human prefix + // (e.g. `... handler error: {json}`). Pull the first balanced JSON object. + const embedded = extractFirstJsonObject(raw); + if (embedded && embedded !== raw) candidates.push(embedded); + + for (const candidate of candidates) { + let parsed: unknown; + try { + parsed = JSON.parse(candidate); + } catch { + continue; + } + if (!parsed || typeof parsed !== 'object') continue; + const obj = parsed as Record; + if (typeof obj.code === 'string' && typeof obj.message === 'string') return obj; + } + return null; +} + +/** Best-effort message string from an arbitrary thrown value. */ +function errorMessage(err: unknown): string { + if (err instanceof Error) return err.message; + if (typeof err === 'string') return err; + if (err && typeof err === 'object') { + const msg = (err as Record).message; + if (typeof msg === 'string') return msg; } - if (!parsed || typeof parsed !== 'object') return null; - const obj = parsed as Record; - if (typeof obj.code !== 'string' || typeof obj.message !== 'string') return null; - return obj; + return ''; +} + +/** + * True when `err` is a payload-decoding failure on the worker/engine side: the + * arguments could not be deserialized into the target function's request type. + * These are CALLER errors (wrong shape, missing required field, double-encoded + * JSON, base64 in the wrong field), not infrastructure failures, so they must + * not be reported as `gate_unavailable` — that hid the real problem and sent + * agents debugging the bus instead of their own arguments. + */ +export function isArgumentDecodeError(err: unknown): boolean { + const msg = errorMessage(err); + if (!msg) return false; + return /serialization error|missing field|invalid type|unknown field|did not match any variant|expected .*(?:found|but)/i.test( + msg, + ); } export function functionNotFoundHint(badFunctionId: string): string { @@ -154,6 +260,35 @@ export function functionNotFoundHint(badFunctionId: string): string { return suggestion ? `Did you mean \`${suggestion}\`? ${generic}` : generic; } +/** + * Best-effort fetch of a function's contract (request/response schema, + * description, owning worker) from the engine, used to enrich an + * `invalid_arguments` error so the caller's NEXT attempt is correctly shaped + * without having to call `engine::functions::info` itself. + * + * Deterministic and bounded: one trigger, no retries. Returns `null` on any + * failure (so the caller falls back to the plain error) and never fetches the + * contract OF the info call itself — that is pointless and risks a loop. + * Prefers the structured `details` payload of the info response; falls back to + * the raw value. + */ +export async function fetchContract(iii: ISdk, function_id: string): Promise { + if (!function_id || function_id === 'engine::functions::info') return null; + try { + const value = await iii.trigger({ + function_id: 'engine::functions::info', + payload: { function_id }, + }); + if (value && typeof value === 'object') { + const details = (value as Record).details; + if (details !== undefined && details !== null) return details; + } + return value ?? null; + } catch { + return null; + } +} + /** Trigger a function call and normalize success/error into a FunctionResult. */ export async function triggerFunctionCall( iii: ISdk, @@ -182,15 +317,47 @@ export async function triggerFunctionCall( if (structured) { return errorResult({ error: 'handler_error', ...structured }); } + // Payload-decode failures are caller errors, not infra outages: classify + // them as `invalid_arguments` with an actionable hint instead of burying + // them in `gate_unavailable`, which sent agents debugging the bus. + if (isArgumentDecodeError(err)) { + // Auto-attach the target's contract so the model's NEXT attempt is + // correctly shaped without spending a turn on engine::functions::info. + // Best-effort: `contract` is omitted if the fetch fails. + const contract = await fetchContract(iii, function_call.function_id); + const baseMessage = + "`payload` could not be decoded into this function's arguments. Pass `payload` as a JSON OBJECT (an object literal — NOT a JSON-encoded string) whose fields match the schema: every required field, correct value types, and no field the schema does not define."; + return errorResult({ + error: 'invalid_arguments', + function: function_call.function_id, + message: contract + ? `${baseMessage} The function's contract is included below as \`contract\` — build your payload from it and retry once.` + : `${baseMessage} Fetch the exact shape via engine::functions::info.`, + detail: errorMessage(err) || String(err), + ...(contract ? { contract } : {}), + }); + } return denialResult( gateUnavailableEnvelope(function_call.function_id, `trigger_failed: ${String(err)}`), ); } } +/** + * `function_call` carries the routing envelope (session id + call identity) that + * the approval hook and policy worker read. `targetCall` is what actually + * reaches the target function. They differ on purpose: the envelope spreads + * routing fields (including `function_id` = the TARGET id) at the top level, + * which would otherwise CLOBBER a same-named argument in the caller's payload — + * e.g. `engine::functions::info { function_id: "sandbox::create" }` would arrive + * as `function_id: "engine::functions::info"` and the engine would describe + * itself. The hook needs the envelope; the target must get the caller's + * untouched arguments. Defaults to `function_call` for callers that don't split. + */ export async function dispatchWithHook( iii: ISdk, function_call: FunctionCall, + targetCall: FunctionCall = function_call, ): Promise { const outcome = await consultBefore(iii, function_call); if (outcome.kind === 'deny') { @@ -200,7 +367,7 @@ export async function dispatchWithHook( return { kind: 'pending' }; } - const result = await triggerFunctionCall(iii, function_call); + const result = await triggerFunctionCall(iii, targetCall); return { kind: 'result', result }; } diff --git a/harness/src/turn-orchestrator/config.ts b/harness/src/turn-orchestrator/config.ts index d066bee9..d9528ad8 100644 --- a/harness/src/turn-orchestrator/config.ts +++ b/harness/src/turn-orchestrator/config.ts @@ -6,8 +6,6 @@ export type TurnOrchestratorConfig = { export function loadOrchestratorConfig(cfg: Record): TurnOrchestratorConfig { return { - system_default_skills: getStringArray(cfg, 'system_default_skills', [ - 'iii://iii-directory/index', - ]), + system_default_skills: getStringArray(cfg, 'system_default_skills', []), }; } diff --git a/harness/src/turn-orchestrator/function-execute/ports.ts b/harness/src/turn-orchestrator/function-execute/ports.ts index 28dde52f..7b70b20a 100644 --- a/harness/src/turn-orchestrator/function-execute/ports.ts +++ b/harness/src/turn-orchestrator/function-execute/ports.ts @@ -84,7 +84,10 @@ export function createPorts(iii: ISdk): FunctionExecutePorts { }, async dispatch(call, session_id) { - return dispatchWithHook(iii, withRoutingEnvelope(call, session_id)); + // Hook/policy see the routing envelope; the target function gets the + // caller's untouched args so a payload `function_id` (e.g. for + // engine::functions::info) is never clobbered by the routing `function_id`. + return dispatchWithHook(iii, withRoutingEnvelope(call, session_id), call); }, async triggerPreApproved(call) { diff --git a/harness/src/turn-orchestrator/system-prompt.ts b/harness/src/turn-orchestrator/system-prompt.ts index d0b4f48e..bce891d6 100644 --- a/harness/src/turn-orchestrator/system-prompt.ts +++ b/harness/src/turn-orchestrator/system-prompt.ts @@ -28,24 +28,106 @@ function isMode(value: unknown): value is Mode { const IDENTITY_PREAMBLE = `You are an iii agent worker. -To do anything, call \`agent_trigger\` with \`{ function, payload }\`. Function -names are namespaced (e.g., \`directory::skills::get\`); never -guess them — discover via the iii skill below. - -The skills that follow this preamble are your starting context. To load -more skills on demand, call \`directory::skills::get\` with the -skill id (the path after \`iii://\`). If iii-directory is unreachable, you -can list installed functions directly via \`engine::functions::list\`. - -Before calling a function for the FIRST time in this conversation, fetch -its per-function skill body with \`directory::skills::get\` using the id -\`/\` (e.g. \`sandbox/exec\`, not just \`sandbox\`). The -worker index lists what exists; the per-function skill lists the exact -request shape — required fields, value formats (single binary vs argv -array, base64-encoded bytes, "K=V" entries), and error codes. Guessing -field names from the index burns turns on retries and can put workers -into degraded states. Cache: a skill you already fetched this turn -doesn't need to be refetched. +You act ONLY by calling \`agent_trigger\` with \`{ function, payload }\`: +\`function\` is a namespaced id (always uses \`::\`, e.g. \`directory::skills::get\`); +\`payload\` is a JSON OBJECT of that function's arguments — an object literal, NEVER +a JSON-encoded string. NEVER invent function ids or argument names from memory — +discover them from the live engine (below). + +The skills that follow this preamble are your starting context. Discover +everything else from the live engine (the iii instance) and trust it over +memory or this prompt. There are THREE sources — use them in this priority: + +1. THE ENGINE (the iii instance) — your DEFAULT for everything about workers, + triggers, and functions, including their exact API contracts. ALWAYS look + here first: + - \`engine::functions::list\` — browse functions across all workers; takes NO id, optional filters \`{ prefix }\` / \`{ search }\`. Use this to find a function id. + - \`engine::functions::info { function_id: "::" }\` — ONE function's request / response schema, description, and owning worker. THIS IS THE API CONTRACT. The \`function_id\` argument is REQUIRED: pass the concrete id of the ONE TARGET function you intend to call (e.g. \`{ function_id: "shell::fs::ls" }\`). Pass the id of the function you actually want to call — NOT \`engine::functions::info\` itself. Passing \`engine::functions::info\` (or any \`engine::*\` / \`worker::*\` / \`directory::*\` discovery call) as the \`function_id\` just returns metadata ABOUT the info function (worker \`iii-engine-functions\`, no registered triggers) — useless, and a sign you introspected the wrong thing. Those discovery calls are already documented in this preamble; never introspect them. Omitting \`function_id\` entirely errors with \`missing field \`function_id\`\`. To DISCOVER which functions exist, use \`engine::functions::list\`, never \`info\`; only call \`info\` on a concrete TARGET id you got from \`list\`. + - \`engine::workers::list\` — every WS-connected (currently RUNNING) worker. + - \`worker::list\` — installed + running workers, incl. daemon-managed builtins. + - \`engine::triggers::list\` — every trigger TYPE published (legal \`type:\` values); \`engine::triggers::info { id }\` — that type's config / return schema + provider. + - \`engine::registered-triggers::list\` — every trigger INSTANCE already bound (filter \`function_id\` / \`worker\`). + To check a worker is RUNNING, use \`engine::workers::list\` plus \`worker::list\` — + never \`directory::registry::workers::list\` (that is the registry, source 3). To + check a function is callable, use \`engine::functions::list { search: "" }\`. + +2. THE DIRECTORY (skills) — the prose HOW-TO for a worker: the iii-native way to + use it, its lifecycle and boundaries, and what NOT to do. This is knowledge the + per-function schema does NOT carry, so LOAD IT BEFORE you build with, author, or + operate a worker — it is not a last resort. See WHAT skills exist with + \`directory::skills::index\` (a per-worker overview of every skill on this + engine; \`directory::skills::list\` gives one row per individual skill), then + read the one you need with \`directory::skills::get { id: "" }\` (id is + the worker name, the path after \`iii://\`; most workers ship ONE self-contained + skill, so \`/\` usually 404s and wastes a turn). Division of + labor: the engine (source 1) gives the exact per-call CONTRACT; the skill gives + the APPROACH. Use both — the contract alone is not enough to build correctly. + +3. THE REGISTRY — ONLY to ADD A NEW worker that is not already on the engine. + Search with \`directory::registry::workers::list { search }\` / + \`directory::registry::workers::info { name }\`, then install with \`worker::add\`. + The registry lists INSTALLABLE workers; it is NOT a signal that one is running, + and NOT where you look up the API of a worker you already have. + +When the task is to BUILD, AUTHOR, or OPERATE something on iii (a worker, an API, +a job, a runtime), do this BEFORE writing any code: identify EVERY worker the task +touches — including the one named in the task itself and the runtime it runs on — +and load each one's skill with \`directory::skills::get\`. The skill tells you the +iii-native way to do it. Do NOT carry patterns from other ecosystems in from +memory — standalone servers, package managers, framework conventions, ad-hoc +processes — without first checking the skill. iii almost always has its own way +(a trigger, a built-in worker, a lifecycle), and a foreign pattern usually does +not run here and wastes the session. If you find yourself reaching for a tool that +is not an iii function, stop and read the relevant worker's skill first. + +Two rules govern EVERY \`agent_trigger\` call. Break either and the call fails: + +RULE 1 — \`payload\` is a JSON OBJECT, never a string. Pass +\`{ "field": "value" }\`, NOT \`"{ \\"field\\": \\"value\\" }"\`. This holds even +when a field's VALUE is long or multi-line (source code, JSON, markdown, HTML): +keep \`payload\` an object literal and put that long text as the ordinary string +VALUE of one field. Do NOT serialize the whole \`payload\` into a string — that is +the single most common failure, and the worker rejects it with \`invalid_arguments\` +/ \`serialization error: invalid type: string ..., expected struct\`. + WRONG payload: "{\\"path\\":\\"/a.js\\",\\"content\\":\\"line1\\\\nline2\\"}" + RIGHT payload: { "path": "/a.js", "content": "line1\\nline2" } + +RULE 2 — BEFORE you call ANY function, fetch its API contract from the engine by +passing that function's id as \`function_id\`, e.g. +\`engine::functions::info { function_id: "shell::fs::ls" }\` (the \`function_id\` is +the TARGET function, never \`engine::functions::info\` itself; omitting it errors +\`missing field \`function_id\`\`). A one-line description from +\`engine::functions::list\` is a HINT, not the contract — \`info\` is the contract. +Then shape your \`payload\` to match that schema EXACTLY: every required field, +the right value formats (single binary vs argv array, inline string vs base64, +"K=V" entries), and NO field the schema does not define. Guessing or remembering +field names burns turns on retries and can put workers into degraded states. +Cache: a contract or skill you already fetched this turn doesn't need refetching. + +When a call returns an error, READ the error and CHANGE something before the next +call — never resend the same \`function\` + \`payload\` unchanged: + - \`invalid_arguments\` / \`serialization error\` / \`missing field\` / unknown + field → YOUR payload is wrong (string instead of object, missing a required + field, an extra field, or a wrong type). Re-read the contract via + \`engine::functions::info\`, fix the object, keep the SAME function. + - \`function_not_found\` → the id is wrong. Re-check it with + \`engine::functions::list\`; do not retry the bad id. + - a structured error carrying a \`code\` and a \`fix\` hint → apply the \`fix\` + (e.g. add the exact field it names) instead of guessing. + - a timeout or an infrastructure/transport error that REPEATS → stop retrying + the same way. The approach is wrong, not the arguments: simplify the call, + split the work into smaller steps, or report the blocker and stop. Re-issuing + a call that just timed out wastes the turn and will not start succeeding. +Resending an identical failed call is never the fix. + +To AUTHOR an iii worker, read the \`iii\` skill first. You construct exactly +ONE symbol from the SDK: \`registerWorker\`. The value it RETURNS exposes +\`registerFunction\`, \`registerTrigger\`, and \`trigger\` as METHODS — always call +them as \`iii.registerFunction(...)\`. They are NOT top-level exports: +destructuring them from the SDK import yields \`undefined\` and throws +\`TypeError: registerFunction is not a function\`. Before writing code, read the +target runtime's worker skill for its specifics (module system, how to write +files, how to start the process); do not assume them. For any HTTP(S) request — fetching a URL, calling a JSON/REST API, or downloading a file — ALWAYS use the \`web::fetch\` function via \`agent_trigger\`, diff --git a/harness/tests/turn-orchestrator/agent-trigger.test.ts b/harness/tests/turn-orchestrator/agent-trigger.test.ts index 5f76ef3c..4ba5e3fc 100644 --- a/harness/tests/turn-orchestrator/agent-trigger.test.ts +++ b/harness/tests/turn-orchestrator/agent-trigger.test.ts @@ -3,10 +3,15 @@ import { IIIInvocationError, type ISdk } from '../../src/runtime/iii.js'; import { TOOL_NAME, agentTriggerTool, + coercePayload, dispatchWithHook, + extractFirstJsonObject, + fetchContract, functionNotFoundHint, + isArgumentDecodeError, isErrorResult, triggerFunctionCall, + unwrapAgentTrigger, } from '../../src/turn-orchestrator/agent-trigger.js'; import * as hookModule from '../../src/turn-orchestrator/hook.js'; @@ -28,6 +33,118 @@ describe('agent_trigger tool schema', () => { }); }); +describe('coercePayload (H1: double-encoded JSON args)', () => { + it('parses a stringified JSON object into an object', () => { + expect(coercePayload('{"sandbox_id":"abc","cmd":"bash"}')).toEqual({ + sandbox_id: 'abc', + cmd: 'bash', + }); + }); + + it('parses a stringified JSON array', () => { + expect(coercePayload('["-c", "echo hi"]')).toEqual(['-c', 'echo hi']); + }); + + it('tolerates surrounding whitespace', () => { + expect(coercePayload(' { "a": 1 } ')).toEqual({ a: 1 }); + }); + + it('leaves a real object untouched (identity)', () => { + const obj = { a: 1 }; + expect(coercePayload(obj)).toBe(obj); + }); + + it('leaves a non-JSON string untouched', () => { + expect(coercePayload('just a string')).toBe('just a string'); + }); + + it('does not coerce a JSON scalar string (only object/array)', () => { + // "5" is valid JSON but a scalar — a string that does not start with {/[ + // is left verbatim so genuine string payloads survive. + expect(coercePayload('5')).toBe('5'); + expect(coercePayload('"quoted"')).toBe('"quoted"'); + }); + + it('leaves malformed JSON-looking strings untouched', () => { + expect(coercePayload('{not valid json')).toBe('{not valid json'); + }); +}); + +describe('unwrapAgentTrigger (H1)', () => { + it('passes an object payload through unchanged', () => { + const out = unwrapAgentTrigger({ + id: 'fc-1', + function_id: '', + arguments: { function: 'sandbox::exec', payload: { sandbox_id: 'x', cmd: 'ls' } }, + }); + expect(out.function_id).toBe('sandbox::exec'); + expect(out.arguments).toEqual({ sandbox_id: 'x', cmd: 'ls' }); + }); + + it('coerces a stringified payload (the sandbox::exec failure mode)', () => { + const out = unwrapAgentTrigger({ + id: 'fc-1', + function_id: '', + arguments: { + function: 'sandbox::exec', + payload: '{"sandbox_id":"x","cmd":"bash","args":["-c","echo hi"]}', + }, + }); + expect(out.function_id).toBe('sandbox::exec'); + expect(out.arguments).toEqual({ sandbox_id: 'x', cmd: 'bash', args: ['-c', 'echo hi'] }); + }); + + it('coerces when the entire arguments object was JSON-encoded', () => { + const out = unwrapAgentTrigger({ + id: 'fc-1', + function_id: '', + arguments: '{"function":"sandbox::list","payload":{}}', + }); + expect(out.function_id).toBe('sandbox::list'); + expect(out.arguments).toEqual({}); + }); + + it('defaults payload to {} when absent', () => { + const out = unwrapAgentTrigger({ + id: 'fc-1', + function_id: '', + arguments: { function: 'sandbox::list' }, + }); + expect(out.arguments).toEqual({}); + }); +}); + +describe('isArgumentDecodeError (H2)', () => { + it('matches serde "missing field" / "serialization error"', () => { + expect( + isArgumentDecodeError( + new Error('invocation_failed: serialization error: missing field `sandbox_id`'), + ), + ).toBe(true); + }); + + it('matches untagged-enum decode failures (content_b64 mistake)', () => { + expect( + isArgumentDecodeError( + new Error('data did not match any variant of untagged enum WriteContent'), + ), + ).toBe(true); + }); + + it('matches "invalid type" serde errors', () => { + expect(isArgumentDecodeError(new Error('invalid type: string, expected a map'))).toBe(true); + }); + + it('does not match opaque handler text', () => { + expect(isArgumentDecodeError(new Error('opaque handler text'))).toBe(false); + }); + + it('reads .message off plain error-shaped objects', () => { + expect(isArgumentDecodeError({ message: 'missing field `path`' })).toBe(true); + expect(isArgumentDecodeError({ message: 'all good' })).toBe(false); + }); +}); + describe('isErrorResult', () => { it('treats details.error as error', () => { expect( @@ -106,6 +223,87 @@ describe('triggerFunctionCall', () => { expect(result.details).not.toMatchObject({ denied_by: 'gate_unavailable' }); }); + it('classifies serde decode failures as invalid_arguments, not gate_unavailable (H2)', async () => { + // The exact failure from session rjgvxowc: a double-encoded sandbox::exec + // payload reached the daemon as a string, serde could not read the struct + // fields, and the harness mislabeled it gate_unavailable. + const triggerError = new IIIInvocationError({ + code: 'HANDLER', + message: 'invocation_failed: serialization error: missing field `sandbox_id`', + function_id: 'sandbox::exec', + }); + const iii = { + trigger: vi.fn().mockRejectedValue(triggerError), + } as unknown as ISdk; + const result = await triggerFunctionCall(iii, { + id: 'fc-1', + function_id: 'sandbox::exec', + arguments: {}, + }); + expect(isErrorResult(result)).toBe(true); + expect(result.details).toMatchObject({ error: 'invalid_arguments', function: 'sandbox::exec' }); + expect(result.details).not.toMatchObject({ denied_by: 'gate_unavailable' }); + }); + + it('auto-attaches the target contract to an invalid_arguments error (chosen fix)', async () => { + // When a call fails to decode, the harness fetches the target's contract via + // engine::functions::info and embeds it so the model's NEXT attempt is + // correctly shaped without spending a turn on info. + const decodeError = new IIIInvocationError({ + code: 'HANDLER', + message: 'invocation_failed: serialization error: invalid type: string, expected struct', + function_id: 'sandbox::fs::write', + }); + const contractDetails = { + function_id: 'sandbox::fs::write', + worker_name: 'iii-sandbox', + request_schema: { sandbox_id: 'string', path: 'string', content: 'string' }, + }; + const trigger = vi.fn().mockImplementation(async ({ function_id }) => { + if (function_id === 'engine::functions::info') return { details: contractDetails }; + throw decodeError; + }); + const iii = { trigger } as unknown as ISdk; + const result = await triggerFunctionCall(iii, { + id: 'fc-1', + function_id: 'sandbox::fs::write', + arguments: 'not-an-object', + }); + expect(result.details).toMatchObject({ + error: 'invalid_arguments', + function: 'sandbox::fs::write', + contract: contractDetails, + }); + expect(JSON.stringify(result.details)).toContain('build your payload from it'); + // It fetched the contract for the TARGET, not for the info call itself. + expect(trigger).toHaveBeenCalledWith( + expect.objectContaining({ + function_id: 'engine::functions::info', + payload: { function_id: 'sandbox::fs::write' }, + }), + ); + }); + + it('omits contract and keeps the plain hint when the info fetch also fails', async () => { + // Best-effort: if engine::functions::info itself errors, fall back to the + // generic message rather than failing the error path. + const decodeError = new IIIInvocationError({ + code: 'HANDLER', + message: 'invocation_failed: serialization error: missing field `path`', + function_id: 'sandbox::fs::write', + }); + const trigger = vi.fn().mockRejectedValue(decodeError); + const iii = { trigger } as unknown as ISdk; + const result = await triggerFunctionCall(iii, { + id: 'fc-1', + function_id: 'sandbox::fs::write', + arguments: {}, + }); + expect(result.details).toMatchObject({ error: 'invalid_arguments' }); + expect(result.details).not.toHaveProperty('contract'); + expect(JSON.stringify(result.details)).toContain('engine::functions::info'); + }); + it('falls back to gate_unavailable when message is not structured JSON', async () => { const triggerError = new IIIInvocationError({ code: 'HANDLER', @@ -123,6 +321,46 @@ describe('triggerFunctionCall', () => { expect(result.details).toMatchObject({ denied_by: 'gate_unavailable' }); }); + it('surfaces a structured handler error (S211 + fix hint) embedded after a human prefix', async () => { + // Live failure from session jg7u4c7i: the engine wrapped the structured + // envelope as `invocation_failed: handler error: {json}`, so the old + // extractor (pure JSON.parse) failed and the {code, fix} hint was buried in + // gate_unavailable. The extractor must now pull the embedded JSON and the + // model must receive code + fix. + const s211 = { + code: 'S211', + docs_url: + 'https://github.com/iii-hq/iii/blob/main/crates/iii-worker/src/sandbox_daemon/README.md#S211', + fix: { parents: true }, + fix_note: + 'merge `fix` into the original request and resubmit: `parents: true` auto-creates missing intermediate directories', + message: 'parent directory not found: parent not found: /app', + retryable: false, + type: 'filesystem', + }; + const wireError = new IIIInvocationError({ + code: 'HANDLER', + message: `invocation_failed: handler error: ${JSON.stringify(s211)}`, + function_id: 'sandbox::fs::write', + }); + const iii = { trigger: vi.fn().mockRejectedValue(wireError) } as unknown as ISdk; + const result = await triggerFunctionCall(iii, { + id: 'fc-1', + function_id: 'sandbox::fs::write', + arguments: { path: '/app/index.js', content: 'x', sandbox_id: 's1' }, + }); + expect(result.details).toMatchObject({ + error: 'handler_error', + code: 'S211', + fix: { parents: true }, + message: 'parent directory not found: parent not found: /app', + }); + expect(result.details).not.toMatchObject({ denied_by: 'gate_unavailable' }); + // The fix hint is in the text the model reads, too. + const text = (result.content[0] as { text: string }).text; + expect(text).toContain('"fix":{"parents":true}'); + }); + it('falls back to gate_unavailable when JSON message lacks code/message fields', async () => { // A partial JSON payload (e.g. `{"hint": "..."}`) does NOT count as a // structured envelope — only `{code, message, ...}` shapes get the @@ -145,6 +383,81 @@ describe('triggerFunctionCall', () => { }); }); +describe('extractFirstJsonObject', () => { + it('pulls the JSON object out from behind a human prefix', () => { + const out = extractFirstJsonObject('invocation_failed: handler error: {"code":"S211"}'); + expect(out).toBe('{"code":"S211"}'); + }); + + it('handles nested objects (depth tracking)', () => { + const out = extractFirstJsonObject('x: {"code":"S211","fix":{"parents":true},"n":1}'); + expect(out).toBe('{"code":"S211","fix":{"parents":true},"n":1}'); + expect(JSON.parse(out as string)).toMatchObject({ fix: { parents: true } }); + }); + + it('ignores braces inside string values (string-aware)', () => { + const out = extractFirstJsonObject('p: {"message":"weird } and { in text","code":"S1"}'); + expect(JSON.parse(out as string)).toMatchObject({ + message: 'weird } and { in text', + code: 'S1', + }); + }); + + it('handles escaped quotes inside strings', () => { + const out = extractFirstJsonObject('p: {"message":"say \\"hi\\" }now","code":"S1"}'); + expect(JSON.parse(out as string)).toMatchObject({ message: 'say "hi" }now', code: 'S1' }); + }); + + it('ignores trailing text after the closing brace', () => { + const out = extractFirstJsonObject('{"a":1} trailing junk }'); + expect(out).toBe('{"a":1}'); + }); + + it('returns null when there is no object', () => { + expect(extractFirstJsonObject('no json here')).toBeNull(); + }); + + it('returns null on an unbalanced/truncated object', () => { + expect(extractFirstJsonObject('handler error: {"code":"S211","fix":{')).toBeNull(); + }); +}); + +describe('fetchContract', () => { + it('returns the structured details of the info response', async () => { + const details = { function_id: 'shell::fs::ls', request_schema: { path: 'string' } }; + const trigger = vi.fn().mockResolvedValue({ content: [], details, terminate: false }); + const out = await fetchContract({ trigger } as unknown as ISdk, 'shell::fs::ls'); + expect(out).toEqual(details); + }); + + it('returns the raw value when there is no details envelope', async () => { + const raw = { function_id: 'shell::fs::ls', schema: {} }; + const trigger = vi.fn().mockResolvedValue(raw); + const out = await fetchContract({ trigger } as unknown as ISdk, 'shell::fs::ls'); + expect(out).toEqual(raw); + }); + + it('never introspects the info call itself (no self-fetch / loop)', async () => { + const trigger = vi.fn(); + const out = await fetchContract({ trigger } as unknown as ISdk, 'engine::functions::info'); + expect(out).toBeNull(); + expect(trigger).not.toHaveBeenCalled(); + }); + + it('returns null on an empty function id', async () => { + const trigger = vi.fn(); + const out = await fetchContract({ trigger } as unknown as ISdk, ''); + expect(out).toBeNull(); + expect(trigger).not.toHaveBeenCalled(); + }); + + it('returns null (not throw) when the info fetch rejects', async () => { + const trigger = vi.fn().mockRejectedValue(new Error('bus down')); + const out = await fetchContract({ trigger } as unknown as ISdk, 'shell::fs::ls'); + expect(out).toBeNull(); + }); +}); + describe('dispatchWithHook returns DispatchResult', () => { it('returns kind:pending when consultBefore returns pending', async () => { vi.spyOn(hookModule, 'consultBefore').mockResolvedValue({ kind: 'pending' }); @@ -193,6 +506,34 @@ describe('dispatchWithHook returns DispatchResult', () => { expect(out.kind).toBe('result'); }); + it('triggers the TARGET with clean args, not the routing envelope (function_id clobber fix)', async () => { + // Regression: engine::functions::info { function_id: "sandbox::create" } was + // arriving with function_id overwritten by the routing envelope (which + // spreads function_id = the target id), so the engine described itself. The + // hook sees the envelope; the target must get the caller's untouched args. + vi.spyOn(hookModule, 'consultBefore').mockResolvedValue({ kind: 'allow' }); + const trigger = vi.fn().mockResolvedValue({ ok: true }); + const iii = { trigger } as unknown as ISdk; + const hookCall = { + id: 'fc-1', + function_id: 'engine::functions::info', + arguments: { function_id: 'engine::functions::info', session_id: 's1' }, + }; + const targetCall = { + id: 'fc-1', + function_id: 'engine::functions::info', + arguments: { function_id: 'sandbox::create' }, + }; + const out = await dispatchWithHook(iii, hookCall, targetCall); + expect(out.kind).toBe('result'); + expect(trigger).toHaveBeenCalledWith( + expect.objectContaining({ + function_id: 'engine::functions::info', + payload: { function_id: 'sandbox::create' }, + }), + ); + }); + it('attaches a "did you mean worker::fn" hint when function_id is a canonical skill path', async () => { // Observed in QA against google/gemma-4-e4b: model saw // `sandbox/skills/sandbox/create` in directory::skills::list, diff --git a/harness/tests/turn-orchestrator/config.test.ts b/harness/tests/turn-orchestrator/config.test.ts index 7948a32b..01fadda9 100644 --- a/harness/tests/turn-orchestrator/config.test.ts +++ b/harness/tests/turn-orchestrator/config.test.ts @@ -2,9 +2,11 @@ import { describe, expect, it } from 'vitest'; import { loadOrchestratorConfig } from '../../src/turn-orchestrator/config.js'; describe('loadOrchestratorConfig', () => { - it('applies defaults for system skills', () => { + it('defaults system_default_skills to empty when no config is supplied', () => { + // The code-level fallback is intentionally empty; the running engine + // supplies the actual list via config.yaml's system_default_skills. const cfg = loadOrchestratorConfig({}); - expect(cfg.system_default_skills).toEqual(['iii://iii-directory/index']); + expect(cfg.system_default_skills).toEqual([]); }); it('reads system_default_skills from config', () => { diff --git a/harness/tests/turn-orchestrator/system-prompt.test.ts b/harness/tests/turn-orchestrator/system-prompt.test.ts index da7ac12f..32fd0aa4 100644 --- a/harness/tests/turn-orchestrator/system-prompt.test.ts +++ b/harness/tests/turn-orchestrator/system-prompt.test.ts @@ -37,15 +37,159 @@ describe('buildSystemPrompt', () => { expect(out).toContain('@fn(directory::skills::get)'); }); - it('preamble instructs fetching per-function skill before first call', () => { - // Regression: kimi-k2.6 (and other LLMs) often jump from the worker - // index straight to a function call, guess field names, and burn - // turns on retries. The preamble must explicitly tell them to fetch - // the per-function skill body first. + it('preamble mandates checking the contract via engine::functions::info before any call (H6)', () => { + // Regression: LLMs jump straight to a function call and guess field + // names, burning turns on retries. The preamble must (a) make + // engine::functions::info the mandatory contract check before ANY call, + // and (b) steer skill reads to the worker id, since the old + // `/` fetch usually 404s. const out = buildSystemPrompt([]); - expect(out).toContain('FIRST time'); + expect(out).toContain('BEFORE you call ANY function'); + expect(out).toContain('engine::functions::info'); + expect(out).toContain('THIS IS THE API CONTRACT.'); + // Generic anti-pattern id (no worker-specific example). expect(out).toContain('/'); - expect(out).toContain('sandbox/exec'); + }); + + it('preamble marks function_id as REQUIRED on engine::functions::info with a concrete example', () => { + // Root cause observed live: the model passed `engine::functions::info` AS + // the function_id (introspecting the discovery tool itself), which returns + // that function's own metadata. The preamble must require function_id, show + // a concrete TARGET value, forbid passing a discovery call as the id, and + // describe the real missing-field error (NOT "self-metadata on omit"). + const out = buildSystemPrompt([]); + expect(out).toContain('`function_id` argument is REQUIRED'); + expect(out).toContain('{ function_id: "shell::fs::ls" }'); + // Self-introspection trap: passing the info call's own id returns info-about-info. + expect(out).toMatch(/metadata ABOUT the info function/); + expect(out).toMatch(/never introspect them/); + // Accurate omit behavior is a hard error, not self-metadata. + expect(out).toMatch(/missing field/); + // Contrast: list is the no-id browse call. + expect(out).toContain('takes NO id'); + }); + + it('preamble forbids serializing payload as a string, with a WRONG/RIGHT example (haiku stringify trap)', () => { + // Root cause observed live (haiku, todo-app build): the model put the whole + // payload as a JSON-ENCODED STRING into `payload`, so the worker rejected it + // with `serialization error: invalid type: string ..., expected struct`. The + // preamble must state payload is an object literal, call out the long/multi- + // line-value case (the trigger for stringifying source code), and show the + // exact wrong-vs-right shape. + const out = buildSystemPrompt([]); + expect(out).toContain('`payload` is a JSON OBJECT, never a string'); + expect(out).toMatch(/expected struct/); + expect(out).toContain('WRONG'); + expect(out).toContain('RIGHT'); + // Long/multi-line values must still live as a string VALUE of a field. + expect(out).toMatch(/long or multi-line/); + }); + + it('preamble teaches error-driven correction and forbids blind identical retries', () => { + // Root cause observed live: the model re-sent the same failing call (10 + // timeouts, repeated VM-boot failures) without changing anything, and + // mis-read `invalid_arguments` as an infra problem. The preamble must map + // each error class to a corrective action and ban resending an unchanged + // failed call. + const out = buildSystemPrompt([]); + expect(out).toContain('never resend the same `function` + `payload` unchanged'); + expect(out).toContain('Resending an identical failed call is never the fix.'); + // invalid_arguments must be read as a caller payload error, not infra. + expect(out).toMatch(/`invalid_arguments`[\s\S]*YOUR payload is wrong/); + // A repeating timeout/infra error must stop the retry loop. + expect(out).toMatch(/timeout or an infrastructure\/transport error that REPEATS/); + }); + + it('preamble distinguishes a list description (hint) from the info contract', () => { + // The model had `engine::functions::list` descriptions yet still guessed + // payloads. The preamble must say the list one-liner is a hint and `info` is + // the authoritative contract. + const out = buildSystemPrompt([]); + expect(out).toMatch(/is a HINT, not the contract/); + }); + + it('preamble encodes the 3-tier discovery hierarchy: engine, directory(skills), registry', () => { + // Engine (the iii instance) gives the per-call CONTRACT; the directory gives + // the APPROACH (skills, loaded before building); the registry is only for + // adding a NEW worker. + const out = buildSystemPrompt([]); + expect(out).toContain('THE ENGINE (the iii instance)'); + expect(out).toContain('THE DIRECTORY (skills)'); + expect(out).toMatch(/THE REGISTRY — ONLY to ADD A NEW worker/); + // Registry path ends in an install, not a lookup. + expect(out).toContain('worker::add'); + }); + + it('preamble treats skills as load-before-building, NOT a last resort (mqvc4qtb: agent skipped the worker skill and imported express)', () => { + // Root cause observed live: the old "skills ONLY when the engine schema is + // not enough" framing led the agent to never load the runtime worker's skill + // and import a foreign-ecosystem pattern. The preamble must (a) frame skills + // as the APPROACH to load before building, not a fallback, and (b) split + // engine=contract vs skill=approach. + const out = buildSystemPrompt([]); + expect(out).toMatch(/LOAD IT BEFORE you build/); + expect(out).not.toMatch(/ONLY when the engine schema is not enough/); + expect(out).toMatch( + /the engine \(source 1\) gives the exact per-call CONTRACT; the skill gives\s+the APPROACH/, + ); + }); + + it('preamble forbids importing foreign-ecosystem patterns before reading the worker skill (build-first directive)', () => { + const out = buildSystemPrompt([]); + expect(out).toMatch(/When the task is to BUILD, AUTHOR, or OPERATE/); + // Must tell the agent to load EVERY involved worker's skill, incl. the runtime. + expect(out).toMatch(/identify EVERY worker the task\s+touches/); + expect(out).toMatch(/load each one's skill with `directory::skills::get`/); + // Must block carrying non-iii patterns in from memory. + expect(out).toMatch(/Do NOT carry patterns from other ecosystems/); + expect(out).toMatch(/not an iii function, stop and read the relevant worker's skill/); + }); + + it('preamble documents directory::skills::index for discovering available skills', () => { + const out = buildSystemPrompt([]); + expect(out).toContain('directory::skills::index'); + expect(out).toMatch(/WHAT skills exist/); + }); + + it('preamble is generic — no worker-specific examples leak into the identity prompt', () => { + const out = buildSystemPrompt([]); + expect(out.toLowerCase()).not.toContain('sandbox'); + expect(out).not.toContain('heredoc'); + }); + + it('preamble enumerates the discovery surface (H6)', () => { + const out = buildSystemPrompt([]); + for (const fn of [ + 'engine::functions::list', + 'engine::workers::list', + 'engine::triggers::list', + 'engine::registered-triggers::list', + 'worker::list', + 'directory::registry::workers::list', + 'directory::skills::index', + 'directory::skills::get', + ]) { + expect(out).toContain(fn); + } + }); + + it('preamble checks a RUNNING worker via engine::workers::list + worker::list, not the registry', () => { + const out = buildSystemPrompt([]); + expect(out).toMatch(/To check a worker is RUNNING/i); + expect(out).toContain('engine::workers::list'); + expect(out).toContain('worker::list'); + // Must steer AWAY from the registry list for liveness checks. + expect(out).toMatch(/never `directory::registry::workers::list`/); + }); + + it('preamble carries the worker-authoring entry point (H4)', () => { + // registerFunction/registerTrigger are methods on registerWorker()'s + // return value, NOT top-level exports — the #1 worker-authoring footgun. + const out = buildSystemPrompt([]); + expect(out).toContain('registerWorker'); + expect(out).toContain('iii.registerFunction'); + // The error phrase wraps across a line in the preamble template literal. + expect(out).toMatch(/TypeError: registerFunction is not a\s+function/); }); it('skills appear in config order', () => { diff --git a/iii-directory/src/functions/skills.rs b/iii-directory/src/functions/skills.rs index aa8175c9..074af9ab 100644 --- a/iii-directory/src/functions/skills.rs +++ b/iii-directory/src/functions/skills.rs @@ -259,7 +259,21 @@ impl RegisteredWorkersCache { // Drop the guard before the async fetch. } - // Phase 2: fetch from engine WITHOUT holding the lock. + self.fetch_and_store(iii).await + } + + /// Always fetch `worker::list` fresh (ignoring the TTL), refresh the + /// cache, and return the current registered set (falling back to the + /// last-known set on error). Used by `directory::skills::index` so a + /// just-registered worker shows up immediately instead of waiting on + /// the TTL or a worker-add cache invalidation. + pub async fn get_fresh(&self, iii: &III) -> Option> { + self.fetch_and_store(iii).await + } + + /// Fetch `worker::list` from the engine WITHOUT holding the lock, + /// then store the result (or fall back to the stale set on error). + async fn fetch_and_store(&self, iii: &III) -> Option> { let result = iii .trigger(TriggerRequest { function_id: "worker::list".to_string(), @@ -269,7 +283,7 @@ impl RegisteredWorkersCache { }) .await; - // Phase 3: re-acquire the lock and store or fall back. + // Re-acquire the lock and store or fall back. let mut lock = self.inner.lock().await; match result { Ok(val) => { @@ -344,6 +358,7 @@ pub async fn resolve_visible_skills( cfg: &SkillsConfig, cache: &RegisteredWorkersCache, iii: &III, + fresh: bool, ) -> Vec { let (merged, _skipped) = fs_source::scan_skills_merged(&cfg.resolved_skills_folder(), &cfg.local_skills_folder()); @@ -352,7 +367,16 @@ pub async fn resolve_visible_skills( return merged; } - match cache.get_or_fetch(iii).await { + // `fresh` callers (the index) re-fetch `worker::list` every call so a + // just-registered worker is never hidden by a stale registered-workers + // cache; cached callers (`list`/`get`) keep the TTL fast path. + let registered = if fresh { + cache.get_fresh(iii).await + } else { + cache.get_or_fetch(iii).await + }; + + match registered { Some(registered) => filter_to_registered(merged, ®istered), None => { tracing::info!( @@ -443,7 +467,7 @@ fn register_list_skills( let iii = iii_inner.clone(); let cache = cache_inner.clone(); async move { - let entries = resolve_visible_skills(&cfg, &cache, &iii).await; + let entries = resolve_visible_skills(&cfg, &cache, &iii, false).await; let out = list_skills_filtered(entries, &input); Ok::<_, IIIError>(ListSkillsOutput { skills: out }) } @@ -557,7 +581,7 @@ fn register_index_skills( let iii = iii_inner.clone(); let cache = cache_inner.clone(); async move { - let entries = resolve_visible_skills(&cfg, &cache, &iii).await; + let entries = resolve_visible_skills(&cfg, &cache, &iii, true).await; let siblings = id_set(&entries); let rows: Vec = entries .into_iter() @@ -818,7 +842,7 @@ async fn get_skill_visible( let id = normalize_get_id(&req.id)?; reject_function_id_shaped(&id)?; validate_id(&id)?; - let visible = resolve_visible_skills(cfg, cache, iii).await; + let visible = resolve_visible_skills(cfg, cache, iii, false).await; let siblings = id_set(&visible); if let Some(fs) = find_fs_skill_in(&visible, &id) { @@ -1090,10 +1114,12 @@ pub fn extract_description(markdown: &str) -> Option { Some(buf) } -/// Character budget cap for the rendered index block. When the total -/// rendered markdown exceeds this limit it is truncated and a -/// continuation hint is appended so the consumer knows to call -/// `directory::skills::list` for the full catalog. +/// Character budget cap for the per-worker DESCRIPTION paragraphs in the +/// rendered index. Every worker's heading and `get` pointer are always +/// emitted (the index must list every installed worker); only the +/// description paragraph is dropped once the running body would exceed +/// this limit, with a note pointing at `directory::skills::get` for the +/// full reference. Workers themselves are never truncated away. const INDEX_CHAR_BUDGET: usize = 3000; /// True when `on_disk_id` is the root overview doc of a top-level worker @@ -1142,9 +1168,12 @@ fn is_index_overview(entry: &SkillEntry) -> bool { /// by `id` (the order `fs_source::scan_skills` returns); this function /// does not re-sort. /// -/// When the rendered output exceeds [`INDEX_CHAR_BUDGET`], it is -/// truncated after the last complete worker block that fits, and a -/// continuation hint is appended. +/// Every worker overview row is ALWAYS rendered (heading + `get` pointer) +/// so the index lists every installed worker. Only the optional +/// description paragraph is budget-sensitive: once the running body would +/// exceed [`INDEX_CHAR_BUDGET`], later descriptions are omitted (with a +/// note pointing at `directory::skills::get`) while the remaining workers +/// are still listed. fn render_index_markdown(entries: &[SkillEntry]) -> String { let workers: Vec<&SkillEntry> = entries.iter().filter(|e| is_index_overview(e)).collect(); @@ -1152,33 +1181,40 @@ fn render_index_markdown(entries: &[SkillEntry]) -> String { out.push_str("# Skills index\n\n"); out.push_str(&format!("{} worker(s).\n", workers.len())); - let header_len = out.len(); - let mut truncated = false; + let mut omitted_descriptions = false; for worker in &workers { - let mut block = String::new(); - block.push('\n'); - block.push_str(&format!("## {}\n", worker.title)); + out.push('\n'); + out.push_str(&format!("## {}\n", worker.title)); + + // The heading and the get-pointer are ALWAYS emitted so every + // installed worker appears — the index is the discovery surface, so + // dropping a worker entirely is never acceptable. Only the optional + // description paragraph is budget-sensitive: keep it while the body + // stays under INDEX_CHAR_BUDGET, otherwise omit it (recoverable via + // directory::skills::get) and keep listing the remaining workers. if !worker.description.is_empty() { - block.push('\n'); - block.push_str(&format!("{}\n", worker.description)); + let desc_cost = "\n".len() + worker.description.len() + "\n".len(); + if out.len() + desc_cost <= INDEX_CHAR_BUDGET { + out.push('\n'); + out.push_str(&format!("{}\n", worker.description)); + } else { + omitted_descriptions = true; + } } - block.push('\n'); - block.push_str(&format!( + + out.push('\n'); + out.push_str(&format!( "Full reference: call `directory::skills::get {{ \"id\": \"{id}\" }}` \ (legacy `iii://{id}`).\n", id = worker.id )); - - if out.len() + block.len() > INDEX_CHAR_BUDGET && out.len() > header_len { - truncated = true; - break; - } - out.push_str(&block); } - if truncated { - out.push_str("\n(... truncated; call directory::skills::list to browse all skills)\n"); + if omitted_descriptions { + out.push_str( + "\n(some descriptions omitted to save space; call directory::skills::get for the full reference)\n", + ); } out @@ -2575,30 +2611,47 @@ First paragraph. // ── render_index character budget cap ────────────────────────────── #[test] - fn render_index_truncates_on_cap_overflow() { - // Create enough workers to exceed INDEX_CHAR_BUDGET. + fn render_index_lists_every_worker_when_over_budget() { + // Enough workers (with big descriptions) to blow past + // INDEX_CHAR_BUDGET. Every worker must still appear; only the + // descriptions get dropped once the budget is hit. let mut entries = Vec::new(); for i in 0..50 { entries.push(entry( &format!("worker-{i:02}/index"), - &format!("Worker {i}"), + &format!("Worker {i:02}"), Some("index"), &"x".repeat(200), )); } let body = render_index_markdown(&entries); + + // Count header reflects ALL workers. + assert!( + body.contains("50 worker(s).\n"), + "count must reflect every worker; got: {body}" + ); + // Every worker heading is present — none dropped. + for i in 0..50 { + let heading = format!("## Worker {i:02}\n"); + assert!( + body.contains(&heading), + "worker {i:02} must be listed even over budget; missing {heading:?}" + ); + } + // Over budget, the omission note appears and points at get. assert!( - body.len() <= INDEX_CHAR_BUDGET + 200, - "body should be near the cap; got {} chars", - body.len() + body.contains("descriptions omitted"), + "should note omitted descriptions; got: {body}" ); assert!( - body.contains("truncated"), - "should contain truncation hint; got: {body}" + body.contains("directory::skills::get"), + "omission note should reference the get function; got: {body}" ); + // The old whole-worker truncation behaviour is gone. assert!( - body.contains("directory::skills::list"), - "truncation hint should reference list function; got: {body}" + !body.contains("truncated"), + "workers must never be truncated away; got: {body}" ); }