diff --git a/README.md b/README.md index 8bd31fcd..6dcac9ad 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ npx skills add iii-hq/iii --all |---|---|---| | [`acp`](acp/) | Rust | Agent Client Protocol surface — stdio JSON-RPC, exposes iii agents as ACP sessions. | | [`approval-gate`](approval-gate/) | Rust | Human-in-the-loop approval gate — evaluates each function call (continue / deny / hold), holds pending calls for a human, and emits `approval::pending-*` events. Binds the harness `pre_dispatch` hook. See [`approval-gate/architecture/`](approval-gate/architecture/). | -| [`harness`](harness/) | Node | TS port of the iii harness stack — bundles `harness` (provider registry + credentials/settings/permissions via the `configuration` worker), `turn-orchestrator`, `approval-gate`, `hook-fanout`, `models-catalog`, the `provider-*` workers, `llm-budget`, and `context-compaction` as one pnpm monorepo. Conversations persist in `session-manager`. See [`harness/README.md`](harness/README.md). | +| [`harness`](harness/) | Node | TS port of the iii harness stack — bundles `harness` (provider registry + credentials/settings/permissions via the `configuration` worker), `turn-orchestrator`, `hook-fanout`, `models-catalog`, the `provider-*` workers, `llm-budget`, and `context-compaction` as one pnpm monorepo. Approval is delegated to the standalone `approval-gate` worker via the `pre_dispatch` hook. Conversations persist in `session-manager`. See [`harness/README.md`](harness/README.md). | | [`claude-code`](claude-code/) | Node | Claude Code as an iii worker — `claude::*` runs headless Claude Code turns, mirrors raw messages onto `claude::events`, and streams AgentEvent frames onto `agent::events`. | | [`session-manager`](session-manager/) | Rust | Durable, reactive, branching conversation store — fourteen `session::*` functions plus six trigger types; the transcript backend for `harness` and `console`. See [`session-manager/architecture/`](session-manager/architecture/). | | [`database`](database/) | Rust | PostgreSQL, MySQL, and SQLite client — query, execute, transactions, prepared statements, and change feeds. | diff --git a/console/web/src/lib/backend/approval-settings.ts b/console/web/src/lib/backend/approval-settings.ts index 7eacd00a..9de3a34b 100644 --- a/console/web/src/lib/backend/approval-settings.ts +++ b/console/web/src/lib/backend/approval-settings.ts @@ -1,8 +1,8 @@ /** * RPC adapters for the per-session approval settings (`approval::*` - * handlers in harness/src/approval-gate/settings/). These are + * functions on the standalone approval-gate worker). These are * user-initiated RPCs only — agent function calls cannot reach them - * (the turn-orchestrator hook hard-denies these function ids). + * (the gate hard-denies approval::* function ids). */ import { getIiiClient } from '@/lib/iii-client' @@ -12,7 +12,7 @@ export type PermissionMode = 'manual' | 'auto' | 'full' export interface AlwaysAllowEntry { function_id: string granted_at: number - granted_by: 'user_click' + granted_by: 'user_click' | 'seed' } export interface ApprovalSettings { @@ -40,7 +40,7 @@ function coerceEntries(raw: unknown): AlwaysAllowEntry[] { (entry): AlwaysAllowEntry => ({ function_id: String(entry.function_id ?? ''), granted_at: Number(entry.granted_at ?? 0), - granted_by: 'user_click', + granted_by: entry.granted_by === 'seed' ? 'seed' : 'user_click', }), ) .filter((entry) => entry.function_id.length > 0) @@ -48,7 +48,14 @@ function coerceEntries(raw: unknown): AlwaysAllowEntry[] { function coerceSettings(raw: unknown): ApprovalSettings { if (!raw || typeof raw !== 'object') return DEFAULT_APPROVAL_SETTINGS - const r = raw as Record + // The approval-gate worker wraps the record: mutations return + // { settings }, get_settings returns { settings, source }. + const outer = raw as Record + const r = ( + outer.settings && typeof outer.settings === 'object' + ? outer.settings + : outer + ) as Record const mode: PermissionMode = r.mode === 'auto' || r.mode === 'full' ? r.mode : 'manual' return { @@ -63,7 +70,7 @@ export async function getApprovalSettings( sessionId: string, ): Promise { const client = await getIiiClient() - const raw = await client.call('approval::get_settings', { + const raw = await client.call('approval::get-settings', { session_id: sessionId, }) return coerceSettings(raw) @@ -74,7 +81,7 @@ export async function setApprovalMode( mode: PermissionMode, ): Promise { const client = await getIiiClient() - const raw = await client.call('approval::set_mode', { + const raw = await client.call('approval::set-mode', { session_id: sessionId, mode, }) @@ -86,7 +93,7 @@ export async function addAlwaysAllow( functionId: string, ): Promise { const client = await getIiiClient() - const raw = await client.call('approval::add_always_allow', { + const raw = await client.call('approval::add-always-allow', { session_id: sessionId, function_id: functionId, }) @@ -98,7 +105,7 @@ export async function removeAlwaysAllow( functionId: string, ): Promise { const client = await getIiiClient() - const raw = await client.call('approval::remove_always_allow', { + const raw = await client.call('approval::remove-always-allow', { session_id: sessionId, function_id: functionId, }) @@ -110,7 +117,7 @@ export async function approveAlways( functionId: string, ): Promise { const client = await getIiiClient() - const raw = await client.call('approval::approve_always', { + const raw = await client.call('approval::approve-always', { session_id: sessionId, function_id: functionId, }) @@ -120,7 +127,7 @@ export async function approveAlways( export async function clearApprovalSettings(sessionId: string): Promise { const client = await getIiiClient() await client - .call('approval::clear_settings', { session_id: sessionId }) + .call('approval::clear-settings', { session_id: sessionId }) .catch(() => { /* best-effort cleanup on conversation deletion */ }) diff --git a/harness/README.md b/harness/README.md index 4241ec21..1e5775e6 100644 --- a/harness/README.md +++ b/harness/README.md @@ -22,7 +22,7 @@ Read [The Harness Is the Backend](https://www.linkedin.com/pulse/harness-backend **New capability, new worker.** When the harness needs something else (shell, database, coder, another provider), you add a worker, not a fork of the orchestrator. Published workers install from the [iii worker registry](https://workers.iii.dev) with `iii worker add `; they register on the iii engine and show up in the live catalog. -**Turns, approvals, budgets.** Seven-state durable turn FSM with queue-backed steps. Approval gate with YAML permissions, parallel tool batches, pending state across reload, fail-closed when policy is unreachable. Workspace and agent budget caps. Five provider workers behind one registry. +**Turns, approvals, budgets.** Seven-state durable turn FSM with queue-backed steps. A `harness::hook::pre-dispatch` trigger type gates every agent function call — the standalone [approval-gate worker](../approval-gate/) binds its policy there (permission modes, YAML rules, pending inbox) and settles holds via `harness::function::resolve`; the chain fails closed when a bound hook is unreachable. Parallel tool batches, pending state across reload. Workspace and agent budget caps. Five provider workers behind one registry. **Context compaction.** Long sessions exceed model windows. The `context-compaction` worker compacts history as turns accumulate and backs the console `/compact` command. @@ -30,17 +30,17 @@ Read [The Harness Is the Backend](https://www.linkedin.com/pulse/harness-backend ## What ships here -Fourteen workers in one TypeScript package, one folder per worker, one feature per file: +One folder per worker, one feature per file: | Concern | Workers | | --- | --- | -| Orchestration | `turn-orchestrator` (durable turn FSM), `hook-fanout` | -| Governance | `harness` (permissions, provider registry, UI fanout), `approval-gate` | +| Orchestration | `turn-orchestrator` (durable turn FSM, pre_dispatch hook point, `harness::function::resolve`), `hook-fanout` | +| Governance | `harness` (yaml policy rules, UI fanout) | | Context | `context-compaction` (keeps long sessions inside the model window) | | Models | `models-catalog`, `provider-anthropic`, `provider-openai`, `provider-kimi`, `provider-lmstudio`, `provider-llamacpp` | | Cost | `llm-budget` | -Rust workers (`shell`, `iii-directory`, `session-manager` — the durable, reactive conversation store the harness drives through `session::*`) and engine builtins (`state::*`, `stream::*`, `iii::durable::*`) stay on the same bus; this package does not reimplement them. +Rust workers (`shell`, `iii-directory`, `session-manager` — the durable, reactive conversation store the harness drives through `session::*`, and `approval-gate` — the standalone approval policy worker at the repo root) and engine builtins (`state::*`, `stream::*`, `iii::durable::*`) stay on the same bus; this package does not reimplement them. --- diff --git a/harness/docs/architecture.md b/harness/docs/architecture.md index 45f2187a..91b66561 100644 --- a/harness/docs/architecture.md +++ b/harness/docs/architecture.md @@ -26,8 +26,8 @@ deterministic idempotent entry ids, streams assistant content via | Worker | Folder | Role | Doc | |---|---|---|---| | harness | [src/harness/](harness/src/harness/) | Meta-worker; loads `iii-permissions.yaml`, exposes `harness::trigger` (WS ingestion bridge — see [Telemetry & trace correlation](#telemetry--trace-correlation)) / `policy::check_permissions` / `ui::*` / `harness::provider::{register,resolve,list}`. Owns the provider registry + the `harness` entry in the `configuration` worker (credentials, settings, permissions — see [storage.md](harness/docs/storage.md)). | [workers/harness.md](harness/docs/workers/harness.md) | -| turn-orchestrator | [src/turn-orchestrator/](harness/src/turn-orchestrator/) | Durable FSM driving each agent turn; `dispatchWithHook` approval chokepoint. | [workers/turn-orchestrator.md](harness/docs/workers/turn-orchestrator.md) | -| approval-gate | [src/approval-gate/](harness/src/approval-gate/) | Registers `approval::resolve`; persists decisions to scope `approvals`. Wake via `turn::on_approval` state trigger. Default mode from `harness` config `permissions.default_mode`. | [workers/approval-gate.md](harness/docs/workers/approval-gate.md) | +| turn-orchestrator | [src/turn-orchestrator/](harness/src/turn-orchestrator/) | Durable FSM driving each agent turn; `dispatchWithHook` pre_dispatch hook chokepoint; owns `harness::function::resolve` and the `harness::hook::pre-dispatch` / `harness::turn-completed` trigger types. | [workers/turn-orchestrator.md](harness/docs/workers/turn-orchestrator.md) | +| approval-gate (external) | [approval-gate/ (repo root)](../../approval-gate/) | Standalone Rust worker: approval policy, pending inbox, decision RPCs. Binds `approval::gate` to the harness's pre_dispatch hook and settles holds via `harness::function::resolve`. | [workers/approval-gate.md](harness/docs/workers/approval-gate.md) | | llm-budget | [src/llm-budget/](harness/src/llm-budget/) | Workspace + agent LLM spend caps with alerts, forecast, period rollover. | [workers/llm-budget.md](harness/docs/workers/llm-budget.md) | | hook-fanout | [src/hook-fanout/](harness/src/hook-fanout/) | Generic publish-and-collect primitive over a stream topic. | [workers/hook-fanout.md](harness/docs/workers/hook-fanout.md) | | models-catalog | [src/models-catalog/](harness/src/models-catalog/) | Model-capability catalogue in iii state (provider-registered only; no embedded seed or fallback), refreshed by `provider::::refresh_models`. | [workers/models-catalog.md](harness/docs/workers/models-catalog.md) | @@ -50,7 +50,6 @@ flowchart LR subgraph harnessNode [harness workers] harness[harness] turnOrch[turn-orchestrator] - approval[approval-gate] budget[llm-budget] hook[hook-fanout] models[models-catalog] @@ -69,6 +68,7 @@ flowchart LR subgraph external [External Rust workers + engine] shell[shell] directory[iii-directory] + approval["approval-gate (standalone)"] sessionMgr["session-manager (session::*)"] state["iii engine state::* / stream::* / iii::durable::*"] end @@ -81,13 +81,14 @@ flowchart LR turnOrch -- "provider::*::stream" --> provKimi turnOrch -- "provider::*::stream" --> provLms turnOrch -- "provider::*::stream" --> provLlama - turnOrch -- "consultBefore: policy::check_permissions" --> harness + turnOrch -- "pre_dispatch hook: approval::gate" --> approval + approval -- "policy::check_permissions" --> harness turnOrch -- "session::ensure/append/update_message/set_status" --> sessionMgr turnOrch -- "state::* persistence" --> state client -- "approval::resolve" --> approval - approval -- "state::set approvals//" --> state - state -- "state trigger (scope=approvals)" --> turnOrch + approval -- "harness::function::resolve (execute / deliver)" --> turnOrch + turnOrch -- "harness::turn-completed" --> approval turnOrch -- "enqueue turn::{state} on turn-step queue" --> turnOrch provAnth -- "harness::provider::resolve" --> harness @@ -110,8 +111,9 @@ defines a 7-state durable FSM. Each state is a registered `turn::{state}` function executed via `runTransition` and enqueued onto the `turn-step` FIFO queue from `saveRecord` ([store.ts](harness/src/turn-orchestrator/state-runtime/store.ts)). `saveRecord` calls `shouldWakeStep` then enqueues `turn::{newState}` when the persisted state -transitions to a stepable state. Paused sessions are woken when `approval::resolve` writes -scope `approvals`, which fires `turn::on_approval` to enqueue `turn::function_awaiting_approval`. +transitions to a stepable state. Paused sessions are woken by +`harness::function::resolve`, which persists a `function_resolutions` row and +enqueues `turn::function_awaiting_approval` directly. ```mermaid stateDiagram-v2 @@ -136,13 +138,18 @@ unexpectedly (unless it opts into queue retry via `TransientError`). ## Approval flow -The orchestrator consults `policy::check_permissions` directly inside -`consultBefore` — `allow`, `deny`, or `pending`. There is no hook fanout on -the before path. The orchestrator parks the turn in `function_awaiting_approval` -when any call in the batch needs approval, then resumes as each parked call -receives `approval::resolve` (decisions may arrive independently and out of -batch order). Each `approval::resolve` persists the decision; the `turn::on_approval` -state trigger enqueues `turn::function_awaiting_approval`. +The orchestrator consults the `harness::hook::pre-dispatch` chain inside +`dispatchWithHook` — bound hooks (the standalone approval-gate's +`approval::gate`) answer `continue`, `deny`, or `hold`. The approval policy +itself (permission modes, allow-lists, `policy::check_permissions` fallback) +lives in the gate worker; see +[tech-specs/2026-06-agentic/approval-gate.md](../../tech-specs/2026-06-agentic/approval-gate.md). +The orchestrator parks the turn in `function_awaiting_approval` when any +call in the batch is held, then resumes as each parked call receives +`harness::function::resolve` (`execute` releases it, `deliver` answers it +without executing; decisions may arrive independently and out of batch +order). Each resolve persists a `function_resolutions` row (deleted after +consumption) and enqueues `turn::function_awaiting_approval`. ### Parallel batch during `function_execute` @@ -150,9 +157,9 @@ When the assistant message contains multiple tool calls, `runBatch` does not stop at the first `pending`. For each call in assistant tool order: - already in `work.executed` or listed in `awaiting_approval[]` → skip -- policy `allow` (or immediate policy `deny`) → dispatch, checkpoint, emit +- hook `continue` (or inline hook `deny`) → dispatch, checkpoint, emit `function_execution_end` -- policy `needs_approval` → emit `function_execution_start`, append the call +- hook `hold` → emit `function_execution_start`, append the call to `awaiting_approval[]`, **continue** remaining siblings After the loop: if any call is still awaiting approval, transition to @@ -168,45 +175,47 @@ parked until A and C are resolved. | Surface | Location | Role | |---|---|---| | Open approvals | `turn_state/` → `awaiting_approval[]` | Which calls are parked and their args | -| Decisions | `approvals//` | Written by `approval::resolve`; read on each wake | +| Decisions | `function_resolutions//` | Written by `harness::function::resolve`; consumed (and deleted) on each wake | +| Pending inbox | approval-gate worker (`approval::list-pending`) | Cross-session human-attention index, owned by the gate | | UI mirror | `turn_state_changed` on `agent::events` | Console shows pending modals from `TurnStateView.awaiting_approval` | | Reload | `turn::get_state` | One-shot lean view after refresh (no direct iii state reads) | A page refresh does not lose pending approvals as long as iii state persists. -Operators can still approve from the console after reload; each decision write -fires `turn::on_approval` to enqueue the parked turn step while the worker is running. +Operators can still approve from the console after reload; each +`harness::function::resolve` enqueues the parked turn step directly while +the worker is running. ### Resume semantics - Decisions may arrive in any order (e.g. resolve call C before call A). -- On `allow`, the parked call executes with `skipStart: true` — the +- On `execute`, the parked call runs with `skipStart: true` — the `function_execution_start` event was already emitted when the call first returned `pending`. -- A duplicate `approval::resolve` for the same call re-wakes the handler; - resolved entries are pruned idempotently so execution is not doubled. +- A duplicate `harness::function::resolve` for the same call answers + `{resolved: false}` once it settled; resolved entries are pruned + idempotently so execution is not doubled. ```mermaid sequenceDiagram participant Turn as turn-orchestrator (FSM) - participant Bus as iii bus (state::* + stream::*) + participant Gate as approval-gate (standalone) participant Harness as harness (policy::check_permissions) - participant Gate as approval-gate participant User - Note over Turn: function_execute: runBatch walks all tool calls.
pending calls append to awaiting_approval[];
allowed siblings execute in the same pass. + Note over Turn: function_execute: runBatch walks all tool calls.
held calls append to awaiting_approval[];
allowed siblings execute in the same pass. - Turn->>Harness: policy::check_permissions(function_id, args) [5s timeout] - alt rule.action == allow - Harness-->>Turn: allow → dispatch the call - else rule.action == deny - Harness-->>Turn: deny + DenialEnvelope → error FunctionResult - else no rule (needs_approval) - Harness-->>Turn: needs_approval → append to awaiting_approval[], continue batch - Note over Turn,Bus: When the batch pass finishes with any awaiting calls,
saveRecord parks in function_awaiting_approval (no wake on park). + Turn->>Gate: pre_dispatch hook approval::gate (HookInput) + alt gate allows (mode / allow-lists / yaml allow) + Gate-->>Turn: continue → dispatch the call + else gate denies + Gate-->>Turn: deny + reason → error FunctionResult + else needs a human + Gate-->>Turn: hold + pending_timeout_ms → append to awaiting_approval[], continue batch + Note over Turn: When the batch pass finishes with any awaiting calls,
saveRecord parks in function_awaiting_approval
(one post-persist scan wake). User->>Gate: approval::resolve(decision, reason) - Gate->>Bus: state::set approvals// = {decision, reason} - Gate->>Turn: enqueue turn::function_awaiting_approval - Turn->>Turn: function_awaiting_approval executes
that call immediately (skipStart), removes it from awaiting_approval[] + Gate->>Turn: harness::function::resolve (execute | deliver) + Turn->>Turn: persist function_resolutions row,
enqueue turn::function_awaiting_approval + Turn->>Turn: settle that call immediately (skipStart),
delete the row, remove it from awaiting_approval[] alt more calls still awaiting Turn->>Turn: stay in function_awaiting_approval else awaiting empty and batch incomplete @@ -217,8 +226,10 @@ sequenceDiagram end ``` -Fail-closed: policy unreachable (transport error or 5 s timeout) → -`consultBefore` denies the call with a `gate_unavailable` envelope. +Fail-closed: a bound hook unreachable (transport error or its `timeout_ms`) +→ the chain denies the call with a `gate_unavailable` envelope. Note the +gate consults `policy::check_permissions` itself as its yaml fallback; the +orchestrator no longer calls policy directly. ## Kernel deny list @@ -292,7 +303,7 @@ dependants register: ```mermaid flowchart TD harness --> turnOrch - harness --> approval + turnOrch --> approval["approval-gate (external; binds the pre_dispatch hook at boot)"] models[models-catalog] --> turnOrch sessionMgr["session-manager (external)"] --> turnOrch harness --> provAnth[provider-anthropic] diff --git a/harness/docs/storage.md b/harness/docs/storage.md index 6951756a..8cf31ffc 100644 --- a/harness/docs/storage.md +++ b/harness/docs/storage.md @@ -1,7 +1,7 @@ -# Harness storage: provider credentials, settings & permissions +# Harness storage: provider credentials & settings -Provider API keys, per-provider settings, and the default agent permission -mode live in a single entry — id `harness` — in the engine's built-in +Provider API keys and per-provider settings live in a single entry — id +`harness` — in the engine's built-in [`configuration`](https://github.com/iii-ai/iii/tree/main/workers/configuration) worker. The harness meta-worker owns that entry through its **provider registry** ([src/harness/providers/](../src/harness/providers/)). @@ -14,7 +14,6 @@ worker. ```jsonc { - "permissions": { "default_mode": "manual" }, // manual | auto | full "providers": { "anthropic": { "api_key": "sk-ant-…", "api_url": "https://…", "max_tokens": 8192 }, "openai": { "api_key": "sk-…" }, @@ -55,11 +54,11 @@ setups keep working. ## Permissions -`permissions.default_mode` (`manual | auto | full`) seeds the default -approval mode for **new** agent sessions. The approval-gate reads it at -startup and re-reads it via a `configuration` trigger on the `harness` -entry, so an operator edit takes effect without a restart. Sessions with -their own stored approval settings keep them. +Deployment approval defaults (`default_mode`, `always_allow_seed`, +`pending_timeout_ms`) live in the standalone approval-gate worker's own +`approval-gate` configuration entry — the harness entry no longer carries a +permissions block. See +[tech-specs/2026-06-agentic/approval-gate.md § Configuration](../../tech-specs/2026-06-agentic/approval-gate.md#configuration). ## Adding a provider diff --git a/harness/docs/workers/approval-gate.md b/harness/docs/workers/approval-gate.md index 3af4a634..ea642237 100644 --- a/harness/docs/workers/approval-gate.md +++ b/harness/docs/workers/approval-gate.md @@ -1,141 +1,25 @@ -# approval-gate - -Registers `approval::resolve`, the per-session approval-settings handlers -(`approval::set_mode`, `approval::add_always_allow`, …), and shared wire -schemas for the approval path. The turn-orchestrator reacts via the reactive -`turn::on_approval` state trigger. - -## Purpose - -The approval gate is the bus entry point for human decisions on parked tool -calls. It does **not** intercept function calls on the bus — the -turn-orchestrator consults `policy::check_permissions` directly inside -`consultBefore`, then applies per-session mode + always-allow before falling -back to the yaml policy. The gate's job is to accept operator input from the -console and persist the decisions (resolutions, mode changes, allow-list -mutations) where the orchestrator can read them. - -## Permission modes (per-session) - -Each session has a permission mode stored at -`approval_settings/`. The turn-orchestrator's `consultBefore` -snapshots this record at the start of each call, then evaluates in order: - -1. **human-only block.** If the agent tries to call any of - `approval::set_mode`, `approval::add_always_allow`, - `approval::remove_always_allow`, `approval::approve_always`, - `approval::get_settings`, `approval::clear_settings`, the call is - denied with rule `human_only_function`. Those handlers are only - reachable from the user-initiated RPC path; the agent's - `dispatchWithHook` route can never self-escalate. -2. **`mode === 'full'`** → allow. No safety floor; the agent may call any - function. The console renders a persistent banner while this mode is - active. -3. **`function_id ∈ approved_always`** → allow, **in every mode**. These - are per-session "approve always" grants made from an approval prompt - ("approve always" button). They are remembered human decisions, not an - auto-policy, so they hold under Manual as well as Auto. -4. **`mode === 'auto'` AND `function_id ∈ always_allow`** → allow. The - `always_allow` list is a user-curated trust profile, seeded from the - Configuration screen's default allowlist and consulted **only in - Auto**. Dormant under Manual. -5. **Fallback** → `policy::check_permissions` against `iii-permissions.yaml`. - Manual mode never short-circuits past this step; everything except - yaml `allow` rules and `approved_always` grants prompts. - -### Mode summary - -| Mode | yaml `allow` | approved_always | always_allow | yaml `deny` | otherwise | -|---|---|---|---|---|---| -| `manual` | allow | allow | dormant | deny | prompt | -| `auto` | allow | allow | allow | deny | prompt | -| `full` | allow | allow | allow | allow | allow | - -| Policy outcome (in orchestrator) | Orchestrator effect | -|---|---| -| `allow` | dispatch proceeds immediately | -| `deny` | dispatch short-circuits with a `DenialEnvelope` | -| `needs_approval` | orchestrator parks the call in `function_awaiting_approval` | - -## Resolution flow - -1. While parked, the orchestrator keeps pending calls in `awaiting_approval[]` on the turn record. -2. The console calls `approval::resolve` with `{ session_id, function_call_id, decision, reason? }`. -3. `approval::resolve` writes `approvals//` via `state::set`. -4. The `turn::on_approval` state trigger (scope `approvals`) enqueues `turn::function_awaiting_approval`. -5. `function_awaiting_approval` executes each resolved call immediately, removes it from `awaiting_approval[]`, and stays parked until none remain; then finalizes the batch or returns to `function_execute`. - -## Registered functions - -- `approval::resolve` — Validates the payload and persists the decision to scope `approvals`. Returns `{ ok: true }` or `{ ok: false, error: 'invalid_payload' | 'resume_failed' }`. -- `approval::set_mode` — Persists `{ mode }` to scope `approval_settings`. **Human-only**: the orchestrator hook denies this id when called by the agent. -- `approval::add_always_allow` — Idempotent append to the auto-mode allow-list. **Human-only**. -- `approval::remove_always_allow` — Remove an entry from the auto-mode allow-list. **Human-only**. -- `approval::approve_always` — Idempotent append to the per-session `approved_always` grants (honored in every mode). **Human-only**. -- `approval::get_settings` — Read current settings, returning defaults if none persisted. **Human-only**. -- `approval::clear_settings` — Drop the session's record on conversation delete. **Human-only**. - -Reactive wake is owned by the turn-orchestrator: - -- `turn::on_approval` — State trigger on scope `approvals`; enqueues `turn::{state}` for the parked session. - -## State keys - -Decision records use scope `approvals` (constant `STATE_SCOPE` in -[src/approval-gate/schemas.ts](harness/src/approval-gate/schemas.ts)); -per-session permission settings live in scope `approval_settings` -(constant `SETTINGS_STATE_SCOPE`): - -| Scope | Key | Value | Writer | Purpose | -|---|---|---|---|---| -| `approvals` | `/` | `{ decision: 'allow' \| 'deny' \| 'aborted', reason: string \| null }` | `approval::resolve` | Decision pickup for parked calls. | -| `approval_settings` | `` | `{ mode, always_allow: AlwaysAllowEntry[], approved_always: AlwaysAllowEntry[], mode_set_at }` | `approval::set_mode`, `approval::add_always_allow`, `approval::remove_always_allow`, `approval::approve_always` | Snapshot read by `consultBefore` before consulting the yaml policy. | - -Pending calls are tracked on the turn record (`awaiting_approval[]`), not as -separate rows under `approvals` until a decision lands. - -## Denial envelopes - -[src/approval-gate/denial.ts](harness/src/approval-gate/denial.ts) builds -`DenialEnvelope` values for policy denies (`permissionsDenyEnvelope` via -`consultBefore` in [hook.ts](harness/src/turn-orchestrator/hook.ts)). -[src/approval-gate/redact.ts](harness/src/approval-gate/redact.ts) -sanitizes tool args for `args_excerpt` on those envelopes. - -## Pending-approval signalling - -The frontend does not consume a dedicated approval signal event. The -orchestrator emits `turn_state_changed` on every `turn_state` write; the -console derives pending approvals from `awaiting_approval` on the mirrored -record. On reload it uses `turn::get_state` (not direct iii state reads). - -## Configuration - -There is no `approval_gate` section in [config.yaml](harness/config.yaml). -Scope `approvals` is fixed in code (`STATE_SCOPE`). - -Policy consultation is a direct `iii.trigger` to `policy::check_permissions` -from turn-orchestrator `consultBefore`. See -[workers/turn-orchestrator.md](workers/turn-orchestrator.md). - -## Dependencies - -From -[src/approval-gate/iii.worker.yaml](harness/src/approval-gate/iii.worker.yaml): -no explicit dependency block. - -## Source layout - -| File | Purpose | -|---|---| -| [src/approval-gate/main.ts](harness/src/approval-gate/main.ts) | Binary entry point (`iii-approval-gate`). | -| [src/approval-gate/resolve.ts](harness/src/approval-gate/resolve.ts) | Registers `approval::resolve`; persists decisions to scope `approvals`. | -| [src/approval-gate/settings/](harness/src/approval-gate/settings/) | Per-session mode/allow-list store, mutations, and handler registration (`readSettings`, `isHumanOnlyApprovalFunction`, `registerSettingsHandlers`). | -| [src/approval-gate/schemas.ts](harness/src/approval-gate/schemas.ts) | `STATE_SCOPE`, `SETTINGS_STATE_SCOPE`, wire schemas, `ApprovalSettingsSchema`, `parsePolicyReply`, `pendingKey`, `ApprovalDecisionSchema`, `ResolvePayloadSchema`. | -| [src/approval-gate/denial.ts](harness/src/approval-gate/denial.ts) | `permissionsDenyEnvelope` and related helpers. | -| [src/approval-gate/redact.ts](harness/src/approval-gate/redact.ts) | `redact` / `clip` for safe `args_excerpt` on denials. | -| [src/approval-gate/iii.worker.yaml](harness/src/approval-gate/iii.worker.yaml) | Worker manifest. | - -Related orchestrator code: -[function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) (registers `turn::on_approval`), -[hook.ts](harness/src/turn-orchestrator/hook.ts). +# approval-gate (moved) + +The in-harness approval-gate worker has been removed. Approval policy — +permission modes, allow-lists, the yaml-policy fallback, the pending inbox, +and the decision RPCs (`approval::resolve`, `approval::set-mode`, …) — now +lives in the **standalone approval-gate worker** (Rust crate at +[`approval-gate/`](../../../approval-gate/) in the repo root). + +- Spec: [tech-specs/2026-06-agentic/approval-gate.md](../../../tech-specs/2026-06-agentic/approval-gate.md) +- Crate docs: [approval-gate/README.md](../../../approval-gate/README.md) + +The harness keeps only the mechanics the gate composes with: + +- the `harness::hook::pre-dispatch` trigger type the gate binds + `approval::gate` to (see + [turn-orchestrator.md § Hook chokepoint](turn-orchestrator.md#hook-chokepoint)), +- `harness::function::resolve` (`execute` releases a held call, `deliver` + answers it without executing), and +- the `harness::turn-completed` trigger type (terminal-turn cleanup). + +Deployment note: the gate is a separate process (like `llm-router`). Start +the harness first — the gate's `harness::hook::pre-dispatch` binding is +best-effort at boot and only retries on gate restart. A deployment without +the gate runs **ungated** (zero pre_dispatch bindings ⇒ every call is +allowed). diff --git a/harness/docs/workers/harness.md b/harness/docs/workers/harness.md index 56478513..76f0713f 100644 --- a/harness/docs/workers/harness.md +++ b/harness/docs/workers/harness.md @@ -60,10 +60,10 @@ From [src/harness/iii.worker.yaml](harness/src/harness/iii.worker.yaml): `iii-stream ^0.11.0`, `iii-bridge ^0.11.0`, `iii-http ^0.11.0`, `iii-sandbox ^0.11.0`, `iii-directory ^0.5.1`. - harness siblings: `turn-orchestrator`, `models-catalog`, - `provider-anthropic`, `provider-openai`, `approval-gate`, `session`, + `provider-anthropic`, `provider-openai`, `session`, `hook-fanout`, `llm-budget` (all `^0.2.0`). - iii engine built-in: `configuration` (the harness `configuration` entry - holds provider credentials/settings + permissions — see + holds provider credentials/settings — see [storage.md](../storage.md)). - Rust workers: `shell ^0.3.0` (for `harness::fs::read_inline`). diff --git a/harness/docs/workers/turn-orchestrator.md b/harness/docs/workers/turn-orchestrator.md index 812cd33c..25f8d1f0 100644 --- a/harness/docs/workers/turn-orchestrator.md +++ b/harness/docs/workers/turn-orchestrator.md @@ -26,10 +26,13 @@ crash-isolation contract: `dispatchWithHook` in [agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) is the single chokepoint every agent-issued function call passes through. -It runs `consultBefore` before forwarding to the target function id. -`consultBefore` triggers `policy::check_permissions` directly (5 s timeout) -and maps the reply to `allow` / `deny` / `pending`. Fail-closed: policy -unreachable → deny with a `gate_unavailable` `DenialEnvelope`. +It consults the `harness::hook::pre-dispatch` chain +([hooks/chain.ts](harness/src/turn-orchestrator/hooks/chain.ts)) before +forwarding to the target function id: each bound hook (e.g. the standalone +approval-gate worker's `approval::gate`) answers `continue` / `deny` / +`hold`. Fail-closed: an unreachable or unparseable hook → deny with a +`gate_unavailable` `DenialEnvelope`. Zero bound hooks → allow (hooks narrow +the dispatch policy, never widen it). ## Registered functions @@ -38,7 +41,8 @@ unreachable → deny with a `gate_unavailable` `DenialEnvelope`. - `turn::provisioning` — FSM step: build system prompt + single `agent_trigger` schema, write enriched `run_request`, advance to `assistant_streaming`. - `turn::assistant_streaming` — FSM step: stream the turn over a provider channel; on completion emit `message_complete`, persist the assistant message (dup-guarded), route to `function_execute` / `steering_check` / `stopped` (via `finishSession`). - `turn::function_execute` — FSM step: own the full function lifecycle via `rec.work`; build batch from `rec.last_assistant`, run each call (skip already-executed and awaiting-approval ids), checkpoint per-call via `writeRecord`; if `pending` → append to `awaiting_approval` and keep dispatching the remaining calls (pending does not block siblings); park to `function_awaiting_approval` when any call awaits approval; finalize results into messages + emit `turn_end` when the batch completes → `steering_check` / `stopped` (via `finishSession`). -- `turn::function_awaiting_approval` — FSM step: on each wake, read decisions for individual `awaiting_approval[]` entries; execute each resolved call immediately (`allow` → dispatch pre-approved; `deny`/`aborted` → synthetic denial); remove resolved entries; stay parked while any remain; when none remain → `finalizeBatch` if complete else `function_execute`. +- `turn::function_awaiting_approval` — FSM step: on each wake, read `function_resolutions` rows for individual `awaiting_approval[]` entries; settle each resolved call immediately (`execute` → dispatch pre-approved; `deliver` → use the delivered content/is_error verbatim); delete consumed rows; remove resolved entries; stay parked while any remain; when none remain → `finalizeBatch` if complete else `function_execute`. +- `harness::function::resolve` — settle one held call from a sibling worker (the approval-gate): `execute` releases it through the normal execution path, `deliver` answers it without executing (user deny, sweep timeout). Persists the decision row and wakes the parked turn directly on the `turn-step` queue. Unknown/already-settled calls → `{resolved: false}`, never an error. - `turn::steering_check` — FSM step: after tool batch or text-only assistant, continue to `assistant_streaming` when `function_results` remain (unless `max_turns`), else `turn_end` → `stopped`. - `turn::get_state` — One-shot reader returning a lean `TurnStateView` (from `schemas.ts:toView`) for a session. UI clients call this on reload to recover in-progress modals (e.g. `function_awaiting_approval`) without reading iii state directly. Returns `null` for unknown sessions. @@ -46,7 +50,12 @@ unreachable → deny with a `gate_unavailable` `DenialEnvelope`. The record-written wake is inline in `saveRecord` (no separate `on-record-written` adapter): every `saveRecord` call that transitions to a non-terminal, non-parking state enqueues `turn::{newState}` on the `turn-step` FIFO. Similarly, `turn_state_changed` events are emitted inline from `persistRecord` inside `TurnStore` — there is no separate `on-turn-state-changed` state trigger. -Paused turns (`function_awaiting_approval`) are woken when `approval::resolve` writes scope `approvals`, which fires `turn::on_approval` (registered in [function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts); see [workers/approval-gate.md](workers/approval-gate.md)). +Paused turns (`function_awaiting_approval`) are woken by `harness::function::resolve` ([function-resolve.ts](harness/src/turn-orchestrator/function-resolve.ts)), which persists the decision and enqueues the wake directly — there is no state trigger. The transition INTO `function_awaiting_approval` also enqueues one post-persist scan wake, so a resolve that raced the park is consumed. + +The worker also registers two custom trigger types sibling workers bind to: + +- `harness::hook::pre-dispatch` ([hooks/registry.ts](harness/src/turn-orchestrator/hooks/registry.ts)) — synchronous pre-dispatch hooks; the standalone approval-gate binds `approval::gate` here. Binding config: `{functions?: string[] (globs), priority?, timeout_ms? (default 5000), on_error?: "fail_closed" | "fail_open"}`. +- `harness::turn-completed` ([turn-completed.ts](harness/src/turn-orchestrator/turn-completed.ts)) — fired once when a turn goes terminal (`{session_id, turn_id, status: completed|cancelled|failed, reason?, timestamp}`); the approval-gate purges its pending records here. Delivery is at-least-once and unordered. ## Turn FSM @@ -59,20 +68,24 @@ The 7 states from [state.ts](harness/src/turn-orchestrator/state.ts): | `provisioning` | [provisioning/process.ts](harness/src/turn-orchestrator/provisioning/process.ts) | Build the system prompt (self-sufficient engine-only preamble), write enriched `run_request` (with `function_schemas: [agentTriggerTool()]`), → `assistant_streaming`. | | `assistant_streaming` | [assistant-streaming/process.ts](harness/src/turn-orchestrator/assistant-streaming/process.ts) | Increment `turn_count`; append the turn's empty assistant entry to session-manager (deterministic `entry_id`, idempotent on re-entry); create channel; trigger provider stream; replace the entry's content via `session::update_message` per coalesced delta batch (each firing `session::message-updated` — the live token surface); on completion call `finalizeAssistantTurn` which lands the final content with a strict update, emits `message_complete` on `agent::events` (stop_reason notice), then routes → `function_execute` (has calls) / `finishing` (no calls) via `finishSession` (error/aborted). | | `function_execute` | [function-execute/process.ts](harness/src/turn-orchestrator/function-execute/process.ts) | Build batch from `rec.last_assistant` (or reuse existing `rec.work`); for each call: emit `function_execution_start`, skip if already executed or awaiting approval, dispatch via `dispatchWithHook`; if `pending` → append to `awaiting_approval` and continue other calls; park to `function_awaiting_approval` when any call awaits; otherwise commit result (silent `writeRecord` checkpoint) + emit `function_execution_end`; after batch: fold results into messages + emit `turn_end` → `steering_check` / `stopped` via `finishSession`. | -| `function_awaiting_approval` | [function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) | On each wake: for each `awaiting_approval[]` entry with a decision, execute immediately (`allow` → pre-approved dispatch; `deny`/`aborted` → synthetic denial); remove resolved entries; stay parked while any remain; when none remain → `finalizeBatch` if complete else `function_execute`. | +| `function_awaiting_approval` | [function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) | On each wake: for each `awaiting_approval[]` entry with a `function_resolutions` row, settle immediately (`execute` → pre-approved dispatch; `deliver` → delivered content verbatim); delete consumed rows; remove resolved entries; stay parked while any remain; when none remain → `finalizeBatch` if complete else `function_execute`. | | `steering_check` | [steering-check/process.ts](harness/src/turn-orchestrator/steering-check/process.ts) | `function_results` present → `assistant_streaming` (unless `max_turns` reached); else emit `turn_end` once → `stopped` via `finishSession`. `max_turns` path emits a synthetic `message_complete` + `turn_end`. | | `stopped` | (no handler) | Terminal. Idempotent. Session teardown (`agent_end`) happens inline via `TurnStatePorts.finishSession` before entering this state. | | `failed` | (set by `runTransition` on unexpected throw) | Terminal. Carries `error: {kind, message}` on the record. Emits `message_complete{stop_reason:'error'}` + `agent_end` so the UI sees the reason. A handler may throw `TransientError` to use the queue's retry/DLQ instead. | `NON_STEPABLE_STATES` in [store.ts](harness/src/turn-orchestrator/state-runtime/store.ts) are -`stopped`, `failed`, and `function_awaiting_approval` — `saveRecord` does not -enqueue a handler for these. - -`dispatchWithHook` returns `{ kind: 'result', result }` or `{ kind: 'pending' }`. -Policy denies are returned as `{ kind: 'result' }` with a denied `FunctionResult`. -`pending` triggers the `function_awaiting_approval` park. Multiple calls may -await approval concurrently; each is executed individually as its decision -arrives. +`stopped` and `failed` — `saveRecord` does not enqueue a handler for these. +Entering `function_awaiting_approval` enqueues exactly one scan wake (it +drains any resolve that raced the park); mid-park checkpoints stay silent. + +`dispatchWithHook` returns `{ kind: 'result', result }` or +`{ kind: 'pending', held_by, pending_timeout_ms }`. Hook denies are returned +as `{ kind: 'result' }` with a denied `FunctionResult`. `pending` triggers +the `function_awaiting_approval` park. Multiple calls may await resolution +concurrently; each is settled individually as its `harness::function::resolve` +arrives. Hold timeouts are owned by the hook owner (the approval-gate's +sweep delivers a timeout denial via `function::resolve`); the harness keeps +no expiry of its own. ## State scopes @@ -83,9 +96,10 @@ Session-scoped iii state uses semantic scopes from | Scope | Key | Purpose | |---|---|---| -| `turn_state` | `` | Serialised `TurnStateRecord` (incl. `work?: TurnWork` and `error?: {kind, message}`). | +| `turn_state` | `` | Serialised `TurnStateRecord` (incl. `turn_id`, `work?: TurnWork` and `error?: {kind, message}`). | | `run_request` | `` | The `run::start` payload enriched by `provisioning` to include `function_schemas: [agentTriggerTool()]` and the assembled `system_prompt`. Typed as `RunRequest` ([run-request.ts](harness/src/turn-orchestrator/run-request.ts)). | | `event_counter` | `` | Monotonic counter for `agent::events` sequence numbers. | +| `function_resolutions` | `/` | Pending `harness::function::resolve` decisions for parked calls. Deleted by the awaiting-approval wake after consumption (and best-effort on `run::abort`) — this scope never accumulates. | Conversation history lives in the external session-manager worker (`session::*`) only. `TurnStore.loadMessages` reconstructs the provider-facing window from one paginated `session::messages { include_custom: true }` read (the latest `custom_type: "compaction"` entry on the path supplies the summary + tail boundary; empty assistant placeholders are filtered out) — see [context-view.ts](harness/src/turn-orchestrator/state-runtime/context-view.ts) and the shared client in [runtime/session.ts](harness/src/runtime/session.ts). `appendMessages` writes via `session::append`, chaining each message onto the active leaf with a deterministic `entry_id` (`-user-` for run::start seeds, `fr-` for function results, `-t-assistant` for the per-turn assistant entry) — session-manager's idempotency on `entry_id` is what makes redelivered queue steps safe; there is no separate reconcile/repair path. @@ -111,13 +125,18 @@ persist that goes through the full save path. It carries a lean record, so heavy internal fields (`work`, `last_assistant`) are never sent to consumers. -## Approval chokepoint +## Hook chokepoint -Unchanged from prior design: `dispatchWithHook` → `consultBefore` → -`policy::check_permissions` (5 s timeout, fail-closed). A `needs_approval` -reply returns `{ kind: 'pending' }` from `dispatchWithHook`, which parks the -session to `function_awaiting_approval`. `approval::resolve` writes the -decision to scope `approvals`, which fires `turn::on_approval` to enqueue `turn::function_awaiting_approval` on the `turn-step` queue. +`dispatchWithHook` → `consultPreDispatch` → each `harness::hook::pre-dispatch` +binding in priority order (per-binding `timeout_ms`, fail-closed). The +approval policy itself (permission modes, allow-lists, yaml rules) lives in +the standalone approval-gate worker — see +[tech-specs/2026-06-agentic/approval-gate.md](../../../tech-specs/2026-06-agentic/approval-gate.md). +A `hold` reply returns `{ kind: 'pending' }` from `dispatchWithHook`, which +parks the session to `function_awaiting_approval`. The hook owner later calls +`harness::function::resolve` (`execute` to release, `deliver` to answer +without executing), which persists a `function_resolutions` row and enqueues +`turn::function_awaiting_approval` on the `turn-step` queue. ## Configuration @@ -140,16 +159,18 @@ From | File | Purpose | |---|---| | [src/turn-orchestrator/main.ts](harness/src/turn-orchestrator/main.ts) | Binary entry point. | -| [src/turn-orchestrator/register.ts](harness/src/turn-orchestrator/register.ts) | Composes all registered functions: `run::start`, per-state `turn::{state}` handlers, `turn::on_approval`, `turn::get_state`. | +| [src/turn-orchestrator/register.ts](harness/src/turn-orchestrator/register.ts) | Composes all registered functions and trigger types: `run::start`, per-state `turn::{state}` handlers, `harness::function::resolve`, `turn::get_state`, `harness::hook::pre-dispatch`, `harness::turn-completed`. | | [src/turn-orchestrator/run-start.ts](harness/src/turn-orchestrator/run-start.ts) | `run::start` handler — persists run config and messages, seeds `turn_state` to `provisioning` via `saveRecord` (which wakes the FSM). | | [src/turn-orchestrator/run-transition.ts](harness/src/turn-orchestrator/run-transition.ts) | Shared FSM transition runner: load → null-check → stale-skip → handle → save. Routes to `failed` on unexpected throw; re-throws `TransientError` for queue retry. | | [src/turn-orchestrator/state-runtime/store.ts](harness/src/turn-orchestrator/state-runtime/store.ts) | `TurnStore` / `createTurnStore` — agent-scope load/save, `shouldWakeStep`, inline FIFO enqueue from `saveRecord`. | | [src/turn-orchestrator/run-request.ts](harness/src/turn-orchestrator/run-request.ts) | `RunRequest` type and `parseRunRequest` — the typed, parsed form of scope `run_request` (includes `function_schemas`). | | [src/turn-orchestrator/get-state.ts](harness/src/turn-orchestrator/get-state.ts) | `turn::get_state` — one-shot reader returning `TurnStateView \| null`. | | [src/turn-orchestrator/agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) | Dispatcher chokepoint: `dispatchWithHook` (consult + trigger), `triggerFunctionCall` (trigger/decode/error), `agentTriggerTool` (schema), `unwrapAgentTrigger`. | -| [src/turn-orchestrator/hook.ts](harness/src/turn-orchestrator/hook.ts) | `consultBefore` — `policy::check_permissions` (5 s, fail-closed) → `allow` / `pending` / `deny`. | -| [src/turn-orchestrator/function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) | `turn::function_awaiting_approval` FSM step + `turn::on_approval` state trigger on scope `approvals`. | -| [src/turn-orchestrator/schemas.ts](harness/src/turn-orchestrator/schemas.ts) | All registered-function I/O schemas and types: `RunStartPayloadSchema`, `TurnStepPayloadSchema`, `TurnStateView`, `toView`, `ApprovalDecisionEventSchema`. | +| [src/turn-orchestrator/hooks/](harness/src/turn-orchestrator/hooks/) | The `harness::hook::pre-dispatch` surface: `registry.ts` (trigger type + subscriber set), `chain.ts` (`consultPreDispatch`, fail-closed), `types.ts` (HookInput/HookOutput/BindingConfig), `denial.ts` (`DenialEnvelope`, `gateUnavailableEnvelope`, `denialResult`). | +| [src/turn-orchestrator/function-resolve.ts](harness/src/turn-orchestrator/function-resolve.ts) | `harness::function::resolve` + the `function_resolutions` scope helpers (`readResolution`, `deleteResolution`, `enqueueAwaitingApprovalWake`). | +| [src/turn-orchestrator/turn-completed.ts](harness/src/turn-orchestrator/turn-completed.ts) | `harness::turn-completed` trigger type, `emitTurnCompleted`, `terminalStatus`. | +| [src/turn-orchestrator/function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) | `turn::function_awaiting_approval` FSM step. | +| [src/turn-orchestrator/schemas.ts](harness/src/turn-orchestrator/schemas.ts) | All registered-function I/O schemas and types: `RunStartPayloadSchema`, `TurnStepPayloadSchema`, `TurnStateView`, `toView`, `FunctionResolvePayloadSchema`. | | [src/turn-orchestrator/state-runtime/ports.ts](harness/src/turn-orchestrator/state-runtime/ports.ts) | `TurnStatePorts` / `createTurnStatePorts` — shared dependency ports for per-state handlers (incl. `finishSession`). | | [src/turn-orchestrator/provisioning/process.ts](harness/src/turn-orchestrator/provisioning/process.ts) | `turn::provisioning` handler and provisioning pipeline. | | [src/turn-orchestrator/assistant-streaming/process.ts](harness/src/turn-orchestrator/assistant-streaming/process.ts) | `turn::assistant_streaming` handler and stream orchestration. | diff --git a/harness/package.json b/harness/package.json index 47e0fbba..4db503d4 100644 --- a/harness/package.json +++ b/harness/package.json @@ -2,7 +2,7 @@ "name": "harness", "version": "0.5.7", "private": true, - "description": "Node port of the iii harness stack: harness, approval-gate, turn-orchestrator, llm-budget, and side-cars. Conversations persist in the external session-manager worker; LLM providers are standalone llm-router plugin workers.", + "description": "Node port of the iii harness stack: harness, turn-orchestrator, llm-budget, and side-cars. Conversations persist in the external session-manager worker; LLM providers are standalone llm-router plugin workers; approval policy is the standalone approval-gate worker.", "license": "Apache-2.0", "type": "module", "engines": { @@ -22,7 +22,6 @@ "start:all": "node dist/index.js", "dev:all": "tsx --watch src/index.ts", "dev:harness": "tsx src/harness/main.ts", - "dev:approval-gate": "tsx src/approval-gate/main.ts", "dev:turn-orchestrator": "tsx src/turn-orchestrator/main.ts", "dev:llm-budget": "tsx src/llm-budget/main.ts", "dev:hook-fanout": "tsx src/hook-fanout/main.ts", @@ -32,7 +31,6 @@ "bin": { "harness": "./dist/index.js", "iii-harness": "./dist/harness/main.js", - "iii-approval-gate": "./dist/approval-gate/main.js", "iii-turn-orchestrator": "./dist/turn-orchestrator/main.js", "iii-llm-budget": "./dist/llm-budget/main.js", "iii-hook-fanout": "./dist/hook-fanout/main.js", diff --git a/harness/src/approval-gate/denial.ts b/harness/src/approval-gate/denial.ts deleted file mode 100644 index d4a609bd..00000000 --- a/harness/src/approval-gate/denial.ts +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Denial envelope assembly. The recursive redaction tree lives in - * `./redact.ts`; this module only composes the two wire envelopes - * (permissions-side and user-side) and the human-readable deny reason. - */ - -import { redact } from './redact.js'; -import { DENIAL_SCHEMA_VERSION, type DenialEnvelope, type MatchedConstraint } from './schemas.js'; - -export function reasonForPermissionsDeny( - function_id: string, - rule_id: string, - matched: MatchedConstraint | null, -): string { - if (matched) { - return `Permission denied: ${function_id} matched rule ${rule_id} on ${matched.field} ${matched.operator} ${JSON.stringify(matched.value)}. Try different arguments or use a different function.`; - } - return `Permission denied: ${function_id} matched rule ${rule_id}. This function is blocked by policy; try a different function.`; -} - -export function permissionsDenyEnvelope( - function_id: string, - rule_id: string, - matched_constraint: MatchedConstraint | null, - args: unknown, -): DenialEnvelope { - return { - schema_version: DENIAL_SCHEMA_VERSION, - status: 'denied', - denied_by: 'permissions', - function_id, - rule_id, - rule_action: 'deny', - matched_constraint: matched_constraint ?? undefined, - args_excerpt: redact(args), - reason: reasonForPermissionsDeny(function_id, rule_id, matched_constraint), - }; -} - -export function userDenyEnvelope( - function_id: string, - reason: string | null, - args: unknown, -): DenialEnvelope { - return { - schema_version: DENIAL_SCHEMA_VERSION, - status: 'denied', - denied_by: 'user', - function_id, - args_excerpt: redact(args), - reason: reason ?? 'Rejected by operator.', - }; -} diff --git a/harness/src/approval-gate/iii.worker.yaml b/harness/src/approval-gate/iii.worker.yaml deleted file mode 100644 index 0e54c698..00000000 --- a/harness/src/approval-gate/iii.worker.yaml +++ /dev/null @@ -1,14 +0,0 @@ -iii: v1 -name: approval-gate -language: node -deploy: binary -manifest: package.json -bin: iii-approval-gate -description: Registers approval::resolve; persists human decisions to the approvals scope (turn-orchestrator reacts via turn::on_approval). - -runtime: - kind: node - -scripts: - install: pnpm install - start: node ./dist/approval-gate/main.js --config ./config.yaml diff --git a/harness/src/approval-gate/main.ts b/harness/src/approval-gate/main.ts deleted file mode 100644 index 82d4e070..00000000 --- a/harness/src/approval-gate/main.ts +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env node -import { bootstrapWorker } from '../runtime/worker.js'; -import { register } from './resolve.js'; -import { initDefaultMode } from './settings/default-mode.js'; -import { registerSettingsHandlers } from './settings/register.js'; - -await bootstrapWorker({ - name: 'approval-gate', - description: - 'Registers approval::resolve, per-session settings handlers, and routes decisions to the orchestrator.', - register: async (iii) => { - await register(iii); - registerSettingsHandlers(iii); - await initDefaultMode(iii); - }, -}); diff --git a/harness/src/approval-gate/redact.ts b/harness/src/approval-gate/redact.ts deleted file mode 100644 index a10fbe6c..00000000 --- a/harness/src/approval-gate/redact.ts +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Recursive argument redaction for denial envelopes. Pure and immutable: - * never mutates the input. Lives inside the approval-gate worker (not in - * a shared utility) on purpose — keep the worker boundary tight. - */ - -const ARGS_EXCERPT_LEN_CAP = 256; - -/** - * Hard ceiling on how deep we recurse. Args are caller-influenced, so an - * adversarial payload (deeply nested or self-referential) must not overflow - * the stack — past this depth the subtree collapses to a sentinel. - */ -const MAX_REDACT_DEPTH = 64; -const MAX_DEPTH_SENTINEL = ''; - -const REDACT_KEYS = new Set([ - 'password', - 'token', - 'api_key', - 'apikey', - 'secret', - 'auth', - 'authorization', - 'access_key', - 'access_token', - 'refresh_token', - 'private_key', -]); - -function isSecretKey(key: string): boolean { - const lower = key.toLowerCase(); - if (REDACT_KEYS.has(lower)) return true; - for (const suffix of REDACT_KEYS) { - if (lower.endsWith(`_${suffix}`)) return true; - } - return false; -} - -/** - * Truncate by Unicode code point so surrogate pairs aren't sliced. The cap - * is measured in code points too — using UTF-16 `.length` here would falsely - * clip (and ellipsize) astral-heavy strings that are within the cap. - */ -export function clip(s: string): string { - const points = [...s]; - if (points.length <= ARGS_EXCERPT_LEN_CAP) return s; - return `${points.slice(0, ARGS_EXCERPT_LEN_CAP).join('')}…`; -} - -/** - * Walk the value tree, redacting secret-keyed string values and clipping - * long strings. Returns a brand-new tree; the input is never mutated. - * Recursion is depth-capped (see MAX_REDACT_DEPTH) so hostile input can't - * overflow the stack; this also defuses circular references. - */ -export function redact(value: unknown): unknown { - return redactAt(value, 0); -} - -function redactAt(value: unknown, depth: number): unknown { - if (typeof value === 'string') return clip(value); - if (value === null || typeof value !== 'object') return value; - if (depth >= MAX_REDACT_DEPTH) return MAX_DEPTH_SENTINEL; - if (Array.isArray(value)) return value.map((v) => redactAt(v, depth + 1)); - return Object.fromEntries( - Object.entries(value).map(([k, v]) => [ - k, - isSecretKey(k) ? '' : redactAt(v, depth + 1), - ]), - ); -} diff --git a/harness/src/approval-gate/resolve.ts b/harness/src/approval-gate/resolve.ts deleted file mode 100644 index fb4f3a8b..00000000 --- a/harness/src/approval-gate/resolve.ts +++ /dev/null @@ -1,52 +0,0 @@ -/** - * Approval resolution handler. `approval::resolve` persists the decision to the - * shared `approvals` scope; the turn-orchestrator's reactive trigger - * (turn::on_approval) wakes the parked session. - */ - -import type { ISdk } from 'iii-sdk'; -import { logger } from '../runtime/otel.js'; -import { - STATE_SCOPE, - type ResolvePayloadInput, - ResolvePayloadSchema, - pendingKey, - resolveFunctionOptions, -} from './schemas.js'; - -export type ResolveResult = - | { ok: true } - | { ok: false; error: 'invalid_payload' | 'resume_failed' }; - -export async function handleResolveRequest( - iii: ISdk, - payload: ResolvePayloadInput, -): Promise { - const parsed = ResolvePayloadSchema.safeParse(payload); - if (!parsed.success) return { ok: false, error: 'invalid_payload' }; - - const { session_id, function_call_id, decision, reason } = parsed.data; - - try { - await iii.trigger({ - function_id: 'state::set', - payload: { - scope: STATE_SCOPE, - key: pendingKey(session_id, function_call_id), - value: { decision, reason }, - }, - }); - } catch (err) { - logger.error('approval-gate: decision write failed', { err: String(err), session_id }); - return { ok: false, error: 'resume_failed' }; - } - return { ok: true }; -} - -export async function register(iii: ISdk): Promise { - iii.registerFunction( - 'approval::resolve', - async (payload: ResolvePayloadInput) => handleResolveRequest(iii, payload), - resolveFunctionOptions, - ); -} diff --git a/harness/src/approval-gate/schemas.ts b/harness/src/approval-gate/schemas.ts deleted file mode 100644 index 2e1d5c5a..00000000 --- a/harness/src/approval-gate/schemas.ts +++ /dev/null @@ -1,143 +0,0 @@ -/** - * Wire schemas + inferred types for the approval gate. Zod validates - * ingress payloads; exported types cover denial envelopes and shared - * orchestrator contracts. - */ - -import type { RegisterFunctionOptions } from 'iii-sdk'; -import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; - -export const STATE_SCOPE = 'approvals'; -export const SETTINGS_STATE_SCOPE = 'approval_settings'; -export const DENIAL_SCHEMA_VERSION = 1; - -export const PermissionModeSchema = z.enum(['manual', 'auto', 'full']); -export type PermissionMode = z.infer; - -const allowEntrySchema = z.object({ - function_id: z.string().min(1), - granted_at: z.number().int().nonnegative(), - granted_by: z.literal('user_click'), -}); -export type AlwaysAllowEntry = z.infer; - -export const ApprovalSettingsSchema = z.object({ - mode: PermissionModeSchema, - always_allow: z.array(allowEntrySchema), - approved_always: z.array(allowEntrySchema).default([]), - mode_set_at: z.number().int().nonnegative(), -}); -export type ApprovalSettings = z.infer; - -export const DEFAULT_APPROVAL_SETTINGS: ApprovalSettings = { - mode: 'manual', - always_allow: [], - approved_always: [], - mode_set_at: 0, -}; - -const wireDecisionSchema = z.enum(['allow', 'deny', 'aborted']); - -const deniedBySchema = z.enum(['permissions', 'user', 'gate_unavailable']); - -const matchedConstraintSchema = z.object({ - field: z.string(), - operator: z.string(), - value: z.unknown(), -}); -export type MatchedConstraint = z.infer; - -const denialEnvelopeSchema = z.object({ - schema_version: z.literal(DENIAL_SCHEMA_VERSION), - status: z.literal('denied'), - denied_by: deniedBySchema, - function_id: z.string(), - rule_id: z.string().optional(), - rule_action: z.literal('deny').optional(), - matched_constraint: matchedConstraintSchema.optional(), - args_excerpt: z.unknown().optional(), - reason: z.string(), -}); -export type DenialEnvelope = z.infer; - -/** - * Wire payload for `approval::resolve`. Rejects "/" in ids at the boundary — - * it is the reserved separator in the state key. - */ -export const ResolvePayloadSchema = z - .object({ - session_id: z.string().min(1), - function_call_id: z.string().min(1), - decision: wireDecisionSchema, - reason: z.string().nullable().optional(), - }) - .superRefine((v, ctx) => { - if (v.session_id.includes('/')) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['session_id'], - message: 'session_id must not contain "/"', - }); - } - if (v.function_call_id.includes('/')) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['function_call_id'], - message: 'function_call_id must not contain "/"', - }); - } - }) - .transform((v) => ({ - session_id: v.session_id, - function_call_id: v.function_call_id, - decision: v.decision, - reason: v.reason ?? null, - })); -export type ResolvePayloadInput = z.input; - -const policyReplySchema = z.discriminatedUnion('decision', [ - z.object({ - decision: z.literal('allow'), - rule_id: z.string().optional().default(''), - }), - z.object({ - decision: z.literal('deny'), - rule_id: z.string().optional().default(''), - matched_constraint: matchedConstraintSchema.nullable().default(null), - }), -]); - -type PolicyReply = z.infer; -type PolicyOutcome = PolicyReply | { decision: 'needs_approval' }; - -/** Decode `policy::check_permissions` reply; unknown shapes → `needs_approval`. */ -export function parsePolicyReply(value: unknown): PolicyOutcome { - const parsed = policyReplySchema.safeParse(value); - return parsed.success ? parsed.data : { decision: 'needs_approval' }; -} - -/** State key in scope `approvals`: `/`. */ -export function pendingKey(session_id: string, function_call_id: string): string { - if (session_id.includes('/')) throw new Error('session_id must not contain "/"'); - if (function_call_id.includes('/')) { - throw new Error('function_call_id must not contain "/"'); - } - return `${session_id}/${function_call_id}`; -} - -const approvalDecisionSchema = z.enum(['allow', 'deny', 'aborted']); - -export const ApprovalDecisionSchema = z.object({ - decision: approvalDecisionSchema, - reason: z.string().nullable(), -}); - -/** @deprecated Use ApprovalDecisionSchema */ -export const ApprovalResumePayloadSchema = ApprovalDecisionSchema; - -export const resolveFunctionOptions = { - description: - 'Flip an approval to allow or deny. Persists the decision to the approvals scope to wake the parked turn.', - request_format: zodToJsonSchema(ResolvePayloadSchema, { name: 'ResolvePayload' }), -} as RegisterFunctionOptions; diff --git a/harness/src/approval-gate/settings/add-always-allow.ts b/harness/src/approval-gate/settings/add-always-allow.ts deleted file mode 100644 index 0dccb65a..00000000 --- a/harness/src/approval-gate/settings/add-always-allow.ts +++ /dev/null @@ -1,54 +0,0 @@ -import type { RegisterFunctionOptions, RemoteFunctionHandler } from 'iii-sdk'; -import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; -import type { ApprovalSettings } from '../schemas.js'; -import type { ISdk } from '../../runtime/iii.js'; -import { type MutationReply, functionIdField, sessionIdField } from './types.js'; -import { mutationError, ok } from './reply.js'; -import { readSettings, updateSettings } from './store.js'; - -const PayloadSchema = z.object({ - session_id: sessionIdField, - function_id: functionIdField, -}); - -const options = { - description: 'Append a function id to the per-session always-allow list (idempotent).', - request_format: zodToJsonSchema(PayloadSchema, { name: 'AlwaysAllowPayload' }), -} as RegisterFunctionOptions; - -export async function addAlwaysAllow( - iii: ISdk, - session_id: string, - function_id: string, -): Promise { - const current = await readSettings(iii, session_id); - if (current.always_allow.some((entry) => entry.function_id === function_id)) { - return current; - } - // Known race: the pre-read only narrows the duplicate-entry window; concurrent - // adds of the same id can both append. Harmless — matched/removed set-wise. - return updateSettings(iii, session_id, [ - { - type: 'append', - path: 'always_allow', - value: { function_id, granted_at: Date.now(), granted_by: 'user_click' }, - }, - ]); -} - -export function registerAddAlwaysAllow(iii: ISdk): void { - const handler: RemoteFunctionHandler = async ( - payload, - ) => { - const parsed = PayloadSchema.safeParse(payload); - if (!parsed.success) return mutationError(parsed.error.message); - try { - return ok(await addAlwaysAllow(iii, parsed.data.session_id, parsed.data.function_id)); - } catch (err) { - return mutationError(err); - } - }; - - iii.registerFunction('approval::add_always_allow', handler, options); -} diff --git a/harness/src/approval-gate/settings/approve-always.ts b/harness/src/approval-gate/settings/approve-always.ts deleted file mode 100644 index f09de5ee..00000000 --- a/harness/src/approval-gate/settings/approve-always.ts +++ /dev/null @@ -1,55 +0,0 @@ -import type { RegisterFunctionOptions, RemoteFunctionHandler } from 'iii-sdk'; -import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; -import type { ApprovalSettings } from '../schemas.js'; -import type { ISdk } from '../../runtime/iii.js'; -import { type MutationReply, functionIdField, sessionIdField } from './types.js'; -import { mutationError, ok } from './reply.js'; -import { readSettings, updateSettings } from './store.js'; - -const PayloadSchema = z.object({ - session_id: sessionIdField, - function_id: functionIdField, -}); - -const options = { - description: - 'Remember an "approve always" decision for this session — the function id stops prompting in every mode. Idempotent.', - request_format: zodToJsonSchema(PayloadSchema, { name: 'ApproveAlwaysPayload' }), -} as RegisterFunctionOptions; - -/** Idempotent on `function_id`; separate from auto-mode `always_allow`. */ -export async function approveAlways( - iii: ISdk, - session_id: string, - function_id: string, -): Promise { - const current = await readSettings(iii, session_id); - if (current.approved_always.some((entry) => entry.function_id === function_id)) { - return current; - } - // Known race: see addAlwaysAllow — pre-read only narrows the duplicate window. - return updateSettings(iii, session_id, [ - { - type: 'append', - path: 'approved_always', - value: { function_id, granted_at: Date.now(), granted_by: 'user_click' }, - }, - ]); -} - -export function registerApproveAlways(iii: ISdk): void { - const handler: RemoteFunctionHandler = async ( - payload, - ) => { - const parsed = PayloadSchema.safeParse(payload); - if (!parsed.success) return mutationError(parsed.error.message); - try { - return ok(await approveAlways(iii, parsed.data.session_id, parsed.data.function_id)); - } catch (err) { - return mutationError(err); - } - }; - - iii.registerFunction('approval::approve_always', handler, options); -} diff --git a/harness/src/approval-gate/settings/clear-settings.ts b/harness/src/approval-gate/settings/clear-settings.ts deleted file mode 100644 index e6f1fa60..00000000 --- a/harness/src/approval-gate/settings/clear-settings.ts +++ /dev/null @@ -1,31 +0,0 @@ -import type { RegisterFunctionOptions, RemoteFunctionHandler } from 'iii-sdk'; -import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; -import type { ISdk } from '../../runtime/iii.js'; -import { mutationError } from './reply.js'; -import { clearSettings } from './store.js'; -import { type MutationReply, sessionIdField } from './types.js'; - -const PayloadSchema = z.object({ - session_id: sessionIdField, -}); - -const options = { - description: 'Clear the per-session approval settings on conversation deletion.', - request_format: zodToJsonSchema(PayloadSchema, { name: 'ClearSettingsPayload' }), -} as RegisterFunctionOptions; - -export function registerClearSettings(iii: ISdk): void { - const handler: RemoteFunctionHandler = async (payload) => { - const parsed = PayloadSchema.safeParse(payload); - if (!parsed.success) return mutationError(parsed.error.message); - try { - await clearSettings(iii, parsed.data.session_id); - return { ok: true }; - } catch (err) { - return mutationError(err); - } - }; - - iii.registerFunction('approval::clear_settings', handler, options); -} diff --git a/harness/src/approval-gate/settings/default-mode.ts b/harness/src/approval-gate/settings/default-mode.ts deleted file mode 100644 index d60ad363..00000000 --- a/harness/src/approval-gate/settings/default-mode.ts +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Default approval mode, sourced from the `harness` configuration entry's - * `permissions.default_mode`. New sessions (no stored approval settings) - * inherit this mode; sessions with explicit settings keep their own. - * - * Seeded at startup and kept live via a `configuration` trigger so an - * operator editing the harness entry (manual | auto | full) takes effect - * without a restart. Tolerant of a missing configuration worker — the - * built-in 'manual' default stands. - */ - -import { - HARNESS_CONFIG_ID, - type PermissionMode, - readHarnessConfig, -} from '../../runtime/harness-config.js'; -import type { ISdk } from '../../runtime/iii.js'; -import { logger } from '../../runtime/otel.js'; - -let defaultMode: PermissionMode = 'manual'; - -export function getDefaultMode(): PermissionMode { - return defaultMode; -} - -export async function initDefaultMode(iii: ISdk): Promise { - await refresh(iii); - try { - iii.registerFunction('approval::on_harness_config', async () => { - await refresh(iii); - return null; - }); - iii.registerTrigger({ - type: 'configuration', - function_id: 'approval::on_harness_config', - config: { configuration_id: HARNESS_CONFIG_ID }, - }); - } catch (err) { - logger.warn('approval-gate: could not bind harness-config trigger', { err: String(err) }); - } -} - -async function refresh(iii: ISdk): Promise { - try { - const cfg = await readHarnessConfig(iii); - defaultMode = cfg.permissions.default_mode; - } catch (err) { - logger.warn('approval-gate: default-mode refresh failed', { err: String(err) }); - } -} diff --git a/harness/src/approval-gate/settings/get-settings.ts b/harness/src/approval-gate/settings/get-settings.ts deleted file mode 100644 index 46952b98..00000000 --- a/harness/src/approval-gate/settings/get-settings.ts +++ /dev/null @@ -1,24 +0,0 @@ -import type { RegisterFunctionOptions, RemoteFunctionHandler } from 'iii-sdk'; -import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; -import type { ISdk } from '../../runtime/iii.js'; -import { readSettings } from './store.js'; -import { sessionIdField, type GetSettingsReply } from './types.js'; - -const PayloadSchema = z.object({ - session_id: sessionIdField, -}); - -const options = { - description: 'Read the per-session approval settings; returns defaults when none persisted.', - request_format: zodToJsonSchema(PayloadSchema, { name: 'GetSettingsPayload' }), -} as RegisterFunctionOptions; - -export function registerGetSettings(iii: ISdk): void { - const handler: RemoteFunctionHandler = async (payload) => { - const parsed = PayloadSchema.parse(payload); - return readSettings(iii, parsed.session_id); - }; - - iii.registerFunction('approval::get_settings', handler, options); -} diff --git a/harness/src/approval-gate/settings/human-only.ts b/harness/src/approval-gate/settings/human-only.ts deleted file mode 100644 index 8deb9004..00000000 --- a/harness/src/approval-gate/settings/human-only.ts +++ /dev/null @@ -1,14 +0,0 @@ -const HUMAN_ONLY_FUNCTION_IDS = [ - 'approval::set_mode', - 'approval::add_always_allow', - 'approval::remove_always_allow', - 'approval::approve_always', - 'approval::get_settings', - 'approval::clear_settings', -] as const; - -export type HumanOnlyFunctionId = (typeof HUMAN_ONLY_FUNCTION_IDS)[number]; - -export function isHumanOnlyApprovalFunction(function_id: string): boolean { - return (HUMAN_ONLY_FUNCTION_IDS as readonly string[]).includes(function_id); -} diff --git a/harness/src/approval-gate/settings/register.ts b/harness/src/approval-gate/settings/register.ts deleted file mode 100644 index 26cc7026..00000000 --- a/harness/src/approval-gate/settings/register.ts +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Per-session approval settings: permission mode (manual / auto / full), - * the auto-mode `always_allow` list, and the per-session - * `approved_always` grants ("approve always" decisions made from a - * prompt, honored in every mode). - * - * Settings live in shared state under scope `approval_settings`, keyed by - * ``. `policy::check_permissions` reads a snapshot at the - * start of each check; mutations made during a check do not affect that - * check. The handlers registered here are the only writers — every entry - * carries `granted_by: 'user_click'` so audit can distinguish user grants - * from any future programmatic source. - * - * The handlers are intentionally usable only from the frontend RPC path. - * Agent-issued calls funnel through `consultBefore`, which carries a - * hardcoded deny for these function ids; user-initiated SDK calls - * (e.g. `approval::set_mode` from `console/web`) bypass the hook and - * land here directly. - */ - -import type { ISdk } from '../../runtime/iii.js'; -import { registerAddAlwaysAllow } from './add-always-allow.js'; -import { registerApproveAlways } from './approve-always.js'; -import { registerClearSettings } from './clear-settings.js'; -import { registerGetSettings } from './get-settings.js'; -import { registerRemoveAlwaysAllow } from './remove-always-allow.js'; -import { registerSetMode } from './set-mode.js'; - -export function registerSettingsHandlers(iii: ISdk): void { - registerSetMode(iii); - registerAddAlwaysAllow(iii); - registerRemoveAlwaysAllow(iii); - registerApproveAlways(iii); - registerGetSettings(iii); - registerClearSettings(iii); -} diff --git a/harness/src/approval-gate/settings/remove-always-allow.ts b/harness/src/approval-gate/settings/remove-always-allow.ts deleted file mode 100644 index 24a26609..00000000 --- a/harness/src/approval-gate/settings/remove-always-allow.ts +++ /dev/null @@ -1,48 +0,0 @@ -import type { RegisterFunctionOptions, RemoteFunctionHandler } from 'iii-sdk'; -import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; -import type { ApprovalSettings } from '../schemas.js'; -import type { ISdk } from '../../runtime/iii.js'; -import { type MutationReply, functionIdField, sessionIdField } from './types.js'; -import { mutationError, ok } from './reply.js'; -import { readSettings, updateSettings } from './store.js'; - -const PayloadSchema = z.object({ - session_id: sessionIdField, - function_id: functionIdField, -}); - -const options = { - description: 'Remove a function id from the per-session always-allow list.', - request_format: zodToJsonSchema(PayloadSchema, { name: 'RemoveAlwaysAllowPayload' }), -} as RegisterFunctionOptions; - -export async function removeAlwaysAllow( - iii: ISdk, - session_id: string, - function_id: string, -): Promise { - const current = await readSettings(iii, session_id); - const always_allow = current.always_allow.filter((entry) => entry.function_id !== function_id); - // No array-element-remove op, so set just the always_allow field (not the whole - // record). Known race: concurrent add/remove on this field is last-writer-wins. - return updateSettings(iii, session_id, [ - { type: 'set', path: 'always_allow', value: always_allow }, - ]); -} - -export function registerRemoveAlwaysAllow(iii: ISdk): void { - const handler: RemoteFunctionHandler = async ( - payload, - ) => { - const parsed = PayloadSchema.safeParse(payload); - if (!parsed.success) return mutationError(parsed.error.message); - try { - return ok(await removeAlwaysAllow(iii, parsed.data.session_id, parsed.data.function_id)); - } catch (err) { - return mutationError(err); - } - }; - - iii.registerFunction('approval::remove_always_allow', handler, options); -} diff --git a/harness/src/approval-gate/settings/reply.ts b/harness/src/approval-gate/settings/reply.ts deleted file mode 100644 index 45aac581..00000000 --- a/harness/src/approval-gate/settings/reply.ts +++ /dev/null @@ -1,9 +0,0 @@ -import type { MutationReply } from './types.js'; - -export function ok(value: T): T { - return value; -} - -export function mutationError(err: unknown): MutationReply { - return { ok: false, error: String(err) }; -} diff --git a/harness/src/approval-gate/settings/set-mode.ts b/harness/src/approval-gate/settings/set-mode.ts deleted file mode 100644 index b4f61d4c..00000000 --- a/harness/src/approval-gate/settings/set-mode.ts +++ /dev/null @@ -1,46 +0,0 @@ -import type { RegisterFunctionOptions, RemoteFunctionHandler } from 'iii-sdk'; -import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; -import { PermissionModeSchema, type ApprovalSettings, type PermissionMode } from '../schemas.js'; -import type { ISdk } from '../../runtime/iii.js'; -import { mutationError, ok } from './reply.js'; -import { updateSettings } from './store.js'; -import { type MutationReply, sessionIdField } from './types.js'; - -const PayloadSchema = z.object({ - session_id: sessionIdField, - mode: PermissionModeSchema, -}); - -const options = { - description: 'Set the permission mode (manual | auto | full) for a session.', - request_format: zodToJsonSchema(PayloadSchema, { name: 'SetModePayload' }), -} as RegisterFunctionOptions; - -export async function setMode( - iii: ISdk, - session_id: string, - mode: PermissionMode, -): Promise { - // Field-scoped, no read: disjoint from add_always_allow/approve_always so both survive. - return updateSettings(iii, session_id, [ - { type: 'set', path: 'mode', value: mode }, - { type: 'set', path: 'mode_set_at', value: Date.now() }, - ]); -} - -export function registerSetMode(iii: ISdk): void { - const handler: RemoteFunctionHandler = async ( - payload, - ) => { - const parsed = PayloadSchema.safeParse(payload); - if (!parsed.success) return mutationError(parsed.error.message); - try { - return ok(await setMode(iii, parsed.data.session_id, parsed.data.mode)); - } catch (err) { - return mutationError(err); - } - }; - - iii.registerFunction('approval::set_mode', handler, options); -} diff --git a/harness/src/approval-gate/settings/store.ts b/harness/src/approval-gate/settings/store.ts deleted file mode 100644 index b7d6d582..00000000 --- a/harness/src/approval-gate/settings/store.ts +++ /dev/null @@ -1,68 +0,0 @@ -import { - ApprovalSettingsSchema, - DEFAULT_APPROVAL_SETTINGS, - SETTINGS_STATE_SCOPE, - type ApprovalSettings, -} from '../schemas.js'; -import type { ISdk } from '../../runtime/iii.js'; -import { createState, type UpdateOp } from '../../runtime/state.js'; -import { getDefaultMode } from './default-mode.js'; - -// Reads degrade to defaults on failure (tolerant); writes rethrow so handlers -// can reply with mutationError (strict). -const tolerantState = (iii: ISdk) => createState(iii, { tolerant: true }); -const strictState = (iii: ISdk) => createState(iii, { tolerant: false }); - -/** Default settings; `mode` follows the operator-configured permissions.default_mode. */ -function defaultSettings(): ApprovalSettings { - return { ...DEFAULT_APPROVAL_SETTINGS, mode: getDefaultMode() }; -} - -/** - * Backfill missing fields from defaults, then validate. Field-scoped writes can - * persist a partial record (the first add_always_allow stores only - * `{ always_allow }`); merging over defaults keeps it valid on read. - */ -export function parseSettings(raw: unknown): ApprovalSettings { - const base = defaultSettings(); - const merged = - raw && typeof raw === 'object' && !Array.isArray(raw) - ? { ...base, ...(raw as Record) } - : base; - const parsed = ApprovalSettingsSchema.safeParse(merged); - return parsed.success ? parsed.data : base; -} - -export async function readSettings(iii: ISdk, session_id: string): Promise { - const raw = await tolerantState(iii).get({ - scope: SETTINGS_STATE_SCOPE, - key: session_id, - }); - return parseSettings(raw); -} - -/** - * Apply field-scoped ops atomically. Each op targets one field, so concurrent - * mutations of different fields compose under the engine's per-key write-lock - * instead of clobbering the whole record (the old read/write-whole lost update). - */ -export async function updateSettings( - iii: ISdk, - session_id: string, - ops: UpdateOp[], -): Promise { - const result = await strictState(iii).update({ - scope: SETTINGS_STATE_SCOPE, - key: session_id, - ops, - }); - if (result?.errors && result.errors.length > 0) { - throw new Error(`approval-settings update rejected: ${JSON.stringify(result.errors)}`); - } - return parseSettings(result?.new_value ?? null); -} - -export async function clearSettings(iii: ISdk, session_id: string): Promise { - // Delete the key rather than write a null tombstone; reads default either way. - await strictState(iii).delete({ scope: SETTINGS_STATE_SCOPE, key: session_id }); -} diff --git a/harness/src/approval-gate/settings/types.ts b/harness/src/approval-gate/settings/types.ts deleted file mode 100644 index f0cc442c..00000000 --- a/harness/src/approval-gate/settings/types.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { z } from 'zod'; -import type { ApprovalSettings } from '../schemas.js'; - -export type { ApprovalSettings, PermissionMode } from '../schemas.js'; - -export type GetSettingsReply = ApprovalSettings; -export type MutationReply = { ok: true } | { ok: false; error: string }; - -export const sessionIdField = z - .string() - .min(1) - .refine((v) => !v.includes('/'), 'session_id must not contain "/"'); - -export const functionIdField = z.string().min(1); diff --git a/harness/src/harness/iii.worker.yaml b/harness/src/harness/iii.worker.yaml index a7488825..559c989b 100644 --- a/harness/src/harness/iii.worker.yaml +++ b/harness/src/harness/iii.worker.yaml @@ -24,7 +24,6 @@ dependencies: turn-orchestrator: "^0.2.0" llm-router: "^0.1.0" shell: "^0.3.0" - approval-gate: "^0.2.0" session: "^0.2.0" hook-fanout: "^0.2.0" configuration: "^0.11.0" diff --git a/harness/src/harness/permissions-config.ts b/harness/src/harness/permissions-config.ts deleted file mode 100644 index ed392bff..00000000 --- a/harness/src/harness/permissions-config.ts +++ /dev/null @@ -1,71 +0,0 @@ -/** - * Registers the `harness` configuration entry — permissions only. Provider - * credentials/settings moved to the `llm-router` entry, whose schema the - * router composes from provider declarations; this entry keeps the agent - * permissions block (and nothing else) editable in the console. - * - * Root `additionalProperties` stays true so a stale `providers` block left - * behind by the pre-router layout never fails value validation; the migration - * (`migrate-llm-router-config.ts`) copies it, it just stops being editable. - */ - -import { - configurationGet, - configurationRegister, - type JsonValue, -} from '../runtime/configuration.js'; -import { - DEFAULT_PERMISSION_MODE, - HARNESS_CONFIG_ID, - PERMISSION_MODES, -} from '../runtime/harness-config.js'; -import type { ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; - -const ENTRY_DESCRIPTION = 'Agent permissions, managed by the harness.'; - -export async function registerHarnessConfigEntry(iii: ISdk): Promise { - const schema = { - $schema: 'http://json-schema.org/draft-07/schema#', - type: 'object', - title: HARNESS_CONFIG_ID, - properties: { - permissions: { - type: 'object', - title: 'permissions', - properties: { - default_mode: { - type: 'string', - title: 'default mode', - description: 'Default approval mode applied to new agent sessions.', - enum: [...PERMISSION_MODES], - default: DEFAULT_PERMISSION_MODE, - }, - }, - required: ['default_mode'], - additionalProperties: false, - }, - }, - required: ['permissions'], - additionalProperties: true, - }; - - try { - // Read the stored template form so re-registration preserves any - // operator-set value; only seed on the very first registration. - const existing = await configurationGet(iii, HARNESS_CONFIG_ID, { raw: true }); - await configurationRegister(iii, { - id: HARNESS_CONFIG_ID, - name: HARNESS_CONFIG_ID, - description: ENTRY_DESCRIPTION, - schema, - ...(existing === null && { - initial_value: { - permissions: { default_mode: DEFAULT_PERMISSION_MODE }, - } as unknown as JsonValue, - }), - }); - } catch (err) { - logger.warn('harness: configuration::register failed', { err: String(err) }); - } -} diff --git a/harness/src/harness/register.ts b/harness/src/harness/register.ts index 5015257f..f0c1c520 100644 --- a/harness/src/harness/register.ts +++ b/harness/src/harness/register.ts @@ -5,7 +5,6 @@ import { loadHarnessConfig } from './config.js'; import { spawnPumps } from './fanout/index.js'; import { register as registerFs } from './fs.js'; import { migrateLlmRouterConfig } from './migrate-llm-router-config.js'; -import { registerHarnessConfigEntry } from './permissions-config.js'; import { registerPolicy } from './policy/check-permissions.js'; import { loadAndWatch } from './policy/handle.js'; import { FanoutState, registerSubscriptions } from './ui-subscribe.js'; @@ -21,10 +20,6 @@ export async function register(iii: ISdk, ctx: { configPath: string; url: string spawnPumps(iii, fanoutState); registerFs(iii, ctx.url); - // Provider credentials/settings live in the router-owned `llm-router` - // entry; the `harness` entry keeps only the permissions block. Paste-a-key - // reactivity is the router's configuration trigger now. - await registerHarnessConfigEntry(iii); // One-time copy of any pre-router provider config into the llm-router // entry (internal retries — the router composes that entry's schema only // after the first provider registers, so this must not block boot). diff --git a/harness/src/index.ts b/harness/src/index.ts index 2abd37d1..5cfa8788 100644 --- a/harness/src/index.ts +++ b/harness/src/index.ts @@ -8,9 +8,6 @@ */ import { Command } from 'commander'; -import { register as registerApprovalGate } from './approval-gate/resolve.js'; -import { initDefaultMode } from './approval-gate/settings/default-mode.js'; -import { registerSettingsHandlers as registerApprovalSettings } from './approval-gate/settings/register.js'; import { register as registerContextCompaction } from './context-compaction/register.js'; import { register as registerHarness } from './harness/register.js'; import { register as registerHookFanout } from './hook-fanout/register.js'; @@ -40,16 +37,6 @@ const WORKERS: readonly WorkerDefinition[] = [ 'Durable run::start state machine driving each agent turn through provisioning, assistant, and function-execute.', register: (iii, ctx) => registerTurnOrchestrator(iii, ctx), }, - { - name: 'approval-gate', - description: - 'Registers approval::resolve and the per-session mode/allow-list settings handlers; persists human decisions to the approvals and approval_settings scopes.', - register: async (iii) => { - await registerApprovalGate(iii); - registerApprovalSettings(iii); - await initDefaultMode(iii); - }, - }, { name: 'hook-fanout', description: diff --git a/harness/src/runtime/harness-config.ts b/harness/src/runtime/harness-config.ts deleted file mode 100644 index d6f3367a..00000000 --- a/harness/src/runtime/harness-config.ts +++ /dev/null @@ -1,64 +0,0 @@ -/** - * Shape of the single `harness` configuration entry that lives in the - * built-in `configuration` worker — the agent `permissions` block only. - * Provider credentials/settings moved to the `llm-router` entry, whose - * schema the router composes from provider declarations - * (see `harness/migrate-llm-router-config.ts` for the one-time copy). - * - * Value shape: - * - * { - * "permissions": { "default_mode": "manual" | "auto" | "full" } - * } - * - * A stale `providers` block from the pre-router layout may still be present - * in stored values; it is ignored here and tolerated by the entry schema. - */ - -import { configurationGet, type JsonValue } from './configuration.js'; -import type { ISdk } from './iii.js'; - -/** The id of the harness configuration entry. */ -export const HARNESS_CONFIG_ID = 'harness'; - -export const PERMISSION_MODES = ['manual', 'auto', 'full'] as const; -export type PermissionMode = (typeof PERMISSION_MODES)[number]; - -export type HarnessPermissions = { - default_mode: PermissionMode; -}; - -export type HarnessConfigValue = { - permissions: HarnessPermissions; -}; - -export const DEFAULT_PERMISSION_MODE: PermissionMode = 'manual'; - -function isPermissionMode(v: unknown): v is PermissionMode { - return v === 'manual' || v === 'auto' || v === 'full'; -} - -/** Coerce an arbitrary JSON value into a well-formed `HarnessConfigValue`. */ -export function normalizeHarnessConfig(value: JsonValue | null): HarnessConfigValue { - const base: HarnessConfigValue = { - permissions: { default_mode: DEFAULT_PERMISSION_MODE }, - }; - if (!value || typeof value !== 'object' || Array.isArray(value)) return base; - const obj = value as Record; - - const permissions = obj.permissions; - if (permissions && typeof permissions === 'object' && !Array.isArray(permissions)) { - const mode = (permissions as Record).default_mode; - if (isPermissionMode(mode)) base.permissions.default_mode = mode; - } - return base; -} - -/** - * Read the `harness` entry value, env-expanded and normalized. Tolerant: - * returns the base value when the entry is missing or the worker is down. - */ -export async function readHarnessConfig(iii: ISdk): Promise { - const value = await configurationGet(iii, HARNESS_CONFIG_ID, { raw: false }); - return normalizeHarnessConfig(value); -} diff --git a/harness/src/runtime/iii.ts b/harness/src/runtime/iii.ts index ae630d86..ab5b0cea 100644 --- a/harness/src/runtime/iii.ts +++ b/harness/src/runtime/iii.ts @@ -17,6 +17,9 @@ export type { RegisterFunctionInput, RegisterFunctionOptions, RegisterTriggerInput, + RegisterTriggerTypeInput, RemoteFunctionHandler, EnqueueResult, + TriggerConfig, + TriggerHandler, } from 'iii-sdk'; diff --git a/harness/src/runtime/stream.ts b/harness/src/runtime/stream.ts index 1d7c2e56..abd425b6 100644 --- a/harness/src/runtime/stream.ts +++ b/harness/src/runtime/stream.ts @@ -1,7 +1,6 @@ /** * `stream::set` wrapper used to emit AgentEvent / hook reply / arbitrary - * stream frames. Mirrors `turn-orchestrator/src/events.rs` and the - * approval-gate stream writers. + * stream frames. Mirrors `turn-orchestrator/src/events.rs`. */ import type { ISdk } from 'iii-sdk'; diff --git a/harness/src/turn-orchestrator/agent-trigger.ts b/harness/src/turn-orchestrator/agent-trigger.ts index b66a4274..4a1e1807 100644 --- a/harness/src/turn-orchestrator/agent-trigger.ts +++ b/harness/src/turn-orchestrator/agent-trigger.ts @@ -1,21 +1,34 @@ /** - * Agent function-call dispatcher + approval chokepoint. + * Agent function-call dispatcher + hook chokepoint. * * `dispatchWithHook` is the single chokepoint for FSM-issued calls: every - * agent function call goes through `consultBefore` before reaching the inner - * trigger. `triggerFunctionCall` is the shared trigger/decode/error path - * used by both the hook gate and pre-approved resume execution. + * agent function call consults the `harness::hook::pre-dispatch` chain + * before reaching the inner trigger. `triggerFunctionCall` is the shared + * trigger/decode/error path used by both the hook gate and released + * (pre-approved) resume execution. */ import { IIIInvocationError, type ISdk } from '../runtime/iii.js'; import { z } from 'zod'; import type { ContentBlock } from '../types/content.js'; import type { FunctionCall, FunctionResult } from '../types/function.js'; -import { type DenialEnvelope, consultBefore, gateUnavailableEnvelope } from './hook.js'; +import { consultPreDispatch } from './hooks/chain.js'; +import { type DenialEnvelope, denialResult, gateUnavailableEnvelope } from './hooks/denial.js'; +import type { HookInput } from './hooks/types.js'; export const TOOL_NAME = 'agent_trigger'; -export type DispatchResult = { kind: 'result'; result: FunctionResult } | { kind: 'pending' }; +/** Turn/session identity threaded from the FSM record into the hook chain. */ +export type DispatchContext = { + session_id: string; + turn_id: string; + step?: number; + metadata?: Record; +}; + +export type DispatchResult = + | { kind: 'result'; result: FunctionResult } + | { kind: 'pending'; held_by: string; pending_timeout_ms: number }; export function missingFunctionResult(): FunctionResult { return errorResult({ @@ -88,14 +101,6 @@ function errorResult(envelope: Record): FunctionResult { return { content: [text], details: envelope, terminate: false }; } -function denialResult(denial: DenialEnvelope): FunctionResult { - return { - content: [{ type: 'text', text: denial.reason }], - details: denial, - terminate: false, - }; -} - function decodeOrPassthrough(value: unknown): FunctionResult { if ( value && @@ -344,26 +349,44 @@ export async function triggerFunctionCall( /** * `function_call` carries the routing envelope (session id + call identity) that - * the approval hook and policy worker read. `targetCall` is what actually + * the pre_dispatch hooks and policy worker read. `targetCall` is what actually * reaches the target function. They differ on purpose: the envelope spreads * routing fields (including `function_id` = the TARGET id) at the top level, * which would otherwise CLOBBER a same-named argument in the caller's payload — * e.g. `engine::functions::info { function_id: "sandbox::create" }` would arrive * as `function_id: "engine::functions::info"` and the engine would describe - * itself. The hook needs the envelope; the target must get the caller's + * itself. The hooks need the envelope; the target must get the caller's * untouched arguments. Defaults to `function_call` for callers that don't split. */ export async function dispatchWithHook( iii: ISdk, + ctx: DispatchContext, function_call: FunctionCall, targetCall: FunctionCall = function_call, ): Promise { - const outcome = await consultBefore(iii, function_call); + const input: HookInput = { + point: 'pre_dispatch', + session_id: ctx.session_id, + turn_id: ctx.turn_id, + step: ctx.step, + depth: 0, + metadata: ctx.metadata, + call: { + id: function_call.id, + function_id: function_call.function_id, + arguments: function_call.arguments, + }, + }; + const outcome = await consultPreDispatch(iii, input); if (outcome.kind === 'deny') { return { kind: 'result', result: denialResult(outcome.denial) }; } - if (outcome.kind === 'pending') { - return { kind: 'pending' }; + if (outcome.kind === 'hold') { + return { + kind: 'pending', + held_by: outcome.held_by, + pending_timeout_ms: outcome.pending_timeout_ms, + }; } const result = await triggerFunctionCall(iii, targetCall); diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/ports.ts b/harness/src/turn-orchestrator/function-awaiting-approval/ports.ts index 350d3877..8d751571 100644 --- a/harness/src/turn-orchestrator/function-awaiting-approval/ports.ts +++ b/harness/src/turn-orchestrator/function-awaiting-approval/ports.ts @@ -1,31 +1,24 @@ /** - * Typed dependency ports and domain types for function_awaiting_approval. + * Typed dependency ports for function_awaiting_approval. Decisions arrive + * via `harness::function::resolve` rows in the `function_resolutions` + * scope; the wake consumes (reads, applies, deletes) them. */ -import { ApprovalDecisionSchema, STATE_SCOPE } from '../../approval-gate/schemas.js'; import type { ISdk } from '../../runtime/iii.js'; -import type { z } from 'zod'; -export type ApprovalDecision = z.infer; - -/** Decode stored approval decision from `state::get` (scope `approvals`). */ -export function parseApprovalDecision(value: unknown): ApprovalDecision | null { - const parsed = ApprovalDecisionSchema.safeParse(value); - return parsed.success ? parsed.data : null; -} +import { deleteResolution, readResolution, type FunctionResolution } from '../function-resolve.js'; export type AwaitingApprovalPorts = { - readDecision(session_id: string, function_call_id: string): Promise; + readDecision(session_id: string, function_call_id: string): Promise; + deleteDecision(session_id: string, function_call_id: string): Promise; }; export function createAwaitingApprovalPorts(iii: ISdk): AwaitingApprovalPorts { return { - async readDecision(session_id, function_call_id) { - const key = `${session_id}/${function_call_id}`; - const raw = await iii.trigger({ - function_id: 'state::get', - payload: { scope: STATE_SCOPE, key }, - }); - return parseApprovalDecision(raw); + readDecision(session_id, function_call_id) { + return readResolution(iii, session_id, function_call_id); + }, + deleteDecision(session_id, function_call_id) { + return deleteResolution(iii, session_id, function_call_id); }, }; } diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/process.ts b/harness/src/turn-orchestrator/function-awaiting-approval/process.ts index a71fe278..5b1173fc 100644 --- a/harness/src/turn-orchestrator/function-awaiting-approval/process.ts +++ b/harness/src/turn-orchestrator/function-awaiting-approval/process.ts @@ -1,44 +1,23 @@ /** - * Read approval decisions, execute resolved calls individually, and register the FSM step. + * Apply pending `function_resolutions` rows to parked calls and register + * the FSM step. Wakes arrive directly from `harness::function::resolve` + * (no state trigger): the resolve handler enqueues + * `turn::function_awaiting_approval` after persisting the decision. */ -import { TriggerAction, type ISdk } from '../../runtime/iii.js'; -import { logger } from '../../runtime/otel.js'; +import type { ISdk } from '../../runtime/iii.js'; import { createPorts } from '../function-execute/ports.js'; +import { enqueueAwaitingApprovalWake } from '../function-resolve.js'; import { runTransition } from '../run-transition.js'; import { - ApprovalDecisionEventSchema, TurnStepPayloadSchema, parseFunctionBatchRecord, type TurnStepPayload, } from '../schemas.js'; -import { TURN_STEP_QUEUE } from '../state-runtime/store.js'; import type { TurnStateRecord } from '../state.js'; import { createAwaitingApprovalPorts } from './ports.js'; import { processResolvedApprovals, routeAfterApprovalProcessing } from './run.js'; -/** Enqueue one `turn::function_awaiting_approval` wake on the turn-step queue. */ -async function enqueueAwaitingApprovalWake(iii: ISdk, session_id: string): Promise { - await iii.trigger({ - function_id: 'turn::function_awaiting_approval', - payload: { session_id }, - action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), - }); -} - -export async function handleApprovalStateWrite(iii: ISdk, event: unknown): Promise { - const parsed = ApprovalDecisionEventSchema.safeParse(event); - if (!parsed.success) return; - try { - await enqueueAwaitingApprovalWake(iii, parsed.data.session_id); - } catch (err) { - logger.warn('turn::on_approval: wake failed', { - session_id: parsed.data.session_id, - err: String(err), - }); - } -} - export async function handleAwaitingApproval(iii: ISdk, rec: TurnStateRecord): Promise { const batch = parseFunctionBatchRecord(rec); const executePorts = createPorts(iii); @@ -68,22 +47,7 @@ export function register(iii: ISdk): void { }, { description: - 'Run one durable FSM transition for session in state function_awaiting_approval: execute each call as its approval decision arrives.', - }, - ); - - iii.registerFunction( - 'turn::on_approval', - async (event: unknown) => handleApprovalStateWrite(iii, event), - { - description: - 'State trigger on scope=approvals; enqueues turn::function_awaiting_approval when a decision is written.', + 'Run one durable FSM transition for session in state function_awaiting_approval: execute each call as its resolution arrives via harness::function::resolve.', }, ); - - iii.registerTrigger({ - type: 'state', - function_id: 'turn::on_approval', - config: { scope: 'approvals' }, - }); } diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/run.ts b/harness/src/turn-orchestrator/function-awaiting-approval/run.ts index 316192b7..0699cd3b 100644 --- a/harness/src/turn-orchestrator/function-awaiting-approval/run.ts +++ b/harness/src/turn-orchestrator/function-awaiting-approval/run.ts @@ -1,58 +1,45 @@ /** - * Resolve approval decisions and route the batch after each decision. + * Consume `function_resolutions` rows for parked calls and route the + * batch after each settled call. */ -import { text } from '../../types/content.js'; -import type { FunctionResult } from '../../types/function.js'; -import { finalizeBatch, runOneCall } from '../function-execute/run.js'; +import { finalizeBatch, dispatchContext, runOneCall } from '../function-execute/run.js'; import type { FunctionExecutePorts } from '../function-execute/ports.js'; import type { PreparedCall } from '../function-execute/types.js'; import { isBatchComplete } from '../function-execute/types.js'; +import type { FunctionResolution } from '../function-resolve.js'; import { transitionTo, type FunctionBatchTurnRecord } from '../state.js'; -import type { ApprovalDecision, AwaitingApprovalPorts } from './ports.js'; - -export function denialResultFromDecision(decision: ApprovalDecision): FunctionResult { - const reason = - decision.reason ?? (decision.decision === 'aborted' ? 'session_aborted' : 'denied'); - const message = - decision.decision === 'aborted' - ? `Function call aborted: ${reason}` - : `Permission denied by user: ${reason}`; - return { - content: [text(message)], - details: { - approval_denied: true, - decision: decision.decision, - reason, - }, - terminate: decision.decision === 'aborted', - }; -} +import type { AwaitingApprovalPorts } from './ports.js'; export function applyDecisionToPrepared( current: PreparedCall, - decision: ApprovalDecision, + decision: FunctionResolution, ): PreparedCall { - if (decision.decision === 'allow') { + if (decision.action === 'execute') { return { route: 'pre_approved', call: current.call }; } return { route: 'synthetic', call: current.call, - result: denialResultFromDecision(decision), + result: { + content: decision.content, + details: decision.details ?? {}, + terminate: false, + }, + is_error: decision.is_error, }; } /** - * Apply every available approval decision to the parked batch, returning how - * many calls this wake executed. + * Apply every available resolution to the parked batch, returning how + * many calls this wake settled. * - * Re-scans until a full pass resolves nothing new: executing an approved call - * can write a sibling's decision as a side effect (a parallel approve-all), and - * that sibling — already read-and-skipped earlier in the same pass — would - * otherwise stay parked until its own wake fires. When that wake was dropped - * (it lost the lease race and exhausted the queue's retries), only a re-scan - * within this wake can drain it. + * Re-scans until a full pass resolves nothing new: executing a released + * call can settle a sibling as a side effect, and that sibling — already + * read-and-skipped earlier in the same pass — would otherwise stay parked + * until its own wake fires. When that wake was dropped (it lost the lease + * race and exhausted the queue's retries), only a re-scan within this + * wake can drain it. */ export async function processResolvedApprovals( readPorts: AwaitingApprovalPorts, @@ -62,6 +49,7 @@ export async function processResolvedApprovals( const work = rec.work; let awaiting = [...rec.awaiting_approval]; const executed = { ...work.executed }; + const ctx = dispatchContext(rec); let resolvedCount = 0; let resolvedThisPass = true; @@ -73,6 +61,8 @@ export async function processResolvedApprovals( if (executed[callId]) { awaiting = awaiting.filter((e) => e.function_call_id !== callId); + // Crash-between-execute-and-delete replays land here. + await readPorts.deleteDecision(rec.session_id, callId); continue; } @@ -83,11 +73,12 @@ export async function processResolvedApprovals( if (!current) continue; const resolved = applyDecisionToPrepared(current, decision); - await runOneCall(executePorts, rec.session_id, resolved, executed, { skipStart: true }); + await runOneCall(executePorts, ctx, resolved, executed, { skipStart: true }); awaiting = awaiting.filter((e) => e.function_call_id !== callId); rec.work = { prepared: work.prepared, executed }; await executePorts.checkpoint(rec); + await readPorts.deleteDecision(rec.session_id, callId); resolvedCount += 1; resolvedThisPass = true; } diff --git a/harness/src/turn-orchestrator/function-execute/ports.ts b/harness/src/turn-orchestrator/function-execute/ports.ts index 45673ad8..4280adca 100644 --- a/harness/src/turn-orchestrator/function-execute/ports.ts +++ b/harness/src/turn-orchestrator/function-execute/ports.ts @@ -3,7 +3,7 @@ */ import { z } from 'zod'; -import type { DispatchResult } from '../agent-trigger.js'; +import type { DispatchContext, DispatchResult } from '../agent-trigger.js'; import { dispatchWithHook, triggerFunctionCall } from '../agent-trigger.js'; import { emit } from '../events.js'; import type { ISdk } from '../../runtime/iii.js'; @@ -53,7 +53,7 @@ export type FunctionExecutePorts = TurnStatePorts & { emitEnd(session_id: string, executed: ExecutedCall): Promise; /** Generic agent-event emit, for the inline max_turns end (message_complete). */ emit(session_id: string, event: AgentEvent): Promise; - dispatch(call: FunctionCall, session_id: string): Promise; + dispatch(call: FunctionCall, ctx: DispatchContext): Promise; triggerPreApproved(call: FunctionCall): Promise; }; @@ -92,8 +92,8 @@ export function createPorts(iii: ISdk): FunctionExecutePorts { return emit(iii, session_id, event); }, - async dispatch(call, session_id) { - return dispatchWithHook(iii, withRoutingEnvelope(call, session_id), call); + async dispatch(call, ctx) { + return dispatchWithHook(iii, ctx, withRoutingEnvelope(call, ctx.session_id), call); }, async triggerPreApproved(call) { diff --git a/harness/src/turn-orchestrator/function-execute/process.ts b/harness/src/turn-orchestrator/function-execute/process.ts index dbec00d2..c6c7f8b5 100644 --- a/harness/src/turn-orchestrator/function-execute/process.ts +++ b/harness/src/turn-orchestrator/function-execute/process.ts @@ -29,6 +29,10 @@ export async function handleExecute(iii: ISdk, rec: TurnStateRecord): Promise { switch (prepared.route) { case 'synthetic': - return { kind: 'resolved', result: prepared.result, is_error: true }; + return { kind: 'resolved', result: prepared.result, is_error: prepared.is_error ?? true }; case 'pre_approved': { const result = await ports.triggerPreApproved(prepared.call); return { kind: 'resolved', result, is_error: isErrorResult(result) }; } case 'dispatch': { - const out = await ports.dispatch(prepared.call, session_id); + const out = await ports.dispatch(prepared.call, ctx); if (out.kind === 'pending') { return { kind: 'pending' }; } @@ -101,12 +112,13 @@ export type RunOneCallOptions = { export async function runOneCall( ports: FunctionExecutePorts, - session_id: string, + ctx: DispatchContext, prepared: PreparedCall, executed: Record, opts?: RunOneCallOptions, ): Promise { const call: FunctionCall = prepared.call; + const { session_id } = ctx; const prior = executed[call.id]; if (prior) { @@ -119,7 +131,7 @@ export async function runOneCall( } const startedAt = Date.now(); - const resolved = await resolvePreparedCall(ports, prepared, session_id); + const resolved = await resolvePreparedCall(ports, prepared, ctx); if (resolved.kind === 'pending') { return { kind: 'pending', call }; } @@ -143,13 +155,14 @@ export async function runBatch( const executed = { ...rec.work.executed }; const awaitingIds = new Set(rec.awaiting_approval.map((entry) => entry.function_call_id)); const newPending: PendingApproval[] = []; + const ctx = dispatchContext(rec); for (const item of prepared) { const callId = preparedCallId(item); if (executed[callId]) continue; if (awaitingIds.has(callId)) continue; - const outcome = await runOneCall(ports, rec.session_id, item, executed); + const outcome = await runOneCall(ports, ctx, item, executed); if (outcome.kind === 'pending') { newPending.push({ diff --git a/harness/src/turn-orchestrator/function-execute/types.ts b/harness/src/turn-orchestrator/function-execute/types.ts index e6b67143..10079331 100644 --- a/harness/src/turn-orchestrator/function-execute/types.ts +++ b/harness/src/turn-orchestrator/function-execute/types.ts @@ -8,7 +8,12 @@ import type { FunctionCall, FunctionResult } from '../../types/function.js'; export type PreparedCall = | { route: 'dispatch'; call: FunctionCall } | { route: 'pre_approved'; call: FunctionCall } - | { route: 'synthetic'; call: FunctionCall; result: FunctionResult }; + /** + * Pre-rendered result, never dispatched. `is_error` defaults to true + * (denials, missing-function shims); a delivered non-error result + * (`harness::function::resolve` action "deliver") sets it explicitly. + */ + | { route: 'synthetic'; call: FunctionCall; result: FunctionResult; is_error?: boolean }; export type ExecutedCall = { call: FunctionCall; diff --git a/harness/src/turn-orchestrator/function-resolve.ts b/harness/src/turn-orchestrator/function-resolve.ts new file mode 100644 index 00000000..289f01f3 --- /dev/null +++ b/harness/src/turn-orchestrator/function-resolve.ts @@ -0,0 +1,165 @@ +/** + * `harness::function::resolve` — the release/deliver surface for held + * function calls. The approval-gate worker (or any pre_dispatch hook + * owner) calls it to settle a call its hook parked: + * + * - `action: "execute"` — the call was approved: the awaiting-approval + * wake re-runs it through the normal execution path (released calls + * do not re-consult the hook chain). + * - `action: "deliver"` — answer the call with the given + * content/details/is_error WITHOUT executing (user deny, sweep + * timeout). + * + * The decision is persisted to the `function_resolutions` scope and the + * parked turn is woken directly on the turn-step queue. Rows are deleted + * by the wake after consumption — this scope never accumulates. + * + * Unknown `{session_id, function_call_id}` (or a stale `turn_id`) returns + * `{resolved: false}`, never an error: duplicate decisions and sweep + * races are benign by contract. + */ + +import { TriggerAction, type ISdk } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; +import { stateDelete, stateGet, stateSet } from '../runtime/state.js'; +import type { ContentBlock } from '../types/content.js'; +import { + FunctionResolvePayloadSchema, + type FunctionResolvePayload, + type FunctionResolveResult, +} from './schemas.js'; +import { createTurnStore, TURN_STEP_QUEUE } from './state-runtime/store.js'; +import { FUNCTION_BATCH_STATES, type TurnStateRecord } from './state.js'; + +/** Pending decisions for parked calls; key `/`. */ +export const FUNCTION_RESOLUTIONS_SCOPE = 'function_resolutions'; + +export function resolutionKey(session_id: string, function_call_id: string): string { + return `${session_id}/${function_call_id}`; +} + +export type FunctionResolution = + | { turn_id: string; action: 'execute'; resolved_at: number } + | { + turn_id: string; + action: 'deliver'; + is_error: boolean; + content: ContentBlock[]; + details?: unknown; + resolved_at: number; + }; + +export async function readResolution( + iii: ISdk, + session_id: string, + function_call_id: string, +): Promise { + const raw = await stateGet( + iii, + FUNCTION_RESOLUTIONS_SCOPE, + resolutionKey(session_id, function_call_id), + ); + if (!raw || typeof raw !== 'object') return null; + if (raw.action !== 'execute' && raw.action !== 'deliver') return null; + return raw; +} + +export async function deleteResolution( + iii: ISdk, + session_id: string, + function_call_id: string, +): Promise { + await stateDelete(iii, FUNCTION_RESOLUTIONS_SCOPE, resolutionKey(session_id, function_call_id)); +} + +function toResolution(payload: FunctionResolvePayload): FunctionResolution { + if (payload.action === 'execute') { + return { turn_id: payload.turn_id, action: 'execute', resolved_at: Date.now() }; + } + return { + turn_id: payload.turn_id, + action: 'deliver', + is_error: payload.is_error ?? false, + content: (payload.content ?? []) as ContentBlock[], + details: payload.details, + resolved_at: Date.now(), + }; +} + +/** Enqueue one `turn::function_awaiting_approval` wake on the turn-step queue. */ +export async function enqueueAwaitingApprovalWake(iii: ISdk, session_id: string): Promise { + try { + await iii.trigger({ + function_id: 'turn::function_awaiting_approval', + payload: { session_id }, + action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), + }); + return true; + } catch (err) { + logger.error('function::resolve wake failed; turn stays parked until another resolve', { + session_id, + err: String(err), + }); + return false; + } +} + +/** True when the record currently holds this call awaiting a decision. */ +function isHeld(rec: TurnStateRecord, payload: FunctionResolvePayload): boolean { + if (rec.turn_id !== payload.turn_id) return false; + if (!(FUNCTION_BATCH_STATES as readonly string[]).includes(rec.state)) return false; + const awaiting = rec.awaiting_approval ?? []; + if (!awaiting.some((entry) => entry.function_call_id === payload.function_call_id)) return false; + // The call must exist in the batch work. A record missing `work` (or the + // prepared entry) is structurally invalid for a function-batch state — + // resolving against it would write a row the wake can never consume. + const prepared = rec.work?.prepared ?? []; + if (!prepared.some((item) => item.call.id === payload.function_call_id)) return false; + if (rec.work?.executed?.[payload.function_call_id]) return false; + return true; +} + +export async function handleFunctionResolve( + iii: ISdk, + rawPayload: unknown, +): Promise { + const payload = FunctionResolvePayloadSchema.parse(rawPayload); + + // Strict read: a transient state outage must surface as an ERROR (the + // caller retries; the gate keeps its pending record), never as + // {resolved: false} — the gate treats that as a benign no-op and + // deletes its record, which would silently lose the hold. + const store = createTurnStore(iii); + const rec = await store.loadRecordStrict(payload.session_id); + if (!rec || !isHeld(rec, payload)) { + return { resolved: false }; + } + + const key = resolutionKey(payload.session_id, payload.function_call_id); + const existing = await readResolution(iii, payload.session_id, payload.function_call_id); + if (!existing) { + // First decision wins for SEQUENTIAL duplicates (re-deliveries). Two + // truly concurrent resolves can both pass this read and last-write-wins + // on the row — acceptable: both are settled human decisions for the + // same call, exactly one is consumed, and execution is never doubled + // (the wake's `executed` guard + per-session lease own that). + await stateSet(iii, FUNCTION_RESOLUTIONS_SCOPE, key, toResolution(payload)); + } + + const turn_resumed = await enqueueAwaitingApprovalWake(iii, payload.session_id); + return { resolved: true, turn_resumed }; +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'harness::function::resolve', + async (payload: unknown) => handleFunctionResolve(iii, payload), + { + description: + 'Settle one held function call: action "execute" releases it through the normal ' + + 'execution path; action "deliver" answers it with the given content/is_error without ' + + 'executing. Returns {resolved: false} for unknown or already-settled calls. ' + + '`turn_resumed` means a wake was enqueued, not that the turn already advanced.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/hook.ts b/harness/src/turn-orchestrator/hook.ts deleted file mode 100644 index 4f888bd7..00000000 --- a/harness/src/turn-orchestrator/hook.ts +++ /dev/null @@ -1,139 +0,0 @@ -/** - * Approval consultation. Resolves the approval verdict for one agent - * function call. Evaluation order (race-safe via a single settings - * snapshot per call): - * - * 1. Deny if the agent is trying to invoke a human-only approval - * function (`approval::set_mode`, `approval::add_always_allow`, - * etc.) — self-escalation defense. - * 2. Snapshot per-session approval settings. - * 3. `mode === 'full'` → allow. - * 4. `mode === 'auto'` and `function_id` is in the user's curated - * `always_allow` list → allow. In any other mode the list is - * dormant — the user can build it up but it only takes effect - * under Auto. - * 5. Fall through to `policy::check_permissions` (yaml rules) — the - * pre-existing behavior. - * - * Fail-closed on transport errors: unreachable policy → deny with - * `gate_unavailable`. - */ - -import { permissionsDenyEnvelope } from '../approval-gate/denial.js'; -import { DENIAL_SCHEMA_VERSION, type DenialEnvelope } from '../approval-gate/schemas.js'; -import { isHumanOnlyApprovalFunction } from '../approval-gate/settings/human-only.js'; -import { readSettings } from '../approval-gate/settings/store.js'; -import type { ApprovalSettings } from '../approval-gate/settings/types.js'; -import type { - CheckPermissionsPayload, - PolicyCheckReply, -} from '../harness/policy/check-permissions.js'; -import type { ISdk } from '../runtime/iii.js'; -export type { DenialEnvelope } from '../approval-gate/schemas.js'; -import { logger } from '../runtime/otel.js'; -import type { FunctionCall } from '../types/function.js'; - -/** Fail-closed budget for the synchronous policy consult before a call. */ -export const POLICY_TIMEOUT_MS = 5_000; - -export type HookOutcome = - | { kind: 'allow' } - | { kind: 'pending' } - | { kind: 'deny'; denial: DenialEnvelope }; - -export function gateUnavailableEnvelope(function_id: string, reason: string): DenialEnvelope { - return { - schema_version: DENIAL_SCHEMA_VERSION, - status: 'denied', - denied_by: 'gate_unavailable', - function_id, - reason, - }; -} - -function humanOnlyDenial(function_id: string, args: unknown): DenialEnvelope { - return permissionsDenyEnvelope(function_id, 'human_only_function', null, args); -} - -function extractSessionId(args: unknown): string | null { - if (args && typeof args === 'object' && !Array.isArray(args)) { - const candidate = (args as Record).session_id; - if (typeof candidate === 'string' && candidate.length > 0) return candidate; - } - return null; -} - -function isAlwaysAllowed(settings: ApprovalSettings, function_id: string): boolean { - return settings.always_allow.some( - (entry: ApprovalSettings['always_allow'][number]) => entry.function_id === function_id, - ); -} - -function isApprovedAlways(settings: ApprovalSettings, function_id: string): boolean { - return settings.approved_always.some( - (entry: ApprovalSettings['approved_always'][number]) => entry.function_id === function_id, - ); -} - -export async function consultBefore(iii: ISdk, function_call: FunctionCall): Promise { - if (isHumanOnlyApprovalFunction(function_call.function_id)) { - return { - kind: 'deny', - denial: humanOnlyDenial(function_call.function_id, function_call.arguments), - }; - } - - const session_id = extractSessionId(function_call.arguments); - const settings = session_id ? await readSettings(iii, session_id) : null; - - if (settings) { - if (settings.mode === 'full') return { kind: 'allow' }; - // Per-session "approve always" grants apply in every mode — they are - // remembered human decisions, not an auto-policy. - if (isApprovedAlways(settings, function_call.function_id)) { - return { kind: 'allow' }; - } - if (settings.mode === 'auto' && isAlwaysAllowed(settings, function_call.function_id)) { - return { kind: 'allow' }; - } - } - - try { - const reply = await iii.trigger({ - function_id: 'policy::check_permissions', - payload: { - function_id: function_call.function_id, - args: function_call.arguments as CheckPermissionsPayload['args'], - }, - timeoutMs: POLICY_TIMEOUT_MS, - }); - switch (reply.decision) { - case 'allow': - return { kind: 'allow' }; - case 'deny': - return { - kind: 'deny', - denial: permissionsDenyEnvelope( - function_call.function_id, - reply.rule_id, - reply.matched_constraint ?? null, - function_call.arguments, - ), - }; - case 'needs_approval': - return { kind: 'pending' }; - } - } catch (err) { - logger.warn('policy consult failed; failing closed', { - function_id: function_call.function_id, - err: String(err), - }); - return { - kind: 'deny', - denial: gateUnavailableEnvelope( - function_call.function_id, - `policy unreachable: ${String(err)}`, - ), - }; - } -} diff --git a/harness/src/turn-orchestrator/hooks/chain.ts b/harness/src/turn-orchestrator/hooks/chain.ts new file mode 100644 index 00000000..dc7c2c2a --- /dev/null +++ b/harness/src/turn-orchestrator/hooks/chain.ts @@ -0,0 +1,95 @@ +/** + * The pre_dispatch hook chain. Consults every bound hook (in priority + * order) for one agent function call: + * + * - `continue` → next hook (all continue ⇒ allow) + * - `deny` → short-circuit deny with the hook's reason + * - `hold` → short-circuit hold (the call parks awaiting + * `harness::function::resolve`) + * + * Transport failures (throw, timeout, unparseable reply) follow the + * binding's `on_error`: `fail_closed` (default) denies — a crashed gate + * must not wave calls through; `fail_open` logs and skips the hook. + * + * Zero matching bindings ⇒ allow: hooks narrow the dispatch policy, + * never widen it. A deployment without a gate worker is ungated. + */ + +import type { ISdk } from '../../runtime/iii.js'; +import { logger } from '../../runtime/otel.js'; +import { type DenialEnvelope, gateUnavailableEnvelope, hookDenyEnvelope } from './denial.js'; +import { snapshotBindings } from './registry.js'; +import { HookOutputSchema, type HookInput } from './types.js'; + +export type HookOutcome = + | { kind: 'allow' } + | { kind: 'deny'; denial: DenialEnvelope } + | { kind: 'hold'; held_by: string; pending_timeout_ms: number }; + +export async function consultPreDispatch(iii: ISdk, input: HookInput): Promise { + const chain = snapshotBindings(input.call.function_id); + + for (const binding of chain) { + let output: unknown; + try { + output = await iii.trigger({ + function_id: binding.function_id, + payload: input, + timeoutMs: binding.config.timeout_ms, + }); + } catch (err) { + const failure = `hook ${binding.function_id} unavailable: ${String(err)}`; + if (binding.config.on_error === 'fail_open') { + logger.warn('pre_dispatch hook failed; fail_open skips it', { + hook: binding.function_id, + function_id: input.call.function_id, + err: String(err), + }); + continue; + } + logger.warn('pre_dispatch hook failed; failing closed', { + hook: binding.function_id, + function_id: input.call.function_id, + err: String(err), + }); + return { + kind: 'deny', + denial: gateUnavailableEnvelope(input.call.function_id, failure), + }; + } + + const parsed = HookOutputSchema.safeParse(output); + if (!parsed.success) { + const failure = `hook ${binding.function_id} returned an unparseable decision`; + if (binding.config.on_error === 'fail_open') { + logger.warn('pre_dispatch hook reply unparseable; fail_open skips it', { + hook: binding.function_id, + function_id: input.call.function_id, + }); + continue; + } + return { + kind: 'deny', + denial: gateUnavailableEnvelope(input.call.function_id, failure), + }; + } + + switch (parsed.data.decision) { + case 'continue': + continue; + case 'deny': + return { + kind: 'deny', + denial: hookDenyEnvelope(input.call.function_id, parsed.data.reason), + }; + case 'hold': + return { + kind: 'hold', + held_by: binding.function_id, + pending_timeout_ms: parsed.data.pending_timeout_ms, + }; + } + } + + return { kind: 'allow' }; +} diff --git a/harness/src/turn-orchestrator/hooks/denial.ts b/harness/src/turn-orchestrator/hooks/denial.ts new file mode 100644 index 00000000..7d32b6a1 --- /dev/null +++ b/harness/src/turn-orchestrator/hooks/denial.ts @@ -0,0 +1,58 @@ +/** + * Denial envelope shapes shared by the pre_dispatch hook chain and the + * generic trigger failure path. The full approval policy (permission + * modes, allow-lists, yaml rules, redaction) lives in the standalone + * approval-gate worker; the harness only renders denial outcomes into + * function results. + */ + +import type { ContentBlock } from '../../types/content.js'; +import type { FunctionResult } from '../../types/function.js'; + +export const DENIAL_SCHEMA_VERSION = 1; + +export type DeniedBy = 'permissions' | 'user' | 'hook' | 'gate_unavailable'; + +export type DenialEnvelope = { + schema_version: typeof DENIAL_SCHEMA_VERSION; + status: 'denied'; + denied_by: DeniedBy; + function_id: string; + rule_id?: string; + rule_action?: 'deny'; + matched_constraint?: { field: string; operator: string; value: unknown }; + args_excerpt?: unknown; + reason: string; +}; + +export function gateUnavailableEnvelope(function_id: string, reason: string): DenialEnvelope { + return { + schema_version: DENIAL_SCHEMA_VERSION, + status: 'denied', + denied_by: 'gate_unavailable', + function_id, + reason, + }; +} + +/** + * Denial returned by a pre_dispatch hook. The hook wire carries only + * `{ decision: "deny", reason }` — the hook owner renders its full + * envelope into `reason`; harness-side we keep `status: 'denied'` so + * downstream rendering (`[PERMISSION_DENIED]` in types/wire.ts) and + * `isErrorResult` classify it correctly. + */ +export function hookDenyEnvelope(function_id: string, reason: string): DenialEnvelope { + return { + schema_version: DENIAL_SCHEMA_VERSION, + status: 'denied', + denied_by: 'hook', + function_id, + reason, + }; +} + +export function denialResult(denial: DenialEnvelope): FunctionResult { + const text: ContentBlock = { type: 'text', text: denial.reason }; + return { content: [text], details: denial, terminate: false }; +} diff --git a/harness/src/turn-orchestrator/hooks/registry.ts b/harness/src/turn-orchestrator/hooks/registry.ts new file mode 100644 index 00000000..cb2247ad --- /dev/null +++ b/harness/src/turn-orchestrator/hooks/registry.ts @@ -0,0 +1,79 @@ +/** + * Subscriber registry for the `harness::hook::pre-dispatch` trigger + * type. Hook owners bind via the engine's standard trigger registration + * (`iii.registerTrigger({ type: "harness::hook::pre-dispatch", … })`); + * the engine routes each registration here. After a harness restart the + * engine replays existing registrations to the type owner, so the set + * rebuilds itself. + */ + +import type { ISdk, TriggerConfig } from '../../runtime/iii.js'; +import { logger } from '../../runtime/otel.js'; +import { + type BindingConfig, + BindingConfigSchema, + PRE_DISPATCH_TRIGGER_TYPE, + globMatch, +} from './types.js'; + +export type HookBinding = { + id: string; + function_id: string; + config: BindingConfig; +}; + +const bindings = new Map(); + +export function resetPreDispatchBindingsForTests(): void { + bindings.clear(); +} + +export function addPreDispatchBindingForTests(binding: HookBinding): void { + bindings.set(binding.id, binding); +} + +function bindingMatches(binding: HookBinding, function_id: string): boolean { + const globs = binding.config.functions; + if (!globs || globs.length === 0) return true; + return globs.some((pattern) => globMatch(pattern, function_id)); +} + +/** Bindings consulted for one dispatch, in chain order. */ +export function snapshotBindings(function_id: string): HookBinding[] { + return [...bindings.values()] + .filter((binding) => bindingMatches(binding, function_id)) + .sort( + (a, b) => + (a.config.priority ?? 0) - (b.config.priority ?? 0) || + a.function_id.localeCompare(b.function_id), + ); +} + +export function registerPreDispatchTriggerType(iii: ISdk): void { + iii.registerTriggerType( + { + id: PRE_DISPATCH_TRIGGER_TYPE, + description: + 'Synchronous pre-dispatch hook point: bound functions are consulted before every ' + + 'agent function call and answer {decision: "continue" | "deny" | "hold"}. Config: ' + + '{functions?: string[] (globs), priority?: number, timeout_ms?: number, ' + + 'on_error?: "fail_closed" | "fail_open"}.', + }, + { + async registerTrigger(config: TriggerConfig) { + // A throw here rejects the binding at registration time. + const parsed = BindingConfigSchema.parse(config.config ?? {}); + bindings.set(config.id, { id: config.id, function_id: config.function_id, config: parsed }); + logger.info('pre_dispatch hook bound', { + id: config.id, + function_id: config.function_id, + functions: parsed.functions ?? ['*'], + }); + }, + async unregisterTrigger(config: TriggerConfig) { + bindings.delete(config.id); + logger.info('pre_dispatch hook unbound', { id: config.id }); + }, + }, + ); +} diff --git a/harness/src/turn-orchestrator/hooks/types.ts b/harness/src/turn-orchestrator/hooks/types.ts new file mode 100644 index 00000000..d63773b8 --- /dev/null +++ b/harness/src/turn-orchestrator/hooks/types.ts @@ -0,0 +1,75 @@ +/** + * Wire types for the `harness::hook::pre-dispatch` trigger type: the + * hook input/output contract (harness.md § Hooks › Contract) and the + * per-binding config schema. Hook owners (e.g. the approval-gate + * worker's `approval::gate`) bind to the trigger type and answer + * continue / deny / hold synchronously. + */ + +import { z } from 'zod'; + +export const PRE_DISPATCH_TRIGGER_TYPE = 'harness::hook::pre-dispatch'; + +/** Fail-closed default budget for one hook consult. */ +export const DEFAULT_HOOK_TIMEOUT_MS = 5_000; + +export type HookCall = { + id: string; + function_id: string; + arguments: unknown; +}; + +export type HookInput = { + point: 'pre_dispatch'; + session_id: string; + turn_id: string; + step?: number; + /** Sub-agent depth (0 = top-level). The harness has no nesting yet. */ + depth: number; + metadata?: Record; + call: HookCall; +}; + +/** + * Hook reply. Anything that fails this parse is a transport-level + * failure and follows the binding's `on_error` policy (fail closed by + * default) — a confused hook must never read as an allow. + */ +export const HookOutputSchema = z.discriminatedUnion('decision', [ + z.object({ decision: z.literal('continue') }), + z.object({ decision: z.literal('deny'), reason: z.string() }), + z.object({ decision: z.literal('hold'), pending_timeout_ms: z.number().int().nonnegative() }), +]); +export type HookOutput = z.infer; + +/** + * Per-binding config. `.strict()` so a misspelled filter key fails at + * registration, not silently (mirrors the gate's `deny_unknown_fields`). + */ +export const BindingConfigSchema = z + .object({ + /** Globs narrowing which dispatches consult this hook; absent/empty = all. */ + functions: z.array(z.string().min(1)).optional(), + /** Chain position; lower runs first. Default 0, ties by function_id. */ + priority: z.number().int().optional(), + timeout_ms: z.number().int().positive().default(DEFAULT_HOOK_TIMEOUT_MS), + on_error: z.enum(['fail_closed', 'fail_open']).default('fail_closed'), + }) + .strict(); +export type BindingConfig = z.infer; + +/** Turn a glob pattern (`*` = any substring) into an anchored regex. */ +function compileFunctionGlob(pattern: string): RegExp { + let re = '^'; + for (const ch of pattern) { + if (ch === '*') re += '.*'; + else re += ch.replace(/[\\^$.|?+()[\]{}]/g, '\\$&'); + } + re += '$'; + return new RegExp(re); +} + +export function globMatch(pattern: string, function_id: string): boolean { + if (!pattern.includes('*')) return pattern === function_id; + return compileFunctionGlob(pattern).test(function_id); +} diff --git a/harness/src/turn-orchestrator/register.ts b/harness/src/turn-orchestrator/register.ts index d6b096ae..255e246d 100644 --- a/harness/src/turn-orchestrator/register.ts +++ b/harness/src/turn-orchestrator/register.ts @@ -3,18 +3,27 @@ import { register as registerAssistantStreaming } from './assistant-streaming/pr import { register as registerFinishing } from './finishing.js'; import { register as registerFunctionAwaitingApproval } from './function-awaiting-approval/process.js'; import { register as registerFunctionExecute } from './function-execute/process.js'; +import { register as registerFunctionResolve } from './function-resolve.js'; import { register as registerGetState } from './get-state.js'; +import { registerPreDispatchTriggerType } from './hooks/registry.js'; import { register as registerRunAbort } from './run-abort.js'; import { register as registerRunStart } from './run-start.js'; import { register as registerProvisioning } from './provisioning/process.js'; +import { registerTurnCompletedTriggerType } from './turn-completed.js'; export async function register(iii: ISdk, _ctx: { configPath: string }): Promise { + // Trigger types first: sibling workers (the approval-gate) bind to them + // as soon as they appear, and the FSM steps below consult/emit them. + registerPreDispatchTriggerType(iii); + registerTurnCompletedTriggerType(iii); + registerRunStart(iii); registerRunAbort(iii); registerProvisioning(iii); - registerAssistantStreaming(iii); registerFunctionExecute(iii); registerFunctionAwaitingApproval(iii); + registerFunctionResolve(iii); + registerAssistantStreaming(iii); registerFinishing(iii); registerGetState(iii); } diff --git a/harness/src/turn-orchestrator/run-abort.ts b/harness/src/turn-orchestrator/run-abort.ts index b08d1c7e..bfee0c87 100644 --- a/harness/src/turn-orchestrator/run-abort.ts +++ b/harness/src/turn-orchestrator/run-abort.ts @@ -19,6 +19,7 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import { emit } from './events.js'; +import { deleteResolution } from './function-resolve.js'; import { RunAbortPayloadSchema, type RunAbortPayload, type RunAbortResult } from './schemas.js'; import { createTurnStatePorts } from './state-runtime/ports.js'; import { createTurnStore } from './state-runtime/store.js'; @@ -60,6 +61,11 @@ export async function execute(iii: ISdk, payload: RunAbortPayload): Promise {}); + } rec.awaiting_approval = []; (rec as TurnStateRecord).work = undefined; diff --git a/harness/src/turn-orchestrator/run-start.ts b/harness/src/turn-orchestrator/run-start.ts index aa866b84..f22fab52 100644 --- a/harness/src/turn-orchestrator/run-start.ts +++ b/harness/src/turn-orchestrator/run-start.ts @@ -16,6 +16,7 @@ import { sessionSetStatus, userEntryId } from '../runtime/session.js'; import { RunStartPayloadSchema, type RunStartPayload, type RunStartResult } from './schemas.js'; import { createTurnStore } from './state-runtime/store.js'; import { isTurnInFlight, newRecord } from './state.js'; +import { emitTurnCompleted } from './turn-completed.js'; /** * Idle window after which an in-flight (non-parked) record is treated as @@ -48,6 +49,16 @@ export async function execute(iii: ISdk, payload: RunStartPayload): Promise + z + .string() + .min(1) + .refine((v) => !v.includes('/'), { message: `${label} must not contain "/"` }); -export const ApprovalDecisionEventSchema = ApprovalDecisionWriteEventSchema.transform((data) => { - const session_id = data.key.slice(0, data.key.indexOf('/')); - return { session_id }; -}); +const ContentBlockSchema = z + .object({ type: z.string().min(1) }) + .passthrough() + .array(); + +/** + * `harness::function::resolve` payload — a sibling worker (the + * approval-gate) settles one held call: `execute` releases it through the + * normal dispatch pipeline; `deliver` answers it with the given content + * without executing (user deny, sweep timeout). + */ +export const FunctionResolvePayloadSchema = z + .object({ + session_id: slashFreeId('session_id'), + turn_id: z.string().min(1), + function_call_id: slashFreeId('function_call_id'), + action: z.enum(['execute', 'deliver']), + is_error: z.boolean().optional(), + content: ContentBlockSchema.optional(), + details: z.unknown().optional(), + }) + .superRefine((v, ctx) => { + if (v.action === 'deliver' && !v.content) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ['content'], + message: 'content is required when action is "deliver"', + }); + } + }); +export type FunctionResolvePayload = z.infer; + +/** `resolved` is false for unknown/already-settled calls — never an error. */ +export type FunctionResolveResult = { + resolved: boolean; + turn_resumed?: boolean; +}; diff --git a/harness/src/turn-orchestrator/state-runtime/session-lease.ts b/harness/src/turn-orchestrator/state-runtime/session-lease.ts index 2ae05a65..66f81f1b 100644 --- a/harness/src/turn-orchestrator/state-runtime/session-lease.ts +++ b/harness/src/turn-orchestrator/state-runtime/session-lease.ts @@ -3,8 +3,8 @@ * (scope, ttl) adapter over the shared lease in runtime/lease.ts. * * The `turn-step` durable queue has no per-session ordering, so two - * `turn::function_awaiting_approval` wakes (one per `approval::resolve`, fanned - * out by `turn::on_approval`) can run concurrently and double-execute the parked + * `turn::function_awaiting_approval` wakes (one per `harness::function::resolve`) + * can run concurrently and double-execute the parked * turn. The lease lets one through; the loser throws TransientError and retries * via the queue, by which point the state has usually advanced and it stale-skips. */ diff --git a/harness/src/turn-orchestrator/state-runtime/store.ts b/harness/src/turn-orchestrator/state-runtime/store.ts index 1069ab90..1ed943ba 100644 --- a/harness/src/turn-orchestrator/state-runtime/store.ts +++ b/harness/src/turn-orchestrator/state-runtime/store.ts @@ -17,6 +17,7 @@ import { emit } from '../events.js'; import { type RunRequest, defaultRunRequest } from '../run-request.js'; import { toView, type TurnStateView } from '../schemas.js'; import { type TurnState, type TurnStateRecord } from '../state.js'; +import { emitTurnCompleted, terminalStatus } from '../turn-completed.js'; import { loadContextView } from './context-view.js'; /** @@ -28,9 +29,18 @@ import { loadContextView } from './context-view.js'; */ export const TURN_STEP_QUEUE = 'default'; -const NON_STEPABLE_STATES = new Set(['stopped', 'failed', 'function_awaiting_approval']); +const NON_STEPABLE_STATES = new Set(['stopped', 'failed']); -/** True when a persisted turn_state transition should enqueue `turn::{newState}`. */ +/** + * True when a persisted turn_state transition should enqueue `turn::{newState}`. + * + * Entering `function_awaiting_approval` enqueues exactly one scan wake: + * it drains any `harness::function::resolve` decision that landed while + * the batch was still in function_execute (that resolve's own wake + * stale-skipped against the not-yet-parked record). Subsequent wakes for + * the parked state come from function::resolve directly; mid-park + * checkpoints go through writeRecord and never wake. + */ export function shouldWakeStep(previousState: TurnState | null, newState: TurnState): boolean { if (NON_STEPABLE_STATES.has(newState)) return false; if (previousState !== null && previousState === newState) return false; @@ -136,6 +146,38 @@ async function emitTurnStateChanged( } } +const TERMINAL_STATES = new Set(['stopped', 'failed']); + +/** + * Emit `harness::turn-completed` once per terminal transition: only the + * save that moved the record from a non-terminal state into stopped/failed + * fires (terminal→terminal rewrites stay silent). At-least-once — a + * crash between persist and emit loses the event, which consumers + * (the approval-gate sweep) tolerate by design. + */ +async function emitTurnCompletedOnTerminal( + iii: ISdk, + rec: TurnStateRecord, + prev: TurnStateRecord | null, +): Promise { + if (!TERMINAL_STATES.has(rec.state)) return; + if (prev != null && TERMINAL_STATES.has(prev.state)) return; + const status = terminalStatus(rec); + try { + await emitTurnCompleted(iii, { + session_id: rec.session_id, + turn_id: rec.turn_id, + status, + ...(status === 'failed' && rec.error ? { reason: rec.error.message } : {}), + timestamp: Date.now(), + }); + } catch (err) { + // Fan-out must never abort the save path (a throw here would skip the + // step enqueue in saveRecord). Consumers tolerate lost events by design. + logger.warn('turn_completed emission failed', { session_id: rec.session_id, err: String(err) }); + } +} + async function persistRecord( iii: ISdk, rec: TurnStateRecord, @@ -159,6 +201,8 @@ async function persistRecord( ); } + await emitTurnCompletedOnTerminal(iii, rec, prev); + return prev; } diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index ef69ab30..aa37e8a8 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -7,6 +7,7 @@ */ import { z } from 'zod'; +import { uuidLike } from '../runtime/ids.js'; import type { Model } from '../types/model.js'; import type { AssistantMessage, FunctionResultMessage } from '../types/agent-message.js'; import type { ExecutedCall, FunctionBatchWork, PreparedCall } from './function-execute/types.js'; @@ -40,6 +41,14 @@ export type { ExecutedCall, FunctionBatchWork, PreparedCall }; type TurnStateRecordCore = { session_id: string; + /** + * Unique id for this run, generated at `newRecord`. Threaded through the + * pre_dispatch hook contract, `harness::function::resolve`, and + * `harness::turn-completed` so sibling workers (the approval-gate) can key + * their records to one turn. Never contains `/` (reserved state-key + * separator on the sibling side). + */ + turn_id: string; turn_count: number; max_turns?: number; /** @@ -118,7 +127,14 @@ const TurnStateRecordSchema = z export function parseTurnStateRecord(raw: unknown): TurnStateRecord | null { const result = TurnStateRecordSchema.safeParse(raw); - return result.success ? (result.data as TurnStateRecord) : null; + if (!result.success) return null; + const rec = result.data as TurnStateRecord; + // Records persisted before turn_id existed: backfill deterministically + // (stable across re-parses) so in-flight turns survive the deploy. + if (typeof rec.turn_id !== 'string' || rec.turn_id.length === 0) { + rec.turn_id = `legacy-${rec.session_id}`; + } + return rec; } export function newRecord( @@ -129,6 +145,7 @@ export function newRecord( const now = Date.now(); return { session_id, + turn_id: `t_${uuidLike()}`, state: 'provisioning', turn_count: 0, max_turns, diff --git a/harness/src/turn-orchestrator/turn-completed.ts b/harness/src/turn-orchestrator/turn-completed.ts new file mode 100644 index 00000000..8b4b4fd9 --- /dev/null +++ b/harness/src/turn-orchestrator/turn-completed.ts @@ -0,0 +1,106 @@ +/** + * The `harness::turn-completed` trigger type — fired when a turn goes + * terminal (completed / cancelled / failed). Sibling workers bind to it + * for cleanup; the approval-gate purges a terminal turn's pending + * records here instead of polling. + * + * Delivery is fire-and-forget, at-least-once, unordered: consumers must + * treat a duplicate as a no-op (the gate's purge is idempotent), and a + * lost event is collected by the consumer's own sweep. + */ + +import { z } from 'zod'; +import { TriggerAction, type ISdk, type TriggerConfig } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; +import type { TurnStateRecord } from './state.js'; + +export const TURN_COMPLETED_TRIGGER_TYPE = 'harness::turn-completed'; + +export type TurnCompletedStatus = 'completed' | 'cancelled' | 'failed'; + +export type TurnCompletedEvent = { + session_id: string; + turn_id: string; + status: TurnCompletedStatus; + reason?: string; + timestamp: number; +}; + +const TurnCompletedBindingConfigSchema = z + .object({ + /** Only deliver events for this session. */ + session_id: z.string().min(1).optional(), + }) + .strict(); +type TurnCompletedBindingConfig = z.infer; + +type Binding = { + id: string; + function_id: string; + config: TurnCompletedBindingConfig; +}; + +const bindings = new Map(); + +export function resetTurnCompletedBindingsForTests(): void { + bindings.clear(); +} + +export function registerTurnCompletedTriggerType(iii: ISdk): void { + iii.registerTriggerType( + { + id: TURN_COMPLETED_TRIGGER_TYPE, + description: + 'A turn went terminal (status: completed | cancelled | failed). Payload: ' + + '{session_id, turn_id, status, reason?, timestamp}. Config: {session_id?: string} ' + + '(equality filter). Delivery is at-least-once and unordered.', + }, + { + async registerTrigger(config: TriggerConfig) { + const parsed = TurnCompletedBindingConfigSchema.parse(config.config ?? {}); + bindings.set(config.id, { id: config.id, function_id: config.function_id, config: parsed }); + logger.info('turn_completed trigger bound', { + id: config.id, + function_id: config.function_id, + }); + }, + async unregisterTrigger(config: TriggerConfig) { + bindings.delete(config.id); + logger.info('turn_completed trigger unbound', { id: config.id }); + }, + }, + ); +} + +/** + * Derive the terminal status from the record. `stopped` covers both the + * natural end and the user abort — the abort path surfaces through the + * synthetic assistant's stop_reason. + */ +export function terminalStatus(rec: TurnStateRecord): TurnCompletedStatus { + if (rec.state === 'failed') return 'failed'; + const stopReason = rec.last_assistant?.stop_reason; + if (stopReason === 'aborted') return 'cancelled'; + if (stopReason === 'error') return 'failed'; + return 'completed'; +} + +/** Fire-and-forget fan-out; failures are logged and swallowed. */ +export async function emitTurnCompleted(iii: ISdk, event: TurnCompletedEvent): Promise { + for (const binding of bindings.values()) { + if (binding.config.session_id && binding.config.session_id !== event.session_id) continue; + try { + await iii.trigger({ + function_id: binding.function_id, + payload: event, + action: TriggerAction.Void(), + }); + } catch (err) { + logger.warn('turn_completed fan-out failed', { + function_id: binding.function_id, + session_id: event.session_id, + err: String(err), + }); + } + } +} diff --git a/harness/tests/approval-gate/_helpers/fakeIii.ts b/harness/tests/approval-gate/_helpers/fakeIii.ts deleted file mode 100644 index b5e9f7ff..00000000 --- a/harness/tests/approval-gate/_helpers/fakeIii.ts +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Shared minimal `ISdk` fake for approval-gate tests. Captures every - * `iii.trigger` call by `function_id` so individual tests can assert on - * the payload without re-implementing the mock surface each time. - */ - -import type { ISdk } from 'iii-sdk'; -import { vi } from 'vitest'; - -export type TriggerCall = { function_id: string; payload: unknown; action?: unknown }; - -export type FakeIii = { - iii: ISdk; - calls: TriggerCall[]; - streamSets: unknown[]; -}; - -export function fakeIii(): FakeIii { - const calls: TriggerCall[] = []; - const streamSets: unknown[] = []; - - const iii = { - trigger: vi.fn( - async ({ - function_id, - payload, - action, - }: { - function_id: string; - payload: unknown; - action?: unknown; - }) => { - calls.push({ function_id, payload, action }); - if (function_id === 'stream::set') { - streamSets.push(payload); - } - return null; - }, - ), - } as unknown as ISdk; - - return { iii, calls, streamSets }; -} diff --git a/harness/tests/approval-gate/denial.test.ts b/harness/tests/approval-gate/denial.test.ts deleted file mode 100644 index 26059382..00000000 --- a/harness/tests/approval-gate/denial.test.ts +++ /dev/null @@ -1,99 +0,0 @@ -/** - * Adversarial tests for denial-envelope assembly. The envelopes are handed - * to the model and surfaced to the operator, so the things under attack are: - * - secrets in args must be redacted in BOTH wire envelopes; - * - oversized args must be clipped before they reach the envelope; - * - the user-facing envelope must not leak permissions-internal fields; - * - the deny reason must embed an arbitrary matched value without breaking. - */ -import { describe, expect, it } from 'vitest'; -import { - permissionsDenyEnvelope, - reasonForPermissionsDeny, - userDenyEnvelope, -} from '../../src/approval-gate/denial.js'; - -describe('args redaction reaches the envelope', () => { - it('redacts secrets in args for the permissions envelope', () => { - const env = permissionsDenyEnvelope('shell::exec', 'r1', null, { - url: 'https://x', - authorization: 'Bearer leak', - nested: { api_key: 'leak2', keep: 'ok' }, - }); - expect(env.args_excerpt).toEqual({ - url: 'https://x', - authorization: '', - nested: { api_key: '', keep: 'ok' }, - }); - }); - - it('redacts secrets in args for the user envelope', () => { - const env = userDenyEnvelope('shell::exec', null, { password: 'hunter2', keep: 1 }); - expect(env.args_excerpt).toEqual({ password: '', keep: 1 }); - }); - - it('clips an oversized arg value inside the envelope', () => { - const env = permissionsDenyEnvelope('shell::exec', 'r1', null, { blob: 'x'.repeat(500) }); - const excerpt = env.args_excerpt as { blob: string }; - expect([...excerpt.blob].length).toBe(257); - expect(excerpt.blob.endsWith('…')).toBe(true); - }); -}); - -describe('permissionsDenyEnvelope', () => { - it('carries rule_id, rule_action and matched_constraint', () => { - const env = permissionsDenyEnvelope( - 'shell::exec', - 'git/no-force-push', - { field: 'command', operator: 'matches', value: '^git push --force' }, - { command: 'git push --force origin main' }, - ); - expect(env.denied_by).toBe('permissions'); - expect(env.rule_action).toBe('deny'); - expect(env.matched_constraint?.field).toBe('command'); - expect(env.reason).toContain('git/no-force-push'); - }); - - it('omits matched_constraint and uses the no-constraint phrasing when none matched', () => { - const env = permissionsDenyEnvelope('shell::exec', 'blanket-deny', null, {}); - expect(env.matched_constraint).toBeUndefined(); - expect(env.reason).toContain('blocked by policy'); - }); -}); - -describe('userDenyEnvelope — must not leak permissions internals', () => { - it('falls back to the default reason and carries no policy fields', () => { - const env = userDenyEnvelope('shell::exec', null, {}); - expect(env.denied_by).toBe('user'); - expect(env.reason).toBe('Rejected by operator.'); - expect(env.rule_id).toBeUndefined(); - expect(env.rule_action).toBeUndefined(); - expect(env.matched_constraint).toBeUndefined(); - }); - - it('preserves an explicit operator reason verbatim', () => { - const env = userDenyEnvelope('shell::exec', 'looks like data exfiltration', {}); - expect(env.reason).toBe('looks like data exfiltration'); - }); -}); - -describe('reasonForPermissionsDeny — embeds an arbitrary matched value safely', () => { - it('JSON-encodes a value containing quotes and shell metacharacters', () => { - const reason = reasonForPermissionsDeny('shell::exec', 'r1', { - field: 'command', - operator: 'eq', - value: '"; rm -rf / #', - }); - expect(reason).toContain(JSON.stringify('"; rm -rf / #')); - }); - - it('encodes a structured (object) matched value without throwing', () => { - expect(() => - reasonForPermissionsDeny('shell::exec', 'r1', { - field: 'env', - operator: 'contains', - value: { nested: [1, 2, 3] }, - }), - ).not.toThrow(); - }); -}); diff --git a/harness/tests/approval-gate/redact.test.ts b/harness/tests/approval-gate/redact.test.ts deleted file mode 100644 index 83e28538..00000000 --- a/harness/tests/approval-gate/redact.test.ts +++ /dev/null @@ -1,154 +0,0 @@ -/** - * Adversarial tests for the secret-redaction tree. This is the worker's - * last line of defence against leaking credentials into denial envelopes, - * so every test here is an attempt to (a) sneak a secret past the filter, - * (b) crash the redactor with hostile input, or (c) corrupt the process. - */ -import { describe, expect, it } from 'vitest'; -import { clip, redact } from '../../src/approval-gate/redact.js'; - -describe('clip — code-point cap (256)', () => { - it('leaves a string exactly at the cap untouched (no false ellipsis)', () => { - const at = 'a'.repeat(256); - expect(clip(at)).toBe(at); - }); - - it('clips one past the cap to 256 code points + ellipsis', () => { - const out = clip('a'.repeat(257)); - expect([...out].length).toBe(257); // 256 kept + '…' - expect(out.endsWith('…')).toBe(true); - }); - - it('does NOT clip an astral string whose code-point length is within the cap', () => { - // 200 emoji = 200 code points (under cap) but 400 UTF-16 units (over it). - // A UTF-16-length guard would falsely truncate + ellipsize this. - const s = '🎉'.repeat(200); - expect(clip(s)).toBe(s); - }); - - it('clips an astral string over the cap without splitting surrogate pairs', () => { - const out = clip('🎉'.repeat(300)); - expect([...out].length).toBe(257); - expect(out.endsWith('…')).toBe(true); - // Every retained unit is a whole emoji — no lone surrogate (�/half pair). - expect([...out].slice(0, 256).every((c) => c === '🎉')).toBe(true); - }); -}); - -describe('redact — secret key matching', () => { - it('redacts exact and suffix-matched secret keys, case-insensitively', () => { - const out = redact({ - Password: 'hunter2', - db_password: 's3cret', - api_key: 'k', - TOKEN: 't', - }) as Record; - expect(out.Password).toBe(''); - expect(out.db_password).toBe(''); - expect(out.api_key).toBe(''); - expect(out.TOKEN).toBe(''); - }); - - it('does not over-match keys that merely contain a secret substring', () => { - const out = redact({ - tokenizer: 'tiktoken', - authority: 'system', - secretary: 'alice', - passwordless: true, - }) as Record; - expect(out.tokenizer).toBe('tiktoken'); - expect(out.authority).toBe('system'); - expect(out.secretary).toBe('alice'); - expect(out.passwordless).toBe(true); - }); - - it('redacts a secret-keyed value wholesale, even when it is a nested object', () => { - // Attack: hide a secret one level under a secret key, hoping only the - // top string gets blanked. The whole subtree must collapse to . - const out = redact({ - authorization: { scheme: 'Bearer', value: 'xyz' }, - }) as Record; - expect(out.authorization).toBe(''); - }); - - it('redacts secrets buried deep in nested objects and arrays', () => { - const out = redact({ - config: { credentials: { access_token: 'k', region: 'us' } }, - hosts: [{ refresh_token: 't', name: 'h1' }], - }) as Record; - const creds = (out.config as Record>).credentials; - expect(creds.access_token).toBe(''); - expect(creds.region).toBe('us'); - const hosts = out.hosts as Array>; - expect(hosts[0].refresh_token).toBe(''); - expect(hosts[0].name).toBe('h1'); - }); -}); - -describe('redact — known boundaries (documented gaps, not bugs)', () => { - it('is key-based only: a secret hidden in a non-secret value is NOT scrubbed', () => { - // Documented limitation — redaction keys off field names, not contents. - const out = redact({ note: 'the password is hunter2' }) as Record; - expect(out.note).toBe('the password is hunter2'); - }); - - it('drops symbol-keyed entries (Object.entries ignores them)', () => { - const out = redact({ [Symbol('password')]: 'leak', keep: 1 }) as Record; - expect(out).toEqual({ keep: 1 }); - }); -}); - -describe('redact — hostile structure must not crash the worker', () => { - it('survives pathologically deep nesting without a stack overflow', () => { - let node: Record = {}; - const root = node; - for (let i = 0; i < 200_000; i++) { - const next: Record = {}; - node.n = next; - node = next; - } - expect(() => redact(root)).not.toThrow(); - }); - - it('survives a circular reference without infinite recursion', () => { - const a: Record = { password: 'p' }; - a.self = a; - let out: Record = {}; - expect(() => { - out = redact(a) as Record; - }).not.toThrow(); - expect(out.password).toBe(''); - }); -}); - -describe('redact — process integrity', () => { - it('does not pollute Object.prototype via a __proto__ key', () => { - // JSON.parse produces an own enumerable "__proto__" data property — the - // classic prototype-pollution vector. redact must keep it a plain key. - const payload = JSON.parse('{"__proto__": {"password": "x"}, "a": 1}'); - const out = redact(payload) as Record; - expect(({} as Record).password).toBeUndefined(); - expect(Object.keys(out)).toContain('__proto__'); - }); - - it('never mutates its input (frozen tree round-trips intact)', () => { - const input = { password: 'hunter2', nested: { token: 't', list: ['a', 'b'] } }; - const snapshot = JSON.parse(JSON.stringify(input)); - Object.freeze(input); - Object.freeze(input.nested); - Object.freeze(input.nested.list); - - const out = redact(input) as Record; - expect(out.password).toBe(''); - expect(input).toEqual(snapshot); - expect(out).not.toBe(input); - expect(out.nested).not.toBe(input.nested); - }); - - it('passes primitives through unchanged', () => { - expect(redact(null)).toBeNull(); - expect(redact(undefined)).toBeUndefined(); - expect(redact(42)).toBe(42); - expect(redact(true)).toBe(true); - }); -}); diff --git a/harness/tests/approval-gate/resolve.test.ts b/harness/tests/approval-gate/resolve.test.ts deleted file mode 100644 index ae74e3ca..00000000 --- a/harness/tests/approval-gate/resolve.test.ts +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Adversarial tests for the approval::resolve handler. The handler is a - * trust boundary: a hostile or malformed resolve payload must never crash - * the worker, never silently fire a resume, and never resolve to the wrong - * per-call resume function. Its only sanctioned outputs are a clean - * `{ ok: true }` or a typed `{ ok: false, error }`. - */ -import { describe, expect, it, vi } from 'vitest'; -import { handleResolveRequest } from '../../src/approval-gate/resolve.js'; -import { fakeIii } from './_helpers/fakeIii.js'; - -describe('handleResolveRequest — writing the decision', () => { - it('writes the decision to approvals// with a normalized payload', async () => { - const { iii, calls } = fakeIii(); - const out = await handleResolveRequest(iii, { - session_id: 's1', - function_call_id: 'fc-1', - decision: 'deny', - reason: 'user cancelled', - }); - expect(out).toEqual({ ok: true }); - expect(calls).toEqual([ - { - function_id: 'state::set', - payload: { - scope: 'approvals', - key: 's1/fc-1', - value: { decision: 'deny', reason: 'user cancelled' }, - }, - }, - ]); - }); - - it('never emits to the agent::events stream (denial flows via execution_end)', async () => { - const { iii, streamSets } = fakeIii(); - await handleResolveRequest(iii, { - session_id: 's1', - function_call_id: 'fc-1', - decision: 'deny', - reason: 'nope', - }); - expect(streamSets).toHaveLength(0); - }); -}); - -describe('handleResolveRequest — hostile / malformed input is rejected, not crashed', () => { - it('returns invalid_payload (does not throw) when session_id contains a slash', async () => { - // "/" is the reserved separator in the state key. A slashed id must be - // refused at the boundary — the handler promise must resolve to a typed - // error, never reject out of the worker. - const { iii, calls } = fakeIii(); - await expect( - handleResolveRequest(iii, { - session_id: 'a/b', - function_call_id: 'fc', - decision: 'allow', - }), - ).resolves.toEqual({ ok: false, error: 'invalid_payload' }); - expect(calls).toHaveLength(0); - }); - - it('returns invalid_payload and fires nothing when function_call_id is missing', async () => { - const { iii, calls } = fakeIii(); - const out = await handleResolveRequest(iii, { - session_id: 's1', - decision: 'allow', - } as never); - expect(out).toEqual({ ok: false, error: 'invalid_payload' }); - expect(calls).toHaveLength(0); - }); - - it('returns invalid_payload when function_call_id contains a slash', async () => { - const { iii, calls } = fakeIii(); - const out = await handleResolveRequest(iii, { - session_id: 's1', - function_call_id: 'fc/evil', - decision: 'allow', - }); - expect(out).toEqual({ ok: false, error: 'invalid_payload' }); - expect(calls).toHaveLength(0); - }); - - it('returns invalid_payload for an out-of-enum decision', async () => { - const { iii, calls } = fakeIii(); - const out = await handleResolveRequest(iii, { - session_id: 's1', - function_call_id: 'fc-1', - decision: 'maybe', - } as never); - expect(out).toEqual({ ok: false, error: 'invalid_payload' }); - expect(calls).toHaveLength(0); - }); -}); - -describe('handleResolveRequest — downstream failure is surfaced as resume_failed', () => { - it('returns resume_failed when the state::set write rejects', async () => { - const { iii } = fakeIii(); - (iii.trigger as ReturnType).mockRejectedValue(new Error('boom')); - const out = await handleResolveRequest(iii, { - session_id: 's1', - function_call_id: 'fc-1', - decision: 'allow', - }); - expect(out).toEqual({ ok: false, error: 'resume_failed' }); - }); -}); diff --git a/harness/tests/approval-gate/schemas.test.ts b/harness/tests/approval-gate/schemas.test.ts deleted file mode 100644 index 3c21d667..00000000 --- a/harness/tests/approval-gate/schemas.test.ts +++ /dev/null @@ -1,136 +0,0 @@ -/** - * Adversarial tests for the wire schemas. Two properties matter most: - * 1. Ingress validation refuses ids that would corrupt the state key. - * 2. parsePolicyReply is fail-CLOSED — anything it can't positively - * decode as a clean allow/deny becomes needs_approval (a human gate). - * A corrupted or attacker-shaped reply must never decode as `allow`. - */ -import { describe, expect, it } from 'vitest'; -import { - ApprovalDecisionSchema, - ApprovalResumePayloadSchema, - ResolvePayloadSchema, - parsePolicyReply, - pendingKey, - resolveFunctionOptions, -} from '../../src/approval-gate/schemas.js'; - -describe('ResolvePayloadSchema — id normalization & validation', () => { - it('coerces an omitted reason to null', () => { - const parsed = ResolvePayloadSchema.parse({ - session_id: 's', - function_call_id: 'fc-1', - decision: 'deny', - }); - expect(parsed.reason).toBeNull(); - expect(parsed.function_call_id).toBe('fc-1'); - }); - - it('accepts the aborted decision on the wire (user cancel of a parked call)', () => { - const parsed = ResolvePayloadSchema.parse({ - session_id: 's', - function_call_id: 'fc-1', - decision: 'aborted', - }); - expect(parsed.decision).toBe('aborted'); - }); - - it.each([ - ['function_call_id missing', { session_id: 's', decision: 'allow' }], - ['tool_call_id only (legacy)', { session_id: 's', tool_call_id: 'legacy', decision: 'allow' }], - ['empty function_call_id', { session_id: 's', function_call_id: '', decision: 'allow' }], - ['empty session_id', { session_id: '', function_call_id: 'fc', decision: 'allow' }], - ['slash in session_id', { session_id: 'a/b', function_call_id: 'fc', decision: 'allow' }], - ['slash in function_call_id', { session_id: 's', function_call_id: 'a/b', decision: 'allow' }], - ['non-enum decision', { session_id: 's', function_call_id: 'fc', decision: 'maybe' }], - ['numeric reason', { session_id: 's', function_call_id: 'fc', decision: 'allow', reason: 7 }], - ])('rejects %s', (_label, payload) => { - expect(ResolvePayloadSchema.safeParse(payload).success).toBe(false); - }); -}); - -describe('parsePolicyReply — fail closed', () => { - it('decodes a well-formed allow and deny', () => { - expect(parsePolicyReply({ decision: 'allow', rule_id: 'r1' })).toEqual({ - decision: 'allow', - rule_id: 'r1', - }); - const deny = parsePolicyReply({ - decision: 'deny', - rule_id: 'r2', - matched_constraint: { field: 'cmd', operator: 'eq', value: 'x' }, - }); - expect(deny.decision).toBe('deny'); - }); - - it('defaults a deny with no matched_constraint to null rather than failing', () => { - expect(parsePolicyReply({ decision: 'deny' })).toEqual({ - decision: 'deny', - rule_id: '', - matched_constraint: null, - }); - }); - - it('strips unknown fields — extra keys cannot ride along', () => { - expect(parsePolicyReply({ decision: 'allow', rule_id: 'r', injected: 'x' })).toEqual({ - decision: 'allow', - rule_id: 'r', - }); - }); - - it.each([ - ['empty object', {}], - ['null', null], - ['array', []], - ['string', 'allow'], - ['unknown decision', { decision: 'maybe' }], - ['allow with wrong-typed rule_id', { decision: 'allow', rule_id: 123 }], - [ - 'deny with malformed matched_constraint', - { decision: 'deny', matched_constraint: { field: 1 } }, - ], - ])('falls through to needs_approval for %s', (_label, input) => { - expect(parsePolicyReply(input)).toEqual({ decision: 'needs_approval' }); - }); -}); - -describe('state-key derivation — separator integrity', () => { - it('derives /', () => { - expect(pendingKey('sess-1', 'fc-1')).toBe('sess-1/fc-1'); - }); - - it.each([ - ['session', 'a/b', 'c'], - ['function_call', 'a', 'b/c'], - ])('throws if the %s id smuggles a slash', (_which, session, fcall) => { - expect(() => pendingKey(session, fcall)).toThrow(); - }); -}); - -describe('ApprovalDecisionSchema', () => { - it('accepts the three terminal decisions with an explicit reason', () => { - for (const decision of ['allow', 'deny', 'aborted'] as const) { - expect(ApprovalDecisionSchema.parse({ decision, reason: null })).toEqual({ - decision, - reason: null, - }); - } - }); - - it('rejects a missing reason and an unknown decision', () => { - expect(ApprovalDecisionSchema.safeParse({ decision: 'allow' }).success).toBe(false); - expect(ApprovalDecisionSchema.safeParse({ decision: 'paused', reason: null }).success).toBe( - false, - ); - }); - - it('keeps ApprovalResumePayloadSchema as a deprecated alias', () => { - expect(ApprovalResumePayloadSchema).toBe(ApprovalDecisionSchema); - }); -}); - -describe('resolveFunctionOptions', () => { - it('carries a JSON-serializable request_format for iii registration', () => { - expect(() => JSON.stringify(resolveFunctionOptions.request_format)).not.toThrow(); - }); -}); diff --git a/harness/tests/approval-gate/settings.test.ts b/harness/tests/approval-gate/settings.test.ts deleted file mode 100644 index 512c12ba..00000000 --- a/harness/tests/approval-gate/settings.test.ts +++ /dev/null @@ -1,199 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import { isHumanOnlyApprovalFunction } from '../../src/approval-gate/settings/human-only.js'; -import { addAlwaysAllow } from '../../src/approval-gate/settings/add-always-allow.js'; -import { approveAlways } from '../../src/approval-gate/settings/approve-always.js'; -import { removeAlwaysAllow } from '../../src/approval-gate/settings/remove-always-allow.js'; -import { setMode } from '../../src/approval-gate/settings/set-mode.js'; -import { clearSettings, readSettings } from '../../src/approval-gate/settings/store.js'; -import { SETTINGS_STATE_SCOPE } from '../../src/approval-gate/schemas.js'; - -interface TriggerCall { - function_id: string; - payload: { - scope: string; - key: string; - value?: unknown; - ops?: Array<{ type: string; path?: string; value?: unknown }>; - }; -} - -function makeIii(initial: unknown = null) { - const store = new Map(); - if (initial !== null) store.set('sess-1', initial); - const calls: TriggerCall[] = []; - const trigger = vi.fn(async (req: TriggerCall) => { - calls.push(req); - if (req.function_id === 'state::get') { - return store.get(req.payload.key) ?? null; - } - if (req.function_id === 'state::set') { - if (req.payload.value === null) store.delete(req.payload.key); - else store.set(req.payload.key, req.payload.value); - return null; - } - if (req.function_id === 'state::delete') { - const old_value = store.get(req.payload.key) ?? null; - store.delete(req.payload.key); - return { old_value }; - } - // Atomic field-scoped update (synchronous read-modify-write) modelling the - // `set` and `append` ops the settings mutations use. - if (req.function_id === 'state::update') { - const old_value = store.get(req.payload.key) ?? null; - const rec: Record = - old_value && typeof old_value === 'object' && !Array.isArray(old_value) - ? { ...(old_value as Record) } - : {}; - for (const op of req.payload.ops ?? []) { - const path = op.path ?? ''; - if (op.type === 'set') rec[path] = op.value; - else if (op.type === 'append') { - const arr = Array.isArray(rec[path]) ? (rec[path] as unknown[]) : []; - rec[path] = [...arr, op.value]; - } - } - store.set(req.payload.key, rec); - return { old_value, new_value: rec }; - } - throw new Error(`unexpected trigger ${req.function_id}`); - }); - return { iii: { trigger } as unknown as ISdk, calls, store }; -} - -describe('approval-gate settings', () => { - it('readSettings returns defaults when nothing persisted', async () => { - const { iii } = makeIii(); - const s = await readSettings(iii, 'sess-1'); - expect(s.mode).toBe('manual'); - expect(s.always_allow).toEqual([]); - expect(s.mode_set_at).toBe(0); - }); - - it('clearSettings deletes the key (no null tombstone) and reverts to defaults', async () => { - const { iii, store, calls } = makeIii(); - await setMode(iii, 'sess-1', 'auto'); - expect(store.has('sess-1')).toBe(true); - - await clearSettings(iii, 'sess-1'); - - expect(store.has('sess-1')).toBe(false); - expect(calls.some((c) => c.function_id === 'state::delete')).toBe(true); - const s = await readSettings(iii, 'sess-1'); - expect(s.mode).toBe('manual'); - }); - - it('setMode persists with mode_set_at > 0', async () => { - const { iii, store } = makeIii(); - const result = await setMode(iii, 'sess-1', 'auto'); - expect(result.mode).toBe('auto'); - expect(result.mode_set_at).toBeGreaterThan(0); - expect(store.get('sess-1')).toMatchObject({ mode: 'auto' }); - }); - - it('addAlwaysAllow is idempotent on function_id', async () => { - const { iii } = makeIii(); - const once = await addAlwaysAllow(iii, 'sess-1', 'shell::fs::ls'); - expect(once.always_allow).toHaveLength(1); - const twice = await addAlwaysAllow(iii, 'sess-1', 'shell::fs::ls'); - expect(twice.always_allow).toHaveLength(1); - expect(twice.always_allow[0].granted_by).toBe('user_click'); - }); - - it('removeAlwaysAllow strips matching entries', async () => { - const { iii } = makeIii(); - await addAlwaysAllow(iii, 'sess-1', 'shell::fs::ls'); - await addAlwaysAllow(iii, 'sess-1', 'fs::read'); - const next = await removeAlwaysAllow(iii, 'sess-1', 'shell::fs::ls'); - expect(next.always_allow.map((e) => e.function_id)).toEqual(['fs::read']); - }); - - it('approveAlways appends to approved_always (idempotent), separate from always_allow', async () => { - const { iii } = makeIii(); - const once = await approveAlways(iii, 'sess-1', 'shell::exec'); - expect(once.approved_always.map((e) => e.function_id)).toEqual(['shell::exec']); - expect(once.always_allow).toEqual([]); - const twice = await approveAlways(iii, 'sess-1', 'shell::exec'); - expect(twice.approved_always).toHaveLength(1); - expect(twice.approved_always[0].granted_by).toBe('user_click'); - }); - - it('approveAlways and addAlwaysAllow write to independent lists', async () => { - const { iii } = makeIii(); - await addAlwaysAllow(iii, 'sess-1', 'shell::fs::ls'); - const after = await approveAlways(iii, 'sess-1', 'shell::exec'); - expect(after.always_allow.map((e) => e.function_id)).toEqual(['shell::fs::ls']); - expect(after.approved_always.map((e) => e.function_id)).toEqual(['shell::exec']); - }); - - it('writes go to the SETTINGS_STATE_SCOPE keyed by session_id', async () => { - const { iii, calls } = makeIii(); - await setMode(iii, 'sess-1', 'full'); - const write = calls.find( - (c) => c.function_id === 'state::set' || c.function_id === 'state::update', - ); - expect(write?.payload.scope).toBe(SETTINGS_STATE_SCOPE); - expect(write?.payload.key).toBe('sess-1'); - }); - - // --- Atomic field-scoped mutations: disjoint fields must not clobber. --- - - it('concurrent setMode and addAlwaysAllow both persist (no lost update)', async () => { - const { iii } = makeIii(); - await Promise.all([ - setMode(iii, 'sess-1', 'auto'), - addAlwaysAllow(iii, 'sess-1', 'shell::exec'), - ]); - const s = await readSettings(iii, 'sess-1'); - expect(s.mode).toBe('auto'); - expect(s.always_allow.map((e) => e.function_id)).toEqual(['shell::exec']); - }); - - it('concurrent setMode and approveAlways both persist (no lost update)', async () => { - const { iii } = makeIii(); - await Promise.all([ - setMode(iii, 'sess-1', 'full'), - approveAlways(iii, 'sess-1', 'shell::exec'), - ]); - const s = await readSettings(iii, 'sess-1'); - expect(s.mode).toBe('full'); - expect(s.approved_always.map((e) => e.function_id)).toEqual(['shell::exec']); - }); - - it('concurrent adds of different ids both land (append composition)', async () => { - const { iii } = makeIii(); - await Promise.all([ - addAlwaysAllow(iii, 'sess-1', 'a::one'), - addAlwaysAllow(iii, 'sess-1', 'b::two'), - ]); - const s = await readSettings(iii, 'sess-1'); - expect(s.always_allow.map((e) => e.function_id).sort()).toEqual(['a::one', 'b::two']); - }); - - it('readSettings backfills a partial record and keeps the configured default mode', async () => { - // A field-scoped append can persist only { always_allow }; the read must - // still produce a complete, valid record (not fall back to all-defaults). - const { iii } = makeIii({ - always_allow: [{ function_id: 'shell::exec', granted_at: 1, granted_by: 'user_click' }], - }); - const s = await readSettings(iii, 'sess-1'); - expect(s.always_allow.map((e) => e.function_id)).toEqual(['shell::exec']); - expect(s.mode).toBe('manual'); - expect(s.approved_always).toEqual([]); - expect(s.mode_set_at).toBe(0); - }); - - it('isHumanOnlyApprovalFunction catches every settings handler id', () => { - expect(isHumanOnlyApprovalFunction('approval::set_mode')).toBe(true); - expect(isHumanOnlyApprovalFunction('approval::add_always_allow')).toBe(true); - expect(isHumanOnlyApprovalFunction('approval::remove_always_allow')).toBe(true); - expect(isHumanOnlyApprovalFunction('approval::approve_always')).toBe(true); - expect(isHumanOnlyApprovalFunction('approval::get_settings')).toBe(true); - expect(isHumanOnlyApprovalFunction('approval::clear_settings')).toBe(true); - }); - - it('isHumanOnlyApprovalFunction does NOT block approval::resolve', () => { - expect(isHumanOnlyApprovalFunction('approval::resolve')).toBe(false); - expect(isHumanOnlyApprovalFunction('shell::exec')).toBe(false); - }); -}); diff --git a/harness/tests/integration/hook-gate-flow.e2e.test.ts b/harness/tests/integration/hook-gate-flow.e2e.test.ts new file mode 100644 index 00000000..5ebdef89 --- /dev/null +++ b/harness/tests/integration/hook-gate-flow.e2e.test.ts @@ -0,0 +1,133 @@ +/** + * End-to-end pre_dispatch hook flow against the REAL chain (no + * dispatchWithHook mocks): a fake `approval::gate` bound to + * `harness::hook::pre-dispatch` holds/denies/allows calls, and + * `harness::function::resolve` settles held ones — exactly the wire the + * standalone approval-gate worker drives. + */ + +import { describe, expect, it } from 'vitest'; +import { + createParallelApprovalHarness, + executionEvents, + makeAssistantWithCalls, +} from './parallel-approval-harness.js'; + +describe('pre_dispatch hook gate flow e2e', () => { + it('hold parks the call with the gate-supplied HookInput, resolve-execute releases it without re-consulting', async () => { + const h = createParallelApprovalHarness(); + // Default gate output is hold (1_800_000ms). + + h.seedExecute('sess-hold', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); + await h.runExecute('sess-hold'); + + // The gate saw one pre_dispatch HookInput with the routing envelope. + expect(h.gateCalls).toHaveLength(1); + const rec = h.loadTurnRecord('sess-hold'); + expect(h.gateCalls[0]).toMatchObject({ + point: 'pre_dispatch', + session_id: 'sess-hold', + turn_id: rec?.turn_id, + depth: 0, + call: { + id: 'fc-1', + function_id: 'shell::run', + // Routing envelope: caller args + session/call identity at top level. + arguments: { + session_id: 'sess-hold', + function_call_id: 'fc-1', + function_id: 'shell::run', + }, + }, + }); + expect(rec?.state).toBe('function_awaiting_approval'); + expect(rec?.awaiting_approval?.map((e) => e.function_call_id)).toEqual(['fc-1']); + + const out = await h.resolveApproval('sess-hold', 'fc-1', 'allow'); + expect(out).toEqual({ resolved: true, turn_resumed: true }); + + const done = h.loadTurnRecord('sess-hold'); + expect(done?.state).toBe('assistant_streaming'); + expect(executionEvents(h.emitted, 'function_execution_end', 'fc-1')).toHaveLength(1); + // Released calls do NOT re-consult the hook chain. + expect(h.gateCalls).toHaveLength(1); + }); + + it('gate deny answers the call inline with a denied result, nothing parks', async () => { + const h = createParallelApprovalHarness(); + h.setGateOutput({ decision: 'deny', reason: 'blocked by policy: no shell' }); + + h.seedExecute('sess-deny', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); + await h.runExecute('sess-deny'); + + const rec = h.loadTurnRecord('sess-deny'); + expect(rec?.state).toBe('assistant_streaming'); + expect(rec?.awaiting_approval ?? []).toEqual([]); + const sessionMessages = h.sessions + .messageEntries('sess-deny') + .map((e) => e.message as { role?: string; is_error?: boolean; details?: unknown }); + const denial = sessionMessages.find((m) => m.role === 'function_result'); + expect(denial?.is_error).toBe(true); + expect(denial?.details).toMatchObject({ + status: 'denied', + denied_by: 'hook', + reason: 'blocked by policy: no shell', + }); + }); + + it('gate continue lets the call dispatch straight through', async () => { + const h = createParallelApprovalHarness(); + h.setGateOutput({ decision: 'continue' }); + + h.seedExecute('sess-allow', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); + await h.runExecute('sess-allow'); + + const rec = h.loadTurnRecord('sess-allow'); + expect(rec?.state).toBe('assistant_streaming'); + expect(executionEvents(h.emitted, 'function_execution_end', 'fc-1')).toHaveLength(1); + }); + + it('a crashed gate fails closed (deny, not allow, not hold)', async () => { + const h = createParallelApprovalHarness(); + h.setGateOutput(() => { + throw new Error('gate worker died'); + }); + + h.seedExecute('sess-down', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); + await h.runExecute('sess-down'); + + const rec = h.loadTurnRecord('sess-down'); + expect(rec?.state).toBe('assistant_streaming'); + expect(rec?.work).toBeUndefined(); + const denial = h.sessions + .messageEntries('sess-down') + .map((e) => e.message as { role?: string; details?: Record }) + .find((m) => m.role === 'function_result'); + expect(denial?.details).toMatchObject({ denied_by: 'gate_unavailable' }); + }); + + it('resolve-deny delivers the DenialEnvelope as an is_error function result', async () => { + const h = createParallelApprovalHarness(); + + h.seedExecute( + 'sess-userdeny', + makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }]), + ); + await h.runExecute('sess-userdeny'); + await h.resolveApproval('sess-userdeny', 'fc-1', 'deny', 'too risky'); + + const rec = h.loadTurnRecord('sess-userdeny'); + expect(rec?.state).toBe('assistant_streaming'); + const denial = h.sessions + .messageEntries('sess-userdeny') + .map((e) => e.message as { role?: string; is_error?: boolean; details?: unknown }) + .find((m) => m.role === 'function_result'); + expect(denial?.is_error).toBe(true); + expect(denial?.details).toMatchObject({ + status: 'denied', + denied_by: 'user', + function_id: 'shell::run', + reason: 'too risky', + }); + }); +}); diff --git a/harness/tests/integration/mode-approval.e2e.test.ts b/harness/tests/integration/mode-approval.e2e.test.ts deleted file mode 100644 index 4660013f..00000000 --- a/harness/tests/integration/mode-approval.e2e.test.ts +++ /dev/null @@ -1,167 +0,0 @@ -/** - * Mode-driven approval e2e. Unlike the parallel-approval suite, these - * tests do NOT mock `dispatchWithHook` — they run the real `consultBefore` - * against per-session `approval_settings` seeded into the harness store, - * exercising the full ordering (full > approved_always > always_allow > - * yaml policy) through the orchestrator's function_execute path. - */ - -import { describe, expect, it } from 'vitest'; -import { SETTINGS_STATE_SCOPE } from '../../src/approval-gate/schemas.js'; -import { - createParallelApprovalHarness, - executionEvents, - makeAssistantWithCalls, - type ParallelApprovalHarness, -} from './parallel-approval-harness.js'; - -type PermissionMode = 'manual' | 'auto' | 'full'; - -interface SeedSettings { - mode?: PermissionMode; - always_allow?: string[]; - approved_always?: string[]; -} - -function entry(function_id: string) { - return { function_id, granted_at: 1, granted_by: 'user_click' as const }; -} - -/** Persist approval_settings into the harness store before runExecute. */ -function seedSettings(h: ParallelApprovalHarness, session_id: string, s: SeedSettings): void { - h.stateStore.set(`${SETTINGS_STATE_SCOPE}/${session_id}`, { - mode: s.mode ?? 'manual', - always_allow: (s.always_allow ?? []).map(entry), - approved_always: (s.approved_always ?? []).map(entry), - mode_set_at: s.mode ? 1 : 0, - }); -} - -function awaitingIds(h: ParallelApprovalHarness, session_id: string): string[] { - return h.loadTurnRecord(session_id)?.awaiting_approval?.map((e) => e.function_call_id) ?? []; -} - -describe('mode-driven approval e2e (real consultBefore)', () => { - it('full mode executes the call immediately without parking or writing an approval', async () => { - const h = createParallelApprovalHarness(); - seedSettings(h, 'sess-full', { mode: 'full' }); - h.seedExecute('sess-full', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); - - await h.runExecute('sess-full'); - - const rec = h.loadTurnRecord('sess-full'); - expect(rec?.state).toBe('assistant_streaming'); - expect(rec?.awaiting_approval).toEqual([]); - expect(rec?.work).toBeUndefined(); - expect(h.stateStore.has('approvals/sess-full/fc-1')).toBe(false); - expect(executionEvents(h.emitted, 'function_execution_end', 'fc-1')).toHaveLength(1); - }); - - it('approved_always honors the grant in MANUAL mode (executes without parking)', async () => { - const h = createParallelApprovalHarness(); - seedSettings(h, 'sess-grant', { - mode: 'manual', - approved_always: ['shell::run'], - }); - h.seedExecute('sess-grant', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); - - await h.runExecute('sess-grant'); - - const rec = h.loadTurnRecord('sess-grant'); - expect(rec?.state).toBe('assistant_streaming'); - expect(rec?.awaiting_approval).toEqual([]); - expect(executionEvents(h.emitted, 'function_execution_end', 'fc-1')).toHaveLength(1); - }); - - it('auto mode + always_allow hit executes without parking', async () => { - const h = createParallelApprovalHarness(); - seedSettings(h, 'sess-auto', { - mode: 'auto', - always_allow: ['shell::run'], - }); - h.seedExecute('sess-auto', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); - - await h.runExecute('sess-auto'); - - expect(awaitingIds(h, 'sess-auto')).toEqual([]); - expect(h.loadTurnRecord('sess-auto')?.state).toBe('assistant_streaming'); - }); - - it('manual mode with no grant parks the call (falls through to policy → needs_approval)', async () => { - const h = createParallelApprovalHarness(); - seedSettings(h, 'sess-manual', { mode: 'manual' }); - h.seedExecute( - 'sess-manual', - makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }]), - ); - - await h.runExecute('sess-manual'); - - expect(h.loadTurnRecord('sess-manual')?.state).toBe('function_awaiting_approval'); - expect(awaitingIds(h, 'sess-manual')).toEqual(['fc-1']); - }); - - it('always_allow is DORMANT in manual mode — the call still parks', async () => { - const h = createParallelApprovalHarness(); - seedSettings(h, 'sess-dormant', { - mode: 'manual', - always_allow: ['shell::run'], - }); - h.seedExecute( - 'sess-dormant', - makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }]), - ); - - await h.runExecute('sess-dormant'); - - expect(h.loadTurnRecord('sess-dormant')?.state).toBe('function_awaiting_approval'); - expect(awaitingIds(h, 'sess-dormant')).toEqual(['fc-1']); - }); - - it('auto mode parks a call whose id is not on the allowlist', async () => { - const h = createParallelApprovalHarness(); - seedSettings(h, 'sess-miss', { - mode: 'auto', - always_allow: ['other::fn'], - }); - h.seedExecute('sess-miss', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); - - await h.runExecute('sess-miss'); - - expect(awaitingIds(h, 'sess-miss')).toEqual(['fc-1']); - }); - - it('denies an agent attempt to call a human-only approval function (self-escalation)', async () => { - const h = createParallelApprovalHarness(); - seedSettings(h, 'sess-escalate', { mode: 'manual' }); - h.seedExecute( - 'sess-escalate', - makeAssistantWithCalls([ - { id: 'fc-1', functionId: 'approval::set_mode', payload: { mode: 'full' } }, - ]), - ); - - await h.runExecute('sess-escalate'); - - const rec = h.loadTurnRecord('sess-escalate'); - expect(rec?.awaiting_approval).toEqual([]); - expect(rec?.state).toBe('assistant_streaming'); - expect(rec?.function_results).toEqual([]); - const turnEnd = h.emitted.find((e) => e.type === 'turn_end') as - | { - function_results: Array<{ - function_call_id: string; - is_error: boolean; - details: unknown; - }>; - } - | undefined; - const denied = turnEnd?.function_results.find((r) => r.function_call_id === 'fc-1'); - expect(denied?.is_error).toBe(true); - expect(JSON.stringify(denied?.details)).toContain('human_only_function'); - const settings = h.stateStore.get(`${SETTINGS_STATE_SCOPE}/sess-escalate`) as { - mode: string; - }; - expect(settings.mode).toBe('manual'); - }); -}); diff --git a/harness/tests/integration/on-record-written.e2e.test.ts b/harness/tests/integration/on-record-written.e2e.test.ts index c49d904f..652ca52c 100644 --- a/harness/tests/integration/on-record-written.e2e.test.ts +++ b/harness/tests/integration/on-record-written.e2e.test.ts @@ -103,7 +103,7 @@ describe('saveRecord wake integration', () => { ]); }); - it('parking in function_awaiting_approval does NOT wake', async () => { + it('parking in function_awaiting_approval wakes once (post-persist resolution scan)', async () => { const { iii, wakeInvocations } = fakeIii(); const store = createTurnStore(iii); const rec = newRecord('sess-c'); @@ -111,6 +111,18 @@ describe('saveRecord wake integration', () => { await store.saveRecord(rec); + expect(wakeInvocations).toEqual([ + { + session_id: 'sess-c', + function_id: 'turn::function_awaiting_approval', + action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), + }, + ]); + + // Mid-park re-saves stay silent — further wakes come from + // harness::function::resolve. + wakeInvocations.length = 0; + await store.saveRecord(rec); expect(wakeInvocations).toEqual([]); }); diff --git a/harness/tests/integration/parallel-approval-harness.ts b/harness/tests/integration/parallel-approval-harness.ts index 145bf2e4..f684eac3 100644 --- a/harness/tests/integration/parallel-approval-harness.ts +++ b/harness/tests/integration/parallel-approval-harness.ts @@ -1,17 +1,24 @@ /** - * Integration harness for parallel approval flows: real TurnStore + runTransition, - * simulated iii state/streams, and dispatchWithHook routing. + * Integration harness for parallel approval flows: real TurnStore + + * runTransition + harness::function::resolve, simulated iii + * state/streams, and a scriptable fake `approval::gate` bound to the + * real pre_dispatch hook chain. */ import { vi } from 'vitest'; -import { handleResolveRequest } from '../../src/approval-gate/resolve.js'; -import { - handleApprovalStateWrite, - handleAwaitingApproval, -} from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; import { TransientError } from '../../src/turn-orchestrator/errors.js'; +import { handleAwaitingApproval } from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; import { handleExecute } from '../../src/turn-orchestrator/function-execute/process.js'; import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; +import { + handleFunctionResolve, + type FunctionResolveResult, +} from '../../src/turn-orchestrator/function-resolve.js'; +import { + addPreDispatchBindingForTests, + resetPreDispatchBindingsForTests, +} from '../../src/turn-orchestrator/hooks/registry.js'; +import type { HookOutput } from '../../src/turn-orchestrator/hooks/types.js'; import { runTransition } from '../../src/turn-orchestrator/run-transition.js'; import { TURN_STATE_SCOPE, @@ -28,6 +35,10 @@ export type ParallelApprovalHarness = { stateStore: Map; emitted: AgentEvent[]; sessions: FakeSessionManager; + /** Calls the fake `approval::gate` received through the hook chain. */ + gateCalls: unknown[]; + /** Script the fake gate's next answers (default: hold). */ + setGateOutput(output: HookOutput | ((input: unknown) => HookOutput)): void; loadTurnRecord(session_id: string): TurnStateRecord | null; seedExecute(session_id: string, assistant: AssistantMessage): TurnStateRecord; runExecute(session_id: string): Promise; @@ -36,7 +47,7 @@ export type ParallelApprovalHarness = { function_call_id: string, decision: 'allow' | 'deny', reason?: string | null, - ): Promise; + ): Promise; }; function makeAgentTriggerCall( @@ -114,6 +125,19 @@ export function createParallelApprovalHarness(): ParallelApprovalHarness { const stateStore = new Map(); const emitted: AgentEvent[] = []; const sessions = new FakeSessionManager(); + const gateCalls: unknown[] = []; + let gateOutput: HookOutput | ((input: unknown) => HookOutput) = { + decision: 'hold', + pending_timeout_ms: 1_800_000, + }; + + // Bind the fake gate to the REAL hook chain (module-global registry). + resetPreDispatchBindingsForTests(); + addPreDispatchBindingForTests({ + id: 'test-gate', + function_id: 'approval::gate', + config: { functions: ['*'], timeout_ms: 5_000, on_error: 'fail_closed' }, + }); const iii = { trigger: vi.fn( @@ -130,6 +154,11 @@ export function createParallelApprovalHarness(): ParallelApprovalHarness { const sessionResult = await sessions.handle(function_id, payload); if (sessionResult !== undefined) return sessionResult; + if (function_id === 'approval::gate') { + gateCalls.push(payload); + return typeof gateOutput === 'function' ? gateOutput(payload) : gateOutput; + } + if (function_id === 'state::get') { const p = payload as { scope: string; key: string }; const v = stateStore.get(`${p.scope}/${p.key}`); @@ -144,20 +173,15 @@ export function createParallelApprovalHarness(): ParallelApprovalHarness { : null; const new_value = structuredClone(p.value); stateStore.set(storeKey, new_value); - if (p.scope === 'approvals') { - const event = { - event_type: old_value == null ? 'state:created' : 'state:updated', - scope: p.scope, - key: p.key, - old_value, - new_value, - message_type: 'state', - }; - await handleApprovalStateWrite(iii as unknown as ISdk, event); - } return { old_value, new_value }; } + if (function_id === 'state::delete') { + const p = payload as { scope: string; key: string }; + stateStore.delete(`${p.scope}/${p.key}`); + return {}; + } + if (function_id === 'state::update') { // Atomic read-modify-write returning the prior value. Models the // `increment` op (event counter) and root-path `set` op (lease). @@ -200,14 +224,6 @@ export function createParallelApprovalHarness(): ParallelApprovalHarness { }; } - // Realistic default for the real `consultBefore` fall-through: no - // yaml rule matches → needs_approval. Mode/allowlist short-circuits - // happen before this in consultBefore, so tests that seed - // approval_settings exercise those paths without reaching here. - if (function_id === 'policy::check_permissions') { - return { decision: 'needs_approval' }; - } - if (function_id.startsWith('turn::') && action != null) { const p = payload as { session_id: string }; await runTurnStepWithRetry(iii as unknown as ISdk, function_id, p.session_id); @@ -219,17 +235,24 @@ export function createParallelApprovalHarness(): ParallelApprovalHarness { ), } as unknown as ISdk; + function loadTurnRecord(session_id: string): TurnStateRecord | null { + const raw = stateStore.get(`${TURN_STATE_SCOPE}/${session_id}`); + return raw ? (structuredClone(raw) as TurnStateRecord) : null; + } + return { iii, stateStore, emitted, sessions, + gateCalls, - loadTurnRecord(session_id: string): TurnStateRecord | null { - const raw = stateStore.get(`${TURN_STATE_SCOPE}/${session_id}`); - return raw ? (structuredClone(raw) as TurnStateRecord) : null; + setGateOutput(output) { + gateOutput = output; }, + loadTurnRecord, + seedExecute(session_id: string, assistant: AssistantMessage): TurnStateRecord { const rec = newRecord(session_id); enterFunctionExecute(rec, assistant); @@ -242,20 +265,45 @@ export function createParallelApprovalHarness(): ParallelApprovalHarness { await runTurnStep(iii, 'turn::function_execute', session_id); }, + /** Drive the real harness::function::resolve handler, like the gate worker does. */ async resolveApproval( session_id: string, function_call_id: string, decision: 'allow' | 'deny', reason: null | string = null, - ): Promise { - const out = await handleResolveRequest(iii, { - session_id, - function_call_id, - decision, - reason, - }); - if (!out.ok) throw new Error(`approval::resolve failed: ${out.error}`); + ): Promise { + const rec = loadTurnRecord(session_id); + if (!rec) throw new Error(`no turn record for ${session_id}`); + const why = reason ?? 'Rejected by operator.'; + const target = + rec.awaiting_approval?.find((e) => e.function_call_id === function_call_id)?.function_id ?? + 'unknown'; + const payload = + decision === 'allow' + ? { + session_id, + turn_id: rec.turn_id, + function_call_id, + action: 'execute' as const, + } + : { + session_id, + turn_id: rec.turn_id, + function_call_id, + action: 'deliver' as const, + is_error: true, + content: [{ type: 'text', text: why }], + details: { + schema_version: 1, + status: 'denied', + denied_by: 'user', + function_id: target, + reason: why, + }, + }; + const out = await handleFunctionResolve(iii, payload); await flushMicrotasks(); + return out; }, }; } diff --git a/harness/tests/integration/parallel-approval.e2e.test.ts b/harness/tests/integration/parallel-approval.e2e.test.ts index 6536752f..fbc506c1 100644 --- a/harness/tests/integration/parallel-approval.e2e.test.ts +++ b/harness/tests/integration/parallel-approval.e2e.test.ts @@ -32,7 +32,11 @@ describe('parallel approval e2e', () => { it('dispatches later calls while earlier ones park without blocking the batch', async () => { const h = createParallelApprovalHarness(); vi.spyOn(agentTriggerModule, 'dispatchWithHook') - .mockResolvedValueOnce({ kind: 'pending' }) + .mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }) .mockResolvedValueOnce({ kind: 'result', result: { @@ -41,7 +45,11 @@ describe('parallel approval e2e', () => { terminate: false, }, }) - .mockResolvedValueOnce({ kind: 'pending' }); + .mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }); h.seedExecute( 'sess-parallel', @@ -65,12 +73,20 @@ describe('parallel approval e2e', () => { it('executes one approved call immediately while a sibling stays pending', async () => { const h = createParallelApprovalHarness(); vi.spyOn(agentTriggerModule, 'dispatchWithHook') - .mockResolvedValueOnce({ kind: 'pending' }) + .mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }) .mockResolvedValueOnce({ kind: 'result', result: { content: [{ type: 'text' as const, text: 'ok' }], details: {}, terminate: false }, }) - .mockResolvedValueOnce({ kind: 'pending' }); + .mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }); h.seedExecute( 'sess-partial', @@ -99,8 +115,16 @@ describe('parallel approval e2e', () => { it('resolves approvals out of order without waiting for batch order', async () => { const h = createParallelApprovalHarness(); vi.spyOn(agentTriggerModule, 'dispatchWithHook') - .mockResolvedValueOnce({ kind: 'pending' }) - .mockResolvedValueOnce({ kind: 'pending' }); + .mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }) + .mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }); h.seedExecute( 'sess-order', @@ -127,8 +151,16 @@ describe('parallel approval e2e', () => { it('denies one pending call without affecting an unresolved sibling', async () => { const h = createParallelApprovalHarness(); vi.spyOn(agentTriggerModule, 'dispatchWithHook') - .mockResolvedValueOnce({ kind: 'pending' }) - .mockResolvedValueOnce({ kind: 'pending' }); + .mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }) + .mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }); h.seedExecute( 'sess-deny', @@ -146,16 +178,23 @@ describe('parallel approval e2e', () => { expect(rec?.awaiting_approval?.map((e) => e.function_call_id)).toEqual(['fc-2']); expect(rec?.work?.executed['fc-1']?.is_error).toBe(true); expect(rec?.work?.executed['fc-1']?.result.details).toMatchObject({ - approval_denied: true, - decision: 'deny', + status: 'denied', + denied_by: 'user', reason: 'operator rejected', }); + expect(rec?.work?.executed['fc-1']?.result.content[0]).toMatchObject({ + text: 'operator rejected', + }); expect(rec?.work?.executed['fc-2']).toBeUndefined(); }); it('is idempotent when the same decision wake is delivered twice', async () => { const h = createParallelApprovalHarness(); - vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ kind: 'pending' }); + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }); h.seedExecute('sess-dup', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); await h.runExecute('sess-dup'); @@ -175,8 +214,16 @@ describe('parallel approval e2e', () => { it('resolves two approvals fired in parallel without double-executing or double-finalizing', async () => { const h = createParallelApprovalHarness(); vi.spyOn(agentTriggerModule, 'dispatchWithHook') - .mockResolvedValueOnce({ kind: 'pending' }) - .mockResolvedValueOnce({ kind: 'pending' }); + .mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }) + .mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }); h.seedExecute( 'sess-par', @@ -204,13 +251,23 @@ describe('parallel approval e2e', () => { it('drains a sibling approved mid-execution even when its own wake is dropped', async () => { const h = createParallelApprovalHarness(); vi.spyOn(agentTriggerModule, 'dispatchWithHook') - .mockResolvedValueOnce({ kind: 'pending' }) - .mockResolvedValueOnce({ kind: 'pending' }); + .mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }) + .mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }); vi.spyOn(agentTriggerModule, 'triggerFunctionCall').mockImplementation(async (_iii, call) => { if (call.id === 'fc-driver') { - h.stateStore.set('approvals/sess-drain/fc-late', { - decision: 'allow', - reason: null, + const turn_id = h.loadTurnRecord('sess-drain')?.turn_id ?? 't_x'; + h.stateStore.set('function_resolutions/sess-drain/fc-late', { + turn_id, + action: 'execute', + resolved_at: 1, }); } return { @@ -244,8 +301,16 @@ describe('parallel approval e2e', () => { it('re-enqueues a follow-up wake when a resolved call leaves siblings pending', async () => { const h = createParallelApprovalHarness(); vi.spyOn(agentTriggerModule, 'dispatchWithHook') - .mockResolvedValueOnce({ kind: 'pending' }) - .mockResolvedValueOnce({ kind: 'pending' }); + .mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }) + .mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }); h.seedExecute( 'sess-reenqueue', @@ -264,22 +329,47 @@ describe('parallel approval e2e', () => { expect(wakeEnqueues(h, 'sess-reenqueue')).toBeGreaterThanOrEqual(before + 2); }); - it('persists the decision and wakes function_awaiting_approval via approval::resolve', async () => { + it('wakes the parked turn via harness::function::resolve and deletes the consumed row', async () => { const h = createParallelApprovalHarness(); - vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ kind: 'pending' }); + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }); h.seedExecute('sess-wake', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); await h.runExecute('sess-wake'); expect(h.loadTurnRecord('sess-wake')?.state).toBe('function_awaiting_approval'); - await h.resolveApproval('sess-wake', 'fc-1', 'allow'); + const out = await h.resolveApproval('sess-wake', 'fc-1', 'allow'); - expect(h.stateStore.get('approvals/sess-wake/fc-1')).toEqual({ - decision: 'allow', - reason: null, - }); + expect(out).toEqual({ resolved: true, turn_resumed: true }); + // The resolution row is consumed and deleted — the scope never accumulates. + expect(h.stateStore.has('function_resolutions/sess-wake/fc-1')).toBe(false); expect(h.loadTurnRecord('sess-wake')?.state).toBe('assistant_streaming'); expect(h.loadTurnRecord('sess-wake')?.work).toBeUndefined(); }); + + it('answers {resolved: false} for an unknown call and a stale turn_id', async () => { + const h = createParallelApprovalHarness(); + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }); + + h.seedExecute('sess-miss', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); + await h.runExecute('sess-miss'); + + // Unknown function_call_id. + const unknown = await h.resolveApproval('sess-miss', 'fc-unknown', 'allow'); + expect(unknown).toEqual({ resolved: false }); + expect(h.loadTurnRecord('sess-miss')?.state).toBe('function_awaiting_approval'); + + // Duplicate after the call settled. + await h.resolveApproval('sess-miss', 'fc-1', 'allow'); + const dup = await h.resolveApproval('sess-miss', 'fc-1', 'allow'); + expect(dup).toEqual({ resolved: false }); + }); }); diff --git a/harness/tests/integration/wire-parity.test.ts b/harness/tests/integration/wire-parity.test.ts index 5b285a56..f3742f80 100644 --- a/harness/tests/integration/wire-parity.test.ts +++ b/harness/tests/integration/wire-parity.test.ts @@ -6,6 +6,8 @@ */ import { describe, expect, it } from 'vitest'; +import { FunctionResolvePayloadSchema } from '../../src/turn-orchestrator/schemas.js'; +import { HookOutputSchema, type HookInput } from '../../src/turn-orchestrator/hooks/types.js'; import type { AgentEvent } from '../../src/types/agent-event.js'; import type { AssistantMessage } from '../../src/types/agent-message.js'; import { formatFunctionResultContent } from '../../src/types/wire.js'; @@ -98,3 +100,74 @@ describe('FunctionResult / formatFunctionResultContent wire shape', () => { expect(out.endsWith('Permission denied: …')).toBe(true); }); }); + +describe('approval-gate worker wire parity', () => { + // These shapes must match the Rust worker's serde forms + // (approval-gate/src/types.rs): HookInput/HookOutput and the + // harness::function::resolve payloads it sends. + + it('HookInput matches what approval::gate deserializes', () => { + const input: HookInput = { + point: 'pre_dispatch', + session_id: 's_1', + turn_id: 't_1', + step: 3, + depth: 0, + call: { id: 'c_1', function_id: 'shell::run', arguments: { cmd: 'ls' } }, + }; + expect(JSON.parse(JSON.stringify(input))).toEqual({ + point: 'pre_dispatch', + session_id: 's_1', + turn_id: 't_1', + step: 3, + depth: 0, + call: { id: 'c_1', function_id: 'shell::run', arguments: { cmd: 'ls' } }, + }); + }); + + it('HookOutput accepts the three Rust serde forms and rejects garbage', () => { + expect(HookOutputSchema.parse({ decision: 'continue' })).toEqual({ decision: 'continue' }); + expect(HookOutputSchema.parse({ decision: 'deny', reason: 'nope' })).toEqual({ + decision: 'deny', + reason: 'nope', + }); + expect(HookOutputSchema.parse({ decision: 'hold', pending_timeout_ms: 1_800_000 })).toEqual({ + decision: 'hold', + pending_timeout_ms: 1_800_000, + }); + expect(HookOutputSchema.safeParse('what even is this').success).toBe(false); + expect(HookOutputSchema.safeParse({ decision: 'hold' }).success).toBe(false); + }); + + it('function::resolve accepts the exact payloads the Rust resolve/sweep send', () => { + // approval-gate/src/functions/resolve.rs — allow: + expect( + FunctionResolvePayloadSchema.parse({ + session_id: 's_1', + turn_id: 't_9', + function_call_id: 'c_1', + action: 'execute', + }).action, + ).toBe('execute'); + + // approval-gate/src/functions/resolve.rs — deny (rendered DenialEnvelope): + const deny = FunctionResolvePayloadSchema.parse({ + session_id: 's_1', + turn_id: 't_9', + function_call_id: 'c_1', + action: 'deliver', + is_error: true, + content: [{ type: 'text', text: 'too risky' }], + details: { + schema_version: 1, + status: 'denied', + denied_by: 'user', + function_id: 'shell::run', + args_excerpt: { cmd: 'ls' }, + reason: 'too risky', + }, + }); + expect(deny.is_error).toBe(true); + expect(deny.content?.[0]).toEqual({ type: 'text', text: 'too risky' }); + }); +}); diff --git a/harness/tests/turn-orchestrator/agent-trigger.test.ts b/harness/tests/turn-orchestrator/agent-trigger.test.ts index 394caacb..1031f139 100644 --- a/harness/tests/turn-orchestrator/agent-trigger.test.ts +++ b/harness/tests/turn-orchestrator/agent-trigger.test.ts @@ -13,7 +13,9 @@ import { triggerFunctionCall, unwrapAgentTrigger, } from '../../src/turn-orchestrator/agent-trigger.js'; -import * as hookModule from '../../src/turn-orchestrator/hook.js'; +import * as chainModule from '../../src/turn-orchestrator/hooks/chain.js'; + +const CTX = { session_id: 's1', turn_id: 't_test' }; afterEach(() => { vi.restoreAllMocks(); @@ -459,19 +461,27 @@ describe('fetchContract', () => { }); describe('dispatchWithHook returns DispatchResult', () => { - it('returns kind:pending when consultBefore returns pending', async () => { - vi.spyOn(hookModule, 'consultBefore').mockResolvedValue({ kind: 'pending' }); + it('returns kind:pending when the hook chain holds the call', async () => { + vi.spyOn(chainModule, 'consultPreDispatch').mockResolvedValue({ + kind: 'hold', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }); const iii = { trigger: vi.fn() } as unknown as ISdk; - const out = await dispatchWithHook(iii, { + const out = await dispatchWithHook(iii, CTX, { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' }, }); - expect(out.kind).toBe('pending'); + expect(out).toEqual({ + kind: 'pending', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }); }); it('returns kind:result with denied details on hard deny', async () => { - vi.spyOn(hookModule, 'consultBefore').mockResolvedValue({ + vi.spyOn(chainModule, 'consultPreDispatch').mockResolvedValue({ kind: 'deny', denial: { schema_version: 1, @@ -482,7 +492,7 @@ describe('dispatchWithHook returns DispatchResult', () => { }, }); const iii = { trigger: vi.fn() } as unknown as ISdk; - const out = await dispatchWithHook(iii, { + const out = await dispatchWithHook(iii, CTX, { id: 'fc-1', function_id: 'shell::run', arguments: {}, @@ -494,11 +504,11 @@ describe('dispatchWithHook returns DispatchResult', () => { }); it('returns kind:result on allow + successful dispatch', async () => { - vi.spyOn(hookModule, 'consultBefore').mockResolvedValue({ kind: 'allow' }); + vi.spyOn(chainModule, 'consultPreDispatch').mockResolvedValue({ kind: 'allow' }); const iii = { trigger: vi.fn().mockResolvedValue({ ok: true }), } as unknown as ISdk; - const out = await dispatchWithHook(iii, { + const out = await dispatchWithHook(iii, CTX, { id: 'fc-1', function_id: 'shell::run', arguments: {}, @@ -506,12 +516,37 @@ describe('dispatchWithHook returns DispatchResult', () => { expect(out.kind).toBe('result'); }); + it('builds the HookInput from the ctx and the ENVELOPED call', async () => { + const consult = vi + .spyOn(chainModule, 'consultPreDispatch') + .mockResolvedValue({ kind: 'allow' }); + const iii = { trigger: vi.fn().mockResolvedValue({ ok: true }) } as unknown as ISdk; + await dispatchWithHook( + iii, + { session_id: 's1', turn_id: 't_9', step: 3, metadata: { message_id: 'm1' } }, + { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls', session_id: 's1' } }, + ); + expect(consult).toHaveBeenCalledWith(iii, { + point: 'pre_dispatch', + session_id: 's1', + turn_id: 't_9', + step: 3, + depth: 0, + metadata: { message_id: 'm1' }, + call: { + id: 'fc-1', + function_id: 'shell::run', + arguments: { command: 'ls', session_id: 's1' }, + }, + }); + }); + it('triggers the TARGET with clean args, not the routing envelope (function_id clobber fix)', async () => { // Regression: engine::functions::info { function_id: "sandbox::create" } was // arriving with function_id overwritten by the routing envelope (which // spreads function_id = the target id), so the engine described itself. The // hook sees the envelope; the target must get the caller's untouched args. - vi.spyOn(hookModule, 'consultBefore').mockResolvedValue({ kind: 'allow' }); + vi.spyOn(chainModule, 'consultPreDispatch').mockResolvedValue({ kind: 'allow' }); const trigger = vi.fn().mockResolvedValue({ ok: true }); const iii = { trigger } as unknown as ISdk; const hookCall = { @@ -524,7 +559,7 @@ describe('dispatchWithHook returns DispatchResult', () => { function_id: 'engine::functions::info', arguments: { function_id: 'sandbox::create' }, }; - const out = await dispatchWithHook(iii, hookCall, targetCall); + const out = await dispatchWithHook(iii, CTX, hookCall, targetCall); expect(out.kind).toBe('result'); expect(trigger).toHaveBeenCalledWith( expect.objectContaining({ @@ -540,11 +575,11 @@ describe('dispatchWithHook returns DispatchResult', () => { // callable, retried 3× on `function_not_found`. The hint must // propose the canonical worker::fn form so the recovery loop // collapses to one turn. - vi.spyOn(hookModule, 'consultBefore').mockResolvedValue({ kind: 'allow' }); + vi.spyOn(chainModule, 'consultPreDispatch').mockResolvedValue({ kind: 'allow' }); const iii = { trigger: vi.fn().mockRejectedValue({ code: 'function_not_found' }), } as unknown as ISdk; - const out = await dispatchWithHook(iii, { + const out = await dispatchWithHook(iii, CTX, { id: 'fc-1', function_id: 'sandbox/skills/sandbox/create', arguments: { image: 'node' }, @@ -559,11 +594,11 @@ describe('dispatchWithHook returns DispatchResult', () => { }); it('attaches the generic slash-path hint when function_id has slashes but no clean rewrite', async () => { - vi.spyOn(hookModule, 'consultBefore').mockResolvedValue({ kind: 'allow' }); + vi.spyOn(chainModule, 'consultPreDispatch').mockResolvedValue({ kind: 'allow' }); const iii = { trigger: vi.fn().mockRejectedValue({ code: 'function_not_found' }), } as unknown as ISdk; - const out = await dispatchWithHook(iii, { + const out = await dispatchWithHook(iii, CTX, { id: 'fc-1', function_id: 'some/odd/three-segment/id', arguments: {}, @@ -578,11 +613,11 @@ describe('dispatchWithHook returns DispatchResult', () => { }); it('falls back to the engine discovery hint when function_id contains no slash', async () => { - vi.spyOn(hookModule, 'consultBefore').mockResolvedValue({ kind: 'allow' }); + vi.spyOn(chainModule, 'consultPreDispatch').mockResolvedValue({ kind: 'allow' }); const iii = { trigger: vi.fn().mockRejectedValue({ code: 'function_not_found' }), } as unknown as ISdk; - const out = await dispatchWithHook(iii, { + const out = await dispatchWithHook(iii, CTX, { id: 'fc-1', function_id: 'misspelled', arguments: {}, diff --git a/harness/tests/turn-orchestrator/function-awaiting-approval-state-trigger.test.ts b/harness/tests/turn-orchestrator/function-awaiting-approval-state-trigger.test.ts deleted file mode 100644 index 705fc0bc..00000000 --- a/harness/tests/turn-orchestrator/function-awaiting-approval-state-trigger.test.ts +++ /dev/null @@ -1,45 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import { TriggerAction, type ISdk } from '../../src/runtime/iii.js'; -import { handleApprovalStateWrite } from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; -import { ApprovalDecisionEventSchema } from '../../src/turn-orchestrator/schemas.js'; -import { TURN_STEP_QUEUE } from '../../src/turn-orchestrator/state-runtime/store.js'; - -const matchingEvent = { - event_type: 'state:created' as const, - scope: 'approvals' as const, - key: 'sess-abc/fc-1', - old_value: null, - new_value: { decision: 'allow', reason: null }, - message_type: 'state', -}; - -describe('ApprovalDecisionEventSchema', () => { - it('extracts session_id from the / key', () => { - expect(ApprovalDecisionEventSchema.parse(matchingEvent)).toEqual({ session_id: 'sess-abc' }); - }); -}); - -describe('handleApprovalStateWrite', () => { - it('enqueues turn::function_awaiting_approval on a decision write', async () => { - const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { - triggers.push(req); - return null; - }), - } as unknown as ISdk; - - await handleApprovalStateWrite(iii, matchingEvent); - - expect(triggers).toHaveLength(1); - expect(triggers[0]?.function_id).toBe('turn::function_awaiting_approval'); - expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); - expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE })); - }); - - it('no-ops on a non-matching event', async () => { - const iii = { trigger: vi.fn() } as unknown as ISdk; - await handleApprovalStateWrite(iii, { ...matchingEvent, new_value: { reason: 'x' } }); - expect(iii.trigger).not.toHaveBeenCalled(); - }); -}); diff --git a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts index 4e7a0653..99ab972e 100644 --- a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts +++ b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts @@ -2,13 +2,11 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; import * as events from '../../src/turn-orchestrator/events.js'; import { installMockTurnStore } from './_helpers/mockTurnStore.js'; -import { - applyDecisionToPrepared, - denialResultFromDecision, -} from '../../src/turn-orchestrator/function-awaiting-approval/run.js'; +import { applyDecisionToPrepared } from '../../src/turn-orchestrator/function-awaiting-approval/run.js'; import { handleAwaitingApproval } from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; import type { FunctionBatchWork } from '../../src/turn-orchestrator/function-execute/types.js'; +import type { FunctionResolution } from '../../src/turn-orchestrator/function-resolve.js'; import { newRecord, type TurnStateRecord } from '../../src/turn-orchestrator/state.js'; import type { AssistantMessage } from '../../src/types/agent-message.js'; @@ -53,12 +51,36 @@ function seedFunctionAwaitingApproval( rec.state = 'function_awaiting_approval'; } -function makeIii(approvalStore: Map): ISdk { +function executeResolution(turn_id = 't_1'): FunctionResolution { + return { turn_id, action: 'execute', resolved_at: 1 }; +} + +function deliverResolution( + overrides?: Partial>, +): FunctionResolution { + return { + turn_id: 't_1', + action: 'deliver', + is_error: true, + content: [{ type: 'text', text: 'Permission denied by user: policy' }], + details: { status: 'denied', denied_by: 'user', reason: 'policy' }, + resolved_at: 1, + ...overrides, + }; +} + +/** Mock bus: `function_resolutions` rows live in `resolutionStore`, keyed `/`. */ +function makeIii(resolutionStore: Map): ISdk { return { trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { if (req.function_id === 'state::get') { const p = req.payload as { scope: string; key: string }; - return approvalStore.get(`${p.scope}/${p.key}`) ?? null; + return resolutionStore.get(`${p.scope}/${p.key}`) ?? null; + } + if (req.function_id === 'state::delete') { + const p = req.payload as { scope: string; key: string }; + resolutionStore.delete(`${p.scope}/${p.key}`); + return {}; } if (req.function_id === 'state::update') return { old_value: 0 }; if (req.function_id === 'stream::set') return null; @@ -80,33 +102,44 @@ describe('applyDecisionToPrepared', () => { call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, }; - it('maps allow to pre_approved', () => { - expect(applyDecisionToPrepared(dispatchCall, { decision: 'allow', reason: null })).toEqual({ + it('maps execute to pre_approved', () => { + expect(applyDecisionToPrepared(dispatchCall, executeResolution())).toEqual({ route: 'pre_approved', call: dispatchCall.call, }); }); - it('maps deny to synthetic denial result', () => { - const resolved = applyDecisionToPrepared(dispatchCall, { decision: 'deny', reason: 'policy' }); - expect(resolved.route).toBe('synthetic'); - expect(resolved).toMatchObject({ - result: denialResultFromDecision({ decision: 'deny', reason: 'policy' }), + it('maps deliver to a synthetic result carrying the delivered content verbatim', () => { + const resolution = deliverResolution(); + const resolved = applyDecisionToPrepared(dispatchCall, resolution); + expect(resolved).toEqual({ + route: 'synthetic', + call: dispatchCall.call, + result: { + content: [{ type: 'text', text: 'Permission denied by user: policy' }], + details: { status: 'denied', denied_by: 'user', reason: 'policy' }, + terminate: false, + }, + is_error: true, }); }); - it('a denial resumes the loop; an abort terminates the turn', () => { - expect(denialResultFromDecision({ decision: 'deny', reason: null }).terminate).toBe(false); - expect(denialResultFromDecision({ decision: 'aborted', reason: null }).terminate).toBe(true); + it('deliver carries is_error through (false stays false)', () => { + const resolved = applyDecisionToPrepared( + dispatchCall, + deliverResolution({ is_error: false, details: undefined }), + ); + expect(resolved).toMatchObject({ route: 'synthetic', is_error: false }); + expect((resolved as { result: { details: unknown } }).result.details).toEqual({}); }); }); describe('handleAwaitingApproval', () => { - it('executes allow decision and finalizes when batch completes', async () => { - const approvalStore = new Map(); - approvalStore.set('approvals/s1/fc-1', { decision: 'allow', reason: null }); - const iii = makeIii(approvalStore); + it('executes a released call and finalizes when the batch completes', async () => { + const resolutionStore = new Map(); const rec = newRecord('s1'); + resolutionStore.set('function_resolutions/s1/fc-1', executeResolution(rec.turn_id)); + const iii = makeIii(resolutionStore); const fc = { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }; seedFunctionAwaitingApproval( rec, @@ -126,13 +159,61 @@ describe('handleAwaitingApproval', () => { expect(rec.state).toBe('assistant_streaming'); expect(rec.work).toBeUndefined(); expect(rec.function_results).toEqual([]); + // Consumed resolution row is deleted. + expect(resolutionStore.has('function_resolutions/s1/fc-1')).toBe(false); const turnEnd = emitSpy.mock.calls.find((call) => call[2]?.type === 'turn_end')?.[2] as | { function_results: unknown[] } | undefined; expect(turnEnd?.function_results).toHaveLength(1); }); - it('leaves state parked when awaiting entries remain undecided', async () => { + it('crash between checkpoint and row delete: the replay wake cleans up without re-executing', async () => { + // Simulates: a prior wake executed fc-1 and checkpointed, then crashed + // before deleting the resolution row and before the awaiting/route save. + const resolutionStore = new Map(); + const rec = newRecord('s1'); + resolutionStore.set('function_resolutions/s1/fc-1', executeResolution(rec.turn_id)); + const iii = makeIii(resolutionStore); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + seedFunctionAwaitingApproval( + rec, + { + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {} }, + is_error: false, + duration_ms: 1, + }, + }, + }, + [{ function_call_id: 'fc-1', function_id: 'shell::run' }], + ); + installMockTurnStore({ + loadMessages: vi.fn(async () => []), + appendMessages: vi.fn(async () => {}), + }); + const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleAwaitingApproval(iii, rec); + + // No second execution: the target was never dispatched again. + const trigger = iii.trigger as unknown as ReturnType; + const shellCalls = trigger.mock.calls.filter( + (call: [{ function_id: string }]) => call[0].function_id === 'shell::run', + ); + expect(shellCalls).toHaveLength(0); + expect( + emitSpy.mock.calls.filter((call) => call[2]?.type === 'function_execution_end'), + ).toHaveLength(0); + // The orphaned row is collected and the batch finalizes. + expect(resolutionStore.has('function_resolutions/s1/fc-1')).toBe(false); + expect(rec.awaiting_approval).toEqual([]); + expect(rec.state).toBe('assistant_streaming'); + }); + + it('leaves state parked when awaiting entries remain unresolved', async () => { const iii = makeIii(new Map()); const rec = newRecord('s1'); const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; @@ -149,11 +230,11 @@ describe('handleAwaitingApproval', () => { expect(rec.awaiting_approval).toHaveLength(1); }); - it('returns to function_execute when approvals done but batch incomplete', async () => { - const approvalStore = new Map(); - approvalStore.set('approvals/s1/fc-2', { decision: 'deny', reason: null }); - const iii = makeIii(approvalStore); + it('returns to function_execute when resolutions are drained but the batch is incomplete', async () => { + const resolutionStore = new Map(); const rec = newRecord('s1'); + resolutionStore.set('function_resolutions/s1/fc-2', deliverResolution()); + const iii = makeIii(resolutionStore); const fc1 = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; const fc2 = { id: 'fc-2', function_id: 'shell::run', arguments: {} }; const fc3 = { id: 'fc-3', function_id: 'shell::run', arguments: {} }; @@ -184,6 +265,8 @@ describe('handleAwaitingApproval', () => { expect(rec.state).toBe('function_execute'); expect(rec.awaiting_approval).toEqual([]); expect(rec.work?.executed['fc-2']).toBeDefined(); + expect(rec.work?.executed['fc-2']?.is_error).toBe(true); expect(rec.work?.executed['fc-3']).toBeUndefined(); + expect(resolutionStore.has('function_resolutions/s1/fc-2')).toBe(false); }); }); diff --git a/harness/tests/turn-orchestrator/function-resolve.test.ts b/harness/tests/turn-orchestrator/function-resolve.test.ts new file mode 100644 index 00000000..65ed8af2 --- /dev/null +++ b/harness/tests/turn-orchestrator/function-resolve.test.ts @@ -0,0 +1,239 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { handleFunctionResolve } from '../../src/turn-orchestrator/function-resolve.js'; +import { + TURN_STATE_SCOPE, + newRecord, + type TurnStateRecord, +} from '../../src/turn-orchestrator/state.js'; + +type Wake = { function_id: string; session_id: string }; + +function fixture(): { + iii: ISdk; + stateStore: Map; + wakes: Wake[]; + seedParked(session_id: string, callIds: string[]): TurnStateRecord; +} { + const stateStore = new Map(); + const wakes: Wake[] = []; + const iii = { + trigger: vi.fn( + async ({ + function_id, + payload, + action, + }: { + function_id: string; + payload: unknown; + action?: unknown; + }) => { + if (function_id === 'state::get') { + const p = payload as { scope: string; key: string }; + const v = stateStore.get(`${p.scope}/${p.key}`); + return v === undefined ? null : structuredClone(v); + } + if (function_id === 'state::set') { + const p = payload as { scope: string; key: string; value: unknown }; + const key = `${p.scope}/${p.key}`; + const old_value = stateStore.has(key) ? structuredClone(stateStore.get(key)) : null; + stateStore.set(key, structuredClone(p.value)); + return { old_value, new_value: p.value }; + } + if (function_id === 'state::delete') { + const p = payload as { scope: string; key: string }; + stateStore.delete(`${p.scope}/${p.key}`); + return {}; + } + if (function_id.startsWith('turn::') && action != null) { + wakes.push({ function_id, session_id: (payload as { session_id: string }).session_id }); + return null; + } + return null; + }, + ), + } as unknown as ISdk; + + function seedParked(session_id: string, callIds: string[]): TurnStateRecord { + const rec = newRecord(session_id); + rec.state = 'function_awaiting_approval'; + rec.last_assistant = null; + rec.work = { + prepared: callIds.map((id) => ({ + route: 'dispatch' as const, + call: { id, function_id: 'shell::run', arguments: {} }, + })), + executed: {}, + }; + rec.awaiting_approval = callIds.map((id) => ({ + function_call_id: id, + function_id: 'shell::run', + args: {}, + })); + stateStore.set(`${TURN_STATE_SCOPE}/${session_id}`, structuredClone(rec)); + return rec; + } + + return { iii, stateStore, wakes, seedParked }; +} + +describe('harness::function::resolve', () => { + it('execute persists the resolution row and wakes the parked turn', async () => { + const f = fixture(); + const rec = f.seedParked('s1', ['fc-1']); + + const out = await handleFunctionResolve(f.iii, { + session_id: 's1', + turn_id: rec.turn_id, + function_call_id: 'fc-1', + action: 'execute', + }); + + expect(out).toEqual({ resolved: true, turn_resumed: true }); + expect(f.stateStore.get('function_resolutions/s1/fc-1')).toMatchObject({ + turn_id: rec.turn_id, + action: 'execute', + }); + expect(f.wakes).toEqual([ + { function_id: 'turn::function_awaiting_approval', session_id: 's1' }, + ]); + }); + + it('deliver persists content/details/is_error verbatim', async () => { + const f = fixture(); + const rec = f.seedParked('s1', ['fc-1']); + + const out = await handleFunctionResolve(f.iii, { + session_id: 's1', + turn_id: rec.turn_id, + function_call_id: 'fc-1', + action: 'deliver', + is_error: true, + content: [{ type: 'text', text: 'too risky' }], + details: { status: 'denied', denied_by: 'user', reason: 'too risky' }, + }); + + expect(out.resolved).toBe(true); + expect(f.stateStore.get('function_resolutions/s1/fc-1')).toMatchObject({ + action: 'deliver', + is_error: true, + content: [{ type: 'text', text: 'too risky' }], + details: { denied_by: 'user' }, + }); + }); + + it('deliver without content is rejected', async () => { + const f = fixture(); + const rec = f.seedParked('s1', ['fc-1']); + + await expect( + handleFunctionResolve(f.iii, { + session_id: 's1', + turn_id: rec.turn_id, + function_call_id: 'fc-1', + action: 'deliver', + is_error: true, + }), + ).rejects.toThrow(/content is required/); + }); + + it('rejects "/" in ids at the boundary', async () => { + const f = fixture(); + await expect( + handleFunctionResolve(f.iii, { + session_id: 's/1', + turn_id: 't_1', + function_call_id: 'fc-1', + action: 'execute', + }), + ).rejects.toThrow(/must not contain/); + }); + + it.each([ + ['unknown session', 's-missing', 't_any', 'fc-1'], + ['unknown call', 's1', undefined, 'fc-unknown'], + ] as const)('%s answers {resolved: false}', async (_label, session_id, turn_id, call_id) => { + const f = fixture(); + const rec = f.seedParked('s1', ['fc-1']); + + const out = await handleFunctionResolve(f.iii, { + session_id, + turn_id: turn_id ?? rec.turn_id, + function_call_id: call_id, + action: 'execute', + }); + expect(out).toEqual({ resolved: false }); + expect(f.wakes).toEqual([]); + }); + + it('a stale turn_id answers {resolved: false}', async () => { + const f = fixture(); + f.seedParked('s1', ['fc-1']); + + const out = await handleFunctionResolve(f.iii, { + session_id: 's1', + turn_id: 't_previous_run', + function_call_id: 'fc-1', + action: 'execute', + }); + expect(out).toEqual({ resolved: false }); + }); + + it('a record outside the function-batch states answers {resolved: false}', async () => { + const f = fixture(); + const rec = f.seedParked('s1', ['fc-1']); + rec.state = 'assistant_streaming'; + f.stateStore.set(`${TURN_STATE_SCOPE}/s1`, structuredClone(rec)); + + const out = await handleFunctionResolve(f.iii, { + session_id: 's1', + turn_id: rec.turn_id, + function_call_id: 'fc-1', + action: 'execute', + }); + expect(out).toEqual({ resolved: false }); + }); + + it('duplicate resolves keep the first decision and re-enqueue the wake', async () => { + const f = fixture(); + const rec = f.seedParked('s1', ['fc-1']); + + await handleFunctionResolve(f.iii, { + session_id: 's1', + turn_id: rec.turn_id, + function_call_id: 'fc-1', + action: 'execute', + }); + const out = await handleFunctionResolve(f.iii, { + session_id: 's1', + turn_id: rec.turn_id, + function_call_id: 'fc-1', + action: 'deliver', + is_error: true, + content: [{ type: 'text', text: 'late deny' }], + }); + + expect(out).toEqual({ resolved: true, turn_resumed: true }); + // First decision wins — the deliver did not overwrite the execute. + expect(f.stateStore.get('function_resolutions/s1/fc-1')).toMatchObject({ action: 'execute' }); + expect(f.wakes).toHaveLength(2); + }); + + it('a state read failure surfaces as an error, never {resolved: false}', async () => { + const f = fixture(); + const trigger = f.iii.trigger as unknown as ReturnType; + trigger.mockImplementation(async ({ function_id }: { function_id: string }) => { + if (function_id === 'state::get') throw new Error('state down'); + return null; + }); + + await expect( + handleFunctionResolve(f.iii, { + session_id: 's1', + turn_id: 't_1', + function_call_id: 'fc-1', + action: 'execute', + }), + ).rejects.toThrow('state down'); + }); +}); diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index 2ab8a90b..25311c52 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -1,13 +1,12 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; import * as events from '../../src/turn-orchestrator/events.js'; -import * as hookModule from '../../src/turn-orchestrator/hook.js'; +import * as chainModule from '../../src/turn-orchestrator/hooks/chain.js'; import { FakeSessionManager } from '../_helpers/fakeSessionManager.js'; import { installMockTurnStore } from './_helpers/mockTurnStore.js'; import type { TurnStateRecord } from '../../src/turn-orchestrator/state.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; import * as agentTriggerModule from '../../src/turn-orchestrator/agent-trigger.js'; -import { parseApprovalDecision } from '../../src/turn-orchestrator/function-awaiting-approval/ports.js'; import { handleExecute } from '../../src/turn-orchestrator/function-execute/process.js'; import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; import type { FunctionBatchWork } from '../../src/turn-orchestrator/function-execute/types.js'; @@ -46,39 +45,6 @@ function makeAssistant( }; } -describe('parseApprovalDecision', () => { - it('accepts allow/deny/aborted with nullable reason (stored approval shape)', () => { - expect(parseApprovalDecision({ decision: 'allow', reason: null })).toEqual({ - decision: 'allow', - reason: null, - }); - expect(parseApprovalDecision({ decision: 'deny', reason: 'policy' })).toEqual({ - decision: 'deny', - reason: 'policy', - }); - expect(parseApprovalDecision({ decision: 'aborted', reason: 'session_aborted' })).toEqual({ - decision: 'aborted', - reason: 'session_aborted', - }); - }); - - it('rejects speculative wrapper envelopes no caller stores', () => { - expect(parseApprovalDecision({ data: { decision: 'allow', reason: null } })).toBeNull(); - expect(parseApprovalDecision({ payload: { decision: 'allow', reason: null } })).toBeNull(); - }); - - it.each([ - ['null', null], - ['undefined', undefined], - ['missing decision', { reason: null }], - ['empty decision', { decision: '', reason: null }], - ['unknown decision', { decision: 'needs_approval', reason: null }], - ['numeric reason', { decision: 'allow', reason: 7 }], - ] as const)('rejects bad shape: %s', (_label, value) => { - expect(parseApprovalDecision(value)).toBeNull(); - }); -}); - /** Seed required function-batch invariants before handleExecute. */ function seedFunctionExecute( rec: TurnStateRecord, @@ -200,7 +166,7 @@ describe('handleExecute new flow', () => { expect(fc1Ends).toHaveLength(0); }); - it('skips consultBefore on pre_approved entries and uses triggerFunctionCall', async () => { + it('skips the hook chain on pre_approved entries and uses triggerFunctionCall', async () => { const triggerSpy = vi.fn().mockResolvedValue({ ok: true }); const iii = { trigger: triggerSpy } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); @@ -213,12 +179,12 @@ describe('handleExecute new flow', () => { ], executed: {}, }); - const consultBeforeSpy = vi.spyOn(hookModule, 'consultBefore'); + const consultSpy = vi.spyOn(chainModule, 'consultPreDispatch'); mockFinalizePersistence(); await handleExecute(iii, rec); - expect(consultBeforeSpy).not.toHaveBeenCalled(); + expect(consultSpy).not.toHaveBeenCalled(); const triggerCalls = triggerSpy.mock.calls.map( (call) => (call[0] as { function_id: string }).function_id, ); diff --git a/harness/tests/turn-orchestrator/hook.test.ts b/harness/tests/turn-orchestrator/hook.test.ts deleted file mode 100644 index c02052ca..00000000 --- a/harness/tests/turn-orchestrator/hook.test.ts +++ /dev/null @@ -1,233 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import type { ApprovalSettings } from '../../src/approval-gate/schemas.js'; -import type { - CheckPermissionsPayload, - PolicyCheckReply, -} from '../../src/harness/policy/check-permissions.js'; -import type { ISdk } from '../../src/runtime/iii.js'; -import { consultBefore } from '../../src/turn-orchestrator/hook.js'; - -function fakeIii( - triggerImpl: (req: { - function_id: string; - payload: CheckPermissionsPayload; - }) => PolicyCheckReply | Promise, -) { - return { - trigger: vi.fn(async (req: { function_id: string; payload: CheckPermissionsPayload }) => - triggerImpl(req), - ), - } as unknown as ISdk; -} - -/** Build an ApprovalSettings with sane defaults; override per test. */ -function mkSettings(overrides: Partial = {}): ApprovalSettings { - return { - mode: 'manual', - always_allow: [], - approved_always: [], - mode_set_at: 1, - ...overrides, - }; -} - -function allowEntry(function_id: string) { - return { function_id, granted_at: 1, granted_by: 'user_click' as const }; -} - -/** Build an iii fake that returns approval_settings for `state::get` calls and routes policy checks through the supplied impl. */ -function modedIii( - settings: ApprovalSettings | null, - policyImpl?: () => PolicyCheckReply | Promise, -) { - return { - trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { - if (req.function_id === 'state::get') { - return settings; - } - if (req.function_id === 'policy::check_permissions') { - return policyImpl ? await policyImpl() : { decision: 'needs_approval' }; - } - throw new Error(`unexpected trigger ${req.function_id}`); - }), - } as unknown as ISdk; -} - -describe('consultBefore (direct policy call)', () => { - const fc = { id: 'fc-1', function_id: 'shell::fs::write', arguments: { path: '/tmp/x' } }; - - it('returns allow when policy decides allow', async () => { - const iii = fakeIii(({ function_id }) => { - expect(function_id).toBe('policy::check_permissions'); - return { decision: 'allow', rule_id: 'allow-tmp' }; - }); - const outcome = await consultBefore(iii, fc); - expect(outcome.kind).toBe('allow'); - }); - - it('returns deny with a permissions envelope when policy decides deny', async () => { - const iii = fakeIii(() => ({ - decision: 'deny', - rule_id: 'deny-rm-rf', - rule_action: 'deny', - matched_constraint: { field: 'path', operator: 'matches', value: '^/$' }, - })); - const outcome = await consultBefore(iii, fc); - expect(outcome.kind).toBe('deny'); - if (outcome.kind !== 'deny') return; - expect(outcome.denial.denied_by).toBe('permissions'); - expect(outcome.denial.rule_id).toBe('deny-rm-rf'); - }); - - it('returns pending when policy says needs_approval', async () => { - const iii = fakeIii(() => ({ decision: 'needs_approval' })); - const outcome = await consultBefore(iii, fc); - expect(outcome.kind).toBe('pending'); - }); - - it('fails closed (deny gate_unavailable) when policy is unreachable', async () => { - const iii = fakeIii(() => { - throw new Error('policy worker down'); - }); - const outcome = await consultBefore(iii, fc); - expect(outcome.kind).toBe('deny'); - if (outcome.kind !== 'deny') return; - expect(outcome.denial.denied_by).toBe('gate_unavailable'); - }); - - it('does NOT call hook-fanout::publish_collect', async () => { - const trigger = vi.fn( - async () => ({ decision: 'allow', rule_id: 'r' }) satisfies PolicyCheckReply, - ); - const iii = { trigger } as unknown as ISdk; - await consultBefore(iii, fc); - expect(trigger).toHaveBeenCalledTimes(1); - expect(trigger.mock.calls[0][0].function_id).toBe('policy::check_permissions'); - }); -}); - -describe('consultBefore (mode + always_allow)', () => { - const fcWithSession = { - id: 'fc-1', - function_id: 'shell::fs::write', - arguments: { path: '/tmp/x', session_id: 'sess-abc' }, - }; - const fcSafeWithSession = { - id: 'fc-2', - function_id: 'shell::fs::read', - arguments: { path: '/tmp/x', session_id: 'sess-abc' }, - }; - - it('full mode short-circuits to allow without calling policy', async () => { - const iii = modedIii(mkSettings({ mode: 'full' })); - const outcome = await consultBefore(iii, fcWithSession); - expect(outcome.kind).toBe('allow'); - const calls = (iii.trigger as ReturnType).mock.calls; - expect(calls.map((c) => c[0].function_id)).not.toContain('policy::check_permissions'); - }); - - it('auto mode + allowlist hit short-circuits to allow even when policy would deny', async () => { - const iii = modedIii( - mkSettings({ - mode: 'auto', - always_allow: [allowEntry('shell::fs::write')], - }), - () => ({ - decision: 'deny', - rule_id: 'would-deny', - rule_action: 'deny', - }), - ); - const outcome = await consultBefore(iii, fcWithSession); - expect(outcome.kind).toBe('allow'); - const calls = (iii.trigger as ReturnType).mock.calls; - expect(calls.map((c) => c[0].function_id)).not.toContain('policy::check_permissions'); - }); - - it('manual mode treats the allowlist as dormant (does NOT short-circuit)', async () => { - const iii = modedIii( - mkSettings({ - mode: 'manual', - always_allow: [allowEntry('shell::fs::write')], - }), - () => ({ decision: 'needs_approval' }), - ); - const outcome = await consultBefore(iii, fcWithSession); - expect(outcome.kind).toBe('pending'); - const calls = (iii.trigger as ReturnType).mock.calls; - expect(calls.map((c) => c[0].function_id)).toContain('policy::check_permissions'); - }); - - it('auto mode falls through to policy when function id is not on the allowlist', async () => { - const iii = modedIii(mkSettings({ mode: 'auto' }), () => ({ - decision: 'needs_approval', - })); - const outcome = await consultBefore(iii, fcSafeWithSession); - expect(outcome.kind).toBe('pending'); - }); - - it('approved_always short-circuits in MANUAL mode (per-session grant honored everywhere)', async () => { - const iii = modedIii( - mkSettings({ - mode: 'manual', - approved_always: [allowEntry('shell::fs::write')], - }), - () => ({ decision: 'needs_approval' }), - ); - const outcome = await consultBefore(iii, fcWithSession); - expect(outcome.kind).toBe('allow'); - const calls = (iii.trigger as ReturnType).mock.calls; - expect(calls.map((c) => c[0].function_id)).not.toContain('policy::check_permissions'); - }); - - it('approved_always short-circuits in AUTO mode too', async () => { - const iii = modedIii( - mkSettings({ - mode: 'auto', - approved_always: [allowEntry('shell::fs::write')], - }), - () => ({ decision: 'deny', rule_id: 'would-deny', rule_action: 'deny' }), - ); - const outcome = await consultBefore(iii, fcWithSession); - expect(outcome.kind).toBe('allow'); - }); - - it('approved_always does not affect a different function id', async () => { - const iii = modedIii( - mkSettings({ - mode: 'manual', - approved_always: [allowEntry('shell::fs::read')], - }), - () => ({ decision: 'needs_approval' }), - ); - const outcome = await consultBefore(iii, fcWithSession); - expect(outcome.kind).toBe('pending'); - }); - - it('denies human-only approval functions (self-escalation defense)', async () => { - const iii = modedIii(null); - const outcome = await consultBefore(iii, { - id: 'fc-escalate', - function_id: 'approval::set_mode', - arguments: { session_id: 'sess-abc', mode: 'full' }, - }); - expect(outcome.kind).toBe('deny'); - if (outcome.kind !== 'deny') return; - expect(outcome.denial.denied_by).toBe('permissions'); - expect(outcome.denial.rule_id).toBe('human_only_function'); - // No state::get and no policy::check_permissions — denial is upfront. - const calls = (iii.trigger as ReturnType).mock.calls; - expect(calls.length).toBe(0); - }); - - it('falls back to policy when settings are absent (legacy session)', async () => { - const iii = modedIii(null, () => ({ - decision: 'allow', - rule_id: 'r', - })); - const outcome = await consultBefore(iii, fcWithSession); - expect(outcome.kind).toBe('allow'); - const calls = (iii.trigger as ReturnType).mock.calls; - expect(calls.map((c) => c[0].function_id)).toContain('policy::check_permissions'); - }); -}); diff --git a/harness/tests/turn-orchestrator/hooks/chain.test.ts b/harness/tests/turn-orchestrator/hooks/chain.test.ts new file mode 100644 index 00000000..9ddc68b4 --- /dev/null +++ b/harness/tests/turn-orchestrator/hooks/chain.test.ts @@ -0,0 +1,163 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../../src/runtime/iii.js'; +import { consultPreDispatch } from '../../../src/turn-orchestrator/hooks/chain.js'; +import { + addPreDispatchBindingForTests, + resetPreDispatchBindingsForTests, +} from '../../../src/turn-orchestrator/hooks/registry.js'; +import type { BindingConfig, HookInput } from '../../../src/turn-orchestrator/hooks/types.js'; + +function input(function_id = 'shell::run'): HookInput { + return { + point: 'pre_dispatch', + session_id: 's1', + turn_id: 't_1', + step: 1, + depth: 0, + call: { id: 'fc-1', function_id, arguments: { cmd: 'ls' } }, + }; +} + +function bind(function_id: string, config?: Partial): void { + addPreDispatchBindingForTests({ + id: `b-${function_id}`, + function_id, + config: { timeout_ms: 5_000, on_error: 'fail_closed', ...config }, + }); +} + +function iiiAnswering(answers: Record unknown)>): ISdk { + return { + trigger: vi.fn(async ({ function_id }: { function_id: string }) => { + const a = answers[function_id]; + if (a === undefined) throw new Error(`unexpected trigger ${function_id}`); + return typeof a === 'function' ? (a as () => unknown)() : a; + }), + } as unknown as ISdk; +} + +beforeEach(() => { + resetPreDispatchBindingsForTests(); +}); + +describe('consultPreDispatch', () => { + it('allows when no bindings match (hooks narrow, never widen)', async () => { + const iii = { trigger: vi.fn() } as unknown as ISdk; + expect(await consultPreDispatch(iii, input())).toEqual({ kind: 'allow' }); + expect(iii.trigger).not.toHaveBeenCalled(); + }); + + it('skips bindings whose functions globs do not match', async () => { + bind('approval::gate', { functions: ['web::*'] }); + const iii = { trigger: vi.fn() } as unknown as ISdk; + expect(await consultPreDispatch(iii, input('shell::run'))).toEqual({ kind: 'allow' }); + expect(iii.trigger).not.toHaveBeenCalled(); + }); + + it('continue from every hook resolves to allow, passing the HookInput verbatim', async () => { + bind('approval::gate'); + const iii = iiiAnswering({ 'approval::gate': { decision: 'continue' } }); + + expect(await consultPreDispatch(iii, input())).toEqual({ kind: 'allow' }); + expect(iii.trigger).toHaveBeenCalledWith({ + function_id: 'approval::gate', + payload: input(), + timeoutMs: 5_000, + }); + }); + + it('deny short-circuits with a denied envelope carrying the hook reason', async () => { + bind('approval::gate'); + bind('zz::never-reached'); + const iii = iiiAnswering({ + 'approval::gate': { decision: 'deny', reason: 'human_only_function' }, + }); + + const out = await consultPreDispatch(iii, input()); + expect(out).toEqual({ + kind: 'deny', + denial: { + schema_version: 1, + status: 'denied', + denied_by: 'hook', + function_id: 'shell::run', + reason: 'human_only_function', + }, + }); + expect(iii.trigger).toHaveBeenCalledTimes(1); + }); + + it('hold short-circuits with the holding hook id and the timeout', async () => { + bind('approval::gate'); + const iii = iiiAnswering({ + 'approval::gate': { decision: 'hold', pending_timeout_ms: 1_800_000 }, + }); + + expect(await consultPreDispatch(iii, input())).toEqual({ + kind: 'hold', + held_by: 'approval::gate', + pending_timeout_ms: 1_800_000, + }); + }); + + it('transport failure fails closed with gate_unavailable', async () => { + bind('approval::gate'); + const iii = iiiAnswering({ + 'approval::gate': () => { + throw new Error('connection refused'); + }, + }); + + const out = await consultPreDispatch(iii, input()); + expect(out.kind).toBe('deny'); + if (out.kind !== 'deny') return; + expect(out.denial.denied_by).toBe('gate_unavailable'); + expect(out.denial.reason).toContain('approval::gate unavailable'); + expect(out.denial.reason).toContain('connection refused'); + }); + + it('an unparseable hook reply fails closed', async () => { + bind('approval::gate'); + const iii = iiiAnswering({ 'approval::gate': 'what even is this' }); + + const out = await consultPreDispatch(iii, input()); + expect(out.kind).toBe('deny'); + if (out.kind !== 'deny') return; + expect(out.denial.denied_by).toBe('gate_unavailable'); + }); + + it('fail_open skips a broken hook and continues the chain', async () => { + bind('alpha::broken', { on_error: 'fail_open' }); + bind('beta::gate'); + const iii = iiiAnswering({ + 'alpha::broken': () => { + throw new Error('down'); + }, + 'beta::gate': { decision: 'hold', pending_timeout_ms: 60_000 }, + }); + + expect(await consultPreDispatch(iii, input())).toEqual({ + kind: 'hold', + held_by: 'beta::gate', + pending_timeout_ms: 60_000, + }); + }); + + it('runs hooks in priority order and stops at the first non-continue', async () => { + bind('beta::second', { priority: 1 }); + bind('alpha::first', { priority: 0 }); + const order: string[] = []; + const iii = { + trigger: vi.fn(async ({ function_id }: { function_id: string }) => { + order.push(function_id); + return function_id === 'alpha::first' + ? { decision: 'continue' } + : { decision: 'deny', reason: 'no' }; + }), + } as unknown as ISdk; + + const out = await consultPreDispatch(iii, input()); + expect(order).toEqual(['alpha::first', 'beta::second']); + expect(out.kind).toBe('deny'); + }); +}); diff --git a/harness/tests/turn-orchestrator/hooks/registry.test.ts b/harness/tests/turn-orchestrator/hooks/registry.test.ts new file mode 100644 index 00000000..505309a4 --- /dev/null +++ b/harness/tests/turn-orchestrator/hooks/registry.test.ts @@ -0,0 +1,112 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import type { ISdk, TriggerConfig, TriggerHandler } from '../../../src/runtime/iii.js'; +import { + addPreDispatchBindingForTests, + registerPreDispatchTriggerType, + resetPreDispatchBindingsForTests, + snapshotBindings, +} from '../../../src/turn-orchestrator/hooks/registry.js'; +import { PRE_DISPATCH_TRIGGER_TYPE } from '../../../src/turn-orchestrator/hooks/types.js'; + +type CapturedHandler = TriggerHandler; + +function captureHandler(): { iii: ISdk; handler: () => CapturedHandler } { + let handler: CapturedHandler | undefined; + const iii = { + registerTriggerType: vi.fn((input: { id: string }, h: CapturedHandler) => { + expect(input.id).toBe(PRE_DISPATCH_TRIGGER_TYPE); + handler = h; + return {}; + }), + } as unknown as ISdk; + return { + iii, + handler: () => { + if (!handler) throw new Error('trigger type not registered'); + return handler; + }, + }; +} + +function bindingConfig(id: string, function_id: string, config: unknown): TriggerConfig { + return { id, function_id, config }; +} + +beforeEach(() => { + resetPreDispatchBindingsForTests(); +}); + +describe('pre_dispatch trigger type registry', () => { + it('registers and unregisters bindings through the engine handler', async () => { + const { iii, handler } = captureHandler(); + registerPreDispatchTriggerType(iii); + + await handler().registerTrigger(bindingConfig('b1', 'approval::gate', { functions: ['*'] })); + expect(snapshotBindings('shell::run')).toHaveLength(1); + + await handler().unregisterTrigger(bindingConfig('b1', 'approval::gate', {})); + expect(snapshotBindings('shell::run')).toHaveLength(0); + }); + + it('rejects a malformed binding config at registration (strict schema)', async () => { + const { iii, handler } = captureHandler(); + registerPreDispatchTriggerType(iii); + + await expect( + handler().registerTrigger(bindingConfig('b1', 'approval::gate', { functoins: ['*'] })), + ).rejects.toThrow(); + expect(snapshotBindings('shell::run')).toHaveLength(0); + }); + + it('treats a null config as match-all with defaults', async () => { + const { iii, handler } = captureHandler(); + registerPreDispatchTriggerType(iii); + + await handler().registerTrigger(bindingConfig('b1', 'approval::gate', null)); + const [binding] = snapshotBindings('anything::at_all'); + expect(binding).toBeDefined(); + expect(binding?.config.timeout_ms).toBe(5_000); + expect(binding?.config.on_error).toBe('fail_closed'); + }); + + it('narrows by functions globs', () => { + addPreDispatchBindingForTests({ + id: 'b1', + function_id: 'approval::gate', + config: { + functions: ['shell::*', 'harness::spawn'], + timeout_ms: 5_000, + on_error: 'fail_closed', + }, + }); + + expect(snapshotBindings('shell::run')).toHaveLength(1); + expect(snapshotBindings('shell::fs::read')).toHaveLength(1); + expect(snapshotBindings('harness::spawn')).toHaveLength(1); + expect(snapshotBindings('web::fetch')).toHaveLength(0); + }); + + it('orders the chain by priority, ties by function_id', () => { + addPreDispatchBindingForTests({ + id: 'b-late', + function_id: 'zeta::hook', + config: { priority: 10, timeout_ms: 5_000, on_error: 'fail_closed' }, + }); + addPreDispatchBindingForTests({ + id: 'b-tie-2', + function_id: 'beta::hook', + config: { timeout_ms: 5_000, on_error: 'fail_closed' }, + }); + addPreDispatchBindingForTests({ + id: 'b-tie-1', + function_id: 'alpha::hook', + config: { timeout_ms: 5_000, on_error: 'fail_closed' }, + }); + + expect(snapshotBindings('shell::run').map((b) => b.function_id)).toEqual([ + 'alpha::hook', + 'beta::hook', + 'zeta::hook', + ]); + }); +}); diff --git a/harness/tests/turn-orchestrator/store.test.ts b/harness/tests/turn-orchestrator/store.test.ts index 00b3b2a6..94ba5a90 100644 --- a/harness/tests/turn-orchestrator/store.test.ts +++ b/harness/tests/turn-orchestrator/store.test.ts @@ -183,8 +183,11 @@ describe('shouldWakeStep', () => { expect(shouldWakeStep('function_execute', 'stopped')).toBe(false); }); - it('rejects function_awaiting_approval (orchestrator parks here)', () => { - expect(shouldWakeStep('function_execute', 'function_awaiting_approval')).toBe(false); + it('wakes once on entry into function_awaiting_approval (post-persist resolution scan)', () => { + expect(shouldWakeStep('function_execute', 'function_awaiting_approval')).toBe(true); + // Mid-park re-saves stay silent; further wakes come from + // harness::function::resolve. + expect(shouldWakeStep('function_awaiting_approval', 'function_awaiting_approval')).toBe(false); }); it('rejects same-state writes', () => { diff --git a/harness/tests/turn-orchestrator/turn-completed.test.ts b/harness/tests/turn-orchestrator/turn-completed.test.ts new file mode 100644 index 00000000..5592b14e --- /dev/null +++ b/harness/tests/turn-orchestrator/turn-completed.test.ts @@ -0,0 +1,183 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import type { ISdk, TriggerConfig, TriggerHandler } from '../../src/runtime/iii.js'; +import { createTurnStore } from '../../src/turn-orchestrator/state-runtime/store.js'; +import { newRecord, type TurnStateRecord } from '../../src/turn-orchestrator/state.js'; +import { + TURN_COMPLETED_TRIGGER_TYPE, + emitTurnCompleted, + registerTurnCompletedTriggerType, + resetTurnCompletedBindingsForTests, + terminalStatus, +} from '../../src/turn-orchestrator/turn-completed.js'; +import { syntheticAssistant } from '../../src/turn-orchestrator/synthetic-assistant.js'; + +type Delivery = { function_id: string; payload: unknown }; + +function fixture(): { + iii: ISdk; + deliveries: Delivery[]; + handler: () => TriggerHandler; +} { + const deliveries: Delivery[] = []; + let handler: TriggerHandler | undefined; + const iii = { + registerTriggerType: vi.fn((input: { id: string }, h: TriggerHandler) => { + expect(input.id).toBe(TURN_COMPLETED_TRIGGER_TYPE); + handler = h; + return {}; + }), + trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { + if (function_id === 'state::get') return null; + if (function_id === 'state::set') return { old_value: null, new_value: payload }; + if (function_id === 'stream::set') return null; + if (function_id === 'state::update') return { old_value: 0 }; + // FSM step wakes (turn::*) are saveRecord's own business, not fan-out. + if (function_id.startsWith('turn::')) return null; + deliveries.push({ function_id, payload }); + return null; + }), + } as unknown as ISdk; + return { + iii, + deliveries, + handler: () => { + if (!handler) throw new Error('trigger type not registered'); + return handler; + }, + }; +} + +function binding(id: string, function_id: string, config: unknown): TriggerConfig { + return { id, function_id, config }; +} + +beforeEach(() => { + resetTurnCompletedBindingsForTests(); +}); + +describe('terminalStatus', () => { + it('derives failed / cancelled / completed', () => { + const rec = newRecord('s1'); + rec.state = 'failed'; + expect(terminalStatus(rec)).toBe('failed'); + + rec.state = 'stopped'; + rec.last_assistant = syntheticAssistant({ + stop_reason: 'aborted', + text: 'run aborted by user', + }); + expect(terminalStatus(rec)).toBe('cancelled'); + + rec.last_assistant = syntheticAssistant({ stop_reason: 'error', text: 'boom' }); + expect(terminalStatus(rec)).toBe('failed'); + + rec.last_assistant = null; + expect(terminalStatus(rec)).toBe('completed'); + }); +}); + +describe('turn_completed fan-out', () => { + it('delivers to bindings, honoring the session_id filter', async () => { + const f = fixture(); + registerTurnCompletedTriggerType(f.iii); + await f.handler().registerTrigger(binding('b1', 'approval::on-turn-completed', {})); + await f.handler().registerTrigger(binding('b2', 'other::handler', { session_id: 's-other' })); + + await emitTurnCompleted(f.iii, { + session_id: 's1', + turn_id: 't_1', + status: 'completed', + timestamp: 1, + }); + + expect(f.deliveries).toEqual([ + { + function_id: 'approval::on-turn-completed', + payload: { session_id: 's1', turn_id: 't_1', status: 'completed', timestamp: 1 }, + }, + ]); + }); + + it('a failing consumer never throws back into the emitter', async () => { + const f = fixture(); + registerTurnCompletedTriggerType(f.iii); + await f.handler().registerTrigger(binding('b1', 'approval::on-turn-completed', {})); + const trigger = f.iii.trigger as unknown as ReturnType; + trigger.mockRejectedValueOnce(new Error('consumer down')); + + await expect( + emitTurnCompleted(f.iii, { + session_id: 's1', + turn_id: 't_1', + status: 'failed', + reason: 'boom', + timestamp: 1, + }), + ).resolves.toBeUndefined(); + }); +}); + +describe('saveRecord terminal emission', () => { + async function saveThrough( + f: ReturnType, + rec: TurnStateRecord, + previous?: TurnStateRecord | null, + ): Promise { + await createTurnStore(f.iii).saveRecord(rec, previous); + } + + it('emits once when a turn transitions into stopped', async () => { + const f = fixture(); + registerTurnCompletedTriggerType(f.iii); + await f.handler().registerTrigger(binding('b1', 'approval::on-turn-completed', {})); + + const prev = newRecord('s1'); + prev.state = 'finishing'; + const rec = structuredClone(prev); + rec.state = 'stopped'; + + await saveThrough(f, rec, prev); + + expect(f.deliveries).toHaveLength(1); + expect(f.deliveries[0]?.payload).toMatchObject({ + session_id: 's1', + turn_id: rec.turn_id, + status: 'completed', + }); + }); + + it('carries the failure reason on failed transitions', async () => { + const f = fixture(); + registerTurnCompletedTriggerType(f.iii); + await f.handler().registerTrigger(binding('b1', 'approval::on-turn-completed', {})); + + const prev = newRecord('s1'); + prev.state = 'function_execute'; + const rec = structuredClone(prev); + rec.state = 'failed'; + rec.error = { kind: 'transition_error', message: 'boom' }; + + await saveThrough(f, rec, prev); + + expect(f.deliveries[0]?.payload).toMatchObject({ status: 'failed', reason: 'boom' }); + }); + + it('stays silent on non-terminal saves and terminal rewrites', async () => { + const f = fixture(); + registerTurnCompletedTriggerType(f.iii); + await f.handler().registerTrigger(binding('b1', 'approval::on-turn-completed', {})); + + const prev = newRecord('s1'); + prev.state = 'provisioning'; + const streaming = structuredClone(prev); + streaming.state = 'assistant_streaming'; + await saveThrough(f, streaming, prev); + + const stopped = structuredClone(streaming); + stopped.state = 'stopped'; + const rewrite = structuredClone(stopped); + await saveThrough(f, rewrite, stopped); + + expect(f.deliveries).toEqual([]); + }); +});