Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion harness/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"test": "vitest run",
"test:watch": "vitest",
"start:all": "node dist/index.js",
"dev:all": "bun --watch src/index.ts",
"dev:all": "tsx --watch src/index.ts",
"dev:harness": "tsx src/harness/main.ts",
"dev:approval-gate": "tsx src/approval-gate/main.ts",
"dev:turn-orchestrator": "tsx src/turn-orchestrator/main.ts",
Expand Down Expand Up @@ -58,8 +58,10 @@
"@opentelemetry/api": "^1.9.0",
"chokidar": "^3.6.0",
"commander": "^12.1.0",
"htmlparser2": "^9.1.0",
"iii-sdk": "^0.16.1",
"pino": "^9.5.0",
"turndown": "^7.2.0",
"uuid": "^11.0.3",
"yaml": "^2.6.1",
"zod": "^3.23.8",
Expand All @@ -71,6 +73,7 @@
"@opentelemetry/sdk-trace-base": "^1.30.0",
"@opentelemetry/sdk-trace-node": "^1.30.0",
"@types/node": "^22.10.5",
"@types/turndown": "^5.0.5",
"@types/uuid": "^10.0.0",
"esbuild": "^0.28.0",
"tsx": "^4.19.2",
Expand Down
74 changes: 74 additions & 0 deletions harness/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 18 additions & 2 deletions harness/src/provider-anthropic/wire-messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import { logger } from '../runtime/otel.js';
import type { AgentMessage } from '../types/agent-message.js';
import type { ContentBlock } from '../types/content.js';
import { formatFunctionResultContent } from '../types/wire.js';
import { formatFunctionResultBlocks, formatFunctionResultContent } from '../types/wire.js';

/**
* Content shipped in the synthetic `tool_result` placeholder we inject
Expand Down Expand Up @@ -124,10 +124,26 @@ export function toWireMessages(messages: AgentMessage[]): unknown[] {
// and the whole turn fails. Latest-wins: replace any existing block
// with the same tool_use_id in the current pending batch so the
// most recent function_result is what the model sees.
// Anthropic tool_result content accepts either a flat string or an
// array of text/image blocks. Keep the flat string whenever there
// are no images — that's the long-standing wire shape (and what
// prompt caching has seen) — and only switch to the array form when
// an image block must reach the model (e.g. web::fetch image mode).
const resultBlocks = formatFunctionResultBlocks(m);
const hasImages = resultBlocks.some((b) => b.type === 'image');
const block = {
type: 'tool_result',
tool_use_id: m.function_call_id,
content: formatFunctionResultContent(m),
content: hasImages
? resultBlocks.map((b) =>
b.type === 'image'
? {
type: 'image',
source: { type: 'base64', media_type: b.mime, data: b.data },
}
: { type: 'text', text: b.text },
)
: formatFunctionResultContent(m),
is_error: m.is_error,
};
const existingIdx = pending.findIndex(
Expand Down
5 changes: 3 additions & 2 deletions harness/src/turn-orchestrator/prompt/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,9 @@ For any HTTP(S) request — fetching a URL, calling a JSON/REST API, or download
ALWAYS use the \`web::fetch\` function via \`agent_trigger\`, never \`shell::exec\` with
\`curl\` or \`wget\`. \`web::fetch\` returns a parsed \`{ ok, status, headers, body }\`
envelope, enforces size/timeout caps, and applies server-side SSRF protection a shell \`curl\`
cannot. Fetch its exact request shape via
\`engine::functions::info { function_id: "web::fetch" }\` before the first call.
cannot. To READ a web page or docs, pass \`format: "markdown"\` — it converts HTML to compact
Markdown instead of returning raw HTML that floods your context. Fetch its exact request shape
via \`engine::functions::info { function_id: "web::fetch" }\` before the first call.

# Security

Expand Down
3 changes: 2 additions & 1 deletion harness/src/turn-orchestrator/prompt/default.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ methods \`registerFunction\`, \`registerTrigger\`, and \`trigger\` — call them

For any HTTP(S) request use \`web::fetch\`, never \`shell::exec\` with
\`curl\` or \`wget\`. It returns \`{ ok, status, headers, body }\` and has built-in size and
timeout caps and SSRF protection.
timeout caps and SSRF protection. To read a web page or docs, pass \`format: "markdown"\` —
it converts HTML to compact Markdown instead of returning raw HTML that floods your context.

# Security

Expand Down
4 changes: 3 additions & 1 deletion harness/src/turn-orchestrator/prompt/gpt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,9 @@ the handler contract is the trigger type's, not a generic one.

For any HTTP(S) request use \`web::fetch\` — never \`shell::exec\` with
\`curl\` or \`wget\`. It returns a parsed \`{ ok, status, headers, body }\` envelope with size
and timeout caps plus server-side SSRF protection.
and timeout caps plus server-side SSRF protection. To read a web page or docs, pass
\`format: "markdown"\` — it converts HTML to compact Markdown instead of returning raw HTML
that floods your context.

## Security

Expand Down
4 changes: 3 additions & 1 deletion harness/src/turn-orchestrator/prompt/kimi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,9 @@ assistant: The payload was a JSON-encoded string. Re-issuing the SAME function w
the handler contract is the trigger type's, not a generic one.
6. For any HTTP(S) request you MUST use \`web::fetch\`, never \`shell::exec\` with
\`curl\` or \`wget\`. It returns a parsed \`{ ok, status, headers, body }\` envelope with
size/timeout caps and server-side SSRF protection.
size/timeout caps and server-side SSRF protection. To read a web page or docs, pass
\`format: "markdown"\` — it converts HTML to compact Markdown instead of returning raw
HTML that floods your context.

# Security

Expand Down
21 changes: 21 additions & 0 deletions harness/src/types/wire.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,24 @@ export function formatFunctionResultContent(msg: FunctionResultMessage): string
}
return body;
}

/** Text variant of a structured tool-result block. */
type WireTextResultBlock = { type: 'text'; text: string };

/**
 * Image variant of a structured tool-result block: a MIME type plus the
 * image payload (presumably base64-encoded — confirm against the producer).
 */
type WireImageResultBlock = { type: 'image'; mime: string; data: string };

/**
 * One entry in the structured (array) form of a function result,
 * discriminated on `type`.
 */
export type WireResultBlock = WireTextResultBlock | WireImageResultBlock;

/**
 * Structured counterpart of `formatFunctionResultContent`, for providers
 * whose tool-result content accepts an array of blocks (Anthropic).
 *
 * The leading text block carries exactly the string the flat-string path
 * produces (including the `[PERMISSION_DENIED]` envelope) and is omitted
 * when that string is empty. Every image entry of `msg.content` follows,
 * in its original order. Text-only providers keep using the flat string.
 *
 * @param msg - The function-result message to convert.
 * @returns Zero or one text block followed by zero or more image blocks.
 */
export function formatFunctionResultBlocks(msg: FunctionResultMessage): WireResultBlock[] {
  const text = formatFunctionResultContent(msg);
  const leadingText: WireResultBlock[] = text.length > 0 ? [{ type: 'text', text }] : [];
  // Non-image entries are dropped here: their content is already part of `text`.
  const images = msg.content.flatMap((part): WireResultBlock[] =>
    part.type === 'image' ? [{ type: 'image', mime: part.mime, data: part.data }] : [],
  );
  return [...leadingText, ...images];
}
31 changes: 30 additions & 1 deletion harness/src/web/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,29 @@
import { getNumber, getSection, getString } from '../runtime/config.js';

export type WebConfig = {
/** Per-request timeout used when the caller doesn't pass `timeout_ms`. */
default_timeout_ms: number;
/** Hard ceiling on per-request timeout. */
max_timeout_ms: number;
/**
* Response-body cap used in page-reading mode (`format` set) when the
* caller doesn't pass `max_bytes`. A transformed page body flows into the
* model's context window untruncated, so an uncapped default (one SPA page
* is easily 1 MiB+ of HTML) can blow the whole turn with "prompt is too
* long". Raw fetches keep defaulting to `max_response_bytes` so existing
* API/download callers aren't silently truncated; callers that genuinely
* need more pass `max_bytes` explicitly, up to `max_response_bytes`.
*/
default_response_bytes: number;
/** Hard ceiling on response body bytes accepted before truncation. */
max_response_bytes: number;
/**
* Max HTML body size the page-reading transforms (turndown/htmlparser2)
* will process. The transforms are synchronous and CPU-bound on the
* worker's event loop — a 5 MiB page can stall every concurrent bus
* call — so bodies above this cap are returned raw, untransformed.
*/
max_transform_bytes: number;
/** Max redirect hops before giving up. */
max_redirects: number;
/** UA we identify ourselves as. */
Expand All @@ -33,8 +52,11 @@ export type WebConfig = {
};

const DEFAULTS: WebConfig = {
max_timeout_ms: 30_000,
default_timeout_ms: 30_000,
max_timeout_ms: 120_000,
default_response_bytes: 256 * 1024,
max_response_bytes: 5 * 1024 * 1024,
max_transform_bytes: 1024 * 1024,
max_redirects: 5,
user_agent: 'iii-harness/0.1 (+web::fetch)',
allow_loopback: true,
Expand All @@ -48,8 +70,15 @@ function getBoolean(cfg: Record<string, unknown>, key: string, fallback: boolean
export function loadWebConfig(cfg: Record<string, unknown>): WebConfig {
const section = getSection(cfg, 'web');
return {
default_timeout_ms: getNumber(section, 'default_timeout_ms', DEFAULTS.default_timeout_ms),
max_timeout_ms: getNumber(section, 'max_timeout_ms', DEFAULTS.max_timeout_ms),
default_response_bytes: getNumber(
section,
'default_response_bytes',
DEFAULTS.default_response_bytes,
),
max_response_bytes: getNumber(section, 'max_response_bytes', DEFAULTS.max_response_bytes),
max_transform_bytes: getNumber(section, 'max_transform_bytes', DEFAULTS.max_transform_bytes),
max_redirects: getNumber(section, 'max_redirects', DEFAULTS.max_redirects),
user_agent: getString(section, 'user_agent', DEFAULTS.user_agent),
allow_loopback: getBoolean(section, 'allow_loopback', DEFAULTS.allow_loopback),
Expand Down
Loading
Loading