diff --git a/.github/scripts/build_skills_payload.py b/.github/scripts/build_skills_payload.py index a9a130d3..f1c85dd4 100644 --- a/.github/scripts/build_skills_payload.py +++ b/.github/scripts/build_skills_payload.py @@ -5,9 +5,9 @@ ``/skills/**/*.md`` and produces the JSON body expected by the workers-registry endpoint. Skill paths map to keys as: - /skills/SKILL.md -> "index.md" - /skills/index.md -> "index.md" (legacy fallback) - /skill.md -> "index.md" (legacy fallback) + /skills/SKILL.md -> "SKILL.md" + /skills/index.md -> "SKILL.md" (legacy fallback) + /skill.md -> "SKILL.md" (legacy fallback) /skills/.md -> "skills/.md" (except SKILL.md / index.md) If no non-empty markdown is found the script writes ``skip=true`` to @@ -26,6 +26,8 @@ KEY_RE = re.compile(r"^[a-z0-9][a-z0-9._/\-]*\.md$", re.IGNORECASE) +TOP_SKILL_KEY = "SKILL.md" + def _read_nonempty(path: pathlib.Path) -> str | None: body = path.read_text(encoding="utf-8") @@ -35,7 +37,7 @@ def _read_nonempty(path: pathlib.Path) -> str | None: def _resolve_top_skill( worker_root: pathlib.Path, ) -> tuple[str | None, pathlib.Path | None]: - """Return ``(index.md body, winning path)`` from the top-of-tree candidates. + """Return ``(overview body, winning path)`` from the top-of-tree candidates. Resolution order: ``skills/SKILL.md``, then legacy ``skills/index.md``, then legacy ``skill.md``. When multiple candidates exist, a GitHub Actions @@ -63,7 +65,7 @@ def _resolve_top_skill( def collect_skills(worker_root: pathlib.Path) -> dict[str, str]: """Return a ``{payload-key: markdown-body}`` map for one worker directory. - The worker overview is always published as registry key ``index.md``, + The worker overview is always published as registry key ``SKILL.md``, sourced from ``skills/SKILL.md`` when present. Empty bodies are skipped silently so blank placeholder files don't end up in the registry. 
# Workers that may be published. ``all`` expands to this tuple, in this order.
ALLOWED_WORKERS: tuple[str, ...] = (
    "acp",
    "coder",
    "console",
    "database",
    "harness",
    "iii-directory",
    "image-resize",
    "mcp",
    "shell",
    "storage",
)

# Frozen copy of the allow-list for O(1) membership checks.
_ALLOWED_SET = frozenset(ALLOWED_WORKERS)


def parse_workers(raw: str) -> list[str]:
    """Return a deduplicated worker list preserving first-seen order.

    Args:
        raw: Comma-separated worker names, or ``all`` (matched
            case-insensitively after trimming) to select every entry of
            ``ALLOWED_WORKERS``. Whitespace around each name is ignored and
            empty segments are dropped.

    Returns:
        The requested worker names, each appearing once, in the order they
        were first mentioned. For ``all`` this is a fresh list copy of
        ``ALLOWED_WORKERS``.

    Raises:
        ValueError: If the input contains no names at all, or if any name is
            not in ``ALLOWED_WORKERS`` (worker names are case-sensitive).
    """
    text = raw.strip()
    if not text:
        raise ValueError("workers input is empty")

    if text.lower() == "all":
        return list(ALLOWED_WORKERS)

    # dict.fromkeys keeps the first occurrence of each key in insertion
    # order, which gives order-preserving de-duplication in a single pass.
    stripped = (part.strip() for part in text.split(","))
    names = list(dict.fromkeys(name for name in stripped if name))

    if not names:
        # Input such as " , , " is only separators and whitespace.
        raise ValueError("workers input is empty")

    unknown = sorted(name for name in names if name not in _ALLOWED_SET)
    if unknown:
        allowed = ", ".join(ALLOWED_WORKERS)
        raise ValueError(
            f"unknown worker(s): {', '.join(unknown)}. "
            f"Allowed: {allowed} (or use all)"
        )

    return names


def main() -> int:
    """Parse ``--workers`` and emit the worker matrix for GitHub Actions.

    Appends ``<out-key>=<JSON array>`` to the file named by the
    ``GITHUB_OUTPUT`` environment variable when that variable is set, then
    prints a ``::notice::`` summary to stdout.

    Returns:
        ``0`` on success; ``1`` when the input is rejected, after printing a
        ``::error::`` annotation to stderr.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--workers",
        required=True,
        help='Comma-separated worker names or "all"',
    )
    parser.add_argument(
        "--out-key",
        default="matrix",
        help="GITHUB_OUTPUT key for the JSON array (default: matrix)",
    )
    args = parser.parse_args()

    try:
        workers = parse_workers(args.workers)
    except ValueError as exc:
        print(f"::error::{exc}", file=sys.stderr)
        return 1

    payload = json.dumps(workers)
    gha_out = os.environ.get("GITHUB_OUTPUT")
    if gha_out:
        # Append rather than truncate: other steps' outputs share this file.
        with open(gha_out, "a", encoding="utf-8") as f:
            f.write(f"{args.out_key}={payload}\n")

    print(f"::notice::publish skills for {len(workers)} worker(s): {', '.join(workers)}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
test_legacy_index_md_publishes_as_skill_md(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = pathlib.Path(tmp) / "legacy-worker" + (root / "skills").mkdir(parents=True) + (root / "skills" / "index.md").write_text("# Legacy\n", encoding="utf-8") + skills = collect_skills(root) + self.assertEqual(skills, {TOP_SKILL_KEY: "# Legacy\n"}) + + def test_skill_md_plus_nested_extra(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = pathlib.Path(tmp) / "nested-worker" + (root / "skills" / "extra").mkdir(parents=True) + (root / "skills" / "SKILL.md").write_text("# Overview\n", encoding="utf-8") + (root / "skills" / "extra" / "topic.md").write_text("# Topic\n", encoding="utf-8") + skills = collect_skills(root) + self.assertEqual( + skills, + { + TOP_SKILL_KEY: "# Overview\n", + "skills/extra/topic.md": "# Topic\n", + }, + ) + + def test_empty_skill_md_skipped(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = pathlib.Path(tmp) / "empty-worker" + (root / "skills").mkdir(parents=True) + (root / "skills" / "SKILL.md").write_text(" \n", encoding="utf-8") + skills = collect_skills(root) + self.assertEqual(skills, {}) + + +if __name__ == "__main__": + unittest.main() diff --git a/.github/scripts/tests/test_parse_publish_workers_input.py b/.github/scripts/tests/test_parse_publish_workers_input.py new file mode 100644 index 00000000..e91bfa56 --- /dev/null +++ b/.github/scripts/tests/test_parse_publish_workers_input.py @@ -0,0 +1,69 @@ +"""Tests for .github/scripts/parse_publish_workers_input.py.""" +from __future__ import annotations + +import json +import os +import subprocess +import sys +from pathlib import Path + +import pytest + +SCRIPT = Path(__file__).resolve().parents[1] / "parse_publish_workers_input.py" + +# Import after path is known. 
sys.path.insert(0, str(SCRIPT.parent))
from parse_publish_workers_input import ALLOWED_WORKERS, parse_workers  # noqa: E402


class TestParseWorkers:
    """Unit tests for ``parse_workers`` (pure parsing, no I/O)."""

    def test_all_expands_to_full_list(self):
        assert parse_workers("all") == list(ALLOWED_WORKERS)
        # The keyword is trimmed and matched case-insensitively.
        assert parse_workers("  ALL ") == list(ALLOWED_WORKERS)

    def test_comma_separated_list(self):
        assert parse_workers("shell,coder") == ["shell", "coder"]

    def test_dedupe_preserves_order(self):
        assert parse_workers("shell,coder,shell") == ["shell", "coder"]

    def test_whitespace_trimmed(self):
        assert parse_workers(" shell , coder ") == ["shell", "coder"]

    def test_unknown_worker_raises(self):
        with pytest.raises(ValueError, match="unknown worker"):
            parse_workers("shell,not-a-worker")

    def test_empty_raises(self):
        with pytest.raises(ValueError, match="empty"):
            parse_workers("")
        # Only separators and whitespace also counts as empty.
        with pytest.raises(ValueError, match="empty"):
            parse_workers(" , , ")


def run_script(workers: str, github_output: Path) -> subprocess.CompletedProcess[str]:
    """Run the CLI in a subprocess with ``GITHUB_OUTPUT`` pointing at *github_output*.

    The exit code is returned to the caller rather than raised, so tests can
    assert on failing invocations as well.
    """
    env = {**os.environ, "GITHUB_OUTPUT": str(github_output)}
    return subprocess.run(
        [sys.executable, str(SCRIPT), "--workers", workers],
        capture_output=True,
        text=True,
        env=env,
        check=False,
    )


def test_cli_writes_matrix_to_github_output(tmp_path):
    out_file = tmp_path / "output"
    out_file.write_text("")
    result = run_script("shell,coder", out_file)
    assert result.returncode == 0
    lines = out_file.read_text(encoding="utf-8").strip().splitlines()
    assert len(lines) == 1
    key, value = lines[0].split("=", 1)
    assert key == "matrix"
    assert json.loads(value) == ["shell", "coder"]


def test_cli_unknown_worker_exits_nonzero(tmp_path):
    # A tmp_path file instead of /dev/null keeps the test portable and lets
    # us check that a rejected input never touches the output file.
    out_file = tmp_path / "output"
    result = run_script("bogus", out_file)
    assert result.returncode == 1
    assert "unknown worker" in result.stderr
    assert not out_file.exists()
b/.github/workflows/_publish-worker-skills.yml @@ -0,0 +1,66 @@ +name: Publish worker skills to registry + +on: + workflow_call: + inputs: + worker: + description: 'Worker folder name' + required: true + type: string + version: + description: 'Registry tag channel (latest, next, ...)' + required: true + type: string + api_url: + description: 'Workers registry base URL' + required: false + type: string + default: 'https://api.workers.iii.dev' + secrets: + WORKERS_REGISTRY_API_KEY: + required: true + +jobs: + publish: + name: POST /w/${{ inputs.worker }}/skills + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - uses: actions/checkout@v4 + + - name: Build skills payload + id: skills_payload + env: + WORKER: ${{ inputs.worker }} + VERSION: ${{ inputs.version }} + run: | + set -euo pipefail + python3 .github/scripts/build_skills_payload.py \ + --worker "$WORKER" \ + --version "$VERSION" \ + --out skills-payload.json + + - name: POST /w//skills + if: steps.skills_payload.outputs.skip != 'true' + env: + API_URL: ${{ inputs.api_url }} + API_KEY: ${{ secrets.WORKERS_REGISTRY_API_KEY }} + WORKER: ${{ inputs.worker }} + run: | + set -euo pipefail + if [[ -z "$API_KEY" ]]; then + echo "::error::WORKERS_REGISTRY_API_KEY secret is not set" + exit 1 + fi + http=$(curl -sS -o skills-response.json -w '%{http_code}' \ + -H "X-API-Key: $API_KEY" \ + -H "Content-Type: application/json" \ + -X POST "$API_URL/w/$WORKER/skills" \ + --data-binary @skills-payload.json) + echo "HTTP $http" + cat skills-response.json + if [[ "$http" != "200" ]]; then + echo "::error::publish skills failed with HTTP $http" + exit 1 + fi diff --git a/.github/workflows/create-tag.yml b/.github/workflows/create-tag.yml index b37976dd..6e121be4 100644 --- a/.github/workflows/create-tag.yml +++ b/.github/workflows/create-tag.yml @@ -17,12 +17,9 @@ on: - iii-lsp - iii-lsp-vscode - image-resize - - llm-budget - mcp - shell - storage - - todo-worker - - todo-worker-python bump: description: 
'Version bump' required: true diff --git a/.github/workflows/database-e2e.yml b/.github/workflows/database-e2e.yml index 076f772d..223dc471 100644 --- a/.github/workflows/database-e2e.yml +++ b/.github/workflows/database-e2e.yml @@ -46,7 +46,7 @@ jobs: - name: Install iii engine (next) run: | curl -fsSL --retry 3 --retry-connrefused --retry-delay 5 \ - https://install.iii.dev/iii/main/install.sh | sh -s -- --next + https://install.iii.dev/iii/main/install.sh | sh echo "$HOME/.local/bin" >> "$GITHUB_PATH" - name: Verify engine diff --git a/.github/workflows/publish-worker-skills.yml b/.github/workflows/publish-worker-skills.yml new file mode 100644 index 00000000..73659f2c --- /dev/null +++ b/.github/workflows/publish-worker-skills.yml @@ -0,0 +1,53 @@ +name: Publish worker skills + +on: + workflow_dispatch: + inputs: + workers: + description: 'Comma-separated worker names, or "all"' + required: true + type: string + registry_tag: + description: 'Registry tag channel to publish skills to' + required: true + type: choice + options: + - latest + - next + default: latest + +concurrency: + group: publish-worker-skills-${{ inputs.registry_tag }} + cancel-in-progress: false + +jobs: + setup: + name: Parse workers + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.parse.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + + - name: Build worker matrix + id: parse + env: + WORKERS: ${{ inputs.workers }} + run: | + set -euo pipefail + python3 .github/scripts/parse_publish_workers_input.py \ + --workers "$WORKERS" + + publish: + name: Publish skills (${{ matrix.worker }}) + needs: setup + strategy: + fail-fast: false + matrix: + worker: ${{ fromJSON(needs.setup.outputs.matrix) }} + uses: ./.github/workflows/_publish-worker-skills.yml + with: + worker: ${{ matrix.worker }} + version: ${{ inputs.registry_tag }} + secrets: + WORKERS_REGISTRY_API_KEY: ${{ secrets.WORKERS_REGISTRY_API_KEY }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml 
index 982213ed..f6132f29 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -11,12 +11,9 @@ on: - 'iii-directory/v*' - 'iii-lsp/v*' - 'image-resize/v*' - - 'llm-budget/v*' - 'mcp/v*' - 'shell/v*' - 'storage/v*' - - 'todo-worker/v*' - - 'todo-worker-python/v*' workflow_dispatch: inputs: tag: diff --git a/coder/skills/SKILL.md b/coder/skills/SKILL.md new file mode 100644 index 00000000..cb7e5dea --- /dev/null +++ b/coder/skills/SKILL.md @@ -0,0 +1,60 @@ +--- +name: coder +description: >- + Read, search, and edit files inside a path-jailed base_path — structured + filesystem ops for agents, with glob-based secret protection and atomic + per-file writes. +--- + +# coder + +The coder worker is a path-jailed surface for filesystem work. Every `coder::*` +call resolves its `path` argument relative to a single operator-configured +`base_path` and refuses anything that would escape it — absolute inputs, `..` +segments, and crafted symlinks all return an error rather than being silently +re-jailed. A glob-based `non_accessible_globs` list keeps sensitive files +(`.env`, `*.pem`, anything under `secrets/`) visible to directory listings but +unreadable, unwritable, and unsearchable. + +The surface covers the whole read-explore-edit cycle: navigate with +`coder::tree` and `coder::list-folder`, discover with `coder::search`, inspect +with `coder::read-file`, then mutate through the batched `coder::create-file`, +`coder::update-file`, and `coder::delete-file`. Add it with `iii worker add +coder`; operator caps on per-file read/write bytes, listing pages, and search +matches live in `config.yaml`. It is filesystem-only and never spawns a process. + +## When to Use + +- Get the shape of an unfamiliar repo in one round-trip (`coder::tree`), then + drill into folders flagged as truncated (`coder::list-folder`). +- Find a string, symbol, or TODO across many files by content or path + (`coder::search`). 
+- Read one file's full contents after a search hit (`coder::read-file`). +- Scaffold a fresh file or subtree, or rewrite existing source line-by-line + (`coder::create-file`, `coder::update-file`). +- Remove stale files or directories (`coder::delete-file`). + +## Boundaries + +- Not for running processes — reach for `shell::exec` / `shell::exec_bg` in the + `shell` worker to build, test, format, or run git. `coder::*` never shells out. +- Paths must be relative to `base_path`; absolute inputs, `..`, and escaping + symlinks are rejected rather than re-jailed. +- `non_accessible_globs` blocks reads, writes, searches, and deletes — a denied + path is folded with "not found" so callers can't probe for its existence. +- Writes fire no engine triggers and emit no events; the only effect is the + filesystem mutation itself. +- For host-level structured fs that can forward into a sandbox microVM, use the + `shell` worker's `shell::fs::*` surface instead. + +## Functions + +- `coder::tree` — recursive directory snapshot bounded by `max_depth` and a per-folder limit; folders that hit the cap are flagged for paginated drilldown. +- `coder::list-folder` — paginated single-folder listing sorted by name; non-accessible entries are still listed with a `non_accessible: true` flag. +- `coder::search` — literal or regex search over file content and/or paths, with include/exclude globs; non-accessible files are skipped entirely. +- `coder::read-file` — read one file as UTF-8 plus `size` / `mode` / `mtime`, capped by `max_read_bytes`. +- `coder::create-file` — batched file creation with per-entry `overwrite` and `parents` flags. +- `coder::update-file` — batched `insert` / `remove` / `update_lines` / regex `replace` ops across one or more files. +- `coder::delete-file` — batched removal; `recursive: true` is required for non-empty directories and missing paths are idempotent successes. 
+ +The batched mutators return one result per input entry so a single bad path never aborts the rest of the call, `coder::update-file` line ops are 1-based and inclusive and applied bottom-up so each op still references the caller's original line numbers, and every file commits atomically via a temp file plus rename. diff --git a/coder/skills/coder.md b/coder/skills/coder.md deleted file mode 100644 index 8ff5ba39..00000000 --- a/coder/skills/coder.md +++ /dev/null @@ -1,614 +0,0 @@ ---- -type: how-to -functions: [coder::read-file, coder::search, coder::list-folder, coder::tree, coder::create-file, coder::update-file, coder::delete-file] -title: Read, search, edit, and manage files inside the jailed base_path ---- - -# When to use - -The `coder::*` surface is the agent's single tool for filesystem work -inside the operator-configured `base_path`. Every call resolves its -`path` argument relative to that root, refuses anything that would -escape it (absolute inputs, `..` segments, symlinks that point out), -and screens reads + writes against `non_accessible_globs` so secret -files (`.env`, `*.pem`, `secrets/**`) stay invisible to the -content-touching functions even when they appear in directory -listings. Each function applies the same security model — callers -never have to re-check. 
- -| Question | Use this | -|-------------------------------------------------------|-----------------------| -| Get a bird's-eye view of an unfamiliar repo | `coder::tree` | -| List one folder, paginated | `coder::list-folder` | -| Find a string or regex across many files | `coder::search` | -| Read one file's contents | `coder::read-file` | -| Make a new file (optionally in a fresh subtree) | `coder::create-file` | -| Edit existing files line-by-line | `coder::update-file` | -| Remove files or directories | `coder::delete-file` | - -Reach for `shell::exec` (in the [`shell`](../../shell/README.md) -worker) instead when you need to run a process — build, test, format, -git, package manager, anything that spawns an executable. `coder::*` -is filesystem-only; it never shells out. - -# `coder::tree` - -Recursive directory snapshot, bounded so the response stays small even -for monorepo roots. Use it as the first call against an unfamiliar -codebase: one round-trip gives you the shape of the project up to -`max_depth` levels, and folders that hit `per_folder_limit` are -flagged with a `truncated` block that points you at -`coder::list-folder` for paginated drilldown. - -## Inputs - -```json -{ - "path": ".", // optional; folder relative to base_path; defaults to "." - "max_depth": 4, // optional; falls back to config.tree_default_depth (4) - "per_folder_limit": 50 // optional; falls back to config.tree_per_folder_limit (50) -} -``` - -No fields are required — `coder::tree {}` snapshots the whole -`base_path` at default depth and per-folder limit. `path` must point -at a directory; pointing it at a file returns `C210`. The root node is -depth 0, so `max_depth: 1` lists the root's immediate children and -stops. 
- -## Outputs - -```json -{ - "root": { - "name": "base", // basename of `path` - "path": "", // relative path under base_path; "" for the root - "kind": "dir", // "file" | "dir" | "symlink" | "other" - "size": 4096, - "mtime": 1716470000, // Unix epoch seconds - "non_accessible": false, // omitted when false - "children": [ - { - "name": "src", - "path": "src", - "kind": "dir", - "size": 4096, - "mtime": 1716470000, - "children": [ - { "name": "main.rs", "path": "src/main.rs", "kind": "file", "size": 312, "mtime": 1716470000 } - ] - } - ], - "truncated": { // omitted when the folder fit - "reason": "per_folder_limit", // "per_folder_limit" | "max_depth" | "io_error" - "shown": 50, - "total": 237, // populated only when reason == "per_folder_limit" - "hint": "use coder::list-folder for paginated access to all entries" - } - } -} -``` - -- Children are sorted lex by `name` per folder. -- `children` is omitted on file nodes and on truncated `max_depth` - nodes (the walk stops before reading the folder). -- `non_accessible: true` propagates from a `non_accessible_globs` - match on the entry's relative path — the entry is still listed in - the tree, but the content-touching functions will refuse it. -- `truncated.reason` is `"per_folder_limit"` when the folder had more - than `per_folder_limit` children (the response carries the first - `shown` children and the real `total`), `"max_depth"` when the walk - hit the depth cap (no `children`, no `total`), or `"io_error"` when - the folder couldn't be read (the `hint` carries the underlying - message). - -# `coder::list-folder` - -Paginated single-folder listing. Use it when `coder::tree` returns a -truncated folder you need to enumerate fully, or when you already know -which folder you want and don't need its subtree. Non-accessible -entries are still returned with a `non_accessible: true` flag so the -agent can tell that a `.env` exists without being able to read it. 
- -## Inputs - -```json -{ - "path": "src", // optional; folder relative to base_path; defaults to "." - "page": 1, // optional; 1-based; defaults to 1 - "page_size": 100 // optional; falls back to config.list_default_page_size (100); capped at list_max_page_size (1000) -} -``` - -`path` must resolve to a directory; pointing it at a file returns -`C210`. `page_size` above `list_max_page_size` is silently clamped -down. `page` values past the end return `entries: []` with -`has_more: false`, not an error. - -## Outputs - -```json -{ - "path": "src", - "entries": [ - { - "name": ".env", - "kind": "file", // "file" | "dir" | "symlink" | "other" - "size": 512, - "mtime": 1716470000, // Unix epoch seconds - "non_accessible": true // matched a non_accessible_globs pattern - }, - { - "name": "main.rs", - "kind": "file", - "size": 312, - "mtime": 1716470000, - "non_accessible": false - } - ], - "total": 2, // total entries in the folder, across all pages - "page": 1, - "page_size": 100, - "has_more": false -} -``` - -- Entries are sorted lex by `name` (dotfiles first per Unix - convention). -- `total` reflects the full folder, not the current page — divide by - `page_size` to know how many pages exist. -- `has_more` is `true` when there's at least one more page after this - one. - -# `coder::search` - -Combined content + path search across the whole jailed tree. Walks -`base_path`, applies the include/exclude globs, skips non-accessible -files entirely (their bytes never reach the matcher), and reports -content hits and path hits in separate arrays of one response. 
- -## Inputs - -```json -{ - "query": "TODO", // required; non-empty - "path": ".", // optional; folder relative to base_path scoping the walk; default "."; must be a directory - "regex": false, // optional; default false (literal substring); when true, query is a regex::Regex pattern - "ignore_case": false, // optional; default false; applies to both literal and regex modes - "include_globs": ["**/*.rs"], // optional; globs relative to base_path; empty = include everything - "exclude_globs": ["build/**"], // optional; globs relative to base_path; empty = exclude nothing - "max_matches": 1000, // optional; falls back to config.search_default_max_matches (1000) - "max_line_bytes": 4096, // optional; falls back to config.search_default_max_line_bytes (4096) - "search_content": true, // optional; default true; scan file contents - "search_paths": true // optional; default true; match the relative path itself -} -``` - -`query` is required and must be non-empty. At least one of -`search_content` / `search_paths` must be `true` — both `false` -returns `C210`. Glob syntax is the `globset` crate's (`**/`, `*`, -`?`, character classes); a bad glob or bad regex returns `C210` with -the offending pattern in the message. - -Binary files (any NUL byte in the first read) are skipped from -content scanning. Files larger than `max_read_bytes` are also skipped -to keep the search from loading multi-GB blobs. - -## Outputs - -```json -{ - "content_matches": [ - { - "path": "src/main.rs", // relative to base_path - "line": 42, // 1-based - "column": 5, // 1-based, byte offset of the first match on the line - "text": " // TODO: handle EOF" // the full matched line, truncated to max_line_bytes - } - ], - "path_matches": [ - { "path": "docs/TODO.md" } - ], - "truncated": false // true when either match list was capped at max_matches -} -``` - -- A line that exceeds `max_line_bytes` is truncated for both matching - and reporting; the matcher never sees the overflowed tail. 
-- Each line emits at most one match (the first hit) — `truncated` - reports per-file/per-line saturation, not per-character. -- `path` defaults to `.` (the whole jail). Set it to a subfolder - (e.g. `"src"`) to scope the walk; globs and result `path`s remain - anchored at `base_path` so `include_globs: ["src/**/*.rs"]` keeps - working regardless of `path`. Pointing `path` at a file (not a - directory) returns `C210`. -- When `truncated: true`, tighten the pattern or narrow with - `include_globs` / `exclude_globs` rather than bumping `max_matches` - blindly; the cap exists to keep responses bounded. - -# `coder::read-file` - -Read one file's bytes and metadata. Use it after `coder::search` -surfaces an interesting hit, or whenever you have a path in hand and -want the full content (not just a snippet). The single-file shape -keeps the call cheap; for many small files prefer a `coder::tree` plus -targeted reads over scanning the whole tree. - -## Inputs - -```json -{ "path": "src/main.rs" } // required; file relative to base_path -``` - -`path` is required. Pointing it at a directory returns `C210`; -pointing it at a non-accessible match returns `C211` (folded with -"not found" so callers can't probe). Files above `max_read_bytes` -(default 10 MiB) return `C213` before any bytes are loaded. - -## Outputs - -```json -{ - "path": "src/main.rs", // echoed for caller correlation - "content": "fn main() {\n println!(\"hi\");\n}\n", - "is_utf8": true, // false when invalid bytes were lossily replaced with U+FFFD - "size": 34, // raw on-disk size in bytes - "mode": 420, // Unix permission bits as decimal (0o644 == 420); 0o644 on non-Unix - "mtime": 1716470000 // Unix epoch seconds; 0 when the FS can't expose it -} -``` - -- `content` is always a string — binary or invalid-UTF-8 inputs are - decoded with `String::from_utf8_lossy` and the `is_utf8: false` - flag tells callers the byte count won't match exactly. 
-- `size` reflects on-disk bytes; `content.len()` may differ when - `is_utf8: false` because of U+FFFD substitution. - -# `coder::create-file` - -Batched file creation. Each entry in `files[]` is treated -independently — a single bad path doesn't fail the rest of the batch; -instead its `results[i]` slot carries `success: false` and the JSON -error envelope. Use it to scaffold a fresh subtree (one call with -several entries, `parents: true`) or to write a single new file -without the read-modify-write dance of `coder::update-file`. - -## Inputs - -```json -{ - "files": [ - { - "path": "tests/foo_test.rs", // required; relative to base_path - "content": "#[test]\nfn it_works() {}",// required; UTF-8 string written verbatim - "mode": "0644", // optional; octal as string; default "0644"; Unix only - "parents": true, // optional; create missing parent dirs; default true - "overwrite": false // optional; refuse on existing path when false; default false - } - ] -} -``` - -`files` must contain at least one entry; `[]` returns `C210`. Per -entry, `path` and `content` are required. `parents: true` runs -`create_dir_all` on the parent before writing. `overwrite: false` on -an existing path yields `C217` in that entry's result (the rest of -the batch still runs). - -## Outputs - -```json -{ - "results": [ - { - "path": "tests/foo_test.rs", - "success": true, - "bytes_written": 28 - // `error` omitted on success - }, - { - "path": ".env", - "success": false, - "bytes_written": 0, - "error": "{\"code\":\"C211\",\"message\":\".env: matches non_accessible_globs\"}" - } - ] -} -``` - -- One result per input entry, in input order. -- `error` is omitted on success; on failure it's the per-function - JSON envelope (`{"code":"C2xx","message":"..."}`) the same way - top-level errors are reported. -- `bytes_written` is `content.as_bytes().len()` on success, `0` on - failure. - -# `coder::update-file` - -Batched line-oriented and regex edits. 
Each file in `files[]` carries -an array of ops; all ops on one file commit **atomically** via a -sibling temp file + `rename`, so a mid-write failure leaves the -original byte-identical. Across files the batch is independent — one -file's failure doesn't roll back another's. - -Line numbers are **1-based and inclusive**. Line ops (`insert`, -`remove`, `update_lines`) within one file are applied **bottom-up** -(highest affected line first), so each op still references the original -line numbers the caller wrote — there's no need to recompute offsets -after an earlier op. Overlapping line ops in original-line space are -rejected up-front with `C210`. Regex `replace` ops run **after** all -line ops on the serialized file body, in declaration order, and do not -participate in line-space overlap checks. - -## Inputs - -```json -{ - "files": [ - { - "path": "schema.sql", // required; relative to base_path - "ops": [ - { "op": "insert", "at_line": 1, "content": "-- header\n-- v2" }, - { "op": "remove", "from_line": 5, "to_line": 12 }, - { "op": "update_lines", "from_line": 30, "to_line": 30, "content": "PRIMARY KEY (id)" }, - { "op": "replace", "pattern": "OLD_NAME", "replacement": "NEW_NAME" } - ] - } - ] -} -``` - -`files` must contain at least one entry, and each entry's `ops` must -contain at least one op — both empties return `C210`. - -Each op shape: - -- `insert` — insert `content` **before** `at_line` (1-based). The - special value `at_line = lines + 1` appends to the end. -- `remove` — delete the inclusive range `from_line..=to_line`. -- `update_lines` — overwrite the inclusive range with `content` (split - by `\n`). -- `replace` — substitute all regex matches in the file body. Fields: - `pattern` (required regex), `replacement` (required; Rust capture - syntax such as `$1` / `$name`), optional `ignore_case` (default - `false`). Runs after line ops in the same batch. Empty `pattern` or - invalid regex returns `C210`. 
No matches is a no-op; the op still - counts toward `applied`. - -`content` on line ops may be multi-line; lines are split on `\n` and -`\r` from CRLF inputs is trimmed. File line endings (`\n` vs `\r\n`) -and the presence/absence of a trailing newline are preserved across -the rewrite (regex `replace` operates on the joined body and may change -line count if the replacement introduces or removes newlines). - -## Outputs - -```json -{ - "results": [ - { - "path": "schema.sql", - "success": true, - "applied": 3, // count of ops applied (only meaningful on success) - "new_line_count": 27 // final line count after the rewrite - // `error` omitted on success - }, - { - "path": ".env", - "success": false, - "applied": 0, - "new_line_count": 0, - "error": "{\"code\":\"C211\",\"message\":\".env: matches non_accessible_globs\"}" - } - ] -} -``` - -- One result per input file, in input order. -- `applied` and `new_line_count` are `0` on failure; on success - `applied` equals `ops.len()` for that file. -- `error` is omitted on success; on failure it carries the per-file - JSON error envelope. - -Overlap semantics for line ops (each rejected with `C210`): - -- Two `remove` / `update_lines` ranges sharing any line. -- An `insert` at a line covered by a `remove` / `update_lines` range. -- Two `insert`s at the same `at_line`. -- Any range with `from_line == 0`, `from_line > to_line`, or - `to_line` past EOF. - -# `coder::delete-file` - -Batched removal. Like `create-file` and `update-file`, per-path errors -land in `results[i]` instead of failing the whole batch. Missing paths -are **idempotent successes** (`success: true, removed: false`) — safe -to retry. Directories require an explicit `recursive: true`; without -it, a non-empty directory returns an error in its result slot. 
- -## Inputs - -```json -{ - "paths": [".cache/build", "stale.log"], // required; non-empty; each relative to base_path - "recursive": true // optional; default false; required for non-empty directories -} -``` - -`paths` must contain at least one entry; `[]` returns `C210`. Trying -to delete `base_path` itself (e.g. `"."` resolving to the root) is -rejected with `C210` regardless of `recursive`. - -When `recursive: true`, the walk **refuses to descend through any -subtree containing a `non_accessible_globs` match**. The whole -directory remains untouched and the result reports `C211` with the -offending child — agents can't wipe out a `secrets/` folder by -deleting its parent. - -## Outputs - -```json -{ - "results": [ - { - "path": ".cache/build", - "success": true, - "removed": true // false when the path was already gone - // `error` omitted on success - }, - { - "path": "stale.log", - "success": true, - "removed": false // file didn't exist; treated as a no-op success - }, - { - "path": "secrets", - "success": false, - "removed": false, - "error": "{\"code\":\"C211\",\"message\":\"recursive delete blocked: secrets contains non-accessible secrets/api.pem\"}" - } - ] -} -``` - -- One result per input path, in input order. -- `removed: false` with `success: true` means the path didn't exist - at call time — the post-condition (the path is absent) is true, so - the call is idempotent. - -# Errors - -All `coder::*` errors serialize as `{"code":"C2xx","message":"..."}`. -Top-level failures (e.g. an empty `files: []`) come back as the -function's own `Result::Err`; the batched functions -(`create-file`, `update-file`, `delete-file`) instead embed the same -envelope inside each `results[i].error` so a single bad path never -aborts the whole call. 
- -| Code | Meaning | -|--------|-----------------------------------------------------------------------------------------------| -| `C210` | Bad input — malformed payload, illegal line numbers, overlapping ops, absolute path, bad glob/regex, refusing to delete `base_path` itself. | -| `C211` | Path not found **or** matches `non_accessible_globs`. Folded into one code so callers can't probe for the existence of a denied file. | -| `C213` | File exceeds `max_read_bytes` (read side) or `max_write_bytes` (create/update). For `update-file` this fires on either the pre-edit size or the post-edit size. | -| `C215` | Path escapes `base_path` lexically (`..`) or via a symlink, or the symlink target dangles outside the jail. | -| `C216` | Underlying I/O error (permission denied, EIO, …). The `message` carries the OS error string. | -| `C217` | `coder::create-file` saw an existing path with `overwrite: false`. Set `overwrite: true` to replace, or pick a different `path`. | - -# Worked example - -A typical read-explore-edit pass: snapshot the project, find an open -TODO, read it in context, rewrite the offending block, then add a -companion test file. - -1. Get the shape of the project: - - ```json - { "path": "src" } - ``` - - Sent to `coder::tree`. If a folder comes back with - `truncated.reason == "per_folder_limit"`, follow up with - `coder::list-folder` against that folder. - -2. Find every open TODO in the Rust sources: - - ```json - { - "query": "TODO", - "include_globs": ["**/*.rs"], - "exclude_globs": ["build/**", "target/**"] - } - ``` - - Sent to `coder::search`. The response carries - `content_matches[i]` with `path`, `line`, `column`, and the - matched line `text`. - -3. Read the file around a hit: - - ```json - { "path": "src/foo.rs" } - ``` - - Sent to `coder::read-file`. `content` is the full file as a - string; index into it using the `line` / `column` from step 2. - -4. 
Rewrite the block: - - ```json - { - "files": [{ - "path": "src/foo.rs", - "ops": [ - { "op": "update_lines", "from_line": 42, "to_line": 45, - "content": " let payload = parse(input)?;\n handle(payload)?;\n Ok(())" } - ] - }] - } - ``` - - Sent to `coder::update-file`. The response's `results[0]` carries - `success: true`, `applied: 1`, and the new `new_line_count`. - - To rename a symbol everywhere in the file without touching line - numbers, add a regex `replace` after any line ops: - - ```json - { "op": "replace", "pattern": "fn old_name", "replacement": "fn new_name" } - ``` - -5. Scaffold a companion test in one call (notice `parents: true` - creates `tests/` if it isn't there yet): - - ```json - { - "files": [{ - "path": "tests/foo_test.rs", - "content": "use my_crate::foo;\n\n#[test]\nfn it_works() {\n assert!(foo::handle(\"x\").is_ok());\n}\n", - "parents": true, - "overwrite": false - }] - } - ``` - - Sent to `coder::create-file`. If `tests/foo_test.rs` already - exists, the result carries `C217` and the original file stays put - — flip `overwrite: true` and resend if you meant to replace it. - -# Side effects - -Three functions write to disk. None of them fire engine triggers or -emit events — the only observable effect is the filesystem mutation -described below. - -- `coder::create-file` writes each entry's `content.as_bytes()` to - `base_path/path` with `std::fs::write`. When `parents: true`, runs - `std::fs::create_dir_all` on the parent first. On Unix, applies - `mode` (octal string parsed with `u32::from_str_radix(..., 8)`, - masked with `0o777`); on non-Unix the `mode` field is accepted but - ignored. -- `coder::update-file` writes via a sibling temp file named - `.coder-tmp--` in the target's parent - directory, then `std::fs::rename`s it over the original. A crash - mid-write leaves the original byte-identical; in rare cases an - orphan temp file may remain (it's safe to remove manually). 
Line - endings (`\n` vs `\r\n`) and trailing-newline presence are - preserved from the original file. -- `coder::delete-file` calls `std::fs::remove_file` for files and - empty dirs, `std::fs::remove_dir_all` when `recursive: true`. The - recursive path walks the subtree first and aborts with `C211` - (without removing anything) if **any** descendant matches - `non_accessible_globs` — protecting against accidentally wiping a - `secrets/` subtree by deleting its parent. - -# Related - -- [`shell::exec`](../../shell/README.md) and `shell::fs::*` — when - you need to run a process (build, test, format, git) or stream - bytes through a channel. `coder::*` never shells out. -- [`directory::skills::get`](iii://directory/skills/get) — the - iii-directory worker that surfaces this how-to (and others) to - agents at bootstrap time. -- The "Security boundary" section of [coder/README.md](../README.md) - — operator-facing detail on `base_path` canonicalisation, - `non_accessible_globs` syntax, and the symlink rejection rules. diff --git a/coder/skills/index.md b/coder/skills/index.md deleted file mode 100644 index 6fe92313..00000000 --- a/coder/skills/index.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -type: index -title: coder ---- - -# coder - -A path-jailed code worker for iii agents. Every `coder::*` call resolves -its `path` argument relative to a single configured `base_path` and -refuses anything that would escape it — absolute inputs, `..` segments, -or crafted symlinks all return an error rather than being silently -re-jailed. A glob-based `non_accessible_globs` list keeps sensitive -files (`.env`, `*.pem`, anything under `secrets/`) visible to directory -listings but unreadable, unwritable, and unsearchable. One worker, one -namespace, one security boundary. 
- -The surface covers the full read-explore-edit cycle: `coder::tree` and -`coder::list-folder` for navigation, `coder::search` for content/path -discovery, `coder::read-file` for inspection, and the batched -`coder::create-file` / `coder::update-file` / `coder::delete-file` -mutators for writes. Write functions commit atomically per file (temp -file + rename) so a mid-write failure leaves the original intact. - -- **Files** (`coder::*`) — read, search, explore, create, edit, and - delete files and folders, all scoped to `base_path` and screened by - `non_accessible_globs`. Caps on per-file read/write bytes, per-folder - listing pages, and search match counts are operator-configured. - -## How-tos - -### `coder::*` - -- [`coder::tree`](iii://coder/coder) — recursive snapshot bounded by `max_depth` and `per_folder_limit`; reach for it first on an unfamiliar repo. -- [`coder::list-folder`](iii://coder/coder) — paginated single-folder listing sorted by name; non-accessible entries appear with `non_accessible: true`. -- [`coder::search`](iii://coder/coder) — literal or regex search over file content and/or paths with include/exclude globs. -- [`coder::read-file`](iii://coder/coder) — read a single file as UTF-8 plus `size` / `mode` / `mtime`, capped by `max_read_bytes`. -- [`coder::create-file`](iii://coder/coder) — batched file creation with per-entry `overwrite` and `parents` flags. -- [`coder::update-file`](iii://coder/coder) — batched `insert` / `remove` / `update_lines` / regex `replace` ops; line ops are 1-based, inclusive, applied bottom-up; atomic per file. -- [`coder::delete-file`](iii://coder/coder) — batched removal; `recursive: true` required for non-empty directories, missing paths are idempotent successes. 
diff --git a/console/docs/custom-function-call-message.md b/console/docs/custom-function-call-message.md new file mode 100644 index 00000000..04d1eb5f --- /dev/null +++ b/console/docs/custom-function-call-message.md @@ -0,0 +1,620 @@ +# Custom `FunctionCallMessage` renderers + +How to add bespoke UI for `function-call` chat messages in the console, instead of falling back to the default request/response JSON panes. + +This is the end-to-end authoring guide: architecture, the message contract, the parser/view layer, how to wire a new family into the host component, and the two Storybook surfaces (fixture galleries + Playground scenarios) every renderer must ship with. + +**Reference implementations** (five families are wired today): + +| Family | Module | Predicate | Notable | +|--------|--------|-----------|---------| +| `sandbox::*` | [`../web/src/components/chat/sandbox/`](../web/src/components/chat/sandbox) | `isSandboxFunction` | 15 tools, terminal + raw-json tabs, approval previews, shared error handling | +| `engine::*::list` | [`../web/src/components/chat/engine/`](../web/src/components/chat/engine) | `isEngineListFunction` | read-only list/info views, no preview | +| `directory::*` | [`../web/src/components/chat/directory/`](../web/src/components/chat/directory) | `isDirectoryFunction` | skills / prompts / registry views | +| `web::fetch` | [`../web/src/components/chat/web/`](../web/src/components/chat/web) | `isWebFunction` | single tool — smallest end-to-end example | +| `worker::*` | [`../web/src/components/chat/worker/`](../web/src/components/chat/worker) | `isWorkerFunction` | lifecycle ops, request JSON used as preview | + +> Starting a brand-new family? Copy [`web/`](../web/src/components/chat/web) for the smallest complete example, or [`sandbox/`](../web/src/components/chat/sandbox) for the full feature set (terminal chrome, error views, previews). 
+ +**Definition of done:** a renderer is not complete until it ships with **both** dev surfaces — static cards in a **fixture family gallery** and at least one interactive **Playground** scenario. Do not merge UI-only changes without playground coverage. + +--- + +## 1. The principle: opt in, or fall back to JSON + +Every tool invocation becomes a `FunctionCallMessage` (see [`../web/src/types/chat.ts`](../web/src/types/chat.ts)). The host component [`FunctionCallMessage.tsx`](../web/src/components/chat/FunctionCallMessage.tsx) (FCM) always renders the same chrome: + +1. **Header** — status dot, `permission to run` / `running` / `ran`, the function id, and duration. +2. **Body** — depends on lifecycle and whether a custom renderer returned a node. +3. **Pending bar** — approve / deny / always-allow (owned by FCM, never by your renderer). + +The default body is two `ValuePane`s (request + response) with `JsonHighlight`. A custom renderer **opts in** by returning a React node from `tryRender` / `tryRenderPreview`. If it returns `null`, FCM silently shows the JSON fallback. This is the core contract: **a renderer never has to handle a shape it doesn't recognize — just return `null`.** + +--- + +## 2. How it works today + +FCM is the host; each function family is a small plugin module that exports a `ToolView` object plus a `FunctionIdLabel`. FCM imports the five families directly and tries each in turn with the `??` (nullish-coalescing) operator — first non-null wins. 
+ +```mermaid +flowchart TB + msg["FunctionCallMessage (functionId, input, output, flags)"] + host["FunctionCallMessage.tsx (host chrome)"] + label["FunctionIdLabel branch"] + preview["customPreview = chained tryRenderPreview()"] + terminal["customTerminal = chained tryRender()"] + families["Sandbox / Engine / Directory / Worker / Web ToolView"] + json["ValuePane JSON fallback"] + tabs["Tabs: terminal + raw json"] + + msg --> host + host --> label + host --> preview + host --> terminal + preview --> families + terminal --> families + families -->|"node"| tabs + families -->|"null"| json +``` + +The exact dispatch in [`FunctionCallMessage.tsx`](../web/src/components/chat/FunctionCallMessage.tsx): + +```tsx +const customPreview = + SandboxToolView.tryRenderPreview(message) ?? + EngineToolView.tryRenderPreview(message) ?? + DirectoryToolView.tryRenderPreview(message) ?? + WorkerToolView.tryRenderPreview(message) ?? + WebToolView.tryRenderPreview(message) + +const customTerminal = !pending + ? (SandboxToolView.tryRender(message) ?? + EngineToolView.tryRender(message) ?? + DirectoryToolView.tryRender(message) ?? + WorkerToolView.tryRender(message) ?? + WebToolView.tryRender(message)) + : null +``` + +> **Note on scale.** Five families chained by hand is fine; it is explicit and tree-shakes cleanly. There is no plugin *registry* yet — see [Scale beyond hand-wiring](#12-scale-beyond-hand-wiring) for the refactor sketch if the chain grows unwieldy. Until then, follow the [minimal wiring](#9-wire-the-family-into-functioncallmessagetsx) below. + +--- + +## 3. The message contract + +```typescript +// ../web/src/types/chat.ts +interface FunctionCallMessage extends BaseMessage { + role: 'function-call' + functionId: string // e.g.
"sandbox::exec", "web::fetch" + input: unknown + output?: unknown + durationMs?: number + running?: boolean + /** awaiting user approval before execution; lifecycle: pending -> running -> done */ + pendingApproval?: boolean + functionCallId?: string // iii function_call_id — for approval::resolve + sessionId?: string // iii session_id — paired with functionCallId +} +``` + +The lifecycle flags drive what your renderer should return: + +| State | Flags | What the custom UI should do | +|-------|-------|------------------------------| +| **Pending approval** | `pendingApproval: true` | Return your compact preview from `tryRenderPreview`; return `null` from `tryRender`. FCM keeps the approve/deny bar. | +| **Running** | `running: true`, not pending | `tryRender` with the `running` flag — same shell as done, body shows a muted `executing…` shimmer. | +| **Done** | neither flag | `tryRender` returns success UI; FCM wraps it in **terminal** (default) + **raw json** tabs when non-null. | +| **Failed** | `output` carries an error | Detect and return error UI from `tryRender` **before** success parsing (see [error shapes](#43-structured-tool-errors-sandboxerrorwire)). | + +Wire shapes come straight from the harness/engine and are **not** normalized in the UI layer except inside your `parsers.ts`. + +--- + +## 4. Payload shapes to plan for + +Your parsers must tolerate four shapes. The `unwrapEnvelope` + Zod-`safeParse` pattern below handles all of them. + +### 4.1 Raw handler JSON + +What the Rust/Python handler returns directly, e.g. sandbox `ExecResponse`: + +```json +{ "stdout": "...", "stderr": "", "exit_code": 0, "duration_ms": 41, "success": true } +``` + +### 4.2 Harness agent envelope + +The harness `agent-trigger.ts` wraps many tool results before relaying them to the agent. The console receives the same shape: + +```json +{ + "content": [{ "type": "text", "text": "..." 
}], + "details": { "/* the actual payload */": true }, + "terminate": true +} +``` + +Always unwrap before Zod parsing. The discriminator is `Array.isArray(value.content) && 'details' in value`; `unwrapEnvelope` is idempotent (a flat payload passes through unchanged): + +```typescript +// ../web/src/components/chat/sandbox/parsers.ts +import { unwrapEnvelope } from '@/components/chat/sandbox/parsers' +const payload = unwrapEnvelope(message.output) // -> details, or the input unchanged +``` + +### 4.3 Structured tool errors (`SandboxErrorWire`) + +A flat, Stripe-style error object inside `details` (or raw output): + +```json +{ + "type": "exec_timeout", + "code": "S200", + "message": "command timed out after 100ms", + "docs_url": "https://...", + "retryable": true, + "fix": { "/* partial result */": true }, + "fix_note": "increase timeout_ms or simplify the command" +} +``` + +### 4.4 Transport / gate / `function_error` wrapper + +What you see when an invocation fails *before* the handler body — e.g. a `gate_unavailable` denial: + +```json +{ + "error": { + "kind": "function_error", + "message": "trigger_failed: ...", + "details": { + "status": "denied", + "denied_by": "gate_unavailable", + "function_id": "sandbox::fs::write", + "reason": "approval gate unreachable" + }, + "content": [{ "type": "text", "text": "..." }] + } +} +``` + +The sandbox module centralizes 4.3 + 4.4 in `parseSandboxErrorDisplay()` -> `SandboxErrorView`. Because the `function_error` envelope is shared infra (it comes from [`../web/src/lib/backend/translate.ts`](../web/src/lib/backend/translate.ts), not anything sandbox-specific), **every other family reuses it**: + +```tsx +import { SandboxErrorView } from '@/components/chat/sandbox/ErrorView' +import { parseSandboxErrorDisplay } from '@/components/chat/sandbox/parsers' + +const errorDisplay = + !running && rawOutput != null ? parseSandboxErrorDisplay(rawOutput) : null +if (errorDisplay) return +``` + +--- + +## 5. 
Module layout for a new family + +Mirror `sandbox/` (example family `myfeature/`): + +``` +../web/src/components/chat/myfeature/ + index.tsx # dispatcher: isFunction, tryRender, tryRenderPreview, FunctionIdLabel, exported ToolView + parsers.ts # Zod schemas + unwrapEnvelope re-export + safeParseRequest/Response + *_FUNCTION_IDS allowlist + SomeToolView.tsx # one component per function_id (or grouped by response shape) + format.ts # display helpers (bytes, paths, durations) — optional + ErrorView.tsx # domain errors — optional; most families reuse sandbox/ErrorView + shared.tsx # Chip / MetaRow / StatusPill — or import from sandbox/shared.tsx + __tests__/ + parsers.test.ts # envelope unwrap + every schema + error cases +``` + +The `web/` family is the smallest real instance of this layout — [`index.tsx`](../web/src/components/chat/web/index.tsx) + [`parsers.ts`](../web/src/components/chat/web/parsers.ts) + [`FetchView.tsx`](../web/src/components/chat/web/FetchView.tsx), reusing `sandbox/shared.tsx` and `sandbox/ErrorView.tsx`. + +--- + +## 6. The dispatcher (`index.tsx`) + +Export a single `ToolView` object. Every family today exposes the same surface: + +| Member | When called | Contract | +|--------|-------------|----------| +| `isFunction(id)` | FCM routing (label + chains) | Match against an explicit `Set` of ids — never a broad regex. The name can reflect the subset you handle (e.g. `isEngineListFunction`). | +| `tryRender(message)` | Not pending; running or done | `ReactNode \| null`. Check errors first, then `switch (functionId)`. | +| `tryRenderRunning` | (alias) | Set to `tryRender`; running state lives inside each view via the `running` prop. | +| `tryRenderPreview(message)` | `pendingApproval` | Compact approval UI; `null` -> FCM shows request JSON. Return `null` if the request JSON is already a fine preview (see `worker`, `directory`). | +| `FunctionIdLabel` | Header | Optional. Dim the `family::` prefix, emphasize the tail. 
| + +Canonical `tryRender` skeleton (from [`web/index.tsx`](../web/src/components/chat/web/index.tsx)): + +```tsx +function tryRender(message: FunctionCallMessage): React.ReactNode | null { + if (!isWebFunction(message.functionId)) return null // 1. not ours + if (message.pendingApproval) return null // 2. preview owns pending + + const input = unwrapEnvelope(message.input) // 3. unwrap input + const rawOutput = message.output + const output = rawOutput != null ? unwrapEnvelope(rawOutput) : undefined + const running = !!message.running + + const errorDisplay = // 4. errors BEFORE success + !running && rawOutput != null ? parseSandboxErrorDisplay(rawOutput) : null + if (errorDisplay) return + + switch (message.functionId) { // 5. one view per id + case 'web::fetch': + return + default: + return null + } +} + +export const WebToolView = { + isWebFunction, + tryRender, + tryRenderRunning: tryRender, + tryRenderPreview, +} +``` + +The matching label: + +```tsx +export function WebFunctionIdLabel({ functionId }: { functionId: string }) { + if (!functionId.startsWith('web::')) return {functionId} + const tail = functionId.slice('web::'.length) + return ( + <> + web:: + {tail} + + ) +} +``` + +--- + +## 7. Parsers (`parsers.ts`) + +- **One Zod schema per request/response struct.** Keep schemas non-strict (plain `z.object({...})`, no `.strict()`) so additive wire fields never break the UI. +- **Document the wire source** in a comment (the Rust file or the harness `schemas.ts`) like `sandbox/parsers.ts` and `web/parsers.ts` do. +- **Export an explicit allowlist** — `MY_FEATURE_FUNCTION_IDS` `as const` + a `Set`-backed `isMyFeatureFunction(id): id is MyFeatureFunctionId`. 
+- **`safeParseRequest` parses the input as-is; `safeParseResponse` unwraps first**, then parses: + +```typescript +import { unwrapEnvelope } from '@/components/chat/sandbox/parsers' +export { unwrapEnvelope } + +export function safeParseRequest<T>(schema: z.ZodType<T>, value: unknown): T | null { + const parsed = schema.safeParse(value ?? {}) + return parsed.success ? parsed.data : null +} + +export function safeParseResponse<T>(schema: z.ZodType<T>, value: unknown): T | null { + const parsed = schema.safeParse(unwrapEnvelope(value)) + return parsed.success ? parsed.data : null +} +``` + +The `web` family re-exports `unwrapEnvelope` and the two `safeParse*` helpers from `sandbox/parsers.ts` instead of re-implementing them — do the same unless you need different behavior. + +--- + +## 8. Views + +Each per-tool view accepts `{ input, output?, running? }` and returns `null` internally if parsing fails (the dispatcher already returned `null` for unknown ids, so this only guards malformed payloads). + +- **Reuse design tokens** rather than raw colors: `border-rule`, `border-rule-2`, `bg-bg`, `bg-paper-2`, `text-ink`, `text-ink-faint`, `text-warn`, `font-mono`. They flip correctly across light/dark themes. +- **Reuse shared chrome** from [`sandbox/shared.tsx`](../web/src/components/chat/sandbox/shared.tsx): `Chip`, `MetaRow`, `StatusPill`, `ActionLine`. Also reuse the UI primitives from [`../web/src/components/ui/`](../web/src/components/ui) (`Badge`, `Button`, `Tabs`, `Cell`, `EmptyState`). +- **Terminal-style output?** Copy [`sandbox/terminal/Terminal.tsx`](../web/src/components/chat/sandbox/terminal/Terminal.tsx) + `AnsiOutput.tsx`. +- **Code / JSON blocks:** `JsonHighlight` / `CodeHighlight` from [`../web/src/lib/syntax.tsx`](../web/src/lib/syntax.tsx). +- **Running state** uses the same shell as done; the body shows a muted shimmer. The `web::fetch` running branch is a clean template: + +```tsx +if (running) { + return ( +
+ + + {method} + + {req.url} +
+ · waiting for response… +
+
+ ) +} +``` + +--- + +## 9. Wire the family into `FunctionCallMessage.tsx` + +Three edits, all in [`FunctionCallMessage.tsx`](../web/src/components/chat/FunctionCallMessage.tsx). Add to the existing chains — order is priority (first non-null wins), so put your family wherever it should win ties (in practice families are disjoint, so order rarely matters). + +**1. Import** your `ToolView` and label: + +```tsx +import { MyFeatureFunctionIdLabel, MyFeatureToolView } from '@/components/chat/myfeature' +``` + +**2. Add a branch** to the header `FunctionIdLabel`: + +```tsx +function FunctionIdLabel({ functionId }: { functionId: string }) { + if (DirectoryToolView.isDirectoryFunction(functionId)) return + if (EngineToolView.isEngineListFunction(functionId)) return + if (WorkerToolView.isWorkerFunction(functionId)) return + if (WebToolView.isWebFunction(functionId)) return + if (SandboxToolView.isSandboxFunction(functionId)) return + if (MyFeatureToolView.isMyFeatureFunction(functionId)) return + return {functionId} +} +``` + +**3. Add to the `customPreview` and `customTerminal` chains:** + +```tsx +const customPreview = + SandboxToolView.tryRenderPreview(message) ?? + EngineToolView.tryRenderPreview(message) ?? + DirectoryToolView.tryRenderPreview(message) ?? + WorkerToolView.tryRenderPreview(message) ?? + WebToolView.tryRenderPreview(message) ?? + MyFeatureToolView.tryRenderPreview(message) + +const customTerminal = !pending + ? (SandboxToolView.tryRender(message) ?? + EngineToolView.tryRender(message) ?? + DirectoryToolView.tryRender(message) ?? + WorkerToolView.tryRender(message) ?? + WebToolView.tryRender(message) ?? + MyFeatureToolView.tryRender(message)) + : null +``` + +Nothing else changes — `showRequestPaneAbove`, the `Tabs`, and the approve/deny bar are family-agnostic. The tab labels stay **terminal** / **raw json**; rename them only if "terminal" is wrong for your UX. + +--- + +## 10. Stories (required) + +A renderer needs two kinds of Storybook coverage. 
Run `pnpm storybook` in `console/web`. See [`../web/PLAYGROUND.md`](../web/PLAYGROUND.md) for the streaming contract. + +| Kind | Where | Purpose | +|------|-------|---------| +| **Fixture gallery** | [`FunctionCallMessage.stories.tsx`](../web/src/components/chat/FunctionCallMessage.stories.tsx) + `stories/fixtures/-fixtures.ts` | Static spec sheet — every variant visible at once, no send button. Best for pixel-polishing each card. | +| **Playground scenario** | `stories/playground/scenarios/*.ts` | Live chat driven by a `ChatBackend` — exercises the `fcall-start` -> `fcall-end` streaming contract and the event-log rail. | + +### 10.1 Fixtures + the family gallery (worked example) + +Create `stories/fixtures/myfeature-fixtures.ts` with a `base()` factory (copy [`sandbox-fixtures.ts`](../web/src/stories/fixtures/sandbox-fixtures.ts)). It centralizes the boilerplate so each fixture is one call: + +```ts +import type { FunctionCallMessage } from '@/types/chat' + +const now = Date.now() + +/** Harness `{ content, details, terminate }` envelope — use for tools that go through agent-trigger. 
*/ +export function wrapHarness(details: unknown) { + return { content: [{ type: 'text' as const, text: JSON.stringify(details, null, 2) }], details, terminate: true } +} + +function base(id: string, functionId: string, input: unknown, output?: unknown, extra?: Partial<FunctionCallMessage>): FunctionCallMessage { + return { id, role: 'function-call', functionId, input, output, durationMs: 240, createdAt: now, ...extra } +} + +export const myfeatureDoThing = base( + 'mf-do', 'myfeature::do_thing', + { target: 'staging' }, + wrapHarness({ ok: true, affected: 3 }), +) + +// states your renderer cares about: +export const myfeatureRunning = base('mf-run', 'myfeature::do_thing', { target: 'prod' }, undefined, { running: true }) +export const myfeaturePending = base('mf-pend', 'myfeature::do_thing', { target: 'prod' }, undefined, { pendingApproval: true }) + +export const myfeatureFixtures = [myfeatureDoThing, myfeatureRunning, myfeaturePending /* , errors, edge cases */] as const +``` + +Then register a gallery story in [`FunctionCallMessage.stories.tsx`](../web/src/components/chat/FunctionCallMessage.stories.tsx). The file already defines a `FamilyGallery` helper that renders each fixture in a bordered, `defaultOpen` box; a new family's story, modelled on the existing `SandboxFamily` story (the canonical example), is just: + +```tsx +import { myfeatureFixtures } from '@/stories/fixtures/myfeature-fixtures' + +export const MyFeatureFamily: Story = { + name: 'myfeature family', + render: () => , +} +``` + +That mirrors the five shipping galleries (`SandboxFamily`, `DirectoryFamily`, `EngineFamily`, `WebFamily`, `WorkerFamily`). Open **Chat / FunctionCallMessage / myfeature family** and confirm the **terminal** (default) and **raw json** tabs on each card. + +Cover, at minimum: one **done** fixture per `function_id` (mix envelope-wrapped and raw payloads), plus **pending**, **running**, and **error / gate denial** fixtures for the states your renderer handles.
+ +### 10.2 Playground scenario (at least one) + +Add an interactive scenario under [`stories/playground/scenarios/`](../web/src/stories/playground/scenarios) using `makeBackend` + `streamFcall` from [`helpers.ts`](../web/src/stories/playground/scenarios/helpers.ts): + +```ts +import { makeBackend, streamAssistant, streamFcall, streamThought } from './helpers' +import { myfeatureDoThing } from '@/stories/fixtures/myfeature-fixtures' // reuse fixture payloads + +export const myfeatureHero = makeBackend( + 'myfeature-hero', + async function* (_prompt, _mode, _model, opts) { + const signal = opts?.signal + yield* streamThought('calling myfeature…', { signal }) + yield* streamFcall({ + functionId: 'myfeature::do_thing', + input: myfeatureDoThing.input, + output: myfeatureDoThing.output, + waitMs: 700, + signal, + }) + yield* streamAssistant('done.', { signal }) + }, +) +``` + +Register it in [`scenarios/index.ts`](../web/src/stories/playground/scenarios/index.ts) (the `group` decides which `Playground/*.stories.tsx` surfaces it): + +```ts +import { myfeatureHero } from './myfeature-hero' + +// inside SCENARIOS: +{ + id: 'myfeature-hero', + label: 'myfeature · hero', + description: 'one myfeature:: call with realistic request/response payloads.', + group: 'agent', // an existing ScenarioGroup + preferredMode: 'agent', + backend: myfeatureHero, +} +``` + +Verify: `pnpm storybook` -> open the scenario under **Playground** -> send any message -> confirm the custom card renders (not JSON-only) and the event log shows `fcall-start` / `fcall-end`. 
+ +### 10.3 Coverage matrix + +| Renderer feature | Fixture gallery | Playground scenario | +|------------------|-----------------|---------------------| +| Success / done | per `function_id` | one `streamFcall` with success output | +| Pending approval | `pendingApproval: true` | `streamFcall({ pendingApproval: true, approvalWaitMs })` — see [`pending-approval.ts`](../web/src/stories/playground/scenarios/pending-approval.ts) | +| Running shimmer | `running: true` | shorten `waitMs` and watch mid-flight | +| Error / gate | error fixture | `output: { error: … }` — see [`error-on-fcall.ts`](../web/src/stories/playground/scenarios/error-on-fcall.ts) | + +> **Existing gap:** the fixtures cover all five families, but there is **no** dedicated per-family `*::*` Playground scenario yet (only generic ones like `multi-function-agent`, `pending-approval`, `error-on-fcall`). Adding a focused scenario when you touch a family doubles as the template for the next one. + +--- + +## 11. `FunctionCallMessage` body logic (reference) + +Custom renderers interact with these derived flags in [`FunctionCallMessage.tsx`](../web/src/components/chat/FunctionCallMessage.tsx): + +```tsx +const hasCustomTerminal = customTerminal != null +const showRequestPaneAbove = + !(pending && customPreview) && + !(running && hasCustomTerminal) && + !(!pending && !running && hasCustomTerminal) +``` + +Which resolve to: + +| Case | Request pane above | Running slot | Done body | +|------|--------------------|--------------|-----------| +| Pending + preview | hidden | — | — | +| Pending, no preview | shown | — | — | +| Running + custom | hidden | custom | — | +| Running, no custom | shown | response JSON | — | +| Done + custom | hidden | — | Tabs: custom + raw json | +| Done, no custom | shown | — | request + response JSON | + +Approve / deny / always-allow are props on `FunctionCallMessage` (wired by [`FunctionCallGroup.tsx`](../web/src/components/chat/FunctionCallGroup.tsx) to `approval::resolve`). 
Custom modules never implement approval themselves. + +--- + +## 12. Scale beyond hand-wiring + +Adding a sixth, seventh family to the `??` chains stays readable for a while, but eventually a registry is cleaner. This is **not implemented yet** — sketch only: + +``` +../web/src/components/chat/function-plugins/ + types.ts # FunctionCallRenderer interface + registry.ts # ordered list of plugins + index.ts # resolvePreview(message), resolveTerminal(message), resolveLabel(functionId) +``` + +```typescript +export interface FunctionCallRenderer { + id: string + isMatch: (functionId: string) => boolean + tryRender: (message: FunctionCallMessage) => React.ReactNode | null + tryRenderPreview?: (message: FunctionCallMessage) => React.ReactNode | null + FunctionIdLabel?: (props: { functionId: string }) => React.ReactNode + primaryTabLabel?: string // default "terminal" +} +``` + +FCM would collapse to `resolveTerminal(message)` / `resolvePreview(message)` over a registered list. Until that lands, follow the [minimal wiring](#9-wire-the-family-into-functioncallmessagetsx) in step 9. + +--- + +## 13. 
Shared utilities you can reuse + +| Utility | Location | Use for | +|---------|----------|---------| +| `unwrapEnvelope` | [`sandbox/parsers.ts`](../web/src/components/chat/sandbox/parsers.ts) | Harness `{ content, details, terminate }` | +| `parseSandboxErrorDisplay` / `SandboxErrorView` | [`sandbox/parsers.ts`](../web/src/components/chat/sandbox/parsers.ts) / [`sandbox/ErrorView.tsx`](../web/src/components/chat/sandbox/ErrorView.tsx) | The shared `function_error` / gate-denial + `SandboxErrorWire` shapes | +| `Chip`, `MetaRow`, `StatusPill`, `ActionLine` | [`sandbox/shared.tsx`](../web/src/components/chat/sandbox/shared.tsx) | Metadata rows | +| `Terminal`, `AnsiOutput` | [`sandbox/terminal/`](../web/src/components/chat/sandbox/terminal) | Exec / run-style output | +| `JsonHighlight` / `CodeHighlight` | [`../web/src/lib/syntax.tsx`](../web/src/lib/syntax.tsx) | JSON / code blocks | +| `wrapHarness` + `base()` | [`sandbox-fixtures.ts`](../web/src/stories/fixtures/sandbox-fixtures.ts) | Fixture envelopes | +| `makeBackend`, `streamFcall`, `streamThought`, `streamAssistant` | [`scenarios/helpers.ts`](../web/src/stories/playground/scenarios/helpers.ts) | Playground scenarios | +| UI primitives | [`../web/src/components/ui/`](../web/src/components/ui) | `Badge`, `Button`, `Tabs`, `Cell`, `EmptyState` | + +--- + +## 14. Backend / catalog alignment + +- Function ids must match what the engine registers (`::` separator). +- The composer's `@`-mention catalog ([`../web/src/lib/functions-catalog.ts`](../web/src/lib/functions-catalog.ts)) is **separate** from renderers — update it only if you also want composer discoverability. +- Agent events become `FunctionCallMessage`s in [`../web/src/lib/backend/translate.ts`](../web/src/lib/backend/translate.ts). Custom UI changes display only, never translation. + +--- + +## 15. Out of scope (by design) + +- Streaming partial stdout into a card (exec is buffered upstream). +- Interactive terminal / PTY (`xterm.js`). 
+- Full ANSI color parsing (stdout/stderr two-tone only). +- Persisting the terminal-vs-json tab choice across messages. +- Re-run / edit from the function card. + +--- + +## 16. Pre-merge checklist + +```bash +cd console/web +pnpm test -- src/components/chat/myfeature # parsers + error cases +pnpm typecheck +pnpm exec biome check --write \ + src/components/chat/myfeature \ + src/components/chat/FunctionCallMessage.tsx \ + src/components/chat/FunctionCallMessage.stories.tsx \ + src/stories/fixtures/myfeature-fixtures.ts \ + src/stories/playground/scenarios/myfeature-hero.ts \ + src/stories/playground/scenarios/index.ts +pnpm build +pnpm storybook # smoke: family gallery (toggle tabs) + your Playground scenario +``` + +- [ ] Allowlist of every `function_id` in `parsers.ts`. +- [ ] Zod schema per request/response shape, non-strict, wire source documented. +- [ ] One view per id; success + running + (optional) preview; `null` on parse failure. +- [ ] Errors detected before success parsing. +- [ ] Wired into all three FCM chains (label + preview + terminal). +- [ ] Fixture file + `FamilyGallery` story. +- [ ] At least one Playground scenario registered. +- [ ] `parsers.test.ts` covers raw + harness-wrapped + error payloads. 
+ +--- + +## Quick reference: family file map + +| File | Role | +|------|------| +| `<family>/index.tsx` | Dispatcher + `FunctionIdLabel` + exported `ToolView` | +| `<family>/parsers.ts` | Zod schemas + envelope unwrap + allowlist | +| `<family>/*View.tsx` | Per-tool UI | +| `<family>/__tests__/parsers.test.ts` | Unit tests | +| `stories/fixtures/<family>-fixtures.ts` | Fixture data (required) | +| [`components/chat/FunctionCallMessage.stories.tsx`](../web/src/components/chat/FunctionCallMessage.stories.tsx) | Registers the `FamilyGallery` story | +| [`stories/playground/scenarios/*.ts`](../web/src/stories/playground/scenarios) | Playground `ChatBackend` scenarios (required) | +| [`stories/playground/scenarios/index.ts`](../web/src/stories/playground/scenarios/index.ts) | Scenario registry | +| [`FunctionCallMessage.tsx`](../web/src/components/chat/FunctionCallMessage.tsx) | Host integration | +| [`PLAYGROUND.md`](../web/PLAYGROUND.md) | Streaming contract for scenarios | + +> An earlier, sandbox-only version of this guide lives at [`../web/docs/custom-function-components.md`](../web/docs/custom-function-components.md). This document is the current, multi-family source of truth. diff --git a/harness/README.md b/harness/README.md index 2337cf02..758e9cbb 100644 --- a/harness/README.md +++ b/harness/README.md @@ -1,68 +1,67 @@ -# harness - -Node/TypeScript port of the iii harness stack. One package, one folder per -worker, one feature per file. Each worker is independently runnable as -`pnpm dev:` (development) or `iii-` (production binary). - -The Rust workers `shell`, `iii-directory`, and the engine's `state::*`/ -`stream::*`/`iii::durable::*` primitives are NOT ported — they run -alongside `harness` over the iii bus. 
- -## Workers - -| Folder | Bus surface | Role | -|---|---|---| -| `src/harness/` | `ui::subscribe`/`unsubscribe`, `harness::fs::read_inline`, `policy::check_permissions`, `harness::provider::{register,resolve,list}` | Meta-worker; loads `iii-permissions.yaml`; spins up `ui::*` fanout pumps; owns the provider registry + the `harness` entry in the `configuration` worker (api keys, per-provider settings, permissions). | -| `src/approval-gate/` | `approval::resolve` | Persists operator decisions to scope `approvals` (turn-orchestrator reacts via `turn::on_approval`); default mode seeded from `harness` config `permissions.default_mode`. | -| `src/turn-orchestrator/` | `run::start`, `turn::{state}`, `turn::get_state` | Durable FSM driving each agent turn; `dispatchWithHook` approval chokepoint. | -| `src/session/` | `session-tree::*` (11 fns), `session-inbox::*` (3 fns) | Branching session storage + per-session inbox queues. | -| `src/llm-budget/` | `budget::*` (14 fns) | Workspace + agent LLM spend caps. | -| `src/hook-fanout/` | `hook-fanout::publish_collect` | Generic publish-and-collect over a stream topic. | -| `src/models-catalog/` | `models::list`, `models::get`, `models::supports`, `models::register` | Model catalog populated exclusively by provider discovery (`provider::::refresh_models` -> `models::register`); no embedded seed. | -| `src/provider-anthropic/` | `provider::anthropic::{stream,complete,refresh_models}` | Anthropic SSE → channel writer; self-declares to the harness registry; pulls `/v1/models` into the catalog. | -| `src/provider-openai/` | `provider::openai::{stream,complete,refresh_models}` | OpenAI SSE → channel writer; self-declares + pulls `/v1/models`. | -| `src/provider-kimi/` | `provider::kimi::{stream,complete,refresh_models}` | Kimi (Moonshot) SSE → channel writer; self-declares + pulls `/v1/models`. 
| -| `src/provider-lmstudio/` | `provider::lmstudio::{stream,complete,refresh_models,load_model,unload_model}` | LM Studio (localhost); self-declares + discovers loaded models. | -| `src/provider-llamacpp/` | `provider::llamacpp::{stream,complete,refresh_models}` | llama-server (localhost); self-declares + discovers the loaded model. | -| `src/context-compaction/` | (none — pure side-car on `agent::events`) | Optional out-of-band session-history compactor. | +# The iii harness + +**The harness is not a layer on top of your backend. On iii, it is the backend.** + +Many setups keep the agent loop in one process and everything else (queues, HTTP, state, traces) in another. Tool calls cross that boundary; retries and traces rarely line up. + +On iii, agents are workers. Tools are functions. Handoffs use the same triggers and queues as the rest of the system. + +This package is the production harness for that model: turn orchestration, approvals, sessions, providers, context compaction, and budgets, all as iii workers next to shell, storage, database, and whatever you add. + +Read [The Harness Is the Backend](https://www.linkedin.com/pulse/harness-backend-mike-piccolo-2aocf/) by Mike Piccolo for the full argument. + +--- + +## What you get + +**One trace.** Each hop is a `trigger()` on the bus. Trace IDs propagate across workers, languages, and queue steps. You debug one runtime, not separate logs aligned by timestamp. + +**Live discovery.** Workers register functions on connect; the engine keeps a catalog. Agents and the console see what the system can do today, including workers added without redeploying the orchestrator. Providers self-register; the model catalog fills from discovery, not a hardcoded seed. + +**Composition, not frameworks.** Thin vs thick harnesses map to how many functions you register and how you wire triggers. Fewer functions for a lean loop; approval rules and extra workers for more structure. 
+ +**New capability, new worker.** When the harness needs something else (shell, database, coder, another provider), you add a worker, not a fork of the orchestrator. Published workers install from the [iii worker registry](https://workers.iii.dev) with `iii worker add <worker>`; they register on the iii engine and show up in the live catalog. + +**Turns, approvals, budgets.** Seven-state durable turn FSM with queue-backed steps. Approval gate with YAML permissions, parallel tool batches, pending state across reload, fail-closed when policy is unreachable. Workspace and agent budget caps. Five provider workers behind one registry. + +**Context compaction.** Long sessions exceed model windows. The `context-compaction` worker compacts history as turns accumulate and backs the console `/compact` command. + +--- + +## What ships here + +Thirteen workers in one TypeScript package, one folder per worker, one feature per file: + +| Concern | Workers | +| --- | --- | +| Orchestration | `turn-orchestrator` (durable turn FSM), `hook-fanout` | +| Governance | `harness` (permissions, provider registry, UI fanout), `approval-gate` | +| Sessions | `session` (branching session tree + inbox queues) | +| Context | `context-compaction` (keeps long sessions inside the model window) | +| Models | `models-catalog`, `provider-anthropic`, `provider-openai`, `provider-kimi`, `provider-lmstudio`, `provider-llamacpp` | +| Cost | `llm-budget` | + +Rust workers (`shell`, `iii-directory`) and engine builtins (`state::*`, `stream::*`, `iii::durable::*`) stay on the same bus; this package does not reimplement them. 
+ +--- ## Quickstart -```bash -pnpm install -pnpm build # compile to dist/ -# In separate terminals (or via your process manager): -node dist/harness/main.js --url ws://127.0.0.1:49134 --config ./config.yaml -node dist/turn-orchestrator/main.js --url ws://127.0.0.1:49134 --config ./config.yaml -node dist/approval-gate/main.js --url ws://127.0.0.1:49134 --config ./config.yaml -node dist/session/main.js --url ws://127.0.0.1:49134 --config ./config.yaml -node dist/hook-fanout/main.js --url ws://127.0.0.1:49134 -node dist/models-catalog/main.js --url ws://127.0.0.1:49134 -node dist/provider-anthropic/main.js --url ws://127.0.0.1:49134 --config ./config.yaml -node dist/provider-openai/main.js --url ws://127.0.0.1:49134 --config ./config.yaml -node dist/provider-kimi/main.js --url ws://127.0.0.1:49134 --config ./config.yaml -node dist/provider-lmstudio/main.js --url ws://127.0.0.1:49134 --config ./config.yaml -node dist/provider-llamacpp/main.js --url ws://127.0.0.1:49134 --config ./config.yaml -node dist/llm-budget/main.js --url ws://127.0.0.1:49134 -# Optional side-car: -node dist/context-compaction/main.js --url ws://127.0.0.1:49134 -``` - -For development, replace `node dist//main.js` with `pnpm dev:`. - -## Configuration - -All workers honour `--url` / `III_URL` for the engine WebSocket and -`--config` for the YAML config file (default `./config.yaml`). - -The harness worker watches `iii-permissions.yaml` (default -`./iii-permissions.yaml`) and reloads it on change. The shipped default -file at the workspace root is symlinked into this folder. - -## Layout - -- `docs/` — architecture documentation: [`docs/architecture.md`](docs/architecture.md) is the system overview; one file per worker lives under [`docs/workers/`](docs/workers/). -- `src/types/` — wire types (mirrors `harness/crates/harness-types`). -- `src/runtime/` — cross-worker SDK helpers (worker bootstrap, state/stream wrappers, OTel stub). -- `src//` — one folder per worker. 
Each `register.ts` composes the worker's bus surface from per-feature files; each `main.ts` is the binary entry-point. -- `tests/` — vitest suites per worker. +1. Install iii: `curl -fsSL https://install.iii.dev/iii/main/install.sh | sh` +2. Verify the install: `iii --version` +3. Add the harness and console workers: `iii worker add harness console` +4. Start the engine: `iii --config config.yaml` +5. Open the [console](https://workers.iii.dev/workers/console) at `http://127.0.0.1:3113` + +Chat, approve/deny, model picker, and trace explorer ship in one binary ([console](https://workers.iii.dev/workers/console)). + +Tools, orchestration, governance, and observability use the same worker, trigger, and function model as the rest of iii. + +--- + +## Further reading + +- [The Harness Is the Backend](https://www.linkedin.com/pulse/harness-backend-mike-piccolo-2aocf/) +- [console worker](https://workers.iii.dev/workers/console) +- [iii worker registry](https://workers.iii.dev) +- [iii engine](https://github.com/iii-hq/iii) diff --git a/iii-directory/Cargo.lock b/iii-directory/Cargo.lock index 902c1889..83abad8f 100644 --- a/iii-directory/Cargo.lock +++ b/iii-directory/Cargo.lock @@ -1065,7 +1065,7 @@ dependencies = [ [[package]] name = "iii-directory" -version = "0.7.2" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", diff --git a/iii-directory/skill.md b/iii-directory/skill.md deleted file mode 100644 index 74ab3697..00000000 --- a/iii-directory/skill.md +++ /dev/null @@ -1,11 +0,0 @@ -# iii-directory - -Engine introspection, workers registry proxy, and filesystem-backed skill + prompt reader for the iii engine. Every public function sits under a single `directory::*` namespace, split into four MCP-agnostic surfaces (`skills`, `prompts`, `engine`, and `registry`), so callers learn one envelope across local files, the running engine, and the public workers registry. - -Skills and prompts are sourced from a single configured folder on disk (`skills_folder`). 
The only write path is `directory::skills::download`, which pulls markdown into `skills_folder` from either the workers registry or a GitHub repo. `directory::skills::list` returns one row per markdown file with `title` (preferring the YAML frontmatter `title:` over the body H1) and `type` lifted from frontmatter. `directory::skills::get` accepts a bare id, a `.md` file-path form, or the legacy `iii://` URI. `SKILLS.md` is aliased to `index.md` at scan time so the new convention round-trips through both filesystem and parser. `directory::skills::index` renders a short per-worker overview that emits both relative file-path pointers (`Read [/index.md](/index.md)`) and the legacy `iii:///index` form side by side for back-compat. - -`directory::skills::download` pulls bundles from the public workers registry (`api.workers.iii.dev` by default). For self-hosted setups, repoint `registry_url` in `config.yaml` at your own registry. The `directory::registry::*` proxies share their envelope with `engine::workers::*`, so a single parser handles both local and remote worker discovery. - -```bash -iii worker add iii-directory -``` diff --git a/iii-directory/src/sources/registry.rs b/iii-directory/src/sources/registry.rs index 6b8e6f89..93aaf443 100644 --- a/iii-directory/src/sources/registry.rs +++ b/iii-directory/src/sources/registry.rs @@ -212,20 +212,34 @@ pub async fn download_typed( /// materialised under `<skills_folder>/<worker>/`, so strip a single leading /// `skills/` segment — files land at `<worker>/exec.md`, `<worker>/SKILL.md` /// rather than nesting a second `skills/` folder. A bundle-root file like -/// `index.md` (no prefix) is returned unchanged. +/// `SKILL.md` or `index.md` (no prefix) is returned unchanged. fn strip_leading_skills_segment(path: &str) -> &str { path.strip_prefix("skills/").unwrap_or(path) } +/// Stale registry snapshots may ship both legacy `index.md` and canonical +/// `SKILL.md` overviews. Prefer `SKILL.md` on disk and drop redundant +/// `index.md` entries before writing. 
+fn dedupe_stale_overview(skills: &mut Vec) { + let has_skill_md = skills + .iter() + .any(|s| strip_leading_skills_segment(&s.path) == "SKILL.md"); + if has_skill_md { + skills.retain(|s| strip_leading_skills_segment(&s.path) != "index.md"); + } +} + fn write_response( worker: &str, - response: WorkerSkillsResponse, + mut response: WorkerSkillsResponse, skills_folder: &Path, ) -> Result { let dest_root = skills_folder.join(worker); std::fs::create_dir_all(&dest_root) .map_err(|e| format!("create_dir_all {}: {e}", dest_root.display()))?; + dedupe_stale_overview(&mut response.skills); + let mut result = DownloadResult::new(worker); for skill in response.skills { @@ -395,6 +409,7 @@ mod tests { ); // Bundle-root files and non-prefixed paths are untouched. assert_eq!(strip_leading_skills_segment("index.md"), "index.md"); + assert_eq!(strip_leading_skills_segment("SKILL.md"), "SKILL.md"); assert_eq!(strip_leading_skills_segment("a/b.md"), "a/b.md"); } @@ -404,25 +419,46 @@ mod tests { let response = WorkerSkillsResponse { name: Some("iii".into()), version: None, + skills: vec![SkillEntry { + path: "skills/SKILL.md".into(), + content: "# skill\n".into(), + }], + prompts: vec![], + }; + let result = write_response("iii", response, tmp.path()).unwrap(); + // Lands at iii/SKILL.md, NOT iii/skills/SKILL.md. 
+ assert!(tmp.path().join("iii/SKILL.md").is_file()); + assert!(!tmp.path().join("iii/skills/SKILL.md").exists()); + assert!(!tmp.path().join("iii/index.md").exists()); + assert_eq!(result.skills_written, vec!["SKILL.md"]); + } + + #[test] + fn write_response_dedupes_stale_index_when_skill_md_present() { + let tmp = tempfile::tempdir().unwrap(); + let response = WorkerSkillsResponse { + name: Some("iii-directory".into()), + version: None, skills: vec![ SkillEntry { path: "index.md".into(), - content: "# iii\n".into(), + content: "# stale index\n".into(), }, SkillEntry { - path: "skills/SKILL.md".into(), - content: "# skill\n".into(), + path: "SKILL.md".into(), + content: "# canonical\n".into(), }, ], prompts: vec![], }; - let result = write_response("iii", response, tmp.path()).unwrap(); - // Lands at iii/SKILL.md, NOT iii/skills/SKILL.md. - assert!(tmp.path().join("iii/SKILL.md").is_file()); - assert!(!tmp.path().join("iii/skills/SKILL.md").exists()); - assert!(tmp.path().join("iii/index.md").is_file()); - assert!(result.skills_written.contains(&"SKILL.md".to_string())); - assert!(result.skills_written.contains(&"index.md".to_string())); + let result = write_response("iii-directory", response, tmp.path()).unwrap(); + assert!(tmp.path().join("iii-directory/SKILL.md").is_file()); + assert!(!tmp.path().join("iii-directory/index.md").exists()); + assert_eq!( + std::fs::read_to_string(tmp.path().join("iii-directory/SKILL.md")).unwrap(), + "# canonical\n" + ); + assert_eq!(result.skills_written, vec!["SKILL.md"]); } #[test]