diff --git a/apps/docs/content/docs/cn/code/api-contract.mdx b/apps/docs/content/docs/cn/code/api-contract.mdx index 0bc1cea..4787552 100644 --- a/apps/docs/content/docs/cn/code/api-contract.mdx +++ b/apps/docs/content/docs/cn/code/api-contract.mdx @@ -164,6 +164,8 @@ iteration。 ## Direct Tools +> 完整指南:[工具](/cn/docs/code/tools)。 + 已验证的宿主侧直接工具调用: ```ts @@ -230,6 +232,8 @@ tool。`program` 不会出现在它自己的默认 tool set 里。 ## Verification +> 完整指南:[验证](/cn/docs/code/verification)。 + 验证信息是 session 级能力: ```ts @@ -247,6 +251,8 @@ console.log(formatVerificationSummary(session.verificationSummary())); ## Memory +> 完整指南:[记忆](/cn/docs/code/memory)。 + Node memory 已用 `FileMemoryStore` 验证: ```ts @@ -267,6 +273,8 @@ await session.recallByTags(['grep'], 10); ## Skills +> 完整指南:[Skills](/cn/docs/code/skills)。 + 文件型和 inline skills 已通过 `search_skills` 验证: ```ts @@ -290,6 +298,8 @@ skill-file 检查使用带 YAML frontmatter 的 Markdown,并覆盖了 ## Side Questions +> 完整指南:[会话](/cn/docs/code/sessions)。 + `btw()` 提出一次只读临时问题,并返回独立结果: ```ts @@ -301,6 +311,8 @@ console.log(side.totalTokens); ## Runs And Cancellation +> 完整指南:[会话](/cn/docs/code/sessions)。 + 每次 `send()` 或 `stream()` 都会记录可回放的 run state: ```ts @@ -325,6 +337,8 @@ console.log(session.traceEvents()); ## Persistence +> 完整指南:[持久化](/cn/docs/code/persistence)与[会话](/cn/docs/code/sessions)。 + 文件型 session persistence 已验证稳定 `sessionId`、`autoSave`、显式 `save()` 和 `resumeSession()`: @@ -357,6 +371,8 @@ await agent.close(); // 关闭所有活 session + 断开全 ## Delegation +> 完整指南:[任务](/cn/docs/code/tasks)与[编排](/cn/docs/code/orchestration)。 + 已验证核心委派工具的直接 helper: ```ts @@ -377,6 +393,8 @@ await session.tasks([ ## Hooks +> 完整指南:[Hooks](/cn/docs/code/hooks)。 + 已验证的 hook 管理面: ```ts @@ -396,6 +414,8 @@ session.unregisterHook('docs-observer'); ## Slash Commands +> 完整指南:[命令](/cn/docs/code/commands)。 + 自定义 slash command 通过 `session.send()` 触发: ```ts @@ -410,6 +430,8 @@ console.log(result.text); ## Lane Queue +> 完整指南:[Lane 队列](/cn/docs/code/lane-queue)。 + Queue infrastructure 是显式 opt-in: 
```ts @@ -430,6 +452,8 @@ await queued.deadLetters(); ## MCP +> 完整指南:[MCP](/cn/docs/code/mcp)。闲置断开见[集群扩展点](/cn/docs/code/cluster-extension-points)。 + 集成检查覆盖一个真实 stdio MCP server: ```ts @@ -465,7 +489,9 @@ new UnixSocketTransport('/tmp/a3s.sock').kind; // 'unix_socket' ## 集群级扩展点 -这些契约让集群控制面(例如 书安OS)在**不 fork 框架**的前提下接入多租户、成本管控和容错运行。框架定义"决策点"和"结构化事件",**策略实现由 host 提供**。 +> 完整指南:[集群扩展点](/cn/docs/code/cluster-extension-points)(身份标签、预算守卫、集群事件、确定性 ID/回放、loop checkpoint、保留上限)。 + +这些契约让集群控制面在**不 fork 框架**的前提下接入多租户、成本管控和容错运行。框架定义"决策点"和"结构化事件",**策略实现由 host 提供**。 ### 身份标签 diff --git a/apps/docs/content/docs/cn/code/cluster-extension-points.mdx b/apps/docs/content/docs/cn/code/cluster-extension-points.mdx new file mode 100644 index 0000000..e189ccc --- /dev/null +++ b/apps/docs/content/docs/cn/code/cluster-extension-points.mdx @@ -0,0 +1,188 @@ +--- +title: "集群扩展点" +description: "集群宿主用来在多节点上运行长时会话、且无需分叉框架的接缝。" +--- + +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +# 集群扩展点 + +集群宿主平台在多个节点上运行长时运行的智能体会话。框架本身并不附带调度器或放置引擎,而是暴露一小组接缝:它定义决策点、发出结构化事件,并把策略交给宿主来提供。下文的所有内容都是你从框架外部接入的——你永远不需要分叉框架。 + +本页会明确区分哪些接缝在两个 SDK 中都可用(以 Node.js + Python 代码展示),哪些是当前在 Rust 核心中配置的(以散文描述,SDK 接入随后跟进)。 + +## 身份标签 + +每个会话都可以携带四个不透明的身份标签。框架从不解释它们——它会把它们传播到钩子、追踪和 `SessionData`,并在恢复时还原它们。宿主正是借此把一个会话归属到租户、主体、智能体模板以及更广的关联链。 + +请将身份标签与 `sessionStore` / `session_store` 搭配使用,使标签在进程重启后依然保留。恢复时,**由调用方提供的选项优先生效**,因此你可以在节点之间迁移会话时为其重新打标签。 + + + + +```ts +const session = agent.session('/path/to/project', { + tenantId: 'acme-corp', + principal: 'user:42', + agentTemplateId: 'reviewer-v3', + correlationId: 'req-9f2c', +}); + +// Getters return string | null +console.log(session.tenantId); // 'acme-corp' +console.log(session.principal); // 'user:42' +console.log(session.agentTemplateId); // 'reviewer-v3' +console.log(session.correlationId); // 'req-9f2c' +``` + + + + +```python +opts = SessionOptions() +opts.tenant_id = 'acme-corp' +opts.principal = 'user:42' +opts.agent_template_id = 'reviewer-v3' 
+opts.correlation_id = 'req-9f2c' +session = agent.session('/path/to/project', opts) + +# Getters are methods, return str | None +print(session.tenant_id()) # 'acme-corp' +print(session.principal()) # 'user:42' +print(session.agent_template_id()) # 'reviewer-v3' +print(session.correlation_id()) # 'req-9f2c' +``` + + + + +## 预算 / 成本守卫 + +预算守卫让宿主针对成本或令牌预算对每一次 LLM 调用进行把关。框架会在每次 LLM 请求*之前*调用你的守卫,并在请求返回*之后*再次调用。守卫是你自己拥有的策略;框架只负责执行你返回的决策。 + + + + +```ts +session.setBudgetGuard({ + checkBeforeLlm(sessionId, estimatedTokens) { + if (overLimit(sessionId, estimatedTokens)) { + return { decision: 'deny', resource: 'tokens', reason: 'monthly cap reached' }; + } + return { decision: 'allow' }; + }, + recordAfterLlm(sessionId, usage) { + meter(sessionId, usage); + }, +}); + +// Clear the guard +session.setBudgetGuard(null); +``` + +守卫回调**绝不能抛出异常**——抛出的错误会被视为放行(Allow)。 + + + + +```python +class MyGuard: + def check_before_llm(self, session_id, estimated_tokens): + if over_limit(session_id, estimated_tokens): + return {'decision': 'deny', 'resource': 'tokens', 'reason': 'monthly cap reached'} + return {'decision': 'allow'} + + def record_after_llm(self, session_id, usage): + meter(session_id, usage) + +opts = SessionOptions() +opts.budget_guard = MyGuard() +session = agent.session('/path/to/project', opts) + +# To clear: set opts.budget_guard = None and re-create the session. +``` + + + + +两个 SDK 的决策结构完全一致: + +| 返回值 | 效果 | +|---|---| +| `None` / `null` / `{ decision: 'allow' }` | 继续执行 LLM 调用。 | +| `{ decision: 'soft', resource, consumed, limit, message? 
}` | 发出 `BudgetThresholdHit`(kind 为 `soft`)并继续执行。 | +| `{ decision: 'deny', resource, reason }` | 中止 LLM 调用。Python 抛出 `RuntimeError("Budget exhausted...")`;Node 以 `"Budget exhausted..."` 拒绝(reject)。 | + +这种健壮性是刻意为之的:**缺失的守卫方法**会被当作宽松默认值处理,而**回调出错则回退为放行(Allow)**。行为异常的守卫永远无法中止一个活动会话——只有显式的 `deny` 才能做到。 + +## 集群事件词汇 + +宿主通过其钩子执行器,将集群级别的决策作为结构化的 `AgentEvent` 变体发出。会话内的钩子以统一方式订阅它们——与它们观察其他任何事件的方式相同——因此在宿主处编写的策略会原样呈现给智能体自身的钩子,无需特殊处理。 + +集群词汇如下: + +- **`BudgetThresholdHit { resource, kind, consumed, limit, message? }`** —— 预算守卫返回了 `soft` 决策(或宿主越过了它自己跟踪的某个阈值)。`kind` 用于区分软性警告与更硬性的限制。 +- **`PassivationRequested { reason, deadline_ms? }`** —— 宿主请求会话进入一个安全、可持久化的状态,以便将其从当前节点驱逐。`deadline_ms` 若存在,则表示强制驱逐前的宽限窗口。 +- **`PeerInvocation { from_session_id, from_tenant_id?, correlation_id? }`** —— 另一个会话调用了本会话。这些标签让接收方能够把调用归属回其源租户和关联链。 + +这些事件通过你的会话内钩子已经在使用的、经过验证的同一套钩子 API 来观察——Node 中为 `session.registerHook`,Python 中为 `session.register_hook`(参见[钩子](/cn/docs/code/hooks))。请将上述三个变体视为已记录在案的契约;宿主负责通过其钩子执行器发出它们。 + +## 确定性 ID 与时间(重放) + +希望在另一节点上对某次运行进行**逐位一致重放**的集群,必须消除常规运行中两处不确定性的来源:随机 ID 和挂钟时间。Rust 核心将二者建模在一个 `HostEnv { id_generator, clock }` 之后。默认实现把 UUID 生成器与系统时钟配对;重放工具会换入 `SequentialIdGenerator` 和 `FixedClock`,使得对相同输入的重新执行在任意节点上都产生相同的 ID 和时间戳,从而产生相同的输出。 + +这是当前**在 Rust 核心中配置的**。它尚未暴露在 JS/Python 选项面上,因此没有对应的 Node/Python 代码——SDK 接入可能随后跟进。 + +## 循环检查点与运行恢复 + +配置了 `sessionStore` / `session_store` 后,智能体循环会在**每一轮工具调用完成之后**持久化一个检查点,以运行 id 作为键。任何共享同一存储的节点都可以重新水合该运行并继续它。 + + + + +```ts +import { FileSessionStore } from '@a3s-lab/code'; + +const session = agent.session(workspace, { + sessionStore: new FileSessionStore('./.a3s/sessions'), + sessionId: 'session-from-node-a', +}); + +const result = await session.resumeRun('run-id-from-node-a'); +``` + + + + +```python +from a3s_code import FileSessionStore + +opts = SessionOptions() +opts.session_store = FileSessionStore('./.a3s/sessions') +opts.session_id = 'session-from-node-a' +session = agent.session(workspace, opts) + +result = 
session.resume_run('run-id-from-node-a') +``` + + + + +系统会为恢复的工作分配一个**新的运行 id**——存储中的原始运行保持不变。有两条错误路径值得处理: + +- **`resume_run requires a session_store`** —— 未配置存储;回退到一个全新会话。 +- **`no loop checkpoint found for run 'X'`** —— 该运行从未到达其第一个检查点,或已被清理;稍后重试,或将该运行视为丢失。 + +由于检查点只在**工具轮次之间、绝不在工具执行中途**生成,恢复的运行永远不会重放一个执行到一半的工具。存储细节参见[持久化](/cn/docs/code/persistence)。 + +## 长时运行会话的保留上限 + +运行数小时或数天的会话会在四个内存存储中累积状态:运行记录、每次运行的事件缓冲区、追踪事件,以及终态子智能体任务快照。若不加限制,它们会随会话寿命增长——对短寿命会话无妨,对长寿命会话则是真实的泄漏。 + +`SessionRetentionLimits` 为这四个存储分别设置上限。每个上限都是可选的:`None` 表示无上限的默认值。驱逐采用严格的 **FIFO**,并且**正在运行的子智能体任务永不被丢弃**——只有终态(已完成/已失败)快照会被驱逐。 + +这是当前通过 Rust 核心的 `SessionRetentionLimits` 配置的;SDK 形态将在后续跟进落地,因此目前没有对应的 Node/Python 代码。关于已经存在于 SDK 面上的每会话资源上限,参见[限制](/cn/docs/code/limits)。 + +--- + +**另见:** [多机部署](/cn/docs/code/multi-machine) · [持久化](/cn/docs/code/persistence) · [限制](/cn/docs/code/limits) · [钩子](/cn/docs/code/hooks) diff --git a/apps/docs/content/docs/cn/code/examples/ahp-safety.mdx b/apps/docs/content/docs/cn/code/examples/ahp-safety.mdx index f3bef50..bb3bbcc 100644 --- a/apps/docs/content/docs/cn/code/examples/ahp-safety.mdx +++ b/apps/docs/content/docs/cn/code/examples/ahp-safety.mdx @@ -1,17 +1,81 @@ --- -title: "AHP 安全" -description: "为 session 连接外部驾驭层" +title: "AHP 传输与安全" +description: "通过 AHP 传输将会话连接到外部宿主,并使用安全提供器与权限策略对其行为进行管控。" --- -# AHP 安全 +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +# AHP 传输与安全 + +A3S Code 会话可以通过可插拔的传输层(AHP,即 Agent Host Protocol,智能体宿主协议)连接到外部宿主。无论由哪种传输承载消息,安全层都是独立的:**安全提供器**会审查工具调用,而**权限策略**决定哪些操作可以在无人参与的情况下执行。当你需要一个由远程宿主驱动、但仍强制执行安全默认姿态的会话时,请参考本页。 + +传输层与安全层互相正交。你可以把 `HttpTransport` 换成 WebSocket 或 Unix 套接字传输,而无需改动权限规则,反之亦然。 + + + ```ts -import { Agent, HttpTransport } from '@a3s-lab/code'; +import { Agent, HttpTransport, DefaultSecurityProvider } from '@a3s-lab/code'; const agent = await Agent.create('agent.acl'); + const session = agent.session('/repo', { + // AHP transport: how the session reaches its external host. 
ahpTransport: new HttpTransport('http://localhost:8080/ahp', process.env.AHP_TOKEN), + // Safety: the security provider vets each tool call; the permission + // policy decides what runs unattended. These apply regardless of transport. + securityProvider: new DefaultSecurityProvider(), + permissionPolicy: { defaultDecision: 'ask' }, }); + +const result = await session.run('Audit the repo for hardcoded secrets.'); +console.log(result); + +await session.close(); +``` + +其他传输遵循相同的写法——`WebSocketTransport`、`UnixSocketTransport` 以及进程内的 `StdioTransport` 在这里都可以互换。将 `defaultDecision` 设为 `'deny'` 可获得更严格、完全受控的姿态;仅在受信任的沙箱环境中才使用 `'allow'`。 + + + + +```python +import os + +from a3s_code import ( + Agent, + SessionOptions, + HttpTransport, + DefaultSecurityProvider, + PermissionPolicy, +) + +agent = Agent.create(open("agent.acl").read()) + +opts = SessionOptions() +# AHP transport: how the session reaches its external host. +opts.transport = HttpTransport("http://localhost:8080/ahp", os.environ["AHP_TOKEN"]) +# Safety: the security provider vets each tool call; the permission +# policy decides what runs unattended. These apply regardless of transport. 
+opts.security_provider = DefaultSecurityProvider() +opts.permission_policy = PermissionPolicy(default_decision="ask") + +session = agent.session("/repo", opts) + +result = session.run("Audit the repo for hardcoded secrets.") +print(result) + +session.close() ``` -这个示例展示 session option 形状和 transport constructor。依赖策略、上下文注入、idle、审计或监督行为前,应先测试你的 live AHP server。 +将 `default_decision` 设为 `"deny"` 可获得更严格、完全受控的姿态;仅在受信任的沙箱环境中才使用 `"allow"`。传输层与安全层互相独立。 + + + + +## 说明 + +- **AHP 是传输层,而非策略。** 选择 HTTP、WebSocket 还是 Unix 套接字,改变的是会话*如何*抵达其宿主,而不改变智能体*被允许做什么*。安全完全由安全提供器和权限策略掌控。 +- **`DefaultSecurityProvider`** 提供了合理的基线。当你需要强制执行组织特定的规则(路径白名单、命令审查、脱敏)时,请提供你自己的提供器。 +- **权限策略**(`defaultDecision` / `default_decision`)是最重要的单个安全开关。交互式使用时优先选择 `'ask'`,无人值守运行时除非工作区已沙箱化,否则优先选择 `'deny'`。 +- 这个示例验证会话选项的形状和传输构造函数。在依赖策略、上下文注入、idle、审计或监督行为之前,应先测试你的实时 AHP 服务器。 diff --git a/apps/docs/content/docs/cn/code/examples/auto-compact.mdx b/apps/docs/content/docs/cn/code/examples/auto-compact.mdx index 0706bd8..7892063 100644 --- a/apps/docs/content/docs/cn/code/examples/auto-compact.mdx +++ b/apps/docs/content/docs/cn/code/examples/auto-compact.mdx @@ -1,17 +1,81 @@ --- title: "自动压缩" -description: "让长 session 保持在上下文预算内" +description: "让 runtime 自动将长 session 保持在上下文预算内" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # 自动压缩 +A3S Code 可以替你将长对话保持在模型的上下文预算内。启用 `autoCompact` 后, +runtime 会持续监测上下文用量;一旦超过 `autoCompactThreshold`,较早的轮次就会被 +压缩成一份持续更新的摘要,让 agent 在大量步骤中始终保持连贯,而无需你手动管理 +token。续写则处理另一个方向:当单条回复因长度被截断时,runtime 会自动继续生成, +拼出完整回复。 + + + + ```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); + const session = agent.session('/repo', { + // Compact older turns once context fills past the threshold. autoCompact: true, autoCompactThreshold: 0.75, + // Auto-continue a single response that the model truncates by length. continuationEnabled: true, maxContinuationTurns: 3, }); + +// Run a long, multi-step task. 
The runtime compacts older turns as needed; +// you never touch the token math. +for (let i = 0; i < 50; i++) { + await session.send(`Step ${i}: continue refactoring the parser`); +} + +// Inspect what the session is currently carrying. +console.log('history turns:', session.history().length); +console.log('recent memory:', await session.memoryRecent(5)); + +await session.close(); ``` -长 session 需要让 runtime 管理 context pressure 时,可以启用自动压缩。 + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open("agent.acl").read()) + +opts = SessionOptions() +# Compact older turns once context fills past the threshold. +opts.auto_compact = True +opts.auto_compact_threshold = 0.75 +# Auto-continue a single response that the model truncates by length. +opts.continuation_enabled = True +opts.max_continuation_turns = 3 +session = agent.session("/repo", opts) + +# Run a long, multi-step task. The runtime compacts older turns as needed; +# you never touch the token math. +for i in range(50): + session.send(f"Step {i}: continue refactoring the parser") + +# Inspect what the session is currently carrying. 
+print("history turns:", len(session.history())) +print("recent memory:", session.memory_recent(5)) + +session.close() +``` + + + + +长 session 若希望由 runtime 替你管理 context pressure,就使用自动压缩。 +`autoCompactThreshold` / `auto_compact_threshold` 是触发压缩的上下文窗口占比 +(0.0–1.0,默认 0.8);调低它可以更早触发压缩。用 `history()`(同步)和 +`memoryRecent` / `memory_recent` 查看实时 session。 diff --git a/apps/docs/content/docs/cn/code/examples/batch.mdx b/apps/docs/content/docs/cn/code/examples/batch.mdx index a32f77e..831a162 100644 --- a/apps/docs/content/docs/cn/code/examples/batch.mdx +++ b/apps/docs/content/docs/cn/code/examples/batch.mdx @@ -1,15 +1,79 @@ --- title: "Batch" -description: "在合适场景组合确定性操作" +description: "通过组合 SDK 的确定性辅助方法来批量执行确定性操作" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Batch -`batch` 是按意图启用的分组操作工具。宿主工作流或某个 agent 回合已经有一组清晰、相互独立、确定性的步骤时,可以使用它。 +SDK 中并没有 `batch()` 方法。当宿主工作流或某个 agent 回合已经有一组清晰、相互独立、 +确定性的步骤时,你可以直接用会话提供的确定性辅助方法(`readFile`、`grep`、`glob`、`ls`、 +`git`)把它们组合起来,并自行汇总结果。这样"批处理"就完全在你的掌控之中:无需任何模型调用, +执行顺序明确,且除非你主动调用,否则不会执行任何破坏性操作。 + +下面的示例读取包元数据、changelog 和 release script,然后在不编辑任何文件的前提下报告版本不一致之处。 + + + + +```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session('/path/to/project'); + +// Node helpers are async — group independent reads with Promise.all. +const [pkg, changelog, releaseScript] = await Promise.all([ + session.readFile('package.json'), + session.readFile('CHANGELOG.md'), + session.readFile('scripts/release.sh'), +]); + +const pkgVersion = JSON.parse(pkg).version; +const mismatches = []; +if (!changelog.includes(pkgVersion)) mismatches.push(`CHANGELOG.md is missing ${pkgVersion}`); +if (!releaseScript.includes(pkgVersion)) mismatches.push(`release.sh is missing ${pkgVersion}`); + +console.log(mismatches.length ? 
mismatches.join('\n') : 'All files agree on the version.'); + +await session.close(); +``` + + + + +```python +import json +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open("agent.acl").read()) +session = agent.session("/path/to/project", SessionOptions()) + +# Python helpers are synchronous — call them in sequence, no await. +pkg = session.read_file("package.json") +changelog = session.read_file("CHANGELOG.md") +release_script = session.read_file("scripts/release.sh") -```text -运行一个 batch,读取包元数据、changelog 和 release script, -然后在不编辑文件的前提下总结不一致之处。 +pkg_version = json.loads(pkg)["version"] +mismatches = [] +if pkg_version not in changelog: + mismatches.append(f"CHANGELOG.md is missing {pkg_version}") +if pkg_version not in release_script: + mismatches.append(f"release.sh is missing {pkg_version}") + +print("\n".join(mismatches) if mismatches else "All files agree on the version.") + +session.close() ``` -不要把破坏性操作放入 batch。任何破坏性宿主工作流都应放在显式应用确认或自动化关卡后面。 + + + +不要把破坏性操作混入这些分组读取之中。任何破坏性宿主工作流(写入、`git` 提交、`bash`) +都应放在显式的应用确认或你的自动化关卡之后,并通过 `permissionPolicy` / +`permission_policy` 进行约束,避免出现意料之外的步骤被静默执行。 + +如果这些步骤并非相互独立——每一步都依赖上一步的结果,并且你希望由 agent 来驱动它们—— +请改用 [`session.pipeline(...)`](/cn/docs/code/examples/orchestration),它会分阶段运行任务, +每个阶段都能接收到上一阶段的输出。 diff --git a/apps/docs/content/docs/cn/code/examples/direct-tools.mdx b/apps/docs/content/docs/cn/code/examples/direct-tools.mdx index 7960b7b..7983aae 100644 --- a/apps/docs/content/docs/cn/code/examples/direct-tools.mdx +++ b/apps/docs/content/docs/cn/code/examples/direct-tools.mdx @@ -1,24 +1,46 @@ --- title: "直接工具" -description: "不经过 LLM 回合运行确定性工具" +description: "不消耗 LLM 回合即可运行确定性宿主工具" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # 直接工具 +`session.tool(name, args)`(以及 `glob`、`grep`、`readFile` 等类型化辅助方法)会直接运行宿主工具,循环中不发起任何模型调用。它们适用于测试、迁移以及宿主驱动的工作流——你需要确定性结果,而不是一次智能体回合。对于无人值守的调用,请将权限策略的 `defaultDecision` 设为 `allow`,这样工具就不会因等待确认提示而阻塞。 + + + + ```ts -const files = await 
session.glob('content/docs/**/*.mdx'); -const matches = await session.grep('default_model|providers "openai"|baseUrl'); +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session('.', { + permissionPolicy: { defaultDecision: 'allow' }, +}); + +// Glob: list files by pattern +const files = await session.glob('**/*.ts'); +console.log(`glob found ${files.length} TypeScript files`); + +// Grep: search file contents +const matches = await session.grep('Agent.create'); +console.log(`grep found ${matches.length} matches`); + +// Read a file const readme = await session.readFile('README.md'); -const tests = await session.bash('npm run build'); -const raw = await session.tool('read', { file_path: 'README.md' }); +console.log(`README is ${readme.length} bytes`); + +// Direct tool call by name +const raw = await session.tool('read', { file_path: 'package.json' }); +console.log(`package.json via tool(): ${String(raw).length} bytes`); + +// Inspect available tool schemas const schemas = session.toolDefinitions(); -const program = await session.program({ - source: 'export default async function run(ctx, inputs) { return ctx.grep(inputs.q); }', - inputs: { q: 'planningMode' }, - allowedTools: ['grep'], -}); +console.log(`session exposes ${schemas.length} tools`); -// 结构化输出:生成符合 schema 校验的 JSON 对象 +// Structured output: generate a schema-validated JSON object const structured = await session.tool('generate_object', { schema: { type: 'object', @@ -28,9 +50,66 @@ const structured = await session.tool('generate_object', { language: { type: 'string' }, }, }, - prompt: '有多少 .mdx 文件?它们是什么语言的?', + prompt: 'How many TypeScript files are in this project?', schema_name: 'file_stats', }); +console.log('structured output:', structured); + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions, PermissionPolicy + +agent = Agent.create(open('agent.acl').read()) +opts = SessionOptions() 
+opts.permission_policy = PermissionPolicy(default_decision='allow') +session = agent.session('.', opts) + +# Glob: list files by pattern +files = session.glob('**/*.py') +print(f'glob found {len(files)} Python files') + +# Grep: search file contents +matches = session.grep('Agent.create') +print(f'grep found {len(matches)} matches') + +# Read a file +readme = session.read_file('README.md') +print(f'README is {len(readme)} bytes') + +# Direct tool call by name +raw = session.tool('read', {'file_path': 'pyproject.toml'}) +print(f'pyproject.toml via tool(): {len(str(raw))} bytes') + +# Inspect available tool schemas +schemas = session.tool_definitions() +print(f'session exposes {len(schemas)} tools') + +# Structured output: generate a schema-validated JSON object +structured = session.tool('generate_object', { + 'schema': { + 'type': 'object', + 'required': ['count', 'language'], + 'properties': { + 'count': {'type': 'integer'}, + 'language': {'type': 'string'}, + }, + }, + 'prompt': 'How many Python files are in this project?', + 'schema_name': 'file_stats', +}) +print('structured output:', structured) + +session.close() ``` -直接工具适合测试、迁移和宿主应用工作流。它们在 session 工作区下执行,应视为宿主侧特权操作。 + + + +直接工具在 session 工作区下执行,应视为宿主侧的特权操作。大多数调用(`read`、`glob`、`grep`)是纯确定性的;`generate_object` 是个例外——它仍会调用模型来填充经过 schema 校验的 JSON 对象,但由你显式驱动,而非通过自由形式的智能体回合。 + +可运行版本位于 `crates/code/sdk/node/examples/basic/test_generate_object.ts`(Python:`crates/code/sdk/python/examples/test_generate_object.py`)。 diff --git a/apps/docs/content/docs/cn/code/examples/external-tasks.mdx b/apps/docs/content/docs/cn/code/examples/external-tasks.mdx index 590048f..05a5460 100644 --- a/apps/docs/content/docs/cn/code/examples/external-tasks.mdx +++ b/apps/docs/content/docs/cn/code/examples/external-tasks.mdx @@ -1,22 +1,113 @@ --- title: "外部任务" -description: "从 agent 进程外完成工作" +description: "在 agent 进程之外完成 agent 排队的工作,并把结构化证据报告回去" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # 外部任务 +有些工作无法在 agent 
进程内运行:它属于独立的 worker、CI runner,或位于另一个系统中的 +人工处理者。当某个 lane 被路由到外部处理器时,该 lane 上的工具会被**排队**而不是被执行—— +它们会作为外部任务等待。你的宿主代码会取出待处理队列、以任意方式完成工作,并通过 +`completeExternalTask` 把结果报告回去。仅当外部 worker 确实是你架构的一部分时,才使用此模式。 + +外部任务由 [lane 队列](/cn/docs/code/examples/lane-queue) 产生:你必须先注册至少一个 +`external`(或 `hybrid`)lane 处理器,否则每个任务都会在进程内运行,也就没有什么可取出的。 + + + + ```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session(process.cwd()); + +// Route a lane to an external worker so its tools are queued, not executed. +await session.setLaneHandler('execute', { mode: 'external', timeoutMs: 300000 }); + +// Drain the tasks waiting for the host to fulfill. const pending = await session.pendingExternalTasks(); -if (pending.length > 0) { - await session.completeExternalTask(pending[0].id, { - success: true, - result: { - summary: 'worker 已完成测试运行', - command: 'npm run build', - }, - }); +for (const task of pending) { + console.log(`pending: ${task.task_id} on ${task.lane} (${task.command_type})`); + + try { + // ...the host does the real work here (run CI, call a service, ask a human)... + const ok = await session.completeExternalTask(task.task_id, { + success: true, + result: { + summary: 'worker completed the test run', + command: 'npm run build', + exitCode: 0, + }, + }); + console.log('completed:', ok); + } catch (err) { + await session.completeExternalTask(task.task_id, { + success: false, + error: String(err), + }); + } } + +await session.close(); +``` + + + + +```python +import os +from a3s_code import Agent + +agent = Agent.create(open("agent.acl").read()) +session = agent.session(os.getcwd()) + +# Route a lane to an external worker so its tools are queued, not executed. +session.set_lane_handler("execute", "external", 300000) + +# Drain the tasks waiting for the host to fulfill. 
+pending = session.pending_external_tasks() + +for task in pending: + print(f"pending: {task['task_id']} on {task['lane']} ({task['command_type']})") + + try: + # ...the host does the real work here (run CI, call a service, ask a human)... + ok = session.complete_external_task( + task["task_id"], + success=True, + result={ + "summary": "worker completed the test run", + "command": "npm run build", + "exit_code": 0, + }, + ) + print("completed:", ok) + except Exception as err: + session.complete_external_task( + task["task_id"], + success=False, + error=str(err), + ) + +session.close() ``` -外部 worker 应返回紧凑的结构化证据,而不是只有原始日志。 + + + +说明: + +- 每个待处理任务都携带 `task_id`、`session_id`、`lane`、`command_type`、`payload` 和 + `timeout_ms`。完成时请把 `task_id` 传回 `completeExternalTask` / `complete_external_task`, + 以便将本次完成匹配到正确的任务。 +- 结果的结构为 `{ success, result?, error? }`。其中 `result` 可以是任意可 JSON 序列化的负载; + `error` 是失败时的可选消息。 +- 成功时,返回紧凑的结构化证据(一段摘要加上关键事实),而不是只有原始日志——agent 会基于 + 该结果进行推理,所以请保持其精简且机器可读。 +- 当任务被找到并完成时,`completeExternalTask` / `complete_external_task` 返回 `true`,否则 + 返回 `false`。在 Python 中这些队列方法是同步的;在 Node 中 `pendingExternalTasks` 和 + `completeExternalTask` 返回 promise。 diff --git a/apps/docs/content/docs/cn/code/examples/git-worktree.mdx b/apps/docs/content/docs/cn/code/examples/git-worktree.mdx index 62fd733..473d818 100644 --- a/apps/docs/content/docs/cn/code/examples/git-worktree.mdx +++ b/apps/docs/content/docs/cn/code/examples/git-worktree.mdx @@ -1,14 +1,105 @@ --- title: "Git Worktree" -description: "通过 session 工具表面使用 git" +description: "通过 session 的 git 工具操作 git 与 git worktree" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Git Worktree +session 的 `git` 工具以宿主侧特权操作的方式运行 git。它接收一个结构化的命令对象 +(`command`,worktree 操作还可带 `subcommand`/`name`/`path`),并返回包含 +`output` 与 `exitCode` 的工具结果。本示例先检查仓库,然后直接通过工具表面创建、 +列出并移除一个 worktree。 + + + + ```ts -const status = await session.git('status'); -const diff = await session.git('diff'); -const log = await session.git('log', 
undefined, undefined, undefined, undefined, undefined, undefined, 5); +import { Agent } from '@a3s-lab/code'; +import * as path from 'path'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session('/path/to/repo'); + +// Inspect the repo +const status = await session.git({ command: 'status' }); +console.log(status.output); + +// Create a worktree on a new branch +const wtPath = path.join('/path/to/repo', 'wt-feature-auth'); +const created = await session.git({ + command: 'worktree', + subcommand: 'create', + name: 'feature-auth', + path: wtPath, +}); +if (created.exitCode !== 0) throw new Error(`create failed: ${created.output}`); + +// List worktrees +const list = await session.git({ command: 'worktree', subcommand: 'list' }); +console.log(list.output); + +// Remove the worktree when done +const removed = await session.git({ + command: 'worktree', + subcommand: 'remove', + path: wtPath, +}); +if (removed.exitCode !== 0) throw new Error(`remove failed: ${removed.output}`); + +await session.close(); ``` -直接 git 调用是宿主侧特权操作。push、publish 和 release workflow 应放在应用级确认或自动化关卡后面。 + + + +```python +import os +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open("agent.acl").read()) +session = agent.session("/path/to/repo", SessionOptions()) + +# Inspect the repo +status = session.git({"command": "status"}) +print(status.output) + +# Create a worktree on a new branch +wt_path = os.path.join("/path/to/repo", "wt-feature-auth") +created = session.git({ + "command": "worktree", + "subcommand": "create", + "name": "feature-auth", + "path": wt_path, +}) +if created.exit_code != 0: + raise RuntimeError(f"create failed: {created.output}") + +# List worktrees +listing = session.git({"command": "worktree", "subcommand": "list"}) +print(listing.output) + +# Remove the worktree when done +removed = session.git({ + "command": "worktree", + "subcommand": "remove", + "path": wt_path, +}) +if removed.exit_code != 0: + raise RuntimeError(f"remove 
failed: {removed.output}") + +session.close() +``` + + + + +请传入命令对象,而不是位置参数:`{ command: 'status' }`、`{ command: 'diff' }` +或 `{ command: 'worktree', subcommand: 'list' }`。每次调用都会返回一个工具结果, +因此在使用 output 之前,应先检查 `exitCode`(Node)/ `exit_code`(Python)。 + +直接 git 调用是宿主侧特权操作。push、publish 和 release workflow 应放在应用级确认 +或自动化关卡后面。 + +可运行版本位于 `crates/code/sdk/node/examples/git/test_worktree_git.ts`。 diff --git a/apps/docs/content/docs/cn/code/examples/hooks.mdx b/apps/docs/content/docs/cn/code/examples/hooks.mdx index 6b2ab75..e4636e7 100644 --- a/apps/docs/content/docs/cn/code/examples/hooks.mdx +++ b/apps/docs/content/docs/cn/code/examples/hooks.mdx @@ -1,11 +1,30 @@ --- -title: "Hooks" -description: "注册生命周期事件回调" +title: "生命周期钩子" +description: "注册、统计并注销用于观测和把控 Agent 活动的生命周期事件回调。" --- -# Hooks +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +# 生命周期钩子 + +钩子让你在 Agent 活动发生时进行观测和把控。你针对某个生命周期事件注册一个具名回调, +运行时会在该节点调用它,回调返回一个决策,例如 `{ action: "continue" }`。钩子可用于 +审计、脱敏、日志记录,或在不修改 Agent 提示词的前提下实施策略。 + +其生命周期是对称的:`registerHook` 按名称添加回调,`hookCount` 告诉你当前有多少个钩子 +处于活动状态,`unregisterHook` 则按名称移除某个钩子。 + + + ```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session(process.cwd()); + +// Register a named hook on a lifecycle event. The callback must NOT throw — +// always return a decision such as { action: 'continue' }. session.registerHook( 'observe-env-read', 'pre_tool_use', @@ -14,8 +33,59 @@ session.registerHook( () => ({ action: 'continue' }), ); -console.log(session.hookCount()); +console.log('active hooks:', session.hookCount()); // 1 + +await session.run('Read the project README and summarize it.'); + +// Remove the hook by name when you no longer need it. 
session.unregisterHook('observe-env-read'); +console.log('active hooks:', session.hookCount()); // 0 + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open('agent.acl').read()) +session = agent.session('.', SessionOptions()) + +# Register a named hook on a lifecycle event. The callback returns a decision. +session.register_hook( + 'observe-env-read', + 'pre_tool_use', + {'pathPattern': '**/.env*'}, + {'priority': 100}, + lambda: {'action': 'continue'}, +) + +print("active hooks:", session.hook_count()) # 1 + +session.run("Read the project README and summarize it.") + +# Remove the hook by name when you no longer need it. +session.unregister_hook('observe-env-read') +print("active hooks:", session.hook_count()) # 0 + +session.close() ``` -把 hook 当作生产关卡前,需要验证你依赖的具体 event path。 + + + +注意事项: + +- 钩子回调返回一个决策。返回 `{ action: "continue" }`(Node)/ + `{"action": "continue"}`(Python)即可让 Agent 继续执行。 +- 匹配器(`{ pathPattern: '**/.env*' }`)将钩子限定到路径匹配该模式的事件,而 + `{ priority: 100 }` 用于对同一事件上的多个钩子排序(数值越大越先执行)。 +- Node 钩子回调**不得**抛出异常——未捕获的抛出可能会终止进程。请让处理逻辑保持完备, + 并始终返回一个决策。 +- `hookCount` / `hook_count` 反映当前已注册钩子的数量,在测试中可方便地断言注册与 + 清理是否生效。 +- `unregisterHook` / `unregister_hook` 接收你注册时使用的名称。请始终拆除不再需要的 + 钩子,以免它们在多次运行之间泄漏。 +- 把钩子当作生产关卡前,请先验证你所依赖的具体 event path。 diff --git a/apps/docs/content/docs/cn/code/examples/index.mdx b/apps/docs/content/docs/cn/code/examples/index.mdx index 35667d2..ba2e5cf 100644 --- a/apps/docs/content/docs/cn/code/examples/index.mdx +++ b/apps/docs/content/docs/cn/code/examples/index.mdx @@ -1,10 +1,19 @@ --- title: "示例" -description: "A3S Code v3.1.0 示例" +description: "A3S Code v3.4.0 示例" --- # 示例 -这些示例使用 A3S Code v3.1.0 的当前概念:ACL 配置、环境变量注入、session API、基于 task 的委派、自动 subagent 委派、`.a3s/agents`、AHP、直接工具、验证和可选队列基础设施。 +这些示例使用 A3S Code v3.4.0 的概念:ACL 配置、环境变量注入、session API、流式输出、结构化输出、基于 task 的委派与自动 subagent 委派、可编程编排(`parallel` / `pipeline` / `parallelResumable`)、`.a3s/agents`、AHP 安全、技能、记忆、直接工具、验证、Git 
工作流以及可选的 MCP/队列基础设施。 -建议从[快速开始](/cn/docs/code/examples/quick-start)开始,然后阅读流式输出、模型切换、Hooks、安全和发布验证相关示例。 +建议从[快速开始](/cn/docs/code/examples/quick-start)开始,然后按主题阅读: + +- **Session 与运行时** — [快速开始](/cn/docs/code/examples/quick-start)、[流式输出](/cn/docs/code/examples/streaming)、[模型切换](/cn/docs/code/examples/model-switching)、[自动压缩](/cn/docs/code/examples/auto-compact) +- **结构化与可编程** — [结构化输出](/cn/docs/code/examples/structured-output)、[编排](/cn/docs/code/examples/orchestration)、[规划](/cn/docs/code/examples/planning)、[批处理](/cn/docs/code/examples/batch) +- **工具与上下文** — [直接工具](/cn/docs/code/examples/direct-tools)、[ripgrep 上下文](/cn/docs/code/examples/ripgrep-context)、[Prompt 插槽](/cn/docs/code/examples/prompt-slots)、[Git worktree](/cn/docs/code/examples/git-worktree) +- **技能与记忆** — [技能](/cn/docs/code/examples/skills)、[技能工具](/cn/docs/code/examples/skill-tool)、[记忆](/cn/docs/code/examples/memory)、[Hooks](/cn/docs/code/examples/hooks) +- **安全与验证** — [安全](/cn/docs/code/examples/security)、[AHP 安全](/cn/docs/code/examples/ahp-safety) +- **MCP 与队列** — [Lane 队列](/cn/docs/code/examples/lane-queue)、[外部任务](/cn/docs/code/examples/external-tasks) + +v3.4.0 新增:参见[编排](/cn/docs/code/examples/orchestration)示例,了解扇出(`parallel`)、分阶段(`pipeline`)和可恢复(`parallelResumable`)的多 agent 工作流。 diff --git a/apps/docs/content/docs/cn/code/examples/lane-queue.mdx b/apps/docs/content/docs/cn/code/examples/lane-queue.mdx index 60066d7..ff0b685 100644 --- a/apps/docs/content/docs/cn/code/examples/lane-queue.mdx +++ b/apps/docs/content/docs/cn/code/examples/lane-queue.mdx @@ -1,17 +1,88 @@ --- title: "Lane Queue" -description: "显式外部分发示例" +description: "将某条 lane 路由到外部 worker,并显式排空其待处理任务。" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Lane Queue +默认情况下,A3S Code 会在进程内运行每一个任务,没有任何队列。lane 队列属于**可选基础设施**:为某条 lane 注册一个外部 handler 后,路由到该 lane 的工具不会由 agent 执行,而是排入队列、等待外部 worker 取走。随后你自己排空这些待处理任务,按需运行它们,再把结果回报回来。只有当外部 worker 确实是你架构的一部分时,才需要用到它。 + +共有四条 lane:`control`、`query`、`execute` 和 `generate`。每个 handler 的 
`mode` 可以是 `internal`(默认)、`external` 或 `hybrid`。 + + + + ```ts -const session = agent.session('/repo', { - queueConfig: { enableDlq: true, enableMetrics: true }, -}); +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('./agent.acl'); +const session = agent.session(process.cwd(), { builtinSkills: true }); +// Route the "execute" lane to an external worker. +// Tools on this lane are NOT run by the agent; they are queued for +// an outside worker to pick up and complete. await session.setLaneHandler('execute', { mode: 'external', timeoutMs: 300000 }); + +// hasQueue() is false until at least one external/hybrid lane is registered. +console.log('queue active:', session.hasQueue()); + +// Drain whatever is waiting for an external worker. const pending = await session.pendingExternalTasks(); +for (const task of pending) { + console.log('pending:', task.task_id, task.lane, task.command_type); + + // ... hand off to your worker, run it, then report the outcome back: + await session.completeExternalTask(task.task_id, { + success: true, + result: { note: 'done by external worker' }, + }); +} + +console.log('lane queue drained'); +``` + + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open("agent.acl").read()) +opts = SessionOptions() +opts.builtin_skills = True +session = agent.session(".", opts) + +# Route the "execute" lane to an external worker. +# Tools on this lane are NOT run by the agent; they are queued for +# an outside worker to pick up and complete. +session.set_lane_handler("execute", "external", 300000) + +# has_queue() is False until at least one external/hybrid lane is registered. +print("queue active:", session.has_queue()) + +# Drain whatever is waiting for an external worker. +pending = session.pending_external_tasks() +for task in pending: + print("pending:", task["task_id"], task["lane"], task["command_type"]) + + # ... 
hand off to your worker, run it, then report the outcome back: + session.complete_external_task( + task["task_id"], + success=True, + result={"note": "done by external worker"}, + ) + +print("lane queue drained") ``` -只有架构中确实存在外部 worker 时才使用队列。 + + + +说明: + +- **默认路径不含队列。** 在你至少注册一个 `external`(或 `hybrid`)lane handler 之前,`hasQueue()` / `has_queue()` 始终返回 `false`。如果你从不调用 `setLaneHandler` / `set_lane_handler`,每个任务都会在进程内运行,也就没有任何待排空的内容。 +- 每个待处理任务都带有 `task_id`、`session_id`、`lane`、`command_type`、`payload` 和 `timeout_ms`。工作完成后,把 `task_id` 传回给 `completeExternalTask` / `complete_external_task`。 +- 结果结构为 `{ success, result?, error? }`——`result` 可承载任意可 JSON 序列化的载荷,`error` 是失败时的可选消息。`completeExternalTask` / `complete_external_task` 在找到并完成任务时返回 `true`,否则返回 `false`。 +- 在 Python 中这些队列方法是同步的;在 Node 中 `setLaneHandler`、`pendingExternalTasks` 和 `completeExternalTask` 返回 promise,而 `hasQueue` 是同步的。 diff --git a/apps/docs/content/docs/cn/code/examples/memory.mdx b/apps/docs/content/docs/cn/code/examples/memory.mdx index 345f993..60ab230 100644 --- a/apps/docs/content/docs/cn/code/examples/memory.mdx +++ b/apps/docs/content/docs/cn/code/examples/memory.mdx @@ -1,17 +1,84 @@ --- title: "记忆" -description: "记住并召回任务事实" +description: "记录任务结果,之后按相似度、标签或时间召回。" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # 记忆 +持久化记忆让会话能够记录哪些做法成功(以及哪些失败),并在之后取回这些事实。 +挂载一个 `FileMemoryStore`,让记录在多次运行间保留;用 `rememberSuccess` / +`rememberFailure` 写入结果,再用 `recallSimilar`、`recallByTags` 或 +`memoryRecent` 取回它们。 + + + + ```ts -import { FileMemoryStore } from '@a3s-lab/code'; +import { Agent, FileMemoryStore } from '@a3s-lab/code'; +const agent = await Agent.create('agent.acl'); const session = agent.session('/repo', { memoryStore: new FileMemoryStore('./.a3s/memory'), }); -await session.rememberSuccess('文档重写', ['grep', 'read'], '已移除过期配置说明'); -const memories = await session.recallSimilar('文档发布重写', 5); +// Record outcomes as the agent works. 
+await session.rememberSuccess( + 'refactored auth module', + ['read', 'edit', 'bash'], + 'all tests passed after extracting AuthService', +); +await session.rememberFailure( + 'migration attempt', + ['bash'], + 'psql connection refused on port 5432', +); + +// Recall later — by recency, by tool tags, or by semantic similarity. +const recent = await session.memoryRecent(10); +const byTags = await session.recallByTags(['read', 'edit'], 5); +const similar = await session.recallSimilar('auth refactor', 5); + +console.log(recent.length, byTags.length, similar.length); +``` + + + + +```python +from a3s_code import Agent, SessionOptions, FileMemoryStore + +agent = Agent.create(open("agent.acl").read()) +opts = SessionOptions() +opts.memory_store = FileMemoryStore("./.a3s/memory") +session = agent.session("/repo", opts) + +# Record outcomes as the agent works. Python helpers are synchronous — no await. +session.remember_success( + "refactored auth module", + ["read", "edit", "bash"], + "all tests passed after extracting AuthService", +) +session.remember_failure( + "migration attempt", + ["bash"], + "psql connection refused on port 5432", +) + +# Recall later — by recency, by tool tags, or by semantic similarity. 
+recent = session.memory_recent(10) +by_tags = session.recall_by_tags(["read", "edit"], 5) +similar = session.recall_similar("auth refactor", 5) + +print(len(recent), len(by_tags), len(similar)) ``` + + + + +`rememberSuccess` 和 `rememberFailure` 都接收一段简短的任务描述、相关工具列表 +(这些工具同时充当可检索的标签),以及结果文本。三个召回方法相互补充: +`memoryRecent(limit)` 返回最新的记录,`recallByTags(tags, limit)` 按你记录的 +工具标签过滤,`recallSimilar(query, limit)` 则按与查询的语义相关度对记录排序。 +若不设置 `memoryStore`,记忆仅存在于进程内,会话关闭时即被丢弃。 diff --git a/apps/docs/content/docs/cn/code/examples/meta.json b/apps/docs/content/docs/cn/code/examples/meta.json index eb536c9..94b99f5 100644 --- a/apps/docs/content/docs/cn/code/examples/meta.json +++ b/apps/docs/content/docs/cn/code/examples/meta.json @@ -5,6 +5,7 @@ "quick-start", "streaming", "structured-output", + "orchestration", "model-switching", "direct-tools", "planning", diff --git a/apps/docs/content/docs/cn/code/examples/model-switching.mdx b/apps/docs/content/docs/cn/code/examples/model-switching.mdx index 83c8967..daecbec 100644 --- a/apps/docs/content/docs/cn/code/examples/model-switching.mdx +++ b/apps/docs/content/docs/cn/code/examples/model-switching.mdx @@ -1,11 +1,21 @@ --- title: "模型切换" -description: "按 session 覆盖模型" +description: "为每个会话选择模型,并可针对每个 worker 智能体单独覆盖,以平衡成本与能力。" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # 模型切换 -配置多个模型: +会话会使用你在 `model` 选项中传入的模型运行。先声明一次智能体可以访问的所有模型, +然后为每个会话选择其一——用快速模型处理高频、低风险的工作,用能力更强的模型进行 +评审。当你希望在不改动任何提示词的前提下,于成本与能力之间取得平衡时,可以使用这 +种方式。 + +## 声明模型 + +模型在智能体文件中配置。每个 provider 列出它对外暴露的模型,当会话未设置 `model` +时则使用 `default_model`。 ```text default_model = "openai/MiniMax-M2.7-highspeed" @@ -19,11 +29,136 @@ providers "openai" { } ``` -按 session 覆盖模型: +## 按会话设置模型 + +`model` 选项在打开会话时设置。该会话运行的一切——`send`、`run`、`task`、 +`parallel`、`pipeline`——都会使用该模型。同一个智能体配置可以为不同会话驱动不同的 +模型选择。 + + + ```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); + +// A fast model for high-volume, low-stakes work. 
const fast = agent.session('/repo', { model: 'openai/MiniMax-M2.7-highspeed' }); +const draft = await fast.run('Draft a short README intro for this project.'); +console.log('draft:', draft); +await fast.close(); + +// A stronger model for review / higher-stakes reasoning. const review = agent.session('/repo', { model: 'openai/gpt-4o' }); +const critique = await review.run(`Critique this README intro:\n${draft}`); +console.log('critique:', critique); +await review.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open('agent.acl').read()) + +# A fast model for high-volume, low-stakes work. +fast_opts = SessionOptions() +fast_opts.model = 'openai/MiniMax-M2.7-highspeed' +fast = agent.session('/repo', fast_opts) +draft = fast.run('Draft a short README intro for this project.') +print('draft:', draft) +fast.close() + +# A stronger model for review / higher-stakes reasoning. +review_opts = SessionOptions() +review_opts.model = 'openai/gpt-4o' +review = agent.session('/repo', review_opts) +critique = review.run(f'Critique this README intro:\n{draft}') +print('critique:', critique) +review.close() ``` -per-session `model` override 可让同一个 agent 配置为不同 session 选择不同模型。 + + + +## 按 worker 智能体覆盖模型 + +worker 智能体通过各自的规格进行注册。为某个 worker 指定自己的 `model`,即可让它运行 +在与委派它的会话不同(通常更小、更廉价)的模型上。编排会话保留自己的 `model`,只有被 +委派出去的工作才会运行在该 worker 的模型上。 + + + + +```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); + +const session = agent.session('/repo', { + // Orchestrator stays on the stronger model. + model: 'openai/gpt-4o', + // High-volume exploration runs on the cheaper model. + workerAgents: [ + { + name: 'scout', + description: 'Reads files and reports findings.', + model: 'openai/MiniMax-M2.7-highspeed', + }, + ], +}); + +// Delegate exploration to the cheaper worker, then reason on the strong model. 
+const findings = await session.task('scout', 'List every public API in src/.'); +const plan = await session.run(`Given these findings, propose a refactor:\n${findings}`); +console.log(plan); + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions, WorkerAgentSpec + +agent = Agent.create(open('agent.acl').read()) + +opts = SessionOptions() +# Orchestrator stays on the stronger model. +opts.model = 'openai/gpt-4o' +# High-volume exploration runs on the cheaper model. +opts.worker_agents = [ + WorkerAgentSpec( + name='scout', + description='Reads files and reports findings.', + model='openai/MiniMax-M2.7-highspeed', + ), +] +session = agent.session('/repo', opts) + +# Delegate exploration to the cheaper worker, then reason on the strong model. +findings = session.task('scout', 'List every public API in src/.') +plan = session.run(f'Given these findings, propose a refactor:\n{findings}') +print(plan) + +session.close() +``` + + + + +说明: + +- `model` 的取值是一个标识符字符串,由你的运行时解析为智能体文件中声明的某个模型—— + SDK 中没有任何硬编码的模型名称。 +- worker 智能体的 `model` 仅作用于该智能体被委派的工作。会话自身的 + `send`/`run`/`task` 调用仍使用会话的 `model`。 +- 未设置 `model` 的 worker 会继承会话的 `model`,因此你只需为那些换用不同模型确有收益 + 的智能体进行覆盖即可。 + +展示会话 `model` 选项的可运行版本位于 +`crates/code/sdk/node/examples/basic/test_api_alignment.ts`。 diff --git a/apps/docs/content/docs/cn/code/examples/orchestration.mdx b/apps/docs/content/docs/cn/code/examples/orchestration.mdx new file mode 100644 index 0000000..edf303c --- /dev/null +++ b/apps/docs/content/docs/cn/code/examples/orchestration.mdx @@ -0,0 +1,237 @@ +--- +title: "编排" +description: "用 session.parallel 扇出独立任务,用 session.pipeline 构建按条目执行的多阶段链,用 session.parallelResumable 恢复带日志的运行。" +--- + +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +# 编排 + +本页展示 A3S Code v3.4.0 中的三个可编程编排原语:用于扇出的 `session.parallel`、用于按条目执行多阶段链的 `session.pipeline`,以及用于在崩溃后仍可恢复的带日志运行的 `session.parallelResumable`。当你有多个相互独立的子代理任务时使用 parallel;当你需要让每个输入流经一组有序阶段时使用 pipeline。 + 
+关于这些原语背后的概念模型,请参阅[编排](/cn/docs/code/orchestration)。 + +## 用 `session.parallel` 扇出 + +`parallel` 接收一个 `AgentStepSpec` 数组并发执行它们,**按输入顺序**(而非完成顺序)为每个 spec 返回一个 `StepOutcome`。每个 spec 路由到一个具名子代理(`explore`、`plan`、`review`、`verification`、`general` 等)。在 spec 上设置 `outputSchema` / `output_schema` 即可拿到经过 schema 校验的 `structured` 结果。 + + + + +```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session('.', {}); + +// Independent steps; outcomes come back in input order, not completion order. +const outcomes = await session.parallel([ + { taskId: 'langs', agent: 'general', description: 'list', prompt: 'Name three systems languages.', maxSteps: 2 }, + { taskId: 'safe', agent: 'general', description: 'classify', prompt: 'Is Rust memory-safe without a GC? yes/no.', maxSteps: 2 }, +]); + +for (const o of outcomes) { + console.log(`[parallel] ${o.taskId}: success=${o.success}`); +} + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open("agent.acl").read()) +session = agent.session(".", SessionOptions()) + +# Independent steps; outcomes come back in input order, not completion order. +outcomes = session.parallel([ + { + "task_id": "langs", + "agent": "general", + "description": "list languages", + "prompt": "Name three systems programming languages, comma-separated.", + "max_steps": 2, + }, + { + "task_id": "verdict", + "agent": "general", + "description": "classify", + "prompt": "Is Rust memory-safe without a GC? Answer yes or no.", + "max_steps": 2, + # Schema-validated structured output for this step. 
+ "output_schema": { + "type": "object", + "properties": {"memory_safe": {"type": "boolean"}}, + "required": ["memory_safe"], + }, + }, +]) + +for o in outcomes: + print(f"[parallel] {o['task_id']}: success={o['success']} structured={o.get('structured')}") + +session.close() +``` + + + + +结果在 Python 中是字典(`o['task_id']`、`o['success']`、`o.get('structured')`),在 Node 中是对象(`o.taskId`、`o.success`、`o.structured`)。会话选项 `maxParallelTasks` / `max_parallel_tasks` 限制并发量;多出的 spec 会排队,而返回的结果数组仍然是完整的,且保持顺序。 + +## 用 `session.pipeline` 构建按条目执行的链 + +`pipeline` 接收一个输入 `items` 列表和一个有序的 `stages` 列表。每个条目独立地流经各个阶段——阶段之间**没有屏障**,因此一个较快的条目可以在一个较慢的条目仍处于阶段 1 时就到达阶段 2。阶段回调接收一个 `ctx`:第一个阶段看到 `ctx.item`,后续阶段看到 `ctx.previous`(上一个 `StepOutcome`,你可以基于其 `.output` 继续构建)。返回下一个 spec 以继续,或返回 `null` / `None` 以提前停止该条目的链。 + + + + +```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session('.', {}); + +// Stage 2 builds on stage 1's output. A stage callback MUST NOT throw — +// return null to stop this item's chain. +const results = await session.pipeline( + ['the Rust programming language'], + [ + (ctx) => ({ + taskId: 'sum', + agent: 'general', + description: 'summarize', + prompt: `In one sentence, what is ${ctx.item}?`, + maxSteps: 2, + }), + (ctx) => ({ + taskId: 'cls', + agent: 'general', + description: 'classify', + prompt: `Reply YES or NO: does this describe a programming language?\n\n${ctx.previous.output}`, + maxSteps: 2, + }), + ], +); + +for (const r of results) { + console.log(`[pipeline] final=${r === null ? null : JSON.stringify(r.output.slice(0, 60))}`); +} + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open("agent.acl").read()) +session = agent.session(".", SessionOptions()) + +# Each item chains through stages; stage 2 builds on stage 1. Return None from a +# stage (or raise — caught and treated as None) to stop that item's chain. 
+results = session.pipeline( + ["the Rust programming language"], + [ + lambda ctx: { + "task_id": "summarize", + "agent": "general", + "description": "summarize", + "prompt": f"In one sentence, what is {ctx['item']}?", + "max_steps": 2, + }, + lambda ctx: { + "task_id": "classify", + "agent": "general", + "description": "classify", + "prompt": "Reply with one word YES or NO: does this describe a " + f"programming language?\n\n{ctx['previous']['output']}", + "max_steps": 2, + }, + ], +) + +for r in results: + print(f"[pipeline] final={None if r is None else r['output'][:60]!r}") + +session.close() +``` + + + + +与 `parallel` 的关键区别:阶段是有序且相互依赖的,但各条目在阶段之间**不会**彼此等待。Node 的阶段回调绝不能抛出异常——出错时返回 `null`;Python 的阶段可以抛出(抛出的阶段会被捕获并视为 `None`)。 + +## 用 `session.parallelResumable` 恢复运行 + +`parallelResumable` 就是带日志的 `parallel`。它的第一个参数是 `specs`,第二个参数是稳定的 `workflowId`;每个步骤的结果都会被记录到会话的 store,因此如果进程在运行中途崩溃,你可以用同一个 `workflowId` 再次调用它,已完成的步骤会从日志中重放而不会重新执行。它**需要一个会话 store**——在打开会话时传入 `sessionStore` / `session_store`,否则调用会抛错。 + + + + +```ts +import { Agent, FileSessionStore } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +// parallelResumable journals to the session store; it throws without one. +const session = agent.session('.', { + sessionStore: new FileSessionStore('./.a3s/sessions'), +}); + +// Signature is (specs, workflowId): specs first, stable workflowId second. +const outcomes = await session.parallelResumable( + [ + { taskId: 'deps', agent: 'general', description: 'audit deps', prompt: 'Check manifests for outdated dependencies.', maxSteps: 2 }, + { taskId: 'tests', agent: 'verification', description: 'run tests', prompt: 'Run the test suite and summarize failures.', maxSteps: 2 }, + ], + 'nightly-audit', +); + +// Re-running with the same workflowId replays completed steps from the journal. 
+console.log(outcomes.map((o) => `${o.taskId}:${o.success}`).join(' ')); + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions, FileSessionStore + +agent = Agent.create(open("agent.acl").read()) +# parallel_resumable journals to the session store; it raises without one. +opts = SessionOptions() +opts.session_store = FileSessionStore("./.a3s/sessions") +session = agent.session(".", opts) + +# Signature is (specs, workflow_id): specs first, stable workflow_id second. +outcomes = session.parallel_resumable( + [ + {"task_id": "deps", "agent": "general", "description": "audit deps", "prompt": "Check manifests for outdated dependencies.", "max_steps": 2}, + {"task_id": "tests", "agent": "verification", "description": "run tests", "prompt": "Run the test suite and summarize failures.", "max_steps": 2}, + ], + "nightly-audit", +) + +# Re-running with the same workflow_id replays completed steps from the journal. +print(" ".join(f"{o['task_id']}:{o['success']}" for o in outcomes)) + +session.close() +``` + + + + +说明: + +- 三个原语都返回按输入顺序对齐的结果:`{ taskId, success, output, error?, structured? }`(Node 对象)/ `{ "task_id", "success", "output", "error"?, "structured"? 
}`(Python 字典)。`pipeline` 中个别条目的结果可能为 `null` / `None`,读取字段前请像上文示例那样先判空。 +- 在 spec 上设置 `outputSchema` / `output_schema` 可在 `structured` 中拿到解析后的结果。 +- `maxSteps` / `max_steps` 限制每个子代理的步数;会话选项 `maxParallelTasks` / `max_parallel_tasks` 限制扇出并发量。 +- Node 的 pipeline 阶段回调绝不能抛出异常——出错时返回 `null`。Python 的阶段可以抛出(抛出的阶段会被捕获并视为 `None`)。 + +可运行版本见 `crates/code/sdk/node/examples/orchestration/parallel-pipeline.mjs` 和 `crates/code/sdk/python/examples/orchestration_workflow.py`。 diff --git a/apps/docs/content/docs/cn/code/examples/planning.mdx b/apps/docs/content/docs/cn/code/examples/planning.mdx index dcd215e..a641d52 100644 --- a/apps/docs/content/docs/cn/code/examples/planning.mdx +++ b/apps/docs/content/docs/cn/code/examples/planning.mdx @@ -1,21 +1,70 @@ --- title: "Planning" -description: "启用 planning 与 goal tracking" +description: "使用 planningMode 让 agent 先规划再行动" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Planning +Planning 模式让 session 在开始调用工具之前先生成一份结构化计划。它适合多步骤工作 +(重构、发布评审、审计):在这类场景中,你希望 agent 先把目标拆解清楚,而不是直接动手修改。 + +通过 session 选项 `planningMode`(Node)/ `planning_mode`(Python)进行设置。 +可接受的值为: + +| 值 | 行为 | +| ------------ | ----------------------------------------------- | +| `"auto"` | 由 runtime 根据消息内容判断何时值得规划。 | +| `"enabled"` | 对每个请求都强制规划,即使是简单请求。 | +| `"disabled"` | 完全跳过规划,走最低延迟路径。 | + +`"auto"` 是默认的结构化预分析路径。 + + + + ```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); const session = agent.session('/repo', { - planningMode: 'enabled', - goalTracking: true, - maxToolRounds: 24, + planningMode: 'auto', }); -const result = await session.send('规划并完成发布就绪检查'); +const result = await session.send( + 'Plan and complete the release-readiness review.', +); console.log(result.text); +console.log(`${result.toolCallsCount} tool calls executed`); + +await session.close(); ``` -`planningMode: 'auto'` 是默认结构化预分析路径,`'enabled'` 强制启用 planning,`'disabled'` 用于低延迟请求。Planning 状态会挂到 run 上,宿主 UI 可以从 run events 渲染 TaskList 并追踪完成情况。 + + + +```python +from a3s_code import Agent, SessionOptions + +agent 
= Agent.create(open("agent.acl").read()) +opts = SessionOptions() +opts.planning_mode = "auto" +session = agent.session("/repo", opts) + +result = session.send("Plan and complete the release-readiness review.") +print(result.text) +print(f"{result.tool_calls_count} tool calls executed") + +session.close() +``` + + + + +Planning 状态会挂到 run 上,因此宿主 UI 可以从 run events 渲染任务清单,并在 agent +工作时更新完成情况。Planning 负责组织工作;完成证据仍然来自验证命令。 -完成证据仍然来自验证命令。 +可运行的 session 示例位于 +`crates/code/sdk/node/examples/orchestration/parallel-pipeline.mjs` 和 +`crates/code/sdk/python/examples/orchestration_workflow.py`。 diff --git a/apps/docs/content/docs/cn/code/examples/prompt-slots.mdx b/apps/docs/content/docs/cn/code/examples/prompt-slots.mdx index 3ee9fa5..1c449a8 100644 --- a/apps/docs/content/docs/cn/code/examples/prompt-slots.mdx +++ b/apps/docs/content/docs/cn/code/examples/prompt-slots.mdx @@ -1,16 +1,155 @@ --- title: "Prompt Slots" -description: "程序化角色、准则与回复风格" +description: "在不覆盖核心行为的前提下,定制智能体的角色设定、准则与回复风格。" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Prompt Slots +Prompt slots 是以声明式方式塑造智能体系统提示词的会话选项。请把它们用于宿主级行为—— +角色设定、编码规范、输出风格——这些内容不应写在每次的用户 prompt 里。这些槽位叠加在 +智能体内置指令之上,因此核心工具行为(读取、写入、运行命令)会被保留。 + +共有四个槽位: + +| 槽位(Node / Python) | 用途 | +|------|------| +| `role` / `role` | 智能体采用的角色设定。 | +| `guidelines` / `guidelines` | 智能体必须遵循的规范与规则。 | +| `responseStyle` / `response_style` | 智能体回复的格式方式。 | +| `extra` / `extra` | 逐字追加的自由格式指令。 | + +## 基本用法 + +在打开会话时设置任意子集的槽位。它们会应用于该会话的每一轮对话。 + + + + +```ts +import { Agent } from '@a3s-lab/code'; + +async function main() { + // Node: create() takes a PATH to the ACL agent file. + const agent = await Agent.create('agent.acl'); + + const session = agent.session('/repo', { + role: 'release-readiness reviewer', + guidelines: 'Find blockers before improvements. 
Require command evidence for done claims.', + responseStyle: 'concise, findings first', + }); + + const result = await session.send('Is this repo ready to ship?'); + console.log(result.text); + + await session.close(); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); +``` + + + + +```python +from a3s_code import Agent, SessionOptions + +def main(): + # Python: create() takes the ACL SOURCE TEXT. + agent = Agent.create(open('agent.acl').read()) + + session = agent.session('/repo', SessionOptions( + role='release-readiness reviewer', + guidelines='Find blockers before improvements. Require command evidence for done claims.', + response_style='concise, findings first', + )) + + result = session.send('Is this repo ready to ship?') + print(result.text) + + session.close() + +main() +``` + + + + +## 逐个槽位说明 + +这四个槽位彼此独立组合。仅设置角色的会话、带有严格准则的评审者、以及追加自由格式指令的 +会话,使用的都是同一组选项。 + + + + ```ts -const session = agent.session('/repo', { - role: '发布就绪检查员', - guidelines: '先找阻塞项,再列改进建议。所有完成声明都必须有命令证据。', - responseStyle: '简洁,发现优先', +// 1. Custom role only. +let session = agent.session(workspace, { + role: 'You are a senior Rust developer who specializes in async programming.', +}); + +// 2. Role + guidelines + response style. +session = agent.session(workspace, { + role: 'You are a Python code reviewer.', + guidelines: 'Always check for type hints. Flag any use of `eval()`.', + responseStyle: 'Reply in bullet points. Be concise.', +}); + +// 3. Extra freeform instructions only. +session = agent.session(workspace, { + extra: "Always end your response with '-- A3S'", }); + +// Core tool behavior is preserved regardless of the slots. +session = agent.session(workspace, { + role: 'You are a minimalist file manager.', + guidelines: 'Only create files when explicitly asked.', +}); +const result = await session.send( + "Create a file called test.txt with the content 'prompt slots work'. Then read it back.", +); +``` + + + + +```python +# 1. Custom role only. 
+session = agent.session(workspace, SessionOptions( + role='You are a senior Rust developer who specializes in async programming.', +)) + +# 2. Role + guidelines + response style. +session = agent.session(workspace, SessionOptions( + role='You are a Python code reviewer.', + guidelines='Always check for type hints. Flag any use of `eval()`.', + response_style='Reply in bullet points. Be concise.', +)) + +# 3. Extra freeform instructions only. +session = agent.session(workspace, SessionOptions( + extra="Always end your response with '-- A3S'", +)) + +# Core tool behavior is preserved regardless of the slots. +session = agent.session(workspace, SessionOptions( + role='You are a minimalist file manager.', + guidelines='Only create files when explicitly asked.', +)) +result = session.send( + "Create a file called test.txt with the content 'prompt slots work'. Then read it back.", +) ``` -Prompt slots 是 session options。适合放不应写在用户 prompt 里的宿主级行为。 + + + +槽位用于定制角色设定与行为规则;它们不会禁用工具,也不会改变智能体的核心循环。请把 +任务相关的请求保留在 `send` 消息中,而把应在会话每一轮都生效的行为交给这些槽位。 + +可运行版本位于 `crates/code/sdk/node/examples/skills/test_prompt_slots.ts`。 diff --git a/apps/docs/content/docs/cn/code/examples/quick-start.mdx b/apps/docs/content/docs/cn/code/examples/quick-start.mdx index fa29e8e..af6a9d3 100644 --- a/apps/docs/content/docs/cn/code/examples/quick-start.mdx +++ b/apps/docs/content/docs/cn/code/examples/quick-start.mdx @@ -1,45 +1,61 @@ --- title: "快速开始" -description: "创建 ACL 配置并运行第一个 session" +description: "创建一个 agent、打开会话、运行一轮对话并读取结果。" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # 快速开始 -创建 `agent.acl`: +最小可用的程序:从 ACL 文件创建一个 agent,在项目目录上打开会话,使用 `send` +运行一轮对话,打印回复文本,并查看运行时为该轮生成的验证摘要。 -```text -default_model = "openai/MiniMax-M2.7-highspeed" -max_parallel_tasks = 4 + + -providers "openai" { - apiKey = env("A3S_OPENAI_API_KEY") - baseUrl = env("A3S_OPENAI_BASE_URL") +```ts +import { Agent } from '@a3s-lab/code'; - models "MiniMax-M2.7-highspeed" { - name = "MiniMax M2.7 Highspeed" - 
tool_call = true - } -} +// Agent.create takes a PATH to the ACL file. +const agent = await Agent.create('agent.acl'); +const session = agent.session('.'); -auto_delegation { - enabled = true - auto_parallel = false - min_confidence = 0.72 - max_tasks = 2 -} +const result = await session.send('List the files in this directory.'); +console.log(result.text); + +// What the runtime checked while producing that turn. +console.log(session.verificationSummaryText()); + +await session.close(); ``` -运行一个 session: + + -```ts -import { Agent } from '@a3s-lab/code'; +```python +from a3s_code import Agent -const agent = await Agent.create('agent.acl'); -const session = agent.session(process.cwd(), { - builtinSkills: true, - autoDelegation: { enabled: true, maxTasks: 2 }, - autoParallel: false, -}); -const result = await session.send('总结这个仓库的结构'); -console.log(result.text); +# Agent.create takes the ACL SOURCE TEXT, not a path. +agent = Agent.create(open('agent.acl').read()) +session = agent.session('.') + +result = session.send('List the files in this directory.') +print(result.text) + +# What the runtime checked while producing that turn. 
+print(session.verification_summary_text()) + +session.close() ``` + + + + +请注意两个 SDK 之间的差异:在 Node.js 中,`Agent.create` 接收 ACL 文件的**路径**, +而在 Python 中它接收 ACL 的**源文本**(需要你自己读取文件)。使用完毕后,请务必 +调用 `close()` 关闭会话,以便运行时刷新状态并释放资源。 + +## 下一步 + +- [流式输出](/cn/docs/code/examples/streaming) — 在 token 到达时实时读取 +- [会话](/cn/docs/code/examples/sessions) — 持久化并恢复对话 diff --git a/apps/docs/content/docs/cn/code/examples/ripgrep-context.mdx b/apps/docs/content/docs/cn/code/examples/ripgrep-context.mdx index ce5ae31..6cdb13f 100644 --- a/apps/docs/content/docs/cn/code/examples/ripgrep-context.mdx +++ b/apps/docs/content/docs/cn/code/examples/ripgrep-context.mdx @@ -1,13 +1,74 @@ --- -title: "Ripgrep 上下文" -description: "用 grep 和 read 形成紧凑上下文管线" +title: "Ripgrep 上下文构建器" +description: "在向 agent 提问前,使用 grep 和 glob 收集代码上下文。" --- -# Ripgrep 上下文 +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +# Ripgrep 上下文构建器 + +通过 `session.grep` 和 `session.glob` 进行快速代码搜索,可以收集相关的文件和匹配行, +然后将它们注入到提示中 —— 这是在 agent 开始推理之前的一个轻量级检索步骤。当你希望将 +agent 限定在大型代码库的某个特定切片,而不是让它从头开始探索时,可以使用这种方式。 + + + ```ts -const hits = await session.grep('default_model|providers "openai"|baseUrl'); -const docs = await session.glob('content/docs/**/code/**/*.mdx'); +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session('.'); + +// 1. Find candidate files by glob pattern (returns a list of paths). +const files = await session.glob('src/**/*.ts'); + +// 2. Search the workspace for the symbol we care about (returns ripgrep text). +const hits = await session.grep('createSession'); + +// 3. Build a context string and feed it into a focused prompt. 
+const context = [ + `Files in scope:\n${files.join('\n')}`, + `Matches for "createSession":\n${hits}`, +].join('\n\n'); + +const answer = await session.run( + `Using only this context, explain how createSession is wired up:\n\n${context}`, +); +console.log(answer); ``` -优先使用搜索摘要和相关片段,不要把完整搜索输出注入 prompt。 + + + +```python +from a3s_code import Agent + +agent = Agent.create(open('agent.acl').read()) +session = agent.session('.') + +# 1. Find candidate files by glob pattern (returns a list of paths). +files = session.glob('src/**/*.ts') + +# 2. Search the workspace for the symbol we care about (returns ripgrep text). +hits = session.grep('createSession') + +# 3. Build a context string and feed it into a focused prompt. +context = '\n\n'.join([ + 'Files in scope:\n' + '\n'.join(files), + 'Matches for "createSession":\n' + hits, +]) + +answer = session.run( + f'Using only this context, explain how createSession is wired up:\n\n{context}' +) +print(answer) +``` + + + + +`glob` 返回匹配的文件路径列表,而 `grep` 则以单个字符串的形式返回原始的 ripgrep 输出。 +两者都在本地运行并能快速返回,因此你可以在花费一次模型调用之前,链式执行多次搜索来低成本地 +组装上下文。当你需要文件的完整内容而不仅仅是匹配行时,可以将它们与 `session.readFile` +搭配使用。 diff --git a/apps/docs/content/docs/cn/code/examples/security.mdx b/apps/docs/content/docs/cn/code/examples/security.mdx index 1d33703..2942c3f 100644 --- a/apps/docs/content/docs/cn/code/examples/security.mdx +++ b/apps/docs/content/docs/cn/code/examples/security.mdx @@ -1,20 +1,186 @@ --- title: "安全" -description: "为副作用使用显式权限" +description: "通过权限策略、人工确认流程和安全提供器对特权操作进行管控" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # 安全 +智能体可能产生的每一个副作用——写文件、运行 `bash`、执行 git 推送——都会经过权限策略。 +先设置 `defaultDecision`,再列出应被 `allow`(放行)、`deny`(拒绝)或进入 `ask`(询问) +路径的模式。若要引入人工把关,可加上确认策略:`ask` 决策会在 `confirmation_required` +事件处暂停,让你的应用(或人工)对每次调用进行批准或拒绝。只要智能体面向真实仓库运行, +就应使用这套机制。 + + + + ```ts -const session = agent.session('/repo', { +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); + +const session = 
agent.session(process.cwd(), { permissionPolicy: { deny: ['write(**/.env*)', 'bash(rm -rf*)'], ask: ['bash(git push*)', 'bash(npm publish*)'], - allow: ['read(*)', 'grep(*)', 'glob(*)', 'bash(npm run build*)'], - defaultDecision: 'ask', + defaultDecision: 'allow', + }, + // Turn the `ask` patterns into a human-in-the-loop confirmation flow. + confirmationPolicy: { + enabled: true, + defaultTimeoutMs: 120000, + timeoutAction: 'reject', + }, +}); + +// Stream execution and resolve confirmations as they arrive. +const stream = await session.stream('Bump the version and push the release'); +while (true) { + const next = await stream.next(); + if (next.done || !next.value) break; + + const event = next.value; + if (event.type === 'confirmation_required') { + // Look up the pending request for richer display. + const [pending] = await session.pendingConfirmations(); + const toolId = pending?.toolId ?? event.toolId; + console.log(`[confirm] ${pending?.toolName ?? event.toolName}`); + console.log(JSON.stringify(pending?.args ?? {}, null, 2)); + + // In a real app, prompt the user here. + const approved = false; // deny risky operations by default + if (toolId) await session.confirmToolUse(toolId, approved, 'Reviewed by host'); + } +} + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions, PermissionPolicy, ConfirmationPolicy + + +def main() -> None: + agent = Agent.create(open("agent.acl").read()) + + opts = SessionOptions() + opts.permission_policy = PermissionPolicy( + deny=["write(**/.env*)", "bash(rm -rf*)"], + ask=["bash(git push*)", "bash(npm publish*)"], + default_decision="allow", + ) + # Turn the `ask` patterns into a human-in-the-loop confirmation flow. + opts.confirmation_policy = ConfirmationPolicy( + enabled=True, + default_timeout_ms=120_000, + timeout_action="reject", + ) + + session = agent.session(".", opts) + + # Stream execution and resolve confirmations as they arrive. 
+ for event in session.stream("Bump the version and push the release"): + if event.event_type == "confirmation_required": + # Look up the pending request for richer display. + pending = session.pending_confirmations() + first = pending[0] if pending else {} + tool_id = first.get("tool_id") or event.tool_id + print(f"[confirm] {first.get('tool_name') or event.tool_name}") + + # In a real app, prompt the user here. + approved = False # deny risky operations by default + if tool_id: + session.confirm_tool_use(tool_id, approved, "Reviewed by host") + + session.close() + + +if __name__ == "__main__": + main() +``` + + + + +## 添加安全提供器 + +`DefaultSecurityProvider` 会启用输入污点追踪和输出净化,独立于权限策略对工具的输入输出 +进行筛查。通过 `securityProvider`(Node)/ `security_provider`(Python)传入;省略则完全 +关闭安全功能。 + + + + +```ts +import { Agent, DefaultSecurityProvider } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); + +const session = agent.session(process.cwd(), { + securityProvider: new DefaultSecurityProvider(), + permissionPolicy: { + ask: ['bash*'], + defaultDecision: 'allow', }, }); + +// Privileged host operations run through the provider + policy. +const out = await session.bash('echo "screened by the security provider"'); +console.log(out); + +await session.close(); ``` -除非最终步骤由受控自动化负责,否则 release 和 publish 操作应保持在 `ask` 或 `deny` 路径上。 -`session.tool()` 这类宿主直接调用是特权操作;暴露给用户前应由宿主应用先做权限判断。 + + + +```python +from a3s_code import Agent, SessionOptions, PermissionPolicy, DefaultSecurityProvider + + +def main() -> None: + agent = Agent.create(open("agent.acl").read()) + + opts = SessionOptions() + opts.security_provider = DefaultSecurityProvider() + opts.permission_policy = PermissionPolicy( + ask=["bash*"], + default_decision="allow", + ) + + session = agent.session(".", opts) + + # Privileged host operations run through the provider + policy. 
+ out = session.bash('echo "screened by the security provider"') + print(out) + + session.close() + + +if __name__ == "__main__": + main() +``` + + + + +## 说明 + +- `defaultDecision` 是所有未被 `allow` / `deny` / `ask` 匹配到的模式的兜底决策(取值为 + `allow`、`deny` 或 `ask` 之一)。仅对自动化确实需要的部分逐步放开。 +- 设置 `enabled: true` 的 `confirmationPolicy` 才会把 `ask` 决策变成会暂停的 + `confirmation_required` 事件。通过 `session.confirmToolUse(toolId, approved, reason?)` + 逐个处理;若在 `defaultTimeoutMs` 内未收到答复,则由 `timeoutAction`(`reject`)决定结果。 +- 除非最终步骤由受控自动化负责,否则 release 和 publish 操作(`bash(git push*)`、 + `bash(npm publish*)`)应保持在 `ask` 或 `deny` 路径上。 +- `session.tool()`、`session.bash()`、`session.git()` 这类宿主直接调用都是特权操作。 + 它们同样会经过相同的提供器和策略,因此应进行管控,而不是无防护地暴露出去。 + +可运行的确认循环示例位于 +`crates/code/sdk/node/examples/streaming/hitl_confirmation_loop.ts` 和 +`crates/code/sdk/python/examples/hitl_confirmation_loop.py`。 diff --git a/apps/docs/content/docs/cn/code/examples/skill-tool.mdx b/apps/docs/content/docs/cn/code/examples/skill-tool.mdx index 08140ce..c60eb81 100644 --- a/apps/docs/content/docs/cn/code/examples/skill-tool.mdx +++ b/apps/docs/content/docs/cn/code/examples/skill-tool.mdx @@ -1,15 +1,87 @@ --- title: "Skill 工具" -description: "在回合中搜索并应用 skills" +description: "将已注册的 skill 作为可调用工具来调用" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Skill 工具 -当任务需要时,可以明确要求 agent 搜索并应用 skills: +skills 以两个核心工具的形式暴露给模型:`search_skills`(按意图查找 skill)和 +`Skill`(按名称调用 skill)。tool 类型的 skill 会运行其处理器;instruction 类型的 +skill 会返回其正文供模型应用。你既可以让模型在一次运行中调用这些工具,也可以通过 +`session.tool('Skill', ...)` 直接从 SDK 调用某个 skill。 + +通过 `skillDirs` / `skill_dirs` 注册 skill 目录(包含若干 `SKILL.md` 文件的文件夹), +或使用 `builtinSkills` / `builtin_skills` 依赖内置发现机制。两种方式都会让这些 skills +通过 `Skill` 与 `search_skills` 可见。 + + + + +```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session(process.cwd(), { + builtinSkills: true, + skillDirs: ['./skills'], // a folder of SKILL.md files +}); + +// Skill and 
search_skills are core tools — confirm they're on the surface. +console.log(session.toolNames()); + +// Option A: let the model search for and apply a skill during a run. +const run = await session.run('Search available skills, then apply the most relevant one.'); +console.log(run); + +// Option B: invoke a skill directly as a callable tool. +// Canonical args: { skill_name, prompt? }. +const result = await session.tool('Skill', { + skill_name: 'code-review', + prompt: 'Review this patch for correctness and regressions.', +}); +console.log(result); + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open('agent.acl').read()) +opts = SessionOptions() +opts.builtin_skills = True +opts.skill_dirs = ['./skills'] # a folder of SKILL.md files +session = agent.session('.', opts) + +# Skill and search_skills are core tools — confirm they're on the surface. +print(session.tool_names()) -```text -搜索可用 skills,寻找发布检查指导。 -应用最匹配的 skill,然后检查包元数据并构建证据。 +# Option A: let the model search for and apply a skill during a run. +run = session.run('Search available skills, then apply the most relevant one.') +print(run) + +# Option B: invoke a skill directly as a callable tool. +# Canonical args: { skill_name, prompt? }. 
+result = session.tool('Skill', { + 'skill_name': 'code-review', + 'prompt': 'Review this patch for correctness and regressions.', +}) +print(result) + +session.close() ``` -驾驭层把 `search_skills` 和 `Skill` 作为核心 skill 工具暴露。Skill 管理由 SDK 注册、`skillDirs` 或项目文件完成,不再通过模型可见的管理工具处理。 + + + +`SKILL.md` 在 frontmatter 中声明其 `kind`(`tool`、`instruction` 或 `agent`)。对于 +tool 类型的 skill,`Skill` 工具会运行该 skill 的处理器并返回其输出;对于 instruction +类型的 skill,则返回正文供模型应用。skill 管理由 SDK 注册、skill 目录或项目文件完成, +而不是通过模型可见的管理工具处理。 + +可运行版本位于 `crates/code/sdk/node/examples/skills/test_custom_skills_agents.ts`。 diff --git a/apps/docs/content/docs/cn/code/examples/skills.mdx b/apps/docs/content/docs/cn/code/examples/skills.mdx index e97ab91..3241ec4 100644 --- a/apps/docs/content/docs/cn/code/examples/skills.mdx +++ b/apps/docs/content/docs/cn/code/examples/skills.mdx @@ -1,25 +1,113 @@ --- -title: "Skills" -description: "加载文件型和内置 skills" +title: "技能与自定义智能体" +description: "切换内置技能,并从项目目录加载你自己的技能和子智能体。" --- -# Skills +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +# 技能与自定义智能体 + +A3S Code 自带一组内置技能,你还可以从磁盘上的目录加载自己的技能和子智能体来扩展会话。 +使用 `builtinSkills` 选项可以开启或关闭这些捆绑的技能,使用 `agentDirs`(或 +`registerAgentDir`)可以让会话指向包含你自定义 `*.skill.md` 和智能体定义的文件夹。 +当你希望在不改动运行时的情况下实现项目专属的行为时,这是合适的做法。 + + + ```ts -const session = agent.session('/repo', { +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); + +// Built-in skills ON, plus custom skills/agents from project dirs. +const session = agent.session('/path/to/project', { builtinSkills: true, - skillDirs: ['./skills'], + agentDirs: ['./.a3s/skills', './.a3s/agents'], }); + +// You can also register more directories after the session exists. +session.registerAgentDir('./team/shared-agents'); + +// Inspect what the session loaded. +console.log('Tools:', session.toolNames()); +console.log('Commands:', session.listCommands()); + +// The agent now has access to both built-in and custom skills. 
+const result = await session.run( + 'Use the project conventions skill to scaffold a new module.', +); +console.log(result); + +await session.close(); ``` -Skill 文件示例: + + -```md ---- -name: release-review -description: 检查发布阻塞项 -allowed-tools: "read(*), grep(*), bash(npm run build*)" ---- +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open('agent.acl').read()) + +# Built-in skills ON, plus custom skills/agents from project dirs. +opts = SessionOptions() +opts.builtin_skills = True +opts.agent_dirs = ['./.a3s/skills', './.a3s/agents'] + +session = agent.session('/path/to/project', opts) + +# You can also register more directories after the session exists. +session.register_agent_dir('./team/shared-agents') + +# Inspect what the session loaded. +print('Tools:', session.tool_names()) +print('Commands:', session.list_commands()) -先返回 blocker,并附证据。 +# The agent now has access to both built-in and custom skills. +result = session.run( + 'Use the project conventions skill to scaffold a new module.', +) +print(result) + +session.close() ``` + + + + +## 关闭内置技能 + +当你希望得到一个精简的会话、只使用你显式提供的技能时,将 `builtinSkills` / +`builtin_skills` 设为 `false`。此时捆绑的技能不再注册,因此 `toolNames()` / +`tool_names()` 只会反映你的自定义技能集加上核心工具。 + + + + +```ts +const session = agent.session('/path/to/project', { + builtinSkills: false, + agentDirs: ['./.a3s/skills'], +}); +``` + + + + +```python +opts = SessionOptions() +opts.builtin_skills = False +opts.agent_dirs = ['./.a3s/skills'] +session = agent.session('/path/to/project', opts) +``` + + + + +从 `agentDirs` 加载的自定义子智能体可以在 +[`session.parallel(...)`](/cn/docs/code/examples/orchestration) 和 +[`session.pipeline(...)`](/cn/docs/code/examples/orchestration) 中按名称引用, +与内置注册表中的智能体(`explore`、`plan`、`general`、`verification`、`review`)一起使用。 + +可运行版本位于 `crates/code/sdk/node/examples/skills/test_custom_skills_agents.ts`。
diff --git a/apps/docs/content/docs/cn/code/examples/streaming.mdx b/apps/docs/content/docs/cn/code/examples/streaming.mdx index 604c99a..49a7eb1 100644 --- a/apps/docs/content/docs/cn/code/examples/streaming.mdx +++ b/apps/docs/content/docs/cn/code/examples/streaming.mdx @@ -1,22 +1,97 @@ --- title: "流式输出" -description: "在回合运行时读取 AgentEvent" +description: "在回合运行时读取增量 AgentEvent" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # 流式输出 +`session.stream(prompt)` 会在回合运行过程中逐步产出事件,因此你可以在文本到达时即时渲染,并实时响应工具活动。当你需要实时 UI,或希望 CLI 逐 token 打印输出(而不是等待 `send` 或 `run` 返回完整结果)时,请使用它。 + +每个事件都带有一个类型标识。常见类型包括 `text_delta`(一段助手文本)、`tool_start` / `tool_end`(工具调用的开始与结束)、`verification`(验证摘要)以及 `error`。 + + + + ```ts -const stream = await session.stream('运行测试并解释失败原因'); +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session(process.cwd(), { planningMode: 'disabled' }); + +const stream = await session.stream( + 'Use the bash tool to run the tests, then summarize the result.', +); while (true) { - const { value: event, done } = await stream.next(); - if (done) break; - if (!event) continue; + const next = await stream.next(); + if (next.done || !next.value) break; - if (event.text) process.stdout.write(event.text); - if (event.toolName) console.log('\n[tool]', event.toolName); - if (event.error) console.error(event.error); + const event = next.value; + if (event.type === 'text_delta' && event.text) { + process.stdout.write(event.text); + } else if (event.type === 'tool_start') { + console.log(`\n[tool:start] ${event.toolName ?? 'unknown'}`); + } else if (event.type === 'tool_end') { + console.log(`\n[tool:end] ${event.toolName ?? 'unknown'} exit=${event.exitCode ?? 0}`); + } else if (event.type === 'verification') { + console.log(`\n[verification] ${event.verificationSummaryText ?? ''}`); + } else if (event.type === 'error') { + throw new Error(event.error ?? 
'stream error'); + } } + +console.log('\n[stream] complete'); +await session.close(); ``` -流式事件可以包含文本、工具调用、工具输出、错误、token 总数和验证摘要。 + + + +```python +import os + +from a3s_code import Agent, SessionOptions + + +def main() -> None: + agent = Agent.create(open("agent.acl").read()) + + opts = SessionOptions() + opts.planning_mode = "disabled" + session = agent.session(".", opts) + + prompt = "Use the bash tool to run the tests, then summarize the result." + + try: + for event in session.stream(prompt): + if event.event_type == "text_delta" and event.text: + print(event.text, end="", flush=True) + elif event.event_type == "tool_start": + print(f"\n[tool:start] {event.tool_name or 'unknown'}") + elif event.event_type == "tool_end": + print(f"\n[tool:end] {event.tool_name or 'unknown'} exit={event.exit_code or 0}") + elif event.event_type == "verification": + print(f"\n[verification] {event.verification_summary_text or ''}") + elif event.event_type == "error": + raise RuntimeError(event.error or "stream error") + print("\n[stream] complete") + finally: + session.close() + + +if __name__ == "__main__": + main() +``` + + + + +说明: + +- Node 端使用 `stream.next()` 手动迭代流,检查 `next.done` 与 `next.value`。在当前构建中,Python SDK 将流式输出暴露为同步迭代器,因此你可以用普通的 `for` 循环来消费它(而 `parallel`、`pipeline` 等编排 API 仍为 `async`)。 +- 类型标识字段在两种语言中不同:Node 读取 `event.type`,Python 读取 `event.event_type`。其余字段遵循各语言的大小写约定:Node 中为 `toolName` / `exitCode` / `verificationSummaryText`,Python 中为 `tool_name` / `exit_code` / `verification_summary_text`。 +- 当启用确认策略时,流式事件还可能包含人工介入(human-in-the-loop)确认信号(`confirmation_required`、`confirmation_received`、`confirmation_timeout`)。 + +可运行的流式示例位于 `crates/code/sdk/node/examples/streaming/` 目录下。一个完整的人工介入确认循环示例位于 `crates/code/sdk/node/examples/streaming/hitl_confirmation_loop.ts`,对应的 Python 版本位于 `crates/code/sdk/python/examples/hitl_confirmation_loop.py`。 diff --git a/apps/docs/content/docs/cn/code/examples/structured-output.mdx b/apps/docs/content/docs/cn/code/examples/structured-output.mdx 
index 171c190..88f7740 100644 --- a/apps/docs/content/docs/cn/code/examples/structured-output.mdx +++ b/apps/docs/content/docs/cn/code/examples/structured-output.mdx @@ -1,19 +1,29 @@ --- title: "结构化输出" -description: "使用 generate_object 生成符合 Schema 校验的 JSON 对象" +description: "使用 generate_object 工具生成符合 Schema 校验的 JSON 对象。" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # 结构化输出 -`generate_object` 工具生成严格符合 JSON Schema 的对象,支持智能体自主调用和直接调用两种方式。 +内置的 `generate_object` 工具会生成严格符合你所提供 JSON Schema 的 JSON 对象,而不是自由文本。当你需要机器可读的结果时使用它:抽取、分类、配置生成,或者作为另一个程序的输入。 + +你通过 `session.tool('generate_object', ...)` 调用它。工具结果会把校验后的对象以 JSON 形式放在 `result.output` 上——解析它并读取 `object` 字段。同一个工具也支持 agent 自主调用,即模型在 `send` 过程中自行决定调用它。 ## 直接工具调用 +最简单的方式:直接调用 `generate_object`,并从结果中解析出校验后的对象。 + + + + ```ts import { Agent } from '@a3s-lab/code'; const agent = await Agent.create('config.acl'); const session = agent.session('.', { + builtinSkills: true, permissionPolicy: { defaultDecision: 'allow' }, }); @@ -31,93 +41,151 @@ const result = await session.tool('generate_object', { }, }, }, - prompt: '提取: "Alice 28岁,擅长 Rust、TypeScript 和 Python。"', + prompt: 'Extract: "Alice is 28, skilled in Rust, TypeScript, and Python."', schema_name: 'developer', + mode: 'tool', }); const { object } = JSON.parse(result.output); +console.log(object); // { name: "Alice", age: 28, skills: ["Rust", "TypeScript", "Python"] } + +await session.close(); ``` -## 智能体自主调用 + + -让智能体自行决定何时使用结构化输出: +```python +import json +from a3s_code import Agent, SessionOptions, PermissionPolicy -```ts -const result = await session.send(` - 读取 config.yaml 文件,将所有服务定义提取为结构化 JSON, - 使用 generate_object 工具,schema 如下: - { type: "object", required: ["services"], properties: { - services: { type: "array", items: { type: "object", - required: ["name", "port"], properties: { - name: { type: "string" }, port: { type: "integer" } - }}}}} -`); +agent = Agent.create(open('config.acl').read()) +opts = SessionOptions() +opts.builtin_skills = True 
+opts.permission_policy = PermissionPolicy(default_decision="allow") +session = agent.session('.', opts) + +result = session.tool("generate_object", { + "schema": { + "type": "object", + "required": ["name", "age", "skills"], + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer", "minimum": 0}, + "skills": { + "type": "array", + "items": {"type": "string"}, + "minItems": 1, + }, + }, + }, + "prompt": 'Extract: "Alice is 28, skilled in Rust, TypeScript, and Python."', + "schema_name": "developer", + "mode": "tool", +}) + +obj = json.loads(result.output)["object"] +print(obj) +# {"name": "Alice", "age": 28, "skills": ["Rust", "TypeScript", "Python"]} + +session.close() ``` -## 两阶段模式 + + + +校验后的值位于解析输出的 `object` 键上。`required` 中声明的每个字段都保证存在且类型正确;如果模型无法满足 schema,工具会在 `result.exitCode`(Node)/ `result.exit_code`(Python)上报告非零退出码。 + +## 枚举分类 + +用 `enum` 把字段约束到一个固定集合。这会把模型变成一个可靠的分类器。 -复杂任务建议将推理与结构化输出分离: + + ```ts -// 阶段 1: 智能体自由推理 -const analysis = await session.send('分析 auth.ts 的安全性'); +const result = await session.tool('generate_object', { + schema: { + type: 'object', + required: ['sentiment', 'confidence'], + properties: { + sentiment: { type: 'string', enum: ['positive', 'negative', 'neutral'] }, + confidence: { type: 'number', minimum: 0, maximum: 1 }, + }, + }, + prompt: 'Classify sentiment: "This is the worst product I have ever used."', + schema_name: 'sentiment', +}); + +const { object } = JSON.parse(result.output); +console.log(object.sentiment, object.confidence); // "negative" 0.97 +``` + + + + +```python +result = session.tool("generate_object", { + "schema": { + "type": "object", + "required": ["sentiment", "confidence"], + "properties": { + "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]}, + "confidence": {"type": "number", "minimum": 0, "maximum": 1}, + }, + }, + "prompt": 'Classify sentiment: "This is the worst product I have ever used."', + "schema_name": "sentiment", +}) + +obj = 
json.loads(result.output)["object"] +print(obj["sentiment"], obj["confidence"]) # "negative" 0.97 +``` + + + + +## 嵌套 schema 与数组 + +Schema 可以任意深度地嵌套对象和数组,运行时会校验整个结构。这能在一次调用中建模真实的配置文件、清单或 API 载荷。 + + + -// 阶段 2: 确定性结构化提取 -const structured = await session.tool('generate_object', { +```ts +const result = await session.tool('generate_object', { schema: { type: 'object', - required: ['vulnerabilities', 'risk_score'], + required: ['items'], properties: { - vulnerabilities: { + items: { type: 'array', + minItems: 3, + maxItems: 5, items: { type: 'object', - required: ['type', 'severity', 'line'], + required: ['name', 'category'], properties: { - type: { type: 'string' }, - severity: { type: 'string', enum: ['low', 'medium', 'high', 'critical'] }, - line: { type: 'integer' }, - description: { type: 'string' }, + name: { type: 'string' }, + category: { type: 'string', enum: ['fruit', 'vegetable', 'grain'] }, }, }, }, - risk_score: { type: 'number', minimum: 0, maximum: 10 }, }, }, - prompt: `基于以下分析生成结构化漏洞报告:\n\n${analysis.text}`, - schema_name: 'security_report', + prompt: 'List 3 food items with their categories.', + schema_name: 'food_list', }); -``` -## 流式部分对象 - -```ts -const stream = await session.stream('从文档中提取所有实体...'); - -for await (const ev of stream) { - if (ev.type === 'tool_output_delta' && ev.toolName === 'generate_object') { - const { object_partial } = JSON.parse(ev.text); - updateUI(object_partial); // 渐进渲染 - } - if (ev.type === 'tool_end' && ev.toolName === 'generate_object') { - const { object } = JSON.parse(ev.toolOutput); - finalize(object); // 完整校验后的最终结果 - } -} +const { items } = JSON.parse(result.output).object; +console.log(items.length, items.map((i) => i.name)); ``` -## Python + + ```python -from a3s_code import Agent, SessionOptions, PermissionPolicy -import json - -agent = Agent.create(open('config.acl').read()) -opts = SessionOptions() -opts.permission_policy = PermissionPolicy(default_decision="allow") -session = agent.session('.', opts) - result 
= session.tool("generate_object", { "schema": { "type": "object", @@ -125,18 +193,64 @@ result = session.tool("generate_object", { "properties": { "items": { "type": "array", - "items": {"type": "object", "required": ["name", "price"], - "properties": {"name": {"type": "string"}, "price": {"type": "number"}}} - } - } + "minItems": 3, + "maxItems": 5, + "items": { + "type": "object", + "required": ["name", "category"], + "properties": { + "name": {"type": "string"}, + "category": {"type": "string", "enum": ["fruit", "vegetable", "grain"]}, + }, + }, + }, + }, }, - "prompt": "提取商品: '苹果 ¥10, 香蕉 ¥5, 橙子 ¥8'", - "schema_name": "grocery_list", + "prompt": "List 3 food items with their categories.", + "schema_name": "food_list", }) -data = json.loads(result.output)["object"] +items = json.loads(result.output)["object"]["items"] +print(len(items), [i["name"] for i in items]) ``` + + + +## Agent 自主调用 + +你也可以让 agent 自行决定何时使用结构化输出。让它在 `send` 过程中调用 `generate_object`;它会先收集上下文,再输出对象。 + + + + +```ts +const result = await session.send( + 'Use the generate_object tool to extract the following into an object ' + + 'with fields "title" (string), "year" (integer), "genre" (string): ' + + 'The movie "Inception" was released in 2010 and is a sci-fi thriller.' 
+); + +console.log(`tool calls: ${result.toolCallsCount}, tokens: ${result.totalTokens}`); +``` + + + + +```python +result = session.send( + 'Use the generate_object tool to produce a JSON object with schema ' + '{"type":"object","required":["language","paradigm"],"properties":' + '{"language":{"type":"string"},"paradigm":{"type":"string"}}} ' + 'for: "Rust is a systems programming language with a focus on safety."' +) + +print(f"tool calls: {result.tool_calls_count}, tokens: {result.total_tokens}") +``` + + + + ## Schema 校验覆盖 内置校验器支持: @@ -149,3 +263,11 @@ data = json.loads(result.output)["object"] - `minimum`、`maximum`、`exclusiveMinimum`、`exclusiveMaximum` - `minItems`、`maxItems`、`items` - 嵌套对象和数组校验 + +## 说明 + +- 校验后的值位于解析后 `result.output` 的 `object` 键上。直接结构化调用传 `mode: 'tool'`,仅 prompt 的回退方式传 `mode: 'prompt'`。 +- 把你依赖的每个字段都列入 `required`——运行时会强制执行,因此缺失或类型错误的字段会导致校验失败,而不是悄悄返回部分数据。 +- `generate_object` 是内置工具,因此 session 需要启用 `builtinSkills` / `builtin_skills`(如上所示)。 + +可运行版本随源码提供,位于 `crates/code/sdk/node/examples/basic/test_generate_object.ts` 和 `crates/code/sdk/python/examples/test_generate_object.py`。 diff --git a/apps/docs/content/docs/cn/code/meta.json b/apps/docs/content/docs/cn/code/meta.json index 69b297b..750acc3 100644 --- a/apps/docs/content/docs/cn/code/meta.json +++ b/apps/docs/content/docs/cn/code/meta.json @@ -8,6 +8,7 @@ "sessions", "commands", "tools", + "verification", "tasks", "teams", "orchestration", @@ -23,6 +24,7 @@ "architecture", "lane-queue", "multi-machine", + "cluster-extension-points", "---扩展---", "providers", "mcp", diff --git a/apps/docs/content/docs/cn/code/multi-machine.mdx b/apps/docs/content/docs/cn/code/multi-machine.mdx index cea951b..c259d09 100644 --- a/apps/docs/content/docs/cn/code/multi-machine.mdx +++ b/apps/docs/content/docs/cn/code/multi-machine.mdx @@ -6,7 +6,7 @@ description: "通过 AgentExecutor seam 将编排步骤分布到多台机器" # 多机器 A3S Code 把多智能体编排表达为代码中的一套*语法*(grammar),再把由此产生的步骤 -放到你希望它运行的任何地方。这一划分沿着**框架 / 宿主(书安OS)边界**展开,于 
+放到你希望它运行的任何地方。这一划分沿着**框架 / 宿主边界**展开,于 `[3.4.0]` 引入: - **框架**拥有编排语法和可序列化的数据契约。它从不决定步骤在哪里运行。 @@ -42,7 +42,7 @@ combinators (parallel / pipeline / resumable) 内置的 `TaskExecutor` 把每个步骤作为子 agent 在本地运行——在进程内、基于 Tokio—— 继承会话的 agent 注册表、LLM 客户端、工作区、MCP 工具和 subagent 跟踪器。诸如 -书安OS 的宿主用自己的 executor 替换它,把步骤分布到集群;combinators 不受影响。 +集群运行时这样的宿主用自己的 executor 替换它,把步骤分布到集群;combinators 不受影响。 `concurrency_hint()` 是**建议性的,不是本地硬上限**。本地默认返回会话的 `max_parallel_tasks`;由调度器支撑的宿主可以返回其集群范围的目标值。因为它是 hint diff --git a/apps/docs/content/docs/cn/code/orchestration.mdx b/apps/docs/content/docs/cn/code/orchestration.mdx index eec7343..e82bcda 100644 --- a/apps/docs/content/docs/cn/code/orchestration.mdx +++ b/apps/docs/content/docs/cn/code/orchestration.mdx @@ -22,7 +22,7 @@ explore → verify → review;崩溃后恢复这一批),就用编排。当 - **框架**拥有*语法*——有哪些 step、如何组合、并发*提示*,以及可序列化契约 `AgentStepSpec` / `StepOutcome`。 -- **宿主**(书安OS)拥有*放置*——传输、调度,以及 step 实际在哪里运行。 +- **宿主**拥有*放置*——传输、调度,以及 step 实际在哪里运行。 内置的默认 executor(`TaskExecutor`)在本地、进程内、基于 tokio 运行每个 step。 宿主可以替换为自己的 `AgentExecutor`,把 step 放置到集群各处;组合子从不观察 step diff --git a/apps/docs/content/docs/cn/code/persistence.mdx b/apps/docs/content/docs/cn/code/persistence.mdx index d23287a..4aee1bc 100644 --- a/apps/docs/content/docs/cn/code/persistence.mdx +++ b/apps/docs/content/docs/cn/code/persistence.mdx @@ -55,7 +55,7 @@ console.log(result.totalTokens); ``` ```python -result = await session.resume_run('run-abc123') +result = session.resume_run('run-abc123') print(result.total_tokens) ``` diff --git a/apps/docs/content/docs/cn/code/sessions.mdx b/apps/docs/content/docs/cn/code/sessions.mdx index 480e849..fef8148 100644 --- a/apps/docs/content/docs/cn/code/sessions.mdx +++ b/apps/docs/content/docs/cn/code/sessions.mdx @@ -91,7 +91,7 @@ if (session.isClosed()) { ``` ```python -await session.close() +session.close() if session.is_closed(): # send/stream 现在会以 CodeError::SessionClosed 拒绝 pass @@ -126,9 +126,9 @@ console.log(agent.isClosed()); ``` ```python -ids = await 
agent.list_sessions() -await agent.close_session(ids[0]) -await agent.close() # 关闭所有剩余 session + 全局 MCP +ids = agent.list_sessions() +agent.close_session(ids[0]) +agent.close() # 关闭所有剩余 session + 全局 MCP print(agent.is_closed()) ``` diff --git a/apps/docs/content/docs/cn/code/verification.mdx b/apps/docs/content/docs/cn/code/verification.mdx new file mode 100644 index 0000000..34d7f1b --- /dev/null +++ b/apps/docs/content/docs/cn/code/verification.mdx @@ -0,0 +1,156 @@ +--- +title: "验证" +description: "用验证命令和报告证明一个回合已完成,而不是轻信模型的声明" +--- + +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +# 验证 + +运行时将"完成"视为必须被**证明**的事实,而不仅仅是被声明的结果。当模型说某个任务已完成时,这句话本身毫无价值。验证把声明转化为证据:你声明一组*必须*成功的命令,运行时执行它们,结果会携带一份你可以检视、据以拦截或呈现给用户的报告。 + +验证是会话级的。Rust 核心运行每条命令,记录其退出状态和输出,并将每份报告汇总为一份随回合结果一同返回的摘要。 + +## 运行验证命令 + +一条验证命令就是一个小而具名的检查:一个 `id`、一个 `kind`、一段可读的 `description`,以及要运行的 `command`。当某个失败应被视为硬失败而非警告时,将该检查标记为 `required`。 + + + + +```ts +const report = await session.verifyCommands('release-readiness', [ + { + id: 'build', + kind: 'build', + description: 'Project compiles', + command: 'cargo build --all-features', + required: true, + timeoutMs: 120000, + }, + { + id: 'tests', + kind: 'test', + description: 'Unit tests pass', + command: 'cargo test', + required: true, + }, +]); + +console.log(report); +``` + + + + +```python +report = session.verify_commands('release-readiness', [ + { + "id": "build", + "kind": "build", + "description": "Project compiles", + "command": "cargo build --all-features", + "required": True, + "timeout_ms": 120000, + }, + { + "id": "tests", + "kind": "test", + "description": "Unit tests pass", + "command": "cargo test", + "required": True, + }, +]) + +print(report) +``` + + + + +`subject`(此处为 `release-readiness`)为这一批检查命名,使同一会话内的多次验证在报告中保持彼此独立。 + +## 读取回合结束后的摘要 + +每个回合的 `send()` 结果同样携带只读的验证字段,因此你无需单独发起一次验证调用即可据结果拦截。用这些字段判断该回合是否真正完成了它所声称的工作。 + + + + +```ts +const result = await session.send('Apply the fix and run the checks'); + 
+console.log(result.verificationStatus); +console.log(result.pendingVerificationCount); +console.log(result.failedVerificationCount); +console.log(result.verificationReportCount); +console.log(result.verificationSummaryText); + +if (result.failedVerificationCount > 0) { + throw new Error('Turn reported done but verification failed'); +} +``` + + + + +```python +result = session.send('Apply the fix and run the checks') + +print(result.verification_status) +print(result.pending_verification_count) +print(result.failed_verification_count) +print(result.verification_report_count) +print(result.verification_summary_text) + +if result.failed_verification_count > 0: + raise RuntimeError('Turn reported done but verification failed') +``` + + + + +## 检视报告与摘要 + +除了逐回合的字段之外,会话还暴露完整的报告集合、一份结构化摘要、可用的预设,以及一份可读的概要。该概要是向人展示某回合*为何*通过或失败的最快方式。 + + + + +```ts +import { formatVerificationSummary } from '@a3s-lab/code'; + +const reports = session.verificationReports(); +const summary = session.verificationSummary(); +const presets = session.verificationPresets(); + +// Either the session helper or the standalone formatter yields readable text. +console.log(session.verificationSummaryText()); +console.log(formatVerificationSummary(summary)); +``` + + + + +```python +reports = session.verification_reports() +summary = session.verification_summary() +presets = session.verification_presets() + +# The session helper returns a ready-to-print human-readable digest. 
+print(session.verification_summary_text()) +``` + + + + +`verificationPresets()` 返回运行时内置的检查模板,让你可以基于已知可靠的默认项组合命令列表,而不必手写每一条检查。 + +## 为何重要 + +没有验证,一次智能体运行止于模型的一面之词。有了验证,运行止于可观测的证据:编译通过的构建、跑通的测试套件、保持沉默的代码检查器。摘要文本提供审计轨迹;结果上的计数让你能在自动化中以失败为默认(fail closed)。 + +## 相关 + +- [遥测](/cn/docs/code/telemetry) —— 将追踪事件和验证报告作为运行时证据进行检视。 +- [限制](/cn/docs/code/limits) —— 在验证运行之前限定一个回合能完成多少工作量。 diff --git a/apps/docs/content/docs/en/code/api-contract.mdx b/apps/docs/content/docs/en/code/api-contract.mdx index 56a13b9..b0104a6 100644 --- a/apps/docs/content/docs/en/code/api-contract.mdx +++ b/apps/docs/content/docs/en/code/api-contract.mdx @@ -168,6 +168,8 @@ validated async iteration support. ## Direct Tools +> Full guide: [Tools](/docs/code/tools). + The integration check covers these host-driven direct calls: ```ts @@ -236,6 +238,8 @@ tool set. ## Verification +> Full guide: [Verification](/docs/code/verification). + Verification is session-scoped: ```ts @@ -253,6 +257,8 @@ console.log(formatVerificationSummary(session.verificationSummary())); ## Memory +> Full guide: [Memory](/docs/code/memory). + Node memory was verified with `FileMemoryStore`: ```ts @@ -273,6 +279,8 @@ present on the current Node SDK surface. ## Skills +> Full guide: [Skills](/docs/code/skills). + File-backed and inline skills are verified through `search_skills`: ```ts @@ -296,6 +304,8 @@ The skill-file check uses Markdown with YAML frontmatter and the ## Side Questions +> Full guide: [Sessions](/docs/code/sessions). + `btw()` asks a read-only side question and returns a separate result: ```ts @@ -307,6 +317,8 @@ console.log(side.totalTokens); ## Runs And Cancellation +> Full guide: [Sessions](/docs/code/sessions). + Each `send()` or `stream()` records replayable run state: ```ts @@ -332,6 +344,8 @@ console.log(session.traceEvents()); ## Persistence +> Full guide: [Persistence](/docs/code/persistence) and [Sessions](/docs/code/sessions). 
+ File-backed session persistence was verified with stable `sessionId`, `autoSave`, explicit `save()`, and `resumeSession()`: @@ -374,6 +388,8 @@ session-scoped workers outlive the agent. ## Delegation +> Full guide: [Tasks](/docs/code/tasks) and [Orchestration](/docs/code/orchestration). + The direct helpers for the core delegation tools were verified: ```ts @@ -394,6 +410,8 @@ They return `ToolResult` values from `task` and `parallel_task`. ## Hooks +> Full guide: [Hooks](/docs/code/hooks). + The verified hook management surface is: ```ts @@ -414,6 +432,8 @@ production enforcement gate. ## Slash Commands +> Full guide: [Commands](/docs/code/commands). + Custom slash commands are invoked through `session.send()`: ```ts @@ -428,6 +448,8 @@ console.log(result.text); ## Lane Queue +> Full guide: [Lane Queue](/docs/code/lane-queue). + Queue infrastructure is opt-in: ```ts @@ -448,6 +470,8 @@ Ordinary sessions are queue-free unless `queueConfig` is provided. ## MCP +> Full guide: [MCP](/docs/code/mcp). Idle disconnect: [Cluster Extension Points](/docs/code/cluster-extension-points). + The integration check covers a live stdio MCP server: ```ts @@ -483,7 +507,9 @@ The check does not assert a live AHP server exchange. ## Cluster-grade extension points -These contracts let a cluster control plane (e.g. 书安OS) wire +> Full guide: [Cluster Extension Points](/docs/code/cluster-extension-points) (identity labels, budget guard, cluster events, deterministic IDs/replay, loop checkpoints, retention caps). + +These contracts let a cluster control plane wire multi-tenancy, cost governance, and crash-tolerant runs **without forking the framework**. The framework defines decision points and emits structured events; the host supplies the policy implementations. 
diff --git a/apps/docs/content/docs/en/code/cluster-extension-points.mdx b/apps/docs/content/docs/en/code/cluster-extension-points.mdx new file mode 100644 index 0000000..a5f51db --- /dev/null +++ b/apps/docs/content/docs/en/code/cluster-extension-points.mdx @@ -0,0 +1,188 @@ +--- +title: "Cluster Extension Points" +description: "The seams a cluster host uses to run long-lived agent sessions across many nodes without forking the framework." +--- + +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +# Cluster Extension Points + +A cluster host platform runs long-lived agent sessions across many nodes. The framework does not ship a scheduler or a placement engine. Instead it exposes a small set of seams: it defines the decision points, emits structured events, and lets the host supply the policy. Everything below is something you wire from outside the framework — you never fork it. + +This page is precise about which seams are available from both SDKs (shown with Node.js + Python code) and which are configured in the Rust core today (described in prose, with SDK wiring to follow). + +## Identity labels + +Every session can carry four opaque identity labels. The framework never interprets them — it propagates them to hooks, traces, and `SessionData`, and restores them on resume. This is how a host attributes a session to a tenant, a principal, an agent template, and a wider correlation chain. + +Pair identity labels with a `sessionStore` / `session_store` so the labels survive a process restart. On resume, **caller-supplied options win**, so you can relabel a session as you move it between nodes. 
+ + + +```ts +const session = agent.session('/path/to/project', { + tenantId: 'acme-corp', + principal: 'user:42', + agentTemplateId: 'reviewer-v3', + correlationId: 'req-9f2c', +}); + +// Getters return string | null +console.log(session.tenantId); // 'acme-corp' +console.log(session.principal); // 'user:42' +console.log(session.agentTemplateId); // 'reviewer-v3' +console.log(session.correlationId); // 'req-9f2c' +``` + + + + +```python +opts = SessionOptions() +opts.tenant_id = 'acme-corp' +opts.principal = 'user:42' +opts.agent_template_id = 'reviewer-v3' +opts.correlation_id = 'req-9f2c' +session = agent.session('/path/to/project', opts) + +# Getters are methods, return str | None +print(session.tenant_id()) # 'acme-corp' +print(session.principal()) # 'user:42' +print(session.agent_template_id()) # 'reviewer-v3' +print(session.correlation_id()) # 'req-9f2c' +``` + + + + +## Budget / cost guard + +A budget guard lets the host gate every LLM call against a cost or token budget. The framework calls your guard *before* each LLM request and *after* it returns. The guard is policy you own; the framework only enforces the decision you hand back. + + + + +```ts +session.setBudgetGuard({ + checkBeforeLlm(sessionId, estimatedTokens) { + if (overLimit(sessionId, estimatedTokens)) { + return { decision: 'deny', resource: 'tokens', reason: 'monthly cap reached' }; + } + return { decision: 'allow' }; + }, + recordAfterLlm(sessionId, usage) { + meter(sessionId, usage); + }, +}); + +// Clear the guard +session.setBudgetGuard(null); +``` + +Guard callbacks **must not throw** — a thrown error is treated as Allow, so the LLM call proceeds without the budget check your guard was meant to apply.
+ + + + +```python +class MyGuard: + def check_before_llm(self, session_id, estimated_tokens): + if over_limit(session_id, estimated_tokens): + return {'decision': 'deny', 'resource': 'tokens', 'reason': 'monthly cap reached'} + return {'decision': 'allow'} + + def record_after_llm(self, session_id, usage): + meter(session_id, usage) + +opts = SessionOptions() +opts.budget_guard = MyGuard() +session = agent.session('/path/to/project', opts) + +# To clear: set opts.budget_guard = None and re-create the session. +``` + + + + +The decision shape is identical across both SDKs: + +| Return value | Effect | +|---|---| +| `None` / `null` / `{ decision: 'allow' }` | Proceed with the LLM call. | +| `{ decision: 'soft', resource, consumed, limit, message? }` | Emits `BudgetThresholdHit` (kind `soft`) and proceeds. | +| `{ decision: 'deny', resource, reason }` | Aborts the LLM call. Python raises `RuntimeError("Budget exhausted...")`; Node rejects with `"Budget exhausted..."`. | + +Robustness is intentional: a **missing guard method** is treated as the permissive default, and a **callback error falls back to Allow**. A misbehaving guard can never halt a live session — only an explicit `deny` does that. + +## Cluster event vocabulary + +The host emits cluster-level decisions as structured `AgentEvent` variants through its hook executor. In-session hooks subscribe to them uniformly — the same way they observe any other event — so policy authored at the host shows up to the agent's own hooks without special casing. + +The cluster vocabulary is: + +- **`BudgetThresholdHit { resource, kind, consumed, limit, message? }`** — a budget guard returned a `soft` decision (or the host crossed a threshold it tracks itself). `kind` distinguishes soft warnings from harder limits. +- **`PassivationRequested { reason, deadline_ms? }`** — the host is asking the session to reach a safe, persistable state so it can be evicted from this node. 
`deadline_ms`, when present, is the grace window before forced eviction. +- **`PeerInvocation { from_session_id, from_tenant_id?, correlation_id? }`** — another session invoked this one. The labels let the receiver attribute the call back to its origin tenant and correlation chain. + +These are observed through the same verified hook API your in-session hooks already use — `session.registerHook` in Node, `session.register_hook` in Python (see [Hooks](/docs/code/hooks)). Treat the three variants above as the documented contract; the host is responsible for emitting them via its hook executor. + +## Deterministic IDs and time (replay) + +A cluster that wants **bit-identical replay** of a run on a different node must remove the two sources of nondeterminism in a normal run: random IDs and the wall clock. The Rust core models both behind a `HostEnv { id_generator, clock }`. The default pairs a UUID generator with the system clock; replay tooling swaps in a `SequentialIdGenerator` and a `FixedClock` so that re-executing the same inputs produces the same IDs and timestamps, and therefore the same output, on any node. + +This is configured in the **Rust core today**. It is not yet exposed on the JS/Python option surface, so there is no Node/Python code for it — SDK wiring may follow. + +## Loop checkpoints and run resumption + +With a `sessionStore` / `session_store` configured, the agent loop persists a checkpoint after **each completed tool round**, keyed by run id. Any node that shares the same store can rehydrate the run and continue it. 
+ + + + +```ts +import { FileSessionStore } from '@a3s-lab/code'; + +const session = agent.session(workspace, { + sessionStore: new FileSessionStore('./.a3s/sessions'), + sessionId: 'session-from-node-a', +}); + +const result = await session.resumeRun('run-id-from-node-a'); +``` + + + + +```python +from a3s_code import FileSessionStore + +opts = SessionOptions() +opts.session_store = FileSessionStore('./.a3s/sessions') +opts.session_id = 'session-from-node-a' +session = agent.session(workspace, opts) + +result = session.resume_run('run-id-from-node-a') +``` + + + + +A **new run id** is allocated for the resumed work — the original run is left intact in the store. Two error paths are worth handling: + +- **`resume_run requires a session_store`** — no store was configured; fall back to a fresh session. +- **`no loop checkpoint found for run 'X'`** — the run never reached its first checkpoint, or it was pruned; retry later or treat the run as lost. + +Because checkpoints are taken only **between tool rounds, never mid-tool**, a resumed run never replays a half-executed tool. See [Persistence](/docs/code/persistence) for store details. + +## Retention caps for long-running sessions + +A session that runs for hours or days accumulates state in four in-memory stores: run records, per-run event buffers, trace events, and terminal subagent task snapshots. Left unbounded, these grow with session age — fine for short-lived sessions, a real leak for long-lived ones. + +`SessionRetentionLimits` caps each of the four stores. Every cap is optional: `None` means the unbounded default. Eviction is strict **FIFO**, and **running subagent tasks are never dropped** — only terminal (completed/failed) snapshots are evicted. + +This is configured via the Rust core `SessionRetentionLimits` today; the SDK shapes land in a follow-up, so there is no Node/Python code for it yet. See [Limits](/docs/code/limits) for the per-session resource caps that are already on the SDK surface. 
+ +--- + +**See also:** [Multi-machine](/docs/code/multi-machine) · [Persistence](/docs/code/persistence) · [Limits](/docs/code/limits) · [Hooks](/docs/code/hooks) diff --git a/apps/docs/content/docs/en/code/examples/ahp-safety.mdx b/apps/docs/content/docs/en/code/examples/ahp-safety.mdx index c34d453..5ca6199 100644 --- a/apps/docs/content/docs/en/code/examples/ahp-safety.mdx +++ b/apps/docs/content/docs/en/code/examples/ahp-safety.mdx @@ -1,19 +1,81 @@ --- -title: "AHP Safety" -description: "Attach an external harness to a session" +title: "AHP Transport & Safety" +description: "Attach a session to an external host over an AHP transport and gate its actions with a security provider and permission policy." --- -# AHP Safety +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +# AHP Transport & Safety + +A3S Code sessions can attach to an external host over a pluggable transport (AHP — the Agent Host Protocol). Whatever transport carries the messages, the safety layer is independent: a **security provider** vets tool calls and a **permission policy** decides what runs without a human in the loop. Use this page when you need a remote/host-driven session that still enforces a safe default posture. + +The transport and the safety layer are orthogonal. You can swap `HttpTransport` for a WebSocket or Unix-socket transport without touching the permission rules, and vice versa. + + + ```ts -import { Agent, HttpTransport } from '@a3s-lab/code'; +import { Agent, HttpTransport, DefaultSecurityProvider } from '@a3s-lab/code'; const agent = await Agent.create('agent.acl'); + const session = agent.session('/repo', { + // AHP transport: how the session reaches its external host. ahpTransport: new HttpTransport('http://localhost:8080/ahp', process.env.AHP_TOKEN), + // Safety: the security provider vets each tool call; the permission + // policy decides what runs unattended. These apply regardless of transport. 
+ securityProvider: new DefaultSecurityProvider(), + permissionPolicy: { defaultDecision: 'ask' }, }); + +const result = await session.run('Audit the repo for hardcoded secrets.'); +console.log(result); + +await session.close(); +``` + +Other transports follow the same shape — `WebSocketTransport`, `UnixSocketTransport`, and the in-process `StdioTransport` are all interchangeable here. Set `defaultDecision` to `'deny'` for a stricter, fully-gated posture, or `'allow'` only in trusted, sandboxed environments. + + + + +```python +import os + +from a3s_code import ( + Agent, + SessionOptions, + HttpTransport, + DefaultSecurityProvider, + PermissionPolicy, +) + +agent = Agent.create(open("agent.acl").read()) + +opts = SessionOptions() +# AHP transport: how the session reaches its external host. +opts.transport = HttpTransport("http://localhost:8080/ahp", os.environ["AHP_TOKEN"]) +# Safety: the security provider vets each tool call; the permission +# policy decides what runs unattended. These apply regardless of transport. +opts.security_provider = DefaultSecurityProvider() +opts.permission_policy = PermissionPolicy(default_decision="ask") + +session = agent.session("/repo", opts) + +result = session.run("Audit the repo for hardcoded secrets.") +print(result) + +session.close() ``` -This example verifies the session option shape and transport constructor. Test -your live AHP server before relying on policy, context injection, idle, audit, -or supervision behavior. +Set `default_decision` to `"deny"` for a stricter, fully-gated posture, or `"allow"` only in trusted, sandboxed environments. The transport is independent of the safety layer. + + + + +## Notes + +- **AHP is the transport, not the policy.** Choosing HTTP, WebSocket, or a Unix socket changes *how* the session reaches its host; it does not change *what* the agent is allowed to do. Safety lives entirely in the security provider and permission policy. +- **`DefaultSecurityProvider`** ships a sensible baseline. 
Supply your own provider when you need to enforce organization-specific rules (path allow-lists, command vetting, redaction). +- **Permission policy** (`defaultDecision` / `default_decision`) is the single most important safety knob. Prefer `'ask'` for interactive use and `'deny'` for unattended runs unless the workspace is sandboxed. +- This example verifies the session option shape and transport constructor. Test your live AHP server before relying on policy, context injection, idle, audit, or supervision behavior. diff --git a/apps/docs/content/docs/en/code/examples/auto-compact.mdx b/apps/docs/content/docs/en/code/examples/auto-compact.mdx index 1dceddf..1783394 100644 --- a/apps/docs/content/docs/en/code/examples/auto-compact.mdx +++ b/apps/docs/content/docs/en/code/examples/auto-compact.mdx @@ -1,18 +1,83 @@ --- title: "Auto Compact" -description: "Keep long sessions inside context budget" +description: "Let the runtime keep long sessions inside the context budget automatically" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Auto Compact +A3S Code can keep long conversations inside the model's context budget for you. +Enable `autoCompact` and the runtime watches context usage; once it crosses +`autoCompactThreshold`, older turns are compacted into a running summary so the +agent stays coherent across many steps without you managing tokens by hand. +Continuation handles the other direction: when a single response is truncated by +length, the runtime automatically continues generating to assemble the full reply. + + + + ```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); + const session = agent.session('/repo', { + // Compact older turns once context fills past the threshold. autoCompact: true, autoCompactThreshold: 0.75, + // Auto-continue a single response that the model truncates by length. continuationEnabled: true, maxContinuationTurns: 3, }); + +// Run a long, multi-step task. 
The runtime compacts older turns as needed; +// you never touch the token math. +for (let i = 0; i < 50; i++) { + await session.send(`Step ${i}: continue refactoring the parser`); +} + +// Inspect what the session is currently carrying. +console.log('history turns:', session.history().length); +console.log('recent memory:', await session.memoryRecent(5)); + +await session.close(); ``` -Use auto-compaction for long sessions where the host wants the runtime to manage -context pressure. + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open("agent.acl").read()) + +opts = SessionOptions() +# Compact older turns once context fills past the threshold. +opts.auto_compact = True +opts.auto_compact_threshold = 0.75 +# Auto-continue a single response that the model truncates by length. +opts.continuation_enabled = True +opts.max_continuation_turns = 3 +session = agent.session("/repo", opts) + +# Run a long, multi-step task. The runtime compacts older turns as needed; +# you never touch the token math. +for i in range(50): + session.send(f"Step {i}: continue refactoring the parser") + +# Inspect what the session is currently carrying. +print("history turns:", len(session.history())) +print("recent memory:", session.memory_recent(5)) + +session.close() +``` + + + + +Use auto-compaction for long sessions where you want the runtime to manage context +pressure for you. `autoCompactThreshold` / `auto_compact_threshold` is a fraction of +the context window (0.0–1.0, default 0.8) at which compaction kicks in; lower it to +compact earlier. Inspect the live session with `history()` (synchronous) and +`memoryRecent` / `memory_recent`. 
diff --git a/apps/docs/content/docs/en/code/examples/batch.mdx b/apps/docs/content/docs/en/code/examples/batch.mdx index 93a4bc4..a532f4f 100644 --- a/apps/docs/content/docs/en/code/examples/batch.mdx +++ b/apps/docs/content/docs/en/code/examples/batch.mdx @@ -1,16 +1,84 @@ --- title: "Batch" -description: "Group deterministic operations when appropriate" +description: "Compose deterministic SDK helpers for grouped, model-free operations" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Batch -Batch is an intent-gated tool for grouped operations. Use it when a host workflow or agent turn has a clear list of independent deterministic steps. +There is no `batch()` method in the SDK. When a host workflow or agent turn has a +clear list of independent, deterministic steps, compose them directly from the +session's deterministic helpers (`readFile`, `grep`, `glob`, `ls`, `git`) and +aggregate the results yourself. That keeps the "batch" fully under your control: +no model calls, explicit ordering, and no destructive operations unless you +invoke them. + +The example below reads package metadata, the changelog, and the release script, +then reports version mismatches without editing any files. + + + + +```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session('/path/to/project'); + +// Node helpers are async — group independent reads with Promise.all. +const [pkg, changelog, releaseScript] = await Promise.all([ + session.readFile('package.json'), + session.readFile('CHANGELOG.md'), + session.readFile('scripts/release.sh'), +]); + +const pkgVersion = JSON.parse(pkg).version; +const mismatches = []; +if (!changelog.includes(pkgVersion)) mismatches.push(`CHANGELOG.md is missing ${pkgVersion}`); +if (!releaseScript.includes(pkgVersion)) mismatches.push(`release.sh is missing ${pkgVersion}`); + +console.log(mismatches.length ? 
mismatches.join('\n') : 'All files agree on the version.'); + +await session.close(); +``` + + + + +```python +import json +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open("agent.acl").read()) +session = agent.session("/path/to/project", SessionOptions()) + +# Python helpers are synchronous — call them in sequence, no await. +pkg = session.read_file("package.json") +changelog = session.read_file("CHANGELOG.md") +release_script = session.read_file("scripts/release.sh") -```text -Run a batch that reads package metadata, changelog, and release script, -then summarize mismatches without editing files. +pkg_version = json.loads(pkg)["version"] +mismatches = [] +if pkg_version not in changelog: + mismatches.append(f"CHANGELOG.md is missing {pkg_version}") +if pkg_version not in release_script: + mismatches.append(f"release.sh is missing {pkg_version}") + +print("\n".join(mismatches) if mismatches else "All files agree on the version.") + +session.close() ``` -Keep destructive operations out of batch. Put any destructive host workflow -behind explicit application approval or automation gates. + + + +Do not mix destructive operations into these grouped reads. Any destructive host +workflow (writes, `git` commits, `bash`) should sit behind explicit application +confirmation or your automation gates, constrained with `permissionPolicy` / +`permission_policy` so no unexpected step runs silently. + +When the steps are **not** independent — each one depends on the previous result +and you want an agent to drive them — use +[`session.pipeline(...)`](/docs/code/examples/orchestration) instead, which runs +the work in stages where each stage receives the prior stage's output. 
diff --git a/apps/docs/content/docs/en/code/examples/direct-tools.mdx b/apps/docs/content/docs/en/code/examples/direct-tools.mdx index 1951273..81d80e5 100644 --- a/apps/docs/content/docs/en/code/examples/direct-tools.mdx +++ b/apps/docs/content/docs/en/code/examples/direct-tools.mdx @@ -1,22 +1,48 @@ --- title: "Direct Tools" -description: "Run deterministic tools without an LLM turn" +description: "Run deterministic host tools without spending an LLM turn" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Direct Tools +`session.tool(name, args)` (and the typed helpers like `glob`, `grep`, `readFile`) +run a host tool directly, with no model call in the loop. Use them for tests, +migrations, and host-driven workflows where you want deterministic results +instead of an agent turn. For unattended calls, set the permission policy's +`defaultDecision` to `allow` so the tools don't block on a confirmation prompt. + + + + ```ts -const files = await session.glob('content/docs/**/*.mdx'); -const matches = await session.grep('default_model|providers "openai"|baseUrl'); +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session('.', { + permissionPolicy: { defaultDecision: 'allow' }, +}); + +// Glob: list files by pattern +const files = await session.glob('**/*.ts'); +console.log(`glob found ${files.length} TypeScript files`); + +// Grep: search file contents +const matches = await session.grep('Agent.create'); +console.log(`grep found ${matches.length} matches`); + +// Read a file const readme = await session.readFile('README.md'); -const tests = await session.bash('npm run build'); -const raw = await session.tool('read', { file_path: 'README.md' }); +console.log(`README is ${readme.length} bytes`); + +// Direct tool call by name +const raw = await session.tool('read', { file_path: 'package.json' }); +console.log(`package.json via tool(): ${String(raw).length} bytes`); + +// Inspect available 
tool schemas const schemas = session.toolDefinitions(); -const program = await session.program({ - source: 'export default async function run(ctx, inputs) { return ctx.grep(inputs.q); }', - inputs: { q: 'planningMode' }, - allowedTools: ['grep'], -}); +console.log(`session exposes ${schemas.length} tools`); // Structured output: generate a schema-validated JSON object const structured = await session.tool('generate_object', { @@ -28,11 +54,71 @@ const structured = await session.tool('generate_object', { language: { type: 'string' }, }, }, - prompt: 'How many .mdx files are there? What language are they in?', + prompt: 'How many TypeScript files are in this project?', schema_name: 'file_stats', }); +console.log('structured output:', structured); + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions, PermissionPolicy + +agent = Agent.create(open('agent.acl').read()) +opts = SessionOptions() +opts.permission_policy = PermissionPolicy(default_decision='allow') +session = agent.session('.', opts) + +# Glob: list files by pattern +files = session.glob('**/*.py') +print(f'glob found {len(files)} Python files') + +# Grep: search file contents +matches = session.grep('Agent.create') +print(f'grep found {len(matches)} matches') + +# Read a file +readme = session.read_file('README.md') +print(f'README is {len(readme)} bytes') + +# Direct tool call by name +raw = session.tool('read', {'file_path': 'pyproject.toml'}) +print(f'pyproject.toml via tool(): {len(str(raw))} bytes') + +# Inspect available tool schemas +schemas = session.tool_definitions() +print(f'session exposes {len(schemas)} tools') + +# Structured output: generate a schema-validated JSON object +structured = session.tool('generate_object', { + 'schema': { + 'type': 'object', + 'required': ['count', 'language'], + 'properties': { + 'count': {'type': 'integer'}, + 'language': {'type': 'string'}, + }, + }, + 'prompt': 'How many Python files are in this project?', + 
'schema_name': 'file_stats', +}) +print('structured output:', structured) + +session.close() ``` -Direct tools are useful for tests, migrations, and host application workflows. -They execute under the session workspace and should be treated as privileged -host operations. + + + +Direct tools execute under the session workspace and should be treated as +privileged host operations. Most calls (`read`, `glob`, `grep`) are purely +deterministic; `generate_object` is the exception — it still calls the model +to fill a schema-validated JSON object, but you drive it explicitly rather than +through a free-form agent turn. + +A runnable version ships at `crates/code/sdk/node/examples/basic/test_generate_object.ts` +(Python: `crates/code/sdk/python/examples/test_generate_object.py`). diff --git a/apps/docs/content/docs/en/code/examples/external-tasks.mdx b/apps/docs/content/docs/en/code/examples/external-tasks.mdx index 3982801..4eec19b 100644 --- a/apps/docs/content/docs/en/code/examples/external-tasks.mdx +++ b/apps/docs/content/docs/en/code/examples/external-tasks.mdx @@ -1,22 +1,116 @@ --- title: "External Tasks" -description: "Complete work from outside the agent process" +description: "Fulfill agent-queued work from outside the agent process and report structured evidence back" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # External Tasks +Some work cannot run inside the agent process: it belongs to a separate worker, a CI +runner, or a human in another system. When a lane is routed to an external handler, the +tools on that lane are **queued** instead of executed — they wait as external tasks. Your +host code drains the pending queue, does the work however it likes, and reports the +outcome back with `completeExternalTask`. Reach for this only when an outside worker is +genuinely part of your architecture. 
+ +External tasks are produced by the [lane queue](/docs/code/examples/lane-queue): you must +register at least one `external` (or `hybrid`) lane handler first, otherwise every task +runs in-process and there is nothing to drain. + + + + ```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session(process.cwd()); + +// Route a lane to an external worker so its tools are queued, not executed. +await session.setLaneHandler('execute', { mode: 'external', timeoutMs: 300000 }); + +// Drain the tasks waiting for the host to fulfill. const pending = await session.pendingExternalTasks(); -if (pending.length > 0) { - await session.completeExternalTask(pending[0].id, { - success: true, - result: { - summary: 'worker completed the test run', - command: 'npm run build', - }, - }); +for (const task of pending) { + console.log(`pending: ${task.task_id} on ${task.lane} (${task.command_type})`); + + try { + // ...the host does the real work here (run CI, call a service, ask a human)... + const ok = await session.completeExternalTask(task.task_id, { + success: true, + result: { + summary: 'worker completed the test run', + command: 'npm run build', + exitCode: 0, + }, + }); + console.log('completed:', ok); + } catch (err) { + await session.completeExternalTask(task.task_id, { + success: false, + error: String(err), + }); + } } + +await session.close(); +``` + + + + +```python +import os +from a3s_code import Agent + +agent = Agent.create(open("agent.acl").read()) +session = agent.session(os.getcwd()) + +# Route a lane to an external worker so its tools are queued, not executed. +session.set_lane_handler("execute", "external", 300000) + +# Drain the tasks waiting for the host to fulfill. 
+pending = session.pending_external_tasks() + +for task in pending: + print(f"pending: {task['task_id']} on {task['lane']} ({task['command_type']})") + + try: + # ...the host does the real work here (run CI, call a service, ask a human)... + ok = session.complete_external_task( + task["task_id"], + success=True, + result={ + "summary": "worker completed the test run", + "command": "npm run build", + "exit_code": 0, + }, + ) + print("completed:", ok) + except Exception as err: + session.complete_external_task( + task["task_id"], + success=False, + error=str(err), + ) + +session.close() ``` -External workers should return compact structured evidence, not raw logs only. + + + +Notes: + +- Each pending task carries `task_id`, `session_id`, `lane`, `command_type`, `payload`, and + `timeout_ms`. Pass the `task_id` back to `completeExternalTask` / `complete_external_task` + to match the completion to the right task. +- The result shape is `{ success, result?, error? }`. `result` holds any + JSON-serializable payload; `error` is an optional message for failures. +- On success, return compact structured evidence (a summary plus the key facts), not raw + logs only — the agent reasons over the result, so keep it small and machine-readable. +- `completeExternalTask` / `complete_external_task` returns `true` when the task was found + and completed, `false` otherwise. In Python these queue methods are synchronous; in Node + `pendingExternalTasks` and `completeExternalTask` return promises. 
diff --git a/apps/docs/content/docs/en/code/examples/git-worktree.mdx b/apps/docs/content/docs/en/code/examples/git-worktree.mdx index ae690a4..5cac523 100644 --- a/apps/docs/content/docs/en/code/examples/git-worktree.mdx +++ b/apps/docs/content/docs/en/code/examples/git-worktree.mdx @@ -1,15 +1,107 @@ --- title: "Git Worktree" -description: "Use git through the session tool surface" +description: "Drive git and git worktrees through the session git tool" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Git Worktree +The session `git` tool runs git as a privileged host operation. It accepts a +structured command object (`command`, plus `subcommand`/`name`/`path` for +worktrees) and returns a tool result with `output` and `exitCode`. This example +inspects a repo, then creates, lists, and removes a worktree directly through the +tool surface. + + + + ```ts -const status = await session.git('status'); -const diff = await session.git('diff'); -const log = await session.git('log', undefined, undefined, undefined, undefined, undefined, undefined, 5); +import { Agent } from '@a3s-lab/code'; +import * as path from 'path'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session('/path/to/repo'); + +// Inspect the repo +const status = await session.git({ command: 'status' }); +console.log(status.output); + +// Create a worktree on a new branch +const wtPath = path.join('/path/to/repo', 'wt-feature-auth'); +const created = await session.git({ + command: 'worktree', + subcommand: 'create', + name: 'feature-auth', + path: wtPath, +}); +if (created.exitCode !== 0) throw new Error(`create failed: ${created.output}`); + +// List worktrees +const list = await session.git({ command: 'worktree', subcommand: 'list' }); +console.log(list.output); + +// Remove the worktree when done +const removed = await session.git({ + command: 'worktree', + subcommand: 'remove', + path: wtPath, +}); +if (removed.exitCode !== 0) throw new Error(`remove 
failed: ${removed.output}`); + +await session.close(); ``` + + + +```python +import os +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open("agent.acl").read()) +session = agent.session("/path/to/repo", SessionOptions()) + +# Inspect the repo +status = session.git({"command": "status"}) +print(status.output) + +# Create a worktree on a new branch +wt_path = os.path.join("/path/to/repo", "wt-feature-auth") +created = session.git({ + "command": "worktree", + "subcommand": "create", + "name": "feature-auth", + "path": wt_path, +}) +if created.exit_code != 0: + raise RuntimeError(f"create failed: {created.output}") + +# List worktrees +listing = session.git({"command": "worktree", "subcommand": "list"}) +print(listing.output) + +# Remove the worktree when done +removed = session.git({ + "command": "worktree", + "subcommand": "remove", + "path": wt_path, +}) +if removed.exit_code != 0: + raise RuntimeError(f"remove failed: {removed.output}") + +session.close() +``` + + + + +Pass a command object rather than positional arguments: `{ command: 'status' }`, +`{ command: 'diff' }`, or `{ command: 'worktree', subcommand: 'list' }`. Each +call returns a tool result, so check `exitCode` (Node) / `exit_code` (Python) +before trusting the output. + Direct git calls are privileged host operations. Put push, publish, and release workflows behind application-level approval or automation gates. + +A runnable version ships at `crates/code/sdk/node/examples/git/test_worktree_git.ts`. diff --git a/apps/docs/content/docs/en/code/examples/hooks.mdx b/apps/docs/content/docs/en/code/examples/hooks.mdx index fdaebb8..6821991 100644 --- a/apps/docs/content/docs/en/code/examples/hooks.mdx +++ b/apps/docs/content/docs/en/code/examples/hooks.mdx @@ -1,11 +1,32 @@ --- -title: "Hooks" -description: "Register lifecycle event callbacks" +title: "Lifecycle Hooks" +description: "Register, count, and unregister lifecycle event callbacks that observe and gate agent activity." 
--- -# Hooks +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +# Lifecycle Hooks + +Hooks let you observe and gate agent activity as it happens. You register a named +callback against a lifecycle event, the runtime invokes it at that point, and the +callback returns a decision such as `{ action: "continue" }`. Use hooks for +auditing, redaction, logging, or enforcing policy without changing the agent's +prompt. + +The lifecycle is symmetric: `registerHook` adds a callback by name, `hookCount` +tells you how many are active, and `unregisterHook` removes one by its name. + + + ```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session(process.cwd()); + +// Register a named hook on a lifecycle event. The callback must NOT throw — +// always return a decision such as { action: 'continue' }. session.registerHook( 'observe-env-read', 'pre_tool_use', @@ -14,8 +35,60 @@ session.registerHook( () => ({ action: 'continue' }), ); -console.log(session.hookCount()); +console.log('active hooks:', session.hookCount()); // 1 + +await session.run('Read the project README and summarize it.'); + +// Remove the hook by name when you no longer need it. session.unregisterHook('observe-env-read'); +console.log('active hooks:', session.hookCount()); // 0 + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open('agent.acl').read()) +session = agent.session('.', SessionOptions()) + +# Register a named hook on a lifecycle event. The callback returns a decision. +session.register_hook( + 'observe-env-read', + 'pre_tool_use', + {'pathPattern': '**/.env*'}, + {'priority': 100}, + lambda: {'action': 'continue'}, +) + +print("active hooks:", session.hook_count()) # 1 + +session.run("Read the project README and summarize it.") + +# Remove the hook by name when you no longer need it. 
+session.unregister_hook('observe-env-read') +print("active hooks:", session.hook_count()) # 0 + +session.close() ``` -Validate the event path you depend on before using a hook as a production gate. + + + +Notes: + +- A hook callback returns a decision. Return `{ action: "continue" }` + (Node) / `{"action": "continue"}` (Python) to let the agent proceed. +- The matcher (`{ pathPattern: '**/.env*' }`) scopes the hook to events whose + path matches the pattern, and `{ priority: 100 }` orders hooks on the same + event (higher runs first). +- Node hook callbacks must **not** throw — an uncaught throw can abort the + process. Keep the handler body total and always return a decision. +- `hookCount` / `hook_count` reflects the number of currently registered hooks, + which is handy in tests to assert that registration and cleanup happened. +- `unregisterHook` / `unregister_hook` takes the name you registered with. + Always tear down hooks you no longer need so they do not leak across runs. +- Validate the event path you depend on before using a hook as a production gate. diff --git a/apps/docs/content/docs/en/code/examples/index.mdx b/apps/docs/content/docs/en/code/examples/index.mdx index f9938d1..8320ef5 100644 --- a/apps/docs/content/docs/en/code/examples/index.mdx +++ b/apps/docs/content/docs/en/code/examples/index.mdx @@ -1,10 +1,19 @@ --- title: "Examples" -description: "v3.1.0 examples for A3S Code" +description: "v3.4.0 examples for A3S Code" --- # Examples -These examples use A3S Code v3.1.0 concepts: ACL configuration, explicit environment-variable credentials, session APIs, task-based delegation, automatic subagent delegation, `.a3s/agents`, AHP, direct tools, verification, and optional queue infrastructure. 
+These examples use A3S Code v3.4.0 concepts: ACL configuration, explicit environment-variable credentials, session APIs, streaming, structured output, task-based and automatic subagent delegation, programmable orchestration (`parallel` / `pipeline` / `parallelResumable`), `.a3s/agents`, AHP safety, skills, memory, direct tools, verification, git workflows, and optional MCP/queue infrastructure. -Start with [Quick Start](/docs/code/examples/quick-start), then move to streaming, model switching, hooks, security, and release-style verification. +Start with the [Quick Start](/docs/code/examples/quick-start), then explore by area: + +- **Sessions & runtime** — [Quick Start](/docs/code/examples/quick-start), [Streaming](/docs/code/examples/streaming), [Model switching](/docs/code/examples/model-switching), [Auto-compact](/docs/code/examples/auto-compact) +- **Structured & programmable** — [Structured output](/docs/code/examples/structured-output), [Orchestration](/docs/code/examples/orchestration), [Planning](/docs/code/examples/planning), [Batch](/docs/code/examples/batch) +- **Tools & context** — [Direct tools](/docs/code/examples/direct-tools), [ripgrep context](/docs/code/examples/ripgrep-context), [Prompt slots](/docs/code/examples/prompt-slots), [Git worktree](/docs/code/examples/git-worktree) +- **Skills & memory** — [Skills](/docs/code/examples/skills), [Skill tool](/docs/code/examples/skill-tool), [Memory](/docs/code/examples/memory), [Hooks](/docs/code/examples/hooks) +- **Security & verification** — [Security](/docs/code/examples/security), [AHP safety](/docs/code/examples/ahp-safety) +- **MCP & queues** — [Lane queue](/docs/code/examples/lane-queue), [External tasks](/docs/code/examples/external-tasks) + +New in v3.4.0: see the [Orchestration](/docs/code/examples/orchestration) example for fan-out (`parallel`), staged (`pipeline`), and resumable (`parallelResumable`) multi-agent workflows. 
diff --git a/apps/docs/content/docs/en/code/examples/lane-queue.mdx b/apps/docs/content/docs/en/code/examples/lane-queue.mdx index 6b9cab1..3f44d55 100644 --- a/apps/docs/content/docs/en/code/examples/lane-queue.mdx +++ b/apps/docs/content/docs/en/code/examples/lane-queue.mdx @@ -1,17 +1,88 @@ --- title: "Lane Queue" -description: "Explicit external dispatch example" +description: "Route a lane to an external worker and drain its pending tasks explicitly." --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Lane Queue +By default A3S Code runs every task in-process, with no queue. The lane queue is **optional infrastructure**: register an external handler for a lane and the tools routed to that lane are queued for an outside worker instead of being executed by the agent. You then drain the pending tasks, run them however you like, and report results back. Reach for this only when an external worker is genuinely part of your architecture. + +The four lanes are `control`, `query`, `execute`, and `generate`. Each handler has a `mode` of `internal` (the default), `external`, or `hybrid`. + + + + ```ts -const session = agent.session('/repo', { - queueConfig: { enableDlq: true, enableMetrics: true }, -}); +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('./agent.acl'); +const session = agent.session(process.cwd(), { builtinSkills: true }); +// Route the "execute" lane to an external worker. +// Tools on this lane are NOT run by the agent; they are queued for +// an outside worker to pick up and complete. await session.setLaneHandler('execute', { mode: 'external', timeoutMs: 300000 }); + +// hasQueue() is false until at least one external/hybrid lane is registered. +console.log('queue active:', session.hasQueue()); + +// Drain whatever is waiting for an external worker. 
const pending = await session.pendingExternalTasks(); +for (const task of pending) { + console.log('pending:', task.task_id, task.lane, task.command_type); + + // ... hand off to your worker, run it, then report the outcome back: + await session.completeExternalTask(task.task_id, { + success: true, + result: { note: 'done by external worker' }, + }); +} + +console.log('lane queue drained'); +``` + + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open("agent.acl").read()) +opts = SessionOptions() +opts.builtin_skills = True +session = agent.session(".", opts) + +# Route the "execute" lane to an external worker. +# Tools on this lane are NOT run by the agent; they are queued for +# an outside worker to pick up and complete. +session.set_lane_handler("execute", "external", 300000) + +# has_queue() is False until at least one external/hybrid lane is registered. +print("queue active:", session.has_queue()) + +# Drain whatever is waiting for an external worker. +pending = session.pending_external_tasks() +for task in pending: + print("pending:", task["task_id"], task["lane"], task["command_type"]) + + # ... hand off to your worker, run it, then report the outcome back: + session.complete_external_task( + task["task_id"], + success=True, + result={"note": "done by external worker"}, + ) + +print("lane queue drained") ``` -Use queues only when an external worker is part of the architecture. + + + +Notes: + +- **The default path is queue-free.** `hasQueue()` / `has_queue()` returns `false` until you register at least one `external` (or `hybrid`) lane handler. If you never call `setLaneHandler` / `set_lane_handler`, every task runs in-process and there is nothing to drain. +- Each pending task carries `task_id`, `session_id`, `lane`, `command_type`, `payload`, and `timeout_ms`. Pass the `task_id` back to `completeExternalTask` / `complete_external_task` once the work is done. +- The result shape is `{ success, result?, error? 
}` — `result` holds any JSON-serializable payload, and `error` is an optional message for failures. `completeExternalTask` / `complete_external_task` returns `true` if the task was found and completed, `false` otherwise. +- In Python these queue methods are synchronous; in Node `setLaneHandler`, `pendingExternalTasks`, and `completeExternalTask` return promises, while `hasQueue` is synchronous. diff --git a/apps/docs/content/docs/en/code/examples/memory.mdx b/apps/docs/content/docs/en/code/examples/memory.mdx index e311d3e..75d66ba 100644 --- a/apps/docs/content/docs/en/code/examples/memory.mdx +++ b/apps/docs/content/docs/en/code/examples/memory.mdx @@ -1,17 +1,86 @@ --- title: "Memory" -description: "Remember and recall task facts" +description: "Remember task outcomes and recall them later by similarity, tags, or recency." --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Memory +Persistent memory lets a session record what worked (and what didn't) and pull +those facts back later. Attach a `FileMemoryStore` so entries survive across +runs, write outcomes with `rememberSuccess` / `rememberFailure`, then retrieve +them with `recallSimilar`, `recallByTags`, or `memoryRecent`. + + + + ```ts -import { FileMemoryStore } from '@a3s-lab/code'; +import { Agent, FileMemoryStore } from '@a3s-lab/code'; +const agent = await Agent.create('agent.acl'); const session = agent.session('/repo', { memoryStore: new FileMemoryStore('./.a3s/memory'), }); -await session.rememberSuccess('docs rewrite', ['grep', 'read'], 'stale HCL references removed'); -const memories = await session.recallSimilar('docs release rewrite', 5); +// Record outcomes as the agent works. 
+await session.rememberSuccess( + 'refactored auth module', + ['read', 'edit', 'bash'], + 'all tests passed after extracting AuthService', +); +await session.rememberFailure( + 'migration attempt', + ['bash'], + 'psql connection refused on port 5432', +); + +// Recall later — by recency, by tool tags, or by semantic similarity. +const recent = await session.memoryRecent(10); +const byTags = await session.recallByTags(['read', 'edit'], 5); +const similar = await session.recallSimilar('auth refactor', 5); + +console.log(recent.length, byTags.length, similar.length); +``` + + + + +```python +from a3s_code import Agent, SessionOptions, FileMemoryStore + +agent = Agent.create(open("agent.acl").read()) +opts = SessionOptions() +opts.memory_store = FileMemoryStore("./.a3s/memory") +session = agent.session("/repo", opts) + +# Record outcomes as the agent works. Python helpers are synchronous — no await. +session.remember_success( + "refactored auth module", + ["read", "edit", "bash"], + "all tests passed after extracting AuthService", +) +session.remember_failure( + "migration attempt", + ["bash"], + "psql connection refused on port 5432", +) + +# Recall later — by recency, by tool tags, or by semantic similarity. +recent = session.memory_recent(10) +by_tags = session.recall_by_tags(["read", "edit"], 5) +similar = session.recall_similar("auth refactor", 5) + +print(len(recent), len(by_tags), len(similar)) ``` + + + + +Both `rememberSuccess` and `rememberFailure` take a short task description, the +list of tools involved (which double as searchable tags), and the outcome text. +The three recall methods are complementary: `memoryRecent(limit)` returns the +newest entries, `recallByTags(tags, limit)` filters by the tool tags you +recorded, and `recallSimilar(query, limit)` ranks entries by semantic relevance +to a query. Without a `memoryStore`, memory is in-process only and is discarded +when the session closes. 
diff --git a/apps/docs/content/docs/en/code/examples/meta.json b/apps/docs/content/docs/en/code/examples/meta.json index b711ca8..5ade942 100644 --- a/apps/docs/content/docs/en/code/examples/meta.json +++ b/apps/docs/content/docs/en/code/examples/meta.json @@ -5,6 +5,7 @@ "quick-start", "streaming", "structured-output", + "orchestration", "model-switching", "direct-tools", "planning", diff --git a/apps/docs/content/docs/en/code/examples/model-switching.mdx b/apps/docs/content/docs/en/code/examples/model-switching.mdx index e3e85c2..cc917fc 100644 --- a/apps/docs/content/docs/en/code/examples/model-switching.mdx +++ b/apps/docs/content/docs/en/code/examples/model-switching.mdx @@ -1,11 +1,21 @@ --- title: "Model Switching" -description: "Override the configured model per session" +description: "Choose the model per session, and override it per worker agent for cost and capability tuning." --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Model Switching -Configure multiple models: +A session runs against whatever model you pass in the `model` option. Declare +the models your agent can reach once, then pick one per session — a fast model +for high-volume, low-stakes work and a stronger model for review. Use this when +you want to balance cost against capability without changing any of your prompts. + +## Declaring models + +Models are configured in your agent file. Each provider lists the models it +exposes, and `default_model` is used when a session does not set `model`. ```text default_model = "openai/MiniMax-M2.7-highspeed" @@ -19,12 +29,138 @@ providers "openai" { } ``` -Override per session: +## Per-session model + +The `model` option is set when you open the session. Everything that session +runs — `send`, `run`, `task`, `parallel`, `pipeline` — uses that model. One +agent configuration can drive different model choices for different sessions. 
+ + + ```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); + +// A fast model for high-volume, low-stakes work. const fast = agent.session('/repo', { model: 'openai/MiniMax-M2.7-highspeed' }); +const draft = await fast.run('Draft a short README intro for this project.'); +console.log('draft:', draft); +await fast.close(); + +// A stronger model for review / higher-stakes reasoning. const review = agent.session('/repo', { model: 'openai/gpt-4o' }); +const critique = await review.run(`Critique this README intro:\n${draft}`); +console.log('critique:', critique); +await review.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open('agent.acl').read()) + +# A fast model for high-volume, low-stakes work. +fast_opts = SessionOptions() +fast_opts.model = 'openai/MiniMax-M2.7-highspeed' +fast = agent.session('/repo', fast_opts) +draft = fast.run('Draft a short README intro for this project.') +print('draft:', draft) +fast.close() + +# A stronger model for review / higher-stakes reasoning. +review_opts = SessionOptions() +review_opts.model = 'openai/gpt-4o' +review = agent.session('/repo', review_opts) +critique = review.run(f'Critique this README intro:\n{draft}') +print('critique:', critique) +review.close() ``` -Per-session `model` override lets one agent configuration drive different model -choices for different sessions. + + + +## Per-worker-agent model override + +Worker agents are registered with their own spec. Give a worker its own `model` +so it runs on a different (often smaller, cheaper) model than the session that +delegates to it. The orchestrating session keeps its own `model`; only the +delegated work runs on the worker's model. + + + + +```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); + +const session = agent.session('/repo', { + // Orchestrator stays on the stronger model. 
+ model: 'openai/gpt-4o', + // High-volume exploration runs on the cheaper model. + workerAgents: [ + { + name: 'scout', + description: 'Reads files and reports findings.', + model: 'openai/MiniMax-M2.7-highspeed', + }, + ], +}); + +// Delegate exploration to the cheaper worker, then reason on the strong model. +const findings = await session.task('scout', 'List every public API in src/.'); +const plan = await session.run(`Given these findings, propose a refactor:\n${findings}`); +console.log(plan); + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions, WorkerAgentSpec + +agent = Agent.create(open('agent.acl').read()) + +opts = SessionOptions() +# Orchestrator stays on the stronger model. +opts.model = 'openai/gpt-4o' +# High-volume exploration runs on the cheaper model. +opts.worker_agents = [ + WorkerAgentSpec( + name='scout', + description='Reads files and reports findings.', + model='openai/MiniMax-M2.7-highspeed', + ), +] +session = agent.session('/repo', opts) + +# Delegate exploration to the cheaper worker, then reason on the strong model. +findings = session.task('scout', 'List every public API in src/.') +plan = session.run(f'Given these findings, propose a refactor:\n{findings}') +print(plan) + +session.close() +``` + + + + +Notes: + +- The `model` value is an identifier string your runtime resolves to one of the + models declared in your agent file — there are no hard-coded model names in the + SDK. +- A worker agent's `model` applies only to that agent's delegated work. The + session's own `send`/`run` calls (and `task` calls to workers without their own `model`) still use the session `model`. +- A worker without a `model` inherits the session `model`, so you only override + the agents where a different model actually pays off. + +A runnable version showing the `model` option on a session ships at +`crates/code/sdk/node/examples/basic/test_api_alignment.ts`. 
diff --git a/apps/docs/content/docs/en/code/examples/orchestration.mdx b/apps/docs/content/docs/en/code/examples/orchestration.mdx new file mode 100644 index 0000000..9326a74 --- /dev/null +++ b/apps/docs/content/docs/en/code/examples/orchestration.mdx @@ -0,0 +1,237 @@ +--- +title: "Orchestration" +description: "Fan out independent work with session.parallel, build per-item chains with session.pipeline, and resume journaled runs with session.parallelResumable." +--- + +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +# Orchestration + +This page shows the three programmable orchestration primitives in A3S Code v3.4.0: `session.parallel` for fan-out, `session.pipeline` for per-item multi-stage chains, and `session.parallelResumable` for journaled runs that survive a crash. Use orchestration when you have several independent subagent tasks (parallel), or one transformation that flows through ordered stages per input (pipeline). + +For the conceptual model behind these primitives, see [Orchestration](/docs/code/orchestration). + +## Fan-out with `session.parallel` + +`parallel` takes an array of `AgentStepSpec` and runs them concurrently, returning one `StepOutcome` per spec **in input order** (not completion order). Each spec routes to a named subagent (`explore`, `plan`, `review`, `verification`, `general`, ...). Set `outputSchema` / `output_schema` on a spec to get a schema-validated `structured` result back. + + + + +```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session('.', {}); + +// Independent steps; outcomes come back in input order, not completion order. +const outcomes = await session.parallel([ + { taskId: 'langs', agent: 'general', description: 'list', prompt: 'Name three systems languages.', maxSteps: 2 }, + { taskId: 'safe', agent: 'general', description: 'classify', prompt: 'Is Rust memory-safe without a GC? 
yes/no.', maxSteps: 2 }, +]); + +for (const o of outcomes) { + console.log(`[parallel] ${o.taskId}: success=${o.success}`); +} + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open("agent.acl").read()) +session = agent.session(".", SessionOptions()) + +# Independent steps; outcomes come back in input order, not completion order. +outcomes = session.parallel([ + { + "task_id": "langs", + "agent": "general", + "description": "list languages", + "prompt": "Name three systems programming languages, comma-separated.", + "max_steps": 2, + }, + { + "task_id": "verdict", + "agent": "general", + "description": "classify", + "prompt": "Is Rust memory-safe without a GC? Answer yes or no.", + "max_steps": 2, + # Schema-validated structured output for this step. + "output_schema": { + "type": "object", + "properties": {"memory_safe": {"type": "boolean"}}, + "required": ["memory_safe"], + }, + }, +]) + +for o in outcomes: + print(f"[parallel] {o['task_id']}: success={o['success']} structured={o.get('structured')}") + +session.close() +``` + + + + +Outcomes are dicts in Python (`o['task_id']`, `o['success']`, `o.get('structured')`) and objects in Node (`o.taskId`, `o.success`, `o.structured`). The `maxParallelTasks` / `max_parallel_tasks` session option caps concurrency; extra specs queue, and the outcome array is still returned in full, in order. + +## Per-item chains with `session.pipeline` + +`pipeline` takes a list of input `items` and an ordered list of `stages`. Each item flows through the stages independently — there is **no barrier between stages**, so a fast item can reach stage 2 while a slow item is still in stage 1. A stage callback receives a `ctx`: the first stage sees `ctx.item`, later stages see `ctx.previous` (the prior `StepOutcome`, whose `.output` you build on). Return the next spec to continue, or `null` / `None` to stop that item's chain early. 
+ + + + +```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session('.', {}); + +// Stage 2 builds on stage 1's output. A stage callback MUST NOT throw — +// return null to stop this item's chain. +const results = await session.pipeline( + ['the Rust programming language'], + [ + (ctx) => ({ + taskId: 'sum', + agent: 'general', + description: 'summarize', + prompt: `In one sentence, what is ${ctx.item}?`, + maxSteps: 2, + }), + (ctx) => ({ + taskId: 'cls', + agent: 'general', + description: 'classify', + prompt: `Reply YES or NO: does this describe a programming language?\n\n${ctx.previous.output}`, + maxSteps: 2, + }), + ], +); + +for (const r of results) { + console.log(`[pipeline] final=${r === null ? null : JSON.stringify(r.output.slice(0, 60))}`); +} + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open("agent.acl").read()) +session = agent.session(".", SessionOptions()) + +# Each item chains through stages; stage 2 builds on stage 1. Return None from a +# stage (or raise — caught and treated as None) to stop that item's chain. +results = session.pipeline( + ["the Rust programming language"], + [ + lambda ctx: { + "task_id": "summarize", + "agent": "general", + "description": "summarize", + "prompt": f"In one sentence, what is {ctx['item']}?", + "max_steps": 2, + }, + lambda ctx: { + "task_id": "classify", + "agent": "general", + "description": "classify", + "prompt": "Reply with one word YES or NO: does this describe a " + f"programming language?\n\n{ctx['previous']['output']}", + "max_steps": 2, + }, + ], +) + +for r in results: + print(f"[pipeline] final={None if r is None else r['output'][:60]!r}") + +session.close() +``` + + + + +Key difference from `parallel`: stages are ordered and dependent, but items do **not** wait for each other between stages. 
Node stage callbacks must never throw — return `null` on error; Python stages may raise (a raised stage is caught and treated as `None`). + +## Resumable runs with `session.parallelResumable` + +`parallelResumable` is `parallel` with a journal. It takes the `specs` first and a stable `workflowId` second; each step's outcome is journaled to the session's store, so if the process crashes mid-run you can call it again with the same `workflowId` and completed steps are replayed from the journal instead of re-executed. It **requires a session store** — pass `sessionStore` / `session_store` when opening the session, or the call throws. + + + + +```ts +import { Agent, FileSessionStore } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +// parallelResumable journals to the session store; it throws without one. +const session = agent.session('.', { + sessionStore: new FileSessionStore('./.a3s/sessions'), +}); + +// Signature is (specs, workflowId): specs first, stable workflowId second. +const outcomes = await session.parallelResumable( + [ + { taskId: 'deps', agent: 'general', description: 'audit deps', prompt: 'Check manifests for outdated dependencies.', maxSteps: 2 }, + { taskId: 'tests', agent: 'verification', description: 'run tests', prompt: 'Run the test suite and summarize failures.', maxSteps: 2 }, + ], + 'nightly-audit', +); + +// Re-running with the same workflowId replays completed steps from the journal. +console.log(outcomes.map((o) => `${o.taskId}:${o.success}`).join(' ')); + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions, FileSessionStore + +agent = Agent.create(open("agent.acl").read()) +# parallel_resumable journals to the session store; it raises without one. +opts = SessionOptions() +opts.session_store = FileSessionStore("./.a3s/sessions") +session = agent.session(".", opts) + +# Signature is (specs, workflow_id): specs first, stable workflow_id second. 
+outcomes = session.parallel_resumable( + [ + {"task_id": "deps", "agent": "general", "description": "audit deps", "prompt": "Check manifests for outdated dependencies.", "max_steps": 2}, + {"task_id": "tests", "agent": "verification", "description": "run tests", "prompt": "Run the test suite and summarize failures.", "max_steps": 2}, + ], + "nightly-audit", +) + +# Re-running with the same workflow_id replays completed steps from the journal. +print(" ".join(f"{o['task_id']}:{o['success']}" for o in outcomes)) + +session.close() +``` + + + + +Notes: + +- All three primitives return outcomes aligned to input order: `{ taskId, success, output, error?, structured? }` (Node objects) / `{ "task_id", "success", "output", "error"?, "structured"? }` (Python dicts). +- Set `outputSchema` / `output_schema` on a spec to get a parsed result back in `structured`. +- `maxSteps` / `max_steps` caps the steps per subagent; `maxParallelTasks` / `max_parallel_tasks` (session option) caps fan-out concurrency. +- Node pipeline stage callbacks must never throw — return `null` on error. Python stages may raise (a raised stage is caught and treated as `None`). + +A runnable version ships at `crates/code/sdk/node/examples/orchestration/parallel-pipeline.mjs` and `crates/code/sdk/python/examples/orchestration_workflow.py`. diff --git a/apps/docs/content/docs/en/code/examples/planning.mdx b/apps/docs/content/docs/en/code/examples/planning.mdx index a2be065..b56d859 100644 --- a/apps/docs/content/docs/en/code/examples/planning.mdx +++ b/apps/docs/content/docs/en/code/examples/planning.mdx @@ -1,24 +1,73 @@ --- title: "Planning" -description: "Enable planning and goal tracking" +description: "Make the agent plan before it acts with planningMode" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Planning +Planning mode tells the session to produce a structured plan before it starts +calling tools. 
Use it for multi-step work (refactors, release reviews, audits) +where you want the agent to decompose the goal first instead of jumping straight +into edits. + +Set it through the session option `planningMode` (Node) / `planning_mode` +(Python). The accepted values are: + +| Value | Behavior | +| ------------ | --------------------------------------------------------------- | +| `"auto"` | The runtime detects from the message when a plan is worthwhile. | +| `"enabled"` | Force a plan on every request, even simple ones. | +| `"disabled"` | Skip planning entirely for the lowest-latency path. | + +`"auto"` is the default structured pre-analysis path. + + + + ```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); const session = agent.session('/repo', { - planningMode: 'enabled', - goalTracking: true, - maxToolRounds: 24, + planningMode: 'auto', }); -const result = await session.send('Plan and complete the release-readiness review'); +const result = await session.send( + 'Plan and complete the release-readiness review.', +); console.log(result.text); +console.log(`${result.toolCallsCount} tool calls executed`); + +await session.close(); ``` -`planningMode: 'auto'` is the default structured pre-analysis path, -`'enabled'` forces planning, and `'disabled'` turns it off for low-latency -requests. Planning state is attached to the run so a host UI can render a -TaskList and update task completion from run events. + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open("agent.acl").read()) +opts = SessionOptions() +opts.planning_mode = "auto" +session = agent.session("/repo", opts) + +result = session.send("Plan and complete the release-readiness review.") +print(result.text) +print(f"{result.tool_calls_count} tool calls executed") + +session.close() +``` + + + + +Planning state is attached to the run, so a host UI can render a task list from +run events and update completion as the agent works. 
Planning organizes the +work; verification commands still provide the completion evidence. -Verification commands still provide the completion evidence. +Runnable session examples ship at +`crates/code/sdk/node/examples/orchestration/parallel-pipeline.mjs` and +`crates/code/sdk/python/examples/orchestration_workflow.py`. diff --git a/apps/docs/content/docs/en/code/examples/prompt-slots.mdx b/apps/docs/content/docs/en/code/examples/prompt-slots.mdx index fe9b9d6..66cda54 100644 --- a/apps/docs/content/docs/en/code/examples/prompt-slots.mdx +++ b/apps/docs/content/docs/en/code/examples/prompt-slots.mdx @@ -1,16 +1,158 @@ --- title: "Prompt Slots" -description: "Programmatic role, guidelines, and response style" +description: "Customize the agent's persona, guidelines, and response style without overriding core behavior." --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Prompt Slots +Prompt slots are session options that shape the agent's system prompt declaratively. Use +them for host-level behavior — persona, coding standards, output style — that should not +live inside each user prompt. The slots layer on top of the agent's built-in instructions, +so core tool behavior (reading, writing, running commands) is preserved. + +There are four slots: + +| Slot | Purpose | +|------|---------| +| `role` / `role` | The persona the agent adopts. | +| `guidelines` / `guidelines` | Standards and rules the agent must follow. | +| `responseStyle` / `response_style` | How the agent should format its replies. | +| `extra` / `extra` | Freeform instructions appended verbatim. | + +## Basic usage + +Set any subset of the slots when you open a session. They apply to every turn of that +session. + + + + +```ts +import { Agent } from '@a3s-lab/code'; + +async function main() { + // Node: create() takes a PATH to the ACL agent file. 
+ const agent = await Agent.create('agent.acl'); + + const session = agent.session('/repo', { + role: 'release-readiness reviewer', + guidelines: 'Find blockers before improvements. Require command evidence for done claims.', + responseStyle: 'concise, findings first', + }); + + const result = await session.send('Is this repo ready to ship?'); + console.log(result.text); + + await session.close(); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); +``` + + + + +```python +from a3s_code import Agent, SessionOptions + +def main(): + # Python: create() takes the ACL SOURCE TEXT. + agent = Agent.create(open('agent.acl').read()) + + session = agent.session('/repo', SessionOptions( + role='release-readiness reviewer', + guidelines='Find blockers before improvements. Require command evidence for done claims.', + response_style='concise, findings first', + )) + + result = session.send('Is this repo ready to ship?') + print(result.text) + + session.close() + +main() +``` + + + + +## Each slot in turn + +The four slots compose independently. A persona-only session, a reviewer with strict +guidelines, and a session that appends a freeform instruction all use the same option set. + + + + ```ts -const session = agent.session('/repo', { - role: 'release-readiness reviewer', - guidelines: 'Find blockers before improvements. Require command evidence for done claims.', - responseStyle: 'concise, findings first', +// 1. Custom role only. +let session = agent.session(workspace, { + role: 'You are a senior Rust developer who specializes in async programming.', +}); + +// 2. Role + guidelines + response style. +session = agent.session(workspace, { + role: 'You are a Python code reviewer.', + guidelines: 'Always check for type hints. Flag any use of `eval()`.', + responseStyle: 'Reply in bullet points. Be concise.', +}); + +// 3. Extra freeform instructions only. 
+session = agent.session(workspace, { + extra: "Always end your response with '-- A3S'", }); + +// Core tool behavior is preserved regardless of the slots. +session = agent.session(workspace, { + role: 'You are a minimalist file manager.', + guidelines: 'Only create files when explicitly asked.', +}); +const result = await session.send( + "Create a file called test.txt with the content 'prompt slots work'. Then read it back.", +); +``` + + + + +```python +# 1. Custom role only. +session = agent.session(workspace, SessionOptions( + role='You are a senior Rust developer who specializes in async programming.', +)) + +# 2. Role + guidelines + response style. +session = agent.session(workspace, SessionOptions( + role='You are a Python code reviewer.', + guidelines='Always check for type hints. Flag any use of `eval()`.', + response_style='Reply in bullet points. Be concise.', +)) + +# 3. Extra freeform instructions only. +session = agent.session(workspace, SessionOptions( + extra="Always end your response with '-- A3S'", +)) + +# Core tool behavior is preserved regardless of the slots. +session = agent.session(workspace, SessionOptions( + role='You are a minimalist file manager.', + guidelines='Only create files when explicitly asked.', +)) +result = session.send( + "Create a file called test.txt with the content 'prompt slots work'. Then read it back.", +) ``` -Prompt slots are session options. Use them for host-level behavior that should not live in a user prompt. + + + +Slots customize personality and house rules; they do not disable tools or change the +agent's core loop. Keep task-specific requests in the `send` message and reserve the slots +for behavior that should hold across every turn of the session. + +A runnable version ships at `crates/code/sdk/node/examples/skills/test_prompt_slots.ts`. 
diff --git a/apps/docs/content/docs/en/code/examples/quick-start.mdx b/apps/docs/content/docs/en/code/examples/quick-start.mdx index 5466ed5..2cb6622 100644 --- a/apps/docs/content/docs/en/code/examples/quick-start.mdx +++ b/apps/docs/content/docs/en/code/examples/quick-start.mdx @@ -1,45 +1,63 @@ --- title: "Quick Start" -description: "Create an ACL config and run a first session" +description: "Create an agent, open a session, run one turn, and read the result." --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Quick Start -Create `agent.acl`: +The smallest useful program: create an agent from an ACL file, open a session on a +project directory, run one turn with `send`, print the reply text, and inspect the +verification summary the runtime produced for that turn. -```text -default_model = "openai/MiniMax-M2.7-highspeed" -max_parallel_tasks = 4 + + -providers "openai" { - apiKey = env("A3S_OPENAI_API_KEY") - baseUrl = env("A3S_OPENAI_BASE_URL") +```ts +import { Agent } from '@a3s-lab/code'; - models "MiniMax-M2.7-highspeed" { - name = "MiniMax M2.7 Highspeed" - tool_call = true - } -} +// Agent.create takes a PATH to the ACL file. +const agent = await Agent.create('agent.acl'); +const session = agent.session('.'); -auto_delegation { - enabled = true - auto_parallel = false - min_confidence = 0.72 - max_tasks = 2 -} +const result = await session.send('List the files in this directory.'); +console.log(result.text); + +// What the runtime checked while producing that turn. 
+console.log(session.verificationSummaryText()); + +await session.close(); ``` -Run a session: + + -```ts -import { Agent } from '@a3s-lab/code'; +```python +from a3s_code import Agent -const agent = await Agent.create('agent.acl'); -const session = agent.session(process.cwd(), { - builtinSkills: true, - autoDelegation: { enabled: true, maxTasks: 2 }, - autoParallel: false, -}); -const result = await session.send('Summarize the repository structure'); -console.log(result.text); +# Agent.create takes the ACL SOURCE TEXT, not a path. +agent = Agent.create(open('agent.acl').read()) +session = agent.session('.') + +result = session.send('List the files in this directory.') +print(result.text) + +# What the runtime checked while producing that turn. +print(session.verification_summary_text()) + +session.close() ``` + + + + +Note the asymmetry between the SDKs: in Node.js `Agent.create` takes a **path** to the +ACL file, while in Python it takes the **ACL source text** (read the file yourself). +Always `close()` the session when you are done so the runtime can flush state and +release resources. + +## Next steps + +- [Streaming](/docs/code/examples/streaming) — read tokens as they arrive +- [Sessions](/docs/code/examples/sessions) — persist and resume conversations diff --git a/apps/docs/content/docs/en/code/examples/ripgrep-context.mdx b/apps/docs/content/docs/en/code/examples/ripgrep-context.mdx index d811a3d..019ab10 100644 --- a/apps/docs/content/docs/en/code/examples/ripgrep-context.mdx +++ b/apps/docs/content/docs/en/code/examples/ripgrep-context.mdx @@ -1,13 +1,77 @@ --- -title: "Ripgrep Context" -description: "Use grep and read as a compact context pipeline" +title: "Ripgrep Context Builder" +description: "Use grep and glob to gather code context before asking the agent." 
--- -# Ripgrep Context +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +# Ripgrep Context Builder + +Fast code search with `session.grep` and `session.glob` lets you gather the +relevant files and matching lines, then feed them into a prompt — a lightweight +retrieval step before the agent reasons. Use this when you want to scope the +agent to a specific slice of a large codebase instead of letting it explore from +scratch. + + + ```ts -const hits = await session.grep('default_model|providers "openai"|baseUrl'); -const docs = await session.glob('content/docs/**/code/**/*.mdx'); +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session('.'); + +// 1. Find candidate files by glob pattern (returns a list of paths). +const files = await session.glob('src/**/*.ts'); + +// 2. Search the workspace for the symbol we care about (returns ripgrep text). +const hits = await session.grep('createSession'); + +// 3. Build a context string and feed it into a focused prompt. +const context = [ + `Files in scope:\n${files.join('\n')}`, + `Matches for "createSession":\n${hits}`, +].join('\n\n'); + +const answer = await session.run( + `Using only this context, explain how createSession is wired up:\n\n${context}`, +); +console.log(answer); ``` -Prefer search summaries and relevant snippets over injecting complete search output into a prompt. + + + +```python +from a3s_code import Agent + +agent = Agent.create(open('agent.acl').read()) +session = agent.session('.') + +# 1. Find candidate files by glob pattern (returns a list of paths). +files = session.glob('src/**/*.ts') + +# 2. Search the workspace for the symbol we care about (returns ripgrep text). +hits = session.grep('createSession') + +# 3. Build a context string and feed it into a focused prompt. 
+context = '\n\n'.join([ + 'Files in scope:\n' + '\n'.join(files), + 'Matches for "createSession":\n' + hits, +]) + +answer = session.run( + f'Using only this context, explain how createSession is wired up:\n\n{context}' +) +print(answer) +``` + + + + +`glob` returns a list of matching file paths, while `grep` returns the raw +ripgrep output as a single string. Both run locally and return quickly, so you +can chain several searches to assemble context cheaply before spending a model +turn. Pair them with `session.readFile` when you need the full body of a file +rather than just the matching lines. diff --git a/apps/docs/content/docs/en/code/examples/security.mdx b/apps/docs/content/docs/en/code/examples/security.mdx index b3490e9..c628199 100644 --- a/apps/docs/content/docs/en/code/examples/security.mdx +++ b/apps/docs/content/docs/en/code/examples/security.mdx @@ -1,21 +1,190 @@ --- title: "Security" -description: "Use explicit permissions for side effects" +description: "Gate privileged operations with a permission policy, a human-in-the-loop confirmation flow, and a security provider" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Security +Every side effect an agent can produce — writing files, running `bash`, pushing +git — flows through a permission policy. Set a `defaultDecision`, then list the +patterns that should be `allow`-ed, `deny`-ed, or sent to the `ask` path. To +keep a human in the loop, add a confirmation policy: `ask` decisions pause on a +`confirmation_required` event so your application (or a person) can approve or +reject each call. Use this whenever an agent runs against a real repository. 
+ + + + ```ts -const session = agent.session('/repo', { +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); + +const session = agent.session(process.cwd(), { permissionPolicy: { deny: ['write(**/.env*)', 'bash(rm -rf*)'], ask: ['bash(git push*)', 'bash(npm publish*)'], - allow: ['read(*)', 'grep(*)', 'glob(*)', 'bash(npm run build*)'], - defaultDecision: 'ask', + defaultDecision: 'allow', + }, + // Turn the `ask` patterns into a human-in-the-loop confirmation flow. + confirmationPolicy: { + enabled: true, + defaultTimeoutMs: 120000, + timeoutAction: 'reject', + }, +}); + +// Stream execution and resolve confirmations as they arrive. +const stream = await session.stream('Bump the version and push the release'); +while (true) { + const next = await stream.next(); + if (next.done || !next.value) break; + + const event = next.value; + if (event.type === 'confirmation_required') { + // Look up the pending request for richer display. + const [pending] = await session.pendingConfirmations(); + const toolId = pending?.toolId ?? event.toolId; + console.log(`[confirm] ${pending?.toolName ?? event.toolName}`); + console.log(JSON.stringify(pending?.args ?? {}, null, 2)); + + // In a real app, prompt the user here. + const approved = false; // deny risky operations by default + if (toolId) await session.confirmToolUse(toolId, approved, 'Reviewed by host'); + } +} + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions, PermissionPolicy, ConfirmationPolicy + + +def main() -> None: + agent = Agent.create(open("agent.acl").read()) + + opts = SessionOptions() + opts.permission_policy = PermissionPolicy( + deny=["write(**/.env*)", "bash(rm -rf*)"], + ask=["bash(git push*)", "bash(npm publish*)"], + default_decision="allow", + ) + # Turn the `ask` patterns into a human-in-the-loop confirmation flow. 
+ opts.confirmation_policy = ConfirmationPolicy( + enabled=True, + default_timeout_ms=120_000, + timeout_action="reject", + ) + + session = agent.session(".", opts) + + # Stream execution and resolve confirmations as they arrive. + for event in session.stream("Bump the version and push the release"): + if event.event_type == "confirmation_required": + # Look up the pending request for richer display. + pending = session.pending_confirmations() + first = pending[0] if pending else {} + tool_id = first.get("tool_id") or event.tool_id + print(f"[confirm] {first.get('tool_name') or event.tool_name}") + + # In a real app, prompt the user here. + approved = False # deny risky operations by default + if tool_id: + session.confirm_tool_use(tool_id, approved, "Reviewed by host") + + session.close() + + +if __name__ == "__main__": + main() +``` + + + + +## Add a security provider + +A `DefaultSecurityProvider` enables input taint tracking and output sanitisation, +screening tool I/O independently of the permission policy. Pass one through +`securityProvider` (Node) / `security_provider` (Python); omit it to disable +security entirely. + + + + +```ts +import { Agent, DefaultSecurityProvider } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); + +const session = agent.session(process.cwd(), { + securityProvider: new DefaultSecurityProvider(), + permissionPolicy: { + ask: ['bash*'], + defaultDecision: 'allow', }, }); + +// Privileged host operations run through the provider + policy. +const out = await session.bash('echo "screened by the security provider"'); +console.log(out); + +await session.close(); ``` -Keep release and publish actions on the `ask` or `deny` path unless automation owns the final step. -Direct host calls such as `session.tool()` are privileged host operations; gate -them in the application before exposing them to users. 
+ + + +```python +from a3s_code import Agent, SessionOptions, PermissionPolicy, DefaultSecurityProvider + + +def main() -> None: + agent = Agent.create(open("agent.acl").read()) + + opts = SessionOptions() + opts.security_provider = DefaultSecurityProvider() + opts.permission_policy = PermissionPolicy( + ask=["bash*"], + default_decision="allow", + ) + + session = agent.session(".", opts) + + # Privileged host operations run through the provider + policy. + out = session.bash('echo "screened by the security provider"') + print(out) + + session.close() + + +if __name__ == "__main__": + main() +``` + + + + +## Notes + +- `defaultDecision` is the fallback for any tool call not matched by an `allow` / `deny` + / `ask` pattern (one of `allow`, `deny`, or `ask`). Open up only what automation needs. +- A `confirmationPolicy` with `enabled: true` is what turns `ask` decisions into a + pausing `confirmation_required` event. Resolve each one with + `session.confirmToolUse(toolId, approved, reason?)`; if no answer arrives within + `defaultTimeoutMs`, `timeoutAction` (`reject` in the example above) decides the outcome. +- Keep release and publish actions (`bash(git push*)`, `bash(npm publish*)`) on the + `ask` or `deny` path unless automation owns the final step. +- Direct host calls such as `session.tool()`, `session.bash()`, and `session.git()` + are privileged host operations. They flow through the same provider and policy, so + gate them rather than exposing them unguarded. + +A runnable confirmation loop ships at +`crates/code/sdk/node/examples/streaming/hitl_confirmation_loop.ts` and +`crates/code/sdk/python/examples/hitl_confirmation_loop.py`. 
diff --git a/apps/docs/content/docs/en/code/examples/skill-tool.mdx b/apps/docs/content/docs/en/code/examples/skill-tool.mdx index 5210d9e..651895e 100644 --- a/apps/docs/content/docs/en/code/examples/skill-tool.mdx +++ b/apps/docs/content/docs/en/code/examples/skill-tool.mdx @@ -1,15 +1,89 @@ --- title: "Skill Tool" -description: "Search and apply skills from a turn" +description: "Invoke a registered skill as a callable tool" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Skill Tool -Ask the agent to use skills explicitly when the task calls for it: +Skills surface to the model as two core tools: `search_skills` (find a skill by intent) +and `Skill` (invoke a skill by name). A tool-kind skill runs its handler; an +instruction-kind skill returns its body for the model to apply. You can let the model +call these tools during a run, or invoke a skill directly from the SDK with +`session.tool('Skill', ...)`. + +Register a skill directory (a folder of `SKILL.md` files) via `skillDirs` / `skill_dirs`, +or rely on built-in discovery with `builtinSkills` / `builtin_skills`. Either way the +skills become visible through `Skill` and `search_skills`. + + + + +```ts +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session(process.cwd(), { + builtinSkills: true, + skillDirs: ['./skills'], // a folder of SKILL.md files +}); + +// Skill and search_skills are core tools — confirm they're on the surface. +console.log(session.toolNames()); + +// Option A: let the model search for and apply a skill during a run. +const run = await session.run('Search available skills, then apply the most relevant one.'); +console.log(run); + +// Option B: invoke a skill directly as a callable tool. +// Canonical args: { skill_name, prompt? }. 
+const result = await session.tool('Skill', { + skill_name: 'code-review', + prompt: 'Review this patch for correctness and regressions.', +}); +console.log(result); + +await session.close(); +``` + + + + +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open('agent.acl').read()) +opts = SessionOptions() +opts.builtin_skills = True +opts.skill_dirs = ['./skills'] # a folder of SKILL.md files +session = agent.session('.', opts) + +# Skill and search_skills are core tools — confirm they're on the surface. +print(session.tool_names()) -```text -Search available skills for release review guidance. -Apply the best matching skill, then inspect package metadata and build evidence. +# Option A: let the model search for and apply a skill during a run. +run = session.run('Search available skills, then apply the most relevant one.') +print(run) + +# Option B: invoke a skill directly as a callable tool. +# Canonical args: { skill_name, prompt? }. +result = session.tool('Skill', { + 'skill_name': 'code-review', + 'prompt': 'Review this patch for correctness and regressions.', +}) +print(result) + +session.close() ``` -The harness exposes `search_skills` and `Skill` as core skill tools. Skill administration is handled by SDK registration, `skillDirs`, or project files, not by a model-visible management tool. + + + +A `SKILL.md` declares its `kind` in frontmatter (`tool`, `instruction`, or `agent`). +For a tool-kind skill, the `Skill` tool runs the skill's handler and returns its output; +for an instruction-kind skill, it returns the body for the model to apply. Skill +administration is handled by SDK registration, skill directories, or project files — not +by a model-visible management tool. + +A runnable version ships at `crates/code/sdk/node/examples/skills/test_custom_skills_agents.ts`. 
diff --git a/apps/docs/content/docs/en/code/examples/skills.mdx b/apps/docs/content/docs/en/code/examples/skills.mdx index 538f693..466781d 100644 --- a/apps/docs/content/docs/en/code/examples/skills.mdx +++ b/apps/docs/content/docs/en/code/examples/skills.mdx @@ -1,25 +1,116 @@ --- -title: "Skills" -description: "Load file-based and built-in skills" +title: "Skills & Custom Agents" +description: "Toggle the built-in skills and load your own skills and subagents from project directories." --- -# Skills +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +# Skills & Custom Agents + +A3S Code ships with a set of built-in skills, and you can extend a session with your +own skills and subagents loaded from directories on disk. Use the `builtinSkills` +option to turn the bundled skills on or off, and `agentDirs` (or `registerAgentDir`) +to point the session at folders that contain your custom `*.skill.md` and agent +definitions. This is the right approach when you want project-specific behavior +without changing the runtime. + + + ```ts -const session = agent.session('/repo', { +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); + +// Built-in skills ON, plus custom skills/agents from project dirs. +const session = agent.session('/path/to/project', { builtinSkills: true, - skillDirs: ['./skills'], + agentDirs: ['./.a3s/skills', './.a3s/agents'], }); + +// You can also register more directories after the session exists. +session.registerAgentDir('./team/shared-agents'); + +// Inspect what the session loaded. +console.log('Tools:', session.toolNames()); +console.log('Commands:', session.listCommands()); + +// The agent now has access to both built-in and custom skills. 
+const result = await session.run( + 'Use the project conventions skill to scaffold a new module.', +); +console.log(result); + +await session.close(); ``` -Example skill: + + -```md ---- -name: release-review -description: Review release blockers -allowed-tools: "read(*), grep(*), bash(npm run build*)" ---- +```python +from a3s_code import Agent, SessionOptions + +agent = Agent.create(open('agent.acl').read()) + +# Built-in skills ON, plus custom skills/agents from project dirs. +opts = SessionOptions() +opts.builtin_skills = True +opts.agent_dirs = ['./.a3s/skills', './.a3s/agents'] + +session = agent.session('/path/to/project', opts) + +# You can also register more directories after the session exists. +session.register_agent_dir('./team/shared-agents') + +# Inspect what the session loaded. +print('Tools:', session.tool_names()) +print('Commands:', session.list_commands()) -Return blockers first, with evidence. +# The agent now has access to both built-in and custom skills. +result = session.run( + 'Use the project conventions skill to scaffold a new module.', +) +print(result) + +session.close() ``` + + + + +## Disabling the built-in skills + +Set `builtinSkills` / `builtin_skills` to `false` when you want a lean session that +only uses the skills you explicitly provide. The bundled skills are no longer +registered, so `toolNames()` / `tool_names()` reflects only your custom set plus the +core tools. 
+ + + + +```ts +const session = agent.session('/path/to/project', { + builtinSkills: false, + agentDirs: ['./.a3s/skills'], +}); +``` + + + + +```python +opts = SessionOptions() +opts.builtin_skills = False +opts.agent_dirs = ['./.a3s/skills'] +session = agent.session('/path/to/project', opts) +``` + + + + +Custom subagents loaded from `agentDirs` can be referenced by name in +[`session.parallel(...)`](/docs/code/examples/orchestration) and +[`session.pipeline(...)`](/docs/code/examples/orchestration) alongside the built-in +registry agents (`explore`, `plan`, `general`, `verification`, `review`). + +A runnable version ships at `crates/code/sdk/node/examples/skills/test_custom_skills_agents.ts`. diff --git a/apps/docs/content/docs/en/code/examples/streaming.mdx b/apps/docs/content/docs/en/code/examples/streaming.mdx index 37f2ae7..823bde6 100644 --- a/apps/docs/content/docs/en/code/examples/streaming.mdx +++ b/apps/docs/content/docs/en/code/examples/streaming.mdx @@ -1,22 +1,97 @@ --- title: "Streaming" -description: "Read AgentEvent values as a turn runs" +description: "Read incremental AgentEvent values as a turn runs" --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Streaming +`session.stream(prompt)` yields incremental events as the turn runs, so you can render text as it arrives and react to tool activity in real time. Use it when you want a live UI or a CLI that prints output token-by-token instead of waiting for the full result from `send` or `run`. + +Each event carries a type discriminant. The common kinds are `text_delta` (a chunk of assistant text), `tool_start` / `tool_end` (a tool call beginning and finishing), `verification` (a verification summary), and `error`. 
+ + + + ```ts -const stream = await session.stream('Run tests and explain failures'); +import { Agent } from '@a3s-lab/code'; + +const agent = await Agent.create('agent.acl'); +const session = agent.session(process.cwd(), { planningMode: 'disabled' }); + +const stream = await session.stream( + 'Use the bash tool to run the tests, then summarize the result.', +); while (true) { - const { value: event, done } = await stream.next(); - if (done) break; - if (!event) continue; + const next = await stream.next(); + if (next.done || !next.value) break; - if (event.text) process.stdout.write(event.text); - if (event.toolName) console.log('\n[tool]', event.toolName); - if (event.error) console.error(event.error); + const event = next.value; + if (event.type === 'text_delta' && event.text) { + process.stdout.write(event.text); + } else if (event.type === 'tool_start') { + console.log(`\n[tool:start] ${event.toolName ?? 'unknown'}`); + } else if (event.type === 'tool_end') { + console.log(`\n[tool:end] ${event.toolName ?? 'unknown'} exit=${event.exitCode ?? 0}`); + } else if (event.type === 'verification') { + console.log(`\n[verification] ${event.verificationSummaryText ?? ''}`); + } else if (event.type === 'error') { + throw new Error(event.error ?? 'stream error'); + } } + +console.log('\n[stream] complete'); +await session.close(); ``` -Streamed events can include text, tool calls, tool output, errors, token totals, and verification summaries. + + + +```python +import os + +from a3s_code import Agent, SessionOptions + + +def main() -> None: + agent = Agent.create(open("agent.acl").read()) + + opts = SessionOptions() + opts.planning_mode = "disabled" + session = agent.session(".", opts) + + prompt = "Use the bash tool to run the tests, then summarize the result." 
+ + try: + for event in session.stream(prompt): + if event.event_type == "text_delta" and event.text: + print(event.text, end="", flush=True) + elif event.event_type == "tool_start": + print(f"\n[tool:start] {event.tool_name or 'unknown'}") + elif event.event_type == "tool_end": + print(f"\n[tool:end] {event.tool_name or 'unknown'} exit={event.exit_code or 0}") + elif event.event_type == "verification": + print(f"\n[verification] {event.verification_summary_text or ''}") + elif event.event_type == "error": + raise RuntimeError(event.error or "stream error") + print("\n[stream] complete") + finally: + session.close() + + +if __name__ == "__main__": + main() +``` + + + + +Notes: + +- Node iterates the stream manually with `stream.next()`, checking `next.done` and `next.value`. In the current build the Python SDK exposes streaming as a synchronous iterator, so you consume it with a plain `for` loop (orchestration APIs such as `parallel` and `pipeline` remain `async`). +- The type discriminant differs by language: Node reads `event.type`, Python reads `event.event_type`. Other fields follow each language's casing: `toolName` / `exitCode` / `verificationSummaryText` in Node, `tool_name` / `exit_code` / `verification_summary_text` in Python. +- Streamed events can also include human-in-the-loop confirmation signals (`confirmation_required`, `confirmation_received`, `confirmation_timeout`) when a confirmation policy is enabled. + +Runnable streaming examples ship under `crates/code/sdk/node/examples/streaming/`. A complete human-in-the-loop confirmation loop ships at `crates/code/sdk/node/examples/streaming/hitl_confirmation_loop.ts`, and the matching Python version ships at `crates/code/sdk/python/examples/hitl_confirmation_loop.py`. 
diff --git a/apps/docs/content/docs/en/code/examples/structured-output.mdx b/apps/docs/content/docs/en/code/examples/structured-output.mdx index 0b74ec9..7b5a162 100644 --- a/apps/docs/content/docs/en/code/examples/structured-output.mdx +++ b/apps/docs/content/docs/en/code/examples/structured-output.mdx @@ -1,20 +1,29 @@ --- title: "Structured Output" -description: "Generate schema-validated JSON objects with generate_object" +description: "Generate schema-validated JSON objects with the generate_object tool." --- +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + # Structured Output -The `generate_object` tool produces JSON objects that strictly conform to a -provided JSON Schema. It supports both agent-driven and direct invocation. +The built-in `generate_object` tool produces JSON objects that strictly conform to a JSON Schema you supply, instead of free-form text. Use it whenever you need machine-readable results: extraction, classification, config generation, or feeding another program. + +You call it through `session.tool('generate_object', ...)`. The tool result carries the validated object as JSON on `result.output` — parse it and read the `object` field. The same tool also supports agent-driven invocation, where the model decides to call it during a `send`. + +## Direct tool call -## Direct Tool Call +The simplest path: call `generate_object` directly and parse the validated object out of the result. 
+ + + ```ts import { Agent } from '@a3s-lab/code'; const agent = await Agent.create('config.acl'); const session = agent.session('.', { + builtinSkills: true, permissionPolicy: { defaultDecision: 'allow' }, }); @@ -34,91 +43,149 @@ const result = await session.tool('generate_object', { }, prompt: 'Extract: "Alice is 28, skilled in Rust, TypeScript, and Python."', schema_name: 'developer', + mode: 'tool', }); const { object } = JSON.parse(result.output); +console.log(object); // { name: "Alice", age: 28, skills: ["Rust", "TypeScript", "Python"] } + +await session.close(); ``` -## Agent-Driven (Autonomous) + + -Let the agent decide when to use structured output: +```python +import json +from a3s_code import Agent, SessionOptions, PermissionPolicy -```ts -const result = await session.send(` - Read the file config.yaml and extract all service definitions - into a structured JSON using generate_object with this schema: - { type: "object", required: ["services"], properties: { - services: { type: "array", items: { type: "object", - required: ["name", "port"], properties: { - name: { type: "string" }, port: { type: "integer" } - }}}}} -`); +agent = Agent.create(open('config.acl').read()) +opts = SessionOptions() +opts.builtin_skills = True +opts.permission_policy = PermissionPolicy(default_decision="allow") +session = agent.session('.', opts) + +result = session.tool("generate_object", { + "schema": { + "type": "object", + "required": ["name", "age", "skills"], + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer", "minimum": 0}, + "skills": { + "type": "array", + "items": {"type": "string"}, + "minItems": 1, + }, + }, + }, + "prompt": 'Extract: "Alice is 28, skilled in Rust, TypeScript, and Python."', + "schema_name": "developer", + "mode": "tool", +}) + +obj = json.loads(result.output)["object"] +print(obj) +# {"name": "Alice", "age": 28, "skills": ["Rust", "TypeScript", "Python"]} + +session.close() ``` -## Two-Phase Pattern + + + +The validated 
value lives on the `object` key of the parsed output. Every field declared in `required` is guaranteed present and correctly typed; if the model cannot satisfy the schema the tool reports a non-zero exit code on `result.exitCode` (Node) / `result.exit_code` (Python). + +## Enum classification + +Constrain a field to a fixed set with `enum`. This turns the model into a reliable classifier. -For complex tasks, separate reasoning from structured output: + + ```ts -// Phase 1: Agent reasons freely -const analysis = await session.send('Analyze the security of auth.ts'); +const result = await session.tool('generate_object', { + schema: { + type: 'object', + required: ['sentiment', 'confidence'], + properties: { + sentiment: { type: 'string', enum: ['positive', 'negative', 'neutral'] }, + confidence: { type: 'number', minimum: 0, maximum: 1 }, + }, + }, + prompt: 'Classify sentiment: "This is the worst product I have ever used."', + schema_name: 'sentiment', +}); + +const { object } = JSON.parse(result.output); +console.log(object.sentiment, object.confidence); // "negative" 0.97 +``` + + + + +```python +result = session.tool("generate_object", { + "schema": { + "type": "object", + "required": ["sentiment", "confidence"], + "properties": { + "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]}, + "confidence": {"type": "number", "minimum": 0, "maximum": 1}, + }, + }, + "prompt": 'Classify sentiment: "This is the worst product I have ever used."', + "schema_name": "sentiment", +}) + +obj = json.loads(result.output)["object"] +print(obj["sentiment"], obj["confidence"]) # "negative" 0.97 +``` + + + + +## Nested schemas and arrays + +Schemas can nest objects and arrays to any depth, and the runtime validates the whole structure. This models real config files, manifests, or API payloads in one call. 
+ + + -// Phase 2: Deterministic structured extraction -const structured = await session.tool('generate_object', { +```ts +const result = await session.tool('generate_object', { schema: { type: 'object', - required: ['vulnerabilities', 'risk_score'], + required: ['items'], properties: { - vulnerabilities: { + items: { type: 'array', + minItems: 3, + maxItems: 5, items: { type: 'object', - required: ['type', 'severity', 'line'], + required: ['name', 'category'], properties: { - type: { type: 'string' }, - severity: { type: 'string', enum: ['low', 'medium', 'high', 'critical'] }, - line: { type: 'integer' }, - description: { type: 'string' }, + name: { type: 'string' }, + category: { type: 'string', enum: ['fruit', 'vegetable', 'grain'] }, }, }, }, - risk_score: { type: 'number', minimum: 0, maximum: 10 }, }, }, - prompt: `Based on this analysis, produce a structured vulnerability report:\n\n${analysis.text}`, - schema_name: 'security_report', + prompt: 'List 3 food items with their categories.', + schema_name: 'food_list', }); -``` -## Streaming Partial Objects - -```ts -const stream = await session.stream('Extract all entities from the document...'); - -for await (const ev of stream) { - if (ev.type === 'tool_output_delta' && ev.toolName === 'generate_object') { - const { object_partial } = JSON.parse(ev.text); - updateUI(object_partial); // progressively render - } - if (ev.type === 'tool_end' && ev.toolName === 'generate_object') { - const { object } = JSON.parse(ev.toolOutput); - finalize(object); // fully validated - } -} +const { items } = JSON.parse(result.output).object; +console.log(items.length, items.map((i) => i.name)); ``` -## Python + + ```python -from a3s_code import Agent, SessionOptions, PermissionPolicy -import json - -agent = Agent.create(open('config.acl').read()) -opts = SessionOptions() -opts.permission_policy = PermissionPolicy(default_decision="allow") -session = agent.session('.', opts) - result = session.tool("generate_object", { "schema": 
{ "type": "object", @@ -126,19 +193,65 @@ result = session.tool("generate_object", { "properties": { "items": { "type": "array", - "items": {"type": "object", "required": ["name", "price"], - "properties": {"name": {"type": "string"}, "price": {"type": "number"}}} - } - } + "minItems": 3, + "maxItems": 5, + "items": { + "type": "object", + "required": ["name", "category"], + "properties": { + "name": {"type": "string"}, + "category": {"type": "string", "enum": ["fruit", "vegetable", "grain"]}, + }, + }, + }, + }, }, - "prompt": "Extract items from: 'Apple $1.50, Banana $0.75, Orange $2.00'", - "schema_name": "grocery_list", + "prompt": "List 3 food items with their categories.", + "schema_name": "food_list", }) -data = json.loads(result.output)["object"] +items = json.loads(result.output)["object"]["items"] +print(len(items), [i["name"] for i in items]) ``` -## Schema Validation Coverage + + + +## Agent-driven invocation + +You can also let the agent decide when to use structured output. Ask it to call `generate_object` during a `send`; it gathers context first, then emits the object. + + + + +```ts +const result = await session.send( + 'Use the generate_object tool to extract the following into an object ' + + 'with fields "title" (string), "year" (integer), "genre" (string): ' + + 'The movie "Inception" was released in 2010 and is a sci-fi thriller.' 
+); + +console.log(`tool calls: ${result.toolCallsCount}, tokens: ${result.totalTokens}`); +``` + + + + +```python +result = session.send( + 'Use the generate_object tool to produce a JSON object with schema ' + '{"type":"object","required":["language","paradigm"],"properties":' + '{"language":{"type":"string"},"paradigm":{"type":"string"}}} ' + 'for: "Rust is a systems programming language with a focus on safety."' +) + +print(f"tool calls: {result.tool_calls_count}, tokens: {result.total_tokens}") +``` + + + + +## Schema validation coverage The built-in validator supports: @@ -150,3 +263,11 @@ The built-in validator supports: - `minimum`, `maximum`, `exclusiveMinimum`, `exclusiveMaximum` - `minItems`, `maxItems`, `items` - Nested object and array validation + +## Notes + +- The validated value is on the `object` key of the parsed `result.output`. Pass `mode: 'tool'` for direct structured calls, or `mode: 'prompt'` for a prompt-only fallback. +- List every field you depend on in `required` — the runtime enforces it, so missing or mistyped fields fail validation instead of silently returning partial data. +- `generate_object` is a built-in tool, so the session needs `builtinSkills` / `builtin_skills` enabled (shown above). + +A runnable version ships at `crates/code/sdk/node/examples/basic/test_generate_object.ts` and `crates/code/sdk/python/examples/test_generate_object.py`. 
diff --git a/apps/docs/content/docs/en/code/meta.json b/apps/docs/content/docs/en/code/meta.json index 1fd3566..68ade31 100644 --- a/apps/docs/content/docs/en/code/meta.json +++ b/apps/docs/content/docs/en/code/meta.json @@ -8,6 +8,7 @@ "sessions", "commands", "tools", + "verification", "tasks", "teams", "orchestration", @@ -23,6 +24,7 @@ "architecture", "lane-queue", "multi-machine", + "cluster-extension-points", "---Extensions---", "providers", "mcp", diff --git a/apps/docs/content/docs/en/code/multi-machine.mdx b/apps/docs/content/docs/en/code/multi-machine.mdx index b16b155..d825ff8 100644 --- a/apps/docs/content/docs/en/code/multi-machine.mdx +++ b/apps/docs/content/docs/en/code/multi-machine.mdx @@ -7,7 +7,7 @@ description: "Placing orchestration steps across machines via the AgentExecutor A3S Code runs multi-agent orchestration as a *grammar* expressed in code, then places the resulting steps wherever you want them to run. The split is drawn -along the **framework / host (书安OS) boundary**, introduced in `[3.4.0]`: +along the **framework / host boundary**, introduced in `[3.4.0]`: - The **framework** owns the orchestration grammar and the serializable data contracts. It never decides where a step runs. @@ -44,7 +44,7 @@ combinators (parallel / pipeline / resumable) The in-box `TaskExecutor` runs each step as a child agent locally — in-process, on Tokio — inheriting the session's agent registry, LLM client, workspace, MCP -tools, and subagent tracker. A host such as 书安OS substitutes its own executor +tools, and subagent tracker. A host such as a cluster runtime substitutes its own executor to place steps across a cluster; the combinators are unaffected. `concurrency_hint()` is **advisory, not a hard local bound**. 
The local default diff --git a/apps/docs/content/docs/en/code/orchestration.mdx b/apps/docs/content/docs/en/code/orchestration.mdx index 17cfc9c..473232d 100644 --- a/apps/docs/content/docs/en/code/orchestration.mdx +++ b/apps/docs/content/docs/en/code/orchestration.mdx @@ -26,7 +26,7 @@ Everything in this layer is written against a single seam, `AgentExecutor`: - The **framework** owns the *grammar* — which steps exist, how they compose, the concurrency *hint*, and the serializable contracts `AgentStepSpec` / `StepOutcome`. -- The **host** (书安OS) owns *placement* — transport, scheduling, and where a +- The **host** owns *placement* — transport, scheduling, and where a step actually runs. The in-box default executor (`TaskExecutor`) runs every step locally, diff --git a/apps/docs/content/docs/en/code/persistence.mdx b/apps/docs/content/docs/en/code/persistence.mdx index 16ba7bb..c8dc98e 100644 --- a/apps/docs/content/docs/en/code/persistence.mdx +++ b/apps/docs/content/docs/en/code/persistence.mdx @@ -58,7 +58,7 @@ console.log(result.totalTokens); ``` ```python -result = await session.resume_run('run-abc123') +result = session.resume_run('run-abc123') print(result.total_tokens) ``` diff --git a/apps/docs/content/docs/en/code/sessions.mdx b/apps/docs/content/docs/en/code/sessions.mdx index 7e16de6..fdedb69 100644 --- a/apps/docs/content/docs/en/code/sessions.mdx +++ b/apps/docs/content/docs/en/code/sessions.mdx @@ -95,7 +95,7 @@ if (session.isClosed()) { ``` ```python -await session.close() +session.close() if session.is_closed(): # send/stream now reject with CodeError::SessionClosed pass @@ -133,9 +133,9 @@ console.log(agent.isClosed()); ``` ```python -ids = await agent.list_sessions() -await agent.close_session(ids[0]) -await agent.close() # closes all remaining sessions + global MCP +ids = agent.list_sessions() +agent.close_session(ids[0]) +agent.close() # closes all remaining sessions + global MCP print(agent.is_closed()) ``` diff --git 
a/apps/docs/content/docs/en/code/verification.mdx b/apps/docs/content/docs/en/code/verification.mdx new file mode 100644 index 0000000..973c0e6 --- /dev/null +++ b/apps/docs/content/docs/en/code/verification.mdx @@ -0,0 +1,174 @@ +--- +title: "Verification" +description: "Prove a turn is done with verification commands and reports instead of trusting the model's claim" +--- + +import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; + +# Verification + +The harness treats "done" as something that must be **proven**, not merely +claimed. When the model says a task is complete, that assertion is worth nothing +on its own. Verification turns the claim into evidence: you declare commands +that *must* succeed, the runtime executes them, and the result carries a report +you can inspect, gate on, or surface to a user. + +Verification is session-scoped. The Rust core runs each command, records its +exit status and output, and rolls every report up into a single summary that +travels alongside the turn result. + +## Running Verification Commands + +A verification command is a small, named check: an `id`, a `kind`, a +human-readable `description`, and the `command` to run. Mark a check `required` +when its failure should be treated as a hard failure rather than a warning; an optional `timeoutMs` (`timeout_ms` in Python) bounds how long the command may run, in milliseconds.
+ + + + +```ts +const report = await session.verifyCommands('release-readiness', [ + { + id: 'build', + kind: 'build', + description: 'Project compiles', + command: 'cargo build --all-features', + required: true, + timeoutMs: 120000, + }, + { + id: 'tests', + kind: 'test', + description: 'Unit tests pass', + command: 'cargo test', + required: true, + }, +]); + +console.log(report); +``` + + + + +```python +report = session.verify_commands('release-readiness', [ + { + "id": "build", + "kind": "build", + "description": "Project compiles", + "command": "cargo build --all-features", + "required": True, + "timeout_ms": 120000, + }, + { + "id": "tests", + "kind": "test", + "description": "Unit tests pass", + "command": "cargo test", + "required": True, + }, +]) + +print(report) +``` + + + + +The `subject` (here `release-readiness`) labels the batch so multiple +verification passes within one session stay distinct in the reports. + +## Reading The Post-Turn Summary + +Every turn's `send()` result also carries read-only verification fields, so you +can gate on the outcome without issuing a separate verification call. Use these +to decide whether the turn actually accomplished what it claimed. 
+ + + + +```ts +const result = await session.send('Apply the fix and run the checks'); + +console.log(result.verificationStatus); +console.log(result.pendingVerificationCount); +console.log(result.failedVerificationCount); +console.log(result.verificationReportCount); +console.log(result.verificationSummaryText); + +if (result.failedVerificationCount > 0) { + throw new Error('Turn reported done but verification failed'); +} +``` + + + + +```python +result = session.send('Apply the fix and run the checks') + +print(result.verification_status) +print(result.pending_verification_count) +print(result.failed_verification_count) +print(result.verification_report_count) +print(result.verification_summary_text) + +if result.failed_verification_count > 0: + raise RuntimeError('Turn reported done but verification failed') +``` + + + + +## Inspecting Reports And Summaries + +Beyond the per-turn fields, the session exposes the full set of reports, a +structured summary, the available presets, and a human-readable digest. The +digest is the quickest way to show a person *why* a turn passed or failed. + + + + +```ts +import { formatVerificationSummary } from '@a3s-lab/code'; + +const reports = session.verificationReports(); +const summary = session.verificationSummary(); +const presets = session.verificationPresets(); + +// Either the session helper or the standalone formatter yields readable text. +console.log(session.verificationSummaryText()); +console.log(formatVerificationSummary(summary)); +``` + + + + +```python +reports = session.verification_reports() +summary = session.verification_summary() +presets = session.verification_presets() + +# The session helper returns a ready-to-print human-readable digest. +print(session.verification_summary_text()) +``` + + + + +`verificationPresets()` returns the built-in check templates the runtime ships +with, so you can compose your command list from known-good defaults instead of +hand-writing every check. 
+ +## Why This Matters + +Without verification, an agent run ends on the model's word. With it, the run +ends on observable evidence: a build that compiled, a test suite that passed, a +linter that stayed quiet. The summary text gives you the audit trail; the +failed and pending counts on the turn result let you fail closed in automation. + +## Related + +- [Telemetry](/docs/code/telemetry) — inspect trace events and verification reports as runtime evidence. +- [Limits](/docs/code/limits) — bound how much work a turn can do before verification runs.