Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions packages/agent/src/server/__tests__/createAgentApp.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ test('createAgentApp falls back to BORING_AGENT_TEMPLATE_PATH', async () => {
test('createAgentApp can use a custom harness factory for non-pi runtimes', async () => {
const workspaceRoot = await makeTempDir('boring-ui-custom-harness-')
const reloadSession = vi.fn(async () => true)
const telemetryEvents: Array<{ name: string; properties?: Record<string, unknown> }> = []
const telemetry = {
capture(event: { name: string; properties?: Record<string, unknown> }) {
telemetryEvents.push(event)
},
}
const harnessFactory = vi.fn(async (input) => ({
id: 'custom-test-harness',
placement: 'server' as const,
Expand All @@ -109,6 +115,7 @@ test('createAgentApp can use a custom harness factory for non-pi runtimes', asyn
mode: 'direct',
logger: false,
harnessFactory,
telemetry,
extraTools: [{
name: 'custom_runtime_tool',
description: 'Provided to harness factory.',
Expand All @@ -119,8 +126,22 @@ test('createAgentApp can use a custom harness factory for non-pi runtimes', asyn
try {
expect(harnessFactory).toHaveBeenCalledTimes(1)
expect(harnessFactory.mock.calls[0]?.[0].cwd).toBe(workspaceRoot)
expect(harnessFactory.mock.calls[0]?.[0].telemetry).toBe(telemetry)
expect(harnessFactory.mock.calls[0]?.[0].tools.map((tool: { name: string }) => tool.name)).toContain('custom_runtime_tool')

const chatRes = await app.inject({
method: 'POST',
url: '/api/v1/agent/chat',
payload: { sessionId: 'custom', message: 'secret prompt must not be captured' },
})
expect(chatRes.statusCode).toBe(200)
expect(telemetryEvents.map((event) => event.name)).toEqual([
'agent.chat.started',
'agent.chat.message.submitted',
'agent.chat.completed',
])
expect(JSON.stringify(telemetryEvents)).not.toContain('secret prompt')

const res = await app.inject({ method: 'POST', url: '/api/v1/agent/reload', payload: { sessionId: 'custom' } })
expect(res.statusCode).toBe(200)
expect(res.json()).toEqual({ ok: true, sessionId: 'custom', reloaded: true })
Expand Down
5 changes: 5 additions & 0 deletions packages/agent/src/server/createAgentApp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import Fastify, { type FastifyInstance } from 'fastify'
import type { AgentTool } from '../shared/tool'
import type { AgentHarnessFactory } from '../shared/harness'
import type { SessionStore } from '../shared/session'
import type { TelemetrySink } from '../shared/telemetry'
import { getEnv } from './config/env'
import type { RuntimeModeAdapter, RuntimeModeId } from './runtime/mode'
import { resolveMode, autoDetectMode } from './runtime/resolveMode'
Expand Down Expand Up @@ -57,6 +58,8 @@ export interface CreateAgentAppOptions {
pi?: PiHarnessOptions
/** Optional stable namespace for file-backed session storage. */
sessionNamespace?: string
/** Optional best-effort telemetry sink supplied by an embedding host. */
telemetry?: TelemetrySink
/** Optional explicit file-backed session directory. Mostly for tests/hosts. */
sessionDir?: string
/**
Expand Down Expand Up @@ -134,6 +137,7 @@ export async function createAgentApp(
sessionDir: opts.sessionDir,
systemPromptAppend: opts.systemPromptAppend,
systemPromptDynamic: opts.systemPromptDynamic,
telemetry: opts.telemetry,
})
const sessionChangesTracker = new InMemorySessionChangesTracker()

Expand Down Expand Up @@ -171,6 +175,7 @@ export async function createAgentApp(
harness,
workdir: runtimeBundle.workspace.root,
sessionChangesTracker,
telemetry: opts.telemetry,
})
await app.register(sessionRoutes, {
sessionStore: harness.sessions as unknown as SessionStore,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import { describe, expect, it, vi } from 'vitest'

import { ErrorCode } from '../../../../shared/error-codes'
import type { AgentTool } from '../../../../shared/tool'
import type { TelemetryEvent, TelemetrySink } from '../../../../shared/telemetry'
import { adaptToolForPi } from '../tool-adapter'

function createTelemetryRecorder(): { telemetry: TelemetrySink; events: TelemetryEvent[] } {
const events: TelemetryEvent[] = []
return {
events,
telemetry: {
capture(event) {
events.push(event)
},
},
}
}

function createTool(overrides: Partial<AgentTool> = {}): AgentTool {
return {
name: 'bash',
description: 'test tool',
parameters: {},
async execute() {
return { content: [{ type: 'text', text: 'ok output' }] }
},
...overrides,
}
}

async function executeAdapted(tool: AgentTool, telemetry: TelemetrySink) {
const adapted = adaptToolForPi(tool, 'sess-tool', telemetry)
return await adapted.execute(
'tool-call-1',
{ command: 'cat .env', path: '/tmp/private-path' },
new AbortController().signal,
undefined,
{} as never,
)
}

describe('tool adapter telemetry', () => {
it('emits safe agent.tool.completed telemetry without args or output', async () => {
const recorder = createTelemetryRecorder()

await executeAdapted(createTool(), recorder.telemetry)

expect(recorder.events).toHaveLength(1)
expect(recorder.events[0]).toEqual({
name: 'agent.tool.completed',
properties: {
toolName: 'bash',
sessionId: 'sess-tool',
status: 'ok',
durationMs: expect.any(Number),
},
})
const serialized = JSON.stringify(recorder.events)
expect(serialized).not.toContain('cat .env')
expect(serialized).not.toContain('private-path')
expect(serialized).not.toContain('ok output')
})

it('emits safe agent.tool.failed telemetry for tool error results', async () => {
const recorder = createTelemetryRecorder()
const tool = createTool({
async execute() {
return {
isError: true,
content: [{ type: 'text', text: 'secret stderr output' }],
details: { code: ErrorCode.enum.TOOL_EXECUTION_ERROR, command: 'cat .env' },
}
},
})

await expect(executeAdapted(tool, recorder.telemetry)).rejects.toThrow('secret stderr output')

expect(recorder.events).toHaveLength(1)
expect(recorder.events[0]).toEqual({
name: 'agent.tool.failed',
properties: {
toolName: 'bash',
sessionId: 'sess-tool',
status: 'error',
durationMs: expect.any(Number),
errorCode: ErrorCode.enum.TOOL_EXECUTION_ERROR,
},
})
const serialized = JSON.stringify(recorder.events)
expect(serialized).not.toContain('secret stderr output')
expect(serialized).not.toContain('cat .env')
})

it('emits safe agent.tool.failed telemetry for thrown tool errors', async () => {
const recorder = createTelemetryRecorder()
const tool = createTool({
async execute() {
throw new Error('raw stack /tmp/private-path secret')
},
})

await expect(executeAdapted(tool, recorder.telemetry)).rejects.toThrow('raw stack')

expect(recorder.events).toHaveLength(1)
expect(recorder.events[0]).toEqual({
name: 'agent.tool.failed',
properties: {
toolName: 'bash',
sessionId: 'sess-tool',
status: 'error',
durationMs: expect.any(Number),
errorCode: ErrorCode.enum.TOOL_EXECUTION_ERROR,
},
})
expect(JSON.stringify(recorder.events)).not.toContain('private-path')
})

it('telemetry sink failures do not change tool behavior', async () => {
const result = await executeAdapted(createTool(), {
capture() {
throw new Error('telemetry down')
},
})

expect(result).toEqual({
content: [{ type: 'text', text: 'ok output' }],
details: undefined,
})
})
})
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import {
import type { AgentHarness, SendMessageInput, RunContext, MessageAttachment, FollowUpOptions } from "../../../shared/harness.js";
import { createLogger } from "../../logging.js";
import type { AgentTool } from "../../../shared/tool.js";
import type { TelemetrySink } from "../../../shared/telemetry.js";
import type { UIMessageChunk } from "../../../shared/message.js";
import { adaptToolsForPi } from "./tool-adapter.js";
import { piEventToChunks } from "./stream-adapter.js";
Expand Down Expand Up @@ -327,6 +328,8 @@ export function createPiCodingAgentHarness(opts: {
sessionNamespace?: string;
/** Optional explicit file-backed session directory. Mostly for tests/hosts. */
sessionDir?: string;
/** Optional best-effort telemetry sink supplied by an embedding host. */
telemetry?: TelemetrySink;
}): AgentHarness {
const sessionStore = new PiSessionStore(opts.cwd, {
sessionNamespace: opts.sessionNamespace,
Expand Down Expand Up @@ -473,7 +476,7 @@ export function createPiCodingAgentHarness(opts: {
const { session: piSession } = await createAgentSession({
cwd: ctx.workdir,
tools: [],
customTools: adaptToolsForPi(opts.tools, input.sessionId),
customTools: adaptToolsForPi(opts.tools, input.sessionId, opts.telemetry),
model,
thinkingLevel: input.thinkingLevel ?? "off",
sessionManager,
Expand Down
87 changes: 69 additions & 18 deletions packages/agent/src/server/harness/pi-coding-agent/tool-adapter.ts
Original file line number Diff line number Diff line change
@@ -1,33 +1,84 @@
import type { ToolDefinition } from "@mariozechner/pi-coding-agent";
import type { AgentTool } from "../../../shared/tool.js";
import type { AgentTool, ToolResult } from "../../../shared/tool.js";
import { noopTelemetry, safeCapture, type TelemetrySink } from "../../../shared/telemetry.js";
import { ErrorCode } from "../../../shared/error-codes.js";

export function adaptToolForPi(tool: AgentTool, sessionId?: string): ToolDefinition {
function toolTelemetryProperties(
toolName: string,
sessionId: string | undefined,
status: 'ok' | 'error',
startedAt: number,
result?: ToolResult,
): Record<string, string | number> {
const properties: Record<string, string | number> = {
toolName,
status,
durationMs: Date.now() - startedAt,
}
if (sessionId) properties.sessionId = sessionId
const errorCode = (result?.details as { code?: unknown } | undefined)?.code
if (status === 'error') {
properties.errorCode = ErrorCode.safeParse(errorCode).success
? (errorCode as string)
: ErrorCode.enum.TOOL_EXECUTION_ERROR
}
return properties
}

export function adaptToolForPi(tool: AgentTool, sessionId?: string, telemetry: TelemetrySink = noopTelemetry): ToolDefinition {
return {
name: tool.name,
label: tool.name,
description: tool.description,
parameters: tool.parameters as any,
promptSnippet: tool.promptSnippet ?? tool.description,
async execute(toolCallId, params, signal, onUpdate, _ctx) {
const result = await tool.execute(params as Record<string, unknown>, {
toolCallId,
abortSignal: signal ?? new AbortController().signal,
onUpdate: onUpdate
? (partial) => onUpdate({ content: [{ type: "text", text: partial }], details: undefined })
: undefined,
sessionId,
});
if (result.isError) {
throw new Error(result.content.map((c) => c.text).join("\n"));
const startedAt = Date.now();
let emittedFailure = false;
try {
const result = await tool.execute(params as Record<string, unknown>, {
toolCallId,
abortSignal: signal ?? new AbortController().signal,
onUpdate: onUpdate
? (partial) => onUpdate({ content: [{ type: "text", text: partial }], details: undefined })
: undefined,
sessionId,
});
safeCapture(telemetry, {
name: result.isError ? 'agent.tool.failed' : 'agent.tool.completed',
properties: toolTelemetryProperties(
tool.name,
sessionId,
result.isError ? 'error' : 'ok',
startedAt,
result,
),
});
if (result.isError) {
emittedFailure = true;
throw new Error(result.content.map((c) => c.text).join("\n"));
}
return {
content: result.content,
details: result.details,
};
} catch (error) {
if (!emittedFailure) {
safeCapture(telemetry, {
name: 'agent.tool.failed',
properties: toolTelemetryProperties(tool.name, sessionId, 'error', startedAt),
});
}
throw error;
}
return {
content: result.content,
details: result.details,
};
},
} as ToolDefinition;
}

export function adaptToolsForPi(tools: AgentTool[], sessionId?: string): ToolDefinition[] {
return tools.map((tool) => adaptToolForPi(tool, sessionId));
export function adaptToolsForPi(
tools: AgentTool[],
sessionId?: string,
telemetry?: TelemetrySink,
): ToolDefinition[] {
return tools.map((tool) => adaptToolForPi(tool, sessionId, telemetry));
}
Loading
Loading