From 0480a9b62ce45de0fdba030a51f426c2ab77ef0a Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Mon, 23 Mar 2026 12:28:07 +0100 Subject: [PATCH 1/3] fix(js): Align Claude agent SDK trace parity --- ...sdk-v0.1.claude-agent-sdk.span-events.json | 298 ++++++ ...sdk-v0.2.claude-agent-sdk.span-events.json | 283 ++++++ e2e/helpers/claude-agent-sdk-scenario.mjs | 85 +- .../claude-agent-sdk-trace-contract.ts | 173 +++- e2e/helpers/claude-agent-sdk.ts | 44 + e2e/package.json | 3 +- .../__snapshots__/scenario.test.ts.snap | 196 ---- .../package.json | 11 +- .../pnpm-lock.yaml | 347 ++++++- .../scenario.claude-agent-sdk-v0.1.mjs | 4 + .../scenario.claude-agent-sdk-v0.2.mjs | 4 + .../scenario.impl.mjs | 26 + .../scenario.mjs | 12 - .../scenario.test.ts | 64 +- .../__snapshots__/scenario.test.ts.snap | 257 ----- .../wrap-claude-agent-sdk-traces/package.json | 11 +- .../pnpm-lock.yaml | 347 ++++++- .../scenario.claude-agent-sdk-v0.1.ts | 5 + .../scenario.claude-agent-sdk-v0.2.ts | 5 + .../{scenario.ts => scenario.impl.ts} | 12 +- .../scenario.test.ts | 195 +--- .../configs/claude-agent-sdk.test.ts | 36 + .../configs/claude-agent-sdk.ts | 20 +- .../plugins/claude-agent-sdk-channels.ts | 2 +- .../plugins/claude-agent-sdk-plugin.test.ts | 12 +- .../plugins/claude-agent-sdk-plugin.ts | 909 +++++++++++++----- pnpm-lock.yaml | 100 +- 27 files changed, 2458 insertions(+), 1003 deletions(-) create mode 100644 e2e/helpers/__snapshots__/claude-agent-sdk-v0.1.claude-agent-sdk.span-events.json create mode 100644 e2e/helpers/__snapshots__/claude-agent-sdk-v0.2.claude-agent-sdk.span-events.json create mode 100644 e2e/helpers/claude-agent-sdk.ts delete mode 100644 e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/__snapshots__/scenario.test.ts.snap create mode 100644 e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.1.mjs create mode 100644 e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.2.mjs create mode 100644 e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.impl.mjs delete mode 100644 e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.mjs delete mode 100644 e2e/scenarios/wrap-claude-agent-sdk-traces/__snapshots__/scenario.test.ts.snap create mode 100644 e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.1.ts create mode 100644 e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.2.ts rename e2e/scenarios/wrap-claude-agent-sdk-traces/{scenario.ts => scenario.impl.ts} (60%) create mode 100644 js/src/auto-instrumentations/configs/claude-agent-sdk.test.ts diff --git a/e2e/helpers/__snapshots__/claude-agent-sdk-v0.1.claude-agent-sdk.span-events.json b/e2e/helpers/__snapshots__/claude-agent-sdk-v0.1.claude-agent-sdk.span-events.json new file mode 100644 index 000000000..57289ec3f --- /dev/null +++ b/e2e/helpers/__snapshots__/claude-agent-sdk-v0.1.claude-agent-sdk.span-events.json @@ -0,0 +1,298 @@ +{ + "async_prompt": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Part 1", + "Part 2" + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "async-prompt" + }, + "metric_keys": [], + "name": "claude-agent-async-prompt-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Part 1", + "Part 2" + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + } + }, + "basic": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Use the calculator tool to multiply 15 by 7. Do not answer from memory." + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "basic" + }, + "metric_keys": [], + "name": "claude-agent-basic-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "tool": { + "has_input": true, + "has_output": true, + "metadata": { + "gen_ai.tool.name": "calculator", + "mcp.server": "calculator" + }, + "metric_keys": [], + "name": "tool: calculator/calculator", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + } + }, + "failure": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Use the calculator tool to divide 2 by 0. Do not recover from the error." + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "failure" + }, + "metric_keys": [], + "name": "claude-agent-failure-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "tool": { + "error": "division by zero", + "has_input": true, + "has_output": false, + "metadata": { + "gen_ai.tool.name": "calculator", + "mcp.server": "calculator" + }, + "metric_keys": [], + "name": "tool: calculator/calculator", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + } + }, + "root": { + "has_input": false, + "has_output": false, + "metadata": { + "scenario": "claude-agent-sdk-traces" + }, + "metric_keys": [], + "name": "claude-agent-sdk-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task" + }, + "subagent": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Spawn a math-expert subagent to add 15 and 27 using the calculator tool. Report the result. Do not solve it yourself." + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "nested_task": { + "has_input": false, + "has_output": false, + "metadata": null, + "metric_keys": [], + "name": "Agent: math-expert", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "subagent" + }, + "metric_keys": [], + "name": "claude-agent-subagent-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task_root": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "tool": { + "has_input": true, + "has_output": true, + "metadata": { + "gen_ai.tool.name": "calculator", + "mcp.server": "calculator" + }, + "metric_keys": [], + "name": "tool: calculator/calculator", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + } + } +} diff --git a/e2e/helpers/__snapshots__/claude-agent-sdk-v0.2.claude-agent-sdk.span-events.json b/e2e/helpers/__snapshots__/claude-agent-sdk-v0.2.claude-agent-sdk.span-events.json new file mode 100644 index 000000000..ce4c4a2e6 --- /dev/null +++ b/e2e/helpers/__snapshots__/claude-agent-sdk-v0.2.claude-agent-sdk.span-events.json @@ -0,0 +1,283 @@ +{ + "async_prompt": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Part 1", + "Part 2" + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "async-prompt" + }, + "metric_keys": [], + "name": "claude-agent-async-prompt-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Part 1", + "Part 2" + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + } + }, + "basic": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Use the calculator tool to multiply 15 by 7. Do not answer from memory." + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "basic" + }, + "metric_keys": [], + "name": "claude-agent-basic-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "tool": { + "has_input": true, + "has_output": true, + "metadata": { + "gen_ai.tool.name": "calculator", + "mcp.server": "calculator" + }, + "metric_keys": [], + "name": "tool: calculator/calculator", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + } + }, + "failure": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Use the calculator tool to divide 2 by 0. Do not recover from the error." + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "failure" + }, + "metric_keys": [], + "name": "claude-agent-failure-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "tool": { + "error": "division by zero", + "has_input": true, + "has_output": false, + "metadata": { + "gen_ai.tool.name": "calculator", + "mcp.server": "calculator" + }, + "metric_keys": [], + "name": "tool: calculator/calculator", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + } + }, + "root": { + "has_input": false, + "has_output": false, + "metadata": { + "scenario": "claude-agent-sdk-traces" + }, + "metric_keys": [], + "name": "claude-agent-sdk-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task" + }, + "subagent": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Spawn a math-expert subagent to add 15 and 27 using the calculator tool. Report the result. Do not solve it yourself." + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "nested_task": { + "has_input": false, + "has_output": false, + "metadata": null, + "metric_keys": [], + "name": "Agent: sub-agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "subagent" + }, + "metric_keys": [], + "name": "claude-agent-subagent-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task_root": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "tool": null + } +} diff --git a/e2e/helpers/claude-agent-sdk-scenario.mjs b/e2e/helpers/claude-agent-sdk-scenario.mjs index 6b5f6f4dc..78eeeef6f 100644 --- a/e2e/helpers/claude-agent-sdk-scenario.mjs +++ b/e2e/helpers/claude-agent-sdk-scenario.mjs @@ -3,6 +3,9 @@ import { runOperation, runTracedScenario, } from "./provider-runtime.mjs"; +import { z } from "zod"; + +const CLAUDE_AGENT_MODEL = "claude-haiku-4-5-20251001"; function makePromptMessage(content) { return { @@ -18,7 +21,53 @@ export async function runClaudeAgentSDKScenario(options) { const sdk = options.decorateSDK ? options.decorateSDK(options.sdk) : options.sdk; - const { query } = sdk; + const { createSdkMcpServer, query, tool } = sdk; + const calculator = tool( + "calculator", + "Performs basic arithmetic operations", + { + operation: z.enum(["add", "divide", "multiply", "subtract"]), + a: z.number(), + b: z.number(), + }, + async (args) => { + let result; + + switch (args.operation) { + case "add": + result = args.a + args.b; + break; + case "subtract": + result = args.a - args.b; + break; + case "multiply": + result = args.a * args.b; + break; + case "divide": + if (args.b === 0) { + throw new Error("division by zero"); + } + result = args.a / args.b; + break; + default: + throw new Error(`unsupported operation: ${args.operation}`); + } + + return { + content: [ + { + text: `${args.operation}(${args.a}, ${args.b}) = ${result}`, + type: "text", + }, + ], + }; + }, + ); + const calculatorServer = createSdkMcpServer({ + name: "calculator", + tools: [calculator], + version: "1.0.0", + }); await runTracedScenario({ callback: async () => { @@ -26,9 +75,13 @@ export async function runClaudeAgentSDKScenario(options) { await collectAsync( query({ prompt: - "Use the calculator tool to multiply 15 by 7, then subtract 5.", + "Use the calculator tool to multiply 15 by 7. Do not answer from memory.", options: { - model: "claude-e2e-mock", + mcpServers: { + calculator: calculatorServer, + }, + model: CLAUDE_AGENT_MODEL, + permissionMode: "bypassPermissions", }, }), ); @@ -45,7 +98,9 @@ export async function runClaudeAgentSDKScenario(options) { yield makePromptMessage("Part 2"); })(), options: { - model: "claude-e2e-mock", + maxTurns: 1, + model: CLAUDE_AGENT_MODEL, + permissionMode: "bypassPermissions", }, }), ); @@ -58,14 +113,23 @@ export async function runClaudeAgentSDKScenario(options) { async () => { await collectAsync( query({ - prompt: "Spawn a math-expert subagent and report the result.", + prompt: + "Spawn a math-expert subagent to add 15 and 27 using the calculator tool. Report the result. Do not solve it yourself.", options: { agents: { "math-expert": { description: "Math specialist", + model: "haiku", + prompt: + "You are a math expert. Use the calculator tool for calculations. Be concise.", }, }, - model: "claude-e2e-mock", + allowedTools: ["Task"], + mcpServers: { + calculator: calculatorServer, + }, + model: CLAUDE_AGENT_MODEL, + permissionMode: "bypassPermissions", }, }), ); @@ -78,9 +142,14 @@ export async function runClaudeAgentSDKScenario(options) { async () => { await collectAsync( query({ - prompt: "FAIL the calculator tool call.", + prompt: + "Use the calculator tool to divide 2 by 0. Do not recover from the error.", options: { - model: "claude-e2e-mock", + mcpServers: { + calculator: calculatorServer, + }, + model: CLAUDE_AGENT_MODEL, + permissionMode: "bypassPermissions", }, }), ); diff --git a/e2e/helpers/claude-agent-sdk-trace-contract.ts b/e2e/helpers/claude-agent-sdk-trace-contract.ts index 0e6c2699b..4975bc1d6 100644 --- a/e2e/helpers/claude-agent-sdk-trace-contract.ts +++ b/e2e/helpers/claude-agent-sdk-trace-contract.ts @@ -1,4 +1,5 @@ import { expect } from "vitest"; +import { resolveFileSnapshotPath } from "./file-snapshot"; import { normalizeForSnapshot, type Json } from "./normalize"; import type { CapturedLogEvent } from "./mock-braintrust-server"; import { @@ -8,21 +9,101 @@ import { } from "./trace-selectors"; import { summarizeWrapperContract } from "./wrapper-contract"; +const SNAPSHOT_METADATA_KEYS = [ + "provider", + "model", + "operation", + "scenario", + "mcp.server", + "gen_ai.tool.name", +] as const; +const OMITTED_METRIC_KEYS = new Set([ + "prompt_cached_tokens", + "prompt_cache_creation_tokens", +]); +const SNAPSHOT_ROOT_NAME = "claude-agent-sdk-root"; +const SNAPSHOT_SCENARIO_NAME = "claude-agent-sdk-traces"; + +export function resolveClaudeAgentSDKSpanSnapshotPath( + dependencyName: string, +): string { + return resolveFileSnapshotPath( + import.meta.url, + `${dependencyName}.claude-agent-sdk.span-events.json`, + ); +} + +function summarizeSpan( + event: CapturedLogEvent | undefined, + overrides?: { + metadata?: Json; + name?: string | null; + }, +): Json { + if (!event) { + return null; + } + + const summary = summarizeWrapperContract(event, [ + ...SNAPSHOT_METADATA_KEYS, + ]) as Record; + const metricKeys = Array.isArray(summary.metric_keys) + ? summary.metric_keys.filter( + (key): key is string => + typeof key === "string" && !OMITTED_METRIC_KEYS.has(key), + ) + : summary.metric_keys; + const input = event.input as + | Array<{ content?: string; message?: { content?: string } }> + | undefined; + const inputContents = + Array.isArray(input) && + input + .map((item) => item.message?.content ?? item.content) + .filter((content): content is string => typeof content === "string"); + + if (overrides?.metadata !== undefined) { + summary.metadata = overrides.metadata; + } + if (overrides?.name !== undefined) { + summary.name = overrides.name; + } + if (typeof event.row.error === "string") { + summary.error = event.row.error; + } + if (metricKeys !== undefined) { + summary.metric_keys = metricKeys; + } + if (Array.isArray(inputContents) && inputContents.length > 0) { + summary.input_contents = inputContents; + } + + return summary; +} + export function assertClaudeAgentSDKTraceContract(options: { capturedEvents: CapturedLogEvent[]; rootName: string; scenarioName: string; }): { refs: { + asyncPromptOperation: CapturedLogEvent | undefined; asyncPromptLlm: CapturedLogEvent | undefined; asyncPromptTask: CapturedLogEvent | undefined; + basicOperation: CapturedLogEvent | undefined; basicLlm: CapturedLogEvent | undefined; basicTask: CapturedLogEvent | undefined; + basicTool: CapturedLogEvent | undefined; + failureOperation: CapturedLogEvent | undefined; failureLlm: CapturedLogEvent | undefined; failureTask: CapturedLogEvent | undefined; + failureTool: CapturedLogEvent | undefined; root: CapturedLogEvent | undefined; + subAgentOperation: CapturedLogEvent | undefined; subAgentLlm: CapturedLogEvent | undefined; + subAgentTask: CapturedLogEvent | undefined; subAgentTaskRoot: CapturedLogEvent | undefined; + subAgentTool: CapturedLogEvent | undefined; }; spanSummary: Json; } { @@ -124,43 +205,83 @@ export function assertClaudeAgentSDKTraceContract(options: { ]); } + const basicTool = findAllSpans( + options.capturedEvents, + "tool: calculator/calculator", + ).find((event) => event.span.parentIds.includes(basicTask?.span.id ?? "")); + + const subAgentTask = options.capturedEvents.find( + (event) => + event.span.type === "task" && + event.span.parentIds.includes(subAgentTaskRoot?.span.id ?? "") && + event.span.name?.startsWith("Agent:"), + ); + const subAgentTool = findAllSpans( + options.capturedEvents, + "tool: calculator/calculator", + ).find((event) => event.span.parentIds.includes(subAgentTask?.span.id ?? "")); + const failureTool = findAllSpans( + options.capturedEvents, + "tool: calculator/calculator", + ).find((event) => event.span.parentIds.includes(failureTask?.span.id ?? "")); + + if (subAgentTool && subAgentTaskRoot) { + expect(subAgentTool.span.parentIds).not.toContain(subAgentTaskRoot.span.id); + } + + if (failureTool) { + expect(failureTool.row.error).toBe("division by zero"); + } + return { refs: { + asyncPromptOperation, asyncPromptLlm, asyncPromptTask, + basicOperation, basicLlm, basicTask, + basicTool, + failureOperation, failureLlm, failureTask, + failureTool, root, + subAgentOperation, subAgentLlm, + subAgentTask, subAgentTaskRoot, + subAgentTool, }, - spanSummary: normalizeForSnapshot( - [ - root, - basicOperation, - basicTask, - basicLlm, - asyncPromptOperation, - asyncPromptTask, - asyncPromptLlm, - subAgentOperation, - subAgentTaskRoot, - subAgentLlm, - failureOperation, - failureTask, - failureLlm, - ] - .filter((event) => event !== undefined) - .map((event) => - summarizeWrapperContract(event!, [ - "provider", - "model", - "operation", - "scenario", - ]), - ) as Json, - ), + spanSummary: normalizeForSnapshot({ + async_prompt: { + llm: summarizeSpan(asyncPromptLlm), + operation: summarizeSpan(asyncPromptOperation), + task: summarizeSpan(asyncPromptTask), + }, + basic: { + llm: summarizeSpan(basicLlm), + operation: summarizeSpan(basicOperation), + task: summarizeSpan(basicTask), + tool: summarizeSpan(basicTool), + }, + failure: { + llm: summarizeSpan(failureLlm), + operation: summarizeSpan(failureOperation), + task: summarizeSpan(failureTask), + tool: summarizeSpan(failureTool), + }, + root: summarizeSpan(root, { + metadata: { scenario: SNAPSHOT_SCENARIO_NAME }, + name: SNAPSHOT_ROOT_NAME, + }), + subagent: { + llm: summarizeSpan(subAgentLlm), + nested_task: summarizeSpan(subAgentTask), + operation: summarizeSpan(subAgentOperation), + task_root: summarizeSpan(subAgentTaskRoot), + tool: summarizeSpan(subAgentTool), + }, + } as Json), }; } diff --git a/e2e/helpers/claude-agent-sdk.ts b/e2e/helpers/claude-agent-sdk.ts new file mode 100644 index 000000000..af33cd951 --- /dev/null +++ b/e2e/helpers/claude-agent-sdk.ts @@ -0,0 +1,44 @@ +import { readInstalledPackageVersion } from "./scenario-installer"; + +interface ClaudeAgentSDKScenario { + dependencyName: string; + entry: string; + version: string; +} + +const CLAUDE_AGENT_SDK_VERSION_SPECS = [ + { + dependencyName: "claude-agent-sdk-v0.1", + suffix: "v0.1", + }, + { + dependencyName: "claude-agent-sdk-v0.2", + suffix: "v0.2", + }, +] as const; + +export const CLAUDE_AGENT_SDK_SCENARIO_TIMEOUT_MS = 120_000; + +export async function getWrapClaudeAgentSDKScenarios( + scenarioDir: string, +): Promise { + return await Promise.all( + CLAUDE_AGENT_SDK_VERSION_SPECS.map(async ({ dependencyName, suffix }) => ({ + dependencyName, + entry: `scenario.claude-agent-sdk-${suffix}.ts`, + version: await readInstalledPackageVersion(scenarioDir, dependencyName), + })), + ); +} + +export async function getClaudeAgentSDKAutoHookScenarios( + scenarioDir: string, +): Promise { + return await Promise.all( + CLAUDE_AGENT_SDK_VERSION_SPECS.map(async ({ dependencyName, suffix }) => ({ + dependencyName, + entry: `scenario.claude-agent-sdk-${suffix}.mjs`, + version: await readInstalledPackageVersion(scenarioDir, dependencyName), + })), + ); +} diff --git a/e2e/package.json b/e2e/package.json index 5961d58c5..524941092 100644 --- a/e2e/package.json +++ b/e2e/package.json @@ -20,6 +20,7 @@ "braintrust": "workspace:^", "tsx": "^4.21.0", "typescript": "5.4.4", - "vitest": "^4.1.0" + "vitest": "^4.1.0", + "zod": "4.3.6" } } diff --git a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/__snapshots__/scenario.test.ts.snap b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/__snapshots__/scenario.test.ts.snap deleted file mode 100644 index 5cba58d03..000000000 --- a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/__snapshots__/scenario.test.ts.snap +++ /dev/null @@ -1,196 +0,0 @@ -// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html - -exports[`claude agent sdk auto-instrumentation via node hook collects the shared claude agent trace contract > span-events 1`] = ` -[ - { - "has_input": false, - "has_output": false, - "metadata": { - "scenario": "claude-agent-sdk-auto-instrumentation-node-hook", - }, - "metric_keys": [], - "name": "claude-agent-sdk-auto-hook-root", - "root_span_id": "", - "span_id": "", - "span_parents": [], - "type": "task", - }, - { - "has_input": false, - "has_output": false, - "metadata": { - "operation": "basic", - }, - "metric_keys": [], - "name": "claude-agent-basic-operation", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": null, - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "model": "claude-e2e-mock", - }, - "metric_keys": [], - "name": "Claude Agent", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "task", - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "model": "claude-e2e-mock", - }, - "metric_keys": [ - "completion_tokens", - "prompt_tokens", - "tokens", - ], - "name": "anthropic.messages.create", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "llm", - }, - { - "has_input": false, - "has_output": false, - "metadata": { - "operation": "async-prompt", - }, - "metric_keys": [], - "name": "claude-agent-async-prompt-operation", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": null, - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "model": "claude-e2e-mock", - }, - "metric_keys": [], - "name": "Claude Agent", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "task", - }, - { - "has_input": false, - "has_output": false, - "metadata": { - "operation": "subagent", - }, - "metric_keys": [], - "name": "claude-agent-subagent-operation", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": null, - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "model": "claude-e2e-mock", - }, - "metric_keys": [], - "name": "Claude Agent", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "task", - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "model": "claude-e2e-mock", - }, - "metric_keys": [ - "completion_tokens", - "prompt_tokens", - "tokens", - ], - "name": "anthropic.messages.create", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "llm", - }, - { - "has_input": false, - "has_output": false, - "metadata": { - "operation": "failure", - }, - "metric_keys": [], - "name": "claude-agent-failure-operation", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": null, - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "model": "claude-e2e-mock", - }, - "metric_keys": [], - "name": "Claude Agent", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "task", - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "model": "claude-e2e-mock", - }, - "metric_keys": [ - "completion_tokens", - "prompt_tokens", - "tokens", - ], - "name": "anthropic.messages.create", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "llm", - }, -] -`; diff --git a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/package.json b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/package.json index ecb73c884..048c14a87 100644 --- a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/package.json +++ b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/package.json @@ -1,7 +1,16 @@ { "name": "@braintrust/e2e-claude-agent-sdk-auto-instrumentation-node-hook", "private": true, + "braintrustScenario": { + "canary": { + "dependencies": { + "claude-agent-sdk-v0.1": "@anthropic-ai/claude-agent-sdk@0.1", + "claude-agent-sdk-v0.2": "@anthropic-ai/claude-agent-sdk@0.2" + } + } + }, "dependencies": { - "@anthropic-ai/claude-agent-sdk": "file:../../helpers/mock-claude-agent-sdk" + "claude-agent-sdk-v0.1": "npm:@anthropic-ai/claude-agent-sdk@0.1.77", + "claude-agent-sdk-v0.2": "npm:@anthropic-ai/claude-agent-sdk@0.2.81" } } diff --git a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/pnpm-lock.yaml b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/pnpm-lock.yaml index 687c7f77a..bea4806b3 100644 --- a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/pnpm-lock.yaml +++ b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/pnpm-lock.yaml @@ -8,15 +8,350 @@ importers: .: dependencies: - '@anthropic-ai/claude-agent-sdk': - specifier: file:../../helpers/mock-claude-agent-sdk - version: file:../../helpers/mock-claude-agent-sdk + claude-agent-sdk-v0.1: + specifier: npm:@anthropic-ai/claude-agent-sdk@0.1.77 + version: '@anthropic-ai/claude-agent-sdk@0.1.77(zod@4.3.6)' + claude-agent-sdk-v0.2: + specifier: npm:@anthropic-ai/claude-agent-sdk@0.2.81 + version: '@anthropic-ai/claude-agent-sdk@0.2.81(zod@4.3.6)' packages: - '@anthropic-ai/claude-agent-sdk@file:../../helpers/mock-claude-agent-sdk': - resolution: {directory: ../../helpers/mock-claude-agent-sdk, type: directory} + '@anthropic-ai/claude-agent-sdk@0.1.77': + resolution: {integrity: sha512-ZEjWQtkoB2MEY6K16DWMmF+8OhywAynH0m08V265cerbZ8xPD/2Ng2jPzbbO40mPeFSsMDJboShL+a3aObP0Jg==} + engines: {node: '>=18.0.0'} + peerDependencies: + zod: ^3.25.0 || ^4.0.0 + + '@anthropic-ai/claude-agent-sdk@0.2.81': + resolution: {integrity: sha512-CBeebgibBEN/DWOQGZN67vhuTG55RbI1hlsFSSoZ4uA/Io3lw04eHTE2ISCmdbqyJaefYTt6GKZei1nP0TQMNw==} + engines: {node: '>=18.0.0'} + peerDependencies: + zod: ^4.0.0 + + '@img/sharp-darwin-arm64@0.33.5': + resolution: {integrity: sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [darwin] + + '@img/sharp-darwin-arm64@0.34.5': + resolution: {integrity: sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [darwin] + + '@img/sharp-darwin-x64@0.33.5': + resolution: {integrity: sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [darwin] + + '@img/sharp-darwin-x64@0.34.5': + resolution: {integrity: sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [darwin] + + '@img/sharp-libvips-darwin-arm64@1.0.4': + resolution: {integrity: sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg==} + cpu: [arm64] + os: [darwin] + + '@img/sharp-libvips-darwin-arm64@1.2.4': + resolution: {integrity: sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==} + cpu: [arm64] + os: [darwin] + + '@img/sharp-libvips-darwin-x64@1.0.4': + resolution: {integrity: sha512-xnGR8YuZYfJGmWPvmlunFaWJsb9T/AO2ykoP3Fz/0X5XV2aoYBPkX6xqCQvUTKKiLddarLaxpzNe+b1hjeWHAQ==} + cpu: [x64] + os: [darwin] + + '@img/sharp-libvips-darwin-x64@1.2.4': + resolution: {integrity: sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==} + cpu: [x64] + os: [darwin] + + '@img/sharp-libvips-linux-arm64@1.0.4': + resolution: {integrity: sha512-9B+taZ8DlyyqzZQnoeIvDVR/2F4EbMepXMc/NdVbkzsJbzkUjhXv/70GQJ7tdLA4YJgNP25zukcxpX2/SueNrA==} + cpu: [arm64] + os: [linux] + + '@img/sharp-libvips-linux-arm64@1.2.4': + resolution: {integrity: sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==} + cpu: [arm64] + os: [linux] + + '@img/sharp-libvips-linux-arm@1.0.5': + resolution: {integrity: sha512-gvcC4ACAOPRNATg/ov8/MnbxFDJqf/pDePbBnuBDcjsI8PssmjoKMAz4LtLaVi+OnSb5FK/yIOamqDwGmXW32g==} + cpu: [arm] + os: [linux] + + '@img/sharp-libvips-linux-arm@1.2.4': + resolution: {integrity: sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==} + cpu: [arm] + os: [linux] + + '@img/sharp-libvips-linux-x64@1.0.4': + resolution: {integrity: sha512-MmWmQ3iPFZr0Iev+BAgVMb3ZyC4KeFc3jFxnNbEPas60e1cIfevbtuyf9nDGIzOaW9PdnDciJm+wFFaTlj5xYw==} + cpu: [x64] + os: [linux] + + '@img/sharp-libvips-linux-x64@1.2.4': + resolution: {integrity: sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==} + cpu: [x64] + os: [linux] + + '@img/sharp-libvips-linuxmusl-arm64@1.0.4': + resolution: {integrity: sha512-9Ti+BbTYDcsbp4wfYib8Ctm1ilkugkA/uscUn6UXK1ldpC1JjiXbLfFZtRlBhjPZ5o1NCLiDbg8fhUPKStHoTA==} + cpu: [arm64] + os: [linux] + + '@img/sharp-libvips-linuxmusl-arm64@1.2.4': + resolution: {integrity: sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==} + cpu: [arm64] + os: [linux] + + '@img/sharp-libvips-linuxmusl-x64@1.0.4': + resolution: {integrity: sha512-viYN1KX9m+/hGkJtvYYp+CCLgnJXwiQB39damAO7WMdKWlIhmYTfHjwSbQeUK/20vY154mwezd9HflVFM1wVSw==} + cpu: [x64] + os: [linux] + + '@img/sharp-libvips-linuxmusl-x64@1.2.4': + resolution: {integrity: sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==} + cpu: [x64] + os: [linux] + + '@img/sharp-linux-arm64@0.33.5': + resolution: {integrity: sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [linux] + + '@img/sharp-linux-arm64@0.34.5': + resolution: {integrity: sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [linux] + + '@img/sharp-linux-arm@0.33.5': + resolution: {integrity: sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm] + os: [linux] + + '@img/sharp-linux-arm@0.34.5': + resolution: {integrity: sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm] + os: [linux] + + '@img/sharp-linux-x64@0.33.5': + resolution: {integrity: sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [linux] + + '@img/sharp-linux-x64@0.34.5': + resolution: {integrity: sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [linux] + + '@img/sharp-linuxmusl-arm64@0.33.5': + resolution: {integrity: sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [linux] + + '@img/sharp-linuxmusl-arm64@0.34.5': + resolution: {integrity: sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [linux] + + '@img/sharp-linuxmusl-x64@0.33.5': + resolution: {integrity: sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [linux] + + '@img/sharp-linuxmusl-x64@0.34.5': + resolution: {integrity: sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [linux] + + '@img/sharp-win32-arm64@0.34.5': + resolution: {integrity: sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [win32] + + '@img/sharp-win32-x64@0.33.5': + resolution: {integrity: sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [win32] + + '@img/sharp-win32-x64@0.34.5': + resolution: {integrity: sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [win32] + + zod@4.3.6: + resolution: {integrity: sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==} snapshots: - '@anthropic-ai/claude-agent-sdk@file:../../helpers/mock-claude-agent-sdk': {} + '@anthropic-ai/claude-agent-sdk@0.1.77(zod@4.3.6)': + dependencies: + zod: 4.3.6 + optionalDependencies: + '@img/sharp-darwin-arm64': 0.33.5 + '@img/sharp-darwin-x64': 0.33.5 + '@img/sharp-linux-arm': 0.33.5 + '@img/sharp-linux-arm64': 0.33.5 + '@img/sharp-linux-x64': 0.33.5 + '@img/sharp-linuxmusl-arm64': 0.33.5 + '@img/sharp-linuxmusl-x64': 0.33.5 + '@img/sharp-win32-x64': 0.33.5 + + '@anthropic-ai/claude-agent-sdk@0.2.81(zod@4.3.6)': + dependencies: + zod: 4.3.6 + optionalDependencies: + '@img/sharp-darwin-arm64': 0.34.5 + '@img/sharp-darwin-x64': 0.34.5 + '@img/sharp-linux-arm': 0.34.5 + '@img/sharp-linux-arm64': 0.34.5 + '@img/sharp-linux-x64': 0.34.5 + '@img/sharp-linuxmusl-arm64': 0.34.5 + '@img/sharp-linuxmusl-x64': 0.34.5 + '@img/sharp-win32-arm64': 0.34.5 + '@img/sharp-win32-x64': 0.34.5 + + '@img/sharp-darwin-arm64@0.33.5': + optionalDependencies: + '@img/sharp-libvips-darwin-arm64': 1.0.4 + optional: true + + '@img/sharp-darwin-arm64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-darwin-arm64': 1.2.4 + optional: true + + '@img/sharp-darwin-x64@0.33.5': + optionalDependencies: + '@img/sharp-libvips-darwin-x64': 1.0.4 + optional: true + + '@img/sharp-darwin-x64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-darwin-x64': 1.2.4 + optional: true + + '@img/sharp-libvips-darwin-arm64@1.0.4': + optional: true + + '@img/sharp-libvips-darwin-arm64@1.2.4': + optional: true + + '@img/sharp-libvips-darwin-x64@1.0.4': + optional: true + + '@img/sharp-libvips-darwin-x64@1.2.4': + optional: true + + '@img/sharp-libvips-linux-arm64@1.0.4': + optional: true + + '@img/sharp-libvips-linux-arm64@1.2.4': + optional: true + + '@img/sharp-libvips-linux-arm@1.0.5': + optional: true + + '@img/sharp-libvips-linux-arm@1.2.4': + optional: true + + '@img/sharp-libvips-linux-x64@1.0.4': + optional: true + + '@img/sharp-libvips-linux-x64@1.2.4': + optional: true + + '@img/sharp-libvips-linuxmusl-arm64@1.0.4': + optional: true + + '@img/sharp-libvips-linuxmusl-arm64@1.2.4': + optional: true + + '@img/sharp-libvips-linuxmusl-x64@1.0.4': + optional: true + + '@img/sharp-libvips-linuxmusl-x64@1.2.4': + optional: true + + '@img/sharp-linux-arm64@0.33.5': + optionalDependencies: + '@img/sharp-libvips-linux-arm64': 1.0.4 + optional: true + + '@img/sharp-linux-arm64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-arm64': 1.2.4 + optional: true + + '@img/sharp-linux-arm@0.33.5': + optionalDependencies: + '@img/sharp-libvips-linux-arm': 1.0.5 + optional: true + + '@img/sharp-linux-arm@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-arm': 1.2.4 + optional: true + + '@img/sharp-linux-x64@0.33.5': + optionalDependencies: + '@img/sharp-libvips-linux-x64': 1.0.4 + optional: true + + '@img/sharp-linux-x64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-x64': 1.2.4 + optional: true + + '@img/sharp-linuxmusl-arm64@0.33.5': + optionalDependencies: + '@img/sharp-libvips-linuxmusl-arm64': 1.0.4 + optional: true + + '@img/sharp-linuxmusl-arm64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linuxmusl-arm64': 1.2.4 + optional: true + + '@img/sharp-linuxmusl-x64@0.33.5': + optionalDependencies: + '@img/sharp-libvips-linuxmusl-x64': 1.0.4 + optional: true + + '@img/sharp-linuxmusl-x64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linuxmusl-x64': 1.2.4 + optional: true + + '@img/sharp-win32-arm64@0.34.5': + optional: true + + '@img/sharp-win32-x64@0.33.5': + optional: true + + '@img/sharp-win32-x64@0.34.5': + optional: true + + zod@4.3.6: {} diff --git a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.1.mjs b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.1.mjs new file mode 100644 index 000000000..e29ff853d --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.1.mjs @@ -0,0 +1,4 @@ +import * as claudeAgentSDK from "claude-agent-sdk-v0.1"; +import { runClaudeAgentSDKAutoInstrumentationNodeHookOrExit } from "./scenario.impl.mjs"; + +runClaudeAgentSDKAutoInstrumentationNodeHookOrExit(claudeAgentSDK); diff --git a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.2.mjs b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.2.mjs new file mode 100644 index 000000000..b14310049 --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.2.mjs @@ -0,0 +1,4 @@ +import * as claudeAgentSDK from "claude-agent-sdk-v0.2"; +import { runClaudeAgentSDKAutoInstrumentationNodeHookOrExit } from "./scenario.impl.mjs"; + +runClaudeAgentSDKAutoInstrumentationNodeHookOrExit(claudeAgentSDK); diff --git a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.impl.mjs b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.impl.mjs new file mode 100644 index 000000000..1efd8cf7d --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.impl.mjs @@ -0,0 +1,26 @@ +import { + getInstalledPackageVersion, + runMain, +} from "../../helpers/provider-runtime.mjs"; +import { runClaudeAgentSDKScenario } from "../../helpers/claude-agent-sdk-scenario.mjs"; + +export { getInstalledPackageVersion }; + +export async function runClaudeAgentSDKAutoInstrumentationNodeHook( + claudeAgentSDK, +) { + await runClaudeAgentSDKScenario({ + projectNameBase: "e2e-claude-agent-sdk-auto-instrumentation-hook", + rootName: "claude-agent-sdk-auto-hook-root", + scenarioName: "claude-agent-sdk-auto-instrumentation-node-hook", + sdk: claudeAgentSDK, + }); +} + +export function runClaudeAgentSDKAutoInstrumentationNodeHookOrExit( + claudeAgentSDK, +) { + runMain(async () => + runClaudeAgentSDKAutoInstrumentationNodeHook(claudeAgentSDK), + ); +} diff --git a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.mjs b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.mjs deleted file mode 100644 index 798e8d8d5..000000000 --- a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.mjs +++ /dev/null @@ -1,12 +0,0 @@ -import * as claudeAgentSDK from "@anthropic-ai/claude-agent-sdk"; -import { runClaudeAgentSDKScenario } from "../../helpers/claude-agent-sdk-scenario.mjs"; -import { runMain } from "../../helpers/provider-runtime.mjs"; - -runMain(async () => - runClaudeAgentSDKScenario({ - projectNameBase: "e2e-claude-agent-sdk-auto-instrumentation-hook", - rootName: "claude-agent-sdk-auto-hook-root", - scenarioName: "claude-agent-sdk-auto-instrumentation-node-hook", - sdk: claudeAgentSDK, - }), -); diff --git a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.test.ts b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.test.ts index 958f51d16..3a0af2596 100644 --- a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.test.ts +++ b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.test.ts @@ -1,5 +1,13 @@ import { expect, test } from "vitest"; -import { assertClaudeAgentSDKTraceContract } from "../../helpers/claude-agent-sdk-trace-contract"; +import { + assertClaudeAgentSDKTraceContract, + resolveClaudeAgentSDKSpanSnapshotPath, +} from "../../helpers/claude-agent-sdk-trace-contract"; +import { + CLAUDE_AGENT_SDK_SCENARIO_TIMEOUT_MS, + getClaudeAgentSDKAutoHookScenarios, +} from "../../helpers/claude-agent-sdk"; +import { formatJsonFileSnapshot } from "../../helpers/file-snapshot"; import { prepareScenarioDir, resolveScenarioDir, @@ -10,29 +18,37 @@ import { E2E_TAGS } from "../../helpers/tags"; const scenarioDir = await prepareScenarioDir({ scenarioDir: resolveScenarioDir(import.meta.url), }); -const TIMEOUT_MS = 120_000; +const autoHookClaudeAgentSDKScenarios = + await getClaudeAgentSDKAutoHookScenarios(scenarioDir); -test( - "claude agent sdk auto-instrumentation via node hook collects the shared claude agent trace contract", - { - tags: [E2E_TAGS.externalApi], - timeout: TIMEOUT_MS, - }, - async () => { - await withScenarioHarness(async ({ events, runNodeScenarioDir }) => { - await runNodeScenarioDir({ - nodeArgs: ["--import", "braintrust/hook.mjs"], - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); +for (const scenario of autoHookClaudeAgentSDKScenarios) { + test( + `claude agent sdk auto-instrumentation via node hook collects the shared claude agent trace contract (claude-agent-sdk ${scenario.version})`, + { + tags: [E2E_TAGS.externalApi], + timeout: CLAUDE_AGENT_SDK_SCENARIO_TIMEOUT_MS, + }, + async () => { + await withScenarioHarness(async ({ events, runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.entry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + scenarioDir, + timeoutMs: CLAUDE_AGENT_SDK_SCENARIO_TIMEOUT_MS, + }); - const contract = assertClaudeAgentSDKTraceContract({ - capturedEvents: events(), - rootName: "claude-agent-sdk-auto-hook-root", - scenarioName: "claude-agent-sdk-auto-instrumentation-node-hook", - }); + const contract = assertClaudeAgentSDKTraceContract({ + capturedEvents: events(), + rootName: "claude-agent-sdk-auto-hook-root", + scenarioName: "claude-agent-sdk-auto-instrumentation-node-hook", + }); - expect(contract.spanSummary).toMatchSnapshot("span-events"); - }); - }, -); + await expect( + formatJsonFileSnapshot(contract.spanSummary), + ).toMatchFileSnapshot( + resolveClaudeAgentSDKSpanSnapshotPath(scenario.dependencyName), + ); + }); + }, + ); +} diff --git a/e2e/scenarios/wrap-claude-agent-sdk-traces/__snapshots__/scenario.test.ts.snap b/e2e/scenarios/wrap-claude-agent-sdk-traces/__snapshots__/scenario.test.ts.snap deleted file mode 100644 index 054013064..000000000 --- a/e2e/scenarios/wrap-claude-agent-sdk-traces/__snapshots__/scenario.test.ts.snap +++ /dev/null @@ -1,257 +0,0 @@ -// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html - -exports[`wrap-claude-agent-sdk-traces captures tool, async prompt, and subagent traces 1`] = ` -[ - { - "has_input": false, - "has_output": false, - "metadata": { - "scenario": "wrap-claude-agent-sdk-traces", - }, - "metric_keys": [], - "name": "claude-agent-sdk-wrapper-root", - "root_span_id": "", - "span_id": "", - "span_parents": [], - "type": "task", - }, - { - "has_input": false, - "has_output": false, - "metadata": { - "operation": "basic", - }, - "metric_keys": [], - "name": "claude-agent-basic-operation", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": null, - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "model": "claude-e2e-mock", - }, - "metric_keys": [], - "name": "Claude Agent", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "task", - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "model": "claude-e2e-mock", - }, - "metric_keys": [ - "completion_tokens", - "prompt_tokens", - "tokens", - ], - "name": "anthropic.messages.create", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "llm", - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "gen_ai.tool.name": "calculator", - "mcp.server": "calculator", - }, - "metric_keys": [], - "name": "tool: calculator/calculator", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "tool", - }, - { - "has_input": false, - "has_output": false, - "metadata": { - "operation": "async-prompt", - }, - "metric_keys": [], - "name": "claude-agent-async-prompt-operation", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": null, - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "model": "claude-e2e-mock", - }, - "metric_keys": [], - "name": "Claude Agent", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "task", - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "model": "claude-e2e-mock", - }, - "metric_keys": [ - "completion_tokens", - "prompt_tokens", - "tokens", - ], - "name": "anthropic.messages.create", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "llm", - }, - { - "has_input": false, - "has_output": false, - "metadata": { - "operation": "subagent", - }, - "metric_keys": [], - "name": "claude-agent-subagent-operation", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": null, - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "model": "claude-e2e-mock", - }, - "metric_keys": [], - "name": "Claude Agent", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "task", - }, - { - "has_input": false, - "has_output": true, - "metadata": null, - "metric_keys": [], - "name": "Agent: math-expert", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "task", - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "model": "claude-e2e-mock", - }, - "metric_keys": [ - "completion_tokens", - "prompt_tokens", - "tokens", - ], - "name": "anthropic.messages.create", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "llm", - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "gen_ai.tool.name": "calculator", - "mcp.server": "calculator", - }, - "metric_keys": [], - "name": "tool: calculator/calculator", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "tool", - }, - { - "has_input": false, - "has_output": false, - "metadata": { - "operation": "failure", - }, - "metric_keys": [], - "name": "claude-agent-failure-operation", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": null, - }, - { - "has_input": true, - "has_output": true, - "metadata": { - "model": "claude-e2e-mock", - }, - "metric_keys": [], - "name": "Claude Agent", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "task", - }, - { - "has_input": true, - "has_output": false, - "metadata": { - "gen_ai.tool.name": "calculator", - "mcp.server": "calculator", - }, - "metric_keys": [], - "name": "tool: calculator/calculator", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "", - ], - "type": "tool", - }, -] -`; diff --git a/e2e/scenarios/wrap-claude-agent-sdk-traces/package.json b/e2e/scenarios/wrap-claude-agent-sdk-traces/package.json index d0d076b05..5365cc8dd 100644 --- a/e2e/scenarios/wrap-claude-agent-sdk-traces/package.json +++ b/e2e/scenarios/wrap-claude-agent-sdk-traces/package.json @@ -1,7 +1,16 @@ { "name": "@braintrust/e2e-wrap-claude-agent-sdk-traces", "private": true, + "braintrustScenario": { + "canary": { + "dependencies": { + "claude-agent-sdk-v0.1": "@anthropic-ai/claude-agent-sdk@0.1", + "claude-agent-sdk-v0.2": "@anthropic-ai/claude-agent-sdk@0.2" + } + } + }, "dependencies": { - "@anthropic-ai/claude-agent-sdk": "file:../../helpers/mock-claude-agent-sdk" + "claude-agent-sdk-v0.1": "npm:@anthropic-ai/claude-agent-sdk@0.1.77", + "claude-agent-sdk-v0.2": "npm:@anthropic-ai/claude-agent-sdk@0.2.81" } } diff --git a/e2e/scenarios/wrap-claude-agent-sdk-traces/pnpm-lock.yaml b/e2e/scenarios/wrap-claude-agent-sdk-traces/pnpm-lock.yaml index 687c7f77a..bea4806b3 100644 --- a/e2e/scenarios/wrap-claude-agent-sdk-traces/pnpm-lock.yaml +++ b/e2e/scenarios/wrap-claude-agent-sdk-traces/pnpm-lock.yaml @@ -8,15 +8,350 @@ importers: .: dependencies: - '@anthropic-ai/claude-agent-sdk': - specifier: file:../../helpers/mock-claude-agent-sdk - version: file:../../helpers/mock-claude-agent-sdk + claude-agent-sdk-v0.1: + specifier: npm:@anthropic-ai/claude-agent-sdk@0.1.77 + version: '@anthropic-ai/claude-agent-sdk@0.1.77(zod@4.3.6)' + claude-agent-sdk-v0.2: + specifier: npm:@anthropic-ai/claude-agent-sdk@0.2.81 + version: '@anthropic-ai/claude-agent-sdk@0.2.81(zod@4.3.6)' packages: - '@anthropic-ai/claude-agent-sdk@file:../../helpers/mock-claude-agent-sdk': - resolution: {directory: ../../helpers/mock-claude-agent-sdk, type: directory} + '@anthropic-ai/claude-agent-sdk@0.1.77': + resolution: {integrity: sha512-ZEjWQtkoB2MEY6K16DWMmF+8OhywAynH0m08V265cerbZ8xPD/2Ng2jPzbbO40mPeFSsMDJboShL+a3aObP0Jg==} + engines: {node: '>=18.0.0'} + peerDependencies: + zod: ^3.25.0 || ^4.0.0 + + '@anthropic-ai/claude-agent-sdk@0.2.81': + resolution: {integrity: sha512-CBeebgibBEN/DWOQGZN67vhuTG55RbI1hlsFSSoZ4uA/Io3lw04eHTE2ISCmdbqyJaefYTt6GKZei1nP0TQMNw==} + engines: {node: '>=18.0.0'} + peerDependencies: + zod: ^4.0.0 + + '@img/sharp-darwin-arm64@0.33.5': + resolution: {integrity: sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [darwin] + + '@img/sharp-darwin-arm64@0.34.5': + resolution: {integrity: sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [darwin] + + '@img/sharp-darwin-x64@0.33.5': + resolution: {integrity: sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [darwin] + + '@img/sharp-darwin-x64@0.34.5': + resolution: {integrity: sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [darwin] + + '@img/sharp-libvips-darwin-arm64@1.0.4': + resolution: {integrity: sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg==} + cpu: [arm64] + os: [darwin] + + '@img/sharp-libvips-darwin-arm64@1.2.4': + resolution: {integrity: sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==} + cpu: [arm64] + os: [darwin] + + '@img/sharp-libvips-darwin-x64@1.0.4': + resolution: {integrity: sha512-xnGR8YuZYfJGmWPvmlunFaWJsb9T/AO2ykoP3Fz/0X5XV2aoYBPkX6xqCQvUTKKiLddarLaxpzNe+b1hjeWHAQ==} + cpu: [x64] + os: [darwin] + + '@img/sharp-libvips-darwin-x64@1.2.4': + resolution: {integrity: sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==} + cpu: [x64] + os: [darwin] + + '@img/sharp-libvips-linux-arm64@1.0.4': + resolution: {integrity: sha512-9B+taZ8DlyyqzZQnoeIvDVR/2F4EbMepXMc/NdVbkzsJbzkUjhXv/70GQJ7tdLA4YJgNP25zukcxpX2/SueNrA==} + cpu: [arm64] + os: [linux] + + '@img/sharp-libvips-linux-arm64@1.2.4': + resolution: {integrity: sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==} + cpu: [arm64] + os: [linux] + + '@img/sharp-libvips-linux-arm@1.0.5': + resolution: {integrity: sha512-gvcC4ACAOPRNATg/ov8/MnbxFDJqf/pDePbBnuBDcjsI8PssmjoKMAz4LtLaVi+OnSb5FK/yIOamqDwGmXW32g==} + cpu: [arm] + os: [linux] + + '@img/sharp-libvips-linux-arm@1.2.4': + resolution: {integrity: sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==} + cpu: [arm] + os: [linux] + + '@img/sharp-libvips-linux-x64@1.0.4': + resolution: {integrity: sha512-MmWmQ3iPFZr0Iev+BAgVMb3ZyC4KeFc3jFxnNbEPas60e1cIfevbtuyf9nDGIzOaW9PdnDciJm+wFFaTlj5xYw==} + cpu: [x64] + os: [linux] + + '@img/sharp-libvips-linux-x64@1.2.4': + resolution: {integrity: sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==} + cpu: [x64] + os: [linux] + + '@img/sharp-libvips-linuxmusl-arm64@1.0.4': + resolution: {integrity: sha512-9Ti+BbTYDcsbp4wfYib8Ctm1ilkugkA/uscUn6UXK1ldpC1JjiXbLfFZtRlBhjPZ5o1NCLiDbg8fhUPKStHoTA==} + cpu: [arm64] + os: [linux] + + '@img/sharp-libvips-linuxmusl-arm64@1.2.4': + resolution: {integrity: sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==} + cpu: [arm64] + os: [linux] + + '@img/sharp-libvips-linuxmusl-x64@1.0.4': + resolution: {integrity: sha512-viYN1KX9m+/hGkJtvYYp+CCLgnJXwiQB39damAO7WMdKWlIhmYTfHjwSbQeUK/20vY154mwezd9HflVFM1wVSw==} + cpu: [x64] + os: [linux] + + '@img/sharp-libvips-linuxmusl-x64@1.2.4': + resolution: {integrity: sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==} + cpu: [x64] + os: [linux] + + '@img/sharp-linux-arm64@0.33.5': + resolution: {integrity: sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [linux] + + '@img/sharp-linux-arm64@0.34.5': + resolution: {integrity: sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [linux] + + '@img/sharp-linux-arm@0.33.5': + resolution: {integrity: sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm] + os: [linux] + + '@img/sharp-linux-arm@0.34.5': + resolution: {integrity: sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm] + os: [linux] + + '@img/sharp-linux-x64@0.33.5': + resolution: {integrity: sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [linux] + + '@img/sharp-linux-x64@0.34.5': + resolution: {integrity: sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [linux] + + '@img/sharp-linuxmusl-arm64@0.33.5': + resolution: {integrity: sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [linux] + + '@img/sharp-linuxmusl-arm64@0.34.5': + resolution: {integrity: sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [linux] + + '@img/sharp-linuxmusl-x64@0.33.5': + resolution: {integrity: sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [linux] + + '@img/sharp-linuxmusl-x64@0.34.5': + resolution: {integrity: sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [linux] + + '@img/sharp-win32-arm64@0.34.5': + resolution: {integrity: sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [win32] + + '@img/sharp-win32-x64@0.33.5': + resolution: {integrity: sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [win32] + + '@img/sharp-win32-x64@0.34.5': + resolution: {integrity: sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [win32] + + zod@4.3.6: + resolution: {integrity: sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==} snapshots: - '@anthropic-ai/claude-agent-sdk@file:../../helpers/mock-claude-agent-sdk': {} + '@anthropic-ai/claude-agent-sdk@0.1.77(zod@4.3.6)': + dependencies: + zod: 4.3.6 + optionalDependencies: + '@img/sharp-darwin-arm64': 0.33.5 + '@img/sharp-darwin-x64': 0.33.5 + '@img/sharp-linux-arm': 0.33.5 + '@img/sharp-linux-arm64': 0.33.5 + '@img/sharp-linux-x64': 0.33.5 + '@img/sharp-linuxmusl-arm64': 0.33.5 + '@img/sharp-linuxmusl-x64': 0.33.5 + '@img/sharp-win32-x64': 0.33.5 + + '@anthropic-ai/claude-agent-sdk@0.2.81(zod@4.3.6)': + dependencies: + zod: 4.3.6 + optionalDependencies: + '@img/sharp-darwin-arm64': 0.34.5 + '@img/sharp-darwin-x64': 0.34.5 + '@img/sharp-linux-arm': 0.34.5 + '@img/sharp-linux-arm64': 0.34.5 + '@img/sharp-linux-x64': 0.34.5 + '@img/sharp-linuxmusl-arm64': 0.34.5 + '@img/sharp-linuxmusl-x64': 0.34.5 + '@img/sharp-win32-arm64': 0.34.5 + '@img/sharp-win32-x64': 0.34.5 + + '@img/sharp-darwin-arm64@0.33.5': + optionalDependencies: + '@img/sharp-libvips-darwin-arm64': 1.0.4 + optional: true + + '@img/sharp-darwin-arm64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-darwin-arm64': 1.2.4 + optional: true + + '@img/sharp-darwin-x64@0.33.5': + optionalDependencies: + '@img/sharp-libvips-darwin-x64': 1.0.4 + optional: true + + '@img/sharp-darwin-x64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-darwin-x64': 1.2.4 + optional: true + + '@img/sharp-libvips-darwin-arm64@1.0.4': + optional: true + + '@img/sharp-libvips-darwin-arm64@1.2.4': + optional: true + + '@img/sharp-libvips-darwin-x64@1.0.4': + optional: true + + '@img/sharp-libvips-darwin-x64@1.2.4': + optional: true + + '@img/sharp-libvips-linux-arm64@1.0.4': + optional: true + + '@img/sharp-libvips-linux-arm64@1.2.4': + optional: true + + '@img/sharp-libvips-linux-arm@1.0.5': + optional: true + + '@img/sharp-libvips-linux-arm@1.2.4': + optional: true + + '@img/sharp-libvips-linux-x64@1.0.4': + optional: true + + '@img/sharp-libvips-linux-x64@1.2.4': + optional: true + + '@img/sharp-libvips-linuxmusl-arm64@1.0.4': + optional: true + + '@img/sharp-libvips-linuxmusl-arm64@1.2.4': + optional: true + + '@img/sharp-libvips-linuxmusl-x64@1.0.4': + optional: true + + '@img/sharp-libvips-linuxmusl-x64@1.2.4': + optional: true + + '@img/sharp-linux-arm64@0.33.5': + optionalDependencies: + '@img/sharp-libvips-linux-arm64': 1.0.4 + optional: true + + '@img/sharp-linux-arm64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-arm64': 1.2.4 + optional: true + + '@img/sharp-linux-arm@0.33.5': + optionalDependencies: + '@img/sharp-libvips-linux-arm': 1.0.5 + optional: true + + '@img/sharp-linux-arm@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-arm': 1.2.4 + optional: true + + '@img/sharp-linux-x64@0.33.5': + optionalDependencies: + '@img/sharp-libvips-linux-x64': 1.0.4 + optional: true + + '@img/sharp-linux-x64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-x64': 1.2.4 + optional: true + + '@img/sharp-linuxmusl-arm64@0.33.5': + optionalDependencies: + '@img/sharp-libvips-linuxmusl-arm64': 1.0.4 + optional: true + + '@img/sharp-linuxmusl-arm64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linuxmusl-arm64': 1.2.4 + optional: true + + '@img/sharp-linuxmusl-x64@0.33.5': + optionalDependencies: + '@img/sharp-libvips-linuxmusl-x64': 1.0.4 + optional: true + + '@img/sharp-linuxmusl-x64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linuxmusl-x64': 1.2.4 + optional: true + + '@img/sharp-win32-arm64@0.34.5': + optional: true + + '@img/sharp-win32-x64@0.33.5': + optional: true + + '@img/sharp-win32-x64@0.34.5': + optional: true + + zod@4.3.6: {} diff --git a/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.1.ts b/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.1.ts new file mode 100644 index 000000000..1e5743623 --- /dev/null +++ b/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.1.ts @@ -0,0 +1,5 @@ +import * as claudeAgentSDK from "claude-agent-sdk-v0.1"; +import { runMain } from "../../helpers/scenario-runtime"; +import { runWrapClaudeAgentSDKTraces } from "./scenario.impl"; + +runMain(async () => runWrapClaudeAgentSDKTraces(claudeAgentSDK)); diff --git a/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.2.ts b/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.2.ts new file mode 100644 index 000000000..25291595f --- /dev/null +++ b/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.2.ts @@ -0,0 +1,5 @@ +import * as claudeAgentSDK from "claude-agent-sdk-v0.2"; +import { runMain } from "../../helpers/scenario-runtime"; +import { runWrapClaudeAgentSDKTraces } from "./scenario.impl"; + +runMain(async () => runWrapClaudeAgentSDKTraces(claudeAgentSDK)); diff --git a/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.ts b/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.impl.ts similarity index 60% rename from e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.ts rename to e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.impl.ts index 2aedf0b28..41a058781 100644 --- a/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.ts +++ b/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.impl.ts @@ -1,14 +1,12 @@ import { wrapClaudeAgentSDK } from "braintrust"; -import * as mockSDK from "../../helpers/mock-claude-agent-sdk/sdk.mjs"; import { runClaudeAgentSDKScenario } from "../../helpers/claude-agent-sdk-scenario.mjs"; -import { runMain } from "../../helpers/scenario-runtime"; -runMain(async () => - runClaudeAgentSDKScenario({ +export async function runWrapClaudeAgentSDKTraces(claudeAgentSDK: object) { + await runClaudeAgentSDKScenario({ decorateSDK: wrapClaudeAgentSDK, projectNameBase: "e2e-wrap-claude-agent-sdk", rootName: "claude-agent-sdk-wrapper-root", scenarioName: "wrap-claude-agent-sdk-traces", - sdk: mockSDK, - }), -); + sdk: claudeAgentSDK, + }); +} diff --git a/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.test.ts b/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.test.ts index 218dc7758..3e15697c2 100644 --- a/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.test.ts +++ b/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.test.ts @@ -1,166 +1,53 @@ import { expect, test } from "vitest"; -import { normalizeForSnapshot, type Json } from "../../helpers/normalize"; -import { assertClaudeAgentSDKTraceContract } from "../../helpers/claude-agent-sdk-trace-contract"; +import { + assertClaudeAgentSDKTraceContract, + resolveClaudeAgentSDKSpanSnapshotPath, +} from "../../helpers/claude-agent-sdk-trace-contract"; +import { + CLAUDE_AGENT_SDK_SCENARIO_TIMEOUT_MS, + getWrapClaudeAgentSDKScenarios, +} from "../../helpers/claude-agent-sdk"; +import { formatJsonFileSnapshot } from "../../helpers/file-snapshot"; import { prepareScenarioDir, resolveScenarioDir, withScenarioHarness, } from "../../helpers/scenario-harness"; import { E2E_TAGS } from "../../helpers/tags"; -import { - findAllSpans, - findChildSpans, - findLatestSpan, -} from "../../helpers/trace-selectors"; -import { summarizeWrapperContract } from "../../helpers/wrapper-contract"; const scenarioDir = await prepareScenarioDir({ scenarioDir: resolveScenarioDir(import.meta.url), }); -const TIMEOUT_MS = 120_000; - -test( - "wrap-claude-agent-sdk-traces captures tool, async prompt, and subagent traces", - { - tags: [E2E_TAGS.externalApi], - timeout: TIMEOUT_MS, - }, - async () => { - await withScenarioHarness(async ({ events, runScenarioDir }) => { - await runScenarioDir({ scenarioDir, timeoutMs: TIMEOUT_MS }); - - const capturedEvents = events(); - const contract = assertClaudeAgentSDKTraceContract({ - capturedEvents, - rootName: "claude-agent-sdk-wrapper-root", - scenarioName: "wrap-claude-agent-sdk-traces", - }); - const root = contract.refs.root; - const basicOperation = findLatestSpan( - capturedEvents, - "claude-agent-basic-operation", - ); - const asyncPromptOperation = findLatestSpan( - capturedEvents, - "claude-agent-async-prompt-operation", - ); - const subAgentOperation = findLatestSpan( - capturedEvents, - "claude-agent-subagent-operation", - ); - const failureOperation = findLatestSpan( - capturedEvents, - "claude-agent-failure-operation", - ); - const basicTask = contract.refs.basicTask; - const asyncPromptTask = contract.refs.asyncPromptTask; - const subAgentTaskRoot = contract.refs.subAgentTaskRoot; - const failureTask = contract.refs.failureTask; - const asyncPromptLlm = contract.refs.asyncPromptLlm; - - const basicLlmSpans = findChildSpans( - capturedEvents, - "anthropic.messages.create", - basicTask?.span.id, - ); - expect(basicLlmSpans.length).toBeGreaterThanOrEqual(1); - - const basicToolSpans = findAllSpans( - capturedEvents, - "tool: calculator/calculator", - ).filter((event) => event.span.rootId === basicTask?.span.rootId); - expect(basicToolSpans.length).toBeGreaterThanOrEqual(1); - - const asyncPromptInput = asyncPromptTask?.input as Array<{ - message?: { content?: string }; - }>; - expect(Array.isArray(asyncPromptInput)).toBe(true); - expect(asyncPromptInput.map((item) => item.message?.content)).toEqual([ - "Part 1", - "Part 2", - ]); - - expect(asyncPromptLlm).toBeDefined(); - const asyncPromptLlmInput = asyncPromptLlm?.input as Array<{ - content?: string; - }>; - expect(asyncPromptLlmInput.map((item) => item.content)).toEqual([ - "Part 1", - "Part 2", - ]); - - const subAgentTask = events().find( - (event) => - event.span.type === "task" && - event.span.rootId === subAgentTaskRoot?.span.rootId && - event.span.parentIds.includes(subAgentTaskRoot?.span.id ?? "") && - event.span.name?.startsWith("Agent:"), - ); - expect(subAgentTask).toBeDefined(); - - const subAgentLlmSpans = findAllSpans( - capturedEvents, - "anthropic.messages.create", - ).filter((event) => - event.span.parentIds.includes(subAgentTask?.span.id ?? ""), - ); - expect(subAgentLlmSpans.length).toBeGreaterThanOrEqual(1); - - const subAgentToolSpans = findAllSpans( - capturedEvents, - "tool: calculator/calculator", - ).filter((event) => - event.span.parentIds.includes(subAgentTask?.span.id ?? ""), - ); - expect(subAgentToolSpans.length).toBeGreaterThanOrEqual(1); - for (const toolSpan of subAgentToolSpans) { - expect(toolSpan.span.parentIds).not.toContain( - subAgentTaskRoot?.span.id, +const wrapClaudeAgentSDKScenarios = + await getWrapClaudeAgentSDKScenarios(scenarioDir); + +for (const scenario of wrapClaudeAgentSDKScenarios) { + test( + `wrap-claude-agent-sdk-traces captures tool, async prompt, and subagent traces (claude-agent-sdk ${scenario.version})`, + { + tags: [E2E_TAGS.externalApi], + timeout: CLAUDE_AGENT_SDK_SCENARIO_TIMEOUT_MS, + }, + async () => { + await withScenarioHarness(async ({ events, runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.entry, + scenarioDir, + timeoutMs: CLAUDE_AGENT_SDK_SCENARIO_TIMEOUT_MS, + }); + + const contract = assertClaudeAgentSDKTraceContract({ + capturedEvents: events(), + rootName: "claude-agent-sdk-wrapper-root", + scenarioName: "wrap-claude-agent-sdk-traces", + }); + + await expect( + formatJsonFileSnapshot(contract.spanSummary), + ).toMatchFileSnapshot( + resolveClaudeAgentSDKSpanSnapshotPath(scenario.dependencyName), ); - } - - const failureToolSpan = findAllSpans( - capturedEvents, - "tool: calculator/calculator", - ).find( - (event) => - event.span.rootId === failureTask?.span.rootId && - event.span.parentIds.includes(failureTask?.span.id ?? ""), - ); - expect(failureToolSpan).toBeDefined(); - expect(failureToolSpan?.row.error).toBe("division by zero"); - - expect( - normalizeForSnapshot( - [ - root, - basicOperation, - basicTask, - basicLlmSpans[0], - basicToolSpans[0], - asyncPromptOperation, - asyncPromptTask, - asyncPromptLlm, - subAgentOperation, - subAgentTaskRoot, - subAgentTask, - subAgentLlmSpans[0], - subAgentToolSpans[0], - failureOperation, - failureTask, - failureToolSpan, - ].map((event) => - summarizeWrapperContract(event!, [ - "provider", - "model", - "operation", - "scenario", - "mcp.server", - "gen_ai.tool.name", - ]), - ) as Json, - ), - ).toMatchSnapshot(); - }); - }, -); + }); + }, + ); +} diff --git a/js/src/auto-instrumentations/configs/claude-agent-sdk.test.ts b/js/src/auto-instrumentations/configs/claude-agent-sdk.test.ts new file mode 100644 index 000000000..5c71ea78b --- /dev/null +++ b/js/src/auto-instrumentations/configs/claude-agent-sdk.test.ts @@ -0,0 +1,36 @@ +import { describe, expect, it } from "vitest"; +import { claudeAgentSDKConfigs } from "./claude-agent-sdk"; +import { claudeAgentSDKChannels } from "../../instrumentation/plugins/claude-agent-sdk-channels"; + +describe("claudeAgentSDKConfigs", () => { + it("registers sync query instrumentation for 0.1.x", () => { + expect(claudeAgentSDKConfigs).toContainEqual({ + channelName: claudeAgentSDKChannels.query.channelName, + module: { + name: "@anthropic-ai/claude-agent-sdk", + versionRange: ">=0.1.0 <0.2.0", + filePath: "sdk.mjs", + }, + functionQuery: { + functionName: "query", + kind: "Sync", + }, + }); + }); + + it("registers export-alias query instrumentation for 0.2.x", () => { + expect(claudeAgentSDKConfigs).toContainEqual({ + channelName: claudeAgentSDKChannels.query.channelName, + module: { + name: "@anthropic-ai/claude-agent-sdk", + versionRange: ">=0.2.0", + filePath: "sdk.mjs", + }, + functionQuery: { + functionName: "query", + kind: "Sync", + isExportAlias: true, + }, + }); + }); +}); diff --git a/js/src/auto-instrumentations/configs/claude-agent-sdk.ts b/js/src/auto-instrumentations/configs/claude-agent-sdk.ts index 12cb50bc2..ac375f464 100644 --- a/js/src/auto-instrumentations/configs/claude-agent-sdk.ts +++ b/js/src/auto-instrumentations/configs/claude-agent-sdk.ts @@ -13,17 +13,31 @@ import { claudeAgentSDKChannels } from "../../instrumentation/plugins/claude-age * names like: "orchestrion:claude-agent-sdk:query" */ export const claudeAgentSDKConfigs: InstrumentationConfig[] = [ - // query - Main entry point for agent interactions (top-level exported async generator function) + // query - Main entry point for agent interactions. The SDK returns an async + // iterable, but the exported query function itself is synchronous. { channelName: claudeAgentSDKChannels.query.channelName, module: { name: "@anthropic-ai/claude-agent-sdk", - versionRange: ">=0.1.0", + versionRange: ">=0.1.0 <0.2.0", filePath: "sdk.mjs", }, functionQuery: { functionName: "query", - kind: "Async", + kind: "Sync", + }, + }, + { + channelName: claudeAgentSDKChannels.query.channelName, + module: { + name: "@anthropic-ai/claude-agent-sdk", + versionRange: ">=0.2.0", + filePath: "sdk.mjs", + }, + functionQuery: { + functionName: "query", + kind: "Sync", + isExportAlias: true, }, }, ]; diff --git a/js/src/instrumentation/plugins/claude-agent-sdk-channels.ts b/js/src/instrumentation/plugins/claude-agent-sdk-channels.ts index 37a1cdff3..09d172ac5 100644 --- a/js/src/instrumentation/plugins/claude-agent-sdk-channels.ts +++ b/js/src/instrumentation/plugins/claude-agent-sdk-channels.ts @@ -14,7 +14,7 @@ export const claudeAgentSDKChannels = defineChannels( ClaudeAgentSDKMessage >({ channelName: "query", - kind: "async", + kind: "sync-stream", }), }, ); diff --git a/js/src/instrumentation/plugins/claude-agent-sdk-plugin.test.ts b/js/src/instrumentation/plugins/claude-agent-sdk-plugin.test.ts index fbed71460..749533c2f 100644 --- a/js/src/instrumentation/plugins/claude-agent-sdk-plugin.test.ts +++ b/js/src/instrumentation/plugins/claude-agent-sdk-plugin.test.ts @@ -26,6 +26,7 @@ vi.mock("../../../util/index", () => ({ SpanTypeAttribute: { TASK: "task", LLM: "llm", + TOOL: "tool", }, isObject: vi.fn((val: unknown) => val !== null && typeof val === "object"), })); @@ -35,6 +36,7 @@ vi.mock("../../../util", () => ({ SpanTypeAttribute: { TASK: "task", LLM: "llm", + TOOL: "tool", }, })); @@ -133,7 +135,7 @@ describe("ClaudeAgentSDKPlugin", () => { expect(mockChannel.subscribe).toHaveBeenCalledWith( expect.objectContaining({ start: expect.any(Function), - asyncEnd: expect.any(Function), + end: expect.any(Function), error: expect.any(Function), }), ); @@ -270,7 +272,7 @@ describe("ClaudeAgentSDKPlugin", () => { }); }); - describe("asyncEnd handler for non-streaming", () => { + describe("end handler for sync stream results", () => { it("should handle non-streaming result", () => { const startEvent = { arguments: [ @@ -293,19 +295,19 @@ describe("ClaudeAgentSDKPlugin", () => { }, }; - handlers.asyncEnd(endEvent); + handlers.end(endEvent); // Verify no errors thrown expect(true).toBe(true); }); - it("should handle asyncEnd without matching start", () => { + it("should handle end without matching start", () => { const endEvent = { arguments: [{ prompt: "Test" }], result: { type: "result" }, }; - handlers.asyncEnd(endEvent); + handlers.end(endEvent); // Should not throw expect(true).toBe(true); diff --git a/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts b/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts index ae078536e..62b072bbe 100644 --- a/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts +++ b/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts @@ -12,13 +12,27 @@ import { } from "../../wrappers/anthropic-tokens-util"; import { claudeAgentSDKChannels } from "./claude-agent-sdk-channels"; import type { + ClaudeAgentSDKHookCallback, + ClaudeAgentSDKHookCallbackMatcher, + ClaudeAgentSDKMcpServersConfig, ClaudeAgentSDKMessage, ClaudeAgentSDKQueryOptions, + ClaudeAgentSDKQueryParams, } from "../../vendor-sdk-types/claude-agent-sdk"; -/** - * Filters options to include only specific serializable fields for logging. - */ +type ClaudeConversationMessage = { content: unknown; role: string }; +type ParsedToolName = { + displayName: string; + mcpServer?: string; + rawToolName: string; + toolName: string; +}; +type ParentSpanResolver = (toolUseID: string) => Promise; + +function isSubAgentToolName(toolName: string): boolean { + return toolName === "Agent" || toolName === "Task"; +} + function filterSerializableOptions( options: ClaudeAgentSDKQueryOptions, ): Record { @@ -49,9 +63,6 @@ function filterSerializableOptions( return filtered; } -/** - * Get a number property safely from an object. - */ function getNumberProperty(obj: unknown, key: string): number | undefined { if (!obj || typeof obj !== "object" || !(key in obj)) { return undefined; @@ -60,16 +71,11 @@ function getNumberProperty(obj: unknown, key: string): number | undefined { return typeof value === "number" ? value : undefined; } -/** - * Extract and normalize usage metrics from a Claude Agent SDK message. - */ function extractUsageFromMessage( message: ClaudeAgentSDKMessage, ): Record { const metrics: Record = {}; - // Assistant messages contain usage in message.message.usage - // Result messages contain usage in message.usage let usage: unknown; if (message.type === "assistant") { usage = message.message?.usage; @@ -81,7 +87,6 @@ function extractUsageFromMessage( return metrics; } - // Standard token counts const inputTokens = getNumberProperty(usage, "input_tokens"); if (inputTokens !== undefined) { metrics.prompt_tokens = inputTokens; @@ -92,21 +97,18 @@ function extractUsageFromMessage( metrics.completion_tokens = outputTokens; } - // Anthropic cache tokens const cacheReadTokens = getNumberProperty(usage, "cache_read_input_tokens") || 0; const cacheCreationTokens = getNumberProperty(usage, "cache_creation_input_tokens") || 0; if (cacheReadTokens > 0 || cacheCreationTokens > 0) { - const cacheTokens = extractAnthropicCacheTokens( - cacheReadTokens, - cacheCreationTokens, + Object.assign( + metrics, + extractAnthropicCacheTokens(cacheReadTokens, cacheCreationTokens), ); - Object.assign(metrics, cacheTokens); } - // Finalize Anthropic token calculations if (Object.keys(metrics).length > 0) { Object.assign(metrics, finalizeAnthropicTokens(metrics)); } @@ -114,41 +116,47 @@ function extractUsageFromMessage( return metrics; } -/** - * Builds the input array for an LLM span from the initial prompt and conversation history. - */ function buildLLMInput( prompt: string | AsyncIterable | undefined, - conversationHistory: Array<{ content: unknown; role: string }>, -): Array<{ content: unknown; role: string }> | undefined { - const promptMessage = - typeof prompt === "string" ? { content: prompt, role: "user" } : undefined; - - const inputParts = [ - ...(promptMessage ? [promptMessage] : []), - ...conversationHistory, - ]; + conversationHistory: ClaudeConversationMessage[], + capturedPromptMessages?: ClaudeAgentSDKMessage[], +): ClaudeConversationMessage[] | undefined { + const promptMessages: ClaudeConversationMessage[] = []; + + if (typeof prompt === "string") { + promptMessages.push({ content: prompt, role: "user" }); + } else if (capturedPromptMessages && capturedPromptMessages.length > 0) { + for (const msg of capturedPromptMessages) { + const role = msg.message?.role; + const content = msg.message?.content; + if (role && content !== undefined) { + promptMessages.push({ content, role }); + } + } + } + const inputParts = [...promptMessages, ...conversationHistory]; return inputParts.length > 0 ? inputParts : undefined; } -/** - * Creates an LLM span for a group of messages with the same message ID. - * Returns the final message content to add to conversation history. - */ +function formatCapturedMessages( + messages: ClaudeAgentSDKMessage[], +): ClaudeAgentSDKMessage[] { + return messages.length > 0 ? messages : []; +} + async function createLLMSpanForMessages( messages: ClaudeAgentSDKMessage[], prompt: string | AsyncIterable | undefined, - conversationHistory: Array<{ content: unknown; role: string }>, + conversationHistory: ClaudeConversationMessage[], options: ClaudeAgentSDKQueryOptions, startTime: number, - parentSpan: Awaited>["export"] extends ( - ...args: infer _ - ) => Promise - ? R - : never, -): Promise<{ content: unknown; role: string } | undefined> { - if (messages.length === 0) return undefined; + capturedPromptMessages: ClaudeAgentSDKMessage[] | undefined, + parentSpan: string, +): Promise { + if (messages.length === 0) { + return undefined; + } const lastMessage = messages[messages.length - 1]; if (lastMessage.type !== "assistant" || !lastMessage.message?.usage) { @@ -157,7 +165,11 @@ async function createLLMSpanForMessages( const model = lastMessage.message.model || options.model; const usage = extractUsageFromMessage(lastMessage); - const input = buildLLMInput(prompt, conversationHistory); + const input = buildLLMInput( + prompt, + conversationHistory, + capturedPromptMessages, + ); const outputs = messages .map((m) => m.message?.content && m.message?.role @@ -169,21 +181,20 @@ async function createLLMSpanForMessages( c !== undefined, ); - // Use traced pattern for LLM spans const span = startSpan({ name: "anthropic.messages.create", + parent: parentSpan, spanAttributes: { type: SpanTypeAttribute.LLM, }, startTime, - parent: parentSpan, }); span.log({ input, - output: outputs, metadata: model ? { model } : undefined, metrics: usage, + output: outputs, }); await span.end(); @@ -193,16 +204,489 @@ async function createLLMSpanForMessages( : undefined; } -/** - * Plugin for Claude Agent SDK auto-instrumentation. - * - * Subscribes to orchestrion:claude-agent-sdk:* channels and creates - * Braintrust spans with proper tracing for agent interactions. - * - * NOTE: Uses span type TASK (not LLM) for agent interactions since agents - * represent higher-level workflows. Individual LLM calls within the agent - * are traced separately as LLM spans. - */ +function getMcpServerMetadata( + serverName: string | undefined, + mcpServers: ClaudeAgentSDKMcpServersConfig | undefined, +): Record { + if (!serverName || !mcpServers) { + return {}; + } + + const serverConfig = mcpServers[serverName]; + if (!serverConfig) { + return {}; + } + + const metadata: Record = {}; + + if (serverConfig.type) { + metadata["mcp.type"] = serverConfig.type; + } else if (typeof serverConfig === "object" && "transport" in serverConfig) { + metadata["mcp.type"] = "sdk"; + } + + if (serverConfig.url) { + metadata["mcp.url"] = serverConfig.url; + } + + if (serverConfig.command) { + metadata["mcp.command"] = serverConfig.command; + if (serverConfig.args) { + metadata["mcp.args"] = serverConfig.args.join(" "); + } + } + + return metadata; +} + +function parseToolName(rawToolName: string): ParsedToolName { + const mcpMatch = rawToolName.match(/^mcp__([^_]+)__(.+)$/); + + if (mcpMatch) { + const [, mcpServer, toolName] = mcpMatch; + return { + displayName: `tool: ${mcpServer}/${toolName}`, + mcpServer, + rawToolName, + toolName, + }; + } + + return { + displayName: `tool: ${rawToolName}`, + rawToolName, + toolName: rawToolName, + }; +} + +function createToolTracingHooks( + resolveParentSpan: ParentSpanResolver, + activeToolSpans: Map, + mcpServers: ClaudeAgentSDKMcpServersConfig | undefined, + subAgentSpans: Map, + endedSubAgentSpans: Set, +): { + postToolUse: ClaudeAgentSDKHookCallback; + postToolUseFailure: ClaudeAgentSDKHookCallback; + preToolUse: ClaudeAgentSDKHookCallback; +} { + const preToolUse: ClaudeAgentSDKHookCallback = async (input, toolUseID) => { + if (input.hook_event_name !== "PreToolUse" || !toolUseID) { + return {}; + } + + if (isSubAgentToolName(input.tool_name)) { + return {}; + } + + const parsed = parseToolName(input.tool_name); + const toolSpan = startSpan({ + event: { + input: input.tool_input, + metadata: { + "claude_agent_sdk.cwd": input.cwd, + "claude_agent_sdk.raw_tool_name": parsed.rawToolName, + "claude_agent_sdk.session_id": input.session_id, + "gen_ai.tool.call.id": toolUseID, + "gen_ai.tool.name": parsed.toolName, + ...(parsed.mcpServer && { "mcp.server": parsed.mcpServer }), + ...getMcpServerMetadata(parsed.mcpServer, mcpServers), + }, + }, + name: parsed.displayName, + parent: await resolveParentSpan(toolUseID), + spanAttributes: { type: SpanTypeAttribute.TOOL }, + }); + + activeToolSpans.set(toolUseID, toolSpan); + return {}; + }; + + const postToolUse: ClaudeAgentSDKHookCallback = async (input, toolUseID) => { + if (input.hook_event_name !== "PostToolUse" || !toolUseID) { + return {}; + } + + const subAgentSpan = subAgentSpans.get(toolUseID); + if (subAgentSpan) { + try { + const response = input.tool_response as + | Record + | undefined; + const metadata: Record = {}; + if (response?.status) { + metadata["claude_agent_sdk.status"] = response.status; + } + if (response?.totalDurationMs) { + metadata["claude_agent_sdk.duration_ms"] = response.totalDurationMs; + } + if (response?.totalToolUseCount !== undefined) { + metadata["claude_agent_sdk.tool_use_count"] = + response.totalToolUseCount; + } + + subAgentSpan.log({ + metadata, + output: response?.content, + }); + } finally { + subAgentSpan.end(); + endedSubAgentSpans.add(toolUseID); + } + + return {}; + } + + const toolSpan = activeToolSpans.get(toolUseID); + if (!toolSpan) { + return {}; + } + + try { + toolSpan.log({ output: input.tool_response }); + } finally { + toolSpan.end(); + activeToolSpans.delete(toolUseID); + } + + return {}; + }; + + const postToolUseFailure: ClaudeAgentSDKHookCallback = async ( + input, + toolUseID, + ) => { + if (input.hook_event_name !== "PostToolUseFailure" || !toolUseID) { + return {}; + } + + const subAgentSpan = subAgentSpans.get(toolUseID); + if (subAgentSpan) { + try { + subAgentSpan.log({ error: input.error }); + } finally { + subAgentSpan.end(); + endedSubAgentSpans.add(toolUseID); + } + + return {}; + } + + const toolSpan = activeToolSpans.get(toolUseID); + if (!toolSpan) { + return {}; + } + + const parsed = parseToolName(input.tool_name); + try { + toolSpan.log({ + error: input.error, + metadata: { + "claude_agent_sdk.is_interrupt": input.is_interrupt, + "claude_agent_sdk.session_id": input.session_id, + "gen_ai.tool.call.id": toolUseID, + "gen_ai.tool.name": parsed.toolName, + ...(parsed.mcpServer && { "mcp.server": parsed.mcpServer }), + }, + }); + } finally { + toolSpan.end(); + activeToolSpans.delete(toolUseID); + } + + return {}; + }; + + return { postToolUse, postToolUseFailure, preToolUse }; +} + +function injectTracingHooks( + options: ClaudeAgentSDKQueryOptions, + resolveParentSpan: ParentSpanResolver, + activeToolSpans: Map, + subAgentSpans: Map, + endedSubAgentSpans: Set, +): ClaudeAgentSDKQueryOptions { + const { preToolUse, postToolUse, postToolUseFailure } = + createToolTracingHooks( + resolveParentSpan, + activeToolSpans, + options.mcpServers, + subAgentSpans, + endedSubAgentSpans, + ); + + const existingHooks = options.hooks ?? {}; + + return { + ...options, + hooks: { + ...existingHooks, + PostToolUse: [ + ...(existingHooks.PostToolUse ?? []), + { hooks: [postToolUse] } satisfies ClaudeAgentSDKHookCallbackMatcher, + ], + PostToolUseFailure: [ + ...(existingHooks.PostToolUseFailure ?? []), + { + hooks: [postToolUseFailure], + } satisfies ClaudeAgentSDKHookCallbackMatcher, + ], + PreToolUse: [ + ...(existingHooks.PreToolUse ?? []), + { hooks: [preToolUse] } satisfies ClaudeAgentSDKHookCallbackMatcher, + ], + }, + }; +} + +type QueryState = { + accumulatedOutputTokens: number; + activeToolSpans: Map; + capturedPromptMessages: ClaudeAgentSDKMessage[] | undefined; + currentMessageId: string | undefined; + currentMessageStartTime: number; + currentMessages: ClaudeAgentSDKMessage[]; + endedSubAgentSpans: Set; + finalResults: ClaudeConversationMessage[]; + options: ClaudeAgentSDKQueryOptions; + originalPrompt: string | AsyncIterable | undefined; + pendingSubAgentNames: Map; + processing: Promise; + promptDone: Promise; + promptStarted: () => boolean; + span: Span; + subAgentSpans: Map; + toolUseToParent: Map; +}; + +async function finalizeCurrentMessageGroup(state: QueryState): Promise { + if (state.currentMessages.length === 0) { + return; + } + + const parentToolUseId = state.currentMessages[0]?.parent_tool_use_id ?? null; + let parentSpan = await state.span.export(); + if (parentToolUseId) { + const subAgentSpan = state.subAgentSpans.get(parentToolUseId); + if (subAgentSpan) { + parentSpan = await subAgentSpan.export(); + } + } + + const finalMessage = await createLLMSpanForMessages( + state.currentMessages, + state.originalPrompt, + state.finalResults, + state.options, + state.currentMessageStartTime, + state.capturedPromptMessages, + parentSpan, + ); + + if (finalMessage) { + state.finalResults.push(finalMessage); + } + + const lastMessage = state.currentMessages[state.currentMessages.length - 1]; + if (lastMessage?.message?.usage) { + state.accumulatedOutputTokens += + getNumberProperty(lastMessage.message.usage, "output_tokens") || 0; + } + + state.currentMessages.length = 0; +} + +function maybeTrackToolUseContext( + state: QueryState, + message: ClaudeAgentSDKMessage, +): void { + if ( + message.type !== "assistant" || + !Array.isArray(message.message?.content) + ) { + return; + } + + const parentToolUseId = message.parent_tool_use_id ?? null; + + for (const block of message.message.content) { + if ( + typeof block !== "object" || + block === null || + !("type" in block) || + block.type !== "tool_use" || + !("id" in block) || + typeof block.id !== "string" + ) { + continue; + } + + state.toolUseToParent.set(block.id, parentToolUseId); + + if ( + block.name === "Task" && + typeof block.input === "object" && + block.input !== null && + "subagent_type" in block.input && + typeof block.input.subagent_type === "string" + ) { + state.pendingSubAgentNames.set(block.id, block.input.subagent_type); + } + } +} + +async function maybeStartSubAgentSpan( + state: QueryState, + message: ClaudeAgentSDKMessage, +): Promise { + if (!("parent_tool_use_id" in message)) { + return; + } + + const parentToolUseId = message.parent_tool_use_id; + if (!parentToolUseId) { + return; + } + + await ensureSubAgentSpan( + state.pendingSubAgentNames, + state.span, + state.subAgentSpans, + parentToolUseId, + ); +} + +async function ensureSubAgentSpan( + pendingSubAgentNames: Map, + rootSpan: Span, + subAgentSpans: Map, + parentToolUseId: string, +): Promise { + const existingSpan = subAgentSpans.get(parentToolUseId); + if (existingSpan) { + return existingSpan; + } + + const agentName = pendingSubAgentNames.get(parentToolUseId); + const spanName = agentName ? `Agent: ${agentName}` : "Agent: sub-agent"; + + const subAgentSpan = startSpan({ + event: { + metadata: { + ...(agentName && { "claude_agent_sdk.agent_type": agentName }), + }, + }, + name: spanName, + parent: await rootSpan.export(), + spanAttributes: { type: SpanTypeAttribute.TASK }, + }); + + subAgentSpans.set(parentToolUseId, subAgentSpan); + return subAgentSpan; +} + +async function handleStreamMessage( + state: QueryState, + message: ClaudeAgentSDKMessage, +): Promise { + maybeTrackToolUseContext(state, message); + await maybeStartSubAgentSpan(state, message); + + const messageId = message.message?.id; + if (messageId && messageId !== state.currentMessageId) { + await finalizeCurrentMessageGroup(state); + state.currentMessageId = messageId; + state.currentMessageStartTime = getCurrentUnixTimestamp(); + } + + if (message.type === "assistant" && message.message?.usage) { + state.currentMessages.push(message); + } + + if (message.type !== "result" || !message.usage) { + return; + } + + const finalUsageMetrics = extractUsageFromMessage(message); + if ( + state.currentMessages.length > 0 && + finalUsageMetrics.completion_tokens !== undefined + ) { + const lastMessage = state.currentMessages[state.currentMessages.length - 1]; + if (lastMessage?.message?.usage) { + const adjustedTokens = + finalUsageMetrics.completion_tokens - state.accumulatedOutputTokens; + if (adjustedTokens >= 0) { + lastMessage.message.usage.output_tokens = adjustedTokens; + } + + const resultUsage = message.usage; + if (resultUsage && typeof resultUsage === "object") { + const cacheReadTokens = getNumberProperty( + resultUsage, + "cache_read_input_tokens", + ); + if (cacheReadTokens !== undefined) { + lastMessage.message.usage.cache_read_input_tokens = cacheReadTokens; + } + + const cacheCreationTokens = getNumberProperty( + resultUsage, + "cache_creation_input_tokens", + ); + if (cacheCreationTokens !== undefined) { + lastMessage.message.usage.cache_creation_input_tokens = + cacheCreationTokens; + } + } + } + } + + const metadata: Record = {}; + if (message.num_turns !== undefined) { + metadata.num_turns = message.num_turns; + } + if (message.session_id !== undefined) { + metadata.session_id = message.session_id; + } + if (Object.keys(metadata).length > 0) { + state.span.log({ metadata }); + } +} + +async function finalizeQuerySpan(state: QueryState): Promise { + try { + await finalizeCurrentMessageGroup(state); + + state.span.log({ + output: + state.finalResults.length > 0 + ? state.finalResults[state.finalResults.length - 1] + : undefined, + }); + + if (state.capturedPromptMessages) { + if (state.promptStarted()) { + await state.promptDone; + } + if (state.capturedPromptMessages.length > 0) { + state.span.log({ + input: formatCapturedMessages(state.capturedPromptMessages), + }); + } + } + } finally { + for (const [id, subAgentSpan] of state.subAgentSpans) { + if (!state.endedSubAgentSpans.has(id)) { + subAgentSpan.end(); + } + } + state.subAgentSpans.clear(); + state.span.end(); + } +} + export class ClaudeAgentSDKPlugin extends BasePlugin { protected onEnable(): void { this.subscribeToQuery(); @@ -215,33 +699,41 @@ export class ClaudeAgentSDKPlugin extends BasePlugin { this.unsubscribers = []; } - /** - * Subscribe to the query channel for agent interactions. - * Handles streaming responses and traces both the top-level agent task - * and individual LLM calls. - */ private subscribeToQuery(): void { const channel = claudeAgentSDKChannels.query.tracingChannel(); - const spans = new WeakMap< - object, - { - span: Span; - startTime: number; - conversationHistory: Array<{ content: unknown; role: string }>; - currentMessages: ClaudeAgentSDKMessage[]; - currentMessageId: string | undefined; - currentMessageStartTime: number; - accumulatedOutputTokens: number; - } - >(); + const spans = new WeakMap(); const handlers: IsoChannelHandlers< ChannelMessage > = { start: (event) => { - const params = event.arguments[0]; - const prompt = params?.prompt; - const options = params?.options ?? {}; + const params = (event.arguments[0] ?? {}) as ClaudeAgentSDKQueryParams; + const originalPrompt = params.prompt; + const options = params.options ?? {}; + const promptIsAsyncIterable = + isAsyncIterable(originalPrompt); + let promptStarted = false; + let capturedPromptMessages: ClaudeAgentSDKMessage[] | undefined; + let resolvePromptDone: (() => void) | undefined; + const promptDone = new Promise((resolve) => { + resolvePromptDone = resolve; + }); + + if (promptIsAsyncIterable) { + capturedPromptMessages = []; + const promptStream = originalPrompt; + params.prompt = (async function* () { + promptStarted = true; + try { + for await (const message of promptStream) { + capturedPromptMessages!.push(message); + yield message; + } + } finally { + resolvePromptDone?.(); + } + })(); + } const span = startSpan({ name: "Claude Agent", @@ -249,224 +741,143 @@ export class ClaudeAgentSDKPlugin extends BasePlugin { type: SpanTypeAttribute.TASK, }, }); - const startTime = getCurrentUnixTimestamp(); try { span.log({ input: - typeof prompt === "string" - ? prompt - : { - type: "streaming", - description: "AsyncIterable", - }, + typeof originalPrompt === "string" + ? originalPrompt + : promptIsAsyncIterable + ? undefined + : originalPrompt !== undefined + ? String(originalPrompt) + : undefined, metadata: filterSerializableOptions(options), }); } catch (error) { console.error("Error extracting input for Claude Agent SDK:", error); } + const activeToolSpans = new Map(); + const subAgentSpans = new Map(); + const endedSubAgentSpans = new Set(); + const toolUseToParent = new Map(); + const pendingSubAgentNames = new Map(); + const optionsWithHooks = injectTracingHooks( + options, + async (toolUseID) => { + const parentToolUseId = toolUseToParent.get(toolUseID); + if (parentToolUseId) { + const subAgentSpan = await ensureSubAgentSpan( + pendingSubAgentNames, + span, + subAgentSpans, + parentToolUseId, + ); + return subAgentSpan.export(); + } + return span.export(); + }, + activeToolSpans, + subAgentSpans, + endedSubAgentSpans, + ); + + params.options = optionsWithHooks; + event.arguments[0] = params; + spans.set(event, { - span, - startTime, - conversationHistory: [], - currentMessages: [], + accumulatedOutputTokens: 0, + activeToolSpans, + capturedPromptMessages, currentMessageId: undefined, currentMessageStartTime: startTime, - accumulatedOutputTokens: 0, + currentMessages: [], + endedSubAgentSpans, + finalResults: [], + options: optionsWithHooks, + originalPrompt, + pendingSubAgentNames, + processing: Promise.resolve(), + promptDone, + promptStarted: () => promptStarted, + span, + subAgentSpans, + toolUseToParent, }); }, - asyncEnd: (event) => { - const spanData = spans.get(event); - if (!spanData) { + end: (event) => { + const state = spans.get(event); + if (!state) { return; } const eventResult = event.result; if (eventResult === undefined) { - spanData.span.end(); + state.span.end(); spans.delete(event); return; } - // Check if result is a stream if (isAsyncIterable(eventResult)) { - // Patch the stream to collect chunks and trace them patchStreamIfNeeded(eventResult, { - onChunk: async (message: ClaudeAgentSDKMessage) => { - const currentTime = getCurrentUnixTimestamp(); - const params = event.arguments[0]; - const prompt = params?.prompt; - const options = params?.options ?? {}; - - const messageId = message.message?.id; - - // When we see a new message ID, finalize the previous group - if (messageId && messageId !== spanData.currentMessageId) { - if (spanData.currentMessages.length > 0) { - const finalMessage = await createLLMSpanForMessages( - spanData.currentMessages, - prompt, - spanData.conversationHistory, - options, - spanData.currentMessageStartTime, - await spanData.span.export(), + onChunk: (message: ClaudeAgentSDKMessage) => { + maybeTrackToolUseContext(state, message); + state.processing = state.processing + .then(() => handleStreamMessage(state, message)) + .catch((error) => { + console.error( + "Error processing Claude Agent SDK stream chunk:", + error, ); - - if (finalMessage) { - spanData.conversationHistory.push(finalMessage); - } - - // Track accumulated output tokens - const lastMessage = - spanData.currentMessages[ - spanData.currentMessages.length - 1 - ]; - if (lastMessage?.message?.usage) { - const outputTokens = - getNumberProperty( - lastMessage.message.usage, - "output_tokens", - ) || 0; - spanData.accumulatedOutputTokens += outputTokens; - } - - spanData.currentMessages = []; - } - - spanData.currentMessageId = messageId; - spanData.currentMessageStartTime = currentTime; - } - - // Collect assistant messages with usage - if (message.type === "assistant" && message.message?.usage) { - spanData.currentMessages.push(message); - } - - // Capture final usage metrics from result message - if (message.type === "result" && message.usage) { - const finalUsageMetrics = extractUsageFromMessage(message); - - // HACK: Adjust the last assistant message's output_tokens to match result total. - // The result message contains aggregated totals, so we calculate the difference: - // last message tokens = total result tokens - previously accumulated tokens - // The other metrics already accumulate correctly. - if ( - spanData.currentMessages.length > 0 && - finalUsageMetrics.completion_tokens !== undefined - ) { - const lastMessage = - spanData.currentMessages[ - spanData.currentMessages.length - 1 - ]; - if (lastMessage?.message?.usage) { - const adjustedTokens = - finalUsageMetrics.completion_tokens - - spanData.accumulatedOutputTokens; - if (adjustedTokens >= 0) { - lastMessage.message.usage.output_tokens = adjustedTokens; - } - } - } - - // Log result metadata - const result_metadata: Record = {}; - if (message.num_turns !== undefined) { - result_metadata.num_turns = message.num_turns; - } - if (message.session_id !== undefined) { - result_metadata.session_id = message.session_id; - } - if (Object.keys(result_metadata).length > 0) { - spanData.span.log({ - metadata: result_metadata, - }); - } - } + }); }, - onComplete: async () => { - try { - const params = event.arguments[0]; - const prompt = params?.prompt; - const options = params?.options ?? {}; - - // Create span for final message group - if (spanData.currentMessages.length > 0) { - const finalMessage = await createLLMSpanForMessages( - spanData.currentMessages, - prompt, - spanData.conversationHistory, - options, - spanData.currentMessageStartTime, - await spanData.span.export(), - ); - - if (finalMessage) { - spanData.conversationHistory.push(finalMessage); - } - } - - // Log final output to top-level span - just the last message content - spanData.span.log({ - output: - spanData.conversationHistory.length > 0 - ? spanData.conversationHistory[ - spanData.conversationHistory.length - 1 - ] - : undefined, + onComplete: () => { + void state.processing + .then(() => finalizeQuerySpan(state)) + .finally(() => { + spans.delete(event); }); - } catch (error) { - console.error( - "Error extracting output for Claude Agent SDK:", - error, - ); - } finally { - spanData.span.end(); - spans.delete(event); - } }, onError: (error: Error) => { - spanData.span.log({ - error: error.message, - }); - spanData.span.end(); - spans.delete(event); + void state.processing + .then(() => { + state.span.log({ + error: error.message, + }); + }) + .then(() => finalizeQuerySpan(state)) + .finally(() => { + spans.delete(event); + }); }, }); - // Don't delete the span from the map yet - it will be ended by the stream - } else { - // Non-streaming response (shouldn't happen for query, but handle gracefully) - try { - spanData.span.log({ - output: eventResult, - }); - } catch (error) { - console.error( - "Error extracting output for Claude Agent SDK:", - error, - ); - } finally { - spanData.span.end(); - spans.delete(event); - } + return; + } + + try { + state.span.log({ output: eventResult }); + } catch (error) { + console.error("Error extracting output for Claude Agent SDK:", error); + } finally { + state.span.end(); + spans.delete(event); } }, error: (event) => { - const spanData = spans.get(event); - if (!spanData || !event.error) { + const state = spans.get(event); + if (!state || !event.error) { return; } - const { span } = spanData; - - span.log({ + state.span.log({ error: event.error.message, }); - span.end(); + state.span.end(); spans.delete(event); }, }; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ee8c7fd39..124fd1e39 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -59,6 +59,9 @@ importers: vitest: specifier: ^4.1.0 version: 4.1.0(@opentelemetry/api@1.9.0)(@types/node@20.19.16)(msw@2.6.6(@types/node@20.19.16)(typescript@5.4.4))(vite@6.4.1(@types/node@20.19.16)(jiti@2.6.1)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + zod: + specifier: 4.3.6 + version: 4.3.6 integrations/browser-js: dependencies: @@ -253,7 +256,7 @@ importers: version: 0.0.11 ai: specifier: ^3.2.16 - version: 3.2.16(openai@4.104.0(ws@8.18.3)(zod@4.2.1))(react@19.1.1)(svelte@5.39.0)(vue@3.5.21(typescript@5.3.3))(zod@4.2.1) + version: 3.2.16(openai@4.104.0(ws@8.18.3)(zod@4.3.6))(react@19.1.1)(svelte@5.39.0)(vue@3.5.21(typescript@5.3.3))(zod@4.3.6) braintrust: specifier: workspace:* version: link:../../js @@ -510,13 +513,13 @@ importers: devDependencies: '@ai-sdk/openai': specifier: 2.0.53 - version: 2.0.53(zod@4.2.1) + version: 2.0.53(zod@4.3.6) '@types/node': specifier: ^20.10.5 version: 20.19.16 ai: specifier: 5.0.76 - version: 5.0.76(zod@4.2.1) + version: 5.0.76(zod@4.3.6) typescript: specifier: 5.4.4 version: 5.4.4 @@ -5943,6 +5946,9 @@ packages: zod@4.2.1: resolution: {integrity: sha512-0wZ1IRqGGhMP76gLqz8EyfBXKk0J2qo2+H3fi4mcUP/KtTocoX08nmIAHl1Z2kJIZbZee8KOpBCSNPRgauucjw==} + zod@4.3.6: + resolution: {integrity: sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==} + snapshots: '@ai-sdk/anthropic@2.0.37(zod@3.25.76)': @@ -5951,12 +5957,12 @@ snapshots: '@ai-sdk/provider-utils': 3.0.12(zod@3.25.76) zod: 3.25.76 - '@ai-sdk/gateway@2.0.0(zod@4.2.1)': + '@ai-sdk/gateway@2.0.0(zod@4.3.6)': dependencies: '@ai-sdk/provider': 2.0.0 - '@ai-sdk/provider-utils': 3.0.12(zod@4.2.1) + '@ai-sdk/provider-utils': 3.0.12(zod@4.3.6) '@vercel/oidc': 3.0.3 - zod: 4.2.1 + zod: 4.3.6 '@ai-sdk/gateway@3.0.15(zod@3.25.76)': dependencies: @@ -5965,20 +5971,20 @@ snapshots: '@vercel/oidc': 3.1.0 zod: 3.25.76 - '@ai-sdk/openai@2.0.53(zod@4.2.1)': + '@ai-sdk/openai@2.0.53(zod@4.3.6)': dependencies: '@ai-sdk/provider': 2.0.0 - '@ai-sdk/provider-utils': 3.0.12(zod@4.2.1) - zod: 4.2.1 + '@ai-sdk/provider-utils': 3.0.12(zod@4.3.6) + zod: 4.3.6 - '@ai-sdk/provider-utils@1.0.0(zod@4.2.1)': + '@ai-sdk/provider-utils@1.0.0(zod@4.3.6)': dependencies: '@ai-sdk/provider': 0.0.11 eventsource-parser: 1.1.2 nanoid: 3.3.6 secure-json-parse: 2.7.0 optionalDependencies: - zod: 4.2.1 + zod: 4.3.6 '@ai-sdk/provider-utils@3.0.12(zod@3.25.76)': dependencies: @@ -5987,12 +5993,12 @@ snapshots: eventsource-parser: 3.0.6 zod: 3.25.76 - '@ai-sdk/provider-utils@3.0.12(zod@4.2.1)': + '@ai-sdk/provider-utils@3.0.12(zod@4.3.6)': dependencies: '@ai-sdk/provider': 2.0.0 '@standard-schema/spec': 1.1.0 eventsource-parser: 3.0.6 - zod: 4.2.1 + zod: 4.3.6 '@ai-sdk/provider-utils@4.0.7(zod@3.25.76)': dependencies: @@ -6017,41 +6023,41 @@ snapshots: dependencies: json-schema: 0.4.0 - '@ai-sdk/react@0.0.16(react@19.1.1)(zod@4.2.1)': + '@ai-sdk/react@0.0.16(react@19.1.1)(zod@4.3.6)': dependencies: - '@ai-sdk/provider-utils': 1.0.0(zod@4.2.1) - '@ai-sdk/ui-utils': 0.0.9(zod@4.2.1) + '@ai-sdk/provider-utils': 1.0.0(zod@4.3.6) + '@ai-sdk/ui-utils': 0.0.9(zod@4.3.6) swr: 2.2.0(react@19.1.1) optionalDependencies: react: 19.1.1 - zod: 4.2.1 + zod: 4.3.6 - '@ai-sdk/solid@0.0.11(zod@4.2.1)': + '@ai-sdk/solid@0.0.11(zod@4.3.6)': dependencies: - '@ai-sdk/ui-utils': 0.0.9(zod@4.2.1) + '@ai-sdk/ui-utils': 0.0.9(zod@4.3.6) transitivePeerDependencies: - zod - '@ai-sdk/svelte@0.0.12(svelte@5.39.0)(zod@4.2.1)': + '@ai-sdk/svelte@0.0.12(svelte@5.39.0)(zod@4.3.6)': dependencies: - '@ai-sdk/provider-utils': 1.0.0(zod@4.2.1) - '@ai-sdk/ui-utils': 0.0.9(zod@4.2.1) + '@ai-sdk/provider-utils': 1.0.0(zod@4.3.6) + '@ai-sdk/ui-utils': 0.0.9(zod@4.3.6) sswr: 2.1.0(svelte@5.39.0) optionalDependencies: svelte: 5.39.0 transitivePeerDependencies: - zod - '@ai-sdk/ui-utils@0.0.9(zod@4.2.1)': + '@ai-sdk/ui-utils@0.0.9(zod@4.3.6)': dependencies: - '@ai-sdk/provider-utils': 1.0.0(zod@4.2.1) + '@ai-sdk/provider-utils': 1.0.0(zod@4.3.6) secure-json-parse: 2.7.0 optionalDependencies: - zod: 4.2.1 + zod: 4.3.6 - '@ai-sdk/vue@0.0.11(vue@3.5.21(typescript@5.3.3))(zod@4.2.1)': + '@ai-sdk/vue@0.0.11(vue@3.5.21(typescript@5.3.3))(zod@4.3.6)': dependencies: - '@ai-sdk/ui-utils': 0.0.9(zod@4.2.1) + '@ai-sdk/ui-utils': 0.0.9(zod@4.3.6) swrv: 1.0.4(vue@3.5.21(typescript@5.3.3)) optionalDependencies: vue: 3.5.21(typescript@5.3.3) @@ -8672,38 +8678,38 @@ snapshots: dependencies: humanize-ms: 1.2.1 - ai@3.2.16(openai@4.104.0(ws@8.18.3)(zod@4.2.1))(react@19.1.1)(svelte@5.39.0)(vue@3.5.21(typescript@5.3.3))(zod@4.2.1): + ai@3.2.16(openai@4.104.0(ws@8.18.3)(zod@4.3.6))(react@19.1.1)(svelte@5.39.0)(vue@3.5.21(typescript@5.3.3))(zod@4.3.6): dependencies: '@ai-sdk/provider': 0.0.11 - '@ai-sdk/provider-utils': 1.0.0(zod@4.2.1) - '@ai-sdk/react': 0.0.16(react@19.1.1)(zod@4.2.1) - '@ai-sdk/solid': 0.0.11(zod@4.2.1) - '@ai-sdk/svelte': 0.0.12(svelte@5.39.0)(zod@4.2.1) - '@ai-sdk/ui-utils': 0.0.9(zod@4.2.1) - '@ai-sdk/vue': 0.0.11(vue@3.5.21(typescript@5.3.3))(zod@4.2.1) + '@ai-sdk/provider-utils': 1.0.0(zod@4.3.6) + '@ai-sdk/react': 0.0.16(react@19.1.1)(zod@4.3.6) + '@ai-sdk/solid': 0.0.11(zod@4.3.6) + '@ai-sdk/svelte': 0.0.12(svelte@5.39.0)(zod@4.3.6) + '@ai-sdk/ui-utils': 0.0.9(zod@4.3.6) + '@ai-sdk/vue': 0.0.11(vue@3.5.21(typescript@5.3.3))(zod@4.3.6) eventsource-parser: 1.1.2 json-schema: 0.4.0 jsondiffpatch: 0.6.0 nanoid: 3.3.6 secure-json-parse: 2.7.0 sswr: 2.1.0(svelte@5.39.0) - zod-to-json-schema: 3.22.5(zod@4.2.1) + zod-to-json-schema: 3.22.5(zod@4.3.6) optionalDependencies: - openai: 4.104.0(ws@8.18.3)(zod@4.2.1) + openai: 4.104.0(ws@8.18.3)(zod@4.3.6) react: 19.1.1 svelte: 5.39.0 - zod: 4.2.1 + zod: 4.3.6 transitivePeerDependencies: - solid-js - vue - ai@5.0.76(zod@4.2.1): + ai@5.0.76(zod@4.3.6): dependencies: - '@ai-sdk/gateway': 2.0.0(zod@4.2.1) + '@ai-sdk/gateway': 2.0.0(zod@4.3.6) '@ai-sdk/provider': 2.0.0 - '@ai-sdk/provider-utils': 3.0.12(zod@4.2.1) + '@ai-sdk/provider-utils': 3.0.12(zod@4.3.6) '@opentelemetry/api': 1.9.0 - zod: 4.2.1 + zod: 4.3.6 ai@6.0.37(zod@3.25.76): dependencies: @@ -10607,7 +10613,7 @@ snapshots: smol-toml: 1.6.0 strip-json-comments: 5.0.3 typescript: 5.5.4 - zod: 4.2.1 + zod: 4.3.6 langsmith@0.4.5(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.2.0(@opentelemetry/api@1.9.0))(openai@6.25.0(ws@8.18.3)(zod@3.25.76)): dependencies: @@ -11098,7 +11104,7 @@ snapshots: transitivePeerDependencies: - encoding - openai@4.104.0(ws@8.18.3)(zod@4.2.1): + openai@4.104.0(ws@8.18.3)(zod@4.3.6): dependencies: '@types/node': 18.19.123 '@types/node-fetch': 2.6.13 @@ -11109,7 +11115,7 @@ snapshots: node-fetch: 2.7.0 optionalDependencies: ws: 8.18.3 - zod: 4.2.1 + zod: 4.3.6 transitivePeerDependencies: - encoding optional: true @@ -12689,9 +12695,9 @@ snapshots: dependencies: zod: 3.25.76 - zod-to-json-schema@3.22.5(zod@4.2.1): + zod-to-json-schema@3.22.5(zod@4.3.6): dependencies: - zod: 4.2.1 + zod: 4.3.6 zod-to-json-schema@3.25.1(zod@3.25.76): dependencies: @@ -12706,3 +12712,5 @@ snapshots: zod@3.25.76: {} zod@4.2.1: {} + + zod@4.3.6: {} From d5c995c1b140c1455a1a7c3de2cfc3924eca52e9 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Mon, 23 Mar 2026 15:44:56 +0100 Subject: [PATCH 2/3] fix ts --- js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts b/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts index 62b072bbe..46aa20856 100644 --- a/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts +++ b/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts @@ -710,8 +710,7 @@ export class ClaudeAgentSDKPlugin extends BasePlugin { const params = (event.arguments[0] ?? {}) as ClaudeAgentSDKQueryParams; const originalPrompt = params.prompt; const options = params.options ?? {}; - const promptIsAsyncIterable = - isAsyncIterable(originalPrompt); + const promptIsAsyncIterable = isAsyncIterable(originalPrompt); let promptStarted = false; let capturedPromptMessages: ClaudeAgentSDKMessage[] | undefined; let resolvePromptDone: (() => void) | undefined; @@ -721,7 +720,8 @@ export class ClaudeAgentSDKPlugin extends BasePlugin { if (promptIsAsyncIterable) { capturedPromptMessages = []; - const promptStream = originalPrompt; + const promptStream = + originalPrompt as AsyncIterable; params.prompt = (async function* () { promptStarted = true; try { From 09c51fd44a484a25440ef34dfd7a47397531a0e2 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 24 Mar 2026 15:00:10 +0100 Subject: [PATCH 3/3] clean up e2e tests --- e2e/README.md | 2 +- .../claude-agent-sdk-trace-contract.ts | 287 -------------- e2e/helpers/claude-agent-sdk.ts | 44 --- .../mock-claude-agent-sdk/package.json | 9 - e2e/helpers/mock-claude-agent-sdk/sdk.mjs | 294 -------------- .../package.json | 16 - .../scenario.claude-agent-sdk-v0.1.mjs | 4 - .../scenario.claude-agent-sdk-v0.2.mjs | 4 - .../scenario.impl.mjs | 26 -- .../scenario.test.ts | 54 --- .../claude-agent-sdk-v0.1.span-events.json} | 0 ...claude-agent-sdk-v0.2.76.span-events.json} | 0 .../claude-agent-sdk-v0.2.79.span-events.json | 283 ++++++++++++++ .../claude-agent-sdk-v0.2.81.span-events.json | 283 ++++++++++++++ .../assertions.ts | 358 ++++++++++++++++++ .../package.json | 20 + .../pnpm-lock.yaml | 48 ++- .../scenario.claude-agent-sdk-v0.1.mjs | 5 + .../scenario.claude-agent-sdk-v0.1.ts | 5 + .../scenario.claude-agent-sdk-v0.2.76.mjs | 5 + .../scenario.claude-agent-sdk-v0.2.76.ts | 5 + .../scenario.claude-agent-sdk-v0.2.79.mjs | 5 + .../scenario.claude-agent-sdk-v0.2.79.ts | 5 + .../scenario.claude-agent-sdk-v0.2.81.mjs | 5 + .../scenario.claude-agent-sdk-v0.2.81.ts | 5 + .../scenario.impl.mjs} | 33 +- .../scenario.test.ts | 83 ++++ .../wrap-claude-agent-sdk-traces/package.json | 16 - .../pnpm-lock.yaml | 357 ----------------- .../scenario.claude-agent-sdk-v0.1.ts | 5 - .../scenario.claude-agent-sdk-v0.2.ts | 5 - .../scenario.impl.ts | 12 - .../scenario.test.ts | 53 --- 33 files changed, 1139 insertions(+), 1197 deletions(-) delete mode 100644 e2e/helpers/claude-agent-sdk-trace-contract.ts delete mode 100644 e2e/helpers/claude-agent-sdk.ts delete mode 100644 e2e/helpers/mock-claude-agent-sdk/package.json delete mode 100644 e2e/helpers/mock-claude-agent-sdk/sdk.mjs delete mode 100644 e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/package.json delete mode 100644 e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.1.mjs delete mode 100644 e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.2.mjs delete mode 100644 e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.impl.mjs delete mode 100644 e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.test.ts rename e2e/{helpers/__snapshots__/claude-agent-sdk-v0.1.claude-agent-sdk.span-events.json => scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json} (100%) rename e2e/{helpers/__snapshots__/claude-agent-sdk-v0.2.claude-agent-sdk.span-events.json => scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json} (100%) create mode 100644 e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json create mode 100644 e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json create mode 100644 e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts create mode 100644 e2e/scenarios/claude-agent-sdk-instrumentation/package.json rename e2e/scenarios/{claude-agent-sdk-auto-instrumentation-node-hook => claude-agent-sdk-instrumentation}/pnpm-lock.yaml (86%) create mode 100644 e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.1.mjs create mode 100644 e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.1.ts create mode 100644 e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.76.mjs create mode 100644 e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.76.ts create mode 100644 e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.79.mjs create mode 100644 e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.79.ts create mode 100644 e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.81.mjs create mode 100644 e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.81.ts rename e2e/{helpers/claude-agent-sdk-scenario.mjs => scenarios/claude-agent-sdk-instrumentation/scenario.impl.mjs} (83%) create mode 100644 e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts delete mode 100644 e2e/scenarios/wrap-claude-agent-sdk-traces/package.json delete mode 100644 e2e/scenarios/wrap-claude-agent-sdk-traces/pnpm-lock.yaml delete mode 100644 e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.1.ts delete mode 100644 e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.2.ts delete mode 100644 e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.impl.ts delete mode 100644 e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.test.ts diff --git a/e2e/README.md b/e2e/README.md index 25efd5b5d..b8a98405b 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -117,7 +117,7 @@ Some wrappers execute inside a nested test runner rather than a single SDK call. - `GEMINI_API_KEY` or `GOOGLE_API_KEY` - `OPENROUTER_API_KEY` -`wrap-claude-agent-sdk-traces` also uses `ANTHROPIC_API_KEY`, because it runs the real Claude Agent SDK against Anthropic in the same style as the existing live Anthropic wrapper coverage. +`claude-agent-sdk-instrumentation` also uses `ANTHROPIC_API_KEY`, because it runs the real Claude Agent SDK against Anthropic in the same style as the existing live Anthropic wrapper coverage. ### Scenario-local `package.json` diff --git a/e2e/helpers/claude-agent-sdk-trace-contract.ts b/e2e/helpers/claude-agent-sdk-trace-contract.ts deleted file mode 100644 index 4975bc1d6..000000000 --- a/e2e/helpers/claude-agent-sdk-trace-contract.ts +++ /dev/null @@ -1,287 +0,0 @@ -import { expect } from "vitest"; -import { resolveFileSnapshotPath } from "./file-snapshot"; -import { normalizeForSnapshot, type Json } from "./normalize"; -import type { CapturedLogEvent } from "./mock-braintrust-server"; -import { - findAllSpans, - findChildSpans, - findLatestSpan, -} from "./trace-selectors"; -import { summarizeWrapperContract } from "./wrapper-contract"; - -const SNAPSHOT_METADATA_KEYS = [ - "provider", - "model", - "operation", - "scenario", - "mcp.server", - "gen_ai.tool.name", -] as const; -const OMITTED_METRIC_KEYS = new Set([ - "prompt_cached_tokens", - "prompt_cache_creation_tokens", -]); -const SNAPSHOT_ROOT_NAME = "claude-agent-sdk-root"; -const SNAPSHOT_SCENARIO_NAME = "claude-agent-sdk-traces"; - -export function resolveClaudeAgentSDKSpanSnapshotPath( - dependencyName: string, -): string { - return resolveFileSnapshotPath( - import.meta.url, - `${dependencyName}.claude-agent-sdk.span-events.json`, - ); -} - -function summarizeSpan( - event: CapturedLogEvent | undefined, - overrides?: { - metadata?: Json; - name?: string | null; - }, -): Json { - if (!event) { - return null; - } - - const summary = summarizeWrapperContract(event, [ - ...SNAPSHOT_METADATA_KEYS, - ]) as Record; - const metricKeys = Array.isArray(summary.metric_keys) - ? summary.metric_keys.filter( - (key): key is string => - typeof key === "string" && !OMITTED_METRIC_KEYS.has(key), - ) - : summary.metric_keys; - const input = event.input as - | Array<{ content?: string; message?: { content?: string } }> - | undefined; - const inputContents = - Array.isArray(input) && - input - .map((item) => item.message?.content ?? item.content) - .filter((content): content is string => typeof content === "string"); - - if (overrides?.metadata !== undefined) { - summary.metadata = overrides.metadata; - } - if (overrides?.name !== undefined) { - summary.name = overrides.name; - } - if (typeof event.row.error === "string") { - summary.error = event.row.error; - } - if (metricKeys !== undefined) { - summary.metric_keys = metricKeys; - } - if (Array.isArray(inputContents) && inputContents.length > 0) { - summary.input_contents = inputContents; - } - - return summary; -} - -export function assertClaudeAgentSDKTraceContract(options: { - capturedEvents: CapturedLogEvent[]; - rootName: string; - scenarioName: string; -}): { - refs: { - asyncPromptOperation: CapturedLogEvent | undefined; - asyncPromptLlm: CapturedLogEvent | undefined; - asyncPromptTask: CapturedLogEvent | undefined; - basicOperation: CapturedLogEvent | undefined; - basicLlm: CapturedLogEvent | undefined; - basicTask: CapturedLogEvent | undefined; - basicTool: CapturedLogEvent | undefined; - failureOperation: CapturedLogEvent | undefined; - failureLlm: CapturedLogEvent | undefined; - failureTask: CapturedLogEvent | undefined; - failureTool: CapturedLogEvent | undefined; - root: CapturedLogEvent | undefined; - subAgentOperation: CapturedLogEvent | undefined; - subAgentLlm: CapturedLogEvent | undefined; - subAgentTask: CapturedLogEvent | undefined; - subAgentTaskRoot: CapturedLogEvent | undefined; - subAgentTool: CapturedLogEvent | undefined; - }; - spanSummary: Json; -} { - const root = findLatestSpan(options.capturedEvents, options.rootName); - const basicOperation = findLatestSpan( - options.capturedEvents, - "claude-agent-basic-operation", - ); - const asyncPromptOperation = findLatestSpan( - options.capturedEvents, - "claude-agent-async-prompt-operation", - ); - const subAgentOperation = findLatestSpan( - options.capturedEvents, - "claude-agent-subagent-operation", - ); - const failureOperation = findLatestSpan( - options.capturedEvents, - "claude-agent-failure-operation", - ); - - expect(root).toBeDefined(); - expect(root?.row.metadata).toMatchObject({ - scenario: options.scenarioName, - }); - - for (const operation of [ - basicOperation, - asyncPromptOperation, - subAgentOperation, - failureOperation, - ]) { - expect(operation).toBeDefined(); - expect(operation?.span.parentIds).toEqual([root?.span.id ?? ""]); - } - - const basicTask = findChildSpans( - options.capturedEvents, - "Claude Agent", - basicOperation?.span.id, - ).at(-1); - const asyncPromptTask = findChildSpans( - options.capturedEvents, - "Claude Agent", - asyncPromptOperation?.span.id, - ).at(-1); - const subAgentTaskRoot = findChildSpans( - options.capturedEvents, - "Claude Agent", - subAgentOperation?.span.id, - ).at(-1); - const failureTask = findChildSpans( - options.capturedEvents, - "Claude Agent", - failureOperation?.span.id, - ).at(-1); - - expect(basicTask).toBeDefined(); - expect(asyncPromptTask).toBeDefined(); - expect(subAgentTaskRoot).toBeDefined(); - expect(failureTask).toBeDefined(); - - const basicLlm = findChildSpans( - options.capturedEvents, - "anthropic.messages.create", - basicTask?.span.id, - ).at(-1); - const asyncPromptLlm = findChildSpans( - options.capturedEvents, - "anthropic.messages.create", - asyncPromptTask?.span.id, - ).find((event) => { - const input = event.input as Array<{ content?: string }> | undefined; - return Array.isArray(input) && input.some((item) => item.content); - }); - const subAgentLlm = findAllSpans( - options.capturedEvents, - "anthropic.messages.create", - ).find((event) => - event.span.parentIds.includes(subAgentTaskRoot?.span.id ?? ""), - ); - const failureLlm = findChildSpans( - options.capturedEvents, - "anthropic.messages.create", - failureTask?.span.id, - ).at(-1); - - expect(basicLlm).toBeDefined(); - expect(subAgentLlm).toBeDefined(); - expect(failureLlm).toBeDefined(); - - if (asyncPromptLlm) { - const asyncPromptLlmInput = asyncPromptLlm.input as - | Array<{ content?: string }> - | undefined; - expect(asyncPromptLlmInput?.map((item) => item.content)).toEqual([ - "Part 1", - "Part 2", - ]); - } - - const basicTool = findAllSpans( - options.capturedEvents, - "tool: calculator/calculator", - ).find((event) => event.span.parentIds.includes(basicTask?.span.id ?? "")); - - const subAgentTask = options.capturedEvents.find( - (event) => - event.span.type === "task" && - event.span.parentIds.includes(subAgentTaskRoot?.span.id ?? "") && - event.span.name?.startsWith("Agent:"), - ); - const subAgentTool = findAllSpans( - options.capturedEvents, - "tool: calculator/calculator", - ).find((event) => event.span.parentIds.includes(subAgentTask?.span.id ?? "")); - const failureTool = findAllSpans( - options.capturedEvents, - "tool: calculator/calculator", - ).find((event) => event.span.parentIds.includes(failureTask?.span.id ?? "")); - - if (subAgentTool && subAgentTaskRoot) { - expect(subAgentTool.span.parentIds).not.toContain(subAgentTaskRoot.span.id); - } - - if (failureTool) { - expect(failureTool.row.error).toBe("division by zero"); - } - - return { - refs: { - asyncPromptOperation, - asyncPromptLlm, - asyncPromptTask, - basicOperation, - basicLlm, - basicTask, - basicTool, - failureOperation, - failureLlm, - failureTask, - failureTool, - root, - subAgentOperation, - subAgentLlm, - subAgentTask, - subAgentTaskRoot, - subAgentTool, - }, - spanSummary: normalizeForSnapshot({ - async_prompt: { - llm: summarizeSpan(asyncPromptLlm), - operation: summarizeSpan(asyncPromptOperation), - task: summarizeSpan(asyncPromptTask), - }, - basic: { - llm: summarizeSpan(basicLlm), - operation: summarizeSpan(basicOperation), - task: summarizeSpan(basicTask), - tool: summarizeSpan(basicTool), - }, - failure: { - llm: summarizeSpan(failureLlm), - operation: summarizeSpan(failureOperation), - task: summarizeSpan(failureTask), - tool: summarizeSpan(failureTool), - }, - root: summarizeSpan(root, { - metadata: { scenario: SNAPSHOT_SCENARIO_NAME }, - name: SNAPSHOT_ROOT_NAME, - }), - subagent: { - llm: summarizeSpan(subAgentLlm), - nested_task: summarizeSpan(subAgentTask), - operation: summarizeSpan(subAgentOperation), - task_root: summarizeSpan(subAgentTaskRoot), - tool: summarizeSpan(subAgentTool), - }, - } as Json), - }; -} diff --git a/e2e/helpers/claude-agent-sdk.ts b/e2e/helpers/claude-agent-sdk.ts deleted file mode 100644 index af33cd951..000000000 --- a/e2e/helpers/claude-agent-sdk.ts +++ /dev/null @@ -1,44 +0,0 @@ -import { readInstalledPackageVersion } from "./scenario-installer"; - -interface ClaudeAgentSDKScenario { - dependencyName: string; - entry: string; - version: string; -} - -const CLAUDE_AGENT_SDK_VERSION_SPECS = [ - { - dependencyName: "claude-agent-sdk-v0.1", - suffix: "v0.1", - }, - { - dependencyName: "claude-agent-sdk-v0.2", - suffix: "v0.2", - }, -] as const; - -export const CLAUDE_AGENT_SDK_SCENARIO_TIMEOUT_MS = 120_000; - -export async function getWrapClaudeAgentSDKScenarios( - scenarioDir: string, -): Promise { - return await Promise.all( - CLAUDE_AGENT_SDK_VERSION_SPECS.map(async ({ dependencyName, suffix }) => ({ - dependencyName, - entry: `scenario.claude-agent-sdk-${suffix}.ts`, - version: await readInstalledPackageVersion(scenarioDir, dependencyName), - })), - ); -} - -export async function getClaudeAgentSDKAutoHookScenarios( - scenarioDir: string, -): Promise { - return await Promise.all( - CLAUDE_AGENT_SDK_VERSION_SPECS.map(async ({ dependencyName, suffix }) => ({ - dependencyName, - entry: `scenario.claude-agent-sdk-${suffix}.mjs`, - version: await readInstalledPackageVersion(scenarioDir, dependencyName), - })), - ); -} diff --git a/e2e/helpers/mock-claude-agent-sdk/package.json b/e2e/helpers/mock-claude-agent-sdk/package.json deleted file mode 100644 index 84d0a81f3..000000000 --- a/e2e/helpers/mock-claude-agent-sdk/package.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "name": "@anthropic-ai/claude-agent-sdk", - "private": true, - "version": "0.1.0", - "type": "module", - "exports": { - ".": "./sdk.mjs" - } -} diff --git a/e2e/helpers/mock-claude-agent-sdk/sdk.mjs b/e2e/helpers/mock-claude-agent-sdk/sdk.mjs deleted file mode 100644 index ac11aed1c..000000000 --- a/e2e/helpers/mock-claude-agent-sdk/sdk.mjs +++ /dev/null @@ -1,294 +0,0 @@ -function getHooks(options, eventName) { - return (options?.hooks?.[eventName] ?? []).flatMap((entry) => entry.hooks); -} - -async function invokeHooks(options, eventName, input, toolUseId) { - const signal = new AbortController().signal; - for (const hook of getHooks(options, eventName)) { - await hook(input, toolUseId, { signal }); - } -} - -function makeAssistantMessage(args) { - return { - parent_tool_use_id: args.parentToolUseId ?? null, - type: "assistant", - message: { - content: args.content, - id: args.id, - model: args.model ?? "claude-e2e-mock", - role: "assistant", - usage: { - input_tokens: 8, - output_tokens: args.outputTokens ?? 4, - }, - }, - }; -} - -function makeResultMessage(outputTokens = 4) { - return { - num_turns: 1, - type: "result", - usage: { - input_tokens: 8, - output_tokens: outputTokens, - }, - }; -} - -export function query(params) { - const { options } = params; - - return (async function* () { - if (options?.agents) { - const taskToolUseId = "task-tool-1"; - const subToolUseId = "sub-tool-1"; - - yield makeAssistantMessage({ - content: [ - { - id: taskToolUseId, - input: { - subagent_type: "math-expert", - }, - name: "Task", - type: "tool_use", - }, - ], - id: "root-assistant-1", - }); - - yield makeAssistantMessage({ - content: [ - { - id: subToolUseId, - input: { - a: 15, - b: 27, - operation: "add", - }, - name: "mcp__calculator__calculator", - type: "tool_use", - }, - ], - id: "sub-assistant-1", - parentToolUseId: taskToolUseId, - }); - - await invokeHooks( - options, - "PreToolUse", - { - cwd: "/tmp", - hook_event_name: "PreToolUse", - session_id: "session-subagent", - tool_input: { - a: 15, - b: 27, - operation: "add", - }, - tool_name: "mcp__calculator__calculator", - transcript_path: "/tmp/transcript", - }, - subToolUseId, - ); - - await invokeHooks( - options, - "PostToolUse", - { - cwd: "/tmp", - hook_event_name: "PostToolUse", - session_id: "session-subagent", - tool_input: { - a: 15, - b: 27, - operation: "add", - }, - tool_name: "mcp__calculator__calculator", - tool_response: { - content: [{ text: "add(15, 27) = 42", type: "text" }], - }, - transcript_path: "/tmp/transcript", - }, - subToolUseId, - ); - - yield makeAssistantMessage({ - content: [{ text: "The answer is 42.", type: "text" }], - id: "sub-assistant-2", - parentToolUseId: taskToolUseId, - }); - - await invokeHooks( - options, - "PostToolUse", - { - cwd: "/tmp", - hook_event_name: "PostToolUse", - session_id: "session-subagent", - tool_input: { - description: "delegate to math expert", - }, - tool_name: "Task", - tool_response: { - content: "42", - status: "success", - totalDurationMs: 1, - totalToolUseCount: 1, - }, - transcript_path: "/tmp/transcript", - }, - taskToolUseId, - ); - - yield makeResultMessage(); - return; - } - - if (params.prompt && typeof params.prompt !== "string") { - for await (const _message of params.prompt) { - } - - yield makeAssistantMessage({ - content: [{ text: "Combined async prompt response", type: "text" }], - id: "async-assistant-1", - }); - yield makeResultMessage(); - return; - } - - if (typeof params.prompt === "string" && params.prompt.includes("FAIL")) { - const toolUseId = "failure-tool-1"; - - yield makeAssistantMessage({ - content: [ - { - id: toolUseId, - input: { - a: 2, - b: 0, - operation: "divide", - }, - name: "mcp__calculator__calculator", - type: "tool_use", - }, - ], - id: "failure-assistant-1", - }); - - await invokeHooks( - options, - "PreToolUse", - { - cwd: "/tmp", - hook_event_name: "PreToolUse", - session_id: "session-failure", - tool_input: { - a: 2, - b: 0, - operation: "divide", - }, - tool_name: "mcp__calculator__calculator", - transcript_path: "/tmp/transcript", - }, - toolUseId, - ); - - await invokeHooks( - options, - "PostToolUseFailure", - { - cwd: "/tmp", - error: "division by zero", - hook_event_name: "PostToolUseFailure", - is_interrupt: false, - session_id: "session-failure", - tool_input: { - a: 2, - b: 0, - operation: "divide", - }, - tool_name: "mcp__calculator__calculator", - transcript_path: "/tmp/transcript", - }, - toolUseId, - ); - - yield makeResultMessage(); - return; - } - - const toolUseId = "basic-tool-1"; - - yield makeAssistantMessage({ - content: [ - { - id: toolUseId, - input: { - a: 15, - b: 7, - operation: "multiply", - }, - name: "mcp__calculator__calculator", - type: "tool_use", - }, - ], - id: "basic-assistant-1", - }); - - await invokeHooks( - options, - "PreToolUse", - { - cwd: "/tmp", - hook_event_name: "PreToolUse", - session_id: "session-basic", - tool_input: { - a: 15, - b: 7, - operation: "multiply", - }, - tool_name: "mcp__calculator__calculator", - transcript_path: "/tmp/transcript", - }, - toolUseId, - ); - - await invokeHooks( - options, - "PostToolUse", - { - cwd: "/tmp", - hook_event_name: "PostToolUse", - session_id: "session-basic", - tool_input: { - a: 15, - b: 7, - operation: "multiply", - }, - tool_name: "mcp__calculator__calculator", - tool_response: { - content: [{ text: "multiply(15, 7) = 105", type: "text" }], - }, - transcript_path: "/tmp/transcript", - }, - toolUseId, - ); - - yield makeAssistantMessage({ - content: [{ text: "105 minus 5 is 100.", type: "text" }], - id: "basic-assistant-2", - }); - yield makeResultMessage(); - })(); -} - -export function createSdkMcpServer(config) { - return config; -} - -export function tool(_name, _description, _schema, handler) { - return handler; -} diff --git a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/package.json b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/package.json deleted file mode 100644 index 048c14a87..000000000 --- a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/package.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "name": "@braintrust/e2e-claude-agent-sdk-auto-instrumentation-node-hook", - "private": true, - "braintrustScenario": { - "canary": { - "dependencies": { - "claude-agent-sdk-v0.1": "@anthropic-ai/claude-agent-sdk@0.1", - "claude-agent-sdk-v0.2": "@anthropic-ai/claude-agent-sdk@0.2" - } - } - }, - "dependencies": { - "claude-agent-sdk-v0.1": "npm:@anthropic-ai/claude-agent-sdk@0.1.77", - "claude-agent-sdk-v0.2": "npm:@anthropic-ai/claude-agent-sdk@0.2.81" - } -} diff --git a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.1.mjs b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.1.mjs deleted file mode 100644 index e29ff853d..000000000 --- a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.1.mjs +++ /dev/null @@ -1,4 +0,0 @@ -import * as claudeAgentSDK from "claude-agent-sdk-v0.1"; -import { runClaudeAgentSDKAutoInstrumentationNodeHookOrExit } from "./scenario.impl.mjs"; - -runClaudeAgentSDKAutoInstrumentationNodeHookOrExit(claudeAgentSDK); diff --git a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.2.mjs b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.2.mjs deleted file mode 100644 index b14310049..000000000 --- a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.claude-agent-sdk-v0.2.mjs +++ /dev/null @@ -1,4 +0,0 @@ -import * as claudeAgentSDK from "claude-agent-sdk-v0.2"; -import { runClaudeAgentSDKAutoInstrumentationNodeHookOrExit } from "./scenario.impl.mjs"; - -runClaudeAgentSDKAutoInstrumentationNodeHookOrExit(claudeAgentSDK); diff --git a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.impl.mjs b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.impl.mjs deleted file mode 100644 index 1efd8cf7d..000000000 --- a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.impl.mjs +++ /dev/null @@ -1,26 +0,0 @@ -import { - getInstalledPackageVersion, - runMain, -} from "../../helpers/provider-runtime.mjs"; -import { runClaudeAgentSDKScenario } from "../../helpers/claude-agent-sdk-scenario.mjs"; - -export { getInstalledPackageVersion }; - -export async function runClaudeAgentSDKAutoInstrumentationNodeHook( - claudeAgentSDK, -) { - await runClaudeAgentSDKScenario({ - projectNameBase: "e2e-claude-agent-sdk-auto-instrumentation-hook", - rootName: "claude-agent-sdk-auto-hook-root", - scenarioName: "claude-agent-sdk-auto-instrumentation-node-hook", - sdk: claudeAgentSDK, - }); -} - -export function runClaudeAgentSDKAutoInstrumentationNodeHookOrExit( - claudeAgentSDK, -) { - runMain(async () => - runClaudeAgentSDKAutoInstrumentationNodeHook(claudeAgentSDK), - ); -} diff --git a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.test.ts b/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.test.ts deleted file mode 100644 index 3a0af2596..000000000 --- a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/scenario.test.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { expect, test } from "vitest"; -import { - assertClaudeAgentSDKTraceContract, - resolveClaudeAgentSDKSpanSnapshotPath, -} from "../../helpers/claude-agent-sdk-trace-contract"; -import { - CLAUDE_AGENT_SDK_SCENARIO_TIMEOUT_MS, - getClaudeAgentSDKAutoHookScenarios, -} from "../../helpers/claude-agent-sdk"; -import { formatJsonFileSnapshot } from "../../helpers/file-snapshot"; -import { - prepareScenarioDir, - resolveScenarioDir, - withScenarioHarness, -} from "../../helpers/scenario-harness"; -import { E2E_TAGS } from "../../helpers/tags"; - -const scenarioDir = await prepareScenarioDir({ - scenarioDir: resolveScenarioDir(import.meta.url), -}); -const autoHookClaudeAgentSDKScenarios = - await getClaudeAgentSDKAutoHookScenarios(scenarioDir); - -for (const scenario of autoHookClaudeAgentSDKScenarios) { - test( - `claude agent sdk auto-instrumentation via node hook collects the shared claude agent trace contract (claude-agent-sdk ${scenario.version})`, - { - tags: [E2E_TAGS.externalApi], - timeout: CLAUDE_AGENT_SDK_SCENARIO_TIMEOUT_MS, - }, - async () => { - await withScenarioHarness(async ({ events, runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.entry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - scenarioDir, - timeoutMs: CLAUDE_AGENT_SDK_SCENARIO_TIMEOUT_MS, - }); - - const contract = assertClaudeAgentSDKTraceContract({ - capturedEvents: events(), - rootName: "claude-agent-sdk-auto-hook-root", - scenarioName: "claude-agent-sdk-auto-instrumentation-node-hook", - }); - - await expect( - formatJsonFileSnapshot(contract.spanSummary), - ).toMatchFileSnapshot( - resolveClaudeAgentSDKSpanSnapshotPath(scenario.dependencyName), - ); - }); - }, - ); -} diff --git a/e2e/helpers/__snapshots__/claude-agent-sdk-v0.1.claude-agent-sdk.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json similarity index 100% rename from e2e/helpers/__snapshots__/claude-agent-sdk-v0.1.claude-agent-sdk.span-events.json rename to e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json diff --git a/e2e/helpers/__snapshots__/claude-agent-sdk-v0.2.claude-agent-sdk.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json similarity index 100% rename from e2e/helpers/__snapshots__/claude-agent-sdk-v0.2.claude-agent-sdk.span-events.json rename to e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json new file mode 100644 index 000000000..ce4c4a2e6 --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json @@ -0,0 +1,283 @@ +{ + "async_prompt": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Part 1", + "Part 2" + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "async-prompt" + }, + "metric_keys": [], + "name": "claude-agent-async-prompt-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Part 1", + "Part 2" + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + } + }, + "basic": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Use the calculator tool to multiply 15 by 7. Do not answer from memory." + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "basic" + }, + "metric_keys": [], + "name": "claude-agent-basic-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "tool": { + "has_input": true, + "has_output": true, + "metadata": { + "gen_ai.tool.name": "calculator", + "mcp.server": "calculator" + }, + "metric_keys": [], + "name": "tool: calculator/calculator", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + } + }, + "failure": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Use the calculator tool to divide 2 by 0. Do not recover from the error." + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "failure" + }, + "metric_keys": [], + "name": "claude-agent-failure-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "tool": { + "error": "division by zero", + "has_input": true, + "has_output": false, + "metadata": { + "gen_ai.tool.name": "calculator", + "mcp.server": "calculator" + }, + "metric_keys": [], + "name": "tool: calculator/calculator", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + } + }, + "root": { + "has_input": false, + "has_output": false, + "metadata": { + "scenario": "claude-agent-sdk-traces" + }, + "metric_keys": [], + "name": "claude-agent-sdk-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task" + }, + "subagent": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Spawn a math-expert subagent to add 15 and 27 using the calculator tool. Report the result. Do not solve it yourself." + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "nested_task": { + "has_input": false, + "has_output": false, + "metadata": null, + "metric_keys": [], + "name": "Agent: sub-agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "subagent" + }, + "metric_keys": [], + "name": "claude-agent-subagent-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task_root": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "tool": null + } +} diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json new file mode 100644 index 000000000..ce4c4a2e6 --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json @@ -0,0 +1,283 @@ +{ + "async_prompt": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Part 1", + "Part 2" + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "async-prompt" + }, + "metric_keys": [], + "name": "claude-agent-async-prompt-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Part 1", + "Part 2" + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + } + }, + "basic": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Use the calculator tool to multiply 15 by 7. Do not answer from memory." + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "basic" + }, + "metric_keys": [], + "name": "claude-agent-basic-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "tool": { + "has_input": true, + "has_output": true, + "metadata": { + "gen_ai.tool.name": "calculator", + "mcp.server": "calculator" + }, + "metric_keys": [], + "name": "tool: calculator/calculator", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + } + }, + "failure": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Use the calculator tool to divide 2 by 0. Do not recover from the error." + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "failure" + }, + "metric_keys": [], + "name": "claude-agent-failure-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "tool": { + "error": "division by zero", + "has_input": true, + "has_output": false, + "metadata": { + "gen_ai.tool.name": "calculator", + "mcp.server": "calculator" + }, + "metric_keys": [], + "name": "tool: calculator/calculator", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + } + }, + "root": { + "has_input": false, + "has_output": false, + "metadata": { + "scenario": "claude-agent-sdk-traces" + }, + "metric_keys": [], + "name": "claude-agent-sdk-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task" + }, + "subagent": { + "llm": { + "has_input": true, + "has_output": true, + "input_contents": [ + "Spawn a math-expert subagent to add 15 and 27 using the calculator tool. Report the result. Do not solve it yourself." + ], + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "tokens" + ], + "name": "anthropic.messages.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + "nested_task": { + "has_input": false, + "has_output": false, + "metadata": null, + "metric_keys": [], + "name": "Agent: sub-agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "subagent" + }, + "metric_keys": [], + "name": "claude-agent-subagent-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task_root": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "claude-haiku-4-5-20251001" + }, + "metric_keys": [], + "name": "Claude Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "tool": null + } +} diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts new file mode 100644 index 000000000..507bf2ccc --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts @@ -0,0 +1,358 @@ +import { beforeAll, describe, expect, test } from "vitest"; +import { normalizeForSnapshot, type Json } from "../../helpers/normalize"; +import type { CapturedLogEvent } from "../../helpers/mock-braintrust-server"; +import { + formatJsonFileSnapshot, + resolveFileSnapshotPath, +} from "../../helpers/file-snapshot"; +import { withScenarioHarness } from "../../helpers/scenario-harness"; +import { + findAllSpans, + findChildSpans, + findLatestSpan, +} from "../../helpers/trace-selectors"; +import { E2E_TAGS } from "../../helpers/tags"; +import { summarizeWrapperContract } from "../../helpers/wrapper-contract"; +import { ROOT_NAME, SCENARIO_NAME } from "./scenario.impl.mjs"; + +type RunClaudeAgentSDKScenario = (harness: { + runNodeScenarioDir: (options: { + entry: string; + nodeArgs: string[]; + scenarioDir: string; + timeoutMs: number; + }) => Promise; + runScenarioDir: (options: { + entry: string; + scenarioDir: string; + timeoutMs: number; + }) => Promise; +}) => Promise; + +const SNAPSHOT_METADATA_KEYS = [ + "provider", + "model", + "operation", + "scenario", + "mcp.server", + "gen_ai.tool.name", +] as const; +const OMITTED_METRIC_KEYS = new Set([ + "prompt_cached_tokens", + "prompt_cache_creation_tokens", +]); + +function summarizeSpan( + event: CapturedLogEvent | undefined, + overrides?: { + metadata?: Json; + name?: string | null; + }, +): Json { + if (!event) { + return null; + } + + const summary = summarizeWrapperContract(event, [ + ...SNAPSHOT_METADATA_KEYS, + ]) as Record; + const metricKeys = Array.isArray(summary.metric_keys) + ? summary.metric_keys.filter( + (key): key is string => + typeof key === "string" && !OMITTED_METRIC_KEYS.has(key), + ) + : summary.metric_keys; + const input = event.input as + | Array<{ content?: string; message?: { content?: string } }> + | undefined; + const inputContents = + Array.isArray(input) && + input + .map((item) => item.message?.content ?? item.content) + .filter((content): content is string => typeof content === "string"); + + if (overrides?.metadata !== undefined) { + summary.metadata = overrides.metadata; + } + if (overrides?.name !== undefined) { + summary.name = overrides.name; + } + if (typeof event.row.error === "string") { + summary.error = event.row.error; + } + if (metricKeys !== undefined) { + summary.metric_keys = metricKeys; + } + if (Array.isArray(inputContents) && inputContents.length > 0) { + summary.input_contents = inputContents; + } + + return summary; +} + +function buildSpanSummary(events: CapturedLogEvent[]): Json { + const root = findLatestSpan(events, ROOT_NAME); + const basicOperation = findLatestSpan(events, "claude-agent-basic-operation"); + const asyncPromptOperation = findLatestSpan( + events, + "claude-agent-async-prompt-operation", + ); + const subAgentOperation = findLatestSpan( + events, + "claude-agent-subagent-operation", + ); + const failureOperation = findLatestSpan( + events, + "claude-agent-failure-operation", + ); + + const basicTask = findChildSpans( + events, + "Claude Agent", + basicOperation?.span.id, + ).at(-1); + const asyncPromptTask = findChildSpans( + events, + "Claude Agent", + asyncPromptOperation?.span.id, + ).at(-1); + const subAgentTaskRoot = findChildSpans( + events, + "Claude Agent", + subAgentOperation?.span.id, + ).at(-1); + const failureTask = findChildSpans( + events, + "Claude Agent", + failureOperation?.span.id, + ).at(-1); + + const basicLlm = findChildSpans( + events, + "anthropic.messages.create", + basicTask?.span.id, + ).at(-1); + const asyncPromptLlm = findChildSpans( + events, + "anthropic.messages.create", + asyncPromptTask?.span.id, + ).find((event) => { + const input = event.input as Array<{ content?: string }> | undefined; + return Array.isArray(input) && input.some((item) => item.content); + }); + const subAgentLlm = findAllSpans(events, "anthropic.messages.create").find( + (event) => event.span.parentIds.includes(subAgentTaskRoot?.span.id ?? ""), + ); + const failureLlm = findChildSpans( + events, + "anthropic.messages.create", + failureTask?.span.id, + ).at(-1); + + const basicTool = findAllSpans(events, "tool: calculator/calculator").find( + (event) => event.span.parentIds.includes(basicTask?.span.id ?? ""), + ); + const subAgentTask = events.find( + (event) => + event.span.type === "task" && + event.span.parentIds.includes(subAgentTaskRoot?.span.id ?? "") && + event.span.name?.startsWith("Agent:"), + ); + const subAgentTool = findAllSpans(events, "tool: calculator/calculator").find( + (event) => event.span.parentIds.includes(subAgentTask?.span.id ?? ""), + ); + const failureTool = findAllSpans(events, "tool: calculator/calculator").find( + (event) => event.span.parentIds.includes(failureTask?.span.id ?? ""), + ); + + return normalizeForSnapshot({ + async_prompt: { + llm: summarizeSpan(asyncPromptLlm), + operation: summarizeSpan(asyncPromptOperation), + task: summarizeSpan(asyncPromptTask), + }, + basic: { + llm: summarizeSpan(basicLlm), + operation: summarizeSpan(basicOperation), + task: summarizeSpan(basicTask), + tool: summarizeSpan(basicTool), + }, + failure: { + llm: summarizeSpan(failureLlm), + operation: summarizeSpan(failureOperation), + task: summarizeSpan(failureTask), + tool: summarizeSpan(failureTool), + }, + root: summarizeSpan(root), + subagent: { + llm: summarizeSpan(subAgentLlm), + nested_task: summarizeSpan(subAgentTask), + operation: summarizeSpan(subAgentOperation), + task_root: summarizeSpan(subAgentTaskRoot), + tool: summarizeSpan(subAgentTool), + }, + } as Json); +} + +export function defineClaudeAgentSDKInstrumentationAssertions(options: { + name: string; + runScenario: RunClaudeAgentSDKScenario; + snapshotName: string; + testFileUrl: string; + timeoutMs: number; +}): void { + const snapshotPath = resolveFileSnapshotPath( + options.testFileUrl, + `${options.snapshotName}.span-events.json`, + ); + const testConfig = { + tags: [E2E_TAGS.externalApi], + timeout: options.timeoutMs, + }; + + describe(options.name, () => { + let events: CapturedLogEvent[] = []; + + beforeAll(async () => { + await withScenarioHarness(async (harness) => { + await options.runScenario(harness); + events = harness.events(); + }); + }, options.timeoutMs); + + test("captures the root trace for the scenario", testConfig, () => { + const root = findLatestSpan(events, ROOT_NAME); + + expect(root).toBeDefined(); + expect(root?.row.metadata).toMatchObject({ + scenario: SCENARIO_NAME, + }); + }); + + test("captures tool-backed task and llm spans", testConfig, () => { + const root = findLatestSpan(events, ROOT_NAME); + const operation = findLatestSpan(events, "claude-agent-basic-operation"); + const task = findChildSpans( + events, + "Claude Agent", + operation?.span.id, + ).at(-1); + const llm = findChildSpans( + events, + "anthropic.messages.create", + task?.span.id, + ).at(-1); + const tool = findAllSpans(events, "tool: calculator/calculator").find( + (event) => event.span.parentIds.includes(task?.span.id ?? ""), + ); + + expect(operation).toBeDefined(); + expect(task).toBeDefined(); + expect(llm).toBeDefined(); + expect(tool).toBeDefined(); + expect(operation?.span.parentIds).toEqual([root?.span.id ?? ""]); + expect(tool?.span.parentIds).toEqual([task?.span.id ?? ""]); + }); + + test( + "captures async prompt input on both task and llm spans", + testConfig, + () => { + const operation = findLatestSpan( + events, + "claude-agent-async-prompt-operation", + ); + const task = findChildSpans( + events, + "Claude Agent", + operation?.span.id, + ).at(-1); + const llm = findChildSpans( + events, + "anthropic.messages.create", + task?.span.id, + ).find((event) => { + const input = event.input as Array<{ content?: string }> | undefined; + return Array.isArray(input) && input.some((item) => item.content); + }); + + expect(operation).toBeDefined(); + expect(task).toBeDefined(); + expect(task?.input).toMatchObject([ + { message: { content: "Part 1" } }, + { message: { content: "Part 2" } }, + ]); + expect(llm?.input).toMatchObject([ + { content: "Part 1" }, + { content: "Part 2" }, + ]); + }, + ); + + test("captures nested subagent task hierarchy", testConfig, () => { + const operation = findLatestSpan( + events, + "claude-agent-subagent-operation", + ); + const taskRoot = findChildSpans( + events, + "Claude Agent", + operation?.span.id, + ).at(-1); + const llm = findAllSpans(events, "anthropic.messages.create").find( + (event) => event.span.parentIds.includes(taskRoot?.span.id ?? ""), + ); + const nestedTask = events.find( + (event) => + event.span.type === "task" && + event.span.parentIds.includes(taskRoot?.span.id ?? "") && + event.span.name?.startsWith("Agent:"), + ); + const tool = findAllSpans(events, "tool: calculator/calculator").find( + (event) => event.span.parentIds.includes(nestedTask?.span.id ?? ""), + ); + + expect(operation).toBeDefined(); + expect(taskRoot).toBeDefined(); + expect(llm).toBeDefined(); + expect(nestedTask).toBeDefined(); + if (tool) { + expect(tool.span.parentIds).toContain(nestedTask?.span.id ?? ""); + expect(tool.span.parentIds).not.toContain(taskRoot?.span.id ?? ""); + } + }); + + test("captures tool failure details", testConfig, () => { + const operation = findLatestSpan( + events, + "claude-agent-failure-operation", + ); + const task = findChildSpans( + events, + "Claude Agent", + operation?.span.id, + ).at(-1); + const llm = findChildSpans( + events, + "anthropic.messages.create", + task?.span.id, + ).at(-1); + const tool = findAllSpans(events, "tool: calculator/calculator").find( + (event) => event.span.parentIds.includes(task?.span.id ?? ""), + ); + + expect(operation).toBeDefined(); + expect(task).toBeDefined(); + expect(llm).toBeDefined(); + if (tool) { + expect(tool.row.error).toBe("division by zero"); + } + }); + + test("matches the shared span snapshot", testConfig, async () => { + await expect( + formatJsonFileSnapshot(buildSpanSummary(events)), + ).toMatchFileSnapshot(snapshotPath); + }); + }); +} diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/package.json b/e2e/scenarios/claude-agent-sdk-instrumentation/package.json new file mode 100644 index 000000000..5b50f5330 --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/package.json @@ -0,0 +1,20 @@ +{ + "name": "@braintrust/e2e-claude-agent-sdk-instrumentation", + "private": true, + "braintrustScenario": { + "canary": { + "dependencies": { + "claude-agent-sdk-v0.1": "@anthropic-ai/claude-agent-sdk@0.1", + "claude-agent-sdk-v0.2.76": "@anthropic-ai/claude-agent-sdk@0.2.76", + "claude-agent-sdk-v0.2.79": "@anthropic-ai/claude-agent-sdk@0.2.79", + "claude-agent-sdk-v0.2.81": "@anthropic-ai/claude-agent-sdk@0.2.81" + } + } + }, + "dependencies": { + "claude-agent-sdk-v0.1": "npm:@anthropic-ai/claude-agent-sdk@0.1.77", + "claude-agent-sdk-v0.2.76": "npm:@anthropic-ai/claude-agent-sdk@0.2.76", + "claude-agent-sdk-v0.2.79": "npm:@anthropic-ai/claude-agent-sdk@0.2.79", + "claude-agent-sdk-v0.2.81": "npm:@anthropic-ai/claude-agent-sdk@0.2.81" + } +} diff --git a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/pnpm-lock.yaml b/e2e/scenarios/claude-agent-sdk-instrumentation/pnpm-lock.yaml similarity index 86% rename from e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/pnpm-lock.yaml rename to e2e/scenarios/claude-agent-sdk-instrumentation/pnpm-lock.yaml index bea4806b3..7e00edb1c 100644 --- a/e2e/scenarios/claude-agent-sdk-auto-instrumentation-node-hook/pnpm-lock.yaml +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/pnpm-lock.yaml @@ -11,7 +11,13 @@ importers: claude-agent-sdk-v0.1: specifier: npm:@anthropic-ai/claude-agent-sdk@0.1.77 version: '@anthropic-ai/claude-agent-sdk@0.1.77(zod@4.3.6)' - claude-agent-sdk-v0.2: + claude-agent-sdk-v0.2.76: + specifier: npm:@anthropic-ai/claude-agent-sdk@0.2.76 + version: '@anthropic-ai/claude-agent-sdk@0.2.76(zod@4.3.6)' + claude-agent-sdk-v0.2.79: + specifier: npm:@anthropic-ai/claude-agent-sdk@0.2.79 + version: '@anthropic-ai/claude-agent-sdk@0.2.79(zod@4.3.6)' + claude-agent-sdk-v0.2.81: specifier: npm:@anthropic-ai/claude-agent-sdk@0.2.81 version: '@anthropic-ai/claude-agent-sdk@0.2.81(zod@4.3.6)' @@ -23,6 +29,18 @@ packages: peerDependencies: zod: ^3.25.0 || ^4.0.0 + '@anthropic-ai/claude-agent-sdk@0.2.76': + resolution: {integrity: sha512-HZxvnT8ZWkzCnQygaYCA0dl8RSUzuVbxE1YG4ecy6vh4nQbTT36CxUxBy+QVdR12pPQluncC0mCOLhI2918Eaw==} + engines: {node: '>=18.0.0'} + peerDependencies: + zod: ^4.0.0 + + '@anthropic-ai/claude-agent-sdk@0.2.79': + resolution: {integrity: sha512-4HmjT2pzjcYSXGxe18L0D1+5GEak3bk25C2H9GlKFnOeCkYAHG4cla4U/rn+v+S2Ecv5m/hsNQ1hDbzg4Ns7rA==} + engines: {node: '>=18.0.0'} + peerDependencies: + zod: ^4.0.0 + '@anthropic-ai/claude-agent-sdk@0.2.81': resolution: {integrity: sha512-CBeebgibBEN/DWOQGZN67vhuTG55RbI1hlsFSSoZ4uA/Io3lw04eHTE2ISCmdbqyJaefYTt6GKZei1nP0TQMNw==} engines: {node: '>=18.0.0'} @@ -219,6 +237,34 @@ snapshots: '@img/sharp-linuxmusl-x64': 0.33.5 '@img/sharp-win32-x64': 0.33.5 + '@anthropic-ai/claude-agent-sdk@0.2.76(zod@4.3.6)': + dependencies: + zod: 4.3.6 + optionalDependencies: + '@img/sharp-darwin-arm64': 0.34.5 + '@img/sharp-darwin-x64': 0.34.5 + '@img/sharp-linux-arm': 0.34.5 + '@img/sharp-linux-arm64': 0.34.5 + '@img/sharp-linux-x64': 0.34.5 + '@img/sharp-linuxmusl-arm64': 0.34.5 + '@img/sharp-linuxmusl-x64': 0.34.5 + '@img/sharp-win32-arm64': 0.34.5 + '@img/sharp-win32-x64': 0.34.5 + + '@anthropic-ai/claude-agent-sdk@0.2.79(zod@4.3.6)': + dependencies: + zod: 4.3.6 + optionalDependencies: + '@img/sharp-darwin-arm64': 0.34.5 + '@img/sharp-darwin-x64': 0.34.5 + '@img/sharp-linux-arm': 0.34.5 + '@img/sharp-linux-arm64': 0.34.5 + '@img/sharp-linux-x64': 0.34.5 + '@img/sharp-linuxmusl-arm64': 0.34.5 + '@img/sharp-linuxmusl-x64': 0.34.5 + '@img/sharp-win32-arm64': 0.34.5 + '@img/sharp-win32-x64': 0.34.5 + '@anthropic-ai/claude-agent-sdk@0.2.81(zod@4.3.6)': dependencies: zod: 4.3.6 diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.1.mjs b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.1.mjs new file mode 100644 index 000000000..e63959d07 --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.1.mjs @@ -0,0 +1,5 @@ +import * as claudeAgentSDK from "claude-agent-sdk-v0.1"; +import { runMain } from "../../helpers/provider-runtime.mjs"; +import { runAutoClaudeAgentSDKInstrumentation } from "./scenario.impl.mjs"; + +runMain(async () => runAutoClaudeAgentSDKInstrumentation(claudeAgentSDK)); diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.1.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.1.ts new file mode 100644 index 000000000..83aa33f16 --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.1.ts @@ -0,0 +1,5 @@ +import * as claudeAgentSDK from "claude-agent-sdk-v0.1"; +import { runMain } from "../../helpers/scenario-runtime"; +import { runWrappedClaudeAgentSDKInstrumentation } from "./scenario.impl.mjs"; + +runMain(async () => runWrappedClaudeAgentSDKInstrumentation(claudeAgentSDK)); diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.76.mjs b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.76.mjs new file mode 100644 index 000000000..2807fca91 --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.76.mjs @@ -0,0 +1,5 @@ +import * as claudeAgentSDK from "claude-agent-sdk-v0.2.76"; +import { runMain } from "../../helpers/provider-runtime.mjs"; +import { runAutoClaudeAgentSDKInstrumentation } from "./scenario.impl.mjs"; + +runMain(async () => runAutoClaudeAgentSDKInstrumentation(claudeAgentSDK)); diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.76.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.76.ts new file mode 100644 index 000000000..2d66d92ff --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.76.ts @@ -0,0 +1,5 @@ +import * as claudeAgentSDK from "claude-agent-sdk-v0.2.76"; +import { runMain } from "../../helpers/scenario-runtime"; +import { runWrappedClaudeAgentSDKInstrumentation } from "./scenario.impl.mjs"; + +runMain(async () => runWrappedClaudeAgentSDKInstrumentation(claudeAgentSDK)); diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.79.mjs b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.79.mjs new file mode 100644 index 000000000..9f65ecc78 --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.79.mjs @@ -0,0 +1,5 @@ +import * as claudeAgentSDK from "claude-agent-sdk-v0.2.79"; +import { runMain } from "../../helpers/provider-runtime.mjs"; +import { runAutoClaudeAgentSDKInstrumentation } from "./scenario.impl.mjs"; + +runMain(async () => runAutoClaudeAgentSDKInstrumentation(claudeAgentSDK)); diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.79.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.79.ts new file mode 100644 index 000000000..2c0532073 --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.79.ts @@ -0,0 +1,5 @@ +import * as claudeAgentSDK from "claude-agent-sdk-v0.2.79"; +import { runMain } from "../../helpers/scenario-runtime"; +import { runWrappedClaudeAgentSDKInstrumentation } from "./scenario.impl.mjs"; + +runMain(async () => runWrappedClaudeAgentSDKInstrumentation(claudeAgentSDK)); diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.81.mjs b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.81.mjs new file mode 100644 index 000000000..dbf1abd73 --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.81.mjs @@ -0,0 +1,5 @@ +import * as claudeAgentSDK from "claude-agent-sdk-v0.2.81"; +import { runMain } from "../../helpers/provider-runtime.mjs"; +import { runAutoClaudeAgentSDKInstrumentation } from "./scenario.impl.mjs"; + +runMain(async () => runAutoClaudeAgentSDKInstrumentation(claudeAgentSDK)); diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.81.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.81.ts new file mode 100644 index 000000000..f65deae30 --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.claude-agent-sdk-v0.2.81.ts @@ -0,0 +1,5 @@ +import * as claudeAgentSDK from "claude-agent-sdk-v0.2.81"; +import { runMain } from "../../helpers/scenario-runtime"; +import { runWrappedClaudeAgentSDKInstrumentation } from "./scenario.impl.mjs"; + +runMain(async () => runWrappedClaudeAgentSDKInstrumentation(claudeAgentSDK)); diff --git a/e2e/helpers/claude-agent-sdk-scenario.mjs b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.impl.mjs similarity index 83% rename from e2e/helpers/claude-agent-sdk-scenario.mjs rename to e2e/scenarios/claude-agent-sdk-instrumentation/scenario.impl.mjs index 78eeeef6f..879c14416 100644 --- a/e2e/helpers/claude-agent-sdk-scenario.mjs +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.impl.mjs @@ -1,12 +1,16 @@ +import { wrapClaudeAgentSDK } from "braintrust"; import { collectAsync, runOperation, runTracedScenario, -} from "./provider-runtime.mjs"; +} from "../../helpers/provider-runtime.mjs"; import { z } from "zod"; const CLAUDE_AGENT_MODEL = "claude-haiku-4-5-20251001"; +export const ROOT_NAME = "claude-agent-sdk-root"; +export const SCENARIO_NAME = "claude-agent-sdk-traces"; + function makePromptMessage(content) { return { type: "user", @@ -17,11 +21,9 @@ function makePromptMessage(content) { }; } -export async function runClaudeAgentSDKScenario(options) { - const sdk = options.decorateSDK - ? options.decorateSDK(options.sdk) - : options.sdk; - const { createSdkMcpServer, query, tool } = sdk; +async function runClaudeAgentSDKScenario({ decorateSDK, sdk }) { + const instrumentedSDK = decorateSDK ? decorateSDK(sdk) : sdk; + const { createSdkMcpServer, query, tool } = instrumentedSDK; const calculator = tool( "calculator", "Performs basic arithmetic operations", @@ -157,9 +159,22 @@ export async function runClaudeAgentSDKScenario(options) { ); }, metadata: { - scenario: options.scenarioName, + scenario: SCENARIO_NAME, }, - projectNameBase: options.projectNameBase, - rootName: options.rootName, + projectNameBase: "e2e-claude-agent-sdk-instrumentation", + rootName: ROOT_NAME, + }); +} + +export async function runWrappedClaudeAgentSDKInstrumentation(sdk) { + await runClaudeAgentSDKScenario({ + decorateSDK: wrapClaudeAgentSDK, + sdk, + }); +} + +export async function runAutoClaudeAgentSDKInstrumentation(sdk) { + await runClaudeAgentSDKScenario({ + sdk, }); } diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts new file mode 100644 index 000000000..f63cce83e --- /dev/null +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts @@ -0,0 +1,83 @@ +import { describe } from "vitest"; +import { + prepareScenarioDir, + readInstalledPackageVersion, + resolveScenarioDir, +} from "../../helpers/scenario-harness"; +import { defineClaudeAgentSDKInstrumentationAssertions } from "./assertions"; + +const scenarioDir = await prepareScenarioDir({ + scenarioDir: resolveScenarioDir(import.meta.url), +}); +const TIMEOUT_MS = 120_000; +const claudeAgentSDKScenarios = await Promise.all( + [ + { + autoEntry: "scenario.claude-agent-sdk-v0.1.mjs", + dependencyName: "claude-agent-sdk-v0.1", + snapshotName: "claude-agent-sdk-v0.1", + wrapperEntry: "scenario.claude-agent-sdk-v0.1.ts", + }, + { + autoEntry: "scenario.claude-agent-sdk-v0.2.76.mjs", + dependencyName: "claude-agent-sdk-v0.2.76", + snapshotName: "claude-agent-sdk-v0.2.76", + wrapperEntry: "scenario.claude-agent-sdk-v0.2.76.ts", + }, + { + autoEntry: "scenario.claude-agent-sdk-v0.2.79.mjs", + dependencyName: "claude-agent-sdk-v0.2.79", + snapshotName: "claude-agent-sdk-v0.2.79", + wrapperEntry: "scenario.claude-agent-sdk-v0.2.79.ts", + }, + { + autoEntry: "scenario.claude-agent-sdk-v0.2.81.mjs", + dependencyName: "claude-agent-sdk-v0.2.81", + snapshotName: "claude-agent-sdk-v0.2.81", + wrapperEntry: "scenario.claude-agent-sdk-v0.2.81.ts", + }, + ].map(async (scenario) => ({ + ...scenario, + version: await readInstalledPackageVersion( + scenarioDir, + scenario.dependencyName, + ), + })), +); + +describe("wrapped instrumentation", () => { + for (const scenario of claudeAgentSDKScenarios) { + defineClaudeAgentSDKInstrumentationAssertions({ + name: `claude agent sdk ${scenario.version}`, + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); + } +}); + +describe("auto-hook instrumentation", () => { + for (const scenario of claudeAgentSDKScenarios) { + defineClaudeAgentSDKInstrumentationAssertions({ + name: `claude agent sdk ${scenario.version}`, + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); + } +}); diff --git a/e2e/scenarios/wrap-claude-agent-sdk-traces/package.json b/e2e/scenarios/wrap-claude-agent-sdk-traces/package.json deleted file mode 100644 index 5365cc8dd..000000000 --- a/e2e/scenarios/wrap-claude-agent-sdk-traces/package.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "name": "@braintrust/e2e-wrap-claude-agent-sdk-traces", - "private": true, - "braintrustScenario": { - "canary": { - "dependencies": { - "claude-agent-sdk-v0.1": "@anthropic-ai/claude-agent-sdk@0.1", - "claude-agent-sdk-v0.2": "@anthropic-ai/claude-agent-sdk@0.2" - } - } - }, - "dependencies": { - "claude-agent-sdk-v0.1": "npm:@anthropic-ai/claude-agent-sdk@0.1.77", - "claude-agent-sdk-v0.2": "npm:@anthropic-ai/claude-agent-sdk@0.2.81" - } -} diff --git a/e2e/scenarios/wrap-claude-agent-sdk-traces/pnpm-lock.yaml b/e2e/scenarios/wrap-claude-agent-sdk-traces/pnpm-lock.yaml deleted file mode 100644 index bea4806b3..000000000 --- a/e2e/scenarios/wrap-claude-agent-sdk-traces/pnpm-lock.yaml +++ /dev/null @@ -1,357 +0,0 @@ -lockfileVersion: '9.0' - -settings: - autoInstallPeers: true - excludeLinksFromLockfile: false - -importers: - - .: - dependencies: - claude-agent-sdk-v0.1: - specifier: npm:@anthropic-ai/claude-agent-sdk@0.1.77 - version: '@anthropic-ai/claude-agent-sdk@0.1.77(zod@4.3.6)' - claude-agent-sdk-v0.2: - specifier: npm:@anthropic-ai/claude-agent-sdk@0.2.81 - version: '@anthropic-ai/claude-agent-sdk@0.2.81(zod@4.3.6)' - -packages: - - '@anthropic-ai/claude-agent-sdk@0.1.77': - resolution: {integrity: sha512-ZEjWQtkoB2MEY6K16DWMmF+8OhywAynH0m08V265cerbZ8xPD/2Ng2jPzbbO40mPeFSsMDJboShL+a3aObP0Jg==} - engines: {node: '>=18.0.0'} - peerDependencies: - zod: ^3.25.0 || ^4.0.0 - - '@anthropic-ai/claude-agent-sdk@0.2.81': - resolution: {integrity: sha512-CBeebgibBEN/DWOQGZN67vhuTG55RbI1hlsFSSoZ4uA/Io3lw04eHTE2ISCmdbqyJaefYTt6GKZei1nP0TQMNw==} - engines: {node: '>=18.0.0'} - peerDependencies: - zod: ^4.0.0 - - '@img/sharp-darwin-arm64@0.33.5': - resolution: {integrity: sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [arm64] - os: [darwin] - - '@img/sharp-darwin-arm64@0.34.5': - resolution: {integrity: sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [arm64] - os: [darwin] - - '@img/sharp-darwin-x64@0.33.5': - resolution: {integrity: sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [x64] - os: [darwin] - - '@img/sharp-darwin-x64@0.34.5': - resolution: {integrity: sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [x64] - os: [darwin] - - '@img/sharp-libvips-darwin-arm64@1.0.4': - resolution: {integrity: sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg==} - cpu: [arm64] - os: [darwin] - - '@img/sharp-libvips-darwin-arm64@1.2.4': - resolution: {integrity: sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==} - cpu: [arm64] - os: [darwin] - - '@img/sharp-libvips-darwin-x64@1.0.4': - resolution: {integrity: sha512-xnGR8YuZYfJGmWPvmlunFaWJsb9T/AO2ykoP3Fz/0X5XV2aoYBPkX6xqCQvUTKKiLddarLaxpzNe+b1hjeWHAQ==} - cpu: [x64] - os: [darwin] - - '@img/sharp-libvips-darwin-x64@1.2.4': - resolution: {integrity: sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==} - cpu: [x64] - os: [darwin] - - '@img/sharp-libvips-linux-arm64@1.0.4': - resolution: {integrity: sha512-9B+taZ8DlyyqzZQnoeIvDVR/2F4EbMepXMc/NdVbkzsJbzkUjhXv/70GQJ7tdLA4YJgNP25zukcxpX2/SueNrA==} - cpu: [arm64] - os: [linux] - - '@img/sharp-libvips-linux-arm64@1.2.4': - resolution: {integrity: sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==} - cpu: [arm64] - os: [linux] - - '@img/sharp-libvips-linux-arm@1.0.5': - resolution: {integrity: sha512-gvcC4ACAOPRNATg/ov8/MnbxFDJqf/pDePbBnuBDcjsI8PssmjoKMAz4LtLaVi+OnSb5FK/yIOamqDwGmXW32g==} - cpu: [arm] - os: [linux] - - '@img/sharp-libvips-linux-arm@1.2.4': - resolution: {integrity: sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==} - cpu: [arm] - os: [linux] - - '@img/sharp-libvips-linux-x64@1.0.4': - resolution: {integrity: sha512-MmWmQ3iPFZr0Iev+BAgVMb3ZyC4KeFc3jFxnNbEPas60e1cIfevbtuyf9nDGIzOaW9PdnDciJm+wFFaTlj5xYw==} - cpu: [x64] - os: [linux] - - '@img/sharp-libvips-linux-x64@1.2.4': - resolution: {integrity: sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==} - cpu: [x64] - os: [linux] - - '@img/sharp-libvips-linuxmusl-arm64@1.0.4': - resolution: {integrity: sha512-9Ti+BbTYDcsbp4wfYib8Ctm1ilkugkA/uscUn6UXK1ldpC1JjiXbLfFZtRlBhjPZ5o1NCLiDbg8fhUPKStHoTA==} - cpu: [arm64] - os: [linux] - - '@img/sharp-libvips-linuxmusl-arm64@1.2.4': - resolution: {integrity: sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==} - cpu: [arm64] - os: [linux] - - '@img/sharp-libvips-linuxmusl-x64@1.0.4': - resolution: {integrity: sha512-viYN1KX9m+/hGkJtvYYp+CCLgnJXwiQB39damAO7WMdKWlIhmYTfHjwSbQeUK/20vY154mwezd9HflVFM1wVSw==} - cpu: [x64] - os: [linux] - - '@img/sharp-libvips-linuxmusl-x64@1.2.4': - resolution: {integrity: sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==} - cpu: [x64] - os: [linux] - - '@img/sharp-linux-arm64@0.33.5': - resolution: {integrity: sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [arm64] - os: [linux] - - '@img/sharp-linux-arm64@0.34.5': - resolution: {integrity: sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [arm64] - os: [linux] - - '@img/sharp-linux-arm@0.33.5': - resolution: {integrity: sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [arm] - os: [linux] - - '@img/sharp-linux-arm@0.34.5': - resolution: {integrity: sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [arm] - os: [linux] - - '@img/sharp-linux-x64@0.33.5': - resolution: {integrity: sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [x64] - os: [linux] - - '@img/sharp-linux-x64@0.34.5': - resolution: {integrity: sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [x64] - os: [linux] - - '@img/sharp-linuxmusl-arm64@0.33.5': - resolution: {integrity: sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [arm64] - os: [linux] - - '@img/sharp-linuxmusl-arm64@0.34.5': - resolution: {integrity: sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [arm64] - os: [linux] - - '@img/sharp-linuxmusl-x64@0.33.5': - resolution: {integrity: sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [x64] - os: [linux] - - '@img/sharp-linuxmusl-x64@0.34.5': - resolution: {integrity: sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [x64] - os: [linux] - - '@img/sharp-win32-arm64@0.34.5': - resolution: {integrity: sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [arm64] - os: [win32] - - '@img/sharp-win32-x64@0.33.5': - resolution: {integrity: sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [x64] - os: [win32] - - '@img/sharp-win32-x64@0.34.5': - resolution: {integrity: sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==} - engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} - cpu: [x64] - os: [win32] - - zod@4.3.6: - resolution: {integrity: sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==} - -snapshots: - - '@anthropic-ai/claude-agent-sdk@0.1.77(zod@4.3.6)': - dependencies: - zod: 4.3.6 - optionalDependencies: - '@img/sharp-darwin-arm64': 0.33.5 - '@img/sharp-darwin-x64': 0.33.5 - '@img/sharp-linux-arm': 0.33.5 - '@img/sharp-linux-arm64': 0.33.5 - '@img/sharp-linux-x64': 0.33.5 - '@img/sharp-linuxmusl-arm64': 0.33.5 - '@img/sharp-linuxmusl-x64': 0.33.5 - '@img/sharp-win32-x64': 0.33.5 - - '@anthropic-ai/claude-agent-sdk@0.2.81(zod@4.3.6)': - dependencies: - zod: 4.3.6 - optionalDependencies: - '@img/sharp-darwin-arm64': 0.34.5 - '@img/sharp-darwin-x64': 0.34.5 - '@img/sharp-linux-arm': 0.34.5 - '@img/sharp-linux-arm64': 0.34.5 - '@img/sharp-linux-x64': 0.34.5 - '@img/sharp-linuxmusl-arm64': 0.34.5 - '@img/sharp-linuxmusl-x64': 0.34.5 - '@img/sharp-win32-arm64': 0.34.5 - '@img/sharp-win32-x64': 0.34.5 - - '@img/sharp-darwin-arm64@0.33.5': - optionalDependencies: - '@img/sharp-libvips-darwin-arm64': 1.0.4 - optional: true - - '@img/sharp-darwin-arm64@0.34.5': - optionalDependencies: - '@img/sharp-libvips-darwin-arm64': 1.2.4 - optional: true - - '@img/sharp-darwin-x64@0.33.5': - optionalDependencies: - '@img/sharp-libvips-darwin-x64': 1.0.4 - optional: true - - '@img/sharp-darwin-x64@0.34.5': - optionalDependencies: - '@img/sharp-libvips-darwin-x64': 1.2.4 - optional: true - - '@img/sharp-libvips-darwin-arm64@1.0.4': - optional: true - - '@img/sharp-libvips-darwin-arm64@1.2.4': - optional: true - - '@img/sharp-libvips-darwin-x64@1.0.4': - optional: true - - '@img/sharp-libvips-darwin-x64@1.2.4': - optional: true - - '@img/sharp-libvips-linux-arm64@1.0.4': - optional: true - - '@img/sharp-libvips-linux-arm64@1.2.4': - optional: true - - '@img/sharp-libvips-linux-arm@1.0.5': - optional: true - - '@img/sharp-libvips-linux-arm@1.2.4': - optional: true - - '@img/sharp-libvips-linux-x64@1.0.4': - optional: true - - '@img/sharp-libvips-linux-x64@1.2.4': - optional: true - - '@img/sharp-libvips-linuxmusl-arm64@1.0.4': - optional: true - - '@img/sharp-libvips-linuxmusl-arm64@1.2.4': - optional: true - - '@img/sharp-libvips-linuxmusl-x64@1.0.4': - optional: true - - '@img/sharp-libvips-linuxmusl-x64@1.2.4': - optional: true - - '@img/sharp-linux-arm64@0.33.5': - optionalDependencies: - '@img/sharp-libvips-linux-arm64': 1.0.4 - optional: true - - '@img/sharp-linux-arm64@0.34.5': - optionalDependencies: - '@img/sharp-libvips-linux-arm64': 1.2.4 - optional: true - - '@img/sharp-linux-arm@0.33.5': - optionalDependencies: - '@img/sharp-libvips-linux-arm': 1.0.5 - optional: true - - '@img/sharp-linux-arm@0.34.5': - optionalDependencies: - '@img/sharp-libvips-linux-arm': 1.2.4 - optional: true - - '@img/sharp-linux-x64@0.33.5': - optionalDependencies: - '@img/sharp-libvips-linux-x64': 1.0.4 - optional: true - - '@img/sharp-linux-x64@0.34.5': - optionalDependencies: - '@img/sharp-libvips-linux-x64': 1.2.4 - optional: true - - '@img/sharp-linuxmusl-arm64@0.33.5': - optionalDependencies: - '@img/sharp-libvips-linuxmusl-arm64': 1.0.4 - optional: true - - '@img/sharp-linuxmusl-arm64@0.34.5': - optionalDependencies: - '@img/sharp-libvips-linuxmusl-arm64': 1.2.4 - optional: true - - '@img/sharp-linuxmusl-x64@0.33.5': - optionalDependencies: - '@img/sharp-libvips-linuxmusl-x64': 1.0.4 - optional: true - - '@img/sharp-linuxmusl-x64@0.34.5': - optionalDependencies: - '@img/sharp-libvips-linuxmusl-x64': 1.2.4 - optional: true - - '@img/sharp-win32-arm64@0.34.5': - optional: true - - '@img/sharp-win32-x64@0.33.5': - optional: true - - '@img/sharp-win32-x64@0.34.5': - optional: true - - zod@4.3.6: {} diff --git a/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.1.ts b/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.1.ts deleted file mode 100644 index 1e5743623..000000000 --- a/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.1.ts +++ /dev/null @@ -1,5 +0,0 @@ -import * as claudeAgentSDK from "claude-agent-sdk-v0.1"; -import { runMain } from "../../helpers/scenario-runtime"; -import { runWrapClaudeAgentSDKTraces } from "./scenario.impl"; - -runMain(async () => runWrapClaudeAgentSDKTraces(claudeAgentSDK)); diff --git a/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.2.ts b/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.2.ts deleted file mode 100644 index 25291595f..000000000 --- a/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.claude-agent-sdk-v0.2.ts +++ /dev/null @@ -1,5 +0,0 @@ -import * as claudeAgentSDK from "claude-agent-sdk-v0.2"; -import { runMain } from "../../helpers/scenario-runtime"; -import { runWrapClaudeAgentSDKTraces } from "./scenario.impl"; - -runMain(async () => runWrapClaudeAgentSDKTraces(claudeAgentSDK)); diff --git a/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.impl.ts b/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.impl.ts deleted file mode 100644 index 41a058781..000000000 --- a/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.impl.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { wrapClaudeAgentSDK } from "braintrust"; -import { runClaudeAgentSDKScenario } from "../../helpers/claude-agent-sdk-scenario.mjs"; - -export async function runWrapClaudeAgentSDKTraces(claudeAgentSDK: object) { - await runClaudeAgentSDKScenario({ - decorateSDK: wrapClaudeAgentSDK, - projectNameBase: "e2e-wrap-claude-agent-sdk", - rootName: "claude-agent-sdk-wrapper-root", - scenarioName: "wrap-claude-agent-sdk-traces", - sdk: claudeAgentSDK, - }); -} diff --git a/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.test.ts b/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.test.ts deleted file mode 100644 index 3e15697c2..000000000 --- a/e2e/scenarios/wrap-claude-agent-sdk-traces/scenario.test.ts +++ /dev/null @@ -1,53 +0,0 @@ -import { expect, test } from "vitest"; -import { - assertClaudeAgentSDKTraceContract, - resolveClaudeAgentSDKSpanSnapshotPath, -} from "../../helpers/claude-agent-sdk-trace-contract"; -import { - CLAUDE_AGENT_SDK_SCENARIO_TIMEOUT_MS, - getWrapClaudeAgentSDKScenarios, -} from "../../helpers/claude-agent-sdk"; -import { formatJsonFileSnapshot } from "../../helpers/file-snapshot"; -import { - prepareScenarioDir, - resolveScenarioDir, - withScenarioHarness, -} from "../../helpers/scenario-harness"; -import { E2E_TAGS } from "../../helpers/tags"; - -const scenarioDir = await prepareScenarioDir({ - scenarioDir: resolveScenarioDir(import.meta.url), -}); -const wrapClaudeAgentSDKScenarios = - await getWrapClaudeAgentSDKScenarios(scenarioDir); - -for (const scenario of wrapClaudeAgentSDKScenarios) { - test( - `wrap-claude-agent-sdk-traces captures tool, async prompt, and subagent traces (claude-agent-sdk ${scenario.version})`, - { - tags: [E2E_TAGS.externalApi], - timeout: CLAUDE_AGENT_SDK_SCENARIO_TIMEOUT_MS, - }, - async () => { - await withScenarioHarness(async ({ events, runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.entry, - scenarioDir, - timeoutMs: CLAUDE_AGENT_SDK_SCENARIO_TIMEOUT_MS, - }); - - const contract = assertClaudeAgentSDKTraceContract({ - capturedEvents: events(), - rootName: "claude-agent-sdk-wrapper-root", - scenarioName: "wrap-claude-agent-sdk-traces", - }); - - await expect( - formatJsonFileSnapshot(contract.spanSummary), - ).toMatchFileSnapshot( - resolveClaudeAgentSDKSpanSnapshotPath(scenario.dependencyName), - ); - }); - }, - ); -}