diff --git a/docs/docs/reference/telemetry.md b/docs/docs/reference/telemetry.md
index c3b72cc6d..4535ae72b 100644
--- a/docs/docs/reference/telemetry.md
+++ b/docs/docs/reference/telemetry.md
@@ -11,7 +11,7 @@ We collect the following categories of events:
 | `session_start` | A new CLI session begins |
 | `session_end` | A CLI session ends (includes duration) |
 | `session_forked` | A session is forked from an existing one |
-| `generation` | An AI model generation completes (model ID, token counts, duration — no prompt content) |
+| `generation` | An AI model generation (step) completes — model ID, provider ID, agent, finish reason, cost, duration, and a token breakdown: input, output, plus, when available, reasoning tokens (reasoning models only), cache-read tokens (prompt cache hit), and cache-write tokens (new cache entry). No prompt content. |
 | `tool_call` | A tool is invoked (tool name and category — no arguments or output) |
 | `native_call` | A native engine call completes (method name and duration — no arguments) |
 | `command` | A CLI command is executed (command name only) |
diff --git a/packages/opencode/src/altimate/telemetry/index.ts b/packages/opencode/src/altimate/telemetry/index.ts
index e0f0130e4..ded93596c 100644
--- a/packages/opencode/src/altimate/telemetry/index.ts
+++ b/packages/opencode/src/altimate/telemetry/index.ts
@@ -14,13 +14,6 @@ export namespace Telemetry {
   const MAX_BUFFER_SIZE = 200
   const REQUEST_TIMEOUT_MS = 10_000
 
-  export type TokensPayload = {
-    input: number
-    output: number
-    reasoning: number
-    cache_read: number
-    cache_write: number
-  }
 
   export type Event =
     | {
@@ -50,9 +43,15 @@
         provider_id: string
         agent: string
         finish_reason: string
-        tokens: TokensPayload
         cost: number
         duration_ms: number
+        // Flat token fields — only present when data is available from the provider.
+        // No nested objects: Azure App Insights custom measurements must be top-level numbers.
+        tokens_input: number
+        tokens_output: number
+        tokens_reasoning?: number // only for reasoning models
+        tokens_cache_read?: number // only when a cached prompt was reused
+        tokens_cache_write?: number // only when a new cache entry was written
       }
     | {
         type: "tool_call"
diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index 70ea586b8..2fd644cfe 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -16,6 +16,9 @@ import { PermissionNext } from "@/permission/next"
 import { Question } from "@/question"
 import { PartID } from "./schema"
 import type { SessionID, MessageID } from "./schema"
+// altimate_change start — import Telemetry for per-generation token tracking
+import { Telemetry } from "@/altimate/telemetry"
+// altimate_change end
 
 export namespace SessionProcessor {
   const DOOM_LOOP_THRESHOLD = 3
@@ -47,6 +50,9 @@ export namespace SessionProcessor {
     log.info("process")
     needsCompaction = false
     const shouldBreak = (await Config.get()).experimental?.continue_loop_on_deny !== true
+    // altimate_change start — track step start time for generation telemetry duration_ms
+    let stepStartTime = Date.now()
+    // altimate_change end
     while (true) {
       try {
         let currentText: MessageV2.TextPart | undefined
@@ -240,6 +246,9 @@
             snapshot,
             type: "step-start",
           })
+          // altimate_change start — record step start time for generation telemetry duration
+          stepStartTime = Date.now()
+          // altimate_change end
           break
 
         case "finish-step":
@@ -251,6 +260,26 @@
           input.assistantMessage.finish = value.finishReason
           input.assistantMessage.cost += usage.cost
          input.assistantMessage.tokens = usage.tokens
+          // altimate_change start — emit per-generation telemetry with token breakdown
+          // Only include token fields that are actually provided by the API (never default to 0).
+          Telemetry.track({
+            type: "generation",
+            timestamp: Date.now(),
+            session_id: input.sessionID,
+            message_id: input.assistantMessage.id,
+            model_id: streamInput.model.id,
+            provider_id: streamInput.model.providerID,
+            agent: streamInput.agent.name,
+            finish_reason: value.finishReason,
+            cost: usage.cost,
+            duration_ms: Date.now() - stepStartTime,
+            tokens_input: usage.tokens.input,
+            tokens_output: usage.tokens.output,
+            ...(value.usage.reasoningTokens !== undefined && { tokens_reasoning: usage.tokens.reasoning }),
+            ...(value.usage.cachedInputTokens !== undefined && { tokens_cache_read: usage.tokens.cache.read }),
+            ...(usage.tokens.cache.write > 0 && { tokens_cache_write: usage.tokens.cache.write }),
+          })
+          // altimate_change end
           await Session.updatePart({
             id: PartID.ascending(),
             reason: value.finishReason,
diff --git a/packages/opencode/test/session/processor.test.ts b/packages/opencode/test/session/processor.test.ts
index 857966a5a..fd12e483d 100644
--- a/packages/opencode/test/session/processor.test.ts
+++ b/packages/opencode/test/session/processor.test.ts
@@ -468,20 +468,18 @@ describe("generation telemetry", () => {
       provider_id: "anthropic",
       agent: "builder",
       finish_reason: "end_turn",
-      tokens: {
-        input: 1000,
-        output: 500,
-        reasoning: 200,
-        cache_read: 800,
-        cache_write: 100,
-      },
+      tokens_input: 1000,
+      tokens_output: 500,
+      tokens_reasoning: 200,
+      tokens_cache_read: 800,
+      tokens_cache_write: 100,
       cost: 0.05,
       duration_ms: 3000,
     }
 
     expect(event.model_id).toBe("claude-opus-4-6")
-    expect(event.tokens.input).toBe(1000)
-    expect(event.tokens.cache_read).toBe(800)
+    expect(event.tokens_input).toBe(1000)
+    expect(event.tokens_cache_read).toBe(800)
     expect(event.cost).toBe(0.05)
     expect(event.finish_reason).toBe("end_turn")
   })
diff --git a/packages/opencode/test/telemetry/telemetry.test.ts b/packages/opencode/test/telemetry/telemetry.test.ts
index c7c575736..b8f4b1fb1 100644
--- a/packages/opencode/test/telemetry/telemetry.test.ts
+++ b/packages/opencode/test/telemetry/telemetry.test.ts
@@ -624,7 +624,7 @@ describe("telemetry.toAppInsightsEnvelopes (indirect)", () => {
     }
   })
 
-  test("nested tokens object is flattened with tokens_ prefix", async () => {
+  test("flat token fields appear in measurements", async () => {
     const { fetchCalls, cleanup } = await initWithMockedFetch()
     try {
       Telemetry.track({
@@ -636,13 +636,11 @@
         provider_id: "anthropic",
         agent: "builder",
         finish_reason: "end_turn",
-        tokens: {
-          input: 100,
-          output: 200,
-          reasoning: 50,
-          cache_read: 10,
-          cache_write: 5,
-        },
+        tokens_input: 100,
+        tokens_output: 200,
+        tokens_reasoning: 50,
+        tokens_cache_read: 10,
+        tokens_cache_write: 5,
         cost: 0.01,
         duration_ms: 2000,
       })
@@ -656,8 +654,6 @@
       expect(measurements.tokens_reasoning).toBe(50)
       expect(measurements.tokens_cache_read).toBe(10)
      expect(measurements.tokens_cache_write).toBe(5)
-      // Raw "tokens" key should not appear in properties
-      expect(envelopes[0].data.baseData.properties.tokens).toBeUndefined()
     } finally {
       cleanup()
     }
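
Context on the flat-field requirement: App Insights stores an event's string fields as `properties` and its numeric fields as `customMeasurements`, and measurements cannot be nested objects. The sketch below illustrates the property/measurement split that the tests above exercise. It is a minimal illustration only; the real `toAppInsightsEnvelopes` is not part of this diff, and the `toEnvelope` and `Envelope` names here are hypothetical.

```ts
// Hypothetical sketch of the split the telemetry tests assert on.
type Envelope = {
  data: {
    baseData: {
      properties: Record<string, string>
      measurements: Record<string, number>
    }
  }
}

function toEnvelope(event: Record<string, unknown>): Envelope {
  const properties: Record<string, string> = {}
  const measurements: Record<string, number> = {}
  for (const [key, value] of Object.entries(event)) {
    // Numbers become custom measurements; everything else is stringified
    // into properties. Optional token fields omitted at the call site
    // simply never appear in either map.
    if (typeof value === "number") measurements[key] = value
    else properties[key] = String(value)
  }
  return { data: { baseData: { properties, measurements } } }
}

// For the generation event tracked in the test above:
//   toEnvelope(event).data.baseData.measurements.tokens_input === 100
//   toEnvelope(event).data.baseData.measurements.tokens_reasoning === 50
//   toEnvelope(event).data.baseData.properties.finish_reason === "end_turn"
```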
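The processor change relies on the conditional-spread idiom to keep absent token counts out of the payload entirely. A standalone sketch of the idiom, assuming nothing beyond plain TypeScript:

```ts
// Conditional spread: `cond && { key: value }` evaluates to `false` when the
// condition fails, and spreading `false` into an object literal adds nothing.
const usage = { reasoningTokens: undefined as number | undefined }

const event = {
  tokens_input: 1000,
  tokens_output: 500,
  ...(usage.reasoningTokens !== undefined && { tokens_reasoning: usage.reasoningTokens }),
}

console.log("tokens_reasoning" in event) // false: the key is absent, not 0
```

This matters downstream: an omitted measurement is simply excluded from App Insights aggregations, whereas a defaulted 0 would silently drag down queries such as an average over `tokens_cache_read`.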