diff --git a/.changeset/litellm-session-id-header.md b/.changeset/litellm-session-id-header.md new file mode 100644 index 0000000000..57b4ec7fda --- /dev/null +++ b/.changeset/litellm-session-id-header.md @@ -0,0 +1,5 @@ +--- +"zoo-code": patch +--- + +Forward the active task ID to the LiteLLM proxy as an `X-Zoo-Session-ID` request header so individual conversations can be correlated in LiteLLM logs and spend tracking. The header is only sent when a task ID is present, and follows the `x-<client>-session-id` convention used by Claude Code (`x-claude-code-session-id`) and GitHub Copilot (`x-copilot-session-id`). diff --git a/src/api/providers/__tests__/lite-llm.spec.ts b/src/api/providers/__tests__/lite-llm.spec.ts index df0e8b152d..78a01e30f3 100644 --- a/src/api/providers/__tests__/lite-llm.spec.ts +++ b/src/api/providers/__tests__/lite-llm.spec.ts @@ -1115,4 +1115,61 @@ describe("LiteLLMHandler", () => { expect(id1).not.toBe(id2) }) }) + + describe("session ID header", () => { + const mockStream = { + async *[Symbol.asyncIterator]() { + yield { + choices: [{ delta: { content: "ok" } }], + usage: { prompt_tokens: 1, completion_tokens: 1 }, + } + }, + } + + it("should send the X-Zoo-Session-ID header when a taskId is provided", async () => { + mockCreate.mockReturnValue({ + withResponse: vi.fn().mockResolvedValue({ data: mockStream }), + }) + + const generator = handler.createMessage("system", [{ role: "user", content: "hi" }], { + taskId: "task-123", + }) + for await (const _chunk of generator) { + // drain the stream + } + + const requestHeaders = mockCreate.mock.calls[0][1]?.headers + expect(requestHeaders).toMatchObject({ "X-Zoo-Session-ID": "task-123" }) + }) + + it("should not send the X-Zoo-Session-ID header when no taskId is provided", async () => { + mockCreate.mockReturnValue({ + withResponse: vi.fn().mockResolvedValue({ data: mockStream }), + }) + + const generator = handler.createMessage("system", [{ role: "user", content: "hi" }]) + for await (const _chunk of 
generator) { + // drain the stream + } + + const requestHeaders = mockCreate.mock.calls[0][1]?.headers + expect(requestHeaders).not.toHaveProperty("X-Zoo-Session-ID") + }) + + it("should not send the X-Zoo-Session-ID header when taskId is an empty string", async () => { + mockCreate.mockReturnValue({ + withResponse: vi.fn().mockResolvedValue({ data: mockStream }), + }) + + const generator = handler.createMessage("system", [{ role: "user", content: "hi" }], { + taskId: "", + }) + for await (const _chunk of generator) { + // drain the stream + } + + const requestHeaders = mockCreate.mock.calls[0][1]?.headers + expect(requestHeaders).not.toHaveProperty("X-Zoo-Session-ID") + }) + }) }) diff --git a/src/api/providers/lite-llm.ts b/src/api/providers/lite-llm.ts index 0b79433f35..fbdcf34365 100644 --- a/src/api/providers/lite-llm.ts +++ b/src/api/providers/lite-llm.ts @@ -223,8 +223,22 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa requestOptions.temperature = this.options.modelTemperature ?? 0 } + // LiteLLM recognizes X-<client>-Session-ID for per-conversation request correlation. + // This header enables LiteLLM to group related API calls by task for logging and tracing. + // Unlike Zoo gateways (which use X-Zoo-Task-ID to correlate requests across multiple + // models within a single conversation), this header is specific to the LiteLLM provider + // and facilitates provider-level logging and debugging on LiteLLM's admin panel. + // Matches the convention used by Claude Code (x-claude-code-session-id) and + // GitHub Copilot (x-copilot-session-id). + const requestHeaders: Record<string, string> = {} + if (metadata?.taskId) { + requestHeaders["X-Zoo-Session-ID"] = metadata.taskId + } + try { - const { data: completion } = await this.client.chat.completions.create(requestOptions).withResponse() + const { data: completion } = await this.client.chat.completions + .create(requestOptions, { headers: requestHeaders }) + .withResponse() let lastUsage