# Patch summary (four files):
#   * src/api/transform/gemini-format.ts: adds the exported constant
#     GEMINI_THOUGHT_SIGNATURE_BYPASS, the base64 encoding of
#     "skip_thought_signature_validator", and uses it in place of the plain string as the
#     fallback functionCall signature when includeThoughtSignatures is set and no
#     signature was found in the content.
#   * src/api/providers/lite-llm.ts: imports that constant and assigns it to
#     dummySignature, replacing the inline Buffer.from(...).toString("base64") expression.
#   * src/api/transform/__tests__/gemini-format.spec.ts: the expected thoughtSignature
#     becomes the base64-encoded token.
#   * src/api/providers/__tests__/gemini.spec.ts: adds the suite
#     "thoughtSignature round-trip (issue #536)" with five tests: capture from the stream,
#     resend from history, base64 fallback when history has no signature, resend with
#     reasoningEffort "disable", and no capture when metadata carries no tools.
# NOTE(review): the line breaks inside this patch appear to have been lost, so the line
# counts declared in the @@ hunk headers cannot be checked against the text below.
diff --git a/src/api/providers/__tests__/gemini.spec.ts b/src/api/providers/__tests__/gemini.spec.ts index e2633474a..76b0b3abb 100644 --- a/src/api/providers/__tests__/gemini.spec.ts +++ b/src/api/providers/__tests__/gemini.spec.ts @@ -54,6 +54,215 @@ describe("GeminiHandler", () => { }) }) + describe("thoughtSignature round-trip (issue #536)", () => { + const systemPrompt = "You are a helpful assistant" + const toolMetadata = { tools: [{ function: { name: "read_file", description: "", parameters: {} } }] } as any + + // Helper: build a mock async-iterable stream from chunks + function makeStream(chunks: unknown[]) { + return { + [Symbol.asyncIterator]: async function* () { + for (const chunk of chunks) yield chunk + }, + } + } + + // Simulate a Gemini 3.x response: thoughtSignature arrives on its own part, + // alongside a functionCall part (the way the real Gemini 3 API returns it). + const turn1Response = makeStream([ + { + candidates: [ + { + content: { + parts: [ + { thought: true, text: "thinking…" }, + { functionCall: { name: "read_file", args: { path: "foo.ts" } } }, + { thoughtSignature: "sig-abc123" }, + ], + }, + }, + ], + }, + { usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 } }, + ]) + + it("captures thoughtSignature from the stream after turn 1", async () => { + ;(handler["client"].models.generateContentStream as any).mockResolvedValue(turn1Response) + + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Read foo.ts" }] + + for await (const _chunk of handler.createMessage(systemPrompt, messages, toolMetadata)) { + // drain + } + + expect(handler.getThoughtSignature()).toBe("sig-abc123") + }) + + it("sends thoughtSignature from history on turn 2 (core regression)", async () => { + // This is the bug from issue #536: after turn 1 the thoughtSignature block is + // persisted into apiConversationHistory. 
On turn 2 the handler must include it + // in the outgoing request, otherwise Gemini 3.x returns an empty response. + const historyAfterTurn1: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "Read foo.ts" }, + { + role: "assistant", + // assistant turn as stored by prepareApiConversationMessage: + // tool_use block + appended thoughtSignature block + content: [ + { type: "tool_use", id: "call-1", name: "read_file", input: { path: "foo.ts" } }, + { type: "thoughtSignature", thoughtSignature: "sig-abc123" } as any, + ], + }, + { + role: "user", + content: [{ type: "tool_result", tool_use_id: "call-1", content: "file contents here" }], + }, + ] + + ;(handler["client"].models.generateContentStream as any).mockResolvedValue( + makeStream([ + { candidates: [{ content: { parts: [{ text: "Done." }] } }] }, + { usageMetadata: { promptTokenCount: 20, candidatesTokenCount: 5 } }, + ]), + ) + + for await (const _chunk of handler.createMessage(systemPrompt, historyAfterTurn1, toolMetadata)) { + // drain + } + + const callArgs = (handler["client"].models.generateContentStream as any).mock.calls[0][0] + const contents: any[] = callArgs.contents + + // The model turn in the outgoing request must carry the thoughtSignature on its functionCall part + const modelTurn = contents.find((c: any) => c.role === "model") + expect(modelTurn).toBeDefined() + const fnPart = modelTurn.parts.find((p: any) => p.functionCall) + expect(fnPart).toBeDefined() + expect(fnPart.thoughtSignature).toBe("sig-abc123") + }) + + it("falls back to base64-encoded skip_thought_signature_validator when history has no signature", async () => { + // Cross-model history scenario: prior session used a non-Gemini model, no signature stored. + // The fallback bypass token must be base64-encoded because Part.thoughtSignature is + // documented as a base64 field. Vertex AI validates this strictly. 
+ const historyNoSig: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "Read foo.ts" }, + { + role: "assistant", + content: [{ type: "tool_use", id: "call-1", name: "read_file", input: { path: "foo.ts" } }], + }, + { + role: "user", + content: [{ type: "tool_result", tool_use_id: "call-1", content: "file contents" }], + }, + ] + + ;(handler["client"].models.generateContentStream as any).mockResolvedValue( + makeStream([ + { candidates: [{ content: { parts: [{ text: "Done." }] } }] }, + { usageMetadata: { promptTokenCount: 20, candidatesTokenCount: 5 } }, + ]), + ) + + for await (const _chunk of handler.createMessage(systemPrompt, historyNoSig, toolMetadata)) { + // drain + } + + const callArgs = (handler["client"].models.generateContentStream as any).mock.calls[0][0] + const contents: any[] = callArgs.contents + const modelTurn = contents.find((c: any) => c.role === "model") + const fnPart = modelTurn?.parts.find((p: any) => p.functionCall) + expect(fnPart).toBeDefined() + const expectedBypass = Buffer.from("skip_thought_signature_validator").toString("base64") + expect(fnPart.thoughtSignature).toBe(expectedBypass) + }) + + it("sends thoughtSignature even when reasoningEffort is disabled", async () => { + // If the user disables reasoning effort, thinkingConfig=undefined. + // The old code: includeThoughtSignatures = Boolean(thinkingConfig) || Boolean(metadata?.tools?.length) + // With tools present this is still true — but if called with no tools it would be false. + // Verify the signature is sent regardless when tools are in the metadata. 
+ const handlerNoReasoning = new GeminiHandler({ + apiKey: "test-key", + geminiApiKey: "test-key", + apiModelId: GEMINI_MODEL_NAME, + reasoningEffort: "disable" as any, + }) + handlerNoReasoning["client"] = handler["client"] as any + + const historyWithSig: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "Read foo.ts" }, + { + role: "assistant", + content: [ + { type: "tool_use", id: "call-1", name: "read_file", input: { path: "foo.ts" } }, + { type: "thoughtSignature", thoughtSignature: "sig-xyz" } as any, + ], + }, + { + role: "user", + content: [{ type: "tool_result", tool_use_id: "call-1", content: "file contents" }], + }, + ] + + ;(handler["client"].models.generateContentStream as any).mockResolvedValue( + makeStream([ + { candidates: [{ content: { parts: [{ text: "Done." }] } }] }, + { usageMetadata: { promptTokenCount: 20, candidatesTokenCount: 5 } }, + ]), + ) + + for await (const _chunk of handlerNoReasoning.createMessage(systemPrompt, historyWithSig, toolMetadata)) { + // drain + } + + const callArgs = (handler["client"].models.generateContentStream as any).mock.calls[0][0] + const contents: any[] = callArgs.contents + const modelTurn = contents.find((c: any) => c.role === "model") + const fnPart = modelTurn?.parts.find((p: any) => p.functionCall) + expect(fnPart).toBeDefined() + expect(fnPart.thoughtSignature).toBe("sig-xyz") + }) + + it("does NOT capture thoughtSignature when there are no tools in metadata", async () => { + // Without tools, includeThoughtSignatures=false when thinkingConfig is also absent. + // This tests the boundary so we don't over-eagerly store signatures for non-tool calls. 
+ const handlerNoReasoning = new GeminiHandler({ + apiKey: "test-key", + geminiApiKey: "test-key", + apiModelId: GEMINI_MODEL_NAME, + reasoningEffort: "disable" as any, + }) + handlerNoReasoning["client"] = handler["client"] as any + ;(handler["client"].models.generateContentStream as any).mockResolvedValue( + makeStream([ + { + candidates: [ + { + content: { + parts: [ + { functionCall: { name: "read_file", args: { path: "foo.ts" } } }, + { thoughtSignature: "sig-should-not-be-captured" }, + ], + }, + }, + ], + }, + { usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 } }, + ]), + ) + + // No tools in metadata, no thinkingConfig → includeThoughtSignatures=false + for await (const _chunk of handlerNoReasoning.createMessage(systemPrompt, [ + { role: "user", content: "hi" }, + ])) { + // drain + } + + expect(handlerNoReasoning.getThoughtSignature()).toBeUndefined() + }) + }) + describe("createMessage", () => { const mockMessages: Anthropic.Messages.MessageParam[] = [ { diff --git a/src/api/providers/lite-llm.ts b/src/api/providers/lite-llm.ts index 981f984de..3f9b3732e 100644 --- a/src/api/providers/lite-llm.ts +++ b/src/api/providers/lite-llm.ts @@ -9,6 +9,7 @@ import { ApiHandlerOptions } from "../../shared/api" import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { convertToOpenAiMessages } from "../transform/openai-format" +import { GEMINI_THOUGHT_SIGNATURE_BYPASS } from "../transform/gemini-format" import { sanitizeOpenAiCallId } from "../../utils/tool-id" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" @@ -72,7 +73,7 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa * * Per LiteLLM documentation: * - Thought signatures are stored in provider_specific_fields.thought_signature of tool calls - * - The dummy signature base64("skip_thought_signature_validator") bypasses validation + * - The bypass token (GEMINI_THOUGHT_SIGNATURE_BYPASS) skips signature 
validation * * We inject the dummy signature on EVERY tool call unconditionally to ensure Gemini * doesn't complain about missing/corrupted signatures when conversation history @@ -81,8 +82,7 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa private injectThoughtSignatureForGemini( openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[], ): OpenAI.Chat.ChatCompletionMessageParam[] { - // Base64 encoded "skip_thought_signature_validator" as per LiteLLM docs - const dummySignature = Buffer.from("skip_thought_signature_validator").toString("base64") + const dummySignature = GEMINI_THOUGHT_SIGNATURE_BYPASS return openAiMessages.map((msg) => { if (msg.role === "assistant") { diff --git a/src/api/transform/__tests__/gemini-format.spec.ts b/src/api/transform/__tests__/gemini-format.spec.ts index 23f752e20..327a05479 100644 --- a/src/api/transform/__tests__/gemini-format.spec.ts +++ b/src/api/transform/__tests__/gemini-format.spec.ts @@ -123,6 +123,11 @@ describe("convertAnthropicMessageToGemini", () => { const result = convertAnthropicMessageToGemini(anthropicMessage) + // thoughtSignature must be base64-encoded: the Gemini API documents Part.thoughtSignature + // as "Encoded as base64 string". Sending the raw bypass token without base64 encoding fails + // on Vertex AI (Gemini 3.1/3.5 strict validation), causing empty-response loops on turn 2+. 
+ const expectedBypassToken = Buffer.from("skip_thought_signature_validator").toString("base64") + expect(result).toEqual([ { role: "model", @@ -133,7 +138,7 @@ describe("convertAnthropicMessageToGemini", () => { name: "calculator", args: { operation: "add", numbers: [2, 3] }, }, - thoughtSignature: "skip_thought_signature_validator", + thoughtSignature: expectedBypassToken, }, ], }, diff --git a/src/api/transform/gemini-format.ts b/src/api/transform/gemini-format.ts index 6f2403629..53bab11c5 100644 --- a/src/api/transform/gemini-format.ts +++ b/src/api/transform/gemini-format.ts @@ -1,6 +1,11 @@ import { Anthropic } from "@anthropic-ai/sdk" import { Content, Part } from "@google/genai" +// Gemini documents Part.thoughtSignature as "Encoded as base64 string". Vertex AI enforces +// this strictly — sending the plain string causes empty responses after the first tool call. +// This bypass token tells Gemini to skip signature validation for cross-model history entries. +export const GEMINI_THOUGHT_SIGNATURE_BYPASS = Buffer.from("skip_thought_signature_validator").toString("base64") + type ThoughtSignatureContentBlock = { type: "thoughtSignature" thoughtSignature?: string @@ -42,10 +47,12 @@ export function convertAnthropicContentToGemini( // Determine the signature to attach to function calls. // If we're in a mode that expects signatures (includeThoughtSignatures is true): // 1. Use the actual signature if we found one in the history/content. - // 2. Fallback to "skip_thought_signature_validator" if missing (e.g. cross-model history). + // 2. Fallback to a base64-encoded bypass token if missing (e.g. cross-model history). + // Part.thoughtSignature is documented as "Encoded as base64 string" — Vertex AI validates + // this strictly and returns empty responses when a non-base64 value is sent. 
let functionCallSignature: string | undefined if (includeThoughtSignatures) { - functionCallSignature = activeThoughtSignature || "skip_thought_signature_validator" + functionCallSignature = activeThoughtSignature || GEMINI_THOUGHT_SIGNATURE_BYPASS } if (typeof content === "string") {