Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 209 additions & 0 deletions src/api/providers/__tests__/gemini.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,215 @@ describe("GeminiHandler", () => {
})
})

describe("thoughtSignature round-trip (issue #536)", () => {
// System prompt handed unchanged to every createMessage call in this suite.
const systemPrompt = "You are a helpful assistant"
// Request metadata advertising a single `read_file` tool. Cast to `any` — presumably because
// this is only a partial metadata object; confirm against ApiHandlerCreateMessageMetadata.
const toolMetadata = { tools: [{ function: { name: "read_file", description: "", parameters: {} } }] } as any

/**
 * Wraps a fixed list of chunks in an object that can be consumed with `for await`.
 *
 * Each iteration request starts a fresh generator, so one stream object can be
 * drained more than once and always replays the chunks in their original order.
 *
 * @param chunks - Values to emit, one per iteration step.
 * @returns An object exposing `Symbol.asyncIterator`.
 */
function makeStream(chunks: unknown[]) {
	async function* emitChunks() {
		for (let index = 0; index < chunks.length; index++) {
			yield chunks[index]
		}
	}

	return { [Symbol.asyncIterator]: emitChunks }
}

// Canned Gemini 3.x reply for a turn that issues a tool call. The one candidate holds three
// parts: a thought-text part, the functionCall part, and a separate part containing only the
// thoughtSignature (the layout the real Gemini 3 API is said to use).
const turn1CandidateChunk = {
	candidates: [
		{
			content: {
				parts: [
					{ thought: true, text: "thinking…" },
					{ functionCall: { name: "read_file", args: { path: "foo.ts" } } },
					{ thoughtSignature: "sig-abc123" },
				],
			},
		},
	],
}

// Trailing chunk that carries only token accounting.
const turn1UsageChunk = { usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 } }

const turn1Response = makeStream([turn1CandidateChunk, turn1UsageChunk])

it("captures thoughtSignature from the stream after turn 1", async () => {
	// Arrange: the mocked SDK call resolves to the canned turn-1 stream.
	const streamMock = handler["client"].models.generateContentStream as any
	streamMock.mockResolvedValue(turn1Response)
	const firstTurn: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Read foo.ts" }]

	// Act: consume the whole response; the yielded chunks themselves are not inspected.
	const responseStream = handler.createMessage(systemPrompt, firstTurn, toolMetadata)
	for await (const _ignored of responseStream) {
		// drain
	}

	// Assert: the signature seen in the stream is now exposed by the handler.
	const captured = handler.getThoughtSignature()
	expect(captured).toBe("sig-abc123")
})

it("sends thoughtSignature from history on turn 2 (core regression)", async () => {
	// Regression for issue #536: once turn 1 finishes, the thoughtSignature block is persisted
	// into apiConversationHistory. The handler has to replay it in the turn-2 request,
	// otherwise Gemini 3.x answers with an empty response.

	// Assistant turn in the shape prepareApiConversationMessage stores it:
	// the tool_use block followed by an appended thoughtSignature block.
	const assistantTurn: Anthropic.Messages.MessageParam = {
		role: "assistant",
		content: [
			{ type: "tool_use", id: "call-1", name: "read_file", input: { path: "foo.ts" } },
			{ type: "thoughtSignature", thoughtSignature: "sig-abc123" } as any,
		],
	}
	const toolResultTurn: Anthropic.Messages.MessageParam = {
		role: "user",
		content: [{ type: "tool_result", tool_use_id: "call-1", content: "file contents here" }],
	}
	const historyAfterTurn1: Anthropic.Messages.MessageParam[] = [
		{ role: "user", content: "Read foo.ts" },
		assistantTurn,
		toolResultTurn,
	]

	const streamMock = handler["client"].models.generateContentStream as any
	const turn2Reply = makeStream([
		{ candidates: [{ content: { parts: [{ text: "Done." }] } }] },
		{ usageMetadata: { promptTokenCount: 20, candidatesTokenCount: 5 } },
	])
	streamMock.mockResolvedValue(turn2Reply)

	const responseStream = handler.createMessage(systemPrompt, historyAfterTurn1, toolMetadata)
	for await (const _ignored of responseStream) {
		// drain
	}

	// Inspect the first argument of the first SDK call.
	const request = streamMock.mock.calls[0][0]
	const outgoing: any[] = request.contents

	// The model turn of the outgoing request must carry the signature on its functionCall part.
	const modelTurn = outgoing.find((entry: any) => entry.role === "model")
	expect(modelTurn).toBeDefined()
	const functionCallPart = modelTurn.parts.find((part: any) => part.functionCall)
	expect(functionCallPart).toBeDefined()
	expect(functionCallPart.thoughtSignature).toBe("sig-abc123")
})

it("falls back to base64-encoded skip_thought_signature_validator when history has no signature", async () => {
	// Cross-model history: the earlier session ran on a non-Gemini model, so nothing was stored.
	// The fallback bypass token has to be base64-encoded because Part.thoughtSignature is
	// documented as a base64 field, and Vertex AI is reported to validate that strictly.
	const assistantTurn: Anthropic.Messages.MessageParam = {
		role: "assistant",
		content: [{ type: "tool_use", id: "call-1", name: "read_file", input: { path: "foo.ts" } }],
	}
	const toolResultTurn: Anthropic.Messages.MessageParam = {
		role: "user",
		content: [{ type: "tool_result", tool_use_id: "call-1", content: "file contents" }],
	}
	const historyNoSig: Anthropic.Messages.MessageParam[] = [
		{ role: "user", content: "Read foo.ts" },
		assistantTurn,
		toolResultTurn,
	]

	const streamMock = handler["client"].models.generateContentStream as any
	const cannedReply = makeStream([
		{ candidates: [{ content: { parts: [{ text: "Done." }] } }] },
		{ usageMetadata: { promptTokenCount: 20, candidatesTokenCount: 5 } },
	])
	streamMock.mockResolvedValue(cannedReply)

	const responseStream = handler.createMessage(systemPrompt, historyNoSig, toolMetadata)
	for await (const _ignored of responseStream) {
		// drain
	}

	// Inspect the first argument of the first SDK call.
	const request = streamMock.mock.calls[0][0]
	const outgoing: any[] = request.contents
	const modelTurn = outgoing.find((entry: any) => entry.role === "model")
	const functionCallPart = modelTurn?.parts.find((part: any) => part.functionCall)
	expect(functionCallPart).toBeDefined()

	// Recompute the token independently instead of importing it, so the expected value is pinned here.
	const expectedBypass = Buffer.from("skip_thought_signature_validator").toString("base64")
	expect(functionCallPart.thoughtSignature).toBe(expectedBypass)
})

it("sends thoughtSignature even when reasoningEffort is disabled", async () => {
	// With reasoning effort disabled, thinkingConfig is expected to be undefined. The previous
	// implementation computed
	//   includeThoughtSignatures = Boolean(thinkingConfig) || Boolean(metadata?.tools?.length)
	// which stays true while tools are present and would only turn false without tools.
	// This test pins that the stored signature is still sent when tools are in the metadata.
	const handlerNoReasoning = new GeminiHandler({
		apiKey: "test-key",
		geminiApiKey: "test-key",
		apiModelId: GEMINI_MODEL_NAME,
		reasoningEffort: "disable" as any,
	})

	// Both handlers share one mocked client, so calls made through either are recorded on it.
	const sharedClient = handler["client"] as any
	handlerNoReasoning["client"] = sharedClient

	const assistantTurn: Anthropic.Messages.MessageParam = {
		role: "assistant",
		content: [
			{ type: "tool_use", id: "call-1", name: "read_file", input: { path: "foo.ts" } },
			{ type: "thoughtSignature", thoughtSignature: "sig-xyz" } as any,
		],
	}
	const toolResultTurn: Anthropic.Messages.MessageParam = {
		role: "user",
		content: [{ type: "tool_result", tool_use_id: "call-1", content: "file contents" }],
	}
	const historyWithSig: Anthropic.Messages.MessageParam[] = [
		{ role: "user", content: "Read foo.ts" },
		assistantTurn,
		toolResultTurn,
	]

	const streamMock = sharedClient.models.generateContentStream
	const cannedReply = makeStream([
		{ candidates: [{ content: { parts: [{ text: "Done." }] } }] },
		{ usageMetadata: { promptTokenCount: 20, candidatesTokenCount: 5 } },
	])
	streamMock.mockResolvedValue(cannedReply)

	const responseStream = handlerNoReasoning.createMessage(systemPrompt, historyWithSig, toolMetadata)
	for await (const _ignored of responseStream) {
		// drain
	}

	// Inspect the first argument of the first SDK call.
	const request = streamMock.mock.calls[0][0]
	const outgoing: any[] = request.contents
	const modelTurn = outgoing.find((entry: any) => entry.role === "model")
	const functionCallPart = modelTurn?.parts.find((part: any) => part.functionCall)
	expect(functionCallPart).toBeDefined()
	expect(functionCallPart.thoughtSignature).toBe("sig-xyz")
})

it("does NOT capture thoughtSignature when there are no tools in metadata", async () => {
	// Boundary check: with no tools and (as expected for a disabled reasoning effort) no
	// thinkingConfig, includeThoughtSignatures should be false, so a signature that shows up
	// in the stream must not be stored for a non-tool call.
	const handlerNoReasoning = new GeminiHandler({
		apiKey: "test-key",
		geminiApiKey: "test-key",
		apiModelId: GEMINI_MODEL_NAME,
		reasoningEffort: "disable" as any,
	})

	// Reuse the suite's mocked client so the canned stream below is what this handler receives.
	const sharedClient = handler["client"] as any
	handlerNoReasoning["client"] = sharedClient

	const signedCandidateChunk = {
		candidates: [
			{
				content: {
					parts: [
						{ functionCall: { name: "read_file", args: { path: "foo.ts" } } },
						{ thoughtSignature: "sig-should-not-be-captured" },
					],
				},
			},
		],
	}
	const usageChunk = { usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 } }
	sharedClient.models.generateContentStream.mockResolvedValue(makeStream([signedCandidateChunk, usageChunk]))

	// Metadata is deliberately omitted: no tools are declared for this call.
	const plainConversation: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "hi" }]
	const responseStream = handlerNoReasoning.createMessage(systemPrompt, plainConversation)
	for await (const _ignored of responseStream) {
		// drain
	}

	const captured = handlerNoReasoning.getThoughtSignature()
	expect(captured).toBeUndefined()
})
})

describe("createMessage", () => {
const mockMessages: Anthropic.Messages.MessageParam[] = [
{
Expand Down
6 changes: 3 additions & 3 deletions src/api/providers/lite-llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { ApiHandlerOptions } from "../../shared/api"

import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { GEMINI_THOUGHT_SIGNATURE_BYPASS } from "../transform/gemini-format"
import { sanitizeOpenAiCallId } from "../../utils/tool-id"

import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
Expand Down Expand Up @@ -72,7 +73,7 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
*
* Per LiteLLM documentation:
* - Thought signatures are stored in provider_specific_fields.thought_signature of tool calls
* - The dummy signature base64("skip_thought_signature_validator") bypasses validation
* - The bypass token (GEMINI_THOUGHT_SIGNATURE_BYPASS) skips signature validation
*
* We inject the dummy signature on EVERY tool call unconditionally to ensure Gemini
* doesn't complain about missing/corrupted signatures when conversation history
Expand All @@ -81,8 +82,7 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
private injectThoughtSignatureForGemini(
openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[],
): OpenAI.Chat.ChatCompletionMessageParam[] {
// Base64 encoded "skip_thought_signature_validator" as per LiteLLM docs
const dummySignature = Buffer.from("skip_thought_signature_validator").toString("base64")
const dummySignature = GEMINI_THOUGHT_SIGNATURE_BYPASS

return openAiMessages.map((msg) => {
if (msg.role === "assistant") {
Expand Down
7 changes: 6 additions & 1 deletion src/api/transform/__tests__/gemini-format.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ describe("convertAnthropicMessageToGemini", () => {

const result = convertAnthropicMessageToGemini(anthropicMessage)

// thoughtSignature must be base64-encoded: the Gemini API documents Part.thoughtSignature
// as "Encoded as base64 string". Sending the raw bypass token without base64 encoding fails
// on Vertex AI (Gemini 3.1/3.5 strict validation), causing empty-response loops on turn 2+.
const expectedBypassToken = Buffer.from("skip_thought_signature_validator").toString("base64")

expect(result).toEqual([
{
role: "model",
Expand All @@ -133,7 +138,7 @@ describe("convertAnthropicMessageToGemini", () => {
name: "calculator",
args: { operation: "add", numbers: [2, 3] },
},
thoughtSignature: "skip_thought_signature_validator",
thoughtSignature: expectedBypassToken,
},
],
},
Expand Down
11 changes: 9 additions & 2 deletions src/api/transform/gemini-format.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { Content, Part } from "@google/genai"

// Plain-text form of the bypass token, before encoding.
const THOUGHT_SIGNATURE_BYPASS_PLAINTEXT = "skip_thought_signature_validator"

/**
 * Base64-encoded bypass token used as `Part.thoughtSignature` when no real signature is
 * available (for example, history produced by a different model). It tells Gemini to skip
 * signature validation.
 *
 * The value is base64 because Gemini documents `Part.thoughtSignature` as "Encoded as base64
 * string"; Vertex AI enforces this strictly, and the plain string leads to empty responses
 * after the first tool call.
 */
export const GEMINI_THOUGHT_SIGNATURE_BYPASS = Buffer.from(THOUGHT_SIGNATURE_BYPASS_PLAINTEXT, "utf8").toString(
	"base64",
)

type ThoughtSignatureContentBlock = {
type: "thoughtSignature"
thoughtSignature?: string
Expand Down Expand Up @@ -42,10 +47,12 @@ export function convertAnthropicContentToGemini(
// Determine the signature to attach to function calls.
// If we're in a mode that expects signatures (includeThoughtSignatures is true):
// 1. Use the actual signature if we found one in the history/content.
// 2. Fallback to "skip_thought_signature_validator" if missing (e.g. cross-model history).
// 2. Fallback to a base64-encoded bypass token if missing (e.g. cross-model history).
// Part.thoughtSignature is documented as "Encoded as base64 string" — Vertex AI validates
// this strictly and returns empty responses when a non-base64 value is sent.
let functionCallSignature: string | undefined
if (includeThoughtSignatures) {
functionCallSignature = activeThoughtSignature || "skip_thought_signature_validator"
functionCallSignature = activeThoughtSignature || GEMINI_THOUGHT_SIGNATURE_BYPASS
}

if (typeof content === "string") {
Expand Down
Loading