diff --git a/apps/vscode-e2e/fixtures/deepseek-v4.json b/apps/vscode-e2e/fixtures/deepseek-v4.json index 995dd0951..a4710207e 100644 --- a/apps/vscode-e2e/fixtures/deepseek-v4.json +++ b/apps/vscode-e2e/fixtures/deepseek-v4.json @@ -7,6 +7,8 @@ "sequenceIndex": 0 }, "response": { + "content": "", + "reasoning": "I should read the file to find the marker.", "toolCalls": [ { "name": "read_file", @@ -69,6 +71,8 @@ "sequenceIndex": 0 }, "response": { + "content": "", + "reasoning": "I should read the file to find the marker.", "toolCalls": [ { "name": "read_file", diff --git a/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts b/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts index c8704882e..6391c7987 100644 --- a/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts +++ b/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts @@ -19,6 +19,8 @@ type CapturedDeepSeekRequest = { maxCompletionTokens?: number probeTag?: string lastUserMessage: string + /** True if any assistant message in the conversation history has a non-empty reasoning_content field. */ + hasReasoningContentInHistory: boolean } type DeepSeekProbeResult = { @@ -57,7 +59,7 @@ function getRequestBody(init?: RequestInit): thinking?: { type?: "enabled" | "disabled" } reasoning_effort?: string max_completion_tokens?: number - messages?: Array<{ role?: string; content?: unknown }> + messages?: Array<{ role?: string; content?: unknown; reasoning_content?: string }> } | undefined { if (!init?.body || typeof init.body !== "string") { @@ -83,6 +85,13 @@ function installDeepSeekRequestCapture(capture: CapturedDeepSeekRequest[], baseU const allMessagesText = JSON.stringify(body.messages ?? []) const probeTag = allMessagesText.match(/deepseek-v4-e2e:[^"\s]+/)?.[0] + const hasReasoningContentInHistory = (body.messages ?? []).some( + (message) => + message.role === "assistant" && + typeof message.reasoning_content === "string" && + message.reasoning_content.length > 0, + ) + const request = { model: body.model, thinkingType: body.thinking?.type, @@ -90,6 +99,7 @@ function installDeepSeekRequestCapture(capture: CapturedDeepSeekRequest[], baseU maxCompletionTokens: body.max_completion_tokens, probeTag, lastUserMessage, + hasReasoningContentInHistory, } satisfies CapturedDeepSeekRequest capture.push(request) @@ -123,6 +133,7 @@ function formatDiagnostics(result: DeepSeekProbeResult) { thinkingType: request.thinkingType, reasoningEffort: request.reasoningEffort, maxCompletionTokens: request.maxCompletionTokens, + hasReasoningContentInHistory: request.hasReasoningContentInHistory, probeTag: request.probeTag, lastUserMessage: request.lastUserMessage.slice(0, 160), } @@ -368,6 +379,20 @@ suite("DeepSeek V4 provider", function () { firstRequest.reasoningEffort === "high" || firstRequest.reasoningEffort === "max", `Reasoning-enabled probe should send a DeepSeek reasoning_effort.\n${diagnostics}`, ) + + // Verify that reasoning_content from turn 1 is round-tripped in the turn 2 request. + // DeepSeek's API spec requires reasoning_content to be passed back when thinking mode + // is active — omitting it may cause a 400 error depending on model version (issue #201). + const secondRequest = result.requests[1] + assert.ok( + secondRequest, + `Reasoning-enabled probe should issue a second request (after tool call).\n${diagnostics}`, + ) + assert.ok( + secondRequest.hasReasoningContentInHistory, + `Turn 2 request must include reasoning_content on the assistant message from turn 1 ` + + `(required by DeepSeek API spec when thinking mode is active — issue #201).\n${diagnostics}`, + ) } else { assert.strictEqual( firstRequest.thinkingType, @@ -379,6 +404,17 @@ suite("DeepSeek V4 provider", function () { undefined, `Reasoning-disabled probe should omit reasoning_effort.\n${diagnostics}`, ) + + // Negative guard: reasoning-off requests must never carry reasoning_content, + // which would indicate the capture flag itself is broken. + const secondRequestOff = result.requests[1] + if (secondRequestOff) { + assert.strictEqual( + secondRequestOff.hasReasoningContentInHistory, + false, + `Turn 2 request must NOT include reasoning_content when thinking is disabled.\n${diagnostics}`, + ) + } } assert.ok(result.completed, `Task should complete cleanly.\n${diagnostics}`) diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index 708a13195..332fc3960 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -805,6 +805,45 @@ describe("OpenAiHandler", () => { ]) }) }) + + it("should include reasoning_content on assistant history messages when preserveReasoning is set", async () => { + // Regression guard for issue #201: OpenAI-compatible providers (e.g. DeepSeek via custom + // base URL) must pass reasoning_content back in history when thinking mode is active. + // This exercises OpenAiHandler -> convertToOpenAiMessages directly. + const thinkingHandler = new OpenAiHandler({ + ...mockOptions, + openAiCustomModelInfo: { + contextWindow: 128_000, + supportsPromptCache: false, + preserveReasoning: true, + }, + }) + + const messagesWithReasoning: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "What files are in the project?" }, + { + role: "assistant", + content: [ + { type: "reasoning", text: "I should use the read_file tool.", summary: [] } as any, + { type: "tool_use", id: "call_001", name: "read_file", input: { path: "README.md" } }, + ], + }, + { + role: "user", + content: [{ type: "tool_result", tool_use_id: "call_001", content: "# Project\nHello." }], + }, + ] + + const stream = thinkingHandler.createMessage(systemPrompt, messagesWithReasoning) + for await (const _chunk of stream) { + } + + expect(mockCreate).toHaveBeenCalled() + const sentMessages: any[] = mockCreate.mock.calls[0][0].messages + const assistantMsg = sentMessages.find((m: any) => m.role === "assistant" && m.tool_calls?.length) + expect(assistantMsg).toBeDefined() + expect(assistantMsg.reasoning_content).toBe("I should use the read_file tool.") + }) }) describe("error handling", () => { diff --git a/src/api/transform/__tests__/openai-format.spec.ts b/src/api/transform/__tests__/openai-format.spec.ts index b58deea64..214531312 100644 --- a/src/api/transform/__tests__/openai-format.spec.ts +++ b/src/api/transform/__tests__/openai-format.spec.ts @@ -1084,6 +1084,157 @@ describe("convertToOpenAiMessages", () => { expect(assistantMessage.reasoning_details[2].data).toBe("encrypted_data") }) }) + + describe("reasoning_content round-trip for DeepSeek / Z.ai thinking mode", () => { + it("should pass through top-level reasoning_content on assistant messages", () => { + const anthropicMessages = [ + { + role: "assistant" as const, + content: "Here is my answer.", + reasoning_content: "Let me think about this carefully...", + }, + ] as any as Anthropic.Messages.MessageParam[] + + const result = convertToOpenAiMessages(anthropicMessages) + + expect(result).toHaveLength(1) + expect((result[0] as any).reasoning_content).toBe("Let me think about this carefully...") + }) + + it("should extract reasoning_content from reasoning content block", () => { + // buildCleanConversationHistory stores reasoning as a content block when preserveReasoning=true + const anthropicMessages = [ + { + role: "assistant" as const, + content: [ + { type: "reasoning", text: "Let me think...", summary: [] }, + { type: "text", text: "My answer." }, + ], + }, + ] as any as Anthropic.Messages.MessageParam[] + + const result = convertToOpenAiMessages(anthropicMessages) + + expect(result).toHaveLength(1) + const msg = result[0] as any + expect(msg.reasoning_content).toBe("Let me think...") + expect(msg.content).toBe("My answer.") + }) + + it("should extract reasoning_content from reasoning block alongside tool calls", () => { + // The critical case: DeepSeek thinking + tool call in the same turn. + // Without reasoning_content on the second request, DeepSeek returns 400: + // "The reasoning_content in the thinking mode must be passed back to the API." + const anthropicMessages = [ + { + role: "assistant" as const, + content: [ + { type: "reasoning", text: "I need to read a file.", summary: [] }, + { + type: "tool_use", + id: "call_abc", + name: "read_file", + input: { path: "foo.txt" }, + }, + ], + }, + ] as any as Anthropic.Messages.MessageParam[] + + const result = convertToOpenAiMessages(anthropicMessages) + + expect(result).toHaveLength(1) + const msg = result[0] as any + expect(msg.reasoning_content).toBe("I need to read a file.") + expect(msg.tool_calls).toHaveLength(1) + expect(msg.tool_calls[0].id).toBe("call_abc") + }) + + it("should accumulate multiple reasoning blocks in order, separated by a tool call", () => { + // DeepSeek / Z.ai interleaved thinking can emit more than one reasoning block per + // turn. A regression that overwrites (instead of accumulates) would silently drop + // all but the last block. + const anthropicMessages = [ + { + role: "assistant" as const, + content: [ + { type: "reasoning", text: "First, I should check the file.", summary: [] }, + { + type: "tool_use", + id: "call_abc", + name: "read_file", + input: { path: "foo.txt" }, + }, + { type: "reasoning", text: "Now I know what to do next.", summary: [] }, + ], + }, + ] as any as Anthropic.Messages.MessageParam[] + + const result = convertToOpenAiMessages(anthropicMessages) + + expect(result).toHaveLength(1) + const msg = result[0] as any + expect(msg.reasoning_content).toBe("First, I should check the file.Now I know what to do next.") + expect(msg.tool_calls).toHaveLength(1) + expect(msg.tool_calls[0].id).toBe("call_abc") + }) + + it("should prefer top-level reasoning_content over content block", () => { + const anthropicMessages = [ + { + role: "assistant" as const, + content: [ + { type: "reasoning", text: "block reasoning", summary: [] }, + { type: "text", text: "answer" }, + ], + reasoning_content: "top-level reasoning", + }, + ] as any as Anthropic.Messages.MessageParam[] + + const result = convertToOpenAiMessages(anthropicMessages) + + expect((result[0] as any).reasoning_content).toBe("top-level reasoning") + }) + + it("should not set reasoning_content when there is none", () => { + const anthropicMessages: Anthropic.Messages.MessageParam[] = [ + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "call_abc", + name: "read_file", + input: { path: "foo.txt" }, + }, + ], + }, + ] + + const result = convertToOpenAiMessages(anthropicMessages) + + expect(result).toHaveLength(1) + expect((result[0] as any).reasoning_content).toBeUndefined() + }) + + it("should ignore non-object content parts without crashing (defensive guard)", () => { + // getReasoningBlockText guards against non-object parts (e.g. a stray + // string in the content array). Such parts are not reasoning blocks and + // must be skipped rather than crashing or being misread as reasoning. + const anthropicMessages = [ + { + role: "assistant" as const, + content: ["not-a-block" as any, { type: "text", text: "answer" }], + }, + ] as any as Anthropic.Messages.MessageParam[] + + const result = convertToOpenAiMessages(anthropicMessages) + + expect(result).toHaveLength(1) + const msg = result[0] as any + expect(msg.content).toBe("answer") + expect(msg.reasoning_content).toBeUndefined() + }) + }) }) describe("consolidateReasoningDetails", () => { diff --git a/src/api/transform/openai-format.ts b/src/api/transform/openai-format.ts index 175872c63..c06064f1d 100644 --- a/src/api/transform/openai-format.ts +++ b/src/api/transform/openai-format.ts @@ -253,6 +253,52 @@ export function sanitizeGeminiMessages( return sanitized } +/** + * A reasoning content block (DeepSeek / Z.ai interleaved thinking) that may + * appear in an assistant message's content array. Not part of Anthropic's + * ContentBlockParam union, so it's declared separately here. + */ +interface ReasoningContentBlock { + type: "reasoning" + text?: string +} + +/** + * Extracts the reasoning text if `part` is a {@link ReasoningContentBlock}, or `undefined` + * otherwise. Providers like DeepSeek / Z.ai emit a `"reasoning"` content block that isn't part of + * Anthropic's `ContentBlockParam` union, so `part` is treated as `unknown` and its shape is + * validated at runtime rather than assumed via a cast. A boolean type-guard isn't viable here: + * TypeScript can't narrow `part` to `ReasoningContentBlock` because their `type` literals don't + * overlap with any variant of `ContentBlockParam`, so the narrowed type collapses to `never`. + */ +function getReasoningBlockText(part: unknown): string | undefined { + if (!part || typeof part !== "object") { + return undefined + } + const block = part as { type?: unknown; text?: unknown } + return block.type === "reasoning" && typeof block.text === "string" ? block.text : undefined +} + +/** + * Non-standard fields Zoo Code attaches to Anthropic message params to + * round-trip provider-specific reasoning state (e.g. DeepSeek's + * `reasoning_content`, which must be echoed back or the API returns a 400). + */ +type AssistantMessageWithReasoning = Anthropic.Messages.MessageParam & { + reasoning_details?: any[] + reasoning_content?: string +} + +/** + * Extra fields layered onto an outgoing OpenAI chat message to round-trip + * provider-specific reasoning state. Shared by both branches of the + * conversion below (string-content and content-block assistant messages). + */ +type ReasoningPassthroughFields = { + reasoning_details?: any[] + reasoning_content?: string +} + /** * Options for converting Anthropic messages to OpenAI format. */ @@ -306,8 +352,8 @@ export function convertToOpenAiMessages( // will convert a single text block into a string for compactness. // If a message also contains reasoning_details (Gemini 3 / xAI / o-series, etc.), // we must preserve it here as well. - const messageWithDetails = anthropicMessage as any - const baseMessage: OpenAI.Chat.ChatCompletionMessageParam & { reasoning_details?: any[] } = { + const messageWithDetails = anthropicMessage as AssistantMessageWithReasoning + const baseMessage: OpenAI.Chat.ChatCompletionMessageParam & ReasoningPassthroughFields = { role: anthropicMessage.role, content: anthropicMessage.content, } @@ -315,7 +361,11 @@ export function convertToOpenAiMessages( if (anthropicMessage.role === "assistant") { const mapped = mapReasoningDetails(messageWithDetails.reasoning_details) if (mapped) { - ;(baseMessage as any).reasoning_details = mapped + baseMessage.reasoning_details = mapped + } + // Pass through reasoning_content for DeepSeek / Z.ai thinking mode. + if (typeof messageWithDetails.reasoning_content === "string" && messageWithDetails.reasoning_content) { + baseMessage.reasoning_content = messageWithDetails.reasoning_content } } @@ -450,6 +500,9 @@ export function convertToOpenAiMessages( } } } else if (anthropicMessage.role === "assistant") { + const messageWithDetails = anthropicMessage as AssistantMessageWithReasoning + + let extractedReasoning: string | undefined const { nonToolMessages, toolMessages } = anthropicMessage.content.reduce<{ nonToolMessages: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[] toolMessages: Anthropic.ToolUseBlockParam[] @@ -459,6 +512,17 @@ export function convertToOpenAiMessages( acc.toolMessages.push(part) } else if (part.type === "text" || part.type === "image") { acc.nonToolMessages.push(part) + } else { + const reasoningText = getReasoningBlockText(part) + if (reasoningText) { + // Extract reasoning stored as a content block (DeepSeek / Z.ai interleaved thinking). + // Must be passed back as top-level reasoning_content so providers like DeepSeek + // don't reject the request with "reasoning_content must be passed back to the API". + // Accumulate all blocks (a turn may have more than one) to preserve order. + extractedReasoning = extractedReasoning + ? extractedReasoning + reasoningText + : reasoningText + } } // assistant cannot send tool_result messages return acc }, @@ -489,15 +553,10 @@ export function convertToOpenAiMessages( }, })) - // Check if the message has reasoning_details (used by Gemini 3, xAI, etc.) - const messageWithDetails = anthropicMessage as any - // Build message with reasoning_details BEFORE tool_calls to preserve // the order expected by providers like Roo. Property order matters // when sending messages back to some APIs. - const baseMessage: OpenAI.Chat.ChatCompletionAssistantMessageParam & { - reasoning_details?: any[] - } = { + const baseMessage: OpenAI.Chat.ChatCompletionAssistantMessageParam & ReasoningPassthroughFields = { role: "assistant", // Use empty string instead of undefined for providers like Gemini (via OpenRouter) // that require every message to have content in the "parts" field @@ -511,6 +570,18 @@ export function convertToOpenAiMessages( baseMessage.reasoning_details = mapped } + // Pass through reasoning_content for providers that require it in history + // (e.g. DeepSeek thinking mode: "reasoning_content must be passed back to the API"). + // Prefer top-level field (already round-tripped); fall back to reasoning from content blocks. + const outgoingReasoningContent: string | undefined = + (typeof messageWithDetails.reasoning_content === "string" && + messageWithDetails.reasoning_content.length > 0 + ? messageWithDetails.reasoning_content + : undefined) ?? extractedReasoning + if (outgoingReasoningContent) { + baseMessage.reasoning_content = outgoingReasoningContent + } + // Add tool_calls after reasoning_details // Cannot be an empty array. API expects an array with minimum length 1, and will respond with an error if it's empty if (tool_calls.length > 0) {