From 2c2f708138b5bf6dfaaa9d9e99a9138449ceb8a6 Mon Sep 17 00:00:00 2001
From: Elliott de Launay <edelauna@gmail.com>
Date: Tue, 30 Jun 2026 22:44:23 +0000
Subject: [PATCH 1/2] fix(deepseek): round-trip reasoning_content across
 DeepSeek model providers

---
 apps/vscode-e2e/fixtures/deepseek-v4.json     |   4 +
 .../src/suite/providers/deepseek-v4.test.ts   |  27 ++++-
 src/api/providers/__tests__/openai.spec.ts    |  39 +++++++
 .../transform/__tests__/openai-format.spec.ts | 103 ++++++++++++++++++
 src/api/transform/openai-format.ts            |  33 +++++-
 5 files changed, 201 insertions(+), 5 deletions(-)

diff --git a/apps/vscode-e2e/fixtures/deepseek-v4.json b/apps/vscode-e2e/fixtures/deepseek-v4.json
index 995dd0951b..a4710207e2 100644
--- a/apps/vscode-e2e/fixtures/deepseek-v4.json
+++ b/apps/vscode-e2e/fixtures/deepseek-v4.json
@@ -7,6 +7,8 @@
 				"sequenceIndex": 0
 			},
 			"response": {
+				"content": "",
+				"reasoning": "I should read the file to find the marker.",
 				"toolCalls": [
 					{
 						"name": "read_file",
@@ -69,6 +71,8 @@
 				"sequenceIndex": 0
 			},
 			"response": {
+				"content": "",
+				"reasoning": "I should read the file to find the marker.",
 				"toolCalls": [
 					{
 						"name": "read_file",
diff --git a/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts b/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts
index c8704882e0..d484cfe21d 100644
--- a/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts
+++ b/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts
@@ -19,6 +19,8 @@ type CapturedDeepSeekRequest = {
 	maxCompletionTokens?: number
 	probeTag?: string
 	lastUserMessage: string
+	/** True if any assistant message in the conversation history has a non-empty reasoning_content field. */
+	hasReasoningContentInHistory: boolean
 }
 
 type DeepSeekProbeResult = {
@@ -57,7 +59,7 @@ function getRequestBody(init?: RequestInit):
 			thinking?: { type?: "enabled" | "disabled" }
 			reasoning_effort?: string
 			max_completion_tokens?: number
-			messages?: Array<{ role?: string; content?: unknown }>
+			messages?: Array<{ role?: string; content?: unknown; reasoning_content?: string }>
 	  }
 	| undefined {
 	if (!init?.body || typeof init.body !== "string") {
@@ -83,6 +85,13 @@ function installDeepSeekRequestCapture(capture: CapturedDeepSeekRequest[], baseU
 			const allMessagesText = JSON.stringify(body.messages ?? [])
 			const probeTag = allMessagesText.match(/deepseek-v4-e2e:[^"\s]+/)?.[0]
 
+			const hasReasoningContentInHistory = (body.messages ?? []).some(
+				(message) =>
+					message.role === "assistant" &&
+					typeof message.reasoning_content === "string" &&
+					message.reasoning_content.length > 0,
+			)
+
 			const request = {
 				model: body.model,
 				thinkingType: body.thinking?.type,
@@ -90,6 +99,7 @@ function installDeepSeekRequestCapture(capture: CapturedDeepSeekRequest[], baseU
 				maxCompletionTokens: body.max_completion_tokens,
 				probeTag,
 				lastUserMessage,
+				hasReasoningContentInHistory,
 			} satisfies CapturedDeepSeekRequest
 
 			capture.push(request)
@@ -123,6 +133,7 @@ function formatDiagnostics(result: DeepSeekProbeResult) {
 				thinkingType: request.thinkingType,
 				reasoningEffort: request.reasoningEffort,
 				maxCompletionTokens: request.maxCompletionTokens,
+				hasReasoningContentInHistory: request.hasReasoningContentInHistory,
 				probeTag: request.probeTag,
 				lastUserMessage: request.lastUserMessage.slice(0, 160),
 			}
@@ -368,6 +379,20 @@ suite("DeepSeek V4 provider", function () {
 					firstRequest.reasoningEffort === "high" || firstRequest.reasoningEffort === "max",
 					`Reasoning-enabled probe should send a DeepSeek reasoning_effort.\n${diagnostics}`,
 				)
+
+				// Verify that reasoning_content from turn 1 is round-tripped in the turn 2 request.
+				// DeepSeek's API spec requires reasoning_content to be passed back when thinking mode
+				// is active — omitting it may cause a 400 error depending on model version (issue #201).
+				const secondRequest = result.requests[1]
+				assert.ok(
+					secondRequest,
+					`Reasoning-enabled probe should issue a second request (after tool call).\n${diagnostics}`,
+				)
+				assert.ok(
+					secondRequest.hasReasoningContentInHistory,
+					`Turn 2 request must include reasoning_content on the assistant message from turn 1 ` +
+						`(required by DeepSeek API spec when thinking mode is active — issue #201).\n${diagnostics}`,
+				)
 			} else {
 				assert.strictEqual(
 					firstRequest.thinkingType,
diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts
index 708a131957..332fc39602 100644
--- a/src/api/providers/__tests__/openai.spec.ts
+++ b/src/api/providers/__tests__/openai.spec.ts
@@ -805,6 +805,45 @@ describe("OpenAiHandler", () => {
 				])
 			})
 		})
+
+		it("should include reasoning_content on assistant history messages when preserveReasoning is set", async () => {
+			// Regression guard for issue #201: OpenAI-compatible providers (e.g. DeepSeek via custom
+			// base URL) must pass reasoning_content back in history when thinking mode is active.
+			// This exercises OpenAiHandler -> convertToOpenAiMessages directly.
+			const thinkingHandler = new OpenAiHandler({
+				...mockOptions,
+				openAiCustomModelInfo: {
+					contextWindow: 128_000,
+					supportsPromptCache: false,
+					preserveReasoning: true,
+				},
+			})
+
+			const messagesWithReasoning: Anthropic.Messages.MessageParam[] = [
+				{ role: "user", content: "What files are in the project?" },
+				{
+					role: "assistant",
+					content: [
+						{ type: "reasoning", text: "I should use the read_file tool.", summary: [] } as any,
+						{ type: "tool_use", id: "call_001", name: "read_file", input: { path: "README.md" } },
+					],
+				},
+				{
+					role: "user",
+					content: [{ type: "tool_result", tool_use_id: "call_001", content: "# Project\nHello." }],
+				},
+			]
+
+			const stream = thinkingHandler.createMessage(systemPrompt, messagesWithReasoning)
+			for await (const _chunk of stream) {
+			}
+
+			expect(mockCreate).toHaveBeenCalled()
+			const sentMessages: any[] = mockCreate.mock.calls[0][0].messages
+			const assistantMsg = sentMessages.find((m: any) => m.role === "assistant" && m.tool_calls?.length)
+			expect(assistantMsg).toBeDefined()
+			expect(assistantMsg.reasoning_content).toBe("I should use the read_file tool.")
+		})
 	})
 
 	describe("error handling", () => {
diff --git a/src/api/transform/__tests__/openai-format.spec.ts b/src/api/transform/__tests__/openai-format.spec.ts
index b58deea649..7c0b89b76c 100644
--- a/src/api/transform/__tests__/openai-format.spec.ts
+++ b/src/api/transform/__tests__/openai-format.spec.ts
@@ -1084,6 +1084,109 @@ describe("convertToOpenAiMessages", () => {
 			expect(assistantMessage.reasoning_details[2].data).toBe("encrypted_data")
 		})
 	})
+
+	describe("reasoning_content round-trip for DeepSeek / Z.ai thinking mode", () => {
+		it("should pass through top-level reasoning_content on assistant messages", () => {
+			const anthropicMessages = [
+				{
+					role: "assistant" as const,
+					content: "Here is my answer.",
+					reasoning_content: "Let me think about this carefully...",
+				},
+			] as any as Anthropic.Messages.MessageParam[]
+
+			const result = convertToOpenAiMessages(anthropicMessages)
+
+			expect(result).toHaveLength(1)
+			expect((result[0] as any).reasoning_content).toBe("Let me think about this carefully...")
+		})
+
+		it("should extract reasoning_content from reasoning content block", () => {
+			// buildCleanConversationHistory stores reasoning as a content block when preserveReasoning=true
+			const anthropicMessages = [
+				{
+					role: "assistant" as const,
+					content: [
+						{ type: "reasoning", text: "Let me think...", summary: [] },
+						{ type: "text", text: "My answer." },
+					],
+				},
+			] as any as Anthropic.Messages.MessageParam[]
+
+			const result = convertToOpenAiMessages(anthropicMessages)
+
+			expect(result).toHaveLength(1)
+			const msg = result[0] as any
+			expect(msg.reasoning_content).toBe("Let me think...")
+			expect(msg.content).toBe("My answer.")
+		})
+
+		it("should extract reasoning_content from reasoning block alongside tool calls", () => {
+			// The critical case: DeepSeek thinking + tool call in the same turn.
+			// Without reasoning_content on the second request, DeepSeek may return 400:
+			// "The reasoning_content in the thinking mode must be passed back to the API."
+			const anthropicMessages = [
+				{
+					role: "assistant" as const,
+					content: [
+						{ type: "reasoning", text: "I need to read a file.", summary: [] },
+						{
+							type: "tool_use",
+							id: "call_abc",
+							name: "read_file",
+							input: { path: "foo.txt" },
+						},
+					],
+				},
+			] as any as Anthropic.Messages.MessageParam[]
+
+			const result = convertToOpenAiMessages(anthropicMessages)
+
+			expect(result).toHaveLength(1)
+			const msg = result[0] as any
+			expect(msg.reasoning_content).toBe("I need to read a file.")
+			expect(msg.tool_calls).toHaveLength(1)
+			expect(msg.tool_calls[0].id).toBe("call_abc")
+		})
+
+		it("should prefer top-level reasoning_content over content block", () => {
+			const anthropicMessages = [
+				{
+					role: "assistant" as const,
+					content: [
+						{ type: "reasoning", text: "block reasoning", summary: [] },
+						{ type: "text", text: "answer" },
+					],
+					reasoning_content: "top-level reasoning",
+				},
+			] as any as Anthropic.Messages.MessageParam[]
+
+			const result = convertToOpenAiMessages(anthropicMessages)
+
+			expect((result[0] as any).reasoning_content).toBe("top-level reasoning")
+		})
+
+		it("should not set reasoning_content when there is none", () => {
+			const anthropicMessages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "assistant",
+					content: [
+						{
+							type: "tool_use",
+							id: "call_abc",
+							name: "read_file",
+							input: { path: "foo.txt" },
+						},
+					],
+				},
+			]
+
+			const result = convertToOpenAiMessages(anthropicMessages)
+
+			expect(result).toHaveLength(1)
+			expect((result[0] as any).reasoning_content).toBeUndefined()
+		})
+	})
 })
 
 describe("consolidateReasoningDetails", () => {
diff --git a/src/api/transform/openai-format.ts b/src/api/transform/openai-format.ts
index 175872c635..c60874d0ec 100644
--- a/src/api/transform/openai-format.ts
+++ b/src/api/transform/openai-format.ts
@@ -307,7 +307,10 @@ export function convertToOpenAiMessages(
 			// If a message also contains reasoning_details (Gemini 3 / xAI / o-series, etc.),
 			// we must preserve it here as well.
 			const messageWithDetails = anthropicMessage as any
-			const baseMessage: OpenAI.Chat.ChatCompletionMessageParam & { reasoning_details?: any[] } = {
+			const baseMessage: OpenAI.Chat.ChatCompletionMessageParam & {
+				reasoning_details?: any[]
+				reasoning_content?: string
+			} = {
 				role: anthropicMessage.role,
 				content: anthropicMessage.content,
 			}
@@ -317,6 +320,10 @@ export function convertToOpenAiMessages(
 				if (mapped) {
 					;(baseMessage as any).reasoning_details = mapped
 				}
+				// Pass through reasoning_content for DeepSeek / Z.ai thinking mode.
+				if (typeof messageWithDetails.reasoning_content === "string" && messageWithDetails.reasoning_content) {
+					baseMessage.reasoning_content = messageWithDetails.reasoning_content
+				}
 			}
 
 			openAiMessages.push(baseMessage)
@@ -450,6 +457,9 @@ export function convertToOpenAiMessages(
 					}
 				}
 			} else if (anthropicMessage.role === "assistant") {
+				const messageWithDetails = anthropicMessage as any
+
+				let extractedReasoning: string | undefined
 				const { nonToolMessages, toolMessages } = anthropicMessage.content.reduce<{
 					nonToolMessages: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[]
 					toolMessages: Anthropic.ToolUseBlockParam[]
@@ -459,6 +469,11 @@ export function convertToOpenAiMessages(
 							acc.toolMessages.push(part)
 						} else if (part.type === "text" || part.type === "image") {
 							acc.nonToolMessages.push(part)
+						} else if ((part as any).type === "reasoning" && (part as any).text) {
+							// Extract reasoning stored as a content block (DeepSeek / Z.ai interleaved thinking).
+							// Must be passed back as top-level reasoning_content so providers like DeepSeek
+							// don't reject the request with "reasoning_content must be passed back to the API".
+							extractedReasoning = (part as any).text
 						} // assistant cannot send tool_result messages
 						return acc
 					},
@@ -489,14 +504,12 @@ export function convertToOpenAiMessages(
 					},
 				}))
 
-				// Check if the message has reasoning_details (used by Gemini 3, xAI, etc.)
-				const messageWithDetails = anthropicMessage as any
-
 				// Build message with reasoning_details BEFORE tool_calls to preserve
 				// the order expected by providers like Roo. Property order matters
 				// when sending messages back to some APIs.
 				const baseMessage: OpenAI.Chat.ChatCompletionAssistantMessageParam & {
 					reasoning_details?: any[]
+					reasoning_content?: string
 				} = {
 					role: "assistant",
 					// Use empty string instead of undefined for providers like Gemini (via OpenRouter)
@@ -511,6 +524,18 @@ export function convertToOpenAiMessages(
 					baseMessage.reasoning_details = mapped
 				}
 
+				// Pass through reasoning_content for providers that require it in history
+				// (e.g. DeepSeek thinking mode: "reasoning_content must be passed back to the API").
+				// Prefer top-level field (already round-tripped); fall back to reasoning from content blocks.
+				const outgoingReasoningContent: string | undefined =
+					(typeof messageWithDetails.reasoning_content === "string" &&
+					messageWithDetails.reasoning_content.length > 0
+						? messageWithDetails.reasoning_content
+						: undefined) ?? extractedReasoning
+				if (outgoingReasoningContent) {
+					baseMessage.reasoning_content = outgoingReasoningContent
+				}
+
 				// Add tool_calls after reasoning_details
 				// Cannot be an empty array. API expects an array with minimum length 1, and will respond with an error if it's empty
 				if (tool_calls.length > 0) {

From 404744ce41c952e6387805577d6d64d8fa49d173 Mon Sep 17 00:00:00 2001
From: Elliott de Launay <edelauna@gmail.com>
Date: Wed, 1 Jul 2026 00:02:56 +0000
Subject: [PATCH 2/2] fix(openai-format): accumulate all reasoning blocks

---
 .../src/suite/providers/deepseek-v4.test.ts           | 11 +++++++++++
 src/api/transform/openai-format.ts                    |  5 ++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts b/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts
index d484cfe21d..6391c79873 100644
--- a/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts
+++ b/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts
@@ -404,6 +404,17 @@ suite("DeepSeek V4 provider", function () {
 					undefined,
 					`Reasoning-disabled probe should omit reasoning_effort.\n${diagnostics}`,
 				)
+
+				// Negative guard: reasoning-off requests must never carry reasoning_content; if one
+				// does, the capture flag itself is broken. Skipped when there was no second request.
+				const secondRequestOff = result.requests[1]
+				if (secondRequestOff) {
+					assert.strictEqual(
+						secondRequestOff.hasReasoningContentInHistory,
+						false,
+						`Turn 2 request must NOT include reasoning_content when thinking is disabled.\n${diagnostics}`,
+					)
+				}
 			}
 
 			assert.ok(result.completed, `Task should complete cleanly.\n${diagnostics}`)
diff --git a/src/api/transform/openai-format.ts b/src/api/transform/openai-format.ts
index c60874d0ec..8e31f807eb 100644
--- a/src/api/transform/openai-format.ts
+++ b/src/api/transform/openai-format.ts
@@ -473,7 +473,10 @@ export function convertToOpenAiMessages(
 							// Extract reasoning stored as a content block (DeepSeek / Z.ai interleaved thinking).
 							// Must be passed back as top-level reasoning_content so providers like DeepSeek
 							// don't reject the request with "reasoning_content must be passed back to the API".
-							extractedReasoning = (part as any).text
+							// Concatenate all reasoning blocks in order (a turn may have more than one); no separator is added.
+							extractedReasoning = extractedReasoning
+								? extractedReasoning + (part as any).text
+								: (part as any).text
 						} // assistant cannot send tool_result messages
 						return acc
 					},