From c25488559ef1bbb61d64441d3ccec54ae6985b8e Mon Sep 17 00:00:00 2001
From: Naved <naved.merchant@gmail.com>
Date: Thu, 18 Jun 2026 22:36:33 -0700
Subject: [PATCH 1/8] opencode-go: add native model registry and reasoning-effort support

---
 packages/types/src/providers/opencode-go.ts   | 278 ++++++++++++++++++
 .../providers/__tests__/opencode-go.spec.ts   |  80 ++++-
 .../fetchers/__tests__/opencode-go.spec.ts    |  88 +++++-
 src/api/providers/fetchers/opencode-go.ts     |  60 +++-
 src/api/providers/opencode-go.ts              |  68 ++++-
 5 files changed, 533 insertions(+), 41 deletions(-)

diff --git a/packages/types/src/providers/opencode-go.ts b/packages/types/src/providers/opencode-go.ts
index 0efabcf155..fc165c3362 100644
--- a/packages/types/src/providers/opencode-go.ts
+++ b/packages/types/src/providers/opencode-go.ts
@@ -20,3 +20,281 @@ export const opencodeGoDefaultModelInfo: ModelInfo = {
 }
 
 export const OPENCODE_GO_DEFAULT_TEMPERATURE = 0
+
+/**
+ * Native per-model configuration for the Opencode Go plan.
+ *
+ * The Go `/v1/models` endpoint only reliably returns `id` and (sometimes)
+ * `context_window`/`max_tokens`. It does NOT advertise capability flags such
+ * as `supportsReasoningEffort`, `preserveReasoning`, `supportsMaxTokens`,
+ * `supportsPromptCache`, or pricing — all of which are required for the
+ * extension to drive reasoning controls, interleaved-thinking tool calls,
+ * the max-output-tokens slider, and accurate cost reporting.
+ *
+ * This registry encodes the native capabilities of each curated Go model,
+ * sourced from the same vendor specs used by the dedicated providers
+ * (zai/moonshot/mimo/minimax/deepseek/qwen) and the Go pricing table at
+ * https://opencode.ai/docs/go/#usage-limits. The fetcher merges the live
+ * `/models` payload on top of these defaults so that context-window and
+ * max-token values stay in sync with the gateway while capability flags and
+ * pricing remain correct.
+ *
+ * `supportsPromptCache` is intentionally `true` for models whose Go pricing table lists a
+ * "Cached Read" price: the gateway honours server-side caching and reports `cached_tokens`
+ * in usage, which the handler forwards for cost calculation. Client-side `cache_control`
+ * injection is not used on this path. NOTE(review): both MiMo entries set it to `false` yet list a `cacheReadsPrice` — confirm.
+ */
+export const opencodeGoModels: Record<string, ModelInfo> = {
+	// --- Zhipu GLM ---
+	"glm-5": {
+		maxTokens: 16_384,
+		contextWindow: 202_752,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsReasoningEffort: ["disable", "medium"],
+		reasoningEffort: "medium",
+		preserveReasoning: true,
+		inputPrice: 1.0,
+		outputPrice: 3.2,
+		cacheReadsPrice: 0.2,
+		description:
+			"GLM-5 is Zhipu's next-generation model with a 202k context window and built-in thinking capabilities. Available via the Opencode Go plan.",
+	},
+	"glm-5.1": {
+		maxTokens: 131_072,
+		contextWindow: 204_800,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsMaxTokens: true,
+		supportsReasoningEffort: ["disable", "medium"],
+		reasoningEffort: "medium",
+		preserveReasoning: true,
+		inputPrice: 1.4,
+		outputPrice: 4.4,
+		cacheReadsPrice: 0.26,
+		description:
+			"GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. Available via the Opencode Go plan.",
+	},
+	"glm-5.2": {
+		maxTokens: 131_072,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsMaxTokens: true,
+		supportsReasoningEffort: ["disable", "high", "max"],
+		reasoningEffort: "high",
+		preserveReasoning: true,
+		// Go pricing matches GLM-5.1 ($1.4 / $0.26 cache / $4.4 out per 1M tokens).
+		inputPrice: 1.4,
+		outputPrice: 4.4,
+		cacheReadsPrice: 0.26,
+		description:
+			"GLM-5.2 is Zhipu's flagship model with a 1M context window, 128k max output, and dual thinking-effort modes (High/Max). It delivers top-tier long-context reasoning, coding, and agentic performance. Available via the Opencode Go plan.",
+	},
+
+	// --- Moonshot Kimi ---
+	"kimi-k2.5": {
+		maxTokens: 16_384,
+		contextWindow: 262_144,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsTemperature: true,
+		defaultTemperature: 1.0,
+		inputPrice: 0.6,
+		outputPrice: 3.0,
+		cacheReadsPrice: 0.1,
+		description:
+			"Kimi K2.5 is the latest generation of Moonshot AI's Kimi series, featuring improved reasoning capabilities. Available via the Opencode Go plan.",
+	},
+	"kimi-k2.6": {
+		maxTokens: 16_384,
+		contextWindow: 262_144,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsTemperature: true,
+		defaultTemperature: 1.0,
+		inputPrice: 0.95,
+		outputPrice: 4.0,
+		cacheReadsPrice: 0.16,
+		description:
+			"Kimi K2.6 is Moonshot AI's native multimodal agentic MoE model with a 256k context window, built for long-horizon coding and tool use. Available via the Opencode Go plan.",
+	},
+
+	// --- Xiaomi MiMo ---
+	"mimo-v2.5": {
+		maxTokens: 131_072,
+		contextWindow: 1_048_576,
+		supportsImages: true,
+		supportsPromptCache: false,
+		preserveReasoning: true,
+		inputPrice: 0.14,
+		outputPrice: 0.28,
+		cacheReadsPrice: 0.0028,
+		longContextPricing: {
+			thresholdTokens: 256_000,
+			inputPriceMultiplier: 2,
+			outputPriceMultiplier: 2,
+			cacheReadsPriceMultiplier: 2,
+		},
+		description:
+			"MiMo V2.5 - Xiaomi's full-modal understanding model (text, image, audio, video) with 1M context, deep thinking, and tool calling. Available via the Opencode Go plan.",
+	},
+	"mimo-v2.5-pro": {
+		maxTokens: 131_072,
+		contextWindow: 1_048_576,
+		supportsImages: false,
+		supportsPromptCache: false,
+		preserveReasoning: true,
+		inputPrice: 1.74,
+		outputPrice: 3.48,
+		cacheReadsPrice: 0.0145,
+		longContextPricing: {
+			thresholdTokens: 256_000,
+			inputPriceMultiplier: 2,
+			outputPriceMultiplier: 2,
+			cacheReadsPriceMultiplier: 2,
+		},
+		description:
+			"MiMo V2.5 Pro - Xiaomi's flagship reasoning model with 1M context, deep thinking, and tool calling. Available via the Opencode Go plan.",
+	},
+
+	// --- MiniMax ---
+	"minimax-m2.5": {
+		maxTokens: 16_384,
+		contextWindow: 204_800,
+		supportsImages: false,
+		supportsPromptCache: true,
+		includedTools: ["search_and_replace"],
+		excludedTools: ["apply_diff"],
+		preserveReasoning: true,
+		inputPrice: 0.3,
+		outputPrice: 1.2,
+		cacheWritesPrice: 0.375,
+		cacheReadsPrice: 0.06,
+		description:
+			"MiniMax M2.5, the latest MiniMax model with enhanced coding and agentic capabilities. Available via the Opencode Go plan.",
+	},
+	"minimax-m2.7": {
+		maxTokens: 16_384,
+		contextWindow: 204_800,
+		supportsImages: false,
+		supportsPromptCache: true,
+		includedTools: ["search_and_replace"],
+		excludedTools: ["apply_diff"],
+		preserveReasoning: true,
+		inputPrice: 0.3,
+		outputPrice: 1.2,
+		cacheWritesPrice: 0.375,
+		cacheReadsPrice: 0.06,
+		description:
+			"MiniMax M2.7, the latest MiniMax model with recursive self-improvement capabilities. Available via the Opencode Go plan.",
+	},
+	"minimax-m3": {
+		maxTokens: 131_072,
+		contextWindow: 1_000_000,
+		supportsImages: true,
+		supportsPromptCache: true,
+		includedTools: ["search_and_replace"],
+		excludedTools: ["apply_diff"],
+		preserveReasoning: true,
+		inputPrice: 0.3,
+		outputPrice: 1.2,
+		cacheReadsPrice: 0.06,
+		description:
+			"MiniMax M3, a frontier multimodal coding model with a 1M context window, agentic reasoning, and tool use. Available via the Opencode Go plan.",
+	},
+
+	// --- Alibaba Qwen ---
+	"qwen3.6-plus": {
+		maxTokens: 65_536,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		preserveReasoning: true,
+		inputPrice: 0.5,
+		outputPrice: 3.0,
+		cacheReadsPrice: 0.05,
+		cacheWritesPrice: 0.625,
+		longContextPricing: {
+			thresholdTokens: 256_000,
+			inputPriceMultiplier: 4,
+			outputPriceMultiplier: 2,
+			cacheReadsPriceMultiplier: 4,
+			cacheWritesPriceMultiplier: 4,
+		},
+		description:
+			"Qwen3.6 Plus - Alibaba's balanced coding and reasoning model with a 1M context window. Available via the Opencode Go plan.",
+	},
+	"qwen3.7-plus": {
+		maxTokens: 65_536,
+		contextWindow: 1_000_000,
+		supportsImages: true,
+		supportsPromptCache: true,
+		preserveReasoning: true,
+		inputPrice: 0.4,
+		outputPrice: 1.6,
+		cacheReadsPrice: 0.04,
+		cacheWritesPrice: 0.5,
+		longContextPricing: {
+			thresholdTokens: 256_000,
+			inputPriceMultiplier: 3,
+			outputPriceMultiplier: 3,
+			cacheReadsPriceMultiplier: 3,
+			cacheWritesPriceMultiplier: 3,
+		},
+		description:
+			"Qwen3.7 Plus - Alibaba's multimodal reasoning model with a 1M context window and low-cost agentic coding. Available via the Opencode Go plan.",
+	},
+	"qwen3.7-max": {
+		maxTokens: 65_536,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		preserveReasoning: true,
+		inputPrice: 2.5,
+		outputPrice: 7.5,
+		cacheReadsPrice: 0.5,
+		cacheWritesPrice: 3.125,
+		description:
+			"Qwen3.7 Max - Alibaba's flagship text-only reasoning agent model with a 1M context window, designed for long-horizon agent workflows. Available via the Opencode Go plan.",
+	},
+
+	// --- DeepSeek ---
+	"deepseek-v4-pro": {
+		maxTokens: 384_000,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsReasoningEffort: ["disable", "low", "medium", "high", "xhigh"],
+		preserveReasoning: true,
+		reasoningEffort: "high",
+		inputPrice: 1.74,
+		outputPrice: 3.48,
+		cacheReadsPrice: 0.0145,
+		description:
+			"DeepSeek-V4-Pro is DeepSeek's strongest V4 model for reasoning, coding, long-context, and agentic workloads. Available via the Opencode Go plan.",
+	},
+	"deepseek-v4-flash": {
+		maxTokens: 384_000,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsReasoningEffort: ["disable", "low", "medium", "high", "xhigh"],
+		preserveReasoning: true,
+		reasoningEffort: "high",
+		inputPrice: 0.14,
+		outputPrice: 0.28,
+		cacheReadsPrice: 0.0028,
+		description:
+			"DeepSeek-V4-Flash is DeepSeek's fast, cost-efficient V4 model supporting thinking and non-thinking modes. Available via the Opencode Go plan.",
+	},
+}
+
+/**
+ * Returns the native {@link ModelInfo} for a Go-plan model ID, or `undefined`
+ * when the ID is not part of the curated registry. Callers should fall back to
+ * {@link opencodeGoDefaultModelInfo} when this returns `undefined`.
+ */
+export function getOpencodeGoModelInfo(modelId: string): ModelInfo | undefined {
+	return opencodeGoModels[modelId]
+}
diff --git a/src/api/providers/__tests__/opencode-go.spec.ts b/src/api/providers/__tests__/opencode-go.spec.ts
index 2877abd36b..3d7cfdda09 100644
--- a/src/api/providers/__tests__/opencode-go.spec.ts
+++ b/src/api/providers/__tests__/opencode-go.spec.ts
@@ -6,7 +6,7 @@ vitest.mock("vscode", () => ({}))
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
 
-import { opencodeGoDefaultModelId } from "@roo-code/types"
+import { opencodeGoDefaultModelId, opencodeGoModels } from "@roo-code/types"
 
 import { OpencodeGoHandler } from "../opencode-go"
 import { ApiHandlerOptions } from "../../../shared/api"
@@ -20,13 +20,9 @@ vitest.mock("delay", () => ({
 vitest.mock("../fetchers/modelCache", () => ({
 	getModels: vitest.fn().mockImplementation(function () {
 		return Promise.resolve({
-			"glm-5.1": {
-				maxTokens: 32768,
-				contextWindow: 200000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				description: "GLM 5.1",
-			},
+			// Use the native registry entry so capability flags (reasoning
+			// effort, preserveReasoning, prompt cache) are exercised.
+			"glm-5.1": { ...opencodeGoModels["glm-5.1"] },
 		})
 	}),
 	getModelsFromCache: vitest.fn().mockReturnValue(undefined),
@@ -63,13 +59,17 @@ describe("OpencodeGoHandler", () => {
 	})
 
 	describe("fetchModel", () => {
-		it("returns the configured model info", async () => {
+		it("returns the configured model info with native capability flags", async () => {
 			const handler = new OpencodeGoHandler(mockOptions)
 			const result = await handler.fetchModel()
 			expect(result.id).toBe("glm-5.1")
-			expect(result.info.maxTokens).toBe(32768)
-			expect(result.info.contextWindow).toBe(200000)
-			expect(result.info.supportsPromptCache).toBe(false)
+			// Native registry values for glm-5.1.
+			expect(result.info.maxTokens).toBe(131_072)
+			expect(result.info.contextWindow).toBe(204_800)
+			expect(result.info.supportsPromptCache).toBe(true)
+			expect(result.info.supportsReasoningEffort).toEqual(["disable", "medium"])
+			expect(result.info.preserveReasoning).toBe(true)
+			expect(result.info.supportsMaxTokens).toBe(true)
 		})
 
 		it("falls back to the default model id when none is configured", async () => {
@@ -141,7 +141,7 @@ describe("OpencodeGoHandler", () => {
 			})
 		})
 
-		it("requests a streaming completion with usage included", async () => {
+		it("requests a streaming completion with usage included and native max tokens", async () => {
 			const handler = new OpencodeGoHandler(mockOptions)
 			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
 			for await (const _chunk of handler.createMessage("sys", messages)) {
@@ -153,12 +153,60 @@ describe("OpencodeGoHandler", () => {
 					model: "glm-5.1",
 					stream: true,
 					stream_options: { include_usage: true },
-					max_completion_tokens: 32768,
+					// glm-5.1 maxTokens (131_072) is clamped to 20% of its 204_800
+					// context window => 40_960.
+					max_completion_tokens: 40_960,
 					temperature: expect.any(Number),
 				}),
 			)
 		})
 
+		it("forwards the model's default reasoning_effort for reasoning-capable models", async () => {
+			const handler = new OpencodeGoHandler(mockOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+			for await (const _chunk of handler.createMessage("sys", messages)) {
+				void _chunk // drain
+			}
+
+			// glm-5.1 advertises supportsReasoningEffort with a default of "medium".
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "glm-5.1",
+					reasoning_effort: "medium",
+				}),
+			)
+		})
+
+		it("omits reasoning_effort when the user disables reasoning", async () => {
+			const handler = new OpencodeGoHandler({ ...mockOptions, reasoningEffort: "disable" })
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+			for await (const _chunk of handler.createMessage("sys", messages)) {
+				void _chunk // drain
+			}
+
+			const callArgs = mockCreate.mock.calls[0][0] as Record<string, unknown>
+			expect(callArgs.reasoning_effort).toBeUndefined()
+		})
+
+		it("uses convertToR1Format for preserveReasoning models to keep interleaved thinking", async () => {
+			const handler = new OpencodeGoHandler(mockOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: [{ type: "text", text: "Hi" }],
+				},
+			]
+			for await (const _chunk of handler.createMessage("sys", messages)) {
+				void _chunk // drain
+			}
+
+			const callArgs = mockCreate.mock.calls[0][0] as { messages: Array<{ role: string }> }
+			// The system prompt is prepended, then the R1-converted user message.
+			expect(callArgs.messages[0]).toEqual({ role: "system", content: "sys" })
+			// convertToR1Format keeps a single user turn as one user message.
+			expect(callArgs.messages.filter((m) => m.role === "user")).toHaveLength(1)
+		})
+
 		it("streams reasoning chunks from delta.reasoning_content", async () => {
 			mockCreate.mockImplementationOnce(async () => ({
 				[Symbol.asyncIterator]: async function* () {
@@ -247,7 +295,9 @@ describe("OpencodeGoHandler", () => {
 				expect.objectContaining({
 					model: "glm-5.1",
 					stream: false,
-					max_completion_tokens: 32768,
+					// glm-5.1 maxTokens (131_072) clamped to 20% of 204_800 => 40_960.
+					max_completion_tokens: 40_960,
+					reasoning_effort: "medium",
 				}),
 			)
 		})
diff --git a/src/api/providers/fetchers/__tests__/opencode-go.spec.ts b/src/api/providers/fetchers/__tests__/opencode-go.spec.ts
index 811d09b498..c8607db4d3 100644
--- a/src/api/providers/fetchers/__tests__/opencode-go.spec.ts
+++ b/src/api/providers/fetchers/__tests__/opencode-go.spec.ts
@@ -2,7 +2,7 @@
 
 import axios from "axios"
 
-import { opencodeGoDefaultModelInfo } from "@roo-code/types"
+import { opencodeGoDefaultModelInfo, opencodeGoModels, getOpencodeGoModelInfo } from "@roo-code/types"
 
 import { getOpencodeGoModels, parseOpencodeGoModel } from "../opencode-go"
 
@@ -39,21 +39,48 @@ describe("Opencode Go Fetchers", () => {
 			})
 
 			expect(Object.keys(models).sort()).toEqual(["deepseek-v4-pro", "glm-5.1"])
+			// Live endpoint values override the native registry for volatile fields,
+			// while capability flags and pricing come from the native registry.
 			expect(models["glm-5.1"]).toMatchObject({
 				contextWindow: 202752,
 				maxTokens: 32768,
-				supportsPromptCache: false,
+				supportsPromptCache: true,
+				supportsReasoningEffort: ["disable", "medium"],
+				preserveReasoning: true,
 				description: "Zhipu GLM 5.1",
 			})
 			expect(models["deepseek-v4-pro"].contextWindow).toBe(1048576)
+			expect(models["deepseek-v4-pro"].supportsReasoningEffort).toEqual([
+				"disable",
+				"low",
+				"medium",
+				"high",
+				"xhigh",
+			])
 		})
 
-		it("falls back to default context/max tokens when metadata is absent", async () => {
+		it("uses native registry config for a curated model when metadata is absent", async () => {
 			mockedAxios.get.mockResolvedValue({ data: { data: [{ id: "kimi-k2.6" }] } })
 
 			const models = await getOpencodeGoModels("k")
 
+			// kimi-k2.6 is curated, so it gets its native context/max tokens and
+			// capability flags rather than the generic default fallback.
 			expect(models["kimi-k2.6"]).toMatchObject({
+				contextWindow: 262_144,
+				maxTokens: 16_384,
+				supportsPromptCache: true,
+				supportsTemperature: true,
+				defaultTemperature: 1.0,
+			})
+		})
+
+		it("falls back to default context/max tokens for an unknown model when metadata is absent", async () => {
+			mockedAxios.get.mockResolvedValue({ data: { data: [{ id: "some-unknown-model" }] } })
+
+			const models = await getOpencodeGoModels("k")
+
+			expect(models["some-unknown-model"]).toMatchObject({
 				contextWindow: opencodeGoDefaultModelInfo.contextWindow,
 				maxTokens: opencodeGoDefaultModelInfo.maxTokens,
 				supportsPromptCache: false,
@@ -94,11 +121,64 @@ describe("Opencode Go Fetchers", () => {
 	})
 
 	describe("parseOpencodeGoModel", () => {
-		it("treats a model with no cache pricing as not cache-capable", () => {
+		it("merges live endpoint values over the native registry for a curated model", () => {
+			const info = parseOpencodeGoModel({ id: "glm-5.1", context_window: 150000, max_output_tokens: 8000 })
+			// Live values win for volatile fields.
+			expect(info.contextWindow).toBe(150000)
+			expect(info.maxTokens).toBe(8000)
+			// Capability flags and pricing come from the native registry.
+			expect(info.supportsPromptCache).toBe(true)
+			expect(info.supportsMaxTokens).toBe(true)
+			expect(info.supportsReasoningEffort).toEqual(["disable", "medium"])
+			expect(info.preserveReasoning).toBe(true)
+			expect(info.inputPrice).toBe(1.4)
+		})
+
+		it("uses native registry defaults when the live payload omits volatile fields", () => {
+			const info = parseOpencodeGoModel({ id: "deepseek-v4-flash" })
+			const native = getOpencodeGoModelInfo("deepseek-v4-flash")!
+			expect(info.contextWindow).toBe(native.contextWindow)
+			expect(info.maxTokens).toBe(native.maxTokens)
+			expect(info.supportsPromptCache).toBe(true)
+			expect(info.preserveReasoning).toBe(true)
+			expect(info.supportsReasoningEffort).toEqual(["disable", "low", "medium", "high", "xhigh"])
+		})
+
+		it("resolves GLM-5.2 with its 1M context and High/Max reasoning effort", () => {
+			const info = parseOpencodeGoModel({ id: "glm-5.2" })
+			expect(info.contextWindow).toBe(1_000_000)
+			expect(info.maxTokens).toBe(131_072)
+			expect(info.supportsPromptCache).toBe(true)
+			expect(info.supportsMaxTokens).toBe(true)
+			expect(info.supportsReasoningEffort).toEqual(["disable", "high", "max"])
+			expect(info.reasoningEffort).toBe("high")
+			expect(info.preserveReasoning).toBe(true)
+			expect(info.inputPrice).toBe(1.4)
+			expect(info.outputPrice).toBe(4.4)
+		})
+
+		it("falls back to defaults for an unknown model with no cache pricing", () => {
 			const info = parseOpencodeGoModel({ id: "x", context_window: 100000, max_tokens: 8000 })
 			expect(info.supportsPromptCache).toBe(false)
 			expect(info.contextWindow).toBe(100000)
 			expect(info.maxTokens).toBe(8000)
 		})
+
+		it("falls back to default context/max tokens for an unknown model with no metadata", () => {
+			const info = parseOpencodeGoModel({ id: "unknown-model" })
+			expect(info.contextWindow).toBe(opencodeGoDefaultModelInfo.contextWindow)
+			expect(info.maxTokens).toBe(opencodeGoDefaultModelInfo.maxTokens)
+			expect(info.supportsPromptCache).toBe(false)
+		})
+
+		it("every curated model in the registry produces a fully-populated ModelInfo", () => {
+			for (const [id, native] of Object.entries(opencodeGoModels)) {
+				const info = parseOpencodeGoModel({ id })
+				expect(info.contextWindow).toBe(native.contextWindow)
+				expect(info.maxTokens).toBe(native.maxTokens)
+				expect(info.supportsPromptCache).toBe(native.supportsPromptCache)
+				expect(info.description).toBeTruthy()
+			}
+		})
 	})
 })
diff --git a/src/api/providers/fetchers/opencode-go.ts b/src/api/providers/fetchers/opencode-go.ts
index ac62db52a7..d28b8872f0 100644
--- a/src/api/providers/fetchers/opencode-go.ts
+++ b/src/api/providers/fetchers/opencode-go.ts
@@ -2,7 +2,7 @@ import axios from "axios"
 import { z } from "zod"
 
 import type { ModelInfo } from "@roo-code/types"
-import { opencodeGoDefaultModelInfo } from "@roo-code/types"
+import { opencodeGoDefaultModelInfo, getOpencodeGoModelInfo } from "@roo-code/types"
 
 const OPENCODE_GO_BASE_URL = "https://opencode.ai/zen/go/v1"
 
@@ -10,8 +10,9 @@ const OPENCODE_GO_BASE_URL = "https://opencode.ai/zen/go/v1"
 // `id` is the only guaranteed field; metadata is optional and best-effort, so
 // the schema is intentionally permissive. Pricing is intentionally NOT parsed:
 // the units returned by the endpoint aren't documented, and reporting a wrong
-// cost is worse than reporting "unknown" — so cost stays undefined until the
-// pricing shape is confirmed against the live endpoint.
+// cost is worse than reporting "unknown" — so cost comes from the native
+// registry (and stays undefined for unknown models) until the pricing shape is
+// confirmed against the live endpoint.
 const opencodeGoModelSchema = z.object({
 	id: z.string(),
 	name: z.string().optional(),
@@ -32,20 +33,53 @@ const opencodeGoModelsResponseSchema = z.object({
 /**
  * Maps a raw Opencode Go model entry to the internal {@link ModelInfo} shape.
  *
- * Falls back to {@link opencodeGoDefaultModelInfo} when the upstream payload
- * omits context-window or max-token fields, ensuring downstream consumers
- * always receive a fully-populated object.
+ * The Go `/models` endpoint only reliably returns `id` and (sometimes)
+ * `context_window`/`max_tokens`. It does NOT advertise capability flags
+ * (`supportsReasoningEffort`, `preserveReasoning`, `supportsMaxTokens`,
+ * `supportsPromptCache`) or pricing, all of which the extension needs to drive
+ * reasoning controls, interleaved-thinking tool calls, the max-output-tokens
+ * slider, and accurate cost reporting.
+ *
+ * Resolution order for a fully-populated {@link ModelInfo}:
+ *   1. Start from the native registry ({@link getOpencodeGoModelInfo}) when the
+ *      model ID is curated — this supplies correct context lengths, max tokens,
+ *      capability flags, and pricing sourced from vendor specs.
+ *   2. Override `contextWindow`, `maxTokens`, and `supportsImages` with values
+ *      from the live `/models` payload when present, so the gateway stays the
+ *      source of truth for those volatile fields.
+ *   3. Fall back to {@link opencodeGoDefaultModelInfo} for any field still
+ *      missing on an unknown (non-curated) model, ensuring downstream consumers
+ *      always receive a fully-populated object.
  *
  * @param model - Validated model entry from the `/models` response.
  * @returns Normalised model metadata suitable for the model picker.
  */
-export const parseOpencodeGoModel = (model: OpencodeGoModel): ModelInfo => ({
-	maxTokens: model.max_output_tokens ?? model.max_tokens ?? opencodeGoDefaultModelInfo.maxTokens,
-	contextWindow: model.context_window ?? model.context_length ?? opencodeGoDefaultModelInfo.contextWindow,
-	supportsImages: model.supports_images ?? false,
-	supportsPromptCache: false,
-	description: model.description ?? model.name,
-})
+export const parseOpencodeGoModel = (model: OpencodeGoModel): ModelInfo => {
+	const native = getOpencodeGoModelInfo(model.id)
+
+	// Live endpoint values take precedence over the registry for volatile fields.
+	const liveContextWindow = model.context_window ?? model.context_length
+	const liveMaxTokens = model.max_output_tokens ?? model.max_tokens
+	const liveSupportsImages = model.supports_images
+
+	if (native) {
+		return {
+			...native,
+			...(liveContextWindow !== undefined && { contextWindow: liveContextWindow }),
+			...(liveMaxTokens !== undefined && { maxTokens: liveMaxTokens }),
+			...(liveSupportsImages !== undefined && { supportsImages: liveSupportsImages }),
+			description: model.description ?? model.name ?? native.description,
+		}
+	}
+
+	return {
+		maxTokens: liveMaxTokens ?? opencodeGoDefaultModelInfo.maxTokens,
+		contextWindow: liveContextWindow ?? opencodeGoDefaultModelInfo.contextWindow,
+		supportsImages: liveSupportsImages ?? false,
+		supportsPromptCache: false,
+		description: model.description ?? model.name,
+	}
+}
 
 /**
  * Fetches the list of available models from the Opencode Go `/models` endpoint.
diff --git a/src/api/providers/opencode-go.ts b/src/api/providers/opencode-go.ts
index 43d32e6192..42e6b56cdc 100644
--- a/src/api/providers/opencode-go.ts
+++ b/src/api/providers/opencode-go.ts
@@ -7,6 +7,8 @@ import { ApiHandlerOptions } from "../../shared/api"
 
 import { ApiStream } from "../transform/stream"
 import { convertToOpenAiMessages } from "../transform/openai-format"
+import { convertToR1Format } from "../transform/r1-format"
+import { getModelParams } from "../transform/model-params"
 
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 import { RouterProvider } from "./router-provider"
@@ -24,6 +26,13 @@ import { extractReasoningFromDelta } from "./utils/extract-reasoning"
  * instead of configuring each one manually as a separate OpenAI-Compatible
  * provider (#172).
  *
+ * Model metadata (context window, max tokens, capability flags, and pricing)
+ * is sourced from the native registry in `@roo-code/types` and merged with the
+ * live `/models` payload, so each curated model keeps its correct native
+ * configuration — including `supportsReasoningEffort`, `preserveReasoning`,
+ * `supportsMaxTokens`, and prompt-cache support — instead of falling back to a
+ * single generic default.
+ *
  * Supports text generation, reasoning content (GLM/DeepSeek), tool calls,
  * and non-streaming prompt completion.
  */
@@ -41,34 +50,69 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio
 		})
 	}
 
+	/**
+	 * Resolves the configured model and computes OpenAI-format model parameters
+	 * (max tokens, temperature, reasoning effort) from the merged model info.
+	 *
+	 * Fetches the live model list first so the merged native + `/models`
+	 * metadata (context window, capability flags, pricing) is available before
+	 * parameter computation — mirroring the original `fetchModel()` flow.
+	 */
+	private async resolveModel() {
+		const { id, info } = await this.fetchModel()
+		const params = getModelParams({
+			format: "openai",
+			modelId: id,
+			model: info,
+			settings: this.options,
+			defaultTemperature: OPENCODE_GO_DEFAULT_TEMPERATURE,
+		})
+		return { id, info, ...params }
+	}
+
 	/**
 	 * Streams a chat completion response, yielding typed chunks for text,
 	 * reasoning, partial tool calls, and token usage.
+	 *
+	 * For models that require reasoning_content to be passed back during
+	 * multi-turn tool calls (`preserveReasoning`), messages are converted with
+	 * `convertToR1Format` so interleaved thinking is preserved across tool-call
+	 * continuations. Reasoning effort is forwarded when the model advertises
+	 * `supportsReasoningEffort`.
 	 */
 	override async *createMessage(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
-		const { id: modelId, info } = await this.fetchModel()
+		const { id: modelId, info, temperature, reasoningEffort, maxTokens } = await this.resolveModel()
+
+		// preserveReasoning models (GLM/DeepSeek/MiMo/MiniMax/Qwen) require
+		// reasoning_content to be carried across tool-call continuations.
+		const preserveReasoning = info.preserveReasoning === true
+		const convertedMessages = preserveReasoning
+			? convertToR1Format(messages, { mergeToolResultText: true })
+			: convertToOpenAiMessages(messages)
 
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
-			...convertToOpenAiMessages(messages),
+			...convertedMessages,
 		]
 
 		const body: OpenAI.Chat.ChatCompletionCreateParams = {
 			model: modelId,
 			messages: openAiMessages,
-			temperature: this.supportsTemperature(modelId)
-				? (this.options.modelTemperature ?? OPENCODE_GO_DEFAULT_TEMPERATURE)
-				: undefined,
-			max_completion_tokens: info.maxTokens,
+			temperature: this.supportsTemperature(modelId) ? temperature : undefined,
+			max_completion_tokens:
+				this.options.includeMaxTokens === true ? this.options.modelMaxTokens || maxTokens : maxTokens,
 			stream: true,
 			stream_options: { include_usage: true },
 			tools: this.convertToolsForOpenAI(metadata?.tools),
 			tool_choice: metadata?.tool_choice,
 			parallel_tool_calls: metadata?.parallelToolCalls ?? true,
+			...(reasoningEffort && {
+				reasoning_effort: reasoningEffort as OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"],
+			}),
 		}
 
 		const completion = await this.client.chat.completions.create(body)
@@ -118,7 +162,7 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio
 	 * @throws Error with an Opencode Go-specific prefix if the request fails.
 	 */
 	async completePrompt(prompt: string): Promise<string> {
-		const { id: modelId, info } = await this.fetchModel()
+		const { id: modelId, temperature, reasoningEffort, maxTokens } = await this.resolveModel()
 
 		try {
 			const requestOptions: OpenAI.Chat.ChatCompletionCreateParams = {
@@ -128,10 +172,16 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio
 			}
 
 			if (this.supportsTemperature(modelId)) {
-				requestOptions.temperature = this.options.modelTemperature ?? OPENCODE_GO_DEFAULT_TEMPERATURE
+				requestOptions.temperature = temperature
 			}
 
-			requestOptions.max_completion_tokens = info.maxTokens
+			requestOptions.max_completion_tokens =
+				this.options.includeMaxTokens === true ? this.options.modelMaxTokens || maxTokens : maxTokens
+
+			if (reasoningEffort) {
+				requestOptions.reasoning_effort =
+					reasoningEffort as OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"]
+			}
 
 			const response = await this.client.chat.completions.create(requestOptions)
 			return response.choices[0]?.message.content || ""

From 22f7be62b63e5a4094652d7610f2da2940ae2b39 Mon Sep 17 00:00:00 2001
From: Naved <naved.merchant@gmail.com>
Date: Thu, 18 Jun 2026 22:41:38 -0700
Subject: [PATCH 2/8] Update default to GLM 5.2

---
 packages/types/src/providers/opencode-go.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/types/src/providers/opencode-go.ts b/packages/types/src/providers/opencode-go.ts
index fc165c3362..14b8d68a9a 100644
--- a/packages/types/src/providers/opencode-go.ts
+++ b/packages/types/src/providers/opencode-go.ts
@@ -6,7 +6,7 @@ import type { ModelInfo } from "../model.js"
 // The full model list (and metadata) is fetched dynamically from
 // `https://opencode.ai/zen/go/v1/models`, so models can be switched on the fly.
 // The values below are only a fallback used before the live list resolves.
-export const opencodeGoDefaultModelId = "glm-5.1"
+export const opencodeGoDefaultModelId = "glm-5.2"
 
 export const opencodeGoDefaultModelInfo: ModelInfo = {
 	maxTokens: 32_768,

From 0c219e2292e5995cd44de12462741658c8ca7a82 Mon Sep 17 00:00:00 2001
From: Naved <naved.merchant@gmail.com>
Date: Thu, 18 Jun 2026 23:23:57 -0700
Subject: [PATCH 3/8] Use Anthropic Messages for MiniMax and Qwen

---
 packages/types/src/providers/opencode-go.ts   |  37 ++
 .../providers/__tests__/opencode-go.spec.ts   | 198 +++++++++-
 src/api/providers/opencode-go.ts              | 346 +++++++++++++++++-
 3 files changed, 563 insertions(+), 18 deletions(-)

diff --git a/packages/types/src/providers/opencode-go.ts b/packages/types/src/providers/opencode-go.ts
index 14b8d68a9a..7a646b6236 100644
--- a/packages/types/src/providers/opencode-go.ts
+++ b/packages/types/src/providers/opencode-go.ts
@@ -290,6 +290,43 @@ export const opencodeGoModels: Record<string, ModelInfo> = {
 	},
 }
 
+/**
+ * OpenCode Go models that are only reachable via the Anthropic Messages wire
+ * format (`/v1/messages`), not the OpenAI-compatible chat completions format
+ * (`/v1/chat/completions` — referred to by the gateway as "oa-compat").
+ *
+ * The Go gateway maps every model to exactly one wire format (see the model
+ * table at https://opencode.ai/docs/go). Models listed here use
+ * `@ai-sdk/anthropic`; every other curated model uses
+ * `@ai-sdk/openai-compatible`. Sending an Anthropic-format model to the
+ * OpenAI chat completions endpoint is rejected with:
+ *
+ *   401 Model <id> is not supported for format oa-compat
+ *
+ * This is the set that drives format routing in the handler — keep it in sync
+ * with the Go model table.
+ */
+export const OPENCODE_GO_ANTHROPIC_FORMAT_MODELS = new Set<string>([
+	// --- Alibaba Qwen ---
+	"qwen3.7-max",
+	"qwen3.7-plus",
+	"qwen3.6-plus",
+	// --- MiniMax ---
+	"minimax-m3",
+	"minimax-m2.7",
+	"minimax-m2.5",
+])
+
+/**
+ * Returns `true` when the given Go-plan model ID must be requested via the
+ * Anthropic Messages format (`/v1/messages`) rather than the OpenAI-compatible
+ * chat completions format. Unknown (non-curated) model IDs default to the
+ * OpenAI-compatible format, matching the gateway's default routing.
+ */
+export function isOpencodeGoAnthropicFormatModel(modelId: string): boolean {
+	return OPENCODE_GO_ANTHROPIC_FORMAT_MODELS.has(modelId)
+}
+
 /**
  * Returns the native {@link ModelInfo} for a Go-plan model ID, or `undefined`
  * when the ID is not part of the curated registry. Callers should fall back to
diff --git a/src/api/providers/__tests__/opencode-go.spec.ts b/src/api/providers/__tests__/opencode-go.spec.ts
index 3d7cfdda09..ee163f1f21 100644
--- a/src/api/providers/__tests__/opencode-go.spec.ts
+++ b/src/api/providers/__tests__/opencode-go.spec.ts
@@ -6,7 +6,7 @@ vitest.mock("vscode", () => ({}))
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
 
-import { opencodeGoDefaultModelId, opencodeGoModels } from "@roo-code/types"
+import { opencodeGoDefaultModelId, opencodeGoModels, isOpencodeGoAnthropicFormatModel } from "@roo-code/types"
 
 import { OpencodeGoHandler } from "../opencode-go"
 import { ApiHandlerOptions } from "../../../shared/api"
@@ -23,12 +23,15 @@ vitest.mock("../fetchers/modelCache", () => ({
 			// Use the native registry entry so capability flags (reasoning
 			// effort, preserveReasoning, prompt cache) are exercised.
 			"glm-5.1": { ...opencodeGoModels["glm-5.1"] },
+			// Anthropic-format model used to exercise the /v1/messages path.
+			"qwen3.7-max": { ...opencodeGoModels["qwen3.7-max"] },
 		})
 	}),
 	getModelsFromCache: vitest.fn().mockReturnValue(undefined),
 }))
 
 const mockCreate = vitest.fn()
+const mockAnthropicCreate = vitest.fn()
 
 ;(OpenAI as any).mockImplementation(function () {
 	return {
@@ -36,6 +39,16 @@ const mockCreate = vitest.fn()
 	}
 })
 
+vitest.mock("@anthropic-ai/sdk", () => ({
+	Anthropic: vitest.fn(function () {
+		return {
+			messages: {
+				create: mockAnthropicCreate,
+			},
+		}
+	}),
+}))
+
 describe("OpencodeGoHandler", () => {
 	const mockOptions: ApiHandlerOptions = {
 		opencodeGoApiKey: "test-key",
@@ -45,6 +58,7 @@ describe("OpencodeGoHandler", () => {
 	beforeEach(() => {
 		vitest.clearAllMocks()
 		mockCreate.mockClear()
+		mockAnthropicCreate.mockClear()
 	})
 
 	it("initializes the OpenAI client with the Opencode Go base URL and key", () => {
@@ -58,6 +72,18 @@ describe("OpencodeGoHandler", () => {
 		)
 	})
 
+	it("initializes an Anthropic client rooted at /zen/go (SDK appends /v1/messages)", () => {
+		new OpencodeGoHandler(mockOptions)
+		expect(Anthropic).toHaveBeenCalledWith(
+			expect.objectContaining({
+				// The Anthropic SDK posts to `/v1/messages`, so the base URL must
+				// NOT include the trailing `/v1` used by the OpenAI client.
+				baseURL: "https://opencode.ai/zen/go",
+				apiKey: "test-key",
+			}),
+		)
+	})
+
 	describe("fetchModel", () => {
 		it("returns the configured model info with native capability flags", async () => {
 			const handler = new OpencodeGoHandler(mockOptions)
@@ -308,4 +334,174 @@ describe("OpencodeGoHandler", () => {
 			await expect(handler.completePrompt("ping")).rejects.toThrow("Opencode Go completion error: boom")
 		})
 	})
+
+	describe("Anthropic-format models (qwen3.7-max)", () => {
+		// qwen3.7-max is only reachable via the Anthropic Messages endpoint
+		// (/v1/messages); sending it to /v1/chat/completions is what produces
+		// "401 Model qwen3.7-max is not supported for format oa-compat".
+		const anthropicOptions: ApiHandlerOptions = {
+			opencodeGoApiKey: "test-key",
+			opencodeGoModelId: "qwen3.7-max",
+		}
+
+		beforeEach(() => {
+			mockAnthropicCreate.mockImplementation(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						type: "message_start",
+						message: {
+							usage: {
+								input_tokens: 10,
+								output_tokens: 0,
+								cache_creation_input_tokens: 2,
+								cache_read_input_tokens: 3,
+							},
+						},
+					}
+					yield {
+						type: "content_block_start",
+						index: 0,
+						content_block: { type: "text", text: "" },
+					}
+					yield { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Hello" } }
+					yield {
+						type: "content_block_start",
+						index: 1,
+						content_block: { type: "tool_use", id: "toolu_1", name: "read_file", input: {} },
+					}
+					yield {
+						type: "content_block_delta",
+						index: 1,
+						delta: { type: "input_json_delta", partial_json: '{"path":' },
+					}
+					yield { type: "content_block_stop", index: 1 }
+					yield { type: "message_delta", usage: { output_tokens: 5 } }
+					yield { type: "message_stop" }
+				},
+			}))
+		})
+
+		it("routes the request through the Anthropic /v1/messages client, not chat completions", async () => {
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+
+			for await (const _chunk of handler.createMessage("sys", messages)) {
+				void _chunk // drain
+			}
+
+			expect(mockAnthropicCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "qwen3.7-max",
+					stream: true,
+					system: expect.arrayContaining([expect.objectContaining({ type: "text", text: "sys" })]),
+				}),
+			)
+			// The OpenAI chat completions endpoint must NOT be used for this model.
+			expect(mockCreate).not.toHaveBeenCalled()
+		})
+
+		it("streams text, tool-call, usage and cost chunks from the Anthropic stream", async () => {
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+
+			const chunks: any[] = []
+			for await (const chunk of handler.createMessage("sys", messages)) {
+				chunks.push(chunk)
+			}
+
+			expect(chunks).toContainEqual({ type: "text", text: "Hello" })
+			expect(chunks).toContainEqual({
+				type: "tool_call_partial",
+				index: 1,
+				id: "toolu_1",
+				name: "read_file",
+				arguments: undefined,
+			})
+			expect(chunks).toContainEqual({
+				type: "tool_call_partial",
+				index: 1,
+				id: undefined,
+				name: undefined,
+				arguments: '{"path":',
+			})
+			// message_start usage (with cache tokens) ...
+			expect(chunks).toContainEqual({
+				type: "usage",
+				inputTokens: 10,
+				outputTokens: 0,
+				cacheWriteTokens: 2,
+				cacheReadTokens: 3,
+			})
+			// ... message_delta output tokens ...
+			expect(chunks).toContainEqual({ type: "usage", inputTokens: 0, outputTokens: 5 })
+			// ... and a final cost chunk.
+			expect(chunks.some((c) => c.type === "usage" && c.totalCost !== undefined)).toBe(true)
+		})
+
+		it("applies cache-control breakpoints when the model supports prompt caching", async () => {
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{ role: "user", content: "first" },
+				{ role: "assistant", content: "ok" },
+				{ role: "user", content: "second" },
+			]
+
+			for await (const _chunk of handler.createMessage("sys", messages)) {
+				void _chunk // drain
+			}
+
+			const callArgs = mockAnthropicCreate.mock.calls[0][0] as {
+				system: Array<{ cache_control?: unknown }>
+				messages: Array<{ content: unknown }>
+			}
+			// qwen3.7-max advertises supportsPromptCache, so the system prompt
+			// gets an ephemeral cache_control breakpoint.
+			expect(callArgs.system[0].cache_control).toEqual({ type: "ephemeral" })
+		})
+
+		it("completePrompt uses the Anthropic messages endpoint and returns text content", async () => {
+			mockAnthropicCreate.mockResolvedValue({
+				content: [{ type: "text", text: "the answer" }],
+			})
+
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			expect(await handler.completePrompt("ping")).toBe("the answer")
+			expect(mockAnthropicCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "qwen3.7-max",
+					stream: false,
+					messages: [{ role: "user", content: "ping" }],
+				}),
+			)
+			expect(mockCreate).not.toHaveBeenCalled()
+		})
+
+		it("completePrompt wraps Anthropic errors with an Opencode Go-specific message", async () => {
+			mockAnthropicCreate.mockRejectedValue(new Error("boom"))
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			await expect(handler.completePrompt("ping")).rejects.toThrow("Opencode Go completion error: boom")
+		})
+	})
+
+	describe("isOpencodeGoAnthropicFormatModel", () => {
+		it("classifies Qwen and MiniMax Go models as Anthropic-format", () => {
+			expect(isOpencodeGoAnthropicFormatModel("qwen3.7-max")).toBe(true)
+			expect(isOpencodeGoAnthropicFormatModel("qwen3.7-plus")).toBe(true)
+			expect(isOpencodeGoAnthropicFormatModel("qwen3.6-plus")).toBe(true)
+			expect(isOpencodeGoAnthropicFormatModel("minimax-m3")).toBe(true)
+			expect(isOpencodeGoAnthropicFormatModel("minimax-m2.7")).toBe(true)
+			expect(isOpencodeGoAnthropicFormatModel("minimax-m2.5")).toBe(true)
+		})
+
+		it("classifies OpenAI-compatible Go models as non-Anthropic-format", () => {
+			expect(isOpencodeGoAnthropicFormatModel("glm-5.2")).toBe(false)
+			expect(isOpencodeGoAnthropicFormatModel("kimi-k2.6")).toBe(false)
+			expect(isOpencodeGoAnthropicFormatModel("deepseek-v4-pro")).toBe(false)
+			expect(isOpencodeGoAnthropicFormatModel("mimo-v2.5")).toBe(false)
+		})
+
+		it("defaults unknown model IDs to the OpenAI-compatible format", () => {
+			expect(isOpencodeGoAnthropicFormatModel("some-unknown-model")).toBe(false)
+		})
+	})
 })
diff --git a/src/api/providers/opencode-go.ts b/src/api/providers/opencode-go.ts
index 42e6b56cdc..0bb8303d5d 100644
--- a/src/api/providers/opencode-go.ts
+++ b/src/api/providers/opencode-go.ts
@@ -1,18 +1,31 @@
 import { Anthropic } from "@anthropic-ai/sdk"
+import { CacheControlEphemeral } from "@anthropic-ai/sdk/resources"
 import OpenAI from "openai"
 
-import { opencodeGoDefaultModelId, opencodeGoDefaultModelInfo, OPENCODE_GO_DEFAULT_TEMPERATURE } from "@roo-code/types"
+import {
+	opencodeGoDefaultModelId,
+	opencodeGoDefaultModelInfo,
+	OPENCODE_GO_DEFAULT_TEMPERATURE,
+	isOpencodeGoAnthropicFormatModel,
+} from "@roo-code/types"
 
 import { ApiHandlerOptions } from "../../shared/api"
 
 import { ApiStream } from "../transform/stream"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { convertToR1Format } from "../transform/r1-format"
+import { filterNonAnthropicBlocks } from "../transform/anthropic-filter"
 import { getModelParams } from "../transform/model-params"
 
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 import { RouterProvider } from "./router-provider"
 import { extractReasoningFromDelta } from "./utils/extract-reasoning"
+import { DEFAULT_HEADERS } from "./constants"
+import { calculateApiCostAnthropic } from "../../shared/cost"
+import {
+	convertOpenAIToolsToAnthropic,
+	convertOpenAIToolChoiceToAnthropic,
+} from "../../core/prompts/tools/native-tools/converters"
 
 /**
  * API handler for the Opencode "Go" subscription plan.
@@ -33,10 +46,36 @@ import { extractReasoningFromDelta } from "./utils/extract-reasoning"
  * `supportsMaxTokens`, and prompt-cache support — instead of falling back to a
  * single generic default.
  *
+ * ## Wire-format routing
+ *
+ * The Go gateway exposes two wire formats and maps every model to exactly one
+ * of them (see https://opencode.ai/docs/go):
+ *
+ *   - OpenAI-compatible chat completions (`/v1/chat/completions`, "oa-compat")
+ *     — used by GLM, Kimi, DeepSeek, and MiMo models.
+ *   - Anthropic Messages (`/v1/messages`) — used by Qwen (qwen3.7-max,
+ *     qwen3.7-plus, qwen3.6-plus) and MiniMax (minimax-m3, minimax-m2.7,
+ *     minimax-m2.5) models.
+ *
+ * Sending an Anthropic-format model to the chat completions endpoint is
+ * rejected with `401 Model <id> is not supported for format oa-compat`, so this
+ * handler inspects {@link isOpencodeGoAnthropicFormatModel} and routes those
+ * models through a dedicated Anthropic SDK client against `/v1/messages`.
+ *
  * Supports text generation, reasoning content (GLM/DeepSeek), tool calls,
  * and non-streaming prompt completion.
  */
 export class OpencodeGoHandler extends RouterProvider implements SingleCompletionHandler {
+	/**
+	 * Anthropic SDK client used for Go models that only accept the Anthropic
+	 * Messages wire format (`/v1/messages`).
+	 *
+	 * The SDK appends `/v1/messages` to `baseURL`, so this is set to the Go
+	 * gateway root (`https://opencode.ai/zen/go`) — NOT the `/v1` root used by
+	 * the OpenAI client — to avoid a doubled `/v1` path segment.
+	 */
+	private readonly anthropicClient: Anthropic
+
 	/** Creates a new handler bound to the user's Go API key and selected model. */
 	constructor(options: ApiHandlerOptions) {
 		super({
@@ -48,44 +87,86 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio
 			defaultModelId: opencodeGoDefaultModelId,
 			defaultModelInfo: opencodeGoDefaultModelInfo,
 		})
+
+		this.anthropicClient = new Anthropic({
+			baseURL: "https://opencode.ai/zen/go",
+			apiKey: options.opencodeGoApiKey,
+			defaultHeaders: {
+				...DEFAULT_HEADERS,
+				...(options.openAiHeaders || {}),
+			},
+		})
 	}
 
 	/**
-	 * Resolves the configured model and computes OpenAI-format model parameters
+	 * Resolves the configured model and computes model parameters
 	 * (max tokens, temperature, reasoning effort) from the merged model info.
 	 *
+	 * The wire format is derived from the model ID via
+	 * {@link isOpencodeGoAnthropicFormatModel}: Anthropic-format models compute
+	 * parameters with the `anthropic` format (only `maxTokens` and `temperature`
+	 * are forwarded to `/v1/messages`); everything else uses the `openai` format.
+	 *
 	 * Fetches the live model list first so the merged native + `/models`
 	 * metadata (context window, capability flags, pricing) is available before
 	 * parameter computation — mirroring the original `fetchModel()` flow.
 	 */
 	private async resolveModel() {
 		const { id, info } = await this.fetchModel()
-		const params = getModelParams({
-			format: "openai",
-			modelId: id,
-			model: info,
-			settings: this.options,
-			defaultTemperature: OPENCODE_GO_DEFAULT_TEMPERATURE,
-		})
-		return { id, info, ...params }
+		const isAnthropic = isOpencodeGoAnthropicFormatModel(id)
+		// getModelParams is overloaded on a literal `format`, so branch the call
+		// rather than passing a union — this keeps the returned params typed as a
+		// single concrete shape per branch.
+		const params = isAnthropic
+			? getModelParams({
+					format: "anthropic",
+					modelId: id,
+					model: info,
+					settings: this.options,
+					defaultTemperature: OPENCODE_GO_DEFAULT_TEMPERATURE,
+				})
+			: getModelParams({
+					format: "openai",
+					modelId: id,
+					model: info,
+					settings: this.options,
+					defaultTemperature: OPENCODE_GO_DEFAULT_TEMPERATURE,
+				})
+		return {
+			id,
+			info,
+			format: isAnthropic ? ("anthropic" as const) : ("openai" as const),
+			maxTokens: params.maxTokens,
+			temperature: params.temperature,
+			reasoningEffort: params.reasoningEffort,
+		}
 	}
 
 	/**
 	 * Streams a chat completion response, yielding typed chunks for text,
 	 * reasoning, partial tool calls, and token usage.
 	 *
-	 * For models that require reasoning_content to be passed back during
-	 * multi-turn tool calls (`preserveReasoning`), messages are converted with
-	 * `convertToR1Format` so interleaved thinking is preserved across tool-call
-	 * continuations. Reasoning effort is forwarded when the model advertises
-	 * `supportsReasoningEffort`.
+	 * Anthropic-format models (Qwen/MiniMax) are streamed via
+	 * {@link streamAnthropicMessage} against `/v1/messages`; all other models
+	 * use the OpenAI-compatible chat completions endpoint.
+	 *
+	 * For OpenAI-format models that require reasoning_content to be passed back
+	 * during multi-turn tool calls (`preserveReasoning`), messages are
+	 * converted with `convertToR1Format` so interleaved thinking is preserved
+	 * across tool-call continuations. Reasoning effort is forwarded when the
+	 * model advertises `supportsReasoningEffort`.
 	 */
 	override async *createMessage(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
-		const { id: modelId, info, temperature, reasoningEffort, maxTokens } = await this.resolveModel()
+		const { id: modelId, info, format, temperature, reasoningEffort, maxTokens } = await this.resolveModel()
+
+		if (format === "anthropic") {
+			yield* this.streamAnthropicMessage(modelId, info, temperature, maxTokens, systemPrompt, messages, metadata)
+			return
+		}
 
 		// preserveReasoning models (GLM/DeepSeek/MiMo/MiniMax/Qwen) require
 		// reasoning_content to be carried across tool-call continuations.
@@ -154,15 +235,246 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio
 		}
 	}
 
+	/**
+	 * Streams an Anthropic Messages-format completion for Go models that only
+	 * accept the `/v1/messages` endpoint (Qwen/MiniMax).
+	 *
+	 * Mirrors the Anthropic streaming protocol handled by the dedicated
+	 * MiniMax handler: `message_start`/`message_delta` carry usage, content
+	 * blocks carry text/thinking/tool_use, and a final cost chunk is emitted
+	 * from the accumulated token counts. Prompt-cache breakpoints are applied
+	 * to the system prompt and last two user messages when the model advertises
+	 * `supportsPromptCache`, since the Go gateway honours server-side caching
+	 * and reports cache tokens in usage.
+	 */
+	private async *streamAnthropicMessage(
+		modelId: string,
+		info: { supportsPromptCache?: boolean },
+		temperature: number | undefined,
+		maxTokens: number | undefined,
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		const cacheControl: CacheControlEphemeral = { type: "ephemeral" }
+		const supportsPromptCache = info.supportsPromptCache ?? false
+
+		// Strip non-Anthropic blocks (reasoning, thoughtSignature, etc.) before
+		// sending — the gateway rejects unknown content block types.
+		const sanitizedMessages = filterNonAnthropicBlocks(messages)
+
+		const systemBlocks: Anthropic.Messages.TextBlockParam[] = [
+			supportsPromptCache
+				? { text: systemPrompt, type: "text", cache_control: cacheControl }
+				: { text: systemPrompt, type: "text" },
+		]
+
+		const requestParams: Anthropic.Messages.MessageCreateParams = {
+			model: modelId,
+			max_tokens:
+				this.options.includeMaxTokens === true
+					? this.options.modelMaxTokens || maxTokens || 16_384
+					: (maxTokens ?? 16_384),
+			temperature: this.supportsTemperature(modelId) ? (temperature ?? 1.0) : undefined,
+			system: systemBlocks,
+			messages: supportsPromptCache
+				? this.addAnthropicCacheControl(sanitizedMessages, cacheControl)
+				: sanitizedMessages,
+			stream: true,
+			tools: convertOpenAIToolsToAnthropic(metadata?.tools ?? []),
+			tool_choice: convertOpenAIToolChoiceToAnthropic(metadata?.tool_choice, metadata?.parallelToolCalls),
+		}
+
+		const stream = await this.anthropicClient.messages.create(requestParams)
+
+		let inputTokens = 0
+		let outputTokens = 0
+		let cacheWriteTokens = 0
+		let cacheReadTokens = 0
+
+		for await (const chunk of stream) {
+			switch (chunk.type) {
+				case "message_start": {
+					// Tells us cache reads/writes/input/output.
+					const {
+						input_tokens = 0,
+						output_tokens = 0,
+						cache_creation_input_tokens,
+						cache_read_input_tokens,
+					} = chunk.message.usage
+
+					yield {
+						type: "usage",
+						inputTokens: input_tokens,
+						outputTokens: output_tokens,
+						cacheWriteTokens: cache_creation_input_tokens || undefined,
+						cacheReadTokens: cache_read_input_tokens || undefined,
+					}
+
+					inputTokens += input_tokens
+					outputTokens += output_tokens
+					cacheWriteTokens += cache_creation_input_tokens || 0
+					cacheReadTokens += cache_read_input_tokens || 0
+
+					break
+				}
+				case "message_delta":
+					// Tells us stop_reason, stop_sequence, and output tokens.
+					yield {
+						type: "usage",
+						inputTokens: 0,
+						outputTokens: chunk.usage.output_tokens || 0,
+					}
+
+					break
+				case "message_stop":
+					// No usage data, just an indicator that the message is done.
+					break
+				case "content_block_start":
+					switch (chunk.content_block.type) {
+						case "thinking":
+							// Yield thinking/reasoning content
+							if (chunk.index > 0) {
+								yield { type: "reasoning", text: "\n" }
+							}
+
+							yield { type: "reasoning", text: chunk.content_block.thinking }
+							break
+						case "text":
+							// We may receive multiple text blocks
+							if (chunk.index > 0) {
+								yield { type: "text", text: "\n" }
+							}
+
+							yield { type: "text", text: chunk.content_block.text }
+							break
+						case "tool_use": {
+							// Emit initial tool call partial with id and name
+							yield {
+								type: "tool_call_partial",
+								index: chunk.index,
+								id: chunk.content_block.id,
+								name: chunk.content_block.name,
+								arguments: undefined,
+							}
+							break
+						}
+					}
+					break
+				case "content_block_delta":
+					switch (chunk.delta.type) {
+						case "thinking_delta":
+							yield { type: "reasoning", text: chunk.delta.thinking }
+							break
+						case "text_delta":
+							yield { type: "text", text: chunk.delta.text }
+							break
+						case "input_json_delta": {
+							// Emit tool call partial chunks as arguments stream in
+							yield {
+								type: "tool_call_partial",
+								index: chunk.index,
+								id: undefined,
+								name: undefined,
+								arguments: chunk.delta.partial_json,
+							}
+							break
+						}
+					}
+
+					break
+				case "content_block_stop":
+					// Block is complete - no action needed, NativeToolCallParser handles completion
+					break
+			}
+		}
+
+		// Calculate and yield final cost
+		if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) {
+			const { totalCost } = calculateApiCostAnthropic(
+				info as Parameters<typeof calculateApiCostAnthropic>[0],
+				inputTokens,
+				outputTokens,
+				cacheWriteTokens,
+				cacheReadTokens,
+			)
+
+			yield {
+				type: "usage",
+				inputTokens: 0,
+				outputTokens: 0,
+				totalCost,
+			}
+		}
+	}
+
+	/**
+	 * Adds ephemeral cache-control breakpoints to the last two user messages
+	 * so the gateway can cache the system prompt + most recent turns
+	 * server-side. Only applied when the model advertises prompt-cache support.
+	 */
+	private addAnthropicCacheControl(
+		messages: Anthropic.Messages.MessageParam[],
+		cacheControl: CacheControlEphemeral,
+	): Anthropic.Messages.MessageParam[] {
+		const userMsgIndices = messages.reduce(
+			(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
+			[] as number[],
+		)
+
+		const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
+		const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1
+
+		return messages.map((message, index) => {
+			if (index === lastUserMsgIndex || index === secondLastUserMsgIndex) {
+				return {
+					...message,
+					content:
+						typeof message.content === "string"
+							? [{ type: "text", text: message.content, cache_control: cacheControl }]
+							: message.content.map((content, contentIndex) =>
+									contentIndex === message.content.length - 1
+										? { ...content, cache_control: cacheControl }
+										: content,
+								),
+				}
+			}
+			return message
+		})
+	}
+
 	/**
 	 * Performs a non-streaming chat completion and returns the full response text.
 	 *
+	 * Anthropic-format models are completed via the `/v1/messages` endpoint;
+	 * all other models use the OpenAI-compatible chat completions endpoint.
+	 *
 	 * @param prompt - The user prompt to send as a single user message.
 	 * @returns The model's reply text, or an empty string if no content is returned.
 	 * @throws Error with an Opencode Go-specific prefix if the request fails.
 	 */
 	async completePrompt(prompt: string): Promise<string> {
-		const { id: modelId, temperature, reasoningEffort, maxTokens } = await this.resolveModel()
+		const { id: modelId, format, temperature, reasoningEffort, maxTokens } = await this.resolveModel()
+
+		if (format === "anthropic") {
+			try {
+				const message = await this.anthropicClient.messages.create({
+					model: modelId,
+					max_tokens: maxTokens ?? 16_384,
+					temperature: this.supportsTemperature(modelId) ? (temperature ?? 1.0) : undefined,
+					messages: [{ role: "user", content: prompt }],
+					stream: false,
+				})
+
+				const content = message.content.find(({ type }) => type === "text")
+				return content?.type === "text" ? content.text : ""
+			} catch (error) {
+				if (error instanceof Error) {
+					throw new Error(`Opencode Go completion error: ${error.message}`)
+				}
+				throw error
+			}
+		}
 
 		try {
 			const requestOptions: OpenAI.Chat.ChatCompletionCreateParams = {

From 6979afe487a0aff3396bffed20d439672cf20b96 Mon Sep 17 00:00:00 2001
From: Naved <naved.merchant@gmail.com>
Date: Thu, 18 Jun 2026 23:37:40 -0700
Subject: [PATCH 4/8] Fix context calculation for MiniMax and Qwen

---
 .../src/__tests__/provider-settings.test.ts   | 26 +++++++++++++++++++
 packages/types/src/provider-settings.ts       | 12 +++++++++
 2 files changed, 38 insertions(+)

diff --git a/packages/types/src/__tests__/provider-settings.test.ts b/packages/types/src/__tests__/provider-settings.test.ts
index fc7bee2268..724fc20f34 100644
--- a/packages/types/src/__tests__/provider-settings.test.ts
+++ b/packages/types/src/__tests__/provider-settings.test.ts
@@ -53,6 +53,32 @@ describe("getApiProtocol", () => {
 		})
 	})
 
+	describe("Opencode Go provider", () => {
+		it("should return 'anthropic' for opencode-go Anthropic-format models (Qwen/MiniMax)", () => {
+			expect(getApiProtocol("opencode-go", "qwen3.7-max")).toBe("anthropic")
+			expect(getApiProtocol("opencode-go", "qwen3.7-plus")).toBe("anthropic")
+			expect(getApiProtocol("opencode-go", "qwen3.6-plus")).toBe("anthropic")
+			expect(getApiProtocol("opencode-go", "minimax-m3")).toBe("anthropic")
+			expect(getApiProtocol("opencode-go", "minimax-m2.7")).toBe("anthropic")
+			expect(getApiProtocol("opencode-go", "minimax-m2.5")).toBe("anthropic")
+		})
+
+		it("should return 'openai' for opencode-go OpenAI-format models (GLM/DeepSeek/etc.)", () => {
+			expect(getApiProtocol("opencode-go", "glm-5.2")).toBe("openai")
+			expect(getApiProtocol("opencode-go", "deepseek-v4-pro")).toBe("openai")
+			expect(getApiProtocol("opencode-go", "kimi-k2.5")).toBe("openai")
+			expect(getApiProtocol("opencode-go", "mimo-v2.5")).toBe("openai")
+		})
+
+		it("should return 'openai' for opencode-go without a model", () => {
+			expect(getApiProtocol("opencode-go")).toBe("openai")
+		})
+
+		it("should return 'openai' for opencode-go with an unknown model id", () => {
+			expect(getApiProtocol("opencode-go", "some-future-model")).toBe("openai")
+		})
+	})
+
 	describe("Other providers", () => {
 		it("should return 'openai' for non-anthropic providers regardless of model", () => {
 			expect(getApiProtocol("openrouter", "claude-3-opus")).toBe("openai")
diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts
index b2c850eb86..26c4dee7e1 100644
--- a/packages/types/src/provider-settings.ts
+++ b/packages/types/src/provider-settings.ts
@@ -21,6 +21,7 @@ import {
 	internationalZAiModels,
 	minimaxModels,
 	mimoModels,
+	isOpencodeGoAnthropicFormatModel,
 } from "./providers/index.js"
 
 /**
@@ -595,6 +596,17 @@ export const getApiProtocol = (provider: ProviderName | undefined, modelId?: str
 		return "anthropic"
 	}
 
+	// Opencode Go routes a subset of its models (Qwen, MiniMax) through the
+	// Anthropic Messages wire format (`/v1/messages`), which reports usage in
+	// Anthropic style: `input_tokens` excludes cache tokens, with separate
+	// `cache_creation_input_tokens` / `cache_read_input_tokens` fields. These
+	// models must use the anthropic protocol so token/cost aggregation adds the
+	// cache tokens back into the input total — otherwise the cached prefix is
+	// dropped from `contextTokens`, undercounting context-window usage.
+	if (provider && provider === "opencode-go" && modelId && isOpencodeGoAnthropicFormatModel(modelId)) {
+		return "anthropic"
+	}
+
 	return "openai"
 }
 

From 97e041fe727b6b885f6f4d934926ff3542bfbeac Mon Sep 17 00:00:00 2001
From: Naved <naved.merchant@gmail.com>
Date: Fri, 19 Jun 2026 07:13:39 -0700
Subject: [PATCH 5/8] Update coverage

---
 .../providers/__tests__/opencode-go.spec.ts   | 341 ++++++++++++++++++
 src/api/providers/opencode-go.ts              |  32 +-
 2 files changed, 370 insertions(+), 3 deletions(-)

diff --git a/src/api/providers/__tests__/opencode-go.spec.ts b/src/api/providers/__tests__/opencode-go.spec.ts
index ee163f1f21..fa780f325e 100644
--- a/src/api/providers/__tests__/opencode-go.spec.ts
+++ b/src/api/providers/__tests__/opencode-go.spec.ts
@@ -9,6 +9,7 @@ import OpenAI from "openai"
 import { opencodeGoDefaultModelId, opencodeGoModels, isOpencodeGoAnthropicFormatModel } from "@roo-code/types"
 
 import { OpencodeGoHandler } from "../opencode-go"
+import { getModels } from "../fetchers/modelCache"
 import { ApiHandlerOptions } from "../../../shared/api"
 
 vitest.mock("openai")
@@ -310,6 +311,68 @@ describe("OpencodeGoHandler", () => {
 			const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
 			expect(reasoningChunks).toEqual([{ type: "reasoning", text: "primary thought" }])
 		})
+
+		it("uses convertToOpenAiMessages for non-preserveReasoning models", async () => {
+			// kimi-k2.6 has no preserveReasoning flag, so messages bypass
+			// convertToR1Format and go through the plain OpenAI converter.
+			vitest.mocked(getModels).mockImplementationOnce(async () => ({
+				"kimi-k2.6": { ...opencodeGoModels["kimi-k2.6"] },
+			}))
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "Hi" }, index: 0 }] }
+					yield {
+						choices: [{ delta: {}, index: 0 }],
+						usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+					}
+				},
+			}))
+
+			const handler = new OpencodeGoHandler({ ...mockOptions, opencodeGoModelId: "kimi-k2.6" })
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+
+			for await (const _chunk of handler.createMessage("sys", messages)) {
+				void _chunk
+			}
+
+			const callArgs = mockCreate.mock.calls[0][0] as { messages: Array<{ role: string }> }
+			expect(callArgs.messages[0]).toEqual({ role: "system", content: "sys" })
+			// A single user turn stays a single user message after OpenAI conversion.
+			expect(callArgs.messages.filter((m) => m.role === "user")).toHaveLength(1)
+		})
+
+		it("emits a usage chunk with zeroed tokens when the stream reports no usage", async () => {
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "Hi" }, index: 0 }] }
+					yield {
+						choices: [{ delta: {}, index: 0 }],
+						usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
+					}
+				},
+			}))
+
+			const handler = new OpencodeGoHandler(mockOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+
+			const chunks: any[] = []
+			for await (const chunk of handler.createMessage("sys", messages)) {
+				chunks.push(chunk)
+			}
+
+			expect(chunks).toContainEqual({ type: "usage", inputTokens: 0, outputTokens: 0 })
+		})
+
+		it("honors includeMaxTokens/modelMaxTokens override for max_completion_tokens", async () => {
+			const handler = new OpencodeGoHandler({ ...mockOptions, includeMaxTokens: true, modelMaxTokens: 999 })
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+
+			for await (const _chunk of handler.createMessage("sys", messages)) {
+				void _chunk
+			}
+
+			expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ max_completion_tokens: 999 }))
+		})
 	})
 
 	describe("completePrompt", () => {
@@ -333,6 +396,25 @@ describe("OpencodeGoHandler", () => {
 			const handler = new OpencodeGoHandler(mockOptions)
 			await expect(handler.completePrompt("ping")).rejects.toThrow("Opencode Go completion error: boom")
 		})
+
+		it("rethrows non-Error values unchanged", async () => {
+			mockCreate.mockRejectedValue("not an error")
+			const handler = new OpencodeGoHandler(mockOptions)
+			await expect(handler.completePrompt("ping")).rejects.toBe("not an error")
+		})
+
+		it("returns an empty string when no content is returned", async () => {
+			mockCreate.mockResolvedValue({ choices: [] })
+			const handler = new OpencodeGoHandler(mockOptions)
+			expect(await handler.completePrompt("ping")).toBe("")
+		})
+
+		it("honors includeMaxTokens/modelMaxTokens override for max_completion_tokens", async () => {
+			mockCreate.mockResolvedValue({ choices: [{ message: { content: "ok" } }] })
+			const handler = new OpencodeGoHandler({ ...mockOptions, includeMaxTokens: true, modelMaxTokens: 4321 })
+			await handler.completePrompt("ping")
+			expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ max_completion_tokens: 4321 }))
+		})
 	})
 
 	describe("Anthropic-format models (qwen3.7-max)", () => {
@@ -471,11 +553,270 @@ describe("OpencodeGoHandler", () => {
 					model: "qwen3.7-max",
 					stream: false,
 					messages: [{ role: "user", content: "ping" }],
+					// qwen3.7-max maxTokens (65_536) clamped to 20% of its 1M
+					// context window (200_000) => 65_536. includeMaxTokens is off,
+					// so the model default is used.
+					max_tokens: 65_536,
 				}),
 			)
 			expect(mockCreate).not.toHaveBeenCalled()
 		})
 
+		it("completePrompt honors includeMaxTokens/modelMaxTokens override for max_tokens", async () => {
+			mockAnthropicCreate.mockResolvedValue({
+				content: [{ type: "text", text: "ok" }],
+			})
+
+			const handler = new OpencodeGoHandler({
+				...anthropicOptions,
+				includeMaxTokens: true,
+				modelMaxTokens: 2048,
+			})
+			await handler.completePrompt("ping")
+			expect(mockAnthropicCreate).toHaveBeenCalledWith(expect.objectContaining({ max_tokens: 2048 }))
+		})
+
+		it("completePrompt rethrows non-Error values unchanged from the Anthropic path", async () => {
+			mockAnthropicCreate.mockRejectedValue("not an error")
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			await expect(handler.completePrompt("ping")).rejects.toBe("not an error")
+		})
+
+		it("completePrompt returns an empty string when no text content is returned", async () => {
+			mockAnthropicCreate.mockResolvedValue({ content: [{ type: "tool_use", id: "x", name: "n", input: {} }] })
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			expect(await handler.completePrompt("ping")).toBe("")
+		})
+
+		it("omits tools and tool_choice from the Anthropic request when no tools are provided", async () => {
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+
+			for await (const _chunk of handler.createMessage("sys", messages)) {
+				void _chunk
+			}
+
+			const callArgs = mockAnthropicCreate.mock.calls[0][0] as Record<string, unknown>
+			// Disable-tools path: with no tools, neither field is sent so the
+			// gateway doesn't force a tool-use-only turn.
+			expect(callArgs.tools).toBeUndefined()
+			expect(callArgs.tool_choice).toBeUndefined()
+		})
+
+		it("includes tools and tool_choice in the Anthropic request when tools are provided", async () => {
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+			const tools = [
+				{
+					type: "function",
+					function: {
+						name: "read_file",
+						description: "read a file",
+						parameters: { type: "object", properties: {} },
+					},
+				},
+			]
+
+			for await (const _chunk of handler.createMessage("sys", messages, { tools })) {
+				void _chunk
+			}
+
+			const callArgs = mockAnthropicCreate.mock.calls[0][0] as Record<string, unknown>
+			expect(Array.isArray(callArgs.tools)).toBe(true)
+			expect((callArgs.tools as unknown[]).length).toBe(1)
+			expect(callArgs.tool_choice).toBeDefined()
+		})
+
+		it("skips cache-control breakpoints when the Anthropic-format model does not support prompt caching", async () => {
+			vitest.mocked(getModels).mockImplementationOnce(async () => ({
+				"qwen3.7-max": { ...opencodeGoModels["qwen3.7-max"], supportsPromptCache: false },
+			}))
+
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{ role: "user", content: "first" },
+				{ role: "assistant", content: "ok" },
+				{ role: "user", content: "second" },
+			]
+
+			for await (const _chunk of handler.createMessage("sys", messages)) {
+				void _chunk
+			}
+
+			const callArgs = mockAnthropicCreate.mock.calls[0][0] as {
+				system: Array<{ cache_control?: unknown }>
+				messages: Array<{ cache_control?: unknown }>
+			}
+			expect(callArgs.system[0].cache_control).toBeUndefined()
+			expect(callArgs.messages.every((m) => m.cache_control === undefined)).toBe(true)
+		})
+
+		it("applies cache-control to the last block of array-content user messages", async () => {
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{ role: "user", content: [{ type: "text", text: "first" }] },
+				{ role: "assistant", content: "ok" },
+				{
+					role: "user",
+					content: [
+						{ type: "text", text: "part-a" },
+						{ type: "text", text: "part-b" },
+					],
+				},
+			]
+
+			for await (const _chunk of handler.createMessage("sys", messages)) {
+				void _chunk
+			}
+
+			const callArgs = mockAnthropicCreate.mock.calls[0][0] as { messages: Array<{ content: any }> }
+			const lastUserMsg = callArgs.messages[callArgs.messages.length - 1]
+			const blocks = lastUserMsg.content as any[]
+			// Only the final content block of the last user message is cached.
+			expect(blocks[blocks.length - 1].cache_control).toEqual({ type: "ephemeral" })
+			expect(blocks[0].cache_control).toBeUndefined()
+		})
+
+		it("leaves messages unchanged when there are no user messages to cache", async () => {
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "assistant", content: "only assistant" }]
+
+			for await (const _chunk of handler.createMessage("sys", messages)) {
+				void _chunk
+			}
+
+			const callArgs = mockAnthropicCreate.mock.calls[0][0] as {
+				messages: Array<{ cache_control?: unknown }>
+			}
+			expect(callArgs.messages.every((m) => m.cache_control === undefined)).toBe(true)
+		})
+
+		it("streams thinking content blocks and thinking deltas", async () => {
+			mockAnthropicCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { type: "message_start", message: { usage: { input_tokens: 5, output_tokens: 0 } } }
+					// index 0: thinking block (no leading newline at index 0).
+					yield {
+						type: "content_block_start",
+						index: 0,
+						content_block: { type: "thinking", thinking: "initial thought" },
+					}
+					yield {
+						type: "content_block_delta",
+						index: 0,
+						delta: { type: "thinking_delta", thinking: " more" },
+					}
+					// index 1: text block gets a leading newline separator.
+					yield { type: "content_block_start", index: 1, content_block: { type: "text", text: "" } }
+					yield { type: "content_block_delta", index: 1, delta: { type: "text_delta", text: "answer" } }
+					// index 2: a second thinking block also gets a newline separator.
+					yield {
+						type: "content_block_start",
+						index: 2,
+						content_block: { type: "thinking", thinking: "second thought" },
+					}
+					yield { type: "message_delta", usage: { output_tokens: 3 } }
+					yield { type: "message_stop" }
+				},
+			}))
+
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+
+			const chunks: any[] = []
+			for await (const chunk of handler.createMessage("sys", messages)) {
+				chunks.push(chunk)
+			}
+
+			// index 0 thinking block (no leading newline separator at index 0).
+			expect(chunks).toContainEqual({ type: "reasoning", text: "initial thought" })
+			expect(chunks).toContainEqual({ type: "reasoning", text: " more" })
+			// index 1 text block gets a leading newline separator.
+			expect(chunks).toContainEqual({ type: "text", text: "\n" })
+			expect(chunks).toContainEqual({ type: "text", text: "answer" })
+			// index 2 thinking block gets a leading newline separator.
+			expect(chunks).toContainEqual({ type: "reasoning", text: "\n" })
+			expect(chunks).toContainEqual({ type: "reasoning", text: "second thought" })
+		})
+
+		it("honors includeMaxTokens/modelMaxTokens override for the streaming Anthropic max_tokens", async () => {
+			const handler = new OpencodeGoHandler({
+				...anthropicOptions,
+				includeMaxTokens: true,
+				modelMaxTokens: 8192,
+			})
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+
+			for await (const _chunk of handler.createMessage("sys", messages)) {
+				void _chunk
+			}
+
+			expect(mockAnthropicCreate).toHaveBeenCalledWith(expect.objectContaining({ max_tokens: 8192 }))
+		})
+
+		it("falls back to the model max_tokens when includeMaxTokens is on but modelMaxTokens is unset", async () => {
+			const handler = new OpencodeGoHandler({ ...anthropicOptions, includeMaxTokens: true })
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+
+			for await (const _chunk of handler.createMessage("sys", messages)) {
+				void _chunk
+			}
+
+			// qwen3.7-max maxTokens (65_536) clamped to 20% of 1M context => 65_536.
+			expect(mockAnthropicCreate).toHaveBeenCalledWith(expect.objectContaining({ max_tokens: 65_536 }))
+		})
+
+		it("accumulates output tokens across message_delta events into the final cost", async () => {
+			mockAnthropicCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { type: "message_start", message: { usage: { input_tokens: 10, output_tokens: 0 } } }
+					yield { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } }
+					yield { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "hi" } }
+					yield { type: "message_delta", usage: { output_tokens: 4 } }
+					yield { type: "message_delta", usage: { output_tokens: 6 } }
+					yield { type: "message_stop" }
+				},
+			}))
+
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+
+			const chunks: any[] = []
+			for await (const chunk of handler.createMessage("sys", messages)) {
+				chunks.push(chunk)
+			}
+
+			const costChunk = chunks.find((c) => c.type === "usage" && c.totalCost !== undefined)
+			expect(costChunk).toBeDefined()
+			// qwen3.7-max: input $2.5/M, output $7.5/M. Accumulated output
+			// tokens (4 + 6 = 10) must feed the cost calc — without the
+			// accumulation fix this would only reflect the 10 input tokens
+			// (0.000025) instead of input + output (0.0001).
+			expect(costChunk.totalCost).toBeCloseTo((10 * 2.5 + 10 * 7.5) / 1_000_000, 10)
+		})
+
+		it("does not yield a cost chunk when the stream reports no token usage", async () => {
+			mockAnthropicCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { type: "message_start", message: { usage: { input_tokens: 0, output_tokens: 0 } } }
+					yield { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } }
+					yield { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "hi" } }
+					yield { type: "message_delta", usage: { output_tokens: 0 } }
+					yield { type: "message_stop" }
+				},
+			}))
+
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+
+			const chunks: any[] = []
+			for await (const chunk of handler.createMessage("sys", messages)) {
+				chunks.push(chunk)
+			}
+
+			expect(chunks.some((c) => c.type === "usage" && c.totalCost !== undefined)).toBe(false)
+		})
+
 		it("completePrompt wraps Anthropic errors with an Opencode Go-specific message", async () => {
 			mockAnthropicCreate.mockRejectedValue(new Error("boom"))
 			const handler = new OpencodeGoHandler(anthropicOptions)
diff --git a/src/api/providers/opencode-go.ts b/src/api/providers/opencode-go.ts
index 0bb8303d5d..973600c720 100644
--- a/src/api/providers/opencode-go.ts
+++ b/src/api/providers/opencode-go.ts
@@ -269,6 +269,12 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio
 				: { text: systemPrompt, type: "text" },
 		]
 
+		// Only attach tools/tool_choice when the caller actually provides
+		// tools — sending an empty tool list (or a tool_choice derived from an
+		// empty set) forces some Anthropic-compatible gateways into a
+		// tool-use-only mode and is wasteful for plain text turns.
+		const tools = metadata?.tools && metadata.tools.length > 0 ? metadata.tools : undefined
+
 		const requestParams: Anthropic.Messages.MessageCreateParams = {
 			model: modelId,
 			max_tokens:
@@ -281,8 +287,15 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio
 				? this.addAnthropicCacheControl(sanitizedMessages, cacheControl)
 				: sanitizedMessages,
 			stream: true,
-			tools: convertOpenAIToolsToAnthropic(metadata?.tools ?? []),
-			tool_choice: convertOpenAIToolChoiceToAnthropic(metadata?.tool_choice, metadata?.parallelToolCalls),
+			...(tools
+				? {
+						tools: convertOpenAIToolsToAnthropic(tools),
+						tool_choice: convertOpenAIToolChoiceToAnthropic(
+							metadata?.tool_choice,
+							metadata?.parallelToolCalls,
+						),
+					}
+				: {}),
 		}
 
 		const stream = await this.anthropicClient.messages.create(requestParams)
@@ -320,6 +333,12 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio
 				}
 				case "message_delta":
 					// Tells us stop_reason, stop_sequence, and output tokens.
+					// Add this event's output tokens to the running total used for
+					// cost calculation — otherwise the final cost only reflects the
+					// (typically zero) message_start output tokens. NOTE(review):
+					// Anthropic documents message_delta usage as cumulative, so `+=`
+					// over-counts if a response emits several message_delta events.
+					outputTokens += chunk.usage.output_tokens || 0
 					yield {
 						type: "usage",
 						inputTokens: 0,
@@ -460,7 +479,14 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio
 			try {
 				const message = await this.anthropicClient.messages.create({
 					model: modelId,
-					max_tokens: maxTokens ?? 16_384,
+					// Honour the same includeMaxTokens/modelMaxTokens override
+					// logic as the streaming path so non-streaming completions
+					// respect the user's max-output slider instead of always
+					// falling back to the model default.
+					max_tokens:
+						this.options.includeMaxTokens === true
+							? this.options.modelMaxTokens || maxTokens || 16_384
+							: (maxTokens ?? 16_384),
 					temperature: this.supportsTemperature(modelId) ? (temperature ?? 1.0) : undefined,
 					messages: [{ role: "user", content: prompt }],
 					stream: false,

From ecc266d2021f9e2b5d14487e615151df326b1ca0 Mon Sep 17 00:00:00 2001
From: Naved <naved.merchant@gmail.com>
Date: Fri, 19 Jun 2026 07:14:37 -0700
Subject: [PATCH 6/8] add opencode test

---
 .../types/src/__tests__/opencode-go.test.ts   | 124 ++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 packages/types/src/__tests__/opencode-go.test.ts

diff --git a/packages/types/src/__tests__/opencode-go.test.ts b/packages/types/src/__tests__/opencode-go.test.ts
new file mode 100644
index 0000000000..25d862c268
--- /dev/null
+++ b/packages/types/src/__tests__/opencode-go.test.ts
@@ -0,0 +1,124 @@
+import {
+	opencodeGoDefaultModelId,
+	opencodeGoDefaultModelInfo,
+	opencodeGoModels,
+	OPENCODE_GO_DEFAULT_TEMPERATURE,
+	OPENCODE_GO_ANTHROPIC_FORMAT_MODELS,
+	isOpencodeGoAnthropicFormatModel,
+	getOpencodeGoModelInfo,
+} from "../providers/opencode-go.js"
+
+describe("opencode-go registry", () => {
+	const anthropicFormatModels = [
+		"qwen3.7-max",
+		"qwen3.7-plus",
+		"qwen3.6-plus",
+		"minimax-m3",
+		"minimax-m2.7",
+		"minimax-m2.5",
+	]
+	const openaiFormatModels = [
+		"glm-5",
+		"glm-5.1",
+		"glm-5.2",
+		"kimi-k2.5",
+		"kimi-k2.6",
+		"mimo-v2.5",
+		"mimo-v2.5-pro",
+		"deepseek-v4-pro",
+		"deepseek-v4-flash",
+	]
+
+	describe("isOpencodeGoAnthropicFormatModel", () => {
+		it("classifies Qwen and MiniMax models as Anthropic-format", () => {
+			for (const id of anthropicFormatModels) {
+				expect(isOpencodeGoAnthropicFormatModel(id)).toBe(true)
+			}
+		})
+
+		it("classifies GLM/Kimi/MiMo/DeepSeek models as OpenAI-compatible", () => {
+			for (const id of openaiFormatModels) {
+				expect(isOpencodeGoAnthropicFormatModel(id)).toBe(false)
+			}
+		})
+
+		it("defaults unknown model IDs to the OpenAI-compatible format", () => {
+			expect(isOpencodeGoAnthropicFormatModel("some-future-model")).toBe(false)
+			expect(isOpencodeGoAnthropicFormatModel("")).toBe(false)
+		})
+	})
+
+	describe("getOpencodeGoModelInfo", () => {
+		it("returns the native ModelInfo for a curated model", () => {
+			const info = getOpencodeGoModelInfo("qwen3.7-max")
+			expect(info).toBeDefined()
+			expect(info?.maxTokens).toBe(65_536)
+			expect(info?.contextWindow).toBe(1_000_000)
+			expect(info?.supportsPromptCache).toBe(true)
+		})
+
+		it("returns undefined for an unknown model ID", () => {
+			expect(getOpencodeGoModelInfo("not-a-real-go-model")).toBeUndefined()
+		})
+	})
+
+	describe("OPENCODE_GO_ANTHROPIC_FORMAT_MODELS", () => {
+		it("contains exactly the Qwen and MiniMax models", () => {
+			expect([...OPENCODE_GO_ANTHROPIC_FORMAT_MODELS].sort()).toEqual([...anthropicFormatModels].sort())
+		})
+
+		// The PR description calls out that the format-routing set must stay in
+		// sync with the Go model table — every routed model must have a native
+		// registry entry so capability flags and pricing resolve correctly.
+		it("every Anthropic-format model has a native registry entry", () => {
+			for (const id of OPENCODE_GO_ANTHROPIC_FORMAT_MODELS) {
+				expect(opencodeGoModels[id]).toBeDefined()
+			}
+		})
+	})
+
+	describe("opencodeGoModels registry invariants", () => {
+		it("every entry has a positive maxTokens and contextWindow", () => {
+			for (const [id, info] of Object.entries(opencodeGoModels)) {
+				expect(info.maxTokens).toBeGreaterThan(0)
+				expect(info.contextWindow).toBeGreaterThan(0)
+				// Sanity: max output must not exceed the context window.
+				expect(info.maxTokens).toBeLessThanOrEqual(info.contextWindow)
+				void id
+			}
+		})
+
+		it("every entry declares supportsImages", () => {
+			for (const info of Object.values(opencodeGoModels)) {
+				expect(typeof info.supportsImages).toBe("boolean")
+			}
+		})
+
+		it("models with an array supportsReasoningEffort expose a non-empty allow-list", () => {
+			for (const info of Object.values(opencodeGoModels)) {
+				if (Array.isArray(info.supportsReasoningEffort)) {
+					expect(info.supportsReasoningEffort.length).toBeGreaterThan(0)
+				}
+			}
+		})
+	})
+
+	describe("defaults", () => {
+		it("the default model id is a curated OpenAI-compatible model", () => {
+			expect(opencodeGoDefaultModelId).toBe("glm-5.2")
+			expect(opencodeGoModels[opencodeGoDefaultModelId]).toBeDefined()
+			expect(isOpencodeGoAnthropicFormatModel(opencodeGoDefaultModelId)).toBe(false)
+		})
+
+		it("exposes a fully-populated default ModelInfo fallback", () => {
+			expect(opencodeGoDefaultModelInfo.maxTokens).toBeGreaterThan(0)
+			expect(opencodeGoDefaultModelInfo.contextWindow).toBeGreaterThan(0)
+			expect(opencodeGoDefaultModelInfo.supportsPromptCache).toBe(false)
+			expect(opencodeGoDefaultModelInfo.description).toBeTruthy()
+		})
+
+		it("exposes a deterministic default temperature", () => {
+			expect(OPENCODE_GO_DEFAULT_TEMPERATURE).toBe(0)
+		})
+	})
+})

From 3545e9ed1aba77d31adde446eb631e0fb1a9c0d6 Mon Sep 17 00:00:00 2001
From: Naved <naved.merchant@gmail.com>
Date: Fri, 19 Jun 2026 07:17:49 -0700
Subject: [PATCH 7/8] fix typescript error

---
 src/api/providers/__tests__/opencode-go.spec.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/api/providers/__tests__/opencode-go.spec.ts b/src/api/providers/__tests__/opencode-go.spec.ts
index fa780f325e..6dbab180a1 100644
--- a/src/api/providers/__tests__/opencode-go.spec.ts
+++ b/src/api/providers/__tests__/opencode-go.spec.ts
@@ -606,7 +606,7 @@ describe("OpencodeGoHandler", () => {
 		it("includes tools and tool_choice in the Anthropic request when tools are provided", async () => {
 			const handler = new OpencodeGoHandler(anthropicOptions)
 			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
-			const tools = [
+			const tools: OpenAI.Chat.ChatCompletionTool[] = [
 				{
 					type: "function",
 					function: {
@@ -617,7 +617,7 @@ describe("OpencodeGoHandler", () => {
 				},
 			]
 
-			for await (const _chunk of handler.createMessage("sys", messages, { tools })) {
+			for await (const _chunk of handler.createMessage("sys", messages, { taskId: "test-task", tools })) {
 				void _chunk
 			}
 

From fd85efa414f16f134d0d2e221eb670ed1681cd3a Mon Sep 17 00:00:00 2001
From: Naved <naved.merchant@gmail.com>
Date: Fri, 19 Jun 2026 07:27:55 -0700
Subject: [PATCH 8/8] fix(opencode-go): address PR #652 review feedback

- Type streamAnthropicMessage's info param as ModelInfo and drop the
  force-cast so calculateApiCostAnthropic can no longer silently return
  $0 when pricing fields are absent.
- Wrap pre-stream Anthropic-format errors (401/429/network) with the
  'Opencode Go completion error:' prefix for consistency with completePrompt.
- Clarify the registry doc: supportsPromptCache controls client-side
  cache_control injection (Anthropic path) only; OA-compat models price
  server-side cached_tokens via cacheReadsPrice regardless of the flag
  (MiMo stays false, matching the dedicated mimo provider).
- Add cacheWritesPrice (0.375) to minimax-m3 so its cache writes are
  billed, matching M2.5/M2.7.
- Add supportsMaxTokens to DeepSeek V4 models so the max-output slider
  is exposed like GLM.
- Strengthen the streaming cost test to assert totalCost > 0, and add
  registry invariants for MiniMax cache-write pricing and DeepSeek
  supportsMaxTokens plus a streaming error-wrapping test.
---
 .changeset/add-glm-5-2-support.md             |  2 ++
 .../types/src/__tests__/opencode-go.test.ts   | 20 ++++++++++++
 packages/types/src/providers/opencode-go.ts   | 32 ++++++++++++++++---
 .../providers/__tests__/opencode-go.spec.ts   | 23 +++++++++++--
 src/api/providers/opencode-go.ts              | 19 +++++++++--
 5 files changed, 87 insertions(+), 9 deletions(-)

diff --git a/.changeset/add-glm-5-2-support.md b/.changeset/add-glm-5-2-support.md
index 26bd162cdf..323def2089 100644
--- a/.changeset/add-glm-5-2-support.md
+++ b/.changeset/add-glm-5-2-support.md
@@ -3,3 +3,5 @@
 ---
 
 Add GLM-5.2 support with High/Max `reasoning_effort` tiers. The default effort is High (deep reasoning stays opt-in), Max is selected only when the user explicitly picks it, and the parameter is omitted entirely when reasoning is disabled.
+
+Also refines the Opencode Go provider per review: bill MiniMax M3 cache writes (`cacheWritesPrice`), expose the max-output slider for DeepSeek V4 models (`supportsMaxTokens`), wrap pre-stream Anthropic-format errors with the provider prefix, and type the Anthropic streaming path's model info as `ModelInfo` so cost calculation can no longer silently return `$0`.
diff --git a/packages/types/src/__tests__/opencode-go.test.ts b/packages/types/src/__tests__/opencode-go.test.ts
index 25d862c268..617c88675c 100644
--- a/packages/types/src/__tests__/opencode-go.test.ts
+++ b/packages/types/src/__tests__/opencode-go.test.ts
@@ -101,6 +101,26 @@ describe("opencode-go registry", () => {
 				}
 			}
 		})
+
+		it("every Anthropic-format model with prompt-cache injection declares a cacheWritesPrice", () => {
+			// MiniMax/Qwen route through /v1/messages with client-side
+			// cache_control breakpoints, so cache_creation_input_tokens are
+			// reported and billed — each must carry a cacheWritesPrice or the
+			// write cost is silently reported as $0.
+			for (const id of OPENCODE_GO_ANTHROPIC_FORMAT_MODELS) {
+				const info = getOpencodeGoModelInfo(id)
+				expect(info).toBeDefined()
+				if (info?.supportsPromptCache) {
+					expect(info.cacheWritesPrice).toBeDefined()
+					expect(info.cacheReadsPrice).toBeDefined()
+				}
+			}
+		})
+
+		it("DeepSeek entries expose supportsMaxTokens so the max-output slider is available", () => {
+			expect(getOpencodeGoModelInfo("deepseek-v4-pro")?.supportsMaxTokens).toBe(true)
+			expect(getOpencodeGoModelInfo("deepseek-v4-flash")?.supportsMaxTokens).toBe(true)
+		})
 	})
 
 	describe("defaults", () => {
diff --git a/packages/types/src/providers/opencode-go.ts b/packages/types/src/providers/opencode-go.ts
index 7a646b6236..5b1e0ada8e 100644
--- a/packages/types/src/providers/opencode-go.ts
+++ b/packages/types/src/providers/opencode-go.ts
@@ -39,10 +39,21 @@ export const OPENCODE_GO_DEFAULT_TEMPERATURE = 0
  * max-token values stay in sync with the gateway while capability flags and
  * pricing remain correct.
  *
- * `supportsPromptCache` is intentionally `true` for models whose Go pricing
- * table lists a "Cached Read" price: the gateway honours server-side caching
- * and reports `cached_tokens` in usage, which the handler forwards for cost
- * calculation. Client-side `cache_control` injection is not used on this path.
+ * `supportsPromptCache` has two distinct meanings depending on the wire format:
+ *
+ *   - Anthropic-format models (Qwen/MiniMax): `true` enables client-side
+ *     `cache_control` breakpoint injection in the handler's `/v1/messages`
+ *     path. The gateway then reports `cache_creation_input_tokens` /
+ *     `cache_read_input_tokens`, which are priced via `cacheWritesPrice` /
+ *     `cacheReadsPrice`.
+ *   - OpenAI-compatible models (GLM/Kimi/DeepSeek/MiMo): there is no
+ *     client-side `cache_control` concept, so the flag is NOT used to build
+ *     the request. The gateway performs server-side caching and reports
+ *     `cached_tokens` in `prompt_tokens_details`, which the handler forwards
+ *     as `cacheReadTokens` and prices via `cacheReadsPrice` regardless of the
+ *     flag. MiMo therefore declares `supportsPromptCache: false` (no
+ *     client-side injection, matching the dedicated `mimo` provider) while
+ *     still carrying a `cacheReadsPrice` for its server-side cache reads.
  */
 export const opencodeGoModels: Record<string, ModelInfo> = {
 	// --- Zhipu GLM ---
@@ -199,6 +210,12 @@ export const opencodeGoModels: Record<string, ModelInfo> = {
 		preserveReasoning: true,
 		inputPrice: 0.3,
 		outputPrice: 1.2,
+		// M3 routes through the Anthropic Messages path with client-side
+		// cache_control injection active, so cache_creation_input_tokens are
+		// reported and billed. Matches the MiniMax write price shared by
+		// M2.5/M2.7 (same vendor/pricing tier: $0.3 in / $1.2 out / $0.06
+		// cache read).
+		cacheWritesPrice: 0.375,
 		cacheReadsPrice: 0.06,
 		description:
 			"MiniMax M3, a frontier multimodal coding model with a 1M context window, agentic reasoning, and tool use. Available via the Opencode Go plan.",
@@ -265,6 +282,12 @@ export const opencodeGoModels: Record<string, ModelInfo> = {
 		contextWindow: 1_000_000,
 		supportsImages: false,
 		supportsPromptCache: true,
+		// DeepSeek advertises a large, explicit max-output ceiling (384k), so
+		// expose the configurable max-output slider like GLM. Without this the
+		// slider is hidden and the effective default is the 20% context-window
+		// clamp (200k); with it, users can raise the budget up to the model's
+		// 384k ceiling.
+		supportsMaxTokens: true,
 		supportsReasoningEffort: ["disable", "low", "medium", "high", "xhigh"],
 		preserveReasoning: true,
 		reasoningEffort: "high",
@@ -279,6 +302,7 @@ export const opencodeGoModels: Record<string, ModelInfo> = {
 		contextWindow: 1_000_000,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsMaxTokens: true,
 		supportsReasoningEffort: ["disable", "low", "medium", "high", "xhigh"],
 		preserveReasoning: true,
 		reasoningEffort: "high",
diff --git a/src/api/providers/__tests__/opencode-go.spec.ts b/src/api/providers/__tests__/opencode-go.spec.ts
index 6dbab180a1..31ce34fe7f 100644
--- a/src/api/providers/__tests__/opencode-go.spec.ts
+++ b/src/api/providers/__tests__/opencode-go.spec.ts
@@ -516,8 +516,13 @@ describe("OpencodeGoHandler", () => {
 			})
 			// ... message_delta output tokens ...
 			expect(chunks).toContainEqual({ type: "usage", inputTokens: 0, outputTokens: 5 })
-			// ... and a final cost chunk.
-			expect(chunks.some((c) => c.type === "usage" && c.totalCost !== undefined)).toBe(true)
+			// ... and a final cost chunk. Assert totalCost > 0 (not just
+			// defined) so CI catches the output-token accumulation regression —
+			// without accumulation the cost would be computed from
+			// outputTokens: 0 and report ~$0.
+			expect(chunks.some((c) => c.type === "usage" && typeof c.totalCost === "number" && c.totalCost > 0)).toBe(
+				true,
+			)
 		})
 
 		it("applies cache-control breakpoints when the model supports prompt caching", async () => {
@@ -822,6 +827,20 @@ describe("OpencodeGoHandler", () => {
 			const handler = new OpencodeGoHandler(anthropicOptions)
 			await expect(handler.completePrompt("ping")).rejects.toThrow("Opencode Go completion error: boom")
 		})
+
+		it("wraps pre-stream Anthropic errors from createMessage with an Opencode Go-specific message", async () => {
+			// Pre-stream failures (401, 429, network) reject create() before any
+			// chunk is emitted, so draining the generator must reject with the
+			// same "Opencode Go completion error:" prefix that completePrompt uses.
+			mockAnthropicCreate.mockRejectedValue(new Error("rate limited"))
+			const handler = new OpencodeGoHandler(anthropicOptions)
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+			await expect(async () => {
+				for await (const _chunk of handler.createMessage("sys", messages)) {
+					void _chunk // drain only; chunk contents are irrelevant here
+				}
+			}).rejects.toThrow("Opencode Go completion error: rate limited")
+		})
 	})
 
 	describe("isOpencodeGoAnthropicFormatModel", () => {
diff --git a/src/api/providers/opencode-go.ts b/src/api/providers/opencode-go.ts
index 973600c720..27d8ab3f7e 100644
--- a/src/api/providers/opencode-go.ts
+++ b/src/api/providers/opencode-go.ts
@@ -3,6 +3,7 @@ import { CacheControlEphemeral } from "@anthropic-ai/sdk/resources"
 import OpenAI from "openai"
 
 import {
+	type ModelInfo,
 	opencodeGoDefaultModelId,
 	opencodeGoDefaultModelInfo,
 	OPENCODE_GO_DEFAULT_TEMPERATURE,
@@ -249,7 +250,7 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio
 	 */
 	private async *streamAnthropicMessage(
 		modelId: string,
-		info: { supportsPromptCache?: boolean },
+		info: ModelInfo,
 		temperature: number | undefined,
 		maxTokens: number | undefined,
 		systemPrompt: string,
@@ -298,7 +299,19 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio
 				: {}),
 		}
 
-		const stream = await this.anthropicClient.messages.create(requestParams)
+		// Wrap pre-stream errors (401, 429, network) with completePrompt's
+		// "Opencode Go completion error:" prefix so the Anthropic-format path
+		// fails consistently; mid-stream errors propagate unchanged, like the
+		// OpenAI streaming path. NOTE(review): only `message` is kept — confirm.
+		let stream
+		try {
+			stream = await this.anthropicClient.messages.create(requestParams)
+		} catch (error) {
+			if (error instanceof Error) {
+				throw new Error(`Opencode Go completion error: ${error.message}`)
+			}
+			throw error
+		}
 
 		let inputTokens = 0
 		let outputTokens = 0
@@ -411,7 +424,7 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio
 		// Calculate and yield final cost
 		if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) {
 			const { totalCost } = calculateApiCostAnthropic(
-				info as Parameters<typeof calculateApiCostAnthropic>[0],
+				info,
 				inputTokens,
 				outputTokens,
 				cacheWriteTokens,