From c25488559ef1bbb61d64441d3ccec54ae6985b8e Mon Sep 17 00:00:00 2001 From: Naved Date: Thu, 18 Jun 2026 22:36:33 -0700 Subject: [PATCH 1/8] opencode go --- packages/types/src/providers/opencode-go.ts | 278 ++++++++++++++++++ .../providers/__tests__/opencode-go.spec.ts | 80 ++++- .../fetchers/__tests__/opencode-go.spec.ts | 88 +++++- src/api/providers/fetchers/opencode-go.ts | 60 +++- src/api/providers/opencode-go.ts | 68 ++++- 5 files changed, 533 insertions(+), 41 deletions(-) diff --git a/packages/types/src/providers/opencode-go.ts b/packages/types/src/providers/opencode-go.ts index 0efabcf155..fc165c3362 100644 --- a/packages/types/src/providers/opencode-go.ts +++ b/packages/types/src/providers/opencode-go.ts @@ -20,3 +20,281 @@ export const opencodeGoDefaultModelInfo: ModelInfo = { } export const OPENCODE_GO_DEFAULT_TEMPERATURE = 0 + +/** + * Native per-model configuration for the Opencode Go plan. + * + * The Go `/v1/models` endpoint only reliably returns `id` and (sometimes) + * `context_window`/`max_tokens`. It does NOT advertise capability flags such + * as `supportsReasoningEffort`, `preserveReasoning`, `supportsMaxTokens`, + * `supportsPromptCache`, or pricing — all of which are required for the + * extension to drive reasoning controls, interleaved-thinking tool calls, + * the max-output-tokens slider, and accurate cost reporting. + * + * This registry encodes the native capabilities of each curated Go model, + * sourced from the same vendor specs used by the dedicated providers + * (zai/moonshot/mimo/minimax/deepseek/qwen) and the Go pricing table at + * https://opencode.ai/docs/go/#usage-limits. The fetcher merges the live + * `/models` payload on top of these defaults so that context-window and + * max-token values stay in sync with the gateway while capability flags and + * pricing remain correct. 
+ * + * `supportsPromptCache` is intentionally `true` for models whose Go pricing + * table lists a "Cached Read" price: the gateway honours server-side caching + * and reports `cached_tokens` in usage, which the handler forwards for cost + * calculation. Client-side `cache_control` injection is not used on this path. + */ +export const opencodeGoModels: Record = { + // --- Zhipu GLM --- + "glm-5": { + maxTokens: 16_384, + contextWindow: 202_752, + supportsImages: false, + supportsPromptCache: true, + supportsReasoningEffort: ["disable", "medium"], + reasoningEffort: "medium", + preserveReasoning: true, + inputPrice: 1.0, + outputPrice: 3.2, + cacheReadsPrice: 0.2, + description: + "GLM-5 is Zhipu's next-generation model with a 202k context window and built-in thinking capabilities. Available via the Opencode Go plan.", + }, + "glm-5.1": { + maxTokens: 131_072, + contextWindow: 204_800, + supportsImages: false, + supportsPromptCache: true, + supportsMaxTokens: true, + supportsReasoningEffort: ["disable", "medium"], + reasoningEffort: "medium", + preserveReasoning: true, + inputPrice: 1.4, + outputPrice: 4.4, + cacheReadsPrice: 0.26, + description: + "GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. Available via the Opencode Go plan.", + }, + "glm-5.2": { + maxTokens: 131_072, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + supportsMaxTokens: true, + supportsReasoningEffort: ["disable", "high", "max"], + reasoningEffort: "high", + preserveReasoning: true, + // Go pricing matches GLM-5.1 ($1.4 / $0.26 cache / $4.4 out per 1M tokens). + inputPrice: 1.4, + outputPrice: 4.4, + cacheReadsPrice: 0.26, + description: + "GLM-5.2 is Zhipu's flagship model with a 1M context window, 128k max output, and dual thinking-effort modes (High/Max). It delivers top-tier long-context reasoning, coding, and agentic performance. 
Available via the Opencode Go plan.", + }, + + // --- Moonshot Kimi --- + "kimi-k2.5": { + maxTokens: 16_384, + contextWindow: 262_144, + supportsImages: false, + supportsPromptCache: true, + supportsTemperature: true, + defaultTemperature: 1.0, + inputPrice: 0.6, + outputPrice: 3.0, + cacheReadsPrice: 0.1, + description: + "Kimi K2.5 is the latest generation of Moonshot AI's Kimi series, featuring improved reasoning capabilities. Available via the Opencode Go plan.", + }, + "kimi-k2.6": { + maxTokens: 16_384, + contextWindow: 262_144, + supportsImages: false, + supportsPromptCache: true, + supportsTemperature: true, + defaultTemperature: 1.0, + inputPrice: 0.95, + outputPrice: 4.0, + cacheReadsPrice: 0.16, + description: + "Kimi K2.6 is Moonshot AI's native multimodal agentic MoE model with a 256k context window, built for long-horizon coding and tool use. Available via the Opencode Go plan.", + }, + + // --- Xiaomi MiMo --- + "mimo-v2.5": { + maxTokens: 131_072, + contextWindow: 1_048_576, + supportsImages: true, + supportsPromptCache: false, + preserveReasoning: true, + inputPrice: 0.14, + outputPrice: 0.28, + cacheReadsPrice: 0.0028, + longContextPricing: { + thresholdTokens: 256_000, + inputPriceMultiplier: 2, + outputPriceMultiplier: 2, + cacheReadsPriceMultiplier: 2, + }, + description: + "MiMo V2.5 - Xiaomi's full-modal understanding model (text, image, audio, video) with 1M context, deep thinking, and tool calling. Available via the Opencode Go plan.", + }, + "mimo-v2.5-pro": { + maxTokens: 131_072, + contextWindow: 1_048_576, + supportsImages: false, + supportsPromptCache: false, + preserveReasoning: true, + inputPrice: 1.74, + outputPrice: 3.48, + cacheReadsPrice: 0.0145, + longContextPricing: { + thresholdTokens: 256_000, + inputPriceMultiplier: 2, + outputPriceMultiplier: 2, + cacheReadsPriceMultiplier: 2, + }, + description: + "MiMo V2.5 Pro - Xiaomi's flagship reasoning model with 1M context, deep thinking, and tool calling. 
Available via the Opencode Go plan.", + }, + + // --- MiniMax --- + "minimax-m2.5": { + maxTokens: 16_384, + contextWindow: 204_800, + supportsImages: false, + supportsPromptCache: true, + includedTools: ["search_and_replace"], + excludedTools: ["apply_diff"], + preserveReasoning: true, + inputPrice: 0.3, + outputPrice: 1.2, + cacheWritesPrice: 0.375, + cacheReadsPrice: 0.06, + description: + "MiniMax M2.5, the latest MiniMax model with enhanced coding and agentic capabilities. Available via the Opencode Go plan.", + }, + "minimax-m2.7": { + maxTokens: 16_384, + contextWindow: 204_800, + supportsImages: false, + supportsPromptCache: true, + includedTools: ["search_and_replace"], + excludedTools: ["apply_diff"], + preserveReasoning: true, + inputPrice: 0.3, + outputPrice: 1.2, + cacheWritesPrice: 0.375, + cacheReadsPrice: 0.06, + description: + "MiniMax M2.7, the latest MiniMax model with recursive self-improvement capabilities. Available via the Opencode Go plan.", + }, + "minimax-m3": { + maxTokens: 131_072, + contextWindow: 1_000_000, + supportsImages: true, + supportsPromptCache: true, + includedTools: ["search_and_replace"], + excludedTools: ["apply_diff"], + preserveReasoning: true, + inputPrice: 0.3, + outputPrice: 1.2, + cacheReadsPrice: 0.06, + description: + "MiniMax M3, a frontier multimodal coding model with a 1M context window, agentic reasoning, and tool use. Available via the Opencode Go plan.", + }, + + // --- Alibaba Qwen --- + "qwen3.6-plus": { + maxTokens: 65_536, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + preserveReasoning: true, + inputPrice: 0.5, + outputPrice: 3.0, + cacheReadsPrice: 0.05, + cacheWritesPrice: 0.625, + longContextPricing: { + thresholdTokens: 256_000, + inputPriceMultiplier: 4, + outputPriceMultiplier: 2, + cacheReadsPriceMultiplier: 4, + cacheWritesPriceMultiplier: 4, + }, + description: + "Qwen3.6 Plus - Alibaba's balanced coding and reasoning model with a 1M context window. 
Available via the Opencode Go plan.", + }, + "qwen3.7-plus": { + maxTokens: 65_536, + contextWindow: 1_000_000, + supportsImages: true, + supportsPromptCache: true, + preserveReasoning: true, + inputPrice: 0.4, + outputPrice: 1.6, + cacheReadsPrice: 0.04, + cacheWritesPrice: 0.5, + longContextPricing: { + thresholdTokens: 256_000, + inputPriceMultiplier: 3, + outputPriceMultiplier: 3, + cacheReadsPriceMultiplier: 3, + cacheWritesPriceMultiplier: 3, + }, + description: + "Qwen3.7 Plus - Alibaba's multimodal reasoning model with a 1M context window and low-cost agentic coding. Available via the Opencode Go plan.", + }, + "qwen3.7-max": { + maxTokens: 65_536, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + preserveReasoning: true, + inputPrice: 2.5, + outputPrice: 7.5, + cacheReadsPrice: 0.5, + cacheWritesPrice: 3.125, + description: + "Qwen3.7 Max - Alibaba's flagship text-only reasoning agent model with a 1M context window, designed for long-horizon agent workflows. Available via the Opencode Go plan.", + }, + + // --- DeepSeek --- + "deepseek-v4-pro": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + supportsReasoningEffort: ["disable", "low", "medium", "high", "xhigh"], + preserveReasoning: true, + reasoningEffort: "high", + inputPrice: 1.74, + outputPrice: 3.48, + cacheReadsPrice: 0.0145, + description: + "DeepSeek-V4-Pro is DeepSeek's strongest V4 model for reasoning, coding, long-context, and agentic workloads. 
Available via the Opencode Go plan.", + }, + "deepseek-v4-flash": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + supportsReasoningEffort: ["disable", "low", "medium", "high", "xhigh"], + preserveReasoning: true, + reasoningEffort: "high", + inputPrice: 0.14, + outputPrice: 0.28, + cacheReadsPrice: 0.0028, + description: + "DeepSeek-V4-Flash is DeepSeek's fast, cost-efficient V4 model supporting thinking and non-thinking modes. Available via the Opencode Go plan.", + }, +} + +/** + * Returns the native {@link ModelInfo} for a Go-plan model ID, or `undefined` + * when the ID is not part of the curated registry. Callers should fall back to + * {@link opencodeGoDefaultModelInfo} when this returns `undefined`. + */ +export function getOpencodeGoModelInfo(modelId: string): ModelInfo | undefined { + return opencodeGoModels[modelId] +} diff --git a/src/api/providers/__tests__/opencode-go.spec.ts b/src/api/providers/__tests__/opencode-go.spec.ts index 2877abd36b..3d7cfdda09 100644 --- a/src/api/providers/__tests__/opencode-go.spec.ts +++ b/src/api/providers/__tests__/opencode-go.spec.ts @@ -6,7 +6,7 @@ vitest.mock("vscode", () => ({})) import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" -import { opencodeGoDefaultModelId } from "@roo-code/types" +import { opencodeGoDefaultModelId, opencodeGoModels } from "@roo-code/types" import { OpencodeGoHandler } from "../opencode-go" import { ApiHandlerOptions } from "../../../shared/api" @@ -20,13 +20,9 @@ vitest.mock("delay", () => ({ vitest.mock("../fetchers/modelCache", () => ({ getModels: vitest.fn().mockImplementation(function () { return Promise.resolve({ - "glm-5.1": { - maxTokens: 32768, - contextWindow: 200000, - supportsImages: false, - supportsPromptCache: false, - description: "GLM 5.1", - }, + // Use the native registry entry so capability flags (reasoning + // effort, preserveReasoning, prompt cache) are exercised. 
+ "glm-5.1": { ...opencodeGoModels["glm-5.1"] }, }) }), getModelsFromCache: vitest.fn().mockReturnValue(undefined), @@ -63,13 +59,17 @@ describe("OpencodeGoHandler", () => { }) describe("fetchModel", () => { - it("returns the configured model info", async () => { + it("returns the configured model info with native capability flags", async () => { const handler = new OpencodeGoHandler(mockOptions) const result = await handler.fetchModel() expect(result.id).toBe("glm-5.1") - expect(result.info.maxTokens).toBe(32768) - expect(result.info.contextWindow).toBe(200000) - expect(result.info.supportsPromptCache).toBe(false) + // Native registry values for glm-5.1. + expect(result.info.maxTokens).toBe(131_072) + expect(result.info.contextWindow).toBe(204_800) + expect(result.info.supportsPromptCache).toBe(true) + expect(result.info.supportsReasoningEffort).toEqual(["disable", "medium"]) + expect(result.info.preserveReasoning).toBe(true) + expect(result.info.supportsMaxTokens).toBe(true) }) it("falls back to the default model id when none is configured", async () => { @@ -141,7 +141,7 @@ describe("OpencodeGoHandler", () => { }) }) - it("requests a streaming completion with usage included", async () => { + it("requests a streaming completion with usage included and native max tokens", async () => { const handler = new OpencodeGoHandler(mockOptions) const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] for await (const _chunk of handler.createMessage("sys", messages)) { @@ -153,12 +153,60 @@ describe("OpencodeGoHandler", () => { model: "glm-5.1", stream: true, stream_options: { include_usage: true }, - max_completion_tokens: 32768, + // glm-5.1 maxTokens (131_072) is clamped to 20% of its 204_800 + // context window => 40_960. 
+ max_completion_tokens: 40_960, temperature: expect.any(Number), }), ) }) + it("forwards the model's default reasoning_effort for reasoning-capable models", async () => { + const handler = new OpencodeGoHandler(mockOptions) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + for await (const _chunk of handler.createMessage("sys", messages)) { + void _chunk // drain + } + + // glm-5.1 advertises supportsReasoningEffort with a default of "medium". + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-5.1", + reasoning_effort: "medium", + }), + ) + }) + + it("omits reasoning_effort when the user disables reasoning", async () => { + const handler = new OpencodeGoHandler({ ...mockOptions, reasoningEffort: "disable" }) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + for await (const _chunk of handler.createMessage("sys", messages)) { + void _chunk // drain + } + + const callArgs = mockCreate.mock.calls[0][0] as Record + expect(callArgs.reasoning_effort).toBeUndefined() + }) + + it("uses convertToR1Format for preserveReasoning models to keep interleaved thinking", async () => { + const handler = new OpencodeGoHandler(mockOptions) + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [{ type: "text", text: "Hi" }], + }, + ] + for await (const _chunk of handler.createMessage("sys", messages)) { + void _chunk // drain + } + + const callArgs = mockCreate.mock.calls[0][0] as { messages: Array<{ role: string }> } + // The system prompt is prepended, then the R1-converted user message. + expect(callArgs.messages[0]).toEqual({ role: "system", content: "sys" }) + // convertToR1Format keeps a single user turn as one user message. 
+ expect(callArgs.messages.filter((m) => m.role === "user")).toHaveLength(1) + }) + it("streams reasoning chunks from delta.reasoning_content", async () => { mockCreate.mockImplementationOnce(async () => ({ [Symbol.asyncIterator]: async function* () { @@ -247,7 +295,9 @@ describe("OpencodeGoHandler", () => { expect.objectContaining({ model: "glm-5.1", stream: false, - max_completion_tokens: 32768, + // glm-5.1 maxTokens (131_072) clamped to 20% of 204_800 => 40_960. + max_completion_tokens: 40_960, + reasoning_effort: "medium", }), ) }) diff --git a/src/api/providers/fetchers/__tests__/opencode-go.spec.ts b/src/api/providers/fetchers/__tests__/opencode-go.spec.ts index 811d09b498..c8607db4d3 100644 --- a/src/api/providers/fetchers/__tests__/opencode-go.spec.ts +++ b/src/api/providers/fetchers/__tests__/opencode-go.spec.ts @@ -2,7 +2,7 @@ import axios from "axios" -import { opencodeGoDefaultModelInfo } from "@roo-code/types" +import { opencodeGoDefaultModelInfo, opencodeGoModels, getOpencodeGoModelInfo } from "@roo-code/types" import { getOpencodeGoModels, parseOpencodeGoModel } from "../opencode-go" @@ -39,21 +39,48 @@ describe("Opencode Go Fetchers", () => { }) expect(Object.keys(models).sort()).toEqual(["deepseek-v4-pro", "glm-5.1"]) + // Live endpoint values override the native registry for volatile fields, + // while capability flags and pricing come from the native registry. 
expect(models["glm-5.1"]).toMatchObject({ contextWindow: 202752, maxTokens: 32768, - supportsPromptCache: false, + supportsPromptCache: true, + supportsReasoningEffort: ["disable", "medium"], + preserveReasoning: true, description: "Zhipu GLM 5.1", }) expect(models["deepseek-v4-pro"].contextWindow).toBe(1048576) + expect(models["deepseek-v4-pro"].supportsReasoningEffort).toEqual([ + "disable", + "low", + "medium", + "high", + "xhigh", + ]) }) - it("falls back to default context/max tokens when metadata is absent", async () => { + it("uses native registry config for a curated model when metadata is absent", async () => { mockedAxios.get.mockResolvedValue({ data: { data: [{ id: "kimi-k2.6" }] } }) const models = await getOpencodeGoModels("k") + // kimi-k2.6 is curated, so it gets its native context/max tokens and + // capability flags rather than the generic default fallback. expect(models["kimi-k2.6"]).toMatchObject({ + contextWindow: 262_144, + maxTokens: 16_384, + supportsPromptCache: true, + supportsTemperature: true, + defaultTemperature: 1.0, + }) + }) + + it("falls back to default context/max tokens for an unknown model when metadata is absent", async () => { + mockedAxios.get.mockResolvedValue({ data: { data: [{ id: "some-unknown-model" }] } }) + + const models = await getOpencodeGoModels("k") + + expect(models["some-unknown-model"]).toMatchObject({ contextWindow: opencodeGoDefaultModelInfo.contextWindow, maxTokens: opencodeGoDefaultModelInfo.maxTokens, supportsPromptCache: false, @@ -94,11 +121,64 @@ describe("Opencode Go Fetchers", () => { }) describe("parseOpencodeGoModel", () => { - it("treats a model with no cache pricing as not cache-capable", () => { + it("merges live endpoint values over the native registry for a curated model", () => { + const info = parseOpencodeGoModel({ id: "glm-5.1", context_window: 150000, max_output_tokens: 8000 }) + // Live values win for volatile fields. 
+ expect(info.contextWindow).toBe(150000) + expect(info.maxTokens).toBe(8000) + // Capability flags and pricing come from the native registry. + expect(info.supportsPromptCache).toBe(true) + expect(info.supportsMaxTokens).toBe(true) + expect(info.supportsReasoningEffort).toEqual(["disable", "medium"]) + expect(info.preserveReasoning).toBe(true) + expect(info.inputPrice).toBe(1.4) + }) + + it("uses native registry defaults when the live payload omits volatile fields", () => { + const info = parseOpencodeGoModel({ id: "deepseek-v4-flash" }) + const native = getOpencodeGoModelInfo("deepseek-v4-flash")! + expect(info.contextWindow).toBe(native.contextWindow) + expect(info.maxTokens).toBe(native.maxTokens) + expect(info.supportsPromptCache).toBe(true) + expect(info.preserveReasoning).toBe(true) + expect(info.supportsReasoningEffort).toEqual(["disable", "low", "medium", "high", "xhigh"]) + }) + + it("resolves GLM-5.2 with its 1M context and High/Max reasoning effort", () => { + const info = parseOpencodeGoModel({ id: "glm-5.2" }) + expect(info.contextWindow).toBe(1_000_000) + expect(info.maxTokens).toBe(131_072) + expect(info.supportsPromptCache).toBe(true) + expect(info.supportsMaxTokens).toBe(true) + expect(info.supportsReasoningEffort).toEqual(["disable", "high", "max"]) + expect(info.reasoningEffort).toBe("high") + expect(info.preserveReasoning).toBe(true) + expect(info.inputPrice).toBe(1.4) + expect(info.outputPrice).toBe(4.4) + }) + + it("falls back to defaults for an unknown model with no cache pricing", () => { const info = parseOpencodeGoModel({ id: "x", context_window: 100000, max_tokens: 8000 }) expect(info.supportsPromptCache).toBe(false) expect(info.contextWindow).toBe(100000) expect(info.maxTokens).toBe(8000) }) + + it("falls back to default context/max tokens for an unknown model with no metadata", () => { + const info = parseOpencodeGoModel({ id: "unknown-model" }) + expect(info.contextWindow).toBe(opencodeGoDefaultModelInfo.contextWindow) + 
expect(info.maxTokens).toBe(opencodeGoDefaultModelInfo.maxTokens) + expect(info.supportsPromptCache).toBe(false) + }) + + it("every curated model in the registry produces a fully-populated ModelInfo", () => { + for (const [id, native] of Object.entries(opencodeGoModels)) { + const info = parseOpencodeGoModel({ id }) + expect(info.contextWindow).toBe(native.contextWindow) + expect(info.maxTokens).toBe(native.maxTokens) + expect(info.supportsPromptCache).toBe(native.supportsPromptCache) + expect(info.description).toBeTruthy() + } + }) }) }) diff --git a/src/api/providers/fetchers/opencode-go.ts b/src/api/providers/fetchers/opencode-go.ts index ac62db52a7..d28b8872f0 100644 --- a/src/api/providers/fetchers/opencode-go.ts +++ b/src/api/providers/fetchers/opencode-go.ts @@ -2,7 +2,7 @@ import axios from "axios" import { z } from "zod" import type { ModelInfo } from "@roo-code/types" -import { opencodeGoDefaultModelInfo } from "@roo-code/types" +import { opencodeGoDefaultModelInfo, getOpencodeGoModelInfo } from "@roo-code/types" const OPENCODE_GO_BASE_URL = "https://opencode.ai/zen/go/v1" @@ -10,8 +10,9 @@ const OPENCODE_GO_BASE_URL = "https://opencode.ai/zen/go/v1" // `id` is the only guaranteed field; metadata is optional and best-effort, so // the schema is intentionally permissive. Pricing is intentionally NOT parsed: // the units returned by the endpoint aren't documented, and reporting a wrong -// cost is worse than reporting "unknown" — so cost stays undefined until the -// pricing shape is confirmed against the live endpoint. +// cost is worse than reporting "unknown" — so cost stays sourced from the +// native registry (or undefined for unknown models) until the pricing shape is +// confirmed against the live endpoint. const opencodeGoModelSchema = z.object({ id: z.string(), name: z.string().optional(), @@ -32,20 +33,53 @@ const opencodeGoModelsResponseSchema = z.object({ /** * Maps a raw Opencode Go model entry to the internal {@link ModelInfo} shape. 
* - * Falls back to {@link opencodeGoDefaultModelInfo} when the upstream payload - * omits context-window or max-token fields, ensuring downstream consumers - * always receive a fully-populated object. + * The Go `/models` endpoint only reliably returns `id` and (sometimes) + * `context_window`/`max_tokens`. It does NOT advertise capability flags + * (`supportsReasoningEffort`, `preserveReasoning`, `supportsMaxTokens`, + * `supportsPromptCache`) or pricing, all of which the extension needs to drive + * reasoning controls, interleaved-thinking tool calls, the max-output-tokens + * slider, and accurate cost reporting. + * + * Resolution order for a fully-populated {@link ModelInfo}: + * 1. Start from the native registry ({@link getOpencodeGoModelInfo}) when the + * model ID is curated — this supplies correct context lengths, max tokens, + * capability flags, and pricing sourced from vendor specs. + * 2. Override `contextWindow`, `maxTokens`, and `supportsImages` with values + * from the live `/models` payload when present, so the gateway stays the + * source of truth for those volatile fields. + * 3. Fall back to {@link opencodeGoDefaultModelInfo} for any field still + * missing on an unknown (non-curated) model, ensuring downstream consumers + * always receive a fully-populated object. * * @param model - Validated model entry from the `/models` response. * @returns Normalised model metadata suitable for the model picker. */ -export const parseOpencodeGoModel = (model: OpencodeGoModel): ModelInfo => ({ - maxTokens: model.max_output_tokens ?? model.max_tokens ?? opencodeGoDefaultModelInfo.maxTokens, - contextWindow: model.context_window ?? model.context_length ?? opencodeGoDefaultModelInfo.contextWindow, - supportsImages: model.supports_images ?? false, - supportsPromptCache: false, - description: model.description ?? 
model.name, -}) +export const parseOpencodeGoModel = (model: OpencodeGoModel): ModelInfo => { + const native = getOpencodeGoModelInfo(model.id) + + // Live endpoint values take precedence over the registry for volatile fields. + const liveContextWindow = model.context_window ?? model.context_length + const liveMaxTokens = model.max_output_tokens ?? model.max_tokens + const liveSupportsImages = model.supports_images + + if (native) { + return { + ...native, + ...(liveContextWindow !== undefined && { contextWindow: liveContextWindow }), + ...(liveMaxTokens !== undefined && { maxTokens: liveMaxTokens }), + ...(liveSupportsImages !== undefined && { supportsImages: liveSupportsImages }), + description: model.description ?? model.name ?? native.description, + } + } + + return { + maxTokens: liveMaxTokens ?? opencodeGoDefaultModelInfo.maxTokens, + contextWindow: liveContextWindow ?? opencodeGoDefaultModelInfo.contextWindow, + supportsImages: liveSupportsImages ?? false, + supportsPromptCache: false, + description: model.description ?? model.name, + } +} /** * Fetches the list of available models from the Opencode Go `/models` endpoint. diff --git a/src/api/providers/opencode-go.ts b/src/api/providers/opencode-go.ts index 43d32e6192..42e6b56cdc 100644 --- a/src/api/providers/opencode-go.ts +++ b/src/api/providers/opencode-go.ts @@ -7,6 +7,8 @@ import { ApiHandlerOptions } from "../../shared/api" import { ApiStream } from "../transform/stream" import { convertToOpenAiMessages } from "../transform/openai-format" +import { convertToR1Format } from "../transform/r1-format" +import { getModelParams } from "../transform/model-params" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { RouterProvider } from "./router-provider" @@ -24,6 +26,13 @@ import { extractReasoningFromDelta } from "./utils/extract-reasoning" * instead of configuring each one manually as a separate OpenAI-Compatible * provider (#172). 
* + * Model metadata (context window, max tokens, capability flags, and pricing) + * is sourced from the native registry in `@roo-code/types` and merged with the + * live `/models` payload, so each curated model keeps its correct native + * configuration — including `supportsReasoningEffort`, `preserveReasoning`, + * `supportsMaxTokens`, and prompt-cache support — instead of falling back to a + * single generic default. + * * Supports text generation, reasoning content (GLM/DeepSeek), tool calls, * and non-streaming prompt completion. */ @@ -41,34 +50,69 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio }) } + /** + * Resolves the configured model and computes OpenAI-format model parameters + * (max tokens, temperature, reasoning effort) from the merged model info. + * + * Fetches the live model list first so the merged native + `/models` + * metadata (context window, capability flags, pricing) is available before + * parameter computation — mirroring the original `fetchModel()` flow. + */ + private async resolveModel() { + const { id, info } = await this.fetchModel() + const params = getModelParams({ + format: "openai", + modelId: id, + model: info, + settings: this.options, + defaultTemperature: OPENCODE_GO_DEFAULT_TEMPERATURE, + }) + return { id, info, ...params } + } + /** * Streams a chat completion response, yielding typed chunks for text, * reasoning, partial tool calls, and token usage. + * + * For models that require reasoning_content to be passed back during + * multi-turn tool calls (`preserveReasoning`), messages are converted with + * `convertToR1Format` so interleaved thinking is preserved across tool-call + * continuations. Reasoning effort is forwarded when the model advertises + * `supportsReasoningEffort`. 
*/ override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - const { id: modelId, info } = await this.fetchModel() + const { id: modelId, info, temperature, reasoningEffort, maxTokens } = await this.resolveModel() + + // preserveReasoning models (GLM/DeepSeek/MiMo/MiniMax/Qwen) require + // reasoning_content to be carried across tool-call continuations. + const preserveReasoning = info.preserveReasoning === true + const convertedMessages = preserveReasoning + ? convertToR1Format(messages, { mergeToolResultText: true }) + : convertToOpenAiMessages(messages) const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [ { role: "system", content: systemPrompt }, - ...convertToOpenAiMessages(messages), + ...convertedMessages, ] const body: OpenAI.Chat.ChatCompletionCreateParams = { model: modelId, messages: openAiMessages, - temperature: this.supportsTemperature(modelId) - ? (this.options.modelTemperature ?? OPENCODE_GO_DEFAULT_TEMPERATURE) - : undefined, - max_completion_tokens: info.maxTokens, + temperature: this.supportsTemperature(modelId) ? temperature : undefined, + max_completion_tokens: + this.options.includeMaxTokens === true ? this.options.modelMaxTokens || maxTokens : maxTokens, stream: true, stream_options: { include_usage: true }, tools: this.convertToolsForOpenAI(metadata?.tools), tool_choice: metadata?.tool_choice, parallel_tool_calls: metadata?.parallelToolCalls ?? true, + ...(reasoningEffort && { + reasoning_effort: reasoningEffort as OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"], + }), } const completion = await this.client.chat.completions.create(body) @@ -118,7 +162,7 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio * @throws Error with an Opencode Go-specific prefix if the request fails. 
*/ async completePrompt(prompt: string): Promise { - const { id: modelId, info } = await this.fetchModel() + const { id: modelId, temperature, reasoningEffort, maxTokens } = await this.resolveModel() try { const requestOptions: OpenAI.Chat.ChatCompletionCreateParams = { @@ -128,10 +172,16 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio } if (this.supportsTemperature(modelId)) { - requestOptions.temperature = this.options.modelTemperature ?? OPENCODE_GO_DEFAULT_TEMPERATURE + requestOptions.temperature = temperature } - requestOptions.max_completion_tokens = info.maxTokens + requestOptions.max_completion_tokens = + this.options.includeMaxTokens === true ? this.options.modelMaxTokens || maxTokens : maxTokens + + if (reasoningEffort) { + requestOptions.reasoning_effort = + reasoningEffort as OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"] + } const response = await this.client.chat.completions.create(requestOptions) return response.choices[0]?.message.content || "" From 22f7be62b63e5a4094652d7610f2da2940ae2b39 Mon Sep 17 00:00:00 2001 From: Naved Date: Thu, 18 Jun 2026 22:41:38 -0700 Subject: [PATCH 2/8] Update default to GLM 5.2 --- packages/types/src/providers/opencode-go.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/types/src/providers/opencode-go.ts b/packages/types/src/providers/opencode-go.ts index fc165c3362..14b8d68a9a 100644 --- a/packages/types/src/providers/opencode-go.ts +++ b/packages/types/src/providers/opencode-go.ts @@ -6,7 +6,7 @@ import type { ModelInfo } from "../model.js" // The full model list (and metadata) is fetched dynamically from // `https://opencode.ai/zen/go/v1/models`, so models can be switched on the fly. // The values below are only a fallback used before the live list resolves. 
-export const opencodeGoDefaultModelId = "glm-5.1" +export const opencodeGoDefaultModelId = "glm-5.2" export const opencodeGoDefaultModelInfo: ModelInfo = { maxTokens: 32_768, From 0c219e2292e5995cd44de12462741658c8ca7a82 Mon Sep 17 00:00:00 2001 From: Naved Date: Thu, 18 Jun 2026 23:23:57 -0700 Subject: [PATCH 3/8] Use Anthropic messages for MiniMax and Qwen --- packages/types/src/providers/opencode-go.ts | 37 ++ .../providers/__tests__/opencode-go.spec.ts | 198 +++++++++- src/api/providers/opencode-go.ts | 346 +++++++++++++++++- 3 files changed, 563 insertions(+), 18 deletions(-) diff --git a/packages/types/src/providers/opencode-go.ts b/packages/types/src/providers/opencode-go.ts index 14b8d68a9a..7a646b6236 100644 --- a/packages/types/src/providers/opencode-go.ts +++ b/packages/types/src/providers/opencode-go.ts @@ -290,6 +290,43 @@ export const opencodeGoModels: Record = { }, } +/** + * OpenCode Go models that are only reachable via the Anthropic Messages wire + * format (`/v1/messages`), not the OpenAI-compatible chat completions format + * (`/v1/chat/completions` — referred to by the gateway as "oa-compat"). + * + * The Go gateway maps every model to exactly one wire format (see the model + * table at https://opencode.ai/docs/go). Models listed here use + * `@ai-sdk/anthropic`; every other curated model uses + * `@ai-sdk/openai-compatible`. Sending an Anthropic-format model to the + * OpenAI chat completions endpoint is rejected with: + * + * 401 Model <model-id> is not supported for format oa-compat + * + * This is the set that drives format routing in the handler — keep it in sync + * with the Go model table. 
+ */ +export const OPENCODE_GO_ANTHROPIC_FORMAT_MODELS = new Set([ + // --- Alibaba Qwen --- + "qwen3.7-max", + "qwen3.7-plus", + "qwen3.6-plus", + // --- MiniMax --- + "minimax-m3", + "minimax-m2.7", + "minimax-m2.5", +]) + +/** + * Returns `true` when the given Go-plan model ID must be requested via the + * Anthropic Messages format (`/v1/messages`) rather than the OpenAI-compatible + * chat completions format. Unknown (non-curated) model IDs default to the + * OpenAI-compatible format, matching the gateway's default routing. + */ +export function isOpencodeGoAnthropicFormatModel(modelId: string): boolean { + return OPENCODE_GO_ANTHROPIC_FORMAT_MODELS.has(modelId) +} + /** * Returns the native {@link ModelInfo} for a Go-plan model ID, or `undefined` * when the ID is not part of the curated registry. Callers should fall back to diff --git a/src/api/providers/__tests__/opencode-go.spec.ts b/src/api/providers/__tests__/opencode-go.spec.ts index 3d7cfdda09..ee163f1f21 100644 --- a/src/api/providers/__tests__/opencode-go.spec.ts +++ b/src/api/providers/__tests__/opencode-go.spec.ts @@ -6,7 +6,7 @@ vitest.mock("vscode", () => ({})) import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" -import { opencodeGoDefaultModelId, opencodeGoModels } from "@roo-code/types" +import { opencodeGoDefaultModelId, opencodeGoModels, isOpencodeGoAnthropicFormatModel } from "@roo-code/types" import { OpencodeGoHandler } from "../opencode-go" import { ApiHandlerOptions } from "../../../shared/api" @@ -23,12 +23,15 @@ vitest.mock("../fetchers/modelCache", () => ({ // Use the native registry entry so capability flags (reasoning // effort, preserveReasoning, prompt cache) are exercised. "glm-5.1": { ...opencodeGoModels["glm-5.1"] }, + // Anthropic-format model used to exercise the /v1/messages path. 
+ "qwen3.7-max": { ...opencodeGoModels["qwen3.7-max"] }, }) }), getModelsFromCache: vitest.fn().mockReturnValue(undefined), })) const mockCreate = vitest.fn() +const mockAnthropicCreate = vitest.fn() ;(OpenAI as any).mockImplementation(function () { return { @@ -36,6 +39,16 @@ const mockCreate = vitest.fn() } }) +vitest.mock("@anthropic-ai/sdk", () => ({ + Anthropic: vitest.fn(function () { + return { + messages: { + create: mockAnthropicCreate, + }, + } + }), +})) + describe("OpencodeGoHandler", () => { const mockOptions: ApiHandlerOptions = { opencodeGoApiKey: "test-key", @@ -45,6 +58,7 @@ describe("OpencodeGoHandler", () => { beforeEach(() => { vitest.clearAllMocks() mockCreate.mockClear() + mockAnthropicCreate.mockClear() }) it("initializes the OpenAI client with the Opencode Go base URL and key", () => { @@ -58,6 +72,18 @@ describe("OpencodeGoHandler", () => { ) }) + it("initializes an Anthropic client rooted at /zen/go (SDK appends /v1/messages)", () => { + new OpencodeGoHandler(mockOptions) + expect(Anthropic).toHaveBeenCalledWith( + expect.objectContaining({ + // The Anthropic SDK posts to `/v1/messages`, so the base URL must + // NOT include the trailing `/v1` used by the OpenAI client. + baseURL: "https://opencode.ai/zen/go", + apiKey: "test-key", + }), + ) + }) + describe("fetchModel", () => { it("returns the configured model info with native capability flags", async () => { const handler = new OpencodeGoHandler(mockOptions) @@ -308,4 +334,174 @@ describe("OpencodeGoHandler", () => { await expect(handler.completePrompt("ping")).rejects.toThrow("Opencode Go completion error: boom") }) }) + + describe("Anthropic-format models (qwen3.7-max)", () => { + // qwen3.7-max is only reachable via the Anthropic Messages endpoint + // (/v1/messages); sending it to /v1/chat/completions is what produces + // "401 Model qwen3.7-max is not supported for format oa-compat". 
+ const anthropicOptions: ApiHandlerOptions = { + opencodeGoApiKey: "test-key", + opencodeGoModelId: "qwen3.7-max", + } + + beforeEach(() => { + mockAnthropicCreate.mockImplementation(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { + type: "message_start", + message: { + usage: { + input_tokens: 10, + output_tokens: 0, + cache_creation_input_tokens: 2, + cache_read_input_tokens: 3, + }, + }, + } + yield { + type: "content_block_start", + index: 0, + content_block: { type: "text", text: "" }, + } + yield { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Hello" } } + yield { + type: "content_block_start", + index: 1, + content_block: { type: "tool_use", id: "toolu_1", name: "read_file", input: {} }, + } + yield { + type: "content_block_delta", + index: 1, + delta: { type: "input_json_delta", partial_json: '{"path":' }, + } + yield { type: "content_block_stop", index: 1 } + yield { type: "message_delta", usage: { output_tokens: 5 } } + yield { type: "message_stop" } + }, + })) + }) + + it("routes the request through the Anthropic /v1/messages client, not chat completions", async () => { + const handler = new OpencodeGoHandler(anthropicOptions) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + + for await (const _chunk of handler.createMessage("sys", messages)) { + void _chunk // drain + } + + expect(mockAnthropicCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "qwen3.7-max", + stream: true, + system: expect.arrayContaining([expect.objectContaining({ type: "text", text: "sys" })]), + }), + ) + // The OpenAI chat completions endpoint must NOT be used for this model. 
+ expect(mockCreate).not.toHaveBeenCalled() + }) + + it("streams text, tool-call, usage and cost chunks from the Anthropic stream", async () => { + const handler = new OpencodeGoHandler(anthropicOptions) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + + const chunks: any[] = [] + for await (const chunk of handler.createMessage("sys", messages)) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "text", text: "Hello" }) + expect(chunks).toContainEqual({ + type: "tool_call_partial", + index: 1, + id: "toolu_1", + name: "read_file", + arguments: undefined, + }) + expect(chunks).toContainEqual({ + type: "tool_call_partial", + index: 1, + id: undefined, + name: undefined, + arguments: '{"path":', + }) + // message_start usage (with cache tokens) ... + expect(chunks).toContainEqual({ + type: "usage", + inputTokens: 10, + outputTokens: 0, + cacheWriteTokens: 2, + cacheReadTokens: 3, + }) + // ... message_delta output tokens ... + expect(chunks).toContainEqual({ type: "usage", inputTokens: 0, outputTokens: 5 }) + // ... and a final cost chunk. + expect(chunks.some((c) => c.type === "usage" && c.totalCost !== undefined)).toBe(true) + }) + + it("applies cache-control breakpoints when the model supports prompt caching", async () => { + const handler = new OpencodeGoHandler(anthropicOptions) + const messages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "first" }, + { role: "assistant", content: "ok" }, + { role: "user", content: "second" }, + ] + + for await (const _chunk of handler.createMessage("sys", messages)) { + void _chunk // drain + } + + const callArgs = mockAnthropicCreate.mock.calls[0][0] as { + system: Array<{ cache_control?: unknown }> + messages: Array<{ content: unknown }> + } + // qwen3.7-max advertises supportsPromptCache, so the system prompt + // gets an ephemeral cache_control breakpoint. 
+ expect(callArgs.system[0].cache_control).toEqual({ type: "ephemeral" }) + }) + + it("completePrompt uses the Anthropic messages endpoint and returns text content", async () => { + mockAnthropicCreate.mockResolvedValue({ + content: [{ type: "text", text: "the answer" }], + }) + + const handler = new OpencodeGoHandler(anthropicOptions) + expect(await handler.completePrompt("ping")).toBe("the answer") + expect(mockAnthropicCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "qwen3.7-max", + stream: false, + messages: [{ role: "user", content: "ping" }], + }), + ) + expect(mockCreate).not.toHaveBeenCalled() + }) + + it("completePrompt wraps Anthropic errors with an Opencode Go-specific message", async () => { + mockAnthropicCreate.mockRejectedValue(new Error("boom")) + const handler = new OpencodeGoHandler(anthropicOptions) + await expect(handler.completePrompt("ping")).rejects.toThrow("Opencode Go completion error: boom") + }) + }) + + describe("isOpencodeGoAnthropicFormatModel", () => { + it("classifies Qwen and MiniMax Go models as Anthropic-format", () => { + expect(isOpencodeGoAnthropicFormatModel("qwen3.7-max")).toBe(true) + expect(isOpencodeGoAnthropicFormatModel("qwen3.7-plus")).toBe(true) + expect(isOpencodeGoAnthropicFormatModel("qwen3.6-plus")).toBe(true) + expect(isOpencodeGoAnthropicFormatModel("minimax-m3")).toBe(true) + expect(isOpencodeGoAnthropicFormatModel("minimax-m2.7")).toBe(true) + expect(isOpencodeGoAnthropicFormatModel("minimax-m2.5")).toBe(true) + }) + + it("classifies OpenAI-compatible Go models as non-Anthropic-format", () => { + expect(isOpencodeGoAnthropicFormatModel("glm-5.2")).toBe(false) + expect(isOpencodeGoAnthropicFormatModel("kimi-k2.6")).toBe(false) + expect(isOpencodeGoAnthropicFormatModel("deepseek-v4-pro")).toBe(false) + expect(isOpencodeGoAnthropicFormatModel("mimo-v2.5")).toBe(false) + }) + + it("defaults unknown model IDs to the OpenAI-compatible format", () => { + 
expect(isOpencodeGoAnthropicFormatModel("some-unknown-model")).toBe(false) + }) + }) }) diff --git a/src/api/providers/opencode-go.ts b/src/api/providers/opencode-go.ts index 42e6b56cdc..0bb8303d5d 100644 --- a/src/api/providers/opencode-go.ts +++ b/src/api/providers/opencode-go.ts @@ -1,18 +1,31 @@ import { Anthropic } from "@anthropic-ai/sdk" +import { CacheControlEphemeral } from "@anthropic-ai/sdk/resources" import OpenAI from "openai" -import { opencodeGoDefaultModelId, opencodeGoDefaultModelInfo, OPENCODE_GO_DEFAULT_TEMPERATURE } from "@roo-code/types" +import { + opencodeGoDefaultModelId, + opencodeGoDefaultModelInfo, + OPENCODE_GO_DEFAULT_TEMPERATURE, + isOpencodeGoAnthropicFormatModel, +} from "@roo-code/types" import { ApiHandlerOptions } from "../../shared/api" import { ApiStream } from "../transform/stream" import { convertToOpenAiMessages } from "../transform/openai-format" import { convertToR1Format } from "../transform/r1-format" +import { filterNonAnthropicBlocks } from "../transform/anthropic-filter" import { getModelParams } from "../transform/model-params" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { RouterProvider } from "./router-provider" import { extractReasoningFromDelta } from "./utils/extract-reasoning" +import { DEFAULT_HEADERS } from "./constants" +import { calculateApiCostAnthropic } from "../../shared/cost" +import { + convertOpenAIToolsToAnthropic, + convertOpenAIToolChoiceToAnthropic, +} from "../../core/prompts/tools/native-tools/converters" /** * API handler for the Opencode "Go" subscription plan. @@ -33,10 +46,36 @@ import { extractReasoningFromDelta } from "./utils/extract-reasoning" * `supportsMaxTokens`, and prompt-cache support — instead of falling back to a * single generic default. 
* + * ## Wire-format routing + * + * The Go gateway exposes two wire formats and maps every model to exactly one + * of them (see https://opencode.ai/docs/go): + * + * - OpenAI-compatible chat completions (`/v1/chat/completions`, "oa-compat") + * — used by GLM, Kimi, DeepSeek, and MiMo models. + * - Anthropic Messages (`/v1/messages`) — used by Qwen (qwen3.7-max, + * qwen3.7-plus, qwen3.6-plus) and MiniMax (minimax-m3, minimax-m2.7, + * minimax-m2.5) models. + * + * Sending an Anthropic-format model to the chat completions endpoint is + * rejected with `401 Model is not supported for format oa-compat`, so this + * handler inspects {@link isOpencodeGoAnthropicFormatModel} and routes those + * models through a dedicated Anthropic SDK client against `/v1/messages`. + * * Supports text generation, reasoning content (GLM/DeepSeek), tool calls, * and non-streaming prompt completion. */ export class OpencodeGoHandler extends RouterProvider implements SingleCompletionHandler { + /** + * Anthropic SDK client used for Go models that only accept the Anthropic + * Messages wire format (`/v1/messages`). + * + * The SDK appends `/v1/messages` to `baseURL`, so this is set to the Go + * gateway root (`https://opencode.ai/zen/go`) — NOT the `/v1` root used by + * the OpenAI client — to avoid a doubled `/v1` path segment. + */ + private readonly anthropicClient: Anthropic + /** Creates a new handler bound to the user's Go API key and selected model. 
*/ constructor(options: ApiHandlerOptions) { super({ @@ -48,44 +87,86 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio defaultModelId: opencodeGoDefaultModelId, defaultModelInfo: opencodeGoDefaultModelInfo, }) + + this.anthropicClient = new Anthropic({ + baseURL: "https://opencode.ai/zen/go", + apiKey: options.opencodeGoApiKey, + defaultHeaders: { + ...DEFAULT_HEADERS, + ...(options.openAiHeaders || {}), + }, + }) } /** - * Resolves the configured model and computes OpenAI-format model parameters + * Resolves the configured model and computes model parameters * (max tokens, temperature, reasoning effort) from the merged model info. * + * The wire format is derived from the model ID via + * {@link isOpencodeGoAnthropicFormatModel}: Anthropic-format models compute + * parameters with the `anthropic` format so reasoning is mapped to the + * Anthropic-style controls; everything else uses the `openai` format. + * * Fetches the live model list first so the merged native + `/models` * metadata (context window, capability flags, pricing) is available before * parameter computation — mirroring the original `fetchModel()` flow. */ private async resolveModel() { const { id, info } = await this.fetchModel() - const params = getModelParams({ - format: "openai", - modelId: id, - model: info, - settings: this.options, - defaultTemperature: OPENCODE_GO_DEFAULT_TEMPERATURE, - }) - return { id, info, ...params } + const isAnthropic = isOpencodeGoAnthropicFormatModel(id) + // getModelParams is overloaded on a literal `format`, so branch the call + // rather than passing a union — this keeps the returned params typed as a + // single concrete shape per branch. + const params = isAnthropic + ? 
getModelParams({ + format: "anthropic", + modelId: id, + model: info, + settings: this.options, + defaultTemperature: OPENCODE_GO_DEFAULT_TEMPERATURE, + }) + : getModelParams({ + format: "openai", + modelId: id, + model: info, + settings: this.options, + defaultTemperature: OPENCODE_GO_DEFAULT_TEMPERATURE, + }) + return { + id, + info, + format: isAnthropic ? ("anthropic" as const) : ("openai" as const), + maxTokens: params.maxTokens, + temperature: params.temperature, + reasoningEffort: params.reasoningEffort, + } } /** * Streams a chat completion response, yielding typed chunks for text, * reasoning, partial tool calls, and token usage. * - * For models that require reasoning_content to be passed back during - * multi-turn tool calls (`preserveReasoning`), messages are converted with - * `convertToR1Format` so interleaved thinking is preserved across tool-call - * continuations. Reasoning effort is forwarded when the model advertises - * `supportsReasoningEffort`. + * Anthropic-format models (Qwen/MiniMax) are streamed via + * {@link streamAnthropicMessage} against `/v1/messages`; all other models + * use the OpenAI-compatible chat completions endpoint. + * + * For OpenAI-format models that require reasoning_content to be passed back + * during multi-turn tool calls (`preserveReasoning`), messages are + * converted with `convertToR1Format` so interleaved thinking is preserved + * across tool-call continuations. Reasoning effort is forwarded when the + * model advertises `supportsReasoningEffort`. 
*/ override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - const { id: modelId, info, temperature, reasoningEffort, maxTokens } = await this.resolveModel() + const { id: modelId, info, format, temperature, reasoningEffort, maxTokens } = await this.resolveModel() + + if (format === "anthropic") { + yield* this.streamAnthropicMessage(modelId, info, temperature, maxTokens, systemPrompt, messages, metadata) + return + } // preserveReasoning models (GLM/DeepSeek/MiMo/MiniMax/Qwen) require // reasoning_content to be carried across tool-call continuations. @@ -154,15 +235,246 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio } } + /** + * Streams an Anthropic Messages-format completion for Go models that only + * accept the `/v1/messages` endpoint (Qwen/MiniMax). + * + * Mirrors the Anthropic streaming protocol handled by the dedicated + * MiniMax handler: `message_start`/`message_delta` carry usage, content + * blocks carry text/thinking/tool_use, and a final cost chunk is emitted + * from the accumulated token counts. Prompt-cache breakpoints are applied + * to the system prompt and last two user messages when the model advertises + * `supportsPromptCache`, since the Go gateway honours server-side caching + * and reports cache tokens in usage. + */ + private async *streamAnthropicMessage( + modelId: string, + info: { supportsPromptCache?: boolean }, + temperature: number | undefined, + maxTokens: number | undefined, + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, + ): ApiStream { + const cacheControl: CacheControlEphemeral = { type: "ephemeral" } + const supportsPromptCache = info.supportsPromptCache ?? false + + // Strip non-Anthropic blocks (reasoning, thoughtSignature, etc.) before + // sending — the gateway rejects unknown content block types. 
+ const sanitizedMessages = filterNonAnthropicBlocks(messages) + + const systemBlocks: Anthropic.Messages.TextBlockParam[] = [ + supportsPromptCache + ? { text: systemPrompt, type: "text", cache_control: cacheControl } + : { text: systemPrompt, type: "text" }, + ] + + const requestParams: Anthropic.Messages.MessageCreateParams = { + model: modelId, + max_tokens: + this.options.includeMaxTokens === true + ? this.options.modelMaxTokens || maxTokens || 16_384 + : (maxTokens ?? 16_384), + temperature: this.supportsTemperature(modelId) ? (temperature ?? 1.0) : undefined, + system: systemBlocks, + messages: supportsPromptCache + ? this.addAnthropicCacheControl(sanitizedMessages, cacheControl) + : sanitizedMessages, + stream: true, + tools: convertOpenAIToolsToAnthropic(metadata?.tools ?? []), + tool_choice: convertOpenAIToolChoiceToAnthropic(metadata?.tool_choice, metadata?.parallelToolCalls), + } + + const stream = await this.anthropicClient.messages.create(requestParams) + + let inputTokens = 0 + let outputTokens = 0 + let cacheWriteTokens = 0 + let cacheReadTokens = 0 + + for await (const chunk of stream) { + switch (chunk.type) { + case "message_start": { + // Tells us cache reads/writes/input/output. + const { + input_tokens = 0, + output_tokens = 0, + cache_creation_input_tokens, + cache_read_input_tokens, + } = chunk.message.usage + + yield { + type: "usage", + inputTokens: input_tokens, + outputTokens: output_tokens, + cacheWriteTokens: cache_creation_input_tokens || undefined, + cacheReadTokens: cache_read_input_tokens || undefined, + } + + inputTokens += input_tokens + outputTokens += output_tokens + cacheWriteTokens += cache_creation_input_tokens || 0 + cacheReadTokens += cache_read_input_tokens || 0 + + break + } + case "message_delta": + // Tells us stop_reason, stop_sequence, and output tokens. 
+ yield { + type: "usage", + inputTokens: 0, + outputTokens: chunk.usage.output_tokens || 0, + } + + break + case "message_stop": + // No usage data, just an indicator that the message is done. + break + case "content_block_start": + switch (chunk.content_block.type) { + case "thinking": + // Yield thinking/reasoning content + if (chunk.index > 0) { + yield { type: "reasoning", text: "\n" } + } + + yield { type: "reasoning", text: chunk.content_block.thinking } + break + case "text": + // We may receive multiple text blocks + if (chunk.index > 0) { + yield { type: "text", text: "\n" } + } + + yield { type: "text", text: chunk.content_block.text } + break + case "tool_use": { + // Emit initial tool call partial with id and name + yield { + type: "tool_call_partial", + index: chunk.index, + id: chunk.content_block.id, + name: chunk.content_block.name, + arguments: undefined, + } + break + } + } + break + case "content_block_delta": + switch (chunk.delta.type) { + case "thinking_delta": + yield { type: "reasoning", text: chunk.delta.thinking } + break + case "text_delta": + yield { type: "text", text: chunk.delta.text } + break + case "input_json_delta": { + // Emit tool call partial chunks as arguments stream in + yield { + type: "tool_call_partial", + index: chunk.index, + id: undefined, + name: undefined, + arguments: chunk.delta.partial_json, + } + break + } + } + + break + case "content_block_stop": + // Block is complete - no action needed, NativeToolCallParser handles completion + break + } + } + + // Calculate and yield final cost + if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) { + const { totalCost } = calculateApiCostAnthropic( + info as Parameters[0], + inputTokens, + outputTokens, + cacheWriteTokens, + cacheReadTokens, + ) + + yield { + type: "usage", + inputTokens: 0, + outputTokens: 0, + totalCost, + } + } + } + + /** + * Adds ephemeral cache-control breakpoints to the last two user messages + * so the gateway 
can cache the system prompt + most recent turns + * server-side. Only applied when the model advertises prompt-cache support. + */ + private addAnthropicCacheControl( + messages: Anthropic.Messages.MessageParam[], + cacheControl: CacheControlEphemeral, + ): Anthropic.Messages.MessageParam[] { + const userMsgIndices = messages.reduce( + (acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc), + [] as number[], + ) + + const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1 + const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1 + + return messages.map((message, index) => { + if (index === lastUserMsgIndex || index === secondLastUserMsgIndex) { + return { + ...message, + content: + typeof message.content === "string" + ? [{ type: "text", text: message.content, cache_control: cacheControl }] + : message.content.map((content, contentIndex) => + contentIndex === message.content.length - 1 + ? { ...content, cache_control: cacheControl } + : content, + ), + } + } + return message + }) + } + /** * Performs a non-streaming chat completion and returns the full response text. * + * Anthropic-format models are completed via the `/v1/messages` endpoint; + * all other models use the OpenAI-compatible chat completions endpoint. + * * @param prompt - The user prompt to send as a single user message. * @returns The model's reply text, or an empty string if no content is returned. * @throws Error with an Opencode Go-specific prefix if the request fails. */ async completePrompt(prompt: string): Promise { - const { id: modelId, temperature, reasoningEffort, maxTokens } = await this.resolveModel() + const { id: modelId, format, temperature, reasoningEffort, maxTokens } = await this.resolveModel() + + if (format === "anthropic") { + try { + const message = await this.anthropicClient.messages.create({ + model: modelId, + max_tokens: maxTokens ?? 16_384, + temperature: this.supportsTemperature(modelId) ? (temperature ?? 
1.0) : undefined, + messages: [{ role: "user", content: prompt }], + stream: false, + }) + + const content = message.content.find(({ type }) => type === "text") + return content?.type === "text" ? content.text : "" + } catch (error) { + if (error instanceof Error) { + throw new Error(`Opencode Go completion error: ${error.message}`) + } + throw error + } + } try { const requestOptions: OpenAI.Chat.ChatCompletionCreateParams = { From 6979afe487a0aff3396bffed20d439672cf20b96 Mon Sep 17 00:00:00 2001 From: Naved Date: Thu, 18 Jun 2026 23:37:40 -0700 Subject: [PATCH 4/8] Fix context calculation with MiniMax --- .../src/__tests__/provider-settings.test.ts | 26 +++++++++++++++++++ packages/types/src/provider-settings.ts | 12 +++++++++ 2 files changed, 38 insertions(+) diff --git a/packages/types/src/__tests__/provider-settings.test.ts b/packages/types/src/__tests__/provider-settings.test.ts index fc7bee2268..724fc20f34 100644 --- a/packages/types/src/__tests__/provider-settings.test.ts +++ b/packages/types/src/__tests__/provider-settings.test.ts @@ -53,6 +53,32 @@ describe("getApiProtocol", () => { }) }) + describe("Opencode Go provider", () => { + it("should return 'anthropic' for opencode-go Anthropic-format models (Qwen/MiniMax)", () => { + expect(getApiProtocol("opencode-go", "qwen3.7-max")).toBe("anthropic") + expect(getApiProtocol("opencode-go", "qwen3.7-plus")).toBe("anthropic") + expect(getApiProtocol("opencode-go", "qwen3.6-plus")).toBe("anthropic") + expect(getApiProtocol("opencode-go", "minimax-m3")).toBe("anthropic") + expect(getApiProtocol("opencode-go", "minimax-m2.7")).toBe("anthropic") + expect(getApiProtocol("opencode-go", "minimax-m2.5")).toBe("anthropic") + }) + + it("should return 'openai' for opencode-go OpenAI-format models (GLM/DeepSeek/etc.)", () => { + expect(getApiProtocol("opencode-go", "glm-5.2")).toBe("openai") + expect(getApiProtocol("opencode-go", "deepseek-v4-pro")).toBe("openai") + expect(getApiProtocol("opencode-go",
"kimi-k2.5")).toBe("openai") + expect(getApiProtocol("opencode-go", "mimo-v2.5")).toBe("openai") + }) + + it("should return 'openai' for opencode-go without a model", () => { + expect(getApiProtocol("opencode-go")).toBe("openai") + }) + + it("should return 'openai' for opencode-go with an unknown model id", () => { + expect(getApiProtocol("opencode-go", "some-future-model")).toBe("openai") + }) + }) + describe("Other providers", () => { it("should return 'openai' for non-anthropic providers regardless of model", () => { expect(getApiProtocol("openrouter", "claude-3-opus")).toBe("openai") diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index b2c850eb86..26c4dee7e1 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -21,6 +21,7 @@ import { internationalZAiModels, minimaxModels, mimoModels, + isOpencodeGoAnthropicFormatModel, } from "./providers/index.js" /** @@ -595,6 +596,17 @@ export const getApiProtocol = (provider: ProviderName | undefined, modelId?: str return "anthropic" } + // Opencode Go routes a subset of its models (Qwen, MiniMax) through the + // Anthropic Messages wire format (`/v1/messages`), which reports usage in + // Anthropic style: `input_tokens` excludes cache tokens, with separate + // `cache_creation_input_tokens` / `cache_read_input_tokens` fields. These + // models must use the anthropic protocol so token/cost aggregation adds the + // cache tokens back into the input total — otherwise the cached prefix is + // dropped from `contextTokens`, undercounting context-window usage. 
+ if (provider && provider === "opencode-go" && modelId && isOpencodeGoAnthropicFormatModel(modelId)) { + return "anthropic" + } + return "openai" } From 97e041fe727b6b885f6f4d934926ff3542bfbeac Mon Sep 17 00:00:00 2001 From: Naved Date: Fri, 19 Jun 2026 07:13:39 -0700 Subject: [PATCH 5/8] Update coverage --- .../providers/__tests__/opencode-go.spec.ts | 341 ++++++++++++++++++ src/api/providers/opencode-go.ts | 32 +- 2 files changed, 370 insertions(+), 3 deletions(-) diff --git a/src/api/providers/__tests__/opencode-go.spec.ts b/src/api/providers/__tests__/opencode-go.spec.ts index ee163f1f21..fa780f325e 100644 --- a/src/api/providers/__tests__/opencode-go.spec.ts +++ b/src/api/providers/__tests__/opencode-go.spec.ts @@ -9,6 +9,7 @@ import OpenAI from "openai" import { opencodeGoDefaultModelId, opencodeGoModels, isOpencodeGoAnthropicFormatModel } from "@roo-code/types" import { OpencodeGoHandler } from "../opencode-go" +import { getModels } from "../fetchers/modelCache" import { ApiHandlerOptions } from "../../../shared/api" vitest.mock("openai") @@ -310,6 +311,68 @@ describe("OpencodeGoHandler", () => { const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning") expect(reasoningChunks).toEqual([{ type: "reasoning", text: "primary thought" }]) }) + + it("uses convertToOpenAiMessages for non-preserveReasoning models", async () => { + // kimi-k2.6 has no preserveReasoning flag, so messages bypass + // convertToR1Format and go through the plain OpenAI converter. 
+ vitest.mocked(getModels).mockImplementationOnce(async () => ({ + "kimi-k2.6": { ...opencodeGoModels["kimi-k2.6"] }, + })) + mockCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { choices: [{ delta: { content: "Hi" }, index: 0 }] } + yield { + choices: [{ delta: {}, index: 0 }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + } + }, + })) + + const handler = new OpencodeGoHandler({ ...mockOptions, opencodeGoModelId: "kimi-k2.6" }) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + + for await (const _chunk of handler.createMessage("sys", messages)) { + void _chunk + } + + const callArgs = mockCreate.mock.calls[0][0] as { messages: Array<{ role: string }> } + expect(callArgs.messages[0]).toEqual({ role: "system", content: "sys" }) + // A single user turn stays a single user message after OpenAI conversion. + expect(callArgs.messages.filter((m) => m.role === "user")).toHaveLength(1) + }) + + it("emits a usage chunk with zeroed tokens when the stream reports no usage", async () => { + mockCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { choices: [{ delta: { content: "Hi" }, index: 0 }] } + yield { + choices: [{ delta: {}, index: 0 }], + usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }, + } + }, + })) + + const handler = new OpencodeGoHandler(mockOptions) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + + const chunks: any[] = [] + for await (const chunk of handler.createMessage("sys", messages)) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "usage", inputTokens: 0, outputTokens: 0 }) + }) + + it("honors includeMaxTokens/modelMaxTokens override for max_completion_tokens", async () => { + const handler = new OpencodeGoHandler({ ...mockOptions, includeMaxTokens: true, modelMaxTokens: 999 }) + const messages: 
Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + + for await (const _chunk of handler.createMessage("sys", messages)) { + void _chunk + } + + expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ max_completion_tokens: 999 })) + }) }) describe("completePrompt", () => { @@ -333,6 +396,25 @@ describe("OpencodeGoHandler", () => { const handler = new OpencodeGoHandler(mockOptions) await expect(handler.completePrompt("ping")).rejects.toThrow("Opencode Go completion error: boom") }) + + it("rethrows non-Error values unchanged", async () => { + mockCreate.mockRejectedValue("not an error") + const handler = new OpencodeGoHandler(mockOptions) + await expect(handler.completePrompt("ping")).rejects.toBe("not an error") + }) + + it("returns an empty string when no content is returned", async () => { + mockCreate.mockResolvedValue({ choices: [] }) + const handler = new OpencodeGoHandler(mockOptions) + expect(await handler.completePrompt("ping")).toBe("") + }) + + it("honors includeMaxTokens/modelMaxTokens override for max_completion_tokens", async () => { + mockCreate.mockResolvedValue({ choices: [{ message: { content: "ok" } }] }) + const handler = new OpencodeGoHandler({ ...mockOptions, includeMaxTokens: true, modelMaxTokens: 4321 }) + await handler.completePrompt("ping") + expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ max_completion_tokens: 4321 })) + }) }) describe("Anthropic-format models (qwen3.7-max)", () => { @@ -471,11 +553,270 @@ describe("OpencodeGoHandler", () => { model: "qwen3.7-max", stream: false, messages: [{ role: "user", content: "ping" }], + // qwen3.7-max maxTokens (65_536) clamped to 20% of its 1M + // context window (200_000) => 65_536. includeMaxTokens is off, + // so the model default is used. 
+ max_tokens: 65_536, }), ) expect(mockCreate).not.toHaveBeenCalled() }) + it("completePrompt honors includeMaxTokens/modelMaxTokens override for max_tokens", async () => { + mockAnthropicCreate.mockResolvedValue({ + content: [{ type: "text", text: "ok" }], + }) + + const handler = new OpencodeGoHandler({ + ...anthropicOptions, + includeMaxTokens: true, + modelMaxTokens: 2048, + }) + await handler.completePrompt("ping") + expect(mockAnthropicCreate).toHaveBeenCalledWith(expect.objectContaining({ max_tokens: 2048 })) + }) + + it("completePrompt rethrows non-Error values unchanged from the Anthropic path", async () => { + mockAnthropicCreate.mockRejectedValue("not an error") + const handler = new OpencodeGoHandler(anthropicOptions) + await expect(handler.completePrompt("ping")).rejects.toBe("not an error") + }) + + it("completePrompt returns an empty string when no text content is returned", async () => { + mockAnthropicCreate.mockResolvedValue({ content: [{ type: "tool_use", id: "x", name: "n", input: {} }] }) + const handler = new OpencodeGoHandler(anthropicOptions) + expect(await handler.completePrompt("ping")).toBe("") + }) + + it("omits tools and tool_choice from the Anthropic request when no tools are provided", async () => { + const handler = new OpencodeGoHandler(anthropicOptions) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + + for await (const _chunk of handler.createMessage("sys", messages)) { + void _chunk + } + + const callArgs = mockAnthropicCreate.mock.calls[0][0] as Record + // Disable-tools path: with no tools, neither field is sent so the + // gateway doesn't force a tool-use-only turn. 
+ expect(callArgs.tools).toBeUndefined() + expect(callArgs.tool_choice).toBeUndefined() + }) + + it("includes tools and tool_choice in the Anthropic request when tools are provided", async () => { + const handler = new OpencodeGoHandler(anthropicOptions) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + const tools = [ + { + type: "function", + function: { + name: "read_file", + description: "read a file", + parameters: { type: "object", properties: {} }, + }, + }, + ] + + for await (const _chunk of handler.createMessage("sys", messages, { tools })) { + void _chunk + } + + const callArgs = mockAnthropicCreate.mock.calls[0][0] as Record + expect(Array.isArray(callArgs.tools)).toBe(true) + expect((callArgs.tools as unknown[]).length).toBe(1) + expect(callArgs.tool_choice).toBeDefined() + }) + + it("skips cache-control breakpoints when the Anthropic-format model does not support prompt caching", async () => { + vitest.mocked(getModels).mockImplementationOnce(async () => ({ + "qwen3.7-max": { ...opencodeGoModels["qwen3.7-max"], supportsPromptCache: false }, + })) + + const handler = new OpencodeGoHandler(anthropicOptions) + const messages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "first" }, + { role: "assistant", content: "ok" }, + { role: "user", content: "second" }, + ] + + for await (const _chunk of handler.createMessage("sys", messages)) { + void _chunk + } + + const callArgs = mockAnthropicCreate.mock.calls[0][0] as { + system: Array<{ cache_control?: unknown }> + messages: Array<{ cache_control?: unknown }> + } + expect(callArgs.system[0].cache_control).toBeUndefined() + expect(callArgs.messages.every((m) => m.cache_control === undefined)).toBe(true) + }) + + it("applies cache-control to the last block of array-content user messages", async () => { + const handler = new OpencodeGoHandler(anthropicOptions) + const messages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: [{ type: 
"text", text: "first" }] }, + { role: "assistant", content: "ok" }, + { + role: "user", + content: [ + { type: "text", text: "part-a" }, + { type: "text", text: "part-b" }, + ], + }, + ] + + for await (const _chunk of handler.createMessage("sys", messages)) { + void _chunk + } + + const callArgs = mockAnthropicCreate.mock.calls[0][0] as { messages: Array<{ content: any }> } + const lastUserMsg = callArgs.messages[callArgs.messages.length - 1] + const blocks = lastUserMsg.content as any[] + // Only the final content block of the last user message is cached. + expect(blocks[blocks.length - 1].cache_control).toEqual({ type: "ephemeral" }) + expect(blocks[0].cache_control).toBeUndefined() + }) + + it("leaves messages unchanged when there are no user messages to cache", async () => { + const handler = new OpencodeGoHandler(anthropicOptions) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "assistant", content: "only assistant" }] + + for await (const _chunk of handler.createMessage("sys", messages)) { + void _chunk + } + + const callArgs = mockAnthropicCreate.mock.calls[0][0] as { + messages: Array<{ cache_control?: unknown }> + } + expect(callArgs.messages.every((m) => m.cache_control === undefined)).toBe(true) + }) + + it("streams thinking content blocks and thinking deltas", async () => { + mockAnthropicCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { type: "message_start", message: { usage: { input_tokens: 5, output_tokens: 0 } } } + // index 0: thinking block (no leading newline at index 0). + yield { + type: "content_block_start", + index: 0, + content_block: { type: "thinking", thinking: "initial thought" }, + } + yield { + type: "content_block_delta", + index: 0, + delta: { type: "thinking_delta", thinking: " more" }, + } + // index 1: text block gets a leading newline separator. 
+ yield { type: "content_block_start", index: 1, content_block: { type: "text", text: "" } } + yield { type: "content_block_delta", index: 1, delta: { type: "text_delta", text: "answer" } } + // index 2: a second thinking block also gets a newline separator. + yield { + type: "content_block_start", + index: 2, + content_block: { type: "thinking", thinking: "second thought" }, + } + yield { type: "message_delta", usage: { output_tokens: 3 } } + yield { type: "message_stop" } + }, + })) + + const handler = new OpencodeGoHandler(anthropicOptions) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + + const chunks: any[] = [] + for await (const chunk of handler.createMessage("sys", messages)) { + chunks.push(chunk) + } + + // index 0 thinking block (no leading newline separator at index 0). + expect(chunks).toContainEqual({ type: "reasoning", text: "initial thought" }) + expect(chunks).toContainEqual({ type: "reasoning", text: " more" }) + // index 1 text block gets a leading newline separator. + expect(chunks).toContainEqual({ type: "text", text: "\n" }) + expect(chunks).toContainEqual({ type: "text", text: "answer" }) + // index 2 thinking block gets a leading newline separator. 
+ expect(chunks).toContainEqual({ type: "reasoning", text: "\n" }) + expect(chunks).toContainEqual({ type: "reasoning", text: "second thought" }) + }) + + it("honors includeMaxTokens/modelMaxTokens override for the streaming Anthropic max_tokens", async () => { + const handler = new OpencodeGoHandler({ + ...anthropicOptions, + includeMaxTokens: true, + modelMaxTokens: 8192, + }) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + + for await (const _chunk of handler.createMessage("sys", messages)) { + void _chunk + } + + expect(mockAnthropicCreate).toHaveBeenCalledWith(expect.objectContaining({ max_tokens: 8192 })) + }) + + it("falls back to the model max_tokens when includeMaxTokens is on but modelMaxTokens is unset", async () => { + const handler = new OpencodeGoHandler({ ...anthropicOptions, includeMaxTokens: true }) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + + for await (const _chunk of handler.createMessage("sys", messages)) { + void _chunk + } + + // qwen3.7-max maxTokens (65_536) clamped to 20% of 1M context => 65_536. 
+ expect(mockAnthropicCreate).toHaveBeenCalledWith(expect.objectContaining({ max_tokens: 65_536 })) + }) + + it("accumulates output tokens across message_delta events into the final cost", async () => { + mockAnthropicCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { type: "message_start", message: { usage: { input_tokens: 10, output_tokens: 0 } } } + yield { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } } + yield { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "hi" } } + yield { type: "message_delta", usage: { output_tokens: 4 } } + yield { type: "message_delta", usage: { output_tokens: 6 } } + yield { type: "message_stop" } + }, + })) + + const handler = new OpencodeGoHandler(anthropicOptions) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + + const chunks: any[] = [] + for await (const chunk of handler.createMessage("sys", messages)) { + chunks.push(chunk) + } + + const costChunk = chunks.find((c) => c.type === "usage" && c.totalCost !== undefined) + expect(costChunk).toBeDefined() + // qwen3.7-max: input $2.5/M, output $7.5/M. Accumulated output + // tokens (4 + 6 = 10) must feed the cost calc — without the + // accumulation fix this would only reflect the 10 input tokens + // (0.000025) instead of input + output (0.0001). 
+ expect(costChunk.totalCost).toBeCloseTo((10 * 2.5 + 10 * 7.5) / 1_000_000, 10) + }) + + it("does not yield a cost chunk when the stream reports no token usage", async () => { + mockAnthropicCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { type: "message_start", message: { usage: { input_tokens: 0, output_tokens: 0 } } } + yield { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } } + yield { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "hi" } } + yield { type: "message_delta", usage: { output_tokens: 0 } } + yield { type: "message_stop" } + }, + })) + + const handler = new OpencodeGoHandler(anthropicOptions) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + + const chunks: any[] = [] + for await (const chunk of handler.createMessage("sys", messages)) { + chunks.push(chunk) + } + + expect(chunks.some((c) => c.type === "usage" && c.totalCost !== undefined)).toBe(false) + }) + it("completePrompt wraps Anthropic errors with an Opencode Go-specific message", async () => { mockAnthropicCreate.mockRejectedValue(new Error("boom")) const handler = new OpencodeGoHandler(anthropicOptions) diff --git a/src/api/providers/opencode-go.ts b/src/api/providers/opencode-go.ts index 0bb8303d5d..973600c720 100644 --- a/src/api/providers/opencode-go.ts +++ b/src/api/providers/opencode-go.ts @@ -269,6 +269,12 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio : { text: systemPrompt, type: "text" }, ] + // Only attach tools/tool_choice when the caller actually provides + // tools — sending an empty tool list (or a tool_choice derived from an + // empty set) forces some Anthropic-compatible gateways into a + // tool-use-only mode and is wasteful for plain text turns. + const tools = metadata?.tools && metadata.tools.length > 0 ? 
metadata.tools : undefined + const requestParams: Anthropic.Messages.MessageCreateParams = { model: modelId, max_tokens: @@ -281,8 +287,15 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio ? this.addAnthropicCacheControl(sanitizedMessages, cacheControl) : sanitizedMessages, stream: true, - tools: convertOpenAIToolsToAnthropic(metadata?.tools ?? []), - tool_choice: convertOpenAIToolChoiceToAnthropic(metadata?.tool_choice, metadata?.parallelToolCalls), + ...(tools + ? { + tools: convertOpenAIToolsToAnthropic(tools), + tool_choice: convertOpenAIToolChoiceToAnthropic( + metadata?.tool_choice, + metadata?.parallelToolCalls, + ), + } + : {}), } const stream = await this.anthropicClient.messages.create(requestParams) @@ -320,6 +333,12 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio } case "message_delta": // Tells us stop_reason, stop_sequence, and output tokens. + // Anthropic streams the cumulative output token count in each + // message_delta (the final event carries the total), so + // accumulate it into the running total used for cost + // calculation — otherwise the final cost only reflects the + // (typically zero) message_start output tokens. + outputTokens += chunk.usage.output_tokens || 0 yield { type: "usage", inputTokens: 0, @@ -460,7 +479,14 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio try { const message = await this.anthropicClient.messages.create({ model: modelId, - max_tokens: maxTokens ?? 16_384, + // Honour the same includeMaxTokens/modelMaxTokens override + // logic as the streaming path so non-streaming completions + // respect the user's max-output slider instead of always + // falling back to the model default. + max_tokens: + this.options.includeMaxTokens === true + ? this.options.modelMaxTokens || maxTokens || 16_384 + : (maxTokens ?? 16_384), temperature: this.supportsTemperature(modelId) ? (temperature ?? 
1.0) : undefined, messages: [{ role: "user", content: prompt }], stream: false, From ecc266d2021f9e2b5d14487e615151df326b1ca0 Mon Sep 17 00:00:00 2001 From: Naved Date: Fri, 19 Jun 2026 07:14:37 -0700 Subject: [PATCH 6/8] add opencode test --- .../types/src/__tests__/opencode-go.test.ts | 124 ++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 packages/types/src/__tests__/opencode-go.test.ts diff --git a/packages/types/src/__tests__/opencode-go.test.ts b/packages/types/src/__tests__/opencode-go.test.ts new file mode 100644 index 0000000000..25d862c268 --- /dev/null +++ b/packages/types/src/__tests__/opencode-go.test.ts @@ -0,0 +1,124 @@ +import { + opencodeGoDefaultModelId, + opencodeGoDefaultModelInfo, + opencodeGoModels, + OPENCODE_GO_DEFAULT_TEMPERATURE, + OPENCODE_GO_ANTHROPIC_FORMAT_MODELS, + isOpencodeGoAnthropicFormatModel, + getOpencodeGoModelInfo, +} from "../providers/opencode-go.js" + +describe("opencode-go registry", () => { + const anthropicFormatModels = [ + "qwen3.7-max", + "qwen3.7-plus", + "qwen3.6-plus", + "minimax-m3", + "minimax-m2.7", + "minimax-m2.5", + ] + const openaiFormatModels = [ + "glm-5", + "glm-5.1", + "glm-5.2", + "kimi-k2.5", + "kimi-k2.6", + "mimo-v2.5", + "mimo-v2.5-pro", + "deepseek-v4-pro", + "deepseek-v4-flash", + ] + + describe("isOpencodeGoAnthropicFormatModel", () => { + it("classifies Qwen and MiniMax models as Anthropic-format", () => { + for (const id of anthropicFormatModels) { + expect(isOpencodeGoAnthropicFormatModel(id)).toBe(true) + } + }) + + it("classifies GLM/Kimi/MiMo/DeepSeek models as OpenAI-compatible", () => { + for (const id of openaiFormatModels) { + expect(isOpencodeGoAnthropicFormatModel(id)).toBe(false) + } + }) + + it("defaults unknown model IDs to the OpenAI-compatible format", () => { + expect(isOpencodeGoAnthropicFormatModel("some-future-model")).toBe(false) + expect(isOpencodeGoAnthropicFormatModel("")).toBe(false) + }) + }) + + describe("getOpencodeGoModelInfo", () => { + 
it("returns the native ModelInfo for a curated model", () => { + const info = getOpencodeGoModelInfo("qwen3.7-max") + expect(info).toBeDefined() + expect(info?.maxTokens).toBe(65_536) + expect(info?.contextWindow).toBe(1_000_000) + expect(info?.supportsPromptCache).toBe(true) + }) + + it("returns undefined for an unknown model ID", () => { + expect(getOpencodeGoModelInfo("not-a-real-go-model")).toBeUndefined() + }) + }) + + describe("OPENCODE_GO_ANTHROPIC_FORMAT_MODELS", () => { + it("contains exactly the Qwen and MiniMax models", () => { + expect([...OPENCODE_GO_ANTHROPIC_FORMAT_MODELS].sort()).toEqual([...anthropicFormatModels].sort()) + }) + + // The PR description calls out that the format-routing set must stay in + // sync with the Go model table — every routed model must have a native + // registry entry so capability flags and pricing resolve correctly. + it("every Anthropic-format model has a native registry entry", () => { + for (const id of OPENCODE_GO_ANTHROPIC_FORMAT_MODELS) { + expect(opencodeGoModels[id]).toBeDefined() + } + }) + }) + + describe("opencodeGoModels registry invariants", () => { + it("every entry has a positive maxTokens and contextWindow", () => { + for (const [id, info] of Object.entries(opencodeGoModels)) { + expect(info.maxTokens).toBeGreaterThan(0) + expect(info.contextWindow).toBeGreaterThan(0) + // Sanity: max output must not exceed the context window. 
+ expect(info.maxTokens).toBeLessThanOrEqual(info.contextWindow) + void id + } + }) + + it("every entry declares supportsImages", () => { + for (const info of Object.values(opencodeGoModels)) { + expect(typeof info.supportsImages).toBe("boolean") + } + }) + + it("models with an array supportsReasoningEffort expose a non-empty allow-list", () => { + for (const info of Object.values(opencodeGoModels)) { + if (Array.isArray(info.supportsReasoningEffort)) { + expect(info.supportsReasoningEffort.length).toBeGreaterThan(0) + } + } + }) + }) + + describe("defaults", () => { + it("the default model id is a curated OpenAI-compatible model", () => { + expect(opencodeGoDefaultModelId).toBe("glm-5.2") + expect(opencodeGoModels[opencodeGoDefaultModelId]).toBeDefined() + expect(isOpencodeGoAnthropicFormatModel(opencodeGoDefaultModelId)).toBe(false) + }) + + it("exposes a fully-populated default ModelInfo fallback", () => { + expect(opencodeGoDefaultModelInfo.maxTokens).toBeGreaterThan(0) + expect(opencodeGoDefaultModelInfo.contextWindow).toBeGreaterThan(0) + expect(opencodeGoDefaultModelInfo.supportsPromptCache).toBe(false) + expect(opencodeGoDefaultModelInfo.description).toBeTruthy() + }) + + it("exposes a deterministic default temperature", () => { + expect(OPENCODE_GO_DEFAULT_TEMPERATURE).toBe(0) + }) + }) +}) From 3545e9ed1aba77d31adde446eb631e0fb1a9c0d6 Mon Sep 17 00:00:00 2001 From: Naved Date: Fri, 19 Jun 2026 07:17:49 -0700 Subject: [PATCH 7/8] fix typescript error --- src/api/providers/__tests__/opencode-go.spec.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/api/providers/__tests__/opencode-go.spec.ts b/src/api/providers/__tests__/opencode-go.spec.ts index fa780f325e..6dbab180a1 100644 --- a/src/api/providers/__tests__/opencode-go.spec.ts +++ b/src/api/providers/__tests__/opencode-go.spec.ts @@ -606,7 +606,7 @@ describe("OpencodeGoHandler", () => { it("includes tools and tool_choice in the Anthropic request when tools are provided", async 
() => { const handler = new OpencodeGoHandler(anthropicOptions) const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] - const tools = [ + const tools: OpenAI.Chat.ChatCompletionTool[] = [ { type: "function", function: { @@ -617,7 +617,7 @@ describe("OpencodeGoHandler", () => { }, ] - for await (const _chunk of handler.createMessage("sys", messages, { tools })) { + for await (const _chunk of handler.createMessage("sys", messages, { taskId: "test-task", tools })) { void _chunk } From fd85efa414f16f134d0d2e221eb670ed1681cd3a Mon Sep 17 00:00:00 2001 From: Naved Date: Fri, 19 Jun 2026 07:27:55 -0700 Subject: [PATCH 8/8] fix(opencode-go): address PR #652 review feedback - Type streamAnthropicMessage's info param as ModelInfo and drop the force-cast so calculateApiCostAnthropic can no longer silently return $0 when pricing fields are absent. - Wrap pre-stream Anthropic-format errors (401/429/network) with the 'Opencode Go completion error:' prefix for consistency with completePrompt. - Clarify the registry doc: supportsPromptCache controls client-side cache_control injection (Anthropic path) only; OA-compat models price server-side cached_tokens via cacheReadsPrice regardless of the flag (MiMo stays false, matching the dedicated mimo provider). - Add cacheWritesPrice (0.375) to minimax-m3 so its cache writes are billed, matching M2.5/M2.7. - Add supportsMaxTokens to DeepSeek V4 models so the max-output slider is exposed like GLM. - Strengthen the streaming cost test to assert totalCost > 0, and add registry invariants for MiniMax cache-write pricing and DeepSeek supportsMaxTokens plus a streaming error-wrapping test. 
--- .changeset/add-glm-5-2-support.md | 2 ++ .../types/src/__tests__/opencode-go.test.ts | 20 ++++++++++++ packages/types/src/providers/opencode-go.ts | 32 ++++++++++++++++--- .../providers/__tests__/opencode-go.spec.ts | 23 +++++++++++-- src/api/providers/opencode-go.ts | 19 +++++++++-- 5 files changed, 87 insertions(+), 9 deletions(-) diff --git a/.changeset/add-glm-5-2-support.md b/.changeset/add-glm-5-2-support.md index 26bd162cdf..323def2089 100644 --- a/.changeset/add-glm-5-2-support.md +++ b/.changeset/add-glm-5-2-support.md @@ -3,3 +3,5 @@ --- Add GLM-5.2 support with High/Max `reasoning_effort` tiers. The default effort is High (deep reasoning stays opt-in), Max is selected only when the user explicitly picks it, and the parameter is omitted entirely when reasoning is disabled. + +Also refines the Opencode Go provider per review: bill MiniMax M3 cache writes (`cacheWritesPrice`), expose the max-output slider for DeepSeek V4 models (`supportsMaxTokens`), wrap pre-stream Anthropic-format errors with the provider prefix, and type the Anthropic streaming path's model info as `ModelInfo` so cost calculation can no longer silently return `$0`. diff --git a/packages/types/src/__tests__/opencode-go.test.ts b/packages/types/src/__tests__/opencode-go.test.ts index 25d862c268..617c88675c 100644 --- a/packages/types/src/__tests__/opencode-go.test.ts +++ b/packages/types/src/__tests__/opencode-go.test.ts @@ -101,6 +101,26 @@ describe("opencode-go registry", () => { } } }) + + it("every Anthropic-format model with prompt-cache injection declares a cacheWritesPrice", () => { + // MiniMax/Qwen route through /v1/messages with client-side + // cache_control breakpoints, so cache_creation_input_tokens are + // reported and billed — each must carry a cacheWritesPrice or the + // write cost is silently reported as $0. 
+ for (const id of OPENCODE_GO_ANTHROPIC_FORMAT_MODELS) { + const info = getOpencodeGoModelInfo(id) + expect(info).toBeDefined() + if (info?.supportsPromptCache) { + expect(info.cacheWritesPrice).toBeDefined() + expect(info.cacheReadsPrice).toBeDefined() + } + } + }) + + it("DeepSeek entries expose supportsMaxTokens so the max-output slider is available", () => { + expect(getOpencodeGoModelInfo("deepseek-v4-pro")?.supportsMaxTokens).toBe(true) + expect(getOpencodeGoModelInfo("deepseek-v4-flash")?.supportsMaxTokens).toBe(true) + }) }) describe("defaults", () => { diff --git a/packages/types/src/providers/opencode-go.ts b/packages/types/src/providers/opencode-go.ts index 7a646b6236..5b1e0ada8e 100644 --- a/packages/types/src/providers/opencode-go.ts +++ b/packages/types/src/providers/opencode-go.ts @@ -39,10 +39,21 @@ export const OPENCODE_GO_DEFAULT_TEMPERATURE = 0 * max-token values stay in sync with the gateway while capability flags and * pricing remain correct. * - * `supportsPromptCache` is intentionally `true` for models whose Go pricing - * table lists a "Cached Read" price: the gateway honours server-side caching - * and reports `cached_tokens` in usage, which the handler forwards for cost - * calculation. Client-side `cache_control` injection is not used on this path. + * `supportsPromptCache` has two distinct meanings depending on the wire format: + * + * - Anthropic-format models (Qwen/MiniMax): `true` enables client-side + * `cache_control` breakpoint injection in the handler's `/v1/messages` + * path. The gateway then reports `cache_creation_input_tokens` / + * `cache_read_input_tokens`, which are priced via `cacheWritesPrice` / + * `cacheReadsPrice`. + * - OpenAI-compatible models (GLM/Kimi/DeepSeek/MiMo): there is no + * client-side `cache_control` concept, so the flag is NOT used to build + * the request. 
The gateway performs server-side caching and reports + * `cached_tokens` in `prompt_tokens_details`, which the handler forwards + * as `cacheReadTokens` and prices via `cacheReadsPrice` regardless of the + * flag. MiMo therefore declares `supportsPromptCache: false` (no + * client-side injection, matching the dedicated `mimo` provider) while + * still carrying a `cacheReadsPrice` for its server-side cache reads. */ export const opencodeGoModels: Record = { // --- Zhipu GLM --- @@ -199,6 +210,12 @@ export const opencodeGoModels: Record = { preserveReasoning: true, inputPrice: 0.3, outputPrice: 1.2, + // M3 routes through the Anthropic Messages path with client-side + // cache_control injection active, so cache_creation_input_tokens are + // reported and billed. Matches the MiniMax write price shared by + // M2.5/M2.7 (same vendor/pricing tier: $0.3 in / $1.2 out / $0.06 + // cache read). + cacheWritesPrice: 0.375, cacheReadsPrice: 0.06, description: "MiniMax M3, a frontier multimodal coding model with a 1M context window, agentic reasoning, and tool use. Available via the Opencode Go plan.", @@ -265,6 +282,12 @@ export const opencodeGoModels: Record = { contextWindow: 1_000_000, supportsImages: false, supportsPromptCache: true, + // DeepSeek advertises a large, explicit max-output ceiling (384k), so + // expose the configurable max-output slider like GLM. Without this the + // slider is hidden and the effective default is the 20% context-window + // clamp (200k); with it, users can raise the budget up to the model's + // 384k ceiling. 
+ supportsMaxTokens: true, supportsReasoningEffort: ["disable", "low", "medium", "high", "xhigh"], preserveReasoning: true, reasoningEffort: "high", @@ -279,6 +302,7 @@ export const opencodeGoModels: Record = { contextWindow: 1_000_000, supportsImages: false, supportsPromptCache: true, + supportsMaxTokens: true, supportsReasoningEffort: ["disable", "low", "medium", "high", "xhigh"], preserveReasoning: true, reasoningEffort: "high", diff --git a/src/api/providers/__tests__/opencode-go.spec.ts b/src/api/providers/__tests__/opencode-go.spec.ts index 6dbab180a1..31ce34fe7f 100644 --- a/src/api/providers/__tests__/opencode-go.spec.ts +++ b/src/api/providers/__tests__/opencode-go.spec.ts @@ -516,8 +516,13 @@ describe("OpencodeGoHandler", () => { }) // ... message_delta output tokens ... expect(chunks).toContainEqual({ type: "usage", inputTokens: 0, outputTokens: 5 }) - // ... and a final cost chunk. - expect(chunks.some((c) => c.type === "usage" && c.totalCost !== undefined)).toBe(true) + // ... and a final cost chunk. Assert totalCost > 0 (not just + // defined) so CI catches the output-token accumulation regression — + // without accumulation the cost would be computed from + // outputTokens: 0 and report ~$0. + expect(chunks.some((c) => c.type === "usage" && typeof c.totalCost === "number" && c.totalCost > 0)).toBe( + true, + ) }) it("applies cache-control breakpoints when the model supports prompt caching", async () => { @@ -822,6 +827,20 @@ describe("OpencodeGoHandler", () => { const handler = new OpencodeGoHandler(anthropicOptions) await expect(handler.completePrompt("ping")).rejects.toThrow("Opencode Go completion error: boom") }) + + it("wraps pre-stream Anthropic errors from createMessage with an Opencode Go-specific message", async () => { + // Pre-stream failures (401, 429, network) reject the create() call + // before any chunk is emitted; they must be wrapped consistently + // with completePrompt rather than propagating raw. 
+ mockAnthropicCreate.mockRejectedValue(new Error("rate limited")) + const handler = new OpencodeGoHandler(anthropicOptions) + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }] + await expect(async () => { + for await (const _chunk of handler.createMessage("sys", messages)) { + void _chunk + } + }).rejects.toThrow("Opencode Go completion error: rate limited") + }) }) describe("isOpencodeGoAnthropicFormatModel", () => { diff --git a/src/api/providers/opencode-go.ts b/src/api/providers/opencode-go.ts index 973600c720..27d8ab3f7e 100644 --- a/src/api/providers/opencode-go.ts +++ b/src/api/providers/opencode-go.ts @@ -3,6 +3,7 @@ import { CacheControlEphemeral } from "@anthropic-ai/sdk/resources" import OpenAI from "openai" import { + type ModelInfo, opencodeGoDefaultModelId, opencodeGoDefaultModelInfo, OPENCODE_GO_DEFAULT_TEMPERATURE, @@ -249,7 +250,7 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio */ private async *streamAnthropicMessage( modelId: string, - info: { supportsPromptCache?: boolean }, + info: ModelInfo, temperature: number | undefined, maxTokens: number | undefined, systemPrompt: string, @@ -298,7 +299,19 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio : {}), } - const stream = await this.anthropicClient.messages.create(requestParams) + // Wrap pre-stream errors (401, 429, network) with the same + // "Opencode Go completion error:" prefix used by completePrompt so the + // Anthropic-format path surfaces failures consistently. Mid-stream + // errors propagate unchanged, matching the OpenAI streaming path. 
+ let stream + try { + stream = await this.anthropicClient.messages.create(requestParams) + } catch (error) { + if (error instanceof Error) { + throw new Error(`Opencode Go completion error: ${error.message}`) + } + throw error + } let inputTokens = 0 let outputTokens = 0 @@ -411,7 +424,7 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio // Calculate and yield final cost if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) { const { totalCost } = calculateApiCostAnthropic( - info as Parameters[0], + info, inputTokens, outputTokens, cacheWriteTokens,