From 043b24b16d899d9ed8de598c54a4598b28704bfd Mon Sep 17 00:00:00 2001 From: Roo Code Date: Fri, 27 Mar 2026 22:21:31 +0000 Subject: [PATCH] feat: add glm-5-turbo model to Z.ai provider Add glm-5-turbo to both international and mainland Z.ai model lists with 200k context window, 128k max output tokens, and prompt caching. International pricing: $1.2/M input, $0.24/M cached, $4.0/M output Mainland pricing estimated proportionally from glm-5 ratios. Closes #12018 --- packages/types/src/providers/zai.ts | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/packages/types/src/providers/zai.ts b/packages/types/src/providers/zai.ts index 69f90f232a5..2336e259ea1 100644 --- a/packages/types/src/providers/zai.ts +++ b/packages/types/src/providers/zai.ts @@ -135,6 +135,18 @@ export const internationalZAiModels = { description: "GLM-5 is Zhipu's next-generation model with a 202k context window and built-in thinking capabilities. It delivers state-of-the-art reasoning, coding, and agentic performance.", }, + "glm-5-turbo": { + maxTokens: 128_000, + contextWindow: 200_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 1.2, + outputPrice: 4.0, + cacheWritesPrice: 0, + cacheReadsPrice: 0.24, + description: + "GLM-5-Turbo is a high-throughput variant of GLM-5 with a 200k context window and 128k max output tokens, optimized for fast, cost-effective reasoning and coding tasks.", + }, "glm-4.7-flash": { maxTokens: 16_384, contextWindow: 200_000, @@ -311,6 +323,18 @@ export const mainlandZAiModels = { description: "GLM-5 is Zhipu's next-generation model with a 202k context window and built-in thinking capabilities. It delivers state-of-the-art reasoning, coding, and agentic performance.", }, + "glm-5-turbo": { + maxTokens: 128_000, + contextWindow: 200_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0.58, + outputPrice: 2.07, + cacheWritesPrice: 0, + cacheReadsPrice: 0.124, + description: + "GLM-5-Turbo is a high-throughput variant of GLM-5 with a 200k context window and 128k max output tokens, optimized for fast, cost-effective reasoning and coding tasks.", + }, "glm-4.7-flash": { maxTokens: 16_384, contextWindow: 204_800,