From 043b24b16d899d9ed8de598c54a4598b28704bfd Mon Sep 17 00:00:00 2001
From: Roo Code <roomote@roocode.com>
Date: Fri, 27 Mar 2026 22:21:31 +0000
Subject: [PATCH] feat: add glm-5-turbo model to Z.ai provider

Add glm-5-turbo to both the international and mainland Z.ai model lists
with a 200k context window, 128k max output tokens, and prompt caching
enabled.

International pricing: $1.2/M input, $0.24/M cached reads, $4.0/M output.
Mainland pricing (0.58/M input, 0.124/M cached reads, 2.07/M output) is an
estimate, scaled proportionally from the glm-5 price ratios.

Closes #12018
---
 packages/types/src/providers/zai.ts | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/packages/types/src/providers/zai.ts b/packages/types/src/providers/zai.ts
index 69f90f232a5..2336e259ea1 100644
--- a/packages/types/src/providers/zai.ts
+++ b/packages/types/src/providers/zai.ts
@@ -135,6 +135,18 @@ export const internationalZAiModels = {
 		description:
 			"GLM-5 is Zhipu's next-generation model with a 202k context window and built-in thinking capabilities. It delivers state-of-the-art reasoning, coding, and agentic performance.",
 	},
+	"glm-5-turbo": {
+		maxTokens: 128_000,
+		contextWindow: 200_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 1.2,
+		outputPrice: 4.0,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.24,
+		description:
+			"GLM-5-Turbo is a high-throughput variant of GLM-5 with a 200k context window and 128k max output tokens, optimized for fast, cost-effective reasoning and coding tasks.",
+	},
 	"glm-4.7-flash": {
 		maxTokens: 16_384,
 		contextWindow: 200_000,
@@ -311,6 +323,18 @@ export const mainlandZAiModels = {
 		description:
 			"GLM-5 is Zhipu's next-generation model with a 202k context window and built-in thinking capabilities. It delivers state-of-the-art reasoning, coding, and agentic performance.",
 	},
+	"glm-5-turbo": {
+		maxTokens: 128_000,
+		contextWindow: 200_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 0.58,
+		outputPrice: 2.07,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.124,
+		description:
+			"GLM-5-Turbo is a high-throughput variant of GLM-5 with a 200k context window and 128k max output tokens, optimized for fast, cost-effective reasoning and coding tasks.",
+	},
 	"glm-4.7-flash": {
 		maxTokens: 16_384,
 		contextWindow: 204_800,