diff --git a/AGENTS.md b/AGENTS.md index 34728f71..233efb06 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -37,6 +37,7 @@ Per-request LLM diagnostics scope (usage/cost summary vs diagnostics detail, inc Approval policy mode (`minimal|trusted|full-access`) and global per-project storage are specified in `dev-docs/specs/approval-mode.md`. Terminal-Bench support requirements (Harbor integration + headless benchmark mode + ATIF artifacts/validation) are specified in `dev-docs/specs/terminal-bench.md`. Future built-in tool candidates (`apply_patch`, `request_user_input`, `webfetch`, `view_image`, `lsp`, MCP resource tools) are tracked in `dev-docs/specs/future-tools.md`. +Native Z.ai GLM-5.2 provider behavior is specified in `dev-docs/specs/zai-provider.md` and implemented through core `ChatZai` plus runtime `model.provider=zai`. ## Implementation plan diff --git a/README.md b/README.md index 5cd39adb..021de0f7 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,17 @@ Under the hood, a TypeScript runtime and a Rust TUI communicate over JSON-RPC. ⚠️ **Early Development / Alpha Stage** — Codelia is under active development and is not yet production-ready. +## Provider support + +Current provider support: +- `openai` (API Key or OAuth with ChatGPT Plus/Pro) +- `anthropic` (API Key only) +- `openrouter` (API Key only) +- `zai` (API Key only; **New:** native GLM-5.2 support, with additional GLM models selectable) + +Planned / not wired as a runtime provider yet: +- `google` / Gemini + ## Security note Codelia does **not** currently provide a strong OS-level sandbox. @@ -40,14 +51,6 @@ codelia Chose a provider/model and set up auth. 
-Current provider support: -- `openai` (API Key or OAuth with ChatGPT Plus/Pro) -- `anthropic` (API Key only) -- `openrouter` (API Key only) - -Planned / not wired as a runtime provider yet: -- `google` / Gemini - Then type a request such as: ```text diff --git a/crates/tui/src/app/handlers/command.rs b/crates/tui/src/app/handlers/command.rs index 07f24efc..a31aad1b 100644 --- a/crates/tui/src/app/handlers/command.rs +++ b/crates/tui/src/app/handlers/command.rs @@ -21,7 +21,7 @@ use slash::{ handle_tasks_command, handle_theme_command, }; -const MODEL_PROVIDERS: &[&str] = &["openai", "anthropic", "openrouter"]; +const MODEL_PROVIDERS: &[&str] = &["openai", "anthropic", "openrouter", "zai"]; const COMMAND_SUGGESTION_LIMIT: usize = 12; const QUEUE_PREVIEW_MAX_CHARS: usize = 72; const QUEUE_LIST_LIMIT: usize = 5; diff --git a/dev-docs/specs/zai-provider.md b/dev-docs/specs/zai-provider.md new file mode 100644 index 00000000..6e1b3bd9 --- /dev/null +++ b/dev-docs/specs/zai-provider.md @@ -0,0 +1,381 @@ +# Z.ai Native Provider Spec + +Status: Implemented +Date: 2026-06-20 +Related: +- `dev-docs/specs/providers.md` +- `dev-docs/specs/model-parameter-ui.md` +- `dev-docs/specs/model-metadata.md` +- `packages/core/src/llm/base.ts` +- `packages/runtime/src/agent-factory.ts` +- `packages/runtime/src/auth/resolver.ts` +- `packages/runtime/src/rpc/model.ts` + +Implemented in phase 1: + +- Core provider adapter: `packages/core/src/llm/zai/` +- Z.ai HTTP/SSE transport helpers: `packages/core/src/llm/zai/transport.ts` +- Static model spec: `packages/core/src/models/zai.ts` +- Runtime auth/model/onboarding/agent-factory wiring for `model.provider=zai` +- TUI `/model` provider selection includes `zai` + +Still intentionally deferred: + +- dynamic Z.ai model-list fetching +- Z.ai hosted/native web search +- opt-in live integration smoke tests + +## 1. Goal + +Add a native `zai` provider for Z.ai GLM models without routing through OpenRouter. 
+ +The native provider should preserve Codelia's provider-neutral agent loop while adapting Z.ai's Chat Completions API into the existing `BaseChatModel` contract: + +- `BaseMessage[]` in +- provider-specific request out +- provider-specific response back into `ChatInvokeCompletion` +- normalized assistant text, reasoning, tool calls, usage, and provider metadata + +## 2. Current Implementation Findings + +### 2.1 Provider identity is split across core and runtime + +Core currently defines: + +- `ProviderName = "openai" | "anthropic" | "openrouter" | "google"` in `packages/core/src/llm/base.ts` +- `HostedSearchToolDefinition.provider` with the same provider union in `packages/core/src/types/llm/tools.ts` +- model registry alias buckets for `openai`, `anthropic`, `openrouter`, and `google` in `packages/core/src/models/registry.ts` +- provider filters in `applyModelMetadata()` that ignore provider ids outside that set +- provider-qualified model parsing in `packages/core/src/agent/agent.ts` and `packages/core/src/services/compaction/service.ts` + +Runtime separately defines: + +- `SUPPORTED_PROVIDERS = ["openai", "anthropic", "openrouter"]` in `packages/runtime/src/auth/resolver.ts` +- `SupportedModelProvider = "openai" | "anthropic" | "openrouter"` in `packages/runtime/src/rpc/model.ts` +- `agent-factory` provider construction for `ChatOpenAI`, `ChatOpenRouter`, and `ChatAnthropic` + +Adding only a core `ProviderName` entry is not enough. Runtime auth, model RPC, model registry construction, and onboarding must also accept `zai`. 
+ +### 2.2 Existing provider adapters are provider-owned + +The implemented providers follow this split: + +- `ChatOpenAI`: core adapter, OpenAI Responses API +- `ChatOpenRouter`: core adapter, OpenRouter Responses API +- `ChatAnthropic`: core adapter, Anthropic Messages API +- runtime: chooses provider, resolves auth/config, constructs the adapter + +OpenRouter's connector split is the right precedent: provider invocation belongs in `@codelia/core`, while runtime owns auth, model config, onboarding, and model listing. + +### 2.3 The OpenAI/OpenRouter path is Responses-specific + +`ChatOpenAI` and `ChatOpenRouter` use OpenAI SDK Responses APIs and serialize history as Responses input items. That path depends on Responses concepts such as `response.output`, `function_call_output`, `reasoning.encrypted_content`, `response.output_text`, and `responses.stream(...).finalResponse()`. + +Z.ai native integration should not reuse `ChatOpenAI` or `ChatOpenRouter` with a base URL change. Z.ai's documented generation endpoint is Chat Completions, not Responses. + +### 2.4 The common message model is sufficient + +Current core types already have the escape hatches needed for Z.ai: + +- `AssistantMessage.content` for final text +- `AssistantMessage.tool_calls` for function calls +- `ReasoningMessage.content` plus `raw_item` for `reasoning_content` +- `ToolCall.provider_meta` for preserving compact provider call metadata needed for replay/debug +- `ChatInvokeUsage` for token usage +- `ChatInvokeCompletion.provider_meta` for response id, request id, finish reason, and reasoning mapping metadata + +The agent loop already emits reasoning events from `ReasoningMessage`, continues based on assistant tool calls, and records `llm.request` / `llm.response` provider metadata. + +### 2.5 Static model metadata is required for reliable startup + +`buildModelRegistry()` refreshes models.dev metadata, then falls back to `DEFAULT_MODEL_REGISTRY`. 
In strict mode, startup fails if the selected model has neither fetched metadata nor a usable static `ModelSpec` with a positive context budget. + +Because models.dev coverage for `zai` may lag or use a different provider id, +usable Z.ai model specs must be kept in the static registry. + +## 3. Z.ai API Facts + +Verified against Z.ai developer docs on 2026-06-20: + +- Auth: `Authorization: Bearer <ZAI_API_KEY>` +- Endpoint: `POST https://api.z.ai/api/paas/v4/chat/completions` +- Default model id: `glm-5.2` +- Static text models in newest/highest-priority order: `glm-5.2`, `glm-5.1`, + `glm-5`, `glm-5-turbo`, `glm-4.7` +- Context length: 1M for `glm-5.2`; 200K for `glm-5.1`, `glm-5`, + `glm-5-turbo`, and `glm-4.7` +- Maximum output tokens: 128K / `max_tokens <= 131072` +- Streaming: `stream=true` +- Streaming tool call arguments: `tool_stream=true` +- Reasoning stream field: `delta.reasoning_content` +- Final message reasoning field: `choices[].message.reasoning_content` +- Tool calls: `choices[].message.tool_calls` and streaming `delta.tool_calls` +- Thinking control: `thinking: { type: "enabled" | "disabled" }` +- Reasoning effort values: `max`, `xhigh`, `high`, `medium`, `low`, `minimal`, `none` +- Z.ai maps `low` / `medium` to `high`, and `xhigh` to `max` + +References: + +- `https://docs.z.ai/api-reference/llm/chat-completion` +- `https://docs.z.ai/guides/llm/glm-5.2` +- `https://docs.z.ai/guides/llm/glm-5.1` +- `https://docs.z.ai/guides/llm/glm-5` +- `https://docs.z.ai/guides/llm/glm-5-turbo` +- `https://docs.z.ai/guides/llm/glm-4.7` +- `https://docs.z.ai/guides/overview/migrate-to-glm-new` +- `https://docs.z.ai/guides/capabilities/thinking` +- `https://docs.z.ai/guides/overview/concept-param` + +## 4.
Target Architecture + +### 4.1 Core + +Add `packages/core/src/llm/zai/`: + +- `chat.ts`: `ChatZai implements BaseChatModel<"zai", ZaiInvokeOptions>` +- `serializer.ts`: provider-neutral message/tool conversion and completion normalization +- optional small transport/parser helpers if streaming code grows +- focused unit tests under `packages/core/tests/` + +Export `ChatZai` from `packages/core/src/index.ts`. + +Add `packages/core/src/models/zai.ts` with static Z.ai models ordered +newest/highest-priority first: + +```ts +export const ZAI_DEFAULT_MODEL = "glm-5.2"; +export const ZAI_MODELS = ["glm-5.2", "glm-5.1", "glm-5", "glm-5-turbo", "glm-4.7"]; +``` + +Add `zai` to: + +- `ProviderName` +- model registry alias buckets and clone helpers +- `applyModelMetadata()` provider allowlist +- provider-qualified model parsing in agent/compaction/tool-output-cache paths +- hosted-search provider union only if there is a concrete Z.ai hosted-search adapter; otherwise leave native search unsupported in phase 1 + +### 4.2 Runtime + +Add `zai` to: + +- auth provider selection and onboarding +- `API_KEY_ENV` as `ZAI_API_KEY` +- API-key prompt label (`Z.ai API key`) +- model RPC supported provider union +- `model.list` / `model.set` +- `createAgentFactory` provider switch + +Add runtime client option builder: + +- default base URL: `https://api.z.ai/api/paas/v4` +- env override: `ZAI_BASE_URL` +- auth: `ZAI_API_KEY` or saved `auth.json` api key +- request timeout: `ChatZai` defaults to 20 minutes and can be disabled/overridden in tests or direct construction with `timeoutMs` + +Model listing phase 1 should use static registry. Do not add a dynamic Z.ai model-list fetch unless Z.ai exposes a stable model-list endpoint and the expected response shape is confirmed. + +### 4.3 Protocol and config + +No protocol shape change is required. `model.provider` and model RPC provider fields are strings today. + +No config schema migration is required. 
Existing `model.reasoning` remains the only user-facing reasoning knob. + +## 5. Request Mapping + +### 5.1 Messages + +Map Codelia messages to Chat Completions messages: + +- `system` -> `{ role: "system", content: string }` +- `user` -> `{ role: "user", content: string | provider-supported content parts }` +- `assistant` text -> `{ role: "assistant", content }` +- `assistant.tool_calls` -> assistant message with `tool_calls` +- `tool` -> `{ role: "tool", tool_call_id, content }` +- `reasoning` -> omit on replay in phase 1 + +Phase 1 should treat multimodal parts conservatively: + +- text parts are preserved +- images/documents are degraded to placeholders unless Z.ai multimodal input is explicitly implemented and tested +- provider-specific `other` parts are replayed only if `provider === "zai"` and the payload shape is known safe; otherwise stringify/degrade + +### 5.2 Tools + +Map Codelia function tools to OpenAI-style Chat Completions tools: + +```ts +{ + type: "function", + function: { + name, + description, + parameters + } +} +``` + +Initial policy: + +- support function tools +- set `tool_choice` for `auto`, `required`, `none`, or a specific tool name +- enable `tool_stream: true` whenever tools are present and `stream: true` +- ignore hosted search tools for `zai` in phase 1 +- preserve compact provider call metadata in `ToolCall.provider_meta`; do not persist raw streaming chunks in history/session snapshots + +### 5.3 Reasoning + +Keep Codelia's canonical config values: + +- `low` +- `medium` +- `high` +- `xhigh` + +Map to Z.ai request values: + +- `low` -> `high` +- `medium` -> `high` +- `high` -> `high` +- `xhigh` -> `max` + +Always send `thinking: { type: "enabled" }` in phase 1. Send `reasoning_effort` +only for models that support the parameter (`glm-5.2` in the static registry). 
+For those models, record both requested and applied canonical levels in +`provider_meta`: + +- requested: Codelia level +- applied: `high` for `low|medium|high`, `xhigh` for `xhigh` +- provider reasoning effort: `high` or `max` +- fallbackApplied: true when requested was `low` or `medium` + +Do not expose `minimal`, `none`, or raw Z.ai `thinking` settings in the baseline UI in phase 1. + +`sessionKey` is intentionally unused in phase 1 because Z.ai has no confirmed +OpenAI `prompt_cache_key` equivalent. Do not invent provider headers until the +contract is documented and tested. + +## 6. Streaming and Completion Normalization + +Implement streaming as the default invocation path to match existing OpenAI/OpenRouter behavior. + +The stream accumulator should collect: + +- text deltas from `choices[0].delta.content` +- reasoning deltas from `choices[0].delta.reasoning_content` +- tool call name/id/type/function argument deltas from `choices[0].delta.tool_calls` +- finish reason from the terminal chunk +- usage if supplied by Z.ai + +Return `ChatInvokeCompletion.messages` in provider event order as far as practical: + +1. reasoning message if reasoning text exists +2. assistant text message if content exists +3. assistant tool-call message if tool calls exist + +If Z.ai returns text and tool calls in the same assistant turn, preserve both rather than dropping text. Existing agent code can handle an assistant message with text plus `tool_calls`, and existing compaction already strips risky dangling tool calls during history rewriting. + +## 7. Usage, Diagnostics, and Error Handling + +Normalize usage into `ChatInvokeUsage`: + +- `model` +- `input_tokens` +- `output_tokens` +- `total_tokens` + +Z.ai returns `usage.prompt_tokens_details.cached_tokens`; normalize it to `input_cached_tokens` (omitted when zero or missing). + +`max_tokens` is supported as a per-invoke `ZaiInvokeOptions` field, but runtime +does not set a default in phase 1. 
This leaves Z.ai's server default in effect +until Codelia chooses an explicit cost-safety cap. + +Provider diagnostics should follow existing provider-log conventions: + +- `CODELIA_PROVIDER_LOG=1` emits one-line request/response summaries +- `CODELIA_PROVIDER_LOG_DIR` writes request/response dumps +- never log API keys +- log provider as `zai` + +HTTP error policy: + +- `401`/`403`: auth/config error +- `402`: credits/payment error, non-retryable +- `408`/`429`/`5xx`: transient or rate-limit class; surface status and a bounded body snippet +- malformed stream: provider error with enough chunk context in debug logs, not in normal UI + +## 8. Model Listing and Selection + +Phase 1: + +- `model.list(provider=zai)` returns static usable Z.ai models from `DEFAULT_MODEL_REGISTRY` +- `model.set(provider=zai, name=...)` validates against the static registry +- onboarding can pick `zai`, prompt for API key, then pick `glm-5.2` + +Do not accept arbitrary `zai` model names in phase 1. That keeps compaction and context-left behavior tied to known limits. + +## 9. Search Behavior + +`search.mode=auto` should not expose provider-native search for `zai` in phase 1. + +Runtime should fall back to local `search` for `zai`, the same way it does for providers without native hosted search support. + +Z.ai web search or retrieval tools can be evaluated later as a separate feature because their request/response contracts are not the same as Codelia's function tool loop. + +## 10. 
Testing Plan + +Core unit tests: + +- provider identity and default model +- text-only streaming response +- reasoning-only + text response normalization +- tool-call streaming with argument concatenation +- assistant text plus tool call in one response +- replay serialization for assistant tool call followed by tool result +- hosted search is ignored for `zai` +- usage normalization +- provider log request/response summaries do not include secrets +- raw stream chunks are counted but not retained by the default accumulator; + full raw chunks are captured only for explicit provider dump output + +Runtime unit tests: + +- `ZAI_API_KEY` env auth +- onboarding provider pick includes `zai` +- model list returns static Z.ai models with details +- model set accepts known static Z.ai models and rejects unknown ones +- agent factory constructs `ChatZai` +- `search.mode=auto` uses local search for `zai` +- reasoning mapping tests for `low|medium|high|xhigh` + +Integration tests, opt-in only: + +- gated by `INTEGRATION=1`, `ZAI_API_KEY`, and `CODELIA_TEST_ZAI_MODEL` +- text smoke +- tool round trip +- reasoning stream smoke + +Suggested focused verification commands: + +```sh +bun test packages/core/tests/zai-chat.test.ts packages/core/tests/zai-tools-serializer.test.ts +bun test packages/runtime/tests/model-zai.test.ts packages/runtime/tests/startup-onboarding.test.ts packages/runtime/tests/model-reasoning.test.ts +bun run typecheck +``` + +## 11. Rollout Phases + +1. Land spec and implementation plan. +2. Add core `zai` provider types, model spec, serializer, and `ChatZai`. +3. Add runtime auth, model RPC, model registry, and agent-factory wiring. +4. Add focused unit tests. +5. Add optional integration smoke. +6. Update local AGENTS notes after implementation is complete. + +## 12. Open Questions + +- Does Z.ai provide usage in streaming terminal chunks consistently, or is a non-stream fallback needed to guarantee usage? 
+- What explicit `max_tokens` default should Codelia use if it decides not to rely on Z.ai's server default? +- Should `ZAI_BASE_URL` support the coding endpoint `https://api.z.ai/api/coding/paas/v4` as a separate env override only, or should it become config? +- Does Z.ai require special handling for strict JSON schema subsets beyond function parameter JSON Schema? +- Are Z.ai tool call ids stable enough to replay directly as `tool_call_id`, or should Codelia generate fallback ids when missing? diff --git a/packages/core/AGENTS.md b/packages/core/AGENTS.md index 79915455..e19ce24f 100644 --- a/packages/core/AGENTS.md +++ b/packages/core/AGENTS.md @@ -8,6 +8,8 @@ Static model entries can use `ModelSpec.providerModelId` when the user-facing se Place the Anthropic (Claude) provider implementation in `src/llm/anthropic/`. `ChatAnthropic` applies a 20 minute SDK client timeout by default so long-running non-streaming requests do not fail at Anthropic's 10 minute default; explicit `clientOptions.timeout` still wins. Place the OpenRouter provider implementation in `src/llm/openrouter/`. +Place the Z.ai provider implementation in `src/llm/zai/`; `ChatZai` uses fetch against Z.ai Chat Completions streaming, not the OpenAI Responses adapter. +`ChatZai` must keep `ToolCall.provider_meta` compact; never persist raw streaming chunks in assistant tool calls or session history. Register defaults in `configRegistry` of `@codelia/config` (`src/config/register.ts`). Place the test under `tests/` and execute it with `bun test`. Tool-defined JSON Schema generation uses Zod v4's `toJSONSchema`. @@ -57,8 +59,9 @@ The Developer role will be abolished and will only handle system prompts. OpenAI Responses is always called with `stream=true`; HTTP and websocket transports rebuild canonical `response.output` from stream events, then merge terminal metadata (`id`/`status`/`usage`) from `finalResponse()` or websocket terminal payloads. 
Agent passes provider-neutral invoke context `sessionKey` using `session_id` (fallback: `run_id`) so adapters can apply conversation-stable routing hints without provider coupling. OpenAI Responses adapter maps `sessionKey` to `prompt_cache_key` and sends `session_id: ` header (Codex-compatible routing hint). +Z.ai phase 1 intentionally ignores `sessionKey` because no prompt-cache/session routing field has been confirmed. Anthropic Messages adapter enables prompt caching by default via top-level `cache_control: { type: "ephemeral" }` (can be overridden per-request). -Set `CODELIA_PROVIDER_LOG=1` to enable provider request/response diagnostics and dumps (OpenAI/Anthropic). +Set `CODELIA_PROVIDER_LOG=1` to enable provider request/response diagnostics and dumps (OpenAI/Anthropic/OpenRouter/Z.ai). Override dump path with `CODELIA_PROVIDER_LOG_DIR` (default is `./tmp` when provider log is enabled). Request debug logs include provider-specific hashes (for OpenAI: `tools_sha` / `instructions_sha` / `session_id_header=on|off`) so order/routing drift can be spotted quickly. When repopulating OpenAI's `response.output` as history, parsed fields such as `parsed_arguments` / `parsed` are removed. 
diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index 97d6ee40..163d9906 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -1036,7 +1036,8 @@ const parseQualifiedModelId = ( providerRaw !== "openai" && providerRaw !== "anthropic" && providerRaw !== "openrouter" && - providerRaw !== "google" + providerRaw !== "google" && + providerRaw !== "zai" ) { return null; } diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 9c387174..87f1adee 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -31,6 +31,7 @@ export type { } from "./llm/base"; export { ChatOpenAI } from "./llm/openai/chat"; export { ChatOpenRouter } from "./llm/openrouter/chat"; +export { ChatZai } from "./llm/zai/chat"; export * from "./models"; export { getDefaultSystemPromptPath } from "./prompts"; export type { diff --git a/packages/core/src/llm/base.ts b/packages/core/src/llm/base.ts index b725311b..6b473612 100644 --- a/packages/core/src/llm/base.ts +++ b/packages/core/src/llm/base.ts @@ -2,7 +2,12 @@ import type { ChatInvokeCompletion } from "../types/llm/invoke"; import type { BaseMessage } from "../types/llm/messages"; import type { ToolChoice, ToolDefinition } from "../types/llm/tools"; -export type ProviderName = "openai" | "anthropic" | "openrouter" | "google"; +export type ProviderName = + | "openai" + | "anthropic" + | "openrouter" + | "google" + | "zai"; export type ChatInvokeInput = { messages: BaseMessage[]; diff --git a/packages/core/src/llm/zai/AGENTS.md b/packages/core/src/llm/zai/AGENTS.md new file mode 100644 index 00000000..4fcc4571 --- /dev/null +++ b/packages/core/src/llm/zai/AGENTS.md @@ -0,0 +1,12 @@ +# Z.ai provider + +- `chat.ts` owns the `BaseChatModel` adapter: request construction, provider + diagnostics, and conversion to `ChatInvokeCompletion`. 
+- `transport.ts` owns HTTP request execution, SSE parsing, timeout/signal + composition, and HTTP error normalization. +- `serializer.ts` owns provider-neutral message/tool/reasoning normalization. +- Keep raw stream chunks out of persisted `provider_meta`. Count chunks by + default, and capture full raw chunks only for explicit provider dump output. +- `reasoning_effort` is sent only for models that support it (`glm-5.2` in the + static registry). Other Z.ai models still send `thinking: { type: "enabled" }` + and use provider defaults for effort. diff --git a/packages/core/src/llm/zai/chat.ts b/packages/core/src/llm/zai/chat.ts new file mode 100644 index 00000000..227712b9 --- /dev/null +++ b/packages/core/src/llm/zai/chat.ts @@ -0,0 +1,248 @@ +import { createHash } from "node:crypto"; +import { ZAI_DEFAULT_MODEL } from "../../models/zai"; +import type { ChatInvokeCompletion } from "../../types/llm"; +import type { + BaseChatModel, + ChatInvokeContext, + ChatInvokeInput, +} from "../base"; +import { + getProviderLogSettings, + safeJsonStringify, + sharedPrefixChars, + writeProviderLogDump, +} from "../provider-log"; +import { + toZaiChatInvokeCompletion, + toZaiMessages, + toZaiToolChoice, + toZaiTools, + type ZaiUsage, +} from "./serializer"; +import { + streamZaiChatCompletion, + type ZaiChatCompletionRequest, + type ZaiReasoningEffort, + type ZaiStreamTerminalResponse, +} from "./transport"; + +const PROVIDER_NAME = "zai" as const; +const DEFAULT_BASE_URL = "https://api.z.ai/api/paas/v4"; +const DEFAULT_REASONING_EFFORT = "high" as const; +const DEFAULT_REQUEST_TIMEOUT_MS = 20 * 60 * 1000; + +export type ZaiInvokeOptions = { + max_tokens?: number; + temperature?: number; + top_p?: number; + reasoningEffort?: ZaiReasoningEffort | null; + [key: string]: unknown; +}; + +export type ChatZaiOptions = { + apiKey?: string; + baseURL?: string; + fetch?: typeof fetch; + model?: string; + timeoutMs?: number | null; + reasoningEffort?: ZaiReasoningEffort | null; + 
reasoningLevelRequested?: "low" | "medium" | "high" | "xhigh"; + reasoningLevelApplied?: "high" | "xhigh"; + reasoningFallbackApplied?: boolean; +}; + +/** Z.ai Chat Completions adapter: builds a streaming request and converts the accumulated stream into a `ChatInvokeCompletion`. */ +export class ChatZai + implements BaseChatModel<typeof PROVIDER_NAME, ZaiInvokeOptions> +{ + readonly provider: typeof PROVIDER_NAME = PROVIDER_NAME; + readonly model: string; + private readonly apiKey?: string; + private readonly baseURL: string; + private readonly fetchImpl: typeof fetch; + private readonly timeoutMs: number | null; + private readonly defaultReasoningEffort: ZaiReasoningEffort | null; + private readonly reasoningLevelMeta: { + requested?: "low" | "medium" | "high" | "xhigh"; + applied?: "high" | "xhigh"; + fallbackApplied?: boolean; + }; + private debugInvokeSeq = 0; + private lastDebugRequestPayload: string | null = null; + + /** Stores the client options; trailing slashes are trimmed from `baseURL` and omitted options fall back to the module defaults. */ + constructor(options: ChatZaiOptions = {}) { + this.apiKey = options.apiKey; + this.baseURL = (options.baseURL ?? DEFAULT_BASE_URL).replace(/\/+$/, ""); + this.fetchImpl = options.fetch ?? fetch; + this.model = options.model ?? ZAI_DEFAULT_MODEL; + /* Mirror the `reasoningEffort` handling below: only `undefined` selects the default, so an explicit `null` is passed through to the transport instead of being replaced by the 20 minute default (`??` would swallow it). */ + this.timeoutMs = + options.timeoutMs === undefined + ? DEFAULT_REQUEST_TIMEOUT_MS + : options.timeoutMs; + this.defaultReasoningEffort = + options.reasoningEffort === undefined + ? DEFAULT_REASONING_EFFORT + : options.reasoningEffort; + this.reasoningLevelMeta = { + requested: options.reasoningLevelRequested, + applied: options.reasoningLevelApplied, + fallbackApplied: options.reasoningFallbackApplied, + }; + } + + /** Sends one streaming chat request and returns the normalized completion; throws when no API key was configured. */ + async ainvoke( + input: ChatInvokeInput & { options?: ZaiInvokeOptions }, + // Z.ai phase 1 has no confirmed prompt-cache/session routing hint. + _context?: ChatInvokeContext, + ): Promise<ChatInvokeCompletion> { + const apiKey = this.apiKey; + if (!apiKey) { + throw new Error("Z.ai API key is required"); + } + const { + messages, + tools: toolDefs, + toolChoice, + options, + model, + signal, + } = input; + const { reasoningEffort, ...rest } = options ?? {}; + const effectiveReasoningEffort = + reasoningEffort === undefined + ?
this.defaultReasoningEffort + : reasoningEffort; + const tools = toZaiTools(toolDefs); + const toolChoiceParam = toZaiToolChoice(toolChoice); + const request: ZaiChatCompletionRequest = { + model: model ?? this.model, + messages: toZaiMessages(messages), + stream: true, + thinking: { type: "enabled" }, + ...rest, + ...(effectiveReasoningEffort + ? { reasoning_effort: effectiveReasoningEffort } + : {}), + ...(tools ? { tools } : {}), + ...(tools ? { tool_stream: true } : {}), + ...(toolChoiceParam ? { tool_choice: toolChoiceParam } : {}), + }; + const debugSeq = this.nextDebugInvokeSeq(); + await this.debugRequestIfEnabled(request, debugSeq); + const terminal = await this.streamRequest(apiKey, request, signal); + await this.debugResponseIfEnabled(terminal, debugSeq); + return toZaiChatInvokeCompletion(terminal.accumulated, { + request_id: terminal.request_id ?? null, + reasoning_requested: this.reasoningLevelMeta.requested, + reasoning_applied: this.reasoningLevelMeta.applied, + reasoning_effort: request.reasoning_effort, + reasoning_fallback: this.reasoningLevelMeta.fallbackApplied, + }); + } + + /** Delegates to `streamZaiChatCompletion` with this client's base URL, fetch implementation and timeout; raw chunks are captured only when a provider dump directory is configured. */ + private async streamRequest( + apiKey: string, + request: ZaiChatCompletionRequest, + signal?: AbortSignal, + ): Promise<ZaiStreamTerminalResponse> { + const logSettings = getProviderLogSettings(); + return streamZaiChatCompletion({ + apiKey, + baseURL: this.baseURL, + fetchImpl: this.fetchImpl, + request, + signal, + timeoutMs: this.timeoutMs, + captureRawChunks: Boolean(logSettings.dumpDir), + }); + } + + /** Increments and returns the per-instance invoke counter used to pair request and response log entries. */ + private nextDebugInvokeSeq(): number { + const seq = this.debugInvokeSeq + 1; + this.debugInvokeSeq = seq; + return seq; + } + + /** Emits a one-line request summary and/or a request dump when provider logging is enabled; does nothing otherwise. */ + private async debugRequestIfEnabled( + request: ZaiChatCompletionRequest, + seq: number, + ): Promise<void> { + const settings = getProviderLogSettings(); + if (!settings.enabled && !settings.dumpDir) { + return; + } + const payload = safeJsonStringify(request); + const hash = createHash("sha256") + .update(payload) + .digest("hex") + .slice(0, 16); + const toolsHash =
createHash("sha256") + .update(safeJsonStringify(request.tools ?? [])) + .digest("hex") + .slice(0, 12); + const previous = this.lastDebugRequestPayload; + const shared = previous + ? sharedPrefixChars(previous, payload) + : payload.length; + const sharedRatio = payload.length + ? ((shared / payload.length) * 100).toFixed(1) + : "100.0"; + if (settings.enabled) { + console.error( + `[zai.request] seq=${seq} bytes=${payload.length} sha256_16=${hash} shared_prefix=${shared} shared_ratio=${sharedRatio}% tools_sha=${toolsHash}`, + ); + } + if (settings.dumpDir) { + try { + await writeProviderLogDump( + settings, + PROVIDER_NAME, + seq, + "request", + request, + ); + } catch (error) { + if (settings.enabled) { + console.error( + `[zai.request] dump_failed seq=${seq} error=${String(error)}`, + ); + } + } + } + this.lastDebugRequestPayload = payload; + } + + /** Emits a one-line response summary and/or a dump of the accumulated response when provider logging is enabled; does nothing otherwise. */ + private async debugResponseIfEnabled( + response: ZaiStreamTerminalResponse, + seq: number, + ): Promise<void> { + const settings = getProviderLogSettings(); + if (!settings.enabled && !settings.dumpDir) { + return; + } + const usage = response.accumulated.usage; + if (settings.enabled) { + const inputTokens = usage?.prompt_tokens ?? 0; + const outputTokens = usage?.completion_tokens ?? 0; + console.error( + `[zai.response] seq=${seq} id=${response.accumulated.id ?? "unknown"} status=${response.status} chunks=${response.accumulated.rawChunkCount} finish=${String(response.accumulated.finishReason ??
"unknown")} tok_in=${inputTokens} tok_out=${outputTokens}`, + ); + } + if (settings.dumpDir) { + try { + await writeProviderLogDump( + settings, + PROVIDER_NAME, + seq, + "response", + response.accumulated, + ); + } catch (error) { + if (settings.enabled) { + console.error( + `[zai.response] dump_failed seq=${seq} error=${String(error)}`, + ); + } + } + } + } +} + +export type { ZaiChatCompletionChunk } from "./serializer"; +export type { ZaiReasoningEffort, ZaiUsage }; diff --git a/packages/core/src/llm/zai/serializer.ts b/packages/core/src/llm/zai/serializer.ts new file mode 100644 index 00000000..475263a9 --- /dev/null +++ b/packages/core/src/llm/zai/serializer.ts @@ -0,0 +1,376 @@ +import { stringifyContent } from "../../content/stringify"; +import type { + BaseMessage, + ChatInvokeCompletion, + ChatInvokeUsage, + ContentPart, + ToolCall, + ToolChoice, + ToolDefinition, +} from "../../types/llm"; +import { + isFunctionToolDefinition, + isHostedSearchToolDefinition, +} from "../../types/llm"; + +export type ZaiChatMessage = + | { + role: "system" | "user"; + content: string; + } + | { + role: "assistant"; + content?: string | null; + tool_calls?: ZaiToolCall[]; + } + | { + role: "tool"; + tool_call_id: string; + content: string; + }; + +export type ZaiTool = { + type: "function"; + function: { + name: string; + description: string; + parameters: unknown; + }; +}; + +export type ZaiToolChoice = + | "auto" + | "required" + | "none" + | { + type: "function"; + function: { name: string }; + }; + +export type ZaiToolCall = { + id: string; + type: "function"; + function: { + name: string; + arguments: string; + }; +}; + +export type ZaiUsage = { + prompt_tokens?: number; + completion_tokens?: number; + total_tokens?: number; + prompt_tokens_details?: { + cached_tokens?: number; + } | null; +}; + +export type ZaiChatCompletionChunk = { + id?: string; + model?: string; + created?: number; + choices?: Array<{ + index?: number; + delta?: { + content?: string | null; + 
reasoning_content?: string | null; + tool_calls?: Array<{ + index?: number; + id?: string; + type?: string; + function?: { + name?: string; + arguments?: string; + }; + }>; + }; + finish_reason?: string | null; + }>; + usage?: ZaiUsage | null; +}; + +type ZaiAccumulatedToolCall = { + index: number; + id: string; + type: "function"; + name: string; + arguments: string; + raw_chunk_count: number; +}; + +export type ZaiStreamAccumulator = { + id?: string; + model?: string; + created?: number; + content: string; + reasoningContent: string; + toolCalls: ZaiAccumulatedToolCall[]; + finishReason?: string | null; + usage?: ZaiUsage | null; + rawChunkCount: number; + rawChunks: ZaiChatCompletionChunk[]; + captureRawChunks: boolean; +}; + +const stringifyContentForZai = ( + content: string | ContentPart[] | null, +): string => stringifyContent(content, { mode: "display", joiner: "" }); + +export function toZaiMessages(messages: BaseMessage[]): ZaiChatMessage[] { + const mapped: ZaiChatMessage[] = []; + for (const message of messages) { + switch (message.role) { + case "system": + case "user": { + mapped.push({ + role: message.role, + content: stringifyContentForZai(message.content), + }); + break; + } + case "assistant": { + const content = stringifyContentForZai(message.content); + const toolCalls = toZaiReplayToolCalls(message.tool_calls); + mapped.push({ + role: "assistant", + content: content || null, + ...(toolCalls.length ? 
{ tool_calls: toolCalls } : {}), + }); + break; + } + case "tool": { + mapped.push({ + role: "tool", + tool_call_id: message.tool_call_id, + content: stringifyContentForZai(message.content), + }); + break; + } + case "reasoning": + break; + } + } + return mapped; +} + +export function toZaiTools( + tools?: ToolDefinition[] | null, +): ZaiTool[] | undefined { + if (!tools || tools.length === 0) { + return undefined; + } + const mapped: ZaiTool[] = []; + for (const tool of tools) { + if (isFunctionToolDefinition(tool)) { + mapped.push({ + type: "function", + function: { + name: tool.name, + description: tool.description, + parameters: tool.parameters, + }, + }); + continue; + } + if (isHostedSearchToolDefinition(tool)) { + continue; + } + } + return mapped.length ? mapped : undefined; +} + +export function toZaiToolChoice( + choice?: ToolChoice | null, +): ZaiToolChoice | undefined { + if (!choice) { + return undefined; + } + if (choice === "auto" || choice === "required" || choice === "none") { + return choice; + } + return { type: "function", function: { name: choice } }; +} + +export const createZaiStreamAccumulator = (options?: { + captureRawChunks?: boolean; +}): ZaiStreamAccumulator => ({ + content: "", + reasoningContent: "", + toolCalls: [], + rawChunkCount: 0, + rawChunks: [], + captureRawChunks: options?.captureRawChunks ?? false, +}); + +export function appendZaiChatCompletionChunk( + accumulator: ZaiStreamAccumulator, + chunk: ZaiChatCompletionChunk, +): void { + accumulator.rawChunkCount += 1; + if (accumulator.captureRawChunks) { + accumulator.rawChunks.push(chunk); + } + accumulator.id ??= chunk.id; + accumulator.model ??= chunk.model; + accumulator.created ??= chunk.created; + accumulator.usage ??= chunk.usage ?? undefined; + + for (const choice of chunk.choices ?? 
[]) { + if (choice.finish_reason !== undefined) { + accumulator.finishReason = choice.finish_reason; + } + const delta = choice.delta; + if (!delta) { + continue; + } + if (typeof delta.reasoning_content === "string") { + accumulator.reasoningContent += delta.reasoning_content; + } + if (typeof delta.content === "string") { + accumulator.content += delta.content; + } + for (const toolCallDelta of delta.tool_calls ?? []) { + const index = + typeof toolCallDelta.index === "number" + ? toolCallDelta.index + : accumulator.toolCalls.length; + let call = accumulator.toolCalls.find((entry) => entry.index === index); + if (!call) { + call = { + index, + id: toolCallDelta.id ?? `call_${index}`, + type: "function", + name: "", + arguments: "", + raw_chunk_count: 0, + }; + accumulator.toolCalls.push(call); + accumulator.toolCalls.sort((left, right) => left.index - right.index); + } + call.raw_chunk_count += 1; + if (typeof toolCallDelta.id === "string" && toolCallDelta.id) { + call.id = toolCallDelta.id; + } + if (toolCallDelta.type === "function") { + call.type = "function"; + } + if (typeof toolCallDelta.function?.name === "string") { + call.name = toolCallDelta.function.name; + } + if (typeof toolCallDelta.function?.arguments === "string") { + call.arguments += toolCallDelta.function.arguments; + } + } + } +} + +export function toZaiChatInvokeCompletion( + accumulator: ZaiStreamAccumulator, + meta?: { + reasoning_requested?: "low" | "medium" | "high" | "xhigh"; + reasoning_applied?: "high" | "xhigh"; + reasoning_effort?: "high" | "max"; + reasoning_fallback?: boolean; + request_id?: string | null; + }, +): ChatInvokeCompletion { + const messages: BaseMessage[] = []; + if (accumulator.reasoningContent) { + messages.push({ + role: "reasoning", + content: accumulator.reasoningContent, + raw_item: { + provider: "zai", + field: "reasoning_content", + response_id: accumulator.id, + }, + }); + } + const toolCalls = accumulator.toolCalls + .filter((call) => call.name || 
call.arguments || call.id) + .map(toToolCall); + if (accumulator.content || toolCalls.length) { + messages.push({ + role: "assistant", + content: accumulator.content || null, + ...(toolCalls.length ? { tool_calls: toolCalls } : {}), + }); + } + const usage = normalizeZaiUsage(accumulator); + return { + messages, + usage, + stop_reason: accumulator.finishReason ?? null, + provider_meta: { + response_id: accumulator.id, + request_id: meta?.request_id ?? undefined, + finish_reason: accumulator.finishReason ?? null, + reasoning_requested: meta?.reasoning_requested, + reasoning_applied: meta?.reasoning_applied, + reasoning_effort: meta?.reasoning_effort, + reasoning_fallback: meta?.reasoning_fallback, + }, + }; +} + +const toZaiReplayToolCalls = ( + toolCalls: ToolCall[] | undefined, +): ZaiToolCall[] => { + if (!toolCalls?.length) { + return []; + } + return toolCalls.map((call) => ({ + id: call.id, + type: "function", + function: { + name: call.function.name, + arguments: call.function.arguments, + }, + })); +}; + +const toToolCall = (call: ZaiAccumulatedToolCall): ToolCall => ({ + id: call.id, + type: "function", + function: { + name: call.name, + arguments: call.arguments, + }, + provider_meta: { + provider: "zai", + index: call.index, + raw_chunk_count: call.raw_chunk_count, + }, +}); + +const normalizeNumber = (value: unknown): number => { + if (typeof value !== "number" || !Number.isFinite(value) || value < 0) { + return 0; + } + return Math.trunc(value); +}; + +const normalizeZaiUsage = ( + accumulator: ZaiStreamAccumulator, +): ChatInvokeUsage | null => { + const usage = accumulator.usage; + if (!usage) { + return null; + } + const inputTokens = normalizeNumber(usage.prompt_tokens); + const outputTokens = normalizeNumber(usage.completion_tokens); + const totalTokens = + typeof usage.total_tokens === "number" + ? 
normalizeNumber(usage.total_tokens) + : inputTokens + outputTokens; + const cachedTokens = normalizeNumber( + usage.prompt_tokens_details?.cached_tokens, + ); + return { + model: accumulator.model ?? "", + input_tokens: inputTokens, + ...(cachedTokens > 0 ? { input_cached_tokens: cachedTokens } : {}), + output_tokens: outputTokens, + total_tokens: totalTokens, + }; +}; diff --git a/packages/core/src/llm/zai/transport.ts b/packages/core/src/llm/zai/transport.ts new file mode 100644 index 00000000..e8259203 --- /dev/null +++ b/packages/core/src/llm/zai/transport.ts @@ -0,0 +1,221 @@ +import { safeJsonStringify } from "../provider-log"; +import { + appendZaiChatCompletionChunk, + createZaiStreamAccumulator, + type ZaiChatCompletionChunk, + type ZaiChatMessage, + type ZaiStreamAccumulator, + type ZaiTool, + type ZaiToolChoice, +} from "./serializer"; + +export type ZaiReasoningEffort = "high" | "max"; + +export type ZaiChatCompletionRequest = { + model: string; + messages: ZaiChatMessage[]; + stream: true; + tools?: ZaiTool[]; + tool_choice?: ZaiToolChoice; + tool_stream?: true; + thinking: { type: "enabled" }; + reasoning_effort?: ZaiReasoningEffort; + [key: string]: unknown; +}; + +export type ZaiStreamTerminalResponse = { + status: number; + request_id?: string | null; + accumulated: ZaiStreamAccumulator; +}; + +export type StreamZaiChatCompletionOptions = { + apiKey: string; + baseURL: string; + fetchImpl: typeof fetch; + request: ZaiChatCompletionRequest; + signal?: AbortSignal; + timeoutMs: number | null; + captureRawChunks?: boolean; +}; + +export const streamZaiChatCompletion = async ({ + apiKey, + baseURL, + fetchImpl, + request, + signal, + timeoutMs, + captureRawChunks, +}: StreamZaiChatCompletionOptions): Promise => { + const requestSignal = createZaiRequestSignal(signal, timeoutMs); + try { + const response = await fetchImpl(`${baseURL}/chat/completions`, { + method: "POST", + headers: { + Authorization: `Bearer ${apiKey}`, + "Content-Type": 
"application/json", + }, + body: safeJsonStringify(request), + signal: requestSignal.signal, + }); + if (!response.ok) { + throw await toZaiHttpError(response); + } + const accumulated = createZaiStreamAccumulator({ + captureRawChunks: captureRawChunks ?? false, + }); + for await (const chunk of readZaiChatCompletionStream( + response, + requestSignal.signal, + )) { + appendZaiChatCompletionChunk(accumulated, chunk); + } + return { + status: response.status, + request_id: response.headers.get("x-request-id"), + accumulated, + }; + } finally { + requestSignal.cleanup(); + } +}; + +export async function* readZaiChatCompletionStream( + response: Response, + signal?: AbortSignal, +): AsyncIterable { + if (!response.body) { + throw new Error("Z.ai provider error: response body is empty"); + } + const reader = response.body.getReader(); + throwIfAborted(signal); + const cancelOnAbort = () => { + void reader.cancel(signal?.reason).catch(() => undefined); + }; + signal?.addEventListener("abort", cancelOnAbort, { once: true }); + const decoder = new TextDecoder(); + let buffer = ""; + try { + while (true) { + throwIfAborted(signal); + const { done, value } = await reader.read(); + throwIfAborted(signal); + if (done) { + break; + } + buffer += decoder.decode(value, { stream: true }); + let newlineIndex = buffer.indexOf("\n"); + while (newlineIndex >= 0) { + const line = buffer.slice(0, newlineIndex).replace(/\r$/, ""); + buffer = buffer.slice(newlineIndex + 1); + const chunk = parseSseDataLine(line); + if (chunk === "done") { + return; + } + if (chunk) { + yield chunk; + } + newlineIndex = buffer.indexOf("\n"); + } + } + buffer += decoder.decode(); + const trailing = buffer.trim(); + if (trailing) { + const chunk = parseSseDataLine(trailing); + if (chunk && chunk !== "done") { + yield chunk; + } + } + } finally { + signal?.removeEventListener("abort", cancelOnAbort); + reader.releaseLock(); + } +} + +export const createZaiRequestSignal = ( + signal: AbortSignal | undefined, 
+ timeoutMs: number | null, +): { signal?: AbortSignal; cleanup: () => void } => { + if (!timeoutMs || timeoutMs <= 0) { + return { signal, cleanup: () => undefined }; + } + const controller = new AbortController(); + let timeout: ReturnType | undefined = setTimeout(() => { + controller.abort(new Error("Z.ai request timeout")); + }, timeoutMs); + const onAbort = () => { + controller.abort(signal?.reason); + }; + if (signal?.aborted) { + onAbort(); + } else { + signal?.addEventListener("abort", onAbort, { once: true }); + } + return { + signal: controller.signal, + cleanup: () => { + if (timeout) { + clearTimeout(timeout); + timeout = undefined; + } + signal?.removeEventListener("abort", onAbort); + }, + }; +}; + +const toZaiHttpError = async (response: Response): Promise => { + let body = ""; + try { + body = await response.text(); + } catch { + body = ""; + } + const snippet = body ? body.slice(0, 500) : "(empty)"; + const prefix = + response.status === 401 || response.status === 403 + ? "Z.ai auth/config error" + : response.status === 402 + ? "Z.ai credits/payment error" + : response.status === 408 || + response.status === 429 || + response.status >= 500 + ? 
"Z.ai transient/rate-limit error" + : "Z.ai provider error"; + return new Error(`${prefix} (${response.status}): ${snippet}`); +}; + +const throwIfAborted = (signal?: AbortSignal): void => { + if (!signal?.aborted) { + return; + } + const reason = signal.reason; + if (reason instanceof Error) { + throw reason; + } + throw new Error("Z.ai request aborted"); +}; + +const parseSseDataLine = ( + line: string, +): ZaiChatCompletionChunk | "done" | null => { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith(":")) { + return null; + } + if (!trimmed.startsWith("data:")) { + return null; + } + const data = trimmed.slice("data:".length).trim(); + if (!data) { + return null; + } + if (data === "[DONE]") { + return "done"; + } + try { + return JSON.parse(data) as ZaiChatCompletionChunk; + } catch { + throw new Error(`Malformed Z.ai stream chunk: ${data.slice(0, 300)}`); + } +}; diff --git a/packages/core/src/models/AGENTS.md b/packages/core/src/models/AGENTS.md index ac1dbe96..60981a70 100644 --- a/packages/core/src/models/AGENTS.md +++ b/packages/core/src/models/AGENTS.md @@ -11,6 +11,15 @@ The model list is a snapshot, so check the update date and review it regularly. - Use `supportsFast: true` only for model ids that support the provider-specific fast path. Runtime maps that flag per provider (for example OpenAI priority service tier, Anthropic fast mode) and leaves unsupported models disabled even when `model.fast` is configured. - A model is usable only when the effective spec has a positive context budget (`maxInputTokens` or `contextWindow`). If metadata can be missing for a new/latest model that should still work, put the required limits in the static `ModelSpec`. +## Z.ai models + +- Z.ai static models are ordered newest/highest-priority first so `model.list` + presents newer GLM variants above older ones. +- `glm-5.2` is available in the static Z.ai registry with 1M context and 131,072 max output tokens. 
+- `glm-5.1`, `glm-5`, `glm-5-turbo`, and `glm-4.7` are available in the static Z.ai registry with 200K context and 131,072 max output tokens. +- Only `glm-5.2` receives `reasoning_effort`; older Z.ai models keep `thinking` enabled but use provider defaults for effort. +- Keep Z.ai phase 1 model listing static unless a stable provider model-list endpoint and response shape are confirmed. + ## Anthropic Claude Opus 4.8 / 4.7 - `claude-opus-4-8` is available in the static Anthropic registry with 1M context, 128k max output tokens, and Anthropic fast mode support. diff --git a/packages/core/src/models/index.ts b/packages/core/src/models/index.ts index 17f35060..93a4e8a0 100644 --- a/packages/core/src/models/index.ts +++ b/packages/core/src/models/index.ts @@ -2,14 +2,17 @@ import { ANTHROPIC_MODELS } from "./anthropic"; import { GOOGLE_MODELS } from "./google"; import { OPENAI_MODELS } from "./openai"; import { createModelRegistry } from "./registry"; +import { ZAI_MODELS } from "./zai"; export * from "./anthropic"; export * from "./google"; export * from "./openai"; export * from "./registry"; +export * from "./zai"; export const DEFAULT_MODEL_REGISTRY = createModelRegistry([ ...OPENAI_MODELS, ...ANTHROPIC_MODELS, ...GOOGLE_MODELS, + ...ZAI_MODELS, ]); diff --git a/packages/core/src/models/registry.ts b/packages/core/src/models/registry.ts index 7379edd7..55d9fffb 100644 --- a/packages/core/src/models/registry.ts +++ b/packages/core/src/models/registry.ts @@ -29,6 +29,7 @@ export function createModelRegistry(specs: ModelSpec[]): ModelRegistry { anthropic: {}, openrouter: {}, google: {}, + zai: {}, }, }; @@ -125,6 +126,7 @@ function cloneAliases( anthropic: { ...aliasesByProvider.anthropic }, openrouter: { ...aliasesByProvider.openrouter }, google: { ...aliasesByProvider.google }, + zai: { ...aliasesByProvider.zai }, }; } @@ -142,7 +144,8 @@ export function applyModelMetadata( providerId !== "openai" && providerId !== "anthropic" && providerId !== "openrouter" && - 
providerId !== "google" + providerId !== "google" && + providerId !== "zai" ) { continue; } diff --git a/packages/core/src/models/zai.ts b/packages/core/src/models/zai.ts new file mode 100644 index 00000000..2da35b69 --- /dev/null +++ b/packages/core/src/models/zai.ts @@ -0,0 +1,47 @@ +import type { ModelSpec } from "./registry"; + +export const ZAI_DEFAULT_MODEL = "glm-5.2"; +export const ZAI_REASONING_EFFORT_MODELS = new Set(["glm-5.2"]); + +const zaiModel = ( + id: string, + limits: { + contextWindow: number; + maxOutputTokens: number; + aliases?: string[]; + }, +): ModelSpec => ({ + id, + provider: "zai", + aliases: limits.aliases, + contextWindow: limits.contextWindow, + maxInputTokens: limits.contextWindow, + maxOutputTokens: limits.maxOutputTokens, + supportsTools: true, + supportsReasoning: true, + supportsJsonSchema: true, +}); + +export const ZAI_MODELS: ModelSpec[] = [ + zaiModel(ZAI_DEFAULT_MODEL, { + aliases: ["default"], + contextWindow: 1_000_000, + maxOutputTokens: 131_072, + }), + zaiModel("glm-5.1", { + contextWindow: 200_000, + maxOutputTokens: 131_072, + }), + zaiModel("glm-5", { + contextWindow: 200_000, + maxOutputTokens: 131_072, + }), + zaiModel("glm-5-turbo", { + contextWindow: 200_000, + maxOutputTokens: 131_072, + }), + zaiModel("glm-4.7", { + contextWindow: 200_000, + maxOutputTokens: 131_072, + }), +]; diff --git a/packages/core/src/services/compaction/service.ts b/packages/core/src/services/compaction/service.ts index edfbb2dd..0f401e26 100644 --- a/packages/core/src/services/compaction/service.ts +++ b/packages/core/src/services/compaction/service.ts @@ -504,7 +504,8 @@ const parseQualifiedModelId = ( providerRaw !== "openai" && providerRaw !== "anthropic" && providerRaw !== "openrouter" && - providerRaw !== "google" + providerRaw !== "google" && + providerRaw !== "zai" ) { return null; } diff --git a/packages/core/src/services/tool-output-cache/service.ts b/packages/core/src/services/tool-output-cache/service.ts index 
154a760a..2f7813cf 100644 --- a/packages/core/src/services/tool-output-cache/service.ts +++ b/packages/core/src/services/tool-output-cache/service.ts @@ -238,7 +238,8 @@ const parseQualifiedModelId = ( providerRaw !== "openai" && providerRaw !== "anthropic" && providerRaw !== "openrouter" && - providerRaw !== "google" + providerRaw !== "google" && + providerRaw !== "zai" ) { return null; } diff --git a/packages/core/tests/tool-output-cache.test.ts b/packages/core/tests/tool-output-cache.test.ts index 6f31d249..13e68f0c 100644 --- a/packages/core/tests/tool-output-cache.test.ts +++ b/packages/core/tests/tool-output-cache.test.ts @@ -33,6 +33,7 @@ describe("ToolOutputCacheService", () => { anthropic: {}, openrouter: {}, google: {}, + zai: {}, }, }, store: createStore(), @@ -76,6 +77,7 @@ describe("ToolOutputCacheService", () => { anthropic: {}, openrouter: {}, google: {}, + zai: {}, }, }, store: createStore(), @@ -106,6 +108,7 @@ describe("ToolOutputCacheService", () => { anthropic: {}, openrouter: {}, google: {}, + zai: {}, }, }, store: createStore(), @@ -140,6 +143,7 @@ describe("ToolOutputCacheService", () => { anthropic: {}, openrouter: {}, google: {}, + zai: {}, }, }, store: createStore(), diff --git a/packages/core/tests/zai-chat.test.ts b/packages/core/tests/zai-chat.test.ts new file mode 100644 index 00000000..d146005b --- /dev/null +++ b/packages/core/tests/zai-chat.test.ts @@ -0,0 +1,313 @@ +import { afterEach, describe, expect, test } from "bun:test"; +import { promises as fs } from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { ChatZai } from "../src/llm/zai/chat"; + +type FetchCall = { + input: Parameters[0]; + init?: Parameters[1]; +}; + +const sse = (items: unknown[]): string => + `${items.map((item) => `data: ${JSON.stringify(item)}\n\n`).join("")}data: [DONE]\n\n`; + +const buildMockFetch = ( + handler: ( + input: Parameters[0], + init?: Parameters[1], + ) => Promise, +): typeof fetch => Object.assign(handler, { 
preconnect: fetch.preconnect }); + +describe("ChatZai", () => { + const envSnapshot = new Map(); + const setEnv = (key: string, value: string) => { + if (!envSnapshot.has(key)) { + envSnapshot.set(key, process.env[key]); + } + process.env[key] = value; + }; + + afterEach(() => { + for (const [key, value] of envSnapshot) { + if (value === undefined) { + delete process.env[key]; + } else { + process.env[key] = value; + } + } + envSnapshot.clear(); + }); + + test("uses provider=zai and streams chat completions", async () => { + const calls: FetchCall[] = []; + const fetchImpl = buildMockFetch(async (input, init) => { + calls.push({ input, init }); + return new Response( + sse([ + { + id: "chatcmpl_zai_1", + model: "glm-5.2", + choices: [{ delta: { reasoning_content: "plan " } }], + }, + { + id: "chatcmpl_zai_1", + model: "glm-5.2", + choices: [{ delta: { content: "hello" }, finish_reason: "stop" }], + usage: { + prompt_tokens: 8, + completion_tokens: 2, + total_tokens: 10, + }, + }, + ]), + { + headers: { + "content-type": "text/event-stream", + "x-request-id": "req_zai_1", + }, + }, + ); + }); + const chat = new ChatZai({ + apiKey: "test-zai-key", + baseURL: "https://example.test/v4/", + fetch: fetchImpl, + model: "glm-5.2", + reasoningEffort: "max", + reasoningLevelRequested: "xhigh", + reasoningLevelApplied: "xhigh", + reasoningFallbackApplied: false, + }); + + const completion = await chat.ainvoke({ + messages: [{ role: "user", content: "say hello" }], + }); + + expect(chat.provider).toBe("zai"); + expect(calls).toHaveLength(1); + expect(String(calls[0]?.input)).toBe( + "https://example.test/v4/chat/completions", + ); + expect(calls[0]?.init?.headers).toMatchObject({ + Authorization: "Bearer test-zai-key", + "Content-Type": "application/json", + }); + const body = JSON.parse(String(calls[0]?.init?.body)) as Record< + string, + unknown + >; + expect(body).toMatchObject({ + model: "glm-5.2", + stream: true, + thinking: { type: "enabled" }, + reasoning_effort: "max", 
+ messages: [{ role: "user", content: "say hello" }], + }); + expect(completion.messages).toEqual([ + expect.objectContaining({ role: "reasoning", content: "plan " }), + { role: "assistant", content: "hello" }, + ]); + expect(completion.usage).toEqual({ + model: "glm-5.2", + input_tokens: 8, + output_tokens: 2, + total_tokens: 10, + }); + expect(completion.provider_meta).toMatchObject({ + response_id: "chatcmpl_zai_1", + request_id: "req_zai_1", + reasoning_requested: "xhigh", + reasoning_applied: "xhigh", + reasoning_effort: "max", + reasoning_fallback: false, + }); + }); + + test("sends tool_stream when tools are present", async () => { + const calls: FetchCall[] = []; + const fetchImpl = buildMockFetch(async (input, init) => { + calls.push({ input, init }); + return new Response( + sse([ + { + id: "chatcmpl_zai_2", + model: "glm-5.2", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_1", + type: "function", + function: { + name: "sample_tool", + arguments: '{"value":"ok"}', + }, + }, + ], + }, + finish_reason: "tool_calls", + }, + ], + }, + ]), + ); + }); + const chat = new ChatZai({ + apiKey: "test-zai-key", + fetch: fetchImpl, + }); + + const completion = await chat.ainvoke({ + messages: [{ role: "user", content: "call tool" }], + tools: [ + { + name: "sample_tool", + description: "sample tool", + parameters: { + type: "object", + properties: { + value: { type: "string" }, + }, + }, + }, + ], + toolChoice: "required", + }); + + const body = JSON.parse(String(calls[0]?.init?.body)) as Record< + string, + unknown + >; + expect(body.tool_stream).toBe(true); + expect(body.tool_choice).toBe("required"); + expect(body.tools).toEqual([ + { + type: "function", + function: { + name: "sample_tool", + description: "sample tool", + parameters: { + type: "object", + properties: { + value: { type: "string" }, + }, + }, + }, + }, + ]); + expect(completion.messages).toEqual([ + { + role: "assistant", + content: null, + tool_calls: [ + { + id: "call_1", + 
type: "function", + function: { + name: "sample_tool", + arguments: '{"value":"ok"}', + }, + provider_meta: expect.objectContaining({ + provider: "zai", + index: 0, + }), + }, + ], + }, + ]); + }); + + test("omits reasoning_effort when disabled for non-GLM-5.2 models", async () => { + const calls: FetchCall[] = []; + const fetchImpl = buildMockFetch(async (input, init) => { + calls.push({ input, init }); + return new Response( + sse([ + { + id: "chatcmpl_zai_51", + model: "glm-5.1", + choices: [{ delta: { content: "ok" }, finish_reason: "stop" }], + }, + ]), + ); + }); + const chat = new ChatZai({ + apiKey: "test-zai-key", + fetch: fetchImpl, + model: "glm-5.1", + reasoningEffort: null, + }); + + await chat.ainvoke({ + messages: [{ role: "user", content: "hello" }], + }); + + const body = JSON.parse(String(calls[0]?.init?.body)) as Record< + string, + unknown + >; + expect(body).toMatchObject({ + model: "glm-5.1", + stream: true, + thinking: { type: "enabled" }, + }); + expect(body).not.toHaveProperty("reasoning_effort"); + }); + + test("provider diagnostics do not log API keys", async () => { + const dumpDir = await fs.mkdtemp( + path.join(os.tmpdir(), "codelia-zai-log-"), + ); + const stderrLines: string[] = []; + const originalError = console.error; + console.error = (...args: unknown[]) => { + stderrLines.push(args.map(String).join(" ")); + }; + setEnv("CODELIA_PROVIDER_LOG", "1"); + setEnv("CODELIA_PROVIDER_LOG_DIR", dumpDir); + const fetchImpl = buildMockFetch(async () => { + return new Response( + sse([ + { + id: "chatcmpl_zai_log", + model: "glm-5.2", + choices: [{ delta: { content: "ok" }, finish_reason: "stop" }], + }, + ]), + ); + }); + try { + const chat = new ChatZai({ + apiKey: "test-zai-secret-key", + fetch: fetchImpl, + }); + + await chat.ainvoke({ + messages: [{ role: "user", content: "hello" }], + }); + + const stderr = stderrLines.join("\n"); + expect(stderr).toContain("[zai.request]"); + expect(stderr).toContain("[zai.response]"); + 
expect(stderr).not.toContain("test-zai-secret-key"); + const files = await fs.readdir(dumpDir); + expect(files.some((file) => file.includes("_zai_1_request.json"))).toBe( + true, + ); + const dumpText = ( + await Promise.all( + files.map((file) => fs.readFile(path.join(dumpDir, file), "utf8")), + ) + ).join("\n"); + expect(dumpText).toContain('"rawChunks"'); + expect(dumpText).toContain("chatcmpl_zai_log"); + expect(dumpText).not.toContain("test-zai-secret-key"); + } finally { + console.error = originalError; + await fs.rm(dumpDir, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/core/tests/zai-tools-serializer.test.ts b/packages/core/tests/zai-tools-serializer.test.ts new file mode 100644 index 00000000..7f863d4f --- /dev/null +++ b/packages/core/tests/zai-tools-serializer.test.ts @@ -0,0 +1,315 @@ +import { describe, expect, test } from "bun:test"; +import { + appendZaiChatCompletionChunk, + createZaiStreamAccumulator, + toZaiChatInvokeCompletion, + toZaiMessages, + toZaiToolChoice, + toZaiTools, +} from "../src/llm/zai/serializer"; +import type { BaseMessage, FunctionToolDefinition } from "../src/types/llm"; + +describe("zai serializer", () => { + const baseTool: FunctionToolDefinition = { + name: "sample_tool", + description: "sample tool", + parameters: { + type: "object", + properties: { + value: { type: "string" }, + }, + }, + }; + + test("maps function tools and ignores hosted search tools", () => { + const tools = toZaiTools([ + baseTool, + { + type: "hosted_search", + name: "web_search", + provider: "openai", + }, + ]); + expect(tools).toEqual([ + { + type: "function", + function: { + name: "sample_tool", + description: "sample tool", + parameters: baseTool.parameters, + }, + }, + ]); + }); + + test("maps tool choice values", () => { + expect(toZaiToolChoice("auto")).toBe("auto"); + expect(toZaiToolChoice("required")).toBe("required"); + expect(toZaiToolChoice("none")).toBe("none"); + expect(toZaiToolChoice("sample_tool")).toEqual({ 
+ type: "function", + function: { name: "sample_tool" }, + }); + }); + + test("serializes assistant tool-call replay followed by tool result", () => { + const messages: BaseMessage[] = [ + { + role: "assistant", + content: "I will call it.", + tool_calls: [ + { + id: "call_1", + type: "function", + function: { + name: "sample_tool", + arguments: '{"value":"a"}', + }, + }, + ], + }, + { + role: "tool", + tool_call_id: "call_1", + tool_name: "sample_tool", + content: "done", + }, + ]; + + expect(toZaiMessages(messages)).toEqual([ + { + role: "assistant", + content: "I will call it.", + tool_calls: [ + { + id: "call_1", + type: "function", + function: { + name: "sample_tool", + arguments: '{"value":"a"}', + }, + }, + ], + }, + { + role: "tool", + tool_call_id: "call_1", + content: "done", + }, + ]); + }); + + test("normalizes reasoning, text, tool calls, and usage from stream chunks", () => { + const accumulator = createZaiStreamAccumulator(); + appendZaiChatCompletionChunk(accumulator, { + id: "chatcmpl_zai_1", + model: "glm-5.2", + choices: [ + { + index: 0, + delta: { + reasoning_content: "thinking", + content: "Answer: ", + tool_calls: [ + { + index: 0, + id: "call_zai_1", + type: "function", + function: { name: "sample_tool", arguments: '{"v"' }, + }, + ], + }, + }, + ], + }); + appendZaiChatCompletionChunk(accumulator, { + model: "glm-5.2", + choices: [ + { + index: 0, + delta: { + content: "ok", + tool_calls: [ + { + index: 0, + function: { arguments: ":1}" }, + }, + ], + }, + finish_reason: "tool_calls", + }, + ], + usage: { + prompt_tokens: 12, + completion_tokens: 5, + total_tokens: 17, + }, + }); + + expect( + toZaiChatInvokeCompletion(accumulator, { + reasoning_requested: "medium", + reasoning_applied: "high", + reasoning_effort: "high", + reasoning_fallback: true, + request_id: "req_1", + }), + ).toEqual({ + messages: [ + expect.objectContaining({ + role: "reasoning", + content: "thinking", + }), + { + role: "assistant", + content: "Answer: ok", + 
tool_calls: [ + { + id: "call_zai_1", + type: "function", + function: { + name: "sample_tool", + arguments: '{"v":1}', + }, + provider_meta: expect.objectContaining({ + provider: "zai", + index: 0, + raw_chunk_count: 2, + }), + }, + ], + }, + ], + usage: { + model: "glm-5.2", + input_tokens: 12, + output_tokens: 5, + total_tokens: 17, + }, + stop_reason: "tool_calls", + provider_meta: { + response_id: "chatcmpl_zai_1", + request_id: "req_1", + finish_reason: "tool_calls", + reasoning_requested: "medium", + reasoning_applied: "high", + reasoning_effort: "high", + reasoning_fallback: true, + }, + }); + expect(accumulator.rawChunkCount).toBe(2); + expect(accumulator.rawChunks).toEqual([]); + }); + + test("captures raw stream chunks only when requested for provider dumps", () => { + const defaultAccumulator = createZaiStreamAccumulator(); + const dumpAccumulator = createZaiStreamAccumulator({ + captureRawChunks: true, + }); + const chunk = { + id: "chatcmpl_zai_capture", + model: "glm-5.2", + choices: [{ index: 0, delta: { content: "ok" } }], + }; + + appendZaiChatCompletionChunk(defaultAccumulator, chunk); + appendZaiChatCompletionChunk(dumpAccumulator, chunk); + + expect(defaultAccumulator.rawChunkCount).toBe(1); + expect(defaultAccumulator.rawChunks).toEqual([]); + expect(dumpAccumulator.rawChunkCount).toBe(1); + expect(dumpAccumulator.rawChunks).toEqual([chunk]); + }); + + test("does not persist raw stream chunks in tool call provider metadata", () => { + const accumulator = createZaiStreamAccumulator(); + for (let index = 0; index < 10; index += 1) { + appendZaiChatCompletionChunk(accumulator, { + id: "chatcmpl_zai_chunks", + model: "glm-5.2", + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: 0, + id: "call_zai_chunks", + type: "function", + function: { + name: index === 0 ? "partial_name" : "final_name", + arguments: index === 0 ? 
"{" : '"ok":true}', + }, + }, + ], + }, + }, + ], + }); + } + + const completion = toZaiChatInvokeCompletion(accumulator); + const message = completion.messages.find( + (entry) => entry.role === "assistant", + ); + if (!message || message.role !== "assistant") { + throw new Error("expected assistant message"); + } + const toolCall = message.tool_calls?.[0]; + expect(toolCall?.function.name).toBe("final_name"); + expect(toolCall?.provider_meta).toEqual({ + provider: "zai", + index: 0, + raw_chunk_count: 10, + }); + expect(JSON.stringify(toolCall?.provider_meta)).not.toContain("tool_calls"); + }); + + test("maps prompt_tokens_details.cached_tokens to input_cached_tokens", () => { + const accumulator = createZaiStreamAccumulator(); + appendZaiChatCompletionChunk(accumulator, { + id: "chatcmpl_zai_cache", + model: "glm-5.2", + choices: [{ delta: { content: "ok" }, finish_reason: "stop" }], + usage: { + prompt_tokens: 100, + completion_tokens: 20, + total_tokens: 120, + prompt_tokens_details: { cached_tokens: 80 }, + }, + }); + + expect(accumulator.usage?.prompt_tokens_details?.cached_tokens).toBe(80); + const completion = toZaiChatInvokeCompletion(accumulator); + expect(completion.usage).toEqual({ + model: "glm-5.2", + input_tokens: 100, + input_cached_tokens: 80, + output_tokens: 20, + total_tokens: 120, + }); + }); + + test("omits input_cached_tokens when cached_tokens is zero or missing", () => { + const accumulator = createZaiStreamAccumulator(); + appendZaiChatCompletionChunk(accumulator, { + id: "chatcmpl_zai_nocache", + model: "glm-5.2", + choices: [{ delta: { content: "ok" }, finish_reason: "stop" }], + usage: { + prompt_tokens: 50, + completion_tokens: 10, + total_tokens: 60, + }, + }); + + const completion = toZaiChatInvokeCompletion(accumulator); + expect(completion.usage).toEqual({ + model: "glm-5.2", + input_tokens: 50, + output_tokens: 10, + total_tokens: 60, + }); + expect(completion.usage).not.toHaveProperty("input_cached_tokens"); + }); +}); diff --git 
a/packages/runtime/AGENTS.md b/packages/runtime/AGENTS.md index a7bb2f09..a497c5bd 100644 --- a/packages/runtime/AGENTS.md +++ b/packages/runtime/AGENTS.md @@ -25,10 +25,11 @@ tool definition guide (description/field describe): Get model metadata at startup, and if the selected model is not found, force refresh `models.dev` and recheck. If metadata is still missing but the model exists in `DEFAULT_MODEL_REGISTRY`, strict startup continues using default registry spec (strict error remains only for unknown models in both metadata and default registry). Static fallback only counts as usable when the effective model spec has a positive context budget (`maxInputTokens` or `contextWindow`); latest models that should run without fetched metadata must carry those limits in `DEFAULT_MODEL_REGISTRY`. The system prompt reads `packages/core/prompts/system.md` (can be overwritten with `CODELIA_SYSTEM_PROMPT_PATH`). -For model settings, read `model.*` of `config.json` and select openai/anthropic/openrouter. +For model settings, read `model.*` of `config.json` and select openai/anthropic/openrouter/zai. When a static registry entry uses `providerModelId` (for example a capped/full-context split of one provider model), runtime preserves the configured model id for context budgeting and UI, but resolves the provider model id for OpenAI request/metadata/reasoning handling. For Anthropic, runtime resolves `max_tokens` from model metadata limits (`max_output_tokens` -> `max_input_tokens` -> `context_window`) with static registry fallback and guarantees it stays above legacy extended `thinking.budget_tokens` when applicable. Claude Opus 4.7 uses adaptive thinking plus `output_config.effort` instead of extended thinking budgets. `model.provider=openrouter` composes core `ChatOpenRouter` (dedicated connector) instead of reusing `ChatOpenAI`. 
+`model.provider=zai` composes core `ChatZai`; auth uses `ZAI_API_KEY` or saved api-key auth, and `ZAI_BASE_URL` can override the default `https://api.z.ai/api/paas/v4`. When building runtime `modelRegistry` for OpenRouter, resolve the configured model id case-insensitively and register it dynamically with context/input/output limits from metadata so context-left/compaction can resolve dynamic OpenRouter models. OpenAI can override `text.verbosity` in `Responses API` with `model.verbosity` (low/medium/high). When OpenAI `experimental.openai.websocket_mode=auto` falls back from websocket to HTTP, runtime emits a visible warning once per run while continuing over HTTP. @@ -114,6 +115,7 @@ Launch for development: - OpenAI: `OPENAI_API_KEY=... bun packages/runtime/src/index.ts` - Anthropic: `ANTHROPIC_API_KEY=... bun packages/runtime/src/index.ts` - OpenRouter: `OPENROUTER_API_KEY=... bun packages/runtime/src/index.ts` +- Z.ai: `ZAI_API_KEY=... bun packages/runtime/src/index.ts` - If you want to log OpenAI OAuth HTTP 4xx/5xx: `CODELIA_DEBUG=1` - OpenRouter app headers (optional): `OPENROUTER_HTTP_REFERER` / `OPENROUTER_X_TITLE` - If you want to check the history snapshot after compaction in runtime log: `CODELIA_DEBUG=1` (output `compaction context snapshot ...`) diff --git a/packages/runtime/src/agent-factory.ts b/packages/runtime/src/agent-factory.ts index 5145190c..0384c848 100644 --- a/packages/runtime/src/agent-factory.ts +++ b/packages/runtime/src/agent-factory.ts @@ -6,11 +6,14 @@ import { ChatAnthropic, ChatOpenAI, ChatOpenRouter, + ChatZai, DEFAULT_MODEL_REGISTRY, type ModelEntry, OPENAI_DEFAULT_MODEL, resolveModel, resolveProviderModelId, + ZAI_DEFAULT_MODEL, + ZAI_REASONING_EFFORT_MODELS, } from "@codelia/core"; import { ModelMetadataServiceImpl } from "@codelia/model-metadata"; import { type ApprovalMode, parseApprovalMode } from "@codelia/shared-types"; @@ -52,6 +55,7 @@ import { resolveAnthropicMaxTokens, resolveAnthropicReasoning, 
resolveResponsesReasoning, + resolveZaiReasoning, } from "./model-reasoning"; import { buildModelRegistry } from "./model-registry"; import { resolveApprovalModeForRuntime } from "./permissions/approval-mode"; @@ -106,7 +110,7 @@ const isNativeSearchProvider = ( ): boolean => allowedProviders.includes(provider); const buildHostedSearchToolDefinitions = ( - provider: "openai" | "openrouter" | "anthropic", + provider: BaseChatModel["provider"], options: Awaited>, ): ToolDefinition[] => { if ( @@ -229,6 +233,16 @@ const buildOpenRouterClientOptions = ( }; }; +const buildZaiClientOptions = ( + auth: ProviderAuth, +): { apiKey: string; baseURL?: string } => { + const baseURL = readEnvValue("ZAI_BASE_URL"); + return { + apiKey: requireApiKeyAuth("Z.ai", auth), + ...(baseURL ? { baseURL } : {}), + }; +}; + const resolveModelMaxTokensFromEntry = ( entry: ModelEntry | null, ): number | null => { @@ -857,6 +871,31 @@ export const createAgentFactory = ( }); break; } + case "zai": { + const modelName = modelConfig.name ?? ZAI_DEFAULT_MODEL; + resolvedModelName = modelName; + const reasoning = resolveZaiReasoning({ + requested: requestedReasoning, + }); + const providerModelName = + resolveProviderModelId(DEFAULT_MODEL_REGISTRY, modelName, "zai") ?? + modelName; + const supportsReasoningEffort = + ZAI_REASONING_EFFORT_MODELS.has(providerModelName); + llm = new ChatZai({ + ...buildZaiClientOptions(providerAuth), + model: modelName, + reasoningEffort: supportsReasoningEffort ? reasoning.effort : null, + reasoningLevelRequested: reasoning.requested, + reasoningLevelApplied: supportsReasoningEffort + ? reasoning.applied + : undefined, + reasoningFallbackApplied: supportsReasoningEffort + ? 
reasoning.fallbackApplied + : undefined, + }); + break; + } default: throw new Error(`Unsupported model.provider: ${provider}`); } diff --git a/packages/runtime/src/auth/resolver.ts b/packages/runtime/src/auth/resolver.ts index ca2f2e71..69cc6e18 100644 --- a/packages/runtime/src/auth/resolver.ts +++ b/packages/runtime/src/auth/resolver.ts @@ -24,6 +24,7 @@ export const SUPPORTED_PROVIDERS = [ "openai", "anthropic", "openrouter", + "zai", ] as const; export type SupportedProvider = (typeof SUPPORTED_PROVIDERS)[number]; @@ -31,6 +32,7 @@ const API_KEY_ENV: Record = { openai: "OPENAI_API_KEY", anthropic: "ANTHROPIC_API_KEY", openrouter: "OPENROUTER_API_KEY", + zai: "ZAI_API_KEY", }; export class AuthResolver { @@ -101,7 +103,9 @@ export class AuthResolver { ? "OAuth (ChatGPT Plus/Pro) or API key" : provider === "openrouter" ? "API key (OpenRouter)" - : "API key", + : provider === "zai" + ? "API key (Z.ai)" + : "API key", })), multi: false, }); @@ -138,6 +142,8 @@ export class AuthResolver { return "Anthropic API key"; case "openrouter": return "OpenRouter API key"; + case "zai": + return "Z.ai API key"; default: return "API key"; } diff --git a/packages/runtime/src/model-reasoning.ts b/packages/runtime/src/model-reasoning.ts index 757428ef..93197ddb 100644 --- a/packages/runtime/src/model-reasoning.ts +++ b/packages/runtime/src/model-reasoning.ts @@ -27,6 +27,14 @@ export type AnthropicReasoningResolution = ReasoningResolution & { usedFallbackModelProfile: boolean; }; +export type ZaiReasoningResolution = { + requested: CanonicalReasoningLevel; + applied: "high" | "xhigh"; + effort: "high" | "max"; + fallbackApplied: boolean; + supportedLevels: readonly CanonicalReasoningLevel[]; +}; + type AnthropicReasoningModelProfile = { supportedLevels: readonly CanonicalReasoningLevel[]; budgetPresetByLevel: Partial< @@ -331,3 +339,28 @@ export const resolveAnthropicMaxTokens = ({ export const getAnthropicReasoningModelTableIds = (): string[] => 
Object.keys(ANTHROPIC_REASONING_MODEL_TABLE).sort(); + +export const resolveZaiReasoning = ({ + requested, +}: { + requested?: CanonicalReasoningLevel; +}): ZaiReasoningResolution => { + const normalizedRequested = normalizeRequestedReasoning(requested); + if (normalizedRequested === "xhigh") { + return { + requested: normalizedRequested, + applied: "xhigh", + effort: "max", + fallbackApplied: false, + supportedLevels: REASONING_LEVEL_ORDER, + }; + } + return { + requested: normalizedRequested, + applied: "high", + effort: "high", + fallbackApplied: + normalizedRequested === "low" || normalizedRequested === "medium", + supportedLevels: REASONING_LEVEL_ORDER, + }; +}; diff --git a/packages/runtime/src/model-registry.ts b/packages/runtime/src/model-registry.ts index d6b26dd6..1c383fdc 100644 --- a/packages/runtime/src/model-registry.ts +++ b/packages/runtime/src/model-registry.ts @@ -121,6 +121,7 @@ const withOpenRouterDynamicModel = ( anthropic: { ...registry.aliasesByProvider.anthropic }, openrouter: { ...registry.aliasesByProvider.openrouter }, google: { ...registry.aliasesByProvider.google }, + zai: { ...registry.aliasesByProvider.zai }, }, }; registerModels(next, [ diff --git a/packages/runtime/src/rpc/handlers.ts b/packages/runtime/src/rpc/handlers.ts index e512a6b0..64f98abc 100644 --- a/packages/runtime/src/rpc/handlers.ts +++ b/packages/runtime/src/rpc/handlers.ts @@ -182,7 +182,9 @@ export const createRuntimeHandlers = ({ ? "OAuth (ChatGPT Plus/Pro) or API key" : provider === "openrouter" ? "API key (OpenRouter)" - : "API key", + : provider === "zai" + ? 
"API key (Z.ai)" + : "API key", })), multi: false, }); diff --git a/packages/runtime/src/rpc/model.ts b/packages/runtime/src/rpc/model.ts index d11454b3..8182a3f7 100644 --- a/packages/runtime/src/rpc/model.ts +++ b/packages/runtime/src/rpc/model.ts @@ -42,7 +42,7 @@ export type ModelHandlersDeps = { sessionStateStore?: SessionStateStore; }; -type SupportedModelProvider = "openai" | "anthropic" | "openrouter"; +type SupportedModelProvider = "openai" | "anthropic" | "openrouter" | "zai"; type StaticModelProvider = Exclude; const isSupportedProvider = ( @@ -50,7 +50,8 @@ const isSupportedProvider = ( ): provider is SupportedModelProvider => provider === "openai" || provider === "anthropic" || - provider === "openrouter"; + provider === "openrouter" || + provider === "zai"; const resolveProviderModelEntry = ( providerEntries: Record | null, @@ -404,6 +405,7 @@ export const buildProviderModelList = async ({ anthropic: provider === "anthropic" ? (providerEntries ?? {}) : {}, openrouter: {}, google: {}, + zai: provider === "zai" ? (providerEntries ?? 
{}) : {}, }, }); const models = sortModelsByReleaseDate( @@ -415,7 +417,7 @@ export const buildProviderModelList = async ({ provider, providerEntries, ); - if (!includeDetails || !providerEntries) { + if (!includeDetails || (!providerEntries && provider !== "zai")) { return { models }; } const details: NonNullable = {}; @@ -642,7 +644,8 @@ export const createModelHandlers = ({ if ( provider !== "openai" && provider !== "anthropic" && - provider !== "openrouter" + provider !== "openrouter" && + provider !== "zai" ) { sendError(id, { code: RPC_ERROR_CODE.INVALID_PARAMS, diff --git a/packages/runtime/tests/agent-factory-zai.test.ts b/packages/runtime/tests/agent-factory-zai.test.ts new file mode 100644 index 00000000..2e0e1433 --- /dev/null +++ b/packages/runtime/tests/agent-factory-zai.test.ts @@ -0,0 +1,117 @@ +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { promises as fs } from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import type { Agent, BaseChatModel } from "@codelia/core"; +import { createAgentFactory } from "../src/agent-factory"; +import { RuntimeState } from "../src/runtime-state"; + +describe("createAgentFactory Z.ai", () => { + let tempRoot = ""; + const envSnapshot = new Map(); + let originalFetch: typeof fetch; + + const setEnv = (key: string, value: string) => { + if (!envSnapshot.has(key)) { + envSnapshot.set(key, process.env[key]); + } + process.env[key] = value; + }; + + beforeEach(async () => { + originalFetch = globalThis.fetch; + tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "codelia-zai-agent-")); + setEnv("CODELIA_LAYOUT", "xdg"); + setEnv("XDG_STATE_HOME", path.join(tempRoot, "state")); + setEnv("XDG_CACHE_HOME", path.join(tempRoot, "cache")); + setEnv("XDG_CONFIG_HOME", path.join(tempRoot, "config-home")); + setEnv("XDG_DATA_HOME", path.join(tempRoot, "data")); + setEnv("ZAI_API_KEY", "test-zai-key"); + setEnv("ZAI_BASE_URL", "https://example.test/zai/v4"); + const 
configPath = path.join(tempRoot, "config.json"); + setEnv("CODELIA_CONFIG_PATH", configPath); + await fs.writeFile( + configPath, + JSON.stringify( + { + version: 1, + model: { + provider: "zai", + name: "glm-5.2", + reasoning: "low", + }, + search: { + mode: "auto", + }, + execution_environment: { + startup_checks: { + enabled: false, + }, + }, + }, + null, + 2, + ), + "utf8", + ); + globalThis.fetch = Object.assign( + async (input: Parameters[0]) => { + const url = String(input); + if (url === "https://models.dev/api.json") { + return new Response( + JSON.stringify({ + openai: { + models: { + "gpt-test": { + id: "gpt-test", + name: "GPT Test", + }, + }, + }, + }), + { + status: 200, + headers: { "content-type": "application/json" }, + }, + ); + } + throw new Error(`unexpected fetch: ${url}`); + }, + { preconnect: originalFetch.preconnect }, + ) as typeof fetch; + }); + + afterEach(async () => { + globalThis.fetch = originalFetch; + for (const [key, value] of envSnapshot) { + if (value === undefined) { + delete process.env[key]; + } else { + process.env[key] = value; + } + } + envSnapshot.clear(); + await fs.rm(tempRoot, { recursive: true, force: true }); + }); + + test("constructs ChatZai and uses local search for search.mode=auto", async () => { + const projectDir = path.join(tempRoot, "project"); + await fs.mkdir(projectDir, { recursive: true }); + const state = new RuntimeState(); + state.runtimeWorkingDir = projectDir; + state.runtimeSandboxRoot = projectDir; + const getAgent = createAgentFactory(state); + + const agent = await getAgent(); + const llm = (agent as unknown as { llm: BaseChatModel }).llm; + + expect(llm.provider).toBe("zai"); + expect(llm.model).toBe("glm-5.2"); + expect(state.currentModelProvider).toBe("zai"); + expect(state.currentModelName).toBe("glm-5.2"); + expect(state.tools?.some((tool) => tool.name === "search")).toBe(true); + expect( + state.toolDefinitions?.some((tool) => tool.type === "hosted_search"), + ).toBe(false); + }); +}); diff 
--git a/packages/runtime/tests/auth-zai.test.ts b/packages/runtime/tests/auth-zai.test.ts new file mode 100644 index 00000000..1b4d0c1c --- /dev/null +++ b/packages/runtime/tests/auth-zai.test.ts @@ -0,0 +1,45 @@ +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { promises as fs } from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { AuthResolver, SUPPORTED_PROVIDERS } from "../src/auth/resolver"; +import { RuntimeState } from "../src/runtime-state"; + +describe("zai auth", () => { + let tempRoot = ""; + const envSnapshot = new Map(); + + const setEnv = (key: string, value: string) => { + envSnapshot.set(key, process.env[key]); + process.env[key] = value; + }; + + beforeEach(async () => { + tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "codelia-zai-auth-")); + setEnv("CODELIA_LAYOUT", "xdg"); + setEnv("XDG_STATE_HOME", path.join(tempRoot, "state")); + setEnv("XDG_CACHE_HOME", path.join(tempRoot, "cache")); + setEnv("XDG_CONFIG_HOME", path.join(tempRoot, "config")); + setEnv("XDG_DATA_HOME", path.join(tempRoot, "data")); + setEnv("ZAI_API_KEY", "test-zai-key"); + }); + + afterEach(async () => { + for (const [key, value] of envSnapshot) { + if (value === undefined) { + delete process.env[key]; + } else { + process.env[key] = value; + } + } + envSnapshot.clear(); + await fs.rm(tempRoot, { recursive: true, force: true }); + }); + + test("supports ZAI_API_KEY env auth", async () => { + expect(SUPPORTED_PROVIDERS).toContain("zai"); + const resolver = await AuthResolver.create(new RuntimeState(), () => {}); + const auth = await resolver.resolveProviderAuth("zai"); + expect(auth).toEqual({ method: "api_key", api_key: "test-zai-key" }); + }); +}); diff --git a/packages/runtime/tests/model-list-static.test.ts b/packages/runtime/tests/model-list-static.test.ts index a83923e1..fd2d499e 100644 --- a/packages/runtime/tests/model-list-static.test.ts +++ b/packages/runtime/tests/model-list-static.test.ts @@ -149,4 
+149,43 @@ describe("model.list static providers", () => { expect(result.models).not.toContain("gpt-5.5-pro"); expect(result.models).not.toContain("gpt-5.3-codex"); }); + + test("omits details for non-Z.ai static providers when metadata is unavailable", async () => { + const result = await buildProviderModelList({ + provider: "openai", + includeDetails: true, + log: () => {}, + providerEntriesOverride: null, + }); + + expect(result.models).toContain("gpt-5.5"); + expect(result.details).toBeUndefined(); + }); + + test("lists static zai model details without dynamic metadata", async () => { + const result = await buildProviderModelList({ + provider: "zai", + includeDetails: true, + log: () => {}, + providerEntriesOverride: {}, + }); + + expect(result.models).toEqual([ + "glm-5.2", + "glm-5.1", + "glm-5", + "glm-5-turbo", + "glm-4.7", + ]); + expect(result.details?.["glm-5.2"]).toEqual({ + context_window: 1_000_000, + max_input_tokens: 1_000_000, + max_output_tokens: 131_072, + }); + expect(result.details?.["glm-5.1"]).toEqual({ + context_window: 200_000, + max_input_tokens: 200_000, + max_output_tokens: 131_072, + }); + }); }); diff --git a/packages/runtime/tests/model-reasoning.test.ts b/packages/runtime/tests/model-reasoning.test.ts index aa97fdb9..3ce470f5 100644 --- a/packages/runtime/tests/model-reasoning.test.ts +++ b/packages/runtime/tests/model-reasoning.test.ts @@ -5,6 +5,7 @@ import { resolveAnthropicMaxTokens, resolveAnthropicReasoning, resolveResponsesReasoning, + resolveZaiReasoning, } from "../src/model-reasoning"; describe("model reasoning mapping", () => { @@ -28,6 +29,33 @@ describe("model reasoning mapping", () => { expect(mapped.fallbackApplied).toBe(false); }); + test("maps zai reasoning to provider-supported effort values", () => { + expect(resolveZaiReasoning({ requested: "low" })).toMatchObject({ + requested: "low", + applied: "high", + effort: "high", + fallbackApplied: true, + }); + expect(resolveZaiReasoning({ requested: "medium" 
})).toMatchObject({ + requested: "medium", + applied: "high", + effort: "high", + fallbackApplied: true, + }); + expect(resolveZaiReasoning({ requested: "high" })).toMatchObject({ + requested: "high", + applied: "high", + effort: "high", + fallbackApplied: false, + }); + expect(resolveZaiReasoning({ requested: "xhigh" })).toMatchObject({ + requested: "xhigh", + applied: "xhigh", + effort: "max", + fallbackApplied: false, + }); + }); + test("maps anthropic known model reasoning to thinking budget preset", () => { const mapped = resolveAnthropicReasoning({ model: "claude-sonnet-4-5", diff --git a/packages/runtime/tests/model-registry.test.ts b/packages/runtime/tests/model-registry.test.ts index 909bbbf5..70d7e69c 100644 --- a/packages/runtime/tests/model-registry.test.ts +++ b/packages/runtime/tests/model-registry.test.ts @@ -64,6 +64,41 @@ describe("buildModelRegistry strict fallback", () => { expect(spec?.provider).toBe("openai"); }); + test("keeps static Z.ai GLM-5.2 limits when metadata is missing", async () => { + const registry = await buildModelRegistry(buildLlm("zai", "glm-5.2"), { + strict: true, + metadataService: buildMetadataService({ zai: {} }), + }); + + const spec = resolveModel(registry, "glm-5.2", "zai"); + expect(spec?.provider).toBe("zai"); + expect(spec?.contextWindow).toBe(1_000_000); + expect(spec?.maxInputTokens).toBe(1_000_000); + expect(spec?.maxOutputTokens).toBe(131_072); + }); + + test("keeps additional static Z.ai model limits when metadata is missing", async () => { + const registry = await buildModelRegistry(buildLlm("zai", "glm-5.1"), { + strict: true, + metadataService: buildMetadataService({ zai: {} }), + }); + + const spec = resolveModel(registry, "glm-5.1", "zai"); + expect(spec?.provider).toBe("zai"); + expect(spec?.contextWindow).toBe(200_000); + expect(spec?.maxInputTokens).toBe(200_000); + expect(spec?.maxOutputTokens).toBe(131_072); + }); + + test("throws in strict mode for unknown Z.ai models", async () => { + await expect( + 
buildModelRegistry(buildLlm("zai", "glm-next"), { + strict: true, + metadataService: buildMetadataService({ zai: {} }), + }), + ).rejects.toThrow("Usable model metadata not found for zai/glm-next"); + }); + test("keeps static GPT-5.4 small model limits when metadata is missing", async () => { const registry = await buildModelRegistry( buildLlm("openai", "gpt-5.4-mini"), diff --git a/packages/runtime/tests/model-state-sync.test.ts b/packages/runtime/tests/model-state-sync.test.ts index 8f235ee2..02a0d51b 100644 --- a/packages/runtime/tests/model-state-sync.test.ts +++ b/packages/runtime/tests/model-state-sync.test.ts @@ -232,6 +232,79 @@ describe("model state sync", () => { } }); + test("model.set accepts known Z.ai model and rejects unknown Z.ai model", async () => { + const env = await withTempEnv(); + const capture = createStdoutCapture(); + capture.start(); + try { + const state = new RuntimeState(); + state.lastUiContext = { + cwd: env.projectDir, + workspace_root: env.projectDir, + }; + state.runtimeWorkingDir = env.projectDir; + state.agent = {} as Agent; + const handlers = createRuntimeHandlers({ + state, + getAgent: async () => ({}) as Agent, + log: () => {}, + }); + + handlers.processMessage({ + jsonrpc: "2.0", + id: "model-set-zai-ok", + method: "model.set", + params: { + provider: "zai", + name: "glm-5.2", + }, + } satisfies RpcRequest); + const okResponse = await capture.waitForResponse("model-set-zai-ok"); + expect((okResponse as { error?: unknown }).error).toBeUndefined(); + expect(okResponse.result).toMatchObject({ + provider: "zai", + name: "glm-5.2", + }); + expect(state.currentModelProvider).toBe("zai"); + expect(state.currentModelName).toBe("glm-5.2"); + + handlers.processMessage({ + jsonrpc: "2.0", + id: "model-set-zai-51-ok", + method: "model.set", + params: { + provider: "zai", + name: "glm-5.1", + }, + } satisfies RpcRequest); + const ok51Response = await capture.waitForResponse("model-set-zai-51-ok"); + expect((ok51Response as { error?: 
unknown }).error).toBeUndefined(); + expect(ok51Response.result).toMatchObject({ + provider: "zai", + name: "glm-5.1", + }); + expect(state.currentModelProvider).toBe("zai"); + expect(state.currentModelName).toBe("glm-5.1"); + + handlers.processMessage({ + jsonrpc: "2.0", + id: "model-set-zai-bad", + method: "model.set", + params: { + provider: "zai", + name: "glm-next", + }, + } satisfies RpcRequest); + const badResponse = await capture.waitForResponse("model-set-zai-bad"); + expect( + (badResponse as { error?: { message?: string } }).error?.message, + ).toBe("unknown model: glm-next"); + } finally { + capture.stop(); + await env.cleanup(); + } + }); + test("model.set can switch models for the current session without writing config", async () => { const env = await withTempEnv(); const capture = createStdoutCapture();