diff --git a/backend/llm_client.py b/backend/llm_client.py index 1f237dc..17e40af 100644 --- a/backend/llm_client.py +++ b/backend/llm_client.py @@ -36,6 +36,27 @@ } +# Moonshot's Kimi Coding Plan endpoint (api.kimi.com/coding) gates access by +# User-Agent — only requests claiming to be from kimi-cli, opencode, claude-code, +# pi, or hermes-agent are accepted. Mirroring kimi-cli's UA is the documented +# way authorized clients identify themselves; do not "fix" this back to a +# generic UA without first confirming Moonshot has rotated the allow-list. +_KIMI_CODING_HOST = "api.kimi.com" +_KIMI_USER_AGENT = "KimiCLI/1.5" + + +def _build_request_headers( + api_key: Optional[str], base_url: Optional[str] +) -> Dict[str, str]: + """Build outbound HTTP headers for a chat / models request.""" + headers = {"Content-Type": "application/json"} + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + if base_url and _KIMI_CODING_HOST in base_url: + headers["User-Agent"] = _KIMI_USER_AGENT + return headers + + def _format_llm_error(error_type: str, context: Optional[Dict[str, Any]] = None) -> str: """Format an LLM error type into a user-friendly message. 
@@ -109,7 +130,9 @@ def strip_thinking_tags(content: str) -> Tuple[str, Optional[str]]: return cleaned, thinking_content -def _convert_image_url_to_claude(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: +def _convert_image_url_to_claude( + messages: List[Dict[str, Any]], +) -> List[Dict[str, Any]]: """Convert OpenAI-style image_url content blocks to Anthropic image+source format.""" result = [] for msg in messages: @@ -128,15 +151,23 @@ def _convert_image_url_to_claude(messages: List[Dict[str, Any]]) -> List[Dict[st media_type = header.split(":")[1].split(";")[0] except (ValueError, IndexError): media_type, b64data = "image/jpeg", url - new_parts.append({ - "type": "image", - "source": {"type": "base64", "media_type": media_type, "data": b64data}, - }) + new_parts.append( + { + "type": "image", + "source": { + "type": "base64", + "media_type": media_type, + "data": b64data, + }, + } + ) else: - new_parts.append({ - "type": "image", - "source": {"type": "url", "url": url}, - }) + new_parts.append( + { + "type": "image", + "source": {"type": "url", "url": url}, + } + ) else: new_parts.append(part) result.append({**msg, "content": new_parts}) @@ -249,9 +280,7 @@ def test_connection(self) -> Dict[str, Any]: models_url = f"{self.base_url}/tags" else: models_url = f"{self.base_url}/v1/models" - headers = {"Content-Type": "application/json"} - if self.api_key: - headers["Authorization"] = f"Bearer {self.api_key}" + headers = _build_request_headers(self.api_key, self.base_url) response = requests.get(models_url, headers=headers, timeout=10) if response.status_code == 200: data = response.json() @@ -388,6 +417,9 @@ def chat_completion( # even without explicit thinking mode. Detect this by checking if any # assistant message already carries reasoning_content — if so, preserve # it so the API receives it back on the next call. 
+ # Always-thinking models (Kimi K2, DeepSeek-R1, MiniMax M2) require + # reasoning_content to be present on ALL assistant messages, including those + # with tool_calls, even when the model returned no reasoning for that turn. _has_reasoning = any( _msg.get("reasoning_content") for _msg in processed_messages @@ -441,9 +473,7 @@ def chat_completion( ) payload["thinking"] = {"type": "enabled", "budget_tokens": budget} - headers = {"Content-Type": "application/json"} - if self.api_key: - headers["Authorization"] = f"Bearer {self.api_key}" + headers = _build_request_headers(self.api_key, self.base_url) try: from models.db import db as _db diff --git a/backend/setup.py b/backend/setup.py index ad967e1..a71ae52 100644 --- a/backend/setup.py +++ b/backend/setup.py @@ -75,6 +75,20 @@ # prior assistant messages, so default thinking on for this provider. "default_thinking": True, }, + "kimi_coding": { + "type": "remote", + "base_url": "https://api.kimi.com/coding/v1", + "api_key_required": True, + "placeholder_model": "kimi-for-coding", + "label": "Kimi Coding Plan (Moonshot)", + "description": "Cloud · sk-kimi-… key required", + # Moonshot's coding-plan endpoint blocks unauthorized clients via a + # User-Agent allow-list. backend/llm_client.py masquerades as kimi-cli + # for any base_url under api.kimi.com so requests pass the check. + # Kimi coding models are always-thinking; the API rejects requests that + # omit reasoning_content on prior assistant messages. 
"""Tests for Kimi Coding Plan User-Agent masquerade in the LLM client."""

from backend.llm_client import _build_request_headers


def test_kimi_coding_endpoint_sets_kimicli_user_agent():
    """The coding-plan URL gets the KimiCLI UA on top of auth and content type."""
    built = _build_request_headers(
        base_url="https://api.kimi.com/coding/v1",
        api_key="sk-kimi-abc",
    )
    assert built["User-Agent"] == "KimiCLI/1.5"
    assert built["Authorization"] == "Bearer sk-kimi-abc"
    assert built["Content-Type"] == "application/json"


def test_non_kimi_endpoint_omits_user_agent():
    """A non-Kimi provider keeps its bearer token but gets no User-Agent."""
    built = _build_request_headers(
        base_url="https://api.openai.com/v1",
        api_key="sk-foo",
    )
    assert "User-Agent" not in built
    assert built["Authorization"] == "Bearer sk-foo"


def test_no_api_key_omits_authorization():
    """Without an API key, a local endpoint gets neither optional header."""
    built = _build_request_headers(
        base_url="http://localhost:11434/v1",
        api_key=None,
    )
    assert "Authorization" not in built
    assert "User-Agent" not in built


def test_kimi_match_is_substring_so_subpaths_work():
    """A base_url on api.kimi.com with a different path still gets the UA."""
    # The bare /coding root is the path the Anthropic-compat endpoint uses,
    # in case that format is ever added.
    built = _build_request_headers(
        base_url="https://api.kimi.com/coding",
        api_key="sk-kimi-x",
    )
    assert built["User-Agent"] == "KimiCLI/1.5"