From 7ac109fea0e9bda7a1803a06d2d4da933a61a2d0 Mon Sep 17 00:00:00 2001
From: Yusril Izza Aulia <izzadev@192.168.50.171>
Date: Sun, 24 May 2026 07:35:11 +0700
Subject: [PATCH] feat: add kimi coding plan provider

---
 backend/llm_client.py            | 75 +++++++++++++++++++++-----------
 backend/setup.py                 | 18 +++++++-
 routes/models.py                 |  2 +-
 unit_tests/test_kimi_provider.py | 39 +++++++++++++++++
 4 files changed, 107 insertions(+), 27 deletions(-)
 create mode 100644 unit_tests/test_kimi_provider.py

diff --git a/backend/llm_client.py b/backend/llm_client.py
index cfb11d8e..11927bd1 100644
--- a/backend/llm_client.py
+++ b/backend/llm_client.py
@@ -36,6 +36,27 @@
 }
 
 
+# Moonshot's Kimi Coding Plan endpoint (api.kimi.com/coding) gates access by
+# User-Agent — only requests claiming to be from kimi-cli, opencode, claude-code,
+# pi, or hermes-agent are accepted. Mirroring kimi-cli's UA is the documented
+# way authorized clients identify themselves; do not "fix" this back to a
+# generic UA without first confirming Moonshot has rotated the allow-list.
+_KIMI_CODING_HOST = "api.kimi.com"  # looked for as a substring of base_url
+_KIMI_USER_AGENT = "KimiCLI/1.5"  # User-Agent sent when that substring matches
+
+
+def _build_request_headers(
+    api_key: Optional[str], base_url: Optional[str]
+) -> Dict[str, str]:
+    """Return HTTP headers for a chat / models call (JSON, auth, Kimi UA)."""
+    is_kimi = bool(base_url) and _KIMI_CODING_HOST in base_url
+    return {
+        "Content-Type": "application/json",
+        **({"Authorization": f"Bearer {api_key}"} if api_key else {}),
+        **({"User-Agent": _KIMI_USER_AGENT} if is_kimi else {}),
+    }
+
+
 def _format_llm_error(error_type: str, context: Optional[Dict[str, Any]] = None) -> str:
     """Format an LLM error type into a user-friendly message.
 
@@ -109,7 +130,9 @@ def strip_thinking_tags(content: str) -> Tuple[str, Optional[str]]:
     return cleaned, thinking_content
 
 
-def _convert_image_url_to_claude(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+def _convert_image_url_to_claude(
+    messages: List[Dict[str, Any]],
+) -> List[Dict[str, Any]]:
     """Convert OpenAI-style image_url content blocks to Anthropic image+source format."""
     result = []
     for msg in messages:
@@ -128,15 +151,23 @@ def _convert_image_url_to_claude(messages: List[Dict[str, Any]]) -> List[Dict[st
                         media_type = header.split(":")[1].split(";")[0]
                     except (ValueError, IndexError):
                         media_type, b64data = "image/jpeg", url
-                    new_parts.append({
-                        "type": "image",
-                        "source": {"type": "base64", "media_type": media_type, "data": b64data},
-                    })
+                    new_parts.append(
+                        {
+                            "type": "image",
+                            "source": {
+                                "type": "base64",
+                                "media_type": media_type,
+                                "data": b64data,
+                            },
+                        }
+                    )
                 else:
-                    new_parts.append({
-                        "type": "image",
-                        "source": {"type": "url", "url": url},
-                    })
+                    new_parts.append(
+                        {
+                            "type": "image",
+                            "source": {"type": "url", "url": url},
+                        }
+                    )
             else:
                 new_parts.append(part)
         result.append({**msg, "content": new_parts})
@@ -249,9 +280,7 @@ def test_connection(self) -> Dict[str, Any]:
                 models_url = f"{self.base_url}/tags"
             else:
                 models_url = f"{self.base_url}/v1/models"
-            headers = {"Content-Type": "application/json"}
-            if self.api_key:
-                headers["Authorization"] = f"Bearer {self.api_key}"
+            headers = _build_request_headers(self.api_key, self.base_url)
             response = requests.get(models_url, headers=headers, timeout=10)
             if response.status_code == 200:
                 data = response.json()
@@ -384,22 +413,20 @@ def chat_completion(
             processed_messages = [merged] + processed_messages[n_sys:]
 
         # Handle reasoning_content field based on thinking mode:
-        # - Thinking ON (self.thinking=True, enable_thinking=True): add the field on every
-        #   assistant message — some APIs (e.g. DeepSeek) require it even when the model
-        #   returned no reasoning for that particular turn.
-        # - Thinking model but disabled for this call (self.thinking=True, enable_thinking=False):
-        #   keep existing reasoning_content so it is passed back correctly (required by
-        #   DeepSeek-R1 after tool calls), but do NOT add empty strings to messages that
-        #   have none — and do NOT add the thinking parameter to the payload below.
+        # - Thinking ON (self.thinking=True): add the field on every assistant message.
+        #   Always-thinking models (Kimi K2, DeepSeek-R1, MiniMax M2) require
+        #   reasoning_content to be present on ALL assistant messages, including those
+        #   with tool_calls, even when the model returned no reasoning for that turn.
+        #   This is done unconditionally for thinking models so the API never sees a
+        #   missing reasoning_content field after tool calls.
         # - Thinking OFF (self.thinking=False): other APIs reject the field entirely — strip it.
-        if self.thinking and enable_thinking:
+        if self.thinking:
             for _msg in processed_messages:
                 if _msg.get("role") == "assistant" and "reasoning_content" not in _msg:
                     _msg["reasoning_content"] = ""
-        elif not self.thinking:
+        else:
             for _msg in processed_messages:
                 _msg.pop("reasoning_content", None)
-        # else: self.thinking=True, enable_thinking=False → leave reasoning_content as-is
 
         # Claude API uses {"type":"image","source":{...}} instead of OpenAI's image_url format.
         if is_claude:
@@ -437,9 +464,7 @@ def chat_completion(
                 )
                 payload["thinking"] = {"type": "enabled", "budget_tokens": budget}
 
-        headers = {"Content-Type": "application/json"}
-        if self.api_key:
-            headers["Authorization"] = f"Bearer {self.api_key}"
+        headers = _build_request_headers(self.api_key, self.base_url)
 
         try:
             from models.db import db as _db
diff --git a/backend/setup.py b/backend/setup.py
index fce978fd..d610e4dd 100644
--- a/backend/setup.py
+++ b/backend/setup.py
@@ -73,6 +73,20 @@
         # prior assistant messages, so default thinking on for this provider.
         "default_thinking": True,
     },
+    "kimi_coding": {
+        "type": "remote",
+        "base_url": "https://api.kimi.com/coding/v1",
+        "api_key_required": True,
+        "placeholder_model": "kimi-for-coding",
+        "label": "Kimi Coding Plan (Moonshot)",
+        "description": "Cloud · sk-kimi-… key required",
+        # Moonshot's coding-plan endpoint blocks unauthorized clients via a
+        # User-Agent allow-list. backend/llm_client.py sends kimi-cli's UA for
+        # any base_url containing "api.kimi.com" so requests pass the check.
+        # Kimi coding models are always-thinking; the API rejects requests that
+        # omit reasoning_content on prior assistant messages.
+        "default_thinking": True,
+    },
     "llama.cpp": {
         "type": "local",
         "base_url": "http://localhost:8080/v1",
@@ -432,7 +446,9 @@ def run_setup(
         _write_system_prompt(agent_id, system_prompt)
 
         # 4.5 Copy default knowledge base file
-        _default_kb = os.path.join(config.BASE_DIR, 'defaults', 'super_agent_kb_evonic.md')
+        _default_kb = os.path.join(
+            config.BASE_DIR, "defaults", "super_agent_kb_evonic.md"
+        )
         if os.path.isfile(_default_kb):
             _kb_dir = os.path.join(config.BASE_DIR, "agents", agent_id, "kb")
             os.makedirs(_kb_dir, exist_ok=True)
diff --git a/routes/models.py b/routes/models.py
index 298f630c..740db00b 100644
--- a/routes/models.py
+++ b/routes/models.py
@@ -63,7 +63,7 @@ def api_create_model():
         return jsonify({"success": False, "error": "type must be remote or local"}), 400
 
     # Validate provider
-    valid_providers = ("openrouter", "togetherai", "ollama", "ollama_cloud", "opencode_zen", "opencode_go", "llama.cpp", "custom")
+    valid_providers = ("openrouter", "togetherai", "ollama", "ollama_cloud", "opencode_zen", "opencode_go", "kimi_coding", "llama.cpp", "custom")
     if data["provider"] not in valid_providers:
         return jsonify(
             {"success": False, "error": f"provider must be one of {valid_providers}"}
diff --git a/unit_tests/test_kimi_provider.py b/unit_tests/test_kimi_provider.py
new file mode 100644
index 00000000..295ed406
--- /dev/null
+++ b/unit_tests/test_kimi_provider.py
@@ -0,0 +1,39 @@
+"""Tests for Kimi Coding Plan User-Agent masquerade in the LLM client."""
+
+from backend.llm_client import _build_request_headers
+
+
+def test_kimi_coding_endpoint_sets_kimicli_user_agent():
+    """Kimi coding URL gets the kimi-cli UA next to the usual headers."""
+    kimi_headers = _build_request_headers(
+        api_key="sk-kimi-abc", base_url="https://api.kimi.com/coding/v1"
+    )
+    assert kimi_headers["Content-Type"] == "application/json"
+    assert kimi_headers["Authorization"] == "Bearer sk-kimi-abc"
+    assert kimi_headers["User-Agent"] == "KimiCLI/1.5"
+
+
+def test_non_kimi_endpoint_omits_user_agent():
+    """A non-Kimi base URL keeps bearer auth but gets no User-Agent override."""
+    openai_headers = _build_request_headers(
+        api_key="sk-foo", base_url="https://api.openai.com/v1"
+    )
+    assert "User-Agent" not in openai_headers
+    assert openai_headers["Authorization"] == "Bearer sk-foo"
+
+
+def test_no_api_key_omits_authorization():
+    """No key against a local endpoint: neither auth nor UA header is emitted."""
+    local = _build_request_headers(api_key=None, base_url="http://localhost:11434/v1")
+    assert not {"Authorization", "User-Agent"} & local.keys()
+
+
+def test_kimi_match_is_substring_so_subpaths_work():
+    """Any path on api.kimi.com triggers the UA masquerade.
+
+    That includes the bare /coding root used by the Anthropic-compatible
+    endpoint, should that format ever be added.
+    """
+    root_url = "https://api.kimi.com/coding"
+    root_headers = _build_request_headers(api_key="sk-kimi-x", base_url=root_url)
+    assert root_headers["User-Agent"] == "KimiCLI/1.5"