Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 45 additions & 15 deletions backend/llm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,27 @@
}


# Moonshot's Kimi Coding Plan endpoint (api.kimi.com/coding) gates access by
# User-Agent — only requests claiming to be from kimi-cli, opencode, claude-code,
# pi, or hermes-agent are accepted. Mirroring kimi-cli's UA is the documented
# way authorized clients identify themselves; do not "fix" this back to a
# generic UA without first confirming Moonshot has rotated the allow-list.
_KIMI_CODING_HOST = "api.kimi.com"
_KIMI_USER_AGENT = "KimiCLI/1.5"


def _is_kimi_coding_url(base_url: Optional[str]) -> bool:
    """Return True when the host of *base_url* is the Kimi host or a subdomain.

    Only the hostname is inspected, so a URL that merely mentions
    ``api.kimi.com`` in its path, query, userinfo, or as a prefix/suffix of a
    different domain does not qualify.
    """
    if not base_url:
        return False

    # Local import: the module's top-level import block is not touched here.
    from urllib.parse import urlsplit

    candidate = base_url.strip()
    if "://" not in candidate:
        # Without a scheme urlsplit() treats everything as a path; a leading
        # "//" makes it parse the first segment as the network location.
        candidate = "//" + candidate.lstrip("/")

    try:
        host = urlsplit(candidate).hostname
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets).
        return False
    if not host:
        return False

    # Hostnames are case-insensitive and may carry a trailing root dot.
    host = host.lower().rstrip(".")
    return host == _KIMI_CODING_HOST or host.endswith("." + _KIMI_CODING_HOST)


def _build_request_headers(
    api_key: Optional[str], base_url: Optional[str]
) -> Dict[str, str]:
    """Build outbound HTTP headers for a chat / models request.

    Args:
        api_key: Bearer token; when falsy no ``Authorization`` header is added.
        base_url: Provider base URL. When its host is ``api.kimi.com`` (or a
            subdomain of it) the Kimi CLI ``User-Agent`` is attached.

    Returns:
        A new dict that always contains ``Content-Type: application/json`` and
        conditionally ``Authorization`` and ``User-Agent``.
    """
    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    # Match on the parsed hostname rather than a substring of the whole URL so
    # the spoofed User-Agent is never sent to look-alike or unrelated hosts.
    if _is_kimi_coding_url(base_url):
        headers["User-Agent"] = _KIMI_USER_AGENT
    return headers


def _format_llm_error(error_type: str, context: Optional[Dict[str, Any]] = None) -> str:
"""Format an LLM error type into a user-friendly message.

Expand Down Expand Up @@ -109,7 +130,9 @@ def strip_thinking_tags(content: str) -> Tuple[str, Optional[str]]:
return cleaned, thinking_content


def _convert_image_url_to_claude(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
def _convert_image_url_to_claude(
messages: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
"""Convert OpenAI-style image_url content blocks to Anthropic image+source format."""
result = []
for msg in messages:
Expand All @@ -128,15 +151,23 @@ def _convert_image_url_to_claude(messages: List[Dict[str, Any]]) -> List[Dict[st
media_type = header.split(":")[1].split(";")[0]
except (ValueError, IndexError):
media_type, b64data = "image/jpeg", url
new_parts.append({
"type": "image",
"source": {"type": "base64", "media_type": media_type, "data": b64data},
})
new_parts.append(
{
"type": "image",
"source": {
"type": "base64",
"media_type": media_type,
"data": b64data,
},
}
)
else:
new_parts.append({
"type": "image",
"source": {"type": "url", "url": url},
})
new_parts.append(
{
"type": "image",
"source": {"type": "url", "url": url},
}
)
else:
new_parts.append(part)
result.append({**msg, "content": new_parts})
Expand Down Expand Up @@ -249,9 +280,7 @@ def test_connection(self) -> Dict[str, Any]:
models_url = f"{self.base_url}/tags"
else:
models_url = f"{self.base_url}/v1/models"
headers = {"Content-Type": "application/json"}
if self.api_key:
headers["Authorization"] = f"Bearer {self.api_key}"
headers = _build_request_headers(self.api_key, self.base_url)
response = requests.get(models_url, headers=headers, timeout=10)
if response.status_code == 200:
data = response.json()
Expand Down Expand Up @@ -388,6 +417,9 @@ def chat_completion(
# even without explicit thinking mode. Detect this by checking if any
# assistant message already carries reasoning_content — if so, preserve
# it so the API receives it back on the next call.
# Always-thinking models (Kimi K2, DeepSeek-R1, MiniMax M2) require
# reasoning_content to be present on ALL assistant messages, including those
# with tool_calls, even when the model returned no reasoning for that turn.
_has_reasoning = any(
_msg.get("reasoning_content")
for _msg in processed_messages
Expand Down Expand Up @@ -441,9 +473,7 @@ def chat_completion(
)
payload["thinking"] = {"type": "enabled", "budget_tokens": budget}

headers = {"Content-Type": "application/json"}
if self.api_key:
headers["Authorization"] = f"Bearer {self.api_key}"
headers = _build_request_headers(self.api_key, self.base_url)

try:
from models.db import db as _db
Expand Down
18 changes: 17 additions & 1 deletion backend/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,20 @@
# prior assistant messages, so default thinking on for this provider.
"default_thinking": True,
},
"kimi_coding": {
"type": "remote",
"base_url": "https://api.kimi.com/coding/v1",
"api_key_required": True,
"placeholder_model": "kimi-for-coding",
"label": "Kimi Coding Plan (Moonshot)",
"description": "Cloud · sk-kimi-… key required",
# Moonshot's coding-plan endpoint blocks unauthorized clients via a
# User-Agent allow-list. backend/llm_client.py masquerades as kimi-cli
# for any base_url under api.kimi.com so requests pass the check.
# Kimi coding models are always-thinking; the API rejects requests that
# omit reasoning_content on prior assistant messages.
"default_thinking": True,
},
"deepseek": {
"type": "remote",
"base_url": "https://api.deepseek.com",
Expand Down Expand Up @@ -433,7 +447,9 @@ def run_setup(
_write_system_prompt(agent_id, system_prompt)

# 4.5 Copy default knowledge base files
_default_kb = os.path.join(config.BASE_DIR, 'defaults', 'super_agent_kb_evonic.md')
_default_kb = os.path.join(
config.BASE_DIR, "defaults", "super_agent_kb_evonic.md"
)
if os.path.isfile(_default_kb):
_kb_dir = os.path.join(config.BASE_DIR, "agents", agent_id, "kb")
os.makedirs(_kb_dir, exist_ok=True)
Expand Down
2 changes: 1 addition & 1 deletion routes/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def api_create_model():
return jsonify({"success": False, "error": "type must be remote or local"}), 400

# Validate provider
valid_providers = ("openrouter", "togetherai", "ollama", "ollama_cloud", "opencode_zen", "opencode_go", "deepseek", "llama.cpp", "custom")
valid_providers = ("openrouter", "togetherai", "ollama", "ollama_cloud", "opencode_zen", "opencode_go", "kimi_coding", "deepseek", "llama.cpp", "custom")
if data["provider"] not in valid_providers:
return jsonify(
{"success": False, "error": f"provider must be one of {valid_providers}"}
Expand Down
39 changes: 39 additions & 0 deletions unit_tests/test_kimi_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Tests for Kimi Coding Plan User-Agent masquerade in the LLM client."""

from backend.llm_client import _build_request_headers


def test_kimi_coding_endpoint_sets_kimicli_user_agent():
    """A Kimi Coding Plan base URL yields the KimiCLI UA plus the usual headers."""
    built = _build_request_headers(
        api_key="sk-kimi-abc", base_url="https://api.kimi.com/coding/v1"
    )
    # Checked one header at a time, in the same order as before, so extra
    # headers added later do not break this test.
    expected = (
        ("User-Agent", "KimiCLI/1.5"),
        ("Authorization", "Bearer sk-kimi-abc"),
        ("Content-Type", "application/json"),
    )
    for header_name, header_value in expected:
        assert built[header_name] == header_value


def test_non_kimi_endpoint_omits_user_agent():
    """A non-Kimi base URL gets the bearer token but no User-Agent override."""
    request_kwargs = {
        "api_key": "sk-foo",
        "base_url": "https://api.openai.com/v1",
    }
    built = _build_request_headers(**request_kwargs)
    assert "User-Agent" not in built
    assert built["Authorization"] == "Bearer sk-foo"


def test_no_api_key_omits_authorization():
    """With no API key and a local base URL, neither optional header is set."""
    built = _build_request_headers(
        api_key=None,
        base_url="http://localhost:11434/v1",
    )
    for absent_header in ("Authorization", "User-Agent"):
        assert absent_header not in built


def test_kimi_match_is_substring_so_subpaths_work():
    """The masquerade applies to any path under api.kimi.com, not only /coding/v1."""
    # The bare /coding root is the path the Anthropic-compatible endpoint
    # uses; it is covered here in case that format is ever added.
    coding_root = "https://api.kimi.com/coding"
    built = _build_request_headers(api_key="sk-kimi-x", base_url=coding_root)
    assert built["User-Agent"] == "KimiCLI/1.5"
Loading