routatic · samueltuyizere · Jun 19, 2026 · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -20,14 +20,16 @@ Run a single test: `go test ./internal/router/ -v`
 
 **Purpose:** oc-go-cc is a proxy server that sits between Claude Code and OpenCode Go. It intercepts Anthropic API requests, transforms them to OpenAI Chat Completions format, forwards them to OpenCode Go, and transforms responses back to Anthropic SSE.
 
-**Model routing is config-driven, not code-driven.** Models are defined in `~/.config/oc-go-cc/config.json` — adding a new model does not require code changes (except for `IsAnthropicModel()` if the new model uses the Anthropic endpoint). The router in `internal/router/` selects models by matching request content against scenario patterns defined in `scenarios.go`.
+**Model routing is config-driven, not code-driven.** All models are defined in `~/.config/oc-go-cc/config.json` — adding a new model normally requires no code changes. Go provider models are transformed to OpenAI Chat Completions format automatically; the only exceptions are models hard-coded in `IsAnthropicModel()` (currently `qwen3.7-max`), whose upstream accepts only the Anthropic Messages format. Zen models use endpoint classification via `ClassifyEndpoint()`. The router in `internal/router/` selects models by matching request content against scenario patterns defined in `scenarios.go`.
+
+If a model's upstream doesn't support Anthropic tool format (`type: "custom"` server-tool shorthands), set `"anthropic_tools_disabled": true` in the model config to force it through the Chat Completions transform path instead of the raw Anthropic endpoint.
 
 **Two API endpoints:**
 
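-- OpenAI endpoint (`/v1/chat/completions`) — used by most models (GLM, Kimi, MiMo, Qwen)
-- Anthropic endpoint (`/v1/messages`) — used only by MiniMax models
+- OpenAI endpoint (`/v1/chat/completions`) — used by most Go provider models (GLM, Kimi, MiMo, MiniMax, Qwen)
+- Anthropic endpoint (`/v1/messages`) — used by `qwen3.7-max` on the Go provider and by Zen models that `ClassifyEndpoint()` maps to it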
 
-`internal/client/opencode.go` routes by model ID via `IsAnthropicModel()`.
+`internal/client/opencode.go` routes Go provider models to Chat Completions (except those listed in `IsAnthropicModel()`); Zen models are classified by `ClassifyEndpoint()`. If a model's upstream doesn't support Anthropic tool format, set `"anthropic_tools_disabled": true` in its model config.
 
 **Scenario detection priority** (`internal/router/scenarios.go`):
 
@@ -41,6 +43,8 @@ For streaming, the router downgrades to fast models (Qwen3.6 Plus) for better TT
 
 **Polymorphic field handling:** Anthropic's `system` and `content` fields accept both strings and arrays. `pkg/types/` uses `json.RawMessage` with accessor methods (`SystemText()`, `ContentBlocks()`) to handle both formats.
 
+**Long-running stream policy:** The proxy never kills a stream that is actively producing bytes. The server-level `WriteTimeout` is set to 0; instead each upstream read uses a per-`Read` deadline via `http.ResponseController.SetReadDeadline` that is renewed on every successful byte. If the gap between bytes exceeds `OpenCodeGo.stream_timeout_ms` (or `OpenCodeZen.stream_timeout_ms`), the connection is treated as stuck and the request is routed to the next fallback model. `stream_timeout_ms` defaults to the same provider's `timeout_ms` when unset. Client disconnects during a stream are logged at `Debug` level — this is normal during Claude Code tool execution and is not a failure signal.
+
 ## Key Files
 
 - `cmd/oc-go-cc/main.go` — CLI entry point (cobra). Default config template is generated here.

diff --git a/cmd/oc-go-cc/main.go b/cmd/oc-go-cc/main.go
@@ -538,7 +538,7 @@ func getDefaultConfig() string {
   "port": 3456,
   "hot_reload": false,
   "enable_streaming_scenario_routing": false,
-  "respect_requested_model": false,
+  "respect_requested_model": true,
   "models": {
     "background": {
       "provider": "opencode-go",

diff --git a/internal/client/opencode.go b/internal/client/opencode.go
@@ -42,12 +42,6 @@ func (c *OpenCodeClient) nextAPIKey(keys []string) string {
 
 // NewOpenCodeClient creates a new OpenCode client.
 func NewOpenCodeClient(atomic *config.AtomicConfig) *OpenCodeClient {
-	cfg := atomic.Get()
-	timeout := time.Duration(cfg.OpenCodeGo.TimeoutMs) * time.Millisecond
-	if timeout == 0 {
-		timeout = 5 * time.Minute
-	}
-
 	transport := &http.Transport{
 		MaxIdleConns:        100,
 		MaxIdleConnsPerHost: 20,
@@ -60,21 +54,51 @@ func NewOpenCodeClient(atomic *config.AtomicConfig) *OpenCodeClient {
 	return &OpenCodeClient{
 		atomic: atomic,
 		httpClient: &http.Client{
-			Timeout:   timeout,
 			Transport: transport,
 		},
 	}
 }
 
+// StreamIdleTimeout returns the maximum gap between bytes on an active stream
+// for a model. The stream lives as long as data keeps flowing; only an idle
+// period longer than this value is treated as a stuck connection and aborted.
+// Zen models read OpenCodeZen.StreamTimeoutMs and Go provider models read
+// OpenCodeGo.StreamTimeoutMs. A non-positive value falls back to the same
+// provider's TimeoutMs, so a Zen model never inherits the Go provider's
+// timeout. Returns 5 minutes when the receiver or its config holder is nil,
+// or when both values for the provider are non-positive.
+func (c *OpenCodeClient) StreamIdleTimeout(modelConfig config.ModelConfig) time.Duration {
+	const fallback = 5 * time.Minute
+	if c == nil || c.atomic == nil {
+		return fallback
+	}
+	cfg := c.atomic.Get()
+	ms, timeoutMs := cfg.OpenCodeGo.StreamTimeoutMs, cfg.OpenCodeGo.TimeoutMs
+	if IsZen(modelConfig) {
+		ms, timeoutMs = cfg.OpenCodeZen.StreamTimeoutMs, cfg.OpenCodeZen.TimeoutMs
+	}
+	if ms <= 0 {
+		ms = timeoutMs
+	}
+	if ms <= 0 {
+		return fallback
+	}
+	return time.Duration(ms) * time.Millisecond
+}
+
 // IsAnthropicModel returns true if the model requires the Anthropic endpoint.
-// This includes both Go models (minimax, all qwen) and Zen models (claude, qwen3.7-max).
+// Most Go provider models use the Chat Completions transform path for broader
+// compatibility (tool format, message roles, etc.). The exceptions listed here
+// are Go provider models whose upstream backends don't support the OpenAI Chat
+// Completions format and only accept the Anthropic Messages format.
+//
+// Zen models are not handled here; they are classified by ClassifyEndpoint.
 func IsAnthropicModel(modelID string) bool {
 	switch modelID {
-	case "minimax-m2.5", "minimax-m2.7", "minimax-m3",
-		"qwen3.5-plus", "qwen3.6-plus", "qwen3.7-plus", "qwen3.7-max":
+	case "qwen3.7-max": // OpenCode Go backend doesn't support oa-compat for this model
 		return true
 	default:
-		return isZenAnthropicModel(modelID)
+		return false
 	}
 }
 

diff --git a/internal/client/opencode_test.go b/internal/client/opencode_test.go
@@ -2,6 +2,7 @@ package client
 
 import (
 	"testing"
+	"time"
 
 	"oc-go-cc/internal/config"
 )
@@ -13,19 +14,19 @@ func TestIsAnthropicModelOnlyRoutesNativeAnthropicModels(t *testing.T) {
 		want    bool
 	}{
 		{
-			name:    "minimax m2.5 uses anthropic endpoint",
+			name:    "minimax m2.5 uses openai endpoint on Go provider",
 			modelID: "minimax-m2.5",
-			want:    true,
+			want:    false,
 		},
 		{
-			name:    "minimax m2.7 uses anthropic endpoint",
+			name:    "minimax m2.7 uses openai endpoint on Go provider",
 			modelID: "minimax-m2.7",
-			want:    true,
+			want:    false,
 		},
 		{
-			name:    "minimax m3 uses anthropic endpoint",
+			name:    "minimax m3 uses openai endpoint on Go provider",
 			modelID: "minimax-m3",
-			want:    true,
+			want:    false,
 		},
 		{
 			name:    "deepseek pro uses openai endpoint",
@@ -63,44 +64,44 @@ func TestIsAnthropicModelOnlyRoutesNativeAnthropicModels(t *testing.T) {
 			want:    false,
 		},
 		{
-			name:    "qwen3.5-plus uses anthropic endpoint",
+			name:    "qwen3.5-plus uses openai endpoint on Go provider",
 			modelID: "qwen3.5-plus",
-			want:    true,
+			want:    false,
 		},
 		{
-			name:    "qwen3.6-plus uses anthropic endpoint",
+			name:    "qwen3.6-plus uses openai endpoint on Go provider",
 			modelID: "qwen3.6-plus",
-			want:    true,
+			want:    false,
 		},
 		{
-			name:    "qwen3.7-plus uses anthropic endpoint",
+			name:    "qwen3.7-plus uses openai endpoint on Go provider",
 			modelID: "qwen3.7-plus",
-			want:    true,
+			want:    false,
 		},
 		{
-			name:    "qwen3.7-max uses anthropic endpoint",
+			name:    "qwen3.7-max uses anthropic endpoint (no oa-compat support)",
 			modelID: "qwen3.7-max",
 			want:    true,
 		},
 		{
-			name:    "claude-sonnet-4-5 uses anthropic endpoint",
+			name:    "claude models use openai endpoint on Go provider",
 			modelID: "claude-sonnet-4-5",
-			want:    true,
+			want:    false,
 		},
 		{
-			name:    "claude-opus-4-7 uses anthropic endpoint",
+			name:    "claude-opus-4-7 uses openai endpoint on Go provider",
 			modelID: "claude-opus-4-7",
-			want:    true,
+			want:    false,
 		},
 		{
-			name:    "claude-haiku-4-5 uses anthropic endpoint",
+			name:    "claude-haiku-4-5 uses openai endpoint on Go provider",
 			modelID: "claude-haiku-4-5",
-			want:    true,
+			want:    false,
 		},
 		{
-			name:    "claude-3-5-haiku uses anthropic endpoint",
+			name:    "claude-3-5-haiku uses openai endpoint on Go provider",
 			modelID: "claude-3-5-haiku",
-			want:    true,
+			want:    false,
 		},
 	}
 
@@ -461,3 +462,52 @@ func TestNextAPIKey_ConcurrentSafety(t *testing.T) {
 		}
 	}
 }
+
+// TestStreamIdleTimeout checks per-provider stream timeouts and the Go TimeoutMs fallback.
+func TestStreamIdleTimeout(t *testing.T) {
+	tests := []struct {
+		name          string
+		goMs          int
+		zenMs         int
+		goStreamUnset bool // zero OpenCodeGo.StreamTimeoutMs so the TimeoutMs fallback is exercised
+		provider      string
+		wantDur       time.Duration
+	}{
+		{
+			name:     "Go provider uses OpenCodeGo.StreamTimeoutMs",
+			goMs:     120000, // 2 min
+			provider: "opencode-go",
+			wantDur:  120 * time.Second,
+		},
+		{
+			name:     "Zen provider uses OpenCodeZen.StreamTimeoutMs",
+			goMs:     100000,
+			zenMs:    600000, // 10 min
+			provider: "opencode-zen",
+			wantDur:  10 * time.Minute,
+		},
+		{
+			name:          "falls back to OpenCodeGo.TimeoutMs when StreamTimeoutMs is zero",
+			goMs:          300000, // 5 min
+			goStreamUnset: true,
+			provider:      "opencode-go",
+			wantDur:       5 * time.Minute,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			cfg := &config.Config{
+				OpenCodeGo:  config.OpenCodeGoConfig{TimeoutMs: tt.goMs, StreamTimeoutMs: tt.goMs},
+				OpenCodeZen: config.OpenCodeZenConfig{TimeoutMs: tt.zenMs, StreamTimeoutMs: tt.zenMs},
+			}
+			if tt.goStreamUnset {
+				cfg.OpenCodeGo.StreamTimeoutMs = 0
+			}
+			c := &OpenCodeClient{atomic: config.NewAtomicConfig(cfg, "/tmp/test-config.json")}
+			mc := config.ModelConfig{Provider: tt.provider, ModelID: "test-model"}
+			if got := c.StreamIdleTimeout(mc); got != tt.wantDur {
+				t.Errorf("StreamIdleTimeout() = %v, want %v", got, tt.wantDur)
+			}
+		})
+	}
+}
diff --git a/internal/config/config.go b/internal/config/config.go
@@ -11,7 +11,7 @@ type Config struct {
 	Port                           int                      `json:"port"`
 	HotReload                      bool                     `json:"hot_reload"`
 	EnableStreamingScenarioRouting bool                     `json:"enable_streaming_scenario_routing"`
-	RespectRequestedModel          bool                     `json:"respect_requested_model"`
+	RespectRequestedModel          *bool                    `json:"respect_requested_model,omitempty"`
 	Models                         map[string]ModelConfig   `json:"models"`
 	Fallbacks                      map[string][]ModelConfig `json:"fallbacks"`
 	ModelOverrides                 map[string]ModelConfig   `json:"model_overrides"`
@@ -22,21 +22,23 @@ type Config struct {
 
 // ModelConfig defines routing rules for a specific model.
 type ModelConfig struct {
-	Provider         string          `json:"provider"`
-	ModelID          string          `json:"model_id"`
-	Temperature      float64         `json:"temperature"`
-	MaxTokens        int             `json:"max_tokens"`
-	ContextThreshold int             `json:"context_threshold"`
-	ReasoningEffort  string          `json:"reasoning_effort"`
-	Thinking         json.RawMessage `json:"thinking,omitempty"`
-	Vision           bool            `json:"vision"`
+	Provider               string          `json:"provider"`
+	ModelID                string          `json:"model_id"`
+	Temperature            float64         `json:"temperature"`
+	MaxTokens              int             `json:"max_tokens"`
+	ContextThreshold       int             `json:"context_threshold"`
+	ReasoningEffort        string          `json:"reasoning_effort"`
+	Thinking               json.RawMessage `json:"thinking,omitempty"`
+	Vision                 bool            `json:"vision"`
+	AnthropicToolsDisabled bool            `json:"anthropic_tools_disabled"`
 }
 
 // OpenCodeGoConfig holds the upstream OpenCode Go API settings.
 type OpenCodeGoConfig struct {
 	BaseURL          string `json:"base_url"`
 	AnthropicBaseURL string `json:"anthropic_base_url"`
 	TimeoutMs        int    `json:"timeout_ms"`
+	StreamTimeoutMs  int    `json:"stream_timeout_ms"`
 }
 
 // OpenCodeZenConfig holds the upstream OpenCode Zen API settings.
@@ -46,6 +48,7 @@ type OpenCodeZenConfig struct {
 	ResponsesBaseURL string `json:"responses_base_url"`
 	GeminiBaseURL    string `json:"gemini_base_url"`
 	TimeoutMs        int    `json:"timeout_ms"`
+	StreamTimeoutMs  int    `json:"stream_timeout_ms"`
 }
 
 // LoggingConfig controls application logging behavior.

diff --git a/internal/config/loader.go b/internal/config/loader.go
@@ -146,6 +146,9 @@ func applyDefaults(cfg *Config) {
 	if cfg.OpenCodeGo.TimeoutMs == 0 {
 		cfg.OpenCodeGo.TimeoutMs = defaultTimeoutMs
 	}
+	if cfg.OpenCodeGo.StreamTimeoutMs == 0 {
+		cfg.OpenCodeGo.StreamTimeoutMs = cfg.OpenCodeGo.TimeoutMs
+	}
 	if cfg.OpenCodeZen.BaseURL == "" {
 		cfg.OpenCodeZen.BaseURL = defaultZenBaseURL
 	}
@@ -161,6 +164,9 @@ func applyDefaults(cfg *Config) {
 	if cfg.OpenCodeZen.TimeoutMs == 0 {
 		cfg.OpenCodeZen.TimeoutMs = defaultTimeoutMs
 	}
+	if cfg.OpenCodeZen.StreamTimeoutMs == 0 {
+		cfg.OpenCodeZen.StreamTimeoutMs = cfg.OpenCodeZen.TimeoutMs
+	}
 	if cfg.Logging.Level == "" {
 		cfg.Logging.Level = defaultLogLevel
 	}
@@ -185,6 +191,33 @@ func validate(cfg *Config) error {
 	if err := validateModelOverrides(cfg.ModelOverrides); err != nil {
 		return err
 	}
+
+	if err := validateAnthropicToolsDisabled(cfg); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// validateAnthropicToolsDisabled reports every entry in models and
+// model_overrides that sets anthropic_tools_disabled. The flag only has an
+// effect on models routed to the Anthropic endpoint, and this package cannot
+// resolve a model's endpoint without the client package, so each flagged
+// entry produces a warning on stderr rather than an error. The function
+// always returns nil.
+func validateAnthropicToolsDisabled(cfg *Config) error {
+	// Entries in cfg.Models are the ones scenario routing can select.
+	for name, model := range cfg.Models {
+		if !model.AnthropicToolsDisabled {
+			continue
+		}
+		fmt.Fprintf(os.Stderr, "WARNING: config: models[%q] has anthropic_tools_disabled=true — this is only effective on models routing to the Anthropic endpoint\n", name)
+	}
+	// Overrides get the same advisory treatment.
+	for name, model := range cfg.ModelOverrides {
+		if !model.AnthropicToolsDisabled {
+			continue
+		}
+		fmt.Fprintf(os.Stderr, "WARNING: config: model_overrides[%q] has anthropic_tools_disabled=true — this is only effective on models routing to the Anthropic endpoint\n", name)
+	}
 	return nil
 }
 

diff --git a/internal/config/loader_test.go b/internal/config/loader_test.go
@@ -276,6 +276,10 @@ func TestDefaults(t *testing.T) {
 	if cfg.OpenCodeGo.TimeoutMs != defaultTimeoutMs {
 		t.Errorf("OpenCodeGo.TimeoutMs = %d, want %d", cfg.OpenCodeGo.TimeoutMs, defaultTimeoutMs)
 	}
+	if cfg.OpenCodeGo.StreamTimeoutMs != defaultTimeoutMs {
+		t.Errorf("OpenCodeGo.StreamTimeoutMs = %d, want %d (should default to TimeoutMs when unset)",
+			cfg.OpenCodeGo.StreamTimeoutMs, defaultTimeoutMs)
+	}
 	if cfg.OpenCodeZen.BaseURL != defaultZenBaseURL {
 		t.Errorf("OpenCodeZen.BaseURL = %q, want %q", cfg.OpenCodeZen.BaseURL, defaultZenBaseURL)
 	}
@@ -291,6 +295,10 @@ func TestDefaults(t *testing.T) {
 	if cfg.OpenCodeZen.TimeoutMs != defaultTimeoutMs {
 		t.Errorf("OpenCodeZen.TimeoutMs = %d, want %d", cfg.OpenCodeZen.TimeoutMs, defaultTimeoutMs)
 	}
+	if cfg.OpenCodeZen.StreamTimeoutMs != defaultTimeoutMs {
+		t.Errorf("OpenCodeZen.StreamTimeoutMs = %d, want %d (should default to TimeoutMs when unset)",
+			cfg.OpenCodeZen.StreamTimeoutMs, defaultTimeoutMs)
+	}
 	if cfg.Logging.Level != defaultLogLevel {
 		t.Errorf("LogLevel = %q, want %q", cfg.Logging.Level, defaultLogLevel)
 	}