routatic · hungcuong9125 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/CONFIGURATION.md b/CONFIGURATION.md
@@ -243,3 +243,49 @@ When a request arrives, the proxy selects a model chain using the following orde
 3. **Scenario routing** — fall back to the scenario chain (`default`, `background`, `think`, `complex`, `long_context`, `fast`).
 
 > **Trust model:** any client whose requests flow through the proxy can select from the configured `model_overrides` set without additional authentication. If you run the proxy as a shared service, treat `model_overrides` as a privileged allowlist.
+
+### Streaming Scenario Routing
+
+`enable_streaming_scenario_routing` controls how streaming requests are routed: when it is `true` they are evaluated by the full scenario router; when it is not enabled they are routed directly to the `fast` scenario.
+
+> **Note for Claude Code `/review-code`, `/ultracode`, and multi-agent workflows**
+>
+> If you use Claude Code workflows that dispatch many subagents or produce many parallel tool calls, enable streaming scenario routing:
+>
+> ```json
+> {
+>   "enable_streaming_scenario_routing": true
+> }
+> ```
+>
+> Without this option, streaming requests are routed through the `fast` scenario even when the request is actually tool-heavy. This can route complex Claude Code workloads, such as `/review-code` with many `Agent` tool calls, to a fast model that may not handle parallel tool-call orchestration reliably.
+>
+> When enabled, streaming requests are evaluated by the same scenario router as non-streaming requests, allowing large or tool-heavy workloads to use `complex` or `long_context` models instead of always using the `fast` model.
+
+Recommended setup for Claude Code review workflows:
+
+```json
+{
+  "enable_streaming_scenario_routing": true,
+  "models": {
+    "fast": {
+      "provider": "opencode-go",
+      "model_id": "deepseek-v4-flash",
+      "max_tokens": 4096
+    },
+    "complex": {
+      "provider": "opencode-go",
+      "model_id": "minimax-m3",
+      "max_tokens": 8192
+    },
+    "long_context": {
+      "provider": "opencode-go",
+      "model_id": "minimax-m3",
+      "max_tokens": 16384,
+      "context_threshold": 80000
+    }
+  }
+}
+```
+
+Configure the `fast` scenario with a model suited to short/simple requests, and configure `complex` or `long_context` with models suited to code review, multi-agent dispatch, large diffs, many tools, or long-context Claude Code sessions.
diff --git a/README.md b/README.md
@@ -13,6 +13,7 @@ OpenCode Go gives you access to powerful open coding models for **$5/month** (th
 - **Transparent Proxy** — Claude Code sends Anthropic-format requests, proxy transforms to OpenAI/Responses/Gemini format and back
 - **Dual Provider Support** — Route models through OpenCode Go or OpenCode Zen based on your needs
 - **Model Routing** — Automatically routes to different models based on context (default, thinking, long context, background)
+- **Streaming Scenario Routing** — Configurable routing for streaming requests; enables proper scenario selection for Claude Code multi-agent and review workflows (see [CONFIGURATION.md](CONFIGURATION.md#streaming-scenario-routing))
 - **Fallback Chains** — If a model fails, automatically tries the next one in your configured chain
 - **Circuit Breaker** — Tracks model health and skips failing models to avoid latency spikes
 - **Real-time Streaming** — Full SSE streaming with live format transformation

diff --git a/configs/config.example.json b/configs/config.example.json
@@ -185,15 +185,17 @@
   "opencode_go": {
     "base_url": "https://opencode.ai/zen/go/v1/chat/completions",
     "anthropic_base_url": "https://opencode.ai/zen/go/v1/messages",
-    "timeout_ms": 300000
+    "timeout_ms": 300000,
+    "streaming_timeout_ms": 600000
   },
 
   "opencode_zen": {
     "base_url": "https://opencode.ai/zen/v1/chat/completions",
     "anthropic_base_url": "https://opencode.ai/zen/v1/messages",
     "responses_base_url": "https://opencode.ai/zen/v1/responses",
     "gemini_base_url": "https://opencode.ai/zen/v1/models",
-    "timeout_ms": 300000
+    "timeout_ms": 300000,
+    "streaming_timeout_ms": 600000
   },
 
   "logging": {

diff --git a/internal/client/opencode.go b/internal/client/opencode.go
@@ -40,14 +40,8 @@ func (c *OpenCodeClient) nextAPIKey(keys []string) string {
 	return keys[(old-1)%n]
 }
 
-// NewOpenCodeClient creates a new OpenCode client.
+// NewOpenCodeClient creates a client that relies on request contexts for timeouts.
 func NewOpenCodeClient(atomic *config.AtomicConfig) *OpenCodeClient {
-	cfg := atomic.Get()
-	timeout := time.Duration(cfg.OpenCodeGo.TimeoutMs) * time.Millisecond
-	if timeout == 0 {
-		timeout = 5 * time.Minute
-	}
-
 	transport := &http.Transport{
 		MaxIdleConns:        100,
 		MaxIdleConnsPerHost: 20,
@@ -60,12 +54,48 @@ func NewOpenCodeClient(atomic *config.AtomicConfig) *OpenCodeClient {
 	return &OpenCodeClient{
 		atomic: atomic,
 		httpClient: &http.Client{
-			Timeout:   timeout,
+			Timeout:   0,
 			Transport: transport,
 		},
 	}
 }
 
+// RequestTimeout returns the provider timeout for a non-streaming attempt.
+func (c *OpenCodeClient) RequestTimeout(model config.ModelConfig) time.Duration {
+	cfg := c.atomic.Get()
+	var timeoutMs int
+	if IsZen(model) {
+		timeoutMs = cfg.OpenCodeZen.TimeoutMs
+	} else {
+		timeoutMs = cfg.OpenCodeGo.TimeoutMs
+	}
+	if timeoutMs > 0 {
+		return time.Duration(timeoutMs) * time.Millisecond
+	}
+	return 5 * time.Minute
+}
+
+// StreamingTimeout returns the provider timeout for a streaming attempt.
+func (c *OpenCodeClient) StreamingTimeout(model config.ModelConfig) time.Duration {
+	cfg := c.atomic.Get()
+	var timeoutMs int
+	if IsZen(model) {
+		timeoutMs = cfg.OpenCodeZen.StreamingTimeoutMs
+		if timeoutMs <= 0 {
+			timeoutMs = cfg.OpenCodeZen.TimeoutMs
+		}
+	} else {
+		timeoutMs = cfg.OpenCodeGo.StreamingTimeoutMs
+		if timeoutMs <= 0 {
+			timeoutMs = cfg.OpenCodeGo.TimeoutMs
+		}
+	}
+	if timeoutMs > 0 {
+		return time.Duration(timeoutMs) * time.Millisecond
+	}
+	return 5 * time.Minute
+}
+
 // IsAnthropicModel returns true if the model requires the Anthropic endpoint.
 // This includes both Go models (minimax, all qwen) and Zen models (claude, qwen3.7-max).
 func IsAnthropicModel(modelID string) bool {

diff --git a/internal/client/opencode_test.go b/internal/client/opencode_test.go
@@ -2,6 +2,7 @@ package client
 
 import (
 	"testing"
+	"time"
 
 	"oc-go-cc/internal/config"
 )
@@ -461,3 +462,136 @@ func TestNextAPIKey_ConcurrentSafety(t *testing.T) {
 		}
 	}
 }
+
+func TestRequestTimeout_UsesConfiguredTimeout(t *testing.T) {
+	cfg := &config.Config{
+		OpenCodeGo: config.OpenCodeGoConfig{
+			TimeoutMs: 120000,
+		},
+	}
+	atomicCfg := config.NewAtomicConfig(cfg, "")
+	c := NewOpenCodeClient(atomicCfg)
+
+	model := config.ModelConfig{Provider: ProviderOpenCodeGo, ModelID: "kimi-k2.6"}
+	timeout := c.RequestTimeout(model)
+	if timeout != 120*time.Second {
+		t.Errorf("RequestTimeout = %v, want 120s", timeout)
+	}
+}
+
+func TestRequestTimeout_FallsBackToDefault(t *testing.T) {
+	cfg := &config.Config{
+		OpenCodeGo: config.OpenCodeGoConfig{
+			TimeoutMs: 0,
+		},
+	}
+	atomicCfg := config.NewAtomicConfig(cfg, "")
+	c := NewOpenCodeClient(atomicCfg)
+
+	model := config.ModelConfig{Provider: ProviderOpenCodeGo, ModelID: "kimi-k2.6"}
+	timeout := c.RequestTimeout(model)
+	if timeout != 5*time.Minute {
+		t.Errorf("RequestTimeout = %v, want 5m", timeout)
+	}
+}
+
+func TestRequestTimeout_ZenProvider(t *testing.T) {
+	cfg := &config.Config{
+		OpenCodeZen: config.OpenCodeZenConfig{
+			TimeoutMs: 60000,
+		},
+	}
+	atomicCfg := config.NewAtomicConfig(cfg, "")
+	c := NewOpenCodeClient(atomicCfg)
+
+	model := config.ModelConfig{Provider: ProviderOpenCodeZen, ModelID: "claude-sonnet-4.5"}
+	timeout := c.RequestTimeout(model)
+	if timeout != 60*time.Second {
+		t.Errorf("RequestTimeout = %v, want 60s", timeout)
+	}
+}
+
+func TestStreamingTimeout_UsesStreamingTimeoutMs(t *testing.T) {
+	cfg := &config.Config{
+		OpenCodeGo: config.OpenCodeGoConfig{
+			TimeoutMs:          300000,
+			StreamingTimeoutMs: 600000,
+		},
+	}
+	atomicCfg := config.NewAtomicConfig(cfg, "")
+	c := NewOpenCodeClient(atomicCfg)
+
+	model := config.ModelConfig{Provider: ProviderOpenCodeGo, ModelID: "kimi-k2.6"}
+	timeout := c.StreamingTimeout(model)
+	if timeout != 600*time.Second {
+		t.Errorf("StreamingTimeout = %v, want 600s", timeout)
+	}
+}
+
+func TestStreamingTimeout_FallsBackToTimeoutMs(t *testing.T) {
+	cfg := &config.Config{
+		OpenCodeGo: config.OpenCodeGoConfig{
+			TimeoutMs:          300000,
+			StreamingTimeoutMs: 0,
+		},
+	}
+	atomicCfg := config.NewAtomicConfig(cfg, "")
+	c := NewOpenCodeClient(atomicCfg)
+
+	model := config.ModelConfig{Provider: ProviderOpenCodeGo, ModelID: "kimi-k2.6"}
+	timeout := c.StreamingTimeout(model)
+	if timeout != 300*time.Second {
+		t.Errorf("StreamingTimeout = %v, want 300s (fallback to timeout_ms)", timeout)
+	}
+}
+
+func TestStreamingTimeout_FallsBackToDefault(t *testing.T) {
+	cfg := &config.Config{
+		OpenCodeGo: config.OpenCodeGoConfig{
+			TimeoutMs:          0,
+			StreamingTimeoutMs: 0,
+		},
+	}
+	atomicCfg := config.NewAtomicConfig(cfg, "")
+	c := NewOpenCodeClient(atomicCfg)
+
+	model := config.ModelConfig{Provider: ProviderOpenCodeGo, ModelID: "kimi-k2.6"}
+	timeout := c.StreamingTimeout(model)
+	if timeout != 5*time.Minute {
+		t.Errorf("StreamingTimeout = %v, want 5m", timeout)
+	}
+}
+
+func TestStreamingTimeout_ZenProvider(t *testing.T) {
+	cfg := &config.Config{
+		OpenCodeZen: config.OpenCodeZenConfig{
+			TimeoutMs:          300000,
+			StreamingTimeoutMs: 600000,
+		},
+	}
+	atomicCfg := config.NewAtomicConfig(cfg, "")
+	c := NewOpenCodeClient(atomicCfg)
+
+	model := config.ModelConfig{Provider: ProviderOpenCodeZen, ModelID: "claude-sonnet-4.5"}
+	timeout := c.StreamingTimeout(model)
+	if timeout != 600*time.Second {
+		t.Errorf("StreamingTimeout = %v, want 600s", timeout)
+	}
+}
+
+func TestStreamingTimeout_SmallConfiguredValue(t *testing.T) {
+	cfg := &config.Config{
+		OpenCodeGo: config.OpenCodeGoConfig{
+			TimeoutMs:          300000,
+			StreamingTimeoutMs: 100,
+		},
+	}
+	atomicCfg := config.NewAtomicConfig(cfg, "")
+	c := NewOpenCodeClient(atomicCfg)
+
+	model := config.ModelConfig{Provider: ProviderOpenCodeGo, ModelID: "kimi-k2.6"}
+	timeout := c.StreamingTimeout(model)
+	if timeout != 100*time.Millisecond {
+		t.Errorf("StreamingTimeout = %v, want 100ms", timeout)
+	}
+}
diff --git a/internal/config/atomic.go b/internal/config/atomic.go
@@ -6,8 +6,7 @@ import (
 	"sync/atomic"
 )
 
-// AtomicConfig provides thread-safe access to the configuration with support
-// for hot reloading. It uses atomic.Pointer for lock-free reads.
+// AtomicConfig provides thread-safe config access with hot reload support.
 type AtomicConfig struct {
 	ptr      atomic.Pointer[Config]
 	path     string
@@ -22,43 +21,46 @@ func NewAtomicConfig(cfg *Config, path string) *AtomicConfig {
 	return a
 }
 
-// Get returns the current configuration pointer. This is safe for concurrent use.
-// Callers must not modify the returned Config.
+// Get returns the current config pointer. Callers must treat it as read-only.
 func (a *AtomicConfig) Get() *Config {
 	return a.ptr.Load()
 }
 
-// Reload reloads the configuration from disk and atomically swaps it in.
-// If the reload fails, the old configuration is preserved and an error is returned.
-// On successful reload, all registered callbacks are invoked.
+// Reload loads the config from disk and swaps it in atomically.
 func (a *AtomicConfig) Reload() error {
 	old := a.Get()
 	cfg, err := LoadFromPath(a.path)
 	if err != nil {
 		return err
 	}
 
-	// Warn about changes that require a server restart before swapping.
+	// Warn about settings that take effect differently on reload.
 	if old != nil {
 		if old.Host != cfg.Host || old.Port != cfg.Port {
 			slog.Warn("host/port changed but requires server restart to take effect",
 				"old_host", old.Host, "new_host", cfg.Host,
 				"old_port", old.Port, "new_port", cfg.Port)
 		}
-		if old.OpenCodeGo.TimeoutMs != cfg.OpenCodeGo.TimeoutMs {
-			slog.Warn("timeout_ms changed but requires server restart to take effect",
-				"old_timeout", old.OpenCodeGo.TimeoutMs,
-				"new_timeout", cfg.OpenCodeGo.TimeoutMs)
+		// Timeout changes apply on the next request.
+		if old.OpenCodeGo.TimeoutMs != cfg.OpenCodeGo.TimeoutMs ||
+			old.OpenCodeGo.StreamingTimeoutMs != cfg.OpenCodeGo.StreamingTimeoutMs ||
+			old.OpenCodeZen.TimeoutMs != cfg.OpenCodeZen.TimeoutMs ||
+			old.OpenCodeZen.StreamingTimeoutMs != cfg.OpenCodeZen.StreamingTimeoutMs {
+			slog.Info("timeout config updated, takes effect immediately",
+				"go_timeout_ms", cfg.OpenCodeGo.TimeoutMs,
+				"go_streaming_timeout_ms", cfg.OpenCodeGo.StreamingTimeoutMs,
+				"zen_timeout_ms", cfg.OpenCodeZen.TimeoutMs,
+				"zen_streaming_timeout_ms", cfg.OpenCodeZen.StreamingTimeoutMs)
 		}
 	}
 
-	// Copy callbacks to avoid holding lock during invocation
+	// Copy callbacks before invoking them.
 	a.mu.Lock()
 	callbacks := make([]func(*Config), len(a.onReload))
 	copy(callbacks, a.onReload)
 	a.mu.Unlock()
 
-	// Invoke callbacks BEFORE swapping — they may mutate cfg (e.g., port override).
+	// Callbacks run before the swap so they can adjust cfg.
 	for _, fn := range callbacks {
 		func() {
 			defer func() {
@@ -70,7 +72,6 @@ func (a *AtomicConfig) Reload() error {
 		}()
 	}
 
-	// Now cfg is fully prepared — safe for concurrent readers.
 	a.ptr.Store(cfg)
 
 	return nil

diff --git a/internal/config/config.go b/internal/config/config.go
@@ -34,18 +34,20 @@ type ModelConfig struct {
 
 // OpenCodeGoConfig holds the upstream OpenCode Go API settings.
 type OpenCodeGoConfig struct {
-	BaseURL          string `json:"base_url"`
-	AnthropicBaseURL string `json:"anthropic_base_url"`
-	TimeoutMs        int    `json:"timeout_ms"`
+	BaseURL            string `json:"base_url"`
+	AnthropicBaseURL   string `json:"anthropic_base_url"`
+	TimeoutMs          int    `json:"timeout_ms"`
+	StreamingTimeoutMs int    `json:"streaming_timeout_ms,omitempty"` // milliseconds; values <= 0 make streaming attempts fall back to TimeoutMs
 }
 
 // OpenCodeZenConfig holds the upstream OpenCode Zen API settings.
 type OpenCodeZenConfig struct {
-	BaseURL          string `json:"base_url"`
-	AnthropicBaseURL string `json:"anthropic_base_url"`
-	ResponsesBaseURL string `json:"responses_base_url"`
-	GeminiBaseURL    string `json:"gemini_base_url"`
-	TimeoutMs        int    `json:"timeout_ms"`
+	BaseURL            string `json:"base_url"`
+	AnthropicBaseURL   string `json:"anthropic_base_url"`
+	ResponsesBaseURL   string `json:"responses_base_url"`
+	GeminiBaseURL      string `json:"gemini_base_url"`
+	TimeoutMs          int    `json:"timeout_ms"`
+	StreamingTimeoutMs int    `json:"streaming_timeout_ms,omitempty"` // milliseconds; values <= 0 make streaming attempts fall back to TimeoutMs
 }
 
 // LoggingConfig controls application logging behavior.