Skip to content
46 changes: 46 additions & 0 deletions CONFIGURATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,49 @@ When a request arrives, the proxy selects a model chain using the following orde
3. **Scenario routing** — fall back to the scenario chain (`default`, `background`, `think`, `complex`, `long_context`, `fast`).

> **Trust model:** any client whose requests flow through the proxy can select from the configured `model_overrides` set without additional authentication. If you run the proxy as a shared service, treat `model_overrides` as a privileged allowlist.

### Streaming Scenario Routing

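`enable_streaming_scenario_routing` controls how streaming requests are routed. When it is `true`, streaming requests are evaluated by the full scenario router, exactly like non-streaming requests. When it is `false` or omitted, streaming requests are routed directly to the `fast` scenario.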

> **Note for Claude Code `/review-code`, `/ultracode`, and multi-agent workflows**
>
> If you use Claude Code workflows that dispatch many subagents or produce many parallel tool calls, enable streaming scenario routing:
>
> ```json
> {
> "enable_streaming_scenario_routing": true
> }
> ```
>
> Without this option, streaming requests are routed through the `fast` scenario even when the request is actually tool-heavy. This can route complex Claude Code workloads, such as `/review-code` with many `Agent` tool calls, to a fast model that may not handle parallel tool-call orchestration reliably.
>
> When enabled, streaming requests are evaluated by the same scenario router as non-streaming requests, allowing large or tool-heavy workloads to use `complex` or `long_context` models instead of always using the `fast` model.

Recommended setup for Claude Code review workflows:

```json
{
"enable_streaming_scenario_routing": true,
"models": {
"fast": {
"provider": "opencode-go",
"model_id": "deepseek-v4-flash",
"max_tokens": 4096
},
"complex": {
"provider": "opencode-go",
"model_id": "minimax-m3",
"max_tokens": 8192
},
"long_context": {
"provider": "opencode-go",
"model_id": "minimax-m3",
"max_tokens": 16384,
"context_threshold": 80000
}
}
}
```

Use the `fast` scenario for short/simple requests. Use `complex` or `long_context` for code review, multi-agent dispatch, large diffs, many tools, or long-context Claude Code sessions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ OpenCode Go gives you access to powerful open coding models for **$5/month** (th
- **Transparent Proxy** — Claude Code sends Anthropic-format requests, proxy transforms to OpenAI/Responses/Gemini format and back
- **Dual Provider Support** — Route models through OpenCode Go or OpenCode Zen based on your needs
- **Model Routing** — Automatically routes to different models based on context (default, thinking, long context, background)
- **Streaming Scenario Routing** — Configurable routing for streaming requests; enables proper scenario selection for Claude Code multi-agent and review workflows (see [CONFIGURATION.md](CONFIGURATION.md#streaming-scenario-routing))
- **Fallback Chains** — If a model fails, automatically tries the next one in your configured chain
- **Circuit Breaker** — Tracks model health and skips failing models to avoid latency spikes
- **Real-time Streaming** — Full SSE streaming with live format transformation
Expand Down
6 changes: 4 additions & 2 deletions configs/config.example.json
Original file line number Diff line number Diff line change
Expand Up @@ -185,15 +185,17 @@
"opencode_go": {
"base_url": "https://opencode.ai/zen/go/v1/chat/completions",
"anthropic_base_url": "https://opencode.ai/zen/go/v1/messages",
"timeout_ms": 300000
"timeout_ms": 300000,
"streaming_timeout_ms": 600000
},

"opencode_zen": {
"base_url": "https://opencode.ai/zen/v1/chat/completions",
"anthropic_base_url": "https://opencode.ai/zen/v1/messages",
"responses_base_url": "https://opencode.ai/zen/v1/responses",
"gemini_base_url": "https://opencode.ai/zen/v1/models",
"timeout_ms": 300000
"timeout_ms": 300000,
"streaming_timeout_ms": 600000
},

"logging": {
Expand Down
46 changes: 38 additions & 8 deletions internal/client/opencode.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,8 @@ func (c *OpenCodeClient) nextAPIKey(keys []string) string {
return keys[(old-1)%n]
}

// NewOpenCodeClient creates a new OpenCode client.
// NewOpenCodeClient creates a client that relies on request contexts for timeouts.
func NewOpenCodeClient(atomic *config.AtomicConfig) *OpenCodeClient {
cfg := atomic.Get()
timeout := time.Duration(cfg.OpenCodeGo.TimeoutMs) * time.Millisecond
if timeout == 0 {
timeout = 5 * time.Minute
}

transport := &http.Transport{
MaxIdleConns: 100,
MaxIdleConnsPerHost: 20,
Expand All @@ -60,12 +54,48 @@ func NewOpenCodeClient(atomic *config.AtomicConfig) *OpenCodeClient {
return &OpenCodeClient{
atomic: atomic,
httpClient: &http.Client{
Timeout: timeout,
Timeout: 0,
Transport: transport,
},
}
}

// RequestTimeout reports how long a single non-streaming attempt may run.
//
// The value is read from the current config snapshot: timeout_ms of the Zen
// settings when IsZen(model) is true, otherwise timeout_ms of the Go settings.
// A value of zero or less is treated as unset and yields a 5-minute default.
func (c *OpenCodeClient) RequestTimeout(model config.ModelConfig) time.Duration {
	const fallback = 5 * time.Minute

	snapshot := c.atomic.Get()
	zen := IsZen(model)

	ms := snapshot.OpenCodeGo.TimeoutMs
	if zen {
		ms = snapshot.OpenCodeZen.TimeoutMs
	}

	if ms <= 0 {
		return fallback
	}
	return time.Duration(ms) * time.Millisecond
}

// StreamingTimeout reports how long a single streaming attempt may run.
//
// Settings are taken from the Zen section when IsZen(model) is true and from
// the Go section otherwise. Within that section the first positive value wins,
// in this order: streaming_timeout_ms, then timeout_ms. If neither is positive
// the result is a 5-minute default.
func (c *OpenCodeClient) StreamingTimeout(model config.ModelConfig) time.Duration {
	snapshot := c.atomic.Get()

	var streamingMs, generalMs int
	if IsZen(model) {
		streamingMs = snapshot.OpenCodeZen.StreamingTimeoutMs
		generalMs = snapshot.OpenCodeZen.TimeoutMs
	} else {
		streamingMs = snapshot.OpenCodeGo.StreamingTimeoutMs
		generalMs = snapshot.OpenCodeGo.TimeoutMs
	}

	// Walk the candidates in priority order and use the first one that is set.
	for _, ms := range [...]int{streamingMs, generalMs} {
		if ms > 0 {
			return time.Duration(ms) * time.Millisecond
		}
	}
	return 5 * time.Minute
}

// IsAnthropicModel returns true if the model requires the Anthropic endpoint.
// This includes both Go models (minimax, all qwen) and Zen models (claude, qwen3.7-max).
func IsAnthropicModel(modelID string) bool {
Expand Down
134 changes: 134 additions & 0 deletions internal/client/opencode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package client

import (
"testing"
"time"

"oc-go-cc/internal/config"
)
Expand Down Expand Up @@ -461,3 +462,136 @@ func TestNextAPIKey_ConcurrentSafety(t *testing.T) {
}
}
}

func TestRequestTimeout_UsesConfiguredTimeout(t *testing.T) {
cfg := &config.Config{
OpenCodeGo: config.OpenCodeGoConfig{
TimeoutMs: 120000,
},
}
atomicCfg := config.NewAtomicConfig(cfg, "")
c := NewOpenCodeClient(atomicCfg)

model := config.ModelConfig{Provider: ProviderOpenCodeGo, ModelID: "kimi-k2.6"}
timeout := c.RequestTimeout(model)
if timeout != 120*time.Second {
t.Errorf("RequestTimeout = %v, want 120s", timeout)
}
}

// TestRequestTimeout_FallsBackToDefault expects the 5-minute default when the
// Go provider's timeout_ms is zero.
func TestRequestTimeout_FallsBackToDefault(t *testing.T) {
	goSettings := config.OpenCodeGoConfig{TimeoutMs: 0}
	store := config.NewAtomicConfig(&config.Config{OpenCodeGo: goSettings}, "")
	client := NewOpenCodeClient(store)

	target := config.ModelConfig{Provider: ProviderOpenCodeGo, ModelID: "kimi-k2.6"}
	if got := client.RequestTimeout(target); got != 5*time.Minute {
		t.Errorf("RequestTimeout = %v, want 5m", got)
	}
}

// TestRequestTimeout_ZenProvider expects a Zen-provider model to use the Zen
// section's timeout_ms (60000) rather than the unset Go section.
func TestRequestTimeout_ZenProvider(t *testing.T) {
	const want = 60 * time.Second

	zenOnly := &config.Config{
		OpenCodeZen: config.OpenCodeZenConfig{TimeoutMs: 60000},
	}
	client := NewOpenCodeClient(config.NewAtomicConfig(zenOnly, ""))

	got := client.RequestTimeout(config.ModelConfig{
		Provider: ProviderOpenCodeZen,
		ModelID:  "claude-sonnet-4.5",
	})
	if got != want {
		t.Errorf("RequestTimeout = %v, want 60s", got)
	}
}

// TestStreamingTimeout_UsesStreamingTimeoutMs expects streaming_timeout_ms to
// take precedence over timeout_ms when both are set for the Go provider.
func TestStreamingTimeout_UsesStreamingTimeoutMs(t *testing.T) {
	const want = 600 * time.Second

	goSettings := config.OpenCodeGoConfig{
		TimeoutMs:          300000,
		StreamingTimeoutMs: 600000,
	}
	client := NewOpenCodeClient(config.NewAtomicConfig(&config.Config{OpenCodeGo: goSettings}, ""))

	got := client.StreamingTimeout(config.ModelConfig{
		Provider: ProviderOpenCodeGo,
		ModelID:  "kimi-k2.6",
	})
	if got != want {
		t.Errorf("StreamingTimeout = %v, want 600s", got)
	}
}

// TestStreamingTimeout_FallsBackToTimeoutMs expects timeout_ms to be used for
// streaming when streaming_timeout_ms is zero.
func TestStreamingTimeout_FallsBackToTimeoutMs(t *testing.T) {
	store := config.NewAtomicConfig(&config.Config{
		OpenCodeGo: config.OpenCodeGoConfig{
			TimeoutMs:          300000,
			StreamingTimeoutMs: 0,
		},
	}, "")
	client := NewOpenCodeClient(store)

	target := config.ModelConfig{Provider: ProviderOpenCodeGo, ModelID: "kimi-k2.6"}
	if got := client.StreamingTimeout(target); got != 300*time.Second {
		t.Errorf("StreamingTimeout = %v, want 300s (fallback to timeout_ms)", got)
	}
}

// TestStreamingTimeout_FallsBackToDefault expects the 5-minute default when
// both streaming_timeout_ms and timeout_ms are zero.
func TestStreamingTimeout_FallsBackToDefault(t *testing.T) {
	const want = 5 * time.Minute

	unset := config.OpenCodeGoConfig{
		TimeoutMs:          0,
		StreamingTimeoutMs: 0,
	}
	client := NewOpenCodeClient(config.NewAtomicConfig(&config.Config{OpenCodeGo: unset}, ""))

	got := client.StreamingTimeout(config.ModelConfig{
		Provider: ProviderOpenCodeGo,
		ModelID:  "kimi-k2.6",
	})
	if got != want {
		t.Errorf("StreamingTimeout = %v, want 5m", got)
	}
}

// TestStreamingTimeout_ZenProvider expects a Zen-provider model to use the Zen
// section's streaming_timeout_ms.
func TestStreamingTimeout_ZenProvider(t *testing.T) {
	zenSettings := config.OpenCodeZenConfig{
		TimeoutMs:          300000,
		StreamingTimeoutMs: 600000,
	}
	store := config.NewAtomicConfig(&config.Config{OpenCodeZen: zenSettings}, "")
	client := NewOpenCodeClient(store)

	target := config.ModelConfig{Provider: ProviderOpenCodeZen, ModelID: "claude-sonnet-4.5"}
	if got := client.StreamingTimeout(target); got != 600*time.Second {
		t.Errorf("StreamingTimeout = %v, want 600s", got)
	}
}

func TestStreamingTimeout_SmallConfiguredValue(t *testing.T) {
cfg := &config.Config{
OpenCodeGo: config.OpenCodeGoConfig{
TimeoutMs: 300000,
StreamingTimeoutMs: 100,
},
}
atomicCfg := config.NewAtomicConfig(cfg, "")
c := NewOpenCodeClient(atomicCfg)

model := config.ModelConfig{Provider: ProviderOpenCodeGo, ModelID: "kimi-k2.6"}
timeout := c.StreamingTimeout(model)
if timeout != 100*time.Millisecond {
t.Errorf("StreamingTimeout = %v, want 100ms", timeout)
}
}
31 changes: 16 additions & 15 deletions internal/config/atomic.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ import (
"sync/atomic"
)

// AtomicConfig provides thread-safe access to the configuration with support
// for hot reloading. It uses atomic.Pointer for lock-free reads.
// AtomicConfig provides thread-safe config access with hot reload support.
type AtomicConfig struct {
ptr atomic.Pointer[Config]
path string
Expand All @@ -22,43 +21,46 @@ func NewAtomicConfig(cfg *Config, path string) *AtomicConfig {
return a
}

// Get returns the current configuration pointer. This is safe for concurrent use.
// Callers must not modify the returned Config.
// Get returns the current config pointer, read with a single atomic Load.
// Callers must treat the returned Config as read-only.
// NOTE(review): Reload nil-checks the value returned here, so the result may
// be nil if no config has been stored — confirm against NewAtomicConfig.
func (a *AtomicConfig) Get() *Config {
return a.ptr.Load()
}

// Reload reloads the configuration from disk and atomically swaps it in.
// If the reload fails, the old configuration is preserved and an error is returned.
// On successful reload, all registered callbacks are invoked.
// Reload loads the config from disk and swaps it in atomically.
func (a *AtomicConfig) Reload() error {
old := a.Get()
cfg, err := LoadFromPath(a.path)
if err != nil {
return err
}

// Warn about changes that require a server restart before swapping.
// Warn about settings that take effect differently on reload.
if old != nil {
if old.Host != cfg.Host || old.Port != cfg.Port {
slog.Warn("host/port changed but requires server restart to take effect",
"old_host", old.Host, "new_host", cfg.Host,
"old_port", old.Port, "new_port", cfg.Port)
}
if old.OpenCodeGo.TimeoutMs != cfg.OpenCodeGo.TimeoutMs {
slog.Warn("timeout_ms changed but requires server restart to take effect",
"old_timeout", old.OpenCodeGo.TimeoutMs,
"new_timeout", cfg.OpenCodeGo.TimeoutMs)
// Timeout changes apply on the next request.
if old.OpenCodeGo.TimeoutMs != cfg.OpenCodeGo.TimeoutMs ||
old.OpenCodeGo.StreamingTimeoutMs != cfg.OpenCodeGo.StreamingTimeoutMs ||
old.OpenCodeZen.TimeoutMs != cfg.OpenCodeZen.TimeoutMs ||
old.OpenCodeZen.StreamingTimeoutMs != cfg.OpenCodeZen.StreamingTimeoutMs {
slog.Info("timeout config updated, takes effect immediately",
"go_timeout_ms", cfg.OpenCodeGo.TimeoutMs,
"go_streaming_timeout_ms", cfg.OpenCodeGo.StreamingTimeoutMs,
"zen_timeout_ms", cfg.OpenCodeZen.TimeoutMs,
"zen_streaming_timeout_ms", cfg.OpenCodeZen.StreamingTimeoutMs)
}
}

// Copy callbacks to avoid holding lock during invocation
// Copy callbacks before invoking them.
a.mu.Lock()
callbacks := make([]func(*Config), len(a.onReload))
copy(callbacks, a.onReload)
a.mu.Unlock()

// Invoke callbacks BEFORE swapping — they may mutate cfg (e.g., port override).
// Callbacks run before the swap so they can adjust cfg.
for _, fn := range callbacks {
func() {
defer func() {
Expand All @@ -70,7 +72,6 @@ func (a *AtomicConfig) Reload() error {
}()
}

// Now cfg is fully prepared — safe for concurrent readers.
a.ptr.Store(cfg)

return nil
Expand Down
18 changes: 10 additions & 8 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,20 @@ type ModelConfig struct {

// OpenCodeGoConfig holds the upstream OpenCode Go API settings.
type OpenCodeGoConfig struct {
BaseURL string `json:"base_url"`
AnthropicBaseURL string `json:"anthropic_base_url"`
TimeoutMs int `json:"timeout_ms"`
BaseURL string `json:"base_url"`
AnthropicBaseURL string `json:"anthropic_base_url"`
// TimeoutMs is the attempt timeout in milliseconds. OpenCodeClient.RequestTimeout
// treats a value <= 0 as unset and uses 5 minutes instead.
TimeoutMs int `json:"timeout_ms"`
// StreamingTimeoutMs is the timeout in milliseconds for streaming attempts.
// OpenCodeClient.StreamingTimeout falls back to TimeoutMs when this is <= 0.
StreamingTimeoutMs int `json:"streaming_timeout_ms,omitempty"`
}

// OpenCodeZenConfig holds the upstream OpenCode Zen API settings.
type OpenCodeZenConfig struct {
BaseURL string `json:"base_url"`
AnthropicBaseURL string `json:"anthropic_base_url"`
ResponsesBaseURL string `json:"responses_base_url"`
GeminiBaseURL string `json:"gemini_base_url"`
TimeoutMs int `json:"timeout_ms"`
BaseURL string `json:"base_url"`
AnthropicBaseURL string `json:"anthropic_base_url"`
ResponsesBaseURL string `json:"responses_base_url"`
GeminiBaseURL string `json:"gemini_base_url"`
// TimeoutMs is the attempt timeout in milliseconds. OpenCodeClient.RequestTimeout
// treats a value <= 0 as unset and uses 5 minutes instead.
TimeoutMs int `json:"timeout_ms"`
// StreamingTimeoutMs is the timeout in milliseconds for streaming attempts.
// OpenCodeClient.StreamingTimeout falls back to TimeoutMs when this is <= 0.
StreamingTimeoutMs int `json:"streaming_timeout_ms,omitempty"`
}

// LoggingConfig controls application logging behavior.
Expand Down
Loading