-
-
Notifications
You must be signed in to change notification settings - Fork 66
feat(router): intelligent cost-aware + latency-aware routing #414
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,3 +37,4 @@ | |
|
|
||
| # Superpower design docs and plans (never commit) | ||
| /docs/superpowers/ | ||
| .superpowers/ | ||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| mode: set | ||
| gomodel/internal/providers/bailian/bailian.go:39.86,48.2 2 1 | ||
| gomodel/internal/providers/bailian/bailian.go:52.97,60.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:62.51,64.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:67.43,69.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:73.107,75.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:78.108,80.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:83.82,85.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:88.112,90.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:93.108,95.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:101.113,103.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:106.118,108.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:111.106,113.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:116.90,118.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:121.111,123.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:126.93,128.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:131.104,133.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:136.107,138.16 2 1 | ||
| gomodel/internal/providers/bailian/bailian.go:138.16,140.3 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:141.2,142.18 2 1 | ||
| gomodel/internal/providers/bailian/bailian.go:146.124,148.16 2 1 | ||
| gomodel/internal/providers/bailian/bailian.go:148.16,150.3 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:151.2,151.27 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:151.27,153.3 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:154.2,154.18 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:158.86,160.16 2 1 | ||
| gomodel/internal/providers/bailian/bailian.go:160.16,162.3 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:163.2,164.18 2 1 | ||
| gomodel/internal/providers/bailian/bailian.go:168.97,170.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:173.102,175.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:189.67,190.40 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:190.40,192.3 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:194.2,194.82 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:194.82,198.3 3 1 | ||
| gomodel/internal/providers/bailian/bailian.go:199.2,206.16 6 1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,127 @@ | ||
| package config | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "math" | ||
| "strconv" | ||
| "strings" | ||
| ) | ||
|
|
||
| // RouterConfig configures intelligent provider selection. When a request does | ||
| // not pin a provider and multiple providers serve the same model, the gateway | ||
| // scores candidates by cost and/or latency using the configured strategy. | ||
| type RouterConfig struct { | ||
| // Strategy is the default strategy id: "balanced", "cost_only", | ||
| // "latency_only", or "first_fit". Empty defaults to "balanced". | ||
| Strategy string `yaml:"strategy" json:"strategy" env:"MODEL_ROUTING_STRATEGY"` | ||
|
|
||
| // Weights tunes the balanced strategy. Its Cost and Latency fields are | ||
| // ignored by other strategies. | ||
| Weights RouterWeights `yaml:"weights" json:"weights"` | ||
|
|
||
| // WeightsCSV is the env-only form of Weights as "cost,latency" | ||
| // (e.g. "0.6,0.4"). Parsed into Weights during validation. | ||
| WeightsCSV string `yaml:"-" json:"-" env:"MODEL_ROUTING_STRATEGY_WEIGHTS"` | ||
|
|
||
| // MaxErrorRate filters candidates at/above this smoothed error ratio before | ||
| // scoring. Zero falls back to the strategy default (0.5). | ||
| MaxErrorRate float64 `yaml:"max_error_rate" json:"max_error_rate" env:"MODEL_ROUTING_MAX_ERROR_RATE"` | ||
| } | ||
|
|
||
| // RouterWeights tunes the balanced strategy's cost/latency trade-off. | ||
| type RouterWeights struct { | ||
| Cost float64 `yaml:"cost" json:"cost" env:"MODEL_ROUTING_COST_WEIGHT"` | ||
| Latency float64 `yaml:"latency" json:"latency" env:"MODEL_ROUTING_LATENCY_WEIGHT"` | ||
| } | ||
|
|
||
| // Built-in strategy ids accepted by RouterConfig.Strategy. | ||
| const ( | ||
| RouterStrategyBalanced = "balanced" | ||
| RouterStrategyCostOnly = "cost_only" | ||
| RouterStrategyLatencyOnly = "latency_only" | ||
| RouterStrategyFirstFit = "first_fit" | ||
| ) | ||
|
|
||
| // DefaultRouterConfig returns the default router configuration: balanced strategy | ||
| // with 0.6 cost / 0.4 latency weights and a 0.5 max error rate. | ||
| func DefaultRouterConfig() RouterConfig { | ||
| return RouterConfig{ | ||
| Strategy: RouterStrategyBalanced, | ||
| Weights: RouterWeights{ | ||
| Cost: 0.6, | ||
| Latency: 0.4, | ||
| }, | ||
| MaxErrorRate: 0.5, | ||
| } | ||
| } | ||
|
|
||
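| // ValidateRouterConfig normalizes and validates the router config in place. | ||
| // It defaults an empty strategy to balanced, lets WeightsCSV override Weights, | ||
| // restores the 0.6/0.4 weights when both are zero under the balanced strategy, | ||
| // and returns an error for any invalid value. | ||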
| func ValidateRouterConfig(cfg *RouterConfig) error { | ||
| if cfg.Strategy == "" { | ||
| cfg.Strategy = RouterStrategyBalanced | ||
| } | ||
| strategy := strings.ToLower(strings.TrimSpace(cfg.Strategy)) | ||
| switch strategy { | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| case RouterStrategyBalanced, RouterStrategyCostOnly, RouterStrategyLatencyOnly, RouterStrategyFirstFit: | ||
| default: | ||
| return fmt.Errorf("invalid router.strategy %q: must be one of balanced, cost_only, latency_only, first_fit", cfg.Strategy) | ||
| } | ||
| cfg.Strategy = strategy | ||
|
|
||
| // MODEL_ROUTING_STRATEGY_WEIGHTS overrides YAML weights when set. | ||
| if strings.TrimSpace(cfg.WeightsCSV) != "" { | ||
| cost, lat, err := parseWeightsCSV(cfg.WeightsCSV) | ||
| if err != nil { | ||
| return fmt.Errorf("invalid MODEL_ROUTING_STRATEGY_WEIGHTS: %w", err) | ||
| } | ||
| cfg.Weights = RouterWeights{Cost: cost, Latency: lat} | ||
| } | ||
|
|
||
| if math.IsNaN(cfg.Weights.Cost) || math.IsNaN(cfg.Weights.Latency) || | ||
| math.IsInf(cfg.Weights.Cost, 0) || math.IsInf(cfg.Weights.Latency, 0) { | ||
| return fmt.Errorf("router.weights must be finite numbers, got cost=%v latency=%v", cfg.Weights.Cost, cfg.Weights.Latency) | ||
| } | ||
| if cfg.Weights.Cost < 0 || cfg.Weights.Latency < 0 { | ||
| return fmt.Errorf("router.weights must be non-negative, got cost=%v latency=%v", cfg.Weights.Cost, cfg.Weights.Latency) | ||
| } | ||
| if cfg.Weights.Cost == 0 && cfg.Weights.Latency == 0 && strategy == RouterStrategyBalanced { | ||
| cfg.Weights = RouterWeights{Cost: 0.6, Latency: 0.4} | ||
| } | ||
| if math.IsNaN(cfg.MaxErrorRate) || math.IsInf(cfg.MaxErrorRate, 0) { | ||
| return fmt.Errorf("router.max_error_rate must be a finite number, got %v", cfg.MaxErrorRate) | ||
| } | ||
| if cfg.MaxErrorRate < 0 || cfg.MaxErrorRate > 1 { | ||
| return fmt.Errorf("router.max_error_rate must be in [0, 1], got %v", cfg.MaxErrorRate) | ||
| } | ||
|
Comment on lines
+94
to
+96
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The config validator accepts Context Used: CLAUDE.md (source)
coderabbitai[bot] marked this conversation as resolved.
|
||
| return nil | ||
| } | ||
|
|
||
| // parseWeightsCSV parses a "cost,latency" string into two finite, | ||
| // non-negative floats. Whitespace around each value is ignored. | ||
| func parseWeightsCSV(s string) (cost, latency float64, err error) { | ||
| parts := strings.Split(s, ",") | ||
| if len(parts) != 2 { | ||
| return 0, 0, fmt.Errorf("expected two comma-separated weights, got %q", s) | ||
| } | ||
| if cost, err = parseFloatField(parts[0], "cost"); err != nil { | ||
| return 0, 0, err | ||
| } | ||
| if latency, err = parseFloatField(parts[1], "latency"); err != nil { | ||
| return 0, 0, err | ||
| } | ||
| if cost < 0 || latency < 0 { | ||
| return 0, 0, fmt.Errorf("weights must be non-negative, got %v,%v", cost, latency) | ||
| } | ||
| return cost, latency, nil | ||
| } | ||
|
|
||
| func parseFloatField(s, name string) (float64, error) { | ||
| v, err := strconv.ParseFloat(strings.TrimSpace(s), 64) | ||
| if err != nil { | ||
| return 0, fmt.Errorf("invalid %s weight %q: %w", name, s, err) | ||
| } | ||
| if math.IsNaN(v) || math.IsInf(v, 0) { | ||
| return 0, fmt.Errorf("invalid %s weight %q: must be a finite number", name, s) | ||
| } | ||
| return v, nil | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| mode: set | ||
| gomodel/internal/providers/bailian/bailian.go:39.86,48.2 2 1 | ||
| gomodel/internal/providers/bailian/bailian.go:52.97,60.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:62.51,64.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:67.43,69.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:73.107,75.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:78.108,80.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:83.82,85.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:88.112,90.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:93.108,95.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:101.113,103.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:106.118,108.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:111.106,113.2 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:116.90,118.2 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:121.111,123.2 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:126.93,128.2 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:131.104,133.2 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:136.107,138.16 2 0 | ||
| gomodel/internal/providers/bailian/bailian.go:138.16,140.3 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:141.2,142.18 2 0 | ||
| gomodel/internal/providers/bailian/bailian.go:146.124,148.16 2 0 | ||
| gomodel/internal/providers/bailian/bailian.go:148.16,150.3 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:151.2,151.27 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:151.27,153.3 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:154.2,154.18 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:158.86,160.16 2 0 | ||
| gomodel/internal/providers/bailian/bailian.go:160.16,162.3 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:163.2,164.18 2 0 | ||
| gomodel/internal/providers/bailian/bailian.go:168.97,170.2 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:173.102,175.2 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:189.67,190.40 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:190.40,192.3 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:194.2,194.82 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:194.82,198.3 3 1 | ||
| gomodel/internal/providers/bailian/bailian.go:199.2,206.16 6 1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| mode: set | ||
| gomodel/internal/providers/bailian/bailian.go:39.86,48.2 2 1 | ||
| gomodel/internal/providers/bailian/bailian.go:52.97,60.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:62.51,64.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:67.43,69.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:73.107,75.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:78.108,80.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:83.82,85.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:88.112,90.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:93.108,95.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:101.113,103.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:106.118,108.2 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:111.106,113.2 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:116.90,118.2 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:121.111,123.2 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:126.93,128.2 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:131.104,133.2 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:136.107,138.16 2 0 | ||
| gomodel/internal/providers/bailian/bailian.go:138.16,140.3 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:141.2,142.18 2 0 | ||
| gomodel/internal/providers/bailian/bailian.go:146.124,148.16 2 0 | ||
| gomodel/internal/providers/bailian/bailian.go:148.16,150.3 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:151.2,151.27 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:151.27,153.3 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:154.2,154.18 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:158.86,160.16 2 0 | ||
| gomodel/internal/providers/bailian/bailian.go:160.16,162.3 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:163.2,164.18 2 0 | ||
| gomodel/internal/providers/bailian/bailian.go:168.97,170.2 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:173.102,175.2 1 0 | ||
| gomodel/internal/providers/bailian/bailian.go:189.67,190.40 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:190.40,192.3 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:194.2,194.82 1 1 | ||
| gomodel/internal/providers/bailian/bailian.go:194.82,198.3 3 1 | ||
| gomodel/internal/providers/bailian/bailian.go:199.2,206.16 6 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧹 Nitpick | 🔵 Trivial | ⚡ Quick win
Split this into short sub-bullets for scanability.
Line 134 packs defaults, runtime behavior, EWMA internals, and header override semantics into one dense line. Breaking it into 4–6 bullets will make operator usage much easier to parse.
Suggested structure
As per coding guidelines, “Documentation should be concise, practical, and user-focused. Show defaults, explain when to change them, and include minimal examples when useful.”
📝 Committable suggestion
🤖 Prompt for AI Agents
Source: Coding guidelines