From c144a8e68ccdec2f101234110c967c2c9bceed10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E6=B8=B8?= Date: Sun, 5 Oct 2025 16:04:31 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9C=A8=20feat:=20Smooth=20WRR=20for=20Lo?= =?UTF-8?q?ad=20Balancing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the Smooth WRR algorithm to achieve load balancing for providers and API keys. --- config/config.example.yaml | 10 ++++ src/types/config.ts | 7 +++ src/utils/load-balancing.ts | 107 ++++++++++++++++++++++++++++++++++++ src/utils/utils.ts | 22 ++++++-- 4 files changed, 142 insertions(+), 4 deletions(-) create mode 100644 src/utils/load-balancing.ts diff --git a/config/config.example.yaml b/config/config.example.yaml index ee69193..101ae9f 100644 --- a/config/config.example.yaml +++ b/config/config.example.yaml @@ -33,6 +33,16 @@ providers: responses: false base_url: https://openrouter.ai/api/v1 api_key: sk-openrouter-api-key + groq1: + type: openai + base_url: https://api.groq.com/openai/v1 + api_key: sk-groq-api-key + groq2: + type: openai + base_url: https://api.groq.com/openai/v1 + keys: + - api_key: sk-groq-api-key-1 + - api_key: sk-groq-api-key-2 models: openai/gpt-5: diff --git a/src/types/config.ts b/src/types/config.ts index bfaf928..ed70aea 100644 --- a/src/types/config.ts +++ b/src/types/config.ts @@ -80,6 +80,12 @@ export interface LMRouterConfigProvider { responses?: boolean; base_url?: string; api_key: string; + keys?: LMRouterConfigProviderKey[]; +} + +export interface LMRouterConfigProviderKey { + api_key: string; + weight?: number; } export interface LMRouterConfigModelProviderPricingFixed { @@ -113,6 +119,7 @@ export type LMRouterConfigModelProviderPricing = export interface LMRouterConfigModelProvider { provider: string; model: string; + weight?: number; context_window?: number; max_tokens?: number; responses_only?: boolean; diff --git a/src/utils/load-balancing.ts b/src/utils/load-balancing.ts new file mode 100644 index 0000000..9c045d5 --- /dev/null +++ b/src/utils/load-balancing.ts @@ -0,0 +1,107 @@ +import type { + LMRouterConfigModelProvider, + LMRouterConfigProviderKey, +} from "../types/config.js"; + +/** + * Implements a smooth weighted round-robin load balancer. + * This ensures a smooth and predictable distribution of requests based on provider weights, + * avoiding the potential for request bursts that can occur with purely random selection. + * + * The state (current weights) is stored in a global map, making it suitable for + * single-process environments. + */ +export class LoadBalancer { + // state_key -> current_weight + private static providerWeights = new Map(); + private static keyWeights = new Map(); + + public static getOrderedProviders( + providers: LMRouterConfigModelProvider[], + ): LMRouterConfigModelProvider[] { + if (!providers || providers.length === 0) { + return []; + } + + if (providers.length === 1) { + return providers; + } + + const totalWeight = providers.reduce( + (acc, p) => acc + (p.weight ?? 1), + 0, + ); + + // Find the provider with the highest current weight + let bestProvider: LMRouterConfigModelProvider | null = null; + let maxWeight = -Infinity; + + for (const provider of providers) { + const providerId = `${provider.provider}:${provider.model}`; + const currentWeight = this.providerWeights.get(providerId) ?? 0; + const newWeight = currentWeight + (provider.weight ?? 1); + this.providerWeights.set(providerId, newWeight); + + if (newWeight > maxWeight) { + maxWeight = newWeight; + bestProvider = provider; + } + } + + if (bestProvider) { + const providerId = `${bestProvider.provider}:${bestProvider.model}`; + // Decrease the best provider's weight by the total weight + this.providerWeights.set(providerId, maxWeight - totalWeight); + + // Sort providers to try the best one first, then the rest + return [...providers].sort((a, b) => (a === bestProvider ? -1 : b === bestProvider ? 1 : 0)); + } + + // Fallback to the original list if something goes wrong + return providers; + } + + public static getApiKey( + providerName: string, + keys?: LMRouterConfigProviderKey[], + ): string | undefined { + if (!keys || keys.length === 0) { + return undefined; + } + + if (keys.length === 1) { + return keys[0].api_key; + } + + const totalWeight = keys.reduce( + (acc, key) => acc + (key.weight ?? 1), + 0, + ); + + let bestKey: LMRouterConfigProviderKey | null = null; + let maxWeight = -Infinity; + + for (const key of keys) { + // Use a unique ID for each key within a provider + const keyId = `${providerName}:${key.api_key.slice(-4)}`; + const currentWeight = this.keyWeights.get(keyId) ?? 0; + const newWeight = currentWeight + (key.weight ?? 1); + this.keyWeights.set(keyId, newWeight); + + if (newWeight > maxWeight) { + maxWeight = newWeight; + bestKey = key; + } + } + + if (bestKey) { + const keyId = `${providerName}:${bestKey.api_key.slice(-4)}`; + // Decrease the best key's weight by the total weight + this.keyWeights.set(keyId, maxWeight - totalWeight); + return bestKey.api_key; + } + + // Fallback to the last key if something goes wrong + return keys[keys.length - 1].api_key; + } +} diff --git a/src/utils/utils.ts b/src/utils/utils.ts index b42e516..9c44ad3 100644 --- a/src/utils/utils.ts +++ b/src/utils/utils.ts @@ -9,6 +9,7 @@ import { getConnInfo as getConnInfoNode } from "@hono/node-server/conninfo"; import { recordApiCall } from "./billing.js"; import { TimeKeeper } from "./chrono.js"; import { getConfig } from "./config.js"; +import { LoadBalancer } from "./load-balancing.js" import type { LMRouterConfigModel, LMRouterConfigModelProvider, @@ -92,6 +93,7 @@ export const iterateModelProviders = async ( ): Promise => { const cfg = getConfig(c); let error: any = null; + let result: any = null; if (!c.var.model) { return c.json( @@ -104,20 +106,28 @@ export const iterateModelProviders = async ( ); } - for (const providerCfg of c.var.model.providers) { + // Get the providers list ordered by the smooth weighted round-robin algorithm. + // This provides both load balancing and a predictable order for failover. + const orderedProviders = LoadBalancer.getOrderedProviders(c.var.model.providers); + + for (const providerCfg of orderedProviders) { const provider = cfg.providers[providerCfg.provider]; if (!provider) { continue; } const hydratedProvider = { ...provider }; - hydratedProvider.api_key = - c.var.auth?.type === "byok" ? c.var.auth.byok : provider.api_key; + const byok = c.var.auth?.type === "byok" ? c.var.auth.byok : undefined; + hydratedProvider.api_key = byok + ? byok + : LoadBalancer.getApiKey(providerCfg.provider, provider.keys) ?? + provider.api_key; const timeKeeper = new TimeKeeper(); try { timeKeeper.record(); - return await cb(providerCfg, hydratedProvider); + result = await cb(providerCfg, hydratedProvider); + break; } catch (e) { timeKeeper.record(); await recordApiCall( @@ -138,6 +148,10 @@ export const iterateModelProviders = async ( } } + if (result) { + return result; + } + if (error) { return c.json( { From 7e6188ff19221067d394b3ad6de91a73f162ccbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E6=B8=B8?= Date: Mon, 6 Oct 2025 11:15:32 +0800 Subject: [PATCH 2/2] fix lint --- src/utils/load-balancing.ts | 14 +++++--------- src/utils/utils.ts | 10 ++++++---- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/utils/load-balancing.ts b/src/utils/load-balancing.ts index 9c045d5..0dfd61d 100644 --- a/src/utils/load-balancing.ts +++ b/src/utils/load-balancing.ts @@ -27,10 +27,7 @@ export class LoadBalancer { return providers; } - const totalWeight = providers.reduce( - (acc, p) => acc + (p.weight ?? 1), - 0, - ); + const totalWeight = providers.reduce((acc, p) => acc + (p.weight ?? 1), 0); // Find the provider with the highest current weight let bestProvider: LMRouterConfigModelProvider | null = null; @@ -54,7 +51,9 @@ export class LoadBalancer { this.providerWeights.set(providerId, maxWeight - totalWeight); // Sort providers to try the best one first, then the rest - return [...providers].sort((a, b) => (a === bestProvider ? -1 : b === bestProvider ? 1 : 0)); + return [...providers].sort((a, b) => + a === bestProvider ? -1 : b === bestProvider ? 1 : 0, + ); } // Fallback to the original list if something goes wrong @@ -73,10 +72,7 @@ export class LoadBalancer { return keys[0].api_key; } - const totalWeight = keys.reduce( - (acc, key) => acc + (key.weight ?? 1), - 0, - ); + const totalWeight = keys.reduce((acc, key) => acc + (key.weight ?? 1), 0); let bestKey: LMRouterConfigProviderKey | null = null; let maxWeight = -Infinity; diff --git a/src/utils/utils.ts b/src/utils/utils.ts index 9c44ad3..527ca14 100644 --- a/src/utils/utils.ts +++ b/src/utils/utils.ts @@ -9,7 +9,7 @@ import { getConnInfo as getConnInfoNode } from "@hono/node-server/conninfo"; import { recordApiCall } from "./billing.js"; import { TimeKeeper } from "./chrono.js"; import { getConfig } from "./config.js"; -import { LoadBalancer } from "./load-balancing.js" +import { LoadBalancer } from "./load-balancing.js"; import type { LMRouterConfigModel, LMRouterConfigModelProvider, @@ -108,7 +108,9 @@ export const iterateModelProviders = async ( // Get the providers list ordered by the smooth weighted round-robin algorithm. // This provides both load balancing and a predictable order for failover. - const orderedProviders = LoadBalancer.getOrderedProviders(c.var.model.providers); + const orderedProviders = LoadBalancer.getOrderedProviders( + c.var.model.providers, + ); for (const providerCfg of orderedProviders) { const provider = cfg.providers[providerCfg.provider]; @@ -120,8 +122,8 @@ export const iterateModelProviders = async ( const byok = c.var.auth?.type === "byok" ? c.var.auth.byok : undefined; hydratedProvider.api_key = byok ? byok - : LoadBalancer.getApiKey(providerCfg.provider, provider.keys) ?? - provider.api_key; + : (LoadBalancer.getApiKey(providerCfg.provider, provider.keys) ?? + provider.api_key); const timeKeeper = new TimeKeeper(); try {