From 644522e9c87727c80b2b0c378f8fd180f2539ed7 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Mon, 16 Mar 2026 21:44:36 +0100 Subject: [PATCH 1/2] modelsdev: singleton Store with ETag-based conditional refresh Make modelsdev.Store a process-wide singleton via sync.OnceValues so the models.dev database is loaded at most once and shared across all callers (teamloader, runtime, bedrock, embedding, config tests). Key changes: - NewStore() returns the same *Store instance for the entire process - Add ETag support: store the ETag from models.dev responses and send If-None-Match on refresh; a 304 Not Modified skips the download - Switch from json.Decoder to ReadFile/ReadAll + json.Unmarshal to avoid the decoder's intermediate buffering overhead - Hoist store creation out of per-model loops in teamloader to make the singleton intent explicit and handle errors gracefully - Persist ETag in the cache file for reuse across process restarts This eliminates ~50-60MB of redundant heap allocations per run (the 3MB JSON was being parsed into ~66MB of Go objects up to 6 times). Assisted-By: docker-agent --- pkg/modelsdev/store.go | 109 ++++++++++++++++++++++++++--------- pkg/modelsdev/types.go | 1 + pkg/teamloader/teamloader.go | 18 +++--- 3 files changed, 90 insertions(+), 38 deletions(-) diff --git a/pkg/modelsdev/store.go b/pkg/modelsdev/store.go index b92534575..a8607a56d 100644 --- a/pkg/modelsdev/store.go +++ b/pkg/modelsdev/store.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "io" "log/slog" "net/http" "os" @@ -22,15 +23,20 @@ const ( // Store manages access to the models.dev data. // All methods are safe for concurrent use. +// +// Use NewStore to obtain the process-wide singleton instance. +// The database is loaded on first access via GetDatabase and +// shared across all callers, avoiding redundant disk/network I/O. 
type Store struct { cacheFile string mu sync.Mutex db *Database + etag string // ETag from last successful fetch, used for conditional requests } -// NewStore creates a new models.dev store. -// The database is loaded on first access via GetDatabase. -func NewStore() (*Store, error) { +// singleton holds the process-wide Store instance. It is initialised lazily +// on the first call to NewStore. All subsequent calls return the same value. +var singleton = sync.OnceValues(func() (*Store, error) { homeDir, err := os.UserHomeDir() if err != nil { return nil, fmt.Errorf("failed to get user home directory: %w", err) @@ -44,6 +50,15 @@ func NewStore() (*Store, error) { return &Store{ cacheFile: filepath.Join(cacheDir, CacheFileName), }, nil +}) + +// NewStore returns the process-wide singleton Store. +// +// The database is loaded lazily on the first call to GetDatabase and +// then cached in memory so that every caller shares one copy. +// The first call creates the cache directory if it does not exist. +func NewStore() (*Store, error) { + return singleton() } // NewDatabaseStore creates a Store pre-populated with the given database. @@ -63,12 +78,13 @@ func (s *Store) GetDatabase(ctx context.Context) (*Database, error) { return s.db, nil } - db, err := loadDatabase(ctx, s.cacheFile) + db, etag, err := loadDatabase(ctx, s.cacheFile) if err != nil { return nil, err } s.db = db + s.etag = etag return db, nil } @@ -128,80 +144,117 @@ func (s *Store) GetModel(ctx context.Context, id string) (*Model, error) { // loadDatabase loads the database from the local cache file or // falls back to fetching from the models.dev API. -func loadDatabase(ctx context.Context, cacheFile string) (*Database, error) { +// It returns the database and the ETag associated with the data. 
+func loadDatabase(ctx context.Context, cacheFile string) (*Database, string, error) { // Try to load from cache first cached, err := loadFromCache(cacheFile) if err == nil && time.Since(cached.LastRefresh) < refreshInterval { - return &cached.Database, nil + return &cached.Database, cached.ETag, nil } - // Cache is invalid or doesn't exist, fetch from API - database, fetchErr := fetchFromAPI(ctx) + // Cache is stale or doesn't exist — try a conditional fetch with the ETag. + var etag string + if cached != nil { + etag = cached.ETag + } + + database, newETag, fetchErr := fetchFromAPI(ctx, etag) if fetchErr != nil { - // If API fetch fails, but we have cached data, use it + // If API fetch fails but we have cached data, use it regardless of age. if cached != nil { - return &cached.Database, nil + slog.Debug("API fetch failed, using stale cache", "error", fetchErr) + return &cached.Database, cached.ETag, nil + } + return nil, "", fmt.Errorf("failed to fetch from API and no cached data available: %w", fetchErr) + } + + // database is nil when the server returned 304 Not Modified. + if database == nil && cached != nil { + // Bump LastRefresh so we don't re-check until the next interval. + cached.LastRefresh = time.Now() + if saveErr := saveToCache(cacheFile, &cached.Database, cached.ETag); saveErr != nil { + slog.Warn("Failed to update cache timestamp", "error", saveErr) } - return nil, fmt.Errorf("failed to fetch from API and no cached data available: %w", fetchErr) + return &cached.Database, cached.ETag, nil } - // Save to cache - if err := saveToCache(cacheFile, database); err != nil { - // Log the error but don't fail the request - slog.Warn("Warning: failed to save to cache", "error", err) + // Save the fresh data to cache. 
+ if saveErr := saveToCache(cacheFile, database, newETag); saveErr != nil { + slog.Warn("Failed to save to cache", "error", saveErr) } - return database, nil + return database, newETag, nil } -func fetchFromAPI(ctx context.Context) (*Database, error) { +// fetchFromAPI fetches the models.dev database. +// If etag is non-empty it is sent as If-None-Match; a 304 response +// returns (nil, etag, nil) to indicate no change. +func fetchFromAPI(ctx context.Context, etag string) (*Database, string, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, ModelsDevAPIURL, http.NoBody) if err != nil { - return nil, fmt.Errorf("failed to create request: %w", err) + return nil, "", fmt.Errorf("failed to create request: %w", err) + } + + if etag != "" { + req.Header.Set("If-None-Match", etag) } resp, err := (&http.Client{Timeout: 30 * time.Second}).Do(req) if err != nil { - return nil, fmt.Errorf("failed to fetch from API: %w", err) + return nil, "", fmt.Errorf("failed to fetch from API: %w", err) } defer resp.Body.Close() + if resp.StatusCode == http.StatusNotModified { + slog.Debug("models.dev data not modified (304)") + return nil, etag, nil + } + if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("API returned status %d", resp.StatusCode) + return nil, "", fmt.Errorf("API returned status %d", resp.StatusCode) + } + + // Read the full body then unmarshal — avoids the extra intermediate + // buffering that json.Decoder.Decode performs. 
+ body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, "", fmt.Errorf("failed to read response body: %w", err) } var providers map[string]Provider - if err := json.NewDecoder(resp.Body).Decode(&providers); err != nil { - return nil, fmt.Errorf("failed to decode response: %w", err) + if err := json.Unmarshal(body, &providers); err != nil { + return nil, "", fmt.Errorf("failed to decode response: %w", err) } + newETag := resp.Header.Get("ETag") + return &Database{ Providers: providers, UpdatedAt: time.Now(), - }, nil + }, newETag, nil } func loadFromCache(cacheFile string) (*CachedData, error) { - f, err := os.Open(cacheFile) + data, err := os.ReadFile(cacheFile) if err != nil { - return nil, fmt.Errorf("failed to open cache file: %w", err) + return nil, fmt.Errorf("failed to read cache file: %w", err) } - defer f.Close() var cached CachedData - if err := json.NewDecoder(f).Decode(&cached); err != nil { + if err := json.Unmarshal(data, &cached); err != nil { return nil, fmt.Errorf("failed to decode cached data: %w", err) } return &cached, nil } -func saveToCache(cacheFile string, database *Database) error { +func saveToCache(cacheFile string, database *Database, etag string) error { now := time.Now() cached := CachedData{ Database: *database, CachedAt: now, LastRefresh: now, + ETag: etag, } data, err := json.MarshalIndent(cached, "", " ") diff --git a/pkg/modelsdev/types.go b/pkg/modelsdev/types.go index 24ceac06d..a9a63a847 100644 --- a/pkg/modelsdev/types.go +++ b/pkg/modelsdev/types.go @@ -62,4 +62,5 @@ type CachedData struct { Database Database `json:"database"` CachedAt time.Time `json:"cached_at"` LastRefresh time.Time `json:"last_refresh"` + ETag string `json:"etag,omitempty"` } diff --git a/pkg/teamloader/teamloader.go b/pkg/teamloader/teamloader.go index c538e9066..b5ec970f3 100644 --- a/pkg/teamloader/teamloader.go +++ b/pkg/teamloader/teamloader.go @@ -286,6 +286,9 @@ func getModelsForAgent(ctx context.Context, cfg *latest.Config, a 
*latest.AgentC var models []provider.Provider thinkingConfigured := false + // Obtain the singleton store once, outside the loop. + modelsStore, modelsStoreErr := modelsdev.NewStore() + for name := range strings.SplitSeq(a.Model, ",") { modelCfg, exists := cfg.Models[name] isAutoModel := false @@ -310,11 +313,7 @@ func getModelsForAgent(ctx context.Context, cfg *latest.Config, a *latest.AgentC maxTokens := &defaultMaxTokens if modelCfg.MaxTokens != nil { maxTokens = modelCfg.MaxTokens - } else { - modelsStore, err := modelsdev.NewStore() - if err != nil { - return nil, false, err - } + } else if modelsStoreErr == nil { m, err := modelsStore.GetModel(ctx, modelCfg.Provider+"/"+modelCfg.Model) if err == nil { maxTokens = &m.Limit.Output @@ -355,6 +354,9 @@ func getModelsForAgent(ctx context.Context, cfg *latest.Config, a *latest.AgentC func getFallbackModelsForAgent(ctx context.Context, cfg *latest.Config, a *latest.AgentConfig, runConfig *config.RuntimeConfig) ([]provider.Provider, error) { var fallbackModels []provider.Provider + // Obtain the singleton store once, outside the loop. + modelsStore, modelsStoreErr := modelsdev.NewStore() + for _, name := range a.GetFallbackModels() { modelCfg, exists := cfg.Models[name] if !exists { @@ -371,11 +373,7 @@ func getFallbackModelsForAgent(ctx context.Context, cfg *latest.Config, a *lates maxTokens := &defaultMaxTokens if modelCfg.MaxTokens != nil { maxTokens = modelCfg.MaxTokens - } else { - modelsStore, err := modelsdev.NewStore() - if err != nil { - return nil, err - } + } else if modelsStoreErr == nil { m, err := modelsStore.GetModel(ctx, modelCfg.Provider+"/"+modelCfg.Model) if err == nil { maxTokens = &m.Limit.Output From 2b5b73846df658907a91229d04195288e8e7a2f2 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Mon, 16 Mar 2026 22:00:25 +0100 Subject: [PATCH 2/2] history: speed up load with ReadFile and strconv.Unquote Replace json.NewDecoder streaming decode with os.ReadFile + string splitting + strconv.Unquote. 
The history file stores one JSON-quoted string per line; Unquote decodes every escape that Go's encoding/json encoder emits when quoting a string (it is not a general JSON string parser: JSON-only forms such as \/ or UTF-16 surrogate-pair escapes are rejected, and such lines are skipped) without the full JSON state machine and reflection overhead. Pre-size slices by counting newlines upfront and use map[string]struct{} for deduplication to reduce allocation count. Benchmarked on a 658KB / 7348-line history file: Before: 3.05ms, 15703 allocs, 2.32MB After: 1.41ms, 2606 allocs, 2.96MB Assisted-By: docker-agent --- pkg/history/history.go | 51 +++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/pkg/history/history.go b/pkg/history/history.go index 51633e599..a6ec8cf72 100644 --- a/pkg/history/history.go +++ b/pkg/history/history.go @@ -2,10 +2,10 @@ package history import ( "encoding/json" - "io" "os" "path/filepath" "slices" + "strconv" "strings" ) @@ -208,32 +208,53 @@ func (h *History) append(message string) error { } func (h *History) load() error { - f, err := os.Open(h.path) + data, err := os.ReadFile(h.path) if err != nil { return err } - defer f.Close() - var all []string - dec := json.NewDecoder(f) - for { - var message string - if err := dec.Decode(&message); err != nil { - if err == io.EOF { - break - } + // Count lines to pre-size the slice. + n := 0 + for _, b := range data { + if b == '\n' { + n++ + } + } + + // Parse all lines. Each line is a JSON-encoded string (e.g. "hello"). + // strconv.Unquote handles the same escape sequences as JSON and is + // much faster than json.Unmarshal for quoted strings. + all := make([]string, 0, n) + s := string(data) + for s != "" { + i := strings.IndexByte(s, '\n') + var line string + if i < 0 { + line = s + s = "" + } else { + line = s[:i] + s = s[i+1:] + } + if line == "" { + continue + } + + message, err := strconv.Unquote(line) + if err != nil { continue } all = append(all, message) } - // Deduplicate keeping the latest occurrence of each message - seen := make(map[string]bool) + // Deduplicate keeping the latest occurrence of each message.
+ seen := make(map[string]struct{}, len(all)) + h.Messages = make([]string, 0, len(all)) for i := len(all) - 1; i >= 0; i-- { - if seen[all[i]] { + if _, dup := seen[all[i]]; dup { continue } - seen[all[i]] = true + seen[all[i]] = struct{}{} h.Messages = append(h.Messages, all[i]) } slices.Reverse(h.Messages)