Skip to content

Commit 71b8079

Browse files
committed
fix: cleanup bug in lrucache
1 parent cf3d0ac commit 71b8079

2 files changed

Lines changed: 76 additions & 13 deletions

File tree

design-process/6-iteration.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,16 @@ A very naive implementation of round tripper has been added which takes control
1616

1717
Inspired by traefik's internally used safe package, a helper function GoWithRecover is put in utils for error handling in cases of panic. Previous the upstream was importing discovery and the relationship was pretty messed up. Now all discovery related code is taken out of upstream package. Now upstream is a dependency of discovery and not the other way around.
1818

19+
## Bug in LRUCache
20+
21+
The cleanup function for a key was cancelled only on the whole Cache reset. But when the same key is Set again or the key is deleted, we still want to stop the cleanup goroutines as either that key no longer exists(deleted) or a new cleanup function exists for it(updated).
22+
I feel uncomfortable having one cleanup goroutine per key. Adding this to the TODO. Maybe a single separate garbage collector go routine that will be listening on Events will be better.
23+
1924
## TODO
2025

2126
- Better error handling and context passing.
2227
- TLS termination support
2328
- More efficient route matching
2429
- Plugins for most common use cases like - traffic split, auth, redirects, circuit breaking etc.
2530
- Support for docker as service discoverer.
31+
- Refactor LRU cache cleanup to be more efficient.

pkg/cache/lru.go

Lines changed: 70 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,36 +18,73 @@ type Node[T any] struct {
1818
}
1919

2020
type LRUCache[T any] struct {
21-
ctx context.Context
22-
cancel context.CancelFunc
2321
Head *Node[T]
2422
Tail *Node[T]
2523
m map[string]*Node[T] // ← Store NODES, not just values
2624
maxSize int
2725
mx sync.Mutex
2826
log *logger.Logger
27+
28+
//for managing the cleanup goroutines for TTL
29+
ttlCtx context.Context
30+
ttlCancel context.CancelFunc
31+
keyCancels map[string]context.CancelFunc
32+
ttlWg sync.WaitGroup
2933
}
3034

3135
func NewLRUCache[T any](size int, logger *logger.Logger) *LRUCache[T] {
32-
ctx, cancel := context.WithCancel(context.Background()) // should parent context be passed?
36+
// Note: Its okay to use Background here as this is the context that will be
37+
// used for all TTL goroutines. Passing context from NewLRUCache will complicate things for no reason.
38+
// LRUCache exposes a Destroy() method for cleanup which the caller must respect.
39+
ctx, cancel := context.WithCancel(context.Background())
3340
return &LRUCache[T]{
34-
m: make(map[string]*Node[T]),
35-
maxSize: size,
36-
ctx: ctx,
37-
cancel: cancel,
38-
log: logger.WithComponent("LRUCache"),
41+
m: make(map[string]*Node[T]),
42+
maxSize: size,
43+
log: logger.WithComponent("LRUCache"),
44+
ttlCtx: ctx,
45+
keyCancels: make(map[string]context.CancelFunc),
46+
ttlCancel: cancel,
47+
}
48+
}
49+
50+
func (nlru *LRUCache[T]) Stop() {
51+
nlru.log.Debugf("Stopping LRU Cache TTL goroutines")
52+
nlru.mx.Lock()
53+
defer nlru.mx.Unlock()
54+
55+
if nlru.ttlCancel != nil {
56+
nlru.ttlCancel()
3957
}
58+
nlru.ttlWg.Wait()
4059
}
4160

61+
// Resets but keeps the cache operational
4262
func (nlru *LRUCache[T]) Reset() {
4363
nlru.log.Debugf("Resetting LRU Cache")
64+
nlru.Stop()
65+
4466
nlru.mx.Lock()
4567
defer nlru.mx.Unlock()
46-
nlru.cancel()
47-
nlru.ctx, nlru.cancel = context.WithCancel(context.Background())
4868
nlru.Head = nil
4969
nlru.Tail = nil
5070
nlru.m = make(map[string]*Node[T])
71+
72+
// recreate context for TTL goroutines
73+
nlru.ttlCtx, nlru.ttlCancel = context.WithCancel(context.Background())
74+
}
75+
76+
// Destroys completely - cannot be used after this
77+
func (nlru *LRUCache[T]) Destroy() {
78+
nlru.log.Debugf("Destroying LRU Cache")
79+
nlru.Stop()
80+
nlru.mx.Lock()
81+
defer nlru.mx.Unlock()
82+
nlru.Head = nil
83+
nlru.Tail = nil
84+
nlru.m = nil
85+
nlru.ttlCtx = nil
86+
nlru.keyCancels = nil
87+
nlru.ttlCancel = nil
5188
}
5289

5390
func (nlru *LRUCache[T]) DebugGet() map[string]T {
@@ -68,6 +105,14 @@ func (nlru *LRUCache[T]) PrintList() string {
68105
return ans
69106
}
70107

108+
func (lru *LRUCache[T]) cancelKeyTTL(key string) {
109+
if cancel, exists := lru.keyCancels[key]; exists {
110+
lru.log.Debugf("Cancelling TTL goroutine for key %s", key)
111+
cancel()
112+
delete(lru.keyCancels, key)
113+
}
114+
}
115+
71116
// O(1) pop - no traversal needed!
72117
func (lru *LRUCache[T]) pop(node *Node[T]) {
73118
if node == nil {
@@ -130,6 +175,8 @@ func (lru *LRUCache[T]) Delete(key string) (ok bool) {
130175

131176
if node, ok := lru.m[key]; ok {
132177
lru.pop(node)
178+
// Cancel any existing TTL goroutine for this key
179+
lru.cancelKeyTTL(key)
133180
delete(lru.m, key)
134181
return true
135182
}
@@ -141,6 +188,8 @@ func (lru *LRUCache[T]) poptail() {
141188
if lru.Tail == nil {
142189
return
143190
}
191+
// Cancel TTL cleanup go routine for the evicted key
192+
lru.cancelKeyTTL(lru.Tail.key)
144193

145194
// Remove from map and list
146195
delete(lru.m, lru.Tail.key)
@@ -154,6 +203,8 @@ func (lru *LRUCache[T]) Set(key string, val T, ttl time.Duration) {
154203

155204
// Check if key already exists
156205
if existingNode, exists := lru.m[key]; exists {
206+
// Cancel any existing TTL goroutine for this key
207+
lru.cancelKeyTTL(key)
157208
// Update value and move to front
158209
existingNode.value = val
159210
lru.pop(existingNode)
@@ -175,19 +226,25 @@ func (lru *LRUCache[T]) Set(key string, val T, ttl time.Duration) {
175226
lru.push(newNode)
176227
}
177228

178-
// TTL handling (same as before)
229+
// TTL handling with proper context
179230
if ttl >= 0 {
231+
keyCtx, keyCancel := context.WithCancel(lru.ttlCtx)
232+
lru.keyCancels[key] = keyCancel
233+
lru.ttlWg.Add(1)
180234
utils.GoWithRecover(func() {
235+
defer lru.ttlWg.Done()
236+
181237
select {
182-
case <-lru.ctx.Done():
183-
lru.log.Debugf("context done, stopping TTL goroutine for key %s", key)
238+
case <-keyCtx.Done():
239+
lru.log.Debugf("TTL context done for key %s", key)
184240
return
185241
case <-time.After(ttl):
186242
lru.log.Debugf("TTL expired for key %s, deleting from LRU Cache after %v", key, ttl)
187243
lru.Delete(key)
188244
}
189245
}, func(err any) {
190246
lru.log.Infof("panic in LRU cache ttl goroutine: %v", err)
247+
lru.ttlWg.Done()
191248
})
192249
}
193250
}

0 commit comments

Comments
 (0)