diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 3575f6d..b9cfe58 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -7,7 +7,11 @@ "Bash(grep:*)", "Bash(git commit:*)", "Bash(git push:*)", - "Bash(go:*)" + "Bash(go:*)", + "Bash(gh issue list:*)", + "Bash(gh issue view:*)", + "Bash(gh auth:*)", + "Bash(gh repo view:*)" ], "deny": [] } diff --git a/cmd/urlmap/main.go b/cmd/urlmap/main.go index 5d2f1d5..fe938d4 100644 --- a/cmd/urlmap/main.go +++ b/cmd/urlmap/main.go @@ -46,6 +46,11 @@ var ( jsThreshold float64 jsPoolSize int + // Cache flags + jsCacheEnabled bool + jsCacheSize int + jsCacheTTL time.Duration + // Robots.txt flags respectRobots bool ) @@ -105,6 +110,11 @@ func init() { // Browser pool flags rootCmd.Flags().IntVar(&jsPoolSize, "js-pool-size", 2, "Number of browser instances in the pool") + // Cache flags + rootCmd.Flags().BoolVar(&jsCacheEnabled, "js-cache", false, "Enable caching of JavaScript rendered pages") + rootCmd.Flags().IntVar(&jsCacheSize, "js-cache-size", 100, "Maximum number of cached entries") + rootCmd.Flags().DurationVar(&jsCacheTTL, "js-cache-ttl", 5*time.Minute, "Cache entry time-to-live") + // Robots.txt flags rootCmd.Flags().BoolVar(&respectRobots, "respect-robots", false, "Respect robots.txt rules and crawl delays") @@ -147,9 +157,12 @@ func runCrawl(cmd *cobra.Command, args []string) error { UserAgent: userAgent, Fallback: jsFallback, AutoDetect: jsAuto || jsAutoStrict, - StrictMode: jsAutoStrict, - Threshold: jsThreshold, - PoolSize: jsPoolSize, + StrictMode: jsAutoStrict, + Threshold: jsThreshold, + PoolSize: jsPoolSize, + CacheEnabled: jsCacheEnabled, + CacheSize: jsCacheSize, + CacheTTL: jsCacheTTL, } } diff --git a/internal/client/js_client.go b/internal/client/js_client.go index 94e73bd..627b213 100644 --- a/internal/client/js_client.go +++ b/internal/client/js_client.go @@ -12,6 +12,7 @@ type JSClient struct { pool *BrowserPool config *JSConfig logger *slog.Logger + cache *RenderCache } // NewJSClient creates a new JavaScript client with the given configuration @@ -40,6 +41,14 @@ func NewJSClient(config *JSConfig, logger *slog.Logger) (*JSClient, error) { logger: logger, } + // Create cache if enabled + if config.CacheEnabled { + client.cache = NewRenderCache(config.CacheSize, config.CacheTTL) + logger.Info("JavaScript render cache enabled", + "max_size", config.CacheSize, + "ttl", config.CacheTTL) + } + return client, nil } @@ -54,6 +63,30 @@ func (c *JSClient) RenderPage(ctx context.Context, targetURL string) (string, er // Get implements a similar interface to the HTTP client for compatibility func (c *JSClient) Get(ctx context.Context, targetURL string) (*JSResponse, error) { + // Check cache first if enabled + if c.cache != nil { + if entry, hit := c.cache.Get(targetURL); hit { + c.logger.Debug("Cache hit for URL", "url", targetURL) + + // Parse URL for response metadata + parsedURL, err := url.Parse(targetURL) + if err != nil { + return nil, fmt.Errorf("failed to parse URL: %w", err) + } + + return &JSResponse{ + URL: targetURL, + Content: entry.Content, + Status: entry.StatusCode, + Headers: entry.Headers, + Host: parsedURL.Host, + FromCache: true, + }, nil + } + c.logger.Debug("Cache miss for URL", "url", targetURL) + } + + // Not in cache, render the page content, err := c.RenderPage(ctx, targetURL) if err != nil { return nil, err @@ -65,13 +98,22 @@ func (c *JSClient) Get(ctx context.Context, targetURL string) (*JSResponse, erro return nil, fmt.Errorf("failed to parse URL: %w", err) } - return &JSResponse{ - URL: targetURL, - Content: content, - Status: 200, // Assume success if we got content - Headers: make(map[string]string), - Host: parsedURL.Host, - }, nil + response := &JSResponse{ + URL: targetURL, + Content: content, + Status: 200, // Assume success if we got content + Headers: make(map[string]string), + Host: parsedURL.Host, + FromCache: false, + } + + // Store in cache if enabled + if c.cache != nil { + c.cache.Set(targetURL, content, response.Headers, response.Status) + c.logger.Debug("Stored render result in cache", "url", targetURL) + } + + return response, nil } // Close cleans up the JavaScript client resources @@ -94,13 +136,22 @@ func (c *JSClient) GetPoolStats() map[string]interface{} { return c.pool.GetPoolStats() } +// GetCacheStats returns statistics about the render cache +func (c *JSClient) GetCacheStats() map[string]interface{} { + if c.cache == nil { + return nil + } + return c.cache.Stats() +} + // JSResponse represents a response from JavaScript rendering type JSResponse struct { - URL string - Content string - Status int - Headers map[string]string - Host string + URL string + Content string + Status int + Headers map[string]string + Host string + FromCache bool // Indicates if this response was served from cache } // String returns the rendered HTML content diff --git a/internal/client/js_client_test.go b/internal/client/js_client_test.go index 7357332..715259c 100644 --- a/internal/client/js_client_test.go +++ b/internal/client/js_client_test.go @@ -218,3 +218,162 @@ func TestJSResponse_StatusCode(t *testing.T) { t.Errorf("Expected status 200, got %d", status) } } + +func TestJSClient_CacheHit(t *testing.T) { + // Create test server + testServer := shared.CreateBasicTestServer() + defer testServer.Close() + + logger := slog.Default() + config := &JSConfig{ + Enabled: true, + BrowserType: "chromium", + Headless: true, + Timeout: 30 * time.Second, + WaitFor: "networkidle", + PoolSize: 1, + CacheEnabled: true, + CacheSize: 10, + CacheTTL: 1 * time.Hour, + } + + client, err := NewJSClient(config, logger) + if err != nil { + t.Fatalf("Failed to create JS client: %v", err) + } + defer client.Close() + + ctx := context.Background() + + // First request - should not be cached + response1, err := client.Get(ctx, testServer.URL) + if err != nil { + t.Fatalf("Failed to get page: %v", err) + } + + if response1.FromCache { + t.Error("First request should not be from cache") + } + + // Second request - should be cached + response2, err := client.Get(ctx, testServer.URL) + if err != nil { + t.Fatalf("Failed to get cached page: %v", err) + } + + if !response2.FromCache { + t.Error("Second request should be from cache") + } + + // Content should be the same + if response1.Content != response2.Content { + t.Error("Cached content should match original content") + } + + // Check cache stats + cacheStats := client.GetCacheStats() + if cacheStats == nil { + t.Fatal("Cache stats should not be nil") + } + + if cacheStats["size"].(int) != 1 { + t.Errorf("Expected cache size 1, got %v", cacheStats["size"]) + } +} + +func TestJSClient_CacheExpiration(t *testing.T) { + // Create test server + testServer := shared.CreateBasicTestServer() + defer testServer.Close() + + logger := slog.Default() + config := &JSConfig{ + Enabled: true, + BrowserType: "chromium", + Headless: true, + Timeout: 30 * time.Second, + WaitFor: "networkidle", + PoolSize: 1, + CacheEnabled: true, + CacheSize: 10, + CacheTTL: 100 * time.Millisecond, // Short TTL for testing + } + + client, err := NewJSClient(config, logger) + if err != nil { + t.Fatalf("Failed to create JS client: %v", err) + } + defer client.Close() + + ctx := context.Background() + + // First request + response1, err := client.Get(ctx, testServer.URL) + if err != nil { + t.Fatalf("Failed to get page: %v", err) + } + + if response1.FromCache { + t.Error("First request should not be from cache") + } + + // Wait for cache to expire + time.Sleep(150 * time.Millisecond) + + // Second request - should not be cached (expired) + response2, err := client.Get(ctx, testServer.URL) + if err != nil { + t.Fatalf("Failed to get page after expiration: %v", err) + } + + if response2.FromCache { + t.Error("Request after expiration should not be from cache") + } +} + +func TestJSClient_CacheDisabled(t *testing.T) { + // Create test server + testServer := shared.CreateBasicTestServer() + defer testServer.Close() + + logger := slog.Default() + config := &JSConfig{ + Enabled: true, + BrowserType: "chromium", + Headless: true, + Timeout: 30 * time.Second, + WaitFor: "networkidle", + PoolSize: 1, + CacheEnabled: false, // Cache disabled + } + + client, err := NewJSClient(config, logger) + if err != nil { + t.Fatalf("Failed to create JS client: %v", err) + } + defer client.Close() + + ctx := context.Background() + + // First request + response1, err := client.Get(ctx, testServer.URL) + if err != nil { + t.Fatalf("Failed to get page: %v", err) + } + + // Second request - should not be cached + response2, err := client.Get(ctx, testServer.URL) + if err != nil { + t.Fatalf("Failed to get page: %v", err) + } + + if response1.FromCache || response2.FromCache { + t.Error("No requests should be from cache when caching is disabled") + } + + // Cache stats should be nil + cacheStats := client.GetCacheStats() + if cacheStats != nil { + t.Error("Cache stats should be nil when caching is disabled") + } +} diff --git a/internal/client/js_config.go b/internal/client/js_config.go index 57202f3..60f9804 100644 --- a/internal/client/js_config.go +++ b/internal/client/js_config.go @@ -40,22 +40,34 @@ type JSConfig struct { // PoolSize specifies the number of browser instances in the pool PoolSize int + + // CacheEnabled indicates whether to cache rendered pages + CacheEnabled bool + + // CacheSize specifies the maximum number of cache entries + CacheSize int + + // CacheTTL specifies how long cache entries remain valid + CacheTTL time.Duration } // DefaultJSConfig returns a default JavaScript configuration func DefaultJSConfig() *JSConfig { return &JSConfig{ - Enabled: false, - BrowserType: "chromium", - Headless: true, - Timeout: 30 * time.Second, - WaitFor: "networkidle", - UserAgent: "urlmap/1.0", - AutoDetect: false, - StrictMode: false, - Threshold: 0.5, - Fallback: true, - PoolSize: 2, + Enabled: false, + BrowserType: "chromium", + Headless: true, + Timeout: 30 * time.Second, + WaitFor: "networkidle", + UserAgent: "urlmap/1.0", + AutoDetect: false, + StrictMode: false, + Threshold: 0.5, + Fallback: true, + PoolSize: 2, + CacheEnabled: false, + CacheSize: 100, + CacheTTL: 5 * time.Minute, } } @@ -101,5 +113,15 @@ func (c *JSConfig) Validate() error { return fmt.Errorf("pool size must be positive, got: %v", c.PoolSize) } + // Validate cache configuration + if c.CacheEnabled { + if c.CacheSize <= 0 { + return fmt.Errorf("cache size must be positive when cache is enabled, got: %v", c.CacheSize) + } + if c.CacheTTL <= 0 { + return fmt.Errorf("cache TTL must be positive when cache is enabled, got: %v", c.CacheTTL) + } + } + return nil } diff --git a/internal/client/render_cache.go b/internal/client/render_cache.go new file mode 100644 index 0000000..2fafde1 --- /dev/null +++ b/internal/client/render_cache.go @@ -0,0 +1,180 @@ +package client + +import ( + "sync" + "sync/atomic" + "time" +) + +// CacheEntry represents a cached render result +type CacheEntry struct { + URL string + Content string + Headers map[string]string + StatusCode int + Timestamp time.Time +} + +// RenderCache is a thread-safe in-memory cache for rendered pages +type RenderCache struct { + // Thread-safe storage + entries sync.Map // key: string (URL), value: *CacheEntry + + // Configuration + maxSize int64 // Maximum number of entries + ttl time.Duration // Time to live for cache entries + + // Size tracking + size int64 // Current number of entries (atomic) + + // For eviction + accessOrder sync.Map // key: string (URL), value: time.Time (last access time) + mu sync.Mutex +} + +// NewRenderCache creates a new render cache with the given configuration +func NewRenderCache(maxSize int, ttl time.Duration) *RenderCache { + return &RenderCache{ + maxSize: int64(maxSize), + ttl: ttl, + size: 0, + } +} + +// Get retrieves a cached entry if it exists and is not expired +func (c *RenderCache) Get(url string) (*CacheEntry, bool) { + // Try to get the entry + value, exists := c.entries.Load(url) + if !exists { + return nil, false + } + + entry := value.(*CacheEntry) + + // Check if the entry has expired + if time.Since(entry.Timestamp) > c.ttl { + // Remove expired entry + c.Delete(url) + return nil, false + } + + // Update access time + c.accessOrder.Store(url, time.Now()) + + return entry, true +} + +// Set stores a new cache entry, evicting oldest entries if necessary +func (c *RenderCache) Set(url string, content string, headers map[string]string, statusCode int) { + // Create new entry + entry := &CacheEntry{ + URL: url, + Content: content, + Headers: headers, + StatusCode: statusCode, + Timestamp: time.Now(), + } + + // Check if we're updating an existing entry + _, exists := c.entries.Load(url) + + // Store the entry + c.entries.Store(url, entry) + c.accessOrder.Store(url, time.Now()) + + // Update size if this is a new entry + if !exists { + newSize := atomic.AddInt64(&c.size, 1) + + // Check if we need to evict + if newSize > c.maxSize { + c.evictOldest() + } + } +} + +// Delete removes an entry from the cache +func (c *RenderCache) Delete(url string) { + if _, exists := c.entries.LoadAndDelete(url); exists { + c.accessOrder.Delete(url) + atomic.AddInt64(&c.size, -1) + } +} + +// evictOldest removes the least recently accessed entries until we're under maxSize +func (c *RenderCache) evictOldest() { + c.mu.Lock() + defer c.mu.Unlock() + + // Collect all entries with their access times + type accessEntry struct { + url string + accessTime time.Time + } + + var entries []accessEntry + c.accessOrder.Range(func(key, value interface{}) bool { + entries = append(entries, accessEntry{ + url: key.(string), + accessTime: value.(time.Time), + }) + return true + }) + + // Sort by access time (oldest first) + // Using simple bubble sort for small datasets + for i := 0; i < len(entries); i++ { + for j := i + 1; j < len(entries); j++ { + if entries[i].accessTime.After(entries[j].accessTime) { + entries[i], entries[j] = entries[j], entries[i] + } + } + } + + // Evict oldest entries until we're under maxSize + currentSize := atomic.LoadInt64(&c.size) + for i := 0; i < len(entries) && currentSize > c.maxSize; i++ { + c.Delete(entries[i].url) + currentSize = atomic.LoadInt64(&c.size) + } +} + +// Size returns the current number of entries in the cache +func (c *RenderCache) Size() int { + return int(atomic.LoadInt64(&c.size)) +} + +// Clear removes all entries from the cache +func (c *RenderCache) Clear() { + c.entries.Range(func(key, value interface{}) bool { + c.entries.Delete(key) + c.accessOrder.Delete(key) + return true + }) + atomic.StoreInt64(&c.size, 0) +} + +// Stats returns cache statistics +func (c *RenderCache) Stats() map[string]interface{} { + var expiredCount int + var validCount int + + c.entries.Range(func(key, value interface{}) bool { + entry := value.(*CacheEntry) + if time.Since(entry.Timestamp) > c.ttl { + expiredCount++ + } else { + validCount++ + } + return true + }) + + return map[string]interface{}{ + "size": c.Size(), + "max_size": c.maxSize, + "ttl_seconds": c.ttl.Seconds(), + "valid_entries": validCount, + "expired_entries": expiredCount, + } +} + diff --git a/internal/client/render_cache_test.go b/internal/client/render_cache_test.go new file mode 100644 index 0000000..b7812d7 --- /dev/null +++ b/internal/client/render_cache_test.go @@ -0,0 +1,355 @@ +package client + +import ( + "fmt" + "sync" + "testing" + "time" +) + +func TestNewRenderCache(t *testing.T) { + cache := NewRenderCache(100, 5*time.Minute) + + if cache == nil { + t.Fatal("NewRenderCache returned nil") + } + + if cache.maxSize != 100 { + t.Errorf("Expected maxSize 100, got %d", cache.maxSize) + } + + if cache.ttl != 5*time.Minute { + t.Errorf("Expected ttl 5 minutes, got %v", cache.ttl) + } + + if cache.Size() != 0 { + t.Errorf("Expected initial size 0, got %d", cache.Size()) + } +} + +func TestRenderCache_SetAndGet(t *testing.T) { + cache := NewRenderCache(10, 1*time.Hour) + + // Test basic set and get + url := "https://example.com" + content := "Test" + headers := map[string]string{"Content-Type": "text/html"} + statusCode := 200 + + cache.Set(url, content, headers, statusCode) + + // Test successful get + entry, found := cache.Get(url) + if !found { + t.Error("Expected to find cached entry") + } + + if entry.URL != url { + t.Errorf("Expected URL %s, got %s", url, entry.URL) + } + + if entry.Content != content { + t.Errorf("Expected content %s, got %s", content, entry.Content) + } + + if entry.StatusCode != statusCode { + t.Errorf("Expected status code %d, got %d", statusCode, entry.StatusCode) + } + + // Test cache size + if cache.Size() != 1 { + t.Errorf("Expected cache size 1, got %d", cache.Size()) + } +} + +func TestRenderCache_TTLExpiration(t *testing.T) { + cache := NewRenderCache(10, 100*time.Millisecond) + + url := "https://example.com" + content := "Test" + cache.Set(url, content, nil, 200) + + // Entry should be found immediately + _, found := cache.Get(url) + if !found { + t.Error("Expected to find cached entry immediately after setting") + } + + // Wait for TTL to expire + time.Sleep(150 * time.Millisecond) + + // Entry should be expired and removed + _, found = cache.Get(url) + if found { + t.Error("Expected entry to be expired and not found") + } + + // Size should be 0 after expiration + if cache.Size() != 0 { + t.Errorf("Expected cache size 0 after expiration, got %d", cache.Size()) + } +} + +func TestRenderCache_Eviction(t *testing.T) { + cache := NewRenderCache(3, 1*time.Hour) + + // Fill cache to capacity + cache.Set("url1", "content1", nil, 200) + time.Sleep(10 * time.Millisecond) + cache.Set("url2", "content2", nil, 200) + time.Sleep(10 * time.Millisecond) + cache.Set("url3", "content3", nil, 200) + + if cache.Size() != 3 { + t.Errorf("Expected cache size 3, got %d", cache.Size()) + } + + // Access url1 to make it more recently used + cache.Get("url1") + + // Add a fourth entry, should evict the oldest (url2) + cache.Set("url4", "content4", nil, 200) + + // Cache should still have size 3 + if cache.Size() != 3 { + t.Errorf("Expected cache size 3 after eviction, got %d", cache.Size()) + } + + // url2 should have been evicted (oldest non-accessed) + _, found := cache.Get("url2") + if found { + t.Error("Expected url2 to be evicted") + } + + // Other URLs should still be present + _, found = cache.Get("url1") + if !found { + t.Error("Expected url1 to still be in cache") + } + + _, found = cache.Get("url3") + if !found { + t.Error("Expected url3 to still be in cache") + } + + _, found = cache.Get("url4") + if !found { + t.Error("Expected url4 to still be in cache") + } +} + +func TestRenderCache_Delete(t *testing.T) { + cache := NewRenderCache(10, 1*time.Hour) + + url := "https://example.com" + cache.Set(url, "content", nil, 200) + + // Verify entry exists + _, found := cache.Get(url) + if !found { + t.Error("Expected to find cached entry") + } + + // Delete the entry + cache.Delete(url) + + // Verify entry is gone + _, found = cache.Get(url) + if found { + t.Error("Expected entry to be deleted") + } + + // Verify size is 0 + if cache.Size() != 0 { + t.Errorf("Expected cache size 0 after deletion, got %d", cache.Size()) + } +} + +func TestRenderCache_Clear(t *testing.T) { + cache := NewRenderCache(10, 1*time.Hour) + + // Add multiple entries + for i := 0; i < 5; i++ { + url := fmt.Sprintf("https://example.com/page%d", i) + cache.Set(url, fmt.Sprintf("content%d", i), nil, 200) + } + + if cache.Size() != 5 { + t.Errorf("Expected cache size 5, got %d", cache.Size()) + } + + // Clear the cache + cache.Clear() + + // Verify all entries are gone + if cache.Size() != 0 { + t.Errorf("Expected cache size 0 after clear, got %d", cache.Size()) + } + + // Verify individual entries are gone + for i := 0; i < 5; i++ { + url := fmt.Sprintf("https://example.com/page%d", i) + _, found := cache.Get(url) + if found { + t.Errorf("Expected %s to be cleared", url) + } + } +} + +func TestRenderCache_UpdateExisting(t *testing.T) { + cache := NewRenderCache(10, 1*time.Hour) + + url := "https://example.com" + cache.Set(url, "original content", nil, 200) + + if cache.Size() != 1 { + t.Errorf("Expected cache size 1, got %d", cache.Size()) + } + + // Update the same URL + cache.Set(url, "updated content", nil, 200) + + // Size should still be 1 + if cache.Size() != 1 { + t.Errorf("Expected cache size 1 after update, got %d", cache.Size()) + } + + // Content should be updated + entry, found := cache.Get(url) + if !found { + t.Error("Expected to find cached entry") + } + + if entry.Content != "updated content" { + t.Errorf("Expected updated content, got %s", entry.Content) + } +} + +func TestRenderCache_ConcurrentAccess(t *testing.T) { + cache := NewRenderCache(100, 1*time.Hour) + + // Number of goroutines and operations + numGoroutines := 10 + numOperations := 100 + + var wg sync.WaitGroup + wg.Add(numGoroutines) + + // Launch concurrent goroutines + for i := 0; i < numGoroutines; i++ { + go func(id int) { + defer wg.Done() + + for j := 0; j < numOperations; j++ { + url := fmt.Sprintf("https://example.com/page%d-%d", id, j) + + // Alternate between set and get operations + if j%2 == 0 { + cache.Set(url, fmt.Sprintf("content%d-%d", id, j), nil, 200) + } else { + // Try to get a previously set URL + prevURL := fmt.Sprintf("https://example.com/page%d-%d", id, j-1) + cache.Get(prevURL) + } + + // Occasionally delete an entry + if j%10 == 0 && j > 0 { + deleteURL := fmt.Sprintf("https://example.com/page%d-%d", id, j-10) + cache.Delete(deleteURL) + } + } + }(i) + } + + wg.Wait() + + // Verify cache is in a valid state + size := cache.Size() + if size < 0 || size > 100 { + t.Errorf("Cache size out of bounds: %d", size) + } + + // Verify we can still perform operations + cache.Set("final-test", "final-content", nil, 200) + entry, found := cache.Get("final-test") + if !found { + t.Error("Failed to set and get after concurrent access") + } + if entry.Content != "final-content" { + t.Errorf("Expected final-content, got %s", entry.Content) + } +} + +func TestRenderCache_Stats(t *testing.T) { + cache := NewRenderCache(10, 100*time.Millisecond) + + // Add some entries + cache.Set("url1", "content1", nil, 200) + cache.Set("url2", "content2", nil, 200) + + // Wait for entries to expire + time.Sleep(150 * time.Millisecond) + + // Add a fresh entry + cache.Set("url3", "content3", nil, 200) + + stats := cache.Stats() + + // Check stats structure + if stats["max_size"].(int64) != 10 { + t.Errorf("Expected max_size 10, got %v", stats["max_size"]) + } + + if stats["ttl_seconds"].(float64) != 0.1 { + t.Errorf("Expected ttl_seconds 0.1, got %v", stats["ttl_seconds"]) + } + + // Size should be 3 (2 expired + 1 valid) + if stats["size"].(int) != 3 { + t.Errorf("Expected size 3, got %v", stats["size"]) + } + + if stats["valid_entries"].(int) != 1 { + t.Errorf("Expected 1 valid entry, got %v", stats["valid_entries"]) + } + + if stats["expired_entries"].(int) != 2 { + t.Errorf("Expected 2 expired entries, got %v", stats["expired_entries"]) + } +} + +func TestRenderCache_ComplexEviction(t *testing.T) { + cache := NewRenderCache(5, 1*time.Hour) + + // Fill cache with entries accessed at different times + urls := []string{"url1", "url2", "url3", "url4", "url5"} + for i, url := range urls { + cache.Set(url, fmt.Sprintf("content%d", i), nil, 200) + time.Sleep(10 * time.Millisecond) + + // Access some entries to change their access time + if i == 1 { + cache.Get("url1") // Make url1 more recently accessed than url2 + } + } + + // Now access pattern: url5 (newest), url4, url3, url2, url1 (recently accessed) + // Add new entries to trigger eviction + cache.Set("url6", "content6", nil, 200) + + // url2 should be evicted (oldest access time among the original entries) + _, found := cache.Get("url2") + if found { + t.Error("Expected url2 to be evicted") + } + + // Check other URLs are still present + expectedPresent := []string{"url1", "url3", "url4", "url5", "url6"} + for _, url := range expectedPresent { + _, found := cache.Get(url) + if !found { + t.Errorf("Expected %s to be in cache", url) + } + } +} + diff --git a/urlmap b/urlmap index 9557cd6..51ddabf 100755 Binary files a/urlmap and b/urlmap differ