diff --git a/cmd/hooks.go b/cmd/hooks.go index de10223..da6d04e 100644 --- a/cmd/hooks.go +++ b/cmd/hooks.go @@ -9,9 +9,11 @@ import ( "os/exec" "path/filepath" "regexp" + "strconv" "strings" "time" + "codemap/limits" "codemap/scanner" "codemap/watch" ) @@ -23,10 +25,15 @@ type hubInfo struct { Imports map[string][]string } -// getHubInfo returns hub info from daemon state (fast) or fresh scan (slow) +// getHubInfo returns hub info from daemon state. +// If daemon isn't running, falls back to a fresh scan. func getHubInfo(root string) *hubInfo { - // Try daemon state first (instant) if state := watch.ReadState(root); state != nil { + // State may contain file/event info only (no dependency graph) on very + // large repos. Avoid expensive fallback scans in that case. + if len(state.Importers) == 0 && len(state.Imports) == 0 && len(state.Hubs) == 0 { + return nil + } return &hubInfo{ Hubs: state.Hubs, Importers: state.Importers, @@ -34,7 +41,12 @@ func getHubInfo(root string) *hubInfo { } } - // Fall back to fresh scan (slower) + // If daemon is running but state is unavailable, skip expensive fallback. + if watch.IsRunning(root) { + return nil + } + + // Fall back to fresh scan only when daemon is not running. fg, err := scanner.BuildFileGraph(root) if err != nil { return nil @@ -47,6 +59,17 @@ func getHubInfo(root string) *hubInfo { } } +func waitForDaemonState(root string, timeout time.Duration) *watch.State { + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if state := watch.ReadState(root); state != nil { + return state + } + time.Sleep(100 * time.Millisecond) + } + return nil +} + // RunHook executes the named hook with the given project root func RunHook(hookName, root string) error { switch hookName { @@ -94,13 +117,55 @@ func hookSessionStart(root string) error { fmt.Println("📍 Project Context:") fmt.Println() - // Run codemap to show full tree structure + // IMPORTANT: Hook output goes directly into Claude's "Messages" context, not system prompt. + // This means hook output competes with conversation history for the ~200k token limit. + // A 1.3MB output (like a full tree of a 10k file repo) = ~500k tokens = instant context overflow. + // + // We enforce two limits: + // 1. Adaptive depth: larger repos get shallower trees (depth 2-4 based on file count) + // 2. Hard cap: 60KB max output (~15k tokens, <10% of context window) + // + // Future: Consider structured output that Claude Code can format/truncate intelligently. + fileCount := 0 + fileCountKnown := false + state := watch.ReadState(root) + if state == nil && watch.IsRunning(root) { + state = waitForDaemonState(root, 2*time.Second) + } + if state != nil { + fileCount = state.FileCount + fileCountKnown = true + } + exe, err := os.Executable() if err == nil { - cmd := exec.Command(exe, root) - cmd.Stdout = os.Stdout + depth := limits.AdaptiveDepth(fileCount) + cmd := exec.Command(exe, "--depth", strconv.Itoa(depth), root) + + // Capture output to enforce size limit + var buf strings.Builder + cmd.Stdout = &buf cmd.Stderr = os.Stderr cmd.Run() + + output := buf.String() + const maxBytes = limits.MaxContextOutputBytes + + if len(output) > maxBytes { + // Truncate and add warning + output = output[:maxBytes] + // Find last newline to avoid cutting mid-line + if idx := strings.LastIndex(output, "\n"); idx > maxBytes-1000 { + output = output[:idx] + } + repoSummary := "repo size unknown" + if fileCountKnown { + repoSummary = fmt.Sprintf("repo has %d files", fileCount) + } + output += "\n\n... (truncated - " + repoSummary + ", use `codemap .` for full tree)\n" + } + + fmt.Print(output) fmt.Println() } @@ -116,10 +181,12 @@ func hookSessionStart(root string) error { importers := len(info.Importers[hub]) fmt.Printf(" ⚠️ HUB FILE: %s (imported by %d files)\n", hub, importers) } + } else if fileCountKnown && fileCount > limits.LargeRepoFileCount { + fmt.Printf("ℹ️ Hub analysis skipped for large repo (%d files)\n", fileCount) } // Show diff vs main if on a feature branch - showDiffVsMain(root) + showDiffVsMain(root, fileCount, fileCountKnown) // Show last session context if resuming work if len(lastSessionEvents) > 0 { @@ -129,8 +196,9 @@ func hookSessionStart(root string) error { return nil } -// showDiffVsMain shows files changed on this branch vs main -func showDiffVsMain(root string) { +// showDiffVsMain shows files changed on this branch vs main. +// For large/unknown repos, uses lightweight git output to avoid expensive scans. +func showDiffVsMain(root string, fileCount int, fileCountKnown bool) { // Check if we're on a branch other than main branchCmd := exec.Command("git", "rev-parse", "--abbrev-ref", "HEAD") branchCmd.Dir = root @@ -151,12 +219,46 @@ func showDiffVsMain(root string) { fmt.Println() fmt.Printf("📝 Changes on branch '%s' vs main:\n", branch) + + // Unknown file count typically means daemon state is not ready. + // Use cheap git-based output in that case to avoid startup blowups. + if !fileCountKnown || fileCount > limits.LargeRepoFileCount { + showLightweightDiffVsMain(root) + return + } + + // Run codemap --diff to show richer impact analysis on manageable repos. cmd := exec.Command(exe, "--diff", root) cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr cmd.Run() } +func showLightweightDiffVsMain(root string) { + cmd := exec.Command("git", "diff", "--name-only", "main...HEAD") + cmd.Dir = root + out, err := cmd.Output() + if err != nil { + fmt.Println(" (unable to read git diff)") + return + } + + lines := strings.Split(strings.TrimSpace(string(out)), "\n") + if len(lines) == 1 && lines[0] == "" { + fmt.Println(" No files changed") + return + } + + const maxShown = 20 + for i, line := range lines { + if i >= maxShown { + fmt.Printf(" ... and %d more files\n", len(lines)-maxShown) + break + } + fmt.Printf(" • %s\n", line) + } +} + // getLastSessionEvents reads events.log for previous session context func getLastSessionEvents(root string) []string { eventsFile := filepath.Join(root, ".codemap", "events.log") diff --git a/limits/limits.go b/limits/limits.go new file mode 100644 index 0000000..d98ae37 --- /dev/null +++ b/limits/limits.go @@ -0,0 +1,27 @@ +package limits + +// Context output budgets for hook and MCP text responses. +const ( + MaxContextOutputBytes = 60000 // ~15k tokens, <10% of a 200k context window +) + +// Repo-size thresholds used to scale expensive analysis work. +const ( + MediumRepoFileCount = 2000 + LargeRepoFileCount = 5000 +) + +// AdaptiveDepth returns a safe tree depth based on repository size. +// Unknown file count (<=0) defaults to a conservative depth. +func AdaptiveDepth(fileCount int) int { + if fileCount <= 0 { + return 2 + } + if fileCount > LargeRepoFileCount { + return 2 + } + if fileCount > MediumRepoFileCount { + return 3 + } + return 4 +} diff --git a/mcp/main.go b/mcp/main.go index 85d836d..a55aad9 100644 --- a/mcp/main.go +++ b/mcp/main.go @@ -14,6 +14,7 @@ import ( "sync" "time" + "codemap/limits" "codemap/render" "codemap/scanner" "codemap/watch" @@ -29,7 +30,8 @@ var ( // Input types for tools type PathInput struct { - Path string `json:"path" jsonschema:"Path to the project directory to analyze"` + Path string `json:"path" jsonschema:"Path to the project directory to analyze"` + Depth int `json:"depth,omitempty" jsonschema:"Optional tree depth (0 = adaptive default)"` } type DiffInput struct { @@ -177,10 +179,16 @@ func handleGetStructure(ctx context.Context, req *mcp.CallToolRequest, input Pat if err != nil { return errorResult("Scan error: " + err.Error()), nil, nil } + fileCount := len(files) + depth := input.Depth + if depth <= 0 { + depth = limits.AdaptiveDepth(fileCount) + } project := scanner.Project{ Root: absRoot, Mode: "tree", + Depth: depth, Files: files, } @@ -188,23 +196,47 @@ func handleGetStructure(ctx context.Context, req *mcp.CallToolRequest, input Pat render.Tree(&buf, project) output := stripANSI(buf.String()) - // Add hub file summary - fg, err := scanner.BuildFileGraph(input.Path) - if err == nil { - hubs := fg.HubFiles() - if len(hubs) > 0 { - output += "\n⚠️ HUB FILES (high-impact, 3+ dependents):\n" - // Sort by importer count - sort.Slice(hubs, func(i, j int) bool { - return len(fg.Importers[hubs[i]]) > len(fg.Importers[hubs[j]]) - }) - for i, hub := range hubs { - if i >= 5 { - output += fmt.Sprintf(" ... and %d more hubs\n", len(hubs)-5) - break - } - output += fmt.Sprintf(" %s (%d importers)\n", hub, len(fg.Importers[hub])) + // IMPORTANT: MCP tool output contributes to Claude's context window. + // Large repos can produce megabytes of tree output, causing instant context overflow. + // Cap at 60KB (~15k tokens) to stay under 10% of typical 200k context limit. + const maxBytes = limits.MaxContextOutputBytes + if len(output) > maxBytes { + output = output[:maxBytes] + // Find last newline to avoid cutting mid-line + if idx := strings.LastIndex(output, "\n"); idx > maxBytes-1000 { + output = output[:idx] + } + output += fmt.Sprintf("\n\n... (truncated - repo has %d files, use `codemap --depth N` for full tree)\n", fileCount) + } + + // Add hub file summary. Prefer daemon cache; avoid expensive graph builds on + // very large repos. + var hubs []string + var importers map[string][]string + if state := watch.ReadState(absRoot); state != nil && (len(state.Importers) > 0 || len(state.Hubs) > 0) { + hubs = append(hubs, state.Hubs...) + importers = state.Importers + } else if fileCount <= limits.LargeRepoFileCount { + fg, err := scanner.BuildFileGraph(absRoot) + if err == nil { + hubs = fg.HubFiles() + importers = fg.Importers + } + } else { + output += "\nℹ️ Hub analysis skipped for large repo (request get_hubs for explicit analysis)\n" + } + + if len(hubs) > 0 { + output += "\n⚠️ HUB FILES (high-impact, 3+ dependents):\n" + sort.Slice(hubs, func(i, j int) bool { + return len(importers[hubs[i]]) > len(importers[hubs[j]]) + }) + for i, hub := range hubs { + if i >= 5 { + output += fmt.Sprintf(" ... and %d more hubs\n", len(hubs)-5) + break } + output += fmt.Sprintf(" %s (%d importers)\n", hub, len(importers[hub])) } } diff --git a/watch/daemon.go b/watch/daemon.go index 4708b03..bc6e47c 100644 --- a/watch/daemon.go +++ b/watch/daemon.go @@ -8,6 +8,7 @@ import ( "strings" "time" + "codemap/limits" "codemap/scanner" "github.com/fsnotify/fsnotify" @@ -77,8 +78,14 @@ func (d *Daemon) Start() error { return fmt.Errorf("initial scan failed: %w", err) } - // Compute dependency graph (best effort - don't fail if deps unavailable) - d.computeDeps() + // Compute dependency graph (best effort). Skip on very large repos to avoid + // expensive startup memory/CPU spikes in background hook flows. + fileCount := d.FileCount() + if fileCount <= limits.LargeRepoFileCount { + d.computeDeps() + } else if d.verbose { + fmt.Printf("[watch] Skipping dependency graph for large repo (%d files)\n", fileCount) + } // Add directories to watcher if err := d.addWatchDirs(); err != nil { diff --git a/watch/events.go b/watch/events.go index 8010342..b0ad187 100644 --- a/watch/events.go +++ b/watch/events.go @@ -288,10 +288,6 @@ func (d *Daemon) writeState() { d.graph.mu.RLock() defer d.graph.mu.RUnlock() - if d.graph.FileGraph == nil { - return - } - // Get last 50 events for timeline events := d.graph.Events if len(events) > 50 { @@ -301,11 +297,16 @@ func (d *Daemon) writeState() { state := State{ UpdatedAt: time.Now(), FileCount: len(d.graph.Files), - Hubs: d.graph.FileGraph.HubFiles(), - Importers: d.graph.FileGraph.Importers, - Imports: d.graph.FileGraph.Imports, + Hubs: []string{}, + Importers: map[string][]string{}, + Imports: map[string][]string{}, RecentEvents: events, } + if d.graph.FileGraph != nil { + state.Hubs = d.graph.FileGraph.HubFiles() + state.Importers = d.graph.FileGraph.Importers + state.Imports = d.graph.FileGraph.Imports + } data, err := json.MarshalIndent(state, "", " ") if err != nil { diff --git a/watch/state.go b/watch/state.go index bbcf881..ced8e51 100644 --- a/watch/state.go +++ b/watch/state.go @@ -9,8 +9,8 @@ import ( "time" ) -// ReadState reads the daemon state from disk (for hooks to use) -// Returns nil if state doesn't exist or is stale (> 30 seconds old) +// ReadState reads the daemon state from disk (for hooks to use). +// Returns nil if state doesn't exist or if it's stale and daemon is not running. func ReadState(root string) *State { stateFile := filepath.Join(root, ".codemap", "state.json") data, err := os.ReadFile(stateFile) @@ -23,9 +23,10 @@ func ReadState(root string) *State { return nil } - // Check if state is fresh (daemon still running) - if time.Since(state.UpdatedAt) > 30*time.Second { - return nil // stale, daemon probably not running + // If state is stale, still allow it when daemon is alive. + // This avoids expensive fallback scans during idle periods. + if time.Since(state.UpdatedAt) > 30*time.Second && !IsRunning(root) { + return nil } return &state diff --git a/watch/state_test.go b/watch/state_test.go new file mode 100644 index 0000000..bc8075b --- /dev/null +++ b/watch/state_test.go @@ -0,0 +1,114 @@ +package watch + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + "time" + + "codemap/scanner" +) + +func TestReadStateStaleButRunning(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "codemap-state-test") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + codemapDir := filepath.Join(tmpDir, ".codemap") + if err := os.MkdirAll(codemapDir, 0755); err != nil { + t.Fatalf("Failed to create .codemap dir: %v", err) + } + + state := State{ + UpdatedAt: time.Now().Add(-2 * time.Minute), + FileCount: 42, + } + data, err := json.Marshal(state) + if err != nil { + t.Fatalf("Failed to marshal state: %v", err) + } + if err := os.WriteFile(filepath.Join(codemapDir, "state.json"), data, 0644); err != nil { + t.Fatalf("Failed to write state file: %v", err) + } + + // Simulate running daemon by pointing pid file to current process. + if err := WritePID(tmpDir); err != nil { + t.Fatalf("Failed to write pid file: %v", err) + } + defer RemovePID(tmpDir) + + got := ReadState(tmpDir) + if got == nil { + t.Fatal("Expected stale state to be returned when daemon is running") + } + if got.FileCount != 42 { + t.Fatalf("Expected file_count 42, got %d", got.FileCount) + } +} + +func TestReadStateStaleAndNotRunning(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "codemap-state-test") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + codemapDir := filepath.Join(tmpDir, ".codemap") + if err := os.MkdirAll(codemapDir, 0755); err != nil { + t.Fatalf("Failed to create .codemap dir: %v", err) + } + + state := State{ + UpdatedAt: time.Now().Add(-2 * time.Minute), + FileCount: 10, + } + data, err := json.Marshal(state) + if err != nil { + t.Fatalf("Failed to marshal state: %v", err) + } + if err := os.WriteFile(filepath.Join(codemapDir, "state.json"), data, 0644); err != nil { + t.Fatalf("Failed to write state file: %v", err) + } + + if got := ReadState(tmpDir); got != nil { + t.Fatal("Expected nil for stale state when daemon is not running") + } +} + +func TestWriteStateWithoutFileGraph(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "codemap-state-test") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + if err := os.MkdirAll(filepath.Join(tmpDir, ".codemap"), 0755); err != nil { + t.Fatalf("Failed to create .codemap dir: %v", err) + } + + d := &Daemon{ + root: tmpDir, + graph: &Graph{ + Files: map[string]*scanner.FileInfo{ + "main.go": {Path: "main.go", Ext: ".go"}, + }, + Events: []Event{}, + }, + } + + d.writeState() + + state := ReadState(tmpDir) + if state == nil { + t.Fatal("Expected state file to be written without file graph") + } + if state.FileCount != 1 { + t.Fatalf("Expected file_count 1, got %d", state.FileCount) + } + if len(state.Hubs) != 0 { + t.Fatalf("Expected 0 hubs without file graph, got %d", len(state.Hubs)) + } +}