Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 111 additions & 9 deletions cmd/hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ import (
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"

"codemap/limits"
"codemap/scanner"
"codemap/watch"
)
Expand All @@ -23,18 +25,28 @@ type hubInfo struct {
Imports map[string][]string
}

// getHubInfo returns hub info from daemon state (fast) or fresh scan (slow)
// getHubInfo returns hub info from daemon state.
// If daemon isn't running, falls back to a fresh scan.
func getHubInfo(root string) *hubInfo {
// Try daemon state first (instant)
if state := watch.ReadState(root); state != nil {
// State may contain file/event info only (no dependency graph) on very
// large repos. Avoid expensive fallback scans in that case.
if len(state.Importers) == 0 && len(state.Imports) == 0 && len(state.Hubs) == 0 {
return nil
}
return &hubInfo{
Hubs: state.Hubs,
Importers: state.Importers,
Imports: state.Imports,
}
}

// Fall back to fresh scan (slower)
// If daemon is running but state is unavailable, skip expensive fallback.
if watch.IsRunning(root) {
return nil
}

// Fall back to fresh scan only when daemon is not running.
fg, err := scanner.BuildFileGraph(root)
if err != nil {
return nil
Expand All @@ -47,6 +59,17 @@ func getHubInfo(root string) *hubInfo {
}
}

// waitForDaemonState polls the daemon's on-disk state until it becomes
// readable or the timeout elapses. It always makes at least one read
// attempt (even with a zero/negative timeout) and never sleeps past the
// deadline: the final sleep is capped to the remaining time, so callers
// are not delayed by up to a full poll interval beyond their budget.
// Returns nil when no state appeared in time.
func waitForDaemonState(root string, timeout time.Duration) *watch.State {
	const pollInterval = 100 * time.Millisecond
	deadline := time.Now().Add(timeout)
	for {
		if state := watch.ReadState(root); state != nil {
			return state
		}
		remaining := time.Until(deadline)
		if remaining <= 0 {
			return nil
		}
		// Cap the last sleep so we wake at (not after) the deadline.
		if remaining < pollInterval {
			time.Sleep(remaining)
		} else {
			time.Sleep(pollInterval)
		}
	}
}

// RunHook executes the named hook with the given project root
func RunHook(hookName, root string) error {
switch hookName {
Expand Down Expand Up @@ -94,13 +117,55 @@ func hookSessionStart(root string) error {
fmt.Println("📍 Project Context:")
fmt.Println()

// Run codemap to show full tree structure
// IMPORTANT: Hook output goes directly into Claude's "Messages" context, not system prompt.
// This means hook output competes with conversation history for the ~200k token limit.
// A 1.3MB output (like a full tree of a 10k file repo) = ~500k tokens = instant context overflow.
//
// We enforce two limits:
// 1. Adaptive depth: larger repos get shallower trees (depth 2-4 based on file count)
// 2. Hard cap: 60KB max output (~15k tokens, <10% of context window)
//
// Future: Consider structured output that Claude Code can format/truncate intelligently.
fileCount := 0
fileCountKnown := false
state := watch.ReadState(root)
if state == nil && watch.IsRunning(root) {
state = waitForDaemonState(root, 2*time.Second)
}
if state != nil {
fileCount = state.FileCount
fileCountKnown = true
}

exe, err := os.Executable()
if err == nil {
cmd := exec.Command(exe, root)
cmd.Stdout = os.Stdout
depth := limits.AdaptiveDepth(fileCount)
cmd := exec.Command(exe, "--depth", strconv.Itoa(depth), root)

// Capture output to enforce size limit
var buf strings.Builder
cmd.Stdout = &buf
cmd.Stderr = os.Stderr
cmd.Run()

output := buf.String()
const maxBytes = limits.MaxContextOutputBytes

if len(output) > maxBytes {
// Truncate and add warning
output = output[:maxBytes]
// Find last newline to avoid cutting mid-line
if idx := strings.LastIndex(output, "\n"); idx > maxBytes-1000 {
output = output[:idx]
}
repoSummary := "repo size unknown"
if fileCountKnown {
repoSummary = fmt.Sprintf("repo has %d files", fileCount)
}
output += "\n\n... (truncated - " + repoSummary + ", use `codemap .` for full tree)\n"
}

fmt.Print(output)
fmt.Println()
}

Expand All @@ -116,10 +181,12 @@ func hookSessionStart(root string) error {
importers := len(info.Importers[hub])
fmt.Printf(" ⚠️ HUB FILE: %s (imported by %d files)\n", hub, importers)
}
} else if fileCountKnown && fileCount > limits.LargeRepoFileCount {
fmt.Printf("ℹ️ Hub analysis skipped for large repo (%d files)\n", fileCount)
}

// Show diff vs main if on a feature branch
showDiffVsMain(root)
showDiffVsMain(root, fileCount, fileCountKnown)

// Show last session context if resuming work
if len(lastSessionEvents) > 0 {
Expand All @@ -129,8 +196,9 @@ func hookSessionStart(root string) error {
return nil
}

// showDiffVsMain shows files changed on this branch vs main
func showDiffVsMain(root string) {
// showDiffVsMain shows files changed on this branch vs main.
// For large/unknown repos, uses lightweight git output to avoid expensive scans.
func showDiffVsMain(root string, fileCount int, fileCountKnown bool) {
// Check if we're on a branch other than main
branchCmd := exec.Command("git", "rev-parse", "--abbrev-ref", "HEAD")
branchCmd.Dir = root
Expand All @@ -151,12 +219,46 @@ func showDiffVsMain(root string) {

fmt.Println()
fmt.Printf("📝 Changes on branch '%s' vs main:\n", branch)

// Unknown file count typically means daemon state is not ready.
// Use cheap git-based output in that case to avoid startup blowups.
if !fileCountKnown || fileCount > limits.LargeRepoFileCount {
showLightweightDiffVsMain(root)
return
}

// Run codemap --diff to show richer impact analysis on manageable repos.
cmd := exec.Command(exe, "--diff", root)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.Run()
}

// showLightweightDiffVsMain prints the files changed on the current branch
// relative to main using plain `git diff --name-only`, avoiding any codemap
// scanning. Output is capped at a fixed number of entries with a trailing
// "... and N more" summary.
func showLightweightDiffVsMain(root string) {
	const maxShown = 20

	gitCmd := exec.Command("git", "diff", "--name-only", "main...HEAD")
	gitCmd.Dir = root
	raw, err := gitCmd.Output()
	if err != nil {
		fmt.Println(" (unable to read git diff)")
		return
	}

	trimmed := strings.TrimSpace(string(raw))
	if trimmed == "" {
		// git emitted nothing: the branch is identical to main.
		fmt.Println(" No files changed")
		return
	}

	files := strings.Split(trimmed, "\n")
	visible := files
	if len(files) > maxShown {
		visible = files[:maxShown]
	}
	for _, name := range visible {
		fmt.Printf(" • %s\n", name)
	}
	if hidden := len(files) - maxShown; hidden > 0 {
		fmt.Printf(" ... and %d more files\n", hidden)
	}
}

// getLastSessionEvents reads events.log for previous session context
func getLastSessionEvents(root string) []string {
eventsFile := filepath.Join(root, ".codemap", "events.log")
Expand Down
27 changes: 27 additions & 0 deletions limits/limits.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Package limits centralizes the output-size budgets and repo-size
// thresholds used to keep hook and MCP responses within the model's
// context window.
package limits

// Context output budgets for hook and MCP text responses.
const (
	// MaxContextOutputBytes caps a single tree/diff response.
	// ~15k tokens, <10% of a 200k context window.
	MaxContextOutputBytes = 60000
)

// Repo-size thresholds used to scale expensive analysis work.
const (
	MediumRepoFileCount = 2000
	LargeRepoFileCount  = 5000
)

// AdaptiveDepth returns a safe tree depth based on repository size.
// Unknown file count (<=0) defaults to a conservative depth.
func AdaptiveDepth(fileCount int) int {
	switch {
	case fileCount <= 0:
		// Size unknown: stay conservative.
		return 2
	case fileCount > LargeRepoFileCount:
		return 2
	case fileCount > MediumRepoFileCount:
		return 3
	default:
		return 4
	}
}
66 changes: 49 additions & 17 deletions mcp/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"sync"
"time"

"codemap/limits"
"codemap/render"
"codemap/scanner"
"codemap/watch"
Expand All @@ -29,7 +30,8 @@ var (

// Input types for tools
type PathInput struct {
Path string `json:"path" jsonschema:"Path to the project directory to analyze"`
Path string `json:"path" jsonschema:"Path to the project directory to analyze"`
Depth int `json:"depth,omitempty" jsonschema:"Optional tree depth (0 = adaptive default)"`
}

type DiffInput struct {
Expand Down Expand Up @@ -177,34 +179,64 @@ func handleGetStructure(ctx context.Context, req *mcp.CallToolRequest, input Pat
if err != nil {
return errorResult("Scan error: " + err.Error()), nil, nil
}
fileCount := len(files)
depth := input.Depth
if depth <= 0 {
depth = limits.AdaptiveDepth(fileCount)
}

project := scanner.Project{
Root: absRoot,
Mode: "tree",
Depth: depth,
Files: files,
}

var buf bytes.Buffer
render.Tree(&buf, project)
output := stripANSI(buf.String())

// Add hub file summary
fg, err := scanner.BuildFileGraph(input.Path)
if err == nil {
hubs := fg.HubFiles()
if len(hubs) > 0 {
output += "\n⚠️ HUB FILES (high-impact, 3+ dependents):\n"
// Sort by importer count
sort.Slice(hubs, func(i, j int) bool {
return len(fg.Importers[hubs[i]]) > len(fg.Importers[hubs[j]])
})
for i, hub := range hubs {
if i >= 5 {
output += fmt.Sprintf(" ... and %d more hubs\n", len(hubs)-5)
break
}
output += fmt.Sprintf(" %s (%d importers)\n", hub, len(fg.Importers[hub]))
// IMPORTANT: MCP tool output contributes to Claude's context window.
// Large repos can produce megabytes of tree output, causing instant context overflow.
// Cap at 60KB (~15k tokens) to stay under 10% of typical 200k context limit.
const maxBytes = limits.MaxContextOutputBytes
if len(output) > maxBytes {
output = output[:maxBytes]
// Find last newline to avoid cutting mid-line
if idx := strings.LastIndex(output, "\n"); idx > maxBytes-1000 {
output = output[:idx]
}
output += fmt.Sprintf("\n\n... (truncated - repo has %d files, use `codemap --depth N` for full tree)\n", fileCount)
}

// Add hub file summary. Prefer daemon cache; avoid expensive graph builds on
// very large repos.
var hubs []string
var importers map[string][]string
if state := watch.ReadState(absRoot); state != nil && (len(state.Importers) > 0 || len(state.Hubs) > 0) {
hubs = append(hubs, state.Hubs...)
importers = state.Importers
} else if fileCount <= limits.LargeRepoFileCount {
fg, err := scanner.BuildFileGraph(absRoot)
if err == nil {
hubs = fg.HubFiles()
importers = fg.Importers
}
} else {
output += "\nℹ️ Hub analysis skipped for large repo (request get_hubs for explicit analysis)\n"
}

if len(hubs) > 0 {
output += "\n⚠️ HUB FILES (high-impact, 3+ dependents):\n"
sort.Slice(hubs, func(i, j int) bool {
return len(importers[hubs[i]]) > len(importers[hubs[j]])
})
for i, hub := range hubs {
if i >= 5 {
output += fmt.Sprintf(" ... and %d more hubs\n", len(hubs)-5)
break
}
output += fmt.Sprintf(" %s (%d importers)\n", hub, len(importers[hub]))
}
}

Expand Down
11 changes: 9 additions & 2 deletions watch/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"strings"
"time"

"codemap/limits"
"codemap/scanner"

"github.com/fsnotify/fsnotify"
Expand Down Expand Up @@ -77,8 +78,14 @@ func (d *Daemon) Start() error {
return fmt.Errorf("initial scan failed: %w", err)
}

// Compute dependency graph (best effort - don't fail if deps unavailable)
d.computeDeps()
// Compute dependency graph (best effort). Skip on very large repos to avoid
// expensive startup memory/CPU spikes in background hook flows.
fileCount := d.FileCount()
if fileCount <= limits.LargeRepoFileCount {
d.computeDeps()
} else if d.verbose {
fmt.Printf("[watch] Skipping dependency graph for large repo (%d files)\n", fileCount)
}

// Add directories to watcher
if err := d.addWatchDirs(); err != nil {
Expand Down
15 changes: 8 additions & 7 deletions watch/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,10 +288,6 @@ func (d *Daemon) writeState() {
d.graph.mu.RLock()
defer d.graph.mu.RUnlock()

if d.graph.FileGraph == nil {
return
}

// Get last 50 events for timeline
events := d.graph.Events
if len(events) > 50 {
Expand All @@ -301,11 +297,16 @@ func (d *Daemon) writeState() {
state := State{
UpdatedAt: time.Now(),
FileCount: len(d.graph.Files),
Hubs: d.graph.FileGraph.HubFiles(),
Importers: d.graph.FileGraph.Importers,
Imports: d.graph.FileGraph.Imports,
Hubs: []string{},
Importers: map[string][]string{},
Imports: map[string][]string{},
RecentEvents: events,
}
if d.graph.FileGraph != nil {
state.Hubs = d.graph.FileGraph.HubFiles()
state.Importers = d.graph.FileGraph.Importers
state.Imports = d.graph.FileGraph.Imports
}

data, err := json.MarshalIndent(state, "", " ")
if err != nil {
Expand Down
Loading
Loading