From 1fd3d046ebfd60634b6adbc743e94fdc26026080 Mon Sep 17 00:00:00 2001 From: korboybeats Date: Sat, 21 Feb 2026 18:52:23 +0900 Subject: [PATCH] Fix hooks hanging on repos with nested gitignored git repos Two fixes: 1. scanner/astgrep.go: ScanDirectory now detects subdirectories with their own .git and passes --globs exclusions to ast-grep so it doesn't scan them. Previously ast-grep treated nested .git dirs as separate repo boundaries, ignoring the parent .gitignore and scanning thousands of files indefinitely. 2. cmd/hooks.go: findChildRepos now uses git check-ignore to skip gitignored child repos. Previously it would run full codemap trees on massive ignored directories like vendored dependencies. --- cmd/hooks.go | 34 ++++++++++++++++++++++++++++++---- scanner/astgrep.go | 32 +++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/cmd/hooks.go b/cmd/hooks.go index dfa7601..7877a50 100644 --- a/cmd/hooks.go +++ b/cmd/hooks.go @@ -820,21 +820,47 @@ func (h *hubInfo) isHub(path string) bool { return len(h.Importers[path]) >= 3 } -// findChildRepos returns subdirectories that are git repositories +// findChildRepos returns subdirectories that are git repositories, +// excluding any that are listed in the parent's .gitignore. func findChildRepos(root string) []string { entries, err := os.ReadDir(root) if err != nil { return nil } - var repos []string + // Check which directories are git-ignored using git check-ignore + // This is more reliable than parsing .gitignore ourselves since it + // handles all gitignore semantics (negation, nested, global, etc.) + var candidates []string for _, e := range entries { if !e.IsDir() || strings.HasPrefix(e.Name(), ".") { continue } - // Check if this subdirectory is a git repo if _, err := os.Stat(filepath.Join(root, e.Name(), ".git")); err == nil { - repos = append(repos, e.Name()) + candidates = append(candidates, e.Name()) + } + } + + if len(candidates) == 0 { + return nil + } + + // Use git check-ignore to filter out ignored directories + args := append([]string{"check-ignore", "--"}, candidates...) + cmd := exec.Command("git", args...) + cmd.Dir = root + out, _ := cmd.Output() + ignored := make(map[string]bool) + for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") { + if line != "" { + ignored[line] = true + } + } + + var repos []string + for _, name := range candidates { + if !ignored[name] { + repos = append(repos, name) } } return repos diff --git a/scanner/astgrep.go b/scanner/astgrep.go index 8d994ee..4906728 100644 --- a/scanner/astgrep.go +++ b/scanner/astgrep.go @@ -100,6 +100,28 @@ func (s *AstGrepScanner) Available() bool { return s.binary != "" } +// findNestedGitRepos returns subdirectory names that contain their own .git +// These are separate repositories (not submodules) that should be excluded +// from scanning to avoid hanging on large nested repos. +func findNestedGitRepos(root string) []string { + entries, err := os.ReadDir(root) + if err != nil { + return nil + } + + var repos []string + for _, e := range entries { + if !e.IsDir() || strings.HasPrefix(e.Name(), ".") { + continue + } + gitPath := filepath.Join(root, e.Name(), ".git") + if info, err := os.Stat(gitPath); err == nil && info.IsDir() { + repos = append(repos, e.Name()) + } + } + return repos +} + // ScanDirectory analyzes all files in a directory using sg scan func (s *AstGrepScanner) ScanDirectory(root string) ([]FileAnalysis, error) { if !s.Available() { @@ -119,7 +141,15 @@ func (s *AstGrepScanner) ScanDirectory(root string) ([]FileAnalysis, error) { } inlineRules := strings.Join(rules, "\n---\n") - cmd := exec.Command(s.binary, "scan", "--inline-rules", inlineRules, "--json", root) + // Build command args, excluding nested git repos that ast-grep would + // treat as separate repo boundaries (ignoring parent .gitignore) + args := []string{"scan", "--inline-rules", inlineRules, "--json"} + for _, repo := range findNestedGitRepos(root) { + args = append(args, "--globs", "!"+repo+"/**") + } + args = append(args, root) + + cmd := exec.Command(s.binary, args...) out, err := cmd.CombinedOutput() if err != nil { // sg scan returns non-zero if no matches, check if output contains JSON