diff --git a/cmd/hooks.go b/cmd/hooks.go
index dfa7601..7877a50 100644
--- a/cmd/hooks.go
+++ b/cmd/hooks.go
@@ -820,21 +820,55 @@ func (h *hubInfo) isHub(path string) bool {
 	return len(h.Importers[path]) >= 3
 }
 
-// findChildRepos returns subdirectories that are git repositories
+// findChildRepos returns the names of the immediate, non-hidden
+// subdirectories of root that contain a .git entry, excluding any that
+// git check-ignore, run from root, reports as ignored.
 func findChildRepos(root string) []string {
 	entries, err := os.ReadDir(root)
 	if err != nil {
 		return nil
 	}
 
-	var repos []string
+	// Check which directories are git-ignored using git check-ignore.
+	// This is more reliable than parsing .gitignore ourselves since it
+	// handles all gitignore semantics (negation, nested, global, etc.).
+	var candidates []string
 	for _, e := range entries {
 		if !e.IsDir() || strings.HasPrefix(e.Name(), ".") {
 			continue
 		}
-		// Check if this subdirectory is a git repo
 		if _, err := os.Stat(filepath.Join(root, e.Name(), ".git")); err == nil {
-			repos = append(repos, e.Name())
+			candidates = append(candidates, e.Name())
+		}
+	}
+
+	if len(candidates) == 0 {
+		return nil
+	}
+
+	// Ask git which candidates are ignored. -z makes the output
+	// NUL-delimited and unquoted, so names that git would otherwise
+	// C-quote (non-ASCII bytes, quotes, control characters) still compare
+	// equal to the candidate names.
+	args := append([]string{"check-ignore", "-z", "--"}, candidates...)
+	cmd := exec.Command("git", args...)
+	cmd.Dir = root
+	// Best effort: the error is deliberately dropped. git check-ignore
+	// exits non-zero when no path is ignored, and on any other failure
+	// whatever was printed (normally nothing) is used, so candidates
+	// are kept rather than hidden.
+	out, _ := cmd.Output()
+	ignored := make(map[string]bool)
+	for _, name := range strings.Split(string(out), "\x00") {
+		if name != "" {
+			ignored[name] = true
+		}
+	}
+
+	var repos []string
+	for _, name := range candidates {
+		if !ignored[name] {
+			repos = append(repos, name)
 		}
 	}
 	return repos
diff --git a/scanner/astgrep.go b/scanner/astgrep.go
index 8d994ee..4906728 100644
--- a/scanner/astgrep.go
+++ b/scanner/astgrep.go
@@ -100,6 +100,28 @@ func (s *AstGrepScanner) Available() bool {
 	return s.binary != ""
 }
 
+// findNestedGitRepos returns subdirectory names that contain their own .git
+// These are separate repositories (not submodules) that should be excluded
+// from scanning to avoid hanging on large nested repos.
+func findNestedGitRepos(root string) []string {
+	entries, err := os.ReadDir(root)
+	if err != nil {
+		return nil
+	}
+
+	var repos []string
+	for _, e := range entries {
+		if !e.IsDir() || strings.HasPrefix(e.Name(), ".") {
+			continue
+		}
+		gitPath := filepath.Join(root, e.Name(), ".git")
+		if info, err := os.Stat(gitPath); err == nil && info.IsDir() {
+			repos = append(repos, e.Name())
+		}
+	}
+	return repos
+}
+
 // ScanDirectory analyzes all files in a directory using sg scan
 func (s *AstGrepScanner) ScanDirectory(root string) ([]FileAnalysis, error) {
 	if !s.Available() {
@@ -119,7 +141,15 @@ func (s *AstGrepScanner) ScanDirectory(root string) ([]FileAnalysis, error) {
 	}
 	inlineRules := strings.Join(rules, "\n---\n")
 
-	cmd := exec.Command(s.binary, "scan", "--inline-rules", inlineRules, "--json", root)
+	// Build command args, excluding nested git repos that ast-grep would
+	// treat as separate repo boundaries (ignoring parent .gitignore)
+	args := []string{"scan", "--inline-rules", inlineRules, "--json"}
+	for _, repo := range findNestedGitRepos(root) {
+		args = append(args, "--globs", "!"+repo+"/**")
+	}
+	args = append(args, root)
+
+	cmd := exec.Command(s.binary, args...)
 	out, err := cmd.CombinedOutput()
 	if err != nil {
 		// sg scan returns non-zero if no matches, check if output contains JSON