fix: grep false negatives, output mangling, and truncation annotations

BadassBison · BadassBison · commit 2cc8a19a68ca · 2026-03-26T14:52:17.000-07:00
- grep: use --no-ignore-vcs so .gitignore'd files aren't silently skipped
  (matches grep -r behavior, avoids false negatives in large monorepos)
- grep: passthrough raw output for &lt;=50 matches so AI agents can parse
  standard file:line:content format without retry loops
- filter: replace smart_truncate heuristic with clean first-N-lines truncation
  and a single [X more lines] suffix (eliminates synthetic // ... markers
  that AI agents misread as code, causing parsing confusion and retries)
diff --git a/src/cmds/system/grep_cmd.rs b/src/cmds/system/grep_cmd.rs
@@ -28,7 +28,11 @@ pub fn run(
     let rg_pattern = pattern.replace(r"\|", "|");
 
     let mut rg_cmd = resolved_command("rg");
-    rg_cmd.args(["-n", "--no-heading", &rg_pattern, path]);
+    // --no-ignore-vcs: match grep -r behavior (don't skip .gitignore'd files).
+    // Without this, rg returns 0 matches for files in .gitignore, causing
+    // false negatives that make AI agents draw wrong conclusions.
+    // Using --no-ignore-vcs (not --no-ignore) so .ignore/.rgignore are still respected.
+    rg_cmd.args(["-n", "--no-heading", "--no-ignore-vcs", &rg_pattern, path]);
 
     if let Some(ft) = file_type {
         rg_cmd.arg("--type").arg(ft);
@@ -78,67 +82,112 @@ pub fn run(
         return Ok(());
     }
 
-    let mut by_file: HashMap<String, Vec<(usize, String)>> = HashMap::new();
-    let mut total = 0;
-
-    // Compile context regex once (instead of per-line in clean_line)
-    let context_re = if context_only {
-        Regex::new(&format!("(?i).{{0,20}}{}.*", regex::escape(pattern))).ok()
+    // Count total matches to decide output strategy
+    let total_matches = stdout.lines().count();
+
+    // Passthrough threshold: small results pass through raw so AI agents
+    // can parse standard grep output. Only use grouped format for large results
+    // where token savings are meaningful. The grouped format confuses AI agents
+    // on small result sets, causing retry loops that burn more tokens than saved.
+    let passthrough_threshold = 50;
+
+    let rtk_output = if total_matches <= passthrough_threshold {
+        // Small result set: pass through raw rg/grep output.
+        // Truncate individual lines but preserve standard file:line:content format.
+        let mut out = String::new();
+        let mut shown = 0;
+        for line in stdout.lines() {
+            if shown >= max_results {
+                break;
+            }
+            let parts: Vec<&str> = line.splitn(3, ':').collect();
+            if parts.len() == 3 {
+                let file = parts[0];
+                let line_num = parts[1];
+                let content = parts[2].trim();
+                if content.len() <= max_line_len {
+                    out.push_str(&format!("{}:{}:{}\n", file, line_num, content));
+                } else {
+                    let truncated: String = content.chars().take(max_line_len - 3).collect();
+                    out.push_str(&format!("{}:{}:{}...\n", file, line_num, truncated));
+                }
+            } else {
+                // Non-standard line (e.g., context separator), pass through
+                out.push_str(line);
+                out.push('\n');
+            }
+            shown += 1;
+        }
+        if total_matches > max_results {
+            out.push_str(&format!("... +{} matches\n", total_matches - max_results));
+        }
+        out
     } else {
-        None
-    };
+        // Large result set: use grouped format for token savings
+        let mut by_file: HashMap<String, Vec<(usize, String)>> = HashMap::new();
 
-    for line in stdout.lines() {
-        let parts: Vec<&str> = line.splitn(3, ':').collect();
-
-        let (file, line_num, content) = if parts.len() == 3 {
-            let ln = parts[1].parse().unwrap_or(0);
-            (parts[0].to_string(), ln, parts[2])
-        } else if parts.len() == 2 {
-            let ln = parts[0].parse().unwrap_or(0);
-            (path.to_string(), ln, parts[1])
+        let context_re = if context_only {
+            Regex::new(&format!("(?i).{{0,20}}{}.*", regex::escape(pattern))).ok()
         } else {
-            continue;
+            None
         };
 
-        total += 1;
-        let cleaned = clean_line(content, max_line_len, context_re.as_ref(), pattern);
-        by_file.entry(file).or_default().push((line_num, cleaned));
-    }
+        for line in stdout.lines() {
+            let parts: Vec<&str> = line.splitn(3, ':').collect();
 
-    let mut rtk_output = String::new();
-    rtk_output.push_str(&format!("{} matches in {}F:\n\n", total, by_file.len()));
-
-    let mut shown = 0;
-    let mut files: Vec<_> = by_file.iter().collect();
-    files.sort_by_key(|(f, _)| *f);
+            let (file, line_num, content) = if parts.len() == 3 {
+                let ln = parts[1].parse().unwrap_or(0);
+                (parts[0].to_string(), ln, parts[2])
+            } else if parts.len() == 2 {
+                let ln = parts[0].parse().unwrap_or(0);
+                (path.to_string(), ln, parts[1])
+            } else {
+                continue;
+            };
 
-    for (file, matches) in files {
-        if shown >= max_results {
-            break;
+            let cleaned = clean_line(content, max_line_len, context_re.as_ref(), pattern);
+            by_file.entry(file).or_default().push((line_num, cleaned));
         }
 
-        let file_display = compact_path(file);
-        rtk_output.push_str(&format!("[file] {} ({}):\n", file_display, matches.len()));
+        let mut out = String::new();
+        out.push_str(&format!(
+            "{} matches in {} files:\n\n",
+            total_matches,
+            by_file.len()
+        ));
+
+        let mut shown = 0;
+        let mut files: Vec<_> = by_file.iter().collect();
+        files.sort_by_key(|(f, _)| *f);
 
         let per_file = config::limits().grep_max_per_file;
-        for (line_num, content) in matches.iter().take(per_file) {
-            rtk_output.push_str(&format!("  {:>4}: {}\n", line_num, content));
-            shown += 1;
+        for (file, matches) in files {
             if shown >= max_results {
                 break;
             }
-        }
 
-        if matches.len() > per_file {
-            rtk_output.push_str(&format!("  +{}\n", matches.len() - per_file));
+            let file_display = compact_path(file);
+            out.push_str(&format!("[file] {} ({}):\n", file_display, matches.len()));
+
+            for (line_num, content) in matches.iter().take(per_file) {
+                out.push_str(&format!("  {:>4}: {}\n", line_num, content));
+                shown += 1;
+                if shown >= max_results {
+                    break;
+                }
+            }
+
+            if matches.len() > per_file {
+                out.push_str(&format!("  +{}\n", matches.len() - per_file));
+            }
+            out.push('\n');
         }
-        rtk_output.push('\n');
-    }
 
-    if total > shown {
-        rtk_output.push_str(&format!("... +{}\n", total - shown));
-    }
+        if total_matches > shown {
+            out.push_str(&format!("... +{}\n", total_matches - shown));
+        }
+        out
+    };
 
     print!("{}", rtk_output);
     timer.track(
@@ -320,4 +369,24 @@ mod tests {
         }
         // If rg is not installed, skip gracefully (test still passes)
     }
+
+    #[test]
+    fn test_rg_no_ignore_vcs_flag_accepted() {
+        // Verify rg accepts --no-ignore-vcs (used to match grep -r behavior for .gitignore)
+        let mut cmd = resolved_command("rg");
+        cmd.args([
+            "-n",
+            "--no-heading",
+            "--no-ignore-vcs",
+            "NONEXISTENT_PATTERN_12345",
+            ".",
+        ]);
+        if let Ok(output) = cmd.output() {
+            assert!(
+                output.status.code() == Some(1) || output.status.success(),
+                "rg --no-ignore-vcs should be accepted"
+            );
+        }
+        // If rg is not installed, skip gracefully (test still passes)
+    }
 }
diff --git a/src/core/filter.rs b/src/core/filter.rs
@@ -340,47 +340,13 @@ pub fn smart_truncate(content: &str, max_lines: usize, _lang: &Language) -> Stri
         return content.to_string();
     }
 
-    let mut result = Vec::with_capacity(max_lines);
-    let mut kept_lines = 0;
-    let mut skipped_section = false;
-
-    for line in &lines {
-        let trimmed = line.trim();
-
-        // Always keep signatures and important structural elements
-        let is_important = FUNC_SIGNATURE.is_match(trimmed)
-            || IMPORT_PATTERN.is_match(trimmed)
-            || trimmed.starts_with("pub ")
-            || trimmed.starts_with("export ")
-            || trimmed == "}"
-            || trimmed == "{";
-
-        if is_important || kept_lines < max_lines / 2 {
-            if skipped_section {
-                result.push(format!(
-                    "    // ... {} lines omitted",
-                    lines.len() - kept_lines
-                ));
-                skipped_section = false;
-            }
-            result.push((*line).to_string());
-            kept_lines += 1;
-        } else {
-            skipped_section = true;
-        }
-
-        if kept_lines >= max_lines - 1 {
-            break;
-        }
-    }
-
-    if skipped_section || kept_lines < lines.len() {
-        result.push(format!(
-            "// ... {} more lines (total: {})",
-            lines.len() - kept_lines,
-            lines.len()
-        ));
-    }
+    // Clean truncation: take first max_lines lines only.
+    // The old approach inserted synthetic "// ... N lines omitted" markers
+    // that AI agents treated as file content, causing parsing confusion
+    // and retry loops that burned more tokens than the filtering saved.
+    let mut result: Vec<String> = lines[..max_lines].iter().map(|l| (*l).to_string()).collect();
+    let omitted = lines.len() - max_lines;
+    result.push(format!("[{} more lines]", omitted));
 
     result.join("\n")
 }
@@ -499,9 +465,9 @@ fn main() {
     #[test]
     fn test_smart_truncate_overflow_count_exact() {
         // 200 plain-text lines with max_lines=20.
-        // smart_truncate keeps the first max_lines/2=10 lines, then skips the rest.
-        // The overflow message "// ... N more lines (total: T)" must satisfy:
-        //   kept_count + N == T
+        // smart_truncate keeps the first max_lines=20 lines.
+        // The overflow message "[N more lines]" must satisfy:
+        //   kept_count + N == total_lines
         let total_lines = 200usize;
         let max_lines = 20usize;
         let content: String = (0..total_lines)
@@ -538,4 +504,34 @@ fn main() {
             total_lines
         );
     }
+
+    #[test]
+    fn test_smart_truncate_no_annotations() {
+        let input = "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10\n";
+        let output = smart_truncate(input, 3, &Language::Unknown);
+        // Must NOT contain old-style "// ... N lines omitted" annotations
+        assert!(
+            !output.contains("// ..."),
+            "smart_truncate must not insert synthetic comment annotations"
+        );
+        // Must contain clean truncation marker
+        assert!(output.contains("[7 more lines]"));
+        // Must preserve first 3 lines verbatim
+        assert!(output.starts_with("line1\nline2\nline3\n"));
+    }
+
+    #[test]
+    fn test_smart_truncate_no_truncation_when_under_limit() {
+        let input = "a\nb\nc\n";
+        let output = smart_truncate(input, 10, &Language::Unknown);
+        assert_eq!(output, input);
+        assert!(!output.contains("more lines"));
+    }
+
+    #[test]
+    fn test_smart_truncate_exact_limit() {
+        let input = "a\nb\nc";
+        let output = smart_truncate(input, 3, &Language::Unknown);
+        assert_eq!(output, input);
+    }
 }