Skip to content

Commit 2cc8a19

Browse files
committed
fix: grep false negatives, output mangling, and truncation annotations
- grep: use --no-ignore-vcs so .gitignore'd files aren't silently skipped (matches grep -r behavior, avoids false negatives in large monorepos) - grep: passthrough raw output for <=50 matches so AI agents can parse standard file:line:content format without retry loops - filter: replace smart_truncate heuristic with clean first-N-lines truncation and a single [X more lines] suffix (eliminates synthetic // ... markers that AI agents misread as code, causing parsing confusion and retries)
1 parent 2efe860 commit 2cc8a19

2 files changed

Lines changed: 154 additions & 89 deletions

File tree

src/cmds/system/grep_cmd.rs

Lines changed: 114 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ pub fn run(
2828
let rg_pattern = pattern.replace(r"\|", "|");
2929

3030
let mut rg_cmd = resolved_command("rg");
31-
rg_cmd.args(["-n", "--no-heading", &rg_pattern, path]);
31+
// --no-ignore-vcs: match grep -r behavior (don't skip .gitignore'd files).
32+
// Without this, rg returns 0 matches for files in .gitignore, causing
33+
// false negatives that make AI agents draw wrong conclusions.
34+
// Using --no-ignore-vcs (not --no-ignore) so .ignore/.rgignore are still respected.
35+
rg_cmd.args(["-n", "--no-heading", "--no-ignore-vcs", &rg_pattern, path]);
3236

3337
if let Some(ft) = file_type {
3438
rg_cmd.arg("--type").arg(ft);
@@ -78,67 +82,112 @@ pub fn run(
7882
return Ok(());
7983
}
8084

81-
let mut by_file: HashMap<String, Vec<(usize, String)>> = HashMap::new();
82-
let mut total = 0;
83-
84-
// Compile context regex once (instead of per-line in clean_line)
85-
let context_re = if context_only {
86-
Regex::new(&format!("(?i).{{0,20}}{}.*", regex::escape(pattern))).ok()
85+
// Count total matches to decide output strategy
86+
let total_matches = stdout.lines().count();
87+
88+
// Passthrough threshold: small results pass through raw so AI agents
89+
// can parse standard grep output. Only use grouped format for large results
90+
// where token savings are meaningful. The grouped format confuses AI agents
91+
// on small result sets, causing retry loops that burn more tokens than saved.
92+
let passthrough_threshold = 50;
93+
94+
let rtk_output = if total_matches <= passthrough_threshold {
95+
// Small result set: pass through raw rg/grep output.
96+
// Truncate individual lines but preserve standard file:line:content format.
97+
let mut out = String::new();
98+
let mut shown = 0;
99+
for line in stdout.lines() {
100+
if shown >= max_results {
101+
break;
102+
}
103+
let parts: Vec<&str> = line.splitn(3, ':').collect();
104+
if parts.len() == 3 {
105+
let file = parts[0];
106+
let line_num = parts[1];
107+
let content = parts[2].trim();
108+
if content.len() <= max_line_len {
109+
out.push_str(&format!("{}:{}:{}\n", file, line_num, content));
110+
} else {
111+
let truncated: String = content.chars().take(max_line_len - 3).collect();
112+
out.push_str(&format!("{}:{}:{}...\n", file, line_num, truncated));
113+
}
114+
} else {
115+
// Non-standard line (e.g., context separator), pass through
116+
out.push_str(line);
117+
out.push('\n');
118+
}
119+
shown += 1;
120+
}
121+
if total_matches > max_results {
122+
out.push_str(&format!("... +{} matches\n", total_matches - max_results));
123+
}
124+
out
87125
} else {
88-
None
89-
};
126+
// Large result set: use grouped format for token savings
127+
let mut by_file: HashMap<String, Vec<(usize, String)>> = HashMap::new();
90128

91-
for line in stdout.lines() {
92-
let parts: Vec<&str> = line.splitn(3, ':').collect();
93-
94-
let (file, line_num, content) = if parts.len() == 3 {
95-
let ln = parts[1].parse().unwrap_or(0);
96-
(parts[0].to_string(), ln, parts[2])
97-
} else if parts.len() == 2 {
98-
let ln = parts[0].parse().unwrap_or(0);
99-
(path.to_string(), ln, parts[1])
129+
let context_re = if context_only {
130+
Regex::new(&format!("(?i).{{0,20}}{}.*", regex::escape(pattern))).ok()
100131
} else {
101-
continue;
132+
None
102133
};
103134

104-
total += 1;
105-
let cleaned = clean_line(content, max_line_len, context_re.as_ref(), pattern);
106-
by_file.entry(file).or_default().push((line_num, cleaned));
107-
}
135+
for line in stdout.lines() {
136+
let parts: Vec<&str> = line.splitn(3, ':').collect();
108137

109-
let mut rtk_output = String::new();
110-
rtk_output.push_str(&format!("{} matches in {}F:\n\n", total, by_file.len()));
111-
112-
let mut shown = 0;
113-
let mut files: Vec<_> = by_file.iter().collect();
114-
files.sort_by_key(|(f, _)| *f);
138+
let (file, line_num, content) = if parts.len() == 3 {
139+
let ln = parts[1].parse().unwrap_or(0);
140+
(parts[0].to_string(), ln, parts[2])
141+
} else if parts.len() == 2 {
142+
let ln = parts[0].parse().unwrap_or(0);
143+
(path.to_string(), ln, parts[1])
144+
} else {
145+
continue;
146+
};
115147

116-
for (file, matches) in files {
117-
if shown >= max_results {
118-
break;
148+
let cleaned = clean_line(content, max_line_len, context_re.as_ref(), pattern);
149+
by_file.entry(file).or_default().push((line_num, cleaned));
119150
}
120151

121-
let file_display = compact_path(file);
122-
rtk_output.push_str(&format!("[file] {} ({}):\n", file_display, matches.len()));
152+
let mut out = String::new();
153+
out.push_str(&format!(
154+
"{} matches in {} files:\n\n",
155+
total_matches,
156+
by_file.len()
157+
));
158+
159+
let mut shown = 0;
160+
let mut files: Vec<_> = by_file.iter().collect();
161+
files.sort_by_key(|(f, _)| *f);
123162

124163
let per_file = config::limits().grep_max_per_file;
125-
for (line_num, content) in matches.iter().take(per_file) {
126-
rtk_output.push_str(&format!(" {:>4}: {}\n", line_num, content));
127-
shown += 1;
164+
for (file, matches) in files {
128165
if shown >= max_results {
129166
break;
130167
}
131-
}
132168

133-
if matches.len() > per_file {
134-
rtk_output.push_str(&format!(" +{}\n", matches.len() - per_file));
169+
let file_display = compact_path(file);
170+
out.push_str(&format!("[file] {} ({}):\n", file_display, matches.len()));
171+
172+
for (line_num, content) in matches.iter().take(per_file) {
173+
out.push_str(&format!(" {:>4}: {}\n", line_num, content));
174+
shown += 1;
175+
if shown >= max_results {
176+
break;
177+
}
178+
}
179+
180+
if matches.len() > per_file {
181+
out.push_str(&format!(" +{}\n", matches.len() - per_file));
182+
}
183+
out.push('\n');
135184
}
136-
rtk_output.push('\n');
137-
}
138185

139-
if total > shown {
140-
rtk_output.push_str(&format!("... +{}\n", total - shown));
141-
}
186+
if total_matches > shown {
187+
out.push_str(&format!("... +{}\n", total_matches - shown));
188+
}
189+
out
190+
};
142191

143192
print!("{}", rtk_output);
144193
timer.track(
@@ -320,4 +369,24 @@ mod tests {
320369
}
321370
// If rg is not installed, skip gracefully (test still passes)
322371
}
372+
373+
#[test]
374+
fn test_rg_no_ignore_vcs_flag_accepted() {
375+
// Verify rg accepts --no-ignore-vcs (used to match grep -r behavior for .gitignore)
376+
let mut cmd = resolved_command("rg");
377+
cmd.args([
378+
"-n",
379+
"--no-heading",
380+
"--no-ignore-vcs",
381+
"NONEXISTENT_PATTERN_12345",
382+
".",
383+
]);
384+
if let Ok(output) = cmd.output() {
385+
assert!(
386+
output.status.code() == Some(1) || output.status.success(),
387+
"rg --no-ignore-vcs should be accepted"
388+
);
389+
}
390+
// If rg is not installed, skip gracefully (test still passes)
391+
}
323392
}

src/core/filter.rs

Lines changed: 40 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -340,47 +340,13 @@ pub fn smart_truncate(content: &str, max_lines: usize, _lang: &Language) -> Stri
340340
return content.to_string();
341341
}
342342

343-
let mut result = Vec::with_capacity(max_lines);
344-
let mut kept_lines = 0;
345-
let mut skipped_section = false;
346-
347-
for line in &lines {
348-
let trimmed = line.trim();
349-
350-
// Always keep signatures and important structural elements
351-
let is_important = FUNC_SIGNATURE.is_match(trimmed)
352-
|| IMPORT_PATTERN.is_match(trimmed)
353-
|| trimmed.starts_with("pub ")
354-
|| trimmed.starts_with("export ")
355-
|| trimmed == "}"
356-
|| trimmed == "{";
357-
358-
if is_important || kept_lines < max_lines / 2 {
359-
if skipped_section {
360-
result.push(format!(
361-
" // ... {} lines omitted",
362-
lines.len() - kept_lines
363-
));
364-
skipped_section = false;
365-
}
366-
result.push((*line).to_string());
367-
kept_lines += 1;
368-
} else {
369-
skipped_section = true;
370-
}
371-
372-
if kept_lines >= max_lines - 1 {
373-
break;
374-
}
375-
}
376-
377-
if skipped_section || kept_lines < lines.len() {
378-
result.push(format!(
379-
"// ... {} more lines (total: {})",
380-
lines.len() - kept_lines,
381-
lines.len()
382-
));
383-
}
343+
// Clean truncation: take first max_lines lines only.
344+
// The old approach inserted synthetic "// ... N lines omitted" markers
345+
// that AI agents treated as file content, causing parsing confusion
346+
// and retry loops that burned more tokens than the filtering saved.
347+
let mut result: Vec<String> = lines[..max_lines].iter().map(|l| (*l).to_string()).collect();
348+
let omitted = lines.len() - max_lines;
349+
result.push(format!("[{} more lines]", omitted));
384350

385351
result.join("\n")
386352
}
@@ -499,9 +465,9 @@ fn main() {
499465
#[test]
500466
fn test_smart_truncate_overflow_count_exact() {
501467
// 200 plain-text lines with max_lines=20.
502-
// smart_truncate keeps the first max_lines/2=10 lines, then skips the rest.
503-
// The overflow message "// ... N more lines (total: T)" must satisfy:
504-
// kept_count + N == T
468+
// smart_truncate keeps the first max_lines=20 lines.
469+
// The overflow message "[N more lines]" must satisfy:
470+
// kept_count + N == total_lines
505471
let total_lines = 200usize;
506472
let max_lines = 20usize;
507473
let content: String = (0..total_lines)
@@ -538,4 +504,34 @@ fn main() {
538504
total_lines
539505
);
540506
}
507+
508+
#[test]
509+
fn test_smart_truncate_no_annotations() {
510+
let input = "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10\n";
511+
let output = smart_truncate(input, 3, &Language::Unknown);
512+
// Must NOT contain old-style "// ... N lines omitted" annotations
513+
assert!(
514+
!output.contains("// ..."),
515+
"smart_truncate must not insert synthetic comment annotations"
516+
);
517+
// Must contain clean truncation marker
518+
assert!(output.contains("[7 more lines]"));
519+
// Must preserve first 3 lines verbatim
520+
assert!(output.starts_with("line1\nline2\nline3\n"));
521+
}
522+
523+
#[test]
524+
fn test_smart_truncate_no_truncation_when_under_limit() {
525+
let input = "a\nb\nc\n";
526+
let output = smart_truncate(input, 10, &Language::Unknown);
527+
assert_eq!(output, input);
528+
assert!(!output.contains("more lines"));
529+
}
530+
531+
#[test]
532+
fn test_smart_truncate_exact_limit() {
533+
let input = "a\nb\nc";
534+
let output = smart_truncate(input, 3, &Language::Unknown);
535+
assert_eq!(output, input);
536+
}
541537
}

0 commit comments

Comments
 (0)