Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@
## 2025-05-19 - File Discovery Allocations
**Learning:** In `discover_importing_files`, `WalkBuilder` results were being converted to `PathBuf` via `.map(|e| e.into_path())` *before* filtering. This caused allocations for every single file in the workspace (including excluded files and directories).
**Action:** Filter `ignore::DirEntry` directly using `entry.file_type()` and `entry.path()` before mapping to `PathBuf`. This avoids allocations for non-matching files.

## 2024-06-09 - String Match Line/Column Rescanning
**Learning:** In `find_literal_matches`, line and column numbers were calculated by rescanning the string from index 0 for every match returned by `.match_indices()`. This results in O(N*M) time complexity, where N is the length of the content and M is the number of matches.
**Action:** Track line and column numbers statefully by maintaining `current_byte_idx`, `current_line`, and `current_column` variables across the loop. For each match, advance them over only the characters in the string slice between the previous match and the current match (resetting the column on each newline), so the content is walked at most once and the position tracking is O(N).
56 changes: 15 additions & 41 deletions crates/mill-handlers/src/handlers/workspace/literal_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,30 +59,6 @@ fn has_word_boundary_at(content: &str, byte_pos: usize) -> bool {
.unwrap_or(true)
}

/// Translate a byte offset within `content` into a `(line, column)` pair.
///
/// Both values are 1-indexed. Columns advance once per `char`, not per byte,
/// so a multi-byte UTF-8 character occupies a single column. Only `'\n'`
/// starts a new line; in a CRLF sequence the `'\r'` is counted as an ordinary
/// column on the line it terminates.
///
/// Every character whose starting byte index is strictly less than
/// `byte_offset` is counted. An offset at or past the end of `content`
/// therefore yields the position just after the final character.
fn byte_offset_to_line_column(content: &str, byte_offset: usize) -> (u32, u32) {
    content
        .char_indices()
        // Stop at the first character that starts at or after the target offset.
        .take_while(|&(idx, _)| idx < byte_offset)
        .fold((1u32, 1u32), |(line, column), (_, ch)| {
            if ch == '\n' {
                // A newline moves to the first column of the next line.
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}

/// Find all literal matches of a pattern in content
///
/// # Arguments
Expand Down Expand Up @@ -111,6 +87,9 @@ pub fn find_literal_matches(content: &str, pattern: &str, whole_word: bool) -> V
}

let mut matches = Vec::new();
let mut current_byte_idx = 0;
let mut current_line = 1u32;
let mut current_column = 1u32;

// Use efficient string search (std's `str::match_indices` uses the Two-Way string-matching algorithm for `&str` patterns)
for (byte_offset, matched_str) in content.match_indices(pattern) {
Expand Down Expand Up @@ -143,14 +122,23 @@ pub fn find_literal_matches(content: &str, pattern: &str, whole_word: bool) -> V
}
}

let (line, column) = byte_offset_to_line_column(content, byte_offset);
// Update line and column based on characters since the last match
for ch in content[current_byte_idx..byte_offset].chars() {
if ch == '\n' {
current_line += 1;
current_column = 1;
} else {
current_column += 1;
}
}
current_byte_idx = byte_offset;

matches.push(Match {
start_byte: byte_offset,
end_byte: byte_offset + pattern.len(),
matched_text: matched_str.to_string(),
line,
column,
line: current_line,
column: current_column,
});
}

Expand Down Expand Up @@ -284,20 +272,6 @@ mod tests {
assert_eq!(matches[1].column, 8); // After emoji
}

#[test]
fn test_line_column_calculation() {
    // Layout: "abc\n" occupies bytes 0..=3, "defgh\n" bytes 4..=9, "ijkl" bytes 10..=13.
    let content = "abc\ndefgh\nijkl";

    // 'i' sits at byte 10: first column of the third line.
    assert_eq!(byte_offset_to_line_column(content, 10), (3, 1));

    // 'e' sits at byte 5: second column of the second line.
    assert_eq!(byte_offset_to_line_column(content, 5), (2, 2));
}

#[test]
fn test_windows_line_endings() {
let content = "line1\r\nline2\r\nline3";
Expand Down
Loading