goobits · mudcube · Jun 9, 2026
diff --git a/.jules/bolt.md b/.jules/bolt.md
@@ -17,3 +17,7 @@
 ## 2025-05-19 - File Discovery Allocations
 **Learning:** In `discover_importing_files`, `WalkBuilder` results were being converted to `PathBuf` via `.map(|e| e.into_path())` *before* filtering. This caused allocations for every single file in the workspace (including excluded files and directories).
 **Action:** Filter `ignore::DirEntry` directly using `entry.file_type()` and `entry.path()` before mapping to `PathBuf`. This avoids allocations for non-matching files.
+
+## 2026-06-09 - String Match Line Number Rescans
+**Learning:** In `find_literal_matches`, computing each match's line and column by rescanning the content from byte 0 (once for every match yielded by `.match_indices()`) costs O(N*M) time, where N is the content length and M is the number of matches.
+**Action:** Track the position statefully: keep `current_byte_idx`, `current_line`, and `current_column` across loop iterations and scan only the slice between the previously recorded match and the current one. Each character is then visited at most once, so the line/column bookkeeping is O(N) in total.
diff --git a/crates/mill-handlers/src/handlers/workspace/literal_matcher.rs b/crates/mill-handlers/src/handlers/workspace/literal_matcher.rs
@@ -59,30 +59,6 @@ fn has_word_boundary_at(content: &str, byte_pos: usize) -> bool {
         .unwrap_or(true)
 }
 
-/// Convert byte offset to line and column (both 1-indexed)
-///
-/// This handles UTF-8 correctly by counting characters, not bytes.
-/// Line breaks are detected by '\n' (supports Unix, Windows CRLF is counted correctly).
-fn byte_offset_to_line_column(content: &str, byte_offset: usize) -> (u32, u32) {
-    let mut line = 1u32;
-    let mut column = 1u32;
-
-    for (byte_idx, ch) in content.char_indices() {
-        if byte_idx >= byte_offset {
-            break;
-        }
-
-        if ch == '\n' {
-            line += 1;
-            column = 1;
-        } else {
-            column += 1;
-        }
-    }
-
-    (line, column)
-}
-
 /// Find all literal matches of a pattern in content
 ///
 /// # Arguments
@@ -111,6 +87,9 @@ pub fn find_literal_matches(content: &str, pattern: &str, whole_word: bool) -> V
     }
 
     let mut matches = Vec::new();
+    let mut current_byte_idx = 0;
+    let mut current_line = 1u32;
+    let mut current_column = 1u32;
 
     // Use efficient string search (std::str::match_indices uses Boyer-Moore-like algorithm)
     for (byte_offset, matched_str) in content.match_indices(pattern) {
@@ -143,14 +122,23 @@ pub fn find_literal_matches(content: &str, pattern: &str, whole_word: bool) -> V
             }
         }
 
-        let (line, column) = byte_offset_to_line_column(content, byte_offset);
+        // Update line and column based on characters since the last match
+        for ch in content[current_byte_idx..byte_offset].chars() {
+            if ch == '\n' {
+                current_line += 1;
+                current_column = 1;
+            } else {
+                current_column += 1;
+            }
+        }
+        current_byte_idx = byte_offset;
 
         matches.push(Match {
             start_byte: byte_offset,
             end_byte: byte_offset + pattern.len(),
             matched_text: matched_str.to_string(),
-            line,
-            column,
+            line: current_line,
+            column: current_column,
         });
     }
 
@@ -284,20 +272,6 @@ mod tests {
         assert_eq!(matches[1].column, 8); // After emoji
     }
 
-    #[test]
-    fn test_line_column_calculation() {
-        let content = "abc\ndefgh\nijkl";
-        // Position of 'i' is at byte 10, line 3, column 1
-        let (line, column) = byte_offset_to_line_column(content, 10);
-        assert_eq!(line, 3);
-        assert_eq!(column, 1);
-
-        // Position of 'e' is at byte 5, line 2, column 2
-        let (line, column) = byte_offset_to_line_column(content, 5);
-        assert_eq!(line, 2);
-        assert_eq!(column, 2);
-    }
-
     #[test]
     fn test_windows_line_endings() {
         let content = "line1\r\nline2\r\nline3";