diff --git a/.jules/bolt.md b/.jules/bolt.md index 541c0ab51..5cd37e0b7 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -17,3 +17,7 @@ ## 2025-05-19 - File Discovery Allocations **Learning:** In `discover_importing_files`, `WalkBuilder` results were being converted to `PathBuf` via `.map(|e| e.into_path())` *before* filtering. This caused allocations for every single file in the workspace (including excluded files and directories). **Action:** Filter `ignore::DirEntry` directly using `entry.file_type()` and `entry.path()` before mapping to `PathBuf`. This avoids allocations for non-matching files. + +## 2025-06-03 - Refactoring Allocations +**Learning:** In `refactoring.rs`, `insert_at_line`, `replace_range`, and `delete_range` were collecting all lines into `Vec` via `.map(|s| s.to_string()).collect()`, causing excessive O(N) memory allocations for every modification. +**Action:** Use string iterators, `String::with_capacity(source.len())`, and `.push_str()` instead of collecting into a `Vec` when modifying multiline source code, which avoids per-line allocations. diff --git a/crates/mill-lang-common/src/refactoring.rs b/crates/mill-lang-common/src/refactoring.rs index 598ded560..42b10c16d 100644 --- a/crates/mill-lang-common/src/refactoring.rs +++ b/crates/mill-lang-common/src/refactoring.rs @@ -334,54 +334,123 @@ impl LineExtractor { /// /// Returns the modified source code pub fn insert_at_line(source: &str, line: u32, text: &str) -> String { - let mut lines: Vec = source.lines().map(|s| s.to_string()).collect(); let insert_pos = line as usize; + let mut lines = source.lines(); + + // Quick check if line is beyond EOF (rough estimation via iteration) + // If we want exact line count, we would iterate fully, but we can just do it while building + + // Bolt performance optimization: use String::with_capacity and iterators instead of + // collecting all lines into a Vec and joining. This avoids O(N) string allocations. + let mut out = String::with_capacity(source.len() + text.len() + 1); + + for _ in 0..insert_pos { + if let Some(l) = lines.next() { + out.push_str(l); + out.push('\n'); + } else { + // insert_pos is > lines.len(), return original source + return source.to_string(); + } + } - if insert_pos > lines.len() { - return source.to_string(); + out.push_str(text); + out.push('\n'); + + for l in lines { + out.push_str(l); + out.push('\n'); } - lines.insert(insert_pos, text.to_string()); - lines.join("\n") + // Remove trailing newline if it wasn't there in the original (unless text brought its own newline at the end) + // Actually, lines.join("\n") doesn't leave a trailing newline unless lines is empty or we explicitly add it. + // source.lines() doesn't include the trailing newline. + if out.ends_with('\n') { + out.pop(); + } + + out } /// Replace a range of lines with new text /// /// Returns the modified source code pub fn replace_range(source: &str, range: CodeRange, new_text: &str) -> String { - let mut lines: Vec = source.lines().map(|s| s.to_string()).collect(); - let start = range.start_line as usize; let end = range.end_line as usize; - if start >= lines.len() || end >= lines.len() { - return source.to_string(); + // Bolt performance optimization: use iterators instead of collecting into Vec + let mut lines = source.lines(); + let mut out = String::with_capacity(source.len() + new_text.len()); + + for _ in 0..start { + if let Some(l) = lines.next() { + out.push_str(l); + out.push('\n'); + } else { + return source.to_string(); + } + } + + // Drain the replaced lines + for _ in start..=end { + if lines.next().is_none() { + return source.to_string(); + } + } + + for new_line in new_text.lines() { + out.push_str(new_line); + out.push('\n'); } - // Remove old lines - lines.drain(start..=end); + for l in lines { + out.push_str(l); + out.push('\n'); + } - // Insert new text (split into lines if necessary) - for (i, new_line) in new_text.lines().enumerate() { - lines.insert(start + i, new_line.to_string()); + if out.ends_with('\n') { + out.pop(); } - lines.join("\n") + out } /// Delete a range of lines pub fn delete_range(source: &str, range: CodeRange) -> String { - let mut lines: Vec = source.lines().map(|s| s.to_string()).collect(); - let start = range.start_line as usize; let end = range.end_line as usize; - if start >= lines.len() || end >= lines.len() { - return source.to_string(); + // Bolt performance optimization: use iterators instead of collecting into Vec + let mut lines = source.lines(); + let mut out = String::with_capacity(source.len()); + + for _ in 0..start { + if let Some(l) = lines.next() { + out.push_str(l); + out.push('\n'); + } else { + return source.to_string(); + } + } + + // Drain the deleted lines + for _ in start..=end { + if lines.next().is_none() { + return source.to_string(); + } + } + + for l in lines { + out.push_str(l); + out.push('\n'); + } + + if out.ends_with('\n') { + out.pop(); } - lines.drain(start..=end); - lines.join("\n") + out } }