Stop hunk bodies from absorbing the next file's metadata.

parse.rs: the old `is_hunk_end` check (only `@@`, `---`, `+++`) is renamed to
`is_hunk_boundary` and kept for the invalid-hunk recovery loop. A new, stricter
`is_hunk_end` ends the hunk body at any token that cannot start a body line.

lossless.rs: adds a two-file regression test asserting per-hunk stats and that
`check_counts` reports no mismatches.

diff --git a/src/edit/lossless.rs b/src/edit/lossless.rs
index 19c3d0f..c7e183c 100644
--- a/src/edit/lossless.rs
+++ b/src/edit/lossless.rs
@@ -731,6 +731,49 @@ mod tests {
         assert_eq!(stats.additions, 2);
     }
 
+    #[test]
+    fn test_hunk_does_not_absorb_next_file_metadata() {
+        // git diff puts `diff --git` and `index` lines between files. The
+        // parser must not absorb them as context lines of the previous hunk
+        // — that would inflate the hunk's line counts and produce spurious
+        // hunk-line-count-mismatch diagnostics.
+        let text = "\
+diff --git a/f1 b/f1
+index aaa..bbb 100644
+--- a/f1
++++ b/f1
+@@ -1,2 +1,3 @@
+ ctx1
++add1
+ ctx2
+diff --git a/f2 b/f2
+index ccc..ddd 100644
+--- a/f2
++++ b/f2
+@@ -1,1 +1,1 @@
+-old
++new
+";
+        let parsed = parse(text);
+        let patch = parsed.tree();
+        let files: Vec<_> = patch.patch_files().collect();
+        assert_eq!(files.len(), 2);
+
+        let hunk1 = files[0].hunks().next().unwrap();
+        let stats1 = hunk1.stats();
+        assert_eq!(stats1.context, 2);
+        assert_eq!(stats1.additions, 1);
+        assert_eq!(stats1.deletions, 0);
+        assert_eq!(hunk1.header().unwrap().check_counts(&hunk1), vec![]);
+
+        let hunk2 = files[1].hunks().next().unwrap();
+        let stats2 = hunk2.stats();
+        assert_eq!(stats2.context, 0);
+        assert_eq!(stats2.additions, 1);
+        assert_eq!(stats2.deletions, 1);
+        assert_eq!(hunk2.header().unwrap().check_counts(&hunk2), vec![]);
+    }
+
     #[test]
     fn test_check_counts_mismatch() {
         let text = "--- a/f\n+++ b/f\n@@ -1,99 +1,99 @@\n ctx\n-old\n+new\n";
diff --git a/src/edit/parse.rs b/src/edit/parse.rs
index 69050ee..ccfac29 100644
--- a/src/edit/parse.rs
+++ b/src/edit/parse.rs
@@ -243,7 +243,7 @@ impl<'a> Parser<'a> {
 
             // Continue parsing lines that might belong to this invalid hunk
             // until we find another hunk or file boundary
-            while !self.at_end() && !self.is_hunk_end() {
+            while !self.at_end() && !self.is_hunk_boundary() {
                 self.skip_to_next_line();
             }
             return;
@@ -364,13 +364,40 @@ impl<'a> Parser<'a> {
         self.builder.finish_node();
     }
 
-    fn is_hunk_end(&self) -> bool {
-        // Check if we're at the start of a new hunk or file
+    /// Returns true at a hunk or file boundary (`@@`, `---`, `+++`).
+    /// Used for recovery loops that want to skip until the next known marker.
+    fn is_hunk_boundary(&self) -> bool {
         (self.at(SyntaxKind::AT) && self.peek_text(0) == Some("@@"))
             || (self.at(SyntaxKind::MINUS) && self.peek_text(0) == Some("---"))
             || (self.at(SyntaxKind::PLUS) && self.peek_text(0) == Some("+++"))
     }
 
+    /// Returns true when the current token can no longer be part of a
+    /// unified hunk body. A valid body line must start with one of:
+    ///   ' '  (context)
+    ///   '+'  (addition) — but `+++` is a new-file header
+    ///   '-'  (deletion) — but `---` is an old-file header
+    ///   '\\' (e.g. "\ No newline at end of file")
+    ///   '\n' (an empty line, treated as empty context)
+    /// Anything else — `diff --git`, `index`, `Binary files`, free text,
+    /// etc. — means the hunk body has ended. Without this stricter check,
+    /// surrounding metadata gets silently absorbed into the previous hunk
+    /// as context lines, inflating its line counts.
+    fn is_hunk_end(&self) -> bool {
+        match self.current_kind() {
+            Some(SyntaxKind::AT) if self.peek_text(0) == Some("@@") => true,
+            Some(SyntaxKind::MINUS) if self.peek_text(0) == Some("---") => true,
+            Some(SyntaxKind::PLUS) if self.peek_text(0) == Some("+++") => true,
+            Some(SyntaxKind::SPACE)
+            | Some(SyntaxKind::PLUS)
+            | Some(SyntaxKind::MINUS)
+            | Some(SyntaxKind::BACKSLASH)
+            | Some(SyntaxKind::NEWLINE) => false,
+            None => true,
+            _ => true,
+        }
+    }
+
     fn current_kind(&self) -> Option<SyntaxKind> {
         self.tokens.get(self.cursor).map(|(kind, _)| *kind)
     }