breezy-team · jelmer · Apr 29, 2026 · Apr 29, 2026
@@ -731,6 +731,49 @@ mod tests {
         assert_eq!(stats.additions, 2);
     }
 
+    #[test]
+    fn test_hunk_does_not_absorb_next_file_metadata() {
+        // Between two files, git diff emits `diff --git` and `index` lines.
+        // If the parser took those for context lines of the preceding hunk,
+        // that hunk's line counts would be inflated and it would be flagged
+        // with a spurious hunk-line-count-mismatch diagnostic.
+        let text = "\
+diff --git a/f1 b/f1
+index aaa..bbb 100644
+--- a/f1
++++ b/f1
+@@ -1,2 +1,3 @@
+ ctx1
++add1
+ ctx2
+diff --git a/f2 b/f2
+index ccc..ddd 100644
+--- a/f2
++++ b/f2
+@@ -1,1 +1,1 @@
+-old
++new
+";
+        let parsed = parse(text);
+        let tree = parsed.tree();
+        let patch_files: Vec<_> = tree.patch_files().collect();
+        assert_eq!(patch_files.len(), 2);
+
+        let first_hunk = patch_files[0].hunks().next().unwrap();
+        let first_stats = first_hunk.stats();
+        assert_eq!(first_stats.context, 2);
+        assert_eq!(first_stats.additions, 1);
+        assert_eq!(first_stats.deletions, 0);
+        assert_eq!(first_hunk.header().unwrap().check_counts(&first_hunk), vec![]);
+
+        let second_hunk = patch_files[1].hunks().next().unwrap();
+        let second_stats = second_hunk.stats();
+        assert_eq!(second_stats.context, 0);
+        assert_eq!(second_stats.additions, 1);
+        assert_eq!(second_stats.deletions, 1);
+        assert_eq!(second_hunk.header().unwrap().check_counts(&second_hunk), vec![]);
+    }
+
     #[test]
     fn test_check_counts_mismatch() {
         let text = "--- a/f\n+++ b/f\n@@ -1,99 +1,99 @@\n ctx\n-old\n+new\n";

@@ -243,7 +243,7 @@ impl<'a> Parser<'a> {
 
             // Continue parsing lines that might belong to this invalid hunk
             // until we find another hunk or file boundary
-            while !self.at_end() && !self.is_hunk_end() {
+            while !self.at_end() && !self.is_hunk_boundary() {
                 self.skip_to_next_line();
             }
             return;
@@ -364,13 +364,40 @@ impl<'a> Parser<'a> {
         self.builder.finish_node();
     }
 
-    fn is_hunk_end(&self) -> bool {
-        // Check if we're at the start of a new hunk or file
+    /// Returns true at a hunk or file boundary (`@@`, `---`, `+++`).
+    /// Used for recovery loops that want to skip until the next known marker.
+    fn is_hunk_boundary(&self) -> bool {
         (self.at(SyntaxKind::AT) && self.peek_text(0) == Some("@@"))
             || (self.at(SyntaxKind::MINUS) && self.peek_text(0) == Some("---"))
             || (self.at(SyntaxKind::PLUS) && self.peek_text(0) == Some("+++"))
     }
 
+    /// Returns true when the current token can no longer be part of a
+    /// unified hunk body. A valid body line must start with one of:
+    ///   ' '  (context)
+    ///   '+'  (addition)         — but `+++` is a new-file header
+    ///   '-'  (deletion)         — but `---` is an old-file header
+    ///   '\\' (e.g. "\ No newline at end of file")
+    ///   '\n' (an empty line, treated as empty context)
+    /// Anything else — `diff --git`, `index`, `Binary files`, free text,
+    /// or end of input — means the hunk body has ended. Without this check,
+    /// surrounding metadata gets silently absorbed into the previous hunk
+    /// as context lines, inflating its line counts.
+    fn is_hunk_end(&self) -> bool {
+        match self.current_kind() {
+            Some(SyntaxKind::AT) if self.peek_text(0) == Some("@@") => true,
+            Some(SyntaxKind::MINUS) if self.peek_text(0) == Some("---") => true,
+            Some(SyntaxKind::PLUS) if self.peek_text(0) == Some("+++") => true,
+            Some(SyntaxKind::SPACE)
+            | Some(SyntaxKind::PLUS)
+            | Some(SyntaxKind::MINUS)
+            | Some(SyntaxKind::BACKSLASH)
+            | Some(SyntaxKind::NEWLINE) => false,
+            // End of input (`None`) and every other token kind end the body.
+            _ => true,
+        }
+    }
+
     fn current_kind(&self) -> Option<SyntaxKind> {
         self.tokens.get(self.cursor).map(|(kind, _)| *kind)
     }