diff --git a/src/edit/lossless.rs b/src/edit/lossless.rs index a389b21..15150fb 100644 --- a/src/edit/lossless.rs +++ b/src/edit/lossless.rs @@ -797,6 +797,24 @@ index ccc..ddd 100644 assert_eq!(hunk.header().unwrap().check_counts(&hunk), vec![]); } + #[test] + fn test_path_with_special_characters() { + // Filenames legitimately contain `-`, `+`, `@`, `,` etc. The lexer + // emits each as its own token; the path parser must include them. + let text = "\ +--- a/foo-bar+baz@1.0,v ++++ b/foo-bar+baz@1.0,v +@@ -1 +1 @@ +-old ++new +"; + let parsed = parse(text); + let patch = parsed.tree(); + let file = patch.patch_files().next().unwrap(); + assert_eq!(file.old_path().as_deref(), Some("a/foo-bar+baz@1.0,v")); + assert_eq!(file.new_path().as_deref(), Some("b/foo-bar+baz@1.0,v")); + } + #[test] fn test_check_counts_mismatch() { let text = "--- a/f\n+++ b/f\n@@ -1,99 +1,99 @@\n ctx\n-old\n+new\n"; diff --git a/src/edit/parse.rs b/src/edit/parse.rs index ef1a17e..297acff 100644 --- a/src/edit/parse.rs +++ b/src/edit/parse.rs @@ -103,48 +103,9 @@ impl<'a> Parser<'a> { self.advance(); // - self.advance(); // - - // Skip whitespace self.skip_whitespace(); - - // Parse path - collect all tokens that make up the path - let mut path_parts = Vec::new(); - let mut collecting_path = true; - while !self.at(SyntaxKind::NEWLINE) && !self.at_end() && collecting_path { - match self.current_kind() { - Some(SyntaxKind::TEXT) - | Some(SyntaxKind::SLASH) - | Some(SyntaxKind::DOT) - | Some(SyntaxKind::NUMBER) - | Some(SyntaxKind::COLON) - | Some(SyntaxKind::BACKSLASH) => { - if let Some(text) = self.current_text() { - path_parts.push(text.to_string()); - } - self.advance_without_emit(); - } - Some(SyntaxKind::WHITESPACE) if !path_parts.is_empty() => { - // Stop at whitespace after we've collected some path parts (timestamp follows) - collecting_path = false; - } - _ => { - collecting_path = false; - } - } - } - - if !path_parts.is_empty() { - let path = path_parts.join(""); - self.builder.token(SyntaxKind::PATH.into(), &path); - } - - // Skip to end of line - while !self.at(SyntaxKind::NEWLINE) && !self.at_end() { - self.advance(); - } - - if self.at(SyntaxKind::NEWLINE) { - self.advance(); - } + self.parse_file_path(); + self.skip_to_eol(); self.builder.finish_node(); } @@ -157,48 +118,9 @@ impl<'a> Parser<'a> { self.advance(); // + self.advance(); // + - // Skip whitespace self.skip_whitespace(); - - // Parse path - collect all tokens that make up the path - let mut path_parts = Vec::new(); - let mut collecting_path = true; - while !self.at(SyntaxKind::NEWLINE) && !self.at_end() && collecting_path { - match self.current_kind() { - Some(SyntaxKind::TEXT) - | Some(SyntaxKind::SLASH) - | Some(SyntaxKind::DOT) - | Some(SyntaxKind::NUMBER) - | Some(SyntaxKind::COLON) - | Some(SyntaxKind::BACKSLASH) => { - if let Some(text) = self.current_text() { - path_parts.push(text.to_string()); - } - self.advance_without_emit(); - } - Some(SyntaxKind::WHITESPACE) if !path_parts.is_empty() => { - // Stop at whitespace after we've collected some path parts (timestamp follows) - collecting_path = false; - } - _ => { - collecting_path = false; - } - } - } - - if !path_parts.is_empty() { - let path = path_parts.join(""); - self.builder.token(SyntaxKind::PATH.into(), &path); - } - - // Skip to end of line - while !self.at(SyntaxKind::NEWLINE) && !self.at_end() { - self.advance(); - } - - if self.at(SyntaxKind::NEWLINE) { - self.advance(); - } + self.parse_file_path(); + self.skip_to_eol(); self.builder.finish_node(); } @@ -905,36 +827,28 @@ impl<'a> Parser<'a> { self.builder.finish_node(); } + /// Emit a single PATH token for the file path on a header line. + /// + /// The path runs from the current position to the next whitespace (which + /// separates it from an optional timestamp) or to end of line. Filenames + /// legitimately contain `-`, `+`, `@`, `,`, etc.; the lexer emits those + /// as their own token kinds, so we stitch any non-whitespace tokens back + /// together rather than maintaining an accept-list per character. fn parse_file_path(&mut self) { - let mut path_parts = Vec::new(); - let mut collecting_path = true; - - while !self.at(SyntaxKind::NEWLINE) && !self.at_end() && collecting_path { + let mut path = String::new(); + while !self.at_end() { match self.current_kind() { - Some(SyntaxKind::TEXT) - | Some(SyntaxKind::SLASH) - | Some(SyntaxKind::DOT) - | Some(SyntaxKind::NUMBER) - | Some(SyntaxKind::MINUS) - | Some(SyntaxKind::STAR) - | Some(SyntaxKind::COLON) - | Some(SyntaxKind::BACKSLASH) => { + Some(SyntaxKind::WHITESPACE) | Some(SyntaxKind::NEWLINE) | None => break, + Some(_) => { if let Some(text) = self.current_text() { - path_parts.push(text.to_string()); + path.push_str(text); } self.advance_without_emit(); } - Some(SyntaxKind::WHITESPACE) if !path_parts.is_empty() => { - collecting_path = false; - } - _ => { - collecting_path = false; - } } } - if !path_parts.is_empty() { - let path = path_parts.join(""); + if !path.is_empty() { self.builder.token(SyntaxKind::PATH.into(), &path); } }