diff --git a/crates/perry-runtime/src/regex.rs b/crates/perry-runtime/src/regex.rs
index ba328e12f3..7209705e6b 100644
--- a/crates/perry-runtime/src/regex.rs
+++ b/crates/perry-runtime/src/regex.rs
@@ -1885,4 +1885,24 @@ mod tests {
 
         assert_eq!(string_as_str(result), "hell0 w0rld");
     }
+
+    #[test]
+    fn escaped_hyphen_in_class_stays_literal() {
+        // #4425: `\-` inside a character class is always a literal hyphen. The
+        // Rust `regex` crate reads a bare `-` flanked by members as a range
+        // operator, so the escape must be preserved or `[a\- ]` translates to
+        // the invalid range `[a- ]`.
+        assert_eq!(js_regex_to_rust(r"[a\- ]"), r"[a\- ]");
+        assert_eq!(js_regex_to_rust(r"[:\- ]"), r"[:\- ]");
+        assert_eq!(js_regex_to_rust(r"[\-]"), r"[\-]");
+        // Outside a class a hyphen carries no range meaning, so the escape is dropped.
+        assert_eq!(js_regex_to_rust(r"a\-b"), "a-b");
+
+        // The patterns that crashed `marked` at module-init must now compile.
+        for pat in [r"[a\- ]", r"[:\- ]", r" {0,3}\|?(?:[:\- ]*\|)+[\:\- ]*\n"] {
+            let flags = make_string("");
+            let re = js_regexp_new(make_string(pat), flags);
+            assert!(!re.is_null(), "pattern failed to construct: {pat}");
+        }
+    }
 }
diff --git a/crates/perry-runtime/src/regex/grammar.rs b/crates/perry-runtime/src/regex/grammar.rs
index 83300d52e0..2b0283a8c5 100644
--- a/crates/perry-runtime/src/regex/grammar.rs
+++ b/crates/perry-runtime/src/regex/grammar.rs
@@ -257,7 +257,18 @@ pub(super) fn js_regex_to_rust(pattern: &str) -> String {
                     }
                 }
                 ch if is_regex_identity_escape(ch) => {
-                    push_escaped_literal(&mut result, ch);
+                    // Inside a character class an escaped hyphen `\-` is always a
+                    // literal hyphen, but the Rust `regex` crate reads a bare `-`
+                    // flanked by members as a range operator (so `[a\- ]` would
+                    // become the invalid range `[a- ]`). Keep the escape so it
+                    // stays a literal regardless of position. `marked`'s GFM
+                    // table-delimiter regex `[:\- ]` relies on this.
+                    if in_class && ch == '-' {
+                        result.push('\\');
+                        result.push('-');
+                    } else {
+                        push_escaped_literal(&mut result, ch);
+                    }
                     i += 2;
                 }
                 // Pass through all other backslash sequences as-is. (An escaped