@@ -784,3 +784,82 @@ fn test_md() {
784784 expected. trim( )
785785 ) ;
786786}
787+
/// Robustness check for the `md` attribute's markdown handling.
///
/// Feeds a collection of awkward inputs (unbalanced delimiters, empty or
/// whitespace-only code spans, multi-byte characters next to delimiters and
/// escapes, trailing backslashes) through `transform_str_default` and asserts
/// only that each call returns `Ok`. The rendered output is deliberately not
/// inspected; a panic inside the parser also fails the test.
#[test]
fn test_md_parser_safety() {
    for input in [
        // Mixed and interleaved delimiters
        r#"<rect xy="0" wh="10" md="***nested***"/>"#,
        r#"<rect xy="0" wh="10" md="*a_b*c_d"/>"#,
        // Adjacent backticks with no content between them
        r#"<rect xy="0" wh="10" md="``"/>"#,
        // Two backtick pairs with nothing in between
        r#"<rect xy="0" wh="10" md="````"/>"#,
        // Single backtick (unmatched)
        r#"<rect xy="0" wh="10" md="`"/>"#,
        // Code block containing only spaces: one space, then two, so that
        // both the single-character and multi-character cases are covered
        r#"<rect xy="0" wh="10" md="` `"/>"#,
        r#"<rect xy="0" wh="10" md="`  `"/>"#,
        // Multi-byte char padded by spaces inside a code block
        r#"<rect xy="0" wh="10" md="` é `"/>"#,
        // Multi-byte chars directly adjacent to backticks
        r#"<rect xy="0" wh="10" md="`café`"/>"#,
        // Code block with only multi-byte chars and spaces
        r#"<rect xy="0" wh="10" md="` ñ ñ `"/>"#,
        // CJK characters in code block
        r#"<rect xy="0" wh="10" md="` 你好 `"/>"#,
        // Emoji in code block (4-byte UTF-8)
        r#"<rect xy="0" wh="10" md="` 🦀 `"/>"#,
        // \n followed by multi-byte character
        r#"<rect xy="0" wh="10" md="hello\né"/>"#,
        // \n followed by CJK
        r#"<rect xy="0" wh="10" md="line1\n你好"/>"#,
        // \n followed by emoji
        r#"<rect xy="0" wh="10" md="line1\n🦀world"/>"#,
        // Multiple \n in sequence
        r#"<rect xy="0" wh="10" md="a\n\n\nb"/>"#,
        // Trailing single backslash
        r#"<rect xy="0" wh="10" md="hello\"/>"#,
        // Trailing double backslash
        r#"<rect xy="0" wh="10" md="hello\\"/>"#,
        // Trailing triple backslash
        r#"<rect xy="0" wh="10" md="hello\\\"/>"#,
        // Only a backslash
        r#"<rect xy="0" wh="10" md="\"/>"#,
        // Backslash before delimiter at end
        r#"<rect xy="0" wh="10" md="hello\*"/>"#,
        // Backslash at end after code block
        r#"<rect xy="0" wh="10" md="`code`\"/>"#,
        // Escaped backtick inside code block
        r#"<rect xy="0" wh="10" md="`\``"/>"#,
        // Unbalanced emphasis runs of decreasing length with a stray bracket
        r#"<rect xy="0" wh="10" md="***a]b**c*d"/>"#,
        // All delimiter types in one string
        r#"<rect xy="0" wh="10" md="`code` *italic* **bold** _also_ __also__"/>"#,
        // Empty input
        r#"<rect xy="0" wh="10" md=""/>"#,
        // Only delimiters and backslashes, no text
        r#"<rect xy="0" wh="10" md="***___```\\\"/>"#,
        // Very long delimiter run
        r#"<rect xy="0" wh="10" md="**********hello**********"/>"#,
        // Emphasis spanning a \n escape
        r#"<rect xy="0" wh="10" md="*line1\nline2*"/>"#,
        // Backslash escapes in front of delimiters, a backtick and a backslash
        r#"<rect xy="0" wh="10" md="\*\*\`\\_"/>"#,
        // Delimiter immediately after punctuation, before alphanumeric
        r#"<rect xy="0" wh="10" md="(*foo*)"/>"#,
        // Delimiter between two punctuation marks
        r#"<rect xy="0" wh="10" md="!*!*!"/>"#,
        // Underscore emphasis next to punctuation (stricter rules per CommonMark)
        r#"<rect xy="0" wh="10" md="(_foo_)"/>"#,
        // Intraword strong emphasis: delimiter runs with text on both sides
        r#"<rect xy="0" wh="10" md="**foo**bar**baz**"/>"#,
        // Emphasis starting/ending at punctuation boundaries
        r#"<rect xy="0" wh="10" md="a]]*b[*c"/>"#,
    ] {
        assert!(
            transform_str_default(input).is_ok(),
            "Markdown parser failed on input: {input}"
        );
    }
}
0 commit comments