@@ -622,31 +622,30 @@ impl CanonicalizeContext {
622622 "munderover" | "msubsup" => if n_children != 3 {
623623 bail ! ( "{} should have 3 children:\n {}" , element_name, mml_to_string( mathml) ) ;
624624 } ,
625- "mmultiscripts" => {
626- let has_prescripts = mathml. children ( ) . iter ( )
627- . any ( |& child| name ( as_element ( child) ) == "mprescripts" ) ;
628- if has_prescripts ^ ( n_children. is_multiple_of ( 2 ) ) {
629- bail ! ( "{} has the wrong number of children:\n {}" , element_name, mml_to_string( mathml) ) ;
630- }
631- } ,
632- "mlongdiv" => if n_children < 3 {
633- bail ! ( "{} should have at least 3 children:\n {}" , element_name, mml_to_string( mathml) ) ;
634- } ,
635625 _ => if n_children != 2 {
636626 bail ! ( "{} should have 2 children:\n {}" , element_name, mml_to_string( mathml) ) ;
637627 } ,
638628 }
639- }
640- if matches ! ( element_name, "mtd" | "mtr" | "mlabeledtr" ) {
629+ } else if matches ! ( element_name, "mtd" | "mtr" | "mlabeledtr" ) {
641630 let parent_name = name ( get_parent ( mathml) ) ;
642631 if ( element_name == "mtr" || element_name == "mlabeledtr" ) && parent_name != "mtable" {
643632 bail ! ( "Illegal MathML: {} is not a child of mtable. Parent is {}" , element_name, mml_to_string( get_parent( mathml) ) ) ;
644633 } else if element_name == "mtd" && !( parent_name == "mtr" || parent_name == "mlabeledtr" ) {
645634 bail ! ( "Illegal MathML: mtd is not a child of {}. Parent is {}" , parent_name, mml_to_string( get_parent( mathml) ) ) ;
646635 }
647636 }
648- let children = mathml. children ( ) ;
649- if element_name == "semantics" {
637+ else if element_name == "mmultiscripts" {
638+ let has_prescripts = mathml. children ( ) . iter ( )
639+ . any ( |& child| name ( as_element ( child) ) == "mprescripts" ) ;
640+ if has_prescripts ^ ( n_children. is_multiple_of ( 2 ) ) {
641+ bail ! ( "{} has the wrong number of children:\n {}" , element_name, mml_to_string( mathml) ) ;
642+ }
643+ } else if element_name == "mlongdiv" {
644+ if n_children < 3 {
645+ bail ! ( "{} should have at least 3 children:\n {}" , element_name, mml_to_string( mathml) ) ;
646+ }
647+ } else if element_name == "semantics" {
648+ let children = mathml. children ( ) ;
650649 if children. is_empty ( ) {
651650 return Ok ( ( ) ) ;
652651 } else {
@@ -662,16 +661,16 @@ impl CanonicalizeContext {
662661 }
663662 return CanonicalizeContext :: assure_mathml ( presentation_element) ;
664663 }
665- }
666- if !IsNode :: is_mathml ( mathml) {
664+ } else if !IsNode :: is_mathml ( mathml) {
667665 if element_name == "annotation-xml" {
668666 bail ! ( "'annotation-xml' element is not child of 'semantics' element" ) ;
669667 } else {
670668 bail ! ( "'{}' is not a valid MathML element" , element_name) ;
671669 }
672670 }
671+
673672 // valid MathML element and not a leaf -- check the children
674- for child in children {
673+ for child in mathml . children ( ) {
675674 CanonicalizeContext :: assure_mathml ( as_element ( child) ) ?;
676675 }
677676 return Ok ( ( ) ) ;
@@ -771,7 +770,8 @@ impl CanonicalizeContext {
771770 name ( parent) . to_string ( )
772771 } ;
773772 let parent_requires_child = ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN . contains ( & parent_name) ||
774- matches ! ( parent_name. as_ref( ) , "mtr" | "mlabeledtr" | "mtable" ) ;
773+ matches ! ( parent_name. as_ref( ) , "mtr" | "mlabeledtr" | "mtable" ) ||
774+ parent_name == "mmultiscripts" ;
775775
776776 // handle empty leaves -- leaving it empty causes problems with the speech rules
777777 if is_leaf ( mathml) && !EMPTY_ELEMENTS . contains ( element_name) && as_text ( mathml) . is_empty ( ) {
@@ -781,7 +781,9 @@ impl CanonicalizeContext {
781781 if mathml. children ( ) . is_empty ( ) && !EMPTY_ELEMENTS . contains ( element_name) {
782782 if element_name == "mrow" && mathml. attribute ( INTENT_ATTR ) . is_none ( ) {
783783 // if it is an empty mrow that doesn't need to be there, get rid of it. Otherwise, replace it with an mtext
784- if parent_name == "mmultiscripts" { // MathML Core dropped "none" in favor of <mrow/>, but MathCAT is written with <none/>
784+ if parent_name == "mmultiscripts" && !mathml. preceding_siblings ( ) . is_empty ( ) {
785+ // MathML Core dropped "none" in favor of <mrow/>, but MathCAT is written with <none/>
786+ // Do substitutions for the scripts, not the base
785787 set_mathml_name ( mathml, "none" ) ;
786788 return Some ( mathml) ;
787789 }
@@ -1112,6 +1114,7 @@ impl CanonicalizeContext {
11121114 // cleaning children can add or delete subsequent children, so we need to constantly update the children (and mathml)
11131115 let mut children = mathml. children ( ) ;
11141116 let mut i = 0 ;
1117+
11151118 while i < children. len ( ) {
11161119 if let Some ( child) = children[ i] . element ( ) {
11171120 match self . clean_mathml ( child) {
@@ -1418,7 +1421,7 @@ impl CanonicalizeContext {
14181421 }
14191422
14201423
1421- /// looks for pairs of (letter, pseudoscript ) such as x' or p'q' all inside of a single token element
1424+ /// looks for pairs of (letter, pseudo-script ) such as x' or p'q' all inside of a single token element
14221425 fn split_apart_pseudo_scripts < ' a > ( mi : Element < ' a > ) -> Option < Element < ' a > > {
14231426 static IS_DEGREES_C_OR_F : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( r"[°º][CF]" ) . unwrap ( ) ) ;
14241427
@@ -1562,7 +1565,7 @@ impl CanonicalizeContext {
15621565 i += 2 ;
15631566 }
15641567 }
1565- if new_children. len ( ) == 1 {
1568+ if new_children. len ( ) <= 2 { // base only, or base and <mprescripts/>
15661569 mathml = as_element ( new_children[ 0 ] ) ;
15671570 } else {
15681571 mathml. replace_children ( new_children) ;
@@ -6743,9 +6746,20 @@ mod canonicalize_tests {
67436746 #[ test]
67446747 fn empty_mmultiscripts_485 ( ) -> Result < ( ) > {
67456748 let test_str = "<math><mmultiscripts> </mmultiscripts></math>" ;
6746- let target_str = " <math>
6747- <mtext data-added='missing-content' data-width='0.700'> </mtext>
6748- </math>" ;
6749+ let target_str = "" ; // shouldn't get to the point of comparing because the input is illegal.
6750+ let err = are_strs_canonically_equal_result ( test_str, target_str, & [ ] )
6751+ . expect_err ( "empty mmultiscripts should be rejected" ) ;
6752+ assert ! (
6753+ err. to_string( ) . contains( "mmultiscripts has the wrong number of children:\n <mmultiscripts></mmultiscripts>" ) ,
6754+ "unexpected error message: {err}"
6755+ ) ;
6756+ Ok ( ( ) )
6757+ }
6758+
6759+ #[ test]
6760+ fn empty_mmultiscripts_544 ( ) -> Result < ( ) > {
6761+ let test_str = "<math><mmultiscripts><mrow/><mprescripts></mprescripts><mrow/><mrow/></mmultiscripts></math>" ;
6762+ let target_str = "<math> <mtext data-changed='empty_content' data-width='0'> </mtext></math>" ;
67496763 are_strs_canonically_equal_result ( test_str, target_str, & [ ] )
67506764 }
67516765
0 commit comments