Skip to content

Commit de0fc56

Browse files
committed
Fix some problems with malformed mmultiscripts.
Re-fixes #485. Fixes #544. Added/fixed tests for both.
1 parent 68671f4 commit de0fc56

1 file changed

Lines changed: 38 additions & 24 deletions

File tree

src/canonicalize.rs

Lines changed: 38 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -622,31 +622,30 @@ impl CanonicalizeContext {
622622
"munderover" | "msubsup" => if n_children != 3 {
623623
bail!("{} should have 3 children:\n{}", element_name, mml_to_string(mathml));
624624
},
625-
"mmultiscripts" => {
626-
let has_prescripts = mathml.children().iter()
627-
.any(|&child| name(as_element(child)) == "mprescripts");
628-
if has_prescripts ^ (n_children.is_multiple_of(2)) {
629-
bail!("{} has the wrong number of children:\n{}", element_name, mml_to_string(mathml));
630-
}
631-
},
632-
"mlongdiv" => if n_children < 3 {
633-
bail!("{} should have at least 3 children:\n{}", element_name, mml_to_string(mathml));
634-
},
635625
_ => if n_children != 2 {
636626
bail!("{} should have 2 children:\n{}", element_name, mml_to_string(mathml));
637627
},
638628
}
639-
}
640-
if matches!(element_name, "mtd" | "mtr" | "mlabeledtr") {
629+
} else if matches!(element_name, "mtd" | "mtr" | "mlabeledtr") {
641630
let parent_name = name(get_parent(mathml));
642631
if (element_name == "mtr" || element_name == "mlabeledtr") && parent_name != "mtable" {
643632
bail!("Illegal MathML: {} is not a child of mtable. Parent is {}", element_name, mml_to_string(get_parent(mathml)));
644633
} else if element_name == "mtd" && !(parent_name == "mtr" || parent_name == "mlabeledtr") {
645634
bail!("Illegal MathML: mtd is not a child of {}. Parent is {}", parent_name, mml_to_string(get_parent(mathml)));
646635
}
647636
}
648-
let children = mathml.children();
649-
if element_name == "semantics" {
637+
else if element_name == "mmultiscripts" {
638+
let has_prescripts = mathml.children().iter()
639+
.any(|&child| name(as_element(child)) == "mprescripts");
640+
if has_prescripts ^ (n_children.is_multiple_of(2)) {
641+
bail!("{} has the wrong number of children:\n{}", element_name, mml_to_string(mathml));
642+
}
643+
} else if element_name == "mlongdiv" {
644+
if n_children < 3 {
645+
bail!("{} should have at least 3 children:\n{}", element_name, mml_to_string(mathml));
646+
}
647+
} else if element_name == "semantics" {
648+
let children = mathml.children();
650649
if children.is_empty() {
651650
return Ok( () );
652651
} else {
@@ -662,16 +661,16 @@ impl CanonicalizeContext {
662661
}
663662
return CanonicalizeContext::assure_mathml(presentation_element);
664663
}
665-
}
666-
if !IsNode::is_mathml(mathml) {
664+
} else if !IsNode::is_mathml(mathml) {
667665
if element_name == "annotation-xml" {
668666
bail!("'annotation-xml' element is not child of 'semantics' element");
669667
} else {
670668
bail!("'{}' is not a valid MathML element", element_name);
671669
}
672670
}
671+
673672
// valid MathML element and not a leaf -- check the children
674-
for child in children {
673+
for child in mathml.children() {
675674
CanonicalizeContext::assure_mathml( as_element(child) )?;
676675
}
677676
return Ok( () );
@@ -771,7 +770,8 @@ impl CanonicalizeContext {
771770
name(parent).to_string()
772771
};
773772
let parent_requires_child = ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN.contains(&parent_name) ||
774-
matches!(parent_name.as_ref(), "mtr" | "mlabeledtr" | "mtable");
773+
matches!(parent_name.as_ref(), "mtr" | "mlabeledtr" | "mtable") ||
774+
parent_name == "mmultiscripts";
775775

776776
// handle empty leaves -- leaving it empty causes problems with the speech rules
777777
if is_leaf(mathml) && !EMPTY_ELEMENTS.contains(element_name) && as_text(mathml).is_empty() {
@@ -781,7 +781,9 @@ impl CanonicalizeContext {
781781
if mathml.children().is_empty() && !EMPTY_ELEMENTS.contains(element_name) {
782782
if element_name == "mrow" && mathml.attribute(INTENT_ATTR).is_none() {
783783
// if it is an empty mrow that doesn't need to be there, get rid of it. Otherwise, replace it with an mtext
784-
if parent_name == "mmultiscripts" { // MathML Core dropped "none" in favor of <mrow/>, but MathCAT is written with <none/>
784+
if parent_name == "mmultiscripts" && !mathml.preceding_siblings().is_empty() {
785+
// MathML Core dropped "none" in favor of <mrow/>, but MathCAT is written with <none/>
786+
// Do substitutions for the scripts, not the base
785787
set_mathml_name(mathml, "none");
786788
return Some(mathml);
787789
}
@@ -1112,6 +1114,7 @@ impl CanonicalizeContext {
11121114
// cleaning children can add or delete subsequent children, so we need to constantly update the children (and mathml)
11131115
let mut children = mathml.children();
11141116
let mut i = 0;
1117+
11151118
while i < children.len() {
11161119
if let Some(child) = children[i].element() {
11171120
match self.clean_mathml(child) {
@@ -1418,7 +1421,7 @@ impl CanonicalizeContext {
14181421
}
14191422

14201423

1421-
/// looks for pairs of (letter, pseudoscript) such as x' or p'q' all inside of a single token element
1424+
/// looks for pairs of (letter, pseudo-script) such as x' or p'q' all inside of a single token element
14221425
fn split_apart_pseudo_scripts<'a>(mi: Element<'a>) -> Option<Element<'a>> {
14231426
static IS_DEGREES_C_OR_F: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[°º][CF]").unwrap());
14241427

@@ -1562,7 +1565,7 @@ impl CanonicalizeContext {
15621565
i += 2;
15631566
}
15641567
}
1565-
if new_children.len() == 1 {
1568+
if new_children.len() <= 2 { // base only, or base and <mprescripts/>
15661569
mathml = as_element(new_children[0]);
15671570
} else {
15681571
mathml.replace_children(new_children);
@@ -6743,9 +6746,20 @@ mod canonicalize_tests {
67436746
#[test]
67446747
fn empty_mmultiscripts_485() -> Result<()> {
67456748
let test_str = "<math><mmultiscripts> </mmultiscripts></math>";
6746-
let target_str = " <math>
6747-
<mtext data-added='missing-content' data-width='0.700'> </mtext>
6748-
</math>";
6749+
let target_str = ""; // shouldn't get to the point of comparing because the input is illegal.
6750+
let err = are_strs_canonically_equal_result(test_str, target_str, &[])
6751+
.expect_err("empty mmultiscripts should be rejected");
6752+
assert!(
6753+
err.to_string().contains("mmultiscripts has the wrong number of children:\n <mmultiscripts></mmultiscripts>"),
6754+
"unexpected error message: {err}"
6755+
);
6756+
Ok(())
6757+
}
6758+
6759+
#[test]
6760+
fn empty_mmultiscripts_544() -> Result<()> {
6761+
let test_str = "<math><mmultiscripts><mrow/><mprescripts></mprescripts><mrow/><mrow/></mmultiscripts></math>";
6762+
let target_str = "<math> <mtext data-changed='empty_content' data-width='0'> </mtext></math>";
67496763
are_strs_canonically_equal_result(test_str, target_str, &[])
67506764
}
67516765

0 commit comments

Comments
 (0)