diff --git a/src/ts_search.cpp b/src/ts_search.cpp index 580a5ea93..b4097f2ab 100644 --- a/src/ts_search.cpp +++ b/src/ts_search.cpp @@ -53,6 +53,18 @@ SearchResult nni_search(TreeState& tree, const DataSet& ds, int maxHits, if (ds.blocks[b].has_inapplicable) { has_na = true; break; } } + // T-306: the incremental NNI accept-path computes new_score as a Fitch-only + // EW delta (best_score + delta) or an IW/profile rescore from per-pattern + // step counts. For HSJ/XFORM scoring, score_tree() additionally adds a + // topology-dependent hierarchy-DP (HSJ) or Sankoff (XFORM) term that neither + // delta captures, so fall back to a full score_tree() rescore for those modes + // (the same scoring-mode classification used by the T-275/T-303 guards). + const bool incremental_ok = + ds.scoring_mode == ScoringMode::EW || + ds.scoring_mode == ScoringMode::IW || + ds.scoring_mode == ScoringMode::XPIWE || + ds.scoring_mode == ScoringMode::PROFILE; + // Seed RNG (from R in serial mode, from thread-local in parallel mode) std::mt19937 rng = ts::make_rng(); @@ -72,8 +84,10 @@ SearchResult nni_search(TreeState& tree, const DataSet& ds, int maxHits, ++n_iterations; double new_score; - if (has_na) { - // NA datasets: fall back to full rescore (three-pass needed) + if (has_na || !incremental_ok) { + // NA datasets need the three-pass algorithm; HSJ/XFORM (T-306) need + // the full hierarchy-DP / Sankoff contribution. Both recompute the + // authoritative score via score_tree(). tree.build_postorder(); new_score = score_tree(tree, ds); } else { @@ -118,12 +132,14 @@ SearchResult nni_search(TreeState& tree, const DataSet& ds, int maxHits, } tree.nni_undo(undo); - if (!has_na) { + if (!has_na && incremental_ok) { // Restore prelim/local_cost by re-scoring the original topology tree.clip_undo_stack.clear(); fitch_incremental_downpass(tree, ds, c); tree.clip_undo_stack.clear(); } else { + // NA and HSJ/XFORM (T-306) evaluate via a full score_tree() each + // iteration, so just restore a valid postorder for the next pass. tree.build_postorder(); } } diff --git a/src/ts_tbr.cpp b/src/ts_tbr.cpp index 7981f23ef..e50a6a11d 100644 --- a/src/ts_tbr.cpp +++ b/src/ts_tbr.cpp @@ -593,6 +593,21 @@ TBRResult tbr_search(TreeState& tree, const DataSet& ds, if (ds.blocks[b].has_inapplicable) { has_na = true; break; } } + // T-306: the SPR accept-path computes `actual` as an incremental delta on + // top of best_score — a Fitch-only EW delta (best_score + delta) or an + // IW/profile rescore from per-pattern step counts. For HSJ/XFORM scoring, + // score_tree() additionally adds a topology-dependent hierarchy-DP (HSJ) or + // Sankoff (XFORM) term that neither delta captures, so best_score would + // drift from the authoritative score and corrupt accept/reject decisions. + // Restrict the incremental fast path to scoring modes whose total equals the + // Fitch/IW result; HSJ and XFORM fall back to full_rescore (the same scoring + // -mode classification used by the T-275/T-303 guards). + const bool incremental_ok = + ds.scoring_mode == ScoringMode::EW || + ds.scoring_mode == ScoringMode::IW || + ds.scoring_mode == ScoringMode::XPIWE || + ds.scoring_mode == ScoringMode::PROFILE; + // Seed RNG (from R in serial mode, from thread-local in parallel mode) std::mt19937 rng = ts::make_rng(); @@ -1145,7 +1160,7 @@ TBRResult tbr_search(TreeState& tree, const DataSet& ds, // datasets fall back to full_rescore. bool is_spr = (best_reroot_parent < 0 || best_reroot_parent == clip_node); double actual; - if (is_spr && !has_na) { + if (is_spr && !has_na && incremental_ok) { int delta = fitch_dirty_downpass(tree, ds, nz, nx); fitch_dirty_uppass(tree, ds, nz, nx); if (use_iw) { @@ -1155,7 +1170,7 @@ TBRResult tbr_search(TreeState& tree, const DataSet& ds, } else { actual = best_score + static_cast(delta); } - } else if (is_spr && has_na) { + } else if (is_spr && has_na && incremental_ok) { // T-300 NA variant: dirty-set Pass 1 + Pass 2 instead of full // rescore. Pass 3 still runs over the full tree because it // populates internal down2 (read by extract_char_steps) and @@ -1176,6 +1191,9 @@ TBRResult tbr_search(TreeState& tree, const DataSet& ds, actual = static_cast(ew_total) + ds.ew_offset; } } else { + // Non-trivial TBR rerooting, or a scoring mode whose incremental + // delta is not exact (HSJ/XFORM, see incremental_ok): recompute the + // authoritative score via score_tree(). actual = full_rescore(tree, ds); } diff --git a/tests/testthat/test-ts-t306-accept-guard.R b/tests/testthat/test-ts-t306-accept-guard.R new file mode 100644 index 000000000..c22f3cfa2 --- /dev/null +++ b/tests/testthat/test-ts-t306-accept-guard.R @@ -0,0 +1,146 @@ +# Tier 2: skipped on CRAN; see tests/testing-strategy.md +skip_on_cran() + +# ========================================================================= +# T-306 regression: HSJ/XFORM SPR/NNI accept-paths must include the +# hierarchy-DP / Sankoff contribution. +# ========================================================================= +# Bug (T-306): the SPR accept path in tbr_search() (src/ts_tbr.cpp) and the +# accept path in nni_search() (src/ts_search.cpp) updated `best_score` with a +# Fitch-only (EW) or IW/profile incremental delta. For HSJ and XFORM scoring, +# score_tree() additionally adds a topology-dependent hierarchy-DP (HSJ) or +# Sankoff (XFORM) term that the delta omits, so the search's internal +# accept/reject decisions tracked the wrong objective. The fix gates the +# incremental fast path on the scoring mode (EW/IW/XPIWE/PROFILE) and falls +# back to a full score_tree() rescore for HSJ/XFORM. +# +# This bug is SILENT at the reported-score level: run_single_replicate always +# recomputes the final score via score_tree() before pooling, and the driven +# pipeline's TBR rerooting moves (which always full_rescore) reliably recover +# the true optimum even with the buggy accept path. An empirical sweep +# confirmed a buggy build still reaches the optimum from every Fitch-optimal +# "trap" start tree. A black-box MaximizeParsimony() test therefore cannot +# discriminate buggy from fixed (this matches the T-303 sibling finding). +# +# What these tests DO lock in is that the HSJ/XFORM accept path — now routed +# through full_rescore() — produces a correct, internally-consistent, and +# deterministic driven search: it reaches the true global optimum, every +# returned tree's independent score equals the reported score (no best_score +# desync), and identical seeds give identical results. A regression that +# broke the full_rescore fallback (stale state, wrong score, crash, or +# nondeterminism) would fail here. + +library("TreeTools") + +make_t306_dat <- function(mat) { + MatrixToPhyDat(mat) +} + +# 7-tip dataset chosen so the full tree space (945 unrooted binaries) is +# brute-forceable and the hierarchy contribution sharpens the optimum: +# the HSJ/XFORM optimum is a STRICT subset of the Fitch optimum, so reaching +# it exercises the hierarchy-aware scoring, not just the Fitch component. +# columns: primary sec2 sec3 nh4 nh5 nh6 nh7 nh8 +t306_mat <- matrix(unlist(strsplit(c( + "0--00110", + "0--01101", + "0--10011", + "10010110", + "10101001", + "11011100", + "11110011" +), "")), nrow = 7, byrow = TRUE, +dimnames = list(paste0("t", 1:7), NULL)) + + +test_that("MaximizeParsimony HSJ reaches the true HSJ optimum (T-306)", { + ds <- make_t306_dat(t306_mat) + h <- CharacterHierarchy("1" = 2:3) + + # Brute force the whole tree space with the authoritative scorer + # (TreeLength uses the same path/units as the driven search's score_tree). + all_trees <- as.phylo(0:944, nTip = 7, tipLabels = names(ds)) + hsj_sc <- TreeLength(all_trees, ds, hierarchy = h, + inapplicable = "hsj", hsj_alpha = 1.0) + fit_sc <- TreeLength(all_trees, ds, concavity = Inf) + opt <- min(hsj_sc) + hsj_opt <- which(abs(hsj_sc - opt) < 1e-9) + fit_opt <- which(fit_sc == min(fit_sc)) + + # The dataset retains its discriminating structure: a sharp HSJ optimum + # that is a strict subset of the (flatter) Fitch optimum. + expect_lt(length(hsj_opt), 10L) + expect_true(all(hsj_opt %in% fit_opt)) + expect_false(setequal(hsj_opt, fit_opt)) + + for (s in c(1L, 7L, 42L, 256L)) { + set.seed(s) + res <- MaximizeParsimony(ds, hierarchy = h, inapplicable = "hsj", + hsj_alpha = 1.0, maxReplicates = 4L, + targetHits = 1L, verbosity = 0L) + expect_s3_class(res[[1]], "phylo") + expect_equal(length(res[[1]]$tip.label), 7L) + + reported <- attr(res, "score") + # The driven search finds the true global HSJ optimum. + expect_equal(reported, opt) + # No best_score desync: each returned tree's independent HSJ score + # equals the reported optimum. + recomputed <- TreeLength(res, ds, hierarchy = h, + inapplicable = "hsj", hsj_alpha = 1.0) + expect_true(all(abs(recomputed - reported) < 1e-9)) + } + + # Determinism: identical seeds yield identical optima and tree counts. + set.seed(99) + a <- MaximizeParsimony(ds, hierarchy = h, inapplicable = "hsj", + hsj_alpha = 1.0, maxReplicates = 4L, + targetHits = 1L, verbosity = 0L) + set.seed(99) + b <- MaximizeParsimony(ds, hierarchy = h, inapplicable = "hsj", + hsj_alpha = 1.0, maxReplicates = 4L, + targetHits = 1L, verbosity = 0L) + expect_equal(attr(a, "score"), attr(b, "score")) + expect_equal(length(a), length(b)) +}) + + +test_that("MaximizeParsimony XFORM reaches the true Sankoff optimum (T-306)", { + ds <- make_t306_dat(t306_mat) + h <- CharacterHierarchy("1" = 2:3) + + all_trees <- as.phylo(0:944, nTip = 7, tipLabels = names(ds)) + xf_sc <- TreeLength(all_trees, ds, hierarchy = h, inapplicable = "xform") + fit_sc <- TreeLength(all_trees, ds, concavity = Inf) + opt <- min(xf_sc) + xf_opt <- which(abs(xf_sc - opt) < 1e-9) + + # The Sankoff recoding is genuinely engaged (xform landscape differs from a + # plain-Fitch landscape) and yields a non-trivial, reasonably sharp optimum. + expect_false(isTRUE(all.equal(xf_sc, fit_sc))) + expect_gt(length(unique(round(xf_sc, 6))), 1L) + expect_lt(length(xf_opt), 30L) + + for (s in c(1L, 7L, 42L, 256L)) { + set.seed(s) + res <- MaximizeParsimony(ds, hierarchy = h, inapplicable = "xform", + maxReplicates = 4L, targetHits = 1L, + verbosity = 0L) + expect_s3_class(res[[1]], "phylo") + expect_equal(length(res[[1]]$tip.label), 7L) + + reported <- attr(res, "score") + expect_equal(reported, opt) + recomputed <- TreeLength(res, ds, hierarchy = h, inapplicable = "xform") + expect_true(all(abs(recomputed - reported) < 1e-9)) + } + + set.seed(99) + a <- MaximizeParsimony(ds, hierarchy = h, inapplicable = "xform", + maxReplicates = 4L, targetHits = 1L, verbosity = 0L) + set.seed(99) + b <- MaximizeParsimony(ds, hierarchy = h, inapplicable = "xform", + maxReplicates = 4L, targetHits = 1L, verbosity = 0L) + expect_equal(attr(a, "score"), attr(b, "score")) + expect_equal(length(a), length(b)) +}) diff --git a/vignettes/search-algorithm.Rmd b/vignettes/search-algorithm.Rmd index bb36693a8..4c517618e 100644 --- a/vignettes/search-algorithm.Rmd +++ b/vignettes/search-algorithm.Rmd @@ -263,6 +263,13 @@ the entire tree for each candidate, only the affected nodes are re-evaluated. A bounded variant bails out when the accumulated score exceeds the best candidate found so far, skipping unnecessary computation. +These incremental deltas are exact for equal-weights, implied-weights, +extended-implied-weights, and profile scoring. +Under HSJ and x-transformation scoring, however, a tree's score also +includes a topology-dependent hierarchy term (the hierarchy dynamic +program or Sankoff contribution) that an incremental Fitch delta cannot +capture; for these modes an accepted move is therefore re-scored in full +before the best score is updated. ### Zero-length edge skipping