ms609 · ms609 · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026
diff --git a/src/ts_search.cpp b/src/ts_search.cpp
@@ -53,6 +53,18 @@ SearchResult nni_search(TreeState& tree, const DataSet& ds, int maxHits,
     if (ds.blocks[b].has_inapplicable) { has_na = true; break; }
   }
 
+  // T-306: the incremental NNI accept-path computes new_score as a Fitch-only
+  // EW delta (best_score + delta) or an IW/profile rescore from per-pattern
+  // step counts.  For HSJ/XFORM scoring, score_tree() additionally adds a
+  // topology-dependent hierarchy-DP (HSJ) or Sankoff (XFORM) term that neither
+  // delta captures, so fall back to a full score_tree() rescore for those modes
+  // (the same scoring-mode classification used by the T-275/T-303 guards).
+  const bool incremental_ok =
+      ds.scoring_mode == ScoringMode::EW ||
+      ds.scoring_mode == ScoringMode::IW ||
+      ds.scoring_mode == ScoringMode::XPIWE ||
+      ds.scoring_mode == ScoringMode::PROFILE;
+
   // Seed RNG (from R in serial mode, from thread-local in parallel mode)
   std::mt19937 rng = ts::make_rng();
 
@@ -72,8 +84,10 @@ SearchResult nni_search(TreeState& tree, const DataSet& ds, int maxHits,
         ++n_iterations;
 
         double new_score;
-        if (has_na) {
-          // NA datasets: fall back to full rescore (three-pass needed)
+        if (has_na || !incremental_ok) {
+          // NA datasets need the three-pass algorithm; HSJ/XFORM (T-306) need
+          // the full hierarchy-DP / Sankoff contribution.  Both recompute the
+          // authoritative score via score_tree().
           tree.build_postorder();
           new_score = score_tree(tree, ds);
         } else {
@@ -118,12 +132,14 @@ SearchResult nni_search(TreeState& tree, const DataSet& ds, int maxHits,
         }
 
         tree.nni_undo(undo);
-        if (!has_na) {
+        if (!has_na && incremental_ok) {
           // Restore prelim/local_cost by re-scoring the original topology
           tree.clip_undo_stack.clear();
           fitch_incremental_downpass(tree, ds, c);
           tree.clip_undo_stack.clear();
         } else {
+          // NA and HSJ/XFORM (T-306) evaluate via a full score_tree() each
+          // iteration, so just restore a valid postorder for the next pass.
           tree.build_postorder();
         }
       }

diff --git a/src/ts_tbr.cpp b/src/ts_tbr.cpp
@@ -593,6 +593,21 @@ TBRResult tbr_search(TreeState& tree, const DataSet& ds,
     if (ds.blocks[b].has_inapplicable) { has_na = true; break; }
   }
 
+  // T-306: the SPR accept-path computes `actual` as an incremental delta on
+  // top of best_score — a Fitch-only EW delta (best_score + delta) or an
+  // IW/profile rescore from per-pattern step counts.  For HSJ/XFORM scoring,
+  // score_tree() additionally adds a topology-dependent hierarchy-DP (HSJ) or
+  // Sankoff (XFORM) term that neither delta captures, so best_score would
+  // drift from the authoritative score and corrupt accept/reject decisions.
+  // Restrict the incremental fast path to scoring modes whose total equals the
+  // Fitch/IW result; HSJ and XFORM fall back to full_rescore (the same scoring
+  // -mode classification used by the T-275/T-303 guards).
+  const bool incremental_ok =
+      ds.scoring_mode == ScoringMode::EW ||
+      ds.scoring_mode == ScoringMode::IW ||
+      ds.scoring_mode == ScoringMode::XPIWE ||
+      ds.scoring_mode == ScoringMode::PROFILE;
+
   // Seed RNG (from R in serial mode, from thread-local in parallel mode)
   std::mt19937 rng = ts::make_rng();
 
@@ -1145,7 +1160,7 @@ TBRResult tbr_search(TreeState& tree, const DataSet& ds,
         // datasets fall back to full_rescore.
         bool is_spr = (best_reroot_parent < 0 || best_reroot_parent == clip_node);
         double actual;
-        if (is_spr && !has_na) {
+        if (is_spr && !has_na && incremental_ok) {
           int delta = fitch_dirty_downpass(tree, ds, nz, nx);
           fitch_dirty_uppass(tree, ds, nz, nx);
           if (use_iw) {
@@ -1155,7 +1170,7 @@ TBRResult tbr_search(TreeState& tree, const DataSet& ds,
           } else {
             actual = best_score + static_cast<double>(delta);
           }
-        } else if (is_spr && has_na) {
+        } else if (is_spr && has_na && incremental_ok) {
           // T-300 NA variant: dirty-set Pass 1 + Pass 2 instead of full
           // rescore.  Pass 3 still runs over the full tree because it
           // populates internal down2 (read by extract_char_steps) and
@@ -1176,6 +1191,9 @@ TBRResult tbr_search(TreeState& tree, const DataSet& ds,
             actual = static_cast<double>(ew_total) + ds.ew_offset;
           }
         } else {
+          // Non-trivial TBR rerooting, or a scoring mode whose incremental
+          // delta is not exact (HSJ/XFORM, see incremental_ok): recompute the
+          // authoritative score via score_tree().
           actual = full_rescore(tree, ds);
         }
 

diff --git a/tests/testthat/test-ts-t306-accept-guard.R b/tests/testthat/test-ts-t306-accept-guard.R
@@ -0,0 +1,146 @@
+# Tier 2: skipped on CRAN; see tests/testing-strategy.md
+skip_on_cran()
+
+# =========================================================================
+# T-306 regression: HSJ/XFORM SPR/NNI accept-paths must include the
+# hierarchy-DP / Sankoff contribution.
+# =========================================================================
+# Bug (T-306): the SPR accept path in tbr_search() (src/ts_tbr.cpp) and the
+# accept path in nni_search() (src/ts_search.cpp) updated `best_score` with a
+# Fitch-only (EW) or IW/profile incremental delta.  For HSJ and XFORM scoring,
+# score_tree() additionally adds a topology-dependent hierarchy-DP (HSJ) or
+# Sankoff (XFORM) term that the delta omits, so the search's internal
+# accept/reject decisions tracked the wrong objective.  The fix gates the
+# incremental fast path on the scoring mode (EW/IW/XPIWE/PROFILE) and falls
+# back to a full score_tree() rescore for HSJ/XFORM.
+#
+# This bug is SILENT at the reported-score level: run_single_replicate always
+# recomputes the final score via score_tree() before pooling, and the driven
+# pipeline's TBR rerooting moves (which always full_rescore) reliably recover
+# the true optimum even with the buggy accept path.  An empirical sweep
+# confirmed a buggy build still reaches the optimum from every Fitch-optimal
+# "trap" start tree.  A black-box MaximizeParsimony() test therefore cannot
+# discriminate buggy from fixed (this matches the T-303 sibling finding).
+#
+# What these tests DO lock in is that the HSJ/XFORM accept path — now routed
+# through full_rescore() — produces a correct, internally-consistent, and
+# deterministic driven search: it reaches the true global optimum, every
+# returned tree's independent score equals the reported score (no best_score
+# desync), and identical seeds give identical results.  A regression that
+# broke the full_rescore fallback (stale state, wrong score, crash, or
+# nondeterminism) would fail here.
+
+library("TreeTools")
+
+make_t306_dat <- function(mat) {
+  MatrixToPhyDat(mat)
+}
+
+# 7-tip dataset chosen so the full tree space (945 unrooted binaries) is
+# brute-forceable and the hierarchy contribution sharpens the optimum:
+# the HSJ/XFORM optimum is a STRICT subset of the Fitch optimum, so reaching
+# it exercises the hierarchy-aware scoring, not just the Fitch component.
+#   columns: primary  sec2  sec3  nh4  nh5  nh6  nh7  nh8
+t306_mat <- matrix(unlist(strsplit(c(
+  "0--00110",
+  "0--01101",
+  "0--10011",
+  "10010110",
+  "10101001",
+  "11011100",
+  "11110011"
+), "")), nrow = 7, byrow = TRUE,
+dimnames = list(paste0("t", 1:7), NULL))
+
+
+test_that("MaximizeParsimony HSJ reaches the true HSJ optimum (T-306)", {
+  ds <- make_t306_dat(t306_mat)
+  h <- CharacterHierarchy("1" = 2:3)
+
+  # Brute force the whole tree space with the authoritative scorer
+  # (TreeLength uses the same path/units as the driven search's score_tree).
+  all_trees <- as.phylo(0:944, nTip = 7, tipLabels = names(ds))
+  hsj_sc <- TreeLength(all_trees, ds, hierarchy = h,
+                       inapplicable = "hsj", hsj_alpha = 1.0)
+  fit_sc <- TreeLength(all_trees, ds, concavity = Inf)
+  opt <- min(hsj_sc)
+  hsj_opt <- which(abs(hsj_sc - opt) < 1e-9)
+  fit_opt <- which(fit_sc == min(fit_sc))
+
+  # The dataset retains its discriminating structure: a sharp HSJ optimum
+  # that is a strict subset of the (flatter) Fitch optimum.
+  expect_lt(length(hsj_opt), 10L)
+  expect_true(all(hsj_opt %in% fit_opt))
+  expect_false(setequal(hsj_opt, fit_opt))
+
+  for (s in c(1L, 7L, 42L, 256L)) {
+    set.seed(s)
+    res <- MaximizeParsimony(ds, hierarchy = h, inapplicable = "hsj",
+                             hsj_alpha = 1.0, maxReplicates = 4L,
+                             targetHits = 1L, verbosity = 0L)
+    expect_s3_class(res[[1]], "phylo")
+    expect_equal(length(res[[1]]$tip.label), 7L)
+
+    reported <- attr(res, "score")
+    # The driven search finds the true global HSJ optimum.
+    expect_equal(reported, opt)
+    # No best_score desync: each returned tree's independent HSJ score
+    # equals the reported optimum.
+    recomputed <- TreeLength(res, ds, hierarchy = h,
+                             inapplicable = "hsj", hsj_alpha = 1.0)
+    expect_true(all(abs(recomputed - reported) < 1e-9))
+  }
+
+  # Determinism: identical seeds yield identical optima and tree counts.
+  set.seed(99)
+  a <- MaximizeParsimony(ds, hierarchy = h, inapplicable = "hsj",
+                         hsj_alpha = 1.0, maxReplicates = 4L,
+                         targetHits = 1L, verbosity = 0L)
+  set.seed(99)
+  b <- MaximizeParsimony(ds, hierarchy = h, inapplicable = "hsj",
+                         hsj_alpha = 1.0, maxReplicates = 4L,
+                         targetHits = 1L, verbosity = 0L)
+  expect_equal(attr(a, "score"), attr(b, "score"))
+  expect_equal(length(a), length(b))
+})
+
+
+test_that("MaximizeParsimony XFORM reaches the true Sankoff optimum (T-306)", {
+  ds <- make_t306_dat(t306_mat)
+  h <- CharacterHierarchy("1" = 2:3)
+
+  all_trees <- as.phylo(0:944, nTip = 7, tipLabels = names(ds))
+  xf_sc <- TreeLength(all_trees, ds, hierarchy = h, inapplicable = "xform")
+  fit_sc <- TreeLength(all_trees, ds, concavity = Inf)
+  opt <- min(xf_sc)
+  xf_opt <- which(abs(xf_sc - opt) < 1e-9)
+
+  # The Sankoff recoding is genuinely engaged (xform landscape differs from a
+  # plain-Fitch landscape) and yields a non-trivial, reasonably sharp optimum.
+  expect_false(isTRUE(all.equal(xf_sc, fit_sc)))
+  expect_gt(length(unique(round(xf_sc, 6))), 1L)
+  expect_lt(length(xf_opt), 30L)
+
+  for (s in c(1L, 7L, 42L, 256L)) {
+    set.seed(s)
+    res <- MaximizeParsimony(ds, hierarchy = h, inapplicable = "xform",
+                             maxReplicates = 4L, targetHits = 1L,
+                             verbosity = 0L)
+    expect_s3_class(res[[1]], "phylo")
+    expect_equal(length(res[[1]]$tip.label), 7L)
+
+    reported <- attr(res, "score")
+    expect_equal(reported, opt)
+    recomputed <- TreeLength(res, ds, hierarchy = h, inapplicable = "xform")
+    expect_true(all(abs(recomputed - reported) < 1e-9))
+  }
+
+  set.seed(99)
+  a <- MaximizeParsimony(ds, hierarchy = h, inapplicable = "xform",
+                         maxReplicates = 4L, targetHits = 1L, verbosity = 0L)
+  set.seed(99)
+  b <- MaximizeParsimony(ds, hierarchy = h, inapplicable = "xform",
+                         maxReplicates = 4L, targetHits = 1L, verbosity = 0L)
+  expect_equal(attr(a, "score"), attr(b, "score"))
+  expect_equal(length(a), length(b))
+})
diff --git a/vignettes/search-algorithm.Rmd b/vignettes/search-algorithm.Rmd
@@ -263,6 +263,13 @@ the entire tree for each candidate, only the affected nodes are
 re-evaluated.
 A bounded variant bails out when the accumulated score exceeds the best
 candidate found so far, skipping unnecessary computation.
+These incremental deltas are exact for equal-weights, implied-weights,
+extended-implied-weights, and profile scoring.
+Under HSJ and x-transformation scoring, however, a tree's score also
+includes a topology-dependent hierarchy term (the hierarchy dynamic
+program or Sankoff contribution) that an incremental Fitch delta cannot
+capture; for these modes an accepted move is therefore re-scored in full
+before the best score is updated.
 
 ### Zero-length edge skipping