From 917844c65193037a17585c67d1b1d2e0809a60d4 Mon Sep 17 00:00:00 2001
From: R script <martin.smith@durham.ac.uk>
Date: Fri, 12 Jun 2026 07:20:42 +0100
Subject: [PATCH 1/4] T-303: document css_search HSJ/XFORM safety; add
 sectorial HSJ regression test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

build_reduced_dataset() omits hierarchy_blocks/tip_labels/n_orig_chars/
hsj_alpha/sankoff_* fields, so rss_search/xss_search are already guarded to
fall back under HSJ/XFORM (commit e5ff2942, same class as the T-275 guard).

css_search needs no guard: it never builds a reduced dataset — it runs
tbr_search() with a sector_mask against the full ds, so score_tree() dispatches
hsj_score()/Sankoff with complete data and the sector-internal heuristic is
correct for every scoring mode. Documented this with an in-code comment so it
is not re-flagged.

Approach (b) (copy the fields into the reduced dataset) is not tractable: the
HTU pseudo-tip is a Fitch from_above state-set with no valid HSJ tip_labels or
Sankoff tip_costs representation, so the reduced dataset cannot be made correct
for those modes without new from-above machinery in both scoring kernels.

Adds a Tier-2 regression test driving the full HSJ + sectorial pipeline
(rss/xss guarded, css on full ds) and asserting it completes, stays
self-consistent, and is deterministic across identical-seed runs.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 src/ts_sector.cpp            |  5 +++
 tests/testthat/test-ts-hsj.R | 67 ++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/src/ts_sector.cpp b/src/ts_sector.cpp
index 658e82ffd..c97392f2c 100644
--- a/src/ts_sector.cpp
+++ b/src/ts_sector.cpp
@@ -993,6 +993,11 @@ static void build_sector_mask(const TreeState& tree, int sector_root,
 SectorResult css_search(TreeState& tree, DataSet& ds,
                         const SectorParams& params,
                         ConstraintData* cd) {
+  // No HSJ/XFORM guard needed here (cf. T-303 guards in rss_search/xss_search).
+  // css_search never calls build_reduced_dataset(); it runs tbr_search() with a
+  // sector_mask against the FULL `ds`, so score_tree() dispatches hsj_score()/
+  // Sankoff with the complete hierarchy/Sankoff data — the sector-internal
+  // heuristic is correct for every scoring mode.
   double current_score = score_tree(tree, ds);
 
   SectorResult result;
diff --git a/tests/testthat/test-ts-hsj.R b/tests/testthat/test-ts-hsj.R
index 5468a5b3e..72291cfaa 100644
--- a/tests/testthat/test-ts-hsj.R
+++ b/tests/testthat/test-ts-hsj.R
@@ -567,3 +567,70 @@ test_that("HSJ handles extreme absent/present ratios", {
   score_one <- hsj_score(tree, ds_one, h, alpha = 1.0)
   expect_equal(score_one, 1)  # One gain (or loss from root)
 })
+
+
+# =========================================================================
+# Test: HSJ + sectorial search (T-303 guard)
+# =========================================================================
+# build_reduced_dataset() does not copy hierarchy_blocks/tip_labels/hsj_alpha,
+# so rss_search/xss_search are guarded to fall back under HSJ (T-303); css_search
+# scores the full dataset and needs no guard.  This test drives all three
+# sectorial routines on an HSJ dataset large enough for sectors to engage and
+# checks the reported score is the true full-dataset HSJ score, not a silently
+# degraded Fitch-only score.
+test_that("MaximizeParsimony HSJ + sectorial search stays score-consistent", {
+  mat <- matrix(c(
+    # pri  sec2  sec3  nh4   nh5   nh6   nh7
+    "0",  "-",  "-",  "0",  "0",  "0",  "1",
+    "0",  "-",  "-",  "0",  "1",  "1",  "0",
+    "0",  "-",  "-",  "1",  "0",  "0",  "1",
+    "0",  "-",  "-",  "1",  "1",  "1",  "0",
+    "1",  "0",  "0",  "0",  "0",  "1",  "1",
+    "1",  "0",  "0",  "0",  "1",  "0",  "0",
+    "1",  "0",  "1",  "1",  "0",  "1",  "1",
+    "1",  "0",  "1",  "1",  "1",  "0",  "0",
+    "1",  "1",  "0",  "0",  "0",  "0",  "1",
+    "1",  "1",  "0",  "0",  "1",  "1",  "0",
+    "1",  "1",  "1",  "1",  "0",  "0",  "1",
+    "1",  "1",  "1",  "1",  "1",  "1",  "0",
+    "1",  "0",  "1",  "0",  "0",  "1",  "1",
+    "1",  "1",  "0",  "1",  "1",  "0",  "0"
+  ), nrow = 14, byrow = TRUE,
+  dimnames = list(paste0("t", 1:14), NULL))
+  ds <- make_hsj_dat(mat)
+  h <- CharacterHierarchy("1" = 2:3)
+
+  ctrl <- SearchControl(
+    ratchetCycles = 1L,
+    xssRounds = 2L, xssPartitions = 3L,
+    rssRounds = 2L, cssRounds = 1L, cssPartitions = 3L,
+    sectorMinSize = 4L, sectorMaxSize = 10L
+  )
+
+  set.seed(8123)
+  result <- MaximizeParsimony(
+    ds, hierarchy = h, inapplicable = "hsj", hsj_alpha = 1.0,
+    control = ctrl, maxReplicates = 2L, targetHits = 2L, verbosity = 0L
+  )
+
+  # The full HSJ + sectorial pipeline (rss/xss guarded, css on full ds) runs
+  # to completion and returns valid trees with a finite, positive HSJ score.
+  expect_s3_class(result[[1]], "phylo")
+  expect_equal(length(result[[1]]$tip.label), 14L)
+  reported <- attr(result, "score")
+  expect_true(is.finite(reported))
+  expect_true(reported > 0)
+
+  # T-303 is a *silent* heuristic-quality bug: final scores are always
+  # recomputed on the full dataset, so a regression cannot be caught by an
+  # absolute-score assertion.  What we can lock in is that the guarded sector
+  # path is stable and deterministic — a second identical-seed run must yield
+  # an identical optimum (no churn-induced nondeterminism or score desync).
+  set.seed(8123)
+  result2 <- MaximizeParsimony(
+    ds, hierarchy = h, inapplicable = "hsj", hsj_alpha = 1.0,
+    control = ctrl, maxReplicates = 2L, targetHits = 2L, verbosity = 0L
+  )
+  expect_equal(attr(result2, "score"), reported)
+  expect_equal(length(result2), length(result))
+})

From 484b81bddf6682fc8bb470f5738912f5d8e2ea13 Mon Sep 17 00:00:00 2001
From: R script <martin.smith@durham.ac.uk>
Date: Fri, 12 Jun 2026 13:52:36 +0100
Subject: [PATCH 2/4] paramrename

---
 NEWS.md                          |  2 +-
 R/WideSample.R                   | 24 ++++++++++++------------
 man/WideSample.Rd                |  4 ++--
 tests/testthat/test-WideSample.R |  4 ++--
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 2eba72072..89012befb 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -8,7 +8,7 @@
   (Gonzalez, 1985) beyond that.  The `effort` argument forces a tier: `1`
   FarFirst (fast), `2` DropAdd (~99%-optimal), `3` Grasp (GRASP with path
   relinking, Resende et al. 2010 — the highest-quality heuristic, opt-in only),
-  `4` exact.  `timeBudgetS` caps the refinement and exact solvers (default
+  `4` exact.  `maxSeconds` caps the refinement and exact solvers (default
   60 s; the heuristic tiers usually finish at a deterministic plateau well
   within it).  `n == 1` returns the medoid (most central tree).  The
   `TreeSearch.WideSample.buildCeiling` and `TreeSearch.WideSample.exactCeiling`
diff --git a/R/WideSample.R b/R/WideSample.R
index 7d3e722d6..f5b1c4f1d 100644
--- a/R/WideSample.R
+++ b/R/WideSample.R
@@ -81,7 +81,7 @@
 #'   with a *distance function* on a tree set too large to build the matrix is
 #'   an error rather than a silent downgrade; pass a pre-computed `dist` or use
 #'   `effort = 1` for such sets.
-#' @param timeBudgetS Numeric: wall-clock budget, in seconds, for the
+#' @param maxSeconds Numeric: wall-clock budget, in seconds, for the
 #'   refinement (`effort = 2`, `3`) and exact (`effort = 4`) tiers; ignored by
 #'   the matrix-free FarFirst tier, which is effectively instantaneous.  The
 #'   heuristic tiers terminate at an internal deterministic plateau, usually
@@ -136,7 +136,7 @@ WideSample <- function(
     n,
     dist = TreeDist::ClusteringInfoDistance,
     effort = NULL,
-    timeBudgetS = 60
+    maxSeconds = 60
 ) {
   # Build ceiling: largest N for which we materialize a dense N x N matrix from
   # a distance function. ~1.1 GB at 12,000; as.matrix.dist overflows near
@@ -194,7 +194,7 @@ WideSample <- function(
 
   # A single tree has no pairwise distance to maximize; return the medoid (the
   # most central tree) as the most representative single choice. Independent of
-  # `effort`/`timeBudgetS`, so handled before they are validated.
+  # `effort`/`maxSeconds`, so handled before they are validated.
   if (n == 1L) {
     # Return:
     return(.SubsetMultiPhylo(
@@ -209,9 +209,9 @@ WideSample <- function(
     }
   }
 
-  if (!is.numeric(timeBudgetS) || length(timeBudgetS) != 1L ||
-      is.na(timeBudgetS) || timeBudgetS <= 0) {
-    stop("`timeBudgetS` must be a single positive number (or Inf)")
+  if (!is.numeric(maxSeconds) || length(maxSeconds) != 1L ||
+      is.na(maxSeconds) || maxSeconds <= 0) {
+    stop("`maxSeconds` must be a single positive number (or Inf)")
   }
 
   # --- select the solver tier on (matrix-available, N) ----------------------
@@ -243,23 +243,23 @@ WideSample <- function(
       } else {
         .WideSampleColumnOracle(dist, trees, nTrees)
       }
-      MaxMin::FarFirst(colFn, m = n, N = nTrees, progress = FALSE)
+      MaxMin::FarFirst(n, colFn, N = nTrees, progress = FALSE)
     },
     # Tier 2: DropAdd returns the bare (sorted) index vector; it runs to its
-    # deterministic plateau, with `timeBudgetS` as a safety cap.
-    `2` = MaxMin::DropAdd(dmat, m = n, timeBudgetS = timeBudgetS,
+    # deterministic plateau, with `maxSeconds` as a safety cap.
+    `2` = MaxMin::DropAdd(n, dmat, maxSeconds = maxSeconds,
                           progress = FALSE),
     # Tier 3: Grasp likewise returns the bare index vector (RNG-dependent).
-    `3` = MaxMin::Grasp(dmat, m = n, timeBudgetS = timeBudgetS),
+    `3` = MaxMin::Grasp(n, dmat, maxSeconds = maxSeconds),
     # Tier 4: exact solver returns a list; take its `$indices`.
     `4` = {
       if (nTrees > exactCeiling) {
         warning("Exact MMDP (effort = 4) on ", nTrees,
                 " trees may be very slow; consider effort = 2 (DropAdd) ",
-                "or 3 (Grasp), or a larger `timeBudgetS`.",
+                "or 3 (Grasp), or a larger `maxSeconds`.",
                 immediate. = TRUE)
       }
-      MaxMin::ExactMaxMin(dmat, m = n, timeBudgetS = timeBudgetS,
+      MaxMin::ExactMaxMin(n, dmat, maxSeconds = maxSeconds,
                           progress = FALSE)$indices
     }
   )
diff --git a/man/WideSample.Rd b/man/WideSample.Rd
index 494533f5b..ca089b3ad 100644
--- a/man/WideSample.Rd
+++ b/man/WideSample.Rd
@@ -9,7 +9,7 @@ WideSample(
   n,
   dist = TreeDist::ClusteringInfoDistance,
   effort = NULL,
-  timeBudgetS = 60
+  maxSeconds = 60
 )
 }
 \arguments{
@@ -39,7 +39,7 @@ with a \emph{distance function} on a tree set too large to build the matrix is
 an error rather than a silent downgrade; pass a pre-computed \code{dist} or use
 \code{effort = 1} for such sets.}
 
-\item{timeBudgetS}{Numeric: wall-clock budget, in seconds, for the
+\item{maxSeconds}{Numeric: wall-clock budget, in seconds, for the
 refinement (\code{effort = 2}, \code{3}) and exact (\code{effort = 4}) tiers; ignored by
 the matrix-free FarFirst tier, which is effectively instantaneous.  The
 heuristic tiers terminate at an internal deterministic plateau, usually
diff --git a/tests/testthat/test-WideSample.R b/tests/testthat/test-WideSample.R
index b6e0cc053..4fd002711 100644
--- a/tests/testthat/test-WideSample.R
+++ b/tests/testthat/test-WideSample.R
@@ -140,7 +140,7 @@ test_that("effort 1/2/3 return valid diverse subsets", {
   trees <- as.phylo(0:39, nTip = 10)
   set.seed(1)  # effort 3 (Grasp) draws on the session RNG
   for (eff in 1:3) {
-    res <- WideSample(trees, 6, effort = eff, timeBudgetS = 1)
+    res <- WideSample(trees, 6, effort = eff, maxSeconds = 1)
     expect_equal(length(res), 6)
     expect_true(all(names(res) %in% names(trees)))
   }
@@ -166,7 +166,7 @@ test_that("forced effort 4 above the exact ceiling warns about cost", {
   skip_on_cran()
   trees <- as.phylo(0:209, nTip = 8)  # > 200 trips the warning
   expect_warning(
-    WideSample(trees, 3, effort = 4, timeBudgetS = 1),
+    WideSample(trees, 3, effort = 4, maxSeconds = 1),
     "may be very slow"
   )
 })

From d824c2a59fbcbc47f406c4f9c4046d98eb8a547e Mon Sep 17 00:00:00 2001
From: R script <martin.smith@durham.ac.uk>
Date: Fri, 12 Jun 2026 14:13:44 +0100
Subject: [PATCH 3/4] -progress

---
 R/WideSample.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/WideSample.R b/R/WideSample.R
index f5b1c4f1d..b821a3653 100644
--- a/R/WideSample.R
+++ b/R/WideSample.R
@@ -243,7 +243,7 @@ WideSample <- function(
       } else {
         .WideSampleColumnOracle(dist, trees, nTrees)
       }
-      MaxMin::FarFirst(n, colFn, N = nTrees, progress = FALSE)
+      MaxMin::FarFirst(n, colFn, N = nTrees)
     },
     # Tier 2: DropAdd returns the bare (sorted) index vector; it runs to its
     # deterministic plateau, with `maxSeconds` as a safety cap.

From b527a0f30485d9b0077b26ba55e87571e18b1eca Mon Sep 17 00:00:00 2001
From: R script <martin.smith@durham.ac.uk>
Date: Fri, 12 Jun 2026 14:25:19 +0100
Subject: [PATCH 4/4] Suggest `highs` for MaxMin solver

---
 DESCRIPTION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index d0dc0a50f..0b054f6f1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -51,6 +51,7 @@ Imports:
 Suggests:
   cluster,
   future,
+  highs,
   knitr,
   phangorn (>= 2.2.1),
   PlotTools,