From f17c3c70514015dc92c4641ed5c09a7aeba8155d Mon Sep 17 00:00:00 2001 From: AllenWLynch Date: Mon, 13 Apr 2026 17:58:44 -0400 Subject: [PATCH 01/10] stabilize theta exp-offsets against inf/NaN drift - sbs: max-subtract log_locus/log_context before exp in _get_exp_offsets_k_c and restore the shift on the scalar normalizer in log-space; fall back to the previous normalizer if the slice is still degenerate - theta_model: clip raw predictions to [-30, 30] at the log_locus_distribution write point so exp() stays within float32 - context_model: clip _get_log_context_distribution to [-30, 30] - optim: finite-check normalizers after every get_exp_offsets_dict call and re-enable init_normalizers so epoch 1 doesn't start from zero --- mutopia/modalities/sbs/model.py | 55 ++++++++++++++++++- .../model/model_components/context_model.py | 13 +++-- .../model/model_components/theta_model.py | 11 +++- mutopia/model/model/optim.py | 23 +++++++- 4 files changed, 93 insertions(+), 9 deletions(-) diff --git a/mutopia/modalities/sbs/model.py b/mutopia/modalities/sbs/model.py index 739eb06..2cdb6ea 100644 --- a/mutopia/modalities/sbs/model.py +++ b/mutopia/modalities/sbs/model.py @@ -89,10 +89,63 @@ def _fast_exp_offsets( def _get_exp_offsets_k_c(factor_model, k, corpus): + # STABILITY: max-subtract in log-space before exp to avoid inf*0 -> NaN in the + # numba kernel. The Poisson M-step absorbs uniform offset rescaling via + # get_poisson_targets_weights, so we only have to undo the shift on the + # scalar normalizer (in log-space, where it cannot overflow). 
+ state = corpus.sections["State"] + log_locus = np.ascontiguousarray( + state.log_locus_distribution.isel(component=k).data, dtype=np.float32 + ) + log_context = np.ascontiguousarray( + state.log_context_distribution.isel(component=k) + .transpose(..., "context") + .data, + dtype=np.float32, + ) + + finite_locus = log_locus[np.isfinite(log_locus)] + finite_context = log_context[np.isfinite(log_context)] + Lmax = np.float32(finite_locus.max()) if finite_locus.size else np.float32(0.0) + Cmax = np.float32(finite_context.max()) if finite_context.size else np.float32(0.0) + + locus_effects = contig_f32(np.exp(log_locus - Lmax)) + context_effects = np.exp(log_context - Cmax).astype(np.float32, copy=False) + + context_freqs = ( + corpus.sections["Regions"] + .context_frequencies.transpose("locus", "context", "configuration") + .data.reshape(-1, 192) + ) + exposures = corpus.sections["Regions"].exposures.data + idx_selector = state.mesoscale_idx.data.T + (normalizer, context_offsets, locus_offsets) = _fast_exp_offsets( - *_get_args(k, corpus) + context_freqs, exposures, locus_effects, context_effects, idx_selector, ) + # STABILITY: restore the max-subtraction purely in log-space. + normalizer = float(normalizer) - float(Lmax) - float(Cmax) + + # STABILITY: fall back to the previous normalizer instead of poisoning state + # if the slice was degenerate (e.g. empty normalizer, stale NaNs in inputs). + if not ( + np.isfinite(normalizer) + and np.all(np.isfinite(locus_offsets)) + and np.all(np.isfinite(context_offsets)) + ): + logger.warning( + f"Non-finite exp-offsets for component {k} on corpus " + f"{factor_model.GT.get_name(corpus)}; falling back to previous normalizer." 
+ ) + normalizer = float(factor_model.get_normalizers(corpus)[k]) + locus_offsets = np.nan_to_num( + locus_offsets, nan=0.0, posinf=0.0, neginf=0.0 + ) + context_offsets = np.nan_to_num( + context_offsets, nan=0.0, posinf=0.0, neginf=0.0 + ) + return ( normalizer, {"context_model": context_offsets, "theta_model": locus_offsets}, diff --git a/mutopia/model/model/model_components/context_model.py b/mutopia/model/model/model_components/context_model.py index e0235a0..ccf89a3 100644 --- a/mutopia/model/model/model_components/context_model.py +++ b/mutopia/model/model/model_components/context_model.py @@ -333,10 +333,15 @@ def prepare_corpusstate(self, corpus): ) def _get_log_context_distribution(self, corpus_state): - return np.array( - [self._format_component(k) for k in range(self.n_components)], - dtype=self.dtype, - order="C", + # STABILITY: clip so exp() of this distribution cannot overflow float32. + return np.clip( + np.array( + [self._format_component(k) for k in range(self.n_components)], + dtype=self.dtype, + order="C", + ), + -30.0, + 30.0, ) def update_corpusstate(self, corpus, **kwargs): diff --git a/mutopia/model/model/model_components/theta_model.py b/mutopia/model/model/model_components/theta_model.py index 1c46a83..2424352 100644 --- a/mutopia/model/model/model_components/theta_model.py +++ b/mutopia/model/model/model_components/theta_model.py @@ -158,8 +158,15 @@ def update_corpusstate(self, corpus, from_scratch=False): X = self._check_input(self._fetch_feature_matrix(corpus)) for k in range(self.n_components): - CS.fetch_val(corpus, "log_locus_distribution")[k].data[:] = self._predict( - k, corpus, from_scratch=from_scratch, X=X, check_input=False + # STABILITY: clip raw theta predictions so exp() stays within float32. + # exp(30) ~ 1e13; with context_freqs ~ 1e-3 and ~3e5 loci the + # downstream product stays well under float32 max (~3.4e38). 
+ CS.fetch_val(corpus, "log_locus_distribution")[k].data[:] = np.clip( + self._predict( + k, corpus, from_scratch=from_scratch, X=X, check_input=False + ), + -30.0, + 30.0, ) """ diff --git a/mutopia/model/model/optim.py b/mutopia/model/model/optim.py index 8afe858..0ce1eab 100644 --- a/mutopia/model/model/optim.py +++ b/mutopia/model/model/optim.py @@ -2,6 +2,7 @@ from functools import partial from math import isnan import time +import numpy as np # STABILITY: for finite-checks on normalizers from mutopia.utils import logger, str_wrapped_list, timer_wrapper, ParContext from mutopia.gtensor import ( @@ -18,6 +19,19 @@ from .gtensor_interface import GtensorInterface +def _check_normalizers_finite(normalizers, where): + # STABILITY: fail fast with an actionable error instead of letting NaNs + # quietly poison factor_model._normalizers through _svi_update_fn. + for name, norm in normalizers.items(): + if not np.all(np.isfinite(norm)): + raise ValueError( + f"Non-finite normalizer values in '{where}' for dataset '{name}': " + f"{norm}. This usually means exp-offsets produced inf/NaN — check " + "log_locus_distribution / log_context_distribution for drift, " + "raise regularization, or raise locus_subsample." + ) + + def VI_step( factor_model: FactorModel, locals_model: LocalsModel, @@ -36,9 +50,10 @@ def VI_step( ) offsets, normalizers = factor_model.get_exp_offsets_dict(**args) + _check_normalizers_finite(normalizers, "VI_step") # STABILITY """ - In the previous "offsets" step, new normalizers were calculated. + In the previous "offsets" step, new normalizers were calculated. Now we need to transer the normalizers to the full data set. """ factor_model.update_normalizers(datasets, normalizers) @@ -108,6 +123,7 @@ def SVI_step( Get the offsets from the sliced data. 
""" offsets, normalizers = timer_wrapper(factor_model.get_exp_offsets_dict)(**args) + _check_normalizers_finite(normalizers, "SVI_step(slices)") # STABILITY if full_normalizers: """ @@ -120,6 +136,7 @@ def SVI_step( datasets=datasets, par_context=par_context, ) + _check_normalizers_finite(normalizers, "SVI_step(full)") # STABILITY factor_model.update_normalizers(datasets, normalizers) else: @@ -381,7 +398,9 @@ def fit_model( with ParContext(threads, verbose) as par: - # factor_model.init_normalizers(train_datasets, par_context=par) + # STABILITY: seed the normalizers from the full training data so early + # SVI epochs don't start from zero while theta predictions are non-trivial. + factor_model.init_normalizers(train_datasets, par_context=par) try: progress_bar = trange( From d78a4f6f7d47717eda92d529b980660df700e776 Mon Sep 17 00:00:00 2001 From: AllenWLynch Date: Mon, 27 Apr 2026 14:34:02 -0400 Subject: [PATCH 02/10] implemented scoring for unseen samples --- encode_analysis/Makefile | 65 ++++++++++++++++++- encode_analysis/base_config.yaml | 4 +- mutopia/cli/model_cli.py | 50 ++++++++++++++ mutopia/cli/model_core.py | 14 ++++ mutopia/cli/pipeline_tasks.py | 15 ++++- mutopia/model/base.py | 60 ++++++++++++++++- .../model/model_components/theta_model.py | 10 ++- mutopia/plot/signature_plot.py | 2 +- 8 files changed, 211 insertions(+), 9 deletions(-) diff --git a/encode_analysis/Makefile b/encode_analysis/Makefile index 6bef139..5cf4a95 100644 --- a/encode_analysis/Makefile +++ b/encode_analysis/Makefile @@ -140,7 +140,12 @@ studies/%/presets.02/: staged/%.train.nc staged/%.test.nc topo-model study create $@ -ds $^ -lsub 0.2 -creg 0.00025 --save-model -ee -stop 100 $$PARAMS studies/%/presets.04/: staged/%.train.nc staged/%.test.nc - @mkdir -p studies/$*/presets.02 + @mkdir -p studies/$*/presets.04 + PARAMS=$$(python bin/params_from_config.py $*); \ + topo-model study create $@ -ds $^ -lsub 0.2 --save-model -ee -stop 50 $$PARAMS + +studies/%/presets.05/: 
staged/%.train.nc staged/%.test.nc + @mkdir -p studies/$*/presets.05 PARAMS=$$(python bin/params_from_config.py $*); \ topo-model study create $@ -ds $^ -lsub 0.2 --save-model -ee -stop 50 $$PARAMS @@ -331,9 +336,67 @@ analyses/%/base_annotation.nc: --wait \ --wrap="python bin/base_annotation.py --model studies/$$STUDY_NAME/$$STUDY_ID/trial=$$MODEL_ID.pkl --base-gtensor gtensors/Lung-All.nc --output $@" +# Alternative base annotations: impute mutation rates onto other tumor types' epigenomes. +# Add new bases to ALT_BASES to extend coverage; each generates +# analyses/%/base_annotations/.nc via the shared recipe below. +ALT_BASES := Breast-All Kidney-All + +define ALT_BASE_RECIPE + @mkdir -p $(dir $@) + @FULL_PATH="$*"; \ + STUDY_NAME=$$(echo $$FULL_PATH | cut -d'/' -f1); \ + STUDY_ID=$$(echo $$FULL_PATH | cut -d'/' -f2); \ + MODEL_ID=$$(echo $$FULL_PATH | cut -d'/' -f3); \ + BASE=$(notdir $(basename $@)); \ + sbatch \ + --mem=5G \ + --cpus-per-task=5 \ + --ntasks=1 \ + --time=30:00 \ + --job-name=$@ \ + --output=%x.log \ + --partition=short,park \ + --account=park \ + --wait \ + --wrap="python bin/base_annotation.py \ + --model studies/$$STUDY_NAME/$$STUDY_ID/trial=$$MODEL_ID.pkl \ + --base-gtensor gtensors/$$BASE.nc \ + --output $@" +endef + +analyses/%/base_annotations/Breast-All.nc: + $(ALT_BASE_RECIPE) + +analyses/%/base_annotations/Kidney-All.nc: + $(ALT_BASE_RECIPE) + joint_summary.tar.gz: tar -cvf $@ $$(for model in $$(cat models.txt); do echo "analyses/$$model/annotated.shap.nc" "analyses/$$model/base_annotation.nc analyses/$$model/analysis.ipynb"; done) meta_analysis.ipynb models.txt joint_summary.doga.tar.gz: tar -cvf $@ $$(for model in $$(cat models.txt); do echo "analyses/$$model/base_annotation.nc*.gz analyses/$$model/annotated.shap.nc"; done) analyses/joint_collection/* +annotated.tar.gz: + tar -cvf $@ $$(for model in $$(cat models.txt); do echo "analyses/$$model/annotated.nc"; done) + +# Packages Lung-All base annotation (original), two alt-base 
annotations (Breast-All, Kidney-All), +# and annotated.nc (same-tumor imputation — already contains component_distributions via annot_data). +multi_base_annotations.tar.gz: + tar -cvf $@ $$(for model in $$(cat models.txt); do \ + echo "analyses/$$model/base_annotation.nc"; \ + echo "analyses/$$model/base_annotations/Breast-All.nc"; \ + echo "analyses/$$model/base_annotations/Kidney-All.nc"; \ + echo "analyses/$$model/annotated.nc"; \ + done) + +# a model has the format "tumor_type/preset.04/model_id", and we need to add "studies/tumor_type/presets.04/trial=model_id.pkl" to the tarball for each model in models.txt. +# let's also rename it to "tumor_type.model.pkl" in the tarball to avoid having a deep directory structure in the tarball. +models.tar.gz: + mkdir -p temp_models && \ + for model in $$(cat models.txt); do \ + STUDY_NAME=$$(echo $$model | cut -d'/' -f1); \ + MODEL_ID=$$(echo $$model | cut -d'/' -f3); \ + cp studies/$$STUDY_NAME/presets.04/trial=$$MODEL_ID.pkl temp_models/$$STUDY_NAME.model.pkl; \ + done && \ + tar -cvf $@ -C temp_models . 
&& \ + rm -rf temp_models diff --git a/encode_analysis/base_config.yaml b/encode_analysis/base_config.yaml index 20db25f..d0990fb 100644 --- a/encode_analysis/base_config.yaml +++ b/encode_analysis/base_config.yaml @@ -3,7 +3,7 @@ min_region_size: 100 region_size: 10000 genome: - fasta: /Users/allen/genomes/hg38/hg38.fa + fasta: /n/data1/hms/dbmi/park/SOFTWARE/REFERENCE/hg38/cgap_matches/Homo_sapiens_assembly38.fa chromsizes: annotations/hg38.mainchroms.sizes mutation_rate_file: annotations/mutation-rate.hg38.bedgraph.gz blacklist: annotations/blacklist_method12_v1_comb_sort_merged.bed.gz @@ -16,4 +16,4 @@ pipeline_params: repeat_masker_fraction: annotations/hg38.repeat_masker_fraction.bed.gz sample_params: - cluster: false \ No newline at end of file + cluster: true \ No newline at end of file diff --git a/mutopia/cli/model_cli.py b/mutopia/cli/model_cli.py index 74ddbf8..11dcd66 100644 --- a/mutopia/cli/model_cli.py +++ b/mutopia/cli/model_cli.py @@ -801,6 +801,56 @@ def annot( annot(model, dataset, output, region=region, threads=threads, calc_shap=calc_shap, celltype=celltype) +@model.command("score", short_help="Score model on held-out loci") +@click.argument("model", type=click.Path(exists=True), metavar="MODEL_FILE") +@click.argument("dataset", type=click.Path(exists=True), metavar="DATASET_FILE") +@click.option( + "--test-chrom", + type=str, + multiple=True, + default=("chr2",), + help="Chromosome(s) to hold out for testing (default: chr2)", +) +@click.option( + "-@", + "--threads", + type=click.IntRange(1, 1000), + default=1, + help="Number of parallel threads", +) +def score( + model: str, + dataset: str, + test_chrom: tuple, + threads: int = 1, +): + """ + Score a trained model on a dataset using cross-validation by locus. + + Fits per-sample local variables on non-held-out chromosomes, then + evaluates reconstruction quality (pseudo-R²) on the held-out chromosomes. 
+ + Examples: + # Score with default held-out chromosome (chr2) + model score trained_model.pkl data.nc + + # Hold out multiple chromosomes + model score trained_model.pkl data.nc --test-chrom chr1 --test-chrom chr2 + + # Score with parallel threads + model score trained_model.pkl data.nc --threads 8 + """ + from .model_core import score_model + + result = score_model( + model_path=model, + dataset_path=dataset, + test_chroms=test_chrom, + threads=threads, + ) + click.echo(f"{result:.6f}") + + @model.command("add-model-state", short_help="Add model state to dataset") @click.argument("model", type=click.Path(exists=True), metavar="MODEL_FILE") @click.argument("dataset", type=click.Path(exists=True), metavar="DATASET_FILE") diff --git a/mutopia/cli/model_core.py b/mutopia/cli/model_core.py index f4c6c9c..475f3f6 100644 --- a/mutopia/cli/model_core.py +++ b/mutopia/cli/model_core.py @@ -116,6 +116,20 @@ def add_model_state(model_path: str, dataset_path: str, output_path: str): disk.write_dataset(dataset, output_path) +def score_model( + model_path: str, + dataset_path: str, + test_chroms: tuple = ("chr2",), + threads: int = 1, +) -> float: + import mutopia.analysis as mu + + model = mu.load_model(model_path) + dataset = gt.eager_load(dataset_path) + + return model.score(dataset, test_chroms=test_chroms, threads=threads) + + def simulate_from_model( model_path: str, dataset_path: str, diff --git a/mutopia/cli/pipeline_tasks.py b/mutopia/cli/pipeline_tasks.py index 8df53df..1a591c0 100644 --- a/mutopia/cli/pipeline_tasks.py +++ b/mutopia/cli/pipeline_tasks.py @@ -14,7 +14,7 @@ list_samples, ) from .pipeline_config import GTensorConfig, ProcessingConfig, FeatureConfig -from urllib.parse import urlparse +from urllib.parse import urlparse, parse_qs, unquote import shutil from mutopia.cli.gensor_core import ( create_gtensor, @@ -93,6 +93,16 @@ def download_path(self): """Determine the local path to download the file to.""" parsed_url = urlparse(self.url) filename = 
os.path.basename(parsed_url.path) + # For URLs with query-parameter filenames (e.g. GEO/NCBI), extract + # the filename from the 'file' query param if the path yields nothing. + if not filename or filename == "download" or filename == "download/": + query_file = parse_qs(parsed_url.query).get("file", [None])[0] + if query_file: + filename = unquote(query_file) + if not filename: + # Last resort: hash the URL for a stable unique name + import hashlib + filename = hashlib.md5(self.url.encode()).hexdigest() return os.path.join("gtensor__tempfiles/downloads", filename) def output(self): @@ -401,7 +411,8 @@ def run(self): params.pop("file", None) # Remove file as it's not needed here logger.info( f"Ingesting sample '{self.sample_id}' from file '{file_path}' " - f"into GTensor '{gtensor_path}'" + f"into GTensor '{gtensor_path}'. " + f"Clustering will be performed: {params['cluster']}. " ) add_sample(**params) diff --git a/mutopia/model/base.py b/mutopia/model/base.py index 922a840..0e1e725 100644 --- a/mutopia/model/base.py +++ b/mutopia/model/base.py @@ -420,7 +420,6 @@ def init_model(self, train_datasets: Sequence[GTensorDataset]) -> TopographyMode return self - def fit( self, train_datasets: Union[GTensorDataset, Sequence[GTensorDataset]], @@ -576,6 +575,65 @@ def save(self, path: str) -> None: dump(self, path) + def score( + self, + dataset: "GTensorDataset", + test_chroms: Sequence[str] = ("chr2",), + threads: int = 1, + ) -> float: + """ + Score the model on a dataset using cross-validation by locus. + + Splits the dataset by chromosome into train and test partitions, + fits local variables (per-sample component contributions) on the + train loci, then evaluates reconstruction quality on the held-out + test loci using those fitted locals. + + Parameters + ---------- + dataset : GTensorDataset + The dataset to score. + test_chroms : sequence of str, default=("chr2",) + Chromosomes to hold out for testing. + threads : int, default=1 + Number of parallel threads. 
+ + Returns + ------- + float + Pseudo-R² score (1 - deviance_fit / deviance_null) on test loci. + """ + self._check_corpus(dataset) + + train_loci, test_loci = train_test_split(dataset, *test_chroms) + + train_loci = self.setup_corpus(train_loci, threads=threads) + test_loci = self.setup_corpus(test_loci, threads=threads) + + # Fit local variables on train loci + sample_names = list(train_loci.list_samples()) + contributions = self.locals_model_.predict( + train_loci, self.factor_model_, threads=threads + ) + sample_idx = {name: i for i, name in enumerate(sample_names)} + + # Build exposures function from fitted contributions + def exposures_fn(dataset, sample_name): + return ( + contributions.isel(sample=sample_idx[sample_name]) + .transpose(..., "component") + .data.ravel() + ) + + # Score on test loci using train-fitted locals + with ParContext(threads) as par: + return self.locals_model_.score( + self.factor_model_, + [test_loci], + exposures_fn=exposures_fn, + par_context=par, + ) + def annot_components(self, dataset: GTensorDataset, normalization: str = "global") -> GTensorDataset: spectra = xr.concat( diff --git a/mutopia/model/model/model_components/theta_model.py b/mutopia/model/model/model_components/theta_model.py index 1c46a83..92f1b23 100644 --- a/mutopia/model/model/model_components/theta_model.py +++ b/mutopia/model/model/model_components/theta_model.py @@ -25,6 +25,8 @@ StratifiedTransformer, ) +def _get_corpus_dim(corpus): + return CS.get_dims(corpus)["locus"] class ThetaModel(RateModel, SparseDataBase, DenseDataBase): @@ -177,7 +179,7 @@ def _get_intercept_idx(self, *corpuses): return get_corpus_intercepts( corpuses, self.corpus_encoder_, - n_repeats=lambda corpus: CS.get_dims(corpus)["locus"], + n_repeats=_get_corpus_dim, ) def _get_intercept_design(self, *corpuses): @@ -250,12 +252,16 @@ def reduce_dense_sstats(self, sstats, corpus, *, weighted_posterior, **kw): def format_component(self, k, normalization="none"): return 0.0 + +def 
_feature_name_combiner(feature_name, category): + return f"{feature_name}:{category}" + class LinearThetaModel(ThetaModel): categorical_encoder = partial(OneHotEncoder, sparse_output=False, drop="first", - feature_name_combiner=lambda fname, cat: f"{fname}:{cat}") + feature_name_combiner=_feature_name_combiner) def __init__(self, corpuses, l2_regularization=0.1, **kw): super().__init__( diff --git a/mutopia/plot/signature_plot.py b/mutopia/plot/signature_plot.py index 2cb4750..85f2174 100644 --- a/mutopia/plot/signature_plot.py +++ b/mutopia/plot/signature_plot.py @@ -50,7 +50,7 @@ def _get_color(i): for i, (label, s) in enumerate(signatures.items()): ax.bar( - height=[v / extent for v in s], + height=[v / 1 for v in s], x=range(i, n_bars, n_sigs), color=_get_color(i), **plot_kw, From c73656ed393c8c5b63800fc61f4a8b8ea34ad7a2 Mon Sep 17 00:00:00 2001 From: AllenWLynch Date: Mon, 27 Apr 2026 15:43:41 -0400 Subject: [PATCH 03/10] fix model pickling: rename duplicate accessors, drop training data from saved state Pickling a TopographyModel was failing with "_pickle.PicklingError: Can't pickle : it's not the same object as mutopia.gtensor.xarr_extensions.FetchSample". Two compounding issues: 1. xarr_extensions.py defined four classes named FetchSample (for list_samples, mutate, fetch_sample, iter_samples) and two named AsCSR (for ascsr and asdense). Only the last definition is bound to the module name, so pickle's class-identity check failed for any of the shadowed classes once xarray had cached an accessor instance on a Dataset. Renamed each to a unique class name. 2. The Optuna trial callback in tuning.py was a partial that captured train[0] as an unused kwarg. The partial was stored on the model via set_params(callback=...) and pickled along with it, dragging the whole training dataset (including any cached accessors) into the saved state. Dropped the unused capture and the dead code that referenced it. 
Also added TopographyModel.__getstate__ to null out `callback` before pickling, so any future closures captured by training-loop hooks cannot pin training data into a saved model. --- mutopia/gtensor/xarr_extensions.py | 8 ++++---- mutopia/model/base.py | 9 +++++++++ mutopia/tuning.py | 11 ++--------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/mutopia/gtensor/xarr_extensions.py b/mutopia/gtensor/xarr_extensions.py index 787cf11..48340e4 100644 --- a/mutopia/gtensor/xarr_extensions.py +++ b/mutopia/gtensor/xarr_extensions.py @@ -193,7 +193,7 @@ def __call__(self, *compress_dims: typing.List[str]): @xr.register_dataarray_accessor("asdense") -class AsCSR(BaseAccessor): +class AsDense(BaseAccessor): def __call__(self): try: self._xrds.data = self._xrds.data.todense() @@ -209,13 +209,13 @@ def __call__(self): @xr.register_dataset_accessor("list_samples") -class FetchSample(BaseAccessor): +class ListSamples(BaseAccessor): def __call__(self): return self._xrds.sample.values @xr.register_dataset_accessor("mutate") -class FetchSample(BaseAccessor): +class Mutate(BaseAccessor): def __call__(self, fn): return fn(self._xrds) @@ -228,7 +228,7 @@ def __call__(self, sample_name): @xr.register_dataset_accessor("iter_samples") -class FetchSample(BaseAccessor): +class IterSamples(BaseAccessor): def __call__(self, subset=None): load_samples = subset or self._xrds.list_samples() for sample_name in load_samples: diff --git a/mutopia/model/base.py b/mutopia/model/base.py index 3e1ad01..130475e 100644 --- a/mutopia/model/base.py +++ b/mutopia/model/base.py @@ -556,6 +556,15 @@ def setup_corpus(self, dataset: GTensorDataset, threads: int = 1) -> GTensorData logger.info("Done ...") return dataset + def __getstate__(self) -> Dict[str, Any]: + # The callback is a training-loop hook (e.g. an Optuna pruning callback) + # that frequently closes over the training datasets. It has no role in + # the trained model's state and must not pin training data into the + # pickle. 
+ state = self.__dict__.copy() + state["callback"] = None + return state + def save(self, path: str) -> None: """ Save the model to a file. diff --git a/mutopia/tuning.py b/mutopia/tuning.py index ec74ae6..ed145db 100644 --- a/mutopia/tuning.py +++ b/mutopia/tuning.py @@ -235,7 +235,7 @@ def load_study_data(study, lazy=False): return train, test -def _model_report_callback(trial, factor_model, epoch, test_scores,*, data): +def _model_report_callback(trial, factor_model, epoch, test_scores): """ Callback function for reporting trial progress to Optuna. @@ -265,13 +265,6 @@ def _model_report_callback(trial, factor_model, epoch, test_scores,*, data): if trial.should_prune(): raise optuna.TrialPruned() - #locals = factor_model.GT.fetch_locals(data) - #print("EPOCH ", epoch, ": VARIANCE=", var(locals.values)) - #path = os.path.join("generated/studies/healthy.length.full.03", f"trial-{trial.number}_locals", f"epoch-{epoch}.csv") - #os.makedirs(os.path.dirname(path), exist_ok=True) - #print(f"Saving locals to {path}") - #locals.to_dataframe().to_csv(path) - def _get_save_model_fn(study): """ @@ -378,7 +371,7 @@ def _objective( + "\n\t".join([f"{key}: {value}" for key, value in params.items()]) ) - callback = partial(_model_report_callback, trial, data=train[0]) + callback = partial(_model_report_callback, trial) model.set_params( **params, From 38d3573e00c25540a23e8ee3a128e42b6e3b8caf Mon Sep 17 00:00:00 2001 From: AllenWLynch Date: Tue, 28 Apr 2026 08:19:25 -0400 Subject: [PATCH 04/10] revert defensive nan/inf guards in exp-offsets hot path Source of model instability has been identified, so the per-iteration finite checks, log-space max-subtract, and clip(-30, 30) added in f17c3c7 are no longer needed and add overhead in the offsets path. 
--- mutopia/modalities/sbs/model.py | 55 +------------------ .../model/model_components/context_model.py | 13 ++--- .../model/model_components/theta_model.py | 11 +--- mutopia/model/model/optim.py | 17 ------ 4 files changed, 7 insertions(+), 89 deletions(-) diff --git a/mutopia/modalities/sbs/model.py b/mutopia/modalities/sbs/model.py index 2cdb6ea..739eb06 100644 --- a/mutopia/modalities/sbs/model.py +++ b/mutopia/modalities/sbs/model.py @@ -89,63 +89,10 @@ def _fast_exp_offsets( def _get_exp_offsets_k_c(factor_model, k, corpus): - # STABILITY: max-subtract in log-space before exp to avoid inf*0 -> NaN in the - # numba kernel. The Poisson M-step absorbs uniform offset rescaling via - # get_poisson_targets_weights, so we only have to undo the shift on the - # scalar normalizer (in log-space, where it cannot overflow). - state = corpus.sections["State"] - log_locus = np.ascontiguousarray( - state.log_locus_distribution.isel(component=k).data, dtype=np.float32 - ) - log_context = np.ascontiguousarray( - state.log_context_distribution.isel(component=k) - .transpose(..., "context") - .data, - dtype=np.float32, - ) - - finite_locus = log_locus[np.isfinite(log_locus)] - finite_context = log_context[np.isfinite(log_context)] - Lmax = np.float32(finite_locus.max()) if finite_locus.size else np.float32(0.0) - Cmax = np.float32(finite_context.max()) if finite_context.size else np.float32(0.0) - - locus_effects = contig_f32(np.exp(log_locus - Lmax)) - context_effects = np.exp(log_context - Cmax).astype(np.float32, copy=False) - - context_freqs = ( - corpus.sections["Regions"] - .context_frequencies.transpose("locus", "context", "configuration") - .data.reshape(-1, 192) - ) - exposures = corpus.sections["Regions"].exposures.data - idx_selector = state.mesoscale_idx.data.T - (normalizer, context_offsets, locus_offsets) = _fast_exp_offsets( - context_freqs, exposures, locus_effects, context_effects, idx_selector, + *_get_args(k, corpus) ) - # STABILITY: restore the 
max-subtraction purely in log-space. - normalizer = float(normalizer) - float(Lmax) - float(Cmax) - - # STABILITY: fall back to the previous normalizer instead of poisoning state - # if the slice was degenerate (e.g. empty normalizer, stale NaNs in inputs). - if not ( - np.isfinite(normalizer) - and np.all(np.isfinite(locus_offsets)) - and np.all(np.isfinite(context_offsets)) - ): - logger.warning( - f"Non-finite exp-offsets for component {k} on corpus " - f"{factor_model.GT.get_name(corpus)}; falling back to previous normalizer." - ) - normalizer = float(factor_model.get_normalizers(corpus)[k]) - locus_offsets = np.nan_to_num( - locus_offsets, nan=0.0, posinf=0.0, neginf=0.0 - ) - context_offsets = np.nan_to_num( - context_offsets, nan=0.0, posinf=0.0, neginf=0.0 - ) - return ( normalizer, {"context_model": context_offsets, "theta_model": locus_offsets}, diff --git a/mutopia/model/model/model_components/context_model.py b/mutopia/model/model/model_components/context_model.py index ccf89a3..e0235a0 100644 --- a/mutopia/model/model/model_components/context_model.py +++ b/mutopia/model/model/model_components/context_model.py @@ -333,15 +333,10 @@ def prepare_corpusstate(self, corpus): ) def _get_log_context_distribution(self, corpus_state): - # STABILITY: clip so exp() of this distribution cannot overflow float32. 
- return np.clip( - np.array( - [self._format_component(k) for k in range(self.n_components)], - dtype=self.dtype, - order="C", - ), - -30.0, - 30.0, + return np.array( + [self._format_component(k) for k in range(self.n_components)], + dtype=self.dtype, + order="C", ) def update_corpusstate(self, corpus, **kwargs): diff --git a/mutopia/model/model/model_components/theta_model.py b/mutopia/model/model/model_components/theta_model.py index d74b494..92f1b23 100644 --- a/mutopia/model/model/model_components/theta_model.py +++ b/mutopia/model/model/model_components/theta_model.py @@ -160,15 +160,8 @@ def update_corpusstate(self, corpus, from_scratch=False): X = self._check_input(self._fetch_feature_matrix(corpus)) for k in range(self.n_components): - # STABILITY: clip raw theta predictions so exp() stays within float32. - # exp(30) ~ 1e13; with context_freqs ~ 1e-3 and ~3e5 loci the - # downstream product stays well under float32 max (~3.4e38). - CS.fetch_val(corpus, "log_locus_distribution")[k].data[:] = np.clip( - self._predict( - k, corpus, from_scratch=from_scratch, X=X, check_input=False - ), - -30.0, - 30.0, + CS.fetch_val(corpus, "log_locus_distribution")[k].data[:] = self._predict( + k, corpus, from_scratch=from_scratch, X=X, check_input=False ) """ diff --git a/mutopia/model/model/optim.py b/mutopia/model/model/optim.py index 0ce1eab..1572915 100644 --- a/mutopia/model/model/optim.py +++ b/mutopia/model/model/optim.py @@ -2,7 +2,6 @@ from functools import partial from math import isnan import time -import numpy as np # STABILITY: for finite-checks on normalizers from mutopia.utils import logger, str_wrapped_list, timer_wrapper, ParContext from mutopia.gtensor import ( @@ -19,19 +18,6 @@ from .gtensor_interface import GtensorInterface -def _check_normalizers_finite(normalizers, where): - # STABILITY: fail fast with an actionable error instead of letting NaNs - # quietly poison factor_model._normalizers through _svi_update_fn. 
- for name, norm in normalizers.items(): - if not np.all(np.isfinite(norm)): - raise ValueError( - f"Non-finite normalizer values in '{where}' for dataset '{name}': " - f"{norm}. This usually means exp-offsets produced inf/NaN — check " - "log_locus_distribution / log_context_distribution for drift, " - "raise regularization, or raise locus_subsample." - ) - - def VI_step( factor_model: FactorModel, locals_model: LocalsModel, @@ -50,7 +36,6 @@ def VI_step( ) offsets, normalizers = factor_model.get_exp_offsets_dict(**args) - _check_normalizers_finite(normalizers, "VI_step") # STABILITY """ In the previous "offsets" step, new normalizers were calculated. @@ -123,7 +108,6 @@ def SVI_step( Get the offsets from the sliced data. """ offsets, normalizers = timer_wrapper(factor_model.get_exp_offsets_dict)(**args) - _check_normalizers_finite(normalizers, "SVI_step(slices)") # STABILITY if full_normalizers: """ @@ -136,7 +120,6 @@ def SVI_step( datasets=datasets, par_context=par_context, ) - _check_normalizers_finite(normalizers, "SVI_step(full)") # STABILITY factor_model.update_normalizers(datasets, normalizers) else: From bf160ebd208b79c100eb9ca84b1455af1d6fcfff Mon Sep 17 00:00:00 2001 From: AllenWLynch Date: Tue, 28 Apr 2026 08:28:42 -0400 Subject: [PATCH 05/10] restore signature plot bar normalization `v / 1` was a debugging hack accidentally committed in d78a4f6; restored to `v / extent` so bars normalize to the global max and fit within the [0, 1.1] ylim. 
--- mutopia/plot/signature_plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mutopia/plot/signature_plot.py b/mutopia/plot/signature_plot.py index 85f2174..2cb4750 100644 --- a/mutopia/plot/signature_plot.py +++ b/mutopia/plot/signature_plot.py @@ -50,7 +50,7 @@ def _get_color(i): for i, (label, s) in enumerate(signatures.items()): ax.bar( - height=[v / 1 for v in s], + height=[v / extent for v in s], x=range(i, n_bars, n_sigs), color=_get_color(i), **plot_kw, From 7c5ac731d4c6529accb9411a947acc1215b74eae Mon Sep 17 00:00:00 2001 From: AllenWLynch Date: Tue, 28 Apr 2026 09:41:17 -0400 Subject: [PATCH 06/10] add pytest test suite with fixture-based regression tests - 31 tests covering inference, gtensor ops, save/load, plotting, and CLI - fixtures hosted on sigscape/MuTopia 'test-fixtures' release; conftest downloads on first use and caches under tests/fixtures/ - tests/build_chr22_fixture.py regenerates fixtures from tutorial data - tests.yml workflow runs on push and PR - publish.yml gated on tests passing before PyPI upload - joblib added as explicit dependency (was transitive via sklearn) --- .github/workflows/publish.yml | 16 +++++ .github/workflows/tests.yml | 32 +++++++++ setup.cfg | 1 + tests/__init__.py | 0 tests/build_chr22_fixture.py | 96 ++++++++++++++++++++++++++ tests/conftest.py | 90 ++++++++++++++++++++++++ tests/fixtures/.gitignore | 2 + tests/test_cli.py | 48 +++++++++++++ tests/test_gtensor_ops.py | 112 ++++++++++++++++++++++++++++++ tests/test_inference.py | 124 ++++++++++++++++++++++++++++++++++ tests/test_plotting.py | 48 +++++++++++++ 11 files changed, 569 insertions(+) create mode 100644 .github/workflows/tests.yml create mode 100644 tests/__init__.py create mode 100644 tests/build_chr22_fixture.py create mode 100644 tests/conftest.py create mode 100644 tests/fixtures/.gitignore create mode 100644 tests/test_cli.py create mode 100644 tests/test_gtensor_ops.py create mode 100644 tests/test_inference.py create mode 
100644 tests/test_plotting.py diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 2bf14d2..94b6b9e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -17,7 +17,23 @@ on: workflow_dispatch: jobs: + tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: pip + - run: pip install -e '.[testing]' + - uses: actions/cache@v4 + with: + path: tests/fixtures + key: test-fixtures + - run: pytest tests/ --tb=short + build: + needs: tests runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..e3f0b37 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,32 @@ +name: Tests + +on: + push: + branches: [main, imports] + pull_request: + workflow_dispatch: + +jobs: + pytest: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: pip + + - name: Install package + run: | + python -m pip install --upgrade pip + pip install -e '.[testing]' + + - name: Cache test fixtures + uses: actions/cache@v4 + with: + path: tests/fixtures + key: test-fixtures + + - name: Run pytest + run: pytest tests/ -v --tb=short diff --git a/setup.cfg b/setup.cfg index c2274e6..b94c357 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,6 +24,7 @@ install_requires = optuna>=3.4.0,<5 pandas>=2.1.0 sparse>=0.15.4 + joblib>=1.2 numba>=0.60.0,<1 netCDF4>=1.7.1.post2,<2 pyfaidx diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/build_chr22_fixture.py b/tests/build_chr22_fixture.py new file mode 100644 index 0000000..f802368 --- /dev/null +++ b/tests/build_chr22_fixture.py @@ -0,0 +1,96 @@ +""" +One-shot script to produce the small fixture artifacts used by the test suite. 
+ +Slices the full Liver tutorial gtensor down to chr22 (train) + chr1 (test), +keeps a small sample subset, and copies the existing tutorial-trained model +as the inference baseline. Outputs land in tests/fixtures/ and are intended +to be uploaded to a GitHub release; CI downloads them via tests/conftest.py. + +Run from repo root: + + python tests/build_chr22_fixture.py + +Re-run only when the input gtensors or the schema changes. +""" + +from __future__ import annotations + +import argparse +import shutil +from pathlib import Path + +import mutopia.analysis as mu + +REPO = Path(__file__).resolve().parents[1] +TUTORIAL = REPO / "tutorials" / "tutorial_data" +OUT = REPO / "tests" / "fixtures" + +SOURCE_TRAIN = TUTORIAL / "Liver.train.nc" +SOURCE_TEST = TUTORIAL / "Liver.test.nc" +SOURCE_MODEL = TUTORIAL / "trained_model.pkl" + +TRAIN_REGION = "chr22" +# A 50 Mb slice of chr1 to keep the held-out fixture small (~5 MB instead of ~35 MB). +TEST_REGION = "chr1:0-50000000" +TRAIN_LABEL = "chr22" +TEST_LABEL = "chr1_50mb" +N_SAMPLES = 10 + + +def build(n_samples: int = N_SAMPLES) -> None: + OUT.mkdir(parents=True, exist_ok=True) + + for src in (SOURCE_TRAIN, SOURCE_TEST, SOURCE_MODEL): + if not src.exists(): + raise FileNotFoundError( + f"Missing source: {src}. Fetch tutorial_data first (see tutorials/)." 
+ ) + + print(f"[build] loading {SOURCE_TRAIN.name}") + train_full = mu.gt.eager_load(str(SOURCE_TRAIN)) + + sample_names = list(train_full.list_samples())[:n_samples] + print(f"[build] keeping {len(sample_names)} of {len(list(train_full.list_samples()))} samples") + + print(f"[build] slicing train to {TRAIN_REGION}") + train_chr = mu.gt.slice_regions(train_full, TRAIN_REGION) + train_chr = mu.gt.slice_samples(train_chr, sample_names) + + train_out = OUT / f"liver.{TRAIN_LABEL}.train.nc" + print(f"[build] writing {train_out}") + mu.gt.write_dataset(train_chr, str(train_out), write_samples=True) + + print(f"[build] loading {SOURCE_TEST.name}") + test_full = mu.gt.eager_load(str(SOURCE_TEST)) + print(f"[build] slicing test to {TEST_REGION}") + test_chr = mu.gt.slice_regions(test_full, TEST_REGION) + test_chr = mu.gt.slice_samples(test_chr, sample_names) + + test_out = OUT / f"liver.{TEST_LABEL}.test.nc" + print(f"[build] writing {test_out}") + mu.gt.write_dataset(test_chr, str(test_out), write_samples=True) + + model_out = OUT / "liver.trained_model.pkl" + print(f"[build] copying {SOURCE_MODEL.name} -> {model_out.name}") + shutil.copy(SOURCE_MODEL, model_out) + + print() + print("[build] done. fixtures:") + for p in sorted(OUT.glob("*")): + print(f" {p.relative_to(REPO)} ({p.stat().st_size / 1e6:.2f} MB)") + print() + print("[build] upload these to a release on sigscape/MuTopia tagged") + print(" e.g. 
test-fixtures-v1, then update FIXTURE_RELEASE_TAG in") + print(" tests/conftest.py and the cache key in .github/workflows/tests.yml.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--n-samples", + type=int, + default=N_SAMPLES, + help=f"Number of samples to retain (default: {N_SAMPLES})", + ) + args = parser.parse_args() + build(n_samples=args.n_samples) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..eeba936 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,90 @@ +""" +Shared pytest fixtures. + +Test data lives outside the repo (in a GitHub release) so the tree stays small. +On first use, the conftest downloads each fixture into tests/fixtures/ and caches +it. To regenerate the fixtures from scratch, see tests/build_chr22_fixture.py. +""" + +from __future__ import annotations + +import os +import urllib.request +from pathlib import Path + +import pytest + +FIXTURE_DIR = Path(__file__).parent / "fixtures" + +# Update both the tag and the asset list when fixtures are rebuilt. +FIXTURE_RELEASE_TAG = "test-fixtures" +FIXTURE_BASE_URL = ( + f"https://github.com/sigscape/MuTopia/releases/download/{FIXTURE_RELEASE_TAG}" +) +FIXTURE_FILES = ( + "liver.chr22.train.nc", + "liver.chr1_50mb.test.nc", + "liver.trained_model.pkl", +) + + +def _ensure_fixture(name: str) -> Path: + path = FIXTURE_DIR / name + if path.exists(): + return path + + FIXTURE_DIR.mkdir(parents=True, exist_ok=True) + url = f"{FIXTURE_BASE_URL}/{name}" + print(f"[fixtures] downloading {name} from {url}") + try: + urllib.request.urlretrieve(url, path) + except Exception as e: + if path.exists(): + path.unlink() + pytest.skip( + f"Could not download fixture {name} from {url} ({e}). " + f"Either run tests/build_chr22_fixture.py locally or upload " + f"the fixtures to the {FIXTURE_RELEASE_TAG} release." 
+ ) + return path + + +@pytest.fixture(scope="session") +def fixture_dir() -> Path: + return FIXTURE_DIR + + +@pytest.fixture(scope="session") +def train_nc_path() -> Path: + return _ensure_fixture("liver.chr22.train.nc") + + +@pytest.fixture(scope="session") +def test_nc_path() -> Path: + return _ensure_fixture("liver.chr1_50mb.test.nc") + + +@pytest.fixture(scope="session") +def model_pkl_path() -> Path: + return _ensure_fixture("liver.trained_model.pkl") + + +@pytest.fixture(scope="session") +def trained_model(model_pkl_path: Path): + import mutopia.analysis as mu + + return mu.load_model(str(model_pkl_path)) + + +@pytest.fixture(scope="session") +def train_dataset(train_nc_path: Path): + import mutopia.analysis as mu + + return mu.gt.eager_load(str(train_nc_path)) + + +@pytest.fixture(scope="session") +def test_dataset(test_nc_path: Path): + import mutopia.analysis as mu + + return mu.gt.eager_load(str(test_nc_path)) diff --git a/tests/fixtures/.gitignore b/tests/fixtures/.gitignore new file mode 100644 index 0000000..66f6e33 --- /dev/null +++ b/tests/fixtures/.gitignore @@ -0,0 +1,2 @@ +*.nc +*.pkl diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..bb3f397 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,48 @@ +""" +CLI smoke tests. + +These shell out to the installed entry points and check exit codes. +They verify packaging / command registration, not numerical behavior. 
+"""
+
+from __future__ import annotations
+
+import subprocess
+
+
+def _run(cmd: list[str]) -> subprocess.CompletedProcess:
+    return subprocess.run(cmd, capture_output=True, text=True, timeout=120)
+
+
+def test_gtensor_help():
+    r = _run(["gtensor", "--help"])
+    assert r.returncode == 0, r.stderr
+    assert "compose" in r.stdout
+
+
+def test_topo_model_help():
+    r = _run(["topo-model", "--help"])
+    assert r.returncode == 0, r.stderr
+    assert "train" in r.stdout
+    assert "study" in r.stdout
+
+
+def test_topo_model_score_help():
+    r = _run(["topo-model", "score", "--help"])
+    assert r.returncode == 0, r.stderr
+
+
+def test_mutopia_sbs_help():
+    r = _run(["mutopia-sbs", "--help"])
+    assert r.returncode == 0, r.stderr
+
+
+def test_gtensor_info(train_nc_path):
+    r = _run(["gtensor", "info", str(train_nc_path)])
+    assert r.returncode == 0, r.stderr
+
+
+def test_gtensor_feature_ls(train_nc_path):
+    r = _run(["gtensor", "feature", "ls", str(train_nc_path)])
+    assert r.returncode == 0, r.stderr
+    assert "H3K27ac" in r.stdout
diff --git a/tests/test_gtensor_ops.py b/tests/test_gtensor_ops.py
new file mode 100644
index 0000000..8eefb91
--- /dev/null
+++ b/tests/test_gtensor_ops.py
@@ -0,0 +1,112 @@
+"""
+Tests for gtensor data operations: slicing, splitting, feature fetching.
+These exercise the API that tutorials 1 and 2 rely on.
+""" + +from __future__ import annotations + +import pytest + + +def test_lazy_load_no_samples(train_nc_path): + import mutopia.analysis as mu + + ds = mu.gt.lazy_load(str(train_nc_path)) + assert "Regions" in ds.sections.names + assert "Features" in ds.sections.names + + +def test_eager_load_has_samples(train_dataset): + samples = list(train_dataset.list_samples()) + assert len(samples) == 10 + + +def test_slice_regions_by_chrom(train_dataset): + import mutopia.analysis as mu + + sliced = mu.gt.slice_regions(train_dataset, "chr22") + chroms = set(sliced.sections["Regions"].chrom.values.tolist()) + assert chroms == {"chr22"} + + +def test_slice_regions_by_interval(train_dataset): + import mutopia.analysis as mu + + sliced = mu.gt.slice_regions(train_dataset, "chr22:0-25000000") + starts = sliced.sections["Regions"].start.values + ends = sliced.sections["Regions"].end.values + assert (starts >= 0).all() + assert (ends <= 25_000_000).all() + assert len(starts) > 0 + + +def test_slice_samples_subset(train_dataset): + import mutopia.analysis as mu + + samples = list(train_dataset.list_samples())[:3] + sliced = mu.gt.slice_samples(train_dataset, samples) + assert list(sliced.list_samples()) == samples + + +def test_slice_samples_unknown_raises(train_dataset): + import mutopia.analysis as mu + + with pytest.raises((KeyError, Exception)): + mu.gt.slice_samples(train_dataset, ["__not_a_sample__"]) + + +def test_fetch_features_glob(train_dataset): + import mutopia.analysis as mu + + feats = mu.gt.fetch_features(train_dataset, "H3K*") + # H3K27ac, H3K27me3, H3K36me3, H3K4me1, H3K4me3 in our fixture + assert "feature" in feats.dims + assert feats.sizes["feature"] >= 1 + + +def test_train_test_split_separates_chroms(train_dataset): + """train_test_split should partition loci by chromosome with no overlap.""" + from mutopia.gtensor import train_test_split + + # train fixture is chr22-only; split into chr22 (held out) vs nothing + # Use a region split instead by holding out 
chr22:0-25M from a combined region + # but our fixture is chr22 only. Make a synthetic split via region slicing first. + import mutopia.analysis as mu + + left = mu.gt.slice_regions(train_dataset, "chr22:0-25000000") + right = mu.gt.slice_regions(train_dataset, "chr22:25000001-100000000") + assert left.sizes["locus"] > 0 + assert right.sizes["locus"] > 0 + assert left.sizes["locus"] + right.sizes["locus"] <= train_dataset.sizes["locus"] + + +def test_write_then_load_roundtrip(train_dataset, tmp_path): + import mutopia.analysis as mu + + out = tmp_path / "roundtrip.nc" + mu.gt.write_dataset(train_dataset, str(out), write_samples=True) + reloaded = mu.gt.eager_load(str(out)) + assert dict(reloaded.sizes) == dict(train_dataset.sizes) + assert list(reloaded.list_samples()) == list(train_dataset.list_samples()) + + +def test_write_without_samples(train_dataset, tmp_path): + """write_samples=False should still produce a loadable gtensor with intact regions/features.""" + import mutopia.analysis as mu + + out = tmp_path / "no_samples.nc" + mu.gt.write_dataset(train_dataset, str(out), write_samples=False) + reloaded = mu.gt.lazy_load(str(out)) + assert reloaded.sizes["locus"] == train_dataset.sizes["locus"] + assert "Regions" in reloaded.sections.names + assert "Features" in reloaded.sections.names + + +def test_lazy_load_after_eager_write(train_dataset, tmp_path): + """A gtensor written with samples must be discoverable via lazy_load.""" + import mutopia.analysis as mu + + out = tmp_path / "rt.nc" + mu.gt.write_dataset(train_dataset, str(out), write_samples=True) + lazy = mu.gt.lazy_load(str(out)) + assert len(list(lazy.list_samples())) == len(list(train_dataset.list_samples())) diff --git a/tests/test_inference.py b/tests/test_inference.py new file mode 100644 index 0000000..932fc9a --- /dev/null +++ b/tests/test_inference.py @@ -0,0 +1,124 @@ +""" +Inference-path regression tests. 
+ +These pin the behavior of a frozen model + frozen fixture: outputs that change +in shape, dtype, sign, or magnitude flag a regression. Numerical tolerances are +loose so legitimate BLAS/threading drift doesn't flake the suite. +""" + +from __future__ import annotations + +import numpy as np +import pytest + + +EXPECTED_ANNOT_VARS = { + "contributions", + "Spectra/spectra", + "Spectra/interactions", + "Spectra/shared_effects", + "component_distributions", + "component_distributions_locus", + "predicted_marginal", + "predicted_marginal_locus", + "empirical_marginal", + "empirical_marginal_locus", +} + + +def test_model_loads(trained_model): + assert type(trained_model).__name__ == "SBSModel" + assert trained_model.num_components == 15 + + +def test_train_fixture_shape(train_dataset): + sizes = dict(train_dataset.sizes) + assert sizes["sample"] == 10 + assert sizes["context"] == 96 + assert sizes["configuration"] == 2 + assert sizes["locus"] > 0 + + +def test_test_fixture_shape(test_dataset): + sizes = dict(test_dataset.sizes) + assert sizes["sample"] == 10 + assert sizes["locus"] > 0 + + +def test_annot_data_produces_expected_vars(trained_model, train_dataset): + annotated = trained_model.annot_data(train_dataset, threads=1, calc_shap=False) + missing = EXPECTED_ANNOT_VARS - set(annotated.data_vars) + assert not missing, f"annot_data missing vars: {missing}" + + +def test_contributions_are_finite(trained_model, train_dataset): + annotated = trained_model.annot_data(train_dataset, threads=1, calc_shap=False) + contrib = annotated["contributions"].values + assert np.all(np.isfinite(contrib)), "non-finite values in contributions" + assert (contrib >= 0).all(), "contributions should be non-negative" + + +def test_contributions_shape_matches_components(trained_model, train_dataset): + annotated = trained_model.annot_data(train_dataset, threads=1, calc_shap=False) + contrib = annotated["contributions"] + assert "sample" in contrib.dims + assert "component" in 
contrib.dims + assert contrib.sizes["component"] == trained_model.num_components + + +def test_predicted_marginal_finite(trained_model, train_dataset): + annotated = trained_model.annot_data(train_dataset, threads=1, calc_shap=False) + pred = annotated["predicted_marginal"].values + assert np.all(np.isfinite(pred)) + + +def test_component_distributions_finite_and_nonneg(trained_model, train_dataset): + annotated = trained_model.annot_data(train_dataset, threads=1, calc_shap=False) + cd = annotated["component_distributions"].values + finite = cd[np.isfinite(cd)] + assert finite.size > 0 + assert (finite >= 0).all() + + +def test_model_save_load_roundtrip(trained_model, tmp_path): + """Saving then loading should produce a model that pickles to the same state.""" + import mutopia.analysis as mu + + out = tmp_path / "roundtrip.pkl" + trained_model.save(str(out)) + reloaded = mu.load_model(str(out)) + assert reloaded.num_components == trained_model.num_components + assert type(reloaded).__name__ == type(trained_model).__name__ + + +def test_save_load_predictions_match(trained_model, train_dataset, tmp_path): + """A reloaded model must produce numerically equivalent predictions.""" + import mutopia.analysis as mu + + out = tmp_path / "rt.pkl" + trained_model.save(str(out)) + reloaded = mu.load_model(str(out)) + + a = trained_model.annot_data(train_dataset, threads=1, calc_shap=False) + b = reloaded.annot_data(train_dataset, threads=1, calc_shap=False) + np.testing.assert_allclose( + a["contributions"].values, b["contributions"].values, rtol=1e-5, atol=1e-7 + ) + np.testing.assert_allclose( + a["predicted_marginal"].values, + b["predicted_marginal"].values, + rtol=1e-5, + atol=1e-7, + ) + + +def test_callback_dropped_from_pickle(trained_model, tmp_path): + """__getstate__ must drop the training callback so closures over training data + don't get pickled (regression for c73656e).""" + import mutopia.analysis as mu + + trained_model.callback = lambda *a, **kw: None # 
simulate a trained-with-callback model + out = tmp_path / "no_callback.pkl" + trained_model.save(str(out)) + reloaded = mu.load_model(str(out)) + assert reloaded.callback is None diff --git a/tests/test_plotting.py b/tests/test_plotting.py new file mode 100644 index 0000000..ec9d4ee --- /dev/null +++ b/tests/test_plotting.py @@ -0,0 +1,48 @@ +""" +Plotting smoke tests. + +We don't pixel-snapshot. We just check that plot calls return without error +and produce a Figure with sensible structure. Visual review stays manual. +""" + +from __future__ import annotations + +import pytest + +matplotlib = pytest.importorskip("matplotlib") +matplotlib.use("Agg") +import matplotlib.pyplot as plt # noqa: E402 + + +@pytest.fixture +def annotated_dataset(trained_model, train_dataset): + """Annotated dataset usable as input to plotting helpers.""" + return trained_model.annot_data(train_dataset, threads=1, calc_shap=False) + + +def teardown_function(_): + plt.close("all") + + +def test_plot_signature_panel(annotated_dataset): + import mutopia.analysis as mu + + fig = mu.pl.plot_signature_panel(annotated_dataset) + assert fig is not None or plt.gcf() is not None + + +def test_plot_component(annotated_dataset): + import mutopia.analysis as mu + + components = mu.gt.list_components(annotated_dataset) + assert components, "no components on annotated dataset" + mu.pl.plot_component(annotated_dataset, components[0]) + assert plt.gcf().axes, "expected at least one axes" + + +def test_plot_shap_summary_skips_without_shap(annotated_dataset): + """plot_shap_summary requires SHAP values; verify it errors clearly when absent.""" + import mutopia.analysis as mu + + with pytest.raises(Exception): + mu.pl.plot_shap_summary(annotated_dataset) From f0551c9c59572cd40a1500c6f3e7e8830151772c Mon Sep 17 00:00:00 2001 From: AllenWLynch Date: Tue, 28 Apr 2026 09:44:59 -0400 Subject: [PATCH 07/10] use setuptools_scm to derive version from git tags mutopia.__version__ is now read from package metadata 
rather than hard-coded in __init__.py. Tagging vX.Y.Z is the only step needed to release; setuptools_scm derives the wheel/package version from the tag at build time. Between tags, dev installs report e.g. 1.0.8.dev3+gabc1234 automatically. mutopia/_version.py is generated at build, gitignored. --- .gitignore | 1 + mutopia/__init__.py | 7 ++++++- pyproject.toml | 7 +++++-- setup.cfg | 1 - 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index f34cfdb..1b2aa61 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ __pycache__/ +mutopia/_version.py data/ *.db *.ipynb diff --git a/mutopia/__init__.py b/mutopia/__init__.py index 84ce141..a9b0eea 100644 --- a/mutopia/__init__.py +++ b/mutopia/__init__.py @@ -1 +1,6 @@ -__version__ = "1.0.7" \ No newline at end of file +from importlib.metadata import version, PackageNotFoundError + +try: + __version__ = version("mutopia") +except PackageNotFoundError: + __version__ = "0.0.0+unknown" diff --git a/pyproject.toml b/pyproject.toml index 7fd26b9..eeea733 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,6 @@ [build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" \ No newline at end of file +requires = ["setuptools>=64", "setuptools_scm>=8"] +build-backend = "setuptools.build_meta" + +[tool.setuptools_scm] +version_file = "mutopia/_version.py" diff --git a/setup.cfg b/setup.cfg index b94c357..9c086cf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,5 @@ [metadata] name = mutopia -version = attr: mutopia.__version__ author = Allen Lynch author_email = allenlynch@g.harvard.edu license = BSD-3-Clause From 36b28b653511803b016c393093ec181aae08dfa7 Mon Sep 17 00:00:00 2001 From: AllenWLynch Date: Tue, 28 Apr 2026 09:45:35 -0400 Subject: [PATCH 08/10] fetch full git history in publish.yml so setuptools_scm sees tags --- .github/workflows/publish.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/publish.yml 
b/.github/workflows/publish.yml index 94b6b9e..d7890e8 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -37,6 +37,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 # setuptools_scm needs full history + tags - uses: actions/setup-python@v5 with: From 623677fe27e328c88eb2b423fc0a249c5f778650 Mon Sep 17 00:00:00 2001 From: AllenWLynch Date: Tue, 28 Apr 2026 10:53:33 -0400 Subject: [PATCH 09/10] add training-path tests gated behind --runslow Five new tests in tests/test_training.py exercise full-batch and SVI training, save/reload after fit, and init_components with COSMIC names. They are skipped by default; run with: pytest tests/test_training.py --runslow Conftest grows a --runslow option and a 'slow' marker. CI keeps the fast tier (no training) so PR checks stay under 30s. Note: test_batch_training_test_scores_nondecreasing currently fails on the chr22-train / chr1-test fixture (test score degrades monotonically across epochs). Likely distributional mismatch between fixture chroms or overfitting at k=2; worth investigating before relying on this as a regression gate. 
--- tests/conftest.py | 25 +++++++++ tests/test_training.py | 118 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 tests/test_training.py diff --git a/tests/conftest.py b/tests/conftest.py index eeba936..220246f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,6 +14,31 @@ import pytest + +def pytest_addoption(parser): + parser.addoption( + "--runslow", + action="store_true", + default=False, + help="Run tests marked @pytest.mark.slow (training, convergence).", + ) + + +def pytest_configure(config): + config.addinivalue_line( + "markers", "slow: marks tests that take >30s (deselect with default pytest run)" + ) + + +def pytest_collection_modifyitems(config, items): + if config.getoption("--runslow"): + return + skip_slow = pytest.mark.skip(reason="needs --runslow option to run") + for item in items: + if "slow" in item.keywords: + item.add_marker(skip_slow) + + FIXTURE_DIR = Path(__file__).parent / "fixtures" # Update both the tag and the asset list when fixtures are rebuilt. diff --git a/tests/test_training.py b/tests/test_training.py new file mode 100644 index 0000000..15542f1 --- /dev/null +++ b/tests/test_training.py @@ -0,0 +1,118 @@ +""" +Training-path tests. + +These are slow (each fits a small model end-to-end) and gated behind +--runslow so they don't bloat the default suite or CI. To run: + + pytest tests/test_training.py --runslow + +Use these to sanity-check optimization changes before tagging a release. 
+""" + +from __future__ import annotations + +import math + +import numpy as np +import pytest + +pytestmark = pytest.mark.slow + + +def _make_model(train_dataset, **overrides): + import mutopia.analysis as mu + + ModelCls = mu.make_model_cls(train_dataset) + kwargs = dict( + num_components=2, + seed=0, + threads=1, + eval_every=1, + ) + kwargs.update(overrides) + return ModelCls(**kwargs) + + +def test_batch_training_completes_without_nan(train_dataset, test_dataset): + """Full-batch training (no subsampling) should run cleanly on the fixture.""" + model = _make_model( + train_dataset, + locus_subsample=None, + batch_subsample=None, + num_epochs=10, + ) + model.fit(train_dataset, test_dataset) + + assert model.test_scores_, "test_scores_ should be populated" + assert all(math.isfinite(s) for s in model.test_scores_), ( + f"non-finite test scores: {model.test_scores_}" + ) + + +def test_batch_training_test_scores_nondecreasing(train_dataset, test_dataset): + """In full-batch mode, the test score should be non-decreasing modulo + small numerical noise. A real decrease flags an optimizer regression.""" + model = _make_model( + train_dataset, + locus_subsample=None, + batch_subsample=None, + num_epochs=10, + ) + model.fit(train_dataset, test_dataset) + + scores = np.asarray(model.test_scores_) + diffs = np.diff(scores) + # Allow tiny float noise; any meaningful decrease is a regression. 
+ assert (diffs >= -1e-3).all(), ( + f"test scores decreased between epochs: {scores.tolist()}" + ) + + +def test_svi_training_completes_without_nan(train_dataset, test_dataset): + """SVI training (with subsampling) should run cleanly and produce finite scores.""" + model = _make_model( + train_dataset, + locus_subsample=0.5, + num_epochs=10, + ) + model.fit(train_dataset, test_dataset) + + assert model.test_scores_ + assert all(math.isfinite(s) for s in model.test_scores_), ( + f"non-finite test scores: {model.test_scores_}" + ) + + +def test_trained_model_save_load_roundtrip(train_dataset, test_dataset, tmp_path): + """A freshly trained model must save and reload to bit-equivalent predictions.""" + import mutopia.analysis as mu + + model = _make_model( + train_dataset, + locus_subsample=0.5, + num_epochs=5, + ) + model.fit(train_dataset, test_dataset) + + out = tmp_path / "fresh.pkl" + model.save(str(out)) + reloaded = mu.load_model(str(out)) + + a = model.annot_data(train_dataset, threads=1, calc_shap=False) + b = reloaded.annot_data(train_dataset, threads=1, calc_shap=False) + np.testing.assert_allclose( + a["contributions"].values, b["contributions"].values, rtol=1e-5, atol=1e-7 + ) + + +def test_init_components_with_cosmic_names(train_dataset, test_dataset): + """The tutorial-recommended init_components path should fit without crashing.""" + model = _make_model( + train_dataset, + num_components=2, + init_components=["SBS1", "SBS3"], + locus_subsample=0.5, + num_epochs=3, + ) + model.fit(train_dataset, test_dataset) + assert model.test_scores_ From ec3c93932785c061efa598de37cd440972467776 Mon Sep 17 00:00:00 2001 From: AllenWLynch Date: Tue, 28 Apr 2026 11:02:09 -0400 Subject: [PATCH 10/10] rewrite slow training tests against full-scale Liver-HCC gtensor from Zenodo Previous training tests used the chr22 fixture, which is too small and distributionally dissimilar from the chr1 test set to give meaningful convergence signal. 
Slow-tier tests now download Liver-HCC.nc (~540 MB) from Zenodo record 18803136, cache to tests/fixtures/zenodo/, and split on chr1 for held-out evaluation. Run with: pytest tests/test_training.py --runslow Default fast tier and CI are unchanged. --- tests/conftest.py | 43 ++++++++++++++++++++++ tests/fixtures/.gitignore | 1 + tests/test_training.py | 76 ++++++++++++++++++++++----------------- 3 files changed, 88 insertions(+), 32 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 220246f..c796554 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -113,3 +113,46 @@ def test_dataset(test_nc_path: Path): import mutopia.analysis as mu return mu.gt.eager_load(str(test_nc_path)) + + +# --------------------------------------------------------------------------- +# Full-scale (Zenodo) fixtures — only requested by slow-tier tests. +# --------------------------------------------------------------------------- + +ZENODO_RECORD = "18803136" +ZENODO_BASE_URL = f"https://zenodo.org/records/{ZENODO_RECORD}/files" +ZENODO_CACHE_DIR = FIXTURE_DIR / "zenodo" +ZENODO_TUMOR_TYPE = "Liver-HCC" + + +def _ensure_zenodo_asset(name: str) -> Path: + path = ZENODO_CACHE_DIR / name + if path.exists(): + return path + + ZENODO_CACHE_DIR.mkdir(parents=True, exist_ok=True) + url = f"{ZENODO_BASE_URL}/{name}" + print(f"[fixtures] downloading {name} from Zenodo (large; first run only)") + try: + urllib.request.urlretrieve(url, path) + except Exception as e: + if path.exists(): + path.unlink() + pytest.skip( + f"Could not download Zenodo asset {name} from {url} ({e}). " + f"Slow-tier tests require network access to zenodo.org." 
+ ) + return path + + +@pytest.fixture(scope="session") +def zenodo_liver_path() -> Path: + return _ensure_zenodo_asset(f"{ZENODO_TUMOR_TYPE}.nc") + + +@pytest.fixture(scope="session") +def zenodo_liver_split(zenodo_liver_path: Path): + """Train/test split of the full Liver-HCC gtensor by holding out chr1.""" + from mutopia.gtensor import lazy_train_test_load + + return lazy_train_test_load(str(zenodo_liver_path), "chr1") diff --git a/tests/fixtures/.gitignore b/tests/fixtures/.gitignore index 66f6e33..2a2f0e4 100644 --- a/tests/fixtures/.gitignore +++ b/tests/fixtures/.gitignore @@ -1,2 +1,3 @@ *.nc *.pkl +zenodo/ diff --git a/tests/test_training.py b/tests/test_training.py index 15542f1..69ca664 100644 --- a/tests/test_training.py +++ b/tests/test_training.py @@ -1,12 +1,14 @@ """ -Training-path tests. +Training-path tests against the full-scale Liver-HCC gtensor from Zenodo. -These are slow (each fits a small model end-to-end) and gated behind +These are slow (each fits a real model end-to-end) and gated behind --runslow so they don't bloat the default suite or CI. To run: pytest tests/test_training.py --runslow -Use these to sanity-check optimization changes before tagging a release. +The Zenodo asset (~hundreds of MB) is downloaded once into +tests/fixtures/zenodo/ and cached. Use these to sanity-check optimization +changes before tagging a release. 
""" from __future__ import annotations @@ -24,7 +26,7 @@ def _make_model(train_dataset, **overrides): ModelCls = mu.make_model_cls(train_dataset) kwargs = dict( - num_components=2, + num_components=5, seed=0, threads=1, eval_every=1, @@ -33,15 +35,17 @@ def _make_model(train_dataset, **overrides): return ModelCls(**kwargs) -def test_batch_training_completes_without_nan(train_dataset, test_dataset): - """Full-batch training (no subsampling) should run cleanly on the fixture.""" +def test_batch_training_completes_without_nan(zenodo_liver_split): + """Full-batch training (no subsampling) on the production-scale gtensor.""" + train, test = zenodo_liver_split + model = _make_model( - train_dataset, + train, locus_subsample=None, batch_subsample=None, - num_epochs=10, + num_epochs=20, ) - model.fit(train_dataset, test_dataset) + model.fit(train, test) assert model.test_scores_, "test_scores_ should be populated" assert all(math.isfinite(s) for s in model.test_scores_), ( @@ -49,16 +53,18 @@ def test_batch_training_completes_without_nan(train_dataset, test_dataset): ) -def test_batch_training_test_scores_nondecreasing(train_dataset, test_dataset): - """In full-batch mode, the test score should be non-decreasing modulo - small numerical noise. A real decrease flags an optimizer regression.""" +def test_batch_training_test_scores_nondecreasing(zenodo_liver_split): + """In full-batch mode on real data, the test score should be non-decreasing + modulo small numerical noise. 
A real decrease flags an optimizer regression.""" + train, test = zenodo_liver_split + model = _make_model( - train_dataset, + train, locus_subsample=None, batch_subsample=None, - num_epochs=10, + num_epochs=20, ) - model.fit(train_dataset, test_dataset) + model.fit(train, test) scores = np.asarray(model.test_scores_) diffs = np.diff(scores) @@ -68,14 +74,16 @@ def test_batch_training_test_scores_nondecreasing(train_dataset, test_dataset): ) -def test_svi_training_completes_without_nan(train_dataset, test_dataset): - """SVI training (with subsampling) should run cleanly and produce finite scores.""" +def test_svi_training_completes_without_nan(zenodo_liver_split): + """SVI training (with subsampling) at production scale.""" + train, test = zenodo_liver_split + model = _make_model( - train_dataset, - locus_subsample=0.5, - num_epochs=10, + train, + locus_subsample=1 / 8, + num_epochs=20, ) - model.fit(train_dataset, test_dataset) + model.fit(train, test) assert model.test_scores_ assert all(math.isfinite(s) for s in model.test_scores_), ( @@ -83,36 +91,40 @@ def test_svi_training_completes_without_nan(train_dataset, test_dataset): ) -def test_trained_model_save_load_roundtrip(train_dataset, test_dataset, tmp_path): +def test_trained_model_save_load_roundtrip(zenodo_liver_split, tmp_path): """A freshly trained model must save and reload to bit-equivalent predictions.""" import mutopia.analysis as mu + train, test = zenodo_liver_split + model = _make_model( - train_dataset, - locus_subsample=0.5, + train, + locus_subsample=1 / 8, num_epochs=5, ) - model.fit(train_dataset, test_dataset) + model.fit(train, test) out = tmp_path / "fresh.pkl" model.save(str(out)) reloaded = mu.load_model(str(out)) - a = model.annot_data(train_dataset, threads=1, calc_shap=False) - b = reloaded.annot_data(train_dataset, threads=1, calc_shap=False) + a = model.annot_data(train, threads=1, calc_shap=False) + b = reloaded.annot_data(train, threads=1, calc_shap=False) 
np.testing.assert_allclose( a["contributions"].values, b["contributions"].values, rtol=1e-5, atol=1e-7 ) -def test_init_components_with_cosmic_names(train_dataset, test_dataset): +def test_init_components_with_cosmic_names(zenodo_liver_split): """The tutorial-recommended init_components path should fit without crashing.""" + train, test = zenodo_liver_split + model = _make_model( - train_dataset, + train, num_components=2, init_components=["SBS1", "SBS3"], - locus_subsample=0.5, - num_epochs=3, + locus_subsample=1 / 8, + num_epochs=5, ) - model.fit(train_dataset, test_dataset) + model.fit(train, test) assert model.test_scores_