diff --git a/applications/dynacell/src/dynacell/evaluation/cross_condition_probe.py b/applications/dynacell/src/dynacell/evaluation/cross_condition_probe.py index 2ea5d4e3d..070c97905 100644 --- a/applications/dynacell/src/dynacell/evaluation/cross_condition_probe.py +++ b/applications/dynacell/src/dynacell/evaluation/cross_condition_probe.py @@ -65,14 +65,27 @@ def _detect_condition(eval_dir: Path) -> str: raise ValueError(f"cannot infer condition from eval_dir name {name!r}: expected trailing _{{mock,denv,zikv}}") -def _load_embeddings(eval_dir: Path, source: str, feature: str) -> tuple[np.ndarray, np.ndarray]: +def _load_embeddings( + eval_dir: Path, + source: str, + feature: str, + cache: dict[Path, tuple[np.ndarray, np.ndarray]] | None = None, +) -> tuple[np.ndarray, np.ndarray]: """Return ``(embeddings, fov_ids)`` from one ``*_single_cell_embeddings.npz``. - ``np.load`` raises ``FileNotFoundError`` when the NPZ is missing. + ``np.load`` raises ``FileNotFoundError`` when the NPZ is missing. When + *cache* is given, the result is memoized on the resolved NPZ path so the + shared reference (``mock``) side is read from disk once per group instead + of once per pair (see :func:`run` / :func:`run_for_group`). """ npz_path = eval_dir / "embeddings" / f"{source}_{feature}_single_cell_embeddings.npz" + if cache is not None and npz_path in cache: + return cache[npz_path] with np.load(npz_path) as data: - return np.asarray(data["embeddings"]), np.asarray(data["fov"]) + result = (np.asarray(data["embeddings"]), np.asarray(data["fov"])) + if cache is not None: + cache[npz_path] = result + return result def _probe_pair( @@ -82,6 +95,7 @@ def _probe_pair( source: str, n_splits: int, rng_seed: int, + cache: dict[Path, tuple[np.ndarray, np.ndarray]] | None = None, ) -> dict: """Run one ``fov_stratified_auroc`` call for the given (pair, feature, source). @@ -106,8 +120,8 @@ def _probe_pair( row["skipped_reason"] = "missing eval dir for one side of pair" return row try: - x0, fov0 = _load_embeddings(eval_dirs_by_condition[c0], source, feature) - x1, fov1 = _load_embeddings(eval_dirs_by_condition[c1], source, feature) + x0, fov0 = _load_embeddings(eval_dirs_by_condition[c0], source, feature, cache) + x1, fov1 = _load_embeddings(eval_dirs_by_condition[c1], source, feature, cache) except FileNotFoundError as e: row["skipped_reason"] = f"missing embeddings file: {e}" return row @@ -182,7 +196,9 @@ def run_for_group( eval_dirs : list[Path] Per-condition eval dirs of one (model, pool, organelle) group. The condition is inferred from each dir's trailing ``_{mock,denv,zikv}``; - dirs without a recognized token are ignored. + dirs without a recognized token are ignored. Two dirs mapping to the + same condition raise ``ValueError`` (an ambiguous group) rather than + silently picking one. n_splits, rng_seed : int Forwarded to :func:`fov_stratified_auroc`. """ @@ -192,16 +208,21 @@ def run_for_group( cond = _detect_condition(d) except ValueError: continue + if cond in by_condition: + raise ValueError(f"duplicate condition {cond!r}: {by_condition[cond]} and {d}") by_condition[cond] = d if "mock" not in by_condition: return [] + # Shared across pairs so the mock reference embeddings are read once, not + # re-read for every infected condition. Local to this call -> released on return. + cache: dict[Path, tuple[np.ndarray, np.ndarray]] = {} written: list[Path] = [] for ref, cond in _DEFAULT_PAIRS: # ref == "mock" for every default pair if cond not in by_condition: continue rows = [ - _probe_pair(by_condition, (ref, cond), feature, source, n_splits, rng_seed) + _probe_pair(by_condition, (ref, cond), feature, source, n_splits, rng_seed, cache) for feature in _FEATURE_TYPES for source in _SOURCES ] @@ -239,11 +260,14 @@ def run( raise ValueError(f"duplicate condition {cond!r}: {eval_dirs_by_condition[cond]} and {d}") eval_dirs_by_condition[cond] = d + # Shared across pairs so a condition's embeddings (e.g. the mock reference + # reused by every pair) are read once. Local to this call -> released on return. + cache: dict[Path, tuple[np.ndarray, np.ndarray]] = {} rows = [] for feature in _FEATURE_TYPES: for pair in pairs: for source in _SOURCES: - rows.append(_probe_pair(eval_dirs_by_condition, pair, feature, source, n_splits, rng_seed)) + rows.append(_probe_pair(eval_dirs_by_condition, pair, feature, source, n_splits, rng_seed, cache)) _write_rows(out_path, rows) return out_path diff --git a/applications/dynacell/src/dynacell/evaluation/instance_metrics.py b/applications/dynacell/src/dynacell/evaluation/instance_metrics.py index 78d817a56..a202bc3b7 100644 --- a/applications/dynacell/src/dynacell/evaluation/instance_metrics.py +++ b/applications/dynacell/src/dynacell/evaluation/instance_metrics.py @@ -8,7 +8,6 @@ """ import numpy as np -from cubic.metrics import average_precision DEFAULT_IOU_THRESHOLDS = (0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95) """IoU thresholds for the AP sweep (Cellpose / StarDist standard 0.50..0.95).""" @@ -70,6 +69,12 @@ def instance_average_precision( ap_vals = [0.0] * len(thresholds) tp, fp, fn = 0.0, float(n_pred), float(n_gt) else: + # Imported lazily (not at module top) so importing this module for + # DEFAULT_IOU_THRESHOLDS / _relabel_sequential — e.g. pipeline_cache pulling + # the threshold constant — does not require the GPU-only cubic stack. The + # actual AP computation still hard-requires cubic and fails loudly here. + from cubic.metrics import average_precision + ap, tp_arr, fp_arr, fn_arr = average_precision(gt, pred, thresholds) ap_vals = [float(a) for a in np.atleast_1d(ap)] idx = thresholds.index(_PRIMARY_THRESHOLD) if _PRIMARY_THRESHOLD in thresholds else 0