From 13baf361fc10e4c027f817cc8e0e78e2f22205c7 Mon Sep 17 00:00:00 2001
From: Mark A Potts
Date: Mon, 22 Jun 2026 09:40:30 -0700
Subject: [PATCH] data loaders: prefer nuclei_seg, fall back to nuclear_seg

Updates the two cell-extraction data paths to try the new native-20x
`nuclei_seg` label first (produced by `submit_nuclei_segmentation_jobs`
in ops_process), with fall-through to the legacy 5x-upscaled
`nuclear_seg` from `segment_and_stitch_pheno`.

  * src/ops_model/data/data_loader.py:466 (CellProfileDataset)
  * src/ops_model/features/cp_extraction.py:1019 (bulk CP feature read)

Both labels are 20x-shaped at level 0 in phenotyping_v3.zarr, so bbox
slicing is unchanged.

Measured impact on per-cell features over 500 sampled cells from
ops0094 A/1/0:

  * Mean nuc area: -1.80%
  * Mean nuc/cell: -1.78%
  * Per-cell nuc IoU: mean 0.845, median 0.856
  * Outlier cells (IoU < 0.5): 0.8%

Pairs with the ops_process PR (royerlab/ops_process#113) that
introduces the new segmentation step. Migration is per-experiment:
experiments that have only run the legacy step continue to work;
experiments that have run the new step transparently pick up the
better masks.

Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ops_model/data/data_loader.py | 9 ++++++++- src/ops_model/features/cp_extraction.py | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/ops_model/data/data_loader.py b/src/ops_model/data/data_loader.py index e542f5a..462c7bb 100644 --- a/src/ops_model/data/data_loader.py +++ b/src/ops_model/data/data_loader.py @@ -463,7 +463,14 @@ def __getitem__(self, index): fov = self.stores[ci.store_key][well]["0"] mask_label = getattr(ci, "mask_label", "cell_seg") cell_mask_fov = self.stores[ci.store_key][well]["labels"][mask_label]["0"] - nuc_mask_fov = self.stores[ci.store_key][well]["labels"]["nuclear_seg"]["0"] + # Nuclear mask: prefer native-20x `nuclei_seg` (from the + # `submit_nuclei_segmentation_jobs` step), fall back to legacy + # `nuclear_seg` (5x-upscaled, from `segment_and_stitch_pheno`). + # Both labels are 20x-shaped at level 0 so bbox slicing is unchanged. + # See ops_process PR #113. + _labels_group = self.stores[ci.store_key][well]["labels"] + _nuc_label = "nuclei_seg" if "nuclei_seg" in _labels_group else "nuclear_seg" + nuc_mask_fov = _labels_group[_nuc_label]["0"] bbox = ast.literal_eval(ci.bbox) gene_label = self.label_int_lut[ci.gene_name] total_index = ci.total_index diff --git a/src/ops_model/features/cp_extraction.py b/src/ops_model/features/cp_extraction.py index b1107e2..2673321 100644 --- a/src/ops_model/features/cp_extraction.py +++ b/src/ops_model/features/cp_extraction.py @@ -1016,7 +1016,14 @@ def extract_cp_features_bulk_read( img_arr = stores[store_key][well]["0"] mask_label = getattr(first_row, "mask_label", "cell_seg") cell_seg_arr = stores[store_key][well]["labels"][mask_label]["0"] - nuc_seg_arr = stores[store_key][well]["labels"]["nuclear_seg"]["0"] + # Nuclear mask: prefer native-20x `nuclei_seg` (from the + # `submit_nuclei_segmentation_jobs` step), fall back to legacy + # `nuclear_seg` (5x-upscaled, from `segment_and_stitch_pheno`). 
+ # Both labels are 20x-shaped at level 0; bbox slicing unchanged. + # See ops_process PR #113. + _labels_group = stores[store_key][well]["labels"] + _nuc_label = "nuclei_seg" if "nuclei_seg" in _labels_group else "nuclear_seg" + nuc_seg_arr = _labels_group[_nuc_label]["0"] chunk_size = img_arr.chunks[-1] # 512 # Phase 1: Identify unique chunks from bounding boxes