From 22443abd2c5c532f312118c1205268e0fbfdbf9c Mon Sep 17 00:00:00 2001 From: Johannes Soltwedel <38459088+jo-mueller@users.noreply.github.com> Date: Fri, 28 Nov 2025 23:03:53 +0100 Subject: [PATCH 01/12] remove spaces and brackets in file name --- .../A9/{A9 p10d.tif => A9_p10d.tif} | Bin .../{A9 p10d_features.csv => A9_p10d_features.csv} | 0 .../A9/{A9 p10d_labels.tif => A9_p10d_labels.tif} | Bin .../BBBC007_v1_images/A9/{A9 p5d.tif => A9_p5d.tif} | Bin .../A9/{A9 p5d_features.csv => A9_p5d_features.csv} | 0 .../A9/{A9 p5d_labels.tif => A9_p5d_labels.tif} | Bin .../BBBC007_v1_images/A9/{A9 p7d.tif => A9_p7d.tif} | Bin .../A9/{A9 p7d_features.csv => A9_p7d_features.csv} | 0 .../A9/{A9 p7d_labels.tif => A9_p7d_labels.tif} | Bin .../BBBC007_v1_images/A9/{A9 p9d.tif => A9_p9d.tif} | Bin .../A9/{A9 p9d_features.csv => A9_p9d_features.csv} | 0 .../A9/{A9 p9d_labels.tif => A9_p9d_labels.tif} | Bin .../{f96 (17) => f96_17}/17P1_POS0006_D_1UL.tif | Bin .../17P1_POS0006_D_1UL_features.csv | 0 .../17P1_POS0006_D_1UL_labels.tif | Bin .../{f96 (17) => f96_17}/17P1_POS0007_D_1UL.tif | Bin .../17P1_POS0007_D_1UL_features.csv | 0 .../17P1_POS0007_D_1UL_labels.tif | Bin .../{f96 (17) => f96_17}/17P1_POS0011_D_1UL.tif | Bin .../17P1_POS0011_D_1UL_features.csv | 0 .../17P1_POS0011_D_1UL_labels.tif | Bin .../{f96 (17) => f96_17}/17P1_POS0013_D_1UL.tif | Bin .../17P1_POS0013_D_1UL_features.csv | 0 .../17P1_POS0013_D_1UL_labels.tif | Bin .../{f96 (17) => f96_17}/17P1_POS0014_D_1UL.tif | Bin .../17P1_POS0014_D_1UL_features.csv | 0 .../17P1_POS0014_D_1UL_labels.tif | Bin 27 files changed, 0 insertions(+), 0 deletions(-) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/{A9 p10d.tif => A9_p10d.tif} (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/{A9 p10d_features.csv => A9_p10d_features.csv} (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/{A9 p10d_labels.tif => A9_p10d_labels.tif} (100%) rename 
src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/{A9 p5d.tif => A9_p5d.tif} (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/{A9 p5d_features.csv => A9_p5d_features.csv} (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/{A9 p5d_labels.tif => A9_p5d_labels.tif} (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/{A9 p7d.tif => A9_p7d.tif} (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/{A9 p7d_features.csv => A9_p7d_features.csv} (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/{A9 p7d_labels.tif => A9_p7d_labels.tif} (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/{A9 p9d.tif => A9_p9d.tif} (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/{A9 p9d_features.csv => A9_p9d_features.csv} (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/{A9 p9d_labels.tif => A9_p9d_labels.tif} (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0006_D_1UL.tif (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0006_D_1UL_features.csv (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0006_D_1UL_labels.tif (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0007_D_1UL.tif (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0007_D_1UL_features.csv (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0007_D_1UL_labels.tif (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0011_D_1UL.tif (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0011_D_1UL_features.csv (100%) rename 
src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0011_D_1UL_labels.tif (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0013_D_1UL.tif (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0013_D_1UL_features.csv (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0013_D_1UL_labels.tif (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0014_D_1UL.tif (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0014_D_1UL_features.csv (100%) rename src/napari_clusters_plotter/sample_data/BBBC007_v1_images/{f96 (17) => f96_17}/17P1_POS0014_D_1UL_labels.tif (100%) diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p10d.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p10d.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p10d.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p10d.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p10d_features.csv b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p10d_features.csv similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p10d_features.csv rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p10d_features.csv diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p10d_labels.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p10d_labels.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p10d_labels.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p10d_labels.tif diff --git 
a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p5d.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p5d.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p5d.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p5d.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p5d_features.csv b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p5d_features.csv similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p5d_features.csv rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p5d_features.csv diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p5d_labels.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p5d_labels.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p5d_labels.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p5d_labels.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p7d.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p7d.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p7d.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p7d.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p7d_features.csv b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p7d_features.csv similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p7d_features.csv rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p7d_features.csv diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p7d_labels.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p7d_labels.tif 
similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p7d_labels.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p7d_labels.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p9d.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p9d.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p9d.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p9d.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p9d_features.csv b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p9d_features.csv similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p9d_features.csv rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p9d_features.csv diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p9d_labels.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p9d_labels.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9 p9d_labels.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/A9/A9_p9d_labels.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0006_D_1UL.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0006_D_1UL.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0006_D_1UL.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0006_D_1UL.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0006_D_1UL_features.csv b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0006_D_1UL_features.csv similarity index 100% rename from 
src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0006_D_1UL_features.csv rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0006_D_1UL_features.csv diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0006_D_1UL_labels.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0006_D_1UL_labels.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0006_D_1UL_labels.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0006_D_1UL_labels.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0007_D_1UL.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0007_D_1UL.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0007_D_1UL.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0007_D_1UL.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0007_D_1UL_features.csv b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0007_D_1UL_features.csv similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0007_D_1UL_features.csv rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0007_D_1UL_features.csv diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0007_D_1UL_labels.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0007_D_1UL_labels.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0007_D_1UL_labels.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0007_D_1UL_labels.tif diff --git 
a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0011_D_1UL.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0011_D_1UL.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0011_D_1UL.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0011_D_1UL.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0011_D_1UL_features.csv b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0011_D_1UL_features.csv similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0011_D_1UL_features.csv rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0011_D_1UL_features.csv diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0011_D_1UL_labels.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0011_D_1UL_labels.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0011_D_1UL_labels.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0011_D_1UL_labels.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0013_D_1UL.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0013_D_1UL.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0013_D_1UL.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0013_D_1UL.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0013_D_1UL_features.csv b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0013_D_1UL_features.csv similarity index 100% rename from 
src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0013_D_1UL_features.csv rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0013_D_1UL_features.csv diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0013_D_1UL_labels.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0013_D_1UL_labels.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0013_D_1UL_labels.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0013_D_1UL_labels.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0014_D_1UL.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0014_D_1UL.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0014_D_1UL.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0014_D_1UL.tif diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0014_D_1UL_features.csv b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0014_D_1UL_features.csv similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0014_D_1UL_features.csv rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0014_D_1UL_features.csv diff --git a/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0014_D_1UL_labels.tif b/src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0014_D_1UL_labels.tif similarity index 100% rename from src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96 (17)/17P1_POS0014_D_1UL_labels.tif rename to src/napari_clusters_plotter/sample_data/BBBC007_v1_images/f96_17/17P1_POS0014_D_1UL_labels.tif From 
467dd7f323f372cfd0d7f24ed0f7b410bea9c563 Mon Sep 17 00:00:00 2001 From: Johannes Soltwedel <38459088+jo-mueller@users.noreply.github.com> Date: Fri, 28 Nov 2025 23:06:07 +0100 Subject: [PATCH 02/12] add pooch to deps --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c27ac479..87e72a9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,8 @@ dependencies = [ "scikit-image", "scipy", "biaplotter>=0.3.1", - "imagecodecs" + "imagecodecs", + "pooch" ] From 79a635e5cf2350122048716c1b7ff23712e5c190 Mon Sep 17 00:00:00 2001 From: Johannes Soltwedel <38459088+jo-mueller@users.noreply.github.com> Date: Fri, 28 Nov 2025 23:06:34 +0100 Subject: [PATCH 03/12] pull sample data from pooch registry --- src/napari_clusters_plotter/_sample_data.py | 125 ++++++++++++-------- 1 file changed, 75 insertions(+), 50 deletions(-) diff --git a/src/napari_clusters_plotter/_sample_data.py b/src/napari_clusters_plotter/_sample_data.py index ebb1b76b..bc83c17c 100644 --- a/src/napari_clusters_plotter/_sample_data.py +++ b/src/napari_clusters_plotter/_sample_data.py @@ -1,23 +1,63 @@ -import glob -import os from pathlib import Path from typing import List - +from skimage import io import numpy as np +import pandas as pd +import pooch +import os +import zipfile +from pathlib import Path +from napari_clusters_plotter import __version__ + +# Create the data registry +registry = pd.read_csv( + Path(__file__).parent / "sample_data/data_registry.txt", sep=': sha256:', header=None + ) +registry.columns = ['file', 'hash'] + +# parse version +if 'dev' in __version__: + from packaging.version import parse + major, minor, patch = parse(__version__).release + version = f"{major}.{minor}.{patch-1}" +else: + version = __version__ + +DATA_REGISTRY = pooch.create( + path=pooch.os_cache("napari-clusters-plotter"), + base_url=f"https://github.com/biapol/napari-clusters-plotter/releases/download/v{version}/", + 
registry={"sample_data.zip": registry[registry['file'] == 'sample_data.zip']['hash'].values[0]}, +) + +def load_image(fname): + zip_path = DATA_REGISTRY.fetch("sample_data.zip") + + # check if has been unzipped before + if not os.path.exists(zip_path.split(".zip")[0]): + with zipfile.ZipFile(zip_path, 'r') as z: + z.extractall(zip_path.split(".zip")[0]) + + fname = os.path.join(zip_path.split(".zip")[0], fname) + image = io.imread(fname) + + return image + +def load_tabular(fname, **kwargs): + zip_path = DATA_REGISTRY.fetch("sample_data.zip") + # check if has been unzipped before + if not os.path.exists(zip_path.split(".zip")[0]): + with zipfile.ZipFile(zip_path, 'r') as z: + z.extractall(zip_path.split(".zip")[0]) + + fname = os.path.join(zip_path.split(".zip")[0], fname) + data = pd.read_csv(fname, **kwargs) + return data def skan_skeleton() -> List["LayerData"]: # noqa: F821 - import pandas as pd - from skimage.io import imread - paths_data = Path(__file__).parent / "sample_data" / "shapes_skeleton" - df_paths = pd.read_csv( - paths_data / Path("all_paths.csv"), - ) - df_features = pd.read_csv( - paths_data / Path("skeleton_features.csv"), - index_col="Unnamed: 0", # Adjusted to match the CSV structure - ) + df_paths = load_tabular("shapes_skeleton/all_paths.csv") + df_features = load_tabular("shapes_skeleton/skeleton_features.csv", index_col="Unnamed: 0") # skeleton_id column should be categorical categorical_columns = [ @@ -49,7 +89,7 @@ def skan_skeleton() -> List["LayerData"]: # noqa: F821 ) layer_blobs = ( - imread(paths_data / Path("blobs.tif")), + load_image("shapes_skeleton/blobs.tif"), { "name": "binary blobs", "opacity": 0.5, @@ -62,17 +102,13 @@ def skan_skeleton() -> List["LayerData"]: # noqa: F821 def tgmm_mini_dataset() -> List["LayerData"]: # noqa: F821 - import pandas as pd - from skimage.io import imread - - path = Path(__file__).parent / "sample_data" / "tracking_data" - data = pd.read_csv(path / Path("tgmm-mini-tracks-layer-data.csv")) - 
features = pd.read_csv( - path / Path("tgmm-mini-spot.csv"), + + features = load_tabular( + "tracking_data/tgmm-mini-spot.csv", skiprows=[1, 2], low_memory=False, - encoding="utf-8", - ) + encoding="utf-8") + data = load_tabular("tracking_data/tgmm-mini-tracks-layer-data.csv") categorical_columns = [ "Label", @@ -82,7 +118,7 @@ def tgmm_mini_dataset() -> List["LayerData"]: # noqa: F821 ] for feature in categorical_columns: features[feature] = features[feature].astype("category") - tracking_label_image = imread(path / Path("tgmm-mini.tif")) + tracking_label_image = load_image("tracking_data/tgmm-mini.tif") layer_data_tuple_tracks = ( data, @@ -108,15 +144,14 @@ def tgmm_mini_dataset() -> List["LayerData"]: # noqa: F821 def bbbc_1_dataset() -> List["LayerData"]: # noqa: F821 - import pandas as pd - from skimage import io - - # get path of this file - path = Path(__file__).parent / "sample_data" / "BBBC007_v1_images" + # read data registry file + registry = pd.read_csv( + Path(__file__).parent / "sample_data/data_registry.txt", sep=': sha256:', header=None + ) + registry.columns = ['file', 'hash'] - tif_files = glob.glob( - os.path.join(str(path), "**", "*.tif"), recursive=True - ) + registry_bbby1 = registry[registry['file'].str.contains("BBBC007_v1_images")] + tif_files = registry_bbby1[registry_bbby1['file'].str.endswith(".tif")]['file'].to_list() raw_images = [f for f in tif_files if "labels" not in f] n_rows = np.ceil(np.sqrt(len(raw_images))) @@ -124,10 +159,10 @@ def bbbc_1_dataset() -> List["LayerData"]: # noqa: F821 layers = [] - images = [io.imread(f) for f in raw_images] - labels = [io.imread(f.replace(".tif", "_labels.tif")) for f in raw_images] + images = [load_image(f) for f in raw_images] + labels = [load_image(f.replace(".tif", "_labels.tif")) for f in raw_images] features = [ - pd.read_csv(f.replace(".tif", "_features.csv")) for f in raw_images + load_tabular(f.replace(".tif", "_features.csv")) for f in raw_images ] max_size = max([image.shape[0] for 
image in images]) @@ -172,17 +207,10 @@ def bbbc_1_dataset() -> List["LayerData"]: # noqa: F821 def cells3d_curvatures() -> List["LayerData"]: # noqa: F821 - import numpy as np - import pandas as pd - from skimage import io - - path = Path(__file__).parent / "sample_data" / "cells3d" - - # load data - vertices = np.loadtxt(path / "vertices.txt") - faces = np.loadtxt(path / "faces.txt").astype(int) - hks = pd.read_csv(path / "signature.csv") - nuclei = io.imread(path / "nucleus.tif") + vertices = load_tabular("cells3d/vertices.txt", sep=' ', header=None).to_numpy() + faces = load_tabular("cells3d/faces.txt", sep=' ', header=None).to_numpy().astype(int) + hks = load_tabular("cells3d/signature.csv") + nuclei = load_image("cells3d/nucleus.tif") # create layer data tuples layer_data_surface = ( @@ -208,12 +236,9 @@ def cells3d_curvatures() -> List["LayerData"]: # noqa: F821 def granule_compression_vectors() -> List["LayerData"]: # noqa: F821 import numpy as np - import pandas as pd from napari.utils import notifications - path = Path(__file__).parent / "sample_data" / "compression_vectors" - - features = pd.read_csv(path / "granular_compression_test.csv") + features = load_tabular("compression_vectors/granular_compression_test.csv") features["iterations"] = features["iterations"].astype("category") features["returnStatus"] = features["returnStatus"].astype("category") features["Label"] = features["Label"].astype("category") From ec55df005939dea5c2426e18a848a6fda932c0e9 Mon Sep 17 00:00:00 2001 From: Johannes Soltwedel <38459088+jo-mueller@users.noreply.github.com> Date: Fri, 28 Nov 2025 23:40:17 +0100 Subject: [PATCH 04/12] use trusted publishing workflow and upload assets upon release --- .github/workflows/test_and_deploy.yml | 74 ++++++++++++++++++++------- 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml index fdece083..005940d0 100644 --- 
a/.github/workflows/test_and_deploy.yml +++ b/.github/workflows/test_and_deploy.yml @@ -33,7 +33,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - # these libraries enable testing on Qt on linux + # these libraries enable testing on Qt on linux - uses: tlambert03/setup-qt-libs@v1 # strategy borrowed from vispy for installing opengl libs on windows @@ -63,28 +63,64 @@ jobs: - name: Coverage uses: codecov/codecov-action@v3 - deploy: - # this will run when you have tagged a commit, - # and requires that you have put your twine API key in your - # github secrets (see readme for details) - needs: [test] + build: + name: Build distribution 📦 runs-on: ubuntu-latest - if: contains(github.ref, 'tags') + steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.x" - - name: Install dependencies + - name: Install pypa/build + run: >- + python3 -m pip install build --user + - name: Build a binary wheel and a source tarball + run: python3 -m build + - name: Store the distribution packages + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish-to-pypi: + name: >- + Publish Python 🐍 distribution 📦 to PyPI + if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/project/napari-clusters-plotter/ + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + + add-assets-to-release: + name: Add Sample Data Asset to Release + needs: [build] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Create sample_data.zip run: | - 
python -m pip install --upgrade pip - pip install -U setuptools setuptools_scm wheel twine build - - name: Build and publish + python src/napari_clusters_plotter/sample_data/create_sample_data_assets.py + - name: Upload Sample Data Asset + uses: actions/upload-release-asset@v1 env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} - run: | - git tag - python -m build . - twine upload dist/* + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ github.event.release.upload_url }} + asset_path: ./src/napari_clusters_plotter/sample_data/sample_data.zip + asset_name: sample_data.zip + asset_content_type: application/zip From 3e56ee282a31dc9bac299ccf3f0aa31e527be701 Mon Sep 17 00:00:00 2001 From: Johannes Soltwedel <38459088+jo-mueller@users.noreply.github.com> Date: Fri, 28 Nov 2025 23:40:28 +0100 Subject: [PATCH 05/12] untrack sample data --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index c8dad927..6691355c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,8 @@ __pycache__/ # C extensions *.so +sample_data.zip + # Distribution / packaging .Python env/ From 115581b62bb60cf8d2f851f1c6668dea17243e87 Mon Sep 17 00:00:00 2001 From: Johannes Soltwedel <38459088+jo-mueller@users.noreply.github.com> Date: Fri, 28 Nov 2025 23:41:27 +0100 Subject: [PATCH 06/12] don't ship sample data --- MANIFEST.in | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 9b448ea4..43d82a68 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,7 +2,9 @@ include LICENSE include README.md include requirements.txt -recursive-include src/napari-clusters-plotter/sample_data * +# Include necessary plugin files and exclude sample data +recursive-exclude src/napari_clusters_plotter/sample_data * +include src/napari_clusters_plotter/sample_data/data_registry.txt recursive-exclude * __pycache__ recursive-exclude * *.py[co] From f4f7a7c555fa1858e40b0431f504cfa08de8284d Mon Sep 17 
00:00:00 2001 From: Johannes Soltwedel <38459088+jo-mueller@users.noreply.github.com> Date: Fri, 28 Nov 2025 23:41:38 +0100 Subject: [PATCH 07/12] Create create_sample_data_assets.py --- .../sample_data/create_sample_data_assets.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 src/napari_clusters_plotter/sample_data/create_sample_data_assets.py diff --git a/src/napari_clusters_plotter/sample_data/create_sample_data_assets.py b/src/napari_clusters_plotter/sample_data/create_sample_data_assets.py new file mode 100644 index 00000000..4dd32a6d --- /dev/null +++ b/src/napari_clusters_plotter/sample_data/create_sample_data_assets.py @@ -0,0 +1,17 @@ +import os +import zipfile +from pathlib import Path + +def create_sample_data_zip(): + sample_data_dir = Path(__file__).parent + zip_path = sample_data_dir / "sample_data.zip" + + with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + for root, _, files in os.walk(sample_data_dir): + for file in files: + file_path = Path(root) / file + arcname = file_path.relative_to(sample_data_dir) + zipf.write(file_path, arcname) + +if __name__ == "__main__": + create_sample_data_zip() \ No newline at end of file From 05701f4f6d04c4f7b4fca7b08256c3c163ccf309 Mon Sep 17 00:00:00 2001 From: Johannes Soltwedel <38459088+jo-mueller@users.noreply.github.com> Date: Sat, 29 Nov 2025 00:21:51 +0100 Subject: [PATCH 08/12] moved registry creation into parent --- .../_create_sample_data_assets.py | 44 +++++++++++++++++++ .../sample_data/create_sample_data_assets.py | 17 ------- 2 files changed, 44 insertions(+), 17 deletions(-) create mode 100644 src/napari_clusters_plotter/_create_sample_data_assets.py delete mode 100644 src/napari_clusters_plotter/sample_data/create_sample_data_assets.py diff --git a/src/napari_clusters_plotter/_create_sample_data_assets.py b/src/napari_clusters_plotter/_create_sample_data_assets.py new file mode 100644 index 00000000..51a81ae3 --- /dev/null +++ 
b/src/napari_clusters_plotter/_create_sample_data_assets.py @@ -0,0 +1,44 @@ +import os +import zipfile +from pathlib import Path + +def create_sample_data_zip(): + import shutil + sample_data_dir = Path(__file__).parent / "sample_data" + zip_path = Path(__file__).parent / "sample_data.zip" + + if os.path.exists(zip_path): + shutil.rmtree(zip_path) + + if os.path.exists(sample_data_dir.parent / "data_registry.txt"): + os.remove(sample_data_dir.parent / "data_registry.txt") + + with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + for root, _, files in os.walk(sample_data_dir): + for file in files: + file_path = Path(root) / file + arcname = file_path.relative_to(sample_data_dir) + zipf.write(file_path, arcname) + + # copy to sample_data folder + dest_path = sample_data_dir.parent / "sample_data" / "sample_data.zip" + os.replace(zip_path, dest_path) + +def create_registry_file(): + import hashlib + root_dir = Path(__file__).parent / "sample_data" # Update this path + registry = {} + with open(root_dir / "data_registry.txt", "w") as registry_file: + for root, _, files in os.walk(root_dir): + for fn in files: + fp = Path(root) / fn + rel_path = fp.relative_to(root_dir).as_posix() + with open(fp, "rb") as f: + file_hash = hashlib.sha256(f.read()).hexdigest() + registry[str(rel_path)] = f"sha256:{file_hash}" + registry_file.write(f'{rel_path}: sha256:{file_hash}\n') + +if __name__ == "__main__": + create_registry_file() + create_sample_data_zip() + create_registry_file() \ No newline at end of file diff --git a/src/napari_clusters_plotter/sample_data/create_sample_data_assets.py b/src/napari_clusters_plotter/sample_data/create_sample_data_assets.py deleted file mode 100644 index 4dd32a6d..00000000 --- a/src/napari_clusters_plotter/sample_data/create_sample_data_assets.py +++ /dev/null @@ -1,17 +0,0 @@ -import os -import zipfile -from pathlib import Path - -def create_sample_data_zip(): - sample_data_dir = Path(__file__).parent - zip_path = sample_data_dir 
/ "sample_data.zip" - - with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: - for root, _, files in os.walk(sample_data_dir): - for file in files: - file_path = Path(root) / file - arcname = file_path.relative_to(sample_data_dir) - zipf.write(file_path, arcname) - -if __name__ == "__main__": - create_sample_data_zip() \ No newline at end of file From f93f1573ecf699c85a2de7c539c4f42c2d03f175 Mon Sep 17 00:00:00 2001 From: Johannes Soltwedel <38459088+jo-mueller@users.noreply.github.com> Date: Sat, 29 Nov 2025 00:22:17 +0100 Subject: [PATCH 09/12] download registry overview from assets --- src/napari_clusters_plotter/_sample_data.py | 26 ++++++++++++--------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/napari_clusters_plotter/_sample_data.py b/src/napari_clusters_plotter/_sample_data.py index bc83c17c..7f407457 100644 --- a/src/napari_clusters_plotter/_sample_data.py +++ b/src/napari_clusters_plotter/_sample_data.py @@ -9,12 +9,6 @@ from pathlib import Path from napari_clusters_plotter import __version__ -# Create the data registry -registry = pd.read_csv( - Path(__file__).parent / "sample_data/data_registry.txt", sep=': sha256:', header=None - ) -registry.columns = ['file', 'hash'] - # parse version if 'dev' in __version__: from packaging.version import parse @@ -26,7 +20,7 @@ DATA_REGISTRY = pooch.create( path=pooch.os_cache("napari-clusters-plotter"), base_url=f"https://github.com/biapol/napari-clusters-plotter/releases/download/v{version}/", - registry={"sample_data.zip": registry[registry['file'] == 'sample_data.zip']['hash'].values[0]}, + registry={"sample_data.zip": "sha256:d21889252cc439b32dacbfb2d4085057da1fe28e3c35f94fee1487804cfe9615"}, ) def load_image(fname): @@ -54,6 +48,19 @@ def load_tabular(fname, **kwargs): data = pd.read_csv(fname, **kwargs) return data +def load_registry(): + zip_path = DATA_REGISTRY.fetch("sample_data.zip") + + # check if has been unzipped before + if not 
os.path.exists(zip_path.split(".zip")[0]): + with zipfile.ZipFile(zip_path, 'r') as z: + z.extractall(zip_path.split(".zip")[0]) + + fname = os.path.join(zip_path.split(".zip")[0], "sample_data/data_registry.txt") + registry = pd.read_csv(fname, sep=': sha256:', header=None) + registry.columns = ['file', 'hash'] + return registry + def skan_skeleton() -> List["LayerData"]: # noqa: F821 df_paths = load_tabular("shapes_skeleton/all_paths.csv") @@ -145,10 +152,7 @@ def tgmm_mini_dataset() -> List["LayerData"]: # noqa: F821 def bbbc_1_dataset() -> List["LayerData"]: # noqa: F821 # read data registry file - registry = pd.read_csv( - Path(__file__).parent / "sample_data/data_registry.txt", sep=': sha256:', header=None - ) - registry.columns = ['file', 'hash'] + registry = load_registry() registry_bbby1 = registry[registry['file'].str.contains("BBBC007_v1_images")] tif_files = registry_bbby1[registry_bbby1['file'].str.endswith(".tif")]['file'].to_list() From 286e2ccee5b3b31d33d9556ff2cba432c0115c27 Mon Sep 17 00:00:00 2001 From: Johannes Soltwedel <38459088+jo-mueller@users.noreply.github.com> Date: Sat, 29 Nov 2025 00:25:12 +0100 Subject: [PATCH 10/12] untrack data_registry --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 6691355c..6723a52d 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ __pycache__/ *.so sample_data.zip +data_registry.txt # Distribution / packaging .Python From a6787f44472ab86570cbe0df98b58237a8cde7cf Mon Sep 17 00:00:00 2001 From: Johannes Soltwedel <38459088+jo-mueller@users.noreply.github.com> Date: Sat, 29 Nov 2025 00:35:45 +0100 Subject: [PATCH 11/12] moved up asset upload --- .github/workflows/test_and_deploy.yml | 40 ++++++++++++++------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml index 005940d0..a84ea5df 100644 --- a/.github/workflows/test_and_deploy.yml +++ 
b/.github/workflows/test_and_deploy.yml @@ -17,9 +17,28 @@ on: workflow_dispatch: jobs: + add-assets-to-release: + name: Add Sample Data Asset to Release + if: startsWith(github.ref, 'refs/tags/') # only run on tag pushes (releases) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Create sample_data.zip + run: | + cd src/napari_clusters_plotter/sample_data + zip -r ../../../sample_data.zip . -x "*.git*" + - name: Upload Sample Data Asset + uses: softprops/action-gh-release@v1 + with: + files: sample_data.zip + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + test: name: ${{ matrix.platform }} py${{ matrix.python-version }} runs-on: ${{ matrix.platform }} + needs: [add-assets-to-release] # Wait for assets to be uploaded + if: always() # Run tests even if no release assets (for PRs) strategy: matrix: platform: [ubuntu-latest, windows-latest, macos-latest] # macos-latest (disabled, see related issue) @@ -65,6 +84,7 @@ jobs: build: name: Build distribution 📦 + needs: [test] # Wait for tests to pass runs-on: ubuntu-latest steps: @@ -89,6 +109,7 @@ jobs: Publish Python 🐍 distribution 📦 to PyPI if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes needs: + - test - build runs-on: ubuntu-latest environment: @@ -105,22 +126,3 @@ jobs: path: dist/ - name: Publish distribution 📦 to PyPI uses: pypa/gh-action-pypi-publish@release/v1 - - add-assets-to-release: - name: Add Sample Data Asset to Release - needs: [build] - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Create sample_data.zip - run: | - python src/napari_clusters_plotter/sample_data/create_sample_data_assets.py - - name: Upload Sample Data Asset - uses: actions/upload-release-asset@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ github.event.release.upload_url }} - asset_path: ./src/napari_clusters_plotter/sample_data/sample_data.zip - asset_name: sample_data.zip - asset_content_type: application/zip From 
9185c764e000be000f27d331af9d6ab0b9ace547 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 28 Nov 2025 23:36:16 +0000 Subject: [PATCH 12/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../_create_sample_data_assets.py | 11 +++- src/napari_clusters_plotter/_sample_data.py | 65 +++++++++++++------ 2 files changed, 53 insertions(+), 23 deletions(-) diff --git a/src/napari_clusters_plotter/_create_sample_data_assets.py b/src/napari_clusters_plotter/_create_sample_data_assets.py index 51a81ae3..066ff0e9 100644 --- a/src/napari_clusters_plotter/_create_sample_data_assets.py +++ b/src/napari_clusters_plotter/_create_sample_data_assets.py @@ -2,8 +2,10 @@ import zipfile from pathlib import Path + def create_sample_data_zip(): import shutil + sample_data_dir = Path(__file__).parent / "sample_data" zip_path = Path(__file__).parent / "sample_data.zip" @@ -13,7 +15,7 @@ def create_sample_data_zip(): if os.path.exists(sample_data_dir.parent / "data_registry.txt"): os.remove(sample_data_dir.parent / "data_registry.txt") - with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf: for root, _, files in os.walk(sample_data_dir): for file in files: file_path = Path(root) / file @@ -24,8 +26,10 @@ def create_sample_data_zip(): dest_path = sample_data_dir.parent / "sample_data" / "sample_data.zip" os.replace(zip_path, dest_path) + def create_registry_file(): import hashlib + root_dir = Path(__file__).parent / "sample_data" # Update this path registry = {} with open(root_dir / "data_registry.txt", "w") as registry_file: @@ -36,9 +40,10 @@ def create_registry_file(): with open(fp, "rb") as f: file_hash = hashlib.sha256(f.read()).hexdigest() registry[str(rel_path)] = f"sha256:{file_hash}" - registry_file.write(f'{rel_path}: sha256:{file_hash}\n') + registry_file.write(f"{rel_path}: 
sha256:{file_hash}\n") + if __name__ == "__main__": create_registry_file() create_sample_data_zip() - create_registry_file() \ No newline at end of file + create_registry_file() diff --git a/src/napari_clusters_plotter/_sample_data.py b/src/napari_clusters_plotter/_sample_data.py index 7f407457..32bec767 100644 --- a/src/napari_clusters_plotter/_sample_data.py +++ b/src/napari_clusters_plotter/_sample_data.py @@ -1,17 +1,19 @@ +import os +import zipfile from pathlib import Path from typing import List -from skimage import io + import numpy as np import pandas as pd import pooch -import os -import zipfile -from pathlib import Path +from skimage import io + from napari_clusters_plotter import __version__ # parse version -if 'dev' in __version__: +if "dev" in __version__: from packaging.version import parse + major, minor, patch = parse(__version__).release version = f"{major}.{minor}.{patch-1}" else: @@ -20,15 +22,18 @@ DATA_REGISTRY = pooch.create( path=pooch.os_cache("napari-clusters-plotter"), base_url=f"https://github.com/biapol/napari-clusters-plotter/releases/download/v{version}/", - registry={"sample_data.zip": "sha256:d21889252cc439b32dacbfb2d4085057da1fe28e3c35f94fee1487804cfe9615"}, + registry={ + "sample_data.zip": "sha256:d21889252cc439b32dacbfb2d4085057da1fe28e3c35f94fee1487804cfe9615" + }, ) + def load_image(fname): zip_path = DATA_REGISTRY.fetch("sample_data.zip") # check if has been unzipped before if not os.path.exists(zip_path.split(".zip")[0]): - with zipfile.ZipFile(zip_path, 'r') as z: + with zipfile.ZipFile(zip_path, "r") as z: z.extractall(zip_path.split(".zip")[0]) fname = os.path.join(zip_path.split(".zip")[0], fname) @@ -36,35 +41,42 @@ def load_image(fname): return image + def load_tabular(fname, **kwargs): zip_path = DATA_REGISTRY.fetch("sample_data.zip") # check if has been unzipped before if not os.path.exists(zip_path.split(".zip")[0]): - with zipfile.ZipFile(zip_path, 'r') as z: + with zipfile.ZipFile(zip_path, "r") as z: 
def load_registry():
    """Load the sample-data registry shipped inside ``sample_data.zip``.

    The archive is obtained through ``DATA_REGISTRY.fetch`` and unpacked
    into a folder named after the archive (path up to ``.zip``) the first
    time it is needed.

    Returns
    -------
    pandas.DataFrame
        One row per registry line, with the string columns ``file`` and
        ``hash``.
    """
    zip_path = DATA_REGISTRY.fetch("sample_data.zip")
    extract_dir = zip_path.split(".zip")[0]

    # Unpack only once: an existing folder counts as "already extracted".
    if not os.path.exists(extract_dir):
        with zipfile.ZipFile(zip_path, "r") as z:
            z.extractall(extract_dir)

    # NOTE(review): an archive whose member names are relative to the
    # sample_data folder stores the registry at the archive root, so that
    # location is tried as a fallback -- confirm the intended layout.
    candidates = [
        os.path.join(extract_dir, "sample_data/data_registry.txt"),
        os.path.join(extract_dir, "data_registry.txt"),
    ]
    fname = next(
        (path for path in candidates if os.path.exists(path)), candidates[0]
    )

    # A separator longer than one character is treated as a regular
    # expression, which only the python engine supports; selecting it
    # explicitly avoids the ParserWarning pandas emits on the fallback.
    registry = pd.read_csv(
        fname,
        sep=": sha256:",
        header=None,
        names=["file", "hash"],
        dtype=str,
        engine="python",
    )
    return registry
registry_bbby1[registry_bbby1["file"].str.endswith(".tif")][ + "file" + ].to_list() raw_images = [f for f in tif_files if "labels" not in f] n_rows = np.ceil(np.sqrt(len(raw_images))) @@ -211,8 +228,14 @@ def bbbc_1_dataset() -> List["LayerData"]: # noqa: F821 def cells3d_curvatures() -> List["LayerData"]: # noqa: F821 - vertices = load_tabular("cells3d/vertices.txt", sep=' ', header=None).to_numpy() - faces = load_tabular("cells3d/faces.txt", sep=' ', header=None).to_numpy().astype(int) + vertices = load_tabular( + "cells3d/vertices.txt", sep=" ", header=None + ).to_numpy() + faces = ( + load_tabular("cells3d/faces.txt", sep=" ", header=None) + .to_numpy() + .astype(int) + ) hks = load_tabular("cells3d/signature.csv") nuclei = load_image("cells3d/nucleus.tif") @@ -242,7 +265,9 @@ def granule_compression_vectors() -> List["LayerData"]: # noqa: F821 import numpy as np from napari.utils import notifications - features = load_tabular("compression_vectors/granular_compression_test.csv") + features = load_tabular( + "compression_vectors/granular_compression_test.csv" + ) features["iterations"] = features["iterations"].astype("category") features["returnStatus"] = features["returnStatus"].astype("category") features["Label"] = features["Label"].astype("category")