From 6b35422c0b98a4b3ccc2a44ba7f8b24f3af9ddb9 Mon Sep 17 00:00:00 2001 From: Yohsuke Fukai Date: Tue, 16 Jun 2026 21:46:09 -0700 Subject: [PATCH] fix? --- src/tracksdata/nodes/_regionprops.py | 47 +++++++++- .../nodes/_test/test_regionprops.py | 88 +++++++++++++++++++ 2 files changed, 133 insertions(+), 2 deletions(-) diff --git a/src/tracksdata/nodes/_regionprops.py b/src/tracksdata/nodes/_regionprops.py index 11ad7261..3cbb1ba4 100644 --- a/src/tracksdata/nodes/_regionprops.py +++ b/src/tracksdata/nodes/_regionprops.py @@ -37,6 +37,13 @@ class RegionPropsNodes(BaseNodesOperator): Physical spacing between pixels. If provided, affects distance-based measurements. Should be (row_spacing, col_spacing) for 2D or (depth_spacing, row_spacing, col_spacing) for 3D. + separate_arrays : bool, optional + If True, array-like properties (e.g. ``inertia_tensor`` or multi-channel + ``intensity_mean``) are flattened into multiple scalar attributes instead + of being stored as a single array attribute. The new attributes are named + ``<name>_<index>`` (e.g. ``intensity_mean_0``, ``intensity_mean_1``) using + the same convention as ``node_attrs(unpack=True)``. This makes individual + components filterable. Defaults to False. Attributes ---------- @@ -44,6 +51,8 @@ class RegionPropsNodes(BaseNodesOperator): List of additional properties to compute. _spacing : tuple[float, float] | None Physical spacing between pixels. + _separate_arrays : bool + Whether array-like properties are flattened into scalar attributes. Examples -------- @@ -92,6 +101,7 @@ def __init__( self, extra_properties: list[str | Callable[[RegionProperties], Any]] | None = None, spacing: tuple[float, float] | None = None, + separate_arrays: bool = False, ): super().__init__() self._extra_properties = extra_properties or [] @@ -102,6 +112,7 @@ def __init__( if "bbox" in self._extra_properties: raise ValueError("`bbox` is not supported as an extra property. 
It's already included by default.") self._spacing = spacing + self._separate_arrays = separate_arrays def _axis_names(self, labels: NDArray[np.integer]) -> list[str]: """ @@ -124,6 +135,37 @@ def _axis_names(self, labels: NDArray[np.integer]) -> list[str]: else: raise ValueError(f"`labels` must be 't + 2D' or 't + 3D', got '{labels.ndim}' dimensions.") + def _attr_items(self, key: str, value: Any) -> list[tuple[str, Any]]: + """ + Normalize a single property value into one or more node attribute items. + + Tuple/list/array-like numeric values are converted to numpy arrays so they + are stored consistently as fixed-shape array attributes. When + ``separate_arrays`` is enabled, such values are instead flattened into + scalar attributes named ``<key>_<index>`` (row-major), matching the + ``node_attrs(unpack=True)`` naming convention. + + Parameters + ---------- + key : str + The base attribute name. + value : Any + The property value returned by regionprops or a custom callable. + + Returns + ------- + list[tuple[str, Any]] + The (name, value) pairs to add to the node attributes. + """ + if isinstance(value, np.ndarray | tuple | list): + arr = np.asarray(value) + if arr.dtype.kind in "biufc" and arr.ndim >= 1: + if self._separate_arrays: + return [("_".join([key, *map(str, idx)]), arr[idx]) for idx in np.ndindex(arr.shape)] + return [(key, arr)] + + return [(key, value)] + def _init_node_attrs(self, graph: BaseGraph, node_attrs: dict[str, Any]) -> None: """ Initialize the node attributes for the graph. 
@@ -302,9 +344,10 @@ def _nodes_per_time( for prop in self._extra_properties: if callable(prop): - attrs[prop.__name__] = prop(obj) + key, value = prop.__name__, prop(obj) else: - attrs[prop] = getattr(obj, prop) + key, value = prop, getattr(obj, prop) + attrs.update(self._attr_items(key, value)) attrs[DEFAULT_ATTR_KEYS.MASK] = Mask(obj.image, obj.bbox) attrs[DEFAULT_ATTR_KEYS.BBOX] = np.asarray(obj.bbox, dtype=int) diff --git a/src/tracksdata/nodes/_test/test_regionprops.py b/src/tracksdata/nodes/_test/test_regionprops.py index 567c62e0..1fcca449 100644 --- a/src/tracksdata/nodes/_test/test_regionprops.py +++ b/src/tracksdata/nodes/_test/test_regionprops.py @@ -1,7 +1,9 @@ import numpy as np +import polars as pl import pytest from skimage.measure._regionprops import RegionProperties +from tracksdata.attrs import NodeAttr from tracksdata.constants import DEFAULT_ATTR_KEYS from tracksdata.graph import RustWorkXGraph from tracksdata.nodes import Mask, RegionPropsNodes @@ -334,3 +336,89 @@ def test_regionprops_multiprocessing_isolation() -> None: """Test that multiprocessing options don't affect subsequent tests.""" # Verify default n_workers is 1 assert get_options().n_workers == 1 + + +def test_regionprops_multichannel_intensity_array() -> None: + """Multi-channel intensity props are stored as fixed-shape array attributes (#195).""" + graph = RustWorkXGraph() + + labels = np.array([[[1, 1, 0], [1, 0, 2], [0, 2, 2]]], dtype=np.int32) + intensity = np.zeros((1, 3, 3, 2), dtype=np.float32) + intensity[..., 0] = [[10, 20, 0], [30, 0, 40], [0, 50, 60]] + intensity[..., 1] = [[1, 2, 0], [3, 0, 4], [0, 5, 6]] + + operator = RegionPropsNodes(extra_properties=["intensity_max"]) + operator.add_nodes(graph, labels=labels, intensity_image=intensity) + + nodes_df = graph.node_attrs() + assert isinstance(nodes_df.schema["intensity_max"], pl.Array) + assert nodes_df["intensity_max"].dtype.shape == (2,) + + +def test_regionprops_tuple_property_stored_as_array() -> None: + 
"""Tuple-returning props (e.g. centroid_weighted) are normalized to array attributes (#191).""" + graph = RustWorkXGraph() + + labels = np.array([[[1, 1, 0], [1, 0, 2], [0, 2, 2]]], dtype=np.int32) + intensity = np.array([[[10, 20, 0], [30, 0, 40], [0, 50, 60]]], dtype=np.float32) + + operator = RegionPropsNodes(extra_properties=["centroid_weighted"]) + operator.add_nodes(graph, labels=labels, intensity_image=intensity) + + nodes_df = graph.node_attrs() + # tuple props must become fixed-shape arrays (not pl.List) so they are unpackable + assert isinstance(nodes_df.schema["centroid_weighted"], pl.Array) + unpacked = graph.node_attrs(unpack=True) + assert "centroid_weighted_0" in unpacked.columns + assert "centroid_weighted_1" in unpacked.columns + + +def test_regionprops_separate_arrays() -> None: + """`separate_arrays=True` flattens array props into filterable scalar columns (#269).""" + graph = RustWorkXGraph() + + labels = np.array([[[1, 1, 0], [1, 0, 2], [0, 2, 2]]], dtype=np.int32) + intensity = np.zeros((1, 3, 3, 2), dtype=np.float32) + intensity[..., 0] = [[10, 20, 0], [30, 0, 40], [0, 50, 60]] + intensity[..., 1] = [[1, 2, 0], [3, 0, 4], [0, 5, 6]] + + operator = RegionPropsNodes(extra_properties=["intensity_max", "inertia_tensor"], separate_arrays=True) + operator.add_nodes(graph, labels=labels, intensity_image=intensity) + + nodes_df = graph.node_attrs() + # 1D property -> single index suffix; 2D property -> row-major double index suffix + for col in ["intensity_max_0", "intensity_max_1", "inertia_tensor_0_0", "inertia_tensor_1_1"]: + assert col in nodes_df.columns + assert nodes_df[col].dtype == pl.Float64 + + # the array column itself must not exist when separated + assert "intensity_max" not in nodes_df.columns + + # separated columns are now filterable + subgraph = graph.filter(NodeAttr("intensity_max_0") > 30) + filtered = subgraph.node_attrs() + assert len(filtered) == 1 + assert filtered["intensity_max_0"][0] == 60.0 + + +def 
test_regionprops_separate_arrays_matches_unpack() -> None: + """`separate_arrays=True` column names match `node_attrs(unpack=True)`.""" + labels = np.array([[[1, 1, 0], [1, 0, 2], [0, 2, 2]]], dtype=np.int32) + intensity = np.zeros((1, 3, 3, 2), dtype=np.float32) + intensity[..., 0] = [[10, 20, 0], [30, 0, 40], [0, 50, 60]] + intensity[..., 1] = [[1, 2, 0], [3, 0, 4], [0, 5, 6]] + + extra = ["intensity_max", "inertia_tensor"] + + sep_graph = RustWorkXGraph() + RegionPropsNodes(extra_properties=extra, separate_arrays=True).add_nodes( + sep_graph, labels=labels, intensity_image=intensity + ) + + packed_graph = RustWorkXGraph() + RegionPropsNodes(extra_properties=extra).add_nodes(packed_graph, labels=labels, intensity_image=intensity) + + def _prop_cols(df: pl.DataFrame) -> set[str]: + return {c for c in df.columns if c.startswith(("intensity_max", "inertia_tensor"))} + + assert _prop_cols(sep_graph.node_attrs()) == _prop_cols(packed_graph.node_attrs(unpack=True))