From 5c997184f88288fd591911f1c3510fe62d12631a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Mar 2026 17:11:51 +0000 Subject: [PATCH 1/5] Initial plan From 04f2a5333ba70f118349d870f79d5d13f9362115 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Mar 2026 17:17:39 +0000 Subject: [PATCH 2/5] Add ImageToDisc pipeline step for saving processed images to file Adds a new pipeline-compatible class ImageToDisc to pyopia.io that saves processed pipeline images (raw, background, corrected, segmented, etc.) to an output folder. Supports configurable image keys, scale factor for downsizing, collage mode (all images in one file vs separate files), and image format selection. Co-authored-by: nepstad <152277+nepstad@users.noreply.github.com> --- pyopia/io.py | 174 ++++++++++++++++++++++++++++++++++++++++ pyopia/tests/test_io.py | 149 +++++++++++++++++++++++++++++++++- 2 files changed, 322 insertions(+), 1 deletion(-) diff --git a/pyopia/io.py b/pyopia/io.py index 92522b0..ec4aae2 100644 --- a/pyopia/io.py +++ b/pyopia/io.py @@ -661,6 +661,180 @@ def __call__(self, data): return data +class ImageToDisc: + '''Pipeline-compatible class for saving processed images to disc. + + Saves specified pipeline images (e.g. raw, background corrected, segmented) + to an output folder. Can optionally downscale images and/or combine them + into a single collage per input image. + + Required keys in :class:`pyopia.pipeline.Data`: + - :attr:`pyopia.pipeline.Data.filename` + - At least one of the image keys specified in ``image_keys`` + + Parameters + ---------- + output_folder : str + Path to the output folder where images will be saved. + Created automatically if it does not exist. + image_keys : list of str, optional + List of pipeline data keys to save as images. + Defaults to ``['imraw', 'imbg', 'im_corrected', 'imbw']``. + Keys that are not present in the pipeline data for a given image + will be silently skipped. + scale_factor : float, optional + Factor to downscale images before saving. E.g. 0.5 halves the + resolution. Defaults to 1.0 (no scaling). + collage : bool, optional + If True, all specified images are combined into a single collage + image (one row per image key) rather than saved as separate files. + Defaults to False. + image_format : str, optional + Image file format extension. Defaults to ``'png'``. + + Returns + ------- + data : :class:`pyopia.pipeline.Data` + Unmodified pipeline data. + + Examples + -------- + Save background-corrected and segmented images to a folder: + + .. code-block:: toml + + [steps.saveimages] + pipeline_class = 'pyopia.io.ImageToDisc' + output_folder = 'processed_images' + image_keys = ['imraw', 'im_corrected', 'imbw'] + scale_factor = 0.5 + + Save a collage of all processing stages: + + .. code-block:: toml + + [steps.saveimages] + pipeline_class = 'pyopia.io.ImageToDisc' + output_folder = 'processed_images' + collage = true + ''' + + def __init__(self, output_folder='processed_images', + image_keys=None, + scale_factor=1.0, + collage=False, + image_format='png'): + if image_keys is None: + image_keys = ['imraw', 'imbg', 'im_corrected', 'imbw'] + self.output_folder = output_folder + self.image_keys = image_keys + self.scale_factor = scale_factor + self.collage = collage + self.image_format = image_format + + def __call__(self, data): + import matplotlib.pyplot as plt + from skimage.transform import rescale + + os.makedirs(self.output_folder, exist_ok=True) + + source_filename = data.get('filename', 'unknown') + base_name = Path(source_filename).stem + + # Collect available images + available_images = [] + for key in self.image_keys: + if key in data and data[key] is not None: + img = np.array(data[key], dtype=np.float64) + available_images.append((key, img)) + + if not available_images: + logger.warning('ImageToDisc: No images found in pipeline data for the specified keys.') + return data + + if self.collage: + self._save_collage(available_images, base_name, plt) + else: + self._save_separate(available_images, base_name, plt, rescale) + + return data + + def _prepare_image(self, img, rescale_func): + '''Prepare an image for saving: handle scaling and normalisation. + + Parameters + ---------- + img : ndarray + Image array (2D or 3D, float or bool). + rescale_func : callable + skimage.transform.rescale function. + + Returns + ------- + img : ndarray + Prepared image array clipped to [0, 1]. + ''' + if self.scale_factor != 1.0: + multichannel = img.ndim == 3 + img = rescale_func(img, self.scale_factor, + channel_axis=2 if multichannel else None, + anti_aliasing=True, + preserve_range=True) + # Convert boolean (binary) images to float for saving + if img.dtype == bool: + img = img.astype(np.float64) + # Clip to valid range for plt.imsave + img = np.clip(img, 0, 1) + return img + + def _save_separate(self, available_images, base_name, plt, rescale_func): + '''Save each image key as a separate file.''' + for key, img in available_images: + img = self._prepare_image(img, rescale_func) + out_path = Path(self.output_folder) / f'{base_name}_{key}.{self.image_format}' + if img.ndim == 2: + plt.imsave(str(out_path), img, cmap='gray') + else: + plt.imsave(str(out_path), img) + logger.debug(f'ImageToDisc: Saved {key} to {out_path}') + + def _save_collage(self, available_images, base_name, plt): + '''Save all images combined into a single collage image.''' + from skimage.transform import resize + + # Determine target width (use first image width, after scale) + first_img = available_images[0][1] + if first_img.ndim == 2: + target_h, target_w = first_img.shape + else: + target_h, target_w = first_img.shape[:2] + + if self.scale_factor != 1.0: + target_h = int(target_h * self.scale_factor) + target_w = int(target_w * self.scale_factor) + + # Resize all images to the same dimensions and convert to 3-channel + panels = [] + for key, img in available_images: + if img.dtype == bool: + img = img.astype(np.float64) + + if img.ndim == 2: + img = resize(img, (target_h, target_w), anti_aliasing=True, preserve_range=True) + # Convert grayscale to RGB for stacking + img = np.stack([img, img, img], axis=-1) + else: + img = resize(img, (target_h, target_w, img.shape[2]), anti_aliasing=True, preserve_range=True) + + img = np.clip(img, 0, 1) + panels.append(img) + + collage = np.concatenate(panels, axis=0) + out_path = Path(self.output_folder) / f'{base_name}_collage.{self.image_format}' + plt.imsave(str(out_path), collage) + logger.debug(f'ImageToDisc: Saved collage to {out_path}') + + def load_toml(toml_file): """Load a TOML settings file from file diff --git a/pyopia/tests/test_io.py b/pyopia/tests/test_io.py index 181e2bf..8dd9701 100644 --- a/pyopia/tests/test_io.py +++ b/pyopia/tests/test_io.py @@ -1,8 +1,9 @@ import os from pathlib import Path import pytest +import numpy as np import pandas as pd -from pyopia.io import write_stats, load_stats, get_cf_metadata_spec +from pyopia.io import write_stats, load_stats, get_cf_metadata_spec, ImageToDisc from pyopia.instrument.silcam import generate_config @@ -64,5 +65,151 @@ def test_write_and_load_stats(tmp_path: Path): assert "PyOPIA_version" in loaded_stats.attrs +def test_image_to_disc_separate(tmp_path: Path): + """Test ImageToDisc saves separate images for each pipeline key.""" + output_folder = str(tmp_path / "output_images") + + saver = ImageToDisc( + output_folder=output_folder, + image_keys=['imraw', 'im_corrected', 'imbw'], + scale_factor=1.0, + collage=False, + ) + + # Create fake pipeline data with a mix of image types + data = { + 'filename': '/fake/path/test_image.silc', + 'imraw': np.random.rand(100, 120, 3), + 'im_corrected': np.random.rand(100, 120, 3), + 'imbw': np.random.rand(100, 120) > 0.5, # boolean segmentation mask + } + + result = saver(data) + + # Check data is returned unmodified + assert result is data + + # Check output files exist + assert os.path.isfile(os.path.join(output_folder, 'test_image_imraw.png')) + assert os.path.isfile(os.path.join(output_folder, 'test_image_im_corrected.png')) + assert os.path.isfile(os.path.join(output_folder, 'test_image_imbw.png')) + + +def test_image_to_disc_collage(tmp_path: Path): + """Test ImageToDisc saves a single collage image.""" + output_folder = str(tmp_path / "output_collage") + + saver = ImageToDisc( + output_folder=output_folder, + image_keys=['imraw', 'im_corrected'], + collage=True, + ) + + data = { + 'filename': '/fake/path/sample.silc', + 'imraw': np.random.rand(80, 100, 3), + 'im_corrected': np.random.rand(80, 100, 3), + } + + result = saver(data) + + assert result is data + assert os.path.isfile(os.path.join(output_folder, 'sample_collage.png')) + + +def test_image_to_disc_scale_factor(tmp_path: Path): + """Test ImageToDisc applies scale factor when saving separate images.""" + output_folder = str(tmp_path / "output_scaled") + + saver = ImageToDisc( + output_folder=output_folder, + image_keys=['imraw'], + scale_factor=0.5, + collage=False, + ) + + data = { + 'filename': '/fake/path/scaled_test.silc', + 'imraw': np.random.rand(100, 120, 3), + } + + saver(data) + + out_file = os.path.join(output_folder, 'scaled_test_imraw.png') + assert os.path.isfile(out_file) + + # Load the saved image and verify it was scaled down + import matplotlib.pyplot as plt + saved_img = plt.imread(out_file) + assert saved_img.shape[0] == 50 + assert saved_img.shape[1] == 60 + + +def test_image_to_disc_missing_keys(tmp_path: Path): + """Test ImageToDisc gracefully skips missing keys.""" + output_folder = str(tmp_path / "output_missing") + + saver = ImageToDisc( + output_folder=output_folder, + image_keys=['imraw', 'imbg', 'nonexistent_key'], + ) + + data = { + 'filename': '/fake/path/missing_test.silc', + 'imraw': np.random.rand(50, 60, 3), + # 'imbg' and 'nonexistent_key' intentionally missing + } + + result = saver(data) + + assert result is data + # Only imraw should be saved + assert os.path.isfile(os.path.join(output_folder, 'missing_test_imraw.png')) + assert not os.path.isfile(os.path.join(output_folder, 'missing_test_imbg.png')) + assert not os.path.isfile(os.path.join(output_folder, 'missing_test_nonexistent_key.png')) + + +def test_image_to_disc_2d_grayscale(tmp_path: Path): + """Test ImageToDisc handles 2D grayscale images.""" + output_folder = str(tmp_path / "output_gray") + + saver = ImageToDisc( + output_folder=output_folder, + image_keys=['im_corrected'], + scale_factor=0.5, + ) + + data = { + 'filename': '/fake/path/gray_test.png', + 'im_corrected': np.random.rand(100, 120), + } + + saver(data) + + assert os.path.isfile(os.path.join(output_folder, 'gray_test_im_corrected.png')) + + +def test_image_to_disc_collage_mixed_types(tmp_path: Path): + """Test collage with a mix of 2D (binary) and 3D (RGB) images.""" + output_folder = str(tmp_path / "output_collage_mixed") + + saver = ImageToDisc( + output_folder=output_folder, + image_keys=['imraw', 'imbw'], + collage=True, + scale_factor=0.5, + ) + + data = { + 'filename': '/fake/path/mixed_test.silc', + 'imraw': np.random.rand(80, 100, 3), + 'imbw': np.random.rand(80, 100) > 0.5, + } + + saver(data) + + assert os.path.isfile(os.path.join(output_folder, 'mixed_test_collage.png')) + + if __name__ == "__main__": pytest.main() From 6284483bbff6d8e428adaa54781e838015e26420 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Mar 2026 17:19:29 +0000 Subject: [PATCH 3/5] Address code review: improve memory efficiency and move imports - Keep original image dtypes until conversion is needed (avoid premature float64 conversion) - Move matplotlib/skimage imports from __call__ to the specific methods that need them - Move test-level matplotlib import to module level Co-authored-by: nepstad <152277+nepstad@users.noreply.github.com> --- pyopia/io.py | 42 +++++++++++++++++++++-------------------- pyopia/tests/test_io.py | 2 +- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/pyopia/io.py b/pyopia/io.py index ec4aae2..28756b9 100644 --- a/pyopia/io.py +++ b/pyopia/io.py @@ -733,64 +733,65 @@ def __init__(self, output_folder='processed_images', self.image_format = image_format def __call__(self, data): - import matplotlib.pyplot as plt - from skimage.transform import rescale - os.makedirs(self.output_folder, exist_ok=True) source_filename = data.get('filename', 'unknown') base_name = Path(source_filename).stem - # Collect available images + # Collect available images (keep original dtypes for efficiency) available_images = [] for key in self.image_keys: if key in data and data[key] is not None: - img = np.array(data[key], dtype=np.float64) - available_images.append((key, img)) + available_images.append((key, np.asarray(data[key]))) if not available_images: logger.warning('ImageToDisc: No images found in pipeline data for the specified keys.') return data if self.collage: - self._save_collage(available_images, base_name, plt) + self._save_collage(available_images, base_name) else: - self._save_separate(available_images, base_name, plt, rescale) + self._save_separate(available_images, base_name) return data - def _prepare_image(self, img, rescale_func): + def _prepare_image(self, img): '''Prepare an image for saving: handle scaling and normalisation. Parameters ---------- img : ndarray Image array (2D or 3D, float or bool). - rescale_func : callable - skimage.transform.rescale function. Returns ------- img : ndarray Prepared image array clipped to [0, 1]. ''' - if self.scale_factor != 1.0: - multichannel = img.ndim == 3 - img = rescale_func(img, self.scale_factor, - channel_axis=2 if multichannel else None, - anti_aliasing=True, - preserve_range=True) + from skimage.transform import rescale + # Convert boolean (binary) images to float for saving if img.dtype == bool: img = img.astype(np.float64) + else: + img = img.astype(np.float64) + + if self.scale_factor != 1.0: + multichannel = img.ndim == 3 + img = rescale(img, self.scale_factor, + channel_axis=2 if multichannel else None, + anti_aliasing=True, + preserve_range=True) # Clip to valid range for plt.imsave img = np.clip(img, 0, 1) return img - def _save_separate(self, available_images, base_name, plt, rescale_func): + def _save_separate(self, available_images, base_name): '''Save each image key as a separate file.''' + import matplotlib.pyplot as plt + for key, img in available_images: - img = self._prepare_image(img, rescale_func) + img = self._prepare_image(img) out_path = Path(self.output_folder) / f'{base_name}_{key}.{self.image_format}' if img.ndim == 2: plt.imsave(str(out_path), img, cmap='gray') @@ -798,8 +799,9 @@ def _save_separate(self, available_images, base_name, plt, rescale_func): plt.imsave(str(out_path), img) logger.debug(f'ImageToDisc: Saved {key} to {out_path}') - def _save_collage(self, available_images, base_name, plt): + def _save_collage(self, available_images, base_name): '''Save all images combined into a single collage image.''' + import matplotlib.pyplot as plt from skimage.transform import resize # Determine target width (use first image width, after scale) diff --git a/pyopia/tests/test_io.py b/pyopia/tests/test_io.py index 8dd9701..6d8f01e 100644 --- a/pyopia/tests/test_io.py +++ b/pyopia/tests/test_io.py @@ -2,6 +2,7 @@ from pathlib import Path import pytest import numpy as np +import matplotlib.pyplot as plt import pandas as pd from pyopia.io import write_stats, load_stats, get_cf_metadata_spec, ImageToDisc from pyopia.instrument.silcam import generate_config @@ -139,7 +140,6 @@ def test_image_to_disc_scale_factor(tmp_path: Path): assert os.path.isfile(out_file) # Load the saved image and verify it was scaled down - import matplotlib.pyplot as plt saved_img = plt.imread(out_file) assert saved_img.shape[0] == 50 assert saved_img.shape[1] == 60 From 2c60ce143e80c0ab772e58a43e51731fb889b924 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Mar 2026 17:21:15 +0000 Subject: [PATCH 4/5] Clean up redundant if/else in _prepare_image Co-authored-by: nepstad <152277+nepstad@users.noreply.github.com> --- pyopia/io.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pyopia/io.py b/pyopia/io.py index 28756b9..f1a23aa 100644 --- a/pyopia/io.py +++ b/pyopia/io.py @@ -770,11 +770,8 @@ def _prepare_image(self, img): ''' from skimage.transform import rescale - # Convert boolean (binary) images to float for saving - if img.dtype == bool: - img = img.astype(np.float64) - else: - img = img.astype(np.float64) + # Convert to float64 for saving + img = img.astype(np.float64) if self.scale_factor != 1.0: multichannel = img.ndim == 3 From 77618d44f3221029a2cdbf560d3001584d75ad0d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Mar 2026 17:54:18 +0000 Subject: [PATCH 5/5] Add documentation for ImageToDisc with instructive usage examples Adds documentation in two locations: - toml_config.ipynb: Comprehensive section with configuration options table, examples for separate images and collage mode, and pipeline placement guidance - processing_raw_data.ipynb: Practical section explaining when and how to use ImageToDisc for quality control and visual inspection Co-authored-by: nepstad <152277+nepstad@users.noreply.github.com> --- docs/notebooks/processing_raw_data.ipynb | 28 +++++++++ docs/notebooks/toml_config.ipynb | 79 ++++++++++++++++++++++++ 2 files changed, 107 insertions(+) diff --git a/docs/notebooks/processing_raw_data.ipynb b/docs/notebooks/processing_raw_data.ipynb index 489f38b..b909421 100644 --- a/docs/notebooks/processing_raw_data.ipynb +++ b/docs/notebooks/processing_raw_data.ipynb @@ -102,6 +102,34 @@ "```" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## (Optional) Save intermediate images to disc\n", + "\n", + "During processing, it can be very helpful to save intermediate images for visual quality control or for sharing with collaborators.\n", + "For example, you may want to inspect the background-corrected images to ensure the correction is working properly.\n", + "\n", + "To do this, add a `[steps.saveimages]` step to your `config.toml` file. This uses {class}`pyopia.io.ImageToDisc`, which saves one or more pipeline images to a specified output folder.\n", + "\n", + "Here is an example that saves the raw, background, and corrected images at half resolution:\n", + "\n", + "```toml\n", + " [steps.saveimages]\n", + " pipeline_class = 'pyopia.io.ImageToDisc'\n", + " output_folder = 'processed_images'\n", + " image_keys = ['imraw', 'imbg', 'im_corrected']\n", + " scale_factor = 0.5\n", + "```\n", + "\n", + "Place this step **after** the steps that produce the images you want to save (e.g. after `correctbackground` for background-corrected images, or after `segmentation` to also include the binary segmentation mask `imbw`).\n", + "\n", + "You can also save a single **collage** image per input file, by setting `collage = true`.\n", + "\n", + "See {ref}`toml-config` for full configuration details and more examples." + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/docs/notebooks/toml_config.ipynb b/docs/notebooks/toml_config.ipynb index 82cd49e..373bb65 100644 --- a/docs/notebooks/toml_config.ipynb +++ b/docs/notebooks/toml_config.ipynb @@ -199,6 +199,85 @@ "\n", "```" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Saving processed images to disc\n", + "\n", + "It is often useful to save intermediate pipeline images to disc for visual inspection, quality control,\n", + "or sharing with collaborators. For example, you may want to visually check that the background correction\n", + "is working correctly, or provide corrected images for manual review.\n", + "\n", + "The {class}`pyopia.io.ImageToDisc` pipeline step enables this. It can be inserted at any point in the pipeline\n", + "to save the current state of one or more images from the pipeline data. Common use cases include:\n", + "\n", + "- Saving the **background-corrected image** (`im_corrected`) for visual quality control\n", + "- Saving the **raw image** (`imraw`) and **background image** (`imbg`) alongside the corrected image for comparison\n", + "- Saving the **segmented image** (`imbw`) to verify that particle detection is working as expected\n", + "- Creating a **collage** of all processing stages for a quick overview of each image\n", + "\n", + "### Configuration options\n", + "\n", + "| Option | Description | Default |\n", + "| --- | --- | --- |\n", + "| `output_folder` | Path to folder where images will be saved (created if it does not exist) | `'processed_images'` |\n", + "| `image_keys` | List of pipeline data keys to save | `['imraw', 'imbg', 'im_corrected', 'imbw']` |\n", + "| `scale_factor` | Factor to downscale images before saving (e.g. `0.5` halves the resolution) | `1.0` |\n", + "| `collage` | If `true`, combine all images into a single vertically-stacked collage per input image | `false` |\n", + "| `image_format` | Output image format | `'png'` |\n", + "\n", + "### Example: Save separate images at half resolution\n", + "\n", + "Add this step after background correction (or after segmentation, depending on which images you want to capture):\n", + "\n", + "```toml\n", + " [steps.saveimages]\n", + " pipeline_class = 'pyopia.io.ImageToDisc'\n", + " output_folder = 'processed_images'\n", + " image_keys = ['imraw', 'imbg', 'im_corrected']\n", + " scale_factor = 0.5\n", + "```\n", + "\n", + "This will create one PNG file per image key, per input image, in the `processed_images/` folder.\n", + "For example, processing an image called `image_001.silc` would produce:\n", + "```\n", + "processed_images/\n", + "├── image_001_imraw.png\n", + "├── image_001_imbg.png\n", + "└── image_001_im_corrected.png\n", + "```\n", + "\n", + "### Example: Save a collage of all processing stages\n", + "\n", + "To get a single overview image showing all stages of processing for each input image:\n", + "\n", + "```toml\n", + " [steps.saveimages]\n", + " pipeline_class = 'pyopia.io.ImageToDisc'\n", + " output_folder = 'processed_images'\n", + " image_keys = ['imraw', 'im_corrected', 'imbw']\n", + " collage = true\n", + " scale_factor = 0.5\n", + "```\n", + "\n", + "This produces a single `image_001_collage.png` per input image, with the raw, corrected and segmented\n", + "images stacked vertically.\n", + "\n", + "### Placement in the pipeline\n", + "\n", + "The `saveimages` step should be placed **after** the processing steps that produce the images you want to save.\n", + "For instance, to save background-corrected images, place it after `correctbackground`.\n", + "To also include the segmented image (`imbw`), place it after the `segmentation` step.\n", + "You can also include multiple `saveimages` steps at different points in the pipeline if needed, e.g.\n", + "one right after background correction and another after segmentation.\n", + "\n", + "```{note}\n", + "Images that are not yet available in the pipeline data at the point where `saveimages` runs will be\n", + "silently skipped. So it is safe to request keys that may not exist for all configurations.\n", + "```" + ] } ], "metadata": {