From 1a3a79968a3c3f77aafe3a88739fffed3e10573e Mon Sep 17 00:00:00 2001
From: brandynlucca <brandyn.lucca@gmail.com>
Date: Tue, 13 Jan 2026 14:49:21 -0800
Subject: [PATCH 1/4] Updated functions reflecting xarray changes

---
 echopop/ingest/sv.py                          |  160 ++-
 echopop/inversion/inversion_matrix.py         |  105 +-
 echopop/inversion/post_inversion.py           |  272 ++--
 echopop/inversion/utils.py                    |   37 +-
 .../ingest/test_inversion_sv_ingestion.py     |   13 +-
 .../tests/inversion/test_inversion_matrix.py  |   81 +-
 echopop/validators/__init__.py                |   13 +
 echopop/validators/inversion.py               |   82 +-
 echopop/validators/scattering.py              |    2 +-
 .../workflows/feat_hake_xarray_scratch.py     | 1168 -----------------
 .../nwfsc_feat/workflows/feat_krill.py        |   36 +-
 11 files changed, 502 insertions(+), 1467 deletions(-)
 delete mode 100644 echopop/workflows/nwfsc_feat/workflows/feat_hake_xarray_scratch.py

diff --git a/echopop/ingest/sv.py b/echopop/ingest/sv.py
index 2de99c18..cdb85df7 100644
--- a/echopop/ingest/sv.py
+++ b/echopop/ingest/sv.py
@@ -1,8 +1,9 @@
 from pathlib import Path
-from typing import Any, Dict, Literal, Optional, Tuple
+from typing import Any, Dict, Literal, Optional, Tuple, Union
 
 import numpy as np
 import pandas as pd
+import xarray as xr
 
 from . import nasc
 
@@ -400,12 +401,19 @@ def aggregate_transects(
         "thickness_mean"
     ].transform("sum")
 
+    # Define safe log10 function that handles zero/negative values
+    def safe_log10_sum(x):
+        sum_val = x.sum()
+        if sum_val <= 0:
+            return -999.0  # Use standard missing data indicator
+        return 10 * np.log10(sum_val)
+
     # Aggregate the values over each interval
     data_pvt = data.groupby(["frequency", "transect_num"]).agg(
         {
             "longitude_weight": "sum",
             "latitude_weight": "sum",
-            "sv_L": lambda x: 10 * np.log10(x.sum()),
+            "sv_L": safe_log10_sum,
             "nasc": "sum",
             "thickness_interval": "mean",
         }
@@ -531,13 +539,114 @@ def integrate_measurements(
     return sv_indexed, sv_coordinates
 
 
+def df_to_xarray(df: pd.DataFrame) -> Union[xr.DataArray, xr.Dataset]:
+    """
+    Convert a pandas DataFrame with MultiIndex index and columns to an xarray Dataset.
+    Handles both dense and sparse structures, and removes duplicate index level names.
+    """
+
+    # Return None if empty
+    if df is None or df.empty:
+        return None
+
+    # Get index names
+    if isinstance(df.index, pd.MultiIndex):
+        index_names = list(df.index.names)
+    else:
+        index_names = [df.index.name] if df.index.name else ["index"]
+
+    # Check for duplicate names in index
+    if len(index_names) != len(set(index_names)):
+        # ---- Keep only first occurrence of each name
+        seen = set()
+        unique_indices = []
+        for i, name in enumerate(index_names):
+            if name not in seen:
+                seen.add(name)
+                unique_indices.append(i)
+        # ---- Drop duplicate columns that match index level names
+        df = df.copy()
+        df.index = df.index.droplevel(
+            [i for i in range(len(index_names)) if i not in unique_indices]
+        )
+        index_names = [
+            index_names[i]
+            for i in range(len(index_names))
+            if i in [idx for idx, _ in enumerate(index_names) if index_names.index(_) == idx]
+        ]
+
+    # For MultiIndex columns (dense array)
+    if isinstance(df.columns, pd.MultiIndex):
+        # ---- Get MultiIndex column names
+        column_names = list(df.columns.names)
+        # ---- Filter out non-numeric columns
+        numeric_vars = [
+            var
+            for var in df.columns.get_level_values(0).unique()
+            if not all(df.columns[df.columns.get_level_values(0) == var].get_level_values(1) == "")
+        ]
+        # ---- Check if index is sparse with non-unique combinations
+        if len(index_names) > 1:
+            # ---- For sparse data, use a single point dimension
+            mindex_coords = xr.Coordinates.from_pandas_multiindex(df.index, "point")
+            ds = xr.Dataset(
+                {var: (["point", column_names[1]], df[var].values) for var in numeric_vars},
+                coords={**mindex_coords, column_names[1]: df[numeric_vars[0]].columns.values},
+            )
+        else:
+            # ---- For dense data with unique indices, use separate dimensions
+            ds = xr.Dataset(
+                {var: (index_names + [column_names[1]], df[var].values) for var in numeric_vars},
+                coords={
+                    **{name: df.index.get_level_values(name).unique() for name in index_names},
+                    column_names[1]: df[numeric_vars[0]].columns.values,
+                },
+            )
+        # ---- Add the uneven multiindexed columns as coordinates/attributes
+        to_add_vars = [
+            var for var in df.columns.get_level_values(0).unique() if var not in numeric_vars
+        ]
+        # ---- Add as a coordinate along the index dimension
+        for var in to_add_vars:
+            # ---- Get column data
+            var_data = df[var]
+            # ---- If it's a DataFrame, get the first column
+            if isinstance(var_data, pd.DataFrame):
+                var_values = var_data.iloc[:, 0].values
+            # ---- If it is already a series
+            else:
+                var_values = var_data.values
+            # ---- Assign the coordinates using the extracted values
+            if len(index_names) > 1:
+                # ---- Sparse case
+                ds = ds.assign_coords({var: ("point", var_values)})
+            else:
+                # ---- Dense case
+                ds = ds.assign_coords({var: (index_names, var_values)})
+        return ds
+    # For MultiIndex index but single-level columns (sparse array)
+    elif isinstance(df.index, pd.MultiIndex):
+        # ---- Get unique column names
+        column_name = df.columns.name if df.columns.name else "column"
+        # ---- Create the DataArray
+        da = xr.DataArray(
+            df.values,
+            dims=["point", column_name],
+            coords={
+                "point": pd.MultiIndex.from_tuples(df.index.to_list(), names=index_names),
+                column_name: df.columns.values,
+            },
+        )
+        return da
+
+
 def ingest_echoview_sv(
     sv_path: Path,
     center_frequencies: Optional[Dict[str, float]] = None,
     transect_pattern: Optional[str] = None,
     aggregate_method: Literal["cells", "interval", "transect"] = "cells",
     impute_coordinates: bool = True,
-) -> Tuple[pd.DataFrame, pd.DataFrame]:
+) -> Tuple[Union[xr.DataArray, xr.Dataset], Union[xr.DataArray, xr.Dataset]]:
     r"""
     Complete ingestion pipeline for Echoview Sv export data.
 
@@ -563,11 +672,13 @@ def ingest_echoview_sv(
 
     Returns
     -------
-    tuple[|pd.DataFrame|, |pd.DataFrame| or None]
-        - ``sv_integrated``: Spatially aggregated acoustic data with :class:`pandas.MultiIndex`
-          columns organized by measurement type and frequency.
-        - ``sv_coordinates``: Coordinate reference data for spatial analysis, or None if
-          coordinates unavailable.
+    tuple[xr.Dataset or xr.DataArray, xr.DataArray or None]
+        - ``sv_integrated``: Spatially aggregated acoustic data as xarray Dataset with data
+          variables for each measurement type (nasc, sv_mean, thickness_mean) organized by
+          frequency dimension. For sparse data (cells, intervals), uses a "point" dimension with
+          MultiIndex coordinates.
+        - ``sv_coordinates``: Coordinate reference data as xarray DataArray with "point" dimension
+          and frequency dimension, or None if coordinates unavailable.
 
     Raises
     ------
@@ -597,7 +708,18 @@ def ingest_echoview_sv(
     >>> sv_data, coords = ingest_echoview_sv(
     ...     sv_path, frequencies, r"transect_(\d+)", "interval"
     ... )
+    >>> print(sv_data)
+    <xarray.Dataset>
+    Dimensions:  (point: 11608, frequency: 5)
+    Coordinates:
+      * point     (point) MultiIndex
+      * frequency (frequency) float64 18000.0 38000.0 70000.0 120000.0 200000.0
+    Data variables:
+        nasc            (point, frequency) float64 ...
+        sv_mean         (point, frequency) float64 ...
+        thickness_mean  (point, frequency) float64 ...
     """
+
     # Validate directory existence
     if not sv_path.exists():
         raise FileNotFoundError(f"The export file directory ({sv_path.as_posix()}) not found!")
@@ -611,7 +733,11 @@ def ingest_echoview_sv(
     # Update the units for `center_frequencies` to match expected values from Echoview
     # ---- Hz -> kHz
     if center_frequencies is not None:
-        center_frequencies = {freq * 1e-3: value for freq, value in center_frequencies.items()}
+        center_frequencies_thresh = {
+            freq * 1e-3: value for freq, value in center_frequencies.items()
+        }
+    else:
+        center_frequencies_thresh = {}
 
     # Get the target Sv files
     sv_filepaths = {"cells": [p for p in sv_path.rglob("*.csv") if p.is_file()]}
@@ -630,7 +756,7 @@ def ingest_echoview_sv(
         )
 
     # Concatenate the files
-    sv = pd.concat(
+    sv_output = pd.concat(
         [
             read_echoview_sv(row["file_path"], impute_coordinates, row["transect_num"])
             for _, row in transect_num_df.iterrows()
@@ -638,21 +764,21 @@ def ingest_echoview_sv(
     )
 
     # Sort
-    nasc.sort_echoview_export_df(sv, inplace=True)
+    nasc.sort_echoview_export_df(sv_output, inplace=True)
 
     # Set min/max threshold for each frequency
-    if center_frequencies:
-        sv_subset = sv[sv["frequency"].isin(center_frequencies)]
+    if center_frequencies_thresh:
+        sv_subset = sv_output[sv_output["frequency"].isin(center_frequencies_thresh)]
     else:
-        sv_subset = sv.copy()
-        center_frequencies = {
+        sv_subset = sv_output.copy()
+        center_frequencies_thresh = {
             freq: {"min": -999.0, "max": 999.0} for freq in sv_subset["frequency"].unique()
         }
 
     # Integrate the backscatter based on the defined aggregation method
     sv_integrated, sv_coordinates = integrate_measurements(
-        data=sv_subset, method=aggregate_method, sv_thresholds=center_frequencies
+        data=sv_subset, method=aggregate_method, sv_thresholds=center_frequencies_thresh
     )
 
     # Return
-    return sv_integrated, sv_coordinates
+    return df_to_xarray(sv_integrated), df_to_xarray(sv_coordinates)
diff --git a/echopop/inversion/inversion_matrix.py b/echopop/inversion/inversion_matrix.py
index 2778ed36..936056c6 100644
--- a/echopop/inversion/inversion_matrix.py
+++ b/echopop/inversion/inversion_matrix.py
@@ -1,12 +1,14 @@
 import time
 import warnings
-from typing import Any, Dict
+from typing import Any, Dict, Union
 
 import numpy as np
 import pandas as pd
+import xarray as xr
 from lmfit import Minimizer, Parameters
 from pydantic import ValidationError
 
+from ..ingest.sv import df_to_xarray
 from ..validators.scattering_models import SCATTERING_MODEL_PARAMETERS
 from .inversion_base import InversionBase, InvParameters
 
@@ -135,7 +137,7 @@ def prepare_minimizer(
     simulation_settings: Dict[str, Any],
     verbose: bool,
     **kwargs,
-):
+) -> Union[Minimizer, None]:
     """
     Prepare lmfit.Minimizer objects for Monte Carlo optimization.
 
@@ -192,36 +194,39 @@ def prepare_minimizer(
     Sv_measured = Sv_measured.to_numpy()[valid_idx].astype(float)
 
     # Initialize with Monte Carlo sampling
-    if simulation_settings["monte_carlo"]:
-        parameters_lmfit = monte_carlo_initialize(
-            parameters_lmfit, center_frequencies, Sv_measured, scattering_params, model_settings
-        )
-    else:
-        parameters_lmfit = parameters_lmfit[0]
-
-    # Set `iter_cb` if verbose
-    if verbose:
-        if "iter_cb" in simulation_settings and simulation_settings["iter_cb"]:
-            iter_cb_arg = simulation_settings["iter_cb"]
+    if len(Sv_measured) > 0:
+        if simulation_settings["monte_carlo"]:
+            parameters_lmfit = monte_carlo_initialize(
+                parameters_lmfit, center_frequencies, Sv_measured, scattering_params, model_settings
+            )
+        else:
+            parameters_lmfit = parameters_lmfit[0]
+
+        # Set `iter_cb` if verbose
+        if verbose:
+            if "iter_cb" in simulation_settings and simulation_settings["iter_cb"]:
+                iter_cb_arg = simulation_settings["iter_cb"]
+            else:
+                iter_cb_arg = mininizer_print_cb
         else:
-            iter_cb_arg = mininizer_print_cb
+            iter_cb_arg = None
+        # Generate `Minimizer` function class required for bounded optimization
+        return [
+            Minimizer(
+                fit_Sv,
+                parameters_lmfit,
+                fcn_args=(
+                    Sv_measured,
+                    center_frequencies,
+                    scattering_params,
+                    model_settings,
+                ),
+                iter_cb=iter_cb_arg,
+                nan_policy="omit",
+            )
+        ]
     else:
-        iter_cb_arg = None
-    # Generate `Minimizer` function class required for bounded optimization
-    return [
-        Minimizer(
-            fit_Sv,
-            parameters_lmfit,
-            fcn_args=(
-                Sv_measured,
-                center_frequencies,
-                scattering_params,
-                model_settings,
-            ),
-            iter_cb=iter_cb_arg,
-            nan_policy="omit",
-        )
-    ]
+        return None
 
 
 def fit_Sv(
@@ -648,7 +653,7 @@ class InversionMatrix(InversionBase):
 
     def __new__(
         cls,
-        data: pd.DataFrame,
+        data: xr.Dataset,
         simulation_settings: Dict[str, Any],
         verbose: bool = True,
     ):
@@ -668,7 +673,16 @@ def __new__(
         self = super().__new__(cls)
 
         # Update attributes
-        self.measurements = valid_args["data"].copy()
+        # ---- Format dataset to dataframe
+        data_fmt = valid_args["data"].copy().to_dataframe().reset_index()
+        # ---- Check for cases where a coordinate a singleton
+        unmapped_idx = list(
+            set(data_fmt.columns).intersection(
+                set(["transect_num", "interval", "layer", "longitude", "latitude", "frequency"])
+            )
+        )
+        # ---- Set the index and rotate frequency
+        self.measurements = data_fmt.set_index(unmapped_idx).unstack("frequency")
         self.simulation_settings = valid_args["simulation_settings"]
         self.verbose = verbose
 
@@ -677,7 +691,7 @@ def __new__(
 
     def __init__(
         self,
-        data: pd.DataFrame,
+        data: xr.Dataset,
         simulation_settings: Dict[str, Any],
         verbose: bool = True,
     ):
@@ -690,9 +704,9 @@ def __init__(
 
         Parameters
         ----------
-        data : pd.DataFrame
-            MultiIndex DataFrame containing acoustic measurements with required columns 'sv_mean',
-            'nasc', and 'thickness_mean' at top level, and 'frequency' as nested index level.
+        data : xr.Dataset
+            Dataset containing acoustic measurements with required variables 'sv_mean',
+            'nasc', and 'thickness_mean' at top level, and at least 'frequency' as a coordinate.
         simulation_settings : Dict[str, Any]
             Dictionary containing simulation configuration including:
             - environment: Environmental parameters (sound speed, density)
@@ -871,7 +885,7 @@ def build_scattering_model(
         # Add the `lmfit.Minimizer` objects to the dataset
         self._set_minimizers()
 
-    def invert(self, optimization_kwargs: Dict[str, Any]):
+    def invert(self, optimization_kwargs: Dict[str, Any]) -> xr.Dataset:
         """
         Execute acoustic scattering parameter inversion for all measurements.
 
@@ -896,10 +910,11 @@ def invert(self, optimization_kwargs: Dict[str, Any]):
 
         Returns
         -------
-        pd.DataFrame
-            DataFrame containing original acoustic measurements plus inversion
-            results in 'parameters' column. Each entry is an InvParameters
-            object with optimized parameter values and metadata.
+        xarray.Dataset
+            Dataset containing the original acoustic measurements (mean Sv and NASC), weighted
+            latitudes and longitudes, and layer thickness means as data variables. The increment(s)
+            used for integration (i.e., cells, interval, transect), frequency, and inversion
+            parameters are assigned as the coordinates.
 
         Notes
         -----
@@ -931,7 +946,7 @@ def invert(self, optimization_kwargs: Dict[str, Any]):
         ... }
         >>> results = inv_matrix.invert(optimization_config)
         >>> # Access optimized parameters for first measurement
-        >>> optimal_params = results.iloc[0]["parameters"]
+        >>> optimal_params = results.sel(transect_num=1)["parameters"].item()
         """
 
         # Run optimization
@@ -955,7 +970,9 @@ def invert(self, optimization_kwargs: Dict[str, Any]):
 
         # Bundle the parameter results and add to the output
         output["parameters"] = [
-            InvParameters.from_series(row.drop("Q")) for _, row in inversion_results.iterrows()
+            InvParameters.from_series(row.drop("Q")) if not row.isna().all() else None
+            for _, row in inversion_results.iterrows()
         ]
 
-        return output
+        # Convert to xarray
+        return df_to_xarray(output)
diff --git a/echopop/inversion/post_inversion.py b/echopop/inversion/post_inversion.py
index 3b2b8ffe..32f4bb08 100644
--- a/echopop/inversion/post_inversion.py
+++ b/echopop/inversion/post_inversion.py
@@ -1,64 +1,70 @@
 from typing import Literal
 
 import numpy as np
-import pandas as pd
+import xarray as xr
 
 from .utils import _extract_parameters_optimized
 
 
 def estimate_population(
-    inverted_data: pd.DataFrame,
-    nasc_data: pd.DataFrame,
+    inverted_data: xr.Dataset,
+    nasc_data: xr.DataArray,
     density_sw: float,
     reference_frequency: float,
     aggregate_method: Literal["cells", "interval", "transect"],
-):
+) -> xr.Dataset:
     """
     Estimate population characteristics from inverted acoustic data.
 
-    This function computes population-level estimates including body size,
-    density, and biomass from inverted acoustic scattering parameters.
-    It combines acoustic data with biological parameter estimates to
-    provide comprehensive population assessments.
+    This function computes population-level estimates including body size, density, and biomass
+    from inverted acoustic scattering parameters. It combines acoustic data with biological
+    parameter estimates to provide comprehensive population assessments.
 
     Parameters
     ----------
-    inverted_data : pd.DataFrame
-        DataFrame containing inverted scattering parameters with columns
-        including 'parameters' containing biological model parameters
-    nasc_data : pd.DataFrame
-        DataFrame containing Nautical Area Scattering Coefficient (NASC)
-        values with frequency as column level
+    inverted_data : xr.Dataset
+        Dataset containing inverted scattering parameters with 'parameters' coordinate containing
+        biological model parameters.
+    nasc_data : xr.DataArray
+        DataArray containing Nautical Area Scattering Coefficient (NASC) values with frequency
+        dimension.
     density_sw : float
         Seawater density in kg/m³, typically ~1026 kg/m³
     reference_frequency : float
-        Reference acoustic frequency in Hz for population estimation
+        Reference acoustic frequency in Hz for population estimation.
     aggregate_method : Literal["cells", "interval", "transect"]
         Method for spatial aggregation of population estimates:
+
         - "cells": Individual acoustic cells
+
         - "interval": Depth/range intervals
+
         - "transect": Survey transect lines
 
     Returns
     -------
-    pd.DataFrame
-        Combined DataFrame containing:
+    xr.Dataset
+        Dataset containing:
+
         - nasc: NASC values at reference frequency
+
         - number_density: Estimated areal number density (individuals/m²)
+
         - biomass_density: Estimated areal biomass density (kg/m²)
-        - Biological parameters: length_mean, radius_mean, body_volume,
-          body_density, body_weight, etc.
+
+        - Biological parameters: length_mean, radius_mean, body_volume, body_density,
+          body_weight, etc.
 
     Notes
     -----
-    The function assumes organisms can be modeled as uniformly bent cylinders
-    for volume estimation. Body weight calculation uses:
+    The function assumes organisms can be modeled as uniformly bent cylinders for volume
+    estimation. Body weight calculation uses:
 
     .. math::
         W = \\rho_{sw} \\cdot g \\cdot \\pi r^2 L
 
-    where W is body weight, ρ_sw is seawater density, g is the relative
-    density factor, r is mean radius, and L is mean length.
+    where W is body weight, ρ_sw is seawater density, g is the relative density factor, r is mean
+    radius, and L is mean length.
     """
 
     # Unpackage the parameters using optimized extraction
@@ -76,82 +82,97 @@ def estimate_population(
     # Compute average body weight [single animal]
     parameters["body_weight"] = parameters["body_volume"] * parameters["body_density"]
 
-    # Subset the dataset further to the single frequency
-    acoustic_data = (
-        inverted_data.loc[
-            :, inverted_data.columns.get_level_values("frequency") == reference_frequency
-        ]
-    ).droplevel("frequency", axis=1)
-
-    # Repeat the same for the coordinates
-    reference_nasc = (
-        nasc_data.loc[:, nasc_data.columns.get_level_values("frequency") == reference_frequency]
-    )[reference_frequency].to_frame("nasc")
+    # Subset the datasets further to the single frequency
+    acoustic_data = inverted_data.sel(frequency=reference_frequency)
+    reference_nasc = nasc_data.sel(frequency=reference_frequency)
 
     # Calculate the areal number density
     if aggregate_method == "transect":
-        reference_nasc.loc[:, "number_density"] = calculate_transect_number_density(
+        number_density = calculate_transect_number_density(
             acoustic_data, reference_nasc, parameters
-        ).to_numpy()
-    else:
-        reference_nasc["number_density"] = calculate_intervals_number_density(
-            acoustic_data, parameters
         )
+    else:
+        number_density = calculate_intervals_number_density(acoustic_data, parameters)
+
+    # Convert NASC to drop 'point' coordinate
+    nasc_cnv = reference_nasc.to_series().to_frame("nasc").reset_index()
+
+    # Convert the number densities and parameter sets and drop the point coordinate
+    number_density_cnv = number_density.to_series().to_frame("number_density").reset_index()
+
+    # Get the shared column names (excluding 'point')
+    shared_cols = [
+        col for col in number_density_cnv.columns if col in nasc_cnv.columns and col != "point"
+    ]
+
+    # Do a left-join to broadcast density values
+    merged_dens = nasc_cnv.merge(number_density_cnv, on=shared_cols, how="left").fillna(0.0)
+
+    # Reformat to Dataset structure
+    # ---- Get overlapping coordinates
+    overlapping_coords = [c for c in reference_nasc.coords if c in merged_dens.columns]
+    # ---- Find valid ordering
+    coord_order = [
+        c
+        for c in ["transect_num", "longitude", "latitude", "interval", "layer"]
+        if c in overlapping_coords
+    ]
+    # ---- Set the index
+    output_indexed = merged_dens.set_index(coord_order)
+    parameters_indexed = parameters.reset_index().set_index(
+        [c for c in parameters.index.names if c in coord_order]
+    )
 
-    # Compute areal biomass density
-    reference_nasc["biomass_density"] = (
-        reference_nasc["number_density"] * parameters["body_weight"]
+    # Calculate biomass
+    output_indexed["biomass_density"] = (
+        output_indexed["number_density"] * parameters_indexed["body_weight"]
     ).fillna(0.0)
 
-    # Find shared indices
-    idx = list(set(reference_nasc.index.names).intersection(parameters.index.names))
-
-    # Reset indices
-    reference_nasc = reference_nasc.reset_index().set_index(idx)
+    # Create Dataset by converting each column separately
+    data_vars = {}
+    for col in ["nasc", "number_density", "biomass_density", "parameters"]:
+        if col in output_indexed.columns:
+            data_vars[col] = (["point"], output_indexed[col].values)
+    # ---- Create coordinates from the MultiIndex using from_pandas_multiindex
+    coords = xr.Coordinates.from_pandas_multiindex(output_indexed.index, "point")
 
-    # Concatenate the parameters
-    return pd.concat(
-        [
-            reference_nasc,
-            parameters.reset_index()
-            .drop("number_density", axis=1)
-            .set_index(idx)
-            .reindex(reference_nasc.index),
-        ],
-        axis=1,
-    )
+    return xr.Dataset(data_vars, coords=coords)
 
 
 def calculate_transect_number_density(
-    acoustic_data: pd.DataFrame,
-    reference_nasc: pd.DataFrame,
-    parameters: pd.DataFrame,
+    acoustic_data: xr.Dataset,
+    reference_nasc: xr.DataArray,
+    parameters: xr.Dataset,
 ):
     """
     Compute areal number density aggregated by survey transect.
 
-    This function estimates the areal number density of organisms by
-    weighting volumetric densities by NASC contributions and aggregating
-    across survey transects. The method accounts for varying layer
-    thickness and spatial sampling intensity.
+    This function estimates the areal number density of organisms by weighting volumetric densities
+    by NASC contributions and aggregating across survey transects. The method accounts for varying
+    layer thickness and spatial sampling intensity.
 
     Parameters
     ----------
-    acoustic_data : pd.DataFrame
-        DataFrame containing acoustic measurements including:
+    acoustic_data : xr.Dataset
+        Dataset containing acoustic measurements including:
+
         - nasc: Nautical Area Scattering Coefficient values
+
         - thickness_mean: Mean layer thickness in meters
-    reference_nasc : pd.DataFrame
-        DataFrame with NASC values at reference frequency for weighting
-    parameters : pd.DataFrame
-        DataFrame containing biological parameters including:
+
+    reference_nasc : xr.DataArray
+        DataArray with NASC values at reference frequency for weighting.
+
+    parameters : xr.Dataset
+        Dataset containing biological parameters including:
+
         - number_density: Volumetric number density (individuals/m³)
 
     Returns
     -------
-    pd.Series
-        Areal number density in individuals/m² aggregated by transect,
-        converted from nautical miles to meters (factor of 1852²)
+    xr.DataArray
+        Areal number density in individuals/m² aggregated by transect, converted from nautical
+        miles to meters (factor of 1852²).
 
     Notes
     -----
@@ -170,60 +191,79 @@ def calculate_transect_number_density(
         w = \\frac{\\text{NASC}_{ref}}{\\text{NASC}_{acoustic}}
     """
 
-    # Create copies
-    acoustic_df = acoustic_data.copy()
-    reference_df = reference_nasc.copy()
+    # Get the transects that exist in acoustic_data
+    valid_transects = acoustic_data["transect_num"].values
 
-    # Find shared indices
-    idx = list(set(reference_nasc.index.names).intersection(acoustic_data.index.names))
+    # Filter reference_nasc to only include valid transects
+    reference_filtered = reference_nasc.where(
+        reference_nasc["transect_num"].isin(valid_transects), drop=True
+    )
 
-    # Reset the indices for each
-    acoustic_df = acoustic_df.reset_index().set_index(idx)
-    reference_df = reference_df.reset_index().set_index(idx)
+    # Reindex acoustic data to match reference_nasc structure
+    acoustic_aligned = acoustic_data.sel(transect_num=reference_filtered["transect_num"])
+    parameters_aligned = parameters.to_xarray().sel(transect_num=reference_filtered["transect_num"])
 
     # Compute the NASC weights
-    reference_df["nasc_weight"] = reference_df["nasc"] / acoustic_df["nasc"]
+    nasc_weight = reference_filtered.values / acoustic_aligned["nasc"].values
 
-    # Get the counts of each transect
-    interval_counts = reference_df.groupby(level="transect_num")["nasc"].count()
+    # Count intervals per transect
+    interval_counts = reference_filtered.groupby("transect_num").count()
 
-    # Compute the areal number density
-    areal_number_density = (
-        parameters["number_density"]
-        * reference_df["nasc_weight"]
-        * acoustic_df["thickness_mean"]
-        * interval_counts
-    ).fillna(0.0) * 1852**2
+    # Broadcast interval counts back to reference dimensions
+    interval_counts_broadcast = xr.DataArray(
+        [
+            interval_counts.sel(transect_num=t).values
+            for t in reference_filtered["transect_num"].values
+        ],
+        coords=reference_filtered.coords,
+        dims=reference_filtered.dims,
+    )
+
+    # Compute the areal number density using values
+    areal_number_density_vals = (
+        parameters_aligned["number_density"].values
+        * nasc_weight
+        * acoustic_aligned["thickness_mean"].values
+        * interval_counts_broadcast.values
+    ) * 1852**2
+
+    # Create output DataArray with reference_filtered structure
+    areal_number_density = xr.DataArray(
+        areal_number_density_vals, coords=reference_filtered.coords, dims=reference_filtered.dims
+    )
 
     # Return
     return areal_number_density
 
 
 def calculate_intervals_number_density(
-    acoustic_data: pd.DataFrame,
-    parameters: pd.DataFrame,
-):
+    acoustic_data: xr.Dataset,
+    parameters: xr.Dataset,
+) -> xr.DataArray:
     """
     Compute areal number density for individual depth/range intervals.
 
-    This function calculates areal number density by converting volumetric
-    density estimates to areal density using layer thickness information.
-    Unlike transect aggregation, this preserves interval-level resolution.
+    This function calculates areal number density by converting volumetric density estimates to
+    areal density using layer thickness information. Unlike transect aggregation, this preserves
+    interval-level resolution.
 
     Parameters
     ----------
-    acoustic_data : pd.DataFrame
-        DataFrame containing acoustic measurements with:
+    acoustic_data : xr.Dataset
+        Dataset containing acoustic measurements with:
+
         - thickness_mean: Mean layer thickness in meters for each interval
-    parameters : pd.DataFrame
-        DataFrame containing biological parameters with:
+
+    parameters : xr.Dataset
+        Dataset containing biological parameters with:
+
         - number_density: Volumetric number density (individuals/m³)
 
     Returns
     -------
-    pd.Series
-        Areal number density in individuals/m² for each interval,
-        converted from nautical miles to meters (factor of 1852²)
+    xr.DataArray
+        Areal number density in individuals/m² for each interval, converted from nautical miles to
+        meters (factor of 1852²).
 
     Notes
     -----
@@ -232,16 +272,22 @@ def calculate_intervals_number_density(
     .. math::
         N_A = N_V \\cdot h \\cdot 1852^2
 
-    where N_A is areal density (individuals/m²), N_V is volumetric density
-    (individuals/m³), h is layer thickness (m), and 1852² converts from
-    nautical miles² to meters².
+    where N_A is areal density (individuals/m²), N_V is volumetric density (individuals/m³), h is
+    layer thickness (m), and 1852² converts from nautical miles² to meters².
 
-    This method maintains spatial resolution at the interval level, making
-    it suitable for detailed vertical distribution analysis.
+    This method maintains spatial resolution at the interval level, making it suitable for
+    detailed vertical distribution analysis.
     """
 
-    # Calculate the layer density
-    layer_density = parameters["number_density"] * acoustic_data["thickness_mean"]
+    # Calculate the layer density using values to avoid index conflict
+    layer_density_vals = (
+        parameters["number_density"].values * acoustic_data["thickness_mean"].values
+    ) * 1852**2
+
+    # Create output DataArray with acoustic_data structure
+    layer_density = xr.DataArray(
+        layer_density_vals, coords=acoustic_data.coords, dims=acoustic_data.dims
+    )
 
     # Return
-    return layer_density * 1852**2
+    return layer_density
diff --git a/echopop/inversion/utils.py b/echopop/inversion/utils.py
index c38d2d2a..658bad1e 100644
--- a/echopop/inversion/utils.py
+++ b/echopop/inversion/utils.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 import pandas as pd
+import xarray as xr
 
 
 def impute_missing_sigma_bs(
@@ -649,25 +650,41 @@ def generate_frequency_interval(
     return _generate_frequency_interval_cached_key(key)
 
 
-def _extract_parameters_optimized(inverted_data: pd.DataFrame) -> pd.DataFrame:
+def _extract_parameters_optimized(inverted_data: xr.Dataset) -> pd.DataFrame:
     """
     Extract parameters more efficiently than using .apply().
 
-    This optimized version avoids the overhead of pandas .apply()
-    by using direct iteration and batch DataFrame construction.
+    This function extracts InvParameters objects from the Dataset's 'parameters' coordinate and
+    converts them to a DataFrame with parameter values as columns.
 
     Parameters
     ----------
-    inverted_data : pd.DataFrame
-        DataFrame with 'parameters' column containing InvParameters objects
+    inverted_data : xr.Dataset
+        Dataset with 'parameters' coordinate containing InvParameters objects.
 
     Returns
     -------
     pd.DataFrame
-        DataFrame with parameter values as columns
+        DataFrame with parameter values as columns, indexed by the appropriate dimensions
+        excluding 'frequency'.
     """
-    # Extract all parameter dictionaries at once
-    param_dicts = [obj.values for obj in inverted_data["parameters"]]
 
-    # Construct DataFrame in one operation
-    return pd.DataFrame(param_dicts, index=inverted_data.index)
+    # Extract parameter objects from coordinates
+    param_objects = inverted_data.coords["parameters"].values
+
+    # Extract all parameter dictionaries
+    param_dicts = [obj.values for obj in param_objects]
+
+    # Convert to DataFrame
+    params = pd.DataFrame(param_dicts)
+
+    # Get the index from inverted_data - handle both MultiIndex and regular index
+    if "point" in inverted_data.indexes and hasattr(inverted_data.indexes["point"], "names"):
+        params.index = inverted_data.indexes["point"]
+    else:
+        dim_names = [d for d in inverted_data.dims if d != "frequency"]
+        if dim_names:
+            params.index = inverted_data.indexes[dim_names[0]]
+
+    # Return
+    return params
diff --git a/echopop/tests/ingest/test_inversion_sv_ingestion.py b/echopop/tests/ingest/test_inversion_sv_ingestion.py
index ce49da1c..eb6f3c71 100644
--- a/echopop/tests/ingest/test_inversion_sv_ingestion.py
+++ b/echopop/tests/ingest/test_inversion_sv_ingestion.py
@@ -3,6 +3,7 @@
 import numpy as np
 import pandas as pd
 import pytest
+import xarray as xr
 
 from echopop.ingest import sv as ingest_sv
 
@@ -296,8 +297,8 @@ def test_ingest_echoview_sv_basic_functionality(sv_directory_with_files):
         impute_coordinates=True,
     )
 
-    assert isinstance(result_data, pd.DataFrame)
-    assert result_coords is None or isinstance(result_coords, pd.DataFrame)
+    assert isinstance(result_data, xr.Dataset)
+    assert result_coords is None or isinstance(result_coords, xr.DataArray)
 
 
 def test_ingest_echoview_sv_with_transect_pattern(sv_directory_with_files):
@@ -311,8 +312,8 @@ def test_ingest_echoview_sv_with_transect_pattern(sv_directory_with_files):
         aggregate_method="interval",
     )
 
-    assert isinstance(result_data, pd.DataFrame)
-    assert isinstance(result_coords, pd.DataFrame)
+    assert isinstance(result_data, xr.Dataset)
+    assert isinstance(result_coords, xr.DataArray)
 
 
 def test_ingest_echoview_sv_no_center_frequencies(sv_directory_with_files):
@@ -321,8 +322,8 @@ def test_ingest_echoview_sv_no_center_frequencies(sv_directory_with_files):
         sv_directory_with_files, transect_pattern=r"x(\d+)", aggregate_method="transect"
     )
 
-    assert isinstance(result_data, pd.DataFrame)
-    assert isinstance(result_coords, pd.DataFrame)
+    assert isinstance(result_data, xr.Dataset)
+    assert isinstance(result_coords, xr.DataArray)
 
 
 def test_ingest_echoview_sv_directory_not_found():
diff --git a/echopop/tests/inversion/test_inversion_matrix.py b/echopop/tests/inversion/test_inversion_matrix.py
index 1f9daadf..ce3a9ec8 100644
--- a/echopop/tests/inversion/test_inversion_matrix.py
+++ b/echopop/tests/inversion/test_inversion_matrix.py
@@ -5,6 +5,7 @@
 
 import echopop.inversion.inversion_matrix as im
 from echopop import inversion
+from echopop.ingest import sv as ingest_sv
 from echopop.inversion import InversionMatrix, InvParameters, estimate_population
 
 
@@ -243,64 +244,52 @@ def test_estimate_population(inv_transect_info, inv_interval_info, inv_cells_inf
 
     # Test transect-index
     transect_result = estimate_population(
-        inv_transect_info["inverted"],
-        inv_transect_info["coords"],
+        ingest_sv.df_to_xarray(inv_transect_info["inverted"]),
+        ingest_sv.df_to_xarray(inv_transect_info["coords"]),
         density_sw=1026.0,
         reference_frequency=120e3,
         aggregate_method="transect",
     )
+
     # ---- Check: shape
-    assert transect_result.shape == (5, 17)
-    # ---- Get keys:
-    params = list(inv_transect_info["inverted"]["parameters"].loc[1].values.keys())
-    # ---- Check: columns
-    expected_cols = ["longitude", "latitude", "nasc", "number_density", "biomass_density"] + params
-    assert all([col in transect_result.columns for col in expected_cols])
+    assert transect_result.sizes == {"point": 5}
+    # ---- Check: coords
+    expected_coords = ["longitude", "latitude", "transect_num"]
+    assert all([c in transect_result.coords for c in expected_coords])
     # ---- Check: index
-    assert list(transect_result.index.names) == ["transect_num"]
+    assert set(transect_result.data_vars) == {"biomass_density", "number_density", "nasc"}
 
     # Test interval-index
     interval_result = estimate_population(
-        inv_interval_info["inverted"],
-        inv_interval_info["coords"],
+        ingest_sv.df_to_xarray(inv_interval_info["inverted"]),
+        ingest_sv.df_to_xarray(inv_interval_info["coords"]),
         density_sw=1026.0,
         reference_frequency=120e3,
         aggregate_method="interval",
     )
     # ---- Check: shape
-    assert interval_result.shape == (20, 15)
-    # ---- Get keys:
-    params = list(inv_interval_info["inverted"]["parameters"].loc[1, 1, 1].values.keys())
-    # ---- Check: columns
-    expected_cols = ["nasc", "number_density", "biomass_density"] + params
-    assert all([col in interval_result.columns for col in expected_cols])
+    assert interval_result.sizes == {"point": 20}
+    # ---- Check: coords
+    expected_coords = ["longitude", "latitude", "interval"]
+    assert all([c in interval_result.coords for c in expected_coords])
     # ---- Check: index
-    assert all(
-        [col in ["longitude", "latitude", "interval"] for col in list(interval_result.index.names)]
-    )
+    assert set(interval_result.data_vars) == {"biomass_density", "number_density", "nasc"}
 
     # Test cells-index
     cells_result = estimate_population(
-        inv_cells_info["inverted"],
-        inv_cells_info["coords"],
+        ingest_sv.df_to_xarray(inv_cells_info["inverted"]),
+        ingest_sv.df_to_xarray(inv_cells_info["coords"]),
         density_sw=1026.0,
         reference_frequency=120e3,
         aggregate_method="cells",
     )
     # ---- Check: shape
-    assert cells_result.shape == (10, 15)
-    # ---- Get keys:
-    params = list(inv_cells_info["inverted"]["parameters"].loc[1, 1, -1, -1].values.keys())
-    # ---- Check: columns
-    expected_cols = ["nasc", "number_density", "biomass_density"] + params
-    assert all([col in cells_result.columns for col in expected_cols])
+    assert cells_result.sizes == {"point": 10}
+    # ---- Check: coords
+    expected_coords = ["longitude", "latitude", "interval", "layer"]
+    assert all([c in cells_result.coords for c in expected_coords])
     # ---- Check: index
-    assert all(
-        [
-            col in ["longitude", "latitude", "interval", "layer"]
-            for col in list(cells_result.index.names)
-        ]
-    )
+    assert set(cells_result.data_vars) == {"biomass_density", "number_density", "nasc"}
 
 
 # Test functions for scattering operations
@@ -601,17 +590,27 @@ def test_inversion_matrix_creation():
     """Test InversionMatrix creation and basic functionality."""
     # Create mock data
     frequencies = [38e3, 120e3]
-    n_obs = 5
+    n_obs = 4
 
     # Create MultiIndex columns
-    cols = pd.MultiIndex.from_product(
-        [["sv_mean", "nasc", "thickness_mean"], frequencies], names=[None, "frequency"]
-    )
 
     # Generate random data
     np.random.seed(131)
-    data = np.random.randn(n_obs, len(cols))
-    df = pd.DataFrame(data, columns=cols)
+    data = (
+        pd.DataFrame(
+            {
+                "transect_num": np.repeat([1, 2], n_obs),
+                "frequency": np.tile(frequencies, n_obs),
+                "longitude": np.random.randn(n_obs * len(frequencies)),
+                "sv_mean": np.random.randn(n_obs * len(frequencies)),
+                "nasc": np.random.randn(n_obs * len(frequencies)),
+                "thickness_mean": np.random.randn(n_obs * len(frequencies)),
+            }
+        )
+        .set_index(["transect_num", "longitude", "frequency"])
+        .unstack("frequency")
+        .fillna(0.0)
+    )
 
     # Simulation settings
     sim_settings = {
@@ -622,7 +621,7 @@ def test_inversion_matrix_creation():
     }
 
     # Create InversionMatrix
-    inv_matrix = InversionMatrix(df, sim_settings, verbose=False)
+    inv_matrix = InversionMatrix(ingest_sv.df_to_xarray(data), sim_settings, verbose=False)
 
     assert inv_matrix is not None
     assert inv_matrix.inversion_method == "scattering_model"
diff --git a/echopop/validators/__init__.py b/echopop/validators/__init__.py
index c62c56ff..3ddcd2f4 100644
--- a/echopop/validators/__init__.py
+++ b/echopop/validators/__init__.py
@@ -11,6 +11,19 @@
     ValidateVariogramClass,
 )
 
+
+# Rebuild ValidateBuildModelArgs after InvParameters is available
+def _rebuild_on_import():
+    try:
+        from ..inversion import InvParameters  # noqa: F401
+
+        ValidateBuildModelArgs.model_rebuild()
+    except ImportError:
+        pass
+
+
+_rebuild_on_import()
+
 __all__ = [
     "ValidateBuildModelArgs",
     "ValidateHullCropArgs",
diff --git a/echopop/validators/inversion.py b/echopop/validators/inversion.py
index 4e061698..810a71b9 100644
--- a/echopop/validators/inversion.py
+++ b/echopop/validators/inversion.py
@@ -3,10 +3,10 @@
 from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
 
 import numpy as np
-import pandas as pd
+import xarray as xr
 from pydantic import ConfigDict, Field, RootModel, ValidationError, field_validator, model_validator
 
-from ..core.validators import BaseDataFrame, BaseDictionary
+from ..core.validators import BaseDictionary
 
 if TYPE_CHECKING:
     from ..inversion import InvParameters
@@ -89,54 +89,6 @@ def validate_expected_strata(cls, v):
         return v
 
 
-class ScatterDF(BaseDataFrame):
-    """
-    Validates MultiIndex DataFrame structure with required top-level columns and nested frequency
-    index.
-
-    Ensures the top-level column names include 'sv_mean', 'nasc', and 'thickness_mean', and that
-    the second-level column names ('frequency') are numeric. All values under each frequency must
-    be floats.
-    """
-
-    class Config(BaseDataFrame.Config):
-        title = "acoustic backscatter export DataFrame"
-        multiindex_strict = True
-        multiindex_coerce = True
-        unique_column_names = False
-
-    @classmethod
-    def pre_validate(cls, df: pd.DataFrame) -> pd.DataFrame:
-        # Raise Error if not a MultiIndex DataFrame
-        if not isinstance(df.columns, pd.MultiIndex):
-            raise TypeError("Expected MultiIndex columns.")
-
-        # Check for required top-level columns
-        # ---- Get the top level indices
-        top_level = set(df.columns.get_level_values(0))
-        # ---- Identify missing columns
-        missing_columns = {"sv_mean", "nasc", "thickness_mean"} - top_level
-        # ---- Raise Error, if needed
-        if missing_columns:
-            # ---- Format
-            missing_str = ", ".join(f"'{col}'" for col in missing_columns)
-            raise KeyError(f"Missing required top-level columns: {missing_str}.")
-
-        # Check for nested column 'frequency'
-        if "frequency" not in df.columns.names:
-            # ---- Get current column names
-            current_names = list(df.columns.names)
-            # ---- Format
-            current_str = ", ".join(f"'{col}'" for col in current_names if col)
-            # ---- Raise Error
-            raise KeyError(
-                f"Missing required nested column index 'frequency'. Current column indices are: "
-                f"{current_str}."
-            )
-
-        return df
-
-
 class EnvironmentParameters(BaseDictionary):
     """
     Environmental and medium parameters for acoustic scattering models.
@@ -180,14 +132,26 @@ class ValidateInversionMatrix(BaseDictionary):
     Validation model for InversionMatrix class configuration.
     """
 
-    data: pd.DataFrame
+    data: xr.Dataset
     simulation_settings: SimulationParameters = Field(default_factory=SimulationParameters)
     model_config = ConfigDict(title="scattering model inversion analysis")
 
     @field_validator("data", mode="after")
     def validate_data(cls, v):
-        # Validate with pandera
-        return ScatterDF.validate(v)
+
+        # Validate coordinates
+        if "frequency" not in v.coords:
+            raise KeyError("Required coordinate 'frequency' missing from input xarray.Dataset.")
+
+        # Validate data variables
+        missing_vars = {"sv_mean", "nasc", "thickness_mean"} - set(v.data_vars)
+        # ---- Raise Error, if needed
+        if missing_vars:
+            # ---- Format
+            missing_str = ", ".join(f"'{col}'" for col in missing_vars)
+            raise KeyError(f"Missing required data variables from xarray.Dataset: {missing_str}.")
+
+        return v
 
 
 class ModelSettingsParameters(BaseDictionary):
@@ -354,3 +318,15 @@ def create(cls, **kwargs):
         Factory creation method
         """
         return cls.judge(**kwargs).model_dump(exclude_none=True)
+
+
+# Rebuild models with forward references after all classes are defined
+_model_rebuilt = False
+
+
+def ensure_model_rebuilt():
+    """Ensure model is rebuilt with forward references resolved."""
+    global _model_rebuilt
+    if not _model_rebuilt:
+        ValidateBuildModelArgs.model_rebuild()
+        _model_rebuilt = True
diff --git a/echopop/validators/scattering.py b/echopop/validators/scattering.py
index a537a086..f5e8412a 100644
--- a/echopop/validators/scattering.py
+++ b/echopop/validators/scattering.py
@@ -48,4 +48,4 @@ class ValidatePCDWBASettings(BaseDictionary):
     orientation_distribution: DistributionParameters = Field(default_factory=DistributionParameters)
     taper_order: float = Field(default=10.0, gt=0.0, allow_inf_nan=False)
     type: str
-    model_config = ConfigDict(title="PCDWBA model settings and additional arguments")
+    model_config = ConfigDict(title="PCDWBA model settings and additional arguments", extra="allow")
diff --git a/echopop/workflows/nwfsc_feat/workflows/feat_hake_xarray_scratch.py b/echopop/workflows/nwfsc_feat/workflows/feat_hake_xarray_scratch.py
deleted file mode 100644
index 4a3c5e4c..00000000
--- a/echopop/workflows/nwfsc_feat/workflows/feat_hake_xarray_scratch.py
+++ /dev/null
@@ -1,1168 +0,0 @@
-import os
-import pickle
-from pathlib import Path
-from typing import Any, Dict
-
-import numpy as np
-import pandas as pd
-import xarray as xr
-from lmfit import Parameters
-
-import echopop.workflows.nwfsc_feat as feat
-from echopop import inversion, utils
-from echopop.geostatistics import cropping, kriging, variogram
-from echopop.ingest import (
-    join_geostrata_by_latitude,
-    join_strata_by_haul,
-    load_biological_data,
-    load_geostrata,
-    load_isobath_data,
-    load_kriging_variogram_params,
-    load_mesh_data,
-    load_strata,
-    nasc,
-)
-from echopop.survey import fit_length_weight_regression, proportions, stratified, transect
-from echopop.workflows.nwfsc_feat import apportionment, biology
-
-# ==================================================================================================
-# ==================================================================================================
-# DEFINE DATA ROOT DIRECTORY
-# --------------------------
-# DATA_ROOT = Path("C:/Data/EchopopData/echopop_2019")
-DATA_ROOT = Path("C:/Users/Brandyn Lucca/Documents/Data/echopop_2019")
-# ==================================================================================================
-# ==================================================================================================
-# DATA INGESTION
-# ==================================================================================================
-# Organize NASC file
-# ------------------
-
-# Merge exports
-df_intervals, df_exports = nasc.merge_echoview_nasc(
-    nasc_path=DATA_ROOT / "raw_nasc/",
-    filename_transect_pattern=r"T(\d+)",
-    default_transect_spacing=10.0,
-    default_latitude_threshold=60.0,
-)
-
-# ==================================================================================================
-# Read in transect-region-haul keys
-# ---------------------------------
-TRANSECT_REGION_FILEPATH_ALL_AGES = (
-    DATA_ROOT / "Stratification/US_CAN_2019_transect_region_haul_age1+ auto_final.xlsx"
-)
-TRANSECT_REGION_FILEPATH_NO_AGE1 = (
-    DATA_ROOT / "Stratification/US_CAN_2019_transect_region_haul_age2+ auto_20191205.xlsx"
-)
-TRANSECT_REGION_FILE_RENAME: dict = {
-    "tranect": "transect_num",
-    "region id": "region_id",
-    "trawl #": "haul_num",
-}
-TRANSECT_REGION_SHEETNAME_ALL_AGES: str = "Sheet1"
-TRANSECT_REGION_SHEETNAME_NO_AGE1: str = "Sheet1"
-
-# Read in the transect-region-haul key files for each group
-transect_region_haul_key_all_ages = nasc.read_transect_region_haul_key(
-    filename=TRANSECT_REGION_FILEPATH_ALL_AGES,
-    sheetname=TRANSECT_REGION_SHEETNAME_ALL_AGES,
-    rename_dict=TRANSECT_REGION_FILE_RENAME,
-)
-
-transect_region_haul_key_no_age1: pd.DataFrame = nasc.read_transect_region_haul_key(
-    TRANSECT_REGION_FILEPATH_NO_AGE1, TRANSECT_REGION_SHEETNAME_NO_AGE1, TRANSECT_REGION_FILE_RENAME
-)
-
-# ==================================================================================================
-# Read in transect-region-haul keys
-# ---------------------------------
-REGION_NAME_EXPR_DICT: Dict[str, dict] = {
-    "REGION_CLASS": {
-        "Age-1 Hake": "^(?:h1a(?![a-z]|m))",
-        "Age-1 Hake Mix": "^(?:h1am(?![a-z]|1a))",
-        "Hake": "^(?:h(?![a-z]|1a)|hake(?![_]))",
-        "Hake Mix": "^(?:hm(?![a-z]|1a)|hake_mix(?![_]))",
-    },
-    "HAUL_NUM": {
-        "[0-9]+",
-    },
-    "COUNTRY": {
-        "CAN": "^[cC]",
-        "US": "^[uU]",
-    },
-}
-
-# Process the region name codes to define the region classes
-# e.g. H5C - Region 2 corresponds to "Hake, Haul #5, Canada"
-df_exports_with_regions = nasc.process_region_names(
-    df=df_exports,
-    region_name_expr_dict=REGION_NAME_EXPR_DICT,
-    can_haul_offset=200,
-)
-
-# ==================================================================================================
-# [OPTIONAL] Generate transect-region-haul key from compiled values
-# ---------------------------------
-
-# Generate transect-region-haul key from compiled values
-df_transect_region_haul_key_no_age1 = nasc.generate_transect_region_haul_key(
-    df=df_exports_with_regions, filter_list=["Hake", "Hake Mix"]
-)
-
-df_transect_region_haul_key_all_ages = nasc.generate_transect_region_haul_key(
-    df=df_exports_with_regions, filter_list=["Age-1 Hake", "Age-1", "Hake", "Hake Mix"]
-)
-
-# ==================================================================================================
-# Consolidate the Echvoiew NASC export files
-# ------------------------------------------
-df_nasc_no_age1 = nasc.consolidate_echvoiew_nasc(
-    df_merged=df_exports_with_regions,
-    interval_df=df_intervals,
-    region_class_names=["Hake", "Hake Mix"],
-    impute_region_ids=True,
-    transect_region_haul_key_df=transect_region_haul_key_no_age1,
-)
-
-df_nasc_all_ages = nasc.consolidate_echvoiew_nasc(
-    df_merged=df_exports_with_regions,
-    interval_df=df_intervals,
-    region_class_names=["Age-1 Hake", "Age-1", "Hake", "Hake Mix"],
-    impute_region_ids=True,
-    transect_region_haul_key_df=transect_region_haul_key_all_ages,
-)
-
-# ==================================================================================================
-# [OPTIONAL] Read in a pre-consolidated NASC data file
-# ----------------------------------------------------
-FEAT_TO_ECHOPOP_COLUMNS = {
-    "transect": "transect_num",
-    "region id": "region_id",
-    "vessel_log_start": "distance_s",
-    "vessel_log_end": "distance_e",
-    "spacing": "transect_spacing",
-    "layer mean depth": "layer_mean_depth",
-    "layer height": "layer_height",
-    "bottom depth": "bottom_depth",
-    "assigned haul": "haul_num",
-}
-
-#
-df_nasc_all_ages = nasc.read_nasc_file(
-    filename=DATA_ROOT / "Exports/US_CAN_NASC_2019_table_all_ages.xlsx",
-    sheetname="Sheet1",
-    column_name_map=FEAT_TO_ECHOPOP_COLUMNS,
-)
-
-# ==================================================================================================
-# Load in the biolodical data
-# ---------------------------
-BIODATA_SHEETS = {
-    "catch": "biodata_catch",
-    "length": "biodata_length",
-    "specimen": "biodata_specimen",
-}
-SUBSET_DICT = {
-    "ships": {160: {"survey": 201906}, 584: {"survey": 2019097, "haul_offset": 200}},
-    "species_code": [22500],
-}
-FEAT_TO_ECHOPOP_BIODATA_COLUMNS = {
-    "frequency": "length_count",
-    "haul": "haul_num",
-    "weight_in_haul": "weight",
-}
-BIODATA_SEX = {"sex": {1: "male", 2: "female", 3: "unsexed"}}
-
-#
-dict_df_bio = load_biological_data(
-    biodata_filepath=DATA_ROOT / "Biological/1995-2023_biodata_redo.xlsx",
-    biodata_sheet_map=BIODATA_SHEETS,
-    column_name_map=FEAT_TO_ECHOPOP_BIODATA_COLUMNS,
-    subset_dict=SUBSET_DICT,
-    biodata_label_map=BIODATA_SEX,
-)
-
-# ==================================================================================================
-# Load in strata files
-# --------------------
-STRATA_SHEETS = {
-    "inpfc": "INPFC",
-    "ks": "Base KS",
-}
-FEAT_TO_ECHOPOP_STRATA_COLUMNS = {
-    "fraction_hake": "nasc_proportion",
-    "haul": "haul_num",
-    "stratum": "stratum_num",
-}
-
-#
-df_dict_strata = load_strata(
-    strata_filepath=DATA_ROOT / "Stratification/US_CAN strata 2019_final.xlsx",
-    strata_sheet_map=STRATA_SHEETS,
-    column_name_map=FEAT_TO_ECHOPOP_STRATA_COLUMNS,
-)
-
-# ==================================================================================================
-# Load in geographical strata files
-# ---------------------------------
-GEOSTRATA_SHEETS = {
-    "inpfc": "INPFC",
-    "ks": "stratification1",
-}
-FEAT_TO_ECHOPOP_GEOSTRATA_COLUMNS = {
-    "latitude (upper limit)": "northlimit_latitude",
-    "stratum": "stratum_num",
-}
-
-#
-df_dict_geostrata = load_geostrata(
-    geostrata_filepath=DATA_ROOT / "Stratification/Stratification_geographic_Lat_2019_final.xlsx",
-    geostrata_sheet_map=GEOSTRATA_SHEETS,
-    column_name_map=FEAT_TO_ECHOPOP_GEOSTRATA_COLUMNS,
-)
-
-# ==================================================================================================
-# Stratify data based on haul numbers
-# -----------------------------------
-
-# Add INPFC
-# ---- NASC
-df_nasc_all_ages = join_strata_by_haul(
-    data=df_nasc_all_ages, strata_df=df_dict_strata["inpfc"], stratum_name="stratum_inpfc"
-)
-# ---- Biodata
-dict_df_bio = join_strata_by_haul(
-    dict_df_bio, df_dict_strata["inpfc"], stratum_name="stratum_inpfc"
-)
-
-# Add KS
-# ---- NASC
-df_nasc_all_ages = join_strata_by_haul(
-    df_nasc_all_ages, df_dict_strata["ks"], stratum_name="stratum_ks"
-)
-df_nasc_no_age1 = join_strata_by_haul(
-    df_nasc_no_age1, df_dict_strata["ks"], stratum_name="stratum_ks"
-)
-# ---- Biodata
-dict_df_bio = join_strata_by_haul(dict_df_bio, df_dict_strata["ks"], stratum_name="stratum_ks")
-
-# ==================================================================================================
-# Load kriging mesh file
-# ----------------------
-
-FEAT_TO_ECHOPOP_MESH_COLUMNS = {
-    "centroid_latitude": "latitude",
-    "centroid_longitude": "longitude",
-    "fraction_cell_in_polygon": "fraction",
-}
-
-#
-df_mesh = load_mesh_data(
-    mesh_filepath=DATA_ROOT
-    / "Kriging_files/Kriging_grid_files/krig_grid2_5nm_cut_centroids_2013.xlsx",
-    sheet_name="krigedgrid2_5nm_forChu",
-    column_name_map=FEAT_TO_ECHOPOP_MESH_COLUMNS,
-)
-
-# ==================================================================================================
-# [OPTIONAL] Stratify data based on latitude intervals
-# ----------------------------------------------------
-# INPFC (from geostrata)
-df_nasc_all_ages = join_geostrata_by_latitude(
-    df_nasc_all_ages, df_dict_geostrata["inpfc"], stratum_name="geostratum_inpfc"
-)
-df_nasc_no_age1 = join_geostrata_by_latitude(
-    df_nasc_no_age1, df_dict_geostrata["inpfc"], stratum_name="geostratum_inpfc"
-)
-# KS (from geostrata)
-df_nasc_all_ages = join_geostrata_by_latitude(
-    df_nasc_all_ages, df_dict_geostrata["ks"], stratum_name="geostratum_ks"
-)
-df_nasc_no_age1 = join_geostrata_by_latitude(
-    df_nasc_no_age1, df_dict_geostrata["ks"], stratum_name="geostratum_ks"
-)
-
-# MESH
-# ---- DataFrame merged with geographically distributed stratum number (KS or INPFC)
-# -------- INPFC (from geostrata)
-df_mesh = join_geostrata_by_latitude(
-    df_mesh, df_dict_geostrata["inpfc"], stratum_name="geostratum_inpfc"
-)
-# -------- KS (from geostrata)
-df_mesh = join_geostrata_by_latitude(df_mesh, df_dict_geostrata["ks"], stratum_name="geostratum_ks")
-
-# ==================================================================================================
-# Load kriging and variogram parameters
-# -------------------------------------
-
-FEAT_TO_ECHOPOP_GEOSTATS_PARAMS_COLUMNS = {
-    "hole": "hole_effect_range",
-    "lscl": "correlation_range",
-    "nugt": "nugget",
-    "powr": "decay_power",
-    "ratio": "aspect_ratio",
-    "res": "lag_resolution",
-    "srad": "search_radius",
-}
-
-#
-dict_kriging_params, dict_variogram_params = load_kriging_variogram_params(
-    geostatistic_params_filepath=(
-        DATA_ROOT / "Kriging_files/default_vario_krig_settings_2019_US_CAN.xlsx"
-    ),
-    sheet_name="Sheet1",
-    column_name_map=FEAT_TO_ECHOPOP_GEOSTATS_PARAMS_COLUMNS,
-)
-
-# ==================================================================================================
-# ==================================================================================================
-# DATA PROCESSING
-# ==================================================================================================
-# Generate binned distributions [age, length]
-# -------------------------------------------
-AGE_BINS = np.linspace(start=1.0, stop=22.0, num=22)
-LENGTH_BINS = np.linspace(start=2.0, stop=80.0, num=40)
-
-#
-# ---- Length
-utils.binify(
-    data=dict_df_bio,
-    bins=LENGTH_BINS,
-    bin_column="length",
-)
-
-# Age
-utils.binify(
-    data=dict_df_bio,
-    bins=AGE_BINS,
-    bin_column="age",
-)
-
-# ==================================================================================================
-# Fit length-weight regression to the binned data
-# -----------------------------------------------
-
-# Dictionary for length-weight regression coefficients
-dict_length_weight_coefs = {}
-
-# For all fish
-dict_length_weight_coefs["all"] = (
-    dict_df_bio["specimen"]
-    .assign(sex="all")
-    .groupby(["sex"])
-    .apply(fit_length_weight_regression, include_groups=False)
-)
-
-# Sex-specific
-dict_length_weight_coefs["sex"] = (
-    dict_df_bio["specimen"]
-    .groupby(["sex"])
-    .apply(fit_length_weight_regression, include_groups=False)
-)
-
-# ==================================================================================================
-# Compute the mean weights per length bin
-# ---------------------------------------
-
-# Sex-specific (grouped coefficients)
-da_binned_weights_sex = biology.length_binned_weights(
-    data=dict_df_bio["specimen"],
-    length_bins=LENGTH_BINS,
-    regression_coefficients=dict_length_weight_coefs["sex"],
-    impute_bins=True,
-    minimum_count_threshold=5,
-)
-
-# All fish (single coefficient set)
-da_binned_weights_all = biology.length_binned_weights(
-    data=dict_df_bio["specimen"].assign(sex="all"),
-    length_bins=LENGTH_BINS,
-    regression_coefficients=dict_length_weight_coefs["all"],
-    impute_bins=True,
-    minimum_count_threshold=5,
-)
-
-# Combine the pivot tables by adding the "all" column to the sex-specific table
-da_binned_weight_table = xr.concat(
-    [da_binned_weights_sex, da_binned_weights_all],
-    dim = "sex"
-)
-
-# ==================================================================================================
-# Compute the count distributions per age- and length-bins
-# --------------------------------------------------------
-
-# Dataset for number counts
-ds_counts = xr.Dataset()
-
-# Aged
-ds_counts["aged"] = proportions.compute_binned_counts(
-    data=dict_df_bio["specimen"].dropna(subset=["age", "length", "weight"]),
-    groupby_cols=["stratum_ks", "length_bin", "age_bin", "sex"],
-    count_col="length",
-    agg_func="size",
-)
-
-# Unaged
-ds_counts["unaged"] = proportions.compute_binned_counts(
-    data=dict_df_bio["length"].copy().dropna(subset=["length"]),
-    groupby_cols=["stratum_ks", "length_bin", "sex"],
-    count_col="length_count",
-    agg_func="sum",
-)
-
-# ==================================================================================================
-# Compute the number proportions
-# ------------------------------
-dict_ds_number_proportion = proportions.number_proportions(
-    data=ds_counts,
-    group_columns=["stratum_ks"],
-    exclude_filters={"aged": {"sex": "unsexed"}},
-)
-
-# ==================================================================================================
-# Distribute (bin) weight over age, length, and sex
-# -------------------------------------------------
-# Pre-allocate a Dataset
-ds_da_weight_dist = xr.Dataset()
-
-# Aged
-ds_da_weight_dist["aged"] = proportions.binned_weights(
-    length_data=dict_df_bio["specimen"],
-    include_filter={"sex": ["female", "male"]},
-    interpolate_regression=False,
-    group_columns=["stratum_ks", "sex", "age_bin"],
-)
-
-# Unaged
-ds_da_weight_dist["unaged"] = proportions.binned_weights(
-    length_data=dict_df_bio["length"],
-    include_filter={"sex": ["female", "male"]},
-    interpolate_regression=True,
-    length_weight_data=da_binned_weight_table,
-    group_columns=["stratum_ks", "sex"],
-)
-
-# ==================================================================================================
-# Calculate the average weights pre stratum when combining different datasets
-# ---------------------------------------------------------------------------
-da_averaged_weight = proportions.stratum_averaged_weight(
-    number_proportions=dict_ds_number_proportion,
-    length_weight_data=da_binned_weight_table,
-    group_columns=["stratum_ks"]
-)
-
-# ==================================================================================================
-# Compute the length-binned weight proportions for aged fish
-# ----------------------------------------------------------
-
-# Initialize Dictionary container for DataArrays
-dict_da_weight_proportion = {}
-
-# Aged
-dict_da_weight_proportion["aged"] = proportions.weight_proportions(
-    weight_data=ds_da_weight_dist["aged"], 
-    catch_data=dict_df_bio["catch"], 
-    group_columns = ["stratum_ks"]
-)
-
-# ==================================================================================================
-# Compute the standardized weight proportions for unaged fish
-# ----------------------------------------------------------
-dict_da_weight_proportion["unaged"] = proportions.fitted_weight_proportions(
-    weight_data=ds_da_weight_dist["unaged"],
-    reference_weight_proportions=dict_da_weight_proportion["aged"],
-    catch_data=dict_df_bio["catch"],
-    number_proportions=dict_ds_number_proportion["unaged"],
-    binned_weights=da_binned_weights_all,
-    group_columns=["stratum_ks"]
-)
-
-# ==================================================================================================
-# ==================================================================================================
-# NASC TO POPULATION ESTIMATE CONVERSION
-# ==================================================================================================
-# Initialize the Inversion class
-# ------------------------------
-MODEL_PARAMETERS = {
-    "ts_length_regression": {"slope": 20.0, "intercept": -68.0},
-    "stratify_by": ["stratum_ks"],
-    "expected_strata": df_dict_strata["ks"].stratum_num.unique(),
-    "impute_missing_strata": True,
-    "haul_replicates": True,
-}
-
-# Initiate object to perform inversion
-invert_hake = inversion.InversionLengthTS(MODEL_PARAMETERS)
-
-# ==================================================================================================
-# Invert number density
-# ---------------------
-
-# If the above haul-averaged `sigma_bs` values were calculated, then the inversion can can
-# completed without calling in additional biodata
-df_nasc_all_ages = invert_hake.invert(
-    df_nasc=df_nasc_all_ages, df_length=[dict_df_bio["length"], dict_df_bio["specimen"]]
-)
-df_nasc_no_age1 = invert_hake.invert(
-    df_nasc=df_nasc_no_age1, df_length=[dict_df_bio["length"], dict_df_bio["specimen"]]
-)
-# ---- The average `sigma_bs` for each stratum can be inspected at:
-invert_hake.sigma_bs_strata
-
-# ==================================================================================================
-# Set transect interval distances
-# -------------------------------
-
-# Calculate along-transect interval distances which is required for getting the area-per-interval
-# and therefore going from number density to abundance
-transect.compute_interval_distance(df_nasc=df_nasc_all_ages, interval_threshold=0.05)
-transect.compute_interval_distance(df_nasc=df_nasc_no_age1, interval_threshold=0.05)
-
-# ==================================================================================================
-# Calculate transect interval areas
-# ---------------------------------
-df_nasc_all_ages["area_interval"] = (
-    df_nasc_all_ages["transect_spacing"] * df_nasc_all_ages["distance_interval"]
-)
-df_nasc_no_age1["area_interval"] = (
-    df_nasc_no_age1["transect_spacing"] * df_nasc_no_age1["distance_interval"]
-)
-
-# ==================================================================================================
-# Calculate (and apportion) number densities to abundance, and number densities/abundance for each
-# sex
-# --------------------------------------------------------------------------------------------------
-biology.compute_abundance(
-    dataset=df_nasc_no_age1,
-    exclude_filter={"sex": "unsexed"},
-    number_proportions=dict_ds_number_proportion,
-)
-
-# ==================================================================================================
-# Calculate (and apportion) biomass densities and biomass (from number density and abundance,
-# respectively) for the overall transect dataset as well as for each sex
-# --------------------------------------------------------------------------------------------------
-biology.compute_biomass(
-    dataset=df_nasc_no_age1,
-    stratum_weights=da_averaged_weight,
-)
-
-# ==================================================================================================
-# Get proportions for each stratum specific to age-1
-# --------------------------------------------------
-
-# Age-1 NASC proportions
-da_age1_nasc_proportions = proportions.get_nasc_proportions_slice(
-    number_proportions = dict_ds_number_proportion["aged"],
-    group_columns = ["stratum_ks"],
-    ts_length_regression_parameters={"slope": 20.0, "intercept": -68.0},
-    include_filter={"age_bin": [1]},
-)
-
-
-# Age-1 number proportions
-da_age1_number_proportions = proportions.get_number_proportions_slice(
-    number_proportions = dict_ds_number_proportion["aged"],
-    group_columns = ["stratum_ks"],
-    include_filter={"age_bin": [1]}
-)
-
-# Age-1 weight proportions
-da_age1_weight_proportions = proportions.get_weight_proportions_slice(
-    weight_proportions=dict_da_weight_proportion["aged"],
-    group_columns=["stratum_ks"],
-    include_filter={"age_bin": [1]},
-    number_proportions=dict_ds_number_proportion,
-    length_threshold_min=10.0,
-    weight_proportion_threshold=1e-10,
-)
-
-# ==================================================================================================
-# Apply the calculated proportions to the abundance, biomass, and NASC estimates
-# ------------------------------------------------------------------------------
-df_nasc_no_age1_prt = apportionment.remove_group_from_estimates(
-    transect_data=df_nasc_no_age1,
-    group_proportions=xr.Dataset({
-        "nasc": da_age1_nasc_proportions,
-        "abundance": da_age1_number_proportions,
-        "biomass": da_age1_weight_proportions,
-    }),
-)
-
-# ==================================================================================================
-# Distribute transect abundances across age-length-sex bins
-# ---------------------------------------------------------
-dict_ds_transect_abundance_table = apportionment.distribute_population_estimates(
-    data = df_nasc_no_age1_prt,
-    proportions = dict_ds_number_proportion,
-    variable = "abundance",
-    group_columns= ["sex", "age_bin", "length_bin", "stratum_ks"]
-)
-
-# ==================================================================================================
-# Distribute transect biomasses across age-length-sex bins
-# ---------------------------------------------------------
-dict_ds_transect_biomass_table = apportionment.distribute_population_estimates(
-    data = df_nasc_no_age1_prt,
-    proportions=dict_da_weight_proportion,
-    variable = "biomass",
-    group_columns = ["sex", "age_bin", "length_bin", "stratum_ks"]
-)
-
-# ==================================================================================================
-# Distribute transect biomasses across age-length-sex bins for aged fish only
-# ---------------------------------------------------------------------------
-ds_transect_aged_biomass_table = apportionment.distribute_population_estimates(
-    data=df_nasc_no_age1_prt,
-    proportions=dict_da_weight_proportion["aged"],
-    variable="biomass",
-    group_columns = ["sex", "age_bin", "length_bin", "stratum_ks"]
-)
-
-# ==================================================================================================
-# ==================================================================================================
-# GEOSTATISTICS
-# ==================================================================================================
-# Load reference line (isobath)
-# -----------------------------
-
-df_isobath = load_isobath_data(
-    isobath_filepath=DATA_ROOT
-    / "Kriging_files/Kriging_grid_files/transformation_isobath_coordinates.xlsx",
-    sheet_name="Smoothing_EasyKrig",
-)
-
-# ==================================================================================================
-# Transform the geospatial coordinates for the transect data
-# ----------------------------------------------------------
-df_nasc_no_age1_prt, delta_longitude, delta_latitude = cropping.transform_coordinates(
-    data=df_nasc_no_age1,
-    reference=df_isobath,
-    x_offset=-124.78338,
-    y_offset=45.0,
-)
-
-# ==================================================================================================
-# Transform the geospatial coordinates for the mesh data
-# ------------------------------------------------------
-df_mesh, _, _ = cropping.transform_coordinates(
-    data=df_mesh,
-    reference=df_isobath,
-    x_offset=-124.78338,
-    y_offset=45.0,
-    delta_x=delta_longitude,
-    delta_y=delta_latitude,
-)
-
-# ==================================================================================================
-# Initialize Variogram class
-# --------------------------
-
-# Initialize
-vgm = variogram.Variogram(
-    lag_resolution=0.002,
-    n_lags=30,
-    coordinate_names=("x", "y"),
-)
-
-# ==================================================================================================
-# Calculate the empirical variogram
-# ---------------------------------
-vgm.calculate_empirical_variogram(
-    data=df_nasc_no_age1_prt,
-    variable="biomass_density",
-    azimuth_filter=True,
-    azimuth_angle_threshold=180.0,
-)
-
-# ==================================================================================================
-# Fit theoretical/modeled variogram to the transect data
-# ------------------------------------------------------
-
-# Set up `lmfit` parameters
-# lmfit.Parameters tuples: (NAME VALUE VARY MIN  MAX  EXPR  BRUTE_STEP)
-variogram_parameters_lmfit = Parameters()
-variogram_parameters_lmfit.add_many(
-    ("nugget", dict_variogram_params["nugget"], True, 0.0),
-    ("sill", dict_variogram_params["sill"], False, 0.0),
-    ("correlation_range", dict_variogram_params["correlation_range"], False, 0.0),
-    ("hole_effect_range", dict_variogram_params["hole_effect_range"], False, 0.0, 0.1),
-    ("decay_power", dict_variogram_params["decay_power"], False, 1.25, 1.75),
-)
-
-# Set up optimization parameters used for fitting the variogram
-dict_optimization = {
-    "max_nfev": None,
-    "ftol": 1e-08,
-    "gtol": 1e-8,
-    "xtol": 1e-8,
-    "diff_step": None,
-    "tr_solver": "exact",
-    "x_scale": 1.0,
-    "jac": "2-point",
-}
-
-# Get the best-fit variogram parameters
-best_fit_parameters = vgm.fit_variogram_model(
-    model=["exponential", "bessel"],
-    model_parameters=variogram_parameters_lmfit,
-    optimizer_kwargs=dict_optimization,
-)
-print(best_fit_parameters)
-
-# ==================================================================================================
-# Initialize the kriging class object
-# -----------------------------------
-
-# Define the requisite kriging parameters
-KRIGING_PARAMETERS = {
-    "search_radius": best_fit_parameters["correlation_range"] * 3,
-    "aspect_ratio": 0.001,
-    "k_min": 3,
-    "k_max": 10,
-}
-
-# Define the requisite variogram parameters and arguments
-VARIOGRAM_PARAMETERS = {"model": ["exponential", "bessel"], **best_fit_parameters}
-
-krg = kriging.Kriging(
-    mesh=df_mesh,
-    kriging_params=KRIGING_PARAMETERS,
-    variogram_params=VARIOGRAM_PARAMETERS,
-    coordinate_names=("x", "y"),
-)
-
-# ==================================================================================================
-# Mesh cropping using the hull convex
-# -----------------------------------
-
-krg.crop_mesh(
-    crop_function=feat.transect_ends_crop,
-    transects=df_nasc_no_age1_prt,
-    latitude_resolution=1.25 / 60.0,
-    transect_mesh_region_function=feat.parameters.transect_mesh_region_2019,
-)
-
-# ==================================================================================================
-# [FEAT] Get the western extent of the transect bounds
-# ----------------------------------------------------
-transect_western_extents = feat.get_survey_western_extents(
-    transects=df_nasc_no_age1_prt, coordinate_names=("x", "y"), latitude_threshold=51.0
-)
-
-# ==================================================================================================
-# [FEAT] Register the custom search strategy
-# ------------------------------------------
-krg.register_search_strategy("FEAT_strategy", feat.western_boundary_search_strategy)
-# ---- Verify that method was registered
-krg.list_search_strategies()
-
-# ==================================================================================================
-# Krige the biomass density to get kriged biomass
-# -----------------------------------------------
-
-# Define the required keyword arguments for 'FEAT_strategy'
-# ---- Only `transect_western_extents` is needed for this particular function since the
-# `kriging_mesh` and `coordinate_names` arguments are inherited from the class instance
-FEAT_STRATEGY_KWARGS = {
-    "western_extent": transect_western_extents,
-}
-
-# Krige
-df_kriged_results = krg.krige(
-    transects=df_nasc_no_age1_prt,
-    variable="biomass_density",
-    extrapolate=False,
-    default_mesh_cell_area=6.25,
-    adaptive_search_strategy="FEAT_strategy",
-    custom_search_kwargs=FEAT_STRATEGY_KWARGS,
-)
-# ##################################################################################################
-# Back-calculate sex-specific biomass and abundance, and total NASC from the kriged biomass
-# density estimates
-# -----------------
-# Compute biomass
-df_kriged_results["biomass"] = (
-    df_kriged_results["biomass_density"] * df_kriged_results["area"]
-)
-
-# Convert biomass to abundance to NASC
-apportionment.mesh_biomass_to_nasc(
-    mesh_data=df_kriged_results,
-    biodata=dict_da_weight_proportion,
-    group_columns=["sex", "stratum_ks"],
-    mesh_biodata_link={"geostratum_ks": "stratum_ks"},
-    stratum_weights=da_averaged_weight.sel(sex="all"),
-    stratum_sigma_bs=invert_hake.sigma_bs_strata,
-)
-
-# ##################################################################################################
-# Distribute kriged abundance estimates over length and age/length
-# ----------------------------------------------------------------
-dict_ds_kriged_abundance_table = apportionment.distribute_population_estimates(
-    data = df_kriged_results,
-    proportions = dict_ds_number_proportion,
-    variable = "abundance",
-    group_columns = ["sex", "age_bin", "length_bin", "stratum_ks"],
-    data_proportions_link={"geostratum_ks": "stratum_ks"}
-)
-
-# ##################################################################################################
-# Distribute kriged biomass estimates over length and age/length
-# --------------------------------------------------------------
-dict_ds_kriged_biomass_table = apportionment.distribute_population_estimates(
-    data = df_kriged_results,
-    proportions = dict_da_weight_proportion,
-    variable = "biomass",
-    group_columns = ["sex", "age_bin", "length_bin", "stratum_ks"],
-    data_proportions_link={"geostratum_ks": "stratum_ks"}
-)
-
-# ##################################################################################################
-# Standardize the unaged abundance estimates to be distributed over age
-# ---------------------------------------------------------------------
-dict_ds_kriged_abundance_table["standardized_unaged"] = apportionment.distribute_unaged_from_aged(
-    population_table = dict_ds_kriged_abundance_table["unaged"],
-    reference_table = dict_ds_kriged_abundance_table["aged"],
-    group_columns = ["sex"],
-    impute = False
-)
-
-population_table = dict_ds_kriged_biomass_table["unaged"]
-reference_table = dict_ds_kriged_biomass_table["aged"]
-# group_columns = ["stratum_ks"]
-group_columns = ["sex"]
-impute = True
-impute=True
-impute_variable=["age_bin"]
-    
-# Get original table coordinates
-table_coords = list(population_table.coords.keys())
-table_noncoords = list(set(table_coords) - set(group_columns + ["length_bin"]))
-
-# Get reference coordinates
-reference_coords = list(reference_table.coords.keys())
-
-# Standardize the apportioned table values
-standardized_table = (
-    population_table.sum(dim=group_columns)
-    * reference_table.sum(dim=group_columns)
-    / reference_table.sum(dim=list(set(reference_coords).difference(table_coords)) + group_columns)
-).fillna(0.0)
-
-#############################################
-initial_table = population_table.sum(dim=group_columns)
-reference_table = reference_table.copy()
-standardized_table = standardized_table.copy()
-group_columns = group_columns
-subgroup_coords = table_noncoords
-impute_variable = impute_variable
-
-####
-# Extract dimensions
-group_dim = group_columns[0]
-impute_dim = impute_variable[0]
-subgroup_dim = subgroup_coords[0]
-
-# Get group and impute coordinate values
-group_vals = reference_table.coords[subgroup_dim].values
-impute_vals = reference_table.coords[impute_dim].values
-interval_dim = [d for d in reference_table.dims if d not in group_columns + impute_variable][0]
-interval_vals = reference_table.coords[interval_dim].values
-
-# Sum reference_table across all dims except group_columns + impute_variable
-reference_summed = reference_table.sum(dim=[c for c in reference_table.dims if c not in [subgroup_dim, interval_dim]])
-
-# Mask for zeros/non-zeros along impute_variable
-ref_zero_mask = reference_summed == 0.0
-ref_nonzero_mask = reference_summed != 0.0
-
-# Create translation for row numbers
-interval_to_numeric = {interval: i for i, interval in enumerate(reference_summed.coords[interval_dim].values)}
-
-# Prepare output
-imputed = standardized_table.copy()
-
- # Get the nearest-neighbor rows and recompute the indices
-imputed_rows = {
-    col: arr[
-        np.argmin(
-            np.abs(
-                ref_zero_rows[col][table_nonzeros_mask[col]][:, np.newaxis]
-                - ref_nonzero_rows[col]
-            ),
-            axis=1,
-        )
-    ]
-    for col, arr in ref_nonzero_rows.items()
-}
-
-for g in group_vals:
-    ref_col = reference_summed.sel({subgroup_dim: g})
-    ref_zero_mask_col = (ref_col == 0.0).values
-    ref_nonzero_mask_col = (ref_col != 0.0).values
-
-    # Indices for zero/nonzero
-    ref_zero_idx = np.where(ref_zero_mask_col)[0]
-    ref_nonzero_idx = np.where(ref_nonzero_mask_col)[0]
-    if len(ref_zero_idx) == 0 or len(ref_nonzero_idx) == 0:
-        continue
-
-    # Map intervals to numeric
-    interval_to_numeric = {v: i for i, v in enumerate(interval_vals)}
-
-    # Mask for initial_table nonzero at zero indices
-    initial_col = initial_table.sel({subgroup_dim: g})
-    table_nonzero_mask = (initial_col.values[ref_zero_idx] != 0.0)
-    nonzero_reference_to_table_idx = ref_zero_idx[table_nonzero_mask]
-
-    # Find nearest nonzero for each zero needing imputation
-    nearest = ref_nonzero_idx[np.argmin(np.abs(nonzero_reference_to_table_idx[:, None] - ref_nonzero_idx), axis=1)]
-
-    # Impute for each (interval, group)
-    for i, (row, nrow) in enumerate(zip(nonzero_reference_to_table_idx, nearest)):
-        idx = {interval_dim: interval_vals[row], subgroup_dim: g}
-        nidx = {interval_dim: interval_vals[nrow], subgroup_dim: g}
-        # For all impute_dim values at this (interval, group)
-        for age in impute_vals:
-            idx_full = {**idx, impute_dim: age}
-            nidx_full = {**nidx, impute_dim: age}
-            # try:
-            imputed_val = (
-                initial_table.sel({interval_dim: interval_vals[row], subgroup_dim: g}).sel({impute_dim: age}).item()
-                * reference_table.sel(nidx_full).item()
-                / reference_summed.sel(nidx).item()
-            )
-            imputed.loc[idx_full] = imputed_val
-            except Exception:
-                continue
-
-# Broadcast all arrays to the same shape 
-ref_zero_mask_b, initial_table_b, standardized_table_b = xr.broadcast(
-    ref_zero_mask, initial_table, standardized_table
-)
-ref_nonzero_mask_b, reference_group_stk_b, reference_stk_b = xr.broadcast(
-    ref_nonzero_mask, reference_summed, reference_table
-)
-
-# Convert DataArrays to numpy arrays
-ref_zero_mask_np = ref_zero_mask_b.values
-initial_vals = initial_table_b.values
-standardized_vals = standardized_table_b.values
-ref_nonzero_mask_np = ref_nonzero_mask_b.values
-reference_group_vals = reference_group_stk_b.values
-reference_stk_vals = reference_stk_b.values
-
-# For each impute_variable coordinate, find the nearest nonzero along that axis
-ivar = impute_variable[0]
-ivals = reference_summed.coords[ivar].values
-zero_idx = np.where(ref_zero_mask_b)
-nonzero_idx = np.where(ref_nonzero_mask_b)
-
-# Build an array of nearest nonzero indices for each zero index
-nearest_nonzero_idx = np.full(ref_zero_mask_b.shape, -1, dtype=int)
-for i in range(ref_zero_mask_b.shape[0]):
-    nonzero_locs = np.where(ref_nonzero_mask_b[i])[0]
-    if len(nonzero_locs) == 0:
-        continue
-    zero_locs = np.where(ref_zero_mask_b[i])[0]
-    if len(zero_locs) == 0:
-        continue
-    # For each zero, find nearest nonzero
-    for j, zl in enumerate(zero_locs):
-        nearest = nonzero_locs[np.argmin(np.abs(zl - nonzero_locs))]
-        nearest_nonzero_idx[i, zl] = nearest
-
-# Get all group_columns coordinate combinations
-mask_dims = initial_table.coords[list(set(initial_table.coords) - {"length_bin"})[0]].values
-group_coords = {dim: reference_summed.coords[dim].values for dim in group_columns}
-impute_coords = {dim: reference_table.coords[dim].values for dim in impute_variable}
-
-# Get numpy arrays for all relevant DataArrays
-initial_vals = initial_table_b.values
-reference_group_vals = reference_group_stk_b.values
-reference_stk_vals = reference_stk_b.values
-
-# Only impute where zero in reference and nonzero in initial
-mask = (ref_zero_mask_b.values & (initial_vals != 0.0) & (nearest_nonzero_idx != -1))
-idx = np.where(mask)
-
-# Vectorized imputation
-imputed_table = standardized_table_b.copy()
-
-# Only impute where zero in reference and nonzero in initial
-mask = (ref_zero_mask_b & (initial_table_b != 0.0) & (nearest_nonzero_idx != -1))
-idx = np.where(mask)
-
-# Build indexers for advanced indexing
-idx_nonzero = (idx[0], nearest_nonzero_idx[idx])
-
-# Compute imputed values in numpy
-imputed_values = (
-    initial_vals[idx]
-    * reference_group_vals[idx_nonzero]
-    / reference_stk_vals[idx_nonzero]
-)
-
-
-# Build indexers for advanced indexing
-idx_nonzero = (idx[0], nearest_nonzero_idx[idx])
-imputed_values = (
-    initial_table_b[idx]
-    * reference_group_stk_b[idx_nonzero]
-    / reference_stk_b[idx_nonzero]
-)
-imputed_table.values[idx] = imputed_values
-
-# Check coordinates
-missing_coords = [c for c in initial_table.coords if c not in reference_summed.coords]
-if not all(col in reference_stk.columns for col in initial_table_df.columns):
-    # ---- Get difference
-    missing_columns = set(initial_table.coords).difference(reference_stk.columns)
-    # ---- Print warning
-    warnings.warn(
-        f"The following columns are missing from the reference table and will therefore be "
-        f"skipped during imputation: {', '.join(missing_columns)}. ",
-        stacklevel=2,
-    )
-
-# If imputation is requested, perform it
-if not impute:
-    result = standardized_table.unstack().to_xarray()
-    if population_table.name:
-        result.name = population_table.name
-    return result
-else:
-    # ---- Impute
-    standardized_table_imputed = impute_kriged_table(
-        reference_table_df=grouped_reference,
-        initial_table_df=population_table_reshape,
-        standardized_table_df=standardized_table,
-        table_reference_indices=reference_stack_indices,
-        group_by=group_columns,
-        impute_variable=impute_variable,
-    )
-    result = standardized_table_imputed.unstack().to_xarray()
-    if population_table.name:
-        result.name = population_table.name
-    return result
-
-# ##################################################################################################
-# Standardize the unaged abundance estimates to be distributed over age
-# ---------------------------------------------------------------------
-dict_ds_kriged_biomass_table["standardized_unaged"] = apportionment.distribute_unaged_from_aged(
-    population_table = dict_ds_kriged_biomass_table["unaged"],
-    reference_table = dict_ds_kriged_biomass_table["aged"],
-    group_columns = ["sex"],
-    impute=True,
-    impute_variable=["age_bin"],
-)
-
-# ##################################################################################################
-# Consolidate the kriged abundance estimates into a single DataFrame table
-# ------------------------------------------------------------------------
-da_kriged_abundance_table = apportionment.sum_population_tables(
-    population_table={
-        "aged": dict_ds_kriged_abundance_table["aged"],
-        "unaged": dict_ds_kriged_abundance_table["standardized_unaged"]
-    },
-)
-
-# ##################################################################################################
-# Consolidate the kriged biomass estimates into a single DataFrame table
-# -----------------------------------------------------------------------
-da_kriged_biomass_table = apportionment.sum_population_tables(
-    population_table={
-        "aged": dict_ds_kriged_biomass_table["aged"],
-        "unaged": dict_ds_kriged_biomass_table["standardized_unaged"]
-    },
-)
-
-# ##################################################################################################
-# Redistribute the kriged abundance estimates
-# -------------------------------------------
-
-# Re-allocate the age-1 abundance estimates
-ds_kriged_abundance_table_noage1 = apportionment.reallocate_excluded_estimates(
-    population_table=da_kriged_abundance_table,
-    exclusion_filter={"age_bin": [1]},
-    group_columns=["sex"],
-)
-
-####################################################################################################
-# Redistribute the kriged biomass estimates
-# -----------------------------------------
-
-# Re-allocate the age-1 abundance estimates
-ds_kriged_biomass_table_noage1 = apportionment.reallocate_excluded_estimates(
-    population_table=da_kriged_biomass_table,
-    exclusion_filter={"age_bin": [1]},
-    group_columns=["sex"],
-)
-
-
-# ##################################################################################################
-# Instantiate stratified analysis
-# -------------------------------
-# Model parameters for stratified analysis initialization
-JOLLYHAMPTON_PARAMETERS = {
-    "transects_per_latitude": 5,
-    "strata_transect_proportion": 0.75,
-    "num_replicates": 1000,
-}
-
-jh = stratified.JollyHampton(JOLLYHAMPTON_PARAMETERS)
-
-# ##################################################################################################
-# Compute coefficient of variation (CV) and other estimators for transect biomass
-# -------------------------------------------------------------------------------
-
-# Run bootstrapping procedure
-jh.stratified_bootstrap(
-    data_df=df_nasc_no_age1_prt, stratify_by=["geostratum_inpfc"], variable="biomass"
-)
-
-# Store replicates
-transect_replicates = jh.bootstrap_replicates
-
-# Compute summary statistics for each stratum and overall survey
-transect_results = jh.summarize(ci_percentile=0.95, ci_method="t-jackknife")
-print(transect_results)
-
-# ##################################################################################################
-# Create virtual transects for the gridded kriged biomass results
-# ---------------------------------------------------------------
-
-kriged_transects = jh.create_virtual_transects(
-    data_df=df_kriged_results,
-    geostrata_df=df_dict_geostrata["inpfc"],
-    stratify_by=["geostratum_inpfc"],
-    variable="biomass",
-)
-
-# ##################################################################################################
-# Compute CV and other estimators for kriged biomass
-# --------------------------------------------------
-
-# Run bootstrapping procedure
-jh.stratified_bootstrap(
-    data_df=kriged_transects, stratify_by=["geostratum_inpfc"], variable="biomass"
-)
-
-# Store replicates
-kriged_replicates = jh.bootstrap_replicates
-
-# Compute summary statistics for each stratum and overall survey
-kriged_results = jh.summarize(ci_percentile=0.95, ci_method="t-jackknife")
-print(kriged_results)
-
-# ##################################################################################################
-# Compare transect and kriged outputs
-# -----------------------------------
-
-print(
-    kriged_results.xs("mean", axis=1, level="metric")
-    - transect_results.xs("mean", axis=1, level="metric")
-)
\ No newline at end of file
diff --git a/echopop/workflows/nwfsc_feat/workflows/feat_krill.py b/echopop/workflows/nwfsc_feat/workflows/feat_krill.py
index 4970678c..6e1492ee 100644
--- a/echopop/workflows/nwfsc_feat/workflows/feat_krill.py
+++ b/echopop/workflows/nwfsc_feat/workflows/feat_krill.py
@@ -1,6 +1,5 @@
 from pathlib import Path
 
-from echopop import utils
 from echopop.ingest import sv
 from echopop.inversion import InversionMatrix, InvParameters, estimate_population
 
@@ -40,8 +39,16 @@
 # DATA SUBSET
 # ==================================================================================================
 
-sv_data_sub = utils.apply_filters(sv_data, include_filter={"transect_num": [1, 2, 3]})
-
+# sv_data_sub = sv_data.sel(transect_num=[1,2,3])
+if PROCESSING_METHOD == "transect":
+    sv_data_sub = sv_data.where(sv_data["transect_num"].isin([1, 2]), drop=True)
+elif PROCESSING_METHOD == "interval": 
+    sv_data_sub = sv_data.where((sv_data["transect_num"].isin([1])) & 
+                                sv_data["interval"].isin([13, 29, 30, 31]), 
+                                drop=True)
+else:
+    out = sv_data.sel(transect_num=2, interval=17)
+    sv_data_sub = out.where(out["layer"] < 15, drop=True)
 # ==================================================================================================
 # ==================================================================================================
 # DEFINE PARAMETERS
@@ -60,6 +67,12 @@
 }
 
 # Model-specific settings, including distributions
+ENVIRONMENT = {
+    "sound_speed_sw": 1500.0,
+    "density_sw": 1026.9,
+}
+
+
 MODEL_SETTINGS = {
     "type": "pcdwba",
     "taper_order": 10.0,
@@ -68,15 +81,10 @@
     "n_wavelength": 10,
     "orientation_distribution": {"family": "gaussian", "bins": 60},
     "length_distribution": {"family": "gaussian", "bins": 100},
-}
-
-ENVIRONMENT = {
-    "sound_speed_sw": 1500.0,
-    "density_sw": 1026.9,
+    "environment": ENVIRONMENT,
 }
 
 SIMULATION_SETTINGS = {
-    "environment": ENVIRONMENT,
     "monte_carlo": True,
     "mc_realizations": 20,
     "minimum_frequency_count": 2,
@@ -102,7 +110,6 @@
     },
 }
 
-
 def run_krill_inversion_workflow():
     # ==================================================================================================
     # ==================================================================================================
@@ -117,7 +124,6 @@ def run_krill_inversion_workflow():
     # ==================================================================================================
     # INITIALIZE
     # ==================================================================================================
-
     inversion_krill = InversionMatrix(sv_data_sub, SIMULATION_SETTINGS)
 
     # ==================================================================================================
@@ -131,7 +137,7 @@ def run_krill_inversion_workflow():
     # ==================================================================================================
     # BUILD AND FORMAT SCATTERING MODEL OPTIMIZERS
     # ==================================================================================================
-    df_inversion_results = inversion_krill.invert(optimization_kwargs=OPTIMIZATION_KWARGS)
+    inversion_results = inversion_krill.invert(optimization_kwargs=OPTIMIZATION_KWARGS)
 
     # ==================================================================================================
     # ==================================================================================================
@@ -139,9 +145,11 @@ def run_krill_inversion_workflow():
     # ==================================================================================================
 
     return estimate_population(
-        df_inversion_results,
+        inversion_results,
         nasc_coordinates,
-        aggregate_method="transect",
+        aggregate_method=PROCESSING_METHOD,
         density_sw=ENVIRONMENT["density_sw"],
         reference_frequency=120e3,
     )
+run_krill_inversion_workflow()
+

From 9aa779319625907939120ccce578d0c499e454b6 Mon Sep 17 00:00:00 2001
From: brandynlucca <brandyn.lucca@gmail.com>
Date: Tue, 13 Jan 2026 14:49:51 -0800
Subject: [PATCH 2/4] Minor formatting change

---
 echopop/workflows/nwfsc_feat/workflows/feat_krill.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/echopop/workflows/nwfsc_feat/workflows/feat_krill.py b/echopop/workflows/nwfsc_feat/workflows/feat_krill.py
index 6e1492ee..734a7f03 100644
--- a/echopop/workflows/nwfsc_feat/workflows/feat_krill.py
+++ b/echopop/workflows/nwfsc_feat/workflows/feat_krill.py
@@ -151,5 +151,6 @@ def run_krill_inversion_workflow():
         density_sw=ENVIRONMENT["density_sw"],
         reference_frequency=120e3,
     )
+    
 run_krill_inversion_workflow()
 

From c7d2b90e2c8fb7a0b04c4fcf35384723e59de1b2 Mon Sep 17 00:00:00 2001
From: brandynlucca <brandyn.lucca@gmail.com>
Date: Fri, 16 Jan 2026 13:22:54 -0800
Subject: [PATCH 3/4] Minor bugfix

---
 echopop/survey/proportions.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/echopop/survey/proportions.py b/echopop/survey/proportions.py
index 118aa79a..f37e1003 100644
--- a/echopop/survey/proportions.py
+++ b/echopop/survey/proportions.py
@@ -401,7 +401,9 @@ def binned_weights(
     )
 
     # Return the grouped sums
-    return result.groupby(group_columns, observed=False)["weight"].sum().to_xarray().fillna(0.0)
+    return result.groupby(
+        group_columns, observed=False
+    )["weight"].sum().to_xarray().fillna(0.0).astype(float)
 
 
 def calculate_adjusted_proportions(
@@ -1001,7 +1003,7 @@ def fitted_weight_proportions(
     grouped_reference_proportions = reference_weight_proportions.sum(
         dim=[d for d in reference_weight_proportions.dims if d not in stratum_dim]
     )
-
+    
     # Calculate the complementary proportions
     compl_data_props = 1 - grouped_reference_proportions
 

From 36852fbccce35a0a8ff61bef24c577a6735b0196 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 16 Jan 2026 21:23:19 +0000
Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 echopop/survey/proportions.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/echopop/survey/proportions.py b/echopop/survey/proportions.py
index f37e1003..9b5b344b 100644
--- a/echopop/survey/proportions.py
+++ b/echopop/survey/proportions.py
@@ -401,9 +401,13 @@ def binned_weights(
     )
 
     # Return the grouped sums
-    return result.groupby(
-        group_columns, observed=False
-    )["weight"].sum().to_xarray().fillna(0.0).astype(float)
+    return (
+        result.groupby(group_columns, observed=False)["weight"]
+        .sum()
+        .to_xarray()
+        .fillna(0.0)
+        .astype(float)
+    )
 
 
 def calculate_adjusted_proportions(
@@ -1003,7 +1007,7 @@ def fitted_weight_proportions(
     grouped_reference_proportions = reference_weight_proportions.sum(
         dim=[d for d in reference_weight_proportions.dims if d not in stratum_dim]
     )
-    
+
     # Calculate the complementary proportions
     compl_data_props = 1 - grouped_reference_proportions