From 613746e1c6253bbf98b124184b7891b98a1efc8b Mon Sep 17 00:00:00 2001 From: Jesse Cusack Date: Thu, 7 May 2026 14:31:19 -0700 Subject: [PATCH 01/10] find drifts --- src/glide/cli.py | 4 +- src/glide/profiles.py | 122 +++++++++++++++++++++++++++++++++++-- tests/test_process_l1.py | 127 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 247 insertions(+), 6 deletions(-) diff --git a/src/glide/cli.py b/src/glide/cli.py index 7a7a633..cbc35b5 100644 --- a/src/glide/cli.py +++ b/src/glide/cli.py @@ -228,7 +228,9 @@ def l2( merged = process_l1.calculate_thermodynamics(merged, conf) - out = profiles.get_profiles(merged, shallowest_profile, min_surface_time) + out = profiles.get_profiles( + merged, shallowest_profile, min_surface_time, flt=flt_raw + ) out = profiles.assign_surface_state(out, flt=flt_raw) diff --git a/src/glide/profiles.py b/src/glide/profiles.py index 18e8e8b..46707d3 100644 --- a/src/glide/profiles.py +++ b/src/glide/profiles.py @@ -7,12 +7,94 @@ _log = logging.getLogger(__name__) +def _detect_drift( + pressure: np.ndarray, + time: np.ndarray, + dt_s: float, + flt: xr.Dataset | None = None, + min_drift_duration: float = 600.0, + pressure_std_threshold: float = 2.0, + surface_pressure: float = 2.0, +) -> np.ndarray: + """Return boolean mask True where glider is drifting at depth. + + Uses x_target_hover_depth from raw flight data if available (non-NaN and + > 0 indicates hover/drift mode is active); falls back to a rolling-variance + detector on the pressure signal. A candidate region must have rolling + pressure std < pressure_std_threshold and pressure > surface_pressure + sustained for at least min_drift_duration seconds. + """ + n = len(pressure) + drift = np.zeros(n, dtype=bool) + + if flt is not None and "x_target_hover_depth" in flt: + flt_time = flt.m_present_time.values + hover_depth = flt.x_target_hover_depth.values + order = np.argsort(flt_time) + flt_time = flt_time[order] + hover_depth = hover_depth[order] + pos = np.searchsorted(flt_time, time) + pos_left = np.maximum(pos - 1, 0) + pos_right = np.minimum(pos, len(flt_time) - 1) + nearest = np.where( + np.abs(time - flt_time[pos_left]) <= np.abs(time - flt_time[pos_right]), + pos_left, + pos_right, + ) + drift = np.isfinite(hover_depth[nearest]) & (hover_depth[nearest] > 0) + _log.debug("Drift: %d points from x_target_hover_depth", int(drift.sum())) + return drift + + valid = np.isfinite(pressure) + if not valid.any(): + return drift + + # Window = min_drift_duration / 4; smaller window reduces edge miss at drift boundaries. + # np.convolve mode='same' returns max(n, window) elements, so cap half so window < n. + half = min((n - 1) // 2, max(2, round(min_drift_duration / 4 / dt_s))) + window = 2 * half + 1 + ones = np.ones(window) + + p_fill = np.where(valid, pressure, 0.0) + cnt_w = np.convolve(valid.astype(float), ones, mode="same") + sum_w = np.convolve(p_fill, ones, mode="same") + sumsq_w = np.convolve(p_fill**2, ones, mode="same") + + safe_cnt = np.where(cnt_w >= 3, cnt_w, 1.0) + mean_w = sum_w / safe_cnt + var_w = sumsq_w / safe_cnt - mean_w**2 + rolling_std = np.where(cnt_w >= 3, np.sqrt(np.maximum(0.0, var_w)), np.inf) + + candidate = ( + valid & (pressure > surface_pressure) & (rolling_std < pressure_std_threshold) + ) + + # Keep only contiguous runs that span at least min_drift_duration + i = 0 + while i < n: + if not candidate[i]: + i += 1 + continue + j = i + 1 + while j < n and candidate[j]: + j += 1 + if (j - i) * dt_s >= min_drift_duration: + drift[i:j] = True + i = j + + _log.debug("Drift: %d points from pressure-based detector", int(drift.sum())) + return drift + + def get_profiles( ds: xr.Dataset, shallowest_profile: float, min_surface_time: float = 180.0, + flt: xr.Dataset | None = None, + min_drift_duration: float = 600.0, + drift_pressure_std: float = 2.0, ) -> xr.Dataset: - """Identify dive and climb profiles from a pressure time series. + """Identify dive, climb, and drift profiles from a pressure time series. Parameters ---------- @@ -24,6 +106,15 @@ def get_profiles( Minimum time (seconds) between consecutive dive apexes. Used to compute sample-count thresholds so the detector adapts to the data sampling rate. A value of ~180 s works for most Slocum deployments. + flt : xr.Dataset, optional + Raw flight data. Used to detect drift-at-depth via BAW_HOVER_ACTIVE + when present; otherwise pressure-based detection is used. + min_drift_duration : float + Minimum duration (seconds) for a constant-pressure period to be + classified as drift (default 600 s). + drift_pressure_std : float + Rolling pressure standard deviation threshold (dbar) for pressure-based + drift detection (default 2.0 dbar). """ raw_diff = np.diff(ds.time.values) if np.issubdtype(raw_diff.dtype, np.timedelta64): @@ -32,6 +123,21 @@ def get_profiles( dt_s = float(np.nanmedian(raw_diff)) fs = 1.0 / dt_s + drift_mask = _detect_drift( + ds.pressure.values, + ds.time.values, + dt_s, + flt=flt, + min_drift_duration=min_drift_duration, + pressure_std_threshold=drift_pressure_std, + ) + + # Mask drift before profile finding so profinder sees a gap across the + # drift period; with missing="drop" it will pair the real climb after + # drift with the correct dive. + pressure_masked = ds.pressure.values.copy().astype(float) + pressure_masked[drift_mask] = np.nan + peaks_kwargs = { "height": shallowest_profile, "prominence": shallowest_profile, @@ -52,7 +158,7 @@ def get_profiles( ) profiles = find_profiles( - ds.pressure.values, + pressure_masked, peaks_kwargs=peaks_kwargs, troughs_kwargs=troughs_kwargs, missing="drop", @@ -85,6 +191,12 @@ def get_profiles( climb_counter += 1 profile_counter += 1 + # Override drift points, clearing any profile membership assigned above + state[drift_mask] = 3 + dive_id[drift_mask] = -1 + climb_id[drift_mask] = -1 + profile_id[drift_mask] = -1 + ds["dive_id"] = ("time", dive_id, dict(_FillValue=np.int32(-1))) ds["climb_id"] = ("time", climb_id, dict(_FillValue=np.int32(-1))) ds["profile_id"] = ("time", profile_id, dict(_FillValue=np.int32(-1))) @@ -93,9 +205,9 @@ def get_profiles( state, dict( long_name="Glider state", - flag_values=np.array([-1, 0, 1, 2], "b"), - flag_meanings="unknown surface dive climb", - valid_max=np.int8(2), + flag_values=np.array([-1, 0, 1, 2, 3], "b"), + flag_meanings="unknown surface dive climb drift", + valid_max=np.int8(3), valid_min=np.int8(-1), ), ) diff --git a/tests/test_process_l1.py b/tests/test_process_l1.py index 09a555b..689e293 100644 --- a/tests/test_process_l1.py +++ b/tests/test_process_l1.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd +import pytest import xarray as xr import glide.gliderdac as gd @@ -911,3 +912,129 @@ def test_emit_ioos_profiles_yo_yo_shares_time_uv(tmp_path) -> None: # All four profiles share the same scalar time_uv (= the segment's value). assert all(t == time_uvs[0] for t in time_uvs) assert time_uvs[0] == 25.0 + + +def _make_drift_at_depth_dataset(dt_s: float) -> tuple[xr.Dataset, float, float, float]: + """Synthetic glider dataset with one dive-overshoot-drift-climb cycle. + + Returns (ds, t_dive_start, t_drift_start, t_climb_start) in seconds. + + Pressure trace: + - Surface at ~0.5 dbar for 120 s + - Dive to 105 dbar (target 100, overshoot by 5 dbar) over 1500 s + - Overshoot correction: 105→100 dbar over 120 s ← wrongly identified as + the climb before this fix + - Drift at 100 ± 2 dbar (sinusoidal wobble, period 300 s) for 3600 s + - Climb from 100 to surface over 1500 s + - Surface at ~0.5 dbar for 120 s + """ + t_surface = 120.0 + t_dive = 1500.0 + t_overshoot = 120.0 + t_drift = 3600.0 + t_climb = 1500.0 + + t0_dive = t_surface + t0_overshoot = t0_dive + t_dive + t0_drift = t0_overshoot + t_overshoot + t0_climb = t0_drift + t_drift + t_total = t0_climb + t_climb + t_surface + + t = np.arange(0.0, t_total, dt_s) + p = np.full(len(t), 0.5) + + mask = (t >= t0_dive) & (t < t0_overshoot) + p[mask] = 105.0 * (t[mask] - t0_dive) / t_dive + + mask = (t >= t0_overshoot) & (t < t0_drift) + p[mask] = 105.0 - 5.0 * (t[mask] - t0_overshoot) / t_overshoot + + mask = (t >= t0_drift) & (t < t0_climb) + p[mask] = 100.0 + 2.0 * np.sin(2.0 * np.pi * (t[mask] - t0_drift) / 300.0) + + mask = (t >= t0_climb) & (t < t0_climb + t_climb) + p[mask] = 100.0 * (1.0 - (t[mask] - t0_climb) / t_climb) + + ds = xr.Dataset({"pressure": ("time", p)}, coords={"time": t}) + return ds, t0_dive, t0_drift, t0_climb + + +@pytest.mark.parametrize( + "dt_s", + [1.0, 15.0], + ids=["post_recovery_1s", "realtime_15s"], +) +def test_get_profiles_drift_at_depth(dt_s: float) -> None: + """Drift-at-depth is detected and labelled state=3; the real climb after + drift must span the full water column rather than being cut off. + + Without drift masking, the overshoot correction (105→100 dbar before the + drift plateau) would be mis-identified as the climb portion, leaving the + actual ascent unlabelled. + """ + ds, _t_dive, t_drift_start, t_climb_start = _make_drift_at_depth_dataset(dt_s) + + result = prof.get_profiles(ds, shallowest_profile=5.0, min_surface_time=180.0) + + state = result.state.values + time = ds.time.values + + # Most of the drift plateau must be labelled state=3 + drift_state = state[(time >= t_drift_start) & (time < t_climb_start)] + assert (drift_state == 3).mean() > 0.7, ( + f"Only {(drift_state == 3).mean():.1%} of drift period is state=3 at dt_s={dt_s}" + ) + + # Most of the real climb after drift must be labelled state=2 + climb_state = state[time >= t_climb_start] + assert (climb_state == 2).mean() > 0.7, ( + f"Only {(climb_state == 2).mean():.1%} of real climb is state=2 at dt_s={dt_s}" + ) + + # The climb profile must span the full water column + climb_pressures = ds.pressure.values[state == 2] + assert climb_pressures.max() > 90.0, ( + "Climb profile must include deep data near 100 dbar" + ) + assert climb_pressures.min() < 5.0, "Climb profile must reach the near-surface" + + # state=3 (drift) must appear in the CF flag_values + assert 3 in result.state.attrs["flag_values"] + + +def test_get_profiles_drift_x_target_hover_depth() -> None: + """x_target_hover_depth in raw flight data takes precedence over pressure-based + drift detection and marks the exact drift period as state=3.""" + dt_s = 15.0 + ds, _t_dive, t_drift_start, t_climb_start = _make_drift_at_depth_dataset(dt_s) + + time = ds.time.values + + # Synthetic flight data at 4 s intervals with target hover depth set during drift + flt_time = np.arange(0.0, time[-1] + 4.0, 4.0) + hover_depth = np.full(len(flt_time), np.nan) + hover_depth[(flt_time >= t_drift_start) & (flt_time < t_climb_start)] = 100.0 + + flt = xr.Dataset( + { + "m_present_time": ("i", flt_time), + "x_target_hover_depth": ("i", hover_depth), + } + ) + + result = prof.get_profiles( + ds, shallowest_profile=5.0, min_surface_time=180.0, flt=flt + ) + + state = result.state.values + drift_mask = (time >= t_drift_start) & (time < t_climb_start) + + # BAW_HOVER_ACTIVE covers the drift period exactly, so detection should be + # almost complete (within one 4 s flight sample of each boundary) + assert (state[drift_mask] == 3).mean() > 0.9, ( + "BAW_HOVER_ACTIVE should detect drift with >90% coverage" + ) + + # Real climb after drift must still be captured + climb_state = state[time >= t_climb_start] + assert (climb_state == 2).mean() > 0.7 From 67587dffe841cc1985f8f5f1bb6768377ff96149 Mon Sep 17 00:00:00 2001 From: Jesse Cusack Date: Thu, 7 May 2026 21:52:41 -0700 Subject: [PATCH 02/10] fix profile weirdness --- src/glide/profiles.py | 312 ++++++++++++++++++++++++++------- tests/test_process_l1.py | 365 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 603 insertions(+), 74 deletions(-) diff --git a/src/glide/profiles.py b/src/glide/profiles.py index 46707d3..55aa52c 100644 --- a/src/glide/profiles.py +++ b/src/glide/profiles.py @@ -18,32 +18,41 @@ def _detect_drift( ) -> np.ndarray: """Return boolean mask True where glider is drifting at depth. - Uses x_target_hover_depth from raw flight data if available (non-NaN and - > 0 indicates hover/drift mode is active); falls back to a rolling-variance - detector on the pressure signal. A candidate region must have rolling - pressure std < pressure_std_threshold and pressure > surface_pressure - sustained for at least min_drift_duration seconds. + Uses x_hover_active from raw flight data if available (reported as 1 when + a hover episode starts and 0 when it ends, with -127 as the missing-data + sentinel; the most recent reported value is forward-filled to every science + timestamp). Falls back to a rolling-variance detector on the pressure + signal: a candidate region must have rolling pressure std < + pressure_std_threshold and pressure > surface_pressure sustained for at + least min_drift_duration seconds. """ n = len(pressure) drift = np.zeros(n, dtype=bool) - if flt is not None and "x_target_hover_depth" in flt: + if flt is not None and "x_hover_active" in flt: flt_time = flt.m_present_time.values - hover_depth = flt.x_target_hover_depth.values - order = np.argsort(flt_time) - flt_time = flt_time[order] - hover_depth = hover_depth[order] - pos = np.searchsorted(flt_time, time) - pos_left = np.maximum(pos - 1, 0) - pos_right = np.minimum(pos, len(flt_time) - 1) - nearest = np.where( - np.abs(time - flt_time[pos_left]) <= np.abs(time - flt_time[pos_right]), - pos_left, - pos_right, + hover_active = flt.x_hover_active.values + # -127 is the no-data sentinel for the signed-byte variable; treat as + # missing along with NaN. Forward-fill the most recent {0,1} report. + finite = ( + np.isfinite(hover_active) & np.isfinite(flt_time) & (hover_active != -127) ) - drift = np.isfinite(hover_depth[nearest]) & (hover_depth[nearest] > 0) - _log.debug("Drift: %d points from x_target_hover_depth", int(drift.sum())) - return drift + if finite.any(): + flt_time = flt_time[finite] + hover_active = hover_active[finite] + order = np.argsort(flt_time) + flt_time = flt_time[order] + hover_active = hover_active[order] + pos = np.searchsorted(flt_time, time, side="right") - 1 + in_range = pos >= 0 + drift[in_range] = hover_active[pos[in_range]] == 1 + _log.debug( + "Drift: %d points from x_hover_active (%d reports)", + int(drift.sum()), + len(flt_time), + ) + return drift + _log.debug("x_hover_active has no finite values; using pressure detector") valid = np.isfinite(pressure) if not valid.any(): @@ -86,6 +95,108 @@ def _detect_drift( return drift +def _absorb_apex_unknowns( + state: np.ndarray, + dive_id: np.ndarray, + climb_id: np.ndarray, + profile_id: np.ndarray, + pressure: np.ndarray, + dt_s: float, + max_gap_duration: float, + surface_pressure: float = 2.0, +) -> None: + """Absorb short unknown gaps at the underwater apex of a multi-yo segment. + + profinder occasionally leaves a few samples between dive_end and + climb_start unclassified at the peak. When such a gap is short, sits at + depth (max pressure > surface_pressure), and is sandwiched directly + between a dive (1) and a climb (2), split it at the pressure maximum: + points up to and including the max are folded into the preceding dive, + points after into the following climb. + """ + n = len(state) + unknown = state == -1 + if not unknown.any(): + return + + diff = np.diff(unknown.astype(int), prepend=0, append=0) + starts = np.where(diff == 1)[0] + ends = np.where(diff == -1)[0] + max_samples = max(2, round(max_gap_duration / dt_s)) + + for s, e in zip(starts, ends): + if (e - s) > max_samples: + continue + if s == 0 or e >= n: + continue + if state[s - 1] != 1 or state[e] != 2: + continue + p = pressure[s:e] + finite = np.isfinite(p) + if not finite.any() or float(np.min(p[finite])) < surface_pressure: + continue + + local_max = int(np.argmax(np.where(finite, p, -np.inf))) + split = s + local_max + 1 + + state[s:split] = 1 + dive_id[s:split] = dive_id[s - 1] + profile_id[s:split] = profile_id[s - 1] + + state[split:e] = 2 + climb_id[split:e] = climb_id[e] + profile_id[split:e] = profile_id[e] + + +def _absorb_post_drift_transients( + state: np.ndarray, + dive_id: np.ndarray, + climb_id: np.ndarray, + profile_id: np.ndarray, + drift_mask: np.ndarray, + dt_s: float, + max_transient_duration: float, +) -> None: + """Fold brief dive/unknown segments between a drift end and the next climb + into that climb. + + Masking the drift period leaves a discontinuous pressure signal across the + gap, which occasionally causes profinder to detect a spurious tiny peak + just after the drift in addition to the real climb. This pass extends the + climb backward to the drift end if the only intervening states are short + dive (1) or unknown (-1) segments. Anything longer than + max_transient_duration past the drift end is left alone. + """ + n = len(state) + if not drift_mask.any(): + return + + drift_diff = np.diff(drift_mask.astype(int), prepend=0, append=0) + drift_ends = np.where(drift_diff == -1)[0] + window = max(2, round(max_transient_duration / dt_s)) + + for de in drift_ends: + if de >= n: + continue + end = min(de + window, n) + in_window = state[de:end] + climb_local = np.where(in_window == 2)[0] + if len(climb_local) == 0: + continue + first_climb = de + int(climb_local[0]) + if first_climb == de: + continue + intermediate = state[de:first_climb] + if not np.all((intermediate == 1) | (intermediate == -1)): + continue + cid = climb_id[first_climb] + pid = profile_id[first_climb] + state[de:first_climb] = 2 + climb_id[de:first_climb] = cid + profile_id[de:first_climb] = pid + dive_id[de:first_climb] = -1 + + def get_profiles( ds: xr.Dataset, shallowest_profile: float, @@ -93,6 +204,8 @@ def get_profiles( flt: xr.Dataset | None = None, min_drift_duration: float = 600.0, drift_pressure_std: float = 2.0, + stall_tolerance: float = 180.0, + min_pressure_rate: float = 0.05, ) -> xr.Dataset: """Identify dive, climb, and drift profiles from a pressure time series. @@ -115,6 +228,16 @@ def get_profiles( drift_pressure_std : float Rolling pressure standard deviation threshold (dbar) for pressure-based drift detection (default 2.0 dbar). + stall_tolerance : float + Maximum time (seconds) the glider can stall or briefly reverse during + a dive or climb without profinder terminating the profile (default + 180 s = 3 min, sized to handle realistic glider stalls during + ascent). Converted to profinder's per-sample `run_length` argument. + min_pressure_rate : float + Minimum pressure change rate (dbar/s) for a sample to count toward + ascent or descent classification (default 0.05 dbar/s ≈ 5 cm/s, well + below normal glider vertical speed of 10–20 cm/s). Converted to + profinder's per-sample `min_pressure_change` argument. """ raw_diff = np.diff(ds.time.values) if np.issubdtype(raw_diff.dtype, np.timedelta64): @@ -145,7 +268,10 @@ def get_profiles( "width": max(2, round(20 * fs)), # ≥20 s half-width detects ~10 m dives } troughs_kwargs = { - "prominence": 2, + # Real inter-profile troughs (surfacings or inter-yo apexes) have a + # depth contrast at least as large as a real dive's prominence; a + # smaller "trough" is a stall or noise dip within a profile. + "prominence": shallowest_profile, "distance": max(2, round(30 * fs)), "width": max(1, round(5 * fs)), } @@ -157,10 +283,19 @@ def get_profiles( troughs_kwargs, ) + # Scale stall tolerance and pressure-rate threshold to per-sample units. + # These guard the climb/dive run from being terminated by brief stalls + # (e.g. glider hangs at depth and drifts back down a few dbar before + # resuming the ascent). + run_length = max(2, round(stall_tolerance / dt_s)) + min_pressure_change = float(min_pressure_rate * dt_s) + profiles = find_profiles( pressure_masked, peaks_kwargs=peaks_kwargs, troughs_kwargs=troughs_kwargs, + run_length=run_length, + min_pressure_change=min_pressure_change, missing="drop", ) @@ -197,6 +332,20 @@ def get_profiles( climb_id[drift_mask] = -1 profile_id[drift_mask] = -1 + _absorb_post_drift_transients( + state, dive_id, climb_id, profile_id, drift_mask, dt_s, min_surface_time + ) + + _absorb_apex_unknowns( + state, + dive_id, + climb_id, + profile_id, + ds.pressure.values, + dt_s, + min_surface_time, + ) + ds["dive_id"] = ("time", dive_id, dict(_FillValue=np.int32(-1))) ds["climb_id"] = ("time", climb_id, dict(_FillValue=np.int32(-1))) ds["profile_id"] = ("time", profile_id, dict(_FillValue=np.int32(-1))) @@ -221,61 +370,102 @@ def assign_surface_state( dt: float = 300.0, surface_pressure: float = 2.0, ) -> xr.Dataset: - """Assign surface state (0) to unknown points near GPS fixes. - - A point with state == -1 (unknown) is marked as surface (0) only if it is - within `dt` seconds of a valid GPS fix AND shallower than `surface_pressure` - dbar. The pressure gate prevents the broad GPS time window from reaching - into adjacent dives. + """Assign surface state (0) to unknown points. + + Two checks are applied in sequence: + + 1. A point with state == -1 (unknown) is marked as surface if it is within + `dt` seconds of a valid GPS fix AND shallower than `surface_pressure` + dbar. The pressure gate prevents the broad GPS time window from + reaching into adjacent dives. + 2. Any remaining contiguous unknown segment is marked as surface unless + its pressure record contains a finite value at or above + `surface_pressure` (i.e. evidence the glider was deeper). Missing + pressure is the expected state at the surface — the science sensor + is often out of water — so all-NaN unknown segments are classified + as surface. This covers the period before the first dive, after the + last climb, and any between-profile gap. Parameters ---------- ds : xr.Dataset - Dataset with state variable from get_profiles. + Dataset with state and pressure variables from get_profiles. flt : xr.Dataset, optional Raw flight data containing m_gps_lat/lon with valid GPS fix times. + If absent, only the pressure-based check runs. dt : float Time threshold in seconds for matching to GPS fixes (default 300). surface_pressure : float Maximum pressure (dbar) for a point to be considered at the surface (default 2.0). """ - if flt is None or "m_gps_lat" not in flt: - _log.warning("No flight data with GPS, cannot assign surface state") - return ds - - gps_valid = np.isfinite(flt.m_gps_lat.values) - if not gps_valid.any(): - _log.warning("No valid GPS fixes found") - return ds - - gps_times = np.sort(flt.m_present_time.values[gps_valid]) - state = ds.state.values.copy() + pressure = ds.pressure.values time_l2 = ds.time.values - unknown_mask = state == -1 - - if unknown_mask.any(): - unknown_times = time_l2[unknown_mask] - pos = np.searchsorted(gps_times, unknown_times) - dist_left = np.where( - pos > 0, - unknown_times - gps_times[np.maximum(pos - 1, 0)], - np.inf, - ) - dist_right = np.where( - pos < len(gps_times), - gps_times[np.minimum(pos, len(gps_times) - 1)] - unknown_times, - np.inf, - ) - is_near = np.minimum(dist_left, dist_right) <= dt - is_shallow = np.isfinite(ds.pressure.values[unknown_mask]) & ( - ds.pressure.values[unknown_mask] < surface_pressure - ) - is_near = is_near & is_shallow - state[unknown_mask] = np.where(is_near, np.int8(0), state[unknown_mask]) - _log.debug("Assigned %d points to surface state", int(is_near.sum())) + if flt is not None and "m_gps_lat" in flt: + gps_valid = np.isfinite(flt.m_gps_lat.values) + if gps_valid.any(): + gps_times = np.sort(flt.m_present_time.values[gps_valid]) + unknown_mask = state == -1 + if unknown_mask.any(): + unknown_times = time_l2[unknown_mask] + pos = np.searchsorted(gps_times, unknown_times) + dist_left = np.where( + pos > 0, + unknown_times - gps_times[np.maximum(pos - 1, 0)], + np.inf, + ) + dist_right = np.where( + pos < len(gps_times), + gps_times[np.minimum(pos, len(gps_times) - 1)] - unknown_times, + np.inf, + ) + is_near = np.minimum(dist_left, dist_right) <= dt + is_shallow = np.isfinite(pressure[unknown_mask]) & ( + pressure[unknown_mask] < surface_pressure + ) + state[unknown_mask] = np.where( + is_near & is_shallow, np.int8(0), state[unknown_mask] + ) + _log.debug( + "GPS-proximity surface check: %d points assigned", + int((is_near & is_shallow).sum()), + ) + else: + _log.warning("No valid GPS fixes found") + else: + _log.warning("No flight data with GPS, skipping GPS-proximity surface check") + + # Whole-segment pressure check: any contiguous unknown run that never + # exceeds surface_pressure is the glider sitting at the surface + unknown = state == -1 + if unknown.any(): + n = len(state) + diff = np.diff(unknown.astype(int), prepend=0, append=0) + starts = np.where(diff == 1)[0] + ends = np.where(diff == -1)[0] + n_assigned = 0 + for s, e in zip(starts, ends): + p = pressure[s:e] + finite = np.isfinite(p) + # Skip if there is finite evidence the glider was underwater. + if finite.any() and float(np.max(p[finite])) >= surface_pressure: + continue + # An all-NaN gap bounded by dive→climb is the underwater apex + # that the apex absorber couldn't split because pressure was + # missing. Leave it unknown rather than mislabel as surface. + if ( + not finite.any() + and 0 < s + and e < n + and state[s - 1] == 1 + and state[e] == 2 + ): + continue + state[s:e] = 0 + n_assigned += int(e - s) + _log.debug("Whole-segment surface check: %d points assigned", n_assigned) ds["state"] = ("time", state, ds.state.attrs) return ds diff --git a/tests/test_process_l1.py b/tests/test_process_l1.py index 689e293..c3fed7d 100644 --- a/tests/test_process_l1.py +++ b/tests/test_process_l1.py @@ -375,19 +375,124 @@ def test_assign_surface_state_pressure_gate() -> None: def test_assign_surface_state_no_flight_data() -> None: - """Test that assign_surface_state handles missing flight data gracefully.""" + """Without flight data the GPS-proximity check is skipped, but shallow + unknown segments are still classified as surface by the pressure-only pass.""" n = 20 state = np.full(n, -1, dtype="b") state[5:15] = 1 + pressure = np.full(n, 0.5) + pressure[5:15] = 50.0 # dive at depth ds = xr.Dataset( - {"state": ("time", state)}, + {"state": ("time", state), "pressure": ("time", pressure)}, coords={"time": np.arange(n, dtype=float)}, ) - # No flight data - should return unchanged - result = prof.assign_surface_state(ds, flt=None) - assert np.array_equal(result.state.values, state) + result = prof.assign_surface_state(ds, flt=None, surface_pressure=2.0) + + assert np.all(result.state.values[5:15] == 1), "Dive states unchanged" + assert np.all(result.state.values[:5] == 0), "Pre-dive shallow → surface" + assert np.all(result.state.values[15:] == 0), "Post-dive shallow → surface" + + +def test_assign_surface_state_pre_first_dive_and_post_last_climb() -> None: + """Time before the first dive and after the last climb is classified as + surface when pressure stays below surface_pressure.""" + n = 50 + state = np.full(n, -1, dtype="b") + state[10:20] = 1 # dive + state[20:30] = 2 # climb + pressure = np.full(n, 0.3) + pressure[10:20] = np.linspace(0.3, 80, 10) + pressure[20:30] = np.linspace(80, 0.3, 10) + + ds = xr.Dataset( + {"state": ("time", state), "pressure": ("time", pressure)}, + coords={"time": np.arange(n, dtype=float)}, + ) + + result = prof.assign_surface_state(ds, flt=None, surface_pressure=2.0) + + assert np.all(result.state.values[:10] == 0), "Pre-first-dive → surface" + assert np.all(result.state.values[30:] == 0), "Post-last-climb → surface" + assert np.all(result.state.values[10:20] == 1) + assert np.all(result.state.values[20:30] == 2) + + +def test_assign_surface_state_all_nan_pressure_is_surface() -> None: + """Surface segments where the science pressure sensor is out of water + (all NaN) must still be classified as surface.""" + n = 30 + state = np.full(n, -1, dtype="b") + state[5:15] = 1 # dive + state[15:20] = 2 # climb + pressure = np.full(n, np.nan) + pressure[5:15] = np.linspace(2, 80, 10) + pressure[15:20] = np.linspace(80, 2, 5) + # Indices 0–4 and 20–29 have NaN pressure (surface, sensor out of water) + + ds = xr.Dataset( + {"state": ("time", state), "pressure": ("time", pressure)}, + coords={"time": np.arange(n, dtype=float)}, + ) + + result = prof.assign_surface_state(ds, flt=None, surface_pressure=2.0) + + assert np.all(result.state.values[:5] == 0), "Pre-dive all-NaN → surface" + assert np.all(result.state.values[20:] == 0), "Post-climb all-NaN → surface" + assert np.all(result.state.values[5:15] == 1) + assert np.all(result.state.values[15:20] == 2) + + +def test_assign_surface_state_protects_apex_nan_gap() -> None: + """An all-NaN unknown gap sandwiched between a dive and a climb is the + underwater apex (the apex absorber couldn't split it because pressure + was missing). It must NOT be reclassified as surface.""" + n = 30 + state = np.full(n, -1, dtype="b") + state[5:13] = 1 # dive + state[16:25] = 2 # climb + pressure = np.full(n, np.nan) + pressure[5:13] = np.linspace(2, 80, 8) + pressure[16:25] = np.linspace(80, 2, 9) + # Indices 13–15 NaN (sensor briefly missing at the apex) + + ds = xr.Dataset( + {"state": ("time", state), "pressure": ("time", pressure)}, + coords={"time": np.arange(n, dtype=float)}, + ) + + result = prof.assign_surface_state(ds, flt=None, surface_pressure=2.0) + + assert np.all(result.state.values[13:16] == -1), ( + "Apex NaN gap must stay unknown, not flip to surface" + ) + assert np.all(result.state.values[5:13] == 1) + assert np.all(result.state.values[16:25] == 2) + + +def test_assign_surface_state_keeps_aborted_dive_unknown() -> None: + """An unknown segment that contains a partial dive (pressure exceeds + surface_pressure) must not be reclassified as surface.""" + n = 30 + state = np.full(n, -1, dtype="b") + state[20:25] = 2 # climb + pressure = np.full(n, 0.3) + # Aborted dive between samples 5–15: pressure briefly reaches 4 dbar + pressure[5:15] = np.array([1.0, 2.0, 3.0, 4.0, 4.0, 4.0, 3.0, 2.0, 1.0, 0.5]) + pressure[20:25] = np.linspace(40, 0.3, 5) + + ds = xr.Dataset( + {"state": ("time", state), "pressure": ("time", pressure)}, + coords={"time": np.arange(n, dtype=float)}, + ) + + result = prof.assign_surface_state(ds, flt=None, surface_pressure=2.0) + + # The whole pre-climb segment contains a sample > surface_pressure → stays unknown + assert np.all(result.state.values[:20] == -1) + # Post-climb stays surface (pressure shallow throughout) + assert np.all(result.state.values[25:] == 0) def test_add_velocity_groups_by_velocity_reports() -> None: @@ -1002,23 +1107,88 @@ def test_get_profiles_drift_at_depth(dt_s: float) -> None: assert 3 in result.state.attrs["flag_values"] -def test_get_profiles_drift_x_target_hover_depth() -> None: - """x_target_hover_depth in raw flight data takes precedence over pressure-based - drift detection and marks the exact drift period as state=3.""" +@pytest.mark.parametrize( + "dt_s", + [1.0, 15.0], + ids=["post_recovery_1s", "realtime_15s"], +) +def test_get_profiles_climb_with_stall(dt_s: float) -> None: + """A brief stall during the climb (glider hangs at ~20 dbar and drifts a + few dbar back down before resuming the ascent) must not split the climb + in two. The stall_tolerance default rides over the reversal at both + sampling rates because it is specified in seconds, not samples.""" + t_surface = 60.0 + t_dive = 800.0 + t_climb_pre_stall = 600.0 # 100 → 20 dbar + t_stall = 120.0 # hangs at 20–22 dbar for 2 min (within 3 min tolerance) + t_climb_post_stall = 400.0 # 22 → 0 dbar + t_total = ( + t_surface + t_dive + t_climb_pre_stall + t_stall + t_climb_post_stall + 60.0 + ) + + t = np.arange(0.0, t_total, dt_s) + p = np.full(len(t), 0.5) + + t0_dive = t_surface + t0_climb_pre = t0_dive + t_dive + t0_stall = t0_climb_pre + t_climb_pre_stall + t0_climb_post = t0_stall + t_stall + t0_end = t0_climb_post + t_climb_post_stall + + mask = (t >= t0_dive) & (t < t0_climb_pre) + p[mask] = 100.0 * (t[mask] - t0_dive) / t_dive + + mask = (t >= t0_climb_pre) & (t < t0_stall) + p[mask] = 100.0 - 80.0 * (t[mask] - t0_climb_pre) / t_climb_pre_stall + + # Stall: drift slowly back down from 20 to 22 dbar over 2 min + mask = (t >= t0_stall) & (t < t0_climb_post) + p[mask] = 20.0 + 2.0 * (t[mask] - t0_stall) / t_stall + + mask = (t >= t0_climb_post) & (t < t0_end) + p[mask] = 22.0 * (1.0 - (t[mask] - t0_climb_post) / t_climb_post_stall) + + # Leading NaN sidesteps a profinder edge case where valid_idx is undefined + # for NaN-free input. + p[0] = np.nan + + ds = xr.Dataset({"pressure": ("time", p)}, coords={"time": t}) + + result = prof.get_profiles(ds, shallowest_profile=5.0, min_surface_time=180.0) + + state = result.state.values + # The post-stall climb must be classified as climb, not unknown + post_stall_state = state[(t >= t0_climb_post) & (t < t0_end)] + assert (post_stall_state == 2).mean() > 0.7, ( + f"Post-stall climb only {(post_stall_state == 2).mean():.1%} state=2 at dt_s={dt_s}" + ) + # The whole climb (pre-stall + stall + post-stall) is one climb_id + climb_ids = np.unique(result.climb_id.values[result.climb_id.values >= 0]) + assert len(climb_ids) == 1, f"Expected 1 climb, got {len(climb_ids)} at dt_s={dt_s}" + + +def test_get_profiles_drift_x_hover_active() -> None: + """x_hover_active in raw flight data takes precedence over pressure-based + drift detection and marks the exact drift period as state=3. + + The Slocum flight computer reports x_hover_active only on transitions + (1 when a hover starts, 0 when it ends, -127 missing otherwise); the + detector must forward-fill the most recent {0,1} report. + """ dt_s = 15.0 ds, _t_dive, t_drift_start, t_climb_start = _make_drift_at_depth_dataset(dt_s) time = ds.time.values - # Synthetic flight data at 4 s intervals with target hover depth set during drift - flt_time = np.arange(0.0, time[-1] + 4.0, 4.0) - hover_depth = np.full(len(flt_time), np.nan) - hover_depth[(flt_time >= t_drift_start) & (flt_time < t_climb_start)] = 100.0 + # Sparse flight reports: 0 at start, 1 when hover begins, 0 when it ends. + # Mix in a -127 sentinel to confirm it is filtered out. + flt_time = np.array([0.0, 60.0, t_drift_start, t_climb_start]) + hover_active = np.array([0.0, -127.0, 1.0, 0.0]) flt = xr.Dataset( { "m_present_time": ("i", flt_time), - "x_target_hover_depth": ("i", hover_depth), + "x_hover_active": ("i", hover_active), } ) @@ -1038,3 +1208,172 @@ def test_get_profiles_drift_x_target_hover_depth() -> None: # Real climb after drift must still be captured climb_state = state[time >= t_climb_start] assert (climb_state == 2).mean() > 0.7 + + +def test_absorb_post_drift_transients() -> None: + """Brief dive (state=1) and unknown (state=-1) segments wedged between + a drift period and the real climb are absorbed into the climb.""" + n = 50 + state = np.full(n, -1, dtype="b") + drift_mask = np.zeros(n, dtype=bool) + dive_id = np.full(n, -1, dtype="i4") + climb_id = np.full(n, -1, dtype="i4") + profile_id = np.full(n, -1, dtype="i4") + + state[5:15] = 1 # main dive + dive_id[5:15] = 1 + profile_id[5:15] = 1 + + state[15:30] = 3 # drift + drift_mask[15:30] = True + + state[30:32] = 1 # spurious brief dive (transient) + dive_id[30:32] = 2 + profile_id[30:32] = 2 + + # state[32:34] left as -1 (unknown gap) + + state[34:48] = 2 # real climb + climb_id[34:48] = 1 + profile_id[34:48] = 3 + + prof._absorb_post_drift_transients( + state, + dive_id, + climb_id, + profile_id, + drift_mask, + dt_s=10.0, + max_transient_duration=180.0, + ) + + # The transient between drift and climb is now part of the climb + assert np.all(state[30:34] == 2) + assert np.all(climb_id[30:34] == 1) + assert np.all(profile_id[30:34] == 3) + assert np.all(dive_id[30:34] == -1) + + # Drift, main dive, and real climb are unchanged + assert np.all(state[5:15] == 1) + assert np.all(state[15:30] == 3) + assert np.all(state[34:48] == 2) + + +def test_absorb_apex_unknowns_splits_at_max_pressure() -> None: + """A short unknown gap at the underwater apex between a dive and a climb + is split at the pressure maximum: pre-max points become dive, post-max + points become climb.""" + n = 30 + state = np.full(n, -1, dtype="b") + dive_id = np.full(n, -1, dtype="i4") + climb_id = np.full(n, -1, dtype="i4") + profile_id = np.full(n, -1, dtype="i4") + pressure = np.zeros(n) + + state[5:13] = 1 + dive_id[5:13] = 1 + profile_id[5:13] = 1 + pressure[5:13] = np.linspace(10, 80, 8) + + # Indices 13–17 unknown, with peak (90 dbar) at index 15 + pressure[13:17] = [85, 88, 90, 87] + + state[17:25] = 2 + climb_id[17:25] = 1 + profile_id[17:25] = 2 + pressure[17:25] = np.linspace(85, 5, 8) + + prof._absorb_apex_unknowns( + state, + dive_id, + climb_id, + profile_id, + pressure, + dt_s=10.0, + max_gap_duration=180.0, + surface_pressure=2.0, + ) + + # Up to and including the max → dive + assert np.all(state[13:16] == 1) + assert np.all(dive_id[13:16] == 1) + assert np.all(profile_id[13:16] == 1) + # After the max → climb + assert np.all(state[16:17] == 2) + assert np.all(climb_id[16:17] == 1) + assert np.all(profile_id[16:17] == 2) + + +def test_absorb_apex_unknowns_skips_surface_gap() -> None: + """An unknown gap that goes shallow (glider surfaced) is NOT absorbed; it + must be reachable by the surface classifier instead.""" + n = 30 + state = np.full(n, -1, dtype="b") + dive_id = np.full(n, -1, dtype="i4") + climb_id = np.full(n, -1, dtype="i4") + profile_id = np.full(n, -1, dtype="i4") + pressure = np.zeros(n) + + state[5:10] = 1 + pressure[5:10] = np.linspace(10, 80, 5) + state[10:13] = 2 + pressure[10:13] = np.linspace(80, 1, 3) # climb back to surface + + # Unknown gap at the surface (pressure < surface_pressure) + pressure[13:17] = [0.3, 0.5, 0.4, 0.6] + + state[17:22] = 1 # next dive + pressure[17:22] = np.linspace(1, 60, 5) + + state_before = state.copy() + + prof._absorb_apex_unknowns( + state, + dive_id, + climb_id, + profile_id, + pressure, + dt_s=10.0, + max_gap_duration=180.0, + surface_pressure=2.0, + ) + + # The unknown gap was at the surface, so it stays unknown for assign_surface_state + assert np.array_equal(state, state_before) + + +def test_absorb_post_drift_transients_does_not_swallow_real_dive() -> None: + """A long dive after a drift is not absorbed (only short transients are).""" + n = 60 + state = np.full(n, -1, dtype="b") + drift_mask = np.zeros(n, dtype=bool) + dive_id = np.full(n, -1, dtype="i4") + climb_id = np.full(n, -1, dtype="i4") + profile_id = np.full(n, -1, dtype="i4") + + state[5:15] = 3 # drift + drift_mask[5:15] = True + + # A long dive (20 samples = 200 s, longer than max_transient_duration of 180 s) + state[15:35] = 1 + dive_id[15:35] = 1 + profile_id[15:35] = 1 + + state[40:55] = 2 # climb (further out) + climb_id[40:55] = 1 + profile_id[40:55] = 2 + + state_before = state.copy() + + prof._absorb_post_drift_transients( + state, + dive_id, + climb_id, + profile_id, + drift_mask, + dt_s=10.0, + max_transient_duration=180.0, + ) + + # Nothing should change because the dive between drift and climb is too long + assert np.array_equal(state, state_before) From 504bb52003dcf2c2844b99e989b9eae3cb84adee Mon Sep 17 00:00:00 2001 From: Jesse Cusack Date: Thu, 7 May 2026 22:06:04 -0700 Subject: [PATCH 03/10] refine the code --- src/glide/profiles.py | 292 +++++++++-------------------- tests/test_process_l1.py | 386 ++++++++++++--------------------------- 2 files changed, 205 insertions(+), 473 deletions(-) diff --git a/src/glide/profiles.py b/src/glide/profiles.py index 55aa52c..fca2be7 100644 --- a/src/glide/profiles.py +++ b/src/glide/profiles.py @@ -18,67 +18,49 @@ def _detect_drift( ) -> np.ndarray: """Return boolean mask True where glider is drifting at depth. - Uses x_hover_active from raw flight data if available (reported as 1 when - a hover episode starts and 0 when it ends, with -127 as the missing-data - sentinel; the most recent reported value is forward-filled to every science - timestamp). Falls back to a rolling-variance detector on the pressure - signal: a candidate region must have rolling pressure std < - pressure_std_threshold and pressure > surface_pressure sustained for at - least min_drift_duration seconds. + Uses x_hover_active from raw flight data when present (1=on, 0=off, + -127=missing; forward-filled to science timestamps). Otherwise applies a + rolling-variance detector: rolling pressure std < pressure_std_threshold + and pressure > surface_pressure sustained for min_drift_duration seconds. """ n = len(pressure) drift = np.zeros(n, dtype=bool) if flt is not None and "x_hover_active" in flt: - flt_time = flt.m_present_time.values - hover_active = flt.x_hover_active.values - # -127 is the no-data sentinel for the signed-byte variable; treat as - # missing along with NaN. Forward-fill the most recent {0,1} report. - finite = ( - np.isfinite(hover_active) & np.isfinite(flt_time) & (hover_active != -127) - ) - if finite.any(): - flt_time = flt_time[finite] - hover_active = hover_active[finite] - order = np.argsort(flt_time) - flt_time = flt_time[order] - hover_active = hover_active[order] - pos = np.searchsorted(flt_time, time, side="right") - 1 + ft = flt.m_present_time.values + ha = flt.x_hover_active.values + keep = np.isfinite(ha) & np.isfinite(ft) & (ha != -127) + if keep.any(): + order = np.argsort(ft[keep]) + ft, ha = ft[keep][order], ha[keep][order] + pos = np.searchsorted(ft, time, side="right") - 1 in_range = pos >= 0 - drift[in_range] = hover_active[pos[in_range]] == 1 - _log.debug( - "Drift: %d points from x_hover_active (%d reports)", - int(drift.sum()), - len(flt_time), - ) + drift[in_range] = ha[pos[in_range]] == 1 + _log.debug("Drift: %d points from x_hover_active", int(drift.sum())) return drift - _log.debug("x_hover_active has no finite values; using pressure detector") + _log.debug("x_hover_active has no finite values; falling back to pressure") valid = np.isfinite(pressure) if not valid.any(): return drift - # Window = min_drift_duration / 4; smaller window reduces edge miss at drift boundaries. - # np.convolve mode='same' returns max(n, window) elements, so cap half so window < n. + # Window = min_drift_duration / 4 (smaller reduces edge miss). np.convolve + # mode='same' returns max(n, window) elements, so cap so window < n. half = min((n - 1) // 2, max(2, round(min_drift_duration / 4 / dt_s))) - window = 2 * half + 1 - ones = np.ones(window) - + ones = np.ones(2 * half + 1) p_fill = np.where(valid, pressure, 0.0) - cnt_w = np.convolve(valid.astype(float), ones, mode="same") - sum_w = np.convolve(p_fill, ones, mode="same") - sumsq_w = np.convolve(p_fill**2, ones, mode="same") - - safe_cnt = np.where(cnt_w >= 3, cnt_w, 1.0) - mean_w = sum_w / safe_cnt - var_w = sumsq_w / safe_cnt - mean_w**2 - rolling_std = np.where(cnt_w >= 3, np.sqrt(np.maximum(0.0, var_w)), np.inf) + cnt = np.convolve(valid.astype(float), ones, mode="same") + sm = np.convolve(p_fill, ones, mode="same") + sq = np.convolve(p_fill**2, ones, mode="same") + safe = np.where(cnt >= 3, cnt, 1.0) + var = sq / safe - (sm / safe) ** 2 + rolling_std = np.where(cnt >= 3, np.sqrt(np.maximum(0.0, var)), np.inf) candidate = ( valid & (pressure > surface_pressure) & (rolling_std < pressure_std_threshold) ) - # Keep only contiguous runs that span at least min_drift_duration + # Keep only contiguous runs spanning at least min_drift_duration seconds. i = 0 while i < n: if not candidate[i]: @@ -91,7 +73,7 @@ def _detect_drift( drift[i:j] = True i = j - _log.debug("Drift: %d points from pressure-based detector", int(drift.sum())) + _log.debug("Drift: %d points from pressure detector", int(drift.sum())) return drift @@ -105,29 +87,19 @@ def _absorb_apex_unknowns( max_gap_duration: float, surface_pressure: float = 2.0, ) -> None: - """Absorb short unknown gaps at the underwater apex of a multi-yo segment. - - profinder occasionally leaves a few samples between dive_end and - climb_start unclassified at the peak. When such a gap is short, sits at - depth (max pressure > surface_pressure), and is sandwiched directly - between a dive (1) and a climb (2), split it at the pressure maximum: - points up to and including the max are folded into the preceding dive, - points after into the following climb. - """ + """Split short underwater unknown gaps between a dive and climb at the + pressure max: pre-max → dive, post-max → climb. Skips gaps that go + shallow (left for the surface classifier) or are too long.""" n = len(state) unknown = state == -1 if not unknown.any(): return - diff = np.diff(unknown.astype(int), prepend=0, append=0) - starts = np.where(diff == 1)[0] - ends = np.where(diff == -1)[0] + starts, ends = np.where(diff == 1)[0], np.where(diff == -1)[0] max_samples = max(2, round(max_gap_duration / dt_s)) for s, e in zip(starts, ends): - if (e - s) > max_samples: - continue - if s == 0 or e >= n: + if (e - s) > max_samples or s == 0 or e >= n: continue if state[s - 1] != 1 or state[e] != 2: continue @@ -135,14 +107,10 @@ def _absorb_apex_unknowns( finite = np.isfinite(p) if not finite.any() or float(np.min(p[finite])) < surface_pressure: continue - - local_max = int(np.argmax(np.where(finite, p, -np.inf))) - split = s + local_max + 1 - + split = s + int(np.argmax(np.where(finite, p, -np.inf))) + 1 state[s:split] = 1 dive_id[s:split] = dive_id[s - 1] profile_id[s:split] = profile_id[s - 1] - state[split:e] = 2 climb_id[split:e] = climb_id[e] profile_id[split:e] = profile_id[e] @@ -158,28 +126,19 @@ def _absorb_post_drift_transients( max_transient_duration: float, ) -> None: """Fold brief dive/unknown segments between a drift end and the next climb - into that climb. - - Masking the drift period leaves a discontinuous pressure signal across the - gap, which occasionally causes profinder to detect a spurious tiny peak - just after the drift in addition to the real climb. This pass extends the - climb backward to the drift end if the only intervening states are short - dive (1) or unknown (-1) segments. Anything longer than - max_transient_duration past the drift end is left alone. - """ + into that climb. The drift mask gap can cause profinder to detect a + spurious tiny peak; this extends the climb backward to absorb it.""" n = len(state) if not drift_mask.any(): return - - drift_diff = np.diff(drift_mask.astype(int), prepend=0, append=0) - drift_ends = np.where(drift_diff == -1)[0] + diff = np.diff(drift_mask.astype(int), prepend=0, append=0) + drift_ends = np.where(diff == -1)[0] window = max(2, round(max_transient_duration / dt_s)) for de in drift_ends: if de >= n: continue - end = min(de + window, n) - in_window = state[de:end] + in_window = state[de : min(de + window, n)] climb_local = np.where(in_window == 2)[0] if len(climb_local) == 0: continue @@ -189,11 +148,9 @@ def _absorb_post_drift_transients( intermediate = state[de:first_climb] if not np.all((intermediate == 1) | (intermediate == -1)): continue - cid = climb_id[first_climb] - pid = profile_id[first_climb] state[de:first_climb] = 2 - climb_id[de:first_climb] = cid - profile_id[de:first_climb] = pid + climb_id[de:first_climb] = climb_id[first_climb] + profile_id[de:first_climb] = profile_id[first_climb] dive_id[de:first_climb] = -1 @@ -214,30 +171,28 @@ def get_profiles( ds : xr.Dataset Merged L2 dataset with 'pressure' and 'time' variables. shallowest_profile : float - Minimum peak pressure (dbar) for a profile to be recognised. + Minimum peak pressure (dbar) for a profile to be recognised. Also + used as the inter-profile trough prominence threshold so small stalls + within a climb are not mistaken for surfacings. min_surface_time : float - Minimum time (seconds) between consecutive dive apexes. Used to - compute sample-count thresholds so the detector adapts to the data - sampling rate. A value of ~180 s works for most Slocum deployments. + Minimum time (seconds) between consecutive dive apexes (default 180). flt : xr.Dataset, optional - Raw flight data. Used to detect drift-at-depth via BAW_HOVER_ACTIVE - when present; otherwise pressure-based detection is used. + Raw flight data. Used to detect drift via x_hover_active when + present; otherwise the rolling-variance fallback runs. min_drift_duration : float - Minimum duration (seconds) for a constant-pressure period to be - classified as drift (default 600 s). + Minimum sustained-low-variance duration (s) for the pressure-based + drift detector (default 600 s). drift_pressure_std : float - Rolling pressure standard deviation threshold (dbar) for pressure-based - drift detection (default 2.0 dbar). + Rolling pressure std threshold (dbar) for the pressure-based drift + detector (default 2.0). stall_tolerance : float - Maximum time (seconds) the glider can stall or briefly reverse during - a dive or climb without profinder terminating the profile (default - 180 s = 3 min, sized to handle realistic glider stalls during - ascent). Converted to profinder's per-sample `run_length` argument. + Max stall/reversal duration (s) tolerated within a profile before + profinder terminates it (default 180 s). Sampling-rate independent + — converted to profinder's per-sample `run_length`. min_pressure_rate : float - Minimum pressure change rate (dbar/s) for a sample to count toward - ascent or descent classification (default 0.05 dbar/s ≈ 5 cm/s, well - below normal glider vertical speed of 10–20 cm/s). Converted to - profinder's per-sample `min_pressure_change` argument. + Min vertical speed (dbar/s) for a sample to count toward + ascent/descent (default 0.05 ≈ 5 cm/s, well below normal glider + speeds of 10–20 cm/s). Converted to per-sample `min_pressure_change`. """ raw_diff = np.diff(ds.time.values) if np.issubdtype(raw_diff.dtype, np.timedelta64): @@ -255,9 +210,8 @@ def get_profiles( pressure_std_threshold=drift_pressure_std, ) - # Mask drift before profile finding so profinder sees a gap across the - # drift period; with missing="drop" it will pair the real climb after - # drift with the correct dive. + # Mask drift before profile finding so profinder pairs the post-drift + # climb with the pre-drift dive across the gap (missing="drop"). pressure_masked = ds.pressure.values.copy().astype(float) pressure_masked[drift_mask] = np.nan @@ -268,37 +222,23 @@ def get_profiles( "width": max(2, round(20 * fs)), # ≥20 s half-width detects ~10 m dives } troughs_kwargs = { - # Real inter-profile troughs (surfacings or inter-yo apexes) have a - # depth contrast at least as large as a real dive's prominence; a - # smaller "trough" is a stall or noise dip within a profile. + # Real inter-profile troughs are at least as deep as a real dive's + # prominence; smaller "troughs" are stalls within a profile. "prominence": shallowest_profile, "distance": max(2, round(30 * fs)), "width": max(1, round(5 * fs)), } - _log.debug( - "fs=%.3f Hz; peaks_kwargs=%s; troughs_kwargs=%s", - fs, - peaks_kwargs, - troughs_kwargs, - ) - - # Scale stall tolerance and pressure-rate threshold to per-sample units. - # These guard the climb/dive run from being terminated by brief stalls - # (e.g. glider hangs at depth and drifts back down a few dbar before - # resuming the ascent). - run_length = max(2, round(stall_tolerance / dt_s)) - min_pressure_change = float(min_pressure_rate * dt_s) + _log.debug("fs=%.3f Hz; peaks=%s; troughs=%s", fs, peaks_kwargs, troughs_kwargs) profiles = find_profiles( pressure_masked, peaks_kwargs=peaks_kwargs, troughs_kwargs=troughs_kwargs, - run_length=run_length, - min_pressure_change=min_pressure_change, + run_length=max(2, round(stall_tolerance / dt_s)), + min_pressure_change=float(min_pressure_rate * dt_s), missing="drop", ) - _log.debug("Found %i profiles", len(profiles)) n = ds.time.size @@ -307,26 +247,17 @@ def get_profiles( profile_id = np.full(n, -1, dtype="i4") state = np.full(n, -1, dtype="b") - dive_counter = 1 - climb_counter = 1 - profile_counter = 1 - - for prof in profiles: - dive_start, dive_end, climb_start, climb_end = prof - - dive_id[dive_start:dive_end] = dive_counter - profile_id[dive_start:dive_end] = profile_counter - state[dive_start:dive_end] = 1 - dive_counter += 1 - profile_counter += 1 - - climb_id[climb_start:climb_end] = climb_counter - profile_id[climb_start:climb_end] = profile_counter - state[climb_start:climb_end] = 2 - climb_counter += 1 - profile_counter += 1 - - # Override drift points, clearing any profile membership assigned above + pid = 1 + for k, (ds_, de, cs, ce) in enumerate(profiles, start=1): + dive_id[ds_:de] = k + profile_id[ds_:de] = pid + state[ds_:de] = 1 + climb_id[cs:ce] = k + profile_id[cs:ce] = pid + 1 + state[cs:ce] = 2 + pid += 2 + + # Drift overrides any profile membership profinder may have spanned across state[drift_mask] = 3 dive_id[drift_mask] = -1 climb_id[drift_mask] = -1 @@ -335,7 +266,6 @@ def get_profiles( _absorb_post_drift_transients( state, dive_id, climb_id, profile_id, drift_mask, dt_s, min_surface_time ) - _absorb_apex_unknowns( state, dive_id, @@ -360,7 +290,6 @@ def get_profiles( valid_min=np.int8(-1), ), ) - return ds @@ -372,32 +301,12 @@ def assign_surface_state( ) -> xr.Dataset: """Assign surface state (0) to unknown points. - Two checks are applied in sequence: - - 1. A point with state == -1 (unknown) is marked as surface if it is within - `dt` seconds of a valid GPS fix AND shallower than `surface_pressure` - dbar. The pressure gate prevents the broad GPS time window from - reaching into adjacent dives. - 2. Any remaining contiguous unknown segment is marked as surface unless - its pressure record contains a finite value at or above - `surface_pressure` (i.e. evidence the glider was deeper). Missing - pressure is the expected state at the surface — the science sensor - is often out of water — so all-NaN unknown segments are classified - as surface. This covers the period before the first dive, after the - last climb, and any between-profile gap. - - Parameters - ---------- - ds : xr.Dataset - Dataset with state and pressure variables from get_profiles. - flt : xr.Dataset, optional - Raw flight data containing m_gps_lat/lon with valid GPS fix times. - If absent, only the pressure-based check runs. - dt : float - Time threshold in seconds for matching to GPS fixes (default 300). - surface_pressure : float - Maximum pressure (dbar) for a point to be considered at the surface - (default 2.0). + Two passes: (1) GPS-proximity — unknowns within `dt` seconds of a valid + GPS fix AND shallower than `surface_pressure`; (2) whole-segment — any + contiguous unknown run with no finite pressure ≥ `surface_pressure`. + Missing pressure is the expected state at the surface (sensor out of + water), so all-NaN runs are treated as surface — except when bounded by + dive→climb (the underwater apex), which is left unknown. """ state = ds.state.values.copy() pressure = ds.pressure.values @@ -407,54 +316,37 @@ def assign_surface_state( gps_valid = np.isfinite(flt.m_gps_lat.values) if gps_valid.any(): gps_times = np.sort(flt.m_present_time.values[gps_valid]) - unknown_mask = state == -1 - if unknown_mask.any(): - unknown_times = time_l2[unknown_mask] - pos = np.searchsorted(gps_times, unknown_times) - dist_left = np.where( - pos > 0, - unknown_times - gps_times[np.maximum(pos - 1, 0)], - np.inf, - ) - dist_right = np.where( + unk = state == -1 + if unk.any(): + ut = time_l2[unk] + pos = np.searchsorted(gps_times, ut) + left = np.where(pos > 0, ut - gps_times[np.maximum(pos - 1, 0)], np.inf) + right = np.where( pos < len(gps_times), - gps_times[np.minimum(pos, len(gps_times) - 1)] - unknown_times, + gps_times[np.minimum(pos, len(gps_times) - 1)] - ut, np.inf, ) - is_near = np.minimum(dist_left, dist_right) <= dt - is_shallow = np.isfinite(pressure[unknown_mask]) & ( - pressure[unknown_mask] < surface_pressure - ) - state[unknown_mask] = np.where( - is_near & is_shallow, np.int8(0), state[unknown_mask] - ) - _log.debug( - "GPS-proximity surface check: %d points assigned", - int((is_near & is_shallow).sum()), + near = np.minimum(left, right) <= dt + shallow = np.isfinite(pressure[unk]) & ( + pressure[unk] < surface_pressure ) + state[unk] = np.where(near & shallow, np.int8(0), state[unk]) else: _log.warning("No valid GPS fixes found") else: _log.warning("No flight data with GPS, skipping GPS-proximity surface check") - # Whole-segment pressure check: any contiguous unknown run that never - # exceeds surface_pressure is the glider sitting at the surface unknown = state == -1 if unknown.any(): n = len(state) diff = np.diff(unknown.astype(int), prepend=0, append=0) - starts = np.where(diff == 1)[0] - ends = np.where(diff == -1)[0] - n_assigned = 0 + starts, ends = np.where(diff == 1)[0], np.where(diff == -1)[0] for s, e in zip(starts, ends): p = pressure[s:e] finite = np.isfinite(p) - # Skip if there is finite evidence the glider was underwater. if finite.any() and float(np.max(p[finite])) >= surface_pressure: continue - # An all-NaN gap bounded by dive→climb is the underwater apex - # that the apex absorber couldn't split because pressure was - # missing. Leave it unknown rather than mislabel as surface. + # All-NaN gap bounded by dive→climb is the underwater apex; leave it if ( not finite.any() and 0 < s @@ -464,8 +356,6 @@ def assign_surface_state( ): continue state[s:e] = 0 - n_assigned += int(e - s) - _log.debug("Whole-segment surface check: %d points assigned", n_assigned) ds["state"] = ("time", state, ds.state.attrs) return ds diff --git a/tests/test_process_l1.py b/tests/test_process_l1.py index c3fed7d..5db41cc 100644 --- a/tests/test_process_l1.py +++ b/tests/test_process_l1.py @@ -1020,360 +1020,202 @@ def test_emit_ioos_profiles_yo_yo_shares_time_uv(tmp_path) -> None: def _make_drift_at_depth_dataset(dt_s: float) -> tuple[xr.Dataset, float, float, float]: - """Synthetic glider dataset with one dive-overshoot-drift-climb cycle. - - Returns (ds, t_dive_start, t_drift_start, t_climb_start) in seconds. - - Pressure trace: - - Surface at ~0.5 dbar for 120 s - - Dive to 105 dbar (target 100, overshoot by 5 dbar) over 1500 s - - Overshoot correction: 105→100 dbar over 120 s ← wrongly identified as - the climb before this fix - - Drift at 100 ± 2 dbar (sinusoidal wobble, period 300 s) for 3600 s - - Climb from 100 to surface over 1500 s - - Surface at ~0.5 dbar for 120 s - """ - t_surface = 120.0 - t_dive = 1500.0 - t_overshoot = 120.0 - t_drift = 3600.0 - t_climb = 1500.0 - + """Synthetic surface→dive(105 overshoot)→correction→drift(100±2)→climb→surface + cycle. Returns (ds, t_dive_start, t_drift_start, t_climb_start) in seconds.""" + t_surface, t_dive, t_overshoot, t_drift, t_climb = ( + 120.0, + 1500.0, + 120.0, + 3600.0, + 1500.0, + ) t0_dive = t_surface t0_overshoot = t0_dive + t_dive t0_drift = t0_overshoot + t_overshoot t0_climb = t0_drift + t_drift - t_total = t0_climb + t_climb + t_surface - - t = np.arange(0.0, t_total, dt_s) + t = np.arange(0.0, t0_climb + t_climb + t_surface, dt_s) p = np.full(len(t), 0.5) + m = (t >= t0_dive) & (t < t0_overshoot) + p[m] = 105.0 * (t[m] - t0_dive) / t_dive + m = (t >= t0_overshoot) & (t < t0_drift) + p[m] = 105.0 - 5.0 * (t[m] - t0_overshoot) / t_overshoot + m = (t >= t0_drift) & (t < t0_climb) + p[m] = 100.0 + 2.0 * np.sin(2.0 * np.pi * (t[m] - t0_drift) / 300.0) + m = (t >= t0_climb) & (t < t0_climb + t_climb) + p[m] = 100.0 * (1.0 - (t[m] - t0_climb) / t_climb) + return ( + xr.Dataset({"pressure": ("time", p)}, coords={"time": t}), + t0_dive, + t0_drift, + t0_climb, + ) - mask = (t >= t0_dive) & (t < t0_overshoot) - p[mask] = 105.0 * (t[mask] - t0_dive) / t_dive - - mask = (t >= t0_overshoot) & (t < t0_drift) - p[mask] = 105.0 - 5.0 * (t[mask] - t0_overshoot) / t_overshoot - - mask = (t >= t0_drift) & (t < t0_climb) - p[mask] = 100.0 + 2.0 * np.sin(2.0 * np.pi * (t[mask] - t0_drift) / 300.0) - - mask = (t >= t0_climb) & (t < t0_climb + t_climb) - p[mask] = 100.0 * (1.0 - (t[mask] - t0_climb) / t_climb) - ds = xr.Dataset({"pressure": ("time", p)}, coords={"time": t}) - return ds, t0_dive, t0_drift, t0_climb +def _make_state_arrays(n: int) -> tuple[np.ndarray, ...]: + """Allocate (state, dive_id, climb_id, profile_id) all initialised to -1.""" + return ( + np.full(n, -1, dtype="b"), + np.full(n, -1, dtype="i4"), + np.full(n, -1, dtype="i4"), + np.full(n, -1, dtype="i4"), + ) -@pytest.mark.parametrize( - "dt_s", - [1.0, 15.0], - ids=["post_recovery_1s", "realtime_15s"], -) +@pytest.mark.parametrize("dt_s", [1.0, 15.0], ids=["post_recovery_1s", "realtime_15s"]) def test_get_profiles_drift_at_depth(dt_s: float) -> None: - """Drift-at-depth is detected and labelled state=3; the real climb after - drift must span the full water column rather than being cut off. - - Without drift masking, the overshoot correction (105→100 dbar before the - drift plateau) would be mis-identified as the climb portion, leaving the - actual ascent unlabelled. - """ - ds, _t_dive, t_drift_start, t_climb_start = _make_drift_at_depth_dataset(dt_s) - + """Drift labelled state=3; post-drift climb spans the full water column + rather than being mis-attributed to the brief overshoot correction.""" + ds, _, t_drift, t_climb = _make_drift_at_depth_dataset(dt_s) result = prof.get_profiles(ds, shallowest_profile=5.0, min_surface_time=180.0) + state, time = result.state.values, ds.time.values - state = result.state.values - time = ds.time.values - - # Most of the drift plateau must be labelled state=3 - drift_state = state[(time >= t_drift_start) & (time < t_climb_start)] - assert (drift_state == 3).mean() > 0.7, ( - f"Only {(drift_state == 3).mean():.1%} of drift period is state=3 at dt_s={dt_s}" - ) - - # Most of the real climb after drift must be labelled state=2 - climb_state = state[time >= t_climb_start] - assert (climb_state == 2).mean() > 0.7, ( - f"Only {(climb_state == 2).mean():.1%} of real climb is state=2 at dt_s={dt_s}" - ) - - # The climb profile must span the full water column - climb_pressures = ds.pressure.values[state == 2] - assert climb_pressures.max() > 90.0, ( - "Climb profile must include deep data near 100 dbar" - ) - assert climb_pressures.min() < 5.0, "Climb profile must reach the near-surface" + drift_state = state[(time >= t_drift) & (time < t_climb)] + assert (drift_state == 3).mean() > 0.7 + assert (state[time >= t_climb] == 2).mean() > 0.7 - # state=3 (drift) must appear in the CF flag_values + climb_p = ds.pressure.values[state == 2] + assert climb_p.max() > 90.0 and climb_p.min() < 5.0 assert 3 in result.state.attrs["flag_values"] -@pytest.mark.parametrize( - "dt_s", - [1.0, 15.0], - ids=["post_recovery_1s", "realtime_15s"], -) +@pytest.mark.parametrize("dt_s", [1.0, 15.0], ids=["post_recovery_1s", "realtime_15s"]) def test_get_profiles_climb_with_stall(dt_s: float) -> None: - """A brief stall during the climb (glider hangs at ~20 dbar and drifts a - few dbar back down before resuming the ascent) must not split the climb - in two. The stall_tolerance default rides over the reversal at both - sampling rates because it is specified in seconds, not samples.""" - t_surface = 60.0 - t_dive = 800.0 - t_climb_pre_stall = 600.0 # 100 → 20 dbar - t_stall = 120.0 # hangs at 20–22 dbar for 2 min (within 3 min tolerance) - t_climb_post_stall = 400.0 # 22 → 0 dbar - t_total = ( - t_surface + t_dive + t_climb_pre_stall + t_stall + t_climb_post_stall + 60.0 - ) - - t = np.arange(0.0, t_total, dt_s) + """A 2-minute stall (within the 3-minute tolerance) within the climb must + not split the climb at either sampling rate.""" + t_surf, t_dive, t_pre, t_stall, t_post = 60.0, 800.0, 600.0, 120.0, 400.0 + t0_dive = t_surf + t0_pre = t0_dive + t_dive + t0_stall = t0_pre + t_pre + t0_post = t0_stall + t_stall + t0_end = t0_post + t_post + t = np.arange(0.0, t0_end + t_surf, dt_s) p = np.full(len(t), 0.5) - - t0_dive = t_surface - t0_climb_pre = t0_dive + t_dive - t0_stall = t0_climb_pre + t_climb_pre_stall - t0_climb_post = t0_stall + t_stall - t0_end = t0_climb_post + t_climb_post_stall - - mask = (t >= t0_dive) & (t < t0_climb_pre) - p[mask] = 100.0 * (t[mask] - t0_dive) / t_dive - - mask = (t >= t0_climb_pre) & (t < t0_stall) - p[mask] = 100.0 - 80.0 * (t[mask] - t0_climb_pre) / t_climb_pre_stall - - # Stall: drift slowly back down from 20 to 22 dbar over 2 min - mask = (t >= t0_stall) & (t < t0_climb_post) - p[mask] = 20.0 + 2.0 * (t[mask] - t0_stall) / t_stall - - mask = (t >= t0_climb_post) & (t < t0_end) - p[mask] = 22.0 * (1.0 - (t[mask] - t0_climb_post) / t_climb_post_stall) - - # Leading NaN sidesteps a profinder edge case where valid_idx is undefined - # for NaN-free input. - p[0] = np.nan - + m = (t >= t0_dive) & (t < t0_pre) + p[m] = 100.0 * (t[m] - t0_dive) / t_dive + m = (t >= t0_pre) & (t < t0_stall) + p[m] = 100.0 - 80.0 * (t[m] - t0_pre) / t_pre + m = (t >= t0_stall) & (t < t0_post) # stall: 20 → 22 dbar over 2 min + p[m] = 20.0 + 2.0 * (t[m] - t0_stall) / t_stall + m = (t >= t0_post) & (t < t0_end) + p[m] = 22.0 * (1.0 - (t[m] - t0_post) / t_post) + p[0] = np.nan # profinder needs NaN to avoid valid_idx edge case ds = xr.Dataset({"pressure": ("time", p)}, coords={"time": t}) result = prof.get_profiles(ds, shallowest_profile=5.0, min_surface_time=180.0) - - state = result.state.values - # The post-stall climb must be classified as climb, not unknown - post_stall_state = state[(t >= t0_climb_post) & (t < t0_end)] - assert (post_stall_state == 2).mean() > 0.7, ( - f"Post-stall climb only {(post_stall_state == 2).mean():.1%} state=2 at dt_s={dt_s}" - ) - # The whole climb (pre-stall + stall + post-stall) is one climb_id - climb_ids = np.unique(result.climb_id.values[result.climb_id.values >= 0]) - assert len(climb_ids) == 1, f"Expected 1 climb, got {len(climb_ids)} at dt_s={dt_s}" + assert (result.state.values[(t >= t0_post) & (t < t0_end)] == 2).mean() > 0.7 + cids = np.unique(result.climb_id.values[result.climb_id.values >= 0]) + assert len(cids) == 1 def test_get_profiles_drift_x_hover_active() -> None: - """x_hover_active in raw flight data takes precedence over pressure-based - drift detection and marks the exact drift period as state=3. - - The Slocum flight computer reports x_hover_active only on transitions - (1 when a hover starts, 0 when it ends, -127 missing otherwise); the - detector must forward-fill the most recent {0,1} report. - """ - dt_s = 15.0 - ds, _t_dive, t_drift_start, t_climb_start = _make_drift_at_depth_dataset(dt_s) - + """x_hover_active takes precedence over pressure-based detection. Sparse + transition reports must be forward-filled and -127 sentinel filtered.""" + ds, _, t_drift, t_climb = _make_drift_at_depth_dataset(15.0) time = ds.time.values - - # Sparse flight reports: 0 at start, 1 when hover begins, 0 when it ends. - # Mix in a -127 sentinel to confirm it is filtered out. - flt_time = np.array([0.0, 60.0, t_drift_start, t_climb_start]) - hover_active = np.array([0.0, -127.0, 1.0, 0.0]) - flt = xr.Dataset( { - "m_present_time": ("i", flt_time), - "x_hover_active": ("i", hover_active), + "m_present_time": ("i", np.array([0.0, 60.0, t_drift, t_climb])), + "x_hover_active": ("i", np.array([0.0, -127.0, 1.0, 0.0])), } ) - result = prof.get_profiles( ds, shallowest_profile=5.0, min_surface_time=180.0, flt=flt ) - state = result.state.values - drift_mask = (time >= t_drift_start) & (time < t_climb_start) - - # BAW_HOVER_ACTIVE covers the drift period exactly, so detection should be - # almost complete (within one 4 s flight sample of each boundary) - assert (state[drift_mask] == 3).mean() > 0.9, ( - "BAW_HOVER_ACTIVE should detect drift with >90% coverage" - ) - - # Real climb after drift must still be captured - climb_state = state[time >= t_climb_start] - assert (climb_state == 2).mean() > 0.7 + assert (state[(time >= t_drift) & (time < t_climb)] == 3).mean() > 0.9 + assert (state[time >= t_climb] == 2).mean() > 0.7 def test_absorb_post_drift_transients() -> None: - """Brief dive (state=1) and unknown (state=-1) segments wedged between - a drift period and the real climb are absorbed into the climb.""" - n = 50 - state = np.full(n, -1, dtype="b") - drift_mask = np.zeros(n, dtype=bool) - dive_id = np.full(n, -1, dtype="i4") - climb_id = np.full(n, -1, dtype="i4") - profile_id = np.full(n, -1, dtype="i4") - - state[5:15] = 1 # main dive + """Brief dive/unknown segments between drift and climb are folded into climb.""" + state, dive_id, climb_id, profile_id = _make_state_arrays(50) + drift_mask = np.zeros(50, dtype=bool) + state[5:15] = 1 dive_id[5:15] = 1 profile_id[5:15] = 1 - - state[15:30] = 3 # drift + state[15:30] = 3 drift_mask[15:30] = True - - state[30:32] = 1 # spurious brief dive (transient) + state[30:32] = 1 dive_id[30:32] = 2 - profile_id[30:32] = 2 - + profile_id[30:32] = 2 # spurious dive # state[32:34] left as -1 (unknown gap) - - state[34:48] = 2 # real climb + state[34:48] = 2 climb_id[34:48] = 1 profile_id[34:48] = 3 prof._absorb_post_drift_transients( - state, - dive_id, - climb_id, - profile_id, - drift_mask, - dt_s=10.0, - max_transient_duration=180.0, + state, dive_id, climb_id, profile_id, drift_mask, 10.0, 180.0 ) - - # The transient between drift and climb is now part of the climb assert np.all(state[30:34] == 2) assert np.all(climb_id[30:34] == 1) assert np.all(profile_id[30:34] == 3) assert np.all(dive_id[30:34] == -1) - - # Drift, main dive, and real climb are unchanged - assert np.all(state[5:15] == 1) - assert np.all(state[15:30] == 3) - assert np.all(state[34:48] == 2) + assert ( + np.all(state[5:15] == 1) + and np.all(state[15:30] == 3) + and np.all(state[34:48] == 2) + ) def test_absorb_apex_unknowns_splits_at_max_pressure() -> None: - """A short unknown gap at the underwater apex between a dive and a climb - is split at the pressure maximum: pre-max points become dive, post-max - points become climb.""" - n = 30 - state = np.full(n, -1, dtype="b") - dive_id = np.full(n, -1, dtype="i4") - climb_id = np.full(n, -1, dtype="i4") - profile_id = np.full(n, -1, dtype="i4") - pressure = np.zeros(n) - + """Short unknown gap between dive and climb is split at the pressure max.""" + state, dive_id, climb_id, profile_id = _make_state_arrays(30) + pressure = np.zeros(30) state[5:13] = 1 dive_id[5:13] = 1 profile_id[5:13] = 1 pressure[5:13] = np.linspace(10, 80, 8) - - # Indices 13–17 unknown, with peak (90 dbar) at index 15 - pressure[13:17] = [85, 88, 90, 87] - + pressure[13:17] = [85, 88, 90, 87] # apex at index 15 state[17:25] = 2 climb_id[17:25] = 1 profile_id[17:25] = 2 pressure[17:25] = np.linspace(85, 5, 8) prof._absorb_apex_unknowns( - state, - dive_id, - climb_id, - profile_id, - pressure, - dt_s=10.0, - max_gap_duration=180.0, - surface_pressure=2.0, + state, dive_id, climb_id, profile_id, pressure, 10.0, 180.0, 2.0 ) - - # Up to and including the max → dive - assert np.all(state[13:16] == 1) - assert np.all(dive_id[13:16] == 1) - assert np.all(profile_id[13:16] == 1) - # After the max → climb - assert np.all(state[16:17] == 2) - assert np.all(climb_id[16:17] == 1) - assert np.all(profile_id[16:17] == 2) + assert ( + np.all(state[13:16] == 1) + and np.all(dive_id[13:16] == 1) + and np.all(profile_id[13:16] == 1) + ) + assert state[16] == 2 and climb_id[16] == 1 and profile_id[16] == 2 def test_absorb_apex_unknowns_skips_surface_gap() -> None: - """An unknown gap that goes shallow (glider surfaced) is NOT absorbed; it - must be reachable by the surface classifier instead.""" - n = 30 - state = np.full(n, -1, dtype="b") - dive_id = np.full(n, -1, dtype="i4") - climb_id = np.full(n, -1, dtype="i4") - profile_id = np.full(n, -1, dtype="i4") - pressure = np.zeros(n) - + """A gap that goes shallow (glider surfaced) is left for the surface classifier.""" + state, dive_id, climb_id, profile_id = _make_state_arrays(30) + pressure = np.zeros(30) state[5:10] = 1 pressure[5:10] = np.linspace(10, 80, 5) state[10:13] = 2 - pressure[10:13] = np.linspace(80, 1, 3) # climb back to surface - - # Unknown gap at the surface (pressure < surface_pressure) - pressure[13:17] = [0.3, 0.5, 0.4, 0.6] - - state[17:22] = 1 # next dive + pressure[10:13] = np.linspace(80, 1, 3) + pressure[13:17] = [0.3, 0.5, 0.4, 0.6] # shallow unknown gap + state[17:22] = 1 pressure[17:22] = np.linspace(1, 60, 5) - - state_before = state.copy() + before = state.copy() prof._absorb_apex_unknowns( - state, - dive_id, - climb_id, - profile_id, - pressure, - dt_s=10.0, - max_gap_duration=180.0, - surface_pressure=2.0, + state, dive_id, climb_id, profile_id, pressure, 10.0, 180.0, 2.0 ) - - # The unknown gap was at the surface, so it stays unknown for assign_surface_state - assert np.array_equal(state, state_before) + assert np.array_equal(state, before) def test_absorb_post_drift_transients_does_not_swallow_real_dive() -> None: - """A long dive after a drift is not absorbed (only short transients are).""" - n = 60 - state = np.full(n, -1, dtype="b") - drift_mask = np.zeros(n, dtype=bool) - dive_id = np.full(n, -1, dtype="i4") - climb_id = np.full(n, -1, dtype="i4") - profile_id = np.full(n, -1, dtype="i4") - - state[5:15] = 3 # drift + """A long dive (>max_transient_duration) between drift and climb is not absorbed.""" + state, dive_id, climb_id, profile_id = _make_state_arrays(60) + drift_mask = np.zeros(60, dtype=bool) + state[5:15] = 3 drift_mask[5:15] = True - - # A long dive (20 samples = 200 s, longer than max_transient_duration of 180 s) state[15:35] = 1 dive_id[15:35] = 1 - profile_id[15:35] = 1 - - state[40:55] = 2 # climb (further out) + profile_id[15:35] = 1 # 200 s > 180 s + state[40:55] = 2 climb_id[40:55] = 1 profile_id[40:55] = 2 - - state_before = state.copy() + before = state.copy() prof._absorb_post_drift_transients( - state, - dive_id, - climb_id, - profile_id, - drift_mask, - dt_s=10.0, - max_transient_duration=180.0, + state, dive_id, climb_id, profile_id, drift_mask, 10.0, 180.0 ) - - # Nothing should change because the dive between drift and climb is too long - assert np.array_equal(state, state_before) + assert np.array_equal(state, before) From a6aa3348726eddd9b329d3dedc9d5c6cc49e5dcb Mon Sep 17 00:00:00 2001 From: Jesse Cusack Date: Thu, 7 May 2026 22:37:58 -0700 Subject: [PATCH 04/10] refine more to deal with stalls --- src/glide/profiles.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/glide/profiles.py b/src/glide/profiles.py index fca2be7..4b3fc55 100644 --- a/src/glide/profiles.py +++ b/src/glide/profiles.py @@ -161,8 +161,6 @@ def get_profiles( flt: xr.Dataset | None = None, min_drift_duration: float = 600.0, drift_pressure_std: float = 2.0, - stall_tolerance: float = 180.0, - min_pressure_rate: float = 0.05, ) -> xr.Dataset: """Identify dive, climb, and drift profiles from a pressure time series. @@ -185,14 +183,6 @@ def get_profiles( drift_pressure_std : float Rolling pressure std threshold (dbar) for the pressure-based drift detector (default 2.0). - stall_tolerance : float - Max stall/reversal duration (s) tolerated within a profile before - profinder terminates it (default 180 s). Sampling-rate independent - — converted to profinder's per-sample `run_length`. - min_pressure_rate : float - Min vertical speed (dbar/s) for a sample to count toward - ascent/descent (default 0.05 ≈ 5 cm/s, well below normal glider - speeds of 10–20 cm/s). Converted to per-sample `min_pressure_change`. """ raw_diff = np.diff(ds.time.values) if np.issubdtype(raw_diff.dtype, np.timedelta64): @@ -235,8 +225,6 @@ def get_profiles( pressure_masked, peaks_kwargs=peaks_kwargs, troughs_kwargs=troughs_kwargs, - run_length=max(2, round(stall_tolerance / dt_s)), - min_pressure_change=float(min_pressure_rate * dt_s), missing="drop", ) _log.debug("Found %i profiles", len(profiles)) From 7d86ab0f1df262cc6e4e21427977c47871d304cd Mon Sep 17 00:00:00 2001 From: Jesse Cusack Date: Thu, 7 May 2026 22:55:41 -0700 Subject: [PATCH 05/10] reduce run length even more by defaul --- src/glide/profiles.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/glide/profiles.py b/src/glide/profiles.py index 4b3fc55..47d2c14 100644 --- a/src/glide/profiles.py +++ b/src/glide/profiles.py @@ -225,6 +225,11 @@ def get_profiles( pressure_masked, peaks_kwargs=peaks_kwargs, troughs_kwargs=troughs_kwargs, + # Real glider pressure has per-second sensor bobbing (positive Δp every + # ~5 samples) that breaks profinder's default 4-sample-all-ascent + # confirmation, terminating the climb tens of dbar above the surface. + # 2 samples is loose enough to find ascent windows through the bobbing. + run_length=2, missing="drop", ) _log.debug("Found %i profiles", len(profiles)) From b42d93bc096caa131cba96ede86e8940080378c8 Mon Sep 17 00:00:00 2001 From: Jesse Cusack Date: Thu, 7 May 2026 23:30:29 -0700 Subject: [PATCH 06/10] more minor edits to refine profile finding --- src/glide/profiles.py | 33 +++++++++++++++++++++------------ tests/test_process_l1.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 12 deletions(-) diff --git a/src/glide/profiles.py b/src/glide/profiles.py index 47d2c14..0d8c7fa 100644 --- a/src/glide/profiles.py +++ b/src/glide/profiles.py @@ -87,9 +87,10 @@ def _absorb_apex_unknowns( max_gap_duration: float, surface_pressure: float = 2.0, ) -> None: - """Split short underwater unknown gaps between a dive and climb at the - pressure max: pre-max → dive, post-max → climb. Skips gaps that go - shallow (left for the surface classifier) or are too long.""" + """Split short underwater unknown gaps between adjacent profiles at the + pressure extremum. Handles dive→climb (split at max: pre→dive, post→climb) + and climb→dive yo-tops (split at min: pre→climb, post→dive). Skips gaps + that go shallow (left for the surface classifier) or are too long.""" n = len(state) unknown = state == -1 if not unknown.any(): @@ -101,19 +102,27 @@ def _absorb_apex_unknowns( for s, e in zip(starts, ends): if (e - s) > max_samples or s == 0 or e >= n: continue - if state[s - 1] != 1 or state[e] != 2: - continue + prev, nxt = state[s - 1], state[e] p = pressure[s:e] finite = np.isfinite(p) if not finite.any() or float(np.min(p[finite])) < surface_pressure: continue - split = s + int(np.argmax(np.where(finite, p, -np.inf))) + 1 - state[s:split] = 1 - dive_id[s:split] = dive_id[s - 1] - profile_id[s:split] = profile_id[s - 1] - state[split:e] = 2 - climb_id[split:e] = climb_id[e] - profile_id[split:e] = profile_id[e] + if prev == 1 and nxt == 2: + split = s + int(np.argmax(np.where(finite, p, -np.inf))) + 1 + state[s:split] = 1 + dive_id[s:split] = dive_id[s - 1] + profile_id[s:split] = profile_id[s - 1] + state[split:e] = 2 + climb_id[split:e] = climb_id[e] + profile_id[split:e] = profile_id[e] + elif prev == 2 and nxt == 1: + split = s + int(np.argmin(np.where(finite, p, np.inf))) + 1 + state[s:split] = 2 + climb_id[s:split] = climb_id[s - 1] + profile_id[s:split] = profile_id[s - 1] + state[split:e] = 1 + dive_id[split:e] = dive_id[e] + profile_id[split:e] = profile_id[e] def _absorb_post_drift_transients( diff --git a/tests/test_process_l1.py b/tests/test_process_l1.py index 5db41cc..c8e03f7 100644 --- a/tests/test_process_l1.py +++ b/tests/test_process_l1.py @@ -1182,6 +1182,38 @@ def test_absorb_apex_unknowns_splits_at_max_pressure() -> None: assert state[16] == 2 and climb_id[16] == 1 and profile_id[16] == 2 +def test_absorb_apex_unknowns_climb_to_dive_yo_top() -> None: + """Short underwater gap between climb→dive (yo-top) is split at the + pressure minimum: pre-min → climb, post-min → dive.""" + state, dive_id, climb_id, profile_id = _make_state_arrays(30) + pressure = np.zeros(30) + state[5:13] = 2 + climb_id[5:13] = 1 + profile_id[5:13] = 1 + pressure[5:13] = np.linspace(80, 50, 8) + pressure[13:17] = [48, 45, 47, 50] # yo-top min at index 14 + state[17:25] = 1 + dive_id[17:25] = 2 + profile_id[17:25] = 2 + pressure[17:25] = np.linspace(55, 200, 8) + + prof._absorb_apex_unknowns( + state, dive_id, climb_id, profile_id, pressure, 10.0, 180.0, 2.0 + ) + # Up to and including the min → climb + assert ( + np.all(state[13:15] == 2) + and np.all(climb_id[13:15] == 1) + and np.all(profile_id[13:15] == 1) + ) + # After the min → dive + assert ( + np.all(state[15:17] == 1) + and np.all(dive_id[15:17] == 2) + and np.all(profile_id[15:17] == 2) + ) + + def test_absorb_apex_unknowns_skips_surface_gap() -> None: """A gap that goes shallow (glider surfaced) is left for the surface classifier.""" state, dive_id, climb_id, profile_id = _make_state_arrays(30) From 9aee00ce40e72bb831c970c454315c2af39b78c3 Mon Sep 17 00:00:00 2001 From: Jesse Cusack Date: Thu, 7 May 2026 23:34:32 -0700 Subject: [PATCH 07/10] reduce code length --- src/glide/profiles.py | 73 +++++++++++-------------- tests/test_process_l1.py | 113 +++++++++++++++------------------------ 2 files changed, 74 insertions(+), 112 deletions(-) diff --git a/src/glide/profiles.py b/src/glide/profiles.py index 0d8c7fa..697c832 100644 --- a/src/glide/profiles.py +++ b/src/glide/profiles.py @@ -6,6 +6,14 @@ _log = logging.getLogger(__name__) +_STATE_ATTRS = dict( + long_name="Glider state", + flag_values=np.array([-1, 0, 1, 2, 3], "b"), + flag_meanings="unknown surface dive climb drift", + valid_max=np.int8(3), + valid_min=np.int8(-1), +) + def _detect_drift( pressure: np.ndarray, @@ -102,27 +110,24 @@ def _absorb_apex_unknowns( for s, e in zip(starts, ends): if (e - s) > max_samples or s == 0 or e >= n: continue - prev, nxt = state[s - 1], state[e] + prev, nxt = int(state[s - 1]), int(state[e]) + if {prev, nxt} != {1, 2}: + continue p = pressure[s:e] finite = np.isfinite(p) if not finite.any() or float(np.min(p[finite])) < surface_pressure: continue - if prev == 1 and nxt == 2: - split = s + int(np.argmax(np.where(finite, p, -np.inf))) + 1 - state[s:split] = 1 - dive_id[s:split] = dive_id[s - 1] - profile_id[s:split] = profile_id[s - 1] - state[split:e] = 2 - climb_id[split:e] = climb_id[e] - profile_id[split:e] = profile_id[e] - elif prev == 2 and nxt == 1: - split = s + int(np.argmin(np.where(finite, p, np.inf))) + 1 - state[s:split] = 2 - climb_id[s:split] = climb_id[s - 1] - profile_id[s:split] = profile_id[s - 1] - state[split:e] = 1 - dive_id[split:e] = dive_id[e] - profile_id[split:e] = profile_id[e] + # dive→climb splits at max (extremum is positive); climb→dive at min. + fill, extreme = (-np.inf, np.argmax) if prev == 1 else (np.inf, np.argmin) + split = s + int(extreme(np.where(finite, p, fill))) + 1 + pre_id = dive_id if prev == 1 else climb_id + post_id = climb_id if nxt == 2 else dive_id + state[s:split] = prev + state[split:e] = nxt + pre_id[s:split] = pre_id[s - 1] + post_id[split:e] = post_id[e] + profile_id[s:split] = profile_id[s - 1] + profile_id[split:e] = profile_id[e] def _absorb_post_drift_transients( @@ -265,33 +270,15 @@ def get_profiles( climb_id[drift_mask] = -1 profile_id[drift_mask] = -1 - _absorb_post_drift_transients( - state, dive_id, climb_id, profile_id, drift_mask, dt_s, min_surface_time - ) - _absorb_apex_unknowns( - state, - dive_id, - climb_id, - profile_id, - ds.pressure.values, - dt_s, - min_surface_time, - ) + ids = (state, dive_id, climb_id, profile_id) + _absorb_post_drift_transients(*ids, drift_mask, dt_s, min_surface_time) + _absorb_apex_unknowns(*ids, ds.pressure.values, dt_s, min_surface_time) - ds["dive_id"] = ("time", dive_id, dict(_FillValue=np.int32(-1))) - ds["climb_id"] = ("time", climb_id, dict(_FillValue=np.int32(-1))) - ds["profile_id"] = ("time", profile_id, dict(_FillValue=np.int32(-1))) - ds["state"] = ( - "time", - state, - dict( - long_name="Glider state", - flag_values=np.array([-1, 0, 1, 2, 3], "b"), - flag_meanings="unknown surface dive climb drift", - valid_max=np.int8(3), - valid_min=np.int8(-1), - ), - ) + fill = dict(_FillValue=np.int32(-1)) + ds["dive_id"] = ("time", dive_id, fill) + ds["climb_id"] = ("time", climb_id, fill) + ds["profile_id"] = ("time", profile_id, fill) + ds["state"] = ("time", state, _STATE_ATTRS) return ds diff --git a/tests/test_process_l1.py b/tests/test_process_l1.py index c8e03f7..2d06f5b 100644 --- a/tests/test_process_l1.py +++ b/tests/test_process_l1.py @@ -1061,6 +1061,14 @@ def _make_state_arrays(n: int) -> tuple[np.ndarray, ...]: ) +def _set_profile(arrs, slc, state_val, id_val, pid_val): + """Set state, the matching dive_id or climb_id, and profile_id over slc.""" + state, dive_id, climb_id, profile_id = arrs + state[slc] = state_val + (dive_id if state_val == 1 else climb_id)[slc] = id_val + profile_id[slc] = pid_val + + @pytest.mark.parametrize("dt_s", [1.0, 15.0], ids=["post_recovery_1s", "realtime_15s"]) def test_get_profiles_drift_at_depth(dt_s: float) -> None: """Drift labelled state=3; post-drift climb spans the full water column @@ -1128,28 +1136,20 @@ def test_get_profiles_drift_x_hover_active() -> None: def test_absorb_post_drift_transients() -> None: """Brief dive/unknown segments between drift and climb are folded into climb.""" - state, dive_id, climb_id, profile_id = _make_state_arrays(50) + arrs = _make_state_arrays(50) + state, dive_id, climb_id, profile_id = arrs drift_mask = np.zeros(50, dtype=bool) - state[5:15] = 1 - dive_id[5:15] = 1 - profile_id[5:15] = 1 + _set_profile(arrs, slice(5, 15), 1, 1, 1) # main dive state[15:30] = 3 - drift_mask[15:30] = True - state[30:32] = 1 - dive_id[30:32] = 2 - profile_id[30:32] = 2 # spurious dive + drift_mask[15:30] = True # drift + _set_profile(arrs, slice(30, 32), 1, 2, 2) # spurious dive # state[32:34] left as -1 (unknown gap) - state[34:48] = 2 - climb_id[34:48] = 1 - profile_id[34:48] = 3 + _set_profile(arrs, slice(34, 48), 2, 1, 3) # real climb - prof._absorb_post_drift_transients( - state, dive_id, climb_id, profile_id, drift_mask, 10.0, 180.0 - ) - assert np.all(state[30:34] == 2) - assert np.all(climb_id[30:34] == 1) - assert np.all(profile_id[30:34] == 3) - assert np.all(dive_id[30:34] == -1) + prof._absorb_post_drift_transients(*arrs, drift_mask, 10.0, 180.0) + + assert np.all(state[30:34] == 2) and np.all(climb_id[30:34] == 1) + assert np.all(profile_id[30:34] == 3) and np.all(dive_id[30:34] == -1) assert ( np.all(state[5:15] == 1) and np.all(state[15:30] == 3) @@ -1159,64 +1159,46 @@ def test_absorb_post_drift_transients() -> None: def test_absorb_apex_unknowns_splits_at_max_pressure() -> None: """Short unknown gap between dive and climb is split at the pressure max.""" - state, dive_id, climb_id, profile_id = _make_state_arrays(30) + arrs = _make_state_arrays(30) + state, dive_id, climb_id, profile_id = arrs pressure = np.zeros(30) - state[5:13] = 1 - dive_id[5:13] = 1 - profile_id[5:13] = 1 + _set_profile(arrs, slice(5, 13), 1, 1, 1) pressure[5:13] = np.linspace(10, 80, 8) pressure[13:17] = [85, 88, 90, 87] # apex at index 15 - state[17:25] = 2 - climb_id[17:25] = 1 - profile_id[17:25] = 2 + _set_profile(arrs, slice(17, 25), 2, 1, 2) pressure[17:25] = np.linspace(85, 5, 8) - prof._absorb_apex_unknowns( - state, dive_id, climb_id, profile_id, pressure, 10.0, 180.0, 2.0 - ) - assert ( - np.all(state[13:16] == 1) - and np.all(dive_id[13:16] == 1) - and np.all(profile_id[13:16] == 1) - ) + prof._absorb_apex_unknowns(*arrs, pressure, 10.0, 180.0, 2.0) + + assert np.all(state[13:16] == 1) and np.all(dive_id[13:16] == 1) + assert np.all(profile_id[13:16] == 1) assert state[16] == 2 and climb_id[16] == 1 and profile_id[16] == 2 def test_absorb_apex_unknowns_climb_to_dive_yo_top() -> None: """Short underwater gap between climb→dive (yo-top) is split at the pressure minimum: pre-min → climb, post-min → dive.""" - state, dive_id, climb_id, profile_id = _make_state_arrays(30) + arrs = _make_state_arrays(30) + state, dive_id, climb_id, profile_id = arrs pressure = np.zeros(30) - state[5:13] = 2 - climb_id[5:13] = 1 - profile_id[5:13] = 1 + _set_profile(arrs, slice(5, 13), 2, 1, 1) pressure[5:13] = np.linspace(80, 50, 8) pressure[13:17] = [48, 45, 47, 50] # yo-top min at index 14 - state[17:25] = 1 - dive_id[17:25] = 2 - profile_id[17:25] = 2 + _set_profile(arrs, slice(17, 25), 1, 2, 2) pressure[17:25] = np.linspace(55, 200, 8) - prof._absorb_apex_unknowns( - state, dive_id, climb_id, profile_id, pressure, 10.0, 180.0, 2.0 - ) - # Up to and including the min → climb - assert ( - np.all(state[13:15] == 2) - and np.all(climb_id[13:15] == 1) - and np.all(profile_id[13:15] == 1) - ) - # After the min → dive - assert ( - np.all(state[15:17] == 1) - and np.all(dive_id[15:17] == 2) - and np.all(profile_id[15:17] == 2) - ) + prof._absorb_apex_unknowns(*arrs, pressure, 10.0, 180.0, 2.0) + + assert np.all(state[13:15] == 2) and np.all(climb_id[13:15] == 1) + assert np.all(profile_id[13:15] == 1) + assert np.all(state[15:17] == 1) and np.all(dive_id[15:17] == 2) + assert np.all(profile_id[15:17] == 2) def test_absorb_apex_unknowns_skips_surface_gap() -> None: """A gap that goes shallow (glider surfaced) is left for the surface classifier.""" - state, dive_id, climb_id, profile_id = _make_state_arrays(30) + arrs = _make_state_arrays(30) + state, _, _, _ = arrs pressure = np.zeros(30) state[5:10] = 1 pressure[5:10] = np.linspace(10, 80, 5) @@ -1227,27 +1209,20 @@ def test_absorb_apex_unknowns_skips_surface_gap() -> None: pressure[17:22] = np.linspace(1, 60, 5) before = state.copy() - prof._absorb_apex_unknowns( - state, dive_id, climb_id, profile_id, pressure, 10.0, 180.0, 2.0 - ) + prof._absorb_apex_unknowns(*arrs, pressure, 10.0, 180.0, 2.0) assert np.array_equal(state, before) def test_absorb_post_drift_transients_does_not_swallow_real_dive() -> None: """A long dive (>max_transient_duration) between drift and climb is not absorbed.""" - state, dive_id, climb_id, profile_id = _make_state_arrays(60) + arrs = _make_state_arrays(60) + state, _, _, _ = arrs drift_mask = np.zeros(60, dtype=bool) state[5:15] = 3 drift_mask[5:15] = True - state[15:35] = 1 - dive_id[15:35] = 1 - profile_id[15:35] = 1 # 200 s > 180 s - state[40:55] = 2 - climb_id[40:55] = 1 - profile_id[40:55] = 2 + _set_profile(arrs, slice(15, 35), 1, 1, 1) # 200 s > 180 s + _set_profile(arrs, slice(40, 55), 2, 1, 2) before = state.copy() - prof._absorb_post_drift_transients( - state, dive_id, climb_id, profile_id, drift_mask, 10.0, 180.0 - ) + prof._absorb_post_drift_transients(*arrs, drift_mask, 10.0, 180.0) assert np.array_equal(state, before) From dfe7dffbebeab535aa5e93466b1bc501cb231d84 Mon Sep 17 00:00:00 2001 From: Jesse Cusack Date: Thu, 7 May 2026 23:53:34 -0700 Subject: [PATCH 08/10] fix mypy --- tests/test_process_l1.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_process_l1.py b/tests/test_process_l1.py index 2d06f5b..5192919 100644 --- a/tests/test_process_l1.py +++ b/tests/test_process_l1.py @@ -1051,7 +1051,9 @@ def _make_drift_at_depth_dataset(dt_s: float) -> tuple[xr.Dataset, float, float, ) -def _make_state_arrays(n: int) -> tuple[np.ndarray, ...]: +def _make_state_arrays( + n: int, +) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Allocate (state, dive_id, climb_id, profile_id) all initialised to -1.""" return ( np.full(n, -1, dtype="b"), From 51090b1afe9d3ce2f2e5c6e869d228c921239c1c Mon Sep 17 00:00:00 2001 From: Jesse Cusack Date: Fri, 8 May 2026 00:09:56 -0700 Subject: [PATCH 09/10] fix nan profiles binning issue resulting from weird profile finding --- src/glide/process_l2.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/glide/process_l2.py b/src/glide/process_l2.py index 490a0de..20b7b81 100644 --- a/src/glide/process_l2.py +++ b/src/glide/process_l2.py @@ -231,6 +231,9 @@ def bin_l2( for row in profile_indexes: profile = ds.isel(i=slice(row[0], row[1])) + if not np.isfinite(profile.depth.values).any(): + _log.debug("Skipping profile [%d:%d] with no finite depth", row[0], row[1]) + continue time = profile.time.values idx_mid = np.searchsorted(time, 0.5 * (time[0] + time[-1])) From f6ab61a0d9c88cc0dccb7c04e881c11dc144f340 Mon Sep 17 00:00:00 2001 From: Jesse Cusack Date: Fri, 8 May 2026 00:44:38 -0700 Subject: [PATCH 10/10] fix real-time drift detection and transients --- src/glide/profiles.py | 60 ++++++++++++++++++++++++++++++++++++++++ tests/test_process_l1.py | 38 +++++++++++++++++++++++++ 2 files changed, 98 insertions(+) diff --git a/src/glide/profiles.py b/src/glide/profiles.py index 697c832..baf6813 100644 --- a/src/glide/profiles.py +++ b/src/glide/profiles.py @@ -81,6 +81,29 @@ def _detect_drift( drift[i:j] = True i = j + # The rolling-variance window misses the settling phase (overshoot + # recovery before, slow ascent start after). Expand each drift run to + # absorb adjacent samples whose pressure is close to the drift mean. + # A real climb/dive moves through drift depth quickly so the expansion + # stops shortly after crossing it. + if drift.any(): + diff_d = np.diff(drift.astype(int), prepend=0, append=0) + run_starts, run_ends = np.where(diff_d == 1)[0], np.where(diff_d == -1)[0] + tol = 2 * pressure_std_threshold + for s, e in zip(run_starts, run_ends): + mean_p = float(np.nanmean(pressure[s:e])) + while ( + s > 0 + and np.isfinite(pressure[s - 1]) + and abs(pressure[s - 1] - mean_p) < tol + ): + s -= 1 + while ( + e < n and np.isfinite(pressure[e]) and abs(pressure[e] - mean_p) < tol + ): + e += 1 + drift[s:e] = True + _log.debug("Drift: %d points from pressure detector", int(drift.sum())) return drift @@ -130,6 +153,42 @@ def _absorb_apex_unknowns( profile_id[split:e] = profile_id[e] +def _absorb_pre_drift_transients( + state: np.ndarray, + dive_id: np.ndarray, + climb_id: np.ndarray, + profile_id: np.ndarray, + drift_mask: np.ndarray, + dt_s: float, + max_transient_duration: float, +) -> None: + """Fold brief climb/unknown segments between a dive end and a drift start + into that drift. This is the dive overshoot recovery: the glider arrives + past target, briefly recovers upward, then settles into hover. Without + this pass the recovery becomes a tiny climb profile in L3.""" + n = len(state) + if not drift_mask.any(): + return + diff = np.diff(drift_mask.astype(int), prepend=0, append=0) + drift_starts = np.where(diff == 1)[0] + window = max(2, round(max_transient_duration / dt_s)) + + for ds_ in drift_starts: + if ds_ == 0: + continue + start = max(0, ds_ - window) + i = ds_ - 1 + while i >= start and (state[i] == 2 or state[i] == -1): + i -= 1 + # Only absorb when bounded on the left by a dive within the window + if i < start or state[i] != 1: + continue + state[i + 1 : ds_] = 3 + dive_id[i + 1 : ds_] = -1 + climb_id[i + 1 : ds_] = -1 + profile_id[i + 1 : ds_] = -1 + + def _absorb_post_drift_transients( state: np.ndarray, dive_id: np.ndarray, @@ -271,6 +330,7 @@ def get_profiles( profile_id[drift_mask] = -1 ids = (state, dive_id, climb_id, profile_id) + _absorb_pre_drift_transients(*ids, drift_mask, dt_s, min_surface_time) _absorb_post_drift_transients(*ids, drift_mask, dt_s, min_surface_time) _absorb_apex_unknowns(*ids, ds.pressure.values, dt_s, min_surface_time) diff --git a/tests/test_process_l1.py b/tests/test_process_l1.py index 5192919..57b0472 100644 --- a/tests/test_process_l1.py +++ b/tests/test_process_l1.py @@ -1215,6 +1215,44 @@ def test_absorb_apex_unknowns_skips_surface_gap() -> None: assert np.array_equal(state, before) +def test_absorb_pre_drift_transients() -> None: + """Brief climb/unknown samples between a dive and a drift (overshoot + recovery) are folded into the drift.""" + arrs = _make_state_arrays(50) + state, dive_id, climb_id, profile_id = arrs + drift_mask = np.zeros(50, dtype=bool) + _set_profile(arrs, slice(5, 15), 1, 1, 1) # dive (peak overshoot) + _set_profile(arrs, slice(15, 18), 2, 1, 2) # brief recovery climb + state[18:20] = -1 # brief unknown + state[20:35] = 3 + drift_mask[20:35] = True # drift + _set_profile(arrs, slice(35, 45), 2, 2, 3) # post-drift climb + + prof._absorb_pre_drift_transients(*arrs, drift_mask, 10.0, 180.0) + + # The recovery climb and unknown gap are now part of the drift + assert np.all(state[15:20] == 3) + assert np.all(climb_id[15:20] == -1) and np.all(dive_id[15:20] == -1) + assert np.all(profile_id[15:20] == -1) + # Surrounding dive and post-drift climb are unchanged + assert np.all(state[5:15] == 1) and np.all(state[35:45] == 2) + + +def test_absorb_pre_drift_transients_does_not_swallow_long_climb() -> None: + """A long climb (>max_transient_duration) before a drift is not absorbed.""" + arrs = _make_state_arrays(60) + state, _, _, _ = arrs + drift_mask = np.zeros(60, dtype=bool) + _set_profile(arrs, slice(0, 10), 1, 1, 1) + _set_profile(arrs, slice(10, 35), 2, 1, 2) # 250 s > 180 s + state[35:50] = 3 + drift_mask[35:50] = True + before = state.copy() + + prof._absorb_pre_drift_transients(*arrs, drift_mask, 10.0, 180.0) + assert np.array_equal(state, before) + + def test_absorb_post_drift_transients_does_not_swallow_real_dive() -> None: """A long dive (>max_transient_duration) between drift and climb is not absorbed.""" arrs = _make_state_arrays(60)