From 613746e1c6253bbf98b124184b7891b98a1efc8b Mon Sep 17 00:00:00 2001
From: Jesse Cusack <jesse.cusack@oregonstate.edu>
Date: Thu, 7 May 2026 14:31:19 -0700
Subject: [PATCH 01/10] find drifts

---
 src/glide/cli.py         |   4 +-
 src/glide/profiles.py    | 122 +++++++++++++++++++++++++++++++++++--
 tests/test_process_l1.py | 127 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 247 insertions(+), 6 deletions(-)

diff --git a/src/glide/cli.py b/src/glide/cli.py
index 7a7a633..cbc35b5 100644
--- a/src/glide/cli.py
+++ b/src/glide/cli.py
@@ -228,7 +228,9 @@ def l2(
 
     merged = process_l1.calculate_thermodynamics(merged, conf)
 
-    out = profiles.get_profiles(merged, shallowest_profile, min_surface_time)
+    out = profiles.get_profiles(
+        merged, shallowest_profile, min_surface_time, flt=flt_raw
+    )
 
     out = profiles.assign_surface_state(out, flt=flt_raw)
 
diff --git a/src/glide/profiles.py b/src/glide/profiles.py
index 18e8e8b..46707d3 100644
--- a/src/glide/profiles.py
+++ b/src/glide/profiles.py
@@ -7,12 +7,94 @@
 _log = logging.getLogger(__name__)
 
 
+def _detect_drift(
+    pressure: np.ndarray,
+    time: np.ndarray,
+    dt_s: float,
+    flt: xr.Dataset | None = None,
+    min_drift_duration: float = 600.0,
+    pressure_std_threshold: float = 2.0,
+    surface_pressure: float = 2.0,
+) -> np.ndarray:
+    """Return boolean mask True where glider is drifting at depth.
+
+    Uses x_target_hover_depth from raw flight data if available (non-NaN and
+    > 0 indicates hover/drift mode is active); falls back to a rolling-variance
+    detector on the pressure signal.  A candidate region must have rolling
+    pressure std < pressure_std_threshold and pressure > surface_pressure
+    sustained for at least min_drift_duration seconds.
+    """
+    n = len(pressure)
+    drift = np.zeros(n, dtype=bool)
+
+    if flt is not None and "x_target_hover_depth" in flt:
+        flt_time = flt.m_present_time.values
+        hover_depth = flt.x_target_hover_depth.values
+        order = np.argsort(flt_time)
+        flt_time = flt_time[order]
+        hover_depth = hover_depth[order]
+        pos = np.searchsorted(flt_time, time)
+        pos_left = np.maximum(pos - 1, 0)
+        pos_right = np.minimum(pos, len(flt_time) - 1)
+        nearest = np.where(
+            np.abs(time - flt_time[pos_left]) <= np.abs(time - flt_time[pos_right]),
+            pos_left,
+            pos_right,
+        )
+        drift = np.isfinite(hover_depth[nearest]) & (hover_depth[nearest] > 0)
+        _log.debug("Drift: %d points from x_target_hover_depth", int(drift.sum()))
+        return drift
+
+    valid = np.isfinite(pressure)
+    if not valid.any():
+        return drift
+
+    # Window = min_drift_duration / 4; smaller window reduces edge miss at drift boundaries.
+    # np.convolve mode='same' returns max(n, window) elements, so cap half so window < n.
+    half = min((n - 1) // 2, max(2, round(min_drift_duration / 4 / dt_s)))
+    window = 2 * half + 1
+    ones = np.ones(window)
+
+    p_fill = np.where(valid, pressure, 0.0)
+    cnt_w = np.convolve(valid.astype(float), ones, mode="same")
+    sum_w = np.convolve(p_fill, ones, mode="same")
+    sumsq_w = np.convolve(p_fill**2, ones, mode="same")
+
+    safe_cnt = np.where(cnt_w >= 3, cnt_w, 1.0)
+    mean_w = sum_w / safe_cnt
+    var_w = sumsq_w / safe_cnt - mean_w**2
+    rolling_std = np.where(cnt_w >= 3, np.sqrt(np.maximum(0.0, var_w)), np.inf)
+
+    candidate = (
+        valid & (pressure > surface_pressure) & (rolling_std < pressure_std_threshold)
+    )
+
+    # Keep only contiguous runs that span at least min_drift_duration
+    i = 0
+    while i < n:
+        if not candidate[i]:
+            i += 1
+            continue
+        j = i + 1
+        while j < n and candidate[j]:
+            j += 1
+        if (j - i) * dt_s >= min_drift_duration:
+            drift[i:j] = True
+        i = j
+
+    _log.debug("Drift: %d points from pressure-based detector", int(drift.sum()))
+    return drift
+
+
 def get_profiles(
     ds: xr.Dataset,
     shallowest_profile: float,
     min_surface_time: float = 180.0,
+    flt: xr.Dataset | None = None,
+    min_drift_duration: float = 600.0,
+    drift_pressure_std: float = 2.0,
 ) -> xr.Dataset:
-    """Identify dive and climb profiles from a pressure time series.
+    """Identify dive, climb, and drift profiles from a pressure time series.
 
     Parameters
     ----------
@@ -24,6 +106,15 @@ def get_profiles(
         Minimum time (seconds) between consecutive dive apexes.  Used to
         compute sample-count thresholds so the detector adapts to the data
         sampling rate.  A value of ~180 s works for most Slocum deployments.
+    flt : xr.Dataset, optional
+        Raw flight data.  Used to detect drift-at-depth via BAW_HOVER_ACTIVE
+        when present; otherwise pressure-based detection is used.
+    min_drift_duration : float
+        Minimum duration (seconds) for a constant-pressure period to be
+        classified as drift (default 600 s).
+    drift_pressure_std : float
+        Rolling pressure standard deviation threshold (dbar) for pressure-based
+        drift detection (default 2.0 dbar).
     """
     raw_diff = np.diff(ds.time.values)
     if np.issubdtype(raw_diff.dtype, np.timedelta64):
@@ -32,6 +123,21 @@ def get_profiles(
         dt_s = float(np.nanmedian(raw_diff))
     fs = 1.0 / dt_s
 
+    drift_mask = _detect_drift(
+        ds.pressure.values,
+        ds.time.values,
+        dt_s,
+        flt=flt,
+        min_drift_duration=min_drift_duration,
+        pressure_std_threshold=drift_pressure_std,
+    )
+
+    # Mask drift before profile finding so profinder sees a gap across the
+    # drift period; with missing="drop" it will pair the real climb after
+    # drift with the correct dive.
+    pressure_masked = ds.pressure.values.copy().astype(float)
+    pressure_masked[drift_mask] = np.nan
+
     peaks_kwargs = {
         "height": shallowest_profile,
         "prominence": shallowest_profile,
@@ -52,7 +158,7 @@ def get_profiles(
     )
 
     profiles = find_profiles(
-        ds.pressure.values,
+        pressure_masked,
         peaks_kwargs=peaks_kwargs,
         troughs_kwargs=troughs_kwargs,
         missing="drop",
@@ -85,6 +191,12 @@ def get_profiles(
         climb_counter += 1
         profile_counter += 1
 
+    # Override drift points, clearing any profile membership assigned above
+    state[drift_mask] = 3
+    dive_id[drift_mask] = -1
+    climb_id[drift_mask] = -1
+    profile_id[drift_mask] = -1
+
     ds["dive_id"] = ("time", dive_id, dict(_FillValue=np.int32(-1)))
     ds["climb_id"] = ("time", climb_id, dict(_FillValue=np.int32(-1)))
     ds["profile_id"] = ("time", profile_id, dict(_FillValue=np.int32(-1)))
@@ -93,9 +205,9 @@ def get_profiles(
         state,
         dict(
             long_name="Glider state",
-            flag_values=np.array([-1, 0, 1, 2], "b"),
-            flag_meanings="unknown surface dive climb",
-            valid_max=np.int8(2),
+            flag_values=np.array([-1, 0, 1, 2, 3], "b"),
+            flag_meanings="unknown surface dive climb drift",
+            valid_max=np.int8(3),
             valid_min=np.int8(-1),
         ),
     )
diff --git a/tests/test_process_l1.py b/tests/test_process_l1.py
index 09a555b..689e293 100644
--- a/tests/test_process_l1.py
+++ b/tests/test_process_l1.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 import pandas as pd
+import pytest
 import xarray as xr
 
 import glide.gliderdac as gd
@@ -911,3 +912,129 @@ def test_emit_ioos_profiles_yo_yo_shares_time_uv(tmp_path) -> None:
     # All four profiles share the same scalar time_uv (= the segment's value).
     assert all(t == time_uvs[0] for t in time_uvs)
     assert time_uvs[0] == 25.0
+
+
+def _make_drift_at_depth_dataset(dt_s: float) -> tuple[xr.Dataset, float, float, float]:
+    """Synthetic glider dataset with one dive-overshoot-drift-climb cycle.
+
+    Returns (ds, t_dive_start, t_drift_start, t_climb_start) in seconds.
+
+    Pressure trace:
+      - Surface at ~0.5 dbar for 120 s
+      - Dive to 105 dbar (target 100, overshoot by 5 dbar) over 1500 s
+      - Overshoot correction: 105→100 dbar over 120 s  ← wrongly identified as
+        the climb before this fix
+      - Drift at 100 ± 2 dbar (sinusoidal wobble, period 300 s) for 3600 s
+      - Climb from 100 to surface over 1500 s
+      - Surface at ~0.5 dbar for 120 s
+    """
+    t_surface = 120.0
+    t_dive = 1500.0
+    t_overshoot = 120.0
+    t_drift = 3600.0
+    t_climb = 1500.0
+
+    t0_dive = t_surface
+    t0_overshoot = t0_dive + t_dive
+    t0_drift = t0_overshoot + t_overshoot
+    t0_climb = t0_drift + t_drift
+    t_total = t0_climb + t_climb + t_surface
+
+    t = np.arange(0.0, t_total, dt_s)
+    p = np.full(len(t), 0.5)
+
+    mask = (t >= t0_dive) & (t < t0_overshoot)
+    p[mask] = 105.0 * (t[mask] - t0_dive) / t_dive
+
+    mask = (t >= t0_overshoot) & (t < t0_drift)
+    p[mask] = 105.0 - 5.0 * (t[mask] - t0_overshoot) / t_overshoot
+
+    mask = (t >= t0_drift) & (t < t0_climb)
+    p[mask] = 100.0 + 2.0 * np.sin(2.0 * np.pi * (t[mask] - t0_drift) / 300.0)
+
+    mask = (t >= t0_climb) & (t < t0_climb + t_climb)
+    p[mask] = 100.0 * (1.0 - (t[mask] - t0_climb) / t_climb)
+
+    ds = xr.Dataset({"pressure": ("time", p)}, coords={"time": t})
+    return ds, t0_dive, t0_drift, t0_climb
+
+
+@pytest.mark.parametrize(
+    "dt_s",
+    [1.0, 15.0],
+    ids=["post_recovery_1s", "realtime_15s"],
+)
+def test_get_profiles_drift_at_depth(dt_s: float) -> None:
+    """Drift-at-depth is detected and labelled state=3; the real climb after
+    drift must span the full water column rather than being cut off.
+
+    Without drift masking, the overshoot correction (105→100 dbar before the
+    drift plateau) would be mis-identified as the climb portion, leaving the
+    actual ascent unlabelled.
+    """
+    ds, _t_dive, t_drift_start, t_climb_start = _make_drift_at_depth_dataset(dt_s)
+
+    result = prof.get_profiles(ds, shallowest_profile=5.0, min_surface_time=180.0)
+
+    state = result.state.values
+    time = ds.time.values
+
+    # Most of the drift plateau must be labelled state=3
+    drift_state = state[(time >= t_drift_start) & (time < t_climb_start)]
+    assert (drift_state == 3).mean() > 0.7, (
+        f"Only {(drift_state == 3).mean():.1%} of drift period is state=3 at dt_s={dt_s}"
+    )
+
+    # Most of the real climb after drift must be labelled state=2
+    climb_state = state[time >= t_climb_start]
+    assert (climb_state == 2).mean() > 0.7, (
+        f"Only {(climb_state == 2).mean():.1%} of real climb is state=2 at dt_s={dt_s}"
+    )
+
+    # The climb profile must span the full water column
+    climb_pressures = ds.pressure.values[state == 2]
+    assert climb_pressures.max() > 90.0, (
+        "Climb profile must include deep data near 100 dbar"
+    )
+    assert climb_pressures.min() < 5.0, "Climb profile must reach the near-surface"
+
+    # state=3 (drift) must appear in the CF flag_values
+    assert 3 in result.state.attrs["flag_values"]
+
+
+def test_get_profiles_drift_x_target_hover_depth() -> None:
+    """x_target_hover_depth in raw flight data takes precedence over pressure-based
+    drift detection and marks the exact drift period as state=3."""
+    dt_s = 15.0
+    ds, _t_dive, t_drift_start, t_climb_start = _make_drift_at_depth_dataset(dt_s)
+
+    time = ds.time.values
+
+    # Synthetic flight data at 4 s intervals with target hover depth set during drift
+    flt_time = np.arange(0.0, time[-1] + 4.0, 4.0)
+    hover_depth = np.full(len(flt_time), np.nan)
+    hover_depth[(flt_time >= t_drift_start) & (flt_time < t_climb_start)] = 100.0
+
+    flt = xr.Dataset(
+        {
+            "m_present_time": ("i", flt_time),
+            "x_target_hover_depth": ("i", hover_depth),
+        }
+    )
+
+    result = prof.get_profiles(
+        ds, shallowest_profile=5.0, min_surface_time=180.0, flt=flt
+    )
+
+    state = result.state.values
+    drift_mask = (time >= t_drift_start) & (time < t_climb_start)
+
+    # BAW_HOVER_ACTIVE covers the drift period exactly, so detection should be
+    # almost complete (within one 4 s flight sample of each boundary)
+    assert (state[drift_mask] == 3).mean() > 0.9, (
+        "BAW_HOVER_ACTIVE should detect drift with >90% coverage"
+    )
+
+    # Real climb after drift must still be captured
+    climb_state = state[time >= t_climb_start]
+    assert (climb_state == 2).mean() > 0.7

From 67587dffe841cc1985f8f5f1bb6768377ff96149 Mon Sep 17 00:00:00 2001
From: Jesse Cusack <jesse.cusack@oregonstate.edu>
Date: Thu, 7 May 2026 21:52:41 -0700
Subject: [PATCH 02/10] fix profile weirdness

---
 src/glide/profiles.py    | 312 ++++++++++++++++++++++++++-------
 tests/test_process_l1.py | 365 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 603 insertions(+), 74 deletions(-)

diff --git a/src/glide/profiles.py b/src/glide/profiles.py
index 46707d3..55aa52c 100644
--- a/src/glide/profiles.py
+++ b/src/glide/profiles.py
@@ -18,32 +18,41 @@ def _detect_drift(
 ) -> np.ndarray:
     """Return boolean mask True where glider is drifting at depth.
 
-    Uses x_target_hover_depth from raw flight data if available (non-NaN and
-    > 0 indicates hover/drift mode is active); falls back to a rolling-variance
-    detector on the pressure signal.  A candidate region must have rolling
-    pressure std < pressure_std_threshold and pressure > surface_pressure
-    sustained for at least min_drift_duration seconds.
+    Uses x_hover_active from raw flight data if available (reported as 1 when
+    a hover episode starts and 0 when it ends, with -127 as the missing-data
+    sentinel; the most recent reported value is forward-filled to every science
+    timestamp).  Falls back to a rolling-variance detector on the pressure
+    signal: a candidate region must have rolling pressure std <
+    pressure_std_threshold and pressure > surface_pressure sustained for at
+    least min_drift_duration seconds.
     """
     n = len(pressure)
     drift = np.zeros(n, dtype=bool)
 
-    if flt is not None and "x_target_hover_depth" in flt:
+    if flt is not None and "x_hover_active" in flt:
         flt_time = flt.m_present_time.values
-        hover_depth = flt.x_target_hover_depth.values
-        order = np.argsort(flt_time)
-        flt_time = flt_time[order]
-        hover_depth = hover_depth[order]
-        pos = np.searchsorted(flt_time, time)
-        pos_left = np.maximum(pos - 1, 0)
-        pos_right = np.minimum(pos, len(flt_time) - 1)
-        nearest = np.where(
-            np.abs(time - flt_time[pos_left]) <= np.abs(time - flt_time[pos_right]),
-            pos_left,
-            pos_right,
+        hover_active = flt.x_hover_active.values
+        # -127 is the no-data sentinel for the signed-byte variable; treat as
+        # missing along with NaN. Forward-fill the most recent {0,1} report.
+        finite = (
+            np.isfinite(hover_active) & np.isfinite(flt_time) & (hover_active != -127)
         )
-        drift = np.isfinite(hover_depth[nearest]) & (hover_depth[nearest] > 0)
-        _log.debug("Drift: %d points from x_target_hover_depth", int(drift.sum()))
-        return drift
+        if finite.any():
+            flt_time = flt_time[finite]
+            hover_active = hover_active[finite]
+            order = np.argsort(flt_time)
+            flt_time = flt_time[order]
+            hover_active = hover_active[order]
+            pos = np.searchsorted(flt_time, time, side="right") - 1
+            in_range = pos >= 0
+            drift[in_range] = hover_active[pos[in_range]] == 1
+            _log.debug(
+                "Drift: %d points from x_hover_active (%d reports)",
+                int(drift.sum()),
+                len(flt_time),
+            )
+            return drift
+        _log.debug("x_hover_active has no finite values; using pressure detector")
 
     valid = np.isfinite(pressure)
     if not valid.any():
@@ -86,6 +95,108 @@ def _detect_drift(
     return drift
 
 
+def _absorb_apex_unknowns(
+    state: np.ndarray,
+    dive_id: np.ndarray,
+    climb_id: np.ndarray,
+    profile_id: np.ndarray,
+    pressure: np.ndarray,
+    dt_s: float,
+    max_gap_duration: float,
+    surface_pressure: float = 2.0,
+) -> None:
+    """Absorb short unknown gaps at the underwater apex of a multi-yo segment.
+
+    profinder occasionally leaves a few samples between dive_end and
+    climb_start unclassified at the peak.  When such a gap is short, sits at
+    depth (max pressure > surface_pressure), and is sandwiched directly
+    between a dive (1) and a climb (2), split it at the pressure maximum:
+    points up to and including the max are folded into the preceding dive,
+    points after into the following climb.
+    """
+    n = len(state)
+    unknown = state == -1
+    if not unknown.any():
+        return
+
+    diff = np.diff(unknown.astype(int), prepend=0, append=0)
+    starts = np.where(diff == 1)[0]
+    ends = np.where(diff == -1)[0]
+    max_samples = max(2, round(max_gap_duration / dt_s))
+
+    for s, e in zip(starts, ends):
+        if (e - s) > max_samples:
+            continue
+        if s == 0 or e >= n:
+            continue
+        if state[s - 1] != 1 or state[e] != 2:
+            continue
+        p = pressure[s:e]
+        finite = np.isfinite(p)
+        if not finite.any() or float(np.min(p[finite])) < surface_pressure:
+            continue
+
+        local_max = int(np.argmax(np.where(finite, p, -np.inf)))
+        split = s + local_max + 1
+
+        state[s:split] = 1
+        dive_id[s:split] = dive_id[s - 1]
+        profile_id[s:split] = profile_id[s - 1]
+
+        state[split:e] = 2
+        climb_id[split:e] = climb_id[e]
+        profile_id[split:e] = profile_id[e]
+
+
+def _absorb_post_drift_transients(
+    state: np.ndarray,
+    dive_id: np.ndarray,
+    climb_id: np.ndarray,
+    profile_id: np.ndarray,
+    drift_mask: np.ndarray,
+    dt_s: float,
+    max_transient_duration: float,
+) -> None:
+    """Fold brief dive/unknown segments between a drift end and the next climb
+    into that climb.
+
+    Masking the drift period leaves a discontinuous pressure signal across the
+    gap, which occasionally causes profinder to detect a spurious tiny peak
+    just after the drift in addition to the real climb.  This pass extends the
+    climb backward to the drift end if the only intervening states are short
+    dive (1) or unknown (-1) segments.  Anything longer than
+    max_transient_duration past the drift end is left alone.
+    """
+    n = len(state)
+    if not drift_mask.any():
+        return
+
+    drift_diff = np.diff(drift_mask.astype(int), prepend=0, append=0)
+    drift_ends = np.where(drift_diff == -1)[0]
+    window = max(2, round(max_transient_duration / dt_s))
+
+    for de in drift_ends:
+        if de >= n:
+            continue
+        end = min(de + window, n)
+        in_window = state[de:end]
+        climb_local = np.where(in_window == 2)[0]
+        if len(climb_local) == 0:
+            continue
+        first_climb = de + int(climb_local[0])
+        if first_climb == de:
+            continue
+        intermediate = state[de:first_climb]
+        if not np.all((intermediate == 1) | (intermediate == -1)):
+            continue
+        cid = climb_id[first_climb]
+        pid = profile_id[first_climb]
+        state[de:first_climb] = 2
+        climb_id[de:first_climb] = cid
+        profile_id[de:first_climb] = pid
+        dive_id[de:first_climb] = -1
+
+
 def get_profiles(
     ds: xr.Dataset,
     shallowest_profile: float,
@@ -93,6 +204,8 @@ def get_profiles(
     flt: xr.Dataset | None = None,
     min_drift_duration: float = 600.0,
     drift_pressure_std: float = 2.0,
+    stall_tolerance: float = 180.0,
+    min_pressure_rate: float = 0.05,
 ) -> xr.Dataset:
     """Identify dive, climb, and drift profiles from a pressure time series.
 
@@ -115,6 +228,16 @@ def get_profiles(
     drift_pressure_std : float
         Rolling pressure standard deviation threshold (dbar) for pressure-based
         drift detection (default 2.0 dbar).
+    stall_tolerance : float
+        Maximum time (seconds) the glider can stall or briefly reverse during
+        a dive or climb without profinder terminating the profile (default
+        180 s = 3 min, sized to handle realistic glider stalls during
+        ascent).  Converted to profinder's per-sample `run_length` argument.
+    min_pressure_rate : float
+        Minimum pressure change rate (dbar/s) for a sample to count toward
+        ascent or descent classification (default 0.05 dbar/s ≈ 5 cm/s, well
+        below normal glider vertical speed of 10–20 cm/s).  Converted to
+        profinder's per-sample `min_pressure_change` argument.
     """
     raw_diff = np.diff(ds.time.values)
     if np.issubdtype(raw_diff.dtype, np.timedelta64):
@@ -145,7 +268,10 @@ def get_profiles(
         "width": max(2, round(20 * fs)),  # ≥20 s half-width detects ~10 m dives
     }
     troughs_kwargs = {
-        "prominence": 2,
+        # Real inter-profile troughs (surfacings or inter-yo apexes) have a
+        # depth contrast at least as large as a real dive's prominence; a
+        # smaller "trough" is a stall or noise dip within a profile.
+        "prominence": shallowest_profile,
         "distance": max(2, round(30 * fs)),
         "width": max(1, round(5 * fs)),
     }
@@ -157,10 +283,19 @@ def get_profiles(
         troughs_kwargs,
     )
 
+    # Scale stall tolerance and pressure-rate threshold to per-sample units.
+    # These guard the climb/dive run from being terminated by brief stalls
+    # (e.g. glider hangs at depth and drifts back down a few dbar before
+    # resuming the ascent).
+    run_length = max(2, round(stall_tolerance / dt_s))
+    min_pressure_change = float(min_pressure_rate * dt_s)
+
     profiles = find_profiles(
         pressure_masked,
         peaks_kwargs=peaks_kwargs,
         troughs_kwargs=troughs_kwargs,
+        run_length=run_length,
+        min_pressure_change=min_pressure_change,
         missing="drop",
     )
 
@@ -197,6 +332,20 @@ def get_profiles(
     climb_id[drift_mask] = -1
     profile_id[drift_mask] = -1
 
+    _absorb_post_drift_transients(
+        state, dive_id, climb_id, profile_id, drift_mask, dt_s, min_surface_time
+    )
+
+    _absorb_apex_unknowns(
+        state,
+        dive_id,
+        climb_id,
+        profile_id,
+        ds.pressure.values,
+        dt_s,
+        min_surface_time,
+    )
+
     ds["dive_id"] = ("time", dive_id, dict(_FillValue=np.int32(-1)))
     ds["climb_id"] = ("time", climb_id, dict(_FillValue=np.int32(-1)))
     ds["profile_id"] = ("time", profile_id, dict(_FillValue=np.int32(-1)))
@@ -221,61 +370,102 @@ def assign_surface_state(
     dt: float = 300.0,
     surface_pressure: float = 2.0,
 ) -> xr.Dataset:
-    """Assign surface state (0) to unknown points near GPS fixes.
-
-    A point with state == -1 (unknown) is marked as surface (0) only if it is
-    within `dt` seconds of a valid GPS fix AND shallower than `surface_pressure`
-    dbar.  The pressure gate prevents the broad GPS time window from reaching
-    into adjacent dives.
+    """Assign surface state (0) to unknown points.
+
+    Two checks are applied in sequence:
+
+    1. A point with state == -1 (unknown) is marked as surface if it is within
+       `dt` seconds of a valid GPS fix AND shallower than `surface_pressure`
+       dbar.  The pressure gate prevents the broad GPS time window from
+       reaching into adjacent dives.
+    2. Any remaining contiguous unknown segment is marked as surface unless
+       its pressure record contains a finite value at or above
+       `surface_pressure` (i.e. evidence the glider was deeper).  Missing
+       pressure is the expected state at the surface — the science sensor
+       is often out of water — so all-NaN unknown segments are classified
+       as surface.  This covers the period before the first dive, after the
+       last climb, and any between-profile gap.
 
     Parameters
     ----------
     ds : xr.Dataset
-        Dataset with state variable from get_profiles.
+        Dataset with state and pressure variables from get_profiles.
     flt : xr.Dataset, optional
         Raw flight data containing m_gps_lat/lon with valid GPS fix times.
+        If absent, only the pressure-based check runs.
     dt : float
         Time threshold in seconds for matching to GPS fixes (default 300).
     surface_pressure : float
         Maximum pressure (dbar) for a point to be considered at the surface
         (default 2.0).
     """
-    if flt is None or "m_gps_lat" not in flt:
-        _log.warning("No flight data with GPS, cannot assign surface state")
-        return ds
-
-    gps_valid = np.isfinite(flt.m_gps_lat.values)
-    if not gps_valid.any():
-        _log.warning("No valid GPS fixes found")
-        return ds
-
-    gps_times = np.sort(flt.m_present_time.values[gps_valid])
-
     state = ds.state.values.copy()
+    pressure = ds.pressure.values
     time_l2 = ds.time.values
-    unknown_mask = state == -1
-
-    if unknown_mask.any():
-        unknown_times = time_l2[unknown_mask]
-        pos = np.searchsorted(gps_times, unknown_times)
-        dist_left = np.where(
-            pos > 0,
-            unknown_times - gps_times[np.maximum(pos - 1, 0)],
-            np.inf,
-        )
-        dist_right = np.where(
-            pos < len(gps_times),
-            gps_times[np.minimum(pos, len(gps_times) - 1)] - unknown_times,
-            np.inf,
-        )
-        is_near = np.minimum(dist_left, dist_right) <= dt
-        is_shallow = np.isfinite(ds.pressure.values[unknown_mask]) & (
-            ds.pressure.values[unknown_mask] < surface_pressure
-        )
-        is_near = is_near & is_shallow
 
-        state[unknown_mask] = np.where(is_near, np.int8(0), state[unknown_mask])
-        _log.debug("Assigned %d points to surface state", int(is_near.sum()))
+    if flt is not None and "m_gps_lat" in flt:
+        gps_valid = np.isfinite(flt.m_gps_lat.values)
+        if gps_valid.any():
+            gps_times = np.sort(flt.m_present_time.values[gps_valid])
+            unknown_mask = state == -1
+            if unknown_mask.any():
+                unknown_times = time_l2[unknown_mask]
+                pos = np.searchsorted(gps_times, unknown_times)
+                dist_left = np.where(
+                    pos > 0,
+                    unknown_times - gps_times[np.maximum(pos - 1, 0)],
+                    np.inf,
+                )
+                dist_right = np.where(
+                    pos < len(gps_times),
+                    gps_times[np.minimum(pos, len(gps_times) - 1)] - unknown_times,
+                    np.inf,
+                )
+                is_near = np.minimum(dist_left, dist_right) <= dt
+                is_shallow = np.isfinite(pressure[unknown_mask]) & (
+                    pressure[unknown_mask] < surface_pressure
+                )
+                state[unknown_mask] = np.where(
+                    is_near & is_shallow, np.int8(0), state[unknown_mask]
+                )
+                _log.debug(
+                    "GPS-proximity surface check: %d points assigned",
+                    int((is_near & is_shallow).sum()),
+                )
+        else:
+            _log.warning("No valid GPS fixes found")
+    else:
+        _log.warning("No flight data with GPS, skipping GPS-proximity surface check")
+
+    # Whole-segment pressure check: any contiguous unknown run that never
+    # exceeds surface_pressure is the glider sitting at the surface
+    unknown = state == -1
+    if unknown.any():
+        n = len(state)
+        diff = np.diff(unknown.astype(int), prepend=0, append=0)
+        starts = np.where(diff == 1)[0]
+        ends = np.where(diff == -1)[0]
+        n_assigned = 0
+        for s, e in zip(starts, ends):
+            p = pressure[s:e]
+            finite = np.isfinite(p)
+            # Skip if there is finite evidence the glider was underwater.
+            if finite.any() and float(np.max(p[finite])) >= surface_pressure:
+                continue
+            # An all-NaN gap bounded by dive→climb is the underwater apex
+            # that the apex absorber couldn't split because pressure was
+            # missing.  Leave it unknown rather than mislabel as surface.
+            if (
+                not finite.any()
+                and 0 < s
+                and e < n
+                and state[s - 1] == 1
+                and state[e] == 2
+            ):
+                continue
+            state[s:e] = 0
+            n_assigned += int(e - s)
+        _log.debug("Whole-segment surface check: %d points assigned", n_assigned)
 
     ds["state"] = ("time", state, ds.state.attrs)
     return ds
diff --git a/tests/test_process_l1.py b/tests/test_process_l1.py
index 689e293..c3fed7d 100644
--- a/tests/test_process_l1.py
+++ b/tests/test_process_l1.py
@@ -375,19 +375,124 @@ def test_assign_surface_state_pressure_gate() -> None:
 
 
 def test_assign_surface_state_no_flight_data() -> None:
-    """Test that assign_surface_state handles missing flight data gracefully."""
+    """Without flight data the GPS-proximity check is skipped, but shallow
+    unknown segments are still classified as surface by the pressure-only pass."""
     n = 20
     state = np.full(n, -1, dtype="b")
     state[5:15] = 1
+    pressure = np.full(n, 0.5)
+    pressure[5:15] = 50.0  # dive at depth
 
     ds = xr.Dataset(
-        {"state": ("time", state)},
+        {"state": ("time", state), "pressure": ("time", pressure)},
         coords={"time": np.arange(n, dtype=float)},
     )
 
-    # No flight data - should return unchanged
-    result = prof.assign_surface_state(ds, flt=None)
-    assert np.array_equal(result.state.values, state)
+    result = prof.assign_surface_state(ds, flt=None, surface_pressure=2.0)
+
+    assert np.all(result.state.values[5:15] == 1), "Dive states unchanged"
+    assert np.all(result.state.values[:5] == 0), "Pre-dive shallow → surface"
+    assert np.all(result.state.values[15:] == 0), "Post-dive shallow → surface"
+
+
+def test_assign_surface_state_pre_first_dive_and_post_last_climb() -> None:
+    """Time before the first dive and after the last climb is classified as
+    surface when pressure stays below surface_pressure."""
+    n = 50
+    state = np.full(n, -1, dtype="b")
+    state[10:20] = 1  # dive
+    state[20:30] = 2  # climb
+    pressure = np.full(n, 0.3)
+    pressure[10:20] = np.linspace(0.3, 80, 10)
+    pressure[20:30] = np.linspace(80, 0.3, 10)
+
+    ds = xr.Dataset(
+        {"state": ("time", state), "pressure": ("time", pressure)},
+        coords={"time": np.arange(n, dtype=float)},
+    )
+
+    result = prof.assign_surface_state(ds, flt=None, surface_pressure=2.0)
+
+    assert np.all(result.state.values[:10] == 0), "Pre-first-dive → surface"
+    assert np.all(result.state.values[30:] == 0), "Post-last-climb → surface"
+    assert np.all(result.state.values[10:20] == 1)
+    assert np.all(result.state.values[20:30] == 2)
+
+
+def test_assign_surface_state_all_nan_pressure_is_surface() -> None:
+    """Surface segments where the science pressure sensor is out of water
+    (all NaN) must still be classified as surface."""
+    n = 30
+    state = np.full(n, -1, dtype="b")
+    state[5:15] = 1  # dive
+    state[15:20] = 2  # climb
+    pressure = np.full(n, np.nan)
+    pressure[5:15] = np.linspace(2, 80, 10)
+    pressure[15:20] = np.linspace(80, 2, 5)
+    # Indices 0–4 and 20–29 have NaN pressure (surface, sensor out of water)
+
+    ds = xr.Dataset(
+        {"state": ("time", state), "pressure": ("time", pressure)},
+        coords={"time": np.arange(n, dtype=float)},
+    )
+
+    result = prof.assign_surface_state(ds, flt=None, surface_pressure=2.0)
+
+    assert np.all(result.state.values[:5] == 0), "Pre-dive all-NaN → surface"
+    assert np.all(result.state.values[20:] == 0), "Post-climb all-NaN → surface"
+    assert np.all(result.state.values[5:15] == 1)
+    assert np.all(result.state.values[15:20] == 2)
+
+
+def test_assign_surface_state_protects_apex_nan_gap() -> None:
+    """An all-NaN unknown gap sandwiched between a dive and a climb is the
+    underwater apex (the apex absorber couldn't split it because pressure
+    was missing).  It must NOT be reclassified as surface."""
+    n = 30
+    state = np.full(n, -1, dtype="b")
+    state[5:13] = 1  # dive
+    state[16:25] = 2  # climb
+    pressure = np.full(n, np.nan)
+    pressure[5:13] = np.linspace(2, 80, 8)
+    pressure[16:25] = np.linspace(80, 2, 9)
+    # Indices 13–15 NaN (sensor briefly missing at the apex)
+
+    ds = xr.Dataset(
+        {"state": ("time", state), "pressure": ("time", pressure)},
+        coords={"time": np.arange(n, dtype=float)},
+    )
+
+    result = prof.assign_surface_state(ds, flt=None, surface_pressure=2.0)
+
+    assert np.all(result.state.values[13:16] == -1), (
+        "Apex NaN gap must stay unknown, not flip to surface"
+    )
+    assert np.all(result.state.values[5:13] == 1)
+    assert np.all(result.state.values[16:25] == 2)
+
+
+def test_assign_surface_state_keeps_aborted_dive_unknown() -> None:
+    """An unknown segment that contains a partial dive (pressure exceeds
+    surface_pressure) must not be reclassified as surface."""
+    n = 30
+    state = np.full(n, -1, dtype="b")
+    state[20:25] = 2  # climb
+    pressure = np.full(n, 0.3)
+    # Aborted dive between samples 5–15: pressure briefly reaches 4 dbar
+    pressure[5:15] = np.array([1.0, 2.0, 3.0, 4.0, 4.0, 4.0, 3.0, 2.0, 1.0, 0.5])
+    pressure[20:25] = np.linspace(40, 0.3, 5)
+
+    ds = xr.Dataset(
+        {"state": ("time", state), "pressure": ("time", pressure)},
+        coords={"time": np.arange(n, dtype=float)},
+    )
+
+    result = prof.assign_surface_state(ds, flt=None, surface_pressure=2.0)
+
+    # The whole pre-climb segment contains a sample > surface_pressure → stays unknown
+    assert np.all(result.state.values[:20] == -1)
+    # Post-climb stays surface (pressure shallow throughout)
+    assert np.all(result.state.values[25:] == 0)
 
 
 def test_add_velocity_groups_by_velocity_reports() -> None:
@@ -1002,23 +1107,88 @@ def test_get_profiles_drift_at_depth(dt_s: float) -> None:
     assert 3 in result.state.attrs["flag_values"]
 
 
-def test_get_profiles_drift_x_target_hover_depth() -> None:
-    """x_target_hover_depth in raw flight data takes precedence over pressure-based
-    drift detection and marks the exact drift period as state=3."""
+@pytest.mark.parametrize(
+    "dt_s",
+    [1.0, 15.0],
+    ids=["post_recovery_1s", "realtime_15s"],
+)
+def test_get_profiles_climb_with_stall(dt_s: float) -> None:
+    """A brief stall during the climb (glider hangs at ~20 dbar and drifts a
+    few dbar back down before resuming the ascent) must not split the climb
+    in two.  The stall_tolerance default rides over the reversal at both
+    sampling rates because it is specified in seconds, not samples."""
+    t_surface = 60.0
+    t_dive = 800.0
+    t_climb_pre_stall = 600.0  # 100 → 20 dbar
+    t_stall = 120.0  # hangs at 20–22 dbar for 2 min (within 3 min tolerance)
+    t_climb_post_stall = 400.0  # 22 → 0 dbar
+    t_total = (
+        t_surface + t_dive + t_climb_pre_stall + t_stall + t_climb_post_stall + 60.0
+    )
+
+    t = np.arange(0.0, t_total, dt_s)
+    p = np.full(len(t), 0.5)
+
+    t0_dive = t_surface
+    t0_climb_pre = t0_dive + t_dive
+    t0_stall = t0_climb_pre + t_climb_pre_stall
+    t0_climb_post = t0_stall + t_stall
+    t0_end = t0_climb_post + t_climb_post_stall
+
+    mask = (t >= t0_dive) & (t < t0_climb_pre)
+    p[mask] = 100.0 * (t[mask] - t0_dive) / t_dive
+
+    mask = (t >= t0_climb_pre) & (t < t0_stall)
+    p[mask] = 100.0 - 80.0 * (t[mask] - t0_climb_pre) / t_climb_pre_stall
+
+    # Stall: drift slowly back down from 20 to 22 dbar over 2 min
+    mask = (t >= t0_stall) & (t < t0_climb_post)
+    p[mask] = 20.0 + 2.0 * (t[mask] - t0_stall) / t_stall
+
+    mask = (t >= t0_climb_post) & (t < t0_end)
+    p[mask] = 22.0 * (1.0 - (t[mask] - t0_climb_post) / t_climb_post_stall)
+
+    # Leading NaN sidesteps a profinder edge case where valid_idx is undefined
+    # for NaN-free input.
+    p[0] = np.nan
+
+    ds = xr.Dataset({"pressure": ("time", p)}, coords={"time": t})
+
+    result = prof.get_profiles(ds, shallowest_profile=5.0, min_surface_time=180.0)
+
+    state = result.state.values
+    # The post-stall climb must be classified as climb, not unknown
+    post_stall_state = state[(t >= t0_climb_post) & (t < t0_end)]
+    assert (post_stall_state == 2).mean() > 0.7, (
+        f"Post-stall climb only {(post_stall_state == 2).mean():.1%} state=2 at dt_s={dt_s}"
+    )
+    # The whole climb (pre-stall + stall + post-stall) is one climb_id
+    climb_ids = np.unique(result.climb_id.values[result.climb_id.values >= 0])
+    assert len(climb_ids) == 1, f"Expected 1 climb, got {len(climb_ids)} at dt_s={dt_s}"
+
+
+def test_get_profiles_drift_x_hover_active() -> None:
+    """x_hover_active in raw flight data takes precedence over pressure-based
+    drift detection and marks the exact drift period as state=3.
+
+    The Slocum flight computer reports x_hover_active only on transitions
+    (1 when a hover starts, 0 when it ends, -127 missing otherwise); the
+    detector must forward-fill the most recent {0,1} report.
+    """
     dt_s = 15.0
     ds, _t_dive, t_drift_start, t_climb_start = _make_drift_at_depth_dataset(dt_s)
 
     time = ds.time.values
 
-    # Synthetic flight data at 4 s intervals with target hover depth set during drift
-    flt_time = np.arange(0.0, time[-1] + 4.0, 4.0)
-    hover_depth = np.full(len(flt_time), np.nan)
-    hover_depth[(flt_time >= t_drift_start) & (flt_time < t_climb_start)] = 100.0
+    # Sparse flight reports: 0 at start, 1 when hover begins, 0 when it ends.
+    # Mix in a -127 sentinel to confirm it is filtered out.
+    flt_time = np.array([0.0, 60.0, t_drift_start, t_climb_start])
+    hover_active = np.array([0.0, -127.0, 1.0, 0.0])
 
     flt = xr.Dataset(
         {
             "m_present_time": ("i", flt_time),
-            "x_target_hover_depth": ("i", hover_depth),
+            "x_hover_active": ("i", hover_active),
         }
     )
 
@@ -1038,3 +1208,172 @@ def test_get_profiles_drift_x_target_hover_depth() -> None:
     # Real climb after drift must still be captured
     climb_state = state[time >= t_climb_start]
     assert (climb_state == 2).mean() > 0.7
+
+
+def test_absorb_post_drift_transients() -> None:
+    """Brief dive (state=1) and unknown (state=-1) segments wedged between
+    a drift period and the real climb are absorbed into the climb."""
+    n = 50
+    state = np.full(n, -1, dtype="b")
+    drift_mask = np.zeros(n, dtype=bool)
+    dive_id = np.full(n, -1, dtype="i4")
+    climb_id = np.full(n, -1, dtype="i4")
+    profile_id = np.full(n, -1, dtype="i4")
+
+    state[5:15] = 1  # main dive
+    dive_id[5:15] = 1
+    profile_id[5:15] = 1
+
+    state[15:30] = 3  # drift
+    drift_mask[15:30] = True
+
+    state[30:32] = 1  # spurious brief dive (transient)
+    dive_id[30:32] = 2
+    profile_id[30:32] = 2
+
+    # state[32:34] left as -1 (unknown gap)
+
+    state[34:48] = 2  # real climb
+    climb_id[34:48] = 1
+    profile_id[34:48] = 3
+
+    prof._absorb_post_drift_transients(
+        state,
+        dive_id,
+        climb_id,
+        profile_id,
+        drift_mask,
+        dt_s=10.0,
+        max_transient_duration=180.0,
+    )
+
+    # The transient between drift and climb is now part of the climb
+    assert np.all(state[30:34] == 2)
+    assert np.all(climb_id[30:34] == 1)
+    assert np.all(profile_id[30:34] == 3)
+    assert np.all(dive_id[30:34] == -1)
+
+    # Drift, main dive, and real climb are unchanged
+    assert np.all(state[5:15] == 1)
+    assert np.all(state[15:30] == 3)
+    assert np.all(state[34:48] == 2)
+
+
+def test_absorb_apex_unknowns_splits_at_max_pressure() -> None:
+    """A short unknown gap at the underwater apex between a dive and a climb
+    is split at the pressure maximum: pre-max points become dive, post-max
+    points become climb."""
+    n = 30
+    state = np.full(n, -1, dtype="b")
+    dive_id = np.full(n, -1, dtype="i4")
+    climb_id = np.full(n, -1, dtype="i4")
+    profile_id = np.full(n, -1, dtype="i4")
+    pressure = np.zeros(n)
+
+    state[5:13] = 1
+    dive_id[5:13] = 1
+    profile_id[5:13] = 1
+    pressure[5:13] = np.linspace(10, 80, 8)
+
+    # Indices 13–17 unknown, with peak (90 dbar) at index 15
+    pressure[13:17] = [85, 88, 90, 87]
+
+    state[17:25] = 2
+    climb_id[17:25] = 1
+    profile_id[17:25] = 2
+    pressure[17:25] = np.linspace(85, 5, 8)
+
+    prof._absorb_apex_unknowns(
+        state,
+        dive_id,
+        climb_id,
+        profile_id,
+        pressure,
+        dt_s=10.0,
+        max_gap_duration=180.0,
+        surface_pressure=2.0,
+    )
+
+    # Up to and including the max → dive
+    assert np.all(state[13:16] == 1)
+    assert np.all(dive_id[13:16] == 1)
+    assert np.all(profile_id[13:16] == 1)
+    # After the max → climb
+    assert np.all(state[16:17] == 2)
+    assert np.all(climb_id[16:17] == 1)
+    assert np.all(profile_id[16:17] == 2)
+
+
+def test_absorb_apex_unknowns_skips_surface_gap() -> None:
+    """An unknown gap that goes shallow (glider surfaced) is NOT absorbed; it
+    must be reachable by the surface classifier instead."""
+    n = 30
+    state = np.full(n, -1, dtype="b")
+    dive_id = np.full(n, -1, dtype="i4")
+    climb_id = np.full(n, -1, dtype="i4")
+    profile_id = np.full(n, -1, dtype="i4")
+    pressure = np.zeros(n)
+
+    state[5:10] = 1
+    pressure[5:10] = np.linspace(10, 80, 5)
+    state[10:13] = 2
+    pressure[10:13] = np.linspace(80, 1, 3)  # climb back to surface
+
+    # Unknown gap at the surface (pressure < surface_pressure)
+    pressure[13:17] = [0.3, 0.5, 0.4, 0.6]
+
+    state[17:22] = 1  # next dive
+    pressure[17:22] = np.linspace(1, 60, 5)
+
+    state_before = state.copy()
+
+    prof._absorb_apex_unknowns(
+        state,
+        dive_id,
+        climb_id,
+        profile_id,
+        pressure,
+        dt_s=10.0,
+        max_gap_duration=180.0,
+        surface_pressure=2.0,
+    )
+
+    # The unknown gap was at the surface, so it stays unknown for assign_surface_state
+    assert np.array_equal(state, state_before)
+
+
+def test_absorb_post_drift_transients_does_not_swallow_real_dive() -> None:
+    """A long dive after a drift is not absorbed (only short transients are)."""
+    n = 60
+    state = np.full(n, -1, dtype="b")
+    drift_mask = np.zeros(n, dtype=bool)
+    dive_id = np.full(n, -1, dtype="i4")
+    climb_id = np.full(n, -1, dtype="i4")
+    profile_id = np.full(n, -1, dtype="i4")
+
+    state[5:15] = 3  # drift
+    drift_mask[5:15] = True
+
+    # A long dive (20 samples = 200 s, longer than max_transient_duration of 180 s)
+    state[15:35] = 1
+    dive_id[15:35] = 1
+    profile_id[15:35] = 1
+
+    state[40:55] = 2  # climb (further out)
+    climb_id[40:55] = 1
+    profile_id[40:55] = 2
+
+    state_before = state.copy()
+
+    prof._absorb_post_drift_transients(
+        state,
+        dive_id,
+        climb_id,
+        profile_id,
+        drift_mask,
+        dt_s=10.0,
+        max_transient_duration=180.0,
+    )
+
+    # Nothing should change because the dive between drift and climb is too long
+    assert np.array_equal(state, state_before)

From 504bb52003dcf2c2844b99e989b9eae3cb84adee Mon Sep 17 00:00:00 2001
From: Jesse Cusack <jesse.cusack@oregonstate.edu>
Date: Thu, 7 May 2026 22:06:04 -0700
Subject: [PATCH 03/10] refine the code

---
 src/glide/profiles.py    | 292 +++++++++--------------------
 tests/test_process_l1.py | 386 ++++++++++++---------------------------
 2 files changed, 205 insertions(+), 473 deletions(-)

diff --git a/src/glide/profiles.py b/src/glide/profiles.py
index 55aa52c..fca2be7 100644
--- a/src/glide/profiles.py
+++ b/src/glide/profiles.py
@@ -18,67 +18,49 @@ def _detect_drift(
 ) -> np.ndarray:
     """Return boolean mask True where glider is drifting at depth.
 
-    Uses x_hover_active from raw flight data if available (reported as 1 when
-    a hover episode starts and 0 when it ends, with -127 as the missing-data
-    sentinel; the most recent reported value is forward-filled to every science
-    timestamp).  Falls back to a rolling-variance detector on the pressure
-    signal: a candidate region must have rolling pressure std <
-    pressure_std_threshold and pressure > surface_pressure sustained for at
-    least min_drift_duration seconds.
+    Uses x_hover_active from raw flight data when present (1=on, 0=off,
+    -127=missing; forward-filled to science timestamps).  Otherwise applies a
+    rolling-variance detector: rolling pressure std < pressure_std_threshold
+    and pressure > surface_pressure sustained for min_drift_duration seconds.
     """
     n = len(pressure)
     drift = np.zeros(n, dtype=bool)
 
     if flt is not None and "x_hover_active" in flt:
-        flt_time = flt.m_present_time.values
-        hover_active = flt.x_hover_active.values
-        # -127 is the no-data sentinel for the signed-byte variable; treat as
-        # missing along with NaN. Forward-fill the most recent {0,1} report.
-        finite = (
-            np.isfinite(hover_active) & np.isfinite(flt_time) & (hover_active != -127)
-        )
-        if finite.any():
-            flt_time = flt_time[finite]
-            hover_active = hover_active[finite]
-            order = np.argsort(flt_time)
-            flt_time = flt_time[order]
-            hover_active = hover_active[order]
-            pos = np.searchsorted(flt_time, time, side="right") - 1
+        ft = flt.m_present_time.values
+        ha = flt.x_hover_active.values
+        keep = np.isfinite(ha) & np.isfinite(ft) & (ha != -127)
+        if keep.any():
+            order = np.argsort(ft[keep])
+            ft, ha = ft[keep][order], ha[keep][order]
+            pos = np.searchsorted(ft, time, side="right") - 1
             in_range = pos >= 0
-            drift[in_range] = hover_active[pos[in_range]] == 1
-            _log.debug(
-                "Drift: %d points from x_hover_active (%d reports)",
-                int(drift.sum()),
-                len(flt_time),
-            )
+            drift[in_range] = ha[pos[in_range]] == 1
+            _log.debug("Drift: %d points from x_hover_active", int(drift.sum()))
             return drift
-        _log.debug("x_hover_active has no finite values; using pressure detector")
+        _log.debug("x_hover_active has no finite values; falling back to pressure")
 
     valid = np.isfinite(pressure)
     if not valid.any():
         return drift
 
-    # Window = min_drift_duration / 4; smaller window reduces edge miss at drift boundaries.
-    # np.convolve mode='same' returns max(n, window) elements, so cap half so window < n.
+    # Window = min_drift_duration / 4 (smaller reduces edge miss). np.convolve
+    # mode='same' returns max(n, window) elements, so cap so window < n.
     half = min((n - 1) // 2, max(2, round(min_drift_duration / 4 / dt_s)))
-    window = 2 * half + 1
-    ones = np.ones(window)
-
+    ones = np.ones(2 * half + 1)
     p_fill = np.where(valid, pressure, 0.0)
-    cnt_w = np.convolve(valid.astype(float), ones, mode="same")
-    sum_w = np.convolve(p_fill, ones, mode="same")
-    sumsq_w = np.convolve(p_fill**2, ones, mode="same")
-
-    safe_cnt = np.where(cnt_w >= 3, cnt_w, 1.0)
-    mean_w = sum_w / safe_cnt
-    var_w = sumsq_w / safe_cnt - mean_w**2
-    rolling_std = np.where(cnt_w >= 3, np.sqrt(np.maximum(0.0, var_w)), np.inf)
+    cnt = np.convolve(valid.astype(float), ones, mode="same")
+    sm = np.convolve(p_fill, ones, mode="same")
+    sq = np.convolve(p_fill**2, ones, mode="same")
+    safe = np.where(cnt >= 3, cnt, 1.0)
+    var = sq / safe - (sm / safe) ** 2
+    rolling_std = np.where(cnt >= 3, np.sqrt(np.maximum(0.0, var)), np.inf)
 
     candidate = (
         valid & (pressure > surface_pressure) & (rolling_std < pressure_std_threshold)
     )
 
-    # Keep only contiguous runs that span at least min_drift_duration
+    # Keep only contiguous runs spanning at least min_drift_duration seconds.
     i = 0
     while i < n:
         if not candidate[i]:
@@ -91,7 +73,7 @@ def _detect_drift(
             drift[i:j] = True
         i = j
 
-    _log.debug("Drift: %d points from pressure-based detector", int(drift.sum()))
+    _log.debug("Drift: %d points from pressure detector", int(drift.sum()))
     return drift
 
 
@@ -105,29 +87,19 @@ def _absorb_apex_unknowns(
     max_gap_duration: float,
     surface_pressure: float = 2.0,
 ) -> None:
-    """Absorb short unknown gaps at the underwater apex of a multi-yo segment.
-
-    profinder occasionally leaves a few samples between dive_end and
-    climb_start unclassified at the peak.  When such a gap is short, sits at
-    depth (max pressure > surface_pressure), and is sandwiched directly
-    between a dive (1) and a climb (2), split it at the pressure maximum:
-    points up to and including the max are folded into the preceding dive,
-    points after into the following climb.
-    """
+    """Split short underwater unknown gaps between a dive and climb at the
+    pressure max: pre-max → dive, post-max → climb.  Skips gaps that go
+    shallow (left for the surface classifier) or are too long."""
     n = len(state)
     unknown = state == -1
     if not unknown.any():
         return
-
     diff = np.diff(unknown.astype(int), prepend=0, append=0)
-    starts = np.where(diff == 1)[0]
-    ends = np.where(diff == -1)[0]
+    starts, ends = np.where(diff == 1)[0], np.where(diff == -1)[0]
     max_samples = max(2, round(max_gap_duration / dt_s))
 
     for s, e in zip(starts, ends):
-        if (e - s) > max_samples:
-            continue
-        if s == 0 or e >= n:
+        if (e - s) > max_samples or s == 0 or e >= n:
             continue
         if state[s - 1] != 1 or state[e] != 2:
             continue
@@ -135,14 +107,10 @@ def _absorb_apex_unknowns(
         finite = np.isfinite(p)
         if not finite.any() or float(np.min(p[finite])) < surface_pressure:
             continue
-
-        local_max = int(np.argmax(np.where(finite, p, -np.inf)))
-        split = s + local_max + 1
-
+        split = s + int(np.argmax(np.where(finite, p, -np.inf))) + 1
         state[s:split] = 1
         dive_id[s:split] = dive_id[s - 1]
         profile_id[s:split] = profile_id[s - 1]
-
         state[split:e] = 2
         climb_id[split:e] = climb_id[e]
         profile_id[split:e] = profile_id[e]
@@ -158,28 +126,19 @@ def _absorb_post_drift_transients(
     max_transient_duration: float,
 ) -> None:
     """Fold brief dive/unknown segments between a drift end and the next climb
-    into that climb.
-
-    Masking the drift period leaves a discontinuous pressure signal across the
-    gap, which occasionally causes profinder to detect a spurious tiny peak
-    just after the drift in addition to the real climb.  This pass extends the
-    climb backward to the drift end if the only intervening states are short
-    dive (1) or unknown (-1) segments.  Anything longer than
-    max_transient_duration past the drift end is left alone.
-    """
+    into that climb.  The drift mask gap can cause profinder to detect a
+    spurious tiny peak; this extends the climb backward to absorb it."""
     n = len(state)
     if not drift_mask.any():
         return
-
-    drift_diff = np.diff(drift_mask.astype(int), prepend=0, append=0)
-    drift_ends = np.where(drift_diff == -1)[0]
+    diff = np.diff(drift_mask.astype(int), prepend=0, append=0)
+    drift_ends = np.where(diff == -1)[0]
     window = max(2, round(max_transient_duration / dt_s))
 
     for de in drift_ends:
         if de >= n:
             continue
-        end = min(de + window, n)
-        in_window = state[de:end]
+        in_window = state[de : min(de + window, n)]
         climb_local = np.where(in_window == 2)[0]
         if len(climb_local) == 0:
             continue
@@ -189,11 +148,9 @@ def _absorb_post_drift_transients(
         intermediate = state[de:first_climb]
         if not np.all((intermediate == 1) | (intermediate == -1)):
             continue
-        cid = climb_id[first_climb]
-        pid = profile_id[first_climb]
         state[de:first_climb] = 2
-        climb_id[de:first_climb] = cid
-        profile_id[de:first_climb] = pid
+        climb_id[de:first_climb] = climb_id[first_climb]
+        profile_id[de:first_climb] = profile_id[first_climb]
         dive_id[de:first_climb] = -1
 
 
@@ -214,30 +171,28 @@ def get_profiles(
     ds : xr.Dataset
         Merged L2 dataset with 'pressure' and 'time' variables.
     shallowest_profile : float
-        Minimum peak pressure (dbar) for a profile to be recognised.
+        Minimum peak pressure (dbar) for a profile to be recognised.  Also
+        used as the inter-profile trough prominence threshold so small stalls
+        within a climb are not mistaken for surfacings.
     min_surface_time : float
-        Minimum time (seconds) between consecutive dive apexes.  Used to
-        compute sample-count thresholds so the detector adapts to the data
-        sampling rate.  A value of ~180 s works for most Slocum deployments.
+        Minimum time (seconds) between consecutive dive apexes (default 180).
     flt : xr.Dataset, optional
-        Raw flight data.  Used to detect drift-at-depth via BAW_HOVER_ACTIVE
-        when present; otherwise pressure-based detection is used.
+        Raw flight data.  Used to detect drift via x_hover_active when
+        present; otherwise the rolling-variance fallback runs.
     min_drift_duration : float
-        Minimum duration (seconds) for a constant-pressure period to be
-        classified as drift (default 600 s).
+        Minimum sustained-low-variance duration (s) for the pressure-based
+        drift detector (default 600 s).
     drift_pressure_std : float
-        Rolling pressure standard deviation threshold (dbar) for pressure-based
-        drift detection (default 2.0 dbar).
+        Rolling pressure std threshold (dbar) for the pressure-based drift
+        detector (default 2.0).
     stall_tolerance : float
-        Maximum time (seconds) the glider can stall or briefly reverse during
-        a dive or climb without profinder terminating the profile (default
-        180 s = 3 min, sized to handle realistic glider stalls during
-        ascent).  Converted to profinder's per-sample `run_length` argument.
+        Max stall/reversal duration (s) tolerated within a profile before
+        profinder terminates it (default 180 s).  Sampling-rate independent
+        — converted to profinder's per-sample `run_length`.
     min_pressure_rate : float
-        Minimum pressure change rate (dbar/s) for a sample to count toward
-        ascent or descent classification (default 0.05 dbar/s ≈ 5 cm/s, well
-        below normal glider vertical speed of 10–20 cm/s).  Converted to
-        profinder's per-sample `min_pressure_change` argument.
+        Min vertical speed (dbar/s) for a sample to count toward
+        ascent/descent (default 0.05 ≈ 5 cm/s, well below normal glider
+        speeds of 10–20 cm/s).  Converted to per-sample `min_pressure_change`.
     """
     raw_diff = np.diff(ds.time.values)
     if np.issubdtype(raw_diff.dtype, np.timedelta64):
@@ -255,9 +210,8 @@ def get_profiles(
         pressure_std_threshold=drift_pressure_std,
     )
 
-    # Mask drift before profile finding so profinder sees a gap across the
-    # drift period; with missing="drop" it will pair the real climb after
-    # drift with the correct dive.
+    # Mask drift before profile finding so profinder pairs the post-drift
+    # climb with the pre-drift dive across the gap (missing="drop").
     pressure_masked = ds.pressure.values.copy().astype(float)
     pressure_masked[drift_mask] = np.nan
 
@@ -268,37 +222,23 @@ def get_profiles(
         "width": max(2, round(20 * fs)),  # ≥20 s half-width detects ~10 m dives
     }
     troughs_kwargs = {
-        # Real inter-profile troughs (surfacings or inter-yo apexes) have a
-        # depth contrast at least as large as a real dive's prominence; a
-        # smaller "trough" is a stall or noise dip within a profile.
+        # Real inter-profile troughs are at least as deep as a real dive's
+        # prominence; smaller "troughs" are stalls within a profile.
         "prominence": shallowest_profile,
         "distance": max(2, round(30 * fs)),
         "width": max(1, round(5 * fs)),
     }
 
-    _log.debug(
-        "fs=%.3f Hz; peaks_kwargs=%s; troughs_kwargs=%s",
-        fs,
-        peaks_kwargs,
-        troughs_kwargs,
-    )
-
-    # Scale stall tolerance and pressure-rate threshold to per-sample units.
-    # These guard the climb/dive run from being terminated by brief stalls
-    # (e.g. glider hangs at depth and drifts back down a few dbar before
-    # resuming the ascent).
-    run_length = max(2, round(stall_tolerance / dt_s))
-    min_pressure_change = float(min_pressure_rate * dt_s)
+    _log.debug("fs=%.3f Hz; peaks=%s; troughs=%s", fs, peaks_kwargs, troughs_kwargs)
 
     profiles = find_profiles(
         pressure_masked,
         peaks_kwargs=peaks_kwargs,
         troughs_kwargs=troughs_kwargs,
-        run_length=run_length,
-        min_pressure_change=min_pressure_change,
+        run_length=max(2, round(stall_tolerance / dt_s)),
+        min_pressure_change=float(min_pressure_rate * dt_s),
         missing="drop",
     )
-
     _log.debug("Found %i profiles", len(profiles))
 
     n = ds.time.size
@@ -307,26 +247,17 @@ def get_profiles(
     profile_id = np.full(n, -1, dtype="i4")
     state = np.full(n, -1, dtype="b")
 
-    dive_counter = 1
-    climb_counter = 1
-    profile_counter = 1
-
-    for prof in profiles:
-        dive_start, dive_end, climb_start, climb_end = prof
-
-        dive_id[dive_start:dive_end] = dive_counter
-        profile_id[dive_start:dive_end] = profile_counter
-        state[dive_start:dive_end] = 1
-        dive_counter += 1
-        profile_counter += 1
-
-        climb_id[climb_start:climb_end] = climb_counter
-        profile_id[climb_start:climb_end] = profile_counter
-        state[climb_start:climb_end] = 2
-        climb_counter += 1
-        profile_counter += 1
-
-    # Override drift points, clearing any profile membership assigned above
+    pid = 1
+    for k, (ds_, de, cs, ce) in enumerate(profiles, start=1):
+        dive_id[ds_:de] = k
+        profile_id[ds_:de] = pid
+        state[ds_:de] = 1
+        climb_id[cs:ce] = k
+        profile_id[cs:ce] = pid + 1
+        state[cs:ce] = 2
+        pid += 2
+
+    # Drift overrides any profile membership profinder may have spanned across
     state[drift_mask] = 3
     dive_id[drift_mask] = -1
     climb_id[drift_mask] = -1
@@ -335,7 +266,6 @@ def get_profiles(
     _absorb_post_drift_transients(
         state, dive_id, climb_id, profile_id, drift_mask, dt_s, min_surface_time
     )
-
     _absorb_apex_unknowns(
         state,
         dive_id,
@@ -360,7 +290,6 @@ def get_profiles(
             valid_min=np.int8(-1),
         ),
     )
-
     return ds
 
 
@@ -372,32 +301,12 @@ def assign_surface_state(
 ) -> xr.Dataset:
     """Assign surface state (0) to unknown points.
 
-    Two checks are applied in sequence:
-
-    1. A point with state == -1 (unknown) is marked as surface if it is within
-       `dt` seconds of a valid GPS fix AND shallower than `surface_pressure`
-       dbar.  The pressure gate prevents the broad GPS time window from
-       reaching into adjacent dives.
-    2. Any remaining contiguous unknown segment is marked as surface unless
-       its pressure record contains a finite value at or above
-       `surface_pressure` (i.e. evidence the glider was deeper).  Missing
-       pressure is the expected state at the surface — the science sensor
-       is often out of water — so all-NaN unknown segments are classified
-       as surface.  This covers the period before the first dive, after the
-       last climb, and any between-profile gap.
-
-    Parameters
-    ----------
-    ds : xr.Dataset
-        Dataset with state and pressure variables from get_profiles.
-    flt : xr.Dataset, optional
-        Raw flight data containing m_gps_lat/lon with valid GPS fix times.
-        If absent, only the pressure-based check runs.
-    dt : float
-        Time threshold in seconds for matching to GPS fixes (default 300).
-    surface_pressure : float
-        Maximum pressure (dbar) for a point to be considered at the surface
-        (default 2.0).
+    Two passes: (1) GPS-proximity — unknowns within `dt` seconds of a valid
+    GPS fix AND shallower than `surface_pressure`; (2) whole-segment — any
+    contiguous unknown run with no finite pressure ≥ `surface_pressure`.
+    Missing pressure is the expected state at the surface (sensor out of
+    water), so all-NaN runs are treated as surface — except when bounded by
+    dive→climb (the underwater apex), which is left unknown.
     """
     state = ds.state.values.copy()
     pressure = ds.pressure.values
@@ -407,54 +316,37 @@ def assign_surface_state(
         gps_valid = np.isfinite(flt.m_gps_lat.values)
         if gps_valid.any():
             gps_times = np.sort(flt.m_present_time.values[gps_valid])
-            unknown_mask = state == -1
-            if unknown_mask.any():
-                unknown_times = time_l2[unknown_mask]
-                pos = np.searchsorted(gps_times, unknown_times)
-                dist_left = np.where(
-                    pos > 0,
-                    unknown_times - gps_times[np.maximum(pos - 1, 0)],
-                    np.inf,
-                )
-                dist_right = np.where(
+            unk = state == -1
+            if unk.any():
+                ut = time_l2[unk]
+                pos = np.searchsorted(gps_times, ut)
+                left = np.where(pos > 0, ut - gps_times[np.maximum(pos - 1, 0)], np.inf)
+                right = np.where(
                     pos < len(gps_times),
-                    gps_times[np.minimum(pos, len(gps_times) - 1)] - unknown_times,
+                    gps_times[np.minimum(pos, len(gps_times) - 1)] - ut,
                     np.inf,
                 )
-                is_near = np.minimum(dist_left, dist_right) <= dt
-                is_shallow = np.isfinite(pressure[unknown_mask]) & (
-                    pressure[unknown_mask] < surface_pressure
-                )
-                state[unknown_mask] = np.where(
-                    is_near & is_shallow, np.int8(0), state[unknown_mask]
-                )
-                _log.debug(
-                    "GPS-proximity surface check: %d points assigned",
-                    int((is_near & is_shallow).sum()),
+                near = np.minimum(left, right) <= dt
+                shallow = np.isfinite(pressure[unk]) & (
+                    pressure[unk] < surface_pressure
                 )
+                state[unk] = np.where(near & shallow, np.int8(0), state[unk])
         else:
             _log.warning("No valid GPS fixes found")
     else:
         _log.warning("No flight data with GPS, skipping GPS-proximity surface check")
 
-    # Whole-segment pressure check: any contiguous unknown run that never
-    # exceeds surface_pressure is the glider sitting at the surface
     unknown = state == -1
     if unknown.any():
         n = len(state)
         diff = np.diff(unknown.astype(int), prepend=0, append=0)
-        starts = np.where(diff == 1)[0]
-        ends = np.where(diff == -1)[0]
-        n_assigned = 0
+        starts, ends = np.where(diff == 1)[0], np.where(diff == -1)[0]
         for s, e in zip(starts, ends):
             p = pressure[s:e]
             finite = np.isfinite(p)
-            # Skip if there is finite evidence the glider was underwater.
             if finite.any() and float(np.max(p[finite])) >= surface_pressure:
                 continue
-            # An all-NaN gap bounded by dive→climb is the underwater apex
-            # that the apex absorber couldn't split because pressure was
-            # missing.  Leave it unknown rather than mislabel as surface.
+            # All-NaN gap bounded by dive→climb is the underwater apex; leave it
             if (
                 not finite.any()
                 and 0 < s
@@ -464,8 +356,6 @@ def assign_surface_state(
             ):
                 continue
             state[s:e] = 0
-            n_assigned += int(e - s)
-        _log.debug("Whole-segment surface check: %d points assigned", n_assigned)
 
     ds["state"] = ("time", state, ds.state.attrs)
     return ds
diff --git a/tests/test_process_l1.py b/tests/test_process_l1.py
index c3fed7d..5db41cc 100644
--- a/tests/test_process_l1.py
+++ b/tests/test_process_l1.py
@@ -1020,360 +1020,202 @@ def test_emit_ioos_profiles_yo_yo_shares_time_uv(tmp_path) -> None:
 
 
 def _make_drift_at_depth_dataset(dt_s: float) -> tuple[xr.Dataset, float, float, float]:
-    """Synthetic glider dataset with one dive-overshoot-drift-climb cycle.
-
-    Returns (ds, t_dive_start, t_drift_start, t_climb_start) in seconds.
-
-    Pressure trace:
-      - Surface at ~0.5 dbar for 120 s
-      - Dive to 105 dbar (target 100, overshoot by 5 dbar) over 1500 s
-      - Overshoot correction: 105→100 dbar over 120 s  ← wrongly identified as
-        the climb before this fix
-      - Drift at 100 ± 2 dbar (sinusoidal wobble, period 300 s) for 3600 s
-      - Climb from 100 to surface over 1500 s
-      - Surface at ~0.5 dbar for 120 s
-    """
-    t_surface = 120.0
-    t_dive = 1500.0
-    t_overshoot = 120.0
-    t_drift = 3600.0
-    t_climb = 1500.0
-
+    """Synthetic surface→dive(105 overshoot)→correction→drift(100±2)→climb→surface
+    cycle.  Returns (ds, t_dive_start, t_drift_start, t_climb_start) in seconds."""
+    t_surface, t_dive, t_overshoot, t_drift, t_climb = (
+        120.0,
+        1500.0,
+        120.0,
+        3600.0,
+        1500.0,
+    )
     t0_dive = t_surface
     t0_overshoot = t0_dive + t_dive
     t0_drift = t0_overshoot + t_overshoot
     t0_climb = t0_drift + t_drift
-    t_total = t0_climb + t_climb + t_surface
-
-    t = np.arange(0.0, t_total, dt_s)
+    t = np.arange(0.0, t0_climb + t_climb + t_surface, dt_s)
     p = np.full(len(t), 0.5)
+    m = (t >= t0_dive) & (t < t0_overshoot)
+    p[m] = 105.0 * (t[m] - t0_dive) / t_dive
+    m = (t >= t0_overshoot) & (t < t0_drift)
+    p[m] = 105.0 - 5.0 * (t[m] - t0_overshoot) / t_overshoot
+    m = (t >= t0_drift) & (t < t0_climb)
+    p[m] = 100.0 + 2.0 * np.sin(2.0 * np.pi * (t[m] - t0_drift) / 300.0)
+    m = (t >= t0_climb) & (t < t0_climb + t_climb)
+    p[m] = 100.0 * (1.0 - (t[m] - t0_climb) / t_climb)
+    return (
+        xr.Dataset({"pressure": ("time", p)}, coords={"time": t}),
+        t0_dive,
+        t0_drift,
+        t0_climb,
+    )
 
-    mask = (t >= t0_dive) & (t < t0_overshoot)
-    p[mask] = 105.0 * (t[mask] - t0_dive) / t_dive
-
-    mask = (t >= t0_overshoot) & (t < t0_drift)
-    p[mask] = 105.0 - 5.0 * (t[mask] - t0_overshoot) / t_overshoot
-
-    mask = (t >= t0_drift) & (t < t0_climb)
-    p[mask] = 100.0 + 2.0 * np.sin(2.0 * np.pi * (t[mask] - t0_drift) / 300.0)
-
-    mask = (t >= t0_climb) & (t < t0_climb + t_climb)
-    p[mask] = 100.0 * (1.0 - (t[mask] - t0_climb) / t_climb)
 
-    ds = xr.Dataset({"pressure": ("time", p)}, coords={"time": t})
-    return ds, t0_dive, t0_drift, t0_climb
+def _make_state_arrays(n: int) -> tuple[np.ndarray, ...]:
+    """Allocate (state, dive_id, climb_id, profile_id) all initialised to -1."""
+    return (
+        np.full(n, -1, dtype="b"),
+        np.full(n, -1, dtype="i4"),
+        np.full(n, -1, dtype="i4"),
+        np.full(n, -1, dtype="i4"),
+    )
 
 
-@pytest.mark.parametrize(
-    "dt_s",
-    [1.0, 15.0],
-    ids=["post_recovery_1s", "realtime_15s"],
-)
+@pytest.mark.parametrize("dt_s", [1.0, 15.0], ids=["post_recovery_1s", "realtime_15s"])
 def test_get_profiles_drift_at_depth(dt_s: float) -> None:
-    """Drift-at-depth is detected and labelled state=3; the real climb after
-    drift must span the full water column rather than being cut off.
-
-    Without drift masking, the overshoot correction (105→100 dbar before the
-    drift plateau) would be mis-identified as the climb portion, leaving the
-    actual ascent unlabelled.
-    """
-    ds, _t_dive, t_drift_start, t_climb_start = _make_drift_at_depth_dataset(dt_s)
-
+    """Drift labelled state=3; post-drift climb spans the full water column
+    rather than being mis-attributed to the brief overshoot correction."""
+    ds, _, t_drift, t_climb = _make_drift_at_depth_dataset(dt_s)
     result = prof.get_profiles(ds, shallowest_profile=5.0, min_surface_time=180.0)
+    state, time = result.state.values, ds.time.values
 
-    state = result.state.values
-    time = ds.time.values
-
-    # Most of the drift plateau must be labelled state=3
-    drift_state = state[(time >= t_drift_start) & (time < t_climb_start)]
-    assert (drift_state == 3).mean() > 0.7, (
-        f"Only {(drift_state == 3).mean():.1%} of drift period is state=3 at dt_s={dt_s}"
-    )
-
-    # Most of the real climb after drift must be labelled state=2
-    climb_state = state[time >= t_climb_start]
-    assert (climb_state == 2).mean() > 0.7, (
-        f"Only {(climb_state == 2).mean():.1%} of real climb is state=2 at dt_s={dt_s}"
-    )
-
-    # The climb profile must span the full water column
-    climb_pressures = ds.pressure.values[state == 2]
-    assert climb_pressures.max() > 90.0, (
-        "Climb profile must include deep data near 100 dbar"
-    )
-    assert climb_pressures.min() < 5.0, "Climb profile must reach the near-surface"
+    drift_state = state[(time >= t_drift) & (time < t_climb)]
+    assert (drift_state == 3).mean() > 0.7
+    assert (state[time >= t_climb] == 2).mean() > 0.7
 
-    # state=3 (drift) must appear in the CF flag_values
+    climb_p = ds.pressure.values[state == 2]
+    assert climb_p.max() > 90.0 and climb_p.min() < 5.0
     assert 3 in result.state.attrs["flag_values"]
 
 
-@pytest.mark.parametrize(
-    "dt_s",
-    [1.0, 15.0],
-    ids=["post_recovery_1s", "realtime_15s"],
-)
+@pytest.mark.parametrize("dt_s", [1.0, 15.0], ids=["post_recovery_1s", "realtime_15s"])
 def test_get_profiles_climb_with_stall(dt_s: float) -> None:
-    """A brief stall during the climb (glider hangs at ~20 dbar and drifts a
-    few dbar back down before resuming the ascent) must not split the climb
-    in two.  The stall_tolerance default rides over the reversal at both
-    sampling rates because it is specified in seconds, not samples."""
-    t_surface = 60.0
-    t_dive = 800.0
-    t_climb_pre_stall = 600.0  # 100 → 20 dbar
-    t_stall = 120.0  # hangs at 20–22 dbar for 2 min (within 3 min tolerance)
-    t_climb_post_stall = 400.0  # 22 → 0 dbar
-    t_total = (
-        t_surface + t_dive + t_climb_pre_stall + t_stall + t_climb_post_stall + 60.0
-    )
-
-    t = np.arange(0.0, t_total, dt_s)
+    """A 2-minute stall (within the 3-minute tolerance) within the climb must
+    not split the climb at either sampling rate."""
+    t_surf, t_dive, t_pre, t_stall, t_post = 60.0, 800.0, 600.0, 120.0, 400.0
+    t0_dive = t_surf
+    t0_pre = t0_dive + t_dive
+    t0_stall = t0_pre + t_pre
+    t0_post = t0_stall + t_stall
+    t0_end = t0_post + t_post
+    t = np.arange(0.0, t0_end + t_surf, dt_s)
     p = np.full(len(t), 0.5)
-
-    t0_dive = t_surface
-    t0_climb_pre = t0_dive + t_dive
-    t0_stall = t0_climb_pre + t_climb_pre_stall
-    t0_climb_post = t0_stall + t_stall
-    t0_end = t0_climb_post + t_climb_post_stall
-
-    mask = (t >= t0_dive) & (t < t0_climb_pre)
-    p[mask] = 100.0 * (t[mask] - t0_dive) / t_dive
-
-    mask = (t >= t0_climb_pre) & (t < t0_stall)
-    p[mask] = 100.0 - 80.0 * (t[mask] - t0_climb_pre) / t_climb_pre_stall
-
-    # Stall: drift slowly back down from 20 to 22 dbar over 2 min
-    mask = (t >= t0_stall) & (t < t0_climb_post)
-    p[mask] = 20.0 + 2.0 * (t[mask] - t0_stall) / t_stall
-
-    mask = (t >= t0_climb_post) & (t < t0_end)
-    p[mask] = 22.0 * (1.0 - (t[mask] - t0_climb_post) / t_climb_post_stall)
-
-    # Leading NaN sidesteps a profinder edge case where valid_idx is undefined
-    # for NaN-free input.
-    p[0] = np.nan
-
+    m = (t >= t0_dive) & (t < t0_pre)
+    p[m] = 100.0 * (t[m] - t0_dive) / t_dive
+    m = (t >= t0_pre) & (t < t0_stall)
+    p[m] = 100.0 - 80.0 * (t[m] - t0_pre) / t_pre
+    m = (t >= t0_stall) & (t < t0_post)  # stall: 20 → 22 dbar over 2 min
+    p[m] = 20.0 + 2.0 * (t[m] - t0_stall) / t_stall
+    m = (t >= t0_post) & (t < t0_end)
+    p[m] = 22.0 * (1.0 - (t[m] - t0_post) / t_post)
+    p[0] = np.nan  # profinder needs NaN to avoid valid_idx edge case
     ds = xr.Dataset({"pressure": ("time", p)}, coords={"time": t})
 
     result = prof.get_profiles(ds, shallowest_profile=5.0, min_surface_time=180.0)
-
-    state = result.state.values
-    # The post-stall climb must be classified as climb, not unknown
-    post_stall_state = state[(t >= t0_climb_post) & (t < t0_end)]
-    assert (post_stall_state == 2).mean() > 0.7, (
-        f"Post-stall climb only {(post_stall_state == 2).mean():.1%} state=2 at dt_s={dt_s}"
-    )
-    # The whole climb (pre-stall + stall + post-stall) is one climb_id
-    climb_ids = np.unique(result.climb_id.values[result.climb_id.values >= 0])
-    assert len(climb_ids) == 1, f"Expected 1 climb, got {len(climb_ids)} at dt_s={dt_s}"
+    assert (result.state.values[(t >= t0_post) & (t < t0_end)] == 2).mean() > 0.7
+    cids = np.unique(result.climb_id.values[result.climb_id.values >= 0])
+    assert len(cids) == 1
 
 
 def test_get_profiles_drift_x_hover_active() -> None:
-    """x_hover_active in raw flight data takes precedence over pressure-based
-    drift detection and marks the exact drift period as state=3.
-
-    The Slocum flight computer reports x_hover_active only on transitions
-    (1 when a hover starts, 0 when it ends, -127 missing otherwise); the
-    detector must forward-fill the most recent {0,1} report.
-    """
-    dt_s = 15.0
-    ds, _t_dive, t_drift_start, t_climb_start = _make_drift_at_depth_dataset(dt_s)
-
+    """x_hover_active takes precedence over pressure-based detection.  Sparse
+    transition reports must be forward-filled and -127 sentinel filtered."""
+    ds, _, t_drift, t_climb = _make_drift_at_depth_dataset(15.0)
     time = ds.time.values
-
-    # Sparse flight reports: 0 at start, 1 when hover begins, 0 when it ends.
-    # Mix in a -127 sentinel to confirm it is filtered out.
-    flt_time = np.array([0.0, 60.0, t_drift_start, t_climb_start])
-    hover_active = np.array([0.0, -127.0, 1.0, 0.0])
-
     flt = xr.Dataset(
         {
-            "m_present_time": ("i", flt_time),
-            "x_hover_active": ("i", hover_active),
+            "m_present_time": ("i", np.array([0.0, 60.0, t_drift, t_climb])),
+            "x_hover_active": ("i", np.array([0.0, -127.0, 1.0, 0.0])),
         }
     )
-
     result = prof.get_profiles(
         ds, shallowest_profile=5.0, min_surface_time=180.0, flt=flt
     )
-
     state = result.state.values
-    drift_mask = (time >= t_drift_start) & (time < t_climb_start)
-
-    # BAW_HOVER_ACTIVE covers the drift period exactly, so detection should be
-    # almost complete (within one 4 s flight sample of each boundary)
-    assert (state[drift_mask] == 3).mean() > 0.9, (
-        "BAW_HOVER_ACTIVE should detect drift with >90% coverage"
-    )
-
-    # Real climb after drift must still be captured
-    climb_state = state[time >= t_climb_start]
-    assert (climb_state == 2).mean() > 0.7
+    assert (state[(time >= t_drift) & (time < t_climb)] == 3).mean() > 0.9
+    assert (state[time >= t_climb] == 2).mean() > 0.7
 
 
 def test_absorb_post_drift_transients() -> None:
-    """Brief dive (state=1) and unknown (state=-1) segments wedged between
-    a drift period and the real climb are absorbed into the climb."""
-    n = 50
-    state = np.full(n, -1, dtype="b")
-    drift_mask = np.zeros(n, dtype=bool)
-    dive_id = np.full(n, -1, dtype="i4")
-    climb_id = np.full(n, -1, dtype="i4")
-    profile_id = np.full(n, -1, dtype="i4")
-
-    state[5:15] = 1  # main dive
+    """Brief dive/unknown segments between drift and climb are folded into climb."""
+    state, dive_id, climb_id, profile_id = _make_state_arrays(50)
+    drift_mask = np.zeros(50, dtype=bool)
+    state[5:15] = 1
     dive_id[5:15] = 1
     profile_id[5:15] = 1
-
-    state[15:30] = 3  # drift
+    state[15:30] = 3
     drift_mask[15:30] = True
-
-    state[30:32] = 1  # spurious brief dive (transient)
+    state[30:32] = 1
     dive_id[30:32] = 2
-    profile_id[30:32] = 2
-
+    profile_id[30:32] = 2  # spurious dive
     # state[32:34] left as -1 (unknown gap)
-
-    state[34:48] = 2  # real climb
+    state[34:48] = 2
     climb_id[34:48] = 1
     profile_id[34:48] = 3
 
     prof._absorb_post_drift_transients(
-        state,
-        dive_id,
-        climb_id,
-        profile_id,
-        drift_mask,
-        dt_s=10.0,
-        max_transient_duration=180.0,
+        state, dive_id, climb_id, profile_id, drift_mask, 10.0, 180.0
     )
-
-    # The transient between drift and climb is now part of the climb
     assert np.all(state[30:34] == 2)
     assert np.all(climb_id[30:34] == 1)
     assert np.all(profile_id[30:34] == 3)
     assert np.all(dive_id[30:34] == -1)
-
-    # Drift, main dive, and real climb are unchanged
-    assert np.all(state[5:15] == 1)
-    assert np.all(state[15:30] == 3)
-    assert np.all(state[34:48] == 2)
+    assert (
+        np.all(state[5:15] == 1)
+        and np.all(state[15:30] == 3)
+        and np.all(state[34:48] == 2)
+    )
 
 
 def test_absorb_apex_unknowns_splits_at_max_pressure() -> None:
-    """A short unknown gap at the underwater apex between a dive and a climb
-    is split at the pressure maximum: pre-max points become dive, post-max
-    points become climb."""
-    n = 30
-    state = np.full(n, -1, dtype="b")
-    dive_id = np.full(n, -1, dtype="i4")
-    climb_id = np.full(n, -1, dtype="i4")
-    profile_id = np.full(n, -1, dtype="i4")
-    pressure = np.zeros(n)
-
+    """Short unknown gap between dive and climb is split at the pressure max."""
+    state, dive_id, climb_id, profile_id = _make_state_arrays(30)
+    pressure = np.zeros(30)
     state[5:13] = 1
     dive_id[5:13] = 1
     profile_id[5:13] = 1
     pressure[5:13] = np.linspace(10, 80, 8)
-
-    # Indices 13–17 unknown, with peak (90 dbar) at index 15
-    pressure[13:17] = [85, 88, 90, 87]
-
+    pressure[13:17] = [85, 88, 90, 87]  # apex at index 15
     state[17:25] = 2
     climb_id[17:25] = 1
     profile_id[17:25] = 2
     pressure[17:25] = np.linspace(85, 5, 8)
 
     prof._absorb_apex_unknowns(
-        state,
-        dive_id,
-        climb_id,
-        profile_id,
-        pressure,
-        dt_s=10.0,
-        max_gap_duration=180.0,
-        surface_pressure=2.0,
+        state, dive_id, climb_id, profile_id, pressure, 10.0, 180.0, 2.0
     )
-
-    # Up to and including the max → dive
-    assert np.all(state[13:16] == 1)
-    assert np.all(dive_id[13:16] == 1)
-    assert np.all(profile_id[13:16] == 1)
-    # After the max → climb
-    assert np.all(state[16:17] == 2)
-    assert np.all(climb_id[16:17] == 1)
-    assert np.all(profile_id[16:17] == 2)
+    assert (
+        np.all(state[13:16] == 1)
+        and np.all(dive_id[13:16] == 1)
+        and np.all(profile_id[13:16] == 1)
+    )
+    assert state[16] == 2 and climb_id[16] == 1 and profile_id[16] == 2
 
 
 def test_absorb_apex_unknowns_skips_surface_gap() -> None:
-    """An unknown gap that goes shallow (glider surfaced) is NOT absorbed; it
-    must be reachable by the surface classifier instead."""
-    n = 30
-    state = np.full(n, -1, dtype="b")
-    dive_id = np.full(n, -1, dtype="i4")
-    climb_id = np.full(n, -1, dtype="i4")
-    profile_id = np.full(n, -1, dtype="i4")
-    pressure = np.zeros(n)
-
+    """A gap that goes shallow (glider surfaced) is left for the surface classifier."""
+    state, dive_id, climb_id, profile_id = _make_state_arrays(30)
+    pressure = np.zeros(30)
     state[5:10] = 1
     pressure[5:10] = np.linspace(10, 80, 5)
     state[10:13] = 2
-    pressure[10:13] = np.linspace(80, 1, 3)  # climb back to surface
-
-    # Unknown gap at the surface (pressure < surface_pressure)
-    pressure[13:17] = [0.3, 0.5, 0.4, 0.6]
-
-    state[17:22] = 1  # next dive
+    pressure[10:13] = np.linspace(80, 1, 3)
+    pressure[13:17] = [0.3, 0.5, 0.4, 0.6]  # shallow unknown gap
+    state[17:22] = 1
     pressure[17:22] = np.linspace(1, 60, 5)
-
-    state_before = state.copy()
+    before = state.copy()
 
     prof._absorb_apex_unknowns(
-        state,
-        dive_id,
-        climb_id,
-        profile_id,
-        pressure,
-        dt_s=10.0,
-        max_gap_duration=180.0,
-        surface_pressure=2.0,
+        state, dive_id, climb_id, profile_id, pressure, 10.0, 180.0, 2.0
     )
-
-    # The unknown gap was at the surface, so it stays unknown for assign_surface_state
-    assert np.array_equal(state, state_before)
+    assert np.array_equal(state, before)
 
 
 def test_absorb_post_drift_transients_does_not_swallow_real_dive() -> None:
-    """A long dive after a drift is not absorbed (only short transients are)."""
-    n = 60
-    state = np.full(n, -1, dtype="b")
-    drift_mask = np.zeros(n, dtype=bool)
-    dive_id = np.full(n, -1, dtype="i4")
-    climb_id = np.full(n, -1, dtype="i4")
-    profile_id = np.full(n, -1, dtype="i4")
-
-    state[5:15] = 3  # drift
+    """A long dive (>max_transient_duration) between drift and climb is not absorbed."""
+    state, dive_id, climb_id, profile_id = _make_state_arrays(60)
+    drift_mask = np.zeros(60, dtype=bool)
+    state[5:15] = 3
     drift_mask[5:15] = True
-
-    # A long dive (20 samples = 200 s, longer than max_transient_duration of 180 s)
     state[15:35] = 1
     dive_id[15:35] = 1
-    profile_id[15:35] = 1
-
-    state[40:55] = 2  # climb (further out)
+    profile_id[15:35] = 1  # 200 s > 180 s
+    state[40:55] = 2
     climb_id[40:55] = 1
     profile_id[40:55] = 2
-
-    state_before = state.copy()
+    before = state.copy()
 
     prof._absorb_post_drift_transients(
-        state,
-        dive_id,
-        climb_id,
-        profile_id,
-        drift_mask,
-        dt_s=10.0,
-        max_transient_duration=180.0,
+        state, dive_id, climb_id, profile_id, drift_mask, 10.0, 180.0
     )
-
-    # Nothing should change because the dive between drift and climb is too long
-    assert np.array_equal(state, state_before)
+    assert np.array_equal(state, before)

From a6aa3348726eddd9b329d3dedc9d5c6cc49e5dcb Mon Sep 17 00:00:00 2001
From: Jesse Cusack <jesse.cusack@oregonstate.edu>
Date: Thu, 7 May 2026 22:37:58 -0700
Subject: [PATCH 04/10] refine more to deal with stalls

---
 src/glide/profiles.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/src/glide/profiles.py b/src/glide/profiles.py
index fca2be7..4b3fc55 100644
--- a/src/glide/profiles.py
+++ b/src/glide/profiles.py
@@ -161,8 +161,6 @@ def get_profiles(
     flt: xr.Dataset | None = None,
     min_drift_duration: float = 600.0,
     drift_pressure_std: float = 2.0,
-    stall_tolerance: float = 180.0,
-    min_pressure_rate: float = 0.05,
 ) -> xr.Dataset:
     """Identify dive, climb, and drift profiles from a pressure time series.
 
@@ -185,14 +183,6 @@ def get_profiles(
     drift_pressure_std : float
         Rolling pressure std threshold (dbar) for the pressure-based drift
         detector (default 2.0).
-    stall_tolerance : float
-        Max stall/reversal duration (s) tolerated within a profile before
-        profinder terminates it (default 180 s).  Sampling-rate independent
-        — converted to profinder's per-sample `run_length`.
-    min_pressure_rate : float
-        Min vertical speed (dbar/s) for a sample to count toward
-        ascent/descent (default 0.05 ≈ 5 cm/s, well below normal glider
-        speeds of 10–20 cm/s).  Converted to per-sample `min_pressure_change`.
     """
     raw_diff = np.diff(ds.time.values)
     if np.issubdtype(raw_diff.dtype, np.timedelta64):
@@ -235,8 +225,6 @@ def get_profiles(
         pressure_masked,
         peaks_kwargs=peaks_kwargs,
         troughs_kwargs=troughs_kwargs,
-        run_length=max(2, round(stall_tolerance / dt_s)),
-        min_pressure_change=float(min_pressure_rate * dt_s),
         missing="drop",
     )
     _log.debug("Found %i profiles", len(profiles))

From 7d86ab0f1df262cc6e4e21427977c47871d304cd Mon Sep 17 00:00:00 2001
From: Jesse Cusack <jesse.cusack@oregonstate.edu>
Date: Thu, 7 May 2026 22:55:41 -0700
Subject: [PATCH 05/10] reduce run length even more by defaul

---
 src/glide/profiles.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/glide/profiles.py b/src/glide/profiles.py
index 4b3fc55..47d2c14 100644
--- a/src/glide/profiles.py
+++ b/src/glide/profiles.py
@@ -225,6 +225,11 @@ def get_profiles(
         pressure_masked,
         peaks_kwargs=peaks_kwargs,
         troughs_kwargs=troughs_kwargs,
+        # Real glider pressure has per-second sensor bobbing (positive Δp every
+        # ~5 samples) that breaks profinder's default 4-sample-all-ascent
+        # confirmation, terminating the climb tens of dbar above the surface.
+        # 2 samples is loose enough to find ascent windows through the bobbing.
+        run_length=2,
         missing="drop",
     )
     _log.debug("Found %i profiles", len(profiles))

From b42d93bc096caa131cba96ede86e8940080378c8 Mon Sep 17 00:00:00 2001
From: Jesse Cusack <jesse.cusack@oregonstate.edu>
Date: Thu, 7 May 2026 23:30:29 -0700
Subject: [PATCH 06/10] more minor edits to refine profile finding

---
 src/glide/profiles.py    | 33 +++++++++++++++++++++------------
 tests/test_process_l1.py | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 12 deletions(-)

diff --git a/src/glide/profiles.py b/src/glide/profiles.py
index 47d2c14..0d8c7fa 100644
--- a/src/glide/profiles.py
+++ b/src/glide/profiles.py
@@ -87,9 +87,10 @@ def _absorb_apex_unknowns(
     max_gap_duration: float,
     surface_pressure: float = 2.0,
 ) -> None:
-    """Split short underwater unknown gaps between a dive and climb at the
-    pressure max: pre-max → dive, post-max → climb.  Skips gaps that go
-    shallow (left for the surface classifier) or are too long."""
+    """Split short underwater unknown gaps between adjacent profiles at the
+    pressure extremum.  Handles dive→climb (split at max: pre→dive, post→climb)
+    and climb→dive yo-tops (split at min: pre→climb, post→dive).  Skips gaps
+    that go shallow (left for the surface classifier) or are too long."""
     n = len(state)
     unknown = state == -1
     if not unknown.any():
@@ -101,19 +102,27 @@ def _absorb_apex_unknowns(
     for s, e in zip(starts, ends):
         if (e - s) > max_samples or s == 0 or e >= n:
             continue
-        if state[s - 1] != 1 or state[e] != 2:
-            continue
+        prev, nxt = state[s - 1], state[e]
         p = pressure[s:e]
         finite = np.isfinite(p)
         if not finite.any() or float(np.min(p[finite])) < surface_pressure:
             continue
-        split = s + int(np.argmax(np.where(finite, p, -np.inf))) + 1
-        state[s:split] = 1
-        dive_id[s:split] = dive_id[s - 1]
-        profile_id[s:split] = profile_id[s - 1]
-        state[split:e] = 2
-        climb_id[split:e] = climb_id[e]
-        profile_id[split:e] = profile_id[e]
+        if prev == 1 and nxt == 2:
+            split = s + int(np.argmax(np.where(finite, p, -np.inf))) + 1
+            state[s:split] = 1
+            dive_id[s:split] = dive_id[s - 1]
+            profile_id[s:split] = profile_id[s - 1]
+            state[split:e] = 2
+            climb_id[split:e] = climb_id[e]
+            profile_id[split:e] = profile_id[e]
+        elif prev == 2 and nxt == 1:
+            split = s + int(np.argmin(np.where(finite, p, np.inf))) + 1
+            state[s:split] = 2
+            climb_id[s:split] = climb_id[s - 1]
+            profile_id[s:split] = profile_id[s - 1]
+            state[split:e] = 1
+            dive_id[split:e] = dive_id[e]
+            profile_id[split:e] = profile_id[e]
 
 
 def _absorb_post_drift_transients(
diff --git a/tests/test_process_l1.py b/tests/test_process_l1.py
index 5db41cc..c8e03f7 100644
--- a/tests/test_process_l1.py
+++ b/tests/test_process_l1.py
@@ -1182,6 +1182,38 @@ def test_absorb_apex_unknowns_splits_at_max_pressure() -> None:
     assert state[16] == 2 and climb_id[16] == 1 and profile_id[16] == 2
 
 
+def test_absorb_apex_unknowns_climb_to_dive_yo_top() -> None:
+    """Short underwater gap between climb→dive (yo-top) is split at the
+    pressure minimum: pre-min → climb, post-min → dive."""
+    state, dive_id, climb_id, profile_id = _make_state_arrays(30)
+    pressure = np.zeros(30)
+    state[5:13] = 2
+    climb_id[5:13] = 1
+    profile_id[5:13] = 1
+    pressure[5:13] = np.linspace(80, 50, 8)
+    pressure[13:17] = [48, 45, 47, 50]  # yo-top min at index 14
+    state[17:25] = 1
+    dive_id[17:25] = 2
+    profile_id[17:25] = 2
+    pressure[17:25] = np.linspace(55, 200, 8)
+
+    prof._absorb_apex_unknowns(
+        state, dive_id, climb_id, profile_id, pressure, 10.0, 180.0, 2.0
+    )
+    # Up to and including the min → climb
+    assert (
+        np.all(state[13:15] == 2)
+        and np.all(climb_id[13:15] == 1)
+        and np.all(profile_id[13:15] == 1)
+    )
+    # After the min → dive
+    assert (
+        np.all(state[15:17] == 1)
+        and np.all(dive_id[15:17] == 2)
+        and np.all(profile_id[15:17] == 2)
+    )
+
+
 def test_absorb_apex_unknowns_skips_surface_gap() -> None:
     """A gap that goes shallow (glider surfaced) is left for the surface classifier."""
     state, dive_id, climb_id, profile_id = _make_state_arrays(30)

From 9aee00ce40e72bb831c970c454315c2af39b78c3 Mon Sep 17 00:00:00 2001
From: Jesse Cusack <jesse.cusack@oregonstate.edu>
Date: Thu, 7 May 2026 23:34:32 -0700
Subject: [PATCH 07/10] reduce code length

---
 src/glide/profiles.py    |  73 +++++++++++--------------
 tests/test_process_l1.py | 113 +++++++++++++++------------------------
 2 files changed, 74 insertions(+), 112 deletions(-)

diff --git a/src/glide/profiles.py b/src/glide/profiles.py
index 0d8c7fa..697c832 100644
--- a/src/glide/profiles.py
+++ b/src/glide/profiles.py
@@ -6,6 +6,14 @@
 
 _log = logging.getLogger(__name__)
 
+_STATE_ATTRS = dict(
+    long_name="Glider state",
+    flag_values=np.array([-1, 0, 1, 2, 3], "b"),
+    flag_meanings="unknown surface dive climb drift",
+    valid_max=np.int8(3),
+    valid_min=np.int8(-1),
+)
+
 
 def _detect_drift(
     pressure: np.ndarray,
@@ -102,27 +110,24 @@ def _absorb_apex_unknowns(
     for s, e in zip(starts, ends):
         if (e - s) > max_samples or s == 0 or e >= n:
             continue
-        prev, nxt = state[s - 1], state[e]
+        prev, nxt = int(state[s - 1]), int(state[e])
+        if {prev, nxt} != {1, 2}:
+            continue
         p = pressure[s:e]
         finite = np.isfinite(p)
         if not finite.any() or float(np.min(p[finite])) < surface_pressure:
             continue
-        if prev == 1 and nxt == 2:
-            split = s + int(np.argmax(np.where(finite, p, -np.inf))) + 1
-            state[s:split] = 1
-            dive_id[s:split] = dive_id[s - 1]
-            profile_id[s:split] = profile_id[s - 1]
-            state[split:e] = 2
-            climb_id[split:e] = climb_id[e]
-            profile_id[split:e] = profile_id[e]
-        elif prev == 2 and nxt == 1:
-            split = s + int(np.argmin(np.where(finite, p, np.inf))) + 1
-            state[s:split] = 2
-            climb_id[s:split] = climb_id[s - 1]
-            profile_id[s:split] = profile_id[s - 1]
-            state[split:e] = 1
-            dive_id[split:e] = dive_id[e]
-            profile_id[split:e] = profile_id[e]
+        # dive→climb splits at max (extremum is positive); climb→dive at min.
+        fill, extreme = (-np.inf, np.argmax) if prev == 1 else (np.inf, np.argmin)
+        split = s + int(extreme(np.where(finite, p, fill))) + 1
+        pre_id = dive_id if prev == 1 else climb_id
+        post_id = climb_id if nxt == 2 else dive_id
+        state[s:split] = prev
+        state[split:e] = nxt
+        pre_id[s:split] = pre_id[s - 1]
+        post_id[split:e] = post_id[e]
+        profile_id[s:split] = profile_id[s - 1]
+        profile_id[split:e] = profile_id[e]
 
 
 def _absorb_post_drift_transients(
@@ -265,33 +270,15 @@ def get_profiles(
     climb_id[drift_mask] = -1
     profile_id[drift_mask] = -1
 
-    _absorb_post_drift_transients(
-        state, dive_id, climb_id, profile_id, drift_mask, dt_s, min_surface_time
-    )
-    _absorb_apex_unknowns(
-        state,
-        dive_id,
-        climb_id,
-        profile_id,
-        ds.pressure.values,
-        dt_s,
-        min_surface_time,
-    )
+    ids = (state, dive_id, climb_id, profile_id)
+    _absorb_post_drift_transients(*ids, drift_mask, dt_s, min_surface_time)
+    _absorb_apex_unknowns(*ids, ds.pressure.values, dt_s, min_surface_time)
 
-    ds["dive_id"] = ("time", dive_id, dict(_FillValue=np.int32(-1)))
-    ds["climb_id"] = ("time", climb_id, dict(_FillValue=np.int32(-1)))
-    ds["profile_id"] = ("time", profile_id, dict(_FillValue=np.int32(-1)))
-    ds["state"] = (
-        "time",
-        state,
-        dict(
-            long_name="Glider state",
-            flag_values=np.array([-1, 0, 1, 2, 3], "b"),
-            flag_meanings="unknown surface dive climb drift",
-            valid_max=np.int8(3),
-            valid_min=np.int8(-1),
-        ),
-    )
+    fill = dict(_FillValue=np.int32(-1))
+    ds["dive_id"] = ("time", dive_id, fill)
+    ds["climb_id"] = ("time", climb_id, fill)
+    ds["profile_id"] = ("time", profile_id, fill)
+    ds["state"] = ("time", state, _STATE_ATTRS)
     return ds
 
 
diff --git a/tests/test_process_l1.py b/tests/test_process_l1.py
index c8e03f7..2d06f5b 100644
--- a/tests/test_process_l1.py
+++ b/tests/test_process_l1.py
@@ -1061,6 +1061,14 @@ def _make_state_arrays(n: int) -> tuple[np.ndarray, ...]:
     )
 
 
+def _set_profile(arrs, slc, state_val, id_val, pid_val):
+    """Set state, the matching dive_id or climb_id, and profile_id over slc."""
+    state, dive_id, climb_id, profile_id = arrs
+    state[slc] = state_val
+    (dive_id if state_val == 1 else climb_id)[slc] = id_val
+    profile_id[slc] = pid_val
+
+
 @pytest.mark.parametrize("dt_s", [1.0, 15.0], ids=["post_recovery_1s", "realtime_15s"])
 def test_get_profiles_drift_at_depth(dt_s: float) -> None:
     """Drift labelled state=3; post-drift climb spans the full water column
@@ -1128,28 +1136,20 @@ def test_get_profiles_drift_x_hover_active() -> None:
 
 def test_absorb_post_drift_transients() -> None:
     """Brief dive/unknown segments between drift and climb are folded into climb."""
-    state, dive_id, climb_id, profile_id = _make_state_arrays(50)
+    arrs = _make_state_arrays(50)
+    state, dive_id, climb_id, profile_id = arrs
     drift_mask = np.zeros(50, dtype=bool)
-    state[5:15] = 1
-    dive_id[5:15] = 1
-    profile_id[5:15] = 1
+    _set_profile(arrs, slice(5, 15), 1, 1, 1)  # main dive
     state[15:30] = 3
-    drift_mask[15:30] = True
-    state[30:32] = 1
-    dive_id[30:32] = 2
-    profile_id[30:32] = 2  # spurious dive
+    drift_mask[15:30] = True  # drift
+    _set_profile(arrs, slice(30, 32), 1, 2, 2)  # spurious dive
     # state[32:34] left as -1 (unknown gap)
-    state[34:48] = 2
-    climb_id[34:48] = 1
-    profile_id[34:48] = 3
+    _set_profile(arrs, slice(34, 48), 2, 1, 3)  # real climb
 
-    prof._absorb_post_drift_transients(
-        state, dive_id, climb_id, profile_id, drift_mask, 10.0, 180.0
-    )
-    assert np.all(state[30:34] == 2)
-    assert np.all(climb_id[30:34] == 1)
-    assert np.all(profile_id[30:34] == 3)
-    assert np.all(dive_id[30:34] == -1)
+    prof._absorb_post_drift_transients(*arrs, drift_mask, 10.0, 180.0)
+
+    assert np.all(state[30:34] == 2) and np.all(climb_id[30:34] == 1)
+    assert np.all(profile_id[30:34] == 3) and np.all(dive_id[30:34] == -1)
     assert (
         np.all(state[5:15] == 1)
         and np.all(state[15:30] == 3)
@@ -1159,64 +1159,46 @@ def test_absorb_post_drift_transients() -> None:
 
 def test_absorb_apex_unknowns_splits_at_max_pressure() -> None:
     """Short unknown gap between dive and climb is split at the pressure max."""
-    state, dive_id, climb_id, profile_id = _make_state_arrays(30)
+    arrs = _make_state_arrays(30)
+    state, dive_id, climb_id, profile_id = arrs
     pressure = np.zeros(30)
-    state[5:13] = 1
-    dive_id[5:13] = 1
-    profile_id[5:13] = 1
+    _set_profile(arrs, slice(5, 13), 1, 1, 1)
     pressure[5:13] = np.linspace(10, 80, 8)
     pressure[13:17] = [85, 88, 90, 87]  # apex at index 15
-    state[17:25] = 2
-    climb_id[17:25] = 1
-    profile_id[17:25] = 2
+    _set_profile(arrs, slice(17, 25), 2, 1, 2)
     pressure[17:25] = np.linspace(85, 5, 8)
 
-    prof._absorb_apex_unknowns(
-        state, dive_id, climb_id, profile_id, pressure, 10.0, 180.0, 2.0
-    )
-    assert (
-        np.all(state[13:16] == 1)
-        and np.all(dive_id[13:16] == 1)
-        and np.all(profile_id[13:16] == 1)
-    )
+    prof._absorb_apex_unknowns(*arrs, pressure, 10.0, 180.0, 2.0)
+
+    assert np.all(state[13:16] == 1) and np.all(dive_id[13:16] == 1)
+    assert np.all(profile_id[13:16] == 1)
     assert state[16] == 2 and climb_id[16] == 1 and profile_id[16] == 2
 
 
 def test_absorb_apex_unknowns_climb_to_dive_yo_top() -> None:
     """Short underwater gap between climb→dive (yo-top) is split at the
     pressure minimum: pre-min → climb, post-min → dive."""
-    state, dive_id, climb_id, profile_id = _make_state_arrays(30)
+    arrs = _make_state_arrays(30)
+    state, dive_id, climb_id, profile_id = arrs
     pressure = np.zeros(30)
-    state[5:13] = 2
-    climb_id[5:13] = 1
-    profile_id[5:13] = 1
+    _set_profile(arrs, slice(5, 13), 2, 1, 1)
     pressure[5:13] = np.linspace(80, 50, 8)
     pressure[13:17] = [48, 45, 47, 50]  # yo-top min at index 14
-    state[17:25] = 1
-    dive_id[17:25] = 2
-    profile_id[17:25] = 2
+    _set_profile(arrs, slice(17, 25), 1, 2, 2)
     pressure[17:25] = np.linspace(55, 200, 8)
 
-    prof._absorb_apex_unknowns(
-        state, dive_id, climb_id, profile_id, pressure, 10.0, 180.0, 2.0
-    )
-    # Up to and including the min → climb
-    assert (
-        np.all(state[13:15] == 2)
-        and np.all(climb_id[13:15] == 1)
-        and np.all(profile_id[13:15] == 1)
-    )
-    # After the min → dive
-    assert (
-        np.all(state[15:17] == 1)
-        and np.all(dive_id[15:17] == 2)
-        and np.all(profile_id[15:17] == 2)
-    )
+    prof._absorb_apex_unknowns(*arrs, pressure, 10.0, 180.0, 2.0)
+
+    assert np.all(state[13:15] == 2) and np.all(climb_id[13:15] == 1)
+    assert np.all(profile_id[13:15] == 1)
+    assert np.all(state[15:17] == 1) and np.all(dive_id[15:17] == 2)
+    assert np.all(profile_id[15:17] == 2)
 
 
 def test_absorb_apex_unknowns_skips_surface_gap() -> None:
     """A gap that goes shallow (glider surfaced) is left for the surface classifier."""
-    state, dive_id, climb_id, profile_id = _make_state_arrays(30)
+    arrs = _make_state_arrays(30)
+    state, _, _, _ = arrs
     pressure = np.zeros(30)
     state[5:10] = 1
     pressure[5:10] = np.linspace(10, 80, 5)
@@ -1227,27 +1209,20 @@ def test_absorb_apex_unknowns_skips_surface_gap() -> None:
     pressure[17:22] = np.linspace(1, 60, 5)
     before = state.copy()
 
-    prof._absorb_apex_unknowns(
-        state, dive_id, climb_id, profile_id, pressure, 10.0, 180.0, 2.0
-    )
+    prof._absorb_apex_unknowns(*arrs, pressure, 10.0, 180.0, 2.0)
     assert np.array_equal(state, before)
 
 
 def test_absorb_post_drift_transients_does_not_swallow_real_dive() -> None:
     """A long dive (>max_transient_duration) between drift and climb is not absorbed."""
-    state, dive_id, climb_id, profile_id = _make_state_arrays(60)
+    arrs = _make_state_arrays(60)
+    state, _, _, _ = arrs
     drift_mask = np.zeros(60, dtype=bool)
     state[5:15] = 3
     drift_mask[5:15] = True
-    state[15:35] = 1
-    dive_id[15:35] = 1
-    profile_id[15:35] = 1  # 200 s > 180 s
-    state[40:55] = 2
-    climb_id[40:55] = 1
-    profile_id[40:55] = 2
+    _set_profile(arrs, slice(15, 35), 1, 1, 1)  # 200 s > 180 s
+    _set_profile(arrs, slice(40, 55), 2, 1, 2)
     before = state.copy()
 
-    prof._absorb_post_drift_transients(
-        state, dive_id, climb_id, profile_id, drift_mask, 10.0, 180.0
-    )
+    prof._absorb_post_drift_transients(*arrs, drift_mask, 10.0, 180.0)
     assert np.array_equal(state, before)

From dfe7dffbebeab535aa5e93466b1bc501cb231d84 Mon Sep 17 00:00:00 2001
From: Jesse Cusack <jesse.cusack@oregonstate.edu>
Date: Thu, 7 May 2026 23:53:34 -0700
Subject: [PATCH 08/10] fix mypy

---
 tests/test_process_l1.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test_process_l1.py b/tests/test_process_l1.py
index 2d06f5b..5192919 100644
--- a/tests/test_process_l1.py
+++ b/tests/test_process_l1.py
@@ -1051,7 +1051,9 @@ def _make_drift_at_depth_dataset(dt_s: float) -> tuple[xr.Dataset, float, float,
     )
 
 
-def _make_state_arrays(n: int) -> tuple[np.ndarray, ...]:
+def _make_state_arrays(
+    n: int,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
     """Allocate (state, dive_id, climb_id, profile_id) all initialised to -1."""
     return (
         np.full(n, -1, dtype="b"),

From 51090b1afe9d3ce2f2e5c6e869d228c921239c1c Mon Sep 17 00:00:00 2001
From: Jesse Cusack <jesse.cusack@oregonstate.edu>
Date: Fri, 8 May 2026 00:09:56 -0700
Subject: [PATCH 09/10] fix nan profiles binning issue resulting from weird
 profile finding

---
 src/glide/process_l2.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/glide/process_l2.py b/src/glide/process_l2.py
index 490a0de..20b7b81 100644
--- a/src/glide/process_l2.py
+++ b/src/glide/process_l2.py
@@ -231,6 +231,9 @@ def bin_l2(
 
     for row in profile_indexes:
         profile = ds.isel(i=slice(row[0], row[1]))
+        if not np.isfinite(profile.depth.values).any():
+            _log.debug("Skipping profile [%d:%d] with no finite depth", row[0], row[1])
+            continue
         time = profile.time.values
         idx_mid = np.searchsorted(time, 0.5 * (time[0] + time[-1]))
 

From f6ab61a0d9c88cc0dccb7c04e881c11dc144f340 Mon Sep 17 00:00:00 2001
From: Jesse Cusack <jesse.cusack@oregonstate.edu>
Date: Fri, 8 May 2026 00:44:38 -0700
Subject: [PATCH 10/10] fix real-time drift detection and transients

---
 src/glide/profiles.py    | 60 ++++++++++++++++++++++++++++++++++++++++
 tests/test_process_l1.py | 38 +++++++++++++++++++++++++
 2 files changed, 98 insertions(+)

diff --git a/src/glide/profiles.py b/src/glide/profiles.py
index 697c832..baf6813 100644
--- a/src/glide/profiles.py
+++ b/src/glide/profiles.py
@@ -81,6 +81,29 @@ def _detect_drift(
             drift[i:j] = True
         i = j
 
+    # The rolling-variance window misses the settling phase (overshoot
+    # recovery before, slow ascent start after).  Expand each drift run to
+    # absorb adjacent samples whose pressure is close to the drift mean.
+    # A real climb/dive moves through drift depth quickly so the expansion
+    # stops shortly after crossing it.
+    if drift.any():
+        diff_d = np.diff(drift.astype(int), prepend=0, append=0)
+        run_starts, run_ends = np.where(diff_d == 1)[0], np.where(diff_d == -1)[0]
+        tol = 2 * pressure_std_threshold
+        for s, e in zip(run_starts, run_ends):
+            mean_p = float(np.nanmean(pressure[s:e]))
+            while (
+                s > 0
+                and np.isfinite(pressure[s - 1])
+                and abs(pressure[s - 1] - mean_p) < tol
+            ):
+                s -= 1
+            while (
+                e < n and np.isfinite(pressure[e]) and abs(pressure[e] - mean_p) < tol
+            ):
+                e += 1
+            drift[s:e] = True
+
     _log.debug("Drift: %d points from pressure detector", int(drift.sum()))
     return drift
 
@@ -130,6 +153,42 @@ def _absorb_apex_unknowns(
         profile_id[split:e] = profile_id[e]
 
 
+def _absorb_pre_drift_transients(
+    state: np.ndarray,
+    dive_id: np.ndarray,
+    climb_id: np.ndarray,
+    profile_id: np.ndarray,
+    drift_mask: np.ndarray,
+    dt_s: float,
+    max_transient_duration: float,
+) -> None:
+    """Fold brief climb/unknown segments between a dive end and a drift start
+    into that drift.  This is the dive overshoot recovery: the glider arrives
+    past target, briefly recovers upward, then settles into hover.  Without
+    this pass the recovery becomes a tiny climb profile in L3."""
+    n = len(state)
+    if not drift_mask.any():
+        return
+    diff = np.diff(drift_mask.astype(int), prepend=0, append=0)
+    drift_starts = np.where(diff == 1)[0]
+    window = max(2, round(max_transient_duration / dt_s))
+
+    for ds_ in drift_starts:
+        if ds_ == 0:
+            continue
+        start = max(0, ds_ - window)
+        i = ds_ - 1
+        while i >= start and (state[i] == 2 or state[i] == -1):
+            i -= 1
+        # Only absorb when bounded on the left by a dive within the window
+        if i < start or state[i] != 1:
+            continue
+        state[i + 1 : ds_] = 3
+        dive_id[i + 1 : ds_] = -1
+        climb_id[i + 1 : ds_] = -1
+        profile_id[i + 1 : ds_] = -1
+
+
 def _absorb_post_drift_transients(
     state: np.ndarray,
     dive_id: np.ndarray,
@@ -271,6 +330,7 @@ def get_profiles(
     profile_id[drift_mask] = -1
 
     ids = (state, dive_id, climb_id, profile_id)
+    _absorb_pre_drift_transients(*ids, drift_mask, dt_s, min_surface_time)
     _absorb_post_drift_transients(*ids, drift_mask, dt_s, min_surface_time)
     _absorb_apex_unknowns(*ids, ds.pressure.values, dt_s, min_surface_time)
 
diff --git a/tests/test_process_l1.py b/tests/test_process_l1.py
index 5192919..57b0472 100644
--- a/tests/test_process_l1.py
+++ b/tests/test_process_l1.py
@@ -1215,6 +1215,44 @@ def test_absorb_apex_unknowns_skips_surface_gap() -> None:
     assert np.array_equal(state, before)
 
 
+def test_absorb_pre_drift_transients() -> None:
+    """Brief climb/unknown samples between a dive and a drift (overshoot
+    recovery) are folded into the drift."""
+    arrs = _make_state_arrays(50)
+    state, dive_id, climb_id, profile_id = arrs
+    drift_mask = np.zeros(50, dtype=bool)
+    _set_profile(arrs, slice(5, 15), 1, 1, 1)  # dive (peak overshoot)
+    _set_profile(arrs, slice(15, 18), 2, 1, 2)  # brief recovery climb
+    state[18:20] = -1  # brief unknown
+    state[20:35] = 3
+    drift_mask[20:35] = True  # drift
+    _set_profile(arrs, slice(35, 45), 2, 2, 3)  # post-drift climb
+
+    prof._absorb_pre_drift_transients(*arrs, drift_mask, 10.0, 180.0)
+
+    # The recovery climb and unknown gap are now part of the drift
+    assert np.all(state[15:20] == 3)
+    assert np.all(climb_id[15:20] == -1) and np.all(dive_id[15:20] == -1)
+    assert np.all(profile_id[15:20] == -1)
+    # Surrounding dive and post-drift climb are unchanged
+    assert np.all(state[5:15] == 1) and np.all(state[35:45] == 2)
+
+
+def test_absorb_pre_drift_transients_does_not_swallow_long_climb() -> None:
+    """A long climb (>max_transient_duration) before a drift is not absorbed."""
+    arrs = _make_state_arrays(60)
+    state, _, _, _ = arrs
+    drift_mask = np.zeros(60, dtype=bool)
+    _set_profile(arrs, slice(0, 10), 1, 1, 1)
+    _set_profile(arrs, slice(10, 35), 2, 1, 2)  # 250 s > 180 s
+    state[35:50] = 3
+    drift_mask[35:50] = True
+    before = state.copy()
+
+    prof._absorb_pre_drift_transients(*arrs, drift_mask, 10.0, 180.0)
+    assert np.array_equal(state, before)
+
+
 def test_absorb_post_drift_transients_does_not_swallow_real_dive() -> None:
     """A long dive (>max_transient_duration) between drift and climb is not absorbed."""
     arrs = _make_state_arrays(60)