From 57d1ff184e28df3d3290c246fe65ef81066e5436 Mon Sep 17 00:00:00 2001 From: Jesse Cusack Date: Mon, 1 Jun 2026 21:57:00 -0700 Subject: [PATCH 1/2] lag applied to rbrctd --- src/glide/assets/config.yml | 18 ++++- src/glide/assets/core.yml | 38 +++++++++ src/glide/cli.py | 3 + src/glide/config.py | 3 + src/glide/ctd.py | 120 ++++++++++++++++++++++++++++ tests/test_config.py | 3 + tests/test_ctd.py | 153 ++++++++++++++++++++++++++++++++++++ 7 files changed, 336 insertions(+), 2 deletions(-) create mode 100644 src/glide/ctd.py create mode 100644 tests/test_ctd.py diff --git a/src/glide/assets/config.yml b/src/glide/assets/config.yml index dc24faf..c5e86c1 100644 --- a/src/glide/assets/config.yml +++ b/src/glide/assets/config.yml @@ -48,8 +48,9 @@ netcdf_attributes: # variable in it from the L2 output. include: - flight: true # fin, battpos, heading, ballast_pumped - thermo: true # salinity, density, rho0, SA, CT, N2, depth, z, sound_speed + flight: true # fin, battpos, heading, ballast_pumped + thermo: true # salinity, density, rho0, SA, CT, N2, depth, z, sound_speed + ctd_corrections: true # rbrctd_* and ctd41cp_* raw variables # ============================================================================= @@ -139,6 +140,19 @@ qc: {} l1_variables: {} +# ============================================================================= +# CTD CORRECTIONS +# ============================================================================= +# Sensor-specific lag corrections applied to the CTD temperature before the +# science dataset is merged with flight. Requires +# `include: ctd_corrections: true` so the sensor-native source variables are +# pulled out of the raw science file. + +ctd: + rbrctd: + temperature_lag: 0.9 # seconds; positive means sensor lags water + + # ============================================================================= # MERGED VARIABLES # ============================================================================= diff --git a/src/glide/assets/core.yml b/src/glide/assets/core.yml index 1608d6b..af83788 100644 --- a/src/glide/assets/core.yml +++ b/src/glide/assets/core.yml @@ -540,6 +540,44 @@ suites: units: s-2 observation_type: calculated + # --------------------------------------------------------------------------- + ctd_corrections: # CTD-native variables used by ctd.correct_ctd + # --------------------------------------------------------------------------- + # Used to correct for thermal mass and lag effects in temperature and salinity. + + rbrctd_time: + source: sci_rbrctd_timestamp + drop_from_l2: True + CF: + long_name: RBR CTD native timestamp + standard_name: time + units: seconds since 1970-01-01T00:00:00Z + observation_type: measured + + rbrctd_temperature: + source: sci_rbrctd_temperature_00 + drop_from_l2: True + CF: + long_name: RBR CTD temperature + standard_name: sea_water_temperature + units: celsius + instrument: instrument_ctd + observation_type: measured + valid_min: -5.0 + valid_max: 50.0 + + rbrctd_conductivity: + source: sci_rbrctd_conductivity_00 + drop_from_l2: True + CF: + long_name: RBR CTD conductivity + standard_name: sea_water_electrical_conductivity + units: mS cm-1 + instrument: instrument_ctd + observation_type: measured + valid_min: 0.1 + valid_max: 10.0 + # --------------------------------------------------------------------------- flight_model: # output variables from the steady-state flight model # --------------------------------------------------------------------------- diff --git a/src/glide/cli.py b/src/glide/cli.py index f4c5a46..0dec4fa 100644 --- a/src/glide/cli.py +++ b/src/glide/cli.py @@ -14,6 +14,7 @@ from . import ( ancillery, config, + ctd, gliderdac, hotel, process_l1, @@ -227,6 +228,8 @@ def l2( flt = process_l1.apply_qc(flt, conf) sci = process_l1.apply_qc(sci, conf) + sci = ctd.correct_ctd(sci, conf) + merged = process_l1.merge(flt, sci, conf, "science") merged = process_l1.calculate_thermodynamics(merged, conf) diff --git a/src/glide/config.py b/src/glide/config.py index eda3667..3bd388c 100644 --- a/src/glide/config.py +++ b/src/glide/config.py @@ -19,6 +19,7 @@ "qc", "l1_variables", "merged_variables", + "ctd", ) # Helper functions @@ -202,6 +203,7 @@ def _section(name: str) -> dict: l1_vars = _section("l1_variables") merged_vars = _section("merged_variables") flight_model_cfg = _section("flight") + ctd_cfg = _section("ctd") # Build variable set: core + enabled optional suites variables = _merge_suites(core, suites, include) @@ -244,6 +246,7 @@ def _section(name: str) -> dict: ngdac=ngdac, include=include, flight=flight_model_cfg, + ctd=ctd_cfg, ) return config diff --git a/src/glide/ctd.py b/src/glide/ctd.py new file mode 100644 index 0000000..efa5299 --- /dev/null +++ b/src/glide/ctd.py @@ -0,0 +1,120 @@ +# Applies sensor-specific lag corrections to the CTD temperature, then +# re-interpolates temperature and conductivity from the CTD-native timestamp +# grid back onto the science time grid. + +from __future__ import annotations + +import logging + +import numpy as np +import xarray as xr + +_log = logging.getLogger(__name__) + +# Canonical names of the rbrctd suite variables (after format_l1 renaming). +_RBRCTD_VARS = ( + "rbrctd_time", + "rbrctd_temperature", + "rbrctd_conductivity", +) + +_TS_SENTINEL = 946684800 # 2000-01-01T00:00:00Z + + +def correct_ctd(sci: xr.Dataset, config: dict) -> xr.Dataset: + """Apply CTD lag correction to the science dataset. + + For the RBR Concerto: shift temperature earlier in time on the native + ``rbrctd_time`` grid by ``ctd.rbrctd.temperature_lag`` seconds, then + interpolate the lag-shifted temperature and the unadjusted conductivity + back onto ``sci.time``, overwriting ``temperature`` and ``conductivity``. + + Returns ``sci`` unchanged when the rbrctd variables are not present. + Other CTDs are not yet supported. + """ + if not all(v in sci.variables for v in _RBRCTD_VARS): + _log.debug( + "rbrctd variables not present in science dataset; skipping CTD correction" + ) + return sci + + lag_s = float(((config.get("ctd") or {}).get("rbrctd") or {}).get( + "temperature_lag", 0.9 + )) + + t_native, T, C = _build_native_rbrctd(sci) + if t_native.size < 2: + _log.warning("rbrctd has fewer than 2 valid samples; skipping correction") + return sci + + T_lag = _apply_lag(t_native, T, lag_s) + + sci_t = _time_as_seconds(sci["time"]) + T_on_sci = np.interp(sci_t, t_native, T_lag, left=np.nan, right=np.nan) + C_on_sci = np.interp(sci_t, t_native, C, left=np.nan, right=np.nan) + + sci = _overwrite( + sci, "temperature", T_on_sci, f"lag-corrected (lag={lag_s}s) via ctd.correct_ctd" + ) + sci = _overwrite( + sci, "conductivity", C_on_sci, "interpolated from rbrctd native grid via ctd.correct_ctd" + ) + return sci + + +def _build_native_rbrctd( + sci: xr.Dataset, +) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """Return (time_s, T, C) on the native rbrctd timestamp grid. + + Drops rows where the native timestamp is missing or below the sentinel, + sorts ascending, and removes duplicate timestamps. + """ + t = np.asarray(sci["rbrctd_time"].values, dtype="f8") + T = np.asarray(sci["rbrctd_temperature"].values, dtype="f8") + C = np.asarray(sci["rbrctd_conductivity"].values, dtype="f8") + + valid = np.isfinite(t) & (t > _TS_SENTINEL) + t, T, C = t[valid], T[valid], C[valid] + + order = np.argsort(t, kind="stable") + t, T, C = t[order], T[order], C[order] + + _, keep = np.unique(t, return_index=True) + keep.sort() + return t[keep], T[keep], C[keep] + + +def _apply_lag(time_s: np.ndarray, T: np.ndarray, lag_s: float) -> np.ndarray: + """Shift ``T`` earlier in time by ``lag_s`` seconds. + + The sensor reports later than the water it samples, so the "true" + temperature at time ``t`` equals the value sampled at ``t + lag``. + Equivalently, interpolate ``T`` from ``time - lag`` back onto ``time``. + """ + if lag_s == 0.0: + return T.copy() + shifted = time_s - lag_s + return np.interp(time_s, shifted, T) + + +def _overwrite( + sci: xr.Dataset, name: str, values: np.ndarray, comment: str +) -> xr.Dataset: + """Overwrite ``sci[name]`` while preserving its CF attrs and appending a comment.""" + if name not in sci.variables: + _log.warning("Cannot overwrite %s: not present in science dataset", name) + return sci + attrs = dict(sci[name].attrs) + existing = attrs.get("comment", "") + attrs["comment"] = f"{existing}; {comment}".lstrip("; ") + sci[name] = (sci[name].dims, values, attrs) + return sci + + +def _time_as_seconds(t: xr.DataArray) -> np.ndarray: + """Return ``t`` as float64 posix seconds regardless of dtype.""" + vals = t.values + if np.issubdtype(vals.dtype, np.datetime64): + return vals.astype("datetime64[ns]").astype("f8") / 1e9 + return np.asarray(vals, dtype="f8") diff --git a/tests/test_config.py b/tests/test_config.py index de41969..67b89e8 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -95,6 +95,7 @@ def test_load_config_exclude_thermo(): qc: {} l1_variables: {} merged_variables: {} + ctd: {} """) try: conf = config.load_config(path) @@ -123,6 +124,7 @@ def test_load_config_qc_override_applied(): max_gap: 900 l1_variables: {} merged_variables: {} + ctd: {} """) try: conf = config.load_config(path) @@ -154,6 +156,7 @@ def test_load_config_companion_variable(): long_name: GPS horizontal dilution of precision units: "1" merged_variables: {} + ctd: {} """) try: conf = config.load_config(path) diff --git a/tests/test_ctd.py b/tests/test_ctd.py new file mode 100644 index 0000000..156331e --- /dev/null +++ b/tests/test_ctd.py @@ -0,0 +1,153 @@ +"""Tests for glide.ctd lag correction.""" + +import numpy as np +import pandas as pd +import xarray as xr + +from glide import ctd + + +def _make_sci(t_native=None, T=None, C=None, sci_time=None): + """Build a minimal science dataset with rbrctd_* plus canonical T and C.""" + n = 100 + if sci_time is None: + sci_time = 1.78e9 + np.arange(n, dtype="f8") # well above sentinel + if t_native is None: + t_native = sci_time + 0.1 + if T is None: + T = 10.0 + np.linspace(0, 5, n).astype("f8") + if C is None: + C = 35.0 + np.linspace(0, 0.5, n).astype("f8") + + return xr.Dataset( + data_vars=dict( + rbrctd_time=("time", t_native), + rbrctd_temperature=("time", T), + rbrctd_conductivity=("time", C), + temperature=("time", T.copy()), + conductivity=("time", C.copy()), + ), + coords=dict(time=("time", pd.to_datetime(sci_time, unit="s"))), + ) + + +def _cfg(lag=0.9): + return {"ctd": {"rbrctd": {"temperature_lag": lag}}} + + +def test_correct_ctd_no_rbrctd_returns_unchanged(): + sci = _make_sci().drop_vars( + ["rbrctd_time", "rbrctd_temperature", "rbrctd_conductivity"] + ) + sci_in = sci.copy(deep=True) + out = ctd.correct_ctd(sci, _cfg()) + xr.testing.assert_identical(out.temperature, sci_in.temperature) + xr.testing.assert_identical(out.conductivity, sci_in.conductivity) + + +def test_apply_lag_zero_is_identity(): + t = np.arange(10, dtype="f8") + T = np.sin(t).astype("f8") + np.testing.assert_array_equal(ctd._apply_lag(t, T, 0.0), T) + + +def test_apply_lag_on_linear_ramp_shifts_in_time(): + # Linear ramp T = t + 100. Shifting time by -lag and interpolating back + # gives T(t - lag) = t - lag + 100 well inside the grid; edges flatten + # because of np.interp's left/right clamping. + t = np.arange(20, dtype="f8") + T = t + 100.0 + lag = 0.5 + out = ctd._apply_lag(t, T, lag) + np.testing.assert_allclose(out[5:-5], T[5:-5] + lag, atol=1e-10) + + +def test_build_native_rbrctd_drops_sentinel_and_sorts(): + sci_time = 1.78e9 + np.array([2.0, 0.0, 1.0]) + t_native = np.array([1.0, 1.78e9 + 5.0, 1.78e9 + 6.0]) # first is sentinel + sci = xr.Dataset( + data_vars=dict( + rbrctd_time=("time", t_native), + rbrctd_temperature=("time", np.array([10.0, 11.0, 12.0])), + rbrctd_conductivity=("time", np.array([30.0, 31.0, 32.0])), + ), + coords=dict(time=("time", pd.to_datetime(sci_time, unit="s"))), + ) + t, T, C = ctd._build_native_rbrctd(sci) + assert t.size == 2 + np.testing.assert_array_equal(t, [1.78e9 + 5.0, 1.78e9 + 6.0]) + np.testing.assert_array_equal(T, [11.0, 12.0]) + np.testing.assert_array_equal(C, [31.0, 32.0]) + + +def test_build_native_rbrctd_dedup_on_time(): + sci_time = 1.78e9 + np.arange(3, dtype="f8") + t_native = 1.78e9 + np.array([10.0, 10.0, 11.0]) + sci = xr.Dataset( + data_vars=dict( + rbrctd_time=("time", t_native), + rbrctd_temperature=("time", np.array([10.0, 99.0, 11.0])), + rbrctd_conductivity=("time", np.array([30.0, 31.0, 32.0])), + ), + coords=dict(time=("time", pd.to_datetime(sci_time, unit="s"))), + ) + t, _, _ = ctd._build_native_rbrctd(sci) + assert t.size == 2 # duplicate dropped + + +def test_correct_ctd_replaces_temperature_with_lag_shift(): + # On the native grid T = T0 + slope*(t - t0). After a lag shift by `lag` + # seconds, the value at time t should equal T sampled at (t + lag). + n = 100 + t_native = 1.78e9 + np.arange(n, dtype="f8") + sci_time = t_native.copy() + slope = 0.1 + T = 10.0 + slope * np.arange(n) + sci = _make_sci(t_native=t_native, T=T, sci_time=sci_time) + + out = ctd.correct_ctd(sci, _cfg(lag=0.9)) + + interior = slice(10, -10) + np.testing.assert_allclose( + out.temperature.values[interior], T[interior] + slope * 0.9, atol=1e-9 + ) + + +def test_correct_ctd_replaces_conductivity_unadjusted(): + # Conductivity is reinterpolated from the native grid but NOT lag-shifted. + # With sci_time == t_native, the values should round-trip unchanged. + n = 100 + t_native = 1.78e9 + np.arange(n, dtype="f8") + sci_time = t_native.copy() + C = 35.0 + 0.01 * np.arange(n) + sci = _make_sci(t_native=t_native, C=C, sci_time=sci_time) + + out = ctd.correct_ctd(sci, _cfg(lag=0.9)) + + np.testing.assert_allclose(out.conductivity.values, C, atol=1e-12) + + +def test_correct_ctd_preserves_temperature_attrs(): + sci = _make_sci() + sci["temperature"].attrs = { + "long_name": "Temperature", + "units": "celsius", + "valid_min": -5.0, + "valid_max": 50.0, + } + out = ctd.correct_ctd(sci, _cfg()) + assert out.temperature.attrs["long_name"] == "Temperature" + assert out.temperature.attrs["units"] == "celsius" + assert out.temperature.attrs["valid_min"] == -5.0 + assert "ctd.correct_ctd" in out.temperature.attrs["comment"] + + +def test_correct_ctd_too_few_samples_returns_unchanged(): + sci = _make_sci() + bad = np.full_like(sci["rbrctd_time"].values, np.nan) + bad[0] = 1.78e9 + 5.0 + sci["rbrctd_time"] = ("time", bad) + sci_in = sci.copy(deep=True) + out = ctd.correct_ctd(sci, _cfg()) + xr.testing.assert_identical(out.temperature, sci_in.temperature) + xr.testing.assert_identical(out.conductivity, sci_in.conductivity) From aed1b5426c12ee7300b2423e3ad8d9531203a375 Mon Sep 17 00:00:00 2001 From: Jesse Cusack Date: Mon, 1 Jun 2026 22:00:08 -0700 Subject: [PATCH 2/2] fix formatting --- src/glide/ctd.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/glide/ctd.py b/src/glide/ctd.py index efa5299..a3d0f29 100644 --- a/src/glide/ctd.py +++ b/src/glide/ctd.py @@ -38,9 +38,9 @@ def correct_ctd(sci: xr.Dataset, config: dict) -> xr.Dataset: ) return sci - lag_s = float(((config.get("ctd") or {}).get("rbrctd") or {}).get( - "temperature_lag", 0.9 - )) + lag_s = float( + ((config.get("ctd") or {}).get("rbrctd") or {}).get("temperature_lag", 0.9) + ) t_native, T, C = _build_native_rbrctd(sci) if t_native.size < 2: @@ -54,10 +54,16 @@ def correct_ctd(sci: xr.Dataset, config: dict) -> xr.Dataset: C_on_sci = np.interp(sci_t, t_native, C, left=np.nan, right=np.nan) sci = _overwrite( - sci, "temperature", T_on_sci, f"lag-corrected (lag={lag_s}s) via ctd.correct_ctd" + sci, + "temperature", + T_on_sci, + f"lag-corrected (lag={lag_s}s) via ctd.correct_ctd", ) sci = _overwrite( - sci, "conductivity", C_on_sci, "interpolated from rbrctd native grid via ctd.correct_ctd" + sci, + "conductivity", + C_on_sci, + "interpolated from rbrctd native grid via ctd.correct_ctd", ) return sci