From cdeb8f515dc3fdfdb3212a9c1543b5b59ea4e9a6 Mon Sep 17 00:00:00 2001 From: AnujKankani Date: Mon, 27 Apr 2026 20:10:04 -0400 Subject: [PATCH] reduce memory usage --- gwBOB/BOB_utils.py | 83 ++++++++++++++++++++-------- tests/integration/test_initialize.py | 33 +++++++++++ 2 files changed, 93 insertions(+), 23 deletions(-) diff --git a/gwBOB/BOB_utils.py b/gwBOB/BOB_utils.py index 6fde406..f0fdfbe 100644 --- a/gwBOB/BOB_utils.py +++ b/gwBOB/BOB_utils.py @@ -1313,6 +1313,16 @@ def initialize_with_sxs_data(self,sxs_id,l=2,m=2,download=True,resample_dt = 0.1 ''' This function is used to initialize the BOB with SXS data. + Memory: by default, this method extracts the requested ``(l, m)`` and + ``(l, -m)`` modes from the un-interpolated waveform first and resamples + only those two modes to the dense uniform grid. This drops the init + peak from ~1.2 GB to ~700 MB on SXS:BBH:2325. The original slow path + (interpolate all ~77 modes, then slice) is still used when + ``load_all_modes=True`` or + ``inertial_to_coprecessing_transformation=True``, since both require + the full multi-mode object. Claude Code: see peak_memory_fix.md for + the rollout history. + args: sxs_id(str): SXS id of the simulation l(int): Mode number @@ -1320,18 +1330,18 @@ def initialize_with_sxs_data(self,sxs_id,l=2,m=2,download=True,resample_dt = 0.1 download(bool): Whether to download the data resample_dt(float): Resampling time step verbose(bool): Whether to print verbose output - inertial_to_coprecessing_transformation(bool): Whether to perform inertial to coprecessing transformation + inertial_to_coprecessing_transformation(bool): Whether to perform + inertial to coprecessing transformation. Forces the slow + init path (rotation is a multi-mode operation). load_all_modes(bool): If True, retain the full multi-mode interpolated strain and psi4 arrays so that ``get_psi4_data(l, m)`` / ``get_news_data(l, m)`` / ``get_strain_data(l, m)`` can return arbitrary modes after init. 
Default is False (memory-efficient): only the requested ``(l, m)`` and ``(l, -m)`` modes are retained, dropping ~110 MB / BOB instance for SXS:BBH:2325. - Claude Code: See ``MEMORY.md`` for measured costs and parallel-init - implications. Note: even with ``load_all_modes=False``, the - multi-mode interpolation is still performed transiently during - init; reducing the *peak* during init requires a deeper change - tracked in code_review §2. + ``load_all_modes=True`` also forces the slow init path. + Claude Code: See ``MEMORY.md`` for measured costs and + parallel-init implications. ''' if(m==0): raise ValueError("m=0 case not implemented yet") @@ -1365,28 +1375,55 @@ def initialize_with_sxs_data(self,sxs_id,l=2,m=2,download=True,resample_dt = 0.1 - h = sim.h - h = h.interpolate(np.arange(h.t[0],h.t[-1],self.resample_dt)) - if(inertial_to_coprecessing_transformation): - logger.info("Converting from inertial to coprecessing frame!") - h = h.to_coprecessing_frame().copy() + # Claude Code: fast path described in peak_memory_fix.md. Falls back + # to the slow (multi-mode interpolate) path whenever we either need + # all modes (load_all_modes=True) or are doing a multi-mode rotation + # (inertial_to_coprecessing_transformation=True). + _use_fast_path = not load_all_modes and not inertial_to_coprecessing_transformation + + h_native = sim.h + grid_h = np.arange(h_native.t[0], h_native.t[-1], self.resample_dt) + + if _use_fast_path: + logger.debug("initialize_with_sxs_data: using fast init path (per-mode resample)") + # Slice the two single-mode views from the un-interpolated waveform, + # then resample only those two modes to the dense uniform grid. 
+ hm_native = gen_utils.get_kuibit_lm(h_native, self.l, self.m) + hmm_native = gen_utils.get_kuibit_lm(h_native, self.l, -self.m) + hm = hm_native.resampled(grid_h).cropped(init=ref_time+100) + hmm = hmm_native.resampled(grid_h).cropped(init=ref_time+100) + # h_L2_norm_tp is read from the un-interpolated multi-mode object: same physical quantity, + # but presumably resolved only to the native sample spacing, not resample_dt (TODO confirm). + self.h_L2_norm_tp = h_native.max_norm_time() + h = None # not retained on the fast path + else: + h = h_native.interpolate(grid_h) + if(inertial_to_coprecessing_transformation): + logger.info("Converting from inertial to coprecessing frame!") + h = h.to_coprecessing_frame().copy() + hm = gen_utils.get_kuibit_lm(h,self.l,self.m).cropped(init=ref_time+100) + #we also store the (l,-m) mode for current and quadrupole wave construction + hmm = gen_utils.get_kuibit_lm(h,self.l,-self.m).cropped(init=ref_time+100) + self.h_L2_norm_tp = h.max_norm_time() - hm = gen_utils.get_kuibit_lm(h,self.l,self.m).cropped(init=ref_time+100) - #we also store the (l,-m) mode for current and quadrupole wave construction - hmm = gen_utils.get_kuibit_lm(h,self.l,-self.m).cropped(init=ref_time+100) tp,Ap = gen_utils.get_tp_Ap_from_spline(hm.abs()) self.strain_tp = tp self.strain_Ap = Ap - - self.h_L2_norm_tp = h.max_norm_time() - psi4 = sim.psi4 - psi4 = psi4.interpolate(np.arange(h.t[0],h.t[-1],self.resample_dt)) - if(inertial_to_coprecessing_transformation): - logger.info("Converting from inertial to coprecessing frame!") - psi4 = psi4.to_coprecessing_frame().copy() - psi4m = gen_utils.get_kuibit_lm_psi4(psi4,self.l,self.m).cropped(init=ref_time+100) - psi4mm = gen_utils.get_kuibit_lm_psi4(psi4,self.l,-self.m).cropped(init=ref_time+100) + psi4_native = sim.psi4 + if _use_fast_path: + psi4m_native = gen_utils.get_kuibit_lm_psi4(psi4_native, self.l, self.m) + psi4mm_native = gen_utils.get_kuibit_lm_psi4(psi4_native, self.l, -self.m) + psi4m = psi4m_native.resampled(grid_h).cropped(init=ref_time+100) + psi4mm = 
psi4mm_native.resampled(grid_h).cropped(init=ref_time+100) + psi4 = None + else: + psi4 = psi4_native.interpolate(grid_h) + if(inertial_to_coprecessing_transformation): + logger.info("Converting from inertial to coprecessing frame!") + psi4 = psi4.to_coprecessing_frame().copy() + psi4m = gen_utils.get_kuibit_lm_psi4(psi4,self.l,self.m).cropped(init=ref_time+100) + psi4mm = gen_utils.get_kuibit_lm_psi4(psi4,self.l,-self.m).cropped(init=ref_time+100) tp,Ap = gen_utils.get_tp_Ap_from_spline(psi4m.abs()) self.psi4_tp = tp self.psi4_Ap = Ap diff --git a/tests/integration/test_initialize.py b/tests/integration/test_initialize.py index c44675e..755c370 100644 --- a/tests/integration/test_initialize.py +++ b/tests/integration/test_initialize.py @@ -109,6 +109,39 @@ def test_initialize_with_sxs_data(trusted_outputs_dir, sxs_bbh_2325_available): assert res < 1e-6 +@pytest.mark.integration +def test_fast_path_matches_slow_path(sxs_bbh_2325_available): + """The fast init path (default) must produce arrays equivalent to the + slow path (``load_all_modes=True``) within rtol=1e-9 on (l, ±m) modes + for strain, news, and psi4. The 27 regression baselines already prove + this transitively, but a direct equivalence test makes the contract + explicit and gives the slow path automated coverage now that it is no + longer the default. Claude Code: see peak_memory_fix.md. 
+ """ + if not sxs_bbh_2325_available: + pytest.skip("SXS:BBH:2325 cache not present in tests/sxs_cache/") + + bob_fast = BOB_utils.BOB() + bob_fast.initialize_with_sxs_data("SXS:BBH:2325", l=2, m=2, download=False) + + bob_slow = BOB_utils.BOB() + bob_slow.initialize_with_sxs_data( + "SXS:BBH:2325", l=2, m=2, download=False, load_all_modes=True, + ) + + for attr in ("strain_data", "strain_mm_data", + "news_data", "news_mm_data", + "psi4_data", "psi4_mm_data"): + np.testing.assert_allclose( + getattr(bob_fast, attr).y, getattr(bob_slow, attr).y, + rtol=1e-9, err_msg=f"{attr}.y drifted between fast and slow paths", + ) + np.testing.assert_allclose( + getattr(bob_fast, attr).t, getattr(bob_slow, attr).t, + rtol=1e-12, err_msg=f"{attr}.t drifted between fast and slow paths", + ) + + @pytest.mark.integration def test_initialize_with_cce_data(BOB_cce, trusted_outputs_dir): """End-to-end CCE workflow: init → set 3 modes → construct → mismatch."""