From cdeb8f515dc3fdfdb3212a9c1543b5b59ea4e9a6 Mon Sep 17 00:00:00 2001
From: AnujKankani <kankania00@gmail.com>
Date: Mon, 27 Apr 2026 20:10:04 -0400
Subject: [PATCH] reduce memory usage

---
 gwBOB/BOB_utils.py                   | 83 ++++++++++++++++++++--------
 tests/integration/test_initialize.py | 33 +++++++++++
 2 files changed, 93 insertions(+), 23 deletions(-)

diff --git a/gwBOB/BOB_utils.py b/gwBOB/BOB_utils.py
index 6fde406..f0fdfbe 100644
--- a/gwBOB/BOB_utils.py
+++ b/gwBOB/BOB_utils.py
@@ -1313,6 +1313,16 @@ def initialize_with_sxs_data(self,sxs_id,l=2,m=2,download=True,resample_dt = 0.1
         '''
         This function is used to initialize the BOB with SXS data.
 
+        Memory: by default, this method extracts the requested ``(l, m)`` and
+        ``(l, -m)`` modes from the un-interpolated waveform first and resamples
+        only those two modes to the dense uniform grid. This lowers peak memory
+        during init from ~1.2 GB to ~700 MB on SXS:BBH:2325. The original slow path
+        (interpolate all ~77 modes, then slice) is still used when
+        ``load_all_modes=True`` or
+        ``inertial_to_coprecessing_transformation=True``, since both require
+        the full multi-mode object. Claude Code: see peak_memory_fix.md for
+        the rollout history.
+
         args:
             sxs_id(str): SXS id of the simulation
             l(int): Mode number
@@ -1320,18 +1330,18 @@ def initialize_with_sxs_data(self,sxs_id,l=2,m=2,download=True,resample_dt = 0.1
             download(bool): Whether to download the data
             resample_dt(float): Resampling time step
             verbose(bool): Whether to print verbose output
-            inertial_to_coprecessing_transformation(bool): Whether to perform inertial to coprecessing transformation
+            inertial_to_coprecessing_transformation(bool): Whether to perform
+                inertial to coprecessing transformation. Forces the slow
+                init path (rotation is a multi-mode operation).
             load_all_modes(bool): If True, retain the full multi-mode interpolated
                 strain and psi4 arrays so that ``get_psi4_data(l, m)`` /
                 ``get_news_data(l, m)`` / ``get_strain_data(l, m)`` can return
                 arbitrary modes after init. Default is False (memory-efficient):
                 only the requested ``(l, m)`` and ``(l, -m)`` modes are
                 retained, dropping ~110 MB / BOB instance for SXS:BBH:2325.
-                Claude Code: See ``MEMORY.md`` for measured costs and parallel-init
-                implications. Note: even with ``load_all_modes=False``, the
-                multi-mode interpolation is still performed transiently during
-                init; reducing the *peak* during init requires a deeper change
-                tracked in code_review §2.
+                ``load_all_modes=True`` also forces the slow init path.
+                Claude Code: See ``MEMORY.md`` for measured costs and
+                parallel-init implications.
         '''
         if(m==0):
             raise ValueError("m=0 case not implemented yet")
@@ -1365,28 +1375,55 @@ def initialize_with_sxs_data(self,sxs_id,l=2,m=2,download=True,resample_dt = 0.1
 
         
 
-        h = sim.h
-        h = h.interpolate(np.arange(h.t[0],h.t[-1],self.resample_dt))
-        if(inertial_to_coprecessing_transformation):
-            logger.info("Converting from inertial to coprecessing frame!")
-            h = h.to_coprecessing_frame().copy()
+        # Claude Code: fast path described in peak_memory_fix.md. Falls back
+        # to the slow (multi-mode interpolate) path whenever we either need
+        # all modes (load_all_modes=True) or are doing a multi-mode rotation
+        # (inertial_to_coprecessing_transformation=True).
+        _use_fast_path = not load_all_modes and not inertial_to_coprecessing_transformation
+
+        h_native = sim.h
+        grid_h = np.arange(h_native.t[0], h_native.t[-1], self.resample_dt)
+
+        if _use_fast_path:
+            logger.debug("initialize_with_sxs_data: using fast init path (per-mode resample)")
+            # Slice the two single-mode views from the un-interpolated waveform,
+            # then resample only those two modes to the dense uniform grid.
+            hm_native  = gen_utils.get_kuibit_lm(h_native, self.l,  self.m)
+            hmm_native = gen_utils.get_kuibit_lm(h_native, self.l, -self.m)
+            hm  = hm_native.resampled(grid_h).cropped(init=ref_time+100)
+            hmm = hmm_native.resampled(grid_h).cropped(init=ref_time+100)
+            # h_L2_norm_tp is taken from the un-interpolated multi-mode object: the same
+            # physical quantity, but on the native time samples, not the resample_dt grid.
+            self.h_L2_norm_tp = h_native.max_norm_time()
+            h = None  # not retained on the fast path
+        else:
+            h = h_native.interpolate(grid_h)
+            if(inertial_to_coprecessing_transformation):
+                logger.info("Converting from inertial to coprecessing frame!")
+                h = h.to_coprecessing_frame().copy()
+            hm = gen_utils.get_kuibit_lm(h,self.l,self.m).cropped(init=ref_time+100)
+            #we also store the (l,-m) mode for current and quadrupole wave construction
+            hmm = gen_utils.get_kuibit_lm(h,self.l,-self.m).cropped(init=ref_time+100)
+            self.h_L2_norm_tp = h.max_norm_time()
 
-        hm = gen_utils.get_kuibit_lm(h,self.l,self.m).cropped(init=ref_time+100)
-        #we also store the (l,-m) mode for current and quadrupole wave construction
-        hmm = gen_utils.get_kuibit_lm(h,self.l,-self.m).cropped(init=ref_time+100)
         tp,Ap = gen_utils.get_tp_Ap_from_spline(hm.abs())
         self.strain_tp = tp
         self.strain_Ap = Ap
-        
-        self.h_L2_norm_tp = h.max_norm_time()
 
-        psi4 = sim.psi4
-        psi4 = psi4.interpolate(np.arange(h.t[0],h.t[-1],self.resample_dt))
-        if(inertial_to_coprecessing_transformation):
-            logger.info("Converting from inertial to coprecessing frame!")
-            psi4 = psi4.to_coprecessing_frame().copy()
-        psi4m = gen_utils.get_kuibit_lm_psi4(psi4,self.l,self.m).cropped(init=ref_time+100)
-        psi4mm = gen_utils.get_kuibit_lm_psi4(psi4,self.l,-self.m).cropped(init=ref_time+100)
+        psi4_native = sim.psi4
+        if _use_fast_path:
+            psi4m_native  = gen_utils.get_kuibit_lm_psi4(psi4_native, self.l,  self.m)
+            psi4mm_native = gen_utils.get_kuibit_lm_psi4(psi4_native, self.l, -self.m)
+            psi4m  = psi4m_native.resampled(grid_h).cropped(init=ref_time+100)
+            psi4mm = psi4mm_native.resampled(grid_h).cropped(init=ref_time+100)
+            psi4 = None
+        else:
+            psi4 = psi4_native.interpolate(grid_h)
+            if(inertial_to_coprecessing_transformation):
+                logger.info("Converting from inertial to coprecessing frame!")
+                psi4 = psi4.to_coprecessing_frame().copy()
+            psi4m = gen_utils.get_kuibit_lm_psi4(psi4,self.l,self.m).cropped(init=ref_time+100)
+            psi4mm = gen_utils.get_kuibit_lm_psi4(psi4,self.l,-self.m).cropped(init=ref_time+100)
         tp,Ap = gen_utils.get_tp_Ap_from_spline(psi4m.abs())
         self.psi4_tp = tp
         self.psi4_Ap = Ap
diff --git a/tests/integration/test_initialize.py b/tests/integration/test_initialize.py
index c44675e..755c370 100644
--- a/tests/integration/test_initialize.py
+++ b/tests/integration/test_initialize.py
@@ -109,6 +109,39 @@ def test_initialize_with_sxs_data(trusted_outputs_dir, sxs_bbh_2325_available):
         assert res < 1e-6
 
 
+@pytest.mark.integration
+def test_fast_path_matches_slow_path(sxs_bbh_2325_available):
+    """The fast init path (default) must produce arrays equivalent to the
+    slow path (``load_all_modes=True``) on the (l, ±m) strain, news, and psi4 modes
+    (values: rtol=1e-9; time grids: rtol=1e-12). The 27 regression baselines already prove
+    this transitively, but a direct equivalence test makes the contract
+    explicit and gives the slow path automated coverage now that it is no
+    longer the default. Claude Code: see peak_memory_fix.md.
+    """
+    if not sxs_bbh_2325_available:
+        pytest.skip("SXS:BBH:2325 cache not present in tests/sxs_cache/")
+
+    bob_fast = BOB_utils.BOB()
+    bob_fast.initialize_with_sxs_data("SXS:BBH:2325", l=2, m=2, download=False)
+
+    bob_slow = BOB_utils.BOB()
+    bob_slow.initialize_with_sxs_data(
+        "SXS:BBH:2325", l=2, m=2, download=False, load_all_modes=True,
+    )
+
+    for attr in ("strain_data", "strain_mm_data",
+                 "news_data",   "news_mm_data",
+                 "psi4_data",   "psi4_mm_data"):
+        np.testing.assert_allclose(
+            getattr(bob_fast, attr).y, getattr(bob_slow, attr).y,
+            rtol=1e-9, err_msg=f"{attr}.y drifted between fast and slow paths",
+        )
+        np.testing.assert_allclose(
+            getattr(bob_fast, attr).t, getattr(bob_slow, attr).t,
+            rtol=1e-12, err_msg=f"{attr}.t drifted between fast and slow paths",
+        )
+
+
 @pytest.mark.integration
 def test_initialize_with_cce_data(BOB_cce, trusted_outputs_dir):
     """End-to-end CCE workflow: init → set 3 modes → construct → mismatch."""