fix: update tests to be isolated from LR model data

YichengYang-Ethan · claude · YichengYang-Ethan · commit eb27e46b1f54 · 2026-03-21T00:55:27.000-05:00
Test fixtures used real program IDs (cmu-mscf, baruch-mfe, rutgers-mqf)
that now have trained LR models, causing classification to differ from the
heuristic-based expectations. Replace with synthetic IDs (test-reach-*,
test-target-*, test-safety-*) so tests always fall back to heuristics.

Also update test_each_result_has_required_keys to use a subset check instead
of an exact-equality check, accommodating the new admission_prob field in results.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/tests/test_list_builder.py b/tests/test_list_builder.py
@@ -51,9 +51,10 @@ def _make_programs(count: int = 9) -> list[ProgramData]:
 
     We construct programmes to exercise each bucket.
     """
+    # Use synthetic IDs so the LR model has no data and falls back to heuristics.
     # Reach programmes: low acceptance rate or high avg GPA.
     reach_1 = ProgramData(
-        id="cmu-mscf",
+        id="test-reach-1",
         name="CMU MSCF",
         university="Carnegie Mellon University",
         acceptance_rate=0.05,
@@ -67,7 +68,7 @@ def _make_programs(count: int = 9) -> list[ProgramData]:
         ],
     )
     reach_2 = ProgramData(
-        id="baruch-mfe",
+        id="test-reach-2",
         name="Baruch MFE",
         university="Baruch College, CUNY",
         acceptance_rate=0.04,
@@ -79,7 +80,7 @@ def _make_programs(count: int = 9) -> list[ProgramData]:
         ],
     )
     reach_3 = ProgramData(
-        id="princeton-mfin",
+        id="test-reach-3",
         name="Princeton MFin",
         university="Princeton University",
         acceptance_rate=0.03,
@@ -90,7 +91,7 @@ def _make_programs(count: int = 9) -> list[ProgramData]:
         ],
     )
     reach_4 = ProgramData(
-        id="mit-mfin",
+        id="test-reach-4",
         name="MIT MFin",
         university="MIT",
         acceptance_rate=0.06,
@@ -104,7 +105,7 @@ def _make_programs(count: int = 9) -> list[ProgramData]:
 
     # Target programmes: moderate acceptance, GPA near user's.
     target_1 = ProgramData(
-        id="bu-msmf",
+        id="test-target-1",
         name="BU MSMF",
         university="Boston University",
         acceptance_rate=0.12,
@@ -116,7 +117,7 @@ def _make_programs(count: int = 9) -> list[ProgramData]:
         ],
     )
     target_2 = ProgramData(
-        id="nyu-mfe",
+        id="test-target-2",
         name="NYU MFE",
         university="New York University",
         acceptance_rate=0.10,
@@ -128,7 +129,7 @@ def _make_programs(count: int = 9) -> list[ProgramData]:
         ],
     )
     target_3 = ProgramData(
-        id="gatech-qcf",
+        id="test-target-3",
         name="GaTech QCF",
         university="Georgia Tech",
         acceptance_rate=0.14,
@@ -141,7 +142,7 @@ def _make_programs(count: int = 9) -> list[ProgramData]:
 
     # Safety programmes: high acceptance, avg GPA well below user's.
     safety_1 = ProgramData(
-        id="rutgers-mqf",
+        id="test-safety-1",
         name="Rutgers MQF",
         university="Rutgers University",
         acceptance_rate=0.25,
@@ -152,7 +153,7 @@ def _make_programs(count: int = 9) -> list[ProgramData]:
         ],
     )
     safety_2 = ProgramData(
-        id="uconn-msqf",
+        id="test-safety-2",
         name="UConn MSQF",
         university="University of Connecticut",
         acceptance_rate=0.30,
@@ -248,8 +249,8 @@ def test_safety_programmes(self) -> None:
     def test_fewer_programmes_than_max(self) -> None:
         """When only 1 safety programme exists, we should get 1."""
         progs = _make_programs()
-        # Remove the second safety programme (uconn-msqf).
-        progs = [p for p in progs if p.id != "uconn-msqf"]
+        # Remove the second safety programme (test-safety-2).
+        progs = [p for p in progs if p.id != "test-safety-2"]
         sl = build_school_list(
             _make_profile(), progs, _default_evaluation(), max_safety=2,
         )
@@ -313,7 +314,7 @@ def test_empty_program_list(self) -> None:
 
     def test_single_programme(self) -> None:
         """A single reach programme should appear in reach only."""
-        progs = [_make_programs()[0]]  # CMU = reach
+        progs = [_make_programs()[0]]  # test-reach-1 = reach
         sl = build_school_list(_make_profile(), progs, _default_evaluation())
         assert len(sl.reach) == 1
         assert len(sl.target) == 0
diff --git a/tests/test_school_ranker.py b/tests/test_school_ranker.py
@@ -168,9 +168,10 @@ def _make_profile(self) -> UserProfile:
         )
 
     def _make_programs(self) -> list[ProgramData]:
+        # Use synthetic IDs so the LR model has no data and the heuristic fallback is always used.
         # Reach: low acceptance, high avg GPA
         reach = ProgramData(
-            id="cmu-mscf",
+            id="test-reach-prog",
             name="CMU MSCF",
             university="Carnegie Mellon",
             acceptance_rate=0.05,
@@ -184,7 +185,7 @@ def _make_programs(self) -> list[ProgramData]:
         )
         # Target: moderate acceptance, matched GPA
         target = ProgramData(
-            id="bu-msmf",
+            id="test-target-prog",
             name="BU MSMF",
             university="Boston University",
             acceptance_rate=0.12,
@@ -196,7 +197,7 @@ def _make_programs(self) -> list[ProgramData]:
         )
         # Safety: high acceptance, lower avg GPA
         safety = ProgramData(
-            id="rutgers-mqf",
+            id="test-safety-prog",
             name="Rutgers MQF",
             university="Rutgers",
             acceptance_rate=0.25,
@@ -237,12 +238,12 @@ def test_each_result_has_required_keys(self) -> None:
         programs = self._make_programs()
         evaluation = EvaluationResult(overall_score=7.0)
         result = rank_schools(profile, programs, evaluation)
-        expected_keys = {
+        required_keys = {
             "program_id", "name", "university", "category",
             "fit_score", "prereq_match_score", "acceptance_rate", "avg_gpa",
         }
         for entry in result["all"]:
-            assert set(entry.keys()) == expected_keys
+            assert required_keys.issubset(set(entry.keys()))
 
     def test_classification_matches_category(self) -> None:
         """Programs in each bucket should have matching category values."""
@@ -258,22 +259,22 @@ def test_classification_matches_category(self) -> None:
             assert entry["category"] == "safety"
 
     def test_reach_program_classified_correctly(self) -> None:
-        """CMU with 5% acceptance should be reach."""
+        """Program with 5% acceptance and avg GPA > user GPA should be reach."""
         profile = self._make_profile()
         programs = self._make_programs()
         evaluation = EvaluationResult(overall_score=7.0)
         result = rank_schools(profile, programs, evaluation)
-        cmu = next(r for r in result["all"] if r["program_id"] == "cmu-mscf")
-        assert cmu["category"] == "reach"
+        reach = next(r for r in result["all"] if r["program_id"] == "test-reach-prog")
+        assert reach["category"] == "reach"
 
     def test_safety_program_classified_correctly(self) -> None:
-        """Rutgers with 25% acceptance and user GPA above avg+0.1 -> safety."""
+        """Program with 25% acceptance and avg GPA below user -> safety."""
         profile = self._make_profile()
         programs = self._make_programs()
         evaluation = EvaluationResult(overall_score=7.0)
         result = rank_schools(profile, programs, evaluation)
-        rutgers = next(r for r in result["all"] if r["program_id"] == "rutgers-mqf")
-        assert rutgers["category"] == "safety"
+        safety = next(r for r in result["all"] if r["program_id"] == "test-safety-prog")
+        assert safety["category"] == "safety"
 
     def test_empty_programs_list(self) -> None:
         profile = self._make_profile()
@@ -290,6 +291,6 @@ def test_prereq_match_score_reflected(self) -> None:
         programs = self._make_programs()
         evaluation = EvaluationResult(overall_score=7.0)
         result = rank_schools(profile, programs, evaluation)
-        # Rutgers only requires calculus, which profile has -> match_score = 1.0
-        rutgers = next(r for r in result["all"] if r["program_id"] == "rutgers-mqf")
-        assert rutgers["prereq_match_score"] == 1.0
+        # test-safety-prog only requires calculus, which profile has -> match_score = 1.0
+        safety = next(r for r in result["all"] if r["program_id"] == "test-safety-prog")
+        assert safety["prereq_match_score"] == 1.0