march-madness/bracket_simulation.py at main · gmalbert/march-madness · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
"""
Bracket Simulation Engine

Monte Carlo simulation for predicting full tournament outcomes.
Implements the roadmap-bracket-simulation.md specification.
"""

import numpy as np
import random
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple
from copy import deepcopy
import json
from pathlib import Path


def normalize_team_name(name: str) -> str:
    """Return a canonical spelling of ``name`` for cross-source matching.

    An exact hit in the alias table wins and is returned unchanged.
    Otherwise the abbreviation "St" is expanded to "State" wherever it
    follows a space and is itself followed by a period or a space.

    Args:
        name: Team name as it appears in one of the data sources.

    Returns:
        The aliased or expanded name; ``name`` itself when nothing applies.
    """
    aliases = {
        'N.C. State': 'NC State',
        'Illinois Chicago': 'UIC',
        'Tennessee Martin': 'UT Martin',
        'Nebraska Omaha': 'Omaha',
        'Southeastern Louisiana': 'SE Louisiana',
        'USC Upstate': 'USC Upstate',
        'IU Indy': 'IUPUI',
        'Loyola MD': 'Loyola (MD)',
        'Louisiana Monroe': 'ULM',
        'Appalachian St.': 'Appalachian State',
    }

    canonical = aliases.get(name)
    if canonical is not None:
        return canonical

    # No alias: expand the dotted form first, then the bare " St " form.
    return name.replace(' St.', ' State').replace(' St ', ' State ')


@dataclass
class Team:
    """Tournament team.

    Identity is defined by ``id`` alone: two ``Team`` objects with the same
    ``id`` compare equal and hash the same regardless of their other fields,
    so copies of a team still match the original in sets and dict keys.
    """
    id: str  # unique identifier; the only field used for equality and hashing
    name: str  # display name
    seed: int  # tournament seed
    region: str  # region label
    stats: Dict = field(default_factory=dict)  # free-form per-team statistics

    def __hash__(self):
        """Hash on ``id`` so it stays consistent with ``__eq__``."""
        return hash(self.id)

    def __eq__(self, other):
        """Compare by ``id``; defer to Python for objects that are not teams."""
        if not isinstance(other, Team):
            # Returning NotImplemented (instead of touching other.id) makes
            # comparisons such as ``team == None`` evaluate to False rather
            # than raising AttributeError.
            return NotImplemented
        return self.id == other.id

@dataclass
class BracketState:
    """Current state of the tournament bracket.

    Rounds are numbered 1-6: rounds 1-4 are played inside a region, round 5
    is the Final Four and round 6 the championship game.
    """
    teams: Dict[str, Team]  # every team in the field, keyed by team id
    regions: Dict[str, List[Team]]  # teams grouped by region (not read by the methods below)
    results: Dict[int, List[Team]] = field(default_factory=dict)  # round number -> winners of that round

    # Seeds in top-to-bottom bracket order within one region.  Adjacent
    # entries meet in round 1, adjacent groups of four in round 2, and so on.
    # Deliberately unannotated so the dataclass does not treat it as a field.
    _SEED_SLOTS = (1, 16, 8, 9, 5, 12, 4, 13, 6, 11, 3, 14, 7, 10, 2, 15)

    def get_remaining_teams(self, round_num: int) -> List[Team]:
        """Get teams still alive entering a round.

        Round 1 is entered by the whole field; any later round is entered by
        the winners recorded for the previous round (empty if none recorded).
        """
        if round_num == 1:
            return list(self.teams.values())
        return self.results.get(round_num - 1, [])

    def get_matchups(self, round_num: int) -> List[Tuple[Team, Team]]:
        """Get matchups for a specific round.

        Args:
            round_num: Round to build games for (1-6).

        Returns:
            List of ``(team1, team2)`` tuples.  For round 5 an entry may hold
            ``None`` when a region has no surviving team, so callers must
            check both sides.  Rounds outside 1-6 yield an empty list.
        """
        remaining = self.get_remaining_teams(round_num)

        if round_num <= 4:  # Regional rounds (64, 32, 16, 8)
            matchups = []
            for region in ['East', 'West', 'Midwest', 'South']:
                region_teams = [t for t in remaining if t.region == region]
                # _pair_region_teams handles ordering and the <2 teams case.
                matchups.extend(self._pair_region_teams(region_teams, round_num))
            return matchups

        if round_num == 5:  # Final Four
            # One survivor per region; a later team overwrites an earlier one.
            winners_by_region = {team.region: team for team in remaining}
            return [
                (winners_by_region.get('East'), winners_by_region.get('West')),
                (winners_by_region.get('Midwest'), winners_by_region.get('South'))
            ]

        if round_num == 6:  # Championship
            return [(remaining[0], remaining[1])] if len(remaining) >= 2 else []

        return []

    def _pair_region_teams(self, teams: List[Team], round_num: int) -> List[Tuple[Team, Team]]:
        """Pair teams within a region for the current round.

        Games follow the fixed bracket path (1v16, 8v9, 5v12, 4v13, 6v11,
        3v14, 7v10, 2v15, with winners of neighbouring games meeting next),
        so an upset winner inherits the slot of the team it beat instead of
        the region being re-seeded every round.  Each pair lists the better
        (lower-numbered) seed first.

        When the teams do not form a standard field for ``round_num``
        (unknown or duplicate seeds, or a game without exactly two entrants)
        the method falls back to pairing best remaining seed against worst.
        """
        if len(teams) < 2:
            return []

        bracket_pairs = self._pair_by_bracket_slot(teams, round_num)
        if bracket_pairs is not None:
            return bracket_pairs

        # Legacy fallback: best remaining seed vs worst remaining seed.
        teams_sorted = sorted(teams, key=lambda x: x.seed)
        pairs = []
        for i in range(len(teams_sorted) // 2):
            team1 = teams_sorted[i]
            team2 = teams_sorted[-(i + 1)]
            if team1 != team2:
                pairs.append((team1, team2))
        return pairs

    def _pair_by_bracket_slot(self, teams: List[Team], round_num: int) -> Optional[List[Tuple[Team, Team]]]:
        """Pair by fixed bracket slot, or return None for a non-standard field."""
        if not 1 <= round_num <= 4:
            return None

        slot_of = {seed: slot for slot, seed in enumerate(self._SEED_SLOTS)}
        games: Dict[int, List[Team]] = {}
        seen_seeds = set()
        for team in teams:
            slot = slot_of.get(team.seed)
            if slot is None or team.seed in seen_seeds:
                return None
            seen_seeds.add(team.seed)
            # In round r each game covers 2**r consecutive slots.
            games.setdefault(slot >> round_num, []).append(team)

        if any(len(entrants) != 2 for entrants in games.values()):
            return None

        return [
            tuple(sorted(games[game_index], key=lambda t: t.seed))
            for game_index in sorted(games)
        ]

    def advance_winner(self, round_num: int, winner: Team):
        """Record ``winner`` as having won a game in ``round_num``."""
        self.results.setdefault(round_num, []).append(winner)

class BracketSimulator:
    """Monte Carlo bracket simulation engine."""

    def __init__(self, game_predictor=None):
        """
        Set up the simulator.

        Args:
            game_predictor: Callable taking (team1, team2) and returning the
                probability that team1 wins.  When omitted (or falsy) the
                seed-based default predictor is used.
        """
        if game_predictor:
            self.game_predictor = game_predictor
        else:
            self.game_predictor = self._default_predictor

    def _default_predictor(self, team1: Team, team2: Team) -> float:
        """Win probability for team1 from a logistic curve on the seed gap."""
        # Positive edge means team1 holds the better (lower-numbered) seed.
        seed_edge = team2.seed - team1.seed
        return 1 / (1 + np.exp(-seed_edge * 0.5))

    def simulate_bracket(self, bracket_state: BracketState, num_simulations: int = 1000) -> Dict:
        """
        Play the tournament ``num_simulations`` times and tally how far each team gets.

        Args:
            bracket_state: Starting bracket; it is deep-copied for every run
                and never modified.
            num_simulations: Number of brackets to simulate.

        Returns:
            Dict keyed by team id.  Each value holds the team object, a fixed
            ``round_64_prob`` of 1.0, the fraction of runs in which the team
            reached each later stage, and a ``simulations`` run counter.
        """
        # Stage label (as produced by _round_name) -> counter to bump.
        counter_for_stage = {
            '32': 'round_32_prob',
            'sweet_16': 'sweet_16_prob',
            'elite_8': 'elite_8_prob',
            'final_four': 'final_four_prob',
            'championship': 'championship_prob',
            'winner': 'winner_prob',
        }
        tallied_fields = ['round_32_prob', 'sweet_16_prob',
                          'elite_8_prob', 'final_four_prob', 'championship_prob', 'winner_prob']

        team_stats = {
            entrant.id: {
                'team': entrant,
                'round_64_prob': 1.0,  # every team starts in the Round of 64
                'round_32_prob': 0.0,
                'sweet_16_prob': 0.0,
                'elite_8_prob': 0.0,
                'final_four_prob': 0.0,
                'championship_prob': 0.0,
                'winner_prob': 0.0,
                'simulations': 0
            }
            for entrant in bracket_state.teams.values()
        }

        for _ in range(num_simulations):
            # Independent working copy so runs never leak results into each other.
            trial = deepcopy(bracket_state)

            for entrant in bracket_state.teams.values():
                team_stats[entrant.id]['simulations'] += 1

            for round_num in range(1, 7):  # Rounds 1-6
                for side_a, side_b in trial.get_matchups(round_num):
                    if not (side_a and side_b):
                        # A missing side means the game cannot be played.
                        continue
                    chance_a = self.game_predictor(side_a, side_b)
                    victor = side_a if random.random() < chance_a else side_b
                    trial.advance_winner(round_num, victor)

                # Winners of this round have reached the next stage.
                survivors = trial.get_remaining_teams(round_num + 1)
                counter = counter_for_stage.get(self._round_name(round_num + 1))
                if counter is not None:
                    for survivor in survivors:
                        team_stats[survivor.id][counter] += 1

        # Turn raw counts into fractions of the number of runs.
        for tallies in team_stats.values():
            for field_name in tallied_fields:
                if field_name in tallies:
                    if num_simulations > 0:
                        tallies[field_name] = tallies[field_name] / num_simulations
                    else:
                        tallies[field_name] = 0

        return team_stats

    def _round_name(self, round_num: int) -> str:
        """Map a round number (1-7) to its label; anything else maps to '64'."""
        labels = (
            '64',            # 1: Round of 64
            '32',            # 2: Round of 32
            'sweet_16',      # 3: Sweet 16
            'elite_8',       # 4: Elite 8
            'final_four',    # 5: Final Four
            'championship',  # 6: Championship
            'winner',        # 7: Winner
        )
        return dict(zip(range(1, 8), labels)).get(round_num, '64')

def create_bracket_from_data(team_data: Dict, game_predictor=None) -> Tuple[BracketState, BracketSimulator]:
    """
    Build a bracket and a matching simulator from raw team records.

    Args:
        team_data: Dictionary whose 'teams' entry lists team records.  Each
            record may carry 'id', 'name', 'seed', 'region' and 'stats';
            missing values default to the name (for the id), '', 16, 'East'
            and {} respectively.
        game_predictor: Optional custom predictor handed to the simulator.

    Returns:
        Tuple of (BracketState, BracketSimulator)
    """
    roster = {}
    by_region = {label: [] for label in ('East', 'West', 'Midwest', 'South')}

    for record in team_data.get('teams', []):
        display_name = record.get('name', '')
        member = Team(
            id=str(record.get('id', display_name)),
            name=display_name,
            seed=record.get('seed', 16),
            region=record.get('region', 'East'),
            stats=record.get('stats', {})
        )
        roster[member.id] = member

        # Teams in an unrecognised region stay in the roster only.
        bucket = by_region.get(member.region)
        if bucket is not None:
            bucket.append(member)

    state = BracketState(teams=roster, regions=by_region)
    return state, BracketSimulator(game_predictor=game_predictor)

def _kenpom_stats_for_team(kenpom_df, team_name: str) -> Dict:
    """Look up efficiency stats for ``team_name`` in the KenPom table.

    Tries an exact name match, then the normalized name, then a
    case-insensitive substring match, and uses the first matching row.
    ``kenpom_df`` is used like a pandas DataFrame (``.columns``, boolean
    indexing, ``.str.contains``, ``.iloc``).

    Returns:
        Dict with 'net_efficiency', 'off_efficiency', 'def_efficiency' and
        'tempo', or an empty dict when there is no table, no name, or no match.
    """
    if kenpom_df is None or len(kenpom_df) == 0:
        return {}
    if not team_name:
        # An empty pattern would "contain"-match every row and attach the
        # first team's numbers to a nameless entry.
        return {}

    import re

    team_col = 'TeamName' if 'TeamName' in kenpom_df.columns else 'Team'
    names = kenpom_df[team_col]

    rows = kenpom_df[names == team_name]
    if rows.empty:
        rows = kenpom_df[names == normalize_team_name(team_name)]
    if rows.empty:
        pattern = re.escape(team_name)
        rows = kenpom_df[names.str.contains(pattern, case=False, na=False, regex=True)]
    if rows.empty:
        return {}

    row = rows.iloc[0]
    return {
        'net_efficiency': row.get('NetRtg', 0),
        'off_efficiency': row.get('ORtg', 0),
        'def_efficiency': row.get('DRtg', 0),
        'tempo': row.get('AdjT', 70),
    }


def _infer_team_region(game: Dict, region_key: str) -> str:
    """Return the region for one side of ``game``, or 'TBD' when unknown."""
    known_regions = ('East', 'West', 'South', 'Midwest')

    # NOTE(review): the per-side region keys ('homeRegion'/'awayRegion') were
    # carried alongside each team but never read; presumably the feed can
    # populate them - confirm.  They are only trusted when they hold one of
    # the four region names.
    explicit = game.get(region_key)
    if explicit in known_regions:
        return explicit

    # Notes may be missing or None; treat both as empty text.
    notes = game.get('gameNotes') or ''
    for region in known_regions:
        if region in notes:
            return region
    return 'TBD'


def load_real_tournament_bracket(year: int = 2025) -> Dict:
    """
    Load real tournament bracket data with team stats.

    Each team is recorded the first time it appears in a game, with its seed,
    a region taken from that game, and KenPom efficiency stats when a name
    match is found.

    Args:
        year: Tournament year

    Returns:
        Dictionary with 'year', 'teams' (list of team dicts) and 'games'.
        If anything fails while loading (including the project-local data
        modules being unavailable) the error is printed and the sample
        bracket for ``year`` is returned instead.
    """
    try:
        # Try to load from ESPN data
        from data_collection import fetch_tournament_games
        from data_tools.efficiency_loader import EfficiencyDataLoader

        games = fetch_tournament_games(year)
        print(f"Loaded {len(games)} tournament games from {year}")

        efficiency_loader = EfficiencyDataLoader()
        kenpom_df = efficiency_loader.load_kenpom()
        # NOTE(review): the Barttorvik table is loaded but not used yet; the
        # call is kept so loader side effects and failure behaviour are unchanged.
        efficiency_loader.load_barttorvik()

        teams = {}
        for game in games:
            # Games have homeTeam/awayTeam as strings, homeTeamId/awayTeamId, homeSeed/awaySeed
            sides = [
                (game.get('homeTeamId'), game.get('homeTeam', ''), game.get('homeSeed', 16), 'homeRegion'),
                (game.get('awayTeamId'), game.get('awayTeam', ''), game.get('awaySeed', 16), 'awayRegion'),
            ]
            for raw_id, team_name, seed, region_key in sides:
                # A missing id must not turn into the literal string 'None'.
                team_id = '' if raw_id is None else str(raw_id)
                if not team_id or team_id in teams:
                    continue

                teams[team_id] = {
                    'id': team_id,
                    'name': team_name,
                    'seed': seed,
                    'region': _infer_team_region(game, region_key),
                    'stats': _kenpom_stats_for_team(kenpom_df, team_name),
                }

        return {
            'year': year,
            'teams': list(teams.values()),
            'games': games
        }

    except Exception as e:
        # Deliberate best-effort: any failure degrades to the sample bracket.
        print(f"Error loading real tournament data: {e}")
        print("Falling back to sample data...")
        return load_sample_tournament_bracket(year)

def load_sample_tournament_bracket(year: int = 2025) -> Dict:
    """
    Load sample tournament bracket data for testing.

    This is a placeholder with realistic team data for development: four
    regions of sixteen teams, seeds 1-16 in each, with unique ids and unique
    team names (results of the name-keyed simulation API would otherwise
    merge teams that share a name).

    Args:
        year: Echoed back in the result; the sample field itself is fixed.

    Returns:
        Dictionary with 'year' and 'teams' (list of team dicts carrying
        'id', 'name', 'seed', 'region' and 'stats').
    """
    # Sample teams with realistic data
    sample_teams = [
        # East Region
        {'id': '1', 'name': 'Duke', 'seed': 1, 'region': 'East', 'stats': {'net_efficiency': 25.0, 'tempo': 72.0, 'three_rate': 0.32}},
        {'id': '16', 'name': 'American', 'seed': 16, 'region': 'East', 'stats': {'net_efficiency': 5.0, 'tempo': 68.0, 'three_rate': 0.38}},
        {'id': '8', 'name': 'Florida', 'seed': 8, 'region': 'East', 'stats': {'net_efficiency': 15.0, 'tempo': 70.0, 'three_rate': 0.35}},
        {'id': '9', 'name': 'Boise State', 'seed': 9, 'region': 'East', 'stats': {'net_efficiency': 12.0, 'tempo': 69.0, 'three_rate': 0.36}},
        {'id': '4', 'name': 'Arizona', 'seed': 4, 'region': 'East', 'stats': {'net_efficiency': 20.0, 'tempo': 71.0, 'three_rate': 0.33}},
        {'id': '13', 'name': 'Akron', 'seed': 13, 'region': 'East', 'stats': {'net_efficiency': 8.0, 'tempo': 67.0, 'three_rate': 0.39}},
        {'id': '5', 'name': 'BYU', 'seed': 5, 'region': 'East', 'stats': {'net_efficiency': 18.0, 'tempo': 70.5, 'three_rate': 0.34}},
        {'id': '12', 'name': 'Duquesne', 'seed': 12, 'region': 'East', 'stats': {'net_efficiency': 10.0, 'tempo': 68.5, 'three_rate': 0.37}},
        {'id': '2', 'name': 'Alabama', 'seed': 2, 'region': 'East', 'stats': {'net_efficiency': 23.0, 'tempo': 71.5, 'three_rate': 0.31}},
        {'id': '15', 'name': 'Robert Morris', 'seed': 15, 'region': 'East', 'stats': {'net_efficiency': 6.0, 'tempo': 67.5, 'three_rate': 0.40}},
        {'id': '7', 'name': 'Clemson', 'seed': 7, 'region': 'East', 'stats': {'net_efficiency': 16.0, 'tempo': 69.5, 'three_rate': 0.35}},
        {'id': '10', 'name': 'New Mexico', 'seed': 10, 'region': 'East', 'stats': {'net_efficiency': 11.0, 'tempo': 68.0, 'three_rate': 0.36}},
        {'id': '3', 'name': 'Baylor', 'seed': 3, 'region': 'East', 'stats': {'net_efficiency': 22.0, 'tempo': 71.0, 'three_rate': 0.32}},
        {'id': '14', 'name': 'Colgate', 'seed': 14, 'region': 'East', 'stats': {'net_efficiency': 7.0, 'tempo': 67.0, 'three_rate': 0.38}},
        {'id': '6', 'name': 'Dayton', 'seed': 6, 'region': 'East', 'stats': {'net_efficiency': 17.0, 'tempo': 70.0, 'three_rate': 0.34}},
        {'id': '11', 'name': 'Nevada', 'seed': 11, 'region': 'East', 'stats': {'net_efficiency': 9.0, 'tempo': 68.5, 'three_rate': 0.37}},

        # West Region
        {'id': '17', 'name': 'Houston', 'seed': 1, 'region': 'West', 'stats': {'net_efficiency': 24.0, 'tempo': 72.5, 'three_rate': 0.33}},
        {'id': '32', 'name': 'Longwood', 'seed': 16, 'region': 'West', 'stats': {'net_efficiency': 4.0, 'tempo': 66.0, 'three_rate': 0.41}},
        {'id': '24', 'name': 'Nebraska', 'seed': 8, 'region': 'West', 'stats': {'net_efficiency': 14.0, 'tempo': 69.0, 'three_rate': 0.35}},
        {'id': '25', 'name': 'Texas A&M', 'seed': 9, 'region': 'West', 'stats': {'net_efficiency': 13.0, 'tempo': 68.5, 'three_rate': 0.36}},
        {'id': '20', 'name': 'Texas', 'seed': 4, 'region': 'West', 'stats': {'net_efficiency': 19.0, 'tempo': 70.5, 'three_rate': 0.34}},
        {'id': '29', 'name': 'Colorado State', 'seed': 13, 'region': 'West', 'stats': {'net_efficiency': 7.5, 'tempo': 67.5, 'three_rate': 0.39}},
        {'id': '21', 'name': 'Xavier', 'seed': 5, 'region': 'West', 'stats': {'net_efficiency': 17.5, 'tempo': 70.0, 'three_rate': 0.35}},
        {'id': '28', 'name': 'Missouri', 'seed': 12, 'region': 'West', 'stats': {'net_efficiency': 9.5, 'tempo': 68.0, 'three_rate': 0.37}},
        {'id': '18', 'name': 'Kentucky', 'seed': 2, 'region': 'West', 'stats': {'net_efficiency': 22.5, 'tempo': 71.5, 'three_rate': 0.32}},
        {'id': '31', 'name': 'Vermont', 'seed': 15, 'region': 'West', 'stats': {'net_efficiency': 5.5, 'tempo': 66.5, 'three_rate': 0.40}},
        {'id': '23', 'name': 'Indiana', 'seed': 7, 'region': 'West', 'stats': {'net_efficiency': 15.5, 'tempo': 69.5, 'three_rate': 0.36}},
        {'id': '26', 'name': 'Saint Mary\'s', 'seed': 10, 'region': 'West', 'stats': {'net_efficiency': 10.5, 'tempo': 67.5, 'three_rate': 0.37}},
        {'id': '19', 'name': 'Marquette', 'seed': 3, 'region': 'West', 'stats': {'net_efficiency': 21.0, 'tempo': 71.0, 'three_rate': 0.33}},
        {'id': '30', 'name': 'Drake', 'seed': 14, 'region': 'West', 'stats': {'net_efficiency': 6.5, 'tempo': 67.0, 'three_rate': 0.38}},
        {'id': '22', 'name': 'Michigan State', 'seed': 6, 'region': 'West', 'stats': {'net_efficiency': 16.5, 'tempo': 70.0, 'three_rate': 0.35}},
        {'id': '27', 'name': 'UC San Diego', 'seed': 11, 'region': 'West', 'stats': {'net_efficiency': 8.5, 'tempo': 68.0, 'three_rate': 0.38}},

        # Midwest Region
        {'id': '33', 'name': 'Kansas', 'seed': 1, 'region': 'Midwest', 'stats': {'net_efficiency': 23.5, 'tempo': 72.0, 'three_rate': 0.33}},
        {'id': '48', 'name': 'Southeast Missouri State', 'seed': 16, 'region': 'Midwest', 'stats': {'net_efficiency': 3.5, 'tempo': 65.5, 'three_rate': 0.42}},
        {'id': '40', 'name': 'UCLA', 'seed': 8, 'region': 'Midwest', 'stats': {'net_efficiency': 13.5, 'tempo': 69.0, 'three_rate': 0.36}},
        {'id': '41', 'name': 'Utah State', 'seed': 9, 'region': 'Midwest', 'stats': {'net_efficiency': 12.5, 'tempo': 68.5, 'three_rate': 0.37}},
        {'id': '36', 'name': 'Iowa State', 'seed': 4, 'region': 'Midwest', 'stats': {'net_efficiency': 18.5, 'tempo': 70.5, 'three_rate': 0.34}},
        {'id': '45', 'name': 'Lipscomb', 'seed': 13, 'region': 'Midwest', 'stats': {'net_efficiency': 7.0, 'tempo': 67.0, 'three_rate': 0.39}},
        {'id': '37', 'name': 'Maryland', 'seed': 5, 'region': 'Midwest', 'stats': {'net_efficiency': 17.0, 'tempo': 70.0, 'three_rate': 0.35}},
        {'id': '44', 'name': 'Grand Canyon', 'seed': 12, 'region': 'Midwest', 'stats': {'net_efficiency': 8.0, 'tempo': 67.5, 'three_rate': 0.38}},
        {'id': '34', 'name': 'Tennessee', 'seed': 2, 'region': 'Midwest', 'stats': {'net_efficiency': 22.0, 'tempo': 71.5, 'three_rate': 0.32}},
        {'id': '47', 'name': 'Wagner', 'seed': 15, 'region': 'Midwest', 'stats': {'net_efficiency': 4.5, 'tempo': 66.0, 'three_rate': 0.41}},
        {'id': '39', 'name': 'Washington State', 'seed': 7, 'region': 'Midwest', 'stats': {'net_efficiency': 15.0, 'tempo': 69.5, 'three_rate': 0.36}},
        {'id': '42', 'name': 'North Carolina State', 'seed': 10, 'region': 'Midwest', 'stats': {'net_efficiency': 11.0, 'tempo': 68.0, 'three_rate': 0.37}},
        {'id': '35', 'name': 'Arkansas', 'seed': 3, 'region': 'Midwest', 'stats': {'net_efficiency': 20.5, 'tempo': 71.0, 'three_rate': 0.33}},
        {'id': '46', 'name': 'Norfolk State', 'seed': 14, 'region': 'Midwest', 'stats': {'net_efficiency': 6.0, 'tempo': 66.5, 'three_rate': 0.40}},
        {'id': '38', 'name': 'Memphis', 'seed': 6, 'region': 'Midwest', 'stats': {'net_efficiency': 16.0, 'tempo': 70.0, 'three_rate': 0.35}},
        {'id': '43', 'name': 'Oklahoma', 'seed': 11, 'region': 'Midwest', 'stats': {'net_efficiency': 9.0, 'tempo': 68.5, 'three_rate': 0.38}},

        # South Region
        {'id': '49', 'name': 'Auburn', 'seed': 1, 'region': 'South', 'stats': {'net_efficiency': 23.0, 'tempo': 72.0, 'three_rate': 0.33}},
        {'id': '64', 'name': 'Alabama State', 'seed': 16, 'region': 'South', 'stats': {'net_efficiency': 3.0, 'tempo': 65.0, 'three_rate': 0.43}},
        {'id': '56', 'name': 'Louisville', 'seed': 8, 'region': 'South', 'stats': {'net_efficiency': 13.0, 'tempo': 69.0, 'three_rate': 0.36}},
        {'id': '57', 'name': 'Creighton', 'seed': 9, 'region': 'South', 'stats': {'net_efficiency': 12.0, 'tempo': 68.5, 'three_rate': 0.37}},
        {'id': '52', 'name': 'Wisconsin', 'seed': 4, 'region': 'South', 'stats': {'net_efficiency': 18.0, 'tempo': 70.5, 'three_rate': 0.34}},
        {'id': '61', 'name': 'James Madison', 'seed': 13, 'region': 'South', 'stats': {'net_efficiency': 6.5, 'tempo': 67.0, 'three_rate': 0.39}},
        {'id': '53', 'name': 'Florida Atlantic', 'seed': 5, 'region': 'South', 'stats': {'net_efficiency': 16.5, 'tempo': 70.0, 'three_rate': 0.35}},
        # Was a second 'Vermont' (already the West 15 seed); renamed so names stay unique.
        {'id': '60', 'name': 'McNeese', 'seed': 12, 'region': 'South', 'stats': {'net_efficiency': 7.5, 'tempo': 67.5, 'three_rate': 0.38}},
        # Was a second 'Texas A&M' (already the West 9 seed); renamed so names stay unique.
        {'id': '50', 'name': 'Texas Tech', 'seed': 2, 'region': 'South', 'stats': {'net_efficiency': 21.5, 'tempo': 71.5, 'three_rate': 0.32}},
        {'id': '63', 'name': 'FDU', 'seed': 15, 'region': 'South', 'stats': {'net_efficiency': 4.0, 'tempo': 66.0, 'three_rate': 0.41}},
        {'id': '55', 'name': 'Michigan', 'seed': 7, 'region': 'South', 'stats': {'net_efficiency': 14.5, 'tempo': 69.5, 'three_rate': 0.36}},
        {'id': '58', 'name': 'Providence', 'seed': 10, 'region': 'South', 'stats': {'net_efficiency': 10.5, 'tempo': 68.0, 'three_rate': 0.37}},
        {'id': '51', 'name': 'Iowa', 'seed': 3, 'region': 'South', 'stats': {'net_efficiency': 20.0, 'tempo': 71.0, 'three_rate': 0.33}},
        {'id': '62', 'name': 'UAB', 'seed': 14, 'region': 'South', 'stats': {'net_efficiency': 5.5, 'tempo': 66.5, 'three_rate': 0.40}},
        {'id': '54', 'name': 'Northwestern', 'seed': 6, 'region': 'South', 'stats': {'net_efficiency': 15.5, 'tempo': 70.0, 'three_rate': 0.35}},
        {'id': '59', 'name': 'Richmond', 'seed': 11, 'region': 'South', 'stats': {'net_efficiency': 8.5, 'tempo': 68.5, 'three_rate': 0.38}},
    ]

    return {'year': year, 'teams': sample_teams}

def create_predictor_from_models(models: Dict = None, efficiency_data: Dict = None) -> callable:
    """
    Build a game predictor that blends efficiency ratings with an upset model.

    An upset model is trained once, up front, from real data topped up with
    synthetic historical data (or synthetic data alone when fewer than 50
    real rows exist).  If that setup fails for any reason the returned
    predictor relies on efficiency ratings only.

    Args:
        models: Dictionary of trained models (spread, total, moneyline);
            not read by the current implementation.
        efficiency_data: Team efficiency data; not read by the current
            implementation.

    Returns:
        Function that predicts P(team1 beats team2), clamped to [0.05, 0.95]
    """
    # ------------------------------------------------------------------
    # Train the upset model (best effort; any failure disables it)
    # ------------------------------------------------------------------
    upset_model = None
    try:
        from upset_prediction import UpsetPredictor, create_training_data_from_csv, create_historical_training_data
        upset_model = UpsetPredictor()
        real_X, real_y = create_training_data_from_csv()
        synth_X, synth_y = create_historical_training_data()
        enough_real_rows = real_X is not None and len(real_X) >= 50
        if enough_real_rows:
            # Augment the real rows with at most 200 synthetic ones.
            cap = min(200, len(synth_X))
            features = np.vstack([real_X, synth_X[:cap]])
            labels = np.concatenate([real_y, synth_y[:cap]])
        else:
            features, labels = synth_X, synth_y
        upset_model.train(features, labels)
    except Exception:
        upset_model = None

    def _clamp(probability):
        """Keep a probability inside [0.05, 0.95]."""
        return max(0.05, min(0.95, probability))

    def _profile(team, net_default, tempo_default, three_default):
        """Feature dict for the upset model, with per-role fallbacks."""
        return {
            'seed': team.seed,
            'net_efficiency': team.stats.get('net_efficiency', net_default),
            'tempo': team.stats.get('tempo', tempo_default),
            'three_rate': team.stats.get('three_rate', three_default),
            'def_efficiency': team.stats.get('def_efficiency', 100),
        }

    def predictor(team1: Team, team2: Team) -> float:
        """Predict probability that team1 beats team2."""
        try:
            # Efficiency view: each net-efficiency point is worth about 2%.
            rating_gap = team1.stats.get('net_efficiency', 10) - team2.stats.get('net_efficiency', 10)
            efficiency_prob = _clamp(0.5 + (rating_gap * 0.02))

            if upset_model is None or not upset_model.is_trained:
                return efficiency_prob

            # The better (lower-numbered) seed is the favorite; ties go to team1.
            team1_is_favorite = team1.seed <= team2.seed
            if team1_is_favorite:
                favorite, underdog = team1, team2
            else:
                favorite, underdog = team2, team1

            outcome = upset_model.predict_upset_probability(
                _profile(favorite, 10, 70, 0.35),
                _profile(underdog, 5, 68, 0.38),
            )
            upset_prob = outcome.get('upset_probability', 0.3)

            # upset_prob is the underdog's chance; express it from team1's side.
            model_prob_team1 = (1.0 - upset_prob) if team1_is_favorite else upset_prob

            # Blend 70% efficiency + 30% upset model
            return _clamp(0.70 * efficiency_prob + 0.30 * model_prob_team1)

        except Exception:
            # Fallback: seed-based logistic
            seed_edge = team2.seed - team1.seed
            return _clamp(1 / (1 + np.exp(-seed_edge * 0.5)))

    return predictor


# ===== Simple Bracket Simulator API (Roadmap Implementation) =====

def simulate_bracket(predictions: dict, num_sims: int = 10000) -> dict:
    """
    Monte Carlo simulation of bracket outcomes.

    This is a simplified API matching the roadmap specification.
    For more advanced simulations, use BracketSimulator class directly.

    Args:
        predictions: Dictionary containing:
            - "teams": List of team dictionaries; each must have a "name"
            - "first", "second", "sweet16", "elite8", "final4", "championship":
              Lists of matchup dictionaries with team1, team2, team1_prob
        num_sims: Number of simulations to run (default: 10000).  With zero
            (or fewer) simulations every percentage is reported as 0.0.

    Returns:
        Dictionary mapping team name to:
            - final_four, championship, winner: raw counts over all runs
            - final_four_pct: Probability of reaching Final Four
            - championship_pct: Probability of reaching Championship
            - winner_pct: Probability of winning tournament
        Only teams listed under "teams" are tracked; other names that show
        up in matchups are ignored.

    Example:
        >>> predictions = {
        ...     "teams": [
        ...         {"name": "Duke", "seed": 1, "region": "East", "stats": {"net_efficiency": 25.0}},
        ...         {"name": "UNC", "seed": 2, "region": "East", "stats": {"net_efficiency": 22.0}}
        ...     ],
        ...     "first": [
        ...         {"team1": "Duke", "team2": "Norfolk State", "team1_prob": 0.98},
        ...         {"team1": "UNC", "team2": "Vermont", "team1_prob": 0.95}
        ...     ]
        ... }
        >>> results = simulate_bracket(predictions, num_sims=1000)
        >>> print(f"Duke Final Four: {results['Duke']['final_four_pct']:.1%}")
    """
    milestones = ("final_four", "championship", "winner")

    # Initialize results
    teams = predictions.get("teams", [])
    results = {
        team["name"]: {milestone: 0 for milestone in milestones}
        for team in teams
    }

    # Run simulations
    for _ in range(num_sims):
        bracket = run_single_simulation(predictions)

        # Count Final Four teams
        for team in bracket.get("final_four", []):
            if team in results:
                results[team]["final_four"] += 1

        # Count Championship teams
        for team in bracket.get("championship", []):
            if team in results:
                results[team]["championship"] += 1

        # Count winner
        winner = bracket.get("winner")
        if winner and winner in results:
            results[winner]["winner"] += 1

    # Convert to percentages; guard the division so num_sims=0 returns
    # zeros instead of raising ZeroDivisionError.
    for tallies in results.values():
        for milestone in milestones:
            if num_sims > 0:
                tallies[f"{milestone}_pct"] = tallies[milestone] / num_sims
            else:
                tallies[f"{milestone}_pct"] = 0.0

    return results


def run_single_simulation(predictions: dict) -> dict:
    """
    Run a single bracket simulation using probabilities.

    Every round is simulated from the matchups listed for it in
    ``predictions``; winners of one round are not fed into the next round's
    matchups.  ``predictions`` itself is not modified.

    Args:
        predictions: Dictionary with matchup probabilities for each round
            ("first", "second", "sweet16", "elite8", "final4",
            "championship").  Each matchup has team1, team2 and an optional
            team1_prob (default 0.5).

    Returns:
        A shallow copy of ``predictions`` extended with:
            - first_winners, second_winners, sweet16_winners, etc. for every
              round that had matchups
            - final_four: Elite 8 winners, else the teams named in the
              "final4" matchups, else []
            - championship: Final Four winners, else the teams named in the
              "championship" matchups, else [].  Note this replaces the
              input "championship" matchup list in the returned copy.
            - winner: Winner of the first championship matchup, or None

    Example:
        >>> predictions = {
        ...     "first": [
        ...         {"team1": "Duke", "team2": "Norfolk State", "team1_prob": 0.98}
        ...     ],
        ...     "second": [
        ...         {"team1": "Duke", "team2": "Florida", "team1_prob": 0.75}
        ...     ]
        ... }
        >>> bracket = run_single_simulation(predictions)
        >>> print(bracket["first_winners"])  # ['Duke']
    """
    bracket = predictions.copy()

    # Process each round in order
    round_names = ["first", "second", "sweet16", "elite8", "final4", "championship"]

    for round_name in round_names:
        matchups = predictions.get(round_name, [])
        if not matchups:
            continue

        winners = []
        for matchup in matchups:
            team1 = matchup.get("team1")
            team2 = matchup.get("team2")
            prob = matchup.get("team1_prob", 0.5)

            # Simulate game outcome
            winners.append(team1 if random.random() < prob else team2)

        bracket[f"{round_name}_winners"] = winners

    def _participants(matchups):
        """Flatten matchups into [team1, team2, team1, team2, ...]."""
        return [
            team
            for matchup in (matchups or [])
            for team in (matchup["team1"], matchup["team2"])
        ]

    # Final Four = teams entering the semifinals (Elite 8 winners OR final4 participants)
    if "elite8_winners" in bracket:
        final_four = bracket["elite8_winners"]
    else:
        final_four = _participants(predictions.get("final4"))

    # Championship = teams entering the title game (Final Four winners OR
    # championship participants).  The participants are read from the input
    # matchups BEFORE the "championship" key is overwritten below; clearing
    # the key first would leave nothing to extract.
    if "final4_winners" in bracket:
        title_game = bracket["final4_winners"]
    else:
        title_game = _participants(predictions.get("championship"))

    bracket["final_four"] = final_four
    bracket["championship"] = title_game

    # Winner = champion (None when no championship game was simulated)
    champions = bracket.get("championship_winners") or [None]
    bracket["winner"] = champions[0]

    return bracket