-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmap_missing_drivers.py
More file actions
41 lines (34 loc) · 1.44 KB
/
map_missing_drivers.py
File metadata and controls
41 lines (34 loc) · 1.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""
Manually map the 4 missing drivers to their full f1db-format driver IDs.
Albon, Giovinazzi, Schumacher, and Bortoleto are not in active_drivers.csv.
"""
import pandas as pd
from os import path
DATA_DIR = 'data_files'

# Manual mappings for the 4 missing drivers based on f1db format.
# Key: the value found in the CSV's 'FullName' column.
# Value: the driverId written into rows whose 'driverId' is NaN.
missing_mappings = {
    'Alexander Albon': 'alexander-albon',
    'Antonio Giovinazzi': 'antonio-giovinazzi',
    'Mick Schumacher': 'mick-schumacher',
    'Gabriel Bortoleto': 'gabriel-bortoleto',
}


def fill_missing_driver_ids(quali_df, mappings=None):
    """Fill NaN ``driverId`` cells in place using a FullName -> driverId map.

    Only rows whose ``driverId`` is NaN *and* whose ``FullName`` equals a
    mapping key exactly are touched; rows that already carry a driverId are
    never overwritten.

    Args:
        quali_df: DataFrame with at least ``driverId`` and ``FullName``
            columns. It is modified in place.
        mappings: dict of ``FullName`` -> ``driverId``. Defaults to the
            module-level ``missing_mappings``.

    Returns:
        dict mapping each full name that matched at least one row to the
        number of rows filled for it (insertion order follows ``mappings``).

    Raises:
        KeyError: if ``quali_df`` lacks a ``driverId`` or ``FullName`` column.
    """
    if mappings is None:
        mappings = missing_mappings

    filled = {}
    for full_name, driver_id in mappings.items():
        # Recompute the NaN mask each pass: earlier assignments change it.
        mask = quali_df['driverId'].isna() & (quali_df['FullName'] == full_name)
        count = int(mask.sum())
        if count > 0:
            quali_df.loc[mask, 'driverId'] = driver_id
            filled[full_name] = count
    return filled


def main():
    """Load the qualifying file, apply the manual mappings, and save.

    The file is rewritten only when every row ends up with a driverId;
    otherwise the rows that are still missing one are printed and the file
    on disk is left untouched.
    """
    print("Loading qualifying CSV...")
    quali_path = path.join(DATA_DIR, 'all_qualifying_races.csv')
    # Despite the .csv extension the file is read (and written) tab-separated.
    quali_df = pd.read_csv(quali_path, sep='\t')
    print(f"Rows with NaN driverId: {quali_df['driverId'].isna().sum()}")

    # Apply mappings based on FullName
    filled = fill_missing_driver_ids(quali_df, missing_mappings)
    for full_name, count in filled.items():
        driver_id = missing_mappings[full_name]
        print(f" {full_name:20s} → {driver_id:25s} ({count} rows)")

    print("\nAfter mapping:")
    still_missing = quali_df['driverId'].isna()
    print(f"Rows with NaN driverId: {still_missing.sum()}")

    if not still_missing.any():
        print("\n[SUCCESS] All driverIds now populated!")
        quali_df.to_csv(quali_path, sep='\t', index=False)
        print(f"Saved updated CSV to {quali_path}")
    else:
        print("\n[WARNING] Some rows still have NaN driverId")
        print(quali_df[still_missing][['Year', 'Round', 'FullName', 'BroadcastName']])


# Guard the entry point so importing this module does not touch the data file.
if __name__ == "__main__":
    main()