-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathf1-analysis-weather.py
More file actions
158 lines (129 loc) · 6.7 KB
/
f1-analysis-weather.py
File metadata and controls
158 lines (129 loc) · 6.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import openmeteo_requests
import datetime as dt
import requests_cache
import numpy as np
import pandas as pd
from retry_requests import retry
from openmeteo_sdk.Variable import Variable
from os import path
import os
# --- Configuration and input data --------------------------------------------
# Directory holding the f1db JSON exports read below.
DATA_DIR = 'data_files/'

# Season window: `between` is inclusive on both ends, so the races kept are
# those with year in [current_year - 10, current_year - 1].
current_year = dt.datetime.now().year
raceNoEarlierThan = current_year - 10

races = pd.read_json(path.join(DATA_DIR, 'f1db-races.json'))
circuits = pd.read_json(path.join(DATA_DIR, 'f1db-circuits.json'))
races = races[races['year'].between(raceNoEarlierThan, current_year - 1)]

# Attach the circuit record to every race. Columns present in both inputs get
# the suffixes below (e.g. `id` becomes `id_races` / `id_circuits`).
circuits_and_races = pd.merge(
    races,
    circuits,
    left_on='circuitId',
    right_on='id',
    suffixes=['_races', '_circuits'],
)

# Only the columns needed to request and label the weather data.
# `.copy()` makes this an independent frame, so the later in-place rewrite of
# its `date` column does not raise SettingWithCopyWarning.
circuits_and_races_lat_long = circuits_and_races[
    ['id_races', 'latitude', 'longitude', 'date', 'grandPrixId', 'circuitId']
].copy()
# Endpoint of the Open-Meteo archive API used for every request.
url = "https://archive-api.open-meteo.com/v1/archive"

# Open-Meteo client built on a cached HTTP session ('.cache', with
# expire_after=-1 so cached responses never expire) wrapped by retry() with
# retries=5 and backoff_factor=0.2.
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Accumulators filled by the loops below: one request-parameter dict per race,
# and the hourly weather rows returned for those requests.
full_params = []
all_hourly_data = []
# Hourly variables requested for every race. The order matters: the response
# is read back by position, so it must stay in step with the indices used
# when the responses are processed.
hourly_variables = [
    "temperature_2m", "precipitation", "relative_humidity_2m", "wind_speed_10m",
    # --- 2A NEW ADDITIONS ---
    "apparent_temperature",
    "wind_gusts_10m",
    "weather_code",
    "cloud_cover",
    "surface_pressure",
    "visibility",
    "soil_temperature_0cm",
    "shortwave_radiation",
]

# One request per race: a single-day window at the circuit's coordinates.
# NOTE(review): no "timezone" parameter is sent, so the day window is
# presumably interpreted as GMT rather than circuit-local time - confirm that
# this is intended.
for race in circuits_and_races_lat_long.itertuples():
    race_day = race.date.strftime('%Y-%m-%d')
    full_params.append({
        "latitude": race.latitude,
        "longitude": race.longitude,
        "start_date": race_day,
        "end_date": race_day,
        # Fresh list per request so the dicts do not share one object.
        "hourly": list(hourly_variables),
        "temperature_unit": "fahrenheit",
        "wind_speed_unit": "mph",
    })
# Output column for each requested hourly variable, in request order.
# hourly.Variables(i) is positional, so position i here must correspond to
# position i of the "hourly" list sent in the request parameters.
hourly_columns = (
    "temperature_2m",
    "hourly_precipitation",
    "relative_humidity_2m",
    "wind_speed_10m",
    "apparent_temperature",
    "wind_gusts_10m",
    "weather_code",
    "cloud_cover",
    "surface_pressure",
    "visibility",
    "soil_temperature_0cm",
    "shortwave_radiation",
)

# Download the hourly archive for every race and collect one frame per race.
# The frames are concatenated once after the loop: re-wrapping and
# re-concatenating the accumulator on every iteration copies all earlier rows
# each time and starts from an empty frame, which recent pandas warns about.
hourly_frames = []
for params in full_params:
    responses = openmeteo.weather_api(url, params=params)
    # Each request targets a single location, so only the first response is used.
    response = responses[0]
    hourly = response.Hourly()

    # Timestamps from Time() up to, but not including, TimeEnd(), spaced by
    # Interval() seconds and expressed in UTC.
    timestamps = pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    )

    hourly_data = {
        "date": timestamps,
        "latitude": response.Latitude(),
        "longitude": response.Longitude(),
    }
    for position, column in enumerate(hourly_columns):
        hourly_data[column] = hourly.Variables(position).ValuesAsNumpy()
    # Day key (YYYY-MM-DD) used later to join the weather rows to the races.
    hourly_data["short_date"] = timestamps.strftime('%Y-%m-%d')

    hourly_frames.append(pd.DataFrame(data=hourly_data))

if hourly_frames:
    all_hourly_data = pd.concat(hourly_frames, ignore_index=True)
else:
    # No races selected: still provide a frame with the expected columns
    # instead of leaving the accumulator as a plain list.
    all_hourly_data = pd.DataFrame(
        columns=["date", "latitude", "longitude", *hourly_columns, "short_date"]
    )
# Render the race date as YYYY-MM-DD text so it can be matched against the
# `short_date` key of the hourly weather rows. `assign` returns a new frame,
# which avoids assigning into a column subset of another frame (the source of
# pandas' SettingWithCopyWarning) and keeps the column order unchanged.
circuits_and_races_lat_long = circuits_and_races_lat_long.assign(
    date=pd.to_datetime(circuits_and_races_lat_long['date']).dt.strftime('%Y-%m-%d')
)

# Join every hourly weather row to the race held on the same calendar day.
# Columns present on both sides (date, latitude, longitude) get the suffixes.
# NOTE(review): the join key is the date alone, so two races sharing a date
# would each receive the other's weather rows - confirm dates are unique.
races_and_weather = pd.merge(
    all_hourly_data,
    circuits_and_races_lat_long,
    left_on='short_date',
    right_on='date',
    how='inner',
    suffixes=['_hourly', '_lat_long'],
)
print(races_and_weather)

# Hourly-level export. The file is tab-separated despite the .csv extension.
races_and_weather.to_csv('f1WeatherData_AllData.csv', columns=[
    'date_hourly', 'latitude_hourly', 'longitude_hourly',
    'temperature_2m', 'hourly_precipitation', 'relative_humidity_2m', 'wind_speed_10m',
    'apparent_temperature', 'wind_gusts_10m', 'weather_code', 'cloud_cover',
    'surface_pressure', 'visibility', 'soil_temperature_0cm', 'shortwave_radiation',
    'short_date', 'id_races', 'grandPrixId', 'circuitId'
], sep='\t')
# Collapse the hourly rows to one row per race day.
# Grouping keys: the day plus the identifiers that label the race.
group_keys = [
    'short_date', 'latitude_hourly', 'longitude_hourly',
    'id_races', 'grandPrixId', 'circuitId',
]

# Output column -> (hourly source column, aggregation function).
daily_aggregations = {
    'average_temp': ('temperature_2m', 'mean'),
    'total_precipitation': ('hourly_precipitation', 'sum'),
    'average_humidity': ('relative_humidity_2m', 'mean'),
    'average_wind_speed': ('wind_speed_10m', 'mean'),
    # --- 2A NEW AGGREGATIONS ---
    'apparent_temperature': ('apparent_temperature', 'mean'),
    'windgusts_10m': ('wind_gusts_10m', 'max'),              # strongest gust of the day
    'weathercode': ('weather_code', 'max'),                  # highest code of the day
    'cloudcover': ('cloud_cover', 'mean'),
    'surface_pressure': ('surface_pressure', 'mean'),
    'visibility': ('visibility', 'min'),                     # lowest visibility of the day
    'soil_temperature_0cm': ('soil_temperature_0cm', 'mean'),
    'shortwave_radiation': ('shortwave_radiation', 'sum'),   # summed over the day
}

races_and_weather_grouped = (
    races_and_weather
    .groupby(group_keys)
    .agg(**daily_aggregations)
    .reset_index()
)

# Daily-level export. The file is tab-separated despite the .csv extension.
grouped_columns = [
    'short_date', 'id_races', 'grandPrixId', 'circuitId',
    'latitude_hourly', 'longitude_hourly',
    'average_temp', 'total_precipitation', 'average_humidity', 'average_wind_speed',
    'apparent_temperature', 'windgusts_10m', 'weathercode', 'cloudcover',
    'surface_pressure', 'visibility', 'soil_temperature_0cm', 'shortwave_radiation',
]
races_and_weather_grouped.to_csv('f1WeatherData_Grouped.csv', columns=grouped_columns, sep='\t')
print(races_and_weather_grouped)