"""
creates data sets and prepares them for analysis.
this is done to create consistency across all notebooks.
"""
print('creating dataset for 4d-modeller...')
import pandas as pd
import geopandas as gpd
import numpy as np
import xarray as xr
# import hydrobasins data
shapefile_path = 'data/hydrobasins_lvl6/hybas_as_lev06_v1c.shp'
shapes = gpd.read_file(shapefile_path)
# import precipitation data
def get_precip_data_for_year(data_path, shapes):
    dataset = xr.open_dataset(data_path)
    sums = []
    for index, shape in shapes.iterrows():
        # Extract the geometry
        geom = shape.geometry
        # Select the data from the NetCDF file at the shape's centroid.
        # Because the 'nearest' method is used, a value is computed even
        # where there is no data: an average is taken from the data
        # nearest to the point, which makes it appear as though there is
        # data for areas where there is none (e.g., in Japan).
        # TODO: label whether a shape is part of the Tibetan Plateau or not
        data = dataset.sel(longitude=geom.centroid.x, latitude=geom.centroid.y, method='nearest')
        # Sum the precipitation ('tp') over the year and append it to the list
        data_sum = data.sum()['tp'].values
        sums.append(data_sum)
    return sums
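
# A possible tightening of the lookup above (a sketch, not part of the original
# pipeline): xarray's `sel` accepts a `tolerance` alongside method='nearest',
# which raises a KeyError when the centroid is farther from any grid point than
# the tolerance, instead of silently filling areas with no nearby data. The
# 0.25-degree default below is an assumed grid spacing, not read from the data.
def get_precip_data_for_year_strict(data_path, shapes, tol=0.25):
    dataset = xr.open_dataset(data_path)
    sums = []
    for index, shape in shapes.iterrows():
        geom = shape.geometry
        try:
            data = dataset.sel(longitude=geom.centroid.x,
                               latitude=geom.centroid.y,
                               method='nearest', tolerance=tol)
            sums.append(data.sum()['tp'].values)
        except KeyError:
            # no grid point within `tol` degrees of this centroid
            sums.append(np.nan)
    return sums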
years = np.arange(1984, 2022, 1)
final_data_set = pd.DataFrame()
for year in years:
    precip = get_precip_data_for_year(f'data/precip/{year}/data.nc', shapes=shapes)
    year_df = pd.DataFrame()
    year_df['precip'] = precip
    year_df['precip'] = year_df['precip'].astype(np.float64)
    year_df['year'] = year
    final_data_set = pd.concat([final_data_set, year_df])
# combine the lake catchment hydrobasins data with the other datasets
# Calculate the centroid of each shape (used later as map markers)
shapes['centroid_lon'] = shapes.apply(lambda row: row['geometry'].centroid.x, axis=1)
shapes['centroid_lat'] = shapes.apply(lambda row: row['geometry'].centroid.y, axis=1)
final_data_set = final_data_set.join(shapes).sort_values(by=['year', 'SORT'])
final_data_set = gpd.GeoDataFrame(final_data_set)
# bounding box taken from the precipitation data
xmin = 45
xmax = 107
ymin = 24
ymax = 68
final_data_set = final_data_set.cx[xmin:xmax, ymin:ymax]
# import lake changes from Google Earth Engine
# NOTE: this is a hard-coded local path and must be adjusted per machine
lake_df = pd.read_csv('/Users/johnaiken/repos/tblakes/data/lake_data.csv')
df = final_data_set.merge(lake_df, on=['HYBAS_ID', 'year'])
df = gpd.GeoDataFrame(df)
print('dataset is found in df')
if __name__ == "__main__":
    df.to_csv('data_for_4dm.csv')
    print('data set has been created and written to file. Have a nice day!')
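
# Example downstream usage (an assumption about how the notebooks consume the
# output, not taken from the original): after running `python setup.py`, the
# prepared dataset can be read back with
#     df = pd.read_csv('data_for_4dm.csv')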