-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathenv_methods.py
More file actions
149 lines (113 loc) · 4.7 KB
/
Copy pathenv_methods.py
File metadata and controls
149 lines (113 loc) · 4.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 26 10:25:25 2020
Data from https://www.esrl.noaa.gov/psd/data/gridded/data.20thC_ReanV2c.pressure.html
@author: bydd1
The code below was written by Brynnydd Hamilton (https://github.com/b-r-hamilton/miss-atmo) and was copied into this project
with her permission
"""
import os
from netCDF4 import Dataset
import datetime as dt
import numpy as np
#find the closest value to some specified value in an array
def find_closest_val(val, arr):
if isinstance(val, dt.datetime):
diff = [(abs(x - val)).total_seconds() for x in arr]
else:
diff = [abs(x - val) for x in arr]
index = diff.index(min(diff))
return index
#convert from "hours since 1800-1-1" to a datetime object
def convert_datetime(val, tus):
origin = dt.datetime(1800, 1, 1, 0, 0, 0)
if tus[0] == 'h': new = origin + dt.timedelta(hours = val)
if tus[0] == 'd': new = origin + dt.timedelta(days = val)
return new
#acquire data between start and end data, return as dict
def get_data(directory, all_data, time_b=[0,0,0,0]):
files = os.listdir(directory)
nc_vars = dict()
x = Dataset(os.path.join(directory, files[0]), 'r', format = 'NETCDF4')
c_var_names = list(x.dimensions.keys())
c_var_names.remove('nbnds')
data_package = dict()
for cv in c_var_names:
data_package[cv] = x[cv][:].data.tolist()
if 'level' in c_var_names:
pres_levels = [1000, 850, 500]
pres_levels_i = [data_package['level'].index(i) for i in pres_levels]
data_package['level'] = pres_levels
#sort out the time situation
time = [convert_datetime(i, x['time'].units) for i in data_package['time']]
if not all_data:
month_start = time_b[0]
year_start = time_b[1]
month_end = time_b[2]
year_end = time_b[3]
time_start = find_closest_val(dt.datetime(year = year_start, month = month_start, day = 1), time)
time_end = find_closest_val(dt.datetime(year = year_end, month = month_end, day = 1), time)
else:
time_start = 0
time_end = len(time)
time = time[time_start:time_end]
data_package['time'] = time
for i in files[:]:
path = os.path.join(directory, i)
x = Dataset(path, 'r', format = 'NETCDF4')
print('acquring data from ' +i)
var_names = list(x.variables.keys())
var_names.remove('time_bnds')
var_name = [i for i in var_names if i not in c_var_names][0]
if 'level' in c_var_names: var = x[var_name][time_start:time_end, pres_levels_i, :, :].data
else: var = x[var_name][time_start:time_end, :, :].data
nc_vars[var_name] = var
data_package['nc_vars'] = nc_vars
return data_package
def gpcc_convert_datetime(lis):
new_list = []
for l in lis:
origin = dt.datetime(1891, 1, 1, 0, 0, 0)
new_list.append(origin + dt.timedelta(hours = l))
return new_list
def get_data_gpcc(directory, res):
if res == 1:
file = 'full_data_monthly_v2018_10.nc'
if res == 0.25:
file = 'full_data_monthly_v2018_025.nc'
x = Dataset(os.path.join(directory, file))
dictionary = dict()
dictionary['lat'] = x['lat'][:].data
dictionary['lon'] = x['lon'][:].data
dictionary['time'] = gpcc_convert_datetime(x['time'][:].data)
arr = x['precip'][:].data
arr[arr < 0] = np.nan
dictionary['precip'] = arr
return dictionary
def normalize_data(var, time):
mean_monthly = np.empty((12, var.shape[1], var.shape[2]))
stdev_monthly = np.empty((12, var.shape[1], var.shape[2]))
for i in np.arange(1,13): #iterate through months
monthly_subset = []
monthly_indices = []
for t in time:
if t.month == i:
monthly_indices.append(time.index(t))
monthly_subset = var[monthly_indices, :, :]
mean_monthly[i - 1, :, :] = np.mean(monthly_subset, axis = 0)
stdev_monthly[i - 1, :, :] = np.std(monthly_subset, axis = 0)
for date in time: #for every time value, normalize
month = date.month
frame = var[time.index(date), :, :]
#normalization formula = (x - mean) / stdev
normalized_frame = np.divide(np.subtract(frame, mean_monthly[month - 1]),
stdev_monthly[month - 1])
var[time.index(date), :, :] = normalized_frame
return var
#return all indices for a specific year for the list of filenames
def get_year_indices(datetimes, spec_year):
indices = []
for i in datetimes:
if i.year == spec_year:
indices.append(datetimes.index(i))
return indices