ESS-Visualization/env_methods.py at master · bparazin/ESS-Visualization · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 26 10:25:25 2020
Data from https://www.esrl.noaa.gov/psd/data/gridded/data.20thC_ReanV2c.pressure.html
@author: bydd1

The code below was written by Brynnydd Hamilton (https://github.com/b-r-hamilton/miss-atmo) and was copied into this project
with her permission
"""
import os
from netCDF4 import Dataset
import datetime as dt
import numpy as np

#find the closest value to some specified value in an array
def find_closest_val(val, arr):
    if isinstance(val, dt.datetime):
        diff = [(abs(x - val)).total_seconds() for x in arr]
    else:
        diff = [abs(x - val) for x in arr]
    index = diff.index(min(diff))
    return index

#convert from "hours since 1800-1-1" to a datetime object
def convert_datetime(val, tus):
    origin = dt.datetime(1800, 1, 1, 0, 0, 0)
    if tus[0] == 'h': new = origin + dt.timedelta(hours = val)
    if tus[0] == 'd': new = origin + dt.timedelta(days = val)
    return new


#acquire data between start and end data, return as dict
def get_data(directory, all_data, time_b=[0,0,0,0]):

    files = os.listdir(directory)

    nc_vars = dict()

    x = Dataset(os.path.join(directory, files[0]), 'r', format = 'NETCDF4')

    c_var_names = list(x.dimensions.keys())
    c_var_names.remove('nbnds')

    data_package = dict()

    for cv in c_var_names:

        data_package[cv] = x[cv][:].data.tolist()

    if 'level' in c_var_names:
        pres_levels = [1000, 850, 500]
        pres_levels_i = [data_package['level'].index(i) for i in pres_levels]
        data_package['level'] = pres_levels

    #sort out the time situation

    time = [convert_datetime(i, x['time'].units) for i in data_package['time']]

    if not all_data:
        month_start = time_b[0]
        year_start = time_b[1]
        month_end = time_b[2]
        year_end = time_b[3]

        time_start = find_closest_val(dt.datetime(year = year_start, month = month_start, day = 1), time)
        time_end = find_closest_val(dt.datetime(year = year_end, month = month_end, day = 1), time)

    else:
        time_start = 0
        time_end = len(time)

    time = time[time_start:time_end]

    data_package['time'] = time


    for i in files[:]:
        path = os.path.join(directory, i)
        x = Dataset(path, 'r', format = 'NETCDF4')
        print('acquring data from ' +i)
        var_names = list(x.variables.keys())
        var_names.remove('time_bnds')
        var_name = [i for i in var_names if i not in c_var_names][0]
        if 'level' in c_var_names: var = x[var_name][time_start:time_end, pres_levels_i, :, :].data
        else: var = x[var_name][time_start:time_end, :, :].data
        nc_vars[var_name] = var

    data_package['nc_vars'] = nc_vars

    return data_package

def gpcc_convert_datetime(lis):
    new_list = []
    for l in lis:
        origin = dt.datetime(1891, 1, 1, 0, 0, 0)
        new_list.append(origin + dt.timedelta(hours = l))
    return new_list

def get_data_gpcc(directory, res):
    if res == 1:
        file = 'full_data_monthly_v2018_10.nc'
    if res == 0.25:
        file = 'full_data_monthly_v2018_025.nc'
    x = Dataset(os.path.join(directory, file))
    dictionary = dict()
    dictionary['lat'] = x['lat'][:].data
    dictionary['lon'] = x['lon'][:].data
    dictionary['time'] = gpcc_convert_datetime(x['time'][:].data)
    arr =  x['precip'][:].data
    arr[arr < 0] = np.nan
    dictionary['precip'] = arr
    return dictionary

def normalize_data(var, time):

    mean_monthly = np.empty((12, var.shape[1], var.shape[2]))
    stdev_monthly = np.empty((12, var.shape[1], var.shape[2]))

    for i in np.arange(1,13): #iterate through months
        monthly_subset = []
        monthly_indices = []
        for t in time:
            if t.month == i:
                monthly_indices.append(time.index(t))
        monthly_subset = var[monthly_indices, :, :]

        mean_monthly[i - 1, :, :] = np.mean(monthly_subset, axis = 0)
        stdev_monthly[i - 1, :, :] = np.std(monthly_subset, axis = 0)

    for date in time: #for every time value, normalize
        month = date.month

        frame = var[time.index(date), :, :]

        #normalization formula = (x - mean) / stdev
        normalized_frame = np.divide(np.subtract(frame, mean_monthly[month - 1]),
                                     stdev_monthly[month - 1])
        var[time.index(date), :, :] = normalized_frame

    return var

#return all indices for a specific year for the list of filenames
def get_year_indices(datetimes, spec_year):
    indices = []
    for i in datetimes:
        if i.year == spec_year:
            indices.append(datetimes.index(i))
    return indices