From 12e5521044e48d117e443e22aa80b6178295c3f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Walbr=C3=B6l?= Date: Wed, 16 Dec 2020 15:38:02 +0100 Subject: [PATCH 1/3] Importer for AVAPS dropsondes (operated e.g. on HALO). Measurement gaps will be filled via linear interpolation and missing measurements between the aircraft altitude and the first recorded dropsonde measurement are filled with extrapolation. The extrapolation is based on BAHAMAS measurements (if available) or otherwise guessed from the existing measurements. After having measurement gaps filled, outliers are marked and, if a certain threshold is exceeded, smoothed out. Finally, the dropsonde measurements are interpolated on a 10 m grid. --- python/pyPamtra/importer.py | 990 ++++++++++++++++++++++++++++++++++++ 1 file changed, 990 insertions(+) diff --git a/python/pyPamtra/importer.py b/python/pyPamtra/importer.py index 8e188bf..52cb2df 100644 --- a/python/pyPamtra/importer.py +++ b/python/pyPamtra/importer.py @@ -3062,3 +3062,993 @@ def ncToDict(ncFilePath,keys='all',joinDimension='time',offsetKeys={},ncLib='net os.remove(ncFile) return joinedData +def read_AVAPS_dropsonde(file_raw_sondes, opt_T=np.nan, opt_P=np.nan, + opt_RH=np.nan, opt_Z=np.nan, verbose=False): + + """ + PAMTRA importer for raw AVAPS dropsonde data (preferred ending: 'PQC.nc') in clear sky + conditions over oceans. + + Parameters + ---------- + file_raw_sondes : str + File of raw dropsonde data (preferred ending: 'PQC.nc'). + opt_T : float, optional + Optional temperature (e.g. measured by BAHAMAS) (in K) at flight altitude. + opt_P: float, optional + Optional pressure (e.g. measured by BAHAMAS) (in Pa) at flight altitude. + opt_RH: float, optional + Optional relative humidity (e.g. measured by BAHAMAS) (in %) at flight altitude. + opt_Z: float, optional + Optional flight altitude during sonde launch (e.g. taken from BAHAMAS) (in m). + verbose : bool, optional + If True some extra information is printed. May be extensive if many dropsondes are imported! + """ + + + ############################################################################################ + # FUNCTIONS + + def fill_gaps(old_var, verbose=False): + # old variable gets linearly interpolated for each sonde launch. The function is ignoring nan + # values at the surface and above the launch altitude. + + new_var = deepcopy(old_var) + # create flag variable indicating if an entry of old_var has been changed: if = 0: not interpol. + interp_flag = np.zeros(old_var.shape) + + + # identify regions of nan values in the middle of the drop. Extrapolation will be handled in another + # function. + # identify the highest non-nan entry so we can cut the values above that highest entry: + # identify the lowest non-nan entry for similar reasons: + non_nan_idx = [idx for idx, x in np.ndenumerate(old_var) if (not np.isnan(x))] + non_nan_idx = np.where(~np.isnan(old_var))[0] + limits = np.array([non_nan_idx[0], non_nan_idx[-1]]) + + temp_var = deepcopy(old_var) + temp_var = temp_var[limits[0]:limits[1]+1] # will be the variable where the gaps are filled + + interp_flag_temp = np.zeros(temp_var.shape) + + # identify mid-drop-nan-values: need values after and before the nan: + nan_idx = np.argwhere(np.isnan(temp_var)) + interp_flag_temp[nan_idx] = 1 + + if nan_idx.size == 0: + return new_var, interp_flag + + else: # correct nan values: find the hole size via subtraction of subsequent indices + hole_size = np.zeros((len(nan_idx)+1,)).astype(int) + # hole_size = np.zeros(nan_idx.shape) # old version + k = 0 # index to address a hole ('hole number') + + for m in range(0, len(temp_var)-1): + + if not np.isnan(temp_var[m+1] - temp_var[m]): + hole_size[k] = 0 + + elif np.isnan(temp_var[m+1] - temp_var[m]): # k only incremented if an END of a hole has been identified: + if len(nan_idx) == 1: # handled seperately in case that merely one nan value exists in temp_var + hole_size[k] = 1 + break + + else: + if (not np.isnan(temp_var[m+1])) & (np.isnan(temp_var[m])): # END of a hole + k = k + 1 + continue + hole_size[k] = hole_size[k] + 1 # k won't be incremented until m finds another non-nan value + + else: + raise RuntimeError("Something unexpected happened when trying to find nan values in the middle " + + "of the dropsonde launch. Please contact 'a.walbroel@uni-koeln.de'. \n") + + # holes have been identified: edit the FIRST hole (editing depends on the size of the hole...) + c = 0 # dummy variable needed for the right jumps in hole_size and nan_idx. c is used to address + # nan_idx and therefore new_var... + + # meanwhile 'd' just runs through the array hole_size: + for d in range(0, len(hole_size)): + for L in range(0, hole_size[d]): # range(0, 1): L = 0 + temp_var[nan_idx[c] + L] = temp_var[nan_idx[c] - 1] + (L + 1)*(temp_var[int(nan_idx[c] + + hole_size[d])] - temp_var[nan_idx[c]-1]) / (hole_size[d] + 1) + + c = c + int(hole_size[d]) + if c > len(hole_size)-1: + break + + + # overwrite the possibly holey section: + new_var[limits[0]:limits[1]+1] = temp_var + # update interp_flag + interp_flag[limits[0]:limits[1]+1] = interp_flag_temp + + return new_var, interp_flag + + + def std_extrapol_BAH(old_dict, ill_keys, bah_dict, old_ipflag_dict=dict(), verbose=False): + # Will extrapolate some atmospheric variables to the ceiling of the dropsonde; old_ipflag will be updated. + # Needs the old variable, the interpolation flag (should've been generated by fill_gaps()), the key and + # height levels as INPUT + + new_dict = old_dict + n_alt = len(new_dict['Z']) + + new_ipflag_dict = old_ipflag_dict + + + # To get the obs_height: Floor BAHAMAS altitude to the next lowest 100 m. + drop_alt = np.floor(np.asarray(bah_dict['Z'])/100)*100 + obs_height = np.max(np.unique(drop_alt)) # omit repeated values; this value is used for the top of the extrapolation + + + # BAHAMAS (or other optional measurement platform at flight altitude) temperature, pressure, relative humidity at launch time: + bah_T = bah_dict['T'] + bah_P = bah_dict['P'] + bah_RH = bah_dict['RH'] + bah_alt = bah_dict['Z'] + + + ceiling = obs_height # last entry of altitude + if ceiling > 15000: + raise ValueError("Dropsonde launch altitude appears to be > 15000 m. Extrapolation is aborted because the " + + "tropopause may intervene.\n") + return new_dict, new_ipflag_dict + + + # Any value above obs_height will be deleted: So if e.g. Z has got values above obs_height, delete them: + # Find the first index that overshoots obs_height: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + overshoot = np.argwhere(new_dict['Z'] >= obs_height) + if len(overshoot) > 0: + overshoot = overshoot[0][0] + 1 + + for key in new_dict.keys(): + if key in ['trajectory', 'fillValues', 'ipflag']: # skip these ones ... it s not interesting anyway + continue + + if new_dict[key].ndim > 0: # otherwise: error when using len() + + if len(new_dict[key]) == n_alt: + new_dict[key] = new_dict[key][:overshoot] # limit variable to obs_height + if key in ill_keys or key == 'Z': + new_ipflag_dict[key] = new_ipflag_dict[key][:overshoot] + + + # At the end of 'Z' there may still be nans -> so we don't know to which altitude meteorological variables belong to in this region: + # Therefore: delete it and replace by extrapolation: + n_alt = len(new_dict['Z']) + last_nonnan_alt = np.argwhere(~np.isnan(new_dict['Z']))[-1][0] + if ceiling - new_dict['Z'][last_nonnan_alt] > 1000: + if verbose: + print("WARNING: highest GPS altitude measurement is at least 1000 m below the aircraft. Extrapolation may be erroneous.\n") + + for key in new_dict.keys(): + if key in ['trajectory', 'fillValues', 'ipflag']: # skip these ones ... it s not interesting anyway + continue + + if new_dict[key].ndim > 0: # otherwise: error when using len() + + if len(new_dict[key]) == n_alt: + new_dict[key] = new_dict[key][:last_nonnan_alt+1] # limit variable to obs_height + if key in ill_keys or key == 'Z': + new_ipflag_dict[key] = new_ipflag_dict[key][:last_nonnan_alt+1] + + + # Extend the old height grid up to the ceiling if the distance is greater than 10 meters: + alt = new_dict['Z'] + n_alt = len(alt) + alt = np.append(alt, np.arange(alt[np.argwhere(~np.isnan(alt))[-1]]+10, ceiling+11, 10)) + n_alt_new = len(alt) + + # Update the altitude variable in the dictionary: & update ipflag for gpsalt: + new_dict['Z'] = alt + new_ipflag_dict['Z'] = np.append(new_ipflag_dict['Z'], np.ones((n_alt_new - n_alt,))) + + + launch_time = datetime.datetime.utcfromtimestamp(new_dict['launch_time']).strftime("%Y-%m-%d %H:%M:%S") # for printing + + for key in ill_keys: + + new_var = new_dict[key] + # Must be expanded to the new height grid (up to the new ceiling) + new_var = np.append(new_var, np.full((n_alt_new - n_alt,), np.nan), axis=0) # append nans at the top of the profile + + if not new_ipflag_dict: # in case fill_gaps(...) hasn't been called before this one, it's assumed that nothing has been interpolated yet. + new_ipflag_dict[key] = np.zeros(new_var.shape) + + else: # new_ipflag also has to be extended to the new hgt grid: + new_ipflag_dict[key] = np.append(new_ipflag_dict[key], np.zeros((n_alt_new - n_alt,)), axis=0) + + + if key == 'T': + # If BAHAMAS Temperature measurement is available use it as target in case only the top 15 % of measurements + # are missing. Otherwise: + # Temperature: If dropsondes with measurements (ipflag = 0) from that day exist, estimate their average T gradient. + # If the extrapolated dropsonde temperature then deviates from BAH T by more than 5 K, use the ICAO std atmosphere + # as T gradient: + # Standard atmosphere (shifted accordingly to avoid a jump between the last known value and the extrapolation). + # ICAO standard atmosphere taken from: + # https://www.dwd.de/DE/service/lexikon/begriffe/S/Standardatmosphaere_pdf.pdf?__blob=publicationFile&v=3 + ICAO_standard_T = 288.15 - 0.0065*alt + + # Find highest non nan value if it lies below the ceiling: + idx = np.argwhere(~np.isnan(new_var)).flatten()[-1] + + + if alt[idx] < 0.6*ceiling: + if verbose: + print("Insufficient amount of measurements for temperature extrapolation at the top of the dropsonde grid (" + launch_time + + "). There are no temperature measurements above " + str(alt[idx]) + " m.\n") + new_dict[key] = new_var # then just overwrite the dictionary entry with the nonedited (but extended) variable + continue + + if alt[idx] < 0.85*ceiling: # then use BAHAMAS temperature as extrapolation target: + new_var[idx+1:] = (new_var[idx] + (bah_T - new_var[idx]) / (bah_alt - alt[idx]) * (alt[idx+1:] - alt[idx])) + + else: + # Or use mean T gradient of highest 20 measurements and continue with this gradient: + # Compute mean T gradient of highest 20 measurements: + mean_T_grad = np.mean(np.asarray([(new_var[idx-19:idx+1] - new_var[idx-20:idx]) / (alt[idx-19:idx+1] - alt[idx-20:idx])])) + extra_T = 288.15 + mean_T_grad*alt + + new_var[idx+1:] = extra_T[idx+1:] - (extra_T[idx] - new_var[idx]) + + if np.abs(new_var[-1] - bah_T) > 5: # then the deviation from BAHAMAS T is too great and we use the ICAO std atmosphere + new_var[idx+1:] = ICAO_standard_T[idx+1:] - (ICAO_standard_T[idx] - new_var[idx]) + + + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + + + elif key == 'P': + # Pressure: use hydrostatic eq. with scale height H = R / g0, R = 287 J kg^-1 K^-1, g0 = 9.81 m s^-2, using + # the vertican mean temperature : + # p(z) = p0 exp( -z / H) (Holton, p.21); + + # Find highest non nan value if it lies below the ceiling: + idx = np.argwhere(~np.isnan(new_var)).flatten()[-1] + + + if alt[idx] < ceiling/3: + if verbose: + print("Insufficient amount of measurements for pressure extrapolation at the top of the dropsonde grid (" + launch_time + + "). There are no pressure measurements above " + str(alt[idx]) + " m.\n") + new_dict[key] = new_var + continue + + # MAKE SURE THAT mean TEMPERATURE CAPTURES THE ACTUAL MEAN TEMPERATURE!! + if np.count_nonzero(~np.isnan(new_dict['T'][:])) / float(n_alt_new) <= 0.75: # in this case you may expect that a mean temperature would yield + # a bad representation of the true scale height. 0.75 was chosen arbitrarily. + T_icao_0 = 12*np.cos(4*np.pi*np.nanmean(new_dict['lat'][:])/360) + 288.15 # strongly simplified meridional surface temperature structure + H = 287 * np.mean(288.15 - 0.0065*alt) / 9.81 # using the ICAO standard atmosphere to compute the mean temperature + + if verbose: + print("Warning: Because insufficient non-nan temperature values were given for launch " + + launch_time + ", '" + str(np.mean(288.15 - 0.0065*alt)) + + " K' was assumed to be the mean temperature for hydrostatic pressure calculation. \n") + + else: + H = 287 * np.nanmean(new_dict['T'][:]) / 9.81 # scale height + + # Find index of lowest non nan pressure measurement: + l_idx = np.argwhere(~np.isnan(new_var[:]))[0] + p_ref = new_var[l_idx] # in Pa + alt_ref = alt[l_idx] + p_hydrostat = p_ref * np.exp(-(alt - alt_ref) / H) # in Pa + + new_var[idx+1:] = p_hydrostat[idx+1:] - (p_hydrostat[idx] - new_var[idx]) + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + + + elif key == 'u_wind' or key == 'v_wind': + # Wind: idea: Fill nan values with the mean wind gradient of the highest 20 (non-nan)measurents. + # It will only be extrapolated if the the last non-nan entry is higher than 0.80*ceiling: + # Other idea: just keep the same wind value + + # Find highest non nan value if it lies below the ceiling: + idx = np.argwhere(~np.isnan(new_var)).flatten()[-1] + + + if alt[idx] < 0.8*ceiling: + if verbose: + print("Insufficient amount of measurements for wind extrapolation at the top of the dropsonde grid (" + launch_time + + "). There are no wind measurements above " + str(alt[idx]) + " m.\n") + new_dict[key] = new_var + continue + + else: + # # # extra_speed_length = 20 # amount of indices used for wind speed gradient calculation + + # # # for k in range(idx, n_alt_new): + # # # new_var[n,k] = new_var[n,idx] + (k-idx)*(new_var[n,idx] - new_var[n,idx-extra_speed_length]) / (extra_speed_length + (k-idx)) + + # Alternative: just use the latest value for higher altitudes: + new_var[idx+1:] = new_var[idx] + new_var[idx+1:] = new_var[idx+1:] + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + + + elif key == 'RH': + # Relative humidity (RH): Idea: Linearly interpolate to the BAHAMAS value + + # Find highest non nan value if it lies below the ceiling: + idx = np.argwhere(~np.isnan(new_var)).flatten()[-1] + + + if alt[idx] < 0.65*ceiling: + if verbose: + print("Insufficient amount of measurements for relative humidity extrapolation at the top of the dropsonde grid (" + launch_time + + "). There are no rel. hum. measurements above " + str(alt[idx]) + " m.\n") + new_dict[key] = new_var + continue + + else: + new_var[idx+1:] = (new_var[idx] + (bah_RH - new_var[idx]) / (bah_alt - alt[idx]) * (alt[idx+1:] - alt[idx])) + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + new_var[np.argwhere(new_var[:] < 0)] = 0.0 + + + # More variables may be added here, if desired. + + + new_dict[key] = new_var + + return new_dict, new_ipflag_dict, obs_height + + + def std_extrapol(old_dict, ill_keys, old_ipflag_dict=dict(), verbose=False): + # Will extrapolate some atmospheric variables to the ceiling of the dropsonde; old_ipflag will be updated. + # Needs the old variable, the interpolation flag (should've been generated by fill_gaps()), + # the key and height levels as INPUT + + new_dict = old_dict + n_alt = len(new_dict['Z']) + + new_ipflag_dict = old_ipflag_dict + + # To get the obs_height: find highest ... (?) + if np.isnan(new_dict['reference_alt']): + # Select the highest non nan index of T or P. + highest_nonnan_Z = np.argwhere(~np.isnan(new_dict['Z']))[-1] + obs_height = np.floor(new_dict['Z'][highest_nonnan_Z[0]]/100)*100 + else: # use the reference_alt + obs_height = (np.floor(new_dict['reference_alt']/100)*100)[0] + + + ceiling = obs_height # last entry of altitude + if ceiling > 15000: + raise ValueError("Dropsonde launch altitude appears to be > 15000 m. Extrapolation is aborted because the " + + "tropopause may intervene.\n") + return new_dict, new_ipflag_dict + + + # Any value above obs_height will be deleted: So if e.g. Z has got values above obs_height, delete them: + # Find the first index that overshoots obs_height: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + overshoot = np.argwhere(new_dict['Z'] >= obs_height) + if len(overshoot) > 0: + overshoot = overshoot[0][0] + 1 + + for key in new_dict.keys(): + if key in ['trajectory', 'fillValues', 'ipflag']: # skip these ones ... it s not interesting anyway + continue + + if new_dict[key].ndim > 0: # otherwise: error when using len() + + if len(new_dict[key]) == n_alt: + new_dict[key] = new_dict[key][:overshoot] # limit variable to obs_height + if key in ill_keys or key == 'Z': + new_ipflag_dict[key] = new_ipflag_dict[key][:overshoot] + + + # At the end of 'Z' there may still be nans -> so we don't know to which altitude meteorological variables belong to in this region: + # Therefore: delete it and replace by extrapolation: + n_alt = len(new_dict['Z']) + last_nonnan_alt = np.argwhere(~np.isnan(new_dict['Z']))[-1][0] + if ceiling - new_dict['Z'][last_nonnan_alt] > 1000: + if verbose: + print("WARNING: highest GPS altitude measurement is at least 1000 m below the aircraft. Extrapolation may be erroneous.\n") + + for key in new_dict.keys(): + if key in ['trajectory', 'fillValues', 'ipflag']: # skip these ones ... it s not interesting anyway + continue + + if new_dict[key].ndim > 0: # otherwise: error when using len() + + if len(new_dict[key]) == n_alt: + new_dict[key] = new_dict[key][:last_nonnan_alt+1] # limit variable to obs_height + if key in ill_keys or key == 'Z': + new_ipflag_dict[key] = new_ipflag_dict[key][:last_nonnan_alt+1] + + + # Extend the old height grid up to the ceiling if the distance is greater than 10 meters: + alt = new_dict['Z'] + n_alt = len(alt) + alt = np.append(alt, np.arange(alt[np.argwhere(~np.isnan(alt))[-1]]+10, ceiling+11, 10)) + n_alt_new = len(alt) + + # Update the altitude variable in the dictionary: & update ipflag for gpsalt: + new_dict['Z'] = alt + new_ipflag_dict['Z'] = np.append(new_ipflag_dict['Z'], np.ones((n_alt_new - n_alt,))) + + + launch_time = datetime.datetime.utcfromtimestamp(new_dict['launch_time']).strftime("%Y-%m-%d %H:%M:%S") # for printing + + for key in ill_keys: + + new_var = new_dict[key] + # Must be expanded to the new height grid (up to the new ceiling) + new_var = np.append(new_var, np.full((n_alt_new - n_alt,), np.nan), axis=0) # append nans at the top of the profile + + if not new_ipflag_dict: # in case fill_gaps(...) wasn't called before this one, it's assumed that nothing has been interpolated yet. + new_ipflag_dict[key] = np.zeros(new_var.shape) + + else: # new_ipflag also has to be extended to the new hgt grid: + new_ipflag_dict[key] = np.append(new_ipflag_dict[key], np.zeros((n_alt_new - n_alt,)), axis=0) + + + + if key == 'T': + # Temperature: If dropsondes with measurements (ipflag = 0) from that day exist, estimate their average T gradient. + # In case more than 15 % of T measurements must be extrapolated, the ICAO standard atmosphere is used as T gradient. + # ICAO standard atmosphere taken from: + # https://www.dwd.de/DE/service/lexikon/begriffe/S/Standardatmosphaere_pdf.pdf?__blob=publicationFile&v=3 + ICAO_standard_T = 288.15 - 0.0065*alt + + # find highest non nan value if it lies below the ceiling: + idx = np.argwhere(~np.isnan(new_var)).flatten()[-1] + + if alt[idx] < 0.6*ceiling: + if verbose: + print("Insufficient amount of measurements for temperature extrapolation at the top of the dropsonde grid (" + launch_time + + "). There are no temperature measurements above " + str(alt[idx]) + " m.\n") + new_dict[key] = new_var # then just overwrite the dictionary entry with the nonedited (but extended) variable + continue + + if alt[idx] < 0.85*ceiling: # then use standard atmosphere (ICAO): + new_var[idx+1:] = ICAO_standard_T[idx+1:] + (new_var[idx] - ICAO_standard_T[idx]) + + else: + # Use mean T gradient of highest 20 measurements and continue with this gradient: + # Compute mean T gradient of highest 20 measurements: + mean_T_grad = np.mean(np.asarray([(new_var[idx-19:idx+1] - new_var[idx-20:idx]) / (alt[idx-19:idx+1] - alt[idx-20:idx])])) + T_continued = 288.15 + mean_T_grad*alt + + new_var[idx+1:] = T_continued[idx+1:] - (T_continued[idx] - new_var[idx]) + + + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + + + elif key == 'P': + # Pressure: use hydrostatic eq. with scale height H = R / g0, R = 287 J kg^-1 K^-1, g0 = 9.81 m s^-2, using the vertican mean temperature : + # p(z) = p0 exp( -z / H) (Holton, p.21); + + # Find highest non nan value if it lies below the ceiling: + idx = np.argwhere(~np.isnan(new_var)).flatten()[-1] + + if alt[idx] < ceiling/3: + if verbose: + print("Insufficient amount of measurements for pressure extrapolation at the top of the dropsonde grid (" + launch_time + + "). There are no pressure measurements above " + str(alt[idx]) + " m.\n") + new_dict[key] = new_var + continue + + # MAKE SURE THAT mean TEMPERATURE CAPTURES THE ACTUAL MEAN TEMPERATURE!! + if np.count_nonzero(~np.isnan(new_dict['T'][:])) / float(n_alt_new) <= 0.75: # in this case you may expect that a mean temperature would yield + # a bad representation of the true scale height. 0.75 was chosen arbitrarily. + T_icao_0 = 12*np.cos(4*np.pi*np.nanmean(new_dict['lat'][:])/360) + 288.15 # strongly simplified meridional surface temperature structure + H = 287 * np.mean(288.15 - 0.0065*alt) / 9.81 # using the ICAO standard atmosphere to compute the mean temperature + + if verbose: + print("Warning: Because insufficient non-nan temperature values were given for launch " + + launch_time + ", '" + str(np.mean(288.15 - 0.0065*alt)) + + " K' was assumed to be the mean temperature for hydrostatic pressure calculation. Can possibly be " + + "avoided if the temperature is extrapolated before the pressure.\n") + + else: + H = 287 * np.nanmean(new_dict['T'][:]) / 9.81 # scale height + + # Find index of lowest non nan pressure measurement: + l_idx = np.argwhere(~np.isnan(new_var[:]))[0] + p_ref = new_var[l_idx] # in Pa + alt_ref = alt[l_idx] + p_hydrostat = p_ref * np.exp(-(alt - alt_ref) / H) # in Pa + + new_var[idx+1:] = p_hydrostat[idx+1:] - (p_hydrostat[idx] - new_var[idx]) + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + + + elif key == 'u_wind' or key == 'v_wind': + # Wind: idea: Fill nan values with the mean wind gradient of the highest 20 (non-nan)measurents. It will only be extrapolated if the the last non-nan entry + # is higher than 0.80*ceiling: + # Other idea: Just keep the same wind value + + # Find highest non nan value if it lies below the ceiling: + idx = np.argwhere(~np.isnan(new_var)).flatten()[-1] + + if alt[idx] < 0.8*ceiling: + if verbose: + print("Insufficient amount of measurements for wind extrapolation at the top of the dropsonde grid (" + launch_time + + "). There are no wind measurements above " + str(alt[idx]) + " m.\n") + new_dict[key] = new_var + continue + + else: + # # # extra_speed_length = 20 # amount of indices used for wind speed gradient calculation + + # # # for k in range(idx, n_alt_new): + # # # new_var[n,k] = new_var[n,idx] + (k-idx)*(new_var[n,idx] - new_var[n,idx-extra_speed_length]) / (extra_speed_length + (k-idx)) + + # Alternative: Just use the latest value for higher altitudes: + new_var[idx+1:] = new_var[idx] + new_var[idx+1:] = new_var[idx+1:] + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + + + elif key == 'RH': + # Relative humidity (RH): Idea: fill nan values with the mean RH of the highest 10 measurements but only + # if the highest measurement exceeds or is equal to 0.95*ceiling! + # Other idea: Use RH approx 0 for greater altitudes. + noise_strength = 0/2 # percent + + idx = np.argwhere(~np.isnan(new_var)).flatten()[-1] + + if alt[idx] < 0.65*ceiling: + if verbose: + print("Insufficient amount of measurements for relative humidity extrapolation at the top of the dropsonde grid (" + launch_time + + "). There are no rel. hum. measurements above " + str(alt[idx]) + " m.\n") + new_dict[key] = new_var + continue + + elif alt[idx] >= 0.95*ceiling: + new_var[idx+1:] = np.mean(new_var[idx-9:idx+1]) + + else: + new_var[idx+1:] = 1.5 + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + new_var[np.argwhere(new_var[:] < 0)] = 0.0 + + + # More variables may be added here, if desired. + + new_dict[key] = new_var + + return new_dict, new_ipflag_dict, obs_height + + + def regridding(new_dict, obs_height, ill_keys, resolution=10): + ''' + Regridding variables specified in ill_keys to a uniform grid + (from the surface up to obs_height) with a user-defined + resolution (in meters, default=10). + ''' + + new_alt = np.arange(0, obs_height+1, 10) + + for key in ill_keys: + new_dict[key] = np.interp(new_alt, new_dict['Z'], new_dict[key]) + + new_dict['Z'] = new_alt + + return new_dict + + + def repair_surface(old_dict, ill_keys, old_ipflag_dict=dict(), verbose=False): + # Filling nan values at the surface if the gap to the surface isn't too large (e.g. measurements below 200 m + # must exist (roughly 15-20 seconds before splash). + new_dict = old_dict + alt = old_dict['Z'] + n_alt = len(alt) + new_ipflag_dict = old_ipflag_dict + launch_time = datetime.datetime.utcfromtimestamp(new_dict['launch_time']).strftime("%Y-%m-%d %H:%M:%S") + + lim = 200 # if there are no measurements below this altitude then the extrapolation at the surface won't be performed + + if ill_keys == ['Z']: + threshold_list = [ill_keys, [200], ['m']] + else: + threshold_list = [ill_keys, [5.0, 4000.0, 50.0, 0.1, 0.1, 5.0, 5.0, 1.0], + ['K', 'hPa', '%', 'deg', 'deg', 'm/s', 'm/s', 'm/s']] # used to check if surface value deviates siginificantly from lowest measurement + + for key in ill_keys: + + new_var = new_dict[key] + + if not new_ipflag_dict: # in case fill_gaps(...) wasn't called before this one, it's assumed that nothing has been interpolated yet. + new_ipflag_dict[key] = np.zeros(new_var.shape) + + + # find the first non-nan entry + idx = np.argwhere(~np.isnan(new_var[:]))[0][0] + + if alt[idx] < lim: + sfc_gap = np.arange(0,idx) + + if len(sfc_gap) == 0: + continue + else: + + # create mean gradient of the variable of 10 measurements above the lowest measurement, or, if grid is too coarse, take 200-400 m average: + if alt[idx+10] > 2*lim: # default: if alt[idx+10] > 400: # take lowest measurement to 400 m mean gradient + # find index closest to 400 m: + idx2 = np.argmin(np.abs(alt - 2*lim)) + + else: # take mean grad. of 10 measurem. above lowest measurement: + idx2 = idx+10 + + mean_grad = np.mean([new_var[j+1] - new_var[j] for j in range(idx,idx2)]) # theoretically, should never be nan because fill_gaps + # should've fixed the holes between the first and last measurement + for j in sfc_gap: + new_var[idx-j-1] = new_var[idx] - mean_grad*(j+1) + + + # check if sfc value not too far off the lowest measurement: + if key == 'RH': + if np.any(new_var[sfc_gap] < 0): + new_var[sfc_gap] = 0 + if verbose: + print("Caution, '" + key + "' surface repair resulted in negative values. Manually setting the missing values at the ground to 0 for launch " + + launch_time + ".\n") + + elif np.any(new_var[sfc_gap] > 100): + new_var[sfc_gap] = 100 + if verbose: + print("Caution, '" + key + "' surface repair resulted in >100 %. Manually setting the missing values at the ground to 100 for launch " + + launch_time + ".\n") + + threshold = threshold_list[1][threshold_list[0].index(key)] + si_unit = threshold_list[2][threshold_list[0].index(key)] + if np.abs(new_var[0] - new_var[idx]) > threshold: + if verbose: + print("Caution, '" + key + "' surface value deviates more than " + str(threshold) + " " + si_unit + " from the lowest measurement (launch " + + launch_time + ").\n") + + + new_ipflag_dict[key][sfc_gap] = 1 + + else: + if verbose: + print("No measurements below " + str(lim) + " m. Extrapolation of '" + key + "', launch " + launch_time + + " would eventually lead to wrong assumptions at the surface. Therefore aborted.\n") + continue + + return new_dict, new_ipflag_dict + + + def mark_outliers(sonde_dict, ill_keys): # mark outliers: outliers defined when exceeding certain thresholds + + new_dict = sonde_dict + + # Thresholds are defined by change of meteorol. variable with altitude: e.g. delta p / delta z + thresholds = [0.065, 40, 2.5, 1, 1] # in [K/m, Pa/m, %/m, ms^-1/m, ms^-1/m] + dz = new_dict['Z'][1:] - new_dict['Z'][:-1] # delta z + + for key in ill_keys: + if key == 'lat' or key == 'lon': + continue + + met_threshold = thresholds[ill_keys.index(key)] # threshold for key + + d_met = new_dict[key][1:] - new_dict[key][:-1] # change of meteorological variable 'key' + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + exceed_idx = np.argwhere(np.abs(d_met / dz) >= met_threshold) + new_dict[key][exceed_idx] = float('nan') + + return new_dict + + + def readDropsondeNCraw(filename, verbose=False): + """ + Dropsonde netCDF file to dictionary. + Variables with a designated unit that is not an SI unit will be converted to SI units. + Rel. humidity will be in % though. + Time will be given in seconds since 1970-01-01 00:00:00 UTC. + + Parameters + ---------- + filename : str + Filename of the raw dropsonde files (suffix "PQC.nc"!) + verbose : bool + Additional comments printed if True. + """ + + import netCDF4 as nc + + file_nc = nc.Dataset(filename) + + dropsonde_dict = dict() + dropsonde_dict['fillValues'] = dict() + for nc_keys in file_nc.variables.keys(): + nc_var = file_nc.variables[nc_keys] + dropsonde_dict[nc_keys] = np.asarray(nc_var) + + if hasattr(nc_var, 'missing_value'): + dropsonde_dict['fillValues'][nc_keys] = nc_var.missing_value + + # type of the nc_var: + ncvar_type = type(dropsonde_dict[nc_keys][0]) + + # find where the current variable has missing values and set them to nan: + missing_idx = np.argwhere(dropsonde_dict[nc_keys] == dropsonde_dict['fillValues'][nc_keys]) + + if ((ncvar_type == np.float32) or (ncvar_type == np.float64)): + dropsonde_dict[nc_keys][missing_idx] = float('nan') + + + # converting units: time stamps will be handled seperately. + if hasattr(nc_var, 'units'): + if nc_var.units == 'degC': + dropsonde_dict[nc_keys] = dropsonde_dict[nc_keys] + 273.15 + if verbose: print("From degC to K: " + str(nc_keys)) + elif nc_var.units == 'hPa': + dropsonde_dict[nc_keys] = dropsonde_dict[nc_keys]*100 + if verbose: print("From hPa to Pa: " + str(nc_keys)) + elif nc_var.units == 'gram/kg': + dropsonde_dict[nc_keys] = dropsonde_dict[nc_keys]/1000 + if verbose: print("From g/kg to kg/kg: " + str(nc_keys)) + + time_base = datetime.datetime.strptime(file_nc.variables['launch_time'].units[14:-4], "%Y-%m-%d %H:%M:%S") # time base given in the units attribute + dropsonde_dict['launch_time'] = (time_base - datetime.datetime(2020,1,1)).total_seconds() + dropsonde_dict['launch_time'] + dropsonde_dict['reference_time'] = (time_base - datetime.datetime(2020,1,1)).total_seconds() + dropsonde_dict['reference_time'] + dropsonde_dict['time'] = (time_base - datetime.datetime(2020,1,1)).total_seconds() + dropsonde_dict['time'] + if verbose: print("\n") + + # Convert to internal convention: Temperature = T, Pressure = P, relative humidity = RH, altitude = Z + dropsonde_dict['T'] = dropsonde_dict['tdry'] + dropsonde_dict['P'] = dropsonde_dict['pres'] + dropsonde_dict['RH'] = dropsonde_dict['rh'] + dropsonde_dict['Z'] = dropsonde_dict['gpsalt'] + + return dropsonde_dict + + + + ############################################################################################ + + + + # Dropsonde quality control: if there are small gaps of measurements between launch altitude and surface, they will be filled via interpolation. + # Additionally, the sondes will be extrapolated to a certain altitude. + + # BAHAMAS data (optional): + if np.all([~np.isnan(opt_T), ~np.isnan(opt_P), ~np.isnan(opt_RH), ~np.isnan(opt_Z)]): + opt_dict = {'T': opt_T, 'P': opt_P, 'RH': opt_RH, 'Z': opt_Z} + elif np.all([np.isnan(opt_T), np.isnan(opt_P), np.isnan(opt_RH), np.isnan(opt_Z)]): + opt_dict = dict() + else: + raise ValueError("Optional measurements at flight altitude must include all four variables: Temperature (in K), pressure (in Pa), relative " + + "humidity (in %), flight altitude (in m). If not all measurements can be provided, leave optional data input empty.") + + + failed_sondes = [] # lists the filenames of failed sondes (nearly no measurements) + stuck_sondes = [] # lists the filenames of stuck sondes (sondes that don't show values above 1500 m) + + + sonde_dict = readDropsondeNCraw(file_raw_sondes, verbose) + launch_date = datetime.datetime.utcfromtimestamp(sonde_dict['launch_time']).strftime("%Y-%m-%d") + dropsonde_date = (datetime.datetime.strptime(launch_date, "%Y-%m-%d")).strftime("%Y%m%d") # date displayed in the filename + # --> comfy way to find the right BAHAMAS data for std_extrapol + if verbose: + print("########## Launch time: " + datetime.datetime.utcfromtimestamp(sonde_dict['launch_time']).strftime("%Y-%m-%d %H:%M:%S") + " ##########\n") + print("Input: ", file_raw_sondes) + + + # Add another condition that checks if e.g. nearly no measurements exist at all (for T, P and RH): + if np.any([np.count_nonzero(~np.isnan(sonde_dict['T'])) < 0.1*len(sonde_dict['T']), + np.count_nonzero(~np.isnan(sonde_dict['P'])) < 0.1*len(sonde_dict['P']), + np.count_nonzero(~np.isnan(sonde_dict['RH'])) < 0.1*len(sonde_dict['RH']), + np.count_nonzero(~np.isnan(sonde_dict['lat'])) < 0.05*len(sonde_dict['lat']), + np.count_nonzero(~np.isnan(sonde_dict['lon'])) < 0.05*len(sonde_dict['lon']), + np.count_nonzero(~np.isnan(sonde_dict['u_wind'])) < 0.05*len(sonde_dict['u_wind']), + np.count_nonzero(~np.isnan(sonde_dict['v_wind'])) < 0.05*len(sonde_dict['v_wind'])]): + failed_sondes.append(file_raw_sondes) + raise ValueError("One PAMTRA-critical variable measurement failed. Skipping this dropsonde. \n") + + + # Add yet another condition that checks if the sonde got stuck mid air: + if not np.any(sonde_dict['Z'][~np.isnan(sonde_dict['Z'])] < 1500): # then I assume that the whole launch was doomed + raise ValueError("'gpsalt' doesn't seem to include any values below 1500 m. That is insufficient for extrapolation at the surface. \n") + stuck_sondes.append(file_raw_sondes) + + + # Subsequent variables will be cured from holey nan value disease...: + # pressure, temperature, relhum, wind (u & v & w), lat, lon. + # Other variables, of which you can expect a linear interpolated over gaps to be applicable, may be added. + ill_keys = ['T', 'P', 'RH', 'lat', 'lon', 'u_wind', 'v_wind'] + + + sonde_ipflag = dict() # will contain the interpolation flags for interpolated nan values in the middle of the drop + + # It's known that the altitude axis (gpsalt) does have some broken values (e.g. sudden jumps or exceeding the aircraft altitude). + # Before filliing the gaps, the altitude axis must be fixed: + sonde_dict['Z'], sonde_ipflag['Z'] = fill_gaps(sonde_dict['Z'], verbose=verbose) + for key in ill_keys: + sonde_dict[key], sonde_ipflag[key] = fill_gaps(sonde_dict[key], verbose=verbose) # altitude must be passed to check for dimensions of the to-be-cured variable... + + sonde_dict['ipflag'] = sonde_ipflag + + + # The raw dropsonde files show an altitude variable with increases from [0 to -1] in general: but probably due to gps tracking, + # the altitude decreases at the "top": this must be filtered out: + # Find highest index where altitude[idx+1] - altitude[idx] > 0: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + altitude_stop = np.argwhere(sonde_dict['Z'][1:] - sonde_dict['Z'][:-1] > 0)[-1][0] + 2 + # +2 because it's used as indexing [... : altitude_stop] => +1 and because the array size had been reduced by 1 during argwhere(...) + + # If the lowest non nan value of the altitude coordinate is < 0: cut the rest off: + # Find lowest non nan value of altitude: + lowest = np.argwhere(~np.isnan(sonde_dict['Z'][:]))[0][0] + + ndata = len(sonde_dict['Z']) + + # Then cut each variable at this altitude index: + if sonde_dict['Z'][lowest] < 0: + for key in sonde_dict.keys(): + if key in ['trajectory', 'fillValues', 'ipflag']: # skip these ones ... it s not interesting anyway + continue + + if sonde_dict[key].ndim > 0: # otherwise: error when using len() + + if len(sonde_dict[key]) == ndata: + sonde_dict[key] = sonde_dict[key][lowest:altitude_stop] + if key in ill_keys or key == 'Z': + sonde_dict['ipflag'][key] = sonde_dict['ipflag'][key][lowest:altitude_stop] + + else: + for key in sonde_dict.keys(): + if key in ['trajectory', 'fillValues', 'ipflag']: # skip these ones ... it s not interesting anyway + continue + + if sonde_dict[key].ndim > 0: # otherwise: error when using len() + + if len(sonde_dict[key]) == ndata: + sonde_dict[key] = sonde_dict[key][:altitude_stop] + if key in ill_keys or key == 'Z': + sonde_dict['ipflag'][key] = sonde_dict['ipflag'][key][:altitude_stop] + + + # The altitude index may be a bit broken... needs to be fixed. mark them as nan and let it run through fill_gaps again: + dz = sonde_dict['Z'][1:] - sonde_dict['Z'][:-1] # dz[i] = z[i+1] - z[i] + + for k in range(len(dz)): + if (dz[k] < 0) or (dz[k] > 15): # this filters too big jumps in the altitude coordinate + sonde_dict['Z'][k+1] = float('nan') + + sonde_dict['Z'], sonde_ipflag['Z'] = fill_gaps(sonde_dict['Z'], verbose=verbose) + + + # Perform surface repair for altitude coordinate: + sonde_dict, sonde_dict['ipflag'] = repair_surface(sonde_dict, ['Z'], sonde_dict['ipflag'], verbose=verbose) + # for some reasons, 'alt' is sort of unused but an assigned key in the dictionary. So ... we can also just set it to gpsalt: + sonde_dict['alt'] = sonde_dict['Z'] + + # now we still need to handle the nan values at the surface: If there are no non-nan values in the lowest 5 % of the variable + # -> don't interpolate because the assumption would eventually lead to senseless surface values: + sonde_dict, sonde_dict['ipflag'] = repair_surface(sonde_dict, ill_keys, sonde_dict['ipflag'], verbose=verbose) + + + # Extrapolating the ill_keys to the ceiling of the dropsondes (e.g. below aircraft altitude): + # CAUTION: it is expected that the dropsondes start BELOW THE TROPOPAUSE! + # It is advisable to do the TEMPERATURE EXTRAPOLATION FIRST because it improves the pressure extrapolation. + if opt_dict: + sonde_dict, sonde_dict['ipflag'], obs_height = std_extrapol_BAH(sonde_dict, ill_keys, opt_dict, sonde_dict['ipflag'], verbose=verbose) + + else: + sonde_dict, sonde_dict['ipflag'], obs_height = std_extrapol(sonde_dict, ill_keys, sonde_dict['ipflag'], verbose=verbose) + + # Regridding to a uniform vertical grid with a user-specified resolution: + sonde_dict = regridding(sonde_dict, obs_height, ill_keys, 10) + + + # Find outliers and mark them (as nan): afterwards fill them again + sonde_dict = mark_outliers(sonde_dict, ['T', 'P', 'RH', 'u_wind', 'v_wind']) + for key in ill_keys: + sonde_dict[key], sonde_ipflag[key] = fill_gaps(sonde_dict[key], verbose=verbose) + + # # # # # # # # # # # # # # # # # # # # # # # # # # + + # Check for remaining NaNs in meteorological variables: + # RH, T and P (and the height coordinate Z): + if (np.any([np.isnan(sonde_dict['T']), np.isnan(sonde_dict['P']), np.isnan(sonde_dict['RH']), + np.isnan(sonde_dict['Z'])])): + if verbose: + print(' NaN-counts: %d, %d, %d, %d' % ( + np.isnan(sonde_dict['T']).sum(), + np.isnan(sonde_dict['P']).sum(), + np.isnan(sonde_dict['RH']).sum(), + np.isnan(sonde_dict['Z']).sum())) + raise ValueError("Nans still remained in T, P, RH or Z. Aborting execution of dropsonde %s. Please contact 'a.walbroel@uni-koeln.de'." + % file_raw_sondes) + + # Check if surface winds are not nan: (index [1] is used because [0] is at 0 m above sea level) + sfc_wind_available = ~np.isnan(sonde_dict['u_wind'][1] + sonde_dict['v_wind'][1]) + if not sfc_wind_available: + if verbose: + print("WARNING, NaN in u_wind[1] or v_wind[1]. Surface winds won't be included in PAMTRA for %s !"%( + file_raw_sondes)) + print(' NaN-counts (u,v): %d, %d' % ( + np.isnan(sonde_dict['u_wind'][1]), + np.isnan(sonde_dict['v_wind'][1]))) + + + # Create pamtra object; change settings: + pam = pyPamtra() + + pam.nmlSet['passive'] = True # passive simulation + pam.nmlSet['active'] = False # False: no radar simulation + + # Create dictionary for PAMTRA data: + pamData = dict() + + Nh = len(sonde_dict['Z']) # number of height levels + Nx = 1 # number of sondes (is always 1 because one sonde is considered at a time) + nhydros = 0 # number of hydrometeors (here: clear sky assumed) + shape2d = [Nx, Nx] + shape3d = shape2d + [Nh] + + pamData['hgt_lev'] = np.broadcast_to(sonde_dict['Z'], shape3d) # height + pamData['press_lev'] = np.broadcast_to(sonde_dict['P'][:], shape3d) # pressure + pamData['temp_lev'] = np.broadcast_to(sonde_dict['T'][:], shape3d) # temperature + pamData['relhum_lev'] = np.broadcast_to(sonde_dict['RH'][:], shape3d) # relative humidity + + if sfc_wind_available: + pamData['wind10u'] = np.broadcast_to(sonde_dict['u_wind'][1], shape2d) + pamData['wind10v'] = np.broadcast_to(sonde_dict['v_wind'][1], shape2d) + + if not np.any([np.isnan(sonde_dict['reference_lon']), np.isnan(sonde_dict['reference_lat'])]): + pamData['lon'] = np.broadcast_to(sonde_dict['reference_lon'], shape2d) + pamData['lat'] = np.broadcast_to(sonde_dict['reference_lat'], shape2d) + + else: # use highest non nan lat/lon values: + pamData['lon'] = np.broadcast_to(sonde_dict['lon'][~np.isnan(sonde_dict['lon'])][-1], shape2d) + pamData['lat'] = np.broadcast_to(sonde_dict['lat'][~np.isnan(sonde_dict['lat'])][-1], shape2d) + + pamData['timestamp'] = np.broadcast_to(sonde_dict['launch_time'], shape2d) + pamData['groundtemp'] = np.broadcast_to(sonde_dict['T'][0], shape2d) + + # Obseravtion height: + obs_height = np.asarray(obs_height).flatten() + pamData['obs_height'] = np.broadcast_to(obs_height, shape2d + [len(obs_height), ]) + + # Surface type & reflectivity: + pamData['sfc_type'] = np.zeros(shape2d) # 0: ocean, 1: land + pamData['sfc_refl'] = np.chararray(shape2d) + pamData['sfc_refl'][:] = 'F' + pamData['sfc_refl'][pamData['sfc_type'] == 1] = 'S' + + + # 4d variables: hydrometeors: + shape4d = [Nx, Nx, Nh-1, 5] # potentially 5 hydrometeor classes with this setting + pamData['hydro_q'] = np.zeros(shape4d) + pamData['hydro_q'][...,0] = 0# CLOUD + pamData['hydro_q'][...,1] = 0# ICE + pamData['hydro_q'][...,2] = 0# RAIN + pamData['hydro_q'][...,3] = 0# SNOW + pamData['hydro_q'][...,4] = 0# GRAUPEL + + + # Descriptorfile must be included. otherwise, pam.p.nhydro would be 0 which is not permitted. + descriptorFile = np.array([ + #['hydro_name' 'as_ratio' 'liq_ice' 'rho_ms' 'a_ms' 'b_ms' 'alpha_as' 'beta_as' 'moment_in' 'nbin' 'dist_name' 'p_1' 'p_2' 'p_3' 'p_4' 'd_1' 'd_2' 'scat_name' 'vel_size_mod' 'canting'] + ('cwc_q', -99.0, 1, -99.0, -99.0, -99.0, -99.0, -99.0, 3, 1, 'mono', -99.0, -99.0, -99.0, -99.0, 2e-05, -99.0, 'mie-sphere', 'khvorostyanov01_drops', -99.0), + ('iwc_q', -99.0, -1, -99.0, 130.0, 3.0, 0.684, 2.0, 3, 1, 'mono_cosmo_ice', -99.0, -99.0, -99.0, -99.0, -99.0, -99.0, 'mie-sphere', 'heymsfield10_particles', -99.0), + ('rwc_q', -99.0, 1, -99.0, -99.0, -99.0, -99.0, -99.0, 3, 50, 'exp', -99.0, -99.0, 8000000.0, -99.0, 0.00012, 0.006, 'mie-sphere', 'khvorostyanov01_drops', -99.0), + ('swc_q', -99.0, -1, -99.0, 0.038, 2.0, 0.3971, 1.88, 3, 50, 'exp_cosmo_snow', -99.0, -99.0, -99.0, -99.0, 5.1e-11, 0.02, 'mie-sphere', 'heymsfield10_particles', -99.0), + ('gwc_q', -99.0, -1, -99.0, 169.6, 3.1, -99.0, -99.0, 3, 50, 'exp', -99.0, -99.0, 4000000.0, -99.0, 1e-10, 0.01, 'mie-sphere', 'khvorostyanov01_spheres', -99.0)], + dtype=[('hydro_name', 'S15'), ('as_ratio', ' Date: Thu, 17 Dec 2020 17:50:17 +0100 Subject: [PATCH 2/3] Inclusion of Sea Surface Temperature (in K) as optional parameter for the AVAPS dropsonde importer. Additionally, some spacing lines have been removed. --- python/pyPamtra/importer.py | 80 +++++-------------------------------- 1 file changed, 11 insertions(+), 69 deletions(-) diff --git a/python/pyPamtra/importer.py b/python/pyPamtra/importer.py index 52cb2df..3c640f6 100644 --- a/python/pyPamtra/importer.py +++ b/python/pyPamtra/importer.py @@ -3062,7 +3062,7 @@ def ncToDict(ncFilePath,keys='all',joinDimension='time',offsetKeys={},ncLib='net os.remove(ncFile) return joinedData -def read_AVAPS_dropsonde(file_raw_sondes, opt_T=np.nan, opt_P=np.nan, +def read_AVAPS_dropsonde(file_raw_sondes, sst=np.nan, opt_T=np.nan, opt_P=np.nan, opt_RH=np.nan, opt_Z=np.nan, verbose=False): """ @@ -3073,6 +3073,9 @@ def read_AVAPS_dropsonde(file_raw_sondes, opt_T=np.nan, opt_P=np.nan, ---------- file_raw_sondes : str File of raw dropsonde data (preferred ending: 'PQC.nc'). + sst : float, optional + Sea surface temperature (in K). If this parameter is not set, the lowest dropsonde + temperature measurement is used (eventually extrapolated). opt_T : float, optional Optional temperature (e.g. measured by BAHAMAS) (in K) at flight altitude. opt_P: float, optional @@ -3085,8 +3088,6 @@ def read_AVAPS_dropsonde(file_raw_sondes, opt_T=np.nan, opt_P=np.nan, If True some extra information is printed. May be extensive if many dropsondes are imported! """ - - ############################################################################################ # FUNCTIONS def fill_gaps(old_var, verbose=False): @@ -3097,7 +3098,6 @@ def fill_gaps(old_var, verbose=False): # create flag variable indicating if an entry of old_var has been changed: if = 0: not interpol. interp_flag = np.zeros(old_var.shape) - # identify regions of nan values in the middle of the drop. Extrapolation will be handled in another # function. # identify the highest non-nan entry so we can cut the values above that highest entry: @@ -3120,7 +3120,6 @@ def fill_gaps(old_var, verbose=False): else: # correct nan values: find the hole size via subtraction of subsequent indices hole_size = np.zeros((len(nan_idx)+1,)).astype(int) - # hole_size = np.zeros(nan_idx.shape) # old version k = 0 # index to address a hole ('hole number') for m in range(0, len(temp_var)-1): @@ -3157,7 +3156,6 @@ def fill_gaps(old_var, verbose=False): if c > len(hole_size)-1: break - # overwrite the possibly holey section: new_var[limits[0]:limits[1]+1] = temp_var # update interp_flag @@ -3165,7 +3163,6 @@ def fill_gaps(old_var, verbose=False): return new_var, interp_flag - def std_extrapol_BAH(old_dict, ill_keys, bah_dict, old_ipflag_dict=dict(), verbose=False): # Will extrapolate some atmospheric variables to the ceiling of the dropsonde; old_ipflag will be updated. # Needs the old variable, the interpolation flag (should've been generated by fill_gaps()), the key and @@ -3176,26 +3173,22 @@ def std_extrapol_BAH(old_dict, ill_keys, bah_dict, old_ipflag_dict=dict(), verbo new_ipflag_dict = old_ipflag_dict - # To get the obs_height: Floor BAHAMAS altitude to the next lowest 100 m. drop_alt = np.floor(np.asarray(bah_dict['Z'])/100)*100 obs_height = np.max(np.unique(drop_alt)) # omit repeated values; this value is used for the top of the extrapolation - # BAHAMAS (or other optional measurement platform at flight altitude) temperature, pressure, relative humidity at launch time: bah_T = bah_dict['T'] bah_P = bah_dict['P'] bah_RH = bah_dict['RH'] bah_alt = bah_dict['Z'] - ceiling = obs_height # last entry of altitude if ceiling > 15000: raise ValueError("Dropsonde launch altitude appears to be > 15000 m. Extrapolation is aborted because the " + "tropopause may intervene.\n") return new_dict, new_ipflag_dict - # Any value above obs_height will be deleted: So if e.g. Z has got values above obs_height, delete them: # Find the first index that overshoots obs_height: with warnings.catch_warnings(): @@ -3215,7 +3208,6 @@ def std_extrapol_BAH(old_dict, ill_keys, bah_dict, old_ipflag_dict=dict(), verbo if key in ill_keys or key == 'Z': new_ipflag_dict[key] = new_ipflag_dict[key][:overshoot] - # At the end of 'Z' there may still be nans -> so we don't know to which altitude meteorological variables belong to in this region: # Therefore: delete it and replace by extrapolation: n_alt = len(new_dict['Z']) @@ -3235,7 +3227,6 @@ def std_extrapol_BAH(old_dict, ill_keys, bah_dict, old_ipflag_dict=dict(), verbo if key in ill_keys or key == 'Z': new_ipflag_dict[key] = new_ipflag_dict[key][:last_nonnan_alt+1] - # Extend the old height grid up to the ceiling if the distance is greater than 10 meters: alt = new_dict['Z'] n_alt = len(alt) @@ -3246,7 +3237,6 @@ def std_extrapol_BAH(old_dict, ill_keys, bah_dict, old_ipflag_dict=dict(), verbo new_dict['Z'] = alt new_ipflag_dict['Z'] = np.append(new_ipflag_dict['Z'], np.ones((n_alt_new - n_alt,))) - launch_time = datetime.datetime.utcfromtimestamp(new_dict['launch_time']).strftime("%Y-%m-%d %H:%M:%S") # for printing for key in ill_keys: @@ -3359,10 +3349,6 @@ def std_extrapol_BAH(old_dict, ill_keys, bah_dict, old_ipflag_dict=dict(), verbo continue else: - # # # extra_speed_length = 20 # amount of indices used for wind speed gradient calculation - - # # # for k in range(idx, n_alt_new): - # # # new_var[n,k] = new_var[n,idx] + (k-idx)*(new_var[n,idx] - new_var[n,idx-extra_speed_length]) / (extra_speed_length + (k-idx)) # Alternative: just use the latest value for higher altitudes: new_var[idx+1:] = new_var[idx] @@ -3389,15 +3375,10 @@ def std_extrapol_BAH(old_dict, ill_keys, bah_dict, old_ipflag_dict=dict(), verbo new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag new_var[np.argwhere(new_var[:] < 0)] = 0.0 - - # More variables may be added here, if desired. - - new_dict[key] = new_var return new_dict, new_ipflag_dict, obs_height - def std_extrapol(old_dict, ill_keys, old_ipflag_dict=dict(), verbose=False): # Will extrapolate some atmospheric variables to the ceiling of the dropsonde; old_ipflag will be updated. # Needs the old variable, the interpolation flag (should've been generated by fill_gaps()), @@ -3423,7 +3404,6 @@ def std_extrapol(old_dict, ill_keys, old_ipflag_dict=dict(), verbose=False): "tropopause may intervene.\n") return new_dict, new_ipflag_dict - # Any value above obs_height will be deleted: So if e.g. Z has got values above obs_height, delete them: # Find the first index that overshoots obs_height: with warnings.catch_warnings(): @@ -3443,7 +3423,6 @@ def std_extrapol(old_dict, ill_keys, old_ipflag_dict=dict(), verbose=False): if key in ill_keys or key == 'Z': new_ipflag_dict[key] = new_ipflag_dict[key][:overshoot] - # At the end of 'Z' there may still be nans -> so we don't know to which altitude meteorological variables belong to in this region: # Therefore: delete it and replace by extrapolation: n_alt = len(new_dict['Z']) @@ -3490,7 +3469,6 @@ def std_extrapol(old_dict, ill_keys, old_ipflag_dict=dict(), verbose=False): new_ipflag_dict[key] = np.append(new_ipflag_dict[key], np.zeros((n_alt_new - n_alt,)), axis=0) - if key == 'T': # Temperature: If dropsondes with measurements (ipflag = 0) from that day exist, estimate their average T gradient. # In case more than 15 % of T measurements must be extrapolated, the ICAO standard atmosphere is used as T gradient. @@ -3578,10 +3556,6 @@ def std_extrapol(old_dict, ill_keys, old_ipflag_dict=dict(), verbose=False): continue else: - # # # extra_speed_length = 20 # amount of indices used for wind speed gradient calculation - - # # # for k in range(idx, n_alt_new): - # # # new_var[n,k] = new_var[n,idx] + (k-idx)*(new_var[n,idx] - new_var[n,idx-extra_speed_length]) / (extra_speed_length + (k-idx)) # Alternative: Just use the latest value for higher altitudes: new_var[idx+1:] = new_var[idx] @@ -3612,14 +3586,10 @@ def std_extrapol(old_dict, ill_keys, old_ipflag_dict=dict(), verbose=False): new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag new_var[np.argwhere(new_var[:] < 0)] = 0.0 - - # More variables may be added here, if desired. - new_dict[key] = new_var return new_dict, new_ipflag_dict, obs_height - def regridding(new_dict, obs_height, ill_keys, resolution=10): ''' Regridding variables specified in ill_keys to a uniform grid @@ -3636,7 +3606,6 @@ def regridding(new_dict, obs_height, ill_keys, resolution=10): return new_dict - def repair_surface(old_dict, ill_keys, old_ipflag_dict=dict(), verbose=False): # Filling nan values at the surface if the gap to the surface isn't too large (e.g. measurements below 200 m # must exist (roughly 15-20 seconds before splash). @@ -3718,7 +3687,6 @@ def repair_surface(old_dict, ill_keys, old_ipflag_dict=dict(), verbose=False): return new_dict, new_ipflag_dict - def mark_outliers(sonde_dict, ill_keys): # mark outliers: outliers defined when exceeding certain thresholds new_dict = sonde_dict @@ -3742,7 +3710,6 @@ def mark_outliers(sonde_dict, ill_keys): # mark outliers: outliers defined when return new_dict - def readDropsondeNCraw(filename, verbose=False): """ Dropsonde netCDF file to dictionary. @@ -3780,7 +3747,6 @@ def readDropsondeNCraw(filename, verbose=False): if ((ncvar_type == np.float32) or (ncvar_type == np.float64)): dropsonde_dict[nc_keys][missing_idx] = float('nan') - # converting units: time stamps will be handled seperately. if hasattr(nc_var, 'units'): if nc_var.units == 'degC': @@ -3809,10 +3775,6 @@ def readDropsondeNCraw(filename, verbose=False): - ############################################################################################ - - - # Dropsonde quality control: if there are small gaps of measurements between launch altitude and surface, they will be filled via interpolation. # Additionally, the sondes will be extrapolated to a certain altitude. @@ -3825,11 +3787,9 @@ def readDropsondeNCraw(filename, verbose=False): raise ValueError("Optional measurements at flight altitude must include all four variables: Temperature (in K), pressure (in Pa), relative " + "humidity (in %), flight altitude (in m). If not all measurements can be provided, leave optional data input empty.") - failed_sondes = [] # lists the filenames of failed sondes (nearly no measurements) stuck_sondes = [] # lists the filenames of stuck sondes (sondes that don't show values above 1500 m) - sonde_dict = readDropsondeNCraw(file_raw_sondes, verbose) launch_date = datetime.datetime.utcfromtimestamp(sonde_dict['launch_time']).strftime("%Y-%m-%d") dropsonde_date = (datetime.datetime.strptime(launch_date, "%Y-%m-%d")).strftime("%Y%m%d") # date displayed in the filename @@ -3838,7 +3798,6 @@ def readDropsondeNCraw(filename, verbose=False): print("########## Launch time: " + datetime.datetime.utcfromtimestamp(sonde_dict['launch_time']).strftime("%Y-%m-%d %H:%M:%S") + " ##########\n") print("Input: ", file_raw_sondes) - # Add another condition that checks if e.g. nearly no measurements exist at all (for T, P and RH): if np.any([np.count_nonzero(~np.isnan(sonde_dict['T'])) < 0.1*len(sonde_dict['T']), np.count_nonzero(~np.isnan(sonde_dict['P'])) < 0.1*len(sonde_dict['P']), @@ -3850,19 +3809,16 @@ def readDropsondeNCraw(filename, verbose=False): failed_sondes.append(file_raw_sondes) raise ValueError("One PAMTRA-critical variable measurement failed. Skipping this dropsonde. \n") - # Add yet another condition that checks if the sonde got stuck mid air: if not np.any(sonde_dict['Z'][~np.isnan(sonde_dict['Z'])] < 1500): # then I assume that the whole launch was doomed raise ValueError("'gpsalt' doesn't seem to include any values below 1500 m. That is insufficient for extrapolation at the surface. \n") stuck_sondes.append(file_raw_sondes) - # Subsequent variables will be cured from holey nan value disease...: # pressure, temperature, relhum, wind (u & v & w), lat, lon. # Other variables, of which you can expect a linear interpolated over gaps to be applicable, may be added. ill_keys = ['T', 'P', 'RH', 'lat', 'lon', 'u_wind', 'v_wind'] - sonde_ipflag = dict() # will contain the interpolation flags for interpolated nan values in the middle of the drop # It's known that the altitude axis (gpsalt) does have some broken values (e.g. sudden jumps or exceeding the aircraft altitude). @@ -3873,7 +3829,6 @@ def readDropsondeNCraw(filename, verbose=False): sonde_dict['ipflag'] = sonde_ipflag - # The raw dropsonde files show an altitude variable with increases from [0 to -1] in general: but probably due to gps tracking, # the altitude decreases at the "top": this must be filtered out: # Find highest index where altitude[idx+1] - altitude[idx] > 0: @@ -3913,7 +3868,6 @@ def readDropsondeNCraw(filename, verbose=False): if key in ill_keys or key == 'Z': sonde_dict['ipflag'][key] = sonde_dict['ipflag'][key][:altitude_stop] - # The altitude index may be a bit broken... needs to be fixed. mark them as nan and let it run through fill_gaps again: dz = sonde_dict['Z'][1:] - sonde_dict['Z'][:-1] # dz[i] = z[i+1] - z[i] @@ -3923,7 +3877,6 @@ def readDropsondeNCraw(filename, verbose=False): sonde_dict['Z'], sonde_ipflag['Z'] = fill_gaps(sonde_dict['Z'], verbose=verbose) - # Perform surface repair for altitude coordinate: sonde_dict, sonde_dict['ipflag'] = repair_surface(sonde_dict, ['Z'], sonde_dict['ipflag'], verbose=verbose) # for some reasons, 'alt' is sort of unused but an assigned key in the dictionary. So ... we can also just set it to gpsalt: @@ -3933,7 +3886,6 @@ def readDropsondeNCraw(filename, verbose=False): # -> don't interpolate because the assumption would eventually lead to senseless surface values: sonde_dict, sonde_dict['ipflag'] = repair_surface(sonde_dict, ill_keys, sonde_dict['ipflag'], verbose=verbose) - # Extrapolating the ill_keys to the ceiling of the dropsondes (e.g. below aircraft altitude): # CAUTION: it is expected that the dropsondes start BELOW THE TROPOPAUSE! # It is advisable to do the TEMPERATURE EXTRAPOLATION FIRST because it improves the pressure extrapolation. @@ -3946,13 +3898,11 @@ def readDropsondeNCraw(filename, verbose=False): # Regridding to a uniform vertical grid with a user-specified resolution: sonde_dict = regridding(sonde_dict, obs_height, ill_keys, 10) - # Find outliers and mark them (as nan): afterwards fill them again sonde_dict = mark_outliers(sonde_dict, ['T', 'P', 'RH', 'u_wind', 'v_wind']) for key in ill_keys: sonde_dict[key], sonde_ipflag[key] = fill_gaps(sonde_dict[key], verbose=verbose) - # # # # # # # # # # # # # # # # # # # # # # # # # # # Check for remaining NaNs in meteorological variables: # RH, T and P (and the height coordinate Z): @@ -4011,7 +3961,10 @@ def readDropsondeNCraw(filename, verbose=False): pamData['lat'] = np.broadcast_to(sonde_dict['lat'][~np.isnan(sonde_dict['lat'])][-1], shape2d) pamData['timestamp'] = np.broadcast_to(sonde_dict['launch_time'], shape2d) - pamData['groundtemp'] = np.broadcast_to(sonde_dict['T'][0], shape2d) + if np.isnan(sst): + pamData['groundtemp'] = np.broadcast_to(sonde_dict['T'][0], shape2d) + else: + pamData['groundtemp'] = np.broadcast_to(sst, shape2d) # Obseravtion height: obs_height = np.asarray(obs_height).flatten() @@ -4023,32 +3976,21 @@ def readDropsondeNCraw(filename, verbose=False): pamData['sfc_refl'][:] = 'F' pamData['sfc_refl'][pamData['sfc_type'] == 1] = 'S' - # 4d variables: hydrometeors: - shape4d = [Nx, Nx, Nh-1, 5] # potentially 5 hydrometeor classes with this setting + shape4d = [Nx, Nx, Nh-1, 1] # potentially 5 hydrometeor classes with this setting pamData['hydro_q'] = np.zeros(shape4d) - pamData['hydro_q'][...,0] = 0# CLOUD - pamData['hydro_q'][...,1] = 0# ICE - pamData['hydro_q'][...,2] = 0# RAIN - pamData['hydro_q'][...,3] = 0# SNOW - pamData['hydro_q'][...,4] = 0# GRAUPEL - + pamData['hydro_q'][...,0] = 0 # CLOUD # Descriptorfile must be included. otherwise, pam.p.nhydro would be 0 which is not permitted. descriptorFile = np.array([ #['hydro_name' 'as_ratio' 'liq_ice' 'rho_ms' 'a_ms' 'b_ms' 'alpha_as' 'beta_as' 'moment_in' 'nbin' 'dist_name' 'p_1' 'p_2' 'p_3' 'p_4' 'd_1' 'd_2' 'scat_name' 'vel_size_mod' 'canting'] - ('cwc_q', -99.0, 1, -99.0, -99.0, -99.0, -99.0, -99.0, 3, 1, 'mono', -99.0, -99.0, -99.0, -99.0, 2e-05, -99.0, 'mie-sphere', 'khvorostyanov01_drops', -99.0), - ('iwc_q', -99.0, -1, -99.0, 130.0, 3.0, 0.684, 2.0, 3, 1, 'mono_cosmo_ice', -99.0, -99.0, -99.0, -99.0, -99.0, -99.0, 'mie-sphere', 'heymsfield10_particles', -99.0), - ('rwc_q', -99.0, 1, -99.0, -99.0, -99.0, -99.0, -99.0, 3, 50, 'exp', -99.0, -99.0, 8000000.0, -99.0, 0.00012, 0.006, 'mie-sphere', 'khvorostyanov01_drops', -99.0), - ('swc_q', -99.0, -1, -99.0, 0.038, 2.0, 0.3971, 1.88, 3, 50, 'exp_cosmo_snow', -99.0, -99.0, -99.0, -99.0, 5.1e-11, 0.02, 'mie-sphere', 'heymsfield10_particles', -99.0), - ('gwc_q', -99.0, -1, -99.0, 169.6, 3.1, -99.0, -99.0, 3, 50, 'exp', -99.0, -99.0, 4000000.0, -99.0, 1e-10, 0.01, 'mie-sphere', 'khvorostyanov01_spheres', -99.0)], + ('cwc_q', -99.0, 1, -99.0, -99.0, -99.0, -99.0, -99.0, 3, 1, 'mono', -99.0, -99.0, -99.0, -99.0, 2e-05, -99.0, 'mie-sphere', 'khvorostyanov01_drops', -99.0)], dtype=[('hydro_name', 'S15'), ('as_ratio', ' Date: Thu, 17 Dec 2020 18:00:39 +0100 Subject: [PATCH 3/3] Forgot to add a blank line at the end of the file. --- python/pyPamtra/importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyPamtra/importer.py b/python/pyPamtra/importer.py index 3c640f6..098d5f3 100644 --- a/python/pyPamtra/importer.py +++ b/python/pyPamtra/importer.py @@ -3993,4 +3993,4 @@ def readDropsondeNCraw(filename, verbose=False): pam.createProfile(**pamData) - return pam \ No newline at end of file + return pam