diff --git a/python/pyPamtra/importer.py b/python/pyPamtra/importer.py index 8e188bf..098d5f3 100644 --- a/python/pyPamtra/importer.py +++ b/python/pyPamtra/importer.py @@ -3062,3 +3062,935 @@ def ncToDict(ncFilePath,keys='all',joinDimension='time',offsetKeys={},ncLib='net os.remove(ncFile) return joinedData +def read_AVAPS_dropsonde(file_raw_sondes, sst=np.nan, opt_T=np.nan, opt_P=np.nan, + opt_RH=np.nan, opt_Z=np.nan, verbose=False): + + """ + PAMTRA importer for raw AVAPS dropsonde data (preferred ending: 'PQC.nc') in clear sky + conditions over oceans. + + Parameters + ---------- + file_raw_sondes : str + File of raw dropsonde data (preferred ending: 'PQC.nc'). + sst : float, optional + Sea surface temperature (in K). If this parameter is not set, the lowest dropsonde + temperature measurement is used (eventually extrapolated). + opt_T : float, optional + Optional temperature (e.g. measured by BAHAMAS) (in K) at flight altitude. + opt_P: float, optional + Optional pressure (e.g. measured by BAHAMAS) (in Pa) at flight altitude. + opt_RH: float, optional + Optional relative humidity (e.g. measured by BAHAMAS) (in %) at flight altitude. + opt_Z: float, optional + Optional flight altitude during sonde launch (e.g. taken from BAHAMAS) (in m). + verbose : bool, optional + If True some extra information is printed. May be extensive if many dropsondes are imported! + """ + + # FUNCTIONS + + def fill_gaps(old_var, verbose=False): + # old variable gets linearly interpolated for each sonde launch. The function is ignoring nan + # values at the surface and above the launch altitude. + + new_var = deepcopy(old_var) + # create flag variable indicating if an entry of old_var has been changed: if = 0: not interpol. + interp_flag = np.zeros(old_var.shape) + + # identify regions of nan values in the middle of the drop. Extrapolation will be handled in another + # function. 
+ # identify the highest non-nan entry so we can cut the values above that highest entry: + # identify the lowest non-nan entry for similar reasons: + non_nan_idx = [idx for idx, x in np.ndenumerate(old_var) if (not np.isnan(x))] + non_nan_idx = np.where(~np.isnan(old_var))[0] + limits = np.array([non_nan_idx[0], non_nan_idx[-1]]) + + temp_var = deepcopy(old_var) + temp_var = temp_var[limits[0]:limits[1]+1] # will be the variable where the gaps are filled + + interp_flag_temp = np.zeros(temp_var.shape) + + # identify mid-drop-nan-values: need values after and before the nan: + nan_idx = np.argwhere(np.isnan(temp_var)) + interp_flag_temp[nan_idx] = 1 + + if nan_idx.size == 0: + return new_var, interp_flag + + else: # correct nan values: find the hole size via subtraction of subsequent indices + hole_size = np.zeros((len(nan_idx)+1,)).astype(int) + k = 0 # index to address a hole ('hole number') + + for m in range(0, len(temp_var)-1): + + if not np.isnan(temp_var[m+1] - temp_var[m]): + hole_size[k] = 0 + + elif np.isnan(temp_var[m+1] - temp_var[m]): # k only incremented if an END of a hole has been identified: + if len(nan_idx) == 1: # handled seperately in case that merely one nan value exists in temp_var + hole_size[k] = 1 + break + + else: + if (not np.isnan(temp_var[m+1])) & (np.isnan(temp_var[m])): # END of a hole + k = k + 1 + continue + hole_size[k] = hole_size[k] + 1 # k won't be incremented until m finds another non-nan value + + else: + raise RuntimeError("Something unexpected happened when trying to find nan values in the middle " + + "of the dropsonde launch. Please contact 'a.walbroel@uni-koeln.de'. \n") + + # holes have been identified: edit the FIRST hole (editing depends on the size of the hole...) + c = 0 # dummy variable needed for the right jumps in hole_size and nan_idx. c is used to address + # nan_idx and therefore new_var... 
+ + # meanwhile 'd' just runs through the array hole_size: + for d in range(0, len(hole_size)): + for L in range(0, hole_size[d]): # range(0, 1): L = 0 + temp_var[nan_idx[c] + L] = temp_var[nan_idx[c] - 1] + (L + 1)*(temp_var[int(nan_idx[c] + + hole_size[d])] - temp_var[nan_idx[c]-1]) / (hole_size[d] + 1) + + c = c + int(hole_size[d]) + if c > len(hole_size)-1: + break + + # overwrite the possibly holey section: + new_var[limits[0]:limits[1]+1] = temp_var + # update interp_flag + interp_flag[limits[0]:limits[1]+1] = interp_flag_temp + + return new_var, interp_flag + + def std_extrapol_BAH(old_dict, ill_keys, bah_dict, old_ipflag_dict=dict(), verbose=False): + # Will extrapolate some atmospheric variables to the ceiling of the dropsonde; old_ipflag will be updated. + # Needs the old variable, the interpolation flag (should've been generated by fill_gaps()), the key and + # height levels as INPUT + + new_dict = old_dict + n_alt = len(new_dict['Z']) + + new_ipflag_dict = old_ipflag_dict + + # To get the obs_height: Floor BAHAMAS altitude to the next lowest 100 m. + drop_alt = np.floor(np.asarray(bah_dict['Z'])/100)*100 + obs_height = np.max(np.unique(drop_alt)) # omit repeated values; this value is used for the top of the extrapolation + + # BAHAMAS (or other optional measurement platform at flight altitude) temperature, pressure, relative humidity at launch time: + bah_T = bah_dict['T'] + bah_P = bah_dict['P'] + bah_RH = bah_dict['RH'] + bah_alt = bah_dict['Z'] + + ceiling = obs_height # last entry of altitude + if ceiling > 15000: + raise ValueError("Dropsonde launch altitude appears to be > 15000 m. Extrapolation is aborted because the " + + "tropopause may intervene.\n") + return new_dict, new_ipflag_dict + + # Any value above obs_height will be deleted: So if e.g. 
Z has got values above obs_height, delete them: + # Find the first index that overshoots obs_height: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + overshoot = np.argwhere(new_dict['Z'] >= obs_height) + if len(overshoot) > 0: + overshoot = overshoot[0][0] + 1 + + for key in new_dict.keys(): + if key in ['trajectory', 'fillValues', 'ipflag']: # skip these ones ... it s not interesting anyway + continue + + if new_dict[key].ndim > 0: # otherwise: error when using len() + + if len(new_dict[key]) == n_alt: + new_dict[key] = new_dict[key][:overshoot] # limit variable to obs_height + if key in ill_keys or key == 'Z': + new_ipflag_dict[key] = new_ipflag_dict[key][:overshoot] + + # At the end of 'Z' there may still be nans -> so we don't know to which altitude meteorological variables belong to in this region: + # Therefore: delete it and replace by extrapolation: + n_alt = len(new_dict['Z']) + last_nonnan_alt = np.argwhere(~np.isnan(new_dict['Z']))[-1][0] + if ceiling - new_dict['Z'][last_nonnan_alt] > 1000: + if verbose: + print("WARNING: highest GPS altitude measurement is at least 1000 m below the aircraft. Extrapolation may be erroneous.\n") + + for key in new_dict.keys(): + if key in ['trajectory', 'fillValues', 'ipflag']: # skip these ones ... 
it s not interesting anyway + continue + + if new_dict[key].ndim > 0: # otherwise: error when using len() + + if len(new_dict[key]) == n_alt: + new_dict[key] = new_dict[key][:last_nonnan_alt+1] # limit variable to obs_height + if key in ill_keys or key == 'Z': + new_ipflag_dict[key] = new_ipflag_dict[key][:last_nonnan_alt+1] + + # Extend the old height grid up to the ceiling if the distance is greater than 10 meters: + alt = new_dict['Z'] + n_alt = len(alt) + alt = np.append(alt, np.arange(alt[np.argwhere(~np.isnan(alt))[-1]]+10, ceiling+11, 10)) + n_alt_new = len(alt) + + # Update the altitude variable in the dictionary: & update ipflag for gpsalt: + new_dict['Z'] = alt + new_ipflag_dict['Z'] = np.append(new_ipflag_dict['Z'], np.ones((n_alt_new - n_alt,))) + + launch_time = datetime.datetime.utcfromtimestamp(new_dict['launch_time']).strftime("%Y-%m-%d %H:%M:%S") # for printing + + for key in ill_keys: + + new_var = new_dict[key] + # Must be expanded to the new height grid (up to the new ceiling) + new_var = np.append(new_var, np.full((n_alt_new - n_alt,), np.nan), axis=0) # append nans at the top of the profile + + if not new_ipflag_dict: # in case fill_gaps(...) hasn't been called before this one, it's assumed that nothing has been interpolated yet. + new_ipflag_dict[key] = np.zeros(new_var.shape) + + else: # new_ipflag also has to be extended to the new hgt grid: + new_ipflag_dict[key] = np.append(new_ipflag_dict[key], np.zeros((n_alt_new - n_alt,)), axis=0) + + + if key == 'T': + # If BAHAMAS Temperature measurement is available use it as target in case only the top 15 % of measurements + # are missing. Otherwise: + # Temperature: If dropsondes with measurements (ipflag = 0) from that day exist, estimate their average T gradient. 
+ # If the extrapolated dropsonde temperature then deviates from BAH T by more than 5 K, use the ICAO std atmosphere + # as T gradient: + # Standard atmosphere (shifted accordingly to avoid a jump between the last known value and the extrapolation). + # ICAO standard atmosphere taken from: + # https://www.dwd.de/DE/service/lexikon/begriffe/S/Standardatmosphaere_pdf.pdf?__blob=publicationFile&v=3 + ICAO_standard_T = 288.15 - 0.0065*alt + + # Find highest non nan value if it lies below the ceiling: + idx = np.argwhere(~np.isnan(new_var)).flatten()[-1] + + + if alt[idx] < 0.6*ceiling: + if verbose: + print("Insufficient amount of measurements for temperature extrapolation at the top of the dropsonde grid (" + launch_time + + "). There are no temperature measurements above " + str(alt[idx]) + " m.\n") + new_dict[key] = new_var # then just overwrite the dictionary entry with the nonedited (but extended) variable + continue + + if alt[idx] < 0.85*ceiling: # then use BAHAMAS temperature as extrapolation target: + new_var[idx+1:] = (new_var[idx] + (bah_T - new_var[idx]) / (bah_alt - alt[idx]) * (alt[idx+1:] - alt[idx])) + + else: + # Or use mean T gradient of highest 20 measurements and continue with this gradient: + # Compute mean T gradient of highest 20 measurements: + mean_T_grad = np.mean(np.asarray([(new_var[idx-19:idx+1] - new_var[idx-20:idx]) / (alt[idx-19:idx+1] - alt[idx-20:idx])])) + extra_T = 288.15 + mean_T_grad*alt + + new_var[idx+1:] = extra_T[idx+1:] - (extra_T[idx] - new_var[idx]) + + if np.abs(new_var[-1] - bah_T) > 5: # then the deviation from BAHAMAS T is too great and we use the ICAO std atmosphere + new_var[idx+1:] = ICAO_standard_T[idx+1:] - (ICAO_standard_T[idx] - new_var[idx]) + + + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + + + elif key == 'P': + # Pressure: use hydrostatic eq. 
with scale height H = R / g0, R = 287 J kg^-1 K^-1, g0 = 9.81 m s^-2, using
+                # the vertical mean temperature:
+                # p(z) = p0 exp( -z / H)        (Holton, p.21);
+
+                # Find highest non nan value if it lies below the ceiling:
+                idx = np.argwhere(~np.isnan(new_var)).flatten()[-1]
+
+
+                if alt[idx] < ceiling/3:
+                    if verbose:
+                        print("Insufficient amount of measurements for pressure extrapolation at the top of the dropsonde grid (" + launch_time +
+                            "). There are no pressure measurements above " + str(alt[idx]) + " m.\n")
+                    new_dict[key] = new_var
+                    continue
+
+                # MAKE SURE THAT mean TEMPERATURE CAPTURES THE ACTUAL MEAN TEMPERATURE!!
+                if np.count_nonzero(~np.isnan(new_dict['T'][:])) / float(n_alt_new) <= 0.75:    # in this case you may expect that a mean temperature would yield
+                    # a bad representation of the true scale height. 0.75 was chosen arbitrarily.
+                    T_icao_0 = 12*np.cos(4*np.pi*np.nanmean(new_dict['lat'][:])/360) + 288.15    # strongly simplified meridional surface temperature structure
+                    H = 287 * np.mean(288.15 - 0.0065*alt) / 9.81    # using the ICAO standard atmosphere to compute the mean temperature
+
+                    if verbose:
+                        print("Warning: Because insufficient non-nan temperature values were given for launch " +
+                            launch_time + ", '" + str(np.mean(288.15 - 0.0065*alt)) +
+                            " K' was assumed to be the mean temperature for hydrostatic pressure calculation. \n")
+
+                else:
+                    H = 287 * np.nanmean(new_dict['T'][:]) / 9.81    # scale height
+
+                # Find index of lowest non nan pressure measurement:
+                l_idx = np.argwhere(~np.isnan(new_var[:]))[0]
+                p_ref = new_var[l_idx]    # in Pa
+                alt_ref = alt[l_idx]
+                p_hydrostat = p_ref * np.exp(-(alt - alt_ref) / H)    # in Pa
+
+                new_var[idx+1:] = p_hydrostat[idx+1:] - (p_hydrostat[idx] - new_var[idx])
+                new_ipflag_dict[key][idx+1:] = 1    # setting the interpol flag
+
+
+            elif key == 'u_wind' or key == 'v_wind':
+                # Wind: idea: Fill nan values with the mean wind gradient of the highest 20 (non-nan) measurements. 
+ # It will only be extrapolated if the the last non-nan entry is higher than 0.80*ceiling: + # Other idea: just keep the same wind value + + # Find highest non nan value if it lies below the ceiling: + idx = np.argwhere(~np.isnan(new_var)).flatten()[-1] + + + if alt[idx] < 0.8*ceiling: + if verbose: + print("Insufficient amount of measurements for wind extrapolation at the top of the dropsonde grid (" + launch_time + + "). There are no wind measurements above " + str(alt[idx]) + " m.\n") + new_dict[key] = new_var + continue + + else: + + # Alternative: just use the latest value for higher altitudes: + new_var[idx+1:] = new_var[idx] + new_var[idx+1:] = new_var[idx+1:] + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + + + elif key == 'RH': + # Relative humidity (RH): Idea: Linearly interpolate to the BAHAMAS value + + # Find highest non nan value if it lies below the ceiling: + idx = np.argwhere(~np.isnan(new_var)).flatten()[-1] + + + if alt[idx] < 0.65*ceiling: + if verbose: + print("Insufficient amount of measurements for relative humidity extrapolation at the top of the dropsonde grid (" + launch_time + + "). There are no rel. hum. measurements above " + str(alt[idx]) + " m.\n") + new_dict[key] = new_var + continue + + else: + new_var[idx+1:] = (new_var[idx] + (bah_RH - new_var[idx]) / (bah_alt - alt[idx]) * (alt[idx+1:] - alt[idx])) + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + new_var[np.argwhere(new_var[:] < 0)] = 0.0 + + new_dict[key] = new_var + + return new_dict, new_ipflag_dict, obs_height + + def std_extrapol(old_dict, ill_keys, old_ipflag_dict=dict(), verbose=False): + # Will extrapolate some atmospheric variables to the ceiling of the dropsonde; old_ipflag will be updated. 
+ # Needs the old variable, the interpolation flag (should've been generated by fill_gaps()), + # the key and height levels as INPUT + + new_dict = old_dict + n_alt = len(new_dict['Z']) + + new_ipflag_dict = old_ipflag_dict + + # To get the obs_height: find highest ... (?) + if np.isnan(new_dict['reference_alt']): + # Select the highest non nan index of T or P. + highest_nonnan_Z = np.argwhere(~np.isnan(new_dict['Z']))[-1] + obs_height = np.floor(new_dict['Z'][highest_nonnan_Z[0]]/100)*100 + else: # use the reference_alt + obs_height = (np.floor(new_dict['reference_alt']/100)*100)[0] + + + ceiling = obs_height # last entry of altitude + if ceiling > 15000: + raise ValueError("Dropsonde launch altitude appears to be > 15000 m. Extrapolation is aborted because the " + + "tropopause may intervene.\n") + return new_dict, new_ipflag_dict + + # Any value above obs_height will be deleted: So if e.g. Z has got values above obs_height, delete them: + # Find the first index that overshoots obs_height: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + overshoot = np.argwhere(new_dict['Z'] >= obs_height) + if len(overshoot) > 0: + overshoot = overshoot[0][0] + 1 + + for key in new_dict.keys(): + if key in ['trajectory', 'fillValues', 'ipflag']: # skip these ones ... 
it s not interesting anyway + continue + + if new_dict[key].ndim > 0: # otherwise: error when using len() + + if len(new_dict[key]) == n_alt: + new_dict[key] = new_dict[key][:overshoot] # limit variable to obs_height + if key in ill_keys or key == 'Z': + new_ipflag_dict[key] = new_ipflag_dict[key][:overshoot] + + # At the end of 'Z' there may still be nans -> so we don't know to which altitude meteorological variables belong to in this region: + # Therefore: delete it and replace by extrapolation: + n_alt = len(new_dict['Z']) + last_nonnan_alt = np.argwhere(~np.isnan(new_dict['Z']))[-1][0] + if ceiling - new_dict['Z'][last_nonnan_alt] > 1000: + if verbose: + print("WARNING: highest GPS altitude measurement is at least 1000 m below the aircraft. Extrapolation may be erroneous.\n") + + for key in new_dict.keys(): + if key in ['trajectory', 'fillValues', 'ipflag']: # skip these ones ... it s not interesting anyway + continue + + if new_dict[key].ndim > 0: # otherwise: error when using len() + + if len(new_dict[key]) == n_alt: + new_dict[key] = new_dict[key][:last_nonnan_alt+1] # limit variable to obs_height + if key in ill_keys or key == 'Z': + new_ipflag_dict[key] = new_ipflag_dict[key][:last_nonnan_alt+1] + + + # Extend the old height grid up to the ceiling if the distance is greater than 10 meters: + alt = new_dict['Z'] + n_alt = len(alt) + alt = np.append(alt, np.arange(alt[np.argwhere(~np.isnan(alt))[-1]]+10, ceiling+11, 10)) + n_alt_new = len(alt) + + # Update the altitude variable in the dictionary: & update ipflag for gpsalt: + new_dict['Z'] = alt + new_ipflag_dict['Z'] = np.append(new_ipflag_dict['Z'], np.ones((n_alt_new - n_alt,))) + + + launch_time = datetime.datetime.utcfromtimestamp(new_dict['launch_time']).strftime("%Y-%m-%d %H:%M:%S") # for printing + + for key in ill_keys: + + new_var = new_dict[key] + # Must be expanded to the new height grid (up to the new ceiling) + new_var = np.append(new_var, np.full((n_alt_new - n_alt,), np.nan), axis=0) # append 
nans at the top of the profile + + if not new_ipflag_dict: # in case fill_gaps(...) wasn't called before this one, it's assumed that nothing has been interpolated yet. + new_ipflag_dict[key] = np.zeros(new_var.shape) + + else: # new_ipflag also has to be extended to the new hgt grid: + new_ipflag_dict[key] = np.append(new_ipflag_dict[key], np.zeros((n_alt_new - n_alt,)), axis=0) + + + if key == 'T': + # Temperature: If dropsondes with measurements (ipflag = 0) from that day exist, estimate their average T gradient. + # In case more than 15 % of T measurements must be extrapolated, the ICAO standard atmosphere is used as T gradient. + # ICAO standard atmosphere taken from: + # https://www.dwd.de/DE/service/lexikon/begriffe/S/Standardatmosphaere_pdf.pdf?__blob=publicationFile&v=3 + ICAO_standard_T = 288.15 - 0.0065*alt + + # find highest non nan value if it lies below the ceiling: + idx = np.argwhere(~np.isnan(new_var)).flatten()[-1] + + if alt[idx] < 0.6*ceiling: + if verbose: + print("Insufficient amount of measurements for temperature extrapolation at the top of the dropsonde grid (" + launch_time + + "). There are no temperature measurements above " + str(alt[idx]) + " m.\n") + new_dict[key] = new_var # then just overwrite the dictionary entry with the nonedited (but extended) variable + continue + + if alt[idx] < 0.85*ceiling: # then use standard atmosphere (ICAO): + new_var[idx+1:] = ICAO_standard_T[idx+1:] + (new_var[idx] - ICAO_standard_T[idx]) + + else: + # Use mean T gradient of highest 20 measurements and continue with this gradient: + # Compute mean T gradient of highest 20 measurements: + mean_T_grad = np.mean(np.asarray([(new_var[idx-19:idx+1] - new_var[idx-20:idx]) / (alt[idx-19:idx+1] - alt[idx-20:idx])])) + T_continued = 288.15 + mean_T_grad*alt + + new_var[idx+1:] = T_continued[idx+1:] - (T_continued[idx] - new_var[idx]) + + + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + + + elif key == 'P': + # Pressure: use hydrostatic eq. 
with scale height H = R / g0, R = 287 J kg^-1 K^-1, g0 = 9.81 m s^-2, using the vertical mean temperature:
+                # p(z) = p0 exp( -z / H)        (Holton, p.21);
+
+                # Find highest non nan value if it lies below the ceiling:
+                idx = np.argwhere(~np.isnan(new_var)).flatten()[-1]
+
+                if alt[idx] < ceiling/3:
+                    if verbose:
+                        print("Insufficient amount of measurements for pressure extrapolation at the top of the dropsonde grid (" + launch_time +
+                            "). There are no pressure measurements above " + str(alt[idx]) + " m.\n")
+                    new_dict[key] = new_var
+                    continue
+
+                # MAKE SURE THAT mean TEMPERATURE CAPTURES THE ACTUAL MEAN TEMPERATURE!!
+                if np.count_nonzero(~np.isnan(new_dict['T'][:])) / float(n_alt_new) <= 0.75:    # in this case you may expect that a mean temperature would yield
+                    # a bad representation of the true scale height. 0.75 was chosen arbitrarily.
+                    T_icao_0 = 12*np.cos(4*np.pi*np.nanmean(new_dict['lat'][:])/360) + 288.15    # strongly simplified meridional surface temperature structure
+                    H = 287 * np.mean(288.15 - 0.0065*alt) / 9.81    # using the ICAO standard atmosphere to compute the mean temperature
+
+                    if verbose:
+                        print("Warning: Because insufficient non-nan temperature values were given for launch " +
+                            launch_time + ", '" + str(np.mean(288.15 - 0.0065*alt)) +
+                            " K' was assumed to be the mean temperature for hydrostatic pressure calculation. 
Can possibly be " + + "avoided if the temperature is extrapolated before the pressure.\n") + + else: + H = 287 * np.nanmean(new_dict['T'][:]) / 9.81 # scale height + + # Find index of lowest non nan pressure measurement: + l_idx = np.argwhere(~np.isnan(new_var[:]))[0] + p_ref = new_var[l_idx] # in Pa + alt_ref = alt[l_idx] + p_hydrostat = p_ref * np.exp(-(alt - alt_ref) / H) # in Pa + + new_var[idx+1:] = p_hydrostat[idx+1:] - (p_hydrostat[idx] - new_var[idx]) + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + + + elif key == 'u_wind' or key == 'v_wind': + # Wind: idea: Fill nan values with the mean wind gradient of the highest 20 (non-nan)measurents. It will only be extrapolated if the the last non-nan entry + # is higher than 0.80*ceiling: + # Other idea: Just keep the same wind value + + # Find highest non nan value if it lies below the ceiling: + idx = np.argwhere(~np.isnan(new_var)).flatten()[-1] + + if alt[idx] < 0.8*ceiling: + if verbose: + print("Insufficient amount of measurements for wind extrapolation at the top of the dropsonde grid (" + launch_time + + "). There are no wind measurements above " + str(alt[idx]) + " m.\n") + new_dict[key] = new_var + continue + + else: + + # Alternative: Just use the latest value for higher altitudes: + new_var[idx+1:] = new_var[idx] + new_var[idx+1:] = new_var[idx+1:] + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + + + elif key == 'RH': + # Relative humidity (RH): Idea: fill nan values with the mean RH of the highest 10 measurements but only + # if the highest measurement exceeds or is equal to 0.95*ceiling! + # Other idea: Use RH approx 0 for greater altitudes. + noise_strength = 0/2 # percent + + idx = np.argwhere(~np.isnan(new_var)).flatten()[-1] + + if alt[idx] < 0.65*ceiling: + if verbose: + print("Insufficient amount of measurements for relative humidity extrapolation at the top of the dropsonde grid (" + launch_time + + "). There are no rel. hum. 
measurements above " + str(alt[idx]) + " m.\n") + new_dict[key] = new_var + continue + + elif alt[idx] >= 0.95*ceiling: + new_var[idx+1:] = np.mean(new_var[idx-9:idx+1]) + + else: + new_var[idx+1:] = 1.5 + new_ipflag_dict[key][idx+1:] = 1 # setting the interpol flag + new_var[np.argwhere(new_var[:] < 0)] = 0.0 + + new_dict[key] = new_var + + return new_dict, new_ipflag_dict, obs_height + + def regridding(new_dict, obs_height, ill_keys, resolution=10): + ''' + Regridding variables specified in ill_keys to a uniform grid + (from the surface up to obs_height) with a user-defined + resolution (in meters, default=10). + ''' + + new_alt = np.arange(0, obs_height+1, 10) + + for key in ill_keys: + new_dict[key] = np.interp(new_alt, new_dict['Z'], new_dict[key]) + + new_dict['Z'] = new_alt + + return new_dict + + def repair_surface(old_dict, ill_keys, old_ipflag_dict=dict(), verbose=False): + # Filling nan values at the surface if the gap to the surface isn't too large (e.g. measurements below 200 m + # must exist (roughly 15-20 seconds before splash). + new_dict = old_dict + alt = old_dict['Z'] + n_alt = len(alt) + new_ipflag_dict = old_ipflag_dict + launch_time = datetime.datetime.utcfromtimestamp(new_dict['launch_time']).strftime("%Y-%m-%d %H:%M:%S") + + lim = 200 # if there are no measurements below this altitude then the extrapolation at the surface won't be performed + + if ill_keys == ['Z']: + threshold_list = [ill_keys, [200], ['m']] + else: + threshold_list = [ill_keys, [5.0, 4000.0, 50.0, 0.1, 0.1, 5.0, 5.0, 1.0], + ['K', 'hPa', '%', 'deg', 'deg', 'm/s', 'm/s', 'm/s']] # used to check if surface value deviates siginificantly from lowest measurement + + for key in ill_keys: + + new_var = new_dict[key] + + if not new_ipflag_dict: # in case fill_gaps(...) wasn't called before this one, it's assumed that nothing has been interpolated yet. 
+ new_ipflag_dict[key] = np.zeros(new_var.shape) + + + # find the first non-nan entry + idx = np.argwhere(~np.isnan(new_var[:]))[0][0] + + if alt[idx] < lim: + sfc_gap = np.arange(0,idx) + + if len(sfc_gap) == 0: + continue + else: + + # create mean gradient of the variable of 10 measurements above the lowest measurement, or, if grid is too coarse, take 200-400 m average: + if alt[idx+10] > 2*lim: # default: if alt[idx+10] > 400: # take lowest measurement to 400 m mean gradient + # find index closest to 400 m: + idx2 = np.argmin(np.abs(alt - 2*lim)) + + else: # take mean grad. of 10 measurem. above lowest measurement: + idx2 = idx+10 + + mean_grad = np.mean([new_var[j+1] - new_var[j] for j in range(idx,idx2)]) # theoretically, should never be nan because fill_gaps + # should've fixed the holes between the first and last measurement + for j in sfc_gap: + new_var[idx-j-1] = new_var[idx] - mean_grad*(j+1) + + + # check if sfc value not too far off the lowest measurement: + if key == 'RH': + if np.any(new_var[sfc_gap] < 0): + new_var[sfc_gap] = 0 + if verbose: + print("Caution, '" + key + "' surface repair resulted in negative values. Manually setting the missing values at the ground to 0 for launch " + + launch_time + ".\n") + + elif np.any(new_var[sfc_gap] > 100): + new_var[sfc_gap] = 100 + if verbose: + print("Caution, '" + key + "' surface repair resulted in >100 %. Manually setting the missing values at the ground to 100 for launch " + + launch_time + ".\n") + + threshold = threshold_list[1][threshold_list[0].index(key)] + si_unit = threshold_list[2][threshold_list[0].index(key)] + if np.abs(new_var[0] - new_var[idx]) > threshold: + if verbose: + print("Caution, '" + key + "' surface value deviates more than " + str(threshold) + " " + si_unit + " from the lowest measurement (launch " + + launch_time + ").\n") + + + new_ipflag_dict[key][sfc_gap] = 1 + + else: + if verbose: + print("No measurements below " + str(lim) + " m. 
Extrapolation of '" + key + "', launch " + launch_time + + " would eventually lead to wrong assumptions at the surface. Therefore aborted.\n") + continue + + return new_dict, new_ipflag_dict + + def mark_outliers(sonde_dict, ill_keys): # mark outliers: outliers defined when exceeding certain thresholds + + new_dict = sonde_dict + + # Thresholds are defined by change of meteorol. variable with altitude: e.g. delta p / delta z + thresholds = [0.065, 40, 2.5, 1, 1] # in [K/m, Pa/m, %/m, ms^-1/m, ms^-1/m] + dz = new_dict['Z'][1:] - new_dict['Z'][:-1] # delta z + + for key in ill_keys: + if key == 'lat' or key == 'lon': + continue + + met_threshold = thresholds[ill_keys.index(key)] # threshold for key + + d_met = new_dict[key][1:] - new_dict[key][:-1] # change of meteorological variable 'key' + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + exceed_idx = np.argwhere(np.abs(d_met / dz) >= met_threshold) + new_dict[key][exceed_idx] = float('nan') + + return new_dict + + def readDropsondeNCraw(filename, verbose=False): + """ + Dropsonde netCDF file to dictionary. + Variables with a designated unit that is not an SI unit will be converted to SI units. + Rel. humidity will be in % though. + Time will be given in seconds since 1970-01-01 00:00:00 UTC. + + Parameters + ---------- + filename : str + Filename of the raw dropsonde files (suffix "PQC.nc"!) + verbose : bool + Additional comments printed if True. 
+ """ + + import netCDF4 as nc + + file_nc = nc.Dataset(filename) + + dropsonde_dict = dict() + dropsonde_dict['fillValues'] = dict() + for nc_keys in file_nc.variables.keys(): + nc_var = file_nc.variables[nc_keys] + dropsonde_dict[nc_keys] = np.asarray(nc_var) + + if hasattr(nc_var, 'missing_value'): + dropsonde_dict['fillValues'][nc_keys] = nc_var.missing_value + + # type of the nc_var: + ncvar_type = type(dropsonde_dict[nc_keys][0]) + + # find where the current variable has missing values and set them to nan: + missing_idx = np.argwhere(dropsonde_dict[nc_keys] == dropsonde_dict['fillValues'][nc_keys]) + + if ((ncvar_type == np.float32) or (ncvar_type == np.float64)): + dropsonde_dict[nc_keys][missing_idx] = float('nan') + + # converting units: time stamps will be handled seperately. + if hasattr(nc_var, 'units'): + if nc_var.units == 'degC': + dropsonde_dict[nc_keys] = dropsonde_dict[nc_keys] + 273.15 + if verbose: print("From degC to K: " + str(nc_keys)) + elif nc_var.units == 'hPa': + dropsonde_dict[nc_keys] = dropsonde_dict[nc_keys]*100 + if verbose: print("From hPa to Pa: " + str(nc_keys)) + elif nc_var.units == 'gram/kg': + dropsonde_dict[nc_keys] = dropsonde_dict[nc_keys]/1000 + if verbose: print("From g/kg to kg/kg: " + str(nc_keys)) + + time_base = datetime.datetime.strptime(file_nc.variables['launch_time'].units[14:-4], "%Y-%m-%d %H:%M:%S") # time base given in the units attribute + dropsonde_dict['launch_time'] = (time_base - datetime.datetime(2020,1,1)).total_seconds() + dropsonde_dict['launch_time'] + dropsonde_dict['reference_time'] = (time_base - datetime.datetime(2020,1,1)).total_seconds() + dropsonde_dict['reference_time'] + dropsonde_dict['time'] = (time_base - datetime.datetime(2020,1,1)).total_seconds() + dropsonde_dict['time'] + if verbose: print("\n") + + # Convert to internal convention: Temperature = T, Pressure = P, relative humidity = RH, altitude = Z + dropsonde_dict['T'] = dropsonde_dict['tdry'] + dropsonde_dict['P'] = 
dropsonde_dict['pres'] + dropsonde_dict['RH'] = dropsonde_dict['rh'] + dropsonde_dict['Z'] = dropsonde_dict['gpsalt'] + + return dropsonde_dict + + + + # Dropsonde quality control: if there are small gaps of measurements between launch altitude and surface, they will be filled via interpolation. + # Additionally, the sondes will be extrapolated to a certain altitude. + + # BAHAMAS data (optional): + if np.all([~np.isnan(opt_T), ~np.isnan(opt_P), ~np.isnan(opt_RH), ~np.isnan(opt_Z)]): + opt_dict = {'T': opt_T, 'P': opt_P, 'RH': opt_RH, 'Z': opt_Z} + elif np.all([np.isnan(opt_T), np.isnan(opt_P), np.isnan(opt_RH), np.isnan(opt_Z)]): + opt_dict = dict() + else: + raise ValueError("Optional measurements at flight altitude must include all four variables: Temperature (in K), pressure (in Pa), relative " + + "humidity (in %), flight altitude (in m). If not all measurements can be provided, leave optional data input empty.") + + failed_sondes = [] # lists the filenames of failed sondes (nearly no measurements) + stuck_sondes = [] # lists the filenames of stuck sondes (sondes that don't show values above 1500 m) + + sonde_dict = readDropsondeNCraw(file_raw_sondes, verbose) + launch_date = datetime.datetime.utcfromtimestamp(sonde_dict['launch_time']).strftime("%Y-%m-%d") + dropsonde_date = (datetime.datetime.strptime(launch_date, "%Y-%m-%d")).strftime("%Y%m%d") # date displayed in the filename + # --> comfy way to find the right BAHAMAS data for std_extrapol + if verbose: + print("########## Launch time: " + datetime.datetime.utcfromtimestamp(sonde_dict['launch_time']).strftime("%Y-%m-%d %H:%M:%S") + " ##########\n") + print("Input: ", file_raw_sondes) + + # Add another condition that checks if e.g. 
nearly no measurements exist at all (for T, P and RH): + if np.any([np.count_nonzero(~np.isnan(sonde_dict['T'])) < 0.1*len(sonde_dict['T']), + np.count_nonzero(~np.isnan(sonde_dict['P'])) < 0.1*len(sonde_dict['P']), + np.count_nonzero(~np.isnan(sonde_dict['RH'])) < 0.1*len(sonde_dict['RH']), + np.count_nonzero(~np.isnan(sonde_dict['lat'])) < 0.05*len(sonde_dict['lat']), + np.count_nonzero(~np.isnan(sonde_dict['lon'])) < 0.05*len(sonde_dict['lon']), + np.count_nonzero(~np.isnan(sonde_dict['u_wind'])) < 0.05*len(sonde_dict['u_wind']), + np.count_nonzero(~np.isnan(sonde_dict['v_wind'])) < 0.05*len(sonde_dict['v_wind'])]): + failed_sondes.append(file_raw_sondes) + raise ValueError("One PAMTRA-critical variable measurement failed. Skipping this dropsonde. \n") + + # Add yet another condition that checks if the sonde got stuck mid air: + if not np.any(sonde_dict['Z'][~np.isnan(sonde_dict['Z'])] < 1500): # then I assume that the whole launch was doomed + raise ValueError("'gpsalt' doesn't seem to include any values below 1500 m. That is insufficient for extrapolation at the surface. \n") + stuck_sondes.append(file_raw_sondes) + + # Subsequent variables will be cured from holey nan value disease...: + # pressure, temperature, relhum, wind (u & v & w), lat, lon. + # Other variables, of which you can expect a linear interpolated over gaps to be applicable, may be added. + ill_keys = ['T', 'P', 'RH', 'lat', 'lon', 'u_wind', 'v_wind'] + + sonde_ipflag = dict() # will contain the interpolation flags for interpolated nan values in the middle of the drop + + # It's known that the altitude axis (gpsalt) does have some broken values (e.g. sudden jumps or exceeding the aircraft altitude). 
+ # Before filliing the gaps, the altitude axis must be fixed: + sonde_dict['Z'], sonde_ipflag['Z'] = fill_gaps(sonde_dict['Z'], verbose=verbose) + for key in ill_keys: + sonde_dict[key], sonde_ipflag[key] = fill_gaps(sonde_dict[key], verbose=verbose) # altitude must be passed to check for dimensions of the to-be-cured variable... + + sonde_dict['ipflag'] = sonde_ipflag + + # The raw dropsonde files show an altitude variable with increases from [0 to -1] in general: but probably due to gps tracking, + # the altitude decreases at the "top": this must be filtered out: + # Find highest index where altitude[idx+1] - altitude[idx] > 0: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + altitude_stop = np.argwhere(sonde_dict['Z'][1:] - sonde_dict['Z'][:-1] > 0)[-1][0] + 2 + # +2 because it's used as indexing [... : altitude_stop] => +1 and because the array size had been reduced by 1 during argwhere(...) + + # If the lowest non nan value of the altitude coordinate is < 0: cut the rest off: + # Find lowest non nan value of altitude: + lowest = np.argwhere(~np.isnan(sonde_dict['Z'][:]))[0][0] + + ndata = len(sonde_dict['Z']) + + # Then cut each variable at this altitude index: + if sonde_dict['Z'][lowest] < 0: + for key in sonde_dict.keys(): + if key in ['trajectory', 'fillValues', 'ipflag']: # skip these ones ... it s not interesting anyway + continue + + if sonde_dict[key].ndim > 0: # otherwise: error when using len() + + if len(sonde_dict[key]) == ndata: + sonde_dict[key] = sonde_dict[key][lowest:altitude_stop] + if key in ill_keys or key == 'Z': + sonde_dict['ipflag'][key] = sonde_dict['ipflag'][key][lowest:altitude_stop] + + else: + for key in sonde_dict.keys(): + if key in ['trajectory', 'fillValues', 'ipflag']: # skip these ones ... 
it s not interesting anyway + continue + + if sonde_dict[key].ndim > 0: # otherwise: error when using len() + + if len(sonde_dict[key]) == ndata: + sonde_dict[key] = sonde_dict[key][:altitude_stop] + if key in ill_keys or key == 'Z': + sonde_dict['ipflag'][key] = sonde_dict['ipflag'][key][:altitude_stop] + + # The altitude index may be a bit broken... needs to be fixed. mark them as nan and let it run through fill_gaps again: + dz = sonde_dict['Z'][1:] - sonde_dict['Z'][:-1] # dz[i] = z[i+1] - z[i] + + for k in range(len(dz)): + if (dz[k] < 0) or (dz[k] > 15): # this filters too big jumps in the altitude coordinate + sonde_dict['Z'][k+1] = float('nan') + + sonde_dict['Z'], sonde_ipflag['Z'] = fill_gaps(sonde_dict['Z'], verbose=verbose) + + # Perform surface repair for altitude coordinate: + sonde_dict, sonde_dict['ipflag'] = repair_surface(sonde_dict, ['Z'], sonde_dict['ipflag'], verbose=verbose) + # for some reasons, 'alt' is sort of unused but an assigned key in the dictionary. So ... we can also just set it to gpsalt: + sonde_dict['alt'] = sonde_dict['Z'] + + # now we still need to handle the nan values at the surface: If there are no non-nan values in the lowest 5 % of the variable + # -> don't interpolate because the assumption would eventually lead to senseless surface values: + sonde_dict, sonde_dict['ipflag'] = repair_surface(sonde_dict, ill_keys, sonde_dict['ipflag'], verbose=verbose) + + # Extrapolating the ill_keys to the ceiling of the dropsondes (e.g. below aircraft altitude): + # CAUTION: it is expected that the dropsondes start BELOW THE TROPOPAUSE! + # It is advisable to do the TEMPERATURE EXTRAPOLATION FIRST because it improves the pressure extrapolation. 
+ if opt_dict: + sonde_dict, sonde_dict['ipflag'], obs_height = std_extrapol_BAH(sonde_dict, ill_keys, opt_dict, sonde_dict['ipflag'], verbose=verbose) + + else: + sonde_dict, sonde_dict['ipflag'], obs_height = std_extrapol(sonde_dict, ill_keys, sonde_dict['ipflag'], verbose=verbose) + + # Regridding to a uniform vertical grid with a user-specified resolution: + sonde_dict = regridding(sonde_dict, obs_height, ill_keys, 10) + + # Find outliers and mark them (as nan): afterwards fill them again + sonde_dict = mark_outliers(sonde_dict, ['T', 'P', 'RH', 'u_wind', 'v_wind']) + for key in ill_keys: + sonde_dict[key], sonde_ipflag[key] = fill_gaps(sonde_dict[key], verbose=verbose) + + + # Check for remaining NaNs in meteorological variables: + # RH, T and P (and the height coordinate Z): + if (np.any([np.isnan(sonde_dict['T']), np.isnan(sonde_dict['P']), np.isnan(sonde_dict['RH']), + np.isnan(sonde_dict['Z'])])): + if verbose: + print(' NaN-counts: %d, %d, %d, %d' % ( + np.isnan(sonde_dict['T']).sum(), + np.isnan(sonde_dict['P']).sum(), + np.isnan(sonde_dict['RH']).sum(), + np.isnan(sonde_dict['Z']).sum())) + raise ValueError("Nans still remained in T, P, RH or Z. Aborting execution of dropsonde %s. Please contact 'a.walbroel@uni-koeln.de'." + % file_raw_sondes) + + # Check if surface winds are not nan: (index [1] is used because [0] is at 0 m above sea level) + sfc_wind_available = ~np.isnan(sonde_dict['u_wind'][1] + sonde_dict['v_wind'][1]) + if not sfc_wind_available: + if verbose: + print("WARNING, NaN in u_wind[1] or v_wind[1]. 
Surface winds won't be included in PAMTRA for %s !"%( + file_raw_sondes)) + print(' NaN-counts (u,v): %d, %d' % ( + np.isnan(sonde_dict['u_wind'][1]), + np.isnan(sonde_dict['v_wind'][1]))) + + + # Create pamtra object; change settings: + pam = pyPamtra() + + pam.nmlSet['passive'] = True # passive simulation + pam.nmlSet['active'] = False # False: no radar simulation + + # Create dictionary for PAMTRA data: + pamData = dict() + + Nh = len(sonde_dict['Z']) # number of height levels + Nx = 1 # number of sondes (is always 1 because one sonde is considered at a time) + nhydros = 0 # number of hydrometeors (here: clear sky assumed) + shape2d = [Nx, Nx] + shape3d = shape2d + [Nh] + + pamData['hgt_lev'] = np.broadcast_to(sonde_dict['Z'], shape3d) # height + pamData['press_lev'] = np.broadcast_to(sonde_dict['P'][:], shape3d) # pressure + pamData['temp_lev'] = np.broadcast_to(sonde_dict['T'][:], shape3d) # temperature + pamData['relhum_lev'] = np.broadcast_to(sonde_dict['RH'][:], shape3d) # relative humidity + + if sfc_wind_available: + pamData['wind10u'] = np.broadcast_to(sonde_dict['u_wind'][1], shape2d) + pamData['wind10v'] = np.broadcast_to(sonde_dict['v_wind'][1], shape2d) + + if not np.any([np.isnan(sonde_dict['reference_lon']), np.isnan(sonde_dict['reference_lat'])]): + pamData['lon'] = np.broadcast_to(sonde_dict['reference_lon'], shape2d) + pamData['lat'] = np.broadcast_to(sonde_dict['reference_lat'], shape2d) + + else: # use highest non nan lat/lon values: + pamData['lon'] = np.broadcast_to(sonde_dict['lon'][~np.isnan(sonde_dict['lon'])][-1], shape2d) + pamData['lat'] = np.broadcast_to(sonde_dict['lat'][~np.isnan(sonde_dict['lat'])][-1], shape2d) + + pamData['timestamp'] = np.broadcast_to(sonde_dict['launch_time'], shape2d) + if np.isnan(sst): + pamData['groundtemp'] = np.broadcast_to(sonde_dict['T'][0], shape2d) + else: + pamData['groundtemp'] = np.broadcast_to(sst, shape2d) + + # Obseravtion height: + obs_height = np.asarray(obs_height).flatten() + 
pamData['obs_height'] = np.broadcast_to(obs_height, shape2d + [len(obs_height), ]) + + # Surface type & reflectivity: + pamData['sfc_type'] = np.zeros(shape2d) # 0: ocean, 1: land + pamData['sfc_refl'] = np.chararray(shape2d) + pamData['sfc_refl'][:] = 'F' + pamData['sfc_refl'][pamData['sfc_type'] == 1] = 'S' + + # 4d variables: hydrometeors: + shape4d = [Nx, Nx, Nh-1, 1] # potentially 5 hydrometeor classes with this setting + pamData['hydro_q'] = np.zeros(shape4d) + pamData['hydro_q'][...,0] = 0 # CLOUD + + # Descriptorfile must be included. otherwise, pam.p.nhydro would be 0 which is not permitted. + descriptorFile = np.array([ + #['hydro_name' 'as_ratio' 'liq_ice' 'rho_ms' 'a_ms' 'b_ms' 'alpha_as' 'beta_as' 'moment_in' 'nbin' 'dist_name' 'p_1' 'p_2' 'p_3' 'p_4' 'd_1' 'd_2' 'scat_name' 'vel_size_mod' 'canting'] + ('cwc_q', -99.0, 1, -99.0, -99.0, -99.0, -99.0, -99.0, 3, 1, 'mono', -99.0, -99.0, -99.0, -99.0, 2e-05, -99.0, 'mie-sphere', 'khvorostyanov01_drops', -99.0)], + dtype=[('hydro_name', 'S15'), ('as_ratio', '