80 changes: 80 additions & 0 deletions filter_version_PIXC_ex.py
@@ -0,0 +1,80 @@
"""
Takes a list of directories of SWOT PIXC data, filters
them to find the best version of each granule, and writes
out a JSON containing the best files for each directory.

Currently built for PGC0 and PIC0 versions

Usage:
python3 filter_version_PIXC_ex.py

Authors: Fiona Bennitt and Elisa Friedmann
Date: 2024-10-31
"""

import json
import os

import pandas as pd

def filterVersionPIXC(directories, outpath):
"""
    Reads in all filenames, sorts them, and retrieves the best version/
    counter for each file (e.g. PGC0 over PIC0, PIC0_03 over PIC0_02).
    Writes a JSON of the selected filenames to the outpath directory,
    for use when filtering the files on read-in.

Parameters:
directories (list): List of directories to search for best files.
outpath (string): Where to save the output JSON.

Returns:
None
"""

# Get all file names from directories
for directory in directories:
        # List all files in this directory
        files = os.listdir(directory)

print(f"There are {str(len(files))} original files in directory.")

# Make DataFrame of filenames
granules = pd.DataFrame({'files': files})
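        # The slices below assume the standard PIXC filename layout: cycle
        # (3 chars), pass (3 chars), and tile (4 chars) follow the 16-char
        # "SWOT_L2_HR_PIXC_" prefix, and version/counter sit just before ".nc":
        # SWOT_L2_HR_PIXC_<cycle>_<pass>_<tile>_<start>_<end>_<version>_<counter>.nc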
granules['cycle'] = granules['files'].str.slice(16, 19)
granules['pass'] = granules['files'].str.slice(20, 23)
granules['tile'] = granules['files'].str.slice(24, 28)
granules['version'] = granules['files'].str.slice(-10, -6)
granules['counter'] = granules['files'].str.slice(-5, -3)

# Sort the files
granules = granules.sort_values(by=['cycle', 'pass', 'tile', 'version', 'counter'],
ascending=[True, True, True, True, False])

# Keep only the best version of each granule
granules = granules.drop_duplicates(subset=['cycle', 'pass', 'tile'],
keep='first')

# Extract the file names of files passing the test
best_files = list(granules['files'])

print(f"There are {str(len(best_files))} best files in directory.")


# Split filepath for naming json
        pieces = directory.split('/')
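        # NOTE: pieces[5] below assumes the download path has at least six
        # '/'-separated components; adjust the index to your directory layout.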

        # Write out best files as json
        json_path = os.path.join(outpath, pieces[5] + '_filtered.json')
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(best_files, f)

        print(f"Wrote out the unique and most recently processed {len(best_files)} files to {json_path}")

# Directories to filter
dirs = ['/path_to_data_download/']
# Outpath for json
out = '/path_out/'

filterVersionPIXC(directories=dirs, outpath=out)
99 changes: 99 additions & 0 deletions filter_version_riverSP_ex.py
@@ -0,0 +1,99 @@
"""
Takes a list of directories of SWOT RiverSP data (Reach OR Node),
filters them to find the best version of each granule, and
writes out a JSON containing the best files for each directory.

Currently built for PGC0 and PIC0 versions

Usage:
python3 filter_version_riverSP_ex.py

Authors: Elisa Friedmann and Fiona Bennitt
Date: 2024-10-31
"""

import json
import os

import pandas as pd

def filterVersionRiverSP(directories, outpath):
"""
    Reads in all filenames, sorts them, and retrieves the best version/
    counter for each file (e.g. PGC0 over PIC0, PIC0_03 over PIC0_02).
    Writes a JSON of the selected filenames for each directory to the
    outpath directory, for use when filtering the files on read-in.

Parameters:
directories (list): List of directories to search for best files.
outpath (string): Where to save the output JSONs.

Returns:
None
"""

# Get all .shp file names from directories
for directory in directories:
        # Collect all .shp file names in this directory
        shp_files = [file for file in os.listdir(directory) if file.endswith(".shp")]

print(f"There are {str(len(shp_files))} original .shp files in directory.")

# Make DataFrame of filenames
granules = pd.DataFrame({'files': shp_files})
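        # The slices below assume the standard RiverSP Reach filename layout:
        # cycle (3 chars) and pass (3 chars) follow the 25-char
        # "SWOT_L2_HR_RiverSP_Reach_" prefix, and version/counter sit just
        # before ".shp". Node filenames have a shorter prefix, so these
        # indices may need adjusting:
        # SWOT_L2_HR_RiverSP_Reach_<cycle>_<pass>_<continent>_<start>_<end>_<version>_<counter>.shp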
granules['cycle'] = granules['files'].str.slice(25, 28)
granules['pass'] = granules['files'].str.slice(29, 32)
granules['version'] = granules['files'].str.slice(-11, -7)
granules['counter'] = granules['files'].str.slice(-6, -4)

# Sort the files
granules = granules.sort_values(by=['cycle', 'pass', 'version', 'counter'],
ascending=[True, True, True, False])

# Keep only the best version of each granule
granules = granules.drop_duplicates(subset=['cycle', 'pass'],
keep='first')

# Extract the file names of files passing the test
best_files = list(granules['files'])

print(f"There are {str(len(best_files))} best .shp files in directory.")

# Extract base names (file name without extensions) from the list
base_names = set(os.path.splitext(os.path.basename(file))[0] for file in best_files)

all_best_files = []
        # Loop over the directory to find all files with matching base names
        for file in os.listdir(directory):
            # Split on the first os.extsep so multi-part extensions such as
            # .shp.xml are stripped entirely (os.path.splitext would only
            # remove the final .xml)
            base_name = os.path.basename(file).split(os.extsep, 1)[0]

# If the base name is in the list of best files,
# append that file to the list of files to keep
if base_name in base_names:
all_best_files.append(file)

# Split filepath for naming json
pieces = directory.split('/')
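        # NOTE: pieces[6:9] below assume the directory path has at least nine
        # '/'-separated components; adjust the indices to your layout.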

        # Write out best files as json
        json_path = os.path.join(outpath, pieces[6] + '_' + pieces[7] + '_' +
                                 pieces[8] + '_filtered.json')
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(all_best_files, f)

        print(f"Wrote out the unique and most recently processed {len(all_best_files)} files to {json_path}")

# Directories to filter
dirs = ['/path/SA/Reach',
'/path/GR/Reach']
# Outpath for json
out = '/path/'

filterVersionRiverSP(directories=dirs, outpath=out)
212 changes: 212 additions & 0 deletions src/find_swot_future_passes_science.py
@@ -0,0 +1,212 @@
"""
Find the SWOT satellite fly-by time through a bounding box.
The time will have uncertainty of tens of seconds to minutes depending on
the size of the bounding box.

Make sure to download the two shapefiles associated with the SWOT orbit data
from the AVISO website. You can use the included script to download the data:

bash download_swot_orbit.sh

The output folder (e.g. ./tmp/) is created automatically if it does not exist.

Usage:

python3 find_swot_future_passes_science.py -pass_no 22 -sw_corner -74.520 42.740 -ne_corner -69.89 45.03 -output_filename ./tmp/test.png

Authors: Elisa Friedmann and Fiona Bennitt, adapted from Jinbo Wang
Date: 2024-11-01
"""

import argparse
import os
from datetime import timedelta

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import contextily as cx  # only needed if the optional basemap lines are enabled
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import shapely.geometry as geometry
from shapely.geometry import box, LineString, Point


# Make sure the two shapefiles associated with the SWOT orbit data from the aviso website are present
if not os.path.exists("../data/sph_science_nadir.zip") or not os.path.exists("../data/sph_science_swath.zip"):
    print("Please download the two shapefiles associated with the SWOT orbit data from the aviso website using the included script.\n")
    print("bash download_swot_orbit.sh\n")
    exit()

#parse the command line arguments
parser = argparse.ArgumentParser(description='Find the SWOT satellite fly-by time through a bounding box.')
parser.add_argument('-pass_no', type=str, help='The pass number matching the current date (or back in time too)')
parser.add_argument('-sw_corner', type=float, nargs=2, help='The southwest corner of the bounding box [lon, lat]')
parser.add_argument('-ne_corner', type=float, nargs=2, help='The northeast corner of the bounding box [lon, lat]')
parser.add_argument('-output_filename', type=str, help='The filename of the figure to save. e.g. test.png')

args = parser.parse_args()
pass_no = args.pass_no
sw_corner = args.sw_corner
ne_corner = args.ne_corner
output_filename = args.output_filename
print('Pass Number: ', pass_no)
print('Output Filename: ', output_filename)

# Check the command line arguments before using them
if sw_corner is None or ne_corner is None or output_filename is None or pass_no is None:
    parser.print_help()
    print('Example: \n python3 find_swot_future_passes_science.py -pass_no 22 -sw_corner -74.520 42.740 -ne_corner -69.89 45.03 -output_filename ./tmp/test.png')
    exit()

output_filepath = os.path.split(output_filename)[0]


def find_time(ID_PASS_value, extended_bbox):

"""
Find the index of the first point in the combined LineString (for the given ID_PASS)
that intersects with the extended bounding box.

Parameters:
- ID_PASS_value (int): The ID_PASS value for which to find the intersecting point.
- extended_bbox (Polygon): The extended bounding box to check intersection.

Returns:
- int or None: Index of the first intersecting point, or None if no intersection is found.
"""
nadir_data = gpd.read_file("../data/sph_science_nadir.zip")

def join_linestrings(group):
"""Join LineStrings in the order they appear in the file."""
if len(group) == 1:
return group.iloc[0].geometry
else:
# Combine LineStrings
return LineString([pt for line in group.geometry for pt in line.coords])

def index_of_first_point_inside_bbox(line, bbox):
"""Returns the index of the first point of the LineString that falls inside the bounding box."""
for index, point in enumerate(line.coords):
if bbox.contains(Point(point)):
return index
return 0 # If no point falls inside the bounding box

# Filtering the GeoDataFrame for rows with the given ID_PASS_value
subset_gdf = nadir_data[nadir_data["ID_PASS"] == ID_PASS_value]

# Joining LineStrings if there are multiple rows with the same ID_PASS
combined_geometry = join_linestrings(subset_gdf)

# Finding the index of the first point inside the extended bounding box
index = index_of_first_point_inside_bbox(combined_geometry, extended_bbox)

    # START_TIME is assumed to look like "<label> <day-of-cycle> <HH:MM:SS>";
    # successive vertices along the nadir track are ~30 s apart.
    time_str = subset_gdf['START_TIME'].iloc[0]
    _, day_num, time_str = time_str.split(" ", 2)
    days = int(day_num)
    time_parts = list(map(int, time_str.split(":")))
    delta = timedelta(days=days-1, hours=time_parts[0], minutes=time_parts[1], seconds=time_parts[2]+index*30)
    # The offset is measured from this reference epoch
    return delta + pd.Timestamp("2023-07-21T05:33:45.768")

# Load the shapefile
gdf_karin = gpd.read_file("../data/sph_science_swath.zip")
# define the bounding box
bbox = geometry.box(sw_corner[0], sw_corner[1], ne_corner[0], ne_corner[1])
# extend the bounding box by 0.2 degree
extended_bbox = box(bbox.bounds[0] - 0.2, bbox.bounds[1] - 0.2, bbox.bounds[2] + 0.2, bbox.bounds[3] + 0.2)  # padded box for matching the nadir data

# Filter the GeoDataFrame for rows that intersect with the extended bounding box
overlapping_segments = gdf_karin[gdf_karin.intersects(bbox)].copy()


# Compute the fly-by time for each intersecting segment
tt = [find_time(a, extended_bbox) for a in overlapping_segments['ID_PASS']]
# Add the times as a new column
overlapping_segments["TIME"] = tt

# Calculate passing times for four cycles
cycle_period = timedelta(days=20.864549)  # repeat period of the SWOT science orbit
overlapping_segments['TIME_'+str(pass_no)] = overlapping_segments["TIME"] + (int(pass_no) * cycle_period)
overlapping_segments['TIME_'+str(int(pass_no)+1)] = overlapping_segments["TIME"] + ((int(pass_no)+1) * cycle_period)
overlapping_segments['TIME_'+str(int(pass_no)+2)] = overlapping_segments["TIME"] + ((int(pass_no)+2) * cycle_period)
overlapping_segments['TIME_'+str(int(pass_no)+3)] = overlapping_segments["TIME"] + ((int(pass_no)+3) * cycle_period)

# Print the results on screen
print(overlapping_segments[['ID_PASS', 'TIME_'+str(pass_no), 'TIME_'+str(int(pass_no)+1), 'TIME_'+str(int(pass_no)+2), 'TIME_'+str(int(pass_no)+3)]])

# Visualization of the results on a map using Cartopy and saving the figure to output_filename
# Set up the figure and axes with n by 3 subplots
n_segments = len(overlapping_segments)
n_rows = (n_segments + 2) // 3  # ceiling division for a 3-column grid
xlim = (sw_corner[0], ne_corner[0])
ylim = (sw_corner[1], ne_corner[1])

def plotit(index, ax, row):
    # Set the map extent to the requested bounding box
    ax.set_extent([sw_corner[0], ne_corner[0], sw_corner[1], ne_corner[1]])
    ax.set_title(f"Pass: {row['ID_PASS']}")

# Add geographic features
provinces_50m = cfeature.NaturalEarthFeature('cultural',
'admin_1_states_provinces_lines',
'50m',
facecolor='none')
    # Plot this swath segment in light green, then add everything to the map
    overlapping_segments.loc[[index]].plot(ax=ax, edgecolor="darkgreen", facecolor="lightgreen", zorder=1)
ax.add_feature(cfeature.LAND)
ax.add_feature(cfeature.LAKES, edgecolor='darkblue', facecolor='darkblue')
ax.add_feature(cfeature.OCEAN)
ax.add_feature(cfeature.BORDERS)
ax.add_feature(cfeature.COASTLINE)
ax.add_feature(cfeature.RIVERS, edgecolor='darkblue')
ax.add_feature(provinces_50m)
# Plot the shapefile data
#gdf.plot(ax=ax, transform=ccrs.PlateCarree())
gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True,
linewidth=1, color='gray', alpha=0.5, linestyle='--')
gl.top_labels = False # Disable longitude labels at the top
gl.right_labels = False # Disable latitude labels on the right

# #Add basemap for context
# cx.add_basemap(ax, source=cx.providers.CartoDB.PositronNoLabels, zoom=10)
# cx.add_basemap(ax, source=cx.providers.CartoDB.PositronOnlyLabels, zoom=10)



# Annotate with cycle, pass number, and the four lines of date and time
annotation_text = f"Time {pass_no}: {row['TIME_'+pass_no].strftime('%Y-%m-%d %H:%M:%S')}\n" \
f"Time {str(int(pass_no)+1)}: {row['TIME_'+str(int(pass_no)+1)].strftime('%Y-%m-%d %H:%M:%S')}\n" \
f"Time {str(int(pass_no)+2)}: {row['TIME_'+str(int(pass_no)+2)].strftime('%Y-%m-%d %H:%M:%S')}\n" \
f"Time {str(int(pass_no)+3)}: {row['TIME_'+str(int(pass_no)+3)].strftime('%Y-%m-%d %H:%M:%S')}"

ax.annotate(annotation_text, xy=(0.05, 0.1), xycoords='axes fraction', backgroundcolor='white')


fig, axes = plt.subplots(nrows=n_rows, ncols=3, figsize=(15, 25), subplot_kw={'projection': ccrs.PlateCarree()})
axes = axes.ravel()

# Plot each segment in a separate subplot
for idx, (index, row) in enumerate(overlapping_segments.iterrows()):
ax = axes[idx]
plotit(index, ax, row)

# Hide any remaining unused subplots
for idx in range(n_segments, n_rows * 3):
axes[idx].axis('off')

plt.tight_layout()

if output_filename is not None:
    if output_filepath and not os.path.isdir(output_filepath):
        os.makedirs(output_filepath)
    plt.savefig(output_filename)