80 changes: 80 additions & 0 deletions filter_version_PIXC_ex.py
@@ -0,0 +1,80 @@
"""
Takes a list of directories of SWOT PIXC data, filters
them to find the best version of each granule, and writes
out a JSON containing the best files for each directory.

Currently built for PGC0 and PIC0 versions

Usage:
python3 filter_version_PIXC_ex.py

Authors: Fiona Bennitt and Elisa Friedmann
Date: 2024-10-31
"""

import json
import os

import pandas as pd

def filterVersionPIXC(directories, outpath):
"""
    Reads in all filenames, sorts them, and retrieves the best version/
    counter for each file (e.g. PGC0 over PIC0, PIC0_03 over PIC0_02).
    Writes a JSON of the selected filenames to the outpath directory,
    for use when filtering the files on read-in.

Parameters:
directories (list): List of directories to search for best files.
outpath (string): Where to save the output JSON.

Returns:
None
"""

# Get all file names from directories
for directory in directories:
        # List all files in this directory
        files = os.listdir(directory)

print(f"There are {str(len(files))} original files in directory.")

# Make DataFrame of filenames
granules = pd.DataFrame({'files': files})
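        # The slices below assume the standard PIXC filename layout: cycle
        # (3 chars), pass (3 chars), and tile (4 chars) follow the 16-char
        # "SWOT_L2_HR_PIXC_" prefix, and version/counter sit just before ".nc":
        # SWOT_L2_HR_PIXC_<cycle>_<pass>_<tile>_<start>_<end>_<version>_<counter>.nc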
granules['cycle'] = granules['files'].str.slice(16, 19)
granules['pass'] = granules['files'].str.slice(20, 23)
granules['tile'] = granules['files'].str.slice(24, 28)
granules['version'] = granules['files'].str.slice(-10, -6)
granules['counter'] = granules['files'].str.slice(-5, -3)

# Sort the files
granules = granules.sort_values(by=['cycle', 'pass', 'tile', 'version', 'counter'],
ascending=[True, True, True, True, False])

# Keep only the best version of each granule
granules = granules.drop_duplicates(subset=['cycle', 'pass', 'tile'],
keep='first')

# Extract the file names of files passing the test
best_files = list(granules['files'])

print(f"There are {str(len(best_files))} best files in directory.")


# Split filepath for naming json
        pieces = directory.split('/')
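        # NOTE: pieces[5] below assumes the download path has at least six
        # '/'-separated components; adjust the index to your directory layout.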

        # Write out best files as json
        json_path = os.path.join(outpath, pieces[5] + '_filtered.json')
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(best_files, f)

        print(f"Wrote out the unique and most recently processed {len(best_files)} files to {json_path}")

# Directories to filter
dirs = ['/path_to_data_download/']
# Outpath for json
out = '/path_out/'

filterVersionPIXC(directories=dirs, outpath=out)
99 changes: 99 additions & 0 deletions filter_version_riverSP_ex.py
@@ -0,0 +1,99 @@
"""
Takes a list of directories of SWOT RiverSP data (Reach OR Node),
filters them to find the best version of each granule, and
writes out a JSON containing the best files for each directory.

Currently built for PGC0 and PIC0 versions

Usage:
python3 filter_version_riverSP_ex.py

Authors: Elisa Friedmann and Fiona Bennitt
Date: 2024-10-31
"""

import json
import os

import pandas as pd

def filterVersionRiverSP(directories, outpath):
"""
    Reads in all filenames, sorts them, and retrieves the best version/
    counter for each file (e.g. PGC0 over PIC0, PIC0_03 over PIC0_02).
    Writes a JSON of the selected filenames for each directory to the
    outpath directory, for use when filtering the files on read-in.

Parameters:
directories (list): List of directories to search for best files.
outpath (string): Where to save the output JSONs.

Returns:
None
"""

# Get all .shp file names from directories
for directory in directories:
        # Collect all .shp file names in this directory
        shp_files = [file for file in os.listdir(directory) if file.endswith(".shp")]

print(f"There are {str(len(shp_files))} original .shp files in directory.")

# Make DataFrame of filenames
granules = pd.DataFrame({'files': shp_files})
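        # The slices below assume the standard RiverSP Reach filename layout:
        # cycle (3 chars) and pass (3 chars) follow the 25-char
        # "SWOT_L2_HR_RiverSP_Reach_" prefix, and version/counter sit just
        # before ".shp". Node filenames have a shorter prefix, so these
        # indices may need adjusting:
        # SWOT_L2_HR_RiverSP_Reach_<cycle>_<pass>_<continent>_<start>_<end>_<version>_<counter>.shp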
granules['cycle'] = granules['files'].str.slice(25, 28)
granules['pass'] = granules['files'].str.slice(29, 32)
granules['version'] = granules['files'].str.slice(-11, -7)
granules['counter'] = granules['files'].str.slice(-6, -4)

# Sort the files
granules = granules.sort_values(by=['cycle', 'pass', 'version', 'counter'],
ascending=[True, True, True, False])

# Keep only the best version of each granule
granules = granules.drop_duplicates(subset=['cycle', 'pass'],
keep='first')

# Extract the file names of files passing the test
best_files = list(granules['files'])

print(f"There are {str(len(best_files))} best .shp files in directory.")

# Extract base names (file name without extensions) from the list
base_names = set(os.path.splitext(os.path.basename(file))[0] for file in best_files)

all_best_files = []
        # Loop over the directory to find all files with matching base names
        for file in os.listdir(directory):
            # Split on the first os.extsep so multi-part extensions such as
            # .shp.xml are stripped entirely (os.path.splitext would only
            # remove the final .xml)
            base_name = os.path.basename(file).split(os.extsep, 1)[0]

# If the base name is in the list of best files,
# append that file to the list of files to keep
if base_name in base_names:
all_best_files.append(file)

# Split filepath for naming json
pieces = directory.split('/')
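        # NOTE: pieces[6:9] below assume the directory path has at least nine
        # '/'-separated components; adjust the indices to your layout.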

        # Write out best files as json
        json_path = os.path.join(outpath, pieces[6] + '_' + pieces[7] + '_' +
                                 pieces[8] + '_filtered.json')
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(all_best_files, f)

        print(f"Wrote out the unique and most recently processed {len(all_best_files)} files to {json_path}")

# Directories to filter
dirs = ['/path/SA/Reach',
'/path/GR/Reach']
# Outpath for json
out = '/path/'

filterVersionRiverSP(directories=dirs, outpath=out)
212 changes: 212 additions & 0 deletions src/find_swot_future_passes_science.py
@@ -0,0 +1,212 @@
"""
Find the SWOT satellite fly-by time through a bounding box.
The time will have uncertainty of tens of seconds to minutes depending on
the size of the bounding box.

Make sure to download the two shapefiles associated with the SWOT orbit data
from the AVISO website. You can use the included script to download the data:

bash download_swot_orbit.sh

The output folder (e.g. ./tmp/) is created automatically if it does not exist.

Usage:

python3 find_swot_future_passes_science.py -pass_no 22 -sw_corner -74.520 42.740 -ne_corner -69.89 45.03 -output_filename ./tmp/test.png

Authors: Elisa Friedmann and Fiona Bennitt, adapted from Jinbo Wang
Date: 2024-11-01
"""

import argparse
import os
from datetime import timedelta

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import contextily as cx  # only needed if the optional basemap lines are enabled
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import shapely.geometry as geometry
from shapely.geometry import box, LineString, Point


# Make sure the two shapefiles associated with the SWOT orbit data from the aviso website are present
if not os.path.exists("../data/sph_science_nadir.zip") or not os.path.exists("../data/sph_science_swath.zip"):
    print("Please download the two shapefiles associated with the SWOT orbit data from the aviso website using the included script.\n")
    print("bash download_swot_orbit.sh\n")
    exit()

#parse the command line arguments
parser = argparse.ArgumentParser(description='Find the SWOT satellite fly-by time through a bounding box.')
parser.add_argument('-pass_no', type=str, help='The pass number matching the current date (or back in time too)')
parser.add_argument('-sw_corner', type=float, nargs=2, help='The southwest corner of the bounding box [lon, lat]')
parser.add_argument('-ne_corner', type=float, nargs=2, help='The northeast corner of the bounding box [lon, lat]')
parser.add_argument('-output_filename', type=str, help='The filename of the figure to save. e.g. test.png')

args = parser.parse_args()
pass_no = args.pass_no
sw_corner = args.sw_corner
ne_corner = args.ne_corner
output_filename = args.output_filename
print('Pass Number: ', pass_no)
print('Output Filename: ', output_filename)

# Check the command line arguments before using them
if sw_corner is None or ne_corner is None or output_filename is None or pass_no is None:
    parser.print_help()
    print('Example: \n python3 find_swot_future_passes_science.py -pass_no 22 -sw_corner -74.520 42.740 -ne_corner -69.89 45.03 -output_filename ./tmp/test.png')
    exit()

output_filepath = os.path.split(output_filename)[0]


def find_time(ID_PASS_value, extended_bbox):

"""
Find the index of the first point in the combined LineString (for the given ID_PASS)
that intersects with the extended bounding box.

Parameters:
- ID_PASS_value (int): The ID_PASS value for which to find the intersecting point.
- extended_bbox (Polygon): The extended bounding box to check intersection.

Returns:
- int or None: Index of the first intersecting point, or None if no intersection is found.
"""
nadir_data = gpd.read_file("../data/sph_science_nadir.zip")

def join_linestrings(group):
"""Join LineStrings in the order they appear in the file."""
if len(group) == 1:
return group.iloc[0].geometry
else:
# Combine LineStrings
return LineString([pt for line in group.geometry for pt in line.coords])

def index_of_first_point_inside_bbox(line, bbox):
"""Returns the index of the first point of the LineString that falls inside the bounding box."""
for index, point in enumerate(line.coords):
if bbox.contains(Point(point)):
return index
return 0 # If no point falls inside the bounding box

# Filtering the GeoDataFrame for rows with the given ID_PASS_value
subset_gdf = nadir_data[nadir_data["ID_PASS"] == ID_PASS_value]

# Joining LineStrings if there are multiple rows with the same ID_PASS
combined_geometry = join_linestrings(subset_gdf)

# Finding the index of the first point inside the extended bounding box
index = index_of_first_point_inside_bbox(combined_geometry, extended_bbox)

    # START_TIME is assumed to look like "<label> <day-of-cycle> <HH:MM:SS>";
    # successive vertices along the nadir track are ~30 s apart.
    time_str = subset_gdf['START_TIME'].iloc[0]
    _, day_num, time_str = time_str.split(" ", 2)
    days = int(day_num)
    time_parts = list(map(int, time_str.split(":")))
    delta = timedelta(days=days-1, hours=time_parts[0], minutes=time_parts[1], seconds=time_parts[2]+index*30)
    # The offset is measured from this reference epoch
    return delta + pd.Timestamp("2023-07-21T05:33:45.768")

# Load the shapefile
gdf_karin = gpd.read_file("../data/sph_science_swath.zip")
# define the bounding box
bbox = geometry.box(sw_corner[0], sw_corner[1], ne_corner[0], ne_corner[1])
# extend the bounding box by 0.2 degree
extended_bbox = box(bbox.bounds[0] - 0.2, bbox.bounds[1] - 0.2, bbox.bounds[2] + 0.2, bbox.bounds[3] + 0.2)  # padded box for matching the nadir data

# Filter the GeoDataFrame for rows that intersect with the extended bounding box
overlapping_segments = gdf_karin[gdf_karin.intersects(bbox)].copy()


# Compute the fly-by time for each intersecting segment
tt = [find_time(a, extended_bbox) for a in overlapping_segments['ID_PASS']]
# Add the times as a new column
overlapping_segments["TIME"] = tt

# Calculate passing times for four cycles
cycle_period = timedelta(days=20.864549)  # repeat period of the SWOT science orbit
overlapping_segments['TIME_'+str(pass_no)] = overlapping_segments["TIME"] + (int(pass_no) * cycle_period)
overlapping_segments['TIME_'+str(int(pass_no)+1)] = overlapping_segments["TIME"] + ((int(pass_no)+1) * cycle_period)
overlapping_segments['TIME_'+str(int(pass_no)+2)] = overlapping_segments["TIME"] + ((int(pass_no)+2) * cycle_period)
overlapping_segments['TIME_'+str(int(pass_no)+3)] = overlapping_segments["TIME"] + ((int(pass_no)+3) * cycle_period)

# Print the results on screen
print(overlapping_segments[['ID_PASS', 'TIME_'+str(pass_no), 'TIME_'+str(int(pass_no)+1), 'TIME_'+str(int(pass_no)+2), 'TIME_'+str(int(pass_no)+3)]])

# Visualization of the results on a map using Cartopy and saving the figure to output_filename
# Set up the figure and axes with n by 3 subplots
n_segments = len(overlapping_segments)
n_rows = (n_segments + 2) // 3  # ceiling division for a 3-column grid
xlim = (sw_corner[0], ne_corner[0])
ylim = (sw_corner[1], ne_corner[1])

def plotit(index, ax, row):
    # Set the map extent to the requested bounding box
    ax.set_extent([sw_corner[0], ne_corner[0], sw_corner[1], ne_corner[1]])
    ax.set_title(f"Pass: {row['ID_PASS']}")

# Add geographic features
provinces_50m = cfeature.NaturalEarthFeature('cultural',
'admin_1_states_provinces_lines',
'50m',
facecolor='none')
    # Plot this swath segment in light green, then add everything to the map
    overlapping_segments.loc[[index]].plot(ax=ax, edgecolor="darkgreen", facecolor="lightgreen", zorder=1)
ax.add_feature(cfeature.LAND)
ax.add_feature(cfeature.LAKES, edgecolor='darkblue', facecolor='darkblue')
ax.add_feature(cfeature.OCEAN)
ax.add_feature(cfeature.BORDERS)
ax.add_feature(cfeature.COASTLINE)
ax.add_feature(cfeature.RIVERS, edgecolor='darkblue')
ax.add_feature(provinces_50m)
# Plot the shapefile data
#gdf.plot(ax=ax, transform=ccrs.PlateCarree())
gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True,
linewidth=1, color='gray', alpha=0.5, linestyle='--')
gl.top_labels = False # Disable longitude labels at the top
gl.right_labels = False # Disable latitude labels on the right

# #Add basemap for context
# cx.add_basemap(ax, source=cx.providers.CartoDB.PositronNoLabels, zoom=10)
# cx.add_basemap(ax, source=cx.providers.CartoDB.PositronOnlyLabels, zoom=10)



# Annotate with cycle, pass number, and the four lines of date and time
annotation_text = f"Time {pass_no}: {row['TIME_'+pass_no].strftime('%Y-%m-%d %H:%M:%S')}\n" \
f"Time {str(int(pass_no)+1)}: {row['TIME_'+str(int(pass_no)+1)].strftime('%Y-%m-%d %H:%M:%S')}\n" \
f"Time {str(int(pass_no)+2)}: {row['TIME_'+str(int(pass_no)+2)].strftime('%Y-%m-%d %H:%M:%S')}\n" \
f"Time {str(int(pass_no)+3)}: {row['TIME_'+str(int(pass_no)+3)].strftime('%Y-%m-%d %H:%M:%S')}"

ax.annotate(annotation_text, xy=(0.05, 0.1), xycoords='axes fraction', backgroundcolor='white')


fig, axes = plt.subplots(nrows=n_rows, ncols=3, figsize=(15, 25), subplot_kw={'projection': ccrs.PlateCarree()})
axes = axes.ravel()

# Plot each segment in a separate subplot
for idx, (index, row) in enumerate(overlapping_segments.iterrows()):
ax = axes[idx]
plotit(index, ax, row)

# Hide any remaining unused subplots
for idx in range(n_segments, n_rows * 3):
axes[idx].axis('off')

plt.tight_layout()

if output_filename is not None:
    if output_filepath and not os.path.isdir(output_filepath):
        os.makedirs(output_filepath)
    plt.savefig(output_filename)