From 7eee06eb93de463cf947072938dbfa30e84ee04e Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Mon, 7 Jul 2025 14:31:11 +0200 Subject: [PATCH 1/9] Fixing test of sorted time dimension --- parcels/_datasets/utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/parcels/_datasets/utils.py b/parcels/_datasets/utils.py index 0ced45baee..26c8de8241 100644 --- a/parcels/_datasets/utils.py +++ b/parcels/_datasets/utils.py @@ -106,8 +106,13 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): print(f" {ds1_name} size: {ds1.dims[dim_name]}, {ds2_name} size: {ds2.dims[dim_name]}") # Check if coordinates associated with dimensions are sorted (increasing) if dim_name in ds1.coords and dim_name in ds2.coords: - is_ds1_sorted = np.all(np.diff(ds1[dim_name].values) >= 0) if len(ds1[dim_name].values) > 1 else True - is_ds2_sorted = np.all(np.diff(ds2[dim_name].values) >= 0) if len(ds2[dim_name].values) > 1 else True + check_val = np.timedelta64(0, "s") if isinstance(ds1[dim_name].values[0], np.datetime64) else 0.0 + is_ds1_sorted = ( + np.all(np.diff(ds1[dim_name].values) >= check_val) if len(ds1[dim_name].values) > 1 else True + ) + is_ds2_sorted = ( + np.all(np.diff(ds2[dim_name].values) >= check_val) if len(ds2[dim_name].values) > 1 else True + ) if is_ds1_sorted == is_ds2_sorted: print(f" Order for '{dim_name}' is consistent (both sorted: {is_ds1_sorted})") else: From ae6299a913d871a809d628d96b5ef878cb479a7a Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Mon, 7 Jul 2025 14:31:36 +0200 Subject: [PATCH 2/9] Support comparison of dictionaries with lists inside --- parcels/_datasets/utils.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/parcels/_datasets/utils.py b/parcels/_datasets/utils.py index 26c8de8241..b1044e7c42 100644 --- a/parcels/_datasets/utils.py +++ b/parcels/_datasets/utils.py @@ -67,6 +67,22 @@ def dataset_repr_diff(ds1: xr.Dataset, ds2: xr.Dataset) -> str: return "".join(diff) +def _dicts_equal(d1, d2): + # compare two dictionaries, including when their entries are lists or arrays ( == throws an error then) + if d1.keys() != d2.keys(): + return False + for k in d1: + v1, v2 = d1[k], d2[k] + # Compare lists or arrays element-wise + if isinstance(v1, (list, np.ndarray)) and isinstance(v2, (list, np.ndarray)): + if not np.array_equal(np.array(v1), np.array(v2)): + return False + else: + if v1 != v2: + return False + return True + + def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): print(f"Comparing {ds1_name} and {ds2_name}\n") @@ -140,7 +156,7 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): var2 = ds2[var_name] # Compare attributes - if var1.attrs == var2.attrs: + if _dicts_equal(var1.attrs, var2.attrs): print(" Attributes are identical.") else: print(" Attributes differ.") From 2b38f879042b56d417fb4c5ce619db1147e02d74 Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Mon, 7 Jul 2025 14:32:08 +0200 Subject: [PATCH 3/9] Updating time in circulation_models --- parcels/_datasets/structured/circulation_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index d34bcdc058..00e052b31c 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -7,7 +7,7 @@ __all__ = ["T", "X", "Y", "Z", "datasets"] -TIME = xr.date_range("2000", "2001", T) +TIME = np.datetime64("2000-01-01") + np.arange(T) * np.timedelta64(1, "D") def _copernicusmarine(): From ce262770de7fff6db47c312cefb47ebbb4359838 Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Mon, 7 Jul 2025 14:32:24 +0200 Subject: [PATCH 4/9] Fixes to globcurrent dataset --- parcels/_datasets/structured/circulation_models.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 00e052b31c..1ec4a7d0bd 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -88,7 +88,7 @@ def _copernicusmarine(): ) -def _copernicusmarine_globcurrents(): +def _copernicusmarine_globcurrent(): """Copernicus Marine Service GlobCurrent dataset (MULTIOBS_GLO_PHY_MYNRT_015_003)""" return xr.Dataset( { @@ -121,6 +121,7 @@ def _copernicusmarine_globcurrents(): "standard_name": "depth", "long_name": "Depth", "units": "m", + "unit_long": "Meters", "axis": "Z", "positive": "down", }, @@ -663,7 +664,7 @@ def _hycom_espc(): "standard_name": "eastward_sea_water_velocity", "units": "m/s", "NAVO_code": 17, - "actual_range": [-3.3700001, 3.6840003], + "actual_range": np.array([-3.3700001, 3.6840003], dtype="float32"), "cell_methods": "time: mean", }, ), @@ -1131,7 +1132,7 @@ def _CROCO_idealized(): datasets = { "ds_copernicusmarine": _copernicusmarine(), - "ds_copernicusmarine_globcurrents": _copernicusmarine_globcurrents(), + "ds_copernicusmarine_globcurrent": _copernicusmarine_globcurrent(), "ds_NEMO_MOI_U": _NEMO_MOI_U(), "ds_NEMO_MOI_V": _NEMO_MOI_V(), "ds_CESM": _CESM(), From 84b7cc6b6d44bdd74729a6ae4d79a7947789f23b Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Mon, 7 Jul 2025 15:11:54 +0200 Subject: [PATCH 5/9] Add option to suppress verbose output To only show differences --- parcels/_datasets/utils.py | 70 ++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/parcels/_datasets/utils.py b/parcels/_datasets/utils.py index b1044e7c42..81cd8e013b 100644 --- a/parcels/_datasets/utils.py +++ b/parcels/_datasets/utils.py @@ -83,32 +83,34 @@ def _dicts_equal(d1, d2): return True -def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): +def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2", verbose=True): print(f"Comparing {ds1_name} and {ds2_name}\n") - # Compare dataset attributes - print("Dataset Attributes Comparison:") - if ds1.attrs == ds2.attrs: - print(" Dataset attributes are identical.") - else: - print(" Dataset attributes differ.") - for attr_name in set(ds1.attrs.keys()) | set(ds2.attrs.keys()): - if attr_name not in ds1.attrs: - print(f" Attribute '{attr_name}' only in {ds2_name}") - elif attr_name not in ds2.attrs: - print(f" Attribute '{attr_name}' only in {ds1_name}") - elif ds1.attrs[attr_name] != ds2.attrs[attr_name]: - print(f" Attribute '{attr_name}' differs:") - print(f" {ds1_name}: {ds1.attrs[attr_name]}") - print(f" {ds2_name}: {ds2.attrs[attr_name]}") - print("-" * 30) + if verbose: + print("Dataset Attributes Comparison:") + if ds1.attrs == ds2.attrs: + print(" Dataset attributes are identical.") + else: + print(" Dataset attributes differ.") + for attr_name in set(ds1.attrs.keys()) | set(ds2.attrs.keys()): + if attr_name not in ds1.attrs: + print(f" Attribute '{attr_name}' only in {ds2_name}") + elif attr_name not in ds2.attrs: + print(f" Attribute '{attr_name}' only in {ds1_name}") + elif ds1.attrs[attr_name] != ds2.attrs[attr_name]: + print(f" Attribute '{attr_name}' differs:") + print(f" {ds1_name}: {ds1.attrs[attr_name]}") + print(f" {ds2_name}: {ds2.attrs[attr_name]}") + print("-" * 30) # Compare dimensions - print("Dimensions Comparison:") + if verbose: + print("Dimensions Comparison:") ds1_dims = set(ds1.dims) ds2_dims = set(ds2.dims) if ds1_dims == ds2_dims: - print(" Dimension names are identical.") + if verbose: + print(" Dimension names are identical.") else: print(" Dimension names differ:") print(f" {ds1_name} dims: {sorted(list(ds1_dims))}") @@ -117,9 +119,11 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): # For common dimensions, compare order (implicit by comparing coordinate values for sortedness) # and size (though size is parameterized and expected to be different) for dim_name in ds1_dims.intersection(ds2_dims): - print(f" Dimension '{dim_name}':") + if verbose: + print(f" Dimension '{dim_name}':") # Sizes will differ due to DIM_SIZE, so we don't strictly compare them. - print(f" {ds1_name} size: {ds1.dims[dim_name]}, {ds2_name} size: {ds2.dims[dim_name]}") + if verbose: + print(f" {ds1_name} size: {ds1.dims[dim_name]}, {ds2_name} size: {ds2.dims[dim_name]}") # Check if coordinates associated with dimensions are sorted (increasing) if dim_name in ds1.coords and dim_name in ds2.coords: check_val = np.timedelta64(0, "s") if isinstance(ds1[dim_name].values[0], np.datetime64) else 0.0 @@ -130,20 +134,24 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): np.all(np.diff(ds2[dim_name].values) >= check_val) if len(ds2[dim_name].values) > 1 else True ) if is_ds1_sorted == is_ds2_sorted: - print(f" Order for '{dim_name}' is consistent (both sorted: {is_ds1_sorted})") + if verbose: + print(f" Order for '{dim_name}' is consistent (both sorted: {is_ds1_sorted})") else: print( f" Order for '{dim_name}' differs: {ds1_name} sorted: {is_ds1_sorted}, {ds2_name} sorted: {is_ds2_sorted}" ) - print("-" * 30) + if verbose: + print("-" * 30) # Compare variables (name, attributes, dimensions used) - print("Variables Comparison:") + if verbose: + print("Variables Comparison:") ds1_vars = set(ds1.variables.keys()) ds2_vars = set(ds2.variables.keys()) if ds1_vars == ds2_vars: - print(" Variable names are identical.") + if verbose: + print(" Variable names are identical.") else: print(" Variable names differ:") print(f" {ds1_name} vars: {sorted(list(ds1_vars - ds2_vars))}") @@ -151,13 +159,15 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): print(f" Common vars: {sorted(list(ds1_vars.intersection(ds2_vars)))}") for var_name in ds1_vars.intersection(ds2_vars): - print(f" Variable '{var_name}':") + if verbose: + print(f" Variable '{var_name}':") var1 = ds1[var_name] var2 = ds2[var_name] # Compare attributes if _dicts_equal(var1.attrs, var2.attrs): - print(" Attributes are identical.") + if verbose: + print(" Attributes are identical.") else: print(" Attributes differ.") for attr_name in set(var1.attrs.keys()) | set(var2.attrs.keys()): @@ -172,9 +182,11 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): # Compare dimensions used by the variable if var1.dims == var2.dims: - print(f" Dimensions used are identical: {var1.dims}") + if verbose: + print(f" Dimensions used are identical: {var1.dims}") else: print(" Dimensions used differ:") print(f" {ds1_name}: {var1.dims}") print(f" {ds2_name}: {var2.dims}") - print("=" * 30 + " End of Comparison " + "=" * 30) + if verbose: + print("=" * 30 + " End of Comparison " + "=" * 30) From 91a1a2ec4e57a7211433b4c339d6433b6a657a30 Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Mon, 7 Jul 2025 15:23:02 +0200 Subject: [PATCH 6/9] Making CESM return a non-tuple --- .../structured/circulation_models.py | 164 +++++++++--------- 1 file changed, 81 insertions(+), 83 deletions(-) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 1ec4a7d0bd..53a1d5d56f 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -363,89 +363,87 @@ def _NEMO_MOI_V(): def _CESM(): """CESM model dataset""" - return ( - xr.Dataset( - { - "UVEL": ( - ["time", "z_t", "nlat", "nlon"], - np.random.rand(T, Z, Y, X).astype("float32"), - { - "long_name": "Velocity in grid-x direction", - "units": "centimeter/s", - "grid_loc": 3221, - "cell_methods": "time:mean", - }, - ), - "VVEL": ( - ["time", "z_t", "nlat", "nlon"], - np.random.rand(T, Z, Y, X).astype("float32"), - { - "long_name": "Velocity in grid-y direction", - "units": "centimeter/s", - "grid_loc": 3221, - "cell_methods": "time:mean", - }, - ), - "WVEL": ( - ["time", "z_w_top", "nlat", "nlon"], - np.random.rand(T, Z, Y, X).astype("float32"), - { - "long_name": "Vertical Velocity", - "units": "centimeter/s", - "grid_loc": 3112, - "cell_methods": "time:mean", - }, - ), - }, - coords={ - "time": ( - ["time"], - TIME, - { - "long_name": "time", - "bounds": "time_bounds", - }, - ), - "z_t": ( - ["z_t"], - np.linspace(0, 5000, Z, dtype="float32"), - { - "long_name": "depth from surface to midpoint of layer", - "units": "centimeters", - "positive": "down", - "valid_min": 500.0, - "valid_max": 537500.0, - }, - ), - "z_w_top": ( - ["z_w_top"], - np.linspace(0, 5000, Z, dtype="float32"), - { - "long_name": "depth from surface to top of layer", - "units": "centimeters", - "positive": "down", - "valid_min": 0.0, - "valid_max": 525000.94, - }, - ), - "ULONG": ( - ["nlat", "nlon"], - np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear - { - "long_name": "array of u-grid longitudes", - "units": "degrees_east", - }, - ), - "ULAT": ( - ["nlat", "nlon"], - np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear - { - "long_name": "array of u-grid latitudes", - "units": "degrees_north", - }, - ), - }, - ), + return xr.Dataset( + { + "UVEL": ( + ["time", "z_t", "nlat", "nlon"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "long_name": "Velocity in grid-x direction", + "units": "centimeter/s", + "grid_loc": "3221", + "cell_methods": "time: mean", + }, + ), + "VVEL": ( + ["time", "z_t", "nlat", "nlon"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "long_name": "Velocity in grid-y direction", + "units": "centimeter/s", + "grid_loc": "3221", + "cell_methods": "time: mean", + }, + ), + "WVEL": ( + ["time", "z_w_top", "nlat", "nlon"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "long_name": "Vertical Velocity", + "units": "centimeter/s", + "grid_loc": "3112", + "cell_methods": "time: mean", + }, + ), + }, + coords={ + "time": ( + ["time"], + np.linspace(0, 5000, T), + { + "long_name": "time", + "bounds": "time_bound", + }, + ), + "z_t": ( + ["z_t"], + np.linspace(0, 5000, Z, dtype="float32"), + { + "long_name": "depth from surface to midpoint of layer", + "units": "centimeters", + "positive": "down", + "valid_min": 500.0, + "valid_max": 537500.0, + }, + ), + "z_w_top": ( + ["z_w_top"], + np.linspace(0, 5000, Z, dtype="float32"), + { + "long_name": "depth from surface to top of layer", + "units": "centimeters", + "positive": "down", + "valid_min": 0.0, + "valid_max": 525000.9375, + }, + ), + "ULONG": ( + ["nlat", "nlon"], + np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear + { + "long_name": "array of u-grid longitudes", + "units": "degrees_east", + }, + ), + "ULAT": ( + ["nlat", "nlon"], + np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear + { + "long_name": "array of u-grid latitudes", + "units": "degrees_north", + }, + ), + }, ) From 1d1a9bcb216072ba3aac854b87783cd5bbe81876 Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Mon, 7 Jul 2025 15:23:23 +0200 Subject: [PATCH 7/9] Small fixes to circulation_models after comparing tool run --- .../structured/circulation_models.py | 50 ++++++------------- 1 file changed, 14 insertions(+), 36 deletions(-) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index 53a1d5d56f..a73a99d8e1 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -47,7 +47,7 @@ def _copernicusmarine(): "unit_long": "Meters", "units": "m", "axis": "Z", - "long_name": "depth", + "long_name": "Depth", "standard_name": "depth", "positive": "down", }, @@ -173,7 +173,6 @@ def _NEMO_MOI_U(): "valid_min": -10.0, "valid_max": 10.0, "long_name": "Zonal velocity", - "unit_long": "Meters per second", "standard_name": "sea_water_x_velocity", "short_name": "vozocrtx", "online_operation": "N/A", @@ -242,16 +241,6 @@ def _NEMO_MOI_U(): "units": "1", }, ), - "time_counter": ( - ["time_counter"], - np.empty(0, dtype="datetime64[ns]"), - { - "standard_name": "time", - "long_name": "Time axis", - "axis": "T", - "time_origin": "1950-JAN-01 00:00:00", - }, - ), "deptht": ( ["deptht"], np.linspace(1, 5500, Z, dtype="float64"), @@ -281,7 +270,6 @@ def _NEMO_MOI_V(): "valid_min": -10.0, "valid_max": 10.0, "long_name": "Meridional velocity", - "unit_long": "Meters per second", "standard_name": "sea_water_y_velocity", "short_name": "vomecrty", "online_operation": "N/A", @@ -297,8 +285,8 @@ def _NEMO_MOI_V(): np.tile(np.linspace(-179, 179, X, endpoint=False), (Y, 1)), # note that this is not curvilinear { "units": "degrees_east", - "valid_min": -179.99984754002182, - "valid_max": 179.999842386314, + "valid_min": -179.9999951021171, + "valid_max": 180.0, "long_name": "Longitude", "nav_model": "Default grid", "standard_name": "longitude", @@ -309,8 +297,8 @@ def _NEMO_MOI_V(): np.tile(np.linspace(-75, 85, Y).reshape(-1, 1), (1, X)), # note that this is not curvilinear { "units": "degrees_north", - "valid_min": -77.0104751586914, - "valid_max": 89.9591064453125, + "valid_min": -77.00110752801133, + "valid_max": 89.95529158641207, "long_name": "Latitude", "nav_model": "Default grid", "standard_name": "latitude", @@ -334,16 +322,6 @@ def _NEMO_MOI_V(): "units": "1", }, ), - "time_counter": ( - ["time_counter"], - np.empty(0, dtype="datetime64[ns]"), - { - "standard_name": "time", - "long_name": "Time axis", - "axis": "T", - "time_origin": "1950-JAN-01 00:00:00", - }, - ), "deptht": ( ["deptht"], np.linspace(1, 5500, Z, dtype="float64"), @@ -924,7 +902,7 @@ def _ecco4(): "units": "degrees_north", "coordinate": "YG XG", "coverage_content_type": "coordinate", - "comment": "Nonuniform grid spacing. Note: 'southwest' does not correspond to geographic orientation but is used for convenience to describe the computational grid. See MITgcm documentation for details.", + "comment": "Nonuniform grid spacing. Note: 'southwest' does not correspond to geographic orientation but is used for convenience to describe the computational grid. See MITgcm dcoumentation for details.", }, ), "XC": ( @@ -949,7 +927,7 @@ def _ecco4(): "units": "degrees_east", "coordinate": "YG XG", "coverage_content_type": "coordinate", - "comment": "Nonuniform grid spacing. Note: 'southwest' does not correspond to geographic orientation but is used for convenience to describe the computational grid. See MITgcm documentation for details.", + "comment": "Nonuniform grid spacing. Note: 'southwest' does not correspond to geographic orientation but is used for convenience to describe the computational grid. See MITgcm dcoumentation for details.", }, ), }, @@ -1007,7 +985,7 @@ def _CROCO_idealized(): { "long_name": "free-surface", "units": "meter", - "field": "free_surface, scalar, series", + "field": "free-surface, scalar, series", "standard_name": "sea_surface_height", }, ), @@ -1066,7 +1044,7 @@ def _CROCO_idealized(): ["eta_rho"], np.arange(Y, dtype="float32"), { - "long name": "y-dimension of the grid", + "long_name": "y-dimension of the grid", "standard_name": "y_grid_index", "axis": "Y", "c_grid_dynamic_range": f"2:{Y}", @@ -1076,8 +1054,8 @@ def _CROCO_idealized(): ["eta_v"], np.arange(Y - 1, dtype="float32"), { - "long name": "y-dimension of the grid at v location", - "standard_name": "y_grid_index_at_v_location", + "long_name": "y-dimension of the grid at v location", + "standard_name": "x_grid_index_at_v_location", "axis": "Y", "c_grid_axis_shift": 0.5, "c_grid_dynamic_range": f"2:{Y - 1}", @@ -1087,7 +1065,7 @@ def _CROCO_idealized(): ["xi_rho"], np.arange(X, dtype="float32"), { - "long name": "x-dimension of the grid", + "long_name": "x-dimension of the grid", "standard_name": "x_grid_index", "axis": "X", "c_grid_dynamic_range": f"2:{X}", @@ -1097,7 +1075,7 @@ def _CROCO_idealized(): ["xi_u"], np.arange(X - 1, dtype="float32"), { - "long name": "x-dimension of the grid at u location", + "long_name": "x-dimension of the grid at u location", "standard_name": "x_grid_index_at_u_location", "axis": "X", "c_grid_axis_shift": 0.5, @@ -1121,7 +1099,7 @@ def _CROCO_idealized(): "long_name": "y-locations of RHO-points", "units": "meter", "standard_name": "plane_y_coordinate", - "field": "y_rho, scalar", + "field": "y_rho, scal", }, ), }, From 16212fb5dd76d3b86e1626fff46499aeb8bb2da1 Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Mon, 7 Jul 2025 15:54:17 +0200 Subject: [PATCH 8/9] Adding MITgcm_mds dataset --- .../structured/circulation_models.py | 137 ++++++++++++++++++ parcels/_datasets/utils.py | 4 +- 2 files changed, 140 insertions(+), 1 deletion(-) diff --git a/parcels/_datasets/structured/circulation_models.py b/parcels/_datasets/structured/circulation_models.py index a73a99d8e1..c4eec14bc3 100644 --- a/parcels/_datasets/structured/circulation_models.py +++ b/parcels/_datasets/structured/circulation_models.py @@ -527,6 +527,142 @@ def _MITgcm_netcdf(): ) +def _MITgcm_mds(): + """MITgcm model dataset in native MDS format""" + return xr.Dataset( + { + "U": ( + ["time", "Z", "YC", "XG"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "standard_name": "sea_water_x_velocity", + "mate": "V", + "long_name": "Zonal Component of Velocity", + "units": "m s-1", + }, + ), + "V": ( + ["time", "Z", "YG", "XC"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "standard_name": "sea_water_y_velocity", + "mate": "U", + "long_name": "Meridional Component of Velocity", + "units": "m s-1", + }, + ), + "W": ( + ["time", "Zl", "YC", "XC"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "standard_name": "sea_water_z_velocity", + "long_name": "Vertical Component of Velocity", + "units": "m s-1", + }, + ), + "S": ( + ["time", "Z", "YC", "XC"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "standard_name": "sea_water_salinity", + "long_name": "Salinity", + "units": "g kg-1", + }, + ), + "T": ( + ["time", "Z", "YC", "XC"], + np.random.rand(T, Z, Y, X).astype("float32"), + { + "standard_name": "sea_water_potential_temperature", + "long_name": "Potential Temperature", + "units": "degree_Celcius", + }, + ), + }, + coords={ + "time": ( + ["time"], + np.arange(T) * np.timedelta64(1, "D"), + { + "standard_name": "time", + "long_name": "Time", + "axis": "T", + "calendar": "gregorian", + }, + ), + "Z": ( + ["Z"], + np.linspace(-25, -5000, Z, dtype="float64"), + { + "standard_name": "depth", + "long_name": "vertical coordinate of cell center", + "units": "m", + "positive": "down", + "axis": "Z", + }, + ), + "Zl": ( + ["Zl"], + np.linspace(0, -4500, Z, dtype="float64"), + { + "standard_name": "depth_at_lower_w_location", + "long_name": "vertical coordinate of lower cell interface", + "units": "m", + "positive": "down", + "axis": "Z", + "c_grid_axis_shift": -0.5, + }, + ), + "YC": ( + ["YC"], + np.linspace(500, 5000, Y, dtype="float64"), + { + "standard_name": "latitude", + "long_name": "latitude", + "units": "degrees_north", + "coordinate": "YC XC", + "axis": "Y", + }, + ), + "YG": ( + ["YG"], + np.linspace(0, 5000, Y, dtype="float64"), + { + "standard_name": "latitude_at_f_location", + "long_name": "latitude", + "units": "degrees_north", + "coordinate": "YG XG", + "axis": "Y", + "c_grid_axis_shift": -0.5, + }, + ), + "XC": ( + ["XC"], + np.linspace(500, 5000, X, dtype="float64"), + { + "standard_name": "longitude", + "long_name": "longitude", + "units": "degrees_east", + "coordinate": "YC XC", + "axis": "X", + }, + ), + "XG": ( + ["XG"], + np.linspace(0, 5000, X, dtype="float64"), + { + "standard_name": "longitude_at_f_location", + "long_name": "longitude", + "units": "degrees_east", + "coordinate": "YG XG", + "axis": "X", + "c_grid_axis_shift": -0.5, + }, + ), + }, + ) + + def _ERA5_wind(): """ERA5 10m wind model dataset""" return xr.Dataset( @@ -1113,6 +1249,7 @@ def _CROCO_idealized(): "ds_NEMO_MOI_V": _NEMO_MOI_V(), "ds_CESM": _CESM(), "ds_MITgcm_netcdf": _MITgcm_netcdf(), + "ds_MITgcm_mds": _MITgcm_mds(), "ds_ERA5_wind": _ERA5_wind(), "ds_FES_tides": _FES_tides(), "ds_hycom_espc": _hycom_espc(), diff --git a/parcels/_datasets/utils.py b/parcels/_datasets/utils.py index 81cd8e013b..147741fd27 100644 --- a/parcels/_datasets/utils.py +++ b/parcels/_datasets/utils.py @@ -126,7 +126,9 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2", verbo print(f" {ds1_name} size: {ds1.dims[dim_name]}, {ds2_name} size: {ds2.dims[dim_name]}") # Check if coordinates associated with dimensions are sorted (increasing) if dim_name in ds1.coords and dim_name in ds2.coords: - check_val = np.timedelta64(0, "s") if isinstance(ds1[dim_name].values[0], np.datetime64) else 0.0 + check_val = ( + np.timedelta64(0, "s") if isinstance(ds1[dim_name].values[0], (np.datetime64, np.timedelta64)) else 0.0 + ) is_ds1_sorted = ( np.all(np.diff(ds1[dim_name].values) >= check_val) if len(ds1[dim_name].values) > 1 else True ) From ee4817de086d669c730a76ff3a3c3897e204620c Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Mon, 7 Jul 2025 16:38:16 +0200 Subject: [PATCH 9/9] Using verbose_print function --- parcels/_datasets/utils.py | 71 +++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/parcels/_datasets/utils.py b/parcels/_datasets/utils.py index 147741fd27..f215377f2c 100644 --- a/parcels/_datasets/utils.py +++ b/parcels/_datasets/utils.py @@ -86,31 +86,32 @@ def _dicts_equal(d1, d2): def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2", verbose=True): print(f"Comparing {ds1_name} and {ds2_name}\n") - if verbose: - print("Dataset Attributes Comparison:") - if ds1.attrs == ds2.attrs: - print(" Dataset attributes are identical.") - else: - print(" Dataset attributes differ.") - for attr_name in set(ds1.attrs.keys()) | set(ds2.attrs.keys()): - if attr_name not in ds1.attrs: - print(f" Attribute '{attr_name}' only in {ds2_name}") - elif attr_name not in ds2.attrs: - print(f" Attribute '{attr_name}' only in {ds1_name}") - elif ds1.attrs[attr_name] != ds2.attrs[attr_name]: - print(f" Attribute '{attr_name}' differs:") - print(f" {ds1_name}: {ds1.attrs[attr_name]}") - print(f" {ds2_name}: {ds2.attrs[attr_name]}") - print("-" * 30) + def verbose_print(*args, **kwargs): + if verbose: + print(*args, **kwargs) + + verbose_print("Dataset Attributes Comparison:") + if ds1.attrs == ds2.attrs: + verbose_print(" Dataset attributes are identical.") + else: + print(" Dataset attributes differ.") + for attr_name in set(ds1.attrs.keys()) | set(ds2.attrs.keys()): + if attr_name not in ds1.attrs: + print(f" Attribute '{attr_name}' only in {ds2_name}") + elif attr_name not in ds2.attrs: + print(f" Attribute '{attr_name}' only in {ds1_name}") + elif ds1.attrs[attr_name] != ds2.attrs[attr_name]: + print(f" Attribute '{attr_name}' differs:") + print(f" {ds1_name}: {ds1.attrs[attr_name]}") + print(f" {ds2_name}: {ds2.attrs[attr_name]}") + verbose_print("-" * 30) # Compare dimensions - if verbose: - print("Dimensions Comparison:") + verbose_print("Dimensions Comparison:") ds1_dims = set(ds1.dims) ds2_dims = set(ds2.dims) if ds1_dims == ds2_dims: - if verbose: - print(" Dimension names are identical.") + verbose_print(" Dimension names are identical.") else: print(" Dimension names differ:") print(f" {ds1_name} dims: {sorted(list(ds1_dims))}") @@ -119,11 +120,9 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2", verbo # For common dimensions, compare order (implicit by comparing coordinate values for sortedness) # and size (though size is parameterized and expected to be different) for dim_name in ds1_dims.intersection(ds2_dims): - if verbose: - print(f" Dimension '{dim_name}':") + verbose_print(f" Dimension '{dim_name}':") # Sizes will differ due to DIM_SIZE, so we don't strictly compare them. - if verbose: - print(f" {ds1_name} size: {ds1.dims[dim_name]}, {ds2_name} size: {ds2.dims[dim_name]}") + verbose_print(f" {ds1_name} size: {ds1.dims[dim_name]}, {ds2_name} size: {ds2.dims[dim_name]}") # Check if coordinates associated with dimensions are sorted (increasing) if dim_name in ds1.coords and dim_name in ds2.coords: check_val = ( @@ -136,24 +135,20 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2", verbo np.all(np.diff(ds2[dim_name].values) >= check_val) if len(ds2[dim_name].values) > 1 else True ) if is_ds1_sorted == is_ds2_sorted: - if verbose: - print(f" Order for '{dim_name}' is consistent (both sorted: {is_ds1_sorted})") + verbose_print(f" Order for '{dim_name}' is consistent (both sorted: {is_ds1_sorted})") else: print( f" Order for '{dim_name}' differs: {ds1_name} sorted: {is_ds1_sorted}, {ds2_name} sorted: {is_ds2_sorted}" ) - if verbose: - print("-" * 30) + verbose_print("-" * 30) # Compare variables (name, attributes, dimensions used) - if verbose: - print("Variables Comparison:") + verbose_print("Variables Comparison:") ds1_vars = set(ds1.variables.keys()) ds2_vars = set(ds2.variables.keys()) if ds1_vars == ds2_vars: - if verbose: - print(" Variable names are identical.") + verbose_print(" Variable names are identical.") else: print(" Variable names differ:") print(f" {ds1_name} vars: {sorted(list(ds1_vars - ds2_vars))}") @@ -161,15 +156,13 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2", verbo print(f" Common vars: {sorted(list(ds1_vars.intersection(ds2_vars)))}") for var_name in ds1_vars.intersection(ds2_vars): - if verbose: - print(f" Variable '{var_name}':") + verbose_print(f" Variable '{var_name}':") var1 = ds1[var_name] var2 = ds2[var_name] # Compare attributes if _dicts_equal(var1.attrs, var2.attrs): - if verbose: - print(" Attributes are identical.") + verbose_print(" Attributes are identical.") else: print(" Attributes differ.") for attr_name in set(var1.attrs.keys()) | set(var2.attrs.keys()): @@ -184,11 +177,9 @@ def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2", verbo # Compare dimensions used by the variable if var1.dims == var2.dims: - if verbose: - print(f" Dimensions used are identical: {var1.dims}") + verbose_print(f" Dimensions used are identical: {var1.dims}") else: print(" Dimensions used differ:") print(f" {ds1_name}: {var1.dims}") print(f" {ds2_name}: {var2.dims}") - if verbose: - print("=" * 30 + " End of Comparison " + "=" * 30) + verbose_print("=" * 30 + " End of Comparison " + "=" * 30)