From 23df950550752b3ceedd2ef02350ea39719ce0ae Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Tue, 27 May 2025 16:45:38 +0200 Subject: [PATCH 1/6] Add structured.circulation_model.py --- parcels/_datasets/__init__.py | 2 +- parcels/_datasets/structured/circulation_model.py | 11 +++++++++++ parcels/_datasets/structured/generic.py | 2 -- 3 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 parcels/_datasets/structured/circulation_model.py diff --git a/parcels/_datasets/__init__.py b/parcels/_datasets/__init__.py index c1270f0017..ead55757f4 100644 --- a/parcels/_datasets/__init__.py +++ b/parcels/_datasets/__init__.py @@ -36,7 +36,7 @@ This subpackage is broken down into structured and unstructured parts. Each of these have common submodules: -* ``circulation_model`` -> hardcoded datasets with the intention of mimicking datasets from a certain (ocean) circulation model +* ``circulation_model`` -> hardcoded datasets with the intention of mimicking dataset structure from a certain (ocean) circulation model * ``generic`` -> hardcoded datasets that are generic, and not tied to a certain (ocean) circulation model. Instead these focus on the fundamental properties of the dataset * ``generated`` -> functions to generate datasets with varying properties * ``utils`` -> any utility functions necessary related to either generating or validating datasets diff --git a/parcels/_datasets/structured/circulation_model.py b/parcels/_datasets/structured/circulation_model.py new file mode 100644 index 0000000000..961e25ae1a --- /dev/null +++ b/parcels/_datasets/structured/circulation_model.py @@ -0,0 +1,11 @@ +import xarray as xr + + +def _nemo_data() -> xr.Dataset: + """Dataset matching NEMO model output. + + Example dataset is based off of data from MOi GLO12. + + https://www.mercator-ocean.eu/en/solutions-expertise/accessing-digital-data/product-details/?offer=4217979b-2662-329a-907c-602fdc69c3a3&system=d35404e4-40d3-59d6-3608-581c9495d86a + """ + ... diff --git a/parcels/_datasets/structured/generic.py b/parcels/_datasets/structured/generic.py index 72f070e384..2432f079c0 100644 --- a/parcels/_datasets/structured/generic.py +++ b/parcels/_datasets/structured/generic.py @@ -1,5 +1,3 @@ -"""Datasets focussing on grid geometry""" - import numpy as np import xarray as xr From 78598d201a06d2b24324ec4dee4d758e6d76cd67 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Tue, 27 May 2025 17:47:09 +0200 Subject: [PATCH 2/6] Add stubs for circulation models, and docstrings for hycom and nemo --- parcels/_datasets/__init__.py | 6 ++-- .../_datasets/structured/circulation_model.py | 33 +++++++++++++++++-- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/parcels/_datasets/__init__.py b/parcels/_datasets/__init__.py index ead55757f4..29db98c5fe 100644 --- a/parcels/_datasets/__init__.py +++ b/parcels/_datasets/__init__.py @@ -3,7 +3,7 @@ This subpackage uses xarray to generate *idealised* structured and unstructured hydrodynamical datasets that are compatible with Parcels. The goals are three-fold: -1. To provide users with documentation for the types of datasets they can expect Parcels to work with. +1. To provide users with documentation for the types of datasets they can expect Parcels to work with. When reporting bugs, users can use these datasets to reproduce the bug they're experiencing (allowing developers to quickly troubleshoot the problem). 2. To supply our tutorials with hydrodynamical datasets. 3. To offer developers datasets for use in test cases. @@ -36,8 +36,10 @@ This subpackage is broken down into structured and unstructured parts. Each of these have common submodules: -* ``circulation_model`` -> hardcoded datasets with the intention of mimicking dataset structure from a certain (ocean) circulation model +* ``circulation_model`` -> hardcoded datasets with the intention of mimicking dataset structure from a certain (ocean) circulation model. If you'd like to see Parcel support a new model, please open an issue in our issue tracker. + * exposes a dict ``datasets`` mapping dataset names to xarray datasets * ``generic`` -> hardcoded datasets that are generic, and not tied to a certain (ocean) circulation model. Instead these focus on the fundamental properties of the dataset + * exposes a dict ``datasets`` mapping dataset names to xarray datasets * ``generated`` -> functions to generate datasets with varying properties * ``utils`` -> any utility functions necessary related to either generating or validating datasets diff --git a/parcels/_datasets/structured/circulation_model.py b/parcels/_datasets/structured/circulation_model.py index 961e25ae1a..89fe2ea75b 100644 --- a/parcels/_datasets/structured/circulation_model.py +++ b/parcels/_datasets/structured/circulation_model.py @@ -1,11 +1,40 @@ import xarray as xr +from . import T, X, Y, Z + +__all__ = ["T", "X", "Y", "Z", "datasets"] + def _nemo_data() -> xr.Dataset: - """Dataset matching NEMO model output. + """Dataset matching level 0 NEMO model output. - Example dataset is based off of data from MOi GLO12. + Example dataset is based off of data from the MOi GLO12 run. https://www.mercator-ocean.eu/en/solutions-expertise/accessing-digital-data/product-details/?offer=4217979b-2662-329a-907c-602fdc69c3a3&system=d35404e4-40d3-59d6-3608-581c9495d86a """ ... + + +def _hycom_data() -> xr.Dataset: + """Dataset matching level 0 HYCOM model output. + + Example dataset is based off of data from the GOFS 3.1: 41-layer HYCOM + NCODA Global 1/12° Analysis. + + https://www.hycom.org/dataserver/gofs-3pt1/analysis + """ + ... + + +def _mitgcm_data() -> xr.Dataset: ... + + +def _pop_data() -> xr.Dataset: ... + + +def _echo_data() -> xr.Dataset: ... + + +def _croco_data() -> xr.Dataset: ... + + +datasets = {} From 4aac872344417fa7d76433aa10f260551708da8f Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Tue, 27 May 2025 18:02:03 +0200 Subject: [PATCH 3/6] Update docstrings --- .../_datasets/structured/circulation_model.py | 32 ++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/parcels/_datasets/structured/circulation_model.py b/parcels/_datasets/structured/circulation_model.py index 89fe2ea75b..7650942319 100644 --- a/parcels/_datasets/structured/circulation_model.py +++ b/parcels/_datasets/structured/circulation_model.py @@ -25,16 +25,40 @@ def _hycom_data() -> xr.Dataset: ... -def _mitgcm_data() -> xr.Dataset: ... +def _mitgcm_data() -> xr.Dataset: + """Dataset matching level 0 MITgcm model output. + Example dataset is based on the Pre-SWOT Level-4 Hourly MITgcm LLC4320 simulation, + which provides high-resolution (1/48°) global ocean state estimates with hourly outputs. -def _pop_data() -> xr.Dataset: ... + https://podaac.jpl.nasa.gov/dataset/MITgcm_LLC4320_Pre-SWOT_JPL_L4_ACC_SMST_v1.0 + """ + ... + + +def _pop_data() -> xr.Dataset: + """Dataset matching level 0 POP model output. + + TODO: Identify a suitable public dataset to mimick. + """ + ... -def _echo_data() -> xr.Dataset: ... +def _echo_data() -> xr.Dataset: + """Dataset matching level 0 ECHO model output. + TODO: Identify a suitable public dataset to mimick. -def _croco_data() -> xr.Dataset: ... + """ + ... + + +def _croco_data() -> xr.Dataset: + """Dataset matching level 0 CROCO model output. + + TODO: Identify a suitable public dataset to mimick. + """ + ... datasets = {} From 3e25a92118f357800166ea6a155cf6191175aa9b Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Tue, 27 May 2025 18:41:40 +0200 Subject: [PATCH 4/6] typo --- parcels/_datasets/structured/circulation_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parcels/_datasets/structured/circulation_model.py b/parcels/_datasets/structured/circulation_model.py index 7650942319..28825d375b 100644 --- a/parcels/_datasets/structured/circulation_model.py +++ b/parcels/_datasets/structured/circulation_model.py @@ -44,8 +44,8 @@ def _pop_data() -> xr.Dataset: ... -def _echo_data() -> xr.Dataset: - """Dataset matching level 0 ECHO model output. +def _ecco_data() -> xr.Dataset: + """Dataset matching level 0 ECCO model output. TODO: Identify a suitable public dataset to mimick. From 1f69eef9b6931fc5cbd0cc693d644f6610756387 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Wed, 28 May 2025 15:48:51 +0200 Subject: [PATCH 5/6] Add nemo example data and comparison tooling --- .../_datasets/structured/circulation_model.py | 154 +++++++++++++++++- parcels/_datasets/utils.py | 92 +++++++++++ 2 files changed, 245 insertions(+), 1 deletion(-) diff --git a/parcels/_datasets/structured/circulation_model.py b/parcels/_datasets/structured/circulation_model.py index 28825d375b..9f2ab93322 100644 --- a/parcels/_datasets/structured/circulation_model.py +++ b/parcels/_datasets/structured/circulation_model.py @@ -1,3 +1,5 @@ +import numpy as np +import pandas as pd import xarray as xr from . import T, X, Y, Z @@ -12,7 +14,157 @@ def _nemo_data() -> xr.Dataset: https://www.mercator-ocean.eu/en/solutions-expertise/accessing-digital-data/product-details/?offer=4217979b-2662-329a-907c-602fdc69c3a3&system=d35404e4-40d3-59d6-3608-581c9495d86a """ - ... + # Using data from lorenz. + # Mesh file: /storage/shared/oceanparcels/input_data/MOi/domain_ORCA0083-N006/domain_ORCA0083-N006/PSY4V3R1_mesh_hgr.nc + # Data files: /storage/shared/oceanparcels/input_data/MOi/GLO12/psy4v3r1-daily_{U,V}_*.nc + # used modulefile for reference: "/storage/shared/oceanparcels/input_data/MOi/psy4v3r1/create_fieldset2D.py" + + # scp "lorenz:/storage/shared/oceanparcels/input_data/MOi/GLO12/psy4v3r1-daily_{U,V,W,T}_2007-01-0{1,2}.nc" data-v4/nemo/field + + time_counter_data = pd.date_range(start="2007-01-01T12:00:00", periods=T, freq="D") + y_data = np.arange(1, Y + 1) + x_data = np.arange(1, X + 1) + deptht_data = np.linspace(0.494, 5.728e03, Z) + + # Create the dataset + return xr.Dataset( + data_vars={ + "sotkeavmu1": ( + ("time_counter", "y", "x"), + np.random.rand(T, Y, X).astype(np.float64), + { + "units": "m2 s-1", + "valid_min": np.float64(0.0), + "valid_max": np.float64(100.0), + "long_name": "Vertical Eddy Viscosity U 1m", + "standard_name": "ocean_vertical_eddy_viscosity_u_1m", + "short_name": "sotkeavmu1", + "online_operation": "N/A", + "interval_operation": np.int64(86400), + "interval_write": np.int64(86400), + "associate": "time_counter nav_lat nav_lon", + }, + ), + "sotkeavmu15": ( + ("time_counter", "y", "x"), + np.random.rand(T, Y, X).astype(np.float64), + { + "units": "m2 s-1", + "valid_min": np.float64(0.0), + "valid_max": np.float64(100.0), + "long_name": "Vertical Eddy Viscosity U 15m", + "standard_name": "ocean_vertical_eddy_viscosity_u_15m", + "short_name": "sotkeavmu15", + "online_operation": "N/A", + "interval_operation": np.int64(86400), + "interval_write": np.int64(86400), + "associate": "time_counter nav_lat nav_lon", + }, + ), + "sotkeavmu30": ( + ("time_counter", "y", "x"), + np.random.rand(T, Y, X).astype(np.float64), + { + "units": "m2 s-1", + "valid_min": np.float64(0.0), + "valid_max": np.float64(100.0), + "long_name": "Vertical Eddy Viscosity U 30m", + "standard_name": "ocean_vertical_eddy_viscosity_u_30m", + "short_name": "sotkeavmu30", + "online_operation": "N/A", + "interval_operation": np.int64(86400), + "interval_write": np.int64(86400), + "associate": "time_counter nav_lat nav_lon", + }, + ), + "sotkeavmu50": ( + ("time_counter", "y", "x"), + np.random.rand(T, Y, X).astype(np.float64), + { + "units": "m2 s-1", + "valid_min": np.float64(0.0), + "valid_max": np.float64(100.0), + "long_name": "Vertical Eddy Viscosity U 50m", + "standard_name": "ocean_vertical_eddy_viscosity_u_50m", + "short_name": "sotkeavmu50", + "online_operation": "N/A", + "interval_operation": np.int64(86400), + "interval_write": np.int64(86400), + "associate": "time_counter nav_lat nav_lon", + }, + ), + "vozocrtx": ( + ("time_counter", "deptht", "y", "x"), + np.random.rand(T, Z, Y, X).astype(np.float64), + { + "units": "m s-1", + "valid_min": np.float64(-10.0), + "valid_max": np.float64(10.0), + "long_name": "Zonal velocity", + "standard_name": "sea_water_x_velocity", + "short_name": "vozocrtx", + "online_operation": "N/A", + "interval_operation": np.int64(86400), + "interval_write": np.int64(86400), + "associate": "time_counter deptht nav_lat nav_lon", + }, + ), + }, + coords={ + "nav_lon": ( + ("y", "x"), + np.random.rand(Y, X).astype(np.float32), + { + "units": "degrees_east", + "valid_min": np.float32(-179.99984754002182), + "valid_max": np.float32(179.999842386314), + "long_name": "Longitude", + "nav_model": "Default grid", + "standard_name": "longitude", + }, + ), + "nav_lat": ( + ("y", "x"), + np.random.rand(Y, X).astype(np.float32), + { + "units": "degrees_north", + "valid_min": np.float32(-77.0104751586914), + "valid_max": np.float32(89.9591064453125), + "long_name": "Latitude", + "nav_model": "Default grid", + "standard_name": "latitude", + }, + ), + "x": (("x",), x_data, {"standard_name": "projection_x_coordinate", "axis": "X", "units": "1"}), + "y": (("y",), y_data, {"standard_name": "projection_y_coordinate", "axis": "Y", "units": "1"}), + "time_counter": ( + ("time_counter",), + time_counter_data, + {"standard_name": "time", "long_name": "Time axis", "axis": "T", "time_origin": "1950-JAN-01 00:00:00"}, + ), + "deptht": ( + ("deptht",), + deptht_data, + { + "units": "m", + "positive": "down", + "valid_min": np.float64(0.4940253794193268), + "valid_max": np.float64(5727.91650390625), + "long_name": "Vertical T levels", + "standard_name": "depth", + "axis": "Z", + }, + ), + }, + attrs={ + "Conventions": "CF-1.0", + "file_name": "ORCA12_LIM-T00_y2021m09d27_gridU.nc", + "institution": "MERCATOR OCEAN", + "source": "NEMO", + "TimeStamp": "2021-OCT-03 18:27:01 GMT-0000", + "references": "http://www.mercator-ocean.eu", + }, + ) def _hycom_data() -> xr.Dataset: diff --git a/parcels/_datasets/utils.py b/parcels/_datasets/utils.py index 19c64643d9..0ced45baee 100644 --- a/parcels/_datasets/utils.py +++ b/parcels/_datasets/utils.py @@ -65,3 +65,95 @@ def dataset_repr_diff(ds1: xr.Dataset, ds2: xr.Dataset) -> str: diff = difflib.ndiff(repr1.splitlines(keepends=True), repr2.splitlines(keepends=True)) return "".join(diff) + + +def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"): + print(f"Comparing {ds1_name} and {ds2_name}\n") + + # Compare dataset attributes + print("Dataset Attributes Comparison:") + if ds1.attrs == ds2.attrs: + print(" Dataset attributes are identical.") + else: + print(" Dataset attributes differ.") + for attr_name in set(ds1.attrs.keys()) | set(ds2.attrs.keys()): + if attr_name not in ds1.attrs: + print(f" Attribute '{attr_name}' only in {ds2_name}") + elif attr_name not in ds2.attrs: + print(f" Attribute '{attr_name}' only in {ds1_name}") + elif ds1.attrs[attr_name] != ds2.attrs[attr_name]: + print(f" Attribute '{attr_name}' differs:") + print(f" {ds1_name}: {ds1.attrs[attr_name]}") + print(f" {ds2_name}: {ds2.attrs[attr_name]}") + print("-" * 30) + + # Compare dimensions + print("Dimensions Comparison:") + ds1_dims = set(ds1.dims) + ds2_dims = set(ds2.dims) + if ds1_dims == ds2_dims: + print(" Dimension names are identical.") + else: + print(" Dimension names differ:") + print(f" {ds1_name} dims: {sorted(list(ds1_dims))}") + print(f" {ds2_name} dims: {sorted(list(ds2_dims))}") + + # For common dimensions, compare order (implicit by comparing coordinate values for sortedness) + # and size (though size is parameterized and expected to be different) + for dim_name in ds1_dims.intersection(ds2_dims): + print(f" Dimension '{dim_name}':") + # Sizes will differ due to DIM_SIZE, so we don't strictly compare them. + print(f" {ds1_name} size: {ds1.dims[dim_name]}, {ds2_name} size: {ds2.dims[dim_name]}") + # Check if coordinates associated with dimensions are sorted (increasing) + if dim_name in ds1.coords and dim_name in ds2.coords: + is_ds1_sorted = np.all(np.diff(ds1[dim_name].values) >= 0) if len(ds1[dim_name].values) > 1 else True + is_ds2_sorted = np.all(np.diff(ds2[dim_name].values) >= 0) if len(ds2[dim_name].values) > 1 else True + if is_ds1_sorted == is_ds2_sorted: + print(f" Order for '{dim_name}' is consistent (both sorted: {is_ds1_sorted})") + else: + print( + f" Order for '{dim_name}' differs: {ds1_name} sorted: {is_ds1_sorted}, {ds2_name} sorted: {is_ds2_sorted}" + ) + print("-" * 30) + + # Compare variables (name, attributes, dimensions used) + print("Variables Comparison:") + ds1_vars = set(ds1.variables.keys()) + ds2_vars = set(ds2.variables.keys()) + + if ds1_vars == ds2_vars: + print(" Variable names are identical.") + else: + print(" Variable names differ:") + print(f" {ds1_name} vars: {sorted(list(ds1_vars - ds2_vars))}") + print(f" {ds2_name} vars: {sorted(list(ds2_vars - ds1_vars))}") + print(f" Common vars: {sorted(list(ds1_vars.intersection(ds2_vars)))}") + + for var_name in ds1_vars.intersection(ds2_vars): + print(f" Variable '{var_name}':") + var1 = ds1[var_name] + var2 = ds2[var_name] + + # Compare attributes + if var1.attrs == var2.attrs: + print(" Attributes are identical.") + else: + print(" Attributes differ.") + for attr_name in set(var1.attrs.keys()) | set(var2.attrs.keys()): + if attr_name not in var1.attrs: + print(f" Attribute '{attr_name}' only in {ds2_name}'s '{var_name}'") + elif attr_name not in var2.attrs: + print(f" Attribute '{attr_name}' only in {ds1_name}'s '{var_name}'") + elif var1.attrs[attr_name] != var2.attrs[attr_name]: + print(f" Attribute '{attr_name}' differs for '{var_name}':") + print(f" {ds1_name}: {var1.attrs[attr_name]}") + print(f" {ds2_name}: {var2.attrs[attr_name]}") + + # Compare dimensions used by the variable + if var1.dims == var2.dims: + print(f" Dimensions used are identical: {var1.dims}") + else: + print(" Dimensions used differ:") + print(f" {ds1_name}: {var1.dims}") + print(f" {ds2_name}: {var2.dims}") + print("=" * 30 + " End of Comparison " + "=" * 30) From cf58b8c166944a423aacfebe5b673b846e671615 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Wed, 28 May 2025 17:48:49 +0200 Subject: [PATCH 6/6] typo --- parcels/_datasets/structured/circulation_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parcels/_datasets/structured/circulation_model.py b/parcels/_datasets/structured/circulation_model.py index 9f2ab93322..aa829acb15 100644 --- a/parcels/_datasets/structured/circulation_model.py +++ b/parcels/_datasets/structured/circulation_model.py @@ -15,7 +15,7 @@ def _nemo_data() -> xr.Dataset: https://www.mercator-ocean.eu/en/solutions-expertise/accessing-digital-data/product-details/?offer=4217979b-2662-329a-907c-602fdc69c3a3&system=d35404e4-40d3-59d6-3608-581c9495d86a """ # Using data from lorenz. - # Mesh file: /storage/shared/oceanparcels/input_data/MOi/domain_ORCA0083-N006/domain_ORCA0083-N006/PSY4V3R1_mesh_hgr.nc + # Mesh file: /storage/shared/oceanparcels/input_data/MOi/domain_ORCA0083-N006/SY4V3R1_mesh_hgr.nc # Data files: /storage/shared/oceanparcels/input_data/MOi/GLO12/psy4v3r1-daily_{U,V}_*.nc # used modulefile for reference: "/storage/shared/oceanparcels/input_data/MOi/psy4v3r1/create_fieldset2D.py"