Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions parcels/_datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

This subpackage uses xarray to generate *idealised* structured and unstructured hydrodynamical datasets that are compatible with Parcels. The goals are three-fold:

1. To provide users with documentation for the types of datasets they can expect Parcels to work with.
1. To provide users with documentation for the types of datasets they can expect Parcels to work with. When reporting bugs, users can use these datasets to reproduce the bug they're experiencing (allowing developers to quickly troubleshoot the problem).
2. To supply our tutorials with hydrodynamical datasets.
3. To offer developers datasets for use in test cases.

Expand Down Expand Up @@ -36,8 +36,10 @@

This subpackage is broken down into structured and unstructured parts. Each of these have common submodules:

* ``circulation_model`` -> hardcoded datasets with the intention of mimicking datasets from a certain (ocean) circulation model
* ``circulation_model`` -> hardcoded datasets with the intention of mimicking dataset structure from a certain (ocean) circulation model. If you'd like to see Parcel support a new model, please open an issue in our issue tracker.
* exposes a dict ``datasets`` mapping dataset names to xarray datasets
* ``generic`` -> hardcoded datasets that are generic, and not tied to a certain (ocean) circulation model. Instead these focus on the fundamental properties of the dataset
* exposes a dict ``datasets`` mapping dataset names to xarray datasets
* ``generated`` -> functions to generate datasets with varying properties
* ``utils`` -> any utility functions necessary related to either generating or validating datasets

Expand Down
216 changes: 216 additions & 0 deletions parcels/_datasets/structured/circulation_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
import numpy as np
import pandas as pd
import xarray as xr

from . import T, X, Y, Z

__all__ = ["T", "X", "Y", "Z", "datasets"]


def _nemo_data() -> xr.Dataset:
"""Dataset matching level 0 NEMO model output.

Example dataset is based off of data from the MOi GLO12 run.

https://www.mercator-ocean.eu/en/solutions-expertise/accessing-digital-data/product-details/?offer=4217979b-2662-329a-907c-602fdc69c3a3&system=d35404e4-40d3-59d6-3608-581c9495d86a
"""
# Using data from lorenz.
# Mesh file: /storage/shared/oceanparcels/input_data/MOi/domain_ORCA0083-N006/SY4V3R1_mesh_hgr.nc
# Data files: /storage/shared/oceanparcels/input_data/MOi/GLO12/psy4v3r1-daily_{U,V}_*.nc
# used modulefile for reference: "/storage/shared/oceanparcels/input_data/MOi/psy4v3r1/create_fieldset2D.py"

# scp "lorenz:/storage/shared/oceanparcels/input_data/MOi/GLO12/psy4v3r1-daily_{U,V,W,T}_2007-01-0{1,2}.nc" data-v4/nemo/field

time_counter_data = pd.date_range(start="2007-01-01T12:00:00", periods=T, freq="D")
y_data = np.arange(1, Y + 1)
x_data = np.arange(1, X + 1)
deptht_data = np.linspace(0.494, 5.728e03, Z)

# Create the dataset
return xr.Dataset(
data_vars={
"sotkeavmu1": (
("time_counter", "y", "x"),
np.random.rand(T, Y, X).astype(np.float64),
{
"units": "m2 s-1",
"valid_min": np.float64(0.0),
"valid_max": np.float64(100.0),
"long_name": "Vertical Eddy Viscosity U 1m",
"standard_name": "ocean_vertical_eddy_viscosity_u_1m",
"short_name": "sotkeavmu1",
"online_operation": "N/A",
"interval_operation": np.int64(86400),
"interval_write": np.int64(86400),
"associate": "time_counter nav_lat nav_lon",
},
),
"sotkeavmu15": (
("time_counter", "y", "x"),
np.random.rand(T, Y, X).astype(np.float64),
{
"units": "m2 s-1",
"valid_min": np.float64(0.0),
"valid_max": np.float64(100.0),
"long_name": "Vertical Eddy Viscosity U 15m",
"standard_name": "ocean_vertical_eddy_viscosity_u_15m",
"short_name": "sotkeavmu15",
"online_operation": "N/A",
"interval_operation": np.int64(86400),
"interval_write": np.int64(86400),
"associate": "time_counter nav_lat nav_lon",
},
),
"sotkeavmu30": (
("time_counter", "y", "x"),
np.random.rand(T, Y, X).astype(np.float64),
{
"units": "m2 s-1",
"valid_min": np.float64(0.0),
"valid_max": np.float64(100.0),
"long_name": "Vertical Eddy Viscosity U 30m",
"standard_name": "ocean_vertical_eddy_viscosity_u_30m",
"short_name": "sotkeavmu30",
"online_operation": "N/A",
"interval_operation": np.int64(86400),
"interval_write": np.int64(86400),
"associate": "time_counter nav_lat nav_lon",
},
),
"sotkeavmu50": (
("time_counter", "y", "x"),
np.random.rand(T, Y, X).astype(np.float64),
{
"units": "m2 s-1",
"valid_min": np.float64(0.0),
"valid_max": np.float64(100.0),
"long_name": "Vertical Eddy Viscosity U 50m",
"standard_name": "ocean_vertical_eddy_viscosity_u_50m",
"short_name": "sotkeavmu50",
"online_operation": "N/A",
"interval_operation": np.int64(86400),
"interval_write": np.int64(86400),
"associate": "time_counter nav_lat nav_lon",
},
),
"vozocrtx": (
("time_counter", "deptht", "y", "x"),
np.random.rand(T, Z, Y, X).astype(np.float64),
{
"units": "m s-1",
"valid_min": np.float64(-10.0),
"valid_max": np.float64(10.0),
"long_name": "Zonal velocity",
"standard_name": "sea_water_x_velocity",
"short_name": "vozocrtx",
"online_operation": "N/A",
"interval_operation": np.int64(86400),
"interval_write": np.int64(86400),
"associate": "time_counter deptht nav_lat nav_lon",
},
),
},
coords={
"nav_lon": (
("y", "x"),
np.random.rand(Y, X).astype(np.float32),
{
"units": "degrees_east",
"valid_min": np.float32(-179.99984754002182),
"valid_max": np.float32(179.999842386314),
"long_name": "Longitude",
"nav_model": "Default grid",
"standard_name": "longitude",
},
),
"nav_lat": (
("y", "x"),
np.random.rand(Y, X).astype(np.float32),
{
"units": "degrees_north",
"valid_min": np.float32(-77.0104751586914),
"valid_max": np.float32(89.9591064453125),
"long_name": "Latitude",
"nav_model": "Default grid",
"standard_name": "latitude",
},
),
"x": (("x",), x_data, {"standard_name": "projection_x_coordinate", "axis": "X", "units": "1"}),
"y": (("y",), y_data, {"standard_name": "projection_y_coordinate", "axis": "Y", "units": "1"}),
"time_counter": (
("time_counter",),
time_counter_data,
{"standard_name": "time", "long_name": "Time axis", "axis": "T", "time_origin": "1950-JAN-01 00:00:00"},
),
"deptht": (
("deptht",),
deptht_data,
{
"units": "m",
"positive": "down",
"valid_min": np.float64(0.4940253794193268),
"valid_max": np.float64(5727.91650390625),
"long_name": "Vertical T levels",
"standard_name": "depth",
"axis": "Z",
},
),
},
attrs={
"Conventions": "CF-1.0",
"file_name": "ORCA12_LIM-T00_y2021m09d27_gridU.nc",
"institution": "MERCATOR OCEAN",
"source": "NEMO",
"TimeStamp": "2021-OCT-03 18:27:01 GMT-0000",
"references": "http://www.mercator-ocean.eu",
},
)


def _hycom_data() -> xr.Dataset:
"""Dataset matching level 0 HYCOM model output.

Example dataset is based off of data from the GOFS 3.1: 41-layer HYCOM + NCODA Global 1/12° Analysis.

https://www.hycom.org/dataserver/gofs-3pt1/analysis
"""
...


def _mitgcm_data() -> xr.Dataset:
"""Dataset matching level 0 MITgcm model output.

Example dataset is based on the Pre-SWOT Level-4 Hourly MITgcm LLC4320 simulation,
which provides high-resolution (1/48°) global ocean state estimates with hourly outputs.

https://podaac.jpl.nasa.gov/dataset/MITgcm_LLC4320_Pre-SWOT_JPL_L4_ACC_SMST_v1.0
"""
...


def _pop_data() -> xr.Dataset:
"""Dataset matching level 0 POP model output.

TODO: Identify a suitable public dataset to mimick.
"""
...


def _ecco_data() -> xr.Dataset:
"""Dataset matching level 0 ECCO model output.

TODO: Identify a suitable public dataset to mimick.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just skimmed through it

When the MITgcm saves diagnostic...

does this mean that ECCO is a model built on top of MITgcm?

I'll take a more thorough look later

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this mean that ECCO is a model built on top of MITgcm?

Yep, that's correct. ECCO is an output of MITgcm, just like our MOi or Copernicus datasets are output datasets of NEMO. The interesting thing about ECCO, though, it that is has the tiled structure: see llc90 example at https://ecco-group.org/datasets.htm


"""
...


def _croco_data() -> xr.Dataset:
"""Dataset matching level 0 CROCO model output.

TODO: Identify a suitable public dataset to mimick.
"""
...


datasets = {}
2 changes: 0 additions & 2 deletions parcels/_datasets/structured/generic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
"""Datasets focussing on grid geometry"""

import numpy as np
import xarray as xr

Expand Down
92 changes: 92 additions & 0 deletions parcels/_datasets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,95 @@ def dataset_repr_diff(ds1: xr.Dataset, ds2: xr.Dataset) -> str:

diff = difflib.ndiff(repr1.splitlines(keepends=True), repr2.splitlines(keepends=True))
return "".join(diff)


def compare_datasets(ds1, ds2, ds1_name="Dataset 1", ds2_name="Dataset 2"):
print(f"Comparing {ds1_name} and {ds2_name}\n")

# Compare dataset attributes
print("Dataset Attributes Comparison:")
if ds1.attrs == ds2.attrs:
print(" Dataset attributes are identical.")
else:
print(" Dataset attributes differ.")
for attr_name in set(ds1.attrs.keys()) | set(ds2.attrs.keys()):
if attr_name not in ds1.attrs:
print(f" Attribute '{attr_name}' only in {ds2_name}")
elif attr_name not in ds2.attrs:
print(f" Attribute '{attr_name}' only in {ds1_name}")
elif ds1.attrs[attr_name] != ds2.attrs[attr_name]:
print(f" Attribute '{attr_name}' differs:")
print(f" {ds1_name}: {ds1.attrs[attr_name]}")
print(f" {ds2_name}: {ds2.attrs[attr_name]}")
print("-" * 30)

# Compare dimensions
print("Dimensions Comparison:")
ds1_dims = set(ds1.dims)
ds2_dims = set(ds2.dims)
if ds1_dims == ds2_dims:
print(" Dimension names are identical.")
else:
print(" Dimension names differ:")
print(f" {ds1_name} dims: {sorted(list(ds1_dims))}")
print(f" {ds2_name} dims: {sorted(list(ds2_dims))}")

# For common dimensions, compare order (implicit by comparing coordinate values for sortedness)
# and size (though size is parameterized and expected to be different)
for dim_name in ds1_dims.intersection(ds2_dims):
print(f" Dimension '{dim_name}':")
# Sizes will differ due to DIM_SIZE, so we don't strictly compare them.
print(f" {ds1_name} size: {ds1.dims[dim_name]}, {ds2_name} size: {ds2.dims[dim_name]}")
# Check if coordinates associated with dimensions are sorted (increasing)
if dim_name in ds1.coords and dim_name in ds2.coords:
is_ds1_sorted = np.all(np.diff(ds1[dim_name].values) >= 0) if len(ds1[dim_name].values) > 1 else True
is_ds2_sorted = np.all(np.diff(ds2[dim_name].values) >= 0) if len(ds2[dim_name].values) > 1 else True
if is_ds1_sorted == is_ds2_sorted:
print(f" Order for '{dim_name}' is consistent (both sorted: {is_ds1_sorted})")
else:
print(
f" Order for '{dim_name}' differs: {ds1_name} sorted: {is_ds1_sorted}, {ds2_name} sorted: {is_ds2_sorted}"
)
print("-" * 30)

# Compare variables (name, attributes, dimensions used)
print("Variables Comparison:")
ds1_vars = set(ds1.variables.keys())
ds2_vars = set(ds2.variables.keys())

if ds1_vars == ds2_vars:
print(" Variable names are identical.")
else:
print(" Variable names differ:")
print(f" {ds1_name} vars: {sorted(list(ds1_vars - ds2_vars))}")
print(f" {ds2_name} vars: {sorted(list(ds2_vars - ds1_vars))}")
print(f" Common vars: {sorted(list(ds1_vars.intersection(ds2_vars)))}")

for var_name in ds1_vars.intersection(ds2_vars):
print(f" Variable '{var_name}':")
var1 = ds1[var_name]
var2 = ds2[var_name]

# Compare attributes
if var1.attrs == var2.attrs:
print(" Attributes are identical.")
else:
print(" Attributes differ.")
for attr_name in set(var1.attrs.keys()) | set(var2.attrs.keys()):
if attr_name not in var1.attrs:
print(f" Attribute '{attr_name}' only in {ds2_name}'s '{var_name}'")
elif attr_name not in var2.attrs:
print(f" Attribute '{attr_name}' only in {ds1_name}'s '{var_name}'")
elif var1.attrs[attr_name] != var2.attrs[attr_name]:
print(f" Attribute '{attr_name}' differs for '{var_name}':")
print(f" {ds1_name}: {var1.attrs[attr_name]}")
print(f" {ds2_name}: {var2.attrs[attr_name]}")

# Compare dimensions used by the variable
if var1.dims == var2.dims:
print(f" Dimensions used are identical: {var1.dims}")
else:
print(" Dimensions used differ:")
print(f" {ds1_name}: {var1.dims}")
print(f" {ds2_name}: {var2.dims}")
print("=" * 30 + " End of Comparison " + "=" * 30)
Loading