diff --git a/OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py b/OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py new file mode 100755 index 00000000..fed4148b --- /dev/null +++ b/OceanDataStore/data/ARMOR3D/create_ARMOR3D_P1M-m_monthly_climatology.py @@ -0,0 +1,135 @@ +# ========================================================= +# create_ARMOR3D_P1M-m_climatology.py +# +# Script to create ARMOR3D REP monthly climatologies for +# climate normals (1971-2000, 1981-2010, 1991-2020) and +# write to local netCDF files. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import numpy as np +import xarray as xr +import zarr + +from OceanDataStore.cli import initialise_logging +from OceanDataStore.data.utils import ( + compute_dx, + compute_dy, + compute_cell_area, + compute_cell_thickness, + compute_land_sea_mask, +) + +logger = logging.getLogger(__name__) + +def main(): + # ========== Initialize Logging and Print Banner ========== # + initialise_logging() + + # ========== Prepare Data ========== # + # Open complete ARMOR3D REP monthly climatology dataset: + filepath = "/dssgfs01/scratch/otooth/npd_data/observations/ARMOR3D/armor-3d_rep_monthly_NA_*.zarr" + ds = xr.open_mfdataset(filepath, compat="no_conflicts", data_vars="all", engine="zarr") + logging.info("-> Completed: Opened ARMOR-3D REP monthly climatology dataset from Zarr stores.") + + # Rename variables to standard names: + ds = ds.rename({"to": "thetao", + "so": "so", + }) + + # Update global attributes: + ds.attrs.clear() + + ds = ds.assign_attrs({ + "Conventions": "CF-1.0", + "title": "Multi Observation Global Ocean 3D Temperature Salinity Height Geostrophic Current and MLD.", + "description": "Multi Observation Global Ocean ARMOR3D multi-year reprocessed temperature salinity, sea surface height, geostrophic current and mixed layer depth climatology on 1/8 degree regular grid and 50 depth levels.", + "source": "Numerical models: Multiple Linear Regression, Optimal Interpolation. In-situ observations: Copernicus In Situ TAC (including Argo, XBT, CTD and moorings) Copernicus Sea Level TAC, CNES-CLS22 Mean Dynamic Topography, OSTIA Sea Surface Temperature Analysis, Copernicus MOB TAC (Sea Surface Salinity), and World Ocean Atlas 2018 (WOA18).", + "dataset_type": "observation", + "product_type": "climatology", + "product_version": "2.0", + "institution": "Copernicus Marine Service, Mercator Ocean International, France", + "citation": "Multi Observation Global Ocean 3D Temperature Salinity Height Geostrophic Current and MLD. E.U. Copernicus Marine Service Information (CMEMS). Marine Data Store (MDS). DOI: 10.48670/moi-00052 (Accessed on 21 04 2026).", + "references": "Guinehut S., A.-L. Dhomps, G. Larnicol and P.-Y. Le Traon, 2012: High resolution 3D temperature and salinity fields derived from in situ and satellite observations. Ocean Sci., 8(5):845-857. Mulet, S., M.-H. Rio, A. Mignot, S. Guinehut and R. Morrow, 2012: A new estimate of the global 3D geostrophic ocean circulation based on satellite data and in-situ measurements. Deep Sea Research Part II : Topical Studies in Oceanography, 77-80(0):70-81.", + "acknowledgement": "Generated using E.U. Copernicus Marine Service Information; https://doi.org/10.48670/moi-00052.", + "license": "ARMOR3D data were obtained from https://doi.org/10.48670/moi-00052, and are provided under the Copernicus Marine Environment Monitoring Service Service Level Agreement (SLA) https://marine.copernicus.eu/user-corner/service-commitments-and-licence?pk_vid=42ac3e352be888641780994034c3bb6e", + "doi": "10.48670/moi-00052", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "0.125 degree", + "vertical_grid_type": "z", + "vertical_grid_coordinate": "depth", + "vertical_grid_levels": 50, + "aggregation": "mean", + "aggregation_frequency": "monthly", + "status": "completed", + "update_frequency": "None", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + }) + + logging.info("-> Completed: Updated ARMOR3D CF-metadata.") + + # -- Calculate climate normal monthly climatologies -- # + output_dir = "/dssgfs01/scratch/otooth/npd_data/observations/ARMOR3D/climatology/" + start_years = [1971, 1981, 1991] + end_years = [2000, 2010, 2020] + + for start_year, end_year in zip(start_years, end_years): + logging.info(f"In Progress: Calculating monthly climatology for {start_year}-{end_year} climate normal period...") + # Calculate monthly climatology for the specified period: + ds_climatology = ds.sel(time=slice(f'{start_year}-01', f'{end_year}-12')).groupby('time.month').mean() + + # Add ancillary variables: + ds_climatology['mask'] = compute_land_sea_mask(ds['thetao'].isel(time=0, depth=0)) + ds_climatology['dx'] = compute_dx(ds) + ds_climatology['dy'] = compute_dy(ds) + ds_climatology['cell_area'] = compute_cell_area(ds) + # Custom ancillary variables: + ds_climatology['cell_thickness'] = compute_cell_thickness(ds_climatology) + ds_climatology['cell_volume'] = ds_climatology['cell_thickness'] * ds_climatology['cell_area'] + + # Update attributes for custom ancillary variables: + ds_climatology['cell_volume'].attrs.update({ + 'long_name': "Grid-Cell Volume", + 'standard_name': "cell_volume", + 'units': "m3", + }) + + # Update time bounds to reflect climatological period: + ds_climatology['time_bnds'] = xr.DataArray(data=np.zeros((12, 2), dtype='datetime64[M]'), dims=('month', 'bnds')) + ds_climatology['time_bnds'][:, 0] = np.arange(f'{start_year}-01', f'{start_year+1}-01', dtype='datetime64[M]') + ds_climatology['time_bnds'][:, 1] = np.arange(f'{end_year}-01', f'{end_year+1}-01', dtype='datetime64[M]') + logging.info(f"-> Completed: Calculated monthly climatology for {start_year}-{end_year} climate normal period.") + + # Update title attribute to reflect climatological period: + ds_climatology.attrs['title'] = f"Multi Observation Global Ocean 3D Temperature Salinity Height Geostrophic Current and MLD monthly climatology ({start_year}-{end_year})." + ds_climatology.attrs['start_datetime'] = f"{start_year}-01-01" + ds_climatology.attrs['end_datetime'] = f"{end_year}-12-31" + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="lz4", clevel=5, shuffle=zarr.codecs.BloscShuffle.shuffle) + + # Infer variable chunk sizes (preserve source chunking): + for var in list(ds_climatology.data_vars) + list(ds_climatology.coords): + source_chunks = ds_climatology[var].encoding.get('chunks', None) + ds_climatology[var].encoding.clear() + ds_climatology[var].encoding['compressors'] = [blosccodec] + # Cast float64 back to float32 to match source precision: + if ds_climatology[var].dtype == np.float64: + ds_climatology[var].encoding['dtype'] = 'float32' + if source_chunks is not None: + ds_climatology[var].encoding['chunks'] = source_chunks + + # Write monthly climatology to Zarr: + output_filepath = f"{output_dir}ARMOR3D_global_{start_year}_{end_year}_monthly_climatology.zarr" + ds_climatology.to_zarr(output_filepath, zarr_format=3, mode="w") + logging.info(f"-> Completed: Saved monthly climatology for {start_year}-{end_year} climate normal period to {output_filepath}.") + + # -- Close ARMOR3D analysis datasets -- # + ds_climatology.close() + ds.close() + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py b/OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py new file mode 100755 index 00000000..1ca6ab6d --- /dev/null +++ b/OceanDataStore/data/ARMOR3D/download_ARMOR3D_0.125def_P1M-m_1993_2024.py @@ -0,0 +1,33 @@ +""" +Download global domain of ARMOR-3D dataset from Copernicus Marine. +This script downloads the monthly reprocessed files from 1993 to 2024. + +Created By: Ollie Tooth +Created On: 2026-04-21 +Contact: oliver.tooth@noc.ac.uk + +Note: This download script should be run using the env_oceandatastore environment. +""" + +# Import the Copernicus Marine API toolbox: +import copernicusmarine + +# Define filepath to credentials: +credentials_fpath = ".../copernicusmarine-credentials" +# Define output directory: +out_fdir = "/dssgfs01/scratch/otooth/npd_data/observations/ARMOR3D/" + +# Download the ARMOR3D analysis dataset for North Atlantic subdomain: +for year in range(2001, 2025): + print(f"In Progress: Downloading ARMOR3D analysis dataset for: {year}...") + copernicusmarine.subset( + dataset_id="cmems_obs-mob_glo_phy_my_0.125deg_P1M-m", + variables=["mlotst", "so", "to", "ugo", "vgo", "zo"], + start_datetime=f"{year}-01-01T00:00:00", + end_datetime=f"{year}-12-01T00:00:00", + credentials_file=credentials_fpath, + output_directory=out_fdir, + file_format="zarr", + output_filename=f"armor-3d_rep_monthly_NA_{year}.zarr", + ) + print(f"Completed: downloading ARMOR3D analysis dataset for: {year}.") diff --git a/OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm b/OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm new file mode 100755 index 00000000..d2639819 --- /dev/null +++ b/OceanDataStore/data/ARMOR3D/run_create_ARMOR3D_P1M-m_monthly_climatology.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=armor3d_monthly_climatology +#SBATCH --partition=compute +#SBATCH --time=04:00:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ============================================================== +# run_create_ARMOR3D_P1M-m_monthly_climatology.slurm +# +# Description: SLURM script to create ARMOR3D REP monthly +# climatology datasets. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-06-09 +# +# ============================================================== +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source .../miniforge3/bin/activate +conda activate env_ods + +# -- Create ARMOR3D REP monthly climatology datasets -- # +echo "In Progress: Creating ARMOR3D REP monthly climatology datasets..." + +python3 create_ARMOR3D_P1M-m_monthly_climatology.py + +echo "Completed: Created ARMOR3D REP monthly climatology datasets." diff --git a/OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm b/OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm new file mode 100755 index 00000000..5c0af2f7 --- /dev/null +++ b/OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_climatology_to_os.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=send_armor3d_my_climatology +#SBATCH --partition=compute +#SBATCH --time=04:00:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ============================================================== +# run_send_ARMOR3D_P1M-m_climatology_to_os.slurm +# +# Description: SLURM script to send the ARMOR3D P1M-m monthly +# climatology datasets to Icechunk repository. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-06-09 +# +# ============================================================== +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source .../miniforge3/bin/activate +conda activate env_ods + +# -- Send ARMOR3D P1M-m monthly climatology datasets to JASMIN OS -- # +echo "In Progress: Sending ARMOR3D P1M-m monthly climatology to Icechunk..." + +python3 send_ARMOR3D_P1m-m_monthly_climatology_to_os.py + +echo "Completed: Sent ARMOR3D P1M-m monthly climatology to Icechunk." diff --git a/OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm b/OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm new file mode 100755 index 00000000..d4aff9cd --- /dev/null +++ b/OceanDataStore/data/ARMOR3D/run_send_ARMOR3D_P1M-m_monthly_to_os.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=armor3d_my_monthly +#SBATCH --partition=compute +#SBATCH --time=04:00:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ============================================================== +# run_send_ARMOR3D_P1M-m_monthly_to_os.slurm +# +# Description: SLURM script to send the ARMOR3D P1M-m +# monthly dataset to Icechunk repository. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-05-29 +# +# ============================================================== +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source .../miniforge3/bin/activate +conda activate env_ods + +# -- Send ARMOR3D P1M-m monthly to JASMIN OS -- # +echo "In Progress: Sending ARMOR3D P1M-m monthly to Icechunk..." + +python3 send_ARMOR3D_P1m-m_monthly_to_os.py + +echo "Completed: Sent ARMOR3D P1M-m monthly to Icechunk." diff --git a/OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm b/OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm new file mode 100755 index 00000000..f95c3897 --- /dev/null +++ b/OceanDataStore/data/ARMOR3D/run_update_ARMOR3D_P1m-m_monthly_to_os.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=armor3d_monthly_update +#SBATCH --partition=compute +#SBATCH --time=01:00:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ============================================================== +# run_update_ARMOR3D_P1m-m_monthly_to_os.slurm +# +# Description: SLURM script to update the ARMOR3D P1m-m +# monthly dataset in Icechunk repository. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-05-29 +# +# ============================================================== +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source .../miniforge3/bin/activate +conda activate env_ods + +# -- Update ARMOR3D P1m-m monthly in JASMIN OS -- # +echo "In Progress: Updating ARMOR3D P1m-m monthly in Icechunk..." + +python3 update_ARMOR3D_P1m-m_monthly_to_os.py + +echo "Completed: Updated ARMOR3D P1m-m monthly in Icechunk." diff --git a/OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py b/OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py new file mode 100755 index 00000000..e8b884be --- /dev/null +++ b/OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_climatology_to_os.py @@ -0,0 +1,99 @@ +# ========================================================= +# send_ARMOR3D_P1m-m_monthly_climatology_to_os.py +# +# Script to write ARMOR3D monthly climatologies to +# Icechunk repositories in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import xarray as xr +import zarr + +from OceanDataStore.cli import send_to_icechunk, initialise_logging + +logger = logging.getLogger(__name__) + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + + # ========== Prepare Data ========== # + # Open ARMOR3D monthly climatology datasets: + filepaths = [ + "/dssgfs01/scratch/otooth/npd_data/observations/ARMOR3D/climatology/ARMOR3D_global_1971_2000_monthly_climatology.zarr", + "/dssgfs01/scratch/otooth/npd_data/observations/ARMOR3D/climatology/ARMOR3D_global_1981_2010_monthly_climatology.zarr", + "/dssgfs01/scratch/otooth/npd_data/observations/ARMOR3D/climatology/ARMOR3D_global_1991_2020_monthly_climatology.zarr" + ] + + # Define start & end years of climatology periods: + start_years = [1971, 1981, 1991] + end_years = [2000, 2010, 2020] + + # ========== Send to Icechunk Repository ========== # + bucket = "armor3d" + exists = False + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + variable_commits = True + config_kwargs = { + "temporary_directory":".../OceanDataStore/OceanDataStore/data/ARMOR3D/", + "local_directory":".../OceanDataStore/OceanDataStore/data/ARMOR3D/" + } + cluster_kwargs = { + "n_workers" : 30, + "threads_per_worker" : 1, + "memory_limit":"3GB" + } + + for filepath, start_yr, end_yr in zip(filepaths, start_years, end_years): + # Open ARMOR3D monthly climatology dataset: + ds = xr.open_dataset(filepath, engine='zarr') + + # Optimise chunk sizes for spatial analysis: + for var in ds.data_vars: + if ds[var].ndim == 4: + ds[var] = ds[var].chunk({'month': 1, 'depth': 3, 'latitude': 689, 'longitude': 1440}) + ds[var].encoding['chunks'] = (1, 3, 689, 1440) + elif ds[var].ndim == 3: + if "month" in ds[var].dims: + ds[var] = ds[var].chunk({'month': 1, 'latitude': 1378, 'longitude': 2880}) + ds[var].encoding['chunks'] = (1, 1378, 2880) + elif "depth" in ds[var].dims: + ds[var] = ds[var].chunk({'depth': 10, 'latitude': 1378, 'longitude': 2880}) + ds[var].encoding['chunks'] = (10, 1378, 2880) + elif (ds[var].ndim == 2): + if "latitude" in ds[var].dims and "longitude" in ds[var].dims: + ds[var] = ds[var].chunk({'latitude': 1378, 'longitude': 2880}) + ds[var].encoding['chunks'] = (1378, 2880) + elif ds[var].ndim == 1: + ds[var] = ds[var].chunk({'depth': 50}) + ds[var].encoding['chunks'] = (50,) + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=5, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds.data_vars) + list(ds.coords): + ds[var].encoding['compressors'] = [blosccodec] + + # Define prefix and commit message based on climatology period: + prefix = f"armor3d_global_my_{start_yr}_{end_yr}_monthly_climatology" + commit_message = f"Added ARMOR3D Global monthly climatology ({start_yr}-{end_yr})." + + send_to_icechunk( + file=ds.drop_vars("time"), + bucket=bucket, + object_prefix=prefix, + store_credentials_json=store_credentials_json, + exists=exists, + append_dim='month', + branch=branch, + commit_message=commit_message, + variable_commits=variable_commits, + dask_config_kwargs=config_kwargs, + dask_cluster_kwargs=cluster_kwargs, + ) + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py b/OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py new file mode 100755 index 00000000..a044c26c --- /dev/null +++ b/OceanDataStore/data/ARMOR3D/send_ARMOR3D_P1m-m_monthly_to_os.py @@ -0,0 +1,147 @@ +# ========================================================= +# send_EN4.2.2_analyses_g10_to_os.py +# +# Script to write EN.4.2.2 analyses to Icechunk repository +# in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import xarray as xr +import zarr + +from OceanDataStore.cli import initialise_logging, send_to_icechunk +from OceanDataStore.data.utils import ( + compute_dx, + compute_dy, + compute_cell_area, + compute_cell_thickness, + compute_land_sea_mask, +) + +logger = logging.getLogger(__name__) + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + + # ========== Prepare Data ========== # + # Open complete ARMOR3D REP monthly climatology dataset: + filepath = "/dssgfs01/scratch/otooth/npd_data/observations/ARMOR3D/armor-3d_rep_monthly_NA_*.zarr" + ds = xr.open_mfdataset(filepath, compat="no_conflicts", data_vars="all", engine="zarr") + logging.info("-> Completed: Opened ARMOR-3D REP monthly climatology dataset from Zarr stores.") + + # Rename variables to standard names: + ds = ds.rename({"to": "thetao", + "so": "so", + }) + + # Update global attributes: + ds.attrs.clear() + + ds = ds.assign_attrs({ + "Conventions": "CF-1.0", + "title": "Multi Observation Global Ocean 3D Temperature Salinity Height Geostrophic Current and MLD.", + "description": "Multi Observation Global Ocean ARMOR3D multi-year reprocessed temperature salinity, sea surface height, geostrophic current and mixed layer depth monthly timeseries on 1/8 degree regular grid and 50 depth levels.", + "source": "Numerical models: Multiple Linear Regression, Optimal Interpolation. In-situ observations: Copernicus In Situ TAC (including Argo, XBT, CTD and moorings) Copernicus Sea Level TAC, CNES-CLS22 Mean Dynamic Topography, OSTIA Sea Surface Temperature Analysis, Copernicus MOB TAC (Sea Surface Salinity), and World Ocean Atlas 2018 (WOA18).", + "dataset_type": "observation", + "product_type": "timeseries", + "product_version": "2.0", + "institution": "Copernicus Marine Service, Mercator Ocean International, France", + "citation": "Multi Observation Global Ocean 3D Temperature Salinity Height Geostrophic Current and MLD. E.U. Copernicus Marine Service Information (CMEMS). Marine Data Store (MDS). DOI: 10.48670/moi-00052 (Accessed on 21 04 2026).", + "references": "Guinehut S., A.-L. Dhomps, G. Larnicol and P.-Y. Le Traon, 2012: High resolution 3D temperature and salinity fields derived from in situ and satellite observations. Ocean Sci., 8(5):845-857. Mulet, S., M.-H. Rio, A. Mignot, S. Guinehut and R. Morrow, 2012: A new estimate of the global 3D geostrophic ocean circulation based on satellite data and in-situ measurements. Deep Sea Research Part II : Topical Studies in Oceanography, 77-80(0):70-81.", + "acknowledgement": "Generated using E.U. Copernicus Marine Service Information; https://doi.org/10.48670/moi-00052.", + "license": "ARMOR3D data were obtained from https://doi.org/10.48670/moi-00052, and are provided under the Copernicus Marine Environment Monitoring Service Service Level Agreement (SLA) https://marine.copernicus.eu/user-corner/service-commitments-and-licence?pk_vid=42ac3e352be888641780994034c3bb6e", + "doi": "10.48670/moi-00052", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "0.125 degree", + "vertical_grid_type": "z", + "vertical_grid_coordinate": "depth", + "vertical_grid_levels": 50, + "aggregation": "mean", + "aggregation_frequency": "monthly", + "status": "ongoing", + "update_frequency": "quarterly", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + }) + + # Add ancillary variables: + ds['mask'] = compute_land_sea_mask(ds['thetao'].isel(time=0, depth=0)) + ds['dx'] = compute_dx(ds) + ds['dy'] = compute_dy(ds) + ds['cell_area'] = compute_cell_area(ds) + # Custom ancillary variables: + ds['cell_thickness'] = compute_cell_thickness(ds) + ds['cell_volume'] = ds['cell_thickness'] * ds['cell_area'] + + # Update attributes for custom ancillary variables: + ds['cell_volume'].attrs.update({ + 'long_name': "Grid-Cell Volume", + 'standard_name': "cell_volume", + 'units': "m3", + }) + + # ========== Send to Icechunk Repository ========== # + bucket = "armor3d" + exists = False + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + variable_commits = True + config_kwargs = { + "temporary_directory":".../OceanDataStore/OceanDataStore/data/ARMOR3D/", + "local_directory":".../OceanDataStore/OceanDataStore/data/ARMOR3D/" + } + cluster_kwargs = { + "n_workers" : 25, + "threads_per_worker" : 1, + "memory_limit":"4GB" + } + + # Optimise chunk sizes for spatial analysis: + for var in ds.data_vars: + if ds[var].ndim == 4: + ds[var] = ds[var].chunk({'time': 1, 'depth': 3, 'latitude': 689, 'longitude': 1440}) + ds[var].encoding['chunks'] = (1, 3, 689, 1440) + elif ds[var].ndim == 3: + if "time" in ds[var].dims: + ds[var] = ds[var].chunk({'time': 1, 'latitude': 1378, 'longitude': 2880}) + ds[var].encoding['chunks'] = (1, 1378, 2880) + elif "depth" in ds[var].dims: + ds[var] = ds[var].chunk({'depth': 10, 'latitude': 1378, 'longitude': 2880}) + ds[var].encoding['chunks'] = (10, 1378, 2880) + elif (ds[var].ndim == 2): + if "latitude" in ds[var].dims and "longitude" in ds[var].dims: + ds[var] = ds[var].chunk({'latitude': 1378, 'longitude': 2880}) + ds[var].encoding['chunks'] = (1378, 2880) + elif ds[var].ndim == 1: + ds[var] = ds[var].chunk({'depth': 50}) + ds[var].encoding['chunks'] = (50,) + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=5, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds.data_vars) + list(ds.coords): + ds[var].encoding['compressors'] = [blosccodec] + + # Define prefix and commit message based on period: + prefix = "armor3d_global_my_monthly" + commit_message = "Added ARMOR3D Global Monthly (1993-1999)." + + send_to_icechunk( + file=ds.sel(time=slice("1993-01-01", "1999-12-31")), + bucket=bucket, + object_prefix=prefix, + store_credentials_json=store_credentials_json, + exists=exists, + append_dim='time', + branch=branch, + commit_message=commit_message, + variable_commits=variable_commits, + dask_config_kwargs=config_kwargs, + dask_cluster_kwargs=cluster_kwargs, + ) + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py b/OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py new file mode 100755 index 00000000..3d500270 --- /dev/null +++ b/OceanDataStore/data/ARMOR3D/update_ARMOR3D_P1m-m_monthly_to_os.py @@ -0,0 +1,143 @@ +# ========================================================= +# update_EN4.2.2_analyses_g10_to_os.py +# +# Script to update EN.4.2.2 analyses in Icechunk repository +# in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import xarray as xr +import zarr + +from OceanDataStore.cli import initialise_logging, update_icechunk +from OceanDataStore.data.utils import ( + compute_dx, + compute_dy, + compute_cell_area, + compute_cell_thickness, + compute_land_sea_mask, +) + +logger = logging.getLogger(__name__) + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + + # ========== Prepare Data ========== # + # Open complete ARMOR3D REP monthly climatology dataset: + filepath = "/dssgfs01/scratch/otooth/npd_data/observations/ARMOR3D/armor-3d_rep_monthly_NA_*.zarr" + ds = xr.open_mfdataset(filepath, compat="no_conflicts", data_vars="all", engine="zarr") + logging.info("-> Completed: Opened ARMOR-3D REP monthly climatology dataset from Zarr stores.") + + # Rename variables to standard names: + ds = ds.rename({"to": "thetao", + "so": "so", + }) + + # Update global attributes: + ds.attrs.clear() + + ds = ds.assign_attrs({ + "Conventions": "CF-1.0", + "title": "Multi Observation Global Ocean 3D Temperature Salinity Height Geostrophic Current and MLD.", + "description": "Multi Observation Global Ocean ARMOR3D multi-year reprocessed temperature salinity, sea surface height, geostrophic current and mixed layer depth monthly timeseries on 1/8 degree regular grid and 50 depth levels.", + "source": "Numerical models: Multiple Linear Regression, Optimal Interpolation. In-situ observations: Copernicus In Situ TAC (including Argo, XBT, CTD and moorings) Copernicus Sea Level TAC, CNES-CLS22 Mean Dynamic Topography, OSTIA Sea Surface Temperature Analysis, Copernicus MOB TAC (Sea Surface Salinity), and World Ocean Atlas 2018 (WOA18).", + "dataset_type": "observation", + "product_type": "timeseries", + "product_version": "2.0", + "institution": "Copernicus Marine Service, Mercator Ocean International, France", + "citation": "Multi Observation Global Ocean 3D Temperature Salinity Height Geostrophic Current and MLD. E.U. Copernicus Marine Service Information (CMEMS). Marine Data Store (MDS). DOI: 10.48670/moi-00052 (Accessed on 21 04 2026).", + "references": "Guinehut S., A.-L. Dhomps, G. Larnicol and P.-Y. Le Traon, 2012: High resolution 3D temperature and salinity fields derived from in situ and satellite observations. Ocean Sci., 8(5):845-857. Mulet, S., M.-H. Rio, A. Mignot, S. Guinehut and R. Morrow, 2012: A new estimate of the global 3D geostrophic ocean circulation based on satellite data and in-situ measurements. Deep Sea Research Part II : Topical Studies in Oceanography, 77-80(0):70-81.", + "acknowledgement": "Generated using E.U. Copernicus Marine Service Information; https://doi.org/10.48670/moi-00052.", + "license": "ARMOR3D data were obtained from https://doi.org/10.48670/moi-00052, and are provided under the Copernicus Marine Environment Monitoring Service Service Level Agreement (SLA) https://marine.copernicus.eu/user-corner/service-commitments-and-licence?pk_vid=42ac3e352be888641780994034c3bb6e", + "doi": "10.48670/moi-00052", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "0.125 degree", + "vertical_grid_type": "z", + "vertical_grid_coordinate": "depth", + "vertical_grid_levels": 50, + "aggregation": "mean", + "aggregation_frequency": "monthly", + "status": "ongoing", + "update_frequency": "quarterly", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + }) + + # Add ancillary variables: + ds['mask'] = compute_land_sea_mask(ds['thetao'].isel(time=0, depth=0)) + ds['dx'] = compute_dx(ds) + ds['dy'] = compute_dy(ds) + ds['cell_area'] = compute_cell_area(ds) + # Custom ancillary variables: + ds['cell_thickness'] = compute_cell_thickness(ds) + ds['cell_volume'] = ds['cell_thickness'] * ds['cell_area'] + + # Update attributes for custom ancillary variables: + ds['cell_volume'].attrs.update({ + 'long_name': "Grid-Cell Volume", + 'standard_name': "cell_volume", + 'units': "m3", + }) + + # ========== Send to Icechunk Repository ========== # + bucket = "armor3d" + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + config_kwargs = { + "temporary_directory":".../OceanDataStore/OceanDataStore/data/ARMOR3D/", + "local_directory":".../OceanDataStore/OceanDataStore/data/ARMOR3D/" + } + cluster_kwargs = { + "n_workers" : 25, + "threads_per_worker" : 1, + "memory_limit":"4GB" + } + + # Optimise chunk sizes for spatial analysis: + for var in ds.data_vars: + if ds[var].ndim == 4: + ds[var] = ds[var].chunk({'time': 1, 'depth': 3, 'latitude': 689, 'longitude': 1440}) + ds[var].encoding['chunks'] = (1, 3, 689, 1440) + elif ds[var].ndim == 3: + if "time" in ds[var].dims: + ds[var] = ds[var].chunk({'time': 1, 'latitude': 1378, 'longitude': 2880}) + ds[var].encoding['chunks'] = (1, 1378, 2880) + elif "depth" in ds[var].dims: + ds[var] = ds[var].chunk({'depth': 10, 'latitude': 1378, 'longitude': 2880}) + ds[var].encoding['chunks'] = (10, 1378, 2880) + elif (ds[var].ndim == 2): + if "latitude" in ds[var].dims and "longitude" in ds[var].dims: + ds[var] = ds[var].chunk({'latitude': 1378, 'longitude': 2880}) + ds[var].encoding['chunks'] = (1378, 2880) + elif ds[var].ndim == 1: + ds[var] = ds[var].chunk({'depth': 50}) + ds[var].encoding['chunks'] = (50,) + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=5, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds.data_vars) + list(ds.coords): + ds[var].encoding['compressors'] = [blosccodec] + + # Define prefix and commit message based on period: + prefix = "armor3d_global_my_monthly" + commit_message = "Added ARMOR3D Global Monthly (2000-2024)." + + update_icechunk( + file=ds.sel(time=slice("2000-01-01", "2024-12-31")), + bucket=bucket, + object_prefix=prefix, + store_credentials_json=store_credentials_json, + append_dim='time', + branch=branch, + commit_message=commit_message, + dask_config_kwargs=config_kwargs, + dask_cluster_kwargs=cluster_kwargs, + ) + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py b/OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py new file mode 100755 index 00000000..75e55cc4 --- /dev/null +++ b/OceanDataStore/data/EN.4.2.2/create_EN4.2.2_analysis_g10_climatology.py @@ -0,0 +1,162 @@ +# ========================================================= +# create_EN4.2.2_analysis_g10_climatology.py +# +# Script to create EN.4.2.2 analyses climatologies for +# climate normals (1971-2000, 1981-2010, 1991-2020) and +# write to local netCDF files. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import numpy as np +import xarray as xr +import zarr + +from OceanDataStore.cli import initialise_logging +from OceanDataStore.data.utils import ( + compute_cell_area, + compute_dx, + compute_dy, + compute_land_sea_mask, +) + +logger = logging.getLogger(__name__) + +def main(): + # ========== Initialize Logging and Print Banner ========== # + initialise_logging() + + # ========== Prepare Data ========== # + # Open complete EN.4.2.2 analysis dataset: + filepath = "/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/EN.4.2.2.f.analysis.g10.*.nc" + ds = xr.open_mfdataset(filepath, combine="by_coords", data_vars="all", engine="netcdf4") + logging.info("-> Completed: Opened EN.4.2.2 analysis dataset from netCDF files.") + + # Standardise coordinate dimension names: + ds = ds.rename({"lon": "longitude", "lat": "latitude"}) + + # Update longitude coordinates to be in the range [-180, 180]: + ds = ds.assign_coords( + longitude=((ds["longitude"] + 180) % 360) - 180 + ) + ds = ds.sortby("longitude") + + # Rename variables to standard names: + ds = ds.rename({"temperature": "thetao", + "salinity": "so", + "temperature_uncertainty": "thetao_uncertainty", + "salinity_uncertainty": "so_uncertainty", + "temperature_observation_weights": "thetao_obs_weights", + "salinity_observation_weights": "so_obs_weights" + }) + + # Update variable attributes: + ds["thetao"].attrs.update({ + "long_name": "Potential Temperature", + }) + ds["so"].attrs.update({ + "long_name": "Practical Salinity", + }) + ds["thetao_uncertainty"].attrs.update({ + "long_name": "Potential Temperature Error Standard Deviation", + }) + ds["so_uncertainty"].attrs.update({ + "long_name": "Practical Salinity Error Standard Deviation", + }) + ds["thetao_obs_weights"].attrs.update({ + "long_name": "Potential Temperature Observation Weights", + }) + ds["so_obs_weights"].attrs.update({ + "long_name": "Practical Salinity Observation Weights", + }) + + # Update global attributes: + ds.attrs.clear() + + ds = ds.assign_attrs({ + "Conventions": "CF-1.0", + "title": "EN.4.2.2 ocean temperature and salinity monthly climatology.", + "description": "EN.4.2.2 quality controlled ocean temperature and salinity monthly climatology from objective analyses with uncertainty estimates using Gouretski and Reseghetti (2010) corrections.", + "source": "Numerical models: Objective Analysis. In-situ observations: Argo, Arctic Synoptic Basin-wide Oceanography (ASBO) project, Global Temperature and Salinity Profile Programme (GTSPP), and World Ocean Database 2018 (WOD18).", + "dataset_type": "observation", + "product_type": "climatology", + "product_version": "1.0", + "institution": "Met Office, UK", + "citation": "Good, S. A., Martin, M. J., and Rayner, N. A., 2013. EN4: quality controlled ocean temperature and salinity profiles and monthly objective analyses with uncertainty estimates, Journal of Geophysical Research: Oceans, 118, 6704-6716, doi:10.1002/2013JC009067.", + "references": "Gouretski, V., and Reseghetti, F., 2010: On depth and temperature biases in bathythermograph data: development of a new correction scheme based on analysis of a global ocean database. Deep-Sea Research I, 57, 6. doi:10.1016/j.dsr.2010.03.011.", + "acknowledgement": "None", + "license": "EN.4.2.2 data were obtained from https://www.metoffice.gov.uk/hadobs/en4/ and are © Crown Copyright, Met Office, [2026], provided under a Non-Commercial Government Licence http://www.nationalarchives.gov.uk/doc/non-commercial-government-licence/version/2/.", + "doi": "None", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "1 degree", + "vertical_grid_type": "z", + "vertical_grid_coordinate": "depth", + "vertical_grid_levels": 42, + "aggregation": "mean", + "aggregation_frequency": "monthly", + "status": "completed", + "update_frequency": "None", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + }) + + logging.info("-> Completed: Updated EN.4.2.2 analysis CF-metadata.") + + # -- Calculate climate normal monthly climatologies -- # + output_dir = "/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/climatology/" + start_years = [1971, 1981, 1991] + end_years = [2000, 2010, 2020] + + for start_year, end_year in zip(start_years, end_years): + logging.info(f"In Progress: Calculating monthly climatology for {start_year}-{end_year} climate normal period...") + # Calculate monthly climatology for the specified period: + ds_climatology = ds.sel(time=slice(f'{start_year}-01', f'{end_year}-12')).groupby('time.month').mean() + + # Add ancillary variables: + ds_climatology['mask'] = compute_land_sea_mask(ds['thetao'].isel(time=0, depth=0)) + ds_climatology['dx'] = compute_dx(ds) + ds_climatology['dy'] = compute_dy(ds) + ds_climatology['cell_area'] = compute_cell_area(ds) + # Custom ancillary variables: + ds_climatology['cell_thickness'] = (ds_climatology['depth_bnds'].isel(bnds=1) - ds_climatology['depth_bnds'].isel(bnds=0)).isel(month=0) + ds_climatology['cell_volume'] = ds_climatology['cell_thickness'] * ds_climatology['cell_area'] + + # Update attributes for custom ancillary variables: + ds_climatology['cell_thickness'].attrs.update({ + 'long_name': "Grid-Cell Thickness", + 'standard_name': "cell_thickness", + 'units': "m", + }) + ds_climatology['cell_volume'].attrs.update({ + 'long_name': "Grid-Cell Volume", + 'standard_name': "cell_volume", + 'units': "m3", + }) + + # Update time bounds to reflect climatological period: + ds_climatology['time_bnds'][:, 0] = np.arange(f'{start_year}-01', f'{start_year+1}-01', dtype='datetime64[M]') + ds_climatology['time_bnds'][:, 1] = np.arange(f'{end_year}-01', f'{end_year+1}-01', dtype='datetime64[M]') + logging.info(f"-> Completed: Calculated monthly climatology for {start_year}-{end_year} climate normal period.") + + # Update title attribute to reflect climatological period: + ds_climatology.attrs['title'] = f"EN.4.2.2 ocean temperature and salinity monthly climatology ({start_year}-{end_year})." + + ds_climatology.attrs['description'] = f"EN.4.2.2: quality controlled ocean temperature and salinity monthly climatology ({start_year}-{end_year}) from objective analyses with uncertainty estimates using Gouretski and Reseghetti (2010) corrections." + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds_climatology.data_vars) + list(ds_climatology.coords): + ds_climatology[var].encoding['compressors'] = [blosccodec] + + # Write monthly climatology to netCDF: + output_filepath = f"{output_dir}EN.4.2.2.f.analysis.g10.{start_year}_{end_year}_monthly_climatology.nc" + ds_climatology.to_netcdf(output_filepath) + logging.info(f"-> Completed: Saved monthly climatology for {start_year}-{end_year} climate normal period to {output_filepath}.") + + # -- Close EN.4.2.2 analysis datasets -- # + ds_climatology.close() + ds.close() + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh b/OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh new file mode 100755 index 00000000..cdf905ac --- /dev/null +++ b/OceanDataStore/data/EN.4.2.2/download_EN4.2.2_analysis_g10_data.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +# ---------------------------------------------------------------- +# download_EN4.2.2_analyses_g10_data.sh +# +# Description: Download the EN.4.2.2 analyses.g10 dataset from the +# Met Office Hadley Centre EN.4.2.2 website: +# http://www.metoffice.gov.uk/hadobs/en4 +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-05-27 +# ---------------------------------------------------------------- +set -euo pipefail + +# --- Inputs --- # +output_dir="/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/" + +# -- Defaults -- # +base_url="http://www.metoffice.gov.uk/hadobs/en4/data/en4-2-1" + +# --- Main Script --- # +echo "===================================================" +echo " Downloading EN.4.2.2 Analyses" +echo " v0.1.0" +echo " Oliver J. Tooth, NOC" +echo "===================================================" +echo "In Progress: Downloading EN.4.2.2 analyses dataset..." +# Iterate over years: +for yr in {1990..2026}; do + # Construct URL for current year: + if [ $yr -ge 2021 ]; then + url="$base_url/EN.4.2.2.analyses.g10.${yr}.zip" + else + url="$base_url/EN.4.2.2/EN.4.2.2.analyses.g10.${yr}.zip" + fi + + # Download and unzip file if not in output directory: + nc_files=("${output_dir}/EN.4.2.2.f.analysis.g10.${yr}"*.nc) + filepath="$output_dir/$(basename $url)" + if [ ${#nc_files[@]} -ne 12 ]; then + wget -P $output_dir $url + echo "-> Completed: Downloaded $filepath." + + unzip "$filepath" -d $output_dir + echo "-> Completed: Unzipped $filepath." + else + echo "-> Skipping Download: NetCDF files for ${yr} already exist in $output_dir." + fi +done + +echo "=======================================" diff --git a/OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm b/OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm new file mode 100755 index 00000000..e6cce063 --- /dev/null +++ b/OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=en4.2.2_analysis_g10_climatology +#SBATCH --partition=test +#SBATCH --time=00:20:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ============================================================== +# run_send_EN4.2.2_analysis_g10_climatology_to_os.slurm +# +# Description: SLURM script to send the EN.4.2.2 analysis g10 +# climatology datasets to Icechunk repository. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-05-29 +# +# ============================================================== +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source .../miniforge3/bin/activate +conda activate env_ods + +# -- Send EN.4.2.2.analysis.g10 climatology datasets to JASMIN OS -- # +echo "In Progress: Sending EN.4.2.2.analysis.g10 climatology to Icechunk..." + +python3 send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py + +echo "Completed: Sent EN.4.2.2.analysis.g10 climatology to Icechunk." diff --git a/OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm b/OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm new file mode 100755 index 00000000..82c3ad6f --- /dev/null +++ b/OceanDataStore/data/EN.4.2.2/run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=en4.2.2_analysis_g10_monthly +#SBATCH --partition=compute +#SBATCH --time=01:00:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ============================================================== +# run_send_EN4.2.2_analysis_g10_monthly_to_os.slurm +# +# Description: SLURM script to send the EN.4.2.2 analysis g10 +# monthly datasets to Icechunk repository. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-05-29 +# +# ============================================================== +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source .../miniforge3/bin/activate +conda activate env_ods + +# -- Send EN.4.2.2.analysis.g10 monthly to JASMIN OS -- # +echo "In Progress: Sending EN.4.2.2.analysis.g10 monthly to Icechunk..." + +python3 send_EN4.2.2_analysis_g10_monthly_to_os.py + +echo "Completed: Sent EN.4.2.2.analysis.g10 monthly to Icechunk." diff --git a/OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm b/OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm new file mode 100755 index 00000000..516362b0 --- /dev/null +++ b/OceanDataStore/data/EN.4.2.2/run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=en4.2.2_analysis_g10_monthly +#SBATCH --partition=compute +#SBATCH --time=01:00:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ============================================================== +# run_update_EN4.2.2_analysis_g10_monthly_to_os.slurm +# +# Description: SLURM script to update the EN.4.2.2 analysis g10 +# monthly datasets in Icechunk repository. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-05-29 +# +# ============================================================== +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source .../miniforge3/bin/activate +conda activate env_ods + +# -- Update EN.4.2.2.analysis.g10 monthly in JASMIN OS -- # +echo "In Progress: Updating EN.4.2.2.analysis.g10 monthly in Icechunk..." + +python3 update_EN4.2.2_analysis_g10_monthly_to_os.py + +echo "Completed: Updated EN.4.2.2.analysis.g10 monthly in Icechunk." diff --git a/OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py b/OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py new file mode 100755 index 00000000..13f680e2 --- /dev/null +++ b/OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_climatology_to_os.py @@ -0,0 +1,76 @@ +# ========================================================= +# send_EN4.2.2_analysis_g10_climatology_to_os.py +# +# Script to write EN.4.2.2 analysis climatologies to +# Icechunk repositories in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import xarray as xr + +from OceanDataStore.cli import send_to_icechunk, initialise_logging + +logger = logging.getLogger(__name__) + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + + # ========== Prepare Data ========== # + # Open EN.4.2.2 analysis climatology datasets: + filepaths = [ + "/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/climatology/EN.4.2.2.f.analysis.g10.1971_2000_monthly_climatology.nc", + "/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/climatology/EN.4.2.2.f.analysis.g10.1981_2010_monthly_climatology.nc", + "/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/climatology/EN.4.2.2.f.analysis.g10.1991_2020_monthly_climatology.nc" + ] + + # Define start & end years of climatology periods: + start_years = [1971, 1981, 1991] + end_years = [2000, 2010, 2020] + + # ========== Send to Icechunk Repository ========== # + bucket = "en4.2.2" + exists = False + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + variable_commits = True + config_kwargs = { + "temporary_directory":".../OceanDataStore/OceanDataStore/data/EN.4.2.2/", + "local_directory":".../OceanDataStore/OceanDataStore/data/EN.4.2.2/" + } + cluster_kwargs = { + "n_workers" : 10, + "threads_per_worker" : 1, + "memory_limit":"3GB" + } + + for filepath, start_yr, end_yr in zip(filepaths, start_years, end_years): + # Open EN.4.2.2 analysis climatology dataset: + ds = xr.open_dataset(filepath, engine='netcdf4') + + # Optimise chunk sizes for spatial analysis: + ds = ds.chunk({'month': 1, 'depth': 20, 'latitude': 173, 'longitude': 360}) + + # Define prefix and commit message based on climatology period: + prefix = f"en4.2.2_analysis_g10_{start_yr}_{end_yr}_monthly_climatology" + commit_message = f"Added EN.4.2.2.analysis.g10 climatology ({start_yr}-{end_yr})." + + send_to_icechunk( + file=ds, + bucket=bucket, + object_prefix=prefix, + store_credentials_json=store_credentials_json, + exists=exists, + append_dim='month', + branch=branch, + commit_message=commit_message, + variable_commits=variable_commits, + dask_config_kwargs=config_kwargs, + dask_cluster_kwargs=cluster_kwargs, + ) + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py b/OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py new file mode 100755 index 00000000..66c2198a --- /dev/null +++ b/OceanDataStore/data/EN.4.2.2/send_EN4.2.2_analysis_g10_monthly_to_os.py @@ -0,0 +1,165 @@ +# ========================================================= +# send_EN4.2.2_analyses_g10_to_os.py +# +# Script to write EN.4.2.2 analyses to Icechunk repository +# in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import xarray as xr +import zarr + +from OceanDataStore.cli import initialise_logging, send_to_icechunk +from OceanDataStore.data.utils import ( + compute_cell_area, + compute_dx, + compute_dy, + compute_land_sea_mask, +) + +logger = logging.getLogger(__name__) + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + + # ========== Prepare Data ========== # + # Open EN.4.2.2 analyses dataset: + filepath = "/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/EN.4.2.2.f.analysis.g10.19*.nc" + ds = xr.open_mfdataset(filepath, combine="by_coords", data_vars="all", engine="netcdf4") + + # Standardise coordinate dimension names: + ds = ds.rename({"lon": "longitude", "lat": "latitude"}) + + # Update longitude coordinates to be in the range [-180, 180]: + ds = ds.assign_coords( + longitude=((ds["longitude"] + 180) % 360) - 180 + ) + ds = ds.sortby("longitude") + + # Rename variables to standard names: + ds = ds.rename({"temperature": "thetao", + "salinity": "so", + "temperature_uncertainty": "thetao_uncertainty", + "salinity_uncertainty": "so_uncertainty", + "temperature_observation_weights": "thetao_obs_weights", + "salinity_observation_weights": "so_obs_weights" + }) + + # Update variable attributes: + ds["thetao"].attrs.update({ + "long_name": "Potential Temperature", + }) + ds["so"].attrs.update({ + "long_name": "Practical Salinity", + }) + ds["thetao_uncertainty"].attrs.update({ + "long_name": "Potential Temperature Error Standard Deviation", + }) + ds["so_uncertainty"].attrs.update({ + "long_name": "Practical Salinity Error Standard Deviation", + }) + ds["thetao_obs_weights"].attrs.update({ + "long_name": "Potential Temperature Observation Weights", + }) + ds["so_obs_weights"].attrs.update({ + "long_name": "Practical Salinity Observation Weights", + }) + + # Update global attributes: + ds.attrs.clear() + + ds = ds.assign_attrs({ + "Conventions": "CF-1.0", + "title": "EN.4.2.2 ocean temperature and salinity monthly timeseries.", + "description": "EN.4.2.2 quality controlled ocean temperature and salinity monthly timeseries from objective analyses with uncertainty estimates using Gouretski and Reseghetti (2010) corrections.", + "source": "Numerical models: Objective Analysis. In-situ observations: Argo, Arctic Synoptic Basin-wide Oceanography (ASBO) project, Global Temperature and Salinity Profile Programme (GTSPP), and World Ocean Database 2018 (WOD18).", + "dataset_type": "observation", + "product_type": "timeseries", + "product_version": "1.0", + "institution": "Met Office, UK", + "citation": "Good, S. A., Martin, M. J., and Rayner, N. A., 2013. EN4: quality controlled ocean temperature and salinity profiles and monthly objective analyses with uncertainty estimates, Journal of Geophysical Research: Oceans, 118, 6704-6716, doi:10.1002/2013JC009067.", + "references": "Gouretski, V., and Reseghetti, F., 2010: On depth and temperature biases in bathythermograph data: development of a new correction scheme based on analysis of a global ocean database. Deep-Sea Research I, 57, 6. doi:10.1016/j.dsr.2010.03.011.", + "acknowledgement": "None", + "license": "EN.4.2.2 data were obtained from https://www.metoffice.gov.uk/hadobs/en4/ and are © Crown Copyright, Met Office, [2026], provided under a Non-Commercial Government Licence http://www.nationalarchives.gov.uk/doc/non-commercial-government-licence/version/2/.", + "doi": "None", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "1 degree", + "vertical_grid_type": "z", + "vertical_grid_coordinate": "depth", + "vertical_grid_levels": 42, + "aggregation": "mean", + "aggregation_frequency": "monthly", + "status": "ongoing", + "update_frequency": "quarterly", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + }) + + # Add ancillary variables: + ds['mask'] = compute_land_sea_mask(ds['thetao'].isel(time=0, depth=0)) + ds['dx'] = compute_dx(ds) + ds['dy'] = compute_dy(ds) + ds['cell_area'] = compute_cell_area(ds) + + # Custom ancillary variables: + ds['cell_thickness'] = (ds['depth_bnds'].isel(bnds=1) - ds['depth_bnds'].isel(bnds=0)).isel(time=0) + ds['cell_volume'] = ds['cell_thickness'] * ds['cell_area'] + + # Update attributes for custom ancillary variables: + ds['cell_thickness'].attrs.update({ + 'long_name': "Grid-Cell Thickness", + 'standard_name': "cell_thickness", + 'units': "m", + }) + ds['cell_volume'].attrs.update({ + 'long_name': "Grid-Cell Volume", + 'standard_name': "cell_volume", + 'units': "m3", + }) + + # ========== Send to Icechunk Repository ========== # + bucket = "en4.2.2" + prefix = "en4.2.2_analysis_g10_monthly" + exists = False + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + commit_message = "Added EN.4.2.2.analysis.g10 monthly (1950-01-1999-12)." + variable_commits = True + config_kwargs = { + "temporary_directory":".../OceanDataStore/OceanDataStore/data/EN.4.2.2/", + "local_directory":".../OceanDataStore/OceanDataStore/data/EN.4.2.2/" + } + cluster_kwargs = { + "n_workers" : 25, + "threads_per_worker" : 1, + "memory_limit":"3GB" + } + + # Optimise chunk sizes for spatial analysis: + ds = ds.chunk({'time': 1, 'depth': 20, 'latitude': 173, 'longitude': 360}) + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds.data_vars) + list(ds.coords): + ds[var].encoding['compressors'] = [blosccodec] + + send_to_icechunk( + file=ds, + bucket=bucket, + object_prefix=prefix, + store_credentials_json=store_credentials_json, + exists=exists, + append_dim='time', + branch=branch, + commit_message=commit_message, + variable_commits=variable_commits, + dask_config_kwargs=config_kwargs, + dask_cluster_kwargs=cluster_kwargs, + ) + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py b/OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py new file mode 100755 index 00000000..a26e1c8a --- /dev/null +++ b/OceanDataStore/data/EN.4.2.2/update_EN4.2.2_analysis_g10_monthly_to_os.py @@ -0,0 +1,161 @@ +# ========================================================= +# update_EN4.2.2_analyses_g10_to_os.py +# +# Script to update EN.4.2.2 analyses in Icechunk repository +# in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import xarray as xr +import zarr + +from OceanDataStore.cli import initialise_logging, update_icechunk +from OceanDataStore.data.utils import ( + compute_cell_area, + compute_dx, + compute_dy, + compute_land_sea_mask, +) + +logger = logging.getLogger(__name__) + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + + # ========== Prepare Data ========== # + # Open EN.4.2.2 analyses dataset: + filepath = "/dssgfs01/scratch/otooth/npd_data/observations/EN.4.2.2/EN.4.2.2.f.analysis.g10.20*.nc" + ds = xr.open_mfdataset(filepath, combine="by_coords", data_vars="all", engine="netcdf4") + + # Standardise coordinate dimension names: + ds = ds.rename({"lon": "longitude", "lat": "latitude"}) + + # Update longitude coordinates to be in the range [-180, 180]: + ds = ds.assign_coords( + longitude=((ds["longitude"] + 180) % 360) - 180 + ) + ds = ds.sortby("longitude") + + # Rename variables to standard names: + ds = ds.rename({"temperature": "thetao", + "salinity": "so", + "temperature_uncertainty": "thetao_uncertainty", + "salinity_uncertainty": "so_uncertainty", + "temperature_observation_weights": "thetao_obs_weights", + "salinity_observation_weights": "so_obs_weights" + }) + + # Update variable attributes: + ds["thetao"].attrs.update({ + "long_name": "Potential Temperature", + }) + ds["so"].attrs.update({ + "long_name": "Practical Salinity", + }) + ds["thetao_uncertainty"].attrs.update({ + "long_name": "Potential Temperature Error Standard Deviation", + }) + ds["so_uncertainty"].attrs.update({ + "long_name": "Practical Salinity Error Standard Deviation", + }) + ds["thetao_obs_weights"].attrs.update({ + "long_name": "Potential Temperature Observation Weights", + }) + ds["so_obs_weights"].attrs.update({ + "long_name": "Practical Salinity Observation Weights", + }) + + # Update global attributes: + ds.attrs.clear() + + ds = ds.assign_attrs({ + "Conventions": "CF-1.0", + "title": "EN.4.2.2 ocean temperature and salinity monthly timeseries.", + "description": "EN.4.2.2 quality controlled ocean temperature and salinity monthly timeseries from objective analyses with uncertainty estimates using Gouretski and Reseghetti (2010) corrections.", + "source": "Numerical models: Objective Analysis. In-situ observations: Argo, Arctic Synoptic Basin-wide Oceanography (ASBO) project, Global Temperature and Salinity Profile Programme (GTSPP), and World Ocean Database 2018 (WOD18).", + "dataset_type": "observation", + "product_type": "timeseries", + "product_version": "1.0", + "institution": "Met Office, UK", + "citation": "Good, S. A., Martin, M. J., and Rayner, N. A., 2013. EN4: quality controlled ocean temperature and salinity profiles and monthly objective analyses with uncertainty estimates, Journal of Geophysical Research: Oceans, 118, 6704-6716, doi:10.1002/2013JC009067.", + "references": "Gouretski, V., and Reseghetti, F., 2010: On depth and temperature biases in bathythermograph data: development of a new correction scheme based on analysis of a global ocean database. Deep-Sea Research I, 57, 6. doi:10.1016/j.dsr.2010.03.011.", + "acknowledgement": "None", + "license": "EN.4.2.2 data were obtained from https://www.metoffice.gov.uk/hadobs/en4/ and are © Crown Copyright, Met Office, [2026], provided under a Non-Commercial Government Licence http://www.nationalarchives.gov.uk/doc/non-commercial-government-licence/version/2/.", + "doi": "None", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "1 degree", + "vertical_grid_type": "z", + "vertical_grid_coordinate": "depth", + "vertical_grid_levels": 42, + "aggregation": "mean", + "aggregation_frequency": "monthly", + "status": "ongoing", + "update_frequency": "quarterly", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + }) + + # Add ancillary variables: + ds['mask'] = compute_land_sea_mask(ds['thetao'].isel(time=0, depth=0)) + ds['dx'] = compute_dx(ds) + ds['dy'] = compute_dy(ds) + ds['cell_area'] = compute_cell_area(ds) + + # Custom ancillary variables: + ds['cell_thickness'] = (ds['depth_bnds'].isel(bnds=1) - ds['depth_bnds'].isel(bnds=0)).isel(time=0) + ds['cell_volume'] = ds['cell_thickness'] * ds['cell_area'] + + # Update attributes for custom ancillary variables: + ds['cell_thickness'].attrs.update({ + 'long_name': "Grid-Cell Thickness", + 'standard_name': "cell_thickness", + 'units': "m", + }) + ds['cell_volume'].attrs.update({ + 'long_name': "Grid-Cell Volume", + 'standard_name': "cell_volume", + 'units': "m3", + }) + + # ========== Send to Icechunk Repository ========== # + bucket = "en4.2.2" + prefix = "en4.2.2_analysis_g10_monthly" + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + commit_message = "Added EN.4.2.2.analysis.g10 monthly (2000-01-2026-03)." + config_kwargs = { + "temporary_directory":".../OceanDataStore/OceanDataStore/data/EN.4.2.2/", + "local_directory":".../OceanDataStore/OceanDataStore/data/EN.4.2.2/" + } + cluster_kwargs = { + "n_workers" : 25, + "threads_per_worker" : 1, + "memory_limit":"3GB" + } + + # Optimise chunk sizes for spatial analysis: + ds = ds.chunk({'time': 1, 'depth': 20, 'latitude': 173, 'longitude': 360}) + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds.data_vars) + list(ds.coords): + ds[var].encoding['compressors'] = [blosccodec] + + update_icechunk( + file=ds, + bucket=bucket, + object_prefix=prefix, + store_credentials_json=store_credentials_json, + append_dim='time', + branch=branch, + commit_message=commit_message, + dask_config_kwargs=config_kwargs, + dask_cluster_kwargs=cluster_kwargs, + ) + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh b/OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh new file mode 100755 index 00000000..c21f24ec --- /dev/null +++ b/OceanDataStore/data/NSIDC/download_NSIDC_monthly_1979_2025_data.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +# ---------------------------------------------------------------- +# download_NSIDC_monthly_1979_2025_data.sh +# +# Description: Download the National Snow & Ice Data Centre (NSIDC) +# Sea Ice Index version 4 sea ice extent & concentration GeoTiff +# files from 1979 to 2025. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-05-27 +# ---------------------------------------------------------------- +set -euo pipefail + +# --- Inputs --- # +# Define hemisphere to download data for (options: "north" or "south"): +hemisphere="north" + +# Define output directory for downloaded files: +output_dir="/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/"$hemisphere"/" + +# Single year download: +single_year=True +# Define year to download if single_year is True: +year=2025 + +# -- Defaults -- # +# Default URL prefix: +url_prefix="https://noaadata.apps.nsidc.org/NOAA/G02135/"$hemisphere"/monthly/geotiff" + +# -- Main Script -- # +echo "===================================================" +echo " Downloading NSIDC Sea Ice Index" +echo " v4.0" +echo " Oliver J. Tooth, NOC" +echo "===================================================" +echo "In Progress: Downloading NSIDC Sea Ice Index dataset..." + +mkdir -p $output_dir +cd $output_dir + +# Download monthly sea ice extent & concentration files from 1979 to 2025: +for month in 01_Jan 02_Feb 03_Mar 04_Apr 05_May 06_Jun 07_Jul 08_Aug 09_Sep 10_Oct 11_Nov 12_Dec +do + if [ "$single_year" = True ]; then + echo "Downloading NSIDC $year Sea Ice Conc. GeoTiffs for: $month" + wget -r -nd --no-check-certificate --reject "index.html*" -np -e robots=off $url_prefix/$month/ -A "*_${year}*_v4.0.tif" + else + echo "Downloading NSIDC 1979-2025 Sea Ice Conc. GeoTiffs for: $month" + wget -r -nd --no-check-certificate --reject "index.html*" -np -e robots=off $url_prefix/$month/ + fi +done + +echo "-> Completed: Downloaded NSIDC" $hemisphere "Sea Ice Extent & Concentration GeoTiffs" diff --git a/OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py b/OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py new file mode 100755 index 00000000..4acdbecc --- /dev/null +++ b/OceanDataStore/data/NSIDC/process_NSIDC_SSI_Antarctic_data.py @@ -0,0 +1,130 @@ +""" +process_NSIDC_SSI_Antarctic_data.py + +Description: Python script to post-process NSIDC Sea Ice Index +data for the Antarctic 1978-2025, including sea ice concentration, +extent and total area. + +Created by: Ollie Tooth (oliver.tooth@noc.ac.uk) +Created on: 2025-02-21 +""" +# -- Import Python packages -- # +import logging +from datetime import datetime +from glob import glob + +import numpy as np +import xarray as xr + +from OceanDataStore.cli import initialise_logging + + +# -- Define Utility Functions -- # +def get_datetimes_from_filenames(file_list): + # Extract filenames from paths: + filenames = [file.split('/')[-1] for file in file_list] + + # Convert filenames date str to datetime: + datetimes = np.array([datetime(year=int(file[2:6]), month=int(file[6:8]), day=15) for file in filenames]) + + return datetimes + + +def main(): + # ========== Initialize Logging and Print Banner ========== # + initialise_logging() + + logging.info('In Progress: Post-Processing NSIDC Sea Ice Index Antarctic Observations...') + + # ========== Load NSIDC Ancillary Data ========== # + # Define filepath to ancillary data: + anc_fpath = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/ancillary/NSIDC0771_LatLon_PS_S25km_v1.0.nc" + # Open NSIDC ancillary data as dataset: + ds_si = xr.open_dataset(anc_fpath) + + # Define filepath to NSIDC ancillary file - grid cell area: + area_fpath = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/ancillary/NSIDC0771_CellArea_PS_S25km_v1.0.nc" + # Open NSIDC grid cell area: + ds_area = xr.open_dataset(area_fpath) + logging.info("-> Completed: Loaded NSIDC ancillary data and grid cell area.") + + # ========== Load NSIDC Monthly Data ========== # + # Define directory path: + dir_path = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/antarctic/" + + # Get the list of files in the directory: + file_paths = glob(f"{dir_path}*.tif") + file_paths.sort() + + # Retrieve sea ice mask & concentration files: + version_str = "v4.0" # Options: "v3.0" or "v4.0" + mask_files = [f for f in file_paths if f"extent_{version_str}.tif" in f] + conc_files = [f for f in file_paths if f"concentration_{version_str}.tif" in f] + + # ========== Post-Process Sea Ice Mask Data ========== # + # Define the time dimension: + time_simask = xr.DataArray(data=get_datetimes_from_filenames(file_list=mask_files), dims='time', name='time') + # Load and concatenate all sea ice mask GeoTIFFs: + simask = xr.concat([xr.open_dataset(i) for i in mask_files], dim=time_simask) + logging.info("-> Completed: Loaded NSIDC sea ice mask and concentration GeoTIFF files.") + + # Sea Ice Mask is defined by [1: sea ice, 0: ocean]: + # Values greater than 1 (missing or land) are set to NaN: + ds_si['simask'] = xr.where(simask['band_data'] > 1, np.nan, simask['band_data']).squeeze(drop=True) + ds_si["simask"].attrs = {'units': '1', "long_name": "Sea Ice Mask", "standard_name": "sea_ice_mask", "comment": "1 = sea ice, 0 = ocean"} + + # ========== Post-Process Sea Ice Concentration Data ========== # + # Define the time dimension: + time_siconc = xr.DataArray(data=get_datetimes_from_filenames(file_list=conc_files), dims='time', name='time') + # Load and concatenate all sea ice extent GeoTIFFs: + siconc = xr.concat([xr.open_dataset(i) for i in conc_files], dim=time_siconc) + logging.info("-> Completed: Loaded NSIDC sea ice concentration GeoTIFF files.") + + # Sea Ice Area Fraction: + # Note concentration percentage is scaled by 10 -> requires division by 1000. + # Values greater than 1 (missing or land) are set to NaN: + ds_si['siconc'] = xr.where(siconc['band_data'] > 1000, np.nan, siconc['band_data']).squeeze(drop=True) / 1000 + ds_si['siconc'].attrs = {'units': '1', 'long_name': 'Sea Ice Area Fraction', 'standard_name': 'sea_ice_area_fraction', "comment": "0 = ocean, 0.01-0.15 = statistically insignificant, > 0.15 = sea ice"} + + # ========== Calculate sea ice area (m2) ========== # + ds_si['cell_area'] = ds_area['cell_area'] + ds_si['cell_area'].attrs = {'units': 'm2', 'long_name': 'Grid-Cell Area for Sea Ice Variables', "standard_name": "cell_area"} + + ds_si['siextent'] = (ds_si['cell_area']*ds_si['simask']).sum(dim=['x', 'y']) + ds_si['siextent'].attrs = {'units': 'm2', 'long_name': 'Total Area Where Sea Ice Area Fraction Exceeds 15%', 'standard_name': 'sea_ice_extent'} + + # ========== Update Coordinates ========== # + ds_si.coords['lon'] = ds_si['longitude'] + ds_si.coords['lat'] = ds_si['latitude'] + # Drop auxiliary variables: + ds_si = ds_si.drop_vars(["spatial_ref", "crs", "longitude", "latitude"]) + # Rename coordinates: + ds_si = ds_si.rename({'lon': 'longitude', 'lat': 'latitude'}) + + # ========== Update attributes to ensure CF-compliance: ========== # + ds_si['x'].attrs = {'standard_name': 'projection_x_coordinate', 'long_name': 'x coordinate of projection', 'units': 'meters'} + ds_si['y'].attrs = {'standard_name': 'projection_y_coordinate', 'long_name': 'y coordinate of projection', 'units': 'meters'} + ds_si['longitude'].attrs = {'standard_name': 'longitude', 'long_name': 'Longitude', 'units': 'degrees_east'} + ds_si['latitude'].attrs = {'standard_name': 'latitude', 'long_name': 'Latitude', 'units': 'degrees_north'} + ds_si['simask'].attrs.pop("valid_range", None) + + # ========== Save NSIDC Sea Ice Index Dataset ========== # + # Update variable encodings: + for var in ds_si.variables: + if ds_si[var].dtype == 'float64': + ds_si[var].encoding['missing_value'] = None + ds_si[var].encoding['_FillValue'] = None + + # Define output filepath: + out_fpath = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/NSIDC_Sea_Ice_Index_v4_Antarctic_combined_1978_2025.nc" + # Save dataset to netCDF file: + logging.info(f'In Progress: Saving NSIDC Sea Ice Index Antarctic Observations to netCDF file: {out_fpath}...') + ds_si.to_netcdf(out_fpath, unlimited_dims='time') + # Close files associated with datasets: + ds_si.close() + ds_area.close() + + logging.info(f'Completed: Saved NSIDC Sea Ice Index Antarctic Observations to netCDF file: {out_fpath}.') + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py b/OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py new file mode 100755 index 00000000..33aad7a5 --- /dev/null +++ b/OceanDataStore/data/NSIDC/process_NSIDC_SSI_Arctic_data.py @@ -0,0 +1,129 @@ +""" +process_NSIDC_SSI_Arctic_data.py + +Description: Python script to post-process NSIDC Sea Ice Index +data for the Arctic 1978-2025, including sea ice concentration, +extent and total area. + +Created by: Ollie Tooth (oliver.tooth@noc.ac.uk) +Created on: 2025-02-21 +""" +# -- Import Python packages -- # +import logging +from datetime import datetime +from glob import glob + +import numpy as np +import xarray as xr + +from OceanDataStore.cli import initialise_logging + + +# -- Define Utility Functions -- # +def get_datetimes_from_filenames(file_list): + # Extract filenames from paths: + filenames = [file.split('/')[-1] for file in file_list] + + # Convert filenames date str to datetime: + datetimes = np.array([datetime(year=int(file[2:6]), month=int(file[6:8]), day=15) for file in filenames]) + + return datetimes + +def main(): + # ========== Initialize Logging and Print Banner ========== # + initialise_logging() + + logging.info('In Progress: Post-Processing NSIDC Sea Ice Index Arctic Observations...') + + # ========== Load NSIDC Ancillary Data ========== # + # Define filepath to ancillary data: + anc_fpath = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/ancillary/NSIDC0771_LatLon_PS_N25km_v1.0.nc" + # Open NSIDC ancillary data as dataset: + ds_si = xr.open_dataset(anc_fpath) + + # Define filepath to NSIDC ancillary file - grid cell area: + area_fpath = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/ancillary/NSIDC0771_CellArea_PS_N25km_v1.0.nc" + # Open NSIDC grid cell area: + ds_area = xr.open_dataset(area_fpath) + logging.info("-> Completed: Loaded NSIDC ancillary data and grid cell area.") + + # ========== Load NSIDC Monthly Data ========== # + # Define directory path: + dir_path = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/arctic/" + + # Get the list of files in the directory: + file_paths = glob(f"{dir_path}*.tif") + file_paths.sort() + + # Retrieve sea ice mask & concentration files: + version_str = "v4.0" # Options: "v3.0" or "v4.0" + mask_files = [f for f in file_paths if f"extent_{version_str}.tif" in f] + conc_files = [f for f in file_paths if f"concentration_{version_str}.tif" in f] + + # ========== Post-Process Sea Ice Mask Data ========== # + # Define the time dimension: + time_simask = xr.DataArray(data=get_datetimes_from_filenames(file_list=mask_files), dims='time', name='time') + # Load and concatenate all sea ice mask GeoTIFFs: + simask = xr.concat([xr.open_dataset(i) for i in mask_files], dim=time_simask) + logging.info("-> Completed: Loaded NSIDC sea ice mask and concentration GeoTIFF files.") + + # Sea Ice Mask is defined by [1: sea ice, 0: ocean]: + # Values greater than 1 (missing or land) are set to NaN: + ds_si['simask'] = xr.where(simask['band_data'] > 1, np.nan, simask['band_data']).squeeze(drop=True) + ds_si["simask"].attrs = {'units': '1', "long_name": "Sea Ice Mask", "standard_name": "sea_ice_mask", "comment": "1 = sea ice, 0 = ocean"} + + # ========== Post-Process Sea Ice Concentration Data ========== # + # Define the time dimension: + time_siconc = xr.DataArray(data=get_datetimes_from_filenames(file_list=conc_files), dims='time', name='time') + # Load and concatenate all sea ice extent GeoTIFFs: + siconc = xr.concat([xr.open_dataset(i) for i in conc_files], dim=time_siconc) + logging.info("-> Completed: Loaded NSIDC sea ice concentration GeoTIFF files.") + + # Sea Ice Area Fraction: + # Note concentration percentage is scaled by 10 -> requires division by 1000. + # Values greater than 1 (missing or land) are set to NaN: + ds_si['siconc'] = xr.where(siconc['band_data'] > 1000, np.nan, siconc['band_data']).squeeze(drop=True) / 1000 + ds_si['siconc'].attrs = {'units': '1', 'long_name': 'Sea Ice Area Fraction', 'standard_name': 'sea_ice_area_fraction', "comment": "0 = ocean, 0.01-0.15 = statistically insignificant, > 0.15 = sea ice"} + + # ========== Calculate sea ice area (m2) ========== # + ds_si['cell_area'] = ds_area['cell_area'] + ds_si['cell_area'].attrs = {'units': 'm2', 'long_name': 'Grid-Cell Area for Sea Ice Variables', "standard_name": "cell_area"} + + ds_si['siextent'] = (ds_si['cell_area']*ds_si['simask']).sum(dim=['x', 'y']) + ds_si['siextent'].attrs = {'units': 'm2', 'long_name': 'Total Area Where Sea Ice Area Fraction Exceeds 15%', 'standard_name': 'sea_ice_extent'} + + # ========== Update Coordinates ========== # + ds_si.coords['lon'] = ds_si['longitude'] + ds_si.coords['lat'] = ds_si['latitude'] + # Drop auxiliary variables: + ds_si = ds_si.drop_vars(["spatial_ref", "crs", "longitude", "latitude"]) + # Rename coordinates: + ds_si = ds_si.rename({'lon': 'longitude', 'lat': 'latitude'}) + + # ========== Update attributes to ensure CF-compliance: ========== # + ds_si['x'].attrs = {'standard_name': 'projection_x_coordinate', 'long_name': 'x coordinate of projection', 'units': 'meters'} + ds_si['y'].attrs = {'standard_name': 'projection_y_coordinate', 'long_name': 'y coordinate of projection', 'units': 'meters'} + ds_si['longitude'].attrs = {'standard_name': 'longitude', 'long_name': 'Longitude', 'units': 'degrees_east'} + ds_si['latitude'].attrs = {'standard_name': 'latitude', 'long_name': 'Latitude', 'units': 'degrees_north'} + ds_si['simask'].attrs.pop("valid_range", None) + + # ========== Save NSIDC Sea Ice Index Dataset ========== # + # Update variable encodings: + for var in ds_si.variables: + if ds_si[var].dtype == 'float64': + ds_si[var].encoding['missing_value'] = None + ds_si[var].encoding['_FillValue'] = None + + # Define output filepath: + out_fpath = "/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/NSIDC_Sea_Ice_Index_v4_Arctic_combined_1978_2025.nc" + # Save dataset to netCDF file: + logging.info(f"-> Saving NSIDC Sea Ice Index Arctic Observations to netCDF file: {out_fpath}...") + ds_si.to_netcdf(out_fpath, unlimited_dims='time') + # Close files associated with datasets: + ds_si.close() + ds_area.close() + + logging.info(f'Completed: Saved NSIDC Sea Ice Index Arctic Observations to netCDF file: {out_fpath}.') + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm b/OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm new file mode 100755 index 00000000..35fc7034 --- /dev/null +++ b/OceanDataStore/data/NSIDC/run_send_NSIDC_v4.0_to_OS.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=xfer_NSIDC_sea_ice_index_v4.0 +#SBATCH --partition=test +#SBATCH --time=00:20:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ================================================================ +# run_send_NSIDC_v4.0_to_OS.slurm +# +# Description: SLURM script to send the NSIDC sea ice observations +# data to the JASMIN Object Store. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-06-05 +# +# ================================================================ +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source .../miniforge3/bin/activate +conda activate env_ods + +# -- Send NSIDC Sea Ice Index v4.0 to JASMIN OS -- # +echo "In Progress: Sending NSIDC Sea Ice Index v4.0 to Icechunk..." + +python3 send_NSIDC_SII_v4.0_to_os.py + +echo "Completed: Sent NSIDC Sea Ice Index v4.0 to Icechunk." diff --git a/OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py b/OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py new file mode 100755 index 00000000..7b14ed03 --- /dev/null +++ b/OceanDataStore/data/NSIDC/send_NSIDC_SII_v4.0_to_os.py @@ -0,0 +1,140 @@ +# ========================================================= +# send_NSIDC_SII_v4.0_to_os.py +# +# Script to write NSIDC Sea Ice Index version 4.0 to +# Icechunk repositories in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import xarray as xr +import zarr + +from OceanDataStore.cli import send_to_icechunk, initialise_logging + +logger = logging.getLogger(__name__) + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + + # ========== Prepare Data ========== # + # Open NSIDC Sea Ice Index v4.0 datasets: + ds_si_arctic = xr.open_dataset("/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/NSIDC_Sea_Ice_Index_v4_Arctic_combined_1978_2025.nc") + + ds_si_antarctic = xr.open_dataset("/dssgfs01/scratch/otooth/npd_data/observations/NSIDC/NSIDC_Sea_Ice_Index_v4_Antarctic_combined_1978_2025.nc") + + # Optimise chunk sizes for spatial analysis: + ds_si_arctic = ds_si_arctic.chunk({'time': 12, 'y': 448, 'x': 304}) + ds_si_antarctic = ds_si_antarctic.chunk({'time': 12, 'y': 332, 'x': 316}) + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds_si_arctic.data_vars) + list(ds_si_arctic.coords): + ds_si_arctic[var].encoding['compressors'] = [blosccodec] + for var in list(ds_si_antarctic.data_vars) + list(ds_si_antarctic.coords): + ds_si_antarctic[var].encoding['compressors'] = [blosccodec] + + # Update global CF-metadata attributes: + ds_si_arctic.attrs.clear() + ds_si_arctic = ds_si_arctic.assign_attrs({ + "Conventions": "CF-1.6", + "title": "NSIDC Sea Ice Index, Version 4 - Arctic", + "description": "NSIDC Sea Ice Index version 4.0 - Arctic sea ice area fraction, sea ice extent and total sea ice area timeseries.", + "source": "Satellite observations: Sea Ice Concentrations from Nimbus-7 SMMR and DMSP SSM/I-SSMIS Passive Microwave Data (GSFC). AMSR2 Daily Polar Gridded Sea Ice Concentrations (AMSR2).", + "dataset_type": "observation", + "product_type": "timeseries", + "product_version" : "1.0", + "institution": "National Snow and Ice Data Center; Cooperative Institute for Research in Environmental Sciences; University of Colorado at Boulder; Boulder, CO", + "citation": "Fetterer, F., Knowles, K., Meier, W. N., Savoie, M., Windnagel, A. K. & Stafford, T. (2025). Sea Ice Index. (G02135, Version 4). [Data Set]. Boulder, Colorado USA. National Snow and Ice Data Center. https://doi.org/10.7265/a98x-0f50. Date Accessed 05-29-2026.", + "references": "Windnagel, A., Stafford, T., Fetterer, F., Meier, W. (2025). Sea Ice Index Version 4 Analysis. NSIDC Special Report 28. Boulder CO, USA: National Snow and Ice Data Center.", + "acknowledgement": "These data are produced and supported by the NASA National Snow and Ice Data Center Distributed Active Archive Center.", + "license": "NSIDC Sea Ice Index, Version 4 data were obtained from https://nsidc.org/data/g02135/versions/4 and are provided under a U.S. Government Works License https://www.usa.gov/government-works", + "doi": "10.7265/a98x-0f50", + "platform": "gn", + "horizontal_grid_type": "curvilinear", + "horizontal_grid_resolution": "25 km", + "aggregation": "mean", + "aggregation_frequency": "monthly", + "status": "ongoing", + "update_frequency": "quarterly", + "bbox": "[-180.0, 180.0, 30.98, 90.0]", + }) + + ds_si_antarctic.attrs.clear() + ds_si_antarctic = ds_si_antarctic.assign_attrs({ + "Conventions": "CF-1.6", + "title": "NSIDC Sea Ice Index, Version 4 - Antarctic", + "description": "NSIDC Sea Ice Index, Version 4 - Antarctic sea ice area fraction, sea ice extent and total sea ice area timeseries.", + "source": "Satellite observations: Sea Ice Concentrations from Nimbus-7 SMMR and DMSP SSM/I-SSMIS Passive Microwave Data (GSFC). AMSR2 Daily Polar Gridded Sea Ice Concentrations (AMSR2).", + "dataset_type": "observation", + "product_type": "timeseries", + "product_version" : "1.0", + "institution": "National Snow and Ice Data Center; Cooperative Institute for Research in Environmental Sciences; University of Colorado at Boulder; Boulder, CO", + "citation": "Fetterer, F., Knowles, K., Meier, W. N., Savoie, M., Windnagel, A. K. & Stafford, T. (2025). Sea Ice Index. (G02135, Version 4). [Data Set]. Boulder, Colorado USA. National Snow and Ice Data Center. https://doi.org/10.7265/a98x-0f50. Date Accessed 05-29-2026.", + "references": "Windnagel, A., Stafford, T., Fetterer, F., Meier, W. (2025). Sea Ice Index Version 4 Analysis. NSIDC Special Report 28. Boulder CO, USA: National Snow and Ice Data Center.", + "acknowledgement": "These data are produced and supported by the NASA National Snow and Ice Data Center Distributed Active Archive Center.", + "license": "NSIDC Sea Ice Index, Version 4 data were obtained from https://nsidc.org/data/g02135/versions/4 and are provided under a U.S. Government Works License https://www.usa.gov/government-works", + "doi": "10.7265/a98x-0f50", + "platform": "gn", + "horizontal_grid_type": "curvilinear", + "horizontal_grid_resolution": "25 km", + "aggregation": "mean", + "aggregation_frequency": "monthly", + "status": "ongoing", + "update_frequency": "quarterly", + "bbox": "[-180.0, 180.0, -90.0, -39.23089]", + }) + + # ========== Send to Icechunk Repository ========== # + bucket = "nsidc" + exists = False + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + variable_commits = True + config_kwargs = { + "temporary_directory":".../OceanDataStore/OceanDataStore/data/NSIDC/", + "local_directory":".../OceanDataStore/OceanDataStore/data/NSIDC/" + } + cluster_kwargs = { + "n_workers" : 10, + "threads_per_worker" : 1, + "memory_limit":"3GB" + } + + # -- Sea Ice Index v4.0 - Arctic -- # + send_to_icechunk( + file=ds_si_arctic, + bucket=bucket, + object_prefix="nsidc_sea_ice_index_v4_arctic_monthly", + store_credentials_json=store_credentials_json, + exists=exists, + append_dim='time', + attrs=ds_si_arctic.attrs, + branch=branch, + commit_message="Added NSIDC Sea Ice Index version 4 - Arctic (1978-01-2025-12).", + variable_commits=variable_commits, + dask_config_kwargs=config_kwargs, + dask_cluster_kwargs=cluster_kwargs, + ) + + # -- Sea Ice Index v4.0 - Antarctic -- # + send_to_icechunk( + file=ds_si_antarctic, + bucket=bucket, + object_prefix="nsidc_sea_ice_index_v4_antarctic_monthly", + store_credentials_json=store_credentials_json, + exists=exists, + append_dim='time', + attrs=ds_si_antarctic.attrs, + branch=branch, + commit_message="Added NSIDC Sea Ice Index version 4 - Antarctic (1978-01-2025-12).", + variable_commits=variable_commits, + dask_config_kwargs=config_kwargs, + dask_cluster_kwargs=cluster_kwargs, + ) + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm b/OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm new file mode 100755 index 00000000..aa9181cc --- /dev/null +++ b/OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=oisstv2_daily_climatology +#SBATCH --partition=test +#SBATCH --time=00:20:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ============================================================== +# run_send_OISSTv2_daily_climatology_to_os.slurm +# +# Description: SLURM script to send the OISSTv2.1 daily +# climatology datasets to Icechunk repository. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-06-09 +# +# ============================================================== +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source .../miniforge3/bin/activate +conda activate env_ods + +# -- Send OISSTv2.1 daily climatology datasets to JASMIN OS -- # +echo "In Progress: Sending OISSTv2.1 daily climatology to Icechunk..." + +python3 send_OISSTv2_daily_climatology_to_os.py + +echo "Completed: Sent OISSTv2.1 daily climatology to Icechunk." diff --git a/OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm b/OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm new file mode 100755 index 00000000..22ba0250 --- /dev/null +++ b/OceanDataStore/data/OISST/run_send_OISSTv2_monthly_climatology_to_os.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=oisstv2_monthly_climatology +#SBATCH --partition=test +#SBATCH --time=00:20:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ============================================================== +# run_send_OISSTv2_monthly_climatology_to_os.slurm +# +# Description: SLURM script to send the OISSTv2.1 monthly +# climatology datasets to Icechunk repository. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-06-09 +# +# ============================================================== +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source .../miniforge3/bin/activate +conda activate env_ods + +# -- Send OISSTv2.1 monthly climatology datasets to JASMIN OS -- # +echo "In Progress: Sending OISSTv2.1 monthly climatology to Icechunk..." + +python3 send_OISSTv2_monthly_climatology_to_os.py + +echo "Completed: Sent OISSTv2.1 monthly climatology to Icechunk." diff --git a/OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm b/OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm new file mode 100755 index 00000000..253bf6a7 --- /dev/null +++ b/OceanDataStore/data/OISST/run_send_OISSTv2_monthly_to_os.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=oisstv2_monthly +#SBATCH --partition=test +#SBATCH --time=00:20:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ============================================================== +# run_send_OISSTv2_monthly_to_os.slurm +# +# Description: SLURM script to send the OISSTv2.1 monthly +# time-series dataset to Icechunk repository. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-06-09 +# +# ============================================================== +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source .../miniforge3/bin/activate +conda activate env_ods + +# -- Send OISSTv2.1 monthly time-series datasets to JASMIN OS -- # +echo "In Progress: Sending OISSTv2.1 monthly time-series to Icechunk..." + +python3 send_OISSTv2_monthly_to_os.py + +echo "Completed: Sent OISSTv2.1 monthly time-series to Icechunk." diff --git a/OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py b/OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py new file mode 100755 index 00000000..671a1da6 --- /dev/null +++ b/OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py @@ -0,0 +1,151 @@ +# ========================================================= +# send_OISSTv2_daily_climatology_to_os.py +# +# Script to write OISST v2.1 long-term daily climatologies +# to Icechunk repositories in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import numpy as np +import xarray as xr +import zarr + +from OceanDataStore.cli import initialise_logging, send_to_icechunk +from OceanDataStore.data.utils import ( + compute_cell_area, + compute_dx, + compute_dy, +) + + +logger = logging.getLogger(__name__) + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + + # ========== Send to Icechunk Repository ========== # + bucket = "oisst" + exists = False + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + variable_commits = True + + # Define climatology period: + start_yr = 1991 + end_yr = 2020 + + logging.info(f"In Progress: Sending OISSTv2.1 daily climatology for {start_yr}-{end_yr} to Icechunk...") + # Open OISSTv2 dataset: + filepaths = [f"/dssgfs01/scratch/otooth/npd_data/observations/OISST/icec.day.mean.ltm.{start_yr}-{end_yr}.nc", + f"/dssgfs01/scratch/otooth/npd_data/observations/OISST/sst.day.mean.ltm.{start_yr}-{end_yr}.nc" + ] + ds = xr.merge([xr.open_dataset(filepath, decode_times=False).drop_vars("valid_yr_count") for filepath in filepaths], compat="no_conflicts") + # Open OISSTv2 land-sea mask dataset: + ds_mask = xr.open_dataset("http://psl.noaa.gov/thredds/dodsC/Datasets/noaa.oisst.v2.highres/lsmask.oisst.nc", decode_times=False) + ds_mask = ds_mask.squeeze(drop=True).rename({"lon": "longitude", "lat": "latitude", "lsmask": "mask"}) + ds_mask = ds_mask.assign_coords( + longitude=((ds_mask["longitude"] + 180) % 360) - 180 + ) + + # Standardise coordinate dimension names: + ds = ds.rename({"lon": "longitude", "lat": "latitude", "time": "day"}) + + # Update longitude coordinates to be in the range [-180, 180]: + ds = ds.assign_coords( + longitude=((ds["longitude"] + 180) % 360) - 180 + ) + ds = ds.sortby("longitude") + + # Add day of year coordinate (1-365): + ds = ds.assign_coords( + day=np.arange(1, 366) + ) + + # Rename variables to standard names: + ds = ds.rename({"sst": "tos", + "icec": "siconc", + "climatology_bounds": "time_bnds", + }) + + # Add standard names and units: + ds["tos"].attrs["standard_name"] = "sea_surface_temperature" + ds["siconc"].attrs["standard_name"] = "sea_ice_area_fraction" + ds["siconc"].attrs["units"] = "1" + + # Add OISSTv2 land mask: + ds["mask"] = ds_mask["mask"] + ds["mask"].attrs.clear() + ds["mask"] = ds["mask"].assign_attrs({'long_name': "Land-Sea Binary Mask", + "standard_name": "sea_binary_mask", + "comment": "1 = sea, 0 = land" + }) + + # Add horizontal grid cell area: + ds['dx'] = compute_dx(ds) + ds['dy'] = compute_dy(ds) + ds['cell_area'] = compute_cell_area(ds) + + # Update time bounds to reflect climatological period: + ds['time_bnds'] = ds['time_bnds'].astype('datetime64[ns]') + ds['time_bnds'].data[:, 0] = (np.datetime64(f'{start_yr}-01', 'M') + (np.timedelta64(1, 'D') * np.arange(ds['day'].size))).astype('datetime64[ns]') + ds['time_bnds'].data[:, 1] = (np.datetime64(f'{end_yr}-01', 'M') + (np.timedelta64(1, 'D') * np.arange(ds['day'].size))).astype('datetime64[ns]') + ds.time_bnds.attrs.clear() + + # Update global attributes: + ds.attrs.clear() + ds = ds.assign_attrs({ + "Conventions": "CF-1.5", + "title": f"NOAA OISSTv2.1 Daily Climatology ({start_yr}-{end_yr})", + "description": f"NOAA 1/4° Daily Optimum Interpolation Sea Surface Temperature (OISST) version 2.1 daily sea surface temperature and sea ice fraction climatology ({start_yr}-{end_yr}).", + "source": "Numerical models: Optimal Interpolation. In-situ observations: ICOADS-D R3.0.2, Argo GDAC. Satellite observations: Advanced Very High Resolution Radiometer (AVHRR).", + "dataset_type": "observation", + "product_type": "climatology", + "product_version": "2.1", + "institution": "NOAA National Centers for Environmental Information (NCEI)", + "citation": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2021: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1", + "references": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2020: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1. Banzon, V., Smith, T. M., Chin, T. M., Liu, C., and Hankins, W., 2016: A long-term record of blended satellite and in situ sea-surface temperature for climate monitoring, modeling and environmental studies. Earth Syst. Sci. Data, 8, 165-176, doi:10.5194/essd-8-165-2016. Reynolds, R. W., T. M. Smith, C. Liu, D. B. Chelton, K. S. Casey, and M. G. Schlax, 2007: Daily high-resolution-blended analyses for sea surface temperature. Journal of Climate, 20, 5473-5496, doi:10.1175/JCLI-D-14-00293.1", + "acknowledgement": "NOAA OI SST V2 High Resolution Dataset data provided by the NOAA PSL, Boulder, Colorado, USA, from their website at https://psl.noaa.gov.", + "license": "OISST v2.1 data were obtained from https://psl.noaa.gov/data/gridded/data.noaa.oisst.v2.highres.html and are provided under a Creative Commons CC0 1.0 Universal License https://creativecommons.org/publicdomain/zero/1.0/", + "doi": "10.1175/JCLI-D-20-0166.1", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "0.25 degree", + "aggregation": "mean", + "aggregation_frequency": "daily", + "status": "completed", + "update_frequency": "None", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + }) + + # Optimise chunk sizes for spatial analysis: + ds = ds.chunk({'day': 5, 'latitude': 720, 'longitude': 1440}) + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds.data_vars) + list(ds.coords): + ds[var].encoding['compressors'] = [blosccodec] + + # Define prefix and commit message based on climatology period: + prefix = f"oisst_v2.1_{start_yr}_{end_yr}_daily_climatology" + commit_message = f"Added OISSTv2.1 Sea Surface Temperature Climatology ({start_yr}-{end_yr})." + + send_to_icechunk( + file=ds, + bucket=bucket, + object_prefix=prefix, + store_credentials_json=store_credentials_json, + exists=exists, + append_dim='day', + branch=branch, + commit_message=commit_message, + variable_commits=variable_commits, + dask_config_kwargs=None, + dask_cluster_kwargs=None, + ) + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py b/OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py new file mode 100755 index 00000000..08923f94 --- /dev/null +++ b/OceanDataStore/data/OISST/send_OISSTv2_monthly_climatology_to_os.py @@ -0,0 +1,150 @@ +# ========================================================= +# send_OISSTv2_monthly_climatology_to_os.py +# +# Script to write OISST v2.1 long-term monthly climatologies +# to Icechunk repositories in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import numpy as np +import xarray as xr +import zarr + +from OceanDataStore.cli import initialise_logging, send_to_icechunk +from OceanDataStore.data.utils import ( + compute_cell_area, + compute_dx, + compute_dy, +) + +logger = logging.getLogger(__name__) + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + + # ========== Send to Icechunk Repository ========== # + bucket = "oisst" + exists = False + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + variable_commits = True + + # Define climatology period: + start_yr = 1991 + end_yr = 2020 + + logging.info(f"In Progress: Sending OISSTv2.1 monthly climatology for {start_yr}-{end_yr} to Icechunk...") + # Open OISSTv2 dataset: + filepaths = [f"/dssgfs01/scratch/otooth/npd_data/observations/OISST/icec.mon.ltm.{start_yr}-{end_yr}.nc", + f"/dssgfs01/scratch/otooth/npd_data/observations/OISST/sst.mon.ltm.{start_yr}-{end_yr}.nc" + ] + ds = xr.merge([xr.open_dataset(filepath, decode_times=False).drop_vars("valid_yr_count") for filepath in filepaths], compat="no_conflicts") + # Open OISSTv2 land-sea mask dataset: + ds_mask = xr.open_dataset("http://psl.noaa.gov/thredds/dodsC/Datasets/noaa.oisst.v2.highres/lsmask.oisst.nc", decode_times=False) + ds_mask = ds_mask.squeeze(drop=True).rename({"lon": "longitude", "lat": "latitude", "lsmask": "mask"}) + ds_mask = ds_mask.assign_coords( + longitude=((ds_mask["longitude"] + 180) % 360) - 180 + ) + + # Standardise coordinate dimension names: + ds = ds.rename({"lon": "longitude", "lat": "latitude", "time": "month"}) + + # Update longitude coordinates to be in the range [-180, 180]: + ds = ds.assign_coords( + longitude=((ds["longitude"] + 180) % 360) - 180 + ) + ds = ds.sortby("longitude") + + # Add month of year coordinate (1-12): + ds = ds.assign_coords( + month=np.arange(1, 13) + ) + + # Rename variables to standard names: + ds = ds.rename({"sst": "tos", + "icec": "siconc", + "climatology_bounds": "time_bnds", + }) + + # Add standard names and units: + ds["tos"].attrs["standard_name"] = "sea_surface_temperature" + ds["siconc"].attrs["standard_name"] = "sea_ice_area_fraction" + ds["siconc"].attrs["units"] = "1" + + # Add OISSTv2 land mask: + ds["mask"] = ds_mask["mask"] + ds["mask"].attrs.clear() + ds["mask"] = ds["mask"].assign_attrs({'long_name': "Land-Sea Binary Mask", + "standard_name": "sea_binary_mask", + "comment": "1 = sea, 0 = land" + }) + + # Add horizontal grid cell area: + ds['dx'] = compute_dx(ds) + ds['dy'] = compute_dy(ds) + ds['cell_area'] = compute_cell_area(ds) + + # Update time bounds to reflect climatological period: + ds['time_bnds'] = ds['time_bnds'].astype('datetime64[ns]') + ds['time_bnds'].data[:, 0] = (np.datetime64(f'{start_yr}-01', 'M') + (np.timedelta64(1, 'M') * np.arange(ds['month'].size))).astype('datetime64[ns]') + ds['time_bnds'].data[:, 1] = (np.datetime64(f'{end_yr}-01', 'M') + (np.timedelta64(1, 'M') * np.arange(ds['month'].size))).astype('datetime64[ns]') + ds.time_bnds.attrs.clear() + + # Update global attributes: + ds.attrs.clear() + ds = ds.assign_attrs({ + "Conventions": "CF-1.5", + "title": f"NOAA OISSTv2.1 Monthly Climatology ({start_yr}-{end_yr})", + "description": f"NOAA 1/4° Monthly Optimum Interpolation Sea Surface Temperature (OISST) version 2.1 monthly sea surface temperature and sea ice fraction climatology ({start_yr}-{end_yr}).", + "source": "Numerical models: Optimal Interpolation. In-situ observations: ICOADS-D R3.0.2, Argo GDAC. Satellite observations: Advanced Very High Resolution Radiometer (AVHRR).", + "dataset_type": "observation", + "product_type": "climatology", + "product_version": "2.1", + "institution": "NOAA National Centers for Environmental Information (NCEI)", + "citation": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2021: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1", + "references": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2020: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1. Banzon, V., Smith, T. M., Chin, T. M., Liu, C., and Hankins, W., 2016: A long-term record of blended satellite and in situ sea-surface temperature for climate monitoring, modeling and environmental studies. Earth Syst. Sci. Data, 8, 165-176, doi:10.5194/essd-8-165-2016. Reynolds, R. W., T. M. Smith, C. Liu, D. B. Chelton, K. S. Casey, and M. G. Schlax, 2007: Daily high-resolution-blended analyses for sea surface temperature. Journal of Climate, 20, 5473-5496, doi:10.1175/JCLI-D-14-00293.1", + "acknowledgement": "NOAA OI SST V2 High Resolution Dataset data provided by the NOAA PSL, Boulder, Colorado, USA, from their website at https://psl.noaa.gov.", + "license": "OISST v2.1 data were obtained from https://psl.noaa.gov/data/gridded/data.noaa.oisst.v2.highres.html and are provided under a Creative Commons CC0 1.0 Universal License https://creativecommons.org/publicdomain/zero/1.0/", + "doi": "10.1175/JCLI-D-20-0166.1", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "0.25 degree", + "aggregation": "mean", + "aggregation_frequency": "monthly", + "status": "completed", + "update_frequency": "None", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + }) + + # Optimise chunk sizes for spatial analysis: + ds = ds.chunk({'month': 1, 'latitude': 720, 'longitude': 1440}) + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds.data_vars) + list(ds.coords): + ds[var].encoding['compressors'] = [blosccodec] + + # Define prefix and commit message based on climatology period: + prefix = f"oisst_v2.1_{start_yr}_{end_yr}_monthly_climatology" + commit_message = f"Added OISSTv2.1 Sea Surface Temperature Climatology ({start_yr}-{end_yr})." + + send_to_icechunk( + file=ds, + bucket=bucket, + object_prefix=prefix, + store_credentials_json=store_credentials_json, + exists=exists, + append_dim='month', + branch=branch, + commit_message=commit_message, + variable_commits=variable_commits, + dask_config_kwargs=None, + dask_cluster_kwargs=None, + ) + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py b/OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py new file mode 100755 index 00000000..901bd607 --- /dev/null +++ b/OceanDataStore/data/OISST/send_OISSTv2_monthly_to_os.py @@ -0,0 +1,145 @@ +# ========================================================= +# send_OISSTv2_monthly_climatology_to_os.py +# +# Script to write OISST v2.1 long-term monthly climatologies +# to Icechunk repositories in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import numpy as np +import xarray as xr +import zarr + +from OceanDataStore.cli import initialise_logging, send_to_icechunk +from OceanDataStore.data.utils import ( + compute_cell_area, + compute_dx, + compute_dy, +) + +logger = logging.getLogger(__name__) + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + + # ========== Send to Icechunk Repository ========== # + bucket = "oisst" + exists = False + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + variable_commits = True + + logging.info("In Progress: Sending OISSTv2.1 monthly mean time series to Icechunk...") + # Open OISSTv2 dataset: + filepaths = ["/dssgfs01/scratch/otooth/npd_data/observations/OISST/icec.mon.mean.nc", + "/dssgfs01/scratch/otooth/npd_data/observations/OISST/sst.mon.mean.nc" + ] + ds = xr.merge([xr.open_dataset(filepath, decode_times=False) for filepath in filepaths], compat="no_conflicts") + # Open OISSTv2 land-sea mask dataset: + ds_mask = xr.open_dataset("http://psl.noaa.gov/thredds/dodsC/Datasets/noaa.oisst.v2.highres/lsmask.oisst.nc", decode_times=False) + ds_mask = ds_mask.squeeze(drop=True).rename({"lon": "longitude", "lat": "latitude", "lsmask": "mask"}) + ds_mask = ds_mask.assign_coords( + longitude=((ds_mask["longitude"] + 180) % 360) - 180 + ) + + # Standardise coordinate dimension names: + ds = ds.rename({"lon": "longitude", "lat": "latitude"}) + + # Update longitude coordinates to be in the range [-180, 180]: + ds = ds.assign_coords( + longitude=((ds["longitude"] + 180) % 360) - 180 + ) + ds = ds.sortby("longitude") + + # Update time coordinate to be datetime objects: + ds = ds.assign_coords(time=(np.datetime64('1800-01-01', 'D') + (np.timedelta64(1, 'D') * ds['time'])).astype('datetime64[ns]')) + ds['time'].attrs.pop('units', None) + + # Rename variables to standard names: + ds = ds.rename({"sst": "tos", + "icec": "siconc", + }) + + # Add standard names and units: + ds["tos"].attrs["standard_name"] = "sea_surface_temperature" + ds["siconc"].attrs["standard_name"] = "sea_ice_area_fraction" + ds["siconc"].attrs["units"] = "1" + + # Add OISSTv2 land mask: + ds["mask"] = ds_mask["mask"] + ds["mask"].attrs.clear() + ds["mask"] = ds["mask"].assign_attrs({'long_name': "Land-Sea Binary Mask", + "standard_name": "sea_binary_mask", + "comment": "1 = sea, 0 = land" + }) + + # Add horizontal grid cell area: + ds["dx"] = compute_dx(ds) + ds["dy"] = compute_dy(ds) + ds['cell_area'] = compute_cell_area(ds) + + # Add Northern and Southern Hemisphere sea ice area timeseries: + ds['siarea_NH'] = (ds['siconc'].where(ds['latitude'] > 0) * ds['cell_area']).sum(dim=['latitude', 'longitude']) + ds['siarea_NH'].attrs = {'long_name': 'Total Northern Hemisphere Sea Ice Area', 'standard_name': 'sea_ice_area', 'units': 'm2'} + + ds['siarea_SH'] = (ds['siconc'].where(ds['latitude'] < 0) * ds['cell_area']).sum(dim=['latitude', 'longitude']) + ds['siarea_SH'].attrs = {'long_name': 'Total Southern Hemisphere Sea Ice Area', 'standard_name': 'sea_ice_area', 'units': 'm2'} + + # Update global attributes: + ds.attrs.clear() + ds = ds.assign_attrs({ + "Conventions": "CF-1.5", + "title": "NOAA OISSTv2.1 Monthly Timeseries", + "description": "NOAA 1/4° Monthly Optimum Interpolation Sea Surface Temperature (OISST) version 2.1 monthly sea surface temperature and sea ice fraction timeseries.", + "source": "Numerical models: Optimal Interpolation. In-situ observations: ICOADS-D R3.0.2, Argo GDAC. Satellite observations: Advanced Very High Resolution Radiometer (AVHRR).", + "dataset_type": "observation", + "product_type": "timeseries", + "product_version": "2.1", + "institution": "NOAA National Centers for Environmental Information (NCEI)", + "citation": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2021: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1", + "references": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2020: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1. Banzon, V., Smith, T. M., Chin, T. M., Liu, C., and Hankins, W., 2016: A long-term record of blended satellite and in situ sea-surface temperature for climate monitoring, modeling and environmental studies. Earth Syst. Sci. Data, 8, 165-176, doi:10.5194/essd-8-165-2016. Reynolds, R. W., T. M. Smith, C. Liu, D. B. Chelton, K. S. Casey, and M. G. Schlax, 2007: Daily high-resolution-blended analyses for sea surface temperature. Journal of Climate, 20, 5473-5496, doi:10.1175/JCLI-D-14-00293.1", + "acknowledgement": "NOAA OI SST V2 High Resolution Dataset data provided by the NOAA PSL, Boulder, Colorado, USA, from their website at https://psl.noaa.gov.", + "license": "OISST v2.1 data were obtained from https://psl.noaa.gov/data/gridded/data.noaa.oisst.v2.highres.html and are provided under a Creative Commons CC0 1.0 Universal License https://creativecommons.org/publicdomain/zero/1.0/", + "doi": "10.1175/JCLI-D-20-0166.1", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "0.25 degree", + "aggregation": "mean", + "aggregation_frequency": "monthly", + "status": "ongoing", + "update_frequency": "quarterly", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + }) + + # Optimise chunk sizes for spatial analysis: + ds = ds.chunk({'time': 1, 'latitude': 720, 'longitude': 1440}) + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds.data_vars) + list(ds.coords): + ds[var].encoding['compressors'] = [blosccodec] + + # Define prefix and commit message based on climatology period: + prefix = "oisst_v2.1_monthly" + commit_message = "Added OISSTv2.1 Sea Surface Temperature & Sea Ice Fraction Monthly Timeseries (1981-2026)." + + send_to_icechunk( + file=ds, + bucket=bucket, + object_prefix=prefix, + store_credentials_json=store_credentials_json, + exists=exists, + append_dim='time', + branch=branch, + commit_message=commit_message, + variable_commits=variable_commits, + dask_config_kwargs=None, + dask_cluster_kwargs=None, + ) + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/WOA23/download_WOA23_climatology.sh b/OceanDataStore/data/WOA23/download_WOA23_climatology.sh new file mode 100755 index 00000000..c70bfe20 --- /dev/null +++ b/OceanDataStore/data/WOA23/download_WOA23_climatology.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# ---------------------------------------------------------------- +# download_WOA23_climatology.sh +# +# Description: Download the World Ocean Atlas 2023 30-year climatologies +# using URLs provided by the National Centers for Environmental Information. +# WOA23 provides Climate Normals for temperature and salinity at 0.25 degree +# resolution as 30-year averages for validating models. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-05-27 +# ---------------------------------------------------------------- +set -euo pipefail + +echo "===================================================" +echo " Downloading WOA23 Climatologies" +echo " v0.1.0" +echo " Oliver J. Tooth, NOC" +echo "===================================================" +echo "In Progress: Downloading WOA23 Upper 1500m Climatologies..." +for i in {00..16}; do echo $i; wget https://www.ncei.noaa.gov/data/oceans/woa/WOA23/DATA/temperature/netcdf/decav71A0/0.25/woa23_decav71A0_t${i}_04.nc ; done + +for i in {00..16}; do echo $i; wget https://www.ncei.noaa.gov/data/oceans/woa/WOA23/DATA/temperature/netcdf/decav81B0/0.25/woa23_decav81B0_t${i}_04.nc ; done + +for i in {00..16}; do echo $i; wget https://www.ncei.noaa.gov/data/oceans/woa/WOA23/DATA/temperature/netcdf/decav91C0/0.25/woa23_decav91C0_t${i}_04.nc ; done +echo "Completed: Downloaded Upper 1500m WOA23 temperature climatologies" + +echo "In Progress: Downloading WOA23 Upper 1500m Salinity Climatologies..." +for i in {00..16}; do echo $i; wget https://www.ncei.noaa.gov/data/oceans/woa/WOA23/DATA/salinity/netcdf/decav71A0/0.25/woa23_decav71A0_s${i}_04.nc ; done + +for i in {00..16}; do echo $i; wget https://www.ncei.noaa.gov/data/oceans/woa/WOA23/DATA/salinity/netcdf/decav81B0/0.25/woa23_decav81B0_s${i}_04.nc ; done + +for i in {00..16}; do echo $i; wget https://www.ncei.noaa.gov/data/oceans/woa/WOA23/DATA/salinity/netcdf/decav91C0/0.25/woa23_decav91C0_s${i}_04.nc ; done +echo "Completed: Downloaded Upper 1500m WOA23 salinity climatologies" + +# Winter average of full time period, covering full ocean depth: +echo "In Progress: Downloading WOA23 Full Depth Climatologies..." +wget https://www.ncei.noaa.gov/data/oceans/woa/WOA23/DATA/temperature/netcdf/decav/0.25/woa23_decav_t13_04.nc +wget https://www.ncei.noaa.gov/data/oceans/woa/WOA23/DATA/salinity/netcdf/decav/0.25/woa23_decav_s13_04.nc +echo "Completed: Downloaded WOA23 Full Depth Climatologies" diff --git a/OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm b/OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm new file mode 100755 index 00000000..b9847e06 --- /dev/null +++ b/OceanDataStore/data/WOA23/run_send_WOA23_annual_climatology_to_os.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=woa23_annual_climatology +#SBATCH --partition=compute +#SBATCH --time=01:00:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ============================================================== +# run_send_WOA23_annual_climatology_to_os.slurm +# +# Description: SLURM script to send the WOA23 Annual Climatology +# datasets to Icechunk repository. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-06-05 +# +# ============================================================== +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source .../miniforge3/bin/activate +conda activate env_ods + +# -- Send WOA23 annual climatology to JASMIN OS -- # +echo "In Progress: Sending WOA23 Annual Climatologies to Icechunk..." + +python3 send_WOA23_annual_climatology_to_os.py + +echo "Completed: Sent WOA23 Annual Climatologies to Icechunk." diff --git a/OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm b/OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm new file mode 100755 index 00000000..78d3f1bd --- /dev/null +++ b/OceanDataStore/data/WOA23/run_send_WOA23_monthly_climatology_to_os.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=woa23_monthly_climatology +#SBATCH --partition=compute +#SBATCH --time=01:00:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ============================================================== +# run_send_WOA23_monthly_climatology_to_os.slurm +# +# Description: SLURM script to send the WOA23 Monthly Climatology +# datasets to Icechunk repository. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-06-06 +# +# ============================================================== +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source .../miniforge3/bin/activate +conda activate env_ods + +# -- Send WOA23 monthly climatology to JASMIN OS -- # +echo "In Progress: Sending WOA23 Monthly Climatologies to Icechunk..." + +python3 send_WOA23_monthly_climatology_to_os.py + +echo "Completed: Sent WOA23 Monthly Climatologies to Icechunk." diff --git a/OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py b/OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py new file mode 100755 index 00000000..44ef3efa --- /dev/null +++ b/OceanDataStore/data/WOA23/send_WOA23_annual_climatology_to_os.py @@ -0,0 +1,263 @@ +# ========================================================= +# send_WOA23_annual_climatology_to_os.py +# +# Script to write WOA23 annual climatologies to Icechunk +# repository in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import numpy as np +import pandas as pd +import xarray as xr +import zarr + +from OceanDataStore.cli import initialise_logging, send_to_icechunk +from OceanDataStore.data.utils import ( + compute_cell_area, + compute_dx, + compute_dy, +) + +logger = logging.getLogger(__name__) + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + logging.info("In Progress: Sending WOA23 Annual Climatologies to JASMIN Object Store.") + + # ========== Prepare Data ========== # + filedir = "/dssgfs01/scratch/otooth/npd_data/observations/WOA2023" + # Define file paths for WOA23 annual climatologies: + salinity_paths = [f"{filedir}/woa23_decav71A0_s00_04.nc", + f"{filedir}/woa23_decav81B0_s00_04.nc", + f"{filedir}/woa23_decav91C0_s00_04.nc", + ] + temperature_paths = [f"{filedir}/woa23_decav71A0_t00_04.nc", + f"{filedir}/woa23_decav81B0_t00_04.nc", + f"{filedir}/woa23_decav91C0_t00_04.nc" + ] + + # Define year bounds for each WOA23 annual climatology: + year_bound_start = [1971, 1981, 1991] + year_bound_end = [2000, 2010, 2020] + + for temp_path, sal_path, start_year, end_year in zip(temperature_paths, salinity_paths, year_bound_start, year_bound_end): + logging.info(f"-> In Progress: Preparing WOA23 {start_year}-{end_year} annual climatology...") + # Open WOA23 annual climatologies dataset: + ds_s = xr.open_dataset(sal_path, decode_times=False) + ds_t = xr.open_dataset(temp_path, decode_times=False) + + ds = xr.merge([ds_s, ds_t], compat='no_conflicts').squeeze(drop=True) + ds['climatology_bounds'].values = np.array([np.datetime64(f'{start_year}-01-01', 'ns'), np.datetime64(f'{end_year}-12-31', 'ns')]) + + ds = ds.rename({"lon": "longitude", + "lat": "latitude", + "climatology_bounds": "time_bnds" + }) + + # Use OceanDataStore standard variable names: + for var in ds.data_vars: + if var.startswith("t_"): + ds = ds.rename({var: var.replace("t_", "thetao_")}) + elif var.startswith("s_"): + ds = ds.rename({var: var.replace("s_", "so_")}) + + # Add ancillary variables: + ds['dx'] = compute_dx(ds) + ds['dy'] = compute_dy(ds) + ds['cell_area'] = compute_cell_area(ds) + + # Custom ancillary variables: + ds['cell_thickness'] = ds['depth_bnds'].isel(nbounds=1) - ds['depth_bnds'].isel(nbounds=0) + ds['cell_volume'] = ds['cell_thickness'] * ds['cell_area'] + + # Update attributes for custom ancillary variables: + ds['cell_thickness'].attrs.update({ + 'long_name': "Grid-Cell Thickness", + 'standard_name': "cell_thickness", + 'units': "m", + }) + ds['cell_volume'].attrs.update({ + 'long_name': "Grid-Cell Volume", + 'standard_name': "cell_volume", + 'units': "m3", + }) + logging.info(f"Completed: Prepared WOA23 {start_year}-{end_year} annual climatology dataset with ancillary variables.") + + # ========== Prepare Ancillary Data ========== # + # Open WOA23 land sea mask: + df_mask = pd.read_table("/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/WOA23/data/landsea_04.msk", + delimiter=',', + header=0 + ) + # Define level of sea floor (i.e. bottom standard level) at each grid cell: + ds['bottom_level'] = xr.full_like(ds['cell_area'], fill_value=np.nan).squeeze(drop=True) + ds['bottom_level'].data = df_mask['Bottom_Standard_Level'].values.reshape(720, 1440) + ds['bottom_level'].name = "bottom_level" + ds['bottom_level'].attrs = {"standard_name": "model_level_number_at_sea_floor", + "long_name": "Model Level Number at Sea Floor", + "units": "1"} + # Define land sea mask (1 for ocean grid cells, 0 for land grid cells): + ds['mask'] = ds['bottom_level'] > 1 + ds['mask'].name = "land_binary_mask" + ds["mask"] = ds["mask"].assign_attrs({'long_name': "Land-Sea Binary Mask", + "standard_name": "sea_binary_mask", + "comment": "1 = sea, 0 = land" + }) + logging.info("Completed: Prepared land sea mask & bottom level variables.") + + # Open WOA23 basin mask: + df_basin = pd.read_table("/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/WOA23/data/basinmask_04.msk", + delimiter=',', + header=0 + ) + # Define basin mask (integer values for each ocean basin, NaN for land grid cells): + ds['basin_mask'] = xr.full_like(ds['cell_area'], fill_value=np.nan).squeeze(drop=True) + for basin, longitude, latitude in df_basin[["Basin_0m", "Longitude", "Latitude"]].itertuples(index=False): + ds['basin_mask'].loc[dict(latitude=latitude, longitude=longitude)] = basin + + ds['basin_mask'].attrs = { + "standard_name": "ocean_basin_mask", + "long_name": "Ocean Basin Mask", + "basin_name": { + 1: "Atlantic Ocean", + 2: "Pacific Ocean", + 3: "Indian Ocean", + 4: "Mediterranean Sea", + 5: "Baltic Sea", + 6: "Black Sea", + 7: "Red Sea", + 8: "Persian Gulf", + 9: "Hudson Bay", + 10: "Southern Ocean", + 11: "Arctic Ocean", + 12: "Sea of Japan", + 13: "Kara Sea", + 14: "Sulu Sea", + 15: "Baffin Bay", + 16: "East Mediterranean", + 17: "West Mediterranean", + 18: "Sea of Okhotsk", + 19: "Banda Sea", + 20: "Caribbean Sea", + 21: "Andaman Basin", + 22: "North Caribbean", + 23: "Gulf of Mexico", + 24: "Beaufort Sea", + 25: "South China Sea", + 26: "Barents Sea", + 27: "Celebes Sea", + 28: "Aleutian Basin", + 29: "Fiji Basin", + 30: "North American Basin", + 31: "West European Basin", + 32: "Southeast Indian Basin", + 33: "Coral Sea", + 34: "East Indian Basin", + 35: "Central Indian Basin", + 36: "Southwest Atlantic Basin", + 37: "Southeast Atlantic Basin", + 38: "Southeast Pacific Basin", + 39: "Guatemala Basin", + 40: "East Caroline Basin", + 41: "Marianas Basin", + 42: "Philippine Sea", + 43: "Arabian Sea", + 44: "Chile Basin", + 45: "Somali Basin", + 46: "Mascarene Basin", + 47: "Crozet Basin", + 48: "Guinea Basin", + 49: "Brazil Basin", + 50: "Argentine Basin", + 51: "Tasman Sea", + 52: "Atlantic Indian Basin", + 53: "Caspian Sea", + 54: "Sulu Sea II", + 55: "Venezuela Basin", + 56: "Bay of Bengal", + 57: "Java Sea", + 58: "East Indian Atlantic Basin", + 59: "Chiloe", + 60: "Bransfield Strait", + } + } + logging.info("Completed: Prepared ocean basin mask variable.") + + # ========== CF Attributes ========== # + ds.attrs.clear() + ds = ds.assign_attrs({ + "Conventions": "CF-1.6", + "title": f"World Ocean Atlas 2023 temperature and salinity annual climatology ({start_year}-{end_year}).", + "description": f"World Ocean Atlas 2023 (WOA23) temperature and salinity annual climatology for the global ocean from objectively analysed, quality controlled in-situ profile data ({start_year}-{end_year}).", + "source": "Numerical models: Objective Analysis. In-situ observations: World Ocean Database (WOD).", + "dataset_type": "observation", + "product_type": "climatology", + "product_version": "1.0", + "institution": "NOAA National Centers for Environmental Information (NCEI)", + "citation": "Reagan, James R.; Boyer, Tim P.; García, Hernán E.; Locarnini, Ricardo A.; Baranova, Olga K.; Bouchard, Courtney; Cross, Scott L.; Mishonov, Alexey V.; Paver, Christopher R.; Seidov, Dan; Wang, Zhankun; Dukhovskoy, Dmitry (2023). World Ocean Atlas 2023 (NCEI Accession 0270533). https://www.ncei.noaa.gov/archive/accession/0270533. In Reagan, James R.; Boyer, Tim P.; García, Hernán E.; Locarnini, Ricardo A.; Baranova, Olga K.; Bouchard, Courtney; Cross, Scott L.; Mishonov, Alexey V.; Paver, Christopher R.; Seidov, Dan; Wang, Zhankun; Dukhovskoy, Dmitry (2023). World Ocean Atlas 2023. NOAA National Centers for Environmental Information. Dataset. https://doi.org/10.25921/va26-hv25. Accessed 06-05-2026.", + "references": "Locarnini, R.A., A.V. Mishonov, O.K. Baranova, J.R. Reagan, T.P. Boyer, D. Seidov, Z. Wang, H.E. Garcia, C. Bouchard, S.L. Cross, C.R. Paver, and D. Dukhovskoy (2024). World Ocean Atlas 2023, Volume 1: Temperature. A. Mishonov Technical Editor, NOAA Atlas NESDIS 89. https://doi.org/10.25923/54bh-1613. Reagan, J.R., D. Seidov, Z. Wang, D. Dukhovskoy, T.P. Boyer, R.A. Locarnini, O.K. Baranova, A.V. Mishonov, H.E. Garcia, C. Bouchard, S.L. Cross, and C.R. Paver (2023). World Ocean Atlas 2023, Volume 2: Salinity. A. Mishonov Technical Editor, NOAA Atlas NESDIS 90, https://doi.org/10.25923/70qt-9574.", + "acknowledgement": "None", + "license": "World Ocean Atlas 2023 data were obtained from https://www.ncei.noaa.gov/access/world-ocean-atlas-2023/ and are provided under a Creative Commons CC0 1.0 Universal License https://creativecommons.org/publicdomain/zero/1.0/", + "doi": "https://doi.org/10.25921/va26-hv25", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "0.25 degree", + "vertical_grid_type": "z", + "vertical_grid_coordinate": "depth", + "vertical_grid_levels": 57, + "aggregation": "mean", + "aggregation_frequency": "annual", + "status": "completed", + "update_frequency": "None", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + }) + logging.info(f"Completed: Added CF-compliant global attributes to WOA23 {start_year}-{end_year} annual climatology dataset.") + + # ========== Send to Icechunk Repository ========== # + logging.info(f"In Progress: Sending WOA23 {start_year}-{end_year} annual climatology to Icechunk Repository...") + bucket = "woa23" + prefix = f"woa23_{start_year}_{end_year}_annual_climatology" + exists = False + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + commit_message = f"Added WOA23 {start_year}-{end_year} annual climatology." + variable_commits = True + config_kwargs = { + "temporary_directory":".../OceanDataStore/OceanDataStore/data/WOA23/", + "local_directory":".../OceanDataStore/OceanDataStore/data/WOA23/" + } + cluster_kwargs = { + "n_workers" : 25, + "threads_per_worker" : 1, + "memory_limit":"3GB" + } + + # Optimise chunk sizes for spatial analysis: + ds = ds.chunk({'longitude': 1440, 'latitude': 720, 'depth': 5}) + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds.data_vars) + list(ds.coords): + ds[var].encoding['compressors'] = [blosccodec] + + send_to_icechunk( + file=ds, + bucket=bucket, + object_prefix=prefix, + store_credentials_json=store_credentials_json, + exists=exists, + branch=branch, + commit_message=commit_message, + variable_commits=variable_commits, + dask_config_kwargs=config_kwargs, + dask_cluster_kwargs=cluster_kwargs, + ) + + logging.info(f"Completed: Sent WOA23 {start_year}-{end_year} annual climatology to Icechunk Repository.") + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py b/OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py new file mode 100755 index 00000000..4f29f12a --- /dev/null +++ b/OceanDataStore/data/WOA23/send_WOA23_monthly_climatology_to_os.py @@ -0,0 +1,292 @@ +# ========================================================= +# send_WOA23_monthly_climatology_to_os.py +# +# Script to write WOA23 monthly climatologies to Icechunk +# repository in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import glob +import logging + +import numpy as np +import pandas as pd +import xarray as xr +import zarr + +from OceanDataStore.cli import initialise_logging, send_to_icechunk +from OceanDataStore.data.utils import ( + compute_cell_area, + compute_dx, + compute_dy, +) + +logger = logging.getLogger(__name__) + + +# -- Utility Functions -- # +def get_filepaths(file_prefix:str, file_directory:str) -> list: + """ + Get filepaths for World Ocean Atlas 2023 climate normals + in-situ temperature and practical salinity data. + + Parameters + ---------- + file_prefix : str + Prefix of the WOA23 filepaths to be returned. + file_directory : str + Directory containing the WOA23 files. + + Returns + ------- + filepaths : list + List of WOA23 filepaths for practical salinity & in-situ temperature. + """ + # Define salinity and temperature filenames: + salinity_filename = f"{file_prefix}_s*.nc" + temperature_filename = f"{file_prefix}_t*.nc" + + # Collect filepaths for salinity and temperature data: + filepaths_sal = sorted(glob.glob(f"{file_directory}/{salinity_filename}")) + filepaths_temp = sorted(glob.glob(f"{file_directory}/{temperature_filename}")) + + return filepaths_sal, filepaths_temp + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + logging.info("In Progress: Sending WOA23 Monthly Climatologies to JASMIN Object Store.") + + # ========== Prepare Data ========== # + filedir = "/dssgfs01/scratch/otooth/npd_data/observations/WOA2023" + salinity_paths, temperature_paths = [], [] + for prefix in ['woa23_decav71A0', 'woa23_decav81B0', 'woa23_decav91C0']: + # Define file paths for WOA23 monthly climatologies: + filepaths_sal, filepaths_temp = get_filepaths(file_directory=filedir, file_prefix=prefix) + salinity_paths.append(filepaths_sal[1:13]) + temperature_paths.append(filepaths_temp[1:13]) + + # Define year bounds for each WOA23 monthly climatology: + year_bound_start = [1971, 1981, 1991] + year_bound_end = [2000, 2010, 2020] + + for temp_path, sal_path, start_year, end_year in zip(temperature_paths, salinity_paths, year_bound_start, year_bound_end): + logging.info(f"-> In Progress: Preparing WOA23 {start_year}-{end_year} monthly climatology...") + # Open WOA23 monthly climatologies dataset: + ds_s = xr.open_mfdataset(sal_path, decode_times=False, data_vars='all') + ds_t = xr.open_mfdataset(temp_path, decode_times=False, data_vars='all') + + ds = xr.merge([ds_s, ds_t], compat='no_conflicts').squeeze(drop=True) + ds = ds.rename({'time': 'month'}).assign_coords({'month': np.arange(1, 13)}) + ds['climatology_bounds'].data = np.array([[np.datetime64(f'{start_year}-{month:02d}', 'M'), np.datetime64(f'{end_year}-{month:02d}', 'M')] for month in range(1, 13)]) + + ds = ds.rename({"lon": "longitude", + "lat": "latitude", + "climatology_bounds": "time_bnds" + }) + + # Use OceanDataStore standard variable names: + for var in ds.data_vars: + if var.startswith("t_"): + ds = ds.rename({var: var.replace("t_", "thetao_")}) + elif var.startswith("s_"): + ds = ds.rename({var: var.replace("s_", "so_")}) + + # Add ancillary variables: + ds['dx'] = compute_dx(ds) + ds['dy'] = compute_dy(ds) + ds['cell_area'] = compute_cell_area(ds) + + # Custom ancillary variables: + ds['cell_thickness'] = ds['depth_bnds'].isel(nbounds=1) - ds['depth_bnds'].isel(nbounds=0) + ds['cell_volume'] = ds['cell_thickness'] * ds['cell_area'] + + # Update attributes for custom ancillary variables: + ds['cell_thickness'].attrs.update({ + 'long_name': "Grid-Cell Thickness", + 'standard_name': "cell_thickness", + 'units': "m", + }) + ds['cell_volume'].attrs.update({ + 'long_name': "Grid-Cell Volume", + 'standard_name': "cell_volume", + 'units': "m3", + }) + logging.info(f"Completed: Prepared WOA23 {start_year}-{end_year} monthly climatology dataset with ancillary variables.") + + # ========== Prepare Ancillary Data ========== # + # Open WOA23 land sea mask: + df_mask = pd.read_table("/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/WOA23/data/landsea_04.msk", + delimiter=',', + header=0 + ) + # Define level of sea floor (i.e. bottom standard level) at each grid cell: + ds['bottom_level'] = xr.full_like(ds['cell_area'], fill_value=np.nan).squeeze(drop=True) + ds['bottom_level'].data = df_mask['Bottom_Standard_Level'].values.reshape(720, 1440) + ds['bottom_level'].name = "bottom_level" + ds['bottom_level'].attrs = {"standard_name": "model_level_number_at_sea_floor", + "long_name": "Model Level Number at Sea Floor", + "units": "1" + } + # Define land sea mask (1 for ocean grid cells, 0 for land grid cells): + ds['mask'] = ds['bottom_level'] > 1 + ds['mask'].name = "sea_binary_mask" + ds["mask"] = ds["mask"].assign_attrs({'long_name': "Land-Sea Binary Mask", + "standard_name": "sea_binary_mask", + "comment": "1 = sea, 0 = land" + }) + logging.info("Completed: Prepared land sea mask & bottom level variables.") + + # Open WOA23 basin mask: + df_basin = pd.read_table("/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/WOA23/data/basinmask_04.msk", + delimiter=',', + header=0 + ) + # Define basin mask (integer values for each ocean basin, NaN for land grid cells): + ds['basin_mask'] = xr.full_like(ds['cell_area'], fill_value=np.nan).squeeze(drop=True) + for basin, longitude, latitude in df_basin[["Basin_0m", "Longitude", "Latitude"]].itertuples(index=False): + ds['basin_mask'].loc[dict(latitude=latitude, longitude=longitude)] = basin + + ds['basin_mask'].attrs = { + "standard_name": "ocean_basin_mask", + "long_name": "Ocean Basin Mask", + "basin_name": { + 1: "Atlantic Ocean", + 2: "Pacific Ocean", + 3: "Indian Ocean", + 4: "Mediterranean Sea", + 5: "Baltic Sea", + 6: "Black Sea", + 7: "Red Sea", + 8: "Persian Gulf", + 9: "Hudson Bay", + 10: "Southern Ocean", + 11: "Arctic Ocean", + 12: "Sea of Japan", + 13: "Kara Sea", + 14: "Sulu Sea", + 15: "Baffin Bay", + 16: "East Mediterranean", + 17: "West Mediterranean", + 18: "Sea of Okhotsk", + 19: "Banda Sea", + 20: "Caribbean Sea", + 21: "Andaman Basin", + 22: "North Caribbean", + 23: "Gulf of Mexico", + 24: "Beaufort Sea", + 25: "South China Sea", + 26: "Barents Sea", + 27: "Celebes Sea", + 28: "Aleutian Basin", + 29: "Fiji Basin", + 30: "North American Basin", + 31: "West European Basin", + 32: "Southeast Indian Basin", + 33: "Coral Sea", + 34: "East Indian Basin", + 35: "Central Indian Basin", + 36: "Southwest Atlantic Basin", + 37: "Southeast Atlantic Basin", + 38: "Southeast Pacific Basin", + 39: "Guatemala Basin", + 40: "East Caroline Basin", + 41: "Marianas Basin", + 42: "Philippine Sea", + 43: "Arabian Sea", + 44: "Chile Basin", + 45: "Somali Basin", + 46: "Mascarene Basin", + 47: "Crozet Basin", + 48: "Guinea Basin", + 49: "Brazil Basin", + 50: "Argentine Basin", + 51: "Tasman Sea", + 52: "Atlantic Indian Basin", + 53: "Caspian Sea", + 54: "Sulu Sea II", + 55: "Venezuela Basin", + 56: "Bay of Bengal", + 57: "Java Sea", + 58: "East Indian Atlantic Basin", + 59: "Chiloe", + 60: "Bransfield Strait", + } + } + logging.info("Completed: Prepared ocean basin mask variable.") + + # ========== CF Attributes ========== # + ds.attrs.clear() + ds = ds.assign_attrs({ + "Conventions": "CF-1.6", + "title": f"World Ocean Atlas 2023 temperature and salinity monthly climatology ({start_year}-{end_year}).", + "description": f"World Ocean Atlas 2023 (WOA23) temperature and salinity monthly climatology for the global ocean from objectively analysed, quality controlled in-situ profile data ({start_year}-{end_year}).", + "source": "Numerical models: Objective Analysis. In-situ observations: World Ocean Database (WOD).", + "dataset_type": "observation", + "product_type": "climatology", + "product_version": "1.0", + "institution": "NOAA National Centers for Environmental Information (NCEI)", + "citation": "Reagan, James R.; Boyer, Tim P.; García, Hernán E.; Locarnini, Ricardo A.; Baranova, Olga K.; Bouchard, Courtney; Cross, Scott L.; Mishonov, Alexey V.; Paver, Christopher R.; Seidov, Dan; Wang, Zhankun; Dukhovskoy, Dmitry (2023). World Ocean Atlas 2023 (NCEI Accession 0270533). https://www.ncei.noaa.gov/archive/accession/0270533. In Reagan, James R.; Boyer, Tim P.; García, Hernán E.; Locarnini, Ricardo A.; Baranova, Olga K.; Bouchard, Courtney; Cross, Scott L.; Mishonov, Alexey V.; Paver, Christopher R.; Seidov, Dan; Wang, Zhankun; Dukhovskoy, Dmitry (2023). World Ocean Atlas 2023. NOAA National Centers for Environmental Information. Dataset. https://doi.org/10.25921/va26-hv25. Accessed 06-05-2026.", + "references": "Locarnini, R.A., A.V. Mishonov, O.K. Baranova, J.R. Reagan, T.P. Boyer, D. Seidov, Z. Wang, H.E. Garcia, C. Bouchard, S.L. Cross, C.R. Paver, and D. Dukhovskoy (2024). World Ocean Atlas 2023, Volume 1: Temperature. A. Mishonov Technical Editor, NOAA Atlas NESDIS 89. https://doi.org/10.25923/54bh-1613. Reagan, J.R., D. Seidov, Z. Wang, D. Dukhovskoy, T.P. Boyer, R.A. Locarnini, O.K. Baranova, A.V. Mishonov, H.E. Garcia, C. Bouchard, S.L. Cross, and C.R. Paver (2023). World Ocean Atlas 2023, Volume 2: Salinity. A. Mishonov Technical Editor, NOAA Atlas NESDIS 90, https://doi.org/10.25923/70qt-9574.", + "acknowledgement": "None", + "license": "World Ocean Atlas 2023 data were obtained from https://www.ncei.noaa.gov/access/world-ocean-atlas-2023/ and are provided under a Creative Commons CC0 1.0 Universal License https://creativecommons.org/publicdomain/zero/1.0/", + "doi": "https://doi.org/10.25921/va26-hv25", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "0.25 degree", + "vertical_grid_type": "z", + "vertical_grid_coordinate": "depth", + "vertical_grid_levels": 57, + "aggregation": "mean", + "aggregation_frequency": "monthly", + "status": "completed", + "update_frequency": "None", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + }) + logging.info(f"Completed: Added CF-compliant global attributes to WOA23 {start_year}-{end_year} monthly climatology dataset.") + + # ========== Send to Icechunk Repository ========== # + logging.info(f"In Progress: Sending WOA23 {start_year}-{end_year} monthly climatology to Icechunk Repository...") + bucket = "woa23" + prefix = f"woa23_{start_year}_{end_year}_monthly_climatology" + exists = False + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + commit_message = f"Added WOA23 {start_year}-{end_year} monthly climatology." + variable_commits = True + config_kwargs = { + "temporary_directory":".../OceanDataStore/OceanDataStore/data/WOA23/", + "local_directory":".../OceanDataStore/OceanDataStore/data/WOA23/" + } + cluster_kwargs = { + "n_workers" : 25, + "threads_per_worker" : 1, + "memory_limit":"3GB" + } + + # Optimise chunk sizes for spatial analysis: + ds = ds.chunk({'longitude': 1440, 'latitude': 720, 'depth': 5, 'month': 1}) + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds.data_vars) + list(ds.coords): + ds[var].encoding['compressors'] = [blosccodec] + + send_to_icechunk( + file=ds, + bucket=bucket, + object_prefix=prefix, + store_credentials_json=store_credentials_json, + exists=exists, + branch=branch, + commit_message=commit_message, + variable_commits=variable_commits, + dask_config_kwargs=config_kwargs, + dask_cluster_kwargs=cluster_kwargs, + ) + + logging.info(f"Completed: Sent WOA23 {start_year}-{end_year} monthly climatology to Icechunk Repository.") + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/utils.py b/OceanDataStore/data/utils.py new file mode 100644 index 00000000..37adc0f4 --- /dev/null +++ b/OceanDataStore/data/utils.py @@ -0,0 +1,506 @@ +""" +utils.py + +Description: Utility functions for processing gridded ocean data. + +Contact: Ollie Tooth (oliver.tooth@noc.ac.uk) +""" +# == Import Python packages == # +import json + +import icechunk +import numpy as np +import xarray as xr +import zarr + + +# == Utility Functions == # +def compute_gc_distance( + lat1: xr.DataArray, + lon1: xr.DataArray, + lat2: xr.DataArray, + lon2: xr.DataArray +) -> xr.DataArray: + """ + Calculate the Great-Circle distance between two sets of + geographical points on the Earth's surface. + + Parameters: + ----------- + lat1 : xr.DataArray + Latitude of the first set of points (degrees). + lon1 : xr.DataArray + Longitude of the first set of points (degrees). + lat2 : xr.DataArray + Latitude of the second set of points (degrees). + lon2 : xr.DataArray + Longitude of the second set of points (degrees). + + Returns: + -------- + dist : xr.DataArray + Great-circle distance between the two sets + of points (meters). + + """ + # Define the radius of the Earth in meters: + re = 6371000 + + # Convert latitudes and longitudes from degrees to radians: + lon1, lat1, lon2, lat2 = map(np.deg2rad, [lon1, lat1, lon2, lat2]) + dlat = lat2 - lat1 + dlon = lon2 - lon1 + + # Calculate the great-circle distance between points: + dist = (2*re*np.arcsin(np.sqrt( + np.sin(dlat/2)**2 + + (np.cos(lat1) * + np.cos(lat2) * + np.sin(dlon/2)**2) + ))) + + return dist + + +def compute_dx( + ds: xr.Dataset, + ) -> xr.DataArray: + """ + Calculate zonal length of each grid cell in meters. + + The length is calculated using the latitude and longitude coordinates + of the input dataset assuming a uniform regular grid. + + Parameters: + ----------- + ds : xr.Dataset + Input dataset containing 'latitude' and 'longitude' coordinates. + + Returns: + -------- + xr.DataArray + DataArray representing the zonal length of each grid cell. + """ + # -- Validate Input -- # + if not isinstance(ds, xr.Dataset): + raise TypeError("Input must be an xarray Dataset.") + if 'latitude' not in ds.coords or 'longitude' not in ds.coords: + raise ValueError("Input dataset must contain 'latitude' and 'longitude' coordinates.") + + # -- Calculate Grid Cell Length -- # + # Infer horizontal resolution for uniform grid: + dlon = ds['longitude'].diff(dim="longitude").mean().values + + if (ds['longitude'].ndim == 1) and (ds['latitude'].ndim == 1): + # Define 2-dimensional longitude and latitude arrays for grid cell centers: + lon = np.repeat(ds['longitude'].values[np.newaxis, :], len(ds['latitude']), axis=0) + lat = np.repeat(ds['latitude'].values[:, np.newaxis], len(ds['longitude']), axis=1) + else: + # Use existing 2-dimensional longitude and latitude arrays: + lon = ds['longitude'].values + lat = ds['latitude'].values + + # Calculate zonal and meridional grid cell dimensions: + dx = compute_gc_distance(lon1=lon - dlon / 2, lat1=lat, lon2=lon + dlon / 2, lat2=lat) + + # Define dx DataArray with CF-compliant metadata: + dx = xr.DataArray( + data=dx, + dims=('latitude', 'longitude'), + coords={'latitude': ds['latitude'], 'longitude': ds['longitude']}, + name='dx', + attrs={ + 'long_name': 'Grid-Cell Zonal Length', + 'standard_name': 'cell_x_length', + 'units': 'm', + }, + ) + + return dx + + +def compute_dy( + ds: xr.Dataset, + ) -> xr.DataArray: + """ + Calculate meridional length of each grid cell in meters. + + The length is calculated using the latitude and longitude coordinates + of the input dataset assuming a uniform regular grid. + + Parameters: + ----------- + ds : xr.Dataset + Input dataset containing 'latitude' and 'longitude' coordinates. + + Returns: + -------- + xr.DataArray + DataArray representing the meridional length of each grid cell. + """ + # -- Validate Input -- # + if not isinstance(ds, xr.Dataset): + raise TypeError("Input must be an xarray Dataset.") + if 'latitude' not in ds.coords or 'longitude' not in ds.coords: + raise ValueError("Input dataset must contain 'latitude' and 'longitude' coordinates.") + + # -- Calculate Grid Cell Length -- # + # Infer horizontal resolution for uniform grid: + dlat = ds['latitude'].diff(dim="latitude").mean().values + + if (ds['longitude'].ndim == 1) and (ds['latitude'].ndim == 1): + # Define 2-dimensional longitude and latitude arrays for grid cell centers: + lon = np.repeat(ds['longitude'].values[np.newaxis, :], len(ds['latitude']), axis=0) + lat = np.repeat(ds['latitude'].values[:, np.newaxis], len(ds['longitude']), axis=1) + else: + # Use existing 2-dimensional longitude and latitude arrays: + lon = ds['longitude'].values + lat = ds['latitude'].values + + # Calculate zonal and meridional grid cell dimensions: + dy = compute_gc_distance(lon1=lon, lat1=lat - dlat / 2, lon2=lon, lat2=lat + dlat / 2) + + # Define dy DataArray with CF-compliant metadata: + dy = xr.DataArray( + data=dy, + dims=('latitude', 'longitude'), + coords={'latitude': ds['latitude'], 'longitude': ds['longitude']}, + name='dy', + attrs={ + 'long_name': 'Grid-Cell Meridional Length', + 'standard_name': 'cell_y_length', + 'units': 'm', + }, + ) + + return dy + + +def compute_cell_area( + ds: xr.Dataset, + ) -> xr.DataArray: + """ + Calculate horizontal area of each grid cell in square meters. + + The area is calculated using the latitude and longitude coordinates + of the input dataset assuming a uniform regular grid. + + Parameters: + ----------- + ds : xr.Dataset + Input dataset containing 'latitude' and 'longitude' coordinates. + + Returns: + -------- + xr.DataArray + DataArray representing the horizontal area of each grid cell. + """ + # -- Validate Input -- # + if not isinstance(ds, xr.Dataset): + raise TypeError("Input must be an xarray Dataset.") + if 'latitude' not in ds.coords or 'longitude' not in ds.coords: + raise ValueError("Input dataset must contain 'latitude' and 'longitude' coordinates.") + + # -- Calculate Grid Cell Area -- # + # Infer horizontal resolution for uniform grid: + dlon = ds['longitude'].diff(dim="longitude").mean().values + dlat = ds['latitude'].diff(dim="latitude").mean().values + + if (ds['longitude'].ndim == 1) and (ds['latitude'].ndim == 1): + # Define 2-dimensional longitude and latitude arrays for grid cell centers: + lon = np.repeat(ds['longitude'].values[np.newaxis, :], len(ds['latitude']), axis=0) + lat = np.repeat(ds['latitude'].values[:, np.newaxis], len(ds['longitude']), axis=1) + else: + # Use existing 2-dimensional longitude and latitude arrays: + lon = ds['longitude'].values + lat = ds['latitude'].values + + # Calculate zonal and meridional grid cell dimensions: + dx = compute_gc_distance(lon1=lon - dlon / 2, lat1=lat, lon2=lon + dlon / 2, lat2=lat) + dy = compute_gc_distance(lon1=lon, lat1=lat - dlat / 2, lon2=lon, lat2=lat + dlat / 2) + + # Define cell_area DataArray with CF-compliant metadata: + cell_area = xr.DataArray( + data=dx*dy, + dims=('latitude', 'longitude'), + coords={'latitude': ds['latitude'], 'longitude': ds['longitude']}, + name='cell_area', + attrs={ + 'long_name': 'Grid-Cell Area', + 'standard_name': 'cell_area', + 'units': 'm2', + }, + ) + + return cell_area + + +def compute_cell_thickness( + ds: xr.Dataset, + ) -> xr.DataArray: + """ + Calculate vertical thickness of each grid cell in meters. + + Cell thickness is calculated using the depth coordinates of the input dataset assuming a regular grid in the vertical dimension. + + Parameters: + ----------- + ds : xr.Dataset + Input dataset containing 'depth' coordinates. + + Returns: + -------- + xr.DataArray + Vertical thickness of each grid cell. + """ + # -- Validate Input -- # + if not isinstance(ds, xr.Dataset): + raise TypeError("Input must be an xarray Dataset.") + if 'depth' not in ds.coords: + raise ValueError("Input dataset must contain 'depth' coordinates.") + depth = ds['depth'].data + + # Check that depth is 1-dimensional: + if depth.ndim != 1: + raise ValueError("Input depth DataArray must be 1-dimensional.") + + # Find interfaces between vertical levels: + interfaces = 0.5 * (depth[:-1] + depth[1:]) + # Use sea surface as top boundary: + top = 0.0 + # Extrapolate bottom boundary: + bottom = depth[-1] + (depth[-1] - interfaces[-1]) + edges = np.concatenate([[top], interfaces, [bottom]]) + + # Define cell_thickness DataArray with CF-compliant metadata: + cell_thickness = xr.DataArray( + data=np.diff(edges), + dims=('depth',), + coords={'depth': depth}, + name='cell_thickness', + attrs={ + 'long_name': 'Grid-Cell Thickness', + 'standard_name': 'cell_thickness', + 'units': 'm', + }, + ) + + return cell_thickness + +def compute_land_sea_mask( + da: xr.DataArray, + ) -> xr.DataArray: + """ + Calculate land-sea mask from a variable DataArray. + + The resulting mask is defined as follows: + * 1 -> ocean grid point + * 0 -> land grid point + + Parameters: + ----------- + da : xr.DataArray + Input variable DataArray containing NaN values on land points. + + Returns: + -------- + xr.DataArray + Land-sea mask. + """ + # -- Validate Input -- # + if not isinstance(da, xr.DataArray): + raise TypeError("Input must be an xarray DataArray.") + if da.ndim != 2: + raise ValueError("Input DataArray must be 2-dimensional.") + + # -- Calculate Land-Sea Mask -- # + # Define land-sea mask: + mask = xr.where(np.isnan(da), 0, 1) + + # Add CF-compliant metadata to the mask: + mask.attrs['long_name'] = "Land-Sea Binary Mask" + mask.attrs['standard_name'] = "sea_binary_mask" + mask.attrs['comment'] = " 1 = sea, 0 = land" + + return mask + + +def update_icechunk_global_attrs( + credentials_filepath: str, + bucket: str, + prefix: str, + attrs: dict, + commit_message: str, + branch: str='main', + region: str='us-east-1', + force_path_style: bool=True, +) -> str: + """ + Update global attributes of existing Icechunk store via a new + commit. + + Expects Icechunk S3 storage at a custom endpoint (e.g., JASMIN OS). + + Parameters: + ----------- + credentials_filepath : str + Filepath to JSON file containing Icechunk S3 storage credentials. + bucket : str + Name of the S3 bucket where the Icechunk store is located. + prefix : str + Prefix (path) within the S3 bucket where the Icechunk store is located. + attrs : dict + Dictionary of global attributes to update in the root group of the Icechunk store. + commit_message : str + Commit message describing the update to the Icechunk store. + branch : str, optional + Branch of the Icechunk repository to update (default: 'main'). + region : str, optional + AWS region where the S3 bucket is located (default: 'us-east-1'). + force_path_style : bool, optional + Whether to force path-style access for S3 (default: True). + + Returns: + -------- + str + Snapshot ID of new commit. + """ + # -- Validate Input -- # + if not isinstance(credentials_filepath, str): + raise TypeError("credentials_filepath must be a string.") + if not isinstance(bucket, str): + raise TypeError("bucket must be a string.") + if not isinstance(prefix, str): + raise TypeError("prefix must be a string.") + if not isinstance(attrs, dict): + raise TypeError("attributes must be a dictionary.") + if not isinstance(commit_message, str): + raise TypeError("commit_message must be a string.") + if not isinstance(branch, str): + raise TypeError("branch must be a string.") + if not isinstance(region, str): + raise TypeError("region must be a string.") + if not isinstance(force_path_style, bool): + raise TypeError("force_path_style must be a boolean.") + + # -- Update Icechunk Global Attributes -- # + # Load Icechunk S3 storage credentials from JSON file: + store_credentials = json.load(open(credentials_filepath, 'r')) + + # Define Icechunk storage: + storage = icechunk.s3_storage( + bucket=bucket, + prefix=prefix, + region=region, + access_key_id=store_credentials['token'], + secret_access_key=store_credentials['secret'], + endpoint_url=store_credentials['endpoint_url'], + force_path_style=force_path_style, + ) + + # Open Icechunk repository & start read-only session on main branch: + repo = icechunk.Repository.open(storage=storage) + print(f"Opened Icechunk repository at s3://{bucket}/{prefix} on branch '{branch}'") + + # Open a writable session on root group: + session = repo.writable_session(branch=branch) + root = zarr.open_group(session.store) + # Update global attributes & commit changes to repo: + root.attrs.update(attrs) + print(f"Updated global attributes via new commit on branch '{branch}' with commit message -> '{commit_message}'") + + return session.commit(message=commit_message) + + +def update_icechunk_variable_attrs( + credentials_filepath: str, + bucket: str, + prefix: str, + vars: list[str], + attrs: list[dict], + commit_message: str, + branch: str='main', + region: str='us-east-1', + force_path_style: bool=True, +) -> str: + """ + Update variable attributes of existing Icechunk store via a new + commit. + + Expects Icechunk S3 storage at a custom endpoint (e.g., JASMIN OS). + + Parameters: + ----------- + credentials_filepath : str + Filepath to JSON file containing Icechunk S3 storage credentials. + bucket : str + Name of the S3 bucket where the Icechunk store is located. + prefix : str + Prefix (path) within the S3 bucket where the Icechunk store is located. + vars : list[str] + List of variable names whose attributes are to be updated. + attrs : list[dict] + List of dictionaries containing attributes to update for each variable. + commit_message : str + Commit message describing the update to the Icechunk store. + branch : str, optional + Branch of the Icechunk repository to update (default: 'main'). + region : str, optional + AWS region where the S3 bucket is located (default: 'us-east-1'). + force_path_style : bool, optional + Whether to force path-style access for S3 (default: True). + + Returns: + -------- + str + Snapshot ID of new commit. + """ + # -- Validate Input -- # + if not isinstance(credentials_filepath, str): + raise TypeError("credentials_filepath must be a string.") + if not isinstance(bucket, str): + raise TypeError("bucket must be a string.") + if not isinstance(prefix, str): + raise TypeError("prefix must be a string.") + if not isinstance(vars, list): + raise TypeError("vars must be a list.") + if not isinstance(attrs, list): + raise TypeError("attributes must be a list.") + if not isinstance(commit_message, str): + raise TypeError("commit_message must be a string.") + if not isinstance(branch, str): + raise TypeError("branch must be a string.") + if not isinstance(region, str): + raise TypeError("region must be a string.") + if not isinstance(force_path_style, bool): + raise TypeError("force_path_style must be a boolean.") + + # -- Update Icechunk Global Attributes -- # + # Load Icechunk S3 storage credentials from JSON file: + store_credentials = json.load(open(credentials_filepath, 'r')) + + # Define Icechunk storage: + storage = icechunk.s3_storage( + bucket=bucket, + prefix=prefix, + region=region, + access_key_id=store_credentials['token'], + secret_access_key=store_credentials['secret'], + endpoint_url=store_credentials['endpoint_url'], + force_path_style=force_path_style, + ) + + # Open Icechunk repository & start read-only session on main branch: + repo = icechunk.Repository.open(storage=storage) + print(f"Opened Icechunk repository at s3://{bucket}/{prefix} on branch '{branch}'") + + # Open a writable session on root group: + session = repo.writable_session(branch=branch) + root = zarr.open_group(session.store) + # Update variable attributes & commit changes to repo: + for var, attr in zip(vars, attrs): + root[var].attrs.update(attr) + + print(f"Updated variable attributes via new commit on branch '{branch}' with commit message -> '{commit_message}'") + + return session.commit(message=commit_message)