diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0bc9f058..7eef14e8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -63,13 +63,19 @@ jobs: python -m tox -e lint test-pypi: - name: Python${{ matrix.python-version }} (PyPI + Tox) + name: Python${{ matrix.python-version }} (PyPI + Tox, ${{ matrix.os }}) needs: lint - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} timeout-minutes: 20 strategy: matrix: - python-version: [ "3.11", "3.12", "3.13" ] + python-version: [ "3.11", "3.13" ] + os: [ "macos-latest", "ubuntu-latest" ] + positional_args: [ "" ] + include: + - python-version: "3.12" + os: "windows-latest" + positional_args: "--numprocesses=0" steps: - name: Harden Runner uses: step-security/harden-runner@fe104658747b27e96e4f7e80cd0a94068e53901d # v2.16.1 @@ -89,6 +95,7 @@ jobs: cache: "pip" - name: Install HDF5 (Linux) + if: matrix.os == 'ubuntu-latest' run: | sudo apt-get update sudo apt-get install -y libhdf5-dev @@ -97,18 +104,41 @@ jobs: run: | python -m pip install --require-hashes -r CI/requirements_ci.txt + - name: Set Cache keys + shell: bash + run: | + echo "CACHE_KEY=${{ matrix.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }}" >> $GITHUB_ENV - name: Environment Caching uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + if: matrix.os == 'ubuntu-latest' with: path: | ~/.cache/mini-esgf-data ~/.cache/xclim-testdata .tox - key: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }} + key: ${{ env.CACHE_KEY }} + - name: Environment Caching (macOS) + uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + if: matrix.os == 'macos-latest' + with: + path: | + ~/Library/Caches/mini-esgf-data + ~/Library/Caches/xclim-testdata + .tox + key: ${{ env.CACHE_KEY }} + - 
name: Environment Caching (Windows) + uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + if: matrix.os == 'windows-latest' + with: + path: | + ~\AppData\Local\mini-esgf-data\mini-esgf-data\Cache + ~\AppData\Local\xclim-testdata\xclim-testdata\Cache + .tox + key: ${{ env.CACHE_KEY }} - name: Test with tox run: | - python -m tox + python -m tox -- ${{ matrix.positional_args }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COVERALLS_FLAG_NAME: run-Python${{ matrix.python-version }} @@ -116,20 +146,22 @@ jobs: COVERALLS_SERVICE_NAME: github test-conda: - name: Python${{ matrix.python-version }} (Anaconda) + name: Python${{ matrix.python-version }} (Anaconda, ${{ matrix.os }}) needs: lint - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} timeout-minutes: 20 strategy: fail-fast: false matrix: - python-version: [ "3.11", "3.12", "3.13", "3.14" ] + python-version: [ "3.11", "3.14" ] + os: [ "macos-latest", "ubuntu-latest" ] defaults: run: shell: bash -l {0} steps: - name: Harden Runner uses: step-security/harden-runner@fe104658747b27e96e4f7e80cd0a94068e53901d # v2.16.1 + if: matrix.os == 'ubuntu-latest' with: disable-sudo: true egress-policy: audit @@ -152,8 +184,17 @@ jobs: run: | python -m pip install --no-user --no-deps --editable . 
- - name: Test Data Caching + - name: Test Data Caching (macOS) + uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + if: matrix.os == 'macos-latest' + with: + path: | + ~/Library/Caches/mini-esgf-data + ~/Library/Caches/xclim-testdata + key: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-conda-${{ env.ESGF_TEST_DATA_VERSION }} + - name: Test Data Caching (Linux) uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + if: matrix.os == 'ubuntu-latest' with: path: | ~/.cache/mini-esgf-data diff --git a/clisops/__version__.py b/clisops/__version__.py index 45ccb69a..c9cea774 100644 --- a/clisops/__version__.py +++ b/clisops/__version__.py @@ -5,6 +5,6 @@ __author__ = """Eleanor Smith""" __contact__ = "eleanor.smith@stfc.ac.uk" -__copyright__ = "Copyright 2018-2024. United Kingdom Research and Innovation" +__copyright__ = "Copyright 2018-2025. United Kingdom Research and Innovation" __license__ = "BSD-3-Clause" __version__ = "0.18.0" diff --git a/clisops/project_utils.py b/clisops/project_utils.py index 57d6a328..dfef7663 100644 --- a/clisops/project_utils.py +++ b/clisops/project_utils.py @@ -2,6 +2,7 @@ import glob import os +from pathlib import Path import xarray as xr from loguru import logger @@ -62,14 +63,15 @@ def _get_base_dirs_dict(): def _is_ds_id(dset): return dset.count(".") > 1 - def _deduce_project(self, dset): + def _deduce_project(self, dset) -> str | None: if isinstance(dset, str): - if dset.startswith("/"): + if os.path.isabs(dset): # by default this returns c3s-cmip6 not cmip6 (as they have the same base_dir) base_dirs_dict = self._get_base_dirs_dict() for project, base_dir in base_dirs_dict.items(): if dset.startswith(base_dir) and CONFIG[f"project:{project}"].get("is_default_for_path") is True: return project + return None elif self._is_ds_id(dset): return dset.split(".")[0].lower() @@ -78,7 +80,8 @@ def _deduce_project(self, dset): elif dset.endswith(".nc") or os.path.isfile(dset): dset = xr.open_dataset(dset, 
decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)) return get_project_from_ds(dset) - + else: + return None else: raise InvalidProject(f"The format of {dset} is not known and the project name could not be found.") @@ -104,7 +107,7 @@ def _parse(self, force): self._base_dir = get_project_base_dir(self._project) # if a file, group of files or directory to files - find files - if dset.startswith("/") or dset.endswith(".nc"): + if Path(dset).is_absolute() or dset.endswith(".nc"): # if instance of FileMapper if isinstance(self.dset, FileMapper): self._files = self.dset.file_paths @@ -117,11 +120,12 @@ def _parse(self, force): self._files.append(dset) # remove file extension to create data_path - self._data_path = "/".join(dset.split("/")[:-1]) + self._data_path = os.path.dirname(dset) # if base_dir identified, insert into data_path if self._base_dir: - self._ds_id = ".".join(self._data_path.replace(self._base_dir, self._project).strip("/").split("/")) + relative_path = os.path.relpath(self._data_path, self._base_dir) + self._ds_id = ".".join([self._project, *relative_path.split(os.sep)]) # test if dataset id elif self._is_ds_id(dset): @@ -129,7 +133,6 @@ def _parse(self, force): mappings = CONFIG.get(f"project:{self.project}", {}).get("fixed_path_mappings", {}) - # If the dataset uses a fixed path mapping (from the config file) then use it if self._ds_id in mappings: data_path = mappings[self._ds_id] self._data_path = os.path.join(self._base_dir, data_path) @@ -139,7 +142,7 @@ def _parse(self, force): # Default mapping is done by converting '.' 
characters to '/' separators in path else: - self._data_path = os.path.join(self._base_dir, "/".join(dset.split(".")[1:])) + self._data_path = os.path.join(self._base_dir, *dset.split(".")[1:]) # use to data_path to find files if not set already if len(self._files) < 1: @@ -330,7 +333,7 @@ def switch_dset(dset: xr.Dataset | xr.DataArray | str | FileMapper) -> str: str The dataset path or dataset ID derived from the input dataset, switched from the input. """ - if dset.startswith("/"): + if isinstance(dset, str) and (dset.startswith(("/", "\\")) or os.path.isabs(dset)): return datapath_to_dsid(dset) else: return dsid_to_datapath(dset) @@ -445,9 +448,10 @@ def get_project_base_dir(project: str) -> str: ------- str The base directory of the specified project. + The path uses platform-dependent separators. """ try: - return CONFIG[f"project:{project}"]["base_dir"] + return str(Path(CONFIG[f"project:{project}"]["base_dir"])) except KeyError: raise InvalidProject("The project supplied is not known.") @@ -491,12 +495,11 @@ def get_project_from_data_node_root(url: str) -> str: """ data_node_dict = get_data_node_dirs_dict() project = None - for proj, data_node_root in data_node_dict.items(): if data_node_root in url: project = proj - if not project: + if project is None: raise InvalidProject( f"The project could not be identified from the URL {url} so it could not be mapped to a file path." 
) @@ -519,8 +522,8 @@ def url_to_file_path(url: str) -> str: """ project = get_project_from_data_node_root(url) data_node_root = CONFIG.get(f"project:{project}", {}).get("data_node_root") - base_dir = CONFIG.get(f"project:{project}", {}).get("base_dir") - file_path = os.path.join(base_dir, url.partition(data_node_root)[2]) + base_dir = Path(CONFIG.get(f"project:{project}", {}).get("base_dir")) + file_path = str(base_dir.joinpath(url.partition(data_node_root)[2])) return file_path diff --git a/clisops/utils/dataset_utils.py b/clisops/utils/dataset_utils.py index 7effb802..36699199 100644 --- a/clisops/utils/dataset_utils.py +++ b/clisops/utils/dataset_utils.py @@ -349,7 +349,7 @@ def get_coord_type(coord: xr.DataArray | xr.Dataset) -> str | None: return None -def get_main_variable(ds, exclude_common_coords=True): +def get_main_variable(ds: xr.Dataset, exclude_common_coords: bool = True): """ Find the main variable of an xarray Dataset. @@ -401,7 +401,7 @@ def get_main_variable(ds, exclude_common_coords=True): return result -def open_xr_dataset(dset: str | pathlib.Path | list[str | pathlib.Path], **kwargs): +def open_xr_dataset(dset: str | pathlib.Path | list[str | pathlib.Path], **kwargs) -> xr.Dataset: """ Open an xarray dataset from a dataset input. 
@@ -441,7 +441,7 @@ def open_xr_dataset(dset: str | pathlib.Path | list[str | pathlib.Path], **kwarg # If an empty sequence, then raise an Exception if not len(dset): - raise Exception("No files found to open with xarray.") + raise FileNotFoundError("No files found to open with xarray.") # if a list we want a multi-file dataset if len(dset) > 1: @@ -481,7 +481,7 @@ def _get_kwargs_for_opener(otype, **kwargs): "remote_options", "target_options", ] - allowed_multi_args = ["combine"] + allowed_multi_args = ["combine", "data_vars"] args = { "decode_times": xr.coders.CFDatetimeCoder(use_cftime=True), @@ -494,6 +494,7 @@ def _get_kwargs_for_opener(otype, **kwargs): if otype.lower() == "multi": args["combine"] = "by_coords" + args["data_vars"] = "all" allowed_args.extend(allowed_multi_args) elif otype.lower() == "zarr": allowed_args.extend(allowed_zarr_args) diff --git a/clisops/utils/testing.py b/clisops/utils/testing.py index 84c77837..20fb9894 100644 --- a/clisops/utils/testing.py +++ b/clisops/utils/testing.py @@ -2,18 +2,23 @@ import importlib.resources as ilr import os +import platform import warnings +from collections.abc import Callable +from functools import wraps from pathlib import Path from shutil import copytree -from sys import platform +from typing import IO from urllib.error import HTTPError, URLError -from urllib.parse import urlparse +from urllib.parse import urljoin, urlparse from urllib.request import urlretrieve from filelock import FileLock from jinja2 import Template from loguru import logger +from clisops import __version__ as __clisops_version__ + try: import pooch except ImportError: @@ -37,30 +42,41 @@ "write_roocs_cfg", ] + +default_esgf_test_data_version = "v1" +"""Default version of the mini-esgf testing data to use when fetching datasets.""" + +default_esgf_test_data_url = "https://raw.githubusercontent.com/roocs/mini-esgf-data/" +"""Default URL of the mini-esgf testing data repository to use when fetching datasets.""" + 
+default_xclim_test_data_version = "v2024.8.23" +"""Default version of the xclim testing data to use when fetching datasets.""" + +default_xclim_test_data_url = "https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/" +"""Default URL of the xclim testing data repository to use when fetching datasets.""" + try: - default_esgf_test_data_cache = pooch.os_cache("mini-esgf-data") - default_xclim_test_data_cache = pooch.os_cache("xclim-testdata") + default_esgf_test_data_cache = str(pooch.os_cache("mini-esgf-data")) + """Default location for the mini-esgf testing data cache.""" + default_xclim_test_data_cache = str(pooch.os_cache("xclim-testdata")) + """Default location for the xclim testing data cache.""" except (AttributeError, TypeError): default_esgf_test_data_cache = None default_xclim_test_data_cache = None -ESGF_TEST_DATA_REPO_URL = os.getenv("ESGF_TEST_DATA_REPO_UR", "https://raw.githubusercontent.com/roocs/mini-esgf-data") -default_esgf_test_data_version = "v1" -ESGF_TEST_DATA_VERSION = os.getenv("ESGF_TEST_DATA_VERSION", default_esgf_test_data_version) -ESGF_TEST_DATA_CACHE_DIR = os.getenv("ESGF_TEST_DATA_CACHE_DIR", default_esgf_test_data_cache) -XCLIM_TEST_DATA_REPO_URL = os.getenv( - "XCLIM_TEST_DATA_REPO_URL", - "https://raw.githubusercontent.com/Ouranosinc/xclim-testdata", -) -default_xclim_test_data_version = "v2024.8.23" -XCLIM_TEST_DATA_VERSION = os.getenv("XCLIM_TEST_DATA_VERSION", default_xclim_test_data_version) -XCLIM_TEST_DATA_CACHE_DIR = os.getenv("XCLIM_TEST_DATA_CACHE_DIR", default_xclim_test_data_cache) +ESGF_TEST_DATA_REPO_URL = str(os.getenv("ESGF_TEST_DATA_REPO_URL", default_esgf_test_data_url)) +ESGF_TEST_DATA_VERSION = str(os.getenv("ESGF_TEST_DATA_VERSION", default_esgf_test_data_version)) +ESGF_TEST_DATA_CACHE_DIR: str | None = os.getenv("ESGF_TEST_DATA_CACHE_DIR", default_esgf_test_data_cache) + +XCLIM_TEST_DATA_REPO_URL = str(os.getenv("XCLIM_TEST_DATA_REPO_URL", default_xclim_test_data_url)) +XCLIM_TEST_DATA_VERSION = 
str(os.getenv("XCLIM_TEST_DATA_VERSION", default_xclim_test_data_version)) +XCLIM_TEST_DATA_CACHE_DIR: str | None = os.getenv("XCLIM_TEST_DATA_CACHE_DIR", default_xclim_test_data_cache) def write_roocs_cfg( template: str | None = None, - cache_dir: str | Path = default_esgf_test_data_cache, + cache_dir: str | Path | None = default_esgf_test_data_cache, ) -> str: """ Write a ROOCS configuration file for testing purposes. @@ -105,16 +121,16 @@ def write_roocs_cfg( proj_test.my.second.test:second/test/data_*.txt proj_test.another.{variable}.test:good/test/{variable}.nc """ + if cache_dir is None: + raise ValueError("cache_dir must be a valid location.") cfg_template = template or default_template roocs_config = Path(cache_dir, "roocs.ini") - cfg = Template(cfg_template).render( - base_dir=Path(ESGF_TEST_DATA_CACHE_DIR).joinpath(ESGF_TEST_DATA_VERSION).as_posix() - ) + cfg = Template(cfg_template).render(base_dir=Path(ESGF_TEST_DATA_CACHE_DIR).joinpath(ESGF_TEST_DATA_VERSION)) with open(roocs_config, "w") as fp: fp.write(cfg) - return roocs_config.as_posix() + return str(roocs_config) def get_esgf_file_paths(esgf_cache_dir: str | os.PathLike[str]) -> dict[str, str]: @@ -128,241 +144,351 @@ def get_esgf_file_paths(esgf_cache_dir: str | os.PathLike[str]) -> dict[str, str Returns ------- - dict[str, str] + dict A dictionary where keys are descriptive names of datasets and values are their corresponding file paths. 
""" return { - "CMIP5_ZOSTOGA": Path( - esgf_cache_dir, - "badc/cmip5/data/cmip5/output1/INM/inmcm4/rcp45/mon/ocean/Omon/r1i1p1/latest/zostoga/zostoga_Omon_inmcm4_rcp45_r1i1p1_200601-210012.nc", - ).as_posix(), - "CMIP6_RLDS": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical/r1i1p1f1/Amon/rlds/gr/v20180803/rlds_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_185001-201412.nc", - ).as_posix(), - "CMIP6_RLDS_ONE_TIME_STEP": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical/r1i1p1f1/Amon/rlds/gr/v20180803/rlds_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_185001.nc", - ).as_posix(), - "CMIP6_RLUS_ONE_TIME_STEP": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical/r1i1p1f1/Amon/rlus/gr/v20180803/rlus_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_185001.nc", - ).as_posix(), - "CMIP6_MRSOFC": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/IPSL/IPSL-CM6A-LR/ssp119/r1i1p1f1/fx/mrsofc/gr/v20190410/mrsofc_fx_IPSL-CM6A-LR_ssp119_r1i1p1f1_gr.nc", - ).as_posix(), - "CMIP6_SICONC": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/SImon/siconc/gn/latest/siconc_SImon_CanESM5_historical_r1i1p1f1_gn_185001-201412.nc", - ).as_posix(), - "CMIP6_SICONC_DAY": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/SIday/siconc/gn/v20190429/siconc_SIday_CanESM5_historical_r1i1p1f1_gn_18500101-20141231.nc", - ).as_posix(), - "CMIP6_TA": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/MIROC/MIROC6/ssp119/r1i1p1f1/Amon/ta/gn/files/d20190807/ta_Amon_MIROC6_ssp119_r1i1p1f1_gn_201501-202412.nc", - ).as_posix(), - "CMIP6_TASMIN": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/Amon/tasmin/gn/v20190710/tasmin_Amon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_201001-201412.nc", - ).as_posix(), - "CMIP6_JULIAN": Path( - esgf_cache_dir, - 
"badc/cmip6/data/CMIP6/CMIP/CCCR-IITM/IITM-ESM/1pctCO2/r1i1p1f1/Omon/tos/gn/v20191204/tos_Omon_IITM-ESM_1pctCO2_r1i1p1f1_gn_193001-193412.nc", - ).as_posix(), - "CMIP6_TOS": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Omon/tos/gn/v20190710/tos_Omon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001-186912.nc", - ).as_posix(), - "CMIP6_AREACELLO": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/historical/r1i1p1f1/Ofx/areacello/gn/v20190726/areacello_Ofx_GFDL-ESM4_historical_r1i1p1f1_gn.nc", - ).as_posix(), - "CMIP6_TOS_CNRM": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/CNRM-CERFACS/CNRM-CM6-1/ssp245/r1i1p1f2/Omon/tos/gn/v20190219/tos_Omon_CNRM-CM6-1_ssp245_r1i1p1f2_gn_201501.nc", - ).as_posix(), - "CMIP6_TAS_DAY": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/MIROC/MIROC6/ssp119/r1i1p1f1/day/tas/gn/v20191016/tas_day_MIROC6_ssp119_r1i1p1f1_gn_20150101.nc", - ).as_posix(), - "CMIP6_SFTOF": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/NCC/NorESM2-MM/ssp126/r1i1p1f1/Ofx/sftof/gn/v20191108/sftof_Ofx_NorESM2-MM_ssp126_r1i1p1f1_gn.nc", - ).as_posix(), - "CMIP6_TAS_ONE_TIME_STEP": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/CAS/FGOALS-g3/historical/r1i1p1f1/Amon/tas/gn/v20190818/tas_Amon_FGOALS-g3_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_TOS_ONE_TIME_STEP": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/Omon/tos/gn/v20190710/tos_Omon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), + "CMIP5_ZOSTOGA": str( + Path( + esgf_cache_dir, + "badc/cmip5/data/cmip5/output1/INM/inmcm4/rcp45/mon/ocean/Omon/r1i1p1/latest/zostoga/zostoga_Omon_inmcm4_rcp45_r1i1p1_200601-210012.nc", + ) + ), + "CMIP6_RLDS": str( + Path( + esgf_cache_dir, + 
"badc/cmip6/data/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical/r1i1p1f1/Amon/rlds/gr/v20180803/rlds_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_185001-201412.nc", + ) + ), + "CMIP6_RLDS_ONE_TIME_STEP": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical/r1i1p1f1/Amon/rlds/gr/v20180803/rlds_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_185001.nc", + ) + ), + "CMIP6_RLUS_ONE_TIME_STEP": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical/r1i1p1f1/Amon/rlus/gr/v20180803/rlus_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_185001.nc", + ) + ), + "CMIP6_MRSOFC": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/IPSL/IPSL-CM6A-LR/ssp119/r1i1p1f1/fx/mrsofc/gr/v20190410/mrsofc_fx_IPSL-CM6A-LR_ssp119_r1i1p1f1_gr.nc", + ) + ), + "CMIP6_SICONC": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/SImon/siconc/gn/latest/siconc_SImon_CanESM5_historical_r1i1p1f1_gn_185001-201412.nc", + ) + ), + "CMIP6_SICONC_DAY": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/SIday/siconc/gn/v20190429/siconc_SIday_CanESM5_historical_r1i1p1f1_gn_18500101-20141231.nc", + ) + ), + "CMIP6_TA": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/MIROC/MIROC6/ssp119/r1i1p1f1/Amon/ta/gn/files/d20190807/ta_Amon_MIROC6_ssp119_r1i1p1f1_gn_201501-202412.nc", + ) + ), + "CMIP6_TASMIN": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/Amon/tasmin/gn/v20190710/tasmin_Amon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_201001-201412.nc", + ) + ), + "CMIP6_JULIAN": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/CCCR-IITM/IITM-ESM/1pctCO2/r1i1p1f1/Omon/tos/gn/v20191204/tos_Omon_IITM-ESM_1pctCO2_r1i1p1f1_gn_193001-193412.nc", + ) + ), + "CMIP6_TOS": str( + Path( + esgf_cache_dir, + 
"badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Omon/tos/gn/v20190710/tos_Omon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001-186912.nc", + ) + ), + "CMIP6_AREACELLO": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/historical/r1i1p1f1/Ofx/areacello/gn/v20190726/areacello_Ofx_GFDL-ESM4_historical_r1i1p1f1_gn.nc", + ) + ), + "CMIP6_TOS_CNRM": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/CNRM-CERFACS/CNRM-CM6-1/ssp245/r1i1p1f2/Omon/tos/gn/v20190219/tos_Omon_CNRM-CM6-1_ssp245_r1i1p1f2_gn_201501.nc", + ) + ), + "CMIP6_TAS_DAY": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/MIROC/MIROC6/ssp119/r1i1p1f1/day/tas/gn/v20191016/tas_day_MIROC6_ssp119_r1i1p1f1_gn_20150101.nc", + ) + ), + "CMIP6_SFTOF": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/NCC/NorESM2-MM/ssp126/r1i1p1f1/Ofx/sftof/gn/v20191108/sftof_Ofx_NorESM2-MM_ssp126_r1i1p1f1_gn.nc", + ) + ), + "CMIP6_TAS_ONE_TIME_STEP": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/CAS/FGOALS-g3/historical/r1i1p1f1/Amon/tas/gn/v20190818/tas_Amon_FGOALS-g3_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_TOS_ONE_TIME_STEP": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/Omon/tos/gn/v20190710/tos_Omon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_185001.nc", + ) + ), # CMIP6 ocean with collapsing cells - "CMIP6_TOS_LR_DEGEN": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/HAMMOZ-Consortium/MPI-ESM-1-2-HAM/ssp370/r1i1p1f1/Omon/tos/gn/v20190628/tos_Omon_MPI-ESM-1-2-HAM_ssp370_r1i1p1f1_gn_201501.nc", - ).as_posix(), + "CMIP6_TOS_LR_DEGEN": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/HAMMOZ-Consortium/MPI-ESM-1-2-HAM/ssp370/r1i1p1f1/Omon/tos/gn/v20190628/tos_Omon_MPI-ESM-1-2-HAM_ssp370_r1i1p1f1_gn_201501.nc", + ) + ), # 2nd dataset CMIP6 ocean with collapsing cells - "CMIP6_FX_DEGEN": Path( - esgf_cache_dir, - 
"badc/cmip6/data/CMIP6/ScenarioMIP/EC-Earth-Consortium/EC-Earth3-Veg/ssp245/r5i1p1f1/Ofx/deptho/gn/v20200312/deptho_Ofx_EC-Earth3-Veg_ssp245_r5i1p1f1_gn.nc", - ).as_posix(), + "CMIP6_FX_DEGEN": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/EC-Earth-Consortium/EC-Earth3-Veg/ssp245/r5i1p1f1/Ofx/deptho/gn/v20200312/deptho_Ofx_EC-Earth3-Veg_ssp245_r5i1p1f1_gn.nc", + ) + ), # CMIP6 ocean with collapsing cells, cells extending over 50 degrees, missing_values in lat/lon - "CMIP6_SIMASS_DEGEN": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/NCC/NorESM2-MM/ssp126/r1i1p1f1/SImon/simass/gn/v20191108/simass_SImon_NorESM2-MM_ssp126_r1i1p1f1_gn_201501.nc", - ).as_posix(), + "CMIP6_SIMASS_DEGEN": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/NCC/NorESM2-MM/ssp126/r1i1p1f1/SImon/simass/gn/v20191108/simass_SImon_NorESM2-MM_ssp126_r1i1p1f1_gn_201501.nc", + ) + ), # CMIP5 rlat,rlon uncompliant CF units - "CMIP5_WRONG_CF_UNITS": Path( - esgf_cache_dir, - "pool/data/C3SCMIP5/BCC/bcc-csm1-1/rcp85/mon/ocean/Omon/r1i1p1/zos/v20120705/zos_Omon_bcc-csm1-1_rcp85_r1i1p1_200601.nc", - ).as_posix(), + "CMIP5_WRONG_CF_UNITS": str( + Path( + esgf_cache_dir, + "pool/data/C3SCMIP5/BCC/bcc-csm1-1/rcp85/mon/ocean/Omon/r1i1p1/zos/v20120705/zos_Omon_bcc-csm1-1_rcp85_r1i1p1_200601.nc", + ) + ), # CMIP6 rlat,rlon uncompliant CF units - "CMIP6_WRONG_CF_UNITS": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/AerChemMIP/BCC/BCC-ESM1/ssp370/r1i1p1f1/Omon/pbo/gn/v20190624/pbo_Omon_BCC-ESM1_ssp370_r1i1p1f1_gn_201501.nc", - ).as_posix(), + "CMIP6_WRONG_CF_UNITS": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/AerChemMIP/BCC/BCC-ESM1/ssp370/r1i1p1f1/Omon/pbo/gn/v20190624/pbo_Omon_BCC-ESM1_ssp370_r1i1p1f1_gn_201501.nc", + ) + ), # CMIP6 lat, lon with uncompliant CF units and standard_name - "CMIP6_WRONG_CF_ATTRS": Path( - esgf_cache_dir, - 
"badc/cmip6/data/CMIP6/HighResMIP/BCC/BCC-CSM2-HR/hist-1950/r1i1p1f1/Omon/tos/gn/v20200922/tos_Omon_BCC-CSM2-HR_hist-1950_r1i1p1f1_gn_198001.nc", - ).as_posix(), - "CMIP5_MRSOS_ONE_TIME_STEP": Path( - esgf_cache_dir, - "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/day/land/day/r1i1p1/latest/mrsos/mrsos_day_HadGEM2-ES_rcp85_r1i1p1_20051201.nc", - ).as_posix(), - "CMIP6_GFDL_EXTENT": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/Omon/sos/gn/v20180701/sos_Omon_GFDL-CM4_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_TAS_PRECISION_A": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/1pctCO2/r1i1p1f1/Amon/tas/gn/v20200212/tas_Amon_AWI-ESM-1-1-LR_1pctCO2_r1i1p1f1_gn_185501.nc", - ).as_posix(), - "CMIP6_TAS_PRECISION_B": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/1pctCO2/r1i1p1f1/Amon/tas/gn/v20200212/tas_Amon_AWI-ESM-1-1-LR_1pctCO2_r1i1p1f1_gn_209901.nc", - ).as_posix(), - "CMIP6_ATM_VERT_ONE_TIMESTEP": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/AERmon/o3/gn/v20190710/o3_AERmon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_ATM_VERT_ONE_TIMESTEP_ZONMEAN": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/AERmon/o3/gn/v20190710/o3_AERmon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001_zm.nc", - ).as_posix(), - "CMIP6_IITM_EXTENT": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/CCCR-IITM/IITM-ESM/1pctCO2/r1i1p1f1/Omon/tos/gn/v20191204/tos_Omon_IITM-ESM_1pctCO2_r1i1p1f1_gn_193001.nc", - ).as_posix(), + "CMIP6_WRONG_CF_ATTRS": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/HighResMIP/BCC/BCC-CSM2-HR/hist-1950/r1i1p1f1/Omon/tos/gn/v20200922/tos_Omon_BCC-CSM2-HR_hist-1950_r1i1p1f1_gn_198001.nc", + ) + ), + "CMIP5_MRSOS_ONE_TIME_STEP": str( + Path( + esgf_cache_dir, + 
"badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/day/land/day/r1i1p1/latest/mrsos/mrsos_day_HadGEM2-ES_rcp85_r1i1p1_20051201.nc", + ) + ), + "CMIP6_GFDL_EXTENT": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/Omon/sos/gn/v20180701/sos_Omon_GFDL-CM4_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_TAS_PRECISION_A": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/1pctCO2/r1i1p1f1/Amon/tas/gn/v20200212/tas_Amon_AWI-ESM-1-1-LR_1pctCO2_r1i1p1f1_gn_185501.nc", + ) + ), + "CMIP6_TAS_PRECISION_B": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/1pctCO2/r1i1p1f1/Amon/tas/gn/v20200212/tas_Amon_AWI-ESM-1-1-LR_1pctCO2_r1i1p1f1_gn_209901.nc", + ) + ), + "CMIP6_ATM_VERT_ONE_TIMESTEP": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/AERmon/o3/gn/v20190710/o3_AERmon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_ATM_VERT_ONE_TIMESTEP_ZONMEAN": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/AERmon/o3/gn/v20190710/o3_AERmon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001_zm.nc", + ) + ), + "CMIP6_IITM_EXTENT": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/CCCR-IITM/IITM-ESM/1pctCO2/r1i1p1f1/Omon/tos/gn/v20191204/tos_Omon_IITM-ESM_1pctCO2_r1i1p1f1_gn_193001.nc", + ) + ), # CMIP6 dataset with weird range in its longitude coordinate (-300, 60) # and unmasked missing values in the latitude and longitude coordinates - "CMIP6_EXTENT_UNMASKED": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/OMIP/NOAA-GFDL/GFDL-OM4p5B/omip1/r1i1p1f1/Omon/volcello/gn/v20180701/volcello_Omon_GFDL-OM4p5B_omip1_r1i1p1f1_gn_176801.nc", - ).as_posix(), - "CMIP6_OCE_HALO_CNRM": Path( - esgf_cache_dir, - 
"badc/cmip6/data/CMIP6/CMIP/CNRM-CERFACS/CNRM-CM6-1-HR/historical/r1i1p1f2/Omon/tos/gn/v20191021/tos_Omon_CNRM-CM6-1-HR_historical_r1i1p1f2_gn_185001.nc", - ).as_posix(), - "CMIP6_UNSTR_FESOM_LR": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/historical/r1i1p1f1/Omon/tos/gn/v20200212/tos_Omon_AWI-ESM-1-1-LR_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_UNSTR_ICON_A": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/ICON-ESM-LR/historical/r1i1p1f1/Amon/tas/gn/v20210215/tas_Amon_ICON-ESM-LR_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_UNSTR_VERT_ICON_O": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/ICON-ESM-LR/historical/r1i1p1f1/Omon/thetao/gn/v20210215/thetao_Omon_ICON-ESM-LR_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_UNTAGGED_MISSVALS": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Omon/tos/gn/v20191120/tos_Omon_CESM2-FV2_historical_r1i1p1f1_gn_200001.nc", - ).as_posix(), - "CMIP6_STAGGERED_UCOMP": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Omon/tauuo/gn/v20200909/tauuo_Omon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_STAGGERED_VCOMP": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Omon/tauvo/gn/v20190710/tauvo_Omon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_FILLVALUE": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/NCAR/CESM2-WACCM/historical/r1i1p1f1/day/tas/gn/v20190227/tas_day_CESM2-WACCM_historical_r1i1p1f1_gn_20000101-20091231.nc", - ).as_posix(), - "CMIP6_ZONMEAN_A": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/Omon/msftmz/gn/v20190710/msftmz_Omon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_191001.nc", - ).as_posix(), - "CMIP6_ZONMEAN_B": Path( - esgf_cache_dir, - 
"badc/cmip6/data/CMIP6/CMIP/NCC/NorCPM1/historical/r22i1p1f1/Omon/msftmz/grz/v20200724/msftmz_Omon_NorCPM1_historical_r22i1p1f1_grz_185001.nc", - ).as_posix(), + "CMIP6_EXTENT_UNMASKED": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/OMIP/NOAA-GFDL/GFDL-OM4p5B/omip1/r1i1p1f1/Omon/volcello/gn/v20180701/volcello_Omon_GFDL-OM4p5B_omip1_r1i1p1f1_gn_176801.nc", + ) + ), + "CMIP6_OCE_HALO_CNRM": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/CNRM-CERFACS/CNRM-CM6-1-HR/historical/r1i1p1f2/Omon/tos/gn/v20191021/tos_Omon_CNRM-CM6-1-HR_historical_r1i1p1f2_gn_185001.nc", + ) + ), + "CMIP6_UNSTR_FESOM_LR": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/historical/r1i1p1f1/Omon/tos/gn/v20200212/tos_Omon_AWI-ESM-1-1-LR_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_UNSTR_ICON_A": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/ICON-ESM-LR/historical/r1i1p1f1/Amon/tas/gn/v20210215/tas_Amon_ICON-ESM-LR_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_UNSTR_VERT_ICON_O": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/ICON-ESM-LR/historical/r1i1p1f1/Omon/thetao/gn/v20210215/thetao_Omon_ICON-ESM-LR_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_UNTAGGED_MISSVALS": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Omon/tos/gn/v20191120/tos_Omon_CESM2-FV2_historical_r1i1p1f1_gn_200001.nc", + ) + ), + "CMIP6_STAGGERED_UCOMP": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Omon/tauuo/gn/v20200909/tauuo_Omon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_STAGGERED_VCOMP": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Omon/tauvo/gn/v20190710/tauvo_Omon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_FILLVALUE": str( + Path( + esgf_cache_dir, + 
"badc/cmip6/data/CMIP6/CMIP/NCAR/CESM2-WACCM/historical/r1i1p1f1/day/tas/gn/v20190227/tas_day_CESM2-WACCM_historical_r1i1p1f1_gn_20000101-20091231.nc", + ) + ), + "CMIP6_ZONMEAN_A": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/Omon/msftmz/gn/v20190710/msftmz_Omon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_191001.nc", + ) + ), + "CMIP6_ZONMEAN_B": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/NCC/NorCPM1/historical/r22i1p1f1/Omon/msftmz/grz/v20200724/msftmz_Omon_NorCPM1_historical_r22i1p1f1_grz_185001.nc", + ) + ), # CMIP6 dataset without defined bounds on curvilinear grid - "CMIP6_NO_BOUNDS": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/CAS/FGOALS-f3-L/ssp126/r1i1p1f1/Omon/tos/gn/v20191008/tos_Omon_FGOALS-f3-L_ssp126_r1i1p1f1_gn_201501.nc", - ).as_posix(), + "CMIP6_NO_BOUNDS": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/CAS/FGOALS-f3-L/ssp126/r1i1p1f1/Omon/tos/gn/v20191008/tos_Omon_FGOALS-f3-L_ssp126_r1i1p1f1_gn_201501.nc", + ) + ), # CMIP6 dataset with character dimension 'sector' - "CMIP6_CHAR_DIM": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/IPSL/IPSL-CM6A-LR/ssp245/r1i1p1f1/Lmon/landCoverFrac/gr/v20190119/landCoverFrac_Lmon_IPSL-CM6A-LR_ssp245_r1i1p1f1_gr_201501.nc", - ).as_posix(), + "CMIP6_CHAR_DIM": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/IPSL/IPSL-CM6A-LR/ssp245/r1i1p1f1/Lmon/landCoverFrac/gr/v20190119/landCoverFrac_Lmon_IPSL-CM6A-LR_ssp245_r1i1p1f1_gr_201501.nc", + ) + ), # CORDEX dataset with maldefined bounds - "CORDEX_ERRONEOUS_BOUNDS": Path( - esgf_cache_dir, - "pool/data/C3SCORDEX/data/c3s-cordex/output/ARC-44/BCCR/ECMWF-ERAINT/evaluation/r1i1p1/BCCR-WRF331/v1/day/tas/v20200915/tas_ARC-44_ECMWF-ERAINT_evaluation_r1i1p1_BCCR-WRF331_v1_day_20010101.nc", - ).as_posix(), - "CORDEX_TAS_ONE_TIMESTEP": Path( - esgf_cache_dir, - 
"pool/data/CORDEX/data/cordex/output/EUR-22/GERICS/MPI-M-MPI-ESM-LR/rcp85/r1i1p1/GERICS-REMO2015/v1/mon/tas/v20191029/tas_EUR-22_MPI-M-MPI-ESM-LR_rcp85_r1i1p1_GERICS-REMO2015_v1_mon_202101.nc", - ).as_posix(), - "CORDEX_TAS_ONE_TIMESTEP_ANT": Path( - esgf_cache_dir, - "pool/data/CORDEX/data/cordex/output/ANT-44/KNMI/ECMWF-ERAINT/evaluation/r1i1p1/DMI-HIRHAM5/v1/day/tas/v20201001/tas_ANT-44_ECMWF-ERAINT_evaluation_r1i1p1_DMI-HIRHAM5_v1_day_20060101.nc", - ).as_posix(), - "CORDEX_TAS_NO_BOUNDS": Path( - esgf_cache_dir, - "pool/data/CORDEX/data/cordex/output/EUR-11/KNMI/MPI-M-MPI-ESM-LR/rcp85/r1i1p1/KNMI-RACMO22E/v1/mon/tas/v20190625/tas_EUR-11_MPI-M-MPI-ESM-LR_rcp85_r1i1p1_KNMI-RACMO22E_v1_mon_209101.nc", - ).as_posix(), - "ATLAS_v1_CMIP5": Path( - esgf_cache_dir, - "pool/data/c3s-cica-atlas/CMIP5/rcp26/pr_CMIP5_rcp26_mon_200601-210012.nc", - ).as_posix(), - "ATLAS_v1_EOBS": Path( - esgf_cache_dir, - "pool/data/c3s-cica-atlas/E-OBS/sfcwind_E-OBS_mon_195001-202112.nc", - ).as_posix(), - "ATLAS_v1_ERA5": Path( - esgf_cache_dir, - "pool/data/c3s-cica-atlas/ERA5/psl_ERA5_mon_194001-202212.nc", - ).as_posix(), - "ATLAS_v1_CORDEX": Path( - esgf_cache_dir, - "pool/data/c3s-cica-atlas/CORDEX-CORE/historical/huss_CORDEX-CORE_historical_mon_197001.nc", - ).as_posix(), - "ATLAS_v1_EOBS_GRID": Path( - esgf_cache_dir, - "pool/data/c3s-cica-atlas/E-OBS/t_E-OBS_mon_195001.nc", - ).as_posix(), - "ATLAS_v0_CORDEX_NAM": Path( - esgf_cache_dir, - "pool/data/c3s-ipcc-ar6-atlas/CORDEX-NAM/historical/rx1day_CORDEX-NAM_historical_mon_197001-200512.nc", - ).as_posix(), - "ATLAS_v0_CMIP6": Path( - esgf_cache_dir, - "pool/data/c3s-ipcc-ar6-atlas/CMIP6/ssp245/sst_CMIP6_ssp245_mon_201501-210012.nc", - ).as_posix(), - "ATLAS_v0_CORDEX_ANT": Path( - esgf_cache_dir, - "pool/data/c3s-ipcc-ar6-atlas/CORDEX-ANT/rcp45/tnn_CORDEX-ANT_rcp45_mon_200601.nc", - ).as_posix(), + "CORDEX_ERRONEOUS_BOUNDS": str( + Path( + esgf_cache_dir, + 
"pool/data/C3SCORDEX/data/c3s-cordex/output/ARC-44/BCCR/ECMWF-ERAINT/evaluation/r1i1p1/BCCR-WRF331/v1/day/tas/v20200915/tas_ARC-44_ECMWF-ERAINT_evaluation_r1i1p1_BCCR-WRF331_v1_day_20010101.nc", + ) + ), + "CORDEX_TAS_ONE_TIMESTEP": str( + Path( + esgf_cache_dir, + "pool/data/CORDEX/data/cordex/output/EUR-22/GERICS/MPI-M-MPI-ESM-LR/rcp85/r1i1p1/GERICS-REMO2015/v1/mon/tas/v20191029/tas_EUR-22_MPI-M-MPI-ESM-LR_rcp85_r1i1p1_GERICS-REMO2015_v1_mon_202101.nc", + ) + ), + "CORDEX_TAS_ONE_TIMESTEP_ANT": str( + Path( + esgf_cache_dir, + "pool/data/CORDEX/data/cordex/output/ANT-44/KNMI/ECMWF-ERAINT/evaluation/r1i1p1/DMI-HIRHAM5/v1/day/tas/v20201001/tas_ANT-44_ECMWF-ERAINT_evaluation_r1i1p1_DMI-HIRHAM5_v1_day_20060101.nc", + ) + ), + "CORDEX_TAS_NO_BOUNDS": str( + Path( + esgf_cache_dir, + "pool/data/CORDEX/data/cordex/output/EUR-11/KNMI/MPI-M-MPI-ESM-LR/rcp85/r1i1p1/KNMI-RACMO22E/v1/mon/tas/v20190625/tas_EUR-11_MPI-M-MPI-ESM-LR_rcp85_r1i1p1_KNMI-RACMO22E_v1_mon_209101.nc", + ) + ), + "ATLAS_v1_CMIP5": str( + Path( + esgf_cache_dir, + "pool/data/c3s-cica-atlas/CMIP5/rcp26/pr_CMIP5_rcp26_mon_200601-210012.nc", + ) + ), + "ATLAS_v1_EOBS": str( + Path( + esgf_cache_dir, + "pool/data/c3s-cica-atlas/E-OBS/sfcwind_E-OBS_mon_195001-202112.nc", + ) + ), + "ATLAS_v1_ERA5": str( + Path( + esgf_cache_dir, + "pool/data/c3s-cica-atlas/ERA5/psl_ERA5_mon_194001-202212.nc", + ) + ), + "ATLAS_v1_CORDEX": str( + Path( + esgf_cache_dir, + "pool/data/c3s-cica-atlas/CORDEX-CORE/historical/huss_CORDEX-CORE_historical_mon_197001.nc", + ) + ), + "ATLAS_v1_EOBS_GRID": str( + Path( + esgf_cache_dir, + "pool/data/c3s-cica-atlas/E-OBS/t_E-OBS_mon_195001.nc", + ) + ), + "ATLAS_v0_CORDEX_NAM": str( + Path( + esgf_cache_dir, + "pool/data/c3s-ipcc-ar6-atlas/CORDEX-NAM/historical/rx1day_CORDEX-NAM_historical_mon_197001-200512.nc", + ) + ), + "ATLAS_v0_CMIP6": str( + Path( + esgf_cache_dir, + "pool/data/c3s-ipcc-ar6-atlas/CMIP6/ssp245/sst_CMIP6_ssp245_mon_201501-210012.nc", + ) + ), + "ATLAS_v0_CORDEX_ANT": 
str( + Path( + esgf_cache_dir, + "pool/data/c3s-ipcc-ar6-atlas/CORDEX-ANT/rcp45/tnn_CORDEX-ANT_rcp45_mon_200601.nc", + ) + ), } @@ -401,46 +527,66 @@ def get_esgf_glob_paths(esgf_cache_dir: str | os.PathLike[str]) -> dict[str, str A dictionary where keys are dataset identifiers and values are glob paths to the datasets. """ return { - "CMIP5_TAS": Path( - esgf_cache_dir, - "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", - ).as_posix(), - "CMIP5_TAS_EC_EARTH": Path( - esgf_cache_dir, - "badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", - ).as_posix(), - "CMIP5_RH": Path( - esgf_cache_dir, - "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/historical/mon/land/Lmon/r1i1p1/latest/rh/*.nc", - ).as_posix(), - "C3S_CMIP5_TSICE": Path( - esgf_cache_dir, - "gws/nopw/j04/cp4cds1_vol1/data/c3s-cmip5/output1/NCC/NorESM1-ME/rcp60/mon/seaIce/OImon/r1i1p1/tsice/v20120614/*.nc", - ).as_posix(), - "C3S_CORDEX_AFR_TAS": Path( - esgf_cache_dir, - "pool/data/CORDEX/data/cordex/output/AFR-22/GERICS/MPI-M-MPI-ESM-LR/historical/r1i1p1/GERICS-REMO2015/v1/day/tas/v20201015/*.nc", - ).as_posix(), - "C3S_CORDEX_NAM_PR": Path( - esgf_cache_dir, - "pool/data/CORDEX/data/cordex/output/NAM-22/OURANOS/NOAA-GFDL-GFDL-ESM2M/rcp45/r1i1p1/OURANOS-CRCM5/v1/day/pr/v20200831/*.nc", - ).as_posix(), - "C3S_CORDEX_EUR_ZG500": Path( - esgf_cache_dir, - "pool/data/CORDEX/data/cordex/output/EUR-11/IPSL/IPSL-IPSL-CM5A-MR/rcp85/r1i1p1/IPSL-WRF381P/v1/day/zg500/v20190919/*.nc", - ).as_posix(), - "C3S_CORDEX_ANT_SFC_WIND": Path( - esgf_cache_dir, - "pool/data/CORDEX/data/cordex/output/ANT-44/KNMI/ECMWF-ERAINT/evaluation/r1i1p1/KNMI-RACMO21P/v1/day/sfcWind/v20201001/*.nc", - ).as_posix(), - "CMIP5_MRSOS_MULTIPLE_TIME_STEPS": Path( - esgf_cache_dir, - "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp45/day/land/day/r1i1p1/latest/mrsos/*.nc", - ).as_posix(), - "C3S_CMIP5_TAS": Path( - esgf_cache_dir, - 
"gws/nopw/j04/cp4cds1_vol1/data/c3s-cmip5/output1/ICHEC/EC-EARTH/historical/day/atmos/day/r1i1p1/tas/v20131231/*.nc", - ).as_posix(), + "CMIP5_TAS": str( + Path( + esgf_cache_dir, + "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", + ) + ), + "CMIP5_TAS_EC_EARTH": str( + Path( + esgf_cache_dir, + "badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", + ) + ), + "CMIP5_RH": str( + Path( + esgf_cache_dir, + "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/historical/mon/land/Lmon/r1i1p1/latest/rh/*.nc", + ) + ), + "C3S_CMIP5_TSICE": str( + Path( + esgf_cache_dir, + "gws/nopw/j04/cp4cds1_vol1/data/c3s-cmip5/output1/NCC/NorESM1-ME/rcp60/mon/seaIce/OImon/r1i1p1/tsice/v20120614/*.nc", + ) + ), + "C3S_CORDEX_AFR_TAS": str( + Path( + esgf_cache_dir, + "pool/data/CORDEX/data/cordex/output/AFR-22/GERICS/MPI-M-MPI-ESM-LR/historical/r1i1p1/GERICS-REMO2015/v1/day/tas/v20201015/*.nc", + ) + ), + "C3S_CORDEX_NAM_PR": str( + Path( + esgf_cache_dir, + "pool/data/CORDEX/data/cordex/output/NAM-22/OURANOS/NOAA-GFDL-GFDL-ESM2M/rcp45/r1i1p1/OURANOS-CRCM5/v1/day/pr/v20200831/*.nc", + ) + ), + "C3S_CORDEX_EUR_ZG500": str( + Path( + esgf_cache_dir, + "pool/data/CORDEX/data/cordex/output/EUR-11/IPSL/IPSL-IPSL-CM5A-MR/rcp85/r1i1p1/IPSL-WRF381P/v1/day/zg500/v20190919/*.nc", + ) + ), + "C3S_CORDEX_ANT_SFC_WIND": str( + Path( + esgf_cache_dir, + "pool/data/CORDEX/data/cordex/output/ANT-44/KNMI/ECMWF-ERAINT/evaluation/r1i1p1/KNMI-RACMO21P/v1/day/sfcWind/v20201001/*.nc", + ) + ), + "CMIP5_MRSOS_MULTIPLE_TIME_STEPS": str( + Path( + esgf_cache_dir, + "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp45/day/land/day/r1i1p1/latest/mrsos/*.nc", + ) + ), + "C3S_CMIP5_TAS": str( + Path( + esgf_cache_dir, + "gws/nopw/j04/cp4cds1_vol1/data/c3s-cmip5/output1/ICHEC/EC-EARTH/historical/day/atmos/day/r1i1p1/tas/v20131231/*.nc", + ) + ), } @@ -500,11 +646,14 @@ def load_registry(branch: str, repo: str) -> dict[str, str]: dict 
Dictionary of filenames and hashes. """ - if repo == ESGF_TEST_DATA_REPO_URL: + if not repo.endswith("/"): + repo = f"{repo}/" + + if "mini-esgf-data" in repo: project = "mini-esgf-data" default_testdata_version = ESGF_TEST_DATA_VERSION default_testdata_repo_url = ESGF_TEST_DATA_REPO_URL - elif repo == XCLIM_TEST_DATA_REPO_URL: + elif "xclim-testdata" in repo: project = "xclim-testdata" default_testdata_version = XCLIM_TEST_DATA_VERSION default_testdata_repo_url = XCLIM_TEST_DATA_REPO_URL @@ -514,17 +663,28 @@ def load_registry(branch: str, repo: str) -> dict[str, str]: f"Please use one of {ESGF_TEST_DATA_REPO_URL} or {XCLIM_TEST_DATA_REPO_URL}" ) - remote_registry = audit_url(f"{repo}{branch}/data/{project}_registry.txt") - if branch != default_testdata_version: - custom_registry_folder = Path(str(ilr.files("clisops").joinpath(f"utils/registries/{branch}"))) + remote_registry = audit_url( + urljoin( + urljoin(repo, branch if branch.endswith("/") else f"{branch}/"), + "data/registry.txt", + ) + ) + + if repo != default_testdata_repo_url: + external_repo_name = urlparse(repo).path.split("/")[-2] + external_branch_name = branch.split("/")[-1] + registry_file = Path( + str(ilr.files("clisops").joinpath(f"utils/registry.{external_repo_name}.{external_branch_name}.txt")) + ) + urlretrieve(remote_registry, registry_file) # noqa: S310 + elif branch != default_testdata_version: + custom_registry_folder = Path(str(ilr.files("clisops").joinpath(f"utils/{branch}"))) custom_registry_folder.mkdir(parents=True, exist_ok=True) - registry_file = custom_registry_folder.joinpath(f"{project}_registry.txt") + registry_file = custom_registry_folder.joinpath("registry.txt") urlretrieve(remote_registry, registry_file) # noqa: S310 - elif repo != default_testdata_repo_url: + else: registry_file = Path(str(ilr.files("clisops").joinpath(f"utils/{project}_registry.txt"))) - urlretrieve(remote_registry, registry_file) # noqa: S310 - registry_file = 
Path(str(ilr.files("clisops").joinpath(f"utils/{project}_registry.txt"))) if not registry_file.exists(): raise FileNotFoundError(f"Registry file not found: {registry_file}") @@ -538,7 +698,7 @@ def stratus( repo: str, branch: str, cache_dir: str | Path, - data_updates: bool = True, + allow_updates: bool = True, ): """ Pooch registry instance for xclim test data. @@ -551,7 +711,7 @@ def stratus( Branch of repository to use when fetching testing datasets. cache_dir : str or Path The path to the directory where the data files are stored. - data_updates : bool + allow_updates : bool If True, allow updates to the data files. Default is True. Returns @@ -578,49 +738,84 @@ def stratus( "You can install it with `pip install pooch` or `pip install roocs-utils[dev]`." ) - if repo.endswith("xclim-testdata"): + if "xclim-testdata" in repo: _version = XCLIM_TEST_DATA_VERSION - _default_version = default_xclim_test_data_version - elif repo.endswith("mini-esgf-data"): + _default_testdata_version = default_xclim_test_data_version + elif "mini-esgf-data" in repo: _version = ESGF_TEST_DATA_VERSION - _default_version = default_esgf_test_data_version + _default_testdata_version = default_esgf_test_data_version else: raise ValueError( f"Repository URL {repo} not recognized. 
" f"Please use one of {ESGF_TEST_DATA_REPO_URL} or {XCLIM_TEST_DATA_REPO_URL}" ) - remote = audit_url(f"{repo}/{branch}/data") - return pooch.create( + if not repo.endswith("/"): + repo = f"{repo}/" + remote = audit_url(urljoin(urljoin(repo, branch if branch.endswith("/") else f"{branch}/"), "data")) + + _stratus = pooch.create( path=cache_dir, base_url=remote, - version=_default_version, + version=_default_testdata_version, version_dev=_version, - allow_updates=data_updates, + allow_updates=allow_updates, registry=load_registry(branch=branch, repo=repo), ) + # Add a custom fetch method to the Pooch instance + # Needed to address: https://github.com/readthedocs/readthedocs.org/issues/11763 + # Fix inspired by @bjlittle (https://github.com/bjlittle/geovista/pull/1202) + _stratus.fetch_diversion = _stratus.fetch + + # Overload the fetch method to add user-agent headers + @wraps(_stratus.fetch_diversion) + def _fetch(*args, **kwargs: bool | Callable) -> str: # numpydoc ignore=GL08 # *args: str + def _downloader( + url: str, + output_file: str | IO, + poocher: pooch.Pooch, + check_only: bool | None = False, + ) -> None: + """Download the file from the URL and save it to the save_path.""" + headers = {"User-Agent": f"clisops ({__clisops_version__})"} + downloader = pooch.HTTPDownloader(headers=headers) + return downloader(url, output_file, poocher, check_only=check_only) + + # default to our http/s downloader with user-agent headers + kwargs.setdefault("downloader", _downloader) + return _stratus.fetch_diversion(*args, **kwargs) + + # Replace the fetch method with the custom fetch method + _stratus.fetch = _fetch + + return _stratus + def populate_testing_data( + temp_folder: Path | None = None, + *, repo: str, branch: str, - cache_dir: Path, + local_cache: Path, ): """ Populate the local cache with the testing data. Parameters ---------- + temp_folder : Path, optional + Path to a temporary folder to use as the local cache. 
If not provided, the default location will be used. repo : str, optional URL of the repository to use when fetching testing datasets. branch : str, optional Branch of repository to use when fetching testing datasets. - cache_dir : Path + local_cache : Path The path to the local cache. Defaults to the location set by the platformdirs library. The testing data will be downloaded to this local cache. """ # Create the Pooch instance - n = stratus(cache_dir=cache_dir, repo=repo, branch=branch) + n = stratus(repo=repo, branch=branch, cache_dir=temp_folder or local_cache) # Download the files errored_files = [] @@ -646,7 +841,7 @@ def gather_testing_data( worker_id: str, branch: str, repo: str, - cache_dir: str | os.PathLike[str] | Path, + _cache_dir: str | os.PathLike[str] | Path | None = None, ): """ Gather testing data across workers. @@ -661,7 +856,7 @@ def gather_testing_data( The branch of the repository to use when fetching testing datasets. repo : str The URL of the repository to use when fetching testing datasets. - cache_dir : str or Path + _cache_dir : str or Path The path to the local cache where the testing data is stored. Raises @@ -671,10 +866,13 @@ def gather_testing_data( FileNotFoundError If the testing data is not found and UNIX-style file-locking is not supported on Windows. 
""" - cache_dir = Path(cache_dir) - if repo.endswith("xclim-testdata"): + if _cache_dir is None: + raise ValueError("The cache directory must be set.") + cache_dir = Path(_cache_dir) + + if "xclim-testdata" in repo: version = default_xclim_test_data_version - elif repo.endswith("mini-esgf-data"): + elif "mini-esgf-data" in repo: version = default_esgf_test_data_version else: raise ValueError( @@ -683,9 +881,9 @@ def gather_testing_data( ) if worker_id == "master": - populate_testing_data(branch=branch, repo=repo, cache_dir=cache_dir) + populate_testing_data(branch=branch, repo=repo, local_cache=cache_dir) else: - if platform == "win32": + if platform.system() == "Windows": if not cache_dir.joinpath(branch).exists(): raise FileNotFoundError( "Testing data not found and UNIX-style file-locking is not supported on Windows. " @@ -697,7 +895,7 @@ def gather_testing_data( test_data_being_written = FileLock(lockfile) with test_data_being_written: # This flag prevents multiple calls from re-attempting to download testing data in the same pytest run - populate_testing_data(branch=branch, repo=repo, cache_dir=cache_dir) + populate_testing_data(branch=branch, repo=repo, local_cache=cache_dir) cache_dir.joinpath(".data_written").touch() with test_data_being_written.acquire(): if lockfile.exists(): diff --git a/tests/conftest.py b/tests/conftest.py index 0e79294b..54e4e2c2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,12 @@ import os +from functools import partial from pathlib import Path import numpy as np import pandas as pd import pytest import xarray as xr +from loguru import logger from clisops.utils import testing from clisops.utils.testing import stratus as _stratus @@ -249,7 +251,7 @@ def _ps_series(values, start="7/1/2000"): @pytest.fixture(scope="session", autouse=True) -def threadsafe_data_dir(tmp_path_factory): +def threadsafe_data_dir(tmp_path_factory) -> Path: return tmp_path_factory.getbasetemp().joinpath("data") @@ -287,31 +289,45 @@ def 
_check_output_nc(result, fname="output_001.nc", time=None): return _check_output_nc -@pytest.fixture(scope="session", autouse=True) -def load_test_data(worker_id, stratus, nimbus): +@pytest.fixture(autouse=True, scope="session") +def gather_session_data(request, worker_id, stratus, nimbus): """ - Load the test data repository. + Gather testing data on pytest run. + + When running pytest with multiple workers, one worker will copy data remotely to default cache dir while + other workers wait using lockfile. Once the lock is released, all workers will then copy data to their local + threadsafe_data_dir. As this fixture is scoped to the session, it will only run once per pytest run. - This fixture ensures that the required test data repository - has been cloned to the cache directory within the home directory. """ + + def remove_data_written_flag(cache): + """Cleanup cache folders once we are finished.""" + flag = Path(cache).joinpath(".data_written") + if flag.exists(): + try: + flag.unlink() + except FileNotFoundError: + logger.info("Teardown race condition occurred: .data_written flag already removed. 
Lucky!") + pass + repositories = { "stratus": { "worker_cache_dir": stratus.path, "repo": testing.ESGF_TEST_DATA_REPO_URL, "branch": testing.ESGF_TEST_DATA_VERSION, - "cache_dir": testing.ESGF_TEST_DATA_CACHE_DIR, + "_cache_dir": testing.ESGF_TEST_DATA_CACHE_DIR, }, "nimbus": { "worker_cache_dir": nimbus.path, "repo": testing.XCLIM_TEST_DATA_REPO_URL, "branch": testing.XCLIM_TEST_DATA_VERSION, - "cache_dir": testing.XCLIM_TEST_DATA_CACHE_DIR, + "_cache_dir": testing.XCLIM_TEST_DATA_CACHE_DIR, }, } for repo in repositories.values(): testing.gather_testing_data(worker_id=worker_id, **repo) + request.addfinalizer(partial(remove_data_written_flag, repo["_cache_dir"])) @pytest.fixture @@ -333,20 +349,6 @@ def c3s_cmip5_tos(): ).as_posix() -@pytest.fixture -def cmip5_archive_base(): - if "CMIP5_ARCHIVE_BASE" in os.environ: - return os.environ["CMIP5_ARCHIVE_BASE"] - return Path(__file__).parent.absolute().joinpath("mini-esgf-data/test_data/badc/cmip5/data").as_posix() - - -@pytest.fixture -def cmip6_archive_base(): - if "CMIP6_ARCHIVE_BASE" in os.environ: - return os.environ["CMIP6_ARCHIVE_BASE"] - return Path(__file__).parent.absolute().joinpath("mini-esgf-data/test_data/badc/cmip6/data").as_posix() - - @pytest.fixture(scope="session", autouse=True) def mini_esgf_data(stratus): return ( diff --git a/tests/test_core_regrid.py b/tests/test_core_regrid.py index ef170d79..6aa2617d 100644 --- a/tests/test_core_regrid.py +++ b/tests/test_core_regrid.py @@ -925,7 +925,7 @@ def test_detect_extent_shifted_lon_frame(self, mini_esgf_data): g = Grid(ds=ds) assert g.extent_lon == "global" - def test_detect_collapsed_cells(self, mini_esgf_data, load_test_data): + def test_detect_collapsed_cells(self, mini_esgf_data): """Test that collapsed cells are properly identified.""" dsA = xr.open_dataset( mini_esgf_data["CMIP6_OCE_HALO_CNRM"], diff --git a/tests/test_core_subset.py b/tests/test_core_subset.py index 35fbaa6c..9960d8d1 100644 --- a/tests/test_core_subset.py +++ 
b/tests/test_core_subset.py @@ -367,6 +367,8 @@ def test_dataset(self, nimbus): da = xr.open_mfdataset( [nimbus.fetch(self.nc_tasmax_file), nimbus.fetch(self.nc_tasmin_file)], combine="by_coords", + compat="no_conflicts", + data_vars="all", ) out = subset.subset_bbox(da, lon_bnds=self.lon, lat_bnds=self.lat) assert np.all(out.lon >= np.min(self.lon)) diff --git a/tests/test_dataset_utils.py b/tests/test_dataset_utils.py index ed4d0741..2bd0696e 100644 --- a/tests/test_dataset_utils.py +++ b/tests/test_dataset_utils.py @@ -212,11 +212,13 @@ def test_detect_coordinate_and_bounds(mini_esgf_data): mini_esgf_data["C3S_CORDEX_AFR_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ).load() ds_b = xr.open_mfdataset( mini_esgf_data["C3S_CORDEX_ANT_SFC_WIND"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ).load() ds_c = xr.open_dataset(mini_esgf_data["CMIP6_UNSTR_ICON_A"]).load() ds_d = xr.open_dataset(mini_esgf_data["CMIP6_OCE_HALO_CNRM"]).load() @@ -273,6 +275,7 @@ def test_detect_coordinate_robustness(tmpdir, mini_esgf_data): mini_esgf_data["C3S_CORDEX_AFR_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ).load() as ds: assert clidu.detect_coordinate(ds, "latitude") == "lat" assert clidu.detect_coordinate(ds, "longitude") == "lon" @@ -456,7 +459,9 @@ def test_determine_lon_lat_range_unstructured(mini_esgf_data): def test_determine_lon_lat_range_regular_lat_lon(mini_esgf_data): """Test the function determine_lon_lat_range for regular lat lon grids.""" - with xr.open_mfdataset(mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)) as ds: + with xr.open_mfdataset( + mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), data_vars="all" + ) as ds: # Deal with immutable numpy arrays lat = ds.lat.values.copy() lat[1] = -999.0 @@ -623,8 +628,7 @@ def 
test_convert_lon_frame_shifted_bounds(mini_esgf_data): def test_convert_lon_frame_shifted_no_bounds(mini_esgf_data): with xr.open_dataset( - mini_esgf_data["CMIP6_IITM_EXTENT"], - decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), + mini_esgf_data["CMIP6_IITM_EXTENT"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True) ) as ds: # confirm shifted frame assert np.isclose(ds["longitude"].min(), -280.0, atol=1.0) @@ -714,6 +718,7 @@ def test_get_main_var(mini_esgf_data): mini_esgf_data["C3S_CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: result = clidu.get_main_variable(ds) assert result == "tas" @@ -724,6 +729,7 @@ def test_get_main_var_2(mini_esgf_data): mini_esgf_data["CMIP5_ZOSTOGA"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: result = clidu.get_main_variable(ds) assert result == "zostoga" @@ -734,6 +740,7 @@ def test_get_main_var_3(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: result = clidu.get_main_variable(ds) assert result == "tas" @@ -744,6 +751,7 @@ def test_get_main_var_4(mini_esgf_data): mini_esgf_data["CMIP5_RH"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: result = clidu.get_main_variable(ds) assert result == "rh" @@ -754,6 +762,7 @@ def test_get_main_var_test_data(mini_esgf_data): mini_esgf_data["CMIP6_SIMASS_DEGEN"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: var_id = clidu.get_main_variable(ds) assert var_id == "simass" @@ -764,6 +773,7 @@ def test_get_main_var_include_common_coords(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: var_id = clidu.get_main_variable(ds, 
exclude_common_coords=False) @@ -776,6 +786,7 @@ def test_get_standard_names(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: assert sorted(ds.cf.standard_names) == sorted( [ @@ -794,6 +805,7 @@ def test_get_latitude_cf_xarray(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: xr.testing.assert_identical(ds["lat"].reset_coords("height", drop=True), ds.cf["lat"]) xr.testing.assert_identical(ds["lat"].reset_coords("height", drop=True), ds.cf["latitude"]) @@ -804,6 +816,7 @@ def test_get_latitude_2_cf_xarray(mini_esgf_data): mini_esgf_data["C3S_CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: xr.testing.assert_identical(ds["lat"], ds.cf["lat"]) xr.testing.assert_identical(ds["lat"], ds.cf["latitude"]) @@ -816,6 +829,7 @@ def test_get_lat_lon_names_from_ds_cf_xarray(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: assert ds.cf["latitude"].name == "lat" assert ds.cf["longitude"].name == "lon" @@ -827,6 +841,7 @@ def test_get_time_cf_xarray(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: xr.testing.assert_identical(ds["time"].reset_coords(("height"), drop=True), ds.cf["time"]) @@ -838,6 +853,7 @@ def test_get_time(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["tas"] coord = da.time @@ -849,6 +865,7 @@ def test_get_latitude(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["tas"] 
coord = da.lat @@ -860,6 +877,7 @@ def test_get_longitude(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["tas"] coord = da.lon @@ -872,6 +890,7 @@ def test_get_time_2(mini_esgf_data): mini_esgf_data["C3S_CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["tas"] coord = da.time @@ -883,6 +902,7 @@ def test_get_latitude_2(mini_esgf_data): mini_esgf_data["C3S_CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["tas"] coord = da.lat @@ -894,6 +914,7 @@ def test_get_longitude_2(mini_esgf_data): mini_esgf_data["C3S_CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["tas"] coord = da.lon @@ -906,6 +927,7 @@ def test_get_time_3(mini_esgf_data): mini_esgf_data["CMIP5_ZOSTOGA"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["zostoga"] coord = da.time @@ -917,6 +939,7 @@ def test_get_level(mini_esgf_data): mini_esgf_data["CMIP5_ZOSTOGA"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["zostoga"] coord = da.lev @@ -928,6 +951,7 @@ def test_get_other(mini_esgf_data): mini_esgf_data["CMIP6_SICONC"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["siconc"] coord = da.type @@ -939,6 +963,7 @@ def test_order_of_coords(mini_esgf_data): mini_esgf_data["CMIP5_ZOSTOGA"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["zostoga"] @@ -972,6 +997,7 @@ def test_text_coord_not_level(mini_esgf_data): mini_esgf_data["CMIP6_CHAR_DIM"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), 
combine="by_coords", + data_vars="all", ) as ds: coord_type = clidu.get_coord_type(ds.sector) assert coord_type is None @@ -983,6 +1009,7 @@ def test_get_coords_by_type(mini_esgf_data): mini_esgf_data["C3S_CORDEX_AFR_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: # check lat, lon, time and level are found when they are coordinates lat = clidu.get_coord_by_type(ds, "latitude", ignore_aux_coords=False) @@ -1019,6 +1046,7 @@ def test_get_coords_by_type_with_no_time(mini_esgf_data): mini_esgf_data["C3S_CORDEX_AFR_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: # check time time = clidu.get_coord_by_type(ds, "time", ignore_aux_coords=False) diff --git a/tests/test_file_namers.py b/tests/test_file_namers.py index 24e3f8db..c7bada16 100644 --- a/tests/test_file_namers.py +++ b/tests/test_file_namers.py @@ -75,6 +75,7 @@ def test_StandardFileNamer_cmip5(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [(_ds, "tas_mon_HadGEM2-ES_rcp85_r1i1p1_20051216-22991216.nc")] @@ -91,6 +92,7 @@ def test_StandardFileNamer_cmip5_use_default_attr_names(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [(_ds, "tas_mon_no-model_rcp85_r1i1p1_20051216-22991216.nc")] @@ -108,6 +110,7 @@ def test_StandardFileNamer_cmip6(mini_esgf_data): mini_esgf_data["CMIP6_SICONC"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [(_ds, "siconc_SImon_CanESM5_historical_r1i1p1f1_gn_18500116-20141216.nc")] @@ -124,6 +127,7 @@ def test_StandardFileNamer_cmip6_use_default_attr_names(mini_esgf_data): mini_esgf_data["CMIP6_SICONC"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + 
data_vars="all", ) checks = [(_ds, "siconc_SImon_no-model_historical_r1i1p1f1_no-grid_18500116-20141216.nc")] @@ -146,6 +150,7 @@ def test_StandardFileNamer_c3s_cordex(mini_esgf_data): mini_esgf_data["C3S_CORDEX_NAM_PR"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [ @@ -171,6 +176,7 @@ def test_StandardFileNamer_c3s_cordex_use_default_attr_names(mini_esgf_data): mini_esgf_data["C3S_CORDEX_NAM_PR"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [ @@ -195,6 +201,7 @@ def test_StandardFileNamer_c3s_atlas_v0(mini_esgf_data): mini_esgf_data["ATLAS_v0_CORDEX_NAM"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [ @@ -217,6 +224,7 @@ def test_StandardFileNamer_c3s_atlas_v1(mini_esgf_data): mini_esgf_data["ATLAS_v1_ERA5"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [ diff --git a/tests/test_ops_average.py b/tests/test_ops_average.py index a97b7847..794fdebf 100644 --- a/tests/test_ops_average.py +++ b/tests/test_ops_average.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import pytest import xarray as xr @@ -24,7 +25,7 @@ def _check_output_nc(result, fname="output_001.nc"): def _load_ds(fpath): - return xr.open_mfdataset(fpath, decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)) + return xr.open_mfdataset(fpath, decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), data_vars="all") def test_average_basic_data_array(nimbus): @@ -209,25 +210,23 @@ def test_dim_not_found_ignore(mini_esgf_data): assert "height" in result[0] -# FIXME: This kind of test is not desirable as it is testing the internal testing implementation -# def test_aux_variables(): -# """ -# test auxiliary variables are remembered in output dataset -# Have to create a netcdf file with auxiliary variable -# """ -# -# ds = 
_load_ds("tests/data/test_file.nc") -# -# assert "do_i_get_written" in ds.variables -# -# result = average_over_dims( -# ds=ds, -# dims=["level", "time"], -# ignore_undetected_dims=True, -# output_type="xarray", -# ) -# -# assert "do_i_get_written" in result[0].variables +def test_aux_variables(): + """ + Test auxiliary variables are remembered in output dataset + Have to create a netcdf file with auxiliary variable + """ + ds = _load_ds(Path(__file__).parent.joinpath("data/test_file.nc")) + + assert "do_i_get_written" in ds.variables + + result = average_over_dims( + ds=ds, + dims=["level", "time"], + ignore_undetected_dims=True, + output_type="xarray", + ) + + assert "do_i_get_written" in result[0].variables @pytest.mark.skipif(xesmf is None, reason=XESMF_IMPORT_MESSAGE) diff --git a/tests/test_ops_subset.py b/tests/test_ops_subset.py index 0730e747..68261eca 100644 --- a/tests/test_ops_subset.py +++ b/tests/test_ops_subset.py @@ -22,12 +22,12 @@ from clisops.utils.output_utils import _format_time -def _load_ds(fpath: str | Path): +def _load_ds(fpath: str | Path | list[str | Path]): if isinstance(fpath, (str, Path)): if str(fpath).endswith("*.nc"): - return xr.open_mfdataset(fpath) + return xr.open_mfdataset(fpath, decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)) else: - return xr.open_dataset(fpath) + return xr.open_dataset(fpath, decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)) return xr.open_mfdataset(fpath) @@ -370,7 +370,7 @@ def test_time_slices_in_subset_rh(mini_esgf_data): temp_max_file_size = "10KB" CONFIG["clisops:write"]["file_size_limit"] = temp_max_file_size - with xr.open_mfdataset(mini_esgf_data["CMIP5_RH"]) as ds: + with xr.open_mfdataset(mini_esgf_data["CMIP5_RH"], data_vars="all") as ds: outputs = subset( ds=ds, time=time_interval(start_time, end_time), @@ -504,7 +504,7 @@ def test_aux_variables(): Test auxiliary variables are remembered in output dataset Have to create a netcdf file with auxiliary variable """ - ds = 
_load_ds("tests/data/test_file.nc") + ds = _load_ds(Path(__file__).parent.joinpath("data/test_file.nc")) assert "do_i_get_written" in ds.variables diff --git a/tests/test_ops_xarray_mean.py b/tests/test_ops_xarray_mean.py index cfe46d59..20ca4559 100644 --- a/tests/test_ops_xarray_mean.py +++ b/tests/test_ops_xarray_mean.py @@ -103,6 +103,7 @@ def test_xarray_da_mean_keep_attrs_true(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], combine="by_coords", decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), + data_vars="all", drop_variables=["time_bnds"], ) ds_tas_mean = ds.tas.mean(dim="lat", keep_attrs=True) @@ -117,6 +118,7 @@ def test_xarray_da_mean_keep_attrs_false(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], combine="by_coords", decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), + data_vars="all", ).load() ds_tas_mean = ds.tas.mean(dim="time", keep_attrs=False) ds_mean = ds.mean(dim="time", keep_attrs=False) diff --git a/tests/test_output_utils.py b/tests/test_output_utils.py index 3524be2f..ae32f89c 100644 --- a/tests/test_output_utils.py +++ b/tests/test_output_utils.py @@ -27,9 +27,7 @@ def _open(coll): if len(coll) > 1: # issues with dask and cftime ds = xr.open_mfdataset( - coll, - decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), - combine="by_coords", + coll, decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", data_vars="all" ).load() else: ds = xr.open_dataset(coll[0], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)) diff --git a/tests/test_project_utils.py b/tests/test_project_utils.py index 13369f0b..534ca68f 100644 --- a/tests/test_project_utils.py +++ b/tests/test_project_utils.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import pytest import xarray as xr @@ -18,7 +19,7 @@ def test_get_project_name(self, mini_esgf_data): project = project_utils.get_project_name(dset) assert project == "cmip5" - dset = "/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc" 
+ # dset = "/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc" # project = project_utils.get_project_name(dset) # assert project == "cmip5" @@ -26,6 +27,7 @@ def test_get_project_name(self, mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: project = project_utils.get_project_name(ds) assert project == "cmip5" @@ -44,7 +46,7 @@ def test_get_project_name(self, mini_esgf_data): assert project == "cmip6" # tests default for cmip6 path is c3s-cmip6 - dset = "/badc/cmip6/data/CMIP6/CMIP/MIROC/MIROC6/historical/r1i1p1f1/SImon/siconc/gn/latest/*.nc" + # dset = "/badc/cmip6/data/CMIP6/CMIP/MIROC/MIROC6/historical/r1i1p1f1/SImon/siconc/gn/latest/*.nc" # project = project_utils.get_project_name(dset) # assert project == "c3s-cmip6" @@ -65,7 +67,7 @@ def test_get_project_name(self, mini_esgf_data): assert project == "c3s-cica-atlas" # c3s-cica-atlas 2 - dset = "/pool/data/c3s-cica-atlas/ERA5/psl_ERA5_mon_194001-202212.nc" + # dset = "/pool/data/c3s-cica-atlas/ERA5/psl_ERA5_mon_194001-202212.nc" # project = project_utils.get_project_name(dset) # assert project == "c3s-cica-atlas" @@ -80,11 +82,11 @@ def test_get_project_name(self, mini_esgf_data): assert project in ["c3s-ipcc-ar6-atlas", "c3s-ipcc-atlas"] def test_get_project_base_dir(self): - cmip5_base_dir = project_utils.get_project_base_dir("cmip5") - assert cmip5_base_dir == "/mnt/lustre/work/kd0956/CMIP5/data/cmip5" + cmip5_base_dir = Path(project_utils.get_project_base_dir("cmip5")) + assert Path("/mnt/lustre/work/kd0956/CMIP5/data/cmip5").match(str(cmip5_base_dir)) - c3s_cordex_base_dir = project_utils.get_project_base_dir("c3s-cordex") - assert c3s_cordex_base_dir == "/mnt/lustre/work/ik1017/C3SCORDEX/data/c3s-cordex" + c3s_cordex_base_dir = Path(project_utils.get_project_base_dir("c3s-cordex")) + assert 
Path("/mnt/lustre/work/ik1017/C3SCORDEX/data/c3s-cordex").match(str(c3s_cordex_base_dir)) with pytest.raises(Exception) as exc: project_utils.get_project_base_dir("test")