From 0b24d87693d2efd3fec07f298c365a1c656d8b85 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 25 Jun 2025 15:15:10 -0400 Subject: [PATCH 01/19] add other platforms --- .github/workflows/main.yml | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 401b5d89..a8e33833 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -55,13 +55,19 @@ jobs: python -m tox -e lint test-pypi: - name: Python${{ matrix.python-version }} (PyPI + Tox) + name: Python${{ matrix.python-version }} (PyPI + Tox, ${{ matrix.os }}) needs: lint - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} timeout-minutes: 20 strategy: matrix: python-version: [ "3.10", "3.11", "3.12", "3.13" ] + os: [ "ubuntu-latest" ] + include: + - python-version: "3.10" + os: "macos-latest" + - python-version: "3.11" + os: "windows-latest" steps: - name: Harden Runner uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1 @@ -88,6 +94,7 @@ jobs: python -m pip install --require-hashes -r CI/requirements_ci.txt - name: Environment Caching uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + if: matrix.os == 'ubuntu-latest' with: path: | ~/.cache/mini-esgf-data @@ -104,7 +111,7 @@ jobs: COVERALLS_SERVICE_NAME: github test-conda: - name: Python${{ matrix.python-version }} (Anaconda) + name: Python${{ matrix.python-version }} (Anaconda, ${{ matrix.os}}) needs: lint runs-on: ubuntu-latest timeout-minutes: 20 @@ -112,12 +119,19 @@ jobs: fail-fast: false matrix: python-version: [ "3.10", "3.11", "3.12", "3.13" ] + os: [ "ubuntu-latest" ] + include: + - python-version: "3.10" + os: "macos-latest" + - python-version: "3.11" + os: "windows-latest" defaults: run: shell: bash -l {0} steps: - name: Harden Runner uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1 + if: matrix.os == 'ubuntu-latest' with: disable-sudo: true egress-policy: block From 7725a16438dcdc553cf5ef246be154cef006ac3c Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 25 Jun 2025 15:55:38 -0400 Subject: [PATCH 02/19] add a preload_test_data step for Windows builds --- .github/workflows/main.yml | 11 ++++++++--- CI/preload_test_data.py | 30 ++++++++++++++++++++++++++++++ clisops/__version__.py | 2 +- tox.ini | 11 ++++++----- 4 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 CI/preload_test_data.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a8e33833..c7665ce5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -92,7 +92,7 @@ jobs: - name: Install CI libraries run: | python -m pip install --require-hashes -r CI/requirements_ci.txt - - name: Environment Caching + - name: Environment Caching (Linux) uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 if: matrix.os == 'ubuntu-latest' with: @@ -113,7 +113,7 @@ jobs: test-conda: name: Python${{ matrix.python-version }} (Anaconda, ${{ matrix.os}}) needs: lint - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} timeout-minutes: 20 strategy: fail-fast: false @@ -160,8 +160,9 @@ jobs: - name: Install CLISOPS run: | python -m pip install --no-user --no-deps --editable . - - name: Test Data Caching + - name: Test Data Caching (Linux) uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + if : matrix.os == 'ubuntu-latest' with: path: | ~/.cache/mini-esgf-data @@ -171,6 +172,10 @@ jobs: run: | micromamba list python -m pip check || true + - name: Preload Test Data (Windows) + if: matrix.os == 'windows-latest' + run: | + python CI/preload_test_data.py - name: Test with conda run: | python -m pytest -m "not slow" --timeout=300 --numprocesses=logical --durations=10 --cov=clisops --cov-report=lcov diff --git a/CI/preload_test_data.py b/CI/preload_test_data.py new file mode 100644 index 00000000..46ea709d --- /dev/null +++ b/CI/preload_test_data.py @@ -0,0 +1,30 @@ +from clisops import testing + + +def load_test_data_single(worker_id): + """ + This fixture ensures that the required test data repository + has been cloned to the cache directory within the home directory. + + This is a helper function for Windows builds that do not have + access to the locking mechanism used by macOS and Linux. + """ + repositories = { + "stratus": { + "repo": testing.ESGF_TEST_DATA_REPO_URL, + "branch": testing.ESGF_TEST_DATA_VERSION, + "cache_dir": testing.ESGF_TEST_DATA_CACHE_DIR, + }, + "nimbus": { + "repo": testing.XCLIM_TEST_DATA_REPO_URL, + "branch": testing.XCLIM_TEST_DATA_VERSION, + "cache_dir": testing.XCLIM_TEST_DATA_CACHE_DIR, + }, + } + + for name, repo in repositories.items(): + testing.gather_testing_data(worker_id=worker_id, **repo) + + +if __name__ == "__main__": + load_test_data_single(worker_id="master") diff --git a/clisops/__version__.py b/clisops/__version__.py index f7739d8d..1d73aa81 100644 --- a/clisops/__version__.py +++ b/clisops/__version__.py @@ -4,6 +4,6 @@ __author__ = """Eleanor Smith""" __contact__ = "eleanor.smith@stfc.ac.uk" -__copyright__ = "Copyright 2018-2024. United Kingdom Research and Innovation" +__copyright__ = "Copyright 2018-2025. United Kingdom Research and Innovation" __license__ = "BSD-3-Clause" __version__ = "0.16.2" diff --git a/tox.ini b/tox.ini index bd9e5f7b..e02952be 100644 --- a/tox.ini +++ b/tox.ini @@ -11,9 +11,9 @@ opts = -v [gh] python = - 3.10 = py3.10-coveralls - 3.11 = py3.11-coveralls - 3.12 = py3.12-coveralls + 3.10 = py3.10-coverage + 3.11 = py3.11-coverage + 3.12 = py3.12-coverage 3.13 = py3.13 [testenv:lint] @@ -54,12 +54,13 @@ extras = dev install_command = python -m pip install --no-user {opts} {packages} download = True deps = - coveralls: coveralls >=4.0.1 + coverage: coveralls >=4.0.1 upstream: -r CI/requirements_upstream.txt commands_pre = python -m pip list python -m pip check + python {toxinidir}/CI/preload_test_data.py commands = pytest {posargs} commands_post: - coveralls: - coveralls + coverage: - coveralls From f69ec05a3eb640d70a44e1c06039f1ae42553e36 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 25 Jun 2025 16:11:24 -0400 Subject: [PATCH 03/19] do not build on Windows --- .github/workflows/main.yml | 45 ++++++++++++++++++++++---------------- CI/preload_test_data.py | 30 ------------------------- tox.ini | 1 - 3 files changed, 26 insertions(+), 50 deletions(-) delete mode 100644 CI/preload_test_data.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c7665ce5..79be5459 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -61,13 +61,8 @@ jobs: timeout-minutes: 20 strategy: matrix: - python-version: [ "3.10", "3.11", "3.12", "3.13" ] - os: [ "ubuntu-latest" ] - include: - - python-version: "3.10" - os: "macos-latest" - - python-version: "3.11" - os: "windows-latest" + python-version: [ "3.10", "3.13" ] + os: [ "macos-latest", "ubuntu-latest" ] steps: - name: Harden Runner uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1 @@ -92,6 +87,7 @@ jobs: - name: Install CI libraries run: | python -m pip install --require-hashes -r CI/requirements_ci.txt + - name: Environment Caching (Linux) uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 if: matrix.os == 'ubuntu-latest' @@ -101,6 +97,16 @@ jobs: ~/.cache/xclim-testdata .tox key: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }} + - name: Environment Caching (macOS) + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + if: matrix.os == 'macos-latest' + with: + path: | + ~/Library/Caches/mini-esgf-data + ~/Library/Caches/xclim-testdata + .tox + key: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }} + - name: Test with tox run: | python -m tox @@ -118,13 +124,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.10", "3.11", "3.12", "3.13" ] - os: [ "ubuntu-latest" ] - include: - - python-version: "3.10" - os: "macos-latest" - - python-version: "3.11" - os: "windows-latest" + python-version: [ "3.10", "3.13" ] + os: [ "macos-latest", "ubuntu-latest" ] defaults: run: shell: bash -l {0} @@ -160,6 +161,7 @@ jobs: - name: Install CLISOPS run: | python -m pip install --no-user --no-deps --editable . + - name: Test Data Caching (Linux) uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 if : matrix.os == 'ubuntu-latest' @@ -167,15 +169,20 @@ jobs: path: | ~/.cache/mini-esgf-data ~/.cache/xclim-testdata - key: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-conda-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }} + key: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-conda-${{ env.ESGF_TEST_DATA_VERSION }} + - name: Test Data Caching (macOS) + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + if: matrix.os == 'ubuntu-latest' + with: + path: | + ~/.cache/mini-esgf-data + ~/.cache/xclim-testdata + key: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-conda-${{ env.ESGF_TEST_DATA_VERSION }} + - name: Check versions run: | micromamba list python -m pip check || true - - name: Preload Test Data (Windows) - if: matrix.os == 'windows-latest' - run: | - python CI/preload_test_data.py - name: Test with conda run: | python -m pytest -m "not slow" --timeout=300 --numprocesses=logical --durations=10 --cov=clisops --cov-report=lcov diff --git a/CI/preload_test_data.py b/CI/preload_test_data.py deleted file mode 100644 index 46ea709d..00000000 --- a/CI/preload_test_data.py +++ /dev/null @@ -1,30 +0,0 @@ -from clisops import testing - - -def load_test_data_single(worker_id): - """ - This fixture ensures that the required test data repository - has been cloned to the cache directory within the home directory. - - This is a helper function for Windows builds that do not have - access to the locking mechanism used by macOS and Linux. - """ - repositories = { - "stratus": { - "repo": testing.ESGF_TEST_DATA_REPO_URL, - "branch": testing.ESGF_TEST_DATA_VERSION, - "cache_dir": testing.ESGF_TEST_DATA_CACHE_DIR, - }, - "nimbus": { - "repo": testing.XCLIM_TEST_DATA_REPO_URL, - "branch": testing.XCLIM_TEST_DATA_VERSION, - "cache_dir": testing.XCLIM_TEST_DATA_CACHE_DIR, - }, - } - - for name, repo in repositories.items(): - testing.gather_testing_data(worker_id=worker_id, **repo) - - -if __name__ == "__main__": - load_test_data_single(worker_id="master") diff --git a/tox.ini b/tox.ini index e02952be..2f822b35 100644 --- a/tox.ini +++ b/tox.ini @@ -59,7 +59,6 @@ deps = commands_pre = python -m pip list python -m pip check - python {toxinidir}/CI/preload_test_data.py commands = pytest {posargs} commands_post: From de6a3c20c1752270781d33763b27fb182e140705 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 25 Jun 2025 16:27:59 -0400 Subject: [PATCH 04/19] add Windows PyPI/tox build --- .github/workflows/main.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 79be5459..62c0515a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -63,6 +63,11 @@ jobs: matrix: python-version: [ "3.10", "3.13" ] os: [ "macos-latest", "ubuntu-latest" ] + positional_args: [ "" ] + include: + - python-version: "3.11" + os: "windows-latest" + positional_args: "--numprocesses=0" steps: - name: Harden Runner uses: step-security/harden-runner@002fdce3c6a235733a90a27c80493a3241e56863 # v2.12.1 @@ -109,7 +114,7 @@ jobs: - name: Test with tox run: | - python -m tox + python -m tox -- ${{ matrix.positional_args }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COVERALLS_FLAG_NAME: run-Python${{ matrix.python-version }} From e2a00d2f7bd6832cb90ee3d9ec95f2f848a5a933 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 25 Jun 2025 17:12:47 -0400 Subject: [PATCH 05/19] try using str instead of as_posix --- clisops/project_utils.py | 23 +- clisops/utils/testing.py | 656 +++++++++++++++++++++--------------- tests/test_project_utils.py | 6 +- 3 files changed, 406 insertions(+), 279 deletions(-) diff --git a/clisops/project_utils.py b/clisops/project_utils.py index 334d280d..b529c3b2 100644 --- a/clisops/project_utils.py +++ b/clisops/project_utils.py @@ -1,5 +1,6 @@ import glob import os +from pathlib import Path import xarray as xr from loguru import logger @@ -49,9 +50,9 @@ def _get_base_dirs_dict(): def _is_ds_id(dset): return dset.count(".") > 1 - def _deduce_project(self, dset): + def _deduce_project(self, dset) -> str | None: if isinstance(dset, str): - if dset.startswith("/"): + if os.path.isabs(dset): # by default this returns c3s-cmip6 not cmip6 (as they have the same base_dir) base_dirs_dict = self._get_base_dirs_dict() for project, base_dir in base_dirs_dict.items(): @@ -61,6 +62,7 @@ def _deduce_project(self, dset): is True ): return project + return None elif self._is_ds_id(dset): return dset.split(".")[0].lower() @@ -71,7 +73,8 @@ def _deduce_project(self, dset): dset, decode_times=xr.coders.CFDatetimeCoder(use_cftime=True) ) return get_project_from_ds(dset) - + else: + return None else: raise InvalidProject( f"The format of {dset} is not known and the project name could not be found." @@ -101,7 +104,7 @@ def _parse(self, force): self._base_dir = get_project_base_dir(self._project) # if a file, group of files or directory to files - find files - if dset.startswith("/") or dset.endswith(".nc"): + if Path(dset).is_absolute() or dset.endswith(".nc"): # if instance of FileMapper if isinstance(self.dset, FileMapper): self._files = self.dset.file_paths @@ -114,15 +117,12 @@ def _parse(self, force): self._files.append(dset) # remove file extension to create data_path - self._data_path = "/".join(dset.split("/")[:-1]) + self._data_path = os.path.dirname(dset) # if base_dir identified, insert into data_path if self._base_dir: - self._ds_id = ".".join( - self._data_path.replace(self._base_dir, self._project) - .strip("/") - .split("/") - ) + relative_path = os.path.relpath(self._data_path, self._base_dir) + self._ds_id = ".".join(relative_path.split(os.sep)) # test if dataset id elif self._is_ds_id(dset): @@ -132,7 +132,6 @@ def _parse(self, force): "fixed_path_mappings", {} ) - # If the dataset uses a fixed path mapping (from the config file) then use it if self._ds_id in mappings: data_path = mappings[self._ds_id] self._data_path = os.path.join(self._base_dir, data_path) @@ -143,7 +142,7 @@ def _parse(self, force): # Default mapping is done by converting '.' characters to '/' separators in path else: self._data_path = os.path.join( - self._base_dir, "/".join(dset.split(".")[1:]) + self._base_dir, os.path.join(*dset.split(".")[1:]) ) # use to data_path to find files if not set already diff --git a/clisops/utils/testing.py b/clisops/utils/testing.py index 865328e4..f1f4c07d 100644 --- a/clisops/utils/testing.py +++ b/clisops/utils/testing.py @@ -106,249 +106,357 @@ def write_roocs_cfg( cfg_template = template or default_template roocs_config = Path(cache_dir, "roocs.ini") cfg = Template(cfg_template).render( - base_dir=Path(ESGF_TEST_DATA_CACHE_DIR) - .joinpath(ESGF_TEST_DATA_VERSION) - .as_posix() + base_dir=Path(ESGF_TEST_DATA_CACHE_DIR).joinpath(ESGF_TEST_DATA_VERSION) ) with open(roocs_config, "w") as fp: fp.write(cfg) - return roocs_config.as_posix() + return str(roocs_config) def get_esgf_file_paths(esgf_cache_dir: Union[str, os.PathLike[str]]): return { - "CMIP5_ZOSTOGA": Path( - esgf_cache_dir, - "badc/cmip5/data/cmip5/output1/INM/inmcm4/rcp45/mon/ocean/Omon/r1i1p1/latest/zostoga/zostoga_Omon_inmcm4_rcp45_r1i1p1_200601-210012.nc", - ).as_posix(), - "CMIP6_RLDS": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical/r1i1p1f1/Amon/rlds/gr/v20180803/rlds_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_185001-201412.nc", - ).as_posix(), - "CMIP6_RLDS_ONE_TIME_STEP": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical/r1i1p1f1/Amon/rlds/gr/v20180803/rlds_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_185001.nc", - ).as_posix(), - "CMIP6_RLUS_ONE_TIME_STEP": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical/r1i1p1f1/Amon/rlus/gr/v20180803/rlus_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_185001.nc", - ).as_posix(), - "CMIP6_MRSOFC": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/IPSL/IPSL-CM6A-LR/ssp119/r1i1p1f1/fx/mrsofc/gr/v20190410/mrsofc_fx_IPSL-CM6A-LR_ssp119_r1i1p1f1_gr.nc", - ).as_posix(), - "CMIP6_SICONC": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/SImon/siconc/gn/latest/siconc_SImon_CanESM5_historical_r1i1p1f1_gn_185001-201412.nc", - ).as_posix(), - "CMIP6_SICONC_DAY": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/SIday/siconc/gn/v20190429/siconc_SIday_CanESM5_historical_r1i1p1f1_gn_18500101-20141231.nc", - ).as_posix(), - "CMIP6_TA": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/MIROC/MIROC6/ssp119/r1i1p1f1/Amon/ta/gn/files/d20190807/ta_Amon_MIROC6_ssp119_r1i1p1f1_gn_201501-202412.nc", - ).as_posix(), - "CMIP6_TASMIN": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/Amon/tasmin/gn/v20190710/tasmin_Amon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_201001-201412.nc", - ).as_posix(), - "CMIP6_JULIAN": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/CCCR-IITM/IITM-ESM/1pctCO2/r1i1p1f1/Omon/tos/gn/v20191204/tos_Omon_IITM-ESM_1pctCO2_r1i1p1f1_gn_193001-193412.nc", - ).as_posix(), - "CMIP6_TOS": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Omon/tos/gn/v20190710/tos_Omon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001-186912.nc", - ).as_posix(), - "CMIP6_AREACELLO": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/historical/r1i1p1f1/Ofx/areacello/gn/v20190726/areacello_Ofx_GFDL-ESM4_historical_r1i1p1f1_gn.nc", - ).as_posix(), - "CMIP6_TOS_CNRM": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/CNRM-CERFACS/CNRM-CM6-1/ssp245/r1i1p1f2/Omon/tos/gn/v20190219/tos_Omon_CNRM-CM6-1_ssp245_r1i1p1f2_gn_201501.nc", - ).as_posix(), - "CMIP6_TAS_DAY": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/MIROC/MIROC6/ssp119/r1i1p1f1/day/tas/gn/v20191016/tas_day_MIROC6_ssp119_r1i1p1f1_gn_20150101.nc", - ).as_posix(), - "CMIP6_SFTOF": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/NCC/NorESM2-MM/ssp126/r1i1p1f1/Ofx/sftof/gn/v20191108/sftof_Ofx_NorESM2-MM_ssp126_r1i1p1f1_gn.nc", - ).as_posix(), - "CMIP6_TAS_ONE_TIME_STEP": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/CAS/FGOALS-g3/historical/r1i1p1f1/Amon/tas/gn/v20190818/tas_Amon_FGOALS-g3_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_TOS_ONE_TIME_STEP": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/Omon/tos/gn/v20190710/tos_Omon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), + "CMIP5_ZOSTOGA": str( + Path( + esgf_cache_dir, + "badc/cmip5/data/cmip5/output1/INM/inmcm4/rcp45/mon/ocean/Omon/r1i1p1/latest/zostoga/zostoga_Omon_inmcm4_rcp45_r1i1p1_200601-210012.nc", + ) + ), + "CMIP6_RLDS": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical/r1i1p1f1/Amon/rlds/gr/v20180803/rlds_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_185001-201412.nc", + ) + ), + "CMIP6_RLDS_ONE_TIME_STEP": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical/r1i1p1f1/Amon/rlds/gr/v20180803/rlds_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_185001.nc", + ) + ), + "CMIP6_RLUS_ONE_TIME_STEP": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical/r1i1p1f1/Amon/rlus/gr/v20180803/rlus_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_185001.nc", + ) + ), + "CMIP6_MRSOFC": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/IPSL/IPSL-CM6A-LR/ssp119/r1i1p1f1/fx/mrsofc/gr/v20190410/mrsofc_fx_IPSL-CM6A-LR_ssp119_r1i1p1f1_gr.nc", + ) + ), + "CMIP6_SICONC": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/SImon/siconc/gn/latest/siconc_SImon_CanESM5_historical_r1i1p1f1_gn_185001-201412.nc", + ) + ), + "CMIP6_SICONC_DAY": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/SIday/siconc/gn/v20190429/siconc_SIday_CanESM5_historical_r1i1p1f1_gn_18500101-20141231.nc", + ) + ), + "CMIP6_TA": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/MIROC/MIROC6/ssp119/r1i1p1f1/Amon/ta/gn/files/d20190807/ta_Amon_MIROC6_ssp119_r1i1p1f1_gn_201501-202412.nc", + ) + ), + "CMIP6_TASMIN": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/Amon/tasmin/gn/v20190710/tasmin_Amon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_201001-201412.nc", + ) + ), + "CMIP6_JULIAN": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/CCCR-IITM/IITM-ESM/1pctCO2/r1i1p1f1/Omon/tos/gn/v20191204/tos_Omon_IITM-ESM_1pctCO2_r1i1p1f1_gn_193001-193412.nc", + ) + ), + "CMIP6_TOS": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Omon/tos/gn/v20190710/tos_Omon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001-186912.nc", + ) + ), + "CMIP6_AREACELLO": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/historical/r1i1p1f1/Ofx/areacello/gn/v20190726/areacello_Ofx_GFDL-ESM4_historical_r1i1p1f1_gn.nc", + ) + ), + "CMIP6_TOS_CNRM": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/CNRM-CERFACS/CNRM-CM6-1/ssp245/r1i1p1f2/Omon/tos/gn/v20190219/tos_Omon_CNRM-CM6-1_ssp245_r1i1p1f2_gn_201501.nc", + ) + ), + "CMIP6_TAS_DAY": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/MIROC/MIROC6/ssp119/r1i1p1f1/day/tas/gn/v20191016/tas_day_MIROC6_ssp119_r1i1p1f1_gn_20150101.nc", + ) + ), + "CMIP6_SFTOF": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/NCC/NorESM2-MM/ssp126/r1i1p1f1/Ofx/sftof/gn/v20191108/sftof_Ofx_NorESM2-MM_ssp126_r1i1p1f1_gn.nc", + ) + ), + "CMIP6_TAS_ONE_TIME_STEP": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/CAS/FGOALS-g3/historical/r1i1p1f1/Amon/tas/gn/v20190818/tas_Amon_FGOALS-g3_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_TOS_ONE_TIME_STEP": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/Omon/tos/gn/v20190710/tos_Omon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_185001.nc", + ) + ), # CMIP6 ocean with collapsing cells - "CMIP6_TOS_LR_DEGEN": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/HAMMOZ-Consortium/MPI-ESM-1-2-HAM/ssp370/r1i1p1f1/Omon/tos/gn/v20190628/tos_Omon_MPI-ESM-1-2-HAM_ssp370_r1i1p1f1_gn_201501.nc", - ).as_posix(), + "CMIP6_TOS_LR_DEGEN": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/HAMMOZ-Consortium/MPI-ESM-1-2-HAM/ssp370/r1i1p1f1/Omon/tos/gn/v20190628/tos_Omon_MPI-ESM-1-2-HAM_ssp370_r1i1p1f1_gn_201501.nc", + ) + ), # 2nd dataset CMIP6 ocean with collapsing cells - "CMIP6_FX_DEGEN": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/EC-Earth-Consortium/EC-Earth3-Veg/ssp245/r5i1p1f1/Ofx/deptho/gn/v20200312/deptho_Ofx_EC-Earth3-Veg_ssp245_r5i1p1f1_gn.nc", - ).as_posix(), + "CMIP6_FX_DEGEN": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/EC-Earth-Consortium/EC-Earth3-Veg/ssp245/r5i1p1f1/Ofx/deptho/gn/v20200312/deptho_Ofx_EC-Earth3-Veg_ssp245_r5i1p1f1_gn.nc", + ) + ), # CMIP6 ocean with collapsing cells, cells extending over 50 degrees, missing_values in lat/lon - "CMIP6_SIMASS_DEGEN": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/NCC/NorESM2-MM/ssp126/r1i1p1f1/SImon/simass/gn/v20191108/simass_SImon_NorESM2-MM_ssp126_r1i1p1f1_gn_201501.nc", - ).as_posix(), + "CMIP6_SIMASS_DEGEN": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/NCC/NorESM2-MM/ssp126/r1i1p1f1/SImon/simass/gn/v20191108/simass_SImon_NorESM2-MM_ssp126_r1i1p1f1_gn_201501.nc", + ) + ), # CMIP5 rlat,rlon uncompliant CF units - "CMIP5_WRONG_CF_UNITS": Path( - esgf_cache_dir, - "pool/data/C3SCMIP5/BCC/bcc-csm1-1/rcp85/mon/ocean/Omon/r1i1p1/zos/v20120705/zos_Omon_bcc-csm1-1_rcp85_r1i1p1_200601.nc", - ).as_posix(), + "CMIP5_WRONG_CF_UNITS": str( + Path( + esgf_cache_dir, + "pool/data/C3SCMIP5/BCC/bcc-csm1-1/rcp85/mon/ocean/Omon/r1i1p1/zos/v20120705/zos_Omon_bcc-csm1-1_rcp85_r1i1p1_200601.nc", + ) + ), # CMIP6 rlat,rlon uncompliant CF units - "CMIP6_WRONG_CF_UNITS": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/AerChemMIP/BCC/BCC-ESM1/ssp370/r1i1p1f1/Omon/pbo/gn/v20190624/pbo_Omon_BCC-ESM1_ssp370_r1i1p1f1_gn_201501.nc", - ).as_posix(), + "CMIP6_WRONG_CF_UNITS": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/AerChemMIP/BCC/BCC-ESM1/ssp370/r1i1p1f1/Omon/pbo/gn/v20190624/pbo_Omon_BCC-ESM1_ssp370_r1i1p1f1_gn_201501.nc", + ) + ), # CMIP6 lat, lon with uncompliant CF units and standard_name - "CMIP6_WRONG_CF_ATTRS": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/HighResMIP/BCC/BCC-CSM2-HR/hist-1950/r1i1p1f1/Omon/tos/gn/v20200922/tos_Omon_BCC-CSM2-HR_hist-1950_r1i1p1f1_gn_198001.nc", - ).as_posix(), - "CMIP5_MRSOS_ONE_TIME_STEP": Path( - esgf_cache_dir, - "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/day/land/day/r1i1p1/latest/mrsos/mrsos_day_HadGEM2-ES_rcp85_r1i1p1_20051201.nc", - ).as_posix(), - "CMIP6_GFDL_EXTENT": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/Omon/sos/gn/v20180701/sos_Omon_GFDL-CM4_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_TAS_PRECISION_A": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/1pctCO2/r1i1p1f1/Amon/tas/gn/v20200212/tas_Amon_AWI-ESM-1-1-LR_1pctCO2_r1i1p1f1_gn_185501.nc", - ).as_posix(), - "CMIP6_TAS_PRECISION_B": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/1pctCO2/r1i1p1f1/Amon/tas/gn/v20200212/tas_Amon_AWI-ESM-1-1-LR_1pctCO2_r1i1p1f1_gn_209901.nc", - ).as_posix(), - "CMIP6_ATM_VERT_ONE_TIMESTEP": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/AERmon/o3/gn/v20190710/o3_AERmon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_ATM_VERT_ONE_TIMESTEP_ZONMEAN": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/AERmon/o3/gn/v20190710/o3_AERmon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001_zm.nc", - ).as_posix(), - "CMIP6_IITM_EXTENT": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/CCCR-IITM/IITM-ESM/1pctCO2/r1i1p1f1/Omon/tos/gn/v20191204/tos_Omon_IITM-ESM_1pctCO2_r1i1p1f1_gn_193001.nc", - ).as_posix(), + "CMIP6_WRONG_CF_ATTRS": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/HighResMIP/BCC/BCC-CSM2-HR/hist-1950/r1i1p1f1/Omon/tos/gn/v20200922/tos_Omon_BCC-CSM2-HR_hist-1950_r1i1p1f1_gn_198001.nc", + ) + ), + "CMIP5_MRSOS_ONE_TIME_STEP": str( + Path( + esgf_cache_dir, + "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/day/land/day/r1i1p1/latest/mrsos/mrsos_day_HadGEM2-ES_rcp85_r1i1p1_20051201.nc", + ) + ), + "CMIP6_GFDL_EXTENT": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/Omon/sos/gn/v20180701/sos_Omon_GFDL-CM4_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_TAS_PRECISION_A": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/1pctCO2/r1i1p1f1/Amon/tas/gn/v20200212/tas_Amon_AWI-ESM-1-1-LR_1pctCO2_r1i1p1f1_gn_185501.nc", + ) + ), + "CMIP6_TAS_PRECISION_B": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/1pctCO2/r1i1p1f1/Amon/tas/gn/v20200212/tas_Amon_AWI-ESM-1-1-LR_1pctCO2_r1i1p1f1_gn_209901.nc", + ) + ), + "CMIP6_ATM_VERT_ONE_TIMESTEP": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/AERmon/o3/gn/v20190710/o3_AERmon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_ATM_VERT_ONE_TIMESTEP_ZONMEAN": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/AERmon/o3/gn/v20190710/o3_AERmon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001_zm.nc", + ) + ), + "CMIP6_IITM_EXTENT": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/CCCR-IITM/IITM-ESM/1pctCO2/r1i1p1f1/Omon/tos/gn/v20191204/tos_Omon_IITM-ESM_1pctCO2_r1i1p1f1_gn_193001.nc", + ) + ), # CMIP6 dataset with weird range in its longitude coordinate (-300, 60) # and unmasked missing values in the latitude and longitude coordinates - "CMIP6_EXTENT_UNMASKED": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/OMIP/NOAA-GFDL/GFDL-OM4p5B/omip1/r1i1p1f1/Omon/volcello/gn/v20180701/volcello_Omon_GFDL-OM4p5B_omip1_r1i1p1f1_gn_176801.nc", - ).as_posix(), - "CMIP6_OCE_HALO_CNRM": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/CNRM-CERFACS/CNRM-CM6-1-HR/historical/r1i1p1f2/Omon/tos/gn/v20191021/tos_Omon_CNRM-CM6-1-HR_historical_r1i1p1f2_gn_185001.nc", - ).as_posix(), - "CMIP6_UNSTR_FESOM_LR": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/historical/r1i1p1f1/Omon/tos/gn/v20200212/tos_Omon_AWI-ESM-1-1-LR_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_UNSTR_ICON_A": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/ICON-ESM-LR/historical/r1i1p1f1/Amon/tas/gn/v20210215/tas_Amon_ICON-ESM-LR_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_UNSTR_VERT_ICON_O": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/ICON-ESM-LR/historical/r1i1p1f1/Omon/thetao/gn/v20210215/thetao_Omon_ICON-ESM-LR_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_UNTAGGED_MISSVALS": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Omon/tos/gn/v20191120/tos_Omon_CESM2-FV2_historical_r1i1p1f1_gn_200001.nc", - ).as_posix(), - "CMIP6_STAGGERED_UCOMP": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Omon/tauuo/gn/v20200909/tauuo_Omon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_STAGGERED_VCOMP": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Omon/tauvo/gn/v20190710/tauvo_Omon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001.nc", - ).as_posix(), - "CMIP6_FILLVALUE": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/NCAR/CESM2-WACCM/historical/r1i1p1f1/day/tas/gn/v20190227/tas_day_CESM2-WACCM_historical_r1i1p1f1_gn_20000101-20091231.nc", - ).as_posix(), - "CMIP6_ZONMEAN_A": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/Omon/msftmz/gn/v20190710/msftmz_Omon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_191001.nc", - ).as_posix(), - "CMIP6_ZONMEAN_B": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/CMIP/NCC/NorCPM1/historical/r22i1p1f1/Omon/msftmz/grz/v20200724/msftmz_Omon_NorCPM1_historical_r22i1p1f1_grz_185001.nc", - ).as_posix(), + "CMIP6_EXTENT_UNMASKED": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/OMIP/NOAA-GFDL/GFDL-OM4p5B/omip1/r1i1p1f1/Omon/volcello/gn/v20180701/volcello_Omon_GFDL-OM4p5B_omip1_r1i1p1f1_gn_176801.nc", + ) + ), + "CMIP6_OCE_HALO_CNRM": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/CNRM-CERFACS/CNRM-CM6-1-HR/historical/r1i1p1f2/Omon/tos/gn/v20191021/tos_Omon_CNRM-CM6-1-HR_historical_r1i1p1f2_gn_185001.nc", + ) + ), + "CMIP6_UNSTR_FESOM_LR": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/historical/r1i1p1f1/Omon/tos/gn/v20200212/tos_Omon_AWI-ESM-1-1-LR_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_UNSTR_ICON_A": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/ICON-ESM-LR/historical/r1i1p1f1/Amon/tas/gn/v20210215/tas_Amon_ICON-ESM-LR_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_UNSTR_VERT_ICON_O": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/ICON-ESM-LR/historical/r1i1p1f1/Omon/thetao/gn/v20210215/thetao_Omon_ICON-ESM-LR_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_UNTAGGED_MISSVALS": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Omon/tos/gn/v20191120/tos_Omon_CESM2-FV2_historical_r1i1p1f1_gn_200001.nc", + ) + ), + "CMIP6_STAGGERED_UCOMP": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Omon/tauuo/gn/v20200909/tauuo_Omon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_STAGGERED_VCOMP": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Omon/tauvo/gn/v20190710/tauvo_Omon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001.nc", + ) + ), + "CMIP6_FILLVALUE": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/NCAR/CESM2-WACCM/historical/r1i1p1f1/day/tas/gn/v20190227/tas_day_CESM2-WACCM_historical_r1i1p1f1_gn_20000101-20091231.nc", + ) + ), + "CMIP6_ZONMEAN_A": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/Omon/msftmz/gn/v20190710/msftmz_Omon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_191001.nc", + ) + ), + "CMIP6_ZONMEAN_B": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/NCC/NorCPM1/historical/r22i1p1f1/Omon/msftmz/grz/v20200724/msftmz_Omon_NorCPM1_historical_r22i1p1f1_grz_185001.nc", + ) + ), # CMIP6 dataset without defined bounds on curvilinear grid - "CMIP6_NO_BOUNDS": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/CAS/FGOALS-f3-L/ssp126/r1i1p1f1/Omon/tos/gn/v20191008/tos_Omon_FGOALS-f3-L_ssp126_r1i1p1f1_gn_201501.nc", - ).as_posix(), + "CMIP6_NO_BOUNDS": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/CAS/FGOALS-f3-L/ssp126/r1i1p1f1/Omon/tos/gn/v20191008/tos_Omon_FGOALS-f3-L_ssp126_r1i1p1f1_gn_201501.nc", + ) + ), # CMIP6 dataset with character dimension 'sector' - "CMIP6_CHAR_DIM": Path( - esgf_cache_dir, - "badc/cmip6/data/CMIP6/ScenarioMIP/IPSL/IPSL-CM6A-LR/ssp245/r1i1p1f1/Lmon/landCoverFrac/gr/v20190119/landCoverFrac_Lmon_IPSL-CM6A-LR_ssp245_r1i1p1f1_gr_201501.nc", - ).as_posix(), + "CMIP6_CHAR_DIM": str( + Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/ScenarioMIP/IPSL/IPSL-CM6A-LR/ssp245/r1i1p1f1/Lmon/landCoverFrac/gr/v20190119/landCoverFrac_Lmon_IPSL-CM6A-LR_ssp245_r1i1p1f1_gr_201501.nc", + ) + ), # CORDEX dataset with maldefined bounds - "CORDEX_ERRONEOUS_BOUNDS": Path( - esgf_cache_dir, - "pool/data/C3SCORDEX/data/c3s-cordex/output/ARC-44/BCCR/ECMWF-ERAINT/evaluation/r1i1p1/BCCR-WRF331/v1/day/tas/v20200915/tas_ARC-44_ECMWF-ERAINT_evaluation_r1i1p1_BCCR-WRF331_v1_day_20010101.nc", - ).as_posix(), - "CORDEX_TAS_ONE_TIMESTEP": Path( - esgf_cache_dir, - "pool/data/CORDEX/data/cordex/output/EUR-22/GERICS/MPI-M-MPI-ESM-LR/rcp85/r1i1p1/GERICS-REMO2015/v1/mon/tas/v20191029/tas_EUR-22_MPI-M-MPI-ESM-LR_rcp85_r1i1p1_GERICS-REMO2015_v1_mon_202101.nc", - ).as_posix(), - "CORDEX_TAS_ONE_TIMESTEP_ANT": Path( - esgf_cache_dir, - "pool/data/CORDEX/data/cordex/output/ANT-44/KNMI/ECMWF-ERAINT/evaluation/r1i1p1/DMI-HIRHAM5/v1/day/tas/v20201001/tas_ANT-44_ECMWF-ERAINT_evaluation_r1i1p1_DMI-HIRHAM5_v1_day_20060101.nc", - ).as_posix(), - "CORDEX_TAS_NO_BOUNDS": Path( - esgf_cache_dir, - "pool/data/CORDEX/data/cordex/output/EUR-11/KNMI/MPI-M-MPI-ESM-LR/rcp85/r1i1p1/KNMI-RACMO22E/v1/mon/tas/v20190625/tas_EUR-11_MPI-M-MPI-ESM-LR_rcp85_r1i1p1_KNMI-RACMO22E_v1_mon_209101.nc", - ).as_posix(), - "ATLAS_v1_CMIP5": Path( - esgf_cache_dir, - "pool/data/c3s-cica-atlas/CMIP5/rcp26/pr_CMIP5_rcp26_mon_200601-210012.nc", - ).as_posix(), - "ATLAS_v1_EOBS": Path( - esgf_cache_dir, - "pool/data/c3s-cica-atlas/E-OBS/sfcwind_E-OBS_mon_195001-202112.nc", - ).as_posix(), - "ATLAS_v1_ERA5": Path( - esgf_cache_dir, - "pool/data/c3s-cica-atlas/ERA5/psl_ERA5_mon_194001-202212.nc", - ).as_posix(), - "ATLAS_v1_CORDEX": Path( - esgf_cache_dir, - "pool/data/c3s-cica-atlas/CORDEX-CORE/historical/huss_CORDEX-CORE_historical_mon_197001.nc", - ).as_posix(), - "ATLAS_v1_EOBS_GRID": Path( - esgf_cache_dir, - "pool/data/c3s-cica-atlas/E-OBS/t_E-OBS_mon_195001.nc", - ).as_posix(), - "ATLAS_v0_CORDEX_NAM": Path( - esgf_cache_dir, - "pool/data/c3s-ipcc-ar6-atlas/CORDEX-NAM/historical/rx1day_CORDEX-NAM_historical_mon_197001-200512.nc", - ).as_posix(), - "ATLAS_v0_CMIP6": Path( - esgf_cache_dir, - "pool/data/c3s-ipcc-ar6-atlas/CMIP6/ssp245/sst_CMIP6_ssp245_mon_201501-210012.nc", - ).as_posix(), - "ATLAS_v0_CORDEX_ANT": Path( - esgf_cache_dir, - "pool/data/c3s-ipcc-ar6-atlas/CORDEX-ANT/rcp45/tnn_CORDEX-ANT_rcp45_mon_200601.nc", - ).as_posix(), + "CORDEX_ERRONEOUS_BOUNDS": str( + Path( + esgf_cache_dir, + "pool/data/C3SCORDEX/data/c3s-cordex/output/ARC-44/BCCR/ECMWF-ERAINT/evaluation/r1i1p1/BCCR-WRF331/v1/day/tas/v20200915/tas_ARC-44_ECMWF-ERAINT_evaluation_r1i1p1_BCCR-WRF331_v1_day_20010101.nc", + ) + ), + "CORDEX_TAS_ONE_TIMESTEP": str( + Path( + esgf_cache_dir, + "pool/data/CORDEX/data/cordex/output/EUR-22/GERICS/MPI-M-MPI-ESM-LR/rcp85/r1i1p1/GERICS-REMO2015/v1/mon/tas/v20191029/tas_EUR-22_MPI-M-MPI-ESM-LR_rcp85_r1i1p1_GERICS-REMO2015_v1_mon_202101.nc", + ) + ), + "CORDEX_TAS_ONE_TIMESTEP_ANT": str( + Path( + esgf_cache_dir, + "pool/data/CORDEX/data/cordex/output/ANT-44/KNMI/ECMWF-ERAINT/evaluation/r1i1p1/DMI-HIRHAM5/v1/day/tas/v20201001/tas_ANT-44_ECMWF-ERAINT_evaluation_r1i1p1_DMI-HIRHAM5_v1_day_20060101.nc", + ) + ), + "CORDEX_TAS_NO_BOUNDS": str( + Path( + esgf_cache_dir, + "pool/data/CORDEX/data/cordex/output/EUR-11/KNMI/MPI-M-MPI-ESM-LR/rcp85/r1i1p1/KNMI-RACMO22E/v1/mon/tas/v20190625/tas_EUR-11_MPI-M-MPI-ESM-LR_rcp85_r1i1p1_KNMI-RACMO22E_v1_mon_209101.nc", + ) + ), + "ATLAS_v1_CMIP5": str( + Path( + esgf_cache_dir, + "pool/data/c3s-cica-atlas/CMIP5/rcp26/pr_CMIP5_rcp26_mon_200601-210012.nc", + ) + ), + "ATLAS_v1_EOBS": str( + Path( + esgf_cache_dir, + "pool/data/c3s-cica-atlas/E-OBS/sfcwind_E-OBS_mon_195001-202112.nc", + ) + ), + "ATLAS_v1_ERA5": str( + Path( + esgf_cache_dir, + "pool/data/c3s-cica-atlas/ERA5/psl_ERA5_mon_194001-202212.nc", + ) + ), + "ATLAS_v1_CORDEX": str( + Path( + esgf_cache_dir, + "pool/data/c3s-cica-atlas/CORDEX-CORE/historical/huss_CORDEX-CORE_historical_mon_197001.nc", + ) + ), + "ATLAS_v1_EOBS_GRID": str( + Path( + esgf_cache_dir, + "pool/data/c3s-cica-atlas/E-OBS/t_E-OBS_mon_195001.nc", + ) + ), + "ATLAS_v0_CORDEX_NAM": str( + Path( + esgf_cache_dir, + "pool/data/c3s-ipcc-ar6-atlas/CORDEX-NAM/historical/rx1day_CORDEX-NAM_historical_mon_197001-200512.nc", + ) + ), + "ATLAS_v0_CMIP6": str( + Path( + esgf_cache_dir, + "pool/data/c3s-ipcc-ar6-atlas/CMIP6/ssp245/sst_CMIP6_ssp245_mon_201501-210012.nc", + ) + ), + "ATLAS_v0_CORDEX_ANT": str( + Path( + esgf_cache_dir, + "pool/data/c3s-ipcc-ar6-atlas/CORDEX-ANT/rcp45/tnn_CORDEX-ANT_rcp45_mon_200601.nc", + ) + ), } @@ -368,46 +476,66 @@ def get_kerchunk_datasets(): def get_esgf_glob_paths(esgf_cache_dir: Union[str, os.PathLike[str]]): return { - "CMIP5_TAS": Path( - esgf_cache_dir, - "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", - ).as_posix(), - "CMIP5_TAS_EC_EARTH": Path( - esgf_cache_dir, - "badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", - ).as_posix(), - "CMIP5_RH": Path( - esgf_cache_dir, - "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/historical/mon/land/Lmon/r1i1p1/latest/rh/*.nc", - ).as_posix(), - "C3S_CMIP5_TSICE": Path( - esgf_cache_dir, - "gws/nopw/j04/cp4cds1_vol1/data/c3s-cmip5/output1/NCC/NorESM1-ME/rcp60/mon/seaIce/OImon/r1i1p1/tsice/v20120614/*.nc", - ).as_posix(), - "C3S_CORDEX_AFR_TAS": Path( - esgf_cache_dir, - "pool/data/CORDEX/data/cordex/output/AFR-22/GERICS/MPI-M-MPI-ESM-LR/historical/r1i1p1/GERICS-REMO2015/v1/day/tas/v20201015/*.nc", - ).as_posix(), - "C3S_CORDEX_NAM_PR": Path( - esgf_cache_dir, - "pool/data/CORDEX/data/cordex/output/NAM-22/OURANOS/NOAA-GFDL-GFDL-ESM2M/rcp45/r1i1p1/OURANOS-CRCM5/v1/day/pr/v20200831/*.nc", - ).as_posix(), - "C3S_CORDEX_EUR_ZG500": Path( - esgf_cache_dir, - "pool/data/CORDEX/data/cordex/output/EUR-11/IPSL/IPSL-IPSL-CM5A-MR/rcp85/r1i1p1/IPSL-WRF381P/v1/day/zg500/v20190919/*.nc", - ).as_posix(), - "C3S_CORDEX_ANT_SFC_WIND": Path( - esgf_cache_dir, - "pool/data/CORDEX/data/cordex/output/ANT-44/KNMI/ECMWF-ERAINT/evaluation/r1i1p1/KNMI-RACMO21P/v1/day/sfcWind/v20201001/*.nc", - ).as_posix(), - "CMIP5_MRSOS_MULTIPLE_TIME_STEPS": Path( - esgf_cache_dir, - "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp45/day/land/day/r1i1p1/latest/mrsos/*.nc", - ).as_posix(), - "C3S_CMIP5_TAS": Path( - esgf_cache_dir, - "gws/nopw/j04/cp4cds1_vol1/data/c3s-cmip5/output1/ICHEC/EC-EARTH/historical/day/atmos/day/r1i1p1/tas/v20131231/*.nc", - ).as_posix(), + "CMIP5_TAS": str( + Path( + esgf_cache_dir, + "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", + ) + ), + "CMIP5_TAS_EC_EARTH": str( + Path( + esgf_cache_dir, + "badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", + ) + ), + "CMIP5_RH": str( + Path( + esgf_cache_dir, + "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/historical/mon/land/Lmon/r1i1p1/latest/rh/*.nc", + ) + ), + "C3S_CMIP5_TSICE": str( + Path( + esgf_cache_dir, + "gws/nopw/j04/cp4cds1_vol1/data/c3s-cmip5/output1/NCC/NorESM1-ME/rcp60/mon/seaIce/OImon/r1i1p1/tsice/v20120614/*.nc", + ) + ), + "C3S_CORDEX_AFR_TAS": str( + Path( + esgf_cache_dir, + "pool/data/CORDEX/data/cordex/output/AFR-22/GERICS/MPI-M-MPI-ESM-LR/historical/r1i1p1/GERICS-REMO2015/v1/day/tas/v20201015/*.nc", + ) + ), + "C3S_CORDEX_NAM_PR": str( + Path( + esgf_cache_dir, + "pool/data/CORDEX/data/cordex/output/NAM-22/OURANOS/NOAA-GFDL-GFDL-ESM2M/rcp45/r1i1p1/OURANOS-CRCM5/v1/day/pr/v20200831/*.nc", + ) + ), + "C3S_CORDEX_EUR_ZG500": str( + Path( + esgf_cache_dir, + "pool/data/CORDEX/data/cordex/output/EUR-11/IPSL/IPSL-IPSL-CM5A-MR/rcp85/r1i1p1/IPSL-WRF381P/v1/day/zg500/v20190919/*.nc", + ) + ), + "C3S_CORDEX_ANT_SFC_WIND": str( + Path( + esgf_cache_dir, + "pool/data/CORDEX/data/cordex/output/ANT-44/KNMI/ECMWF-ERAINT/evaluation/r1i1p1/KNMI-RACMO21P/v1/day/sfcWind/v20201001/*.nc", + ) + ), + "CMIP5_MRSOS_MULTIPLE_TIME_STEPS": str( + Path( + esgf_cache_dir, + "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp45/day/land/day/r1i1p1/latest/mrsos/*.nc", + ) + ), + "C3S_CMIP5_TAS": str( + Path( + esgf_cache_dir, + "gws/nopw/j04/cp4cds1_vol1/data/c3s-cmip5/output1/ICHEC/EC-EARTH/historical/day/atmos/day/r1i1p1/tas/v20131231/*.nc", + ) + ), } diff --git a/tests/test_project_utils.py b/tests/test_project_utils.py index d4f1324f..0d3e06dc 100644 --- a/tests/test_project_utils.py +++ b/tests/test_project_utils.py @@ -19,7 +19,7 @@ def test_get_project_name(self, mini_esgf_data): project = project_utils.get_project_name(dset) assert project == "cmip5" - dset = "/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc" + # dset = "/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc" # project = project_utils.get_project_name(dset) # assert project == "cmip5" @@ -45,7 +45,7 @@ def test_get_project_name(self, mini_esgf_data): assert project == "cmip6" # tests default for cmip6 path is c3s-cmip6 - dset = "/badc/cmip6/data/CMIP6/CMIP/MIROC/MIROC6/historical/r1i1p1f1/SImon/siconc/gn/latest/*.nc" + # dset = "/badc/cmip6/data/CMIP6/CMIP/MIROC/MIROC6/historical/r1i1p1f1/SImon/siconc/gn/latest/*.nc" # project = project_utils.get_project_name(dset) # assert project == "c3s-cmip6" @@ -66,7 +66,7 @@ def test_get_project_name(self, mini_esgf_data): assert project == "c3s-cica-atlas" # c3s-cica-atlas 2 - dset = "/pool/data/c3s-cica-atlas/ERA5/psl_ERA5_mon_194001-202212.nc" + # dset = "/pool/data/c3s-cica-atlas/ERA5/psl_ERA5_mon_194001-202212.nc" # project = project_utils.get_project_name(dset) # assert project == "c3s-cica-atlas" From e89ac9afdb430d36644be65fcd176972e7164052 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 8 Jul 2025 21:22:12 +0000 Subject: [PATCH 06/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- clisops/project_utils.py | 4 +--- clisops/utils/testing.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/clisops/project_utils.py b/clisops/project_utils.py index 43c8b992..f899fb25 100644 --- a/clisops/project_utils.py +++ b/clisops/project_utils.py @@ -142,9 +142,7 @@ def _parse(self, force): # Default mapping is done by converting '.' characters to '/' separators in path else: - self._data_path = os.path.join( - self._base_dir, os.path.join(*dset.split(".")[1:]) - ) + self._data_path = os.path.join(self._base_dir, os.path.join(*dset.split(".")[1:])) # use to data_path to find files if not set already if len(self._files) < 1: diff --git a/clisops/utils/testing.py b/clisops/utils/testing.py index abc6c932..d34ab648 100644 --- a/clisops/utils/testing.py +++ b/clisops/utils/testing.py @@ -108,9 +108,7 @@ def write_roocs_cfg( cfg_template = template or default_template roocs_config = Path(cache_dir, "roocs.ini") - cfg = Template(cfg_template).render( - base_dir=Path(ESGF_TEST_DATA_CACHE_DIR).joinpath(ESGF_TEST_DATA_VERSION) - ) + cfg = Template(cfg_template).render(base_dir=Path(ESGF_TEST_DATA_CACHE_DIR).joinpath(ESGF_TEST_DATA_VERSION)) with open(roocs_config, "w") as fp: fp.write(cfg) From 20d86f0869feacaeb5f52459ea8c6cfa7c62583d Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:24:22 -0400 Subject: [PATCH 07/19] testing data adjustments Signed-off-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- clisops/utils/testing.py | 51 ++++++++++++++++++++++++---------------- tests/conftest.py | 23 +++++++++++++----- 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/clisops/utils/testing.py b/clisops/utils/testing.py index d34ab648..386a84ad 100644 --- a/clisops/utils/testing.py +++ b/clisops/utils/testing.py @@ -38,29 +38,35 @@ ] try: - default_esgf_test_data_cache = pooch.os_cache("mini-esgf-data") - default_xclim_test_data_cache = pooch.os_cache("xclim-testdata") + default_esgf_test_data_cache = str(pooch.os_cache("mini-esgf-data")) + """Default location for the mini-esgf testing data cache.""" + default_xclim_test_data_cache = str(pooch.os_cache("xclim-testdata")) + """Default location for the xclim testing data cache.""" except (AttributeError, TypeError): default_esgf_test_data_cache = None default_xclim_test_data_cache = None -ESGF_TEST_DATA_REPO_URL = os.getenv("ESGF_TEST_DATA_REPO_UR", "https://raw.githubusercontent.com/roocs/mini-esgf-data") +ESGF_TEST_DATA_REPO_URL = str( + os.getenv("ESGF_TEST_DATA_REPO_UR", "https://raw.githubusercontent.com/roocs/mini-esgf-data") +) default_esgf_test_data_version = "v1" -ESGF_TEST_DATA_VERSION = os.getenv("ESGF_TEST_DATA_VERSION", default_esgf_test_data_version) -ESGF_TEST_DATA_CACHE_DIR = os.getenv("ESGF_TEST_DATA_CACHE_DIR", default_esgf_test_data_cache) +ESGF_TEST_DATA_VERSION = str(os.getenv("ESGF_TEST_DATA_VERSION", default_esgf_test_data_version)) +ESGF_TEST_DATA_CACHE_DIR = str(os.getenv("ESGF_TEST_DATA_CACHE_DIR", default_esgf_test_data_cache)) -XCLIM_TEST_DATA_REPO_URL = os.getenv( - "XCLIM_TEST_DATA_REPO_URL", - "https://raw.githubusercontent.com/Ouranosinc/xclim-testdata", +XCLIM_TEST_DATA_REPO_URL = str( + os.getenv( + "XCLIM_TEST_DATA_REPO_URL", + "https://raw.githubusercontent.com/Ouranosinc/xclim-testdata", + ) ) default_xclim_test_data_version = "v2024.8.23" -XCLIM_TEST_DATA_VERSION = os.getenv("XCLIM_TEST_DATA_VERSION", default_xclim_test_data_version) -XCLIM_TEST_DATA_CACHE_DIR = os.getenv("XCLIM_TEST_DATA_CACHE_DIR", default_xclim_test_data_cache) +XCLIM_TEST_DATA_VERSION = str(os.getenv("XCLIM_TEST_DATA_VERSION", default_xclim_test_data_version)) +XCLIM_TEST_DATA_CACHE_DIR = str(os.getenv("XCLIM_TEST_DATA_CACHE_DIR", default_xclim_test_data_cache)) def write_roocs_cfg( template: str | None = None, - cache_dir: str | Path = default_esgf_test_data_cache, + cache_dir: str | Path | None = default_esgf_test_data_cache, ) -> str: """ Write a ROOCS configuration file for testing purposes. @@ -105,6 +111,8 @@ def write_roocs_cfg( proj_test.my.second.test:second/test/data_*.txt proj_test.another.{variable}.test:good/test/{variable}.nc """ + if cache_dir is None: + raise ValueError("cache_dir must be a valid location.") cfg_template = template or default_template roocs_config = Path(cache_dir, "roocs.ini") @@ -126,7 +134,7 @@ def get_esgf_file_paths(esgf_cache_dir: str | os.PathLike[str]) -> dict[str, str Returns ------- - dict[str, str] + dict A dictionary where keys are descriptive names of datasets and values are their corresponding file paths. """ return { @@ -708,10 +716,10 @@ def stratus( if repo.endswith("xclim-testdata"): _version = XCLIM_TEST_DATA_VERSION - _default_version = default_xclim_test_data_version + _default_testdata_version = default_xclim_test_data_version elif repo.endswith("mini-esgf-data"): _version = ESGF_TEST_DATA_VERSION - _default_version = default_esgf_test_data_version + _default_testdata_version = default_esgf_test_data_version else: raise ValueError( f"Repository URL {repo} not recognized. " @@ -722,7 +730,7 @@ def stratus( return pooch.create( path=cache_dir, base_url=remote, - version=_default_version, + version=_default_testdata_version, version_dev=_version, allow_updates=data_updates, registry=load_registry(branch=branch, repo=repo), @@ -732,7 +740,8 @@ def stratus( def populate_testing_data( repo: str, branch: str, - cache_dir: Path, + local_cache: Path, + temp_folder: Path | None = None, ): """ Populate the local cache with the testing data. @@ -743,12 +752,14 @@ def populate_testing_data( URL of the repository to use when fetching testing datasets. branch : str, optional Branch of repository to use when fetching testing datasets. - cache_dir : Path + local_cache : Path The path to the local cache. Defaults to the location set by the platformdirs library. The testing data will be downloaded to this local cache. + temp_folder : Path, optional + Path to a temporary folder to use as the local cache. If not provided, the default location will be used. """ # Create the Pooch instance - n = stratus(cache_dir=cache_dir, repo=repo, branch=branch) + n = stratus(repo=repo, branch=branch, cache_dir=temp_folder or local_cache) # Download the files errored_files = [] @@ -811,7 +822,7 @@ def gather_testing_data( ) if worker_id == "master": - populate_testing_data(branch=branch, repo=repo, cache_dir=cache_dir) + populate_testing_data(branch=branch, repo=repo, local_cache=cache_dir) else: if platform == "win32": if not cache_dir.joinpath(branch).exists(): @@ -825,7 +836,7 @@ def gather_testing_data( test_data_being_written = FileLock(lockfile) with test_data_being_written: # This flag prevents multiple calls from re-attempting to download testing data in the same pytest run - populate_testing_data(branch=branch, repo=repo, cache_dir=cache_dir) + populate_testing_data(branch=branch, repo=repo, local_cache=cache_dir) cache_dir.joinpath(".data_written").touch() with test_data_being_written.acquire(): if lockfile.exists(): diff --git a/tests/conftest.py b/tests/conftest.py index 0e79294b..6d77f28c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -249,7 +249,7 @@ def _ps_series(values, start="7/1/2000"): @pytest.fixture(scope="session", autouse=True) -def threadsafe_data_dir(tmp_path_factory): +def threadsafe_data_dir(tmp_path_factory) -> Path: return tmp_path_factory.getbasetemp().joinpath("data") @@ -287,14 +287,24 @@ def _check_output_nc(result, fname="output_001.nc", time=None): return _check_output_nc -@pytest.fixture(scope="session", autouse=True) -def load_test_data(worker_id, stratus, nimbus): +@pytest.fixture(autouse=True, scope="session") +def gather_session_data(request, worker_id, stratus, nimbus): """ - Load the test data repository. + Gather testing data on pytest run. + + When running pytest with multiple workers, one worker will copy data remotely to default cache dir while + other workers wait using lockfile. Once the lock is released, all workers will then copy data to their local + threadsafe_data_dir. As this fixture is scoped to the session, it will only run once per pytest run. - This fixture ensures that the required test data repository - has been cloned to the cache directory within the home directory. """ + + def remove_data_written_flag(cache): + """Cleanup cache folders once we are finished.""" + for cache in [testing.default_esgf_test_data_cache, testing.default_xclim_test_data_cache]: + flag = cache.joinpath(".data_written") + if flag.exists(): + flag.unlink() + repositories = { "stratus": { "worker_cache_dir": stratus.path, @@ -312,6 +322,7 @@ def load_test_data(worker_id, stratus, nimbus): for repo in repositories.values(): testing.gather_testing_data(worker_id=worker_id, **repo) + request.addfinalizer(remove_data_written_flag(repo["cache_dir"])) @pytest.fixture From accbfa42b9c9496e1d88e0d8bef89b49f32ff6f8 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:35:13 -0400 Subject: [PATCH 08/19] add missing Path Signed-off-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- tests/conftest.py | 2 +- tests/test_core_regrid.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 6d77f28c..d58f006c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -301,7 +301,7 @@ def gather_session_data(request, worker_id, stratus, nimbus): def remove_data_written_flag(cache): """Cleanup cache folders once we are finished.""" for cache in [testing.default_esgf_test_data_cache, testing.default_xclim_test_data_cache]: - flag = cache.joinpath(".data_written") + flag = Path(cache).joinpath(".data_written") if flag.exists(): flag.unlink() diff --git a/tests/test_core_regrid.py b/tests/test_core_regrid.py index ef170d79..6aa2617d 100644 --- a/tests/test_core_regrid.py +++ b/tests/test_core_regrid.py @@ -925,7 +925,7 @@ def test_detect_extent_shifted_lon_frame(self, mini_esgf_data): g = Grid(ds=ds) assert g.extent_lon == "global" - def test_detect_collapsed_cells(self, mini_esgf_data, load_test_data): + def test_detect_collapsed_cells(self, mini_esgf_data): """Test that collapsed cells are properly identified.""" dsA = xr.open_dataset( mini_esgf_data["CMIP6_OCE_HALO_CNRM"], From 15b7e4dda796c6cb43f858a17da618b2ef1e3e3e Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:23:11 -0400 Subject: [PATCH 09/19] adjust teardown Signed-off-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- tests/conftest.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index d58f006c..adca555b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import os +from functools import partial from pathlib import Path import numpy as np @@ -300,10 +301,9 @@ def gather_session_data(request, worker_id, stratus, nimbus): def remove_data_written_flag(cache): """Cleanup cache folders once we are finished.""" - for cache in [testing.default_esgf_test_data_cache, testing.default_xclim_test_data_cache]: - flag = Path(cache).joinpath(".data_written") - if flag.exists(): - flag.unlink() + flag = Path(cache).joinpath(".data_written") + if flag.exists(): + flag.unlink() repositories = { "stratus": { @@ -322,7 +322,7 @@ def remove_data_written_flag(cache): for repo in repositories.values(): testing.gather_testing_data(worker_id=worker_id, **repo) - request.addfinalizer(remove_data_written_flag(repo["cache_dir"])) + request.addfinalizer(partial(remove_data_written_flag, repo["cache_dir"])) @pytest.fixture From 0605351cdf72189f6a0357631e35c09f4f80db14 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:49:02 -0400 Subject: [PATCH 10/19] adjustments Signed-off-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- clisops/utils/testing.py | 13 ++++++++----- tests/conftest.py | 13 +++++++++---- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/clisops/utils/testing.py b/clisops/utils/testing.py index 386a84ad..07a8257e 100644 --- a/clisops/utils/testing.py +++ b/clisops/utils/testing.py @@ -2,10 +2,10 @@ import importlib.resources as ilr import os +import platform import warnings from pathlib import Path from shutil import copytree -from sys import platform from urllib.error import HTTPError, URLError from urllib.parse import urlparse from urllib.request import urlretrieve @@ -785,7 +785,7 @@ def gather_testing_data( worker_id: str, branch: str, repo: str, - cache_dir: str | os.PathLike[str] | Path, + _cache_dir: str | os.PathLike[str] | Path | None = None, ): """ Gather testing data across workers. @@ -800,7 +800,7 @@ def gather_testing_data( The branch of the repository to use when fetching testing datasets. repo : str The URL of the repository to use when fetching testing datasets. - cache_dir : str or Path + _cache_dir : str or Path The path to the local cache where the testing data is stored. Raises @@ -810,7 +810,10 @@ def gather_testing_data( FileNotFoundError If the testing data is not found and UNIX-style file-locking is not supported on Windows. """ - cache_dir = Path(cache_dir) + if _cache_dir is None: + raise ValueError("The cache directory must be set.") + cache_dir = Path(_cache_dir) + if repo.endswith("xclim-testdata"): version = default_xclim_test_data_version elif repo.endswith("mini-esgf-data"): @@ -824,7 +827,7 @@ def gather_testing_data( if worker_id == "master": populate_testing_data(branch=branch, repo=repo, local_cache=cache_dir) else: - if platform == "win32": + if platform.system() == "Windows": if not cache_dir.joinpath(branch).exists(): raise FileNotFoundError( "Testing data not found and UNIX-style file-locking is not supported on Windows. " diff --git a/tests/conftest.py b/tests/conftest.py index adca555b..8390352f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,6 +6,7 @@ import pandas as pd import pytest import xarray as xr +from loguru import logger from clisops.utils import testing from clisops.utils.testing import stratus as _stratus @@ -303,26 +304,30 @@ def remove_data_written_flag(cache): """Cleanup cache folders once we are finished.""" flag = Path(cache).joinpath(".data_written") if flag.exists(): - flag.unlink() + try: + flag.unlink() + except FileNotFoundError: + logger.info("Teardown race condition occurred: .data_written flag already removed. Lucky!") + pass repositories = { "stratus": { "worker_cache_dir": stratus.path, "repo": testing.ESGF_TEST_DATA_REPO_URL, "branch": testing.ESGF_TEST_DATA_VERSION, - "cache_dir": testing.ESGF_TEST_DATA_CACHE_DIR, + "_cache_dir": testing.ESGF_TEST_DATA_CACHE_DIR, }, "nimbus": { "worker_cache_dir": nimbus.path, "repo": testing.XCLIM_TEST_DATA_REPO_URL, "branch": testing.XCLIM_TEST_DATA_VERSION, - "cache_dir": testing.XCLIM_TEST_DATA_CACHE_DIR, + "_cache_dir": testing.XCLIM_TEST_DATA_CACHE_DIR, }, } for repo in repositories.values(): testing.gather_testing_data(worker_id=worker_id, **repo) - request.addfinalizer(partial(remove_data_written_flag, repo["cache_dir"])) + request.addfinalizer(partial(remove_data_written_flag, repo["_cache_dir"])) @pytest.fixture From ee007de38ba93a00d59487a00942fd675da36f9d Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 15 Jun 2026 12:05:41 -0400 Subject: [PATCH 11/19] update registry mechanisms Signed-off-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- clisops/utils/testing.py | 90 ++++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 21 deletions(-) diff --git a/clisops/utils/testing.py b/clisops/utils/testing.py index 07a8257e..0072a214 100644 --- a/clisops/utils/testing.py +++ b/clisops/utils/testing.py @@ -4,16 +4,21 @@ import os import platform import warnings +from collections.abc import Callable +from functools import wraps from pathlib import Path from shutil import copytree +from typing import IO from urllib.error import HTTPError, URLError -from urllib.parse import urlparse +from urllib.parse import urljoin, urlparse from urllib.request import urlretrieve from filelock import FileLock from jinja2 import Template from loguru import logger +from clisops import __version__ as __clisops_version__ + try: import pooch except ImportError: @@ -47,7 +52,7 @@ default_xclim_test_data_cache = None ESGF_TEST_DATA_REPO_URL = str( - os.getenv("ESGF_TEST_DATA_REPO_UR", "https://raw.githubusercontent.com/roocs/mini-esgf-data") + os.getenv("ESGF_TEST_DATA_REPO_URL", "https://raw.githubusercontent.com/roocs/mini-esgf-data/") ) default_esgf_test_data_version = "v1" ESGF_TEST_DATA_VERSION = str(os.getenv("ESGF_TEST_DATA_VERSION", default_esgf_test_data_version)) @@ -56,7 +61,7 @@ XCLIM_TEST_DATA_REPO_URL = str( os.getenv( "XCLIM_TEST_DATA_REPO_URL", - "https://raw.githubusercontent.com/Ouranosinc/xclim-testdata", + "https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/", ) ) default_xclim_test_data_version = "v2024.8.23" @@ -636,11 +641,14 @@ def load_registry(branch: str, repo: str) -> dict[str, str]: dict Dictionary of filenames and hashes. """ - if repo == ESGF_TEST_DATA_REPO_URL: + if not repo.endswith("/"): + repo = f"{repo}/" + + if "mini-esgf-data" in repo: project = "mini-esgf-data" default_testdata_version = ESGF_TEST_DATA_VERSION default_testdata_repo_url = ESGF_TEST_DATA_REPO_URL - elif repo == XCLIM_TEST_DATA_REPO_URL: + elif "xclim-testdata" in repo: project = "xclim-testdata" default_testdata_version = XCLIM_TEST_DATA_VERSION default_testdata_repo_url = XCLIM_TEST_DATA_REPO_URL @@ -650,17 +658,28 @@ def load_registry(branch: str, repo: str) -> dict[str, str]: f"Please use one of {ESGF_TEST_DATA_REPO_URL} or {XCLIM_TEST_DATA_REPO_URL}" ) - remote_registry = audit_url(f"{repo}{branch}/data/{project}_registry.txt") - if branch != default_testdata_version: - custom_registry_folder = Path(str(ilr.files("clisops").joinpath(f"utils/registries/{branch}"))) + remote_registry = audit_url( + urljoin( + urljoin(repo, branch if branch.endswith("/") else f"{branch}/"), + "data/registry.txt", + ) + ) + + if repo != default_testdata_repo_url: + external_repo_name = urlparse(repo).path.split("/")[-2] + external_branch_name = branch.split("/")[-1] + registry_file = Path( + str(ilr.files("clisops").joinpath(f"utils/registry.{external_repo_name}.{external_branch_name}.txt")) + ) + urlretrieve(remote_registry, registry_file) # noqa: S310 + elif branch != default_testdata_version: + custom_registry_folder = Path(str(ilr.files("clisops").joinpath(f"utils/{branch}"))) custom_registry_folder.mkdir(parents=True, exist_ok=True) - registry_file = custom_registry_folder.joinpath(f"{project}_registry.txt") + registry_file = custom_registry_folder.joinpath("registry.txt") urlretrieve(remote_registry, registry_file) # noqa: S310 - elif repo != default_testdata_repo_url: + else: registry_file = Path(str(ilr.files("clisops").joinpath(f"utils/{project}_registry.txt"))) - urlretrieve(remote_registry, registry_file) # noqa: S310 - registry_file = Path(str(ilr.files("clisops").joinpath(f"utils/{project}_registry.txt"))) if not registry_file.exists(): raise FileNotFoundError(f"Registry file not found: {registry_file}") @@ -674,7 +693,7 @@ def stratus( repo: str, branch: str, cache_dir: str | Path, - data_updates: bool = True, + allow_updates: bool = True, ): """ Pooch registry instance for xclim test data. @@ -687,7 +706,7 @@ def stratus( Branch of repository to use when fetching testing datasets. cache_dir : str or Path The path to the directory where the data files are stored. - data_updates : bool + allow_updates : bool If True, allow updates to the data files. Default is True. Returns @@ -714,10 +733,10 @@ def stratus( "You can install it with `pip install pooch` or `pip install roocs-utils[dev]`." ) - if repo.endswith("xclim-testdata"): + if "xclim-testdata" in repo: _version = XCLIM_TEST_DATA_VERSION _default_testdata_version = default_xclim_test_data_version - elif repo.endswith("mini-esgf-data"): + elif "mini-esgf-data" in repo: _version = ESGF_TEST_DATA_VERSION _default_testdata_version = default_esgf_test_data_version else: @@ -726,16 +745,45 @@ def stratus( f"Please use one of {ESGF_TEST_DATA_REPO_URL} or {XCLIM_TEST_DATA_REPO_URL}" ) - remote = audit_url(f"{repo}/{branch}/data") - return pooch.create( + remote = audit_url(urljoin(urljoin(repo, branch if branch.endswith("/") else f"{branch}/"), "data")) + + _stratus = pooch.create( path=cache_dir, base_url=remote, version=_default_testdata_version, version_dev=_version, - allow_updates=data_updates, + allow_updates=allow_updates, registry=load_registry(branch=branch, repo=repo), ) + # Add a custom fetch method to the Pooch instance + # Needed to address: https://github.com/readthedocs/readthedocs.org/issues/11763 + # Fix inspired by @bjlittle (https://github.com/bjlittle/geovista/pull/1202) + _stratus.fetch_diversion = _stratus.fetch + + # Overload the fetch method to add user-agent headers + @wraps(_stratus.fetch_diversion) + def _fetch(*args, **kwargs: bool | Callable) -> str: # numpydoc ignore=GL08 # *args: str + def _downloader( + url: str, + output_file: str | IO, + poocher: pooch.Pooch, + check_only: bool | None = False, + ) -> None: + """Download the file from the URL and save it to the save_path.""" + headers = {"User-Agent": f"clisops ({__clisops_version__})"} + downloader = pooch.HTTPDownloader(headers=headers) + return downloader(url, output_file, poocher, check_only=check_only) + + # default to our http/s downloader with user-agent headers + kwargs.setdefault("downloader", _downloader) + return _stratus.fetch_diversion(*args, **kwargs) + + # Replace the fetch method with the custom fetch method + _stratus.fetch = _fetch + + return _stratus + def populate_testing_data( repo: str, @@ -814,9 +862,9 @@ def gather_testing_data( raise ValueError("The cache directory must be set.") cache_dir = Path(_cache_dir) - if repo.endswith("xclim-testdata"): + if "xclim-testdata" in repo: version = default_xclim_test_data_version - elif repo.endswith("mini-esgf-data"): + elif "mini-esgf-data" in repo: version = default_esgf_test_data_version else: raise ValueError( From 6ff281c21fbb821ed06dcb42c63be7b991049c63 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 15 Jun 2026 15:53:05 -0400 Subject: [PATCH 12/19] address warnings from xarray Signed-off-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- clisops/utils/dataset_utils.py | 9 +++++---- tests/test_core_subset.py | 2 ++ tests/test_dataset_utils.py | 34 +++++++++++++++++++++++++++++++--- tests/test_file_namers.py | 8 ++++++++ tests/test_ops_average.py | 2 +- tests/test_ops_subset.py | 8 ++++---- tests/test_ops_xarray_mean.py | 2 ++ tests/test_output_utils.py | 4 +--- tests/test_project_utils.py | 10 ++++++---- 9 files changed, 60 insertions(+), 19 deletions(-) diff --git a/clisops/utils/dataset_utils.py b/clisops/utils/dataset_utils.py index 7effb802..36699199 100644 --- a/clisops/utils/dataset_utils.py +++ b/clisops/utils/dataset_utils.py @@ -349,7 +349,7 @@ def get_coord_type(coord: xr.DataArray | xr.Dataset) -> str | None: return None -def get_main_variable(ds, exclude_common_coords=True): +def get_main_variable(ds: xr.Dataset, exclude_common_coords: bool = True): """ Find the main variable of an xarray Dataset. @@ -401,7 +401,7 @@ def get_main_variable(ds, exclude_common_coords=True): return result -def open_xr_dataset(dset: str | pathlib.Path | list[str | pathlib.Path], **kwargs): +def open_xr_dataset(dset: str | pathlib.Path | list[str | pathlib.Path], **kwargs) -> xr.Dataset: """ Open an xarray dataset from a dataset input. @@ -441,7 +441,7 @@ def open_xr_dataset(dset: str | pathlib.Path | list[str | pathlib.Path], **kwarg # If an empty sequence, then raise an Exception if not len(dset): - raise Exception("No files found to open with xarray.") + raise FileNotFoundError("No files found to open with xarray.") # if a list we want a multi-file dataset if len(dset) > 1: @@ -481,7 +481,7 @@ def _get_kwargs_for_opener(otype, **kwargs): "remote_options", "target_options", ] - allowed_multi_args = ["combine"] + allowed_multi_args = ["combine", "data_vars"] args = { "decode_times": xr.coders.CFDatetimeCoder(use_cftime=True), @@ -494,6 +494,7 @@ def _get_kwargs_for_opener(otype, **kwargs): if otype.lower() == "multi": args["combine"] = "by_coords" + args["data_vars"] = "all" allowed_args.extend(allowed_multi_args) elif otype.lower() == "zarr": allowed_args.extend(allowed_zarr_args) diff --git a/tests/test_core_subset.py b/tests/test_core_subset.py index 35fbaa6c..9960d8d1 100644 --- a/tests/test_core_subset.py +++ b/tests/test_core_subset.py @@ -367,6 +367,8 @@ def test_dataset(self, nimbus): da = xr.open_mfdataset( [nimbus.fetch(self.nc_tasmax_file), nimbus.fetch(self.nc_tasmin_file)], combine="by_coords", + compat="no_conflicts", + data_vars="all", ) out = subset.subset_bbox(da, lon_bnds=self.lon, lat_bnds=self.lat) assert np.all(out.lon >= np.min(self.lon)) diff --git a/tests/test_dataset_utils.py b/tests/test_dataset_utils.py index ed4d0741..2bd0696e 100644 --- a/tests/test_dataset_utils.py +++ b/tests/test_dataset_utils.py @@ -212,11 +212,13 @@ def test_detect_coordinate_and_bounds(mini_esgf_data): mini_esgf_data["C3S_CORDEX_AFR_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ).load() ds_b = xr.open_mfdataset( mini_esgf_data["C3S_CORDEX_ANT_SFC_WIND"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ).load() ds_c = xr.open_dataset(mini_esgf_data["CMIP6_UNSTR_ICON_A"]).load() ds_d = xr.open_dataset(mini_esgf_data["CMIP6_OCE_HALO_CNRM"]).load() @@ -273,6 +275,7 @@ def test_detect_coordinate_robustness(tmpdir, mini_esgf_data): mini_esgf_data["C3S_CORDEX_AFR_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ).load() as ds: assert clidu.detect_coordinate(ds, "latitude") == "lat" assert clidu.detect_coordinate(ds, "longitude") == "lon" @@ -456,7 +459,9 @@ def test_determine_lon_lat_range_unstructured(mini_esgf_data): def test_determine_lon_lat_range_regular_lat_lon(mini_esgf_data): """Test the function determine_lon_lat_range for regular lat lon grids.""" - with xr.open_mfdataset(mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)) as ds: + with xr.open_mfdataset( + mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), data_vars="all" + ) as ds: # Deal with immutable numpy arrays lat = ds.lat.values.copy() lat[1] = -999.0 @@ -623,8 +628,7 @@ def test_convert_lon_frame_shifted_bounds(mini_esgf_data): def test_convert_lon_frame_shifted_no_bounds(mini_esgf_data): with xr.open_dataset( - mini_esgf_data["CMIP6_IITM_EXTENT"], - decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), + mini_esgf_data["CMIP6_IITM_EXTENT"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True) ) as ds: # confirm shifted frame assert np.isclose(ds["longitude"].min(), -280.0, atol=1.0) @@ -714,6 +718,7 @@ def test_get_main_var(mini_esgf_data): mini_esgf_data["C3S_CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: result = clidu.get_main_variable(ds) assert result == "tas" @@ -724,6 +729,7 @@ def test_get_main_var_2(mini_esgf_data): mini_esgf_data["CMIP5_ZOSTOGA"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: result = clidu.get_main_variable(ds) assert result == "zostoga" @@ -734,6 +740,7 @@ def test_get_main_var_3(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: result = clidu.get_main_variable(ds) assert result == "tas" @@ -744,6 +751,7 @@ def test_get_main_var_4(mini_esgf_data): mini_esgf_data["CMIP5_RH"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: result = clidu.get_main_variable(ds) assert result == "rh" @@ -754,6 +762,7 @@ def test_get_main_var_test_data(mini_esgf_data): mini_esgf_data["CMIP6_SIMASS_DEGEN"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: var_id = clidu.get_main_variable(ds) assert var_id == "simass" @@ -764,6 +773,7 @@ def test_get_main_var_include_common_coords(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: var_id = clidu.get_main_variable(ds, exclude_common_coords=False) @@ -776,6 +786,7 @@ def test_get_standard_names(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: assert sorted(ds.cf.standard_names) == sorted( [ @@ -794,6 +805,7 @@ def test_get_latitude_cf_xarray(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: xr.testing.assert_identical(ds["lat"].reset_coords("height", drop=True), ds.cf["lat"]) xr.testing.assert_identical(ds["lat"].reset_coords("height", drop=True), ds.cf["latitude"]) @@ -804,6 +816,7 @@ def test_get_latitude_2_cf_xarray(mini_esgf_data): mini_esgf_data["C3S_CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: xr.testing.assert_identical(ds["lat"], ds.cf["lat"]) xr.testing.assert_identical(ds["lat"], ds.cf["latitude"]) @@ -816,6 +829,7 @@ def test_get_lat_lon_names_from_ds_cf_xarray(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: assert ds.cf["latitude"].name == "lat" assert ds.cf["longitude"].name == "lon" @@ -827,6 +841,7 @@ def test_get_time_cf_xarray(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: xr.testing.assert_identical(ds["time"].reset_coords(("height"), drop=True), ds.cf["time"]) @@ -838,6 +853,7 @@ def test_get_time(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["tas"] coord = da.time @@ -849,6 +865,7 @@ def test_get_latitude(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["tas"] coord = da.lat @@ -860,6 +877,7 @@ def test_get_longitude(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["tas"] coord = da.lon @@ -872,6 +890,7 @@ def test_get_time_2(mini_esgf_data): mini_esgf_data["C3S_CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["tas"] coord = da.time @@ -883,6 +902,7 @@ def test_get_latitude_2(mini_esgf_data): mini_esgf_data["C3S_CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["tas"] coord = da.lat @@ -894,6 +914,7 @@ def test_get_longitude_2(mini_esgf_data): mini_esgf_data["C3S_CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["tas"] coord = da.lon @@ -906,6 +927,7 @@ def test_get_time_3(mini_esgf_data): mini_esgf_data["CMIP5_ZOSTOGA"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["zostoga"] coord = da.time @@ -917,6 +939,7 @@ def test_get_level(mini_esgf_data): mini_esgf_data["CMIP5_ZOSTOGA"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["zostoga"] coord = da.lev @@ -928,6 +951,7 @@ def test_get_other(mini_esgf_data): mini_esgf_data["CMIP6_SICONC"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["siconc"] coord = da.type @@ -939,6 +963,7 @@ def test_order_of_coords(mini_esgf_data): mini_esgf_data["CMIP5_ZOSTOGA"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: da = ds["zostoga"] @@ -972,6 +997,7 @@ def test_text_coord_not_level(mini_esgf_data): mini_esgf_data["CMIP6_CHAR_DIM"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: coord_type = clidu.get_coord_type(ds.sector) assert coord_type is None @@ -983,6 +1009,7 @@ def test_get_coords_by_type(mini_esgf_data): mini_esgf_data["C3S_CORDEX_AFR_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: # check lat, lon, time and level are found when they are coordinates lat = clidu.get_coord_by_type(ds, "latitude", ignore_aux_coords=False) @@ -1019,6 +1046,7 @@ def test_get_coords_by_type_with_no_time(mini_esgf_data): mini_esgf_data["C3S_CORDEX_AFR_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: # check time time = clidu.get_coord_by_type(ds, "time", ignore_aux_coords=False) diff --git a/tests/test_file_namers.py b/tests/test_file_namers.py index 24e3f8db..c7bada16 100644 --- a/tests/test_file_namers.py +++ b/tests/test_file_namers.py @@ -75,6 +75,7 @@ def test_StandardFileNamer_cmip5(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [(_ds, "tas_mon_HadGEM2-ES_rcp85_r1i1p1_20051216-22991216.nc")] @@ -91,6 +92,7 @@ def test_StandardFileNamer_cmip5_use_default_attr_names(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [(_ds, "tas_mon_no-model_rcp85_r1i1p1_20051216-22991216.nc")] @@ -108,6 +110,7 @@ def test_StandardFileNamer_cmip6(mini_esgf_data): mini_esgf_data["CMIP6_SICONC"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [(_ds, "siconc_SImon_CanESM5_historical_r1i1p1f1_gn_18500116-20141216.nc")] @@ -124,6 +127,7 @@ def test_StandardFileNamer_cmip6_use_default_attr_names(mini_esgf_data): mini_esgf_data["CMIP6_SICONC"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [(_ds, "siconc_SImon_no-model_historical_r1i1p1f1_no-grid_18500116-20141216.nc")] @@ -146,6 +150,7 @@ def test_StandardFileNamer_c3s_cordex(mini_esgf_data): mini_esgf_data["C3S_CORDEX_NAM_PR"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [ @@ -171,6 +176,7 @@ def test_StandardFileNamer_c3s_cordex_use_default_attr_names(mini_esgf_data): mini_esgf_data["C3S_CORDEX_NAM_PR"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [ @@ -195,6 +201,7 @@ def test_StandardFileNamer_c3s_atlas_v0(mini_esgf_data): mini_esgf_data["ATLAS_v0_CORDEX_NAM"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [ @@ -217,6 +224,7 @@ def test_StandardFileNamer_c3s_atlas_v1(mini_esgf_data): mini_esgf_data["ATLAS_v1_ERA5"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) checks = [ diff --git a/tests/test_ops_average.py b/tests/test_ops_average.py index a97b7847..a6d02642 100644 --- a/tests/test_ops_average.py +++ b/tests/test_ops_average.py @@ -24,7 +24,7 @@ def _check_output_nc(result, fname="output_001.nc"): def _load_ds(fpath): - return xr.open_mfdataset(fpath, decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)) + return xr.open_mfdataset(fpath, decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), data_vars="all") def test_average_basic_data_array(nimbus): diff --git a/tests/test_ops_subset.py b/tests/test_ops_subset.py index 0730e747..22a8102e 100644 --- a/tests/test_ops_subset.py +++ b/tests/test_ops_subset.py @@ -22,12 +22,12 @@ from clisops.utils.output_utils import _format_time -def _load_ds(fpath: str | Path): +def _load_ds(fpath: str | Path | list[str | Path]): if isinstance(fpath, (str, Path)): if str(fpath).endswith("*.nc"): - return xr.open_mfdataset(fpath) + return xr.open_mfdataset(fpath, decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)) else: - return xr.open_dataset(fpath) + return xr.open_dataset(fpath, decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)) return xr.open_mfdataset(fpath) @@ -370,7 +370,7 @@ def test_time_slices_in_subset_rh(mini_esgf_data): temp_max_file_size = "10KB" CONFIG["clisops:write"]["file_size_limit"] = temp_max_file_size - with xr.open_mfdataset(mini_esgf_data["CMIP5_RH"]) as ds: + with xr.open_mfdataset(mini_esgf_data["CMIP5_RH"], data_vars="all") as ds: outputs = subset( ds=ds, time=time_interval(start_time, end_time), diff --git a/tests/test_ops_xarray_mean.py b/tests/test_ops_xarray_mean.py index cfe46d59..20ca4559 100644 --- a/tests/test_ops_xarray_mean.py +++ b/tests/test_ops_xarray_mean.py @@ -103,6 +103,7 @@ def test_xarray_da_mean_keep_attrs_true(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], combine="by_coords", decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), + data_vars="all", drop_variables=["time_bnds"], ) ds_tas_mean = ds.tas.mean(dim="lat", keep_attrs=True) @@ -117,6 +118,7 @@ def test_xarray_da_mean_keep_attrs_false(mini_esgf_data): mini_esgf_data["CMIP5_TAS"], combine="by_coords", decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), + data_vars="all", ).load() ds_tas_mean = ds.tas.mean(dim="time", keep_attrs=False) ds_mean = ds.mean(dim="time", keep_attrs=False) diff --git a/tests/test_output_utils.py b/tests/test_output_utils.py index 3524be2f..ae32f89c 100644 --- a/tests/test_output_utils.py +++ b/tests/test_output_utils.py @@ -27,9 +27,7 @@ def _open(coll): if len(coll) > 1: # issues with dask and cftime ds = xr.open_mfdataset( - coll, - decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), - combine="by_coords", + coll, decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", data_vars="all" ).load() else: ds = xr.open_dataset(coll[0], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)) diff --git a/tests/test_project_utils.py b/tests/test_project_utils.py index 2df35473..534ca68f 100644 --- a/tests/test_project_utils.py +++ b/tests/test_project_utils.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import pytest import xarray as xr @@ -26,6 +27,7 @@ def test_get_project_name(self, mini_esgf_data): mini_esgf_data["CMIP5_TAS"], decode_times=xr.coders.CFDatetimeCoder(use_cftime=True), combine="by_coords", + data_vars="all", ) as ds: project = project_utils.get_project_name(ds) assert project == "cmip5" @@ -80,11 +82,11 @@ def test_get_project_name(self, mini_esgf_data): assert project in ["c3s-ipcc-ar6-atlas", "c3s-ipcc-atlas"] def test_get_project_base_dir(self): - cmip5_base_dir = project_utils.get_project_base_dir("cmip5") - assert cmip5_base_dir == "/mnt/lustre/work/kd0956/CMIP5/data/cmip5" + cmip5_base_dir = Path(project_utils.get_project_base_dir("cmip5")) + assert Path("/mnt/lustre/work/kd0956/CMIP5/data/cmip5").match(str(cmip5_base_dir)) - c3s_cordex_base_dir = project_utils.get_project_base_dir("c3s-cordex") - assert c3s_cordex_base_dir == "/mnt/lustre/work/ik1017/C3SCORDEX/data/c3s-cordex" + c3s_cordex_base_dir = Path(project_utils.get_project_base_dir("c3s-cordex")) + assert Path("/mnt/lustre/work/ik1017/C3SCORDEX/data/c3s-cordex").match(str(c3s_cordex_base_dir)) with pytest.raises(Exception) as exc: project_utils.get_project_base_dir("test") From d75a7e26bd7c73bd86a634d1915df020c3239222 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 15 Jun 2026 15:53:29 -0400 Subject: [PATCH 13/19] add caching for Windows builds Signed-off-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- .github/workflows/main.yml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index cf1d4f97..6562ace0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -76,6 +76,8 @@ jobs: - python-version: "3.12" os: "windows-latest" positional_args: "--numprocesses=0" + env: + CACHE_KEY: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }} steps: - name: Harden Runner uses: step-security/harden-runner@fe104658747b27e96e4f7e80cd0a94068e53901d # v2.16.1 @@ -112,7 +114,7 @@ jobs: ~/.cache/mini-esgf-data ~/.cache/xclim-testdata .tox - key: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }} + key: ${{ env.CACHE_KEY }} - name: Environment Caching (macOS) uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 if: matrix.os == 'macos-latest' @@ -121,7 +123,16 @@ jobs: ~/Library/Caches/mini-esgf-data ~/Library/Caches/xclim-testdata .tox - key: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }} + key: ${{ env.CACHE_KEY }} + - name: Environment Caching (Windows) + uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + if: matrix.os == 'windows-latest' + with: + path: | + ~\AppData\Local\mini-esgf-data + ~\AppData\Local\xclim-testdata + .tox + key: ${{ env.CACHE_KEY }} - name: Test with tox run: | From 22c23a198848017bc96f68641cb7dc5c091729ca Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 15 Jun 2026 15:53:52 -0400 Subject: [PATCH 14/19] filepath fixes for multiplatform Signed-off-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- clisops/project_utils.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/clisops/project_utils.py b/clisops/project_utils.py index f899fb25..dfef7663 100644 --- a/clisops/project_utils.py +++ b/clisops/project_utils.py @@ -333,7 +333,7 @@ def switch_dset(dset: xr.Dataset | xr.DataArray | str | FileMapper) -> str: str The dataset path or dataset ID derived from the input dataset, switched from the input. """ - if dset.startswith("/"): + if isinstance(dset, str) and (dset.startswith("/") or dset.startswith("\\")): return datapath_to_dsid(dset) else: return dsid_to_datapath(dset) @@ -448,9 +448,10 @@ def get_project_base_dir(project: str) -> str: ------- str The base directory of the specified project. + The URI uses platform-dependent path encoding. """ try: - return CONFIG[f"project:{project}"]["base_dir"] + return str(Path(CONFIG[f"project:{project}"]["base_dir"])) except KeyError: raise InvalidProject("The project supplied is not known.") @@ -494,12 +495,11 @@ def get_project_from_data_node_root(url: str) -> str: """ data_node_dict = get_data_node_dirs_dict() project = None - for proj, data_node_root in data_node_dict.items(): if data_node_root in url: project = proj - if not project: + if project is None: raise InvalidProject( f"The project could not be identified from the URL {url} so it could not be mapped to a file path." ) @@ -522,8 +522,8 @@ def url_to_file_path(url: str) -> str: """ project = get_project_from_data_node_root(url) - data_node_root = CONFIG.get(f"project:{project}", {}).get("data_node_root") - base_dir = CONFIG.get(f"project:{project}", {}).get("base_dir") - file_path = os.path.join(base_dir, url.partition(data_node_root)[2]) + data_node_root = str(Path(CONFIG.get(f"project:{project}", {}).get("data_node_root"))) + base_dir = str(Path(CONFIG.get(f"project:{project}", {}).get("base_dir"))) + file_path = str(Path(base_dir).joinpath(str(Path(url.partition(data_node_root)[2])))) return file_path From c5d631017355169d42e7faea45a2588ecb0edc60 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 16 Jun 2026 10:58:05 -0400 Subject: [PATCH 15/19] fix typo Signed-off-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6562ace0..3c5ce52f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -77,7 +77,7 @@ jobs: os: "windows-latest" positional_args: "--numprocesses=0" env: - CACHE_KEY: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }} + CACHE_KEY: ${{ matrix.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }} steps: - name: Harden Runner uses: step-security/harden-runner@fe104658747b27e96e4f7e80cd0a94068e53901d # v2.16.1 From b87cfcb3c10072587e017d17898ed3f825d063ab Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 16 Jun 2026 11:25:11 -0400 Subject: [PATCH 16/19] set env later Signed-off-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- .github/workflows/main.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3c5ce52f..dc910cb3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -76,8 +76,6 @@ jobs: - python-version: "3.12" os: "windows-latest" positional_args: "--numprocesses=0" - env: - CACHE_KEY: ${{ matrix.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }} steps: - name: Harden Runner uses: step-security/harden-runner@fe104658747b27e96e4f7e80cd0a94068e53901d # v2.16.1 @@ -106,6 +104,9 @@ jobs: run: | python -m pip install --require-hashes -r CI/requirements_ci.txt + - name: Set Cache keys + run: | + echo "CACHE_KEY=${{ matrix.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }}" >> $GITHUB_ENV - name: Environment Caching uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 if: matrix.os == 'ubuntu-latest' From ad38b855fb7b88f068fde39ac9745679e134a2a1 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 16 Jun 2026 11:53:06 -0400 Subject: [PATCH 17/19] use bash Signed-off-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index dc910cb3..bf85f633 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -105,6 +105,7 @@ jobs: python -m pip install --require-hashes -r CI/requirements_ci.txt - name: Set Cache keys + shell: bash run: | echo "CACHE_KEY=${{ matrix.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }}" >> $GITHUB_ENV - name: Environment Caching From e47919a0e26dcfdb0a02d5130e207165b1bb00f4 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 17 Jun 2026 16:00:48 -0400 Subject: [PATCH 18/19] small fixes Signed-off-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- .github/workflows/main.yml | 12 ++++++------ clisops/utils/testing.py | 36 ++++++++++++++++++++++-------------- tests/conftest.py | 14 -------------- 3 files changed, 28 insertions(+), 34 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index bf85f633..7eef14e8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -113,8 +113,8 @@ jobs: if: matrix.os == 'ubuntu-latest' with: path: | - ~/.cache/mini-esgf-data - ~/.cache/xclim-testdata + '~/.cache/mini-esgf-data' + '~/.cache/xclim-testdata' .tox key: ${{ env.CACHE_KEY }} - name: Environment Caching (macOS) @@ -122,8 +122,8 @@ jobs: if: matrix.os == 'macos-latest' with: path: | - ~/Library/Caches/mini-esgf-data - ~/Library/Caches/xclim-testdata + '~/Library/Caches/mini-esgf-data' + '~/Library/Caches/xclim-testdata' .tox key: ${{ env.CACHE_KEY }} - name: Environment Caching (Windows) @@ -131,8 +131,8 @@ jobs: if: matrix.os == 'windows-latest' with: path: | - ~\AppData\Local\mini-esgf-data - ~\AppData\Local\xclim-testdata + '~\AppData\Local\mini-esgf-data\mini-esgf-data\Cache' + '~\AppData\Local\xclim-testdata\xclim-testdata\Cache' .tox key: ${{ env.CACHE_KEY }} diff --git a/clisops/utils/testing.py b/clisops/utils/testing.py index 0072a214..20fb9894 100644 --- a/clisops/utils/testing.py +++ b/clisops/utils/testing.py @@ -42,6 +42,19 @@ "write_roocs_cfg", ] + +default_esgf_test_data_version = "v1" +"""Default version of the mini-esgf testing data to use when fetching datasets.""" + +default_esgf_test_data_url = "https://raw.githubusercontent.com/roocs/mini-esgf-data/" +"""Default URL of the mini-esgf testing data repository to use when fetching datasets.""" + +default_xclim_test_data_version = "v2024.8.23" +"""Default version of the xclim testing data to use when fetching datasets.""" + +default_xclim_test_data_url = "https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/" +"""Default URL of the xclim testing data repository to use when fetching datasets.""" + try: default_esgf_test_data_cache = str(pooch.os_cache("mini-esgf-data")) """Default location for the mini-esgf testing data cache.""" @@ -51,20 +64,12 @@ default_esgf_test_data_cache = None default_xclim_test_data_cache = None -ESGF_TEST_DATA_REPO_URL = str( - os.getenv("ESGF_TEST_DATA_REPO_URL", "https://raw.githubusercontent.com/roocs/mini-esgf-data/") -) -default_esgf_test_data_version = "v1" + +ESGF_TEST_DATA_REPO_URL = str(os.getenv("ESGF_TEST_DATA_REPO_URL", default_esgf_test_data_url)) ESGF_TEST_DATA_VERSION = str(os.getenv("ESGF_TEST_DATA_VERSION", default_esgf_test_data_version)) ESGF_TEST_DATA_CACHE_DIR = str(os.getenv("ESGF_TEST_DATA_CACHE_DIR", default_esgf_test_data_cache)) -XCLIM_TEST_DATA_REPO_URL = str( - os.getenv( - "XCLIM_TEST_DATA_REPO_URL", - "https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/", - ) -) -default_xclim_test_data_version = "v2024.8.23" +XCLIM_TEST_DATA_REPO_URL = str(os.getenv("XCLIM_TEST_DATA_REPO_URL", default_xclim_test_data_url)) XCLIM_TEST_DATA_VERSION = str(os.getenv("XCLIM_TEST_DATA_VERSION", default_xclim_test_data_version)) XCLIM_TEST_DATA_CACHE_DIR = str(os.getenv("XCLIM_TEST_DATA_CACHE_DIR", default_xclim_test_data_cache)) @@ -745,6 +750,8 @@ def stratus( f"Please use one of {ESGF_TEST_DATA_REPO_URL} or {XCLIM_TEST_DATA_REPO_URL}" ) + if not repo.endswith("/"): + repo = f"{repo}/" remote = audit_url(urljoin(urljoin(repo, branch if branch.endswith("/") else f"{branch}/"), "data")) _stratus = pooch.create( @@ -786,16 +793,19 @@ def _downloader( def populate_testing_data( + temp_folder: Path | None = None, + *, repo: str, branch: str, local_cache: Path, - temp_folder: Path | None = None, ): """ Populate the local cache with the testing data. Parameters ---------- + temp_folder : Path, optional + Path to a temporary folder to use as the local cache. If not provided, the default location will be used. repo : str, optional URL of the repository to use when fetching testing datasets. branch : str, optional @@ -803,8 +813,6 @@ def populate_testing_data( local_cache : Path The path to the local cache. Defaults to the location set by the platformdirs library. The testing data will be downloaded to this local cache. - temp_folder : Path, optional - Path to a temporary folder to use as the local cache. If not provided, the default location will be used. """ # Create the Pooch instance n = stratus(repo=repo, branch=branch, cache_dir=temp_folder or local_cache) diff --git a/tests/conftest.py b/tests/conftest.py index 8390352f..54e4e2c2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -349,20 +349,6 @@ def c3s_cmip5_tos(): ).as_posix() -@pytest.fixture -def cmip5_archive_base(): - if "CMIP5_ARCHIVE_BASE" in os.environ: - return os.environ["CMIP5_ARCHIVE_BASE"] - return Path(__file__).parent.absolute().joinpath("mini-esgf-data/test_data/badc/cmip5/data").as_posix() - - -@pytest.fixture -def cmip6_archive_base(): - if "CMIP6_ARCHIVE_BASE" in os.environ: - return os.environ["CMIP6_ARCHIVE_BASE"] - return Path(__file__).parent.absolute().joinpath("mini-esgf-data/test_data/badc/cmip6/data").as_posix() - - @pytest.fixture(scope="session", autouse=True) def mini_esgf_data(stratus): return ( From 6fd1ed6b728303ed1fced5199b7c1a1781257f77 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 17 Jun 2026 16:24:05 -0400 Subject: [PATCH 19/19] fix cwd-dependent test Signed-off-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- tests/test_ops_average.py | 37 ++++++++++++++++++------------------- tests/test_ops_subset.py | 2 +- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/tests/test_ops_average.py b/tests/test_ops_average.py index a6d02642..794fdebf 100644 --- a/tests/test_ops_average.py +++ b/tests/test_ops_average.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import pytest import xarray as xr @@ -209,25 +210,23 @@ def test_dim_not_found_ignore(mini_esgf_data): assert "height" in result[0] -# FIXME: This kind of test is not desirable as it is testing the internal testing implementation -# def test_aux_variables(): -# """ -# test auxiliary variables are remembered in output dataset -# Have to create a netcdf file with auxiliary variable -# """ -# -# ds = _load_ds("tests/data/test_file.nc") -# -# assert "do_i_get_written" in ds.variables -# -# result = average_over_dims( -# ds=ds, -# dims=["level", "time"], -# ignore_undetected_dims=True, -# output_type="xarray", -# ) -# -# assert "do_i_get_written" in result[0].variables +def test_aux_variables(): + """ + Test auxiliary variables are remembered in output dataset + Have to create a netcdf file with auxiliary variable + """ + ds = _load_ds(Path(__file__).parent.joinpath("data/test_file.nc")) + + assert "do_i_get_written" in ds.variables + + result = average_over_dims( + ds=ds, + dims=["level", "time"], + ignore_undetected_dims=True, + output_type="xarray", + ) + + assert "do_i_get_written" in result[0].variables @pytest.mark.skipif(xesmf is None, reason=XESMF_IMPORT_MESSAGE) diff --git a/tests/test_ops_subset.py b/tests/test_ops_subset.py index 22a8102e..68261eca 100644 --- a/tests/test_ops_subset.py +++ b/tests/test_ops_subset.py @@ -504,7 +504,7 @@ def test_aux_variables(): Test auxiliary variables are remembered in output dataset Have to create a netcdf file with auxiliary variable """ - ds = _load_ds("tests/data/test_file.nc") + ds = _load_ds(Path(__file__).parent.joinpath("data/test_file.nc")) assert "do_i_get_written" in ds.variables