Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 52 additions & 11 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,19 @@ jobs:
python -m tox -e lint

test-pypi:
name: Python${{ matrix.python-version }} (PyPI + Tox)
name: Python${{ matrix.python-version }} (PyPI + Tox, ${{ matrix.os }})
needs: lint
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
timeout-minutes: 20
strategy:
matrix:
python-version: [ "3.11", "3.12", "3.13" ]
python-version: [ "3.11", "3.13" ]
os: [ "macos-latest", "ubuntu-latest" ]
positional_args: [ "" ]
include:
- python-version: "3.12"
os: "windows-latest"
positional_args: "--numprocesses=0"
steps:
- name: Harden Runner
uses: step-security/harden-runner@fe104658747b27e96e4f7e80cd0a94068e53901d # v2.16.1
Expand All @@ -89,6 +95,7 @@ jobs:
cache: "pip"

- name: Install HDF5 (Linux)
if: matrix.os == 'ubuntu-latest'
run: |
sudo apt-get update
sudo apt-get install -y libhdf5-dev
Expand All @@ -97,39 +104,64 @@ jobs:
run: |
python -m pip install --require-hashes -r CI/requirements_ci.txt

- name: Set Cache keys
shell: bash
run: |
echo "CACHE_KEY=${{ matrix.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }}" >> $GITHUB_ENV
- name: Environment Caching
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
if: matrix.os == 'ubuntu-latest'
with:
path: |
~/.cache/mini-esgf-data
~/.cache/xclim-testdata
'~/.cache/mini-esgf-data'
'~/.cache/xclim-testdata'
.tox
key: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-Python${{ matrix.python-version }}-${{ env.ESGF_TEST_DATA_VERSION }}
key: ${{ env.CACHE_KEY }}
- name: Environment Caching (macOS)
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
if: matrix.os == 'macos-latest'
with:
path: |
'~/Library/Caches/mini-esgf-data'
'~/Library/Caches/xclim-testdata'
.tox
key: ${{ env.CACHE_KEY }}
- name: Environment Caching (Windows)
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
if: matrix.os == 'windows-latest'
with:
path: |
'~\AppData\Local\mini-esgf-data\mini-esgf-data\Cache'
'~\AppData\Local\xclim-testdata\xclim-testdata\Cache'
.tox
key: ${{ env.CACHE_KEY }}

- name: Test with tox
run: |
python -m tox
python -m tox -- ${{ matrix.positional_args }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COVERALLS_FLAG_NAME: run-Python${{ matrix.python-version }}
COVERALLS_PARALLEL: true
COVERALLS_SERVICE_NAME: github

test-conda:
name: Python${{ matrix.python-version }} (Anaconda)
name: Python${{ matrix.python-version }} (Anaconda, ${{ matrix.os}})
needs: lint
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
timeout-minutes: 20
strategy:
fail-fast: false
matrix:
python-version: [ "3.11", "3.12", "3.13", "3.14" ]
python-version: [ "3.11", "3.14" ]
os: [ "macos-latest", "ubuntu-latest" ]
defaults:
run:
shell: bash -l {0}
steps:
- name: Harden Runner
uses: step-security/harden-runner@fe104658747b27e96e4f7e80cd0a94068e53901d # v2.16.1
if: matrix.os == 'ubuntu-latest'
with:
disable-sudo: true
egress-policy: audit
Expand All @@ -152,8 +184,17 @@ jobs:
run: |
python -m pip install --no-user --no-deps --editable .

- name: Test Data Caching
- name: Test Data Caching (Linux)
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
if : matrix.os == 'ubuntu-latest'
with:
path: |
~/.cache/mini-esgf-data
~/.cache/xclim-testdata
key: ${{ runner.os }}-${{ hashFiles('pyproject.toml', 'tox.ini') }}-conda-${{ env.ESGF_TEST_DATA_VERSION }}
- name: Test Data Caching (macOS)
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
if: matrix.os == 'ubuntu-latest'
with:
path: |
~/.cache/mini-esgf-data
Expand Down
2 changes: 1 addition & 1 deletion clisops/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@

__author__ = """Eleanor Smith"""
__contact__ = "eleanor.smith@stfc.ac.uk"
__copyright__ = "Copyright 2018-2024. United Kingdom Research and Innovation"
__copyright__ = "Copyright 2018-2025. United Kingdom Research and Innovation"
__license__ = "BSD-3-Clause"
__version__ = "0.18.0"
33 changes: 18 additions & 15 deletions clisops/project_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import glob
import os
from pathlib import Path

import xarray as xr
from loguru import logger
Expand Down Expand Up @@ -62,14 +63,15 @@ def _get_base_dirs_dict():
def _is_ds_id(dset):
return dset.count(".") > 1

def _deduce_project(self, dset):
def _deduce_project(self, dset) -> str | None:
if isinstance(dset, str):
if dset.startswith("/"):
if os.path.isabs(dset):
# by default this returns c3s-cmip6 not cmip6 (as they have the same base_dir)
base_dirs_dict = self._get_base_dirs_dict()
for project, base_dir in base_dirs_dict.items():
if dset.startswith(base_dir) and CONFIG[f"project:{project}"].get("is_default_for_path") is True:
return project
return None

elif self._is_ds_id(dset):
return dset.split(".")[0].lower()
Expand All @@ -78,7 +80,8 @@ def _deduce_project(self, dset):
elif dset.endswith(".nc") or os.path.isfile(dset):
dset = xr.open_dataset(dset, decode_times=xr.coders.CFDatetimeCoder(use_cftime=True))
return get_project_from_ds(dset)

else:
return None
else:
raise InvalidProject(f"The format of {dset} is not known and the project name could not be found.")

Expand All @@ -104,7 +107,7 @@ def _parse(self, force):
self._base_dir = get_project_base_dir(self._project)

# if a file, group of files or directory to files - find files
if dset.startswith("/") or dset.endswith(".nc"):
if Path(dset).is_absolute() or dset.endswith(".nc"):
# if instance of FileMapper
if isinstance(self.dset, FileMapper):
self._files = self.dset.file_paths
Expand All @@ -117,19 +120,19 @@ def _parse(self, force):
self._files.append(dset)

# remove file extension to create data_path
self._data_path = "/".join(dset.split("/")[:-1])
self._data_path = os.path.dirname(dset)

# if base_dir identified, insert into data_path
if self._base_dir:
self._ds_id = ".".join(self._data_path.replace(self._base_dir, self._project).strip("/").split("/"))
relative_path = os.path.relpath(self._data_path, self._base_dir)
self._ds_id = ".".join(relative_path.split(os.sep))

# test if dataset id
elif self._is_ds_id(dset):
self._ds_id = dset

mappings = CONFIG.get(f"project:{self.project}", {}).get("fixed_path_mappings", {})

# If the dataset uses a fixed path mapping (from the config file) then use it
if self._ds_id in mappings:
data_path = mappings[self._ds_id]
self._data_path = os.path.join(self._base_dir, data_path)
Expand All @@ -139,7 +142,7 @@ def _parse(self, force):

# Default mapping is done by converting '.' characters to '/' separators in path
else:
self._data_path = os.path.join(self._base_dir, "/".join(dset.split(".")[1:]))
self._data_path = os.path.join(self._base_dir, os.path.join(*dset.split(".")[1:]))

# use to data_path to find files if not set already
if len(self._files) < 1:
Expand Down Expand Up @@ -330,7 +333,7 @@ def switch_dset(dset: xr.Dataset | xr.DataArray | str | FileMapper) -> str:
str
The dataset path or dataset ID derived from the input dataset, switched from the input.
"""
if dset.startswith("/"):
if isinstance(dset, str) and (dset.startswith("/") or dset.startswith("\\")):
return datapath_to_dsid(dset)
else:
return dsid_to_datapath(dset)
Expand Down Expand Up @@ -445,9 +448,10 @@ def get_project_base_dir(project: str) -> str:
-------
str
The base directory of the specified project.
The URI uses platform-dependent path encoding.
"""
try:
return CONFIG[f"project:{project}"]["base_dir"]
return str(Path(CONFIG[f"project:{project}"]["base_dir"]))
except KeyError:
raise InvalidProject("The project supplied is not known.")

Expand Down Expand Up @@ -491,12 +495,11 @@ def get_project_from_data_node_root(url: str) -> str:
"""
data_node_dict = get_data_node_dirs_dict()
project = None

for proj, data_node_root in data_node_dict.items():
if data_node_root in url:
project = proj

if not project:
if project is None:
raise InvalidProject(
f"The project could not be identified from the URL {url} so it could not be mapped to a file path."
)
Expand All @@ -519,8 +522,8 @@ def url_to_file_path(url: str) -> str:
"""
project = get_project_from_data_node_root(url)

data_node_root = CONFIG.get(f"project:{project}", {}).get("data_node_root")
base_dir = CONFIG.get(f"project:{project}", {}).get("base_dir")
file_path = os.path.join(base_dir, url.partition(data_node_root)[2])
data_node_root = str(Path(CONFIG.get(f"project:{project}", {}).get("data_node_root")))
base_dir = str(Path(CONFIG.get(f"project:{project}", {}).get("base_dir")))
file_path = str(Path(base_dir).joinpath(str(Path(url.partition(data_node_root)[2]))))

return file_path
9 changes: 5 additions & 4 deletions clisops/utils/dataset_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ def get_coord_type(coord: xr.DataArray | xr.Dataset) -> str | None:
return None


def get_main_variable(ds, exclude_common_coords=True):
def get_main_variable(ds: xr.Dataset, exclude_common_coords: bool = True):
"""
Find the main variable of an xarray Dataset.

Expand Down Expand Up @@ -401,7 +401,7 @@ def get_main_variable(ds, exclude_common_coords=True):
return result


def open_xr_dataset(dset: str | pathlib.Path | list[str | pathlib.Path], **kwargs):
def open_xr_dataset(dset: str | pathlib.Path | list[str | pathlib.Path], **kwargs) -> xr.Dataset:
"""
Open an xarray dataset from a dataset input.

Expand Down Expand Up @@ -441,7 +441,7 @@ def open_xr_dataset(dset: str | pathlib.Path | list[str | pathlib.Path], **kwarg

# If an empty sequence, then raise an Exception
if not len(dset):
raise Exception("No files found to open with xarray.")
raise FileNotFoundError("No files found to open with xarray.")

# if a list we want a multi-file dataset
if len(dset) > 1:
Expand Down Expand Up @@ -481,7 +481,7 @@ def _get_kwargs_for_opener(otype, **kwargs):
"remote_options",
"target_options",
]
allowed_multi_args = ["combine"]
allowed_multi_args = ["combine", "data_vars"]

args = {
"decode_times": xr.coders.CFDatetimeCoder(use_cftime=True),
Expand All @@ -494,6 +494,7 @@ def _get_kwargs_for_opener(otype, **kwargs):

if otype.lower() == "multi":
args["combine"] = "by_coords"
args["data_vars"] = "all"
allowed_args.extend(allowed_multi_args)
elif otype.lower() == "zarr":
allowed_args.extend(allowed_zarr_args)
Expand Down
Loading
Loading