Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/582.improvement.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fetch ESMValTool recipes when installing the provider.
Original file line number Diff line number Diff line change
Expand Up @@ -245,14 +245,18 @@ def register( # noqa: PLR0913
This defaults to the value of `name` if not provided.
"""
if cache_name is None:
cache_name = "climate_ref"
cache_name = name

if env_cache_dir := os.environ.get("REF_DATASET_CACHE_DIR"):
cache_dir = pathlib.Path(os.path.expandvars(env_cache_dir)).expanduser()
else:
cache_dir = pooch.os_cache("climate_ref")

registry = pooch.create(
path=pooch.os_cache(cache_name),
path=cache_dir / cache_name,
base_url=base_url,
version=version,
retry_if_failed=10,
env="REF_DATASET_CACHE_DIR",
)
registry.load_registry(str(importlib.resources.files(package) / resource))
self._registries[name] = registry
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,21 +85,53 @@ def test_getitem(self, mocker, fake_registry_file):
assert retrieved_registry == mock_pooch_instance

@pytest.mark.parametrize(
"cache_name, expected", [(None, "climate_ref"), ("custom_cache", "custom_cache")]
"cache_name, expected", [(None, "test_registry"), ("custom_cache", "custom_cache")]
)
def test_with_cache_name(self, mocker, fake_registry_file, cache_name, expected):
registry = DatasetRegistryManager()
name = "test_registry"
base_url = "http://example.com"

mock_pooch = mocker.patch("climate_ref_core.dataset_registry.pooch")
mock_pooch.os_cache.return_value = Path("/path/to/climate_ref")
package, resource = self.setup_registry_file(fake_registry_file)

registry.register(name, base_url, package, resource, cache_name=cache_name)

mock_pooch.os_cache.assert_called_with(expected)
mock_pooch.os_cache.assert_called_with("climate_ref")
assert name in registry._registries
mock_pooch.create.assert_called_once()
expected_kwargs = {
"base_url": "http://example.com",
"path": Path("/path/to/climate_ref", expected),
"retry_if_failed": 10,
"version": None,
}
mock_pooch.create.assert_called_once_with(**expected_kwargs)

@pytest.mark.parametrize("env", [None, "", "/some/other/path"])
def test_with_environment_variable(self, monkeypatch, mocker, fake_registry_file, env):
    """Check how ``REF_DATASET_CACHE_DIR`` affects the path passed to ``pooch.create``.

    When the variable is unset or empty, the registry is expected to be created
    under the (mocked) ``pooch.os_cache`` directory; otherwise it is expected
    under the directory named by the variable. In every case the registry name
    is the final path component.
    """
    default_root = Path("/path/to/climate_ref")
    name = "test_registry"
    base_url = "http://example.com"

    if env is None:
        # Drop any value inherited from the developer/CI environment so the
        # "unset" case is genuinely unset and the test stays hermetic.
        monkeypatch.delenv("REF_DATASET_CACHE_DIR", raising=False)
    else:
        monkeypatch.setenv("REF_DATASET_CACHE_DIR", env)

    # An empty string is falsy, so it is expected to fall back to the default root.
    expected_path = (Path(env) if env else default_root) / name

    registry = DatasetRegistryManager()

    mock_pooch = mocker.patch("climate_ref_core.dataset_registry.pooch")
    mock_pooch.os_cache.return_value = default_root
    package, resource = self.setup_registry_file(fake_registry_file)

    registry.register(name, base_url, package, resource)

    assert name in registry._registries
    mock_pooch.create.assert_called_once_with(
        path=expected_path,
        base_url=base_url,
        retry_if_failed=10,
        version=None,
    )


@pytest.mark.parametrize("symlink", [True, False])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,26 @@
)
from climate_ref_core.providers import CondaDiagnosticProvider
from climate_ref_esmvaltool._version import __version__
from climate_ref_esmvaltool.recipe import _ESMVALCORE_URL, _ESMVALTOOL_URL
from climate_ref_esmvaltool.diagnostics.base import _DATASETS_REGISTRY_NAME
from climate_ref_esmvaltool.recipe import (
_ESMVALCORE_URL,
_ESMVALTOOL_URL,
_RECIPES_REGISTRY_NAME,
_RECIPES_URL,
)

if TYPE_CHECKING:
from climate_ref.config import Config

_REGISTRY_NAME = "esmvaltool"


class ESMValToolProvider(CondaDiagnosticProvider):
"""Provider for ESMValTool diagnostics."""

def fetch_data(self, config: Config) -> None:
"""Fetch ESMValTool reference data."""
registry = dataset_registry_manager[_REGISTRY_NAME]
fetch_all_files(registry, _REGISTRY_NAME, output_dir=None)
for registry_name in [_DATASETS_REGISTRY_NAME, _RECIPES_REGISTRY_NAME]:
registry = dataset_registry_manager[registry_name]
fetch_all_files(registry, registry_name, output_dir=None)

def validate_setup(self, config: Config) -> bool:
"""Validate conda environment and data checksums."""
Expand All @@ -42,8 +47,9 @@ def validate_setup(self, config: Config) -> bool:
return False

# Then check data checksums
registry = dataset_registry_manager[_REGISTRY_NAME]
errors = validate_registry_cache(registry, _REGISTRY_NAME)
errors = []
for registry_name in [_DATASETS_REGISTRY_NAME, _RECIPES_REGISTRY_NAME]:
errors.extend(validate_registry_cache(dataset_registry_manager[registry_name], registry_name))
if errors:
for error in errors:
logger.error(f"{self.slug} validation failed: {error}")
Expand Down Expand Up @@ -73,8 +79,17 @@ def get_data_path(self) -> Path | None:

# Register OBS, OBS6, and raw data
dataset_registry_manager.register(
"esmvaltool",
name=_DATASETS_REGISTRY_NAME,
base_url=DATASET_URL,
package="climate_ref_esmvaltool.dataset_registry",
resource="data.txt",
cache_name=_DATASETS_REGISTRY_NAME.replace("-", "/"),
)
# Register the ESMValTool recipes.
dataset_registry_manager.register(
name=_RECIPES_REGISTRY_NAME,
base_url=_RECIPES_URL,
package="climate_ref_esmvaltool",
resource="recipes.txt",
cache_name=_RECIPES_REGISTRY_NAME.replace("-", "/"),
)
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
)
from climate_ref_esmvaltool.types import MetricBundleArgs, OutputBundleArgs, Recipe

_DATASETS_REGISTRY_NAME = "esmvaltool-datasets"


def get_cmip_source_type(
input_files: dict[SourceDatasetType, pandas.DataFrame],
Expand Down Expand Up @@ -202,13 +204,13 @@ def build_cmd(self, definition: ExecutionDefinition) -> Iterable[str]:
}

# Configure the paths to OBS/OBS6/native6 and non-compliant obs4MIPs data
registry = dataset_registry_manager["esmvaltool"]
registry = dataset_registry_manager[_DATASETS_REGISTRY_NAME]
data_dir = registry.abspath / "ESMValTool" # type: ignore[attr-defined]
if not data_dir.exists():
logger.warning(
"ESMValTool observational and reanalysis data is not available "
f"in {data_dir}, you may want to run the command "
"`ref datasets fetch-data --registry esmvaltool`."
f"`ref datasets fetch-data --registry {_DATASETS_REGISTRY_NAME}`."
)
else:
config["projects"]["OBS"] = {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from __future__ import annotations

import importlib.resources
from collections.abc import Iterator
from pathlib import Path
from typing import TYPE_CHECKING, Any

import cftime
import pandas as pd
import pooch
import yaml

from climate_ref_core.dataset_registry import dataset_registry_manager
from climate_ref_esmvaltool.types import Recipe

if TYPE_CHECKING:
Expand Down Expand Up @@ -295,17 +294,10 @@ def get_child_and_parent_dataset(
_ESMVALCORE_COMMIT = "da81d5f67158f3d2603831b56ab6b4fb8a388d86"
_ESMVALCORE_URL = f"git+https://github.com/ESMValGroup/ESMValCore.git@{_ESMVALCORE_COMMIT}"

_RECIPES = pooch.create(
path=pooch.os_cache("climate_ref_esmvaltool"),
# TODO: use a released version
# base_url="https://raw.githubusercontent.com/ESMValGroup/ESMValTool/refs/tags/v{version}/esmvaltool/recipes/",
# version=_ESMVALTOOL_VERSION,
base_url=f"https://raw.githubusercontent.com/ESMValGroup/ESMValTool/{_ESMVALTOOL_COMMIT}/esmvaltool/recipes/",
env="REF_METRICS_ESMVALTOOL_DATA_DIR",
retry_if_failed=10,
_RECIPES_URL = (
f"https://raw.githubusercontent.com/ESMValGroup/ESMValTool/{_ESMVALTOOL_COMMIT}/esmvaltool/recipes/"
)
with importlib.resources.files("climate_ref_esmvaltool").joinpath("recipes.txt").open("rb") as _buffer:
_RECIPES.load_registry(_buffer)
_RECIPES_REGISTRY_NAME = f"esmvaltool-recipes-v{_ESMVALTOOL_VERSION}"


def fix_annual_statistics_keep_year(recipe: Recipe) -> None:
Expand Down Expand Up @@ -348,7 +340,7 @@ def load_recipe(recipe: str) -> Recipe:
-------
The loaded recipe.
"""
filename = _RECIPES.fetch(recipe)
filename = dataset_registry_manager[_RECIPES_REGISTRY_NAME].fetch(recipe)

def normalize(obj: Any) -> Any:
# Ensure objects in the recipe are not shared.
Expand Down
10 changes: 5 additions & 5 deletions packages/climate-ref-esmvaltool/tests/unit/test_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pathlib import Path

import pooch
from climate_ref_esmvaltool import ESMValToolProvider, __version__, provider
from climate_ref_esmvaltool import _DATASETS_REGISTRY_NAME, ESMValToolProvider, __version__, provider


def test_provider():
Expand Down Expand Up @@ -40,11 +40,11 @@ def test_fetch_data(self, mocker):

provider.fetch_data(mock_config)

mock_fetch.assert_called_once()
mock_fetch.assert_called()
# Check it's using the right registry name
call_args = mock_fetch.call_args
assert call_args[0][1] == "esmvaltool"
assert call_args[1]["output_dir"] is None
call = mock_fetch.mock_calls[0]
assert call.args[1] == _DATASETS_REGISTRY_NAME
assert call.kwargs["output_dir"] is None

def test_validate_setup_env_missing(self, mocker):
"""Test validate_setup returns False when conda env is missing."""
Expand Down
Loading