From 6ce2df427f717d00376ae3133d917ae48ac5266f Mon Sep 17 00:00:00 2001 From: Carsten Ehbrecht Date: Thu, 18 Jun 2026 20:36:26 +0200 Subject: [PATCH] added a rook config module --- src/rook/__init__.py | 11 +--- src/rook/catalog/base.py | 10 ++-- src/rook/config.py | 104 ++++++++++++++++++++++++++++++++++ src/rook/utils/ops/helpers.py | 57 +------------------ tests/conftest.py | 6 +- tests/test_catalog_base.py | 10 ++-- tests/test_config.py | 60 ++++++++++++++++++++ tests/test_ops_consolidate.py | 4 +- tests/test_ops_helpers.py | 13 +++-- 9 files changed, 187 insertions(+), 88 deletions(-) create mode 100644 src/rook/config.py create mode 100644 tests/test_config.py diff --git a/src/rook/__init__.py b/src/rook/__init__.py index 68fb0de0..4ebcd97b 100644 --- a/src/rook/__init__.py +++ b/src/rook/__init__.py @@ -20,15 +20,6 @@ from .__version__ import __author__, __email__, __version__ # noqa: F401 -from clisops.config import get_config - - -# Workaround for clisops to not re-import rook -class Package: - __file__ = __file__ # noqa - - -package = Package() -CONFIG = get_config(package) +from .config import CONFIG # noqa: F401 from .wsgi import application # noqa diff --git a/src/rook/catalog/base.py b/src/rook/catalog/base.py index c3c734ed..b4d2329f 100644 --- a/src/rook/catalog/base.py +++ b/src/rook/catalog/base.py @@ -2,7 +2,7 @@ from pathlib import Path -from rook import CONFIG +from rook import config def make_list(value): @@ -37,12 +37,10 @@ def __init__(self, project, records): Records are an OrderedDict of dataset ids with a list of files: {'ds_id': [files]}. """ - project_config = CONFIG.get(f"project:{project}", {}) - s3_config = CONFIG.get("s3", {}) + project_config = config.get_project_config(project) self.base_dir = project_config.get("base_dir") - self.s3_base_dir = project_config.get("s3_base_dir") or s3_config.get( - "base_dir" - ) + storage_base = config.get_storage_base(project) + self.s3_base_dir = storage_base if storage_base != self.base_dir else None self.base_url = project_config.get("data_node_root") self.records = records diff --git a/src/rook/config.py b/src/rook/config.py new file mode 100644 index 00000000..d522bca6 --- /dev/null +++ b/src/rook/config.py @@ -0,0 +1,104 @@ +"""Central access to Rook configuration.""" + +import json +import sys +from pathlib import Path +from typing import Any + +from clisops.config import get_config as _get_clisops_config +from clisops.config import reload_config as _reload_clisops_config + + +_PACKAGE_FILE = Path(__file__) +CONFIG = _get_clisops_config(_PACKAGE_FILE) + + +def get_config() -> dict[str, Any]: + """Return the current Rook configuration.""" + return CONFIG + + +def reload_config() -> dict[str, Any]: + """Reload Rook configuration from the standard clisops sources.""" + global CONFIG + + CONFIG = _reload_clisops_config(_PACKAGE_FILE) + + # Keep the public compatibility alias current without making config depend + # on importing the rest of Rook. + rook_module = sys.modules.get("rook") + if rook_module is not None: + rook_module.CONFIG = CONFIG + + return CONFIG + + +def get_project_config(project: str) -> dict[str, Any]: + """Return configuration for a project, or an empty mapping.""" + config = get_config().get(f"project:{project}", {}) + return config if isinstance(config, dict) else {} + + +def get_s3_storage_options() -> dict[str, Any]: + """Build shared fsspec S3 transport options from Rook configuration.""" + config = get_config().get("s3", {}) + if not isinstance(config, dict): + return {} + + options: dict[str, Any] = {} + + raw_options = config.get("storage_options_json") + if raw_options: + options.update(_parse_json_dict(raw_options)) + + raw_client = config.get("client_kwargs_json") + if raw_client: + client_options = _parse_json_dict(raw_client) + if client_options: + options.setdefault("client_kwargs", {}).update(client_options) + + endpoint_url = config.get("endpoint_url") + if endpoint_url: + options.setdefault("client_kwargs", {})["endpoint_url"] = endpoint_url + + for key in ("anon", "key", "secret", "token"): + value = config.get(key) + if value is None or value == "": + continue + options[key] = _coerce_bool(value) if key == "anon" else value + + return options + + +def get_storage_base(project: str) -> str | None: + """Return the preferred processing root for a project.""" + project_config = get_project_config(project) + s3_config = get_config().get("s3", {}) + global_s3_base = ( + s3_config.get("base_dir") if isinstance(s3_config, dict) else None + ) + return ( + project_config.get("s3_base_dir") + or global_s3_base + or project_config.get("base_dir") + ) + + +def _parse_json_dict(value: Any) -> dict[str, Any]: + try: + parsed = json.loads(value) + except (TypeError, json.JSONDecodeError): + return {} + return parsed if isinstance(parsed, dict) else {} + + +def _coerce_bool(value: Any) -> Any: + if isinstance(value, bool): + return value + if isinstance(value, str): + lowered = value.strip().lower() + if lowered in {"1", "true", "yes", "on"}: + return True + if lowered in {"0", "false", "no", "off"}: + return False + return value diff --git a/src/rook/utils/ops/helpers.py b/src/rook/utils/ops/helpers.py index 51844e15..6f1f5f41 100644 --- a/src/rook/utils/ops/helpers.py +++ b/src/rook/utils/ops/helpers.py @@ -1,14 +1,13 @@ """Helper utilities for operation plumbing.""" import collections -import json from pathlib import Path from urllib.parse import urlsplit import xarray as xr from clisops.utils.dataset_utils import open_xr_dataset -from rook import CONFIG +from rook import config from rook.utils.apply_fixes import apply_fixes as apply_dataset_fixes KERCHUNK_EXTS = (".json", ".zst", ".zstd", ".parquet") @@ -142,55 +141,5 @@ def get_s3_open_kwargs(ds_id, file_paths): def get_s3_storage_options(): - """Build fsspec S3 storage options from rook config.""" - cfg = CONFIG.get("s3", {}) - if not isinstance(cfg, dict): - return {} - - options = {} - - raw_options = cfg.get("storage_options_json") - if raw_options: - parsed = _parse_json_dict(raw_options) - if parsed: - options.update(parsed) - - raw_client = cfg.get("client_kwargs_json") - if raw_client: - parsed = _parse_json_dict(raw_client) - if parsed: - options.setdefault("client_kwargs", {}).update(parsed) - - endpoint_url = cfg.get("endpoint_url") - if endpoint_url: - options.setdefault("client_kwargs", {})["endpoint_url"] = endpoint_url - - for key in ("anon", "key", "secret", "token"): - value = cfg.get(key) - if value is None or value == "": - continue - if key == "anon": - value = _coerce_bool(value) - options[key] = value - - return options - - -def _parse_json_dict(value): - try: - parsed = json.loads(value) - except (TypeError, json.JSONDecodeError): - return {} - return parsed if isinstance(parsed, dict) else {} - - -def _coerce_bool(value): - if isinstance(value, bool): - return value - if isinstance(value, str): - lowered = value.strip().lower() - if lowered in {"1", "true", "yes", "on"}: - return True - if lowered in {"0", "false", "no", "off"}: - return False - return value + """Return shared S3 transport options from central configuration.""" + return config.get_s3_storage_options() diff --git a/tests/conftest.py b/tests/conftest.py index 4e50ac1e..766f9793 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -77,20 +77,16 @@ def write_roocs_cfg(stratus): # TODO: reload configs in clisops # workaround ... fix code in new clisops. import clisops - from clisops.config import reload_config - import rook import rook.catalog - import rook.catalog.base import rook.catalog.intake import rook.director.director + from rook.config import reload_config cfg = reload_config() clisops.CONFIG = cfg clisops.project_utils.CONFIG = cfg - rook.CONFIG = cfg rook.director.director.CONFIG = cfg rook.catalog.CONFIG = cfg - rook.catalog.base.CONFIG = cfg rook.catalog.intake.CONFIG = cfg # print("clisops.config", clisops.CONFIG["project:cmip5"]["base_dir"]) # print("rook.config", rook.CONFIG["project:cmip6"]["base_dir"]) diff --git a/tests/test_catalog_base.py b/tests/test_catalog_base.py index eb20ba51..c37a50a1 100644 --- a/tests/test_catalog_base.py +++ b/tests/test_catalog_base.py @@ -1,4 +1,4 @@ -from rook.catalog import base +from rook import config from rook.catalog.base import Result @@ -11,7 +11,7 @@ def test_result_files_uses_project_base_dir(monkeypatch): monkeypatch.setattr( - base, + config, "CONFIG", {"project:c3s-cmip6": {"base_dir": "/data/CMIP6"}}, ) @@ -25,7 +25,7 @@ def test_result_files_uses_project_base_dir(monkeypatch): def test_result_files_uses_global_s3_base_dir(monkeypatch): monkeypatch.setattr( - base, + config, "CONFIG", { "project:c3s-cmip6": {"base_dir": "/data/CMIP6"}, @@ -44,7 +44,7 @@ def test_result_files_uses_global_s3_base_dir(monkeypatch): def test_result_files_uses_project_s3_base_dir_override(monkeypatch): monkeypatch.setattr( - base, + config, "CONFIG", { "project:c3s-cmip6": { @@ -66,7 +66,7 @@ def test_result_files_uses_project_s3_base_dir_override(monkeypatch): def test_result_download_urls_keep_data_node_root(monkeypatch): monkeypatch.setattr( - base, + config, "CONFIG", { "project:c3s-cmip6": { diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 00000000..08062d69 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,60 @@ +import rook +from rook import config + + +def test_get_project_config(monkeypatch): + monkeypatch.setattr( + config, + "CONFIG", + {"project:demo": {"base_dir": "/data/demo"}}, + ) + + assert config.get_project_config("demo") == {"base_dir": "/data/demo"} + assert config.get_project_config("missing") == {} + + +def test_get_storage_base_prefers_project_s3_override(monkeypatch): + monkeypatch.setattr( + config, + "CONFIG", + { + "project:demo": { + "base_dir": "/data/demo", + "s3_base_dir": "s3://project/demo", + }, + "s3": {"base_dir": "s3://global/data"}, + }, + ) + + assert config.get_storage_base("demo") == "s3://project/demo" + + +def test_get_storage_base_falls_back_to_local_project_root(monkeypatch): + monkeypatch.setattr( + config, + "CONFIG", + {"project:demo": {"base_dir": "/data/demo"}}, + ) + + assert config.get_storage_base("demo") == "/data/demo" + + +def test_s3_options_ignore_malformed_optional_json(monkeypatch): + monkeypatch.setattr( + config, + "CONFIG", + {"s3": {"storage_options_json": "not-json", "anon": "false"}}, + ) + + assert config.get_s3_storage_options() == {"anon": False} + + +def test_reload_config_updates_compatibility_alias(monkeypatch): + reloaded = {"project:demo": {"base_dir": "/new/data"}} + monkeypatch.setattr(config, "CONFIG", config.CONFIG) + monkeypatch.setattr(rook, "CONFIG", rook.CONFIG) + monkeypatch.setattr(config, "_reload_clisops_config", lambda _path: reloaded) + + assert config.reload_config() is reloaded + assert config.get_config() is reloaded + assert rook.CONFIG is reloaded diff --git a/tests/test_ops_consolidate.py b/tests/test_ops_consolidate.py index 85c582ac..61828754 100644 --- a/tests/test_ops_consolidate.py +++ b/tests/test_ops_consolidate.py @@ -1,5 +1,5 @@ import rook.utils.ops.consolidate as consolidate -from rook.catalog import base +from rook import config from rook.catalog.base import Result @@ -73,7 +73,7 @@ def search(self, collection, time): monkeypatch.setattr(consolidate, "derive_ds_id", lambda _dset: "dataset") monkeypatch.setattr(consolidate, "get_catalog", lambda _project: DummyCatalog()) monkeypatch.setattr( - base, + config, "CONFIG", { "project:c3s-cmip6": {"base_dir": "/data/CMIP6"}, diff --git a/tests/test_ops_helpers.py b/tests/test_ops_helpers.py index 195e238e..87381cd4 100644 --- a/tests/test_ops_helpers.py +++ b/tests/test_ops_helpers.py @@ -1,5 +1,6 @@ import xarray as xr +from rook import config import rook.utils.ops.helpers as helpers @@ -155,7 +156,7 @@ def fake_open_zarr(store, **kwargs): monkeypatch.setattr(helpers.xr, "open_zarr", fake_open_zarr) monkeypatch.setattr( - helpers, + config, "CONFIG", {"s3": {"anon": "true", "endpoint_url": "https://s3.example.org"}}, ) @@ -195,7 +196,7 @@ def fail_apply_fixes(_ds_id, _ds): def test_get_s3_open_kwargs_for_s3_netcdf(monkeypatch): monkeypatch.setattr( - helpers, + config, "CONFIG", {"s3": {"anon": "true", "endpoint_url": "https://s3.example.org"}}, ) @@ -215,7 +216,7 @@ def test_get_s3_open_kwargs_for_s3_netcdf(monkeypatch): def test_get_s3_open_kwargs_without_s3_config(monkeypatch): - monkeypatch.setattr(helpers, "CONFIG", {}) + monkeypatch.setattr(config, "CONFIG", {}) kwargs = helpers.get_s3_open_kwargs( "s3://example-bucket/path/file.nc", ["s3://example-bucket/path/file.nc"] @@ -226,7 +227,7 @@ def test_get_s3_open_kwargs_without_s3_config(monkeypatch): def test_get_s3_open_kwargs_skips_kerchunk(monkeypatch): monkeypatch.setattr( - helpers, + config, "CONFIG", {"s3": {"anon": "true", "endpoint_url": "https://s3.example.org"}}, ) @@ -240,7 +241,7 @@ def test_get_s3_open_kwargs_skips_kerchunk(monkeypatch): def test_get_s3_storage_options_merges_client_kwargs(monkeypatch): monkeypatch.setattr( - helpers, + config, "CONFIG", { "s3": { @@ -272,7 +273,7 @@ def fake_open(file_paths, **kwargs): monkeypatch.setattr(helpers, "is_kerchunk_file", lambda _: False) monkeypatch.setattr(helpers, "apply_dataset_fixes", lambda _ds_id, ds: ds) monkeypatch.setattr( - helpers, + config, "CONFIG", {"s3": {"anon": "true", "endpoint_url": "https://s3.example.org"}}, )