Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions src/rook/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,6 @@

from .__version__ import __author__, __email__, __version__ # noqa: F401

from clisops.config import get_config


# Workaround for clisops to not re-import rook
class Package:
__file__ = __file__ # noqa


package = Package()
CONFIG = get_config(package)
from .config import CONFIG # noqa: F401

from .wsgi import application # noqa
10 changes: 4 additions & 6 deletions src/rook/catalog/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pathlib import Path

from rook import CONFIG
from rook import config


def make_list(value):
Expand Down Expand Up @@ -37,12 +37,10 @@ def __init__(self, project, records):

Records are an OrderedDict of dataset ids with a list of files: {'ds_id': [files]}.
"""
project_config = CONFIG.get(f"project:{project}", {})
s3_config = CONFIG.get("s3", {})
project_config = config.get_project_config(project)
self.base_dir = project_config.get("base_dir")
self.s3_base_dir = project_config.get("s3_base_dir") or s3_config.get(
"base_dir"
)
storage_base = config.get_storage_base(project)
self.s3_base_dir = storage_base if storage_base != self.base_dir else None
self.base_url = project_config.get("data_node_root")
self.records = records

Expand Down
104 changes: 104 additions & 0 deletions src/rook/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""Central access to Rook configuration."""

import json
import sys
from pathlib import Path
from typing import Any

from clisops.config import get_config as _get_clisops_config
from clisops.config import reload_config as _reload_clisops_config


_PACKAGE_FILE = Path(__file__)
CONFIG = _get_clisops_config(_PACKAGE_FILE)


def get_config() -> dict[str, Any]:
"""Return the current Rook configuration."""
return CONFIG


def reload_config() -> dict[str, Any]:
"""Reload Rook configuration from the standard clisops sources."""
global CONFIG

CONFIG = _reload_clisops_config(_PACKAGE_FILE)

# Keep the public compatibility alias current without making config depend
# on importing the rest of Rook.
rook_module = sys.modules.get("rook")
if rook_module is not None:
rook_module.CONFIG = CONFIG

return CONFIG


def get_project_config(project: str) -> dict[str, Any]:
"""Return configuration for a project, or an empty mapping."""
config = get_config().get(f"project:{project}", {})
return config if isinstance(config, dict) else {}


def get_s3_storage_options() -> dict[str, Any]:
"""Build shared fsspec S3 transport options from Rook configuration."""
config = get_config().get("s3", {})
if not isinstance(config, dict):
return {}

options: dict[str, Any] = {}

raw_options = config.get("storage_options_json")
if raw_options:
options.update(_parse_json_dict(raw_options))

raw_client = config.get("client_kwargs_json")
if raw_client:
client_options = _parse_json_dict(raw_client)
if client_options:
options.setdefault("client_kwargs", {}).update(client_options)

endpoint_url = config.get("endpoint_url")
if endpoint_url:
options.setdefault("client_kwargs", {})["endpoint_url"] = endpoint_url

for key in ("anon", "key", "secret", "token"):
value = config.get(key)
if value is None or value == "":
continue
options[key] = _coerce_bool(value) if key == "anon" else value

return options


def get_storage_base(project: str) -> str | None:
"""Return the preferred processing root for a project."""
project_config = get_project_config(project)
s3_config = get_config().get("s3", {})
global_s3_base = (
s3_config.get("base_dir") if isinstance(s3_config, dict) else None
)
return (
project_config.get("s3_base_dir")
or global_s3_base
or project_config.get("base_dir")
)


def _parse_json_dict(value: Any) -> dict[str, Any]:
try:
parsed = json.loads(value)
except (TypeError, json.JSONDecodeError):
return {}
return parsed if isinstance(parsed, dict) else {}


def _coerce_bool(value: Any) -> Any:
if isinstance(value, bool):
return value
if isinstance(value, str):
lowered = value.strip().lower()
if lowered in {"1", "true", "yes", "on"}:
return True
if lowered in {"0", "false", "no", "off"}:
return False
return value
57 changes: 3 additions & 54 deletions src/rook/utils/ops/helpers.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
"""Helper utilities for operation plumbing."""

import collections
import json
from pathlib import Path
from urllib.parse import urlsplit

import xarray as xr
from clisops.utils.dataset_utils import open_xr_dataset

from rook import CONFIG
from rook import config
from rook.utils.apply_fixes import apply_fixes as apply_dataset_fixes

KERCHUNK_EXTS = (".json", ".zst", ".zstd", ".parquet")
Expand Down Expand Up @@ -142,55 +141,5 @@ def get_s3_open_kwargs(ds_id, file_paths):


def get_s3_storage_options():
"""Build fsspec S3 storage options from rook config."""
cfg = CONFIG.get("s3", {})
if not isinstance(cfg, dict):
return {}

options = {}

raw_options = cfg.get("storage_options_json")
if raw_options:
parsed = _parse_json_dict(raw_options)
if parsed:
options.update(parsed)

raw_client = cfg.get("client_kwargs_json")
if raw_client:
parsed = _parse_json_dict(raw_client)
if parsed:
options.setdefault("client_kwargs", {}).update(parsed)

endpoint_url = cfg.get("endpoint_url")
if endpoint_url:
options.setdefault("client_kwargs", {})["endpoint_url"] = endpoint_url

for key in ("anon", "key", "secret", "token"):
value = cfg.get(key)
if value is None or value == "":
continue
if key == "anon":
value = _coerce_bool(value)
options[key] = value

return options


def _parse_json_dict(value):
try:
parsed = json.loads(value)
except (TypeError, json.JSONDecodeError):
return {}
return parsed if isinstance(parsed, dict) else {}


def _coerce_bool(value):
if isinstance(value, bool):
return value
if isinstance(value, str):
lowered = value.strip().lower()
if lowered in {"1", "true", "yes", "on"}:
return True
if lowered in {"0", "false", "no", "off"}:
return False
return value
"""Return shared S3 transport options from central configuration."""
return config.get_s3_storage_options()
6 changes: 1 addition & 5 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,20 +77,16 @@ def write_roocs_cfg(stratus):
# TODO: reload configs in clisops
# workaround ... fix code in new clisops.
import clisops
from clisops.config import reload_config
import rook
import rook.catalog
import rook.catalog.base
import rook.catalog.intake
import rook.director.director
from rook.config import reload_config

cfg = reload_config()
clisops.CONFIG = cfg
clisops.project_utils.CONFIG = cfg
rook.CONFIG = cfg
rook.director.director.CONFIG = cfg
rook.catalog.CONFIG = cfg
rook.catalog.base.CONFIG = cfg
rook.catalog.intake.CONFIG = cfg
# print("clisops.config", clisops.CONFIG["project:cmip5"]["base_dir"])
# print("rook.config", rook.CONFIG["project:cmip6"]["base_dir"])
Expand Down
10 changes: 5 additions & 5 deletions tests/test_catalog_base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from rook.catalog import base
from rook import config
from rook.catalog.base import Result


Expand All @@ -11,7 +11,7 @@

def test_result_files_uses_project_base_dir(monkeypatch):
monkeypatch.setattr(
base,
config,
"CONFIG",
{"project:c3s-cmip6": {"base_dir": "/data/CMIP6"}},
)
Expand All @@ -25,7 +25,7 @@ def test_result_files_uses_project_base_dir(monkeypatch):

def test_result_files_uses_global_s3_base_dir(monkeypatch):
monkeypatch.setattr(
base,
config,
"CONFIG",
{
"project:c3s-cmip6": {"base_dir": "/data/CMIP6"},
Expand All @@ -44,7 +44,7 @@ def test_result_files_uses_global_s3_base_dir(monkeypatch):

def test_result_files_uses_project_s3_base_dir_override(monkeypatch):
monkeypatch.setattr(
base,
config,
"CONFIG",
{
"project:c3s-cmip6": {
Expand All @@ -66,7 +66,7 @@ def test_result_files_uses_project_s3_base_dir_override(monkeypatch):

def test_result_download_urls_keep_data_node_root(monkeypatch):
monkeypatch.setattr(
base,
config,
"CONFIG",
{
"project:c3s-cmip6": {
Expand Down
60 changes: 60 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import rook
from rook import config


def test_get_project_config(monkeypatch):
monkeypatch.setattr(
config,
"CONFIG",
{"project:demo": {"base_dir": "/data/demo"}},
)

assert config.get_project_config("demo") == {"base_dir": "/data/demo"}
assert config.get_project_config("missing") == {}


def test_get_storage_base_prefers_project_s3_override(monkeypatch):
monkeypatch.setattr(
config,
"CONFIG",
{
"project:demo": {
"base_dir": "/data/demo",
"s3_base_dir": "s3://project/demo",
},
"s3": {"base_dir": "s3://global/data"},
},
)

assert config.get_storage_base("demo") == "s3://project/demo"


def test_get_storage_base_falls_back_to_local_project_root(monkeypatch):
monkeypatch.setattr(
config,
"CONFIG",
{"project:demo": {"base_dir": "/data/demo"}},
)

assert config.get_storage_base("demo") == "/data/demo"


def test_s3_options_ignore_malformed_optional_json(monkeypatch):
monkeypatch.setattr(
config,
"CONFIG",
{"s3": {"storage_options_json": "not-json", "anon": "false"}},
)

assert config.get_s3_storage_options() == {"anon": False}


def test_reload_config_updates_compatibility_alias(monkeypatch):
reloaded = {"project:demo": {"base_dir": "/new/data"}}
monkeypatch.setattr(config, "CONFIG", config.CONFIG)
monkeypatch.setattr(rook, "CONFIG", rook.CONFIG)
monkeypatch.setattr(config, "_reload_clisops_config", lambda _path: reloaded)

assert config.reload_config() is reloaded
assert config.get_config() is reloaded
assert rook.CONFIG is reloaded
4 changes: 2 additions & 2 deletions tests/test_ops_consolidate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import rook.utils.ops.consolidate as consolidate
from rook.catalog import base
from rook import config
from rook.catalog.base import Result


Expand Down Expand Up @@ -73,7 +73,7 @@ def search(self, collection, time):
monkeypatch.setattr(consolidate, "derive_ds_id", lambda _dset: "dataset")
monkeypatch.setattr(consolidate, "get_catalog", lambda _project: DummyCatalog())
monkeypatch.setattr(
base,
config,
"CONFIG",
{
"project:c3s-cmip6": {"base_dir": "/data/CMIP6"},
Expand Down
Loading