diff --git a/test/plant/test_mixed_types.py b/test/plant/test_mixed_types.py new file mode 100644 index 00000000..dc15dcf6 --- /dev/null +++ b/test/plant/test_mixed_types.py @@ -0,0 +1,71 @@ +"""Functional coverage for mixed turbine types and per-turbine wind resources. + +The windIO standard supports assigning different turbine types (and therefore +different hub heights) to different positions in a wind farm, and supports +wind-resource data dimensioned per turbine. These tests assert that those +structures parse and round-trip through windIO's public API, rather than only +being validated implicitly via the example-validation sweep. +""" + +from pathlib import Path + +import windIO + + +def _plant_examples_dir(): + return Path(windIO.plant_ex.__file__).parent + + +def test_mixed_turbine_types_wind_farm(): + """A wind farm assigns >1 turbine type by per-position index, and the + referenced types have distinct hub heights (mixed hub heights).""" + farm_yaml = _plant_examples_dir() / "plant_wind_farm" / "multiple_types.yaml" + + # Mixed types are a first-class windIO feature -> must validate. + windIO.validate(input=farm_yaml, schema_type="plant/wind_farm") + + farm = windIO.load_yaml(farm_yaml) + layout = farm["layouts"][0] + type_idx = layout["turbine_types"] + n_positions = len(layout["coordinates"]["x"]) + + # A per-position type index referencing a multi-entry turbine_types map. + assert len(farm["turbine_types"]) >= 2 + assert len(type_idx) == n_positions + assert set(type_idx) == set(farm["turbine_types"].keys()) + # The example genuinely uses both types. + assert len(set(type_idx)) >= 2 + + # The assigned types have distinct hub heights -> mixed hub heights. + hub_heights = {k: t["hub_height"] for k, t in farm["turbine_types"].items()} + assert len(set(hub_heights.values())) >= 2 + + +def test_per_turbine_wind_resource_roundtrip(): + """A wind resource dimensioned per turbine (one hub height each, no shared + vertical profile) round-trips through dict_to_netcdf with the + ``wind_turbine`` dimension and per-turbine height preserved.""" + res_yaml = _plant_examples_dir() / "plant_energy_resource" / "WTResource.yaml" + + windIO.validate(input=res_yaml, schema_type="plant/energy_resource") + + resource = windIO.load_yaml(res_yaml) + ds = windIO.dict_to_netcdf(resource["wind_resource"]) + + # Per-turbine resource: wind_turbine is a real dimension. + assert "wind_turbine" in ds.dims + assert ds.sizes["wind_turbine"] >= 2 + + # Height is given per turbine (not a shared scalar / vertical profile). + assert "height" in ds.variables + assert ds["height"].dims == ("wind_turbine",) + assert ds["height"].sizes["wind_turbine"] == ds.sizes["wind_turbine"] + + # The resource's data variables carry the per-turbine dimension (here + # wind_speed / wind_direction are binned coordinate axes, so check the + # actual per-turbine fields). + per_turbine_vars = [ + v for v in ds.data_vars if "wind_turbine" in ds[v].dims + ] + assert per_turbine_vars, "no data variable carries the wind_turbine dimension" + assert "sector_probability" in per_turbine_vars diff --git a/windIO/examples/plant/wind_energy_system/flow_example_timeseries.yaml b/windIO/examples/plant/wind_energy_system/flow_example_timeseries.yaml index 781e5432..46fa2baf 100644 --- a/windIO/examples/plant/wind_energy_system/flow_example_timeseries.yaml +++ b/windIO/examples/plant/wind_energy_system/flow_example_timeseries.yaml @@ -18,7 +18,7 @@ attributes: ws_superposition: Linear ti_superposition: Linear rotor_averaging: - name: GQGrid + name: gq_grid n_x_grid_points: 5 n_y_grid_points: 5 background_averaging: center diff --git a/windIO/schemas/plant/wind_energy_system.yaml b/windIO/schemas/plant/wind_energy_system.yaml index 29910eef..70029351 100644 --- a/windIO/schemas/plant/wind_energy_system.yaml +++ b/windIO/schemas/plant/wind_energy_system.yaml @@ -66,19 +66,78 @@ properties: type: number # (default 0) free_stream_ti: title: Flag deciding to use freestream or waked TI + description: TI feeding the wake-expansion coefficient (k = k_a*TI + k_b) and TI-dependent deficits. False (default) = waked/effective TI; True = freestream TI. type: boolean # (default to False) ceps: title: Bastankhah c_epsilon factor type: number use_effective_ws: - title: flag to use freestream wind speed for deficit computation - type: boolean - use_effective_ti: - title: flag to use effective turbulence intensity + title: flag to use local (effective) wind speed for deficit computation (True=waked, False=freestream) type: boolean A: title: TurboNOJ wake expansion parameter type: number + fuga: + title: FUGA LUT generation parameters + description: >- + Options for on-the-fly Fuga look-up-table generation (pyfuga). + Fuga has no turbulence-intensity input, so ambient turbulence + enters through the roughness z0 and stability zeta0. When z0 is + not given it is derived from the site TI. By default a sweep of + LUTs across the site TI distribution is generated so the wake + honors per-flow-case TI. All omitted fields fall back to WIFA + defaults. + type: object + properties: + z0: + title: Roughness length(s) (m) + description: Single roughness, or an explicit list of roughnesses for the LUT sweep. Overrides the TI-derived z0. + oneOf: + - type: number + - type: array + items: + type: number + zi: + title: Atmospheric inversion / boundary-layer height (m) + type: number + zeta0: + title: Monin-Obukhov stability parameter z0/L (0 = neutral) + type: number + n_z0: + title: Number of z0 LUTs in the TI sweep (1 = single mean-TI LUT) + type: integer + ti_min: + title: Lower TI clamp for the z0 sweep (keeps z0 physical) + type: number + ti_max: + title: Upper TI clamp for the z0 sweep (keeps z0 physical) + type: number + ti_qlo: + title: Lower TI quantile spanned by the sweep + type: number + ti_qhi: + title: Upper TI quantile spanned by the sweep + type: number + nkz0: + title: pyfuga spectral density (wavenumbers per log decade) + type: integer + nbeta: + title: pyfuga azimuthal resolution + type: integer + nx: + title: LUT streamwise grid points + type: integer + ny: + title: LUT cross-stream grid points + type: integer + lut_vars: + title: LUT field components to compute (e.g. ["UL"]) + type: array + items: + type: string + cache_dir: + title: Directory for generated/cached LUTs + type: string axial_induction_model: title: axial induction model @@ -113,12 +172,23 @@ properties: coefficients: title: coefficients type: array + c0: + title: STF/IEC model coefficient 0 + type: number c1: title: STF model coefficient 1 type: number c2: title: STF model coefficient 2 type: number + c: + title: CrespoHernandez coefficients + description: >- + Calibration coefficients [c0, c1, c2, c3] for the CrespoHernandez + added-turbulence model. When given, the model is built with these + coefficients (engine-specific), reproducing a paper's calibration + (e.g. Niayifar/Zong); when omitted the engine default is used. + type: array superposition_model: title: Superposition model @@ -128,7 +198,7 @@ properties: ws_superposition: title: Speed superposition model name type: string - enum: ["Linear", "Squared", "Max", "Product", "Weighted", "Cumulative"] + enum: ["Linear", "Squared", "Max", "Product", "Weighted", "Cumulative", "Vector"] ti_superposition: title: TI superposition model name type: string @@ -141,8 +211,18 @@ properties: properties: name: title: Rotor averaging model name + description: >- + Engine-neutral: none (no rotor-averaging model — rotor centre, + the only non-node option 'Weighted' superposition accepts), + center, grid (regular rotor grid / GridRotorAvg), + eq_grid, gq_grid, polar_grid, cgi, gaussian_overlap, area_overlap. + gaussian_overlap/area_overlap are non-node overlap models — NOT compatible + with 'Weighted' superposition, which requires a node model (use grid) or + 'none'. Capitalized names are deprecated aliases of the lowercase forms. type: string - enum: ["Center", "Avg_Deficit", "EqGrid", "GQGrid", "PolarGrid", "CGI"] + enum: ["none", "center", "grid", "eq_grid", "gq_grid", "polar_grid", "cgi", + "gaussian_overlap", "area_overlap", + "Center", "Avg_Deficit", "EqGrid", "GQGrid", "PolarGrid", "CGI"] n: title: Number of grid or integration points type: integer diff --git a/windIO/validator.py b/windIO/validator.py index eec6b952..c07c9c14 100644 --- a/windIO/validator.py +++ b/windIO/validator.py @@ -6,11 +6,30 @@ import copy import jsonschema import jsonschema.validators +import numpy as np from .yaml import load_yaml from .schemas import schemaPath, schema_validation_error_formatter +def _structure_skeleton(obj): + """Return a copy of ``obj`` with numpy arrays replaced by ``[]``. + + Used for structure-only validation of array-backed (memory-efficient) + inputs: jsonschema requires JSON types (it rejects numpy arrays and would + iterate every element of a large list). Replacing each array with an empty + list keeps the surrounding structure (keys, ``dims``) validatable at O(1) + per variable while skipping element-wise checks of the bulk data. + """ + if isinstance(obj, np.ndarray): + return [] + if isinstance(obj, dict): + return {k: _structure_skeleton(v) for k, v in obj.items()} + if isinstance(obj, (list, tuple)): + return [_structure_skeleton(v) for v in obj] + return obj + + def retrieve_yaml(uri: str): if not uri.endswith(".yaml"): raise NoSuchResource(ref=uri) @@ -51,7 +70,8 @@ def _enforce_no_additional_properties(schema): def validate( - input: dict | str | Path, schema_type: str, restrictive: bool = True, defaults: bool = False, + input: dict | str | Path, schema_type: str, restrictive: bool = True, + defaults: bool = False, array_data: bool = False, ) -> None: """ Validates a given windIO input based on the selected schema type. @@ -65,8 +85,13 @@ def validate( 'turbine/turbine_schema'. restrictive (bool, optional): If True, the schema will be modified to enforce that no additional properties are allowed. Defaults to True. - defaults (bool, optional): If True, default values specified in the schema will + defaults (bool, optional): If True, default values specified in the schema will be applied to the input data during validation. Defaults to False. + array_data (bool, optional): If True, validate structure only: numpy + arrays (from an array-backed ``!include`` netCDF, or an already + array-backed dict) are replaced by ``[]`` so jsonschema checks keys + and ``dims`` without materialising/iterating the bulk data. Avoids + the dict-of-lists memory blow-up for large resources. Defaults to False. Raises: FileNotFoundError: If the schema file corresponding to the schema type is not found. @@ -84,9 +109,12 @@ def validate( raise FileNotFoundError(f"Schema file {schema_file} not found.") if type(input) is dict: - data = copy.deepcopy(input) + data = _structure_skeleton(input) if array_data else copy.deepcopy(input) elif type(input) in [str, Path, PosixPath, WindowsPath]: - data = load_yaml(input) + if array_data: + data = _structure_skeleton(load_yaml(input, nc_data="array")) + else: + data = load_yaml(input) else: raise TypeError(f"Input type {type(input)} is not supported.") diff --git a/windIO/yaml.py b/windIO/yaml.py index cfc9c60c..ca5245fe 100644 --- a/windIO/yaml.py +++ b/windIO/yaml.py @@ -19,22 +19,35 @@ def _fmt(v: Any) -> dict | list | str | float | int: v (Any): Initially, a dictionary of inputs to format. Then, individual values within the dictionary. """ + if isinstance(v, np.ndarray): + # Keep arrays as-is; the elementwise ``!= {}`` below is unsafe on arrays. + return v if isinstance(v, dict): - return {k: _fmt(v) for k, v in v.items() if _fmt(v) != {}} + out = {} + for k, val in v.items(): + fval = _fmt(val) + if not (isinstance(fval, dict) and len(fval) == 0): + out[k] = fval + return out elif isinstance(v, tuple): return list(v) else: return v -def _ds2yml(ds: xr.Dataset) -> dict: +def _ds2yml(ds: xr.Dataset, data: str = "list") -> dict: """ Converts the input xr.Dataset to a format compatible with yaml.load. Args: ds (xr.Dataset): NetCDF data loaded as a xr.Dataset + data (str): How array data is represented, forwarded to + ``xr.Dataset.to_dict``. ``"list"`` (default) yields nested Python + lists (YAML/JSON friendly). ``"array"`` keeps numpy arrays, avoiding + the ~4-28x memory blow-up of lists for large included netCDF + resources (not YAML-serialisable; use with structure-only validation). """ - d = ds.to_dict() + d = ds.to_dict(data=data) return _fmt( { **{k: v["data"] for k, v in d["coords"].items()}, @@ -49,6 +62,7 @@ def _get_YAML( read_numpy: bool = False, read_include: bool = True, n_list_flow_style: int = 1, + nc_data: str = "list", ) -> YAML: """Get `ruamel.yaml.YAML` instance default setting for windIO @@ -128,11 +142,10 @@ def include(constructor, node): filename = Path(constructor.loader.reader.stream.name).parent / node.value ext = os.path.splitext(filename)[1].lower() if ext in [".yaml", ".yml"]: - return load_yaml( - filename, _get_YAML() - ) # TODO: Make `get_YAML()` dynamic to make it possible to update + # Propagate nc_data so nested includes keep the same array mode. + return load_yaml(filename, _get_YAML(nc_data=nc_data)) elif ext in [".nc"]: - return _ds2yml(xr.open_dataset(filename)) + return _ds2yml(xr.open_dataset(filename), data=nc_data) else: raise ValueError(f"Unsupported file extension: {ext}") @@ -141,7 +154,9 @@ def include(constructor, node): return yaml_obj -def load_yaml(filename: str | Path | os.PathLike, loader=None) -> dict: +def load_yaml( + filename: str | Path | os.PathLike, loader=None, nc_data: str = "list" +) -> dict: """ Opens ``filename`` and loads the content into a dictionary with the ``_get_YAML`` function from ruamel.yaml.YAML. @@ -149,12 +164,16 @@ def load_yaml(filename: str | Path | os.PathLike, loader=None) -> dict: Args: filename (str | Path | os.PathLike): Path or file-handle to the local file to be loaded or string path to the file. loader (ruamel.yaml.YAML, optional): Defaults to SafeLoader. + nc_data (str, optional): How ``!include`` netCDF data is represented; + ``"list"`` (default) for nested Python lists, ``"array"`` to keep + numpy arrays (memory-efficient; requires structure-only validation). + Ignored when an explicit ``loader`` is given. Returns: dict: Dictionary representation of the YAML file given in ``filename``. """ if loader is None: - loader = _get_YAML() + loader = _get_YAML(nc_data=nc_data) if isinstance(filename, str): filename = Path(filename)