diff --git a/docs/source/user_guide/benchmarks/liquids.rst b/docs/source/user_guide/benchmarks/liquids.rst new file mode 100644 index 000000000..5e444d216 --- /dev/null +++ b/docs/source/user_guide/benchmarks/liquids.rst @@ -0,0 +1,33 @@ +================= +Liquids +================= + +Water ethanol density curves +======= + +Summary +------- + +Benchmark of the density of water-ethanol mixtures using NPT MD for different concentrations of ethanol, compare to experiment. + + +Metrics +------- + +1. rms of the density difference. +2. rms of the excess volume difference. +3. Location of the minimal excess volume. + + +Data availability +----------------- +Input structures: +Packmol generated + +Reference densities: +From: M. Southard and D. Green, Perry’s Chemical Engineers’ Handbook, 9th Edition. McGraw-Hill Education, 2018. + +Computational cost +------------------ + +Very high - 1 ns of NPT MD on about 120 water/ethanol molecules, 7 times per model. diff --git a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py new file mode 100644 index 000000000..965bd8d0d --- /dev/null +++ b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py @@ -0,0 +1,422 @@ +"""Analyse ethanol-water density curves.""" + +from __future__ import annotations + +from pathlib import Path + +import numpy as np +import pytest + +from ml_peg.analysis.utils.decorators import build_table, plot_parity +from ml_peg.analysis.utils.utils import load_metrics_config, rmse +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.calcs.utils.utils import download_s3_data +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +CATEGORY = "liquids" +BENCHMARK = "ethanol_water_density" +CALC_PATH = CALCS_ROOT / CATEGORY / BENCHMARK / "outputs" +OUT_PATH = APP_ROOT / "data" / CATEGORY / BENCHMARK + +MODELS = get_model_names(current_models) +MODEL_INDEX = {name: i for i, name in enumerate(MODELS)} # duplicate in calc + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + +M_WATER = 18.01528 # g/mol +M_ETOH = 46.06844 # g/mol +LOG_INTERVAL_PS = 0.1 +EQUILIB_TIME_PS = 500 + +OUT_PATH.mkdir(parents=True, exist_ok=True) + + +def weight_to_mole_fraction(w): + r""" + Convert ethanol weight fraction to mole fraction. + + Parameters + ---------- + w : array-like + Ethanol weight fraction :math:`m_\mathrm{ethanol} / m_\mathrm{total}`. + + Returns + ------- + numpy.ndarray + Ethanol mole fraction. + """ + n_e = w / M_ETOH + n_w = (1 - w) / M_WATER + return n_e / (n_e + n_w) + + +def _excess_volume(x: np.ndarray, rhos: np.ndarray) -> np.ndarray: + """ + Compute excess volume given molar fraction and density respectively. + + Parameters + ---------- + x : numpy.ndarray + Composition grid (mol fraction). + rhos : numpy.ndarray + Density. + + Returns + ------- + numpy.ndarray + Excess values ``y - y_linear``. + """ + return (x * M_ETOH + (1 - x) * M_WATER) / rhos - ( + x * M_ETOH / rhos[-1] + (1 - x) * M_WATER / rhos[0] + ) + + +def _peak_x_quadratic(x: np.ndarray, y: np.ndarray) -> float: + """ + Estimate x position of the minimum by local quadratic fitting. + + Parameters + ---------- + x : numpy.ndarray + Composition grid. + y : numpy.ndarray + Property values. + + Returns + ------- + float + Estimated composition of the minimum. + """ + if len(x) < 3: + return float(x[int(np.argmin(y))]) + + i = int(np.argmin(y)) + if i == 0 or i == len(x) - 1: + return float(x[i]) + + # Fit a parabola to (i-1, i, i+1) + xs = x[i - 1 : i + 2] + ys = y[i - 1 : i + 2] + + # y = ax^2 + bx + c + a, b, _c = np.polyfit(xs, ys, deg=2) + if abs(a) < 1e-16: + return float(x[i]) + + xv = -b / (2.0 * a) + + # Clamp to local bracket so we don't get silly extrapolation + return float(np.clip(xv, xs.min(), xs.max())) + + +@pytest.fixture(scope="session") +def ref_curve() -> tuple[np.ndarray, np.ndarray]: + """ + Return the reference density curve on a sorted mole-fraction grid. + + Returns + ------- + tuple[numpy.ndarray, numpy.ndarray] + Sorted mole fractions and reference densities. + """ + data_dir = ( + download_s3_data( + key="inputs/liquids/ethanol_water_density/ethanol_water_density.zip", + filename="ethanol_water_density.zip", + ) + / "ethanol_water_density" + ) + ref_file = data_dir / "densities_293.15.txt" + rho_ref = np.loadtxt(ref_file) + + n = len(rho_ref) + + # weight fraction grid + w = np.linspace(0.0, 1.0, n) + + # convert to mole fraction + x = weight_to_mole_fraction(w) + + return x, rho_ref + + +def compute_density(fname, density_col=13): + """ + Compute average density from NPT log file. + + Parameters + ---------- + fname + Path to the log file. + density_col + Which column the density numbers are in. + + Returns + ------- + float + Average density in g/cm3. + """ + density_series = [] + with open(fname) as lines: + for line in lines: + items = line.strip().split() + if len(items) != 15: + continue + density_series.append(float(items[13])) + skip_frames = int(EQUILIB_TIME_PS / LOG_INTERVAL_PS) + return np.mean(density_series[skip_frames:]) + + +@pytest.fixture +def model_curves() -> dict[str, tuple[np.ndarray, np.ndarray]]: + """ + Return simulated model density curves on sorted composition grids. + + Returns + ------- + dict[str, tuple[numpy.ndarray, numpy.ndarray]] + Mapping from model name to x-grid and density values. + """ + curves: dict[str, tuple[np.ndarray, np.ndarray]] = {} + for model_name in MODELS: + model_dir = CALC_PATH / model_name + xs = [] + rhos = [] + for case_dir in model_dir.iterdir(): + rhos.append(compute_density(case_dir / f"{model_name}.log")) + xs.append(float(case_dir.name.split("_")[-1])) + x = np.asarray(xs, dtype=float) + rho = np.asarray(rhos, dtype=float) + + order = np.argsort(x) + curves[model_name] = (x[order], rho[order]) + return curves + + +def labels() -> list: + """ + Get list of calculated concentrations. + + Returns + ------- + list + List of all calculated concentrations. + """ + return sorted( + float(case_dir.name.split("_")[-1]) + for case_dir in (CALC_PATH / MODELS[0]).iterdir() + ) + + +@pytest.fixture +@plot_parity( + filename=OUT_PATH / "density_parity.json", + title="Ethanol–water density (293.15 K)", + x_label="Reference density / g cm⁻³", + y_label="Predicted density / g cm⁻³", + hoverdata={ + "Labels": labels(), + }, +) +def densities_parity(ref_curve, model_curves) -> dict[str, list]: + """ + Build parity-plot payload for model and reference densities. + + Parameters + ---------- + ref_curve : tuple[numpy.ndarray, numpy.ndarray] + Reference composition and density arrays. + model_curves : dict[str, tuple[numpy.ndarray, numpy.ndarray]] + Per-model composition and density arrays. + + Returns + ------- + dict[str, list] + Reference and model densities sampled on a common grid. + """ + x_ref, rho_ref = ref_curve + + # Use the first model's x grid for hover labels (parity requires same-length lists) + # We’ll choose the densest model grid if they differ. + model_name_for_grid = max(model_curves, key=lambda m: len(model_curves[m][0])) + x_grid = model_curves[model_name_for_grid][0] + + results: dict[str, list] = {"ref": []} | {m: [] for m in MODELS} + + rho_ref_on_grid = np.interp(x_grid, x_ref, rho_ref) + results["ref"] = list(rho_ref_on_grid) + + for m in MODELS: + x_m, rho_m = model_curves[m] + # Interpolate model to x_grid if needed + if len(x_m) != len(x_grid) or np.any(np.abs(x_m - x_grid) > 1e-12): + # This assumes model spans the grid range; otherwise raise. + rho_m_on_grid = np.interp(x_grid, x_m, rho_m) + else: + rho_m_on_grid = rho_m + results[m] = list(rho_m_on_grid) + + return results + + +@pytest.fixture +def rmse_density(ref_curve, model_curves) -> dict[str, float]: + """ + Compute density RMSE versus interpolated reference values. + + Parameters + ---------- + ref_curve : tuple[numpy.ndarray, numpy.ndarray] + Reference composition and density arrays. + model_curves : dict[str, tuple[numpy.ndarray, numpy.ndarray]] + Per-model composition and density arrays. + + Returns + ------- + dict[str, float] + RMSE values in g/cm^3 keyed by model name. + """ + x_ref, rho_ref = ref_curve + out: dict[str, float] = {} + for m, (x_m, rho_m) in model_curves.items(): + rho_ref_m = np.interp(x_m, x_ref, rho_ref) + out[m] = rmse(rho_m, rho_ref_m) + return out + + +@pytest.fixture +def rmse_excess_volume(ref_curve, model_curves) -> dict[str, float]: + """ + Compute RMSE of excess volume curves. + + Parameters + ---------- + ref_curve : tuple[numpy.ndarray, numpy.ndarray] + Reference composition and density arrays. + model_curves : dict[str, tuple[numpy.ndarray, numpy.ndarray]] + Per-model composition and density arrays. + + Returns + ------- + dict[str, float] + Excess-density RMSE values keyed by model name. + """ + x_ref, rho_ref = ref_curve + out: dict[str, float] = {} + + for m, (x_m, rho_m) in model_curves.items(): + rho_ref_m = np.interp(x_m, x_ref, rho_ref) + + ex_ref = _excess_volume(x_m, rho_ref_m) + ex_m = _excess_volume(x_m, rho_m) + + out[m] = rmse(ex_m, ex_ref) + + return out + + +@pytest.fixture +def peak_x_error(ref_curve, model_curves) -> dict[str, float]: + """ + Compute absolute error in composition of minimum excess volume. + + Parameters + ---------- + ref_curve : tuple[numpy.ndarray, numpy.ndarray] + Reference composition and density arrays. + model_curves : dict[str, tuple[numpy.ndarray, numpy.ndarray]] + Per-model composition and density arrays. + + Returns + ------- + dict[str, float] + Absolute peak-position error keyed by model name. + """ + x_ref, rho_ref = ref_curve + ex_ref_dense = _excess_volume(x_ref, rho_ref) + x_peak_ref = _peak_x_quadratic(x_ref, ex_ref_dense) + print("ref peak at:", x_peak_ref) + + out: dict[str, float] = {} + for m, (x_m, rho_m) in model_curves.items(): + ex_m = _excess_volume(x_m, rho_m) + x_peak_m = _peak_x_quadratic(x_m, ex_m) + out[m] = float(abs(x_peak_m - x_peak_ref)) + + return out + + +# ----------------------------------------------------------------------------- +# Table +# ----------------------------------------------------------------------------- + + +@pytest.fixture +@build_table( + thresholds=DEFAULT_THRESHOLDS, + filename=OUT_PATH / "density_metrics_table.json", + metric_tooltips={ + "Model": "Name of the model", + "RMSE density": "RMSE between model and reference density" + "at model compositions (g cm⁻³).", + "RMSE excess volume": ( + "RMSE of the excess volumebetween pure endpoints (cm³ mol^-1)." + ), + "Peak x error": ( + "Absolute difference in mole-fraction location of maximum excess density." + ), + }, +) +def metrics( + rmse_density: dict[str, float], + rmse_excess_volume: dict[str, float], + peak_x_error: dict[str, float], +) -> dict[str, dict]: + """ + Combine individual metrics into the table payload. + + Parameters + ---------- + rmse_density : dict[str, float] + Density RMSE values. + rmse_excess_volume : dict[str, float] + Excess-volume RMSE values. + peak_x_error : dict[str, float] + Peak-position errors. + + Returns + ------- + dict[str, dict] + Metric-name to per-model mapping. + """ + return { + "RMSE density": rmse_density, + "RMSE excess volume": rmse_excess_volume, + "Peak x error": peak_x_error, + } + + +def test_ethanol_water_density( + metrics: dict[str, dict], densities_parity: dict[str, list] +) -> None: + """ + Execute density analysis fixtures and emit debug output. + + Parameters + ---------- + metrics : dict[str, dict] + Metrics table payload. + densities_parity : dict[str, list] + Parity plot payload. + + Returns + ------- + None + The test validates fixture execution and writes artifacts. + """ + return diff --git a/ml_peg/analysis/liquids/ethanol_water_density/metrics.yml b/ml_peg/analysis/liquids/ethanol_water_density/metrics.yml new file mode 100644 index 000000000..16b83352d --- /dev/null +++ b/ml_peg/analysis/liquids/ethanol_water_density/metrics.yml @@ -0,0 +1,22 @@ +# the 'bad' defaults are approximately 3 times the mp0 result +metrics: + RMSE density: + good: 0.0003 # approximate error of the reference on the coarse, sampled grid + bad: 0.3 + unit: g cm^-3 + tooltip: "RMSE between model and reference densities at the sampled compositions" + level_of_theory: experiment + + RMSE excess volume: + good: 0.02 # approximate error of the reference on the coarse, sampled grid + bad: 1.5 + unit: cm^3 mol^-1 + tooltip: "RMSE of excess (non-ideal) volume" + level_of_theory: experiment + + Peak x error: + good: 0.026 # approximate error of the reference on the coarse, sampled grid + bad: 0.3 + unit: mole fraction + tooltip: "Absolute error in the mole-fraction location of minimal excess volume" + level_of_theory: experiment diff --git a/ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py b/ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py new file mode 100644 index 000000000..0531e645b --- /dev/null +++ b/ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py @@ -0,0 +1,80 @@ +# TODO: This does not work. Fix this + +"""Run ethanol–water density (decomposition curves) app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_column +from ml_peg.app.utils.load import read_plot + +# ----------------------------------------------------------------------------- +# Configuration +# ----------------------------------------------------------------------------- + +CATEGORY = "liquids" +BENCHMARK_NAME = "ethanol_water_density" + +DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/liquids.html#ethanol-water-density-curves" + +DATA_PATH = APP_ROOT / "data" / CATEGORY / BENCHMARK_NAME + + +class EthanolWaterDecompositionCurvesApp(BaseApp): + """Ethanol–water density benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + parity = read_plot( + DATA_PATH / "density_parity.json", id=f"{BENCHMARK_NAME}-figure" + ) + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={ + "density": parity, + }, + ) + + +def get_app() -> EthanolWaterDecompositionCurvesApp: + """ + Get ethanol–water benchmark app layout and callback registration. + + Returns + ------- + EthanolWaterDecompositionCurvesApp + Benchmark layout and callback registration. + """ + return EthanolWaterDecompositionCurvesApp( + name=BENCHMARK_NAME, + description=( + "Ethanol–water mixture density at 293.15 K. Metrics include density RMSE, " + "excess-volume RMSE, and error in the mole-fraction" + "location of the maximum excess volume." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "density_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + ], + ) + + +if __name__ == "__main__": + # Create Dash app + # assets_folder should be the parent of the "assets///..." tree + full_app = Dash(__name__, assets_folder=DATA_PATH.parent) + + # Construct layout and register callbacks + app = get_app() + full_app.layout = app.layout + app.register_callbacks() + + # Run app + full_app.run(port=8051, debug=True) diff --git a/ml_peg/app/liquids/liquids.yml b/ml_peg/app/liquids/liquids.yml new file mode 100644 index 000000000..19fcd877d --- /dev/null +++ b/ml_peg/app/liquids/liquids.yml @@ -0,0 +1,2 @@ +title: Liquids # may remove this and move contents to other category +description: Properties of liquids, including densities diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py new file mode 100644 index 000000000..ee00f5fee --- /dev/null +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -0,0 +1,293 @@ +"""Calculate ethanol-water density curves.""" + +from __future__ import annotations + +import csv +from dataclasses import dataclass +import logging +import os +from pathlib import Path +import time +from typing import Any + +from ase import units +from ase.io import Trajectory, read +from ase.md import Langevin +from ase.md.nose_hoover_chain import IsotropicMTKNPT +from ase.md.velocitydistribution import ( + MaxwellBoltzmannDistribution, + Stationary, + ZeroRotation, +) +import pytest + +from ml_peg.calcs.utils.utils import download_s3_data +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +OUT_PATH = Path(__file__).parent / "outputs" + +MODELS = load_models(current_models) +MODEL_INDEX = {name: i for i, name in enumerate(MODELS)} + +NUM_NPT_STEPS = 1000_000 +NUM_NVT_STEPS = 50_000 +TIMESTEP = 1 * units.fs +LOG_INTERVAL = 100 +ATM = 1.01325 * units.bar +TEMPERATURE = 298.15 +LANGEVIN_FRICTION = 1 / (500 * units.fs) + + +@dataclass(frozen=True) +class CompositionCase: + """ + Map composition value to structure filename. + + Attributes + ---------- + x_ethanol : float + Ethanol mole fraction for the case. + filename : str + Structure filename associated with the composition. + """ + + x_ethanol: float + filename: str + + +def load_compositions(data_path) -> list[CompositionCase]: + """ + Load composition grid from ``compositions.csv``. + + Parameters + ---------- + data_path : pathlib.Path + Path to the folder containing ``compositions.csv``. + + Returns + ------- + list[CompositionCase] + Parsed composition cases ordered as in the CSV file. + """ + comps_file = data_path / "compositions.csv" + cases: list[CompositionCase] = [] + with comps_file.open(newline="") as f: + reader = csv.DictReader(f) + for row in reader: + cases.append( + CompositionCase( + x_ethanol=float(row["x_ethanol"]), + filename=row["filename"], + ) + ) + if not cases: + raise RuntimeError("No compositions found in compositions.csv") + return cases + + +def run_one_case( + struct_path: str | Path, + calc: Any, + output_fname: str | Path, +): + """ + Run the full MD workflow for one composition case. + + Parameters + ---------- + struct_path : str | pathlib.Path + Input structure path. + calc : Any + ASE-compatible calculator. + output_fname : str | pathlib.Path + The output file path. + + Returns + ------- + collections.abc.Iterable[float] + Density time series in g/cm^3. + """ + atoms = read(struct_path) + atoms.set_pbc(True) + atoms.wrap() + atoms.calc = calc + + # velocities + MaxwellBoltzmannDistribution(atoms, temperature_K=TEMPERATURE) + Stationary(atoms) + ZeroRotation(atoms) + if not os.path.exists(output_fname): + # NVT + dyn = Langevin( + atoms, + timestep=TIMESTEP, + temperature_K=TEMPERATURE, + friction=LANGEVIN_FRICTION, + ) + dyn.run(NUM_NVT_STEPS) + + # NPT + run_npt(atoms, calc, output_fname) + + +def get_density_g_cm3(atoms): + """ + Return density in g/cm^3. + + Parameters + ---------- + atoms : ase.Atoms + Atomic configuration with periodic cell volume. + + Returns + ------- + float + Density in g/cm^3. + """ + amu_to_kg = 1.66053906660e-27 + v_a3 = atoms.get_volume() + v_m3 = v_a3 * 1e-30 + m_kg = atoms.get_masses().sum() * amu_to_kg + rho_kg_m3 = m_kg / v_m3 + return rho_kg_m3 / 1000.0 + + +def log_md(dyn, start_time): + """ + Log molecular dynamics simulation. + + Parameters + ---------- + dyn + ASE molecular dynamics object. + start_time + Real time of the simulation start, in seconds. + """ + current_time = time.time() - start_time + energy = dyn.atoms.get_potential_energy() + density = get_density_g_cm3(dyn.atoms) + temperature = dyn.atoms.get_temperature() + t = dyn.get_time() / (1000 * units.fs) + logging.info( + f"""t: {t:>8.3f} ps\ + Walltime: {current_time:>10.3f} s\ + T: {temperature:.1f} K\ + Epot: {energy:.2f} eV\ + density: {density:.3f} g/cm^3\ + """ + ) + + +def run_npt(atoms, calc, output_fname): + """ + Run NPT molecular dynamics using the isotropic MTK barostat. + + Parameters + ---------- + atoms + ASE Atoms of the system. + calc + ASE Calculator. + output_fname + File name to save the trajectory to. + """ + if os.path.exists(output_fname): + try: + traj = Trajectory(output_fname) + atoms = traj[-1] + nsteps = (len(traj) - 1) * LOG_INTERVAL + except Exception as e: + print(e) + nsteps = 0 + else: + nsteps = 0 + + atoms.calc = calc + + dyn = IsotropicMTKNPT( + atoms=atoms, + timestep=TIMESTEP, + temperature_K=TEMPERATURE, + pressure_au=ATM, + tdamp=50 * units.fs, + pdamp=500 * units.fs, + trajectory=output_fname, + loginterval=LOG_INTERVAL, + append_trajectory=True, + ) + dyn.nsteps = nsteps + dyn.attach(log_md, interval=LOG_INTERVAL, dyn=dyn, start_time=time.time()) + dyn.run(steps=NUM_NPT_STEPS) + + +@pytest.mark.very_slow +@pytest.mark.parametrize("mlip", MODELS.items(), ids=list(MODELS.keys())) +def test_water_ethanol_density_curves(mlip: tuple[str, Any]) -> None: + """ + Generate one density-curve case for a model and composition. + + Parameters + ---------- + mlip : tuple[str, Any] + Pair of model name and model object. + + Returns + ------- + None + This test writes output files for a single case. + """ + data_dir = ( + download_s3_data( + key="inputs/liquids/ethanol_water_density/ethanol_water_density.zip", + filename="ethanol_water_density.zip", + ) + / "ethanol_water_density" + ) + for case in load_compositions(data_dir): + water_ethanol_density_curve_one_case(mlip, case, data_dir) + + +def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case, data_dir) -> None: + """ + Run one MD simulation case and write its density time series. + + Parameters + ---------- + mlip : tuple[str, Any] + Pair of model name and model object. + case : Any + Composition case containing ``x_ethanol`` and ``filename``. + data_dir : str | pathlib.Path + Path to the data file. + + Returns + ------- + None + This function writes outputs for one composition. + """ + model_name, model = mlip + + model_out = OUT_PATH / model_name + model_out.mkdir(parents=True, exist_ok=True) + + calc = model.get_calculator() + calc = model.add_d3_calculator(calc) + + struct_path = data_dir / case.filename + if not struct_path.exists(): + raise FileNotFoundError( + f"Missing structure for x={case.x_ethanol}: {struct_path}" + ) + + case_dir = model_out / f"x_ethanol_{case.x_ethanol:.2f}" + case_dir.mkdir(parents=True, exist_ok=True) + + logging.basicConfig( + format="%(message)s", + level=logging.INFO, + filename=case_dir / f"{model_name}.log", + filemode="a", + force=True, + ) + run_one_case(struct_path, calc, case_dir / f"{model_name}.traj")