From a4cdf9a7de502d62c12f1b1e57a7122e84df5a76 Mon Sep 17 00:00:00 2001 From: Arn Date: Sat, 7 Feb 2026 21:47:06 +0000 Subject: [PATCH 01/29] ethanol-water density curve --- .../analyse_ethanol_water_density.py | 238 ++++++++++++++++++ .../liquids/ethanol_water_density/analysis.py | 103 ++++++++ .../liquids/ethanol_water_density/io_tools.py | 99 ++++++++ .../liquids/ethanol_water_density/metrics.yml | 23 ++ .../app_ethanol_water_density.py | 82 ++++++ ml_peg/app/liquids/liquids.yml | 2 + .../calc_ethanol_water_density.py | 112 +++++++++ .../ethanol_water_density/compositions.py | 34 +++ .../ethanol_water_density/fake_data.py | 191 ++++++++++++++ .../liquids/ethanol_water_density/md_code.py | 167 ++++++++++++ 10 files changed, 1051 insertions(+) create mode 100644 ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py create mode 100644 ml_peg/analysis/liquids/ethanol_water_density/analysis.py create mode 100644 ml_peg/analysis/liquids/ethanol_water_density/io_tools.py create mode 100644 ml_peg/analysis/liquids/ethanol_water_density/metrics.yml create mode 100644 ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py create mode 100644 ml_peg/app/liquids/liquids.yml create mode 100644 ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py create mode 100644 ml_peg/calcs/liquids/ethanol_water_density/compositions.py create mode 100644 ml_peg/calcs/liquids/ethanol_water_density/fake_data.py create mode 100644 ml_peg/calcs/liquids/ethanol_water_density/md_code.py diff --git a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py new file mode 100644 index 000000000..6c8c1a0b1 --- /dev/null +++ b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py @@ -0,0 +1,238 @@ +# TODO: remove hardcoded things? +from pathlib import Path + +import numpy as np +import matplotlib.pyplot as plt +import pytest + +from ml_peg.analysis.liquids.ethanol_water_density.analysis import _rmse, _interp_1d, \ + _excess_curve, _peak_x_quadratic, x_to_phi_ethanol +from ml_peg.analysis.liquids.ethanol_water_density.io_tools import OUT_PATH, _debug_plot_enabled, _savefig, \ + _read_model_curve, read_ref_curve +from ml_peg.analysis.utils.decorators import build_table, plot_parity +from ml_peg.analysis.utils.utils import load_metrics_config +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + + +MODELS = get_model_names(current_models) +MODEL_INDEX = {name: i for i, name in enumerate(MODELS)} # duplicate in calc + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +OUT_PATH.mkdir(parents=True, exist_ok=True) + + +@pytest.fixture(scope="session") +def ref_curve() -> tuple[np.ndarray, np.ndarray]: + x_ref, rho_ref = read_ref_curve() + x = np.asarray(x_ref, dtype=float) + rho = np.asarray(rho_ref, dtype=float) + + # Ensure monotonic x for interpolation + order = np.argsort(x) + return x[order], rho[order] + + +@pytest.fixture +def model_curves() -> dict[str, tuple[np.ndarray, np.ndarray]]: + curves: dict[str, tuple[np.ndarray, np.ndarray]] = {} + for model_name in MODELS: + xs, rhos = _read_model_curve(model_name) + x = np.asarray(xs, dtype=float) + rho = np.asarray(rhos, dtype=float) + + order = np.argsort(x) + curves[model_name] = (x[order], rho[order]) + return curves + + +@pytest.fixture +@plot_parity( + filename=OUT_PATH / "density_parity.json", + title="Ethanol–water density (293.15 K)", + x_label="Reference density / g cm⁻³", + y_label="Predicted density / g cm⁻³", + #hoverdata={ + # "x_ethanol": [], # filled in fixture + #}, +) # TODO: read docs!!! doesn't seem to work yet. +def densities_parity(ref_curve, model_curves) -> dict[str, list]: + x_ref, rho_ref = ref_curve + + # Use the first model's x grid for hover labels (parity requires same-length lists) + # We’ll choose the densest model grid if they differ. + model_name_for_grid = max(model_curves, key=lambda m: len(model_curves[m][0])) + x_grid = model_curves[model_name_for_grid][0] + + results: dict[str, list] = {"ref": []} | {m: [] for m in MODELS} + + rho_ref_on_grid = _interp_1d(x_ref, rho_ref, x_grid) + results["ref"] = list(rho_ref_on_grid) + + for m in MODELS: + x_m, rho_m = model_curves[m] + # Interpolate model to x_grid if needed + if len(x_m) != len(x_grid) or np.any(np.abs(x_m - x_grid) > 1e-12): + # This assumes model spans the grid range; otherwise raise. + rho_m_on_grid = _interp_1d(x_m, rho_m, x_grid) + else: + rho_m_on_grid = rho_m + results[m] = list(rho_m_on_grid) + + ## Patch hoverdata list in-place (decorator reads the dict) + ## NOTE: if your decorator captures hoverdata at decoration time, + ## switch to hoverdata={"x_ethanol": x_labels()} fixture pattern like the docs. + #densities_parity.__wrapped__.__dict__.setdefault("hoverdata", {})["x_ethanol"] = list(x_grid) + + return results + +@pytest.fixture +def debug_curve_plots(ref_curve, model_curves) -> None: # TODO should I remove or use a different format? + if not _debug_plot_enabled(): + return + print("plotting curves") + + x_ref, rho_ref = ref_curve + + for m, (x_m, rho_m) in model_curves.items(): + rho_ref_m = _interp_1d(x_ref, rho_ref, x_m) + + fig, ax = plt.subplots() + ax.plot(x_ref, rho_ref, label="ref (dense)") + ax.plot(x_m, rho_m, marker="o", label=f"{m} (model)") + ax.plot(x_m, rho_ref_m, marker="x", label="ref on model grid") + ax.set_title(f"Density curve: {m}") + ax.set_xlabel("x_ethanol") + ax.set_ylabel("rho / g cm$^{-3}$") + ax.legend() + + print("saving a curve at:", OUT_PATH / "debug" / m / "density_curve.svg") + + # excess density + _savefig(fig, OUT_PATH / "debug" / m / "density_curve.svg") + rho_ref_m = _interp_1d(x_ref, rho_ref, x_m) + + fig, ax = plt.subplots() + ax.plot(x_ref, _excess_curve(x_ref, rho_ref), label="ref (dense)") + ax.plot(x_m, _excess_curve(x_m, rho_m), marker="o", label=f"{m} (model)") + ax.plot(x_m, _excess_curve(x_m, rho_ref_m), marker="x", label="ref on model grid") + ax.set_title(f"Density curve: {m}") + ax.set_xlabel("x_ethanol") + ax.set_ylabel("rho / g cm$^{-3}$") + ax.legend() + + print("saving a curve at:", OUT_PATH / "debug" / m / "excess_density_curve.svg") + _savefig(fig, OUT_PATH / "debug" / m / "excess_density_curve.svg") + + # volume fraction plot + phi_ref = x_to_phi_ethanol(x_ref, rho_ref) + phi_m = x_to_phi_ethanol(x_m, rho_m) + + fig, ax = plt.subplots() + ax.plot(phi_ref, rho_ref, label="ref (dense)") + ax.plot(phi_m, rho_m, marker="o", label=f"{m} (model)") + ax.plot(phi_m, rho_ref_m, marker="x", label="ref on model grid") + + ax.set_title(f"Density curve (volume fraction): {m}") + ax.set_xlabel(r"$\phi_\mathrm{ethanol}$") + ax.set_ylabel("rho / g cm$^{-3}$") + ax.legend() + + out_phi = OUT_PATH / "debug" / m / "density_curve_phi.svg" + print("saving a curve at:", out_phi) + _savefig(fig, out_phi) + + +@pytest.fixture +def rmse_density(ref_curve, model_curves) -> dict[str, float]: + x_ref, rho_ref = ref_curve + out: dict[str, float] = {} + for m, (x_m, rho_m) in model_curves.items(): + rho_ref_m = _interp_1d(x_ref, rho_ref, x_m) + out[m] = _rmse(rho_m, rho_ref_m) + return out + + +@pytest.fixture +def rmse_excess_density(ref_curve, model_curves) -> dict[str, float]: + """ + RMSE of excess density (detrended by each dataset's own pure endpoints). + """ + x_ref, rho_ref = ref_curve + out: dict[str, float] = {} + + for m, (x_m, rho_m) in model_curves.items(): + rho_ref_m = _interp_1d(x_ref, rho_ref, x_m) + + ex_ref = _excess_curve(x_m, rho_ref_m) + ex_m = _excess_curve(x_m, rho_m) + + out[m] = _rmse(ex_m, ex_ref) + + return out + + +@pytest.fixture +def peak_x_error(ref_curve, model_curves) -> dict[str, float]: + """ + Absolute error in the x-position of the maximum excess density. + + Ref peak is computed on the dense reference curve. + Model peak is computed on its (coarse) grid with a local quadratic refinement. + """ + x_ref, rho_ref = ref_curve + ex_ref_dense = _excess_curve(x_ref, rho_ref) + x_peak_ref = _peak_x_quadratic(x_ref, ex_ref_dense) + print("ref peak at:", x_peak_ref) + + out: dict[str, float] = {} + for m, (x_m, rho_m) in model_curves.items(): + ex_m = _excess_curve(x_m, rho_m) + x_peak_m = _peak_x_quadratic(x_m, ex_m) + out[m] = float(abs(x_peak_m - x_peak_ref)) + + return out + +# ----------------------------------------------------------------------------- +# Table +# ----------------------------------------------------------------------------- + +@pytest.fixture +@build_table( + thresholds=DEFAULT_THRESHOLDS, + filename=OUT_PATH / "density_metrics_table.json", + metric_tooltips={ + "Model": "Name of the model", + "RMSE density": "RMSE between model and reference density at model compositions (g cm⁻³).", + "RMSE excess density": ( + "RMSE after subtracting each curve’s linear baseline between pure endpoints (g cm⁻³)." + ), + "Peak x error": ( + "Absolute difference in mole-fraction location of maximum excess density." + ), + }, +) +def metrics( + rmse_density: dict[str, float], + rmse_excess_density: dict[str, float], + peak_x_error: dict[str, float], +) -> dict[str, dict]: + return { + "RMSE density": rmse_density, + "RMSE excess density": rmse_excess_density, + "Peak x error": peak_x_error, + } + + +def test_ethanol_water_density(metrics: dict[str, dict], densities_parity: dict[str, list], debug_curve_plots) -> None: + """ + Launch analysis (decorators handle writing JSON artifacts for the app). + """ + print(MODEL_INDEX) # TODO: these print statements may be useful for debugging, but should I remove? + print({key0:{MODEL_INDEX[name]: value for name, value in value0.items()} for key0, value0 in metrics.items()}) + return diff --git a/ml_peg/analysis/liquids/ethanol_water_density/analysis.py b/ml_peg/analysis/liquids/ethanol_water_density/analysis.py new file mode 100644 index 000000000..c96c2aaf1 --- /dev/null +++ b/ml_peg/analysis/liquids/ethanol_water_density/analysis.py @@ -0,0 +1,103 @@ +import numpy as np + +M_WATER = 18.01528 # g/mol +M_ETOH = 46.06844 # g/mol + +# Pick densities consistent with your reference conditions (T,P!) +# If your ref curve is at ~20°C, these are around: +RHO_WATER_PURE = 0.9982 # g/cm^3 +RHO_ETH_PURE = 0.7893 # g/cm^3 + +def x_to_phi_ethanol(x, rho_mix, + *, M_eth=M_ETOH, M_water=M_WATER, + rho_eth=RHO_ETH_PURE, rho_water=RHO_WATER_PURE): + """ + Convert ethanol mole fraction x to ethanol volume fraction phi using + mixture density rho_mix and pure-component densities as volume proxies. + """ + x = np.asarray(x, dtype=float) + rho_mix = np.asarray(rho_mix, dtype=float) + + m_eth = x * M_eth + m_wat = (1.0 - x) * M_water + + V_mix = (m_eth + m_wat) / rho_mix # cm^3 per "1 mol mixture basis" + V_eth = m_eth / rho_eth # cm^3 (proxy) + phi = V_eth / V_mix + return phi + +def weight_to_mole_fraction(w): + """ + Convert ethanol weight fraction -> mole fraction. + + w = mass_ethanol / total_mass + """ + n_e = w / M_ETOH + n_w = (1 - w) / M_WATER + return n_e / (n_e + n_w) + + +def _rmse(a: np.ndarray, b: np.ndarray) -> float: + d = a - b + return float(np.sqrt(np.mean(d * d))) + + +def _interp_1d(x_src: np.ndarray, y_src: np.ndarray, x_tgt: np.ndarray) -> np.ndarray: + """ + Linear interpolation. Requires x_tgt within [min(x_src), max(x_src)]. + """ + if np.any(x_tgt < x_src.min() - 1e-12) or np.any(x_tgt > x_src.max() + 1e-12): + raise ValueError("Target x values fall outside reference interpolation range.") + return np.interp(x_tgt, x_src, y_src) + + +def _endpoints_at_0_1(x: np.ndarray, y: np.ndarray, tol: float = 1e-8) -> tuple[float, float]: + """ + Return y(x=0) and y(x=1). Requires that x includes (approximately) 0 and 1. + """ + i0 = np.where(np.isclose(x, 0.0, atol=tol))[0] + i1 = np.where(np.isclose(x, 1.0, atol=tol))[0] + if len(i0) != 1 or len(i1) != 1: + raise ValueError("Curve must include x=0 and x=1 to define linear baseline.") + return float(y[i0[0]]), float(y[i1[0]]) + + +def _linear_baseline(x: np.ndarray, y0: float, y1: float) -> np.ndarray: + return y0 + x * (y1 - y0) + + +def _excess_curve(x: np.ndarray, y: np.ndarray) -> np.ndarray: + """ + Excess relative to the dataset's own pure endpoints (x=0 and x=1). + """ + y0, y1 = _endpoints_at_0_1(x, y) + return y - _linear_baseline(x, y0, y1) + + +def _peak_x_quadratic(x: np.ndarray, y: np.ndarray) -> float: + """ + Estimate x position of minimum y. + - If min is interior and we have neighbors, fit quadratic through 3 points. + - Otherwise fall back to argmin x. + """ + if len(x) < 3: + return float(x[int(np.argmin(y))]) + + i = int(np.argmin(y)) + if i == 0 or i == len(x) - 1: + return float(x[i]) + + # Fit a parabola to (i-1, i, i+1) + xs = x[i - 1 : i + 2] + ys = y[i - 1 : i + 2] + + # y = ax^2 + bx + c + a, b, _c = np.polyfit(xs, ys, deg=2) + if abs(a) < 1e-16: + return float(x[i]) + + xv = -b / (2.0 * a) + + # Clamp to local bracket so we don't get silly extrapolation + xv = float(np.clip(xv, xs.min(), xs.max())) + return xv diff --git a/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py b/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py new file mode 100644 index 000000000..2d4032d9c --- /dev/null +++ b/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py @@ -0,0 +1,99 @@ +import csv +import os +from pathlib import Path + +import numpy as np +from matplotlib import pyplot as plt + +from ml_peg.analysis.liquids.ethanol_water_density.analysis import weight_to_mole_fraction +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT + +CATEGORY = "liquids" +BENCHMARK = "ethanol_water_density" +CALC_PATH = CALCS_ROOT / CATEGORY / BENCHMARK / "outputs" +OUT_PATH = APP_ROOT / "data" / CATEGORY / BENCHMARK +DATA_PATH = CALCS_ROOT / CATEGORY / BENCHMARK / "data" + + +def _debug_plot_enabled() -> bool: + # Turn on plots by: DEBUG_PLOTS=1 pytest ... + return os.environ.get("DEBUG_PLOTS", "0") not in ("0", "", "false", "False") + + +def _savefig(fig, outpath: Path) -> None: + outpath.parent.mkdir(parents=True, exist_ok=True) + fig.tight_layout() + fig.savefig(outpath, dpi=200) + plt.close(fig) + + +def _read_model_curve(model_name: str) -> tuple[list[float], list[float]]: + """ + Read model density curve by computing averages from raw time series. + + Expects per-composition files: + x_ethanol_XX/density_timeseries.csv + with columns: step, rho_g_cm3 + """ + model_dir = CALC_PATH / model_name + xs: list[float] = [] + rhos: list[float] = [] + + for case_dir in sorted(model_dir.glob("x_ethanol_*")): + x_ethanol = float(case_dir.name.replace("x_ethanol_", "")) + + ts_path = case_dir / "density_timeseries.csv" + if not ts_path.exists(): + raise FileNotFoundError(f"Missing density time series: {ts_path}") + + rho_vals = [] + steps = [] + with ts_path.open(newline="") as f: + r = csv.DictReader(f) + for row in r: + steps.append(int(row["step"])) + rho_vals.append(float(row["rho_g_cm3"])) + + if not rho_vals: + raise ValueError(f"No density samples found in {ts_path}") + + rho_mean = float(np.mean(rho_vals)) + xs.append(x_ethanol) + rhos.append(rho_mean) + + if _debug_plot_enabled(): + fig, ax = plt.subplots() + ax.plot(steps, rho_vals) + ax.axhline(rho_mean, linestyle="--") + ax.set_title(f"{model_name} x={x_ethanol:.2f} density timeseries") + ax.set_xlabel("step") + ax.set_ylabel("rho / g cm$^{-3}$") + + _savefig(fig, OUT_PATH / "debug" / model_name / f"x_{x_ethanol:.2f}_timeseries.svg") + + return xs, rhos + + +def read_ref_curve() -> tuple[list[float], list[float]]: + """ + Load densities given on a uniform weight-fraction grid + and convert to mole fraction. + + Assumes: + - 101 evenly spaced points + - first line = 0 wt% ethanol + - last line = 100 wt% + """ + ref_file = DATA_PATH / "densities_293.15.txt" + rho_ref = np.loadtxt(ref_file) + + n = len(rho_ref) + + # weight fraction grid + w = np.linspace(0.0, 1.0, n) + + # convert to mole fraction + x = weight_to_mole_fraction(w) + + return list(x), list(rho_ref) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/metrics.yml b/ml_peg/analysis/liquids/ethanol_water_density/metrics.yml new file mode 100644 index 000000000..6da5b4f89 --- /dev/null +++ b/ml_peg/analysis/liquids/ethanol_water_density/metrics.yml @@ -0,0 +1,23 @@ +# TODO: the 'bad' defaults are quite arbitrary so pick decent ones; should I log rescale? +# TODO: add more metrics? e.g. radial distribution (or diffusion). +metrics: + RMSE density: + good: 0.0003 # approximate error of the reference on the coarse, sampled grid + bad: 0.02 + unit: g cm^-3 + tooltip: "RMSE between model and reference densities at the sampled compositions" + level_of_theory: experiment + + RMSE excess density: + good: 0.0003 # approximate error of the reference on the coarse, sampled grid + bad: 0.01 + unit: g cm^-3 + tooltip: "RMSE of excess (non-ideal) density after subtracting linear mixing baseline" + level_of_theory: experiment + + Peak x error: + good: 0.005 # approximate error of the reference on the coarse, sampled grid + bad: 0.20 + unit: mole fraction + tooltip: "Absolute error in the mole-fraction location of maximum excess density" + level_of_theory: experiment diff --git a/ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py b/ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py new file mode 100644 index 000000000..2ded1d118 --- /dev/null +++ b/ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py @@ -0,0 +1,82 @@ +#TODO: This does not work. Fix this + +"""Run ethanol–water density (decomposition curves) app.""" +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_column +from ml_peg.app.utils.load import read_plot + +# ----------------------------------------------------------------------------- +# Configuration +# ----------------------------------------------------------------------------- + +CATEGORY = "liquids" +BENCHMARK_NAME = "ethanol_water_density" + +DOCS_URL = ( + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/" # TODO: update to the right anchor +) + +DATA_PATH = APP_ROOT / "data" / CATEGORY / BENCHMARK_NAME + + +class EthanolWaterDecompositionCurvesApp(BaseApp): + """Ethanol–water density benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + parity = read_plot(DATA_PATH / "density_parity.json", id=f"{BENCHMARK_NAME}-figure") + + # When the user clicks a metric column in the table, show the parity plot. + # (This mirrors the GMTKN55 pattern: different columns can map to different plots; + # here they all map to the same parity plot artifact.) + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={ + "RMSE density": parity, + "RMSE excess density": parity, + "Peak x error": parity, + }, + ) + + +def get_app() -> EthanolWaterDecompositionCurvesApp: + """ + Get ethanol–water benchmark app layout and callback registration. + + Returns + ------- + EthanolWaterDecompositionCurvesApp + Benchmark layout and callback registration. + """ + return EthanolWaterDecompositionCurvesApp( + name=BENCHMARK_NAME, + description=( + "Ethanol–water mixture density at 293.15 K. Metrics include density RMSE, " + "excess-density RMSE (baseline-subtracted), and error in the mole-fraction " + "location of the maximum excess density." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "density_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + ], + ) + + +if __name__ == "__main__": + # Create Dash app + # assets_folder should be the parent of the "assets///..." tree + full_app = Dash(__name__, assets_folder=DATA_PATH.parent) + + # Construct layout and register callbacks + app = get_app() + full_app.layout = app.layout + app.register_callbacks() + + # Run app + full_app.run(port=8051, debug=True) diff --git a/ml_peg/app/liquids/liquids.yml b/ml_peg/app/liquids/liquids.yml new file mode 100644 index 000000000..19fcd877d --- /dev/null +++ b/ml_peg/app/liquids/liquids.yml @@ -0,0 +1,2 @@ +title: Liquids # may remove this and move contents to other category +description: Properties of liquids, including densities diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py new file mode 100644 index 000000000..6d105c7ad --- /dev/null +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -0,0 +1,112 @@ +import csv +import os +from pathlib import Path +from typing import Any + +import numpy as np +import pytest + +from ml_peg.calcs.liquids.ethanol_water_density.compositions import BENCH_ROOT, DATA_PATH, load_compositions +from ml_peg.calcs.liquids.ethanol_water_density.fake_data import make_fake_curve, make_fake_density_timeseries +from ml_peg.calcs.liquids.ethanol_water_density.md_code import run_one_case +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +# Local paths +OUT_PATH = BENCH_ROOT / "outputs" + +MODELS = load_models(current_models) +MODEL_INDEX = {name: i for i, name in enumerate(MODELS)} +FAKE_DATA = os.getenv("FAKE_DENSITY_DATA", "") == "1" + + +@pytest.mark.parametrize("mlip", MODELS.items()) +def test_water_ethanol_density_curve(mlip: tuple[str, Any]) -> None: + if not FAKE_DATA: + water_ethanol_density_curve(mlip) + else: + water_ethanol_density_dummy_data(mlip) + +def water_ethanol_density_curve(mlip: tuple[str, Any]) -> None: + """ + Run water–ethanol density curve benchmark for a single MLIP. + + Writes: + - per-composition density time series (raw data) + - a summary CSV derived from those time series + """ + model_name, model = mlip # TODO: dispersion ??? + cases = load_compositions() + + # Where this model writes outputs + model_out = OUT_PATH / model_name + model_out.mkdir(parents=True, exist_ok=True) + + # Get calculator + calc = model.get_calculator() + + for case in cases: + struct_path = DATA_PATH / case.filename + if not struct_path.exists(): + raise FileNotFoundError( + f"Missing structure for x={case.x_ethanol}: {struct_path}" + ) + + case_dir = model_out / f"x_ethanol_{case.x_ethanol:.2f}" + case_dir.mkdir(parents=True, exist_ok=True) + + # --- run simulation --- + rho_series = run_one_case( + struct_path, + calc, + workdir=case_dir, + ) + + # --- write raw density time series --- + ts_path = case_dir / "density_timeseries.csv" + with ts_path.open("w", newline="") as f: + w = csv.writer(f) + w.writerow(["step", "rho_g_cm3"]) + for i, rho in enumerate(rho_series): + w.writerow([i, f"{rho:.8f}"]) + + +def water_ethanol_density_dummy_data(mlip: tuple[str, Any]) -> None: + model_name, model = mlip + cases = load_compositions() + + model_out = OUT_PATH / model_name + model_out.mkdir(parents=True, exist_ok=True) + + # one curve per model + model_kind = MODEL_INDEX[model_name] % 4 + xs_curve, ys_curve = make_fake_curve(model_kind, seed=MODEL_INDEX[model_name]) + xs_curve = np.asarray(xs_curve, dtype=float) + ys_curve = np.asarray(ys_curve, dtype=float) + + for case in cases: + case_dir = model_out / f"x_ethanol_{case.x_ethanol:.2f}" + case_dir.mkdir(parents=True, exist_ok=True) + + rho_eq = float(np.interp(case.x_ethanol, xs_curve, ys_curve)) + n_steps = 200 # fixed for dummy data + + seed = (hash(model_name) ^ hash(round(case.x_ethanol, 4))) & 0xFFFFFFFF + rho_series = make_fake_density_timeseries( + rho_eq, n_steps, seed=seed, start_offset=0.01, tau=0.10, noise_sigma=0.0005 + ) + + ts_path = case_dir / "density_timeseries.csv" + with ts_path.open("w", newline="") as f: + w = csv.writer(f) + w.writerow(["step", "rho_g_cm3"]) + for i, rho in enumerate(rho_series): + w.writerow([i, f"{rho:.8f}"]) + + +if __name__ == "__main__": # TODO: delete this + # run a very small simulation to see if it does something reasonable + from mace.calculators import mace_mp + calc = mace_mp("data_old/mace-omat-0-small.model") + rho = run_one_case("data/mix_xe_0.10.extxyz", calc, nvt_stabilise_steps=250, npt_settle_steps=1000, nvt_thermalise_steps=250, npt_equil_steps=1000, npt_prod_steps=1000, log_every=50, workdir=Path("debug")) + print(rho) \ No newline at end of file diff --git a/ml_peg/calcs/liquids/ethanol_water_density/compositions.py b/ml_peg/calcs/liquids/ethanol_water_density/compositions.py new file mode 100644 index 000000000..0c47100bc --- /dev/null +++ b/ml_peg/calcs/liquids/ethanol_water_density/compositions.py @@ -0,0 +1,34 @@ +import csv +from dataclasses import dataclass +from pathlib import Path + +BENCH_ROOT = Path(__file__).resolve().parent +DATA_PATH = BENCH_ROOT / "data" + + +@dataclass(frozen=True) +class CompositionCase: + x_ethanol: float + filename: str + + +def load_compositions() -> list[CompositionCase]: + """ + Load composition grid. + + Expected CSV columns: x_ethanol, filename + """ + comps_file = DATA_PATH / "compositions.csv" + cases: list[CompositionCase] = [] + with comps_file.open(newline="") as f: + reader = csv.DictReader(f) + for row in reader: + cases.append( + CompositionCase( + x_ethanol=float(row["x_ethanol"]), + filename=row["filename"], + ) + ) + if not cases: + raise RuntimeError("No compositions found in compositions.csv") + return cases diff --git a/ml_peg/calcs/liquids/ethanol_water_density/fake_data.py b/ml_peg/calcs/liquids/ethanol_water_density/fake_data.py new file mode 100644 index 000000000..8fddcc073 --- /dev/null +++ b/ml_peg/calcs/liquids/ethanol_water_density/fake_data.py @@ -0,0 +1,191 @@ +# for debugging, to verify that metrics actually do something reasonable + +from dataclasses import dataclass +import numpy as np + +from ml_peg.analysis.liquids.ethanol_water_density.io_tools import read_ref_curve + +@dataclass(frozen=True) +class FakeCurveParams: + # Master knob: 0 -> perfect match, 1 -> very poor + severity: float = 0.0 + + # Individual error components (interpreted as "max at severity=1") + bias: float = 0.0 # additive offset in y-units + scale: float = 0.0 # multiplicative: y *= (1 + scale*...) + tilt: float = 0.0 # linear-in-x additive distortion + warp: float = 0.0 # smooth nonlinear additive distortion + + noise_sigma: float = 0.0 # iid gaussian noise in y-units + corr_len: float = 0.0 # if >0, adds correlated noise along x + + bump_amp: float = 0.0 # amplitude of local bump(s) + bump_center: float = 0.5 # x location of bump + bump_width: float = 0.08 # bump width (in x units) + + +def _smooth_random_field(xs: np.ndarray, corr_len: float, rng: np.random.Generator) -> np.ndarray: + """ + Create a zero-mean, ~unit-std smooth random field along xs using + a Gaussian kernel in x-distance. O(N^2) but tiny N here (6 points). + """ + if corr_len <= 0: + return np.zeros_like(xs) + + dx = xs[:, None] - xs[None, :] + K = np.exp(-0.5 * (dx / corr_len) ** 2) + # sample correlated normal: K^(1/2) z via cholesky (add jitter for stability) + L = np.linalg.cholesky(K + 1e-12 * np.eye(len(xs))) + z = rng.standard_normal(len(xs)) + field = L @ z + field = field - field.mean() + field = field / (field.std() + 1e-12) + return field + + +def make_fake_curve_from_ref( + xs_ref: list[float], + ys_ref: list[float], + *, + params: FakeCurveParams, + seed: int | None = 0, + clip: tuple[float | None, float | None] = (None, None), +) -> tuple[list[float], list[float]]: + """ + Return (xs, ys_fake) using the same xs as the reference. + Designed for density-like curves but works generically. + + `severity` scales *all* enabled components. For example, if bias=10 and + severity=0.2, you get ~2 units of bias (with a tiny randomization). + """ + sev = float(np.clip(params.severity, 0.0, 1.0)) + rng = np.random.default_rng(seed) + + xs = np.asarray(xs_ref, dtype=float) + y = np.asarray(ys_ref, dtype=float).copy() + + # Normalize x into [-1, 1] for stable “tilt/warp” magnitudes + x01 = (xs - xs.min()) / (xs.max() - xs.min() + 1e-12) + xpm = 2.0 * x01 - 1.0 + + # Small randomization so multiple models with same severity aren’t identical + # (but still deterministic for a given seed). + jitter = lambda: (0.85 + 0.30 * rng.random()) + + # 1) multiplicative scale error + if params.scale != 0.0 and sev > 0: + y *= (1.0 + (params.scale * sev * jitter())) + + # 2) additive bias + if params.bias != 0.0 and sev > 0: + y += (params.bias * sev * jitter()) + + # 3) linear tilt (additive) + if params.tilt != 0.0 and sev > 0: + y += (params.tilt * sev * jitter()) * xpm + + # 4) smooth nonlinear warp (additive): use a low-order smooth basis + if params.warp != 0.0 and sev > 0: + # cubic-ish shape distortion with zero mean + w = (xpm**3 - xpm * np.mean(xpm**2)) + w = w - w.mean() + w = w / (np.std(w) + 1e-12) + y += (params.warp * sev * jitter()) * w + + # 5) local bump to simulate specific composition failure + if params.bump_amp != 0.0 and sev > 0: + bump = np.exp(-0.5 * ((xs - params.bump_center) / (params.bump_width + 1e-12)) ** 2) + bump = bump / (bump.max() + 1e-12) + y += (params.bump_amp * sev * jitter()) * bump + + # 6) noise: iid + optional correlated component + if params.noise_sigma != 0.0 and sev > 0: + y += rng.normal(0.0, params.noise_sigma * sev, size=len(xs)) + + if params.corr_len > 0.0 and params.noise_sigma != 0.0 and sev > 0: + field = _smooth_random_field(xs, params.corr_len, rng) + y += field * (params.noise_sigma * sev * 0.8) + + lo, hi = clip + if lo is not None: + y = np.maximum(y, lo) + if hi is not None: + y = np.minimum(y, hi) + + return xs, y + + +# Convenience presets: "good", "medium", "bad" +def make_fake_curve( + kind: str|int, + seed: int | None = 0, +) -> tuple[list[float], list[float]]: + xs_ref, ys_ref = read_ref_curve() + + kind = kind.lower().strip() if isinstance(kind, str) else kind + if kind == "perfect" or kind == 0: + params = FakeCurveParams(severity=0.0) + elif kind == "good" or kind == 1: + params = FakeCurveParams( + severity=0.15, + bias=0.0, + scale=0.01, + tilt=0.003, + warp=0.002, + noise_sigma=0.001, + corr_len=0.12, + bump_amp=0.0, + ) + elif kind in {"medium", "ok"} or kind == 2: + params = FakeCurveParams( + severity=0.45, + bias=0.0, + scale=0.03, + tilt=0.01, + warp=0.01, + noise_sigma=0.004, + corr_len=0.15, + bump_amp=0.01, + bump_center=0.6, + bump_width=0.10, + ) + elif kind == "bad" or kind == 3: + params = FakeCurveParams( + severity=0.85, + bias=0.02, + scale=0.06, + tilt=0.03, + warp=0.04, + noise_sigma=0.01, + corr_len=0.18, + bump_amp=0.05, + bump_center=0.4, + bump_width=0.08, + ) + else: + raise ValueError(f"Unknown kind={kind!r} (use perfect/good/medium/bad)") + + return make_fake_curve_from_ref(xs_ref, ys_ref, params=params, seed=seed) + + +def make_fake_density_timeseries( + rho_eq: float, + n_steps: int, + *, + seed: int, + start_offset: float = 0.02, # initial deviation from eq + tau: float = 0.15, # relaxation rate (bigger -> faster) + noise_sigma: float = 0.001, # per-step noise +) -> list[float]: + rng = np.random.default_rng(seed) + rho0 = rho_eq + start_offset * (2 * rng.random() - 1) + + series = [] + rho = rho0 + for t in range(n_steps): + # exponential-ish relaxation to rho_eq + rho += tau * (rho_eq - rho) + # add noise + rho += rng.normal(0.0, noise_sigma) + series.append(float(rho)) + return series diff --git a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py new file mode 100644 index 000000000..a7adc65c0 --- /dev/null +++ b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py @@ -0,0 +1,167 @@ +import time +from contextlib import contextmanager +from pathlib import Path +from typing import Any + +import numpy as np +from ase.io import Trajectory, read, write +from ase.md import MDLogger, Langevin +from ase.md.langevinbaoab import LangevinBAOAB +from ase.md.nptberendsen import NPTBerendsen +from ase.md.velocitydistribution import MaxwellBoltzmannDistribution, Stationary, ZeroRotation +from ase.optimize import FIRE +from ase.units import fs, bar + + +def total_mass_kg(atoms): + amu_to_kg = 1.66053906660e-27 + return atoms.get_masses().sum() * amu_to_kg + + +def density_g_cm3(atoms): + V_A3 = atoms.get_volume() + V_m3 = V_A3 * 1e-30 + m_kg = total_mass_kg(atoms) + rho_kg_m3 = m_kg / V_m3 + return rho_kg_m3 / 1000.0 + + +def attach_basic_logging(dyn, atoms, md_logfile, log_every, t0): + logger = MDLogger( + dyn, + atoms, + md_logfile, + header=True, + stress=False, + peratom=False, + mode="a", + ) + dyn.attach(logger, interval=log_every) + + def progress(): + step = dyn.get_number_of_steps() + rho = density_g_cm3(atoms) + V = atoms.get_volume() + T = atoms.get_temperature() + elapsed = time.time() - t0 + + print( + f"[step {step:>8}] " + f"T={T:7.2f} K | " + f"V={V:10.2f} A^3 | " + f"rho={rho:7.4f} g/cm^3 | " + f"elapsed={elapsed:6.1f}s" + ) + + dyn.attach(progress, interval=log_every) + + +@contextmanager +def traj_logging(dyn, atoms, workdir, traj_every: int, name="md.traj"): + traj = None + if traj_every and traj_every > 0: + traj = Trajectory(str(workdir/name), "a", atoms) + dyn.attach(traj.write, interval=traj_every) + try: + yield traj + finally: + if traj is not None: + traj.close() + +def run_one_case( + struct_path: Path, + calc: Any, + *, + T_K: float = 298.15, + P_bar: float = 1.0, + dt_fs: float = 0.5, + nvt_stabilise_steps: int = 250, + npt_settle_steps = 7_500, + nvt_thermalise_steps: int = 1_000, + npt_equil_steps: int = 10_000, + npt_prod_steps: int = 25_000, + sample_every: int = 20, + log_every: int = 200, + log_trajectory_every: int=400, + dummy_data=False, + workdir: Path, +) -> float: + """ + Run NPT and return (mean_density, std_density). + + TODO: use lammps? Though I would guess GPU is the bottleneck so it wouldn't matter? + """ + if dummy_data: + return np.random.normal(loc=0.9,scale=0.05,size=npt_prod_steps//sample_every) + atoms = read(struct_path) + atoms.set_pbc(True) + atoms.wrap() + atoms.calc = calc + + # fast pre-relax + opt = FIRE(atoms, logfile=str(workdir / "opt.log")) + opt.run(fmax=0.15) + + # velocities + MaxwellBoltzmannDistribution(atoms, temperature_K=T_K) + Stationary(atoms) + ZeroRotation(atoms) + + dt = dt_fs * fs + t0 = time.time() + + # the used pre-relax is not good enough, do some Langevin NVT steps before starting NPT + dyn = Langevin(atoms, timestep=dt, temperature_K=T_K, friction=0.01) + attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) + with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): + dyn.run(nvt_stabilise_steps) + + # quick Berendsen settle close to target density + ps = 1000 * fs + dyn = NPTBerendsen( + atoms, + timestep=dt, + temperature_K=T_K, + pressure_au=P_bar * bar, + taut=0.07 * ps, + taup=0.4 * ps, + compressibility=4.5e-5, + ) + attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) + with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): + dyn.run(npt_settle_steps) + + # thermalise + MaxwellBoltzmannDistribution(atoms, temperature_K=T_K) + Stationary(atoms) + ZeroRotation(atoms) + dyn = Langevin(atoms, timestep=dt, temperature_K=T_K, friction=0.03) + attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) + with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): + dyn.run(nvt_thermalise_steps) + + # real NPT + dyn = LangevinBAOAB( # MTK + atoms, + timestep=dt, + temperature_K=T_K, + externalstress=P_bar * bar, + T_tau=0.1 * ps, + P_tau=1 * ps, + hydrostatic=True, + rng=0, + ) + attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) + with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): + dyn.run(npt_equil_steps) + + rhos = [] + n_samples = npt_prod_steps // sample_every + for _ in range(n_samples): + dyn.run(sample_every) + rhos.append(density_g_cm3(atoms)) + + # save final structure for debugging/repro + write(workdir / "final.extxyz", atoms) + + return np.array(rhos) From 2b5a1230622c11cbe57a5d6224bce78d7045a375 Mon Sep 17 00:00:00 2001 From: Arn Date: Sat, 7 Feb 2026 22:26:15 +0000 Subject: [PATCH 02/29] add reference --- ml_peg/analysis/liquids/ethanol_water_density/io_tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py b/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py index 2d4032d9c..88e662d1d 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py @@ -79,6 +79,8 @@ def read_ref_curve() -> tuple[list[float], list[float]]: """ Load densities given on a uniform weight-fraction grid and convert to mole fraction. + Densities from: + M. Southard and D. Green, Perry’s Chemical Engineers’ Handbook, 9th Edition. McGraw-Hill Education, 2018. Assumes: - 101 evenly spaced points From 7aa82c4ae04f624f4bd2e488e4a30816a3dda32f Mon Sep 17 00:00:00 2001 From: Arn Date: Tue, 10 Feb 2026 11:32:04 +0000 Subject: [PATCH 03/29] split calcs --- .../calc_ethanol_water_density.py | 118 +++++++++--------- 1 file changed, 56 insertions(+), 62 deletions(-) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index 6d105c7ad..7c3d3e7f6 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -6,107 +6,101 @@ import numpy as np import pytest -from ml_peg.calcs.liquids.ethanol_water_density.compositions import BENCH_ROOT, DATA_PATH, load_compositions -from ml_peg.calcs.liquids.ethanol_water_density.fake_data import make_fake_curve, make_fake_density_timeseries +from ml_peg.calcs.liquids.ethanol_water_density.compositions import ( + BENCH_ROOT, + DATA_PATH, + load_compositions, +) +from ml_peg.calcs.liquids.ethanol_water_density.fake_data import ( + make_fake_curve, + make_fake_density_timeseries, +) from ml_peg.calcs.liquids.ethanol_water_density.md_code import run_one_case from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models -# Local paths OUT_PATH = BENCH_ROOT / "outputs" MODELS = load_models(current_models) MODEL_INDEX = {name: i for i, name in enumerate(MODELS)} FAKE_DATA = os.getenv("FAKE_DENSITY_DATA", "") == "1" +# IMPORTANT: create the list once for parametrization +COMPOSITIONS = load_compositions() -@pytest.mark.parametrize("mlip", MODELS.items()) -def test_water_ethanol_density_curve(mlip: tuple[str, Any]) -> None: + +def _case_id(composition) -> str: + # nicer test ids in `pytest -vv` + return f"x={composition.x_ethanol:.2f}" + + +@pytest.mark.parametrize("mlip", MODELS.items(), ids=[n for n in MODELS.keys()]) +@pytest.mark.parametrize("composition", COMPOSITIONS, ids=[_case_id(c) for c in COMPOSITIONS]) +def test_water_ethanol_density_curve(mlip: tuple[str, Any], composition) -> None: if not FAKE_DATA: - water_ethanol_density_curve(mlip) + water_ethanol_density_curve_one_case(mlip, composition) else: - water_ethanol_density_dummy_data(mlip) + water_ethanol_density_dummy_data_one_case(mlip, composition) -def water_ethanol_density_curve(mlip: tuple[str, Any]) -> None: - """ - Run water–ethanol density curve benchmark for a single MLIP. - Writes: - - per-composition density time series (raw data) - - a summary CSV derived from those time series - """ +def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: model_name, model = mlip # TODO: dispersion ??? - cases = load_compositions() - # Where this model writes outputs model_out = OUT_PATH / model_name model_out.mkdir(parents=True, exist_ok=True) - # Get calculator calc = model.get_calculator() - for case in cases: - struct_path = DATA_PATH / case.filename - if not struct_path.exists(): - raise FileNotFoundError( - f"Missing structure for x={case.x_ethanol}: {struct_path}" - ) - - case_dir = model_out / f"x_ethanol_{case.x_ethanol:.2f}" - case_dir.mkdir(parents=True, exist_ok=True) - - # --- run simulation --- - rho_series = run_one_case( - struct_path, - calc, - workdir=case_dir, - ) - - # --- write raw density time series --- - ts_path = case_dir / "density_timeseries.csv" - with ts_path.open("w", newline="") as f: - w = csv.writer(f) - w.writerow(["step", "rho_g_cm3"]) - for i, rho in enumerate(rho_series): - w.writerow([i, f"{rho:.8f}"]) - - -def water_ethanol_density_dummy_data(mlip: tuple[str, Any]) -> None: + struct_path = DATA_PATH / case.filename + if not struct_path.exists(): + raise FileNotFoundError(f"Missing structure for x={case.x_ethanol}: {struct_path}") + + case_dir = model_out / f"x_ethanol_{case.x_ethanol:.2f}" + case_dir.mkdir(parents=True, exist_ok=True) + + rho_series = run_one_case(struct_path, calc, workdir=case_dir) + + ts_path = case_dir / "density_timeseries.csv" + with ts_path.open("w", newline="") as f: + w = csv.writer(f) + w.writerow(["step", "rho_g_cm3"]) + for i, rho in enumerate(rho_series): + w.writerow([i, f"{rho:.8f}"]) + + +def water_ethanol_density_dummy_data_one_case(mlip: tuple[str, Any], case) -> None: model_name, model = mlip - cases = load_compositions() model_out = OUT_PATH / model_name model_out.mkdir(parents=True, exist_ok=True) - # one curve per model model_kind = MODEL_INDEX[model_name] % 4 xs_curve, ys_curve = make_fake_curve(model_kind, seed=MODEL_INDEX[model_name]) xs_curve = np.asarray(xs_curve, dtype=float) ys_curve = np.asarray(ys_curve, dtype=float) - for case in cases: - case_dir = model_out / f"x_ethanol_{case.x_ethanol:.2f}" - case_dir.mkdir(parents=True, exist_ok=True) + case_dir = model_out / f"x_ethanol_{case.x_ethanol:.2f}" + case_dir.mkdir(parents=True, exist_ok=True) - rho_eq = float(np.interp(case.x_ethanol, xs_curve, ys_curve)) - n_steps = 200 # fixed for dummy data + rho_eq = float(np.interp(case.x_ethanol, xs_curve, ys_curve)) + n_steps = 200 # fixed for dummy data - seed = (hash(model_name) ^ hash(round(case.x_ethanol, 4))) & 0xFFFFFFFF - rho_series = make_fake_density_timeseries( - rho_eq, n_steps, seed=seed, start_offset=0.01, tau=0.10, noise_sigma=0.0005 - ) + seed = (hash(model_name) ^ hash(round(case.x_ethanol, 4))) & 0xFFFFFFFF + rho_series = make_fake_density_timeseries( + rho_eq, n_steps, seed=seed, start_offset=0.01, tau=0.10, noise_sigma=0.0005 + ) - ts_path = case_dir / "density_timeseries.csv" - with ts_path.open("w", newline="") as f: - w = csv.writer(f) - w.writerow(["step", "rho_g_cm3"]) - for i, rho in enumerate(rho_series): - w.writerow([i, f"{rho:.8f}"]) + ts_path = case_dir / "density_timeseries.csv" + with ts_path.open("w", newline="") as f: + w = csv.writer(f) + w.writerow(["step", "rho_g_cm3"]) + for i, rho in enumerate(rho_series): + w.writerow([i, f"{rho:.8f}"]) if __name__ == "__main__": # TODO: delete this # run a very small simulation to see if it does something reasonable from mace.calculators import mace_mp calc = mace_mp("data_old/mace-omat-0-small.model") - rho = run_one_case("data/mix_xe_0.10.extxyz", calc, nvt_stabilise_steps=250, npt_settle_steps=1000, nvt_thermalise_steps=250, npt_equil_steps=1000, npt_prod_steps=1000, log_every=50, workdir=Path("debug")) + rho = run_one_case("data/mix_xe_0.00.extxyz", calc, nvt_stabilise_steps=3000, npt_settle_steps=1000, nvt_thermalise_steps=250, npt_equil_steps=1000, npt_prod_steps=1000, log_every=50, workdir=Path("debug")) print(rho) \ No newline at end of file From 7140c3c267575892a5bdd7d5e305df5c0af5da84 Mon Sep 17 00:00:00 2001 From: Arn Date: Tue, 10 Feb 2026 12:07:45 +0000 Subject: [PATCH 04/29] Better checkpointing --- .../calc_ethanol_water_density.py | 15 +-- .../liquids/ethanol_water_density/io_tools.py | 120 ++++++++++++++++++ .../liquids/ethanol_water_density/md_code.py | 30 +++-- 3 files changed, 144 insertions(+), 21 deletions(-) create mode 100644 ml_peg/calcs/liquids/ethanol_water_density/io_tools.py diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index 7c3d3e7f6..1b0574554 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -15,6 +15,7 @@ make_fake_curve, make_fake_density_timeseries, ) +from ml_peg.calcs.liquids.ethanol_water_density.io_tools import write_density_timeseries_checkpointed from ml_peg.calcs.liquids.ethanol_water_density.md_code import run_one_case from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models @@ -61,11 +62,7 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: rho_series = run_one_case(struct_path, calc, workdir=case_dir) ts_path = case_dir / "density_timeseries.csv" - with ts_path.open("w", newline="") as f: - w = csv.writer(f) - w.writerow(["step", "rho_g_cm3"]) - for i, rho in enumerate(rho_series): - w.writerow([i, f"{rho:.8f}"]) + write_density_timeseries_checkpointed(ts_path, rho_series) def water_ethanol_density_dummy_data_one_case(mlip: tuple[str, Any], case) -> None: @@ -91,16 +88,12 @@ def water_ethanol_density_dummy_data_one_case(mlip: tuple[str, Any], case) -> No ) ts_path = case_dir / "density_timeseries.csv" - with ts_path.open("w", newline="") as f: - w = csv.writer(f) - w.writerow(["step", "rho_g_cm3"]) - for i, rho in enumerate(rho_series): - w.writerow([i, f"{rho:.8f}"]) + write_density_timeseries_checkpointed(ts_path, rho_series) if __name__ == "__main__": # TODO: delete this # run a very small simulation to see if it does something reasonable from mace.calculators import mace_mp calc = mace_mp("data_old/mace-omat-0-small.model") - rho = run_one_case("data/mix_xe_0.00.extxyz", calc, nvt_stabilise_steps=3000, npt_settle_steps=1000, nvt_thermalise_steps=250, npt_equil_steps=1000, npt_prod_steps=1000, log_every=50, workdir=Path("debug")) + rho = run_one_case("data/mix_xe_0.00.extxyz", calc, nvt_stabilise_steps=200, npt_settle_steps=1000, nvt_thermalise_steps=250, npt_equil_steps=1000, npt_prod_steps=1000, log_every=50, workdir=Path("debug")) print(rho) \ No newline at end of file diff --git a/ml_peg/calcs/liquids/ethanol_water_density/io_tools.py b/ml_peg/calcs/liquids/ethanol_water_density/io_tools.py new file mode 100644 index 000000000..e3a03a4c4 --- /dev/null +++ b/ml_peg/calcs/liquids/ethanol_water_density/io_tools.py @@ -0,0 +1,120 @@ +import csv +from pathlib import Path +from typing import Iterable + + +def write_density_timeseries_checkpointed( + ts_path: Path, + rho_series: Iterable[float], + *, + min_match_fraction: float = 0.8, +) -> None: + """ + Write density_timeseries.csv with checkpoint validation. + + Behavior + -------- + If file exists: + - read existing values + - verify >= min_match_fraction already match rho_series + - overwrite anyway + - raise AssertionError if insufficient match + + If file does not exist: + - just write + + Helps detect broken resume logic while still allowing overwrite. + """ + rho_series = list(rho_series) + + # ------------------------- + # 1) Validate existing file + # ------------------------- + if ts_path.exists(): + old_vals: list[float] = [] + + try: + with ts_path.open() as f: + r = csv.reader(f) + next(r, None) # header + for row in r: + if len(row) >= 2: + old_vals.append(float(row[1])) + except Exception: + old_vals = [] + + if old_vals: + n = min(len(old_vals), len(rho_series)) + + matches = sum( + abs(old_vals[i] - rho_series[i]) < 1e-6 + for i in range(n) + ) + + frac = matches / n if n else 0.0 + + if frac < min_match_fraction: + raise AssertionError( + f"{ts_path}: only {frac:.1%} of checkpoint values match " + f"(expected ≥ {min_match_fraction:.0%}). " + "run_one_case resume likely broken." + ) + + # ------------------------- + # 2) Always rewrite file + # ------------------------- + with ts_path.open("w", newline="") as f: + w = csv.writer(f) + w.writerow(["step", "rho_g_cm3"]) + for i, rho in enumerate(rho_series): + w.writerow([i, f"{rho:.8f}"]) + + + + + +class DensityTimeseriesLogger: + """ + Streaming CSV logger for density time series. + + - deletes existing file on start (optional) + - writes header once + - append rows as simulation runs + - flushes every write (crash-safe) + - usable as context manager + """ + + def __init__(self, path: Path, *, overwrite: bool = True): + self.path = Path(path) + self.overwrite = overwrite + self._f = None + self._writer = None + self._step = 0 + + # --------------------- + # context manager API + # --------------------- + def __enter__(self): + if self.overwrite and self.path.exists(): + self.path.unlink() + + self._f = self.path.open("w", newline="") + self._writer = csv.writer(self._f) + + self._writer.writerow(["step", "rho_g_cm3"]) + self._f.flush() + + return self + + def __exit__(self, exc_type, exc, tb): + if self._f: + self._f.close() + + # --------------------- + # logging + # --------------------- + def write(self, rho: float): + """Write one density value.""" + self._writer.writerow([self._step, f"{rho:.8f}"]) + self._f.flush() # critical for crash safety + self._step += 1 diff --git a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py index a7adc65c0..c8c2ba280 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py @@ -1,7 +1,7 @@ import time from contextlib import contextmanager from pathlib import Path -from typing import Any +from typing import Any, Iterable import numpy as np from ase.io import Trajectory, read, write @@ -12,6 +12,8 @@ from ase.optimize import FIRE from ase.units import fs, bar +from ml_peg.calcs.liquids.ethanol_water_density.io_tools import DensityTimeseriesLogger + def total_mass_kg(atoms): amu_to_kg = 1.66053906660e-27 @@ -75,7 +77,7 @@ def run_one_case( T_K: float = 298.15, P_bar: float = 1.0, dt_fs: float = 0.5, - nvt_stabilise_steps: int = 250, + nvt_stabilise_steps: int = 4_000, npt_settle_steps = 7_500, nvt_thermalise_steps: int = 1_000, npt_equil_steps: int = 10_000, @@ -85,14 +87,19 @@ def run_one_case( log_trajectory_every: int=400, dummy_data=False, workdir: Path, -) -> float: +) -> Iterable[float]: """ Run NPT and return (mean_density, std_density). TODO: use lammps? Though I would guess GPU is the bottleneck so it wouldn't matter? """ + ts_path = workdir / "density_timeseries.csv" if dummy_data: - return np.random.normal(loc=0.9,scale=0.05,size=npt_prod_steps//sample_every) + rho_series = np.random.normal(loc=0.9,scale=0.05,size=npt_prod_steps//sample_every) + with DensityTimeseriesLogger(ts_path) as density_log: + for rho in rho_series: + density_log.write(rho) + return rho_series atoms = read(struct_path) atoms.set_pbc(True) atoms.wrap() @@ -111,7 +118,7 @@ def run_one_case( t0 = time.time() # the used pre-relax is not good enough, do some Langevin NVT steps before starting NPT - dyn = Langevin(atoms, timestep=dt, temperature_K=T_K, friction=0.01) + dyn = Langevin(atoms, timestep=dt, temperature_K=T_K, friction=0.02) attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): dyn.run(nvt_stabilise_steps) @@ -155,11 +162,14 @@ def run_one_case( with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): dyn.run(npt_equil_steps) - rhos = [] - n_samples = npt_prod_steps // sample_every - for _ in range(n_samples): - dyn.run(sample_every) - rhos.append(density_g_cm3(atoms)) + rhos = [] + n_samples = npt_prod_steps // sample_every + with DensityTimeseriesLogger(ts_path) as density_log: + for _ in range(n_samples): + dyn.run(sample_every) + rho = density_g_cm3(atoms) + rhos.append(rho) + density_log.write(rho) # save final structure for debugging/repro write(workdir / "final.extxyz", atoms) From a2db61a4abe787ca8ec9fb1de77d8d99aeadec49 Mon Sep 17 00:00:00 2001 From: Arn Date: Tue, 10 Feb 2026 16:00:22 +0000 Subject: [PATCH 05/29] linting and ruff --- .../{analysis.py => _analysis.py} | 64 ++++++++------- .../{io_tools.py => _io_tools.py} | 25 ++++-- .../analyse_ethanol_water_density.py | 82 +++++++++++++------ .../app_ethanol_water_density.py | 15 ++-- .../{compositions.py => _compositions.py} | 6 ++ .../{fake_data.py => _fake_data.py} | 66 ++++++++------- .../{io_tools.py => _io_tools.py} | 16 ++-- .../calc_ethanol_water_density.py | 37 +++++++-- .../liquids/ethanol_water_density/md_code.py | 73 ++++++++++------- 9 files changed, 242 insertions(+), 142 deletions(-) rename ml_peg/analysis/liquids/ethanol_water_density/{analysis.py => _analysis.py} (65%) rename ml_peg/analysis/liquids/ethanol_water_density/{io_tools.py => _io_tools.py} (85%) rename ml_peg/calcs/liquids/ethanol_water_density/{compositions.py => _compositions.py} (89%) rename ml_peg/calcs/liquids/ethanol_water_density/{fake_data.py => _fake_data.py} (74%) rename ml_peg/calcs/liquids/ethanol_water_density/{io_tools.py => _io_tools.py} (92%) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/analysis.py b/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py similarity index 65% rename from ml_peg/analysis/liquids/ethanol_water_density/analysis.py rename to ml_peg/analysis/liquids/ethanol_water_density/_analysis.py index c96c2aaf1..bcc5fbb55 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/analysis.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py @@ -1,30 +1,38 @@ +"""analyse ethanol-water density curves.""" + +from __future__ import annotations + import numpy as np -M_WATER = 18.01528 # g/mol -M_ETOH = 46.06844 # g/mol +M_WATER = 18.01528 # g/mol +M_ETOH = 46.06844 # g/mol # Pick densities consistent with your reference conditions (T,P!) # If your ref curve is at ~20°C, these are around: -RHO_WATER_PURE = 0.9982 # g/cm^3 -RHO_ETH_PURE = 0.7893 # g/cm^3 - -def x_to_phi_ethanol(x, rho_mix, - *, M_eth=M_ETOH, M_water=M_WATER, - rho_eth=RHO_ETH_PURE, rho_water=RHO_WATER_PURE): - """ - Convert ethanol mole fraction x to ethanol volume fraction phi using - mixture density rho_mix and pure-component densities as volume proxies. - """ +RHO_WATER_PURE = 0.9982 # g/cm^3 +RHO_ETH_PURE = 0.7893 # g/cm^3 + + +def x_to_phi_ethanol( + x, + rho_mix, + *, + m_eth=M_ETOH, + m_water=M_WATER, + rho_eth=RHO_ETH_PURE, + rho_water=RHO_WATER_PURE, +): # TODO: double check formula + """Convert ethanol mole fraction x to ethanol volume fraction phi.""" x = np.asarray(x, dtype=float) rho_mix = np.asarray(rho_mix, dtype=float) - m_eth = x * M_eth - m_wat = (1.0 - x) * M_water + m_eth = x * m_eth + m_wat = (1.0 - x) * m_water + + v_mix = (m_eth + m_wat) / rho_mix # cm^3 per "1 mol mixture basis" + v_eth = m_eth / rho_eth # cm^3 (proxy) + return v_eth / v_mix - V_mix = (m_eth + m_wat) / rho_mix # cm^3 per "1 mol mixture basis" - V_eth = m_eth / rho_eth # cm^3 (proxy) - phi = V_eth / V_mix - return phi def weight_to_mole_fraction(w): """ @@ -44,17 +52,19 @@ def _rmse(a: np.ndarray, b: np.ndarray) -> float: def _interp_1d(x_src: np.ndarray, y_src: np.ndarray, x_tgt: np.ndarray) -> np.ndarray: """ - Linear interpolation. Requires x_tgt within [min(x_src), max(x_src)]. + Linear interpolation. + + Requires x_tgt within [min(x_src), max(x_src)]. """ if np.any(x_tgt < x_src.min() - 1e-12) or np.any(x_tgt > x_src.max() + 1e-12): raise ValueError("Target x values fall outside reference interpolation range.") return np.interp(x_tgt, x_src, y_src) -def _endpoints_at_0_1(x: np.ndarray, y: np.ndarray, tol: float = 1e-8) -> tuple[float, float]: - """ - Return y(x=0) and y(x=1). Requires that x includes (approximately) 0 and 1. - """ +def _endpoints_at_0_1( + x: np.ndarray, y: np.ndarray, tol: float = 1e-8 +) -> tuple[float, float]: + """Return y(x=0) and y(x=1). Requires that x includes (approximately) 0 and 1.""" i0 = np.where(np.isclose(x, 0.0, atol=tol))[0] i1 = np.where(np.isclose(x, 1.0, atol=tol))[0] if len(i0) != 1 or len(i1) != 1: @@ -67,9 +77,7 @@ def _linear_baseline(x: np.ndarray, y0: float, y1: float) -> np.ndarray: def _excess_curve(x: np.ndarray, y: np.ndarray) -> np.ndarray: - """ - Excess relative to the dataset's own pure endpoints (x=0 and x=1). - """ + """Excess relative to the dataset's own pure endpoints (x=0 and x=1).""" y0, y1 = _endpoints_at_0_1(x, y) return y - _linear_baseline(x, y0, y1) @@ -77,6 +85,7 @@ def _excess_curve(x: np.ndarray, y: np.ndarray) -> np.ndarray: def _peak_x_quadratic(x: np.ndarray, y: np.ndarray) -> float: """ Estimate x position of minimum y. + - If min is interior and we have neighbors, fit quadratic through 3 points. - Otherwise fall back to argmin x. """ @@ -99,5 +108,4 @@ def _peak_x_quadratic(x: np.ndarray, y: np.ndarray) -> float: xv = -b / (2.0 * a) # Clamp to local bracket so we don't get silly extrapolation - xv = float(np.clip(xv, xs.min(), xs.max())) - return xv + return float(np.clip(xv, xs.min(), xs.max())) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py b/ml_peg/analysis/liquids/ethanol_water_density/_io_tools.py similarity index 85% rename from ml_peg/analysis/liquids/ethanol_water_density/io_tools.py rename to ml_peg/analysis/liquids/ethanol_water_density/_io_tools.py index 88e662d1d..4015f0fec 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/_io_tools.py @@ -1,11 +1,17 @@ +"""i/o tools for analysis of ethanol-water densities.""" + +from __future__ import annotations + import csv import os from pathlib import Path -import numpy as np from matplotlib import pyplot as plt +import numpy as np -from ml_peg.analysis.liquids.ethanol_water_density.analysis import weight_to_mole_fraction +from ml_peg.analysis.liquids.ethanol_water_density.analysis import ( + weight_to_mole_fraction, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT @@ -13,7 +19,7 @@ BENCHMARK = "ethanol_water_density" CALC_PATH = CALCS_ROOT / CATEGORY / BENCHMARK / "outputs" OUT_PATH = APP_ROOT / "data" / CATEGORY / BENCHMARK -DATA_PATH = CALCS_ROOT / CATEGORY / BENCHMARK / "data" +DATA_PATH = CALCS_ROOT / CATEGORY / BENCHMARK / "data" def _debug_plot_enabled() -> bool: @@ -70,17 +76,22 @@ def _read_model_curve(model_name: str) -> tuple[list[float], list[float]]: ax.set_xlabel("step") ax.set_ylabel("rho / g cm$^{-3}$") - _savefig(fig, OUT_PATH / "debug" / model_name / f"x_{x_ethanol:.2f}_timeseries.svg") + _savefig( + fig, + OUT_PATH / "debug" / model_name / f"x_{x_ethanol:.2f}_timeseries.svg", + ) return xs, rhos def read_ref_curve() -> tuple[list[float], list[float]]: """ - Load densities given on a uniform weight-fraction grid - and convert to mole fraction. + Load densities given on a uniform weight-fraction grid. + + And convert to mole fraction. Densities from: - M. Southard and D. Green, Perry’s Chemical Engineers’ Handbook, 9th Edition. McGraw-Hill Education, 2018. + M. Southard and D. Green, Perry’s Chemical Engineers’ Handbook, + 9th Edition. McGraw-Hill Education, 2018. Assumes: - 101 evenly spaced points diff --git a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py index 6c8c1a0b1..3827ec510 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py @@ -1,20 +1,33 @@ +"""analyse ethanol-water density curves.""" + # TODO: remove hardcoded things? +from __future__ import annotations + from pathlib import Path -import numpy as np import matplotlib.pyplot as plt +import numpy as np import pytest -from ml_peg.analysis.liquids.ethanol_water_density.analysis import _rmse, _interp_1d, \ - _excess_curve, _peak_x_quadratic, x_to_phi_ethanol -from ml_peg.analysis.liquids.ethanol_water_density.io_tools import OUT_PATH, _debug_plot_enabled, _savefig, \ - _read_model_curve, read_ref_curve +from ml_peg.analysis.liquids.ethanol_water_density.analysis import ( + _excess_curve, + _interp_1d, + _peak_x_quadratic, + _rmse, + x_to_phi_ethanol, +) +from ml_peg.analysis.liquids.ethanol_water_density.io_tools import ( + OUT_PATH, + _debug_plot_enabled, + _read_model_curve, + _savefig, + read_ref_curve, +) from ml_peg.analysis.utils.decorators import build_table, plot_parity from ml_peg.analysis.utils.utils import load_metrics_config from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models - MODELS = get_model_names(current_models) MODEL_INDEX = {name: i for i, name in enumerate(MODELS)} # duplicate in calc @@ -29,6 +42,7 @@ @pytest.fixture(scope="session") def ref_curve() -> tuple[np.ndarray, np.ndarray]: + """Return reference density curve.""" x_ref, rho_ref = read_ref_curve() x = np.asarray(x_ref, dtype=float) rho = np.asarray(rho_ref, dtype=float) @@ -40,6 +54,7 @@ def ref_curve() -> tuple[np.ndarray, np.ndarray]: @pytest.fixture def model_curves() -> dict[str, tuple[np.ndarray, np.ndarray]]: + """Return simulated density curves.""" curves: dict[str, tuple[np.ndarray, np.ndarray]] = {} for model_name in MODELS: xs, rhos = _read_model_curve(model_name) @@ -57,11 +72,12 @@ def model_curves() -> dict[str, tuple[np.ndarray, np.ndarray]]: title="Ethanol–water density (293.15 K)", x_label="Reference density / g cm⁻³", y_label="Predicted density / g cm⁻³", - #hoverdata={ + # hoverdata={ # "x_ethanol": [], # filled in fixture - #}, + # }, ) # TODO: read docs!!! doesn't seem to work yet. def densities_parity(ref_curve, model_curves) -> dict[str, list]: + """Parity plot of simulated and reference density.""" x_ref, rho_ref = ref_curve # Use the first model's x grid for hover labels (parity requires same-length lists) @@ -84,15 +100,14 @@ def densities_parity(ref_curve, model_curves) -> dict[str, list]: rho_m_on_grid = rho_m results[m] = list(rho_m_on_grid) - ## Patch hoverdata list in-place (decorator reads the dict) - ## NOTE: if your decorator captures hoverdata at decoration time, - ## switch to hoverdata={"x_ethanol": x_labels()} fixture pattern like the docs. - #densities_parity.__wrapped__.__dict__.setdefault("hoverdata", {})["x_ethanol"] = list(x_grid) - return results + @pytest.fixture -def debug_curve_plots(ref_curve, model_curves) -> None: # TODO should I remove or use a different format? +def debug_curve_plots( + ref_curve, model_curves +) -> None: # TODO should I remove or use a different format? + """Plot density curves.""" if not _debug_plot_enabled(): return print("plotting curves") @@ -120,7 +135,9 @@ def debug_curve_plots(ref_curve, model_curves) -> None: # TODO should I remove fig, ax = plt.subplots() ax.plot(x_ref, _excess_curve(x_ref, rho_ref), label="ref (dense)") ax.plot(x_m, _excess_curve(x_m, rho_m), marker="o", label=f"{m} (model)") - ax.plot(x_m, _excess_curve(x_m, rho_ref_m), marker="x", label="ref on model grid") + ax.plot( + x_m, _excess_curve(x_m, rho_ref_m), marker="x", label="ref on model grid" + ) ax.set_title(f"Density curve: {m}") ax.set_xlabel("x_ethanol") ax.set_ylabel("rho / g cm$^{-3}$") @@ -131,7 +148,7 @@ def debug_curve_plots(ref_curve, model_curves) -> None: # TODO should I remove # volume fraction plot phi_ref = x_to_phi_ethanol(x_ref, rho_ref) - phi_m = x_to_phi_ethanol(x_m, rho_m) + phi_m = x_to_phi_ethanol(x_m, rho_m) fig, ax = plt.subplots() ax.plot(phi_ref, rho_ref, label="ref (dense)") @@ -150,6 +167,7 @@ def debug_curve_plots(ref_curve, model_curves) -> None: # TODO should I remove @pytest.fixture def rmse_density(ref_curve, model_curves) -> dict[str, float]: + """RMSE of the density vs reference density.""" x_ref, rho_ref = ref_curve out: dict[str, float] = {} for m, (x_m, rho_m) in model_curves.items(): @@ -160,9 +178,7 @@ def rmse_density(ref_curve, model_curves) -> dict[str, float]: @pytest.fixture def rmse_excess_density(ref_curve, model_curves) -> dict[str, float]: - """ - RMSE of excess density (detrended by each dataset's own pure endpoints). - """ + """RMSE of excess density (detrended by each dataset's own pure endpoints).""" x_ref, rho_ref = ref_curve out: dict[str, float] = {} @@ -198,19 +214,23 @@ def peak_x_error(ref_curve, model_curves) -> dict[str, float]: return out + # ----------------------------------------------------------------------------- # Table # ----------------------------------------------------------------------------- + @pytest.fixture @build_table( thresholds=DEFAULT_THRESHOLDS, filename=OUT_PATH / "density_metrics_table.json", metric_tooltips={ "Model": "Name of the model", - "RMSE density": "RMSE between model and reference density at model compositions (g cm⁻³).", + "RMSE density": "RMSE between model and reference density" + "at model compositions (g cm⁻³).", "RMSE excess density": ( - "RMSE after subtracting each curve’s linear baseline between pure endpoints (g cm⁻³)." + "RMSE after subtracting each curve’s linear baseline" + "between pure endpoints (g cm⁻³)." ), "Peak x error": ( "Absolute difference in mole-fraction location of maximum excess density." @@ -222,6 +242,7 @@ def metrics( rmse_excess_density: dict[str, float], peak_x_error: dict[str, float], ) -> dict[str, dict]: + """Return metric data.""" return { "RMSE density": rmse_density, "RMSE excess density": rmse_excess_density, @@ -229,10 +250,17 @@ def metrics( } -def test_ethanol_water_density(metrics: dict[str, dict], densities_parity: dict[str, list], debug_curve_plots) -> None: - """ - Launch analysis (decorators handle writing JSON artifacts for the app). - """ - print(MODEL_INDEX) # TODO: these print statements may be useful for debugging, but should I remove? - print({key0:{MODEL_INDEX[name]: value for name, value in value0.items()} for key0, value0 in metrics.items()}) +def test_ethanol_water_density( + metrics: dict[str, dict], densities_parity: dict[str, list], debug_curve_plots +) -> None: + """Launch analysis.""" + print( + MODEL_INDEX + ) # TODO: these print statements may be useful for debugging, but should I remove? + print( + { + key0: {MODEL_INDEX[name]: value for name, value in value0.items()} + for key0, value0 in metrics.items() + } + ) return diff --git a/ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py b/ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py index 2ded1d118..cb4e3c400 100644 --- a/ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py +++ b/ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py @@ -1,6 +1,9 @@ -#TODO: This does not work. Fix this +# TODO: This does not work. Fix this """Run ethanol–water density (decomposition curves) app.""" + +from __future__ import annotations + from dash import Dash from dash.html import Div @@ -16,9 +19,7 @@ CATEGORY = "liquids" BENCHMARK_NAME = "ethanol_water_density" -DOCS_URL = ( - "https://ddmms.github.io/ml-peg/user_guide/benchmarks/" # TODO: update to the right anchor -) +DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/" DATA_PATH = APP_ROOT / "data" / CATEGORY / BENCHMARK_NAME @@ -28,11 +29,11 @@ class EthanolWaterDecompositionCurvesApp(BaseApp): def register_callbacks(self) -> None: """Register callbacks to app.""" - parity = read_plot(DATA_PATH / "density_parity.json", id=f"{BENCHMARK_NAME}-figure") + parity = read_plot( + DATA_PATH / "density_parity.json", id=f"{BENCHMARK_NAME}-figure" + ) # When the user clicks a metric column in the table, show the parity plot. - # (This mirrors the GMTKN55 pattern: different columns can map to different plots; - # here they all map to the same parity plot artifact.) plot_from_table_column( table_id=self.table_id, plot_id=f"{BENCHMARK_NAME}-figure-placeholder", diff --git a/ml_peg/calcs/liquids/ethanol_water_density/compositions.py b/ml_peg/calcs/liquids/ethanol_water_density/_compositions.py similarity index 89% rename from ml_peg/calcs/liquids/ethanol_water_density/compositions.py rename to ml_peg/calcs/liquids/ethanol_water_density/_compositions.py index 0c47100bc..bac4af789 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/compositions.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/_compositions.py @@ -1,3 +1,7 @@ +"""Load composition data.""" + +from __future__ import annotations + import csv from dataclasses import dataclass from pathlib import Path @@ -8,6 +12,8 @@ @dataclass(frozen=True) class CompositionCase: + """Map composition to file.""" + x_ethanol: float filename: str diff --git a/ml_peg/calcs/liquids/ethanol_water_density/fake_data.py b/ml_peg/calcs/liquids/ethanol_water_density/_fake_data.py similarity index 74% rename from ml_peg/calcs/liquids/ethanol_water_density/fake_data.py rename to ml_peg/calcs/liquids/ethanol_water_density/_fake_data.py index 8fddcc073..0b28195e0 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/fake_data.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/_fake_data.py @@ -1,46 +1,49 @@ -# for debugging, to verify that metrics actually do something reasonable +"""for debugging, to verify that metrics actually do something reasonable.""" + +from __future__ import annotations from dataclasses import dataclass + import numpy as np from ml_peg.analysis.liquids.ethanol_water_density.io_tools import read_ref_curve + @dataclass(frozen=True) class FakeCurveParams: + """Class for fake curve parameters.""" + # Master knob: 0 -> perfect match, 1 -> very poor severity: float = 0.0 # Individual error components (interpreted as "max at severity=1") - bias: float = 0.0 # additive offset in y-units - scale: float = 0.0 # multiplicative: y *= (1 + scale*...) - tilt: float = 0.0 # linear-in-x additive distortion - warp: float = 0.0 # smooth nonlinear additive distortion + bias: float = 0.0 # additive offset in y-units + scale: float = 0.0 # multiplicative: y *= (1 + scale*...) + tilt: float = 0.0 # linear-in-x additive distortion + warp: float = 0.0 # smooth nonlinear additive distortion - noise_sigma: float = 0.0 # iid gaussian noise in y-units - corr_len: float = 0.0 # if >0, adds correlated noise along x + noise_sigma: float = 0.0 # iid gaussian noise in y-units + corr_len: float = 0.0 # if >0, adds correlated noise along x - bump_amp: float = 0.0 # amplitude of local bump(s) - bump_center: float = 0.5 # x location of bump - bump_width: float = 0.08 # bump width (in x units) + bump_amp: float = 0.0 # amplitude of local bump(s) + bump_center: float = 0.5 # x location of bump + bump_width: float = 0.08 # bump width (in x units) -def _smooth_random_field(xs: np.ndarray, corr_len: float, rng: np.random.Generator) -> np.ndarray: - """ - Create a zero-mean, ~unit-std smooth random field along xs using - a Gaussian kernel in x-distance. O(N^2) but tiny N here (6 points). - """ +def _smooth_random_field( + xs: np.ndarray, corr_len: float, rng: np.random.Generator +) -> np.ndarray: + """Create a zero-mean, ~unit-std smooth random field along xs.""" if corr_len <= 0: return np.zeros_like(xs) dx = xs[:, None] - xs[None, :] - K = np.exp(-0.5 * (dx / corr_len) ** 2) + k = np.exp(-0.5 * (dx / corr_len) ** 2) # sample correlated normal: K^(1/2) z via cholesky (add jitter for stability) - L = np.linalg.cholesky(K + 1e-12 * np.eye(len(xs))) z = rng.standard_normal(len(xs)) - field = L @ z + field = np.linalg.cholesky(k + 1e-12 * np.eye(len(xs))) @ z field = field - field.mean() - field = field / (field.std() + 1e-12) - return field + return field / (field.std() + 1e-12) def make_fake_curve_from_ref( @@ -53,7 +56,6 @@ def make_fake_curve_from_ref( ) -> tuple[list[float], list[float]]: """ Return (xs, ys_fake) using the same xs as the reference. - Designed for density-like curves but works generically. `severity` scales *all* enabled components. For example, if bias=10 and severity=0.2, you get ~2 units of bias (with a tiny randomization). @@ -70,15 +72,16 @@ def make_fake_curve_from_ref( # Small randomization so multiple models with same severity aren’t identical # (but still deterministic for a given seed). - jitter = lambda: (0.85 + 0.30 * rng.random()) + def jitter(): + return 0.85 + 0.30 * rng.random() # 1) multiplicative scale error if params.scale != 0.0 and sev > 0: - y *= (1.0 + (params.scale * sev * jitter())) + y *= 1.0 + (params.scale * sev * jitter()) # 2) additive bias if params.bias != 0.0 and sev > 0: - y += (params.bias * sev * jitter()) + y += params.bias * sev * jitter() # 3) linear tilt (additive) if params.tilt != 0.0 and sev > 0: @@ -87,14 +90,16 @@ def make_fake_curve_from_ref( # 4) smooth nonlinear warp (additive): use a low-order smooth basis if params.warp != 0.0 and sev > 0: # cubic-ish shape distortion with zero mean - w = (xpm**3 - xpm * np.mean(xpm**2)) + w = xpm**3 - xpm * np.mean(xpm**2) w = w - w.mean() w = w / (np.std(w) + 1e-12) y += (params.warp * sev * jitter()) * w # 5) local bump to simulate specific composition failure if params.bump_amp != 0.0 and sev > 0: - bump = np.exp(-0.5 * ((xs - params.bump_center) / (params.bump_width + 1e-12)) ** 2) + bump = np.exp( + -0.5 * ((xs - params.bump_center) / (params.bump_width + 1e-12)) ** 2 + ) bump = bump / (bump.max() + 1e-12) y += (params.bump_amp * sev * jitter()) * bump @@ -115,11 +120,11 @@ def make_fake_curve_from_ref( return xs, y -# Convenience presets: "good", "medium", "bad" def make_fake_curve( - kind: str|int, + kind: str | int, seed: int | None = 0, ) -> tuple[list[float], list[float]]: + """Make a fake density curve based on a reference.""" xs_ref, ys_ref = read_ref_curve() kind = kind.lower().strip() if isinstance(kind, str) else kind @@ -174,15 +179,16 @@ def make_fake_density_timeseries( *, seed: int, start_offset: float = 0.02, # initial deviation from eq - tau: float = 0.15, # relaxation rate (bigger -> faster) + tau: float = 0.15, # relaxation rate (bigger -> faster) noise_sigma: float = 0.001, # per-step noise ) -> list[float]: + """Make a time series of fake density values.""" rng = np.random.default_rng(seed) rho0 = rho_eq + start_offset * (2 * rng.random() - 1) series = [] rho = rho0 - for t in range(n_steps): + for _ in range(n_steps): # exponential-ish relaxation to rho_eq rho += tau * (rho_eq - rho) # add noise diff --git a/ml_peg/calcs/liquids/ethanol_water_density/io_tools.py b/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py similarity index 92% rename from ml_peg/calcs/liquids/ethanol_water_density/io_tools.py rename to ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py index e3a03a4c4..0adfce05b 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/io_tools.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py @@ -1,6 +1,10 @@ +"""i/o tools for calculations.""" + +from __future__ import annotations + +from collections.abc import Iterable import csv from pathlib import Path -from typing import Iterable def write_density_timeseries_checkpointed( @@ -46,10 +50,7 @@ def write_density_timeseries_checkpointed( if old_vals: n = min(len(old_vals), len(rho_series)) - matches = sum( - abs(old_vals[i] - rho_series[i]) < 1e-6 - for i in range(n) - ) + matches = sum(abs(old_vals[i] - rho_series[i]) < 1e-6 for i in range(n)) frac = matches / n if n else 0.0 @@ -70,9 +71,6 @@ def write_density_timeseries_checkpointed( w.writerow([i, f"{rho:.8f}"]) - - - class DensityTimeseriesLogger: """ Streaming CSV logger for density time series. @@ -95,6 +93,7 @@ def __init__(self, path: Path, *, overwrite: bool = True): # context manager API # --------------------- def __enter__(self): + """Open the file.""" if self.overwrite and self.path.exists(): self.path.unlink() @@ -107,6 +106,7 @@ def __enter__(self): return self def __exit__(self, exc_type, exc, tb): + """Close the file.""" if self._f: self._f.close() diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index 1b0574554..928b178fb 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -1,4 +1,7 @@ -import csv +"""calculate ethanol-water density curves.""" + +from __future__ import annotations + import os from pathlib import Path from typing import Any @@ -15,7 +18,9 @@ make_fake_curve, make_fake_density_timeseries, ) -from ml_peg.calcs.liquids.ethanol_water_density.io_tools import write_density_timeseries_checkpointed +from ml_peg.calcs.liquids.ethanol_water_density.io_tools import ( + write_density_timeseries_checkpointed, +) from ml_peg.calcs.liquids.ethanol_water_density.md_code import run_one_case from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models @@ -35,9 +40,12 @@ def _case_id(composition) -> str: return f"x={composition.x_ethanol:.2f}" -@pytest.mark.parametrize("mlip", MODELS.items(), ids=[n for n in MODELS.keys()]) -@pytest.mark.parametrize("composition", COMPOSITIONS, ids=[_case_id(c) for c in COMPOSITIONS]) +@pytest.mark.parametrize("mlip", MODELS.items(), ids=list(MODELS.keys())) +@pytest.mark.parametrize( + "composition", COMPOSITIONS, ids=[_case_id(c) for c in COMPOSITIONS] +) def test_water_ethanol_density_curve(mlip: tuple[str, Any], composition) -> None: + """Either run the md simulation or fake the data.""" if not FAKE_DATA: water_ethanol_density_curve_one_case(mlip, composition) else: @@ -45,6 +53,7 @@ def test_water_ethanol_density_curve(mlip: tuple[str, Any], composition) -> None def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: + """Run an md simulation.""" model_name, model = mlip # TODO: dispersion ??? model_out = OUT_PATH / model_name @@ -54,7 +63,9 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: struct_path = DATA_PATH / case.filename if not struct_path.exists(): - raise FileNotFoundError(f"Missing structure for x={case.x_ethanol}: {struct_path}") + raise FileNotFoundError( + f"Missing structure for x={case.x_ethanol}: {struct_path}" + ) case_dir = model_out / f"x_ethanol_{case.x_ethanol:.2f}" case_dir.mkdir(parents=True, exist_ok=True) @@ -66,6 +77,7 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: def water_ethanol_density_dummy_data_one_case(mlip: tuple[str, Any], case) -> None: + """Generate fake data for debugging instead of running the test.""" model_name, model = mlip model_out = OUT_PATH / model_name @@ -94,6 +106,17 @@ def water_ethanol_density_dummy_data_one_case(mlip: tuple[str, Any], case) -> No if __name__ == "__main__": # TODO: delete this # run a very small simulation to see if it does something reasonable from mace.calculators import mace_mp + calc = mace_mp("data_old/mace-omat-0-small.model") - rho = run_one_case("data/mix_xe_0.00.extxyz", calc, nvt_stabilise_steps=200, npt_settle_steps=1000, nvt_thermalise_steps=250, npt_equil_steps=1000, npt_prod_steps=1000, log_every=50, workdir=Path("debug")) - print(rho) \ No newline at end of file + rho = run_one_case( + "data/mix_xe_0.00.extxyz", + calc, + nvt_stabilise_steps=200, + npt_settle_steps=1000, + nvt_thermalise_steps=250, + npt_equil_steps=1000, + npt_prod_steps=1000, + log_every=50, + workdir=Path("debug"), + ) + print(rho) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py index c8c2ba280..d905b4c59 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py @@ -1,34 +1,46 @@ -import time +"""code for md simulation.""" + +from __future__ import annotations + +from collections.abc import Iterable from contextlib import contextmanager from pathlib import Path -from typing import Any, Iterable +import time +from typing import Any -import numpy as np from ase.io import Trajectory, read, write -from ase.md import MDLogger, Langevin +from ase.md import Langevin, MDLogger from ase.md.langevinbaoab import LangevinBAOAB from ase.md.nptberendsen import NPTBerendsen -from ase.md.velocitydistribution import MaxwellBoltzmannDistribution, Stationary, ZeroRotation +from ase.md.velocitydistribution import ( + MaxwellBoltzmannDistribution, + Stationary, + ZeroRotation, +) from ase.optimize import FIRE -from ase.units import fs, bar +from ase.units import bar, fs +import numpy as np from ml_peg.calcs.liquids.ethanol_water_density.io_tools import DensityTimeseriesLogger def total_mass_kg(atoms): + """Return the mass in kg for ase atoms.""" amu_to_kg = 1.66053906660e-27 return atoms.get_masses().sum() * amu_to_kg def density_g_cm3(atoms): - V_A3 = atoms.get_volume() - V_m3 = V_A3 * 1e-30 + """Return density in g/cm^3.""" + v_a3 = atoms.get_volume() + v_m3 = v_a3 * 1e-30 m_kg = total_mass_kg(atoms) - rho_kg_m3 = m_kg / V_m3 + rho_kg_m3 = m_kg / v_m3 return rho_kg_m3 / 1000.0 def attach_basic_logging(dyn, atoms, md_logfile, log_every, t0): + """Attach a logger to an ase md simulation.""" logger = MDLogger( dyn, atoms, @@ -43,14 +55,14 @@ def attach_basic_logging(dyn, atoms, md_logfile, log_every, t0): def progress(): step = dyn.get_number_of_steps() rho = density_g_cm3(atoms) - V = atoms.get_volume() - T = atoms.get_temperature() + volume = atoms.get_volume() + temperature = atoms.get_temperature() elapsed = time.time() - t0 print( f"[step {step:>8}] " - f"T={T:7.2f} K | " - f"V={V:10.2f} A^3 | " + f"T={temperature:7.2f} K | " + f"V={volume:10.2f} A^3 | " f"rho={rho:7.4f} g/cm^3 | " f"elapsed={elapsed:6.1f}s" ) @@ -60,9 +72,10 @@ def progress(): @contextmanager def traj_logging(dyn, atoms, workdir, traj_every: int, name="md.traj"): + """Context manager for logging trajectory.""" traj = None if traj_every and traj_every > 0: - traj = Trajectory(str(workdir/name), "a", atoms) + traj = Trajectory(str(workdir / name), "a", atoms) dyn.attach(traj.write, interval=traj_every) try: yield traj @@ -70,21 +83,22 @@ def traj_logging(dyn, atoms, workdir, traj_every: int, name="md.traj"): if traj is not None: traj.close() + def run_one_case( struct_path: Path, calc: Any, *, - T_K: float = 298.15, - P_bar: float = 1.0, + temperature: float = 298.15, + p_bar: float = 1.0, dt_fs: float = 0.5, nvt_stabilise_steps: int = 4_000, - npt_settle_steps = 7_500, + npt_settle_steps=7_500, nvt_thermalise_steps: int = 1_000, npt_equil_steps: int = 10_000, npt_prod_steps: int = 25_000, sample_every: int = 20, log_every: int = 200, - log_trajectory_every: int=400, + log_trajectory_every: int = 400, dummy_data=False, workdir: Path, ) -> Iterable[float]: @@ -95,7 +109,9 @@ def run_one_case( """ ts_path = workdir / "density_timeseries.csv" if dummy_data: - rho_series = np.random.normal(loc=0.9,scale=0.05,size=npt_prod_steps//sample_every) + rho_series = np.random.normal( + loc=0.9, scale=0.05, size=npt_prod_steps // sample_every + ) with DensityTimeseriesLogger(ts_path) as density_log: for rho in rho_series: density_log.write(rho) @@ -110,15 +126,16 @@ def run_one_case( opt.run(fmax=0.15) # velocities - MaxwellBoltzmannDistribution(atoms, temperature_K=T_K) + MaxwellBoltzmannDistribution(atoms, temperature_K=temperature) Stationary(atoms) ZeroRotation(atoms) dt = dt_fs * fs t0 = time.time() - # the used pre-relax is not good enough, do some Langevin NVT steps before starting NPT - dyn = Langevin(atoms, timestep=dt, temperature_K=T_K, friction=0.02) + # the used pre-relax is not good enough + # do some Langevin NVT steps before starting NPT + dyn = Langevin(atoms, timestep=dt, temperature_K=temperature, friction=0.02) attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): dyn.run(nvt_stabilise_steps) @@ -128,8 +145,8 @@ def run_one_case( dyn = NPTBerendsen( atoms, timestep=dt, - temperature_K=T_K, - pressure_au=P_bar * bar, + temperature_K=temperature, + pressure_au=p_bar * bar, taut=0.07 * ps, taup=0.4 * ps, compressibility=4.5e-5, @@ -139,10 +156,10 @@ def run_one_case( dyn.run(npt_settle_steps) # thermalise - MaxwellBoltzmannDistribution(atoms, temperature_K=T_K) + MaxwellBoltzmannDistribution(atoms, temperature_K=temperature) Stationary(atoms) ZeroRotation(atoms) - dyn = Langevin(atoms, timestep=dt, temperature_K=T_K, friction=0.03) + dyn = Langevin(atoms, timestep=dt, temperature_K=temperature, friction=0.03) attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): dyn.run(nvt_thermalise_steps) @@ -151,8 +168,8 @@ def run_one_case( dyn = LangevinBAOAB( # MTK atoms, timestep=dt, - temperature_K=T_K, - externalstress=P_bar * bar, + temperature_K=temperature, + externalstress=p_bar * bar, T_tau=0.1 * ps, P_tau=1 * ps, hydrostatic=True, From c079f239c8c2f1f5899070bf2515b97fa1b40c77 Mon Sep 17 00:00:00 2001 From: Arn Date: Tue, 10 Feb 2026 16:46:05 +0000 Subject: [PATCH 06/29] fix imports --- .../analyse_ethanol_water_density.py | 2 +- .../ethanol_water_density/{_io_tools.py => io_tools.py} | 2 +- ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py | 3 ++- .../ethanol_water_density/calc_ethanol_water_density.py | 8 ++++---- ml_peg/calcs/liquids/ethanol_water_density/md_code.py | 2 +- 5 files changed, 9 insertions(+), 8 deletions(-) rename ml_peg/analysis/liquids/ethanol_water_density/{_io_tools.py => io_tools.py} (97%) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py index 3827ec510..abf6111ff 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py @@ -9,7 +9,7 @@ import numpy as np import pytest -from ml_peg.analysis.liquids.ethanol_water_density.analysis import ( +from ml_peg.analysis.liquids.ethanol_water_density._analysis import ( _excess_curve, _interp_1d, _peak_x_quadratic, diff --git a/ml_peg/analysis/liquids/ethanol_water_density/_io_tools.py b/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py similarity index 97% rename from ml_peg/analysis/liquids/ethanol_water_density/_io_tools.py rename to ml_peg/analysis/liquids/ethanol_water_density/io_tools.py index 4015f0fec..5d469d6e1 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/_io_tools.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py @@ -9,7 +9,7 @@ from matplotlib import pyplot as plt import numpy as np -from ml_peg.analysis.liquids.ethanol_water_density.analysis import ( +from ml_peg.analysis.liquids.ethanol_water_density._analysis import ( weight_to_mole_fraction, ) from ml_peg.app import APP_ROOT diff --git a/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py b/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py index 0adfce05b..720aff283 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py @@ -12,6 +12,7 @@ def write_density_timeseries_checkpointed( rho_series: Iterable[float], *, min_match_fraction: float = 0.8, + do_not_raise: bool = False, ) -> None: """ Write density_timeseries.csv with checkpoint validation. @@ -54,7 +55,7 @@ def write_density_timeseries_checkpointed( frac = matches / n if n else 0.0 - if frac < min_match_fraction: + if frac < min_match_fraction and not do_not_raise: raise AssertionError( f"{ts_path}: only {frac:.1%} of checkpoint values match " f"(expected ≥ {min_match_fraction:.0%}). " diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index 928b178fb..d5a0e2e16 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -9,16 +9,16 @@ import numpy as np import pytest -from ml_peg.calcs.liquids.ethanol_water_density.compositions import ( +from ml_peg.calcs.liquids.ethanol_water_density._compositions import ( BENCH_ROOT, DATA_PATH, load_compositions, ) -from ml_peg.calcs.liquids.ethanol_water_density.fake_data import ( +from ml_peg.calcs.liquids.ethanol_water_density._fake_data import ( make_fake_curve, make_fake_density_timeseries, ) -from ml_peg.calcs.liquids.ethanol_water_density.io_tools import ( +from ml_peg.calcs.liquids.ethanol_water_density._io_tools import ( write_density_timeseries_checkpointed, ) from ml_peg.calcs.liquids.ethanol_water_density.md_code import run_one_case @@ -100,7 +100,7 @@ def water_ethanol_density_dummy_data_one_case(mlip: tuple[str, Any], case) -> No ) ts_path = case_dir / "density_timeseries.csv" - write_density_timeseries_checkpointed(ts_path, rho_series) + write_density_timeseries_checkpointed(ts_path, rho_series, do_not_raise=True) if __name__ == "__main__": # TODO: delete this diff --git a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py index d905b4c59..1e62b19bc 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py @@ -21,7 +21,7 @@ from ase.units import bar, fs import numpy as np -from ml_peg.calcs.liquids.ethanol_water_density.io_tools import DensityTimeseriesLogger +from ml_peg.calcs.liquids.ethanol_water_density._io_tools import DensityTimeseriesLogger def total_mass_kg(atoms): From af09d974326e64b9205fd88d2c05749dae637661 Mon Sep 17 00:00:00 2001 From: Arn De Moor <86566033+arnon-1@users.noreply.github.com> Date: Tue, 10 Feb 2026 16:58:42 +0000 Subject: [PATCH 07/29] Fix liquids numpydoc issues for pre-commit --- .../ethanol_water_density/_analysis.py | 143 ++++++++++++++++-- .../analyse_ethanol_water_density.py | 137 +++++++++++++++-- .../liquids/ethanol_water_density/io_tools.py | 49 ++++-- .../ethanol_water_density/_compositions.py | 18 ++- .../ethanol_water_density/_fake_data.py | 118 +++++++++++++-- .../ethanol_water_density/_io_tools.py | 95 +++++++++--- .../calc_ethanol_water_density.py | 63 +++++++- .../liquids/ethanol_water_density/md_code.py | 115 +++++++++++++- 8 files changed, 649 insertions(+), 89 deletions(-) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py b/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py index bcc5fbb55..5f292e345 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py @@ -1,4 +1,4 @@ -"""analyse ethanol-water density curves.""" +"""Analyse ethanol-water density curves.""" from __future__ import annotations @@ -22,7 +22,29 @@ def x_to_phi_ethanol( rho_eth=RHO_ETH_PURE, rho_water=RHO_WATER_PURE, ): # TODO: double check formula - """Convert ethanol mole fraction x to ethanol volume fraction phi.""" + """ + Convert ethanol mole fraction to ethanol volume fraction. + + Parameters + ---------- + x : array-like + Ethanol mole fraction. + rho_mix : array-like + Mixture density in g/cm^3 at each composition. + m_eth : float, optional + Ethanol molar mass in g/mol. + m_water : float, optional + Water molar mass in g/mol. + rho_eth : float, optional + Pure ethanol density in g/cm^3. + rho_water : float, optional + Pure water density in g/cm^3. + + Returns + ------- + numpy.ndarray + Ethanol volume fraction for each input composition. + """ x = np.asarray(x, dtype=float) rho_mix = np.asarray(rho_mix, dtype=float) @@ -35,10 +57,18 @@ def x_to_phi_ethanol( def weight_to_mole_fraction(w): - """ - Convert ethanol weight fraction -> mole fraction. - - w = mass_ethanol / total_mass + r""" + Convert ethanol weight fraction to mole fraction. + + Parameters + ---------- + w : array-like + Ethanol weight fraction :math:`m_\mathrm{ethanol} / m_\mathrm{total}`. + + Returns + ------- + numpy.ndarray + Ethanol mole fraction. """ n_e = w / M_ETOH n_w = (1 - w) / M_WATER @@ -46,15 +76,42 @@ def weight_to_mole_fraction(w): def _rmse(a: np.ndarray, b: np.ndarray) -> float: + """ + Compute root-mean-square error between two arrays. + + Parameters + ---------- + a : numpy.ndarray + First array. + b : numpy.ndarray + Second array. + + Returns + ------- + float + Root-mean-square error. + """ d = a - b return float(np.sqrt(np.mean(d * d))) def _interp_1d(x_src: np.ndarray, y_src: np.ndarray, x_tgt: np.ndarray) -> np.ndarray: """ - Linear interpolation. - - Requires x_tgt within [min(x_src), max(x_src)]. + Linearly interpolate onto target x values. + + Parameters + ---------- + x_src : numpy.ndarray + Source x grid. + y_src : numpy.ndarray + Source y values. + x_tgt : numpy.ndarray + Target x positions. + + Returns + ------- + numpy.ndarray + Interpolated y values at ``x_tgt``. """ if np.any(x_tgt < x_src.min() - 1e-12) or np.any(x_tgt > x_src.max() + 1e-12): raise ValueError("Target x values fall outside reference interpolation range.") @@ -64,7 +121,23 @@ def _interp_1d(x_src: np.ndarray, y_src: np.ndarray, x_tgt: np.ndarray) -> np.nd def _endpoints_at_0_1( x: np.ndarray, y: np.ndarray, tol: float = 1e-8 ) -> tuple[float, float]: - """Return y(x=0) and y(x=1). Requires that x includes (approximately) 0 and 1.""" + """ + Return y values at x=0 and x=1. + + Parameters + ---------- + x : numpy.ndarray + Composition grid. + y : numpy.ndarray + Property values. + tol : float, optional + Absolute tolerance used to identify endpoint compositions. + + Returns + ------- + tuple[float, float] + Pair ``(y0, y1)`` for x=0 and x=1. + """ i0 = np.where(np.isclose(x, 0.0, atol=tol))[0] i1 = np.where(np.isclose(x, 1.0, atol=tol))[0] if len(i0) != 1 or len(i1) != 1: @@ -73,21 +146,61 @@ def _endpoints_at_0_1( def _linear_baseline(x: np.ndarray, y0: float, y1: float) -> np.ndarray: + """ + Build the straight line connecting values at x=0 and x=1. + + Parameters + ---------- + x : numpy.ndarray + Composition grid. + y0 : float + Value at x=0. + y1 : float + Value at x=1. + + Returns + ------- + numpy.ndarray + Linear baseline evaluated at ``x``. + """ return y0 + x * (y1 - y0) def _excess_curve(x: np.ndarray, y: np.ndarray) -> np.ndarray: - """Excess relative to the dataset's own pure endpoints (x=0 and x=1).""" + """ + Compute excess curve relative to endpoint linear interpolation. + + Parameters + ---------- + x : numpy.ndarray + Composition grid. + y : numpy.ndarray + Property values. + + Returns + ------- + numpy.ndarray + Excess values ``y - y_linear``. + """ y0, y1 = _endpoints_at_0_1(x, y) return y - _linear_baseline(x, y0, y1) def _peak_x_quadratic(x: np.ndarray, y: np.ndarray) -> float: """ - Estimate x position of minimum y. - - - If min is interior and we have neighbors, fit quadratic through 3 points. - - Otherwise fall back to argmin x. + Estimate x position of the minimum by local quadratic fitting. + + Parameters + ---------- + x : numpy.ndarray + Composition grid. + y : numpy.ndarray + Property values. + + Returns + ------- + float + Estimated composition of the minimum. """ if len(x) < 3: return float(x[int(np.argmin(y))]) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py index abf6111ff..142b802e0 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py @@ -1,4 +1,4 @@ -"""analyse ethanol-water density curves.""" +"""Analyse ethanol-water density curves.""" # TODO: remove hardcoded things? from __future__ import annotations @@ -42,7 +42,14 @@ @pytest.fixture(scope="session") def ref_curve() -> tuple[np.ndarray, np.ndarray]: - """Return reference density curve.""" + """ + Return the reference density curve on a sorted mole-fraction grid. + + Returns + ------- + tuple[numpy.ndarray, numpy.ndarray] + Sorted mole fractions and reference densities. + """ x_ref, rho_ref = read_ref_curve() x = np.asarray(x_ref, dtype=float) rho = np.asarray(rho_ref, dtype=float) @@ -54,7 +61,14 @@ def ref_curve() -> tuple[np.ndarray, np.ndarray]: @pytest.fixture def model_curves() -> dict[str, tuple[np.ndarray, np.ndarray]]: - """Return simulated density curves.""" + """ + Return simulated model density curves on sorted composition grids. + + Returns + ------- + dict[str, tuple[numpy.ndarray, numpy.ndarray]] + Mapping from model name to x-grid and density values. + """ curves: dict[str, tuple[np.ndarray, np.ndarray]] = {} for model_name in MODELS: xs, rhos = _read_model_curve(model_name) @@ -77,7 +91,21 @@ def model_curves() -> dict[str, tuple[np.ndarray, np.ndarray]]: # }, ) # TODO: read docs!!! doesn't seem to work yet. def densities_parity(ref_curve, model_curves) -> dict[str, list]: - """Parity plot of simulated and reference density.""" + """ + Build parity-plot payload for model and reference densities. + + Parameters + ---------- + ref_curve : tuple[numpy.ndarray, numpy.ndarray] + Reference composition and density arrays. + model_curves : dict[str, tuple[numpy.ndarray, numpy.ndarray]] + Per-model composition and density arrays. + + Returns + ------- + dict[str, list] + Reference and model densities sampled on a common grid. + """ x_ref, rho_ref = ref_curve # Use the first model's x grid for hover labels (parity requires same-length lists) @@ -107,7 +135,21 @@ def densities_parity(ref_curve, model_curves) -> dict[str, list]: def debug_curve_plots( ref_curve, model_curves ) -> None: # TODO should I remove or use a different format? - """Plot density curves.""" + """ + Generate optional debug plots for densities and excess properties. + + Parameters + ---------- + ref_curve : tuple[numpy.ndarray, numpy.ndarray] + Reference composition and density arrays. + model_curves : dict[str, tuple[numpy.ndarray, numpy.ndarray]] + Per-model composition and density arrays. + + Returns + ------- + None + Plots are written to disk when debug plotting is enabled. + """ if not _debug_plot_enabled(): return print("plotting curves") @@ -167,7 +209,21 @@ def debug_curve_plots( @pytest.fixture def rmse_density(ref_curve, model_curves) -> dict[str, float]: - """RMSE of the density vs reference density.""" + """ + Compute density RMSE versus interpolated reference values. + + Parameters + ---------- + ref_curve : tuple[numpy.ndarray, numpy.ndarray] + Reference composition and density arrays. + model_curves : dict[str, tuple[numpy.ndarray, numpy.ndarray]] + Per-model composition and density arrays. + + Returns + ------- + dict[str, float] + RMSE values in g/cm^3 keyed by model name. + """ x_ref, rho_ref = ref_curve out: dict[str, float] = {} for m, (x_m, rho_m) in model_curves.items(): @@ -178,7 +234,21 @@ def rmse_density(ref_curve, model_curves) -> dict[str, float]: @pytest.fixture def rmse_excess_density(ref_curve, model_curves) -> dict[str, float]: - """RMSE of excess density (detrended by each dataset's own pure endpoints).""" + """ + Compute RMSE of excess density curves. + + Parameters + ---------- + ref_curve : tuple[numpy.ndarray, numpy.ndarray] + Reference composition and density arrays. + model_curves : dict[str, tuple[numpy.ndarray, numpy.ndarray]] + Per-model composition and density arrays. + + Returns + ------- + dict[str, float] + Excess-density RMSE values keyed by model name. + """ x_ref, rho_ref = ref_curve out: dict[str, float] = {} @@ -196,10 +266,19 @@ def rmse_excess_density(ref_curve, model_curves) -> dict[str, float]: @pytest.fixture def peak_x_error(ref_curve, model_curves) -> dict[str, float]: """ - Absolute error in the x-position of the maximum excess density. - - Ref peak is computed on the dense reference curve. - Model peak is computed on its (coarse) grid with a local quadratic refinement. + Compute absolute error in composition of maximum excess density. + + Parameters + ---------- + ref_curve : tuple[numpy.ndarray, numpy.ndarray] + Reference composition and density arrays. + model_curves : dict[str, tuple[numpy.ndarray, numpy.ndarray]] + Per-model composition and density arrays. + + Returns + ------- + dict[str, float] + Absolute peak-position error keyed by model name. """ x_ref, rho_ref = ref_curve ex_ref_dense = _excess_curve(x_ref, rho_ref) @@ -242,7 +321,23 @@ def metrics( rmse_excess_density: dict[str, float], peak_x_error: dict[str, float], ) -> dict[str, dict]: - """Return metric data.""" + """ + Combine individual metrics into the table payload. + + Parameters + ---------- + rmse_density : dict[str, float] + Density RMSE values. + rmse_excess_density : dict[str, float] + Excess-density RMSE values. + peak_x_error : dict[str, float] + Peak-position errors. + + Returns + ------- + dict[str, dict] + Metric-name to per-model mapping. + """ return { "RMSE density": rmse_density, "RMSE excess density": rmse_excess_density, @@ -253,7 +348,23 @@ def metrics( def test_ethanol_water_density( metrics: dict[str, dict], densities_parity: dict[str, list], debug_curve_plots ) -> None: - """Launch analysis.""" + """ + Execute density analysis fixtures and emit debug output. + + Parameters + ---------- + metrics : dict[str, dict] + Metrics table payload. + densities_parity : dict[str, list] + Parity plot payload. + debug_curve_plots : None + Side-effect fixture for debug plotting. + + Returns + ------- + None + The test validates fixture execution and writes artifacts. + """ print( MODEL_INDEX ) # TODO: these print statements may be useful for debugging, but should I remove? diff --git a/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py b/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py index 5d469d6e1..b0e5351ec 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py @@ -1,4 +1,4 @@ -"""i/o tools for analysis of ethanol-water densities.""" +"""I/O tools for analysis of ethanol-water densities.""" from __future__ import annotations @@ -23,11 +23,29 @@ def _debug_plot_enabled() -> bool: + """ + Return whether debug plots are enabled via environment variable. + + Returns + ------- + bool + ``True`` when ``DEBUG_PLOTS`` is set to a truthy value. + """ # Turn on plots by: DEBUG_PLOTS=1 pytest ... return os.environ.get("DEBUG_PLOTS", "0") not in ("0", "", "false", "False") def _savefig(fig, outpath: Path) -> None: + """ + Save and close a Matplotlib figure. + + Parameters + ---------- + fig : matplotlib.figure.Figure + Figure object to save. + outpath : pathlib.Path + Output path. + """ outpath.parent.mkdir(parents=True, exist_ok=True) fig.tight_layout() fig.savefig(outpath, dpi=200) @@ -36,11 +54,17 @@ def _savefig(fig, outpath: Path) -> None: def _read_model_curve(model_name: str) -> tuple[list[float], list[float]]: """ - Read model density curve by computing averages from raw time series. + Read a model density curve by averaging per-case time series. + + Parameters + ---------- + model_name : str + Name of model output directory under calculation outputs. - Expects per-composition files: - x_ethanol_XX/density_timeseries.csv - with columns: step, rho_g_cm3 + Returns + ------- + tuple[list[float], list[float]] + Mole-fraction values and corresponding mean densities. """ model_dir = CALC_PATH / model_name xs: list[float] = [] @@ -86,17 +110,12 @@ def _read_model_curve(model_name: str) -> tuple[list[float], list[float]]: def read_ref_curve() -> tuple[list[float], list[float]]: """ - Load densities given on a uniform weight-fraction grid. - - And convert to mole fraction. - Densities from: - M. Southard and D. Green, Perry’s Chemical Engineers’ Handbook, - 9th Edition. McGraw-Hill Education, 2018. + Load the reference density curve and convert to mole fraction. - Assumes: - - 101 evenly spaced points - - first line = 0 wt% ethanol - - last line = 100 wt% + Returns + ------- + tuple[list[float], list[float]] + Mole-fraction x-values and reference densities in g/cm^3. """ ref_file = DATA_PATH / "densities_293.15.txt" rho_ref = np.loadtxt(ref_file) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/_compositions.py b/ml_peg/calcs/liquids/ethanol_water_density/_compositions.py index bac4af789..290bad22b 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/_compositions.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/_compositions.py @@ -12,7 +12,16 @@ @dataclass(frozen=True) class CompositionCase: - """Map composition to file.""" + """ + Map composition value to structure filename. + + Attributes + ---------- + x_ethanol : float + Ethanol mole fraction for the case. + filename : str + Structure filename associated with the composition. + """ x_ethanol: float filename: str @@ -20,9 +29,12 @@ class CompositionCase: def load_compositions() -> list[CompositionCase]: """ - Load composition grid. + Load composition grid from ``compositions.csv``. - Expected CSV columns: x_ethanol, filename + Returns + ------- + list[CompositionCase] + Parsed composition cases ordered as in the CSV file. """ comps_file = DATA_PATH / "compositions.csv" cases: list[CompositionCase] = [] diff --git a/ml_peg/calcs/liquids/ethanol_water_density/_fake_data.py b/ml_peg/calcs/liquids/ethanol_water_density/_fake_data.py index 0b28195e0..abc471ec5 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/_fake_data.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/_fake_data.py @@ -1,4 +1,4 @@ -"""for debugging, to verify that metrics actually do something reasonable.""" +"""Generate fake data for debugging density-curve metrics.""" from __future__ import annotations @@ -11,7 +11,32 @@ @dataclass(frozen=True) class FakeCurveParams: - """Class for fake curve parameters.""" + """ + Parameters controlling synthetic curve perturbations. + + Attributes + ---------- + severity : float + Global scaling of all enabled perturbation components. + bias : float + Additive offset magnitude. + scale : float + Multiplicative distortion magnitude. + tilt : float + Linear-in-composition additive distortion magnitude. + warp : float + Smooth nonlinear distortion magnitude. + noise_sigma : float + Standard deviation for additive white noise. + corr_len : float + Correlation length for correlated noise component. + bump_amp : float + Localized bump amplitude. + bump_center : float + Composition location for localized bump. + bump_width : float + Width of localized bump in composition units. + """ # Master knob: 0 -> perfect match, 1 -> very poor severity: float = 0.0 @@ -33,7 +58,23 @@ class FakeCurveParams: def _smooth_random_field( xs: np.ndarray, corr_len: float, rng: np.random.Generator ) -> np.ndarray: - """Create a zero-mean, ~unit-std smooth random field along xs.""" + """ + Create a smooth random field along the composition grid. + + Parameters + ---------- + xs : numpy.ndarray + Composition grid. + corr_len : float + Correlation length in composition units. + rng : numpy.random.Generator + Random number generator. + + Returns + ------- + numpy.ndarray + Approximately zero-mean, unit-variance correlated field. + """ if corr_len <= 0: return np.zeros_like(xs) @@ -55,10 +96,25 @@ def make_fake_curve_from_ref( clip: tuple[float | None, float | None] = (None, None), ) -> tuple[list[float], list[float]]: """ - Return (xs, ys_fake) using the same xs as the reference. - - `severity` scales *all* enabled components. For example, if bias=10 and - severity=0.2, you get ~2 units of bias (with a tiny randomization). + Build a synthetic curve from reference data and perturbation settings. + + Parameters + ---------- + xs_ref : list[float] + Reference x-grid. + ys_ref : list[float] + Reference y-values. + params : FakeCurveParams + Perturbation parameters controlling distortion type and magnitude. + seed : int | None, optional + Seed for deterministic random perturbations. + clip : tuple[float | None, float | None], optional + Optional ``(min, max)`` clipping bounds for generated y values. + + Returns + ------- + tuple[list[float], list[float]] + Synthetic x and y arrays. """ sev = float(np.clip(params.severity, 0.0, 1.0)) rng = np.random.default_rng(seed) @@ -73,6 +129,14 @@ def make_fake_curve_from_ref( # Small randomization so multiple models with same severity aren’t identical # (but still deterministic for a given seed). def jitter(): + """ + Return a small random multiplier around unity. + + Returns + ------- + float + Random scale factor in a narrow range near 1. + """ return 0.85 + 0.30 * rng.random() # 1) multiplicative scale error @@ -124,7 +188,21 @@ def make_fake_curve( kind: str | int, seed: int | None = 0, ) -> tuple[list[float], list[float]]: - """Make a fake density curve based on a reference.""" + """ + Generate a synthetic density curve with predefined quality level. + + Parameters + ---------- + kind : str | int + Quality label or index (perfect/good/medium/bad or 0-3). + seed : int | None, optional + Seed for deterministic randomness. + + Returns + ------- + tuple[list[float], list[float]] + Synthetic x and y arrays. + """ xs_ref, ys_ref = read_ref_curve() kind = kind.lower().strip() if isinstance(kind, str) else kind @@ -182,7 +260,29 @@ def make_fake_density_timeseries( tau: float = 0.15, # relaxation rate (bigger -> faster) noise_sigma: float = 0.001, # per-step noise ) -> list[float]: - """Make a time series of fake density values.""" + """ + Generate a fake density time series with relaxation dynamics. + + Parameters + ---------- + rho_eq : float + Equilibrium density target. + n_steps : int + Number of time steps to generate. + seed : int + Seed for deterministic random noise. + start_offset : float, optional + Magnitude of initial offset from equilibrium. + tau : float, optional + Relaxation factor per step. + noise_sigma : float, optional + Standard deviation of additive Gaussian noise per step. + + Returns + ------- + list[float] + Generated density values. + """ rng = np.random.default_rng(seed) rho0 = rho_eq + start_offset * (2 * rng.random() - 1) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py b/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py index 720aff283..93bb9835c 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py @@ -1,4 +1,4 @@ -"""i/o tools for calculations.""" +"""I/O tools for calculations.""" from __future__ import annotations @@ -15,20 +15,23 @@ def write_density_timeseries_checkpointed( do_not_raise: bool = False, ) -> None: """ - Write density_timeseries.csv with checkpoint validation. - - Behavior - -------- - If file exists: - - read existing values - - verify >= min_match_fraction already match rho_series - - overwrite anyway - - raise AssertionError if insufficient match - - If file does not exist: - - just write - - Helps detect broken resume logic while still allowing overwrite. + Write ``density_timeseries.csv`` with checkpoint validation. + + Parameters + ---------- + ts_path : pathlib.Path + Output CSV path. + rho_series : collections.abc.Iterable[float] + Density samples in g/cm^3. + min_match_fraction : float, optional + Required fraction of matching values versus existing checkpoint. + do_not_raise : bool, optional + If ``True``, skip assertion failures when checkpoint mismatches. + + Returns + ------- + None + This function writes the file in-place. """ rho_series = list(rho_series) @@ -76,14 +79,25 @@ class DensityTimeseriesLogger: """ Streaming CSV logger for density time series. - - deletes existing file on start (optional) - - writes header once - - append rows as simulation runs - - flushes every write (crash-safe) - - usable as context manager + Parameters + ---------- + path : pathlib.Path + Output CSV file path. + overwrite : bool, optional + Whether to delete pre-existing output when opening. """ def __init__(self, path: Path, *, overwrite: bool = True): + """ + Initialize the logger. + + Parameters + ---------- + path : pathlib.Path + Path to CSV output file. + overwrite : bool, optional + If ``True``, delete existing file when opening. + """ self.path = Path(path) self.overwrite = overwrite self._f = None @@ -94,7 +108,14 @@ def __init__(self, path: Path, *, overwrite: bool = True): # context manager API # --------------------- def __enter__(self): - """Open the file.""" + """ + Open the output file and return logger instance. + + Returns + ------- + DensityTimeseriesLogger + Logger ready to write rows. + """ if self.overwrite and self.path.exists(): self.path.unlink() @@ -107,7 +128,23 @@ def __enter__(self): return self def __exit__(self, exc_type, exc, tb): - """Close the file.""" + """ + Close the output file. + + Parameters + ---------- + exc_type : type | None + Exception type, if raised inside context. + exc : BaseException | None + Exception instance, if raised inside context. + tb : traceback | None + Traceback, if raised inside context. + + Returns + ------- + None + This method performs cleanup only. + """ if self._f: self._f.close() @@ -115,7 +152,19 @@ def __exit__(self, exc_type, exc, tb): # logging # --------------------- def write(self, rho: float): - """Write one density value.""" + """ + Write one density value to the CSV file. + + Parameters + ---------- + rho : float + Density in g/cm^3 for the current sample. + + Returns + ------- + None + This method appends one row to disk. + """ self._writer.writerow([self._step, f"{rho:.8f}"]) self._f.flush() # critical for crash safety self._step += 1 diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index d5a0e2e16..0b9728a08 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -1,4 +1,4 @@ -"""calculate ethanol-water density curves.""" +"""Calculate ethanol-water density curves.""" from __future__ import annotations @@ -36,6 +36,19 @@ def _case_id(composition) -> str: + """ + Build a readable test identifier for a composition case. + + Parameters + ---------- + composition : Any + Composition object with an ``x_ethanol`` attribute. + + Returns + ------- + str + Case identifier shown in pytest output. + """ # nicer test ids in `pytest -vv` return f"x={composition.x_ethanol:.2f}" @@ -45,7 +58,21 @@ def _case_id(composition) -> str: "composition", COMPOSITIONS, ids=[_case_id(c) for c in COMPOSITIONS] ) def test_water_ethanol_density_curve(mlip: tuple[str, Any], composition) -> None: - """Either run the md simulation or fake the data.""" + """ + Generate one density-curve case for a model and composition. + + Parameters + ---------- + mlip : tuple[str, Any] + Pair of model name and model object. + composition : Any + Composition case input. + + Returns + ------- + None + This test writes output files for a single case. + """ if not FAKE_DATA: water_ethanol_density_curve_one_case(mlip, composition) else: @@ -53,7 +80,21 @@ def test_water_ethanol_density_curve(mlip: tuple[str, Any], composition) -> None def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: - """Run an md simulation.""" + """ + Run one MD simulation case and write its density time series. + + Parameters + ---------- + mlip : tuple[str, Any] + Pair of model name and model object. + case : Any + Composition case containing ``x_ethanol`` and ``filename``. + + Returns + ------- + None + This function writes outputs for one composition. + """ model_name, model = mlip # TODO: dispersion ??? model_out = OUT_PATH / model_name @@ -77,7 +118,21 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: def water_ethanol_density_dummy_data_one_case(mlip: tuple[str, Any], case) -> None: - """Generate fake data for debugging instead of running the test.""" + """ + Generate one synthetic density time series for debugging. + + Parameters + ---------- + mlip : tuple[str, Any] + Pair of model name and model object. + case : Any + Composition case containing ``x_ethanol``. + + Returns + ------- + None + This function writes a fake density time series. + """ model_name, model = mlip model_out = OUT_PATH / model_name diff --git a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py index 1e62b19bc..8c968fe7e 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py @@ -1,4 +1,4 @@ -"""code for md simulation.""" +"""Code for molecular-dynamics simulation workflows.""" from __future__ import annotations @@ -25,13 +25,37 @@ def total_mass_kg(atoms): - """Return the mass in kg for ase atoms.""" + """ + Return atomic-system mass in kilograms. + + Parameters + ---------- + atoms : ase.Atoms + Atomic configuration. + + Returns + ------- + float + Total mass in kilograms. + """ amu_to_kg = 1.66053906660e-27 return atoms.get_masses().sum() * amu_to_kg def density_g_cm3(atoms): - """Return density in g/cm^3.""" + """ + Return density in g/cm^3. + + Parameters + ---------- + atoms : ase.Atoms + Atomic configuration with periodic cell volume. + + Returns + ------- + float + Density in g/cm^3. + """ v_a3 = atoms.get_volume() v_m3 = v_a3 * 1e-30 m_kg = total_mass_kg(atoms) @@ -40,7 +64,27 @@ def density_g_cm3(atoms): def attach_basic_logging(dyn, atoms, md_logfile, log_every, t0): - """Attach a logger to an ase md simulation.""" + """ + Attach text and progress loggers to an ASE dynamics object. + + Parameters + ---------- + dyn : ase.md.md.MolecularDynamics + Dynamics object to attach callbacks to. + atoms : ase.Atoms + Current atomic system. + md_logfile : str | pathlib.Path + Path to ASE MD log file. + log_every : int + Logging interval in MD steps. + t0 : float + Start timestamp from ``time.time()``. + + Returns + ------- + None + This function mutates ``dyn`` by attaching callbacks. + """ logger = MDLogger( dyn, atoms, @@ -53,6 +97,7 @@ def attach_basic_logging(dyn, atoms, md_logfile, log_every, t0): dyn.attach(logger, interval=log_every) def progress(): + """Print one progress line with thermodynamic state.""" step = dyn.get_number_of_steps() rho = density_g_cm3(atoms) volume = atoms.get_volume() @@ -72,7 +117,27 @@ def progress(): @contextmanager def traj_logging(dyn, atoms, workdir, traj_every: int, name="md.traj"): - """Context manager for logging trajectory.""" + """ + Attach trajectory logging inside a context manager. + + Parameters + ---------- + dyn : ase.md.md.MolecularDynamics + Dynamics object receiving callback. + atoms : ase.Atoms + Atomic system written to trajectory. + workdir : pathlib.Path + Output directory. + traj_every : int + Trajectory write interval in steps. + name : str, optional + Trajectory filename within ``workdir``. + + Yields + ------ + ase.io.trajectory.Trajectory | None + Open trajectory handle when enabled, otherwise ``None``. + """ traj = None if traj_every and traj_every > 0: traj = Trajectory(str(workdir / name), "a", atoms) @@ -103,9 +168,45 @@ def run_one_case( workdir: Path, ) -> Iterable[float]: """ - Run NPT and return (mean_density, std_density). + Run the full MD workflow for one composition case. + + Parameters + ---------- + struct_path : pathlib.Path + Input structure path. + calc : Any + ASE-compatible calculator. + temperature : float, optional + Target temperature in kelvin. + p_bar : float, optional + Target pressure in bar. + dt_fs : float, optional + Time step in femtoseconds. + nvt_stabilise_steps : int, optional + Initial NVT stabilization steps. + npt_settle_steps : int, optional + Berendsen NPT settling steps. + nvt_thermalise_steps : int, optional + NVT thermalization steps after settling. + npt_equil_steps : int, optional + BAOAB NPT equilibration steps. + npt_prod_steps : int, optional + Production NPT steps. + sample_every : int, optional + Sampling interval for density collection. + log_every : int, optional + Logging interval in MD steps. + log_trajectory_every : int, optional + Trajectory write interval in MD steps. + dummy_data : bool, optional + If ``True``, skip simulation and generate synthetic data. + workdir : pathlib.Path + Output directory for logs and trajectories. - TODO: use lammps? Though I would guess GPU is the bottleneck so it wouldn't matter? + Returns + ------- + collections.abc.Iterable[float] + Density time series in g/cm^3. """ ts_path = workdir / "density_timeseries.csv" if dummy_data: From 84e66b5b7d998dddad5d2e8a3c165360d64ebacc Mon Sep 17 00:00:00 2001 From: Arn Date: Wed, 11 Feb 2026 14:12:20 +0000 Subject: [PATCH 08/29] simplify logic --- .../calc_ethanol_water_density.py | 7 +-- .../liquids/ethanol_water_density/md_code.py | 52 ++++--------------- 2 files changed, 13 insertions(+), 46 deletions(-) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index d5a0e2e16..12fdda7b5 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -111,11 +111,8 @@ def water_ethanol_density_dummy_data_one_case(mlip: tuple[str, Any], case) -> No rho = run_one_case( "data/mix_xe_0.00.extxyz", calc, - nvt_stabilise_steps=200, - npt_settle_steps=1000, - nvt_thermalise_steps=250, - npt_equil_steps=1000, - npt_prod_steps=1000, + nvt_steps=1000, + npt_steps=3000, log_every=50, workdir=Path("debug"), ) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py index 1e62b19bc..6c3158306 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py @@ -11,7 +11,6 @@ from ase.io import Trajectory, read, write from ase.md import Langevin, MDLogger from ase.md.langevinbaoab import LangevinBAOAB -from ase.md.nptberendsen import NPTBerendsen from ase.md.velocitydistribution import ( MaxwellBoltzmannDistribution, Stationary, @@ -91,11 +90,8 @@ def run_one_case( temperature: float = 298.15, p_bar: float = 1.0, dt_fs: float = 0.5, - nvt_stabilise_steps: int = 4_000, - npt_settle_steps=7_500, - nvt_thermalise_steps: int = 1_000, - npt_equil_steps: int = 10_000, - npt_prod_steps: int = 25_000, + nvt_steps: int = 10_000, + npt_steps: int = 50_000, sample_every: int = 20, log_every: int = 200, log_trajectory_every: int = 400, @@ -110,7 +106,7 @@ def run_one_case( ts_path = workdir / "density_timeseries.csv" if dummy_data: rho_series = np.random.normal( - loc=0.9, scale=0.05, size=npt_prod_steps // sample_every + loc=0.9, scale=0.05, size=npt_steps // sample_every ) with DensityTimeseriesLogger(ts_path) as density_log: for rho in rho_series: @@ -132,55 +128,29 @@ def run_one_case( dt = dt_fs * fs t0 = time.time() - - # the used pre-relax is not good enough - # do some Langevin NVT steps before starting NPT - dyn = Langevin(atoms, timestep=dt, temperature_K=temperature, friction=0.02) - attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) - with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): - dyn.run(nvt_stabilise_steps) - - # quick Berendsen settle close to target density ps = 1000 * fs - dyn = NPTBerendsen( - atoms, - timestep=dt, - temperature_K=temperature, - pressure_au=p_bar * bar, - taut=0.07 * ps, - taup=0.4 * ps, - compressibility=4.5e-5, - ) - attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) - with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): - dyn.run(npt_settle_steps) + T_tau = 0.5 * ps - # thermalise - MaxwellBoltzmannDistribution(atoms, temperature_K=temperature) - Stationary(atoms) - ZeroRotation(atoms) - dyn = Langevin(atoms, timestep=dt, temperature_K=temperature, friction=0.03) + dyn = Langevin(atoms, timestep=dt, temperature_K=temperature, friction=1 / (T_tau)) attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): - dyn.run(nvt_thermalise_steps) - + dyn.run(nvt_steps) # real NPT - dyn = LangevinBAOAB( # MTK + dyn = LangevinBAOAB( # use MTK? atoms, timestep=dt, temperature_K=temperature, externalstress=p_bar * bar, - T_tau=0.1 * ps, - P_tau=1 * ps, + T_tau=T_tau, + P_tau=0.5 + * ps, # same timeconstants for baro/thermostat is fine for stochastic ones hydrostatic=True, rng=0, ) attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): - dyn.run(npt_equil_steps) - rhos = [] - n_samples = npt_prod_steps // sample_every + n_samples = npt_steps // sample_every with DensityTimeseriesLogger(ts_path) as density_log: for _ in range(n_samples): dyn.run(sample_every) From f99a1feeccaa3f9191c8c22d5e45caf968c045ca Mon Sep 17 00:00:00 2001 From: Arn Date: Wed, 11 Feb 2026 16:05:02 +0000 Subject: [PATCH 09/29] fix dispersion --- .../ethanol_water_density/calc_ethanol_water_density.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index 0263b019a..e0a50d8bd 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -95,12 +95,13 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: None This function writes outputs for one composition. """ - model_name, model = mlip # TODO: dispersion ??? + model_name, model = mlip model_out = OUT_PATH / model_name model_out.mkdir(parents=True, exist_ok=True) calc = model.get_calculator() + calc = model.add_d3_calculator(calc) struct_path = DATA_PATH / case.filename if not struct_path.exists(): @@ -162,7 +163,7 @@ def water_ethanol_density_dummy_data_one_case(mlip: tuple[str, Any], case) -> No # run a very small simulation to see if it does something reasonable from mace.calculators import mace_mp - calc = mace_mp("data_old/mace-omat-0-small.model") + calc = mace_mp("data_old/mace-omat-0-small.model", dispersion=True) rho = run_one_case( "data/mix_xe_0.00.extxyz", calc, From 5ca1e4c2ddbf39cb121f94b630f27842efc49fd2 Mon Sep 17 00:00:00 2001 From: Arn Date: Wed, 11 Feb 2026 16:50:43 +0000 Subject: [PATCH 10/29] longer run --- .../liquids/ethanol_water_density/md_code.py | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py index 2991c7ab5..37e0656bc 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py @@ -156,7 +156,7 @@ def run_one_case( p_bar: float = 1.0, dt_fs: float = 0.5, nvt_steps: int = 10_000, - npt_steps: int = 50_000, + npt_steps: int = 200_000, sample_every: int = 20, log_every: int = 200, log_trajectory_every: int = 400, @@ -178,16 +178,10 @@ def run_one_case( Target pressure in bar. dt_fs : float, optional Time step in femtoseconds. - nvt_stabilise_steps : int, optional - Initial NVT stabilization steps. - npt_settle_steps : int, optional - Berendsen NPT settling steps. - nvt_thermalise_steps : int, optional - NVT thermalization steps after settling. - npt_equil_steps : int, optional - BAOAB NPT equilibration steps. - npt_prod_steps : int, optional - Production NPT steps. + nvt_steps : int, optional + Initial NVT stabilisation and mixing steps. + npt_steps : int, optional + NPT steps. sample_every : int, optional Sampling interval for density collection. log_every : int, optional @@ -230,9 +224,14 @@ def run_one_case( dt = dt_fs * fs t0 = time.time() ps = 1000 * fs - T_tau = 0.5 * ps + thermostat_tau = 0.5 * ps - dyn = Langevin(atoms, timestep=dt, temperature_K=temperature, friction=1 / (T_tau)) + dyn = Langevin( + atoms, + timestep=dt, + temperature_K=temperature, + friction=1 / thermostat_tau, + ) attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): dyn.run(nvt_steps) @@ -242,7 +241,7 @@ def run_one_case( timestep=dt, temperature_K=temperature, externalstress=p_bar * bar, - T_tau=T_tau, + T_tau=thermostat_tau, P_tau=0.5 * ps, # same timeconstants for baro/thermostat is fine for stochastic ones hydrostatic=True, From fd4d8129d8bf228263f4ff3a6c2881e9e14b06ef Mon Sep 17 00:00:00 2001 From: Arn Date: Tue, 17 Feb 2026 21:24:15 +0000 Subject: [PATCH 11/29] shorten dispersion for testing --- .../calc_ethanol_water_density.py | 51 ++++++++++++++++++- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index e0a50d8bd..631c15ca3 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -35,6 +35,48 @@ COMPOSITIONS = load_compositions() +def add_shorter_d3_calculator(model, calcs): + """ + Add D3 dispersion to calculator(s). + + Parameters + ---------- + model + Model to add the dispersion. + calcs + Calculator, or list of calculators, to add D3 dispersion to via a + SumCalculator. + + Returns + ------- + SumCalculator | Calculator + Calculator(s) with D3 dispersion added, or the original calculator when + the model is already trained with D3 corrections. + """ + if model.trained_on_d3: + return calcs + from ase import units + from ase.calculators.mixing import SumCalculator + import torch + from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator + + if not isinstance(calcs, list): + calcs = [calcs] + + d3_calc = TorchDFTD3Calculator( + device=model.d3_kwargs.get("device", "cpu"), + damping=model.d3_kwargs.get("damping", "bj"), + xc=model.d3_kwargs.get("xc", "pbe"), + dtype=getattr(torch, model.d3_kwargs.get("dtype", "float32")), + cutoff=model.d3_kwargs.get( + "cutoff", 25.0 * units.Bohr + ), # shortened to make run more manageable. + ) + calcs.append(d3_calc) + + return SumCalculator(calcs) + + def _case_id(composition) -> str: """ Build a readable test identifier for a composition case. @@ -101,7 +143,7 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: model_out.mkdir(parents=True, exist_ok=True) calc = model.get_calculator() - calc = model.add_d3_calculator(calc) + calc = add_shorter_d3_calculator(model, calc) # TODO: don't forget to change back struct_path = DATA_PATH / case.filename if not struct_path.exists(): @@ -161,9 +203,14 @@ def water_ethanol_density_dummy_data_one_case(mlip: tuple[str, Any], case) -> No if __name__ == "__main__": # TODO: delete this # run a very small simulation to see if it does something reasonable + from ase import units from mace.calculators import mace_mp - calc = mace_mp("data_old/mace-omat-0-small.model", dispersion=True) + calc = mace_mp( + "data_old/mace-omat-0-small.model", + dispersion=True, + dispersion_cutoff=25 * units.Bohr, + ) rho = run_one_case( "data/mix_xe_0.00.extxyz", calc, From a2bb0ade7ee39d9d4b2461e25539a3618403a531 Mon Sep 17 00:00:00 2001 From: Arn Date: Wed, 18 Feb 2026 14:48:16 +0000 Subject: [PATCH 12/29] checkpoint loading --- .../ethanol_water_density/_io_tools.py | 38 ++++-- .../calc_ethanol_water_density.py | 9 +- .../liquids/ethanol_water_density/md_code.py | 129 ++++++++++++++---- 3 files changed, 134 insertions(+), 42 deletions(-) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py b/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py index 93bb9835c..a974ead78 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py @@ -104,9 +104,6 @@ def __init__(self, path: Path, *, overwrite: bool = True): self._writer = None self._step = 0 - # --------------------- - # context manager API - # --------------------- def __enter__(self): """ Open the output file and return logger instance. @@ -117,13 +114,35 @@ def __enter__(self): Logger ready to write rows. """ if self.overwrite and self.path.exists(): + mode = "w" self.path.unlink() - - self._f = self.path.open("w", newline="") + elif not self.path.exists(): + mode = "w" + else: + mode = "a" + # If appending, recover last step index + try: + with self.path.open("r", newline="") as f: + last_step = -1 + for row in csv.reader(f): + if not row or row[0] == "step": + continue + try: + last_step = int(row[0]) + except ValueError: + continue + self._step = last_step + 1 + except Exception: + # If file is corrupted or empty, just continue from 0 + self._step = 0 + + self._f = self.path.open(mode, newline="") self._writer = csv.writer(self._f) - self._writer.writerow(["step", "rho_g_cm3"]) - self._f.flush() + # Only write header if creating new file + if mode == "w": + self._writer.writerow(["step", "rho_g_cm3"]) + self._f.flush() return self @@ -148,9 +167,6 @@ def __exit__(self, exc_type, exc, tb): if self._f: self._f.close() - # --------------------- - # logging - # --------------------- def write(self, rho: float): """ Write one density value to the CSV file. @@ -166,5 +182,5 @@ def write(self, rho: float): This method appends one row to disk. """ self._writer.writerow([self._step, f"{rho:.8f}"]) - self._f.flush() # critical for crash safety + self._f.flush() self._step += 1 diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index 631c15ca3..8598b0db1 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -30,6 +30,7 @@ MODELS = load_models(current_models) MODEL_INDEX = {name: i for i, name in enumerate(MODELS)} FAKE_DATA = os.getenv("FAKE_DENSITY_DATA", "") == "1" +CONTINUE_RUNNING = os.getenv("CONTINUE_RUNNING", "") == "1" # IMPORTANT: create the list once for parametrization COMPOSITIONS = load_compositions() @@ -143,6 +144,7 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: model_out.mkdir(parents=True, exist_ok=True) calc = model.get_calculator() + # calc = model.add_d3_calculator(calc) calc = add_shorter_d3_calculator(model, calc) # TODO: don't forget to change back struct_path = DATA_PATH / case.filename @@ -154,7 +156,9 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: case_dir = model_out / f"x_ethanol_{case.x_ethanol:.2f}" case_dir.mkdir(parents=True, exist_ok=True) - rho_series = run_one_case(struct_path, calc, workdir=case_dir) + rho_series = run_one_case( + struct_path, calc, workdir=case_dir, continue_running=CONTINUE_RUNNING + ) ts_path = case_dir / "density_timeseries.csv" write_density_timeseries_checkpointed(ts_path, rho_series) @@ -215,8 +219,9 @@ def water_ethanol_density_dummy_data_one_case(mlip: tuple[str, Any], case) -> No "data/mix_xe_0.00.extxyz", calc, nvt_steps=1000, - npt_steps=3000, + npt_steps=4000, log_every=50, workdir=Path("debug"), + continue_running=True, ) print(rho) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py index 37e0656bc..ed74cc928 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py @@ -161,6 +161,7 @@ def run_one_case( log_every: int = 200, log_trajectory_every: int = 400, dummy_data=False, + continue_running=False, workdir: Path, ) -> Iterable[float]: """ @@ -190,6 +191,8 @@ def run_one_case( Trajectory write interval in MD steps. dummy_data : bool, optional If ``True``, skip simulation and generate synthetic data. + continue_running : bool, optional + If ``True``, continue running from the previous trajectory. workdir : pathlib.Path Output directory for logs and trajectories. @@ -199,6 +202,8 @@ def run_one_case( Density time series in g/cm^3. """ ts_path = workdir / "density_timeseries.csv" + traj_path = workdir / "md.traj" + if dummy_data: rho_series = np.random.normal( loc=0.9, scale=0.05, size=npt_steps // sample_every @@ -207,52 +212,107 @@ def run_one_case( for rho in rho_series: density_log.write(rho) return rho_series - atoms = read(struct_path) - atoms.set_pbc(True) - atoms.wrap() - atoms.calc = calc - - # fast pre-relax - opt = FIRE(atoms, logfile=str(workdir / "opt.log")) - opt.run(fmax=0.15) - # velocities - MaxwellBoltzmannDistribution(atoms, temperature_K=temperature) - Stationary(atoms) - ZeroRotation(atoms) + workdir.mkdir(parents=True, exist_ok=True) dt = dt_fs * fs t0 = time.time() ps = 1000 * fs thermostat_tau = 0.5 * ps - dyn = Langevin( - atoms, - timestep=dt, - temperature_K=temperature, - friction=1 / thermostat_tau, - ) - attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) - with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): - dyn.run(nvt_steps) - # real NPT - dyn = LangevinBAOAB( # use MTK? + target_samples = npt_steps // sample_every + + if continue_running: + # 1) Load last saved state (positions + cell + momenta if stored) + if traj_path.exists(): + with Trajectory(str(traj_path), "r") as tr: + if len(tr) == 0: + raise RuntimeError(f"{traj_path} exists but contains no frames.") + atoms = tr[-1] + else: + raise RuntimeError(f"No traj path at {traj_path}. Cannot continue running") + + # 2) Count how many samples are already present + if ts_path.exists(): + # assume one rho per non-empty line; ignore a header if present + n_lines = 0 + with open(ts_path, encoding="utf-8") as f: + for line in f: + s = line.strip() + if not s: + continue + # skip header-ish lines + if any(c.isalpha() for c in s): + continue + n_lines += 1 + already_samples = n_lines + else: + raise RuntimeError(f"no ts_path at {ts_path}. Cannot continue running") + + # If we've already finished, just return what we have + if already_samples >= target_samples: + # load and return existing rhos + rhos_existing = [] + if ts_path.exists(): + with open(ts_path, encoding="utf-8") as f: + for line in f: + s = line.strip() + if not s or any(c.isalpha() for c in s): + continue + # tolerate csv with extra columns + rhos_existing.append(float(s.split(",")[0])) + return np.array(rhos_existing) + atoms.calc = calc + else: + already_samples = 0 + atoms = read(struct_path) + atoms.set_pbc(True) + atoms.wrap() + atoms.calc = calc + + # fast pre-relax + opt = FIRE(atoms, logfile=str(workdir / "opt.log")) + opt.run(fmax=0.15) + + # velocities + MaxwellBoltzmannDistribution(atoms, temperature_K=temperature) + Stationary(atoms) + ZeroRotation(atoms) + + # NVT + dyn = Langevin( + atoms, + timestep=dt, + temperature_K=temperature, + friction=1 / thermostat_tau, + ) + attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) + with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): + dyn.run(nvt_steps) + + # NPT + dyn = LangevinBAOAB( atoms, timestep=dt, temperature_K=temperature, externalstress=p_bar * bar, T_tau=thermostat_tau, - P_tau=0.5 - * ps, # same timeconstants for baro/thermostat is fine for stochastic ones + P_tau=0.5 * ps, hydrostatic=True, rng=0, ) + dyn.nsteps = already_samples * sample_every # seems public enough to me + attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) + + remaining_samples = target_samples - already_samples + rhos = [] + with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): - rhos = [] - n_samples = npt_steps // sample_every - with DensityTimeseriesLogger(ts_path) as density_log: - for _ in range(n_samples): + with DensityTimeseriesLogger( + ts_path, overwrite=not continue_running + ) as density_log: + for _ in range(remaining_samples): dyn.run(sample_every) rho = density_g_cm3(atoms) rhos.append(rho) @@ -261,4 +321,15 @@ def run_one_case( # save final structure for debugging/repro write(workdir / "final.extxyz", atoms) + # If resuming, return the whole series (existing + new) for convenience + if continue_running and ts_path.exists(): + rhos_all = [] + with open(ts_path, encoding="utf-8") as f: + for line in f: + s = line.strip() + if not s or any(c.isalpha() for c in s): + continue + rhos_all.append(float(s.split(",")[0])) + return np.array(rhos_all) + return np.array(rhos) From 7797ed327b5eccd700bb03966dc2fbd90c7fbb50 Mon Sep 17 00:00:00 2001 From: Arn Date: Fri, 27 Feb 2026 10:57:11 +0000 Subject: [PATCH 13/29] error and density plots --- .../ethanol_water_density/_analysis.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py b/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py index 5f292e345..28ccc5830 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py @@ -186,6 +186,27 @@ def _excess_curve(x: np.ndarray, y: np.ndarray) -> np.ndarray: return y - _linear_baseline(x, y0, y1) +def _excess_volume(x: np.ndarray, rhos: np.ndarray) -> np.ndarray: + """ + Compute excess volume given molar fraction and density respectively. + + Parameters + ---------- + x : numpy.ndarray + Composition grid (mol fraction). + rhos : numpy.ndarray + Density. + + Returns + ------- + numpy.ndarray + Excess values ``y - y_linear``. + """ + return (x * M_ETOH + (1 - x) * M_WATER) / rhos - ( + x * M_ETOH / rhos[-1] + (1 - x) * M_WATER / rhos[0] + ) + + def _peak_x_quadratic(x: np.ndarray, y: np.ndarray) -> float: """ Estimate x position of the minimum by local quadratic fitting. From 2dc18c1e69061f238a71a9b4cc4c60759faafda0 Mon Sep 17 00:00:00 2001 From: Arn Date: Fri, 27 Feb 2026 14:17:06 +0000 Subject: [PATCH 14/29] unify settings --- .../calc_ethanol_water_density.py | 7 +++-- .../liquids/ethanol_water_density/md_code.py | 27 +++++++++---------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index 8598b0db1..3f862972a 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -144,8 +144,7 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: model_out.mkdir(parents=True, exist_ok=True) calc = model.get_calculator() - # calc = model.add_d3_calculator(calc) - calc = add_shorter_d3_calculator(model, calc) # TODO: don't forget to change back + calc = model.add_d3_calculator(calc) struct_path = DATA_PATH / case.filename if not struct_path.exists(): @@ -219,9 +218,9 @@ def water_ethanol_density_dummy_data_one_case(mlip: tuple[str, Any], case) -> No "data/mix_xe_0.00.extxyz", calc, nvt_steps=1000, - npt_steps=4000, + npt_steps=1000, log_every=50, workdir=Path("debug"), - continue_running=True, + continue_running=False, ) print(rho) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py index ed74cc928..18215d58b 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py @@ -10,7 +10,7 @@ from ase.io import Trajectory, read, write from ase.md import Langevin, MDLogger -from ase.md.langevinbaoab import LangevinBAOAB +from ase.md.nose_hoover_chain import IsotropicMTKNPT from ase.md.velocitydistribution import ( MaxwellBoltzmannDistribution, Stationary, @@ -154,12 +154,12 @@ def run_one_case( *, temperature: float = 298.15, p_bar: float = 1.0, - dt_fs: float = 0.5, - nvt_steps: int = 10_000, - npt_steps: int = 200_000, + dt_fs: float = 1.0, + nvt_steps: int = 50_000, + npt_steps: int = 1000_000, sample_every: int = 20, - log_every: int = 200, - log_trajectory_every: int = 400, + log_every: int = 250, + log_trajectory_every: int = 500, dummy_data=False, continue_running=False, workdir: Path, @@ -218,7 +218,8 @@ def run_one_case( dt = dt_fs * fs t0 = time.time() ps = 1000 * fs - thermostat_tau = 0.5 * ps + thermostat_tau = 50 * ps + barostat_tau = 100 * ps target_samples = npt_steps // sample_every @@ -284,22 +285,20 @@ def run_one_case( atoms, timestep=dt, temperature_K=temperature, - friction=1 / thermostat_tau, + friction=1 / (500 * fs), ) attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): dyn.run(nvt_steps) # NPT - dyn = LangevinBAOAB( + dyn = IsotropicMTKNPT( atoms, timestep=dt, temperature_K=temperature, - externalstress=p_bar * bar, - T_tau=thermostat_tau, - P_tau=0.5 * ps, - hydrostatic=True, - rng=0, + pressure_au=p_bar * bar, # TODO check units !!! + tdamp=thermostat_tau, + pdamp=barostat_tau, ) dyn.nsteps = already_samples * sample_every # seems public enough to me From 0ba0236434f965deff95b4713a91df266959b6d6 Mon Sep 17 00:00:00 2001 From: Arn Date: Fri, 27 Feb 2026 15:03:52 +0000 Subject: [PATCH 15/29] excess density to excess volume --- .../ethanol_water_density/_analysis.py | 111 ----------------- .../analyse_ethanol_water_density.py | 113 +++--------------- .../liquids/ethanol_water_density/metrics.yml | 21 ++-- 3 files changed, 25 insertions(+), 220 deletions(-) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py b/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py index 28ccc5830..4728ef217 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py @@ -13,49 +13,6 @@ RHO_ETH_PURE = 0.7893 # g/cm^3 -def x_to_phi_ethanol( - x, - rho_mix, - *, - m_eth=M_ETOH, - m_water=M_WATER, - rho_eth=RHO_ETH_PURE, - rho_water=RHO_WATER_PURE, -): # TODO: double check formula - """ - Convert ethanol mole fraction to ethanol volume fraction. - - Parameters - ---------- - x : array-like - Ethanol mole fraction. - rho_mix : array-like - Mixture density in g/cm^3 at each composition. - m_eth : float, optional - Ethanol molar mass in g/mol. - m_water : float, optional - Water molar mass in g/mol. - rho_eth : float, optional - Pure ethanol density in g/cm^3. - rho_water : float, optional - Pure water density in g/cm^3. - - Returns - ------- - numpy.ndarray - Ethanol volume fraction for each input composition. - """ - x = np.asarray(x, dtype=float) - rho_mix = np.asarray(rho_mix, dtype=float) - - m_eth = x * m_eth - m_wat = (1.0 - x) * m_water - - v_mix = (m_eth + m_wat) / rho_mix # cm^3 per "1 mol mixture basis" - v_eth = m_eth / rho_eth # cm^3 (proxy) - return v_eth / v_mix - - def weight_to_mole_fraction(w): r""" Convert ethanol weight fraction to mole fraction. @@ -118,74 +75,6 @@ def _interp_1d(x_src: np.ndarray, y_src: np.ndarray, x_tgt: np.ndarray) -> np.nd return np.interp(x_tgt, x_src, y_src) -def _endpoints_at_0_1( - x: np.ndarray, y: np.ndarray, tol: float = 1e-8 -) -> tuple[float, float]: - """ - Return y values at x=0 and x=1. - - Parameters - ---------- - x : numpy.ndarray - Composition grid. - y : numpy.ndarray - Property values. - tol : float, optional - Absolute tolerance used to identify endpoint compositions. - - Returns - ------- - tuple[float, float] - Pair ``(y0, y1)`` for x=0 and x=1. - """ - i0 = np.where(np.isclose(x, 0.0, atol=tol))[0] - i1 = np.where(np.isclose(x, 1.0, atol=tol))[0] - if len(i0) != 1 or len(i1) != 1: - raise ValueError("Curve must include x=0 and x=1 to define linear baseline.") - return float(y[i0[0]]), float(y[i1[0]]) - - -def _linear_baseline(x: np.ndarray, y0: float, y1: float) -> np.ndarray: - """ - Build the straight line connecting values at x=0 and x=1. - - Parameters - ---------- - x : numpy.ndarray - Composition grid. - y0 : float - Value at x=0. - y1 : float - Value at x=1. - - Returns - ------- - numpy.ndarray - Linear baseline evaluated at ``x``. - """ - return y0 + x * (y1 - y0) - - -def _excess_curve(x: np.ndarray, y: np.ndarray) -> np.ndarray: - """ - Compute excess curve relative to endpoint linear interpolation. - - Parameters - ---------- - x : numpy.ndarray - Composition grid. - y : numpy.ndarray - Property values. - - Returns - ------- - numpy.ndarray - Excess values ``y - y_linear``. - """ - y0, y1 = _endpoints_at_0_1(x, y) - return y - _linear_baseline(x, y0, y1) - - def _excess_volume(x: np.ndarray, rhos: np.ndarray) -> np.ndarray: """ Compute excess volume given molar fraction and density respectively. diff --git a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py index 142b802e0..d4ebada93 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py @@ -5,22 +5,18 @@ from pathlib import Path -import matplotlib.pyplot as plt import numpy as np import pytest from ml_peg.analysis.liquids.ethanol_water_density._analysis import ( - _excess_curve, + _excess_volume, _interp_1d, _peak_x_quadratic, _rmse, - x_to_phi_ethanol, ) from ml_peg.analysis.liquids.ethanol_water_density.io_tools import ( OUT_PATH, - _debug_plot_enabled, _read_model_curve, - _savefig, read_ref_curve, ) from ml_peg.analysis.utils.decorators import build_table, plot_parity @@ -131,82 +127,6 @@ def densities_parity(ref_curve, model_curves) -> dict[str, list]: return results -@pytest.fixture -def debug_curve_plots( - ref_curve, model_curves -) -> None: # TODO should I remove or use a different format? - """ - Generate optional debug plots for densities and excess properties. - - Parameters - ---------- - ref_curve : tuple[numpy.ndarray, numpy.ndarray] - Reference composition and density arrays. - model_curves : dict[str, tuple[numpy.ndarray, numpy.ndarray]] - Per-model composition and density arrays. - - Returns - ------- - None - Plots are written to disk when debug plotting is enabled. - """ - if not _debug_plot_enabled(): - return - print("plotting curves") - - x_ref, rho_ref = ref_curve - - for m, (x_m, rho_m) in model_curves.items(): - rho_ref_m = _interp_1d(x_ref, rho_ref, x_m) - - fig, ax = plt.subplots() - ax.plot(x_ref, rho_ref, label="ref (dense)") - ax.plot(x_m, rho_m, marker="o", label=f"{m} (model)") - ax.plot(x_m, rho_ref_m, marker="x", label="ref on model grid") - ax.set_title(f"Density curve: {m}") - ax.set_xlabel("x_ethanol") - ax.set_ylabel("rho / g cm$^{-3}$") - ax.legend() - - print("saving a curve at:", OUT_PATH / "debug" / m / "density_curve.svg") - - # excess density - _savefig(fig, OUT_PATH / "debug" / m / "density_curve.svg") - rho_ref_m = _interp_1d(x_ref, rho_ref, x_m) - - fig, ax = plt.subplots() - ax.plot(x_ref, _excess_curve(x_ref, rho_ref), label="ref (dense)") - ax.plot(x_m, _excess_curve(x_m, rho_m), marker="o", label=f"{m} (model)") - ax.plot( - x_m, _excess_curve(x_m, rho_ref_m), marker="x", label="ref on model grid" - ) - ax.set_title(f"Density curve: {m}") - ax.set_xlabel("x_ethanol") - ax.set_ylabel("rho / g cm$^{-3}$") - ax.legend() - - print("saving a curve at:", OUT_PATH / "debug" / m / "excess_density_curve.svg") - _savefig(fig, OUT_PATH / "debug" / m / "excess_density_curve.svg") - - # volume fraction plot - phi_ref = x_to_phi_ethanol(x_ref, rho_ref) - phi_m = x_to_phi_ethanol(x_m, rho_m) - - fig, ax = plt.subplots() - ax.plot(phi_ref, rho_ref, label="ref (dense)") - ax.plot(phi_m, rho_m, marker="o", label=f"{m} (model)") - ax.plot(phi_m, rho_ref_m, marker="x", label="ref on model grid") - - ax.set_title(f"Density curve (volume fraction): {m}") - ax.set_xlabel(r"$\phi_\mathrm{ethanol}$") - ax.set_ylabel("rho / g cm$^{-3}$") - ax.legend() - - out_phi = OUT_PATH / "debug" / m / "density_curve_phi.svg" - print("saving a curve at:", out_phi) - _savefig(fig, out_phi) - - @pytest.fixture def rmse_density(ref_curve, model_curves) -> dict[str, float]: """ @@ -233,9 +153,9 @@ def rmse_density(ref_curve, model_curves) -> dict[str, float]: @pytest.fixture -def rmse_excess_density(ref_curve, model_curves) -> dict[str, float]: +def rmse_excess_volume(ref_curve, model_curves) -> dict[str, float]: """ - Compute RMSE of excess density curves. + Compute RMSE of excess volume curves. Parameters ---------- @@ -255,8 +175,8 @@ def rmse_excess_density(ref_curve, model_curves) -> dict[str, float]: for m, (x_m, rho_m) in model_curves.items(): rho_ref_m = _interp_1d(x_ref, rho_ref, x_m) - ex_ref = _excess_curve(x_m, rho_ref_m) - ex_m = _excess_curve(x_m, rho_m) + ex_ref = _excess_volume(x_m, rho_ref_m) + ex_m = _excess_volume(x_m, rho_m) out[m] = _rmse(ex_m, ex_ref) @@ -266,7 +186,7 @@ def rmse_excess_density(ref_curve, model_curves) -> dict[str, float]: @pytest.fixture def peak_x_error(ref_curve, model_curves) -> dict[str, float]: """ - Compute absolute error in composition of maximum excess density. + Compute absolute error in composition of minimum excess volume. Parameters ---------- @@ -281,13 +201,13 @@ def peak_x_error(ref_curve, model_curves) -> dict[str, float]: Absolute peak-position error keyed by model name. """ x_ref, rho_ref = ref_curve - ex_ref_dense = _excess_curve(x_ref, rho_ref) + ex_ref_dense = _excess_volume(x_ref, rho_ref) x_peak_ref = _peak_x_quadratic(x_ref, ex_ref_dense) print("ref peak at:", x_peak_ref) out: dict[str, float] = {} for m, (x_m, rho_m) in model_curves.items(): - ex_m = _excess_curve(x_m, rho_m) + ex_m = _excess_volume(x_m, rho_m) x_peak_m = _peak_x_quadratic(x_m, ex_m) out[m] = float(abs(x_peak_m - x_peak_ref)) @@ -307,9 +227,8 @@ def peak_x_error(ref_curve, model_curves) -> dict[str, float]: "Model": "Name of the model", "RMSE density": "RMSE between model and reference density" "at model compositions (g cm⁻³).", - "RMSE excess density": ( - "RMSE after subtracting each curve’s linear baseline" - "between pure endpoints (g cm⁻³)." + "RMSE excess volume": ( + "RMSE of the excess volumebetween pure endpoints (cm³ mol^-1)." ), "Peak x error": ( "Absolute difference in mole-fraction location of maximum excess density." @@ -318,7 +237,7 @@ def peak_x_error(ref_curve, model_curves) -> dict[str, float]: ) def metrics( rmse_density: dict[str, float], - rmse_excess_density: dict[str, float], + rmse_excess_volume: dict[str, float], peak_x_error: dict[str, float], ) -> dict[str, dict]: """ @@ -328,8 +247,8 @@ def metrics( ---------- rmse_density : dict[str, float] Density RMSE values. - rmse_excess_density : dict[str, float] - Excess-density RMSE values. + rmse_excess_volume : dict[str, float] + Excess-volume RMSE values. peak_x_error : dict[str, float] Peak-position errors. @@ -340,13 +259,13 @@ def metrics( """ return { "RMSE density": rmse_density, - "RMSE excess density": rmse_excess_density, + "RMSE excess volume": rmse_excess_volume, "Peak x error": peak_x_error, } def test_ethanol_water_density( - metrics: dict[str, dict], densities_parity: dict[str, list], debug_curve_plots + metrics: dict[str, dict], densities_parity: dict[str, list] ) -> None: """ Execute density analysis fixtures and emit debug output. @@ -357,8 +276,6 @@ def test_ethanol_water_density( Metrics table payload. densities_parity : dict[str, list] Parity plot payload. - debug_curve_plots : None - Side-effect fixture for debug plotting. Returns ------- diff --git a/ml_peg/analysis/liquids/ethanol_water_density/metrics.yml b/ml_peg/analysis/liquids/ethanol_water_density/metrics.yml index 6da5b4f89..16b83352d 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/metrics.yml +++ b/ml_peg/analysis/liquids/ethanol_water_density/metrics.yml @@ -1,23 +1,22 @@ -# TODO: the 'bad' defaults are quite arbitrary so pick decent ones; should I log rescale? -# TODO: add more metrics? e.g. radial distribution (or diffusion). +# the 'bad' defaults are approximately 3 times the mp0 result metrics: RMSE density: good: 0.0003 # approximate error of the reference on the coarse, sampled grid - bad: 0.02 + bad: 0.3 unit: g cm^-3 tooltip: "RMSE between model and reference densities at the sampled compositions" level_of_theory: experiment - RMSE excess density: - good: 0.0003 # approximate error of the reference on the coarse, sampled grid - bad: 0.01 - unit: g cm^-3 - tooltip: "RMSE of excess (non-ideal) density after subtracting linear mixing baseline" + RMSE excess volume: + good: 0.02 # approximate error of the reference on the coarse, sampled grid + bad: 1.5 + unit: cm^3 mol^-1 + tooltip: "RMSE of excess (non-ideal) volume" level_of_theory: experiment Peak x error: - good: 0.005 # approximate error of the reference on the coarse, sampled grid - bad: 0.20 + good: 0.026 # approximate error of the reference on the coarse, sampled grid + bad: 0.3 unit: mole fraction - tooltip: "Absolute error in the mole-fraction location of maximum excess density" + tooltip: "Absolute error in the mole-fraction location of minimal excess volume" level_of_theory: experiment From 89a8cb0a9802cf3624c572be90fe31d711416bc6 Mon Sep 17 00:00:00 2001 From: Arn Date: Fri, 27 Feb 2026 22:56:39 +0000 Subject: [PATCH 16/29] fix app --- .../analyse_ethanol_water_density.py | 24 +++++++++++++++---- .../app_ethanol_water_density.py | 11 ++++----- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py index d4ebada93..e4f960be1 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py @@ -76,16 +76,31 @@ def model_curves() -> dict[str, tuple[np.ndarray, np.ndarray]]: return curves +def labels() -> list: + """ + Get list of calculated concentrations. + + Returns + ------- + list + List of all calculated concentrations. + """ + for model_name in MODELS: + labels_list, _ = _read_model_curve(model_name) + break + return labels_list + + @pytest.fixture @plot_parity( filename=OUT_PATH / "density_parity.json", title="Ethanol–water density (293.15 K)", x_label="Reference density / g cm⁻³", y_label="Predicted density / g cm⁻³", - # hoverdata={ - # "x_ethanol": [], # filled in fixture - # }, -) # TODO: read docs!!! doesn't seem to work yet. + hoverdata={ + "Labels": labels(), + }, +) def densities_parity(ref_curve, model_curves) -> dict[str, list]: """ Build parity-plot payload for model and reference densities. @@ -291,4 +306,5 @@ def test_ethanol_water_density( for key0, value0 in metrics.items() } ) + print(densities_parity) return diff --git a/ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py b/ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py index cb4e3c400..0531e645b 100644 --- a/ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py +++ b/ml_peg/app/liquids/ethanol_water_density/app_ethanol_water_density.py @@ -19,7 +19,7 @@ CATEGORY = "liquids" BENCHMARK_NAME = "ethanol_water_density" -DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/" +DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/liquids.html#ethanol-water-density-curves" DATA_PATH = APP_ROOT / "data" / CATEGORY / BENCHMARK_NAME @@ -33,14 +33,11 @@ def register_callbacks(self) -> None: DATA_PATH / "density_parity.json", id=f"{BENCHMARK_NAME}-figure" ) - # When the user clicks a metric column in the table, show the parity plot. plot_from_table_column( table_id=self.table_id, plot_id=f"{BENCHMARK_NAME}-figure-placeholder", column_to_plot={ - "RMSE density": parity, - "RMSE excess density": parity, - "Peak x error": parity, + "density": parity, }, ) @@ -58,8 +55,8 @@ def get_app() -> EthanolWaterDecompositionCurvesApp: name=BENCHMARK_NAME, description=( "Ethanol–water mixture density at 293.15 K. Metrics include density RMSE, " - "excess-density RMSE (baseline-subtracted), and error in the mole-fraction " - "location of the maximum excess density." + "excess-volume RMSE, and error in the mole-fraction" + "location of the maximum excess volume." ), docs_url=DOCS_URL, table_path=DATA_PATH / "density_metrics_table.json", From fb98e6de4c06b062c9c1b624b66778b281498413 Mon Sep 17 00:00:00 2001 From: Arn Date: Fri, 27 Feb 2026 22:57:09 +0000 Subject: [PATCH 17/29] mark test very slow --- .../liquids/ethanol_water_density/calc_ethanol_water_density.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index 3f862972a..3a4a229ab 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -96,6 +96,7 @@ def _case_id(composition) -> str: return f"x={composition.x_ethanol:.2f}" +@pytest.mark.very_slow @pytest.mark.parametrize("mlip", MODELS.items(), ids=list(MODELS.keys())) @pytest.mark.parametrize( "composition", COMPOSITIONS, ids=[_case_id(c) for c in COMPOSITIONS] From 504e68f934678f171b835b2146e350259faf6340 Mon Sep 17 00:00:00 2001 From: Arn Date: Fri, 27 Feb 2026 23:23:31 +0000 Subject: [PATCH 18/29] add docs --- docs/source/user_guide/benchmarks/liquids.rst | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 docs/source/user_guide/benchmarks/liquids.rst diff --git a/docs/source/user_guide/benchmarks/liquids.rst b/docs/source/user_guide/benchmarks/liquids.rst new file mode 100644 index 000000000..5e444d216 --- /dev/null +++ b/docs/source/user_guide/benchmarks/liquids.rst @@ -0,0 +1,33 @@ +================= +Liquids +================= + +Water ethanol density curves +======= + +Summary +------- + +Benchmark of the density of water-ethanol mixtures using NPT MD for different concentrations of ethanol, compare to experiment. + + +Metrics +------- + +1. rms of the density difference. +2. rms of the excess volume difference. +3. Location of the minimal excess volume. + + +Data availability +----------------- +Input structures: +Packmol generated + +Reference densities: +From: M. Southard and D. Green, Perry’s Chemical Engineers’ Handbook, 9th Edition. McGraw-Hill Education, 2018. + +Computational cost +------------------ + +Very high - 1 ns of NPT MD on about 120 water/ethanol molecules, 7 times per model. From a493f59f4e46edb8c091c2860f0a732da8fedff0 Mon Sep 17 00:00:00 2001 From: Arn Date: Fri, 27 Feb 2026 23:46:41 +0000 Subject: [PATCH 19/29] only use half of npt samples --- ml_peg/analysis/liquids/ethanol_water_density/io_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py b/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py index b0e5351ec..116febdb8 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py @@ -88,7 +88,7 @@ def _read_model_curve(model_name: str) -> tuple[list[float], list[float]]: if not rho_vals: raise ValueError(f"No density samples found in {ts_path}") - rho_mean = float(np.mean(rho_vals)) + rho_mean = float(np.mean(rho_vals[len(rho_vals) // 2 :])) xs.append(x_ethanol) rhos.append(rho_mean) From c2137e75b467c2a09f9fe17c4e9ed1f1e6fd0ad4 Mon Sep 17 00:00:00 2001 From: Arn Date: Wed, 18 Mar 2026 17:53:10 +0000 Subject: [PATCH 20/29] simplify --- .../ethanol_water_density/_fake_data.py | 297 --------------- .../calc_ethanol_water_density.py | 142 +------ .../liquids/ethanol_water_density/md_code.py | 350 +++++------------- 3 files changed, 103 insertions(+), 686 deletions(-) delete mode 100644 ml_peg/calcs/liquids/ethanol_water_density/_fake_data.py diff --git a/ml_peg/calcs/liquids/ethanol_water_density/_fake_data.py b/ml_peg/calcs/liquids/ethanol_water_density/_fake_data.py deleted file mode 100644 index abc471ec5..000000000 --- a/ml_peg/calcs/liquids/ethanol_water_density/_fake_data.py +++ /dev/null @@ -1,297 +0,0 @@ -"""Generate fake data for debugging density-curve metrics.""" - -from __future__ import annotations - -from dataclasses import dataclass - -import numpy as np - -from ml_peg.analysis.liquids.ethanol_water_density.io_tools import read_ref_curve - - -@dataclass(frozen=True) -class FakeCurveParams: - """ - Parameters controlling synthetic curve perturbations. - - Attributes - ---------- - severity : float - Global scaling of all enabled perturbation components. - bias : float - Additive offset magnitude. - scale : float - Multiplicative distortion magnitude. - tilt : float - Linear-in-composition additive distortion magnitude. - warp : float - Smooth nonlinear distortion magnitude. - noise_sigma : float - Standard deviation for additive white noise. - corr_len : float - Correlation length for correlated noise component. - bump_amp : float - Localized bump amplitude. - bump_center : float - Composition location for localized bump. - bump_width : float - Width of localized bump in composition units. - """ - - # Master knob: 0 -> perfect match, 1 -> very poor - severity: float = 0.0 - - # Individual error components (interpreted as "max at severity=1") - bias: float = 0.0 # additive offset in y-units - scale: float = 0.0 # multiplicative: y *= (1 + scale*...) - tilt: float = 0.0 # linear-in-x additive distortion - warp: float = 0.0 # smooth nonlinear additive distortion - - noise_sigma: float = 0.0 # iid gaussian noise in y-units - corr_len: float = 0.0 # if >0, adds correlated noise along x - - bump_amp: float = 0.0 # amplitude of local bump(s) - bump_center: float = 0.5 # x location of bump - bump_width: float = 0.08 # bump width (in x units) - - -def _smooth_random_field( - xs: np.ndarray, corr_len: float, rng: np.random.Generator -) -> np.ndarray: - """ - Create a smooth random field along the composition grid. - - Parameters - ---------- - xs : numpy.ndarray - Composition grid. - corr_len : float - Correlation length in composition units. - rng : numpy.random.Generator - Random number generator. - - Returns - ------- - numpy.ndarray - Approximately zero-mean, unit-variance correlated field. - """ - if corr_len <= 0: - return np.zeros_like(xs) - - dx = xs[:, None] - xs[None, :] - k = np.exp(-0.5 * (dx / corr_len) ** 2) - # sample correlated normal: K^(1/2) z via cholesky (add jitter for stability) - z = rng.standard_normal(len(xs)) - field = np.linalg.cholesky(k + 1e-12 * np.eye(len(xs))) @ z - field = field - field.mean() - return field / (field.std() + 1e-12) - - -def make_fake_curve_from_ref( - xs_ref: list[float], - ys_ref: list[float], - *, - params: FakeCurveParams, - seed: int | None = 0, - clip: tuple[float | None, float | None] = (None, None), -) -> tuple[list[float], list[float]]: - """ - Build a synthetic curve from reference data and perturbation settings. - - Parameters - ---------- - xs_ref : list[float] - Reference x-grid. - ys_ref : list[float] - Reference y-values. - params : FakeCurveParams - Perturbation parameters controlling distortion type and magnitude. - seed : int | None, optional - Seed for deterministic random perturbations. - clip : tuple[float | None, float | None], optional - Optional ``(min, max)`` clipping bounds for generated y values. - - Returns - ------- - tuple[list[float], list[float]] - Synthetic x and y arrays. - """ - sev = float(np.clip(params.severity, 0.0, 1.0)) - rng = np.random.default_rng(seed) - - xs = np.asarray(xs_ref, dtype=float) - y = np.asarray(ys_ref, dtype=float).copy() - - # Normalize x into [-1, 1] for stable “tilt/warp” magnitudes - x01 = (xs - xs.min()) / (xs.max() - xs.min() + 1e-12) - xpm = 2.0 * x01 - 1.0 - - # Small randomization so multiple models with same severity aren’t identical - # (but still deterministic for a given seed). - def jitter(): - """ - Return a small random multiplier around unity. - - Returns - ------- - float - Random scale factor in a narrow range near 1. - """ - return 0.85 + 0.30 * rng.random() - - # 1) multiplicative scale error - if params.scale != 0.0 and sev > 0: - y *= 1.0 + (params.scale * sev * jitter()) - - # 2) additive bias - if params.bias != 0.0 and sev > 0: - y += params.bias * sev * jitter() - - # 3) linear tilt (additive) - if params.tilt != 0.0 and sev > 0: - y += (params.tilt * sev * jitter()) * xpm - - # 4) smooth nonlinear warp (additive): use a low-order smooth basis - if params.warp != 0.0 and sev > 0: - # cubic-ish shape distortion with zero mean - w = xpm**3 - xpm * np.mean(xpm**2) - w = w - w.mean() - w = w / (np.std(w) + 1e-12) - y += (params.warp * sev * jitter()) * w - - # 5) local bump to simulate specific composition failure - if params.bump_amp != 0.0 and sev > 0: - bump = np.exp( - -0.5 * ((xs - params.bump_center) / (params.bump_width + 1e-12)) ** 2 - ) - bump = bump / (bump.max() + 1e-12) - y += (params.bump_amp * sev * jitter()) * bump - - # 6) noise: iid + optional correlated component - if params.noise_sigma != 0.0 and sev > 0: - y += rng.normal(0.0, params.noise_sigma * sev, size=len(xs)) - - if params.corr_len > 0.0 and params.noise_sigma != 0.0 and sev > 0: - field = _smooth_random_field(xs, params.corr_len, rng) - y += field * (params.noise_sigma * sev * 0.8) - - lo, hi = clip - if lo is not None: - y = np.maximum(y, lo) - if hi is not None: - y = np.minimum(y, hi) - - return xs, y - - -def make_fake_curve( - kind: str | int, - seed: int | None = 0, -) -> tuple[list[float], list[float]]: - """ - Generate a synthetic density curve with predefined quality level. - - Parameters - ---------- - kind : str | int - Quality label or index (perfect/good/medium/bad or 0-3). - seed : int | None, optional - Seed for deterministic randomness. - - Returns - ------- - tuple[list[float], list[float]] - Synthetic x and y arrays. - """ - xs_ref, ys_ref = read_ref_curve() - - kind = kind.lower().strip() if isinstance(kind, str) else kind - if kind == "perfect" or kind == 0: - params = FakeCurveParams(severity=0.0) - elif kind == "good" or kind == 1: - params = FakeCurveParams( - severity=0.15, - bias=0.0, - scale=0.01, - tilt=0.003, - warp=0.002, - noise_sigma=0.001, - corr_len=0.12, - bump_amp=0.0, - ) - elif kind in {"medium", "ok"} or kind == 2: - params = FakeCurveParams( - severity=0.45, - bias=0.0, - scale=0.03, - tilt=0.01, - warp=0.01, - noise_sigma=0.004, - corr_len=0.15, - bump_amp=0.01, - bump_center=0.6, - bump_width=0.10, - ) - elif kind == "bad" or kind == 3: - params = FakeCurveParams( - severity=0.85, - bias=0.02, - scale=0.06, - tilt=0.03, - warp=0.04, - noise_sigma=0.01, - corr_len=0.18, - bump_amp=0.05, - bump_center=0.4, - bump_width=0.08, - ) - else: - raise ValueError(f"Unknown kind={kind!r} (use perfect/good/medium/bad)") - - return make_fake_curve_from_ref(xs_ref, ys_ref, params=params, seed=seed) - - -def make_fake_density_timeseries( - rho_eq: float, - n_steps: int, - *, - seed: int, - start_offset: float = 0.02, # initial deviation from eq - tau: float = 0.15, # relaxation rate (bigger -> faster) - noise_sigma: float = 0.001, # per-step noise -) -> list[float]: - """ - Generate a fake density time series with relaxation dynamics. - - Parameters - ---------- - rho_eq : float - Equilibrium density target. - n_steps : int - Number of time steps to generate. - seed : int - Seed for deterministic random noise. - start_offset : float, optional - Magnitude of initial offset from equilibrium. - tau : float, optional - Relaxation factor per step. - noise_sigma : float, optional - Standard deviation of additive Gaussian noise per step. - - Returns - ------- - list[float] - Generated density values. - """ - rng = np.random.default_rng(seed) - rho0 = rho_eq + start_offset * (2 * rng.random() - 1) - - series = [] - rho = rho0 - for _ in range(n_steps): - # exponential-ish relaxation to rho_eq - rho += tau * (rho_eq - rho) - # add noise - rho += rng.normal(0.0, noise_sigma) - series.append(float(rho)) - return series diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index 3a4a229ab..e75265e5e 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -2,11 +2,10 @@ from __future__ import annotations +import logging import os -from pathlib import Path from typing import Any -import numpy as np import pytest from ml_peg.calcs.liquids.ethanol_water_density._compositions import ( @@ -14,13 +13,6 @@ DATA_PATH, load_compositions, ) -from ml_peg.calcs.liquids.ethanol_water_density._fake_data import ( - make_fake_curve, - make_fake_density_timeseries, -) -from ml_peg.calcs.liquids.ethanol_water_density._io_tools import ( - write_density_timeseries_checkpointed, -) from ml_peg.calcs.liquids.ethanol_water_density.md_code import run_one_case from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models @@ -36,72 +28,9 @@ COMPOSITIONS = load_compositions() -def add_shorter_d3_calculator(model, calcs): - """ - Add D3 dispersion to calculator(s). - - Parameters - ---------- - model - Model to add the dispersion. - calcs - Calculator, or list of calculators, to add D3 dispersion to via a - SumCalculator. - - Returns - ------- - SumCalculator | Calculator - Calculator(s) with D3 dispersion added, or the original calculator when - the model is already trained with D3 corrections. - """ - if model.trained_on_d3: - return calcs - from ase import units - from ase.calculators.mixing import SumCalculator - import torch - from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator - - if not isinstance(calcs, list): - calcs = [calcs] - - d3_calc = TorchDFTD3Calculator( - device=model.d3_kwargs.get("device", "cpu"), - damping=model.d3_kwargs.get("damping", "bj"), - xc=model.d3_kwargs.get("xc", "pbe"), - dtype=getattr(torch, model.d3_kwargs.get("dtype", "float32")), - cutoff=model.d3_kwargs.get( - "cutoff", 25.0 * units.Bohr - ), # shortened to make run more manageable. - ) - calcs.append(d3_calc) - - return SumCalculator(calcs) - - -def _case_id(composition) -> str: - """ - Build a readable test identifier for a composition case. - - Parameters - ---------- - composition : Any - Composition object with an ``x_ethanol`` attribute. - - Returns - ------- - str - Case identifier shown in pytest output. - """ - # nicer test ids in `pytest -vv` - return f"x={composition.x_ethanol:.2f}" - - @pytest.mark.very_slow @pytest.mark.parametrize("mlip", MODELS.items(), ids=list(MODELS.keys())) -@pytest.mark.parametrize( - "composition", COMPOSITIONS, ids=[_case_id(c) for c in COMPOSITIONS] -) -def test_water_ethanol_density_curve(mlip: tuple[str, Any], composition) -> None: +def test_water_ethanol_density_curves(mlip: tuple[str, Any], composition) -> None: """ Generate one density-curve case for a model and composition. @@ -117,10 +46,8 @@ def test_water_ethanol_density_curve(mlip: tuple[str, Any], composition) -> None None This test writes output files for a single case. """ - if not FAKE_DATA: - water_ethanol_density_curve_one_case(mlip, composition) - else: - water_ethanol_density_dummy_data_one_case(mlip, composition) + for case in COMPOSITIONS: + water_ethanol_density_curve_one_case(mlip, case) def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: @@ -147,6 +74,8 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: calc = model.get_calculator() calc = model.add_d3_calculator(calc) + # TODO: the data downloading thing here + struct_path = DATA_PATH / case.filename if not struct_path.exists(): raise FileNotFoundError( @@ -156,53 +85,14 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: case_dir = model_out / f"x_ethanol_{case.x_ethanol:.2f}" case_dir.mkdir(parents=True, exist_ok=True) - rho_series = run_one_case( - struct_path, calc, workdir=case_dir, continue_running=CONTINUE_RUNNING + logging.basicConfig( + format="%(message)s", + level=logging.INFO, + filename=case_dir / f"{model_name}.log", + filemode="a", + force=True, ) - - ts_path = case_dir / "density_timeseries.csv" - write_density_timeseries_checkpointed(ts_path, rho_series) - - -def water_ethanol_density_dummy_data_one_case(mlip: tuple[str, Any], case) -> None: - """ - Generate one synthetic density time series for debugging. - - Parameters - ---------- - mlip : tuple[str, Any] - Pair of model name and model object. - case : Any - Composition case containing ``x_ethanol``. - - Returns - ------- - None - This function writes a fake density time series. - """ - model_name, model = mlip - - model_out = OUT_PATH / model_name - model_out.mkdir(parents=True, exist_ok=True) - - model_kind = MODEL_INDEX[model_name] % 4 - xs_curve, ys_curve = make_fake_curve(model_kind, seed=MODEL_INDEX[model_name]) - xs_curve = np.asarray(xs_curve, dtype=float) - ys_curve = np.asarray(ys_curve, dtype=float) - - case_dir = model_out / f"x_ethanol_{case.x_ethanol:.2f}" - case_dir.mkdir(parents=True, exist_ok=True) - - rho_eq = float(np.interp(case.x_ethanol, xs_curve, ys_curve)) - n_steps = 200 # fixed for dummy data - - seed = (hash(model_name) ^ hash(round(case.x_ethanol, 4))) & 0xFFFFFFFF - rho_series = make_fake_density_timeseries( - rho_eq, n_steps, seed=seed, start_offset=0.01, tau=0.10, noise_sigma=0.0005 - ) - - ts_path = case_dir / "density_timeseries.csv" - write_density_timeseries_checkpointed(ts_path, rho_series, do_not_raise=True) + run_one_case(struct_path, calc, case_dir / f"{model_name}.traj") if __name__ == "__main__": # TODO: delete this @@ -218,10 +108,6 @@ def water_ethanol_density_dummy_data_one_case(mlip: tuple[str, Any], case) -> No rho = run_one_case( "data/mix_xe_0.00.extxyz", calc, - nvt_steps=1000, - npt_steps=1000, - log_every=50, - workdir=Path("debug"), - continue_running=False, + output_fname="debug/", ) print(rho) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py index 18215d58b..1cdb8b446 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py @@ -2,46 +2,32 @@ from __future__ import annotations -from collections.abc import Iterable -from contextlib import contextmanager +import logging +import os from pathlib import Path import time from typing import Any -from ase.io import Trajectory, read, write -from ase.md import Langevin, MDLogger +from ase import units +from ase.io import Trajectory, read +from ase.md import Langevin from ase.md.nose_hoover_chain import IsotropicMTKNPT from ase.md.velocitydistribution import ( MaxwellBoltzmannDistribution, Stationary, ZeroRotation, ) -from ase.optimize import FIRE -from ase.units import bar, fs -import numpy as np -from ml_peg.calcs.liquids.ethanol_water_density._io_tools import DensityTimeseriesLogger +NUM_NPT_STEPS = 1000_000 +NUM_NVT_STEPS = 50_000 +TIMESTEP = 1 * units.fs +LOG_INTERVAL = 100 +ATM = 1.01325 * units.bar +TEMPERATURE = 298.15 * units.K +LANGEVIN_FRICTION = 1 / (500 * units.fs) -def total_mass_kg(atoms): - """ - Return atomic-system mass in kilograms. - - Parameters - ---------- - atoms : ase.Atoms - Atomic configuration. - - Returns - ------- - float - Total mass in kilograms. - """ - amu_to_kg = 1.66053906660e-27 - return atoms.get_masses().sum() * amu_to_kg - - -def density_g_cm3(atoms): +def get_density_g_cm3(atoms): """ Return density in g/cm^3. @@ -55,280 +41,122 @@ def density_g_cm3(atoms): float Density in g/cm^3. """ + amu_to_kg = 1.66053906660e-27 v_a3 = atoms.get_volume() v_m3 = v_a3 * 1e-30 - m_kg = total_mass_kg(atoms) + m_kg = atoms.get_masses().sum() * amu_to_kg rho_kg_m3 = m_kg / v_m3 return rho_kg_m3 / 1000.0 -def attach_basic_logging(dyn, atoms, md_logfile, log_every, t0): +def log_md(dyn, start_time): """ - Attach text and progress loggers to an ASE dynamics object. + Log molecular dynamics simulation. Parameters ---------- - dyn : ase.md.md.MolecularDynamics - Dynamics object to attach callbacks to. - atoms : ase.Atoms - Current atomic system. - md_logfile : str | pathlib.Path - Path to ASE MD log file. - log_every : int - Logging interval in MD steps. - t0 : float - Start timestamp from ``time.time()``. - - Returns - ------- - None - This function mutates ``dyn`` by attaching callbacks. + dyn + ASE molecular dynamics object. + start_time + Real time of the simulation start, in seconds. """ - logger = MDLogger( - dyn, - atoms, - md_logfile, - header=True, - stress=False, - peratom=False, - mode="a", + current_time = time.time() - start_time + energy = dyn.atoms.get_potential_energy() + density = get_density_g_cm3(dyn.atoms) + temperature = dyn.atoms.get_temperature() + t = dyn.get_time() / (1000 * units.fs) + logging.info( + f"""t: {t:>8.3f} ps\ + Walltime: {current_time:>10.3f} s\ + T: {temperature:.1f} K\ + Epot: {energy:.2f} eV\ + density: {density:.3f} g/cm^3\ + """ ) - dyn.attach(logger, interval=log_every) - - def progress(): - """Print one progress line with thermodynamic state.""" - step = dyn.get_number_of_steps() - rho = density_g_cm3(atoms) - volume = atoms.get_volume() - temperature = atoms.get_temperature() - elapsed = time.time() - t0 - - print( - f"[step {step:>8}] " - f"T={temperature:7.2f} K | " - f"V={volume:10.2f} A^3 | " - f"rho={rho:7.4f} g/cm^3 | " - f"elapsed={elapsed:6.1f}s" - ) - - dyn.attach(progress, interval=log_every) -@contextmanager -def traj_logging(dyn, atoms, workdir, traj_every: int, name="md.traj"): +def run_npt(atoms, calc, output_fname): """ - Attach trajectory logging inside a context manager. + Run NPT molecular dynamics using the isotropic MTK barostat. Parameters ---------- - dyn : ase.md.md.MolecularDynamics - Dynamics object receiving callback. - atoms : ase.Atoms - Atomic system written to trajectory. - workdir : pathlib.Path - Output directory. - traj_every : int - Trajectory write interval in steps. - name : str, optional - Trajectory filename within ``workdir``. - - Yields - ------ - ase.io.trajectory.Trajectory | None - Open trajectory handle when enabled, otherwise ``None``. + atoms + ASE Atoms of the system. + calc + ASE Calculator. + output_fname + File name to save the trajectory to. """ - traj = None - if traj_every and traj_every > 0: - traj = Trajectory(str(workdir / name), "a", atoms) - dyn.attach(traj.write, interval=traj_every) - try: - yield traj - finally: - if traj is not None: - traj.close() + if os.path.exists(output_fname): + try: + traj = Trajectory(output_fname) + atoms = traj[-1] + nsteps = (len(traj) - 1) * LOG_INTERVAL + except Exception as e: + print(e) + nsteps = 0 + else: + nsteps = 0 + + atoms.calc = calc + + dyn = IsotropicMTKNPT( + atoms=atoms, + timestep=TIMESTEP, + temperature_K=TEMPERATURE, + pressure_au=ATM, + tdamp=50 * units.fs, + pdamp=500 * units.fs, + trajectory=output_fname, + loginterval=LOG_INTERVAL, + append_trajectory=True, + ) + dyn.nsteps = nsteps + dyn.attach(log_md, interval=LOG_INTERVAL, dyn=dyn, start_time=time.time()) + dyn.run(steps=NUM_NPT_STEPS) def run_one_case( - struct_path: Path, + struct_path: str | Path, calc: Any, - *, - temperature: float = 298.15, - p_bar: float = 1.0, - dt_fs: float = 1.0, - nvt_steps: int = 50_000, - npt_steps: int = 1000_000, - sample_every: int = 20, - log_every: int = 250, - log_trajectory_every: int = 500, - dummy_data=False, - continue_running=False, - workdir: Path, -) -> Iterable[float]: + output_fname: str | Path, +): """ Run the full MD workflow for one composition case. Parameters ---------- - struct_path : pathlib.Path + struct_path : str | pathlib.Path Input structure path. calc : Any ASE-compatible calculator. - temperature : float, optional - Target temperature in kelvin. - p_bar : float, optional - Target pressure in bar. - dt_fs : float, optional - Time step in femtoseconds. - nvt_steps : int, optional - Initial NVT stabilisation and mixing steps. - npt_steps : int, optional - NPT steps. - sample_every : int, optional - Sampling interval for density collection. - log_every : int, optional - Logging interval in MD steps. - log_trajectory_every : int, optional - Trajectory write interval in MD steps. - dummy_data : bool, optional - If ``True``, skip simulation and generate synthetic data. - continue_running : bool, optional - If ``True``, continue running from the previous trajectory. - workdir : pathlib.Path - Output directory for logs and trajectories. + output_fname : str | pathlib.Path + The output file path. Returns ------- collections.abc.Iterable[float] Density time series in g/cm^3. """ - ts_path = workdir / "density_timeseries.csv" - traj_path = workdir / "md.traj" - - if dummy_data: - rho_series = np.random.normal( - loc=0.9, scale=0.05, size=npt_steps // sample_every - ) - with DensityTimeseriesLogger(ts_path) as density_log: - for rho in rho_series: - density_log.write(rho) - return rho_series - - workdir.mkdir(parents=True, exist_ok=True) - - dt = dt_fs * fs - t0 = time.time() - ps = 1000 * fs - thermostat_tau = 50 * ps - barostat_tau = 100 * ps - - target_samples = npt_steps // sample_every - - if continue_running: - # 1) Load last saved state (positions + cell + momenta if stored) - if traj_path.exists(): - with Trajectory(str(traj_path), "r") as tr: - if len(tr) == 0: - raise RuntimeError(f"{traj_path} exists but contains no frames.") - atoms = tr[-1] - else: - raise RuntimeError(f"No traj path at {traj_path}. Cannot continue running") - - # 2) Count how many samples are already present - if ts_path.exists(): - # assume one rho per non-empty line; ignore a header if present - n_lines = 0 - with open(ts_path, encoding="utf-8") as f: - for line in f: - s = line.strip() - if not s: - continue - # skip header-ish lines - if any(c.isalpha() for c in s): - continue - n_lines += 1 - already_samples = n_lines - else: - raise RuntimeError(f"no ts_path at {ts_path}. Cannot continue running") - - # If we've already finished, just return what we have - if already_samples >= target_samples: - # load and return existing rhos - rhos_existing = [] - if ts_path.exists(): - with open(ts_path, encoding="utf-8") as f: - for line in f: - s = line.strip() - if not s or any(c.isalpha() for c in s): - continue - # tolerate csv with extra columns - rhos_existing.append(float(s.split(",")[0])) - return np.array(rhos_existing) - atoms.calc = calc - else: - already_samples = 0 - atoms = read(struct_path) - atoms.set_pbc(True) - atoms.wrap() - atoms.calc = calc - - # fast pre-relax - opt = FIRE(atoms, logfile=str(workdir / "opt.log")) - opt.run(fmax=0.15) - - # velocities - MaxwellBoltzmannDistribution(atoms, temperature_K=temperature) - Stationary(atoms) - ZeroRotation(atoms) - + atoms = read(struct_path) + atoms.set_pbc(True) + atoms.wrap() + atoms.calc = calc + + # velocities + MaxwellBoltzmannDistribution(atoms, temperature_K=TEMPERATURE) + Stationary(atoms) + ZeroRotation(atoms) + if os.path.exists(output_fname): # NVT dyn = Langevin( atoms, - timestep=dt, - temperature_K=temperature, - friction=1 / (500 * fs), + timestep=TIMESTEP, + temperature_K=TEMPERATURE, + friction=LANGEVIN_FRICTION, ) - attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) - with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): - dyn.run(nvt_steps) + dyn.run(NUM_NVT_STEPS) # NPT - dyn = IsotropicMTKNPT( - atoms, - timestep=dt, - temperature_K=temperature, - pressure_au=p_bar * bar, # TODO check units !!! - tdamp=thermostat_tau, - pdamp=barostat_tau, - ) - dyn.nsteps = already_samples * sample_every # seems public enough to me - - attach_basic_logging(dyn, atoms, str(workdir / "md.log"), log_every, t0) - - remaining_samples = target_samples - already_samples - rhos = [] - - with traj_logging(dyn, atoms, workdir, traj_every=log_trajectory_every): - with DensityTimeseriesLogger( - ts_path, overwrite=not continue_running - ) as density_log: - for _ in range(remaining_samples): - dyn.run(sample_every) - rho = density_g_cm3(atoms) - rhos.append(rho) - density_log.write(rho) - - # save final structure for debugging/repro - write(workdir / "final.extxyz", atoms) - - # If resuming, return the whole series (existing + new) for convenience - if continue_running and ts_path.exists(): - rhos_all = [] - with open(ts_path, encoding="utf-8") as f: - for line in f: - s = line.strip() - if not s or any(c.isalpha() for c in s): - continue - rhos_all.append(float(s.split(",")[0])) - return np.array(rhos_all) - - return np.array(rhos) + run_npt(atoms, calc, output_fname) From fb2963db5e03ff73d4ffb433aa4267a01fbc6d53 Mon Sep 17 00:00:00 2001 From: Arn Date: Wed, 18 Mar 2026 18:21:02 +0000 Subject: [PATCH 21/29] fix --- .../calc_ethanol_water_density.py | 12 ++++++++---- .../calcs/liquids/ethanol_water_density/md_code.py | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index e75265e5e..3d7305262 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -30,7 +30,7 @@ @pytest.mark.very_slow @pytest.mark.parametrize("mlip", MODELS.items(), ids=list(MODELS.keys())) -def test_water_ethanol_density_curves(mlip: tuple[str, Any], composition) -> None: +def terge_water_ethanol_density_curves(mlip: tuple[str, Any], composition) -> None: """ Generate one density-curve case for a model and composition. @@ -100,14 +100,18 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: from ase import units from mace.calculators import mace_mp + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(message)s", + ) + calc = mace_mp( "data_old/mace-omat-0-small.model", dispersion=True, dispersion_cutoff=25 * units.Bohr, ) - rho = run_one_case( + run_one_case( "data/mix_xe_0.00.extxyz", calc, - output_fname="debug/", + output_fname="debug/whatever.traj", ) - print(rho) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py index 1cdb8b446..c6f816421 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py @@ -23,7 +23,7 @@ TIMESTEP = 1 * units.fs LOG_INTERVAL = 100 ATM = 1.01325 * units.bar -TEMPERATURE = 298.15 * units.K +TEMPERATURE = 298.15 LANGEVIN_FRICTION = 1 / (500 * units.fs) @@ -148,7 +148,7 @@ def run_one_case( MaxwellBoltzmannDistribution(atoms, temperature_K=TEMPERATURE) Stationary(atoms) ZeroRotation(atoms) - if os.path.exists(output_fname): + if not os.path.exists(output_fname): # NVT dyn = Langevin( atoms, From 30b374474344a0f50facfa947463e11fb9035d26 Mon Sep 17 00:00:00 2001 From: Arn Date: Thu, 19 Mar 2026 22:04:23 +0000 Subject: [PATCH 22/29] simplify --- .../ethanol_water_density/_analysis.py | 43 ----------- .../analyse_ethanol_water_density.py | 74 ++++++++++++++----- .../calc_ethanol_water_density.py | 4 +- 3 files changed, 57 insertions(+), 64 deletions(-) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py b/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py index 4728ef217..dca9fda17 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py @@ -32,49 +32,6 @@ def weight_to_mole_fraction(w): return n_e / (n_e + n_w) -def _rmse(a: np.ndarray, b: np.ndarray) -> float: - """ - Compute root-mean-square error between two arrays. - - Parameters - ---------- - a : numpy.ndarray - First array. - b : numpy.ndarray - Second array. - - Returns - ------- - float - Root-mean-square error. - """ - d = a - b - return float(np.sqrt(np.mean(d * d))) - - -def _interp_1d(x_src: np.ndarray, y_src: np.ndarray, x_tgt: np.ndarray) -> np.ndarray: - """ - Linearly interpolate onto target x values. - - Parameters - ---------- - x_src : numpy.ndarray - Source x grid. - y_src : numpy.ndarray - Source y values. - x_tgt : numpy.ndarray - Target x positions. - - Returns - ------- - numpy.ndarray - Interpolated y values at ``x_tgt``. - """ - if np.any(x_tgt < x_src.min() - 1e-12) or np.any(x_tgt > x_src.max() + 1e-12): - raise ValueError("Target x values fall outside reference interpolation range.") - return np.interp(x_tgt, x_src, y_src) - - def _excess_volume(x: np.ndarray, rhos: np.ndarray) -> np.ndarray: """ Compute excess volume given molar fraction and density respectively. diff --git a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py index e4f960be1..ed3486c2a 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py @@ -10,17 +10,17 @@ from ml_peg.analysis.liquids.ethanol_water_density._analysis import ( _excess_volume, - _interp_1d, _peak_x_quadratic, - _rmse, + weight_to_mole_fraction, ) from ml_peg.analysis.liquids.ethanol_water_density.io_tools import ( + CALC_PATH, + DATA_PATH, OUT_PATH, _read_model_curve, - read_ref_curve, ) from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import load_metrics_config +from ml_peg.analysis.utils.utils import load_metrics_config, rmse from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models @@ -31,7 +31,8 @@ DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( METRICS_CONFIG_PATH ) - +LOG_INTERVAL_PS = 0.1 +EQUILIB_TIME_PS = 500 OUT_PATH.mkdir(parents=True, exist_ok=True) @@ -46,13 +47,45 @@ def ref_curve() -> tuple[np.ndarray, np.ndarray]: tuple[numpy.ndarray, numpy.ndarray] Sorted mole fractions and reference densities. """ - x_ref, rho_ref = read_ref_curve() - x = np.asarray(x_ref, dtype=float) - rho = np.asarray(rho_ref, dtype=float) + ref_file = DATA_PATH / "densities_293.15.txt" + rho_ref = np.loadtxt(ref_file) + + n = len(rho_ref) + + # weight fraction grid + w = np.linspace(0.0, 1.0, n) + + # convert to mole fraction + x = weight_to_mole_fraction(w) + + return x, rho_ref + + +def compute_density(fname, density_col=13): + """ + Compute average density from NPT log file. + + Parameters + ---------- + fname + Path to the log file. + density_col + Which column the density numbers are in. - # Ensure monotonic x for interpolation - order = np.argsort(x) - return x[order], rho[order] + Returns + ------- + float + Average density in g/cm3. + """ + density_series = [] + with open(fname) as lines: + for line in lines: + items = line.strip().split() + if len(items) != 15: + continue + density_series.append(float(items[13])) + skip_frames = int(EQUILIB_TIME_PS / LOG_INTERVAL_PS) + return np.mean(density_series[skip_frames:]) @pytest.fixture @@ -67,7 +100,12 @@ def model_curves() -> dict[str, tuple[np.ndarray, np.ndarray]]: """ curves: dict[str, tuple[np.ndarray, np.ndarray]] = {} for model_name in MODELS: - xs, rhos = _read_model_curve(model_name) + model_dir = CALC_PATH / model_name + xs = [] + rhos = [] + for case_dir in model_dir.iterdir(): + rhos.append(compute_density(case_dir / f"{model_name}.log")) + xs.append(float(case_dir.name.split("_")[-1])) x = np.asarray(xs, dtype=float) rho = np.asarray(rhos, dtype=float) @@ -126,7 +164,7 @@ def densities_parity(ref_curve, model_curves) -> dict[str, list]: results: dict[str, list] = {"ref": []} | {m: [] for m in MODELS} - rho_ref_on_grid = _interp_1d(x_ref, rho_ref, x_grid) + rho_ref_on_grid = np.interp(x_grid, x_ref, rho_ref) results["ref"] = list(rho_ref_on_grid) for m in MODELS: @@ -134,7 +172,7 @@ def densities_parity(ref_curve, model_curves) -> dict[str, list]: # Interpolate model to x_grid if needed if len(x_m) != len(x_grid) or np.any(np.abs(x_m - x_grid) > 1e-12): # This assumes model spans the grid range; otherwise raise. - rho_m_on_grid = _interp_1d(x_m, rho_m, x_grid) + rho_m_on_grid = np.interp(x_grid, x_m, rho_m) else: rho_m_on_grid = rho_m results[m] = list(rho_m_on_grid) @@ -162,8 +200,8 @@ def rmse_density(ref_curve, model_curves) -> dict[str, float]: x_ref, rho_ref = ref_curve out: dict[str, float] = {} for m, (x_m, rho_m) in model_curves.items(): - rho_ref_m = _interp_1d(x_ref, rho_ref, x_m) - out[m] = _rmse(rho_m, rho_ref_m) + rho_ref_m = np.interp(x_m, x_ref, rho_ref) + out[m] = rmse(rho_m, rho_ref_m) return out @@ -188,12 +226,12 @@ def rmse_excess_volume(ref_curve, model_curves) -> dict[str, float]: out: dict[str, float] = {} for m, (x_m, rho_m) in model_curves.items(): - rho_ref_m = _interp_1d(x_ref, rho_ref, x_m) + rho_ref_m = np.interp(x_m, x_ref, rho_ref) ex_ref = _excess_volume(x_m, rho_ref_m) ex_m = _excess_volume(x_m, rho_m) - out[m] = _rmse(ex_m, ex_ref) + out[m] = rmse(ex_m, ex_ref) return out diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index 3d7305262..6e3f2d40d 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -30,7 +30,7 @@ @pytest.mark.very_slow @pytest.mark.parametrize("mlip", MODELS.items(), ids=list(MODELS.keys())) -def terge_water_ethanol_density_curves(mlip: tuple[str, Any], composition) -> None: +def test_water_ethanol_density_curves(mlip: tuple[str, Any]) -> None: """ Generate one density-curve case for a model and composition. @@ -38,8 +38,6 @@ def terge_water_ethanol_density_curves(mlip: tuple[str, Any], composition) -> No ---------- mlip : tuple[str, Any] Pair of model name and model object. - composition : Any - Composition case input. Returns ------- From f59da939cea8217e2e97c525682932a8539fb8fd Mon Sep 17 00:00:00 2001 From: Arn Date: Thu, 19 Mar 2026 22:36:16 +0000 Subject: [PATCH 23/29] simplify --- .../analyse_ethanol_water_density.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py index ed3486c2a..cb504e9e1 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py @@ -1,6 +1,5 @@ """Analyse ethanol-water density curves.""" -# TODO: remove hardcoded things? from __future__ import annotations from pathlib import Path @@ -17,7 +16,6 @@ CALC_PATH, DATA_PATH, OUT_PATH, - _read_model_curve, ) from ml_peg.analysis.utils.decorators import build_table, plot_parity from ml_peg.analysis.utils.utils import load_metrics_config, rmse @@ -123,10 +121,10 @@ def labels() -> list: list List of all calculated concentrations. """ - for model_name in MODELS: - labels_list, _ = _read_model_curve(model_name) - break - return labels_list + return sorted( + float(case_dir.name.split("_")[-1]) + for case_dir in (CALC_PATH / MODELS[0]).iterdir() + ) @pytest.fixture From 63a0548c3b07dedb33c42e675dc1f1d38f36931f Mon Sep 17 00:00:00 2001 From: Arn Date: Thu, 19 Mar 2026 22:42:38 +0000 Subject: [PATCH 24/29] put all analysis in 1 file --- .../ethanol_water_density/_analysis.py | 91 ------------ .../analyse_ethanol_water_density.py | 99 +++++++++++-- .../liquids/ethanol_water_density/io_tools.py | 131 ------------------ 3 files changed, 89 insertions(+), 232 deletions(-) delete mode 100644 ml_peg/analysis/liquids/ethanol_water_density/_analysis.py delete mode 100644 ml_peg/analysis/liquids/ethanol_water_density/io_tools.py diff --git a/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py b/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py deleted file mode 100644 index dca9fda17..000000000 --- a/ml_peg/analysis/liquids/ethanol_water_density/_analysis.py +++ /dev/null @@ -1,91 +0,0 @@ -"""Analyse ethanol-water density curves.""" - -from __future__ import annotations - -import numpy as np - -M_WATER = 18.01528 # g/mol -M_ETOH = 46.06844 # g/mol - -# Pick densities consistent with your reference conditions (T,P!) -# If your ref curve is at ~20°C, these are around: -RHO_WATER_PURE = 0.9982 # g/cm^3 -RHO_ETH_PURE = 0.7893 # g/cm^3 - - -def weight_to_mole_fraction(w): - r""" - Convert ethanol weight fraction to mole fraction. - - Parameters - ---------- - w : array-like - Ethanol weight fraction :math:`m_\mathrm{ethanol} / m_\mathrm{total}`. - - Returns - ------- - numpy.ndarray - Ethanol mole fraction. - """ - n_e = w / M_ETOH - n_w = (1 - w) / M_WATER - return n_e / (n_e + n_w) - - -def _excess_volume(x: np.ndarray, rhos: np.ndarray) -> np.ndarray: - """ - Compute excess volume given molar fraction and density respectively. - - Parameters - ---------- - x : numpy.ndarray - Composition grid (mol fraction). - rhos : numpy.ndarray - Density. - - Returns - ------- - numpy.ndarray - Excess values ``y - y_linear``. - """ - return (x * M_ETOH + (1 - x) * M_WATER) / rhos - ( - x * M_ETOH / rhos[-1] + (1 - x) * M_WATER / rhos[0] - ) - - -def _peak_x_quadratic(x: np.ndarray, y: np.ndarray) -> float: - """ - Estimate x position of the minimum by local quadratic fitting. - - Parameters - ---------- - x : numpy.ndarray - Composition grid. - y : numpy.ndarray - Property values. - - Returns - ------- - float - Estimated composition of the minimum. - """ - if len(x) < 3: - return float(x[int(np.argmin(y))]) - - i = int(np.argmin(y)) - if i == 0 or i == len(x) - 1: - return float(x[i]) - - # Fit a parabola to (i-1, i, i+1) - xs = x[i - 1 : i + 2] - ys = y[i - 1 : i + 2] - - # y = ax^2 + bx + c - a, b, _c = np.polyfit(xs, ys, deg=2) - if abs(a) < 1e-16: - return float(x[i]) - - xv = -b / (2.0 * a) - - # Clamp to local bracket so we don't get silly extrapolation - return float(np.clip(xv, xs.min(), xs.max())) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py index cb504e9e1..7f59da463 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py @@ -7,21 +7,19 @@ import numpy as np import pytest -from ml_peg.analysis.liquids.ethanol_water_density._analysis import ( - _excess_volume, - _peak_x_quadratic, - weight_to_mole_fraction, -) -from ml_peg.analysis.liquids.ethanol_water_density.io_tools import ( - CALC_PATH, - DATA_PATH, - OUT_PATH, -) from ml_peg.analysis.utils.decorators import build_table, plot_parity from ml_peg.analysis.utils.utils import load_metrics_config, rmse +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models +CATEGORY = "liquids" +BENCHMARK = "ethanol_water_density" +CALC_PATH = CALCS_ROOT / CATEGORY / BENCHMARK / "outputs" +OUT_PATH = APP_ROOT / "data" / CATEGORY / BENCHMARK +DATA_PATH = CALCS_ROOT / CATEGORY / BENCHMARK / "data" + MODELS = get_model_names(current_models) MODEL_INDEX = {name: i for i, name in enumerate(MODELS)} # duplicate in calc @@ -29,12 +27,93 @@ DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( METRICS_CONFIG_PATH ) + +M_WATER = 18.01528 # g/mol +M_ETOH = 46.06844 # g/mol LOG_INTERVAL_PS = 0.1 EQUILIB_TIME_PS = 500 OUT_PATH.mkdir(parents=True, exist_ok=True) +def weight_to_mole_fraction(w): + r""" + Convert ethanol weight fraction to mole fraction. + + Parameters + ---------- + w : array-like + Ethanol weight fraction :math:`m_\mathrm{ethanol} / m_\mathrm{total}`. + + Returns + ------- + numpy.ndarray + Ethanol mole fraction. + """ + n_e = w / M_ETOH + n_w = (1 - w) / M_WATER + return n_e / (n_e + n_w) + + +def _excess_volume(x: np.ndarray, rhos: np.ndarray) -> np.ndarray: + """ + Compute excess volume given molar fraction and density respectively. + + Parameters + ---------- + x : numpy.ndarray + Composition grid (mol fraction). + rhos : numpy.ndarray + Density. + + Returns + ------- + numpy.ndarray + Excess values ``y - y_linear``. + """ + return (x * M_ETOH + (1 - x) * M_WATER) / rhos - ( + x * M_ETOH / rhos[-1] + (1 - x) * M_WATER / rhos[0] + ) + + +def _peak_x_quadratic(x: np.ndarray, y: np.ndarray) -> float: + """ + Estimate x position of the minimum by local quadratic fitting. + + Parameters + ---------- + x : numpy.ndarray + Composition grid. + y : numpy.ndarray + Property values. + + Returns + ------- + float + Estimated composition of the minimum. + """ + if len(x) < 3: + return float(x[int(np.argmin(y))]) + + i = int(np.argmin(y)) + if i == 0 or i == len(x) - 1: + return float(x[i]) + + # Fit a parabola to (i-1, i, i+1) + xs = x[i - 1 : i + 2] + ys = y[i - 1 : i + 2] + + # y = ax^2 + bx + c + a, b, _c = np.polyfit(xs, ys, deg=2) + if abs(a) < 1e-16: + return float(x[i]) + + xv = -b / (2.0 * a) + + # Clamp to local bracket so we don't get silly extrapolation + return float(np.clip(xv, xs.min(), xs.max())) + + @pytest.fixture(scope="session") def ref_curve() -> tuple[np.ndarray, np.ndarray]: """ diff --git a/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py b/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py deleted file mode 100644 index 116febdb8..000000000 --- a/ml_peg/analysis/liquids/ethanol_water_density/io_tools.py +++ /dev/null @@ -1,131 +0,0 @@ -"""I/O tools for analysis of ethanol-water densities.""" - -from __future__ import annotations - -import csv -import os -from pathlib import Path - -from matplotlib import pyplot as plt -import numpy as np - -from ml_peg.analysis.liquids.ethanol_water_density._analysis import ( - weight_to_mole_fraction, -) -from ml_peg.app import APP_ROOT -from ml_peg.calcs import CALCS_ROOT - -CATEGORY = "liquids" -BENCHMARK = "ethanol_water_density" -CALC_PATH = CALCS_ROOT / CATEGORY / BENCHMARK / "outputs" -OUT_PATH = APP_ROOT / "data" / CATEGORY / BENCHMARK -DATA_PATH = CALCS_ROOT / CATEGORY / BENCHMARK / "data" - - -def _debug_plot_enabled() -> bool: - """ - Return whether debug plots are enabled via environment variable. - - Returns - ------- - bool - ``True`` when ``DEBUG_PLOTS`` is set to a truthy value. - """ - # Turn on plots by: DEBUG_PLOTS=1 pytest ... - return os.environ.get("DEBUG_PLOTS", "0") not in ("0", "", "false", "False") - - -def _savefig(fig, outpath: Path) -> None: - """ - Save and close a Matplotlib figure. - - Parameters - ---------- - fig : matplotlib.figure.Figure - Figure object to save. - outpath : pathlib.Path - Output path. - """ - outpath.parent.mkdir(parents=True, exist_ok=True) - fig.tight_layout() - fig.savefig(outpath, dpi=200) - plt.close(fig) - - -def _read_model_curve(model_name: str) -> tuple[list[float], list[float]]: - """ - Read a model density curve by averaging per-case time series. - - Parameters - ---------- - model_name : str - Name of model output directory under calculation outputs. - - Returns - ------- - tuple[list[float], list[float]] - Mole-fraction values and corresponding mean densities. - """ - model_dir = CALC_PATH / model_name - xs: list[float] = [] - rhos: list[float] = [] - - for case_dir in sorted(model_dir.glob("x_ethanol_*")): - x_ethanol = float(case_dir.name.replace("x_ethanol_", "")) - - ts_path = case_dir / "density_timeseries.csv" - if not ts_path.exists(): - raise FileNotFoundError(f"Missing density time series: {ts_path}") - - rho_vals = [] - steps = [] - with ts_path.open(newline="") as f: - r = csv.DictReader(f) - for row in r: - steps.append(int(row["step"])) - rho_vals.append(float(row["rho_g_cm3"])) - - if not rho_vals: - raise ValueError(f"No density samples found in {ts_path}") - - rho_mean = float(np.mean(rho_vals[len(rho_vals) // 2 :])) - xs.append(x_ethanol) - rhos.append(rho_mean) - - if _debug_plot_enabled(): - fig, ax = plt.subplots() - ax.plot(steps, rho_vals) - ax.axhline(rho_mean, linestyle="--") - ax.set_title(f"{model_name} x={x_ethanol:.2f} density timeseries") - ax.set_xlabel("step") - ax.set_ylabel("rho / g cm$^{-3}$") - - _savefig( - fig, - OUT_PATH / "debug" / model_name / f"x_{x_ethanol:.2f}_timeseries.svg", - ) - - return xs, rhos - - -def read_ref_curve() -> tuple[list[float], list[float]]: - """ - Load the reference density curve and convert to mole fraction. - - Returns - ------- - tuple[list[float], list[float]] - Mole-fraction x-values and reference densities in g/cm^3. - """ - ref_file = DATA_PATH / "densities_293.15.txt" - rho_ref = np.loadtxt(ref_file) - - n = len(rho_ref) - - # weight fraction grid - w = np.linspace(0.0, 1.0, n) - - # convert to mole fraction - x = weight_to_mole_fraction(w) - - return list(x), list(rho_ref) From 342dc12bd06125d388a9f7c5837384b8b5003dd2 Mon Sep 17 00:00:00 2001 From: Arn Date: Thu, 19 Mar 2026 22:57:40 +0000 Subject: [PATCH 25/29] put all calc in 1 file --- .../ethanol_water_density/_compositions.py | 52 ----- .../ethanol_water_density/_io_tools.py | 186 --------------- .../calc_ethanol_water_density.py | 211 +++++++++++++++++- .../liquids/ethanol_water_density/md_code.py | 162 -------------- 4 files changed, 202 insertions(+), 409 deletions(-) delete mode 100644 ml_peg/calcs/liquids/ethanol_water_density/_compositions.py delete mode 100644 ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py delete mode 100644 ml_peg/calcs/liquids/ethanol_water_density/md_code.py diff --git a/ml_peg/calcs/liquids/ethanol_water_density/_compositions.py b/ml_peg/calcs/liquids/ethanol_water_density/_compositions.py deleted file mode 100644 index 290bad22b..000000000 --- a/ml_peg/calcs/liquids/ethanol_water_density/_compositions.py +++ /dev/null @@ -1,52 +0,0 @@ -"""Load composition data.""" - -from __future__ import annotations - -import csv -from dataclasses import dataclass -from pathlib import Path - -BENCH_ROOT = Path(__file__).resolve().parent -DATA_PATH = BENCH_ROOT / "data" - - -@dataclass(frozen=True) -class CompositionCase: - """ - Map composition value to structure filename. - - Attributes - ---------- - x_ethanol : float - Ethanol mole fraction for the case. - filename : str - Structure filename associated with the composition. - """ - - x_ethanol: float - filename: str - - -def load_compositions() -> list[CompositionCase]: - """ - Load composition grid from ``compositions.csv``. - - Returns - ------- - list[CompositionCase] - Parsed composition cases ordered as in the CSV file. - """ - comps_file = DATA_PATH / "compositions.csv" - cases: list[CompositionCase] = [] - with comps_file.open(newline="") as f: - reader = csv.DictReader(f) - for row in reader: - cases.append( - CompositionCase( - x_ethanol=float(row["x_ethanol"]), - filename=row["filename"], - ) - ) - if not cases: - raise RuntimeError("No compositions found in compositions.csv") - return cases diff --git a/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py b/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py deleted file mode 100644 index a974ead78..000000000 --- a/ml_peg/calcs/liquids/ethanol_water_density/_io_tools.py +++ /dev/null @@ -1,186 +0,0 @@ -"""I/O tools for calculations.""" - -from __future__ import annotations - -from collections.abc import Iterable -import csv -from pathlib import Path - - -def write_density_timeseries_checkpointed( - ts_path: Path, - rho_series: Iterable[float], - *, - min_match_fraction: float = 0.8, - do_not_raise: bool = False, -) -> None: - """ - Write ``density_timeseries.csv`` with checkpoint validation. - - Parameters - ---------- - ts_path : pathlib.Path - Output CSV path. - rho_series : collections.abc.Iterable[float] - Density samples in g/cm^3. - min_match_fraction : float, optional - Required fraction of matching values versus existing checkpoint. - do_not_raise : bool, optional - If ``True``, skip assertion failures when checkpoint mismatches. - - Returns - ------- - None - This function writes the file in-place. - """ - rho_series = list(rho_series) - - # ------------------------- - # 1) Validate existing file - # ------------------------- - if ts_path.exists(): - old_vals: list[float] = [] - - try: - with ts_path.open() as f: - r = csv.reader(f) - next(r, None) # header - for row in r: - if len(row) >= 2: - old_vals.append(float(row[1])) - except Exception: - old_vals = [] - - if old_vals: - n = min(len(old_vals), len(rho_series)) - - matches = sum(abs(old_vals[i] - rho_series[i]) < 1e-6 for i in range(n)) - - frac = matches / n if n else 0.0 - - if frac < min_match_fraction and not do_not_raise: - raise AssertionError( - f"{ts_path}: only {frac:.1%} of checkpoint values match " - f"(expected ≥ {min_match_fraction:.0%}). " - "run_one_case resume likely broken." - ) - - # ------------------------- - # 2) Always rewrite file - # ------------------------- - with ts_path.open("w", newline="") as f: - w = csv.writer(f) - w.writerow(["step", "rho_g_cm3"]) - for i, rho in enumerate(rho_series): - w.writerow([i, f"{rho:.8f}"]) - - -class DensityTimeseriesLogger: - """ - Streaming CSV logger for density time series. - - Parameters - ---------- - path : pathlib.Path - Output CSV file path. - overwrite : bool, optional - Whether to delete pre-existing output when opening. - """ - - def __init__(self, path: Path, *, overwrite: bool = True): - """ - Initialize the logger. - - Parameters - ---------- - path : pathlib.Path - Path to CSV output file. - overwrite : bool, optional - If ``True``, delete existing file when opening. - """ - self.path = Path(path) - self.overwrite = overwrite - self._f = None - self._writer = None - self._step = 0 - - def __enter__(self): - """ - Open the output file and return logger instance. - - Returns - ------- - DensityTimeseriesLogger - Logger ready to write rows. - """ - if self.overwrite and self.path.exists(): - mode = "w" - self.path.unlink() - elif not self.path.exists(): - mode = "w" - else: - mode = "a" - # If appending, recover last step index - try: - with self.path.open("r", newline="") as f: - last_step = -1 - for row in csv.reader(f): - if not row or row[0] == "step": - continue - try: - last_step = int(row[0]) - except ValueError: - continue - self._step = last_step + 1 - except Exception: - # If file is corrupted or empty, just continue from 0 - self._step = 0 - - self._f = self.path.open(mode, newline="") - self._writer = csv.writer(self._f) - - # Only write header if creating new file - if mode == "w": - self._writer.writerow(["step", "rho_g_cm3"]) - self._f.flush() - - return self - - def __exit__(self, exc_type, exc, tb): - """ - Close the output file. - - Parameters - ---------- - exc_type : type | None - Exception type, if raised inside context. - exc : BaseException | None - Exception instance, if raised inside context. - tb : traceback | None - Traceback, if raised inside context. - - Returns - ------- - None - This method performs cleanup only. - """ - if self._f: - self._f.close() - - def write(self, rho: float): - """ - Write one density value to the CSV file. - - Parameters - ---------- - rho : float - Density in g/cm^3 for the current sample. - - Returns - ------- - None - This method appends one row to disk. - """ - self._writer.writerow([self._step, f"{rho:.8f}"]) - self._f.flush() - self._step += 1 diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index 6e3f2d40d..5ff34f0a7 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -2,32 +2,225 @@ from __future__ import annotations +import csv +from dataclasses import dataclass import logging import os +from pathlib import Path +import time from typing import Any +from ase import units +from ase.io import Trajectory, read +from ase.md import Langevin +from ase.md.nose_hoover_chain import IsotropicMTKNPT +from ase.md.velocitydistribution import ( + MaxwellBoltzmannDistribution, + Stationary, + ZeroRotation, +) import pytest -from ml_peg.calcs.liquids.ethanol_water_density._compositions import ( - BENCH_ROOT, - DATA_PATH, - load_compositions, -) -from ml_peg.calcs.liquids.ethanol_water_density.md_code import run_one_case from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models +BENCH_ROOT = Path(__file__).resolve().parent OUT_PATH = BENCH_ROOT / "outputs" MODELS = load_models(current_models) MODEL_INDEX = {name: i for i, name in enumerate(MODELS)} -FAKE_DATA = os.getenv("FAKE_DENSITY_DATA", "") == "1" -CONTINUE_RUNNING = os.getenv("CONTINUE_RUNNING", "") == "1" -# IMPORTANT: create the list once for parametrization +DATA_PATH = BENCH_ROOT / "data" + +NUM_NPT_STEPS = 1000_000 +NUM_NVT_STEPS = 50_000 +TIMESTEP = 1 * units.fs +LOG_INTERVAL = 100 +ATM = 1.01325 * units.bar +TEMPERATURE = 298.15 +LANGEVIN_FRICTION = 1 / (500 * units.fs) + + +@dataclass(frozen=True) +class CompositionCase: + """ + Map composition value to structure filename. + + Attributes + ---------- + x_ethanol : float + Ethanol mole fraction for the case. + filename : str + Structure filename associated with the composition. + """ + + x_ethanol: float + filename: str + + +def load_compositions() -> list[CompositionCase]: + """ + Load composition grid from ``compositions.csv``. + + Returns + ------- + list[CompositionCase] + Parsed composition cases ordered as in the CSV file. + """ + comps_file = DATA_PATH / "compositions.csv" + cases: list[CompositionCase] = [] + with comps_file.open(newline="") as f: + reader = csv.DictReader(f) + for row in reader: + cases.append( + CompositionCase( + x_ethanol=float(row["x_ethanol"]), + filename=row["filename"], + ) + ) + if not cases: + raise RuntimeError("No compositions found in compositions.csv") + return cases + + COMPOSITIONS = load_compositions() +def run_one_case( + struct_path: str | Path, + calc: Any, + output_fname: str | Path, +): + """ + Run the full MD workflow for one composition case. + + Parameters + ---------- + struct_path : str | pathlib.Path + Input structure path. + calc : Any + ASE-compatible calculator. + output_fname : str | pathlib.Path + The output file path. + + Returns + ------- + collections.abc.Iterable[float] + Density time series in g/cm^3. + """ + atoms = read(struct_path) + atoms.set_pbc(True) + atoms.wrap() + atoms.calc = calc + + # velocities + MaxwellBoltzmannDistribution(atoms, temperature_K=TEMPERATURE) + Stationary(atoms) + ZeroRotation(atoms) + if not os.path.exists(output_fname): + # NVT + dyn = Langevin( + atoms, + timestep=TIMESTEP, + temperature_K=TEMPERATURE, + friction=LANGEVIN_FRICTION, + ) + dyn.run(NUM_NVT_STEPS) + + # NPT + run_npt(atoms, calc, output_fname) + + +def get_density_g_cm3(atoms): + """ + Return density in g/cm^3. + + Parameters + ---------- + atoms : ase.Atoms + Atomic configuration with periodic cell volume. + + Returns + ------- + float + Density in g/cm^3. + """ + amu_to_kg = 1.66053906660e-27 + v_a3 = atoms.get_volume() + v_m3 = v_a3 * 1e-30 + m_kg = atoms.get_masses().sum() * amu_to_kg + rho_kg_m3 = m_kg / v_m3 + return rho_kg_m3 / 1000.0 + + +def log_md(dyn, start_time): + """ + Log molecular dynamics simulation. + + Parameters + ---------- + dyn + ASE molecular dynamics object. + start_time + Real time of the simulation start, in seconds. + """ + current_time = time.time() - start_time + energy = dyn.atoms.get_potential_energy() + density = get_density_g_cm3(dyn.atoms) + temperature = dyn.atoms.get_temperature() + t = dyn.get_time() / (1000 * units.fs) + logging.info( + f"""t: {t:>8.3f} ps\ + Walltime: {current_time:>10.3f} s\ + T: {temperature:.1f} K\ + Epot: {energy:.2f} eV\ + density: {density:.3f} g/cm^3\ + """ + ) + + +def run_npt(atoms, calc, output_fname): + """ + Run NPT molecular dynamics using the isotropic MTK barostat. + + Parameters + ---------- + atoms + ASE Atoms of the system. + calc + ASE Calculator. + output_fname + File name to save the trajectory to. + """ + if os.path.exists(output_fname): + try: + traj = Trajectory(output_fname) + atoms = traj[-1] + nsteps = (len(traj) - 1) * LOG_INTERVAL + except Exception as e: + print(e) + nsteps = 0 + else: + nsteps = 0 + + atoms.calc = calc + + dyn = IsotropicMTKNPT( + atoms=atoms, + timestep=TIMESTEP, + temperature_K=TEMPERATURE, + pressure_au=ATM, + tdamp=50 * units.fs, + pdamp=500 * units.fs, + trajectory=output_fname, + loginterval=LOG_INTERVAL, + append_trajectory=True, + ) + dyn.nsteps = nsteps + dyn.attach(log_md, interval=LOG_INTERVAL, dyn=dyn, start_time=time.time()) + dyn.run(steps=NUM_NPT_STEPS) + + @pytest.mark.very_slow @pytest.mark.parametrize("mlip", MODELS.items(), ids=list(MODELS.keys())) def test_water_ethanol_density_curves(mlip: tuple[str, Any]) -> None: diff --git a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py b/ml_peg/calcs/liquids/ethanol_water_density/md_code.py deleted file mode 100644 index c6f816421..000000000 --- a/ml_peg/calcs/liquids/ethanol_water_density/md_code.py +++ /dev/null @@ -1,162 +0,0 @@ -"""Code for molecular-dynamics simulation workflows.""" - -from __future__ import annotations - -import logging -import os -from pathlib import Path -import time -from typing import Any - -from ase import units -from ase.io import Trajectory, read -from ase.md import Langevin -from ase.md.nose_hoover_chain import IsotropicMTKNPT -from ase.md.velocitydistribution import ( - MaxwellBoltzmannDistribution, - Stationary, - ZeroRotation, -) - -NUM_NPT_STEPS = 1000_000 -NUM_NVT_STEPS = 50_000 -TIMESTEP = 1 * units.fs -LOG_INTERVAL = 100 -ATM = 1.01325 * units.bar -TEMPERATURE = 298.15 -LANGEVIN_FRICTION = 1 / (500 * units.fs) - - -def get_density_g_cm3(atoms): - """ - Return density in g/cm^3. - - Parameters - ---------- - atoms : ase.Atoms - Atomic configuration with periodic cell volume. - - Returns - ------- - float - Density in g/cm^3. - """ - amu_to_kg = 1.66053906660e-27 - v_a3 = atoms.get_volume() - v_m3 = v_a3 * 1e-30 - m_kg = atoms.get_masses().sum() * amu_to_kg - rho_kg_m3 = m_kg / v_m3 - return rho_kg_m3 / 1000.0 - - -def log_md(dyn, start_time): - """ - Log molecular dynamics simulation. - - Parameters - ---------- - dyn - ASE molecular dynamics object. - start_time - Real time of the simulation start, in seconds. - """ - current_time = time.time() - start_time - energy = dyn.atoms.get_potential_energy() - density = get_density_g_cm3(dyn.atoms) - temperature = dyn.atoms.get_temperature() - t = dyn.get_time() / (1000 * units.fs) - logging.info( - f"""t: {t:>8.3f} ps\ - Walltime: {current_time:>10.3f} s\ - T: {temperature:.1f} K\ - Epot: {energy:.2f} eV\ - density: {density:.3f} g/cm^3\ - """ - ) - - -def run_npt(atoms, calc, output_fname): - """ - Run NPT molecular dynamics using the isotropic MTK barostat. - - Parameters - ---------- - atoms - ASE Atoms of the system. - calc - ASE Calculator. - output_fname - File name to save the trajectory to. - """ - if os.path.exists(output_fname): - try: - traj = Trajectory(output_fname) - atoms = traj[-1] - nsteps = (len(traj) - 1) * LOG_INTERVAL - except Exception as e: - print(e) - nsteps = 0 - else: - nsteps = 0 - - atoms.calc = calc - - dyn = IsotropicMTKNPT( - atoms=atoms, - timestep=TIMESTEP, - temperature_K=TEMPERATURE, - pressure_au=ATM, - tdamp=50 * units.fs, - pdamp=500 * units.fs, - trajectory=output_fname, - loginterval=LOG_INTERVAL, - append_trajectory=True, - ) - dyn.nsteps = nsteps - dyn.attach(log_md, interval=LOG_INTERVAL, dyn=dyn, start_time=time.time()) - dyn.run(steps=NUM_NPT_STEPS) - - -def run_one_case( - struct_path: str | Path, - calc: Any, - output_fname: str | Path, -): - """ - Run the full MD workflow for one composition case. - - Parameters - ---------- - struct_path : str | pathlib.Path - Input structure path. - calc : Any - ASE-compatible calculator. - output_fname : str | pathlib.Path - The output file path. - - Returns - ------- - collections.abc.Iterable[float] - Density time series in g/cm^3. - """ - atoms = read(struct_path) - atoms.set_pbc(True) - atoms.wrap() - atoms.calc = calc - - # velocities - MaxwellBoltzmannDistribution(atoms, temperature_K=TEMPERATURE) - Stationary(atoms) - ZeroRotation(atoms) - if not os.path.exists(output_fname): - # NVT - dyn = Langevin( - atoms, - timestep=TIMESTEP, - temperature_K=TEMPERATURE, - friction=LANGEVIN_FRICTION, - ) - dyn.run(NUM_NVT_STEPS) - - # NPT - run_npt(atoms, calc, output_fname) From 78dd58cebd9f574e986b45ac5e311857c62f7efc Mon Sep 17 00:00:00 2001 From: Arn Date: Fri, 20 Mar 2026 12:04:04 +0000 Subject: [PATCH 26/29] remove ifmain --- .../calc_ethanol_water_density.py | 22 ------------------- 1 file changed, 22 deletions(-) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index 5ff34f0a7..309bceeb9 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -284,25 +284,3 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: force=True, ) run_one_case(struct_path, calc, case_dir / f"{model_name}.traj") - - -if __name__ == "__main__": # TODO: delete this - # run a very small simulation to see if it does something reasonable - from ase import units - from mace.calculators import mace_mp - - logging.basicConfig( - level=logging.INFO, - format="%(asctime)s %(levelname)s %(message)s", - ) - - calc = mace_mp( - "data_old/mace-omat-0-small.model", - dispersion=True, - dispersion_cutoff=25 * units.Bohr, - ) - run_one_case( - "data/mix_xe_0.00.extxyz", - calc, - output_fname="debug/whatever.traj", - ) From c9a27d68de9746cbfaba70aadc6279d786262585 Mon Sep 17 00:00:00 2001 From: Arn De Moor <86566033+arnon-1@users.noreply.github.com> Date: Fri, 20 Mar 2026 12:18:32 +0000 Subject: [PATCH 27/29] Apply suggestions from code review Co-authored-by: Joseph Hart <92541539+joehart2001@users.noreply.github.com> --- .../calc_ethanol_water_density.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index 309bceeb9..8b268638b 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -21,11 +21,11 @@ ) import pytest +from ml_peg.calcs.utils.utils import download_s3_data from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models -BENCH_ROOT = Path(__file__).resolve().parent -OUT_PATH = BENCH_ROOT / "outputs" +OUT_PATH = Path(__file__).parent / "outputs" MODELS = load_models(current_models) MODEL_INDEX = {name: i for i, name in enumerate(MODELS)} @@ -265,9 +265,15 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: calc = model.get_calculator() calc = model.add_d3_calculator(calc) - # TODO: the data downloading thing here + data_dir = ( + download_s3_data( + key="inputs/liquids/ethanol_water_density/ethanol_water_density.zip", + filename="ethanol_water_density.zip", + ) + / "ethanol_water_density" + ) - struct_path = DATA_PATH / case.filename + struct_path = data_dir / case.filename if not struct_path.exists(): raise FileNotFoundError( f"Missing structure for x={case.x_ethanol}: {struct_path}" From f91a26b96ab8c8f96ec45410816106a764cacdca Mon Sep 17 00:00:00 2001 From: Arn Date: Fri, 20 Mar 2026 14:37:26 +0000 Subject: [PATCH 28/29] fix --- .../calc_ethanol_water_density.py | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py index 8b268638b..ee00f5fee 100644 --- a/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py +++ b/ml_peg/calcs/liquids/ethanol_water_density/calc_ethanol_water_density.py @@ -30,8 +30,6 @@ MODELS = load_models(current_models) MODEL_INDEX = {name: i for i, name in enumerate(MODELS)} -DATA_PATH = BENCH_ROOT / "data" - NUM_NPT_STEPS = 1000_000 NUM_NVT_STEPS = 50_000 TIMESTEP = 1 * units.fs @@ -58,16 +56,21 @@ class CompositionCase: filename: str -def load_compositions() -> list[CompositionCase]: +def load_compositions(data_path) -> list[CompositionCase]: """ Load composition grid from ``compositions.csv``. + Parameters + ---------- + data_path : pathlib.Path + Path to the folder containing ``compositions.csv``. + Returns ------- list[CompositionCase] Parsed composition cases ordered as in the CSV file. """ - comps_file = DATA_PATH / "compositions.csv" + comps_file = data_path / "compositions.csv" cases: list[CompositionCase] = [] with comps_file.open(newline="") as f: reader = csv.DictReader(f) @@ -83,9 +86,6 @@ def load_compositions() -> list[CompositionCase]: return cases -COMPOSITIONS = load_compositions() - - def run_one_case( struct_path: str | Path, calc: Any, @@ -237,11 +237,18 @@ def test_water_ethanol_density_curves(mlip: tuple[str, Any]) -> None: None This test writes output files for a single case. """ - for case in COMPOSITIONS: - water_ethanol_density_curve_one_case(mlip, case) + data_dir = ( + download_s3_data( + key="inputs/liquids/ethanol_water_density/ethanol_water_density.zip", + filename="ethanol_water_density.zip", + ) + / "ethanol_water_density" + ) + for case in load_compositions(data_dir): + water_ethanol_density_curve_one_case(mlip, case, data_dir) -def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: +def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case, data_dir) -> None: """ Run one MD simulation case and write its density time series. @@ -251,6 +258,8 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: Pair of model name and model object. case : Any Composition case containing ``x_ethanol`` and ``filename``. + data_dir : str | pathlib.Path + Path to the data file. Returns ------- @@ -265,14 +274,6 @@ def water_ethanol_density_curve_one_case(mlip: tuple[str, Any], case) -> None: calc = model.get_calculator() calc = model.add_d3_calculator(calc) - data_dir = ( - download_s3_data( - key="inputs/liquids/ethanol_water_density/ethanol_water_density.zip", - filename="ethanol_water_density.zip", - ) - / "ethanol_water_density" - ) - struct_path = data_dir / case.filename if not struct_path.exists(): raise FileNotFoundError( From 08d154cae61aea3c99d03775b62d3438fa7a7bca Mon Sep 17 00:00:00 2001 From: Arn Date: Fri, 20 Mar 2026 15:12:31 +0000 Subject: [PATCH 29/29] download data --- .../analyse_ethanol_water_density.py | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py index 7f59da463..965bd8d0d 100644 --- a/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py +++ b/ml_peg/analysis/liquids/ethanol_water_density/analyse_ethanol_water_density.py @@ -11,6 +11,7 @@ from ml_peg.analysis.utils.utils import load_metrics_config, rmse from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT +from ml_peg.calcs.utils.utils import download_s3_data from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models @@ -18,7 +19,6 @@ BENCHMARK = "ethanol_water_density" CALC_PATH = CALCS_ROOT / CATEGORY / BENCHMARK / "outputs" OUT_PATH = APP_ROOT / "data" / CATEGORY / BENCHMARK -DATA_PATH = CALCS_ROOT / CATEGORY / BENCHMARK / "data" MODELS = get_model_names(current_models) MODEL_INDEX = {name: i for i, name in enumerate(MODELS)} # duplicate in calc @@ -124,7 +124,14 @@ def ref_curve() -> tuple[np.ndarray, np.ndarray]: tuple[numpy.ndarray, numpy.ndarray] Sorted mole fractions and reference densities. """ - ref_file = DATA_PATH / "densities_293.15.txt" + data_dir = ( + download_s3_data( + key="inputs/liquids/ethanol_water_density/ethanol_water_density.zip", + filename="ethanol_water_density.zip", + ) + / "ethanol_water_density" + ) + ref_file = data_dir / "densities_293.15.txt" rho_ref = np.loadtxt(ref_file) n = len(rho_ref) @@ -412,14 +419,4 @@ def test_ethanol_water_density( None The test validates fixture execution and writes artifacts. """ - print( - MODEL_INDEX - ) # TODO: these print statements may be useful for debugging, but should I remove? - print( - { - key0: {MODEL_INDEX[name]: value for name, value in value0.items()} - for key0, value0 in metrics.items() - } - ) - print(densities_parity) return