diff --git a/docs/source/user_guide/benchmarks/index.rst b/docs/source/user_guide/benchmarks/index.rst index ad3c82f96..8e907ce78 100644 --- a/docs/source/user_guide/benchmarks/index.rst +++ b/docs/source/user_guide/benchmarks/index.rst @@ -16,3 +16,4 @@ Benchmarks non_covalent_interactions tm_complexes conformers + molecular_dynamics diff --git a/docs/source/user_guide/benchmarks/molecular_dynamics.rst b/docs/source/user_guide/benchmarks/molecular_dynamics.rst new file mode 100644 index 000000000..1b7248cec --- /dev/null +++ b/docs/source/user_guide/benchmarks/molecular_dynamics.rst @@ -0,0 +1,40 @@ +================== +Molecular dynamics +================== + +Liquid densities +================ + +Summary +------- + +Performance in predicting densities for 61 organic liquids, each system consisting of +about 1000 atoms. The dataset covers aliphatic, aromatic molecules, as well as different +functional groups and halogenated molecules. + +Metrics +------- + +1. Density error + +For each system, the density is calculated by taking the average density of an NPT molecular +dynamics run. The initial part of the simulation, here 500 ps, is omitted from the density +calculation. This is compared to the reference density, obtained from experiment. + +Computational cost +------------------ + +Low: tests are likely to take several days to run on GPU. + +Data availability +----------------- + +Input structures: + +* Weber et al., Efficient Long-Range Machine Learning Force Fields for + Liquid and Materials Properties. + arXiv:2505.06462 [physics.chem-ph] + +Reference data: + +* Experiment diff --git a/ml_peg/analysis/molecular_dynamics/liquid_densities/analyse_liquid_densities.py b/ml_peg/analysis/molecular_dynamics/liquid_densities/analyse_liquid_densities.py new file mode 100644 index 000000000..8087931a9 --- /dev/null +++ b/ml_peg/analysis/molecular_dynamics/liquid_densities/analyse_liquid_densities.py @@ -0,0 +1,176 @@ +"""Analyse the liquid densities benchmark.""" + +from __future__ import annotations + +from pathlib import Path + +from ase.io import Trajectory, write +import numpy as np +import pytest + +from ml_peg.analysis.utils.decorators import build_table, plot_parity +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) +D3_MODEL_NAMES = build_dispersion_name_map(MODELS) + + +LOG_INTERVAL_PS = 0.1 +EQUILIB_TIME_PS = 500 + +CALC_PATH = CALCS_ROOT / "molecular_dynamics" / "liquid_densities" / "outputs" +OUT_PATH = APP_ROOT / "data" / "molecular_dynamics" / "liquid_densities" + + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +def labels() -> list: + """ + Get list of system names. + + Returns + ------- + list + List of all system names. + """ + for model_name in MODELS: + labels = [path.stem for path in (CALC_PATH / model_name).glob("*.log")] + return labels + + +def compute_density(fname, density_col=13): + """ + Compute average density from NPT log file. + + Parameters + ---------- + fname + Path to the log file. + density_col + Which column the density numbers are in. + + Returns + ------- + float + Average density in g/cm3. + """ + density_series = [] + with open(fname) as lines: + for line in lines: + items = line.strip().split() + if len(items) != 15: + continue + density_series.append(float(items[13])) + skip_frames = int(EQUILIB_TIME_PS / LOG_INTERVAL_PS) + return np.mean(density_series[skip_frames:]) + + +@pytest.fixture +@plot_parity( + filename=OUT_PATH / "figure_liquid_densities.json", + title="Densities", + x_label="Predicted density / kcal/mol", + y_label="Reference density / kcal/mol", + hoverdata={ + "Labels": labels(), + }, +) +def liquid_densities() -> dict[str, list]: + """ + Get liquid densities for all systems. + + Returns + ------- + dict[str, list] + Dictionary of all reference and predicted densities. + """ + results = {"ref": []} | {mlip: [] for mlip in MODELS} + ref_stored = False + + for model_name in MODELS: + for label in labels(): + atoms = Trajectory(CALC_PATH / model_name / f"{label}.traj")[-1] + + results[model_name].append( + compute_density(CALC_PATH / model_name / f"{label}.log") + ) + if not ref_stored: + results["ref"].append(atoms.info["exp_density"]) + + # Write structures for app + structs_dir = OUT_PATH / model_name + structs_dir.mkdir(parents=True, exist_ok=True) + write(structs_dir / f"{label}.xyz", atoms) + ref_stored = True + return results + + +@pytest.fixture +def get_mae(liquid_densities) -> dict[str, float]: + """ + Get mean absolute error for liquid densities. + + Parameters + ---------- + liquid_densities + Dictionary of reference and predicted liquid densities. + + Returns + ------- + dict[str, float] + Dictionary of predicted liquid densities errors for all models. + """ + results = {} + for model_name in MODELS: + results[model_name] = mae(liquid_densities["ref"], liquid_densities[model_name]) + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "liquid_densities_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + mlip_name_map=D3_MODEL_NAMES, +) +def metrics(get_mae: dict[str, float]) -> dict[str, dict]: + """ + Get all metrics. + + Parameters + ---------- + get_mae + Mean absolute errors for all models. + + Returns + ------- + dict[str, dict] + Metric names and values for all models. + """ + return { + "MAE": get_mae, + } + + +def test_liquid_densities(metrics: dict[str, dict]) -> None: + """ + Run liquid densities test. + + Parameters + ---------- + metrics + All new benchmark metric names and dictionary of values for each model. + """ + return diff --git a/ml_peg/analysis/molecular_dynamics/liquid_densities/metrics.yml b/ml_peg/analysis/molecular_dynamics/liquid_densities/metrics.yml new file mode 100644 index 000000000..7b5051980 --- /dev/null +++ b/ml_peg/analysis/molecular_dynamics/liquid_densities/metrics.yml @@ -0,0 +1,7 @@ +metrics: + MAE: + good: 0.0 + bad: 0.4 + unit: g/cm^3 + tooltip: Mean Absolute Error for all systems + level_of_theory: Experimental density diff --git a/ml_peg/app/molecular_dynamics/liquid_densities/app_liquid_densities.py b/ml_peg/app/molecular_dynamics/liquid_densities/app_liquid_densities.py new file mode 100644 index 000000000..d3bb19bdb --- /dev/null +++ b/ml_peg/app/molecular_dynamics/liquid_densities/app_liquid_densities.py @@ -0,0 +1,87 @@ +"""Run liquid densities app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import ( + plot_from_table_column, + struct_from_scatter, +) +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "DipCONFS" +DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular_dynamics.html#liquid_densities" +DATA_PATH = APP_ROOT / "data" / "molecular_dynamics" / "liquid_densities" + + +class LiquidDensitiesApp(BaseApp): + """Liquid densities benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter = read_plot( + DATA_PATH / "figure_liquid_densities.json", + id=f"{BENCHMARK_NAME}-figure", + ) + + model_dir = DATA_PATH / MODELS[0] + if model_dir.exists(): + labels = sorted([f.stem for f in model_dir.glob("*.xyz")]) + structs = [ + f"assets/molecular_dynamics/liquid_densities/{MODELS[0]}/{label}.xyz" + for label in labels + ] + else: + structs = [] + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={"MAE": scatter}, + ) + + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="struct", + ) + + +def get_app() -> LiquidDensitiesApp: + """ + Get liquid densities benchmark app layout and callback registration. + + Returns + ------- + LiquidDensitiesApp + Benchmark layout and callback registration. + """ + return LiquidDensitiesApp( + name=BENCHMARK_NAME, + description=( + "Performance in predicting organic liquid densities." + "Reference data is experimental densities." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "liquid_densities_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + ], + ) + + +if __name__ == "__main__": + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + benchmark_app = get_app() + full_app.layout = benchmark_app.layout + benchmark_app.register_callbacks() + full_app.run(port=8063, debug=True) diff --git a/ml_peg/calcs/molecular_dynamics/liquid_densities/calc_liquid_densities.py b/ml_peg/calcs/molecular_dynamics/liquid_densities/calc_liquid_densities.py new file mode 100644 index 000000000..3c571e54d --- /dev/null +++ b/ml_peg/calcs/molecular_dynamics/liquid_densities/calc_liquid_densities.py @@ -0,0 +1,175 @@ +""" +Calculate densities using NPT of 63 liquids. + +Liquids include 62 from arXiv:2505.06462 [physics.chem-ph], +in addition to 1-octanol. +""" + +from __future__ import annotations + +import logging +import os +from pathlib import Path +import time +from typing import Any + +from ase import Atoms, units +from ase.io import Trajectory, read +from ase.md.nose_hoover_chain import IsotropicMTKNPT +import pytest + +from ml_peg.calcs.utils.utils import download_s3_data +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +KCAL_TO_EV = units.kcal / units.mol + +OUT_PATH = Path(__file__).parent / "outputs" + +AU_TO_G_CM3 = 1e24 / units.mol +NUM_MD_STEPS = 1000_000 +TIMESTEP = 1 * units.fs +LOG_INTERVAL = 100 +ATM = 1.01325 * units.bar + + +def get_density_g_cm3(atoms: Atoms): + """ + Get the density of the system in g/cm^3. + + Parameters + ---------- + atoms + ASE.Atoms object of the periodic system. + + Returns + ------- + float + Density in g/cm^3. + """ + mass = atoms.get_masses().sum() + volume = atoms.get_volume() + return AU_TO_G_CM3 * mass / volume + + +def log_md(dyn, start_time): + """ + Log molecular dynamics simulation. + + Parameters + ---------- + dyn + ASE molecular dynamics object. + start_time + Real time of the simulation start, in seconds. + """ + current_time = time.time() - start_time + energy = dyn.atoms.get_potential_energy() + density = get_density_g_cm3(dyn.atoms) + temperature = dyn.atoms.get_temperature() + t = dyn.get_time() / (1000 * units.fs) + logging.info( + f"""t: {t:>8.3f} ps\ + Walltime: {current_time:>10.3f} s\ + T: {temperature:.1f} K\ + Epot: {energy:.2f} eV\ + density: {density:.3f} g/cm^3\ + """ + ) + + +def run_npt(atoms, calc, output_fname): + """ + Run NPT molecular dynamics using the isotropic MTK barostat. + + Parameters + ---------- + atoms + ASE Atoms of the system. + calc + ASE Calculator. + output_fname + File name to save the trajectory to. + """ + if os.path.exists(output_fname): + try: + traj = Trajectory(output_fname) + atoms = traj[-1] + nsteps = (len(traj) - 1) * LOG_INTERVAL + except Exception as e: + print(e) + nsteps = 0 + else: + nsteps = 0 + + atoms.calc = calc + + dyn = IsotropicMTKNPT( + atoms=atoms, + timestep=TIMESTEP, + temperature_K=atoms.info["exp_temperature"], + pressure_au=ATM, + tdamp=50 * units.fs, + pdamp=500 * units.fs, + trajectory=output_fname, + loginterval=LOG_INTERVAL, + append_trajectory=True, + ) + dyn.nsteps = nsteps + dyn.attach(log_md, interval=LOG_INTERVAL, dyn=dyn, start_time=time.time()) + dyn.run(steps=NUM_MD_STEPS - nsteps) + + +@pytest.mark.parametrize("mlip", MODELS.items()) +def test_liquid_densities(mlip: tuple[str, Any], system_id) -> None: + """ + Run Liquid Densities benchmark. + + Parameters + ---------- + mlip + Name of model use and model to get calculator. + system_id + Identifier of the system to run MD on. + """ + assert system_id in range(0, 63), ( + "system_id out of range. Please use a value from 0 to 62" + ) + + # Download data + data_path = ( + download_s3_data( + filename="liquid_densities.zip", + key="inputs/molecular_dynamics/liquid_densities/liquid_densities.zip", + ) + / "liquid_densities" + ) + + # Get system name + input_xyz_path = sorted((data_path / "equilibrated_structures_xyz").glob("*.xyz"))[ + system_id + ] + + model_name, model = mlip + model.default_dtype = "float32" + calc = model.get_calculator() + # Add D3 calculator for this test + calc = model.add_d3_calculator(calc) + + system_name = input_xyz_path.stem + out_dir = OUT_PATH / model_name + out_dir.mkdir(exist_ok=True, parents=True) + + logging.basicConfig( + format="%(message)s", + level=logging.INFO, + filename=out_dir / f"{system_name}.log", + filemode="a", + force=True, + ) + + atoms = read(input_xyz_path) + output_fname = out_dir / f"{system_name}.traj" + run_npt(atoms, calc, output_fname) diff --git a/ml_peg/calcs/molecular_dynamics/liquid_densities/conftest.py b/ml_peg/calcs/molecular_dynamics/liquid_densities/conftest.py new file mode 100644 index 000000000..32750d7ae --- /dev/null +++ b/ml_peg/calcs/molecular_dynamics/liquid_densities/conftest.py @@ -0,0 +1,35 @@ +"""Obtain the system_id input argument.""" + +from __future__ import annotations + +import pytest + + +def pytest_addoption(parser): + """ + Add pytest option. + + Parameters + ---------- + parser + Parser to use. + """ + parser.addoption("--system_id", action="store", default=0, type=int) + + +@pytest.fixture +def system_id(request): + """ + Get system_id argument. + + Parameters + ---------- + request + Request. + + Returns + ------- + option + Requested command line argument. + """ + return request.config.getoption("--system_id") diff --git a/ml_peg/cli/cli.py b/ml_peg/cli/cli.py index 36471650a..82a8876af 100644 --- a/ml_peg/cli/cli.py +++ b/ml_peg/cli/cli.py @@ -4,7 +4,7 @@ from typing import Annotated -from typer import Exit, Option, Typer +from typer import Context, Exit, Option, Typer from ml_peg import __version__ @@ -57,8 +57,13 @@ def run_dash_app( run_app(category=category, port=port, debug=debug) -@app.command(name="calc", help="Run calculations") +@app.command( + name="calc", + help="Run calculations", + context_settings={"allow_extra_args": True, "ignore_unknown_options": True}, +) def run_calcs( + ctx: Context, models: Annotated[ str | None, Option( @@ -86,6 +91,8 @@ def run_calcs( Parameters ---------- + ctx + Typer Context. Automatically set. models Models to run calculations for, in comma-separated list. Default is `None`, corresponding to all available models. @@ -124,6 +131,9 @@ def run_calcs( if models: options.extend(["--models", models]) + # Parse any custom options to pytest + options.extend(ctx.args) + pytest.main(options)