Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions docs/source/user_guide/benchmarks/molecular.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,53 @@ Reference data:

* Same as input data
* DLPNO-CCSD(T)/CBS


BMIM Cl RDF
===========

Summary
-------

Tests whether MLIPs incorrectly predict covalent bond formation between chloride
anions (Cl⁻) and carbon atoms in 1-butyl-3-methylimidazolium (BMIM⁺) cations.
Such Cl-C bonds should NOT form in the ionic liquid under normal conditions.

This benchmark runs NVT molecular dynamics simulations of BMIM Cl at
353.15 K and analyses the Cl-C RDF to detect any unphysical bond formation.


Metrics
-------

1. Cl-C Bonds Formed

Binary metric indicating whether unphysical Cl-C bonds formed during the MD simulation.

The Cl-C RDF is computed from the MD trajectory. If the RDF shows a peak (g(r) > 0.1)
at distances below 2.5 Å, this indicates bond formation and the model fails the test.

* 0 = no bonds formed (correct physical behaviour)
* 1 = bonds formed (unphysical, model failure)


Computational cost
------------------

Medium: tests require running 10,000 steps of Langevin MD for a system of 10 ion
pairs, which may take tens of minutes on GPU.


Data availability
-----------------

Input structures:

* Generated using molify from SMILES representations of BMIM⁺ (CCCCN1C=C[N+](=C1)C)
and Cl⁻ ions, packed to experimental density of 1052 kg/m³ at 353.15 K.
* Zills, F. molify: Molecular Structure Interface. Journal of Open Source Software
10, 8829 (2025). https://doi.org/10.21105/joss.08829
* Density from: Yang, F., Wang, D., Wang, X. & Liu, Z. Volumetric Properties of
Binary and Ternary Mixtures of Bis(2-hydroxyethyl)ammonium Acetate with Methanol,
N,N-Dimethylformamide, and Water at Several Temperatures. J. Chem. Eng. Data 62,
3958-3966 (2017). https://doi.org/10.1021/acs.jced.7b00654
199 changes: 199 additions & 0 deletions ml_peg/analysis/molecular/BMIMCl_RDF/analyse_BMIMCl_RDF.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
"""Analyse BMIMCl RDF benchmark for unphysical Cl-C bond formation."""

from __future__ import annotations

from pathlib import Path

from ase.data import atomic_numbers
from ase.geometry.rdf import get_rdf
import ase.io as aio
import numpy as np
import pytest
from tqdm import tqdm

from ml_peg.analysis.utils.decorators import build_table, plot_scatter
from ml_peg.analysis.utils.utils import load_metrics_config
from ml_peg.app import APP_ROOT
from ml_peg.calcs import CALCS_ROOT
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models

MODELS = get_model_names(current_models)
CALC_PATH = CALCS_ROOT / "molecular" / "BMIMCl_RDF" / "outputs"
OUT_PATH = APP_ROOT / "data" / "molecular" / "BMIMCl_RDF"

METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
METRICS_CONFIG_PATH
)

# RDF parameters
ELEMENT1 = "Cl"
ELEMENT2 = "C"
Z1 = atomic_numbers[ELEMENT1]
Z2 = atomic_numbers[ELEMENT2]
BINS_PER_ANG = 50
UNPHYSICAL_CUTOFF = 2.5
# Angstrom - any Cl-C closer than this indicates
# a bond which should not be there
PEAK_THRESHOLD = 0.1 # g(r) threshold for bond detection


def compute_rdf_from_trajectory(traj_path: Path) -> tuple[np.ndarray, np.ndarray]:
"""
Compute Cl-C RDF from MD trajectory.

Parameters
----------
traj_path
Path to trajectory xyz file.

Returns
-------
tuple[np.ndarray, np.ndarray]
(r, rdf) arrays - distances and averaged g(r) values.
"""
images = aio.read(traj_path, index=":")

# Infer rmax from cell (NVT)
cell_lengths = images[0].cell.lengths()
rmax = min(cell_lengths) / 2 - 0.01
nbins = int(rmax * BINS_PER_ANG)

# Compute RDFs with progress bar
rdfs = []
r = None
for atoms in tqdm(images, desc="Computing RDF"):
rdf_frame, distances = get_rdf(
atoms, rmax, nbins, elements=(Z1, Z2), no_dists=False
)
rdfs.append(rdf_frame)
if r is None:
r = distances

rdf = np.mean(rdfs, axis=0)

return r, rdf


def check_bond_formation(r: np.ndarray, rdf: np.ndarray) -> int:
"""
Check if Cl-C bonds formed based on RDF.

Parameters
----------
r
Distance array.
rdf
The g(r) values.

Returns
-------
int
1 if bonds formed (bad), 0 if no bonds (good).
"""
mask = r < UNPHYSICAL_CUTOFF
max_gr = rdf[mask].max() if mask.any() else 0.0
return 1 if max_gr > PEAK_THRESHOLD else 0


@pytest.fixture
@plot_scatter(
title="Cl-C Radial Distribution Function",
x_label="r / Å",
y_label="g(r)",
show_line=True,
filename=str(OUT_PATH / "figure_rdf.json"),
)
def rdf_data() -> dict[str, list]:
"""
Compute RDF for all models.

Returns
-------
dict[str, list]
Dictionary mapping model names to [r, g(r)] arrays.
"""
results = {}

OUT_PATH.mkdir(parents=True, exist_ok=True)

for model_name in MODELS:
traj_path = CALC_PATH / model_name / "md.xyz"
if not traj_path.exists():
continue

r, rdf = compute_rdf_from_trajectory(traj_path)
results[model_name] = [r.tolist(), rdf.tolist()]

rdf_out = OUT_PATH / model_name
rdf_out.mkdir(parents=True, exist_ok=True)
np.savetxt(
rdf_out / "rdf.dat",
np.column_stack([r, rdf]),
header="r/A g(r)",
fmt="%.6f",
)

return results


@pytest.fixture
def bond_formation(rdf_data: dict[str, list]) -> dict[str, int]:
"""
Check bond formation for all models.

Parameters
----------
rdf_data
Dictionary of RDF data per model.

Returns
-------
dict[str, int]
Dictionary mapping model names to bond formation flag (0 or 1).
"""
results = {}
for model_name, (r, rdf) in rdf_data.items():
r_arr = np.array(r)
rdf_arr = np.array(rdf)
results[model_name] = check_bond_formation(r_arr, rdf_arr)
return results


@pytest.fixture
@build_table(
filename=str(OUT_PATH / "bmimcl_metrics_table.json"),
metric_tooltips=DEFAULT_TOOLTIPS,
thresholds=DEFAULT_THRESHOLDS,
weights=DEFAULT_WEIGHTS,
)
def metrics(bond_formation: dict[str, int]) -> dict[str, dict]:
"""
Build metrics table.

Parameters
----------
bond_formation
Bond formation flags per model.

Returns
-------
dict[str, dict]
Metrics dictionary for table building.
"""
return {
"Cl-C Bonds Formed": bond_formation,
}


def test_bmimcl_rdf(metrics: dict[str, dict]) -> None:
"""
Run BMIMCl RDF analysis.

Parameters
----------
metrics
Benchmark metrics generated by fixtures.
"""
return
7 changes: 7 additions & 0 deletions ml_peg/analysis/molecular/BMIMCl_RDF/metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
metrics:
Cl-C Bonds Formed:
good: 0.0
bad: 1.0
unit: null
tooltip: "Whether Cl-C bonds formed (g(r) > 0.1 for r < 2.5 A). 0 = no bonds (correct), 1 = bonds formed (wrong)."
weight: 1.0
66 changes: 66 additions & 0 deletions ml_peg/app/molecular/BMIMCl_RDF/app_BMIMCl_RDF.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Run BMIMCl RDF benchmark app."""

from __future__ import annotations

from dash import Dash
from dash.html import Div

from ml_peg.app import APP_ROOT
from ml_peg.app.base_app import BaseApp
from ml_peg.app.utils.build_callbacks import plot_from_table_column
from ml_peg.app.utils.load import read_plot

BENCHMARK_NAME = "BMIMCl Cl-C RDF"
DOCS_URL = (
"https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular.html#bmimcl-rdf"
)
DATA_PATH = APP_ROOT / "data" / "molecular" / "BMIMCl_RDF"


class BMIMClRDFApp(BaseApp):
"""BMIMCl RDF benchmark app layout and callbacks."""

def register_callbacks(self) -> None:
"""Register callbacks to app."""
rdf_plot = read_plot(
DATA_PATH / "figure_rdf.json",
id=f"{BENCHMARK_NAME}-figure",
)

plot_from_table_column(
table_id=self.table_id,
plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
column_to_plot={"Cl-C Bonds Formed": rdf_plot},
)


def get_app() -> BMIMClRDFApp:
"""
Get BMIMCl RDF benchmark app layout and callback registration.

Returns
-------
BMIMClRDFApp
Benchmark layout and callback registration.
"""
return BMIMClRDFApp(
name=BENCHMARK_NAME,
description=(
"Tests whether MLIPs incorrectly predict Cl-C bond formation "
"in BMIMCl ionic liquid. Bonds should NOT form."
),
docs_url=DOCS_URL,
table_path=DATA_PATH / "bmimcl_metrics_table.json",
extra_components=[
Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
],
)


if __name__ == "__main__":
full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
bmimcl_app = get_app()
full_app.layout = bmimcl_app.layout
bmimcl_app.register_callbacks()

full_app.run(port=8054, debug=True)
Loading