diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7066c22a4..6febbc5b2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # pre-commit install repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: end-of-file-fixer - id: mixed-line-ending @@ -11,7 +11,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.11.13 + rev: v0.15.5 hooks: # Run the linter. - id: ruff-check @@ -20,7 +20,7 @@ repos: - id: ruff-format - repo: https://github.com/numpy/numpydoc - rev: v1.8.0 + rev: v1.10.0 hooks: - id: numpydoc-validation files: ^ml_peg/ diff --git a/docs/source/user_guide/benchmarks/conformers.rst b/docs/source/user_guide/benchmarks/conformers.rst new file mode 100644 index 000000000..5e9ed6d30 --- /dev/null +++ b/docs/source/user_guide/benchmarks/conformers.rst @@ -0,0 +1,42 @@ +========== +Conformers +========== + +ACONFL +====== + +Summary +------- + +Performance in predicting relative conformer energies of 12 C12H26, +16 C16H34 and 20 C20H42 conformers. Reference data from PNO-LCCSD(T)-F12/ AVQZ calculations. + +Metrics +------- + +1. Conformer energy error + +For each complex, the the relative energy is calculated by taking the difference in energy +between the given conformer and the reference (zero-energy) conformer. This is +compared to the reference conformer energy, calculated in the same way. + +Computational cost +------------------ + +Low: tests are likely to take minutes to run on CPU. + +Data availability +----------------- + +Input structures: + +* Conformational Energy Benchmark for Longer n-Alkane Chains + Sebastian Ehlert, Stefan Grimme, and Andreas Hansen + The Journal of Physical Chemistry A 2022 126 (22), 3521-3535 + DOI: 10.1021/acs.jpca.2c02439 + +Reference data: + +* Same as input data +* :math:`PNO-LCCSD(T)-F12/ AVQZ` level of theory: a local, explicitly + correlated coupled cluster method. diff --git a/docs/source/user_guide/benchmarks/index.rst b/docs/source/user_guide/benchmarks/index.rst index 83afd6ec5..ad3c82f96 100644 --- a/docs/source/user_guide/benchmarks/index.rst +++ b/docs/source/user_guide/benchmarks/index.rst @@ -14,3 +14,5 @@ Benchmarks bulk_crystal lanthanides non_covalent_interactions + tm_complexes + conformers diff --git a/docs/source/user_guide/benchmarks/molecular.rst b/docs/source/user_guide/benchmarks/molecular.rst index 21460d29c..7b1857d9e 100644 --- a/docs/source/user_guide/benchmarks/molecular.rst +++ b/docs/source/user_guide/benchmarks/molecular.rst @@ -124,3 +124,53 @@ Reference data: * Same as input data * DLPNO-CCSD(T)/CBS + + +BMIM Cl RDF +=========== + +Summary +------- + +Tests whether MLIPs incorrectly predict covalent bond formation between chloride +anions (Cl⁻) and carbon atoms in 1-butyl-3-methylimidazolium (BMIM⁺) cations. +Such Cl-C bonds should NOT form in the ionic liquid under normal conditions. + +This benchmark runs NVT molecular dynamics simulations of BMIM Cl at +353.15 K and analyses the Cl-C RDF to detect any unphysical bond formation. + + +Metrics +------- + +1. Cl-C Bonds Formed + +Binary metric indicating whether unphysical Cl-C bonds formed during the MD simulation. + +The Cl-C RDF is computed from the MD trajectory. If the RDF shows a peak (g(r) > 0.1) +at distances below 2.5 Å, this indicates bond formation and the model fails the test. + +* 0 = no bonds formed (correct physical behaviour) +* 1 = bonds formed (unphysical, model failure) + + +Computational cost +------------------ + +Medium: tests require running 10,000 steps of Langevin MD for a system of 10 ion +pairs, which may take tens of minutes on GPU. + + +Data availability +----------------- + +Input structures: + +* Generated using molify from SMILES representations of BMIM⁺ (CCCCN1C=C[N+](=C1)C) + and Cl⁻ ions, packed to experimental density of 1052 kg/m³ at 353.15 K. +* Zills, F. molify: Molecular Structure Interface. Journal of Open Source Software + 10, 8829 (2025). https://doi.org/10.21105/joss.08829 +* Density from: Yang, F., Wang, D., Wang, X. & Liu, Z. Volumetric Properties of + Binary and Ternary Mixtures of Bis(2-hydroxyethyl)ammonium Acetate with Methanol, + N,N-Dimethylformamide, and Water at Several Temperatures. J. Chem. Eng. Data 62, + 3958-3966 (2017). https://doi.org/10.1021/acs.jced.7b00654 diff --git a/docs/source/user_guide/benchmarks/molecular_dynamics.rst b/docs/source/user_guide/benchmarks/molecular_dynamics.rst new file mode 100644 index 000000000..dcae3cfb1 --- /dev/null +++ b/docs/source/user_guide/benchmarks/molecular_dynamics.rst @@ -0,0 +1,38 @@ + +================== +Molecular dynamics +================== + +Water density +================ + +Summary +------- + +Performance in predicting the density of water at temperatures of 270, 290, 300, and 330 K. +The water systems consist of 333 molecules. + +Metrics +------- + +1. Density error + +For each system, the density is calculated by taking the average density of an NPT molecular +dynamics run. The initial part of the simulation, here 500 ps, is omitted from the density +calculation. This is compared to the reference density, obtained from experiment. + +Computational cost +------------------ + +Low: tests are likely to take several days to run on GPU. + +Data availability +----------------- + +Input structures: + +* + +Reference data: + +* Experiment diff --git a/docs/source/user_guide/benchmarks/nebs.rst b/docs/source/user_guide/benchmarks/nebs.rst index e081babf1..9a741711e 100644 --- a/docs/source/user_guide/benchmarks/nebs.rst +++ b/docs/source/user_guide/benchmarks/nebs.rst @@ -46,3 +46,60 @@ Reference data: * Manually taken from https://doi.org/10.1149/1.1633511. * Meta-GGA (Perdew-Wang) exchange correlation functional + + +Si defects +========== + +Summary +------- + +Performance in predicting DFT singlepoint energies and forces along fixed nudged-elastic-band +(NEB) images for a silicon interstitial migration pathway. + +Metrics +------- + +For each of the three NEB datasets (64 atoms, 216 atoms, and 216 atoms di-to-single), MLIPs are +evaluated on the same ordered NEB images as the reference. + +1. Energy MAE + +Mean absolute error (MAE) of *relative* energies along the NEB, shifting image 0 to 0 eV +for both the DFT reference and the MLIP predictions. + +2. Force MAE + +Mean absolute error (MAE) of forces across all atoms and images along the NEB. + +Computational cost +------------------ + +Medium: tests are likely to take several minutes to run on CPU. + +Data availability +----------------- + +Input/reference data: + +* Reference extxyz trajectories (including per-image DFT energies and forces) are distributed as a + separate zip archive and downloaded on-demand from the ML-PEG data store. + The calculation script uses the public ML-PEG S3 bucket to retrieve these inputs. +* The reference DFT energies/forces come from Quantum ESPRESSO (PWscf) single-point calculations + with: + + - Code/version: Quantum ESPRESSO PWSCF v.7.0 + - XC functional: ``input_dft='PBE'`` + - Cutoffs: ``ecutwfc=30.0`` Ry, ``ecutrho=240.0`` Ry + - Smearing: ``occupations='smearing'``, ``smearing='mv'``, ``degauss=0.01`` Ry + - SCF convergence/mixing: ``conv_thr=1.0d-6``, ``electron_maxstep=250``, ``mixing_beta=0.2``, + ``mixing_mode='local-TF'`` + - Diagonalization: ``diagonalization='david'`` + - Symmetry: ``nosym=.false.``, ``noinv=.false.`` (symmetry enabled) + - Pseudopotential: ``Si.pbe-n-kjpaw_psl.1.0.0.UPF`` (PSLibrary) + + K-points by case: + + - 64 atoms: Γ-only (``K_POINTS automatic 1 1 1 0 0 0``) + - 216 atoms: Γ-only (``K_POINTS gamma``) + - 216 atoms di-to-single: Γ-only (``K_POINTS gamma``) diff --git a/docs/source/user_guide/benchmarks/tm_complexes.rst b/docs/source/user_guide/benchmarks/tm_complexes.rst new file mode 100644 index 000000000..80f8a1a29 --- /dev/null +++ b/docs/source/user_guide/benchmarks/tm_complexes.rst @@ -0,0 +1,42 @@ +========================== +Transition Metal Complexes +========================== + +3dTMV +======= + +Summary +------- + +Performance in predicting vertical ionization energies for 28 transition metal +complexes. + +Metrics +------- + +1. Ionization energy error + +For each complex, the ionization energy is calculated by taking the difference in energy +between the complex in its oxidized state and initial state, which differ by one electron +and spin multiplicity. This is compared to the reference ionization energy, calculated in the same way. + +Computational cost +------------------ + +Low: tests are likely to take minutes to run on CPU. + +Data availability +----------------- + +Input structures: + +* Toward Benchmark-Quality Ab Initio Predictions for 3d Transition Metal + Electrocatalysts: A Comparison of CCSD(T) and ph-AFQMC Hagen Neugebauer, Hung T. + Vuong, John L. Weber, Richard A. Friesner, James Shee, and Andreas Hansen Journal of + Chemical Theory and Computation 2023 19 (18), 6208-6225, + DOI: 10.1021/acs.jctc.3c00617 + +Reference data: + +* Same as input data +* ph-AFQMC level of theory: Auxiliary-Field Quantum Monte Carlo. diff --git a/ml_peg/analysis/conformers/37Conf8/analyse_37Conf8.py b/ml_peg/analysis/conformers/37Conf8/analyse_37Conf8.py index 226ad111c..cbc7ca78c 100644 --- a/ml_peg/analysis/conformers/37Conf8/analyse_37Conf8.py +++ b/ml_peg/analysis/conformers/37Conf8/analyse_37Conf8.py @@ -13,14 +13,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "conformers" / "37Conf8" / "outputs" @@ -113,7 +117,7 @@ def get_mae(conformer_energies) -> dict[str, float]: filename=OUT_PATH / "37conf8_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/conformers/ACONFL/analyse_ACONFL.py b/ml_peg/analysis/conformers/ACONFL/analyse_ACONFL.py new file mode 100644 index 000000000..f15d642e1 --- /dev/null +++ b/ml_peg/analysis/conformers/ACONFL/analyse_ACONFL.py @@ -0,0 +1,153 @@ +""" +Analyse the ACONFL dataset for molecular conformer relative energies. + +Conformational Energy Benchmark for Longer n-Alkane Chains +Sebastian Ehlert, Stefan Grimme, and Andreas Hansen +The Journal of Physical Chemistry A 2022 126 (22), 3521-3535 +DOI: 10.1021/acs.jpca.2c02439 +""" + +from __future__ import annotations + +from pathlib import Path + +from ase import units +from ase.io import read, write +import pytest + +from ml_peg.analysis.utils.decorators import build_table, plot_parity +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) + +EV_TO_KCAL = units.mol / units.kcal +CALC_PATH = CALCS_ROOT / "conformers" / "ACONFL" / "outputs" +OUT_PATH = APP_ROOT / "data" / "conformers" / "ACONFL" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +def labels() -> list: + """ + Get list of system names. + + Returns + ------- + list + List of all system names. + """ + for model_name in MODELS: + labels_list = [path.stem for path in sorted((CALC_PATH / model_name).glob("*"))] + break + return labels_list + + +@pytest.fixture +@plot_parity( + filename=OUT_PATH / "figure_aconfl.json", + title="Energies", + x_label="Predicted energy / kcal/mol", + y_label="Reference energy / kcal/mol", + hoverdata={ + "Labels": labels(), + }, +) +def conformer_energies() -> dict[str, list]: + """ + Get conformer energies for all systems. + + Returns + ------- + dict[str, list] + Dictionary of all reference and predicted conformer energies. + """ + results = {"ref": []} | {mlip: [] for mlip in MODELS} + ref_stored = False + + for model_name in MODELS: + for label in labels(): + atoms = read(CALC_PATH / model_name / f"{label}.xyz") + + results[model_name].append(atoms.info["model_rel_energy"] * EV_TO_KCAL) + if not ref_stored: + results["ref"].append(atoms.info["ref_rel_energy"] * EV_TO_KCAL) + + # Write structures for app + structs_dir = OUT_PATH / model_name + structs_dir.mkdir(parents=True, exist_ok=True) + write(structs_dir / f"{label}.xyz", atoms) + ref_stored = True + return results + + +@pytest.fixture +def get_mae(conformer_energies) -> dict[str, float]: + """ + Get mean absolute error for conformer energies. + + Parameters + ---------- + conformer_energies + Dictionary of reference and predicted conformer energies. + + Returns + ------- + dict[str, float] + Dictionary of predicted conformer energies errors for all models. + """ + results = {} + for model_name in MODELS: + results[model_name] = mae( + conformer_energies["ref"], conformer_energies[model_name] + ) + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "aconfl_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + mlip_name_map=DISPERSION_NAME_MAP, +) +def metrics(get_mae: dict[str, float]) -> dict[str, dict]: + """ + Get all metrics. + + Parameters + ---------- + get_mae + Mean absolute errors for all models. + + Returns + ------- + dict[str, dict] + Metric names and values for all models. + """ + return { + "MAE": get_mae, + } + + +def test_aconfl(metrics: dict[str, dict]) -> None: + """ + Run ACONFL test. + + Parameters + ---------- + metrics + All new benchmark metric names and dictionary of values for each model. + """ + return diff --git a/ml_peg/analysis/conformers/ACONFL/metrics.yml b/ml_peg/analysis/conformers/ACONFL/metrics.yml new file mode 100644 index 000000000..40d89a17d --- /dev/null +++ b/ml_peg/analysis/conformers/ACONFL/metrics.yml @@ -0,0 +1,7 @@ +metrics: + MAE: + good: 0.0 + bad: 2.0 + unit: kcal/mol + tooltip: Mean Absolute Error for all systems + level_of_theory: PNO-LCCSD(T)-F12/ AVQZ diff --git a/ml_peg/analysis/conformers/DipCONFS/analyse_DipCONFS.py b/ml_peg/analysis/conformers/DipCONFS/analyse_DipCONFS.py index b5b9e7d4d..e959f3005 100644 --- a/ml_peg/analysis/conformers/DipCONFS/analyse_DipCONFS.py +++ b/ml_peg/analysis/conformers/DipCONFS/analyse_DipCONFS.py @@ -13,14 +13,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "conformers" / "DipCONFS" / "outputs" @@ -113,7 +117,7 @@ def get_mae(conformer_energies) -> dict[str, float]: filename=OUT_PATH / "dipconfs_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/conformers/Glucose205/analyse_Glucose205.py b/ml_peg/analysis/conformers/Glucose205/analyse_Glucose205.py index bab17a63e..6a1b8b611 100644 --- a/ml_peg/analysis/conformers/Glucose205/analyse_Glucose205.py +++ b/ml_peg/analysis/conformers/Glucose205/analyse_Glucose205.py @@ -15,14 +15,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "conformers" / "Glucose205" / "outputs" @@ -115,7 +119,7 @@ def get_mae(conformer_energies) -> dict[str, float]: filename=OUT_PATH / "glucose205_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/conformers/MPCONF196/analyse_MPCONF196.py b/ml_peg/analysis/conformers/MPCONF196/analyse_MPCONF196.py index 000b9d9ef..efc62faab 100644 --- a/ml_peg/analysis/conformers/MPCONF196/analyse_MPCONF196.py +++ b/ml_peg/analysis/conformers/MPCONF196/analyse_MPCONF196.py @@ -14,14 +14,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "conformers" / "MPCONF196" / "outputs" @@ -115,7 +119,7 @@ def get_mae(conformer_energies) -> dict[str, float]: filename=OUT_PATH / "mpconf196_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/conformers/Maltose222/analyse_Maltose222.py b/ml_peg/analysis/conformers/Maltose222/analyse_Maltose222.py index 7ee1f4483..a05e64aa3 100644 --- a/ml_peg/analysis/conformers/Maltose222/analyse_Maltose222.py +++ b/ml_peg/analysis/conformers/Maltose222/analyse_Maltose222.py @@ -15,14 +15,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "conformers" / "Maltose222" / "outputs" @@ -115,7 +119,7 @@ def get_mae(conformer_energies) -> dict[str, float]: filename=OUT_PATH / "maltose222_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/conformers/OpenFF_Tors/analyse_OpenFF_Tors.py b/ml_peg/analysis/conformers/OpenFF_Tors/analyse_OpenFF_Tors.py index f5df1a643..3f7378a94 100644 --- a/ml_peg/analysis/conformers/OpenFF_Tors/analyse_OpenFF_Tors.py +++ b/ml_peg/analysis/conformers/OpenFF_Tors/analyse_OpenFF_Tors.py @@ -14,14 +14,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "conformers" / "OpenFF_Tors" / "outputs" OUT_PATH = APP_ROOT / "data" / "conformers" / "OpenFF_Tors" @@ -117,7 +121,7 @@ def get_mae(conformer_energies) -> dict[str, float]: filename=OUT_PATH / "openff_tors_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/conformers/UpU46/analyse_UpU46.py b/ml_peg/analysis/conformers/UpU46/analyse_UpU46.py index d93bb89b9..8f35e4268 100644 --- a/ml_peg/analysis/conformers/UpU46/analyse_UpU46.py +++ b/ml_peg/analysis/conformers/UpU46/analyse_UpU46.py @@ -15,14 +15,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal @@ -118,7 +122,7 @@ def get_mae(conformer_energies) -> dict[str, float]: filename=OUT_PATH / "upu46_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/conformers/solvMPCONF196/analyse_solvMPCONF196.py b/ml_peg/analysis/conformers/solvMPCONF196/analyse_solvMPCONF196.py index e28fa5abf..550931053 100644 --- a/ml_peg/analysis/conformers/solvMPCONF196/analyse_solvMPCONF196.py +++ b/ml_peg/analysis/conformers/solvMPCONF196/analyse_solvMPCONF196.py @@ -13,14 +13,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "conformers" / "solvMPCONF196" / "outputs" OUT_PATH = APP_ROOT / "data" / "conformers" / "solvMPCONF196" @@ -128,7 +132,7 @@ def get_mae(conformer_energies) -> dict[str, float]: filename=OUT_PATH / "solv_mpconf196_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/molecular/BMIMCl_RDF/analyse_BMIMCl_RDF.py b/ml_peg/analysis/molecular/BMIMCl_RDF/analyse_BMIMCl_RDF.py new file mode 100644 index 000000000..1831dcad5 --- /dev/null +++ b/ml_peg/analysis/molecular/BMIMCl_RDF/analyse_BMIMCl_RDF.py @@ -0,0 +1,199 @@ +"""Analyse BMIMCl RDF benchmark for unphysical Cl-C bond formation.""" + +from __future__ import annotations + +from pathlib import Path + +from ase.data import atomic_numbers +from ase.geometry.rdf import get_rdf +import ase.io as aio +import numpy as np +import pytest +from tqdm import tqdm + +from ml_peg.analysis.utils.decorators import build_table, plot_scatter +from ml_peg.analysis.utils.utils import load_metrics_config +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +CALC_PATH = CALCS_ROOT / "molecular" / "BMIMCl_RDF" / "outputs" +OUT_PATH = APP_ROOT / "data" / "molecular" / "BMIMCl_RDF" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + +# RDF parameters +ELEMENT1 = "Cl" +ELEMENT2 = "C" +Z1 = atomic_numbers[ELEMENT1] +Z2 = atomic_numbers[ELEMENT2] +BINS_PER_ANG = 50 +UNPHYSICAL_CUTOFF = 2.5 +# Angstrom - any Cl-C closer than this indicates +# a bond which should not be there +PEAK_THRESHOLD = 0.1 # g(r) threshold for bond detection + + +def compute_rdf_from_trajectory(traj_path: Path) -> tuple[np.ndarray, np.ndarray]: + """ + Compute Cl-C RDF from MD trajectory. + + Parameters + ---------- + traj_path + Path to trajectory xyz file. + + Returns + ------- + tuple[np.ndarray, np.ndarray] + (r, rdf) arrays - distances and averaged g(r) values. + """ + images = aio.read(traj_path, index=":") + + # Infer rmax from cell (NVT) + cell_lengths = images[0].cell.lengths() + rmax = min(cell_lengths) / 2 - 0.01 + nbins = int(rmax * BINS_PER_ANG) + + # Compute RDFs with progress bar + rdfs = [] + r = None + for atoms in tqdm(images, desc="Computing RDF"): + rdf_frame, distances = get_rdf( + atoms, rmax, nbins, elements=(Z1, Z2), no_dists=False + ) + rdfs.append(rdf_frame) + if r is None: + r = distances + + rdf = np.mean(rdfs, axis=0) + + return r, rdf + + +def check_bond_formation(r: np.ndarray, rdf: np.ndarray) -> int: + """ + Check if Cl-C bonds formed based on RDF. + + Parameters + ---------- + r + Distance array. + rdf + The g(r) values. + + Returns + ------- + int + 1 if bonds formed (bad), 0 if no bonds (good). + """ + mask = r < UNPHYSICAL_CUTOFF + max_gr = rdf[mask].max() if mask.any() else 0.0 + return 1 if max_gr > PEAK_THRESHOLD else 0 + + +@pytest.fixture +@plot_scatter( + title="Cl-C Radial Distribution Function", + x_label="r / Å", + y_label="g(r)", + show_line=True, + filename=str(OUT_PATH / "figure_rdf.json"), +) +def rdf_data() -> dict[str, list]: + """ + Compute RDF for all models. + + Returns + ------- + dict[str, list] + Dictionary mapping model names to [r, g(r)] arrays. + """ + results = {} + + OUT_PATH.mkdir(parents=True, exist_ok=True) + + for model_name in MODELS: + traj_path = CALC_PATH / model_name / "md.xyz" + if not traj_path.exists(): + continue + + r, rdf = compute_rdf_from_trajectory(traj_path) + results[model_name] = [r.tolist(), rdf.tolist()] + + rdf_out = OUT_PATH / model_name + rdf_out.mkdir(parents=True, exist_ok=True) + np.savetxt( + rdf_out / "rdf.dat", + np.column_stack([r, rdf]), + header="r/A g(r)", + fmt="%.6f", + ) + + return results + + +@pytest.fixture +def bond_formation(rdf_data: dict[str, list]) -> dict[str, int]: + """ + Check bond formation for all models. + + Parameters + ---------- + rdf_data + Dictionary of RDF data per model. + + Returns + ------- + dict[str, int] + Dictionary mapping model names to bond formation flag (0 or 1). + """ + results = {} + for model_name, (r, rdf) in rdf_data.items(): + r_arr = np.array(r) + rdf_arr = np.array(rdf) + results[model_name] = check_bond_formation(r_arr, rdf_arr) + return results + + +@pytest.fixture +@build_table( + filename=str(OUT_PATH / "bmimcl_metrics_table.json"), + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + weights=DEFAULT_WEIGHTS, +) +def metrics(bond_formation: dict[str, int]) -> dict[str, dict]: + """ + Build metrics table. + + Parameters + ---------- + bond_formation + Bond formation flags per model. + + Returns + ------- + dict[str, dict] + Metrics dictionary for table building. + """ + return { + "Cl-C Bonds Formed": bond_formation, + } + + +def test_bmimcl_rdf(metrics: dict[str, dict]) -> None: + """ + Run BMIMCl RDF analysis. + + Parameters + ---------- + metrics + Benchmark metrics generated by fixtures. + """ + return diff --git a/ml_peg/analysis/molecular/BMIMCl_RDF/metrics.yml b/ml_peg/analysis/molecular/BMIMCl_RDF/metrics.yml new file mode 100644 index 000000000..75df3aab6 --- /dev/null +++ b/ml_peg/analysis/molecular/BMIMCl_RDF/metrics.yml @@ -0,0 +1,7 @@ +metrics: + Cl-C Bonds Formed: + good: 0.0 + bad: 1.0 + unit: null + tooltip: "Whether Cl-C bonds formed (g(r) > 0.1 for r < 2.5 A). 0 = no bonds (correct), 1 = bonds formed (wrong)." + weight: 1.0 diff --git a/ml_peg/analysis/molecular/GMTKN55/analyse_GMTKN55.py b/ml_peg/analysis/molecular/GMTKN55/analyse_GMTKN55.py index 149e87e19..20272be20 100644 --- a/ml_peg/analysis/molecular/GMTKN55/analyse_GMTKN55.py +++ b/ml_peg/analysis/molecular/GMTKN55/analyse_GMTKN55.py @@ -11,14 +11,14 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config +from ml_peg.analysis.utils.utils import build_dispersion_name_map, load_metrics_config from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models MODELS = get_model_names(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "molecular" / "GMTKN55" / "outputs" OUT_PATH = APP_ROOT / "data" / "molecular" / "GMTKN55" @@ -291,7 +291,7 @@ def weighted_error(subset_errors: dict[str, dict[str, float]]) -> dict[str, floa metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, weights=DEFAULT_WEIGHTS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics( category_errors: dict[str, dict[str, float]], weighted_error: dict[str, float] diff --git a/ml_peg/analysis/molecular/Wiggle150/analyse_Wiggle150.py b/ml_peg/analysis/molecular/Wiggle150/analyse_Wiggle150.py index bfbf10479..d94545c15 100644 --- a/ml_peg/analysis/molecular/Wiggle150/analyse_Wiggle150.py +++ b/ml_peg/analysis/molecular/Wiggle150/analyse_Wiggle150.py @@ -9,14 +9,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models MODELS = get_model_names(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "molecular" / "Wiggle150" / "outputs" OUT_PATH = APP_ROOT / "data" / "molecular" / "Wiggle150" @@ -161,7 +165,7 @@ def wiggle150_mae(relative_energies) -> dict[str, float]: filename=OUT_PATH / "wiggle150_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(wiggle150_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/molecular_crystal/CPOSS209/analyse_CPOSS209.py b/ml_peg/analysis/molecular_crystal/CPOSS209/analyse_CPOSS209.py index 93bb44594..81d0229c9 100644 --- a/ml_peg/analysis/molecular_crystal/CPOSS209/analyse_CPOSS209.py +++ b/ml_peg/analysis/molecular_crystal/CPOSS209/analyse_CPOSS209.py @@ -9,14 +9,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models MODELS = get_model_names(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "molecular_crystal" / "CPOSS209" / "outputs" OUT_PATH = APP_ROOT / "data" / "molecular_crystal" / "CPOSS209" @@ -971,7 +975,7 @@ def cposs209_errors( filename=OUT_PATH / "cposs209_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, weights=DEFAULT_WEIGHTS, ) def metrics( diff --git a/ml_peg/analysis/molecular_crystal/DMC_ICE13/analyse_DMC_ICE13.py b/ml_peg/analysis/molecular_crystal/DMC_ICE13/analyse_DMC_ICE13.py index a3f1e0a9b..3c5f0b080 100644 --- a/ml_peg/analysis/molecular_crystal/DMC_ICE13/analyse_DMC_ICE13.py +++ b/ml_peg/analysis/molecular_crystal/DMC_ICE13/analyse_DMC_ICE13.py @@ -8,14 +8,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models MODELS = get_model_names(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "molecular_crystal" / "DMC_ICE13" / "outputs" OUT_PATH = APP_ROOT / "data" / "molecular_crystal" / "DMC_ICE13" @@ -136,7 +140,7 @@ def dmc_ice13_errors(lattice_energies) -> dict[str, float]: filename=OUT_PATH / "dmc_ice13_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(dmc_ice13_errors: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/molecular_crystal/X23/analyse_X23.py b/ml_peg/analysis/molecular_crystal/X23/analyse_X23.py index fc180e3e5..d7110c24f 100644 --- a/ml_peg/analysis/molecular_crystal/X23/analyse_X23.py +++ b/ml_peg/analysis/molecular_crystal/X23/analyse_X23.py @@ -9,14 +9,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models MODELS = get_model_names(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "molecular_crystal" / "X23" / "outputs" OUT_PATH = APP_ROOT / "data" / "molecular_crystal" / "X23" @@ -139,7 +143,7 @@ def x23_errors(lattice_energies) -> dict[str, float]: filename=OUT_PATH / "x23_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(x23_errors: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/molecular_dynamics/water_density/analyse_water_density.py b/ml_peg/analysis/molecular_dynamics/water_density/analyse_water_density.py new file mode 100644 index 000000000..b8516743c --- /dev/null +++ b/ml_peg/analysis/molecular_dynamics/water_density/analyse_water_density.py @@ -0,0 +1,182 @@ +"""Analyse the water density benchmark.""" + +from __future__ import annotations + +from pathlib import Path + +from ase.io import Trajectory, write +import numpy as np +import pytest + +from ml_peg.analysis.utils.decorators import build_table, plot_parity +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) +D3_MODEL_NAMES = build_dispersion_name_map(MODELS) + +INTERVAL_PS = 0.1 +EQUILIB_TIME_PS = 500 + +CALC_PATH = CALCS_ROOT / "molecular_dynamics" / "water_density" / "outputs" +OUT_PATH = APP_ROOT / "data" / "molecular_dynamics" / "water_density" + + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + +EXPERIMENTAL_DATA = { + "water_270.0_K": 0.9998, + "water_290.0_K": 0.9991, + "water_300.0_K": 0.9970, + "water_330.0_K": 0.9802, +} + + +def labels() -> list: + """ + Get list of system names. + + Returns + ------- + list + List of all system names. + """ + for model_name in MODELS: + return [path.stem for path in (CALC_PATH / model_name).glob("*.log")] + return [] + + +def compute_density(fname, density_col=13): + """ + Compute average density from NPT log file. + + Parameters + ---------- + fname + Path to the log file. + density_col + Which column the density numbers are in. + + Returns + ------- + float + Average density in g/cm3. + """ + density_series = [] + with open(fname) as lines: + for line in lines: + items = line.strip().split() + if len(items) != 15: + continue + density_series.append(float(items[13])) + skip_frames = int(EQUILIB_TIME_PS / INTERVAL_PS) + return np.mean(density_series[skip_frames:]) + + +@pytest.fixture +@plot_parity( + filename=OUT_PATH / "figure_water_density.json", + title="Densities", + x_label="Predicted density / kcal/mol", + y_label="Reference density / kcal/mol", + hoverdata={ + "Labels": labels(), + }, +) +def water_density() -> dict[str, list]: + """ + Get water densities for all temperatures. + + Returns + ------- + dict[str, list] + Dictionary of all reference and predicted densities. + """ + results = {"ref": []} | {mlip: [] for mlip in MODELS} + ref_stored = False + + for model_name in MODELS: + for label in labels(): + atoms = Trajectory(CALC_PATH / model_name / f"{label}.traj")[-1] + + results[model_name].append( + compute_density(CALC_PATH / model_name / f"{label}.log") + ) + if not ref_stored: + results["ref"].append(EXPERIMENTAL_DATA[label]) + + # Write structures for app + structs_dir = OUT_PATH / model_name + structs_dir.mkdir(parents=True, exist_ok=True) + write(structs_dir / f"{label}.xyz", atoms) + ref_stored = True + return results + + +@pytest.fixture +def get_mae(water_density) -> dict[str, float]: + """ + Get mean absolute error for water densities. + + Parameters + ---------- + water_density + Dictionary of reference and predicted water densities. + + Returns + ------- + dict[str, float] + Dictionary of predicted water density errors for all models. + """ + results = {} + for model_name in MODELS: + results[model_name] = mae(water_density["ref"], water_density[model_name]) + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "water_density_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + mlip_name_map=D3_MODEL_NAMES, +) +def metrics(get_mae: dict[str, float]) -> dict[str, dict]: + """ + Get all metrics. + + Parameters + ---------- + get_mae + Mean absolute errors for all models. + + Returns + ------- + dict[str, dict] + Metric names and values for all models. + """ + return { + "MAE": get_mae, + } + + +def test_water_density(metrics: dict[str, dict]) -> None: + """ + Run water density test. + + Parameters + ---------- + metrics + All new benchmark metric names and dictionary of values for each model. + """ + return diff --git a/ml_peg/analysis/molecular_dynamics/water_density/metrics.yml b/ml_peg/analysis/molecular_dynamics/water_density/metrics.yml new file mode 100644 index 000000000..0754e9a91 --- /dev/null +++ b/ml_peg/analysis/molecular_dynamics/water_density/metrics.yml @@ -0,0 +1,8 @@ +metrics: + MAE: + good: 0.0 + bad: 0.2 + unit: g/cm^3 + tooltip: Mean Absolute Error in density for all systems + level_of_theory: Experimental density + weight: 1 diff --git a/ml_peg/analysis/molecular_reactions/BH2O_36/analyse_BH2O_36.py b/ml_peg/analysis/molecular_reactions/BH2O_36/analyse_BH2O_36.py index 9519ef837..ccaef305d 100644 --- a/ml_peg/analysis/molecular_reactions/BH2O_36/analyse_BH2O_36.py +++ b/ml_peg/analysis/molecular_reactions/BH2O_36/analyse_BH2O_36.py @@ -13,14 +13,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "molecular_reactions" / "BH2O_36" / "outputs" OUT_PATH = APP_ROOT / "data" / "molecular_reactions" / "BH2O_36" @@ -174,7 +178,7 @@ def get_mae(barrier_heights) -> dict[str, float]: filename=OUT_PATH / "bh2o_36_barriers_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/molecular_reactions/BH9/analyse_BH9.py b/ml_peg/analysis/molecular_reactions/BH9/analyse_BH9.py index 6828aef19..757763f8f 100644 --- a/ml_peg/analysis/molecular_reactions/BH9/analyse_BH9.py +++ b/ml_peg/analysis/molecular_reactions/BH9/analyse_BH9.py @@ -14,14 +14,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "molecular_reactions" / "BH9" / "outputs" OUT_PATH = APP_ROOT / "data" / "molecular_reactions" / "BH9" @@ -174,7 +178,7 @@ def get_mae(barrier_heights: dict[str, list]) -> dict[str, float]: filename=OUT_PATH / "bh9_barriers_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/molecular_reactions/CYCLO70/analyse_CYCLO70.py b/ml_peg/analysis/molecular_reactions/CYCLO70/analyse_CYCLO70.py index ce113d3f1..52f9dc7c4 100644 --- a/ml_peg/analysis/molecular_reactions/CYCLO70/analyse_CYCLO70.py +++ b/ml_peg/analysis/molecular_reactions/CYCLO70/analyse_CYCLO70.py @@ -17,14 +17,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "molecular_reactions" / "CYCLO70" / "outputs" OUT_PATH = APP_ROOT / "data" / "molecular_reactions" / "CYCLO70" @@ -130,7 +134,7 @@ def get_mae(barrier_heights) -> dict[str, float]: filename=OUT_PATH / "cyclo70_barriers_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/molecular_reactions/Criegee22/analyse_Criegee22.py b/ml_peg/analysis/molecular_reactions/Criegee22/analyse_Criegee22.py index 867335abb..7f2917b95 100644 --- a/ml_peg/analysis/molecular_reactions/Criegee22/analyse_Criegee22.py +++ b/ml_peg/analysis/molecular_reactions/Criegee22/analyse_Criegee22.py @@ -13,14 +13,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "molecular_reactions" / "Criegee22" / "outputs" @@ -119,7 +123,7 @@ def get_mae(barrier_heights) -> dict[str, float]: filename=OUT_PATH / "criegee22_barriers_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py b/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py index 234396d76..a1d31efe6 100644 --- a/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py +++ b/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py @@ -19,9 +19,10 @@ from ml_peg.analysis.utils.decorators import build_table, plot_density_scatter from ml_peg.analysis.utils.utils import ( - build_d3_name_map, + build_dispersion_name_map, load_metrics_config, mae, + write_density_trajectories, ) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT @@ -29,7 +30,7 @@ from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "molecular_reactions" / "RDB7" / "outputs" @@ -110,6 +111,7 @@ def barrier_density(barrier_heights: dict[str, list]) -> dict[str, dict]: Mapping of model name to density-scatter data. """ ref_vals = barrier_heights["ref"] + label_list = labels() density_inputs: dict[str, dict] = {} for model_name in MODELS: preds = barrier_heights.get(model_name, []) @@ -118,6 +120,14 @@ def barrier_density(barrier_heights: dict[str, list]) -> dict[str, dict]: "pred": preds, "meta": {"system_count": len([val for val in preds if val is not None])}, } + write_density_trajectories( + labels_list=label_list, + ref_vals=ref_vals, + pred_vals=preds, + struct_dir=OUT_PATH / model_name, + traj_dir=OUT_PATH / model_name / "density_traj", + struct_filename_builder=lambda label: f"{label}_ts.xyz", + ) return density_inputs @@ -147,7 +157,7 @@ def get_mae(barrier_heights) -> dict[str, float]: filename=OUT_PATH / "rdb7_barriers_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/nebs/si_defects/analyse_si_defects.py b/ml_peg/analysis/nebs/si_defects/analyse_si_defects.py new file mode 100644 index 000000000..43ea77029 --- /dev/null +++ b/ml_peg/analysis/nebs/si_defects/analyse_si_defects.py @@ -0,0 +1,294 @@ +"""Analyse Si defects DFT singlepoints (energies + forces).""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +from ase.atoms import Atoms +from ase.io import read, write +import numpy as np +import pandas as pd +import pytest + +from ml_peg.analysis.utils.decorators import build_table, plot_scatter +from ml_peg.analysis.utils.utils import load_metrics_config, mae +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) + +BENCHMARK_DIR = "si_defects" +CALC_PATH = CALCS_ROOT / "nebs" / BENCHMARK_DIR / "outputs" +OUT_PATH = APP_ROOT / "data" / "nebs" / BENCHMARK_DIR + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +@dataclass(frozen=True) +class _Case: + """Definition of a single Si defects NEB dataset.""" + + key: str + label: str + + +CASES: tuple[_Case, ...] = ( + _Case(key="64_atoms", label="64"), + _Case(key="216_atoms", label="216"), + _Case(key="216_atoms_di_to_single", label="216 di-to-single"), +) + + +@dataclass(frozen=True) +class _Results: + """Computed errors and assets for one (case, model) pair.""" + + energy_mae: float + force_mae: float + x: list[float] + energy_error: list[float] + force_rms: list[float] + structs: list[Atoms] + + +def _load_structs(case_key: str, model: str) -> list[Atoms] | None: + """ + Load MLIP-evaluated NEB frames (with DFT reference stored). + + Parameters + ---------- + case_key + Case key. + model + Model name. + + Returns + ------- + list[ase.atoms.Atoms] | None + Frames if the calculation output exists, otherwise ``None``. + """ + path = CALC_PATH / case_key / model / f"{BENCHMARK_DIR}.extxyz" + if not path.exists(): + return None + structs = read(path, index=":") + if not isinstance(structs, list) or not structs: + raise ValueError(f"Unexpected output content: {path}") + return structs + + +def _compute(structs: list[Atoms]) -> _Results: + """ + Compute per-image errors and global MAEs. + + Parameters + ---------- + structs + Ordered NEB frames with ``ref_energy_ev``, ``ref_forces``, ``pred_energy_ev``, + and ``pred_forces``. + + Returns + ------- + _Results + Computed errors and frames for downstream plotting/app. + """ + ref_e = np.asarray([float(s.info["ref_energy_ev"]) for s in structs]) + pred_e = np.asarray([float(s.info["pred_energy_ev"]) for s in structs]) + # Compare *relative* energies along the NEB (shift image 0 to 0 eV for both + # reference and predictions). This removes any constant energy offset. + ref_e_rel = ref_e - ref_e[0] + pred_e_rel = pred_e - pred_e[0] + energy_error = pred_e_rel - ref_e_rel + + ref_f = np.asarray([s.arrays["ref_forces"] for s in structs], dtype=float) + pred_f = np.asarray([s.arrays["pred_forces"] for s in structs], dtype=float) + force_error = pred_f - ref_f + + x = [float(s.info.get("image_index", idx)) for idx, s in enumerate(structs)] + return _Results( + energy_mae=mae(ref_e_rel, pred_e_rel), + force_mae=mae(ref_f.reshape(-1), pred_f.reshape(-1)), + x=x, + energy_error=[float(v) for v in energy_error], + force_rms=[float(v) for v in np.sqrt(np.mean(force_error**2, axis=(1, 2)))], + structs=structs, + ) + + +def _write_assets(case_key: str, model: str, results: _Results) -> None: + """ + Write structure trajectory and per-image CSV for the app. + + Parameters + ---------- + case_key + Case key. + model + Model name. + results + Computed errors and frames. + """ + out_dir = OUT_PATH / case_key / model + out_dir.mkdir(parents=True, exist_ok=True) + write(out_dir / f"{model}-neb-band.extxyz", results.structs) + pd.DataFrame( + { + "image": results.x, + "energy_error_ev": results.energy_error, + "force_rms_ev_per_ang": results.force_rms, + } + ).to_csv(out_dir / f"{model}-per_image_errors.csv", index=False) + + +def _write_plots(case_key: str, case_label: str, model: str, results: _Results) -> None: + """ + Write Dash/Plotly JSON plots for one (case, model) pair. + + Parameters + ---------- + case_key + Case key. + case_label + Human-friendly case label. + model + Model name. + results + Computed errors and frames. + """ + OUT_PATH.mkdir(parents=True, exist_ok=True) + + @plot_scatter( + filename=OUT_PATH / f"figure_{model}_{case_key}_energy_error.json", + title=f"Energy error along NEB ({case_label})", + x_label="Image", + y_label="Energy error / eV", + show_line=True, + ) + def plot_energy() -> dict[str, tuple[list[float], list[float]]]: + """ + Plot per-image energy errors for a single model and case. + + Returns + ------- + dict[str, tuple[list[float], list[float]]] + Mapping of model name to x/y arrays. + """ + return {model: [results.x, results.energy_error]} + + @plot_scatter( + filename=OUT_PATH / f"figure_{model}_{case_key}_force_rms.json", + title=f"Force RMS error along NEB ({case_label})", + x_label="Image", + y_label="Force RMS error / (eV/Å)", + show_line=True, + ) + def plot_forces() -> dict[str, tuple[list[float], list[float]]]: + """ + Plot per-image force RMS errors for a single model and case. + + Returns + ------- + dict[str, tuple[list[float], list[float]]] + Mapping of model name to x/y arrays. + """ + return {model: [results.x, results.force_rms]} + + plot_energy() + plot_forces() + + +@pytest.fixture +def case_results() -> dict[tuple[str, str], _Results]: + """ + Compute results for all available (case, model) combinations. + + Returns + ------- + dict[tuple[str, str], _Results] + Results indexed by ``(case_key, model_name)``. + """ + OUT_PATH.mkdir(parents=True, exist_ok=True) + results: dict[tuple[str, str], _Results] = {} + for case in CASES: + for model in MODELS: + structs = _load_structs(case.key, model) + if structs is None: + continue + computed = _compute(structs) + _write_assets(case.key, model, computed) + _write_plots(case.key, case.label, model, computed) + results[(case.key, model)] = computed + return results + + +@pytest.fixture +def metrics_dict( + case_results: dict[tuple[str, str], _Results], +) -> dict[str, dict[str, float]]: + """ + Build raw metric dict for the benchmark table. + + Parameters + ---------- + case_results + Results indexed by ``(case_key, model_name)``. + + Returns + ------- + dict[str, dict[str, float]] + Metric values for all models. + """ + metrics: dict[str, dict[str, float]] = {} + for case in CASES: + energy_key = f"Energy MAE ({case.label})" + force_key = f"Force MAE ({case.label})" + metrics[energy_key] = {} + metrics[force_key] = {} + for model in MODELS: + result = case_results.get((case.key, model)) + if result is None: + continue + metrics[energy_key][model] = result.energy_mae + metrics[force_key][model] = result.force_mae + return metrics + + +@pytest.fixture +@build_table( + filename=OUT_PATH / f"{BENCHMARK_DIR}_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + weights=DEFAULT_WEIGHTS, +) +def metrics(metrics_dict: dict[str, dict[str, float]]) -> dict[str, dict]: + """ + Build the benchmark table JSON. + + Parameters + ---------- + metrics_dict + Metric values for all models. + + Returns + ------- + dict[str, dict] + Metric names and values for all models. + """ + return metrics_dict + + +def test_si_defects(metrics: dict[str, dict]) -> None: + """ + Run analysis for Si defects DFT singlepoints. + + Parameters + ---------- + metrics + Benchmark metrics table. + """ + return diff --git a/ml_peg/analysis/nebs/si_defects/metrics.yml b/ml_peg/analysis/nebs/si_defects/metrics.yml new file mode 100644 index 000000000..fdd5cb755 --- /dev/null +++ b/ml_peg/analysis/nebs/si_defects/metrics.yml @@ -0,0 +1,37 @@ +metrics: + Energy MAE (64): + good: 0.02 + bad: 0.2 + unit: eV + tooltip: "MAE between MLIP and DFT (PBE) relative single-point energies along the NEB (image 0 shifted to 0 eV)." + level_of_theory: PBE + Force MAE (64): + good: 0.05 + bad: 0.5 + unit: eV/Å + tooltip: "MAE between MLIP and DFT (PBE) single-point forces across all atoms and images along the NEB." + level_of_theory: PBE + Energy MAE (216): + good: 0.02 + bad: 0.2 + unit: eV + tooltip: "MAE between MLIP and DFT (PBE) relative single-point energies along the NEB (image 0 shifted to 0 eV)." + level_of_theory: PBE + Force MAE (216): + good: 0.05 + bad: 0.5 + unit: eV/Å + tooltip: "MAE between MLIP and DFT (PBE) single-point forces across all atoms and images along the NEB." + level_of_theory: PBE + Energy MAE (216 di-to-single): + good: 0.02 + bad: 0.2 + unit: eV + tooltip: "MAE between MLIP and DFT (PBE) relative single-point energies along the NEB (image 0 shifted to 0 eV)." + level_of_theory: PBE + Force MAE (216 di-to-single): + good: 0.05 + bad: 0.5 + unit: eV/Å + tooltip: "MAE between MLIP and DFT (PBE) single-point forces across all atoms and images along the NEB." + level_of_theory: PBE diff --git a/ml_peg/analysis/non_covalent_interactions/IONPI19/analyse_IONPI19.py b/ml_peg/analysis/non_covalent_interactions/IONPI19/analyse_IONPI19.py index 92c6d6491..bdb21d5b0 100644 --- a/ml_peg/analysis/non_covalent_interactions/IONPI19/analyse_IONPI19.py +++ b/ml_peg/analysis/non_covalent_interactions/IONPI19/analyse_IONPI19.py @@ -10,7 +10,7 @@ from ml_peg.analysis.utils.decorators import build_table, plot_parity from ml_peg.analysis.utils.utils import ( - build_d3_name_map, + build_dispersion_name_map, load_metrics_config, mae, ) @@ -20,7 +20,7 @@ from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal @@ -166,7 +166,7 @@ def get_mae(conformer_energies) -> dict[str, float]: filename=OUT_PATH / "ionpi19_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/non_covalent_interactions/NCIA_D1200/analyse_NCIA_D1200.py b/ml_peg/analysis/non_covalent_interactions/NCIA_D1200/analyse_NCIA_D1200.py index 7e33b47e6..b30930574 100644 --- a/ml_peg/analysis/non_covalent_interactions/NCIA_D1200/analyse_NCIA_D1200.py +++ b/ml_peg/analysis/non_covalent_interactions/NCIA_D1200/analyse_NCIA_D1200.py @@ -13,9 +13,10 @@ plot_density_scatter, ) from ml_peg.analysis.utils.utils import ( - build_d3_name_map, + build_dispersion_name_map, load_metrics_config, mae, + write_density_trajectories, ) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT @@ -23,7 +24,7 @@ from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "non_covalent_interactions" / "NCIA_D1200" / "outputs" @@ -104,6 +105,7 @@ def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict Mapping of model names to density-plot payloads. """ ref_vals = interaction_energies["ref"] + label_list = labels() density_inputs: dict[str, dict] = {} for model_name in MODELS: preds = interaction_energies.get(model_name, []) @@ -112,6 +114,14 @@ def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict "pred": preds, "meta": {"system_count": len([val for val in preds if val is not None])}, } + write_density_trajectories( + labels_list=label_list, + ref_vals=ref_vals, + pred_vals=preds, + struct_dir=OUT_PATH / model_name, + traj_dir=OUT_PATH / model_name / "density_traj", + struct_filename_builder=lambda label: f"{label}.xyz", + ) return density_inputs @@ -143,7 +153,7 @@ def get_mae(interaction_energies) -> dict[str, float]: filename=OUT_PATH / "ncia_d1200_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/non_covalent_interactions/NCIA_D442x10/analyse_NCIA_D442x10.py b/ml_peg/analysis/non_covalent_interactions/NCIA_D442x10/analyse_NCIA_D442x10.py index feacb87b6..fa9f38bbf 100644 --- a/ml_peg/analysis/non_covalent_interactions/NCIA_D442x10/analyse_NCIA_D442x10.py +++ b/ml_peg/analysis/non_covalent_interactions/NCIA_D442x10/analyse_NCIA_D442x10.py @@ -13,9 +13,10 @@ plot_density_scatter, ) from ml_peg.analysis.utils.utils import ( - build_d3_name_map, + build_dispersion_name_map, load_metrics_config, mae, + write_density_trajectories, ) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT @@ -23,7 +24,7 @@ from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "non_covalent_interactions" / "NCIA_D442x10" / "outputs" @@ -104,6 +105,7 @@ def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict Mapping of model names to density-plot inputs. """ ref_vals = interaction_energies["ref"] + label_list = labels() density_inputs: dict[str, dict] = {} for model_name in MODELS: preds = interaction_energies.get(model_name, []) @@ -112,6 +114,14 @@ def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict "pred": preds, "meta": {"system_count": len([val for val in preds if val is not None])}, } + write_density_trajectories( + labels_list=label_list, + ref_vals=ref_vals, + pred_vals=preds, + struct_dir=OUT_PATH / model_name, + traj_dir=OUT_PATH / model_name / "density_traj", + struct_filename_builder=lambda label: f"{label}.xyz", + ) return density_inputs @@ -143,7 +153,7 @@ def get_mae(interaction_energies) -> dict[str, float]: filename=OUT_PATH / "ncia_d442x10_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/non_covalent_interactions/NCIA_HB300SPXx10/analyse_NCIA_HB300SPXx10.py b/ml_peg/analysis/non_covalent_interactions/NCIA_HB300SPXx10/analyse_NCIA_HB300SPXx10.py index fc62b9669..8a4cf3670 100644 --- a/ml_peg/analysis/non_covalent_interactions/NCIA_HB300SPXx10/analyse_NCIA_HB300SPXx10.py +++ b/ml_peg/analysis/non_covalent_interactions/NCIA_HB300SPXx10/analyse_NCIA_HB300SPXx10.py @@ -12,14 +12,19 @@ build_table, plot_density_scatter, ) -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, + write_density_trajectories, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "non_covalent_interactions" / "NCIA_HB300SPXx10" / "outputs" @@ -100,6 +105,7 @@ def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict Mapping of model names to density-plot payloads. """ ref_vals = interaction_energies["ref"] + label_list = labels() density_inputs: dict[str, dict] = {} for model_name in MODELS: preds = interaction_energies.get(model_name, []) @@ -108,6 +114,14 @@ def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict "pred": preds, "meta": {"system_count": len([val for val in preds if val is not None])}, } + write_density_trajectories( + labels_list=label_list, + ref_vals=ref_vals, + pred_vals=preds, + struct_dir=OUT_PATH / model_name, + traj_dir=OUT_PATH / model_name / "density_traj", + struct_filename_builder=lambda label: f"{label}.xyz", + ) return density_inputs @@ -139,7 +153,7 @@ def get_mae(interaction_energies) -> dict[str, float]: filename=OUT_PATH / "ncia_hb300spxx10_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/non_covalent_interactions/NCIA_HB375x10/analyse_NCIA_HB375x10.py b/ml_peg/analysis/non_covalent_interactions/NCIA_HB375x10/analyse_NCIA_HB375x10.py index c709ea493..a9282f12e 100644 --- a/ml_peg/analysis/non_covalent_interactions/NCIA_HB375x10/analyse_NCIA_HB375x10.py +++ b/ml_peg/analysis/non_covalent_interactions/NCIA_HB375x10/analyse_NCIA_HB375x10.py @@ -12,14 +12,19 @@ build_table, plot_density_scatter, ) -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, + write_density_trajectories, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "non_covalent_interactions" / "NCIA_HB375x10" / "outputs" @@ -100,6 +105,7 @@ def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict Mapping of model names to density-plot payloads. """ ref_vals = interaction_energies["ref"] + label_list = labels() density_inputs: dict[str, dict] = {} for model_name in MODELS: preds = interaction_energies.get(model_name, []) @@ -108,6 +114,14 @@ def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict "pred": preds, "meta": {"system_count": len([val for val in preds if val is not None])}, } + write_density_trajectories( + labels_list=label_list, + ref_vals=ref_vals, + pred_vals=preds, + struct_dir=OUT_PATH / model_name, + traj_dir=OUT_PATH / model_name / "density_traj", + struct_filename_builder=lambda label: f"{label}.xyz", + ) return density_inputs @@ -139,7 +153,7 @@ def get_mae(interaction_energies) -> dict[str, float]: filename=OUT_PATH / "ncia_hb375x10_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/non_covalent_interactions/NCIA_IHB100x10/analyse_NCIA_IHB100x10.py b/ml_peg/analysis/non_covalent_interactions/NCIA_IHB100x10/analyse_NCIA_IHB100x10.py index 778c50042..6ae6c6d88 100644 --- a/ml_peg/analysis/non_covalent_interactions/NCIA_IHB100x10/analyse_NCIA_IHB100x10.py +++ b/ml_peg/analysis/non_covalent_interactions/NCIA_IHB100x10/analyse_NCIA_IHB100x10.py @@ -12,14 +12,19 @@ build_table, plot_density_scatter, ) -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, + write_density_trajectories, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "non_covalent_interactions" / "NCIA_IHB100x10" / "outputs" @@ -100,6 +105,7 @@ def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict Mapping of model names to density-plot payloads. """ ref_vals = interaction_energies["ref"] + label_list = labels() density_inputs: dict[str, dict] = {} for model_name in MODELS: preds = interaction_energies.get(model_name, []) @@ -108,6 +114,14 @@ def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict "pred": preds, "meta": {"system_count": len([val for val in preds if val is not None])}, } + write_density_trajectories( + labels_list=label_list, + ref_vals=ref_vals, + pred_vals=preds, + struct_dir=OUT_PATH / model_name, + traj_dir=OUT_PATH / model_name / "density_traj", + struct_filename_builder=lambda label: f"{label}.xyz", + ) return density_inputs @@ -139,7 +153,7 @@ def get_mae(interaction_energies) -> dict[str, float]: filename=OUT_PATH / "ncia_ihb100x10_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/non_covalent_interactions/NCIA_R739x5/analyse_NCIA_R739x5.py b/ml_peg/analysis/non_covalent_interactions/NCIA_R739x5/analyse_NCIA_R739x5.py index 333e01f2e..c7b7336c1 100644 --- a/ml_peg/analysis/non_covalent_interactions/NCIA_R739x5/analyse_NCIA_R739x5.py +++ b/ml_peg/analysis/non_covalent_interactions/NCIA_R739x5/analyse_NCIA_R739x5.py @@ -12,14 +12,19 @@ build_table, plot_density_scatter, ) -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, + write_density_trajectories, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "non_covalent_interactions" / "NCIA_R739x5" / "outputs" @@ -100,6 +105,7 @@ def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict Mapping of model names to density-plot payloads. """ ref_vals = interaction_energies["ref"] + label_list = labels() density_inputs: dict[str, dict] = {} for model_name in MODELS: preds = interaction_energies.get(model_name, []) @@ -108,6 +114,14 @@ def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict "pred": preds, "meta": {"system_count": len([val for val in preds if val is not None])}, } + write_density_trajectories( + labels_list=label_list, + ref_vals=ref_vals, + pred_vals=preds, + struct_dir=OUT_PATH / model_name, + traj_dir=OUT_PATH / model_name / "density_traj", + struct_filename_builder=lambda label: f"{label}.xyz", + ) return density_inputs @@ -139,7 +153,7 @@ def get_mae(interaction_energies) -> dict[str, float]: filename=OUT_PATH / "ncia_r739x5_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/non_covalent_interactions/NCIA_SH250x10/analyse_NCIA_SH250x10.py b/ml_peg/analysis/non_covalent_interactions/NCIA_SH250x10/analyse_NCIA_SH250x10.py index f623ae9eb..d51aabe84 100644 --- a/ml_peg/analysis/non_covalent_interactions/NCIA_SH250x10/analyse_NCIA_SH250x10.py +++ b/ml_peg/analysis/non_covalent_interactions/NCIA_SH250x10/analyse_NCIA_SH250x10.py @@ -12,14 +12,19 @@ build_table, plot_density_scatter, ) -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, + write_density_trajectories, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "non_covalent_interactions" / "NCIA_SH250x10" / "outputs" @@ -100,6 +105,7 @@ def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict Mapping of model names to density-plot payloads. """ ref_vals = interaction_energies["ref"] + label_list = labels() density_inputs: dict[str, dict] = {} for model_name in MODELS: preds = interaction_energies.get(model_name, []) @@ -108,6 +114,14 @@ def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict "pred": preds, "meta": {"system_count": len([val for val in preds if val is not None])}, } + write_density_trajectories( + labels_list=label_list, + ref_vals=ref_vals, + pred_vals=preds, + struct_dir=OUT_PATH / model_name, + traj_dir=OUT_PATH / model_name / "density_traj", + struct_filename_builder=lambda label: f"{label}.xyz", + ) return density_inputs @@ -139,7 +153,7 @@ def get_mae(interaction_energies) -> dict[str, float]: filename=OUT_PATH / "ncia_sh250x10_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/non_covalent_interactions/QUID/analyse_QUID.py b/ml_peg/analysis/non_covalent_interactions/QUID/analyse_QUID.py new file mode 100644 index 000000000..687b68f38 --- /dev/null +++ b/ml_peg/analysis/non_covalent_interactions/QUID/analyse_QUID.py @@ -0,0 +1,245 @@ +""" +Analyse the QUID benchmark for ligand-pocket interactions. + +Puleva, M., Medrano Sandonas, L., Lőrincz, B.D. et al, +Extending quantum-mechanical benchmark accuracy to biological ligand-pocket +interactions, +Nat Commun 16, 8583 (2025). https://doi.org/10.1038/s41467-025-63587-9 +""" + +from __future__ import annotations + +from pathlib import Path + +from ase import units +from ase.io import read, write +import pytest + +from ml_peg.analysis.utils.decorators import build_table, plot_parity +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) + +CALC_PATH = CALCS_ROOT / "non_covalent_interactions" / "QUID" / "outputs" +OUT_PATH = APP_ROOT / "data" / "non_covalent_interactions" / "QUID" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + +EV_TO_KCAL = units.mol / units.kcal + + +def labels() -> dict[str, list[int]]: + """ + Get dictionary of info for QUID structures. + + Returns + ------- + dict[str, list[int]] + Dictionary of system indices, complex atom counts, and complex charges. + """ + info = {"all": [], "equilibrium": [], "dissociation": []} + + for model_name in MODELS: + model_dir = CALC_PATH / model_name + if model_dir.exists(): + xyz_files = sorted(model_dir.glob("*.xyz")) + + info["all"] = [path.stem for path in xyz_files] + info["equilibrium"] = [label for label in info["all"] if "_" not in label] + info["dissociation"] = [label for label in info["all"] if "_" in label] + + return info + + +LABELS = labels() + + +@pytest.fixture +@plot_parity( + filename=OUT_PATH / "figure_quid.json", + title="Interaction energies", + x_label="Predicted energy / kcal/mol", + y_label="Reference energy / kcal/mol", + hoverdata={"Labels": LABELS["all"]}, +) +def interaction_energies() -> dict[str, list]: + """ + Get interaction energies for all systems. + + Returns + ------- + dict[str, list] + Dictionary of all reference and predicted interaction energies. + """ + results = {"ref": []} | {mlip: [] for mlip in MODELS} + + ref_stored = False + + for model_name in MODELS: + for label in LABELS["all"]: + atoms = read(CALC_PATH / model_name / f"{label}.xyz", index=0) + + if not ref_stored: + results["ref"].append(atoms.info["ref_int_energy"][()] * EV_TO_KCAL) + + results[model_name].append(atoms.info["model_int_energy"][()] * EV_TO_KCAL) + + # Write structures for app + structs_dir = OUT_PATH / model_name + structs_dir.mkdir(parents=True, exist_ok=True) + write(structs_dir / f"{label}.xyz", atoms) + + ref_stored = True + return results + + +@pytest.fixture +def total_mae(interaction_energies) -> dict[str, float]: + """ + Get mean absolute error for energies for all systems. + + Parameters + ---------- + interaction_energies + Dictionary of reference and predicted energies. + + Returns + ------- + dict[str, float] + Dictionary of predicted energy errors for all models. + """ + results = {} + for model_name in MODELS: + results[model_name] = mae( + interaction_energies["ref"], interaction_energies[model_name] + ) + return results + + +@pytest.fixture +def equilibrium_mae(interaction_energies) -> dict[str, float]: + """ + Get mean absolute error for charged systems only. + + Parameters + ---------- + interaction_energies + Dictionary of reference and predicted interaction energies. + + Returns + ------- + dict[str, float] + Dictionary of predicted interaction energy errors for charged systems. + """ + equilibrium_indices = [ + i for i, label in enumerate(LABELS["all"]) if label in LABELS["equilibrium"] + ] + + results = {} + for model_name in MODELS: + if interaction_energies[model_name]: + ref_equilibrium = [ + interaction_energies["ref"][i] for i in equilibrium_indices + ] + pred_equilibrium = [ + interaction_energies[model_name][i] for i in equilibrium_indices + ] + results[model_name] = mae(ref_equilibrium, pred_equilibrium) + else: + results[model_name] = None + return results + + +@pytest.fixture +def dissociation_mae(interaction_energies) -> dict[str, float]: + """ + Get mean absolute error for dissociation systems only. + + Parameters + ---------- + interaction_energies + Dictionary of reference and predicted interaction energies. + + Returns + ------- + dict[str, float] + Dictionary of predicted interaction energy errors for dissociation systems. + """ + dissociation_indices = [ + i for i, label in enumerate(LABELS["all"]) if label in LABELS["dissociation"] + ] + + results = {} + for model_name in MODELS: + if interaction_energies[model_name]: + ref_dissociation = [ + interaction_energies["ref"][i] for i in dissociation_indices + ] + pred_dissociation = [ + interaction_energies[model_name][i] for i in dissociation_indices + ] + results[model_name] = mae(ref_dissociation, pred_dissociation) + else: + results[model_name] = None + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "quid_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + weights=DEFAULT_WEIGHTS, + mlip_name_map=DISPERSION_NAME_MAP, +) +def metrics( + total_mae: dict[str, float], + equilibrium_mae: dict[str, float], + dissociation_mae: dict[str, float], +) -> dict[str, dict]: + """ + Get all metrics. + + Parameters + ---------- + total_mae + Mean absolute errors for all models. + equilibrium_mae + Mean absolute errors for all models for equilibrium systems. + dissociation_mae + Mean absolute errors for all models for dissociation systems. + + Returns + ------- + dict[str, dict] + Metric names and values for all models. + """ + return { + "Equilibrium MAE": equilibrium_mae, + "Dissociation MAE": dissociation_mae, + "Overall MAE": total_mae, + } + + +def test_quid(metrics: dict[str, dict]) -> None: + """ + Run QUID test. + + Parameters + ---------- + metrics + All new benchmark metric names and dictionary of values for each model. + """ + return diff --git a/ml_peg/analysis/non_covalent_interactions/QUID/metrics.yml b/ml_peg/analysis/non_covalent_interactions/QUID/metrics.yml new file mode 100644 index 000000000..f1aed47a1 --- /dev/null +++ b/ml_peg/analysis/non_covalent_interactions/QUID/metrics.yml @@ -0,0 +1,22 @@ +metrics: + Overall MAE: + good: 0.3 + bad: 2.0 + unit: kcal/mol + tooltip: Mean Absolute Error for all systems + level_of_theory: CCSD(T) + weight: 1 + Equilibrium MAE: + good: 0.3 + bad: 2.0 + unit: kcal/mol + tooltip: Mean Absolute Error for neutral systems + level_of_theory: CCSD(T) + weight: 0 + Dissociation MAE: + good: 0.3 + bad: 2.0 + unit: kcal/mol + tooltip: Mean Absolute Error for charged systems + level_of_theory: CCSD(T) + weight: 0 diff --git a/ml_peg/analysis/supramolecular/PLA15/analyse_PLA15.py b/ml_peg/analysis/supramolecular/PLA15/analyse_PLA15.py index f8da46021..1fb5c79d0 100644 --- a/ml_peg/analysis/supramolecular/PLA15/analyse_PLA15.py +++ b/ml_peg/analysis/supramolecular/PLA15/analyse_PLA15.py @@ -9,14 +9,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models MODELS = get_model_names(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "supramolecular" / "PLA15" / "outputs" OUT_PATH = APP_ROOT / "data" / "supramolecular" / "PLA15" @@ -258,7 +262,7 @@ def pla15_ion_neutral_mae(interaction_energies) -> dict[str, float]: metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, weights=DEFAULT_WEIGHTS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics( pla15_mae: dict[str, float], diff --git a/ml_peg/analysis/supramolecular/PLF547/analyse_PLF547.py b/ml_peg/analysis/supramolecular/PLF547/analyse_PLF547.py index d7ca36b5d..d5b716f8e 100755 --- a/ml_peg/analysis/supramolecular/PLF547/analyse_PLF547.py +++ b/ml_peg/analysis/supramolecular/PLF547/analyse_PLF547.py @@ -9,14 +9,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "supramolecular" / "PLF547" / "outputs" OUT_PATH = APP_ROOT / "data" / "supramolecular" / "PLF547" @@ -193,7 +197,7 @@ def neutral_mae(interaction_energies) -> dict[str, float]: metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, weights=DEFAULT_WEIGHTS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics( total_mae: dict[str, float], diff --git a/ml_peg/analysis/supramolecular/S30L/analyse_S30L.py b/ml_peg/analysis/supramolecular/S30L/analyse_S30L.py index a2b66e92f..82b53053f 100644 --- a/ml_peg/analysis/supramolecular/S30L/analyse_S30L.py +++ b/ml_peg/analysis/supramolecular/S30L/analyse_S30L.py @@ -9,14 +9,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models MODELS = get_model_names(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "supramolecular" / "S30L" / "outputs" OUT_PATH = APP_ROOT / "data" / "supramolecular" / "S30L" @@ -221,7 +225,7 @@ def s30l_neutral_mae(interaction_energies) -> dict[str, float]: metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, weights=DEFAULT_WEIGHTS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics( s30l_mae: dict[str, float], diff --git a/ml_peg/analysis/surfaces/OC157/analyse_OC157.py b/ml_peg/analysis/surfaces/OC157/analyse_OC157.py index 96a932930..a0b447560 100644 --- a/ml_peg/analysis/surfaces/OC157/analyse_OC157.py +++ b/ml_peg/analysis/surfaces/OC157/analyse_OC157.py @@ -9,14 +9,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models MODELS = get_model_names(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "surfaces" / "OC157" / "outputs" OUT_PATH = APP_ROOT / "data" / "surfaces" / "OC157" @@ -186,7 +190,7 @@ def ranking_error(relative_energies: dict[str, list]) -> dict[str, float]: filename=OUT_PATH / "oc157_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics( oc157_mae: dict[str, float], ranking_error: dict[str, float] diff --git a/ml_peg/analysis/surfaces/S24/analyse_S24.py b/ml_peg/analysis/surfaces/S24/analyse_S24.py index 9d8f0aef7..e3966f852 100644 --- a/ml_peg/analysis/surfaces/S24/analyse_S24.py +++ b/ml_peg/analysis/surfaces/S24/analyse_S24.py @@ -8,14 +8,18 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models MODELS = get_model_names(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) CALC_PATH = CALCS_ROOT / "surfaces" / "S24" / "outputs" OUT_PATH = APP_ROOT / "data" / "surfaces" / "S24" @@ -165,7 +169,7 @@ def s24_mae(adsorption_energies) -> dict[str, float]: filename=OUT_PATH / "s24_metrics_table.json", metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics(s24_mae: dict[str, float]) -> dict[str, dict]: """ diff --git a/ml_peg/analysis/tm_complexes/3dTMV/analyse_3dTMV.py b/ml_peg/analysis/tm_complexes/3dTMV/analyse_3dTMV.py new file mode 100644 index 000000000..0391cfa50 --- /dev/null +++ b/ml_peg/analysis/tm_complexes/3dTMV/analyse_3dTMV.py @@ -0,0 +1,283 @@ +"""Analyse the 3dTMV benchmark.""" + +from __future__ import annotations + +from pathlib import Path + +from ase import units +from ase.io import read, write +import pytest + +from ml_peg.analysis.utils.decorators import ( + build_table, + plot_parity, +) +from ml_peg.analysis.utils.utils import ( + build_dispersion_name_map, + load_metrics_config, + mae, +) +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) + +EV_TO_KCAL = units.mol / units.kcal +CALC_PATH = CALCS_ROOT / "tm_complexes" / "3dTMV" / "outputs" +OUT_PATH = APP_ROOT / "data" / "tm_complexes" / "3dTMV" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + +SUBSETS = { + 1: "SR", + 2: "SR", + 3: "SR", + 4: "SR", + 5: "SR", + 6: "SR", + 7: "SR", + 8: "SR", + 9: "SR", + 10: "SR", + 11: "SR", + 12: "SR", + 13: "SR/MR", + 14: "SR/MR", + 15: "SR/MR", + 16: "SR/MR", + 17: "SR/MR", + 18: "SR/MR", + 19: "SR/MR", + 20: "SR/MR", + 21: "SR/MR", + 22: "SR/MR", + 23: "MR", + 24: "MR", + 25: "MR", + 26: "MR", + 27: "MR", + 28: "MR", +} + + +def labels(): + """ + Get complex ids. + + Returns + ------- + list + IDs of the complexes. + """ + return list(range(1, 29)) + + +@pytest.fixture +@plot_parity( + filename=OUT_PATH / "figure_3dtmv.json", + title="Ionization energies", + x_label="Predicted ionization energy / kcal/mol", + y_label="Reference ionization energy / kcal/mol", + hoverdata={ + "Labels": labels(), + }, +) +def ionization_energies() -> dict[str, list]: + """ + Get ionization energies for all systems. + + Returns + ------- + dict[str, list] + Dictionary of all reference and predicted energies. + """ + results = {"ref": []} | {mlip: [] for mlip in MODELS} + ref_stored = False + + for model_name in MODELS: + for complex_id in labels(): + atoms = read(CALC_PATH / model_name / f"{complex_id}.xyz") + model_ion_energy = atoms.info["model_ionization_energy"] + ref_ion_energy = atoms.info["ref_ionization_energy"] + # Write structures for app + structs_dir = OUT_PATH / model_name + structs_dir.mkdir(parents=True, exist_ok=True) + write(structs_dir / f"{complex_id}.xyz", atoms) + results[model_name].append(model_ion_energy * EV_TO_KCAL) + if not ref_stored: + results["ref"].append(ref_ion_energy * EV_TO_KCAL) + ref_stored = True + return results + + +@pytest.fixture +def sr_mae(ionization_energies) -> dict[str, float]: + """ + Get mean absolute error for SR subset. + + Parameters + ---------- + ionization_energies + Dictionary of reference and predicted energies. + + Returns + ------- + dict[str, float] + Dictionary of predicted energy errors for all models. + """ + results = {} + + for model_name in MODELS: + subsampled_ref_e = [ + ionization_energies["ref"][i - 1] for i in labels() if SUBSETS[i] == "SR" + ] + subsampled_model_e = [ + ionization_energies[model_name][i - 1] + for i in labels() + if SUBSETS[i] == "SR" + ] + results[model_name] = mae(subsampled_ref_e, subsampled_model_e) + return results + + +@pytest.fixture +def mr_mae(ionization_energies) -> dict[str, float]: + """ + Get mean absolute error for MR subset. + + Parameters + ---------- + ionization_energies + Dictionary of reference and predicted energies. + + Returns + ------- + dict[str, float] + Dictionary of predicted energy errors for all models. + """ + results = {} + + for model_name in MODELS: + subsampled_ref_e = [ + ionization_energies["ref"][i - 1] for i in labels() if SUBSETS[i] == "MR" + ] + subsampled_model_e = [ + ionization_energies[model_name][i - 1] + for i in labels() + if SUBSETS[i] == "MR" + ] + results[model_name] = mae(subsampled_ref_e, subsampled_model_e) + return results + + +@pytest.fixture +def sr_mr_mae(ionization_energies) -> dict[str, float]: + """ + Get mean absolute error for SR/MR subset. + + Parameters + ---------- + ionization_energies + Dictionary of reference and predicted energies. + + Returns + ------- + dict[str, float] + Dictionary of predicted energy errors for all models. + """ + results = {} + + for model_name in MODELS: + subsampled_ref_e = [ + ionization_energies["ref"][i - 1] for i in labels() if SUBSETS[i] == "SR/MR" + ] + subsampled_model_e = [ + ionization_energies[model_name][i - 1] + for i in labels() + if SUBSETS[i] == "SR/MR" + ] + results[model_name] = mae(subsampled_ref_e, subsampled_model_e) + return results + + +@pytest.fixture +def total_mae(ionization_energies) -> dict[str, float]: + """ + Get mean absolute error for all conmplexes. + + Parameters + ---------- + ionization_energies + Dictionary of reference and predicted energies. + + Returns + ------- + dict[str, float] + Dictionary of predicted energy errors for all models. + """ + results = {} + + for model_name in MODELS: + results[model_name] = mae( + ionization_energies["ref"], ionization_energies[model_name] + ) + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "3dtmv_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + mlip_name_map=DISPERSION_NAME_MAP, + weights=DEFAULT_WEIGHTS, +) +def metrics( + total_mae: dict[str, float], + sr_mae: dict[str, float], + mr_mae: dict[str, float], + sr_mr_mae: dict[str, float], +) -> dict[str, dict]: + """ + Get all metrics. + + Parameters + ---------- + total_mae + Mean absolute errors for all models, all complexes. + sr_mae + Mean absolute errors for all models, single-reference complexes. + mr_mae + Mean absolute errors for all models, multi-reference complexes. + sr_mr_mae + Mean absolute errors for all models, intermediate complexes. + + Returns + ------- + dict[str, dict] + Metric names and values for all models. + """ + return { + "Overall MAE": total_mae, + "SR MAE": sr_mae, + "MR MAE": mr_mae, + "SR/MR MAE": sr_mr_mae, + } + + +def test_3dtmv(metrics: dict[str, dict]) -> None: + """ + Run 3dTMV test. + + Parameters + ---------- + metrics + All new benchmark metric names and dictionary of values for each model. + """ + return diff --git a/ml_peg/analysis/tm_complexes/3dTMV/metrics.yml b/ml_peg/analysis/tm_complexes/3dTMV/metrics.yml new file mode 100644 index 000000000..0d7ba50b3 --- /dev/null +++ b/ml_peg/analysis/tm_complexes/3dTMV/metrics.yml @@ -0,0 +1,29 @@ +metrics: + Overall MAE: + good: 0.0 + bad: 50 + unit: kcal/mol + tooltip: Mean Absolute Error for all systems + level_of_theory: ph-AFQMC + weight: 1 + SR MAE: + good: 0.0 + bad: 50 + unit: kcal/mol + tooltip: Mean Absolute Error for the single reference (SR) subset + level_of_theory: ph-AFQMC + weight: 0 + MR MAE: + good: 0.0 + bad: 50 + unit: kcal/mol + tooltip: Mean Absolute Error for the multireference (MR) subset + level_of_theory: ph-AFQMC + weight: 0 + SR/MR MAE: + good: 0.0 + bad: 50 + unit: kcal/mol + tooltip: Mean Absolute Error for the SR/MR (intermediate category) subset + level_of_theory: ph-AFQMC + weight: 0 diff --git a/ml_peg/analysis/utils/analyse_gscdb138.py b/ml_peg/analysis/utils/analyse_gscdb138.py index cdffc72da..6c943c519 100644 --- a/ml_peg/analysis/utils/analyse_gscdb138.py +++ b/ml_peg/analysis/utils/analyse_gscdb138.py @@ -8,13 +8,13 @@ from ase.io import read, write from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, mae +from ml_peg.analysis.utils.utils import build_dispersion_name_map, mae from ml_peg.app.utils.utils import Thresholds from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) -D3_MODEL_NAMES = build_d3_name_map(MODELS) +DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS) EV_TO_KCAL = units.mol / units.kcal @@ -176,7 +176,7 @@ def get_gscdb138_metrics( metric_tooltips=metric_tooltips, thresholds=thresholds, weights=weights, - mlip_name_map=D3_MODEL_NAMES, + mlip_name_map=DISPERSION_NAME_MAP, ) def metrics() -> dict[str, dict]: """ diff --git a/ml_peg/analysis/utils/decorators.py b/ml_peg/analysis/utils/decorators.py index cdcc72ba8..05a098931 100644 --- a/ml_peg/analysis/utils/decorators.py +++ b/ml_peg/analysis/utils/decorators.py @@ -2,7 +2,6 @@ from __future__ import annotations -from collections import defaultdict from collections.abc import Callable import functools import json @@ -15,7 +14,13 @@ import pandas as pd import plotly.graph_objects as go -from ml_peg.analysis.utils.utils import calc_table_scores +from ml_peg.analysis.utils.utils import ( + DENSITY_GRID_SIZE, + DENSITY_MAX_POINTS_PER_CELL, + DENSITY_SAMPLE_SEED, + calc_table_scores, + sample_density_grid, +) from ml_peg.app.utils.utils import Thresholds from ml_peg.models.get_models import get_model_names, load_model_configs @@ -541,9 +546,9 @@ def plot_density_scatter( y_label: str | None = None, filename: str = "density_scatter.json", colorbar_title: str = "Density", - grid_size: int = 80, - max_points_per_cell: int = 5, - seed: int = 0, + grid_size: int = DENSITY_GRID_SIZE, + max_points_per_cell: int = DENSITY_MAX_POINTS_PER_CELL, + seed: int = DENSITY_SAMPLE_SEED, hover_metadata: dict[str, str] | None = None, annotation_metadata: dict[str, str] | None = None, ) -> Callable: @@ -621,70 +626,6 @@ def plot_density_wrapper(*args, **kwargs) -> dict[str, Any]: dict Results dictionary. """ - - def _downsample( - ref_vals: np.ndarray, pred_vals: np.ndarray - ) -> tuple[list[float], list[float], list[int]]: - """ - Downsample data points while keeping dense regions representative. - - Parameters - ---------- - ref_vals - Reference (x-axis) values for all systems. - pred_vals - Predicted (y-axis) values for all systems. - - Returns - ------- - tuple[list[float], list[float], list[int]] - Downsampled reference values, predicted values, and density counts - corresponding to each retained point. - """ - if ref_vals.size == 0: - return [], [], [] - - delta_x = ref_vals.max() - ref_vals.min() - delta_y = pred_vals.max() - pred_vals.min() - eps = 1e-9 - - norm_x = np.clip( - # Normalise to [0, 1). Clamp to avoid hitting the upper bound so - # bin indices always live in [0, grid_size - 1]. - (ref_vals - ref_vals.min()) / max(delta_x, eps), - 0.0, - 0.999999, - ) - norm_y = np.clip( - (pred_vals - pred_vals.min()) / max(delta_y, eps), - 0.0, - 0.999999, - ) - bins_x = (norm_x * grid_size).astype(int) - bins_y = (norm_y * grid_size).astype(int) - cell_points: dict[tuple[int, int], list[int]] = defaultdict(list) - for idx, (cx, cy) in enumerate(zip(bins_x, bins_y, strict=True)): - cell_points[(int(cx), int(cy))].append(idx) - - rng = np.random.default_rng(seed) - sampled_x: list[float] = [] - sampled_y: list[float] = [] - sampled_density: list[int] = [] - for indices in cell_points.values(): - if len(indices) > max_points_per_cell: - chosen = rng.choice( - indices, size=max_points_per_cell, replace=False - ) - else: - chosen = indices - density = len(indices) - for idx in chosen: - sampled_x.append(float(ref_vals[idx])) - sampled_y.append(float(pred_vals[idx])) - sampled_density.append(density) - - return sampled_x, sampled_y, sampled_density - results = func(*args, **kwargs) if not isinstance(results, dict): raise TypeError( @@ -724,7 +665,16 @@ def _downsample( if ref_vals.size == 0 or pred_vals.size == 0: sampled = ([], [], []) else: - sampled = _downsample(ref_vals, pred_vals) + sampled_indices, sampled_density, _ = sample_density_grid( + ref_vals, + pred_vals, + grid_size=grid_size, + max_points_per_cell=max_points_per_cell, + seed=seed, + ) + sampled_x = [float(ref_vals[idx]) for idx in sampled_indices] + sampled_y = [float(pred_vals[idx]) for idx in sampled_indices] + sampled = (sampled_x, sampled_y, sampled_density) global_min = min(global_min, ref_vals.min(), pred_vals.min()) global_max = max(global_max, ref_vals.max(), pred_vals.max()) diff --git a/ml_peg/analysis/utils/utils.py b/ml_peg/analysis/utils/utils.py index d317b00f6..ff6c92e73 100644 --- a/ml_peg/analysis/utils/utils.py +++ b/ml_peg/analysis/utils/utils.py @@ -2,10 +2,12 @@ from __future__ import annotations +from collections import defaultdict from collections.abc import Callable, Iterable from pathlib import Path from typing import Any +from ase.io import read, write from matplotlib import cm from matplotlib.colors import Colormap import numpy as np @@ -24,31 +26,42 @@ TableRow = dict[str, object] -def build_d3_name_map( +def build_dispersion_name_map( models: Iterable[str], suffix: str = "-D3", ) -> dict[str, str]: """ - Return a suffix map for models requiring runtime D3 corrections. + Return a suffix map for models requiring runtime dispersion corrections. Parameters ---------- models Iterable of model identifiers to inspect. suffix - String appended to model names that need the D3 indicator. + String appended to model names that need the dispersion correction indicator. + Defaults to "-D3" for D3 dispersion corrections. Returns ------- dict[str, str] - Mapping of model -> display name for models not trained with D3 dispersion. + Mapping of model -> display name for models not trained with dispersion. """ configs, _ = load_model_configs(tuple(models)) - return { - model: f"{model}{suffix}" - for model in models - if not configs[model]["trained_on_d3"] - } + name_map: dict[str, str] = {} + + for model in models: + if configs[model]["trained_on_dispersion"]: + continue + dispersion_kwargs = configs[model].get("dispersion_kwargs") or {} + label = dispersion_kwargs.get("label") + suffix_to_use = ( + label.strip() if isinstance(label, str) and label.strip() else suffix + ) + if not suffix_to_use.startswith("-"): + suffix_to_use = f"-{suffix_to_use}" + name_map[model] = f"{model}{suffix_to_use}" + + return name_map def load_metrics_config(config_path: Path) -> tuple[Thresholds, dict[str, str]]: @@ -165,6 +178,83 @@ def rmse(ref: list, prediction: list) -> float: return mean_squared_error(ref, prediction) +DENSITY_GRID_SIZE = 80 +DENSITY_MAX_POINTS_PER_CELL = 5 +DENSITY_SAMPLE_SEED = 0 + + +def sample_density_grid( + ref_vals: list[float] | np.ndarray, + pred_vals: list[float] | np.ndarray, + *, + grid_size: int = DENSITY_GRID_SIZE, + max_points_per_cell: int = DENSITY_MAX_POINTS_PER_CELL, + seed: int = DENSITY_SAMPLE_SEED, +) -> tuple[list[int], list[int], list[list[int]]]: + """ + Sample indices from a density grid, returning density and cell memberships. + + This is the shared implementation used by ``plot_density_scatter`` so the + sampling order stays consistent across density-driven artifacts. + + Parameters + ---------- + ref_vals + Reference (x-axis) values for all systems, in the same order as the + data passed to the density scatter fixture. + pred_vals + Predicted (y-axis) values, same order as ``ref_vals``. + grid_size + Number of bins per axis. Must match ``@plot_density_scatter`` default. + max_points_per_cell + Maximum sampled points per cell. Must match decorator default. + seed + RNG seed for deterministic sampling. Must match decorator default. + + Returns + ------- + tuple[list[int], list[int], list[list[int]]] + ``sampled_indices``: original indices kept in plotting order. + ``sampled_density``: density value per sampled index. + ``sampled_mapping``: list of source indices for each sampled point. + """ + ref_arr = np.asarray(ref_vals, dtype=float) + pred_arr = np.asarray(pred_vals, dtype=float) + + if ref_arr.size == 0 or pred_arr.size == 0: + return [], [], [] + + delta_x = ref_arr.max() - ref_arr.min() + delta_y = pred_arr.max() - pred_arr.min() + eps = 1e-9 + + norm_x = np.clip((ref_arr - ref_arr.min()) / max(delta_x, eps), 0.0, 0.999999) + norm_y = np.clip((pred_arr - pred_arr.min()) / max(delta_y, eps), 0.0, 0.999999) + bins_x = (norm_x * grid_size).astype(int) + bins_y = (norm_y * grid_size).astype(int) + + cell_points: dict[tuple[int, int], list[int]] = defaultdict(list) + for idx, (cx, cy) in enumerate(zip(bins_x, bins_y, strict=True)): + cell_points[(int(cx), int(cy))].append(idx) + + rng = np.random.default_rng(seed) + sampled_indices: list[int] = [] + sampled_density: list[int] = [] + sampled_mapping: list[list[int]] = [] + for indices in cell_points.values(): + if len(indices) > max_points_per_cell: + chosen = rng.choice(indices, size=max_points_per_cell, replace=False) + else: + chosen = indices + density = len(indices) + for idx in chosen: + sampled_indices.append(int(idx)) + sampled_density.append(density) + sampled_mapping.append(indices) + + return sampled_indices, sampled_density, sampled_mapping + + def build_density_inputs( models: list[str], model_results: dict[str, dict[str, Any]], @@ -214,6 +304,61 @@ def build_density_inputs( return inputs +def write_density_trajectories( + *, + labels_list: list[str], + ref_vals: list[float], + pred_vals: list[float], + struct_dir: Path, + traj_dir: Path, + struct_filename_builder: Callable[[str], str], + grid_size: int = DENSITY_GRID_SIZE, + max_points_per_cell: int = DENSITY_MAX_POINTS_PER_CELL, + seed: int = DENSITY_SAMPLE_SEED, +) -> None: + """ + Write one extxyz trajectory per sampled density point for WEAS display. + + Parameters + ---------- + labels_list + Ordered system labels matching ``ref_vals``/``pred_vals``. + ref_vals + Reference values passed to the density scatter. + pred_vals + Predicted values passed to the density scatter. + struct_dir + Directory containing per-system structure files. + traj_dir + Output directory for sampled density trajectories. + struct_filename_builder + Function to convert each label into the source structure filename. + grid_size + Number of bins per axis. Must match ``@plot_density_scatter``. + max_points_per_cell + Maximum sampled points per occupied density cell. + seed + RNG seed for deterministic sampling. + """ + _, _, sampled_mapping = sample_density_grid( + ref_vals, + pred_vals, + grid_size=grid_size, + max_points_per_cell=max_points_per_cell, + seed=seed, + ) + + traj_dir.mkdir(parents=True, exist_ok=True) + + for point_idx, source_indices in enumerate(sampled_mapping): + frames = [] + for source_idx in source_indices: + label = labels_list[source_idx] + struct_path = struct_dir / struct_filename_builder(label) + frames.append(read(struct_path)) + write(traj_dir / f"{point_idx}.extxyz", frames) + + def calc_metric_scores( metrics_data: list[MetricRow], thresholds: Thresholds | None = None, diff --git a/ml_peg/app/build_app.py b/ml_peg/app/build_app.py index ff7b61ef3..6867dc10f 100644 --- a/ml_peg/app/build_app.py +++ b/ml_peg/app/build_app.py @@ -7,7 +7,7 @@ from dash import Dash, Input, Output, callback from dash.dash_table import DataTable -from dash.dcc import Store, Tab, Tabs +from dash.dcc import Loading, Store, Tab, Tabs from dash.html import H1, H3, Div from yaml import safe_load @@ -382,7 +382,25 @@ def build_tabs( [ H1("ML-PEG"), Tabs(id="all-tabs", value="summary-tab", children=all_tabs), - Div(id="tabs-content"), + Loading( + Div(id="tabs-content"), + type="circle", + color="#119DFF", + fullscreen=False, + # dont trigger both start up load wheel + tab change load wheel + # (when switching to summary tab) + target_components={"tabs-content": "children"}, + style={ + # Pin near the top so the spinner is visible on long pages + # (default is centre of page) + "position": "fixed", + "top": "300px", + "left": "50%", + "transform": "translateX(-50%)", + "zIndex": "1100", + }, + parent_style={"position": "relative"}, + ), ], style={"flex": "1", "marginBottom": "40px"}, ), diff --git a/ml_peg/app/conformers/ACONFL/app_ACONFL.py b/ml_peg/app/conformers/ACONFL/app_ACONFL.py new file mode 100644 index 000000000..d8d85b91a --- /dev/null +++ b/ml_peg/app/conformers/ACONFL/app_ACONFL.py @@ -0,0 +1,87 @@ +"""Run ACONFL app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import ( + plot_from_table_column, + struct_from_scatter, +) +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "ACONFL" +DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/conformers.html#aconfl" +DATA_PATH = APP_ROOT / "data" / "conformers" / "ACONFL" + + +class ACONFLApp(BaseApp): + """ACONFL benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter = read_plot( + DATA_PATH / "figure_aconfl.json", + id=f"{BENCHMARK_NAME}-figure", + ) + + model_dir = DATA_PATH / MODELS[0] + if model_dir.exists(): + labels = sorted([f.stem for f in model_dir.glob("*.xyz")]) + structs = [ + f"assets/conformers/ACONFL/{MODELS[0]}/{label}.xyz" for label in labels + ] + else: + structs = [] + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={"MAE": scatter}, + ) + + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="struct", + ) + + +def get_app() -> ACONFLApp: + """ + Get ACONFL benchmark app layout and callback registration. + + Returns + ------- + ACONFLApp + Benchmark layout and callback registration. + """ + return ACONFLApp( + name=BENCHMARK_NAME, + description=( + "Performance in predicting relative conformer energies " + "of 12 C12H26, 16 C16H34 and 20 C20H42 conformers. " + "Reference data from PNO-LCCSD(T)-F12/ AVQZ calculations." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "aconfl_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + ], + ) + + +if __name__ == "__main__": + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + benchmark_app = get_app() + full_app.layout = benchmark_app.layout + benchmark_app.register_callbacks() + full_app.run(port=8062, debug=True) diff --git a/ml_peg/app/molecular/BMIMCl_RDF/app_BMIMCl_RDF.py b/ml_peg/app/molecular/BMIMCl_RDF/app_BMIMCl_RDF.py new file mode 100644 index 000000000..c8dbc5c06 --- /dev/null +++ b/ml_peg/app/molecular/BMIMCl_RDF/app_BMIMCl_RDF.py @@ -0,0 +1,66 @@ +"""Run BMIMCl RDF benchmark app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_column +from ml_peg.app.utils.load import read_plot + +BENCHMARK_NAME = "BMIMCl Cl-C RDF" +DOCS_URL = ( + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular.html#bmimcl-rdf" +) +DATA_PATH = APP_ROOT / "data" / "molecular" / "BMIMCl_RDF" + + +class BMIMClRDFApp(BaseApp): + """BMIMCl RDF benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + rdf_plot = read_plot( + DATA_PATH / "figure_rdf.json", + id=f"{BENCHMARK_NAME}-figure", + ) + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={"Cl-C Bonds Formed": rdf_plot}, + ) + + +def get_app() -> BMIMClRDFApp: + """ + Get BMIMCl RDF benchmark app layout and callback registration. + + Returns + ------- + BMIMClRDFApp + Benchmark layout and callback registration. + """ + return BMIMClRDFApp( + name=BENCHMARK_NAME, + description=( + "Tests whether MLIPs incorrectly predict Cl-C bond formation " + "in BMIMCl ionic liquid. Bonds should NOT form." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "bmimcl_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + ], + ) + + +if __name__ == "__main__": + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + bmimcl_app = get_app() + full_app.layout = bmimcl_app.layout + bmimcl_app.register_callbacks() + + full_app.run(port=8054, debug=True) diff --git a/ml_peg/app/molecular_dynamics/water_density/app_water_density.py b/ml_peg/app/molecular_dynamics/water_density/app_water_density.py new file mode 100644 index 000000000..b5f7c5769 --- /dev/null +++ b/ml_peg/app/molecular_dynamics/water_density/app_water_density.py @@ -0,0 +1,69 @@ +"""Run water density app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import ( + plot_from_table_column, +) +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "WaterDensity" +DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular_dynamics.html#Water_Density" +DATA_PATH = APP_ROOT / "data" / "molecular_dynamics" / "water_density" + + +class WaterDensityApp(BaseApp): + """Water density benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter = read_plot( + DATA_PATH / "figure_water_density.json", + id=f"{BENCHMARK_NAME}-figure", + ) + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={"MAE": scatter}, + ) + + +def get_app() -> WaterDensityApp: + """ + Get water density benchmark app layout and callback registration. + + Returns + ------- + WaterDensityApp + Benchmark layout and callback registration. + """ + return WaterDensityApp( + name=BENCHMARK_NAME, + description=( + "Performance in predicting water density at different temperatures." + "Reference data is experimental densities." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "water_density_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + ], + ) + + +if __name__ == "__main__": + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + benchmark_app = get_app() + full_app.layout = benchmark_app.layout + benchmark_app.register_callbacks() + full_app.run(port=8063, debug=True) diff --git a/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py b/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py index fd471061e..199452a12 100644 --- a/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py +++ b/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py @@ -9,8 +9,9 @@ from ml_peg.app.base_app import BaseApp from ml_peg.app.utils.build_callbacks import ( plot_from_table_cell, + struct_from_scatter, ) -from ml_peg.app.utils.load import read_density_plot_for_model +from ml_peg.app.utils.load import collect_traj_assets, read_density_plot_for_model from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models @@ -49,6 +50,20 @@ def register_callbacks(self) -> None: cell_to_plot=density_plots, ) + struct_trajs = collect_traj_assets( + data_path=DATA_PATH, + assets_prefix="assets/molecular_reactions/RDB7", + models=MODELS, + ) + + for model in struct_trajs: + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-{model}-barrier-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=struct_trajs[model], + mode="traj", + ) + def get_app() -> RDB7App: """ @@ -70,6 +85,7 @@ def get_app() -> RDB7App: table_path=DATA_PATH / "rdb7_barriers_metrics_table.json", extra_components=[ Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), ], ) diff --git a/ml_peg/app/nebs/si_defects/app_si_defects.py b/ml_peg/app/nebs/si_defects/app_si_defects.py new file mode 100644 index 000000000..a15a6936c --- /dev/null +++ b/ml_peg/app/nebs/si_defects/app_si_defects.py @@ -0,0 +1,123 @@ +"""Run app for Si defects benchmark.""" + +from __future__ import annotations + +from dataclasses import dataclass + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_cell, struct_from_scatter +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) + +BENCHMARK_NAME = "Si defects" +DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/nebs.html#si-defects" +DATA_PATH = APP_ROOT / "data" / "nebs" / "si_defects" + + +@dataclass(frozen=True) +class _Case: + """Definition of a single Si defects NEB dataset.""" + + key: str + label: str + + +CASES: tuple[_Case, ...] = ( + _Case(key="64_atoms", label="64"), + _Case(key="216_atoms", label="216"), + _Case(key="216_atoms_di_to_single", label="216 di-to-single"), +) + + +class SiDefectNebSinglepointsApp(BaseApp): + """Si defects benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register interactive callbacks for plot and structure viewing.""" + scatter_plots: dict[str, dict] = {} + + for model in MODELS: + model_plots: dict[str, object] = {} + for case in CASES: + energy_plot_path = ( + DATA_PATH / f"figure_{model}_{case.key}_energy_error.json" + ) + force_plot_path = ( + DATA_PATH / f"figure_{model}_{case.key}_force_rms.json" + ) + if energy_plot_path.exists(): + model_plots[f"Energy MAE ({case.label})"] = read_plot( + energy_plot_path, + id=f"{BENCHMARK_NAME}-{model}-{case.key}-energy-figure", + ) + if force_plot_path.exists(): + model_plots[f"Force MAE ({case.label})"] = read_plot( + force_plot_path, + id=f"{BENCHMARK_NAME}-{model}-{case.key}-force-figure", + ) + if model_plots: + scatter_plots[model] = model_plots + + plot_from_table_cell( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + cell_to_plot=scatter_plots, + ) + + for model in scatter_plots: + for case in CASES: + structs = ( + f"assets/nebs/si_defects/{case.key}/{model}/{model}-neb-band.extxyz" + ) + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-{model}-{case.key}-energy-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="traj", + ) + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-{model}-{case.key}-force-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="traj", + ) + + +def get_app() -> SiDefectNebSinglepointsApp: + """ + Get Si defects app. + + Returns + ------- + SiDefectNebSinglepointsApp + App instance. + """ + return SiDefectNebSinglepointsApp( + name=BENCHMARK_NAME, + description=( + "Energy/force MAE of MLIPs on fixed Si interstitial migration NEB images, " + "referenced to DFT singlepoints." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "si_defects_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + ], + ) + + +if __name__ == "__main__": + # Use APP_ROOT/data as assets root so `assets/nebs/...` resolves correctly. + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + benchmark_app = get_app() + full_app.layout = benchmark_app.layout + benchmark_app.register_callbacks() + full_app.run(port=8060, debug=True) diff --git a/ml_peg/app/non_covalent_interactions/NCIA_D1200/app_NCIA_D1200.py b/ml_peg/app/non_covalent_interactions/NCIA_D1200/app_NCIA_D1200.py index 9d4b4ea32..e249dac79 100644 --- a/ml_peg/app/non_covalent_interactions/NCIA_D1200/app_NCIA_D1200.py +++ b/ml_peg/app/non_covalent_interactions/NCIA_D1200/app_NCIA_D1200.py @@ -7,8 +7,8 @@ from ml_peg.app import APP_ROOT from ml_peg.app.base_app import BaseApp -from ml_peg.app.utils.build_callbacks import plot_from_table_cell -from ml_peg.app.utils.load import read_density_plot_for_model +from ml_peg.app.utils.build_callbacks import plot_from_table_cell, struct_from_scatter +from ml_peg.app.utils.load import collect_traj_assets, read_density_plot_for_model from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models @@ -39,6 +39,21 @@ def register_callbacks(self) -> None: cell_to_plot=density_plots, ) + struct_trajs = collect_traj_assets( + data_path=DATA_PATH, + assets_prefix="assets/non_covalent_interactions/NCIA_D1200", + models=MODELS, + traj_dirname="density_traj", + suffix=".extxyz", + ) + for model in struct_trajs: + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-{model}-density", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=struct_trajs[model], + mode="traj", + ) + def get_app() -> NCIAD1200App: """ @@ -60,6 +75,7 @@ def get_app() -> NCIAD1200App: table_path=DATA_PATH / "ncia_d1200_metrics_table.json", extra_components=[ Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), ], ) diff --git a/ml_peg/app/non_covalent_interactions/NCIA_D442x10/app_NCIA_D442x10.py b/ml_peg/app/non_covalent_interactions/NCIA_D442x10/app_NCIA_D442x10.py index af22dd273..5202f89fc 100644 --- a/ml_peg/app/non_covalent_interactions/NCIA_D442x10/app_NCIA_D442x10.py +++ b/ml_peg/app/non_covalent_interactions/NCIA_D442x10/app_NCIA_D442x10.py @@ -7,8 +7,8 @@ from ml_peg.app import APP_ROOT from ml_peg.app.base_app import BaseApp -from ml_peg.app.utils.build_callbacks import plot_from_table_cell -from ml_peg.app.utils.load import read_density_plot_for_model +from ml_peg.app.utils.build_callbacks import plot_from_table_cell, struct_from_scatter +from ml_peg.app.utils.load import collect_traj_assets, read_density_plot_for_model from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models @@ -39,6 +39,21 @@ def register_callbacks(self) -> None: cell_to_plot=density_plots, ) + struct_trajs = collect_traj_assets( + data_path=DATA_PATH, + assets_prefix="assets/non_covalent_interactions/NCIA_D442x10", + models=MODELS, + traj_dirname="density_traj", + suffix=".extxyz", + ) + for model in struct_trajs: + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-{model}-density", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=struct_trajs[model], + mode="traj", + ) + def get_app() -> NCIAD442x10App: """ @@ -60,6 +75,7 @@ def get_app() -> NCIAD442x10App: table_path=DATA_PATH / "ncia_d442x10_metrics_table.json", extra_components=[ Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), ], ) diff --git a/ml_peg/app/non_covalent_interactions/NCIA_HB300SPXx10/app_NCIA_HB300SPXx10.py b/ml_peg/app/non_covalent_interactions/NCIA_HB300SPXx10/app_NCIA_HB300SPXx10.py index 2b3288dec..16ae5db9f 100644 --- a/ml_peg/app/non_covalent_interactions/NCIA_HB300SPXx10/app_NCIA_HB300SPXx10.py +++ b/ml_peg/app/non_covalent_interactions/NCIA_HB300SPXx10/app_NCIA_HB300SPXx10.py @@ -7,8 +7,8 @@ from ml_peg.app import APP_ROOT from ml_peg.app.base_app import BaseApp -from ml_peg.app.utils.build_callbacks import plot_from_table_cell -from ml_peg.app.utils.load import read_density_plot_for_model +from ml_peg.app.utils.build_callbacks import plot_from_table_cell, struct_from_scatter +from ml_peg.app.utils.load import collect_traj_assets, read_density_plot_for_model from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models @@ -42,6 +42,21 @@ def register_callbacks(self) -> None: cell_to_plot=density_plots, ) + struct_trajs = collect_traj_assets( + data_path=DATA_PATH, + assets_prefix="assets/non_covalent_interactions/NCIA_HB300SPXx10", + models=MODELS, + traj_dirname="density_traj", + suffix=".extxyz", + ) + for model in struct_trajs: + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-{model}-density", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=struct_trajs[model], + mode="traj", + ) + def get_app() -> NCIANHB300SPXx10App: """ @@ -63,6 +78,7 @@ def get_app() -> NCIANHB300SPXx10App: table_path=DATA_PATH / "ncia_hb300spxx10_metrics_table.json", extra_components=[ Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), ], ) diff --git a/ml_peg/app/non_covalent_interactions/NCIA_HB375x10/app_NCIA_HB375x10.py b/ml_peg/app/non_covalent_interactions/NCIA_HB375x10/app_NCIA_HB375x10.py index 578a55e6c..6b1f9d39e 100644 --- a/ml_peg/app/non_covalent_interactions/NCIA_HB375x10/app_NCIA_HB375x10.py +++ b/ml_peg/app/non_covalent_interactions/NCIA_HB375x10/app_NCIA_HB375x10.py @@ -7,8 +7,8 @@ from ml_peg.app import APP_ROOT from ml_peg.app.base_app import BaseApp -from ml_peg.app.utils.build_callbacks import plot_from_table_cell -from ml_peg.app.utils.load import read_density_plot_for_model +from ml_peg.app.utils.build_callbacks import plot_from_table_cell, struct_from_scatter +from ml_peg.app.utils.load import collect_traj_assets, read_density_plot_for_model from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models @@ -42,6 +42,21 @@ def register_callbacks(self) -> None: cell_to_plot=density_plots, ) + struct_trajs = collect_traj_assets( + data_path=DATA_PATH, + assets_prefix="assets/non_covalent_interactions/NCIA_HB375x10", + models=MODELS, + traj_dirname="density_traj", + suffix=".extxyz", + ) + for model in struct_trajs: + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-{model}-density", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=struct_trajs[model], + mode="traj", + ) + def get_app() -> NCIANHB375x10App: """ @@ -63,6 +78,7 @@ def get_app() -> NCIANHB375x10App: table_path=DATA_PATH / "ncia_hb375x10_metrics_table.json", extra_components=[ Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), ], ) diff --git a/ml_peg/app/non_covalent_interactions/NCIA_IHB100x10/app_NCIA_IHB100x10.py b/ml_peg/app/non_covalent_interactions/NCIA_IHB100x10/app_NCIA_IHB100x10.py index f21bb8be6..5ef64908f 100644 --- a/ml_peg/app/non_covalent_interactions/NCIA_IHB100x10/app_NCIA_IHB100x10.py +++ b/ml_peg/app/non_covalent_interactions/NCIA_IHB100x10/app_NCIA_IHB100x10.py @@ -7,8 +7,8 @@ from ml_peg.app import APP_ROOT from ml_peg.app.base_app import BaseApp -from ml_peg.app.utils.build_callbacks import plot_from_table_cell -from ml_peg.app.utils.load import read_density_plot_for_model +from ml_peg.app.utils.build_callbacks import plot_from_table_cell, struct_from_scatter +from ml_peg.app.utils.load import collect_traj_assets, read_density_plot_for_model from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models @@ -42,6 +42,21 @@ def register_callbacks(self) -> None: cell_to_plot=density_plots, ) + struct_trajs = collect_traj_assets( + data_path=DATA_PATH, + assets_prefix="assets/non_covalent_interactions/NCIA_IHB100x10", + models=MODELS, + traj_dirname="density_traj", + suffix=".extxyz", + ) + for model in struct_trajs: + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-{model}-density", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=struct_trajs[model], + mode="traj", + ) + def get_app() -> NCIANIHB100x10App: """ @@ -63,6 +78,7 @@ def get_app() -> NCIANIHB100x10App: table_path=DATA_PATH / "ncia_ihb100x10_metrics_table.json", extra_components=[ Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), ], ) diff --git a/ml_peg/app/non_covalent_interactions/NCIA_R739x5/app_NCIA_R739x5.py b/ml_peg/app/non_covalent_interactions/NCIA_R739x5/app_NCIA_R739x5.py index f9ea4f6c6..d508d184f 100644 --- a/ml_peg/app/non_covalent_interactions/NCIA_R739x5/app_NCIA_R739x5.py +++ b/ml_peg/app/non_covalent_interactions/NCIA_R739x5/app_NCIA_R739x5.py @@ -7,8 +7,8 @@ from ml_peg.app import APP_ROOT from ml_peg.app.base_app import BaseApp -from ml_peg.app.utils.build_callbacks import plot_from_table_cell -from ml_peg.app.utils.load import read_density_plot_for_model +from ml_peg.app.utils.build_callbacks import plot_from_table_cell, struct_from_scatter +from ml_peg.app.utils.load import collect_traj_assets, read_density_plot_for_model from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models @@ -42,6 +42,21 @@ def register_callbacks(self) -> None: cell_to_plot=density_plots, ) + struct_trajs = collect_traj_assets( + data_path=DATA_PATH, + assets_prefix="assets/non_covalent_interactions/NCIA_R739x5", + models=MODELS, + traj_dirname="density_traj", + suffix=".extxyz", + ) + for model in struct_trajs: + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-{model}-density", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=struct_trajs[model], + mode="traj", + ) + def get_app() -> NCIAR739x5App: """ @@ -63,6 +78,7 @@ def get_app() -> NCIAR739x5App: table_path=DATA_PATH / "ncia_r739x5_metrics_table.json", extra_components=[ Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), ], ) diff --git a/ml_peg/app/non_covalent_interactions/NCIA_SH250x10/app_NCIA_SH250x10.py b/ml_peg/app/non_covalent_interactions/NCIA_SH250x10/app_NCIA_SH250x10.py index d6a7c0391..c213b6053 100644 --- a/ml_peg/app/non_covalent_interactions/NCIA_SH250x10/app_NCIA_SH250x10.py +++ b/ml_peg/app/non_covalent_interactions/NCIA_SH250x10/app_NCIA_SH250x10.py @@ -7,8 +7,8 @@ from ml_peg.app import APP_ROOT from ml_peg.app.base_app import BaseApp -from ml_peg.app.utils.build_callbacks import plot_from_table_cell -from ml_peg.app.utils.load import read_density_plot_for_model +from ml_peg.app.utils.build_callbacks import plot_from_table_cell, struct_from_scatter +from ml_peg.app.utils.load import collect_traj_assets, read_density_plot_for_model from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models @@ -42,6 +42,21 @@ def register_callbacks(self) -> None: cell_to_plot=density_plots, ) + struct_trajs = collect_traj_assets( + data_path=DATA_PATH, + assets_prefix="assets/non_covalent_interactions/NCIA_SH250x10", + models=MODELS, + traj_dirname="density_traj", + suffix=".extxyz", + ) + for model in struct_trajs: + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-{model}-density", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=struct_trajs[model], + mode="traj", + ) + def get_app() -> NCIASH250x10App: """ @@ -63,6 +78,7 @@ def get_app() -> NCIASH250x10App: table_path=DATA_PATH / "ncia_sh250x10_metrics_table.json", extra_components=[ Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), ], ) diff --git a/ml_peg/app/non_covalent_interactions/QUID/app_QUID.py b/ml_peg/app/non_covalent_interactions/QUID/app_QUID.py new file mode 100644 index 000000000..c8947295d --- /dev/null +++ b/ml_peg/app/non_covalent_interactions/QUID/app_QUID.py @@ -0,0 +1,92 @@ +"""Run QUID app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import ( + plot_from_table_column, + struct_from_scatter, +) +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "QUID" +DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/non_covalent_interactions.html#quid" +DATA_PATH = APP_ROOT / "data" / "non_covalent_interactions" / "QUID" + + +class QUIDApp(BaseApp): + """QUID benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter = read_plot( + DATA_PATH / "figure_quid.json", + id=f"{BENCHMARK_NAME}-figure", + ) + + model_dir = DATA_PATH / MODELS[0] + if model_dir.exists(): + # Note: sorting different to rxn_count order in calc + ts_files = sorted(model_dir.glob("*.xyz")) + structs = [ + f"assets/non_covalent_interactions/QUID/{MODELS[0]}/{ts_file.name}" + for ts_file in ts_files + ] + else: + structs = [] + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={ + "Overall MAE": scatter, + "Equilibrium MAE": scatter, + "Dissociation MAE": scatter, + }, + ) + + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="struct", + ) + + +def get_app() -> QUIDApp: + """ + Get QUID benchmark app layout and callback registration. + + Returns + ------- + QUIDApp + Benchmark layout and callback registration. + """ + return QUIDApp( + name=BENCHMARK_NAME, + description=( + "Performance in predicting ligand-pocket interaction energies " + "for the QUID dataset." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "quid_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + ], + ) + + +if __name__ == "__main__": + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + benchmark_app = get_app() + full_app.layout = benchmark_app.layout + benchmark_app.register_callbacks() + full_app.run(port=8071, debug=True) diff --git a/ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py b/ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py new file mode 100644 index 000000000..2ac3a0fb8 --- /dev/null +++ b/ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py @@ -0,0 +1,94 @@ +"""Run 3dTMV barriers app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import ( + plot_from_table_column, + struct_from_scatter, +) +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "3dTMV" +DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/tm_complexes.html#dtmv" +DATA_PATH = APP_ROOT / "data" / "tm_complexes" / "3dTMV" + + +class Benchmark3dTMVApp(BaseApp): + """3dTMV benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter = read_plot( + DATA_PATH / "figure_3dtmv.json", + id=f"{BENCHMARK_NAME}-figure", + ) + + model_dir = DATA_PATH / MODELS[0] + if model_dir.exists(): + # Note: sorting different to rxn_count order in calc + ts_files = sorted(model_dir.glob("*.xyz"), key=lambda path: int(path.stem)) + structs = [ + f"assets/tm_complexes/3dTMV/{MODELS[0]}/{ts_file.name}" + for ts_file in ts_files + ] + else: + structs = [] + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={ + "Overall MAE": scatter, + "SR MAE": scatter, + "MR MAE": scatter, + "SR/MR MAE": scatter, + }, + ) + + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="struct", + ) + + +def get_app() -> Benchmark3dTMVApp: + """ + Get 3dTMV benchmark app layout and callback registration. + + Returns + ------- + Benchmark3dTMVApp + Benchmark layout and callback registration. + """ + return Benchmark3dTMVApp( + name=BENCHMARK_NAME, + description=( + "Performance in predicting vertical ionization energies for the " + "3dTMV dataset of 28 transition metal complexes." + "Reference data from ph-AFQMC calculations." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "3dtmv_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + ], + ) + + +if __name__ == "__main__": + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + benchmark_app = get_app() + full_app.layout = benchmark_app.layout + benchmark_app.register_callbacks() + full_app.run(port=8071, debug=True) diff --git a/ml_peg/app/utils/load.py b/ml_peg/app/utils/load.py index 861c395c4..78abc20be 100644 --- a/ml_peg/app/utils/load.py +++ b/ml_peg/app/utils/load.py @@ -334,3 +334,49 @@ def read_density_plot_for_model( return None return Graph(id=id, figure=filtered_fig) + + +def collect_traj_assets( + *, + data_path: Path, + assets_prefix: str, + models: list[str], + traj_dirname: str = "density_traj", + suffix: str = ".extxyz", +) -> dict[str, list[str]]: + """ + Collect trajectory asset paths for each model. + + Parameters + ---------- + data_path + Base data directory containing per-model folders. + assets_prefix + Assets URL prefix (e.g., ``"assets/molecular_reactions/RDB7"``). + models + Ordered list of model names to include. + traj_dirname + Subdirectory name containing trajectories (default: ``"density_traj"``). + suffix + File suffix to match (default: ``".extxyz"``). + + Returns + ------- + dict[str, list[str]] + Mapping of model name to list of asset paths. + """ + assets_base = assets_prefix.rstrip("/") + struct_trajs: dict[str, list[str]] = {} + + for model in models: + traj_dir = data_path / model / traj_dirname + traj_files = sorted( + traj_dir.glob(f"*{suffix}"), key=lambda path: int(path.stem) + ) + if traj_files: + struct_trajs[model] = [ + f"{assets_base}/{model}/{traj_dirname}/{traj_file.name}" + for traj_file in traj_files + ] + + return struct_trajs diff --git a/ml_peg/app/utils/weas.py b/ml_peg/app/utils/weas.py index 1a178f7bf..d790ccd59 100644 --- a/ml_peg/app/utils/weas.py +++ b/ml_peg/app/utils/weas.py @@ -42,6 +42,14 @@ def generate_weas_html( +