diff --git a/docs/source/user_guide/benchmarks/index.rst b/docs/source/user_guide/benchmarks/index.rst
index ad3c82f96..8e907ce78 100644
--- a/docs/source/user_guide/benchmarks/index.rst
+++ b/docs/source/user_guide/benchmarks/index.rst
@@ -16,3 +16,4 @@ Benchmarks
     non_covalent_interactions
     tm_complexes
     conformers
+    molecular_dynamics
diff --git a/docs/source/user_guide/benchmarks/molecular_dynamics.rst b/docs/source/user_guide/benchmarks/molecular_dynamics.rst
new file mode 100644
index 000000000..1b7248cec
--- /dev/null
+++ b/docs/source/user_guide/benchmarks/molecular_dynamics.rst
@@ -0,0 +1,40 @@
+==================
+Molecular dynamics
+==================
+
+Liquid densities
+================
+
+Summary
+-------
+
+Performance in predicting densities for 61 organic liquids, each system consisting of
+about 1000 atoms. The dataset covers aliphatic, aromatic molecules, as well as different
+functional groups and halogenated molecules.
+
+Metrics
+-------
+
+1. Density error
+
+For each system, the density is calculated by taking the average density of an NPT molecular
+dynamics run. The initial part of the simulation, here 500 ps, is omitted from the density
+calculation. This is compared to the reference density, obtained from experiment.
+
+Computational cost
+------------------
+
+Low: tests are likely to take several days to run on GPU.
+
+Data availability
+-----------------
+
+Input structures:
+
+* Weber et al., Efficient Long-Range Machine Learning Force Fields for
+    Liquid and Materials Properties.
+    arXiv:2505.06462 [physics.chem-ph]
+
+Reference data:
+
+* Experiment
diff --git a/ml_peg/analysis/molecular_dynamics/liquid_densities/analyse_liquid_densities.py b/ml_peg/analysis/molecular_dynamics/liquid_densities/analyse_liquid_densities.py
new file mode 100644
index 000000000..8087931a9
--- /dev/null
+++ b/ml_peg/analysis/molecular_dynamics/liquid_densities/analyse_liquid_densities.py
@@ -0,0 +1,176 @@
+"""Analyse the liquid densities benchmark."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from ase.io import Trajectory, write
+import numpy as np
+import pytest
+
+from ml_peg.analysis.utils.decorators import build_table, plot_parity
+from ml_peg.analysis.utils.utils import (
+    build_dispersion_name_map,
+    load_metrics_config,
+    mae,
+)
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+D3_MODEL_NAMES = build_dispersion_name_map(MODELS)
+
+
+LOG_INTERVAL_PS = 0.1
+EQUILIB_TIME_PS = 500
+
+CALC_PATH = CALCS_ROOT / "molecular_dynamics" / "liquid_densities" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "molecular_dynamics" / "liquid_densities"
+
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+
+def labels() -> list:
+    """
+    Get list of system names.
+
+    Returns
+    -------
+    list
+        List of all system names.
+    """
+    for model_name in MODELS:
+        labels = [path.stem for path in (CALC_PATH / model_name).glob("*.log")]
+    return labels
+
+
+def compute_density(fname, density_col=13):
+    """
+    Compute average density from NPT log file.
+
+    Parameters
+    ----------
+    fname
+        Path to the log file.
+    density_col
+        Which column the density numbers are in.
+
+    Returns
+    -------
+    float
+        Average density in g/cm3.
+    """
+    density_series = []
+    with open(fname) as lines:
+        for line in lines:
+            items = line.strip().split()
+            if len(items) != 15:
+                continue
+            density_series.append(float(items[13]))
+    skip_frames = int(EQUILIB_TIME_PS / LOG_INTERVAL_PS)
+    return np.mean(density_series[skip_frames:])
+
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure_liquid_densities.json",
+    title="Densities",
+    x_label="Predicted density / kcal/mol",
+    y_label="Reference density / kcal/mol",
+    hoverdata={
+        "Labels": labels(),
+    },
+)
+def liquid_densities() -> dict[str, list]:
+    """
+    Get liquid densities for all systems.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of all reference and predicted densities.
+    """
+    results = {"ref": []} | {mlip: [] for mlip in MODELS}
+    ref_stored = False
+
+    for model_name in MODELS:
+        for label in labels():
+            atoms = Trajectory(CALC_PATH / model_name / f"{label}.traj")[-1]
+
+            results[model_name].append(
+                compute_density(CALC_PATH / model_name / f"{label}.log")
+            )
+            if not ref_stored:
+                results["ref"].append(atoms.info["exp_density"])
+
+            # Write structures for app
+            structs_dir = OUT_PATH / model_name
+            structs_dir.mkdir(parents=True, exist_ok=True)
+            write(structs_dir / f"{label}.xyz", atoms)
+        ref_stored = True
+    return results
+
+
+@pytest.fixture
+def get_mae(liquid_densities) -> dict[str, float]:
+    """
+    Get mean absolute error for liquid densities.
+
+    Parameters
+    ----------
+    liquid_densities
+        Dictionary of reference and predicted liquid densities.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted liquid densities errors for all models.
+    """
+    results = {}
+    for model_name in MODELS:
+        results[model_name] = mae(liquid_densities["ref"], liquid_densities[model_name])
+    return results
+
+
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / "liquid_densities_metrics_table.json",
+    metric_tooltips=DEFAULT_TOOLTIPS,
+    thresholds=DEFAULT_THRESHOLDS,
+    mlip_name_map=D3_MODEL_NAMES,
+)
+def metrics(get_mae: dict[str, float]) -> dict[str, dict]:
+    """
+    Get all metrics.
+
+    Parameters
+    ----------
+    get_mae
+        Mean absolute errors for all models.
+
+    Returns
+    -------
+    dict[str, dict]
+        Metric names and values for all models.
+    """
+    return {
+        "MAE": get_mae,
+    }
+
+
+def test_liquid_densities(metrics: dict[str, dict]) -> None:
+    """
+    Run liquid densities test.
+
+    Parameters
+    ----------
+    metrics
+        All new benchmark metric names and dictionary of values for each model.
+    """
+    return
diff --git a/ml_peg/analysis/molecular_dynamics/liquid_densities/metrics.yml b/ml_peg/analysis/molecular_dynamics/liquid_densities/metrics.yml
new file mode 100644
index 000000000..7b5051980
--- /dev/null
+++ b/ml_peg/analysis/molecular_dynamics/liquid_densities/metrics.yml
@@ -0,0 +1,7 @@
+metrics:
+  MAE:
+    good: 0.0
+    bad: 0.4
+    unit: g/cm^3
+    tooltip: Mean Absolute Error for all systems
+    level_of_theory: Experimental density
diff --git a/ml_peg/app/molecular_dynamics/liquid_densities/app_liquid_densities.py b/ml_peg/app/molecular_dynamics/liquid_densities/app_liquid_densities.py
new file mode 100644
index 000000000..d3bb19bdb
--- /dev/null
+++ b/ml_peg/app/molecular_dynamics/liquid_densities/app_liquid_densities.py
@@ -0,0 +1,87 @@
+"""Run liquid densities app."""
+
+from __future__ import annotations
+
+from dash import Dash
+from dash.html import Div
+
+from ml_peg.app import APP_ROOT
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import (
+    plot_from_table_column,
+    struct_from_scatter,
+)
+from ml_peg.app.utils.load import read_plot
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+BENCHMARK_NAME = "DipCONFS"
+DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular_dynamics.html#liquid_densities"
+DATA_PATH = APP_ROOT / "data" / "molecular_dynamics" / "liquid_densities"
+
+
+class LiquidDensitiesApp(BaseApp):
+    """Liquid densities benchmark app layout and callbacks."""
+
+    def register_callbacks(self) -> None:
+        """Register callbacks to app."""
+        scatter = read_plot(
+            DATA_PATH / "figure_liquid_densities.json",
+            id=f"{BENCHMARK_NAME}-figure",
+        )
+
+        model_dir = DATA_PATH / MODELS[0]
+        if model_dir.exists():
+            labels = sorted([f.stem for f in model_dir.glob("*.xyz")])
+            structs = [
+                f"assets/molecular_dynamics/liquid_densities/{MODELS[0]}/{label}.xyz"
+                for label in labels
+            ]
+        else:
+            structs = []
+
+        plot_from_table_column(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            column_to_plot={"MAE": scatter},
+        )
+
+        struct_from_scatter(
+            scatter_id=f"{BENCHMARK_NAME}-figure",
+            struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+            structs=structs,
+            mode="struct",
+        )
+
+
+def get_app() -> LiquidDensitiesApp:
+    """
+    Get liquid densities benchmark app layout and callback registration.
+
+    Returns
+    -------
+    LiquidDensitiesApp
+        Benchmark layout and callback registration.
+    """
+    return LiquidDensitiesApp(
+        name=BENCHMARK_NAME,
+        description=(
+            "Performance in predicting organic liquid densities."
+            "Reference data is experimental densities."
+        ),
+        docs_url=DOCS_URL,
+        table_path=DATA_PATH / "liquid_densities_metrics_table.json",
+        extra_components=[
+            Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+            Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
+        ],
+    )
+
+
+if __name__ == "__main__":
+    full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
+    benchmark_app = get_app()
+    full_app.layout = benchmark_app.layout
+    benchmark_app.register_callbacks()
+    full_app.run(port=8063, debug=True)
diff --git a/ml_peg/calcs/molecular_dynamics/liquid_densities/calc_liquid_densities.py b/ml_peg/calcs/molecular_dynamics/liquid_densities/calc_liquid_densities.py
new file mode 100644
index 000000000..3c571e54d
--- /dev/null
+++ b/ml_peg/calcs/molecular_dynamics/liquid_densities/calc_liquid_densities.py
@@ -0,0 +1,175 @@
+"""
+Calculate densities using NPT of 63 liquids.
+
+Liquids include 62 from arXiv:2505.06462 [physics.chem-ph],
+in addition to 1-octanol.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from pathlib import Path
+import time
+from typing import Any
+
+from ase import Atoms, units
+from ase.io import Trajectory, read
+from ase.md.nose_hoover_chain import IsotropicMTKNPT
+import pytest
+
+from ml_peg.calcs.utils.utils import download_s3_data
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+
+KCAL_TO_EV = units.kcal / units.mol
+
+OUT_PATH = Path(__file__).parent / "outputs"
+
+AU_TO_G_CM3 = 1e24 / units.mol
+NUM_MD_STEPS = 1000_000
+TIMESTEP = 1 * units.fs
+LOG_INTERVAL = 100
+ATM = 1.01325 * units.bar
+
+
+def get_density_g_cm3(atoms: Atoms):
+    """
+    Get the density of the system in g/cm^3.
+
+    Parameters
+    ----------
+    atoms
+        ASE.Atoms object of the periodic system.
+
+    Returns
+    -------
+    float
+        Density in g/cm^3.
+    """
+    mass = atoms.get_masses().sum()
+    volume = atoms.get_volume()
+    return AU_TO_G_CM3 * mass / volume
+
+
+def log_md(dyn, start_time):
+    """
+    Log molecular dynamics simulation.
+
+    Parameters
+    ----------
+    dyn
+        ASE molecular dynamics object.
+    start_time
+        Real time of the simulation start, in seconds.
+    """
+    current_time = time.time() - start_time
+    energy = dyn.atoms.get_potential_energy()
+    density = get_density_g_cm3(dyn.atoms)
+    temperature = dyn.atoms.get_temperature()
+    t = dyn.get_time() / (1000 * units.fs)
+    logging.info(
+        f"""t: {t:>8.3f} ps\
+            Walltime: {current_time:>10.3f} s\
+            T: {temperature:.1f} K\
+            Epot: {energy:.2f} eV\
+            density: {density:.3f} g/cm^3\
+        """
+    )
+
+
+def run_npt(atoms, calc, output_fname):
+    """
+    Run NPT molecular dynamics using the isotropic MTK barostat.
+
+    Parameters
+    ----------
+    atoms
+        ASE Atoms of the system.
+    calc
+        ASE Calculator.
+    output_fname
+        File name to save the trajectory to.
+    """
+    if os.path.exists(output_fname):
+        try:
+            traj = Trajectory(output_fname)
+            atoms = traj[-1]
+            nsteps = (len(traj) - 1) * LOG_INTERVAL
+        except Exception as e:
+            print(e)
+            nsteps = 0
+    else:
+        nsteps = 0
+
+    atoms.calc = calc
+
+    dyn = IsotropicMTKNPT(
+        atoms=atoms,
+        timestep=TIMESTEP,
+        temperature_K=atoms.info["exp_temperature"],
+        pressure_au=ATM,
+        tdamp=50 * units.fs,
+        pdamp=500 * units.fs,
+        trajectory=output_fname,
+        loginterval=LOG_INTERVAL,
+        append_trajectory=True,
+    )
+    dyn.nsteps = nsteps
+    dyn.attach(log_md, interval=LOG_INTERVAL, dyn=dyn, start_time=time.time())
+    dyn.run(steps=NUM_MD_STEPS - nsteps)
+
+
+@pytest.mark.parametrize("mlip", MODELS.items())
+def test_liquid_densities(mlip: tuple[str, Any], system_id) -> None:
+    """
+    Run Liquid Densities benchmark.
+
+    Parameters
+    ----------
+    mlip
+        Name of model use and model to get calculator.
+    system_id
+        Identifier of the system to run MD on.
+    """
+    assert system_id in range(0, 63), (
+        "system_id out of range. Please use a value from 0 to 62"
+    )
+
+    # Download data
+    data_path = (
+        download_s3_data(
+            filename="liquid_densities.zip",
+            key="inputs/molecular_dynamics/liquid_densities/liquid_densities.zip",
+        )
+        / "liquid_densities"
+    )
+
+    # Get system name
+    input_xyz_path = sorted((data_path / "equilibrated_structures_xyz").glob("*.xyz"))[
+        system_id
+    ]
+
+    model_name, model = mlip
+    model.default_dtype = "float32"
+    calc = model.get_calculator()
+    # Add D3 calculator for this test
+    calc = model.add_d3_calculator(calc)
+
+    system_name = input_xyz_path.stem
+    out_dir = OUT_PATH / model_name
+    out_dir.mkdir(exist_ok=True, parents=True)
+
+    logging.basicConfig(
+        format="%(message)s",
+        level=logging.INFO,
+        filename=out_dir / f"{system_name}.log",
+        filemode="a",
+        force=True,
+    )
+
+    atoms = read(input_xyz_path)
+    output_fname = out_dir / f"{system_name}.traj"
+    run_npt(atoms, calc, output_fname)
diff --git a/ml_peg/calcs/molecular_dynamics/liquid_densities/conftest.py b/ml_peg/calcs/molecular_dynamics/liquid_densities/conftest.py
new file mode 100644
index 000000000..32750d7ae
--- /dev/null
+++ b/ml_peg/calcs/molecular_dynamics/liquid_densities/conftest.py
@@ -0,0 +1,35 @@
+"""Obtain the system_id input argument."""
+
+from __future__ import annotations
+
+import pytest
+
+
+def pytest_addoption(parser):
+    """
+    Add pytest option.
+
+    Parameters
+    ----------
+    parser
+        Parser to use.
+    """
+    parser.addoption("--system_id", action="store", default=0, type=int)
+
+
+@pytest.fixture
+def system_id(request):
+    """
+    Get system_id argument.
+
+    Parameters
+    ----------
+    request
+        Request.
+
+    Returns
+    -------
+    option
+        Requested command line argument.
+    """
+    return request.config.getoption("--system_id")
diff --git a/ml_peg/cli/cli.py b/ml_peg/cli/cli.py
index 36471650a..82a8876af 100644
--- a/ml_peg/cli/cli.py
+++ b/ml_peg/cli/cli.py
@@ -4,7 +4,7 @@
 
 from typing import Annotated
 
-from typer import Exit, Option, Typer
+from typer import Context, Exit, Option, Typer
 
 from ml_peg import __version__
 
@@ -57,8 +57,13 @@ def run_dash_app(
     run_app(category=category, port=port, debug=debug)
 
 
-@app.command(name="calc", help="Run calculations")
+@app.command(
+    name="calc",
+    help="Run calculations",
+    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
+)
 def run_calcs(
+    ctx: Context,
     models: Annotated[
         str | None,
         Option(
@@ -86,6 +91,8 @@ def run_calcs(
 
     Parameters
     ----------
+    ctx
+        Typer Context. Automatically set.
     models
         Models to run calculations for, in comma-separated list. Default is `None`,
         corresponding to all available models.
@@ -124,6 +131,9 @@ def run_calcs(
     if models:
         options.extend(["--models", models])
 
+    # Parse any custom options to pytest
+    options.extend(ctx.args)
+
     pytest.main(options)