Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions interatomic_potentials/configs/schnet/schnet_md17_ethanol.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# SchNet energy-regression config for the MD17 ethanol molecular-dynamics set.
Global:
  # Run training with periodic eval; skip the final test pass.
  do_train: True
  do_eval: True
  do_test: False

  # Supervised target label(s); referenced below via ${Global.label_names}.
  label_names: ['energy']

  # Neighbor-graph builder: connects atom pairs within the radial cutoff
  # (presumably Angstrom — confirm against FindPointsInSpheres docs).
  # Must match Model.__init_params__.cutoff below.
  graph_converter:
    __class_name__: FindPointsInSpheres
    __init_params__:
      cutoff: 5.0

  prim_eager_enabled: True


Trainer:
  max_epochs: 500
  seed: 42
  output_dir: ./output/schnet_md17_ethanol
  # Checkpoint every 50 epochs; log every 50 steps.
  save_freq: 50
  log_freq: 50

  # Evaluate from epoch 1 onward, every 5 epochs.
  start_eval_epoch: 1
  eval_freq: 5
  # Fresh training run: no pretrained weights, no resume.
  pretrained_model_path: null
  pretrained_weight_name: null
  resume_from_checkpoint: null
  use_amp: False
  eval_with_no_grad: True
  gradient_accumulation_steps: 1

  # Track the eval metric on "energy"; lower is better (it is an L1 error).
  best_metric_indicator: 'eval_metric'
  name_for_best_metric: "energy"
  greater_is_better: False


Model:
  __class_name__: SchNet
  __init_params__:
    # Atom-embedding / filter widths and number of interaction blocks.
    n_atom_basis: 64
    n_interactions: 6
    n_filters: 64
    # Must match Global.graph_converter cutoff.
    cutoff: 5.0
    # Radial-basis expansion size for interatomic distances.
    n_gaussians: 25
    # Highest atomic number supported by the embedding table.
    max_z: 100
    readout: "sum"
    property_names: ${Global.label_names}
    # No target standardization (identity transform).
    data_mean: 0.0
    data_std: 1.0
    loss_type: "l1_loss"
    # Energy-only training; force supervision disabled.
    compute_forces: False


Optimizer:
  __class_name__: Adam
  __init_params__:
    # Cosine LR decay from 1e-4 down to 1e-7, stepped per iteration.
    lr:
      __class_name__: Cosine
      __init_params__:
        learning_rate: 1e-4
        eta_min: 1e-7
        by_epoch: False


Metric:
  # MAE on energy; wrapper skips NaN targets.
  energy:
    __class_name__: IgnoreNanMetricWrapper
    __init_params__:
      __class_name__: paddle.nn.L1Loss
      __init_params__: {}


Dataset:
  train:
    dataset:
      __class_name__: MD17Dataset
      __init_params__:
        path: "./data/md17"
        molecule: "ethanol"
        property_names: ${Global.label_names}
        build_graph_cfg: ${Global.graph_converter}
        # Cap the number of training frames drawn from the trajectory.
        max_samples: 50000
    num_workers: 4
    use_shared_memory: False
    sampler:
      __class_name__: BatchSampler
      __init_params__:
        shuffle: True
        drop_last: False
        batch_size: 64
  val:
    dataset:
      __class_name__: MD17Dataset
      __init_params__:
        path: "./data/md17"
        molecule: "ethanol"
        property_names: ${Global.label_names}
        build_graph_cfg: ${Global.graph_converter}
        max_samples: 10000
    num_workers: 4
    use_shared_memory: False
    sampler:
      __class_name__: BatchSampler
      __init_params__:
        # Deterministic order for evaluation.
        shuffle: False
        drop_last: False
        batch_size: 64
109 changes: 109 additions & 0 deletions interatomic_potentials/configs/schnet/schnet_qm9_U0.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# SchNet config for predicting the QM9 U0 (internal energy at 0 K) property.
Global:
  # Run training with periodic eval; skip the final test pass.
  do_train: True
  do_eval: True
  do_test: False

  # Supervised target label(s); referenced below via ${Global.label_names}.
  label_names: ['energy_U0']

  # Neighbor-graph builder: connects atom pairs within the radial cutoff
  # (presumably Angstrom — confirm against FindPointsInSpheres docs).
  # Must match Model.__init_params__.cutoff below.
  graph_converter:
    __class_name__: FindPointsInSpheres
    __init_params__:
      cutoff: 10.0

  prim_eager_enabled: True


Trainer:
  max_epochs: 200
  seed: 42
  output_dir: ./output/schnet_qm9_U0
  # Checkpoint every 20 epochs; log every 50 steps.
  save_freq: 20
  log_freq: 50

  # Evaluate from epoch 1 onward, every 5 epochs.
  start_eval_epoch: 1
  eval_freq: 5
  # Fresh training run: no pretrained weights, no resume.
  pretrained_model_path: null
  pretrained_weight_name: null
  resume_from_checkpoint: null
  use_amp: False
  eval_with_no_grad: True
  gradient_accumulation_steps: 1

  # Track the eval metric on "energy_U0"; lower is better (L1 error).
  best_metric_indicator: 'eval_metric'
  name_for_best_metric: "energy_U0"
  greater_is_better: False


Model:
  __class_name__: SchNet
  __init_params__:
    # Atom-embedding / filter widths and number of interaction blocks.
    n_atom_basis: 128
    n_interactions: 6
    n_filters: 128
    # Must match Global.graph_converter cutoff.
    cutoff: 10.0
    # Radial-basis expansion size for interatomic distances.
    n_gaussians: 50
    # Highest atomic number supported by the embedding table.
    max_z: 100
    readout: "sum"
    property_names: ${Global.label_names}
    # Target standardization constants — presumably the mean/std of U0
    # over the training split; confirm against the data-prep script.
    data_mean: -76.1160
    data_std: 10.3238
    loss_type: "l1_loss"
    # Energy-only training; force supervision disabled.
    compute_forces: False


Optimizer:
  __class_name__: Adam
  __init_params__:
    # Cosine LR decay from 1e-4 down to 1e-7, stepped per iteration.
    lr:
      __class_name__: Cosine
      __init_params__:
        learning_rate: 1e-4
        eta_min: 1e-7
        by_epoch: False


Metric:
  # MAE on U0; wrapper skips NaN targets.
  energy_U0:
    __class_name__: IgnoreNanMetricWrapper
    __init_params__:
      __class_name__: paddle.nn.L1Loss
      __init_params__: {}


Dataset:
  train:
    dataset:
      __class_name__: QM9Dataset
      __init_params__:
        path: "./data/qm9"
        property_names: ${Global.label_names}
        build_graph_cfg: ${Global.graph_converter}
        # Reuse preprocessed graphs between runs instead of rebuilding.
        cache_path: "./data/qm9"
        overwrite: False
        # NOTE: key name is "filter_unvalid" (sic) — it is the dataset's
        # runtime parameter name and must not be "corrected" here.
        filter_unvalid: True
    num_workers: 4
    use_shared_memory: False
    sampler:
      __class_name__: BatchSampler
      __init_params__:
        shuffle: True
        drop_last: False
        batch_size: 64
  val:
    dataset:
      __class_name__: QM9Dataset
      __init_params__:
        path: "./data/qm9"
        property_names: ${Global.label_names}
        build_graph_cfg: ${Global.graph_converter}
        cache_path: "./data/qm9"
        overwrite: False
        filter_unvalid: True
    num_workers: 4
    use_shared_memory: False
    sampler:
      __class_name__: BatchSampler
      __init_params__:
        # Deterministic order for evaluation.
        shuffle: False
        drop_last: False
        batch_size: 64
88 changes: 88 additions & 0 deletions ppmat/datasets/alloy_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
AlloyDataset — tabular dataset for metallic glass alloy compositions.

Loads Alloy_train.csv produced by tools/prepare_alloy_data.py.
Each sample is a 66-dimensional float vector:
columns 0-39: element composition fractions (40 elements)
columns 40-42: Tg, Tx, Tl (thermal transition temperatures in K)
columns 43-65: 23 GFA criteria (derived from Tg/Tx/Tl)

The "source" column is dropped on load (same as original AlloyGAN).
"""

import numpy as np
import paddle
from paddle.io import Dataset

from ppmat.utils import logger


class AlloyDataset(Dataset):
    """Tabular dataset for AlloyGAN training on metallic-glass alloys.

    Each CSV row is a 66-dimensional float vector:
        columns 0-39:  element composition fractions for the 40 elements
                       in ``ELEMENTS`` (stored as percentages, 0-100)
        columns 40-42: Tg, Tx, Tl thermal transition temperatures (K)
        columns 43-65: 23 GFA criteria derived from Tg/Tx/Tl

    Args:
        path: Path to ``Alloy_train.csv`` (as produced by
            ``tools/prepare_alloy_data.py``).
        categories: Optional list of dominant-element symbols to keep
            (e.g. ``["Cu", "Fe", "Ti", "Zr"]``). ``None`` keeps all rows.
        normalize: If True (default), divide the 40 composition columns
            by 100 so fractions lie in [0, 1].

    Raises:
        ValueError: If the CSV (after dropping "source") has fewer
            columns than the 40 leading composition columns require.
    """

    # Top 40 elements in order (matches CSV columns 0-39)
    ELEMENTS = [
        "Cu", "Zr", "Al", "Ni", "Ti", "Ag", "Fe", "Mg", "B", "Si",
        "Nb", "Y", "Ca", "La", "Co", "Be", "C", "Mo", "Pd", "P",
        "Sn", "Cr", "Hf", "Zn", "Gd", "Ce", "Er", "Ga", "Au", "Nd",
        "Dy", "W", "Pr", "Ta", "Sc", "Li", "Sm", "S", "Pt", "Mn",
    ]

    def __init__(self, path, categories=None, normalize=True):
        super().__init__()
        # Lazy import: pandas is only needed when this dataset is used.
        import pandas as pd

        df = pd.read_csv(path)

        # Drop the provenance column if present (same as original code)
        if "source" in df.columns:
            df = df.drop(columns=["source"])

        # Guard the positional column slicing below: a malformed CSV with
        # fewer columns would otherwise be mis-sliced silently.
        n_elem = len(self.ELEMENTS)
        if df.shape[1] < n_elem:
            raise ValueError(
                f"Expected at least {n_elem} composition columns in {path}, "
                f"got {df.shape[1]}"
            )

        # Optional filtering: keep rows whose largest composition fraction
        # belongs to one of the requested elements.
        if categories is not None:
            elem_cols = df.columns[:n_elem]
            dominant = df[elem_cols].idxmax(axis=1)
            mask = dominant.isin(categories)
            df = df[mask].reset_index(drop=True)
            logger.info(
                f"Filtered to categories {categories}: "
                f"{len(df)} entries"
            )
            if len(df) == 0:
                # Surface the likely misconfiguration instead of failing
                # later with an empty dataset.
                logger.info(
                    f"No entries remain after filtering by {categories}"
                )

        # `.to_numpy()` is the explicit modern replacement for `.values`.
        self.data = df.to_numpy(dtype=np.float32)

        if normalize:
            # Normalize composition fractions (0-100) to (0-1)
            self.data[:, :n_elem] = self.data[:, :n_elem] / 100.0

        logger.info(
            f"Loaded AlloyDataset: {len(self.data)} samples, "
            f"{self.data.shape[1]} features from {path}"
        )

    def __len__(self):
        # Number of rows after optional category filtering.
        return len(self.data)

    def __getitem__(self, idx):
        # Dict-wrapped so downstream collate functions address it by name.
        return {"data": self.data[idx]}
Loading