Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions interatomic_potentials/configs/schnet/schnet_md17_ethanol.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# SchNet energy-regression config for the MD17 ethanol molecular-dynamics set.
Global:
  # Run training with periodic eval; skip the final test pass.
  do_train: True
  do_eval: True
  do_test: False

  # Supervised target label(s); referenced below via ${Global.label_names}.
  label_names: ['energy']

  # Neighbor-graph builder: connects atom pairs within the radial cutoff
  # (presumably Angstrom — confirm against FindPointsInSpheres docs).
  # Must match Model.__init_params__.cutoff below.
  graph_converter:
    __class_name__: FindPointsInSpheres
    __init_params__:
      cutoff: 5.0

  prim_eager_enabled: True


Trainer:
  max_epochs: 500
  seed: 42
  output_dir: ./output/schnet_md17_ethanol
  # Checkpoint every 50 epochs; log every 50 steps.
  save_freq: 50
  log_freq: 50

  # Evaluate from epoch 1 onward, every 5 epochs.
  start_eval_epoch: 1
  eval_freq: 5
  # Fresh training run: no pretrained weights, no resume.
  pretrained_model_path: null
  pretrained_weight_name: null
  resume_from_checkpoint: null
  use_amp: False
  eval_with_no_grad: True
  gradient_accumulation_steps: 1

  # Track the eval metric on "energy"; lower is better (it is an L1 error).
  best_metric_indicator: 'eval_metric'
  name_for_best_metric: "energy"
  greater_is_better: False


Model:
  __class_name__: SchNet
  __init_params__:
    # Atom-embedding / filter widths and number of interaction blocks.
    n_atom_basis: 64
    n_interactions: 6
    n_filters: 64
    # Must match Global.graph_converter cutoff.
    cutoff: 5.0
    # Radial-basis expansion size for interatomic distances.
    n_gaussians: 25
    # Highest atomic number supported by the embedding table.
    max_z: 100
    readout: "sum"
    property_names: ${Global.label_names}
    # No target standardization (identity transform).
    data_mean: 0.0
    data_std: 1.0
    loss_type: "l1_loss"
    # Energy-only training; force supervision disabled.
    compute_forces: False


Optimizer:
  __class_name__: Adam
  __init_params__:
    # Cosine LR decay from 1e-4 down to 1e-7, stepped per iteration.
    lr:
      __class_name__: Cosine
      __init_params__:
        learning_rate: 1e-4
        eta_min: 1e-7
        by_epoch: False


Metric:
  # MAE on energy; wrapper skips NaN targets.
  energy:
    __class_name__: IgnoreNanMetricWrapper
    __init_params__:
      __class_name__: paddle.nn.L1Loss
      __init_params__: {}


Dataset:
  train:
    dataset:
      __class_name__: MD17Dataset
      __init_params__:
        path: "./data/md17"
        molecule: "ethanol"
        property_names: ${Global.label_names}
        build_graph_cfg: ${Global.graph_converter}
        # Cap the number of training frames drawn from the trajectory.
        max_samples: 50000
    num_workers: 4
    use_shared_memory: False
    sampler:
      __class_name__: BatchSampler
      __init_params__:
        shuffle: True
        drop_last: False
        batch_size: 64
  val:
    dataset:
      __class_name__: MD17Dataset
      __init_params__:
        path: "./data/md17"
        molecule: "ethanol"
        property_names: ${Global.label_names}
        build_graph_cfg: ${Global.graph_converter}
        max_samples: 10000
    num_workers: 4
    use_shared_memory: False
    sampler:
      __class_name__: BatchSampler
      __init_params__:
        # Deterministic order for evaluation.
        shuffle: False
        drop_last: False
        batch_size: 64
109 changes: 109 additions & 0 deletions interatomic_potentials/configs/schnet/schnet_qm9_U0.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# SchNet config for predicting the QM9 U0 (internal energy at 0 K) property.
Global:
  # Run training with periodic eval; skip the final test pass.
  do_train: True
  do_eval: True
  do_test: False

  # Supervised target label(s); referenced below via ${Global.label_names}.
  label_names: ['energy_U0']

  # Neighbor-graph builder: connects atom pairs within the radial cutoff
  # (presumably Angstrom — confirm against FindPointsInSpheres docs).
  # Must match Model.__init_params__.cutoff below.
  graph_converter:
    __class_name__: FindPointsInSpheres
    __init_params__:
      cutoff: 10.0

  prim_eager_enabled: True


Trainer:
  max_epochs: 200
  seed: 42
  output_dir: ./output/schnet_qm9_U0
  # Checkpoint every 20 epochs; log every 50 steps.
  save_freq: 20
  log_freq: 50

  # Evaluate from epoch 1 onward, every 5 epochs.
  start_eval_epoch: 1
  eval_freq: 5
  # Fresh training run: no pretrained weights, no resume.
  pretrained_model_path: null
  pretrained_weight_name: null
  resume_from_checkpoint: null
  use_amp: False
  eval_with_no_grad: True
  gradient_accumulation_steps: 1

  # Track the eval metric on "energy_U0"; lower is better (L1 error).
  best_metric_indicator: 'eval_metric'
  name_for_best_metric: "energy_U0"
  greater_is_better: False


Model:
  __class_name__: SchNet
  __init_params__:
    # Atom-embedding / filter widths and number of interaction blocks.
    n_atom_basis: 128
    n_interactions: 6
    n_filters: 128
    # Must match Global.graph_converter cutoff.
    cutoff: 10.0
    # Radial-basis expansion size for interatomic distances.
    n_gaussians: 50
    # Highest atomic number supported by the embedding table.
    max_z: 100
    readout: "sum"
    property_names: ${Global.label_names}
    # Target standardization constants — presumably the mean/std of U0
    # over the training split; confirm against the data-prep script.
    data_mean: -76.1160
    data_std: 10.3238
    loss_type: "l1_loss"
    # Energy-only training; force supervision disabled.
    compute_forces: False


Optimizer:
  __class_name__: Adam
  __init_params__:
    # Cosine LR decay from 1e-4 down to 1e-7, stepped per iteration.
    lr:
      __class_name__: Cosine
      __init_params__:
        learning_rate: 1e-4
        eta_min: 1e-7
        by_epoch: False


Metric:
  # MAE on U0; wrapper skips NaN targets.
  energy_U0:
    __class_name__: IgnoreNanMetricWrapper
    __init_params__:
      __class_name__: paddle.nn.L1Loss
      __init_params__: {}


Dataset:
  train:
    dataset:
      __class_name__: QM9Dataset
      __init_params__:
        path: "./data/qm9"
        property_names: ${Global.label_names}
        build_graph_cfg: ${Global.graph_converter}
        # Reuse preprocessed graphs between runs instead of rebuilding.
        cache_path: "./data/qm9"
        overwrite: False
        # NOTE: key name is "filter_unvalid" (sic) — it is the dataset's
        # runtime parameter name and must not be "corrected" here.
        filter_unvalid: True
    num_workers: 4
    use_shared_memory: False
    sampler:
      __class_name__: BatchSampler
      __init_params__:
        shuffle: True
        drop_last: False
        batch_size: 64
  val:
    dataset:
      __class_name__: QM9Dataset
      __init_params__:
        path: "./data/qm9"
        property_names: ${Global.label_names}
        build_graph_cfg: ${Global.graph_converter}
        cache_path: "./data/qm9"
        overwrite: False
        filter_unvalid: True
    num_workers: 4
    use_shared_memory: False
    sampler:
      __class_name__: BatchSampler
      __init_params__:
        # Deterministic order for evaluation.
        shuffle: False
        drop_last: False
        batch_size: 64
88 changes: 88 additions & 0 deletions ppmat/datasets/alloy_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
AlloyDataset — tabular dataset for metallic glass alloy compositions.

Loads Alloy_train.csv produced by tools/prepare_alloy_data.py.
Each sample is a 66-dimensional float vector:
columns 0-39: element composition fractions (40 elements)
columns 40-42: Tg, Tx, Tl (thermal transition temperatures in K)
columns 43-65: 23 GFA criteria (derived from Tg/Tx/Tl)

The "source" column is dropped on load (same as original AlloyGAN).
"""

import numpy as np
import paddle
from paddle.io import Dataset

from ppmat.utils import logger


class AlloyDataset(Dataset):
    """Tabular dataset for AlloyGAN training on metallic-glass alloys.

    Each CSV row is a 66-dimensional float vector:
        columns 0-39:  element composition fractions for the 40 elements
                       in ``ELEMENTS`` (stored as percentages, 0-100)
        columns 40-42: Tg, Tx, Tl thermal transition temperatures (K)
        columns 43-65: 23 GFA criteria derived from Tg/Tx/Tl

    Args:
        path: Path to ``Alloy_train.csv`` (as produced by
            ``tools/prepare_alloy_data.py``).
        categories: Optional list of dominant-element symbols to keep
            (e.g. ``["Cu", "Fe", "Ti", "Zr"]``). ``None`` keeps all rows.
        normalize: If True (default), divide the 40 composition columns
            by 100 so fractions lie in [0, 1].

    Raises:
        ValueError: If the CSV (after dropping "source") has fewer
            columns than the 40 leading composition columns require.
    """

    # Top 40 elements in order (matches CSV columns 0-39)
    ELEMENTS = [
        "Cu", "Zr", "Al", "Ni", "Ti", "Ag", "Fe", "Mg", "B", "Si",
        "Nb", "Y", "Ca", "La", "Co", "Be", "C", "Mo", "Pd", "P",
        "Sn", "Cr", "Hf", "Zn", "Gd", "Ce", "Er", "Ga", "Au", "Nd",
        "Dy", "W", "Pr", "Ta", "Sc", "Li", "Sm", "S", "Pt", "Mn",
    ]

    def __init__(self, path, categories=None, normalize=True):
        super().__init__()
        # Lazy import: pandas is only needed when this dataset is used.
        import pandas as pd

        df = pd.read_csv(path)

        # Drop the provenance column if present (same as original code)
        if "source" in df.columns:
            df = df.drop(columns=["source"])

        # Guard the positional column slicing below: a malformed CSV with
        # fewer columns would otherwise be mis-sliced silently.
        n_elem = len(self.ELEMENTS)
        if df.shape[1] < n_elem:
            raise ValueError(
                f"Expected at least {n_elem} composition columns in {path}, "
                f"got {df.shape[1]}"
            )

        # Optional filtering: keep rows whose largest composition fraction
        # belongs to one of the requested elements.
        if categories is not None:
            elem_cols = df.columns[:n_elem]
            dominant = df[elem_cols].idxmax(axis=1)
            mask = dominant.isin(categories)
            df = df[mask].reset_index(drop=True)
            logger.info(
                f"Filtered to categories {categories}: "
                f"{len(df)} entries"
            )
            if len(df) == 0:
                # Surface the likely misconfiguration instead of failing
                # later with an empty dataset.
                logger.info(
                    f"No entries remain after filtering by {categories}"
                )

        # `.to_numpy()` is the explicit modern replacement for `.values`.
        self.data = df.to_numpy(dtype=np.float32)

        if normalize:
            # Normalize composition fractions (0-100) to (0-1)
            self.data[:, :n_elem] = self.data[:, :n_elem] / 100.0

        logger.info(
            f"Loaded AlloyDataset: {len(self.data)} samples, "
            f"{self.data.shape[1]} features from {path}"
        )

    def __len__(self):
        # Number of rows after optional category filtering.
        return len(self.data)

    def __getitem__(self, idx):
        # Dict-wrapped so downstream collate functions address it by name.
        return {"data": self.data[idx]}
Loading