From 3825264d16e3cd4456f3b59d211f2edbab0630af Mon Sep 17 00:00:00 2001 From: dimanaStoyanova7 Date: Wed, 20 May 2026 11:43:29 +0200 Subject: [PATCH 01/12] add slurm scripts, configs and CLAUDE.md --- CLAUDE.md | 110 ++++++++++++++++++++ configs/darcy_cocogen.yaml | 13 +++ configs/darcy_diffusion.yaml | 13 +++ configs/darcy_pg.yaml | 13 +++ configs/darcy_pidm_me.yaml | 13 +++ configs/darcy_pidm_se.yaml | 13 +++ configs/mechanics_cocogen.yaml | 16 +++ configs/mechanics_diffusion.yaml | 13 +++ configs/mechanics_pg.yaml | 16 +++ configs/mechanics_pidm_me.yaml | 13 +++ configs/mechanics_pidm_se.yaml | 13 +++ slurm/darcy_cocogen.slurm | 42 ++++++++ slurm/darcy_diffusion.slurm | 42 ++++++++ slurm/darcy_pg.slurm | 42 ++++++++ slurm/darcy_pidm_me.slurm | 42 ++++++++ slurm/darcy_pidm_se.slurm | 42 ++++++++ slurm/eval_darcy_pidm_me.slurm | 37 +++++++ slurm/eval_darcy_pidm_se.slurm | 37 +++++++ slurm/eval_topology_diffusion.slurm | 37 +++++++ slurm/eval_topology_pidm.slurm | 37 +++++++ slurm/logs/.gitkeep | 0 slurm/logs/eval_darcy_me_9912172.err | 4 + slurm/logs/eval_darcy_me_9912172.out | 0 slurm/logs/eval_darcy_me_9912378.err | 28 +++++ slurm/logs/eval_darcy_me_9912378.out | 2 + slurm/logs/eval_darcy_me_9912408.err | 0 slurm/logs/eval_darcy_me_9912408.out | 4 + slurm/logs/eval_darcy_se_9912173.err | 4 + slurm/logs/eval_darcy_se_9912173.out | 0 slurm/logs/eval_darcy_se_9912379.err | 28 +++++ slurm/logs/eval_darcy_se_9912379.out | 2 + slurm/logs/eval_darcy_se_9912409.err | 0 slurm/logs/eval_darcy_se_9912409.out | 4 + slurm/logs/eval_topo_diff_9912175.err | 4 + slurm/logs/eval_topo_diff_9912175.out | 0 slurm/logs/eval_topo_diff_9912381.err | 1 + slurm/logs/eval_topo_diff_9912381.out | 0 slurm/logs/eval_topo_pidm_9912174.err | 4 + slurm/logs/eval_topo_pidm_9912174.out | 0 slurm/logs/eval_topo_pidm_9912380.err | 1 + slurm/logs/eval_topo_pidm_9912380.out | 0 slurm/logs/pidm_darcy_cocogen_9913377.err | 1 + slurm/logs/pidm_darcy_cocogen_9913377.out | 1 + 
slurm/logs/pidm_darcy_cocogen_9913564.err | 1 + slurm/logs/pidm_darcy_cocogen_9913564.out | 1 + slurm/logs/pidm_darcy_diffusion_9913375.err | 6 ++ slurm/logs/pidm_darcy_diffusion_9913375.out | 1 + slurm/logs/pidm_darcy_diffusion_9913543.err | 1 + slurm/logs/pidm_darcy_diffusion_9913543.out | 1 + slurm/logs/pidm_darcy_diffusion_9940441.err | 11 ++ slurm/logs/pidm_darcy_diffusion_9940441.out | 1 + slurm/logs/pidm_darcy_pg_9913376.err | 6 ++ slurm/logs/pidm_darcy_pg_9913376.out | 1 + slurm/logs/pidm_darcy_pg_9913544.err | 1 + slurm/logs/pidm_darcy_pg_9913544.out | 1 + slurm/logs/pidm_toy_9908779.err | 7 ++ slurm/logs/pidm_toy_9908779.out | 0 slurm/logs/pidm_toy_9912059.err | 2 + slurm/logs/pidm_toy_9912059.out | 0 slurm/logs/pidm_toy_9912070.err | 1 + slurm/logs/pidm_toy_9912070.out | 1 + slurm/logs/pidm_toy_9912868.err | 1 + slurm/logs/pidm_toy_9912868.out | 21 ++++ slurm/mechanics_cocogen.slurm | 46 ++++++++ slurm/mechanics_diffusion.slurm | 42 ++++++++ slurm/mechanics_pg.slurm | 46 ++++++++ slurm/mechanics_pidm_me.slurm | 42 ++++++++ slurm/mechanics_pidm_se.slurm | 42 ++++++++ slurm/test_gpu.slurm | 27 +++++ slurm/toy.slurm | 43 ++++++++ 70 files changed, 1045 insertions(+) create mode 100644 CLAUDE.md create mode 100644 configs/darcy_cocogen.yaml create mode 100644 configs/darcy_diffusion.yaml create mode 100644 configs/darcy_pg.yaml create mode 100644 configs/darcy_pidm_me.yaml create mode 100644 configs/darcy_pidm_se.yaml create mode 100644 configs/mechanics_cocogen.yaml create mode 100644 configs/mechanics_diffusion.yaml create mode 100644 configs/mechanics_pg.yaml create mode 100644 configs/mechanics_pidm_me.yaml create mode 100644 configs/mechanics_pidm_se.yaml create mode 100644 slurm/darcy_cocogen.slurm create mode 100644 slurm/darcy_diffusion.slurm create mode 100644 slurm/darcy_pg.slurm create mode 100644 slurm/darcy_pidm_me.slurm create mode 100644 slurm/darcy_pidm_se.slurm create mode 100644 slurm/eval_darcy_pidm_me.slurm create mode 100644 
slurm/eval_darcy_pidm_se.slurm create mode 100644 slurm/eval_topology_diffusion.slurm create mode 100644 slurm/eval_topology_pidm.slurm create mode 100644 slurm/logs/.gitkeep create mode 100644 slurm/logs/eval_darcy_me_9912172.err create mode 100644 slurm/logs/eval_darcy_me_9912172.out create mode 100644 slurm/logs/eval_darcy_me_9912378.err create mode 100644 slurm/logs/eval_darcy_me_9912378.out create mode 100644 slurm/logs/eval_darcy_me_9912408.err create mode 100644 slurm/logs/eval_darcy_me_9912408.out create mode 100644 slurm/logs/eval_darcy_se_9912173.err create mode 100644 slurm/logs/eval_darcy_se_9912173.out create mode 100644 slurm/logs/eval_darcy_se_9912379.err create mode 100644 slurm/logs/eval_darcy_se_9912379.out create mode 100644 slurm/logs/eval_darcy_se_9912409.err create mode 100644 slurm/logs/eval_darcy_se_9912409.out create mode 100644 slurm/logs/eval_topo_diff_9912175.err create mode 100644 slurm/logs/eval_topo_diff_9912175.out create mode 100644 slurm/logs/eval_topo_diff_9912381.err create mode 100644 slurm/logs/eval_topo_diff_9912381.out create mode 100644 slurm/logs/eval_topo_pidm_9912174.err create mode 100644 slurm/logs/eval_topo_pidm_9912174.out create mode 100644 slurm/logs/eval_topo_pidm_9912380.err create mode 100644 slurm/logs/eval_topo_pidm_9912380.out create mode 100644 slurm/logs/pidm_darcy_cocogen_9913377.err create mode 100644 slurm/logs/pidm_darcy_cocogen_9913377.out create mode 100644 slurm/logs/pidm_darcy_cocogen_9913564.err create mode 100644 slurm/logs/pidm_darcy_cocogen_9913564.out create mode 100644 slurm/logs/pidm_darcy_diffusion_9913375.err create mode 100644 slurm/logs/pidm_darcy_diffusion_9913375.out create mode 100644 slurm/logs/pidm_darcy_diffusion_9913543.err create mode 100644 slurm/logs/pidm_darcy_diffusion_9913543.out create mode 100644 slurm/logs/pidm_darcy_diffusion_9940441.err create mode 100644 slurm/logs/pidm_darcy_diffusion_9940441.out create mode 100644 slurm/logs/pidm_darcy_pg_9913376.err create mode 100644 
slurm/logs/pidm_darcy_pg_9913376.out create mode 100644 slurm/logs/pidm_darcy_pg_9913544.err create mode 100644 slurm/logs/pidm_darcy_pg_9913544.out create mode 100644 slurm/logs/pidm_toy_9908779.err create mode 100644 slurm/logs/pidm_toy_9908779.out create mode 100644 slurm/logs/pidm_toy_9912059.err create mode 100644 slurm/logs/pidm_toy_9912059.out create mode 100644 slurm/logs/pidm_toy_9912070.err create mode 100644 slurm/logs/pidm_toy_9912070.out create mode 100644 slurm/logs/pidm_toy_9912868.err create mode 100644 slurm/logs/pidm_toy_9912868.out create mode 100644 slurm/mechanics_cocogen.slurm create mode 100644 slurm/mechanics_diffusion.slurm create mode 100644 slurm/mechanics_pg.slurm create mode 100644 slurm/mechanics_pidm_me.slurm create mode 100644 slurm/mechanics_pidm_se.slurm create mode 100644 slurm/test_gpu.slurm create mode 100644 slurm/toy.slurm diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..d7d6476 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,110 @@ +# PIDM Reproduction Project — Claude Context + +## What this project is +Reproduction of "Physics-Informed Diffusion Models" (Bastek et al., ICLR 2025). +We are reproducing the Darcy flow and topology optimization experiments from Section 4, +running hyperparameter sweeps, and writing a blog post about our findings. 
+ +Paper: https://arxiv.org/abs/2403.14404 +Repo: https://github.com/jhbastek/PhysicsInformedDiffusionModels + +## Who is working on this +- Person 1 (dstoyanova): week 4 setup, Darcy flow reproduction, blog assembly +- Person 2: topology optimization reproduction + ablation study +- Person 3: hyperparameter sweep on Darcy flow + +## Cluster: DelftBlue (TU Delft HPC) +- Login: `ssh dstoyanova@login.delftblue.tudelft.nl` +- Home dir: `/home/dstoyanova/` — limited quota, do NOT store data or checkpoints here +- Scratch dir: `/scratch/dstoyanova/` — use this for all data, checkpoints, outputs +- Scheduler: SLURM +- GPU partitions available: + - `gpu` — NVIDIA Tesla V100S, 32GB VRAM (phase 1) + - `gpu-a100` — NVIDIA A100, 80GB VRAM (phase 2) + - `gpu-a100-small` — A100 partitioned into 10GB instances (not suitable for us) +- Use `gpu` or `gpu-a100` partition for all training jobs +- Must be on TU Delft network or EduVPN to SSH in + +## Repo structure +``` +PhysicsInformedDiffusionModels/ +├── main.py # main training script for Darcy + topology opt. 
+├── main_toy.py # toy problem (unit circle), ~12 min, use as sanity check +├── sample.py # inference/evaluation script +├── model.yaml # config file — change this to switch between model variants +├── src/ # model architecture and utilities +├── data/ # place downloaded data here (darcy/ and mechanics/) +└── trained_models/ # place downloaded pretrained models here +``` + +Data must be downloaded from ETHZ Research Collection: +https://doi.org/10.3929/ethz-b-000674074 +Place unzipped contents under `/scratch/dstoyanova/PhysicsInformedDiffusionModels/` + +## Conda environment +Environment name: `pidm` +Python: 3.11 +Key packages: pytorch>=2.0.1, findiff, solidspy, pandas, einops, einops-exts, + rotary_embedding_torch, torchvision, opencv, tqdm, matplotlib, + imageio, wandb (optional) + +To activate: `conda activate pidm` +Installed at: `/home/dstoyanova/miniconda3/envs/pidm` + +## The 5 model variants and their yaml configs + +All variants use the same main.py — only model.yaml changes. + +| Variant | c_residual | x0_estimation | residual_grad_guidance | M_correction | N_correction | +|---------------|------------|---------------|------------------------|--------------|--------------| +| Diffusion | 0 | mean | False | 0 | 0 | +| PG-Diffusion | 0 | mean | True | 0 | 0 | +| CoCoGen | 0 | mean | False | 25 | 50 | +| PIDM-ME | 0.001 | mean | False | 0 | 0 | +| PIDM-SE | 0.00001 | sample | False | 0 | 0 | + +Fixed for all variants: +- c_data: 1 +- c_ineq: 0 +- lambda_opt: 0 +- diff_steps: 100 +- fd_acc: 2 +- gov_eqs: darcy (for Darcy); mechanics (for topology opt.) + +Separate yaml files are stored as: +`configs/darcy_diffusion.yaml` +`configs/darcy_pg.yaml` +`configs/darcy_cocogen.yaml` +`configs/darcy_pidm_me.yaml` +`configs/darcy_pidm_se.yaml` +(and equivalent mechanics_ variants for topology opt.) 
+ +## SLURM job scripts +Stored in `slurm/` +- `toy.slurm` — sanity check, ~12 min, 1 GPU +- `darcy_<variant>.slurm` — Darcy flow training, ~13-22h, 1 GPU +- `mechanics_<variant>.slurm` — topology opt. training, ~48-54h, 1 GPU + +All jobs request 1 GPU, 1 node, appropriate wall time. +Submit with: `sbatch slurm/