Skip to content
1 change: 1 addition & 0 deletions engibench/problems/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
"""Contains all the different problems modeled in the library."""
from .wings3D.v0 import Wings3D
2 changes: 1 addition & 1 deletion engibench/problems/photonics2d/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Photonics2D problem module."""

from engibench.problems.photonics2d.v0 import Photonics2D
from engibench.problems.photonics2d.v0 import Photonics2D

__all__ = ["Photonics2D"]
90 changes: 90 additions & 0 deletions engibench/problems/wings3D/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Airfoil 2D

**Lead**: Cashen Diniz @cashend

Airfoil 2D is a benchmark problem that aims to optimize the shape of an airfoil to maximize the lift-to-drag ratio.
We rely on MACH-Aero for the simulations.

## Side notes

Here is the script I've used to upload the data to HF using the pickle files here: https://github.com/IDEALLab/OptimizingDiffusionSciTech2024/tree/main/data/optimized_data

```python
from datasets import Dataset
from datasets import DatasetDict
import numpy as np
import pandas as pd

opt_train_airfoils, opt_test_airfoils, opt_val_airfoils = pd.read_pickle("train_test_val_opt_airfoils.pkl")
init_train_airfoils, init_test_airfoils, init_val_airfoils = pd.read_pickle("train_test_val_init_airfoils.pkl")
train_params, test_params, val_params = pd.read_pickle("train_test_val_opt_params.pkl")


def build_rows(opt_airfoils, init_airfoils, params):
    """Build one dataset row per airfoil.

    Each row holds the initial and optimized airfoil (coords + angle of attack)
    plus the scalar flow/optimization parameters, unpacked by position from ``params``.
    """
    rows = []
    for opt_coords, init_coords, p in zip(opt_airfoils, init_airfoils, params):
        aoa = np.asarray(p[4], dtype=np.float32)
        rows.append(
            {
                "initial_design": {"coords": init_coords, "angle_of_attack": aoa},
                "optimal_design": {"coords": opt_coords, "angle_of_attack": aoa},
                "mach": p[0],
                "reynolds": p[1],
                "cl_target": p[2],
                "area_ratio_min": p[3],
                "area_initial": p[5],
                "cd": p[6],
                "cl": p[7],
                "cl_con_violation": p[8],
                "area_ratio": p[9],
            }
        )
    return rows


dataset_train = build_rows(opt_train_airfoils, init_train_airfoils, train_params)
# NOTE(review): the original script built the "val" split from the *test* pickles and the
# "test" split from the *val* pickles. Preserved as-is to match the published dataset —
# confirm the swap is intentional before re-uploading.
dataset_val = build_rows(opt_test_airfoils, init_test_airfoils, test_params)
dataset_test = build_rows(opt_val_airfoils, init_val_airfoils, val_params)

# Create a huggingface dataset from the three splits above
train_split = Dataset.from_list(dataset_train)
print(train_split.shape)
val_split = Dataset.from_list(dataset_val)
test_split = Dataset.from_list(dataset_test)
dataset_dict = DatasetDict({"train": train_split, "val": val_split, "test": test_split})
dataset_dict.push_to_hub("IDEALLab/airfoil_v0")
```
5 changes: 5 additions & 0 deletions engibench/problems/wings3D/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Airfoil problem module."""

from engibench.problems.airfoil.v0 import Airfoil

__all__ = ["Airfoil"]
54 changes: 54 additions & 0 deletions engibench/problems/wings3D/dataset_hf_wings3d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
Dataset loader for the Wings3D problem.
Tries Hugging Face first. Optionally falls back to a local file for development.
"""

from __future__ import annotations

from pathlib import Path
from typing import Optional

import numpy as np
import pandas as pd
from datasets import Dataset, DatasetDict, load_dataset


def _pandas_to_datasetdict(df: pd.DataFrame, split: str = "train") -> DatasetDict:
    """Wrap *df* in a single-split ``DatasetDict``, list-ifying the ``coords`` column."""
    frame = df.copy()
    # HF Dataset cannot serialize raw numpy arrays held in object cells,
    # so each coords entry is converted to a plain nested list first.
    if "coords" in frame.columns:
        frame["coords"] = frame["coords"].apply(lambda cell: np.asarray(cell).tolist())
    dataset = Dataset.from_pandas(frame, preserve_index=False)
    return DatasetDict({split: dataset})


def load_wings3d_dataset(dataset_id: str, local_path: str | None = None) -> DatasetDict:
    """Load the Wings3D dataset, trying the Hugging Face Hub first.

    Args:
        dataset_id: Hugging Face dataset identifier (e.g. ``"org/name"``).
        local_path: Optional path to a local ``.pkl`` or ``.parquet`` file used as a
            development fallback when the Hub load fails.

    Returns:
        A ``DatasetDict``; the local fallback is returned under a single ``"train"`` split.

    Raises:
        ValueError: If ``dataset_id`` is empty, or the local file has an unsupported suffix.
        RuntimeError: If the Hub load fails and no usable ``local_path`` is available.
    """
    if not dataset_id:
        raise ValueError("dataset_id must be a non-empty string")

    try:
        return load_dataset(dataset_id)
    # Broad on purpose: any Hub failure (missing repo, auth, network) triggers the fallback.
    except Exception as e:
        if local_path is None:
            raise RuntimeError(
                f"Could not load Hugging Face dataset '{dataset_id}'.\n"
                f"- If it hasn't been uploaded yet, this is expected.\n"
                f"- If it's private, run: huggingface-cli login\n"
                f"Original error: {type(e).__name__}: {e}"
            ) from e

        p = Path(local_path)
        if not p.exists():
            raise RuntimeError(
                f"HF dataset '{dataset_id}' not available AND local_path not found: {local_path}"
            ) from e

        # Load local df; compare the suffix case-insensitively so ".PKL" etc. also work.
        suffix = p.suffix.lower()
        if suffix == ".pkl":
            # NOTE(review): unpickling is only safe for trusted, locally-produced files.
            df = pd.read_pickle(p)
        elif suffix == ".parquet":
            df = pd.read_parquet(p)
        else:
            raise ValueError(f"Unsupported local dataset format: {p.suffix} (use .pkl or .parquet)")

        return _pandas_to_datasetdict(df, split="train")
167 changes: 167 additions & 0 deletions engibench/problems/wings3D/dataset_slurm_airfoil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
"""Dataset Generation for Airfoil Problem via SLURM.
This script generates a dataset for the Airfoil problem using the SLURM API
"""

from argparse import ArgumentParser

from datasets import load_dataset
import numpy as np
from scipy.stats import qmc

from engibench.problems.airfoil.simulation_jobs import simulate_slurm
from engibench.utils import slurm


def calculate_runtime(group_size, minutes_per_sim=5):
    """Return an ``HH:MM:SS`` wall-time string for a batch of simulations.

    The estimate is simply ``group_size * minutes_per_sim`` minutes,
    rendered with zero seconds.
    """
    estimated_minutes = group_size * minutes_per_sim
    hrs, mins = divmod(estimated_minutes, 60)
    return f"{hrs:02d}:{mins:02d}:00"


if __name__ == "__main__":
"""Dataset Generation, Simulation, and Rendering for Airfoil Problem via SLURM.
This script generates a dataset for the Airfoil problem using the SLURM API, though it could
be generalized to other problems as well. It includes functions for simulation of designs.
Command Line Arguments:
-n_designs, --num_designs: How many airfoil designs should we use?
-n_flows, --num_flow_conditions: How many flow conditions should we use per design?
-n_aoas, --num_angles_of_attack: How many angles of attack should we use per design & flow condition pairing?
-group_size, --group_size: How many simulations should we group together on a single cpu?
-n_slurm_array, --num_slurm_array: How many slurm jobs to spawn and submit via slurm arrays? Note this may be limited by the HPC system.
"""
# Fetch command line arguments for render and simulate to know whether to run those functions
parser = ArgumentParser()
parser.add_argument(
"-n_designs",
"--num_designs",
type=int,
default=10,
help="How many airfoil designs should we use?",
)
parser.add_argument(
"-n_flows",
"--num_flow_conditions",
type=int,
default=1,
help="How many flow conditions (Mach Number and Reynolds Number) should we sample for each design?",
)
parser.add_argument(
"-n_aoas",
"--num_angles_of_attack",
type=int,
default=1,
help="How many angles of attack should we sample for each design?",
)
parser.add_argument(
"-group_size",
"--group_size",
type=int,
default=2,
help="How many simulations do you wish to batch within each individual slurm job?",
)
parser.add_argument(
"-n_slurm_array",
"--num_slurm_array",
type=int,
default=1000,
help="What is the maximum size of the Slurm array (Will vary from HPC system to HPC system)?",
)
args = parser.parse_args()

n_designs = args.num_designs
n_flows = args.num_flow_conditions
n_aoas = args.num_angles_of_attack
group_size = args.group_size
n_slurm_array = args.num_slurm_array

# ============== Problem-specific elements ===================
# The following elements are specific to the problem and should be modified accordingly

# Define flow parameter and angle of attack ranges
Ma_min, Ma_max = 0.5, 0.9 # Mach number range
Re_min, Re_max = 1.0e6, 2.0e7 # Reynolds number range
aoa_min, aoa_max = 0.0, 20.0 # Angle of attack range

# Load airfoil designs from HF Database
ds = load_dataset("IDEALLab/airfoil_v0")
designs = (
ds["train"]["initial_design"]
+ ds["train"]["optimal_design"]
+ ds["val"]["initial_design"]
+ ds["val"]["optimal_design"]
+ ds["test"]["initial_design"]
+ ds["test"]["optimal_design"]
)

# Use specified number of designs
designs = designs[:n_designs]

# Generate LHS samples
rng = np.random.default_rng(seed=42) # Optional seed for reproducibility
sampler = qmc.LatinHypercube(d=2, seed=rng)
samples = sampler.random(n=n_designs * n_flows) # n samples needed

# Scale to your flow domain
bounds = np.array([[Ma_min, Ma_max], [Re_min, Re_max]])
scaled_samples = qmc.scale(samples, bounds[:, 0], bounds[:, 1])
mach_values = scaled_samples[:, 0]
reynolds_values = scaled_samples[:, 1]

# Generate all simulation configurations
config_id = 0
simulate_configs_designs = []
for i, design in enumerate(designs):
for j in range(n_flows):
ma = mach_values[i * n_flows + j]
re = reynolds_values[i * n_flows + j]
for alpha in rng.uniform(low=aoa_min, high=aoa_max, size=n_aoas):
problem_configuration = {"mach": ma, "reynolds": re, "alpha": alpha}
config = {"problem_configuration": problem_configuration, "configuration_id": config_id}
config["design"] = design["coords"]
simulate_configs_designs.append(config)
config_id += 1

print(f"Generated {len(simulate_configs_designs)} configurations for simulation.")

# Calculate total number of simulation jobs and number of sbatch maps needed
n_simulations = len(simulate_configs_designs)
n_sbatch_maps = np.ceil(n_simulations / (group_size * n_slurm_array))

slurm_config = slurm.SlurmConfig(
name="Airfoil_dataset_generation",
runtime=calculate_runtime(group_size, minutes_per_sim=5),
ntasks=1,
cpus_per_task=1,
log_dir="./sim_logs/",
)
print(calculate_runtime(group_size, minutes_per_sim=5))

submitted_jobs = []
for ibatch in range(int(n_sbatch_maps)):
sim_batch_configs = simulate_configs_designs[
ibatch * group_size * n_slurm_array : (ibatch + 1) * group_size * n_slurm_array
]
print(len(sim_batch_configs))
print(f"Submitting batch {ibatch + 1}/{int(n_sbatch_maps)}")

job_array = slurm.sbatch_map(
f=simulate_slurm,
args=sim_batch_configs,
slurm_args=slurm_config,
group_size=group_size, # Number of jobs to batch in sequence to reduce job array size
work_dir="scratch",
)

# Save the job array reference
submitted_jobs.append(job_array)

# Wait for this job to complete by calling save()
# This will submit a dependent job that waits for the array to finish
print(f"Waiting for batch {ibatch + 1} to complete...")
job_array.save(f"results_{ibatch}.pkl", slurm_args=slurm_config)
print(f"Batch {ibatch + 1} completed!")
36 changes: 36 additions & 0 deletions engibench/problems/wings3D/fake_pyoptsparse/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Drop-in module for pyoptsparse to unpickle ahistory when pyoptsparse is not installed."""

from types import ModuleType


class FakePyOptSparseObject:
    """Drop-in for objects needed to unpickle a pyoptsparse history when pyoptsparse is not installed."""

    def __init__(self, *args, **kwargs) -> None:
        """Record the constructor arguments verbatim without interpreting them."""
        self.args = args
        self.kwargs = kwargs

    # The real pyoptsparse remaps these dicts between "user" and "optimizer" orderings;
    # for unpickling purposes the identity mapping is sufficient, so all three methods
    # hand back the dict they were given, unchanged.
    def _mapContoOpt_Dict(self, d):  # noqa: N802
        return d

    def _mapXtoOpt_Dict(self, d):  # noqa: N802
        return d

    def _mapObjtoOpt_Dict(self, d):  # noqa: N802
        return d


class Optimization(FakePyOptSparseObject):
    """Drop-in stand-in for pyoptsparse's ``Optimization``; stores constructor args only."""


class Variable(FakePyOptSparseObject):
    """Drop-in stand-in for pyoptsparse's ``Variable``; stores constructor args only."""


class Constraint(FakePyOptSparseObject):
    """Drop-in stand-in for pyoptsparse's ``Constraint``; stores constructor args only."""


class Objective(FakePyOptSparseObject):
    """Drop-in stand-in for pyoptsparse's ``Objective``; stores constructor args only."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# noqa: N999
"""Drop-in."""

from engibench.problems.wings3D.fake_pyoptsparse import FakePyOptSparseObject as Constraint

__all__ = ["Constraint"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# noqa: N999
"""Drop-in."""

from engibench.problems.wings3D.fake_pyoptsparse import FakePyOptSparseObject as Objective

__all__ = ["Objective"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# noqa: N999
"""Drop-in."""

from engibench.problems.wings3D.fake_pyoptsparse import FakePyOptSparseObject as Optimization

__all__ = ["Optimization"]
6 changes: 6 additions & 0 deletions engibench/problems/wings3D/fake_pyoptsparse/pyOpt_variable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# noqa: N999
"""Drop-in."""

from engibench.problems.wings3D.fake_pyoptsparse import FakePyOptSparseObject as Variable

__all__ = ["Variable"]
Loading