Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
*.o
*.so
*.mod
*.smod
*.out
Expand Down
15 changes: 13 additions & 2 deletions Libraries/Modules/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ SRCS = Mat_subroutines_mod.F90 \
DEPSFILE := .deps.mk
DEPSGEN := ../../Prog/gen_deps.py

lib: $(LIB)
lib: $(LIB) libfakeintel.so
Comment thread
jonasschwab marked this conversation as resolved.
Comment thread
jonasschwab marked this conversation as resolved.

Comment thread
jonasschwab marked this conversation as resolved.
# OBJS and MODS are auto-generated into $(DEPSFILE) by gen_deps.py
-include $(DEPSFILE)
Expand All @@ -56,11 +56,22 @@ lattices_armv8.4-a.so: runtime_error_mod.F90 lattices_interface_mod.F90 matrix_m
%.o: %.F90
$(ALF_FC) -c $(ALF_FLAGS_MODULES) $<

# Best-effort build of the shared library from fakeintel.c.
# Each candidate C compiler is tried in turn; the loop stops at the first one
# that is installed AND successfully produces the library, so a broken
# compiler early in the list no longer prevents later ones from being tried.
# Failure is deliberately non-fatal: if no candidate works, only a warning is
# printed and the recipe still exits with status 0.
libfakeintel.so: fakeintel.c
	@for cc in gcc clang icc icx cc; do \
		if command -v $$cc > /dev/null 2>&1 && \
			$$cc -shared -fPIC -o $@ $<; then \
			break; \
		fi; \
	done; \
	if [ ! -f $@ ]; then \
		echo "No working C compiler found, libfakeintel.so not created." 1>&2; \
	fi

Comment thread
jonasschwab marked this conversation as resolved.
# Regenerate the dependency file from the source list with gen_deps.py.
# The output is written to a temporary file and only renamed onto the target
# when the generator succeeds: the shell creates the redirect target before
# python3 runs, so writing directly to the target would leave an empty or
# truncated file that looks up to date if the generator fails.
$(DEPSFILE): $(SRCS) $(DEPSGEN)
	python3 $(DEPSGEN) $(SRCS) > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

clean: $(DEPSFILE)
rm -f $(OBJS) $(MODS) $(LIB) $(DEPSFILE)
rm -f $(OBJS) $(MODS) $(LIB) $(DEPSFILE) libfakeintel.so

#unused modules: histograms histograms_v2 log_mesh natural_constants
#unused, except for in Prog/FFA_Orginals/: precdef
16 changes: 16 additions & 0 deletions Libraries/Modules/fakeintel.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// This file is a fake implementation of the MKL function mkl_serv_get_cpu_true,
// which is used by MKL to determine if the CPU is an Intel CPU.
// This is used to trick MKL into thinking that it is running on an Intel CPU,
// which allows it to use the optimized code paths for Intel CPUs, even when running on an AMD CPU.
// This has been shown to improve performance on AMD CPUs significantly, especially on AMD Zen CPUs.
// See: https://danieldk.eu/Software/Misc/Intel-MKL-on-AMD-Zen

// Pointer to a parameterless function returning int.
typedef int (*fakeintel_fptr)(void);

// Unconditionally answers 1.
int mkl_serv_intel_cpu_true(void)
{
    return 1;
}

// Hands back the address of mkl_serv_intel_cpu_true.
fakeintel_fptr mkl_serv_get_cpu_true(void)
{
    fakeintel_fptr always_true = mkl_serv_intel_cpu_true;
    return always_true;
}
Comment thread
jonasschwab marked this conversation as resolved.
2 changes: 1 addition & 1 deletion Scripts_and_Parameters_files/JobfileFritz.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ module load intel
module load intelmpi
module load mkl

# the follwing environment variables generate an optimal pinning (to the best of our knowledge)
# the following environment variables generate an optimal pinning (to the best of our knowledge)
# This DOES NOT have to be adapted to the choice of Ntasks
# FIRST EXCEPTION: If you chose to use hyper-threading (not recommended) you should set I_MPI_PIN_CELL=cpu
# SECOND EXCEPTION: The following environment variables are Intel specific.
Expand Down
63 changes: 63 additions & 0 deletions Scripts_and_Parameters_files/JobfileHelma.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/bin/bash -l
#
# Sample jobscript for Helma CPU of NHR@FAU
#
# The following jobscript contains a few 'variables' marked by the ##...## pattern.
# The user has to provide the appropriate values, e.g. replace ##Nnodes## by 1 if the job is supposed to run on a single node.
# Most variables are self-explanatory, one exception might be Nthreads, which is referring to the number of OpenMP threads per MPI task.
# In general, we found that ALF does not profit from hyper-threading, so we suggest using only physical cores.
#
# On Helma, a single node has 384 cores.
# There are two partitions, "cpu" and "preempt_cpu".
# -"cpu": Single-node and multi-node jobs are possible.
# Multi-node jobs always get all 384 cores of each node,
# while single-node jobs can request cores in multiples of 48 (one NUMA domain).
# -"preempt_cpu":
# Only single node, but an arbitrary number of cores is allowed.
# The walltime is up to 48 hours, but jobs might get cancelled after 2 hours
# to make room for jobs in the "cpu" partition (with a grace time as in the GPU preempt partition).
#
# For single node jobs, the number of cores is ##NtaskPnode## * ##Nthreads##.
#
Comment thread
jonasschwab marked this conversation as resolved.
#SBATCH --job-name ##NAME##
#SBATCH --output=out.%j.log
#SBATCH --error=err.%j.log
#Notification and type
#SBATCH --mail-type=ALL
#SBATCH --mail-user=##EMAIL##
# Wall clock limit (HH:MM:SS):
#SBATCH --time=##TIME##
#SBATCH --no-requeue
#Setup of execution environment
#SBATCH --export=NONE

#available partitions: cpu, preempt_cpu
#SBATCH --partition=##PARTITION##
Comment thread
jonasschwab marked this conversation as resolved.
#SBATCH --nodes=##Nnodes##
#SBATCH --ntasks-per-node=##NtaskPnode##
#SBATCH --cpus-per-task=##Nthreads##

unset SLURM_EXPORT_ENV
module --force switch gpu-env/2025 cpu-env/2026
module load intel/2025.3.1
module load intelmpi/2021.17.0
module load mkl/2024.2.2

# the following environment variables generate an optimal pinning (to the best of our knowledge)
# This DOES NOT have to be adapted to the choice of Ntasks
# FIRST EXCEPTION: If you choose to use hyper-threading (not recommended) you should set I_MPI_PIN_CELL=cpu
# SECOND EXCEPTION: The following environment variables are Intel specific.
#export KMP_AFFINITY=verbose,granularity=fine,compact
export KMP_AFFINITY=granularity=fine,compact
export I_MPI_PIN_CELL=core
export I_MPI_PIN_DOMAIN=auto:cache
export I_MPI_PIN_ORDER=scatter

export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK

# Uncomment the following line to speed up MKL by making it believe it runs on an Intel CPU.
# After `make`, `libfakeintel.so` is expected under `Libraries/Modules/`; use its absolute path here, e.g.:
# export LD_PRELOAD=/absolute/path/to/ALF/Libraries/Modules/libfakeintel.so

bash ./out_to_in.sh
mpiexec -n $SLURM_NTASKS ##EXECUTABLE##
2 changes: 1 addition & 1 deletion Scripts_and_Parameters_files/JobfileSuperMUC-NG.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ module load slurm_setup

module load hdf5/1.10.7-intel21

# the follwing environment variables generate an optimal pinning (to the best of our knowledge)
# the following environment variables generate an optimal pinning (to the best of our knowledge)
# This DOES NOT have to be adapted to the choice of Ntasks
# FIRST EXCEPTION: If you chose to use hyper-threading (not recommended) you should set I_MPI_PIN_CELL=cpu
# SECOND EXCEPTION: The following environment variables are Intel specific.
Expand Down
6 changes: 3 additions & 3 deletions Scripts_and_Parameters_files/JobfileSuperMUC.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ module switch mkl mkl/2018

export OMP_NUM_THREADS=##Nthreads##

# the follwing eviroment variables generate an optimal pinning (to the best of our knowledge)
# This DOES NOT have to be addepted to the choice of Ntasks
# the following environment variables generate an optimal pinning (to the best of our knowledge)
# This DOES NOT have to be adapted to the choice of Ntasks
# FIRST EXCEPTION: If you chose to use hyperthreading (not recommended) you should set I_MPI_PIN_CELL=cpu
# SECOND EXCEPTION: The following enviroment variables are Intel specific.
# SECOND EXCEPTION: The following environment variables are Intel specific.
export KMP_AFFINITY=verbose,granularity=fine,compact
export I_MPI_PIN_CELL=core
export I_MPI_PIN_DOMAIN=auto:cache3
Expand Down
21 changes: 21 additions & 0 deletions configure.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Please choose one of the following MACHINEs:
* SuperMUC-NG
* JUWELS
* FRITZ
* HELMA
Possible MODEs are:
* MPI (default)
* noMPI
Expand Down Expand Up @@ -458,6 +459,26 @@ case $MACHINE in
set_hdf5_flags "$INTELCC" ifort "$INTELCXX" || return 1
fi
;;


#NHR@FAU Helma CPU cluster
HELMA)
module --force switch gpu-env/2025 cpu-env/2026
Comment thread
jonasschwab marked this conversation as resolved.
module load intel/2025.3.1
module load intelmpi/2021.17.0
module load mkl/2024.2.2

F90OPTFLAGS="$INTELLLVMOPTFLAGS"
F90USEFULFLAGS="$INTELLLVMUSEFULFLAGS"
ALF_FC="$INTELLLVMCOMPILER"
find_mkl_flag || return 1
LIB_BLAS_LAPACK="${INTELMKL}"
if [ "${HDF5_ENABLED}" = "1" ]; then
set_intelcc
set_intelcxx
set_hdf5_flags "$INTELCC" ifx "$INTELCXX" || return 1
fi
;;
#Default (unknown machine)
*)
if [ "$NO_FALLBACK" = "1" ]; then
Expand Down
Loading