7070
# YASK compiler settings for offload.
# These overrides are applied only when 'offload' is 1.  Each nested block
# is selected by a compiler-detection flag; if neither flag is 1, no
# setting in this section is changed.
ifeq ($(offload),1)

 # BKMs for Intel GPUs.
 # Applied when cxx_is_llvm_intel is 1.
 ifeq ($(cxx_is_llvm_intel),1)
  outer_domain_layout := 1
  early_loads := 0
  min_buffer_len := 1
  inner_loop_dim := 1
 endif

 # BKMs for Nvidia GPUs.
 # Applied when cxx_is_nv is 1.  Note that inner_loop_dim is not set in
 # this branch, so it keeps whatever value it had before this section.
 ifeq ($(cxx_is_nv),1)
  outer_domain_layout := 0
  early_loads := 0
  min_buffer_len := 99
 endif
endif
@@ -482,7 +492,11 @@ RANK_LOOP_CODE := $(RANK_LOOP_MODS) loop($(RANK_LOOP_ORDER)) { }
# 'omp' modifier creates an outer OpenMP loop so that each block is assigned
# to a top-level OpenMP thread.
MEGA_BLOCK_LOOP_MODS :=

# OpenMP directive text for the mega-block loop.  The 'proc_bind(spread)'
# clause is included only when cxx_is_llvm_intel is 1; every other compiler
# gets the same directive without it.
# NOTE(review): presumably the clause is dropped because other toolchains
# do not handle it well here -- confirm the reason with the change author.
ifeq ($(cxx_is_llvm_intel),1)
 MEGA_BLOCK_LOOP_OMP := omp parallel for schedule(dynamic,1) proc_bind(spread)
else
 MEGA_BLOCK_LOOP_OMP := omp parallel for schedule(dynamic,1)
endif

# The directive is wrapped in single quotes so it is passed as one argument
# after '-omp'.
MEGA_BLOCK_LOOP_FLAGS := -prefix mega_block_ -omp '$(MEGA_BLOCK_LOOP_OMP)'
MEGA_BLOCK_LOOP_ORDER := DOMAIN_LOOP_DIMS
MEGA_BLOCK_LOOP_CODE := $(MEGA_BLOCK_LOOP_MODS) omp loop($(MEGA_BLOCK_LOOP_ORDER)) { }
@@ -501,7 +515,11 @@ BLOCK_LOOP_CODE := $(BLOCK_LOOP_MODS) loop($(BLOCK_LOOP_ORDER)) { }
# nested OpenMP thread. The OpenMP construct is not used when running with
# '-bind_inner_threads' because another parallel section is created.
MICRO_BLOCK_LOOP_MODS :=

# OpenMP directive text for the micro-block loop.  The 'proc_bind(spread)'
# clause is included only when cxx_is_llvm_intel is 1; every other compiler
# gets the same directive without it.
# NOTE(review): presumably the clause is dropped because other toolchains
# do not handle it well here -- confirm the reason with the change author.
ifeq ($(cxx_is_llvm_intel),1)
 MICRO_BLOCK_LOOP_OMP := omp parallel for schedule(static,1) proc_bind(spread)
else
 MICRO_BLOCK_LOOP_OMP := omp parallel for schedule(static,1)
endif

# The directive is wrapped in single quotes so it is passed as one argument
# after '-omp'.
MICRO_BLOCK_LOOP_FLAGS := -prefix micro_block_ -omp '$(MICRO_BLOCK_LOOP_OMP)'
MICRO_BLOCK_LOOP_ORDER := DOMAIN_LOOP_DIMS
MICRO_BLOCK_LOOP_CODE := $(MICRO_BLOCK_LOOP_MODS) omp loop($(MICRO_BLOCK_LOOP_ORDER)) { }
0 commit comments