From 2aeef37c44561f4fa67c4f637dd70d067c37228b Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Fri, 8 May 2026 02:33:10 +0000 Subject: [PATCH 1/2] add explicit Kokkos::fence() before mpi to secure against problem with Intel GPUs --- src/framework/containers/particles_comm.cpp | 15 +++++++++++++++ src/framework/domain/comm_mpi.hpp | 15 +++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/src/framework/containers/particles_comm.cpp b/src/framework/containers/particles_comm.cpp index 8fe9d1ec5..0fa140f93 100644 --- a/src/framework/containers/particles_comm.cpp +++ b/src/framework/containers/particles_comm.cpp @@ -28,6 +28,11 @@ namespace ntt { npart_t nsend, npart_t nrecv, npart_t offset) { +#if defined(DEVICE_ENABLED) + // Wait for all pending device work before the communication below (guard for Intel GPUs). + // Not a no-op on other backends: it blocks on outstanding kernels, but is cheap when the device is idle. + Kokkos::fence(); +#endif #if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) MPI_Sendrecv(send_arr.data(), nsend, @@ -98,6 +103,11 @@ namespace ntt { template void send(array_t& send_arr, int send_rank, npart_t nsend) { +#if defined(DEVICE_ENABLED) + // Wait for all pending device work before the communication below (guard for Intel GPUs). + // Not a no-op on other backends: it blocks on outstanding kernels, but is cheap when the device is idle. + Kokkos::fence(); +#endif #if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) MPI_Send(send_arr.data(), nsend, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); #else @@ -109,6 +119,11 @@ namespace ntt { template void recv(array_t& recv_arr, int recv_rank, npart_t nrecv, npart_t offset) { +#if defined(DEVICE_ENABLED) + // Wait for all pending device work before the communication below (guard for Intel GPUs). + // Not a no-op on other backends: it blocks on outstanding kernels, but is cheap when the device is idle.
+ Kokkos::fence(); +#endif #if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) MPI_Recv(recv_arr.data() + offset, nrecv, diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index e3598f8b0..52103c170 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -33,6 +33,11 @@ namespace comm { int recv_rank, ncells_t nsend, ncells_t nrecv) { +#if defined(DEVICE_ENABLED) + // Wait for all pending device work before the communication below (guard for Intel GPUs). + // Not a no-op on other backends: it blocks on outstanding kernels, but is cheap when the device is idle. + Kokkos::fence(); +#endif #if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) MPI_Sendrecv(send_arr.data(), nsend, @@ -68,6 +73,11 @@ namespace comm { template void send(ndarray_t& send_arr, int send_rank, ncells_t nsend) { +#if defined(DEVICE_ENABLED) + // Wait for all pending device work before the communication below (guard for Intel GPUs). + // Not a no-op on other backends: it blocks on outstanding kernels, but is cheap when the device is idle. + Kokkos::fence(); +#endif #if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) MPI_Send(send_arr.data(), nsend, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); #else @@ -84,6 +94,11 @@ namespace comm { template void recv(ndarray_t& recv_arr, int recv_rank, ncells_t nrecv) { +#if defined(DEVICE_ENABLED) + // Wait for all pending device work before the communication below (guard for Intel GPUs). + // Not a no-op on other backends: it blocks on outstanding kernels, but is cheap when the device is idle.
+ Kokkos::fence(); +#endif #if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) MPI_Recv(recv_arr.data(), nrecv, From 2e16419db3a3914dbbb25d4553f93ef2febc2146 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Fri, 8 May 2026 02:33:43 +0000 Subject: [PATCH 2/2] remove duplicate `em` comm --- src/framework/domain/metadomain_comm.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/framework/domain/metadomain_comm.cpp b/src/framework/domain/metadomain_comm.cpp index e492752c4..f8c0f60d7 100644 --- a/src/framework/domain/metadomain_comm.cpp +++ b/src/framework/domain/metadomain_comm.cpp @@ -349,19 +349,6 @@ namespace ntt { false); } } else { - if (comm_em) { - comm::CommunicateField(domain.index(), - domain.fields.em, - domain.fields.em, - send_ind, - recv_ind, - send_rank, - recv_rank, - send_slice, - recv_slice, - comp_range_fld, - false); - } if (comm_j) { comm::CommunicateField(domain.index(), domain.fields.cur,