diff --git a/src/framework/containers/particles_comm.cpp b/src/framework/containers/particles_comm.cpp index 8fe9d1ec..0fa140f9 100644 --- a/src/framework/containers/particles_comm.cpp +++ b/src/framework/containers/particles_comm.cpp @@ -28,6 +28,11 @@ namespace ntt { npart_t nsend, npart_t nrecv, npart_t offset) { +#if defined(DEVICE_ENABLED) + // guard for Intel GPUs. + // Should be a null-operation for other architectures. + Kokkos::fence(); +#endif #if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) MPI_Sendrecv(send_arr.data(), nsend, @@ -98,6 +103,11 @@ namespace ntt { template void send(array_t& send_arr, int send_rank, npart_t nsend) { +#if defined(DEVICE_ENABLED) + // guard for Intel GPUs. + // Should be a null-operation for other architectures. + Kokkos::fence(); +#endif #if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) MPI_Send(send_arr.data(), nsend, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); #else @@ -109,6 +119,11 @@ namespace ntt { template void recv(array_t& recv_arr, int recv_rank, npart_t nrecv, npart_t offset) { +#if defined(DEVICE_ENABLED) + // guard for Intel GPUs. + // Should be a null-operation for other architectures. + Kokkos::fence(); +#endif #if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) MPI_Recv(recv_arr.data() + offset, nrecv, diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index e3598f8b..52103c17 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -33,6 +33,11 @@ namespace comm { int recv_rank, ncells_t nsend, ncells_t nrecv) { +#if defined(DEVICE_ENABLED) + // guard for Intel GPUs. + // Should be a null-operation for other architectures. + Kokkos::fence(); +#endif #if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) MPI_Sendrecv(send_arr.data(), nsend, @@ -68,6 +73,11 @@ namespace comm { template void send(ndarray_t& send_arr, int send_rank, ncells_t nsend) { +#if defined(DEVICE_ENABLED) + // guard for Intel GPUs. + // Should be a null-operation for other architectures. + Kokkos::fence(); +#endif #if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) MPI_Send(send_arr.data(), nsend, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); #else @@ -84,6 +94,11 @@ namespace comm { template void recv(ndarray_t& recv_arr, int recv_rank, ncells_t nrecv) { +#if defined(DEVICE_ENABLED) + // guard for Intel GPUs. + // Should be a null-operation for other architectures. + Kokkos::fence(); +#endif #if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) MPI_Recv(recv_arr.data(), nrecv, diff --git a/src/framework/domain/metadomain_comm.cpp b/src/framework/domain/metadomain_comm.cpp index e492752c..f8c0f60d 100644 --- a/src/framework/domain/metadomain_comm.cpp +++ b/src/framework/domain/metadomain_comm.cpp @@ -349,19 +349,6 @@ namespace ntt { false); } } else { - if (comm_em) { - comm::CommunicateField(domain.index(), - domain.fields.em, - domain.fields.em, - send_ind, - recv_ind, - send_rank, - recv_rank, - send_slice, - recv_slice, - comp_range_fld, - false); - } if (comm_j) { comm::CommunicateField(domain.index(), domain.fields.cur,