Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/framework/containers/particles_comm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ namespace ntt {
npart_t nsend,
npart_t nrecv,
npart_t offset) {
#if defined(DEVICE_ENABLED)
// guard for Intel GPUs.
// Should be a null-operation for other architectures.
Kokkos::fence();
#endif
#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI)
MPI_Sendrecv(send_arr.data(),
nsend,
Expand Down Expand Up @@ -98,6 +103,11 @@ namespace ntt {

template <typename T>
void send(array_t<T*>& send_arr, int send_rank, npart_t nsend) {
#if defined(DEVICE_ENABLED)
// guard for Intel GPUs.
// Should be a null-operation for other architectures.
Kokkos::fence();
#endif
#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI)
MPI_Send(send_arr.data(), nsend, mpi::get_type<T>(), send_rank, 0, MPI_COMM_WORLD);
#else
Expand All @@ -109,6 +119,11 @@ namespace ntt {

template <typename T>
void recv(array_t<T*>& recv_arr, int recv_rank, npart_t nrecv, npart_t offset) {
#if defined(DEVICE_ENABLED)
// guard for Intel GPUs.
// Should be a null-operation for other architectures.
Kokkos::fence();
#endif
#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI)
MPI_Recv(recv_arr.data() + offset,
nrecv,
Expand Down
20 changes: 13 additions & 7 deletions src/framework/containers/particles_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,27 +25,33 @@ namespace ntt {
const auto num_tags = ntags();
array_t<npart_t*> npptag { "nparts_per_tag", ntags() };

// count # of particles per each tag
auto npptag_scat = Kokkos::Experimental::create_scatter_view(npptag);
// count # of particles per each tag, skipping the alive bin in-kernel.
constexpr short tag_alive_s = static_cast<short>(ParticleTag::alive);
Kokkos::parallel_for(
"NpartPerTag",
rangeActiveParticles(),
Lambda(prtlidx_t p) {
auto npptag_acc = npptag_scat.access();
if (this_tag(p) < 0 || this_tag(p) >= static_cast<short>(num_tags)) {
const short t = this_tag(p);
if (t < 0 || t >= static_cast<short>(num_tags)) {
raise::KernelError(HERE, "Invalid tag value");
}
npptag_acc(this_tag(p)) += 1;
if (t != tag_alive_s) {
Kokkos::atomic_add(&npptag(t), static_cast<npart_t>(1));
}
});
Kokkos::Experimental::contribute(npptag, npptag_scat);

// copy the count to a vector on the host
// copy the count to a vector on the host and reconstruct the alive bin
auto npptag_h = Kokkos::create_mirror_view(npptag);
Kokkos::deep_copy(npptag_h, npptag);
std::vector<npart_t> npptag_vec(num_tags);
npart_t non_alive_total = 0;
for (auto t { 0u }; t < num_tags; ++t) {
npptag_vec[t] = npptag_h(t);
if (static_cast<short>(t) != tag_alive_s) {
non_alive_total += npptag_h(t);
}
}
npptag_vec[tag_alive_s] = npart() - non_alive_total;

// count the offsets on the host and copy to device
const array_t<npart_t*> tag_offsets("tag_offsets", num_tags - 3);
Expand Down
15 changes: 15 additions & 0 deletions src/framework/domain/comm_mpi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ namespace comm {
int recv_rank,
ncells_t nsend,
ncells_t nrecv) {
#if defined(DEVICE_ENABLED)
// guard for Intel GPUs.
// Should be a null-operation for other architectures.
Kokkos::fence();
#endif
#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI)
MPI_Sendrecv(send_arr.data(),
nsend,
Expand Down Expand Up @@ -68,6 +73,11 @@ namespace comm {

template <unsigned short D>
void send(ndarray_t<D>& send_arr, int send_rank, ncells_t nsend) {
#if defined(DEVICE_ENABLED)
// guard for Intel GPUs.
// Should be a null-operation for other architectures.
Kokkos::fence();
#endif
#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI)
MPI_Send(send_arr.data(), nsend, mpi::get_type<real_t>(), send_rank, 0, MPI_COMM_WORLD);
#else
Expand All @@ -84,6 +94,11 @@ namespace comm {

template <unsigned short D>
void recv(ndarray_t<D>& recv_arr, int recv_rank, ncells_t nrecv) {
#if defined(DEVICE_ENABLED)
// guard for Intel GPUs.
// Should be a null-operation for other architectures.
Kokkos::fence();
#endif
#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI)
MPI_Recv(recv_arr.data(),
nrecv,
Expand Down
13 changes: 0 additions & 13 deletions src/framework/domain/metadomain_comm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,19 +349,6 @@ namespace ntt {
false);
}
} else {
if (comm_em) {
comm::CommunicateField<M::Dim, 6>(domain.index(),
domain.fields.em,
domain.fields.em,
send_ind,
recv_ind,
send_rank,
recv_rank,
send_slice,
recv_slice,
comp_range_fld,
false);
}
if (comm_j) {
comm::CommunicateField<M::Dim, 3>(domain.index(),
domain.fields.cur,
Expand Down
Loading