Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ endif()

# --- Options ---
option(ELEMENTS_ENABLE_SERIAL "Enable Serial support in Kokkos" ON)
option(ELEMENTS_ENABLE_OPENMP "Enable OpenMP support in Kokkos" ON)
option(ELEMENTS_ENABLE_OPENMP "Enable OpenMP support in Kokkos" OFF)
option(ELEMENTS_ENABLE_PTHREADS "Enable PThreads support in Kokkos" OFF)
option(ELEMENTS_ENABLE_CUDA "Enable CUDA support in Kokkos" OFF)
option(ELEMENTS_ENABLE_HIP "Enable HIP support in Kokkos" OFF)
Expand Down
20 changes: 11 additions & 9 deletions examples/decomp_example/include/mesh_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,7 @@ void write_vtu(swage::Mesh& mesh,
const size_t num_nodes = mesh.num_owned_nodes;
const size_t num_elems = mesh.num_owned_elems;
const size_t num_dims = mesh.num_dims;
printf("num_dims = %zu\n", num_dims);

// save the cell state to an array for exporting to graphics files
auto elem_fields = CArray<double>(num_elems, num_cell_scalar_vars);
Expand Down Expand Up @@ -612,7 +613,7 @@ void write_vtu(swage::Mesh& mesh,

CArray<int> convert_fierro_to_vtk((Pn_order+1)*(Pn_order+1)*(Pn_order+1));
int this_node_lid = 0;
for (int k = 0; k <= Pn_order; k++) {
for (int k = 0; k <= Pn_order_z; k++) {
for (int j = 0; j <= Pn_order; j++) {
for (int i = 0; i <= Pn_order; i++) {

Expand All @@ -637,11 +638,12 @@ void write_vtu(swage::Mesh& mesh,
fprintf(vtu_file, " "); // adding indentation before printing nodes in element

int this_node_lid = 0;
for (int k = 0; k <= Pn_order; k++) {
for (int k = 0; k <= Pn_order_z; k++) {
for (int j = 0; j <= Pn_order; j++) {
for (int i = 0; i <= Pn_order; i++) {
// size_t node_lid = PointIndexFromIJK(i, j, k, order);
// size_t node_lid = PointIndexFromIJK(i, j, k, order);
int node_lid = convert_fierro_to_vtk(this_node_lid);
if (num_dims == 2) node_lid = this_node_lid;
fprintf(vtu_file, " %zu", static_cast<unsigned long>(mesh.nodes_in_elem.host(elem_gid, node_lid)));
this_node_lid ++;
}
Expand Down Expand Up @@ -711,9 +713,9 @@ void write_vtu(swage::Mesh& mesh,
// Point vector variables
for (int var = 0; var < num_point_vec_vars; var++) {
fprintf(vtu_file, " <DataArray type=\"Float32\" Name=\"%s\" NumberOfComponents=\"%d\" format=\"ascii\">\n",
point_vec_var_names[var], num_dims);
point_vec_var_names[var], static_cast<int>(num_dims));
for (size_t node_gid = 0; node_gid < num_nodes; node_gid++) {
for (int dim = 0; dim < num_dims; dim++) {
for (int dim = 0; dim < static_cast<int>(num_dims); dim++) {
fprintf(vtu_file, " %f\n", vec_fields(node_gid, var, dim));
}
}
Expand All @@ -737,10 +739,10 @@ void write_vtu(swage::Mesh& mesh,
// Cell vector variables
for (int var = 0; var < num_cell_vec_vars; var++) {
fprintf(vtu_file, " <DataArray type=\"Float32\" Name=\"%s\" NumberOfComponents=\"%d\" format=\"ascii\">\n",
cell_vec_var_names[var], num_dims);
cell_vec_var_names[var], static_cast<int>(num_dims));
for (size_t elem_gid = 0; elem_gid < num_elems; elem_gid++) {
// TODO: Populate cell vector field data from appropriate source
for (int dim = 0; dim < num_dims; dim++) {
for (int dim = 0; dim < static_cast<int>(num_dims); dim++) {
fprintf(vtu_file, " %f\n",
gauss_point.fields_vec.host(elem_gid, dim));
}
Expand Down Expand Up @@ -799,7 +801,7 @@ void write_vtu(swage::Mesh& mesh,
fprintf(pvtu_file, " <PPointData>\n");
for (int var = 0; var < num_point_vec_vars; var++) {
fprintf(pvtu_file, " <PDataArray type=\"Float32\" Name=\"%s\" NumberOfComponents=\"%d\"/>\n",
point_vec_var_names[var], num_dims);
point_vec_var_names[var], static_cast<int>(num_dims));
}
for (int var = 0; var < num_point_scalar_vars; var++) {
fprintf(pvtu_file, " <PDataArray type=\"Float32\" Name=\"%s\"/>\n",
Expand All @@ -811,7 +813,7 @@ void write_vtu(swage::Mesh& mesh,
fprintf(pvtu_file, " <PCellData>\n");
for (int var = 0; var < num_cell_vec_vars; var++) {
fprintf(pvtu_file, " <PDataArray type=\"Float32\" Name=\"%s\" NumberOfComponents=\"%d\"/>\n",
cell_vec_var_names[var], num_dims);
cell_vec_var_names[var], static_cast<int>(num_dims));
}
for (int var = 0; var < num_cell_scalar_vars; var++) {
fprintf(pvtu_file, " <PDataArray type=\"Float32\" Name=\"%s\"/>\n",
Expand Down
19 changes: 15 additions & 4 deletions examples/decomp_example/src/mesh_decomp_example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ int main(int argc, char** argv) {
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);

int num_dims = 3;
int Pn_order = 2;
int num_dims = 2;
int Pn_order = 1;

double t_main_start = MPI_Wtime();

Expand All @@ -55,8 +55,8 @@ int main(int argc, char** argv) {
double outer_radius = 2.0;
double start_angle = 0.0;
double end_angle = 45.0;
int num_elems_i = 100;
int num_elems_j = 100;
int num_elems_i = 40;
int num_elems_j = 40;

// Initial mesh built on rank zero
swage::Mesh initial_mesh;
Expand All @@ -79,6 +79,9 @@ int main(int argc, char** argv) {
std::cout<<"Rank "<<rank<<" Building initial mesh"<<std::endl;

std::cout<<"Initializing mesh"<<std::endl;
initial_mesh.num_dims = num_dims;
initial_mesh.Pn = Pn_order;

if(num_dims == 3) {
build_3d_box(initial_mesh, initial_node_coords, origin, length, num_elems_dim, Pn_order);
} else if(num_dims == 2) {
Expand Down Expand Up @@ -126,16 +129,24 @@ int main(int argc, char** argv) {

if(world_size != 1) { // pass through the partitioning function if not a single rank
elements::partition_mesh(initial_mesh, final_mesh, initial_node_coords, final_node_coords, element_communication_plan, node_communication_plan, world_size, rank);

MPI_Barrier(MPI_COMM_WORLD);
if(rank == 0) printf("After partitioning\n");

} else {
final_mesh = initial_mesh;
final_mesh.num_owned_elems = initial_mesh.num_elems;
final_mesh.num_owned_nodes = initial_mesh.num_nodes;
final_node_coords = initial_node_coords;
final_mesh.num_dims = num_dims;
final_mesh.Pn = Pn_order;
}
double t_partition_end = MPI_Wtime();

std::cout<<"Final mesh has " << final_mesh.num_elems << " elements and " << final_mesh.num_nodes << " nodes" << std::endl;
std::cout<<"Final mesh has " << final_mesh.num_owned_elems << " owned elements and " << final_mesh.num_owned_nodes << " owned nodes" << std::endl;
std::cout<<"Final mesh num_dims = " << final_mesh.num_dims << std::endl;
MPI_Barrier(MPI_COMM_WORLD);

// Verify communicaiton plans
if(world_size != 1) element_communication_plan.verify_graph_communicator();
Expand Down
52 changes: 37 additions & 15 deletions src/decomp_utilities/decomp_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ inline void naive_partition_mesh(
// All ranks must agree on dimensionality before initializing per-rank meshes
MPI_Bcast(&num_dim, 1, MPI_INT, 0, MPI_COMM_WORLD);

if (rank == 0 && print_info) printf("Num dim: %d\n", num_dim);


// Compute the number of elements to send to each rank and num_nodes_per_elem
std::vector<int> elems_per_rank(world_size); // number of elements to send to each rank size(world_size)
Expand Down Expand Up @@ -487,7 +489,7 @@ inline void naive_partition_mesh(
// Initialize the naive_mesh data structures for each rank
// ******************************************************************************************
naive_mesh.initialize_nodes(num_nodes_on_rank);
if (initial_mesh.Pn > 0){
if (initial_mesh.Pn > 1){
naive_mesh.initialize_elems_Pn(num_elements_on_rank, num_dim, initial_mesh.Pn);
} else {
naive_mesh.initialize_elems(num_elements_on_rank, num_dim);
Expand Down Expand Up @@ -1120,7 +1122,10 @@ inline void build_ghost(


output_mesh.initialize_nodes(total_extended_nodes);
if (input_mesh.Pn > 0){
// Use the high-order (Pn) initialization only for Pn > 1; for the linear case
// (Pn == 1) use initialize_elems to keep the element kind/layout consistent
// with naive_mesh and intermediate_mesh.
if (input_mesh.Pn > 1){
output_mesh.initialize_elems_Pn(total_extended_elems, input_mesh.num_dims, input_mesh.Pn);
}
else {
Expand Down Expand Up @@ -1361,7 +1366,7 @@ inline void build_ghost(
}

MPI_Barrier(MPI_COMM_WORLD);
if(rank == 0 && print_info) std::cout<<"After boundary_elem_targets"<<std::endl;
if(rank == 0 && print_info) printf("After boundary_elem_targets\n");

// Add a vector to store boundary element local_ids (those who have ghost destinations across ranks)
std::vector<int> boundary_elem_local_ids;
Expand Down Expand Up @@ -1401,15 +1406,15 @@ inline void build_ghost(

MPI_Barrier(MPI_COMM_WORLD);

if(rank == 0 && print_info) printf("After ghost_comm_ranks\n");

output_mesh.num_boundary_elems = boundary_elem_local_ids.size();
output_mesh.boundary_elem_local_ids = DCArrayKokkos<size_t>(output_mesh.num_boundary_elems, "boundary_elem_local_ids");
for (int i = 0; i < output_mesh.num_boundary_elems; i++) {
output_mesh.boundary_elem_local_ids.host(i) = boundary_elem_local_ids[i];
}
output_mesh.boundary_elem_local_ids.update_device();

print_info = false;


MPI_Barrier(MPI_COMM_WORLD);

Expand Down Expand Up @@ -1438,6 +1443,7 @@ inline void build_ghost(
}

MPI_Barrier(MPI_COMM_WORLD);
if(rank == 0 && print_info) printf("After node_set_to_send_by_rank\n");

std::map<int, std::vector<int>> nodes_to_send_by_rank; // rank -> list of global node indices

Expand Down Expand Up @@ -1483,6 +1489,7 @@ inline void build_ghost(
// Initialize the graph communicator for element communication
element_communication_plan.initialize_graph_communicator(outdegree, ghost_comm_ranks_vec.data(), elem_indegree, ghost_elem_receive_ranks_vec.data());
MPI_Barrier(MPI_COMM_WORLD);
if(rank == 0 && print_info) printf("After element graph communicator\n");

// Optional: Verify the graph communicator was created successfully
// if(print_info) element_communication_plan.verify_graph_communicator();
Expand Down Expand Up @@ -1585,9 +1592,9 @@ inline void build_ghost(
elems_to_recv_by_rank_rr.update_device();
MATAR_FENCE();
element_communication_plan.setup_send_recv(elems_to_send_by_rank_rr, elems_to_recv_by_rank_rr);

MPI_Barrier(MPI_COMM_WORLD);

if(rank == 0 && print_info) printf("After element communication plan setup\n");
// --------------------------------------------------------------------------------------
// Build the send pattern for nodes
// --------------------------------------------------------------------------------------
Expand Down Expand Up @@ -1654,7 +1661,7 @@ inline void build_ghost(
size_t node_gid = output_mesh.local_to_global_node_mapping.host(i);
node_gid_to_ghost_lid[node_gid] = i;
}

if(rank == 0 && print_info) printf("After node_gid_to_ghost_lid\n");
// Now convert the GID sets to local index vectors
for (const auto& pair : node_set_to_recv_by_rank) {
int source_rank = pair.first;
Expand Down Expand Up @@ -1687,7 +1694,7 @@ inline void build_ghost(
}
}
nodes_to_recv_by_rank_rr.update_device();

if(rank == 0 && print_info) printf("After nodes_to_recv_by_rank_rr\n");
MPI_Barrier(MPI_COMM_WORLD);

node_communication_plan.setup_send_recv(nodes_to_send_by_rank_rr, nodes_to_recv_by_rank_rr);
Expand Down Expand Up @@ -1734,7 +1741,8 @@ inline void partition_mesh(
CommunicationPlan& element_communication_plan,
CommunicationPlan& node_communication_plan,
int world_size,
int rank){
int rank)
{

bool print_info = false;

Expand All @@ -1746,10 +1754,14 @@ inline void partition_mesh(
unsigned long long Pn_order = static_cast<unsigned long long>(initial_mesh.Pn);
MPI_Bcast(&Pn_order, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD);
initial_mesh.Pn = static_cast<size_t>(Pn_order);
final_mesh.num_dims = initial_mesh.num_dims;
final_mesh.Pn = initial_mesh.Pn;

// Create mesh, gauss points, and node data structures on each rank
// This is the initial partitioned mesh
swage::Mesh naive_mesh;
naive_mesh.num_dims = initial_mesh.num_dims;
naive_mesh.Pn = initial_mesh.Pn;
MPICArrayKokkos<double> naive_node_coords;

// Mesh partitioned by pt-scotch, not including ghost
Expand Down Expand Up @@ -2101,9 +2113,9 @@ inline void partition_mesh(
MPI_Barrier(MPI_COMM_WORLD);
}

// ******************************************************************************************
// Build the intermediate mesh (without ghost nodes and elements) from the repartition
// ******************************************************************************************
// ******************************************************************************************
// Build the intermediate mesh (without ghost nodes and elements) from the repartition
// ******************************************************************************************



Expand Down Expand Up @@ -2263,8 +2275,18 @@ inline void partition_mesh(
if(rank == 0 && print_info) std::cout<<" Finished exchanging node coordinates"<<std::endl;

// -------------- Phase 6: Build the intermediate_mesh --------------
if(rank == 0 && print_info) printf("Building the intermediate_mesh\n");
if(rank == 0 && print_info) printf("naive mesh num_dims = %zu\n", naive_mesh.num_dims);

intermediate_mesh.num_dims = naive_mesh.num_dims;
if(rank == 0 && print_info) printf("intermediate mesh num_dims = %zu\n", intermediate_mesh.num_dims);

intermediate_mesh.initialize_nodes(num_new_nodes);
if (initial_mesh.Pn > 0){
// Use the high-order (Pn) initialization only for Pn > 1; for the linear case
// (Pn == 1) use initialize_elems to match naive_mesh and avoid creating an
// arbitrary_tensor_element layout whose connectivity-build path writes
// 8 nodes-per-zone unconditionally and would corrupt the heap in 2D.
if (initial_mesh.Pn > 1){
intermediate_mesh.initialize_elems_Pn(num_new_elems, naive_mesh.num_dims, initial_mesh.Pn);
} else {
intermediate_mesh.initialize_elems(num_new_elems, naive_mesh.num_dims);
Expand Down Expand Up @@ -2351,7 +2373,7 @@ inline void partition_mesh(

MPI_Barrier(MPI_COMM_WORLD);
if(rank == 0 && print_info) std::cout<<" Finished the ghost element and node construction"<<std::endl;

MPI_Barrier(MPI_COMM_WORLD);
}

} // namespace elements
Expand Down
Loading
Loading