diff --git a/CMakeLists.txt b/CMakeLists.txt index 4430ef81..b738fbff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,7 +57,7 @@ endif() # --- Options --- option(ELEMENTS_ENABLE_SERIAL "Enable Serial support in Kokkos" ON) -option(ELEMENTS_ENABLE_OPENMP "Enable OpenMP support in Kokkos" ON) +option(ELEMENTS_ENABLE_OPENMP "Enable OpenMP support in Kokkos" OFF) option(ELEMENTS_ENABLE_PTHREADS "Enable PThreads support in Kokkos" OFF) option(ELEMENTS_ENABLE_CUDA "Enable CUDA support in Kokkos" OFF) option(ELEMENTS_ENABLE_HIP "Enable HIP support in Kokkos" OFF) diff --git a/examples/decomp_example/include/mesh_io.h b/examples/decomp_example/include/mesh_io.h index a2ddba87..33931ff9 100644 --- a/examples/decomp_example/include/mesh_io.h +++ b/examples/decomp_example/include/mesh_io.h @@ -501,6 +501,7 @@ void write_vtu(swage::Mesh& mesh, const size_t num_nodes = mesh.num_owned_nodes; const size_t num_elems = mesh.num_owned_elems; const size_t num_dims = mesh.num_dims; + printf("num_dims = %zu\n", num_dims); // save the cell state to an array for exporting to graphics files auto elem_fields = CArray(num_elems, num_cell_scalar_vars); @@ -612,7 +613,7 @@ void write_vtu(swage::Mesh& mesh, CArray convert_fierro_to_vtk((Pn_order+1)*(Pn_order+1)*(Pn_order+1)); int this_node_lid = 0; - for (int k = 0; k <= Pn_order; k++) { + for (int k = 0; k <= Pn_order_z; k++) { for (int j = 0; j <= Pn_order; j++) { for (int i = 0; i <= Pn_order; i++) { @@ -637,11 +638,12 @@ void write_vtu(swage::Mesh& mesh, fprintf(vtu_file, " "); // adding indentation before printing nodes in element int this_node_lid = 0; - for (int k = 0; k <= Pn_order; k++) { + for (int k = 0; k <= Pn_order_z; k++) { for (int j = 0; j <= Pn_order; j++) { for (int i = 0; i <= Pn_order; i++) { - // size_t node_lid = PointIndexFromIJK(i, j, k, order); + // size_t node_lid = PointIndexFromIJK(i, j, k, order); int node_lid = convert_fierro_to_vtk(this_node_lid); + if (num_dims == 2) node_lid = this_node_lid; fprintf(vtu_file, " %zu", static_cast(mesh.nodes_in_elem.host(elem_gid, node_lid))); this_node_lid ++; } @@ -711,9 +713,9 @@ void write_vtu(swage::Mesh& mesh, // Point vector variables for (int var = 0; var < num_point_vec_vars; var++) { fprintf(vtu_file, " \n", - point_vec_var_names[var], num_dims); + point_vec_var_names[var], static_cast(num_dims)); for (size_t node_gid = 0; node_gid < num_nodes; node_gid++) { - for (int dim = 0; dim < num_dims; dim++) { + for (int dim = 0; dim < static_cast(num_dims); dim++) { fprintf(vtu_file, " %f\n", vec_fields(node_gid, var, dim)); } } @@ -737,10 +739,10 @@ void write_vtu(swage::Mesh& mesh, // Cell vector variables for (int var = 0; var < num_cell_vec_vars; var++) { fprintf(vtu_file, " \n", - cell_vec_var_names[var], num_dims); + cell_vec_var_names[var], static_cast(num_dims)); for (size_t elem_gid = 0; elem_gid < num_elems; elem_gid++) { // TODO: Populate cell vector field data from appropriate source - for (int dim = 0; dim < num_dims; dim++) { + for (int dim = 0; dim < static_cast(num_dims); dim++) { fprintf(vtu_file, " %f\n", gauss_point.fields_vec.host(elem_gid, dim)); } @@ -799,7 +801,7 @@ void write_vtu(swage::Mesh& mesh, fprintf(pvtu_file, " \n"); for (int var = 0; var < num_point_vec_vars; var++) { fprintf(pvtu_file, " \n", - point_vec_var_names[var], num_dims); + point_vec_var_names[var], static_cast(num_dims)); } for (int var = 0; var < num_point_scalar_vars; var++) { fprintf(pvtu_file, " \n", @@ -811,7 +813,7 @@ void write_vtu(swage::Mesh& mesh, fprintf(pvtu_file, " \n"); for (int var = 0; var < num_cell_vec_vars; var++) { fprintf(pvtu_file, " \n", - cell_vec_var_names[var], num_dims); + cell_vec_var_names[var], static_cast(num_dims)); } for (int var = 0; var < num_cell_scalar_vars; var++) { fprintf(pvtu_file, " \n", diff --git a/examples/decomp_example/src/mesh_decomp_example.cpp b/examples/decomp_example/src/mesh_decomp_example.cpp index b82cb839..7fad86d7 100644 --- a/examples/decomp_example/src/mesh_decomp_example.cpp +++ b/examples/decomp_example/src/mesh_decomp_example.cpp @@ -40,8 +40,8 @@ int main(int argc, char** argv) { MPI_Comm_size(MPI_COMM_WORLD, &world_size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); - int num_dims = 3; - int Pn_order = 2; + int num_dims = 2; + int Pn_order = 1; double t_main_start = MPI_Wtime(); @@ -55,8 +55,8 @@ int main(int argc, char** argv) { double outer_radius = 2.0; double start_angle = 0.0; double end_angle = 45.0; - int num_elems_i = 100; - int num_elems_j = 100; + int num_elems_i = 40; + int num_elems_j = 40; // Initial mesh built on rank zero swage::Mesh initial_mesh; @@ -79,6 +79,9 @@ int main(int argc, char** argv) { std::cout<<"Rank "< elems_per_rank(world_size); // number of elements to send to each rank size(world_size) @@ -487,7 +489,7 @@ inline void naive_partition_mesh( // Initialize the naive_mesh data structures for each rank // ****************************************************************************************** naive_mesh.initialize_nodes(num_nodes_on_rank); - if (initial_mesh.Pn > 0){ + if (initial_mesh.Pn > 1){ naive_mesh.initialize_elems_Pn(num_elements_on_rank, num_dim, initial_mesh.Pn); } else { naive_mesh.initialize_elems(num_elements_on_rank, num_dim); @@ -1120,7 +1122,10 @@ inline void build_ghost( output_mesh.initialize_nodes(total_extended_nodes); - if (input_mesh.Pn > 0){ + // Use the high-order (Pn) initialization only for Pn > 1; for the linear case + // (Pn == 1) use initialize_elems to keep the element kind/layout consistent + // with naive_mesh and intermediate_mesh. + if (input_mesh.Pn > 1){ output_mesh.initialize_elems_Pn(total_extended_elems, input_mesh.num_dims, input_mesh.Pn); } else { @@ -1361,7 +1366,7 @@ inline void build_ghost( } MPI_Barrier(MPI_COMM_WORLD); - if(rank == 0 && print_info) std::cout<<"After boundary_elem_targets"< boundary_elem_local_ids; @@ -1401,6 +1406,8 @@ inline void build_ghost( MPI_Barrier(MPI_COMM_WORLD); + if(rank == 0 && print_info) printf("After ghost_comm_ranks\n"); + output_mesh.num_boundary_elems = boundary_elem_local_ids.size(); output_mesh.boundary_elem_local_ids = DCArrayKokkos(output_mesh.num_boundary_elems, "boundary_elem_local_ids"); for (int i = 0; i < output_mesh.num_boundary_elems; i++) { @@ -1408,8 +1415,6 @@ inline void build_ghost( } output_mesh.boundary_elem_local_ids.update_device(); - print_info = false; - MPI_Barrier(MPI_COMM_WORLD); @@ -1438,6 +1443,7 @@ inline void build_ghost( } MPI_Barrier(MPI_COMM_WORLD); + if(rank == 0 && print_info) printf("After node_set_to_send_by_rank\n"); std::map> nodes_to_send_by_rank; // rank -> list of global node indices @@ -1483,6 +1489,7 @@ inline void build_ghost( // Initialize the graph communicator for element communication element_communication_plan.initialize_graph_communicator(outdegree, ghost_comm_ranks_vec.data(), elem_indegree, ghost_elem_receive_ranks_vec.data()); MPI_Barrier(MPI_COMM_WORLD); + if(rank == 0 && print_info) printf("After element graph communicator\n"); // Optional: Verify the graph communicator was created successfully // if(print_info) element_communication_plan.verify_graph_communicator(); @@ -1585,9 +1592,9 @@ inline void build_ghost( elems_to_recv_by_rank_rr.update_device(); MATAR_FENCE(); element_communication_plan.setup_send_recv(elems_to_send_by_rank_rr, elems_to_recv_by_rank_rr); - + MPI_Barrier(MPI_COMM_WORLD); - + if(rank == 0 && print_info) printf("After element communication plan setup\n"); // -------------------------------------------------------------------------------------- // Build the send pattern for nodes // -------------------------------------------------------------------------------------- @@ -1654,7 +1661,7 @@ inline void build_ghost( size_t node_gid = output_mesh.local_to_global_node_mapping.host(i); node_gid_to_ghost_lid[node_gid] = i; } - + if(rank == 0 && print_info) printf("After node_gid_to_ghost_lid\n"); // Now convert the GID sets to local index vectors for (const auto& pair : node_set_to_recv_by_rank) { int source_rank = pair.first; @@ -1687,7 +1694,7 @@ inline void build_ghost( } } nodes_to_recv_by_rank_rr.update_device(); - + if(rank == 0 && print_info) printf("After nodes_to_recv_by_rank_rr\n"); MPI_Barrier(MPI_COMM_WORLD); node_communication_plan.setup_send_recv(nodes_to_send_by_rank_rr, nodes_to_recv_by_rank_rr); @@ -1734,7 +1741,8 @@ inline void partition_mesh( CommunicationPlan& element_communication_plan, CommunicationPlan& node_communication_plan, int world_size, - int rank){ + int rank) +{ bool print_info = false; @@ -1746,10 +1754,14 @@ inline void partition_mesh( unsigned long long Pn_order = static_cast(initial_mesh.Pn); MPI_Bcast(&Pn_order, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); initial_mesh.Pn = static_cast(Pn_order); + final_mesh.num_dims = initial_mesh.num_dims; + final_mesh.Pn = initial_mesh.Pn; // Create mesh, gauss points, and node data structures on each rank // This is the initial partitioned mesh swage::Mesh naive_mesh; + naive_mesh.num_dims = initial_mesh.num_dims; + naive_mesh.Pn = initial_mesh.Pn; MPICArrayKokkos naive_node_coords; // Mesh partitioned by pt-scotch, not including ghost @@ -2101,9 +2113,9 @@ inline void partition_mesh( MPI_Barrier(MPI_COMM_WORLD); } -// ****************************************************************************************** -// Build the intermediate mesh (without ghost nodes and elements) from the repartition -// ****************************************************************************************** + // ****************************************************************************************** + // Build the intermediate mesh (without ghost nodes and elements) from the repartition + // ****************************************************************************************** @@ -2263,8 +2275,18 @@ inline void partition_mesh( if(rank == 0 && print_info) std::cout<<" Finished exchanging node coordinates"< 0){ + // Use the high-order (Pn) initialization only for Pn > 1; for the linear case + // (Pn == 1) use initialize_elems to match naive_mesh and avoid creating an + // arbitrary_tensor_element layout whose connectivity-build path writes + // 8 nodes-per-zone unconditionally and would corrupt the heap in 2D. + if (initial_mesh.Pn > 1){ intermediate_mesh.initialize_elems_Pn(num_new_elems, naive_mesh.num_dims, initial_mesh.Pn); } else { intermediate_mesh.initialize_elems(num_new_elems, naive_mesh.num_dims); @@ -2351,7 +2373,7 @@ inline void partition_mesh( MPI_Barrier(MPI_COMM_WORLD); if(rank == 0 && print_info) std::cout<<" Finished the ghost element and node construction"<(num_nodes, "mesh.num_corners_in_node"); // stride sizes // initializing the number of corners (node-cell pair) to be zero @@ -459,6 +474,10 @@ struct Mesh // build elem connectivity arrays void build_elem_elem_connectivity() { + if (num_dims == 0) { + Kokkos::abort("Error: mesh.num_dims is not set. Exiting at build_elem_elem_connectivity()."); + } + // find the max number of elems around a node size_t max_num_elems_in_node; size_t max_num_lcl; @@ -539,6 +558,9 @@ struct Mesh // build the patches void build_patch_connectivity() { + if (num_dims == 0) { + Kokkos::abort("Error: mesh.num_dims is not set. Exiting at build_patch_connectivity()."); + } // WARNING WARNING // the mesh element kind should be in the input file and set when reading mesh // mesh_elem_kind = mesh_init::linear_tensor_element; // MUST BE SET @@ -1331,6 +1353,9 @@ struct Mesh // build the patches void build_node_node_connectivity() { + if (num_dims == 0) { + Kokkos::abort("Error: mesh.num_dims is not set. Exiting at build_node_node_connectivity()."); + } // find the max number of elems around a node size_t max_num_elems_in_node; size_t max_num_lcl; @@ -1462,6 +1487,9 @@ struct Mesh ///////////////////////////////////////////////////////////////////////////// void build_connectivity() { + if (num_dims == 0) { + Kokkos::abort("Error: mesh.num_dims is not set. Exiting at build_connectivity()."); + } build_corner_connectivity(); if (verbose) printf("Built corner connectivity \n"); @@ -1484,6 +1512,9 @@ struct Mesh ///////////////////////////////////////////////////////////////////////////// void init_bdy_sets(size_t num_bcs) { + if (num_dims == 0) { + Kokkos::abort("Error: mesh.num_dims is not set. Exiting at init_bdy_sets()."); + } // if (num_bcs == 0) { // printf("ERROR: number of boundary sets = 0, set it = 1"); // num_bcs = 1;